summaryrefslogtreecommitdiffstats
path: root/xlators/storage
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/storage')
-rw-r--r--xlators/storage/posix/src/posix-helpers.c20
-rw-r--r--xlators/storage/posix/src/posix-inode-fd-ops.c268
-rw-r--r--xlators/storage/posix/src/posix-messages.h3
-rw-r--r--xlators/storage/posix/src/posix-metadata.c78
-rw-r--r--xlators/storage/posix/src/posix-metadata.h5
-rw-r--r--xlators/storage/posix/src/posix.c1
-rw-r--r--xlators/storage/posix/src/posix.h7
7 files changed, 381 insertions, 1 deletions
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
index ed0516d4c4a..54fc1dc1195 100644
--- a/xlators/storage/posix/src/posix-helpers.c
+++ b/xlators/storage/posix/src/posix-helpers.c
@@ -3314,3 +3314,23 @@ unlock:
out:
return ret;
}
+
+int
+posix_check_dev_file(xlator_t *this, inode_t *inode, char *fop, int *op_errno)
+{
+ int ret = -1;
+
+ if (inode->ia_type == IA_IFBLK || inode->ia_type == IA_IFCHR) {
+ *op_errno = EINVAL;
+ gf_msg(this->name, GF_LOG_ERROR, *op_errno, P_MSG_INVALID_ARGUMENT,
+ "%s received on %s file (%s)", fop,
+ (inode->ia_type == IA_IFBLK) ? "block" : "char",
+ uuid_utoa(inode->gfid));
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c
index 9e2b37f582c..dafd1855ef9 100644
--- a/xlators/storage/posix/src/posix-inode-fd-ops.c
+++ b/xlators/storage/posix/src/posix-inode-fd-ops.c
@@ -1960,6 +1960,274 @@ out:
}
int32_t
+posix_copy_file_range(call_frame_t *frame, xlator_t *this, fd_t *fd_in,
+ off64_t off_in, fd_t *fd_out, off64_t off_out, size_t len,
+ uint32_t flags, dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ int _fd_in = -1;
+ int _fd_out = -1;
+ struct posix_private *priv = NULL;
+ struct posix_fd *pfd_in = NULL;
+ struct posix_fd *pfd_out = NULL;
+ struct iatt preop_dst = {
+ 0,
+ };
+ struct iatt postop_dst = {
+ 0,
+ };
+ struct iatt stbuf = {
+ 0,
+ };
+ int ret = -1;
+ dict_t *rsp_xdata = NULL;
+ int is_append = 0;
+ gf_boolean_t locked = _gf_false;
+ gf_boolean_t update_atomic = _gf_false;
+ posix_inode_ctx_t *ctx = NULL;
+
+ VALIDATE_OR_GOTO(frame, out);
+ VALIDATE_OR_GOTO(this, out);
+ VALIDATE_OR_GOTO(fd_in, out);
+ VALIDATE_OR_GOTO(fd_in->inode, out);
+ VALIDATE_OR_GOTO(fd_out, out);
+ VALIDATE_OR_GOTO(fd_out->inode, out);
+ VALIDATE_OR_GOTO(this->private, out);
+
+ priv = this->private;
+
+ VALIDATE_OR_GOTO(priv, out);
+ DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out);
+
+ if (posix_check_dev_file(this, fd_in->inode, "copy_file_range", &op_errno))
+ goto out;
+
+ if (posix_check_dev_file(this, fd_out->inode, "copy_file_range", &op_errno))
+ goto out;
+
+ ret = posix_fd_ctx_get(fd_in, this, &pfd_in, &op_errno);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL,
+ "pfd is NULL from fd=%p", fd_in);
+ goto out;
+ }
+
+ _fd_in = pfd_in->fd;
+
+ ret = posix_fd_ctx_get(fd_out, this, &pfd_out, &op_errno);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL,
+ "pfd is NULL from fd=%p", fd_out);
+ goto out;
+ }
+
+ _fd_out = pfd_out->fd;
+
+ /*
+ * Currently, the internal write is checked via xdata which
+ * is set by some xlator above. It could be due to several of
+ * the reasons such as healing or a snapshot operation happening
+ * using copy_file_range. As of now (i.e. writing the patch with
+ * this change) none of the xlators above posix are using the
+ * internal write with copy_file_range. In future it might
+ * change. Atleast as of now the hope is that, when that happens
+ * this functon or fop does not require additional changes for
+ * handling internal writes.
+ */
+ ret = posix_check_internal_writes(this, fd_out, _fd_out, xdata);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "possible overwrite from internal client, fd=%p", fd_out);
+ op_ret = -1;
+ op_errno = EBUSY;
+ goto out;
+ }
+
+ if (xdata) {
+ if (dict_get(xdata, GLUSTERFS_WRITE_UPDATE_ATOMIC))
+ update_atomic = _gf_true;
+ }
+
+ /*
+ * The update_atomic option is to instruct posix to do prestat,
+ * write and poststat atomically. This is to prevent any modification to
+ * ia_size and ia_blocks until poststat and the diff in their values
+ * between pre and poststat could be of use for some translators.
+ * This is similar to the atomic write operation. atmoic write is
+ * (i.e. prestat + write + poststat) used by shard as of now. In case,
+ * some xlator needs copy_file_range to be atomic from prestat and postat
+ * prespective (i.e. prestat + copy_file_range + poststat) then it has
+ * to send "GLUSTERFS_WRITE_UPDATE_ATOMIC" key in xdata.
+ */
+
+ op_ret = posix_inode_ctx_get_all(fd_out->inode, this, &ctx);
+ if (op_ret < 0) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ if (update_atomic) {
+ ret = pthread_mutex_lock(&ctx->write_atomic_lock);
+ if (!ret)
+ locked = _gf_true;
+ else {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_MUTEX_FAILED,
+ "failed to hold write atomic lock on %s",
+ uuid_utoa(fd_out->inode->gfid));
+ goto out;
+ }
+ }
+
+ op_ret = posix_fdstat(this, fd_out->inode, _fd_out, &preop_dst);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+ "pre-operation fstat failed on fd=%p", fd_out);
+ goto out;
+ }
+
+ /*
+ * Since, only the destination file (fd_out) is undergoing
+ * modification, the write related tests are done on that.
+ * i.e. this is treater similar to as if the destination file
+ * undergoing write fop from maintenance perspective.
+ */
+ if (xdata) {
+ op_ret = posix_cs_maintenance(this, fd_out, NULL, &_fd_out, &preop_dst,
+ NULL, xdata, &rsp_xdata, _gf_false);
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "file state check failed, fd %p", fd_out);
+ op_errno = EIO;
+ goto out;
+ }
+ }
+
+ /*
+ * NOTE: This is just doing a single execution of copy_file_range
+ * system call. If the returned value of this system call is less
+ * than len, then should we keep doing it in a for loop until the
+ * copy_file_range of all the len bytes is done?
+ * Check the example program provided in the man page of
+ * copy_file_range.
+ * If so, then a separate variables for both off_in and off_out
+ * should be used which are initialized to off_in and off_out
+ * that this function call receives, but then advanced by the
+ * value returned by sys_copy_file_range and then use that as
+ * off_in and off_out for next instance of copy_file_range execution.
+ */
+ op_ret = sys_copy_file_range(_fd_in, &off_in, _fd_out, &off_out, len,
+ flags);
+
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ op_ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_COPY_FILE_RANGE_FAILED,
+ "copy_file_range failed: fd_in: %p (gfid: %s) ,"
+ " fd_out %p (gfid:%s)",
+ fd_in, uuid_utoa(fd_in->inode->gfid), fd_out,
+ uuid_utoa(fd_out->inode->gfid));
+ goto out;
+ }
+
+ /*
+ * Let this be as it is for now. This function collects
+ * infomration such as open fd count etc. So, even though
+ * is_append does not apply to copy_file_range, for now,
+ * allowing it to be recorded in the dict as _gf_false.
+ */
+ rsp_xdata = _fill_writev_xdata(fd_out, xdata, this, is_append);
+
+ /* copy_file_range successful, we also need to get the stat of
+ * the file we wrote to (i.e. destination file or fd_out).
+ */
+ ret = posix_fdstat(this, fd_out->inode, _fd_out, &postop_dst);
+ if (ret == -1) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+ "post-operation fstat failed on fd=%p", fd_out);
+ goto out;
+ }
+
+ /*
+ * Also perform the stat on the source fd (i.e. fd_in). For now,
+ * allowing it to be done within the locked region if the request
+ * is for atomic operation (and update) of copy_file_range.
+ */
+ ret = posix_fdstat(this, fd_in->inode, _fd_in, &stbuf);
+ if (ret == -1) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+ "post-operation fstat failed on fd=%p", fd_in);
+ goto out;
+ }
+
+ /*
+ * The core logic of what time attributes are to be updated
+ * on a fop is decided at client side xlator utime.
+ * All the remaining fops call posix_set_ctime function
+ * to update the {a,m,c}time. But, for all the other fops,
+ * the operation is happening on only one file (or inode).
+ * But here, there are 2 fds (source and destination). Hence
+ * the new function below to update the appropriate times for
+ * both the source and the destination file.
+ * For the source file, if at all anything has to be updated,
+ * it would be atime (as that file is only read, not updated).
+ * For the destination file, the attributes that require the
+ * modification would be mtime and ctime.
+ * What times have to be changed is actually determined by
+ * utime xlator. But, all of them would be in frame->root->flags.
+ * So, currently posix assumes that, the atime flag is for
+ * the source file and the other 2 flags are for the destination
+ * file. Since, the assumption is rigid (i.e. atime for source
+ * and {m,c}time for destination), the below function is called
+ * posix_set_ctime_cfr (cfr standing for copy_file_range).
+ * FUTURE TODO:
+ * In future, some other functionality or fop might operate
+ * simultaneously on 2 files. Then, depending upon what that new
+ * fop does or what are its requirements, the below function might
+ * require changes to become generic for consumption in case of
+ * simultaneous operations on 2 files.
+ */
+ posix_set_ctime_cfr(frame, this, NULL, pfd_in->fd, fd_in->inode, &stbuf,
+ NULL, pfd_out->fd, fd_out->inode, &postop_dst);
+
+ if (locked) {
+ pthread_mutex_unlock(&ctx->write_atomic_lock);
+ locked = _gf_false;
+ }
+
+ /*
+ * Record copy_file_range in priv->write_value for now.
+ * If not needed, remove below section of code along with
+ * this comment (or add comment to explain why it is not
+ * needed).
+ */
+ LOCK(&priv->lock);
+ {
+ priv->write_value += op_ret;
+ }
+ UNLOCK(&priv->lock);
+
+out:
+
+ if (locked) {
+ pthread_mutex_unlock(&ctx->write_atomic_lock);
+ locked = _gf_false;
+ }
+
+ STACK_UNWIND_STRICT(copy_file_range, frame, op_ret, op_errno, &stbuf,
+ &preop_dst, &postop_dst, rsp_xdata);
+
+ if (rsp_xdata)
+ dict_unref(rsp_xdata);
+ return 0;
+}
+
+int32_t
posix_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
char *real_path = NULL;
diff --git a/xlators/storage/posix/src/posix-messages.h b/xlators/storage/posix/src/posix-messages.h
index 62af32ac8fe..928723db8f9 100644
--- a/xlators/storage/posix/src/posix-messages.h
+++ b/xlators/storage/posix/src/posix-messages.h
@@ -67,6 +67,7 @@ GLFS_MSGID(POSIX, P_MSG_XATTR_FAILED, P_MSG_NULL_GFID, P_MSG_FCNTL_FAILED,
P_MSG_ANCESTORY_FAILED, P_MSG_DISK_SPACE_CHECK_FAILED,
P_MSG_FALLOCATE_FAILED, P_MSG_STOREMDATA_FAILED,
P_MSG_FETCHMDATA_FAILED, P_MSG_GETMDATA_FAILED,
- P_MSG_SETMDATA_FAILED, P_MSG_FRESHFILE);
+ P_MSG_SETMDATA_FAILED, P_MSG_FRESHFILE, P_MSG_MUTEX_FAILED,
+ P_MSG_COPY_FILE_RANGE_FAILED);
#endif /* !_GLUSTERD_MESSAGES_H_ */
diff --git a/xlators/storage/posix/src/posix-metadata.c b/xlators/storage/posix/src/posix-metadata.c
index 26fae2019b5..62669a0b83f 100644
--- a/xlators/storage/posix/src/posix-metadata.c
+++ b/xlators/storage/posix/src/posix-metadata.c
@@ -663,3 +663,81 @@ posix_set_parent_ctime(call_frame_t *frame, xlator_t *this,
out:
return;
}
+
+void
+posix_set_ctime_cfr(call_frame_t *frame, xlator_t *this,
+ const char *real_path_in, int fd_in, inode_t *inode_in,
+ struct iatt *stbuf_in, const char *real_path_out,
+ int fd_out, inode_t *inode_out, struct iatt *stbuf_out)
+{
+ posix_mdata_flag_t flag = {
+ 0,
+ };
+ posix_mdata_flag_t flag_dup = {
+ 0,
+ };
+ int ret = 0;
+ struct posix_private *priv = NULL;
+
+ priv = this->private;
+
+ if (priv->ctime) {
+ (void)posix_get_mdata_flag(frame->root->flags, &flag);
+ if ((flag.ctime == 0) && (flag.mtime == 0) && (flag.atime == 0)) {
+ goto out;
+ }
+
+ if (frame->root->ctime.tv_sec == 0) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED,
+ "posix set mdata failed, No ctime : in: %s gfid_in:%s "
+ "out: %s gfid_out:%s",
+ real_path_in,
+ inode_in ? uuid_utoa(inode_in->gfid) : "No inode",
+ real_path_out,
+ inode_out ? uuid_utoa(inode_out->gfid) : "No inode");
+ goto out;
+ }
+
+ flag_dup = flag;
+
+ /*
+ * For the destination file, no need to update atime.
+ * It got modified. Hence the things that need to be
+ * changed are mtime and ctime (provided the utime
+ * xlator from the client has set those flags, which
+ * are just copied to flag_dup).
+ */
+ if (flag.atime)
+ flag_dup.atime = 0;
+
+ ret = posix_set_mdata_xattr(this, real_path_out, fd_out, inode_out,
+ &frame->root->ctime, stbuf_out, &flag_dup,
+ _gf_false);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED,
+ "posix set mdata failed on file: %s gfid:%s", real_path_out,
+ inode_out ? uuid_utoa(inode_out->gfid) : "No inode");
+ }
+
+ /*
+ * For the source file, no need to change the mtime and ctime.
+ * For source file, it is only read operation. So, if at all
+ * anything needs to be updated, it is only the atime.
+ */
+ if (flag.atime)
+ flag_dup.atime = flag.atime;
+ flag_dup.mtime = 0;
+ flag_dup.ctime = 0;
+
+ ret = posix_set_mdata_xattr(this, real_path_in, fd_out, inode_out,
+ &frame->root->ctime, stbuf_out, &flag_dup,
+ _gf_false);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED,
+ "posix set mdata failed on file: %s gfid:%s", real_path_in,
+ inode_in ? uuid_utoa(inode_in->gfid) : "No inode");
+ }
+ }
+out:
+ return;
+}
diff --git a/xlators/storage/posix/src/posix-metadata.h b/xlators/storage/posix/src/posix-metadata.h
index e1b549d55a1..3416148ea97 100644
--- a/xlators/storage/posix/src/posix-metadata.h
+++ b/xlators/storage/posix/src/posix-metadata.h
@@ -48,5 +48,10 @@ void
posix_set_parent_ctime(call_frame_t *frame, xlator_t *this,
const char *real_path, int fd, inode_t *inode,
struct iatt *stbuf);
+void
+posix_set_ctime_cfr(call_frame_t *frame, xlator_t *this,
+ const char *real_path_in, int fd_in, inode_t *inode_in,
+ struct iatt *stbuf_in, const char *read_path_put,
+ int fd_out, inode_t *inode_out, struct iatt *stbuf_out);
#endif /* _POSIX_METADATA_H */
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
index d6a20135f96..42b965434b9 100644
--- a/xlators/storage/posix/src/posix.c
+++ b/xlators/storage/posix/src/posix.c
@@ -76,6 +76,7 @@ struct xlator_fops fops = {
.seek = posix_seek,
.lease = posix_lease,
.put = posix_put,
+ .copy_file_range = posix_copy_file_range,
};
struct xlator_cbks cbks = {
diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
index 1f1d4fc2774..a1ec996f4b2 100644
--- a/xlators/storage/posix/src/posix.h
+++ b/xlators/storage/posix/src/posix.h
@@ -638,6 +638,11 @@ posix_put(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
off_t offset, struct iobref *iobref, dict_t *xattr, dict_t *xdata);
int32_t
+posix_copy_file_range(call_frame_t *frame, xlator_t *this, fd_t *fd_in,
+ off64_t off_in, fd_t *fd_out, off64_t off_out, size_t len,
+ uint32_t flags, dict_t *xdata);
+
+int32_t
posix_set_mode_in_dict(dict_t *in_dict, dict_t *out_dict,
struct iatt *in_stbuf);
@@ -656,5 +661,7 @@ int
posix_cs_maintenance(xlator_t *this, fd_t *fd, loc_t *loc, int *pfd,
struct iatt *buf, const char *realpath, dict_t *xattr_req,
dict_t **xattr_rsp, gf_boolean_t ignore_failure);
+int
+posix_check_dev_file(xlator_t *this, inode_t *inode, char *fop, int *op_errno);
#endif /* _POSIX_H */