summaryrefslogtreecommitdiffstats
path: root/xlators/mount/fuse/src
diff options
context:
space:
mode:
authorRaghavendra Bhat <raghavendra@redhat.com>2018-11-06 15:27:31 -0500
committerAmar Tumballi <amarts@redhat.com>2018-12-12 15:56:55 +0000
commit7dadea15c58eb92e5f5727190bf9446dd6fe7a3c (patch)
tree4ced04de0219407604f30b1663b586f16b54dd06 /xlators/mount/fuse/src
parent5c723ade196600030ee84621384cceb10fff64d8 (diff)
copy_file_range support in GlusterFS
* libglusterfs changes to add new fop * Fuse changes: - Changes in fuse bridge xlator to receive and send responses * posix changes to perform the op on the backend filesystem * protocol and rpc changes for sending and receiving the fop * gfapi changes for performing the fop * tools: glfs-copy-file-range tool for testing copy_file_range fop - Although copy_file_range support has been added to the upstream fuse kernel module, no release has been made yet of a kernel which contains the support. It is expected to come in the upcoming release of linux-4.20. So, as of now, executing copy_file_range fop on a fuse-based filesystem results in the fuse kernel module sending read on the source fd and write on the destination fd. Therefore a small gfapi based tool has been written to be able to test the copy_file_range fop. This tool is similar (in functionality) to the example program given in the copy_file_range man page. So, running regular copy_file_range on a fuse mount point and running the gfapi based glfs-copy-file-range tool gives some idea about how fast the copy_file_range (or reflink) can be. On the local machine this was the result obtained.
mount -t glusterfs workstation:new /mnt/glusterfs [root@workstation ~]# cd /mnt/glusterfs/ [root@workstation glusterfs]# ls file [root@workstation glusterfs]# cd [root@workstation ~]# time /tmp/a.out /mnt/glusterfs/file /mnt/glusterfs/new real 0m6.495s user 0m0.000s sys 0m1.439s [root@workstation ~]# time glfs-copy-file-range $(hostname) new /tmp/glfs.log /file /rrr OPEN_SRC: opening /file is success OPEN_DST: opening /rrr is success FSTAT_SRC: fstat on /rrr is success copy_file_range successful real 0m0.309s user 0m0.039s sys 0m0.017s This tool needs following arguments 1) hostname 2) volume name 3) log file path 4) source file path (relative to the gluster volume root) 5) destination file path (relative to the gluster volume root) "glfs-copy-file-range <hostname> <volume> <log file path> <source> <destination>" - Added a testcase as well to run glfs-copy-file-range tool * io-stats changes to capture the fop for profiling * NOTE: - Added conditional check to see whether the copy_file_range syscall is available or not. If not, then return ENOSYS. - Added conditional check for kernel minor version in fuse_kernel.h and fuse-bridge while referring to copy_file_range. And the kernel minor version is kept as it is. i.e. 24. Increment it in future when there is a kernel release which contains the support for copy_file_range fop in fuse kernel module. * The document which contains a writeup on this enhancement can be found at https://docs.google.com/document/d/1BSILbXr_knynNwxSyyu503JoTz5QFM_4suNIh2WwrSc/edit Change-Id: I280069c814dd21ce6ec3be00a884fc24ab692367 updates: #536 Signed-off-by: Raghavendra Bhat <raghavendra@redhat.com>
Diffstat (limited to 'xlators/mount/fuse/src')
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.c114
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.h36
2 files changed, 150 insertions, 0 deletions
diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
index 3b2622b431f..3f4e19c211e 100644
--- a/xlators/mount/fuse/src/fuse-bridge.c
+++ b/xlators/mount/fuse/src/fuse-bridge.c
@@ -2993,6 +2993,116 @@ fuse_write(xlator_t *this, fuse_in_header_t *finh, void *msg,
return;
}
+#if FUSE_KERNEL_MINOR_VERSION >= 28
+/*
+ * Callback for the COPY_FILE_RANGE fop wound by
+ * fuse_copy_file_range_resume().
+ *
+ * On success (op_ret >= 0) the number of bytes copied is sent back to the
+ * kernel in a fuse_write_out. On failure a warning carrying the gfids and
+ * fd pointers of both files is logged and op_errno is sent back as the
+ * error. In both cases the fuse state is freed and the call stack is
+ * destroyed before returning.
+ *
+ * Always returns 0.
+ */
+static int
+fuse_copy_file_range_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                         int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
+                         struct iatt *prebuf_dst, struct iatt *postbuf_dst,
+                         dict_t *xdata)
+{
+    fuse_state_t *state = NULL;
+    fuse_in_header_t *finh = NULL;
+    /*
+     * Fuse kernel module uses fuse_write_out itself as the
+     * output collector. In fact, fuse_kernel.h in the upstream
+     * kernel just defines the input structure fuse_copy_file_range_in
+     * for the fop. So, just use the fuse_write_out to send the
+     * response back to the kernel.
+     */
+    struct fuse_write_out fcfro = {
+        0,
+    };
+
+    /* Separate buffers so that both gfids can appear in one log message. */
+    char src_gfid[GF_UUID_BUF_SIZE] = {0};
+    char dst_gfid[GF_UUID_BUF_SIZE] = {0};
+
+    state = frame->root->state;
+    finh = state->finh;
+
+    fuse_log_eh_fop(this, state, frame, op_ret, op_errno);
+
+    if (op_ret >= 0) {
+        /*
+         * Fields after the fop name: bytes copied / requested size,
+         * source offset, destination offset, size reported in stbuf,
+         * size reported in postbuf_dst.
+         */
+        gf_log("glusterfs-fuse", GF_LOG_TRACE,
+               "%" PRIu64 ": COPY_FILE_RANGE => %d/%" GF_PRI_SIZET ",%" PRIu64
+               " , %" PRIu64 " ,%" PRIu64 ",%" PRIu64,
+               frame->root->unique, op_ret, state->size, state->off_in,
+               state->off_out, stbuf->ia_size, postbuf_dst->ia_size);
+
+        fcfro.size = op_ret;
+        send_fuse_obj(this, finh, &fcfro);
+    } else {
+        /* Either fd (or its inode) may be missing; log "nil" then. */
+        if (state->fd && state->fd->inode)
+            uuid_utoa_r(state->fd->inode->gfid, src_gfid);
+        else
+            snprintf(src_gfid, sizeof(src_gfid), "nil");
+
+        if (state->fd_dst && state->fd_dst->inode)
+            uuid_utoa_r(state->fd_dst->inode->gfid, dst_gfid);
+        else
+            snprintf(dst_gfid, sizeof(dst_gfid), "nil");
+
+        gf_log("glusterfs-fuse", GF_LOG_WARNING,
+               "%" PRIu64
+               ": COPY_FILE_RANGE => -1 gfid_in=%s fd_in=%p "
+               "gfid_out=%s fd_out=%p (%s)",
+               frame->root->unique, src_gfid, state->fd, dst_gfid,
+               state->fd_dst, strerror(op_errno));
+
+        send_fuse_err(this, finh, op_errno);
+    }
+
+    free_fuse_state(state);
+    STACK_DESTROY(frame->root);
+
+    return 0;
+}
+
+/*
+ * Resume handler for COPY_FILE_RANGE: invoked through
+ * fuse_resolve_and_resume() by fuse_copy_file_range(). It traces the
+ * request and winds the copy_file_range fop with the source fd/offset,
+ * destination fd/offset, length, flags and xdata stored in the fuse state.
+ * The reply is handled by fuse_copy_file_range_cbk().
+ */
+void
+fuse_copy_file_range_resume(fuse_state_t *state)
+{
+    /*
+     * uuid_utoa() formats into one per-thread buffer, so calling it twice
+     * within a single argument list would make both gfids print the same
+     * value. Format each gfid into its own buffer instead, the same way
+     * fuse_copy_file_range_cbk() does.
+     */
+    char src_gfid[GF_UUID_BUF_SIZE] = {0};
+    char dst_gfid[GF_UUID_BUF_SIZE] = {0};
+
+    /* Do not dereference a missing fd/inode just for the trace message. */
+    if (state->fd && state->fd->inode)
+        uuid_utoa_r(state->fd->inode->gfid, src_gfid);
+    else
+        snprintf(src_gfid, sizeof(src_gfid), "nil");
+
+    if (state->fd_dst && state->fd_dst->inode)
+        uuid_utoa_r(state->fd_dst->inode->gfid, dst_gfid);
+    else
+        snprintf(dst_gfid, sizeof(dst_gfid), "nil");
+
+    gf_log("glusterfs-fuse", GF_LOG_TRACE,
+           "%" PRIu64
+           ": COPY_FILE_RANGE "
+           "(input fd: %p (gfid: %s), "
+           "output fd: %p (gfid: %s) size=%zu, "
+           "offset_in=%" PRIu64 ", offset_out=%" PRIu64 ")",
+           state->finh->unique, state->fd, src_gfid, state->fd_dst, dst_gfid,
+           state->size, state->off_in, state->off_out);
+
+    FUSE_FOP(state, fuse_copy_file_range_cbk, GF_FOP_COPY_FILE_RANGE,
+             copy_file_range, state->fd, state->off_in, state->fd_dst,
+             state->off_out, state->size, state->io_flags, state->xdata);
+}
+
+/*
+ * Entry point for a FUSE_COPY_FILE_RANGE request (registered in
+ * fuse_std_ops). Decodes the fuse_copy_file_range_in payload carried in
+ * msg into the fuse state, sets up fd resolution for both the source and
+ * the destination handle, and hands over to
+ * fuse_copy_file_range_resume().
+ */
+static void
+fuse_copy_file_range(xlator_t *this, fuse_in_header_t *finh, void *msg,
+                     struct iobuf *iobuf)
+{
+    struct fuse_copy_file_range_in *req = NULL;
+    fuse_state_t *state = NULL;
+    fd_t *src_fd = NULL;
+    fd_t *dst_fd = NULL;
+
+    req = msg;
+
+    GET_STATE(this, finh, state);
+
+    /* Length, offsets and flags are copied as-is from the request. */
+    state->io_flags = req->flags;
+    state->off_out = req->off_out;
+    state->off_in = req->off_in;
+    state->size = req->len;
+
+    /* Map the kernel file handles to fds: source first, then destination. */
+    src_fd = FH_TO_FD(req->fh_in);
+    dst_fd = FH_TO_FD(req->fh_out);
+    state->fd = src_fd;
+    state->fd_dst = dst_fd;
+
+    /* Source fd is resolved via 'resolve', destination via 'resolve2'. */
+    fuse_resolve_fd_init(state, &state->resolve, src_fd);
+    fuse_resolve_fd_init(state, &state->resolve2, dst_fd);
+
+    fuse_resolve_and_resume(state, fuse_copy_file_range_resume);
+}
+#endif /* FUSE_KERNEL_MINOR_VERSION >= 28 */
+
#if FUSE_KERNEL_MINOR_VERSION >= 24 && HAVE_SEEK_HOLE
static int
fuse_lseek_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
@@ -6087,6 +6197,10 @@ static fuse_handler_t *fuse_std_ops[FUSE_OP_HIGH] = {
#if FUSE_KERNEL_MINOR_VERSION >= 24 && HAVE_SEEK_HOLE
[FUSE_LSEEK] = fuse_lseek,
#endif
+
+#if FUSE_KERNEL_MINOR_VERSION >= 28
+ [FUSE_COPY_FILE_RANGE] = fuse_copy_file_range,
+#endif
};
static fuse_handler_t *fuse_dump_ops[FUSE_OP_HIGH];
diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h
index 57380786f17..60702ab1da5 100644
--- a/xlators/mount/fuse/src/fuse-bridge.h
+++ b/xlators/mount/fuse/src/fuse-bridge.h
@@ -41,8 +41,32 @@
#include <glusterfs/gidcache.h>
#if defined(GF_LINUX_HOST_OS) || defined(__FreeBSD__) || defined(__NetBSD__)
+
+/*
+ * TODO:
+ * Adding copy_file_range support may require bumping the fuse
+ * kernel minor version (as was done when support for the lseek
+ * fop was added). As of now, however, copy_file_range support
+ * has only just landed in the upstream kernel fuse module. Until
+ * a kernel containing that fuse module is released, the
+ * FUSE_KERNEL_MINOR_VERSION in the contrib copy of fuse_kernel.h
+ * might not be changed. In that case the highest op available
+ * has to be based on the current minor version (which is 24),
+ * so FUSE_OP_HIGH is determined selectively below. Once the
+ * minor version in the contrib fuse_kernel.h is changed to 28
+ * (the minor version that contains copy_file_range support in
+ * the upstream linux kernel source tree), remove the reference
+ * to FUSE_LSEEK below and determine FUSE_OP_HIGH based on
+ * copy_file_range alone.
+ */
+#if FUSE_KERNEL_MINOR_VERSION >= 28
+#define FUSE_OP_HIGH (FUSE_COPY_FILE_RANGE + 1)
+#else
#define FUSE_OP_HIGH (FUSE_LSEEK + 1)
#endif
+
+#endif
#ifdef GF_DARWIN_HOST_OS
#define FUSE_OP_HIGH (FUSE_DESTROY + 1)
#endif
@@ -400,10 +424,22 @@ typedef struct {
loc_t loc2;
fuse_in_header_t *finh;
int32_t flags;
+
off_t off;
+ /*
+ * The man page of copy_file_range tells that the offset
+ * arguments are of type loff_t *. Here in fuse state, the values of
+ * those offsets are saved instead of pointers as the kernel sends
+ * the values of the offsets from those pointers instead of pointers.
+ * But the type loff_t is linux specific and is actually a typedef of
+ * off64_t. Hence using off64_t
+ */
+ off64_t off_in; /* for copy_file_range source fd */
+ off64_t off_out; /* for copy_file_range destination fd */
size_t size;
unsigned long nlookup;
fd_t *fd;
+ fd_t *fd_dst; /* for copy_file_range destination */
dict_t *xattr;
dict_t *xdata;
char *name;