summaryrefslogtreecommitdiffstats
path: root/xlators/performance
diff options
context:
space:
mode:
authorM. Mohan Kumar <mohan@in.ibm.com>2013-11-09 14:51:53 +0530
committerVijay Bellur <vbellur@redhat.com>2013-11-10 21:25:49 -0800
commitc8fef37c5d566c906728b5f6f27baaa9a8d2a20d (patch)
tree03c833446bc73bfa3da6c621315b590c0d65c748 /xlators/performance
parentd5335f9e40f6e9533f7812d153b9727bcc04aa4e (diff)
glusterfs: zerofill support
Add support for a new ZEROFILL fop. Zerofill writes zeroes to a file in the specified range. This fop will be useful when a whole file needs to be initialized with zero (could be useful for zero filled VM disk image provisioning or during scrubbing of VM disk images). Client/application can issue this FOP for zeroing out. Gluster server will zero out required range of bytes ie server offloaded zeroing. In the absence of this fop, client/application has to repetitively issue write (zero) fop to the server, which is very inefficient method because of the overheads involved in RPC calls and acknowledgements. WRITESAME is a SCSI T10 command that takes a block of data as input and writes the same data to other blocks and this write is handled completely within the storage and hence is known as offload . Linux ,now has support for SCSI WRITESAME command which is exposed to the user in the form of BLKZEROOUT ioctl. BD Xlator can exploit BLKZEROOUT ioctl to implement this fop. Thus zeroing out operations can be completely offloaded to the storage device , making it highly efficient. The fop takes two arguments offset and size. It zeroes out 'size' number of bytes in an opened file starting from 'offset' position. This patch adds zerofill support to the following areas: - libglusterfs - io-stats - performance/md-cache,open-behind - quota - cluster/afr,dht,stripe - rpc/xdr - protocol/client,server - io-threads - marker - storage/posix - libgfapi Client applications can exloit this fop by using glfs_zerofill introduced in libgfapi.FUSE support to this fop has not been added as there is no system call for this fop. Changes from previous version 3: * Removed redundant memory failure log messages Changes from previous version 2: * Rebased and fixed build error Changes from previous version 1: * Rebased for latest master TODO : * Add zerofill support to trace xlator * Expose zerofill capability as part of gluster volume info Here is a performance comparison of server offloaded zeofill vs zeroing out using repeated writes. [root@llmvm02 remote]# time ./offloaded aakash-test log 20 real 3m34.155s user 0m0.018s sys 0m0.040s [root@llmvm02 remote]# time ./manually aakash-test log 20 real 4m23.043s user 0m2.197s sys 0m14.457s [root@llmvm02 remote]# time ./offloaded aakash-test log 25; real 4m28.363s user 0m0.021s sys 0m0.025s [root@llmvm02 remote]# time ./manually aakash-test log 25 real 5m34.278s user 0m2.957s sys 0m18.808s The argument log is a file which we want to set for logging purpose and the third argument is size in GB . As we can see there is a performance improvement of around 20% with this fop. Change-Id: I081159f5f7edde0ddb78169fb4c21c776ec91a18 BUG: 1028673 Signed-off-by: Aakash Lal Das <aakash@linux.vnet.ibm.com> Signed-off-by: M. Mohan Kumar <mohan@in.ibm.com> Reviewed-on: http://review.gluster.org/5327 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'xlators/performance')
-rw-r--r--xlators/performance/io-cache/src/io-cache.c26
-rw-r--r--xlators/performance/io-threads/src/io-threads.c51
-rw-r--r--xlators/performance/md-cache/src/md-cache.c41
-rw-r--r--xlators/performance/open-behind/src/open-behind.c21
-rw-r--r--xlators/performance/read-ahead/src/read-ahead.c52
5 files changed, 191 insertions, 0 deletions
diff --git a/xlators/performance/io-cache/src/io-cache.c b/xlators/performance/io-cache/src/io-cache.c
index 68ca4c565..201777b38 100644
--- a/xlators/performance/io-cache/src/io-cache.c
+++ b/xlators/performance/io-cache/src/io-cache.c
@@ -1449,6 +1449,31 @@ ioc_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
return 0;
}
+static int32_t
+ioc_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT(zerofill, frame, op_ret,
+ op_errno, pre, post, xdata);
+ return 0;
+}
+
+static int32_t
+ioc_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ uint64_t ioc_inode = 0;
+
+ inode_ctx_get (fd->inode, this, &ioc_inode);
+
+ if (ioc_inode)
+ ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode);
+
+ STACK_WIND(frame, ioc_zerofill_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata);
+ return 0;
+}
int32_t
@@ -2077,6 +2102,7 @@ struct xlator_fops fops = {
.readdirp = ioc_readdirp,
.discard = ioc_discard,
+ .zerofill = ioc_zerofill,
};
diff --git a/xlators/performance/io-threads/src/io-threads.c b/xlators/performance/io-threads/src/io-threads.c
index a5fcd0300..bbcf4ed26 100644
--- a/xlators/performance/io-threads/src/io-threads.c
+++ b/xlators/performance/io-threads/src/io-threads.c
@@ -309,6 +309,7 @@ iot_schedule (call_frame_t *frame, xlator_t *this, call_stub_t *stub)
case GF_FOP_RCHECKSUM:
case GF_FOP_FALLOCATE:
case GF_FOP_DISCARD:
+ case GF_FOP_ZEROFILL:
pri = IOT_PRI_LO;
break;
@@ -2510,6 +2511,55 @@ out:
}
int
+iot_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (zerofill, frame, op_ret, op_errno, preop, postop,
+ xdata);
+ return 0;
+}
+
+int
+iot_zerofill_wrapper(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ STACK_WIND (frame, iot_zerofill_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->zerofill, fd, offset, len, xdata);
+ return 0;
+}
+
+int
+iot_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ int ret = -1;
+
+ stub = fop_zerofill_stub(frame, iot_zerofill_wrapper, fd,
+ offset, len, xdata);
+ if (!stub) {
+ gf_log (this->name, GF_LOG_ERROR, "cannot create zerofill stub"
+ "(out of memory)");
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = iot_schedule (frame, this, stub);
+
+out:
+ if (ret < 0) {
+ STACK_UNWIND_STRICT (zerofill, frame, -1, -ret, NULL, NULL,
+ NULL);
+ if (stub != NULL) {
+ call_stub_destroy (stub);
+ }
+ }
+ return 0;
+}
+
+
+int
__iot_workers_scale (iot_conf_t *conf)
{
int scale = 0;
@@ -2840,6 +2890,7 @@ struct xlator_fops fops = {
.rchecksum = iot_rchecksum,
.fallocate = iot_fallocate,
.discard = iot_discard,
+ .zerofill = iot_zerofill,
};
struct xlator_cbks cbks;
diff --git a/xlators/performance/md-cache/src/md-cache.c b/xlators/performance/md-cache/src/md-cache.c
index 36d81887c..3a5b7a5d1 100644
--- a/xlators/performance/md-cache/src/md-cache.c
+++ b/xlators/performance/md-cache/src/md-cache.c
@@ -2098,6 +2098,46 @@ int mdc_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
}
int
+mdc_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret != 0)
+ goto out;
+
+ if (!local)
+ goto out;
+
+ mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf);
+
+out:
+ MDC_STACK_UNWIND(zerofill, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+
+ return 0;
+}
+
+int mdc_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ mdc_local_t *local;
+
+ local = mdc_local_get(frame);
+ local->fd = fd_ref(fd);
+
+ STACK_WIND(frame, mdc_zerofill_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->zerofill, fd, offset, len,
+ xdata);
+
+ return 0;
+}
+
+
+int
mdc_forget (xlator_t *this, inode_t *inode)
{
mdc_inode_wipe (this, inode);
@@ -2229,6 +2269,7 @@ struct xlator_fops fops = {
.readdir = mdc_readdir,
.fallocate = mdc_fallocate,
.discard = mdc_discard,
+ .zerofill = mdc_zerofill,
};
diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c
index df4027509..7e5b57278 100644
--- a/xlators/performance/open-behind/src/open-behind.c
+++ b/xlators/performance/open-behind/src/open-behind.c
@@ -720,6 +720,26 @@ err:
}
int
+ob_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ call_stub_t *stub;
+
+ stub = fop_zerofill_stub(frame, default_zerofill_resume, fd,
+ offset, len, xdata);
+ if (!stub)
+ goto err;
+
+ open_and_resume(this, fd, stub);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT(zerofill, frame, -1, ENOMEM, NULL, NULL, NULL);
+ return 0;
+}
+
+
+int
ob_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags,
dict_t *xdata)
{
@@ -946,6 +966,7 @@ struct xlator_fops fops = {
.fsetattr = ob_fsetattr,
.fallocate = ob_fallocate,
.discard = ob_discard,
+ .zerofill = ob_zerofill,
.unlink = ob_unlink,
.rename = ob_rename,
.lk = ob_lk,
diff --git a/xlators/performance/read-ahead/src/read-ahead.c b/xlators/performance/read-ahead/src/read-ahead.c
index 241fa477f..069ab1f1a 100644
--- a/xlators/performance/read-ahead/src/read-ahead.c
+++ b/xlators/performance/read-ahead/src/read-ahead.c
@@ -993,6 +993,57 @@ unwind:
}
int
+ra_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ GF_ASSERT (frame);
+
+ STACK_UNWIND_STRICT (zerofill, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+ return 0;
+}
+
+static int
+ra_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ ra_file_t *file = NULL;
+ fd_t *iter_fd = NULL;
+ inode_t *inode = NULL;
+ uint64_t tmp_file = 0;
+ int32_t op_errno = EINVAL;
+
+ GF_ASSERT (frame);
+ GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
+ GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind);
+
+ inode = fd->inode;
+
+ LOCK (&inode->lock);
+ {
+ list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
+ fd_ctx_get (iter_fd, this, &tmp_file);
+ file = (ra_file_t *)(long)tmp_file;
+ if (!file)
+ continue;
+
+ flush_region(frame, file, offset, len, 1);
+ }
+ }
+ UNLOCK (&inode->lock);
+
+ STACK_WIND (frame, ra_zerofill_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->zerofill, fd,
+ offset, len, xdata);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT (zerofill, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+int
ra_priv_dump (xlator_t *this)
{
ra_conf_t *conf = NULL;
@@ -1173,6 +1224,7 @@ struct xlator_fops fops = {
.ftruncate = ra_ftruncate,
.fstat = ra_fstat,
.discard = ra_discard,
+ .zerofill = ra_zerofill,
};
struct xlator_cbks cbks = {