From 37ac6bdca826046cbcb0d50727af29baf9407950 Mon Sep 17 00:00:00 2001 From: Anand Avati Date: Fri, 19 Jul 2013 08:31:41 -0700 Subject: storage/posix: implement batched fsync in a single thread Because of the extra fsync()s issued by AFR transaction, they could potentially "clog" all the io-threads denying unrelated operations from making progress. This patch assigns a dedicated thread to issues fsyncs, as an experimental feature to understand performance characteristics with the approach. As a basis, incoming individual fsync requests are grouped into batches, falling in the same @batch-fsync-delay-usec window of time. These windows can extend in practice, as processing of the previous batch can take longer than @batch-fsync-delay-usec while new requests are getting batched. The feature support three modes (similar to the -S modes of fs_mark) - syncfs: In this mode one syncfs() is issued per batch, instead of N fsync()s (one per file.) - syncfs-single-fsync: In this mode one syncfs() is issued per batch (which, on Linux, guarantees the completion of write-out of dirty pages in the filesystem up to that point) and one single fsync() to synchronize or flush the controller/drive cache. This corresponds to -S 2 of fsmark. - syncfs-reverse-fsync: In this mode, one syncfs() is issued per batch, and all the open files in that batch are fsync()'ed in the reverse order of the queue. This corresponds to -S 4 of fsmark. - reverse-fsync: In this mode, no syncfs() is issued and all the files in the batch are fsync()'ed in the reverse order. This corresponds to -S 3 of fsmark. Change-Id: Ia1e170a810c780c8d80e02cf910accc4170c4cd4 BUG: 927146 Signed-off-by: Anand Avati Reviewed-on: http://review.gluster.org/4746 Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- xlators/storage/posix/src/posix.c | 111 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) (limited to 'xlators/storage/posix/src/posix.c') diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index fee2ce9d77c..e917766b5fe 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -2417,6 +2417,33 @@ out: } +int +posix_batch_fsync (call_frame_t *frame, xlator_t *this, + fd_t *fd, int datasync, dict_t *xdata) +{ + call_stub_t *stub = NULL; + struct posix_private *priv = NULL; + + priv = this->private; + + stub = fop_fsync_stub (frame, default_fsync, fd, datasync, xdata); + if (!stub) { + STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM, 0, 0, 0); + return 0; + } + + pthread_mutex_lock (&priv->fsync_mutex); + { + list_add_tail (&stub->list, &priv->fsyncs); + priv->fsync_queue_count++; + pthread_cond_signal (&priv->fsync_cond); + } + pthread_mutex_unlock (&priv->fsync_mutex); + + return 0; +} + + int32_t posix_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, dict_t *xdata) @@ -2428,6 +2455,7 @@ posix_fsync (call_frame_t *frame, xlator_t *this, int ret = -1; struct iatt preop = {0,}; struct iatt postop = {0,}; + struct posix_private *priv = NULL; DECLARE_OLD_FS_ID_VAR; @@ -2443,6 +2471,12 @@ posix_fsync (call_frame_t *frame, xlator_t *this, goto out; #endif + priv = this->private; + if (priv->batch_fsync_mode && xdata && dict_get (xdata, "batch-fsync")) { + posix_batch_fsync (frame, this, fd, datasync, xdata); + return 0; + } + ret = posix_fd_ctx_get (fd, this, &pfd); if (ret < 0) { op_errno = -ret; @@ -4303,6 +4337,27 @@ posix_set_owner (xlator_t *this, uid_t uid, gid_t gid) return ret; } + +static int +set_batch_fsync_mode (struct posix_private *priv, const char *str) +{ + if (strcmp (str, "none") == 0) + priv->batch_fsync_mode = BATCH_NONE; + else if (strcmp (str, "syncfs") == 0) + priv->batch_fsync_mode = BATCH_SYNCFS; + else if (strcmp (str, "syncfs-single-fsync") == 0) + priv->batch_fsync_mode = BATCH_SYNCFS_SINGLE_FSYNC; + else if (strcmp (str, "syncfs-reverse-fsync") == 0) + priv->batch_fsync_mode = BATCH_SYNCFS_REVERSE_FSYNC; + else if (strcmp (str, "reverse-fsync") == 0) + priv->batch_fsync_mode = BATCH_REVERSE_FSYNC; + else + return -1; + + return 0; +} + + int reconfigure (xlator_t *this, dict_t *options) { @@ -4310,6 +4365,7 @@ reconfigure (xlator_t *this, dict_t *options) struct posix_private *priv = NULL; uid_t uid = -1; gid_t gid = -1; + char *batch_fsync_mode_str = NULL; priv = this->private; @@ -4317,6 +4373,18 @@ reconfigure (xlator_t *this, dict_t *options) GF_OPTION_RECONF ("brick-gid", gid, options, uint32, out); posix_set_owner (this, uid, gid); + GF_OPTION_RECONF ("batch-fsync-delay-usec", priv->batch_fsync_delay_usec, + options, uint32, out); + + GF_OPTION_RECONF ("batch-fsync-mode", batch_fsync_mode_str, + options, str, out); + + if (set_batch_fsync_mode (priv, batch_fsync_mode_str) != 0) { + gf_log (this->name, GF_LOG_ERROR, "Unknown mode string: %s", + batch_fsync_mode_str); + goto out; + } + GF_OPTION_RECONF ("linux-aio", priv->aio_configured, options, bool, out); @@ -4368,6 +4436,7 @@ init (xlator_t *this) char *guuid = NULL; uid_t uid = -1; gid_t gid = -1; + char *batch_fsync_mode_str; dir_data = dict_get (this->options, "directory"); @@ -4720,6 +4789,28 @@ init (xlator_t *this) INIT_LIST_HEAD (&_private->janitor_fds); posix_spawn_janitor_thread (this); + + pthread_mutex_init (&_private->fsync_mutex, NULL); + pthread_cond_init (&_private->fsync_cond, NULL); + INIT_LIST_HEAD (&_private->fsyncs); + + ret = pthread_create (&_private->fsyncer, NULL, posix_fsyncer, this); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "fsyncer thread" + " creation failed (%s)", strerror (errno)); + goto out; + } + + GF_OPTION_INIT ("batch-fsync-mode", batch_fsync_mode_str, str, out); + + if (set_batch_fsync_mode (_private, batch_fsync_mode_str) != 0) { + gf_log (this->name, GF_LOG_ERROR, "Unknown mode string: %s", + batch_fsync_mode_str); + goto out; + } + + GF_OPTION_INIT ("batch-fsync-delay-usec", _private->batch_fsync_delay_usec, + uint32, out); out: return ret; } @@ -4849,5 +4940,25 @@ struct volume_options options[] = { .description = "Interval in seconds for a filesystem health check, " "set to 0 to disable" }, + { .key = {"batch-fsync-mode"}, + .type = GF_OPTION_TYPE_STR, + .default_value = "reverse-fsync", + .description = "Possible values:\n" + "\t- syncfs: Perform one syncfs() on behalf oa batch" + "of fsyncs.\n" + "\t- syncfs-single-fsync: Perform one syncfs() on behalf of a batch" + " of fsyncs and one fsync() per batch.\n" + "\t- syncfs-reverse-fsync: Preform one syncfs() on behalf of a batch" + " of fsyncs and fsync() each file in the batch in reverse order.\n" + " in reverse order.\n" + "\t- reverse-fsync: Perform fsync() of each file in the batch in" + " reverse order." + }, + { .key = {"batch-fsync-delay-usec"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "1000000", + .description = "Num of usecs to wait for aggregating fsync" + " requests", + }, { .key = {NULL} } }; -- cgit