summaryrefslogtreecommitdiffstats
path: root/xlators/storage/posix/src/posix.c
diff options
context:
space:
mode:
authorAnand Avati <avati@redhat.com>2013-07-19 08:31:41 -0700
committerVijay Bellur <vbellur@redhat.com>2013-07-23 06:11:12 -0700
commit37ac6bdca826046cbcb0d50727af29baf9407950 (patch)
treeb899eb81a70c7719c5d7e4328697cc314da24b97 /xlators/storage/posix/src/posix.c
parentcee1f9b5c7917bba220f1156b342bf07cac4ad38 (diff)
storage/posix: implement batched fsync in a single thread
Because of the extra fsync()s issued by AFR transactions, they could potentially "clog" all the io-threads, preventing unrelated operations from making progress. This patch assigns a dedicated thread to issue fsyncs, as an experimental feature to understand performance characteristics with the approach. As a basis, incoming individual fsync requests are grouped into batches, falling in the same @batch-fsync-delay-usec window of time. These windows can extend in practice, as processing of the previous batch can take longer than @batch-fsync-delay-usec while new requests are getting batched. The feature supports four modes (similar to the -S modes of fs_mark) - syncfs: In this mode one syncfs() is issued per batch, instead of N fsync()s (one per file.) - syncfs-single-fsync: In this mode one syncfs() is issued per batch (which, on Linux, guarantees the completion of write-out of dirty pages in the filesystem up to that point) and one single fsync() to synchronize or flush the controller/drive cache. This corresponds to -S 2 of fs_mark. - syncfs-reverse-fsync: In this mode, one syncfs() is issued per batch, and all the open files in that batch are fsync()'ed in the reverse order of the queue. This corresponds to -S 4 of fs_mark. - reverse-fsync: In this mode, no syncfs() is issued and all the files in the batch are fsync()'ed in the reverse order. This corresponds to -S 3 of fs_mark. Change-Id: Ia1e170a810c780c8d80e02cf910accc4170c4cd4 BUG: 927146 Signed-off-by: Anand Avati <avati@redhat.com> Reviewed-on: http://review.gluster.org/4746 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'xlators/storage/posix/src/posix.c')
-rw-r--r--xlators/storage/posix/src/posix.c111
1 files changed, 111 insertions, 0 deletions
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
index fee2ce9d..e917766b 100644
--- a/xlators/storage/posix/src/posix.c
+++ b/xlators/storage/posix/src/posix.c
@@ -2417,6 +2417,33 @@ out:
}
+/*
+ * Queue an fsync request for deferred, batched processing instead of
+ * syncing inline.
+ *
+ * The request is captured as a call stub (built with default_fsync as its
+ * resume function), appended to the tail of priv->fsyncs while holding
+ * priv->fsync_mutex, and priv->fsync_cond is signalled so that a waiter on
+ * that condition can pick it up (presumably the posix_fsyncer thread --
+ * confirm against its implementation).
+ *
+ * If the stub cannot be created the frame is unwound right away with
+ * op_ret -1 / ENOMEM.  The function itself always returns 0.
+ */
+int
+posix_batch_fsync (call_frame_t *frame, xlator_t *this,
+                   fd_t *fd, int datasync, dict_t *xdata)
+{
+        struct posix_private *conf   = this->private;
+        call_stub_t          *queued = NULL;
+
+        queued = fop_fsync_stub (frame, default_fsync, fd, datasync, xdata);
+        if (queued == NULL) {
+                STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM, 0, 0, 0);
+                return 0;
+        }
+
+        /* Enqueue and wake the consumer atomically w.r.t. the queue lock. */
+        pthread_mutex_lock (&conf->fsync_mutex);
+        list_add_tail (&queued->list, &conf->fsyncs);
+        conf->fsync_queue_count++;
+        pthread_cond_signal (&conf->fsync_cond);
+        pthread_mutex_unlock (&conf->fsync_mutex);
+
+        return 0;
+}
+
+
int32_t
posix_fsync (call_frame_t *frame, xlator_t *this,
fd_t *fd, int32_t datasync, dict_t *xdata)
@@ -2428,6 +2455,7 @@ posix_fsync (call_frame_t *frame, xlator_t *this,
int ret = -1;
struct iatt preop = {0,};
struct iatt postop = {0,};
+ struct posix_private *priv = NULL;
DECLARE_OLD_FS_ID_VAR;
@@ -2443,6 +2471,12 @@ posix_fsync (call_frame_t *frame, xlator_t *this,
goto out;
#endif
+ priv = this->private;
+ if (priv->batch_fsync_mode && xdata && dict_get (xdata, "batch-fsync")) {
+ posix_batch_fsync (frame, this, fd, datasync, xdata);
+ return 0;
+ }
+
ret = posix_fd_ctx_get (fd, this, &pfd);
if (ret < 0) {
op_errno = -ret;
@@ -4303,6 +4337,27 @@ posix_set_owner (xlator_t *this, uid_t uid, gid_t gid)
return ret;
}
+
+/*
+ * Translate the "batch-fsync-mode" option string into
+ * priv->batch_fsync_mode.
+ *
+ * Recognised strings are "none", "syncfs", "syncfs-single-fsync",
+ * "syncfs-reverse-fsync" and "reverse-fsync".
+ *
+ * Returns 0 when the mode was set.  Returns -1 when str is NULL or is not
+ * one of the recognised strings; priv->batch_fsync_mode is left untouched
+ * in that case.
+ */
+static int
+set_batch_fsync_mode (struct posix_private *priv, const char *str)
+{
+        /* strcmp() on a NULL pointer is undefined behaviour; treat a
+         * missing option value like any other unrecognised mode. */
+        if (str == NULL)
+                return -1;
+
+        if (strcmp (str, "none") == 0)
+                priv->batch_fsync_mode = BATCH_NONE;
+        else if (strcmp (str, "syncfs") == 0)
+                priv->batch_fsync_mode = BATCH_SYNCFS;
+        else if (strcmp (str, "syncfs-single-fsync") == 0)
+                priv->batch_fsync_mode = BATCH_SYNCFS_SINGLE_FSYNC;
+        else if (strcmp (str, "syncfs-reverse-fsync") == 0)
+                priv->batch_fsync_mode = BATCH_SYNCFS_REVERSE_FSYNC;
+        else if (strcmp (str, "reverse-fsync") == 0)
+                priv->batch_fsync_mode = BATCH_REVERSE_FSYNC;
+        else
+                return -1;
+
+        return 0;
+}
+
+
int
reconfigure (xlator_t *this, dict_t *options)
{
@@ -4310,6 +4365,7 @@ reconfigure (xlator_t *this, dict_t *options)
struct posix_private *priv = NULL;
uid_t uid = -1;
gid_t gid = -1;
+ char *batch_fsync_mode_str = NULL;
priv = this->private;
@@ -4317,6 +4373,18 @@ reconfigure (xlator_t *this, dict_t *options)
GF_OPTION_RECONF ("brick-gid", gid, options, uint32, out);
posix_set_owner (this, uid, gid);
+ GF_OPTION_RECONF ("batch-fsync-delay-usec", priv->batch_fsync_delay_usec,
+ options, uint32, out);
+
+ GF_OPTION_RECONF ("batch-fsync-mode", batch_fsync_mode_str,
+ options, str, out);
+
+ if (set_batch_fsync_mode (priv, batch_fsync_mode_str) != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Unknown mode string: %s",
+ batch_fsync_mode_str);
+ goto out;
+ }
+
GF_OPTION_RECONF ("linux-aio", priv->aio_configured,
options, bool, out);
@@ -4368,6 +4436,7 @@ init (xlator_t *this)
char *guuid = NULL;
uid_t uid = -1;
gid_t gid = -1;
+ char *batch_fsync_mode_str;
dir_data = dict_get (this->options, "directory");
@@ -4720,6 +4789,28 @@ init (xlator_t *this)
INIT_LIST_HEAD (&_private->janitor_fds);
posix_spawn_janitor_thread (this);
+
+ pthread_mutex_init (&_private->fsync_mutex, NULL);
+ pthread_cond_init (&_private->fsync_cond, NULL);
+ INIT_LIST_HEAD (&_private->fsyncs);
+
+ ret = pthread_create (&_private->fsyncer, NULL, posix_fsyncer, this);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "fsyncer thread"
+ " creation failed (%s)", strerror (errno));
+ goto out;
+ }
+
+ GF_OPTION_INIT ("batch-fsync-mode", batch_fsync_mode_str, str, out);
+
+ if (set_batch_fsync_mode (_private, batch_fsync_mode_str) != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Unknown mode string: %s",
+ batch_fsync_mode_str);
+ goto out;
+ }
+
+ GF_OPTION_INIT ("batch-fsync-delay-usec", _private->batch_fsync_delay_usec,
+ uint32, out);
out:
return ret;
}
@@ -4849,5 +4940,25 @@ struct volume_options options[] = {
.description = "Interval in seconds for a filesystem health check, "
"set to 0 to disable"
},
+ { .key = {"batch-fsync-mode"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "reverse-fsync",
+ .description = "Possible values:\n"
+ "\t- syncfs: Perform one syncfs() on behalf oa batch"
+ "of fsyncs.\n"
+ "\t- syncfs-single-fsync: Perform one syncfs() on behalf of a batch"
+ " of fsyncs and one fsync() per batch.\n"
+ "\t- syncfs-reverse-fsync: Preform one syncfs() on behalf of a batch"
+ " of fsyncs and fsync() each file in the batch in reverse order.\n"
+ " in reverse order.\n"
+ "\t- reverse-fsync: Perform fsync() of each file in the batch in"
+ " reverse order."
+ },
+ { .key = {"batch-fsync-delay-usec"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "1000000",
+ .description = "Num of usecs to wait for aggregating fsync"
+ " requests",
+ },
{ .key = {NULL} }
};