summaryrefslogtreecommitdiffstats
path: root/xlators/mount
diff options
context:
space:
mode:
authorXavi Hernandez <xhernandez@redhat.com>2019-01-24 18:44:06 +0100
committerAmar Tumballi <amarts@redhat.com>2019-02-18 02:58:24 +0000
commitdddcf52020004d98f688ebef968de51d76cbf9a6 (patch)
tree01ee4c39a7859a76562e15aa7045c5bd86417a60 /xlators/mount
parentec273a46820ba17f46488c082c65cd1aa6739be3 (diff)
core: implement a global thread pool
This patch implements a thread pool that is wait-free for adding jobs to the queue and uses a very small locked region to get jobs. This makes it possible to decrease contention drastically. It's based on wfcqueue structure provided by urcu library. It automatically enables more threads when load demands it, and stops them when not needed. There's a maximum number of threads that can be used. This value can be configured. Depending on the workload, the maximum number of threads plays an important role. So it needs to be configured for optimal performance. Currently the thread pool doesn't self adjust the maximum for the workload, so this configuration needs to be changed manually. For this reason, the global thread pool has been made optional, so that volumes can still use the thread pool provided by io-threads. To enable it for bricks, the following option needs to be set: config.global-threading = on This option has no effect if bricks are already running. A restart is required to activate it. It's recommended to also enable the following option when running bricks with the global thread pool: performance.iot-pass-through = on To enable it for a FUSE mount point, the option '--global-threading' must be added to the mount command. To change it, an umount and remount is needed. It's recommended to disable the following option when using global threading on a mount point: performance.client-io-threads = off To enable it for services managed by glusterd, glusterd needs to be started with option '--global-threading'. In this case all daemons, like self-heal, will be using the global thread pool. Currently it can only be enabled for bricks, FUSE mounts and glusterd services. The maximum number of threads for clients and bricks can be configured using the following options: config.client-threads config.brick-threads These options can be applied online and its effect is immediate most of the times. If one of them is set to 0, the maximum number of threads will be calcutated as #cores * 2. Some distributions use a very old userspace-rcu library (version 0.7) for this reason, some header files from version 0.10 have been copied into contrib/userspace-rcu and are used if the detected version is 0.7 or older. An additional change has been made to io-threads to prevent that threads are started when iot-pass-through is set. Change-Id: I09d19e246b9e6d53c6247b29dfca6af6ee00a24b updates: #532 Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
Diffstat (limited to 'xlators/mount')
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.c79
-rwxr-xr-xxlators/mount/fuse/utils/mount.glusterfs.in7
2 files changed, 65 insertions, 21 deletions
diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
index 3479d40ceeb..bd8bc114a32 100644
--- a/xlators/mount/fuse/src/fuse-bridge.c
+++ b/xlators/mount/fuse/src/fuse-bridge.c
@@ -16,10 +16,17 @@
#include <glusterfs/glusterfs-acl.h>
#include <glusterfs/syscall.h>
#include <glusterfs/timespec.h>
+#include <glusterfs/async.h>
#ifdef __NetBSD__
#undef open /* in perfuse.h, pulled from mount-gluster-compat.h */
#endif
+typedef struct _fuse_async {
+ struct iobuf *iobuf;
+ fuse_in_header_t *finh;
+ void *msg;
+ gf_async_t async;
+} fuse_async_t;
static int gf_fuse_xattr_enotsup_log;
@@ -5810,6 +5817,28 @@ fuse_get_mount_status(xlator_t *this)
return kid_status;
}
+static void
+fuse_dispatch(xlator_t *xl, gf_async_t *async)
+{
+ fuse_async_t *fasync;
+ fuse_private_t *priv;
+ fuse_in_header_t *finh;
+ struct iobuf *iobuf;
+
+ priv = xl->private;
+ fasync = caa_container_of(async, fuse_async_t, async);
+ finh = fasync->finh;
+ iobuf = fasync->iobuf;
+
+ priv->fuse_ops[finh->opcode](xl, finh, fasync->msg, iobuf);
+
+ iobuf_unref(iobuf);
+}
+
+/* We need 512 extra buffer size for BATCH_FORGET fop. By tests, it is
+ * found to be reduces 'REALLOC()' in the loop */
+#define FUSE_EXTRA_ALLOC 512
+
static void *
fuse_thread_proc(void *data)
{
@@ -5821,24 +5850,20 @@ fuse_thread_proc(void *data)
fuse_in_header_t *finh = NULL;
struct iovec iov_in[2];
void *msg = NULL;
- /* we need 512 extra buffer size for BATCH_FORGET fop. By tests, it is
- found to be reduces 'REALLOC()' in the loop */
- const size_t msg0_size = sizeof(*finh) + 512;
- fuse_handler_t **fuse_ops = NULL;
+ size_t msg0_size = sizeof(*finh) + sizeof(struct fuse_write_in);
+ fuse_async_t *fasync;
struct pollfd pfd[2] = {{
0,
}};
+ uint32_t psize;
this = data;
priv = this->private;
- fuse_ops = priv->fuse_ops;
THIS = this;
- iov_in[0].iov_len = sizeof(*finh) + sizeof(struct fuse_write_in);
- iov_in[1].iov_len = ((struct iobuf_pool *)this->ctx->iobuf_pool)
- ->default_page_size;
- priv->msg0_len_p = &iov_in[0].iov_len;
+ psize = ((struct iobuf_pool *)this->ctx->iobuf_pool)->default_page_size;
+ priv->msg0_len_p = &msg0_size;
for (;;) {
/* THIS has to be reset here */
@@ -5895,14 +5920,15 @@ fuse_thread_proc(void *data)
changing this one too */
iobuf = iobuf_get(this->ctx->iobuf_pool);
- /* Add extra 128 byte to the first iov so that it can
+ /* Add extra 512 byte to the first iov so that it can
* accommodate "ordinary" non-write requests. It's not
* guaranteed to be big enough, as SETXATTR and namespace
* operations with very long names may grow behind it,
* but it's good enough in most cases (and we can handle
- * rest via realloc).
- */
- iov_in[0].iov_base = GF_CALLOC(1, msg0_size, gf_fuse_mt_iov_base);
+ * rest via realloc). */
+ iov_in[0].iov_base = GF_MALLOC(
+ sizeof(fuse_async_t) + msg0_size + FUSE_EXTRA_ALLOC,
+ gf_fuse_mt_iov_base);
if (!iobuf || !iov_in[0].iov_base) {
gf_log(this->name, GF_LOG_ERROR, "Out of memory");
@@ -5915,6 +5941,9 @@ fuse_thread_proc(void *data)
iov_in[1].iov_base = iobuf->ptr;
+ iov_in[0].iov_len = msg0_size;
+ iov_in[1].iov_len = psize;
+
res = sys_readv(priv->fd, iov_in, 2);
if (res == -1) {
@@ -5941,7 +5970,7 @@ fuse_thread_proc(void *data)
goto cont_err;
}
- if (res < sizeof(finh)) {
+ if (res < sizeof(*finh)) {
gf_log("glusterfs-fuse", GF_LOG_WARNING, "short read on /dev/fuse");
fuse_log_eh(this,
"glusterfs-fuse: short read on "
@@ -5983,8 +6012,9 @@ fuse_thread_proc(void *data)
if (finh->opcode == FUSE_WRITE)
msg = iov_in[1].iov_base;
else {
- if (res > msg0_size) {
- void *b = GF_REALLOC(iov_in[0].iov_base, res);
+ if (res > msg0_size + FUSE_EXTRA_ALLOC) {
+ void *b = GF_REALLOC(iov_in[0].iov_base,
+ sizeof(fuse_async_t) + res);
if (b) {
iov_in[0].iov_base = b;
finh = (fuse_in_header_t *)iov_in[0].iov_base;
@@ -5996,22 +6026,29 @@ fuse_thread_proc(void *data)
}
}
- if (res > iov_in[0].iov_len)
+ if (res > iov_in[0].iov_len) {
memcpy(iov_in[0].iov_base + iov_in[0].iov_len,
iov_in[1].iov_base, res - iov_in[0].iov_len);
+ iov_in[0].iov_len = res;
+ }
msg = finh + 1;
}
if (priv->uid_map_root && finh->uid == priv->uid_map_root)
finh->uid = 0;
- if (finh->opcode >= FUSE_OP_HIGH)
+ if (finh->opcode >= FUSE_OP_HIGH) {
/* turn down MacFUSE specific messages */
fuse_enosys(this, finh, msg, NULL);
- else
- fuse_ops[finh->opcode](this, finh, msg, iobuf);
+ iobuf_unref(iobuf);
+ } else {
+ fasync = iov_in[0].iov_base + iov_in[0].iov_len;
+ fasync->finh = finh;
+ fasync->msg = msg;
+ fasync->iobuf = iobuf;
+ gf_async(&fasync->async, this, fuse_dispatch);
+ }
- iobuf_unref(iobuf);
continue;
cont_err:
diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in
index 3f5d76d2e93..243c9c71af4 100755
--- a/xlators/mount/fuse/utils/mount.glusterfs.in
+++ b/xlators/mount/fuse/utils/mount.glusterfs.in
@@ -189,6 +189,10 @@ start_glusterfs ()
cmd_line=$(echo "$cmd_line --thin-client");
fi
+ if [ -n "$global_threading" ]; then
+ cmd_line=$(echo "$cmd_line --global-threading");
+ fi
+
#options with values start here
if [ -n "$halo_max_latency" ]; then
cmd_line=$(echo "$cmd_line --xlator-option \
@@ -629,6 +633,9 @@ without_options()
# "mount -t glusterfs" sends this, but it's useless.
"rw")
;;
+ "global-threading")
+ global_threading=1
+ ;;
# TODO: not sure how to handle this yet
"async"|"sync"|"dirsync"|\
"mand"|"nomand"|\