core: implement a global thread pool

This patch implements a thread pool that is wait-free for adding jobs to the queue and uses a very small locked region to get jobs. This makes it possible to decrease contention drastically. It's based on the wfcqueue structure provided by the urcu library. It automatically enables more threads when load demands it, and stops them when not needed. There's a maximum number of threads that can be used. This value can be configured. Depending on the workload, the maximum number of threads plays an important role, so it needs to be configured for optimal performance. Currently the thread pool doesn't self-adjust the maximum for the workload, so this configuration needs to be changed manually. For this reason, the global thread pool has been made optional, so that volumes can still use the thread pool provided by io-threads. To enable it for bricks, the following option needs to be set: 'config.global-threading = on'. This option has no effect if bricks are already running. A restart is required to activate it. It's recommended to also enable the following option when running bricks with the global thread pool: 'performance.iot-pass-through = on'. To enable it for a FUSE mount point, the option '--global-threading' must be added to the mount command. To change it, an unmount and remount are needed. It's recommended to disable the following option when using global threading on a mount point: 'performance.client-io-threads = off'. To enable it for services managed by glusterd, glusterd needs to be started with the option '--global-threading'. In this case all daemons, like self-heal, will be using the global thread pool. Currently it can only be enabled for bricks, FUSE mounts and glusterd services. The maximum number of threads for clients and bricks can be configured using the following options: 'config.client-threads' and 'config.brick-threads'. These options can be applied online and their effect is immediate most of the time. 
If one of them is set to 0, the maximum number of threads will be calculated as #cores * 2. Some distributions use a very old userspace-rcu library (version 0.7). For this reason, some header files from version 0.10 have been copied into contrib/userspace-rcu and are used if the detected version is 0.7 or older. An additional change has been made to io-threads to prevent threads from being started when iot-pass-through is set. Change-Id: I09d19e246b9e6d53c6247b29dfca6af6ee00a24b updates: #532 Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
author: Xavi Hernandez <xhernandez@redhat.com> 2019-01-24 18:44:06 +0100
committer: Amar Tumballi <amarts@redhat.com> 2019-02-18 02:58:24 +0000
commit: dddcf52020004d98f688ebef968de51d76cbf9a6 (patch)
tree: 01ee4c39a7859a76562e15aa7045c5bd86417a60 /xlators/debug/io-stats/src/io-stats.c
parent: ec273a46820ba17f46488c082c65cd1aa6739be3 (diff)
1 files changed, 40 insertions, 1 deletions
diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c
index f12191fb8df..101e403d39a 100644
--- a/xlators/debug/io-stats/src/io-stats.c
+++ b/xlators/debug/io-stats/src/io-stats.c
@@ -40,6 +40,7 @@
 #include <pwd.h>
 #include <grp.h>
 #include <glusterfs/upcall-utils.h>
+#include <glusterfs/async.h>
 
 #define MAX_LIST_MEMBERS 100
 #define DEFAULT_PWD_BUF_SZ 16384
@@ -3737,6 +3738,7 @@ reconfigure(xlator_t *this, dict_t *options)
     uint32_t log_buf_size = 0;
     uint32_t log_flush_timeout = 0;
     int32_t old_dump_interval;
+    int32_t threads;
 
     if (!this || !this->private)
         goto out;
@@ -3809,6 +3811,9 @@ reconfigure(xlator_t *this, dict_t *options)
                      out);
     gf_log_set_log_flush_timeout(log_flush_timeout);
 
+    GF_OPTION_RECONF("threads", threads, options, int32, out);
+    gf_async_adjust_threads(threads);
+
     ret = 0;
 out:
     gf_log(this ? this->name : "io-stats", GF_LOG_DEBUG,
@@ -3888,6 +3893,7 @@ init(xlator_t *this)
     int ret = -1;
     uint32_t log_buf_size = 0;
     uint32_t log_flush_timeout = 0;
+    int32_t threads;
 
     if (!this)
         return -1;
@@ -3951,6 +3957,7 @@ init(xlator_t *this)
         gf_log(this->name, GF_LOG_ERROR, "Out of memory.");
         goto out;
     }
+    ret = -1;
 
     GF_OPTION_INIT("ios-dnscache-ttl-sec", conf->ios_dnscache_ttl_sec, int32,
                    out);
@@ -3987,6 +3994,9 @@ init(xlator_t *this)
     GF_OPTION_INIT("log-flush-timeout", log_flush_timeout, time, out);
     gf_log_set_log_flush_timeout(log_flush_timeout);
 
+    GF_OPTION_INIT("threads", threads, int32, out);
+    gf_async_adjust_threads(threads);
+
     this->private = conf;
     if (conf->ios_dump_interval > 0) {
         conf->dump_thread_running = _gf_true;
@@ -4430,8 +4440,37 @@ struct volume_options options[] = {
      .type = GF_OPTION_TYPE_STR,
      .default_value = "/no/such/path",
      .description = "Unique ID for our files."},
+    {.key = {"global-threading"},
+     .type = GF_OPTION_TYPE_BOOL,
+     .default_value = "off",
+     .op_version = {GD_OP_VERSION_6_0},
+     .flags = OPT_FLAG_SETTABLE,
+     .tags = {"io-stats", "threading"},
+     .description = "This option enables the global threading support for "
+                    "bricks. If enabled, it's recommended to also enable "
+                    "'performance.iot-pass-through'"},
+    {.key = {"threads"}, .type = GF_OPTION_TYPE_INT},
+    {.key = {"brick-threads"},
+     .type = GF_OPTION_TYPE_INT,
+     .default_value = "16",
+     .min = 0,
+     .max = GF_ASYNC_MAX_THREADS,
+     .op_version = {GD_OP_VERSION_6_0},
+     .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+     .tags = {"io-stats", "threading"},
+     .description = "When global threading is used, this value determines the "
+                    "maximum amount of threads that can be created on bricks"},
+    {.key = {"client-threads"},
+     .type = GF_OPTION_TYPE_INT,
+     .default_value = "16",
+     .min = 0,
+     .max = GF_ASYNC_MAX_THREADS,
+     .op_version = {GD_OP_VERSION_6_0},
+     .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT,
+     .tags = {"io-stats", "threading"},
+     .description = "When global threading is used, this value determines the "
+                    "maximum amount of threads that can be created on clients"},
     {.key = {NULL}},
-
 };
 
 xlator_api_t xlator_api = {
author	Xavi Hernandez <xhernandez@redhat.com>	2019-01-24 18:44:06 +0100
committer	Amar Tumballi <amarts@redhat.com>	2019-02-18 02:58:24 +0000
commit	dddcf52020004d98f688ebef968de51d76cbf9a6 (patch)
tree	01ee4c39a7859a76562e15aa7045c5bd86417a60 /xlators/debug/io-stats/src/io-stats.c
parent	ec273a46820ba17f46488c082c65cd1aa6739be3 (diff)