author    Venky Shankar <vshankar@redhat.com>  2015-04-17 15:00:13 +0530
committer Niels de Vos <ndevos@redhat.com>     2015-05-10 05:29:05 -0700
commit    37bb956ee3d181314d487dfdabd9a1fd8b5f9d9c (patch)
tree      e0b8c2aefb19feb6654236ab71b5aa173db8acb6 /xlators/features/bit-rot/src/bitd/bit-rot.c
parent    738620a5eeeee3802c09275831ac2b85d4ce91e5 (diff)
features/bit-rot: Token Bucket based throttling
BitRot daemons (signer & scrubber) are disk/CPU hoggers when left running at full throttle. Checksum calculations (especially the SHA family of hash routines) can be quite CPU intensive. Moreover, the periodic disk scans performed by the scrubber, followed by reading data blocks for hash calculation (which the signer also does), generate a lot of heavy IO requests. This interferes with actual client operations (be it a regular client or filesystem daemons such as self-heal, etc.) and results in degraded system performance.

This patch introduces throttling based on Token Bucket Filtering [1]. It is a well-known algorithm for checking (and ensuring) that data transmission conforms to defined limits, and is generally used in packet-switched networks. Linux control groups (cgroups) use a variant [2] of this algorithm to provide block device IO throttling (cgroup subsystem "blkio": blk-iothrottle). So, why not just live with cgroups? Cgroups is Linux specific; we need a throttling mechanism for the other supported UNIXes. Moreover, having our own implementation gives much finer control in terms of tuning it for our needs (plus the simplicity of the algorithm itself).

Ideally, throttling should be part of the server stack (either as a separate translator or integrated with io-threads), since that is the point of entry for IO requests from *all* clients. That way one could selectively throttle IO requests based on client PIDs (frame->root->pid), e.g., the self-heal daemon, bitrot, etc. (*actual* clients could run at full throttle). This implementation deliberately avoids that (it would need a much smarter queueing mechanism) and throttles CPU usage for hash calculations instead.

This patch is just the infrastructure part, with no interfaces exposed to set the various throttling values. The tunable selected here (basically hardcoded) avoids 100% CPU usage during hash calculation (with some burst cycles). We would need much more intensive tests to assign values for the various throttling options (lazy/normal/aggressive).

[1] https://en.wikipedia.org/wiki/Token_bucket
[2] http://en.wikipedia.org/wiki/Token_bucket#Hierarchical_token_bucket

> Change-Id: Icc49af80eeab6adb60166d0810e69ef37cfe2fd8
> BUG: 1207020
> Signed-off-by: Venky Shankar <vshankar@redhat.com>
> Reviewed-on: http://review.gluster.org/10307
> Reviewed-by: Vijay Bellur <vbellur@redhat.com>
> Tested-by: Vijay Bellur <vbellur@redhat.com>

Change-Id: I034ba1095aa3bfc3212a67a63ffb931431b372f6
Signed-off-by: Venky Shankar <vshankar@redhat.com>
BUG: 1220041
Reviewed-on: http://review.gluster.org/10719
Tested-by: NetBSD Build System
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Gaurav Kumar Garg <ggarg@redhat.com>
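For reference, the core of a token bucket filter is small: a counter of available tokens is credited at a fixed rate (capped at a burst limit) and debited by each operation, which blocks until enough tokens are available. The sketch below is a simplified illustration of that idea only, not the br_tbf code added by this patch (which lives in bitd/bit-rot-tbf.{c,h} and is used through br_tbf_init() and the TBF_THROTTLE_BEGIN/END macros); the names tb_bucket_t, tb_init() and tb_throttle() are hypothetical.

/*
 * Minimal token-bucket sketch (illustration only, not the br_tbf
 * implementation added by this patch).
 */
#include <pthread.h>
#include <stdint.h>
#include <time.h>

typedef struct {
        pthread_mutex_t lock;
        uint64_t        tokens;    /* tokens currently available      */
        uint64_t        rate;      /* tokens credited per second      */
        uint64_t        maxlimit;  /* bucket capacity (burst ceiling) */
        time_t          last;      /* time of the last refill         */
} tb_bucket_t;

static void
tb_init (tb_bucket_t *tb, uint64_t rate, uint64_t maxlimit)
{
        pthread_mutex_init (&tb->lock, NULL);
        tb->tokens   = maxlimit;   /* start with a full bucket */
        tb->rate     = rate;
        tb->maxlimit = maxlimit;
        tb->last     = time (NULL);
}

/* credit rate * elapsed-seconds tokens, capped at maxlimit */
static void
tb_refill (tb_bucket_t *tb)
{
        time_t now = time (NULL);

        if (now > tb->last) {
                tb->tokens += tb->rate * (uint64_t)(now - tb->last);
                if (tb->tokens > tb->maxlimit)
                        tb->tokens = tb->maxlimit;
                tb->last = now;
        }
}

/* block until 'need' tokens are available, then consume them
 * (callers are expected to ask for at most maxlimit tokens) */
static void
tb_throttle (tb_bucket_t *tb, uint64_t need)
{
        struct timespec naptime = {1, 0};

        pthread_mutex_lock (&tb->lock);
        tb_refill (tb);
        while (tb->tokens < need) {
                pthread_mutex_unlock (&tb->lock);
                nanosleep (&naptime, NULL);   /* wait for more tokens */
                pthread_mutex_lock (&tb->lock);
                tb_refill (tb);
        }
        tb->tokens -= need;
        pthread_mutex_unlock (&tb->lock);
}

In this scheme a hashing worker would call something like tb_throttle (&bucket, iovec[i].iov_len) before hashing each chunk, which is the role the TBF_THROTTLE_BEGIN/END pair plays around SHA256_Update() in the diff below.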
Diffstat (limited to 'xlators/features/bit-rot/src/bitd/bit-rot.c')
-rw-r--r--  xlators/features/bit-rot/src/bitd/bit-rot.c  47
1 file changed, 42 insertions, 5 deletions
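A rough sense of the hardcoded tunable in the diff below: the opspec sets rate = BR_HASH_CALC_READ_SIZE, i.e. 128 KiB worth of tokens per token-generation interval, and maxlimit = 2 * BR_WORKERS * BR_HASH_CALC_READ_SIZE, i.e. a burst ceiling of two read blocks per signing worker. Assuming BR_WORKERS is 4, that works out to 2 * 4 * 128 KiB = 1 MiB of burst capacity before hashing threads start waiting for tokens.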
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c
index d985cc4442c..880b16edfa8 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot.c
+++ b/xlators/features/bit-rot/src/bitd/bit-rot.c
@@ -27,6 +27,17 @@
#include "tw.h"
+#define BR_HASH_CALC_READ_SIZE (128 * 1024)
+
+br_tbf_opspec_t opthrottle[] = {
+ {
+ .op = BR_TBF_OP_HASH,
+ .rate = BR_HASH_CALC_READ_SIZE,
+ .maxlimit = (2 * BR_WORKERS * BR_HASH_CALC_READ_SIZE),
+ },
+ /** TODO: throttle getdents(), read() request(s) */
+};
+
static int
br_find_child_index (xlator_t *this, xlator_t *child)
{
@@ -288,8 +299,10 @@ br_object_read_block_and_sign (xlator_t *this, fd_t *fd, br_child_t *child,
off_t offset, size_t size, SHA256_CTX *sha256)
{
int32_t ret = -1;
+ br_tbf_t *tbf = NULL;
struct iovec *iovec = NULL;
struct iobref *iobref = NULL;
+ br_private_t *priv = NULL;
int count = 0;
int i = 0;
@@ -297,6 +310,12 @@ br_object_read_block_and_sign (xlator_t *this, fd_t *fd, br_child_t *child,
GF_VALIDATE_OR_GOTO (this->name, fd, out);
GF_VALIDATE_OR_GOTO (this->name, fd->inode, out);
GF_VALIDATE_OR_GOTO (this->name, child, out);
+ GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+
+ priv = this->private;
+
+ GF_VALIDATE_OR_GOTO (this->name, priv->tbf, out);
+ tbf = priv->tbf;
ret = syncop_readv (child->xl, fd,
size, offset, 0, &iovec, &count, &iobref, NULL,
@@ -313,9 +332,12 @@ br_object_read_block_and_sign (xlator_t *this, fd_t *fd, br_child_t *child,
goto out;
for (i = 0; i < count; i++) {
- SHA256_Update (sha256,
- (const unsigned char *) (iovec[i].iov_base),
- iovec[i].iov_len);
+ TBF_THROTTLE_BEGIN (tbf, BR_TBF_OP_HASH, iovec[i].iov_len);
+ {
+ SHA256_Update (sha256, (const unsigned char *)
+ (iovec[i].iov_base), iovec[i].iov_len);
+ }
+ TBF_THROTTLE_END (tbf, BR_TBF_OP_HASH, iovec[i].iov_len);
}
out:
@@ -334,7 +356,7 @@ br_calculate_obj_checksum (unsigned char *md,
{
int32_t ret = -1;
off_t offset = 0;
- size_t block = 128 * 1024; /* 128K block size */
+ size_t block = BR_HASH_CALC_READ_SIZE;
xlator_t *this = NULL;
SHA256_CTX sha256;
@@ -1358,6 +1380,16 @@ br_init_signer (xlator_t *this, br_private_t *priv)
}
int32_t
+br_init_rate_limiter (br_private_t *priv)
+{
+ br_tbf_opspec_t *spec = opthrottle;
+ priv->tbf = br_tbf_init (spec, sizeof (opthrottle)
+ / sizeof (br_tbf_opspec_t));
+
+ return priv->tbf ? 0 : -1;
+}
+
+int32_t
init (xlator_t *this)
{
int i = 0;
@@ -1411,12 +1443,16 @@ init (xlator_t *this)
INIT_LIST_HEAD (&priv->children[i].list);
INIT_LIST_HEAD (&priv->bricks);
+ ret = br_init_rate_limiter (priv);
+ if (ret)
+ goto cleanup_mutex;
+
this->private = priv;
if (!priv->iamscrubber) {
ret = br_init_signer (this, priv);
if (ret)
- goto cleanup_mutex;
+ goto cleanup_tbf;
}
ret = gf_thread_create (&priv->thread, NULL, br_handle_events, this);
@@ -1433,6 +1469,7 @@ init (xlator_t *this)
return 0;
}
+ cleanup_tbf:
cleanup_mutex:
(void) pthread_cond_destroy (&priv->cond);
(void) pthread_mutex_destroy (&priv->lock);