summary | refs | log | tree | commit | diff | stats
path: root/xlators/features/bit-rot/src/bitd/bit-rot.c
diff options
context:
space:
mode:
author: Venky Shankar <vshankar@redhat.com> 2015-04-27 21:34:34 +0530
committer: Niels de Vos <ndevos@redhat.com> 2015-05-10 05:29:31 -0700
commit: 32865f8650057123a5fcf590c96a1ae3f6d22608 (patch)
tree: 601c33044633bfc1abe0a46103727bfc350702cf /xlators/features/bit-rot/src/bitd/bit-rot.c
parent: 37bb956ee3d181314d487dfdabd9a1fd8b5f9d9c (diff)
features/bitrot: Throttle filesystem scrubber
This patch introduces a multithreaded filesystem scrubber based on the throttling option configured for a particular volume. The implementation "logically" separates scanning from scrubbing, with the number of scrubber threads auto-configured depending upon the throttle configuration. Scanning (crawling) is left single threaded (per brick) with entries scrubbed in bulk. On reaching this "bulk" watermark, the scanner waits until the entries are scrubbed. Bricks for a particular volume have a set of thread(s) assigned for scrubbing, with entries for each brick scrubbed in a round robin fashion to avoid scrub "stalls" when a brick (out of N bricks) is under active scrubbing.

This mechanism helps us implement "pause/resume" with ease: all one needs to do is clean up the scrubber threads and let the main scanner thread "wait" until scrubbing is resumed (where the scrubber thread(s) are spawned again), therefore continuing where we left off (unless we restart the daemons, where the crawl initiates from the root directory again, but I guess that's OK).

[ NOTE: Throttling is optional for the signer daemon, without which it runs full throttle. However, passing "-DBR_RATE_LIMIT_SIGNER" predefined in CFLAGS enables CPU throttling (during checksum calculation) thereby avoiding high CPU usage. ]

Subsequent patches would introduce CPU throttling during hash calculation for the scrubber.

> Change-Id: I5701dd6cd4dff27ca3144ac5e3798a2216b39d4f
> BUG: 1207020
> Signed-off-by: Venky Shankar <vshankar@redhat.com>
> Reviewed-on: http://review.gluster.org/10511
> Tested-by: Gluster Build System <jenkins@build.gluster.com>
> Reviewed-by: Vijay Bellur <vbellur@redhat.com>

Change-Id: I5a125b2d0ac7dafd3e278b7fe4c6c9dd07af76dd
Signed-off-by: Venky Shankar <vshankar@redhat.com>
BUG: 1220041
Reviewed-on: http://review.gluster.org/10720
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Gaurav Kumar Garg <ggarg@redhat.com>
Diffstat (limited to 'xlators/features/bit-rot/src/bitd/bit-rot.c')
-rw-r--r-- xlators/features/bit-rot/src/bitd/bit-rot.c 146
1 files changed, 117 insertions, 29 deletions
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c
index 880b16edfa8..eea81aec53a 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot.c
+++ b/xlators/features/bit-rot/src/bitd/bit-rot.c
@@ -29,15 +29,6 @@
#define BR_HASH_CALC_READ_SIZE (128 * 1024)
-br_tbf_opspec_t opthrottle[] = {
- {
- .op = BR_TBF_OP_HASH,
- .rate = BR_HASH_CALC_READ_SIZE,
- .maxlimit = (2 * BR_WORKERS * BR_HASH_CALC_READ_SIZE),
- },
- /** TODO: throttle getdents(), read() request(s) */
-};
-
static int
br_find_child_index (xlator_t *this, xlator_t *child)
{
@@ -1066,6 +1057,7 @@ br_enact_signer (xlator_t *this, br_child_t *child, br_stub_init_t *stub)
child->threadrunning = 1;
/* it's OK to continue, "old" objects would be signed when modified */
+ list_del_init (&child->list);
return 0;
dealloc:
@@ -1078,14 +1070,45 @@ static inline int32_t
br_enact_scrubber (xlator_t *this, br_child_t *child)
{
int32_t ret = 0;
+ br_private_t *priv = NULL;
+ struct br_scanfs *fsscan = NULL;
+ struct br_scrubber *fsscrub = NULL;
+
+ priv = this->private;
+
+ fsscan = &child->fsscan;
+ fsscrub = &priv->fsscrub;
+
+ LOCK_INIT (&fsscan->entrylock);
+ pthread_mutex_init (&fsscan->waitlock, NULL);
+ pthread_cond_init (&fsscan->waitcond, NULL);
- ret = gf_thread_create (&child->thread, NULL, br_scrubber, child);
+ fsscan->entries = 0;
+ INIT_LIST_HEAD (&fsscan->queued);
+ INIT_LIST_HEAD (&fsscan->ready);
+
+ ret = gf_thread_create (&child->thread, NULL, br_fsscanner, child);
if (ret != 0) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR, "failed to spawn scrubber");
+ gf_log (this->name, GF_LOG_ALERT, "failed to spawn bitrot "
+ "scrubber daemon [Brick: %s]", child->brick_path);
+ goto error_return;
}
- return ret;
+ /**
+ * Everything has been setup.. add this subvolume to scrubbers
+ * list.
+ */
+ pthread_mutex_lock (&fsscrub->mutex);
+ {
+ list_move (&child->list, &fsscrub->scrublist);
+ pthread_cond_broadcast (&fsscrub->cond);
+ }
+ pthread_mutex_unlock (&fsscrub->mutex);
+
+ return 0;
+
+ error_return:
+ return -1;
}
/**
@@ -1202,8 +1225,7 @@ br_handle_events (void *arg)
"failed to connect to the "
"child (subvolume: %s)",
child->xl->name);
- else
- list_del_init (&child->list);
+
}
}
@@ -1379,16 +1401,72 @@ br_init_signer (xlator_t *this, br_private_t *priv)
return -1;
}
-int32_t
-br_init_rate_limiter (br_private_t *priv)
+/**
+ * For signer, only rate limit CPU usage (during hash calculation) when
+ * compiled with -DBR_RATE_LIMIT_SIGNER cflags, else let it run full
+ * throttle.
+ */
+static int32_t
+br_rate_limit_signer (xlator_t *this, int child_count, int numbricks)
{
- br_tbf_opspec_t *spec = opthrottle;
- priv->tbf = br_tbf_init (spec, sizeof (opthrottle)
- / sizeof (br_tbf_opspec_t));
+ br_private_t *priv = NULL;
+ br_tbf_opspec_t spec = {0,};
+
+ priv = this->private;
+
+ spec.op = BR_TBF_OP_HASH;
+ spec.rate = 0;
+ spec.maxlimit = 0;
+
+#ifdef BR_RATE_LIMIT_SIGNER
+
+ double contribution = 0;
+ contribution = ((double)1 - ((double)child_count / (double)numbricks));
+ if (contribution == 0)
+ contribution = 1;
+ spec.rate = BR_HASH_CALC_READ_SIZE * contribution;
+ spec.maxlimit = BR_WORKERS * BR_HASH_CALC_READ_SIZE;
+
+#endif
+
+ if (!spec.rate)
+ gf_log (this->name,
+ GF_LOG_INFO, "[Rate Limit Info] \"FULL THROTTLE\"");
+ else
+ gf_log (this->name, GF_LOG_INFO,
+ "[Rate Limit Info] \"tokens/sec (rate): %lu, "
+ "maxlimit: %lu\"", spec.rate, spec.maxlimit);
+ priv->tbf = br_tbf_init (&spec, 1);
return priv->tbf ? 0 : -1;
}
+static int32_t
+br_signer_init (xlator_t *this, br_private_t *priv)
+{
+ int32_t ret = 0;
+ int numbricks = 0;
+
+ GF_OPTION_INIT ("expiry-time", priv->expiry_time, int32, error_return);
+ GF_OPTION_INIT ("brick-count", numbricks, int32, error_return);
+
+ ret = br_rate_limit_signer (this, priv->child_count, numbricks);
+ if (ret)
+ goto error_return;
+
+ ret = br_init_signer (this, priv);
+ if (ret)
+ goto cleanup_tbf;
+
+ return 0;
+
+ cleanup_tbf:
+ /* cleanup TBF */
+ error_return:
+ return -1;
+
+}
+
int32_t
init (xlator_t *this)
{
@@ -1410,7 +1488,6 @@ init (xlator_t *this)
}
GF_OPTION_INIT ("scrubber", priv->iamscrubber, bool, out);
- GF_OPTION_INIT ("expiry-time", priv->expiry_time, int32, out);
priv->child_count = xlator_subvolume_count (this);
priv->children = GF_CALLOC (priv->child_count, sizeof (*priv->children),
@@ -1443,18 +1520,19 @@ init (xlator_t *this)
INIT_LIST_HEAD (&priv->children[i].list);
INIT_LIST_HEAD (&priv->bricks);
- ret = br_init_rate_limiter (priv);
- if (ret)
- goto cleanup_mutex;
-
this->private = priv;
if (!priv->iamscrubber) {
- ret = br_init_signer (this, priv);
- if (ret)
- goto cleanup_tbf;
+ ret = br_signer_init (this, priv);
+ } else {
+ ret = br_scrubber_init (this, priv);
+ if (!ret)
+ ret = br_scrubber_handle_options (this, priv, NULL);
}
+ if (ret)
+ goto cleanup_mutex;
+
ret = gf_thread_create (&priv->thread, NULL, br_handle_events, this);
if (ret != 0) {
gf_log (this->name, GF_LOG_ERROR,
@@ -1469,7 +1547,6 @@ init (xlator_t *this)
return 0;
}
- cleanup_tbf:
cleanup_mutex:
(void) pthread_cond_destroy (&priv->cond);
(void) pthread_mutex_destroy (&priv->lock);
@@ -1505,6 +1582,17 @@ fini (xlator_t *this)
return;
}
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+ br_private_t *priv = this->private;
+
+ if (!priv->iamscrubber)
+ return 0;
+
+ return br_scrubber_handle_options (this, priv, options);
+}
+
struct xlator_fops fops;
struct xlator_cbks cbks;