summaryrefslogtreecommitdiffstats
path: root/xlators/features/bit-rot/src/bitd/bit-rot.c
diff options
context:
space:
mode:
author Venky Shankar <vshankar@redhat.com> 2015-04-27 21:34:34 +0530
committer Vijay Bellur <vbellur@redhat.com> 2015-05-07 22:51:41 -0700
commit 9ba8963999bca431ec14a25961a163810cfe1e5b (patch)
tree 783f5a29b7cfc63331a88a1ec5d222a7a4c2d57e /xlators/features/bit-rot/src/bitd/bit-rot.c
parent 4ccd70b323d4cb929b7b7a88e592fc98fab06198 (diff)
features/bitrot: Throttle filesystem scrubber
This patch introduces a multithreaded filesystem scrubber based on the throttling option configured for a particular volume. The implementation "logically" separates scanning from scrubbing, with the number of scrubber threads auto-configured depending upon the throttle configuration. Scanning (crawling) is left single threaded (per brick) with entries scrubbed in bulk. On reaching this "bulk" watermark, the scanner waits until the entries are scrubbed. Bricks for a particular volume have a set of thread(s) assigned for scrubbing, with entries for each brick scrubbed in a round-robin fashion to avoid scrub "stalls" when a brick (out of N bricks) is under active scrubbing. This mechanism helps us implement "pause/resume" with ease: all one needs to do is clean up the scrubber threads and let the main scanner thread "wait" until scrubbing is resumed (where the scrubber thread(s) are spawned again), therefore continuing where we left off (unless we restart the daemons, in which case the crawl starts from the root directory again, but I guess that's OK). [ NOTE: Throttling is optional for the signer daemon, without which it runs full throttle. However, passing "-DBR_RATE_LIMIT_SIGNER" predefined in CFLAGS enables CPU throttling (during checksum calculation), thereby avoiding high CPU usage. ] Subsequent patches will introduce CPU throttling during hash calculation for the scrubber. Change-Id: I5701dd6cd4dff27ca3144ac5e3798a2216b39d4f BUG: 1207020 Signed-off-by: Venky Shankar <vshankar@redhat.com> Reviewed-on: http://review.gluster.org/10511 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'xlators/features/bit-rot/src/bitd/bit-rot.c')
-rw-r--r-- xlators/features/bit-rot/src/bitd/bit-rot.c 146
1 files changed, 117 insertions, 29 deletions
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c
index 880b16edfa8..eea81aec53a 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot.c
+++ b/xlators/features/bit-rot/src/bitd/bit-rot.c
@@ -29,15 +29,6 @@
#define BR_HASH_CALC_READ_SIZE (128 * 1024)
-br_tbf_opspec_t opthrottle[] = {
- {
- .op = BR_TBF_OP_HASH,
- .rate = BR_HASH_CALC_READ_SIZE,
- .maxlimit = (2 * BR_WORKERS * BR_HASH_CALC_READ_SIZE),
- },
- /** TODO: throttle getdents(), read() request(s) */
-};
-
static int
br_find_child_index (xlator_t *this, xlator_t *child)
{
@@ -1066,6 +1057,7 @@ br_enact_signer (xlator_t *this, br_child_t *child, br_stub_init_t *stub)
child->threadrunning = 1;
/* it's OK to continue, "old" objects would be signed when modified */
+ list_del_init (&child->list);
return 0;
dealloc:
@@ -1078,14 +1070,45 @@ static inline int32_t
br_enact_scrubber (xlator_t *this, br_child_t *child)
{
int32_t ret = 0;
+ br_private_t *priv = NULL;
+ struct br_scanfs *fsscan = NULL;
+ struct br_scrubber *fsscrub = NULL;
+
+ priv = this->private;
+
+ fsscan = &child->fsscan;
+ fsscrub = &priv->fsscrub;
+
+ LOCK_INIT (&fsscan->entrylock);
+ pthread_mutex_init (&fsscan->waitlock, NULL);
+ pthread_cond_init (&fsscan->waitcond, NULL);
- ret = gf_thread_create (&child->thread, NULL, br_scrubber, child);
+ fsscan->entries = 0;
+ INIT_LIST_HEAD (&fsscan->queued);
+ INIT_LIST_HEAD (&fsscan->ready);
+
+ ret = gf_thread_create (&child->thread, NULL, br_fsscanner, child);
if (ret != 0) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR, "failed to spawn scrubber");
+ gf_log (this->name, GF_LOG_ALERT, "failed to spawn bitrot "
+ "scrubber daemon [Brick: %s]", child->brick_path);
+ goto error_return;
}
- return ret;
+ /**
+ * Everything has been setup.. add this subvolume to scrubbers
+ * list.
+ */
+ pthread_mutex_lock (&fsscrub->mutex);
+ {
+ list_move (&child->list, &fsscrub->scrublist);
+ pthread_cond_broadcast (&fsscrub->cond);
+ }
+ pthread_mutex_unlock (&fsscrub->mutex);
+
+ return 0;
+
+ error_return:
+ return -1;
}
/**
@@ -1202,8 +1225,7 @@ br_handle_events (void *arg)
"failed to connect to the "
"child (subvolume: %s)",
child->xl->name);
- else
- list_del_init (&child->list);
+
}
}
@@ -1379,16 +1401,72 @@ br_init_signer (xlator_t *this, br_private_t *priv)
return -1;
}
-int32_t
-br_init_rate_limiter (br_private_t *priv)
+/**
+ * For signer, only rate limit CPU usage (during hash calculation) when
+ * compiled with -DBR_RATE_LIMIT_SIGNER cflags, else let it run full
+ * throttle.
+ */
+static int32_t
+br_rate_limit_signer (xlator_t *this, int child_count, int numbricks)
{
- br_tbf_opspec_t *spec = opthrottle;
- priv->tbf = br_tbf_init (spec, sizeof (opthrottle)
- / sizeof (br_tbf_opspec_t));
+ br_private_t *priv = NULL;
+ br_tbf_opspec_t spec = {0,};
+
+ priv = this->private;
+
+ spec.op = BR_TBF_OP_HASH;
+ spec.rate = 0;
+ spec.maxlimit = 0;
+
+#ifdef BR_RATE_LIMIT_SIGNER
+
+ double contribution = 0;
+ contribution = ((double)1 - ((double)child_count / (double)numbricks));
+ if (contribution == 0)
+ contribution = 1;
+ spec.rate = BR_HASH_CALC_READ_SIZE * contribution;
+ spec.maxlimit = BR_WORKERS * BR_HASH_CALC_READ_SIZE;
+
+#endif
+
+ if (!spec.rate)
+ gf_log (this->name,
+ GF_LOG_INFO, "[Rate Limit Info] \"FULL THROTTLE\"");
+ else
+ gf_log (this->name, GF_LOG_INFO,
+ "[Rate Limit Info] \"tokens/sec (rate): %lu, "
+ "maxlimit: %lu\"", spec.rate, spec.maxlimit);
+ priv->tbf = br_tbf_init (&spec, 1);
return priv->tbf ? 0 : -1;
}
+static int32_t
+br_signer_init (xlator_t *this, br_private_t *priv)
+{
+ int32_t ret = 0;
+ int numbricks = 0;
+
+ GF_OPTION_INIT ("expiry-time", priv->expiry_time, int32, error_return);
+ GF_OPTION_INIT ("brick-count", numbricks, int32, error_return);
+
+ ret = br_rate_limit_signer (this, priv->child_count, numbricks);
+ if (ret)
+ goto error_return;
+
+ ret = br_init_signer (this, priv);
+ if (ret)
+ goto cleanup_tbf;
+
+ return 0;
+
+ cleanup_tbf:
+ /* cleanup TBF */
+ error_return:
+ return -1;
+
+}
+
int32_t
init (xlator_t *this)
{
@@ -1410,7 +1488,6 @@ init (xlator_t *this)
}
GF_OPTION_INIT ("scrubber", priv->iamscrubber, bool, out);
- GF_OPTION_INIT ("expiry-time", priv->expiry_time, int32, out);
priv->child_count = xlator_subvolume_count (this);
priv->children = GF_CALLOC (priv->child_count, sizeof (*priv->children),
@@ -1443,18 +1520,19 @@ init (xlator_t *this)
INIT_LIST_HEAD (&priv->children[i].list);
INIT_LIST_HEAD (&priv->bricks);
- ret = br_init_rate_limiter (priv);
- if (ret)
- goto cleanup_mutex;
-
this->private = priv;
if (!priv->iamscrubber) {
- ret = br_init_signer (this, priv);
- if (ret)
- goto cleanup_tbf;
+ ret = br_signer_init (this, priv);
+ } else {
+ ret = br_scrubber_init (this, priv);
+ if (!ret)
+ ret = br_scrubber_handle_options (this, priv, NULL);
}
+ if (ret)
+ goto cleanup_mutex;
+
ret = gf_thread_create (&priv->thread, NULL, br_handle_events, this);
if (ret != 0) {
gf_log (this->name, GF_LOG_ERROR,
@@ -1469,7 +1547,6 @@ init (xlator_t *this)
return 0;
}
- cleanup_tbf:
cleanup_mutex:
(void) pthread_cond_destroy (&priv->cond);
(void) pthread_mutex_destroy (&priv->lock);
@@ -1505,6 +1582,17 @@ fini (xlator_t *this)
return;
}
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+ br_private_t *priv = this->private;
+
+ if (!priv->iamscrubber)
+ return 0;
+
+ return br_scrubber_handle_options (this, priv, options);
+}
+
struct xlator_fops fops;
struct xlator_cbks cbks;