author    Venky Shankar <vshankar@redhat.com>    2015-04-27 21:34:34 +0530
committer Niels de Vos <ndevos@redhat.com>       2015-05-10 05:29:31 -0700
commit    32865f8650057123a5fcf590c96a1ae3f6d22608 (patch)
tree      601c33044633bfc1abe0a46103727bfc350702cf /xlators
parent    37bb956ee3d181314d487dfdabd9a1fd8b5f9d9c (diff)
features/bitrot: Throttle filesystem scrubber
This patch introduces a multithreaded filesystem scrubber based on the
throttling option configured for a particular volume. The implementation
"logically" separates scanning from scrubbing, with the number of scrubber
threads auto-configured according to the throttle configuration.

Scanning (crawling) remains single threaded (per brick), with entries
scrubbed in bulk. On reaching this "bulk" watermark, the scanner waits
until the entries are scrubbed. The bricks of a volume share a set of
scrubber thread(s), and entries for each brick are scrubbed in round-robin
fashion to avoid scrub "stalls" when one brick (out of N bricks) is under
active scrubbing. This mechanism also makes "pause/resume" easy to
implement: all one needs to do is clean up the scrubber threads and let
the main scanner thread wait until scrubbing is resumed (at which point
the scrubber thread(s) are spawned again), thereby continuing where we
left off (unless the daemons are restarted, in which case the crawl starts
from the root directory again, but I guess that's OK).

[ NOTE: Throttling is optional for the signer daemon, without which it
  runs at full throttle. However, passing "-DBR_RATE_LIMIT_SIGNER"
  predefined in CFLAGS enables CPU throttling (during checksum
  calculation), thereby avoiding high CPU usage. ]

Subsequent patches will introduce CPU throttling during hash calculation
for the scrubber.

> Change-Id: I5701dd6cd4dff27ca3144ac5e3798a2216b39d4f
> BUG: 1207020
> Signed-off-by: Venky Shankar <vshankar@redhat.com>
> Reviewed-on: http://review.gluster.org/10511
> Tested-by: Gluster Build System <jenkins@build.gluster.com>
> Reviewed-by: Vijay Bellur <vbellur@redhat.com>

Change-Id: I5a125b2d0ac7dafd3e278b7fe4c6c9dd07af76dd
Signed-off-by: Venky Shankar <vshankar@redhat.com>
BUG: 1220041
Reviewed-on: http://review.gluster.org/10720
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Gaurav Kumar Garg <ggarg@redhat.com>
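As a back-of-the-envelope illustration of the thread scaling this patch
introduces (a standalone sketch mirroring br_scrubber_calc_scale() from
the diff below; not part of the patch itself):

    #include <math.h>
    #include <stdio.h>

    #ifndef M_E
    #define M_E 2.718
    #endif

    /* scale factors as defined in bit-rot-scrub.c below */
    #define BR_SCRUB_THREAD_SCALE_LAZY       0
    #define BR_SCRUB_THREAD_SCALE_NORMAL     0.4
    #define BR_SCRUB_THREAD_SCALE_AGGRESSIVE 1.0

    /* threads = child_count * e^k, truncated to unsigned int */
    static unsigned int
    calc_scale (unsigned int child_count, double k)
    {
            return (unsigned int)(child_count * pow (M_E, k));
    }

    int
    main (void)
    {
            unsigned int bricks = 2;  /* bricks (subvolumes) on this node */

            printf ("lazy:       %u\n",
                    calc_scale (bricks, BR_SCRUB_THREAD_SCALE_LAZY));
            printf ("normal:     %u\n",
                    calc_scale (bricks, BR_SCRUB_THREAD_SCALE_NORMAL));
            printf ("aggressive: %u\n",
                    calc_scale (bricks, BR_SCRUB_THREAD_SCALE_AGGRESSIVE));
            return 0;
    }

Compiled with "cc sketch.c -lm", a node carrying two bricks gets 2, 2 and
5 scrubber threads for lazy, normal and aggressive respectively; the
fractional part of child_count * e^k is simply truncated.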
Diffstat (limited to 'xlators')
-rw-r--r--  xlators/features/bit-rot/src/bitd/bit-rot-scrub.c            565
-rw-r--r--  xlators/features/bit-rot/src/bitd/bit-rot-scrub.h              9
-rw-r--r--  xlators/features/bit-rot/src/bitd/bit-rot.c                  146
-rw-r--r--  xlators/features/bit-rot/src/bitd/bit-rot.h                   40
-rw-r--r--  xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h     3
5 files changed, 712 insertions(+), 51 deletions(-)
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
index e0581a40df0..8a80052f250 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
@@ -13,17 +13,35 @@
#include "config.h"
#endif
+#include <math.h>
#include <ctype.h>
#include <sys/uio.h>
#include "glusterfs.h"
-#include "xlator.h"
#include "logging.h"
+#include "common-utils.h"
-#include "bit-rot.h"
#include "bit-rot-scrub.h"
#include <pthread.h>
+struct br_scrubbers {
+ pthread_t scrubthread;
+
+ struct list_head list;
+};
+
+struct br_fsscan_entry {
+ void *data;
+
+ loc_t parent;
+
+ gf_dirent_t *entry;
+
+ struct br_scanfs *fsscan; /* backpointer to subvolume scanner */
+
+ struct list_head list;
+};
+
/**
* fetch signature extended attribute from an object's fd.
* NOTE: On success @xattr is not unref'd as @sign points
@@ -246,8 +264,7 @@ bitd_compare_ckum (xlator_t *this,
* signs with SHA256).
*/
int
-bitd_start_scrub (xlator_t *subvol,
- gf_dirent_t *entry, loc_t *parent, void *data)
+br_scrubber_scrub_begin (xlator_t *this, struct br_fsscan_entry *fsentry)
{
int32_t ret = -1;
fd_t *fd = NULL;
@@ -256,17 +273,22 @@ bitd_start_scrub (xlator_t *subvol,
struct iatt parent_buf = {0, };
pid_t pid = 0;
br_child_t *child = NULL;
- xlator_t *this = NULL;
unsigned char *md = NULL;
inode_t *linked_inode = NULL;
br_isignature_out_t *sign = NULL;
unsigned long signedversion = 0;
+ gf_dirent_t *entry = NULL;
+ loc_t *parent = NULL;
- GF_VALIDATE_OR_GOTO ("bit-rot", subvol, out);
- GF_VALIDATE_OR_GOTO ("bit-rot", data, out);
+ GF_VALIDATE_OR_GOTO ("bit-rot", fsentry, out);
- child = data;
- this = child->this;
+ entry = fsentry->entry;
+ parent = &fsentry->parent;
+ child = fsentry->data;
+
+ GF_VALIDATE_OR_GOTO ("bit-rot", entry, out);
+ GF_VALIDATE_OR_GOTO ("bit-rot", parent, out);
+ GF_VALIDATE_OR_GOTO ("bit-rot", child, out);
pid = GF_CLIENT_PID_SCRUB;
@@ -366,29 +388,532 @@ bitd_start_scrub (xlator_t *subvol,
return ret;
}
-#define BR_SCRUB_THROTTLE_COUNT 30
-#define BR_SCRUB_THROTTLE_ZZZ 60
+static void
+wait_for_scrubbing (xlator_t *this, struct br_scanfs *fsscan)
+{
+ br_private_t *priv = NULL;
+ struct br_scrubber *fsscrub = NULL;
+
+ priv = this->private;
+ fsscrub = &priv->fsscrub;
+
+ pthread_mutex_lock (&fsscan->waitlock);
+ {
+ pthread_mutex_lock (&fsscrub->mutex);
+ {
+ list_replace_init (&fsscan->queued, &fsscan->ready);
+
+ /* wake up scrubbers */
+ pthread_cond_broadcast (&fsscrub->cond);
+ }
+ pthread_mutex_unlock (&fsscrub->mutex);
+
+ while (fsscan->entries != 0)
+ pthread_cond_wait
+ (&fsscan->waitcond, &fsscan->waitlock);
+ }
+ pthread_mutex_unlock (&fsscan->waitlock);
+}
+
+static inline void
+_br_fsscan_inc_entry_count (struct br_scanfs *fsscan)
+{
+ fsscan->entries++;
+}
+
+static inline void
+_br_fsscan_dec_entry_count (struct br_scanfs *fsscan)
+{
+ if (--fsscan->entries == 0) {
+ pthread_mutex_lock (&fsscan->waitlock);
+ {
+ pthread_cond_signal (&fsscan->waitcond);
+ }
+ pthread_mutex_unlock (&fsscan->waitlock);
+ }
+}
+
+static void
+_br_fsscan_collect_entry (struct br_scanfs *fsscan,
+ struct br_fsscan_entry *fsentry)
+{
+ list_add_tail (&fsentry->list, &fsscan->queued);
+ _br_fsscan_inc_entry_count (fsscan);
+}
+
+#define NR_ENTRIES (1<<7) /* ..bulk scrubbing */
+
+int
+br_fsscanner_handle_entry (xlator_t *subvol,
+ gf_dirent_t *entry, loc_t *parent, void *data)
+{
+ int32_t ret = -1;
+ int scrub = 0;
+ br_child_t *child = NULL;
+ xlator_t *this = NULL;
+ struct br_scanfs *fsscan = NULL;
+ struct br_fsscan_entry *fsentry = NULL;
+
+ GF_VALIDATE_OR_GOTO ("bit-rot", subvol, error_return);
+ GF_VALIDATE_OR_GOTO ("bit-rot", data, error_return);
+
+ child = data;
+ this = child->this;
+ fsscan = &child->fsscan;
+
+ fsentry = GF_CALLOC (1, sizeof (*fsentry), gf_br_mt_br_fsscan_entry_t);
+ if (!fsentry)
+ goto error_return;
+
+ {
+ fsentry->data = data;
+ fsentry->fsscan = &child->fsscan;
+
+ /* copy parent loc */
+ ret = loc_copy (&fsentry->parent, parent);
+ if (ret)
+ goto dealloc;
+
+ /* copy child entry */
+ fsentry->entry = entry_copy (entry);
+ if (!fsentry->entry)
+ goto locwipe;
+
+ INIT_LIST_HEAD (&fsentry->list);
+ }
+
+ LOCK (&fsscan->entrylock);
+ {
+ _br_fsscan_collect_entry (fsscan, fsentry);
+
+ /**
+ * need not be an equality check as entries may be pushed
+ * back onto the scanned queue when thread(s) are cleaned.
+ */
+ if (fsscan->entries >= NR_ENTRIES)
+ scrub = 1;
+ }
+ UNLOCK (&fsscan->entrylock);
+
+ if (scrub)
+ wait_for_scrubbing (this, fsscan);
+
+ return 0;
+
+ locwipe:
+ loc_wipe (&fsentry->parent);
+ dealloc:
+ GF_FREE (fsentry);
+ error_return:
+ return -1;
+}
+
void *
-br_scrubber (void *arg)
+br_fsscanner (void *arg)
{
- loc_t loc = {0,};
- xlator_t *this = NULL;
- br_child_t *child = NULL;
+ loc_t loc = {0,};
+ xlator_t *this = NULL;
+ br_child_t *child = NULL;
+ struct br_scanfs *fsscan = NULL;
child = arg;
this = child->this;
+ fsscan = &child->fsscan;
THIS = this;
loc.inode = child->table->root;
while (1) {
- (void) syncop_ftw_throttle
- (child->xl, &loc,
- GF_CLIENT_PID_SCRUB, child, bitd_start_scrub,
- BR_SCRUB_THROTTLE_COUNT, BR_SCRUB_THROTTLE_ZZZ);
+ (void) syncop_ftw (child->xl, &loc,
+ GF_CLIENT_PID_SCRUB,
+ child, br_fsscanner_handle_entry);
+ if (!list_empty (&fsscan->queued))
+ wait_for_scrubbing (this, fsscan);
+ }
+
+ return NULL;
+}
+
+#define BR_SCRUB_THREAD_SCALE_LAZY 0
+#define BR_SCRUB_THREAD_SCALE_NORMAL 0.4
+#define BR_SCRUB_THREAD_SCALE_AGGRESSIVE 1.0
+
+#ifndef M_E
+#define M_E 2.718
+#endif
+
+/**
+ * This is just a simple exponential scale to a fixed value selected
+ * per throttle config. We probably need to be more smart and select
+ * the scale based on the number of processor cores too.
+ */
+static unsigned int
+br_scrubber_calc_scale (xlator_t *this,
+ br_private_t *priv, scrub_throttle_t throttle)
+{
+ unsigned int scale = 0;
+
+ switch (throttle) {
+ case BR_SCRUB_THROTTLE_VOID:
+ scale = 0;
+ break;
+ case BR_SCRUB_THROTTLE_LAZY:
+ scale = priv->child_count *
+ pow (M_E, BR_SCRUB_THREAD_SCALE_LAZY);
+ break;
+ case BR_SCRUB_THROTTLE_NORMAL:
+ scale = priv->child_count *
+ pow (M_E, BR_SCRUB_THREAD_SCALE_NORMAL);
+ break;
+ case BR_SCRUB_THROTTLE_AGGRESSIVE:
+ scale = priv->child_count *
+ pow (M_E, BR_SCRUB_THREAD_SCALE_AGGRESSIVE);
+ break;
+ default:
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unknown throttle %d", throttle);
+ }
+
+ return scale;
+
+}
+
+static void
+br_scrubber_cleanup_handler (void *arg)
+{
+ struct br_scrubber *fsscrub = arg;
+ pthread_mutex_unlock (&fsscrub->mutex);
+}
+
+static inline br_child_t *
+_br_scrubber_get_next_child (struct br_scrubber *fsscrub)
+{
+ br_child_t *child = NULL;
+
+ child = list_first_entry (&fsscrub->scrublist, br_child_t, list);
+ list_rotate_left (&fsscrub->scrublist);
+
+ return child;
+}
+
+static inline void
+_br_scrubber_get_entry (br_child_t *child, struct br_fsscan_entry **fsentry)
+{
+ struct br_scanfs *fsscan = &child->fsscan;
+
+ if (list_empty (&fsscan->ready))
+ return;
+ *fsentry = list_first_entry
+ (&fsscan->ready, struct br_fsscan_entry, list);
+ list_del_init (&(*fsentry)->list);
+}
+
+static inline void
+_br_scrubber_find_scrubbable_entry (struct br_scrubber *fsscrub,
+ struct br_fsscan_entry **fsentry)
+{
+ br_child_t *child = NULL;
+ br_child_t *firstchild = NULL;
+
+ while (1) {
+ if (list_empty (&fsscrub->scrublist))
+ pthread_cond_wait (&fsscrub->cond, &fsscrub->mutex);
+
+ firstchild = NULL;
+ for (child = _br_scrubber_get_next_child (fsscrub);
+ child != firstchild;
+ child = _br_scrubber_get_next_child (fsscrub)) {
+
+ if (!firstchild)
+ firstchild = child;
+
+ _br_scrubber_get_entry (child, fsentry);
+ if (*fsentry)
+ break;
+ }
+
+ if (*fsentry)
+ break;
+
+ /* nothing to work on.. wait till available */
+ pthread_cond_wait (&fsscrub->cond, &fsscrub->mutex);
+ }
+}
+
+static void
+br_scrubber_pick_entry (struct br_scrubber *fsscrub,
+ struct br_fsscan_entry **fsentry)
+{
+ pthread_cleanup_push (br_scrubber_cleanup_handler, fsscrub);
+
+ pthread_mutex_lock (&fsscrub->mutex);
+ {
+ *fsentry = NULL;
+ _br_scrubber_find_scrubbable_entry (fsscrub, fsentry);
+ }
+ pthread_mutex_unlock (&fsscrub->mutex);
+
+ pthread_cleanup_pop (0);
+}
- sleep (BR_SCRUB_THROTTLE_ZZZ);
+struct br_scrub_entry {
+ gf_boolean_t scrubbed;
+ struct br_fsscan_entry *fsentry;
+};
+
+/**
+ * We need to be a bit careful here. These thread(s) are prone to cancellations
+ * when threads are scaled down (depending on the throttling value configured)
+ * or when scrub is paused. A thread can get cancelled while it's waiting for
+ * entries in the ->ready queue or when an object is undergoing scrubbing.
+ */
+static void
+br_scrubber_entry_handle (void *arg)
+{
+ struct br_scanfs *fsscan = NULL;
+ struct br_scrub_entry *sentry = NULL;
+ struct br_fsscan_entry *fsentry = NULL;
+
+ sentry = arg;
+
+ fsentry = sentry->fsentry;
+ fsscan = fsentry->fsscan;
+
+ LOCK (&fsscan->entrylock);
+ {
+ if (sentry->scrubbed) {
+ _br_fsscan_dec_entry_count (fsscan);
+
+ /* cleanup ->entry */
+ fsentry->data = NULL;
+ fsentry->fsscan = NULL;
+ loc_wipe (&fsentry->parent);
+ gf_dirent_entry_free (fsentry->entry);
+
+ GF_FREE (sentry->fsentry);
+ } else {
+ /* (re)queue the entry again for scrub */
+ _br_fsscan_collect_entry (fsscan, sentry->fsentry);
+ }
+ }
+ UNLOCK (&fsscan->entrylock);
+}
+
+static void
+br_scrubber_scrub_entry (xlator_t *this, struct br_fsscan_entry *fsentry)
+{
+ struct br_scrub_entry sentry = {0, };
+
+ sentry.scrubbed = 0;
+ sentry.fsentry = fsentry;
+
+ pthread_cleanup_push (br_scrubber_entry_handle, &sentry);
+ {
+ (void) br_scrubber_scrub_begin (this, fsentry);
+ sentry.scrubbed = 1;
+ }
+ pthread_cleanup_pop (1);
+}
+
+void *br_scrubber_proc (void *arg)
+{
+ xlator_t *this = NULL;
+ struct br_scrubber *fsscrub = NULL;
+ struct br_fsscan_entry *fsentry = NULL;
+
+ fsscrub = arg;
+ THIS = this = fsscrub->this;
+
+ while (1) {
+ br_scrubber_pick_entry (fsscrub, &fsentry);
+ br_scrubber_scrub_entry (this, fsentry);
+ sleep (1);
}
return NULL;
}
+
+static int32_t
+br_scrubber_scale_up (xlator_t *this,
+ struct br_scrubber *fsscrub,
+ unsigned int v1, unsigned int v2)
+{
+ int i = 0;
+ int32_t ret = -1;
+ int diff = 0;
+ struct br_scrubbers *scrub = NULL;
+
+ diff = (int)(v2 - v1);
+
+ gf_log (this->name, GF_LOG_INFO,
+ "Scaling up scrubbers [%d => %d]", v1, v2);
+
+ for (i = 0; i < diff; i++) {
+ scrub = GF_CALLOC (1, sizeof (*scrub),
+ gf_br_mt_br_scrubber_t);
+ if (!scrub)
+ break;
+
+ INIT_LIST_HEAD (&scrub->list);
+ ret = gf_thread_create (&scrub->scrubthread,
+ NULL, br_scrubber_proc, fsscrub);
+ if (ret)
+ break;
+
+ fsscrub->nr_scrubbers++;
+ list_add_tail (&scrub->list, &fsscrub->scrubbers);
+ }
+
+ if ((i != diff) && !scrub)
+ goto error_return;
+
+ if (i != diff) /* degraded scaling.. */
+ gf_log (this->name, GF_LOG_WARNING,
+ "Could not fully scale up to %d scrubber(s). Spawned "
+ "%d/%d [total scrubber(s): %d]", v2, i, diff, (v1 + i));
+
+ return 0;
+
+ error_return:
+ return -1;
+}
+
+static int32_t
+br_scrubber_scale_down (xlator_t *this,
+ struct br_scrubber *fsscrub,
+ unsigned int v1, unsigned int v2)
+{
+ int i = 0;
+ int diff = 0;
+ int32_t ret = -1;
+ struct br_scrubbers *scrub = NULL;
+
+ diff = (int)(v1 - v2);
+
+ gf_log (this->name, GF_LOG_INFO,
+ "Scaling down scrubbers [%d => %d]", v1, v2);
+
+ for (i = 0 ; i < diff; i++) {
+ scrub = list_first_entry
+ (&fsscrub->scrubbers, struct br_scrubbers, list);
+
+ list_del_init (&scrub->list);
+ ret = gf_thread_cleanup_xint (scrub->scrubthread);
+ if (ret)
+ break;
+ GF_FREE (scrub);
+
+ fsscrub->nr_scrubbers--;
+ }
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Could not fully scale down to %d scrubber(s). "
+ "Terminated %d/%d [total scrubber(s): %d]",
+ v2, i, diff, (v1 - i));
+ ret = 0;
+ }
+
+ return ret;
+}
+
+static int32_t
+br_scrubber_configure (xlator_t *this, br_private_t *priv,
+ struct br_scrubber *fsscrub, scrub_throttle_t nthrottle)
+{
+ int32_t ret = 0;
+ unsigned int v1 = 0;
+ unsigned int v2 = 0;
+
+ v1 = fsscrub->nr_scrubbers;
+ v2 = br_scrubber_calc_scale (this, priv, nthrottle);
+
+ if (v1 == v2)
+ return 0;
+
+ if (v1 > v2)
+ ret = br_scrubber_scale_down (this, fsscrub, v1, v2);
+ else
+ ret = br_scrubber_scale_up (this, fsscrub, v1, v2);
+
+ return ret;
+}
+
+/* TODO: token bucket spec */
+static int32_t
+br_scrubber_handle_throttle (xlator_t *this,
+ br_private_t *priv, dict_t *options)
+{
+ int32_t ret = 0;
+ char *tmp = NULL;
+ struct br_scrubber *fsscrub = NULL;
+ scrub_throttle_t nthrottle = BR_SCRUB_THROTTLE_VOID;
+
+ fsscrub = &priv->fsscrub;
+
+ if (options)
+ GF_OPTION_RECONF ("scrub-throttle",
+ tmp, options, str, error_return);
+ else
+ GF_OPTION_INIT ("scrub-throttle", tmp, str, error_return);
+
+ if (strcasecmp (tmp, "lazy") == 0)
+ nthrottle = BR_SCRUB_THROTTLE_LAZY;
+ else if (strcasecmp (tmp, "normal") == 0)
+ nthrottle = BR_SCRUB_THROTTLE_NORMAL;
+ else if (strcasecmp (tmp, "aggressive") == 0)
+ nthrottle = BR_SCRUB_THROTTLE_AGGRESSIVE;
+ else
+ goto error_return;
+
+ /* on failure old throttling value is preserved */
+ ret = br_scrubber_configure (this, priv, fsscrub, nthrottle);
+ if (ret)
+ goto error_return;
+
+ fsscrub->throttle = nthrottle;
+ return 0;
+
+ error_return:
+ return -1;
+}
+
+/* TODO: pause/resume, frequency */
+int32_t
+br_scrubber_handle_options (xlator_t *this, br_private_t *priv, dict_t *options)
+{
+ int32_t ret = 0;
+
+ ret = br_scrubber_handle_throttle (this, priv, options);
+ if (ret)
+ goto error_return;
+
+ return 0;
+
+ error_return:
+ return -1;
+}
+
+int32_t
+br_scrubber_init (xlator_t *this, br_private_t *priv)
+{
+ struct br_scrubber *fsscrub = NULL;
+
+ priv->tbf = br_tbf_init (NULL, 0);
+ if (!priv->tbf)
+ return -1;
+
+ fsscrub = &priv->fsscrub;
+
+ fsscrub->this = this;
+ fsscrub->throttle = BR_SCRUB_THROTTLE_VOID;
+
+ pthread_mutex_init (&fsscrub->mutex, NULL);
+ pthread_cond_init (&fsscrub->cond, NULL);
+
+ fsscrub->nr_scrubbers = 0;
+ INIT_LIST_HEAD (&fsscrub->scrubbers);
+ INIT_LIST_HEAD (&fsscrub->scrublist);
+
+ return 0;
+}
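The scrublist rotation above is what keeps one brick from monopolizing the
scrubber threads: _br_scrubber_get_next_child() takes the first brick off
the scrublist and list_rotate_left() moves it to the tail, so the next
pick starts at the following brick. A minimal standalone sketch of the
same round-robin idea, using plain arrays instead of list_head (all names
hypothetical, for illustration only):

    #include <stdio.h>

    #define NR_BRICKS 3

    /* per-brick queue depth stands in for each brick's "ready" list */
    static int pending[NR_BRICKS] = {2, 0, 1};
    static int cursor; /* rotation cursor */

    /*
     * Visit each brick at most once per pick, starting at the cursor,
     * and advance the cursor past the chosen brick -- the moral
     * equivalent of _br_scrubber_find_scrubbable_entry().
     */
    static int
    pick_brick (void)
    {
            for (int i = 0; i < NR_BRICKS; i++) {
                    int b = (cursor + i) % NR_BRICKS;
                    if (pending[b] > 0) {
                            cursor = (b + 1) % NR_BRICKS; /* rotate */
                            return b;
                    }
            }
            return -1; /* no work; the real code waits on a condvar */
    }

    int
    main (void)
    {
            int b;
            while ((b = pick_brick ()) != -1) {
                    pending[b]--;
                    printf ("scrubbed one entry from brick %d\n", b);
            }
            return 0;
    }

Bricks 0 and 2 are interleaved (0, 2, 0) rather than brick 0 being drained
first, which is exactly the "stall" avoidance the commit message describes.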
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h
index daec9ad8196..4f00020d66a 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h
@@ -11,6 +11,13 @@
#ifndef __BIT_ROT_SCRUB_H__
#define __BIT_ROT_SCRUB_H__
-void *br_scrubber (void *);
+#include "xlator.h"
+#include "bit-rot.h"
+
+void *br_fsscanner (void *);
+
+int32_t br_scrubber_handle_options (xlator_t *, br_private_t *, dict_t *);
+
+int32_t br_scrubber_init (xlator_t *, br_private_t *);
#endif /* __BIT_ROT_SCRUB_H__ */
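A usage note on the option exported here: br_scrubber_handle_options()
(bit-rot-scrub.c above) accepts "lazy", "normal" or "aggressive" for
scrub-throttle, both at init and on reconfigure. Assuming the companion
glusterd/CLI wiring (not part of this diff), the knob would be driven per
volume with something like "gluster volume bitrot <VOLNAME> scrub-throttle
aggressive"; on a bad value or a failed rescale the old throttle setting
is preserved.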
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c
index 880b16edfa8..eea81aec53a 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot.c
+++ b/xlators/features/bit-rot/src/bitd/bit-rot.c
@@ -29,15 +29,6 @@
#define BR_HASH_CALC_READ_SIZE (128 * 1024)
-br_tbf_opspec_t opthrottle[] = {
- {
- .op = BR_TBF_OP_HASH,
- .rate = BR_HASH_CALC_READ_SIZE,
- .maxlimit = (2 * BR_WORKERS * BR_HASH_CALC_READ_SIZE),
- },
- /** TODO: throttle getdents(), read() request(s) */
-};
-
static int
br_find_child_index (xlator_t *this, xlator_t *child)
{
@@ -1066,6 +1057,7 @@ br_enact_signer (xlator_t *this, br_child_t *child, br_stub_init_t *stub)
child->threadrunning = 1;
/* it's OK to continue, "old" objects would be signed when modified */
+ list_del_init (&child->list);
return 0;
dealloc:
@@ -1078,14 +1070,45 @@ static inline int32_t
br_enact_scrubber (xlator_t *this, br_child_t *child)
{
int32_t ret = 0;
+ br_private_t *priv = NULL;
+ struct br_scanfs *fsscan = NULL;
+ struct br_scrubber *fsscrub = NULL;
+
+ priv = this->private;
+
+ fsscan = &child->fsscan;
+ fsscrub = &priv->fsscrub;
+
+ LOCK_INIT (&fsscan->entrylock);
+ pthread_mutex_init (&fsscan->waitlock, NULL);
+ pthread_cond_init (&fsscan->waitcond, NULL);
- ret = gf_thread_create (&child->thread, NULL, br_scrubber, child);
+ fsscan->entries = 0;
+ INIT_LIST_HEAD (&fsscan->queued);
+ INIT_LIST_HEAD (&fsscan->ready);
+
+ ret = gf_thread_create (&child->thread, NULL, br_fsscanner, child);
if (ret != 0) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR, "failed to spawn scrubber");
+ gf_log (this->name, GF_LOG_ALERT, "failed to spawn bitrot "
+ "scrubber daemon [Brick: %s]", child->brick_path);
+ goto error_return;
}
- return ret;
+ /**
+ * Everything has been setup.. add this subvolume to scrubbers
+ * list.
+ */
+ pthread_mutex_lock (&fsscrub->mutex);
+ {
+ list_move (&child->list, &fsscrub->scrublist);
+ pthread_cond_broadcast (&fsscrub->cond);
+ }
+ pthread_mutex_unlock (&fsscrub->mutex);
+
+ return 0;
+
+ error_return:
+ return -1;
}
/**
@@ -1202,8 +1225,7 @@ br_handle_events (void *arg)
"failed to connect to the "
"child (subvolume: %s)",
child->xl->name);
- else
- list_del_init (&child->list);
+
}
}
@@ -1379,16 +1401,72 @@ br_init_signer (xlator_t *this, br_private_t *priv)
return -1;
}
-int32_t
-br_init_rate_limiter (br_private_t *priv)
+/**
+ * For signer, only rate limit CPU usage (during hash calculation) when
+ * compiled with -DBR_RATE_LIMIT_SIGNER cflags, else let it run full
+ * throttle.
+ */
+static int32_t
+br_rate_limit_signer (xlator_t *this, int child_count, int numbricks)
{
- br_tbf_opspec_t *spec = opthrottle;
- priv->tbf = br_tbf_init (spec, sizeof (opthrottle)
- / sizeof (br_tbf_opspec_t));
+ br_private_t *priv = NULL;
+ br_tbf_opspec_t spec = {0,};
+
+ priv = this->private;
+
+ spec.op = BR_TBF_OP_HASH;
+ spec.rate = 0;
+ spec.maxlimit = 0;
+
+#ifdef BR_RATE_LIMIT_SIGNER
+
+ double contribution = 0;
+ contribution = ((double)1 - ((double)child_count / (double)numbricks));
+ if (contribution == 0)
+ contribution = 1;
+ spec.rate = BR_HASH_CALC_READ_SIZE * contribution;
+ spec.maxlimit = BR_WORKERS * BR_HASH_CALC_READ_SIZE;
+
+#endif
+
+ if (!spec.rate)
+ gf_log (this->name,
+ GF_LOG_INFO, "[Rate Limit Info] \"FULL THROTTLE\"");
+ else
+ gf_log (this->name, GF_LOG_INFO,
+ "[Rate Limit Info] \"tokens/sec (rate): %lu, "
+ "maxlimit: %lu\"", spec.rate, spec.maxlimit);
+ priv->tbf = br_tbf_init (&spec, 1);
return priv->tbf ? 0 : -1;
}
+static int32_t
+br_signer_init (xlator_t *this, br_private_t *priv)
+{
+ int32_t ret = 0;
+ int numbricks = 0;
+
+ GF_OPTION_INIT ("expiry-time", priv->expiry_time, int32, error_return);
+ GF_OPTION_INIT ("brick-count", numbricks, int32, error_return);
+
+ ret = br_rate_limit_signer (this, priv->child_count, numbricks);
+ if (ret)
+ goto error_return;
+
+ ret = br_init_signer (this, priv);
+ if (ret)
+ goto cleanup_tbf;
+
+ return 0;
+
+ cleanup_tbf:
+ /* cleanup TBF */
+ error_return:
+ return -1;
+
+}
+
int32_t
init (xlator_t *this)
{
@@ -1410,7 +1488,6 @@ init (xlator_t *this)
}
GF_OPTION_INIT ("scrubber", priv->iamscrubber, bool, out);
- GF_OPTION_INIT ("expiry-time", priv->expiry_time, int32, out);
priv->child_count = xlator_subvolume_count (this);
priv->children = GF_CALLOC (priv->child_count, sizeof (*priv->children),
@@ -1443,18 +1520,19 @@ init (xlator_t *this)
INIT_LIST_HEAD (&priv->children[i].list);
INIT_LIST_HEAD (&priv->bricks);
- ret = br_init_rate_limiter (priv);
- if (ret)
- goto cleanup_mutex;
-
this->private = priv;
if (!priv->iamscrubber) {
- ret = br_init_signer (this, priv);
- if (ret)
- goto cleanup_tbf;
+ ret = br_signer_init (this, priv);
+ } else {
+ ret = br_scrubber_init (this, priv);
+ if (!ret)
+ ret = br_scrubber_handle_options (this, priv, NULL);
}
+ if (ret)
+ goto cleanup_mutex;
+
ret = gf_thread_create (&priv->thread, NULL, br_handle_events, this);
if (ret != 0) {
gf_log (this->name, GF_LOG_ERROR,
@@ -1469,7 +1547,6 @@ init (xlator_t *this)
return 0;
}
- cleanup_tbf:
cleanup_mutex:
(void) pthread_cond_destroy (&priv->cond);
(void) pthread_mutex_destroy (&priv->lock);
@@ -1505,6 +1582,17 @@ fini (xlator_t *this)
return;
}
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+ br_private_t *priv = this->private;
+
+ if (!priv->iamscrubber)
+ return 0;
+
+ return br_scrubber_handle_options (this, priv, options);
+}
+
struct xlator_fops fops;
struct xlator_cbks cbks;
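Worked numbers for the signer rate limit above (using the constants in
this tree: BR_HASH_CALC_READ_SIZE = 128 * 1024 and BR_WORKERS = 4): with
numbricks = 4 bricks volume-wide and child_count = 1 local brick,
contribution = 1 - 1/4 = 0.75, so spec.rate = 0.75 * 131072 = 98304
tokens/sec and spec.maxlimit = 4 * 131072 = 524288. When child_count
equals numbricks the contribution computes to 0 and is bumped back to 1,
i.e. the local signer runs at the full 131072 tokens/sec.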
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.h b/xlators/features/bit-rot/src/bitd/bit-rot.h
index 5b641801916..6f21a6985ba 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot.h
+++ b/xlators/features/bit-rot/src/bitd/bit-rot.h
@@ -38,8 +38,26 @@
*/
#define BR_WORKERS 4
+typedef enum scrub_throttle {
+ BR_SCRUB_THROTTLE_VOID = -1,
+ BR_SCRUB_THROTTLE_LAZY = 0,
+ BR_SCRUB_THROTTLE_NORMAL = 1,
+ BR_SCRUB_THROTTLE_AGGRESSIVE = 2,
+} scrub_throttle_t;
+
#define signature_size(hl) (sizeof (br_isignature_t) + hl + 1)
+struct br_scanfs {
+ gf_lock_t entrylock;
+
+ pthread_mutex_t waitlock;
+ pthread_cond_t waitcond;
+
+ unsigned int entries;
+ struct list_head queued;
+ struct list_head ready;
+};
+
struct br_child {
char child_up; /* Indicates whether this child is
up or not */
@@ -53,12 +71,14 @@ struct br_child {
xlator_t *this; /* Bit rot xlator */
pthread_t thread; /* initial crawler for unsigned
- object(s) */
+ object(s) or scrub crawler */
int threadrunning; /* active thread */
struct mem_pool *timer_pool; /* timer-wheel's timer mem-pool */
struct timeval tv;
+
+ struct br_scanfs fsscan; /* per subvolume FS scanner */
};
typedef struct br_child br_child_t;
@@ -72,6 +92,23 @@ struct br_obj_n_workers {
signing each object */
};
+struct br_scrubber {
+ xlator_t *this;
+
+ scrub_throttle_t throttle;
+
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+
+ unsigned int nr_scrubbers;
+ struct list_head scrubbers;
+
+ /*
+ * list of "rotatable" subvolume(s) undergoing scrubbing
+ */
+ struct list_head scrublist;
+};
+
typedef struct br_obj_n_workers br_obj_n_workers_t;
struct br_private {
@@ -100,6 +137,7 @@ struct br_private {
br_tbf_t *tbf; /* token bucket filter */
gf_boolean_t iamscrubber; /* function as a fs scrubber */
+ struct br_scrubber fsscrub; /* scrubbers for this subvolume */
};
typedef struct br_private br_private_t;
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h
index bb4030493db..46271407219 100644
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h
@@ -25,6 +25,9 @@ enum br_mem_types {
gf_br_mt_br_tbf_t,
gf_br_mt_br_tbf_bucket_t,
gf_br_mt_br_tbf_throttle_t,
+ gf_br_mt_br_tbf_opspec_t,
+ gf_br_mt_br_scrubber_t,
+ gf_br_mt_br_fsscan_entry_t,
gf_br_stub_mt_end
};