Diffstat (limited to 'xlators/features/bit-rot/src/bitd/bit-rot-scrub.c')
-rw-r--r--  xlators/features/bit-rot/src/bitd/bit-rot-scrub.c  565
1 file changed, 545 insertions(+), 20 deletions(-)
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
index e0581a40df0..8a80052f250 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
@@ -13,17 +13,35 @@
#include "config.h"
#endif
+#include <math.h>
#include <ctype.h>
#include <sys/uio.h>
#include "glusterfs.h"
-#include "xlator.h"
#include "logging.h"
+#include "common-utils.h"
-#include "bit-rot.h"
#include "bit-rot-scrub.h"
#include <pthread.h>
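+/* one scrubber thread; instances are linked on fsscrub->scrubbers */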
+struct br_scrubbers {
+ pthread_t scrubthread;
+
+ struct list_head list;
+};
+
+struct br_fsscan_entry {
+ void *data;
+
+ loc_t parent;
+
+ gf_dirent_t *entry;
+
+ struct br_scanfs *fsscan; /* backpointer to subvolume scanner */
+
+ struct list_head list;
+};
+
/**
* fetch signature extended attribute from an object's fd.
* NOTE: On success @xattr is not unref'd as @sign points
@@ -246,8 +264,7 @@ bitd_compare_ckum (xlator_t *this,
* signs with SHA256).
*/
int
-bitd_start_scrub (xlator_t *subvol,
- gf_dirent_t *entry, loc_t *parent, void *data)
+br_scrubber_scrub_begin (xlator_t *this, struct br_fsscan_entry *fsentry)
{
int32_t ret = -1;
fd_t *fd = NULL;
@@ -256,17 +273,22 @@ bitd_start_scrub (xlator_t *subvol,
struct iatt parent_buf = {0, };
pid_t pid = 0;
br_child_t *child = NULL;
- xlator_t *this = NULL;
unsigned char *md = NULL;
inode_t *linked_inode = NULL;
br_isignature_out_t *sign = NULL;
unsigned long signedversion = 0;
+ gf_dirent_t *entry = NULL;
+ loc_t *parent = NULL;
- GF_VALIDATE_OR_GOTO ("bit-rot", subvol, out);
- GF_VALIDATE_OR_GOTO ("bit-rot", data, out);
+ GF_VALIDATE_OR_GOTO ("bit-rot", fsentry, out);
- child = data;
- this = child->this;
+ entry = fsentry->entry;
+ parent = &fsentry->parent;
+ child = fsentry->data;
+
+ GF_VALIDATE_OR_GOTO ("bit-rot", entry, out);
+ GF_VALIDATE_OR_GOTO ("bit-rot", parent, out);
+ GF_VALIDATE_OR_GOTO ("bit-rot", child, out);
pid = GF_CLIENT_PID_SCRUB;
@@ -366,29 +388,532 @@ bitd_start_scrub (xlator_t *subvol,
return ret;
}
-#define BR_SCRUB_THROTTLE_COUNT 30
-#define BR_SCRUB_THROTTLE_ZZZ 60
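+/**
+ * Hand this scanner's batch over to the scrubbers: move the queued list
+ * onto the ready list under the scrubber lock, wake up the scrubber
+ * threads and block until the entry count drops back to zero.
+ */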
+static void
+wait_for_scrubbing (xlator_t *this, struct br_scanfs *fsscan)
+{
+ br_private_t *priv = NULL;
+ struct br_scrubber *fsscrub = NULL;
+
+ priv = this->private;
+ fsscrub = &priv->fsscrub;
+
+ pthread_mutex_lock (&fsscan->waitlock);
+ {
+ pthread_mutex_lock (&fsscrub->mutex);
+ {
+ list_replace_init (&fsscan->queued, &fsscan->ready);
+
+ /* wake up scrubbers */
+ pthread_cond_broadcast (&fsscrub->cond);
+ }
+ pthread_mutex_unlock (&fsscrub->mutex);
+
+ while (fsscan->entries != 0)
+ pthread_cond_wait
+ (&fsscan->waitcond, &fsscan->waitlock);
+ }
+ pthread_mutex_unlock (&fsscan->waitlock);
+}
+
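+/**
+ * entry count accounting: callers serialize via fsscan->entrylock; the
+ * decrement that hits zero signals the scanner waiting in
+ * wait_for_scrubbing().
+ */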
+static inline void
+_br_fsscan_inc_entry_count (struct br_scanfs *fsscan)
+{
+ fsscan->entries++;
+}
+
+static inline void
+_br_fsscan_dec_entry_count (struct br_scanfs *fsscan)
+{
+ if (--fsscan->entries == 0) {
+ pthread_mutex_lock (&fsscan->waitlock);
+ {
+ pthread_cond_signal (&fsscan->waitcond);
+ }
+ pthread_mutex_unlock (&fsscan->waitlock);
+ }
+}
+
+static void
+_br_fsscan_collect_entry (struct br_scanfs *fsscan,
+ struct br_fsscan_entry *fsentry)
+{
+ list_add_tail (&fsentry->list, &fsscan->queued);
+ _br_fsscan_inc_entry_count (fsscan);
+}
+
+#define NR_ENTRIES (1<<7) /* entries to batch up for bulk scrubbing */
+
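+/**
+ * Callback invoked for each directory entry the tree walk encounters:
+ * take a private copy of the entry and its parent loc, queue it, and
+ * once NR_ENTRIES entries pile up hand the batch over to the scrubbers.
+ */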
+int
+br_fsscanner_handle_entry (xlator_t *subvol,
+ gf_dirent_t *entry, loc_t *parent, void *data)
+{
+ int32_t ret = -1;
+ int scrub = 0;
+ br_child_t *child = NULL;
+ xlator_t *this = NULL;
+ struct br_scanfs *fsscan = NULL;
+ struct br_fsscan_entry *fsentry = NULL;
+
+ GF_VALIDATE_OR_GOTO ("bit-rot", subvol, error_return);
+ GF_VALIDATE_OR_GOTO ("bit-rot", data, error_return);
+
+ child = data;
+ this = child->this;
+ fsscan = &child->fsscan;
+
+ fsentry = GF_CALLOC (1, sizeof (*fsentry), gf_br_mt_br_fsscan_entry_t);
+ if (!fsentry)
+ goto error_return;
+
+ {
+ fsentry->data = data;
+ fsentry->fsscan = &child->fsscan;
+
+ /* copy parent loc */
+ ret = loc_copy (&fsentry->parent, parent);
+ if (ret)
+ goto dealloc;
+
+ /* copy child entry */
+ fsentry->entry = entry_copy (entry);
+ if (!fsentry->entry)
+ goto locwipe;
+
+ INIT_LIST_HEAD (&fsentry->list);
+ }
+
+ LOCK (&fsscan->entrylock);
+ {
+ _br_fsscan_collect_entry (fsscan, fsentry);
+
+ /**
+ * this need not be an equality check as entries may be pushed
+ * back onto the queued list when thread(s) are cleaned up.
+ */
+ if (fsscan->entries >= NR_ENTRIES)
+ scrub = 1;
+ }
+ UNLOCK (&fsscan->entrylock);
+
+ if (scrub)
+ wait_for_scrubbing (this, fsscan);
+
+ return 0;
+
+ locwipe:
+ loc_wipe (&fsentry->parent);
+ dealloc:
+ GF_FREE (fsentry);
+ error_return:
+ return -1;
+}
+
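+/**
+ * per-child filesystem scanner: repeatedly walk the brick, scrubbing in
+ * batches; any partial batch left over from the walk is flushed before
+ * the next pass.
+ */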
void *
-br_scrubber (void *arg)
+br_fsscanner (void *arg)
{
- loc_t loc = {0,};
- xlator_t *this = NULL;
- br_child_t *child = NULL;
+ loc_t loc = {0,};
+ xlator_t *this = NULL;
+ br_child_t *child = NULL;
+ struct br_scanfs *fsscan = NULL;
child = arg;
this = child->this;
+ fsscan = &child->fsscan;
THIS = this;
loc.inode = child->table->root;
while (1) {
- (void) syncop_ftw_throttle
- (child->xl, &loc,
- GF_CLIENT_PID_SCRUB, child, bitd_start_scrub,
- BR_SCRUB_THROTTLE_COUNT, BR_SCRUB_THROTTLE_ZZZ);
+ (void) syncop_ftw (child->xl, &loc,
+ GF_CLIENT_PID_SCRUB,
+ child, br_fsscanner_handle_entry);
+ if (!list_empty (&fsscan->queued))
+ wait_for_scrubbing (this, fsscan);
+ }
+
+ return NULL;
+}
+
+#define BR_SCRUB_THREAD_SCALE_LAZY 0
+#define BR_SCRUB_THREAD_SCALE_NORMAL 0.4
+#define BR_SCRUB_THREAD_SCALE_AGGRESSIVE 1.0
+
+#ifndef M_E
+#define M_E 2.718
+#endif
+
+/**
+ * This is just a simple exponential scale to a fixed value selected
+ * per throttle config. We probably need to be smarter and take the
+ * number of processor cores into account too.
+ */
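+/*
+ * For example, with an (illustrative) child_count of 4: LAZY yields
+ * 4 * e^0.0 = 4 threads, NORMAL yields 4 * e^0.4 ~= 5 and AGGRESSIVE
+ * yields 4 * e^1.0 ~= 10 (results truncate on assignment).
+ */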
+static unsigned int
+br_scrubber_calc_scale (xlator_t *this,
+ br_private_t *priv, scrub_throttle_t throttle)
+{
+ unsigned int scale = 0;
+
+ switch (throttle) {
+ case BR_SCRUB_THROTTLE_VOID:
+ scale = 0;
+ break;
+ case BR_SCRUB_THROTTLE_LAZY:
+ scale = priv->child_count *
+ pow (M_E, BR_SCRUB_THREAD_SCALE_LAZY);
+ break;
+ case BR_SCRUB_THROTTLE_NORMAL:
+ scale = priv->child_count *
+ pow (M_E, BR_SCRUB_THREAD_SCALE_NORMAL);
+ break;
+ case BR_SCRUB_THROTTLE_AGGRESSIVE:
+ scale = priv->child_count *
+ pow (M_E, BR_SCRUB_THREAD_SCALE_AGGRESSIVE);
+ break;
+ default:
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unknown throttle %d", throttle);
+ }
+
+ return scale;
+}
+
+static void
+br_scrubber_cleanup_handler (void *arg)
+{
+ struct br_scrubber *fsscrub = arg;
+ pthread_mutex_unlock (&fsscrub->mutex);
+}
+
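+/* round-robin child selection: return the head of the scrub list and
+ rotate it to the tail (caller holds fsscrub->mutex) */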
+static inline br_child_t *
+_br_scrubber_get_next_child (struct br_scrubber *fsscrub)
+{
+ br_child_t *child = NULL;
+
+ child = list_first_entry (&fsscrub->scrublist, br_child_t, list);
+ list_rotate_left (&fsscrub->scrublist);
+
+ return child;
+}
+
+static inline void
+_br_scrubber_get_entry (br_child_t *child, struct br_fsscan_entry **fsentry)
+{
+ struct br_scanfs *fsscan = &child->fsscan;
+
+ if (list_empty (&fsscan->ready))
+ return;
+ *fsentry = list_first_entry
+ (&fsscan->ready, struct br_fsscan_entry, list);
+ list_del_init (&(*fsentry)->list);
+}
+
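+/**
+ * Called with fsscrub->mutex held: wait until some child has an entry
+ * on its ready list, scanning at most one full rotation of children
+ * per wakeup.
+ */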
+static inline void
+_br_scrubber_find_scrubbable_entry (struct br_scrubber *fsscrub,
+ struct br_fsscan_entry **fsentry)
+{
+ br_child_t *child = NULL;
+ br_child_t *firstchild = NULL;
+
+ while (1) {
+ if (list_empty (&fsscrub->scrublist))
+ pthread_cond_wait (&fsscrub->cond, &fsscrub->mutex);
+
+ firstchild = NULL;
+ for (child = _br_scrubber_get_next_child (fsscrub);
+ child != firstchild;
+ child = _br_scrubber_get_next_child (fsscrub)) {
+
+ if (!firstchild)
+ firstchild = child;
+
+ _br_scrubber_get_entry (child, fsentry);
+ if (*fsentry)
+ break;
+ }
+
+ if (*fsentry)
+ break;
+
+ /* nothing to work on.. wait till available */
+ pthread_cond_wait (&fsscrub->cond, &fsscrub->mutex);
+ }
+}
+
+static void
+br_scrubber_pick_entry (struct br_scrubber *fsscrub,
+ struct br_fsscan_entry **fsentry)
+{
+ pthread_cleanup_push (br_scrubber_cleanup_handler, fsscrub);
+
+ pthread_mutex_lock (&fsscrub->mutex);
+ {
+ *fsentry = NULL;
+ _br_scrubber_find_scrubbable_entry (fsscrub, fsentry);
+ }
+ pthread_mutex_unlock (&fsscrub->mutex);
+
+ pthread_cleanup_pop (0);
+}
- sleep (BR_SCRUB_THROTTLE_ZZZ);
+struct br_scrub_entry {
+ gf_boolean_t scrubbed;
+ struct br_fsscan_entry *fsentry;
+};
+
+/**
+ * We need to be a bit careful here. These thread(s) are prone to cancellations
+ * when threads are scaled down (depending on the throttling value configured)
+ * or when scrubbing is paused. A thread can get cancelled while it's waiting
+ * for entries in the ->pending queue or when an object is undergoing scrubbing.
+ */
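+/*
+ * The cleanup handler below runs on cancellation (and on the normal path
+ * via pthread_cleanup_pop(1)): a fully scrubbed entry is freed, anything
+ * else is requeued, so no entry is leaked or lost.
+ */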
+static void
+br_scrubber_entry_handle (void *arg)
+{
+ struct br_scanfs *fsscan = NULL;
+ struct br_scrub_entry *sentry = NULL;
+ struct br_fsscan_entry *fsentry = NULL;
+
+ sentry = arg;
+
+ fsentry = sentry->fsentry;
+ fsscan = fsentry->fsscan;
+
+ LOCK (&fsscan->entrylock);
+ {
+ if (sentry->scrubbed) {
+ _br_fsscan_dec_entry_count (fsscan);
+
+ /* cleanup ->entry */
+ fsentry->data = NULL;
+ fsentry->fsscan = NULL;
+ loc_wipe (&fsentry->parent);
+ gf_dirent_entry_free (fsentry->entry);
+
+ GF_FREE (sentry->fsentry);
+ } else {
+ /* (re)queue the entry again for scrub */
+ _br_fsscan_collect_entry (fsscan, sentry->fsentry);
+ }
+ }
+ UNLOCK (&fsscan->entrylock);
+}
+
+static void
+br_scrubber_scrub_entry (xlator_t *this, struct br_fsscan_entry *fsentry)
+{
+ struct br_scrub_entry sentry = {0, };
+
+ sentry.scrubbed = 0;
+ sentry.fsentry = fsentry;
+
+ pthread_cleanup_push (br_scrubber_entry_handle, &sentry);
+ {
+ (void) br_scrubber_scrub_begin (this, fsentry);
+ sentry.scrubbed = 1;
+ }
+ pthread_cleanup_pop (1);
+}
+
+void *
+br_scrubber_proc (void *arg)
+{
+ xlator_t *this = NULL;
+ struct br_scrubber *fsscrub = NULL;
+ struct br_fsscan_entry *fsentry = NULL;
+
+ fsscrub = arg;
+ THIS = this = fsscrub->this;
+
+ while (1) {
+ br_scrubber_pick_entry (fsscrub, &fsentry);
+ br_scrubber_scrub_entry (this, fsentry);
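+ /* pause briefly between objects to keep scrubbing I/O gentle */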
+ sleep (1);
}
return NULL;
}
+
+static int32_t
+br_scrubber_scale_up (xlator_t *this,
+ struct br_scrubber *fsscrub,
+ unsigned int v1, unsigned int v2)
+{
+ int i = 0;
+ int32_t ret = -1;
+ int diff = 0;
+ struct br_scrubbers *scrub = NULL;
+
+ diff = (int)(v2 - v1);
+
+ gf_log (this->name, GF_LOG_INFO,
+ "Scaling up scrubbers [%d => %d]", v1, v2);
+
+ for (i = 0; i < diff; i++) {
+ scrub = GF_CALLOC (1, sizeof (*scrub),
+ gf_br_mt_br_scrubber_t);
+ if (!scrub)
+ break;
+
+ INIT_LIST_HEAD (&scrub->list);
+ ret = gf_thread_create (&scrub->scrubthread,
+ NULL, br_scrubber_proc, fsscrub);
+ if (ret)
+ break;
+
+ fsscrub->nr_scrubbers++;
+ list_add_tail (&scrub->list, &fsscrub->scrubbers);
+ }
+
+ if ((i != diff) && !scrub)
+ goto error_return;
+
+ if (i != diff) /* degraded scaling.. */
+ gf_log (this->name, GF_LOG_WARNING,
+ "Could not fully scale up to %d scrubber(s). Spawned "
+ "%d/%d [total scrubber(s): %d]", v2, i, diff, (v1 + i));
+
+ return 0;
+
+ error_return:
+ return -1;
+}
+
+static int32_t
+br_scrubber_scale_down (xlator_t *this,
+ struct br_scrubber *fsscrub,
+ unsigned int v1, unsigned int v2)
+{
+ int i = 0;
+ int diff = 0;
+ int32_t ret = -1;
+ struct br_scrubbers *scrub = NULL;
+
+ diff = (int)(v1 - v2);
+
+ gf_log (this->name, GF_LOG_INFO,
+ "Scaling down scrubbers [%d => %d]", v1, v2);
+
+ for (i = 0 ; i < diff; i++) {
+ scrub = list_first_entry
+ (&fsscrub->scrubbers, struct br_scrubbers, list);
+
+ list_del_init (&scrub->list);
+ ret = gf_thread_cleanup_xint (scrub->scrubthread);
+ if (ret)
+ break;
+ GF_FREE (scrub);
+
+ fsscrub->nr_scrubbers--;
+ }
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Could not fully scale down to %d scrubber(s). "
+ "Terminated %d/%d [total scrubber(s): %d]",
+ v2, i, diff, (v1 - i));
+ ret = 0;
+ }
+
+ return ret;
+}
+
+static int32_t
+br_scrubber_configure (xlator_t *this, br_private_t *priv,
+ struct br_scrubber *fsscrub, scrub_throttle_t nthrottle)
+{
+ int32_t ret = 0;
+ unsigned int v1 = 0;
+ unsigned int v2 = 0;
+
+ v1 = fsscrub->nr_scrubbers;
+ v2 = br_scrubber_calc_scale (this, priv, nthrottle);
+
+ if (v1 == v2)
+ return 0;
+
+ if (v1 > v2)
+ ret = br_scrubber_scale_down (this, fsscrub, v1, v2);
+ else
+ ret = br_scrubber_scale_up (this, fsscrub, v1, v2);
+
+ return ret;
+}
+
+/* TODO: token bucket spec */
+static int32_t
+br_scrubber_handle_throttle (xlator_t *this,
+ br_private_t *priv, dict_t *options)
+{
+ int32_t ret = 0;
+ char *tmp = NULL;
+ struct br_scrubber *fsscrub = NULL;
+ scrub_throttle_t nthrottle = BR_SCRUB_THROTTLE_VOID;
+
+ fsscrub = &priv->fsscrub;
+
+ if (options)
+ GF_OPTION_RECONF ("scrub-throttle",
+ tmp, options, str, error_return);
+ else
+ GF_OPTION_INIT ("scrub-throttle", tmp, str, error_return);
+
+ if (strcasecmp (tmp, "lazy") == 0)
+ nthrottle = BR_SCRUB_THROTTLE_LAZY;
+ else if (strcasecmp (tmp, "normal") == 0)
+ nthrottle = BR_SCRUB_THROTTLE_NORMAL;
+ else if (strcasecmp (tmp, "aggressive") == 0)
+ nthrottle = BR_SCRUB_THROTTLE_AGGRESSIVE;
+ else
+ goto error_return;
+
+ /* on failure, the old throttling value is preserved */
+ ret = br_scrubber_configure (this, priv, fsscrub, nthrottle);
+ if (ret)
+ goto error_return;
+
+ fsscrub->throttle = nthrottle;
+ return 0;
+
+ error_return:
+ return -1;
+}
+
+/* TODO: pause/resume, frequency */
+int32_t
+br_scrubber_handle_options (xlator_t *this, br_private_t *priv, dict_t *options)
+{
+ int32_t ret = 0;
+
+ ret = br_scrubber_handle_throttle (this, priv, options);
+ if (ret)
+ goto error_return;
+
+ return 0;
+
+ error_return:
+ return -1;
+}
+
+int32_t
+br_scrubber_init (xlator_t *this, br_private_t *priv)
+{
+ struct br_scrubber *fsscrub = NULL;
+
+ priv->tbf = br_tbf_init (NULL, 0);
+ if (!priv->tbf)
+ return -1;
+
+ fsscrub = &priv->fsscrub;
+
+ fsscrub->this = this;
+ fsscrub->throttle = BR_SCRUB_THROTTLE_VOID;
+
+ pthread_mutex_init (&fsscrub->mutex, NULL);
+ pthread_cond_init (&fsscrub->cond, NULL);
+
+ fsscrub->nr_scrubbers = 0;
+ INIT_LIST_HEAD (&fsscrub->scrubbers);
+ INIT_LIST_HEAD (&fsscrub->scrublist);
+
+ return 0;
+}