diff options
Diffstat (limited to 'xlators/features/bit-rot/src/bitd/bit-rot-scrub.c')
| -rw-r--r-- | xlators/features/bit-rot/src/bitd/bit-rot-scrub.c | 565 | 
1 file changed, 545 insertions, 20 deletions
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c index e0581a40df0..8a80052f250 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c +++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c @@ -13,17 +13,35 @@  #include "config.h"  #endif +#include <math.h>  #include <ctype.h>  #include <sys/uio.h>  #include "glusterfs.h" -#include "xlator.h"  #include "logging.h" +#include "common-utils.h" -#include "bit-rot.h"  #include "bit-rot-scrub.h"  #include <pthread.h> +struct br_scrubbers { +        pthread_t scrubthread; + +        struct list_head list; +}; + +struct br_fsscan_entry { +        void *data; + +        loc_t parent; + +        gf_dirent_t *entry; + +        struct br_scanfs *fsscan;  /* backpointer to subvolume scanner */ + +        struct list_head list; +}; +  /**   * fetch signature extended attribute from an object's fd.   * NOTE: On success @xattr is not unref'd as @sign points @@ -246,8 +264,7 @@ bitd_compare_ckum (xlator_t *this,   * signs with SHA256).   
*/  int -bitd_start_scrub (xlator_t *subvol, -                  gf_dirent_t *entry, loc_t *parent, void *data) +br_scrubber_scrub_begin (xlator_t *this, struct br_fsscan_entry *fsentry)  {          int32_t              ret           = -1;          fd_t                *fd            = NULL; @@ -256,17 +273,22 @@ bitd_start_scrub (xlator_t *subvol,          struct iatt          parent_buf    = {0, };          pid_t                pid           = 0;          br_child_t          *child         = NULL; -        xlator_t            *this          = NULL;          unsigned char       *md            = NULL;          inode_t             *linked_inode  = NULL;          br_isignature_out_t *sign          = NULL;          unsigned long        signedversion = 0; +        gf_dirent_t         *entry         = NULL; +        loc_t               *parent        = NULL; -        GF_VALIDATE_OR_GOTO ("bit-rot", subvol, out); -        GF_VALIDATE_OR_GOTO ("bit-rot", data, out); +        GF_VALIDATE_OR_GOTO ("bit-rot", fsentry, out); -        child = data; -        this = child->this; +        entry = fsentry->entry; +        parent = &fsentry->parent; +        child = fsentry->data; + +        GF_VALIDATE_OR_GOTO ("bit-rot", entry, out); +        GF_VALIDATE_OR_GOTO ("bit-rot", parent, out); +        GF_VALIDATE_OR_GOTO ("bit-rot", child, out);          pid = GF_CLIENT_PID_SCRUB; @@ -366,29 +388,532 @@ bitd_start_scrub (xlator_t *subvol,          return ret;  } -#define BR_SCRUB_THROTTLE_COUNT 30 -#define BR_SCRUB_THROTTLE_ZZZ   60 +static void +wait_for_scrubbing (xlator_t *this, struct br_scanfs *fsscan) +{ +        br_private_t *priv = NULL; +        struct br_scrubber *fsscrub = NULL; + +        priv = this->private; +        fsscrub = &priv->fsscrub; + +        pthread_mutex_lock (&fsscan->waitlock); +        { +                pthread_mutex_lock (&fsscrub->mutex); +                { +                        list_replace_init (&fsscan->queued, &fsscan->ready); + +                  
      /* wake up scrubbers */ +                        pthread_cond_broadcast (&fsscrub->cond); +                } +                pthread_mutex_unlock (&fsscrub->mutex); + +                while (fsscan->entries != 0) +                        pthread_cond_wait +                                    (&fsscan->waitcond, &fsscan->waitlock); +        } +        pthread_mutex_unlock (&fsscan->waitlock); +} + +static inline void +_br_fsscan_inc_entry_count (struct br_scanfs *fsscan) +{ +        fsscan->entries++; +} + +static inline void +_br_fsscan_dec_entry_count (struct br_scanfs *fsscan) +{ +        if (--fsscan->entries == 0) { +                pthread_mutex_lock (&fsscan->waitlock); +                { +                        pthread_cond_signal (&fsscan->waitcond); +                } +                pthread_mutex_unlock (&fsscan->waitlock); +        } +} + +static void +_br_fsscan_collect_entry (struct br_scanfs *fsscan, +                           struct br_fsscan_entry *fsentry) +{ +        list_add_tail (&fsentry->list, &fsscan->queued); +        _br_fsscan_inc_entry_count (fsscan); +} + +#define NR_ENTRIES (1<<7) /* ..bulk scrubbing */ + +int +br_fsscanner_handle_entry (xlator_t *subvol, +                           gf_dirent_t *entry, loc_t *parent, void *data) +{ +        int32_t                 ret     = -1; +        int                     scrub   = 0; +        br_child_t             *child   = NULL; +        xlator_t               *this    = NULL; +        struct br_scanfs       *fsscan  = NULL; +        struct br_fsscan_entry *fsentry = NULL; + +        GF_VALIDATE_OR_GOTO ("bit-rot", subvol, error_return); +        GF_VALIDATE_OR_GOTO ("bit-rot", data, error_return); + +        child = data; +        this = child->this; +        fsscan = &child->fsscan; + +        fsentry = GF_CALLOC (1, sizeof (*fsentry), gf_br_mt_br_fsscan_entry_t); +        if (!fsentry) +                goto error_return; + +        { +                fsentry->data = data; +         
       fsentry->fsscan = &child->fsscan; + +                /* copy parent loc */ +                ret = loc_copy (&fsentry->parent, parent); +                if (ret) +                        goto dealloc; + +                /* copy child entry */ +                fsentry->entry = entry_copy (entry); +                if (!fsentry->entry) +                        goto locwipe; + +                INIT_LIST_HEAD (&fsentry->list); +        } + +        LOCK (&fsscan->entrylock); +        { +                _br_fsscan_collect_entry (fsscan, fsentry); + +                /** +                 * need not be a equality check as entries may be pushed +                 * back onto the scanned queue when thread(s) are cleaned. +                 */ +                if (fsscan->entries >= NR_ENTRIES) +                        scrub = 1; +        } +        UNLOCK (&fsscan->entrylock); + +        if (scrub) +                wait_for_scrubbing (this, fsscan); + +        return 0; + + locwipe: +        loc_wipe (&fsentry->parent); + dealloc: +        GF_FREE (fsentry); + error_return: +        return -1; +} +  void * -br_scrubber (void *arg) +br_fsscanner (void *arg)  { -        loc_t       loc   = {0,}; -        xlator_t   *this  = NULL; -        br_child_t *child = NULL; +        loc_t             loc    = {0,}; +        xlator_t         *this   = NULL; +        br_child_t       *child  = NULL; +        struct br_scanfs *fsscan = NULL;          child = arg;          this = child->this; +        fsscan = &child->fsscan;          THIS = this;          loc.inode = child->table->root;          while (1) { -                (void) syncop_ftw_throttle -                           (child->xl, &loc, -                            GF_CLIENT_PID_SCRUB, child, bitd_start_scrub, -                            BR_SCRUB_THROTTLE_COUNT, BR_SCRUB_THROTTLE_ZZZ); +                (void) syncop_ftw (child->xl, &loc, +                                   GF_CLIENT_PID_SCRUB, +                                
   child, br_fsscanner_handle_entry); +                if (!list_empty (&fsscan->queued)) +                        wait_for_scrubbing (this, fsscan); +        } + +        return NULL; +} + +#define BR_SCRUB_THREAD_SCALE_LAZY       0 +#define BR_SCRUB_THREAD_SCALE_NORMAL     0.4 +#define BR_SCRUB_THREAD_SCALE_AGGRESSIVE 1.0 + +#ifndef M_E +#define M_E 2.718 +#endif + +/** + * This is just a simple exponential scale to a fixed value selected + * per throttle config. We probably need to be more smart and select + * the scale based on the number of processor cores too. + */ +static unsigned int +br_scrubber_calc_scale (xlator_t *this, +                        br_private_t *priv, scrub_throttle_t throttle) +{ +        unsigned int scale = 0; + +        switch (throttle) { +        case BR_SCRUB_THROTTLE_VOID: +                scale = 0; +                break; +        case BR_SCRUB_THROTTLE_LAZY: +                scale = priv->child_count * +                              pow (M_E, BR_SCRUB_THREAD_SCALE_LAZY); +                break; +        case BR_SCRUB_THROTTLE_NORMAL: +                scale = priv->child_count * +                              pow (M_E, BR_SCRUB_THREAD_SCALE_NORMAL); +                break; +        case BR_SCRUB_THROTTLE_AGGRESSIVE: +                scale = priv->child_count * +                              pow (M_E, BR_SCRUB_THREAD_SCALE_AGGRESSIVE); +                break; +        default: +                gf_log (this->name, GF_LOG_ERROR, +                        "Unknown throttle %d", throttle); +        } + +        return scale; + +} + +static void +br_scrubber_cleanup_handler (void *arg) +{ +        struct br_scrubber *fsscrub = arg; +        pthread_mutex_unlock (&fsscrub->mutex); +} + +static inline br_child_t * +_br_scrubber_get_next_child (struct br_scrubber *fsscrub) +{ +        br_child_t *child = NULL; + +        child = list_first_entry (&fsscrub->scrublist, br_child_t, list); +        list_rotate_left (&fsscrub->scrublist); + +    
    return child; +} + +static inline void +_br_scrubber_get_entry (br_child_t *child, struct br_fsscan_entry **fsentry) +{ +        struct br_scanfs *fsscan = &child->fsscan; + +        if (list_empty (&fsscan->ready)) +                return; +        *fsentry = list_first_entry +                            (&fsscan->ready, struct br_fsscan_entry, list); +        list_del_init (&(*fsentry)->list); +} + +static inline void +_br_scrubber_find_scrubbable_entry (struct br_scrubber *fsscrub, +                                     struct br_fsscan_entry **fsentry) +{ +        br_child_t *child = NULL; +        br_child_t *firstchild = NULL; + +        while (1) { +                if (list_empty (&fsscrub->scrublist)) +                        pthread_cond_wait (&fsscrub->cond, &fsscrub->mutex); + +                firstchild = NULL; +                for (child = _br_scrubber_get_next_child (fsscrub); +                     child != firstchild; +                     child = _br_scrubber_get_next_child (fsscrub)) { + +                        if (!firstchild) +                                firstchild = child; + +                        _br_scrubber_get_entry (child, fsentry); +                        if (*fsentry) +                                break; +                } + +                if (*fsentry) +                        break; + +                /* nothing to work on.. 
wait till available */ +                pthread_cond_wait (&fsscrub->cond, &fsscrub->mutex); +        } +} + +static void +br_scrubber_pick_entry (struct br_scrubber *fsscrub, +                        struct br_fsscan_entry **fsentry) +{ +        pthread_cleanup_push (br_scrubber_cleanup_handler, fsscrub); + +        pthread_mutex_lock (&fsscrub->mutex); +        { +                *fsentry = NULL; +                _br_scrubber_find_scrubbable_entry (fsscrub, fsentry); +        } +        pthread_mutex_unlock (&fsscrub->mutex); + +        pthread_cleanup_pop (0); +} -                sleep (BR_SCRUB_THROTTLE_ZZZ); +struct br_scrub_entry { +        gf_boolean_t scrubbed; +        struct br_fsscan_entry *fsentry; +}; + +/** + * We need to be a bit careful here. These thread(s) are prone to cancellations + * when threads are scaled down (depending on the thottling value configured) + * and pausing scrub. A thread can get cancelled while it's waiting for entries + * in the ->pending queue or when an object is undergoing scrubbing. 
+ */ +static void +br_scrubber_entry_handle (void *arg) +{ +        struct br_scanfs       *fsscan  = NULL; +        struct br_scrub_entry  *sentry  = NULL; +        struct br_fsscan_entry *fsentry = NULL; + +        sentry = arg; + +        fsentry = sentry->fsentry; +        fsscan  = fsentry->fsscan; + +        LOCK (&fsscan->entrylock); +        { +                if (sentry->scrubbed) { +                        _br_fsscan_dec_entry_count (fsscan); + +                        /* cleanup ->entry */ +                        fsentry->data   = NULL; +                        fsentry->fsscan = NULL; +                        loc_wipe (&fsentry->parent); +                        gf_dirent_entry_free (fsentry->entry); + +                        GF_FREE (sentry->fsentry); +                } else { +                        /* (re)queue the entry again for scrub */ +                        _br_fsscan_collect_entry (fsscan, sentry->fsentry); +                } +        } +        UNLOCK (&fsscan->entrylock); +} + +static void +br_scrubber_scrub_entry (xlator_t *this, struct br_fsscan_entry *fsentry) +{ +        struct br_scrub_entry sentry = {0, }; + +        sentry.scrubbed = 0; +        sentry.fsentry = fsentry; + +        pthread_cleanup_push (br_scrubber_entry_handle, &sentry); +        { +                (void) br_scrubber_scrub_begin (this, fsentry); +                sentry.scrubbed = 1; +        } +        pthread_cleanup_pop (1); +} + +void *br_scrubber_proc (void *arg) +{ +        xlator_t *this = NULL; +        struct br_scrubber *fsscrub = NULL; +        struct br_fsscan_entry *fsentry = NULL; + +        fsscrub = arg; +        THIS = this = fsscrub->this; + +        while (1) { +                br_scrubber_pick_entry (fsscrub, &fsentry); +                br_scrubber_scrub_entry (this, fsentry); +                sleep (1);          }          return NULL;  } + +static int32_t +br_scrubber_scale_up (xlator_t *this, +                      struct br_scrubber 
*fsscrub, +                      unsigned int v1, unsigned int v2) +{ +        int i = 0; +        int32_t ret = -1; +        int diff = 0; +        struct br_scrubbers *scrub = NULL; + +        diff = (int)(v2 - v1); + +        gf_log (this->name, GF_LOG_INFO, +                "Scaling up scrubbers [%d => %d]", v1, v2); + +        for (i = 0; i < diff; i++) { +                scrub = GF_CALLOC (diff, sizeof (*scrub), +                                   gf_br_mt_br_scrubber_t); +                if (!scrub) +                        break; + +                INIT_LIST_HEAD (&scrub->list); +                ret = gf_thread_create (&scrub->scrubthread, +                                        NULL, br_scrubber_proc, fsscrub); +                if (ret) +                        break; + +                fsscrub->nr_scrubbers++; +                list_add_tail (&scrub->list, &fsscrub->scrubbers); +        } + +        if ((i != diff) && !scrub) +                goto error_return; + +        if (i != diff) /* degraded scaling.. */ +                gf_log (this->name, GF_LOG_WARNING, +                        "Could not fully scale up to %d scrubber(s). 
Spawned " +                        "%d/%d [total scrubber(s): %d]", v2, i, diff, (v1 + i)); + +        return 0; + + error_return: +        return -1; +} + +static int32_t +br_scrubber_scale_down (xlator_t *this, +                        struct br_scrubber *fsscrub, +                        unsigned int v1, unsigned int v2) +{ +        int i = 0; +        int diff = 0; +        int32_t ret = -1; +        struct br_scrubbers *scrub = NULL; + +        diff = (int)(v1 - v2); + +        gf_log (this->name, GF_LOG_INFO, +                "Scaling down scrubbers [%d => %d]", v1, v2); + +        for (i = 0 ; i < diff; i++) { +                scrub = list_first_entry +                            (&fsscrub->scrubbers, struct br_scrubbers, list); + +                list_del_init (&scrub->list); +                ret = gf_thread_cleanup_xint (scrub->scrubthread); +                if (ret) +                        break; +                GF_FREE (scrub); + +                fsscrub->nr_scrubbers--; +        } + +        if (ret) { +                gf_log (this->name, GF_LOG_WARNING, +                        "Could not fully scale down to %d scrubber(s). 
" +                        "Terminated %d/%d [total scrubber(s): %d]", +                        v1, i, diff, (v2 - i)); +                ret = 0; +        } + +        return ret; +} + +static int32_t +br_scrubber_configure (xlator_t *this, br_private_t *priv, +                       struct br_scrubber *fsscrub, scrub_throttle_t nthrottle) +{ +        int32_t ret = 0; +        unsigned int v1 = 0; +        unsigned int v2 = 0; + +        v1 = fsscrub->nr_scrubbers; +        v2 = br_scrubber_calc_scale (this, priv, nthrottle); + +        if (v1 == v2) +                return 0; + +        if (v1 > v2) +                ret = br_scrubber_scale_down (this, fsscrub, v1, v2); +        else +                ret = br_scrubber_scale_up (this, fsscrub, v1, v2); + +        return ret; +} + +/* TODO: token buket spec */ +static int32_t +br_scrubber_handle_throttle (xlator_t *this, +                             br_private_t *priv, dict_t *options) +{ +        int32_t ret = 0; +        char *tmp = NULL; +        struct br_scrubber *fsscrub = NULL; +        scrub_throttle_t nthrottle = BR_SCRUB_THROTTLE_VOID; + +        fsscrub = &priv->fsscrub; + +        if (options) +                GF_OPTION_RECONF ("scrub-throttle", +                                  tmp, options, str, error_return); +        else +                GF_OPTION_INIT ("scrub-throttle", tmp, str, error_return); + +        if (strcasecmp (tmp, "lazy") == 0) +                nthrottle = BR_SCRUB_THROTTLE_LAZY; +        else if (strcasecmp (tmp, "normal") == 0) +                nthrottle = BR_SCRUB_THROTTLE_NORMAL; +        else if (strcasecmp (tmp, "aggressive") == 0) +                nthrottle = BR_SCRUB_THROTTLE_AGGRESSIVE; +        else +                goto error_return; + +        /* on failure old throttling value is preserved */ +        ret = br_scrubber_configure (this, priv, fsscrub, nthrottle); +        if (ret) +                goto error_return; + +        fsscrub->throttle = nthrottle; +        return 
0; + + error_return: +        return -1; +} + +/* TODO: pause/resume, frequency */ +int32_t +br_scrubber_handle_options (xlator_t *this, br_private_t *priv, dict_t *options) +{ +        int32_t ret = 0; + +        ret = br_scrubber_handle_throttle (this, priv, options); +        if (ret) +                goto error_return; + +        return 0; + + error_return: +        return -1; +} + +int32_t +br_scrubber_init (xlator_t *this, br_private_t *priv) +{ +        struct br_scrubber *fsscrub = NULL; + +        priv->tbf = br_tbf_init (NULL, 0); +        if (!priv->tbf) +                return -1; + +        fsscrub = &priv->fsscrub; + +        fsscrub->this = this; +        fsscrub->throttle = BR_SCRUB_THROTTLE_VOID; + +        pthread_mutex_init (&fsscrub->mutex, NULL); +        pthread_cond_init (&fsscrub->cond, NULL); + +        fsscrub->nr_scrubbers = 0; +        INIT_LIST_HEAD (&fsscrub->scrubbers); +        INIT_LIST_HEAD (&fsscrub->scrublist); + +        return 0; +}  | 
