diff options
 -rw-r--r--  libglusterfs/src/gf-dirent.c                               |  39
 -rw-r--r--  libglusterfs/src/gf-dirent.h                               |   2
 -rw-r--r--  libglusterfs/src/list.h                                    |  14
 -rw-r--r--  libglusterfs/src/syncop.c                                  |  17
 -rw-r--r--  xlators/features/bit-rot/src/bitd/bit-rot-scrub.c          | 565
 -rw-r--r--  xlators/features/bit-rot/src/bitd/bit-rot-scrub.h          |   9
 -rw-r--r--  xlators/features/bit-rot/src/bitd/bit-rot.c                | 146
 -rw-r--r--  xlators/features/bit-rot/src/bitd/bit-rot.h                |  40
 -rw-r--r--  xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h |   3
9 files changed, 760 insertions, 75 deletions
diff --git a/libglusterfs/src/gf-dirent.c b/libglusterfs/src/gf-dirent.c index b5f395afc36..99c0eb6441d 100644 --- a/libglusterfs/src/gf-dirent.c +++ b/libglusterfs/src/gf-dirent.c @@ -171,6 +171,20 @@ gf_dirent_for_name (const char *name)          return gf_dirent;  } +void +gf_dirent_entry_free (gf_dirent_t *entry) +{ +        if (!entry) +                return; + +        if (entry->dict) +                dict_unref (entry->dict); +        if (entry->inode) +                inode_unref (entry->inode); + +        list_del (&entry->list); +        GF_FREE (entry); +}  void  gf_dirent_free (gf_dirent_t *entries) @@ -185,16 +199,27 @@ gf_dirent_free (gf_dirent_t *entries)                  return;          list_for_each_entry_safe (entry, tmp, &entries->list, list) { -                if (entry->dict) -                        dict_unref (entry->dict); -                if (entry->inode) -                        inode_unref (entry->inode); - -                list_del (&entry->list); -                GF_FREE (entry); +                gf_dirent_entry_free (entry);          }  } +gf_dirent_t * +entry_copy (gf_dirent_t *source) +{ +        gf_dirent_t *sink = NULL; + +        sink = gf_dirent_for_name (source->d_name); + +        sink->d_off = source->d_off; +        sink->d_ino = source->d_ino; +        sink->d_type = source->d_type; +        sink->d_stat = source->d_stat; + +	if (source->inode) +		sink->inode = inode_ref (source->inode); +        return sink; +} +  void  gf_link_inode_from_dirent (xlator_t *this, inode_t *parent, gf_dirent_t *entry)  { diff --git a/libglusterfs/src/gf-dirent.h b/libglusterfs/src/gf-dirent.h index 07c605f82b0..faeaf411941 100644 --- a/libglusterfs/src/gf-dirent.h +++ b/libglusterfs/src/gf-dirent.h @@ -61,6 +61,8 @@ struct _gf_dirent_t {  #define DT_ISDIR(mode) (mode == DT_DIR)  gf_dirent_t *gf_dirent_for_name (const char *name); +gf_dirent_t *entry_copy (gf_dirent_t *source); +void gf_dirent_entry_free (gf_dirent_t *entry);  void 
gf_dirent_free (gf_dirent_t *entries);  int gf_link_inodes_from_dirent (xlator_t *this, inode_t *parent,                                  gf_dirent_t *entries); diff --git a/libglusterfs/src/list.h b/libglusterfs/src/list.h index 875594136a2..b8f9a6eebd8 100644 --- a/libglusterfs/src/list.h +++ b/libglusterfs/src/list.h @@ -214,6 +214,20 @@ static inline void list_replace_init(struct list_head *old,  	INIT_LIST_HEAD(old);  } +/** + * list_rotate_left - rotate the list to the left + * @head: the head of the list + */ +static inline void list_rotate_left (struct list_head *head) +{ +	struct list_head *first; + +	if (!list_empty (head)) { +		first = head->next; +		list_move_tail (first, head); +	} +} +  #define list_entry(ptr, type, member)					\  	((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c index 9224abaeeed..48daa3226d5 100644 --- a/libglusterfs/src/syncop.c +++ b/libglusterfs/src/syncop.c @@ -1217,23 +1217,6 @@ syncop_lookup (xlator_t *subvol, loc_t *loc, struct iatt *iatt,          return args.op_ret;  } -static gf_dirent_t * -entry_copy (gf_dirent_t *source) -{ -        gf_dirent_t *sink = NULL; - -        sink = gf_dirent_for_name (source->d_name); - -        sink->d_off = source->d_off; -        sink->d_ino = source->d_ino; -        sink->d_type = source->d_type; -        sink->d_stat = source->d_stat; - -	if (source->inode) -		sink->inode = inode_ref (source->inode); -        return sink; -} -  int32_t  syncop_readdirp_cbk (call_frame_t *frame,                       void *cookie, diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c index e0581a40df0..8a80052f250 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c +++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c @@ -13,17 +13,35 @@  #include "config.h"  #endif +#include <math.h>  #include <ctype.h>  #include <sys/uio.h>  #include "glusterfs.h" 
-#include "xlator.h"  #include "logging.h" +#include "common-utils.h" -#include "bit-rot.h"  #include "bit-rot-scrub.h"  #include <pthread.h> +struct br_scrubbers { +        pthread_t scrubthread; + +        struct list_head list; +}; + +struct br_fsscan_entry { +        void *data; + +        loc_t parent; + +        gf_dirent_t *entry; + +        struct br_scanfs *fsscan;  /* backpointer to subvolume scanner */ + +        struct list_head list; +}; +  /**   * fetch signature extended attribute from an object's fd.   * NOTE: On success @xattr is not unref'd as @sign points @@ -246,8 +264,7 @@ bitd_compare_ckum (xlator_t *this,   * signs with SHA256).   */  int -bitd_start_scrub (xlator_t *subvol, -                  gf_dirent_t *entry, loc_t *parent, void *data) +br_scrubber_scrub_begin (xlator_t *this, struct br_fsscan_entry *fsentry)  {          int32_t              ret           = -1;          fd_t                *fd            = NULL; @@ -256,17 +273,22 @@ bitd_start_scrub (xlator_t *subvol,          struct iatt          parent_buf    = {0, };          pid_t                pid           = 0;          br_child_t          *child         = NULL; -        xlator_t            *this          = NULL;          unsigned char       *md            = NULL;          inode_t             *linked_inode  = NULL;          br_isignature_out_t *sign          = NULL;          unsigned long        signedversion = 0; +        gf_dirent_t         *entry         = NULL; +        loc_t               *parent        = NULL; -        GF_VALIDATE_OR_GOTO ("bit-rot", subvol, out); -        GF_VALIDATE_OR_GOTO ("bit-rot", data, out); +        GF_VALIDATE_OR_GOTO ("bit-rot", fsentry, out); -        child = data; -        this = child->this; +        entry = fsentry->entry; +        parent = &fsentry->parent; +        child = fsentry->data; + +        GF_VALIDATE_OR_GOTO ("bit-rot", entry, out); +        GF_VALIDATE_OR_GOTO ("bit-rot", parent, out); +        GF_VALIDATE_OR_GOTO ("bit-rot", 
child, out);          pid = GF_CLIENT_PID_SCRUB; @@ -366,29 +388,532 @@ bitd_start_scrub (xlator_t *subvol,          return ret;  } -#define BR_SCRUB_THROTTLE_COUNT 30 -#define BR_SCRUB_THROTTLE_ZZZ   60 +static void +wait_for_scrubbing (xlator_t *this, struct br_scanfs *fsscan) +{ +        br_private_t *priv = NULL; +        struct br_scrubber *fsscrub = NULL; + +        priv = this->private; +        fsscrub = &priv->fsscrub; + +        pthread_mutex_lock (&fsscan->waitlock); +        { +                pthread_mutex_lock (&fsscrub->mutex); +                { +                        list_replace_init (&fsscan->queued, &fsscan->ready); + +                        /* wake up scrubbers */ +                        pthread_cond_broadcast (&fsscrub->cond); +                } +                pthread_mutex_unlock (&fsscrub->mutex); + +                while (fsscan->entries != 0) +                        pthread_cond_wait +                                    (&fsscan->waitcond, &fsscan->waitlock); +        } +        pthread_mutex_unlock (&fsscan->waitlock); +} + +static inline void +_br_fsscan_inc_entry_count (struct br_scanfs *fsscan) +{ +        fsscan->entries++; +} + +static inline void +_br_fsscan_dec_entry_count (struct br_scanfs *fsscan) +{ +        if (--fsscan->entries == 0) { +                pthread_mutex_lock (&fsscan->waitlock); +                { +                        pthread_cond_signal (&fsscan->waitcond); +                } +                pthread_mutex_unlock (&fsscan->waitlock); +        } +} + +static void +_br_fsscan_collect_entry (struct br_scanfs *fsscan, +                           struct br_fsscan_entry *fsentry) +{ +        list_add_tail (&fsentry->list, &fsscan->queued); +        _br_fsscan_inc_entry_count (fsscan); +} + +#define NR_ENTRIES (1<<7) /* ..bulk scrubbing */ + +int +br_fsscanner_handle_entry (xlator_t *subvol, +                           gf_dirent_t *entry, loc_t *parent, void *data) +{ +        int32_t                 ret     
= -1; +        int                     scrub   = 0; +        br_child_t             *child   = NULL; +        xlator_t               *this    = NULL; +        struct br_scanfs       *fsscan  = NULL; +        struct br_fsscan_entry *fsentry = NULL; + +        GF_VALIDATE_OR_GOTO ("bit-rot", subvol, error_return); +        GF_VALIDATE_OR_GOTO ("bit-rot", data, error_return); + +        child = data; +        this = child->this; +        fsscan = &child->fsscan; + +        fsentry = GF_CALLOC (1, sizeof (*fsentry), gf_br_mt_br_fsscan_entry_t); +        if (!fsentry) +                goto error_return; + +        { +                fsentry->data = data; +                fsentry->fsscan = &child->fsscan; + +                /* copy parent loc */ +                ret = loc_copy (&fsentry->parent, parent); +                if (ret) +                        goto dealloc; + +                /* copy child entry */ +                fsentry->entry = entry_copy (entry); +                if (!fsentry->entry) +                        goto locwipe; + +                INIT_LIST_HEAD (&fsentry->list); +        } + +        LOCK (&fsscan->entrylock); +        { +                _br_fsscan_collect_entry (fsscan, fsentry); + +                /** +                 * need not be a equality check as entries may be pushed +                 * back onto the scanned queue when thread(s) are cleaned. 
+                 */ +                if (fsscan->entries >= NR_ENTRIES) +                        scrub = 1; +        } +        UNLOCK (&fsscan->entrylock); + +        if (scrub) +                wait_for_scrubbing (this, fsscan); + +        return 0; + + locwipe: +        loc_wipe (&fsentry->parent); + dealloc: +        GF_FREE (fsentry); + error_return: +        return -1; +} +  void * -br_scrubber (void *arg) +br_fsscanner (void *arg)  { -        loc_t       loc   = {0,}; -        xlator_t   *this  = NULL; -        br_child_t *child = NULL; +        loc_t             loc    = {0,}; +        xlator_t         *this   = NULL; +        br_child_t       *child  = NULL; +        struct br_scanfs *fsscan = NULL;          child = arg;          this = child->this; +        fsscan = &child->fsscan;          THIS = this;          loc.inode = child->table->root;          while (1) { -                (void) syncop_ftw_throttle -                           (child->xl, &loc, -                            GF_CLIENT_PID_SCRUB, child, bitd_start_scrub, -                            BR_SCRUB_THROTTLE_COUNT, BR_SCRUB_THROTTLE_ZZZ); +                (void) syncop_ftw (child->xl, &loc, +                                   GF_CLIENT_PID_SCRUB, +                                   child, br_fsscanner_handle_entry); +                if (!list_empty (&fsscan->queued)) +                        wait_for_scrubbing (this, fsscan); +        } + +        return NULL; +} + +#define BR_SCRUB_THREAD_SCALE_LAZY       0 +#define BR_SCRUB_THREAD_SCALE_NORMAL     0.4 +#define BR_SCRUB_THREAD_SCALE_AGGRESSIVE 1.0 + +#ifndef M_E +#define M_E 2.718 +#endif + +/** + * This is just a simple exponential scale to a fixed value selected + * per throttle config. We probably need to be more smart and select + * the scale based on the number of processor cores too. 
+ */ +static unsigned int +br_scrubber_calc_scale (xlator_t *this, +                        br_private_t *priv, scrub_throttle_t throttle) +{ +        unsigned int scale = 0; + +        switch (throttle) { +        case BR_SCRUB_THROTTLE_VOID: +                scale = 0; +                break; +        case BR_SCRUB_THROTTLE_LAZY: +                scale = priv->child_count * +                              pow (M_E, BR_SCRUB_THREAD_SCALE_LAZY); +                break; +        case BR_SCRUB_THROTTLE_NORMAL: +                scale = priv->child_count * +                              pow (M_E, BR_SCRUB_THREAD_SCALE_NORMAL); +                break; +        case BR_SCRUB_THROTTLE_AGGRESSIVE: +                scale = priv->child_count * +                              pow (M_E, BR_SCRUB_THREAD_SCALE_AGGRESSIVE); +                break; +        default: +                gf_log (this->name, GF_LOG_ERROR, +                        "Unknown throttle %d", throttle); +        } + +        return scale; + +} + +static void +br_scrubber_cleanup_handler (void *arg) +{ +        struct br_scrubber *fsscrub = arg; +        pthread_mutex_unlock (&fsscrub->mutex); +} + +static inline br_child_t * +_br_scrubber_get_next_child (struct br_scrubber *fsscrub) +{ +        br_child_t *child = NULL; + +        child = list_first_entry (&fsscrub->scrublist, br_child_t, list); +        list_rotate_left (&fsscrub->scrublist); + +        return child; +} + +static inline void +_br_scrubber_get_entry (br_child_t *child, struct br_fsscan_entry **fsentry) +{ +        struct br_scanfs *fsscan = &child->fsscan; + +        if (list_empty (&fsscan->ready)) +                return; +        *fsentry = list_first_entry +                            (&fsscan->ready, struct br_fsscan_entry, list); +        list_del_init (&(*fsentry)->list); +} + +static inline void +_br_scrubber_find_scrubbable_entry (struct br_scrubber *fsscrub, +                                     struct br_fsscan_entry **fsentry) +{ +   
     br_child_t *child = NULL; +        br_child_t *firstchild = NULL; + +        while (1) { +                if (list_empty (&fsscrub->scrublist)) +                        pthread_cond_wait (&fsscrub->cond, &fsscrub->mutex); + +                firstchild = NULL; +                for (child = _br_scrubber_get_next_child (fsscrub); +                     child != firstchild; +                     child = _br_scrubber_get_next_child (fsscrub)) { + +                        if (!firstchild) +                                firstchild = child; + +                        _br_scrubber_get_entry (child, fsentry); +                        if (*fsentry) +                                break; +                } + +                if (*fsentry) +                        break; + +                /* nothing to work on.. wait till available */ +                pthread_cond_wait (&fsscrub->cond, &fsscrub->mutex); +        } +} + +static void +br_scrubber_pick_entry (struct br_scrubber *fsscrub, +                        struct br_fsscan_entry **fsentry) +{ +        pthread_cleanup_push (br_scrubber_cleanup_handler, fsscrub); + +        pthread_mutex_lock (&fsscrub->mutex); +        { +                *fsentry = NULL; +                _br_scrubber_find_scrubbable_entry (fsscrub, fsentry); +        } +        pthread_mutex_unlock (&fsscrub->mutex); + +        pthread_cleanup_pop (0); +} -                sleep (BR_SCRUB_THROTTLE_ZZZ); +struct br_scrub_entry { +        gf_boolean_t scrubbed; +        struct br_fsscan_entry *fsentry; +}; + +/** + * We need to be a bit careful here. These thread(s) are prone to cancellations + * when threads are scaled down (depending on the thottling value configured) + * and pausing scrub. A thread can get cancelled while it's waiting for entries + * in the ->pending queue or when an object is undergoing scrubbing. 
+ */ +static void +br_scrubber_entry_handle (void *arg) +{ +        struct br_scanfs       *fsscan  = NULL; +        struct br_scrub_entry  *sentry  = NULL; +        struct br_fsscan_entry *fsentry = NULL; + +        sentry = arg; + +        fsentry = sentry->fsentry; +        fsscan  = fsentry->fsscan; + +        LOCK (&fsscan->entrylock); +        { +                if (sentry->scrubbed) { +                        _br_fsscan_dec_entry_count (fsscan); + +                        /* cleanup ->entry */ +                        fsentry->data   = NULL; +                        fsentry->fsscan = NULL; +                        loc_wipe (&fsentry->parent); +                        gf_dirent_entry_free (fsentry->entry); + +                        GF_FREE (sentry->fsentry); +                } else { +                        /* (re)queue the entry again for scrub */ +                        _br_fsscan_collect_entry (fsscan, sentry->fsentry); +                } +        } +        UNLOCK (&fsscan->entrylock); +} + +static void +br_scrubber_scrub_entry (xlator_t *this, struct br_fsscan_entry *fsentry) +{ +        struct br_scrub_entry sentry = {0, }; + +        sentry.scrubbed = 0; +        sentry.fsentry = fsentry; + +        pthread_cleanup_push (br_scrubber_entry_handle, &sentry); +        { +                (void) br_scrubber_scrub_begin (this, fsentry); +                sentry.scrubbed = 1; +        } +        pthread_cleanup_pop (1); +} + +void *br_scrubber_proc (void *arg) +{ +        xlator_t *this = NULL; +        struct br_scrubber *fsscrub = NULL; +        struct br_fsscan_entry *fsentry = NULL; + +        fsscrub = arg; +        THIS = this = fsscrub->this; + +        while (1) { +                br_scrubber_pick_entry (fsscrub, &fsentry); +                br_scrubber_scrub_entry (this, fsentry); +                sleep (1);          }          return NULL;  } + +static int32_t +br_scrubber_scale_up (xlator_t *this, +                      struct br_scrubber 
*fsscrub, +                      unsigned int v1, unsigned int v2) +{ +        int i = 0; +        int32_t ret = -1; +        int diff = 0; +        struct br_scrubbers *scrub = NULL; + +        diff = (int)(v2 - v1); + +        gf_log (this->name, GF_LOG_INFO, +                "Scaling up scrubbers [%d => %d]", v1, v2); + +        for (i = 0; i < diff; i++) { +                scrub = GF_CALLOC (diff, sizeof (*scrub), +                                   gf_br_mt_br_scrubber_t); +                if (!scrub) +                        break; + +                INIT_LIST_HEAD (&scrub->list); +                ret = gf_thread_create (&scrub->scrubthread, +                                        NULL, br_scrubber_proc, fsscrub); +                if (ret) +                        break; + +                fsscrub->nr_scrubbers++; +                list_add_tail (&scrub->list, &fsscrub->scrubbers); +        } + +        if ((i != diff) && !scrub) +                goto error_return; + +        if (i != diff) /* degraded scaling.. */ +                gf_log (this->name, GF_LOG_WARNING, +                        "Could not fully scale up to %d scrubber(s). 
Spawned " +                        "%d/%d [total scrubber(s): %d]", v2, i, diff, (v1 + i)); + +        return 0; + + error_return: +        return -1; +} + +static int32_t +br_scrubber_scale_down (xlator_t *this, +                        struct br_scrubber *fsscrub, +                        unsigned int v1, unsigned int v2) +{ +        int i = 0; +        int diff = 0; +        int32_t ret = -1; +        struct br_scrubbers *scrub = NULL; + +        diff = (int)(v1 - v2); + +        gf_log (this->name, GF_LOG_INFO, +                "Scaling down scrubbers [%d => %d]", v1, v2); + +        for (i = 0 ; i < diff; i++) { +                scrub = list_first_entry +                            (&fsscrub->scrubbers, struct br_scrubbers, list); + +                list_del_init (&scrub->list); +                ret = gf_thread_cleanup_xint (scrub->scrubthread); +                if (ret) +                        break; +                GF_FREE (scrub); + +                fsscrub->nr_scrubbers--; +        } + +        if (ret) { +                gf_log (this->name, GF_LOG_WARNING, +                        "Could not fully scale down to %d scrubber(s). 
" +                        "Terminated %d/%d [total scrubber(s): %d]", +                        v1, i, diff, (v2 - i)); +                ret = 0; +        } + +        return ret; +} + +static int32_t +br_scrubber_configure (xlator_t *this, br_private_t *priv, +                       struct br_scrubber *fsscrub, scrub_throttle_t nthrottle) +{ +        int32_t ret = 0; +        unsigned int v1 = 0; +        unsigned int v2 = 0; + +        v1 = fsscrub->nr_scrubbers; +        v2 = br_scrubber_calc_scale (this, priv, nthrottle); + +        if (v1 == v2) +                return 0; + +        if (v1 > v2) +                ret = br_scrubber_scale_down (this, fsscrub, v1, v2); +        else +                ret = br_scrubber_scale_up (this, fsscrub, v1, v2); + +        return ret; +} + +/* TODO: token buket spec */ +static int32_t +br_scrubber_handle_throttle (xlator_t *this, +                             br_private_t *priv, dict_t *options) +{ +        int32_t ret = 0; +        char *tmp = NULL; +        struct br_scrubber *fsscrub = NULL; +        scrub_throttle_t nthrottle = BR_SCRUB_THROTTLE_VOID; + +        fsscrub = &priv->fsscrub; + +        if (options) +                GF_OPTION_RECONF ("scrub-throttle", +                                  tmp, options, str, error_return); +        else +                GF_OPTION_INIT ("scrub-throttle", tmp, str, error_return); + +        if (strcasecmp (tmp, "lazy") == 0) +                nthrottle = BR_SCRUB_THROTTLE_LAZY; +        else if (strcasecmp (tmp, "normal") == 0) +                nthrottle = BR_SCRUB_THROTTLE_NORMAL; +        else if (strcasecmp (tmp, "aggressive") == 0) +                nthrottle = BR_SCRUB_THROTTLE_AGGRESSIVE; +        else +                goto error_return; + +        /* on failure old throttling value is preserved */ +        ret = br_scrubber_configure (this, priv, fsscrub, nthrottle); +        if (ret) +                goto error_return; + +        fsscrub->throttle = nthrottle; +        return 
0; + + error_return: +        return -1; +} + +/* TODO: pause/resume, frequency */ +int32_t +br_scrubber_handle_options (xlator_t *this, br_private_t *priv, dict_t *options) +{ +        int32_t ret = 0; + +        ret = br_scrubber_handle_throttle (this, priv, options); +        if (ret) +                goto error_return; + +        return 0; + + error_return: +        return -1; +} + +int32_t +br_scrubber_init (xlator_t *this, br_private_t *priv) +{ +        struct br_scrubber *fsscrub = NULL; + +        priv->tbf = br_tbf_init (NULL, 0); +        if (!priv->tbf) +                return -1; + +        fsscrub = &priv->fsscrub; + +        fsscrub->this = this; +        fsscrub->throttle = BR_SCRUB_THROTTLE_VOID; + +        pthread_mutex_init (&fsscrub->mutex, NULL); +        pthread_cond_init (&fsscrub->cond, NULL); + +        fsscrub->nr_scrubbers = 0; +        INIT_LIST_HEAD (&fsscrub->scrubbers); +        INIT_LIST_HEAD (&fsscrub->scrublist); + +        return 0; +} diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h index daec9ad8196..4f00020d66a 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h +++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h @@ -11,6 +11,13 @@  #ifndef __BIT_ROT__SCRUB_H__  #define __BIT_ROT_SCRUB_H__ -void *br_scrubber (void *); +#include "xlator.h" +#include "bit-rot.h" + +void *br_fsscanner (void *); + +int32_t br_scrubber_handle_options (xlator_t *, br_private_t *, dict_t *); + +int32_t br_scrubber_init (xlator_t *, br_private_t *);  #endif /* __BIT_ROT_SCRUB_H__ */ diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c index 880b16edfa8..eea81aec53a 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot.c +++ b/xlators/features/bit-rot/src/bitd/bit-rot.c @@ -29,15 +29,6 @@  #define BR_HASH_CALC_READ_SIZE  (128 * 1024) -br_tbf_opspec_t opthrottle[] = { -        { -                .op       = BR_TBF_OP_HASH, - 
               .rate     = BR_HASH_CALC_READ_SIZE, -                .maxlimit = (2 * BR_WORKERS * BR_HASH_CALC_READ_SIZE), -        }, -        /** TODO: throttle getdents(), read() request(s) */ -}; -  static int  br_find_child_index (xlator_t *this, xlator_t *child)  { @@ -1066,6 +1057,7 @@ br_enact_signer (xlator_t *this, br_child_t *child, br_stub_init_t *stub)                  child->threadrunning = 1;          /* it's OK to continue, "old" objects would be signed when modified */ +        list_del_init (&child->list);          return 0;   dealloc: @@ -1078,14 +1070,45 @@ static inline int32_t  br_enact_scrubber (xlator_t *this, br_child_t *child)  {          int32_t ret = 0; +        br_private_t *priv = NULL; +        struct br_scanfs *fsscan = NULL; +        struct br_scrubber *fsscrub = NULL; + +        priv = this->private; + +        fsscan = &child->fsscan; +        fsscrub = &priv->fsscrub; + +        LOCK_INIT (&fsscan->entrylock); +        pthread_mutex_init (&fsscan->waitlock, NULL); +        pthread_cond_init (&fsscan->waitcond, NULL); -        ret = gf_thread_create (&child->thread, NULL, br_scrubber, child); +        fsscan->entries = 0; +        INIT_LIST_HEAD (&fsscan->queued); +        INIT_LIST_HEAD (&fsscan->ready); + +        ret = gf_thread_create (&child->thread, NULL, br_fsscanner, child);          if (ret != 0) { -                ret = -1; -                gf_log (this->name, GF_LOG_ERROR, "failed to spawn scrubber"); +                gf_log (this->name, GF_LOG_ALERT, "failed to spawn bitrot " +                        "scrubber daemon [Brick: %s]", child->brick_path); +                goto error_return;          } -        return ret; +        /** +         * Everything has been setup.. add this subvolume to scrubbers +         * list. 
+         */ +        pthread_mutex_lock (&fsscrub->mutex); +        { +                list_move (&child->list, &fsscrub->scrublist); +                pthread_cond_broadcast (&fsscrub->cond); +        } +        pthread_mutex_unlock (&fsscrub->mutex); + +        return 0; + + error_return: +        return -1;  }  /** @@ -1202,8 +1225,7 @@ br_handle_events (void *arg)                                                  "failed to connect to the "                                                  "child (subvolume: %s)",                                                  child->xl->name); -                                else -                                        list_del_init (&child->list); +                          }                  } @@ -1379,16 +1401,72 @@ br_init_signer (xlator_t *this, br_private_t *priv)          return -1;  } -int32_t -br_init_rate_limiter (br_private_t *priv) +/** + * For signer, only rate limit CPU usage (during hash calculation) when + * compiled with -DBR_RATE_LIMIT_SIGNER cflags, else let it run full + * throttle. 
+ */ +static int32_t +br_rate_limit_signer (xlator_t *this, int child_count, int numbricks)  { -        br_tbf_opspec_t *spec = opthrottle; -        priv->tbf = br_tbf_init (spec, sizeof (opthrottle) -                                           / sizeof (br_tbf_opspec_t)); +        br_private_t *priv = NULL; +        br_tbf_opspec_t spec = {0,}; + +        priv = this->private; + +        spec.op       = BR_TBF_OP_HASH; +        spec.rate     = 0; +        spec.maxlimit = 0; + +#ifdef BR_RATE_LIMIT_SIGNER + +        double contribution = 0; +        contribution = ((double)1 - ((double)child_count / (double)numbricks)); +        if (contribution == 0) +                contribution = 1; +        spec.rate = BR_HASH_CALC_READ_SIZE * contribution; +        spec.maxlimit = BR_WORKERS * BR_HASH_CALC_READ_SIZE; + +#endif + +        if (!spec.rate) +                gf_log (this->name, +                GF_LOG_INFO, "[Rate Limit Info] \"FULL THROTTLE\""); +        else +                gf_log (this->name, GF_LOG_INFO, +                        "[Rate Limit Info] \"tokens/sec (rate): %lu, " +                        "maxlimit: %lu\"", spec.rate, spec.maxlimit); +        priv->tbf = br_tbf_init (&spec, 1);          return priv->tbf ? 
0 : -1;  } +static int32_t +br_signer_init (xlator_t *this, br_private_t *priv) +{ +        int32_t ret = 0; +        int numbricks = 0; + +        GF_OPTION_INIT ("expiry-time", priv->expiry_time, int32, error_return); +        GF_OPTION_INIT ("brick-count", numbricks, int32, error_return); + +        ret = br_rate_limit_signer (this, priv->child_count, numbricks); +        if (ret) +                goto error_return; + +        ret = br_init_signer (this, priv); +        if (ret) +                goto cleanup_tbf; + +        return 0; + + cleanup_tbf: +        /* cleanup TBF */ + error_return: +        return -1; + +} +  int32_t  init (xlator_t *this)  { @@ -1410,7 +1488,6 @@ init (xlator_t *this)          }          GF_OPTION_INIT ("scrubber", priv->iamscrubber, bool, out); -        GF_OPTION_INIT ("expiry-time", priv->expiry_time, int32, out);          priv->child_count = xlator_subvolume_count (this);          priv->children = GF_CALLOC (priv->child_count, sizeof (*priv->children), @@ -1443,18 +1520,19 @@ init (xlator_t *this)                  INIT_LIST_HEAD (&priv->children[i].list);          INIT_LIST_HEAD (&priv->bricks); -        ret = br_init_rate_limiter (priv); -        if (ret) -                goto cleanup_mutex; -  	this->private = priv;          if (!priv->iamscrubber) { -                ret = br_init_signer (this, priv); -                if (ret) -                        goto cleanup_tbf; +                ret = br_signer_init (this, priv); +        } else { +                ret = br_scrubber_init (this, priv); +                if (!ret) +                        ret = br_scrubber_handle_options (this, priv, NULL);          } +        if (ret) +                goto cleanup_mutex; +          ret = gf_thread_create (&priv->thread, NULL, br_handle_events, this);          if (ret != 0) {                  gf_log (this->name, GF_LOG_ERROR, @@ -1469,7 +1547,6 @@ init (xlator_t *this)                  return 0;          } - cleanup_tbf:   cleanup_mutex:      
    (void) pthread_cond_destroy (&priv->cond);          (void) pthread_mutex_destroy (&priv->lock); @@ -1505,6 +1582,17 @@ fini (xlator_t *this)  	return;  } +int +reconfigure (xlator_t *this, dict_t *options) +{ +        br_private_t *priv = this->private; + +        if (!priv->iamscrubber) +                return 0; + +        return br_scrubber_handle_options (this, priv, options); +} +  struct xlator_fops fops;  struct xlator_cbks cbks; diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.h b/xlators/features/bit-rot/src/bitd/bit-rot.h index 5b641801916..6f21a6985ba 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot.h +++ b/xlators/features/bit-rot/src/bitd/bit-rot.h @@ -38,8 +38,26 @@   */  #define BR_WORKERS 4 +typedef enum scrub_throttle { +        BR_SCRUB_THROTTLE_VOID       = -1, +        BR_SCRUB_THROTTLE_LAZY       = 0, +        BR_SCRUB_THROTTLE_NORMAL     = 1, +        BR_SCRUB_THROTTLE_AGGRESSIVE = 2, +} scrub_throttle_t; +  #define signature_size(hl) (sizeof (br_isignature_t) + hl + 1) +struct br_scanfs { +        gf_lock_t entrylock; + +        pthread_mutex_t waitlock; +        pthread_cond_t  waitcond; + +        unsigned int     entries; +        struct list_head queued; +        struct list_head ready; +}; +  struct br_child {          char child_up;                /* Indicates whether this child is                                           up or not */ @@ -53,12 +71,14 @@ struct br_child {          xlator_t *this;               /* Bit rot xlator */          pthread_t thread;             /* initial crawler for unsigned -                                         object(s) */ +                                         object(s) or scrub crawler */          int threadrunning;            /* active thread */          struct mem_pool *timer_pool;  /* timer-wheel's timer mem-pool */          struct timeval tv; + +        struct br_scanfs fsscan;      /* per subvolume FS scanner */  };  typedef struct br_child br_child_t; @@ -72,6 +92,23 @@ struct 
br_obj_n_workers {                                               signing each object */  }; +struct br_scrubber { +        xlator_t *this; + +        scrub_throttle_t throttle; + +        pthread_mutex_t mutex; +        pthread_cond_t  cond; + +        unsigned int nr_scrubbers; +        struct list_head scrubbers; + +        /* +         * list of "rotatable" subvolume(s) undergoing scrubbing +         */ +        struct list_head scrublist; +}; +  typedef struct br_obj_n_workers br_obj_n_workers_t;  struct br_private { @@ -100,6 +137,7 @@ struct br_private {          br_tbf_t *tbf;                    /* token bucket filter */          gf_boolean_t iamscrubber;         /* function as a fs scrubber */ +        struct br_scrubber fsscrub;       /* scrubbers for this subvolume */  };  typedef struct br_private br_private_t; diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h index bb4030493db..46271407219 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h +++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h @@ -25,6 +25,9 @@ enum br_mem_types {          gf_br_mt_br_tbf_t,          gf_br_mt_br_tbf_bucket_t,          gf_br_mt_br_tbf_throttle_t, +        gf_br_mt_br_tbf_opspec_t, +        gf_br_mt_br_scrubber_t, +        gf_br_mt_br_fsscan_entry_t,          gf_br_stub_mt_end  };  | 
