diff options
| -rw-r--r-- | xlators/features/bit-rot/src/bitd/Makefile.am | 6 | ||||
| -rw-r--r-- | xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h | 18 | ||||
| -rw-r--r-- | xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c | 73 | ||||
| -rw-r--r-- | xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h | 46 | ||||
| -rw-r--r-- | xlators/features/bit-rot/src/bitd/bit-rot-scrub.c | 545 | ||||
| -rw-r--r-- | xlators/features/bit-rot/src/bitd/bit-rot-scrub.h | 14 | ||||
| -rw-r--r-- | xlators/features/bit-rot/src/bitd/bit-rot-ssm.c | 65 | ||||
| -rw-r--r-- | xlators/features/bit-rot/src/bitd/bit-rot-ssm.h | 4 | ||||
| -rw-r--r-- | xlators/features/bit-rot/src/bitd/bit-rot.c | 178 | ||||
| -rw-r--r-- | xlators/features/bit-rot/src/bitd/bit-rot.h | 72 | 
10 files changed, 697 insertions, 324 deletions
diff --git a/xlators/features/bit-rot/src/bitd/Makefile.am b/xlators/features/bit-rot/src/bitd/Makefile.am index 154cdfba674..cabdf3cd224 100644 --- a/xlators/features/bit-rot/src/bitd/Makefile.am +++ b/xlators/features/bit-rot/src/bitd/Makefile.am @@ -9,11 +9,13 @@ AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \                   -I$(CONTRIBDIR)/timer-wheel \                   -I$(top_srcdir)/xlators/features/bit-rot/src/stub -bit_rot_la_SOURCES = bit-rot.c bit-rot-scrub.c bit-rot-tbf.c bit-rot-ssm.c +bit_rot_la_SOURCES = bit-rot.c bit-rot-scrub.c bit-rot-tbf.c bit-rot-ssm.c \ +		     bit-rot-scrub-status.c  bit_rot_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \                      $(top_builddir)/xlators/features/changelog/lib/src/libgfchangelog.la -noinst_HEADERS = bit-rot.h bit-rot-scrub.h bit-rot-tbf.h bit-rot-bitd-messages.h bit-rot-ssm.h +noinst_HEADERS = bit-rot.h bit-rot-scrub.h bit-rot-tbf.h bit-rot-bitd-messages.h bit-rot-ssm.h \ +		 bit-rot-scrub-status.h  AM_CFLAGS = -Wall $(GF_CFLAGS) diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h b/xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h index c0b83c681d7..c6b6a4afa05 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h +++ b/xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h @@ -40,7 +40,7 @@   */  #define GLFS_BITROT_BITD_BASE                   GLFS_MSGID_COMP_BITROT_BITD -#define GLFS_BITROT_BITD_NUM_MESSAGES           53 +#define GLFS_BITROT_BITD_NUM_MESSAGES           55  #define GLFS_MSGID_END                          (GLFS_BITROT_BITD_BASE + \                                             GLFS_BITROT_BITD_NUM_MESSAGES + 1)  /* Messaged with message IDs */ @@ -427,6 +427,22 @@   *   */  /*------------*/ +#define BRB_MSG_SSM_FAILED                 (GLFS_BITROT_BITD_BASE + 54) +/*! + * @messageid + * @diagnosis + * @recommendedaction + * + */ +/*------------*/ +#define BRB_MSG_SCRUB_WAIT_FAILED          (GLFS_BITROT_BITD_BASE + 55) +/*! + * @messageid + * @diagnosis + * @recommendedaction + * + */ +/*------------*/  #define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"  #endif /* !_BITROT_BITD_MESSAGES_H_ */ diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c new file mode 100644 index 00000000000..0afd7ea05b1 --- /dev/null +++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c @@ -0,0 +1,73 @@ +/* +  Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> +  This file is part of GlusterFS. + +  This file is licensed to you under your choice of the GNU Lesser +  General Public License, version 3 or any later version (LGPLv3 or +  later), or the GNU General Public License, version 2 (GPLv2), in all +  cases as published by the Free Software Foundation. +*/ + +#include <string.h> + +#include "bit-rot-scrub-status.h" + +void +br_inc_unsigned_file_count (br_scrub_stats_t *scrub_stat) +{ +        if (!scrub_stat) +                return; + +        pthread_mutex_lock (&scrub_stat->lock); +        { +                scrub_stat->unsigned_files++; +        } +        pthread_mutex_unlock (&scrub_stat->lock); +} + +void +br_inc_scrubbed_file (br_scrub_stats_t *scrub_stat) +{ +        if (!scrub_stat) +                return; + +        pthread_mutex_lock (&scrub_stat->lock); +        { +                scrub_stat->scrubbed_files++; +        } +        pthread_mutex_unlock (&scrub_stat->lock); +} + +void +br_update_scrub_start_time (br_scrub_stats_t *scrub_stat, struct timeval *tv) +{ +        if (!scrub_stat) +                return; + +        pthread_mutex_lock (&scrub_stat->lock); +        { +                scrub_stat->scrub_start_tv.tv_sec = tv->tv_sec; +        } +        pthread_mutex_unlock (&scrub_stat->lock); +} + +void +br_update_scrub_finish_time (br_scrub_stats_t *scrub_stat, char *timestr, +                             struct timeval *tv) +{ +        if (!scrub_stat) +                return; + +        pthread_mutex_lock (&scrub_stat->lock); +        { +                scrub_stat->scrub_end_tv.tv_sec = tv->tv_sec; + +                scrub_stat->scrub_duration = +                                 scrub_stat->scrub_end_tv.tv_sec - +                                 scrub_stat->scrub_start_tv.tv_sec; + +                strncpy (scrub_stat->last_scrub_time, timestr, +                         sizeof (scrub_stat->last_scrub_time)); +        } +        pthread_mutex_unlock (&scrub_stat->lock); +} diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h new file mode 100644 index 00000000000..694ba0acbe3 --- /dev/null +++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h @@ -0,0 +1,46 @@ +/* +   Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> +   This file is part of GlusterFS. + +   This file is licensed to you under your choice of the GNU Lesser +   General Public License, version 3 or any later version (LGPLv3 or +   later), or the GNU General Public License, version 2 (GPLv2), in all +   cases as published by the Free Software Foundation. +*/ + +#ifndef __BIT_ROT_SCRUB_STATUS_H__ +#define __BIT_ROT_SCRUB_STATUS_H__ + +#include <stdint.h> +#include <sys/time.h> +#include <pthread.h> + +struct br_scrub_stats { +        uint64_t       scrubbed_files;       /* Total number of scrubbed file */ + +        uint64_t       unsigned_files;       /* Total number of unsigned file */ + +        uint64_t       scrub_duration;            /* Duration of last scrub */ + +        char           last_scrub_time[1024];    /*last scrub completion time */ + +        struct         timeval scrub_start_tv;   /* Scrubbing starting time*/ + +        struct         timeval scrub_end_tv;     /* Scrubbing finishing time */ + +        pthread_mutex_t  lock; +}; + +typedef struct br_scrub_stats br_scrub_stats_t; + +void +br_inc_unsigned_file_count (br_scrub_stats_t *scrub_stat); +void +br_inc_scrubbed_file (br_scrub_stats_t *scrub_stat); +void +br_update_scrub_start_time (br_scrub_stats_t *scrub_stat, struct timeval *tv); +void +br_update_scrub_finish_time (br_scrub_stats_t *scrub_stat, char *timestr, +                             struct timeval *tv); + +#endif /* __BIT_ROT_SCRUB_STATUS_H__ */ diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c index 47d1d262b5f..e36762e8b22 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c +++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c @@ -24,6 +24,7 @@  #include "bit-rot-scrub.h"  #include <pthread.h>  #include "bit-rot-bitd-messages.h" +#include "bit-rot-scrub-status.h"  struct br_scrubbers {          pthread_t scrubthread; @@ -79,20 +80,6 @@ bitd_fetch_signature (xlator_t *this, br_child_t *child,  } -static void -br_inc_unsigned_file_count (xlator_t *this) -{ -        br_private_t   *priv = NULL; - -        priv = this->private; - -        pthread_mutex_lock (&priv->scrub_stat.lock); -        { -                priv->scrub_stat.unsigned_files++; -        } -        pthread_mutex_unlock (&priv->scrub_stat.lock); -} -  /**   * POST COMPUTE CHECK   * @@ -106,7 +93,8 @@ int32_t  bitd_scrub_post_compute_check (xlator_t *this,                                 br_child_t *child,                                 fd_t *fd, unsigned long version, -                               br_isignature_out_t **signature) +                               br_isignature_out_t **signature, +                               br_scrub_stats_t *scrub_stat)  {          int32_t              ret     = 0;          size_t               signlen = 0; @@ -114,8 +102,10 @@ bitd_scrub_post_compute_check (xlator_t *this,          br_isignature_out_t *signptr = NULL;          ret = bitd_fetch_signature (this, child, fd, &xattr, &signptr); -        if (ret < 0) +        if (ret < 0) { +                br_inc_unsigned_file_count (scrub_stat);                  goto out; +        }          /**           * Either the object got dirtied during the time the signature was @@ -126,7 +116,7 @@ bitd_scrub_post_compute_check (xlator_t *this,           * The log entry looks pretty ugly, but helps in debugging..           */          if (signptr->stale || (signptr->version != version)) { -                br_inc_unsigned_file_count (this); +                br_inc_unsigned_file_count (scrub_stat);                  gf_msg_debug (this->name, 0, "<STAGE: POST> Object [GFID: %s] "                                "either has a stale signature OR underwent "                                "signing during checksumming {Stale: %d | " @@ -154,15 +144,18 @@ bitd_scrub_post_compute_check (xlator_t *this,  static int32_t  bitd_signature_staleness (xlator_t *this,                            br_child_t *child, fd_t *fd, -                          int *stale, unsigned long *version) +                          int *stale, unsigned long *version, +                          br_scrub_stats_t *scrub_stat)  {          int32_t ret = -1;          dict_t *xattr = NULL;          br_isignature_out_t *signptr = NULL;          ret = bitd_fetch_signature (this, child, fd, &xattr, &signptr); -        if (ret < 0) +        if (ret < 0) { +                br_inc_unsigned_file_count (scrub_stat);                  goto out; +        }          /**           * save verison for validation in post compute stage @@ -187,7 +180,8 @@ bitd_signature_staleness (xlator_t *this,   */  int32_t  bitd_scrub_pre_compute_check (xlator_t *this, br_child_t *child, -                              fd_t *fd, unsigned long *version) +                              fd_t *fd, unsigned long *version, +                              br_scrub_stats_t *scrub_stat)  {          int     stale = 0;          int32_t ret   = -1; @@ -199,9 +193,10 @@ bitd_scrub_pre_compute_check (xlator_t *this, br_child_t *child,                  goto out;          } -        ret = bitd_signature_staleness (this, child, fd, &stale, version); +        ret = bitd_signature_staleness (this, child, fd, &stale, version, +                                        scrub_stat);          if (!ret && stale) { -                br_inc_unsigned_file_count (this); +                br_inc_unsigned_file_count (scrub_stat);                  gf_msg_debug (this->name, 0, "<STAGE: PRE> Object [GFID: %s] "                                "has stale signature",                                uuid_utoa (fd->inode->gfid)); @@ -274,16 +269,6 @@ bitd_compare_ckum (xlator_t *this,          return ret;  } -static void -br_inc_scrubbed_file (br_private_t *priv) -{ -        pthread_mutex_lock (&priv->scrub_stat.lock); -        { -                priv->scrub_stat.scrubbed_files++; -        } -        pthread_mutex_unlock (&priv->scrub_stat.lock); -} -  /**   * "The Scrubber"   * @@ -376,7 +361,8 @@ br_scrubber_scrub_begin (xlator_t *this, struct br_fsscan_entry *fsentry)           *  - presence of bad object           *  - signature staleness           */ -        ret = bitd_scrub_pre_compute_check (this, child, fd, &signedversion); +        ret = bitd_scrub_pre_compute_check (this, child, fd, &signedversion, +                                            &priv->scrub_stat);          if (ret)                  goto unrefd; /* skip this object */ @@ -399,8 +385,8 @@ br_scrubber_scrub_begin (xlator_t *this, struct br_fsscan_entry *fsentry)           * perform post compute checks as an object's signature may have           * become stale while scrubber calculated checksum.           */ -        ret = bitd_scrub_post_compute_check (this, child, -                                             fd, signedversion, &sign); +        ret = bitd_scrub_post_compute_check (this, child, fd, signedversion, +                                             &sign, &priv->scrub_stat);          if (ret)                  goto free_md; @@ -408,7 +394,7 @@ br_scrubber_scrub_begin (xlator_t *this, struct br_fsscan_entry *fsentry)                                   linked_inode, entry, fd, child, &loc);          /* Increment of total number of scrubbed file counter */ -        br_inc_scrubbed_file (priv); +        br_inc_scrubbed_file (&priv->scrub_stat);          GF_FREE (sign); /* alloced on post-compute */ @@ -562,171 +548,215 @@ br_fsscanner_handle_entry (xlator_t *subvol,  }  int32_t -br_fsscan_deactivate (xlator_t *this, br_child_t *child) +br_fsscan_deactivate (xlator_t *this)  {          int ret = 0;          br_private_t *priv = NULL;          br_scrub_state_t nstate = 0; -        struct br_scanfs *fsscan = NULL; +        struct br_monitor *scrub_monitor = NULL;          priv = this->private; -        fsscan = &child->fsscan; +        scrub_monitor = &priv->scrub_monitor; -        ret = gf_tw_del_timer (priv->timer_wheel, fsscan->timer); +        ret = gf_tw_del_timer (priv->timer_wheel, scrub_monitor->timer);          if (ret == 0) {                  nstate = BR_SCRUB_STATE_STALLED;                  gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO, -                        "Brick [%s] is under active scrubbing. Pausing scrub..", -                        child->brick_path); +                        "Volume is under active scrubbing. Pausing scrub..");          } else {                  nstate = BR_SCRUB_STATE_PAUSED;                  gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO, -                        "Scrubber paused [Brick: %s]", child->brick_path); +                        "Scrubber paused");          } -        _br_child_set_scrub_state (child, nstate); +        _br_monitor_set_scrub_state (scrub_monitor, nstate);          return 0;  } +  static void -br_update_scrub_start_time (xlator_t *this, struct timeval *tv) +br_scrubber_log_time (xlator_t *this, const char *sfx)  { -        br_private_t     *priv = NULL; -        static int       child; +        char           timestr[1024] = {0,}; +        struct         timeval tv    = {0,}; +        br_private_t  *priv          = NULL;          priv = this->private; +        gettimeofday (&tv, NULL); +        gf_time_fmt (timestr, sizeof (timestr), tv.tv_sec, gf_timefmt_FT); -        /* Setting scrubber starting time for first child only */ -        if (child == 0) { -                pthread_mutex_lock (&priv->scrub_stat.lock); -                { -                        priv->scrub_stat.scrub_start_tv.tv_sec = tv->tv_sec; -                } -                pthread_mutex_unlock (&priv->scrub_stat.lock); +        if (strcasecmp (sfx, "started") == 0) { +                br_update_scrub_start_time (&priv->scrub_stat, &tv); +                gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_START, +                        "Scrubbing %s at %s", sfx, timestr); +        } else { +                br_update_scrub_finish_time (&priv->scrub_stat, timestr, &tv); +                gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_FINISH, +                        "Scrubbing %s at %s", sfx, timestr);          } +} -        if (++child == priv->up_children) { -                child = 0; +static void +br_fsscanner_log_time (xlator_t *this, br_child_t *child, const char *sfx) +{ +        char           timestr[1024] = {0,}; +        struct         timeval tv    = {0,}; + +        gettimeofday (&tv, NULL); +        gf_time_fmt (timestr, sizeof (timestr), tv.tv_sec, gf_timefmt_FT); + +        if (strcasecmp (sfx, "started") == 0) { +                gf_msg_debug (this->name, 0, "Scrubbing \"%s\" %s at %s", +                              child->brick_path, sfx, timestr); +        } else { +                gf_msg_debug (this->name, 0, "Scrubbing \"%s\" %s at %s", +                              child->brick_path, sfx, timestr);          }  } +void +br_child_set_scrub_state (br_child_t *child, gf_boolean_t state) +{ +        child->active_scrubbing = state; +} +  static void -br_update_scrub_finish_time (xlator_t *this, char *timestr, struct timeval *tv) +br_fsscanner_wait_until_kicked (xlator_t *this, br_child_t *child)  { -        br_private_t     *priv = NULL; -        static int       child; +        br_private_t      *priv          = NULL; +        struct br_monitor *scrub_monitor = NULL;          priv = this->private; +        scrub_monitor = &priv->scrub_monitor; -        /*Setting scrubber finishing time at time time of last child operation*/ -        if (++child == priv->up_children) { -                pthread_mutex_lock (&priv->scrub_stat.lock); -                { -                        priv->scrub_stat.scrub_end_tv.tv_sec = tv->tv_sec; - -                        priv->scrub_stat.scrub_duration = -                                         priv->scrub_stat.scrub_end_tv.tv_sec - -                                         priv->scrub_stat.scrub_start_tv.tv_sec; - -                        strncpy (priv->scrub_stat.last_scrub_time, timestr, -                                 sizeof (priv->scrub_stat.last_scrub_time)); +        pthread_cleanup_push (_br_lock_cleaner, &scrub_monitor->wakelock); +        pthread_mutex_lock (&scrub_monitor->wakelock); +        { +                while (!scrub_monitor->kick) +                        pthread_cond_wait (&scrub_monitor->wakecond, +                                           &scrub_monitor->wakelock); -                        child = 0; +                /* Child lock is to synchronize with disconnect events */ +                pthread_cleanup_push (_br_lock_cleaner, &child->lock); +                pthread_mutex_lock (&child->lock); +                { +                        scrub_monitor->active_child_count++; +                        br_child_set_scrub_state (child, _gf_true);                  } -                pthread_mutex_unlock (&priv->scrub_stat.lock); +                pthread_mutex_unlock (&child->lock); +                pthread_cleanup_pop (0);          } +        pthread_mutex_unlock (&scrub_monitor->wakelock); +        pthread_cleanup_pop (0);  }  static void -br_fsscanner_log_time (xlator_t *this, br_child_t *child, const char *sfx) +br_scrubber_entry_control (xlator_t *this)  { -        char           timestr[1024] = {0,}; -        struct         timeval tv    = {0,}; +        br_private_t      *priv          = NULL; +        struct br_monitor *scrub_monitor = NULL; -        gettimeofday (&tv, NULL); -        gf_time_fmt (timestr, sizeof (timestr), tv.tv_sec, gf_timefmt_FT); +        priv = this->private; +        scrub_monitor = &priv->scrub_monitor; -        if (strcasecmp (sfx, "started") == 0) { -                br_update_scrub_start_time (this, &tv); -                gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_START, -                        "Scrubbing \"%s\" %s at %s", child->brick_path, sfx, -                        timestr); -        } else { -                br_update_scrub_finish_time (this, timestr, &tv); -                gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_FINISH, -                        "Scrubbing \"%s\" %s at %s", child->brick_path, sfx, -                        timestr); +        LOCK (&scrub_monitor->lock); +        { +                /* Move the state to BR_SCRUB_STATE_ACTIVE */ +                if (scrub_monitor->state == BR_SCRUB_STATE_PENDING) +                        scrub_monitor->state = BR_SCRUB_STATE_ACTIVE; +                br_scrubber_log_time (this, "started");          } +        UNLOCK (&scrub_monitor->lock);  }  static void -br_fsscanner_wait_until_kicked (xlator_t *this, struct br_scanfs *fsscan) +br_scrubber_exit_control (xlator_t *this)  { -        static int            i; -        br_private_t         *priv    = NULL; +        br_private_t      *priv          = NULL; +        struct br_monitor *scrub_monitor = NULL;          priv = this->private; +        scrub_monitor = &priv->scrub_monitor; -        pthread_cleanup_push (_br_lock_cleaner, &fsscan->wakelock); -        pthread_mutex_lock (&fsscan->wakelock); +        LOCK (&scrub_monitor->lock);          { -                while (!fsscan->kick) -                        pthread_cond_wait (&fsscan->wakecond, -                                           &fsscan->wakelock); - -                /* resetting total number of scrubbed file when scrubbing -                 * done for all of its children */ -                if (i == priv->up_children) { -                        pthread_mutex_lock (&priv->scrub_stat.lock); -                        { -                                priv->scrub_stat.scrubbed_files = 0; -                                priv->scrub_stat.unsigned_files = 0; -                                i = 0; -                        } -                        pthread_mutex_unlock (&priv->scrub_stat.lock); -                } -                ++i; +                br_scrubber_log_time (this, "finished"); -                fsscan->kick = _gf_false; +                if (scrub_monitor->state == BR_SCRUB_STATE_ACTIVE) { +                        (void) br_fsscan_activate (this); +                } else { +                        gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO, +                                "Volume waiting to get rescheduled.."); +                }          } -        pthread_mutex_unlock (&fsscan->wakelock); -        pthread_cleanup_pop (0); +        UNLOCK (&scrub_monitor->lock);  }  static void  br_fsscanner_entry_control (xlator_t *this, br_child_t *child)  { -        struct br_scanfs *fsscan = &child->fsscan; - -        LOCK (&child->lock); -        { -                if (fsscan->state == BR_SCRUB_STATE_PENDING) -                        fsscan->state = BR_SCRUB_STATE_ACTIVE;                  br_fsscanner_log_time (this, child, "started"); -        } -        UNLOCK (&child->lock);  }  static void  br_fsscanner_exit_control (xlator_t *this, br_child_t *child)  { -        struct br_scanfs *fsscan = &child->fsscan; +        br_private_t *priv = NULL; +        struct br_monitor *scrub_monitor = NULL; -        LOCK (&child->lock); +        priv = this->private; +        scrub_monitor = &priv->scrub_monitor; + +        if (!_br_is_child_connected (child)) { +                gf_msg (this->name, GF_LOG_WARNING, 0, BRB_MSG_SCRUB_INFO, +                        "Brick [%s] disconnected while scrubbing. Scrubbing " +                        "might be incomplete", child->brick_path); +        } + +        br_fsscanner_log_time (this, child, "finished"); + +        pthread_cleanup_push (_br_lock_cleaner, &scrub_monitor->wakelock); +        pthread_mutex_lock (&scrub_monitor->wakelock);          { -                fsscan->over = _gf_true; -                br_fsscanner_log_time (this, child, "finished"); +                scrub_monitor->active_child_count--; +                pthread_cleanup_push (_br_lock_cleaner, &child->lock); +                pthread_mutex_lock (&child->lock); +                { +                        br_child_set_scrub_state (child, _gf_false); +                } +                pthread_mutex_unlock (&child->lock); +                pthread_cleanup_pop (0); -                if (fsscan->state == BR_SCRUB_STATE_ACTIVE) { -                        (void) br_fsscan_activate (this, child); +                if (scrub_monitor->active_child_count == 0) { +                        /* The last child has finished scrubbing. +                         * Set the kick to false and  wake up other +                         * children who are waiting for the last +                         * child to complete scrubbing. +                         */ +                        scrub_monitor->kick = _gf_false; +                        pthread_cond_broadcast (&scrub_monitor->wakecond); + +                        /* Signal monitor thread waiting for the all +                         * the children to finish scrubbing. +                         */ +                        pthread_cleanup_push (_br_lock_cleaner, +                                              &scrub_monitor->donelock); +                        pthread_mutex_lock (&scrub_monitor->donelock); +                        { +                                scrub_monitor->done = _gf_true; +                                pthread_cond_signal (&scrub_monitor->donecond); +                        } +                        pthread_mutex_unlock (&scrub_monitor->donelock); +                        pthread_cleanup_pop (0);                  } else { -                        gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO, -                                "Brick [%s] waiting to get rescheduled..", -                                child->brick_path); +                        while (scrub_monitor->active_child_count) +                                pthread_cond_wait (&scrub_monitor->wakecond, +                                                   &scrub_monitor->wakelock);                  }          } -        UNLOCK (&child->lock); +        pthread_mutex_unlock (&scrub_monitor->wakelock); +        pthread_cleanup_pop (0);  }  void * @@ -745,7 +775,7 @@ br_fsscanner (void *arg)          loc.inode = child->table->root;          while (1) { -                br_fsscanner_wait_until_kicked (this, fsscan); +                br_fsscanner_wait_until_kicked (this, child);                  {                          /* precursor for scrub */                          br_fsscanner_entry_control (this, child); @@ -777,22 +807,29 @@ br_kickstart_scanner (struct gf_tw_timer_list *timer,                        void *data, unsigned long calltime)  {          xlator_t *this = NULL; -        br_child_t *child = data; -        struct br_scanfs *fsscan = NULL; +        struct br_monitor *scrub_monitor = data; +        br_private_t *priv = NULL; -        THIS = this = child->this; -        fsscan = &child->fsscan; +        THIS = this = scrub_monitor->this; +        priv = this->private; + +        /* Reset scrub statistics */ +        priv->scrub_stat.scrubbed_files = 0; +        priv->scrub_stat.unsigned_files = 0; + +        /* Moves state from PENDING to ACTIVE */ +        (void) br_scrubber_entry_control (this);          /* kickstart scanning.. */ -        pthread_mutex_lock (&fsscan->wakelock); +        pthread_mutex_lock (&scrub_monitor->wakelock);          { -                fsscan->kick = _gf_true; -                pthread_cond_signal (&fsscan->wakecond); +                scrub_monitor->kick = _gf_true; +                GF_ASSERT (scrub_monitor->active_child_count == 0); +                pthread_cond_broadcast (&scrub_monitor->wakecond);          } -        pthread_mutex_unlock (&fsscan->wakelock); +        pthread_mutex_unlock (&scrub_monitor->wakelock);          return; -  }  static uint32_t @@ -836,22 +873,22 @@ br_fsscan_calculate_timeout (scrub_freq_t freq)  }  int32_t -br_fsscan_schedule (xlator_t *this, br_child_t *child) +br_fsscan_schedule (xlator_t *this)  {          uint32_t timo = 0;          br_private_t *priv = NULL;          struct timeval tv = {0,};          char timestr[1024] = {0,}; -        struct br_scanfs *fsscan = NULL;          struct br_scrubber *fsscrub = NULL;          struct gf_tw_timer_list *timer = NULL; +        struct br_monitor *scrub_monitor = NULL;          priv = this->private; -        fsscan = &child->fsscan;          fsscrub = &priv->fsscrub; +        scrub_monitor = &priv->scrub_monitor;          (void) gettimeofday (&tv, NULL); -        fsscan->boot = tv.tv_sec; +        scrub_monitor->boot = tv.tv_sec;          timo = br_fsscan_calculate_timeout (fsscrub->frequency);          if (timo == 0) { @@ -860,25 +897,25 @@ br_fsscan_schedule (xlator_t *this, br_child_t *child)                  goto error_return;          } -        fsscan->timer = GF_CALLOC (1, sizeof (*fsscan->timer), +        scrub_monitor->timer = GF_CALLOC (1, sizeof (*scrub_monitor->timer),                                     gf_br_stub_mt_br_scanner_freq_t); -        if (!fsscan->timer) +        if (!scrub_monitor->timer)                  goto error_return; -        timer = fsscan->timer; +        timer = scrub_monitor->timer;          INIT_LIST_HEAD (&timer->entry); -        timer->data = child; +        timer->data = scrub_monitor;          timer->expires = timo;          timer->function = br_kickstart_scanner;          gf_tw_add_timer (priv->timer_wheel, timer); -        _br_child_set_scrub_state (child, BR_SCRUB_STATE_PENDING); +        _br_monitor_set_scrub_state (scrub_monitor, BR_SCRUB_STATE_PENDING);          gf_time_fmt (timestr, sizeof (timestr), -                     (fsscan->boot + timo), gf_timefmt_FT); -        gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO, "Scrubbing for " -                "%s scheduled to run at %s", child->brick_path, timestr); +                     (scrub_monitor->boot + timo), gf_timefmt_FT); +        gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO, "Scrubbing is " +                "scheduled to run at %s", timestr);          return 0; @@ -887,18 +924,18 @@ br_fsscan_schedule (xlator_t *this, br_child_t *child)  }  int32_t -br_fsscan_activate (xlator_t *this, br_child_t *child) +br_fsscan_activate (xlator_t *this)  {          uint32_t            timo    = 0;          char timestr[1024]          = {0,};          struct timeval      now     = {0,};          br_private_t       *priv    = NULL; -        struct br_scanfs   *fsscan  = NULL;          struct br_scrubber *fsscrub = NULL; +        struct br_monitor  *scrub_monitor = NULL;          priv = this->private; -        fsscan = &child->fsscan;          fsscrub = &priv->fsscrub; +        scrub_monitor = &priv->scrub_monitor;          (void) gettimeofday (&now, NULL);          timo = br_fsscan_calculate_timeout (fsscrub->frequency); @@ -908,32 +945,37 @@ br_fsscan_activate (xlator_t *this, br_child_t *child)                  return -1;          } -        fsscan->over = _gf_false; +        pthread_mutex_lock (&scrub_monitor->donelock); +        { +                scrub_monitor->done = _gf_false; +        } +        pthread_mutex_unlock (&scrub_monitor->donelock); +          gf_time_fmt (timestr, sizeof (timestr),                       (now.tv_sec + timo), gf_timefmt_FT); -        (void) gf_tw_mod_timer (priv->timer_wheel, fsscan->timer, timo); +        (void) gf_tw_mod_timer (priv->timer_wheel, scrub_monitor->timer, timo); -        _br_child_set_scrub_state (child, BR_SCRUB_STATE_PENDING); -        gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO, "Scrubbing for " -                "%s rescheduled to run at %s", child->brick_path, timestr); +        _br_monitor_set_scrub_state (scrub_monitor, BR_SCRUB_STATE_PENDING); +        gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO, "Scrubbing is " +                "rescheduled to run at %s", timestr);          return 0;  }  int32_t -br_fsscan_reschedule (xlator_t *this, br_child_t *child) +br_fsscan_reschedule (xlator_t *this)  {          int32_t             ret     = 0;          uint32_t            timo    = 0;          char timestr[1024]          = {0,};          struct timeval      now     = {0,};          br_private_t       *priv    = NULL; -        struct br_scanfs   *fsscan  = NULL;          struct br_scrubber *fsscrub = NULL; +        struct br_monitor  *scrub_monitor = NULL;          priv = this->private; -        fsscan = &child->fsscan;          fsscrub = &priv->fsscrub; +        scrub_monitor = &priv->scrub_monitor;          if (!fsscrub->frequency_reconf)                  return 0; @@ -949,17 +991,21 @@ br_fsscan_reschedule (xlator_t *this, br_child_t *child)          gf_time_fmt (timestr, sizeof (timestr),                       (now.tv_sec + timo), gf_timefmt_FT); -        fsscan->over = _gf_false; -        ret = gf_tw_mod_timer_pending (priv->timer_wheel, fsscan->timer, timo); +        pthread_mutex_lock (&scrub_monitor->donelock); +        { +                scrub_monitor->done = _gf_false; +        } +        pthread_mutex_unlock (&scrub_monitor->donelock); + +        ret = gf_tw_mod_timer_pending (priv->timer_wheel, scrub_monitor->timer, timo);          if (ret == 0)                  gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO, -                        "Scrubber for %s is currently running and would be " -                        "rescheduled after completion", child->brick_path); +                        "Scrubber is currently running and would be " +                        "rescheduled after completion");          else { -                _br_child_set_scrub_state (child, BR_SCRUB_STATE_PENDING); +                _br_monitor_set_scrub_state (scrub_monitor, BR_SCRUB_STATE_PENDING);                  gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO, -                        "Scrubbing for %s rescheduled to run at %s", -                        child->brick_path, timestr); +                        "Scrubbing rescheduled to run at %s", timestr);          }          return 0; @@ -1725,15 +1771,174 @@ out:          return ret;  } +static int +wait_for_scrub_to_finish (xlator_t *this) +{ +        int                  ret               = -1; +        br_private_t         *priv             = NULL; +        struct br_monitor    *scrub_monitor    = NULL; + +        priv = this->private; +        scrub_monitor = &priv->scrub_monitor; + +        GF_VALIDATE_OR_GOTO ("bit-rot", scrub_monitor, out); +        GF_VALIDATE_OR_GOTO ("bit-rot", this, out); + +        gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO, +                "Waiting for all children to start and finish scrub"); + +        pthread_mutex_lock (&scrub_monitor->donelock); +        { +                while (!scrub_monitor->done) +                        pthread_cond_wait (&scrub_monitor->donecond, +                                           &scrub_monitor->donelock); +        } +        pthread_mutex_unlock (&scrub_monitor->donelock); +        ret = 0; +out: +        return ret; +} + +/** + * This function is executed in a separate thread. This is scrubber monitor + * thread that takes care of state machine. + */ +void * +br_monitor_thread (void *arg) +{ +        int32_t              ret               = 0; +        xlator_t            *this              = NULL; +        br_private_t        *priv              = NULL; +        struct br_monitor   *scrub_monitor     = NULL; + +        this = arg; +        priv = this->private; + +        /* +         * Since, this is the topmost xlator, THIS has to be set by bit-rot +         * xlator itself (STACK_WIND wont help in this case). Also it has +         * to be done for each thread that gets spawned. Otherwise, a new +         * thread will get global_xlator's pointer when it does "THIS". +         */ +        THIS = this; + +        scrub_monitor = &priv->scrub_monitor; + +        pthread_mutex_lock (&scrub_monitor->mutex); +        { +                while (!scrub_monitor->inited) +                        pthread_cond_wait (&scrub_monitor->cond, +                                           &scrub_monitor->mutex); +        } +        pthread_mutex_unlock (&scrub_monitor->mutex); + +        /* this needs to be serialized with reconfigure() */ +        pthread_mutex_lock (&priv->lock); +        { +                ret = br_scrub_state_machine (this); +        } +        pthread_mutex_unlock (&priv->lock); +        if (ret) { +                gf_msg (this->name, GF_LOG_ERROR, -ret, +                        BRB_MSG_SSM_FAILED, +                        "Scrub state machine failed"); +                goto out; +        } + +        while (1) { +                /* Wait for all children to finish scrubbing */ +                ret = wait_for_scrub_to_finish (this); +                if (ret) { +                        gf_msg (this->name, GF_LOG_ERROR, -ret, +                                BRB_MSG_SCRUB_WAIT_FAILED, +                                "Scrub wait failed"); +                        goto out; +                } + +                /* scrub exit criteria: Move the state to PENDING */ +                br_scrubber_exit_control (this); +        } + +out: +        return NULL; +} + +static void +br_set_scrub_state (struct br_monitor *scrub_monitor, br_scrub_state_t state) +{ +        LOCK (&scrub_monitor->lock); +        { +                _br_monitor_set_scrub_state (scrub_monitor, state); +        } +        UNLOCK (&scrub_monitor->lock); +} + +int32_t +br_scrubber_monitor_init (xlator_t *this, br_private_t *priv) +{ +        struct br_monitor *scrub_monitor = NULL; +        int                ret           = 0; + +        scrub_monitor = &priv->scrub_monitor; + +        LOCK_INIT (&scrub_monitor->lock); +        scrub_monitor->this = this; + +        scrub_monitor->inited = _gf_false; +        pthread_mutex_init (&scrub_monitor->mutex, NULL); +        pthread_cond_init (&scrub_monitor->cond, NULL); + +        scrub_monitor->kick = _gf_false; +        scrub_monitor->active_child_count = 0; +        pthread_mutex_init (&scrub_monitor->wakelock, NULL); +        pthread_cond_init (&scrub_monitor->wakecond, NULL); + +        scrub_monitor->done = _gf_false; +        pthread_mutex_init (&scrub_monitor->donelock, NULL); +        pthread_cond_init (&scrub_monitor->donecond, NULL); + +        /* Set the state to INACTIVE */ +        br_set_scrub_state (&priv->scrub_monitor, BR_SCRUB_STATE_INACTIVE); + +        /* Start the monitor thread */ +        ret = gf_thread_create (&scrub_monitor->thread, NULL, br_monitor_thread, this); +        if (ret != 0) { +                gf_msg (this->name, GF_LOG_ERROR, -ret, +                        BRB_MSG_SPAWN_FAILED, "monitor thread creation failed"); +                ret = -1; +                goto err; +        } + +        return 0; +err: +        pthread_mutex_destroy (&scrub_monitor->mutex); +        pthread_cond_destroy (&scrub_monitor->cond); + +        pthread_mutex_destroy (&scrub_monitor->wakelock); +        pthread_cond_destroy (&scrub_monitor->wakecond); + +        pthread_mutex_destroy (&scrub_monitor->donelock); +        pthread_cond_destroy (&scrub_monitor->donecond); + +        LOCK_DESTROY (&scrub_monitor->lock); + +        return ret; +} +  int32_t  br_scrubber_init (xlator_t *this, br_private_t *priv)  {          struct br_scrubber *fsscrub = NULL; +        int                 ret     = 0;          priv->tbf = br_tbf_init (NULL, 0);          if (!priv->tbf)                  return -1; +        ret = br_scrubber_monitor_init (this, priv); +        if (ret) +                return -1; +          fsscrub = &priv->fsscrub;          fsscrub->this = this; diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h index e730582f1f8..93bb29639fa 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h +++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h @@ -16,15 +16,21 @@  void *br_fsscanner (void *); -int32_t br_fsscan_schedule (xlator_t *, br_child_t *); -int32_t br_fsscan_reschedule (xlator_t *, br_child_t *); -int32_t br_fsscan_activate (xlator_t *, br_child_t *); -int32_t br_fsscan_deactivate (xlator_t *, br_child_t *); +int32_t br_fsscan_schedule (xlator_t *); +int32_t br_fsscan_reschedule (xlator_t *); +int32_t br_fsscan_activate (xlator_t *); +int32_t br_fsscan_deactivate (xlator_t *);  int32_t br_scrubber_handle_options (xlator_t *, br_private_t *, dict_t *); +int32_t +br_scrubber_monitor_init (xlator_t *, br_private_t *); +  int32_t br_scrubber_init (xlator_t *, br_private_t *);  int32_t br_collect_bad_objects_from_children (xlator_t *this, dict_t *dict); +void +br_child_set_scrub_state (br_child_t *, gf_boolean_t); +  #endif /* __BIT_ROT_SCRUB_H__ */ diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-ssm.c b/xlators/features/bit-rot/src/bitd/bit-rot-ssm.c index fcffc04feda..d304fc804ee 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot-ssm.c +++ b/xlators/features/bit-rot/src/bitd/bit-rot-ssm.c @@ -12,52 +12,73 @@  #include "bit-rot-scrub.h"  #include "bit-rot-bitd-messages.h" -int br_scrub_ssm_noop (xlator_t *this, br_child_t *child) +int br_scrub_ssm_noop (xlator_t *this)  {          return 0;  }  int -br_scrub_ssm_state_pause (xlator_t *this, br_child_t *child) +br_scrub_ssm_state_pause (xlator_t *this)  { +        br_private_t        *priv               = NULL; +        struct br_monitor   *scrub_monitor      = NULL; + +        priv = this->private; +        scrub_monitor = &priv->scrub_monitor; +          gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_GENERIC_SSM_INFO, -                "Scrubber paused [Brick: %s]", child->brick_path); -        _br_child_set_scrub_state (child, BR_SCRUB_STATE_PAUSED); +                "Scrubber paused"); +        _br_monitor_set_scrub_state (scrub_monitor, BR_SCRUB_STATE_PAUSED);          return 0;  }  int -br_scrub_ssm_state_ipause (xlator_t *this, br_child_t *child) +br_scrub_ssm_state_ipause (xlator_t *this)  { +        br_private_t        *priv               = NULL; +        struct br_monitor   *scrub_monitor      = NULL; + +        priv = this->private; +        scrub_monitor = &priv->scrub_monitor; +          gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_GENERIC_SSM_INFO, -                "Scrubber paused [Brick: %s]", child->brick_path); -        _br_child_set_scrub_state (child, BR_SCRUB_STATE_IPAUSED); +                "Scrubber paused"); +        _br_monitor_set_scrub_state (scrub_monitor, BR_SCRUB_STATE_IPAUSED);          return 0;  }  int -br_scrub_ssm_state_active (xlator_t *this, br_child_t *child) +br_scrub_ssm_state_active (xlator_t *this)  { -        struct br_scanfs *fsscan = &child->fsscan; +        br_private_t        *priv               = NULL; +        struct br_monitor   *scrub_monitor      = NULL; -        if (fsscan->over) { -                (void) br_fsscan_activate (this, child); +        priv = this->private; +        scrub_monitor = &priv->scrub_monitor; + +        if (scrub_monitor->done) { +                (void) br_fsscan_activate (this);          } else {                  gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_GENERIC_SSM_INFO, -                        "Scrubbing resumed [Brick %s]", child->brick_path); -                _br_child_set_scrub_state (child, BR_SCRUB_STATE_ACTIVE); +                        "Scrubbing resumed"); +                _br_monitor_set_scrub_state (scrub_monitor, BR_SCRUB_STATE_ACTIVE);          }          return 0;  }  int -br_scrub_ssm_state_stall (xlator_t *this, br_child_t *child) +br_scrub_ssm_state_stall (xlator_t *this)  { +        br_private_t        *priv               = NULL; +        struct br_monitor   *scrub_monitor      = NULL; + +        priv = this->private; +        scrub_monitor = &priv->scrub_monitor; +          gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_GENERIC_SSM_INFO, -                "Brick [%s] is under active scrubbing. Pausing scrub..", -                child->brick_path); -        _br_child_set_scrub_state (child, BR_SCRUB_STATE_STALLED); +                "Volume is under active scrubbing. Pausing scrub.."); +        _br_monitor_set_scrub_state (scrub_monitor, BR_SCRUB_STATE_STALLED);          return 0;  } @@ -72,22 +93,22 @@ br_scrub_ssm[BR_SCRUB_MAXSTATES][BR_SCRUB_MAXEVENTS] = {  };  int32_t -br_scrub_state_machine (xlator_t *this, br_child_t *child) +br_scrub_state_machine (xlator_t *this)  {          br_private_t       *priv      = NULL;          br_scrub_ssm_call  *call      = NULL; -        struct br_scanfs   *fsscan    = NULL;          struct br_scrubber *fsscrub   = NULL;          br_scrub_state_t    currstate = 0;          br_scrub_event_t    event     = 0; +        struct br_monitor  *scrub_monitor = NULL;          priv = this->private; -        fsscan = &child->fsscan;          fsscrub = &priv->fsscrub; +        scrub_monitor = &priv->scrub_monitor; -        currstate = fsscan->state; +        currstate = scrub_monitor->state;          event = _br_child_get_scrub_event (fsscrub);          call = br_scrub_ssm[currstate][event]; -        return call (this, child); +        return call (this);  } diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-ssm.h b/xlators/features/bit-rot/src/bitd/bit-rot-ssm.h index 72fd62b3630..936ee4d837c 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot-ssm.h +++ b/xlators/features/bit-rot/src/bitd/bit-rot-ssm.h @@ -29,8 +29,8 @@ typedef enum br_scrub_event {          BR_SCRUB_MAXEVENTS,  } br_scrub_event_t; -struct br_child; +struct br_monitor; -int32_t br_scrub_state_machine (xlator_t *, struct br_child *); +int32_t br_scrub_state_machine (xlator_t *);  #endif /* __BIT_ROT_SSM_H__ */ diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c index 0eba4472a33..45f8d1d624c 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot.c +++ b/xlators/features/bit-rot/src/bitd/bit-rot.c @@ -1097,21 +1097,11 @@ br_oneshot_signer (void *arg)  static void  br_set_child_state (br_child_t *child, br_child_state_t state)  { -        LOCK (&child->lock); +        pthread_mutex_lock (&child->lock);          {                  _br_set_child_state (child, state);          } -        UNLOCK (&child->lock); -} - -static void -br_set_scrub_state (br_child_t *child, br_scrub_state_t state) -{ -        LOCK (&child->lock); -        { -                _br_child_set_scrub_state (child, state); -        } -        UNLOCK (&child->lock); +        pthread_mutex_unlock (&child->lock);  }  /** @@ -1173,11 +1163,11 @@ br_launch_scrubber (xlator_t *this, br_child_t *child,  {          int32_t ret = -1;          br_private_t *priv = NULL; +        struct br_monitor *scrub_monitor = NULL;          priv = this->private; -        fsscan->kick = _gf_false; -        fsscan->over = _gf_false; +        scrub_monitor = &priv->scrub_monitor;          ret = gf_thread_create (&child->thread, NULL, br_fsscanner, child);          if (ret != 0) {                  gf_msg (this->name, GF_LOG_ALERT, 0, BRB_MSG_SPAWN_FAILED, @@ -1186,14 +1176,14 @@ br_launch_scrubber (xlator_t *this, br_child_t *child,                  goto error_return;          } -        /* this needs to be serialized with reconfigure() */ -        pthread_mutex_lock (&priv->lock); +        /* Signal monitor to kick off state machine*/ +        pthread_mutex_lock (&scrub_monitor->mutex);          { -                ret = br_scrub_state_machine (this, child); +                if (!scrub_monitor->inited) +                        pthread_cond_signal (&scrub_monitor->cond); +                scrub_monitor->inited = _gf_true;          } -        pthread_mutex_unlock (&priv->lock); -        if (ret) -                goto cleanup_thread; +        pthread_mutex_unlock (&scrub_monitor->mutex);          /**           * Everything has been setup.. add this subvolume to scrubbers @@ -1208,8 +1198,6 @@ br_launch_scrubber (xlator_t *this, br_child_t *child,          return 0; - cleanup_thread: -        (void) gf_thread_cleanup_xint (child->thread);   error_return:          return -1;  } @@ -1242,10 +1230,6 @@ br_enact_scrubber (xlator_t *this, br_child_t *child)          INIT_LIST_HEAD (&fsscan->queued);          INIT_LIST_HEAD (&fsscan->ready); -        /* init scheduler related variables */ -        pthread_mutex_init (&fsscan->wakelock, NULL); -        pthread_cond_init (&fsscan->wakecond, NULL); -          ret = br_launch_scrubber (this, child, fsscan, fsscrub);          if (ret)                  goto error_return; @@ -1266,7 +1250,7 @@ br_child_enaction (xlator_t *this, br_child_t *child, br_stub_init_t *stub)          int32_t ret = -1;          br_private_t *priv = this->private; -        LOCK (&child->lock); +        pthread_mutex_lock (&child->lock);          {                  if (priv->iamscrubber)                          ret = br_enact_scrubber (this, child); @@ -1281,7 +1265,7 @@ br_child_enaction (xlator_t *this, br_child_t *child, br_stub_init_t *stub)                                  "Connected to brick %s..", child->brick_path);                  }          } -        UNLOCK (&child->lock); +        pthread_mutex_unlock (&child->lock);          return ret;  } @@ -1308,6 +1292,7 @@ br_brick_connect (xlator_t *this, br_child_t *child)          GF_VALIDATE_OR_GOTO (this->name, child, out);          GF_VALIDATE_OR_GOTO (this->name, this->private, out); +        br_child_set_scrub_state (child, _gf_false);          br_set_child_state (child, BR_CHILD_STATE_INITIALIZING);          loc.inode = inode_ref (child->table->root); @@ -1369,12 +1354,17 @@ br_cleanup_scrubber (xlator_t *this, br_child_t *child)  {          int32_t ret = 0;          br_private_t *priv = NULL; -        struct br_scanfs *fsscan = NULL;          struct br_scrubber *fsscrub = NULL; +        struct br_monitor *scrub_monitor = NULL;          priv    = this->private; -        fsscan  = &child->fsscan;          fsscrub = &priv->fsscrub; +        scrub_monitor = &priv->scrub_monitor; + +        if (_br_is_child_scrub_active (child)) { +                scrub_monitor->active_child_count--; +                br_child_set_scrub_state (child, _gf_false); +        }          /**           * 0x0: child (brick) goes out of rotation @@ -1406,21 +1396,6 @@ br_cleanup_scrubber (xlator_t *this, br_child_t *child)                          0, BRB_MSG_SCRUB_THREAD_CLEANUP,                          "Error cleaning up scanner thread"); -        /** -         * 0x2: free()up resources -         */ -        if (fsscan->timer) { -                (void) gf_tw_del_timer (priv->timer_wheel, fsscan->timer); - -                GF_FREE (fsscan->timer); -                fsscan->timer = NULL; -        } - -        /** -         * 0x3: reset scrubber state -         */ -        _br_child_set_scrub_state (child, BR_SCRUB_STATE_INACTIVE); -          gf_msg (this->name, GF_LOG_INFO,                  0, BRB_MSG_SCRUBBER_CLEANED,                  "Cleaned up scrubber for brick [%s]", child->brick_path); @@ -1437,23 +1412,33 @@ int32_t  br_brick_disconnect (xlator_t *this, br_child_t *child)  {          int32_t ret = 0; +        struct br_monitor *scrub_monitor = NULL;          br_private_t *priv = this->private; -        LOCK (&child->lock); +        scrub_monitor = &priv->scrub_monitor; + +        /* Lock order should be wakelock and then child lock to +         * dead locks. +         */ +        pthread_mutex_lock (&scrub_monitor->wakelock);          { -                if (!_br_is_child_connected (child)) -                        goto unblock; +                pthread_mutex_lock (&child->lock); +                { +                        if (!_br_is_child_connected (child)) +                                goto unblock; -                /* child is on death row.. */ -                _br_set_child_state (child, BR_CHILD_STATE_DISCONNECTED); +                        /* child is on death row.. */ +                        _br_set_child_state (child, BR_CHILD_STATE_DISCONNECTED); -                if (priv->iamscrubber) -                        ret = br_cleanup_scrubber (this, child); -                else -                        ret = br_cleanup_signer (this, child); -        } +                        if (priv->iamscrubber) +                                ret = br_cleanup_scrubber (this, child); +                        else +                                ret = br_cleanup_signer (this, child); +                }   unblock: -        UNLOCK (&child->lock); +                pthread_mutex_unlock (&child->lock); +        } +        pthread_mutex_unlock (&scrub_monitor->wakelock);           return ret;  } @@ -1574,7 +1559,7 @@ br_scrubber_status_get (xlator_t *this, dict_t **dict)          memset (key, 0, 256);          snprintf (key, 256, "scrubbed-files"); -        ret = dict_set_uint32 (*dict, key, scrub_stats->scrubbed_files); +        ret = dict_set_uint64 (*dict, key, scrub_stats->scrubbed_files);          if (ret) {                  gf_msg_debug (this->name, 0, "Failed to setting scrubbed file "                                "entry to the dictionary"); @@ -1582,7 +1567,7 @@ br_scrubber_status_get (xlator_t *this, dict_t **dict)          memset (key, 0, 256);          snprintf (key, 256, "unsigned-files"); -        ret = dict_set_uint32 (*dict, key, scrub_stats->unsigned_files); +        ret = dict_set_uint64 (*dict, key, scrub_stats->unsigned_files);          if (ret) {                  gf_msg_debug (this->name, 0, "Failed to set unsigned file count"                                " entry to the dictionary"); @@ -1590,7 +1575,7 @@ br_scrubber_status_get (xlator_t *this, dict_t **dict)          memset (key, 0, 256);          snprintf (key, 256, "scrub-duration"); -        ret = dict_set_uint32 (*dict, key, scrub_stats->scrub_duration); +        ret = dict_set_uint64 (*dict, key, scrub_stats->scrub_duration);          if (ret) {                  gf_msg_debug (this->name, 0, "Failed to set scrub duration"                                " entry to the dictionary"); @@ -1848,6 +1833,33 @@ br_signer_init (xlator_t *this, br_private_t *priv)  }  static void +br_free_scrubber_monitor (xlator_t *this, br_private_t *priv) +{ +        struct br_monitor *scrub_monitor = &priv->scrub_monitor; + +        if (scrub_monitor->timer) { +                (void) gf_tw_del_timer (priv->timer_wheel, scrub_monitor->timer); + +                GF_FREE (scrub_monitor->timer); +                scrub_monitor->timer = NULL; +        } + +        (void) gf_thread_cleanup_xint (scrub_monitor->thread); + +        /* Clean up cond and mutex variables */ +        pthread_mutex_destroy (&scrub_monitor->mutex); +        pthread_cond_destroy (&scrub_monitor->cond); + +        pthread_mutex_destroy (&scrub_monitor->wakelock); +        pthread_cond_destroy (&scrub_monitor->wakecond); + +        pthread_mutex_destroy (&scrub_monitor->donelock); +        pthread_cond_destroy (&scrub_monitor->donecond); + +        LOCK_DESTROY (&scrub_monitor->lock); +} + +static void  br_free_children (xlator_t *this, br_private_t *priv, int count)  {          br_child_t *child = NULL; @@ -1855,7 +1867,7 @@ br_free_children (xlator_t *this, br_private_t *priv, int count)          for (--count; count >= 0; count--) {                  child = &priv->children[count];                  mem_pool_destroy (child->timer_pool); -                LOCK_DESTROY (&child->lock); +                pthread_mutex_destroy (&child->lock);          }          GF_FREE (priv->children); @@ -1879,10 +1891,9 @@ br_init_children (xlator_t *this, br_private_t *priv)          while (trav) {                  child = &priv->children[i]; -                LOCK_INIT (&child->lock); +                pthread_mutex_init (&child->lock, NULL);                  child->witnessed = 0; -                br_set_scrub_state (child, BR_SCRUB_STATE_INACTIVE);                  br_set_child_state (child, BR_CHILD_STATE_DISCONNECTED);                  child->this = this; @@ -2003,6 +2014,9 @@ fini (xlator_t *this)          if (!priv->iamscrubber)                  br_fini_signer (this, priv); +        else +                (void) br_free_scrubber_monitor (this, priv); +          br_free_children (this, priv, priv->child_count);          this->private = NULL; @@ -2012,26 +2026,23 @@ fini (xlator_t *this)  }  static void -br_reconfigure_child (xlator_t *this, br_child_t *child) +br_reconfigure_monitor (xlator_t *this)  {          int32_t ret = 0; -        ret = br_scrub_state_machine (this, child); +        ret = br_scrub_state_machine (this);          if (ret) {                  gf_msg (this->name, GF_LOG_ERROR, 0,                          BRB_MSG_RESCHEDULE_SCRUBBER_FAILED, -                        "Could not reschedule scrubber for brick: %s. Scubbing " -                        "will continue according to old frequency.", -                        child->brick_path); +                        "Could not reschedule scrubber for the volume. Scrubbing " +                        "will continue according to old frequency.");          }  }  static int  br_reconfigure_scrubber (xlator_t *this, dict_t *options)  { -        int           i     = 0;          int32_t       ret   = -1; -        br_child_t   *child = NULL;          br_private_t *priv  = NULL;          priv = this->private; @@ -2046,32 +2057,11 @@ br_reconfigure_scrubber (xlator_t *this, dict_t *options)                  goto err;          /* change state for all _up_ subvolume(s) */ -        for (; i < priv->child_count; i++) { -                child = &priv->children[i]; - -                LOCK (&child->lock); -                { -                        if (_br_child_failed_conn (child)) { -                                gf_msg (this->name, GF_LOG_INFO, -                                        0, BRB_MSG_BRICK_INFO, -                                        "Scrubber for brick [%s] failed " -                                        "initialization, rescheduling is " -                                        "skipped", child->brick_path); -                                goto unblock; -                        } - -                        if (_br_is_child_connected (child)) -                                br_reconfigure_child (this, child); - -                        /** -                         * for the rest.. either the child is in initialization -                         * phase or is disconnected. either way, updated values -                         * would be reflected on successful connection. -                         */ -                } -        unblock: -                UNLOCK (&child->lock); +        pthread_mutex_lock (&priv->lock); +        { +                br_reconfigure_monitor (this);          } +        pthread_mutex_unlock (&priv->lock);   err:          return ret; diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.h b/xlators/features/bit-rot/src/bitd/bit-rot.h index 39ce790b4e6..835b9ca3bcc 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot.h +++ b/xlators/features/bit-rot/src/bitd/bit-rot.h @@ -31,6 +31,7 @@  #include "bit-rot-common.h"  #include "bit-rot-stub-mem-types.h" +#include "bit-rot-scrub-status.h"  #include <openssl/sha.h> @@ -68,18 +69,6 @@ struct br_scanfs {          unsigned int     entries;          struct list_head queued;          struct list_head ready; - -        /* scheduler */ -        uint32_t boot; -        gf_boolean_t kick; -        gf_boolean_t over; - -        br_scrub_state_t state;   /* current scrub state */ - -        pthread_mutex_t wakelock; -        pthread_cond_t  wakecond; - -        struct gf_tw_timer_list *timer;  };  /* just need three states to track child status */ @@ -91,7 +80,7 @@ typedef enum br_child_state {  } br_child_state_t;  struct br_child { -        gf_lock_t lock;               /* protects child state */ +        pthread_mutex_t lock;         /* protects child state */          char witnessed;               /* witnessed at least one succesfull                                           connection */          br_child_state_t c_state;     /* current state of this child */ @@ -116,6 +105,8 @@ struct br_child {          struct timeval tv;          struct br_scanfs fsscan;      /* per subvolume FS scanner */ + +        gf_boolean_t active_scrubbing; /* Actively scrubbing or not */  };  typedef struct br_child br_child_t; @@ -157,27 +148,42 @@ struct br_scrubber {          struct list_head scrublist;  }; -typedef struct br_obj_n_workers br_obj_n_workers_t; +struct br_monitor { +        gf_lock_t lock; +        pthread_t thread;         /* Monitor thread */ -typedef struct br_private br_private_t; +        gf_boolean_t  inited; +        pthread_mutex_t mutex; +        pthread_cond_t cond;      /* Thread starts and will be waiting on cond. +                                     First child which is up wakes this up */ -typedef void (*br_scrubbed_file_update) (br_private_t *priv); +        xlator_t *this; +        /* scheduler */ +        uint32_t boot; -struct br_scrub_stats { -        uint32_t       scrubbed_files;       /* Total number of scrubbed file */ +        int32_t active_child_count; /* Number of children currently scrubbing */ +        gf_boolean_t kick;          /* This variable tracks the scrubber is +                                     * kicked or not. Both 'kick' and +                                     * 'active_child_count' uses the same pair +                                     * of mutex-cond variable, i.e, wakelock and +                                     * wakecond. */ -        uint32_t       unsigned_files;       /* Total number of unsigned file */ +        pthread_mutex_t wakelock; +        pthread_cond_t  wakecond; -        uint32_t       scrub_duration;            /* Duration of last scrub */ +        gf_boolean_t done; +        pthread_mutex_t donelock; +        pthread_cond_t  donecond; -        char           last_scrub_time[1024];    /*last scrub completion time */ +        struct gf_tw_timer_list *timer; +        br_scrub_state_t state;   /* current scrub state */ +}; -        struct         timeval scrub_start_tv;   /* Scrubbing starting time*/ +typedef struct br_obj_n_workers br_obj_n_workers_t; -        struct         timeval scrub_end_tv;     /* Scrubbing finishing time */ +typedef struct br_private br_private_t; -        pthread_mutex_t  lock; -}; +typedef void (*br_scrubbed_file_update) (br_private_t *priv);  struct br_private {          pthread_mutex_t lock; @@ -214,6 +220,8 @@ struct br_private {          struct br_scrub_stats scrub_stat; /* statistics of scrub*/          struct br_scrubber fsscrub;       /* scrubbers for this subvolume */ + +        struct br_monitor scrub_monitor;  /* scrubber monitor */  };  struct br_object { @@ -233,7 +241,7 @@ struct br_object {  };  typedef struct br_object br_object_t; -typedef int32_t (br_scrub_ssm_call) (xlator_t *, br_child_t *); +typedef int32_t (br_scrub_ssm_call) (xlator_t *);  void  br_log_object (xlator_t *, char *, uuid_t, int32_t); @@ -264,6 +272,12 @@ _br_is_child_connected (br_child_t *child)  }  static inline int +_br_is_child_scrub_active (br_child_t *child) +{ +        return child->active_scrubbing; +} + +static inline int  _br_child_failed_conn (br_child_t *child)  {          return (child->c_state == BR_CHILD_STATE_CONNFAILED); @@ -277,10 +291,10 @@ _br_child_witnessed_connection (br_child_t *child)  /* scrub state */  static inline void -_br_child_set_scrub_state (br_child_t *child, br_scrub_state_t state) +_br_monitor_set_scrub_state (struct br_monitor *scrub_monitor, +                           br_scrub_state_t state)  { -        struct br_scanfs *fsscan = &child->fsscan; -        fsscan->state = state; +        scrub_monitor->state = state;  }  static inline br_scrub_event_t  | 
