summaryrefslogtreecommitdiffstats
path: root/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
diff options
context:
space:
mode:
author: Venky Shankar <vshankar@redhat.com> 2015-06-09 10:02:11 +0530
committer: Raghavendra Bhat <raghavendra@redhat.com> 2015-06-25 05:57:22 -0700
commit: 913631a0edffaefc0fae73df46f80e19422ebed9 (patch)
tree: 9be7034a4cb95bb368d948c673278e6bd150d488 /xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
parent: f1139ca4f8532c35f5f8024610ddc5a412f970e8 (diff)
features/bitrot: handle scrub states via state machine
A bunch of command line options for the scrubber tempted the use of a state machine to track the current state of the scrubber under the various circumstances where the options could be in effect.

Change-Id: Id614bb2e6af30a90d2391ea31ae0a3edeb4e0d69
BUG: 1231619
Signed-off-by: Venky Shankar <vshankar@redhat.com>
Reviewed-on: http://review.gluster.org/11149
Tested-by: NetBSD Build System <jenkins@build.gluster.org>
Reviewed-by: Raghavendra Bhat <raghavendra@redhat.com>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Diffstat (limited to 'xlators/features/bit-rot/src/bitd/bit-rot-scrub.c')
-rw-r--r-- xlators/features/bit-rot/src/bitd/bit-rot-scrub.c | 181
1 files changed, 149 insertions, 32 deletions
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
index d9c17260259..ee50c8c97b7 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
@@ -523,6 +523,33 @@ br_fsscanner_handle_entry (xlator_t *subvol,
return -1;
}
+int32_t
+br_fsscan_deactivate (xlator_t *this, br_child_t *child)
+{
+ int ret = 0;
+ br_private_t *priv = NULL;
+ br_scrub_state_t nstate = 0;
+ struct br_scanfs *fsscan = NULL;
+
+ priv = this->private;
+ fsscan = &child->fsscan;
+
+ ret = gf_tw_del_timer (priv->timer_wheel, fsscan->timer);
+ if (ret == 0) {
+ nstate = BR_SCRUB_STATE_STALLED;
+ gf_log (this->name, GF_LOG_INFO, "Brick [%s] is under active "
+ "scrubbing. Pausing scrub..", child->brick_path);
+ } else {
+ nstate = BR_SCRUB_STATE_PAUSED;
+ gf_log (this->name, GF_LOG_INFO,
+ "Scrubber paused [Brick: %s]", child->brick_path);
+ }
+
+ _br_child_set_scrub_state (child, nstate);
+
+ return 0;
+}
+
static inline void
br_fsscanner_log_time (xlator_t *this, br_child_t *child, const char *sfx)
{
@@ -558,22 +585,51 @@ br_fsscanner_wait_until_kicked (struct br_scanfs *fsscan)
pthread_cleanup_pop (0);
}
+static inline void
+br_fsscanner_entry_control (xlator_t *this, br_child_t *child)
+{
+ struct br_scanfs *fsscan = &child->fsscan;
+
+ LOCK (&child->lock);
+ {
+ if (fsscan->state == BR_SCRUB_STATE_PENDING)
+ fsscan->state = BR_SCRUB_STATE_ACTIVE;
+ br_fsscanner_log_time (this, child, "started");
+ }
+ UNLOCK (&child->lock);
+}
+
+static inline void
+br_fsscanner_exit_control (xlator_t *this, br_child_t *child)
+{
+ struct br_scanfs *fsscan = &child->fsscan;
+
+ LOCK (&child->lock);
+ {
+ fsscan->over = _gf_true;
+ br_fsscanner_log_time (this, child, "finished");
+
+ if (fsscan->state == BR_SCRUB_STATE_ACTIVE) {
+ (void) br_fsscan_activate (this, child);
+ } else {
+ gf_log (this->name, GF_LOG_INFO, "Brick [%s] waiting "
+ "to get rescheduled..", child->brick_path);
+ }
+ }
+ UNLOCK (&child->lock);
+}
+
void *
br_fsscanner (void *arg)
{
loc_t loc = {0,};
br_child_t *child = NULL;
xlator_t *this = NULL;
- br_private_t *priv = NULL;
struct br_scanfs *fsscan = NULL;
- struct br_scrubber *fsscrub = NULL;
child = arg;
this = child->this;
- priv = this->private;
-
fsscan = &child->fsscan;
- fsscrub = &priv->fsscrub;
THIS = this;
loc.inode = child->table->root;
@@ -581,8 +637,8 @@ br_fsscanner (void *arg)
while (1) {
br_fsscanner_wait_until_kicked (fsscan);
{
- /* log start time */
- br_fsscanner_log_time (this, child, "started");
+ /* precursor for scrub */
+ br_fsscanner_entry_control (this, child);
/* scrub */
(void) syncop_ftw (child->xl,
@@ -591,15 +647,21 @@ br_fsscanner (void *arg)
if (!list_empty (&fsscan->queued))
wait_for_scrubbing (this, fsscan);
- /* log finish time */
- br_fsscanner_log_time (this, child, "finished");
+ /* scrub exit criteria */
+ br_fsscanner_exit_control (this, child);
}
- br_fsscan_reschedule (this, child, fsscan, fsscrub, _gf_false);
}
return NULL;
}
+/**
+ * Keep this routine extremely simple and do not ever try to acquire
+ * child->lock here: it may lead to deadlock. Scrubber state is
+ * modified in br_fsscanner(). An intermediate state change to pause
+ * changes the scrub state to the _correct_ state by identifying a
+ * non-pending timer.
+ */
void
br_kickstart_scanner (struct gf_tw_timer_list *timer,
void *data, unsigned long calltime)
@@ -661,28 +723,38 @@ br_fsscan_calculate_timeout (uint32_t boot, uint32_t now, scrub_freq_t freq)
break;
case BR_FSSCRUB_FREQ_MONTHLY:
timo = br_fsscan_calculate_delta (boot, now, BR_SCRUB_MONTHLY);
+ break;
+ default:
+ timo = 0;
}
return timo;
}
int32_t
-br_fsscan_schedule (xlator_t *this, br_child_t *child,
- struct br_scanfs *fsscan, struct br_scrubber *fsscrub)
+br_fsscan_schedule (xlator_t *this, br_child_t *child)
{
uint32_t timo = 0;
br_private_t *priv = NULL;
struct timeval tv = {0,};
char timestr[1024] = {0,};
+ struct br_scanfs *fsscan = NULL;
+ struct br_scrubber *fsscrub = NULL;
struct gf_tw_timer_list *timer = NULL;
priv = this->private;
+ fsscan = &child->fsscan;
+ fsscrub = &priv->fsscrub;
(void) gettimeofday (&tv, NULL);
fsscan->boot = tv.tv_sec;
timo = br_fsscan_calculate_timeout (fsscan->boot,
fsscan->boot, fsscrub->frequency);
+ if (timo == 0) {
+ gf_log (this->name, GF_LOG_ERROR, "BUG: Zero schedule timeout");
+ goto error_return;
+ }
fsscan->timer = GF_CALLOC (1, sizeof (*fsscan->timer),
gf_br_stub_mt_br_scanner_freq_t);
@@ -695,7 +767,9 @@ br_fsscan_schedule (xlator_t *this, br_child_t *child,
timer->data = child;
timer->expires = timo;
timer->function = br_kickstart_scanner;
+
gf_tw_add_timer (priv->timer_wheel, timer);
+ _br_child_set_scrub_state (child, BR_SCRUB_STATE_PENDING);
gf_time_fmt (timestr, sizeof (timestr),
(fsscan->boot + timo), gf_timefmt_FT);
@@ -709,39 +783,76 @@ br_fsscan_schedule (xlator_t *this, br_child_t *child,
}
int32_t
-br_fsscan_reschedule (xlator_t *this,
- br_child_t *child, struct br_scanfs *fsscan,
- struct br_scrubber *fsscrub, gf_boolean_t pendingcheck)
+br_fsscan_activate (xlator_t *this, br_child_t *child)
{
- int32_t ret = 0;
- uint32_t timo = 0;
- char timestr[1024] = {0,};
- struct timeval now = {0,};
- br_private_t *priv = NULL;
+ uint32_t timo = 0;
+ char timestr[1024] = {0,};
+ struct timeval now = {0,};
+ br_private_t *priv = NULL;
+ struct br_scanfs *fsscan = NULL;
+ struct br_scrubber *fsscrub = NULL;
priv = this->private;
+ fsscan = &child->fsscan;
+ fsscrub = &priv->fsscrub;
(void) gettimeofday (&now, NULL);
timo = br_fsscan_calculate_timeout (fsscan->boot,
now.tv_sec, fsscrub->frequency);
+ if (timo == 0) {
+ gf_log (this->name, GF_LOG_ERROR, "BUG: Zero schedule timeout");
+ return -1;
+ }
+ fsscan->over = _gf_false;
gf_time_fmt (timestr, sizeof (timestr),
(now.tv_sec + timo), gf_timefmt_FT);
+ (void) gf_tw_mod_timer (priv->timer_wheel, fsscan->timer, timo);
- if (pendingcheck)
- ret = gf_tw_mod_timer_pending (priv->timer_wheel,
- fsscan->timer, timo);
- else
- ret = gf_tw_mod_timer (priv->timer_wheel, fsscan->timer, timo);
+ _br_child_set_scrub_state (child, BR_SCRUB_STATE_PENDING);
+ gf_log (this->name, GF_LOG_INFO, "Scrubbing for %s rescheduled to run "
+ "at %s", child->brick_path, timestr);
+
+ return 0;
+}
+
+int32_t
+br_fsscan_reschedule (xlator_t *this, br_child_t *child)
+{
+ int32_t ret = 0;
+ uint32_t timo = 0;
+ char timestr[1024] = {0,};
+ struct timeval now = {0,};
+ br_private_t *priv = NULL;
+ struct br_scanfs *fsscan = NULL;
+ struct br_scrubber *fsscrub = NULL;
+
+ priv = this->private;
+ fsscan = &child->fsscan;
+ fsscrub = &priv->fsscrub;
+
+ (void) gettimeofday (&now, NULL);
+ timo = br_fsscan_calculate_timeout (fsscan->boot,
+ now.tv_sec, fsscrub->frequency);
+ if (timo == 0) {
+ gf_log (this->name, GF_LOG_ERROR, "BUG: Zero schedule timeout");
+ return -1;
+ }
+
+ gf_time_fmt (timestr, sizeof (timestr),
+ (now.tv_sec + timo), gf_timefmt_FT);
- if (!ret && pendingcheck)
- gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_RUNNING,
+ fsscan->over = _gf_false;
+ ret = gf_tw_mod_timer_pending (priv->timer_wheel, fsscan->timer, timo);
+ if (ret == 0)
+ gf_log (this->name, GF_LOG_INFO,
"Scrubber for %s is currently running and would be "
"rescheduled after completion", child->brick_path);
- else
- gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_RESCHEDULED,
- "Scrubbing for %s rescheduled "
+ else {
+ _br_child_set_scrub_state (child, BR_SCRUB_STATE_PENDING);
+ gf_log (this->name, GF_LOG_INFO, "Scrubbing for %s rescheduled "
"to run at %s", child->brick_path, timestr);
+ }
return 0;
}
@@ -1126,7 +1237,8 @@ br_scrubber_handle_stall (xlator_t *this, br_private_t *priv,
}
static int32_t
-br_scrubber_handle_freq (xlator_t *this, br_private_t *priv, dict_t *options)
+br_scrubber_handle_freq (xlator_t *this, br_private_t *priv,
+ dict_t *options, gf_boolean_t scrubstall)
{
int32_t ret = -1;
char *tmp = NULL;
@@ -1139,6 +1251,9 @@ br_scrubber_handle_freq (xlator_t *this, br_private_t *priv, dict_t *options)
if (ret)
goto error_return;
+ if (scrubstall)
+ tmp = BR_SCRUB_STALLED;
+
if (strcasecmp (tmp, "hourly") == 0) {
frequency = BR_FSSCRUB_FREQ_HOURLY;
} else if (strcasecmp (tmp, "daily") == 0) {
@@ -1149,6 +1264,8 @@ br_scrubber_handle_freq (xlator_t *this, br_private_t *priv, dict_t *options)
frequency = BR_FSSCRUB_FREQ_BIWEEKLY;
} else if (strcasecmp (tmp, "monthly") == 0) {
frequency = BR_FSSCRUB_FREQ_MONTHLY;
+ } else if (strcasecmp (tmp, BR_SCRUB_STALLED) == 0) {
+ frequency = BR_FSSCRUB_FREQ_STALLED;
} else
goto error_return;
@@ -1200,7 +1317,7 @@ br_scrubber_handle_options (xlator_t *this, br_private_t *priv, dict_t *options)
if (ret)
goto error_return;
- ret = br_scrubber_handle_freq (this, priv, options);
+ ret = br_scrubber_handle_freq (this, priv, options, scrubstall);
if (ret)
goto error_return;