From 0b3e4130b576c11156d6327e4cc3c9310a74c143 Mon Sep 17 00:00:00 2001 From: Kotresh HR Date: Fri, 5 Aug 2016 09:03:22 +0530 Subject: feature/bitrot: Ondemand scrub option for bitrot The bitrot scrubber takes 'hourly/daily/biweekly/monthly' as the values for 'scrub-frequency'. There is no way to schedule the scrubbing when the admin wants it. Ondemand scrubbing brings in the new option 'ondemand' with which the admin can start scrubbing ondemand. It starts the scrubbing immediately. Ondemand scrubbing is successful only if the scrubber is in 'Active (Idle)' (waiting for its next frequency cycle to start scrubbing). It is not entertained when the scrubber is in 'Paused' or already running. Here is the command line syntax. gluster volume bitrot <VOLNAME> scrub ondemand Change-Id: I84c28904367eed827a7dae8d6a535c14b28e9f4d BUG: 1366195 Signed-off-by: Kotresh HR Reviewed-on: http://review.gluster.org/15111 Smoke: Gluster Build System NetBSD-regression: NetBSD Build System CentOS-regression: Gluster Build System Reviewed-by: Venky Shankar --- xlators/features/bit-rot/src/bitd/bit-rot-scrub.c | 50 ++++++++++++++++++++++- xlators/features/bit-rot/src/bitd/bit-rot-scrub.h | 1 + xlators/features/bit-rot/src/bitd/bit-rot-ssm.c | 25 ++++++++---- xlators/features/bit-rot/src/bitd/bit-rot-ssm.h | 3 +- xlators/features/bit-rot/src/bitd/bit-rot.c | 29 ++++++++++++- xlators/features/bit-rot/src/bitd/bit-rot.h | 2 +- xlators/mgmt/glusterd/src/glusterd-bitrot.c | 29 +++++++++++++ xlators/mgmt/glusterd/src/glusterd-op-sm.c | 16 ++++++-- xlators/mgmt/glusterd/src/glusterd-rpc-ops.c | 1 + xlators/mgmt/glusterd/src/glusterd-syncop.c | 4 ++ xlators/mgmt/glusterd/src/glusterd.h | 1 + 11 files changed, 144 insertions(+), 17 deletions(-) (limited to 'xlators') diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c index 1db38e43ee8..72bdb843488 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c +++ 
b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c @@ -863,6 +863,7 @@ br_fsscan_calculate_delta (uint32_t times) return times; } +#define BR_SCRUB_ONDEMAND (1) #define BR_SCRUB_MINUTE (60) #define BR_SCRUB_HOURLY (60 * 60) #define BR_SCRUB_DAILY (1 * 24 * 60 * 60) @@ -1040,6 +1041,53 @@ br_fsscan_reschedule (xlator_t *this) return 0; } +int32_t +br_fsscan_ondemand (xlator_t *this) +{ + int32_t ret = 0; + uint32_t timo = 0; + char timestr[1024] = {0,}; + struct timeval now = {0,}; + br_private_t *priv = NULL; + struct br_scrubber *fsscrub = NULL; + struct br_monitor *scrub_monitor = NULL; + + priv = this->private; + fsscrub = &priv->fsscrub; + scrub_monitor = &priv->scrub_monitor; + + if (!fsscrub->frequency_reconf) + return 0; + + (void) gettimeofday (&now, NULL); + + timo = BR_SCRUB_ONDEMAND; + + gf_time_fmt (timestr, sizeof (timestr), + (now.tv_sec + timo), gf_timefmt_FT); + + pthread_mutex_lock (&scrub_monitor->donelock); + { + scrub_monitor->done = _gf_false; + } + pthread_mutex_unlock (&scrub_monitor->donelock); + + ret = gf_tw_mod_timer_pending (priv->timer_wheel, scrub_monitor->timer, + timo); + if (ret == 0) + gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO, + "Scrubber is currently running and would be " + "rescheduled after completion"); + else { + _br_monitor_set_scrub_state (scrub_monitor, + BR_SCRUB_STATE_PENDING); + gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO, + "Ondemand Scrubbing scheduled to run at %s", timestr); + } + + return 0; +} + #define BR_SCRUB_THREAD_SCALE_LAZY 0 #define BR_SCRUB_THREAD_SCALE_NORMAL 0.4 #define BR_SCRUB_THREAD_SCALE_AGGRESSIVE 1.0 @@ -1867,7 +1915,7 @@ br_monitor_thread (void *arg) /* this needs to be serialized with reconfigure() */ pthread_mutex_lock (&priv->lock); { - ret = br_scrub_state_machine (this); + ret = br_scrub_state_machine (this, _gf_false); } pthread_mutex_unlock (&priv->lock); if (ret) { diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h 
b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h index 63169068ed4..8cc88ec153e 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h +++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h @@ -20,6 +20,7 @@ int32_t br_fsscan_schedule (xlator_t *); int32_t br_fsscan_reschedule (xlator_t *); int32_t br_fsscan_activate (xlator_t *); int32_t br_fsscan_deactivate (xlator_t *); +int32_t br_fsscan_ondemand (xlator_t *); int32_t br_scrubber_handle_options (xlator_t *, br_private_t *, dict_t *); diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-ssm.c b/xlators/features/bit-rot/src/bitd/bit-rot-ssm.c index d304fc804ee..af887a1ff8b 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot-ssm.c +++ b/xlators/features/bit-rot/src/bitd/bit-rot-ssm.c @@ -84,16 +84,22 @@ br_scrub_ssm_state_stall (xlator_t *this) static br_scrub_ssm_call * br_scrub_ssm[BR_SCRUB_MAXSTATES][BR_SCRUB_MAXEVENTS] = { - {br_fsscan_schedule, br_scrub_ssm_state_ipause}, /* INACTIVE */ - {br_fsscan_reschedule, br_fsscan_deactivate}, /* PENDING */ - {br_scrub_ssm_noop, br_scrub_ssm_state_stall}, /* ACTIVE */ - {br_fsscan_activate, br_scrub_ssm_noop}, /* PAUSED */ - {br_fsscan_schedule, br_scrub_ssm_noop}, /* IPAUSED */ - {br_scrub_ssm_state_active, br_scrub_ssm_noop}, /* STALLED */ + /* INACTIVE */ + {br_fsscan_schedule, br_scrub_ssm_state_ipause, br_scrub_ssm_noop}, + /* PENDING */ + {br_fsscan_reschedule, br_fsscan_deactivate, br_fsscan_ondemand}, + /* ACTIVE */ + {br_scrub_ssm_noop, br_scrub_ssm_state_stall, br_scrub_ssm_noop}, + /* PAUSED */ + {br_fsscan_activate, br_scrub_ssm_noop, br_scrub_ssm_noop}, + /* IPAUSED */ + {br_fsscan_schedule, br_scrub_ssm_noop, br_scrub_ssm_noop}, + /* STALLED */ + {br_scrub_ssm_state_active, br_scrub_ssm_noop, br_scrub_ssm_noop}, }; int32_t -br_scrub_state_machine (xlator_t *this) +br_scrub_state_machine (xlator_t *this, gf_boolean_t scrub_ondemand) { br_private_t *priv = NULL; br_scrub_ssm_call *call = NULL; @@ -107,7 +113,10 @@ br_scrub_state_machine 
(xlator_t *this) scrub_monitor = &priv->scrub_monitor; currstate = scrub_monitor->state; - event = _br_child_get_scrub_event (fsscrub); + if (scrub_ondemand) + event = BR_SCRUB_EVENT_ONDEMAND; + else + event = _br_child_get_scrub_event (fsscrub); call = br_scrub_ssm[currstate][event]; return call (this); diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-ssm.h b/xlators/features/bit-rot/src/bitd/bit-rot-ssm.h index 936ee4d837c..8609477180b 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot-ssm.h +++ b/xlators/features/bit-rot/src/bitd/bit-rot-ssm.h @@ -26,11 +26,12 @@ typedef enum br_scrub_state { typedef enum br_scrub_event { BR_SCRUB_EVENT_SCHEDULE = 0, BR_SCRUB_EVENT_PAUSE, + BR_SCRUB_EVENT_ONDEMAND, BR_SCRUB_MAXEVENTS, } br_scrub_event_t; struct br_monitor; -int32_t br_scrub_state_machine (xlator_t *); +int32_t br_scrub_state_machine (xlator_t *, gf_boolean_t); #endif /* __BIT_ROT_SSM_H__ */ diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c index ca3fc273e9f..882ac1da6b4 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot.c +++ b/xlators/features/bit-rot/src/bitd/bit-rot.c @@ -1534,7 +1534,6 @@ _br_qchild_event (xlator_t *this, br_child_t *child, br_child_handler *call) int br_scrubber_status_get (xlator_t *this, dict_t **dict) { - int ret = -1; br_private_t *priv = NULL; struct br_scrub_stats *scrub_stats = NULL; @@ -1600,9 +1599,11 @@ notify (xlator_t *this, int32_t event, void *data, ...) br_private_t *priv = NULL; dict_t *output = NULL; va_list ap; + struct br_monitor *scrub_monitor = NULL; subvol = (xlator_t *)data; priv = this->private; + scrub_monitor = &priv->scrub_monitor; gf_msg_trace (this->name, 0, "Notification received: %d", event); @@ -1676,6 +1677,30 @@ notify (xlator_t *this, int32_t event, void *data, ...) 
ret = br_scrubber_status_get (this, &output); gf_msg_debug (this->name, 0, "returning %d", ret); break; + + case GF_EVENT_SCRUB_ONDEMAND: + gf_log (this->name, GF_LOG_INFO, "BitRot scrub ondemand " + "called"); + + if (scrub_monitor->state != BR_SCRUB_STATE_PENDING) + return -2; + + /* Needs synchronization with reconfigure thread */ + pthread_mutex_lock (&priv->lock); + { + ret = br_scrub_state_machine (this, _gf_true); + } + pthread_mutex_unlock (&priv->lock); + + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + BRB_MSG_RESCHEDULE_SCRUBBER_FAILED, + "Could not schedule ondemand scrubbing. " + "Scrubbing will continue according to " + "old frequency."); + } + gf_msg_debug (this->name, 0, "returning %d", ret); + break; default: default_notify (this, event, data); } @@ -2045,7 +2070,7 @@ br_reconfigure_monitor (xlator_t *this) { int32_t ret = 0; - ret = br_scrub_state_machine (this); + ret = br_scrub_state_machine (this, _gf_false); if (ret) { gf_msg (this->name, GF_LOG_ERROR, 0, BRB_MSG_RESCHEDULE_SCRUBBER_FAILED, diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.h b/xlators/features/bit-rot/src/bitd/bit-rot.h index b5448f76d52..c2c0c225792 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot.h +++ b/xlators/features/bit-rot/src/bitd/bit-rot.h @@ -297,7 +297,7 @@ static inline br_scrub_event_t _br_child_get_scrub_event (struct br_scrubber *fsscrub) { return (fsscrub->frequency == BR_FSSCRUB_FREQ_STALLED) - ? BR_SCRUB_EVENT_PAUSE : BR_SCRUB_EVENT_SCHEDULE; + ? 
BR_SCRUB_EVENT_PAUSE : BR_SCRUB_EVENT_SCHEDULE; } int32_t diff --git a/xlators/mgmt/glusterd/src/glusterd-bitrot.c b/xlators/mgmt/glusterd/src/glusterd-bitrot.c index 6e91106c8e5..8c5ddfd7896 100644 --- a/xlators/mgmt/glusterd/src/glusterd-bitrot.c +++ b/xlators/mgmt/glusterd/src/glusterd-bitrot.c @@ -138,6 +138,34 @@ __glusterd_handle_bitrot (rpcsvc_request_t *req) } } + if (type == GF_BITROT_CMD_SCRUB_ONDEMAND) { + /* Backward compatibility handling for scrub status command*/ + if (conf->op_version < GD_OP_VERSION_3_9_0) { + snprintf (msg, sizeof (msg), "Cannot execute command. " + "The cluster is operating at version %d. " + "Bitrot scrub ondemand command unavailable in " + "this version", conf->op_version); + ret = -1; + goto out; + } + + ret = dict_get_str (dict, "scrub-value", &scrub); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_DICT_GET_FAILED, + "Failed to get scrub value."); + ret = -1; + goto out; + } + + if (!strncmp (scrub, "ondemand", strlen ("ondemand"))) { + ret = glusterd_op_begin_synctask (req, + GD_OP_SCRUB_ONDEMAND, + dict); + goto out; + } + } + ret = glusterd_op_begin_synctask (req, GD_OP_BITROT, dict); out: @@ -572,6 +600,7 @@ glusterd_op_bitrot (dict_t *dict, char **op_errstr, dict_t *rsp_dict) if (ret) goto out; case GF_BITROT_CMD_SCRUB_STATUS: + case GF_BITROT_CMD_SCRUB_ONDEMAND: break; default: diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index bae9be872f4..b4eb8b13c66 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -714,6 +714,7 @@ glusterd_node_op_build_payload (glusterd_op_t op, gd1_mgmt_brick_op_req **req, break; case GD_OP_SCRUB_STATUS: + case GD_OP_SCRUB_ONDEMAND: brick_req = GF_CALLOC (1, sizeof(*brick_req), gf_gld_mt_mop_brick_req_t); if (!brick_req) @@ -4131,6 +4132,7 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx) case GD_OP_BARRIER: case GD_OP_BITROT: case 
GD_OP_SCRUB_STATUS: + case GD_OP_SCRUB_ONDEMAND: { do_common = _gf_true; } @@ -4725,6 +4727,7 @@ glusterd_op_modify_op_ctx (glusterd_op_t op, void *ctx) */ case GD_OP_DEFRAG_BRICK_VOLUME: case GD_OP_SCRUB_STATUS: + case GD_OP_SCRUB_ONDEMAND: ret = dict_get_int32 (op_ctx, "count", &count); if (ret) { gf_msg_debug (this->name, 0, @@ -4772,10 +4775,11 @@ glusterd_op_modify_op_ctx (glusterd_op_t op, void *ctx) GD_MSG_CONVERSION_FAILED, "Failed uuid to hostname conversion"); - /* Since Both rebalance and bitrot scrub status are going to - * use same code path till here, we should break in case - * of scrub status */ - if (op == GD_OP_SCRUB_STATUS) { + /* Since Both rebalance and bitrot scrub status/ondemand + * are going to use same code path till here, we should + * break in case of scrub status. + */ + if (op == GD_OP_SCRUB_STATUS || op == GD_OP_SCRUB_ONDEMAND) { break; } @@ -5442,6 +5446,7 @@ glusterd_need_brick_op (glusterd_op_t op) case GD_OP_DEFRAG_BRICK_VOLUME: case GD_OP_HEAL_VOLUME: case GD_OP_SCRUB_STATUS: + case GD_OP_SCRUB_ONDEMAND: ret = _gf_true; break; default: @@ -5713,6 +5718,7 @@ glusterd_op_stage_validate (glusterd_op_t op, dict_t *dict, char **op_errstr, case GD_OP_BITROT: case GD_OP_SCRUB_STATUS: + case GD_OP_SCRUB_ONDEMAND: ret = glusterd_op_stage_bitrot (dict, op_errstr, rsp_dict); break; @@ -5838,6 +5844,7 @@ glusterd_op_commit_perform (glusterd_op_t op, dict_t *dict, char **op_errstr, case GD_OP_BITROT: case GD_OP_SCRUB_STATUS: + case GD_OP_SCRUB_ONDEMAND: ret = glusterd_op_bitrot (dict, op_errstr, rsp_dict); break; @@ -7288,6 +7295,7 @@ glusterd_op_bricks_select (glusterd_op_t op, dict_t *dict, char **op_errstr, ret = glusterd_bricks_select_snap (dict, op_errstr, selected); break; case GD_OP_SCRUB_STATUS: + case GD_OP_SCRUB_ONDEMAND: ret = glusterd_bricks_select_scrub (dict, op_errstr, selected); break; default: diff --git a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c index 
890ccf06cdc..9cf6fda5152 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c @@ -143,6 +143,7 @@ glusterd_op_send_cli_response (glusterd_op_t op, int32_t op_ret, case GD_OP_BARRIER: case GD_OP_BITROT: case GD_OP_SCRUB_STATUS: + case GD_OP_SCRUB_ONDEMAND: { /*nothing specific to be done*/ break; diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c index 7c5721f25d0..7aeda77a59d 100644 --- a/xlators/mgmt/glusterd/src/glusterd-syncop.c +++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c @@ -309,6 +309,10 @@ glusterd_syncop_aggr_rsp_dict (glusterd_op_t op, dict_t *aggr, dict_t *rsp) case GD_OP_SCRUB_STATUS: ret = glusterd_volume_bitrot_scrub_use_rsp_dict (aggr, rsp); break; + + case GD_OP_SCRUB_ONDEMAND: + break; + default: break; } diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index 77eded4f29c..c07fe42fe61 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -117,6 +117,7 @@ typedef enum glusterd_op_ { GD_OP_DETACH_TIER, GD_OP_TIER_MIGRATE, GD_OP_SCRUB_STATUS, + GD_OP_SCRUB_ONDEMAND, GD_OP_MAX, } glusterd_op_t; -- cgit