summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/dht/src
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/cluster/dht/src')
-rw-r--r--xlators/cluster/dht/src/dht-common.c6
-rw-r--r--xlators/cluster/dht/src/dht-common.h11
-rw-r--r--xlators/cluster/dht/src/dht-messages.h19
-rw-r--r--xlators/cluster/dht/src/dht-rebalance.c125
-rw-r--r--xlators/cluster/dht/src/dht-shared.c5
-rw-r--r--xlators/cluster/dht/src/tier.c68
6 files changed, 227 insertions, 7 deletions
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index 5e9d5c31748..e9985adfbf5 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -7663,10 +7663,14 @@ dht_notify (xlator_t *this, int event, void *data, ...)
cmd == GF_DEFRAG_CMD_STOP_DETACH_TIER)
gf_defrag_stop (defrag,
GF_DEFRAG_STATUS_STOPPED, output);
+ else if (cmd == GF_DEFRAG_CMD_PAUSE_TIER)
+ ret = gf_defrag_pause_tier (this, defrag);
+ else if (cmd == GF_DEFRAG_CMD_RESUME_TIER)
+ ret = gf_defrag_resume_tier (this, defrag);
}
unlock:
UNLOCK (&defrag->lock);
- return 0;
+ return ret;
break;
}
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index 26cf27a8676..6cc2959a831 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -298,7 +298,8 @@ enum gf_defrag_type {
GF_DEFRAG_CMD_STATUS_TIER = 1 + 6,
GF_DEFRAG_CMD_START_DETACH_TIER = 1 + 7,
GF_DEFRAG_CMD_STOP_DETACH_TIER = 1 + 8,
-
+ GF_DEFRAG_CMD_PAUSE_TIER = 1 + 9,
+ GF_DEFRAG_CMD_RESUME_TIER = 1 + 10,
};
typedef enum gf_defrag_type gf_defrag_type;
@@ -358,6 +359,8 @@ typedef struct gf_tier_conf {
int tier_demote_frequency;
uint64_t st_last_promoted_size;
uint64_t st_last_demoted_size;
+ int request_pause;
+ gf_boolean_t paused;
} gf_tier_conf_t;
struct gf_defrag_info_ {
@@ -987,6 +990,12 @@ int
gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict);
int
+gf_defrag_pause_tier (xlator_t *this, gf_defrag_info_t *defrag);
+
+int
+gf_defrag_resume_tier (xlator_t *this, gf_defrag_info_t *defrag);
+
+int
gf_defrag_start_detach_tier (gf_defrag_info_t *defrag);
int
diff --git a/xlators/cluster/dht/src/dht-messages.h b/xlators/cluster/dht/src/dht-messages.h
index c491600394c..de5062eb5e5 100644
--- a/xlators/cluster/dht/src/dht-messages.h
+++ b/xlators/cluster/dht/src/dht-messages.h
@@ -45,7 +45,7 @@
*/
#define GLFS_DHT_BASE GLFS_MSGID_COMP_DHT
-#define GLFS_DHT_NUM_MESSAGES 107
+#define GLFS_DHT_NUM_MESSAGES 109
#define GLFS_MSGID_END (GLFS_DHT_BASE + GLFS_DHT_NUM_MESSAGES + 1)
/* Messages with message IDs */
@@ -1007,5 +1007,22 @@
#define DHT_MSG_LOG_IPC_TIER_ERROR (GLFS_DHT_BASE + 107)
+/*
+ * @messageid 109108
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_TIER_PAUSED (GLFS_DHT_BASE + 108)
+
+/*
+ * @messageid 109108
+ * @diagnosis
+ * @recommendedaction None
+ */
+
+#define DHT_MSG_TIER_RESUME (GLFS_DHT_BASE + 109)
+
+
#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
#endif /* _DHT_MESSAGES_H_ */
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index 1e98142e9ec..3cb247f1865 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -731,7 +731,7 @@ out:
static inline int
__dht_rebalance_migrate_data (xlator_t *from, xlator_t *to, fd_t *src, fd_t *dst,
- uint64_t ia_size, int hole_exists)
+ uint64_t ia_size, int hole_exists)
{
int ret = 0;
int count = 0;
@@ -783,6 +783,68 @@ __dht_rebalance_migrate_data (xlator_t *from, xlator_t *to, fd_t *src, fd_t *dst
return ret;
}
+static int
+__tier_migrate_data (gf_defrag_info_t *defrag, xlator_t *from, xlator_t *to, fd_t *src, fd_t *dst,
+ uint64_t ia_size, int hole_exists)
+{
+ int ret = 0;
+ int count = 0;
+ off_t offset = 0;
+ struct iovec *vector = NULL;
+ struct iobref *iobref = NULL;
+ uint64_t total = 0;
+ size_t read_size = 0;
+
+ /* if file size is '0', no need to enter this loop */
+ while (total < ia_size) {
+
+ read_size = (((ia_size - total) > DHT_REBALANCE_BLKSIZE) ?
+ DHT_REBALANCE_BLKSIZE : (ia_size - total));
+
+ ret = syncop_readv (from, src, read_size,
+ offset, 0, &vector, &count, &iobref, NULL,
+ NULL);
+ if (!ret || (ret < 0)) {
+ break;
+ }
+
+ if (hole_exists)
+ ret = dht_write_with_holes (to, dst, vector, count,
+ ret, offset, iobref);
+ else
+ ret = syncop_writev (to, dst, vector, count,
+ offset, iobref, 0, NULL, NULL);
+ if (defrag->tier_conf.request_pause) {
+ gf_msg ("tier", GF_LOG_INFO, 0,
+ DHT_MSG_TIER_PAUSED,
+ "Migrate file paused");
+ ret = -1;
+ }
+
+ if (ret < 0) {
+ break;
+ }
+ offset += ret;
+ total += ret;
+
+ GF_FREE (vector);
+ if (iobref)
+ iobref_unref (iobref);
+ iobref = NULL;
+ vector = NULL;
+ }
+ if (iobref)
+ iobref_unref (iobref);
+ GF_FREE (vector);
+
+ if (ret >= 0)
+ ret = 0;
+ else
+ ret = -1;
+
+ return ret;
+}
+
static inline int
__dht_rebalance_open_src_file (xlator_t *from, xlator_t *to, loc_t *loc,
@@ -1255,8 +1317,14 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
/* All I/O happens in this function */
- ret = __dht_rebalance_migrate_data (from, to, src_fd, dst_fd,
- stbuf.ia_size, file_has_holes);
+ if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) {
+ ret = __tier_migrate_data (defrag, from, to, src_fd, dst_fd,
+ stbuf.ia_size, file_has_holes);
+ } else {
+ ret = __dht_rebalance_migrate_data (from, to, src_fd, dst_fd,
+ stbuf.ia_size, file_has_holes);
+ }
+
if (ret) {
gf_msg (this->name, GF_LOG_ERROR, 0,
DHT_MSG_MIGRATE_FILE_FAILED,
@@ -3420,6 +3488,57 @@ out:
}
int
+gf_defrag_pause_tier (xlator_t *this, gf_defrag_info_t *defrag)
+{
+ int poll = 0;
+ int ret = 0;
+ int usec_sleep = 100000; /* 1/10th of a sec */
+ int poll_max = 15; /* 15 times = wait at most 3/2 sec */
+
+ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED)
+ goto out;
+
+ /*
+ * Set flag requesting to pause tiering. Wait a finite time for
+ * tiering to actually stop as indicated by the "paused" boolean,
+ * before returning success or failure.
+ */
+ defrag->tier_conf.request_pause = 1;
+
+ for (poll = 0; poll < poll_max; poll++) {
+ if ((defrag->tier_conf.paused == _gf_true) ||
+ (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED)) {
+ goto out;
+ }
+
+ usleep (usec_sleep);
+ }
+
+ ret = -1;
+
+out:
+
+ gf_msg (this->name, GF_LOG_DEBUG, 0,
+ DHT_MSG_TIER_PAUSED,
+ "Pause tiering ret=%d", ret);
+
+ return ret;
+}
+
+int
+gf_defrag_resume_tier (xlator_t *this, gf_defrag_info_t *defrag)
+{
+ gf_msg (this->name, GF_LOG_DEBUG, 0,
+ DHT_MSG_TIER_RESUME,
+ "Resume tiering");
+
+ defrag->tier_conf.request_pause = 0;
+ defrag->tier_conf.paused = _gf_false;
+
+ return 0;
+}
+
+int
gf_defrag_start_detach_tier (gf_defrag_info_t *defrag)
{
defrag->cmd = GF_DEFRAG_CMD_START_DETACH_TIER;
diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c
index dc5211a55fd..75957d587e5 100644
--- a/xlators/cluster/dht/src/dht-shared.c
+++ b/xlators/cluster/dht/src/dht-shared.c
@@ -968,6 +968,11 @@ struct volume_options options[] = {
},
/* tier options */
+ { .key = {"tier-pause"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ },
+
{ .key = {"tier-promote-frequency"},
.type = GF_OPTION_TYPE_INT,
.default_value = "120",
diff --git a/xlators/cluster/dht/src/tier.c b/xlators/cluster/dht/src/tier.c
index efdc57fe644..ecb8709547c 100644
--- a/xlators/cluster/dht/src/tier.c
+++ b/xlators/cluster/dht/src/tier.c
@@ -311,6 +311,13 @@ tier_migrate_using_query_file (void *_args)
per_file_status = 0;
per_link_status = 0;
+ if (defrag->tier_conf.request_pause) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ DHT_MSG_LOG_TIER_STATUS,
+ "Tiering paused. Exiting tier_migrate_using_query_file");
+ break;
+ }
+
memset (gfid_str, 0, UUID_CANONICAL_FORM_LEN+1);
memset (query_record->_link_info_str, 0, DB_QUERY_RECORD_SIZE);
@@ -372,6 +379,14 @@ tier_migrate_using_query_file (void *_args)
/* Per link of file */
while (token_str != NULL) {
+ if (defrag->tier_conf.request_pause) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ DHT_MSG_LOG_TIER_STATUS,
+ "Tiering paused. "
+ "Exiting tier_migrate_using_query_file");
+ goto abort;
+ }
+
link_str = gf_strdup (token_str);
if (!link_info) {
@@ -489,6 +504,14 @@ tier_migrate_using_query_file (void *_args)
gf_uuid_copy (loc.gfid, loc.inode->gfid);
+ if (defrag->tier_conf.request_pause) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ DHT_MSG_LOG_TIER_STATUS,
+ "Tiering paused. "
+ "Exiting tier_migrate_using_query_file");
+ goto abort;
+ }
+
ret = syncop_setxattr (this, &loc, migrate_data, 0,
NULL, NULL);
if (ret) {
@@ -1348,6 +1371,11 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag)
goto out;
}
+ if (defrag->tier_conf.request_pause)
+ defrag->tier_conf.paused = _gf_true;
+ else
+ defrag->tier_conf.paused = _gf_false;
+
sleep(1);
if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
@@ -1370,6 +1398,11 @@ tier_start (xlator_t *this, gf_defrag_info_t *defrag)
goto out;
}
+ if ((defrag->tier_conf.paused) ||
+ (defrag->tier_conf.request_pause))
+ continue;
+
+
/* To have proper synchronization amongst all
* brick holding nodes, so that promotion and demotions
* start atomicly w.r.t promotion/demotion frequency
@@ -1660,6 +1693,7 @@ tier_init (xlator_t *this)
gf_defrag_info_t *defrag = NULL;
char *voldir = NULL;
char *mode = NULL;
+ char *paused = NULL;
ret = dht_init (this);
if (ret) {
@@ -1778,7 +1812,15 @@ tier_init (xlator_t *this)
defrag->tier_conf.mode = ret;
}
- ret = gf_asprintf (&voldir, "%s/%s",
+ defrag->tier_conf.request_pause = 0;
+
+ ret = dict_get_str (this->options,
+ "tier-pause", &paused);
+
+ if (paused && strcmp (paused, "on") == 0)
+ defrag->tier_conf.request_pause = 1;
+
+ ret = gf_asprintf(&voldir, "%s/%s",
DEFAULT_VAR_RUN_DIRECTORY,
this->name);
if (ret < 0)
@@ -1846,6 +1888,9 @@ tier_reconfigure (xlator_t *this, dict_t *options)
gf_defrag_info_t *defrag = NULL;
char *mode = NULL;
int migrate_mb = 0;
+ gf_boolean_t req_pause = _gf_false;
+ int ret = 0;
+
conf = this->private;
if (conf->defrag) {
@@ -1887,6 +1932,27 @@ tier_reconfigure (xlator_t *this, dict_t *options)
GF_OPTION_RECONF ("tier-max-files",
defrag->tier_conf.max_migrate_files, options,
int32, out);
+
+ GF_OPTION_RECONF ("tier-pause",
+ req_pause, options,
+ bool, out);
+
+ if (req_pause == _gf_true) {
+ ret = gf_defrag_pause_tier (this, defrag);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_LOG_TIER_ERROR,
+ "pause tier failed on reconfigure");
+ }
+ } else {
+ ret = gf_defrag_resume_tier (this, defrag);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_LOG_TIER_ERROR,
+ "resume tier failed on reconfigure");
+ }
+ }
+
}
out: