summaryrefslogtreecommitdiffstats
path: root/xlators/mgmt
diff options
context:
space:
mode:
authorDan Lambright <dlambrig@redhat.com>2015-09-18 00:49:06 -0400
committerDan Lambright <dlambrig@redhat.com>2015-10-10 06:00:04 -0700
commit672baab88fb7f32e844cd4be22e0924e4e0e83fc (patch)
tree8a05387b3508cc8f15ec448ab2e7b2baac897413 /xlators/mgmt
parenta4f982be9b21323038704069a56fb2448369d6a0 (diff)
cluster/tier: add watermarks and policy driver
This fix introduces infrastructure to support different policies for promotion and demotion. Currently the tier feature automatically promotes and demotes files periodically based on access. This is good for testing but too stringent for most real workloads. It makes it difficult to fully utilize a hot tier- data will be demoted before it is touched- its unlikely a 100GB hot SSD will have all its data touched in a window of time. A new parameter "mode" allows the user to pick promotion/demotion polcies. The "test mode" will be used for *.t and other general testing. This is the current mechanism. The "cache mode" introduces watermarks. The watermarks represent levels of data residing on the hot tier. "cache mode" policy: The % the hot tier is full is called P. Do not promote or demote more than D MB or F files. A random number [0-100] is called R. Rules for migration: if (P < watermark_low) don't demote, always promote. if (P >= watermark_low) && (P < watermark_hi) demote if R < P; promote if R > P. if (P > watermark_hi) always demote, don't promote. gluster volume set {vol} cluster.watermark-hi % gluster volume set {vol} cluster.watermark-low % gluster volume set {vol} cluster.tier-max-mb {D} gluster volume set {vol} cluster.tier-max-files {F} gluster volume set {vol} cluster.tier-mode {test|cache} Change-Id: I157f19667ec95aa1d53406041c1e3b073be127c2 BUG: 1257911 Signed-off-by: Dan Lambright <dlambrig@redhat.com> Reviewed-on: http://review.gluster.org/12039 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
Diffstat (limited to 'xlators/mgmt')
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-set.c135
1 files changed, 122 insertions, 13 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index c62f2d79c1f..8fdee165c68 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -19,6 +19,10 @@ validate_tier (glusterd_volinfo_t *volinfo, dict_t *dict, char *key,
int ret = 0;
xlator_t *this = NULL;
int origin_val = -1;
+ char *current_wm_hi = NULL;
+ char *current_wm_low = NULL;
+ uint64_t wm_hi = 0;
+ uint64_t wm_low = 0;
this = THIS;
GF_ASSERT (this);
@@ -34,12 +38,20 @@ validate_tier (glusterd_volinfo_t *volinfo, dict_t *dict, char *key,
goto out;
}
+ if (strstr (key, "cluster.tier-mode")) {
+ if (strcmp(value, "test") &&
+ strcmp(value, "cache")) {
+ ret = -1;
+ goto out;
+ }
+ goto out;
+ }
+
/*
- * All the volume set options for tier are expecting a positive
+ * Rest of the volume set options for tier are expecting a positive
* Integer. Change the function accordingly if this constraint is
* changed.
*/
-
ret = gf_string2int (value, &origin_val);
if (ret) {
snprintf (errstr, sizeof (errstr), "%s is not a compatible "
@@ -51,13 +63,55 @@ validate_tier (glusterd_volinfo_t *volinfo, dict_t *dict, char *key,
ret = -1;
goto out;
}
+ if (strstr (key, "watermark-hi") ||
+ strstr (key, "watermark-low")) {
+ if ((origin_val < 1) || (origin_val > 99)) {
+ snprintf (errstr, sizeof (errstr), "%s is not a compatible"
+ "value. %s expects a percentage from 1-99.",
+ value, key);
+ gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+ GD_MSG_INCOMPATIBLE_VALUE, "%s", errstr);
+ *op_errstr = gf_strdup (errstr);
+ ret = -1;
+ goto out;
+ }
+
+ if (strstr (key, "watermark-hi")) {
+ wm_hi = origin_val;
+ } else {
+ glusterd_volinfo_get (volinfo,
+ "cluster.watermark-hi",
+ &current_wm_hi);
+ gf_string2bytesize_uint64 (current_wm_hi,
+ &wm_hi);
+ }
- if (strstr ("cluster.tier-promote-frequency", key) ||
- strstr ("cluster.tier-demote-frequency", key)) {
+ if (strstr (key, "watermark-low")) {
+ wm_low = origin_val;
+ } else {
+ glusterd_volinfo_get (volinfo,
+ "cluster.watermark-low",
+ &current_wm_low);
+ gf_string2bytesize_uint64 (current_wm_low,
+ &wm_low);
+ }
+ if (wm_low > wm_hi) {
+ snprintf (errstr, sizeof (errstr), "lower watermark"
+ " cannot exceed upper watermark.");
+ gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+ GD_MSG_INCOMPATIBLE_VALUE, "%s", errstr);
+ *op_errstr = gf_strdup (errstr);
+ ret = -1;
+ goto out;
+ }
+ } else if (strstr (key, "tier-promote-frequency") ||
+ strstr (key, "tier-max-mb") ||
+ strstr (key, "tier-max-files") ||
+ strstr (key, "tier-demote-frequency")) {
if (origin_val < 1) {
snprintf (errstr, sizeof (errstr), "%s is not a "
- "compatible value. %s expects a positive "
- "integer value.",
+ " compatible value. %s expects a positive "
+ "integer value greater than 0.",
value, key);
gf_msg (this->name, GF_LOG_ERROR, EINVAL,
GD_MSG_INCOMPATIBLE_VALUE, "%s", errstr);
@@ -65,10 +119,12 @@ validate_tier (glusterd_volinfo_t *volinfo, dict_t *dict, char *key,
ret = -1;
goto out;
}
+
} else {
+ /* check write-freq-threshold and read-freq-threshold. */
if (origin_val < 0) {
snprintf (errstr, sizeof (errstr), "%s is not a "
- "compatible value. %s expects a non-negative"
+ "compatible value. %s expects a positive"
" integer value.",
value, key);
gf_msg (this->name, GF_LOG_ERROR, EINVAL,
@@ -1906,6 +1962,7 @@ struct volopt_map_entry glusterd_volopt_map[] = {
/* tier translator - global tunables */
{ .key = "cluster.write-freq-threshold",
.voltype = "cluster/tier",
+ .value = "0",
.option = "write-freq-threshold",
.op_version = GD_OP_VERSION_3_7_0,
.flags = OPT_FLAG_CLIENT_OPT,
@@ -1917,6 +1974,7 @@ struct volopt_map_entry glusterd_volopt_map[] = {
},
{ .key = "cluster.read-freq-threshold",
.voltype = "cluster/tier",
+ .value = "0",
.option = "read-freq-threshold",
.op_version = GD_OP_VERSION_3_7_0,
.flags = OPT_FLAG_CLIENT_OPT,
@@ -1928,23 +1986,74 @@ struct volopt_map_entry glusterd_volopt_map[] = {
},
{ .key = "cluster.tier-promote-frequency",
.voltype = "cluster/tier",
+ .value = "120",
.option = "tier-promote-frequency",
.op_version = GD_OP_VERSION_3_7_0,
.flags = OPT_FLAG_CLIENT_OPT,
.validate_fn = validate_tier,
- .description = "Defines how often the promotion should be triggered "
- "i.e. periodicity of promotion cycles. The value is in "
- "secs."
},
{ .key = "cluster.tier-demote-frequency",
.voltype = "cluster/tier",
+ .value = "120",
.option = "tier-demote-frequency",
.op_version = GD_OP_VERSION_3_7_0,
.flags = OPT_FLAG_CLIENT_OPT,
.validate_fn = validate_tier,
- .description = "Defines how often the demotion should be triggered "
- "i.e. periodicity of demotion cycles. The value is in "
- "secs."
+ },
+ { .key = "cluster.watermark-hi",
+ .voltype = "cluster/tier",
+ .value = "90",
+ .option = "watermark-hi",
+ .op_version = GD_OP_VERSION_3_7_6,
+ .flags = OPT_FLAG_CLIENT_OPT,
+ .validate_fn = validate_tier,
+ .description = "Upper % watermark for promotion. If hot tier fills"
+ " above this percentage, no promotion will happen and demotion will "
+ "happen with high probability."
+ },
+ { .key = "cluster.watermark-low",
+ .voltype = "cluster/tier",
+ .value = "75",
+ .option = "watermark-low",
+ .op_version = GD_OP_VERSION_3_7_6,
+ .flags = OPT_FLAG_CLIENT_OPT,
+ .validate_fn = validate_tier,
+ .description = "Lower % watermark. If hot tier is less "
+ "full than this, promotion will happen and demotion will not happen. "
+ "If greater than this, promotion/demotion will happen at a probability "
+ "relative to how full the hot tier is."
+ },
+ { .key = "cluster.tier-mode",
+ .voltype = "cluster/tier",
+ .option = "tier-mode",
+ .value = "test",
+ .op_version = GD_OP_VERSION_3_7_6,
+ .flags = OPT_FLAG_CLIENT_OPT,
+ .validate_fn = validate_tier,
+ .description = "Either 'test' or 'cache'. Test mode periodically"
+ " demotes or promotes files automatically based on access."
+ " Cache mode does so based on whether the cache is full or not,"
+ " as specified with watermarks."
+ },
+ { .key = "cluster.tier-max-mb",
+ .voltype = "cluster/tier",
+ .option = "tier-max-mb",
+ .value = "1000",
+ .op_version = GD_OP_VERSION_3_7_6,
+ .flags = OPT_FLAG_CLIENT_OPT,
+ .validate_fn = validate_tier,
+ .description = "The maximum number of MB that may be migrated"
+ " in any direction in a given cycle."
+ },
+ { .key = "cluster.tier-max-files",
+ .voltype = "cluster/tier",
+ .option = "tier-max-files",
+ .value = "5000",
+ .op_version = GD_OP_VERSION_3_7_6,
+ .flags = OPT_FLAG_CLIENT_OPT,
+ .validate_fn = validate_tier,
+ .description = "The maximum number of files that may be migrated"
+ " in any direction in a given cycle."
},
{ .key = "features.ctr-enabled",
.voltype = "features/changetimerecorder",