summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDan Lambright <dlambrig@redhat.com>2015-02-25 16:11:23 -0500
committerVijay Bellur <vbellur@redhat.com>2015-03-19 06:32:28 -0700
commit6f71bc02df5bd177c2f5dbf4e54b2af1525ab979 (patch)
treea676a70da909dedebc21dca408fafc9dee9d5810
parent99586305f66d6b5e81542139d84fbf111ace2554 (diff)
glusterd: CLI commands to create and manage tiered volumes.
A tiered volume is a normal volume with some number of new bricks representing "hot" storage. The "hot" bricks can be attached or detached dynamically to a normal volume. When this happens, a new graph is constructed. The root of the new graph is an instance of the tier translator. One subvolume of the tier translator leads to the old volume, and another leads to the new hot bricks. attach-tier <VOLNAME> [<replica> <COUNT>] <NEW-BRICK> ... [force] volume detach-tier <VOLNAME> [replica <COUNT>] <BRICK> ... <start|stop|status|commit|force> gluster volume rebalance <volume> tier start gluster volume rebalance <volume> tier stop gluster volume rebalance <volume> tier status The "tier start" CLI command starts a server side daemon. The daemon initiates file level migration based on caching policies. The daemon's status can be monitored and stopped. Note development on the "tier status" command is incomplete. It will be added in a subsequent patch. When the "hot" storage is detached, the tier translator is removed from the graph and the tiered volume reverts to its original state as described in the volume's info file. For more background and design see the feature page [1]. [1] http://www.gluster.org/community/documentation/index.php/Features/data-classification Change-Id: Ic8042ce37327b850b9e199236e5be3dae95d2472 BUG: 1194753 Signed-off-by: Dan Lambright <dlambrig@redhat.com> Reviewed-on: http://review.gluster.org/9753 Reviewed-by: Vijay Bellur <vbellur@redhat.com> Tested-by: Vijay Bellur <vbellur@redhat.com>
-rw-r--r--cli/src/cli-cmd-parser.c23
-rw-r--r--cli/src/cli-cmd-volume.c144
-rw-r--r--cli/src/cli-rpc-ops.c215
-rw-r--r--rpc/rpc-lib/src/protocol-common.h2
-rw-r--r--rpc/xdr/src/cli1-xdr.x10
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-brick-ops.c135
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handler.c2
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.c10
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rebalance.c15
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.c83
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.h8
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c1
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.c181
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-set.c26
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.h9
15 files changed, 821 insertions, 43 deletions
diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c
index 5520c9e46b1..54a57008457 100644
--- a/cli/src/cli-cmd-parser.c
+++ b/cli/src/cli-cmd-parser.c
@@ -355,6 +355,10 @@ cli_validate_disperse_volume (char *word, gf1_cluster_type type,
cli_err ("striped-replicated-dispersed volume "
"is not supported");
goto out;
+ case GF_CLUSTER_TYPE_TIER:
+ cli_err ("tier-dispersed volume is not "
+ "supported");
+ goto out;
case GF_CLUSTER_TYPE_STRIPE:
cli_err ("striped-dispersed volume is not "
"supported");
@@ -490,6 +494,11 @@ cli_cmd_volume_create_parse (struct cli_state *state, const char **words,
case GF_CLUSTER_TYPE_STRIPE:
type = GF_CLUSTER_TYPE_STRIPE_REPLICATE;
break;
+ case GF_CLUSTER_TYPE_TIER:
+ cli_err ("replicated-tiered volume is not "
+ "supported");
+ goto out;
+ break;
case GF_CLUSTER_TYPE_DISPERSE:
cli_err ("replicated-dispersed volume is not "
"supported");
@@ -529,6 +538,10 @@ cli_cmd_volume_create_parse (struct cli_state *state, const char **words,
cli_err ("striped-dispersed volume is not "
"supported");
goto out;
+ case GF_CLUSTER_TYPE_TIER:
+ cli_err ("striped-tier volume is not "
+ "supported");
+ goto out;
}
if (wordcount < (index + 2)) {
ret = -1;
@@ -3384,6 +3397,16 @@ cli_cmd_volume_defrag_parse (const char **words, int wordcount,
if (strcmp (words[3], "start") && strcmp (words[3], "stop") &&
strcmp (words[3], "status"))
goto out;
+        /* "volume rebalance <vol> tier <start|status>" needs 5 words;
+         * guard wordcount before touching words[4] (it is NULL when the
+         * sub-command is missing, and strcmp(NULL, ...) is UB). */
+        } else if ((wordcount == 5) && (strcmp (words[3], "tier") == 0) &&
+                   (strcmp (words[4], "start") == 0)) {
+                volname = (char *) words[2];
+                cmd = GF_DEFRAG_CMD_START_TIER;
+                goto done;
+        } else if ((wordcount == 5) && (strcmp (words[3], "tier") == 0) &&
+                   (strcmp (words[4], "status") == 0)) {
+                volname = (char *) words[2];
+                cmd = GF_DEFRAG_CMD_STATUS_TIER;
+                goto done;
} else {
if (strcmp (words[3], "fix-layout") &&
strcmp (words[3], "start"))
diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c
index 6c950da4e97..3098d74491c 100644
--- a/cli/src/cli-cmd-volume.c
+++ b/cli/src/cli-cmd-volume.c
@@ -840,6 +840,142 @@ out:
return ret;
}
+/* CLI handler for "volume attach-tier <VOLNAME> [...] <NEW-BRICK>...".
+ * Reuses the add-brick parser to build the options dict, tags the
+ * request with "attach-tier" and type GF_CLUSTER_TYPE_TIER, and sends
+ * it to glusterd via the ATTACH_TIER RPC.  Returns 0 on success. */
+int
+cli_cmd_volume_attach_tier_cbk (struct cli_state *state,
+                                struct cli_cmd_word *word, const char **words,
+                                int wordcount)
+{
+        int                   ret         = -1;
+        rpc_clnt_procedure_t *proc        = NULL;
+        call_frame_t         *frame       = NULL;
+        dict_t               *options     = NULL;
+        int                   sent        = 0;
+        int                   parse_error = 0;
+        cli_local_t          *local       = NULL;
+
+        frame = create_frame (THIS, THIS->ctx->pool);
+        if (!frame)
+                goto out;
+
+        ret = cli_cmd_volume_add_brick_parse (words, wordcount, &options);
+        if (ret) {
+                cli_usage_out (word->pattern);
+                parse_error = 1;
+                goto out;
+        }
+
+        /* In script mode there is no interactive confirmation, so force
+         * the operation server-side. */
+        if (state->mode & GLUSTER_MODE_WIGNORE) {
+                ret = dict_set_int32 (options, "force", _gf_true);
+                if (ret) {
+                        gf_log ("cli", GF_LOG_ERROR, "Failed to set force "
+                                "option");
+                        goto out;
+                }
+        }
+
+        ret = dict_set_int32 (options, "attach-tier", 1);
+        if (ret)
+                goto out;
+
+        ret = dict_set_int32 (options, "type", GF_CLUSTER_TYPE_TIER);
+        if (ret)
+                goto out;
+
+        proc = &cli_rpc_prog->proctable[GLUSTER_CLI_ATTACH_TIER];
+
+        CLI_LOCAL_INIT (local, words, frame, options);
+
+        if (proc->fn) {
+                ret = proc->fn (frame, THIS, options);
+        }
+
+out:
+        if (ret) {
+                /* Only report here if the RPC layer did not already print
+                 * an error and parsing succeeded. */
+                cli_cmd_sent_status_get (&sent);
+                if ((sent == 0) && (parse_error == 0))
+                        cli_out ("attach-tier failed");
+        }
+
+        CLI_STACK_DESTROY (frame);
+
+        return ret;
+}
+
+/* CLI handler for "volume detach-tier <VOLNAME>": builds a forced
+ * GF_OP_CMD_DETACH remove-brick request for the volume's hot tier
+ * (glusterd derives the actual hot bricks server-side) and sends it
+ * via the DETACH_TIER RPC.  Returns 0 on success. */
+int
+cli_cmd_volume_detach_tier_cbk (struct cli_state *state,
+ struct cli_cmd_word *word, const char **words,
+ int wordcount)
+{
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ gf_answer_t answer = GF_ANSWER_NO;
+ cli_local_t *local = NULL;
+ /* NOTE(review): need_question is never set non-zero, so the
+ * data-loss confirmation below is dead code even though the request
+ * is sent with force=1 — confirm whether detach should prompt. */
+ int need_question = 0;
+
+ const char *question = "Removing tier can result in data loss. "
+ "Do you want to Continue?";
+
+ /* NOTE(review): a bad wordcount only prints the generic failure
+ * message in "out:"; cli_usage_out(word->pattern) would be clearer. */
+ if (wordcount != 3)
+ goto out;
+
+ frame = create_frame (THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+
+ options = dict_new ();
+ if (!options)
+ goto out;
+
+ ret = dict_set_int32 (options, "force", 1);
+ if (ret)
+ goto out;
+
+ ret = dict_set_int32 (options, "command", GF_OP_CMD_DETACH);
+ if (ret)
+ goto out;
+
+ ret = dict_set_str (options, "volname", (char *)words[2]);
+ if (ret)
+ goto out;
+
+ /* Placeholder brick count; glusterd replaces it with the real hot
+ * brick count (see glusterd_set_detach_bricks). */
+ ret = dict_set_int32 (options, "count", 1);
+ if (ret)
+ goto out;
+
+ if (!(state->mode & GLUSTER_MODE_SCRIPT) && need_question) {
+ /* we need to ask question only in case of 'commit or force' */
+ answer = cli_cmd_get_confirmation (state, question);
+ if (GF_ANSWER_NO == answer) {
+ ret = 0;
+ goto out;
+ }
+ }
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_DETACH_TIER];
+
+ CLI_LOCAL_INIT (local, words, frame, options);
+
+ if (proc->fn) {
+ ret = proc->fn (frame, THIS, options);
+ }
+
+out:
+ if (ret) {
+ cli_cmd_sent_status_get (&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out ("Volume detach-tier failed");
+ }
+
+ CLI_STACK_DESTROY (frame);
+
+ return ret;
+}
+
static int
gf_cli_create_auxiliary_mount (char *volname)
{
@@ -2435,6 +2571,14 @@ struct cli_cmd volume_cmds[] = {
cli_cmd_volume_rename_cbk,
"rename volume <VOLNAME> to <NEW-VOLNAME>"},*/
+ { "volume attach-tier <VOLNAME> [<replica COUNT>] <NEW-BRICK>...",
+ cli_cmd_volume_attach_tier_cbk,
+ "attach tier to volume <VOLNAME>"},
+
+ { "volume detach-tier <VOLNAME>",
+ cli_cmd_volume_detach_tier_cbk,
+ "detach tier from volume <VOLNAME>"},
+
{ "volume add-brick <VOLNAME> [<stripe|replica> <COUNT>] <NEW-BRICK> ... [force]",
cli_cmd_volume_add_brick_cbk,
"add brick to volume <VOLNAME>"},
diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c
index 6e66e377ed5..c9b01694436 100644
--- a/cli/src/cli-rpc-ops.c
+++ b/cli/src/cli-rpc-ops.c
@@ -61,6 +61,7 @@ char *cli_vol_type_str[] = {"Distribute",
"Replicate",
"Striped-Replicate",
"Disperse",
+ "Tier",
"Distributed-Stripe",
"Distributed-Replicate",
"Distributed-Striped-Replicate",
@@ -739,8 +740,9 @@ xml_output:
vol_type = type;
// Distributed (stripe/replicate/stripe-replica) setups
- if ((type > 0) && ( dist_count < brick_count))
- vol_type = type + 4;
+ if ((type != GF_CLUSTER_TYPE_TIER) && (type > 0) &&
+ (dist_count < brick_count))
+ vol_type = type + 5;
cli_out ("Volume Name: %s", volname);
cli_out ("Type: %s", cli_vol_type_str[vol_type]);
@@ -1441,6 +1443,134 @@ out:
}
int
+gf_cli_print_tier_status (dict_t *dict, enum gf_task_types task_type)
+{
+        /* Print one table row per node of tier-rebalance statistics read
+         * from @dict.  Keys are "<field>-<i>" for i in 1..count; nodes
+         * whose status is NOT_STARTED are skipped.  Returns 0 on success,
+         * or the first fatal dict-read error ("count"/"status-N"). */
+        int                ret = -1;
+        int                count = 0;
+        int                i = 1;
+        char               key[256] = {0,};
+        gf_defrag_status_t status_rcd = GF_DEFRAG_STATUS_NOT_STARTED;
+        uint64_t           files = 0;
+        uint64_t           size = 0;
+        uint64_t           lookup = 0;
+        char              *node_name = NULL;
+        uint64_t           failures = 0;
+        uint64_t           skipped = 0;
+        double             elapsed = 0;
+        char              *status_str = NULL;
+        char              *size_str = NULL;
+
+        ret = dict_get_int32 (dict, "count", &count);
+        if (ret) {
+                gf_log ("cli", GF_LOG_ERROR, "count not set");
+                goto out;
+        }
+
+
+        cli_out ("%40s %16s %13s %13s %13s %13s %20s %18s", "Node",
+                 "Rebalanced-files", "size", "scanned", "failures", "skipped",
+                 "status", "run time in secs");
+        cli_out ("%40s %16s %13s %13s %13s %13s %20s %18s", "---------",
+                 "-----------", "-----------", "-----------", "-----------",
+                 "-----------", "------------", "--------------");
+        for (i = 1; i <= count; i++) {
+                /* Reset the variables to prevent carryover of values */
+                node_name = NULL;
+                files = 0;
+                size = 0;
+                lookup = 0;
+                failures = 0;  /* fix: was not reset, so a node's failure
+                                  count leaked into later rows */
+                skipped = 0;
+                status_str = NULL;
+                elapsed = 0;
+
+                /* Check if status is NOT_STARTED, and continue early */
+                memset (key, 0, 256);
+                snprintf (key, 256, "status-%d", i);
+                ret = dict_get_int32 (dict, key, (int32_t *)&status_rcd);
+                if (ret) {
+                        gf_log ("cli", GF_LOG_TRACE, "failed to get status");
+                        goto out;
+                }
+                if (GF_DEFRAG_STATUS_NOT_STARTED == status_rcd)
+                        continue;
+
+
+                memset (key, 0, 256);  /* fix: key was reused unsanitized */
+                snprintf (key, 256, "node-name-%d", i);
+                ret = dict_get_str (dict, key, &node_name);
+                if (ret)
+                        gf_log ("cli", GF_LOG_TRACE, "failed to get node-name");
+
+                memset (key, 0, 256);
+                snprintf (key, 256, "files-%d", i);
+                ret = dict_get_uint64 (dict, key, &files);
+                if (ret)
+                        gf_log ("cli", GF_LOG_TRACE,
+                                "failed to get file count");
+
+                memset (key, 0, 256);
+                snprintf (key, 256, "size-%d", i);
+                ret = dict_get_uint64 (dict, key, &size);
+                if (ret)
+                        gf_log ("cli", GF_LOG_TRACE,
+                                "failed to get size of xfer");
+
+                memset (key, 0, 256);
+                snprintf (key, 256, "lookups-%d", i);
+                ret = dict_get_uint64 (dict, key, &lookup);
+                if (ret)
+                        gf_log ("cli", GF_LOG_TRACE,
+                                "failed to get lookedup file count");
+
+                memset (key, 0, 256);
+                snprintf (key, 256, "failures-%d", i);
+                ret = dict_get_uint64 (dict, key, &failures);
+                if (ret)
+                        gf_log ("cli", GF_LOG_TRACE,
+                                "failed to get failures count");
+
+                memset (key, 0, 256);
+                snprintf (key, 256, "skipped-%d", i);
+                ret = dict_get_uint64 (dict, key, &skipped);
+                if (ret)
+                        gf_log ("cli", GF_LOG_TRACE,
+                                "failed to get skipped count");
+
+                /* For remove-brick include skipped count into failure count*/
+                if (task_type != GF_TASK_TYPE_REBALANCE) {
+                        failures += skipped;
+                        skipped = 0;
+                }
+
+                memset (key, 0, 256);
+                snprintf (key, 256, "run-time-%d", i);
+                ret = dict_get_double (dict, key, &elapsed);
+                if (ret)
+                        gf_log ("cli", GF_LOG_TRACE, "failed to get run-time");
+
+                /* Check for array bound.  NOTE(review): clamping to
+                 * GF_DEFRAG_STATUS_MAX assumes cli_vol_task_status_str has
+                 * an entry at index MAX — confirm against its definition. */
+                if (status_rcd >= GF_DEFRAG_STATUS_MAX)
+                        status_rcd = GF_DEFRAG_STATUS_MAX;
+
+                status_str = cli_vol_task_status_str[status_rcd];
+                size_str = gf_uint64_2human_readable(size);
+                if (size_str) {
+                        cli_out ("%40s %16"PRIu64 " %13s" " %13"PRIu64 " %13"
+                                 PRIu64" %13"PRIu64 " %20s %18.2f", node_name,
+                                 files, size_str, lookup, failures, skipped,
+                                 status_str, elapsed);
+                } else {
+                        cli_out ("%40s %16"PRIu64 " %13"PRIu64 " %13"PRIu64
+                                 " %13"PRIu64" %13"PRIu64 " %20s %18.2f",
+                                 node_name, files, size, lookup, failures,
+                                 skipped, status_str, elapsed);
+                }
+                GF_FREE(size_str);
+        }
+out:
+        return ret;
+}
+
+int
gf_cli_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
@@ -1504,7 +1634,9 @@ gf_cli_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov,
}
}
- if (!((cmd == GF_DEFRAG_CMD_STOP) || (cmd == GF_DEFRAG_CMD_STATUS)) &&
+ if (!((cmd == GF_DEFRAG_CMD_STOP) ||
+ (cmd == GF_DEFRAG_CMD_STATUS) ||
+ (cmd == GF_DEFRAG_CMD_STATUS_TIER)) &&
!(global_state->mode & GLUSTER_MODE_XML)) {
/* All other possibilites are about starting a rebalance */
ret = dict_get_str (dict, GF_REBALANCE_TID_KEY, &task_id_str);
@@ -1577,7 +1709,12 @@ gf_cli_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov,
goto out;
}
- ret = gf_cli_print_rebalance_status (dict, GF_TASK_TYPE_REBALANCE);
+ if (cmd == GF_DEFRAG_CMD_STATUS_TIER)
+ ret = gf_cli_print_tier_status (dict, GF_TASK_TYPE_REBALANCE);
+ else
+ ret = gf_cli_print_rebalance_status (dict,
+ GF_TASK_TYPE_REBALANCE);
+
if (ret)
gf_log ("cli", GF_LOG_ERROR,
"Failed to print rebalance status");
@@ -3616,7 +3753,7 @@ int32_t
gf_cli_reset_volume (call_frame_t *frame, xlator_t *this,
void *data)
{
- gf_cli_req req = {{0,}};
+ gf_cli_req req = {{0,} };
int ret = 0;
dict_t *dict = NULL;
@@ -3665,7 +3802,7 @@ int32_t
gf_cli_set_volume (call_frame_t *frame, xlator_t *this,
void *data)
{
- gf_cli_req req = {{0,}};
+ gf_cli_req req = {{0,} };
int ret = 0;
dict_t *dict = NULL;
@@ -3691,7 +3828,7 @@ int32_t
gf_cli_add_brick (call_frame_t *frame, xlator_t *this,
void *data)
{
- gf_cli_req req = {{0,}};
+ gf_cli_req req = {{0,} };
int ret = 0;
dict_t *dict = NULL;
char *volname = NULL;
@@ -3726,6 +3863,66 @@ out:
}
int32_t
+gf_cli_attach_tier (call_frame_t *frame, xlator_t *this,
+                    void *data)
+{
+        /* Send a GLUSTER_CLI_ATTACH_TIER request to glusterd.  @data is
+         * the options dict built by the attach-tier CLI handler; the
+         * reply reuses the add-brick callback since attach-tier is an
+         * add-brick variant.  Returns 0 on success. */
+        gf_cli_req req = {{0,} };
+        int        ret = 0;
+        dict_t    *dict = NULL;
+
+        if (!frame || !this || !data) {
+                ret = -1;
+                goto out;
+        }
+
+        dict = data;
+
+        /* (dead "if (ret) goto out;" removed: ret is always 0 here) */
+        ret = cli_to_glusterd (&req, frame, gf_cli_add_brick_cbk,
+                               (xdrproc_t) xdr_gf_cli_req, dict,
+                               GLUSTER_CLI_ATTACH_TIER, this,
+                               cli_rpc_prog, NULL);
+out:
+        gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+
+        GF_FREE (req.dict.dict_val);
+        return ret;
+}
+
+int32_t
+gf_cli_detach_tier (call_frame_t *frame, xlator_t *this,
+                    void *data)
+{
+        /* Send a GLUSTER_CLI_DETACH_TIER request to glusterd.  @data is
+         * the options dict built by the detach-tier CLI handler; the
+         * reply reuses the remove-brick callback since detach-tier is a
+         * remove-brick variant.  Returns 0 on success. */
+        gf_cli_req req = {{0,} };
+        int        ret = 0;
+        dict_t    *dict = NULL;
+
+        if (!frame || !this || !data) {
+                ret = -1;
+                goto out;
+        }
+
+        dict = data;
+
+        /* (unused local "volname" removed) */
+        ret = cli_to_glusterd (&req, frame, gf_cli_remove_brick_cbk,
+                               (xdrproc_t) xdr_gf_cli_req, dict,
+                               GLUSTER_CLI_DETACH_TIER, this,
+                               cli_rpc_prog, NULL);
+
+out:
+        gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+
+        GF_FREE (req.dict.dict_val);
+
+        return ret;
+}
+
+
+int32_t
gf_cli_remove_brick (call_frame_t *frame, xlator_t *this,
void *data)
{
@@ -9965,7 +10162,9 @@ struct rpc_clnt_procedure gluster_cli_actors[GLUSTER_CLI_MAXVALUE] = {
[GLUSTER_CLI_BARRIER_VOLUME] = {"BARRIER VOLUME", gf_cli_barrier_volume},
[GLUSTER_CLI_GANESHA] = {"GANESHA", gf_cli_ganesha},
[GLUSTER_CLI_GET_VOL_OPT] = {"GET_VOL_OPT", gf_cli_get_vol_opt},
- [GLUSTER_CLI_BITROT] = {"BITROT", gf_cli_bitrot}
+ [GLUSTER_CLI_BITROT] = {"BITROT", gf_cli_bitrot},
+ [GLUSTER_CLI_ATTACH_TIER] = {"ATTACH_TIER", gf_cli_attach_tier},
+ [GLUSTER_CLI_DETACH_TIER] = {"DETACH_TIER", gf_cli_detach_tier}
};
struct rpc_clnt_program cli_prog = {
diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h
index bf68366f5dd..60697b8fa66 100644
--- a/rpc/rpc-lib/src/protocol-common.h
+++ b/rpc/rpc-lib/src/protocol-common.h
@@ -183,6 +183,8 @@ enum gluster_cli_procnum {
GLUSTER_CLI_GET_VOL_OPT,
GLUSTER_CLI_GANESHA,
GLUSTER_CLI_BITROT,
+ GLUSTER_CLI_ATTACH_TIER,
+ GLUSTER_CLI_DETACH_TIER,
GLUSTER_CLI_MAXVALUE,
};
diff --git a/rpc/xdr/src/cli1-xdr.x b/rpc/xdr/src/cli1-xdr.x
index 925700699ab..72581b0c5d5 100644
--- a/rpc/xdr/src/cli1-xdr.x
+++ b/rpc/xdr/src/cli1-xdr.x
@@ -3,7 +3,9 @@
GF_DEFRAG_CMD_STOP,
GF_DEFRAG_CMD_STATUS,
GF_DEFRAG_CMD_START_LAYOUT_FIX,
- GF_DEFRAG_CMD_START_FORCE /* used by remove-brick data migration */
+ GF_DEFRAG_CMD_START_FORCE, /* used by remove-brick data migration */
+ GF_DEFRAG_CMD_START_TIER,
+ GF_DEFRAG_CMD_STATUS_TIER
};
enum gf_defrag_status_t {
@@ -24,7 +26,8 @@
GF_CLUSTER_TYPE_STRIPE,
GF_CLUSTER_TYPE_REPLICATE,
GF_CLUSTER_TYPE_STRIPE_REPLICATE,
- GF_CLUSTER_TYPE_DISPERSE
+ GF_CLUSTER_TYPE_DISPERSE,
+ GF_CLUSTER_TYPE_TIER
};
enum gf1_cli_replace_op {
@@ -53,7 +56,8 @@ enum gf_bitrot_type {
GF_OP_CMD_COMMIT,
GF_OP_CMD_STOP,
GF_OP_CMD_STATUS,
- GF_OP_CMD_COMMIT_FORCE
+ GF_OP_CMD_COMMIT_FORCE,
+ GF_OP_CMD_DETACH
};
enum gf_quota_type {
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
index fd4618bb78c..fa5e533f135 100644
--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
@@ -288,6 +288,10 @@ gd_rmbr_validate_replica_count (glusterd_volinfo_t *volinfo,
int replica_nodes = 0;
switch (volinfo->type) {
+ case GF_CLUSTER_TYPE_TIER:
+ ret = 1;
+ goto out;
+
case GF_CLUSTER_TYPE_NONE:
case GF_CLUSTER_TYPE_STRIPE:
case GF_CLUSTER_TYPE_DISPERSE:
@@ -367,7 +371,6 @@ __glusterd_handle_add_brick (rpcsvc_request_t *req)
int32_t replica_count = 0;
int32_t stripe_count = 0;
int type = 0;
-
this = THIS;
GF_ASSERT(this);
@@ -454,6 +457,17 @@ __glusterd_handle_add_brick (rpcsvc_request_t *req)
total_bricks = volinfo->brick_count + brick_count;
+ if (dict_get (dict, "attach-tier")) {
+ if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
+ snprintf (err_str, sizeof (err_str),
+ "Volume %s is already a tier.", volname);
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ ret = -1;
+ goto out;
+ }
+ goto brick_val;
+ }
+
if (!stripe_count && !replica_count) {
if (volinfo->type == GF_CLUSTER_TYPE_NONE)
goto brick_val;
@@ -639,6 +653,40 @@ subvol_matcher_destroy (int *subvols)
GF_FREE (subvols);
}
+/* Populate @dict with "brick<N>" = "hostname:path" entries for every hot
+ * tier brick of @volinfo and set "count" to the number found.  Hot bricks
+ * are at the head of the brick list (see glusterd_op_perform_add_bricks),
+ * so cold bricks are skipped by iterating from the tail.  Returns the hot
+ * brick count, or -1 on error. */
+static int
+glusterd_set_detach_bricks(dict_t *dict, glusterd_volinfo_t *volinfo)
+{
+        char                  key[256] = {0,};
+        char                  value[PATH_MAX] = {0,};   /* was 256: could truncate host:path */
+        int                   brick_num = 0;
+        int                   hot_brick_num = 0;
+        glusterd_brickinfo_t *brickinfo = NULL;
+        int                   ret = 0;
+        char                 *brick_str = NULL;
+
+        /* cold tier bricks at tail of list so use reverse iteration */
+        cds_list_for_each_entry_reverse (brickinfo, &volinfo->bricks,
+                                         brick_list) {
+                brick_num++;
+                if (brick_num <= volinfo->tier_info.cold_brick_count)
+                        continue;
+
+                hot_brick_num++;
+                snprintf (key, sizeof (key), "brick%d", hot_brick_num);
+                snprintf (value, sizeof (value), "%s:%s",
+                          brickinfo->hostname,
+                          brickinfo->path);
+
+                /* dict_set_dynstr transfers ownership of the copy to the
+                 * dict; dict_set_str (dict, key, strdup (value)) leaked the
+                 * duplicate because the dict never freed a static value. */
+                brick_str = gf_strdup (value);
+                if (!brick_str) {
+                        ret = -1;
+                        break;
+                }
+                ret = dict_set_dynstr (dict, key, brick_str);
+                if (ret) {
+                        GF_FREE (brick_str);
+                        break;
+                }
+        }
+
+        if (ret)
+                return -1;
+
+        ret = dict_set_int32(dict, "count", hot_brick_num);
+        if (ret)
+                return -1;
+
+        return hot_brick_num;
+}
+
int
__glusterd_handle_remove_brick (rpcsvc_request_t *req)
{
@@ -794,7 +842,8 @@ __glusterd_handle_remove_brick (rpcsvc_request_t *req)
/* Do not allow remove-brick if the bricks given is less than
the replica count or stripe count */
- if (!replica_count && (volinfo->type != GF_CLUSTER_TYPE_NONE)) {
+ if (!replica_count && (volinfo->type != GF_CLUSTER_TYPE_NONE) &&
+ (volinfo->type != GF_CLUSTER_TYPE_TIER)) {
if (volinfo->dist_leaf_count &&
(count % volinfo->dist_leaf_count)) {
snprintf (err_str, sizeof (err_str), "Remove brick "
@@ -813,6 +862,7 @@ __glusterd_handle_remove_brick (rpcsvc_request_t *req)
goto out;
}
+
strcpy (brick_list, " ");
if ((volinfo->type != GF_CLUSTER_TYPE_NONE) &&
@@ -822,6 +872,9 @@ __glusterd_handle_remove_brick (rpcsvc_request_t *req)
goto out;
}
+ if (volinfo->type == GF_CLUSTER_TYPE_TIER)
+ count = glusterd_set_detach_bricks(dict, volinfo);
+
while ( i <= count) {
snprintf (key, sizeof (key), "brick%d", i);
ret = dict_get_str (dict, key, &brick);
@@ -836,6 +889,7 @@ __glusterd_handle_remove_brick (rpcsvc_request_t *req)
ret = glusterd_volume_brickinfo_get_by_brick(brick, volinfo,
&brickinfo);
+
if (ret) {
snprintf (err_str, sizeof (err_str), "Incorrect brick "
"%s for volume %s", brick, volname);
@@ -883,7 +937,8 @@ out:
}
- GF_FREE (brick_list);
+ if (brick_list)
+ GF_FREE (brick_list);
subvol_matcher_destroy (subvols);
free (cli_req.dict.dict_val); //its malloced by xdr
@@ -1081,7 +1136,11 @@ glusterd_op_perform_add_bricks (glusterd_volinfo_t *volinfo, int32_t count,
ret = glusterd_resolve_brick (brickinfo);
if (ret)
goto out;
- if (stripe_count || replica_count) {
+
+ /* hot tier bricks are added to head of brick list */
+ if (dict_get (dict, "attach-tier")) {
+ cds_list_add (&brickinfo->brick_list, &volinfo->bricks);
+ } else if (stripe_count || replica_count) {
add_brick_at_right_order (brickinfo, volinfo, (i - 1),
stripe_count, replica_count);
} else {
@@ -1674,6 +1733,7 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr)
break;
+ case GF_OP_CMD_DETACH:
case GF_OP_CMD_COMMIT_FORCE:
break;
}
@@ -1767,6 +1827,35 @@ glusterd_remove_brick_migrate_cbk (glusterd_volinfo_t *volinfo,
return ret;
}
+/* Record the pre-attach ("cold") topology of @volinfo and the hot tier's
+ * brick/replica counts.  The saved cold values allow the volume to revert
+ * to its original state if the attach fails or the tier is detached.
+ * @count is the number of hot bricks being attached; @bricks is currently
+ * unused (kept for interface symmetry with the add-brick path).
+ * Returns 0. */
+static int
+glusterd_op_perform_attach_tier (dict_t *dict,
+                                 glusterd_volinfo_t *volinfo,
+                                 int count,
+                                 char *bricks)
+{
+        int replica_count = 0;
+
+        /*
+         * Store the new (cold) tier's structure until the graph is generated.
+         * If there is a failure before the graph is generated the
+         * structure will revert to its original state.
+         */
+        volinfo->tier_info.cold_dist_leaf_count = volinfo->dist_leaf_count;
+        volinfo->tier_info.cold_type            = volinfo->type;
+        volinfo->tier_info.cold_brick_count     = volinfo->brick_count;
+        volinfo->tier_info.cold_replica_count   = volinfo->replica_count;
+        volinfo->tier_info.cold_disperse_count  = volinfo->disperse_count;
+
+        /* "replica-count" is optional: default to an unreplicated hot
+         * tier.  Fix: the old code returned dict_get_int32's non-zero
+         * status when the key was absent, reporting an error for a case
+         * that is handled. */
+        if (dict_get_int32 (dict, "replica-count", &replica_count) == 0)
+                volinfo->tier_info.hot_replica_count = replica_count;
+        else
+                volinfo->tier_info.hot_replica_count = 1;
+        volinfo->tier_info.hot_brick_count = count;
+
+        return 0;
+}
int
glusterd_op_add_brick (dict_t *dict, char **op_errstr)
@@ -1778,6 +1867,7 @@ glusterd_op_add_brick (dict_t *dict, char **op_errstr)
xlator_t *this = NULL;
char *bricks = NULL;
int32_t count = 0;
+ int32_t replica_count = 0;
this = THIS;
GF_ASSERT (this);
@@ -1812,6 +1902,11 @@ glusterd_op_add_brick (dict_t *dict, char **op_errstr)
goto out;
}
+ if (dict_get(dict, "attach-tier")) {
+ gf_log (THIS->name, GF_LOG_DEBUG, "Adding tier");
+ glusterd_op_perform_attach_tier (dict, volinfo, count, bricks);
+ }
+
ret = glusterd_op_perform_add_bricks (volinfo, count, bricks, dict);
if (ret) {
gf_log ("", GF_LOG_ERROR, "Unable to add bricks");
@@ -1829,6 +1924,14 @@ out:
return ret;
}
+/* Revert @volinfo's topology fields to the cold-tier values saved at
+ * attach time, removing the tier layer from the volume's identity.
+ * NOTE(review): cold_dist_leaf_count is saved on attach but not restored
+ * here — confirm whether the caller recomputes dist_leaf_count
+ * (glusterd_op_remove_brick calls glusterd_get_dist_leaf_count). */
+static void
+glusterd_op_perform_detach_tier (glusterd_volinfo_t *volinfo)
+{
+ volinfo->type = volinfo->tier_info.cold_type;
+ volinfo->replica_count = volinfo->tier_info.cold_replica_count;
+ volinfo->disperse_count = volinfo->tier_info.cold_disperse_count;
+}
+
int
glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
{
@@ -1959,6 +2062,10 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
force = 1;
break;
+ case GF_OP_CMD_DETACH:
+ glusterd_op_perform_detach_tier (volinfo);
+ /* fall through */
+
case GF_OP_CMD_COMMIT_FORCE:
if (volinfo->decommission_in_progress) {
@@ -2051,7 +2158,12 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
volinfo->sub_count = replica_count;
volinfo->dist_leaf_count = glusterd_get_dist_leaf_count (volinfo);
- if (replica_count == 1) {
+ /*
+ * volinfo->type and sub_count have already been set for
+ * volumes undergoing a detach operation, they should not
+ * be modified here.
+ */
+ if ((replica_count == 1) && (cmd != GF_OP_CMD_DETACH)) {
if (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) {
volinfo->type = GF_CLUSTER_TYPE_NONE;
/* backward compatibility */
@@ -2224,3 +2336,16 @@ out:
gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
return ret;
}
+
+/* attach-tier reuses the add-brick handler; the "attach-tier" key in the
+ * request dict switches it into tier mode. */
+int
+glusterd_handle_attach_tier (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req, __glusterd_handle_add_brick);
+}
+
+/* detach-tier reuses the remove-brick handler with GF_OP_CMD_DETACH. */
+int
+glusterd_handle_detach_tier (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req,
+ __glusterd_handle_remove_brick);
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
index 77fa96400ba..a41b36b9715 100644
--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
+++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
@@ -4817,6 +4817,8 @@ rpcsvc_actor_t gd_svc_cli_actors[GLUSTER_CLI_MAXVALUE] = {
[GLUSTER_CLI_DELETE_VOLUME] = { "DELETE_VOLUME", GLUSTER_CLI_DELETE_VOLUME, glusterd_handle_cli_delete_volume, NULL, 0, DRC_NA},
[GLUSTER_CLI_GET_VOLUME] = { "GET_VOLUME", GLUSTER_CLI_GET_VOLUME, glusterd_handle_cli_get_volume, NULL, 0, DRC_NA},
[GLUSTER_CLI_ADD_BRICK] = { "ADD_BRICK", GLUSTER_CLI_ADD_BRICK, glusterd_handle_add_brick, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_ATTACH_TIER] = { "ATTACH_TIER", GLUSTER_CLI_ATTACH_TIER, glusterd_handle_attach_tier, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_DETACH_TIER] = { "DETACH_TIER", GLUSTER_CLI_DETACH_TIER, glusterd_handle_detach_tier, NULL, 0, DRC_NA},
[GLUSTER_CLI_REPLACE_BRICK] = { "REPLACE_BRICK", GLUSTER_CLI_REPLACE_BRICK, glusterd_handle_replace_brick, NULL, 0, DRC_NA},
[GLUSTER_CLI_REMOVE_BRICK] = { "REMOVE_BRICK", GLUSTER_CLI_REMOVE_BRICK, glusterd_handle_remove_brick, NULL, 0, DRC_NA},
[GLUSTER_CLI_LOG_ROTATE] = { "LOG FILENAME", GLUSTER_CLI_LOG_ROTATE, glusterd_handle_log_rotate, NULL, 0, DRC_NA},
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
index 75756518f28..c5fcb7698e5 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
@@ -448,6 +448,7 @@ glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickin
char name[1024] = {0,};
gf_xl_afr_op_t heal_op = GF_SHD_OP_INVALID;
xlator_t *this = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
this = THIS;
GF_ASSERT (this);
@@ -514,7 +515,11 @@ glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickin
ret = dict_get_str (dict, "volname", &volname);
if (ret)
goto out;
- snprintf (name, 1024, "%s-dht",volname);
+                ret = glusterd_volinfo_find (volname, &volinfo);
+                if (ret)
+                        goto out;  /* fix: volinfo stayed NULL on lookup
+                                      failure and was dereferenced below */
+                /* tiered volumes name their top-level DHT "tier-dht" */
+                if (volinfo->type == GF_CLUSTER_TYPE_TIER)
+                        snprintf (name, 1024, "tier-dht");
+                else
+                        snprintf (name, 1024, "%s-dht", volname);
brick_req->name = gf_strdup (name);
break;
@@ -5159,6 +5164,7 @@ glusterd_bricks_select_remove_brick (dict_t *dict, char **op_errstr,
while ( i <= count) {
snprintf (key, 256, "brick%d", i);
+
ret = dict_get_str (dict, key, &brick);
if (ret) {
gf_log ("glusterd", GF_LOG_ERROR, "Unable to get brick");
@@ -5167,8 +5173,10 @@ glusterd_bricks_select_remove_brick (dict_t *dict, char **op_errstr,
ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo,
&brickinfo);
+
if (ret)
goto out;
+
if (glusterd_is_brick_started (brickinfo)) {
pending_node = GF_CALLOC (1, sizeof (*pending_node),
gf_gld_mt_pending_node_t);
diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
index ba67df436ff..0d66571300f 100644
--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c
+++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
@@ -278,6 +278,13 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,
"--xlator-option", "*replicate*.readdir-failover=off",
"--xlator-option", "*dht.readdir-optimize=on",
NULL);
+
+ if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
+ runner_add_arg (&runner, "--xlator-option");
+ runner_argprintf (&runner,
+ "*tier-dht.xattr-name=trusted.tier-gfid");
+ }
+
runner_add_arg (&runner, "--xlator-option");
runner_argprintf ( &runner, "*dht.rebalance-cmd=%d",cmd);
runner_add_arg (&runner, "--xlator-option");
@@ -487,6 +494,7 @@ __glusterd_handle_defrag_volume (rpcsvc_request_t *req)
goto out;
if ((cmd == GF_DEFRAG_CMD_STATUS) ||
+ (cmd == GF_DEFRAG_CMD_STATUS_TIER) ||
(cmd == GF_DEFRAG_CMD_STOP)) {
ret = glusterd_op_begin (req, GD_OP_DEFRAG_BRICK_VOLUME,
dict, msg, sizeof (msg));
@@ -556,6 +564,7 @@ glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr)
switch (cmd) {
case GF_DEFRAG_CMD_START:
case GF_DEFRAG_CMD_START_LAYOUT_FIX:
+ case GF_DEFRAG_CMD_START_TIER:
/* Check if the connected clients are all of version
* glusterfs-3.6 and higher. This is needed to prevent some data
* loss issues that could occur when older clients are connected
@@ -690,7 +699,9 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
/* Set task-id, if available, in op_ctx dict for operations other than
* start
*/
- if (cmd == GF_DEFRAG_CMD_STATUS || cmd == GF_DEFRAG_CMD_STOP) {
+ if (cmd == GF_DEFRAG_CMD_STATUS ||
+ cmd == GF_DEFRAG_CMD_STOP ||
+ cmd == GF_DEFRAG_CMD_STATUS_TIER) {
if (!uuid_is_null (volinfo->rebal.rebalance_id)) {
ctx = glusterd_op_get_ctx ();
if (!ctx) {
@@ -720,6 +731,7 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
case GF_DEFRAG_CMD_START:
case GF_DEFRAG_CMD_START_LAYOUT_FIX:
case GF_DEFRAG_CMD_START_FORCE:
+ case GF_DEFRAG_CMD_START_TIER:
/* Reset defrag status to 'NOT STARTED' whenever a
* remove-brick/rebalance command is issued to remove
* stale information from previous run.
@@ -791,6 +803,7 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
break;
case GF_DEFRAG_CMD_STATUS:
+ case GF_DEFRAG_CMD_STATUS_TIER:
break;
default:
break;
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
index 5b2b14503ae..5696229572d 100644
--- a/xlators/mgmt/glusterd/src/glusterd-store.c
+++ b/xlators/mgmt/glusterd/src/glusterd-store.c
@@ -812,6 +812,63 @@ out:
" for volume %s", volinfo->volname);
return ret;
}
+
+/* Persist the tier_info fields of @volinfo into the volume's info file
+ * via @fd.  No-op (returns 0) for non-tiered volumes; otherwise returns
+ * the first gf_store_save_value failure.
+ * NOTE(review): hot_type is written but never populated in the attach
+ * path visible here — confirm where it is set. */
+int32_t
+glusterd_volume_write_tier_details (int fd, glusterd_volinfo_t *volinfo)
+{
+ int32_t ret = -1;
+ char buf[PATH_MAX] = "";
+
+ if (volinfo->type != GF_CLUSTER_TYPE_TIER) {
+ ret = 0;
+ goto out;
+ }
+
+ snprintf (buf, sizeof (buf), "%d", volinfo->tier_info.cold_brick_count);
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_COLD_COUNT, buf);
+ if (ret)
+ goto out;
+
+ snprintf (buf, sizeof (buf), "%d",
+ volinfo->tier_info.cold_replica_count);
+ ret = gf_store_save_value (fd,
+ GLUSTERD_STORE_KEY_COLD_REPLICA_COUNT,
+ buf);
+ if (ret)
+ goto out;
+
+ snprintf (buf, sizeof (buf), "%d", volinfo->tier_info.cold_disperse_count);
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_COLD_DISPERSE_COUNT,
+ buf);
+ if (ret)
+ goto out;
+
+ snprintf (buf, sizeof (buf), "%d", volinfo->tier_info.hot_brick_count);
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_HOT_COUNT,
+ buf);
+ if (ret)
+ goto out;
+
+ snprintf (buf, sizeof (buf), "%d", volinfo->tier_info.hot_replica_count);
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_HOT_REPLICA_COUNT,
+ buf);
+ if (ret)
+ goto out;
+
+ snprintf (buf, sizeof (buf), "%d", volinfo->tier_info.hot_type);
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_HOT_TYPE, buf);
+ if (ret)
+ goto out;
+
+ snprintf (buf, sizeof (buf), "%d", volinfo->tier_info.cold_type);
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_COLD_TYPE, buf);
+ if (ret)
+ goto out;
+
+ out:
+ return ret;
+}
+
int32_t
glusterd_volume_exclude_options_write (int fd, glusterd_volinfo_t *volinfo)
{
@@ -917,6 +974,8 @@ glusterd_volume_exclude_options_write (int fd, glusterd_volinfo_t *volinfo)
goto out;
}
+ ret = glusterd_volume_write_tier_details (fd, volinfo);
+
ret = glusterd_volume_write_snap_details (fd, volinfo);
out:
@@ -2725,6 +2784,27 @@ glusterd_store_update_volinfo (glusterd_volinfo_t *volinfo)
strlen (GLUSTERD_STORE_KEY_PARENT_VOLNAME))) {
strncpy (volinfo->parent_volname, value,
sizeof(volinfo->parent_volname) - 1);
+ } else if (!strncmp (key, GLUSTERD_STORE_KEY_COLD_COUNT,
+ strlen (key))) {
+ volinfo->tier_info.cold_brick_count = atoi (value);
+ } else if (!strncmp (key, GLUSTERD_STORE_KEY_COLD_REPLICA_COUNT,
+ strlen (key))) {
+ volinfo->tier_info.cold_replica_count = atoi (value);
+ } else if (!strncmp (key, GLUSTERD_STORE_KEY_COLD_DISPERSE_COUNT,
+ strlen (key))) {
+ volinfo->tier_info.cold_disperse_count = atoi (value);
+ } else if (!strncmp (key, GLUSTERD_STORE_KEY_HOT_COUNT,
+ strlen (key))) {
+ volinfo->tier_info.hot_brick_count = atoi (value);
+ } else if (!strncmp (key, GLUSTERD_STORE_KEY_HOT_REPLICA_COUNT,
+ strlen (key))) {
+ volinfo->tier_info.hot_replica_count = atoi (value);
+ } else if (!strncmp (key, GLUSTERD_STORE_KEY_HOT_TYPE,
+ strlen (key))) {
+ volinfo->tier_info.hot_type = atoi (value);
+ } else if (!strncmp (key, GLUSTERD_STORE_KEY_COLD_TYPE,
+ strlen (key))) {
+ volinfo->tier_info.cold_type = atoi (value);
} else {
if (is_key_glusterd_hooks_friendly (key)) {
@@ -2809,6 +2889,9 @@ glusterd_store_update_volinfo (glusterd_volinfo_t *volinfo)
GF_ASSERT (volinfo->redundancy_count > 0);
break;
+ case GF_CLUSTER_TYPE_TIER:
+ break;
+
default:
GF_ASSERT (0);
break;
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h
index afa96be77cf..45ed86a4163 100644
--- a/xlators/mgmt/glusterd/src/glusterd-store.h
+++ b/xlators/mgmt/glusterd/src/glusterd-store.h
@@ -64,6 +64,14 @@ typedef enum glusterd_store_ver_ac_{
#define GLUSTERD_STORE_KEY_VOL_OP_VERSION "op-version"
#define GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION "client-op-version"
+#define GLUSTERD_STORE_KEY_COLD_TYPE "cold_type"
+#define GLUSTERD_STORE_KEY_COLD_COUNT "cold_count"
+#define GLUSTERD_STORE_KEY_COLD_REPLICA_COUNT "cold_replica_count"
+#define GLUSTERD_STORE_KEY_COLD_DISPERSE_COUNT "cold_disperse_count"
+#define GLUSTERD_STORE_KEY_HOT_TYPE "hot_type"
+#define GLUSTERD_STORE_KEY_HOT_COUNT "hot_count"
+#define GLUSTERD_STORE_KEY_HOT_REPLICA_COUNT "hot_replica_count"
+
#define GLUSTERD_STORE_KEY_SNAP_NAME "name"
#define GLUSTERD_STORE_KEY_SNAP_ID "snap-id"
#define GLUSTERD_STORE_KEY_SNAP_DESC "desc"
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 727a19d24d1..27357955fe8 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -560,6 +560,7 @@ glusterd_volinfo_dup (glusterd_volinfo_t *volinfo,
new_volinfo->sub_count = volinfo->sub_count;
new_volinfo->transport_type = volinfo->transport_type;
new_volinfo->brick_count = volinfo->brick_count;
+ new_volinfo->tier_info = volinfo->tier_info;
dict_copy (volinfo->dict, new_volinfo->dict);
dict_copy (volinfo->gsync_slaves, new_volinfo->gsync_slaves);
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
index 114e57485fc..79da432bafe 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
@@ -1472,7 +1472,6 @@ brick_graph_add_posix (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
out:
return ret;
}
-
static int
brick_graph_add_trash (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
dict_t *set_dict, glusterd_brickinfo_t *brickinfo)
@@ -2712,24 +2711,22 @@ out:
}
static int
-volgen_graph_build_clusters (volgen_graph_t *graph,
- glusterd_volinfo_t *volinfo, char *xl_type,
- char *xl_namefmt, size_t child_count,
- size_t sub_count)
+volgen_link_bricks (volgen_graph_t *graph,
+ glusterd_volinfo_t *volinfo, char *xl_type,
+ char *xl_namefmt, size_t child_count,
+ size_t sub_count,
+ xlator_t *trav)
{
int i = 0;
int j = 0;
- xlator_t *txl = NULL;
xlator_t *xl = NULL;
- xlator_t *trav = NULL;
char *volname = NULL;
int ret = -1;
if (child_count == 0)
goto out;
volname = volinfo->volname;
- txl = first_of (graph);
- for (trav = txl; --child_count; trav = trav->next);
+
for (;; trav = trav->prev) {
if ((i % sub_count) == 0) {
xl = volgen_graph_add_nolink (graph, xl_type,
@@ -2745,10 +2742,9 @@ volgen_graph_build_clusters (volgen_graph_t *graph,
if (ret)
goto out;
- if (trav == txl)
- break;
-
i++;
+ if (i == child_count)
+ break;
}
ret = j;
@@ -2756,6 +2752,46 @@ out:
return ret;
}
+static int
+volgen_link_bricks_from_list_tail (volgen_graph_t *graph,
+ glusterd_volinfo_t *volinfo,
+ char *xl_type,
+ char *xl_namefmt, size_t child_count,
+ size_t sub_count)
+{
+ xlator_t *trav = NULL;
+ size_t cnt = child_count;
+
+ for (trav = first_of(graph); --cnt; trav = trav->next)
+ ;
+
+ return volgen_link_bricks (graph, volinfo,
+ xl_type,
+ xl_namefmt,
+ child_count,
+ sub_count,
+ trav);
+}
+
+static int
+volgen_link_bricks_from_list_head (volgen_graph_t *graph,
+ glusterd_volinfo_t *volinfo, char *xl_type,
+ char *xl_namefmt, size_t child_count,
+ size_t sub_count)
+{
+ xlator_t *trav = NULL;
+
+ for (trav = first_of(graph); trav->next; trav = trav->next)
+ ;
+
+ return volgen_link_bricks (graph, volinfo,
+ xl_type,
+ xl_namefmt,
+ child_count,
+ sub_count,
+ trav);
+}
+
/**
* This is the build graph function for user-serviceable snapshots.
* Generates snapview-client
@@ -2948,7 +2984,7 @@ volgen_graph_build_dht_cluster (volgen_graph_t *graph,
else
name_fmt = "%s-dht";
- clusters = volgen_graph_build_clusters (graph, volinfo,
+ clusters = volgen_link_bricks_from_list_tail (graph, volinfo,
voltype,
name_fmt,
child_count,
@@ -2985,7 +3021,7 @@ volgen_graph_build_ec_clusters (volgen_graph_t *graph,
xlator_t *ec = NULL;
char option[32] = {0};
- clusters = volgen_graph_build_clusters (graph, volinfo,
+ clusters = volgen_link_bricks_from_list_tail (graph, volinfo,
disperse_args[0],
disperse_args[1],
volinfo->brick_count,
@@ -3015,12 +3051,19 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph,
{
char *replicate_args[] = {"cluster/replicate",
"%s-replicate-%d"};
+ char *tier_args[] = {"cluster/tier",
+ "%s-tier-%d"};
char *stripe_args[] = {"cluster/stripe",
"%s-stripe-%d"};
+ char *disperse_args[] = {"cluster/disperse",
+ "%s-disperse-%d"};
+ char option[32] = "";
int rclusters = 0;
int clusters = 0;
int dist_count = 0;
int ret = -1;
+ xlator_t *ec = NULL;
+ xlator_t *client = NULL;
if (!volinfo->dist_leaf_count)
goto out;
@@ -3031,7 +3074,7 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph,
/* All other cases, it will have one or the other cluster type */
switch (volinfo->type) {
case GF_CLUSTER_TYPE_REPLICATE:
- clusters = volgen_graph_build_clusters (graph, volinfo,
+ clusters = volgen_link_bricks_from_list_tail (graph, volinfo,
replicate_args[0],
replicate_args[1],
volinfo->brick_count,
@@ -3040,7 +3083,7 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph,
goto out;
break;
case GF_CLUSTER_TYPE_STRIPE:
- clusters = volgen_graph_build_clusters (graph, volinfo,
+ clusters = volgen_link_bricks_from_list_tail (graph, volinfo,
stripe_args[0],
stripe_args[1],
volinfo->brick_count,
@@ -3048,11 +3091,18 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph,
if (clusters < 0)
goto out;
break;
+ case GF_CLUSTER_TYPE_TIER:
+ ret = volgen_link_bricks_from_list_head (graph, volinfo,
+ tier_args[0],
+ tier_args[1],
+ volinfo->brick_count,
+ volinfo->replica_count);
+ break;
case GF_CLUSTER_TYPE_STRIPE_REPLICATE:
/* Replicate after the clients, then stripe */
if (volinfo->replica_count == 0)
goto out;
- clusters = volgen_graph_build_clusters (graph, volinfo,
+ clusters = volgen_link_bricks_from_list_tail (graph, volinfo,
replicate_args[0],
replicate_args[1],
volinfo->brick_count,
@@ -3062,7 +3112,7 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph,
rclusters = volinfo->brick_count / volinfo->replica_count;
GF_ASSERT (rclusters == clusters);
- clusters = volgen_graph_build_clusters (graph, volinfo,
+ clusters = volgen_link_bricks_from_list_tail (graph, volinfo,
stripe_args[0],
stripe_args[1],
rclusters,
@@ -3162,7 +3212,7 @@ graph_set_generic_options (xlator_t *this, volgen_graph_t *graph,
"log-buf-size option");
ret = volgen_graph_set_options_generic (graph, set_dict, "client",
- &log_flush_timeout_option_handler);
+ &log_flush_timeout_option_handler);
if (ret)
gf_log (this->name, GF_LOG_WARNING, "Failed to change "
"log-flush-timeout option");
@@ -3170,6 +3220,88 @@ graph_set_generic_options (xlator_t *this, volgen_graph_t *graph,
}
static int
+volume_volgen_graph_build_clusters_tier (volgen_graph_t *graph,
+ glusterd_volinfo_t *volinfo,
+ gf_boolean_t is_quotad)
+{
+ int ret = -1;
+ xlator_t *root;
+ xlator_t *xl, *hxl, *cxl;
+ glusterd_brickinfo_t *brick = NULL;
+ char *rule;
+ int st_brick_count = 0;
+ int st_replica_count = 0;
+ int st_disperse_count = 0;
+ int st_dist_leaf_count = 0;
+ int st_type = 0;
+ char st_volname[GD_VOLUME_NAME_MAX];
+ int dist_count = 0;
+
+ st_brick_count = volinfo->brick_count;
+ st_replica_count = volinfo->replica_count;
+ st_disperse_count = volinfo->disperse_count;
+ st_type = volinfo->type;
+ st_dist_leaf_count = volinfo->dist_leaf_count;
+ strcpy(st_volname, volinfo->volname);
+
+ volinfo->dist_leaf_count = volinfo->tier_info.cold_dist_leaf_count;
+ volinfo->brick_count = volinfo->tier_info.cold_brick_count;
+ volinfo->replica_count = volinfo->tier_info.cold_replica_count;
+ volinfo->disperse_count = volinfo->tier_info.cold_disperse_count;
+ volinfo->type = volinfo->tier_info.cold_type;
+ sprintf (volinfo->volname, "%s-cold", st_volname);
+
+ ret = volume_volgen_graph_build_clusters (graph, volinfo, _gf_false);
+ if (ret)
+ goto out;
+ cxl = first_of(graph);
+
+ volinfo->type = GF_CLUSTER_TYPE_TIER;
+ volinfo->brick_count = volinfo->tier_info.hot_brick_count;
+ volinfo->replica_count = volinfo->tier_info.hot_replica_count;
+ volinfo->dist_leaf_count = glusterd_get_dist_leaf_count(volinfo);
+ volinfo->disperse_count = 0;
+
+ sprintf (volinfo->volname, "%s-hot", st_volname);
+
+ if (volinfo->dist_leaf_count == 1) {
+ dist_count = volinfo->brick_count / volinfo->dist_leaf_count;
+ ret = volgen_link_bricks_from_list_head (graph, volinfo,
+ "cluster/distribute",
+ "%s-dht",
+ dist_count,
+ dist_count);
+ } else {
+ ret = volume_volgen_graph_build_clusters (graph,
+ volinfo,
+ _gf_false);
+ }
+
+ hxl = first_of(graph);
+
+ xl = volgen_graph_add_nolink (graph, "cluster/tier", "%s",
+ "tier-dht", 0);
+ gf_asprintf(&rule, "%s-hot-dht", st_volname);
+ xlator_set_option(xl, "rule", rule);
+ xlator_set_option(xl, "xattr-name", "trusted.tier-gfid");
+
+ ret = volgen_xlator_link (xl, cxl);
+ ret = volgen_xlator_link (xl, hxl);
+
+ st_type = GF_CLUSTER_TYPE_TIER;
+
+ out:
+ volinfo->brick_count = st_brick_count;
+ volinfo->replica_count = st_replica_count;
+ volinfo->disperse_count = st_disperse_count;
+ volinfo->type = st_type;
+ volinfo->dist_leaf_count = st_dist_leaf_count;
+ strcpy(volinfo->volname, st_volname);
+
+ return ret;
+}
+
+static int
client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
dict_t *set_dict, void *param)
{
@@ -3188,11 +3320,16 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
GF_ASSERT (conf);
volname = volinfo->volname;
- ret = volgen_graph_build_clients (graph, volinfo, set_dict, param);
+ ret = volgen_graph_build_clients (graph, volinfo, set_dict,
+ param);
if (ret)
goto out;
- ret = volume_volgen_graph_build_clusters (graph, volinfo, _gf_false);
+ if (volinfo->type == GF_CLUSTER_TYPE_TIER)
+ ret = volume_volgen_graph_build_clusters_tier (graph, volinfo, _gf_false);
+ else
+ ret = volume_volgen_graph_build_clusters (graph, volinfo, _gf_false);
+
if (ret == -1)
goto out;
@@ -3730,7 +3867,7 @@ volgen_graph_build_replicate_clusters (volgen_graph_t *graph,
char *replicate_args[] = {"cluster/replicate",
"%s-replicate-%d"};
- return volgen_graph_build_clusters (graph, volinfo, "cluster/replicate",
+ return volgen_link_bricks_from_list_tail (graph, volinfo, "cluster/replicate",
"%s-replicate-%d",
volinfo->brick_count,
volinfo->replica_count);
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index ae866b7ccfc..ada814bb25d 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -1690,6 +1690,32 @@ struct volopt_map_entry glusterd_volopt_map[] = {
.voltype = "features/trash",
.op_version = GD_OP_VERSION_3_7_0,
},
+
+ /* tier translator - global tunables */
+ { .key = "cluster.write-freq-thresold",
+ .voltype = "cluster/tier",
+ .option = "write-freq-thresold",
+ .op_version = GD_OP_VERSION_3_7_0,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.read-freq-thresold",
+ .voltype = "cluster/tier",
+ .option = "read-freq-thresold",
+ .op_version = GD_OP_VERSION_3_7_0,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.tier-promote-frequency",
+ .voltype = "cluster/tier",
+ .option = "tier-promote-frequency",
+ .op_version = GD_OP_VERSION_3_7_0,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.tier-demote-frequency",
+ .voltype = "cluster/tier",
+ .option = "tier-demote-frequency",
+ .op_version = GD_OP_VERSION_3_7_0,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
{ .key = "features.ctr-enabled",
.voltype = "features/changetimerecorder",
.value = "off",
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
index ad280eda053..bac1598598b 100644
--- a/xlators/mgmt/glusterd/src/glusterd.h
+++ b/xlators/mgmt/glusterd/src/glusterd.h
@@ -302,9 +302,6 @@ typedef struct tier_info_ {
int hot_type;
int hot_brick_count;
int hot_replica_count;
- int hot_disperse_count;
- /*Commented for now Dan's DHT Tier patch will have it*/
- /*tier_group_t *root;*/
} gd_tier_info_t;
struct glusterd_volinfo_ {
@@ -814,6 +811,12 @@ int
glusterd_handle_add_brick (rpcsvc_request_t *req);
int
+glusterd_handle_attach_tier (rpcsvc_request_t *req);
+
+int
+glusterd_handle_detach_tier (rpcsvc_request_t *req);
+
+int
glusterd_handle_replace_brick (rpcsvc_request_t *req);
int