summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- cli/src/cli-cmd-parser.c | 23
-rw-r--r-- cli/src/cli-cmd-volume.c | 144
-rw-r--r-- cli/src/cli-rpc-ops.c | 215
-rw-r--r-- rpc/rpc-lib/src/protocol-common.h | 2
-rw-r--r-- rpc/xdr/src/cli1-xdr.x | 10
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 135
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-handler.c | 2
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-op-sm.c | 10
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-rebalance.c | 15
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-store.c | 83
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-store.h | 8
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-utils.c | 1
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-volgen.c | 181
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-volume-set.c | 26
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd.h | 9
15 files changed, 821 insertions, 43 deletions
diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c
index 5520c9e46b1..54a57008457 100644
--- a/cli/src/cli-cmd-parser.c
+++ b/cli/src/cli-cmd-parser.c
@@ -355,6 +355,10 @@ cli_validate_disperse_volume (char *word, gf1_cluster_type type,
cli_err ("striped-replicated-dispersed volume "
"is not supported");
goto out;
+ case GF_CLUSTER_TYPE_TIER:
+ cli_err ("tier-dispersed volume is not "
+ "supported");
+ goto out;
case GF_CLUSTER_TYPE_STRIPE:
cli_err ("striped-dispersed volume is not "
"supported");
@@ -490,6 +494,11 @@ cli_cmd_volume_create_parse (struct cli_state *state, const char **words,
case GF_CLUSTER_TYPE_STRIPE:
type = GF_CLUSTER_TYPE_STRIPE_REPLICATE;
break;
+ case GF_CLUSTER_TYPE_TIER:
+ cli_err ("replicated-tiered volume is not "
+ "supported");
+ goto out;
+ break;
case GF_CLUSTER_TYPE_DISPERSE:
cli_err ("replicated-dispersed volume is not "
"supported");
@@ -529,6 +538,10 @@ cli_cmd_volume_create_parse (struct cli_state *state, const char **words,
cli_err ("striped-dispersed volume is not "
"supported");
goto out;
+ case GF_CLUSTER_TYPE_TIER:
+ cli_err ("striped-tier volume is not "
+ "supported");
+ goto out;
}
if (wordcount < (index + 2)) {
ret = -1;
@@ -3384,6 +3397,16 @@ cli_cmd_volume_defrag_parse (const char **words, int wordcount,
if (strcmp (words[3], "start") && strcmp (words[3], "stop") &&
strcmp (words[3], "status"))
goto out;
+ } else if ((strcmp (words[3], "tier") == 0) &&
+ (strcmp (words[4], "start") == 0)) {
+ volname = (char *) words[2];
+ cmd = GF_DEFRAG_CMD_START_TIER;
+ goto done;
+ } else if ((strcmp (words[3], "tier") == 0) &&
+ (strcmp (words[4], "status") == 0)) {
+ volname = (char *) words[2];
+ cmd = GF_DEFRAG_CMD_STATUS_TIER;
+ goto done;
} else {
if (strcmp (words[3], "fix-layout") &&
strcmp (words[3], "start"))
diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c
index 6c950da4e97..3098d74491c 100644
--- a/cli/src/cli-cmd-volume.c
+++ b/cli/src/cli-cmd-volume.c
@@ -840,6 +840,142 @@ out:
return ret;
}
+/*
+ * CLI callback for the "volume attach-tier" command.
+ *
+ * The command words are parsed with the add-brick parser; the resulting
+ * options dictionary is then tagged with "attach-tier" = 1 and
+ * "type" = GF_CLUSTER_TYPE_TIER (plus "force" when the CLI runs in
+ * GLUSTER_MODE_WIGNORE) and handed to the GLUSTER_CLI_ATTACH_TIER
+ * procedure.
+ *
+ * @state:     CLI state; only state->mode is consulted here.
+ * @word:      matched command word; its pattern is printed on parse errors.
+ * @words:     command-line tokens.
+ * @wordcount: number of entries in @words.
+ *
+ * Returns the value produced by the procedure's handler, or the non-zero
+ * result of whichever earlier step failed.
+ */
+int
+cli_cmd_volume_attach_tier_cbk (struct cli_state *state,
+                                struct cli_cmd_word *word, const char **words,
+                                int wordcount)
+{
+        int                   ret         = -1;
+        rpc_clnt_procedure_t *proc        = NULL;
+        call_frame_t         *frame       = NULL;
+        dict_t               *options     = NULL;
+        int                   sent        = 0;
+        int                   parse_error = 0;
+        cli_local_t          *local       = NULL;
+
+        frame = create_frame (THIS, THIS->ctx->pool);
+        if (!frame)
+                goto out;
+
+        /* attach-tier shares its argument grammar with add-brick */
+        ret = cli_cmd_volume_add_brick_parse (words, wordcount, &options);
+        if (ret) {
+                cli_usage_out (word->pattern);
+                parse_error = 1;
+                goto out;
+        }
+
+        if (state->mode & GLUSTER_MODE_WIGNORE) {
+                ret = dict_set_int32 (options, "force", _gf_true);
+                if (ret) {
+                        gf_log ("cli", GF_LOG_ERROR, "Failed to set force "
+                                "option");
+                        goto out;
+                }
+        }
+
+        /* marker checked by glusterd to tell attach-tier from add-brick */
+        ret = dict_set_int32 (options, "attach-tier", 1);
+        if (ret)
+                goto out;
+
+        ret = dict_set_int32 (options, "type", GF_CLUSTER_TYPE_TIER);
+        if (ret)
+                goto out;
+
+        proc = &cli_rpc_prog->proctable[GLUSTER_CLI_ATTACH_TIER];
+
+        CLI_LOCAL_INIT (local, words, frame, options);
+
+        if (proc->fn) {
+                ret = proc->fn (frame, THIS, options);
+        }
+
+out:
+        if (ret) {
+                /* stay quiet if usage was printed or a request went out */
+                cli_cmd_sent_status_get (&sent);
+                if ((sent == 0) && (parse_error == 0))
+                        cli_out ("attach-tier failed");
+        }
+
+        CLI_STACK_DESTROY (frame);
+
+        return ret;
+}
+
+/*
+ * CLI callback for the "volume detach-tier <VOLNAME>" command.
+ *
+ * Builds an options dictionary ("force" = 1, "command" = GF_OP_CMD_DETACH,
+ * "volname" = words[2], "count" = 1) and hands it to the
+ * GLUSTER_CLI_DETACH_TIER procedure.
+ *
+ * @state:     CLI state; only state->mode is consulted here.
+ * @word:      matched command word; its pattern is printed when the
+ *             number of words is wrong.
+ * @words:     command-line tokens; exactly three are expected.
+ * @wordcount: number of entries in @words.
+ *
+ * Returns the value produced by the procedure's handler, or the non-zero
+ * result of whichever earlier step failed.
+ */
+int
+cli_cmd_volume_detach_tier_cbk (struct cli_state *state,
+                                struct cli_cmd_word *word, const char **words,
+                                int wordcount)
+{
+        int                   ret           = -1;
+        rpc_clnt_procedure_t *proc          = NULL;
+        call_frame_t         *frame         = NULL;
+        dict_t               *options       = NULL;
+        int                   sent          = 0;
+        int                   parse_error   = 0;
+        gf_answer_t           answer        = GF_ANSWER_NO;
+        cli_local_t          *local         = NULL;
+        int                   need_question = 0;
+
+        const char *question = "Removing tier can result in data loss. "
+                               "Do you want to Continue?";
+
+        if (wordcount != 3) {
+                /* wrong syntax: show usage instead of a bare failure */
+                cli_usage_out (word->pattern);
+                parse_error = 1;
+                goto out;
+        }
+
+        frame = create_frame (THIS, THIS->ctx->pool);
+        if (!frame)
+                goto out;
+
+        options = dict_new ();
+        if (!options)
+                goto out;
+
+        ret = dict_set_int32 (options, "force", 1);
+        if (ret)
+                goto out;
+
+        ret = dict_set_int32 (options, "command", GF_OP_CMD_DETACH);
+        if (ret)
+                goto out;
+
+        ret = dict_set_str (options, "volname", (char *)words[2]);
+        if (ret)
+                goto out;
+
+        ret = dict_set_int32 (options, "count", 1);
+        if (ret)
+                goto out;
+
+        /*
+         * NOTE(review): need_question is never set to a non-zero value in
+         * this function, so the confirmation below is currently never
+         * shown. Confirm whether detach-tier is meant to prompt.
+         */
+        if (!(state->mode & GLUSTER_MODE_SCRIPT) && need_question) {
+                answer = cli_cmd_get_confirmation (state, question);
+                if (GF_ANSWER_NO == answer) {
+                        ret = 0;
+                        goto out;
+                }
+        }
+
+        proc = &cli_rpc_prog->proctable[GLUSTER_CLI_DETACH_TIER];
+
+        CLI_LOCAL_INIT (local, words, frame, options);
+
+        if (proc->fn) {
+                ret = proc->fn (frame, THIS, options);
+        }
+
+out:
+        if (ret) {
+                /* stay quiet if usage was printed or a request went out */
+                cli_cmd_sent_status_get (&sent);
+                if ((sent == 0) && (parse_error == 0))
+                        cli_out ("Volume detach-tier failed");
+        }
+
+        CLI_STACK_DESTROY (frame);
+
+        return ret;
+}
+
static int
gf_cli_create_auxiliary_mount (char *volname)
{
@@ -2435,6 +2571,14 @@ struct cli_cmd volume_cmds[] = {
cli_cmd_volume_rename_cbk,
"rename volume <VOLNAME> to <NEW-VOLNAME>"},*/
+ { "volume attach-tier <VOLNAME> [<replica COUNT>] <NEW-BRICK>...",
+ cli_cmd_volume_attach_tier_cbk,
+ "attach tier to volume <VOLNAME>"},
+
+ { "volume detach-tier <VOLNAME>",
+ cli_cmd_volume_detach_tier_cbk,
+ "detach tier from volume <VOLNAME>"},
+
{ "volume add-brick <VOLNAME> [<stripe|replica> <COUNT>] <NEW-BRICK> ... [force]",
cli_cmd_volume_add_brick_cbk,
"add brick to volume <VOLNAME>"},
diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c
index 6e66e377ed5..c9b01694436 100644
--- a/cli/src/cli-rpc-ops.c
+++ b/cli/src/cli-rpc-ops.c
@@ -61,6 +61,7 @@ char *cli_vol_type_str[] = {"Distribute",
"Replicate",
"Striped-Replicate",
"Disperse",
+ "Tier",
"Distributed-Stripe",
"Distributed-Replicate",
"Distributed-Striped-Replicate",
@@ -739,8 +740,9 @@ xml_output:
vol_type = type;
// Distributed (stripe/replicate/stripe-replica) setups
- if ((type > 0) && ( dist_count < brick_count))
- vol_type = type + 4;
+ if ((type != GF_CLUSTER_TYPE_TIER) && (type > 0) &&
+ (dist_count < brick_count))
+ vol_type = type + 5;
cli_out ("Volume Name: %s", volname);
cli_out ("Type: %s", cli_vol_type_str[vol_type]);
@@ -1441,6 +1443,134 @@ out:
}
+/*
+ * Print a per-node status table for a tier operation.
+ *
+ * @dict:      response dictionary. "count" gives the number of nodes; for
+ *             each node i (1-based) the keys "status-i", "node-name-i",
+ *             "files-i", "size-i", "lookups-i", "failures-i", "skipped-i"
+ *             and "run-time-i" are read.
+ * @task_type: when this is not GF_TASK_TYPE_REBALANCE the skipped count is
+ *             folded into the failure count before printing.
+ *
+ * "count" and every "status-i" are mandatory: if one is missing the
+ * function stops and returns the lookup error. The remaining keys are
+ * optional; a missing one is logged at TRACE level and printed as its
+ * zero/NULL default. Nodes whose status is GF_DEFRAG_STATUS_NOT_STARTED
+ * are skipped.
+ *
+ * NOTE(review): the return value is that of the last dictionary lookup
+ * performed, so a missing optional key on the last printed node makes the
+ * function return non-zero even though the table was printed.
+ */
int
+gf_cli_print_tier_status (dict_t *dict, enum gf_task_types task_type)
+{
+        int                ret        = -1;
+        int                count      = 0;
+        int                i          = 1;
+        char               key[256]   = {0,};
+        gf_defrag_status_t status_rcd = GF_DEFRAG_STATUS_NOT_STARTED;
+        uint64_t           files      = 0;
+        uint64_t           size       = 0;
+        uint64_t           lookup     = 0;
+        char              *node_name  = NULL;
+        uint64_t           failures   = 0;
+        uint64_t           skipped    = 0;
+        double             elapsed    = 0;
+        char              *status_str = NULL;
+        char              *size_str   = NULL;
+
+        ret = dict_get_int32 (dict, "count", &count);
+        if (ret) {
+                gf_log ("cli", GF_LOG_ERROR, "count not set");
+                goto out;
+        }
+
+        cli_out ("%40s %16s %13s %13s %13s %13s %20s %18s", "Node",
+                 "Rebalanced-files", "size", "scanned", "failures", "skipped",
+                 "status", "run time in secs");
+        cli_out ("%40s %16s %13s %13s %13s %13s %20s %18s", "---------",
+                 "-----------", "-----------", "-----------", "-----------",
+                 "-----------", "------------", "--------------");
+
+        for (i = 1; i <= count; i++) {
+                /* Reset the variables to prevent carryover of values */
+                node_name = NULL;
+                files = 0;
+                size = 0;
+                lookup = 0;
+                failures = 0;
+                skipped = 0;
+                status_str = NULL;
+                elapsed = 0;
+
+                /* Check if status is NOT_STARTED, and continue early */
+                snprintf (key, sizeof (key), "status-%d", i);
+                ret = dict_get_int32 (dict, key, (int32_t *)&status_rcd);
+                if (ret) {
+                        gf_log ("cli", GF_LOG_TRACE, "failed to get status");
+                        goto out;
+                }
+                if (GF_DEFRAG_STATUS_NOT_STARTED == status_rcd)
+                        continue;
+
+                snprintf (key, sizeof (key), "node-name-%d", i);
+                ret = dict_get_str (dict, key, &node_name);
+                if (ret)
+                        gf_log ("cli", GF_LOG_TRACE, "failed to get node-name");
+
+                snprintf (key, sizeof (key), "files-%d", i);
+                ret = dict_get_uint64 (dict, key, &files);
+                if (ret)
+                        gf_log ("cli", GF_LOG_TRACE,
+                                "failed to get file count");
+
+                snprintf (key, sizeof (key), "size-%d", i);
+                ret = dict_get_uint64 (dict, key, &size);
+                if (ret)
+                        gf_log ("cli", GF_LOG_TRACE,
+                                "failed to get size of xfer");
+
+                snprintf (key, sizeof (key), "lookups-%d", i);
+                ret = dict_get_uint64 (dict, key, &lookup);
+                if (ret)
+                        gf_log ("cli", GF_LOG_TRACE,
+                                "failed to get lookedup file count");
+
+                snprintf (key, sizeof (key), "failures-%d", i);
+                ret = dict_get_uint64 (dict, key, &failures);
+                if (ret)
+                        gf_log ("cli", GF_LOG_TRACE,
+                                "failed to get failures count");
+
+                snprintf (key, sizeof (key), "skipped-%d", i);
+                ret = dict_get_uint64 (dict, key, &skipped);
+                if (ret)
+                        gf_log ("cli", GF_LOG_TRACE,
+                                "failed to get skipped count");
+
+                /* For remove-brick include skipped count into failure count*/
+                if (task_type != GF_TASK_TYPE_REBALANCE) {
+                        failures += skipped;
+                        skipped = 0;
+                }
+
+                snprintf (key, sizeof (key), "run-time-%d", i);
+                ret = dict_get_double (dict, key, &elapsed);
+                if (ret)
+                        gf_log ("cli", GF_LOG_TRACE, "failed to get run-time");
+
+                /* Check for array bound */
+                if (status_rcd >= GF_DEFRAG_STATUS_MAX)
+                        status_rcd = GF_DEFRAG_STATUS_MAX;
+
+                status_str = cli_vol_task_status_str[status_rcd];
+                size_str = gf_uint64_2human_readable(size);
+                if (size_str) {
+                        cli_out ("%40s %16"PRIu64 " %13s" " %13"PRIu64 " %13"
+                                 PRIu64" %13"PRIu64 " %20s %18.2f", node_name,
+                                 files, size_str, lookup, failures, skipped,
+                                 status_str, elapsed);
+                } else {
+                        /* fall back to the raw byte count */
+                        cli_out ("%40s %16"PRIu64 " %13"PRIu64 " %13"PRIu64
+                                 " %13"PRIu64" %13"PRIu64 " %20s %18.2f",
+                                 node_name, files, size, lookup, failures,
+                                 skipped, status_str, elapsed);
+                }
+                GF_FREE(size_str);
+        }
+out:
+        return ret;
+}
+
+int
gf_cli_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
@@ -1504,7 +1634,9 @@ gf_cli_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov,
}
}
- if (!((cmd == GF_DEFRAG_CMD_STOP) || (cmd == GF_DEFRAG_CMD_STATUS)) &&
+ if (!((cmd == GF_DEFRAG_CMD_STOP) ||
+ (cmd == GF_DEFRAG_CMD_STATUS) ||
+ (cmd == GF_DEFRAG_CMD_STATUS_TIER)) &&
!(global_state->mode & GLUSTER_MODE_XML)) {
/* All other possibilites are about starting a rebalance */
ret = dict_get_str (dict, GF_REBALANCE_TID_KEY, &task_id_str);
@@ -1577,7 +1709,12 @@ gf_cli_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov,
goto out;
}
- ret = gf_cli_print_rebalance_status (dict, GF_TASK_TYPE_REBALANCE);
+ if (cmd == GF_DEFRAG_CMD_STATUS_TIER)
+ ret = gf_cli_print_tier_status (dict, GF_TASK_TYPE_REBALANCE);
+ else
+ ret = gf_cli_print_rebalance_status (dict,
+ GF_TASK_TYPE_REBALANCE);
+
if (ret)
gf_log ("cli", GF_LOG_ERROR,
"Failed to print rebalance status");
@@ -3616,7 +3753,7 @@ int32_t
gf_cli_reset_volume (call_frame_t *frame, xlator_t *this,
void *data)
{
- gf_cli_req req = {{0,}};
+ gf_cli_req req = {{0,} };
int ret = 0;
dict_t *dict = NULL;
@@ -3665,7 +3802,7 @@ int32_t
gf_cli_set_volume (call_frame_t *frame, xlator_t *this,
void *data)
{
- gf_cli_req req = {{0,}};
+ gf_cli_req req = {{0,} };
int ret = 0;
dict_t *dict = NULL;
@@ -3691,7 +3828,7 @@ int32_t
gf_cli_add_brick (call_frame_t *frame, xlator_t *this,
void *data)
{
- gf_cli_req req = {{0,}};
+ gf_cli_req req = {{0,} };
int ret = 0;
dict_t *dict = NULL;
char *volname = NULL;
@@ -3726,6 +3863,66 @@ out:
}
+/*
+ * Send an attach-tier request to glusterd.
+ *
+ * @frame: call frame for the request.
+ * @this:  CLI xlator.
+ * @data:  options dictionary (dict_t *) to submit.
+ *
+ * The request is submitted as GLUSTER_CLI_ATTACH_TIER and the reply is
+ * handled by gf_cli_add_brick_cbk. Returns -1 if any argument is NULL,
+ * otherwise the result of cli_to_glusterd().
+ */
int32_t
+gf_cli_attach_tier (call_frame_t *frame, xlator_t *this,
+                    void *data)
+{
+        gf_cli_req  req  = {{0,} };
+        int         ret  = 0;
+        dict_t     *dict = NULL;
+
+        if (!frame || !this || !data) {
+                ret = -1;
+                goto out;
+        }
+
+        dict = data;
+
+        ret = cli_to_glusterd (&req, frame, gf_cli_add_brick_cbk,
+                               (xdrproc_t) xdr_gf_cli_req, dict,
+                               GLUSTER_CLI_ATTACH_TIER, this,
+                               cli_rpc_prog, NULL);
+out:
+        gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+
+        GF_FREE (req.dict.dict_val);
+        return ret;
+}
+
+/*
+ * Send a detach-tier request to glusterd.
+ *
+ * @frame: call frame for the request.
+ * @this:  CLI xlator.
+ * @data:  options dictionary (dict_t *) to submit.
+ *
+ * The request is submitted as GLUSTER_CLI_DETACH_TIER and the reply is
+ * handled by gf_cli_remove_brick_cbk. Returns -1 if any argument is NULL,
+ * otherwise the result of cli_to_glusterd().
+ */
+int32_t
+gf_cli_detach_tier (call_frame_t *frame, xlator_t *this,
+                    void *data)
+{
+        gf_cli_req  req  = {{0,} };
+        int         ret  = 0;
+        dict_t     *dict = NULL;
+
+        if (!frame || !this || !data) {
+                ret = -1;
+                goto out;
+        }
+
+        dict = data;
+
+        ret = cli_to_glusterd (&req, frame, gf_cli_remove_brick_cbk,
+                               (xdrproc_t) xdr_gf_cli_req, dict,
+                               GLUSTER_CLI_DETACH_TIER, this,
+                               cli_rpc_prog, NULL);
+
+out:
+        gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+
+        GF_FREE (req.dict.dict_val);
+
+        return ret;
+}
+
+
+int32_t
gf_cli_remove_brick (call_frame_t *frame, xlator_t *this,
void *data)
{
@@ -9965,7 +10162,9 @@ struct rpc_clnt_procedure gluster_cli_actors[GLUSTER_CLI_MAXVALUE] = {
[GLUSTER_CLI_BARRIER_VOLUME] = {"BARRIER VOLUME", gf_cli_barrier_volume},
[GLUSTER_CLI_GANESHA] = {"GANESHA", gf_cli_ganesha},
[GLUSTER_CLI_GET_VOL_OPT] = {"GET_VOL_OPT", gf_cli_get_vol_opt},
- [GLUSTER_CLI_BITROT] = {"BITROT", gf_cli_bitrot}
+ [GLUSTER_CLI_BITROT] = {"BITROT", gf_cli_bitrot},
+ [GLUSTER_CLI_ATTACH_TIER] = {"ATTACH_TIER", gf_cli_attach_tier},
+ [GLUSTER_CLI_DETACH_TIER] = {"DETACH_TIER", gf_cli_detach_tier}
};
struct rpc_clnt_program cli_prog = {
diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h
index bf68366f5dd..60697b8fa66 100644
--- a/rpc/rpc-lib/src/protocol-common.h
+++ b/rpc/rpc-lib/src/protocol-common.h
@@ -183,6 +183,8 @@ enum gluster_cli_procnum {
GLUSTER_CLI_GET_VOL_OPT,
GLUSTER_CLI_GANESHA,
GLUSTER_CLI_BITROT,
+ GLUSTER_CLI_ATTACH_TIER,
+ GLUSTER_CLI_DETACH_TIER,
GLUSTER_CLI_MAXVALUE,
};
diff --git a/rpc/xdr/src/cli1-xdr.x b/rpc/xdr/src/cli1-xdr.x
index 925700699ab..72581b0c5d5 100644
--- a/rpc/xdr/src/cli1-xdr.x
+++ b/rpc/xdr/src/cli1-xdr.x
@@ -3,7 +3,9 @@
GF_DEFRAG_CMD_STOP,
GF_DEFRAG_CMD_STATUS,
GF_DEFRAG_CMD_START_LAYOUT_FIX,
- GF_DEFRAG_CMD_START_FORCE /* used by remove-brick data migration */
+ GF_DEFRAG_CMD_START_FORCE, /* used by remove-brick data migration */
+ GF_DEFRAG_CMD_START_TIER,
+ GF_DEFRAG_CMD_STATUS_TIER
};
enum gf_defrag_status_t {
@@ -24,7 +26,8 @@
GF_CLUSTER_TYPE_STRIPE,
GF_CLUSTER_TYPE_REPLICATE,
GF_CLUSTER_TYPE_STRIPE_REPLICATE,
- GF_CLUSTER_TYPE_DISPERSE
+ GF_CLUSTER_TYPE_DISPERSE,
+ GF_CLUSTER_TYPE_TIER
};
enum gf1_cli_replace_op {
@@ -53,7 +56,8 @@ enum gf_bitrot_type {
GF_OP_CMD_COMMIT,
GF_OP_CMD_STOP,
GF_OP_CMD_STATUS,
- GF_OP_CMD_COMMIT_FORCE
+ GF_OP_CMD_COMMIT_FORCE,
+ GF_OP_CMD_DETACH
};
enum gf_quota_type {
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
index fd4618bb78c..fa5e533f135 100644
--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
@@ -288,6 +288,10 @@ gd_rmbr_validate_replica_count (glusterd_volinfo_t *volinfo,
int replica_nodes = 0;
switch (volinfo->type) {
+ case GF_CLUSTER_TYPE_TIER:
+ ret = 1;
+ goto out;
+
case GF_CLUSTER_TYPE_NONE:
case GF_CLUSTER_TYPE_STRIPE:
case GF_CLUSTER_TYPE_DISPERSE:
@@ -367,7 +371,6 @@ __glusterd_handle_add_brick (rpcsvc_request_t *req)
int32_t replica_count = 0;
int32_t stripe_count = 0;
int type = 0;
-
this = THIS;
GF_ASSERT(this);
@@ -454,6 +457,17 @@ __glusterd_handle_add_brick (rpcsvc_request_t *req)
total_bricks = volinfo->brick_count + brick_count;
+ if (dict_get (dict, "attach-tier")) {
+ if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
+ snprintf (err_str, sizeof (err_str),
+ "Volume %s is already a tier.", volname);
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ ret = -1;
+ goto out;
+ }
+ goto brick_val;
+ }
+
if (!stripe_count && !replica_count) {
if (volinfo->type == GF_CLUSTER_TYPE_NONE)
goto brick_val;
@@ -639,6 +653,40 @@ subvol_matcher_destroy (int *subvols)
GF_FREE (subvols);
}
+/*
+ * Fill @dict with the hot-tier bricks of @volinfo so that they can be
+ * removed by a detach operation.
+ *
+ * The brick list is walked from the tail; the first
+ * tier_info.cold_brick_count entries met that way are skipped and every
+ * remaining brick is stored as "brick<N>" = "<hostname>:<path>", with N
+ * counting from 1. "count" is then set to the number of bricks visited
+ * past the cold ones.
+ *
+ * The dictionary takes its own copy of each value
+ * (dict_set_dynstr_with_alloc), so nothing allocated here outlives the
+ * dictionary.
+ *
+ * Returns the hot brick count, or -1 if "count" could not be stored.
+ * If storing a brick fails the walk stops early; the brick that failed
+ * is still included in the returned count, as before.
+ * NOTE(review): the caller presumably notices the missing "brick<N>" key
+ * when it looks the bricks up again -- confirm.
+ */
+static int
+glusterd_set_detach_bricks(dict_t *dict, glusterd_volinfo_t *volinfo)
+{
+        char                  key[256]      = {0,};
+        char                  value[256]    = {0,};
+        int                   brick_num     = 0;
+        int                   hot_brick_num = 0;
+        glusterd_brickinfo_t *brickinfo     = NULL;
+        int                   ret           = 0;
+
+        /* cold tier bricks at tail of list so use reverse iteration */
+        cds_list_for_each_entry_reverse (brickinfo, &volinfo->bricks,
+                                         brick_list) {
+                brick_num++;
+                if (brick_num > volinfo->tier_info.cold_brick_count) {
+                        hot_brick_num++;
+                        snprintf (key, sizeof (key), "brick%d",
+                                  hot_brick_num);
+                        snprintf (value, sizeof (value), "%s:%s",
+                                  brickinfo->hostname,
+                                  brickinfo->path);
+
+                        /* dict duplicates and owns the value */
+                        ret = dict_set_dynstr_with_alloc (dict, key, value);
+                        if (ret)
+                                break;
+                }
+        }
+
+        ret = dict_set_int32(dict, "count", hot_brick_num);
+        if (ret)
+                return -1;
+
+        return hot_brick_num;
+}
+
int
__glusterd_handle_remove_brick (rpcsvc_request_t *req)
{
@@ -794,7 +842,8 @@ __glusterd_handle_remove_brick (rpcsvc_request_t *req)
/* Do not allow remove-brick if the bricks given is less than
the replica count or stripe count */
- if (!replica_count && (volinfo->type != GF_CLUSTER_TYPE_NONE)) {
+ if (!replica_count && (volinfo->type != GF_CLUSTER_TYPE_NONE) &&
+ (volinfo->type != GF_CLUSTER_TYPE_TIER)) {
if (volinfo->dist_leaf_count &&
(count % volinfo->dist_leaf_count)) {
snprintf (err_str, sizeof (err_str), "Remove brick "
@@ -813,6 +862,7 @@ __glusterd_handle_remove_brick (rpcsvc_request_t *req)
goto out;
}
+
strcpy (brick_list, " ");
if ((volinfo->type != GF_CLUSTER_TYPE_NONE) &&
@@ -822,6 +872,9 @@ __glusterd_handle_remove_brick (rpcsvc_request_t *req)
goto out;
}
+ if (volinfo->type == GF_CLUSTER_TYPE_TIER)
+ count = glusterd_set_detach_bricks(dict, volinfo);
+
while ( i <= count) {
snprintf (key, sizeof (key), "brick%d", i);
ret = dict_get_str (dict, key, &brick);
@@ -836,6 +889,7 @@ __glusterd_handle_remove_brick (rpcsvc_request_t *req)
ret = glusterd_volume_brickinfo_get_by_brick(brick, volinfo,
&brickinfo);
+
if (ret) {
snprintf (err_str, sizeof (err_str), "Incorrect brick "
"%s for volume %s", brick, volname);
@@ -883,7 +937,8 @@ out:
}
- GF_FREE (brick_list);
+ if (brick_list)
+ GF_FREE (brick_list);
subvol_matcher_destroy (subvols);
free (cli_req.dict.dict_val); //its malloced by xdr
@@ -1081,7 +1136,11 @@ glusterd_op_perform_add_bricks (glusterd_volinfo_t *volinfo, int32_t count,
ret = glusterd_resolve_brick (brickinfo);
if (ret)
goto out;
- if (stripe_count || replica_count) {
+
+ /* hot tier bricks are added to head of brick list */
+ if (dict_get (dict, "attach-tier")) {
+ cds_list_add (&brickinfo->brick_list, &volinfo->bricks);
+ } else if (stripe_count || replica_count) {
add_brick_at_right_order (brickinfo, volinfo, (i - 1),
stripe_count, replica_count);
} else {
@@ -1674,6 +1733,7 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr)
break;
+ case GF_OP_CMD_DETACH:
case GF_OP_CMD_COMMIT_FORCE:
break;
}
@@ -1767,6 +1827,35 @@ glusterd_remove_brick_migrate_cbk (glusterd_volinfo_t *volinfo,
return ret;
}
+/*
+ * Record the tier layout in volinfo->tier_info ahead of an attach-tier.
+ *
+ * The volume's current dist_leaf_count, type, brick_count, replica_count
+ * and disperse_count are saved as the cold tier's parameters. The hot
+ * tier's brick count is @count and its replica count is taken from
+ * "replica-count" in @dict, defaulting to 1 when that key is absent.
+ *
+ * @bricks is not used here.
+ *
+ * Always returns 0: a missing "replica-count" is handled by the default
+ * and is not an error.
+ */
+static int
+glusterd_op_perform_attach_tier (dict_t *dict,
+                                 glusterd_volinfo_t *volinfo,
+                                 int count,
+                                 char *bricks)
+{
+        int     replica_count = 0;
+
+        /*
+         * Store the new (cold) tier's structure until the graph is generated.
+         * If there is a failure before the graph is generated the
+         * structure will revert to its original state.
+         */
+        volinfo->tier_info.cold_dist_leaf_count = volinfo->dist_leaf_count;
+        volinfo->tier_info.cold_type = volinfo->type;
+        volinfo->tier_info.cold_brick_count = volinfo->brick_count;
+        volinfo->tier_info.cold_replica_count = volinfo->replica_count;
+        volinfo->tier_info.cold_disperse_count = volinfo->disperse_count;
+
+        /* no "replica-count" in the request means an unreplicated hot tier */
+        if (dict_get_int32 (dict, "replica-count", &replica_count))
+                replica_count = 1;
+
+        volinfo->tier_info.hot_replica_count = replica_count;
+        volinfo->tier_info.hot_brick_count = count;
+
+        return 0;
+}
int
glusterd_op_add_brick (dict_t *dict, char **op_errstr)
@@ -1778,6 +1867,7 @@ glusterd_op_add_brick (dict_t *dict, char **op_errstr)
xlator_t *this = NULL;
char *bricks = NULL;
int32_t count = 0;
+ int32_t replica_count = 0;
this = THIS;
GF_ASSERT (this);
@@ -1812,6 +1902,11 @@ glusterd_op_add_brick (dict_t *dict, char **op_errstr)
goto out;
}
+ if (dict_get(dict, "attach-tier")) {
+ gf_log (THIS->name, GF_LOG_DEBUG, "Adding tier");
+ glusterd_op_perform_attach_tier (dict, volinfo, count, bricks);
+ }
+
ret = glusterd_op_perform_add_bricks (volinfo, count, bricks, dict);
if (ret) {
gf_log ("", GF_LOG_ERROR, "Unable to add bricks");
@@ -1829,6 +1924,14 @@ out:
return ret;
}
+/*
+ * Copy the cold tier's saved type, replica count and disperse count from
+ * volinfo->tier_info back onto the volume itself.
+ */
+static void
+glusterd_op_perform_detach_tier (glusterd_volinfo_t *volinfo)
+{
+        volinfo->disperse_count = volinfo->tier_info.cold_disperse_count;
+        volinfo->replica_count  = volinfo->tier_info.cold_replica_count;
+        volinfo->type           = volinfo->tier_info.cold_type;
+}
+
int
glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
{
@@ -1959,6 +2062,10 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
force = 1;
break;
+ case GF_OP_CMD_DETACH:
+ glusterd_op_perform_detach_tier (volinfo);
+ /* fall through */
+
case GF_OP_CMD_COMMIT_FORCE:
if (volinfo->decommission_in_progress) {
@@ -2051,7 +2158,12 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
volinfo->sub_count = replica_count;
volinfo->dist_leaf_count = glusterd_get_dist_leaf_count (volinfo);
- if (replica_count == 1) {
+ /*
+ * volinfo->type and sub_count have already been set for
+ * volumes undergoing a detach operation, they should not
+ * be modified here.
+ */
+ if ((replica_count == 1) && (cmd != GF_OP_CMD_DETACH)) {
if (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) {
volinfo->type = GF_CLUSTER_TYPE_NONE;
/* backward compatibility */
@@ -2224,3 +2336,16 @@ out:
gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
return ret;
}
+
+/*
+ * RPC entry point for GLUSTER_CLI_ATTACH_TIER: runs the add-brick handler
+ * through glusterd_big_locked_handler().
+ */
+int
+glusterd_handle_attach_tier (rpcsvc_request_t *req)
+{
+        int     ret = 0;
+
+        ret = glusterd_big_locked_handler (req, __glusterd_handle_add_brick);
+
+        return ret;
+}
+
+/*
+ * RPC entry point for GLUSTER_CLI_DETACH_TIER: runs the remove-brick
+ * handler through glusterd_big_locked_handler().
+ */
+int
+glusterd_handle_detach_tier (rpcsvc_request_t *req)
+{
+        int     ret = 0;
+
+        ret = glusterd_big_locked_handler (req,
+                                           __glusterd_handle_remove_brick);
+
+        return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
index 77fa96400ba..a41b36b9715 100644
--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
+++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
@@ -4817,6 +4817,8 @@ rpcsvc_actor_t gd_svc_cli_actors[GLUSTER_CLI_MAXVALUE] = {
[GLUSTER_CLI_DELETE_VOLUME] = { "DELETE_VOLUME", GLUSTER_CLI_DELETE_VOLUME, glusterd_handle_cli_delete_volume, NULL, 0, DRC_NA},
[GLUSTER_CLI_GET_VOLUME] = { "GET_VOLUME", GLUSTER_CLI_GET_VOLUME, glusterd_handle_cli_get_volume, NULL, 0, DRC_NA},
[GLUSTER_CLI_ADD_BRICK] = { "ADD_BRICK", GLUSTER_CLI_ADD_BRICK, glusterd_handle_add_brick, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_ATTACH_TIER] = { "ATTACH_TIER", GLUSTER_CLI_ATTACH_TIER, glusterd_handle_attach_tier, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_DETACH_TIER] = { "DETACH_TIER", GLUSTER_CLI_DETACH_TIER, glusterd_handle_detach_tier, NULL, 0, DRC_NA},
[GLUSTER_CLI_REPLACE_BRICK] = { "REPLACE_BRICK", GLUSTER_CLI_REPLACE_BRICK, glusterd_handle_replace_brick, NULL, 0, DRC_NA},
[GLUSTER_CLI_REMOVE_BRICK] = { "REMOVE_BRICK", GLUSTER_CLI_REMOVE_BRICK, glusterd_handle_remove_brick, NULL, 0, DRC_NA},
[GLUSTER_CLI_LOG_ROTATE] = { "LOG FILENAME", GLUSTER_CLI_LOG_ROTATE, glusterd_handle_log_rotate, NULL, 0, DRC_NA},
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
index 75756518f28..c5fcb7698e5 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
@@ -448,6 +448,7 @@ glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickin
char name[1024] = {0,};
gf_xl_afr_op_t heal_op = GF_SHD_OP_INVALID;
xlator_t *this = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
this = THIS;
GF_ASSERT (this);
@@ -514,7 +515,11 @@ glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickin
ret = dict_get_str (dict, "volname", &volname);
if (ret)
goto out;
- snprintf (name, 1024, "%s-dht",volname);
+                ret = glusterd_volinfo_find (volname, &volinfo);
+                if (ret)
+                        /* volinfo is not valid when the lookup fails */
+                        goto out;
+                if (volinfo->type == GF_CLUSTER_TYPE_TIER)
+                        snprintf (name, 1024, "tier-dht");
+                else
+                        snprintf (name, 1024, "%s-dht", volname);
brick_req->name = gf_strdup (name);
break;
@@ -5159,6 +5164,7 @@ glusterd_bricks_select_remove_brick (dict_t *dict, char **op_errstr,
while ( i <= count) {
snprintf (key, 256, "brick%d", i);
+
ret = dict_get_str (dict, key, &brick);
if (ret) {
gf_log ("glusterd", GF_LOG_ERROR, "Unable to get brick");
@@ -5167,8 +5173,10 @@ glusterd_bricks_select_remove_brick (dict_t *dict, char **op_errstr,
ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo,
&brickinfo);
+
if (ret)
goto out;
+
if (glusterd_is_brick_started (brickinfo)) {
pending_node = GF_CALLOC (1, sizeof (*pending_node),
gf_gld_mt_pending_node_t);
diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
index ba67df436ff..0d66571300f 100644
--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c
+++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
@@ -278,6 +278,13 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,
"--xlator-option", "*replicate*.readdir-failover=off",
"--xlator-option", "*dht.readdir-optimize=on",
NULL);
+
+ if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
+ runner_add_arg (&runner, "--xlator-option");
+ runner_argprintf (&runner,
+ "*tier-dht.xattr-name=trusted.tier-gfid");
+ }
+
runner_add_arg (&runner, "--xlator-option");
runner_argprintf ( &runner, "*dht.rebalance-cmd=%d",cmd);
runner_add_arg (&runner, "--xlator-option");
@@ -487,6 +494,7 @@ __glusterd_handle_defrag_volume (rpcsvc_request_t *req)
goto out;
if ((cmd == GF_DEFRAG_CMD_STATUS) ||
+ (cmd == GF_DEFRAG_CMD_STATUS_TIER) ||
(cmd == GF_DEFRAG_CMD_STOP)) {
ret = glusterd_op_begin (req, GD_OP_DEFRAG_BRICK_VOLUME,
dict, msg, sizeof (msg));
@@ -556,6 +564,7 @@ glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr)
switch (cmd) {
case GF_DEFRAG_CMD_START:
case GF_DEFRAG_CMD_START_LAYOUT_FIX:
+ case GF_DEFRAG_CMD_START_TIER:
/* Check if the connected clients are all of version
* glusterfs-3.6 and higher. This is needed to prevent some data
* loss issues that could occur when older clients are connected
@@ -690,7 +699,9 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
/* Set task-id, if available, in op_ctx dict for operations other than
* start
*/
- if (cmd == GF_DEFRAG_CMD_STATUS || cmd == GF_DEFRAG_CMD_STOP) {
+ if (cmd == GF_DEFRAG_CMD_STATUS ||
+ cmd == GF_DEFRAG_CMD_STOP ||
+ cmd == GF_DEFRAG_CMD_STATUS_TIER) {
if (!uuid_is_null (volinfo->rebal.rebalance_id)) {
ctx = glusterd_op_get_ctx ();
if (!ctx) {
@@ -720,6 +731,7 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
case GF_DEFRAG_CMD_START:
case GF_DEFRAG_CMD_START_LAYOUT_FIX:
case GF_DEFRAG_CMD_START_FORCE:
+ case GF_DEFRAG_CMD_START_TIER:
/* Reset defrag status to 'NOT STARTED' whenever a
* remove-brick/rebalance command is issued to remove
* stale information from previous run.
@@ -791,6 +803,7 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
break;
case GF_DEFRAG_CMD_STATUS:
+ case GF_DEFRAG_CMD_STATUS_TIER:
break;
default:
break;
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
index 5b2b14503ae..5696229572d 100644
--- a/xlators/mgmt/glusterd/src/glusterd-store.c
+++ b/xlators/mgmt/glusterd/src/glusterd-store.c
@@ -812,6 +812,63 @@ out:
" for volume %s", volinfo->volname);
return ret;
}
+
+/*
+ * Write the tier parameters of @volinfo to the store file open on @fd.
+ *
+ * Nothing is written (and 0 is returned) unless the volume type is
+ * GF_CLUSTER_TYPE_TIER. Otherwise each tier_info field is formatted as a
+ * decimal integer and saved under its GLUSTERD_STORE_KEY_* key, in the
+ * order of the table below. The first failing gf_store_save_value() call
+ * stops the sequence and its result is returned; 0 means every value was
+ * saved.
+ */
+int32_t
+glusterd_volume_write_tier_details (int fd, glusterd_volinfo_t *volinfo)
+{
+        struct {
+                char *key;
+                int   value;
+        } entries[] = {
+                { GLUSTERD_STORE_KEY_COLD_COUNT,
+                  volinfo->tier_info.cold_brick_count },
+                { GLUSTERD_STORE_KEY_COLD_REPLICA_COUNT,
+                  volinfo->tier_info.cold_replica_count },
+                { GLUSTERD_STORE_KEY_COLD_DISPERSE_COUNT,
+                  volinfo->tier_info.cold_disperse_count },
+                { GLUSTERD_STORE_KEY_HOT_COUNT,
+                  volinfo->tier_info.hot_brick_count },
+                { GLUSTERD_STORE_KEY_HOT_REPLICA_COUNT,
+                  volinfo->tier_info.hot_replica_count },
+                { GLUSTERD_STORE_KEY_HOT_TYPE,
+                  volinfo->tier_info.hot_type },
+                { GLUSTERD_STORE_KEY_COLD_TYPE,
+                  volinfo->tier_info.cold_type },
+        };
+        char     buf[PATH_MAX] = "";
+        int32_t  ret           = 0;
+        size_t   idx           = 0;
+
+        /* only tiered volumes carry these keys */
+        if (volinfo->type != GF_CLUSTER_TYPE_TIER)
+                return 0;
+
+        for (idx = 0; idx < sizeof (entries) / sizeof (entries[0]); idx++) {
+                snprintf (buf, sizeof (buf), "%d", entries[idx].value);
+                ret = gf_store_save_value (fd, entries[idx].key, buf);
+                if (ret)
+                        break;
+        }
+
+        return ret;
+}
+
+
int32_t
glusterd_volume_exclude_options_write (int fd, glusterd_volinfo_t *volinfo)
{
@@ -917,6 +974,8 @@ glusterd_volume_exclude_options_write (int fd, glusterd_volinfo_t *volinfo)
goto out;
}
+ ret = glusterd_volume_write_tier_details (fd, volinfo);
+
ret = glusterd_volume_write_snap_details (fd, volinfo);
out:
@@ -2725,6 +2784,27 @@ glusterd_store_update_volinfo (glusterd_volinfo_t *volinfo)
strlen (GLUSTERD_STORE_KEY_PARENT_VOLNAME))) {
strncpy (volinfo->parent_volname, value,
sizeof(volinfo->parent_volname) - 1);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_COLD_COUNT,
+                                     strlen (key))) {
+                        volinfo->tier_info.cold_brick_count = atoi (value);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_COLD_REPLICA_COUNT,
+                                     strlen (key))) {
+                        volinfo->tier_info.cold_replica_count = atoi (value);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_COLD_DISPERSE_COUNT,
+                                     strlen (key))) {
+                        volinfo->tier_info.cold_disperse_count = atoi (value);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_HOT_COUNT,
+                                     strlen (key))) {
+                        /* hot keys restore the hot fields, not the cold ones */
+                        volinfo->tier_info.hot_brick_count = atoi (value);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_HOT_REPLICA_COUNT,
+                                     strlen (key))) {
+                        volinfo->tier_info.hot_replica_count = atoi (value);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_HOT_TYPE,
+                                     strlen (key))) {
+                        volinfo->tier_info.hot_type = atoi (value);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_COLD_TYPE,
+                                     strlen (key))) {
+                        volinfo->tier_info.cold_type = atoi (value);
} else {
if (is_key_glusterd_hooks_friendly (key)) {
@@ -2809,6 +2889,9 @@ glusterd_store_update_volinfo (glusterd_volinfo_t *volinfo)
GF_ASSERT (volinfo->redundancy_count > 0);
break;
+ case GF_CLUSTER_TYPE_TIER:
+ break;
+
default:
GF_ASSERT (0);
break;
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h
index afa96be77cf..45ed86a4163 100644
--- a/xlators/mgmt/glusterd/src/glusterd-store.h
+++ b/xlators/mgmt/glusterd/src/glusterd-store.h
@@ -64,6 +64,14 @@ typedef enum glusterd_store_ver_ac_{
#define GLUSTERD_STORE_KEY_VOL_OP_VERSION "op-version"
#define GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION "client-op-version"
+#define GLUSTERD_STORE_KEY_COLD_TYPE "cold_type"
+#define GLUSTERD_STORE_KEY_COLD_COUNT "cold_count"
+#define GLUSTERD_STORE_KEY_COLD_REPLICA_COUNT "cold_replica_count"
+#define GLUSTERD_STORE_KEY_COLD_DISPERSE_COUNT "cold_disperse_count"
+#define GLUSTERD_STORE_KEY_HOT_TYPE "hot_type"
+#define GLUSTERD_STORE_KEY_HOT_COUNT "hot_count"
+#define GLUSTERD_STORE_KEY_HOT_REPLICA_COUNT "hot_replica_count"
+
#define GLUSTERD_STORE_KEY_SNAP_NAME "name"
#define GLUSTERD_STORE_KEY_SNAP_ID "snap-id"
#define GLUSTERD_STORE_KEY_SNAP_DESC "desc"
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 727a19d24d1..27357955fe8 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -560,6 +560,7 @@ glusterd_volinfo_dup (glusterd_volinfo_t *volinfo,
new_volinfo->sub_count = volinfo->sub_count;
new_volinfo->transport_type = volinfo->transport_type;
new_volinfo->brick_count = volinfo->brick_count;
+ new_volinfo->tier_info = volinfo->tier_info;
dict_copy (volinfo->dict, new_volinfo->dict);
dict_copy (volinfo->gsync_slaves, new_volinfo->gsync_slaves);
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
index 114e57485fc..79da432bafe 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
@@ -1472,7 +1472,6 @@ brick_graph_add_posix (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
out:
return ret;
}
-
static int
brick_graph_add_trash (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
dict_t *set_dict, glusterd_brickinfo_t *brickinfo)
@@ -2712,24 +2711,22 @@ out:
}
static int
-volgen_graph_build_clusters (volgen_graph_t *graph,
- glusterd_volinfo_t *volinfo, char *xl_type,
- char *xl_namefmt, size_t child_count,
- size_t sub_count)
+volgen_link_bricks (volgen_graph_t *graph,
+ glusterd_volinfo_t *volinfo, char *xl_type,
+ char *xl_namefmt, size_t child_count,
+ size_t sub_count,
+ xlator_t *trav)
{
int i = 0;
int j = 0;
- xlator_t *txl = NULL;
xlator_t *xl = NULL;
- xlator_t *trav = NULL;
char *volname = NULL;
int ret = -1;
if (child_count == 0)
goto out;
volname = volinfo->volname;
- txl = first_of (graph);
- for (trav = txl; --child_count; trav = trav->next);
+
for (;; trav = trav->prev) {
if ((i % sub_count) == 0) {
xl = volgen_graph_add_nolink (graph, xl_type,
@@ -2745,10 +2742,9 @@ volgen_graph_build_clusters (volgen_graph_t *graph,
if (ret)
goto out;
- if (trav == txl)
- break;
-
i++;
+ if (i == child_count)
+ break;
}
ret = j;
@@ -2756,6 +2752,46 @@ out:
return ret;
}
+/**
+ * Link the child_count most recently added xlators of the graph under new
+ * cluster xlators of type xl_type.
+ *
+ * Starting at first_of(graph), steps (child_count - 1) times along ->next
+ * and hands that xlator to volgen_link_bricks(), which walks back toward
+ * ->prev from there.
+ *
+ * Returns whatever volgen_link_bricks() returns, or -1 when child_count is
+ * zero or the graph holds fewer than child_count xlators.
+ */
+static int
+volgen_link_bricks_from_list_tail (volgen_graph_t *graph,
+                                   glusterd_volinfo_t *volinfo,
+                                   char *xl_type,
+                                   char *xl_namefmt, size_t child_count,
+                                   size_t sub_count)
+{
+        xlator_t *trav = NULL;
+        size_t    cnt  = child_count;
+
+        /* child_count is unsigned: without this guard the "--cnt" below
+         * would wrap around and the walk would run off the list.
+         * volgen_link_bricks() also answers -1 for an empty request. */
+        if (child_count == 0)
+                return -1;
+
+        /* Stop early instead of dereferencing NULL if the list is
+         * shorter than the caller claimed. */
+        for (trav = first_of(graph); trav && --cnt; trav = trav->next)
+                ;
+
+        if (!trav)
+                return -1;
+
+        return volgen_link_bricks (graph, volinfo,
+                                   xl_type,
+                                   xl_namefmt,
+                                   child_count,
+                                   sub_count,
+                                   trav);
+}
+
+/**
+ * Variant of the brick-linking helpers that starts from the far end of the
+ * graph's xlator list: follows ->next from first_of(graph) until the last
+ * entry and passes that entry to volgen_link_bricks() as the start point.
+ *
+ * The return value is that of volgen_link_bricks().
+ */
+static int
+volgen_link_bricks_from_list_head (volgen_graph_t *graph,
+                                   glusterd_volinfo_t *volinfo, char *xl_type,
+                                   char *xl_namefmt, size_t child_count,
+                                   size_t sub_count)
+{
+        xlator_t *last = first_of(graph);
+
+        /* advance to the entry that has no successor */
+        while (last->next)
+                last = last->next;
+
+        return volgen_link_bricks (graph, volinfo, xl_type, xl_namefmt,
+                                   child_count, sub_count, last);
+}
+
/**
* This is the build graph function for user-serviceable snapshots.
* Generates snapview-client
@@ -2948,7 +2984,7 @@ volgen_graph_build_dht_cluster (volgen_graph_t *graph,
else
name_fmt = "%s-dht";
- clusters = volgen_graph_build_clusters (graph, volinfo,
+ clusters = volgen_link_bricks_from_list_tail (graph, volinfo,
voltype,
name_fmt,
child_count,
@@ -2985,7 +3021,7 @@ volgen_graph_build_ec_clusters (volgen_graph_t *graph,
xlator_t *ec = NULL;
char option[32] = {0};
- clusters = volgen_graph_build_clusters (graph, volinfo,
+ clusters = volgen_link_bricks_from_list_tail (graph, volinfo,
disperse_args[0],
disperse_args[1],
volinfo->brick_count,
@@ -3015,12 +3051,19 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph,
{
char *replicate_args[] = {"cluster/replicate",
"%s-replicate-%d"};
+ char *tier_args[] = {"cluster/tier",
+ "%s-tier-%d"};
char *stripe_args[] = {"cluster/stripe",
"%s-stripe-%d"};
+ char *disperse_args[] = {"cluster/disperse",
+ "%s-disperse-%d"};
+ char option[32] = "";
int rclusters = 0;
int clusters = 0;
int dist_count = 0;
int ret = -1;
+ xlator_t *ec = NULL;
+ xlator_t *client = NULL;
if (!volinfo->dist_leaf_count)
goto out;
@@ -3031,7 +3074,7 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph,
/* All other cases, it will have one or the other cluster type */
switch (volinfo->type) {
case GF_CLUSTER_TYPE_REPLICATE:
- clusters = volgen_graph_build_clusters (graph, volinfo,
+ clusters = volgen_link_bricks_from_list_tail (graph, volinfo,
replicate_args[0],
replicate_args[1],
volinfo->brick_count,
@@ -3040,7 +3083,7 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph,
goto out;
break;
case GF_CLUSTER_TYPE_STRIPE:
- clusters = volgen_graph_build_clusters (graph, volinfo,
+ clusters = volgen_link_bricks_from_list_tail (graph, volinfo,
stripe_args[0],
stripe_args[1],
volinfo->brick_count,
@@ -3048,11 +3091,18 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph,
if (clusters < 0)
goto out;
break;
+ case GF_CLUSTER_TYPE_TIER:
+ ret = volgen_link_bricks_from_list_head (graph, volinfo,
+ tier_args[0],
+ tier_args[1],
+ volinfo->brick_count,
+ volinfo->replica_count);
+ break;
case GF_CLUSTER_TYPE_STRIPE_REPLICATE:
/* Replicate after the clients, then stripe */
if (volinfo->replica_count == 0)
goto out;
- clusters = volgen_graph_build_clusters (graph, volinfo,
+ clusters = volgen_link_bricks_from_list_tail (graph, volinfo,
replicate_args[0],
replicate_args[1],
volinfo->brick_count,
@@ -3062,7 +3112,7 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph,
rclusters = volinfo->brick_count / volinfo->replica_count;
GF_ASSERT (rclusters == clusters);
- clusters = volgen_graph_build_clusters (graph, volinfo,
+ clusters = volgen_link_bricks_from_list_tail (graph, volinfo,
stripe_args[0],
stripe_args[1],
rclusters,
@@ -3162,7 +3212,7 @@ graph_set_generic_options (xlator_t *this, volgen_graph_t *graph,
"log-buf-size option");
ret = volgen_graph_set_options_generic (graph, set_dict, "client",
- &log_flush_timeout_option_handler);
+ &log_flush_timeout_option_handler);
if (ret)
gf_log (this->name, GF_LOG_WARNING, "Failed to change "
"log-flush-timeout option");
@@ -3170,6 +3220,88 @@ graph_set_generic_options (xlator_t *this, volgen_graph_t *graph,
}
static int
+/*
+ * Build the client-side cluster layers of a tiered volume.
+ *
+ * The volinfo is temporarily rewritten to describe first the cold tier and
+ * then the hot tier (counts, type and a "-cold" / "-hot" suffixed volname),
+ * so the regular cluster builders can be reused for each half. The xlator
+ * at first_of(graph) after each build is taken as that tier's top, and both
+ * tops are linked under a new "cluster/tier" xlator named "tier-dht".
+ *
+ * The saved volinfo fields are restored on every exit path.
+ *
+ * graph:     graph being generated
+ * volinfo:   volume description; modified during the call, restored at exit
+ * is_quotad: not used by this function
+ *
+ * Returns 0 on success, -1 (or the failing callee's non-zero value from the
+ * cold-tier build) on error.
+ */
+volume_volgen_graph_build_clusters_tier (volgen_graph_t *graph,
+                                         glusterd_volinfo_t *volinfo,
+                                         gf_boolean_t is_quotad)
+{
+        int       ret                = -1;
+        xlator_t *xl                 = NULL;
+        xlator_t *hxl                = NULL;
+        xlator_t *cxl                = NULL;
+        char     *rule               = NULL;
+        int       st_brick_count     = 0;
+        int       st_replica_count   = 0;
+        int       st_disperse_count  = 0;
+        int       st_dist_leaf_count = 0;
+        int       st_type            = 0;
+        char      st_volname[GD_VOLUME_NAME_MAX];
+        int       dist_count         = 0;
+
+        /* Remember everything that is overwritten below. */
+        st_brick_count = volinfo->brick_count;
+        st_replica_count = volinfo->replica_count;
+        st_disperse_count = volinfo->disperse_count;
+        st_type = volinfo->type;
+        st_dist_leaf_count = volinfo->dist_leaf_count;
+        strcpy(st_volname, volinfo->volname);
+
+        /* Pass 1: make volinfo look like the cold tier and build it. */
+        volinfo->dist_leaf_count = volinfo->tier_info.cold_dist_leaf_count;
+        volinfo->brick_count = volinfo->tier_info.cold_brick_count;
+        volinfo->replica_count = volinfo->tier_info.cold_replica_count;
+        volinfo->disperse_count = volinfo->tier_info.cold_disperse_count;
+        volinfo->type = volinfo->tier_info.cold_type;
+        sprintf (volinfo->volname, "%s-cold", st_volname);
+
+        ret = volume_volgen_graph_build_clusters (graph, volinfo, _gf_false);
+        if (ret)
+                goto out;
+        cxl = first_of(graph);
+
+        /* Pass 2: make volinfo look like the hot tier and build it. */
+        volinfo->type = GF_CLUSTER_TYPE_TIER;
+        volinfo->brick_count = volinfo->tier_info.hot_brick_count;
+        volinfo->replica_count = volinfo->tier_info.hot_replica_count;
+        volinfo->dist_leaf_count = glusterd_get_dist_leaf_count(volinfo);
+        volinfo->disperse_count = 0;
+
+        sprintf (volinfo->volname, "%s-hot", st_volname);
+
+        if (volinfo->dist_leaf_count == 1) {
+                dist_count = volinfo->brick_count / volinfo->dist_leaf_count;
+                ret = volgen_link_bricks_from_list_head (graph, volinfo,
+                                                         "cluster/distribute",
+                                                         "%s-dht",
+                                                         dist_count,
+                                                         dist_count);
+        } else {
+                ret = volume_volgen_graph_build_clusters (graph,
+                                                          volinfo,
+                                                          _gf_false);
+        }
+        /* The link helper reports a cluster count (>= 0) on success, so
+         * only a negative value means the hot tier could not be built.
+         * Previously this result was silently overwritten. */
+        if (ret < 0)
+                goto out;
+
+        hxl = first_of(graph);
+
+        xl = volgen_graph_add_nolink (graph, "cluster/tier", "%s",
+                                      "tier-dht", 0);
+        if (!xl) {
+                ret = -1;
+                goto out;
+        }
+
+        /* NOTE(review): rule is heap-allocated here; whether
+         * xlator_set_option() copies the string or keeps the pointer is
+         * not visible from this function - confirm before freeing it. */
+        if (gf_asprintf(&rule, "%s-hot-dht", st_volname) < 0) {
+                ret = -1;
+                goto out;
+        }
+
+        if (xlator_set_option(xl, "rule", rule)) {
+                ret = -1;
+                goto out;
+        }
+        if (xlator_set_option(xl, "xattr-name", "trusted.tier-gfid")) {
+                ret = -1;
+                goto out;
+        }
+
+        /* Cold subvolume is linked first, hot second. */
+        ret = volgen_xlator_link (xl, cxl);
+        if (ret)
+                goto out;
+        ret = volgen_xlator_link (xl, hxl);
+        if (ret)
+                goto out;
+
+        /* On success the restored volume type is forced to TIER. */
+        st_type = GF_CLUSTER_TYPE_TIER;
+
+ out:
+        volinfo->brick_count = st_brick_count;
+        volinfo->replica_count = st_replica_count;
+        volinfo->disperse_count = st_disperse_count;
+        volinfo->type = st_type;
+        volinfo->dist_leaf_count = st_dist_leaf_count;
+        strcpy(volinfo->volname, st_volname);
+
+        return ret;
+}
+
+static int
client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
dict_t *set_dict, void *param)
{
@@ -3188,11 +3320,16 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
GF_ASSERT (conf);
volname = volinfo->volname;
- ret = volgen_graph_build_clients (graph, volinfo, set_dict, param);
+ ret = volgen_graph_build_clients (graph, volinfo, set_dict,
+ param);
if (ret)
goto out;
- ret = volume_volgen_graph_build_clusters (graph, volinfo, _gf_false);
+ if (volinfo->type == GF_CLUSTER_TYPE_TIER)
+ ret = volume_volgen_graph_build_clusters_tier (graph, volinfo, _gf_false);
+ else
+ ret = volume_volgen_graph_build_clusters (graph, volinfo, _gf_false);
+
if (ret == -1)
goto out;
@@ -3730,7 +3867,7 @@ volgen_graph_build_replicate_clusters (volgen_graph_t *graph,
char *replicate_args[] = {"cluster/replicate",
"%s-replicate-%d"};
- return volgen_graph_build_clusters (graph, volinfo, "cluster/replicate",
+ return volgen_link_bricks_from_list_tail (graph, volinfo, "cluster/replicate",
"%s-replicate-%d",
volinfo->brick_count,
volinfo->replica_count);
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index ae866b7ccfc..ada814bb25d 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -1690,6 +1690,32 @@ struct volopt_map_entry glusterd_volopt_map[] = {
.voltype = "features/trash",
.op_version = GD_OP_VERSION_3_7_0,
},
+
+ /* tier translator - global tunables */
+ { .key = "cluster.write-freq-thresold",
+ .voltype = "cluster/tier",
+ .option = "write-freq-thresold",
+ .op_version = GD_OP_VERSION_3_7_0,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.read-freq-thresold",
+ .voltype = "cluster/tier",
+ .option = "read-freq-thresold",
+ .op_version = GD_OP_VERSION_3_7_0,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.tier-promote-frequency",
+ .voltype = "cluster/tier",
+ .option = "tier-promote-frequency",
+ .op_version = GD_OP_VERSION_3_7_0,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.tier-demote-frequency",
+ .voltype = "cluster/tier",
+ .option = "tier-demote-frequency",
+ .op_version = GD_OP_VERSION_3_7_0,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
{ .key = "features.ctr-enabled",
.voltype = "features/changetimerecorder",
.value = "off",
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
index ad280eda053..bac1598598b 100644
--- a/xlators/mgmt/glusterd/src/glusterd.h
+++ b/xlators/mgmt/glusterd/src/glusterd.h
@@ -302,9 +302,6 @@ typedef struct tier_info_ {
int hot_type;
int hot_brick_count;
int hot_replica_count;
- int hot_disperse_count;
- /*Commented for now Dan's DHT Tier patch will have it*/
- /*tier_group_t *root;*/
} gd_tier_info_t;
struct glusterd_volinfo_ {
@@ -814,6 +811,12 @@ int
glusterd_handle_add_brick (rpcsvc_request_t *req);
int
+glusterd_handle_attach_tier (rpcsvc_request_t *req);
+
+int
+glusterd_handle_detach_tier (rpcsvc_request_t *req);
+
+int
glusterd_handle_replace_brick (rpcsvc_request_t *req);
int