summaryrefslogtreecommitdiffstats
path: root/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
diff options
context:
space:
mode:
authorDan Lambright <dlambrig@redhat.com>2015-02-25 16:11:23 -0500
committerVijay Bellur <vbellur@redhat.com>2015-03-19 06:32:28 -0700
commit6f71bc02df5bd177c2f5dbf4e54b2af1525ab979 (patch)
treea676a70da909dedebc21dca408fafc9dee9d5810 /xlators/mgmt/glusterd/src/glusterd-brick-ops.c
parent99586305f66d6b5e81542139d84fbf111ace2554 (diff)
glusterd: CLI commands to create and manage tiered volumes.
A tiered volume is a normal volume with some number of new bricks representing "hot" storage. The "hot" bricks can be attached to or detached from a normal volume dynamically. When this happens, a new graph is constructed. The root of the new graph is an instance of the tier translator. One subvolume of the tier translator leads to the old volume, and another leads to the new hot bricks.

New CLI commands:

attach-tier <VOLNAME> [<replica> <COUNT>] <NEW-BRICK> ... [force]
volume detach-tier <VOLNAME> [replica <COUNT>] <BRICK> ... <start|stop|status|commit|force>
gluster volume rebalance <volume> tier start
gluster volume rebalance <volume> tier stop
gluster volume rebalance <volume> tier status

The "tier start" CLI command starts a server-side daemon. The daemon initiates file-level migration based on caching policies. The daemon can be monitored and stopped. Note: development of the "tier status" command is incomplete; it will be added in a subsequent patch.

When the "hot" storage is detached, the tier translator is removed from the graph and the tiered volume reverts to its original state as described in the volume's info file.

For more background and design see the feature page [1].

[1] http://www.gluster.org/community/documentation/index.php/Features/data-classification

Change-Id: Ic8042ce37327b850b9e199236e5be3dae95d2472
BUG: 1194753
Signed-off-by: Dan Lambright <dlambrig@redhat.com>
Reviewed-on: http://review.gluster.org/9753
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Tested-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'xlators/mgmt/glusterd/src/glusterd-brick-ops.c')
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-brick-ops.c135
1 files changed, 130 insertions, 5 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
index fd4618bb78c..fa5e533f135 100644
--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
@@ -288,6 +288,10 @@ gd_rmbr_validate_replica_count (glusterd_volinfo_t *volinfo,
int replica_nodes = 0;
switch (volinfo->type) {
+ case GF_CLUSTER_TYPE_TIER:
+ ret = 1;
+ goto out;
+
case GF_CLUSTER_TYPE_NONE:
case GF_CLUSTER_TYPE_STRIPE:
case GF_CLUSTER_TYPE_DISPERSE:
@@ -367,7 +371,6 @@ __glusterd_handle_add_brick (rpcsvc_request_t *req)
int32_t replica_count = 0;
int32_t stripe_count = 0;
int type = 0;
-
this = THIS;
GF_ASSERT(this);
@@ -454,6 +457,17 @@ __glusterd_handle_add_brick (rpcsvc_request_t *req)
total_bricks = volinfo->brick_count + brick_count;
+ if (dict_get (dict, "attach-tier")) {
+ if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
+ snprintf (err_str, sizeof (err_str),
+ "Volume %s is already a tier.", volname);
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ ret = -1;
+ goto out;
+ }
+ goto brick_val;
+ }
+
if (!stripe_count && !replica_count) {
if (volinfo->type == GF_CLUSTER_TYPE_NONE)
goto brick_val;
@@ -639,6 +653,40 @@ subvol_matcher_destroy (int *subvols)
GF_FREE (subvols);
}
+/*
+ * Fill @dict with the hot-tier bricks of @volinfo.
+ *
+ * The volume's brick list is walked in reverse.  The first
+ * tier_info.cold_brick_count entries encountered are skipped; every entry
+ * after that is stored in @dict as "brick<N>" = "<hostname>:<path>", with N
+ * starting at 1.  Finally "count" is set to the number of bricks stored.
+ *
+ * Returns the number of hot bricks stored in @dict, or -1 if a brick name
+ * does not fit in the local buffer or any dict update fails.
+ */
+static int
+glusterd_set_detach_bricks(dict_t *dict, glusterd_volinfo_t *volinfo)
+{
+        char                  key[256]      = {0,};
+        char                  value[256]    = {0,};
+        int                   brick_num     = 0;
+        int                   hot_brick_num = 0;
+        glusterd_brickinfo_t *brickinfo     = NULL;
+        int                   len           = 0;
+        int                   ret           = 0;
+
+        /* cold tier bricks at tail of list so use reverse iteration */
+        cds_list_for_each_entry_reverse (brickinfo, &volinfo->bricks,
+                                         brick_list) {
+                brick_num++;
+                if (brick_num <= volinfo->tier_info.cold_brick_count)
+                        continue;
+
+                hot_brick_num++;
+                snprintf (key, sizeof (key), "brick%d", hot_brick_num);
+
+                /* A truncated "host:path" would name a brick that does not
+                 * exist, so treat it as a failure instead of storing it. */
+                len = snprintf (value, sizeof (value), "%s:%s",
+                                brickinfo->hostname, brickinfo->path);
+                if ((len < 0) || ((size_t) len >= sizeof (value))) {
+                        gf_log (THIS->name, GF_LOG_ERROR,
+                                "brick name too long for %s", key);
+                        return -1;
+                }
+
+                /* The dict keeps its own heap copy of the value and frees
+                 * it with the dict; nothing is leaked on failure either. */
+                ret = dict_set_dynstr_with_alloc (dict, key, value);
+                if (ret)
+                        return -1;
+        }
+
+        ret = dict_set_int32 (dict, "count", hot_brick_num);
+        if (ret)
+                return -1;
+
+        return hot_brick_num;
+}
+
+
int
__glusterd_handle_remove_brick (rpcsvc_request_t *req)
{
@@ -794,7 +842,8 @@ __glusterd_handle_remove_brick (rpcsvc_request_t *req)
/* Do not allow remove-brick if the bricks given is less than
the replica count or stripe count */
- if (!replica_count && (volinfo->type != GF_CLUSTER_TYPE_NONE)) {
+ if (!replica_count && (volinfo->type != GF_CLUSTER_TYPE_NONE) &&
+ (volinfo->type != GF_CLUSTER_TYPE_TIER)) {
if (volinfo->dist_leaf_count &&
(count % volinfo->dist_leaf_count)) {
snprintf (err_str, sizeof (err_str), "Remove brick "
@@ -813,6 +862,7 @@ __glusterd_handle_remove_brick (rpcsvc_request_t *req)
goto out;
}
+
strcpy (brick_list, " ");
if ((volinfo->type != GF_CLUSTER_TYPE_NONE) &&
@@ -822,6 +872,9 @@ __glusterd_handle_remove_brick (rpcsvc_request_t *req)
goto out;
}
+ if (volinfo->type == GF_CLUSTER_TYPE_TIER)
+ count = glusterd_set_detach_bricks(dict, volinfo);
+
while ( i <= count) {
snprintf (key, sizeof (key), "brick%d", i);
ret = dict_get_str (dict, key, &brick);
@@ -836,6 +889,7 @@ __glusterd_handle_remove_brick (rpcsvc_request_t *req)
ret = glusterd_volume_brickinfo_get_by_brick(brick, volinfo,
&brickinfo);
+
if (ret) {
snprintf (err_str, sizeof (err_str), "Incorrect brick "
"%s for volume %s", brick, volname);
@@ -883,7 +937,8 @@ out:
}
- GF_FREE (brick_list);
+ if (brick_list)
+ GF_FREE (brick_list);
subvol_matcher_destroy (subvols);
free (cli_req.dict.dict_val); //its malloced by xdr
@@ -1081,7 +1136,11 @@ glusterd_op_perform_add_bricks (glusterd_volinfo_t *volinfo, int32_t count,
ret = glusterd_resolve_brick (brickinfo);
if (ret)
goto out;
- if (stripe_count || replica_count) {
+
+ /* hot tier bricks are added to head of brick list */
+ if (dict_get (dict, "attach-tier")) {
+ cds_list_add (&brickinfo->brick_list, &volinfo->bricks);
+ } else if (stripe_count || replica_count) {
add_brick_at_right_order (brickinfo, volinfo, (i - 1),
stripe_count, replica_count);
} else {
@@ -1674,6 +1733,7 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr)
break;
+ case GF_OP_CMD_DETACH:
case GF_OP_CMD_COMMIT_FORCE:
break;
}
@@ -1767,6 +1827,35 @@ glusterd_remove_brick_migrate_cbk (glusterd_volinfo_t *volinfo,
return ret;
}
+/*
+ * Record tier layout information in @volinfo for an attach-tier operation.
+ *
+ * The volume's current type, brick count, replica count, disperse count and
+ * dist-leaf count are saved as the cold tier's parameters.  The hot tier's
+ * replica count is read from "replica-count" in @dict and defaults to 1
+ * when that lookup fails; the hot tier's brick count is @count.
+ *
+ * @bricks is accepted for interface compatibility and is not used.
+ *
+ * Always returns 0: a missing "replica-count" is handled by the default
+ * and is not reported as an error.
+ */
+static int
+glusterd_op_perform_attach_tier (dict_t *dict,
+                                 glusterd_volinfo_t *volinfo,
+                                 int count,
+                                 char *bricks)
+{
+        int     replica_count = 0;
+
+        (void) bricks;
+
+        /*
+         * Store the existing volume's structure as the cold tier until the
+         * graph is generated.
+         * If there is a failure before the graph is generated the
+         * structure will revert to its original state.
+         */
+        volinfo->tier_info.cold_dist_leaf_count = volinfo->dist_leaf_count;
+        volinfo->tier_info.cold_type = volinfo->type;
+        volinfo->tier_info.cold_brick_count = volinfo->brick_count;
+        volinfo->tier_info.cold_replica_count = volinfo->replica_count;
+        volinfo->tier_info.cold_disperse_count = volinfo->disperse_count;
+
+        /* No usable "replica-count" in the request: fall back to 1. */
+        if (dict_get_int32 (dict, "replica-count", &replica_count))
+                replica_count = 1;
+
+        volinfo->tier_info.hot_replica_count = replica_count;
+        volinfo->tier_info.hot_brick_count = count;
+
+        return 0;
+}
int
glusterd_op_add_brick (dict_t *dict, char **op_errstr)
@@ -1778,6 +1867,7 @@ glusterd_op_add_brick (dict_t *dict, char **op_errstr)
xlator_t *this = NULL;
char *bricks = NULL;
int32_t count = 0;
+ int32_t replica_count = 0;
this = THIS;
GF_ASSERT (this);
@@ -1812,6 +1902,11 @@ glusterd_op_add_brick (dict_t *dict, char **op_errstr)
goto out;
}
+ if (dict_get(dict, "attach-tier")) {
+ gf_log (THIS->name, GF_LOG_DEBUG, "Adding tier");
+ glusterd_op_perform_attach_tier (dict, volinfo, count, bricks);
+ }
+
ret = glusterd_op_perform_add_bricks (volinfo, count, bricks, dict);
if (ret) {
gf_log ("", GF_LOG_ERROR, "Unable to add bricks");
@@ -1829,6 +1924,14 @@ out:
return ret;
}
+/*
+ * Copy the cold tier's saved type, replica count and disperse count from
+ * volinfo->tier_info back into the top-level fields of @volinfo.
+ */
+static void
+glusterd_op_perform_detach_tier (glusterd_volinfo_t *volinfo)
+{
+        /* The three assignments are independent of one another. */
+        volinfo->disperse_count = volinfo->tier_info.cold_disperse_count;
+        volinfo->replica_count  = volinfo->tier_info.cold_replica_count;
+        volinfo->type           = volinfo->tier_info.cold_type;
+}
+
+
int
glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
{
@@ -1959,6 +2062,10 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
force = 1;
break;
+ case GF_OP_CMD_DETACH:
+ glusterd_op_perform_detach_tier (volinfo);
+ /* fall through */
+
case GF_OP_CMD_COMMIT_FORCE:
if (volinfo->decommission_in_progress) {
@@ -2051,7 +2158,12 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
volinfo->sub_count = replica_count;
volinfo->dist_leaf_count = glusterd_get_dist_leaf_count (volinfo);
- if (replica_count == 1) {
+ /*
+ * volinfo->type and sub_count have already been set for
+ * volumes undergoing a detach operation, they should not
+ * be modified here.
+ */
+ if ((replica_count == 1) && (cmd != GF_OP_CMD_DETACH)) {
if (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) {
volinfo->type = GF_CLUSTER_TYPE_NONE;
/* backward compatibility */
@@ -2224,3 +2336,16 @@ out:
gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
return ret;
}
+
+/*
+ * Handler for attach-tier requests: passes @req and
+ * __glusterd_handle_add_brick to glusterd_big_locked_handler and returns
+ * that call's result.
+ */
+int
+glusterd_handle_attach_tier (rpcsvc_request_t *req)
+{
+        int     ret = 0;
+
+        ret = glusterd_big_locked_handler (req, __glusterd_handle_add_brick);
+
+        return ret;
+}
+
+/*
+ * Handler for detach-tier requests: passes @req and
+ * __glusterd_handle_remove_brick to glusterd_big_locked_handler and returns
+ * that call's result.
+ */
+int
+glusterd_handle_detach_tier (rpcsvc_request_t *req)
+{
+        int     ret = 0;
+
+        ret = glusterd_big_locked_handler (req,
+                                           __glusterd_handle_remove_brick);
+
+        return ret;
+}