summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVishal Pandey <vpandey@redhat.com>2019-04-24 13:37:16 +0530
committerAtin Mukherjee <amukherj@redhat.com>2019-06-28 17:30:53 +0000
commit9b223b15ab69fce4076de036ee162f36a058bcd2 (patch)
treee29e9119fd8a24d5fb681d3468e9828bb20bd713
parent29ad22aa9482a69f3fcf04eea762e76602bbe9a0 (diff)
glusterd/thin-arbiter: Thin-arbiter integration with GD1
gluster volume create <VOLNAME> replica 2 thin-arbiter 1 <host1>:<brick1> <host2>:<brick2> <thin-arbiter-host>:<path-to-store-replica-id-file> [force] The changes have been made in a way that the last brick in the bricks list will be treated as the thin-arbiter. GD1 will be manipulated to consider replica count to be as 2 and continue creating the volume like any other replica 2 volume but since thin-arbiter volumes need ta-brick client xlator entries for each subvolume in fuse volfile, volfile generation is modified in a way to inject these entries separately in the volfile for every subvolume. Few more additions - 1- Save the volinfo with new fields ta_bricks list and thin_arbiter_count. 2- Introduce a new option client.ta-brick-port to add remote-port to ta-brick xlator entry in fuse volfiles. The option can be set using the following CLI syntax - gluster volume set <VOLNAME> client.ta-brick-port <PORTNO.> 3- Volume Info will contain a Thin-Arbiter-path entry to distinguish from other replicate volumes. Change-Id: Ib434e2313b29716f32476c6c211d282c4ef39406 Updates #687 Signed-off-by: Vishal Pandey <vpandey@redhat.com>
-rw-r--r--cli/src/cli-cmd-parser.c145
-rw-r--r--cli/src/cli-cmd-volume.c4
-rw-r--r--cli/src/cli-rpc-ops.c15
-rw-r--r--doc/gluster.82
-rw-r--r--heal/src/glfs-heal.c3
-rw-r--r--tests/basic/glusterd/thin-arbiter-volume-probe.t25
-rw-r--r--tests/basic/glusterd/thin-arbiter-volume.t45
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handler.c27
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.c236
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.h2
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c253
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.h7
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.c144
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-ops.c74
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-set.c8
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.h2
16 files changed, 961 insertions, 31 deletions
diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c
index a6ce490..decdd10 100644
--- a/cli/src/cli-cmd-parser.c
+++ b/cli/src/cli-cmd-parser.c
@@ -80,6 +80,95 @@ str_getunamb(const char *tok, char **opwords)
}
+/*
+ * Parse the thin-arbiter brick of a "volume create" command line.
+ *
+ * The thin-arbiter brick is taken to be the last word handed in, i.e.
+ * words[wordcount - 1]. The word is validated with validate_brick_name(),
+ * the path part after the last ':' is canonicalized in place, and the host
+ * part is rejected when it is "localhost", "127.0.0.1" or starts with "0.".
+ * A host that fails valid_internet_address() only produces a CLI error
+ * message; it does not fail the parse.
+ *
+ * @words:     command words; must not be NULL.
+ * @wordcount: number of words to consider; must be non-zero.
+ * @ta_brick:  out parameter. On success it receives a newly allocated
+ *             string of the form " <host>:<path> " (the brick word wrapped
+ *             in single spaces). The caller owns it and must GF_FREE() it.
+ *
+ * Returns 0 on success, -1 on an invalid brick, a loopback host or an
+ * allocation failure, or the non-zero value reported by
+ * gf_canonicalize_path().
+ */
int32_t
+cli_cmd_ta_brick_parse(const char **words, int wordcount, char **ta_brick)
+{
+    const char *ta_word = NULL;
+    char *host_name = NULL;
+    char *tmp_host = NULL;
+    char *delimiter = NULL;
+    size_t ta_len = 0;
+    int ret = 0;
+
+    GF_ASSERT(words);
+    GF_ASSERT(wordcount);
+    GF_ASSERT(ta_brick);
+
+    ta_word = words[wordcount - 1];
+
+    if (validate_brick_name((char *)ta_word)) {
+        cli_err(
+            "Wrong brick type: %s, use <HOSTNAME>:"
+            "<export-dir-abs-path>",
+            ta_word);
+        ret = -1;
+        goto out;
+    }
+
+    /* A validated brick name presumably always contains ':'; guard anyway
+     * so that a NULL is never offset and dereferenced. */
+    delimiter = strrchr(ta_word, ':');
+    if (!delimiter) {
+        ret = -1;
+        goto out;
+    }
+    ret = gf_canonicalize_path(delimiter + 1);
+    if (ret)
+        goto out;
+
+    /* get_host_name() works on a private copy of the brick word. */
+    tmp_host = gf_strdup((char *)ta_word);
+    if (!tmp_host) {
+        gf_log("cli", GF_LOG_ERROR, "Out of memory");
+        ret = -1;
+        goto out;
+    }
+    get_host_name(tmp_host, &host_name);
+    if (!host_name) {
+        ret = -1;
+        gf_log("cli", GF_LOG_ERROR,
+               "Unable to retrieve "
+               "hostname");
+        goto out;
+    }
+
+    if (!(strcmp(host_name, "localhost") && strcmp(host_name, "127.0.0.1") &&
+          strncmp(host_name, "0.", 2))) {
+        cli_err(
+            "Please provide a valid hostname/ip other "
+            "than localhost, 127.0.0.1 or loopback "
+            "address (0.0.0.0 to 0.255.255.255).");
+        ret = -1;
+        goto out;
+    }
+    if (!valid_internet_address(host_name, _gf_false, _gf_false)) {
+        cli_err(
+            "internet address '%s' does not conform to "
+            "standards",
+            host_name);
+    }
+
+    /* Room for a leading space, the brick word, a trailing space and the
+     * terminating NUL. */
+    ta_len = strlen(ta_word);
+    *ta_brick = GF_MALLOC(ta_len + 3, gf_common_mt_char);
+    if (*ta_brick == NULL) {
+        ret = -1;
+        gf_log("cli", GF_LOG_ERROR, "Out of memory");
+        goto out;
+    }
+
+    /* GF_MALLOC() does not clear the buffer; strcat() needs a valid
+     * (empty) string to append to. */
+    (*ta_brick)[0] = '\0';
+    strcat(*ta_brick, " ");
+    strcat(*ta_brick, ta_word);
+    strcat(*ta_brick, " ");
+out:
+    GF_FREE(tmp_host);
+
+    return ret;
+}
+
+int32_t
cli_cmd_bricks_parse(const char **words, int wordcount, int brick_index,
char **bricks, int *brick_count)
{
@@ -476,14 +565,17 @@ cli_cmd_volume_create_parse(struct cli_state *state, const char **words,
char *trans_type = NULL;
int32_t index = 0;
char *bricks = NULL;
+ char *ta_brick = NULL;
int32_t brick_count = 0;
- char *opwords[] = {"replica", "stripe", "transport", "disperse",
- "redundancy", "disperse-data", "arbiter", NULL};
+ char *opwords[] = {"replica", "stripe", "transport",
+ "disperse", "redundancy", "disperse-data",
+ "arbiter", "thin-arbiter", NULL};
char *w = NULL;
int op_count = 0;
int32_t replica_count = 1;
int32_t arbiter_count = 0;
+ int32_t thin_arbiter_count = 0;
int32_t stripe_count = 1;
int32_t disperse_count = -1;
int32_t redundancy_count = -1;
@@ -581,6 +673,25 @@ cli_cmd_volume_create_parse(struct cli_state *state, const char **words,
if (ret)
goto out;
index += 2;
+ } else if (!strcmp(words[index], "thin-arbiter")) {
+ ret = gf_string2int(words[index + 1], &thin_arbiter_count);
+ if ((ret == -1) || (thin_arbiter_count != 1)) {
+ cli_err(
+ "For thin-arbiter "
+ "configuration, "
+ "replica count must be"
+ " 2 and thin-arbiter count "
+ "must be 1. The 3rd "
+ "brick of the replica "
+ "will be the thin-arbiter brick");
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_int32(dict, "thin-arbiter-count",
+ thin_arbiter_count);
+ if (ret)
+ goto out;
+ index += 2;
}
}
@@ -589,7 +700,7 @@ cli_cmd_volume_create_parse(struct cli_state *state, const char **words,
if ((arbiter_count == 1) && (replica_count == 2))
replica_count += arbiter_count;
- if (replica_count == 2) {
+ if (replica_count == 2 && thin_arbiter_count == 0) {
if (strcmp(words[wordcount - 1], "force")) {
question =
"Replica 2 volumes are prone"
@@ -657,6 +768,12 @@ cli_cmd_volume_create_parse(struct cli_state *state, const char **words,
"option.");
ret = -1;
goto out;
+ } else if ((strcmp(w, "thin-arbiter") == 0)) {
+ cli_err(
+ "thin-arbiter option must be preceded by replica "
+ "option.");
+ ret = -1;
+ goto out;
} else {
GF_ASSERT(!"opword mismatch");
ret = -1;
@@ -680,7 +797,20 @@ cli_cmd_volume_create_parse(struct cli_state *state, const char **words,
wc = wordcount - 1;
}
- ret = cli_cmd_bricks_parse(words, wc, brick_index, &bricks, &brick_count);
+ // Exclude the thin-arbiter-brick i.e. last brick in the bricks list
+ if (thin_arbiter_count == 1) {
+ ret = cli_cmd_bricks_parse(words, wc - 1, brick_index, &bricks,
+ &brick_count);
+ if (ret)
+ goto out;
+
+ ret = cli_cmd_ta_brick_parse(words, wc, &ta_brick);
+
+ } else {
+ ret = cli_cmd_bricks_parse(words, wc, brick_index, &bricks,
+ &brick_count);
+ }
+
if (ret)
goto out;
@@ -739,6 +869,12 @@ cli_cmd_volume_create_parse(struct cli_state *state, const char **words,
if (ret)
goto out;
+ if (thin_arbiter_count == 1) {
+ ret = dict_set_dynstr(dict, "ta-brick", ta_brick);
+ if (ret)
+ goto out;
+ }
+
ret = dict_set_int32(dict, "count", brick_count);
if (ret)
goto out;
@@ -752,6 +888,7 @@ cli_cmd_volume_create_parse(struct cli_state *state, const char **words,
out:
if (ret) {
GF_FREE(bricks);
+ GF_FREE(ta_brick);
gf_log("cli", GF_LOG_ERROR, "Unable to parse create volume CLI");
if (dict)
dict_unref(dict);
diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c
index f2948a3..c6f0898 100644
--- a/cli/src/cli-cmd-volume.c
+++ b/cli/src/cli-cmd-volume.c
@@ -2999,9 +2999,9 @@ struct cli_cmd volume_cmds[] = {
"list information of all volumes"},
{"volume create <NEW-VOLNAME> [stripe <COUNT>] "
- "[replica <COUNT> [arbiter <COUNT>]] "
+ "[[replica <COUNT> [arbiter <COUNT>]]|[replica 2 thin-arbiter 1]] "
"[disperse [<COUNT>]] [disperse-data <COUNT>] [redundancy <COUNT>] "
- "[transport <tcp|rdma|tcp,rdma>] <NEW-BRICK>"
+ "[transport <tcp|rdma|tcp,rdma>] <NEW-BRICK> <TA-BRICK>"
"... [force]",
cli_cmd_volume_create_cbk,
diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c
index 16d5f73..35985ab 100644
--- a/cli/src/cli-rpc-ops.c
+++ b/cli/src/cli-rpc-ops.c
@@ -723,10 +723,12 @@ gf_cli_get_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
int32_t redundancy_count = 0;
int32_t arbiter_count = 0;
int32_t snap_count = 0;
+ int32_t thin_arbiter_count = 0;
int32_t vol_type = 0;
int32_t transport = 0;
char *volume_id_str = NULL;
char *volname = NULL;
+ char *ta_brick = NULL;
dict_t *dict = NULL;
cli_local_t *local = NULL;
char key[1024] = {0};
@@ -903,6 +905,11 @@ xml_output:
if (ret)
goto out;
+ snprintf(key, 256, "volume%d.thin_arbiter_count", i);
+ ret = dict_get_int32(dict, key, &thin_arbiter_count);
+ if (ret)
+ goto out;
+
// Distributed (stripe/replicate/stripe-replica) setups
vol_type = get_vol_type(type, dist_count, brick_count);
@@ -929,6 +936,14 @@ xml_output:
if (ret)
goto out;
+ if (thin_arbiter_count) {
+ snprintf(key, 1024, "volume%d.thin_arbiter_brick", i);
+ ret = dict_get_str(dict, key, &ta_brick);
+ if (ret)
+ goto out;
+ cli_out("Thin-arbiter-path: %s", ta_brick);
+ }
+
snprintf(key, 256, "volume%d.opt_count", i);
ret = dict_get_int32(dict, key, &opt_count);
if (ret)
diff --git a/doc/gluster.8 b/doc/gluster.8
index 4f36c13..99a8d5e 100644
--- a/doc/gluster.8
+++ b/doc/gluster.8
@@ -41,7 +41,7 @@ List all volumes in cluster
\fB\ volume status [all | <VOLNAME> [nfs|shd|<BRICK>|quotad]] [detail|clients|mem|inode|fd|callpool|tasks|client-list] \fR
Display status of all or specified volume(s)/brick
.TP
-\fB\ volume create <NEW-VOLNAME> [stripe <COUNT>] [replica <COUNT>] [disperse [<COUNT>]] [redundancy <COUNT>] [transport <tcp|rdma|tcp,rdma>] <NEW-BRICK> ... \fR
+\fB\ volume create <NEW-VOLNAME> [stripe <COUNT>] [[replica <COUNT> [arbiter <COUNT>]]|[replica 2 thin-arbiter 1]] [disperse [<COUNT>]] [redundancy <COUNT>] [transport <tcp|rdma|tcp,rdma>] <NEW-BRICK> ... <TA-BRICK> \fR
Create a new volume of the specified type using the specified bricks and transport type (the default transport type is tcp).
To create a volume with both transports (tcp and rdma), give 'transport tcp,rdma' as an option.
.TP
diff --git a/heal/src/glfs-heal.c b/heal/src/glfs-heal.c
index ce6925a..3ebf79e 100644
--- a/heal/src/glfs-heal.c
+++ b/heal/src/glfs-heal.c
@@ -1144,7 +1144,8 @@ glfsh_gather_heal_info(glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
while (xl->next)
xl = xl->next;
while (xl) {
- if (strcmp(xl->type, "protocol/client") == 0) {
+ if (strcmp(xl->type, "protocol/client") == 0 &&
+ !strstr(xl->name, "-ta-")) {
heal_xl = _get_ancestor(xl, heal_op);
if (heal_xl) {
old_THIS = THIS;
diff --git a/tests/basic/glusterd/thin-arbiter-volume-probe.t b/tests/basic/glusterd/thin-arbiter-volume-probe.t
new file mode 100644
index 0000000..acc6943
--- /dev/null
+++ b/tests/basic/glusterd/thin-arbiter-volume-probe.t
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+# Pull in the shared test helpers and the multi-node cluster helpers.
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+
+#This tests if the thin-arbiter-count is transferred to the other peer.
+# Prints how many peers node 1 reports as "Peer in Cluster (Connected)".
+function check_peers {
+ $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
+cleanup;
+
+# Bring up two glusterd instances and probe the second from the first.
+TEST launch_cluster 2;
+TEST $CLI_1 peer probe $H2;
+
+EXPECT_WITHIN $PROBE_TIMEOUT 1 check_peers
+
+# Stop glusterd 2, issue the thin-arbiter volume create from node 1, then
+# restart glusterd 2. The create is deliberately not wrapped in TEST here
+# (NOTE(review): presumably because its exit status is not what is under
+# test - confirm). $H0:$B0/b{1..3} expands to three bricks; the last one is
+# the thin-arbiter brick.
+kill_glusterd 2
+$CLI_1 volume create $V0 replica 2 thin-arbiter 1 $H0:$B0/b{1..3}
+TEST $glusterd_2
+EXPECT_WITHIN $PROBE_TIMEOUT 1 check_peers
+# Both nodes must report a plain 1 x 2 layout: the thin-arbiter brick is not
+# counted among the data bricks.
+EXPECT "1 x 2 = 2" volinfo_field_1 $V0 "Number of Bricks"
+EXPECT "1 x 2 = 2" volinfo_field_2 $V0 "Number of Bricks"
+
+cleanup;
diff --git a/tests/basic/glusterd/thin-arbiter-volume.t b/tests/basic/glusterd/thin-arbiter-volume.t
new file mode 100644
index 0000000..4e81389
--- /dev/null
+++ b/tests/basic/glusterd/thin-arbiter-volume.t
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+# Shared test helpers. The path to volume.rc must not contain a space:
+# ". <dir>/ volume.rc" would try to source the directory itself and the
+# helpers would never be loaded.
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../thin-arbiter.rc
+
+#This command tests the volume create command validation for thin-arbiter volumes.
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+# Two data bricks plus one thin-arbiter brick (the last one): the volume is
+# reported as a plain 1 x 2.
+TEST $CLI volume create $V0 replica 2 thin-arbiter 1 $H0:$B0/b1 $H0:$B0/b2 $H0:$B0/b3
+EXPECT "1 x 2 = 2" volinfo_field $V0 "Number of Bricks"
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+# A file created through the mount lands on both data bricks but not on the
+# thin-arbiter brick.
+TEST touch $M0/a.txt
+TEST ls $B0/b1/a.txt
+TEST ls $B0/b2/a.txt
+TEST ! ls $B0/b3/a.txt
+
+TEST umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+# Five bricks: four data bricks form two replica pairs, the fifth is the
+# thin-arbiter brick.
+TEST $CLI volume create $V0 replica 2 thin-arbiter 1 $H0:$B0/b{4..8}
+EXPECT "2 x 2 = 4" volinfo_field $V0 "Number of Bricks"
+
+TEST $CLI volume delete $V0
+
+# Remove the brick directories of the first volume so they can be reused.
+TEST rm -rf $B0/b{1..3}
+
+TEST $CLI volume create $V0 replica 2 thin-arbiter 1 $H0:$B0/b1 $H0:$B0/b2 $H0:$B0/b3
+EXPECT "1 x 2 = 2" volinfo_field $V0 "Number of Bricks"
+
+# The layout must still be reported correctly after glusterd is restarted,
+# i.e. after the volume has been read back from the on-disk store.
+TEST killall -15 glusterd
+TEST glusterd
+TEST pidof glusterd
+EXPECT "1 x 2 = 2" volinfo_field $V0 "Number of Bricks"
+
+cleanup
+
diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
index 576cae7..a0bf409 100644
--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
+++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
@@ -357,6 +357,7 @@ glusterd_add_volume_detail_to_dict(glusterd_volinfo_t *volinfo, dict_t *volumes,
};
int keylen;
glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_brickinfo_t *ta_brickinfo = NULL;
char *buf = NULL;
int i = 1;
dict_t *dict = NULL;
@@ -368,6 +369,10 @@ glusterd_add_volume_detail_to_dict(glusterd_volinfo_t *volinfo, dict_t *volumes,
xlator_t *this = NULL;
int32_t len = 0;
+ char ta_brick[4096] = {
+ 0,
+ };
+
GF_ASSERT(volinfo);
GF_ASSERT(volumes);
@@ -431,6 +436,11 @@ glusterd_add_volume_detail_to_dict(glusterd_volinfo_t *volinfo, dict_t *volumes,
if (ret)
goto out;
+ keylen = snprintf(key, sizeof(key), "volume%d.thin_arbiter_count", count);
+ ret = dict_set_int32n(volumes, key, keylen, volinfo->thin_arbiter_count);
+ if (ret)
+ goto out;
+
volume_id_str = gf_strdup(uuid_utoa(volinfo->volume_id));
if (!volume_id_str)
goto out;
@@ -481,6 +491,23 @@ glusterd_add_volume_detail_to_dict(glusterd_volinfo_t *volinfo, dict_t *volumes,
i++;
}
+ if (volinfo->thin_arbiter_count == 1) {
+ ta_brickinfo = list_first_entry(&volinfo->ta_bricks,
+ glusterd_brickinfo_t, brick_list);
+ len = snprintf(ta_brick, sizeof(ta_brick), "%s:%s",
+ ta_brickinfo->hostname, ta_brickinfo->path);
+ if ((len < 0) || (len >= sizeof(ta_brick))) {
+ ret = -1;
+ goto out;
+ }
+ buf = gf_strdup(ta_brick);
+ keylen = snprintf(key, sizeof(key), "volume%d.thin_arbiter_brick",
+ count);
+ ret = dict_set_dynstrn(volumes, key, keylen, buf);
+ if (ret)
+ goto out;
+ }
+
ret = glusterd_add_arbiter_info_to_bricks(volinfo, volumes, count);
if (ret)
goto out;
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
index fc0df11..311e7d3 100644
--- a/xlators/mgmt/glusterd/src/glusterd-store.c
+++ b/xlators/mgmt/glusterd/src/glusterd-store.c
@@ -266,7 +266,8 @@ out:
int32_t
glusterd_store_volinfo_brick_fname_write(int vol_fd,
glusterd_brickinfo_t *brickinfo,
- int32_t brick_count)
+ int32_t brick_count,
+ int is_thin_arbiter)
{
char key[64] = {
0,
@@ -276,8 +277,13 @@ glusterd_store_volinfo_brick_fname_write(int vol_fd,
};
int32_t ret = -1;
- snprintf(key, sizeof(key), "%s-%d", GLUSTERD_STORE_KEY_VOL_BRICK,
- brick_count);
+ if (!is_thin_arbiter) {
+ snprintf(key, sizeof(key), "%s-%d", GLUSTERD_STORE_KEY_VOL_BRICK,
+ brick_count);
+ } else {
+ snprintf(key, sizeof(key), "%s-%d", GLUSTERD_STORE_KEY_VOL_TA_BRICK,
+ brick_count);
+ }
glusterd_store_brickinfofname_set(brickinfo, brickfname,
sizeof(brickfname));
ret = gf_store_save_value(vol_fd, key, brickfname);
@@ -498,14 +504,14 @@ glusterd_store_perform_brick_store(glusterd_brickinfo_t *brickinfo)
ret = -1;
goto out;
}
-
ret = glusterd_store_brickinfo_write(fd, brickinfo);
if (ret)
goto out;
out:
- if (ret && (fd > 0))
+ if (ret && (fd > 0)) {
gf_store_unlink_tmppath(brickinfo->shandle);
+ }
gf_msg_debug(THIS->name, 0, "Returning %d", ret);
return ret;
}
@@ -553,15 +559,15 @@ out:
static int32_t
glusterd_store_brickinfo(glusterd_volinfo_t *volinfo,
glusterd_brickinfo_t *brickinfo, int32_t brick_count,
- int vol_fd)
+ int vol_fd, int is_thin_arbiter)
{
int32_t ret = -1;
GF_ASSERT(volinfo);
GF_ASSERT(brickinfo);
- ret = glusterd_store_volinfo_brick_fname_write(vol_fd, brickinfo,
- brick_count);
+ ret = glusterd_store_volinfo_brick_fname_write(
+ vol_fd, brickinfo, brick_count, is_thin_arbiter);
if (ret)
goto out;
@@ -988,6 +994,18 @@ glusterd_volume_exclude_options_write(int fd, glusterd_volinfo_t *volinfo)
total_len += ret;
}
+ if ((conf->op_version >= GD_OP_VERSION_7_0) &&
+ volinfo->thin_arbiter_count) {
+ ret = snprintf(buf + total_len, sizeof(buf) - total_len, "%s=%d\n",
+ GLUSTERD_STORE_KEY_VOL_THIN_ARBITER_CNT,
+ volinfo->thin_arbiter_count);
+ if (ret < 0 || ret >= sizeof(buf) - total_len) {
+ ret = -1;
+ goto out;
+ }
+ total_len += ret;
+ }
+
ret = gf_store_save_items(fd, buf);
if (ret)
goto out;
@@ -1320,17 +1338,29 @@ glusterd_store_brickinfos(glusterd_volinfo_t *volinfo, int vol_fd)
{
int32_t ret = 0;
glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_brickinfo_t *ta_brickinfo = NULL;
int32_t brick_count = 0;
+ int32_t ta_brick_count = 0;
GF_ASSERT(volinfo);
cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
{
- ret = glusterd_store_brickinfo(volinfo, brickinfo, brick_count, vol_fd);
+ ret = glusterd_store_brickinfo(volinfo, brickinfo, brick_count, vol_fd,
+ 0);
if (ret)
goto out;
brick_count++;
}
+ if (volinfo->thin_arbiter_count == 1) {
+ ta_brickinfo = list_first_entry(&volinfo->ta_bricks,
+ glusterd_brickinfo_t, brick_list);
+ ret = glusterd_store_brickinfo(volinfo, ta_brickinfo, ta_brick_count,
+ vol_fd, 1);
+ if (ret)
+ goto out;
+ }
+
out:
gf_msg_debug(THIS->name, 0, "Returning %d", ret);
return ret;
@@ -1507,6 +1537,7 @@ glusterd_store_brickinfos_atomic_update(glusterd_volinfo_t *volinfo)
{
int ret = -1;
glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_brickinfo_t *ta_brickinfo = NULL;
GF_ASSERT(volinfo);
@@ -1516,6 +1547,15 @@ glusterd_store_brickinfos_atomic_update(glusterd_volinfo_t *volinfo)
if (ret)
goto out;
}
+
+ if (volinfo->thin_arbiter_count == 1) {
+ ta_brickinfo = list_first_entry(&volinfo->ta_bricks,
+ glusterd_brickinfo_t, brick_list);
+ ret = gf_store_rename_tmppath(ta_brickinfo->shandle);
+ if (ret)
+ goto out;
+ }
+
out:
return ret;
}
@@ -1670,6 +1710,7 @@ glusterd_store_volinfo(glusterd_volinfo_t *volinfo,
unlock:
pthread_mutex_unlock(&volinfo->store_volinfo_lock);
pthread_mutex_unlock(&ctx->cleanup_lock);
+
if (ret)
glusterd_store_volume_cleanup_tmp(volinfo);
@@ -2435,6 +2476,7 @@ glusterd_store_retrieve_bricks(glusterd_volinfo_t *volinfo)
{
int32_t ret = 0;
glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_brickinfo_t *ta_brickinfo = NULL;
gf_store_iter_t *iter = NULL;
char *key = NULL;
char *value = NULL;
@@ -2446,6 +2488,7 @@ glusterd_store_retrieve_bricks(glusterd_volinfo_t *volinfo)
};
glusterd_conf_t *priv = NULL;
int32_t brick_count = 0;
+ int32_t ta_brick_count = 0;
char tmpkey[4096] = {
0,
};
@@ -2455,6 +2498,10 @@ glusterd_store_retrieve_bricks(glusterd_volinfo_t *volinfo)
struct pmap_registry *pmap = NULL;
xlator_t *this = NULL;
int brickid = 0;
+ /* ta_brick_id initialization with 2 since ta-brick id starts with
+ * volname-ta-2
+ */
+ int ta_brick_id = 2;
gf_store_op_errno_t op_errno = GD_STORE_SUCCESS;
int32_t len = 0;
@@ -2748,6 +2795,175 @@ glusterd_store_retrieve_bricks(glusterd_volinfo_t *volinfo)
brick_count++;
}
+ ret = gf_store_iter_new(volinfo->shandle, &tmpiter);
+
+ if (ret)
+ goto out;
+
+ if (volinfo->thin_arbiter_count == 1) {
+ while (ta_brick_count < volinfo->subvol_count) {
+ ret = glusterd_brickinfo_new(&ta_brickinfo);
+ if (ret)
+ goto out;
+
+ snprintf(tmpkey, sizeof(tmpkey), "%s-%d",
+ GLUSTERD_STORE_KEY_VOL_TA_BRICK, 0);
+
+ ret = gf_store_iter_get_matching(tmpiter, tmpkey, &tmpvalue);
+
+ len = snprintf(path, sizeof(path), "%s/%s", brickdir, tmpvalue);
+ if ((len < 0) || (len >= sizeof(path))) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = gf_store_handle_retrieve(path, &ta_brickinfo->shandle);
+
+ if (ret)
+ goto out;
+
+ ret = gf_store_iter_new(ta_brickinfo->shandle, &iter);
+
+ if (ret)
+ goto out;
+
+ ret = gf_store_iter_get_next(iter, &key, &value, &op_errno);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, op_errno,
+ GD_MSG_STORE_ITER_GET_FAIL,
+ "Unable to iterate "
+ "the store for brick: %s",
+ path);
+ goto out;
+ }
+
+ while (!ret) {
+ if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_HOSTNAME,
+ SLEN(GLUSTERD_STORE_KEY_BRICK_HOSTNAME))) {
+ if (snprintf(ta_brickinfo->hostname,
+ sizeof(ta_brickinfo->hostname), "%s",
+ value) >= sizeof(ta_brickinfo->hostname)) {
+ gf_msg("glusterd", GF_LOG_ERROR, op_errno,
+ GD_MSG_PARSE_BRICKINFO_FAIL,
+ "brick hostname truncated: %s",
+ ta_brickinfo->hostname);
+ goto out;
+ }
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_PATH,
+ SLEN(GLUSTERD_STORE_KEY_BRICK_PATH))) {
+ if (snprintf(ta_brickinfo->path, sizeof(ta_brickinfo->path),
+ "%s", value) >= sizeof(ta_brickinfo->path)) {
+ gf_msg("glusterd", GF_LOG_ERROR, op_errno,
+ GD_MSG_PARSE_BRICKINFO_FAIL,
+ "brick path truncated: %s", ta_brickinfo->path);
+ goto out;
+ }
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_REAL_PATH,
+ SLEN(GLUSTERD_STORE_KEY_BRICK_REAL_PATH))) {
+ if (snprintf(ta_brickinfo->real_path,
+ sizeof(ta_brickinfo->real_path), "%s",
+ value) >= sizeof(ta_brickinfo->real_path)) {
+ gf_msg("glusterd", GF_LOG_ERROR, op_errno,
+ GD_MSG_PARSE_BRICKINFO_FAIL,
+ "real_path truncated: %s",
+ ta_brickinfo->real_path);
+ goto out;
+ }
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_PORT,
+ SLEN(GLUSTERD_STORE_KEY_BRICK_PORT))) {
+ ret = gf_string2int(value, &ta_brickinfo->port);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL,
+ GD_MSG_INCOMPATIBLE_VALUE,
+ "Failed to convert "
+ "string to integer");
+ }
+
+ if (ta_brickinfo->port < priv->base_port) {
+ /* This is required to adhere to the
+ IANA standards */
+ ta_brickinfo->port = 0;
+ }
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_RDMA_PORT,
+ SLEN(GLUSTERD_STORE_KEY_BRICK_RDMA_PORT))) {
+ ret = gf_string2int(value, &ta_brickinfo->rdma_port);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL,
+ GD_MSG_INCOMPATIBLE_VALUE,
+ "Failed to convert "
+ "string to integer");
+ }
+
+ if (ta_brickinfo->rdma_port < priv->base_port) {
+ /* This is required to adhere to the
+ IANA standards */
+ ta_brickinfo->rdma_port = 0;
+ }
+ } else if (!strncmp(
+ key, GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED,
+ SLEN(GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED))) {
+ ret = gf_string2int(value, &ta_brickinfo->decommissioned);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL,
+ GD_MSG_INCOMPATIBLE_VALUE,
+ "Failed to convert "
+ "string to integer");
+ }
+
+ } else if (!strcmp(key, GLUSTERD_STORE_KEY_BRICK_ID)) {
+ if (snprintf(ta_brickinfo->brick_id,
+ sizeof(ta_brickinfo->brick_id), "%s",
+ value) >= sizeof(ta_brickinfo->brick_id)) {
+ gf_msg("glusterd", GF_LOG_ERROR, op_errno,
+ GD_MSG_PARSE_BRICKINFO_FAIL,
+ "brick_id truncated: %s",
+ ta_brickinfo->brick_id);
+ goto out;
+ }
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_FSID,
+ SLEN(GLUSTERD_STORE_KEY_BRICK_FSID))) {
+ ret = gf_string2uint64(value, &ta_brickinfo->statfs_fsid);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_INVALID_ENTRY,
+ "%s "
+ "is not a valid uint64_t value",
+ value);
+ }
+ } else if (!strcmp(key, GLUSTERD_STORE_KEY_BRICK_UUID)) {
+ gf_uuid_parse(value, brickinfo->uuid);
+ } else if (!strncmp(
+ key, GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS,
+ SLEN(GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS))) {
+ ret = gf_string2int(value, &ta_brickinfo->snap_status);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL,
+ GD_MSG_INCOMPATIBLE_VALUE,
+ "Failed to convert "
+ "string to integer");
+ }
+
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UNKNOWN_KEY,
+ "Unknown key: %s", key);
+ }
+
+ GF_FREE(key);
+ GF_FREE(value);
+ key = NULL;
+ value = NULL;
+ ret = gf_store_iter_get_next(iter, &key, &value, &op_errno);
+ }
+
+ GLUSTERD_ASSIGN_BRICKID_TO_TA_BRICKINFO(ta_brickinfo, volinfo,
+ ta_brick_id);
+ ta_brick_id += 3;
+
+ cds_list_add_tail(&ta_brickinfo->brick_list, &volinfo->ta_bricks);
+ ta_brick_count++;
+ }
+ }
+
assign_brick_groups(volinfo);
ret = 0;
@@ -2994,6 +3210,8 @@ glusterd_store_update_volinfo(glusterd_volinfo_t *volinfo)
volinfo->replica_count = atoi(value);
} else if (!strcmp(key, GLUSTERD_STORE_KEY_VOL_ARBITER_CNT)) {
volinfo->arbiter_count = atoi(value);
+ } else if (!strcmp(key, GLUSTERD_STORE_KEY_VOL_THIN_ARBITER_CNT)) {
+ volinfo->thin_arbiter_count = atoi(value);
} else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT,
SLEN(GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT))) {
volinfo->disperse_count = atoi(value);
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h
index 59aee88..45aba64 100644
--- a/xlators/mgmt/glusterd/src/glusterd-store.h
+++ b/xlators/mgmt/glusterd/src/glusterd-store.h
@@ -42,7 +42,9 @@ typedef enum glusterd_store_ver_ac_ {
#define GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT "disperse_count"
#define GLUSTERD_STORE_KEY_VOL_REDUNDANCY_CNT "redundancy_count"
#define GLUSTERD_STORE_KEY_VOL_ARBITER_CNT "arbiter_count"
+#define GLUSTERD_STORE_KEY_VOL_THIN_ARBITER_CNT "thin_arbiter_count"
#define GLUSTERD_STORE_KEY_VOL_BRICK "brick"
+#define GLUSTERD_STORE_KEY_VOL_TA_BRICK "ta-brick"
#define GLUSTERD_STORE_KEY_VOL_VERSION "version"
#define GLUSTERD_STORE_KEY_VOL_TRANSPORT "transport-type"
#define GLUSTERD_STORE_KEY_VOL_ID "volume-id"
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 80fb829..45cdf8a 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -639,6 +639,7 @@ glusterd_volinfo_new(glusterd_volinfo_t **volinfo)
CDS_INIT_LIST_HEAD(&new_volinfo->vol_list);
CDS_INIT_LIST_HEAD(&new_volinfo->snapvol_list);
CDS_INIT_LIST_HEAD(&new_volinfo->bricks);
+ CDS_INIT_LIST_HEAD(&new_volinfo->ta_bricks);
CDS_INIT_LIST_HEAD(&new_volinfo->snap_volumes);
new_volinfo->dict = dict_new();
@@ -1526,6 +1527,37 @@ out:
}
+/*
+ * Look up a thin-arbiter brick of @volinfo by host name and path.
+ *
+ * Walks volinfo->ta_bricks and stops at the first entry whose path and
+ * hostname both compare equal (strcmp) to @path and @hostname. @uuid is
+ * accepted but not consulted.
+ *
+ * Returns 0 when a match is found, storing it in *ta_brickinfo if that
+ * pointer is non-NULL; returns -1 when no entry matches.
+ */
int32_t
+glusterd_volume_ta_brickinfo_get(uuid_t uuid, char *hostname, char *path,
+                                 glusterd_volinfo_t *volinfo,
+                                 glusterd_brickinfo_t **ta_brickinfo)
+{
+    xlator_t *this = THIS;
+    glusterd_brickinfo_t *candidate = NULL;
+    int32_t ret = -1;
+
+    cds_list_for_each_entry(candidate, &volinfo->ta_bricks, brick_list)
+    {
+        if (strcmp(candidate->path, path) != 0)
+            continue;
+        if (strcmp(candidate->hostname, hostname) != 0)
+            continue;
+
+        gf_msg_debug(this->name, 0, LOGSTR_FOUND_BRICK, candidate->hostname,
+                     candidate->path, volinfo->volname);
+        if (ta_brickinfo)
+            *ta_brickinfo = candidate;
+        ret = 0;
+        break;
+    }
+
+    gf_msg_debug(this->name, 0, "Returning %d", ret);
+    return ret;
+}
+
+int32_t
glusterd_volume_brickinfo_get_by_brick(char *brick, glusterd_volinfo_t *volinfo,
glusterd_brickinfo_t **brickinfo,
gf_boolean_t construct_real_path)
@@ -2831,6 +2863,7 @@ glusterd_add_volume_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict,
char key[64] = "";
int keylen;
glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_brickinfo_t *ta_brickinfo = NULL;
int32_t i = 1;
char *volume_id_str = NULL;
char *str = NULL;
@@ -2881,6 +2914,11 @@ glusterd_add_volume_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict,
if (ret)
goto out;
+ keylen = snprintf(key, sizeof(key), "%s.subvol_count", pfx);
+ ret = dict_set_int32n(dict, key, keylen, volinfo->subvol_count);
+ if (ret)
+ goto out;
+
keylen = snprintf(key, sizeof(key), "%s.stripe_count", pfx);
ret = dict_set_int32n(dict, key, keylen, volinfo->stripe_count);
if (ret)
@@ -2896,6 +2934,11 @@ glusterd_add_volume_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict,
if (ret)
goto out;
+ keylen = snprintf(key, sizeof(key), "%s.thin_arbiter_count", pfx);
+ ret = dict_set_int32n(dict, key, keylen, volinfo->thin_arbiter_count);
+ if (ret)
+ goto out;
+
keylen = snprintf(key, sizeof(key), "%s.disperse_count", pfx);
ret = dict_set_int32n(dict, key, keylen, volinfo->disperse_count);
if (ret)
@@ -3058,6 +3101,44 @@ glusterd_add_volume_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict,
i++;
}
+ i = 1;
+ if (volinfo->thin_arbiter_count == 1) {
+ cds_list_for_each_entry(ta_brickinfo, &volinfo->ta_bricks, brick_list)
+ {
+ keylen = snprintf(key, sizeof(key), "%s.ta-brick%d.hostname", pfx,
+ i);
+ ret = dict_set_strn(dict, key, keylen, ta_brickinfo->hostname);
+ if (ret)
+ goto out;
+
+ keylen = snprintf(key, sizeof(key), "%s.ta-brick%d.path", pfx, i);
+ ret = dict_set_strn(dict, key, keylen, ta_brickinfo->path);
+ if (ret)
+ goto out;
+
+ keylen = snprintf(key, sizeof(key), "%s.ta-brick%d.decommissioned",
+ pfx, i);
+ ret = dict_set_int32n(dict, key, keylen,
+ ta_brickinfo->decommissioned);
+ if (ret)
+ goto out;
+
+ keylen = snprintf(key, sizeof(key), "%s.ta-brick%d.brick_id", pfx,
+ i);
+ ret = dict_set_strn(dict, key, keylen, ta_brickinfo->brick_id);
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "%s.ta-brick%d.uuid", pfx, i);
+ ret = dict_set_dynstr_with_alloc(dict, key,
+ uuid_utoa(ta_brickinfo->uuid));
+ if (ret)
+ goto out;
+
+ i++;
+ }
+ }
+
/* Add volume op-versions to dict. This prevents volume inconsistencies
* in the cluster
*/
@@ -3746,6 +3827,100 @@ out:
return ret;
}
+/*
+ * Build a thin-arbiter brickinfo from the peer payload.
+ *
+ * Keys are read from @peer_data under the prefix
+ * "<prefix><vol_count>.ta-brick<brick_count>":
+ *   .hostname, .path, .uuid  - required
+ *   .brick_id                - optional, left empty when absent
+ *   .decommissioned          - optional, defaults to 0
+ *
+ * Everything that can fail for input reasons is checked before the
+ * brickinfo is allocated, so no failure path has an allocated object to
+ * leak or a NULL one to dereference.
+ *
+ * On success returns 0 and stores the new object in *ta_brickinfo (owned by
+ * the caller). On failure returns non-zero, leaves *ta_brickinfo untouched,
+ * logs the reason and emits EVENT_IMPORT_BRICK_FAILED.
+ */
+static int32_t
+glusterd_import_new_ta_brick(dict_t *peer_data, int32_t vol_count,
+                             int32_t brick_count,
+                             glusterd_brickinfo_t **ta_brickinfo, char *prefix)
+{
+    char key[128] = "";
+    char key_prefix[64] = "";
+    int keylen;
+    int ret = -1;
+    char *hostname = NULL;
+    char *path = NULL;
+    char *brick_id = NULL;
+    int decommissioned = 0;
+    glusterd_brickinfo_t *new_ta_brickinfo = NULL;
+    char msg[256] = "";
+    char *brick_uuid_str = NULL;
+
+    GF_ASSERT(peer_data);
+    GF_ASSERT(vol_count >= 0);
+    GF_ASSERT(ta_brickinfo);
+    GF_ASSERT(prefix);
+
+    ret = snprintf(key_prefix, sizeof(key_prefix), "%s%d.ta-brick%d", prefix,
+                   vol_count, brick_count);
+
+    if (ret < 0 || ret >= sizeof(key_prefix)) {
+        ret = -1;
+        snprintf(msg, sizeof(msg), "key_prefix too long");
+        goto out;
+    }
+
+    keylen = snprintf(key, sizeof(key), "%s.hostname", key_prefix);
+    ret = dict_get_strn(peer_data, key, keylen, &hostname);
+    if (ret) {
+        snprintf(msg, sizeof(msg), "%s missing in payload", key);
+        goto out;
+    }
+
+    keylen = snprintf(key, sizeof(key), "%s.path", key_prefix);
+    ret = dict_get_strn(peer_data, key, keylen, &path);
+    if (ret) {
+        snprintf(msg, sizeof(msg), "%s missing in payload", key);
+        goto out;
+    }
+
+    keylen = snprintf(key, sizeof(key), "%s.uuid", key_prefix);
+    ret = dict_get_strn(peer_data, key, keylen, &brick_uuid_str);
+    if (ret) {
+        snprintf(msg, sizeof(msg), "%s missing in payload", key);
+        goto out;
+    }
+
+    /* Optional: a failed lookup simply leaves brick_id NULL. */
+    keylen = snprintf(key, sizeof(key), "%s.brick_id", key_prefix);
+    (void)dict_get_strn(peer_data, key, keylen, &brick_id);
+
+    /* Optional, for backward compatibility: decommissioned stays 0 when the
+     * key is absent. */
+    keylen = snprintf(key, sizeof(key), "%s.decommissioned", key_prefix);
+    (void)dict_get_int32n(peer_data, key, keylen, &decommissioned);
+
+    /* Reject values that would not fit the brickinfo fields. sizeof does
+     * not evaluate its operand, so the still-NULL pointer is not
+     * dereferenced here. */
+    if (strlen(path) >= sizeof(new_ta_brickinfo->path) ||
+        strlen(hostname) >= sizeof(new_ta_brickinfo->hostname)) {
+        ret = -1;
+        snprintf(msg, sizeof(msg), "hostname or path too long for %s",
+                 key_prefix);
+        goto out;
+    }
+
+    ret = glusterd_brickinfo_new(&new_ta_brickinfo);
+    if (ret)
+        goto out;
+
+    /* Lengths were validated above, so these copies cannot truncate. */
+    (void)snprintf(new_ta_brickinfo->path, sizeof(new_ta_brickinfo->path),
+                   "%s", path);
+    (void)snprintf(new_ta_brickinfo->hostname,
+                   sizeof(new_ta_brickinfo->hostname), "%s", hostname);
+    new_ta_brickinfo->decommissioned = decommissioned;
+    if (brick_id)
+        (void)snprintf(new_ta_brickinfo->brick_id,
+                       sizeof(new_ta_brickinfo->brick_id), "%s", brick_id);
+    gf_uuid_parse(brick_uuid_str, new_ta_brickinfo->uuid);
+
+    *ta_brickinfo = new_ta_brickinfo;
+    ret = 0;
+
+out:
+    if (msg[0]) {
+        gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_BRICK_IMPORT_FAIL, "%s",
+               msg);
+        /* msg is only ever set before the brickinfo exists, so report the
+         * values read from the payload (either may still be missing). */
+        gf_event(EVENT_IMPORT_BRICK_FAILED, "peer=%s;ta-brick=%s",
+                 hostname ? hostname : "", path ? path : "");
+    }
+    gf_msg_debug("glusterd", 0, "Returning with %d", ret);
+    return ret;
+}
+
/* The prefix represents the type of volume to be added.
* It will be "volume" for normal volumes, and snap# like
* snap1, snap2, for snapshot volumes
@@ -3857,8 +4032,10 @@ glusterd_import_bricks(dict_t *peer_data, int32_t vol_count,
{
int ret = -1;
int brick_count = 1;
+ int ta_brick_count = 1;
int brickid = 0;
glusterd_brickinfo_t *new_brickinfo = NULL;
+ glusterd_brickinfo_t *new_ta_brickinfo = NULL;
GF_ASSERT(peer_data);
GF_ASSERT(vol_count >= 0);
@@ -3877,6 +4054,19 @@ glusterd_import_bricks(dict_t *peer_data, int32_t vol_count,
cds_list_add_tail(&new_brickinfo->brick_list, &new_volinfo->bricks);
brick_count++;
}
+
+ if (new_volinfo->thin_arbiter_count == 1) {
+ while (ta_brick_count <= new_volinfo->subvol_count) {
+ ret = glusterd_import_new_ta_brick(peer_data, vol_count,
+ ta_brick_count,
+ &new_ta_brickinfo, prefix);
+ if (ret)
+ goto out;
+ cds_list_add_tail(&new_ta_brickinfo->brick_list,
+ &new_volinfo->ta_bricks);
+ ta_brick_count++;
+ }
+ }
ret = 0;
out:
gf_msg_debug("glusterd", 0, "Returning with %d", ret);
@@ -4155,6 +4345,14 @@ glusterd_import_volinfo(dict_t *peer_data, int count,
goto out;
}
+ keylen = snprintf(key, sizeof(key), "%s.subvol_count", key_prefix);
+ ret = dict_get_int32n(peer_data, key, keylen, &new_volinfo->subvol_count);
+ if (ret) {
+ snprintf(msg, sizeof(msg), "%s missing in payload for %s", key,
+ volname);
+ goto out;
+ }
+
/* not having a 'stripe_count' key is not a error
(as peer may be of old version) */
keylen = snprintf(key, sizeof(key), "%s.stripe_count", key_prefix);
@@ -4179,6 +4377,15 @@ glusterd_import_volinfo(dict_t *peer_data, int count,
gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_DICT_GET_FAILED,
"peer is possibly old version");
+ /* not having a 'thin_arbiter_count' key is not an error
+ (as peer may be of old version) */
+ keylen = snprintf(key, sizeof(key), "%s.thin_arbiter_count", key_prefix);
+ ret = dict_get_int32n(peer_data, key, keylen,
+ &new_volinfo->thin_arbiter_count);
+ if (ret)
+ gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_DICT_GET_FAILED,
+ "peer is possibly old version");
+
/* not having a 'disperse_count' key is not a error
(as peer may be of old version) */
keylen = snprintf(key, sizeof(key), "%s.disperse_count", key_prefix);
@@ -4392,6 +4599,8 @@ glusterd_volinfo_copy_brickinfo(glusterd_volinfo_t *old_volinfo,
{
glusterd_brickinfo_t *new_brickinfo = NULL;
glusterd_brickinfo_t *old_brickinfo = NULL;
+ glusterd_brickinfo_t *new_ta_brickinfo = NULL;
+ glusterd_brickinfo_t *old_ta_brickinfo = NULL;
glusterd_conf_t *priv = NULL;
int ret = 0;
xlator_t *this = NULL;
@@ -4440,6 +4649,46 @@ glusterd_volinfo_copy_brickinfo(glusterd_volinfo_t *old_volinfo,
}
}
}
+    /* Reuse port/real_path of the ta-brick looked up in old_volinfo. */
+    if (new_volinfo->thin_arbiter_count == 1) {
+        cds_list_for_each_entry(new_ta_brickinfo, &new_volinfo->ta_bricks,
+                                brick_list)
+        {
+            ret = glusterd_volume_ta_brickinfo_get(
+                new_ta_brickinfo->uuid, new_ta_brickinfo->hostname,
+                new_ta_brickinfo->path, old_volinfo, &old_ta_brickinfo);
+            if (ret == 0) {
+                new_ta_brickinfo->port = old_ta_brickinfo->port;
+                if (old_ta_brickinfo->real_path[0] == '\0') {
+                    if (!realpath(new_ta_brickinfo->path, abspath)) {
+                        /* An ENOENT is also a failure here, as the
+                         * ta-brick is expected to exist; report the
+                         * ta-brick path that failed to resolve.
+                         */
+                        gf_msg(this->name, GF_LOG_CRITICAL, errno,
+                               GD_MSG_BRICKINFO_CREATE_FAIL,
+                               "realpath () failed for brick "
+                               "%s. The underlying filesystem "
+                               "may be in bad state",
+                               new_ta_brickinfo->path);
+                        ret = -1;
+                        goto out;
+                    }
+                    if (strlen(abspath) >=
+                        sizeof(new_ta_brickinfo->real_path)) {
+                        ret = -1;
+                        goto out;
+                    }
+                    (void)strncpy(new_ta_brickinfo->real_path, abspath,
+                                  sizeof(new_ta_brickinfo->real_path));
+                } else {
+                    (void)strncpy(new_ta_brickinfo->real_path,
+                                  old_ta_brickinfo->real_path,
+                                  sizeof(new_ta_brickinfo->real_path));
+                }
+            }
+        }
+    }
ret = 0;
out:
@@ -4608,8 +4857,8 @@ gd_check_and_update_rebalance_info(glusterd_volinfo_t *old_volinfo,
new->rebalance_time = old->rebalance_time;
/* glusterd_rebalance_t.{op, id, defrag_cmd} are copied during volume
- * import
- * a new defrag object should come to life with rebalance being restarted
+ * import. A new defrag object should come to life with rebalance being
+ * restarted
*/
out:
return ret;
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
index c506da3..2312d42 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
@@ -32,6 +32,13 @@
brickid); \
} while (0)
+#define GLUSTERD_ASSIGN_BRICKID_TO_TA_BRICKINFO(ta_brickinfo, volinfo,         \
+                                                brickid)                       \
+    do { /* bounded write: brick_id is truncated rather than overflowed */     \
+        (void)snprintf((ta_brickinfo)->brick_id,                               \
+                       sizeof((ta_brickinfo)->brick_id), "%s-ta-%d",           \
+                       (volinfo)->volname, (brickid));                         \
+    } while (0)
+
#define ALL_VOLUME_OPTION_CHECK(volname, get_opt, key, ret, op_errstr, label) \
do { \
gf_boolean_t _all = !strcmp("all", volname); \
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
index 479ae77..8243548 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
@@ -566,7 +566,13 @@ no_filter_option_handler(volgen_graph_t *graph, struct volopt_map_entry *vme,
for (trav = first_of(graph); trav; trav = trav->next) {
if (strcmp(trav->type, vme->voltype) != 0)
continue;
-
+        if (strcmp(vme->option, "ta-remote-port") == 0) {
+            if (strstr(trav->name, "-ta-") != NULL) {
+                ret = xlator_set_option(trav, "remote-port", /* len of key: */
+                                        strlen("remote-port"), vme->value);
+            }
+            continue;
+        }
ret = xlator_set_option(trav, vme->option, strlen(vme->option),
vme->value);
if (ret)
@@ -3185,7 +3191,10 @@ volgen_graph_build_clients(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
0,
};
glusterd_brickinfo_t *brick = NULL;
+ glusterd_brickinfo_t *ta_brick = NULL;
xlator_t *xl = NULL;
+ int subvol_index = 0;
+ int thin_arbiter_index = 0;
if (volinfo->brick_count == 0) {
gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLUME_INCONSISTENCY,
@@ -3212,6 +3221,30 @@ volgen_graph_build_clients(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
i = 0;
cds_list_for_each_entry(brick, &volinfo->bricks, brick_list)
{
+        /* Insert the ta client xlator entry for the subvolume that was just
+         * completed. 'i' counts data bricks only (ta entries do not bump
+         * it), so a subvolume boundary is reached every replica_count
+         * bricks, i.e. before bricks 2, 4, 6, ... for replica 2. The ta
+         * entries then land at client xlator positions 2, 5, 8 etc.
+         */
+        if (volinfo->thin_arbiter_count && volinfo->replica_count > 0 &&
+            i > 0 && i % volinfo->replica_count == 0) {
+            thin_arbiter_index = 0;
+            cds_list_for_each_entry(ta_brick, &volinfo->ta_bricks, brick_list)
+            {
+                if (thin_arbiter_index == subvol_index) {
+                    xl = volgen_graph_build_client(
+                        graph, volinfo, ta_brick->hostname, NULL,
+                        ta_brick->path, ta_brick->brick_id, transt, set_dict);
+                    if (!xl) {
+                        ret = -1;
+                        goto out;
+                    }
+                }
+                thin_arbiter_index++;
+            }
+            subvol_index++;
+        }
xl = volgen_graph_build_client(graph, volinfo, brick->hostname, NULL,
brick->path, brick->brick_id, transt,
set_dict);
@@ -3223,6 +3256,28 @@ volgen_graph_build_clients(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
i++;
}
+ /* Add ta client xlator entry for last subvol
+ * Above loop will miss out on making the ta client
+ * xlator entry for the last subvolume in the volfile
+ */
+ if (volinfo->thin_arbiter_count) {
+ thin_arbiter_index = 0;
+ cds_list_for_each_entry(ta_brick, &volinfo->ta_bricks, brick_list)
+ {
+ if (thin_arbiter_index == subvol_index) {
+ xl = volgen_graph_build_client(
+ graph, volinfo, ta_brick->hostname, NULL, ta_brick->path,
+ ta_brick->brick_id, transt, set_dict);
+ if (!xl) {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ thin_arbiter_index++;
+ }
+ }
+
if (i != volinfo->brick_count) {
gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLUME_INCONSISTENCY,
"volume inconsistency: actual number of bricks (%d) "
@@ -3599,12 +3654,15 @@ set_afr_pending_xattrs_option(volgen_graph_t *graph,
xlator_t *this = NULL;
glusterd_conf_t *conf = NULL;
glusterd_brickinfo_t *brick = NULL;
+ glusterd_brickinfo_t *ta_brick = NULL;
char *ptr = NULL;
int i = 0;
int index = -1;
int ret = 0;
char *afr_xattrs_list = NULL;
int list_size = -1;
+ int ta_brick_index = 0;
+ int subvol_index = 0;
this = THIS;
GF_VALIDATE_OR_GOTO("glusterd", this, out);
@@ -3643,6 +3701,26 @@ set_afr_pending_xattrs_option(volgen_graph_t *graph,
break;
strncat(ptr, brick->brick_id, strlen(brick->brick_id));
if (i == volinfo->replica_count) {
+ /* add ta client xlator in afr-pending-xattrs before making entries
+ * for client xlators in volfile.
+ * ta client xlator indexes are - 2, 5, 8 depending on the index of
+ * subvol. e.g- For first subvol ta client xlator id is volname-ta-2
+ */
+ ta_brick_index = 0;
+ if (volinfo->thin_arbiter_count == 1) {
+ ptr[strlen(brick->brick_id)] = ',';
+ cds_list_for_each_entry(ta_brick, &volinfo->ta_bricks,
+ brick_list)
+ {
+ if (ta_brick_index == subvol_index) {
+ break;
+ }
+ ta_brick_index++;
+ }
+
+ strncat(ptr, ta_brick->brick_id, strlen(ta_brick->brick_id));
+ }
+
ret = xlator_set_fixed_option(afr_xlators_list[index++],
"afr-pending-xattr", afr_xattrs_list);
if (ret)
@@ -3650,6 +3728,7 @@ set_afr_pending_xattrs_option(volgen_graph_t *graph,
memset(afr_xattrs_list, 0, list_size);
ptr = afr_xattrs_list;
i = 1;
+ subvol_index++;
continue;
}
ptr[strlen(brick->brick_id)] = ',';
@@ -3674,6 +3753,13 @@ volgen_graph_build_afr_clusters(volgen_graph_t *graph,
char *replicate_name = "%s-replicate-%d";
xlator_t *afr = NULL;
char option[32] = {0};
+ glusterd_brickinfo_t *ta_brick = NULL;
+ int ta_brick_index = 0;
+ int ta_replica_offset = 0;
+ int ta_brick_offset = 0;
+ char ta_option[4096] = {
+ 0,
+ };
if (glusterd_volinfo_get_boolean(volinfo, "cluster.jbr") > 0) {
replicate_type = "experimental/jbrc";
@@ -3681,9 +3767,20 @@ volgen_graph_build_afr_clusters(volgen_graph_t *graph,
replicate_type = "cluster/replicate";
}
+ /* In thin-arbiter case brick count and replica count remain same
+ * but due to additional entries of ta client xlators in the volfile,
+ * GD1 is manipulated to include these client xlators while linking them to
+ * afr/cluster entry in the volfile.
+ */
+ if (volinfo->thin_arbiter_count == 1) {
+ ta_replica_offset = 1;
+ ta_brick_offset = volinfo->subvol_count;
+ }
+
clusters = volgen_link_bricks_from_list_tail(
- graph, volinfo, replicate_type, replicate_name, volinfo->brick_count,
- volinfo->replica_count);
+ graph, volinfo, replicate_type, replicate_name,
+ volinfo->brick_count + ta_brick_offset,
+ volinfo->replica_count + ta_replica_offset);
if (clusters < 0)
goto out;
@@ -3693,18 +3790,43 @@ volgen_graph_build_afr_clusters(volgen_graph_t *graph,
clusters = -1;
goto out;
}
- if (!volinfo->arbiter_count)
+ if (!volinfo->arbiter_count && !volinfo->thin_arbiter_count)
goto out;
afr = first_of(graph);
- sprintf(option, "%d", volinfo->arbiter_count);
- for (i = 0; i < clusters; i++) {
- ret = xlator_set_fixed_option(afr, "arbiter-count", option);
- if (ret) {
- clusters = -1;
- goto out;
+
+ if (volinfo->arbiter_count) {
+ sprintf(option, "%d", volinfo->arbiter_count);
+ for (i = 0; i < clusters; i++) {
+ ret = xlator_set_fixed_option(afr, "arbiter-count", option);
+ if (ret) {
+ clusters = -1;
+ goto out;
+ }
+
+ afr = afr->next;
+ }
+ }
+
+ if (volinfo->thin_arbiter_count == 1) {
+ for (i = 0; i < clusters; i++) {
+ ta_brick_index = 0;
+ cds_list_for_each_entry(ta_brick, &volinfo->ta_bricks, brick_list)
+ {
+ if (ta_brick_index == i) {
+ break;
+ }
+ ta_brick_index++;
+ }
+ snprintf(ta_option, sizeof(ta_option), "%s:%s", ta_brick->hostname,
+ ta_brick->path);
+ ret = xlator_set_fixed_option(afr, "thin-arbiter", ta_option);
+ if (ret) {
+ clusters = -1;
+ goto out;
+ }
+ afr = afr->next;
}
- afr = afr->next;
}
out:
return clusters;
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
index 7eb74d7..4624fe1 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
@@ -276,6 +276,7 @@ __glusterd_handle_create_volume(rpcsvc_request_t *req)
char *bricks = NULL;
char *volname = NULL;
int brick_count = 0;
+ int thin_arbiter_count = 0;
void *cli_rsp = NULL;
char err_str[2048] = {
0,
@@ -435,6 +436,21 @@ __glusterd_handle_create_volume(rpcsvc_request_t *req)
goto out;
}
+ ret = dict_get_int32n(dict, "thin-arbiter-count",
+ SLEN("thin-arbiter-count"), &thin_arbiter_count);
+ if (thin_arbiter_count && conf->op_version < GD_OP_VERSION_7_0) {
+ snprintf(err_str, sizeof(err_str),
+ "Cannot execute command. "
+ "The cluster is operating at version %d. "
+ "Thin-arbiter volume creation is unavailable in "
+ "this version",
+ conf->op_version);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTERD_OP_FAILED, "%s",
+ err_str);
+ ret = -1;
+ goto out;
+ }
+
if (!dict_getn(dict, "force", SLEN("force"))) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
"Failed to get 'force' flag");
@@ -2028,14 +2044,20 @@ glusterd_op_create_volume(dict_t *dict, char **op_errstr)
glusterd_volinfo_t *volinfo = NULL;
gf_boolean_t vol_added = _gf_false;
glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_brickinfo_t *ta_brickinfo = NULL;
xlator_t *this = NULL;
char *brick = NULL;
+ char *ta_brick = NULL;
int32_t count = 0;
int32_t i = 1;
char *bricks = NULL;
+ char *ta_bricks = NULL;
char *brick_list = NULL;
+ char *ta_brick_list = NULL;
char *free_ptr = NULL;
+ char *ta_free_ptr = NULL;
char *saveptr = NULL;
+ char *ta_saveptr = NULL;
char *trans_type = NULL;
char *str = NULL;
char *username = NULL;
@@ -2153,6 +2175,20 @@ glusterd_op_create_volume(dict_t *dict, char **op_errstr)
/* coverity[unused_value] arbiter count is optional */
ret = dict_get_int32n(dict, "arbiter-count", SLEN("arbiter-count"),
&volinfo->arbiter_count);
+ ret = dict_get_int32n(dict, "thin-arbiter-count",
+ SLEN("thin-arbiter-count"),
+ &volinfo->thin_arbiter_count);
+ if (volinfo->thin_arbiter_count) {
+ ret = dict_get_strn(dict, "ta-brick", SLEN("ta-brick"), &ta_bricks);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get thin arbiter brick for "
+ "volume %s",
+ volname);
+ goto out;
+ }
+ }
+
} else if (GF_CLUSTER_TYPE_DISPERSE == volinfo->type) {
ret = dict_get_int32n(dict, "disperse-count", SLEN("disperse-count"),
&volinfo->disperse_count);
@@ -2241,6 +2277,38 @@ glusterd_op_create_volume(dict_t *dict, char **op_errstr)
volinfo->transport_type = GF_TRANSPORT_BOTH_TCP_RDMA;
}
+ if (ta_bricks) {
+ ta_brick_list = gf_strdup(ta_bricks);
+ ta_free_ptr = ta_brick_list;
+ }
+
+ if (volinfo->thin_arbiter_count) {
+ ta_brick = strtok_r(ta_brick_list + 1, " \n", &ta_saveptr);
+
+ count = 1;
+ brickid = volinfo->replica_count;
+ /* assign brickid to ta_bricks
+ * The following loop runs once per subvolume. Although there is
+ * only one ta-brick for a volume, the volume's fuse volfile requires
+ * a ta-brick entry for each subvolume. Also, the ta-brick id needs
+ * to be adjusted according to the subvol count.
+ * For example, for the first subvolume the ta-brick id is volname-ta-2,
+ * and for the second subvolume it is volname-ta-5.
+ */
+ while (count <= volinfo->subvol_count) {
+ ret = glusterd_brickinfo_new_from_brick(ta_brick, &ta_brickinfo,
+ _gf_false, op_errstr);
+ if (ret)
+ goto out;
+
+ GLUSTERD_ASSIGN_BRICKID_TO_TA_BRICKINFO(ta_brickinfo, volinfo,
+ brickid);
+ cds_list_add_tail(&ta_brickinfo->brick_list, &volinfo->ta_bricks);
+ count++;
+ brickid += volinfo->replica_count + 1;
+ }
+ }
+
if (bricks) {
brick_list = gf_strdup(bricks);
free_ptr = brick_list;
@@ -2259,7 +2327,10 @@ glusterd_op_create_volume(dict_t *dict, char **op_errstr)
op_errstr);
if (ret)
goto out;
-
+ if (volinfo->thin_arbiter_count == 1 &&
+ (brickid + 1) % (volinfo->replica_count + 1) == 0) {
+ brickid = brickid + 1;
+ }
GLUSTERD_ASSIGN_BRICKID_TO_BRICKINFO(brickinfo, volinfo, brickid++);
ret = glusterd_resolve_brick(brickinfo);
@@ -2350,6 +2421,7 @@ glusterd_op_create_volume(dict_t *dict, char **op_errstr)
out:
GF_FREE(free_ptr);
+ GF_FREE(ta_free_ptr);
if (!vol_added && volinfo)
glusterd_volinfo_unref(volinfo);
return ret;
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index dba8fbe..b943f66 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -1479,6 +1479,14 @@ struct volopt_map_entry glusterd_volopt_map[] = {
.value = "9",
.flags = VOLOPT_FLAG_CLIENT_OPT},
+ /* Although the following option is named ta-remote-port, it will be
+ * added as remote-port in the client volfile for ta-bricks only.
+ */
+ {.key = "client.ta-brick-port",
+ .voltype = "protocol/client",
+ .option = "ta-remote-port",
+ .op_version = GD_OP_VERSION_7_0},
+
/* Server xlator options */
{.key = "network.tcp-window-size",
.voltype = "protocol/server",
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
index 575f8c5..f63f4c1 100644
--- a/xlators/mgmt/glusterd/src/glusterd.h
+++ b/xlators/mgmt/glusterd/src/glusterd.h
@@ -440,6 +440,7 @@ struct glusterd_volinfo_ {
/* This is a current pointer for
glusterd_volinfo_t->snap_volumes */
struct cds_list_head bricks;
+ struct cds_list_head ta_bricks;
struct cds_list_head snap_volumes;
/* TODO : Need to remove this, as this
* is already part of snapshot object.
@@ -449,6 +450,7 @@ struct glusterd_volinfo_ {
int stripe_count;
int replica_count;
int arbiter_count;
+ int thin_arbiter_count;
int disperse_count;
int redundancy_count;
int subvol_count; /* Number of subvolumes in a