summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJeff Darcy <jdarcy@redhat.com>2015-12-14 16:25:22 -0500
committerJeff Darcy <jdarcy@redhat.com>2016-02-08 11:40:53 -0800
commitfac8038a9fa80e16b557d91b9e2fec271cfda5fa (patch)
tree71a3cd9aa8da5b45e7cd1df364f393a044ed5dcc
parentb28a1d8e54d70deca14efa49da1781e29ce8eb51 (diff)
NSR: Volgen Support
Allows the user to convert an afr-volume to an nsr-volume by using the cluster.nsr option in the volume set command: gluster volume set <volname> cluster.nsr <on/off> Change-Id: Ia1c5aa89d27535f7275d474cf312dc5efb8e222f BUG: 1158654 Signed-off-by: Jeff Darcy <jdarcy@redhat.com> Reviewed-on: http://review.gluster.org/12943 Smoke: Gluster Build System <jenkins@build.gluster.com> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> Reviewed-by: Avra Sengupta <asengupt@redhat.com> CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
-rw-r--r--libglusterfs/src/globals.h4
-rw-r--r--tests/basic/nsr/nsr-volgen.t37
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.c151
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-set.c13
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.h2
-rw-r--r--xlators/protocol/server/src/server-rpc-fops.c8
6 files changed, 203 insertions, 12 deletions
diff --git a/libglusterfs/src/globals.h b/libglusterfs/src/globals.h
index 3a4ff44a335..ae05a3abd15 100644
--- a/libglusterfs/src/globals.h
+++ b/libglusterfs/src/globals.h
@@ -38,7 +38,7 @@
*/
#define GD_OP_VERSION_MIN 1 /* MIN is the fresh start op-version, mostly
should not change */
-#define GD_OP_VERSION_MAX GD_OP_VERSION_3_7_7 /* MAX VERSION is the maximum
+#define GD_OP_VERSION_MAX GD_OP_VERSION_4_0_0 /* MAX VERSION is the maximum
count in VME table, should
keep changing with
introduction of newer
@@ -62,6 +62,8 @@
#define GD_OP_VERSION_3_7_7 30707 /* Op-version for GlusterFS 3.7.7 */
+#define GD_OP_VERSION_4_0_0 40000 /* Op-version for GlusterFS 4.0.0 */
+
#define GD_OP_VER_PERSISTENT_AFR_XATTRS GD_OP_VERSION_3_6_0
#include "xlator.h"
diff --git a/tests/basic/nsr/nsr-volgen.t b/tests/basic/nsr/nsr-volgen.t
new file mode 100644
index 00000000000..99563ef608a
--- /dev/null
+++ b/tests/basic/nsr/nsr-volgen.t
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+volfiles=${GLUSTERD_WORKDIR}/vols/${V0}/
+check_brick_volfiles () {
+ for vf in ${volfiles}${V0}.$(hostname).*.vol; do
+ grep -qs experimental/nsr $vf || return
+ # At least for now, nothing else would put a client translator
+ # in a brick volfile.
+ grep -qs protocol/client $vf || return
+ done
+ echo "OK"
+}
+
+# Start from a clean slate in case an earlier run left state behind.
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+# Create a two-way replicated volume and switch it from AFR to NSR.
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2}
+TEST $CLI volume set $V0 cluster.nsr on
+
+# Check that the client volfile got modified properly.
+TEST grep -qs experimental/nsrc ${volfiles}${V0}.tcp-fuse.vol
+
+# Check that the brick volfiles got modified as well.
+EXPECT "OK" check_brick_volfiles
+
+# Put things back and make sure the "undo" worked: with NSR turned off
+# again, a file written through the mount must show up on both bricks.
+TEST $CLI volume set $V0 cluster.nsr off
+TEST $CLI volume start $V0
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+echo hello > $M0/probe
+EXPECT hello cat ${B0}/${V0}1/probe
+EXPECT hello cat ${B0}/${V0}2/probe
+
+cleanup
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
index c760b947551..2c52cf72a3f 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
@@ -115,7 +115,6 @@ xlator_instantiate_va (const char *type, const char *format, va_list arg)
return NULL;
}
-#ifdef __not_used_as_of_now_
static xlator_t *
xlator_instantiate (const char *type, const char *format, ...)
{
@@ -128,7 +127,6 @@ xlator_instantiate (const char *type, const char *format, ...)
return xl;
}
-#endif
static int
volgen_xlator_link (xlator_t *pxl, xlator_t *cxl)
@@ -1825,6 +1823,107 @@ out:
return ret;
}
+/*
+ * Add a protocol/client translator to a brick graph so that the brick can
+ * talk to one of the other bricks ("peer") of its replica set.
+ *
+ * graph:   graph that receives the new translator; it is added with
+ *          volgen_graph_add_nolink, so linking it to a parent is left to
+ *          the caller
+ * peer:    brick to connect to; its hostname and path become the
+ *          remote-host and remote-subvolume options
+ * volname: volume name, used as the prefix of the translator name
+ * index:   number making the "<volname>-client-<index>" name unique
+ *
+ * Returns the new translator, or NULL if it could not be created or one of
+ * its options could not be set.  On an option failure the translator is not
+ * removed again; NULL is simply returned.
+ */
+xlator_t *
+add_one_peer (volgen_graph_t *graph, glusterd_brickinfo_t *peer,
+              char *volname, uint16_t index)
+{
+        xlator_t *kid;
+
+        /*
+         * "index" is passed by value, so advancing it is the caller's job;
+         * incrementing it here would have no visible effect.
+         */
+        kid = volgen_graph_add_nolink (graph, "protocol/client",
+                                       "%s-client-%u", volname,
+                                       (unsigned int) index);
+        if (!kid) {
+                return NULL;
+        }
+
+        /* TBD: figure out where to get the proper transport list */
+        if (xlator_set_option (kid, "transport-type", "socket")) {
+                return NULL;
+        }
+        if (xlator_set_option (kid, "remote-host", peer->hostname)) {
+                return NULL;
+        }
+        if (xlator_set_option (kid, "remote-subvolume", peer->path)) {
+                return NULL;
+        }
+        /* TBD: deal with RDMA, SSL */
+
+        return kid;
+}
+
+/*
+ * Place an experimental/nsr translator above the current first translator
+ * of a brick graph and give it one protocol/client child for every other
+ * brick in the same replica group.
+ *
+ * graph:     brick graph being generated
+ * volinfo:   volume the brick belongs to (supplies the name and brick list)
+ * brickinfo: brick whose volfile is being generated
+ *
+ * Returns 0 on success and -1 on any failure.
+ */
+int
+add_nsr_stuff (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+               glusterd_brickinfo_t *brickinfo)
+{
+        xlator_t             *nsr_xl    = NULL;
+        xlator_t             *client    = NULL;
+        glusterd_brickinfo_t *cursor    = NULL;
+        glusterd_brickinfo_t *before    = NULL;
+        char                 *is_leader = "yes";
+        uint16_t              next_idx  = 0;
+
+        /*
+         * Create the NSR xlator with the old top of the graph as its child.
+         * It only becomes the graph's first xlator at the very end.
+         */
+        nsr_xl = xlator_instantiate ("experimental/nsr", "%s-nsr",
+                                     volinfo->volname);
+        if (!nsr_xl) {
+                return -1;
+        }
+        if (volgen_xlator_link (nsr_xl, first_of (graph))) {
+                return -1;
+        }
+
+        /*
+         * A brick starts as leader unless the brick right before it in the
+         * volume's list belongs to the same group.
+         */
+        before = list_prev (brickinfo, &volinfo->bricks,
+                            glusterd_brickinfo_t, brick_list);
+        if (before && (before->group == brickinfo->group)) {
+                is_leader = "no";
+        }
+        if (xlator_set_option (nsr_xl, "leader", is_leader)) {
+                /*
+                 * TBD: fix memory leak (the new xlator and its dictionary).
+                 * No helper seems to exist for tearing down a freshly
+                 * allocated translator when a later step fails, and the
+                 * rest of this file just returns without freeing.  That
+                 * sloppiness should not be allowed to last forever.
+                 */
+                return -1;
+        }
+
+        /*
+         * Rewind to the first brick of this replica group, so that every
+         * brick's volfile lists its peers in the same order.
+         */
+        cursor = brickinfo;
+        before = list_prev (cursor, &volinfo->bricks,
+                            glusterd_brickinfo_t, brick_list);
+        while (before && (before->group == brickinfo->group)) {
+                cursor = before;
+                before = list_prev (cursor, &volinfo->bricks,
+                                    glusterd_brickinfo_t, brick_list);
+        }
+
+        /* Walk the group, adding a client for every brick except ourselves. */
+        for (; cursor && (cursor->group == brickinfo->group);
+             cursor = list_next (cursor, &volinfo->bricks,
+                                 glusterd_brickinfo_t, brick_list)) {
+                if (cursor == brickinfo) {
+                        continue;
+                }
+                gf_log ("glusterd", GF_LOG_INFO,
+                        "%s:%s needs client for %s:%s",
+                        brickinfo->hostname, brickinfo->path,
+                        cursor->hostname, cursor->path);
+                client = add_one_peer (graph, cursor,
+                                       volinfo->volname, next_idx++);
+                if (!client) {
+                        return -1;
+                }
+                if (volgen_xlator_link (nsr_xl, client)) {
+                        return -1;
+                }
+        }
+
+        /* Everything is wired up: make NSR the graph's first xlator. */
+        glusterfs_graph_set_first (&graph->graph, nsr_xl);
+
+        return 0;
+}
+
static int
brick_graph_add_index (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
dict_t *set_dict, glusterd_brickinfo_t *brickinfo)
@@ -1837,6 +1936,11 @@ brick_graph_add_index (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
if (!graph || !volinfo || !brickinfo || !set_dict)
goto out;
+ /* For NSR we don't need/want index. */
+ if (glusterd_volinfo_get_boolean(volinfo, "cluster.nsr") > 0) {
+ return add_nsr_stuff (graph, volinfo, brickinfo);
+ }
+
xl = volgen_graph_add (graph, "features/index", volinfo->volname);
if (!xl)
goto out;
@@ -3379,12 +3483,18 @@ volgen_graph_build_afr_clusters (volgen_graph_t *graph,
int i = 0;
int ret = 0;
int clusters = 0;
- char *replicate_args[] = {"cluster/replicate",
- "%s-replicate-%d"};
+ char *replicate_type = NULL;
+ char *replicate_name = "%s-replicate-%d";
xlator_t *afr = NULL;
char option[32] = {0};
int start_count = 0;
+ if (glusterd_volinfo_get_boolean(volinfo, "cluster.nsr") > 0) {
+ replicate_type = "experimental/nsrc";
+ } else {
+ replicate_type = "cluster/replicate";
+ }
+
if (volinfo->tier_info.cold_type == GF_CLUSTER_TYPE_REPLICATE)
start_count = volinfo->tier_info.cold_brick_count /
volinfo->tier_info.cold_replica_count;
@@ -3392,16 +3502,16 @@ volgen_graph_build_afr_clusters (volgen_graph_t *graph,
if (volinfo->tier_info.cur_tier_hot)
clusters = volgen_link_bricks_from_list_head_start (graph,
volinfo,
- replicate_args[0],
- replicate_args[1],
+ replicate_type,
+ replicate_name,
volinfo->brick_count,
volinfo->replica_count,
start_count);
else
clusters = volgen_link_bricks_from_list_tail (graph,
volinfo,
- replicate_args[0],
- replicate_args[1],
+ replicate_type,
+ replicate_name,
volinfo->brick_count,
volinfo->replica_count);
@@ -5139,6 +5249,27 @@ get_parent_vol_tstamp_file (char *filename, glusterd_volinfo_t *volinfo)
PATH_MAX - strlen(filename) - 1);
}
+/*
+ * Walk the volume's bricks in list order and stamp each one with a group
+ * number and a UUID.  Every run of volinfo->replica_count consecutive
+ * bricks shares the same group number and the same freshly generated UUID.
+ */
+void
+assign_groups (glusterd_volinfo_t *volinfo)
+{
+        glusterd_brickinfo_t *brick      = NULL;
+        uint16_t              cur_group  = 0;
+        int                   filled     = 0;
+        uuid_t                group_uuid;
+
+        list_for_each_entry (brick, &volinfo->bricks, brick_list) {
+                /* First brick of a group: mint the UUID the group shares. */
+                if (!filled) {
+                        gf_uuid_generate (group_uuid);
+                }
+
+                brick->group = cur_group;
+                gf_uuid_copy (brick->nsr_uuid, group_uuid);
+
+                /* Group complete: the next brick opens a new one. */
+                filled++;
+                if (filled >= volinfo->replica_count) {
+                        filled = 0;
+                        cur_group++;
+                }
+        }
+}
+
int
generate_brick_volfiles (glusterd_volinfo_t *volinfo)
{
@@ -5207,6 +5338,10 @@ generate_brick_volfiles (glusterd_volinfo_t *volinfo)
}
}
+ if (glusterd_volinfo_get_boolean(volinfo, "cluster.nsr") > 0) {
+ assign_groups(volinfo);
+ }
+
ret = glusterd_volume_brick_for_each (volinfo, NULL,
glusterd_generate_brick_volfile);
if (ret)
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index 112980c4661..ec7a19070ab 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -2644,6 +2644,19 @@ struct volopt_map_entry glusterd_volopt_map[] = {
.op_version = GD_OP_VERSION_3_7_6,
.flags = OPT_FLAG_CLIENT_OPT
},
+ { .key = "cluster.nsr",
+ .voltype = "experimental/nsr",
+ .option = "!nsr",
+ .op_version = GD_OP_VERSION_4_0_0,
+ .description = "enable NSR instead of AFR for replication",
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT
+ },
+ { .key = "cluster.nsr.quorum-percent",
+ .voltype = "experimental/nsr",
+ .option = "quorum-percent",
+ .op_version = GD_OP_VERSION_4_0_0,
+ .description = "percent of rep_count-1 bricks that must be up"
+ },
{ .key = NULL
}
};
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
index 0af7be07300..0a313918856 100644
--- a/xlators/mgmt/glusterd/src/glusterd.h
+++ b/xlators/mgmt/glusterd/src/glusterd.h
@@ -215,7 +215,7 @@ struct glusterd_brickinfo {
* a replica 3 volume with arbiter enabled.
*/
uint16_t group;
-
+ uuid_t nsr_uuid;
};
typedef struct glusterd_brickinfo glusterd_brickinfo_t;
diff --git a/xlators/protocol/server/src/server-rpc-fops.c b/xlators/protocol/server/src/server-rpc-fops.c
index 9a7d8eb71ed..239323382ea 100644
--- a/xlators/protocol/server/src/server-rpc-fops.c
+++ b/xlators/protocol/server/src/server-rpc-fops.c
@@ -1772,8 +1772,12 @@ server_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- gf_stat_from_iatt (&rsp.statpre, statpre);
- gf_stat_from_iatt (&rsp.statpost, statpost);
+ if (statpre) {
+ gf_stat_from_iatt (&rsp.statpre, statpre);
+ }
+ if (statpost) {
+ gf_stat_from_iatt (&rsp.statpost, statpost);
+ }
out:
rsp.op_ret = op_ret;