summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRavishankar N <ravishankar@redhat.com>2015-11-25 09:49:19 +0530
committerAtin Mukherjee <amukherj@redhat.com>2015-12-16 22:02:55 -0800
commit486b07dfc33782d27e3458659cdd6090f496ad35 (patch)
treec07068e0026591113344b57f24bf4e6dd79e6ed8
parent96da2fbc7fa7f9e27c645b98d8b12491be24a4c4 (diff)
glusterd/afr: store afr pending xattrs as a volume option
Backport of http://review.gluster.org/#/c/12738/ Problem: When AFR xlator initialises, it uses the name of the client xlators below it for storing the pending changelogs (xattrs). This can be a problem when some other xlator is loaded in between AFR and the client. Though that is a trivial 'traverse-graph-till-the-client-and-use-the-name' fix in AFR's init(), there are other issues like when there's no client xlator at all when, say, AFR is moved to the server side. Fix: The client xlator names are currently unique and stored as brickinfo->brick_ids. So persist these ids as comma separated values in AFR's volume_options and use them as xattr values during init(). Change-Id: Ie761ffeb3373a4c4d85ad05c84a768c4188aa90d BUG: 1291985 Signed-off-by: Ravishankar N <ravishankar@redhat.com> Reviewed-on: http://review.gluster.org/12977 Tested-by: NetBSD Build System <jenkins@build.gluster.org> Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com> Tested-by: Gluster Build System <jenkins@build.gluster.com>
-rwxr-xr-xtests/bugs/glusterfs/bug-853690.t1
-rwxr-xr-xtests/bugs/glusterfs/bug-892730.t1
-rw-r--r--xlators/cluster/afr/src/afr.c32
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.c104
4 files changed, 122 insertions, 16 deletions
diff --git a/tests/bugs/glusterfs/bug-853690.t b/tests/bugs/glusterfs/bug-853690.t
index 59facfcddb0..7880b64488f 100755
--- a/tests/bugs/glusterfs/bug-853690.t
+++ b/tests/bugs/glusterfs/bug-853690.t
@@ -53,6 +53,7 @@ end-volume
volume test-replicate-0
type cluster/replicate
+ option afr-pending-xattr test-locks-0,test-locks-1
option background-self-heal-count 0
subvolumes test-locks-0 test-locks-1
end-volume
diff --git a/tests/bugs/glusterfs/bug-892730.t b/tests/bugs/glusterfs/bug-892730.t
index a76961134c5..1fa0ff3bfb4 100755
--- a/tests/bugs/glusterfs/bug-892730.t
+++ b/tests/bugs/glusterfs/bug-892730.t
@@ -53,6 +53,7 @@ end-volume
volume test-replicate-0
type cluster/replicate
+ option afr-pending-xattr test-locks-0,test-locks-1
option background-self-heal-count 0
subvolumes test-locks-0 test-locks-1
end-volume
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index 5ef920a13d1..4c072b6c69f 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -248,6 +248,8 @@ init (xlator_t *this)
int read_subvol_index = -1;
xlator_t *fav_child = NULL;
char *qtype = NULL;
+ char *xattrs_list = NULL;
+ char *ptr = NULL;
if (!this->children) {
gf_msg (this->name, GF_LOG_ERROR, 0,
@@ -401,6 +403,7 @@ init (xlator_t *this)
goto out;
}
+ GF_OPTION_INIT ("afr-pending-xattr", xattrs_list, str, out);
priv->pending_key = GF_CALLOC (sizeof (*priv->pending_key),
child_count,
gf_afr_mt_char);
@@ -408,20 +411,25 @@ init (xlator_t *this)
ret = -ENOMEM;
goto out;
}
-
- trav = this->children;
- i = 0;
- while (i < child_count) {
- priv->children[i] = trav->xlator;
-
+ ptr = gf_strdup (xattrs_list);
+ if (!ptr) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ for (i = 0, ptr = strtok (ptr, ","); ptr; ptr = strtok (NULL, ",")) {
ret = gf_asprintf (&priv->pending_key[i], "%s.%s",
- AFR_XATTR_PREFIX,
- trav->xlator->name);
- if (-1 == ret) {
+ AFR_XATTR_PREFIX, ptr);
+ if (ret == -1) {
ret = -ENOMEM;
goto out;
}
+ i++;
+ }
+ trav = this->children;
+ i = 0;
+ while (i < child_count) {
+ priv->children[i] = trav->xlator;
trav = trav->next;
i++;
}
@@ -457,6 +465,7 @@ init (xlator_t *this)
ret = 0;
out:
+ GF_FREE (ptr);
return ret;
}
@@ -780,6 +789,11 @@ struct volume_options options[] = {
.type = GF_OPTION_TYPE_STR,
.default_value = AFR_DIRTY_DEFAULT,
},
+ { .key = {"afr-pending-xattr"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "Comma seperated list of xattrs that are used to "
+ "capture information on pending heals."
+ },
{ .key = {"metadata-splitbrain-forced-heal"},
.type = GF_OPTION_TYPE_BOOL,
.default_value = "off",
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
index 988c6053d41..29e5fe9b3f9 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
@@ -3286,6 +3286,80 @@ out:
}
static int
+set_afr_pending_xattrs_option (volgen_graph_t *graph,
+ glusterd_volinfo_t *volinfo,
+ int clusters)
+{
+ xlator_t *xlator = NULL;
+ xlator_t **afr_xlators_list = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ glusterd_brickinfo_t *brick = NULL;
+ char *ptr = NULL;
+ int i = 0;
+ int index = -1;
+ int ret = 0;
+ char *afr_xattrs_list = NULL;
+ int list_size = -1;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO ("glusterd", this, out);
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO (this->name, conf, out);
+
+ if (conf->op_version < GD_OP_VERSION_3_7_7)
+ return ret;
+
+ /* (brick_id x rep.count) + (rep.count-1 commas) + NULL*/
+ list_size = (1024 * volinfo->replica_count) +
+ (volinfo->replica_count - 1) + 1;
+ afr_xattrs_list = GF_CALLOC (1, list_size, gf_common_mt_char);
+ if (!afr_xattrs_list)
+ goto out;
+
+ ptr = afr_xattrs_list;
+ afr_xlators_list = GF_CALLOC (clusters, sizeof (xlator_t *),
+ gf_common_mt_xlator_t);
+ if (!afr_xlators_list)
+ goto out;
+
+ xlator = first_of (graph);
+
+ for (i = 0, index = clusters - 1; i < clusters; i++) {
+ afr_xlators_list[index--] = xlator;
+ xlator = xlator->next;
+ }
+
+ i = 1;
+ index = 0;
+
+ cds_list_for_each_entry (brick, &volinfo->bricks, brick_list) {
+ if (index == clusters)
+ break;
+ strncat (ptr, brick->brick_id, strlen(brick->brick_id));
+ if (i == volinfo->replica_count) {
+ ret = xlator_set_option (afr_xlators_list[index++],
+ "afr-pending-xattr",
+ afr_xattrs_list);
+ if (ret)
+ return ret;
+ memset (afr_xattrs_list, 0, list_size);
+ ptr = afr_xattrs_list;
+ i = 1;
+ continue;
+ }
+ ptr[strlen(brick->brick_id)] = ',';
+ ptr += strlen (brick->brick_id) + 1;
+ i++;
+ }
+
+out:
+ GF_FREE (afr_xattrs_list);
+ GF_FREE (afr_xlators_list);
+ return ret;
+}
+
+static int
volgen_graph_build_afr_clusters (volgen_graph_t *graph,
glusterd_volinfo_t *volinfo)
{
@@ -3296,7 +3370,7 @@ volgen_graph_build_afr_clusters (volgen_graph_t *graph,
"%s-replicate-%d"};
xlator_t *afr = NULL;
char option[32] = {0};
- int start_count = 0;
+ int start_count = 0;
if (volinfo->tier_info.cold_type == GF_CLUSTER_TYPE_REPLICATE)
start_count = volinfo->tier_info.cold_brick_count /
@@ -3321,6 +3395,11 @@ volgen_graph_build_afr_clusters (volgen_graph_t *graph,
if (clusters < 0)
goto out;
+ ret = set_afr_pending_xattrs_option (graph, volinfo, clusters);
+ if (ret) {
+ clusters = -1;
+ goto out;
+ }
if (!volinfo->arbiter_count)
goto out;
@@ -3343,8 +3422,6 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph,
glusterd_volinfo_t *volinfo,
gf_boolean_t is_quotad)
{
- char *replicate_args[] = {"cluster/replicate",
- "%s-replicate-%d"};
char *tier_args[] = {"cluster/tier",
"%s-tier-%d"};
char *stripe_args[] = {"cluster/stripe",
@@ -3536,7 +3613,6 @@ volume_volgen_graph_build_clusters_tier (volgen_graph_t *graph,
int ret = -1;
xlator_t *root = NULL;
xlator_t *xl, *hxl, *cxl;
- glusterd_brickinfo_t *brick = NULL;
char *rule = NULL;
int st_brick_count = 0;
int st_replica_count = 0;
@@ -3546,6 +3622,8 @@ volume_volgen_graph_build_clusters_tier (volgen_graph_t *graph,
int dist_count = 0;
int start_count = 0;
char *decommissioned_children = NULL;
+ glusterd_volinfo_t *dup_volinfo = NULL;
+ gf_boolean_t is_hot_tier = _gf_false;
st_brick_count = volinfo->brick_count;
st_replica_count = volinfo->replica_count;
@@ -3560,8 +3638,13 @@ volume_volgen_graph_build_clusters_tier (volgen_graph_t *graph,
volinfo->redundancy_count = volinfo->tier_info.cold_redundancy_count;
volinfo->type = volinfo->tier_info.cold_type;
volinfo->tier_info.cur_tier_hot = 0;
+ ret = glusterd_create_sub_tier_volinfo (volinfo, &dup_volinfo,
+ is_hot_tier, volinfo->volname);
+ if (ret)
+ goto out;
- ret = volume_volgen_graph_build_clusters (graph, volinfo, is_quotad);
+ ret = volume_volgen_graph_build_clusters (graph, dup_volinfo,
+ is_quotad);
if (ret)
goto out;
cxl = first_of(graph);
@@ -3588,12 +3671,17 @@ volume_volgen_graph_build_clusters_tier (volgen_graph_t *graph,
volinfo->brick_count,
volinfo->replica_count,
start_count);
- if (ret != -1)
- volgen_link_bricks_from_list_tail (graph, volinfo,
+ if (ret != -1) {
+ ret = set_afr_pending_xattrs_option (graph, volinfo,
+ ret);
+ if (ret)
+ goto out;
+ volgen_link_bricks_from_list_tail (graph, volinfo,
"cluster/distribute",
"%s-hot-dht",
dist_count,
dist_count);
+ }
} else {
ret = volgen_link_bricks_from_list_head (graph, volinfo,
"cluster/distribute",
@@ -3653,6 +3741,8 @@ volume_volgen_graph_build_clusters_tier (volgen_graph_t *graph,
volinfo->dist_leaf_count = st_dist_leaf_count;
volinfo->tier_info.cur_tier_hot = 0;
+ if (dup_volinfo)
+ glusterd_volinfo_delete (dup_volinfo);
GF_FREE (rule);
return ret;
}