summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Poornima G <pgurusid@redhat.com> 2017-04-13 16:20:29 +0530
committer Raghavendra G <rgowdapp@redhat.com> 2017-04-18 02:16:11 -0400
commit 94196dee1f1b0e22faab69cd9b1b1c70ba3d2f6f (patch)
tree a49dae1cd1da8079de34d5bfa5be3589927c774a
parent a9b5333d7bae6e20ffef07dffcda49eaf9d6823b (diff)
dht: Add readdir-ahead in rebalance graph if parallel-readdir is on
Issue: The value of the linkto xattr is generally the name of dht's next subvol; this requires that the next subvol of dht does not change for the lifetime of the volume. But with parallel readdir enabled, the readdir-ahead xlator loaded below dht is optional. The linkto xattr for the first subvol, when: - parallel readdir is enabled : "<volname>-readdir-ahead-0" - plain distribute volume : "<volname>-client-0" - distribute replicate volume : "<volname>-afr-0" The value of the linkto xattr is "<volname>-readdir-ahead-0" when parallel readdir is enabled, and is "<volname>-client-0" if it is disabled. dht_lookup takes care of healing if it cannot identify which subvol the linkto xattr points to. In dht_lookup_cbk, if the linkto xattr is found to be "<volname>-client-0" and parallel readdir is enabled, then it cannot understand the value "<volname>-client-0", as it expects "<volname>-readdir-ahead-0". In that case, dht_lookup_everywhere is issued and then the linkto file is unlinked and recreated with the right linkto xattr. The issue arises when parallel readdir is enabled and the mount point accesses a file that is currently being migrated. Since the rebalance process doesn't have the parallel-readdir feature, it expects "<volname>-client-0", whereas the mount expects "<volname>-readdir-ahead-0". Thus at some point either the mount or the rebalance will fail. Solution: Enable parallel-readdir for rebalance as well, and do not allow enabling/disabling parallel-readdir while rebalance is in progress. Change-Id: I241ab966bdd850e667f7768840540546f5289483 BUG: 1436090 Signed-off-by: Poornima G <pgurusid@redhat.com> Reviewed-on: https://review.gluster.org/17056 Smoke: Gluster Build System <jenkins@build.gluster.org> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> CentOS-regression: Gluster Build System <jenkins@build.gluster.org> Reviewed-by: Atin Mukherjee <amukherj@redhat.com> Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
-rw-r--r-- libglusterfs/src/inode.c 9
-rwxr-xr-x tests/bugs/distribute/bug-1161311.t 1
-rwxr-xr-x tests/bugs/readdir-ahead/bug-1436090.t 44
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-volgen.c 6
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-volume-set.c 26
5 files changed, 81 insertions, 5 deletions
diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c
index 573ec03819a..3a6464ac5e1 100644
--- a/libglusterfs/src/inode.c
+++ b/libglusterfs/src/inode.c
@@ -2549,6 +2549,15 @@ inode_ctx_size (inode_t *inode)
old_THIS = THIS;
THIS = xl;
+ /* If inode ref is taken when THIS is global xlator,
+ * the ctx xl_key is set, but the value is NULL.
+ * For global xlator the cbks can be NULL, hence check
+ * for the same */
+ if (!xl->cbks) {
+ THIS = old_THIS;
+ continue;
+ }
+
if (xl->cbks->ictxsize)
size += xl->cbks->ictxsize (xl, inode);
diff --git a/tests/bugs/distribute/bug-1161311.t b/tests/bugs/distribute/bug-1161311.t
index 8db66351ebe..d88642edc32 100755
--- a/tests/bugs/distribute/bug-1161311.t
+++ b/tests/bugs/distribute/bug-1161311.t
@@ -63,6 +63,7 @@ EXPECT "$V0" volinfo_field $V0 'Volume Name';
EXPECT 'Created' volinfo_field $V0 'Status';
EXPECT '3' brick_count $V0
+TEST $CLI volume set $V0 parallel-readdir on
TEST $CLI volume start $V0;
EXPECT 'Started' volinfo_field $V0 'Status';
diff --git a/tests/bugs/readdir-ahead/bug-1436090.t b/tests/bugs/readdir-ahead/bug-1436090.t
new file mode 100755
index 00000000000..58e9093f1c3
--- /dev/null
+++ b/tests/bugs/readdir-ahead/bug-1436090.t
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+cleanup;
+
+TEST launch_cluster 2;
+TEST $CLI_1 peer probe $H2;
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+$CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0
+EXPECT 'Created' cluster_volinfo_field 1 $V0 'Status';
+
+$CLI_1 volume start $V0
+EXPECT 'Started' cluster_volinfo_field 1 $V0 'Status';
+
+TEST glusterfs -s $H1 --volfile-id $V0 $M0;
+TEST mkdir $M0/dir1
+
+# Create a large file (3.2 GB), so that rebalance takes time
+# Reading from /dev/urandom is slow, so we will cat it together
+dd if=/dev/urandom of=/tmp/FILE2 bs=64k count=10240
+for i in {1..5}; do
+ cat /tmp/FILE2 >> $M0/dir1/foo
+done
+
+TEST mv $M0/dir1/foo $M0/dir1/bar
+
+TEST $CLI_1 volume rebalance $V0 start force
+TEST ! $CLI_1 volume set $V0 parallel-readdir on
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_rebalance_status_field 1 $V0
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_rebalance_status_field 2 $V0
+TEST $CLI_1 volume set $V0 parallel-readdir on
+TEST mv $M0/dir1/bar $M0/dir1/foo
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST glusterfs -s $H1 --volfile-id $V0 $M0;
+TEST $CLI_1 volume rebalance $V0 start force
+TEST ln $M0/dir1/foo $M0/dir1/bar
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_rebalance_status_field 1 $V0
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_rebalance_status_field 2 $V0
+cleanup;
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
index 30d22fadbf4..2577ad81fc7 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
@@ -3391,8 +3391,7 @@ volgen_graph_build_readdir_ahead (volgen_graph_t *graph,
{
int32_t clusters = 0;
- if (graph->type == GF_REBALANCED ||
- graph->type == GF_QUOTAD ||
+ if (graph->type == GF_QUOTAD ||
graph->type == GF_SNAPD ||
!glusterd_volinfo_get_boolean (volinfo, VKEY_PARALLEL_READDIR) ||
!glusterd_volinfo_get_boolean (volinfo, VKEY_READDIR_AHEAD))
@@ -3780,8 +3779,7 @@ client_graph_set_rda_options (volgen_graph_t *graph,
if (dist_count <= 1)
goto out;
- if (graph->type == GF_REBALANCED ||
- graph->type == GF_QUOTAD ||
+ if (graph->type == GF_QUOTAD ||
graph->type == GF_SNAPD ||
!glusterd_volinfo_get_boolean (volinfo, VKEY_PARALLEL_READDIR) ||
!glusterd_volinfo_get_boolean (volinfo, VKEY_READDIR_AHEAD))
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index e5818a1aa15..8d944a546b2 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -955,6 +955,30 @@ out:
static int
+validate_parallel_readdir (glusterd_volinfo_t *volinfo, dict_t *dict,
+ char *key, char *value, char **op_errstr)
+{
+ int ret = -1;
+
+ ret = validate_boolean (volinfo, dict, key, value, op_errstr);
+ if (ret)
+ goto out;
+
+ ret = glusterd_is_defrag_on (volinfo);
+ if (ret) {
+ gf_asprintf (op_errstr, "%s option should be set "
+ "after rebalance is complete", key);
+ gf_msg ("glusterd", GF_LOG_ERROR, 0,
+ GD_MSG_INVALID_ENTRY, "%s", *op_errstr);
+ }
+out:
+ gf_msg_debug ("glusterd", 0, "Returning %d", ret);
+
+ return ret;
+}
+
+
+static int
validate_worm_period (glusterd_volinfo_t *volinfo, dict_t *dict, char *key,
char *value, char **op_errstr)
{
@@ -3191,7 +3215,7 @@ struct volopt_map_entry glusterd_volopt_map[] = {
.value = "off",
.type = DOC,
.op_version = GD_OP_VERSION_3_10_0,
- .validate_fn = validate_boolean,
+ .validate_fn = validate_parallel_readdir,
.description = "If this option is enabled, the readdir operation is "
"performed parallely on all the bricks, thus improving"
" the performance of readdir. Note that the performance"