summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Krutika Dhananjay <kdhananj@redhat.com> 2016-02-01 11:46:08 +0530
committer: Pranith Kumar Karampuri <pkarampu@redhat.com> 2016-02-04 20:31:54 -0800
commit: 476abe074b63e4b348b48af9b04a3d27244d7d17 (patch)
tree: 17b5db99c4c359a5ef74268ec757660cca47f35b
parent: bacdf9335bc674d87ca408feafa3515fb00f47b2 (diff)
cluster/afr: Fix heal-info slow response while IO is in progress
Backport of: http://review.gluster.org/#/c/13326/ Now heal-info does an open() on the file being examined so that the client at some point sees open-fd count being > 1 and releases the eager-lock so that heal-info doesn't remain blocked forever until IO completes. Change-Id: I7d4a8aa4de459216408b666894ee7bb42e406547 BUG: 1303899 Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com> Reviewed-on: http://review.gluster.org/13348 Smoke: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com> Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
-rw-r--r-- tests/bugs/replicate/bug-1297695.t 43
-rw-r--r-- xlators/cluster/afr/src/afr-common.c 17
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-data.c 37
-rw-r--r-- xlators/cluster/afr/src/afr.h 4
4 files changed, 82 insertions, 19 deletions
diff --git a/tests/bugs/replicate/bug-1297695.t b/tests/bugs/replicate/bug-1297695.t
new file mode 100644
index 00000000000..e0f431684e8
--- /dev/null
+++ b/tests/bugs/replicate/bug-1297695.t
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+# Sustained 128 MiB write to the test file; run in the background so IO is in progress during the heal-info check.
+function write_to_file {
+ dd of=$M0/dir/file if=/dev/urandom bs=1024k count=128 >/dev/null 2>&1
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+# Eager-lock on with a 3s post-op delay; every self-heal path off so the pending heal is not repaired behind our back.
+TEST $CLI volume set $V0 eager-lock on
+TEST $CLI volume set $V0 post-op-delay-secs 3
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+
+TEST $CLI volume start $V0
+TEST $CLI volume profile $V0 start
+TEST $CLI volume set $V0 ensure-durability off
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+TEST mkdir $M0/dir
+TEST touch $M0/dir/file
+# Write while brick 0 is down so that exactly one file needs heal once the brick returns.
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST `echo 'abc' > $M0/dir/file`
+
+TEST $CLI volume start $V0 force
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+# Keep IO running on the file while the pending-heal count is queried.
+write_to_file &
+# The max [F]INODELK latency in the profile output must have fewer than 7 digits (presumably microseconds, i.e. < ~1s): the egrep below only matches 7+ digits.
+EXPECT "^1$" get_pending_heal_count $V0
+inodelk_max_latency=$($CLI volume profile $V0 info | grep INODELK | awk 'BEGIN {max = 0} {if ($6 > max) max=$6;} END {print max}' | cut -d. -f 1 | egrep "[0-9]{7,}")
+TEST [ -z "$inodelk_max_latency" ]
+cleanup
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index e457dbce73c..ccdaaaaf052 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -4515,12 +4515,13 @@ afr_selfheal_locked_data_inspect (call_frame_t *frame, xlator_t *this,
gf_boolean_t *pflag)
{
int ret = -1;
- afr_private_t *priv = NULL;
unsigned char *locked_on = NULL;
unsigned char *data_lock = NULL;
unsigned char *sources = NULL;
unsigned char *sinks = NULL;
unsigned char *healed_sinks = NULL;
+ afr_private_t *priv = NULL;
+ fd_t *fd = NULL;
struct afr_reply *locked_replies = NULL;
priv = this->private;
@@ -4530,6 +4531,18 @@ afr_selfheal_locked_data_inspect (call_frame_t *frame, xlator_t *this,
sinks = alloca0 (priv->child_count);
healed_sinks = alloca0 (priv->child_count);
+ /* Heal-info does an open() on the file being examined so that the
+ * current eager-lock holding client, if present, at some point sees
+ * open-fd count being > 1 and releases the eager-lock so that heal-info
+ * doesn't remain blocked forever until IO completes.
+ */
+ ret = afr_selfheal_data_open (this, inode, &fd);
+ if (ret < 0) {
+ gf_msg_debug (this->name, -ret, "%s: Failed to open",
+ uuid_utoa (inode->gfid));
+ goto out;
+ }
+
locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count);
ret = afr_selfheal_tryinodelk (frame, this, inode, priv->sh_domain,
@@ -4567,6 +4580,8 @@ unlock:
out:
if (locked_replies)
afr_replies_wipe (locked_replies, priv->child_count);
+ if (fd)
+ fd_unref (fd);
return ret;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index e04b6575e60..ebf262e4f36 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -803,31 +803,32 @@ out:
}
-static fd_t *
-afr_selfheal_data_open (xlator_t *this, inode_t *inode)
+int
+afr_selfheal_data_open (xlator_t *this, inode_t *inode, fd_t **fd)
{
- loc_t loc = {0,};
- int ret = 0;
- fd_t *fd = NULL;
+ int ret = 0;
+ fd_t *fd_tmp = NULL;
+ loc_t loc = {0,};
- fd = fd_create (inode, 0);
- if (!fd)
- return NULL;
+ fd_tmp = fd_create (inode, 0);
+ if (!fd_tmp)
+ return -ENOMEM; /* loc not populated yet, nothing to wipe; *fd untouched */
loc.inode = inode_ref (inode);
gf_uuid_copy (loc.gfid, inode->gfid);
- ret = syncop_open (this, &loc, O_RDWR|O_LARGEFILE, fd, NULL, NULL);
- if (ret) {
- fd_unref (fd);
- fd = NULL;
+ ret = syncop_open (this, &loc, O_RDWR|O_LARGEFILE, fd_tmp, NULL, NULL);
+ if (ret < 0) {
+ fd_unref (fd_tmp);
+ goto out; /* *fd is left untouched on failure */
} else {
- fd_bind (fd);
+ fd_bind (fd_tmp);
}
- loc_wipe (&loc);
-
- return fd;
+ *fd = fd_tmp; /* hand the bound fd to the caller, who must fd_unref() it */
+out:
+ loc_wipe (&loc); /* drop the inode ref held by loc on success and failure alike */
+ return ret; /* 0 on success, negative errno on failure */
}
int
@@ -840,9 +841,9 @@ afr_selfheal_data (call_frame_t *frame, xlator_t *this, inode_t *inode)
priv = this->private;
- fd = afr_selfheal_data_open (this, inode);
+ ret = afr_selfheal_data_open (this, inode, &fd);
if (!fd) {
- gf_msg_debug (this->name, 0, "%s: Failed to open",
+ gf_msg_debug (this->name, -ret, "%s: Failed to open",
uuid_utoa (inode->gfid));
return -EIO;
}
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 8e0a1f18816..52f9c513a9e 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -1097,4 +1097,8 @@ afr_get_need_heal (xlator_t *this);
void
afr_set_need_heal (xlator_t *this, afr_local_t *local);
+/* Open inode O_RDWR: returns 0 with a bound fd stored in *fd (caller must fd_unref() it), or a negative errno with *fd untouched. */
+int
+afr_selfheal_data_open (xlator_t *this, inode_t *inode, fd_t **fd);
+
#endif /* __AFR_H__ */