summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- tests/bugs/bug-921231.t 31
-rw-r--r-- xlators/cluster/afr/src/afr-common.c 25
-rw-r--r-- xlators/cluster/afr/src/afr-inode-write.c 33
-rw-r--r-- xlators/cluster/afr/src/afr-transaction.c 36
-rw-r--r-- xlators/cluster/afr/src/afr.h 9
5 files changed, 129 insertions, 5 deletions
diff --git a/tests/bugs/bug-921231.t b/tests/bugs/bug-921231.t
new file mode 100644
index 00000000000..db9cf3b6f06
--- /dev/null
+++ b/tests/bugs/bug-921231.t
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+# This test writes to same file with 2 fds and tests that eager-lock is not
+# causing extra delay because of post-op-delay-secs
+cleanup;
+
+function write_to_file {
+ dd of=$M0/1 if=/dev/zero bs=1M count=128 oflag=append 2>&1 >/dev/null
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+TEST $CLI volume set $V0 eager-lock on
+TEST $CLI volume set $V0 post-op-delay-secs 3
+TEST $CLI volume set $V0 client-log-level DEBUG
+TEST $CLI volume start $V0
+TEST $CLI volume profile $V0 start
+TEST $CLI volume set $V0 ensure-durability off
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+write_to_file &
+write_to_file &
+wait
+#Test if the MAX [F]INODELK fop latency is of the order of seconds.
+inodelk_max_latency=$($CLI volume profile $V0 info | grep INODELK | awk 'BEGIN {max = 0} {if ($6 > max) max=$6;} END {print max}' | cut -d. -f 1 | egrep "[0-9]{7,}")
+TEST [ -z $inodelk_max_latency ]
+
+cleanup;
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 075f33c618b..79644b74086 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -4890,3 +4890,28 @@ afr_is_fd_fixable (fd_t *fd)
return _gf_true;
}
+
+/* Store the open-fd count collected in frame->local on the inode's AFR ctx.
+ * The inode is taken from local->fd when set, else from local->loc; nothing
+ * is done when there is no inode or no ctx to update. */
+void
+afr_handle_open_fd_count (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t     *local = frame->local;
+        inode_t         *inode = NULL;
+        afr_inode_ctx_t *ctx   = NULL;
+
+        inode = local->fd ? local->fd->inode : local->loc.inode;
+        if (!inode)
+                return;
+
+        LOCK (&inode->lock);
+        {
+                ctx = __afr_inode_ctx_get (inode, this);
+                /* NOTE(review): assumed able to return NULL, like the
+                 * afr_inode_ctx_get () result checked elsewhere - confirm. */
+                if (ctx)
+                        ctx->open_fd_count = local->open_fd_count;
+        }
+        UNLOCK (&inode->lock);
+}
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
index ce4fbf22698..68570f15afe 100644
--- a/xlators/cluster/afr/src/afr-inode-write.c
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -137,6 +137,8 @@ afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int child_index = (long) cookie;
int call_count = -1;
int read_child = 0;
+ int ret = 0;
+ uint32_t open_fd_count = 0;
local = frame->local;
@@ -162,6 +164,17 @@ afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
if (op_ret != -1) {
+ if (xdata) {
+ ret = dict_get_uint32 (xdata,
+ GLUSTERFS_OPEN_FD_COUNT,
+ &open_fd_count);
+ if ((ret == 0) &&
+ (open_fd_count > local->open_fd_count)) {
+ local->open_fd_count = open_fd_count;
+ local->update_open_fd_count = _gf_true;
+ }
+ }
+
if ((local->success_count == 0) ||
(child_index == read_child)) {
local->cont.writev.prebuf = *prebuf;
@@ -176,8 +189,11 @@ afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (call_count == 0) {
- if (!local->stable_write)
- afr_fd_report_unstable_write (this, local->fd);
+ if (local->update_open_fd_count)
+ afr_handle_open_fd_count (frame, this);
+
+ if (!local->stable_write)
+ afr_fd_report_unstable_write (this, local->fd);
afr_writev_handle_short_writes (frame, this);
/*
@@ -206,6 +222,8 @@ afr_writev_wind (call_frame_t *frame, xlator_t *this)
afr_private_t *priv = NULL;
int i = 0;
int call_count = -1;
+ dict_t *xdata = NULL;
+ GF_UNUSED int ret = 0;
local = frame->local;
priv = this->private;
@@ -229,6 +247,12 @@ afr_writev_wind (call_frame_t *frame, xlator_t *this)
return 0;
}
+ xdata = dict_new ();
+ if (xdata) {
+ ret = dict_set_uint32 (xdata, GLUSTERFS_OPEN_FD_COUNT,
+ sizeof (uint32_t));
+ }
+
for (i = 0; i < priv->child_count; i++) {
if (local->transaction.pre_op[i]) {
STACK_WIND_COOKIE (frame, afr_writev_wind_cbk,
@@ -241,13 +265,16 @@ afr_writev_wind (call_frame_t *frame, xlator_t *this)
local->cont.writev.offset,
local->cont.writev.flags,
local->cont.writev.iobref,
- NULL);
+ xdata);
if (!--call_count)
break;
}
}
+ if (xdata)
+ dict_unref (xdata);
+
return 0;
}
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index def00e28834..817645505c6 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -1308,6 +1308,34 @@ afr_set_delayed_post_op (call_frame_t *frame, xlator_t *this)
local->delayed_post_op = _gf_true;
}
+/* Tell whether more than one fd is recorded as open on @inode, going by the
+ * open_fd_count kept in its AFR inode ctx.  A missing inode or ctx is also
+ * answered with _gf_true. */
+gf_boolean_t
+afr_are_multiple_fds_opened (inode_t *inode, xlator_t *this)
+{
+        afr_inode_ctx_t *inode_ctx = NULL;
+        gf_boolean_t     multiple  = _gf_true;
+
+        if (!inode) {
+                /* Answering false could make callers keep taking eager-lock,
+                 * which may starve others; keep the _gf_true default. */
+                gf_log_callingfn (this->name, GF_LOG_ERROR, "Invalid inode");
+                goto out;
+        }
+
+        /* Say mount1 holds eager-lock (full-lock) and mount2 then opens the
+         * same file: mount2 cannot perform data operations until mount1
+         * releases the eager-lock.  To avoid that, eager-lock is not enabled
+         * for a transaction when open-fd-count is > 1.
+         */
+        inode_ctx = afr_inode_ctx_get (inode, this);
+        if (inode_ctx && inode_ctx->open_fd_count <= 1)
+                multiple = _gf_false;
+
+out:
+        return multiple;
+}
gf_boolean_t
is_afr_delayed_changelog_post_op_needed (call_frame_t *frame, xlator_t *this)
@@ -1322,6 +1350,9 @@ is_afr_delayed_changelog_post_op_needed (call_frame_t *frame, xlator_t *this)
if (!local->delayed_post_op)
goto out;
+ if (local->fd && afr_are_multiple_fds_opened (local->fd->inode, this))
+ goto out;
+
res = _gf_true;
out:
return res;
@@ -1753,8 +1784,7 @@ afr_locals_overlap (afr_local_t *local1, afr_local_t *local2)
return ((end1 >= start2) && (end2 >= start1));
}
-
- void
+void
afr_transaction_eager_lock_init (afr_local_t *local, xlator_t *this)
{
afr_private_t *priv = NULL;
@@ -1776,6 +1806,8 @@ afr_transaction_eager_lock_init (afr_local_t *local, xlator_t *this)
if (!fdctx)
return;
+ if (afr_are_multiple_fds_opened (local->fd->inode, this))
+ return;
/*
* Once full file lock is acquired in eager-lock phase, overlapping
* writes do not compete for inode-locks, instead are transferred to the
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index c6e6913df29..49d281acae1 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -87,6 +87,7 @@ typedef struct afr_inode_ctx_ {
int32_t *fresh_children;//increasing order of latency
afr_spb_state_t mdata_spb;
afr_spb_state_t data_spb;
+ uint32_t open_fd_count;
} afr_inode_ctx_t;
typedef enum {
@@ -445,6 +446,8 @@ typedef struct _afr_local {
unsigned int call_count;
unsigned int success_count;
unsigned int enoent_count;
+ uint32_t open_fd_count;
+ gf_boolean_t update_open_fd_count;
unsigned int unhealable;
@@ -1171,4 +1174,10 @@ afr_delayed_changelog_wake_resume (xlator_t *this, fd_t *fd, call_stub_t *stub);
int
afr_inodelk_init (afr_inodelk_t *lk, char *dom, size_t child_count);
+void
+afr_handle_open_fd_count (call_frame_t *frame, xlator_t *this);
+
+afr_inode_ctx_t*
+afr_inode_ctx_get (inode_t *inode, xlator_t *this);
+
#endif /* __AFR_H__ */