summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPranith Kumar K <pkarampu@redhat.com>2014-09-15 14:22:44 +0530
committerNiels de Vos <ndevos@redhat.com>2014-09-29 00:00:31 -0700
commitf67921dec0ab5db6c7c0bc1b00459dbcfa1d568d (patch)
treec6b85e7c312c02decbe64ee2f6fe08d3ecb496be
parentd26442a2a6319602c2eec0ff10eca3bc73f9eb78 (diff)
cluster/afr: Handle EAGAIN properly in inodelk
Problem: When one of the brick is taken down and brough back up in a replica pair, locks on that brick will be allowed. Afr returns inodelk success even when one of the bricks already has the lock taken. Fix: If any brick returns EAGAIN return failure to parent xlator. Note: This change only works for non-blocking inodelks. This patch addresses dht-synchronization which uses non-blocking locks for rename. Blocking lock is issued by only one of the rebalance processes. So for now there is no possibility of deadlock. Change-Id: I07673f8873263da334e03f35c6cdb5db9410a616 BUG: 1141733 Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> Reviewed-on: http://review.gluster.org/8739 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Raghavendra G <rgowdapp@redhat.com> Reviewed-by: Niels de Vos <ndevos@redhat.com>
-rw-r--r--xlators/cluster/afr/src/afr-common.c164
-rw-r--r--xlators/cluster/afr/src/afr.h5
2 files changed, 155 insertions, 14 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 4ca81e81dae..58444ddb896 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -3400,6 +3400,72 @@ out:
/* }}} */
+int32_t
+afr_unlock_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int child_index = (long)cookie;
+ uuid_t gfid = {0};
+
+ local = frame->local;
+ priv = this->private;
+
+ if (op_ret < 0 && op_errno != ENOTCONN) {
+ loc_gfid (&local->loc, gfid);
+ gf_log (this->name, GF_LOG_ERROR, "%s: Failed to unlock %s "
+ "with lk_owner: %s (%s)", uuid_utoa (gfid),
+ priv->children[child_index]->name,
+ lkowner_utoa (&frame->root->lk_owner),
+ strerror (op_errno));
+ }
+
+ call_count = afr_frame_return (frame);
+ if (call_count == 0) {
+ AFR_STACK_UNWIND (inodelk, frame, local->op_ret,
+ local->op_errno, local->xdata_rsp);
+ }
+
+ return 0;
+}
+
+int32_t
+afr_unlock_inodelks_and_unwind (call_frame_t *frame, xlator_t *this,
+ int call_count)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+ priv = this->private;
+ local->call_count = call_count;
+ local->cont.inodelk.flock.l_type = F_UNLCK;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->child_up[i])
+ continue;
+
+ if (local->child_errno[i])
+ continue;
+
+ STACK_WIND_COOKIE (frame, afr_unlock_inodelk_cbk,
+ (void*) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->inodelk,
+ local->cont.inodelk.volume,
+ &local->loc, local->cont.inodelk.cmd,
+ &local->cont.inodelk.flock, 0);
+
+ if (!--call_count)
+ break;
+ }
+
+ return 0;
+}
int32_t
afr_inodelk_cbk (call_frame_t *frame, void *cookie,
@@ -3407,24 +3473,88 @@ afr_inodelk_cbk (call_frame_t *frame, void *cookie,
{
afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
int call_count = -1;
+ int child_index = (long)cookie;
+ int i = 0;
+ int lock_count = 0;
local = frame->local;
+ priv = this->private;
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
-
- local->op_errno = op_errno;
+ if (op_ret < 0)
+ local->child_errno[child_index] = op_errno;
+ if (op_ret == 0 && xdata) {
+ LOCK (&frame->lock);
+ {
+ if (!local->xdata_rsp)
+ local->xdata_rsp = dict_ref (xdata);
+ }
+ UNLOCK (&frame->lock);
}
- UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
- if (call_count == 0)
- AFR_STACK_UNWIND (inodelk, frame, local->op_ret,
- local->op_errno, xdata);
+ if (call_count == 0) {
+ for (i = 0; i < priv->child_count; i++) {
+ /*
+ * The idea is to not allow lock even if at least one of
+ * the bricks already have a competing lock granted. If
+ * there is a competing lock the errno returned is
+ * EAGAIN. so in this loop the following criteria
+ * should be met.
+ * 1) If the errno is anything other than EAGAIN
+ * on some of the subvols but there is at least one
+ * success, the fop should be considered success.
+ * 2) If the errno is EAGAIN on at least one of the
+ * subvols the fop should fail with -1, EAGAIN.
+ */
+ if (!local->child_up[i])
+ continue;
+
+ if (local->child_errno[i] == 0)
+ lock_count++;
+
+ if (local->op_ret == -1 && local->op_errno == EAGAIN)
+ continue;
+ /*
+ * For meeting '2)' we set op_ret to -1, op_errno to
+ * EAGAIN if any of the bricks give that error. Check
+ * above prevents any more modifications to
+ * local->op_ret, local->op_errno
+ * (i.e. final status of the fop).
+ */
+ if (local->child_errno[i] == EAGAIN) {
+ local->op_ret = -1;
+ local->op_errno = EAGAIN;
+ continue;
+ }
+
+ /*
+ * For meeting '1)'
+ * Here we set the op_ret to 0 if the fop succeeds on
+ * any of the bricks provided we haven't witnessed
+ * any -1, EAGAIN from other bricks. So if the bricks
+ * fail with some other reason other than EAGAIN but
+ * succeed on at least one of the bricks the final
+ * result is SUCCESS for the fop.
+ */
+
+ if (local->child_errno[i] == 0)
+ local->op_ret = 0;
+
+ local->op_errno = local->child_errno[i];
+ }
+
+ if (lock_count && local->cont.inodelk.flock.l_type != F_UNLCK &&
+ (local->op_ret == -1 && local->op_errno == EAGAIN)) {
+ afr_unlock_inodelks_and_unwind (frame, this,
+ lock_count);
+ } else {
+ AFR_STACK_UNWIND (inodelk, frame, local->op_ret,
+ local->op_errno, local->xdata_rsp);
+ }
+ }
return 0;
}
@@ -3455,14 +3585,20 @@ afr_inodelk (call_frame_t *frame, xlator_t *this,
if (ret < 0)
goto out;
+ loc_copy (&local->loc, loc);
+ local->cont.inodelk.volume = volume;
+ local->cont.inodelk.cmd = cmd;
+ local->cont.inodelk.flock = *flock;
+
call_count = local->call_count;
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
- STACK_WIND (frame, afr_inodelk_cbk,
- priv->children[i],
- priv->children[i]->fops->inodelk,
- volume, loc, cmd, flock, xdata);
+ STACK_WIND_COOKIE (frame, afr_inodelk_cbk,
+ (void*) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->inodelk,
+ volume, loc, cmd, flock, xdata);
if (!--call_count)
break;
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 1bca07c0768..0bdb0ed54bd 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -730,6 +730,11 @@ typedef struct _afr_local {
struct iatt postbuf;
} zerofill;
+ struct {
+ const char *volume;
+ int32_t cmd;
+ struct gf_flock flock;
+ } inodelk;
} cont;