diff options
Diffstat (limited to 'xlators/cluster/ec/src/ec-locks.c')
-rw-r--r-- | xlators/cluster/ec/src/ec-locks.c | 69 |
1 files changed, 51 insertions, 18 deletions
diff --git a/xlators/cluster/ec/src/ec-locks.c b/xlators/cluster/ec/src/ec-locks.c index 8e84977d2b3..601960d6154 100644 --- a/xlators/cluster/ec/src/ec-locks.c +++ b/xlators/cluster/ec/src/ec-locks.c @@ -24,9 +24,36 @@ ec_lock_check(ec_fop_data_t *fop, uintptr_t *mask) ec_t *ec = fop->xl->private; ec_cbk_data_t *ans = NULL; ec_cbk_data_t *cbk = NULL; - uintptr_t locked = 0, notlocked = 0; + uintptr_t locked = 0; + int32_t good = 0; + int32_t eagain = 0; + int32_t estale = 0; int32_t error = -1; + /* There are some errors that we'll handle in an special way while trying + * to acquire a lock. + * + * EAGAIN: If it's found during a parallel non-blocking lock request, we + * consider that there's contention on the inode, so we consider + * the acquisition a failure and try again with a sequential + * blocking lock request. This will ensure that we get a lock on + * as many bricks as possible (ignoring EAGAIN here would cause + * unnecessary triggers of self-healing). + * + * If it's found during a sequential blocking lock request, it's + * considered an error. Lock will only succeed if there are + * enough other bricks locked. + * + * ESTALE: This can appear during parallel or sequential lock request if + * the inode has just been unlinked. We consider this error is + * not recoverable, but we also don't consider it as fatal. So, + * if it happens during parallel lock, we won't attempt a + * sequential one unless there are EAGAIN errors on other + * bricks (and are enough to form a quorum), but if we reach + * quorum counting the ESTALE bricks, we consider the whole + * result of the operation is ESTALE instead of EIO. + */ + list_for_each_entry(ans, &fop->cbk_list, list) { if (ans->op_ret >= 0) { @@ -34,24 +61,23 @@ ec_lock_check(ec_fop_data_t *fop, uintptr_t *mask) error = EIO; } locked |= ans->mask; + good = ans->count; cbk = ans; - } else { - if (ans->op_errno == EAGAIN) { - switch (fop->uint32) { - case EC_LOCK_MODE_NONE: - case EC_LOCK_MODE_ALL: - /* Goal is to treat non-blocking lock as failure - * even if there is a single EAGAIN*/ - notlocked |= ans->mask; - break; - } - } + } else if (ans->op_errno == ESTALE) { + estale += ans->count; + } else if ((ans->op_errno == EAGAIN) && + (fop->uint32 != EC_LOCK_MODE_INC)) { + eagain += ans->count; } } if (error == -1) { - if (gf_bits_count(locked | notlocked) >= ec->fragments) { - if (notlocked == 0) { + /* If we have enough quorum with succeeded and EAGAIN answers, we + * ignore for now any ESTALE answer. If there are EAGAIN answers, + * we retry with a sequential blocking lock request if needed. + * Otherwise we succeed. */ + if ((good + eagain) >= ec->fragments) { + if (eagain == 0) { if (fop->answer == NULL) { fop->answer = cbk; } @@ -64,21 +90,28 @@ ec_lock_check(ec_fop_data_t *fop, uintptr_t *mask) case EC_LOCK_MODE_NONE: error = EAGAIN; break; - case EC_LOCK_MODE_ALL: fop->uint32 = EC_LOCK_MODE_INC; break; - default: + /* This shouldn't happen because eagain cannot be > 0 + * when fop->uint32 is EC_LOCK_MODE_INC. */ error = EIO; break; } } } else { - if (fop->answer && fop->answer->op_ret < 0) + /* We have been unable to find enough candidates that will be able + * to take the lock. If we have quorum on some answer, we return + * it. Otherwise we check if ESTALE answers allow us to reach + * quorum. If so, we return ESTALE. */ + if (fop->answer && fop->answer->op_ret < 0) { error = fop->answer->op_errno; - else + } else if ((good + eagain + estale) >= ec->fragments) { + error = ESTALE; + } else { error = EIO; + } } } |