summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/ec/src/ec-common.c
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/cluster/ec/src/ec-common.c')
-rw-r--r--xlators/cluster/ec/src/ec-common.c191
1 files changed, 132 insertions, 59 deletions
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
index 9b9cde8..f3463ba 100644
--- a/xlators/cluster/ec/src/ec-common.c
+++ b/xlators/cluster/ec/src/ec-common.c
@@ -26,6 +26,40 @@
EC_FLAG_WAITING_DATA_DIRTY |\
EC_FLAG_WAITING_METADATA_DIRTY)
+off_t
+ec_range_end_get (off_t fl_start, size_t fl_size)
+{
+ off_t fl_end = 0;
+ switch (fl_size) {
+ case 0:
+ return fl_start;
+ case LLONG_MAX: /*Infinity*/
+ return LLONG_MAX;
+ default:
+ fl_end = fl_start + fl_size - 1;
+ if (fl_end < 0) /*over-flow*/
+ return LLONG_MAX;
+ else
+ return fl_end;
+ }
+}
+
+static gf_boolean_t
+ec_is_range_conflict (ec_lock_link_t *l1, ec_lock_link_t *l2)
+{
+ return ((l1->fl_end >= l2->fl_start) && (l2->fl_end >= l1->fl_start));
+}
+
+static gf_boolean_t
+ec_lock_conflict (ec_lock_link_t *l1, ec_lock_link_t *l2)
+{
+ if ((l1->fop->flags & EC_FLAG_LOCK_SHARED) &&
+ (l2->fop->flags & EC_FLAG_LOCK_SHARED))
+ return _gf_false;
+
+ return ec_is_range_conflict (l1, l2);
+}
+
uint32_t
ec_select_first_by_read_policy (ec_t *ec, ec_fop_data_t *fop)
{
@@ -729,7 +763,7 @@ int32_t ec_lock_compare(ec_lock_t * lock1, ec_lock_t * lock2)
}
void ec_lock_insert(ec_fop_data_t *fop, ec_lock_t *lock, uint32_t flags,
- loc_t *base)
+ loc_t *base, off_t fl_start, size_t fl_size)
{
ec_lock_link_t *link;
@@ -763,12 +797,15 @@ void ec_lock_insert(ec_fop_data_t *fop, ec_lock_t *lock, uint32_t flags,
link->update[EC_DATA_TXN] = (flags & EC_UPDATE_DATA) != 0;
link->update[EC_METADATA_TXN] = (flags & EC_UPDATE_META) != 0;
link->base = base;
+ link->fl_start = fl_start;
+ link->fl_end = ec_range_end_get (fl_start, fl_size);
lock->refs_pending++;
}
void ec_lock_prepare_inode_internal(ec_fop_data_t *fop, loc_t *loc,
- uint32_t flags, loc_t *base)
+ uint32_t flags, loc_t *base,
+ off_t fl_start, size_t fl_size)
{
ec_lock_t *lock = NULL;
ec_inode_t *ctx;
@@ -829,16 +866,17 @@ void ec_lock_prepare_inode_internal(ec_fop_data_t *fop, loc_t *loc,
ctx->inode_lock = lock;
insert:
- ec_lock_insert(fop, lock, flags, base);
+ ec_lock_insert(fop, lock, flags, base, fl_start, fl_size);
update_query:
lock->query |= (flags & EC_QUERY_INFO) != 0;
unlock:
UNLOCK(&loc->inode->lock);
}
-void ec_lock_prepare_inode(ec_fop_data_t *fop, loc_t *loc, uint32_t flags)
+void ec_lock_prepare_inode(ec_fop_data_t *fop, loc_t *loc, uint32_t flags,
+ off_t fl_start, size_t fl_size)
{
- ec_lock_prepare_inode_internal(fop, loc, flags, NULL);
+ ec_lock_prepare_inode_internal(fop, loc, flags, NULL, fl_start, fl_size);
}
void ec_lock_prepare_parent_inode(ec_fop_data_t *fop, loc_t *loc, loc_t *base,
@@ -864,12 +902,13 @@ void ec_lock_prepare_parent_inode(ec_fop_data_t *fop, loc_t *loc, loc_t *base,
base = NULL;
}
- ec_lock_prepare_inode_internal(fop, &tmp, flags, base);
+ ec_lock_prepare_inode_internal(fop, &tmp, flags, base, 0, LLONG_MAX);
loc_wipe(&tmp);
}
-void ec_lock_prepare_fd(ec_fop_data_t *fop, fd_t *fd, uint32_t flags)
+void ec_lock_prepare_fd(ec_fop_data_t *fop, fd_t *fd, uint32_t flags,
+ off_t fl_start, size_t fl_size)
{
loc_t loc;
int32_t err;
@@ -885,7 +924,7 @@ void ec_lock_prepare_fd(ec_fop_data_t *fop, fd_t *fd, uint32_t flags)
return;
}
- ec_lock_prepare_inode_internal(fop, &loc, flags, NULL);
+ ec_lock_prepare_inode_internal(fop, &loc, flags, NULL, fl_start, fl_size);
loc_wipe(&loc);
}
@@ -1319,17 +1358,16 @@ out:
}
}
-gf_boolean_t ec_get_inode_size(ec_fop_data_t *fop, inode_t *inode,
- uint64_t *size)
+gf_boolean_t
+__ec_get_inode_size(ec_fop_data_t *fop, inode_t *inode,
+ uint64_t *size)
{
ec_inode_t *ctx;
gf_boolean_t found = _gf_false;
- LOCK(&inode->lock);
-
ctx = __ec_inode_get(inode, fop->xl);
if (ctx == NULL) {
- goto unlock;
+ goto out;
}
if (ctx->have_size) {
@@ -1337,23 +1375,35 @@ gf_boolean_t ec_get_inode_size(ec_fop_data_t *fop, inode_t *inode,
found = _gf_true;
}
-unlock:
+out:
+ return found;
+}
+
+gf_boolean_t
+ec_get_inode_size(ec_fop_data_t *fop, inode_t *inode,
+ uint64_t *size)
+{
+ gf_boolean_t found = _gf_false;
+
+ LOCK(&inode->lock);
+ {
+ found = __ec_get_inode_size (fop, inode, size);
+ }
UNLOCK(&inode->lock);
return found;
}
-gf_boolean_t ec_set_inode_size(ec_fop_data_t *fop, inode_t *inode,
- uint64_t size)
+gf_boolean_t
+__ec_set_inode_size(ec_fop_data_t *fop, inode_t *inode,
+ uint64_t size)
{
ec_inode_t *ctx;
gf_boolean_t found = _gf_false;
- LOCK(&inode->lock);
-
ctx = __ec_inode_get(inode, fop->xl);
if (ctx == NULL) {
- goto unlock;
+ goto out;
}
/* Normal fops always have ctx->have_size set. However self-heal calls this
@@ -1368,8 +1418,21 @@ gf_boolean_t ec_set_inode_size(ec_fop_data_t *fop, inode_t *inode,
found = _gf_true;
-unlock:
- UNLOCK(&inode->lock);
+out:
+ return found;
+}
+
+gf_boolean_t
+ec_set_inode_size(ec_fop_data_t *fop, inode_t *inode,
+ uint64_t size)
+{
+ gf_boolean_t found = _gf_false;
+
+ LOCK (&inode->lock);
+ {
+ found = __ec_set_inode_size (fop, inode, size);
+ }
+ UNLOCK (&inode->lock);
return found;
}
@@ -1476,34 +1539,47 @@ ec_lock_update_fd(ec_lock_t *lock, ec_fop_data_t *fop)
}
}
+static gf_boolean_t
+ec_link_has_lock_conflict (ec_lock_link_t *link, struct list_head *owners)
+{
+ ec_lock_link_t *owner_link = NULL;
+ ec_t *ec = link->fop->xl->private;
+
+ if (!ec->parallel_writes)
+ return _gf_true;
+
+ list_for_each_entry (owner_link, owners, owner_list) {
+ if (ec_lock_conflict (owner_link, link))
+ return _gf_true;
+ }
+ return _gf_false;
+}
+
static void
ec_lock_wake_shared(ec_lock_t *lock, struct list_head *list)
{
ec_fop_data_t *fop;
ec_lock_link_t *link;
- gf_boolean_t exclusive = _gf_false;
+ gf_boolean_t conflict = _gf_false;
- while (!exclusive && !list_empty(&lock->waiting)) {
+ while (!conflict && !list_empty(&lock->waiting)) {
link = list_entry(lock->waiting.next, ec_lock_link_t, wait_list);
fop = link->fop;
/* If lock is not acquired, at most one fop can be assigned as owner.
* The following fops will need to wait in the lock->waiting queue
* until the lock has been fully acquired. */
- exclusive = !lock->acquired;
+ conflict = !lock->acquired;
/* If the fop is not shareable, only this fop can be assigned as owner.
* Other fops will need to wait until this one finishes. */
- if ((fop->flags & EC_FLAG_LOCK_SHARED) == 0) {
- exclusive = _gf_true;
-
- /* Avoid other requests to be assigned as owners. */
- lock->exclusive = 1;
+ if (ec_link_has_lock_conflict (link, &lock->owners)) {
+ conflict = _gf_true;
}
/* If only one fop is allowed, it can be assigned as the owner of the
* lock only if there weren't any other owner. */
- if (exclusive && !list_empty(&lock->owners)) {
+ if (conflict && !list_empty(&lock->owners)) {
break;
}
@@ -1570,9 +1646,7 @@ void ec_lock_acquired(ec_lock_link_t *link)
lock->acquired = _gf_true;
ec_lock_update_fd(lock, fop);
- if ((fop->flags & EC_FLAG_LOCK_SHARED) != 0) {
- ec_lock_wake_shared(lock, &list);
- }
+ ec_lock_wake_shared(lock, &list);
UNLOCK(&lock->loc.inode->lock);
@@ -1683,11 +1757,11 @@ ec_lock_assign_owner(ec_lock_link_t *link)
/* We are trying to acquire a lock that has an unlock timer active.
* This means that the lock must be idle, i.e. no fop can be in the
* owner, waiting or frozen lists. It also means that the lock cannot
- * have been marked as being released (this is done without timers)
- * and it must not be exclusive. There should only be one owner
- * reference, but it's possible that some fops are being prepared to
- * use this lock. */
- GF_ASSERT ((lock->exclusive == 0) && (lock->refs_owners == 1) &&
+ * have been marked as being released (this is done without timers).
+ * There should only be one owner reference, but it's possible that
+ * some fops are being prepared to use this lock.
+ */
+ GF_ASSERT ((lock->refs_owners == 1) &&
list_empty(&lock->owners) && list_empty(&lock->waiting));
/* We take the timer_link before cancelling the timer, since a
@@ -1735,13 +1809,15 @@ ec_lock_assign_owner(ec_lock_link_t *link)
lock->timer = NULL;
}
- lock->exclusive |= (fop->flags & EC_FLAG_LOCK_SHARED) == 0;
-
if (!list_empty(&lock->owners)) {
/* There are other owners of this lock. We can only take ownership if
- * the lock is already acquired and can be shared. Otherwise we need
- * to wait. */
- if (!lock->acquired || (lock->exclusive != 0)) {
+ * the lock is already acquired and doesn't have conflict with existing
+ * owners, or waiters(to prevent starvation).
+ * Otherwise we need to wait.
+ */
+ if (!lock->acquired ||
+ ec_link_has_lock_conflict (link, &lock->owners) ||
+ ec_link_has_lock_conflict (link, &lock->waiting)) {
ec_trace("LOCK_QUEUE_WAIT", fop, "lock=%p", lock);
list_add_tail(&link->wait_list, &lock->waiting);
@@ -1819,10 +1895,7 @@ ec_lock_next_owner(ec_lock_link_t *link, ec_cbk_data_t *cbk,
}
ec_lock_update_good(lock, fop);
- lock->exclusive -= (fop->flags & EC_FLAG_LOCK_SHARED) == 0;
- if (list_empty(&lock->owners)) {
- ec_lock_wake_shared(lock, &list);
- }
+ ec_lock_wake_shared(lock, &list);
UNLOCK(&lock->loc.inode->lock);
@@ -1876,11 +1949,11 @@ ec_lock_unfreeze(ec_lock_link_t *link)
lock->acquired = _gf_false;
/* We are unfreezing a lock. This means that the lock has already been
- * released. In this state it shouldn't be exclusive nor have a pending
- * timer nor have any owner, and the waiting list should be empty. Only
- * the frozen list can contain some fop. */
- GF_ASSERT((lock->exclusive == 0) && (lock->timer == NULL) &&
- list_empty(&lock->waiting) && list_empty(&lock->owners));
+ * released. In this state it shouldn't have a pending timer nor have any
+ * owner, and the waiting list should be empty. Only the frozen list can
+ * contain some fop. */
+ GF_ASSERT((lock->timer == NULL) && list_empty(&lock->waiting) &&
+ list_empty(&lock->owners));
/* We move all frozen fops to the waiting list. */
list_splice_init(&lock->frozen, &lock->waiting);
@@ -2013,7 +2086,7 @@ ec_update_size_version(ec_lock_link_t *link, uint64_t *version,
ec_fop_data_t *fop;
ec_lock_t *lock;
ec_inode_t *ctx;
- dict_t * dict;
+ dict_t *dict = NULL;
uintptr_t update_on = 0;
int32_t err = -ENOMEM;
@@ -2203,12 +2276,12 @@ ec_unlock_timer_del(ec_lock_link_t *link)
ec_trace("UNLOCK_DELAYED", link->fop, "lock=%p", lock);
/* The unlock timer has expired without anyone cancelling it.
- * This means that it shouldn't have any owner, and the
- * waiting and frozen lists should be empty. It shouldn't have
- * been marked as release nor be exclusive either. It must have
- * only one owner reference, but there can be fops being
- * prepared though. */
- GF_ASSERT(!lock->release && (lock->exclusive == 0) &&
+ * This means that it shouldn't have any owner, and the waiting
+ * and frozen lists should be empty. It must have only one
+ * owner reference, but there can be fops being prepared
+ * though.
+ * */
+ GF_ASSERT(!lock->release &&
(lock->refs_owners == 1) &&
list_empty(&lock->owners) &&
list_empty(&lock->waiting) &&