summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Xavi Hernandez <xhernandez@redhat.com> 2018-05-15 11:37:16 +0200
committer: Shyamsundar Ranganathan <srangana@redhat.com> 2018-05-25 02:06:11 +0000
commit: 16b3830c337acacc1155c6d463c3ced852603e12 (patch)
tree: 05fbec45fced749955cfb223220c517b2dde0066
parent: d8b7ec6b4176981cf8b4fe4fae4da2ca5fca4bdf (diff)
cluster/ec: Fix pre-op xattrop management
Multiple pre-op xattrops can be in flight simultaneously. In the callback, the code checked whether the fop was waiting for some specific data (like size and version) and, if so, assumed that this answer would contain that data. This is not true: a fop can be waiting for some data that will actually arrive in the answer to another fop's xattrop. This patch differentiates between needing some information and providing it. This is related to parallel writes. Disabling them fixed the problem, but also prevented concurrent reads. A change has been made so that disabling parallel writes still allows parallel reads. Backport of: > BUG: 1578325 Fixes: bz#1582056 Change-Id: I74772ad6b80b7b37805da93d5ec3ae099e96b041 Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
-rw-r--r-- xlators/cluster/ec/src/ec-common.c 69
-rw-r--r-- xlators/cluster/ec/src/ec-common.h 28
-rw-r--r-- xlators/cluster/ec/src/ec.c 1
3 files changed, 66 insertions, 32 deletions
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
index 27ea5c9..e3e3481 100644
--- a/xlators/cluster/ec/src/ec-common.c
+++ b/xlators/cluster/ec/src/ec-common.c
@@ -22,9 +22,6 @@
#include "ec-messages.h"
#define EC_INVALID_INDEX UINT32_MAX
-#define EC_XATTROP_ALL_WAITING_FLAGS (EC_FLAG_WAITING_XATTROP |\
- EC_FLAG_WAITING_DATA_DIRTY |\
- EC_FLAG_WAITING_METADATA_DIRTY)
void
ec_update_fd_status (fd_t *fd, xlator_t *xl, int idx,
@@ -161,6 +158,8 @@ ec_is_range_conflict (ec_lock_link_t *l1, ec_lock_link_t *l2)
static gf_boolean_t
ec_lock_conflict (ec_lock_link_t *l1, ec_lock_link_t *l2)
{
+ ec_t *ec = l1->fop->xl->private;
+
/* Fops like access/stat won't have to worry what the other fops are
* modifying as the fop is wound only to one brick. So it can be
* executed in parallel*/
@@ -172,6 +171,10 @@ ec_lock_conflict (ec_lock_link_t *l1, ec_lock_link_t *l2)
(l2->fop->flags & EC_FLAG_LOCK_SHARED))
return _gf_false;
+ if (!ec->parallel_writes) {
+ return _gf_true;
+ }
+
return ec_is_range_conflict (l1, l2);
}
@@ -1130,7 +1133,7 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie,
ec_lock_t *lock = NULL;
ec_inode_t *ctx;
gf_boolean_t release = _gf_false;
- uint64_t waiting_flags = 0;
+ uint64_t provided_flags = 0;
uint64_t dirty[EC_VERSION_SIZE] = {0, 0};
lock = parent_link->lock;
@@ -1138,14 +1141,14 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie,
ctx = lock->ctx;
INIT_LIST_HEAD(&list);
- waiting_flags = parent_link->waiting_flags & EC_XATTROP_ALL_WAITING_FLAGS;
+ provided_flags = EC_PROVIDED_FLAGS(parent_link->waiting_flags);
LOCK(&lock->loc.inode->lock);
list_for_each_entry(link, &lock->owners, owner_list) {
- if ((link->waiting_flags & waiting_flags) != 0) {
- link->waiting_flags ^= (link->waiting_flags & waiting_flags);
- if ((link->waiting_flags & EC_XATTROP_ALL_WAITING_FLAGS) == 0)
+ if ((link->waiting_flags & provided_flags) != 0) {
+ link->waiting_flags ^= (link->waiting_flags & provided_flags);
+ if (EC_NEEDED_FLAGS(link->waiting_flags) == 0)
list_add_tail(&link->fop->cbk_list, &list);
}
}
@@ -1158,7 +1161,7 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie,
goto unlock;
}
- if (waiting_flags & EC_FLAG_WAITING_XATTROP) {
+ if (EC_FLAGS_HAVE(provided_flags, EC_FLAG_XATTROP)) {
op_errno = -ec_dict_del_array(dict, EC_XATTR_VERSION,
ctx->pre_version,
EC_VERSION_SIZE);
@@ -1219,20 +1222,20 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie,
ec_set_dirty_flag (fop->data, ctx, dirty);
if (dirty[EC_METADATA_TXN] &&
- (waiting_flags & EC_FLAG_WAITING_METADATA_DIRTY)) {
+ (EC_FLAGS_HAVE(provided_flags, EC_FLAG_METADATA_DIRTY))) {
GF_ASSERT (!ctx->dirty[EC_METADATA_TXN]);
ctx->dirty[EC_METADATA_TXN] = 1;
}
if (dirty[EC_DATA_TXN] &&
- (waiting_flags & EC_FLAG_WAITING_DATA_DIRTY)) {
+ (EC_FLAGS_HAVE(provided_flags, EC_FLAG_DATA_DIRTY))) {
GF_ASSERT (!ctx->dirty[EC_DATA_TXN]);
ctx->dirty[EC_DATA_TXN] = 1;
}
op_errno = 0;
unlock:
- lock->waiting_flags ^= waiting_flags;
+ lock->waiting_flags ^= provided_flags;
if (op_errno == 0) {
/* If the fop fails on any of the good bricks, it is important to mark
@@ -1279,6 +1282,24 @@ unlock:
return 0;
}
+static gf_boolean_t
+ec_set_needed_flag(ec_lock_t *lock, ec_lock_link_t *link, uint64_t flag)
+{
+ uint64_t current;
+
+ link->waiting_flags |= EC_FLAG_NEEDS(flag);
+
+ current = EC_NEEDED_FLAGS(lock->waiting_flags);
+ if (!EC_FLAGS_HAVE(current, flag)) {
+ lock->waiting_flags |= EC_FLAG_NEEDS(flag);
+ link->waiting_flags |= EC_FLAG_PROVIDES(flag);
+
+ return _gf_true;
+ }
+
+ return _gf_false;
+}
+
static uint64_t
ec_set_xattrop_flags_and_params (ec_lock_t *lock, ec_lock_link_t *link,
uint64_t *dirty)
@@ -1287,31 +1308,25 @@ ec_set_xattrop_flags_and_params (ec_lock_t *lock, ec_lock_link_t *link,
uint64_t newflags = 0;
ec_inode_t *ctx = lock->ctx;
- oldflags = lock->waiting_flags & EC_XATTROP_ALL_WAITING_FLAGS;
+ oldflags = EC_NEEDED_FLAGS(lock->waiting_flags);
if (lock->query && !ctx->have_info) {
- lock->waiting_flags |= EC_FLAG_WAITING_XATTROP;
- link->waiting_flags |= EC_FLAG_WAITING_XATTROP;
+ ec_set_needed_flag(lock, link, EC_FLAG_XATTROP);
}
if (dirty[EC_DATA_TXN]) {
- if (oldflags & EC_FLAG_WAITING_DATA_DIRTY) {
+ if (!ec_set_needed_flag(lock, link, EC_FLAG_DATA_DIRTY)) {
dirty[EC_DATA_TXN] = 0;
- } else {
- lock->waiting_flags |= EC_FLAG_WAITING_DATA_DIRTY;
}
- link->waiting_flags |= EC_FLAG_WAITING_DATA_DIRTY;
}
if (dirty[EC_METADATA_TXN]) {
- if (oldflags & EC_FLAG_WAITING_METADATA_DIRTY) {
+ if (!ec_set_needed_flag(lock, link, EC_FLAG_METADATA_DIRTY)) {
dirty[EC_METADATA_TXN] = 0;
- } else {
- lock->waiting_flags |= EC_FLAG_WAITING_METADATA_DIRTY;
}
- link->waiting_flags |= EC_FLAG_WAITING_METADATA_DIRTY;
}
- newflags = lock->waiting_flags & EC_XATTROP_ALL_WAITING_FLAGS;
+ newflags = EC_NEEDED_FLAGS(lock->waiting_flags);
+
return oldflags ^ newflags;
}
@@ -1381,7 +1396,7 @@ void ec_get_size_version(ec_lock_link_t *link)
goto out;
}
- if (changed_flags & EC_FLAG_WAITING_XATTROP) {
+ if (EC_FLAGS_HAVE(changed_flags, EC_FLAG_XATTROP)) {
/* Once we know that an xattrop will be needed,
* we try to get all available information in a
* single call. */
@@ -1676,10 +1691,6 @@ static gf_boolean_t
ec_link_has_lock_conflict (ec_lock_link_t *link, gf_boolean_t waitlist_check)
{
ec_lock_link_t *trav_link = NULL;
- ec_t *ec = link->fop->xl->private;
-
- if (!ec->parallel_writes)
- return _gf_true;
list_for_each_entry (trav_link, &link->lock->owners, owner_list) {
if (ec_lock_conflict (trav_link, link))
diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h
index 99e2f06..372be52 100644
--- a/xlators/cluster/ec/src/ec-common.h
+++ b/xlators/cluster/ec/src/ec-common.h
@@ -29,9 +29,31 @@ typedef enum {
#define EC_FLAG_LOCK_SHARED 0x0001
-#define EC_FLAG_WAITING_XATTROP 0x0001
-#define EC_FLAG_WAITING_DATA_DIRTY 0x0002
-#define EC_FLAG_WAITING_METADATA_DIRTY 0x0004
+enum _ec_xattrop_flags {
+ EC_FLAG_XATTROP,
+ EC_FLAG_DATA_DIRTY,
+ EC_FLAG_METADATA_DIRTY,
+
+ /* Add any new flag here, before EC_FLAG_MAX. The maximum number of
+ * flags that can be defined is 16. */
+
+ EC_FLAG_MAX
+};
+
+/* We keep two sets of flags. One to determine what's really providing the
+ * current xattrop and the other to know what the parent fop of the xattrop
+ * needs to proceed. It might happen that a fop needs some information that
+ * is being already requested by a previous fop. The two sets are stored
+ * contiguously. */
+
+#define EC_FLAG_NEEDS(_flag) (1 << (_flag))
+#define EC_FLAG_PROVIDES(_flag) (1 << ((_flag) + EC_FLAG_MAX))
+
+#define EC_NEEDED_FLAGS(_flags) ((_flags) & ((1 << EC_FLAG_MAX) - 1))
+
+#define EC_PROVIDED_FLAGS(_flags) EC_NEEDED_FLAGS((_flags) >> EC_FLAG_MAX)
+
+#define EC_FLAGS_HAVE(_flags, _flag) (((_flags) & (1 << (_flag))) != 0)
#define EC_SELFHEAL_BIT 62
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
index ac7e849..31e9c0e 100644
--- a/xlators/cluster/ec/src/ec.c
+++ b/xlators/cluster/ec/src/ec.c
@@ -1384,6 +1384,7 @@ int32_t ec_dump_private(xlator_t *this)
gf_proc_dump_write("healers", "%d", ec->healers);
gf_proc_dump_write("heal-waiters", "%d", ec->heal_waiters);
gf_proc_dump_write("read-policy", "%s", ec_read_policies[ec->read_policy]);
+ gf_proc_dump_write("parallel-writes", "%d", ec->parallel_writes);
snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s.stats.stripe_cache",
this->type, this->name);