summary | refs | log | tree | commit | diff | stats
path: root/xlators/cluster/ec/src
diff options
context:
space:
mode:
author: Krutika Dhananjay <kdhananj@redhat.com> 2016-07-28 21:29:59 +0530
committer: Pranith Kumar Karampuri <pkarampu@redhat.com> 2016-08-22 03:05:08 -0700
commit: febaa1e46d3a91a29c4786a17abf29cfc7178254 (patch)
tree: 0fe52522cb3bfe318d9032243283f3ab6751ec9e /xlators/cluster/ec/src
parent: 888ad44a9da3006b3e5695e5e5b40d6e446aa109 (diff)
cluster/afr: Prevent split-brain when bricks are brought off and on in cyclic order
Backport of: http://review.gluster.org/15080

When the bricks are brought offline and then online in cyclic order while writes are in progress on a file, thanks to inode refresh in write txns, AFR will mostly fail the write attempt when the only good copy is offline. However, there is still a remote possibility that the file will run into split-brain if the brick that has the lone good copy goes offline *after* the inode refresh but *before* the write txn completes (I call it in-flight split-brain in the patch for ease of reference), requiring intervention from the admin to resolve the split-brain before the IO can resume normally on the file.

To get around this, the patch does the following things:
i) retains the dirty xattrs on the file
ii) avoids marking the last of the good copies as bad (or accused) in case it is the one to go down during the course of a write.
iii) fails that particular write with the appropriate errno.

This way, we still have one good copy left despite the split-brain situation; once it is back online, it will be chosen as the source to do the heal.

Change-Id: I7c13c6ddd5b8fe88b0f2684e8ce5f4a9c3a24a08
BUG: 1367270
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
Reviewed-on: http://review.gluster.org/15222
Smoke: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Oleksandr Natalenko <oleksandr@natalenko.name>
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Diffstat (limited to 'xlators/cluster/ec/src')
-rw-r--r--  xlators/cluster/ec/src/ec-common.c   16
-rw-r--r--  xlators/cluster/ec/src/ec-helpers.c  21
-rw-r--r--  xlators/cluster/ec/src/ec-helpers.h   2
-rw-r--r--  xlators/cluster/ec/src/ec-locks.c     2
4 files changed, 9 insertions, 32 deletions
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
index 43e7fcf72dc..7089f61e6be 100644
--- a/xlators/cluster/ec/src/ec-common.c
+++ b/xlators/cluster/ec/src/ec-common.c
@@ -74,8 +74,8 @@ int32_t ec_heal_report(call_frame_t * frame, void * cookie, xlator_t * this,
gf_msg (this->name, GF_LOG_INFO, 0,
EC_MSG_HEAL_SUCCESS, "Heal succeeded on %d/%d "
"subvolumes",
- ec_bits_count(mask & ~(good | bad)),
- ec_bits_count(mask & ~good));
+ gf_bits_count(mask & ~(good | bad)),
+ gf_bits_count(mask & ~good));
}
}
@@ -333,7 +333,7 @@ void ec_complete(ec_fop_data_t * fop)
if (fop->answer == NULL) {
if (!list_empty(&fop->cbk_list)) {
cbk = list_entry(fop->cbk_list.next, ec_cbk_data_t, list);
- healing_count = ec_bits_count (cbk->mask & fop->healing);
+ healing_count = gf_bits_count (cbk->mask & fop->healing);
/* fop shouldn't be treated as success if it is not
* successful on at least fop->minimum good copies*/
if ((cbk->count - healing_count) >= fop->minimum) {
@@ -424,7 +424,7 @@ int32_t ec_child_select(ec_fop_data_t * fop)
switch (fop->minimum)
{
case EC_MINIMUM_ALL:
- fop->minimum = ec_bits_count(fop->mask);
+ fop->minimum = gf_bits_count(fop->mask);
if (fop->minimum >= ec->fragments)
{
break;
@@ -451,7 +451,7 @@ int32_t ec_child_select(ec_fop_data_t * fop)
ec_trace("SELECT", fop, "");
- num = ec_bits_count(fop->mask);
+ num = gf_bits_count(fop->mask);
if ((num < fop->minimum) && (num < ec->fragments))
{
gf_msg (ec->xl->name, GF_LOG_ERROR, 0,
@@ -500,7 +500,7 @@ void ec_dispatch_mask(ec_fop_data_t * fop, uintptr_t mask)
ec_t * ec = fop->xl->private;
int32_t count, idx;
- count = ec_bits_count(mask);
+ count = gf_bits_count(mask);
LOCK(&fop->lock);
@@ -578,7 +578,7 @@ void ec_dispatch_inc(ec_fop_data_t * fop)
if (ec_child_select(fop))
{
- fop->expected = ec_bits_count(fop->remaining);
+ fop->expected = gf_bits_count(fop->remaining);
fop->first = 0;
ec_dispatch_next(fop, 0);
@@ -591,7 +591,7 @@ ec_dispatch_all (ec_fop_data_t *fop)
ec_dispatch_start(fop);
if (ec_child_select(fop)) {
- fop->expected = ec_bits_count(fop->remaining);
+ fop->expected = gf_bits_count(fop->remaining);
fop->first = 0;
ec_dispatch_mask(fop, fop->remaining);
diff --git a/xlators/cluster/ec/src/ec-helpers.c b/xlators/cluster/ec/src/ec-helpers.c
index 28641cec5f7..612febe969e 100644
--- a/xlators/cluster/ec/src/ec-helpers.c
+++ b/xlators/cluster/ec/src/ec-helpers.c
@@ -17,10 +17,6 @@
#include "ec-helpers.h"
#include "ec-messages.h"
-#ifndef ffsll
-#define ffsll(x) __builtin_ffsll(x)
-#endif
-
static const char * ec_fop_list[] =
{
[-EC_FOP_HEAL] = "HEAL"
@@ -96,23 +92,6 @@ void ec_trace(const char * event, ec_fop_data_t * fop, const char * fmt, ...)
}
}
-int32_t ec_bits_count(uint64_t n)
-{
- n -= (n >> 1) & 0x5555555555555555ULL;
- n = ((n >> 2) & 0x3333333333333333ULL) + (n & 0x3333333333333333ULL);
- n = (n + (n >> 4)) & 0x0F0F0F0F0F0F0F0FULL;
- n += n >> 8;
- n += n >> 16;
- n += n >> 32;
-
- return n & 0xFF;
-}
-
-int32_t ec_bits_index(uint64_t n)
-{
- return ffsll(n) - 1;
-}
-
int32_t ec_bits_consume(uint64_t * n)
{
uint64_t tmp;
diff --git a/xlators/cluster/ec/src/ec-helpers.h b/xlators/cluster/ec/src/ec-helpers.h
index 1f39da2c09f..93d77726089 100644
--- a/xlators/cluster/ec/src/ec-helpers.h
+++ b/xlators/cluster/ec/src/ec-helpers.h
@@ -16,8 +16,6 @@
const char * ec_bin(char * str, size_t size, uint64_t value, int32_t digits);
const char * ec_fop_name(int32_t id);
void ec_trace(const char * event, ec_fop_data_t * fop, const char * fmt, ...);
-int32_t ec_bits_count(uint64_t n);
-int32_t ec_bits_index(uint64_t n);
int32_t ec_bits_consume(uint64_t * n);
size_t ec_iov_copy_to(void * dst, struct iovec * vector, int32_t count,
off_t offset, size_t size);
diff --git a/xlators/cluster/ec/src/ec-locks.c b/xlators/cluster/ec/src/ec-locks.c
index 0253b51bf5e..ed835f1aadc 100644
--- a/xlators/cluster/ec/src/ec-locks.c
+++ b/xlators/cluster/ec/src/ec-locks.c
@@ -52,7 +52,7 @@ int32_t ec_lock_check(ec_fop_data_t *fop, uintptr_t *mask)
}
if (error == -1) {
- if (ec_bits_count(locked | notlocked) >= ec->fragments) {
+ if (gf_bits_count(locked | notlocked) >= ec->fragments) {
if (notlocked == 0) {
if (fop->answer == NULL) {
fop->answer = cbk;