summary | refs | log | tree | commit | diff | stats
path: root/xlators
diff options
context:
space:
mode:
author Jeff Darcy <jdarcy@redhat.com> 2014-03-31 18:37:38 +0000
committer Jeff Darcy <jdarcy@redhat.com> 2014-04-22 15:20:46 +0000
commit 46d333783a968ab39e0beade9c7a1eec8035f8b1 (patch)
tree faf1db8cb7ea7fefb0a4d8374440fa095116fef1 /xlators
parent acd2292f085b15c2c5c28169d11f20dca90f5ec9 (diff)
nsr: add quorum enforcement
Change-Id: I0241f8c1ac97c80ae438e3d9f1ac492d63da9347
Signed-off-by: Jeff Darcy <jdarcy@redhat.com>
Diffstat (limited to 'xlators')
-rw-r--r-- xlators/cluster/nsr-server/src/all-templates.c 25
-rw-r--r-- xlators/cluster/nsr-server/src/nsr-internal.h 1
-rw-r--r-- xlators/cluster/nsr-server/src/nsr.c 8
-rw-r--r-- xlators/cluster/nsr-server/src/recon_notify.c 10
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-volume-set.c 6
5 files changed, 48 insertions, 2 deletions
diff --git a/xlators/cluster/nsr-server/src/all-templates.c b/xlators/cluster/nsr-server/src/all-templates.c
index 2f0509a6c..fa29de7b2 100644
--- a/xlators/cluster/nsr-server/src/all-templates.c
+++ b/xlators/cluster/nsr-server/src/all-templates.c
@@ -59,6 +59,31 @@ nsr_$NAME$ (call_frame_t *frame, xlator_t *this,
int from_leader;
int from_recon;
uint32_t ti = 0;
+ double must_be_up;
+ double are_up;
+
+ /*
+ * Our first goal here is to avoid "split brain surprise" for users who
+ * specify exactly 50% with two- or three-way replication. That means
+ * either a more-than check against half the total replicas or an
+ * at-least check against half of our peers (one less). Of the two,
+ * only an at-least check supports the intuitive use of 100% to mean
+ * all replicas must be present, because "more than 100%" will never
+ * succeed regardless of which count we use. This leaves us with a
+ * slightly non-traditional definition of quorum ("at least X% of peers
+ * not including ourselves") but one that's useful enough to be worth
+ * it.
+ *
+ * Note that n_children and up_children *do* include the local
+ * subvolume, so we need to subtract one in each case.
+ */
+ must_be_up = ((double)(priv->n_children - 1)) * priv->quorum_pct;
+ are_up = ((double)(priv->up_children - 1)) * 100.0;
+ if (are_up < must_be_up) {
+ /* Emulate the AFR client-side-quorum behavior. */
+ op_errno = EROFS;
+ goto err;
+ }
local = mem_get0(this->local_pool);
if (!local) {
diff --git a/xlators/cluster/nsr-server/src/nsr-internal.h b/xlators/cluster/nsr-server/src/nsr-internal.h
index fc612c136..72b61bfa5 100644
--- a/xlators/cluster/nsr-server/src/nsr-internal.h
+++ b/xlators/cluster/nsr-server/src/nsr-internal.h
@@ -59,6 +59,7 @@ typedef struct {
volatile uint32_t ops_in_flight;
uint32_t index;
gf_lock_t index_lock;
+ double quorum_pct;
} nsr_private_t;
typedef struct {
diff --git a/xlators/cluster/nsr-server/src/nsr.c b/xlators/cluster/nsr-server/src/nsr.c
index eda9e555a..85eba09b5 100644
--- a/xlators/cluster/nsr-server/src/nsr.c
+++ b/xlators/cluster/nsr-server/src/nsr.c
@@ -591,6 +591,9 @@ nsr_init (xlator_t *this)
goto err;
}
+
+ GF_OPTION_INIT ("quorum-percent", priv->quorum_pct, percent, err);
+
GF_OPTION_INIT ("subvol-uuid", priv->subvol_uuid, str, err);
gf_log (this->name, GF_LOG_INFO, "subvol_uuid = %s", priv->subvol_uuid);
if (gf_asprintf(&priv->leader_key,"%s:leader",priv->subvol_uuid) <= 0) {
@@ -800,5 +803,10 @@ struct volume_options options[] = {
.type = GF_OPTION_TYPE_STR,
.description = "UUID for this NSR (sub)volume"
},
+ { .key = {"quorum-percent"},
+ .type = GF_OPTION_TYPE_PERCENT,
+ .default_value = "50.0",
+ .description = "percentage of rep_count-1 that must be up"
+ },
{ .key = {NULL} },
};
diff --git a/xlators/cluster/nsr-server/src/recon_notify.c b/xlators/cluster/nsr-server/src/recon_notify.c
index 24f7cf2de..1c50de234 100644
--- a/xlators/cluster/nsr-server/src/recon_notify.c
+++ b/xlators/cluster/nsr-server/src/recon_notify.c
@@ -91,8 +91,14 @@ nsr_recon_set_leader (xlator_t *this)
if (ctx->last_reconciled_term == priv->current_term)
return;
- // No majority as of yet
- if (priv->up_children <= (priv->n_children / 2))
+ /*
+ * Quorum for reconciliation is not the same as quorum for I/O. Here,
+ * we require a true majority. No +1 adjustment is needed because
+ * n_children and up_children already include the local subvolume.
+ *
+ * TBD: re-evaluate when to reconcile (including partial)
+ */
+ if (priv->up_children <= (priv->n_children / 2))
return;
gf_log (this->name, GF_LOG_INFO,
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index 39bbe0a13..24a6ed7cd 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -889,6 +889,12 @@ struct volopt_map_entry glusterd_volopt_map[] = {
.description = "enable NSR reconciliation",
.flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT
},
+ { .key = "cluster.nsr.quorum-percent",
+ .voltype = "cluster/nsr",
+ .option = "quorum-percent",
+ .op_version = 3,
+ .description = "percent of rep_count-1 bricks that must be up"
+ },
/* Performance xlators enable/disable options */
{ .key = "performance.write-behind",