summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJeff Darcy <jdarcy@redhat.com>2011-09-20 09:49:57 -0400
committerVijay Bellur <vijay@gluster.com>2011-11-20 07:34:31 -0800
commit76d5e5d5b51eb2ffe5a0608bf8869650bb76585f (patch)
treed349474b5f4eebe00973c05df29b9ebf5715d889
parente6a3902410aaa8756e9c07302d88399fb99641c7 (diff)
Add quorum checks to avoid split-brain.
Change-Id: I2f123ef93989862aa796903a45682981d5d7fc3c BUG: 3533 Reviewed-on: http://review.gluster.com/473 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Vijay Bellur <vijay@gluster.com>
-rw-r--r--xlators/cluster/afr/src/afr-common.c32
-rw-r--r--xlators/cluster/afr/src/afr-dir-write.c17
-rw-r--r--xlators/cluster/afr/src/afr-inode-write.c14
-rw-r--r--xlators/cluster/afr/src/afr-open.c4
-rw-r--r--xlators/cluster/afr/src/afr.c6
-rw-r--r--xlators/cluster/afr/src/afr.h18
6 files changed, 90 insertions, 1 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index e25ff2353..01a092e25 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -3872,3 +3872,35 @@ afr_child_fd_ctx_set (xlator_t *this, fd_t *fd, int32_t child,
out:
return ret;
}
+
+gf_boolean_t
+afr_have_quorum (char *logname, afr_private_t *priv)
+{
+ unsigned int quorum = 0;
+
+ GF_VALIDATE_OR_GOTO(logname,priv,out);
+
+ quorum = priv->child_count / 2 + 1;
+ if (priv->up_count >= (priv->down_count + quorum)) {
+ return _gf_true;
+ }
+
+ /*
+ * Special case for even numbers of nodes: if we have exactly half
+ * and that includes the first ("senior-most") node, then that counts
+ * as quorum even if it wouldn't otherwise. This supports e.g. N=2
+ * while preserving the critical property that there can only be one
+ * such group.
+ */
+ if ((priv->child_count % 2) == 0) {
+ quorum = priv->child_count / 2;
+ if (priv->up_count >= (priv->down_count + quorum)) {
+ if (priv->child_up[0]) {
+ return _gf_true;
+ }
+ }
+ }
+
+out:
+ return _gf_false;
+}
diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
index ca669b684..11df550d5 100644
--- a/xlators/cluster/afr/src/afr-dir-write.c
+++ b/xlators/cluster/afr/src/afr-dir-write.c
@@ -47,7 +47,6 @@
#include "afr.h"
#include "afr-transaction.h"
-
void
afr_build_parent_loc (loc_t *parent, loc_t *child)
{
@@ -280,6 +279,8 @@ afr_create (call_frame_t *frame, xlator_t *this,
priv = this->private;
+ QUORUM_CHECK(create,out);
+
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
goto out;
@@ -503,6 +504,8 @@ afr_mknod (call_frame_t *frame, xlator_t *this,
priv = this->private;
+ QUORUM_CHECK(mknod,out);
+
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
goto out;
@@ -727,6 +730,8 @@ afr_mkdir (call_frame_t *frame, xlator_t *this,
priv = this->private;
+ QUORUM_CHECK(mkdir,out);
+
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
goto out;
@@ -950,6 +955,8 @@ afr_link (call_frame_t *frame, xlator_t *this,
priv = this->private;
+ QUORUM_CHECK(link,out);
+
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
goto out;
@@ -1172,6 +1179,8 @@ afr_symlink (call_frame_t *frame, xlator_t *this,
priv = this->private;
+ QUORUM_CHECK(symlink,out);
+
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
goto out;
@@ -1388,6 +1397,8 @@ afr_rename (call_frame_t *frame, xlator_t *this,
priv = this->private;
+ QUORUM_CHECK(rename,out);
+
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
goto out;
@@ -1585,6 +1596,8 @@ afr_unlink (call_frame_t *frame, xlator_t *this,
priv = this->private;
+ QUORUM_CHECK(unlink,out);
+
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
goto out;
@@ -1779,6 +1792,8 @@ afr_rmdir (call_frame_t *frame, xlator_t *this,
priv = this->private;
+ QUORUM_CHECK(rmdir,out);
+
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
goto out;
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
index 3f78c7b36..4135ba947 100644
--- a/xlators/cluster/afr/src/afr-inode-write.c
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -449,6 +449,8 @@ afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
priv = this->private;
+ QUORUM_CHECK(writev,out);
+
ALLOC_OR_GOTO (local, afr_local_t, out);
ret = AFR_LOCAL_INIT (local, priv);
@@ -647,6 +649,8 @@ afr_truncate (call_frame_t *frame, xlator_t *this,
priv = this->private;
+ QUORUM_CHECK(truncate,out);
+
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
goto out;
@@ -896,6 +900,8 @@ afr_ftruncate (call_frame_t *frame, xlator_t *this,
priv = this->private;
+ QUORUM_CHECK(ftruncate,out);
+
ALLOC_OR_GOTO (local, afr_local_t, out);
ret = AFR_LOCAL_INIT (local, priv);
@@ -1093,6 +1099,8 @@ afr_setattr (call_frame_t *frame, xlator_t *this,
priv = this->private;
+ QUORUM_CHECK(setattr,out);
+
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
goto out;
@@ -1298,6 +1306,8 @@ afr_fsetattr (call_frame_t *frame, xlator_t *this,
priv = this->private;
+ QUORUM_CHECK(fsetattr,out);
+
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
op_errno = ENOMEM;
@@ -1487,6 +1497,8 @@ afr_setxattr (call_frame_t *frame, xlator_t *this,
priv = this->private;
+ QUORUM_CHECK(setxattr,out);
+
ALLOC_OR_GOTO (local, afr_local_t, out);
ret = AFR_LOCAL_INIT (local, priv);
@@ -1671,6 +1683,8 @@ afr_removexattr (call_frame_t *frame, xlator_t *this,
priv = this->private;
+ QUORUM_CHECK(removexattr,out);
+
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
goto out;
diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c
index 646d23ccb..e19847b0b 100644
--- a/xlators/cluster/afr/src/afr-open.c
+++ b/xlators/cluster/afr/src/afr-open.c
@@ -214,6 +214,10 @@ afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
priv = this->private;
+ if (flags & (O_CREAT|O_TRUNC)) {
+ QUORUM_CHECK(open,out);
+ }
+
if (afr_is_split_brain (this, loc->inode)) {
/* self-heal failed */
gf_log (this->name, GF_LOG_WARNING,
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index 30da3fc72..7791ec86b 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -260,6 +260,8 @@ init (xlator_t *this)
GF_OPTION_INIT ("strict-readdir", priv->strict_readdir, bool, out);
+ GF_OPTION_INIT ("enforce-quorum", priv->enforce_quorum, bool, out);
+
priv->wait_count = 1;
child_count = xlator_subvolume_count (this);
@@ -490,5 +492,9 @@ struct volume_options options[] = {
.type = GF_OPTION_TYPE_BOOL,
.default_value = "off",
},
+ { .key = {"enforce-quorum"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ },
{ .key = {NULL} },
};
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index e1f13b376..0677b96e9 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -146,6 +146,7 @@ typedef struct _afr_private {
struct list_head saved_fds; /* list of fds on which locks have succeeded */
gf_boolean_t optimistic_change_log;
gf_boolean_t eager_lock;
+ gf_boolean_t enforce_quorum;
char vol_uuid[UUID_SIZE + 1];
int32_t *last_event;
@@ -998,4 +999,21 @@ afr_set_low_priority (call_frame_t *frame);
int
afr_child_fd_ctx_set (xlator_t *this, fd_t *fd, int32_t child,
int flags, int32_t wb_flags);
+
+gf_boolean_t
+afr_have_quorum (char *logname, afr_private_t *priv);
+
+/*
+ * Having this as a macro will make debugging a bit weirder, but does reduce
+ * the probability of functions handling this check inconsistently.
+ */
+#define QUORUM_CHECK(_func,_label) do { \
+ if (priv->enforce_quorum && !afr_have_quorum(this->name,priv)) { \
+ gf_log(this->name,GF_LOG_WARNING, \
+ "failing "#_func" due to lack of quorum"); \
+ op_errno = EROFS; \
+ goto _label; \
+ } \
+} while (0);
+
#endif /* __AFR_H__ */