summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--xlators/cluster/afr/src/afr-common.c55
-rw-r--r--xlators/cluster/afr/src/afr-dir-write.c16
-rw-r--r--xlators/cluster/afr/src/afr-inode-write.c14
-rw-r--r--xlators/cluster/afr/src/afr-open.c6
-rw-r--r--xlators/cluster/afr/src/afr.c72
-rw-r--r--xlators/cluster/afr/src/afr.h24
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.c2
7 files changed, 120 insertions, 69 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 9512f6a5477..209024bbeda 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -3893,31 +3893,36 @@ out:
gf_boolean_t
afr_have_quorum (char *logname, afr_private_t *priv)
{
- unsigned int quorum = 0;
-
- GF_VALIDATE_OR_GOTO(logname,priv,out);
-
- quorum = priv->child_count / 2 + 1;
- if (priv->up_count >= (priv->down_count + quorum)) {
- return _gf_true;
- }
-
- /*
- * Special case for even numbers of nodes: if we have exactly half
- * and that includes the first ("senior-most") node, then that counts
- * as quorum even if it wouldn't otherwise. This supports e.g. N=2
- * while preserving the critical property that there can only be one
- * such group.
- */
- if ((priv->child_count % 2) == 0) {
- quorum = priv->child_count / 2;
- if (priv->up_count >= (priv->down_count + quorum)) {
- if (priv->child_up[0]) {
- return _gf_true;
- }
- }
- }
+ unsigned int quorum = 0;
+
+ GF_VALIDATE_OR_GOTO(logname,priv,out);
+
+ quorum = priv->quorum_count;
+ if (quorum != AFR_QUORUM_AUTO) {
+ return (priv->up_count >= (priv->down_count + quorum));
+ }
+
+ quorum = priv->child_count / 2 + 1;
+ if (priv->up_count >= (priv->down_count + quorum)) {
+ return _gf_true;
+ }
+
+ /*
+ * Special case for even numbers of nodes: if we have exactly half
+ * and that includes the first ("senior-most") node, then that counts
+ * as quorum even if it wouldn't otherwise. This supports e.g. N=2
+ * while preserving the critical property that there can only be one
+ * such group.
+ */
+ if ((priv->child_count % 2) == 0) {
+ quorum = priv->child_count / 2;
+ if (priv->up_count >= (priv->down_count + quorum)) {
+ if (priv->child_up[0]) {
+ return _gf_true;
+ }
+ }
+ }
out:
- return _gf_false;
+ return _gf_false;
}
diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
index 11df550d53e..7ed8c52052e 100644
--- a/xlators/cluster/afr/src/afr-dir-write.c
+++ b/xlators/cluster/afr/src/afr-dir-write.c
@@ -279,7 +279,7 @@ afr_create (call_frame_t *frame, xlator_t *this,
priv = this->private;
- QUORUM_CHECK(create,out);
+ QUORUM_CHECK(create,out);
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
@@ -504,7 +504,7 @@ afr_mknod (call_frame_t *frame, xlator_t *this,
priv = this->private;
- QUORUM_CHECK(mknod,out);
+ QUORUM_CHECK(mknod,out);
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
@@ -730,7 +730,7 @@ afr_mkdir (call_frame_t *frame, xlator_t *this,
priv = this->private;
- QUORUM_CHECK(mkdir,out);
+ QUORUM_CHECK(mkdir,out);
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
@@ -955,7 +955,7 @@ afr_link (call_frame_t *frame, xlator_t *this,
priv = this->private;
- QUORUM_CHECK(link,out);
+ QUORUM_CHECK(link,out);
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
@@ -1179,7 +1179,7 @@ afr_symlink (call_frame_t *frame, xlator_t *this,
priv = this->private;
- QUORUM_CHECK(symlink,out);
+ QUORUM_CHECK(symlink,out);
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
@@ -1397,7 +1397,7 @@ afr_rename (call_frame_t *frame, xlator_t *this,
priv = this->private;
- QUORUM_CHECK(rename,out);
+ QUORUM_CHECK(rename,out);
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
@@ -1596,7 +1596,7 @@ afr_unlink (call_frame_t *frame, xlator_t *this,
priv = this->private;
- QUORUM_CHECK(unlink,out);
+ QUORUM_CHECK(unlink,out);
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
@@ -1792,7 +1792,7 @@ afr_rmdir (call_frame_t *frame, xlator_t *this,
priv = this->private;
- QUORUM_CHECK(rmdir,out);
+ QUORUM_CHECK(rmdir,out);
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
index 4135ba947d2..2fd2538cd2b 100644
--- a/xlators/cluster/afr/src/afr-inode-write.c
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -449,7 +449,7 @@ afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
priv = this->private;
- QUORUM_CHECK(writev,out);
+ QUORUM_CHECK(writev,out);
ALLOC_OR_GOTO (local, afr_local_t, out);
@@ -649,7 +649,7 @@ afr_truncate (call_frame_t *frame, xlator_t *this,
priv = this->private;
- QUORUM_CHECK(truncate,out);
+ QUORUM_CHECK(truncate,out);
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
@@ -900,7 +900,7 @@ afr_ftruncate (call_frame_t *frame, xlator_t *this,
priv = this->private;
- QUORUM_CHECK(ftruncate,out);
+ QUORUM_CHECK(ftruncate,out);
ALLOC_OR_GOTO (local, afr_local_t, out);
ret = AFR_LOCAL_INIT (local, priv);
@@ -1099,7 +1099,7 @@ afr_setattr (call_frame_t *frame, xlator_t *this,
priv = this->private;
- QUORUM_CHECK(setattr,out);
+ QUORUM_CHECK(setattr,out);
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
@@ -1306,7 +1306,7 @@ afr_fsetattr (call_frame_t *frame, xlator_t *this,
priv = this->private;
- QUORUM_CHECK(fsetattr,out);
+ QUORUM_CHECK(fsetattr,out);
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
@@ -1497,7 +1497,7 @@ afr_setxattr (call_frame_t *frame, xlator_t *this,
priv = this->private;
- QUORUM_CHECK(setxattr,out);
+ QUORUM_CHECK(setxattr,out);
ALLOC_OR_GOTO (local, afr_local_t, out);
@@ -1683,7 +1683,7 @@ afr_removexattr (call_frame_t *frame, xlator_t *this,
priv = this->private;
- QUORUM_CHECK(removexattr,out);
+ QUORUM_CHECK(removexattr,out);
transaction_frame = copy_frame (frame);
if (!transaction_frame) {
diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c
index 5031cfd5ef2..0b46f213f4b 100644
--- a/xlators/cluster/afr/src/afr-open.c
+++ b/xlators/cluster/afr/src/afr-open.c
@@ -214,9 +214,9 @@ afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
priv = this->private;
- if (flags & (O_CREAT|O_TRUNC)) {
- QUORUM_CHECK(open,out);
- }
+ if (flags & (O_CREAT|O_TRUNC)) {
+ QUORUM_CHECK(open,out);
+ }
if (afr_is_split_brain (this, loc->inode)) {
/* self-heal failed */
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index 7791ec86bfc..c66f386f2fe 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -97,14 +97,30 @@ xlator_subvolume_count (xlator_t *this)
return i;
}
+void
+fix_quorum_options (xlator_t *this, afr_private_t *priv, char *qtype)
+{
+ if (priv->quorum_count && strcmp(qtype,"fixed")) {
+ gf_log(this->name,GF_LOG_WARNING,
+ "quorum-type %s overriding quorum-count %u",
+ qtype, priv->quorum_count);
+ }
+ if (!strcmp(qtype,"none")) {
+ priv->quorum_count = 0;
+ }
+ else if (!strcmp(qtype,"auto")) {
+ priv->quorum_count = AFR_QUORUM_AUTO;
+ }
+}
int
reconfigure (xlator_t *this, dict_t *options)
{
- afr_private_t * priv = NULL;
- xlator_t * read_subvol = NULL;
- int ret = -1;
- int index = -1;
+ afr_private_t *priv = NULL;
+ xlator_t *read_subvol = NULL;
+ int ret = -1;
+ int index = -1;
+ char *qtype = NULL;
priv = this->private;
@@ -154,6 +170,11 @@ reconfigure (xlator_t *this, dict_t *options)
priv->read_child = index;
}
+ GF_OPTION_RECONF ("quorum-type", qtype, options, str, out);
+ GF_OPTION_RECONF ("quorum-count", priv->quorum_count, options,
+ uint32, out);
+ fix_quorum_options(this,priv,qtype);
+
ret = 0;
out:
return ret;
@@ -173,15 +194,15 @@ static const char *favorite_child_warning_str = "You have specified subvolume '%
int32_t
init (xlator_t *this)
{
- afr_private_t * priv = NULL;
- int child_count = 0;
- xlator_list_t * trav = NULL;
- int i = 0;
- int ret = -1;
- GF_UNUSED int op_errno = 0;
- xlator_t * read_subvol = NULL;
- xlator_t * fav_child = NULL;
-
+ afr_private_t *priv = NULL;
+ int child_count = 0;
+ xlator_list_t *trav = NULL;
+ int i = 0;
+ int ret = -1;
+ GF_UNUSED int op_errno = 0;
+ xlator_t *read_subvol = NULL;
+ xlator_t *fav_child = NULL;
+ char *qtype = NULL;
if (!this->children) {
gf_log (this->name, GF_LOG_ERROR,
@@ -260,7 +281,9 @@ init (xlator_t *this)
GF_OPTION_INIT ("strict-readdir", priv->strict_readdir, bool, out);
- GF_OPTION_INIT ("enforce-quorum", priv->enforce_quorum, bool, out);
+ GF_OPTION_INIT ("quorum-type", qtype, str, out);
+ GF_OPTION_INIT ("quorum-count", priv->quorum_count, uint32, out);
+ fix_quorum_options(this,priv,qtype);
priv->wait_count = 1;
@@ -492,9 +515,24 @@ struct volume_options options[] = {
.type = GF_OPTION_TYPE_BOOL,
.default_value = "off",
},
- { .key = {"enforce-quorum"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
+ { .key = {"quorum-type"},
+ .type = GF_OPTION_TYPE_STR,
+ .value = { "none", "auto", "fixed", "" },
+ .default_value = "none",
+ .description = "If value is \"fixed\" only allow writes if "
+ "quorum-count bricks are present. If value is "
+ "\"auto\" only allow writes if more than half of "
+ "bricks, or exactly half including the first, are "
+ "present.",
+ },
+ { .key = {"quorum-count"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = INT_MAX,
+ .default_value = 0,
+ .description = "If quorum-type is \"fixed\" only allow writes if "
+ "this many bricks or present. Other quorum types "
+ "will OVERWRITE this value.",
},
{ .key = {NULL} },
};
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 0566495fe96..4aea44c4275 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -144,9 +144,9 @@ typedef struct _afr_private {
pthread_mutex_t mutex;
struct list_head saved_fds; /* list of fds on which locks have succeeded */
- gf_boolean_t optimistic_change_log;
- gf_boolean_t eager_lock;
- gf_boolean_t enforce_quorum;
+ gf_boolean_t optimistic_change_log;
+ gf_boolean_t eager_lock;
+ unsigned int quorum_count;
char vol_uuid[UUID_SIZE + 1];
int32_t *last_event;
@@ -1006,16 +1006,22 @@ gf_boolean_t
afr_have_quorum (char *logname, afr_private_t *priv);
/*
+ * Special value indicating we should use the "auto" quorum method instead of
+ * a fixed value (including zero to turn off quorum enforcement).
+ */
+#define AFR_QUORUM_AUTO INT_MAX
+
+/*
* Having this as a macro will make debugging a bit weirder, but does reduce
* the probability of functions handling this check inconsistently.
*/
#define QUORUM_CHECK(_func,_label) do { \
- if (priv->enforce_quorum && !afr_have_quorum(this->name,priv)) { \
- gf_log(this->name,GF_LOG_WARNING, \
- "failing "#_func" due to lack of quorum"); \
- op_errno = EROFS; \
- goto _label; \
- } \
+ if (priv->quorum_count && !afr_have_quorum(this->name,priv)) { \
+ gf_log(this->name,GF_LOG_WARNING, \
+ "failing "#_func" due to lack of quorum"); \
+ op_errno = EROFS; \
+ goto _label; \
+ } \
} while (0);
#endif /* __AFR_H__ */
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
index 0fa7601f145..f83a052ed50 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
@@ -125,6 +125,8 @@ static struct volopt_map_entry glusterd_volopt_map[] = {
{"cluster.data-change-log", "cluster/replicate", NULL, NULL, NO_DOC, 0 },
{"cluster.metadata-change-log", "cluster/replicate", NULL, NULL, NO_DOC, 0 },
{"cluster.data-self-heal-algorithm", "cluster/replicate", "data-self-heal-algorithm", NULL,DOC, 0},
+ {"cluster.quorum-type", "cluster/replicate", "quorum-type", NULL, NO_DOC, 0},
+ {"cluster.quorum-count", "cluster/replicate", "quorum-count", NULL, NO_DOC, 0},
{"cluster.stripe-block-size", "cluster/stripe", "block-size", NULL, DOC, 0},