From 1b3571d38fc789100e2c053750eca1109494596b Mon Sep 17 00:00:00 2001 From: Jeff Darcy Date: Mon, 21 Nov 2011 12:35:34 -0500 Subject: Add command-line support (but no doc) for enforce-quorum option. Change-Id: Ia52ddb551e24c27969f7f5fa0f94c1044789731f BUG: 3823 Reviewed-on: http://review.gluster.com/743 Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- xlators/cluster/afr/src/afr-common.c | 55 ++++++++++++---------- xlators/cluster/afr/src/afr-dir-write.c | 16 +++---- xlators/cluster/afr/src/afr-inode-write.c | 14 +++--- xlators/cluster/afr/src/afr-open.c | 6 +-- xlators/cluster/afr/src/afr.c | 72 ++++++++++++++++++++++------- xlators/cluster/afr/src/afr.h | 24 ++++++---- xlators/mgmt/glusterd/src/glusterd-volgen.c | 2 + 7 files changed, 120 insertions(+), 69 deletions(-) (limited to 'xlators') diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 9512f6a5477..209024bbeda 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -3893,31 +3893,36 @@ out: gf_boolean_t afr_have_quorum (char *logname, afr_private_t *priv) { - unsigned int quorum = 0; - - GF_VALIDATE_OR_GOTO(logname,priv,out); - - quorum = priv->child_count / 2 + 1; - if (priv->up_count >= (priv->down_count + quorum)) { - return _gf_true; - } - - /* - * Special case for even numbers of nodes: if we have exactly half - * and that includes the first ("senior-most") node, then that counts - * as quorum even if it wouldn't otherwise. This supports e.g. N=2 - * while preserving the critical property that there can only be one - * such group. 
- */ - if ((priv->child_count % 2) == 0) { - quorum = priv->child_count / 2; - if (priv->up_count >= (priv->down_count + quorum)) { - if (priv->child_up[0]) { - return _gf_true; - } - } - } + unsigned int quorum = 0; + + GF_VALIDATE_OR_GOTO(logname,priv,out); + + quorum = priv->quorum_count; + if (quorum != AFR_QUORUM_AUTO) { + return (priv->up_count >= (priv->down_count + quorum)); + } + + quorum = priv->child_count / 2 + 1; + if (priv->up_count >= (priv->down_count + quorum)) { + return _gf_true; + } + + /* + * Special case for even numbers of nodes: if we have exactly half + * and that includes the first ("senior-most") node, then that counts + * as quorum even if it wouldn't otherwise. This supports e.g. N=2 + * while preserving the critical property that there can only be one + * such group. + */ + if ((priv->child_count % 2) == 0) { + quorum = priv->child_count / 2; + if (priv->up_count >= (priv->down_count + quorum)) { + if (priv->child_up[0]) { + return _gf_true; + } + } + } out: - return _gf_false; + return _gf_false; } diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c index 11df550d53e..7ed8c52052e 100644 --- a/xlators/cluster/afr/src/afr-dir-write.c +++ b/xlators/cluster/afr/src/afr-dir-write.c @@ -279,7 +279,7 @@ afr_create (call_frame_t *frame, xlator_t *this, priv = this->private; - QUORUM_CHECK(create,out); + QUORUM_CHECK(create,out); transaction_frame = copy_frame (frame); if (!transaction_frame) { @@ -504,7 +504,7 @@ afr_mknod (call_frame_t *frame, xlator_t *this, priv = this->private; - QUORUM_CHECK(mknod,out); + QUORUM_CHECK(mknod,out); transaction_frame = copy_frame (frame); if (!transaction_frame) { @@ -730,7 +730,7 @@ afr_mkdir (call_frame_t *frame, xlator_t *this, priv = this->private; - QUORUM_CHECK(mkdir,out); + QUORUM_CHECK(mkdir,out); transaction_frame = copy_frame (frame); if (!transaction_frame) { @@ -955,7 +955,7 @@ afr_link (call_frame_t *frame, xlator_t *this, priv = this->private; - 
QUORUM_CHECK(link,out); + QUORUM_CHECK(link,out); transaction_frame = copy_frame (frame); if (!transaction_frame) { @@ -1179,7 +1179,7 @@ afr_symlink (call_frame_t *frame, xlator_t *this, priv = this->private; - QUORUM_CHECK(symlink,out); + QUORUM_CHECK(symlink,out); transaction_frame = copy_frame (frame); if (!transaction_frame) { @@ -1397,7 +1397,7 @@ afr_rename (call_frame_t *frame, xlator_t *this, priv = this->private; - QUORUM_CHECK(rename,out); + QUORUM_CHECK(rename,out); transaction_frame = copy_frame (frame); if (!transaction_frame) { @@ -1596,7 +1596,7 @@ afr_unlink (call_frame_t *frame, xlator_t *this, priv = this->private; - QUORUM_CHECK(unlink,out); + QUORUM_CHECK(unlink,out); transaction_frame = copy_frame (frame); if (!transaction_frame) { @@ -1792,7 +1792,7 @@ afr_rmdir (call_frame_t *frame, xlator_t *this, priv = this->private; - QUORUM_CHECK(rmdir,out); + QUORUM_CHECK(rmdir,out); transaction_frame = copy_frame (frame); if (!transaction_frame) { diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c index 4135ba947d2..2fd2538cd2b 100644 --- a/xlators/cluster/afr/src/afr-inode-write.c +++ b/xlators/cluster/afr/src/afr-inode-write.c @@ -449,7 +449,7 @@ afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, priv = this->private; - QUORUM_CHECK(writev,out); + QUORUM_CHECK(writev,out); ALLOC_OR_GOTO (local, afr_local_t, out); @@ -649,7 +649,7 @@ afr_truncate (call_frame_t *frame, xlator_t *this, priv = this->private; - QUORUM_CHECK(truncate,out); + QUORUM_CHECK(truncate,out); transaction_frame = copy_frame (frame); if (!transaction_frame) { @@ -900,7 +900,7 @@ afr_ftruncate (call_frame_t *frame, xlator_t *this, priv = this->private; - QUORUM_CHECK(ftruncate,out); + QUORUM_CHECK(ftruncate,out); ALLOC_OR_GOTO (local, afr_local_t, out); ret = AFR_LOCAL_INIT (local, priv); @@ -1099,7 +1099,7 @@ afr_setattr (call_frame_t *frame, xlator_t *this, priv = this->private; - QUORUM_CHECK(setattr,out); + 
QUORUM_CHECK(setattr,out); transaction_frame = copy_frame (frame); if (!transaction_frame) { @@ -1306,7 +1306,7 @@ afr_fsetattr (call_frame_t *frame, xlator_t *this, priv = this->private; - QUORUM_CHECK(fsetattr,out); + QUORUM_CHECK(fsetattr,out); transaction_frame = copy_frame (frame); if (!transaction_frame) { @@ -1497,7 +1497,7 @@ afr_setxattr (call_frame_t *frame, xlator_t *this, priv = this->private; - QUORUM_CHECK(setxattr,out); + QUORUM_CHECK(setxattr,out); ALLOC_OR_GOTO (local, afr_local_t, out); @@ -1683,7 +1683,7 @@ afr_removexattr (call_frame_t *frame, xlator_t *this, priv = this->private; - QUORUM_CHECK(removexattr,out); + QUORUM_CHECK(removexattr,out); transaction_frame = copy_frame (frame); if (!transaction_frame) { diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c index 5031cfd5ef2..0b46f213f4b 100644 --- a/xlators/cluster/afr/src/afr-open.c +++ b/xlators/cluster/afr/src/afr-open.c @@ -214,9 +214,9 @@ afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, priv = this->private; - if (flags & (O_CREAT|O_TRUNC)) { - QUORUM_CHECK(open,out); - } + if (flags & (O_CREAT|O_TRUNC)) { + QUORUM_CHECK(open,out); + } if (afr_is_split_brain (this, loc->inode)) { /* self-heal failed */ diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index 7791ec86bfc..c66f386f2fe 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -97,14 +97,30 @@ xlator_subvolume_count (xlator_t *this) return i; } +void +fix_quorum_options (xlator_t *this, afr_private_t *priv, char *qtype) +{ + if (priv->quorum_count && strcmp(qtype,"fixed")) { + gf_log(this->name,GF_LOG_WARNING, + "quorum-type %s overriding quorum-count %u", + qtype, priv->quorum_count); + } + if (!strcmp(qtype,"none")) { + priv->quorum_count = 0; + } + else if (!strcmp(qtype,"auto")) { + priv->quorum_count = AFR_QUORUM_AUTO; + } +} int reconfigure (xlator_t *this, dict_t *options) { - afr_private_t * priv = NULL; - 
xlator_t * read_subvol = NULL; - int ret = -1; - int index = -1; + afr_private_t *priv = NULL; + xlator_t *read_subvol = NULL; + int ret = -1; + int index = -1; + char *qtype = NULL; priv = this->private; @@ -154,6 +170,11 @@ reconfigure (xlator_t *this, dict_t *options) priv->read_child = index; } + GF_OPTION_RECONF ("quorum-type", qtype, options, str, out); + GF_OPTION_RECONF ("quorum-count", priv->quorum_count, options, + uint32, out); + fix_quorum_options(this,priv,qtype); + ret = 0; out: return ret; @@ -173,15 +194,15 @@ static const char *favorite_child_warning_str = "You have specified subvolume '% int32_t init (xlator_t *this) { - afr_private_t * priv = NULL; - int child_count = 0; - xlator_list_t * trav = NULL; - int i = 0; - int ret = -1; - GF_UNUSED int op_errno = 0; - xlator_t * read_subvol = NULL; - xlator_t * fav_child = NULL; - + afr_private_t *priv = NULL; + int child_count = 0; + xlator_list_t *trav = NULL; + int i = 0; + int ret = -1; + GF_UNUSED int op_errno = 0; + xlator_t *read_subvol = NULL; + xlator_t *fav_child = NULL; + char *qtype = NULL; if (!this->children) { gf_log (this->name, GF_LOG_ERROR, @@ -260,7 +281,9 @@ init (xlator_t *this) GF_OPTION_INIT ("strict-readdir", priv->strict_readdir, bool, out); - GF_OPTION_INIT ("enforce-quorum", priv->enforce_quorum, bool, out); + GF_OPTION_INIT ("quorum-type", qtype, str, out); + GF_OPTION_INIT ("quorum-count", priv->quorum_count, uint32, out); + fix_quorum_options(this,priv,qtype); priv->wait_count = 1; @@ -492,9 +515,24 @@ struct volume_options options[] = { .type = GF_OPTION_TYPE_BOOL, .default_value = "off", }, - { .key = {"enforce-quorum"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", + { .key = {"quorum-type"}, + .type = GF_OPTION_TYPE_STR, + .value = { "none", "auto", "fixed", "" }, + .default_value = "none", + .description = "If value is \"fixed\" only allow writes if " + "quorum-count bricks are present. 
If value is " + "\"auto\" only allow writes if more than half of " + "bricks, or exactly half including the first, are " + "present.", + }, + { .key = {"quorum-count"}, + .type = GF_OPTION_TYPE_INT, + .min = 1, + .max = INT_MAX, + .default_value = 0, + .description = "If quorum-type is \"fixed\" only allow writes if " + "this many bricks are present. Other quorum types " + "will OVERWRITE this value.", }, { .key = {NULL} }, }; diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 0566495fe96..4aea44c4275 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -144,9 +144,9 @@ typedef struct _afr_private { pthread_mutex_t mutex; struct list_head saved_fds; /* list of fds on which locks have succeeded */ - gf_boolean_t optimistic_change_log; - gf_boolean_t eager_lock; - gf_boolean_t enforce_quorum; + gf_boolean_t optimistic_change_log; + gf_boolean_t eager_lock; + unsigned int quorum_count; char vol_uuid[UUID_SIZE + 1]; int32_t *last_event; @@ -1005,17 +1005,23 @@ afr_child_fd_ctx_set (xlator_t *this, fd_t *fd, int32_t child, gf_boolean_t afr_have_quorum (char *logname, afr_private_t *priv); +/* + * Special value indicating we should use the "auto" quorum method instead of + * a fixed value (including zero to turn off quorum enforcement). + */ +#define AFR_QUORUM_AUTO INT_MAX + /* * Having this as a macro will make debugging a bit weirder, but does reduce * the probability of functions handling this check inconsistently. 
*/ #define QUORUM_CHECK(_func,_label) do { \ - if (priv->enforce_quorum && !afr_have_quorum(this->name,priv)) { \ - gf_log(this->name,GF_LOG_WARNING, \ - "failing "#_func" due to lack of quorum"); \ - op_errno = EROFS; \ - goto _label; \ - } \ + if (priv->quorum_count && !afr_have_quorum(this->name,priv)) { \ + gf_log(this->name,GF_LOG_WARNING, \ + "failing "#_func" due to lack of quorum"); \ + op_errno = EROFS; \ + goto _label; \ + } \ } while (0); #endif /* __AFR_H__ */ diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index 0fa7601f145..f83a052ed50 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -125,6 +125,8 @@ static struct volopt_map_entry glusterd_volopt_map[] = { {"cluster.data-change-log", "cluster/replicate", NULL, NULL, NO_DOC, 0 }, {"cluster.metadata-change-log", "cluster/replicate", NULL, NULL, NO_DOC, 0 }, {"cluster.data-self-heal-algorithm", "cluster/replicate", "data-self-heal-algorithm", NULL,DOC, 0}, + {"cluster.quorum-type", "cluster/replicate", "quorum-type", NULL, NO_DOC, 0}, + {"cluster.quorum-count", "cluster/replicate", "quorum-count", NULL, NO_DOC, 0}, {"cluster.stripe-block-size", "cluster/stripe", "block-size", NULL, DOC, 0}, -- cgit