summaryrefslogtreecommitdiffstats
path: root/xlators/mgmt/glusterd/src/glusterd-server-quorum.c
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/mgmt/glusterd/src/glusterd-server-quorum.c')
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-server-quorum.c486
1 files changed, 486 insertions, 0 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-server-quorum.c b/xlators/mgmt/glusterd/src/glusterd-server-quorum.c
new file mode 100644
index 00000000000..b0b8a2e4018
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-server-quorum.c
@@ -0,0 +1,486 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <glusterfs/common-utils.h>
+#include "glusterd.h"
+#include "glusterd-utils.h"
+#include "glusterd-messages.h"
+#include "glusterd-server-quorum.h"
+#include "glusterd-store.h"
+#include "glusterd-syncop.h"
+#include "glusterd-op-sm.h"
+
+#define CEILING_POS(X) (((X) - (int)(X)) > 0 ? (int)((X) + 1) : (int)(X))
+
+static gf_boolean_t
+glusterd_is_get_op(xlator_t *this, glusterd_op_t op, dict_t *dict)
+{
+ char *key = NULL;
+ char *volname = NULL;
+ int ret = 0;
+
+ if (op == GD_OP_STATUS_VOLUME)
+ return _gf_true;
+
+ if (op == GD_OP_SET_VOLUME) {
+ /*check for set volume help*/
+ ret = dict_get_str(dict, "volname", &volname);
+ if (volname && ((strcmp(volname, "help") == 0) ||
+ (strcmp(volname, "help-xml") == 0))) {
+ ret = dict_get_str(dict, "key1", &key);
+ if (ret < 0)
+ return _gf_true;
+ }
+ }
+ return _gf_false;
+}
+
+gf_boolean_t
+glusterd_is_quorum_validation_required(xlator_t *this, glusterd_op_t op,
+ dict_t *dict)
+{
+ gf_boolean_t required = _gf_true;
+ char *key = NULL;
+ char *key_fixed = NULL;
+ int ret = -1;
+
+ if (glusterd_is_get_op(this, op, dict)) {
+ required = _gf_false;
+ goto out;
+ }
+ if ((op != GD_OP_SET_VOLUME) && (op != GD_OP_RESET_VOLUME))
+ goto out;
+ if (op == GD_OP_SET_VOLUME)
+ ret = dict_get_str(dict, "key1", &key);
+ else if (op == GD_OP_RESET_VOLUME)
+ ret = dict_get_str(dict, "key", &key);
+ if (ret)
+ goto out;
+ ret = glusterd_check_option_exists(key, &key_fixed);
+ if (ret <= 0)
+ goto out;
+ if (key_fixed)
+ key = key_fixed;
+ if (glusterd_is_quorum_option(key))
+ required = _gf_false;
+out:
+ GF_FREE(key_fixed);
+ return required;
+}
+
+int
+glusterd_validate_quorum(xlator_t *this, glusterd_op_t op, dict_t *dict,
+ char **op_errstr)
+{
+ int ret = 0;
+ char *volname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ char *errstr = NULL;
+
+ errstr = "Quorum not met. Volume operation not allowed.";
+ if (!glusterd_is_quorum_validation_required(this, op, dict))
+ goto out;
+
+ ret = dict_get_str(dict, "volname", &volname);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=volname", NULL);
+ ret = 0;
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_VOLINFO_GET_FAIL, NULL);
+ ret = 0;
+ goto out;
+ }
+
+ if (!glusterd_is_volume_in_server_quorum(volinfo)) {
+ ret = 0;
+ goto out;
+ }
+
+ if (does_gd_meet_server_quorum(this)) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = -1;
+ *op_errstr = gf_strdup(errstr);
+
+out:
+ return ret;
+}
+
+gf_boolean_t
+glusterd_is_quorum_option(char *option)
+{
+ gf_boolean_t res = _gf_false;
+ int i = 0;
+ static const char *const keys[] = {GLUSTERD_QUORUM_TYPE_KEY,
+ GLUSTERD_QUORUM_RATIO_KEY, NULL};
+
+ for (i = 0; keys[i]; i++) {
+ if (strcmp(option, keys[i]) == 0) {
+ res = _gf_true;
+ break;
+ }
+ }
+ return res;
+}
+
+gf_boolean_t
+glusterd_is_quorum_changed(dict_t *options, char *option, char *value)
+{
+ int ret = 0;
+ gf_boolean_t reconfigured = _gf_false;
+ gf_boolean_t all = _gf_false;
+ char *oldquorum = NULL;
+ char *newquorum = NULL;
+ char *oldratio = NULL;
+ char *newratio = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+
+ if ((strcmp("all", option) != 0) && !glusterd_is_quorum_option(option))
+ goto out;
+
+ if (strcmp("all", option) == 0)
+ all = _gf_true;
+
+ if (all || (strcmp(GLUSTERD_QUORUM_TYPE_KEY, option) == 0)) {
+ newquorum = value;
+ ret = dict_get_str(options, GLUSTERD_QUORUM_TYPE_KEY, &oldquorum);
+ if (ret)
+ gf_msg(this->name, GF_LOG_DEBUG, 0, GD_MSG_DICT_GET_FAILED,
+ "dict_get_str failed on %s", GLUSTERD_QUORUM_TYPE_KEY);
+ }
+
+ if (all || (strcmp(GLUSTERD_QUORUM_RATIO_KEY, option) == 0)) {
+ newratio = value;
+ ret = dict_get_str(options, GLUSTERD_QUORUM_RATIO_KEY, &oldratio);
+ if (ret)
+ gf_msg(this->name, GF_LOG_DEBUG, 0, GD_MSG_DICT_GET_FAILED,
+ "dict_get_str failed on %s", GLUSTERD_QUORUM_RATIO_KEY);
+ }
+
+ reconfigured = _gf_true;
+
+ if (oldquorum && newquorum && (strcmp(oldquorum, newquorum) == 0))
+ reconfigured = _gf_false;
+ if (oldratio && newratio && (strcmp(oldratio, newratio) == 0))
+ reconfigured = _gf_false;
+
+ if ((oldratio == NULL) && (newratio == NULL) && (oldquorum == NULL) &&
+ (newquorum == NULL))
+ reconfigured = _gf_false;
+out:
+ return reconfigured;
+}
+
+static gf_boolean_t
+_is_contributing_to_quorum(gd_quorum_contrib_t contrib)
+{
+ if ((contrib == QUORUM_UP) || (contrib == QUORUM_DOWN))
+ return _gf_true;
+ return _gf_false;
+}
+
+gf_boolean_t
+does_quorum_meet(int active_count, int quorum_count)
+{
+ return (active_count >= quorum_count);
+}
+
+int
+glusterd_get_quorum_cluster_counts(xlator_t *this, int *active_count,
+ int *quorum_count)
+{
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_conf_t *conf = NULL;
+ int ret = -1;
+ int inquorum_count = 0;
+ char *val = NULL;
+ double quorum_percentage = 0.0;
+ gf_boolean_t ratio = _gf_false;
+ int count = 0;
+
+ conf = this->private;
+
+ /* Start with counting self */
+ inquorum_count = 1;
+ if (active_count)
+ *active_count = 1;
+
+ RCU_READ_LOCK;
+ cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list)
+ {
+ if (_is_contributing_to_quorum(peerinfo->quorum_contrib))
+ inquorum_count = inquorum_count + 1;
+ if (active_count && (peerinfo->quorum_contrib == QUORUM_UP))
+ *active_count = *active_count + 1;
+ }
+ RCU_READ_UNLOCK;
+
+ ret = dict_get_str(conf->opts, GLUSTERD_QUORUM_RATIO_KEY, &val);
+ if (ret == 0) {
+ ret = gf_string2percent(val, &quorum_percentage);
+ if (ret == 0)
+ ratio = _gf_true;
+ }
+ if (ratio)
+ count = CEILING_POS(inquorum_count * quorum_percentage / 100.0);
+ else
+ count = (inquorum_count * 50 / 100) + 1;
+
+ *quorum_count = count;
+ ret = 0;
+
+ return ret;
+}
+
+gf_boolean_t
+glusterd_is_volume_in_server_quorum(glusterd_volinfo_t *volinfo)
+{
+ gf_boolean_t res = _gf_false;
+ char *quorum_type = NULL;
+ int ret = 0;
+
+ ret = dict_get_str(volinfo->dict, GLUSTERD_QUORUM_TYPE_KEY, &quorum_type);
+ if (ret) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=%s", GLUSTERD_QUORUM_TYPE_KEY, NULL);
+ goto out;
+ }
+
+ if (strcmp(quorum_type, GLUSTERD_SERVER_QUORUM) == 0)
+ res = _gf_true;
+out:
+ return res;
+}
+
+gf_boolean_t
+glusterd_is_any_volume_in_server_quorum(xlator_t *this)
+{
+ glusterd_conf_t *conf = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+
+ conf = this->private;
+ list_for_each_entry(volinfo, &conf->volumes, vol_list)
+ {
+ if (glusterd_is_volume_in_server_quorum(volinfo)) {
+ return _gf_true;
+ }
+ }
+ return _gf_false;
+}
+
+gf_boolean_t
+does_gd_meet_server_quorum(xlator_t *this)
+{
+ int quorum_count = 0;
+ int active_count = 0;
+ gf_boolean_t in = _gf_false;
+ int ret = -1;
+
+ ret = glusterd_get_quorum_cluster_counts(this, &active_count,
+ &quorum_count);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_QUORUM_CLUSTER_COUNT_GET_FAIL, NULL);
+ goto out;
+ }
+
+ if (!does_quorum_meet(active_count, quorum_count)) {
+ goto out;
+ }
+
+ in = _gf_true;
+out:
+ return in;
+}
+
+void
+glusterd_do_volume_quorum_action(xlator_t *this, glusterd_volinfo_t *volinfo,
+ gf_boolean_t meets_quorum)
+{
+ int ret = -1;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ gd_quorum_status_t quorum_status = NOT_APPLICABLE_QUORUM;
+ gf_boolean_t follows_quorum = _gf_false;
+ gf_boolean_t quorum_status_unchanged = _gf_false;
+
+ if (volinfo->status != GLUSTERD_STATUS_STARTED) {
+ volinfo->quorum_status = NOT_APPLICABLE_QUORUM;
+ goto out;
+ }
+
+ follows_quorum = glusterd_is_volume_in_server_quorum(volinfo);
+ if (follows_quorum) {
+ if (meets_quorum)
+ quorum_status = MEETS_QUORUM;
+ else
+ quorum_status = DOESNT_MEET_QUORUM;
+ } else {
+ quorum_status = NOT_APPLICABLE_QUORUM;
+ }
+
+ /*
+ * The following check is added to prevent spurious brick starts when
+ * events occur that affect quorum.
+ * Example:
+ * There is a cluster of 10 peers. Volume is in quorum. User
+ * takes down one brick from the volume to perform maintenance.
+ * Suddenly one of the peers go down. Cluster is still in quorum. But
+ * because of this 'peer going down' event, quorum is calculated and
+ * the bricks that are down are brought up again. In this process it
+ * also brings up the brick that is purposefully taken down.
+ */
+ if (volinfo->quorum_status == quorum_status) {
+ quorum_status_unchanged = _gf_true;
+ goto out;
+ }
+
+ if (quorum_status == MEETS_QUORUM) {
+ gf_msg(this->name, GF_LOG_CRITICAL, 0,
+ GD_MSG_SERVER_QUORUM_MET_STARTING_BRICKS,
+ "Server quorum regained for volume %s. Starting local "
+ "bricks.",
+ volinfo->volname);
+ gf_event(EVENT_QUORUM_REGAINED, "volume=%s", volinfo->volname);
+ } else if (quorum_status == DOESNT_MEET_QUORUM) {
+ gf_msg(this->name, GF_LOG_CRITICAL, 0,
+ GD_MSG_SERVER_QUORUM_LOST_STOPPING_BRICKS,
+ "Server quorum lost for volume %s. Stopping local "
+ "bricks.",
+ volinfo->volname);
+ gf_event(EVENT_QUORUM_LOST, "volume=%s", volinfo->volname);
+ }
+
+ list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
+ {
+ if (!glusterd_is_local_brick(this, volinfo, brickinfo))
+ continue;
+ if (quorum_status == DOESNT_MEET_QUORUM) {
+ ret = glusterd_brick_stop(volinfo, brickinfo, _gf_false);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_STOP_FAIL,
+ "Failed to "
+ "stop brick %s:%s",
+ brickinfo->hostname, brickinfo->path);
+ }
+ } else {
+ if (!brickinfo->start_triggered) {
+ pthread_mutex_lock(&brickinfo->restart_mutex);
+ {
+ /* coverity[SLEEP] */
+ ret = glusterd_brick_start(volinfo, brickinfo, _gf_false,
+ _gf_false);
+ }
+ pthread_mutex_unlock(&brickinfo->restart_mutex);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_BRICK_DISCONNECTED, "Failed to start %s:%s",
+ brickinfo->hostname, brickinfo->path);
+ }
+ }
+ }
+ }
+ volinfo->quorum_status = quorum_status;
+ if (quorum_status == MEETS_QUORUM) {
+ /* bricks might have been restarted and so as the port change
+ * might have happened
+ */
+ ret = glusterd_store_volinfo(volinfo, GLUSTERD_VOLINFO_VER_AC_NONE);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_STORE_FAIL,
+ "Failed to write volinfo for volume %s", volinfo->volname);
+ goto out;
+ }
+ }
+out:
+ if (quorum_status_unchanged) {
+ list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
+ {
+ if (!glusterd_is_local_brick(this, volinfo, brickinfo))
+ continue;
+ ret = glusterd_brick_start(volinfo, brickinfo, _gf_false, _gf_true);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_DISCONNECTED,
+ "Failed to "
+ "connect to %s:%s",
+ brickinfo->hostname, brickinfo->path);
+ }
+ }
+ }
+ return;
+}
+
+int
+glusterd_do_quorum_action()
+{
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ int ret = 0;
+ int active_count = 0;
+ int quorum_count = 0;
+ gf_boolean_t meets = _gf_false;
+
+ this = THIS;
+ conf = this->private;
+
+ conf->pending_quorum_action = _gf_true;
+ ret = glusterd_lock(conf->uuid);
+ if (ret)
+ goto out;
+
+ {
+ ret = glusterd_get_quorum_cluster_counts(this, &active_count,
+ &quorum_count);
+ if (ret)
+ goto unlock;
+
+ if (does_quorum_meet(active_count, quorum_count))
+ meets = _gf_true;
+ list_for_each_entry(volinfo, &conf->volumes, vol_list)
+ {
+ glusterd_do_volume_quorum_action(this, volinfo, meets);
+ }
+ }
+unlock:
+ (void)glusterd_unlock(conf->uuid);
+ conf->pending_quorum_action = _gf_false;
+out:
+ return ret;
+}
+
+/* ret = 0 represents quorum is not met
+ * ret = 1 represents quorum is met
+ * ret = 2 represents quorum not applicable
+ */
+
+int
+check_quorum_for_brick_start(glusterd_volinfo_t *volinfo,
+ gf_boolean_t node_quorum)
+{
+ gf_boolean_t volume_quorum = _gf_false;
+ int ret = 0;
+
+ volume_quorum = glusterd_is_volume_in_server_quorum(volinfo);
+ if (volume_quorum) {
+ if (node_quorum)
+ ret = 1;
+ } else {
+ ret = 2;
+ }
+ return ret;
+}