diff options
| field | value | date |
|---|---|---|
| author | Pranith Kumar K <pranithk@gluster.com> | 2012-09-27 00:49:54 +0530 |
| committer | Pranith Kumar K <pkarampu@redhat.com> | 2012-09-27 10:41:19 +0530 |
| commit | 13cfed388a19dd70984197fb14330d0277ba91e7 (patch) | |
| tree | 8e86f3cc4f441b3804741ee9dc6dbb9288269b12 /xlators | |
| parent | c8f9437b6ef507a4dc7fe03aa32a9fbf220d09f6 (diff) | |
mgmt/glusterd: Implementation of server-side quorum.
Feature-page:
http://www.gluster.org/community/documentation/index.php/Features/Server-quorum
Change-Id: Ifec0f1a697d390a29ba447a09750602fea1b3a4b
BUG: 840122
Signed-off-by: Pranith Kumar K <pranithk@gluster.com>
Diffstat (limited to 'xlators')
-rw-r--r-- | xlators/cluster/dht/src/dht-common.h | 4 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht.c | 4 | ||||
-rw-r--r-- | xlators/cluster/dht/src/nufa.c | 13 | ||||
-rw-r--r-- | xlators/cluster/dht/src/switch.c | 18 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-handler.c | 177 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-op-sm.c | 453 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-sm.c | 35 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-sm.h | 16 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-store.c | 118 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-store.h | 6 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 416 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.h | 31 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volgen.c | 2 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.c | 41 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.h | 23 |
15 files changed, 1182 insertions, 175 deletions
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index da83967e76d..d244921279d 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -222,8 +222,8 @@ struct dht_conf { gf_boolean_t search_unhashed; int gen; dht_du_t *du_stats; - uint64_t min_free_disk; - uint32_t min_free_inodes; + double min_free_disk; + double min_free_inodes; char disk_unit; int32_t refresh_interval; gf_boolean_t unhashed_sticky_bit; diff --git a/xlators/cluster/dht/src/dht.c b/xlators/cluster/dht/src/dht.c index 47b387437e4..a678bb9ef04 100644 --- a/xlators/cluster/dht/src/dht.c +++ b/xlators/cluster/dht/src/dht.c @@ -118,8 +118,8 @@ dht_priv_dump (xlator_t *this) gf_proc_dump_write("search_unhashed", "%d", conf->search_unhashed); gf_proc_dump_write("gen", "%d", conf->gen); - gf_proc_dump_write("min_free_disk", "%lu", conf->min_free_disk); - gf_proc_dump_write("min_free_inodes", "%lu", conf->min_free_inodes); + gf_proc_dump_write("min_free_disk", "%lf", conf->min_free_disk); + gf_proc_dump_write("min_free_inodes", "%lf", conf->min_free_inodes); gf_proc_dump_write("disk_unit", "%c", conf->disk_unit); gf_proc_dump_write("refresh_interval", "%d", conf->refresh_interval); gf_proc_dump_write("unhashed_sticky_bit", "%d", conf->unhashed_sticky_bit); diff --git a/xlators/cluster/dht/src/nufa.c b/xlators/cluster/dht/src/nufa.c index 2179870d9ad..1d3e3224763 100644 --- a/xlators/cluster/dht/src/nufa.c +++ b/xlators/cluster/dht/src/nufa.c @@ -504,7 +504,8 @@ init (xlator_t *this) int ret = -1; int i = 0; char my_hostname[256]; - uint32_t temp_free_disk = 0; + double temp_free_disk = 0; + uint64_t size = 0; if (!this->children) { gf_log (this->name, GF_LOG_CRITICAL, @@ -589,16 +590,16 @@ init (xlator_t *this) if (gf_string2percent (temp_str, &temp_free_disk) == 0) { if (temp_free_disk > 100) { - gf_string2bytesize (temp_str, - &conf->min_free_disk); + gf_string2bytesize (temp_str, &size); + conf->min_free_disk = size; 
conf->disk_unit = 'b'; } else { - conf->min_free_disk = (uint64_t)temp_free_disk; + conf->min_free_disk = temp_free_disk; conf->disk_unit = 'p'; } } else { - gf_string2bytesize (temp_str, - &conf->min_free_disk); + gf_string2bytesize (temp_str, &size); + conf->min_free_disk = size; conf->disk_unit = 'b'; } } diff --git a/xlators/cluster/dht/src/switch.c b/xlators/cluster/dht/src/switch.c index fe75914f247..bc1f5f1f499 100644 --- a/xlators/cluster/dht/src/switch.c +++ b/xlators/cluster/dht/src/switch.c @@ -832,7 +832,8 @@ init (xlator_t *this) char *temp_str = NULL; int ret = -1; int i = 0; - uint32_t temp_free_disk = 0; + double temp_free_disk = 0; + uint64_t size = 0; if (!this->children) { gf_log (this->name, GF_LOG_CRITICAL, @@ -865,24 +866,23 @@ init (xlator_t *this) gf_string2boolean (temp_str, &conf->unhashed_sticky_bit); } - conf->min_free_disk = 10; + conf->min_free_disk = 10.0; conf->disk_unit = 'p'; if (dict_get_str (this->options, "min-free-disk", &temp_str) == 0) { - if (gf_string2percent (temp_str, - &temp_free_disk) == 0) { + if (gf_string2percent (temp_str, &temp_free_disk) == 0) { if (temp_free_disk > 100) { - gf_string2bytesize (temp_str, - &conf->min_free_disk); + gf_string2bytesize (temp_str, &size); + conf->min_free_disk = size; conf->disk_unit = 'b'; } else { - conf->min_free_disk = (uint64_t)temp_free_disk; + conf->min_free_disk = temp_free_disk; conf->disk_unit = 'p'; } } else { - gf_string2bytesize (temp_str, - &conf->min_free_disk); + gf_string2bytesize (temp_str, &size); + conf->min_free_disk = size; conf->disk_unit = 'b'; } } diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c index c4706f930e4..a9c79ce2ab1 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-handler.c @@ -286,19 +286,19 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo, glusterd_brickinfo_t *brickinfo = NULL; char *buf = NULL; int i = 1; - data_pair_t 
*pairs = NULL; char reconfig_key[256] = {0, }; dict_t *dict = NULL; - data_t *value = NULL; int opt_count = 0; glusterd_conf_t *priv = NULL; char *volume_id_str = NULL; + xlator_t *this = NULL; GF_ASSERT (volinfo); GF_ASSERT (volumes); - priv = THIS->private; + this = THIS; + priv = this->private; GF_ASSERT (priv); @@ -374,22 +374,16 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo, goto out; } - pairs = dict->members_list; - - while (pairs) { - if (1 == glusterd_check_option_exists (pairs->key, NULL)) { - value = pairs->value; - if (!value) - continue; - - snprintf (reconfig_key, 256, "volume%d.option.%s", count, - pairs->key); - ret = dict_set_str (volumes, reconfig_key, value->data); - if (!ret) - opt_count++; - } - pairs = pairs->next; + void _build_option_key (dict_t *d, char *k, data_t *v, void *tmp) + { + snprintf (reconfig_key, 256, "volume%d.option.%s", count, k); + ret = dict_set_str (volumes, reconfig_key, v->data); + if (0 == ret) + opt_count++; + return; } + dict_foreach (dict, _build_option_key, NULL); + dict_foreach (priv->opts, _build_option_key, NULL); snprintf (key, 256, "volume%d.opt_count", count); ret = dict_set_int32 (volumes, key, opt_count); @@ -665,7 +659,10 @@ glusterd_handle_cli_probe (rpcsvc_request_t *req) gf1_cli_probe_req cli_req = {0,}; glusterd_peerinfo_t *peerinfo = NULL; gf_boolean_t run_fsm = _gf_true; + xlator_t *this = NULL; + GF_ASSERT (req); + this = THIS; if (!xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf1_cli_probe_req)) { //failed to decode msg; @@ -674,6 +671,16 @@ glusterd_handle_cli_probe (rpcsvc_request_t *req) goto out; } + if (glusterd_is_any_volume_in_server_quorum (this) && + !does_gd_meet_server_quorum (this)) { + glusterd_xfer_cli_probe_resp (req, -1, GF_PROBE_QUORUM_NOT_MET, + cli_req.hostname, cli_req.port); + gf_log (this->name, GF_LOG_ERROR, "Quorum does not meet, " + "rejecting operation"); + ret = 0; + goto out; + } + gf_cmd_log ("peer probe", " on host %s:%d", cli_req.hostname, 
cli_req.port); gf_log ("glusterd", GF_LOG_INFO, "Received CLI probe req %s %d", @@ -722,7 +729,7 @@ int glusterd_handle_cli_deprobe (rpcsvc_request_t *req) { int32_t ret = -1; - gf1_cli_deprobe_req cli_req = {0,}; + gf1_cli_deprobe_req cli_req = {0,}; uuid_t uuid = {0}; int op_errno = 0; xlator_t *this = NULL; @@ -755,18 +762,29 @@ glusterd_handle_cli_deprobe (rpcsvc_request_t *req) goto out; } - if (!uuid_is_null (uuid) && !(cli_req.flags & GF_CLI_FLAG_OP_FORCE)) { - /* Check if peers are connected, except peer being detached*/ - if (!glusterd_chk_peers_connected_befriended (uuid)) { - ret = -1; - op_errno = GF_DEPROBE_FRIEND_DOWN; - goto out; + if (!(cli_req.flags & GF_CLI_FLAG_OP_FORCE)) { + if (!uuid_is_null (uuid)) { + /* Check if peers are connected, except peer being detached*/ + if (!glusterd_chk_peers_connected_befriended (uuid)) { + ret = -1; + op_errno = GF_DEPROBE_FRIEND_DOWN; + goto out; + } + ret = glusterd_all_volume_cond_check ( + glusterd_friend_brick_belongs, + -1, &uuid); + if (ret) { + op_errno = GF_DEPROBE_BRICK_EXIST; + goto out; + } } - ret = glusterd_all_volume_cond_check ( - glusterd_friend_brick_belongs, - -1, &uuid); - if (ret) { - op_errno = GF_DEPROBE_BRICK_EXIST; + + if (glusterd_is_any_volume_in_server_quorum (this) && + !does_gd_meet_server_quorum (this)) { + gf_log (this->name, GF_LOG_ERROR, "Quorum does not " + "meet, rejecting operation"); + ret = -1; + op_errno = GF_DEPROBE_QUORUM_NOT_MET; goto out; } } @@ -2157,6 +2175,43 @@ out: } int +glusterd_friend_rpc_create (xlator_t *this, glusterd_peerinfo_t *peerinfo, + glusterd_peerctx_args_t *args) +{ + dict_t *options = NULL; + int ret = -1; + glusterd_peerctx_t *peerctx = NULL; + + peerctx = GF_CALLOC (1, sizeof (*peerctx), gf_gld_mt_peerctx_t); + if (!peerctx) + goto out; + + if (args) + peerctx->args = *args; + + peerctx->peerinfo = peerinfo; + + ret = glusterd_transport_inet_options_build (&options, + peerinfo->hostname, + peerinfo->port); + if (ret) + goto out; + + ret = 
glusterd_rpc_create (&peerinfo->rpc, options, + glusterd_peer_rpc_notify, peerctx); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "failed to create rpc for" + " peer %s", peerinfo->hostname); + goto out; + } + peerctx = NULL; + ret = 0; +out: + GF_FREE (peerctx); + return ret; +} + +int glusterd_friend_add (const char *hoststr, int port, glusterd_friend_sm_state_t state, uuid_t *uuid, @@ -2167,8 +2222,6 @@ glusterd_friend_add (const char *hoststr, int port, int ret = 0; xlator_t *this = NULL; glusterd_conf_t *conf = NULL; - glusterd_peerctx_t *peerctx = NULL; - dict_t *options = NULL; gf_boolean_t handover = _gf_false; this = THIS; @@ -2176,49 +2229,35 @@ glusterd_friend_add (const char *hoststr, int port, GF_ASSERT (conf); GF_ASSERT (hoststr); - peerctx = GF_CALLOC (1, sizeof (*peerctx), gf_gld_mt_peerctx_t); - if (!peerctx) { - ret = -1; - goto out; - } - - if (args) - peerctx->args = *args; - - ret = glusterd_peerinfo_new (friend, state, uuid, hoststr); + ret = glusterd_peerinfo_new (friend, state, uuid, hoststr, port); if (ret) goto out; - peerctx->peerinfo = *friend; + //restore needs to first create the list of peers, then create rpcs + //to keep track of quorum in race-free manner. In restore for each peer + //rpc-create calls rpc_notify when the friend-list is partially + //constructed, leading to wrong quorum calculations. 
+ if (restore) + goto done; - ret = glusterd_transport_inet_options_build (&options, hoststr, port); - if (ret) - goto out; - - if (!restore) { - ret = glusterd_store_peerinfo (*friend); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Failed to store " - "peerinfo"); - - goto out; - } - } - list_add_tail (&(*friend)->uuid_list, &conf->peers); - ret = glusterd_rpc_create (&(*friend)->rpc, options, - glusterd_peer_rpc_notify, - peerctx); + ret = glusterd_store_peerinfo (*friend); if (ret) { - gf_log (this->name, GF_LOG_ERROR, "failed to create rpc for" - " peer %s", (char*)hoststr); + gf_log (this->name, GF_LOG_ERROR, "Failed to store " + "peerinfo"); + goto out; } + ret = glusterd_friend_rpc_create (this, *friend, args); + if (ret) + goto out; +done: + list_add_tail (&(*friend)->uuid_list, &conf->peers); handover = _gf_true; out: if (ret && !handover) { - (void) glusterd_friend_cleanup (*friend); - *friend = NULL; + (void) glusterd_friend_cleanup (*friend); + *friend = NULL; } gf_log (this->name, GF_LOG_INFO, "connect returned %d", ret); @@ -2880,6 +2919,7 @@ glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata, glusterd_peerctx_t *peerctx = NULL; uuid_t owner = {0,}; uuid_t *peer_uuid = NULL; + gf_boolean_t quorum_action = _gf_false; peerctx = mydata; if (!peerctx) @@ -2894,6 +2934,7 @@ glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata, { gf_log (this->name, GF_LOG_DEBUG, "got RPC_CLNT_CONNECT"); peerinfo->connected = 1; + peerinfo->quorum_action = _gf_true; ret = glusterd_peer_handshake (this, rpc, peerctx); if (ret) @@ -2906,6 +2947,12 @@ glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata, gf_log (this->name, GF_LOG_DEBUG, "got RPC_CLNT_DISCONNECT %d", peerinfo->state.state); + if ((peerinfo->quorum_contrib != QUORUM_DOWN) && + (peerinfo->state.state == GD_FRIEND_STATE_BEFRIENDED)) { + peerinfo->quorum_contrib = QUORUM_DOWN; + quorum_action = _gf_true; + peerinfo->quorum_action = _gf_false; + } peerinfo->connected = 0; /* @@ -2954,6 
+3001,8 @@ glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata, glusterd_friend_sm (); glusterd_op_sm (); + if (quorum_action) + glusterd_do_quorum_action (); return ret; } diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index 2d56778102f..a46b470f0a1 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -56,6 +56,24 @@ #include <signal.h> #include <sys/wait.h> +#define ALL_VOLUME_OPTION_CHECK(volname, key, ret, op_errstr, label) \ + do { \ + gf_boolean_t _all = !strcmp ("all", volname); \ + gf_boolean_t _ratio = !strcmp (key, \ + GLUSTERD_QUORUM_RATIO_KEY); \ + if (_all && !_ratio) { \ + ret = -1; \ + *op_errstr = gf_strdup ("Not a valid option for all " \ + "volumes"); \ + goto label; \ + } else if (!_all && _ratio) { \ + ret = -1; \ + *op_errstr = gf_strdup ("Not a valid option for " \ + "single volume"); \ + goto label; \ + } \ + } while (0) + static struct list_head gd_op_sm_queue; pthread_mutex_t gd_op_sm_lock; glusterd_op_info_t opinfo = {{0},}; @@ -301,6 +319,24 @@ out: } static int +glusterd_validate_quorum_options (xlator_t *this, char *fullkey, char *value, + char **op_errstr) +{ + int ret = 0; + char *key = NULL; + volume_option_t *opt = NULL; + + if (!glusterd_is_quorum_option (fullkey)) + goto out; + key = strchr (fullkey, '.'); + key++; + opt = xlator_volume_option_get (this, key); + ret = xlator_option_validate (this, key, value, opt, op_errstr); +out: + return ret; +} + +static int glusterd_op_stage_set_volume (dict_t *dict, char **op_errstr) { int ret = 0; @@ -319,6 +355,7 @@ glusterd_op_stage_set_volume (dict_t *dict, char **op_errstr) glusterd_volinfo_t *voliter = NULL; glusterd_conf_t *priv = NULL; xlator_t *this = NULL; + gf_boolean_t all_vol = _gf_false; GF_ASSERT (dict); this = THIS; @@ -371,26 +408,30 @@ glusterd_op_stage_set_volume (dict_t *dict, char **op_errstr) goto out; } - exists = glusterd_check_volume_exists (volname); 
- if (!exists) { - snprintf (errstr, sizeof (errstr), "Volume %s does not exist", - volname); - gf_log (this->name, GF_LOG_ERROR, "%s", errstr); - *op_errstr = gf_strdup (errstr); - ret = -1; - goto out; - } + if (strcasecmp (volname, "all") != 0) { + exists = glusterd_check_volume_exists (volname); + if (!exists) { + snprintf (errstr, sizeof (errstr), "Volume %s does " + "not exist", volname); + gf_log (this->name, GF_LOG_ERROR, "%s", errstr); + *op_errstr = gf_strdup (errstr); + ret = -1; + goto out; + } - ret = glusterd_volinfo_find (volname, &volinfo); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "Unable to allocate memory"); - goto out; - } + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to allocate memory"); + goto out; + } - ret = glusterd_validate_volume_id (dict, volinfo); - if (ret) - goto out; + ret = glusterd_validate_volume_id (dict, volinfo); + if (ret) + goto out; + } else { + all_vol = _gf_true; + } for ( count = 1; ret != 1 ; count++ ) { global_opt = _gf_false; @@ -420,6 +461,7 @@ glusterd_op_stage_set_volume (dict_t *dict, char **op_errstr) ret = -1; goto out; } + if (!exists) { gf_log (this->name, GF_LOG_ERROR, "Option with name: %s " @@ -438,6 +480,12 @@ glusterd_op_stage_set_volume (dict_t *dict, char **op_errstr) if (key_fixed) key = key_fixed; + ALL_VOLUME_OPTION_CHECK (volname, key, ret, op_errstr, out); + ret = glusterd_validate_quorum_options (this, key, value, + op_errstr); + if (ret) + goto out; + ret = glusterd_check_globaloption (key); if (ret) global_opt = _gf_true; @@ -452,9 +500,9 @@ glusterd_op_stage_set_volume (dict_t *dict, char **op_errstr) } *op_errstr = NULL; - if (!global_opt) + if (!global_opt && !all_vol) ret = glusterd_validate_reconfopts (volinfo, val_dict, op_errstr); - else { + else if (!all_vol) { voliter = NULL; list_for_each_entry (voliter, &priv->volumes, vol_list) { ret = glusterd_validate_globalopts (voliter, val_dict, op_errstr); @@ -476,7 +524,6 
@@ glusterd_op_stage_set_volume (dict_t *dict, char **op_errstr) } } - ret = 0; out: @@ -518,23 +565,24 @@ glusterd_op_stage_reset_volume (dict_t *dict, char **op_errstr) goto out; } - exists = glusterd_check_volume_exists (volname); + if (strcasecmp (volname, "all") != 0) { + exists = glusterd_check_volume_exists (volname); + if (!exists) { + snprintf (msg, sizeof (msg), "Volume %s does not " + "exist", volname); + gf_log ("", GF_LOG_ERROR, "%s", msg); + *op_errstr = gf_strdup (msg); + ret = -1; + goto out; + } + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) + goto out; - if (!exists) { - snprintf (msg, sizeof (msg), "Volume %s does not " - "exist", volname); - gf_log ("", GF_LOG_ERROR, "%s", msg); - *op_errstr = gf_strdup (msg); - ret = -1; - goto out; + ret = glusterd_validate_volume_id (dict, volinfo); + if (ret) + goto out; } - ret = glusterd_volinfo_find (volname, &volinfo); - if (ret) - goto out; - - ret = glusterd_validate_volume_id (dict, volinfo); - if (ret) - goto out; ret = dict_get_str (dict, "key", &key); if (ret) { @@ -558,6 +606,11 @@ glusterd_op_stage_reset_volume (dict_t *dict, char **op_errstr) *op_errstr = gf_strdup (msg); ret = -1; goto out; + } else if (exists > 0) { + if (key_fixed) + key = key_fixed; + ALL_VOLUME_OPTION_CHECK (volname, key, ret, + op_errstr, out); } } @@ -899,6 +952,22 @@ out: return; } +void +_delete_reconfig_global_opt (dict_t *this, char *key, data_t *value, void *data) +{ + int32_t *is_force = 0; + + GF_ASSERT (data); + is_force = (int32_t*)data; + + if (strcmp (GLUSTERD_GLOBAL_OPT_VERSION, key) == 0) + goto out; + + _delete_reconfig_opt (this, key, value, data); +out: + return; +} + int glusterd_options_reset (glusterd_volinfo_t *volinfo, char *key, int32_t is_force) @@ -949,6 +1018,91 @@ out: return ret; } +static int +glusterd_op_reset_all_volume_options (xlator_t *this, dict_t *dict) +{ + char *key = NULL; + char *key_fixed = NULL; + int ret = -1; + int32_t is_force = 0; + glusterd_conf_t *conf = NULL; 
+ dict_t *dup_opt = NULL; + gf_boolean_t all = _gf_false; + char *next_version = NULL; + gf_boolean_t quorum_action = _gf_false; + + conf = this->private; + ret = dict_get_str (dict, "key", &key); + if (ret) + goto out; + + ret = dict_get_int32 (dict, "force", &is_force); + if (ret) + is_force = 0; + + if (strcmp (key, "all")) { + ret = glusterd_check_option_exists (key, &key_fixed); + if (ret <= 0) { + gf_log (this->name, GF_LOG_ERROR, "Invalid key %s", + key); + ret = -1; + goto out; + } + } else { + all = _gf_true; + } + + if (key_fixed) + key = key_fixed; + + ret = -1; + dup_opt = dict_new (); + if (!dup_opt) + goto out; + if (!all) { + dict_copy (conf->opts, dup_opt); + dict_del (dup_opt, key); + } + ret = glusterd_get_next_global_opt_version_str (conf->opts, + &next_version); + if (ret) + goto out; + + ret = dict_set_str (dup_opt, GLUSTERD_GLOBAL_OPT_VERSION, next_version); + if (ret) + goto out; + + ret = glusterd_store_options (this, dup_opt); + if (ret) + goto out; + + if (glusterd_is_quorum_changed (conf->opts, key, NULL)) + quorum_action = _gf_true; + + ret = dict_set_dynstr (conf->opts, GLUSTERD_GLOBAL_OPT_VERSION, + next_version); + if (ret) + goto out; + else + next_version = NULL; + + if (!all) { + dict_del (conf->opts, key); + } else { + dict_foreach (conf->opts, _delete_reconfig_global_opt, + &is_force); + } +out: + GF_FREE (key_fixed); + if (dup_opt) + dict_unref (dup_opt); + + gf_log (this->name, GF_LOG_DEBUG, "returning %d", ret); + if (quorum_action) + glusterd_do_quorum_action (); + GF_FREE (next_version); + return ret; +} static int glusterd_op_reset_volume (dict_t *dict) @@ -957,14 +1111,23 @@ glusterd_op_reset_volume (dict_t *dict) int ret = -1; char *volname = NULL; char *key = NULL; + char *key_fixed = NULL; int32_t is_force = 0; + gf_boolean_t quorum_action = _gf_false; + xlator_t *this = NULL; + this = THIS; ret = dict_get_str (dict, "volname", &volname); if (ret) { gf_log ("", GF_LOG_ERROR, "Unable to get volume name " ); goto out; } + 
if (strcasecmp (volname, "all") == 0) { + ret = glusterd_op_reset_all_volume_options (this, dict); + goto out; + } + ret = dict_get_int32 (dict, "force", &is_force); if (ret) is_force = 0; @@ -981,9 +1144,26 @@ glusterd_op_reset_volume (dict_t *dict) goto out; } + if (strcmp (key, "all") && + glusterd_check_option_exists (key, &key_fixed) != 1) { + gf_log ("glusterd", GF_LOG_ERROR, + "volinfo dict inconsistency: option %s not found", + key); + ret = -1; + goto out; + } + if (key_fixed) + key = key_fixed; + + if (glusterd_is_quorum_changed (volinfo->dict, key, NULL)) + quorum_action = _gf_true; + ret = glusterd_options_reset (volinfo, key, is_force); out: + GF_FREE (key_fixed); + if (quorum_action) + glusterd_do_quorum_action (); gf_log ("", GF_LOG_DEBUG, "'volume reset' returning %d", ret); return ret; @@ -1036,6 +1216,91 @@ glusterd_volset_help (dict_t *dict) } static int +glusterd_op_set_all_volume_options (xlator_t *this, dict_t *dict) +{ + char *key = NULL; + char *key_fixed = NULL; + char *value = NULL; + char *dup_value = NULL; + int ret = -1; + glusterd_conf_t *conf = NULL; + dict_t *dup_opt = NULL; + char *next_version = NULL; + gf_boolean_t quorum_action = _gf_false; + + conf = this->private; + ret = dict_get_str (dict, "key1", &key); + if (ret) + goto out; + + ret = dict_get_str (dict, "value1", &value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "invalid key,value pair in 'volume set'"); + goto out; + } + ret = glusterd_check_option_exists (key, &key_fixed); + if (ret <= 0) { + gf_log (this->name, GF_LOG_ERROR, "Invalid key %s", key); + ret = -1; + goto out; + } + + if (key_fixed) + key = key_fixed; + + ret = -1; + dup_opt = dict_new (); + if (!dup_opt) + goto out; + dict_copy (conf->opts, dup_opt); + ret = dict_set_str (dup_opt, key, value); + if (ret) + goto out; + + ret = glusterd_get_next_global_opt_version_str (conf->opts, + &next_version); + if (ret) + goto out; + + ret = dict_set_str (dup_opt, GLUSTERD_GLOBAL_OPT_VERSION, next_version); + 
if (ret) + goto out; + + dup_value = gf_strdup (value); + if (!dup_value) + goto out; + + ret = glusterd_store_options (this, dup_opt); + if (ret) + goto out; + + if (glusterd_is_quorum_changed (conf->opts, key, value)) + quorum_action = _gf_true; + + ret = dict_set_dynstr (conf->opts, GLUSTERD_GLOBAL_OPT_VERSION, + next_version); + if (ret) + goto out; + else + next_version = NULL; + + ret = dict_set_dynstr (conf->opts, key, dup_value); + if (ret) + goto out; +out: + GF_FREE (key_fixed); + if (dup_opt) + dict_unref (dup_opt); + + gf_log (this->name, GF_LOG_DEBUG, "returning %d", ret); + if (quorum_action) + glusterd_do_quorum_action (); + GF_FREE (next_version); + return ret; +} + +static int glusterd_op_set_volume (dict_t *dict) { int ret = 0; @@ -1051,6 +1316,7 @@ glusterd_op_set_volume (dict_t *dict) gf_boolean_t global_opt = _gf_false; glusterd_volinfo_t *voliter = NULL; int32_t dict_count = 0; + gf_boolean_t quorum_action = _gf_false; this = THIS; GF_ASSERT (this); @@ -1078,6 +1344,11 @@ glusterd_op_set_volume (dict_t *dict) goto out; } + if (strcasecmp (volname, "all") == 0) { + ret = glusterd_op_set_all_volume_options (this, dict); + goto out; + } + ret = glusterd_volinfo_find (volname, &volinfo); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Unable to allocate memory"); @@ -1130,6 +1401,9 @@ glusterd_op_set_volume (dict_t *dict) if (key_fixed) key = key_fixed; + if (glusterd_is_quorum_changed (volinfo->dict, key, value)) + quorum_action = _gf_true; + if (global_opt) { list_for_each_entry (voliter, &priv->volumes, vol_list) { value = gf_strdup (value); @@ -1211,6 +1485,8 @@ glusterd_op_set_volume (dict_t *dict) if (key_fixed) GF_FREE (key_fixed); gf_log (this->name, GF_LOG_DEBUG, "returning %d", ret); + if (quorum_action) + glusterd_do_quorum_action (); return ret; } @@ -1737,20 +2013,26 @@ glusterd_op_ac_lock (glusterd_op_sm_event_t *event, void *ctx) static int glusterd_op_ac_unlock (glusterd_op_sm_event_t *event, void *ctx) { - int ret = 0; + int ret = 
0; glusterd_op_lock_ctx_t *lock_ctx = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; GF_ASSERT (event); GF_ASSERT (ctx); + this = THIS; + priv = this->private; lock_ctx = (glusterd_op_lock_ctx_t *)ctx; ret = glusterd_unlock (lock_ctx->uuid); - gf_log ("", GF_LOG_DEBUG, "Unlock Returned %d", ret); + gf_log (this->name, GF_LOG_DEBUG, "Unlock Returned %d", ret); glusterd_op_unlock_send_resp (lock_ctx->req, ret); + if (priv->pending_quorum_action) + glusterd_do_quorum_action (); return ret; } @@ -1894,7 +2176,8 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr) goto out; } if (strcmp (volname, "help") && - strcmp (volname, "help-xml")) { + strcmp (volname, "help-xml") && + strcasecmp (volname, "all")) { ret = glusterd_dict_set_volid (dict, volname, op_errstr); if (ret) @@ -1947,10 +2230,13 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr) goto out; } - ret = glusterd_dict_set_volid (dict, volname, - op_errstr); - if (ret) - goto out; + if (strcasecmp (volname, "all")) { + ret = glusterd_dict_set_volid (dict, + volname, + op_errstr); + if (ret) + goto out; + } dict_copy (dict, req_dict); } break; @@ -1966,6 +2252,78 @@ out: return ret; } +gf_boolean_t +glusterd_is_op_quorum_validation_required (xlator_t *this, glusterd_op_t op, + dict_t *dict) +{ + gf_boolean_t required = _gf_true; + char *key = NULL; + char *key_fixed = NULL; + int ret = -1; + + if (op == GD_OP_STATUS_VOLUME) { + required = _gf_false; + goto out; + } + if ((op != GD_OP_SET_VOLUME) && (op != GD_OP_RESET_VOLUME)) + goto out; + if (op == GD_OP_SET_VOLUME) + ret = dict_get_str (dict, "key1", &key); + else if (op == GD_OP_RESET_VOLUME) + ret = dict_get_str (dict, "key", &key); + ret = glusterd_check_option_exists (key, &key_fixed); + if (ret <= 0) + goto out; + if (key_fixed) + key = key_fixed; + if (glusterd_is_quorum_option (key)) + required = _gf_false; +out: + GF_FREE (key_fixed); + return required; +} + +static int +glusterd_op_validate_quorum (xlator_t *this, 
glusterd_op_t op, + dict_t *dict, char **op_errstr) +{ + int ret = 0; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + char *errstr = NULL; + + + errstr = "Quorum not met. Volume operation not allowed."; + if (!glusterd_is_op_quorum_validation_required (this, op, dict)) + goto out; + + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + ret = 0; + goto out; + } + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + ret = 0; + goto out; + } + + if (does_gd_meet_server_quorum (this)) { + ret = 0; + goto out; + } + + if (glusterd_is_volume_in_server_quorum (volinfo)) { + ret = -1; + *op_errstr = gf_strdup (errstr); + goto out; + } + ret = 0; +out: + return ret; +} + static int glusterd_op_ac_send_stage_op (glusterd_op_sm_event_t *event, void *ctx) { @@ -1993,6 +2351,13 @@ glusterd_op_ac_send_stage_op (glusterd_op_sm_event_t *event, void *ctx) goto out; } + ret = glusterd_op_validate_quorum (this, op, dict, &op_errstr); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, op_errstr); + opinfo.op_errstr = op_errstr; + goto out; + } + /* rsp_dict NULL from source */ ret = glusterd_op_stage_validate (op, dict, &op_errstr, NULL); if (ret) { @@ -2668,6 +3033,8 @@ glusterd_op_txn_complete () GF_FREE (op_errstr); + if (priv->pending_quorum_action) + glusterd_do_quorum_action (); gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); return ret; } diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.c b/xlators/mgmt/glusterd/src/glusterd-sm.c index 72af8c05b57..62a096bbf82 100644 --- a/xlators/mgmt/glusterd/src/glusterd-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-sm.c @@ -974,6 +974,27 @@ glusterd_destroy_friend_event_context (glusterd_friend_sm_event_t *event) } } +gf_boolean_t +gd_does_peer_affect_quorum (glusterd_friend_sm_state_t old_state, + glusterd_friend_sm_event_type_t event_type, + glusterd_peerinfo_t *peerinfo) +{ + gf_boolean_t affects = _gf_false; + + //When glusterd comes up with friends in BEFRIENDED state in store, + //wait 
until compare-data happens. + if ((old_state == GD_FRIEND_STATE_BEFRIENDED) && + (event_type != GD_FRIEND_EVENT_RCVD_ACC) && + (event_type != GD_FRIEND_EVENT_LOCAL_ACC)) + goto out; + if ((peerinfo->state.state == GD_FRIEND_STATE_BEFRIENDED) + && peerinfo->connected) { + affects = _gf_true; + } +out: + return affects; +} + int glusterd_friend_sm () { @@ -985,6 +1006,8 @@ glusterd_friend_sm () glusterd_peerinfo_t *peerinfo = NULL; glusterd_friend_sm_event_type_t event_type = 0; gf_boolean_t is_await_conn = _gf_false; + gf_boolean_t quorum_action = _gf_false; + glusterd_friend_sm_state_t old_state = GD_FRIEND_STATE_DEFAULT; while (!list_empty (&gd_friend_sm_queue)) { list_for_each_entry_safe (event, tmp, &gd_friend_sm_queue, list) { @@ -1004,6 +1027,7 @@ glusterd_friend_sm () glusterd_friend_sm_event_name_get (event_type)); + old_state = peerinfo->state.state; state = glusterd_friend_state_table[peerinfo->state.state]; GF_ASSERT (state); @@ -1044,6 +1068,15 @@ glusterd_friend_sm () goto out; } + if (gd_does_peer_affect_quorum (old_state, event_type, + peerinfo)) { + peerinfo->quorum_contrib = QUORUM_UP; + if (peerinfo->quorum_action) { + peerinfo->quorum_action = _gf_false; + quorum_action = _gf_true; + } + } + ret = glusterd_store_peerinfo (peerinfo); glusterd_destroy_friend_event_context (event); @@ -1057,6 +1090,8 @@ glusterd_friend_sm () ret = 0; out: + if (quorum_action) + glusterd_do_quorum_action (); return ret; } diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.h b/xlators/mgmt/glusterd/src/glusterd-sm.h index 7baaf023602..0c1c4804a33 100644 --- a/xlators/mgmt/glusterd/src/glusterd-sm.h +++ b/xlators/mgmt/glusterd/src/glusterd-sm.h @@ -46,6 +46,20 @@ struct glusterd_store_handle_ { typedef struct glusterd_store_handle_ glusterd_store_handle_t; +typedef enum gd_quorum_contribution_ { + QUORUM_NONE, + QUORUM_WAITING, + QUORUM_DOWN, + QUORUM_UP +} gd_quorum_contrib_t; + +typedef enum gd_quorum_status_ { + QUORUM_UNKNOWN, + QUORUM_NOT_APPLICABLE, + 
QUORUM_MEETS, + QUORUM_DOES_NOT_MEET +} gd_quorum_status_t; + typedef enum glusterd_friend_sm_state_ { GD_FRIEND_STATE_DEFAULT = 0, GD_FRIEND_STATE_REQ_SENT, @@ -101,6 +115,8 @@ struct glusterd_peerinfo_ { int connected; glusterd_store_handle_t *shandle; glusterd_sm_tr_log_t sm_log; + gf_boolean_t quorum_action; + gd_quorum_contrib_t quorum_contrib; }; typedef struct glusterd_peerinfo_ glusterd_peerinfo_t; diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c index f5dd024b876..cc7b54a18fc 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.c +++ b/xlators/mgmt/glusterd/src/glusterd-store.c @@ -1647,10 +1647,13 @@ glusterd_store_iter_destroy (glusterd_store_iter_t *iter) { int32_t ret = -1; - GF_ASSERT (iter); - GF_ASSERT (iter->fd > 0); + if (!iter) + return 0; - ret = fclose (iter->file); + if (iter->file) + ret = fclose (iter->file); + else + ret = 0; if (ret) { gf_log ("", GF_LOG_ERROR, "Unable to close fd: %d, ret: %d, " @@ -1923,7 +1926,6 @@ glusterd_store_retrieve_node_state (char *volname) ret = glusterd_store_handle_retrieve (path, &volinfo->node_state_shandle); - if (ret) goto out; @@ -1935,6 +1937,7 @@ glusterd_store_retrieve_node_state (char *volname) ret = glusterd_store_iter_get_next (iter, &key, &value, &op_errno); if (ret) goto out; + if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_DEFRAG, strlen (GLUSTERD_STORE_KEY_VOL_DEFRAG))) { volinfo->defrag_cmd = atoi (value); @@ -2148,6 +2151,102 @@ out: return ret; } +inline void +glusterd_store_set_options_path (glusterd_conf_t *conf, char *path, size_t len) +{ + snprintf (path, len, "%s/options", conf->workdir); +} + +void +_store_global_opts (dict_t *this, char *key, data_t *value, void *data) +{ + glusterd_store_handle_t *shandle = data; + + glusterd_store_save_value (shandle->fd, key, (char*)value->data); + return; +} + +int32_t +glusterd_store_options (xlator_t *this, dict_t *opts) +{ + glusterd_store_handle_t *shandle = NULL; + glusterd_conf_t *conf = NULL; 
+ char path[PATH_MAX] = {0}; + int fd = -1; + int32_t ret = -1; + + conf = this->private; + glusterd_store_set_options_path (conf, path, sizeof (path)); + + ret = glusterd_store_handle_new (path, &shandle); + if (ret) + goto out; + + fd = glusterd_store_mkstemp (shandle); + if (fd <= 0) { + ret = -1; + goto out; + } + + shandle->fd = fd; + dict_foreach (opts, _store_global_opts, shandle); + shandle->fd = 0; + ret = glusterd_store_rename_tmppath (shandle); + if (ret) + goto out; +out: + glusterd_store_handle_destroy (shandle); + if (fd >=0 ) + close (fd); + return ret; +} + +int32_t +glusterd_store_retrieve_options (xlator_t *this) +{ + char path[PATH_MAX] = {0}; + glusterd_conf_t *conf = NULL; + glusterd_store_handle_t *shandle = NULL; + glusterd_store_iter_t *iter = NULL; + char *key = NULL; + char *value = NULL; + glusterd_store_op_errno_t op_errno = 0; + int ret = -1; + + conf = this->private; + glusterd_store_set_options_path (conf, path, sizeof (path)); + + ret = glusterd_store_handle_retrieve (path, &shandle); + if (ret) + goto out; + + ret = glusterd_store_iter_new (shandle, &iter); + if (ret) + goto out; + + ret = glusterd_store_iter_get_next (iter, &key, &value, &op_errno); + while (!ret) { + ret = dict_set_dynstr (conf->opts, key, value); + if (ret) { + GF_FREE (key); + GF_FREE (value); + goto out; + } + GF_FREE (key); + key = NULL; + value = NULL; + + ret = glusterd_store_iter_get_next (iter, &key, &value, + &op_errno); + } + if (op_errno != GD_STORE_EOF) + goto out; + ret = 0; +out: + glusterd_store_iter_destroy (iter); + glusterd_store_handle_destroy (shandle); + return ret; +} int32_t glusterd_store_retrieve_volumes (xlator_t *this) @@ -2545,9 +2644,8 @@ glusterd_store_retrieve_peers (xlator_t *this) (void) glusterd_store_iter_destroy (iter); - args.mode = GD_MODE_SWITCH_ON; ret = glusterd_friend_add (hostname, 0, state, &uuid, - &peerinfo, 1, &args); + &peerinfo, 1, NULL); GF_FREE (hostname); if (ret) @@ -2557,6 +2655,13 @@ 
glusterd_store_retrieve_peers (xlator_t *this) glusterd_for_each_entry (entry, dir); } + args.mode = GD_MODE_ON; + list_for_each_entry (peerinfo, &priv->peers, uuid_list) { + ret = glusterd_friend_rpc_create (this, peerinfo, &args); + if (ret) + goto out; + } + out: if (dir) closedir (dir); @@ -2604,7 +2709,6 @@ glusterd_restore () this = THIS; ret = glusterd_store_retrieve_volumes (this); - if (ret) goto out; diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h index b2cceb393bd..41ac1ed9925 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.h +++ b/xlators/mgmt/glusterd/src/glusterd-store.h @@ -143,4 +143,10 @@ glusterd_store_is_valid_brickpath (char *volname, char *brick); int32_t glusterd_store_perform_node_state_store (glusterd_volinfo_t *volinfo); + +int32_t +glusterd_store_retrieve_options (xlator_t *this); + +int32_t +glusterd_store_options (xlator_t *this, dict_t *opts); #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 458605c9651..a7a48b6b5eb 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -945,12 +945,10 @@ glusterd_volinfo_find (char *volname, glusterd_volinfo_t **volinfo) } } - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); return ret; } - int32_t glusterd_service_stop (const char *service, char *pidfile, int sig, gf_boolean_t force_kill) @@ -1585,25 +1583,25 @@ out: } void -_add_volinfo_dict_to_prdict (dict_t *this, char *key, data_t *value, void *data) +_add_dict_to_prdict (dict_t *this, char *key, data_t *value, void *data) { - glusterd_voldict_ctx_t *ctx = NULL; + glusterd_dict_ctx_t *ctx = NULL; char optkey[512] = {0,}; int ret = -1; ctx = data; - snprintf (optkey, sizeof (optkey), "volume%d.%s%d", ctx->count, + snprintf (optkey, sizeof (optkey), "%s.%s%d", ctx->prefix, ctx->key_name, ctx->opt_count); ret = dict_set_str (ctx->dict, optkey, key); if (ret) gf_log ("", 
GF_LOG_ERROR, "option add for %s%d %s", - ctx->key_name, ctx->count, key); - snprintf (optkey, sizeof (optkey), "volume%d.%s%d", ctx->count, + ctx->key_name, ctx->opt_count, key); + snprintf (optkey, sizeof (optkey), "%s.%s%d", ctx->prefix, ctx->val_name, ctx->opt_count); ret = dict_set_str (ctx->dict, optkey, value->data); if (ret) gf_log ("", GF_LOG_ERROR, "option add for %s%d %s", - ctx->val_name, ctx->count, value->data); + ctx->val_name, ctx->opt_count, value->data); ctx->opt_count++; return; @@ -1641,6 +1639,7 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, dict_t *dict, int32_t count) { int32_t ret = -1; + char prefix[512] = {0,}; char key[512] = {0,}; glusterd_brickinfo_t *brickinfo = NULL; int32_t i = 1; @@ -1648,7 +1647,7 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, char *src_brick = NULL; char *dst_brick = NULL; char *str = NULL; - glusterd_voldict_ctx_t ctx = {0}; + glusterd_dict_ctx_t ctx = {0}; GF_ASSERT (dict); GF_ASSERT (volinfo); @@ -1780,14 +1779,15 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, goto out; } + snprintf (prefix, sizeof (prefix), "volume%d", count); ctx.dict = dict; - ctx.count = count; + ctx.prefix = prefix; ctx.opt_count = 1; ctx.key_name = "key"; ctx.val_name = "value"; GF_ASSERT (volinfo->dict); - dict_foreach (volinfo->dict, _add_volinfo_dict_to_prdict, &ctx); + dict_foreach (volinfo->dict, _add_dict_to_prdict, &ctx); ctx.opt_count--; memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "volume%d.opt-count", count); @@ -1796,13 +1796,13 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, goto out; ctx.dict = dict; - ctx.count = count; + ctx.prefix = prefix; ctx.opt_count = 1; ctx.key_name = "slave-num"; ctx.val_name = "slave-val"; GF_ASSERT (volinfo->gsync_slaves); - dict_foreach (volinfo->gsync_slaves, _add_volinfo_dict_to_prdict, &ctx); + dict_foreach (volinfo->gsync_slaves, _add_dict_to_prdict, &ctx); ctx.opt_count--; memset (key, 0, sizeof (key)); @@ -1844,6 +1844,7 
@@ glusterd_build_volume_dict (dict_t **vols) glusterd_conf_t *priv = NULL; glusterd_volinfo_t *volinfo = NULL; int32_t count = 0; + glusterd_dict_ctx_t ctx = {0}; priv = THIS->private; @@ -1864,6 +1865,17 @@ glusterd_build_volume_dict (dict_t **vols) if (ret) goto out; + ctx.dict = dict; + ctx.prefix = "global"; + ctx.opt_count = 1; + ctx.key_name = "key"; + ctx.val_name = "val"; + dict_foreach (priv->opts, _add_dict_to_prdict, &ctx); + ctx.opt_count--; + ret = dict_set_int32 (dict, "global-opt-count", ctx.opt_count); + if (ret) + goto out; + *vols = dict; out: gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); @@ -1945,8 +1957,8 @@ out: } static int32_t -import_prdict_volinfo_dict (dict_t *vols, dict_t *dst_dict, char *key_prefix, - char *value_prefix, int opt_count, int count) +import_prdict_dict (dict_t *vols, dict_t *dst_dict, char *key_prefix, + char *value_prefix, int opt_count, char *prefix) { char key[512] = {0,}; int32_t ret = 0; @@ -1958,8 +1970,8 @@ import_prdict_volinfo_dict (dict_t *vols, dict_t *dst_dict, char *key_prefix, while (i <= opt_count) { memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.%s%d", - count, key_prefix, i); + snprintf (key, sizeof (key), "%s.%s%d", + prefix, key_prefix, i); ret = dict_get_str (vols, key, &opt_key); if (ret) { snprintf (msg, sizeof (msg), "Volume dict key not " @@ -1968,8 +1980,8 @@ import_prdict_volinfo_dict (dict_t *vols, dict_t *dst_dict, char *key_prefix, } memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.%s%d", - count, value_prefix, i); + snprintf (key, sizeof (key), "%s.%s%d", + prefix, value_prefix, i); ret = dict_get_str (vols, key, &opt_val); if (ret) { snprintf (msg, sizeof (msg), "Volume dict value not " @@ -1998,6 +2010,245 @@ out: } +gf_boolean_t +glusterd_is_quorum_option (char *option) +{ + gf_boolean_t res = _gf_false; + int i = 0; + char *keys[] = {GLUSTERD_QUORUM_TYPE_KEY, + GLUSTERD_QUORUM_RATIO_KEY, NULL}; + + for (i = 0; keys[i]; i++) { + if 
(strcmp (option, keys[i]) == 0) { + res = _gf_true; + break; + } + } + return res; +} + +gf_boolean_t +glusterd_is_quorum_changed (dict_t *options, char *option, char *value) +{ + int ret = 0; + gf_boolean_t reconfigured = _gf_false; + gf_boolean_t all = _gf_false; + char *oldquorum = NULL; + char *newquorum = NULL; + char *oldratio = NULL; + char *newratio = NULL; + + if ((strcmp ("all", option) != 0) && + !glusterd_is_quorum_option (option)) + goto out; + + if (strcmp ("all", option) == 0) + all = _gf_true; + + if (all || (strcmp (GLUSTERD_QUORUM_TYPE_KEY, option) == 0)) { + newquorum = value; + ret = dict_get_str (options, GLUSTERD_QUORUM_TYPE_KEY, + &oldquorum); + } + + if (all || (strcmp (GLUSTERD_QUORUM_RATIO_KEY, option) == 0)) { + newratio = value; + ret = dict_get_str (options, GLUSTERD_QUORUM_RATIO_KEY, + &oldratio); + } + + reconfigured = _gf_true; + + if (oldquorum && newquorum && (strcmp (oldquorum, newquorum) == 0)) + reconfigured = _gf_false; + if (oldratio && newratio && (strcmp (oldratio, newratio) == 0)) + reconfigured = _gf_false; + + if ((oldratio == NULL) && (newratio == NULL) && (oldquorum == NULL) && + (newquorum == NULL)) + reconfigured = _gf_false; +out: + return reconfigured; +} + +static inline gf_boolean_t +_is_contributing_to_quorum (gd_quorum_contrib_t contrib) +{ + if ((contrib == QUORUM_UP) || (contrib == QUORUM_DOWN)) + return _gf_true; + return _gf_false; +} + +static inline gf_boolean_t +_does_quorum_meet (int active_count, int quorum_count) +{ + return (active_count >= quorum_count); +} + +int +glusterd_get_quorum_cluster_counts (xlator_t *this, int *active_count, + int *quorum_count) +{ + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_conf_t *conf = NULL; + int ret = -1; + int inquorum_count = 0; + char *val = NULL; + double quorum_percentage = 0.0; + gf_boolean_t ratio = _gf_false; + + conf = this->private; + //Start with counting self + inquorum_count = 1; + if (active_count) + *active_count = 1; + list_for_each_entry 
(peerinfo, &conf->peers, uuid_list) { + if (peerinfo->quorum_contrib == QUORUM_WAITING) + goto out; + + if (_is_contributing_to_quorum (peerinfo->quorum_contrib)) + inquorum_count = inquorum_count + 1; + + if (active_count && (peerinfo->quorum_contrib == QUORUM_UP)) + *active_count = *active_count + 1; + } + + ret = dict_get_str (conf->opts, GLUSTERD_QUORUM_RATIO_KEY, &val); + if (ret == 0) { + ratio = _gf_true; + (void)gf_string2double(val, &quorum_percentage); + } + if (ratio) + *quorum_count = (inquorum_count * quorum_percentage / 100.0); + else + *quorum_count = (inquorum_count * 50 / 100) + 1; + + ret = 0; +out: + return ret; +} + +gf_boolean_t +glusterd_is_volume_in_server_quorum (glusterd_volinfo_t *volinfo) +{ + gf_boolean_t res = _gf_false; + char *quorum_type = NULL; + int ret = 0; + + ret = dict_get_str (volinfo->dict, GLUSTERD_QUORUM_TYPE_KEY, + &quorum_type); + if (ret) + goto out; + + if (strcmp (quorum_type, GLUSTERD_SERVER_QUORUM) == 0) + res = _gf_true; +out: + return res; +} + +gf_boolean_t +glusterd_is_any_volume_in_server_quorum (xlator_t *this) +{ + glusterd_conf_t *conf = NULL; + glusterd_volinfo_t *volinfo = NULL; + + conf = this->private; + list_for_each_entry (volinfo, &conf->volumes, vol_list) { + if (glusterd_is_volume_in_server_quorum (volinfo)) { + return _gf_true; + } + } + return _gf_false; +} + +gf_boolean_t +does_gd_meet_server_quorum (xlator_t *this) +{ + int quorum_count = 0; + int active_count = 0; + gf_boolean_t in = _gf_false; + glusterd_conf_t *conf = NULL; + int ret = -1; + + conf = this->private; + ret = glusterd_get_quorum_cluster_counts (this, &active_count, + &quorum_count); + if (ret) + goto out; + + if (!_does_quorum_meet (active_count, quorum_count)) { + goto out; + } + + in = _gf_true; +out: + return in; +} + +void +glusterd_do_volume_quorum_action (xlator_t *this, glusterd_volinfo_t *volinfo, + gf_boolean_t meets_quorum) +{ + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_conf_t *conf = NULL; + + conf = 
this->private; + if (volinfo->status != GLUSTERD_STATUS_STARTED) + goto out; + + if (!glusterd_is_volume_in_server_quorum (volinfo)) + goto out; + + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + if (!glusterd_is_local_brick (this, volinfo, brickinfo)) + continue; + if (meets_quorum) + glusterd_brick_start (volinfo, brickinfo); + else + glusterd_brick_stop (volinfo, brickinfo); + } +out: + return; +} + +int +glusterd_do_quorum_action () +{ + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + glusterd_volinfo_t *volinfo = NULL; + int ret = 0; + int active_count = 0; + int quorum_count = 0; + gf_boolean_t meets = _gf_false; + + this = THIS; + conf = this->private; + + conf->pending_quorum_action = _gf_true; + ret = glusterd_lock (conf->uuid); + if (ret) + goto out; + + { + ret = glusterd_get_quorum_cluster_counts (this, &active_count, + &quorum_count); + if (ret) + goto unlock; + + if (_does_quorum_meet (active_count, quorum_count)) + meets = _gf_true; + list_for_each_entry (volinfo, &conf->volumes, vol_list) { + glusterd_do_volume_quorum_action (this, volinfo, meets); + } + } +unlock: + (void)glusterd_unlock (conf->uuid); + conf->pending_quorum_action = _gf_false; +out: + return ret; +} + int32_t glusterd_import_friend_volume_opts (dict_t *vols, int count, glusterd_volinfo_t *volinfo) @@ -2006,6 +2257,7 @@ glusterd_import_friend_volume_opts (dict_t *vols, int count, int32_t ret = -1; int opt_count = 0; char msg[2048] = {0}; + char volume_prefix[1024] = {0}; memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "volume%d.opt-count", count); @@ -2016,8 +2268,9 @@ glusterd_import_friend_volume_opts (dict_t *vols, int count, goto out; } - ret = import_prdict_volinfo_dict (vols, volinfo->dict, "key", - "value", opt_count, count); + snprintf (volume_prefix, sizeof (volume_prefix), "volume%d", count); + ret = import_prdict_dict (vols, volinfo->dict, "key", "value", + opt_count, volume_prefix); if (ret) { snprintf (msg, sizeof (msg), "Unable 
to import options dict " "specified for %s", volinfo->volname); @@ -2033,9 +2286,8 @@ glusterd_import_friend_volume_opts (dict_t *vols, int count, goto out; } - ret = import_prdict_volinfo_dict (vols, volinfo->gsync_slaves, - "slave-num", "slave-val", opt_count, - count); + ret = import_prdict_dict (vols, volinfo->gsync_slaves, "slave-num", + "slave-val", opt_count, volume_prefix); if (ret) { snprintf (msg, sizeof (msg), "Unable to import gsync sessions " "specified for %s", volinfo->volname); @@ -2530,6 +2782,95 @@ out: return ret; } +int +glusterd_get_global_opt_version (dict_t *opts, uint32_t *version) +{ + int ret = -1; + char *version_str = NULL; + + ret = dict_get_str (opts, GLUSTERD_GLOBAL_OPT_VERSION, &version_str); + if (ret) + goto out; + + ret = gf_string2uint (version_str, version); + if (ret) + goto out; + ret = 0; +out: + return ret; +} + +int +glusterd_get_next_global_opt_version_str (dict_t *opts, char **version_str) +{ + int ret = -1; + char version_string[64] = {0}; + uint32_t version = 0; + + ret = glusterd_get_global_opt_version (opts, &version); + if (ret) + goto out; + version++; + snprintf (version_string, sizeof (version_string), "%"PRIu32, version); + *version_str = gf_strdup (version_string); + if (*version_str) + ret = 0; +out: + return ret; +} + +int32_t +glusterd_import_global_opts (dict_t *friend_data) +{ + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + int ret = -1; + dict_t *import_options = NULL; + int count = 0; + uint32_t local_version = 0; + uint32_t remote_version = 0; + + this = THIS; + conf = this->private; + + ret = dict_get_int32 (friend_data, "global-opt-count", &count); + if (ret) { + //old version peer + ret = 0; + goto out; + } + + import_options = dict_new (); + if (!import_options) + goto out; + ret = import_prdict_dict (friend_data, import_options, "key", "val", + count, "global"); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to import" + " global options"); + goto out; + } + + ret = 
glusterd_get_global_opt_version (conf->opts, &local_version); + if (ret) + goto out; + ret = glusterd_get_global_opt_version (import_options, &remote_version); + if (ret) + goto out; + if (remote_version > local_version) { + ret = glusterd_store_options (this, import_options); + if (ret) + goto out; + dict_unref (conf->opts); + conf->opts = dict_ref (import_options); + } + ret = 0; +out: + if (import_options) + dict_unref (import_options); + return ret; +} + int32_t glusterd_compare_friend_data (dict_t *vols, int32_t *status) { @@ -2567,6 +2908,9 @@ glusterd_compare_friend_data (dict_t *vols, int32_t *status) stale_nfs = _gf_true; if (glusterd_is_nodesvc_running ("glustershd")) stale_shd = _gf_true; + ret = glusterd_import_global_opts (vols); + if (ret) + goto out; ret = glusterd_import_friend_volumes (vols); if (ret) goto out; @@ -3451,13 +3795,16 @@ glusterd_restart_bricks (glusterd_conf_t *conf) int ret = 0; list_for_each_entry (volinfo, &conf->volumes, vol_list) { - /* If volume status is not started, do not proceed */ - if (volinfo->status == GLUSTERD_STATUS_STARTED) { - list_for_each_entry (brickinfo, &volinfo->bricks, - brick_list) { - glusterd_brick_start (volinfo, brickinfo); - } - start_nodesvcs = _gf_true; + if (volinfo->status != GLUSTERD_STATUS_STARTED) + continue; + start_nodesvcs = _gf_true; + if (glusterd_is_volume_in_server_quorum (volinfo)) { + //these bricks will be restarted once the quorum is met + continue; + } + + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + glusterd_brick_start (volinfo, brickinfo); } } @@ -4730,8 +5077,8 @@ out: int glusterd_peerinfo_new (glusterd_peerinfo_t **peerinfo, - glusterd_friend_sm_state_t state, - uuid_t *uuid, const char *hostname) + glusterd_friend_sm_state_t state, uuid_t *uuid, + const char *hostname, int port) { glusterd_peerinfo_t *new_peer = NULL; int ret = -1; @@ -4761,6 +5108,9 @@ glusterd_peerinfo_new (glusterd_peerinfo_t **peerinfo, if (ret) goto out; + if (new_peer->state.state == 
GD_FRIEND_STATE_BEFRIENDED) + new_peer->quorum_contrib = QUORUM_WAITING; + new_peer->port = port; *peerinfo = new_peer; out: if (ret && new_peer) diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index 408facce32f..70caf2482ad 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -49,13 +49,13 @@ struct glusterd_lock_ { time_t timestamp; }; -typedef struct glusterd_voldict_ctx_ { +typedef struct glusterd_dict_ctx_ { dict_t *dict; - int count; int opt_count; char *key_name; char *val_name; -} glusterd_voldict_ctx_t; + char *prefix; +} glusterd_dict_ctx_t; /* Moved the definition from gluster-utils.c avoiding * extern'ing in multiple places. @@ -339,8 +339,8 @@ glusterd_sm_tr_log_transition_add (glusterd_sm_tr_log_t *log, int event); int glusterd_peerinfo_new (glusterd_peerinfo_t **peerinfo, - glusterd_friend_sm_state_t state, - uuid_t *uuid, const char *hostname); + glusterd_friend_sm_state_t state, uuid_t *uuid, + const char *hostname, int port); int glusterd_sm_tr_log_init (glusterd_sm_tr_log_t *log, char * (*state_name_get) (int), @@ -454,4 +454,25 @@ glusterd_defrag_volume_status_update (glusterd_volinfo_t *volinfo, int glusterd_check_files_identical (char *filename1, char *filename2, gf_boolean_t *identical); + +gf_boolean_t +glusterd_is_quorum_changed (dict_t *options, char *option, char *value); + +int +glusterd_do_quorum_action (); + +int +glusterd_get_quorum_cluster_counts (xlator_t *this, int *active_count, + int *quorum_count); + +int +glusterd_get_next_global_opt_version_str (dict_t *opts, char **version_str); +gf_boolean_t +glusterd_is_quorum_option (char *option); +gf_boolean_t +glusterd_is_volume_in_server_quorum (glusterd_volinfo_t *volinfo); +gf_boolean_t +glusterd_is_any_volume_in_server_quorum (xlator_t *this); +gf_boolean_t +does_gd_meet_server_quorum (xlator_t *this); #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c 
b/xlators/mgmt/glusterd/src/glusterd-volgen.c index 198d91ad95e..c5eb6b0f3ce 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -229,6 +229,8 @@ static struct volopt_map_entry glusterd_volopt_map[] = { {"features.grace-timeout", "protocol/client", "grace-timeout", NULL, NO_DOC, 0}, {"features.grace-timeout", "protocol/server", "grace-timeout", NULL, DOC, 0}, {"feature.read-only", "features/read-only", "!read-only", "off", DOC, 0}, + {GLUSTERD_QUORUM_TYPE_KEY, "mgmt/glusterd", NULL, "off", DOC, 0}, + {GLUSTERD_QUORUM_RATIO_KEY, "mgmt/glusterd", NULL, "0", DOC, 0}, {NULL, } }; diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c index 2567d9c241f..a61dd19275c 100644 --- a/xlators/mgmt/glusterd/src/glusterd.c +++ b/xlators/mgmt/glusterd/src/glusterd.c @@ -115,6 +115,36 @@ glusterd_uuid_init (int flag) } int +glusterd_options_init (xlator_t *this) +{ + int ret = -1; + glusterd_conf_t *priv = NULL; + char *initial_version = "0"; + + priv = this->private; + + priv->opts = dict_new (); + if (!priv->opts) + goto out; + + ret = glusterd_store_retrieve_options (this); + if (ret == 0) + goto out; + + ret = dict_set_str (priv->opts, GLUSTERD_GLOBAL_OPT_VERSION, + initial_version); + if (ret) + goto out; + ret = glusterd_store_options (this, priv->opts); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to store version"); + return ret; + } +out: + + return 0; +} +int glusterd_fetchspec_notify (xlator_t *this) { int ret = -1; @@ -1020,6 +1050,10 @@ init (xlator_t *this) if (ret < 0) goto out; + ret = glusterd_options_init (this); + if (ret < 0) + goto out; + ret = glusterd_handle_upgrade_downgrade (this->options, conf); if (ret) goto out; @@ -1164,5 +1198,12 @@ struct volume_options options[] = { .type = GF_OPTION_TYPE_BOOL, }, #endif + { .key = {"server-quorum-type"}, + .type = GF_OPTION_TYPE_STR, + .value = { "none", "server"}, + }, + { .key = {"server-quorum-ratio"}, + .type 
= GF_OPTION_TYPE_PERCENT, + }, { .key = {NULL} }, }; diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index 53a2a115490..e0b5ce1bfe3 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -53,7 +53,14 @@ #define GLUSTERD_TR_LOG_SIZE 50 #define GLUSTERD_NAME "glusterd" #define GLUSTERD_SOCKET_LISTEN_BACKLOG 128 +#define GLUSTERD_QUORUM_TYPE_KEY "cluster.server-quorum-type" +#define GLUSTERD_QUORUM_RATIO_KEY "cluster.server-quorum-ratio" +#define GLUSTERD_GLOBAL_OPT_VERSION "global-option-version" +#define GLUSTERD_SERVER_QUORUM "server" + +struct glusterd_volinfo_; +typedef struct glusterd_volinfo_ glusterd_volinfo_t; typedef enum glusterd_op_ { GD_OP_NONE = 0, @@ -84,7 +91,6 @@ typedef enum glusterd_op_ { GD_OP_MAX, } glusterd_op_t; - struct glusterd_store_iter_ { int fd; FILE *file; @@ -96,12 +102,19 @@ typedef struct glusterd_store_iter_ glusterd_store_iter_t; struct glusterd_volgen { dict_t *dict; }; + typedef struct { struct rpc_clnt *rpc; gf_boolean_t running; } nodesrv_t; typedef struct { + gf_boolean_t quorum; + double quorum_ratio; + uint64_t gl_opt_version; +} gd_global_opts_t; + +typedef struct { struct _volfile_ctx *volfile; pthread_mutex_t mutex; struct list_head peers; @@ -127,6 +140,8 @@ typedef struct { pthread_t brick_thread; void *hooks_priv; xlator_t *xl; /* Should be set to 'THIS' before creating thread */ + gf_boolean_t pending_quorum_action; + dict_t *opts; } glusterd_conf_t; typedef enum gf_brick_status { @@ -157,9 +172,6 @@ struct gf_defrag_brickinfo_ { int size; }; -struct glusterd_volinfo_; -typedef struct glusterd_volinfo_ glusterd_volinfo_t; - typedef int (*defrag_cbk_fn_t) (glusterd_volinfo_t *volinfo, gf_defrag_status_t status); @@ -383,6 +395,9 @@ glusterd_friend_add (const char *hoststr, int port, gf_boolean_t restore, glusterd_peerctx_args_t *args); int +glusterd_friend_rpc_create (xlator_t *this, glusterd_peerinfo_t *peerinfo, + glusterd_peerctx_args_t 
*args); +int glusterd_friend_remove (uuid_t uuid, char *hostname); int |