From 7c23a94516c5dd21536c259f323a3cc113fdfa0d Mon Sep 17 00:00:00 2001
From: Pranith Kumar K
Date: Tue, 6 Nov 2012 12:18:24 +0530
Subject: mgmt/glusterd: Implementation of server-side quorum

Feature-page: http://www.gluster.org/community/documentation/index.php/Features/Server-quorum

Change-Id: I747b222519e71022462343d2c1bcd3626e1f9c86
BUG: 839595
Signed-off-by: Pranith Kumar K
Reviewed-on: http://review.gluster.org/3811
Tested-by: Gluster Build System
Reviewed-by: Vijay Bellur
---
 xlators/mgmt/glusterd/src/glusterd-handler.c | 170 ++++++++++++++++++---------
 1 file changed, 117 insertions(+), 53 deletions(-)

diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
index 7cdad10e219..f33d2caeb56 100644
--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
+++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
@@ -280,6 +280,8 @@ _build_option_key (dict_t *d, char *k, data_t *v, void *tmp)
         int            ret = -1;
 
         pack = tmp;
+        if (strcmp (k, GLUSTERD_GLOBAL_OPT_VERSION) == 0)
+                return 0;
         snprintf (reconfig_key, 256, "volume%d.option.%s",
                   pack->vol_count, k);
         ret = dict_set_str (pack->dict, reconfig_key, v->data);
@@ -303,12 +305,14 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo,
         glusterd_conf_t   *priv          = NULL;
         char              *volume_id_str = NULL;
         struct args_pack   pack          = {0,};
+        xlator_t          *this          = NULL;
 
         GF_ASSERT (volinfo);
         GF_ASSERT (volumes);
 
-        priv = THIS->private;
+        this = THIS;
+        priv = this->private;
 
         GF_ASSERT (priv);
@@ -388,6 +392,7 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo,
         pack.vol_count = count;
         pack.opt_count = 0;
         dict_foreach (dict, _build_option_key, (void *) &pack);
+        dict_foreach (priv->opts, _build_option_key, &pack);
 
         snprintf (key, 256, "volume%d.opt_count", pack.vol_count);
         ret = dict_set_int32 (volumes, key, pack.opt_count);
@@ -445,7 +450,6 @@ glusterd_op_txn_begin (rpcsvc_request_t *req, glusterd_op_t op, void *ctx)
         GF_ASSERT (priv);
 
         ret = glusterd_lock (MY_UUID);
-
         if (ret) {
                 gf_log (this->name, GF_LOG_ERROR,
                         "Unable to acquire local lock, ret: %d", ret);
@@ -660,15 +664,30 @@ glusterd_handle_cli_probe (rpcsvc_request_t *req)
         gf1_cli_probe_req    cli_req  = {0,};
         glusterd_peerinfo_t *peerinfo = NULL;
         gf_boolean_t         run_fsm  = _gf_true;
+        xlator_t            *this     = NULL;
+
         GF_ASSERT (req);
+        this = THIS;
 
-        if (!xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf1_cli_probe_req)) {
+        if (!xdr_to_generic (req->msg[0], &cli_req,
+                             (xdrproc_t)xdr_gf1_cli_probe_req)) {
                 //failed to decode msg;
                 gf_log ("", GF_LOG_ERROR, "xdr decoding error");
                 req->rpc_err = GARBAGE_ARGS;
                 goto out;
         }
+
+        if (glusterd_is_any_volume_in_server_quorum (this) &&
+            !does_gd_meet_server_quorum (this)) {
+                glusterd_xfer_cli_probe_resp (req, -1, GF_PROBE_QUORUM_NOT_MET,
+                                              NULL,
+                                              cli_req.hostname, cli_req.port);
+                gf_log (this->name, GF_LOG_ERROR, "Quorum does not meet, "
+                        "rejecting operation");
+                ret = 0;
+                goto out;
+        }
+
         gf_cmd_log ("peer probe", " on host %s:%d", cli_req.hostname,
                     cli_req.port);
         gf_log ("glusterd", GF_LOG_INFO, "Received CLI probe req %s %d",
@@ -684,8 +703,9 @@ glusterd_handle_cli_probe (rpcsvc_request_t *req)
                                                   &peerinfo))) {
                 if (strcmp (peerinfo->hostname, cli_req.hostname) == 0) {
-                        gf_log ("glusterd", GF_LOG_DEBUG, "Probe host %s port %d"
-                                " already a peer", cli_req.hostname, cli_req.port);
+                        gf_log ("glusterd", GF_LOG_DEBUG, "Probe host %s port "
+                                "%d already a peer", cli_req.hostname,
+                                cli_req.port);
                         glusterd_xfer_cli_probe_resp (req, 0, GF_PROBE_FRIEND, NULL,
                                                       cli_req.hostname, cli_req.port);
@@ -694,8 +714,8 @@ glusterd_handle_cli_probe (rpcsvc_request_t *req)
         }
 
         ret = glusterd_probe_begin (req, cli_req.hostname, cli_req.port);
 
-        gf_cmd_log ("peer probe","on host %s:%d %s",cli_req.hostname, cli_req.port,
-                    (ret) ? "FAILED" : "SUCCESS");
+        gf_cmd_log ("peer probe","on host %s:%d %s",cli_req.hostname,
+                    cli_req.port, (ret) ? "FAILED" : "SUCCESS");
 
         if (ret == GLUSTERD_CONNECTION_AWAITED) {
                 //fsm should be run after connection establishes
@@ -717,7 +737,7 @@ int
 glusterd_handle_cli_deprobe (rpcsvc_request_t *req)
 {
         int32_t              ret      = -1;
-        gf1_cli_deprobe_req  cli_req  = {0,};
+        gf1_cli_deprobe_req cli_req   = {0,};
         uuid_t               uuid     = {0};
         int                  op_errno = 0;
         xlator_t            *this     = NULL;
@@ -750,18 +770,29 @@ glusterd_handle_cli_deprobe (rpcsvc_request_t *req)
                 goto out;
         }
 
-        if (!uuid_is_null (uuid) && !(cli_req.flags & GF_CLI_FLAG_OP_FORCE)) {
-                /* Check if peers are connected, except peer being detached*/
-                if (!glusterd_chk_peers_connected_befriended (uuid)) {
-                        ret = -1;
-                        op_errno = GF_DEPROBE_FRIEND_DOWN;
-                        goto out;
+        if (!(cli_req.flags & GF_CLI_FLAG_OP_FORCE)) {
+                if (!uuid_is_null (uuid)) {
+                        /* Check if peers are connected, except peer being detached*/
+                        if (!glusterd_chk_peers_connected_befriended (uuid)) {
+                                ret = -1;
+                                op_errno = GF_DEPROBE_FRIEND_DOWN;
+                                goto out;
+                        }
+                        ret = glusterd_all_volume_cond_check (
+                                                glusterd_friend_brick_belongs,
+                                                -1, &uuid);
+                        if (ret) {
+                                op_errno = GF_DEPROBE_BRICK_EXIST;
+                                goto out;
+                        }
                 }
-                ret = glusterd_all_volume_cond_check (
-                                        glusterd_friend_brick_belongs,
-                                        -1, &uuid);
-                if (ret) {
-                        op_errno = GF_DEPROBE_BRICK_EXIST;
+
+                if (glusterd_is_any_volume_in_server_quorum (this) &&
+                    !does_gd_meet_server_quorum (this)) {
+                        gf_log (this->name, GF_LOG_ERROR, "Quorum does not "
+                                "meet, rejecting operation");
+                        ret = -1;
+                        op_errno = GF_DEPROBE_QUORUM_NOT_MET;
                         goto out;
                 }
         }
@@ -2145,6 +2176,43 @@ out:
         return ret;
 }
 
+int
+glusterd_friend_rpc_create (xlator_t *this, glusterd_peerinfo_t *peerinfo,
+                            glusterd_peerctx_args_t *args)
+{
+        dict_t              *options = NULL;
+        int                  ret     = -1;
+        glusterd_peerctx_t  *peerctx = NULL;
+
+        peerctx = GF_CALLOC (1, sizeof (*peerctx), gf_gld_mt_peerctx_t);
+        if (!peerctx)
+                goto out;
+
+        if (args)
+                peerctx->args = *args;
+
+        peerctx->peerinfo = peerinfo;
+
+        ret = glusterd_transport_inet_options_build (&options,
+                                                     peerinfo->hostname,
+                                                     peerinfo->port);
+        if (ret)
+                goto out;
+
+        ret = glusterd_rpc_create (&peerinfo->rpc, options,
+                                   glusterd_peer_rpc_notify, peerctx);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "failed to create rpc for"
+                        " peer %s", peerinfo->hostname);
+                goto out;
+        }
+        peerctx = NULL;
+        ret = 0;
+out:
+        GF_FREE (peerctx);
+        return ret;
+}
+
 int
 glusterd_friend_add (const char *hoststr, int port,
                      glusterd_friend_sm_state_t state,
@@ -2156,8 +2224,6 @@ glusterd_friend_add (const char *hoststr, int port,
         int                  ret      = 0;
         xlator_t            *this     = NULL;
         glusterd_conf_t     *conf     = NULL;
-        glusterd_peerctx_t  *peerctx  = NULL;
-        dict_t              *options  = NULL;
         gf_boolean_t         handover = _gf_false;
 
         this = THIS;
@@ -2165,49 +2231,35 @@ glusterd_friend_add (const char *hoststr, int port,
         GF_ASSERT (conf);
         GF_ASSERT (hoststr);
 
-        peerctx = GF_CALLOC (1, sizeof (*peerctx), gf_gld_mt_peerctx_t);
-        if (!peerctx) {
-                ret = -1;
-                goto out;
-        }
-
-        if (args)
-                peerctx->args = *args;
-
-        ret = glusterd_peerinfo_new (friend, state, uuid, hoststr);
-        if (ret)
-                goto out;
-
-        peerctx->peerinfo = *friend;
-
-        ret = glusterd_transport_inet_options_build (&options, hoststr, port);
+        ret = glusterd_peerinfo_new (friend, state, uuid,
+                                     hoststr, port);
         if (ret)
                 goto out;
 
-        if (!restore) {
-                ret = glusterd_store_peerinfo (*friend);
-                if (ret) {
-                        gf_log (this->name, GF_LOG_ERROR, "Failed to store "
-                                "peerinfo");
+        //restore needs to first create the list of peers, then create rpcs
+        //to keep track of quorum in race-free manner. In restore for each peer
+        //rpc-create calls rpc_notify when the friend-list is partially
+        //constructed, leading to wrong quorum calculations.
+        if (restore)
+                goto done;
 
-                        goto out;
-                }
-        }
-        list_add_tail (&(*friend)->uuid_list, &conf->peers);
-        ret = glusterd_rpc_create (&(*friend)->rpc, options,
-                                   glusterd_peer_rpc_notify,
-                                   peerctx);
+        ret = glusterd_store_peerinfo (*friend);
         if (ret) {
-                gf_log (this->name, GF_LOG_ERROR, "failed to create rpc for"
-                        " peer %s", (char*)hoststr);
+                gf_log (this->name, GF_LOG_ERROR, "Failed to store "
+                        "peerinfo");
                 goto out;
         }
 
+        ret = glusterd_friend_rpc_create (this, *friend, args);
+        if (ret)
+                goto out;
+
+done:
+        list_add_tail (&(*friend)->uuid_list, &conf->peers);
         handover = _gf_true;
 
 out:
         if (ret && !handover) {
-            (void) glusterd_friend_cleanup (*friend);
-            *friend = NULL;
+                (void) glusterd_friend_cleanup (*friend);
+                *friend = NULL;
         }
 
         gf_log (this->name, GF_LOG_INFO, "connect returned %d", ret);
@@ -2866,6 +2918,7 @@ glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata,
         glusterd_peerctx_t  *peerctx       = NULL;
         uuid_t               owner         = {0,};
         uuid_t              *peer_uuid     = NULL;
+        gf_boolean_t         quorum_action = _gf_false;
 
         peerctx = mydata;
         if (!peerctx)
@@ -2880,6 +2933,7 @@ glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata,
         {
                 gf_log (this->name, GF_LOG_DEBUG, "got RPC_CLNT_CONNECT");
                 peerinfo->connected = 1;
+                peerinfo->quorum_action = _gf_true;
 
                 ret = glusterd_peer_dump_version (this, rpc, peerctx);
                 if (ret)
@@ -2892,6 +2946,14 @@
                 gf_log (this->name, GF_LOG_DEBUG, "got RPC_CLNT_DISCONNECT %d",
                         peerinfo->state.state);
 
+                if ((peerinfo->quorum_contrib != QUORUM_DOWN) &&
+                    (peerinfo->state.state == GD_FRIEND_STATE_BEFRIENDED)) {
+                        peerinfo->quorum_contrib = QUORUM_DOWN;
+                        quorum_action = _gf_true;
+                        peerinfo->quorum_action = _gf_false;
+                }
+                peerinfo->connected = 0;
+
                 /* local glusterd (thinks that it) is the owner of the cluster
                    lock and 'fails' the operation on the first disconnect from
@@ -2944,6 +3006,8 @@
         glusterd_friend_sm ();
         glusterd_op_sm ();
 
+        if (quorum_action)
+                glusterd_do_quorum_action ();
         return ret;
 }
--
cgit
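
Note on the quorum gate this patch wires into the probe/detach handlers (text
after the patch trailer is ignored by git-am): an operation is refused when
some volume has server-quorum enabled (glusterd_is_any_volume_in_server_quorum)
but the set of active, befriended peers no longer clears the quorum threshold
(does_gd_meet_server_quorum). The C sketch below illustrates only the majority
arithmetic behind such a predicate; the helper name meets_server_quorum, its
parameters, and the strict-majority default are assumptions made for this
example, not code from the patch or from glusterd.

/* Illustrative sketch only -- assumed helper name and parameters, not
 * part of this patch.  Shows a strict-majority test of the kind a
 * does_gd_meet_server_quorum()-style predicate performs over the
 * trusted pool. */
#include <stdbool.h>
#include <stdio.h>

/* active: befriended peers currently connected, counting this node.
 * total:  all peers in the trusted pool, counting this node.
 * A strict majority means a pool split into two equal halves loses
 * quorum on both sides, so neither half accepts probe/detach. */
static bool
meets_server_quorum (int active, int total)
{
        return (2 * active) > total;
}

int
main (void)
{
        /* A 3-node pool keeps quorum with one peer down ... */
        printf ("3 up of 3: %d\n", meets_server_quorum (3, 3)); /* 1 */
        printf ("2 up of 3: %d\n", meets_server_quorum (2, 3)); /* 1 */
        /* ... but loses it with two down; the handlers above would
         * answer GF_PROBE_QUORUM_NOT_MET / GF_DEPROBE_QUORUM_NOT_MET. */
        printf ("1 up of 3: %d\n", meets_server_quorum (1, 3)); /* 0 */
        return 0;
}

The disconnect path feeds the same test: glusterd_peer_rpc_notify() marks a
befriended peer QUORUM_DOWN and sets quorum_action, so once the friend and op
state machines have run, glusterd_do_quorum_action() re-evaluates quorum with
the reduced active count.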