From d7cc779da48e484d0edb7bfbd903f941d0ccca32 Mon Sep 17 00:00:00 2001 From: Kaushal M Date: Mon, 3 Sep 2012 15:58:26 +0530 Subject: glusterd: op-version handshake implementation Brings in a new rpc program MGMT_HANDSHAKE, which implements the op-version handshake. This is required for bringing in the op-version feature as described in http://www.gluster.org/community/documentation/index.php/Features/Opversion Change-Id: I4333fd2714dbbd3a2a3fca5862cbb3c56615529e BUG: 814534 Signed-off-by: Kaushal M Reviewed-on: http://review.gluster.org/3688 Reviewed-by: Vijay Bellur Tested-by: Gluster Build System --- xlators/mgmt/glusterd/src/glusterd-handshake.c | 574 +++++++++++++++++++++++-- 1 file changed, 550 insertions(+), 24 deletions(-) (limited to 'xlators/mgmt/glusterd/src/glusterd-handshake.c') diff --git a/xlators/mgmt/glusterd/src/glusterd-handshake.c b/xlators/mgmt/glusterd/src/glusterd-handshake.c index ab38de17c..428051d4a 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handshake.c +++ b/xlators/mgmt/glusterd/src/glusterd-handshake.c @@ -21,6 +21,7 @@ #include "glusterd.h" #include "glusterd-utils.h" #include "glusterd-op-sm.h" +#include "glusterd-store.h" #include "glusterfs3.h" #include "protocol-common.h" @@ -267,34 +268,247 @@ fail: return 0; } + +int +gd_validate_cluster_op_version (xlator_t *this, int cluster_op_version, + char *peerid) +{ + int ret = -1; + glusterd_conf_t *conf = NULL; + + conf = this->private; + + if (cluster_op_version > GD_OP_VERSION_MAX) { + gf_log (this->name, GF_LOG_ERROR, + "operating version %d is more than the maximum " + "supported (%d) on the machine (as per peer request " + "from %s)", cluster_op_version, GD_OP_VERSION_MAX, + peerid); + goto out; + } + + if (cluster_op_version < conf->op_version) { + gf_log (this->name, GF_LOG_ERROR, + "operating version %d is less than the currently " + "running version (%d) on the machine (as per peer " + "request from %s)", cluster_op_version, + conf->op_version, peerid); + goto out; + } + + ret = 0; +out: + return ret; +} + +int +glusterd_mgmt_hndsk_versions (rpcsvc_request_t *req) +{ + dict_t *dict = NULL; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + int ret = -1; + int op_errno = EINVAL; + gf_mgmt_hndsk_req args = {{0,},}; + gf_mgmt_hndsk_rsp rsp = {0,}; + + this = THIS; + conf = this->private; + + if (!xdr_to_generic (req->msg[0], &args, + (xdrproc_t)xdr_gf_mgmt_hndsk_req)) { + //failed to decode msg; + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + dict = dict_new (); + if (!dict) + goto out; + + ret = dict_set_int32 (dict, GD_OP_VERSION_KEY, conf->op_version); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "failed to set operating version"); + rsp.op_ret = ret; + goto out; + } + + ret = dict_set_int32 (dict, GD_MIN_OP_VERSION_KEY, GD_OP_VERSION_MIN); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "failed to set %s", GD_MIN_OP_VERSION_KEY); + rsp.op_ret = ret; + goto out; + } + + ret = dict_set_int32 (dict, GD_MAX_OP_VERSION_KEY, GD_OP_VERSION_MAX); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "failed to set %s", GD_MAX_OP_VERSION_KEY); + rsp.op_ret = ret; + goto out; + } + + ret = 0; + + GF_PROTOCOL_DICT_SERIALIZE (this, dict, (&rsp.hndsk.hndsk_val), + rsp.hndsk.hndsk_len, op_errno, out); +out: + + rsp.op_ret = ret; + rsp.op_errno = op_errno; + + glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gf_mgmt_hndsk_rsp); + + ret = 0; + + if (dict) + dict_unref (dict); + + if (args.hndsk.hndsk_val) + free (args.hndsk.hndsk_val); + + if (rsp.hndsk.hndsk_val) + GF_FREE (rsp.hndsk.hndsk_val); + + return ret; +} + +int +glusterd_mgmt_hndsk_versions_ack (rpcsvc_request_t *req) +{ + dict_t *clnt_dict = NULL; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + int ret = -1; + int op_errno = EINVAL; + int32_t peer_op_version = 0; + gf_mgmt_hndsk_req args = {{0,},}; + gf_mgmt_hndsk_rsp rsp = {0,}; + + this = THIS; + conf = this->private; + + if (!xdr_to_generic (req->msg[0], &args, + (xdrproc_t)xdr_gf_mgmt_hndsk_req)) { + //failed to decode msg; + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + GF_PROTOCOL_DICT_UNSERIALIZE (this, clnt_dict, args.hndsk.hndsk_val, + (args.hndsk.hndsk_len), ret, op_errno, + out); + + ret = dict_get_int32 (clnt_dict, GD_OP_VERSION_KEY, + &peer_op_version); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "failed to get the op-version key peer=%s", + req->trans->peerinfo.identifier); + goto out; + } + + ret = gd_validate_cluster_op_version (this, peer_op_version, + req->trans->peerinfo.identifier); + if (ret) + goto out; + + + /* As this is ACK from the Cluster for the versions supported, + can set the op-version of 'this' glusterd to the one + received. */ + gf_log (this->name, GF_LOG_INFO, "using the op-version %d", + peer_op_version); + conf->op_version = peer_op_version; + ret = glusterd_store_global_info (this); + if (ret) + gf_log (this->name, GF_LOG_ERROR, "Failed to store op-version"); + +out: + rsp.op_ret = ret; + rsp.op_errno = op_errno; + + glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gf_mgmt_hndsk_rsp); + + if (clnt_dict) + dict_unref (clnt_dict); + + if (args.hndsk.hndsk_val) + free (args.hndsk.hndsk_val); + + return ret; +} + + rpcsvc_actor_t gluster_handshake_actors[] = { - [GF_HNDSK_NULL] = {"NULL", GF_HNDSK_NULL, NULL, NULL, 0}, - [GF_HNDSK_GETSPEC] = {"GETSPEC", GF_HNDSK_GETSPEC, server_getspec, NULL, 0}, - [GF_HNDSK_EVENT_NOTIFY] = {"EVENTNOTIFY", GF_HNDSK_EVENT_NOTIFY, server_event_notify, - NULL, 0}, + [GF_HNDSK_NULL] = {"NULL", GF_HNDSK_NULL, NULL, NULL, 0}, + [GF_HNDSK_GETSPEC] = {"GETSPEC", GF_HNDSK_GETSPEC, + server_getspec, NULL, 0}, + [GF_HNDSK_EVENT_NOTIFY] = {"EVENTNOTIFY", GF_HNDSK_EVENT_NOTIFY, + server_event_notify, NULL, 0}, }; struct rpcsvc_program gluster_handshake_prog = { - .progname = "GlusterFS Handshake", + .progname = "Gluster Handshake", .prognum = GLUSTER_HNDSK_PROGRAM, .progver = GLUSTER_HNDSK_VERSION, .actors = gluster_handshake_actors, .numactors = GF_HNDSK_MAXVALUE, }; + char *glusterd_dump_proc[GF_DUMP_MAXVALUE] = { [GF_DUMP_NULL] = "NULL", [GF_DUMP_DUMP] = "DUMP", }; rpc_clnt_prog_t glusterd_dump_prog = { - .progname = "GLUSTERD-DUMP", - .prognum = GLUSTER_DUMP_PROGRAM, - .progver = GLUSTER_DUMP_VERSION, - .procnames = glusterd_dump_proc, + .progname = "GLUSTERD-DUMP", + .prognum = GLUSTER_DUMP_PROGRAM, + .progver = GLUSTER_DUMP_VERSION, + .procnames = glusterd_dump_proc, }; + +rpcsvc_actor_t glusterd_mgmt_hndsk_actors[] = { + [GD_MGMT_HNDSK_NULL] = {"NULL", GD_MGMT_HNDSK_NULL, NULL, + NULL, 0}, + [GD_MGMT_HNDSK_VERSIONS] = {"MGMT-VERS", GD_MGMT_HNDSK_VERSIONS, + glusterd_mgmt_hndsk_versions, NULL, + 0}, + [GD_MGMT_HNDSK_VERSIONS_ACK] = {"MGMT-VERS-ACK", + GD_MGMT_HNDSK_VERSIONS_ACK, + glusterd_mgmt_hndsk_versions_ack, + NULL, 0}, +}; + +struct rpcsvc_program glusterd_mgmt_hndsk_prog = { + .progname = "Gluster MGMT Handshake", + .prognum = GD_MGMT_HNDSK_PROGRAM, + .progver = GD_MGMT_HNDSK_VERSION, + .actors = glusterd_mgmt_hndsk_actors, + .numactors = GD_MGMT_HNDSK_MAXVALUE, +}; + +char *glusterd_mgmt_hndsk_proc[GD_MGMT_HNDSK_MAXVALUE] = { + [GD_MGMT_HNDSK_NULL] = "NULL", + [GD_MGMT_HNDSK_VERSIONS] = "MGMT-VERS", + [GD_MGMT_HNDSK_VERSIONS_ACK] = "MGMT-VERS-ACK", +}; + +rpc_clnt_prog_t gd_clnt_mgmt_hndsk_prog = { + .progname = "Gluster MGMT Handshake", + .prognum = GD_MGMT_HNDSK_PROGRAM, + .progver = GD_MGMT_HNDSK_VERSION, + .procnames = glusterd_mgmt_hndsk_proc, +}; + + static int glusterd_event_connected_inject (glusterd_peerctx_t *peerctx) { @@ -343,12 +557,279 @@ out: return ret; } + +int +gd_validate_peer_op_version (xlator_t *this, glusterd_peerinfo_t *peerinfo, + dict_t *dict, char **errstr) +{ + int ret = -1; + glusterd_conf_t *conf = NULL; + int32_t peer_op_version = 0; + int32_t peer_min_op_version = 0; + int32_t peer_max_op_version = 0; + + if (!dict && !this && !peerinfo) + goto out; + + conf = this->private; + + ret = dict_get_int32 (dict, GD_OP_VERSION_KEY, &peer_op_version); + if (ret) + goto out; + + ret = dict_get_int32 (dict, GD_MAX_OP_VERSION_KEY, + &peer_max_op_version); + if (ret) + goto out; + + ret = dict_get_int32 (dict, GD_MIN_OP_VERSION_KEY, + &peer_min_op_version); + if (ret) + goto out; + + ret = -1; + /* Check if peer can support our op_version */ + if ((peer_max_op_version < conf->op_version) || + (peer_min_op_version > conf->op_version)) { + ret = gf_asprintf (errstr, "Peer %s does not support required " + "op-version", peerinfo->hostname); + ret = -1; + goto out; + } + + /* If peer is already operating at a higher op_version reject it. + * Cluster cannot be moved to higher op_version to accomodate a peer. + */ + if (peer_op_version > conf->op_version) { + ret = gf_asprintf (errstr, "Peer %s is already at a higher " + "op-version", peerinfo->hostname); + ret = -1; + goto out; + } + + ret = 0; +out: + gf_log (this->name , GF_LOG_DEBUG, "Peer %s %s", peerinfo->hostname, + ((ret < 0) ? "rejected" : "accepted")); + return ret; +} + +int +glusterd_mgmt_hndsk_version_ack_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int ret = -1; + int op_errno = EINVAL; + gf_mgmt_hndsk_rsp rsp = {0,}; + xlator_t *this = NULL; + call_frame_t *frame = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_peerctx_t *peerctx = NULL; + char msg[1024] = {0,}; + + this = THIS; + frame = myframe; + peerctx = frame->local; + peerinfo = peerctx->peerinfo; + + if (-1 == req->rpc_status) { + snprintf (msg, sizeof (msg), + "Error through RPC layer, retry again later"); + gf_log ("", GF_LOG_ERROR, "%s", msg); + peerctx->errstr = gf_strdup (msg); + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_mgmt_hndsk_rsp); + if (ret < 0) { + snprintf (msg, sizeof (msg), "Failed to decode XDR"); + gf_log ("", GF_LOG_ERROR, "%s", msg); + peerctx->errstr = gf_strdup (msg); + goto out; + } + + op_errno = rsp.op_errno; + if (-1 == rsp.op_ret) { + ret = -1; + snprintf (msg, sizeof (msg), + "Failed to get handshake ack from remote server"); + gf_log (frame->this->name, GF_LOG_ERROR, "%s", msg); + peerctx->errstr = gf_strdup (msg); + goto out; + } + + /* TODO: this is hardcoded as of now, but I don't forsee any problems + * with this as long as we are properly handshaking operating versions + */ + peerinfo->mgmt = &gd_mgmt_prog; + peerinfo->peer = &gd_peer_prog; + + ret = default_notify (this, GF_EVENT_CHILD_UP, NULL); + + if (GD_MODE_ON == peerctx->args.mode) { + ret = glusterd_event_connected_inject (peerctx); + peerctx->args.req = NULL; + } else if (GD_MODE_SWITCH_ON == peerctx->args.mode) { + peerctx->args.mode = GD_MODE_ON; + } else { + gf_log (this->name, GF_LOG_WARNING, "unknown mode %d", + peerctx->args.mode); + } + + glusterd_friend_sm (); + + ret = 0; +out: + + frame->local = NULL; + STACK_DESTROY (frame->root); + + if (ret != 0) + rpc_transport_disconnect (peerinfo->rpc->conn.trans); + + if (rsp.hndsk.hndsk_val) + free (rsp.hndsk.hndsk_val); + + return 0; +} + +int +glusterd_mgmt_hndsk_version_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int ret = -1; + int op_errno = EINVAL; + gf_mgmt_hndsk_rsp rsp = {0,}; + gf_mgmt_hndsk_req arg = {{0,}}; + xlator_t *this = NULL; + call_frame_t *frame = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_peerctx_t *peerctx = NULL; + dict_t *dict = NULL; + dict_t *rsp_dict = NULL; + glusterd_conf_t *conf = NULL; + char msg[1024] = {0,}; + + this = THIS; + conf = this->private; + frame = myframe; + peerctx = frame->local; + peerinfo = peerctx->peerinfo; + + if (-1 == req->rpc_status) { + ret = -1; + snprintf (msg, sizeof (msg), + "Error through RPC layer, retry again later"); + gf_log (this->name, GF_LOG_ERROR, "%s", msg); + peerctx->errstr = gf_strdup (msg); + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_mgmt_hndsk_rsp); + if (ret < 0) { + snprintf (msg, sizeof (msg), "Failed to decode management " + "handshake response"); + gf_log (this->name, GF_LOG_ERROR, "%s", msg); + peerctx->errstr = gf_strdup (msg); + goto out; + } + + GF_PROTOCOL_DICT_UNSERIALIZE (this, dict, rsp.hndsk.hndsk_val, + rsp.hndsk.hndsk_len, ret, op_errno, + out); + + op_errno = rsp.op_errno; + if (-1 == rsp.op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "failed to get the 'versions' from peer (%s)", + req->conn->trans->peerinfo.identifier); + goto out; + } + + /* Check if peer can be part of cluster */ + ret = gd_validate_peer_op_version (this, peerinfo, dict, + &peerctx->errstr); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to validate the operating version of peer (%s)", + peerinfo->hostname); + goto out; + } + + rsp_dict = dict_new (); + if (!rsp_dict) + goto out; + + ret = dict_set_int32 (rsp_dict, GD_OP_VERSION_KEY, conf->op_version); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, + "failed to set operating version in dict"); + goto out; + } + + GF_PROTOCOL_DICT_SERIALIZE (this, rsp_dict, (&arg.hndsk.hndsk_val), + arg.hndsk.hndsk_len, op_errno, out); + + ret = glusterd_submit_request (peerctx->peerinfo->rpc, &arg, frame, + &gd_clnt_mgmt_hndsk_prog, + GD_MGMT_HNDSK_VERSIONS_ACK, NULL, this, + glusterd_mgmt_hndsk_version_ack_cbk, + (xdrproc_t)xdr_gf_mgmt_hndsk_req); + +out: + if (ret) { + frame->local = NULL; + STACK_DESTROY (frame->root); + rpc_transport_disconnect (peerinfo->rpc->conn.trans); + } + + if (rsp.hndsk.hndsk_val) + free (rsp.hndsk.hndsk_val); + + if (arg.hndsk.hndsk_val) + GF_FREE (arg.hndsk.hndsk_val); + + if (dict) + dict_unref (dict); + + if (rsp_dict) + dict_unref (rsp_dict); + + return 0; +} + +int +glusterd_mgmt_handshake (xlator_t *this, glusterd_peerctx_t *peerctx) +{ + call_frame_t *frame = NULL; + gf_mgmt_hndsk_req req = {{0,},}; + int ret = -1; + + frame = create_frame (this, this->ctx->pool); + if (!frame) + goto out; + + frame->local = peerctx; + + ret = glusterd_submit_request (peerctx->peerinfo->rpc, &req, frame, + &gd_clnt_mgmt_hndsk_prog, + GD_MGMT_HNDSK_VERSIONS, NULL, this, + glusterd_mgmt_hndsk_version_cbk, + (xdrproc_t)xdr_gf_mgmt_hndsk_req); + ret = 0; +out: + if (ret && frame) + STACK_DESTROY (frame->root); + + return ret; +} + int glusterd_set_clnt_mgmt_program (glusterd_peerinfo_t *peerinfo, gf_prog_detail *prog) { - gf_prog_detail *trav = NULL; - int ret = -1; + gf_prog_detail *trav = NULL; + int ret = -1; if (!peerinfo || !prog) goto out; @@ -356,39 +837,65 @@ glusterd_set_clnt_mgmt_program (glusterd_peerinfo_t *peerinfo, trav = prog; while (trav) { - /* Select 'programs' */ + ret = -1; if ((gd_mgmt_prog.prognum == trav->prognum) && (gd_mgmt_prog.progver == trav->progver)) { peerinfo->mgmt = &gd_mgmt_prog; ret = 0; } + if ((gd_peer_prog.prognum == trav->prognum) && (gd_peer_prog.progver == trav->progver)) { peerinfo->peer = &gd_peer_prog; ret = 0; } + if (ret) { gf_log ("", GF_LOG_DEBUG, "%s (%"PRId64":%"PRId64") not supported", trav->progname, trav->prognum, trav->progver); } + trav = trav->next; } if (peerinfo->mgmt) { - gf_log ("", GF_LOG_INFO, - "Using Program %s, Num (%d), Version (%d)", - peerinfo->mgmt->progname, peerinfo->mgmt->prognum, - peerinfo->mgmt->progver); + gf_log ("", GF_LOG_INFO, + "Using Program %s, Num (%d), Version (%d)", + peerinfo->mgmt->progname, peerinfo->mgmt->prognum, + peerinfo->mgmt->progver); } + if (peerinfo->peer) { - gf_log ("", GF_LOG_INFO, - "Using Program %s, Num (%d), Version (%d)", - peerinfo->peer->progname, peerinfo->peer->prognum, - peerinfo->peer->progver); + gf_log ("", GF_LOG_INFO, + "Using Program %s, Num (%d), Version (%d)", + peerinfo->peer->progname, peerinfo->peer->prognum, + peerinfo->peer->progver); } + ret = 0; +out: + return ret; + +} +static gf_boolean_t +_mgmt_hndsk_prog_present (gf_prog_detail *prog) { + gf_boolean_t ret = _gf_false; + gf_prog_detail *trav = NULL; + + GF_ASSERT (prog); + + trav = prog; + + while (trav) { + if ((trav->prognum == GD_MGMT_HNDSK_PROGRAM) && + (trav->progver == GD_MGMT_HNDSK_VERSION)) { + ret = _gf_true; + goto out; + } + trav = trav->next; + } out: return ret; } @@ -405,9 +912,11 @@ glusterd_peer_dump_version_cbk (struct rpc_req *req, struct iovec *iov, call_frame_t *frame = NULL; glusterd_peerinfo_t *peerinfo = NULL; glusterd_peerctx_t *peerctx = NULL; + glusterd_conf_t *conf = NULL; char msg[1024] = {0,}; this = THIS; + conf = this->private; frame = myframe; peerctx = frame->local; peerinfo = peerctx->peerinfo; @@ -435,6 +944,22 @@ glusterd_peer_dump_version_cbk (struct rpc_req *req, struct iovec *iov, goto out; } + if (_mgmt_hndsk_prog_present (rsp.prog)) { + gf_log (this->name, GF_LOG_DEBUG, + "Proceeding to op-version handshake with peer %s", + peerinfo->hostname); + ret = glusterd_mgmt_handshake (this, peerctx); + goto out; + } else if (conf->op_version > 1) { + ret = -1; + snprintf (msg, sizeof (msg), + "Peer %s does not support required op-version", + peerinfo->hostname); + peerctx->errstr = gf_strdup (msg); + gf_log (this->name, GF_LOG_ERROR, "%s", msg); + goto out; + } + /* Make sure we assign the proper program to peer */ ret = glusterd_set_clnt_mgmt_program (peerinfo, rsp.prog); if (ret) { @@ -454,10 +979,11 @@ glusterd_peer_dump_version_cbk (struct rpc_req *req, struct iovec *iov, peerctx->args.mode); } - glusterd_friend_sm (); - glusterd_op_sm (); + glusterd_friend_sm(); + glusterd_op_sm(); ret = 0; + out: /* don't use GF_FREE, buffer was allocated by libc */ @@ -482,8 +1008,8 @@ out: int -glusterd_peer_handshake (xlator_t *this, struct rpc_clnt *rpc, - glusterd_peerctx_t *peerctx) +glusterd_peer_dump_version (xlator_t *this, struct rpc_clnt *rpc, + glusterd_peerctx_t *peerctx) { call_frame_t *frame = NULL; gf_dump_req req = {0,}; -- cgit