Diffstat (limited to 'xlators/mgmt/glusterd/src/glusterd-sm.c')
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-sm.c | 264
1 file changed, 206 insertions(+), 58 deletions(-)
diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.c b/xlators/mgmt/glusterd/src/glusterd-sm.c
index 94e7ca08a..7a8b2c94f 100644
--- a/xlators/mgmt/glusterd/src/glusterd-sm.c
+++ b/xlators/mgmt/glusterd/src/glusterd-sm.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is GF_FREE software; you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
@@ -44,6 +34,7 @@
#include "glusterd-op-sm.h"
#include "glusterd-utils.h"
#include "glusterd-store.h"
+#include "glusterd-etcd.h"
static struct list_head gd_friend_sm_queue;
@@ -100,8 +91,7 @@ glusterd_destroy_probe_ctx (glusterd_probe_ctx_t *ctx)
if (!ctx)
return;
- if (ctx->hostname)
- GF_FREE (ctx->hostname);
+ GF_FREE (ctx->hostname);
GF_FREE (ctx);
}
@@ -113,8 +103,7 @@ glusterd_destroy_friend_req_ctx (glusterd_friend_req_ctx_t *ctx)
if (ctx->vols)
dict_unref (ctx->vols);
- if (ctx->hostname)
- GF_FREE (ctx->hostname);
+ GF_FREE (ctx->hostname);
GF_FREE (ctx);
}
@@ -123,8 +112,7 @@ glusterd_destroy_friend_update_ctx (glusterd_friend_update_ctx_t *ctx)
{
if (!ctx)
return;
- if (ctx->hostname)
- GF_FREE (ctx->hostname);
+ GF_FREE (ctx->hostname);
GF_FREE (ctx);
}
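
Note on the three hunks above: they all drop the same redundant guard. GF_FREE, like free(3), is a no-op on a NULL pointer (C99 7.20.3.2 for free), so the `if (ctx->hostname)` checks add nothing. A minimal standalone sketch, with plain free standing in for GF_FREE:

#include <stdlib.h>

int
main (void)
{
        char *hostname = NULL;   /* never allocated */
        free (hostname);         /* safe: free(NULL) does nothing */
        return 0;
}
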
@@ -168,7 +156,7 @@ glusterd_broadcast_friend_delete (char *hostname, uuid_t uuid)
goto out;
list_for_each_entry (peerinfo, &priv->peers, uuid_list) {
- if (!peerinfo->connected || !peerinfo->mgmt)
+ if (!peerinfo->connected || !peerinfo->peer)
continue;
ret = dict_set_static_ptr (friends, "peerinfo", peerinfo);
@@ -177,7 +165,7 @@ glusterd_broadcast_friend_delete (char *hostname, uuid_t uuid)
goto out;
}
- proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_FRIEND_UPDATE];
+ proc = &peerinfo->peer->proctable[GLUSTERD_FRIEND_UPDATE];
if (proc->fn) {
ret = proc->fn (NULL, this, friends);
}
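
Every call site touched by this patch follows the same dispatch shape: fetch a procedure from the peer's proctable and invoke its fn if it is set; the rename from peerinfo->mgmt / GLUSTERD_MGMT_* to peerinfo->peer / GLUSTERD_* does not change that shape. A standalone sketch of the pattern with simplified, hypothetical types:

#include <stdio.h>

/* Simplified stand-ins for rpc_clnt_procedure_t and the proctable. */
typedef int (*proc_fn) (void *req, void *this, void *data);

struct procedure { proc_fn fn; };

enum { FRIEND_ADD, FRIEND_UPDATE, PROC_MAX };

static int
send_update (void *req, void *this, void *data)
{
        (void) req; (void) this; (void) data;
        printf ("friend update sent\n");
        return 0;
}

int
main (void)
{
        struct procedure  table[PROC_MAX] = { { 0 }, { send_update } };
        struct procedure *proc = &table[FRIEND_UPDATE];

        if (proc->fn)                    /* unset slots are simply skipped */
                proc->fn (NULL, NULL, NULL);
        return 0;
}
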
@@ -257,12 +245,9 @@ glusterd_ac_reverse_probe_begin (glusterd_friend_sm_event_t *event, void *ctx)
out:
if (ret) {
- if (new_event)
- GF_FREE (new_event);
- if (new_ev_ctx->hostname)
- GF_FREE (new_ev_ctx->hostname);
- if (new_ev_ctx)
- GF_FREE (new_ev_ctx);
+ GF_FREE (new_event);
+ if (new_ev_ctx)
+ GF_FREE (new_ev_ctx->hostname);
+ GF_FREE (new_ev_ctx);
}
gf_log ("", GF_LOG_DEBUG, "returning with %d", ret);
return ret;
@@ -286,9 +271,9 @@ glusterd_ac_friend_add (glusterd_friend_sm_event_t *event, void *ctx)
GF_ASSERT (conf);
- if (!peerinfo->mgmt)
+ if (!peerinfo->peer)
goto out;
- proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_FRIEND_ADD];
+ proc = &peerinfo->peer->proctable[GLUSTERD_FRIEND_ADD];
if (proc->fn) {
frame = create_frame (this, this->ctx->pool);
if (!frame) {
@@ -335,9 +320,9 @@ glusterd_ac_friend_probe (glusterd_friend_sm_event_t *event, void *ctx)
goto out;
}
- if (!peerinfo->mgmt)
+ if (!peerinfo->peer)
goto out;
- proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_PROBE_QUERY];
+ proc = &peerinfo->peer->proctable[GLUSTERD_PROBE_QUERY];
if (proc->fn) {
frame = create_frame (this, this->ctx->pool);
if (!frame) {
@@ -415,7 +400,9 @@ glusterd_ac_send_friend_remove_req (glusterd_friend_sm_event_t *event,
if (ctx)
ret = glusterd_xfer_cli_deprobe_resp (ctx->req, ret, 0,
- ctx->hostname);
+ NULL,
+ ctx->hostname,
+ ctx->dict);
glusterd_friend_sm ();
glusterd_op_sm ();
@@ -426,9 +413,9 @@ glusterd_ac_send_friend_remove_req (glusterd_friend_sm_event_t *event,
goto out;
}
- if (!peerinfo->mgmt)
+ if (!peerinfo->peer)
goto out;
- proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_FRIEND_REMOVE];
+ proc = &peerinfo->peer->proctable[GLUSTERD_FRIEND_REMOVE];
if (proc->fn) {
frame = create_frame (this, this->ctx->pool);
if (!frame) {
@@ -444,22 +431,36 @@ out:
return ret;
}
+static gf_boolean_t
+glusterd_should_update_peer (glusterd_peerinfo_t *peerinfo,
+ glusterd_peerinfo_t *cur_peerinfo)
+{
+ gf_boolean_t is_valid = _gf_false;
+
+ if ((peerinfo == cur_peerinfo) ||
+ (peerinfo->state.state == GD_FRIEND_STATE_BEFRIENDED))
+ is_valid = _gf_true;
+
+ return is_valid;
+}
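
The new helper gates the two loops that follow: the peer named in the event always receives the update, while any other peer receives it only once fully befriended. A standalone illustration with simplified, hypothetical types:

#include <stdbool.h>
#include <stdio.h>

enum state { PROBE_SENT, BEFRIENDED };
struct peer { enum state state; const char *name; };

static bool
should_update (const struct peer *p, const struct peer *cur)
{
        /* the event's own peer, or an established member */
        return (p == cur) || (p->state == BEFRIENDED);
}

int
main (void)
{
        struct peer a = { BEFRIENDED, "a" };
        struct peer b = { PROBE_SENT, "b" };

        printf ("%d %d %d\n",
                should_update (&a, &b),   /* 1: befriended        */
                should_update (&b, &b),   /* 1: the event's peer  */
                should_update (&b, &a));  /* 0: still mid-probe   */
        return 0;
}
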
+
static int
glusterd_ac_send_friend_update (glusterd_friend_sm_event_t *event, void *ctx)
{
- int ret = 0;
- glusterd_peerinfo_t *peerinfo = NULL;
- rpc_clnt_procedure_t *proc = NULL;
- xlator_t *this = NULL;
- glusterd_friend_update_ctx_t ev_ctx = {{0}};
- glusterd_conf_t *priv = NULL;
- dict_t *friends = NULL;
- char key[100] = {0,};
- char *dup_buf = NULL;
- int32_t count = 0;
+ int ret = 0;
+ glusterd_peerinfo_t *cur_peerinfo = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ xlator_t *this = NULL;
+ glusterd_friend_update_ctx_t ev_ctx = {{0}};
+ glusterd_conf_t *priv = NULL;
+ dict_t *friends = NULL;
+ char key[100] = {0,};
+ char *dup_buf = NULL;
+ int32_t count = 0;
GF_ASSERT (event);
- peerinfo = event->peerinfo;
+ cur_peerinfo = event->peerinfo;
this = THIS;
priv = this->private;
@@ -478,6 +479,9 @@ glusterd_ac_send_friend_update (glusterd_friend_sm_event_t *event, void *ctx)
goto out;
list_for_each_entry (peerinfo, &priv->peers, uuid_list) {
+ if (!glusterd_should_update_peer (peerinfo, cur_peerinfo))
+ continue;
+
count++;
snprintf (key, sizeof (key), "friend%d.uuid", count);
dup_buf = gf_strdup (uuid_utoa (peerinfo->uuid));
@@ -497,7 +501,10 @@ glusterd_ac_send_friend_update (glusterd_friend_sm_event_t *event, void *ctx)
goto out;
list_for_each_entry (peerinfo, &priv->peers, uuid_list) {
- if (!peerinfo->connected || !peerinfo->mgmt)
+ if (!peerinfo->connected || !peerinfo->peer)
+ continue;
+
+ if (!glusterd_should_update_peer (peerinfo, cur_peerinfo))
continue;
ret = dict_set_static_ptr (friends, "peerinfo", peerinfo);
@@ -506,7 +513,7 @@ glusterd_ac_send_friend_update (glusterd_friend_sm_event_t *event, void *ctx)
goto out;
}
- proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_FRIEND_UPDATE];
+ proc = &peerinfo->peer->proctable[GLUSTERD_FRIEND_UPDATE];
if (proc->fn) {
ret = proc->fn (NULL, this, friends);
}
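
The update path above flattens the peer list into a dict using indexed keys of the form "friend%d.uuid" plus a trailing "count". A standalone sketch of that convention, where printf stands in for the dict_set_* calls and the hostname key is illustrative only:

#include <stdio.h>

int
main (void)
{
        const char *uuids[] = { "1111-aaaa", "2222-bbbb" };
        const char *hosts[] = { "server1", "server2" };
        char        key[100];
        int         count = 0;

        for (int i = 0; i < 2; i++) {
                count++;
                snprintf (key, sizeof (key), "friend%d.uuid", count);
                printf ("%s = %s\n", key, uuids[i]);
                snprintf (key, sizeof (key), "friend%d.hostname", count);
                printf ("%s = %s\n", key, hosts[i]);
        }
        printf ("count = %d\n", count);
        return 0;
}
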
@@ -521,6 +528,43 @@ out:
return ret;
}
+/* Clean up stale volumes on the peer being detached. Volumes whose bricks
+ * live entirely on other peers are stale with respect to the detached peer.
+ */
+static int
+glusterd_peer_detach_cleanup (glusterd_conf_t *priv)
+{
+ int ret = -1;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_volinfo_t *tmp_volinfo = NULL;
+
+ GF_ASSERT (priv);
+
+ list_for_each_entry_safe (volinfo, tmp_volinfo,
+ &priv->volumes, vol_list) {
+ /* The peer-detach checks ensure that, at this point in the
+ * detach process, every volume is contained either completely
+ * within or completely outside the detached peer. The only
+ * stale volumes left are the ones completely outside the
+ * peer, and they can be safely deleted.
+ */
+ if (!glusterd_friend_contains_vol_bricks (volinfo,
+ MY_UUID)) {
+ gf_log (THIS->name, GF_LOG_INFO,
+ "Deleting stale volume %s", volinfo->volname);
+ ret = glusterd_delete_volume (volinfo);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Error deleting stale volume");
+ goto out;
+ }
+ }
+ }
+ ret = 0;
+out:
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
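
glusterd_friend_contains_vol_bricks is not shown in this patch; assuming it reports whether any brick of the volume belongs to the given UUID, the staleness test above reduces to the following standalone sketch with simplified types:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct brick { const char *owner_uuid; };

static bool
volume_is_stale (const struct brick *bricks, int n, const char *my_uuid)
{
        for (int i = 0; i < n; i++)
                if (strcmp (bricks[i].owner_uuid, my_uuid) == 0)
                        return false;    /* we host a brick: not stale */
        return true;                     /* wholly on other peers: stale */
}

int
main (void)
{
        struct brick vol[] = { { "peer-2" }, { "peer-3" } };

        printf ("stale: %d\n", volume_is_stale (vol, 2, "peer-1")); /* 1 */
        return 0;
}
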
static int
glusterd_ac_handle_friend_remove_req (glusterd_friend_sm_event_t *event,
@@ -556,9 +600,17 @@ glusterd_ac_handle_friend_remove_req (glusterd_friend_sm_event_t *event,
if (ret)
goto out;
}
-
+ ret = glusterd_peer_detach_cleanup (priv);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "Peer detach cleanup was not successful");
+ ret = 0;
+ }
+ gf_log (THIS->name, GF_LOG_INFO, "detached, stopping etcd");
+ stop_etcd (priv->etcd_pid);
+ nuke_etcd_dir ();
out:
- gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning with %d", ret);
return ret;
}
@@ -568,10 +620,13 @@ glusterd_ac_friend_remove (glusterd_friend_sm_event_t *event, void *ctx)
{
int ret = -1;
- ret = glusterd_friend_cleanup (event->peerinfo);
+ ret = glusterd_friend_remove_cleanup_vols (event->peerinfo->uuid);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING, "Volumes cleanup failed");
+ ret = glusterd_friend_cleanup (event->peerinfo);
if (ret) {
- gf_log ("", GF_LOG_ERROR, "Cleanup returned: %d", ret);
+ gf_log (THIS->name, GF_LOG_ERROR, "Cleanup returned: %d", ret);
}
return 0;
@@ -597,10 +652,14 @@ glusterd_ac_handle_friend_add_req (glusterd_friend_sm_event_t *event, void *ctx)
glusterd_friend_update_ctx_t *new_ev_ctx = NULL;
glusterd_friend_sm_event_t *new_event = NULL;
glusterd_friend_sm_event_type_t event_type = GD_FRIEND_EVENT_NONE;
+ glusterd_conf_t *conf = NULL;
int status = 0;
int32_t op_ret = -1;
int32_t op_errno = 0;
+ xlator_t *this = NULL;
+ this = THIS;
+ GF_ASSERT (this);
GF_ASSERT (ctx);
ev_ctx = ctx;
uuid_copy (uuid, ev_ctx->uuid);
@@ -608,8 +667,12 @@ glusterd_ac_handle_friend_add_req (glusterd_friend_sm_event_t *event, void *ctx)
GF_ASSERT (peerinfo);
uuid_copy (peerinfo->uuid, ev_ctx->uuid);
+ conf = this->private;
+ GF_ASSERT (conf);
+
//Build comparison logic here.
- ret = glusterd_compare_friend_data (ev_ctx->vols, &status);
+ ret = glusterd_compare_friend_data (ev_ctx->vols, &status,
+ peerinfo->hostname);
if (ret)
goto out;
@@ -622,6 +685,31 @@ glusterd_ac_handle_friend_add_req (glusterd_friend_sm_event_t *event, void *ctx)
op_ret = -1;
}
+ /* Compare the missed_snapshots list with the peer's,
+ * if the volume comparison was successful */
+ if ((op_ret == 0) &&
+ (conf->op_version >= GD_OP_VERSION_4)) {
+ ret = glusterd_import_friend_missed_snap_list (ev_ctx->vols);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to import peer's "
+ "missed_snaps_list.");
+ event_type = GD_FRIEND_EVENT_LOCAL_RJT;
+ op_errno = GF_PROBE_MISSED_SNAP_CONFLICT;
+ op_ret = -1;
+ }
+
+ ret = glusterd_compare_friend_snapshots (ev_ctx->vols,
+ peerinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Conflict in comparing peer's snapshots");
+ event_type = GD_FRIEND_EVENT_LOCAL_RJT;
+ op_errno = GF_PROBE_SNAP_CONFLICT;
+ op_ret = -1;
+ }
+ }
+
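
The snapshot comparison above is gated on the cluster's operating version: features that only newer peers understand are skipped until every peer has been bumped to the required op-version. A standalone sketch of that gate, with an illustrative version value:

#include <stdio.h>

#define GD_OP_VERSION_4 4   /* illustrative value */

int
main (void)
{
        int cluster_op_version = 3;   /* e.g. one old peer holds it back */

        if (cluster_op_version >= GD_OP_VERSION_4)
                printf ("comparing missed-snapshots lists\n");
        else
                printf ("skipping snapshot comparison (older cluster)\n");
        return 0;
}
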
ret = glusterd_friend_sm_new_event (event_type, &new_event);
if (ret) {
@@ -646,7 +734,15 @@ glusterd_ac_handle_friend_add_req (glusterd_friend_sm_event_t *event, void *ctx)
glusterd_friend_sm_inject_event (new_event);
ret = glusterd_xfer_friend_add_resp (ev_ctx->req, ev_ctx->hostname,
- ev_ctx->port, op_ret, op_errno);
+ peerinfo->hostname, ev_ctx->port,
+ op_ret, op_errno);
+
+ // Apply a deterministic function to decide which peer we should join the cluster through.
+ if (strcmp (peerinfo->hostname, ev_ctx->hostname) > 0) {
+ stop_etcd (conf->etcd_pid);
+ nuke_etcd_dir ();
+ conf->etcd_pid = start_etcd (uuid_utoa (MY_UUID), peerinfo->hostname);
+ }
out:
gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
@@ -889,7 +985,7 @@ int
glusterd_friend_sm_inject_event (glusterd_friend_sm_event_t *event)
{
GF_ASSERT (event);
- gf_log ("glusterd", GF_LOG_DEBUG, "Enqueuing event: '%s'",
+ gf_log ("glusterd", GF_LOG_DEBUG, "Enqueue event: '%s'",
glusterd_friend_sm_event_name_get (event->event));
list_add_tail (&event->list, &gd_friend_sm_queue);
@@ -918,6 +1014,27 @@ glusterd_destroy_friend_event_context (glusterd_friend_sm_event_t *event)
}
}
+gf_boolean_t
+gd_does_peer_affect_quorum (glusterd_friend_sm_state_t old_state,
+ glusterd_friend_sm_event_type_t event_type,
+ glusterd_peerinfo_t *peerinfo)
+{
+ gf_boolean_t affects = _gf_false;
+
+ //When glusterd comes up with friends in BEFRIENDED state in store,
+ //wait until compare-data happens.
+ if ((old_state == GD_FRIEND_STATE_BEFRIENDED) &&
+ (event_type != GD_FRIEND_EVENT_RCVD_ACC) &&
+ (event_type != GD_FRIEND_EVENT_LOCAL_ACC))
+ goto out;
+ if ((peerinfo->state.state == GD_FRIEND_STATE_BEFRIENDED)
+ && peerinfo->connected) {
+ affects = _gf_true;
+ }
+out:
+ return affects;
+}
+
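
The gate above encodes two conditions: a peer restored from the store as BEFRIENDED must not count toward quorum until a compare-data event (RCVD_ACC or LOCAL_ACC) confirms the friendship, and only connected, befriended peers contribute. A standalone sketch with simplified types and assumed semantics:

#include <stdbool.h>
#include <stdio.h>

enum state { DEFAULT_STATE, BEFRIENDED };
enum ev    { RCVD_ACC, LOCAL_ACC, OTHER };

static bool
affects_quorum (enum state old_state, enum ev e,
                enum state cur_state, bool connected)
{
        if (old_state == BEFRIENDED && e != RCVD_ACC && e != LOCAL_ACC)
                return false;            /* wait for compare-data */
        return (cur_state == BEFRIENDED) && connected;
}

int
main (void)
{
        /* restored from store, no ACC yet: 0 */
        printf ("%d\n", affects_quorum (BEFRIENDED, OTHER, BEFRIENDED, true));
        /* compare-data accepted: 1 */
        printf ("%d\n", affects_quorum (BEFRIENDED, RCVD_ACC, BEFRIENDED, true));
        return 0;
}
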
int
glusterd_friend_sm ()
{
@@ -929,6 +1046,8 @@ glusterd_friend_sm ()
glusterd_peerinfo_t *peerinfo = NULL;
glusterd_friend_sm_event_type_t event_type = 0;
gf_boolean_t is_await_conn = _gf_false;
+ gf_boolean_t quorum_action = _gf_false;
+ glusterd_friend_sm_state_t old_state = GD_FRIEND_STATE_DEFAULT;
while (!list_empty (&gd_friend_sm_queue)) {
list_for_each_entry_safe (event, tmp, &gd_friend_sm_queue, list) {
@@ -948,6 +1067,7 @@ glusterd_friend_sm ()
glusterd_friend_sm_event_name_get (event_type));
+ old_state = peerinfo->state.state;
state = glusterd_friend_state_table[peerinfo->state.state];
GF_ASSERT (state);
@@ -988,6 +1108,15 @@ glusterd_friend_sm ()
goto out;
}
+ if (gd_does_peer_affect_quorum (old_state, event_type,
+ peerinfo)) {
+ peerinfo->quorum_contrib = QUORUM_UP;
+ if (peerinfo->quorum_action) {
+ peerinfo->quorum_action = _gf_false;
+ quorum_action = _gf_true;
+ }
+ }
+
ret = glusterd_store_peerinfo (peerinfo);
glusterd_destroy_friend_event_context (event);
@@ -1001,6 +1130,25 @@ glusterd_friend_sm ()
ret = 0;
out:
+ if (quorum_action) {
+ /* When glusterd is restarted, it needs to wait until its 'friends' view
+ * of the volumes settles before it starts any of the internal daemons.
+ *
+ * Every friend that was part of the cluster sends its cluster-view our
+ * way. For every friend belonging to a partition whose cluster-view
+ * differs from our partition's, we may update our cluster-view;
+ * subsequent friends from that partition will then agree with us,
+ * provided the first friend wasn't rejected. For every first friend we
+ * agreed with, we need to start the internal daemons/bricks belonging
+ * to the new volumes.
+ * glusterd_spawn_daemons calls functions that are idempotent, i.e. they
+ * spawn process(es) only if the processes are not already running.
+ */
+ glusterd_spawn_daemons (NULL);
+ glusterd_do_quorum_action ();
+ }
return ret;
}
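
The idempotence that the comment above relies on can be illustrated with a standalone sketch using hypothetical helpers (the real glusterd checks pidfiles rather than this stub):

#include <stdbool.h>
#include <stdio.h>

static bool
daemon_is_running (const char *svc)
{
        (void) svc;        /* stand-in: real code consults a pidfile */
        return false;
}

static void
spawn_if_needed (const char *svc)
{
        if (daemon_is_running (svc)) {
                printf ("%s already running, skipping\n", svc);
                return;
        }
        printf ("starting %s\n", svc);
}

int
main (void)
{
        spawn_if_needed ("glustershd");
        /* with a real running-check, a repeated call becomes a no-op */
        spawn_if_needed ("glustershd");
        return 0;
}
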