From 86e312d872a957afff1e508f13f62c4102dba22d Mon Sep 17 00:00:00 2001 From: Ravishankar N Date: Thu, 1 Sep 2016 13:01:22 +0530 Subject: afr: add replication events Added the following events for the eventing framework: "EVENT_AFR_QUORUM_MET" --> Sent when quorum is met. "EVENT_AFR_QUORUM_FAIL" --> Sent when quorum is lost. "EVENT_AFR_SUBVOL_UP" --> Sent when afr sees the first subvolume come up. "EVENT_AFR_SUBVOLS_DOWN" --> Sent when all children of an afr subvol are down. "EVENT_AFR_SPLIT_BRAIN" --> Sent when self-heal detects split-brain in the heal path (not the read/write path). Change-Id: I937c61ca1ce78b5922ade73c7bfa3051df59c513 BUG: 1371485 Signed-off-by: Ravishankar N Reviewed-on: http://review.gluster.org/15349 Reviewed-by: Pranith Kumar Karampuri Tested-by: Pranith Kumar Karampuri Smoke: Gluster Build System NetBSD-regression: NetBSD Build System CentOS-regression: Gluster Build System --- xlators/cluster/afr/src/afr-common.c | 16 +++++++-- xlators/cluster/afr/src/afr-self-heal-common.c | 8 +++++ xlators/cluster/afr/src/afr-self-heal-entry.c | 27 ++++++++++++--- xlators/cluster/afr/src/afr-self-heal-name.c | 47 ++++++++++++++++++++------ 4 files changed, 82 insertions(+), 16 deletions(-) (limited to 'xlators') diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index cf838846cbd..8c59da7ecca 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -31,6 +31,7 @@ #include "byte-order.h" #include "statedump.h" #include "inode.h" +#include "events.h" #include "fd.h" @@ -4422,6 +4423,9 @@ afr_notify (xlator_t *this, int32_t event, AFR_MSG_SUBVOL_UP, "Subvolume '%s' came back up; " "going online.", ((xlator_t *)data)->name); + gf_event (EVENT_AFR_SUBVOL_UP, + "subvol=%s", this->name); + } else { event = GF_EVENT_CHILD_MODIFIED; } @@ -4444,6 +4448,8 @@ afr_notify (xlator_t *this, int32_t event, AFR_MSG_SUBVOLS_DOWN, "All subvolumes are down. 
Going offline " "until atleast one of them comes back up."); + gf_event (EVENT_AFR_SUBVOLS_DOWN, + "subvol=%s", this->name); } else { event = GF_EVENT_SOME_CHILD_DOWN; } @@ -4495,13 +4501,19 @@ afr_notify (xlator_t *this, int32_t event, if (priv->quorum_count) { has_quorum = afr_has_quorum (priv->child_up, this); - if (!had_quorum && has_quorum) + if (!had_quorum && has_quorum) { gf_msg (this->name, GF_LOG_INFO, 0, AFR_MSG_QUORUM_MET, "Client-quorum is met"); - if (had_quorum && !has_quorum) + gf_event (EVENT_AFR_QUORUM_MET, + "subvol=%s", this->name); + } + if (had_quorum && !has_quorum) { gf_msg (this->name, GF_LOG_WARNING, 0, AFR_MSG_QUORUM_FAIL, "Client-quorum is not met"); + gf_event (EVENT_AFR_QUORUM_FAIL, "subvol=%s", + this->name); + } } /* if all subvols have reported status, no need to hide anything diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index 52e0c75d73e..6b852bf2e78 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -14,6 +14,7 @@ #include "byte-order.h" #include "protocol-common.h" #include "afr-messages.h" +#include "events.h" void afr_heal_synctask (xlator_t *this, afr_local_t *local); @@ -1653,6 +1654,13 @@ afr_selfheal_unlocked_inspect (call_frame_t *frame, xlator_t *this, (int) replies[i].poststat.ia_type, priv->children[i]->name, uuid_utoa (replies[i].poststat.ia_gfid)); + gf_event (EVENT_AFR_SPLIT_BRAIN, "subvol=%s;" + "msg=file type mismatch;gfid=%s;" + "ia_type-%d=%s;ia_type-%d=%s", + this->name, + uuid_utoa (replies[i].poststat.ia_gfid), first, + gf_inode_type_to_str (first.ia_type), i, + gf_inode_type_to_str (replies[i].poststat.ia_type)); ret = -EIO; goto out; } diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index bc0cf25281b..254ff6d742c 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c 
@@ -15,6 +15,7 @@ #include "afr-transaction.h" #include "afr-messages.h" #include "syncop-utils.h" +#include "events.h" /* Max file name length is 255 this filename is of length 256. No file with * this name can ever come, entry-lock with this name is going to prevent @@ -240,13 +241,22 @@ afr_selfheal_detect_gfid_and_type_mismatch (xlator_t *this, replies[i].poststat.ia_gfid)) { gf_msg (this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN, "Gfid mismatch " - "detected for <%s/%s>, %s on %s and %s on %s. " + "detected for /%s>, %s on %s and %s on %s. " "Skipping conservative merge on the file.", uuid_utoa (pargfid), bname, uuid_utoa_r (replies[i].poststat.ia_gfid, g1), priv->children[i]->name, uuid_utoa_r (replies[src_idx].poststat.ia_gfid, g2), priv->children[src_idx]->name); + gf_event (EVENT_AFR_SPLIT_BRAIN, + "subvol=%s;msg=gfid mismatch. Skipping " + "conservative merge.;file=/%s>;count=2;" + "child-%d=%s;gfid-%d=%s;child-%d=%s;gfid-%d=%s", + this->name, uuid_utoa (pargfid), bname, i, + priv->children[i]->name, i, + uuid_utoa_r (replies[i].poststat.ia_gfid, g1), + src_idx, priv->children[src_idx]->name, src_idx, + uuid_utoa_r (replies[src_idx].poststat.ia_gfid, g2)); return -1; } @@ -254,13 +264,22 @@ afr_selfheal_detect_gfid_and_type_mismatch (xlator_t *this, (replies[i].poststat.ia_type)) { gf_msg (this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN, "Type mismatch " - "detected for <%s/%s>, %d on %s and %d on %s. " + "detected for /%s>, %s on %s and %s on %s. " "Skipping conservative merge on the file.", uuid_utoa (pargfid), bname, - replies[i].poststat.ia_type, + gf_inode_type_to_str (replies[i].poststat.ia_type), priv->children[i]->name, - replies[src_idx].poststat.ia_type, + gf_inode_type_to_str (replies[src_idx].poststat.ia_type), priv->children[src_idx]->name); + gf_event (EVENT_AFR_SPLIT_BRAIN, + "subvol=%s;msg=file type mismatch. 
Skipping " + "conservative merge;file=/%s>;count=2;" + "child-%d=%s;type-%d=%s;child-%d=%s;type-%d=%s", + this->name, uuid_utoa (pargfid), bname, i, + priv->children[i]->name, i, + gf_inode_type_to_str(replies[i].poststat.ia_type), + src_idx, priv->children[src_idx]->name, src_idx, + gf_inode_type_to_str(replies[src_idx].poststat.ia_type)); return -1; } } diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c index b11285ca54e..a0ee7e0419f 100644 --- a/xlators/cluster/afr/src/afr-self-heal-name.c +++ b/xlators/cluster/afr/src/afr-self-heal-name.c @@ -9,6 +9,7 @@ */ +#include "events.h" #include "afr.h" #include "afr-self-heal.h" #include "afr-messages.h" @@ -274,6 +275,7 @@ afr_selfheal_name_type_mismatch_check (xlator_t *this, struct afr_reply *replies int i = 0; int type_idx = -1; ia_type_t inode_type = IA_INVAL; + ia_type_t inode_type1 = IA_INVAL; afr_private_t *priv = NULL; priv = this->private; @@ -290,21 +292,32 @@ afr_selfheal_name_type_mismatch_check (xlator_t *this, struct afr_reply *replies type_idx = i; continue; } - + inode_type1 = replies[i].poststat.ia_type; if (sources[i] || source == -1) { if ((sources[type_idx] || source == -1) && - (inode_type != replies[i].poststat.ia_type)) { + (inode_type != inode_type1)) { gf_msg (this->name, GF_LOG_WARNING, 0, AFR_MSG_SPLIT_BRAIN, "Type mismatch for /%s: " - "%d on %s and %d on %s", + "%s on %s and %s on %s", uuid_utoa(pargfid), bname, - replies[i].poststat.ia_type, + gf_inode_type_to_str (inode_type1), priv->children[i]->name, - replies[type_idx].poststat.ia_type, + gf_inode_type_to_str (inode_type), priv->children[type_idx]->name); - - return -EIO; + gf_event (EVENT_AFR_SPLIT_BRAIN, + "subvol=%s;msg=file type mismatch;" + "file=/%s;count=2;" + "child-%d=%s;type-%d=%s;child-%d=%s;" + "type-%d=%s", this->name, + uuid_utoa (pargfid), bname, i, + priv->children[i]->name, i, + gf_inode_type_to_str (inode_type1), + type_idx, + priv->children[type_idx]->name, + 
type_idx, + gf_inode_type_to_str (inode_type)); + return -EIO; } inode_type = replies[i].poststat.ia_type; type_idx = i; @@ -322,6 +335,7 @@ afr_selfheal_name_gfid_mismatch_check (xlator_t *this, struct afr_reply *replies int i = 0; int gfid_idx_iter = -1; void *gfid = NULL; + void *gfid1 = NULL; afr_private_t *priv = NULL; char g1[64], g2[64]; @@ -340,18 +354,31 @@ afr_selfheal_name_gfid_mismatch_check (xlator_t *this, struct afr_reply *replies continue; } + gfid1 = &replies[i].poststat.ia_gfid; if (sources[i] || source == -1) { if ((sources[gfid_idx_iter] || source == -1) && - gf_uuid_compare (gfid, replies[i].poststat.ia_gfid)) { + gf_uuid_compare (gfid, gfid1)) { gf_msg (this->name, GF_LOG_WARNING, 0, AFR_MSG_SPLIT_BRAIN, "GFID mismatch for /%s " "%s on %s and %s on %s", uuid_utoa (pargfid), bname, - uuid_utoa_r (replies[i].poststat.ia_gfid, g1), + uuid_utoa_r (gfid1, g1), priv->children[i]->name, - uuid_utoa_r (replies[gfid_idx_iter].poststat.ia_gfid, g2), + uuid_utoa_r (gfid, g2), priv->children[gfid_idx_iter]->name); + gf_event (EVENT_AFR_SPLIT_BRAIN, + "subvol=%s;msg=gfid mismatch;" + "file=/%s;count=2;" + "child-%d=%s;gfid-%d=%s;child-%d=%s;" + "gfid-%d=%s", this->name, + uuid_utoa (pargfid), bname, i, + priv->children[i]->name, i, + uuid_utoa_r (gfid1, g1), + gfid_idx_iter, + priv->children[gfid_idx_iter]->name, + gfid_idx_iter, + uuid_utoa_r (gfid, g2)); return -EIO; } -- cgit