summaryrefslogtreecommitdiffstats
path: root/xlators/experimental
diff options
context:
space:
mode:
authorAvra Sengupta <asengupt@redhat.com>2016-02-12 14:57:47 +0530
committerJeff Darcy <jdarcy@redhat.com>2016-03-31 10:13:17 -0700
commitb4cbfdac0d35e6896f337b4ae7b75dcf4e714a1a (patch)
tree290ece66f510129a16ea8dd44dbd5d89a24613ec /xlators/experimental
parentb2a5eed9b17a82ec4b6366b0107fe2271328c16a (diff)
nsr: Introducing a happy path test case
Write infra for nsr_server to not send a CHILD_UP before it gets a CHILD_UP from a quorum of its children. Use the CHILD_UP received in the nsr client translator from the server to decide the right time for starting the I/Os. Change-Id: I9551638b306bdcbc6bae6aeda00316576ea832fe Signed-off-by: Avra Sengupta <asengupt@redhat.com> Reviewed-on: http://review.gluster.org/13623 CentOS-regression: Gluster Build System <jenkins@build.gluster.com> Smoke: Gluster Build System <jenkins@build.gluster.com> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
Diffstat (limited to 'xlators/experimental')
-rw-r--r--xlators/experimental/nsr-client/src/nsrc.c110
-rw-r--r--xlators/experimental/nsr-client/src/nsrc.h3
-rw-r--r--xlators/experimental/nsr-server/src/all-templates.c12
-rw-r--r--xlators/experimental/nsr-server/src/nsr-internal.h2
-rw-r--r--xlators/experimental/nsr-server/src/nsr.c90
5 files changed, 197 insertions, 20 deletions
diff --git a/xlators/experimental/nsr-client/src/nsrc.c b/xlators/experimental/nsr-client/src/nsrc.c
index dd3ad20544e..13f1a2d38c5 100644
--- a/xlators/experimental/nsr-client/src/nsrc.c
+++ b/xlators/experimental/nsr-client/src/nsrc.c
@@ -18,6 +18,7 @@
#include "xlator.h"
#include "nsr-messages.h"
#include "nsrc.h"
+#include "statedump.h"
#define SCAR_LIMIT 20
#define HILITE(x) (""x"")
@@ -168,6 +169,7 @@ int32_t
nsrc_init (xlator_t *this)
{
nsrc_private_t *priv = NULL;
+ xlator_list_t *trav = NULL;
this->local_pool = mem_pool_new (nsrc_local_t, 128);
if (!this->local_pool) {
@@ -181,6 +183,10 @@ nsrc_init (xlator_t *this)
goto err;
}
+ for (trav = this->children; trav; trav = trav->next) {
+ ++(priv->n_children);
+ }
+
priv->active = FIRST_CHILD(this);
this->private = priv;
return 0;
@@ -198,33 +204,111 @@ nsrc_fini (xlator_t *this)
GF_FREE(this->private);
}
+/*
+ * Translate a child translator pointer into its zero-based position in
+ * this->children.
+ *
+ * Returns the position of the first list entry whose xlator is kid, or -1
+ * when kid does not appear in the list.
+ */
+int
+nsrc_get_child_index (xlator_t *this, xlator_t *kid)
+{
+        xlator_list_t   *entry    = this->children;
+        int              position = 0;
+
+        while (entry) {
+                if (entry->xlator == kid) {
+                        return position;
+                }
+                entry = entry->next;
+                ++position;
+        }
+
+        return -1;
+}
+
+/*
+ * Count how many children are currently marked up.
+ *
+ * Examines the low priv->n_children bits of priv->kid_state and returns how
+ * many of them are set.  The bitmap has a fixed width, so child indices at
+ * or beyond that width are ignored rather than used as a shift count (a
+ * shift by the full width or more is undefined behaviour in C).
+ */
+uint8_t
+nsrc_count_up_kids (nsrc_private_t *priv)
+{
+        const uint8_t   max_bits = 8 * sizeof (priv->kid_state);
+        uint8_t         limit    = priv->n_children;
+        uint8_t         count    = 0;
+        uint8_t         i;
+
+        if (limit > max_bits) {
+                limit = max_bits;
+        }
+
+        for (i = 0; i < limit; ++i) {
+                /* Shift the unsigned bitmap instead of a signed 1, so the
+                 * top bit can be tested without signed overflow. */
+                if ((priv->kid_state >> i) & 1U) {
+                        ++count;
+                }
+        }
+
+        return count;
+}
+
int32_t
nsrc_notify (xlator_t *this, int32_t event, void *data, ...)
{
- int32_t ret = 0;
+ int32_t ret = 0; /* returned unchanged unless default_notify is called below */
+ int32_t index = 0; /* child position from nsrc_get_child_index; -1 if data is not a child */
+ nsrc_private_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO (THIS->name, this, out); /* NOTE(review): presumably jumps to out when the checked pointer is NULL - confirm against the macro */
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO (this->name, priv, out);
switch (event) {
+ case GF_EVENT_CHILD_UP: /* record the child as up, then hand the event to default_notify */
+ index = nsrc_get_child_index(this, data);
+ if (index >= 0) {
+ priv->kid_state |= (1 << index); /* set this child's bit in the bitmap */
+ priv->up_children = nsrc_count_up_kids(priv); /* recount so the log line below is current */
+ gf_msg (this->name, GF_LOG_INFO, 0, N_MSG_GENERIC,
+ "got CHILD_UP for %s, now %u kids",
+ ((xlator_t *)data)->name,
+ priv->up_children);
+ }
+ ret = default_notify (this, event, data);
+ break;
case GF_EVENT_CHILD_DOWN:
- /*
- * TBD: handle this properly
- *
- * What we really should do is propagate this only if it caused
- * us to lose quorum, and likewise for GF_EVENT_CHILD_UP only
- * if it caused us to gain quorum. However, that requires
- * tracking child states and for now it's easier to swallow
- * these unconditionally. The consequence of failing to do
- * this is that DHT sees the first GF_EVENT_CHILD_DOWN and gets
- * confused, so it doesn't call us and doesn't get up-to-date
- * directory listings etc.
- */
+ index = nsrc_get_child_index(this, data);
+ if (index >= 0) {
+ priv->kid_state &= ~(1 << index); /* clear this child's bit in the bitmap */
+ priv->up_children = nsrc_count_up_kids(priv);
+ gf_msg (this->name, GF_LOG_INFO, 0, N_MSG_GENERIC,
+ "got CHILD_DOWN for %s, now %u kids",
+ ((xlator_t *)data)->name,
+ priv->up_children);
+ } /* no default_notify call in this case, so ret stays 0 */
break;
default:
ret = default_notify (this, event, data);
}
+out:
return ret;
}
+/*
+ * Write this translator's private state into the current process dump.
+ *
+ * Adds a section named "<type>.<name>", then emits the up_children counter
+ * followed by one "child_<n>" entry per child carrying that child's name.
+ * Always returns 0, including when this or its private data fails the
+ * validation checks.
+ */
+int
+nsrc_priv_dump (xlator_t *this)
+{
+        char             key[GF_DUMP_MAX_BUF_LEN];
+        nsrc_private_t  *priv = NULL;
+        xlator_list_t   *kid  = NULL;
+        int32_t          slot = 0;
+
+        GF_VALIDATE_OR_GOTO (THIS->name, this, out);
+        priv = this->private;
+        GF_VALIDATE_OR_GOTO (this->name, priv, out);
+
+        snprintf (key, sizeof (key), "%s.%s", this->type, this->name);
+        gf_proc_dump_add_section(key);
+
+        gf_proc_dump_write("up_children", "%u", priv->up_children);
+
+        /* The key buffer is reused for each per-child entry. */
+        kid = this->children;
+        while (kid) {
+                snprintf (key, sizeof (key), "child_%d", slot);
+                gf_proc_dump_write(key, "%s", kid->xlator->name);
+                kid = kid->next;
+                ++slot;
+        }
+
+out:
+        return 0;
+}
+
+struct xlator_dumpops dumpops = {
+ .priv = nsrc_priv_dump, /* private-state dump callback; NOTE(review): presumably picked up by the statedump machinery - confirm */
+};
+
class_methods_t class_methods = {
.init = nsrc_init,
.fini = nsrc_fini,
diff --git a/xlators/experimental/nsr-client/src/nsrc.h b/xlators/experimental/nsr-client/src/nsrc.h
index 0c61d7a9fa8..15f0d7c85a0 100644
--- a/xlators/experimental/nsr-client/src/nsrc.h
+++ b/xlators/experimental/nsr-client/src/nsrc.h
@@ -13,6 +13,9 @@
typedef struct {
xlator_t *active;
+ uint8_t up_children; /* number of children currently marked up; refreshed from kid_state in nsrc_notify */
+ uint8_t n_children; /* total number of children, counted from this->children in nsrc_init */
+ uint32_t kid_state; /* bitmap: bit i is set on CHILD_UP and cleared on CHILD_DOWN for child index i */
} nsrc_private_t;
typedef struct {
diff --git a/xlators/experimental/nsr-server/src/all-templates.c b/xlators/experimental/nsr-server/src/all-templates.c
index 300abea959d..c3819d2af54 100644
--- a/xlators/experimental/nsr-server/src/all-templates.c
+++ b/xlators/experimental/nsr-server/src/all-templates.c
@@ -83,6 +83,9 @@ nsr_@NAME@ (call_frame_t *frame, xlator_t *this,
if (result == _gf_false) {
/* Emulate the AFR client-side-quorum behavior. */
+ gf_msg (this->name, GF_LOG_ERROR, EROFS,
+ N_MSG_QUORUM_NOT_MET, "Sufficient number of "
+ "subvolumes are not up to meet quorum.");
op_errno = EROFS;
goto err;
}
@@ -309,6 +312,10 @@ nsr_@NAME@_continue (call_frame_t *frame, xlator_t *this,
result = fop_quorum_check (this, (double)priv->n_children,
(double)local->successful_acks + 1);
if (result == _gf_false) {
+ gf_msg (this->name, GF_LOG_ERROR, EROFS,
+ N_MSG_QUORUM_NOT_MET, "Didn't receive enough acks "
+ "to meet quorum. Failing the operation without trying "
+ "it on the leader.");
STACK_UNWIND_STRICT (@NAME@, frame, -1, EROFS,
@ERROR_ARGS@);
} else {
@@ -406,8 +413,9 @@ nsr_@NAME@_complete (call_frame_t *frame, void *cookie, xlator_t *this,
if (result == _gf_false) {
op_ret = -1;
op_errno = EROFS;
- gf_msg_debug (this->name, 0,
- "Quorum is not met. The operation has failed.");
+ gf_msg (this->name, GF_LOG_ERROR, EROFS,
+ N_MSG_QUORUM_NOT_MET, "Quorum is not met. "
+ "The operation has failed.");
} else {
#if defined(NSR_CG_NEED_FD)
op_ret = local->successful_op_ret;
diff --git a/xlators/experimental/nsr-server/src/nsr-internal.h b/xlators/experimental/nsr-server/src/nsr-internal.h
index b8c7fc314b7..d43fbac9a53 100644
--- a/xlators/experimental/nsr-server/src/nsr-internal.h
+++ b/xlators/experimental/nsr-server/src/nsr-internal.h
@@ -74,6 +74,8 @@ typedef struct {
* TBD: re-evaluate how to manage this
*/
char term_buf[CHANGELOG_ENTRY_SIZE];
+ gf_boolean_t child_up; /* To maintain the state of *
+ * the translator */
} nsr_private_t;
typedef struct {
diff --git a/xlators/experimental/nsr-server/src/nsr.c b/xlators/experimental/nsr-server/src/nsr.c
index 48966ab15a1..0fb618f236e 100644
--- a/xlators/experimental/nsr-server/src/nsr.c
+++ b/xlators/experimental/nsr-server/src/nsr.c
@@ -860,13 +860,23 @@ nsr_get_child_index (xlator_t *this, xlator_t *kid)
int
nsr_notify (xlator_t *this, int event, void *data, ...)
{
- nsr_private_t *priv = this->private;
- int index;
+ nsr_private_t *priv = this->private;
+ int index = -1;
+ int ret = -1;
+ gf_boolean_t result = _gf_false;
+ gf_boolean_t relevant = _gf_false;
switch (event) {
case GF_EVENT_CHILD_UP:
index = nsr_get_child_index(this, data);
if (index >= 0) {
+ /* Check if the child was previously down
+ * and it's not a false CHILD_UP
+ */
+ if (!(priv->kid_state & (1 << index))) {
+ relevant = _gf_true;
+ }
+
priv->kid_state |= (1 << index);
priv->up_children = nsr_count_up_kids(priv);
gf_msg (this->name, GF_LOG_INFO, 0, N_MSG_GENERIC,
@@ -876,27 +886,96 @@ nsr_notify (xlator_t *this, int event, void *data, ...)
if (!priv->config_leader && (priv->up_children > 1)) {
priv->leader = _gf_false;
}
+
+ /* If it's not relevant, or we have already *
+ * sent CHILD_UP just break */
+ if (!relevant || priv->child_up)
+ break;
+
+ /* If it's not a leader, just send the notify up */
+ if (!priv->leader) {
+ ret = default_notify(this, event, data);
+ if (!ret)
+ priv->child_up = _gf_true;
+ break;
+ }
+
+ result = fop_quorum_check (this,
+ (double)(priv->n_children - 1),
+ (double)(priv->up_children - 1));
+ if (result == _gf_false) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ N_MSG_GENERIC, "Not enough children "
+ "are up to meet quorum. Waiting to "
+ "send CHILD_UP from leader");
+ } else {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ N_MSG_GENERIC, "Enough children are up "
+ "to meet quorum. Sending CHILD_UP "
+ "from leader");
+ ret = default_notify(this, event, data);
+ if (!ret)
+ priv->child_up = _gf_true;
+ }
}
break;
case GF_EVENT_CHILD_DOWN:
index = nsr_get_child_index(this, data);
if (index >= 0) {
+ /* Check if the child was previously up
+ * and it's not a false CHILD_DOWN
+ */
+ if (priv->kid_state & (1 << index)) {
+ relevant = _gf_true;
+ }
priv->kid_state &= ~(1 << index);
priv->up_children = nsr_count_up_kids(priv);
gf_msg (this->name, GF_LOG_INFO, 0, N_MSG_GENERIC,
"got CHILD_DOWN for %s, now %u kids",
((xlator_t *)data)->name,
priv->up_children);
- if (!priv->config_leader && (priv->up_children < 2)) {
+ if (!priv->config_leader && (priv->up_children < 2)
+ && relevant) {
priv->leader = _gf_true;
}
+
+ /* If it's not relevant, or we have already *
+ * sent CHILD_DOWN just break */
+ if (!relevant || !priv->child_up)
+ break;
+
+ /* If it's not a leader, just break coz we shouldn't *
+ * propagate the failure from the failure till it *
+ * itself goes down *
+ */
+ if (!priv->leader) {
+ break;
+ }
+
+ result = fop_quorum_check (this,
+ (double)(priv->n_children - 1),
+ (double)(priv->up_children - 1));
+ if (result == _gf_false) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ N_MSG_GENERIC, "Enough children are "
+ "to down to fail quorum. "
+ "Sending CHILD_DOWN from leader");
+ ret = default_notify(this, event, data);
+ if (!ret)
+ priv->child_up = _gf_false;
+ } else {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ N_MSG_GENERIC, "Not enough children "
+ "are down to fail quorum. Waiting to "
+ "send CHILD_DOWN from leader");
+ }
}
break;
default:
- ;
+ ret = default_notify(this, event, data);
}
- return default_notify(this, event, data);
+ return ret;
}
@@ -995,6 +1074,7 @@ nsr_init (xlator_t *this)
GF_OPTION_INIT ("quorum-percent", priv->quorum_pct, percent, err);
priv->leader = priv->config_leader;
+ priv->child_up = _gf_false;
if (pthread_create(&kid, NULL, nsr_flush_thread,
this) != 0) {