From d25b6065469eb978d40450b5aebcf5711fb50205 Mon Sep 17 00:00:00 2001 From: Poornima G Date: Fri, 26 Jan 2018 15:34:43 +0530 Subject: quiesce, gfproxy: Implement failover across multiple gfproxy nodes Updates: #242 Change-Id: I767e574a26e922760a7130bd209c178d74e8cf69 Signed-off-by: Poornima G --- xlators/features/quiesce/src/Makefile.am | 2 +- xlators/features/quiesce/src/quiesce-mem-types.h | 1 + xlators/features/quiesce/src/quiesce-messages.h | 31 +++ xlators/features/quiesce/src/quiesce.c | 237 +++++++++++++++++++---- xlators/features/quiesce/src/quiesce.h | 9 + 5 files changed, 246 insertions(+), 34 deletions(-) create mode 100644 xlators/features/quiesce/src/quiesce-messages.h (limited to 'xlators/features/quiesce') diff --git a/xlators/features/quiesce/src/Makefile.am b/xlators/features/quiesce/src/Makefile.am index a6cabb3012d..74ea999c045 100644 --- a/xlators/features/quiesce/src/Makefile.am +++ b/xlators/features/quiesce/src/Makefile.am @@ -6,7 +6,7 @@ quiesce_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) quiesce_la_SOURCES = quiesce.c quiesce_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la -noinst_HEADERS = quiesce.h quiesce-mem-types.h +noinst_HEADERS = quiesce.h quiesce-mem-types.h quiesce-messages.h AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src diff --git a/xlators/features/quiesce/src/quiesce-mem-types.h b/xlators/features/quiesce/src/quiesce-mem-types.h index 6e582f424ea..31346c1a794 100644 --- a/xlators/features/quiesce/src/quiesce-mem-types.h +++ b/xlators/features/quiesce/src/quiesce-mem-types.h @@ -15,6 +15,7 @@ enum gf_quiesce_mem_types_ { gf_quiesce_mt_priv_t = gf_common_mt_end + 1, + gf_quiesce_mt_failover_hosts, gf_quiesce_mt_end }; #endif diff --git a/xlators/features/quiesce/src/quiesce-messages.h b/xlators/features/quiesce/src/quiesce-messages.h new file mode 100644 index 00000000000..8af3b10b06e --- /dev/null +++ 
b/xlators/features/quiesce/src/quiesce-messages.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2016 Red Hat, Inc. + * This file is part of GlusterFS. + * + * This file is licensed to you under your choice of the GNU Lesser + * General Public License, version 3 or any later version (LGPLv3 or + * later), or the GNU General Public License, version 2 (GPLv2), in all + * cases as published by the Free Software Foundation. + */ + +#ifndef __QUIESCE_MESSAGES_H__ +#define __QUIESCE_MESSAGES_H__ + +#include "glfs-message-id.h" + +/* To add new message IDs, append new identifiers at the end of the list. + * + * Never remove a message ID. If it's not used anymore, you can rename it or + * leave it as it is, but not delete it. This is to prevent reutilization of + * IDs by other messages. + * + * The component name must match one of the entries defined in + * glfs-message-id.h. + */ + +GLFS_MSGID(QUIESCE, + QUIESCE_MSG_INVAL_HOST, + QUIESCE_MSG_FAILOVER_FAILED +); + +#endif /* __QUIESCE_MESSAGES_H__ */ diff --git a/xlators/features/quiesce/src/quiesce.c b/xlators/features/quiesce/src/quiesce.c index 59e57d284a0..95c0c8b0829 100644 --- a/xlators/features/quiesce/src/quiesce.c +++ b/xlators/features/quiesce/src/quiesce.c @@ -14,6 +14,9 @@ /* TODO: */ /* Think about 'writev/_*_lk/setattr/xattrop/' fops to do re-transmittion */ +void +gf_quiesce_timeout (void *data); + /* Quiesce Specific Functions */ void @@ -37,6 +40,173 @@ gf_quiesce_local_wipe (xlator_t *this, quiesce_local_t *local) mem_put (local); } +void +__gf_quiesce_start_timer (xlator_t *this, quiesce_priv_t *priv) +{ /* asks gf_timer_call_after () to run gf_quiesce_timeout (this) with a delay of priv->timeout seconds */ + struct timespec timeout = {0,}; + + if (!priv->timer) { /* a timer that is already set is left as it is */ + timeout.tv_sec = priv->timeout; + timeout.tv_nsec = 0; + + priv->timer = gf_timer_call_after (this->ctx, + timeout, + gf_quiesce_timeout, + (void *) this); + if (priv->timer == NULL) { /* failure is only logged; nothing is retried here */ + gf_log (this->name, GF_LOG_ERROR, + "Cannot create timer"); + } + } +} + +static void +__gf_quiesce_cleanup_failover_hosts (xlator_t *this, quiesce_priv_t *priv) +{ + 
quiesce_failover_hosts_t *tmp = NULL; + quiesce_failover_hosts_t *failover_host = NULL; + + list_for_each_entry_safe (failover_host, tmp, + &priv->failover_list, list) { + GF_FREE (failover_host->addr); + list_del (&failover_host->list); + GF_FREE (failover_host); + } + return; +} + +/* Rebuild priv->failover_list from 'value', a comma separated list of + * addresses. + * + * Any hosts already on the list are dropped first. Entries rejected by + * valid_internet_address () are logged and skipped. A NULL 'value', or a + * failure to duplicate it, leaves the current list untouched. If memory + * runs out while adding hosts, the hosts added so far are kept and the + * rest of 'value' is ignored. + */ +void +gf_quiesce_populate_failover_hosts (xlator_t *this, quiesce_priv_t *priv, + const char *value) +{ + char *dup_val = NULL; + char *addr_tok = NULL; + char *save_ptr = NULL; + quiesce_failover_hosts_t *failover_host = NULL; + + if (!value) + goto out; + + /* strtok_r () writes into its input, so tokenize a private copy */ + dup_val = gf_strdup (value); + if (!dup_val) + goto out; + + LOCK (&priv->lock); + { + if (!list_empty (&priv->failover_list)) + __gf_quiesce_cleanup_failover_hosts (this, priv); + /* The next token is fetched in the loop header so that the + * 'continue' below moves on to the next address instead of + * re-testing the same invalid one forever with the lock held. + */ + for (addr_tok = strtok_r (dup_val, ",", &save_ptr); + addr_tok; + addr_tok = strtok_r (NULL, ",", &save_ptr)) { + if (!valid_internet_address (addr_tok, _gf_true)) { + gf_msg (this->name, GF_LOG_INFO, 0, + QUIESCE_MSG_INVAL_HOST, "Specified " + "invalid internet address:%s", + addr_tok); + continue; + } + failover_host = GF_CALLOC (1, sizeof(*failover_host), + gf_quiesce_mt_failover_hosts); + if (!failover_host) + break; + failover_host->addr = gf_strdup (addr_tok); + if (!failover_host->addr) { + /* never queue a host entry without an address */ + GF_FREE (failover_host); + break; + } + INIT_LIST_HEAD (&failover_host->list); + list_add (&failover_host->list, &priv->failover_list); + } + } + UNLOCK (&priv->lock); + GF_FREE (dup_val); +out: + return; +} + +int32_t +gf_quiesce_failover_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + + if (op_ret < 0) { + /* Failure here doesn't mean the failover to another host didn't + * succeed, we will know if failover succeeds or not by the + * CHILD_UP/CHILD_DOWN event. A failure here indicates something + * went wrong with the submission of failover command, hence + * just abort the failover attempts without retrying with other + * hosts. 
+ */ + gf_msg (this->name, GF_LOG_INFO, op_errno, + QUIESCE_MSG_FAILOVER_FAILED, + "Initiating failover to host:%s failed:", (char *)cookie); + } + + GF_FREE (cookie); + STACK_DESTROY (frame->root); + + priv = this->private; + /* NOTE(review): called without priv->lock here, unlike the calls in + * gf_quiesce_enqueue () and notify () - confirm this is intended. */ + __gf_quiesce_start_timer (this, priv); + + return 0; +} + +/* Ask the first child to connect to the next failover host that has not + * been tried yet. + * + * The request is a setxattr whose dict carries CLIENT_CMD_CONNECT set to the + * host address. Its completion is handled by gf_quiesce_failover_cbk (), + * which logs the cookie as the host name and frees it. + * + * Returns 0 when the child is already up (nothing to do) or the request was + * wound, and a negative value when every host has already been tried or the + * request could not be built. + */ +int +__gf_quiesce_perform_failover (xlator_t *this) +{ + int ret = 0; + call_frame_t *frame = NULL; + dict_t *dict = NULL; + char *cookie = NULL; + quiesce_priv_t *priv = NULL; + quiesce_failover_hosts_t *failover_host = NULL; + quiesce_failover_hosts_t *host = NULL; + + priv = this->private; + + if (priv->pass_through) { + gf_msg_trace (this->name, 0, "child is up, hence not " + "performing any failover"); + goto out; + } + + list_for_each_entry (failover_host, &priv->failover_list, list) { + if (failover_host->tried == 0) { + host = failover_host; + failover_host->tried = 1; + break; + } + } + if (!host) { + /*TODO: Keep trying until any of the gfproxy comes back up. + Currently it tries failing over once for each host, + if it doesn't succeed then returns error to mount point + list_for_each_entry (failover_host, + &priv->failover_list, list) { + failover_host->tried = 0; + }*/ + gf_msg_debug (this->name, 0, "all the failover hosts have " + "been tried and looks like didn't succeed"); + ret = -1; + goto out; + } + + dict = dict_new (); + if (!dict) { + ret = -1; + goto out; + } + + /* a failed gf_strdup () is expected to surface as an error here */ + ret = dict_set_dynstr (dict, CLIENT_CMD_CONNECT, + gf_strdup (host->addr)); + if (ret < 0) + goto out; + + /* the callback prints and frees its cookie: give it its own copy */ + cookie = gf_strdup (host->addr); + if (!cookie) { + ret = -1; + goto out; + } + + /* the frame is created last so no error path has to destroy it */ + frame = create_frame (this, this->ctx->pool); + if (!frame) { + GF_FREE (cookie); + ret = -1; + goto out; + } + + gf_msg_trace (this->name, 0, "Initiating failover to:%s", + host->addr); + + STACK_WIND_COOKIE (frame, gf_quiesce_failover_cbk, cookie, + FIRST_CHILD (this), + FIRST_CHILD (this)->fops->setxattr, + NULL, dict, 0, NULL); +out: + + if (dict) + dict_unref (dict); + + return ret; +} + call_stub_t * gf_quiesce_dequeue (xlator_t *this) { @@ -86,6 +256,7 @@ gf_quiesce_timeout (void *data) { xlator_t *this = NULL; quiesce_priv_t *priv = NULL; + int ret = -1; this = data; priv = this->private; @@ -93,12 +264,21 @@ gf_quiesce_timeout (void *data) LOCK 
(&priv->lock); { - priv->pass_through = _gf_true; + priv->timer = NULL; + if (priv->pass_through) { + UNLOCK (&priv->lock); + goto out; + } + /* 'this' is the xlator the timer was armed for (it arrives as + * the callback argument), so use it rather than THIS. */ + ret = __gf_quiesce_perform_failover (this); + if (ret < 0) { + /* Failover is not possible: stop quiescing. The flag is + * changed under priv->lock, as notify () does. */ + priv->pass_through = _gf_true; + } } UNLOCK (&priv->lock); - gf_quiesce_dequeue_start (this); + if (ret < 0) + gf_quiesce_dequeue_start (this); +out: return; } @@ -106,7 +286,6 @@ void gf_quiesce_enqueue (xlator_t *this, call_stub_t *stub) { quiesce_priv_t *priv = NULL; - struct timespec timeout = {0,}; priv = this->private; if (!priv) { @@ -119,19 +298,10 @@ gf_quiesce_enqueue (xlator_t *this, call_stub_t *stub) { list_add_tail (&stub->list, &priv->req); priv->queue_size++; + __gf_quiesce_start_timer (this, priv); } UNLOCK (&priv->lock); - if (!priv->timer) { - timeout.tv_sec = priv->timeout; - timeout.tv_nsec = 0; - - priv->timer = gf_timer_call_after (this->ctx, - timeout, - gf_quiesce_timeout, - (void *) this); - } - return; } @@ -2553,6 +2723,10 @@ reconfigure (xlator_t *this, dict_t *options) priv = this->private; GF_OPTION_RECONF("timeout", priv->timeout, options, time, out); + GF_OPTION_RECONF ("failover-hosts", priv->failover_hosts, options, + str, out); + gf_quiesce_populate_failover_hosts (this, priv, priv->failover_hosts); + ret = 0; out: return ret; @@ -2579,7 +2753,11 @@ init (xlator_t *this) if (!priv) goto out; + INIT_LIST_HEAD (&priv->failover_list); + GF_OPTION_INIT ("timeout", priv->timeout, time, out); + GF_OPTION_INIT ("failover-hosts", priv->failover_hosts, str, out); + gf_quiesce_populate_failover_hosts (this, priv, priv->failover_hosts); priv->local_pool = mem_pool_new (quiesce_local_t, GF_FOPS_EXPECTED_IN_PARALLEL); @@ -2617,7 +2795,6 @@ notify (xlator_t *this, int event, void *data, ...) { int ret = 0; quiesce_priv_t *priv = NULL; - struct timespec timeout = {0,}; priv = this->private; if (!priv) @@ -2645,24 +2822,10 @@ notify (xlator_t *this, int event, void *data, ...) 
LOCK (&priv->lock); { priv->pass_through = _gf_false; - } - UNLOCK (&priv->lock); + __gf_quiesce_start_timer (this, priv); - if (priv->timer) - break; - timeout.tv_sec = priv->timeout; - timeout.tv_nsec = 0; - - priv->timer = gf_timer_call_after (this->ctx, - timeout, - gf_quiesce_timeout, - (void *) this); - - if (priv->timer == NULL) { - gf_log (this->name, GF_LOG_ERROR, - "Cannot create timer"); } - + UNLOCK (&priv->lock); break; default: break; @@ -2735,14 +2898,22 @@ struct xlator_cbks cbks; struct volume_options options[] = { { .key = {"timeout"}, .type = GF_OPTION_TYPE_TIME, - .default_value = "20", + .default_value = "45", .description = "After 'timeout' seconds since the time 'quiesce' " "option was set to \"!pass-through\", acknowledgements to file " "operations are no longer quiesced and previously " "quiesced acknowledgements are sent to the application", - .tags = {"debug", "diagnose"}, .op_version = { GD_OP_VERSION_4_0_0 }, - .flags = OPT_FLAG_CLIENT_OPT, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + }, + /* hosts that gf_quiesce_populate_failover_hosts () turns into the failover list */ + { .key = {"failover-hosts"}, + .type = GF_OPTION_TYPE_INTERNET_ADDRESS_LIST, + .op_version = { GD_OP_VERSION_4_0_0 }, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .description = "It is a comma separated list of hostname/IP " + "addresses. It specifies the list of hosts where " + "the gfproxy daemons are running, to which the " + "thin clients can fail over." 
}, { .key = {NULL} }, }; diff --git a/xlators/features/quiesce/src/quiesce.h b/xlators/features/quiesce/src/quiesce.h index e76523c602b..c084801c6c6 100644 --- a/xlators/features/quiesce/src/quiesce.h +++ b/xlators/features/quiesce/src/quiesce.h @@ -12,11 +12,18 @@ #define __QUIESCE_H__ #include "quiesce-mem-types.h" +#include "quiesce-messages.h" #include "xlator.h" #include "timer.h" #define GF_FOPS_EXPECTED_IN_PARALLEL 512 +typedef struct { + struct list_head list; /* linkage into quiesce_priv_t.failover_list */ + char *addr; /* heap copy of the host address, freed together with the entry */ + gf_boolean_t tried; /* set once a failover to this host has been attempted */ +} quiesce_failover_hosts_t; + typedef struct { gf_timer_t *timer; gf_boolean_t pass_through; @@ -26,6 +33,8 @@ typedef struct { pthread_t thr; struct mem_pool *local_pool; uint32_t timeout; + char *failover_hosts; /* value of the "failover-hosts" option, a comma separated list */ + struct list_head failover_list; /* quiesce_failover_hosts_t entries built from failover_hosts */ } quiesce_priv_t; typedef struct { -- cgit