Diffstat (limited to 'xlators/mgmt/glusterd/src')
86 files changed, 103842 insertions, 20204 deletions
diff --git a/xlators/mgmt/glusterd/src/Makefile.am b/xlators/mgmt/glusterd/src/Makefile.am index 1c89dba9776..685beb42d27 100644 --- a/xlators/mgmt/glusterd/src/Makefile.am +++ b/xlators/mgmt/glusterd/src/Makefile.am @@ -1,23 +1,79 @@ +if WITH_SERVER xlator_LTLIBRARIES = glusterd.la +endif + xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/mgmt -glusterd_la_LDFLAGS = -module -avoidversion -glusterd_la_SOURCES = glusterd.c glusterd-handler.c glusterd-sm.c glusterd-op-sm.c \ - glusterd-utils.c glusterd-rpc-ops.c glusterd-store.c glusterd-handshake.c \ - glusterd-pmap.c glusterd-volgen.c glusterd-rebalance.c +glusterd_la_CPPFLAGS = $(AM_CPPFLAGS) \ + -DFILTERDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/filter\" \ + -DXLATORDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator\" \ + -I$(top_srcdir)/libglusterd/src/ + +glusterd_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) +glusterd_la_SOURCES = glusterd.c glusterd-handler.c glusterd-sm.c \ + glusterd-op-sm.c glusterd-utils.c glusterd-rpc-ops.c \ + glusterd-store.c glusterd-handshake.c glusterd-pmap.c \ + glusterd-volgen.c glusterd-rebalance.c \ + glusterd-quota.c glusterd-bitrot.c glusterd-geo-rep.c \ + glusterd-replace-brick.c glusterd-log-ops.c \ + glusterd-volume-ops.c glusterd-brick-ops.c glusterd-mountbroker.c \ + glusterd-syncop.c glusterd-hooks.c glusterd-volume-set.c \ + glusterd-locks.c glusterd-snapshot.c glusterd-mgmt-handler.c \ + glusterd-mgmt.c glusterd-peer-utils.c glusterd-statedump.c \ + glusterd-snapshot-utils.c glusterd-conn-mgmt.c \ + glusterd-proc-mgmt.c glusterd-svc-mgmt.c \ + glusterd-nfs-svc.c glusterd-quotad-svc.c glusterd-svc-helper.c \ + glusterd-conn-helper.c glusterd-snapd-svc.c glusterd-snapd-svc-helper.c \ + glusterd-bitd-svc.c glusterd-scrub-svc.c glusterd-server-quorum.c \ + glusterd-reset-brick.c glusterd-shd-svc.c glusterd-shd-svc-helper.c \ + glusterd-gfproxyd-svc.c glusterd-gfproxyd-svc-helper.c glusterd-ganesha.c \ + $(CONTRIBDIR)/mount/mntent.c + +glusterd_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ + $(top_builddir)/libglusterd/src/libglusterd.la \ + $(top_builddir)/rpc/xdr/src/libgfxdr.la \ + $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \ + $(XML_LIBS) -lcrypto $(URCU_LIBS) $(URCU_CDS_LIBS) $(LIB_DL) $(GF_XLATOR_MGNT_LIBADD) -glusterd_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la\ - $(top_builddir)/rpc/xdr/src/libgfxdr.la\ - $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la +noinst_HEADERS = glusterd.h glusterd-utils.h glusterd-op-sm.h \ + glusterd-sm.h glusterd-store.h glusterd-mem-types.h \ + glusterd-pmap.h glusterd-volgen.h glusterd-mountbroker.h \ + glusterd-syncop.h glusterd-hooks.h glusterd-locks.h glusterd-quota.h \ + glusterd-mgmt.h glusterd-messages.h glusterd-peer-utils.h \ + glusterd-statedump.h glusterd-snapshot-utils.h glusterd-geo-rep.h \ + glusterd-conn-mgmt.h glusterd-conn-helper.h glusterd-proc-mgmt.h \ + glusterd-svc-mgmt.h glusterd-nfs-svc.h \ + glusterd-quotad-svc.h glusterd-svc-helper.h glusterd-snapd-svc.h \ + glusterd-snapd-svc-helper.h glusterd-rcu.h glusterd-bitd-svc.h \ + glusterd-scrub-svc.h glusterd-server-quorum.h glusterd-errno.h \ + glusterd-shd-svc.h glusterd-shd-svc-helper.h \ + glusterd-gfproxyd-svc.h glusterd-gfproxyd-svc-helper.h \ + $(CONTRIBDIR)/userspace-rcu/rculist-extra.h \ + $(CONTRIBDIR)/mount/mntent_compat.h -noinst_HEADERS = glusterd.h glusterd-utils.h glusterd-op-sm.h glusterd-sm.h \ - glusterd-store.h glusterd-mem-types.h glusterd-pmap.h glusterd-volgen.h +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ 
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \ + -I$(CONTRIBDIR)/rbtree -I$(top_srcdir)/rpc/rpc-lib/src \ + -I$(CONTRIBDIR)/mount -I$(CONTRIBDIR)/userspace-rcu \ + -DSBIN_DIR=\"$(sbindir)\" -DDATADIR=\"$(localstatedir)\" \ + -DGSYNCD_PREFIX=\"$(GLUSTERFS_LIBEXECDIR)\" \ + -DCONFDIR=\"$(localstatedir)/run/gluster/shared_storage/nfs-ganesha\" \ + -DGANESHA_PREFIX=\"$(libexecdir)/ganesha\" \ + -DSYNCDAEMON_COMPILE=$(SYNCDAEMON_COMPILE) \ + -I$(top_srcdir)/libglusterd/src/ -AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\ - -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)\ - -I$(rpclibdir) -L$(xlatordir)/ -I$(CONTRIBDIR)/rbtree -I$(top_srcdir)/rpc/xdr/src\ - -I$(top_srcdir)/rpc/rpc-lib/src -I$(CONTRIBDIR)/uuid -I$(top_srcdir)/contrib/md5 -DGFS_PREFIX=\"$(prefix)\" \ - -DDATADIR=\"$(localstatedir)\" -DGSYNCD_PREFIX=\"$(libexecdir)/glusterfs\"\ - -DSYNCDAEMON_COMPILE=$(SYNCDAEMON_COMPILE) +AM_CFLAGS = -Wall $(GF_CFLAGS) $(URCU_CFLAGS) $(URCU_CDS_CFLAGS) $(XML_CFLAGS) + +AM_LDFLAGS = -L$(xlatordir) $(URCU_LIBS) $(URCU_CDS_LIBS) CLEANFILES = + +install-data-hook: +if WITH_SERVER +if GF_INSTALL_GLUSTERD_WORKDIR + $(mkdir_p) $(DESTDIR)$(GLUSTERD_WORKDIR) + (stat $(DESTDIR)$(sysconfdir)/glusterd && \ + mv $(DESTDIR)$(sysconfdir)/glusterd $(DESTDIR)$(GLUSTERD_WORKDIR)) || true; + (ln -sf $(DESTDIR)$(GLUSTERD_WORKDIR) $(sysconfdir)/glusterd) || true; +endif +endif diff --git a/xlators/mgmt/glusterd/src/glusterd-bitd-svc.c b/xlators/mgmt/glusterd/src/glusterd-bitd-svc.c new file mode 100644 index 00000000000..6adb799b18f --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-bitd-svc.c @@ -0,0 +1,206 @@ +/* + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#include <glusterfs/globals.h> +#include <glusterfs/run.h> +#include "glusterd.h" +#include "glusterd-utils.h" +#include "glusterd-volgen.h" +#include "glusterd-bitd-svc.h" +#include "glusterd-svc-helper.h" + +void +glusterd_bitdsvc_build(glusterd_svc_t *svc) +{ + svc->manager = glusterd_bitdsvc_manager; + svc->start = glusterd_bitdsvc_start; + svc->stop = glusterd_bitdsvc_stop; +} + +int +glusterd_bitdsvc_init(glusterd_svc_t *svc) +{ + return glusterd_svc_init(svc, bitd_svc_name); +} + +static int +glusterd_bitdsvc_create_volfile() +{ + char filepath[PATH_MAX] = { + 0, + }; + int ret = -1; + glusterd_conf_t *conf = NULL; + xlator_t *this = NULL; + + this = THIS; + conf = this->private; + GF_ASSERT(conf); + + glusterd_svc_build_volfile_path(bitd_svc_name, conf->workdir, filepath, + sizeof(filepath)); + + ret = glusterd_create_global_volfile(build_bitd_graph, filepath, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL, + "Failed to create volfile"); + goto out; + } + +out: + gf_msg_debug(this->name, 0, "Returning %d", ret); + + return ret; +} + +int +glusterd_bitdsvc_manager(glusterd_svc_t *svc, void *data, int flags) +{ + int ret = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + if (!svc->inited) { + ret = glusterd_bitdsvc_init(svc); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BITD_INIT_FAIL, + "Failed to init " + "bitd service"); + goto out; + } else { + svc->inited = _gf_true; + gf_msg_debug(this->name, 0, + "BitD service " + "initialized"); + } + } + + if (glusterd_should_i_stop_bitd()) { + ret = svc->stop(svc, SIGTERM); + } else { + ret = glusterd_bitdsvc_create_volfile(); + if (ret) + goto out; + + ret = svc->stop(svc, SIGKILL); + if (ret) + goto out; + + ret = svc->start(svc, flags); + if (ret) + goto out; + + ret = glusterd_conn_connect(&(svc->conn)); + if (ret) + goto out; + } + +out: + if (ret) + gf_event(EVENT_SVC_MANAGER_FAILED, "svc_name=%s", svc->name); + + gf_msg_debug(THIS->name, 0, "Returning %d", ret); + + return ret; +} + +int +glusterd_bitdsvc_start(glusterd_svc_t *svc, int flags) +{ + int ret = -1; + dict_t *cmdict = NULL; + + cmdict = dict_new(); + if (!cmdict) + goto error_return; + + ret = dict_set_str(cmdict, "cmdarg0", "--global-timer-wheel"); + if (ret) + goto dealloc_dict; + + ret = glusterd_svc_start(svc, flags, cmdict); + +dealloc_dict: + dict_unref(cmdict); +error_return: + return ret; +} + +int +glusterd_bitdsvc_stop(glusterd_svc_t *svc, int sig) +{ + return glusterd_svc_stop(svc, sig); +} + +int +glusterd_bitdsvc_reconfigure() +{ + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + gf_boolean_t identical = _gf_false; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, out); + + if (glusterd_should_i_stop_bitd()) + goto manager; + /* + * Check both OLD and NEW volfiles, if they are SAME by size + * and cksum i.e. "character-by-character". If YES, then + * NOTHING has been changed, just return. + */ + ret = glusterd_svc_check_volfile_identical(priv->bitd_svc.name, + build_bitd_graph, &identical); + if (ret) + goto out; + if (identical) { + ret = 0; + goto out; + } + + /* + * They are not identical. Find out if the topology is changed + * OR just the volume options. If just the options which got + * changed, then inform the xlator to reconfigure the options. 
+ */ + identical = _gf_false; /* RESET the FLAG */ + ret = glusterd_svc_check_topology_identical(priv->bitd_svc.name, + build_bitd_graph, &identical); + if (ret) + goto out; /*not able to compare due to some corruption */ + + /* Topology is not changed, but just the options. But write the + * options to bitd volfile, so that bitd will be reconfigured. + */ + if (identical) { + ret = glusterd_bitdsvc_create_volfile(); + if (ret == 0) { /* Only if above PASSES */ + ret = glusterd_fetchspec_notify(THIS); + } + goto out; + } + +manager: + /* + * bitd volfile's topology has been changed. bitd server needs + * to be RESTARTED to ACT on the changed volfile. + */ + ret = priv->bitd_svc.manager(&(priv->bitd_svc), NULL, PROC_START_NO_WAIT); + +out: + gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret); + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-bitd-svc.h b/xlators/mgmt/glusterd/src/glusterd-bitd-svc.h new file mode 100644 index 00000000000..1bff084a9a8 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-bitd-svc.h @@ -0,0 +1,40 @@ +/* + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _GLUSTERD_BITD_SVC_H_ +#define _GLUSTERD_BITD_SVC_H_ + +#include "glusterd-svc-mgmt.h" + +#define bitd_svc_name "bitd" + +void +glusterd_bitdsvc_build(glusterd_svc_t *svc); + +int +glusterd_bitdsvc_init(glusterd_svc_t *svc); + +int +glusterd_bitdsvc_manager(glusterd_svc_t *svc, void *data, int flags); + +int +glusterd_bitdsvc_start(glusterd_svc_t *svc, int flags); + +int +glusterd_bitdsvc_stop(glusterd_svc_t *svc, int sig); + +int +glusterd_bitdsvc_reconfigure(); + +void +glusterd_bitdsvc_build_volfile_path(char *server, char *workdir, char *volfile, + size_t len); + +#endif diff --git a/xlators/mgmt/glusterd/src/glusterd-bitrot.c b/xlators/mgmt/glusterd/src/glusterd-bitrot.c new file mode 100644 index 00000000000..37429fe9214 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-bitrot.c @@ -0,0 +1,822 @@ +/* + Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+ */ + +#include <glusterfs/common-utils.h> +#include "cli1-xdr.h" +#include "xdr-generic.h" +#include "glusterd.h" +#include "glusterd-op-sm.h" +#include "glusterd-store.h" +#include "glusterd-utils.h" +#include "glusterd-volgen.h" +#include <glusterfs/run.h> +#include <glusterfs/syscall.h> +#include <glusterfs/byte-order.h> +#include <glusterfs/compat-errno.h> +#include "glusterd-scrub-svc.h" +#include "glusterd-messages.h" + +#include <sys/wait.h> +#include <dlfcn.h> + +const char *gd_bitrot_op_list[GF_BITROT_OPTION_TYPE_MAX] = { + [GF_BITROT_OPTION_TYPE_NONE] = "none", + [GF_BITROT_OPTION_TYPE_ENABLE] = "enable", + [GF_BITROT_OPTION_TYPE_DISABLE] = "disable", + [GF_BITROT_OPTION_TYPE_SCRUB_THROTTLE] = "scrub-throttle", + [GF_BITROT_OPTION_TYPE_SCRUB_FREQ] = "scrub-frequency", + [GF_BITROT_OPTION_TYPE_SCRUB] = "scrub", + [GF_BITROT_OPTION_TYPE_EXPIRY_TIME] = "expiry-time", + [GF_BITROT_OPTION_TYPE_SIGNER_THREADS] = "signer-threads", +}; + +int +__glusterd_handle_bitrot(rpcsvc_request_t *req) +{ + int32_t ret = -1; + gf_cli_req cli_req = {{ + 0, + }}; + dict_t *dict = NULL; + glusterd_op_t cli_op = GD_OP_BITROT; + char *volname = NULL; + char *scrub = NULL; + int32_t type = 0; + char msg[256] = { + 0, + }; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + + GF_ASSERT(req); + + this = THIS; + GF_ASSERT(this); + + conf = this->private; + GF_ASSERT(conf); + + ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (cli_req.dict.dict_len) { + /* Unserialize the dictionary */ + dict = dict_new(); + + ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + "failed to " + "unserialize req-buffer to dictionary"); + snprintf(msg, sizeof(msg), + "Unable to decode the " + "command"); + goto out; + } else { + dict->extra_stdfree = cli_req.dict.dict_val; + } + } + + ret = dict_get_str(dict, "volname", &volname); + if (ret) { + snprintf(msg, sizeof(msg), "Unable to get volume name"); + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Unable to get volume name, " + "while handling bitrot command"); + goto out; + } + + ret = dict_get_int32(dict, "type", &type); + if (ret) { + snprintf(msg, sizeof(msg), "Unable to get type of command"); + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Unable to get type of cmd, " + "while handling bitrot command"); + goto out; + } + + if (conf->op_version < GD_OP_VERSION_3_7_0) { + snprintf(msg, sizeof(msg), + "Cannot execute command. The " + "cluster is operating at version %d. Bitrot command " + "%s is unavailable in this version", + conf->op_version, gd_bitrot_op_list[type]); + ret = -1; + goto out; + } + + if (type == GF_BITROT_CMD_SCRUB_STATUS) { + /* Backward compatibility handling for scrub status command*/ + if (conf->op_version < GD_OP_VERSION_3_7_7) { + snprintf(msg, sizeof(msg), + "Cannot execute command. " + "The cluster is operating at version %d. 
" + "Bitrot scrub status command unavailable in " + "this version", + conf->op_version); + ret = -1; + goto out; + } + + ret = dict_get_str(dict, "scrub-value", &scrub); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get scrub value."); + ret = -1; + goto out; + } + + if (!strncmp(scrub, "status", SLEN("status"))) { + ret = glusterd_op_begin_synctask(req, GD_OP_SCRUB_STATUS, dict); + goto out; + } + } + + if (type == GF_BITROT_CMD_SCRUB_ONDEMAND) { + /* Backward compatibility handling for scrub status command*/ + if (conf->op_version < GD_OP_VERSION_3_9_0) { + snprintf(msg, sizeof(msg), + "Cannot execute command. " + "The cluster is operating at version %d. " + "Bitrot scrub ondemand command unavailable in " + "this version", + conf->op_version); + ret = -1; + goto out; + } + + ret = dict_get_str(dict, "scrub-value", &scrub); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get scrub value."); + ret = -1; + goto out; + } + + if (!strncmp(scrub, "ondemand", SLEN("ondemand"))) { + ret = glusterd_op_begin_synctask(req, GD_OP_SCRUB_ONDEMAND, dict); + goto out; + } + } + + ret = glusterd_op_begin_synctask(req, GD_OP_BITROT, dict); + +out: + if (ret) { + if (msg[0] == '\0') + snprintf(msg, sizeof(msg), "Bitrot operation failed"); + ret = glusterd_op_send_cli_response(cli_op, ret, 0, req, dict, msg); + } + + return ret; +} + +int +glusterd_handle_bitrot(rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler(req, __glusterd_handle_bitrot); +} + +static int +glusterd_bitrot_scrub_throttle(glusterd_volinfo_t *volinfo, dict_t *dict, + char *key, char **op_errstr) +{ + int32_t ret = -1; + char *scrub_throttle = NULL; + char *option = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + ret = dict_get_str(dict, "scrub-throttle-value", &scrub_throttle); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Unable to fetch scrub-" + "throttle value"); + goto out; + } + + option = gf_strdup(scrub_throttle); + ret = dict_set_dynstr(volinfo->dict, key, option); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Failed to set option %s", key); + goto out; + } + + ret = glusterd_scrubsvc_reconfigure(); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SCRUBSVC_RECONF_FAIL, + "Failed to reconfigure scrub " + "services"); + goto out; + } + +out: + return ret; +} + +static int +glusterd_bitrot_scrub_freq(glusterd_volinfo_t *volinfo, dict_t *dict, char *key, + char **op_errstr) +{ + int32_t ret = -1; + char *scrub_freq = NULL; + xlator_t *this = NULL; + char *option = NULL; + + this = THIS; + GF_ASSERT(this); + + ret = dict_get_str(dict, "scrub-frequency-value", &scrub_freq); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Unable to fetch scrub-" + "freq value"); + goto out; + } + + option = gf_strdup(scrub_freq); + ret = dict_set_dynstr(volinfo->dict, key, option); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Failed to set option %s", key); + goto out; + } + + ret = glusterd_scrubsvc_reconfigure(); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SCRUBSVC_RECONF_FAIL, + "Failed to reconfigure scrub " + "services"); + goto out; + } + +out: + return ret; +} + +static int +glusterd_bitrot_scrub(glusterd_volinfo_t *volinfo, dict_t *dict, char *key, + char **op_errstr) +{ + int32_t ret = -1; + char *scrub_value = NULL; + xlator_t *this = NULL; + char *option = NULL; + 
+ this = THIS; + GF_ASSERT(this); + + ret = dict_get_str(dict, "scrub-value", &scrub_value); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to fetch scrub" + "value"); + goto out; + } + + if (!strcmp(scrub_value, "resume")) { + option = gf_strdup("Active"); + } else { + option = gf_strdup(scrub_value); + } + + ret = dict_set_dynstr(volinfo->dict, key, option); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Failed to set option %s", key); + goto out; + } + + ret = glusterd_scrubsvc_reconfigure(); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SCRUBSVC_RECONF_FAIL, + "Failed to reconfigure scrub " + "services"); + goto out; + } + +out: + return ret; +} + +static int +glusterd_bitrot_expiry_time(glusterd_volinfo_t *volinfo, dict_t *dict, + char *key, char **op_errstr) +{ + int32_t ret = -1; + uint32_t expiry_time = 0; + xlator_t *this = NULL; + char dkey[32] = { + 0, + }; + + this = THIS; + GF_ASSERT(this); + + ret = dict_get_uint32(dict, "expiry-time", &expiry_time); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Unable to get bitrot expiry" + " timer value."); + goto out; + } + + snprintf(dkey, sizeof(dkey), "%d", expiry_time); + + ret = dict_set_dynstr_with_alloc(volinfo->dict, key, dkey); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Failed to set option %s", key); + goto out; + } + + ret = glusterd_bitdsvc_reconfigure(); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BITDSVC_RECONF_FAIL, + "Failed to reconfigure bitrot" + "services"); + goto out; + } +out: + return ret; +} + +static gf_boolean_t +is_bitd_configure_noop(xlator_t *this, glusterd_volinfo_t *volinfo) +{ + gf_boolean_t noop = _gf_true; + glusterd_brickinfo_t *brickinfo = NULL; + + if (!glusterd_is_bitrot_enabled(volinfo)) + goto out; + else if (volinfo->status != GLUSTERD_STATUS_STARTED) + goto out; + else { + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + if (!glusterd_is_local_brick(this, volinfo, brickinfo)) + continue; + noop = _gf_false; + return noop; + } + } +out: + return noop; +} + +static int +glusterd_bitrot_signer_threads(glusterd_volinfo_t *volinfo, dict_t *dict, + char *key, char **op_errstr) +{ + int32_t ret = -1; + uint32_t signer_th_count = 0; + uint32_t existing_th_count = 0; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + char dkey[32] = { + 0, + }; + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, out); + + ret = dict_get_uint32(dict, "signer-threads", &signer_th_count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Unable to get bitrot signer thread count."); + goto out; + } + + ret = dict_get_uint32(volinfo->dict, key, &existing_th_count); + if (ret == 0 && signer_th_count == existing_th_count) { + goto out; + } + + snprintf(dkey, sizeof(dkey), "%d", signer_th_count); + ret = dict_set_dynstr_with_alloc(volinfo->dict, key, dkey); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Failed to set option %s", key); + goto out; + } + + if (!is_bitd_configure_noop(this, volinfo)) { + ret = priv->bitd_svc.manager(&(priv->bitd_svc), NULL, + PROC_START_NO_WAIT); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BITDSVC_RECONF_FAIL, + "Failed to reconfigure bitrot services"); + goto out; + } + } +out: + return ret; +} + +static int +glusterd_bitrot_enable(glusterd_volinfo_t *volinfo, char 
**op_errstr) +{ + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + GF_VALIDATE_OR_GOTO(this->name, volinfo, out); + GF_VALIDATE_OR_GOTO(this->name, op_errstr, out); + + if (glusterd_is_volume_started(volinfo) == 0) { + *op_errstr = gf_strdup( + "Volume is stopped, start volume " + "to enable bitrot."); + ret = -1; + goto out; + } + + ret = glusterd_is_bitrot_enabled(volinfo); + if (ret) { + *op_errstr = gf_strdup("Bitrot is already enabled"); + ret = -1; + goto out; + } + + ret = dict_set_dynstr_with_alloc(volinfo->dict, VKEY_FEATURES_BITROT, "on"); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "dict set failed"); + goto out; + } + + /*Once bitrot is enable scrubber should be in Active state*/ + ret = dict_set_dynstr_with_alloc(volinfo->dict, "features.scrub", "Active"); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Failed to set option " + "features.scrub value"); + goto out; + } + + ret = 0; +out: + if (ret && op_errstr && !*op_errstr) + gf_asprintf(op_errstr, + "Enabling bitrot on volume %s has been " + "unsuccessful", + volinfo->volname); + return ret; +} + +static int +glusterd_bitrot_disable(glusterd_volinfo_t *volinfo, char **op_errstr) +{ + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + + GF_VALIDATE_OR_GOTO(this->name, volinfo, out); + GF_VALIDATE_OR_GOTO(this->name, op_errstr, out); + + ret = dict_set_dynstr_with_alloc(volinfo->dict, VKEY_FEATURES_BITROT, + "off"); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "dict set failed"); + goto out; + } + + /*Once bitrot disabled scrubber should be Inactive state*/ + ret = dict_set_dynstr_with_alloc(volinfo->dict, "features.scrub", + "Inactive"); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Failed to set " + "features.scrub value"); + goto out; + } + + ret = 0; +out: + if (ret && op_errstr && !*op_errstr) + gf_asprintf(op_errstr, + "Disabling bitrot on volume %s has " + "been unsuccessful", + volinfo->volname); + return ret; +} + +gf_boolean_t +glusterd_should_i_stop_bitd() +{ + glusterd_conf_t *conf = THIS->private; + glusterd_volinfo_t *volinfo = NULL; + gf_boolean_t stopped = _gf_true; + glusterd_brickinfo_t *brickinfo = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + cds_list_for_each_entry(volinfo, &conf->volumes, vol_list) + { + if (!glusterd_is_bitrot_enabled(volinfo)) + continue; + else if (volinfo->status != GLUSTERD_STATUS_STARTED) + continue; + else { + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + if (!glusterd_is_local_brick(this, volinfo, brickinfo)) + continue; + stopped = _gf_false; + return stopped; + } + + /* Before stopping bitrot/scrubber daemon check + * other volume also whether respective volume + * host a brick from this node or not.*/ + continue; + } + } + + return stopped; +} + +static int +glusterd_manage_bitrot(int opcode) +{ + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + GF_ASSERT(priv); + + switch (opcode) { + case GF_BITROT_OPTION_TYPE_ENABLE: + case GF_BITROT_OPTION_TYPE_DISABLE: + ret = priv->bitd_svc.manager(&(priv->bitd_svc), NULL, + PROC_START_NO_WAIT); + if (ret) + break; + ret = priv->scrub_svc.manager(&(priv->scrub_svc), NULL, + PROC_START_NO_WAIT); + break; + default: + ret = 0; + break; + } + + return ret; +} + +int 
+glusterd_op_bitrot(dict_t *dict, char **op_errstr, dict_t *rsp_dict) +{ + glusterd_volinfo_t *volinfo = NULL; + int32_t ret = -1; + char *volname = NULL; + int type = -1; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + GF_ASSERT(dict); + GF_ASSERT(op_errstr); + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + ret = dict_get_str(dict, "volname", &volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Unable to get volume name"); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + gf_asprintf(op_errstr, FMTSTR_CHECK_VOL_EXISTS, volname); + goto out; + } + + ret = dict_get_int32(dict, "type", &type); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Unable to get type from " + "dict"); + goto out; + } + + switch (type) { + case GF_BITROT_OPTION_TYPE_ENABLE: + ret = glusterd_bitrot_enable(volinfo, op_errstr); + if (ret < 0) + goto out; + break; + + case GF_BITROT_OPTION_TYPE_DISABLE: + ret = glusterd_bitrot_disable(volinfo, op_errstr); + if (ret < 0) + goto out; + + break; + + case GF_BITROT_OPTION_TYPE_SCRUB_THROTTLE: + ret = glusterd_bitrot_scrub_throttle( + volinfo, dict, "features.scrub-throttle", op_errstr); + if (ret) + goto out; + break; + + case GF_BITROT_OPTION_TYPE_SCRUB_FREQ: + ret = glusterd_bitrot_scrub_freq(volinfo, dict, + "features.scrub-freq", op_errstr); + if (ret) + goto out; + break; + + case GF_BITROT_OPTION_TYPE_SCRUB: + ret = glusterd_bitrot_scrub(volinfo, dict, "features.scrub", + op_errstr); + if (ret) + goto out; + break; + + case GF_BITROT_OPTION_TYPE_EXPIRY_TIME: + ret = glusterd_bitrot_expiry_time( + volinfo, dict, "features.expiry-time", op_errstr); + if (ret) + goto out; + break; + + case GF_BITROT_OPTION_TYPE_SIGNER_THREADS: + ret = glusterd_bitrot_signer_threads( + volinfo, dict, "features.signer-threads", op_errstr); + if (ret) + goto out; + break; + + case GF_BITROT_CMD_SCRUB_STATUS: + case GF_BITROT_CMD_SCRUB_ONDEMAND: + break; + + default: + gf_asprintf(op_errstr, + "Bitrot command failed. 
Invalid " + "opcode"); + ret = -1; + goto out; + } + + ret = glusterd_manage_bitrot(type); + if (ret) + goto out; + + ret = glusterd_create_volfiles_and_notify_services(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL, + "Unable to re-create " + "volfiles"); + ret = -1; + goto out; + } + + ret = glusterd_store_volinfo(volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + gf_msg_debug(this->name, 0, + "Failed to store volinfo for " + "bitrot"); + goto out; + } + +out: + return ret; +} + +int +glusterd_op_stage_bitrot(dict_t *dict, char **op_errstr, dict_t *rsp_dict) +{ + int ret = 0; + char *volname = NULL; + char *scrub_cmd = NULL; + char *scrub_cmd_from_dict = NULL; + char msg[2048] = { + 0, + }; + int type = 0; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *volinfo = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + GF_ASSERT(dict); + GF_ASSERT(op_errstr); + + ret = dict_get_str(dict, "volname", &volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Unable to get volume name"); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + gf_asprintf(op_errstr, FMTSTR_CHECK_VOL_EXISTS, volname); + goto out; + } + + if (!glusterd_is_volume_started(volinfo)) { + *op_errstr = gf_strdup( + "Volume is stopped, start volume " + "before executing bit rot command."); + ret = -1; + goto out; + } + + ret = dict_get_int32(dict, "type", &type); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Unable to get type for " + "operation"); + + *op_errstr = gf_strdup( + "Staging stage failed for bitrot " + "operation."); + goto out; + } + + if ((GF_BITROT_OPTION_TYPE_ENABLE != type) && + (glusterd_is_bitrot_enabled(volinfo) == 0)) { + ret = -1; + gf_asprintf(op_errstr, "Bitrot is not enabled on volume %s", volname); + goto out; + } + + if ((GF_BITROT_OPTION_TYPE_SCRUB == type)) { + ret = dict_get_str(volinfo->dict, "features.scrub", + &scrub_cmd_from_dict); + if (!ret) { + ret = dict_get_str(dict, "scrub-value", &scrub_cmd); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Unable to " + "get scrub-value"); + *op_errstr = gf_strdup( + "Staging failed for " + "bitrot operation. " + "Please check log file" + " for more details."); + goto out; + } + /* If scrubber is resume then value of scrubber will be + * "Active" in the dictionary. */ + if (!strcmp(scrub_cmd_from_dict, scrub_cmd) || + (!strncmp("Active", scrub_cmd_from_dict, SLEN("Active")) && + !strncmp("resume", scrub_cmd, SLEN("resume")))) { + snprintf(msg, sizeof(msg), + "Scrub is already" + " %sd for volume %s", + scrub_cmd, volinfo->volname); + *op_errstr = gf_strdup(msg); + ret = -1; + goto out; + } + } + ret = 0; + } + +out: + if (ret && op_errstr && *op_errstr) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OP_STAGE_BITROT_FAIL, "%s", + *op_errstr); + gf_msg_debug(this->name, 0, "Returning %d", ret); + + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c new file mode 100644 index 00000000000..e56cd0e6c74 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c @@ -0,0 +1,2796 @@ +/* + Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. 
+ + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#include <glusterfs/common-utils.h> +#include "cli1-xdr.h" +#include "xdr-generic.h" +#include "glusterd.h" +#include "glusterd-op-sm.h" +#include "glusterd-geo-rep.h" +#include "glusterd-store.h" +#include "glusterd-mgmt.h" +#include "glusterd-utils.h" +#include "glusterd-volgen.h" +#include "glusterd-svc-helper.h" +#include "glusterd-messages.h" +#include "glusterd-server-quorum.h" +#include <glusterfs/run.h> +#include <glusterfs/syscall.h> +#include <sys/signal.h> + +/* misc */ + +/* In this function, we decide, based on the 'count' of the brick, + where to add it in the current volume. 'count' tells us already + how many of the given bricks are added. other argument are self- + descriptive. */ +int +add_brick_at_right_order(glusterd_brickinfo_t *brickinfo, + glusterd_volinfo_t *volinfo, int count, + int32_t stripe_cnt, int32_t replica_cnt) +{ + int idx = 0; + int i = 0; + int sub_cnt = 0; + glusterd_brickinfo_t *brick = NULL; + + /* The complexity of the function is in deciding at which index + to add new brick. Even though it can be defined with a complex + single formula for all volume, it is separated out to make it + more readable */ + if (stripe_cnt) { + /* common formula when 'stripe_count' is set */ + /* idx = ((count / ((stripe_cnt * volinfo->replica_count) - + volinfo->dist_leaf_count)) * volinfo->dist_leaf_count) + + (count + volinfo->dist_leaf_count); + */ + + sub_cnt = volinfo->dist_leaf_count; + + idx = ((count / ((stripe_cnt * volinfo->replica_count) - sub_cnt)) * + sub_cnt) + + (count + sub_cnt); + + goto insert_brick; + } + + /* replica count is set */ + /* common formula when 'replica_count' is set */ + /* idx = ((count / (replica_cnt - existing_replica_count)) * + existing_replica_count) + + (count + existing_replica_count); + */ + + sub_cnt = volinfo->replica_count; + idx = (count / (replica_cnt - sub_cnt) * sub_cnt) + (count + sub_cnt); + +insert_brick: + i = 0; + cds_list_for_each_entry(brick, &volinfo->bricks, brick_list) + { + i++; + if (i < idx) + continue; + gf_msg_debug(THIS->name, 0, "brick:%s index=%d, count=%d", brick->path, + idx, count); + + cds_list_add(&brickinfo->brick_list, &brick->brick_list); + break; + } + + return 0; +} + +static int +gd_addbr_validate_replica_count(glusterd_volinfo_t *volinfo, int replica_count, + int arbiter_count, int total_bricks, int *type, + char *err_str, int err_len) +{ + int ret = -1; + + /* replica count is set */ + switch (volinfo->type) { + case GF_CLUSTER_TYPE_NONE: + if ((volinfo->brick_count * replica_count) == total_bricks) { + /* Change the volume type */ + *type = GF_CLUSTER_TYPE_REPLICATE; + gf_msg(THIS->name, GF_LOG_INFO, 0, + GD_MSG_VOL_TYPE_CHANGING_INFO, + "Changing the type of volume %s from " + "'distribute' to 'replica'", + volinfo->volname); + ret = 0; + goto out; + + } else { + snprintf(err_str, err_len, + "Incorrect number of " + "bricks (%d) supplied for replica count (%d).", + (total_bricks - volinfo->brick_count), replica_count); + gf_msg(THIS->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, + "%s", err_str); + goto out; + } + break; + case GF_CLUSTER_TYPE_REPLICATE: + if (replica_count < volinfo->replica_count) { + snprintf(err_str, err_len, + "Incorrect replica count (%d) supplied. 
" + "Volume already has (%d)", + replica_count, volinfo->replica_count); + gf_msg(THIS->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, + "%s", err_str); + goto out; + } + if (replica_count == volinfo->replica_count) { + if (arbiter_count && !volinfo->arbiter_count) { + snprintf(err_str, err_len, + "Cannot convert replica 3 volume " + "to arbiter volume."); + gf_msg(THIS->name, GF_LOG_ERROR, EINVAL, + GD_MSG_INVALID_ENTRY, "%s", err_str); + goto out; + } + if (!(total_bricks % volinfo->dist_leaf_count)) { + ret = 1; + goto out; + } + } + if (replica_count > volinfo->replica_count) { + /* We have to make sure before and after 'add-brick', + the number or subvolumes for distribute will remain + same, when replica count is given */ + if ((total_bricks * volinfo->dist_leaf_count) == + (volinfo->brick_count * + (replica_count * volinfo->stripe_count))) { + /* Change the dist_leaf_count */ + gf_msg(THIS->name, GF_LOG_INFO, 0, + GD_MSG_REPLICA_COUNT_CHANGE_INFO, + "Changing the replica count of " + "volume %s from %d to %d", + volinfo->volname, volinfo->replica_count, + replica_count); + ret = 0; + goto out; + } + } + break; + case GF_CLUSTER_TYPE_DISPERSE: + snprintf(err_str, err_len, + "Volume %s cannot be converted " + "from dispersed to replicated-" + "dispersed", + volinfo->volname); + gf_msg(THIS->name, GF_LOG_ERROR, EPERM, GD_MSG_OP_NOT_PERMITTED, + "%s", err_str); + goto out; + } +out: + return ret; +} + +static int +gd_rmbr_validate_replica_count(glusterd_volinfo_t *volinfo, + int32_t replica_count, int32_t brick_count, + char *err_str, size_t err_len) +{ + int ret = -1; + int replica_nodes = 0; + xlator_t *this = NULL; + this = THIS; + GF_ASSERT(this); + + switch (volinfo->type) { + case GF_CLUSTER_TYPE_NONE: + case GF_CLUSTER_TYPE_DISPERSE: + snprintf(err_str, err_len, + "replica count (%d) option given for non replicate " + "volume %s", + replica_count, volinfo->volname); + gf_smsg(this->name, GF_LOG_WARNING, EINVAL, GD_MSG_INVALID_ARGUMENT, + err_str, NULL); + goto out; + + case GF_CLUSTER_TYPE_REPLICATE: + /* in remove brick, you can only reduce the replica count */ + if (replica_count > volinfo->replica_count) { + snprintf(err_str, err_len, + "given replica count (%d) option is more " + "than volume %s's replica count (%d)", + replica_count, volinfo->volname, + volinfo->replica_count); + gf_smsg(this->name, GF_LOG_WARNING, EINVAL, + GD_MSG_INVALID_ARGUMENT, err_str, NULL); + goto out; + } + if (replica_count == volinfo->replica_count) { + /* This means the 'replica N' option on CLI was + redundant. Check if the total number of bricks given + for removal is same as 'dist_leaf_count' */ + if (brick_count % volinfo->dist_leaf_count) { + snprintf(err_str, err_len, + "number of bricks provided (%d) is " + "not valid. 
need at least %d " + "(or %dxN)", + brick_count, volinfo->dist_leaf_count, + volinfo->dist_leaf_count); + gf_smsg(this->name, GF_LOG_WARNING, EINVAL, + GD_MSG_INVALID_ARGUMENT, err_str, NULL); + goto out; + } + ret = 1; + goto out; + } + + replica_nodes = ((volinfo->brick_count / volinfo->replica_count) * + (volinfo->replica_count - replica_count)); + + if (brick_count % replica_nodes) { + snprintf(err_str, err_len, + "need %d(xN) bricks for reducing replica " + "count of the volume from %d to %d", + replica_nodes, volinfo->replica_count, replica_count); + gf_smsg(this->name, GF_LOG_WARNING, EINVAL, + GD_MSG_INVALID_ARGUMENT, err_str, NULL); + goto out; + } + break; + } + + ret = 0; +out: + return ret; +} + +/* Handler functions */ +int +__glusterd_handle_add_brick(rpcsvc_request_t *req) +{ + int32_t ret = -1; + gf_cli_req cli_req = {{ + 0, + }}; + dict_t *dict = NULL; + char *bricks = NULL; + char *volname = NULL; + int brick_count = 0; + void *cli_rsp = NULL; + char err_str[2048] = ""; + gf_cli_rsp rsp = { + 0, + }; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + int total_bricks = 0; + int32_t replica_count = 0; + int32_t arbiter_count = 0; + int32_t stripe_count = 0; + int type = 0; + glusterd_conf_t *conf = NULL; + + this = THIS; + GF_ASSERT(this); + + GF_ASSERT(req); + + conf = this->private; + GF_ASSERT(conf); + + ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + // failed to decode msg; + req->rpc_err = GARBAGE_ARGS; + snprintf(err_str, sizeof(err_str), "Garbage args received"); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL); + goto out; + } + + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_ADD_BRICK_REQ_RECVD, + "Received add brick req"); + + if (cli_req.dict.dict_len) { + /* Unserialize the dictionary */ + dict = dict_new(); + + ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_UNSERIALIZE_FAIL, + "failed to " + "unserialize req-buffer to dictionary"); + snprintf(err_str, sizeof(err_str), + "Unable to decode " + "the command"); + goto out; + } + } + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + + if (ret) { + snprintf(err_str, sizeof(err_str), + "Unable to get volume " + "name"); + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, "%s", + err_str); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + snprintf(err_str, sizeof(err_str), + "Unable to get volinfo " + "for volume name %s", + volname); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, "%s", + err_str); + goto out; + } + + ret = dict_get_int32n(dict, "count", SLEN("count"), &brick_count); + if (ret) { + snprintf(err_str, sizeof(err_str), + "Unable to get volume " + "brick count"); + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, "%s", + err_str); + goto out; + } + + ret = dict_get_int32n(dict, "replica-count", SLEN("replica-count"), + &replica_count); + if (!ret) { + gf_msg(this->name, GF_LOG_INFO, errno, GD_MSG_DICT_GET_SUCCESS, + "replica-count is %d", replica_count); + } + + ret = dict_get_int32n(dict, "arbiter-count", SLEN("arbiter-count"), + &arbiter_count); + if (!ret) { + gf_msg(this->name, GF_LOG_INFO, errno, GD_MSG_DICT_GET_SUCCESS, + "arbiter-count is %d", arbiter_count); + } + + ret = dict_get_int32n(dict, "stripe-count", SLEN("stripe-count"), + &stripe_count); + if (!ret) { + gf_msg(this->name, GF_LOG_INFO, errno, 
GD_MSG_DICT_GET_SUCCESS, + "stripe-count is %d", stripe_count); + } + + if (!dict_getn(dict, "force", SLEN("force"))) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Failed to get flag"); + goto out; + } + + total_bricks = volinfo->brick_count + brick_count; + + if (!stripe_count && !replica_count) { + if (volinfo->type == GF_CLUSTER_TYPE_NONE) + goto brick_val; + + if ((volinfo->brick_count < volinfo->dist_leaf_count) && + (total_bricks <= volinfo->dist_leaf_count)) + goto brick_val; + + if ((brick_count % volinfo->dist_leaf_count) != 0) { + snprintf(err_str, sizeof(err_str), + "Incorrect number " + "of bricks supplied %d with count %d", + brick_count, volinfo->dist_leaf_count); + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_REPLICA, + "%s", err_str); + ret = -1; + goto out; + } + goto brick_val; + /* done with validation.. below section is if stripe|replica + count is given */ + } + + ret = gd_addbr_validate_replica_count(volinfo, replica_count, arbiter_count, + total_bricks, &type, err_str, + sizeof(err_str)); + if (ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COUNT_VALIDATE_FAILED, "%s", + err_str); + goto out; + } + + /* if replica count is same as earlier, set it back to 0 */ + if (ret == 1) + replica_count = 0; + + ret = dict_set_int32n(dict, "replica-count", SLEN("replica-count"), + replica_count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "failed to set the replica-count in dict"); + goto out; + } + +brick_val: + ret = dict_get_strn(dict, "bricks", SLEN("bricks"), &bricks); + if (ret) { + snprintf(err_str, sizeof(err_str), + "Unable to get volume " + "bricks"); + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, "%s", + err_str); + goto out; + } + + if (type != volinfo->type) { + ret = dict_set_int32n(dict, "type", SLEN("type"), type); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "failed to set the new type in dict"); + goto out; + } + } + + if (conf->op_version <= GD_OP_VERSION_3_7_5) { + gf_msg_debug(this->name, 0, + "The cluster is operating at " + "version less than or equal to %d. 
Falling back " + "to syncop framework.", + GD_OP_VERSION_3_7_5); + ret = glusterd_op_begin_synctask(req, GD_OP_ADD_BRICK, dict); + } else { + ret = glusterd_mgmt_v3_initiate_all_phases(req, GD_OP_ADD_BRICK, dict); + } + +out: + if (ret) { + rsp.op_ret = -1; + rsp.op_errno = 0; + if (err_str[0] == '\0') + snprintf(err_str, sizeof(err_str), "Operation failed"); + rsp.op_errstr = err_str; + cli_rsp = &rsp; + glusterd_to_cli(req, cli_rsp, NULL, 0, NULL, (xdrproc_t)xdr_gf_cli_rsp, + dict); + ret = 0; // sent error to cli, prevent second reply + } + + free(cli_req.dict.dict_val); // its malloced by xdr + + return ret; +} + +int +glusterd_handle_add_brick(rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler(req, __glusterd_handle_add_brick); +} + +static int +subvol_matcher_init(int **subvols, int count) +{ + int ret = -1; + + *subvols = GF_CALLOC(count, sizeof(int), gf_gld_mt_int); + if (*subvols) + ret = 0; + + return ret; +} + +static void +subvol_matcher_update(int *subvols, glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo) +{ + glusterd_brickinfo_t *tmp = NULL; + int32_t sub_volume = 0; + int pos = 0; + if (subvols) { + cds_list_for_each_entry(tmp, &volinfo->bricks, brick_list) + { + if (strcmp(tmp->hostname, brickinfo->hostname) || + strcmp(tmp->path, brickinfo->path)) { + pos++; + continue; + } + gf_msg_debug(THIS->name, 0, LOGSTR_FOUND_BRICK, brickinfo->hostname, + brickinfo->path, volinfo->volname); + sub_volume = (pos / volinfo->dist_leaf_count); + subvols[sub_volume]++; + break; + } + } +} + +static int +subvol_matcher_verify(int *subvols, glusterd_volinfo_t *volinfo, char *err_str, + size_t err_len, char *vol_type, int replica_count) +{ + int i = 0; + int ret = 0; + int count = volinfo->replica_count - replica_count; + xlator_t *this = THIS; + GF_ASSERT(this); + + if (replica_count && subvols) { + for (i = 0; i < volinfo->subvol_count; i++) { + if (subvols[i] != count) { + ret = -1; + snprintf(err_str, err_len, + "Remove exactly %d" + " brick(s) from each subvolume.", + count); + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_BRICK_SUBVOL_VERIFY_FAIL, err_str, NULL); + break; + } + } + return ret; + } + + do { + if (subvols && (subvols[i] % volinfo->dist_leaf_count == 0)) { + continue; + } else { + ret = -1; + snprintf(err_str, err_len, "Bricks not from same subvol for %s", + vol_type); + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_BRICK_SUBVOL_VERIFY_FAIL, err_str, NULL); + break; + } + } while (++i < volinfo->subvol_count); + + return ret; +} + +static void +subvol_matcher_destroy(int *subvols) +{ + GF_FREE(subvols); +} + +static int +glusterd_remove_brick_validate_arbiters(glusterd_volinfo_t *volinfo, + int32_t count, int32_t replica_count, + glusterd_brickinfo_t **brickinfo_list, + char *err_str, size_t err_len) +{ + int i = 0; + int ret = 0; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_brickinfo_t *last = NULL; + char *arbiter_array = NULL; + xlator_t *this = NULL; + this = THIS; + GF_ASSERT(this); + + if (volinfo->type != GF_CLUSTER_TYPE_REPLICATE) + goto out; + + if (!replica_count || !volinfo->arbiter_count) + goto out; + + if (replica_count == 2) { + /* If it is an arbiter to replica 2 conversion, only permit + * removal of the arbiter brick.*/ + for (i = 0; i < count; i++) { + brickinfo = brickinfo_list[i]; + last = get_last_brick_of_brick_group(volinfo, brickinfo); + if (last != brickinfo) { + snprintf(err_str, err_len, + "Remove arbiter " + "brick(s) only when converting from " + "arbiter to replica 2 subvolume."); + 
gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_REMOVE_ARBITER_BRICK, err_str, NULL); + ret = -1; + goto out; + } + } + } else if (replica_count == 1) { + /* If it is an arbiter to plain distribute conversion, in every + * replica subvol, the arbiter has to be one of the bricks that + * are removed. */ + arbiter_array = GF_CALLOC(volinfo->subvol_count, sizeof(*arbiter_array), + gf_common_mt_char); + if (!arbiter_array) + return -1; + for (i = 0; i < count; i++) { + brickinfo = brickinfo_list[i]; + last = get_last_brick_of_brick_group(volinfo, brickinfo); + if (last == brickinfo) + arbiter_array[brickinfo->group] = 1; + } + for (i = 0; i < volinfo->subvol_count; i++) + if (!arbiter_array[i]) { + snprintf(err_str, err_len, + "Removed bricks " + "must contain arbiter when converting" + " to plain distribute."); + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_REMOVE_ARBITER_BRICK, err_str, NULL); + ret = -1; + break; + } + GF_FREE(arbiter_array); + } + +out: + return ret; +} + +int +__glusterd_handle_remove_brick(rpcsvc_request_t *req) +{ + int32_t ret = -1; + gf_cli_req cli_req = {{ + 0, + }}; + dict_t *dict = NULL; + int32_t count = 0; + char *brick = NULL; + char key[64] = ""; + int keylen; + int i = 1; + glusterd_conf_t *conf = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_brickinfo_t **brickinfo_list = NULL; + int *subvols = NULL; + char err_str[2048] = ""; + gf_cli_rsp rsp = { + 0, + }; + void *cli_rsp = NULL; + char vol_type[256] = ""; + int32_t replica_count = 0; + char *volname = 0; + xlator_t *this = NULL; + int cmd = -1; + + GF_ASSERT(req); + this = THIS; + GF_ASSERT(this); + conf = this->private; + GF_ASSERT(conf); + + ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + // failed to decode msg; + req->rpc_err = GARBAGE_ARGS; + snprintf(err_str, sizeof(err_str), "Received garbage args"); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL); + goto out; + } + + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_REM_BRICK_REQ_RECVD, + "Received rem brick req"); + + if (cli_req.dict.dict_len) { + /* Unserialize the dictionary */ + dict = dict_new(); + + ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_UNSERIALIZE_FAIL, + "failed to " + "unserialize req-buffer to dictionary"); + snprintf(err_str, sizeof(err_str), + "Unable to decode " + "the command"); + goto out; + } + } + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + snprintf(err_str, sizeof(err_str), + "Unable to get volume " + "name"); + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, "%s", + err_str); + goto out; + } + + ret = dict_get_int32n(dict, "count", SLEN("count"), &count); + if (ret) { + snprintf(err_str, sizeof(err_str), + "Unable to get brick " + "count"); + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, "%s", + err_str); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + snprintf(err_str, sizeof(err_str), "Volume %s does not exist", volname); + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND, "%s", + err_str); + goto out; + } + + ret = dict_get_int32n(dict, "command", SLEN("command"), &cmd); + if (ret) { + snprintf(err_str, sizeof(err_str), + "Unable to get cmd " + "ccommand"); + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, "%s", + err_str); + goto out; + } + + ret = 
dict_get_int32n(dict, "replica-count", SLEN("replica-count"), + &replica_count); + if (!ret) { + gf_msg(this->name, GF_LOG_INFO, errno, GD_MSG_DICT_GET_FAILED, + "request to change replica-count to %d", replica_count); + ret = gd_rmbr_validate_replica_count(volinfo, replica_count, count, + err_str, sizeof(err_str)); + if (ret < 0) { + /* logging and error msg are done in above function + itself */ + goto out; + } + dict_deln(dict, "replica-count", SLEN("replica-count")); + if (ret) { + replica_count = 0; + } else { + ret = dict_set_int32n(dict, "replica-count", SLEN("replica-count"), + replica_count); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, + GD_MSG_DICT_SET_FAILED, + "failed to set the replica_count " + "in dict"); + goto out; + } + } + } + + /* 'vol_type' is used for giving the meaning full error msg for user */ + if (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) { + strcpy(vol_type, "replica"); + } else if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE) { + strcpy(vol_type, "disperse"); + } else { + strcpy(vol_type, "distribute"); + } + + if (!replica_count && (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) && + (volinfo->brick_count == volinfo->dist_leaf_count)) { + snprintf(err_str, sizeof(err_str), + "Removing bricks from replicate configuration " + "is not allowed without reducing replica count " + "explicitly."); + gf_msg(this->name, GF_LOG_ERROR, EPERM, GD_MSG_OP_NOT_PERMITTED_AC_REQD, + "%s", err_str); + ret = -1; + goto out; + } + + /* Do not allow remove-brick if the bricks given is less than + the replica count or stripe count */ + if (!replica_count && (volinfo->type != GF_CLUSTER_TYPE_NONE)) { + if (volinfo->dist_leaf_count && (count % volinfo->dist_leaf_count)) { + snprintf(err_str, sizeof(err_str), + "Remove brick " + "incorrect brick count of %d for %s %d", + count, vol_type, volinfo->dist_leaf_count); + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, "%s", + err_str); + ret = -1; + goto out; + } + } + + if ((volinfo->type != GF_CLUSTER_TYPE_NONE) && + (volinfo->subvol_count > 1)) { + ret = subvol_matcher_init(&subvols, volinfo->subvol_count); + if (ret) + goto out; + } + + brickinfo_list = GF_CALLOC(count, sizeof(*brickinfo_list), + gf_common_mt_pointer); + if (!brickinfo_list) { + ret = -1; + goto out; + } + + while (i <= count) { + keylen = snprintf(key, sizeof(key), "brick%d", i); + ret = dict_get_strn(dict, key, keylen, &brick); + if (ret) { + snprintf(err_str, sizeof(err_str), "Unable to get %s", key); + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "%s", err_str); + goto out; + } + gf_msg_debug(this->name, 0, + "Remove brick count %d brick:" + " %s", + i, brick); + + ret = glusterd_volume_brickinfo_get_by_brick(brick, volinfo, &brickinfo, + _gf_false); + + if (ret) { + snprintf(err_str, sizeof(err_str), + "Incorrect brick " + "%s for volume %s", + brick, volname); + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_BRICK_NOT_FOUND, + "%s", err_str); + goto out; + } + brickinfo_list[i - 1] = brickinfo; + + i++; + if ((volinfo->type == GF_CLUSTER_TYPE_NONE) || + (volinfo->brick_count <= volinfo->dist_leaf_count)) + continue; + + subvol_matcher_update(subvols, volinfo, brickinfo); + } + + if ((volinfo->type != GF_CLUSTER_TYPE_NONE) && + (volinfo->subvol_count > 1)) { + ret = subvol_matcher_verify(subvols, volinfo, err_str, sizeof(err_str), + vol_type, replica_count); + if (ret) + goto out; + } + + ret = glusterd_remove_brick_validate_arbiters(volinfo, count, replica_count, + brickinfo_list, err_str, + sizeof(err_str)); + if 
(ret) + goto out; + + if (conf->op_version < GD_OP_VERSION_8_0) { + gf_msg_debug(this->name, 0, + "The cluster is operating at " + "version less than %d. remove-brick operation" + "falling back to syncop framework.", + GD_OP_VERSION_8_0); + ret = glusterd_op_begin_synctask(req, GD_OP_REMOVE_BRICK, dict); + } else { + ret = glusterd_mgmt_v3_initiate_all_phases(req, GD_OP_REMOVE_BRICK, + dict); + } + +out: + if (ret) { + rsp.op_ret = -1; + rsp.op_errno = 0; + if (err_str[0] == '\0') + snprintf(err_str, sizeof(err_str), "Operation failed"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTERD_OP_FAILED, "%s", + err_str); + rsp.op_errstr = err_str; + cli_rsp = &rsp; + glusterd_to_cli(req, cli_rsp, NULL, 0, NULL, (xdrproc_t)xdr_gf_cli_rsp, + dict); + + ret = 0; // sent error to cli, prevent second reply + } + + if (brickinfo_list) + GF_FREE(brickinfo_list); + subvol_matcher_destroy(subvols); + free(cli_req.dict.dict_val); // its malloced by xdr + + return ret; +} + +int +glusterd_handle_remove_brick(rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler(req, __glusterd_handle_remove_brick); +} + +static int +_glusterd_restart_gsync_session(dict_t *this, char *key, data_t *value, + void *data) +{ + char *slave = NULL; + char *slave_buf = NULL; + char *path_list = NULL; + char *slave_vol = NULL; + char *slave_host = NULL; + char *slave_url = NULL; + char *conf_path = NULL; + char **errmsg = NULL; + int ret = -1; + glusterd_gsync_status_temp_t *param = NULL; + gf_boolean_t is_running = _gf_false; + + param = (glusterd_gsync_status_temp_t *)data; + + GF_ASSERT(param); + GF_ASSERT(param->volinfo); + + slave = strchr(value->data, ':'); + if (slave) { + slave++; + slave_buf = gf_strdup(slave); + if (!slave_buf) { + gf_msg("glusterd", GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "Failed to gf_strdup"); + ret = -1; + goto out; + } + } else + return 0; + + ret = dict_set_dynstrn(param->rsp_dict, "slave", SLEN("slave"), slave_buf); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Unable to store slave"); + if (slave_buf) + GF_FREE(slave_buf); + goto out; + } + + ret = glusterd_get_slave_details_confpath(param->volinfo, param->rsp_dict, + &slave_url, &slave_host, + &slave_vol, &conf_path, errmsg); + if (ret) { + if (errmsg && *errmsg) + gf_msg("glusterd", GF_LOG_ERROR, 0, + GD_MSG_SLAVE_CONFPATH_DETAILS_FETCH_FAIL, "%s", *errmsg); + else + gf_msg("glusterd", GF_LOG_ERROR, 0, + GD_MSG_SLAVE_CONFPATH_DETAILS_FETCH_FAIL, + "Unable to fetch slave or confpath details."); + goto out; + } + + /* In cases that gsyncd is not running, we will not invoke it + * because of add-brick. */ + ret = glusterd_check_gsync_running_local(param->volinfo->volname, slave, + conf_path, &is_running); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_GSYNC_VALIDATION_FAIL, + "gsync running validation failed."); + goto out; + } + if (_gf_false == is_running) { + gf_msg_debug("glusterd", 0, + "gsync session for %s and %s is" + " not running on this node. Hence not restarting.", + param->volinfo->volname, slave); + ret = 0; + goto out; + } + + ret = glusterd_get_local_brickpaths(param->volinfo, &path_list); + if (!path_list) { + gf_msg_debug("glusterd", 0, + "This node not being part of" + " volume should not be running gsyncd. 
Hence" + " no gsyncd process to restart."); + ret = 0; + goto out; + } + + ret = glusterd_check_restart_gsync_session( + param->volinfo, slave, param->rsp_dict, path_list, conf_path, 0); + if (ret) + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_GSYNC_RESTART_FAIL, + "Unable to restart gsync session."); + +out: + gf_msg_debug("glusterd", 0, "Returning %d.", ret); + return ret; +} + +/* op-sm */ + +int +glusterd_op_perform_add_bricks(glusterd_volinfo_t *volinfo, int32_t count, + char *bricks, dict_t *dict) +{ + char *brick = NULL; + int32_t i = 1; + char *brick_list = NULL; + char *free_ptr1 = NULL; + char *free_ptr2 = NULL; + char *saveptr = NULL; + int32_t ret = -1; + int32_t stripe_count = 0; + int32_t replica_count = 0; + int32_t arbiter_count = 0; + int32_t type = 0; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_gsync_status_temp_t param = { + 0, + }; + gf_boolean_t restart_needed = 0; + int brickid = 0; + char key[64] = ""; + char *brick_mount_dir = NULL; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + gf_boolean_t is_valid_add_brick = _gf_false; + gf_boolean_t restart_shd = _gf_false; + struct statvfs brickstat = { + 0, + }; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(volinfo); + + conf = this->private; + GF_ASSERT(conf); + + if (bricks) { + brick_list = gf_strdup(bricks); + free_ptr1 = brick_list; + } + + if (count) + brick = strtok_r(brick_list + 1, " \n", &saveptr); + + if (dict) { + ret = dict_get_int32n(dict, "stripe-count", SLEN("stripe-count"), + &stripe_count); + if (!ret) + gf_msg(THIS->name, GF_LOG_INFO, errno, GD_MSG_DICT_GET_SUCCESS, + "stripe-count is set %d", stripe_count); + + ret = dict_get_int32n(dict, "replica-count", SLEN("replica-count"), + &replica_count); + if (!ret) + gf_msg(THIS->name, GF_LOG_INFO, errno, GD_MSG_DICT_GET_SUCCESS, + "replica-count is set %d", replica_count); + ret = dict_get_int32n(dict, "arbiter-count", SLEN("arbiter-count"), + &arbiter_count); + if (!ret) + gf_msg(THIS->name, GF_LOG_INFO, errno, GD_MSG_DICT_GET_SUCCESS, + "arbiter-count is set %d", arbiter_count); + ret = dict_get_int32n(dict, "type", SLEN("type"), &type); + if (!ret) + gf_msg(THIS->name, GF_LOG_INFO, errno, GD_MSG_DICT_GET_SUCCESS, + "type is set %d, need to change it", type); + } + + brickid = glusterd_get_next_available_brickid(volinfo); + if (brickid < 0) + goto out; + while (i <= count) { + ret = glusterd_brickinfo_new_from_brick(brick, &brickinfo, _gf_true, + NULL); + if (ret) + goto out; + + GLUSTERD_ASSIGN_BRICKID_TO_BRICKINFO(brickinfo, volinfo, brickid++); + + /* A bricks mount dir is required only by snapshots which were + * introduced in gluster-3.6.0 + */ + if (conf->op_version >= GD_OP_VERSION_3_6_0) { + brick_mount_dir = NULL; + + snprintf(key, sizeof(key), "brick%d.mount_dir", i); + ret = dict_get_str(dict, key, &brick_mount_dir); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "%s not present", key); + goto out; + } + strncpy(brickinfo->mount_dir, brick_mount_dir, + SLEN(brickinfo->mount_dir)); + } + + ret = glusterd_resolve_brick(brickinfo); + if (ret) + goto out; + + if (!gf_uuid_compare(brickinfo->uuid, MY_UUID)) { + ret = sys_statvfs(brickinfo->path, &brickstat); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_STATVFS_FAILED, + "Failed to fetch disk utilization " + "from the brick (%s:%s). Please check the health of " + "the brick. 
Error code was %s", + brickinfo->hostname, brickinfo->path, strerror(errno)); + + goto out; + } + brickinfo->statfs_fsid = brickstat.f_fsid; + } + if (stripe_count || replica_count) { + add_brick_at_right_order(brickinfo, volinfo, (i - 1), stripe_count, + replica_count); + } else { + cds_list_add_tail(&brickinfo->brick_list, &volinfo->bricks); + } + brick = strtok_r(NULL, " \n", &saveptr); + i++; + volinfo->brick_count++; + } + + /* Gets changed only if the options are given in add-brick cli */ + if (type) + volinfo->type = type; + /* performance.client-io-threads is turned on by default, + * however this has adverse effects on replicate volumes due to + * replication design issues, till that get addressed + * performance.client-io-threads option is turned off for all + * replicate volumes if not already explicitly enabled. + */ + if (type && glusterd_is_volume_replicate(volinfo) && + conf->op_version >= GD_OP_VERSION_3_12_2) { + ret = dict_set_nstrn(volinfo->dict, "performance.client-io-threads", + SLEN("performance.client-io-threads"), "off", + SLEN("off")); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set " + "performance.client-io-threads to off"); + goto out; + } + } + + if (replica_count) { + volinfo->replica_count = replica_count; + } + if (arbiter_count) { + volinfo->arbiter_count = arbiter_count; + } + if (stripe_count) { + volinfo->stripe_count = stripe_count; + } + volinfo->dist_leaf_count = glusterd_get_dist_leaf_count(volinfo); + + /* backward compatibility */ + volinfo->sub_count = ((volinfo->dist_leaf_count == 1) + ? 0 + : volinfo->dist_leaf_count); + + volinfo->subvol_count = (volinfo->brick_count / volinfo->dist_leaf_count); + + ret = 0; + if (GLUSTERD_STATUS_STARTED != volinfo->status) + goto generate_volfiles; + + ret = generate_brick_volfiles(volinfo); + if (ret) + goto out; + + brick_list = gf_strdup(bricks); + free_ptr2 = brick_list; + i = 1; + + if (count) + brick = strtok_r(brick_list + 1, " \n", &saveptr); + + if (glusterd_is_volume_replicate(volinfo)) { + if (replica_count && conf->op_version >= GD_OP_VERSION_3_7_10) { + is_valid_add_brick = _gf_true; + if (volinfo->status == GLUSTERD_STATUS_STARTED) { + ret = volinfo->shd.svc.stop(&(volinfo->shd.svc), SIGTERM); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, + GD_MSG_GLUSTER_SERVICES_STOP_FAIL, + "Failed to stop shd for %s.", volinfo->volname); + } + restart_shd = _gf_true; + } + ret = generate_dummy_client_volfiles(volinfo); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL, + "Failed to create volfile."); + goto out; + } + } + } + + while (i <= count) { + ret = glusterd_volume_brickinfo_get_by_brick(brick, volinfo, &brickinfo, + _gf_true); + if (ret) + goto out; + + if (gf_uuid_is_null(brickinfo->uuid)) { + ret = glusterd_resolve_brick(brickinfo); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_RESOLVE_BRICK_FAIL, + FMTSTR_RESOLVE_BRICK, brickinfo->hostname, + brickinfo->path); + goto out; + } + } + + /* if the volume is a replicate volume, do: */ + if (is_valid_add_brick) { + if (!gf_uuid_compare(brickinfo->uuid, MY_UUID)) { + ret = glusterd_handle_replicate_brick_ops(volinfo, brickinfo, + GD_OP_ADD_BRICK); + if (ret < 0) + goto out; + } + } + ret = glusterd_brick_start(volinfo, brickinfo, _gf_true, _gf_false); + if (ret) + goto out; + i++; + brick = strtok_r(NULL, " \n", &saveptr); + + /* Check if the brick is added in this node, and set + * the restart_needed flag. 
*/ + if ((!gf_uuid_compare(brickinfo->uuid, MY_UUID)) && !restart_needed) { + restart_needed = 1; + gf_msg_debug("glusterd", 0, + "Restart gsyncd session, if it's already " + "running."); + } + } + + /* If the restart_needed flag is set, restart gsyncd sessions for that + * particular master with all the slaves. */ + if (restart_needed) { + param.rsp_dict = dict; + param.volinfo = volinfo; + dict_foreach(volinfo->gsync_slaves, _glusterd_restart_gsync_session, + &param); + } + +generate_volfiles: + if (conf->op_version <= GD_OP_VERSION_3_7_5) { + ret = glusterd_create_volfiles_and_notify_services(volinfo); + } else { + /* + * The cluster is operating at version greater than + * gluster-3.7.5. So no need to sent volfile fetch + * request in commit phase, the same will be done + * in post validate phase with v3 framework. + */ + } + +out: + GF_FREE(free_ptr1); + GF_FREE(free_ptr2); + if (restart_shd) { + if (volinfo->shd.svc.manager(&(volinfo->shd.svc), volinfo, + PROC_START_NO_WAIT)) { + gf_msg("glusterd", GF_LOG_CRITICAL, 0, + GD_MSG_GLUSTER_SERVICE_START_FAIL, + "Failed to start shd for %s.", volinfo->volname); + } + } + + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; +} + +int +glusterd_op_perform_remove_brick(glusterd_volinfo_t *volinfo, char *brick, + int force, int *need_migrate) +{ + glusterd_brickinfo_t *brickinfo = NULL; + int32_t ret = -1; + glusterd_conf_t *priv = NULL; + + GF_ASSERT(volinfo); + GF_ASSERT(brick); + + priv = THIS->private; + GF_ASSERT(priv); + + ret = glusterd_volume_brickinfo_get_by_brick(brick, volinfo, &brickinfo, + _gf_false); + if (ret) + goto out; + + ret = glusterd_resolve_brick(brickinfo); + if (ret) + goto out; + + glusterd_volinfo_reset_defrag_stats(volinfo); + + if (!gf_uuid_compare(brickinfo->uuid, MY_UUID)) { + /* Only if the brick is in this glusterd, do the rebalance */ + if (need_migrate) + *need_migrate = 1; + } + + if (force) { + ret = glusterd_brick_stop(volinfo, brickinfo, _gf_true); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_STOP_FAIL, + "Unable to stop " + "glusterfs, ret: %d", + ret); + } + goto out; + } + + brickinfo->decommissioned = 1; + ret = 0; +out: + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; +} + +int +glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict) +{ + int ret = 0; + char *volname = NULL; + int count = 0; + int replica_count = 0; + int arbiter_count = 0; + int i = 0; + int32_t local_brick_count = 0; + char *bricks = NULL; + char *brick_list = NULL; + char *saveptr = NULL; + char *free_ptr = NULL; + char *brick = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + char msg[4096] = ""; + char key[64] = ""; + gf_boolean_t brick_alloc = _gf_false; + char *all_bricks = NULL; + char *str_ret = NULL; + gf_boolean_t is_force = _gf_false; + glusterd_conf_t *conf = NULL; + int32_t len = 0; + + this = THIS; + GF_ASSERT(this); + conf = this->private; + GF_ASSERT(conf); + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Unable to get volume name"); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, + "Unable to find volume: %s", volname); + goto out; + } + + ret = glusterd_validate_volume_id(dict, volinfo); + if (ret) + goto out; + + ret = dict_get_int32n(dict, "replica-count", SLEN("replica-count"), + 
&replica_count); + if (ret) { + gf_msg_debug(this->name, 0, "Unable to get replica count"); + } + + if (replica_count > 0) { + ret = op_version_check(this, GD_OP_VER_PERSISTENT_AFR_XATTRS, msg, + sizeof(msg)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OP_VERSION_MISMATCH, + "%s", msg); + *op_errstr = gf_strdup(msg); + goto out; + } + } + + glusterd_add_peers_to_auth_list(volname); + + if (replica_count && glusterd_is_volume_replicate(volinfo)) { + /* Do not allow add-brick for stopped volumes when replica-count + * is being increased. + */ + if (GLUSTERD_STATUS_STOPPED == volinfo->status && + conf->op_version >= GD_OP_VERSION_3_7_10) { + ret = -1; + snprintf(msg, sizeof(msg), + " Volume must not be in" + " stopped state when replica-count needs to " + " be increased."); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s", + msg); + *op_errstr = gf_strdup(msg); + goto out; + } + /* op-version check for replica 2 to arbiter conversion. If we + * don't have this check, an older peer added as arbiter brick + * will not have the arbiter xlator in its volfile. */ + if ((replica_count == 3) && (conf->op_version < GD_OP_VERSION_3_8_0)) { + ret = dict_get_int32n(dict, "arbiter-count", SLEN("arbiter-count"), + &arbiter_count); + if (ret) { + gf_msg_debug(this->name, 0, + "No arbiter count present in the dict"); + } else if (arbiter_count == 1) { + ret = -1; + snprintf(msg, sizeof(msg), + "Cluster op-version must " + "be >= 30800 to add arbiter brick to a " + "replica 2 volume."); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s", + msg); + *op_errstr = gf_strdup(msg); + goto out; + } + } + /* Do not allow increasing replica count for arbiter volumes. */ + if (volinfo->arbiter_count) { + ret = -1; + snprintf(msg, sizeof(msg), + "Increasing replica count " + "for arbiter volumes is not supported."); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s", + msg); + *op_errstr = gf_strdup(msg); + goto out; + } + } + + is_force = dict_get_str_boolean(dict, "force", _gf_false); + + /* Check brick order if the volume type is replicate or disperse. If + * force at the end of command not given then check brick order. + * doing this check at the originator node is sufficient. + */ + + if (!is_force && is_origin_glusterd(dict)) { + ret = 0; + if (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) { + gf_msg_debug(this->name, 0, + "Replicate cluster type " + "found. Checking brick order."); + if (replica_count) + ret = glusterd_check_brick_order(dict, msg, volinfo->type, + &volname, &bricks, &count, + replica_count); + else + ret = glusterd_check_brick_order(dict, msg, volinfo->type, + &volname, &bricks, &count, + volinfo->replica_count); + } else if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE) { + gf_msg_debug(this->name, 0, + "Disperse cluster type" + " found. Checking brick order."); + ret = glusterd_check_brick_order(dict, msg, volinfo->type, &volname, + &bricks, &count, + volinfo->disperse_count); + } + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BAD_BRKORDER, + "Not adding brick because of " + "bad brick order. 
%s", + msg); + *op_errstr = gf_strdup(msg); + goto out; + } + } + + if (volinfo->replica_count < replica_count && !is_force) { + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + if (gf_uuid_compare(brickinfo->uuid, MY_UUID)) + continue; + if (brickinfo->status == GF_BRICK_STOPPED) { + ret = -1; + len = snprintf(msg, sizeof(msg), + "Brick %s " + "is down, changing replica " + "count needs all the bricks " + "to be up to avoid data loss", + brickinfo->path); + if (len < 0) { + strcpy(msg, "<error>"); + } + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s", + msg); + *op_errstr = gf_strdup(msg); + goto out; + } + } + } + + if (conf->op_version > GD_OP_VERSION_3_7_5 && is_origin_glusterd(dict)) { + ret = glusterd_validate_quorum(this, GD_OP_ADD_BRICK, dict, op_errstr); + if (ret) { + gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_SERVER_QUORUM_NOT_MET, + "Server quorum not met. Rejecting operation."); + goto out; + } + } else { + /* Case 1: conf->op_version <= GD_OP_VERSION_3_7_5 + * in this case the add-brick is running + * syncop framework that will do a quorum + * check by default + * Case 2: We don't need to do quorum check on every + * node, only originator glusterd need to + * check for quorum + * So nothing need to be done in else + */ + } + + if (glusterd_is_defrag_on(volinfo)) { + snprintf(msg, sizeof(msg), + "Volume name %s rebalance is in " + "progress. Please retry after completion", + volname); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OIP_RETRY_LATER, "%s", msg); + *op_errstr = gf_strdup(msg); + ret = -1; + goto out; + } + + if (volinfo->snap_count > 0 || !cds_list_empty(&volinfo->snap_volumes)) { + snprintf(msg, sizeof(msg), + "Volume %s has %" PRIu64 + " snapshots. " + "Changing the volume configuration will not effect snapshots." 
+ "But the snapshot brick mount should be intact to " + "make them function.", + volname, volinfo->snap_count); + gf_msg("glusterd", GF_LOG_WARNING, 0, GD_MSG_SNAP_WARN, "%s", msg); + msg[0] = '\0'; + } + + if (!count) { + ret = dict_get_int32n(dict, "count", SLEN("count"), &count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Unable to get count"); + goto out; + } + } + + if (!bricks) { + ret = dict_get_strn(dict, "bricks", SLEN("bricks"), &bricks); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Unable to get bricks"); + goto out; + } + } + + if (bricks) { + brick_list = gf_strdup(bricks); + all_bricks = gf_strdup(bricks); + free_ptr = brick_list; + } + + if (count) + brick = strtok_r(brick_list + 1, " \n", &saveptr); + + while (i < count) { + if (!glusterd_store_is_valid_brickpath(volname, brick) || + !glusterd_is_valid_volfpath(volname, brick)) { + snprintf(msg, sizeof(msg), + "brick path %s is " + "too long", + brick); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRKPATH_TOO_LONG, "%s", + msg); + *op_errstr = gf_strdup(msg); + + ret = -1; + goto out; + } + + ret = glusterd_brickinfo_new_from_brick(brick, &brickinfo, _gf_true, + NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_NOT_FOUND, + "Add-brick: Unable" + " to get brickinfo"); + goto out; + } + brick_alloc = _gf_true; + + ret = glusterd_new_brick_validate(brick, brickinfo, msg, sizeof(msg), + NULL); + if (ret) { + *op_errstr = gf_strdup(msg); + ret = -1; + goto out; + } + + if (!gf_uuid_compare(brickinfo->uuid, MY_UUID)) { + ret = glusterd_validate_and_create_brickpath( + brickinfo, volinfo->volume_id, volinfo->volname, op_errstr, + is_force, _gf_false); + if (ret) + goto out; + + /* A bricks mount dir is required only by snapshots which were + * introduced in gluster-3.6.0 + */ + if (conf->op_version >= GD_OP_VERSION_3_6_0) { + ret = glusterd_get_brick_mount_dir( + brickinfo->path, brickinfo->hostname, brickinfo->mount_dir); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_BRICK_MOUNTDIR_GET_FAIL, + "Failed to get brick mount_dir"); + goto out; + } + + snprintf(key, sizeof(key), "brick%d.mount_dir", i + 1); + ret = dict_set_dynstr_with_alloc(rsp_dict, key, + brickinfo->mount_dir); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_SET_FAILED, "Failed to set %s", key); + goto out; + } + } + + local_brick_count = i + 1; + } + + glusterd_brickinfo_delete(brickinfo); + brick_alloc = _gf_false; + brickinfo = NULL; + brick = strtok_r(NULL, " \n", &saveptr); + i++; + } + + ret = dict_set_int32n(rsp_dict, "brick_count", SLEN("brick_count"), + local_brick_count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Failed to set local_brick_count"); + goto out; + } + +out: + GF_FREE(free_ptr); + if (brick_alloc && brickinfo) + glusterd_brickinfo_delete(brickinfo); + GF_FREE(str_ret); + GF_FREE(all_bricks); + + gf_msg_debug(this->name, 0, "Returning %d", ret); + + return ret; +} + +int +glusterd_remove_brick_validate_bricks(gf1_op_commands cmd, int32_t brick_count, + dict_t *dict, glusterd_volinfo_t *volinfo, + char **errstr, + gf_cli_defrag_type cmd_defrag) +{ + char *brick = NULL; + char msg[2048] = ""; + char key[64] = ""; + int keylen; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + int i = 0; + int ret = -1; + char pidfile[PATH_MAX + 1] = { + 0, + }; + glusterd_conf_t *priv = THIS->private; + int pid = -1; + xlator_t *this = THIS; + GF_ASSERT(this); 
+ + /* Check whether all the nodes of the bricks to be removed are + * up, if not fail the operation */ + for (i = 1; i <= brick_count; i++) { + keylen = snprintf(key, sizeof(key), "brick%d", i); + ret = dict_get_strn(dict, key, keylen, &brick); + if (ret) { + snprintf(msg, sizeof(msg), "Unable to get %s", key); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "key=%s", key, NULL); + *errstr = gf_strdup(msg); + goto out; + } + + ret = glusterd_volume_brickinfo_get_by_brick(brick, volinfo, &brickinfo, + _gf_false); + if (ret) { + snprintf(msg, sizeof(msg), + "Incorrect brick " + "%s for volume %s", + brick, volinfo->volname); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INCORRECT_BRICK, + "Brick=%s, Volume=%s", brick, volinfo->volname, NULL); + *errstr = gf_strdup(msg); + goto out; + } + /* Do not allow commit if the bricks are not decommissioned + * if its a remove brick commit + */ + if (!brickinfo->decommissioned && cmd == GF_OP_CMD_COMMIT) { + snprintf(msg, sizeof(msg), + "Brick %s " + "is not decommissioned. " + "Use start or force option", + brick); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_BRICK_NOT_DECOM, + "Use 'start' or 'force' option, Brick=%s", brick, NULL); + *errstr = gf_strdup(msg); + ret = -1; + goto out; + } + + if (glusterd_is_local_brick(THIS, volinfo, brickinfo)) { + switch (cmd) { + case GF_OP_CMD_START: + goto check; + case GF_OP_CMD_NONE: + default: + break; + } + + switch (cmd_defrag) { + case GF_DEFRAG_CMD_NONE: + default: + continue; + } + check: + if (brickinfo->status != GF_BRICK_STARTED) { + snprintf(msg, sizeof(msg), + "Found stopped " + "brick %s. Use force option to " + "remove the offline brick", + brick); + gf_smsg( + this->name, GF_LOG_ERROR, errno, GD_MSG_BRICK_STOPPED, + "Use 'force' option to remove the offline brick, Brick=%s", + brick, NULL); + *errstr = gf_strdup(msg); + ret = -1; + goto out; + } + GLUSTERD_GET_BRICK_PIDFILE(pidfile, volinfo, brickinfo, priv); + if (!gf_is_service_running(pidfile, &pid)) { + snprintf(msg, sizeof(msg), + "Found dead " + "brick %s", + brick); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_BRICK_DEAD, + "Brick=%s", brick, NULL); + *errstr = gf_strdup(msg); + ret = -1; + goto out; + } else { + ret = 0; + } + continue; + } + + RCU_READ_LOCK; + peerinfo = glusterd_peerinfo_find_by_uuid(brickinfo->uuid); + if (!peerinfo) { + RCU_READ_UNLOCK; + snprintf(msg, sizeof(msg), + "Host node of the " + "brick %s is not in cluster", + brick); + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_BRICK_HOST_NOT_FOUND, "Brick=%s", brick, NULL); + *errstr = gf_strdup(msg); + ret = -1; + goto out; + } + if (!peerinfo->connected) { + RCU_READ_UNLOCK; + snprintf(msg, sizeof(msg), + "Host node of the " + "brick %s is down", + brick); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_HOST_DOWN, + "Brick=%s", brick, NULL); + *errstr = gf_strdup(msg); + ret = -1; + goto out; + } + RCU_READ_UNLOCK; + } + +out: + return ret; +} + +int +glusterd_op_stage_remove_brick(dict_t *dict, char **op_errstr) +{ + int ret = -1; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + char *errstr = NULL; + int32_t brick_count = 0; + char msg[2048] = ""; + int32_t flag = 0; + gf1_op_commands cmd = GF_OP_CMD_NONE; + char *task_id_str = NULL; + xlator_t *this = NULL; + gsync_status_param_t param = { + 0, + }; + + this = THIS; + GF_ASSERT(this); + + ret = op_version_check(this, GD_OP_VER_PERSISTENT_AFR_XATTRS, msg, + sizeof(msg)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OP_VERSION_MISMATCH, "%s", + 
msg); + *op_errstr = gf_strdup(msg); + goto out; + } + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Unable to get volume name"); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, + "Volume %s does not exist", volname); + goto out; + } + + ret = glusterd_validate_volume_id(dict, volinfo); + if (ret) + goto out; + + ret = dict_get_int32n(dict, "command", SLEN("command"), &flag); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Unable to get brick command"); + goto out; + } + cmd = flag; + + ret = dict_get_int32n(dict, "count", SLEN("count"), &brick_count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Unable to get brick count"); + goto out; + } + + ret = 0; + if (volinfo->brick_count == brick_count) { + errstr = gf_strdup( + "Deleting all the bricks of the " + "volume is not allowed"); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_DELETE, NULL); + ret = -1; + goto out; + } + + ret = -1; + switch (cmd) { + case GF_OP_CMD_NONE: + errstr = gf_strdup("no remove-brick command issued"); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_NO_REMOVE_CMD, + NULL); + goto out; + + case GF_OP_CMD_STATUS: + ret = 0; + goto out; + case GF_OP_CMD_START: { + if ((volinfo->type == GF_CLUSTER_TYPE_REPLICATE) && + dict_getn(dict, "replica-count", SLEN("replica-count"))) { + snprintf(msg, sizeof(msg), + "Migration of data is not " + "needed when reducing replica count. Use the" + " 'force' option"); + errstr = gf_strdup(msg); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_USE_THE_FORCE, "%s", + errstr); + goto out; + } + + if (GLUSTERD_STATUS_STARTED != volinfo->status) { + snprintf(msg, sizeof(msg), + "Volume %s needs " + "to be started before remove-brick " + "(you can use 'force' or 'commit' " + "to override this behavior)", + volinfo->volname); + errstr = gf_strdup(msg); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_STARTED, + "%s", errstr); + goto out; + } + if (!gd_is_remove_brick_committed(volinfo)) { + snprintf(msg, sizeof(msg), + "An earlier remove-brick " + "task exists for volume %s. Either commit it" + " or stop it before starting a new task.", + volinfo->volname); + errstr = gf_strdup(msg); + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_OLD_REMOVE_BRICK_EXISTS, + "Earlier remove-brick" + " task exists for volume %s.", + volinfo->volname); + goto out; + } + if (glusterd_is_defrag_on(volinfo)) { + errstr = gf_strdup( + "Rebalance is in progress. Please " + "retry after completion"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OIP_RETRY_LATER, + "%s", errstr); + goto out; + } + + /* Check if the connected clients are all of version + * glusterfs-3.6 and higher. This is needed to prevent some data + * loss issues that could occur when older clients are connected + * when rebalance is run. + */ + ret = glusterd_check_client_op_version_support( + volname, GD_OP_VERSION_3_6_0, NULL); + if (ret) { + ret = gf_asprintf(op_errstr, + "Volume %s has one or " + "more connected clients of a version" + " lower than GlusterFS-v3.6.0. 
" + "Starting remove-brick in this state " + "could lead to data loss.\nPlease " + "disconnect those clients before " + "attempting this command again.", + volname); + goto out; + } + + if (volinfo->snap_count > 0 || + !cds_list_empty(&volinfo->snap_volumes)) { + snprintf(msg, sizeof(msg), + "Volume %s has %" PRIu64 + " snapshots. " + "Changing the volume configuration will not effect " + "snapshots." + "But the snapshot brick mount should be intact to " + "make them function.", + volname, volinfo->snap_count); + gf_msg("glusterd", GF_LOG_WARNING, 0, GD_MSG_SNAP_WARN, "%s", + msg); + msg[0] = '\0'; + } + + ret = glusterd_remove_brick_validate_bricks( + cmd, brick_count, dict, volinfo, &errstr, GF_DEFRAG_CMD_NONE); + if (ret) + goto out; + + if (is_origin_glusterd(dict)) { + ret = glusterd_generate_and_set_task_id( + dict, GF_REMOVE_BRICK_TID_KEY, + SLEN(GF_REMOVE_BRICK_TID_KEY)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TASKID_GEN_FAIL, + "Failed to generate task-id"); + goto out; + } + } else { + ret = dict_get_strn(dict, GF_REMOVE_BRICK_TID_KEY, + SLEN(GF_REMOVE_BRICK_TID_KEY), + &task_id_str); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, + GD_MSG_DICT_GET_FAILED, "Missing remove-brick-id"); + ret = 0; + } + } + break; + } + + case GF_OP_CMD_STOP: + ret = 0; + break; + + case GF_OP_CMD_COMMIT: + if (volinfo->decommission_in_progress) { + errstr = gf_strdup( + "use 'force' option as migration " + "is in progress"); + gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_MIGRATION_PROG, + "Use 'force' option", NULL); + goto out; + } + + if (volinfo->rebal.defrag_status == GF_DEFRAG_STATUS_FAILED) { + errstr = gf_strdup( + "use 'force' option as migration " + "has failed"); + gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_MIGRATION_FAIL, + "Use 'force' option", NULL); + goto out; + } + + if (volinfo->rebal.defrag_status == GF_DEFRAG_STATUS_COMPLETE) { + if (volinfo->rebal.rebalance_failures > 0 || + volinfo->rebal.skipped_files > 0) { + errstr = gf_strdup( + "use 'force' option as migration " + "of some files might have been skipped or " + "has failed"); + gf_smsg(this->name, GF_LOG_WARNING, 0, + GD_MSG_MIGRATION_FAIL, + "Use 'force' option, some files might have been " + "skipped", + NULL); + goto out; + } + } + + ret = glusterd_remove_brick_validate_bricks( + cmd, brick_count, dict, volinfo, &errstr, GF_DEFRAG_CMD_NONE); + if (ret) + goto out; + + /* If geo-rep is configured, for this volume, it should be + * stopped. + */ + param.volinfo = volinfo; + ret = glusterd_check_geo_rep_running(¶m, op_errstr); + if (ret || param.is_active) { + ret = -1; + goto out; + } + + break; + + case GF_OP_CMD_COMMIT_FORCE: + case GF_OP_CMD_DETACH_START: + case GF_OP_CMD_DETACH_COMMIT: + case GF_OP_CMD_DETACH_COMMIT_FORCE: + case GF_OP_CMD_STOP_DETACH_TIER: + break; + } + ret = 0; + +out: + gf_msg_debug(this->name, 0, "Returning %d", ret); + if (ret && errstr) { + if (op_errstr) + *op_errstr = errstr; + } + if (!op_errstr && errstr) + GF_FREE(errstr); + return ret; +} + +int +glusterd_remove_brick_migrate_cbk(glusterd_volinfo_t *volinfo, + gf_defrag_status_t status) +{ + int ret = 0; + +#if 0 /* TODO: enable this behavior once cluster-wide awareness comes for \ + defrag cbk function */ + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_brickinfo_t *tmp = NULL; + + switch (status) { + case GF_DEFRAG_STATUS_PAUSED: + case GF_DEFRAG_STATUS_FAILED: + /* No changes required in the volume file. 
+ everything should remain as is */ + break; + case GF_DEFRAG_STATUS_STOPPED: + /* Fall back to the old volume file */ + cds_list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks, + brick_list) { + if (!brickinfo->decommissioned) + continue; + brickinfo->decommissioned = 0; + } + break; + + case GF_DEFRAG_STATUS_COMPLETE: + /* Done with the task, you can remove the brick from the + volume file */ + cds_list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks, + brick_list) { + if (!brickinfo->decommissioned) + continue; + gf_log (THIS->name, GF_LOG_INFO, "removing the brick %s", + brickinfo->path); + brickinfo->decommissioned = 0; + if (GLUSTERD_STATUS_STARTED == volinfo->status) { + /*TODO: use the 'atomic' flavour of brick_stop*/ + ret = glusterd_brick_stop (volinfo, brickinfo); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, + "Unable to stop glusterfs (%d)", ret); + } + } + glusterd_delete_brick (volinfo, brickinfo); + } + break; + + default: + GF_ASSERT (!"cbk function called with wrong status"); + break; + } + + ret = glusterd_create_volfiles_and_notify_services (volinfo); + if (ret) + gf_log (THIS->name, GF_LOG_ERROR, + "Unable to write volume files (%d)", ret); + + ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) + gf_log (THIS->name, GF_LOG_ERROR, + "Unable to store volume info (%d)", ret); + + + if (GLUSTERD_STATUS_STARTED == volinfo->status) { + ret = glusterd_check_generate_start_nfs (); + if (ret) + gf_log (THIS->name, GF_LOG_ERROR, + "Unable to start nfs process (%d)", ret); + } + +#endif + + volinfo->decommission_in_progress = 0; + return ret; +} + +int +glusterd_op_add_brick(dict_t *dict, char **op_errstr) +{ + int ret = 0; + char *volname = NULL; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + char *bricks = NULL; + int32_t count = 0; + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + GF_ASSERT(priv); + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Unable to get volume name"); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND, + "Unable to allocate memory"); + goto out; + } + + ret = dict_get_int32n(dict, "count", SLEN("count"), &count); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Unable to get count"); + goto out; + } + + ret = dict_get_strn(dict, "bricks", SLEN("bricks"), &bricks); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Unable to get bricks"); + goto out; + } + + ret = glusterd_op_perform_add_bricks(volinfo, count, bricks, dict); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, + "Unable to add bricks"); + goto out; + } + if (priv->op_version <= GD_OP_VERSION_3_7_5) { + ret = glusterd_store_volinfo(volinfo, + GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) + goto out; + } else { + /* + * The cluster is operating at version greater than + * gluster-3.7.5. So no need to store volfiles + * in commit phase, the same will be done + * in post validate phase with v3 framework. 
+ */ + } + + if (GLUSTERD_STATUS_STARTED == volinfo->status) + ret = glusterd_svcs_manager(volinfo); + +out: + return ret; +} + +int +glusterd_post_commit_add_brick(dict_t *dict, char **op_errstr) +{ + int ret = 0; + char *volname = NULL; + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Unable to get volume name"); + goto out; + } + ret = glusterd_replace_old_auth_allow_list(volname); +out: + return ret; +} + +int +glusterd_post_commit_replace_brick(dict_t *dict, char **op_errstr) +{ + int ret = 0; + char *volname = NULL; + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Unable to get volume name"); + goto out; + } + ret = glusterd_replace_old_auth_allow_list(volname); +out: + return ret; +} + +int +glusterd_set_rebalance_id_for_remove_brick(dict_t *req_dict, dict_t *rsp_dict) +{ + int ret = -1; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + char msg[2048] = {0}; + char *task_id_str = NULL; + xlator_t *this = NULL; + int32_t cmd = 0; + + this = THIS; + GF_ASSERT(this); + + GF_ASSERT(rsp_dict); + GF_ASSERT(req_dict); + + ret = dict_get_strn(rsp_dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg_debug(this->name, 0, "volname not found"); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND, + "Unable to allocate memory"); + goto out; + } + + ret = dict_get_int32n(rsp_dict, "command", SLEN("command"), &cmd); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Unable to get command"); + goto out; + } + + /* remove brick task id is generted in glusterd_op_stage_remove_brick(), + * but rsp_dict is unavailable there. So copying it to rsp_dict from + * req_dict here. 
*/ + + if (is_origin_glusterd(rsp_dict)) { + ret = dict_get_strn(req_dict, GF_REMOVE_BRICK_TID_KEY, + SLEN(GF_REMOVE_BRICK_TID_KEY), &task_id_str); + if (ret) { + snprintf(msg, sizeof(msg), "Missing rebalance id for remove-brick"); + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_REBALANCE_ID_MISSING, + "%s", msg); + ret = 0; + } else { + gf_uuid_parse(task_id_str, volinfo->rebal.rebalance_id); + + ret = glusterd_copy_uuid_to_dict(volinfo->rebal.rebalance_id, + rsp_dict, GF_REMOVE_BRICK_TID_KEY, + SLEN(GF_REMOVE_BRICK_TID_KEY)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_REMOVE_BRICK_ID_SET_FAIL, + "Failed to set remove-brick-id"); + goto out; + } + } + } + if (!gf_uuid_is_null(volinfo->rebal.rebalance_id) && + GD_OP_REMOVE_BRICK == volinfo->rebal.op) { + ret = glusterd_copy_uuid_to_dict(volinfo->rebal.rebalance_id, rsp_dict, + GF_REMOVE_BRICK_TID_KEY, + SLEN(GF_REMOVE_BRICK_TID_KEY)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set task-id for volume %s", volname); + goto out; + } + } +out: + return ret; +} +int +glusterd_op_remove_brick(dict_t *dict, char **op_errstr) +{ + int ret = -1; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + char *brick = NULL; + int32_t count = 0; + int32_t i = 1; + char key[64] = ""; + int keylen; + int32_t flag = 0; + int need_rebalance = 0; + int force = 0; + gf1_op_commands cmd = 0; + int32_t replica_count = 0; + char *task_id_str = NULL; + xlator_t *this = NULL; + dict_t *bricks_dict = NULL; + char *brick_tmpstr = NULL; + int start_remove = 0; + uint32_t commit_hash = 0; + int defrag_cmd = 0; + glusterd_conf_t *conf = NULL; + + this = THIS; + GF_ASSERT(this); + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, out); + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, + "Unable to get volume name"); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND, + "Unable to allocate memory"); + goto out; + } + + ret = dict_get_int32n(dict, "command", SLEN("command"), &flag); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Unable to get command"); + goto out; + } + cmd = flag; + + if (GF_OP_CMD_START == cmd) + start_remove = 1; + + /* Set task-id, if available, in ctx dict for operations other than + * start + */ + + if (is_origin_glusterd(dict) && (!start_remove)) { + if (!gf_uuid_is_null(volinfo->rebal.rebalance_id)) { + ret = glusterd_copy_uuid_to_dict(volinfo->rebal.rebalance_id, dict, + GF_REMOVE_BRICK_TID_KEY, + SLEN(GF_REMOVE_BRICK_TID_KEY)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_REMOVE_BRICK_ID_SET_FAIL, + "Failed to set remove-brick-id"); + goto out; + } + } + } + + /* Clear task-id, rebal.op and stored bricks on commmitting/stopping + * remove-brick */ + if ((!start_remove) && (cmd != GF_OP_CMD_STATUS)) { + gf_uuid_clear(volinfo->rebal.rebalance_id); + volinfo->rebal.op = GD_OP_NONE; + dict_unref(volinfo->rebal.dict); + volinfo->rebal.dict = NULL; + } + + ret = -1; + switch (cmd) { + case GF_OP_CMD_NONE: + goto out; + + case GF_OP_CMD_STATUS: + ret = 0; + goto out; + + case GF_OP_CMD_STOP: + case GF_OP_CMD_START: + /* Reset defrag status to 'NOT STARTED' whenever a + * remove-brick/rebalance command is issued to remove + * stale information from previous run. 
+ * Update defrag_cmd as well or it will only be done + * for nodes on which the brick to be removed exists. + */ + /* coverity[MIXED_ENUMS] */ + volinfo->rebal.defrag_cmd = cmd; + volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_NOT_STARTED; + ret = dict_get_strn(dict, GF_REMOVE_BRICK_TID_KEY, + SLEN(GF_REMOVE_BRICK_TID_KEY), &task_id_str); + if (ret) { + gf_msg_debug(this->name, errno, "Missing remove-brick-id"); + ret = 0; + } else { + gf_uuid_parse(task_id_str, volinfo->rebal.rebalance_id); + volinfo->rebal.op = GD_OP_REMOVE_BRICK; + } + force = 0; + break; + + case GF_OP_CMD_COMMIT: + force = 1; + break; + + case GF_OP_CMD_COMMIT_FORCE: + + if (volinfo->decommission_in_progress) { + if (volinfo->rebal.defrag) { + LOCK(&volinfo->rebal.defrag->lock); + /* Fake 'rebalance-complete' so the graph change + happens right away */ + volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_COMPLETE; + + UNLOCK(&volinfo->rebal.defrag->lock); + } + /* Graph change happens in rebalance _cbk function, + no need to do anything here */ + /* TODO: '_cbk' function is not doing anything for now */ + } + + ret = 0; + force = 1; + break; + case GF_OP_CMD_DETACH_START: + case GF_OP_CMD_DETACH_COMMIT_FORCE: + case GF_OP_CMD_DETACH_COMMIT: + case GF_OP_CMD_STOP_DETACH_TIER: + break; + } + + ret = dict_get_int32n(dict, "count", SLEN("count"), &count); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Unable to get count"); + goto out; + } + /* Save the list of bricks for later usage only on starting a + * remove-brick. Right now this is required for displaying the task + * parameters with task status in volume status. + */ + + if (start_remove) { + bricks_dict = dict_new(); + if (!bricks_dict) { + ret = -1; + goto out; + } + ret = dict_set_int32n(bricks_dict, "count", SLEN("count"), count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Failed to save remove-brick count"); + goto out; + } + } + + while (i <= count) { + keylen = snprintf(key, sizeof(key), "brick%d", i); + ret = dict_get_strn(dict, key, keylen, &brick); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Unable to get %s", key); + goto out; + } + + if (start_remove) { + brick_tmpstr = gf_strdup(brick); + if (!brick_tmpstr) { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "Failed to duplicate brick name"); + goto out; + } + ret = dict_set_dynstrn(bricks_dict, key, keylen, brick_tmpstr); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Failed to add brick to dict"); + goto out; + } + brick_tmpstr = NULL; + } + + ret = glusterd_op_perform_remove_brick(volinfo, brick, force, + &need_rebalance); + if (ret) + goto out; + i++; + } + + if (start_remove) + volinfo->rebal.dict = dict_ref(bricks_dict); + + ret = dict_get_int32n(dict, "replica-count", SLEN("replica-count"), + &replica_count); + if (!ret) { + gf_msg(this->name, GF_LOG_INFO, errno, GD_MSG_DICT_GET_FAILED, + "changing replica count %d to %d on volume %s", + volinfo->replica_count, replica_count, volinfo->volname); + volinfo->replica_count = replica_count; + /* A reduction in replica count implies an arbiter volume + * earlier is now no longer one. 
*/ + if (volinfo->arbiter_count) + volinfo->arbiter_count = 0; + volinfo->sub_count = replica_count; + volinfo->dist_leaf_count = glusterd_get_dist_leaf_count(volinfo); + + /* + * volinfo->type and sub_count have already been set for + * volumes undergoing a detach operation, they should not + * be modified here. + */ + if (replica_count == 1) { + if (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) { + volinfo->type = GF_CLUSTER_TYPE_NONE; + /* backward compatibility */ + volinfo->sub_count = 0; + } + } + } + volinfo->subvol_count = (volinfo->brick_count / volinfo->dist_leaf_count); + + if (!glusterd_is_volume_replicate(volinfo) && + conf->op_version >= GD_OP_VERSION_3_12_2) { + ret = dict_set_nstrn(volinfo->dict, "performance.client-io-threads", + SLEN("performance.client-io-threads"), "on", + SLEN("on")); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set " + "performance.client-io-threads to on"); + goto out; + } + } + + ret = glusterd_create_volfiles_and_notify_services(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOLFILE_CREATE_FAIL, + "failed to create volfiles"); + goto out; + } + + ret = glusterd_store_volinfo(volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOLINFO_STORE_FAIL, + "failed to store volinfo"); + goto out; + } + + if (start_remove && volinfo->status == GLUSTERD_STATUS_STARTED) { + ret = glusterd_svcs_reconfigure(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_NFS_RECONF_FAIL, + "Unable to reconfigure NFS-Server"); + goto out; + } + } + + /* Need to reset the defrag/rebalance status accordingly */ + switch (volinfo->rebal.defrag_status) { + case GF_DEFRAG_STATUS_FAILED: + case GF_DEFRAG_STATUS_COMPLETE: + volinfo->rebal.defrag_status = 0; + /* FALLTHROUGH */ + default: + break; + } + if (!force && need_rebalance) { + if (dict_get_uint32(dict, "commit-hash", &commit_hash) == 0) { + volinfo->rebal.commit_hash = commit_hash; + } + /* perform the rebalance operations */ + defrag_cmd = GF_DEFRAG_CMD_START_FORCE; + /* + * We need to set this *before* we issue commands to the + * bricks, or else we might end up setting it after the bricks + * have responded. If we fail to send the request(s) we'll + * clear it ourselves because nobody else will. + */ + volinfo->decommission_in_progress = 1; + char err_str[4096] = ""; + ret = glusterd_handle_defrag_start( + volinfo, err_str, sizeof(err_str), defrag_cmd, + glusterd_remove_brick_migrate_cbk, GD_OP_REMOVE_BRICK); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REBALANCE_START_FAIL, + "failed to start the rebalance"); + /* TBD: shouldn't we do more than print a message? 
*/ + volinfo->decommission_in_progress = 0; + if (op_errstr) + *op_errstr = gf_strdup(err_str); + } + } else { + if (GLUSTERD_STATUS_STARTED == volinfo->status) + ret = glusterd_svcs_manager(volinfo); + } +out: + GF_FREE(brick_tmpstr); + if (bricks_dict) + dict_unref(bricks_dict); + gf_msg_debug(this->name, 0, "returning %d ", ret); + return ret; +} + +int +glusterd_op_stage_barrier(dict_t *dict, char **op_errstr) +{ + int ret = -1; + xlator_t *this = NULL; + char *volname = NULL; + glusterd_volinfo_t *vol = NULL; + char *barrier_op = NULL; + + GF_ASSERT(dict); + this = THIS; + GF_ASSERT(this); + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Volname not present in " + "dict"); + goto out; + } + + ret = glusterd_volinfo_find(volname, &vol); + if (ret) { + gf_asprintf(op_errstr, "Volume %s does not exist", volname); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, "%s", + *op_errstr); + goto out; + } + + if (!glusterd_is_volume_started(vol)) { + gf_asprintf(op_errstr, "Volume %s is not started", volname); + ret = -1; + goto out; + } + + ret = dict_get_strn(dict, "barrier", SLEN("barrier"), &barrier_op); + if (ret == -1) { + gf_asprintf(op_errstr, + "Barrier op for volume %s not present " + "in dict", + volname); + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, "%s", + *op_errstr); + goto out; + } + ret = 0; +out: + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; +} + +int +glusterd_op_barrier(dict_t *dict, char **op_errstr) +{ + int ret = -1; + xlator_t *this = NULL; + char *volname = NULL; + glusterd_volinfo_t *vol = NULL; + char *barrier_op = NULL; + + GF_ASSERT(dict); + this = THIS; + GF_ASSERT(this); + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Volname not present in " + "dict"); + goto out; + } + + ret = glusterd_volinfo_find(volname, &vol); + if (ret) { + gf_asprintf(op_errstr, "Volume %s does not exist", volname); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, "%s", + *op_errstr); + goto out; + } + + ret = dict_get_strn(dict, "barrier", SLEN("barrier"), &barrier_op); + if (ret) { + gf_asprintf(op_errstr, + "Barrier op for volume %s not present " + "in dict", + volname); + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, "%s", + *op_errstr); + goto out; + } + + ret = dict_set_dynstr_with_alloc(vol->dict, "features.barrier", barrier_op); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Failed to set barrier op in" + " volume option dict"); + goto out; + } + + gd_update_volume_op_versions(vol); + ret = glusterd_create_volfiles(vol); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL, + "Failed to create volfiles"); + goto out; + } + ret = glusterd_store_volinfo(vol, GLUSTERD_VOLINFO_VER_AC_INCREMENT); + +out: + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; +} + +int +glusterd_handle_add_tier_brick(rpcsvc_request_t *req) +{ + return 0; +} + +int +glusterd_handle_attach_tier(rpcsvc_request_t *req) +{ + return 0; +} + +int +glusterd_handle_detach_tier(rpcsvc_request_t *req) +{ + return 0; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-conn-helper.c b/xlators/mgmt/glusterd/src/glusterd-conn-helper.c new file mode 100644 index 00000000000..a7f54ec24b7 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-conn-helper.c @@ -0,0 +1,21 @@ 
+/* + Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include "glusterd-conn-mgmt.h" +#include "glusterd-svc-mgmt.h" + +#define _LGPL_SOURCE +#include <urcu/rculist.h> + +glusterd_svc_t * +glusterd_conn_get_svc_object(glusterd_conn_t *conn) +{ + return cds_list_entry(conn, glusterd_svc_t, conn); +} diff --git a/xlators/mgmt/glusterd/src/glusterd-conn-helper.h b/xlators/mgmt/glusterd/src/glusterd-conn-helper.h new file mode 100644 index 00000000000..6f500309175 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-conn-helper.h @@ -0,0 +1,21 @@ +/* + Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _GLUSTERD_CONN_HELPER_H_ +#define _GLUSTERD_CONN_HELPER_H_ + +#include "rpc-clnt.h" + +#include "glusterd-conn-mgmt.h" + +glusterd_svc_t * +glusterd_conn_get_svc_object(glusterd_conn_t *conn); + +#endif diff --git a/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c new file mode 100644 index 00000000000..5c01f0c70b6 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c @@ -0,0 +1,191 @@ +/* + Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#include <glusterfs/xlator.h> +#include "rpc-clnt.h" +#include "glusterd.h" +#include "glusterd-conn-mgmt.h" +#include "glusterd-conn-helper.h" +#include "glusterd-utils.h" +#include "glusterd-messages.h" + +int +glusterd_conn_init(glusterd_conn_t *conn, char *sockpath, int frame_timeout, + glusterd_conn_notify_t notify) +{ + int ret = -1; + dict_t *options = NULL; + struct rpc_clnt *rpc = NULL; + xlator_t *this = THIS; + glusterd_svc_t *svc = NULL; + + if (!this) { + gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_XLATOR_NOT_DEFINED, + NULL); + goto out; + } + + options = dict_new(); + if (!options) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); + goto out; + } + + svc = glusterd_conn_get_svc_object(conn); + if (!svc) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_GET_FAIL, + "Failed to get the service"); + goto out; + } + + ret = rpc_transport_unix_options_build(options, sockpath, frame_timeout); + if (ret) + goto out; + + ret = dict_set_int32n(options, "transport.socket.ignore-enoent", + SLEN("transport.socket.ignore-enoent"), 1); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=transport.socket.ignore-enoent", NULL); + goto out; + } + + /* @options is free'd by rpc_transport when destroyed */ + rpc = rpc_clnt_new(options, this, (char *)svc->name, 16); + if (!rpc) { + ret = -1; + goto out; + } + + ret = rpc_clnt_register_notify(rpc, glusterd_conn_common_notify, conn); + if (ret) + goto out; + + ret = snprintf(conn->sockpath, sizeof(conn->sockpath), "%s", sockpath); + if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); + goto out; + } else + ret = 0; + + conn->frame_timeout = frame_timeout; + conn->rpc = rpc; + conn->notify = notify; +out: + if (options) + dict_unref(options); + if (ret) { + if (rpc) { + rpc_clnt_unref(rpc); + rpc = NULL; + } + } + return ret; +} + +int +glusterd_conn_term(glusterd_conn_t *conn) +{ + rpc_clnt_unref(conn->rpc); + return 0; +} + +int +glusterd_conn_connect(glusterd_conn_t *conn) +{ + return rpc_clnt_start(conn->rpc); +} + +int +glusterd_conn_disconnect(glusterd_conn_t *conn) +{ + rpc_clnt_disable(conn->rpc); + + return 0; +} + +int +__glusterd_conn_common_notify(struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data) +{ + glusterd_conn_t *conn = mydata; + + /* Silently ignoring this error, exactly like the current + * implementation */ + if (!conn) + return 0; + + return conn->notify(conn, event); +} + +int +glusterd_conn_common_notify(struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data) +{ + return glusterd_big_locked_notify(rpc, mydata, event, data, + __glusterd_conn_common_notify); +} + +int32_t +glusterd_conn_build_socket_filepath(char *rundir, uuid_t uuid, char *socketpath, + int len) +{ + char sockfilepath[PATH_MAX] = { + 0, + }; + + snprintf(sockfilepath, sizeof(sockfilepath), "%s/run-%s", rundir, + uuid_utoa(uuid)); + + glusterd_set_socket_filepath(sockfilepath, socketpath, len); + return 0; +} + +int +__glusterd_muxsvc_conn_common_notify(struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data) +{ + glusterd_conf_t *conf = THIS->private; + glusterd_svc_proc_t *mux_proc = mydata; + int ret = -1; + + /* Silently ignoring this error, exactly like the current + * implementation */ + if (!mux_proc) + return 0; + + if (event == RPC_CLNT_DESTROY) { + /*RPC_CLNT_DESTROY will only called after mux_proc detached from the + * list. So it is safe to call without lock. 
Processing + * RPC_CLNT_DESTROY under a lock will lead to deadlock. + */ + if (mux_proc->data) { + glusterd_volinfo_unref(mux_proc->data); + mux_proc->data = NULL; + } + GF_FREE(mux_proc); + ret = 0; + } else { + pthread_mutex_lock(&conf->attach_lock); + { + ret = mux_proc->notify(mux_proc, event); + } + pthread_mutex_unlock(&conf->attach_lock); + } + return ret; +} + +int +glusterd_muxsvc_conn_common_notify(struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data) +{ + return glusterd_big_locked_notify(rpc, mydata, event, data, + __glusterd_muxsvc_conn_common_notify); +} diff --git a/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h new file mode 100644 index 00000000000..1b225621ab1 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h @@ -0,0 +1,53 @@ +/* + Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _GLUSTERD_CONN_MGMT_H_ +#define _GLUSTERD_CONN_MGMT_H_ + +#include "rpc-clnt.h" + +typedef struct glusterd_conn_ glusterd_conn_t; + +typedef int (*glusterd_conn_notify_t)(glusterd_conn_t *conn, + rpc_clnt_event_t event); + +struct glusterd_conn_ { + struct rpc_clnt *rpc; + /* Existing daemons tend to specialize their respective + * notify implementations, so ... */ + glusterd_conn_notify_t notify; + int frame_timeout; + char sockpath[PATH_MAX]; +}; + +int +glusterd_conn_init(glusterd_conn_t *conn, char *sockpath, int frame_timeout, + glusterd_conn_notify_t notify); + +int +glusterd_conn_term(glusterd_conn_t *conn); + +int +glusterd_conn_connect(glusterd_conn_t *conn); + +int +glusterd_conn_disconnect(glusterd_conn_t *conn); + +int +glusterd_conn_common_notify(struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data); +int +glusterd_muxsvc_conn_common_notify(struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data); + +int32_t +glusterd_conn_build_socket_filepath(char *rundir, uuid_t uuid, char *socketpath, + int len); +#endif diff --git a/xlators/mgmt/glusterd/src/glusterd-errno.h b/xlators/mgmt/glusterd/src/glusterd-errno.h new file mode 100644 index 00000000000..c74070e0e8d --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-errno.h @@ -0,0 +1,33 @@ +/* + Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ +#ifndef _GLUSTERD_ERRNO_H +#define _GLUSTERD_ERRNO_H + +enum glusterd_op_errno { + EG_INTRNL = 30800, /* Internal Error */ + EG_OPNOTSUP = 30801, /* Gluster Op Not Supported */ + EG_ANOTRANS = 30802, /* Another Transaction in Progress */ + EG_BRCKDWN = 30803, /* One or more brick is down */ + EG_NODEDWN = 30804, /* One or more node is down */ + EG_HRDLMT = 30805, /* Hard Limit is reached */ + EG_NOVOL = 30806, /* Volume does not exist */ + EG_NOSNAP = 30807, /* Snap does not exist */ + EG_RBALRUN = 30808, /* Rebalance is running */ + EG_VOLRUN = 30809, /* Volume is running */ + EG_VOLSTP = 30810, /* Volume is not running */ + EG_VOLEXST = 30811, /* Volume exists */ + EG_SNAPEXST = 30812, /* Snapshot exists */ + EG_ISSNAP = 30813, /* Volume is a snap volume */ + EG_GEOREPRUN = 30814, /* Geo-Replication is running */ + EG_NOTTHINP = 30815, /* Bricks are not thinly provisioned */ + EG_NOGANESHA = 30816, /* Global ganesha is not enabled */ +}; + +#endif diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c new file mode 100644 index 00000000000..f08bd6cebee --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c @@ -0,0 +1,927 @@ +/* + Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include <glusterfs/common-utils.h> +#include "glusterd.h" +#include "glusterd-op-sm.h" +#include "glusterd-store.h" +#include "glusterd-utils.h" +#include "glusterd-volgen.h" +#include "glusterd-messages.h" +#include <glusterfs/syscall.h> + +#include <ctype.h> + +int +start_ganesha(char **op_errstr); + +typedef struct service_command { + char *binary; + char *service; + int (*action)(struct service_command *, char *); +} service_command; + +/* parsing_ganesha_ha_conf will allocate the returned string + * to be freed (GF_FREE) by the caller + * return NULL if error or not found */ +static char * +parsing_ganesha_ha_conf(const char *key) +{ +#define MAX_LINE 1024 + char scratch[MAX_LINE * 2] = { + 0, + }; + char *value = NULL, *pointer = NULL, *end_pointer = NULL; + FILE *fp; + + fp = fopen(GANESHA_HA_CONF, "r"); + if (fp == NULL) { + gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "couldn't open the file %s", GANESHA_HA_CONF); + goto end_ret; + } + while ((pointer = fgets(scratch, MAX_LINE, fp)) != NULL) { + /* Read config file until we get matching "^[[:space:]]*key" */ + if (*pointer == '#') { + continue; + } + while (isblank(*pointer)) { + pointer++; + } + if (strncmp(pointer, key, strlen(key))) { + continue; + } + pointer += strlen(key); + /* key found : if we fail to parse, we'll return an error + * rather than trying next one + * - supposition : conf file is bash compatible : no space + * around the '=' */ + if (*pointer != '=') { + gf_msg(THIS->name, GF_LOG_ERROR, errno, + GD_MSG_GET_CONFIG_INFO_FAILED, "Parsing %s failed at key %s", + GANESHA_HA_CONF, key); + goto end_close; + } + pointer++; /* jump the '=' */ + + if (*pointer == '"' || *pointer == '\'') { + /* dont get the quote */ + pointer++; + } + end_pointer = pointer; + /* stop at the next closing quote or blank/newline */ + do { + end_pointer++; + } while (!(*end_pointer == '\'' || *end_pointer == '"' || + isspace(*end_pointer) || *end_pointer == '\0')); + 
*end_pointer = '\0'; + + /* got it. copy it and return */ + value = gf_strdup(pointer); + break; + } + +end_close: + fclose(fp); +end_ret: + return value; +} + +static int +sc_systemctl_action(struct service_command *sc, char *command) +{ + runner_t runner = { + 0, + }; + + runinit(&runner); + runner_add_args(&runner, sc->binary, command, sc->service, NULL); + return runner_run(&runner); +} + +static int +sc_service_action(struct service_command *sc, char *command) +{ + runner_t runner = { + 0, + }; + + runinit(&runner); + runner_add_args(&runner, sc->binary, sc->service, command, NULL); + return runner_run(&runner); +} + +static int +manage_service(char *action) +{ + int i = 0; + int ret = 0; + struct service_command sc_list[] = {{.binary = "/bin/systemctl", + .service = "nfs-ganesha", + .action = sc_systemctl_action}, + {.binary = "/sbin/invoke-rc.d", + .service = "nfs-ganesha", + .action = sc_service_action}, + {.binary = "/sbin/service", + .service = "nfs-ganesha", + .action = sc_service_action}, + {.binary = NULL}}; + + while (sc_list[i].binary != NULL) { + ret = sys_access(sc_list[i].binary, X_OK); + if (ret == 0) { + gf_msg_debug(THIS->name, 0, "%s found.", sc_list[i].binary); + return sc_list[i].action(&sc_list[i], action); + } + i++; + } + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_UNRECOGNIZED_SVC_MNGR, + "Could not %s NFS-Ganesha.Service manager for distro" + " not recognized.", + action); + return ret; +} + +/* + * Check if the cluster is a ganesha cluster or not * + */ +gf_boolean_t +glusterd_is_ganesha_cluster() +{ + int ret = -1; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + gf_boolean_t ret_bool = _gf_false; + + this = THIS; + GF_VALIDATE_OR_GOTO("ganesha", this, out); + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, out); + + ret = dict_get_str_boolean(priv->opts, GLUSTERD_STORE_KEY_GANESHA_GLOBAL, + _gf_false); + if (ret == _gf_true) { + ret_bool = _gf_true; + gf_msg_debug(this->name, 0, "nfs-ganesha is enabled for the cluster"); + } else + gf_msg_debug(this->name, 0, "nfs-ganesha is disabled for the cluster"); + +out: + return ret_bool; +} + +/* Check if ganesha.enable is set to 'on', that checks if + * a particular volume is exported via NFS-Ganesha */ +gf_boolean_t +glusterd_check_ganesha_export(glusterd_volinfo_t *volinfo) +{ + char *value = NULL; + gf_boolean_t is_exported = _gf_false; + int ret = 0; + + ret = glusterd_volinfo_get(volinfo, "ganesha.enable", &value); + if ((ret == 0) && value) { + if (strcmp(value, "on") == 0) { + gf_msg_debug(THIS->name, 0, + "ganesha.enable set" + " to %s", + value); + is_exported = _gf_true; + } + } + return is_exported; +} + +/* * + * The below function is called as part of commit phase for volume set option + * "ganesha.enable". If the value is "on", it creates export configuration file + * and then export the volume via dbus command. Incase of "off", the volume + * will be already unexported during stage phase, so it will remove the conf + * file from shared storage + */ +int +glusterd_check_ganesha_cmd(char *key, char *value, char **errstr, dict_t *dict) +{ + int ret = 0; + char *volname = NULL; + + GF_ASSERT(key); + GF_ASSERT(value); + GF_ASSERT(dict); + + if ((strcmp(key, "ganesha.enable") == 0)) { + if ((strcmp(value, "on")) && (strcmp(value, "off"))) { + gf_asprintf(errstr, + "Invalid value" + " for volume set command. 
Use on/off only."); + ret = -1; + goto out; + } + if (strcmp(value, "on") == 0) { + ret = glusterd_handle_ganesha_op(dict, errstr, key, value); + + } else if (is_origin_glusterd(dict)) { + ret = dict_get_str(dict, "volname", &volname); + if (ret) { + gf_msg("glusterd-ganesha", GF_LOG_ERROR, errno, + GD_MSG_DICT_GET_FAILED, "Unable to get volume name"); + goto out; + } + ret = manage_export_config(volname, "off", errstr); + } + } +out: + if (ret) { + gf_msg("glusterd-ganesha", GF_LOG_ERROR, 0, + GD_MSG_NFS_GNS_OP_HANDLE_FAIL, + "Handling NFS-Ganesha" + " op failed."); + } + return ret; +} + +int +glusterd_op_stage_set_ganesha(dict_t *dict, char **op_errstr) +{ + int ret = -1; + char *value = NULL; + char *str = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + GF_ASSERT(dict); + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + ret = dict_get_str(dict, "value", &value); + if (value == NULL) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "value not present."); + goto out; + } + /* This dict_get will fail if the user had never set the key before */ + /*Ignoring the ret value and proceeding */ + ret = dict_get_str(priv->opts, GLUSTERD_STORE_KEY_GANESHA_GLOBAL, &str); + if (str ? strcmp(value, str) == 0 : strcmp(value, "disable") == 0) { + gf_asprintf(op_errstr, "nfs-ganesha is already %sd.", value); + ret = -1; + goto out; + } + + if (strcmp(value, "enable") == 0) { + ret = start_ganesha(op_errstr); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_NFS_GNS_START_FAIL, + "Could not start NFS-Ganesha"); + } + } else { + ret = stop_ganesha(op_errstr); + if (ret) + gf_msg_debug(THIS->name, 0, + "Could not stop " + "NFS-Ganesha."); + } + +out: + + if (ret) { + if (!(*op_errstr)) { + *op_errstr = gf_strdup("Error, Validation Failed"); + gf_msg_debug(this->name, 0, "Error, Cannot Validate option :%s", + GLUSTERD_STORE_KEY_GANESHA_GLOBAL); + } else { + gf_msg_debug(this->name, 0, "Error, Cannot Validate option"); + } + } + return ret; +} + +int +glusterd_op_set_ganesha(dict_t *dict, char **errstr) +{ + int ret = 0; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + char *key = NULL; + char *value = NULL; + char *next_version = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(dict); + + priv = this->private; + GF_ASSERT(priv); + + ret = dict_get_str(dict, "key", &key); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Couldn't get key in global option set"); + goto out; + } + + ret = dict_get_str(dict, "value", &value); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Couldn't get value in global option set"); + goto out; + } + + ret = glusterd_handle_ganesha_op(dict, errstr, key, value); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_NFS_GNS_SETUP_FAIL, + "Initial NFS-Ganesha set up failed"); + ret = -1; + goto out; + } + ret = dict_set_dynstr_with_alloc(priv->opts, + GLUSTERD_STORE_KEY_GANESHA_GLOBAL, value); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_DICT_SET_FAILED, + "Failed to set" + " nfs-ganesha in dict."); + goto out; + } + ret = glusterd_get_next_global_opt_version_str(priv->opts, &next_version); + if (ret) { + gf_msg_debug(THIS->name, 0, + "Could not fetch " + " global op version"); + goto out; + } + ret = dict_set_str(priv->opts, GLUSTERD_GLOBAL_OPT_VERSION, next_version); + if (ret) + goto out; + + ret = glusterd_store_options(this, priv->opts); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, 
GD_MSG_STORE_FAIL, + "Failed to store options"); + goto out; + } + +out: + gf_msg_debug(this->name, 0, "returning %d", ret); + return ret; +} + +/* Following function parse GANESHA_HA_CONF + * The sample file looks like below, + * HA_NAME="ganesha-ha-360" + * HA_VOL_NAME="ha-state" + * HA_CLUSTER_NODES="server1,server2" + * VIP_rhs_1="10.x.x.x" + * VIP_rhs_2="10.x.x.x." */ + +/* Check if the localhost is listed as one of nfs-ganesha nodes */ +gf_boolean_t +check_host_list(void) +{ + glusterd_conf_t *priv = NULL; + char *hostname, *hostlist; + gf_boolean_t ret = _gf_false; + xlator_t *this = NULL; + + this = THIS; + priv = THIS->private; + GF_ASSERT(priv); + + hostlist = parsing_ganesha_ha_conf("HA_CLUSTER_NODES"); + if (hostlist == NULL) { + gf_msg(this->name, GF_LOG_INFO, errno, GD_MSG_GET_CONFIG_INFO_FAILED, + "couldn't get HA_CLUSTER_NODES from file %s", GANESHA_HA_CONF); + return _gf_false; + } + + /* Hostlist is a comma separated list now */ + hostname = strtok(hostlist, ","); + while (hostname != NULL) { + ret = gf_is_local_addr(hostname); + if (ret) { + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_NFS_GNS_HOST_FOUND, + "ganesha host found " + "Hostname is %s", + hostname); + break; + } + hostname = strtok(NULL, ","); + } + + GF_FREE(hostlist); + return ret; +} + +int +gd_ganesha_send_dbus(char *volname, char *value) +{ + runner_t runner = { + 0, + }; + int ret = -1; + runinit(&runner); + + GF_VALIDATE_OR_GOTO("glusterd-ganesha", volname, out); + GF_VALIDATE_OR_GOTO("glusterd-ganesha", value, out); + + ret = 0; + if (check_host_list()) { + /* Check whether ganesha is running on this node */ + if (manage_service("status")) { + gf_msg("glusterd-ganesha", GF_LOG_WARNING, 0, + GD_MSG_GANESHA_NOT_RUNNING, + "Export failed, NFS-Ganesha is not running"); + } else { + runner_add_args(&runner, GANESHA_PREFIX "/dbus-send.sh", CONFDIR, + value, volname, NULL); + ret = runner_run(&runner); + } + } +out: + return ret; +} + +int +manage_export_config(char *volname, char *value, char **op_errstr) +{ + runner_t runner = { + 0, + }; + int ret = -1; + + GF_ASSERT(volname); + runinit(&runner); + runner_add_args(&runner, GANESHA_PREFIX "/create-export-ganesha.sh", + CONFDIR, value, volname, NULL); + ret = runner_run(&runner); + + if (ret && op_errstr) + gf_asprintf(op_errstr, + "Failed to create" + " NFS-Ganesha export config file."); + + return ret; +} + +/* Exports and unexports a particular volume via NFS-Ganesha */ +int +ganesha_manage_export(dict_t *dict, char *value, + gf_boolean_t update_cache_invalidation, char **op_errstr) +{ + int ret = -1; + glusterd_volinfo_t *volinfo = NULL; + dict_t *vol_opts = NULL; + char *volname = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + gf_boolean_t option = _gf_false; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + + GF_ASSERT(value); + GF_ASSERT(dict); + GF_ASSERT(priv); + + ret = dict_get_str(dict, "volname", &volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Unable to get volume name"); + goto out; + } + ret = gf_string2boolean(value, &option); + if (ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, + "invalid value."); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND, + FMTSTR_CHECK_VOL_EXISTS, volname); + goto out; + } + + ret = glusterd_check_ganesha_export(volinfo); + if (ret && option) { + gf_asprintf(op_errstr, + "ganesha.enable " + "is already 'on'."); + ret = 
-1; + goto out; + + } else if (!option && !ret) { + gf_asprintf(op_errstr, + "ganesha.enable " + "is already 'off'."); + ret = -1; + goto out; + } + + /* Check if global option is enabled, proceed only then */ + ret = dict_get_str_boolean(priv->opts, GLUSTERD_STORE_KEY_GANESHA_GLOBAL, + _gf_false); + if (ret == -1) { + gf_msg_debug(this->name, 0, + "Failed to get " + "global option dict."); + gf_asprintf(op_errstr, + "The option " + "nfs-ganesha should be " + "enabled before setting ganesha.enable."); + goto out; + } + if (!ret) { + gf_asprintf(op_errstr, + "The option " + "nfs-ganesha should be " + "enabled before setting ganesha.enable."); + ret = -1; + goto out; + } + + /* * + * Create the export file from the node where ganesha.enable "on" + * is executed + * */ + if (option && is_origin_glusterd(dict)) { + ret = manage_export_config(volname, "on", op_errstr); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_EXPORT_FILE_CREATE_FAIL, + "Failed to create" + "export file for NFS-Ganesha\n"); + goto out; + } + } + ret = gd_ganesha_send_dbus(volname, value); + if (ret) { + gf_asprintf(op_errstr, + "Dynamic export addition/deletion failed." + " Please see log file for details"); + goto out; + } + if (update_cache_invalidation) { + vol_opts = volinfo->dict; + ret = dict_set_dynstr_with_alloc(vol_opts, + "features.cache-invalidation", value); + if (ret) + gf_asprintf(op_errstr, + "Cache-invalidation could not" + " be set to %s.", + value); + ret = glusterd_store_volinfo(volinfo, + GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) + gf_asprintf(op_errstr, "failed to store volinfo for %s", + volinfo->volname); + } +out: + return ret; +} + +int +tear_down_cluster(gf_boolean_t run_teardown) +{ + int ret = 0; + runner_t runner = { + 0, + }; + struct stat st = { + 0, + }; + DIR *dir = NULL; + struct dirent *entry = NULL; + struct dirent scratch[2] = { + { + 0, + }, + }; + char path[PATH_MAX] = { + 0, + }; + + if (run_teardown) { + runinit(&runner); + runner_add_args(&runner, GANESHA_PREFIX "/ganesha-ha.sh", "teardown", + CONFDIR, NULL); + ret = runner_run(&runner); + /* * + * Remove all the entries in CONFDIR expect ganesha.conf and + * ganesha-ha.conf + */ + dir = sys_opendir(CONFDIR); + if (!dir) { + gf_msg_debug(THIS->name, 0, + "Failed to open directory %s. " + "Reason : %s", + CONFDIR, strerror(errno)); + ret = 0; + goto out; + } + + while ((entry = sys_readdir(dir, scratch))) { + if (gf_irrelevant_entry(entry)) + continue; + snprintf(path, PATH_MAX, "%s/%s", CONFDIR, entry->d_name); + ret = sys_lstat(path, &st); + if (ret == -1) { + gf_msg_debug(THIS->name, 0, + "Failed to stat entry %s :" + " %s", + path, strerror(errno)); + goto out; + } + + if (strcmp(entry->d_name, "ganesha.conf") == 0 || + strcmp(entry->d_name, "ganesha-ha.conf") == 0) + gf_msg_debug(THIS->name, 0, + " %s is not required" + " to remove", + path); + else if (S_ISDIR(st.st_mode)) + ret = recursive_rmdir(path); + else + ret = sys_unlink(path); + + if (ret) { + gf_msg_debug(THIS->name, 0, + " Failed to remove %s. " + "Reason : %s", + path, strerror(errno)); + } + + gf_msg_debug(THIS->name, 0, "%s %s", + ret ? "Failed to remove" : "Removed", entry->d_name); + } + + ret = sys_closedir(dir); + if (ret) { + gf_msg_debug(THIS->name, 0, + "Failed to close dir %s. Reason :" + " %s", + CONFDIR, strerror(errno)); + } + goto exit; + } + +out: + if (dir && sys_closedir(dir)) { + gf_msg_debug(THIS->name, 0, + "Failed to close dir %s. 
Reason :" + " %s", + CONFDIR, strerror(errno)); + } +exit: + return ret; +} + +int +setup_cluster(gf_boolean_t run_setup) +{ + int ret = 0; + runner_t runner = { + 0, + }; + + if (run_setup) { + runinit(&runner); + runner_add_args(&runner, GANESHA_PREFIX "/ganesha-ha.sh", "setup", + CONFDIR, NULL); + ret = runner_run(&runner); + } + return ret; +} + +static int +teardown(gf_boolean_t run_teardown, char **op_errstr) +{ + runner_t runner = { + 0, + }; + int ret = 1; + glusterd_volinfo_t *volinfo = NULL; + glusterd_conf_t *priv = NULL; + dict_t *vol_opts = NULL; + + priv = THIS->private; + + ret = tear_down_cluster(run_teardown); + if (ret == -1) { + gf_asprintf(op_errstr, + "Cleanup of NFS-Ganesha" + " HA config failed."); + goto out; + } + + runinit(&runner); + runner_add_args(&runner, GANESHA_PREFIX "/ganesha-ha.sh", "cleanup", + CONFDIR, NULL); + ret = runner_run(&runner); + if (ret) + gf_msg_debug(THIS->name, 0, + "Could not clean up" + " NFS-Ganesha related config"); + + cds_list_for_each_entry(volinfo, &priv->volumes, vol_list) + { + vol_opts = volinfo->dict; + /* All the volumes exported via NFS-Ganesha will be + unexported, hence setting the appropriate keys */ + ret = dict_set_str(vol_opts, "features.cache-invalidation", "off"); + if (ret) + gf_msg(THIS->name, GF_LOG_WARNING, errno, GD_MSG_DICT_SET_FAILED, + "Could not set features.cache-invalidation " + "to off for %s", + volinfo->volname); + + ret = dict_set_str(vol_opts, "ganesha.enable", "off"); + if (ret) + gf_msg(THIS->name, GF_LOG_WARNING, errno, GD_MSG_DICT_SET_FAILED, + "Could not set ganesha.enable to off for %s", + volinfo->volname); + + ret = glusterd_store_volinfo(volinfo, + GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) + gf_msg(THIS->name, GF_LOG_WARNING, 0, GD_MSG_VOLINFO_SET_FAIL, + "failed to store volinfo for %s", volinfo->volname); + } +out: + return ret; +} + +int +stop_ganesha(char **op_errstr) +{ + int ret = 0; + runner_t runner = { + 0, + }; + + if (check_host_list()) { + runinit(&runner); + runner_add_args(&runner, GANESHA_PREFIX "/ganesha-ha.sh", + "--setup-ganesha-conf-files", CONFDIR, "no", NULL); + ret = runner_run(&runner); + if (ret) { + gf_asprintf(op_errstr, + "removal of symlink ganesha.conf " + "in /etc/ganesha failed"); + } + ret = manage_service("stop"); + if (ret) + gf_asprintf(op_errstr, + "NFS-Ganesha service could not" + "be stopped."); + } + return ret; +} + +int +start_ganesha(char **op_errstr) +{ + int ret = -1; + glusterd_volinfo_t *volinfo = NULL; + glusterd_conf_t *priv = NULL; + runner_t runner = { + 0, + }; + + priv = THIS->private; + GF_ASSERT(priv); + + cds_list_for_each_entry(volinfo, &priv->volumes, vol_list) + { +#ifdef BUILD_GNFS + /* Gluster-nfs has to be disabled across the trusted pool */ + /* before attempting to start nfs-ganesha */ + ret = dict_set_str_sizen(volinfo->dict, NFS_DISABLE_MAP_KEY, "on"); + if (ret) + goto out; +#endif + ret = glusterd_store_volinfo(volinfo, + GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + *op_errstr = gf_strdup( + "Failed to store the " + "Volume information"); + goto out; + } + } + + /* If the nfs svc is not initialized it means that the service is not + * running, hence we can skip the process of stopping gluster-nfs + * service + */ +#ifdef BUILD_GNFS + if (priv->nfs_svc.inited) { + ret = priv->nfs_svc.stop(&(priv->nfs_svc), SIGKILL); + if (ret) { + ret = -1; + gf_asprintf(op_errstr, + "Gluster-NFS service could" + "not be stopped, exiting."); + goto out; + } + } +#endif + + if (check_host_list()) { + runinit(&runner); + 
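        /* recreate the ganesha.conf symlink in /etc/ganesha before starting the service */
+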
runner_add_args(&runner, GANESHA_PREFIX "/ganesha-ha.sh", + "--setup-ganesha-conf-files", CONFDIR, "yes", NULL); + ret = runner_run(&runner); + if (ret) { + gf_asprintf(op_errstr, + "creation of symlink ganesha.conf " + "in /etc/ganesha failed"); + goto out; + } + ret = manage_service("start"); + if (ret) + gf_asprintf(op_errstr, + "NFS-Ganesha failed to start." + "Please see log file for details"); + } + +out: + return ret; +} + +static int +pre_setup(gf_boolean_t run_setup, char **op_errstr) +{ + int ret = 0; + if (run_setup) { + if (!check_host_list()) { + gf_asprintf(op_errstr, + "Running nfs-ganesha setup command " + "from node which is not part of ganesha cluster"); + return -1; + } + } + ret = setup_cluster(run_setup); + if (ret == -1) + gf_asprintf(op_errstr, + "Failed to set up HA " + "config for NFS-Ganesha. " + "Please check the log file for details"); + return ret; +} + +int +glusterd_handle_ganesha_op(dict_t *dict, char **op_errstr, char *key, + char *value) +{ + int32_t ret = -1; + gf_boolean_t option = _gf_false; + + GF_ASSERT(dict); + GF_ASSERT(op_errstr); + GF_ASSERT(key); + GF_ASSERT(value); + + if (strcmp(key, "ganesha.enable") == 0) { + ret = ganesha_manage_export(dict, value, _gf_true, op_errstr); + if (ret < 0) + goto out; + } + + /* It is possible that the key might not be set */ + ret = gf_string2boolean(value, &option); + if (ret == -1) { + gf_asprintf(op_errstr, "Invalid value in key-value pair."); + goto out; + } + + if (strcmp(key, GLUSTERD_STORE_KEY_GANESHA_GLOBAL) == 0) { + /* * + * The set up/teardown of pcs cluster should be performed only + * once. This will done on the node in which the cli command + * 'gluster nfs-ganesha <enable/disable>' got executed. So that + * node should part of ganesha HA cluster + */ + if (option) { + ret = pre_setup(is_origin_glusterd(dict), op_errstr); + if (ret < 0) + goto out; + } else { + ret = teardown(is_origin_glusterd(dict), op_errstr); + if (ret < 0) + goto out; + } + } + +out: + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c new file mode 100644 index 00000000000..bf062c87060 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c @@ -0,0 +1,6782 @@ +/* + Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ +#include <glusterfs/common-utils.h> +#include "cli1-xdr.h" +#include "xdr-generic.h" +#include "glusterd.h" +#include "glusterd-op-sm.h" +#include "glusterd-geo-rep.h" +#include "glusterd-store.h" +#include "glusterd-utils.h" +#include "glusterd-volgen.h" +#include "glusterd-svc-helper.h" +#include <glusterfs/run.h> +#include <glusterfs/syscall.h> +#include "glusterd-messages.h" + +#include <signal.h> + +static int +dict_get_param(dict_t *dict, char *key, char **param); + +struct gsync_config_opt_vals_ gsync_confopt_vals[] = { + { + .op_name = "change_detector", + .no_of_pos_vals = 2, + .case_sensitive = _gf_true, + .values = {"xsync", "changelog"}, + }, + {.op_name = "special_sync_mode", + .no_of_pos_vals = 2, + .case_sensitive = _gf_true, + .values = {"partial", "recover"}}, + {.op_name = "log-level", + .no_of_pos_vals = 5, + .case_sensitive = _gf_false, + .values = {"critical", "error", "warning", "info", "debug"}}, + {.op_name = "use-tarssh", + .no_of_pos_vals = 6, + .case_sensitive = _gf_false, + .values = {"true", "false", "0", "1", "yes", "no"}}, + {.op_name = "ignore_deletes", + .no_of_pos_vals = 6, + .case_sensitive = _gf_false, + .values = {"true", "false", "0", "1", "yes", "no"}}, + {.op_name = "use_meta_volume", + .no_of_pos_vals = 6, + .case_sensitive = _gf_false, + .values = {"true", "false", "0", "1", "yes", "no"}}, + {.op_name = "use-meta-volume", + .no_of_pos_vals = 6, + .case_sensitive = _gf_false, + .values = {"true", "false", "0", "1", "yes", "no"}}, + { + .op_name = NULL, + }, +}; + +static char *gsync_reserved_opts[] = {"gluster-command", + "pid-file", + "state-file", + "session-owner", + "state-socket-unencoded", + "socketdir", + "local-id", + "local-path", + "slave-id", + NULL}; + +static char *gsync_no_restart_opts[] = {"checkpoint", "log_rsync_performance", + "log-rsync-performance", NULL}; + +void +set_gsyncd_inet6_arg(runner_t *runner) +{ + xlator_t *this = NULL; + char *af; + int ret; + + this = THIS; + ret = dict_get_str(this->options, "transport.address-family", &af); + if (ret == 0) + runner_argprintf(runner, "--%s", af); +} + +int +__glusterd_handle_sys_exec(rpcsvc_request_t *req) +{ + int32_t ret = 0; + dict_t *dict = NULL; + gf_cli_req cli_req = { + {0}, + }; + glusterd_op_t cli_op = GD_OP_SYS_EXEC; + glusterd_conf_t *priv = NULL; + char *host_uuid = NULL; + char err_str[64] = { + 0, + }; + xlator_t *this = NULL; + + GF_ASSERT(req); + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + req->rpc_err = GARBAGE_ARGS; + snprintf(err_str, sizeof(err_str), "Garbage args received"); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL); + goto out; + } + + if (cli_req.dict.dict_len) { + dict = dict_new(); + if (!dict) { + gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, + NULL); + goto out; + } + + ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + "failed to " + "unserialize req-buffer to dictionary"); + snprintf(err_str, sizeof(err_str), + "Unable to decode " + "the command"); + goto out; + } else { + dict->extra_stdfree = cli_req.dict.dict_val; + } + + host_uuid = gf_strdup(uuid_utoa(MY_UUID)); + if (host_uuid == NULL) { + snprintf(err_str, sizeof(err_str), + "Failed to get " + "the uuid of local glusterd"); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_UUID_GET_FAIL, + NULL); + ret = 
-1; + goto out; + } + + ret = dict_set_dynstr(dict, "host-uuid", host_uuid); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=host-uuid", NULL); + goto out; + } + } + + ret = glusterd_op_begin_synctask(req, cli_op, dict); + +out: + if (ret) { + if (err_str[0] == '\0') + snprintf(err_str, sizeof(err_str), "Operation failed"); + ret = glusterd_op_send_cli_response(cli_op, ret, 0, req, dict, err_str); + } + return ret; +} + +int +__glusterd_handle_copy_file(rpcsvc_request_t *req) +{ + int32_t ret = 0; + dict_t *dict = NULL; + gf_cli_req cli_req = { + {0}, + }; + glusterd_op_t cli_op = GD_OP_COPY_FILE; + glusterd_conf_t *priv = NULL; + char *host_uuid = NULL; + char err_str[64] = { + 0, + }; + xlator_t *this = NULL; + + GF_ASSERT(req); + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + req->rpc_err = GARBAGE_ARGS; + snprintf(err_str, sizeof(err_str), "Garbage args received"); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL); + goto out; + } + + if (cli_req.dict.dict_len) { + dict = dict_new(); + if (!dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, + NULL); + goto out; + } + + ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + "failed to" + "unserialize req-buffer to dictionary"); + snprintf(err_str, sizeof(err_str), + "Unable to decode " + "the command"); + goto out; + } else { + dict->extra_stdfree = cli_req.dict.dict_val; + } + + host_uuid = gf_strdup(uuid_utoa(MY_UUID)); + if (host_uuid == NULL) { + snprintf(err_str, sizeof(err_str), + "Failed to get " + "the uuid of local glusterd"); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_UUID_GET_FAIL, + NULL); + ret = -1; + goto out; + } + + ret = dict_set_dynstr(dict, "host-uuid", host_uuid); + if (ret) + goto out; + } + + ret = glusterd_op_begin_synctask(req, cli_op, dict); + +out: + if (ret) { + if (err_str[0] == '\0') + snprintf(err_str, sizeof(err_str), "Operation failed"); + ret = glusterd_op_send_cli_response(cli_op, ret, 0, req, dict, err_str); + } + return ret; +} + +int +__glusterd_handle_gsync_set(rpcsvc_request_t *req) +{ + int32_t ret = 0; + dict_t *dict = NULL; + gf_cli_req cli_req = { + {0}, + }; + glusterd_op_t cli_op = GD_OP_GSYNC_SET; + char *master = NULL; + char *slave = NULL; + char operation[64] = { + 0, + }; + int type = 0; + glusterd_conf_t *priv = NULL; + char *host_uuid = NULL; + char err_str[64] = { + 0, + }; + xlator_t *this = NULL; + + GF_ASSERT(req); + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + req->rpc_err = GARBAGE_ARGS; + snprintf(err_str, sizeof(err_str), "Garbage args received"); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL); + goto out; + } + + if (cli_req.dict.dict_len) { + dict = dict_new(); + if (!dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, + NULL); + goto out; + } + + ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + "failed to " + "unserialize req-buffer to dictionary"); + snprintf(err_str, sizeof(err_str), + "Unable to decode " + "the command"); + goto out; + } else { + 
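            /* the unserialized buffer is now owned by the dict and is freed along with it */
+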
dict->extra_stdfree = cli_req.dict.dict_val; + } + + host_uuid = gf_strdup(uuid_utoa(MY_UUID)); + if (host_uuid == NULL) { + snprintf(err_str, sizeof(err_str), + "Failed to get " + "the uuid of local glusterd"); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_UUID_GET_FAIL, + NULL); + ret = -1; + goto out; + } + ret = dict_set_dynstr(dict, "host-uuid", host_uuid); + if (ret) + goto out; + } + + ret = dict_get_str(dict, "master", &master); + if (ret < 0) { + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_DICT_GET_FAILED, + "master not found, while handling " GEOREP " options"); + master = "(No Master)"; + } + + ret = dict_get_str(dict, "slave", &slave); + if (ret < 0) { + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_DICT_GET_FAILED, + "slave not found, while handling " GEOREP " options"); + slave = "(No Slave)"; + } + + ret = dict_get_int32(dict, "type", &type); + if (ret < 0) { + snprintf(err_str, sizeof(err_str), + "Command type not found " + "while handling " GEOREP " options"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", + err_str); + goto out; + } + + switch (type) { + case GF_GSYNC_OPTION_TYPE_CREATE: + snprintf(operation, sizeof(operation), "create"); + cli_op = GD_OP_GSYNC_CREATE; + break; + + case GF_GSYNC_OPTION_TYPE_START: + snprintf(operation, sizeof(operation), "start"); + break; + + case GF_GSYNC_OPTION_TYPE_STOP: + snprintf(operation, sizeof(operation), "stop"); + break; + + case GF_GSYNC_OPTION_TYPE_PAUSE: + snprintf(operation, sizeof(operation), "pause"); + break; + + case GF_GSYNC_OPTION_TYPE_RESUME: + snprintf(operation, sizeof(operation), "resume"); + break; + + case GF_GSYNC_OPTION_TYPE_CONFIG: + snprintf(operation, sizeof(operation), "config"); + break; + + case GF_GSYNC_OPTION_TYPE_STATUS: + snprintf(operation, sizeof(operation), "status"); + break; + } + + ret = glusterd_op_begin_synctask(req, cli_op, dict); + +out: + if (ret) { + if (err_str[0] == '\0') + snprintf(err_str, sizeof(err_str), "Operation failed"); + ret = glusterd_op_send_cli_response(cli_op, ret, 0, req, dict, err_str); + } + return ret; +} + +int +glusterd_handle_sys_exec(rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler(req, __glusterd_handle_sys_exec); +} + +int +glusterd_handle_copy_file(rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler(req, __glusterd_handle_copy_file); +} + +int +glusterd_handle_gsync_set(rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler(req, __glusterd_handle_gsync_set); +} + +/***** + * + * glusterd_urltransform* internal API + * + *****/ + +static void +glusterd_urltransform_init(runner_t *runner, const char *transname) +{ + runinit(runner); + runner_add_arg(runner, GSYNCD_PREFIX "/gsyncd"); + set_gsyncd_inet6_arg(runner); + runner_argprintf(runner, "--%s-url", transname); +} + +static void +glusterd_urltransform_add(runner_t *runner, const char *url) +{ + runner_add_arg(runner, url); +} + +/* Helper routine to terminate just before slave_voluuid */ +static int32_t +parse_slave_url(char *slv_url, char **slave) +{ + char *tmp = NULL; + xlator_t *this = NULL; + int32_t ret = -1; + + this = THIS; + + /* slave format: + * master_node_uuid:ssh://slave_host::slave_vol:slave_voluuid */ + *slave = strchr(slv_url, ':'); + if (!(*slave)) { + goto out; + } + (*slave)++; + + /* To terminate at : before slave volume uuid */ + tmp = strstr(*slave, "::"); + if (!tmp) { + goto out; + } + tmp += 2; + tmp = strchr(tmp, ':'); + if (!tmp) + gf_msg_debug(this->name, 0, "old slave: %s!", *slave); + else + *tmp = '\0'; + + ret = 0; + 
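    /* slave now points past the master-node uuid, with any trailing ":<slave_voluuid>" stripped */
+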
gf_msg_debug(this->name, 0, "parsed slave: %s!", *slave); +out: + return ret; +} + +static int +_glusterd_urltransform_add_iter(dict_t *dict, char *key, data_t *value, + void *data) +{ + runner_t *runner = (runner_t *)data; + char slv_url[VOLINFO_SLAVE_URL_MAX] = {0}; + char *slave = NULL; + xlator_t *this = NULL; + int32_t ret = -1; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + + gf_msg_debug(this->name, 0, "value->data %s", value->data); + + if (snprintf(slv_url, sizeof(slv_url), "%s", value->data) >= + sizeof(slv_url)) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVE_VOL_PARSE_FAIL, + "Error in copying slave: %s!", value->data); + goto out; + } + + ret = parse_slave_url(slv_url, &slave); + if (ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVE_VOL_PARSE_FAIL, + "Error in parsing slave: %s!", value->data); + goto out; + } + + runner_add_arg(runner, slave); + ret = 0; +out: + return ret; +} + +static void +glusterd_urltransform_free(char **linearr, unsigned n) +{ + int i = 0; + + for (; i < n; i++) + GF_FREE(linearr[i]); + + GF_FREE(linearr); +} + +static int +glusterd_urltransform(runner_t *runner, char ***linearrp) +{ + char **linearr = NULL; + char *line = NULL; + unsigned arr_len = 32; + unsigned arr_idx = 0; + gf_boolean_t error = _gf_false; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + linearr = GF_CALLOC(arr_len, sizeof(char *), gf_gld_mt_linearr); + if (!linearr) { + error = _gf_true; + goto out; + } + + runner_redir(runner, STDOUT_FILENO, RUN_PIPE); + if (runner_start(runner) != 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SPAWNING_CHILD_FAILED, + "spawning child failed"); + + error = _gf_true; + goto out; + } + + arr_idx = 0; + for (;;) { + size_t len; + line = GF_MALLOC(1024, gf_gld_mt_linebuf); + if (!line) { + error = _gf_true; + goto out; + } + + if (fgets(line, 1024, runner_chio(runner, STDOUT_FILENO)) == NULL) { + GF_FREE(line); + break; + } + + len = strlen(line); + if (len == 0 || line[len - 1] != '\n') { + GF_FREE(line); + error = _gf_true; + goto out; + } + line[len - 1] = '\0'; + + if (arr_idx == arr_len) { + void *p = linearr; + arr_len <<= 1; + p = GF_REALLOC(linearr, arr_len); + if (!p) { + GF_FREE(line); + error = _gf_true; + goto out; + } + linearr = p; + } + linearr[arr_idx] = line; + + arr_idx++; + } + +out: + + /* XXX chpid field is not exported by run API + * but runner_end() does not abort the invoked + * process (ie. 
it might block in waitpid(2)) + * so we resort to a manual kill a the private field + */ + if (error && runner->chpid > 0) + kill(runner->chpid, SIGKILL); + + if (runner_end(runner) != 0) + error = _gf_true; + + if (error) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_READ_CHILD_DATA_FAILED, + "reading data from child failed"); + glusterd_urltransform_free(linearr, arr_idx); + return -1; + } + + *linearrp = linearr; + return arr_idx; +} + +static int +glusterd_urltransform_single(const char *url, const char *transname, + char ***linearrp) +{ + runner_t runner = { + 0, + }; + + glusterd_urltransform_init(&runner, transname); + glusterd_urltransform_add(&runner, url); + return glusterd_urltransform(&runner, linearrp); +} + +struct dictidxmark { + unsigned isrch; + unsigned ithis; + char *ikey; +}; + +struct slave_vol_config { + char old_slvhost[_POSIX_HOST_NAME_MAX + 1]; + char old_slvuser[LOGIN_NAME_MAX]; + unsigned old_slvidx; + char slave_voluuid[UUID_CANONICAL_FORM_LEN + 1]; +}; + +static int +_dict_mark_atindex(dict_t *dict, char *key, data_t *value, void *data) +{ + struct dictidxmark *dim = data; + + if (dim->isrch == dim->ithis) + dim->ikey = key; + + dim->ithis++; + return 0; +} + +static char * +dict_get_by_index(dict_t *dict, unsigned i) +{ + struct dictidxmark dim = { + 0, + }; + + dim.isrch = i; + dict_foreach(dict, _dict_mark_atindex, &dim); + + return dim.ikey; +} + +static int +glusterd_get_slave(glusterd_volinfo_t *vol, const char *slaveurl, + char **slavekey) +{ + runner_t runner = { + 0, + }; + int n = 0; + int i = 0; + char **linearr = NULL; + int32_t ret = 0; + + glusterd_urltransform_init(&runner, "canonicalize"); + ret = dict_foreach(vol->gsync_slaves, _glusterd_urltransform_add_iter, + &runner); + if (ret < 0) + return -2; + + glusterd_urltransform_add(&runner, slaveurl); + + n = glusterd_urltransform(&runner, &linearr); + if (n == -1) + return -2; + + for (i = 0; i < n - 1; i++) { + if (strcmp(linearr[i], linearr[n - 1]) == 0) + break; + } + glusterd_urltransform_free(linearr, n); + + if (i < n - 1) + *slavekey = dict_get_by_index(vol->gsync_slaves, i); + else + i = -1; + + return i; +} + +static int +glusterd_query_extutil_generic(char *resbuf, size_t blen, runner_t *runner, + void *data, + int (*fcbk)(char *resbuf, size_t blen, FILE *fp, + void *data)) +{ + int ret = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + runner_redir(runner, STDOUT_FILENO, RUN_PIPE); + if (runner_start(runner) != 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SPAWNING_CHILD_FAILED, + "spawning child failed"); + + return -1; + } + + ret = fcbk(resbuf, blen, runner_chio(runner, STDOUT_FILENO), data); + + ret |= runner_end(runner); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_READ_CHILD_DATA_FAILED, + "reading data from child failed"); + + return ret ? -1 : 0; +} + +static int +_fcbk_singleline(char *resbuf, size_t blen, FILE *fp, void *data) +{ + char *ptr = NULL; + + errno = 0; + ptr = fgets(resbuf, blen, fp); + if (ptr) { + size_t len = strlen(resbuf); + if (len && resbuf[len - 1] == '\n') + resbuf[len - 1] = '\0'; // strip off \n + } + + return errno ? 
-1 : 0; +} + +static int +glusterd_query_extutil(char *resbuf, runner_t *runner) +{ + return glusterd_query_extutil_generic(resbuf, PATH_MAX, runner, NULL, + _fcbk_singleline); +} + +static int +glusterd_get_slave_voluuid(char *slave_host, char *slave_vol, char *vol_uuid) +{ + runner_t runner = { + 0, + }; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + int ret = -1; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, out); + + runinit(&runner); + runner_add_arg(&runner, GSYNCD_PREFIX "/gsyncd"); + set_gsyncd_inet6_arg(&runner); + runner_add_arg(&runner, "--slavevoluuid-get"); + runner_argprintf(&runner, "%s::%s", slave_host, slave_vol); + + synclock_unlock(&priv->big_lock); + ret = glusterd_query_extutil(vol_uuid, &runner); + synclock_lock(&priv->big_lock); + +out: + return ret; +} + +static int +_fcbk_conftodict(char *resbuf, size_t blen, FILE *fp, void *data) +{ + char *ptr = NULL; + dict_t *dict = data; + char *v = NULL; + + for (;;) { + errno = 0; + ptr = fgets(resbuf, blen - 2, fp); + if (!ptr) + break; + v = resbuf + strlen(resbuf) - 1; + while (isspace(*v)) + /* strip trailing space */ + *v-- = '\0'; + if (v == resbuf) + /* skip empty line */ + continue; + v = strchr(resbuf, ':'); + if (!v) + return -1; + *v++ = '\0'; + while (isspace(*v)) + v++; + v = gf_strdup(v); + if (!v) + return -1; + if (dict_set_dynstr(dict, resbuf, v) != 0) { + GF_FREE(v); + return -1; + } + } + + return errno ? -1 : 0; +} + +static int +glusterd_gsync_get_config(char *master, char *slave, char *conf_path, + dict_t *dict) +{ + /* key + value, where value must be able to accommodate a path */ + char resbuf[256 + PATH_MAX] = { + 0, + }; + runner_t runner = { + 0, + }; + + runinit(&runner); + runner_add_args(&runner, GSYNCD_PREFIX "/gsyncd", "-c", NULL); + runner_argprintf(&runner, "%s", conf_path); + set_gsyncd_inet6_arg(&runner); + runner_argprintf(&runner, "--iprefix=%s", DATADIR); + runner_argprintf(&runner, ":%s", master); + runner_add_args(&runner, slave, "--config-get-all", NULL); + + return glusterd_query_extutil_generic(resbuf, sizeof(resbuf), &runner, dict, + _fcbk_conftodict); +} + +static int +_fcbk_statustostruct(char *resbuf, size_t blen, FILE *fp, void *data) +{ + char *ptr = NULL; + char *v = NULL; + char *k = NULL; + gf_gsync_status_t *sts_val = NULL; + size_t len = 0; + + sts_val = (gf_gsync_status_t *)data; + + for (;;) { + errno = 0; + ptr = fgets(resbuf, blen - 2, fp); + if (!ptr) + break; + + v = resbuf + strlen(resbuf) - 1; + while (isspace(*v)) + /* strip trailing space */ + *v-- = '\0'; + if (v == resbuf) + /* skip empty line */ + continue; + v = strchr(resbuf, ':'); + if (!v) + return -1; + *v++ = '\0'; + while (isspace(*v)) + v++; + v = gf_strdup(v); + if (!v) + return -1; + + k = gf_strdup(resbuf); + if (!k) { + GF_FREE(v); + return -1; + } + + if (strcmp(k, "worker_status") == 0) { + len = min(strlen(v), (sizeof(sts_val->worker_status) - 1)); + memcpy(sts_val->worker_status, v, len); + sts_val->worker_status[len] = '\0'; + } else if (strcmp(k, "slave_node") == 0) { + len = min(strlen(v), (sizeof(sts_val->slave_node) - 1)); + memcpy(sts_val->slave_node, v, len); + sts_val->slave_node[len] = '\0'; + } else if (strcmp(k, "crawl_status") == 0) { + len = min(strlen(v), (sizeof(sts_val->crawl_status) - 1)); + memcpy(sts_val->crawl_status, v, len); + sts_val->crawl_status[len] = '\0'; + } else if (strcmp(k, "last_synced") == 0) { + len = min(strlen(v), (sizeof(sts_val->last_synced) - 1)); + 
memcpy(sts_val->last_synced, v, len); + sts_val->last_synced[len] = '\0'; + } else if (strcmp(k, "last_synced_utc") == 0) { + len = min(strlen(v), (sizeof(sts_val->last_synced_utc) - 1)); + memcpy(sts_val->last_synced_utc, v, len); + sts_val->last_synced_utc[len] = '\0'; + } else if (strcmp(k, "entry") == 0) { + len = min(strlen(v), (sizeof(sts_val->entry) - 1)); + memcpy(sts_val->entry, v, len); + sts_val->entry[len] = '\0'; + } else if (strcmp(k, "data") == 0) { + len = min(strlen(v), (sizeof(sts_val->data) - 1)); + memcpy(sts_val->data, v, len); + sts_val->data[len] = '\0'; + } else if (strcmp(k, "meta") == 0) { + len = min(strlen(v), (sizeof(sts_val->meta) - 1)); + memcpy(sts_val->meta, v, len); + sts_val->meta[len] = '\0'; + } else if (strcmp(k, "failures") == 0) { + len = min(strlen(v), (sizeof(sts_val->failures) - 1)); + memcpy(sts_val->failures, v, len); + sts_val->failures[len] = '\0'; + } else if (strcmp(k, "checkpoint_time") == 0) { + len = min(strlen(v), (sizeof(sts_val->checkpoint_time) - 1)); + memcpy(sts_val->checkpoint_time, v, len); + sts_val->checkpoint_time[len] = '\0'; + } else if (strcmp(k, "checkpoint_time_utc") == 0) { + len = min(strlen(v), (sizeof(sts_val->checkpoint_time_utc) - 1)); + memcpy(sts_val->checkpoint_time_utc, v, len); + sts_val->checkpoint_time_utc[len] = '\0'; + } else if (strcmp(k, "checkpoint_completed") == 0) { + len = min(strlen(v), (sizeof(sts_val->checkpoint_completed) - 1)); + memcpy(sts_val->checkpoint_completed, v, len); + sts_val->checkpoint_completed[len] = '\0'; + } else if (strcmp(k, "checkpoint_completion_time") == 0) { + len = min(strlen(v), + (sizeof(sts_val->checkpoint_completion_time) - 1)); + memcpy(sts_val->checkpoint_completion_time, v, len); + sts_val->checkpoint_completion_time[len] = '\0'; + } else if (strcmp(k, "checkpoint_completion_time_utc") == 0) { + len = min(strlen(v), + (sizeof(sts_val->checkpoint_completion_time_utc) - 1)); + memcpy(sts_val->checkpoint_completion_time_utc, v, len); + sts_val->checkpoint_completion_time_utc[len] = '\0'; + } + GF_FREE(v); + GF_FREE(k); + } + + return errno ? 
-1 : 0; +} + +static int +glusterd_gsync_get_status(char *master, char *slave, char *conf_path, + char *brick_path, gf_gsync_status_t *sts_val) +{ + /* key + value, where value must be able to accommodate a path */ + char resbuf[256 + PATH_MAX] = { + 0, + }; + runner_t runner = { + 0, + }; + + runinit(&runner); + runner_add_args(&runner, GSYNCD_PREFIX "/gsyncd", "-c", NULL); + runner_argprintf(&runner, "%s", conf_path); + set_gsyncd_inet6_arg(&runner); + runner_argprintf(&runner, "--iprefix=%s", DATADIR); + runner_argprintf(&runner, ":%s", master); + runner_add_args(&runner, slave, "--status-get", NULL); + runner_add_args(&runner, "--path", brick_path, NULL); + + return glusterd_query_extutil_generic(resbuf, sizeof(resbuf), &runner, + sts_val, _fcbk_statustostruct); +} + +static int +glusterd_gsync_get_param_file(char *prmfile, const char *param, char *master, + char *slave, char *conf_path) +{ + runner_t runner = { + 0, + }; + + runinit(&runner); + runner_add_args(&runner, GSYNCD_PREFIX "/gsyncd", "-c", NULL); + runner_argprintf(&runner, "%s", conf_path); + set_gsyncd_inet6_arg(&runner); + runner_argprintf(&runner, "--iprefix=%s", DATADIR); + runner_argprintf(&runner, ":%s", master); + runner_add_args(&runner, slave, "--config-get", NULL); + runner_argprintf(&runner, "%s-file", param); + + return glusterd_query_extutil(prmfile, &runner); +} + +static int +gsyncd_getpidfile(char *master, char *slave, char *pidfile, char *conf_path, + gf_boolean_t *is_template_in_use) +{ + char temp_conf_path[PATH_MAX] = ""; + char *working_conf_path = NULL; + glusterd_conf_t *priv = NULL; + int ret = -1; + struct stat stbuf = { + 0, + }; + xlator_t *this = NULL; + int32_t len = 0; + + this = THIS; + GF_ASSERT(this); + + GF_ASSERT(this->private); + GF_ASSERT(conf_path); + + priv = this->private; + + GF_VALIDATE_OR_GOTO("gsync", master, out); + GF_VALIDATE_OR_GOTO("gsync", slave, out); + + len = snprintf(temp_conf_path, sizeof(temp_conf_path), + "%s/" GSYNC_CONF_TEMPLATE, priv->workdir); + if ((len < 0) || (len >= sizeof(temp_conf_path))) { + goto out; + } + + ret = sys_lstat(conf_path, &stbuf); + if (!ret) { + gf_msg_debug(this->name, 0, "Using passed config template(%s).", + conf_path); + working_conf_path = conf_path; + } else { + gf_msg(this->name, GF_LOG_WARNING, ENOENT, GD_MSG_FILE_OP_FAILED, + "Config file (%s) missing. Looking for template " + "config file (%s)", + conf_path, temp_conf_path); + ret = sys_lstat(temp_conf_path, &stbuf); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, ENOENT, GD_MSG_FILE_OP_FAILED, + "Template config file (%s) missing.", temp_conf_path); + goto out; + } + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_DEFAULT_TEMP_CONFIG, + "Using default config template(%s).", temp_conf_path); + working_conf_path = temp_conf_path; + *is_template_in_use = _gf_true; + } + +fetch_data: + + ret = glusterd_gsync_get_param_file(pidfile, "pid", master, slave, + working_conf_path); + if ((ret == -1) || strlen(pidfile) == 0) { + if (*is_template_in_use == _gf_false) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_PIDFILE_CREATE_FAILED, + "failed to create the pidfile string. 
" + "Trying default config template"); + working_conf_path = temp_conf_path; + *is_template_in_use = _gf_true; + goto fetch_data; + } else { + ret = -2; + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_PIDFILE_CREATE_FAILED, + "failed to " + "create the pidfile string from template " + "config"); + goto out; + } + } + + gf_msg_debug(this->name, 0, "pidfile = %s", pidfile); + + ret = open(pidfile, O_RDWR); +out: + return ret; +} + +static int +gsync_status_byfd(int fd) +{ + GF_ASSERT(fd >= -1); + + if (lockf(fd, F_TEST, 0) == -1 && (errno == EAGAIN || errno == EACCES)) + /* gsyncd keeps the pidfile locked */ + return 0; + + return -1; +} + +/* status: return 0 when gsync is running + * return -1 when not running + */ +int +gsync_status(char *master, char *slave, char *conf_path, int *status, + gf_boolean_t *is_template_in_use) +{ + char pidfile[PATH_MAX] = { + 0, + }; + int fd = -1; + + fd = gsyncd_getpidfile(master, slave, pidfile, conf_path, + is_template_in_use); + if (fd == -2) + return -1; + + *status = gsync_status_byfd(fd); + + sys_close(fd); + + return 0; +} + +static int32_t +glusterd_gsync_volinfo_dict_set(glusterd_volinfo_t *volinfo, char *key, + char *value) +{ + int32_t ret = -1; + char *gsync_status = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + gsync_status = gf_strdup(value); + if (!gsync_status) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "Unable to allocate memory"); + goto out; + } + + ret = dict_set_dynstr(volinfo->dict, key, gsync_status); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to set dict"); + goto out; + } + + ret = 0; +out: + return ret; +} + +static int +glusterd_verify_gsyncd_spawn(char *master, char *slave) +{ + int ret = 0; + runner_t runner = { + 0, + }; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + runinit(&runner); + runner_add_args(&runner, GSYNCD_PREFIX "/gsyncd", "--verify", "spawning", + NULL); + runner_argprintf(&runner, ":%s", master); + runner_add_args(&runner, slave, NULL); + runner_redir(&runner, STDOUT_FILENO, RUN_PIPE); + ret = runner_start(&runner); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SPAWNING_CHILD_FAILED, + "spawning child failed"); + ret = -1; + goto out; + } + + if (runner_end(&runner) != 0) + ret = -1; + +out: + gf_msg_debug(this->name, 0, "returning %d", ret); + return ret; +} + +static int +gsync_verify_config_options(dict_t *dict, char **op_errstr, char *volname) +{ + char **resopt = NULL; + int i = 0; + int ret = -1; + char *subop = NULL; + char *slave = NULL; + char *op_name = NULL; + char *op_value = NULL; + char *t = NULL; + char errmsg[PATH_MAX] = ""; + gf_boolean_t banned = _gf_true; + gf_boolean_t op_match = _gf_true; + gf_boolean_t val_match = _gf_true; + struct gsync_config_opt_vals_ *conf_vals = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + if (dict_get_str(dict, "subop", &subop) != 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED, + "missing subop"); + *op_errstr = gf_strdup("Invalid config request"); + return -1; + } + + if (dict_get_str(dict, "slave", &slave) != 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED, + GEOREP " CONFIG: no slave given"); + *op_errstr = gf_strdup("Slave required"); + return -1; + } + + if (strcmp(subop, "get-all") == 0) + return 0; + + if (dict_get_str(dict, "op_name", &op_name) != 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED, + "option name missing"); + *op_errstr = gf_strdup("Option 
name missing"); + return -1; + } + + if (runcmd(GSYNCD_PREFIX "/gsyncd", "--config-check", op_name, NULL)) { + ret = glusterd_verify_gsyncd_spawn(volname, slave); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GSYNCD_SPAWN_FAILED, + "Unable to spawn " + "gsyncd"); + return 0; + } + + gf_msg(this->name, GF_LOG_WARNING, EINVAL, GD_MSG_INVALID_ENTRY, + "Invalid option %s", op_name); + *op_errstr = gf_strdup("Invalid option"); + + return -1; + } + + if (strcmp(subop, "get") == 0) + return 0; + + t = strtail(subop, "set"); + if (!t) + t = strtail(subop, "del"); + if (!t || (t[0] && strcmp(t, "-glob") != 0)) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SUBOP_NOT_FOUND, + "unknown subop %s", subop); + *op_errstr = gf_strdup("Invalid config request"); + return -1; + } + + if (strtail(subop, "set") && + dict_get_str(dict, "op_value", &op_value) != 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED, + "missing value for set"); + *op_errstr = gf_strdup("missing value"); + } + + /* match option name against reserved options, modulo -/_ + * difference + */ + for (resopt = gsync_reserved_opts; *resopt; resopt++) { + banned = _gf_true; + for (i = 0; (*resopt)[i] && op_name[i]; i++) { + if ((*resopt)[i] == op_name[i] || + ((*resopt)[i] == '-' && op_name[i] == '_')) + continue; + banned = _gf_false; + } + + if (op_name[i] != '\0') + banned = _gf_false; + + if (banned) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_RESERVED_OPTION, + "Reserved option %s", op_name); + *op_errstr = gf_strdup("Reserved option"); + + return -1; + break; + } + } + + /* Check options in gsync_confopt_vals for invalid values */ + for (conf_vals = gsync_confopt_vals; conf_vals->op_name; conf_vals++) { + op_match = _gf_true; + for (i = 0; conf_vals->op_name[i] && op_name[i]; i++) { + if (conf_vals->op_name[i] == op_name[i] || + (conf_vals->op_name[i] == '_' && op_name[i] == '-')) + continue; + op_match = _gf_false; + } + + if (op_match) { + if (!op_value) + goto out; + val_match = _gf_false; + for (i = 0; i < conf_vals->no_of_pos_vals; i++) { + if (conf_vals->case_sensitive) { + if (!strcmp(conf_vals->values[i], op_value)) + val_match = _gf_true; + } else { + if (!strcasecmp(conf_vals->values[i], op_value)) + val_match = _gf_true; + } + } + + if (!val_match) { + ret = snprintf(errmsg, sizeof(errmsg) - 1, + "Invalid value(%s) for" + " option %s", + op_value, op_name); + errmsg[ret] = '\0'; + + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, + "%s", errmsg); + *op_errstr = gf_strdup(errmsg); + return -1; + } + } + } +out: + return 0; +} + +static int +glusterd_get_gsync_status_mst_slv(glusterd_volinfo_t *volinfo, char *slave, + char *conf_path, dict_t *rsp_dict, + char *node); + +static int +_get_status_mst_slv(dict_t *dict, char *key, data_t *value, void *data) +{ + glusterd_gsync_status_temp_t *param = NULL; + char *slave = NULL; + char *slave_buf = NULL; + char *slave_url = NULL; + char *slave_vol = NULL; + char *slave_host = NULL; + char *errmsg = NULL; + char conf_path[PATH_MAX] = ""; + int ret = -1; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + char slv_url[VOLINFO_SLAVE_URL_MAX] = {0}; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + + param = (glusterd_gsync_status_temp_t *)data; + + GF_VALIDATE_OR_GOTO(this->name, param, out); + GF_VALIDATE_OR_GOTO(this->name, param->volinfo, out); + + if (this) + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, out); + + if (snprintf(slv_url, sizeof(slv_url), "%s", value->data) >= + sizeof(slv_url)) { 
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVE_VOL_PARSE_FAIL, + "Error in copying slave: %s!", value->data); + goto out; + } + + ret = parse_slave_url(slv_url, &slave); + if (ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVE_VOL_PARSE_FAIL, + "Error in parsing slave: %s!", value->data); + goto out; + } + + ret = glusterd_get_slave_info(slave, &slave_url, &slave_host, &slave_vol, + &errmsg); + if (ret) { + if (errmsg) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVEINFO_FETCH_ERROR, + "Unable to fetch slave details. Error: %s", errmsg); + else + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVEINFO_FETCH_ERROR, + "Unable to fetch slave details."); + ret = -1; + goto out; + } + + ret = snprintf(conf_path, sizeof(conf_path) - 1, + "%s/" GEOREP "/%s_%s_%s/gsyncd.conf", priv->workdir, + param->volinfo->volname, slave_host, slave_vol); + conf_path[ret] = '\0'; + + ret = glusterd_get_gsync_status_mst_slv(param->volinfo, slave, conf_path, + param->rsp_dict, param->node); +out: + + if (errmsg) + GF_FREE(errmsg); + + if (slave_buf) + GF_FREE(slave_buf); + + if (slave_vol) + GF_FREE(slave_vol); + + if (slave_url) + GF_FREE(slave_url); + + if (slave_host) + GF_FREE(slave_host); + + gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d.", ret); + return ret; +} + +static int +_get_max_gsync_slave_num(dict_t *dict, char *key, data_t *value, void *data) +{ + int tmp_slvnum = 0; + int *slvnum = (int *)data; + + sscanf(key, "slave%d", &tmp_slvnum); + if (tmp_slvnum > *slvnum) + *slvnum = tmp_slvnum; + + return 0; +} + +static int +_get_slave_idx_slave_voluuid(dict_t *dict, char *key, data_t *value, void *data) +{ + char *slave_info = NULL; + xlator_t *this = NULL; + struct slave_vol_config *slave_cfg = NULL; + int i = 0; + int ret = -1; + unsigned tmp_slvnum = 0; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + + slave_cfg = data; + + if (value) + slave_info = value->data; + + if (!(slave_info) || strlen(slave_info) == 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_SLAVE, + "Invalid slave in dict"); + ret = -2; + goto out; + } + + /* slave format: + * master_node_uuid:ssh://slave_host::slave_vol:slave_voluuid */ + while (i++ < 5) { + slave_info = strchr(slave_info, ':'); + if (slave_info) + slave_info++; + else { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVE_VOL_PARSE_FAIL, + "slave_info becomes NULL!"); + ret = -2; + goto out; + } + } + if (strcmp(slave_info, slave_cfg->slave_voluuid) == 0) { + gf_msg_debug(this->name, 0, + "Same slave volume " + "already present %s", + slave_cfg->slave_voluuid); + ret = -1; + + sscanf(key, "slave%d", &tmp_slvnum); + slave_cfg->old_slvidx = tmp_slvnum; + + gf_msg_debug(this->name, 0, + "and " + "its index is: %d", + tmp_slvnum); + goto out; + } + + ret = 0; +out: + return ret; +} + +static int +glusterd_remove_slave_in_info(glusterd_volinfo_t *volinfo, char *slave, + char **op_errstr) +{ + int zero_slave_entries = _gf_true; + int ret = 0; + char *slavekey = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + GF_ASSERT(volinfo); + GF_ASSERT(slave); + + do { + ret = glusterd_get_slave(volinfo, slave, &slavekey); + if (ret < 0 && zero_slave_entries) { + ret++; + goto out; + } + zero_slave_entries = _gf_false; + dict_del(volinfo->gsync_slaves, slavekey); + } while (ret >= 0); + + ret = glusterd_store_volinfo(volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + *op_errstr = gf_strdup( + "Failed to store the Volume" + "information"); + goto out; + } +out: + gf_msg_debug(this->name, 0, 
"returning %d", ret); + return ret; +} + +static int +glusterd_gsync_get_uuid(char *slave, glusterd_volinfo_t *vol, uuid_t uuid) +{ + int ret = 0; + char *slavekey = NULL; + char *slaveentry = NULL; + char *t = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + GF_ASSERT(vol); + GF_ASSERT(slave); + + ret = glusterd_get_slave(vol, slave, &slavekey); + if (ret < 0) { + /* XXX colliding cases of failure and non-extant + * slave... now just doing this as callers of this + * function can make sense only of -1 and 0 as retvals; + * getting at the proper semanticals will involve + * fixing callers as well. + */ + ret = -1; + goto out; + } + + ret = dict_get_str(vol->gsync_slaves, slavekey, &slaveentry); + GF_ASSERT(ret == 0); + + t = strchr(slaveentry, ':'); + GF_ASSERT(t); + *t = '\0'; + ret = gf_uuid_parse(slaveentry, uuid); + *t = ':'; + +out: + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; +} + +static int +update_slave_voluuid(dict_t *dict, char *key, data_t *value, void *data) +{ + char *slave = NULL; + char *slave_url = NULL; + char *slave_vol = NULL; + char *slave_host = NULL; + char *errmsg = NULL; + xlator_t *this = NULL; + int ret = -1; + char slv_url[VOLINFO_SLAVE_URL_MAX] = {0}; + char slave_voluuid[GF_UUID_BUF_SIZE] = {0}; + char *slave_info = NULL; + char *new_value = NULL; + char *same_key = NULL; + int cnt = 0; + gf_boolean_t *voluuid_updated = NULL; + + this = THIS; + + voluuid_updated = data; + slave_info = value->data; + gf_msg_debug(this->name, 0, "slave_info: %s!", slave_info); + + /* old slave format: + * master_node_uuid:ssh://slave_host::slave_vol + * New slave format: + * master_node_uuid:ssh://slave_host::slave_vol:slave_voluuid */ + while (slave_info) { + slave_info = strchr(slave_info, ':'); + if (slave_info) + cnt++; + else + break; + + slave_info++; + } + + gf_msg_debug(this->name, 0, "cnt: %d", cnt); + /* check whether old slave format and update vol uuid if old format. + * With volume uuid, number of ':' is 5 and is 4 without. + */ + if (cnt == 4) { + if (snprintf(slv_url, sizeof(slv_url), "%s", value->data) >= + sizeof(slv_url)) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVE_VOL_PARSE_FAIL, + "Error in copying slave: %s!", value->data); + goto out; + } + + ret = parse_slave_url(slv_url, &slave); + if (ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVE_VOL_PARSE_FAIL, + "Error in parsing slave: %s!", value->data); + goto out; + } + + ret = glusterd_get_slave_info(slave, &slave_url, &slave_host, + &slave_vol, &errmsg); + if (ret) { + if (errmsg) + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_SLAVEINFO_FETCH_ERROR, + "Unable to fetch slave details. 
Error: %s", errmsg); + else + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_SLAVEINFO_FETCH_ERROR, + "Unable to fetch slave details."); + ret = -1; + goto out; + } + + ret = glusterd_get_slave_voluuid(slave_host, slave_vol, slave_voluuid); + if ((ret) || (strlen(slave_voluuid) == 0)) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REMOTE_VOL_UUID_FAIL, + "Unable to get remote volume uuid" + "slavehost:%s slavevol:%s", + slave_host, slave_vol); + /* Avoiding failure due to remote vol uuid fetch */ + ret = 0; + goto out; + } + ret = gf_asprintf(&new_value, "%s:%s", value->data, slave_voluuid); + ret = gf_asprintf(&same_key, "%s", key); + + /* delete old key and add new value */ + dict_del(dict, key); + + /* set new value for the same key*/ + ret = dict_set_dynstr(dict, same_key, new_value); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REMOTE_VOL_UUID_FAIL, + "Error in setting dict value" + "new_value :%s", + new_value); + goto out; + } + *voluuid_updated = _gf_true; + } + + ret = 0; +out: + if (errmsg) + GF_FREE(errmsg); + + if (slave_url) + GF_FREE(slave_url); + + if (slave_vol) + GF_FREE(slave_vol); + + if (slave_host) + GF_FREE(slave_host); + + gf_msg_debug(this->name, 0, "Returning %d.", ret); + return ret; +} + +static int +glusterd_update_slave_voluuid_slaveinfo(glusterd_volinfo_t *volinfo) +{ + int ret = -1; + xlator_t *this = NULL; + gf_boolean_t voluuid_updated = _gf_false; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + GF_VALIDATE_OR_GOTO(this->name, volinfo, out); + + ret = dict_foreach(volinfo->gsync_slaves, update_slave_voluuid, + &voluuid_updated); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REMOTE_VOL_UUID_FAIL, + "Error in updating" + "volinfo"); + goto out; + } + + if (_gf_true == voluuid_updated) { + ret = glusterd_store_volinfo(volinfo, + GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_STORE_FAIL, + "Error in storing" + "volinfo"); + goto out; + } + } + + ret = 0; +out: + gf_msg_debug((this ? 
this->name : "glusterd"), 0, "Returning %d", ret); + return ret; +} + +int +glusterd_check_gsync_running_local(char *master, char *slave, char *conf_path, + gf_boolean_t *is_run) +{ + int ret = -1; + int ret_status = 0; + gf_boolean_t is_template_in_use = _gf_false; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + GF_ASSERT(master); + GF_ASSERT(slave); + GF_ASSERT(is_run); + + *is_run = _gf_false; + ret = gsync_status(master, slave, conf_path, &ret_status, + &is_template_in_use); + if (ret == 0 && ret_status == 0) + *is_run = _gf_true; + else if (ret == -1) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VALIDATE_FAILED, + GEOREP " validation failed"); + goto out; + } + ret = 0; +out: + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; +} + +static int +glusterd_store_slave_in_info(glusterd_volinfo_t *volinfo, char *slave, + char *host_uuid, char *slave_voluuid, + char **op_errstr, gf_boolean_t is_force) +{ + int ret = 0; + int maxslv = 0; + char **linearr = NULL; + char *value = NULL; + char *slavekey = NULL; + char *slaveentry = NULL; + char key[32] = { + 0, + }; + int keylen; + char *t = NULL; + xlator_t *this = NULL; + struct slave_vol_config slave1 = { + {0}, + }; + + this = THIS; + GF_ASSERT(this); + + GF_ASSERT(volinfo); + GF_ASSERT(slave); + GF_ASSERT(host_uuid); + GF_VALIDATE_OR_GOTO(this->name, slave_voluuid, out); + + ret = glusterd_get_slave(volinfo, slave, &slavekey); + switch (ret) { + case -2: + ret = -1; + goto out; + case -1: + break; + default: + if (!is_force) + GF_ASSERT(ret > 0); + ret = dict_get_str(volinfo->gsync_slaves, slavekey, &slaveentry); + GF_ASSERT(ret == 0); + + /* same-name + same-uuid slave entries should have been filtered + * out in glusterd_op_verify_gsync_start_options(), so we can + * assert an uuid mismatch + */ + t = strtail(slaveentry, host_uuid); + if (!is_force) + GF_ASSERT(!t || *t != ':'); + + if (is_force) { + gf_msg_debug(this->name, 0, + GEOREP + " has already " + "been invoked for the %s (master) and " + "%s (slave). 
Allowing without saving " + "info again due to force command.", + volinfo->volname, slave); + ret = 0; + goto out; + } + + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVOKE_ERROR, + GEOREP + " has already been invoked for " + "the %s (master) and %s (slave) from a different " + "machine", + volinfo->volname, slave); + *op_errstr = gf_strdup(GEOREP + " already running in " + "another machine"); + ret = -1; + goto out; + } + + ret = glusterd_urltransform_single(slave, "normalize", &linearr); + if (ret == -1) + goto out; + + ret = gf_asprintf(&value, "%s:%s:%s", host_uuid, linearr[0], slave_voluuid); + + glusterd_urltransform_free(linearr, 1); + if (ret == -1) + goto out; + + /* Given the slave volume uuid, check and get any existing slave */ + memcpy(slave1.slave_voluuid, slave_voluuid, UUID_CANONICAL_FORM_LEN); + ret = dict_foreach(volinfo->gsync_slaves, _get_slave_idx_slave_voluuid, + &slave1); + + if (ret == 0) { /* New slave */ + dict_foreach(volinfo->gsync_slaves, _get_max_gsync_slave_num, &maxslv); + keylen = snprintf(key, sizeof(key), "slave%d", maxslv + 1); + + ret = dict_set_dynstrn(volinfo->gsync_slaves, key, keylen, value); + if (ret) { + GF_FREE(value); + goto out; + } + } else if (ret == -1) { /* Existing slave */ + keylen = snprintf(key, sizeof(key), "slave%d", slave1.old_slvidx); + + gf_msg_debug(this->name, 0, + "Replacing key:%s with new value" + ":%s", + key, value); + + /* Add new slave's value, with the same slave index */ + ret = dict_set_dynstrn(volinfo->gsync_slaves, key, keylen, value); + if (ret) { + GF_FREE(value); + goto out; + } + } else { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REMOTE_VOL_UUID_FAIL, + "_get_slave_idx_slave_voluuid failed!"); + GF_FREE(value); + ret = -1; + goto out; + } + + ret = glusterd_store_volinfo(volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + *op_errstr = gf_strdup( + "Failed to store the Volume " + "information"); + goto out; + } + ret = 0; +out: + return ret; +} + +static int +glusterd_op_verify_gsync_start_options(glusterd_volinfo_t *volinfo, char *slave, + char *conf_path, char *statefile, + char **op_errstr, gf_boolean_t is_force) +{ + int ret = -1; + int ret_status = 0; + gf_boolean_t is_template_in_use = _gf_false; + char msg[2048] = {0}; + uuid_t uuid = {0}; + xlator_t *this = NULL; + struct stat stbuf = { + 0, + }; + char statefiledir[PATH_MAX] = { + 0, + }; + char *statedir = NULL; + + this = THIS; + GF_ASSERT(this); + + GF_ASSERT(volinfo); + GF_ASSERT(slave); + GF_ASSERT(op_errstr); + GF_ASSERT(conf_path); + GF_ASSERT(this && this->private); + + if (GLUSTERD_STATUS_STARTED != volinfo->status) { + snprintf(msg, sizeof(msg), + "Volume %s needs to be started " + "before " GEOREP " start", + volinfo->volname); + goto out; + } + + /* check session directory as statefile may not present + * during upgrade */ + if (snprintf(statefiledir, sizeof(statefiledir), "%s", statefile) >= + sizeof(statefiledir)) { + snprintf(msg, sizeof(msg), "statefiledir truncated"); + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, "%s", + msg); + *op_errstr = gf_strdup(msg); + goto out; + } + statedir = dirname(statefiledir); + + ret = sys_lstat(statedir, &stbuf); + if (ret) { + snprintf(msg, sizeof(msg), + "Session between %s and %s has" + " not been created. Please create session and retry.", + volinfo->volname, slave); + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "%s statefile: %s", msg, statefile); + *op_errstr = gf_strdup(msg); + goto out; + } + + /* Check if the gsync slave info is stored. 
If not + * session has not been created */ + ret = glusterd_gsync_get_uuid(slave, volinfo, uuid); + if (ret) { + snprintf(msg, sizeof(msg), + "Session between %s and %s has" + " not been created. Please create session and retry.", + volinfo->volname, slave); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SESSION_CREATE_ERROR, "%s", + msg); + goto out; + } + + /*Check if the gsync is already started in cmd. inited host + * If so initiate add it into the glusterd's priv*/ + ret = gsync_status(volinfo->volname, slave, conf_path, &ret_status, + &is_template_in_use); + if (ret == 0) { + if ((ret_status == 0) && !is_force) { + snprintf(msg, sizeof(msg), + GEOREP + " session between" + " %s & %s already started", + volinfo->volname, slave); + ret = -1; + goto out; + } + } else if (ret == -1) { + snprintf(msg, sizeof(msg), + GEOREP + " start option " + "validation failed "); + goto out; + } + + if (is_template_in_use == _gf_true) { + snprintf(msg, sizeof(msg), + GEOREP + " start " + "failed : pid-file entry missing " + "in config file."); + ret = -1; + goto out; + } + + ret = glusterd_verify_gsyncd_spawn(volinfo->volname, slave); + if (ret && !is_force) { + snprintf(msg, sizeof(msg), "Unable to spawn gsyncd"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GSYNCD_SPAWN_FAILED, "%s", + msg); + } +out: + if (ret && (msg[0] != '\0')) { + *op_errstr = gf_strdup(msg); + } + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; +} + +void +glusterd_check_geo_rep_configured(glusterd_volinfo_t *volinfo, + gf_boolean_t *flag) +{ + GF_ASSERT(volinfo); + GF_ASSERT(flag); + + if (volinfo->gsync_slaves->count) + *flag = _gf_true; + else + *flag = _gf_false; + + return; +} + +/* + * is_geo_rep_active: + * This function reads the state_file and sets is_active to 1 if the + * monitor status is neither "Stopped" or "Created" + * + * RETURN VALUE: + * 0: On successful read of state_file. + * -1: error. + */ + +static int +is_geo_rep_active(glusterd_volinfo_t *volinfo, char *slave, char *conf_path, + int *is_active) +{ + dict_t *confd = NULL; + char *statefile = NULL; + char *master = NULL; + char monitor_status[PATH_MAX] = ""; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + master = volinfo->volname; + + confd = dict_new(); + if (!confd) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, + "Not able to create dict."); + goto out; + } + + ret = glusterd_gsync_get_config(master, slave, conf_path, confd); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GET_CONFIG_INFO_FAILED, + "Unable to get configuration data " + "for %s(master), %s(slave)", + master, slave); + ret = -1; + goto out; + } + + ret = dict_get_param(confd, "state_file", &statefile); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get state_file's name " + "for %s(master), %s(slave). 
Please check gsync "
+ "config file.",
+ master, slave);
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_gsync_read_frm_status(statefile, monitor_status,
+ sizeof(monitor_status));
+ if (ret <= 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STAT_FILE_READ_FAILED,
+ "Unable to read the status file for %s(master), "
+ "%s(slave)",
+ master, slave);
+ snprintf(monitor_status, sizeof(monitor_status), "defunct");
+ }
+
+ if ((!strcmp(monitor_status, "Stopped")) ||
+ (!strcmp(monitor_status, "Created"))) {
+ *is_active = 0;
+ } else {
+ *is_active = 1;
+ }
+ ret = 0;
+out:
+ if (confd)
+ dict_unref(confd);
+ return ret;
+}
+
+/*
+ * _get_slave_status:
+ * Called for each slave in the volume from dict_foreach.
+ * It calls is_geo_rep_active to get the monitor status.
+ *
+ * RETURN VALUE:
+ * 0: On successful read of state_file from is_geo_rep_active.
+ * When it is found geo-rep is already active from previous calls.
+ * When there is no slave.
+ * -1: On error.
+ */
+
+int
+_get_slave_status(dict_t *dict, char *key, data_t *value, void *data)
+{
+ gsync_status_param_t *param = NULL;
+ char *slave = NULL;
+ char *slave_url = NULL;
+ char *slave_vol = NULL;
+ char *slave_host = NULL;
+ char *errmsg = NULL;
+ char conf_path[PATH_MAX] = "";
+ int ret = -1;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+
+ param = (gsync_status_param_t *)data;
+
+ GF_ASSERT(param);
+ GF_ASSERT(param->volinfo);
+ if (param->is_active) {
+ ret = 0;
+ goto out;
+ }
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ priv = this->private;
+ if (priv == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTERD_PRIV_NOT_FOUND,
+ "priv of glusterd not present");
+ goto out;
+ }
+
+ slave = strchr(value->data, ':');
+ if (!slave) {
+ ret = 0;
+ goto out;
+ }
+ slave++;
+
+ ret = glusterd_get_slave_info(slave, &slave_url, &slave_host, &slave_vol,
+ &errmsg);
+ if (ret) {
+ if (errmsg)
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVEINFO_FETCH_ERROR,
+ "Unable to fetch"
+ " slave details. Error: %s",
+ errmsg);
+ else
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVEINFO_FETCH_ERROR,
+ "Unable to fetch slave details.");
+ ret = -1;
+ goto out;
+ }
+
+ ret = snprintf(conf_path, sizeof(conf_path) - 1,
+ "%s/" GEOREP "/%s_%s_%s/gsyncd.conf", priv->workdir,
+ param->volinfo->volname, slave_host, slave_vol);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CONF_PATH_ASSIGN_FAILED,
+ "Unable to assign conf_path.");
+ ret = -1;
+ goto out;
+ }
+ conf_path[ret] = '\0';
+
+ ret = is_geo_rep_active(param->volinfo, slave, conf_path,
+ &param->is_active);
+out:
+ if (errmsg)
+ GF_FREE(errmsg);
+
+ if (slave_vol)
+ GF_FREE(slave_vol);
+
+ if (slave_url)
+ GF_FREE(slave_url);
+ if (slave_host)
+ GF_FREE(slave_host);
+
+ return ret;
+}
+
+/* glusterd_check_geo_rep_running:
+ * Checks if any geo-rep session is running for the volume.
+ *
+ * RETURN VALUE:
+ * Sets param.active to true if any geo-rep session is active.
+ * This function sets op_errstr during some error and when any geo-rep
+ * session is active. It is caller's responsibility to free op_errstr
+ * in above cases.
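+ * Returns 0 when the check completes (param->is_active is set to 1 if any
+ * session is active) and -1 when fetching the status of any slave fails.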
+ */ + +int +glusterd_check_geo_rep_running(gsync_status_param_t *param, char **op_errstr) +{ + char msg[2048] = { + 0, + }; + gf_boolean_t enabled = _gf_false; + int ret = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(param); + GF_ASSERT(param->volinfo); + GF_ASSERT(op_errstr); + + glusterd_check_geo_rep_configured(param->volinfo, &enabled); + + if (enabled) { + ret = dict_foreach(param->volinfo->gsync_slaves, _get_slave_status, + param); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVEINFO_FETCH_ERROR, + "_get_slave_satus failed"); + snprintf(msg, sizeof(msg), + GEOREP + " Unable to" + " get the status of active " GEOREP + "" + " session for the volume '%s'.\n" + " Please check the log file for" + " more info.", + param->volinfo->volname); + *op_errstr = gf_strdup(msg); + ret = -1; + goto out; + } + + if (param->is_active) { + snprintf(msg, sizeof(msg), + GEOREP + " sessions" + " are active for the volume %s.\nStop" + " " GEOREP + " sessions involved in this" + " volume. Use 'volume " GEOREP + " status' command for more info.", + param->volinfo->volname); + *op_errstr = gf_strdup(msg); + goto out; + } + } +out: + return ret; +} + +static int +glusterd_op_verify_gsync_running(glusterd_volinfo_t *volinfo, char *slave, + char *conf_path, char **op_errstr) +{ + int pfd = -1; + int ret = -1; + char msg[2048] = {0}; + char pidfile[PATH_MAX] = { + 0, + }; + gf_boolean_t is_template_in_use = _gf_false; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + GF_ASSERT(THIS && THIS->private); + GF_ASSERT(volinfo); + GF_ASSERT(slave); + GF_ASSERT(conf_path); + GF_ASSERT(op_errstr); + + if (GLUSTERD_STATUS_STARTED != volinfo->status) { + snprintf(msg, sizeof(msg), + "Volume %s needs to be started " + "before " GEOREP " start", + volinfo->volname); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_GEO_REP_START_FAILED, + "Volume is not in a started state, Volname=%s", + volinfo->volname, NULL); + + goto out; + } + + pfd = gsyncd_getpidfile(volinfo->volname, slave, pidfile, conf_path, + &is_template_in_use); + if (pfd == -2) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VALIDATE_FAILED, + GEOREP " stop validation failed for %s & %s", volinfo->volname, + slave); + ret = -1; + goto out; + } + if (gsync_status_byfd(pfd) == -1) { + snprintf(msg, sizeof(msg), + GEOREP + " session b/w %s & %s is " + "not running on this node.", + volinfo->volname, slave); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SESSION_INACTIVE, "%s", msg); + ret = -1; + /* monitor gsyncd already dead */ + goto out; + } + + if (is_template_in_use) { + snprintf(msg, sizeof(msg), + "pid-file entry missing in " + "the config file(%s).", + conf_path); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PIDFILE_NOT_FOUND, "%s", + msg); + ret = -1; + goto out; + } + + if (pfd < 0) + goto out; + + ret = 0; +out: + if (ret && (msg[0] != '\0')) { + *op_errstr = gf_strdup(msg); + } + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; +} + +static int +glusterd_verify_gsync_status_opts(dict_t *dict, char **op_errstr) +{ + char *slave = NULL; + char *volname = NULL; + char errmsg[PATH_MAX] = { + 0, + }; + glusterd_volinfo_t *volinfo = NULL; + int ret = 0; + char *conf_path = NULL; + char *slave_url = NULL; + char *slave_host = NULL; + char *slave_vol = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + if (THIS) + priv = THIS->private; + if (priv == NULL) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTERD_PRIV_NOT_FOUND, + "priv of 
glusterd not present"); + *op_errstr = gf_strdup("glusterd defunct"); + goto out; + } + + ret = dict_get_str(dict, "master", &volname); + if (ret < 0) { + ret = 0; + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOL_NOT_FOUND, + "volume name does not exist"); + snprintf(errmsg, sizeof(errmsg), + "Volume name %s does not" + " exist", + volname); + *op_errstr = gf_strdup(errmsg); + goto out; + } + + ret = dict_get_str(dict, "slave", &slave); + if (ret < 0) { + ret = 0; + goto out; + } + + ret = glusterd_get_slave_details_confpath(volinfo, dict, &slave_url, + &slave_host, &slave_vol, + &conf_path, op_errstr); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVEINFO_FETCH_ERROR, + "Unable to fetch slave or confpath details."); + ret = -1; + goto out; + } + +out: + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; +} + +int +glusterd_op_gsync_args_get(dict_t *dict, char **op_errstr, char **master, + char **slave, char **host_uuid) +{ + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(dict); + GF_ASSERT(op_errstr); + + if (master) { + ret = dict_get_str(dict, "master", master); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED, + "master not found"); + *op_errstr = gf_strdup("master not found"); + goto out; + } + } + + if (slave) { + ret = dict_get_str(dict, "slave", slave); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED, + "slave not found"); + *op_errstr = gf_strdup("slave not found"); + goto out; + } + } + + if (host_uuid) { + ret = dict_get_str(dict, "host-uuid", host_uuid); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED, + "host_uuid not found"); + *op_errstr = gf_strdup("host_uuid not found"); + goto out; + } + } + + ret = 0; +out: + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; +} + +int +glusterd_op_stage_sys_exec(dict_t *dict, char **op_errstr) +{ + char errmsg[PATH_MAX] = ""; + char *command = NULL; + char command_path[PATH_MAX] = ""; + struct stat st = { + 0, + }; + int ret = -1; + glusterd_conf_t *conf = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + conf = this->private; + GF_ASSERT(conf); + + if (conf->op_version < 2) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UNSUPPORTED_VERSION, + "Op Version not supported."); + snprintf(errmsg, sizeof(errmsg), + "One or more nodes do not" + " support the required op version."); + *op_errstr = gf_strdup(errmsg); + ret = -1; + goto out; + } + + ret = dict_get_str(dict, "command", &command); + if (ret) { + strcpy(errmsg, "internal error"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get command from dict"); + goto out; + } + + /* enforce local occurrence of the command */ + if (strchr(command, '/')) { + strcpy(errmsg, "invalid command name"); + ret = -1; + goto out; + } + + sprintf(command_path, GSYNCD_PREFIX "/peer_%s", command); + /* check if it's executable */ + ret = sys_access(command_path, X_OK); + if (!ret) + /* check if it's a regular file */ + ret = sys_stat(command_path, &st); + if (!ret && !S_ISREG(st.st_mode)) + ret = -1; + +out: + if (ret) { + if (errmsg[0] == '\0') { + if (command) + snprintf(errmsg, sizeof(errmsg), + "gsync peer_%s command not found.", command); + else + snprintf(errmsg, sizeof(errmsg), "%s", + "gsync peer command was not " + "specified"); + } + *op_errstr = gf_strdup(errmsg); + gf_msg(this->name, 
GF_LOG_ERROR, 0, GD_MSG_PEER_CMD_ERROR, "%s", + errmsg); + } + + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; +} + +int +glusterd_op_stage_copy_file(dict_t *dict, char **op_errstr) +{ + char abs_filename[PATH_MAX] = ""; + char errmsg[PATH_MAX] = ""; + char *filename = NULL; + char *host_uuid = NULL; + char uuid_str[64] = {0}; + int ret = -1; + glusterd_conf_t *priv = NULL; + struct stat stbuf = { + 0, + }; + xlator_t *this = NULL; + char workdir[PATH_MAX] = { + 0, + }; + char realpath_filename[PATH_MAX] = { + 0, + }; + char realpath_workdir[PATH_MAX] = { + 0, + }; + int32_t len = 0; + + this = THIS; + GF_ASSERT(this); + + if (THIS) + priv = THIS->private; + if (priv == NULL) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTERD_PRIV_NOT_FOUND, + "priv of glusterd not present"); + *op_errstr = gf_strdup("glusterd defunct"); + goto out; + } + + if (priv->op_version < 2) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UNSUPPORTED_VERSION, + "Op Version not supported."); + snprintf(errmsg, sizeof(errmsg), + "One or more nodes do not" + " support the required op version."); + *op_errstr = gf_strdup(errmsg); + ret = -1; + goto out; + } + + ret = dict_get_str(dict, "host-uuid", &host_uuid); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to fetch host-uuid from dict."); + goto out; + } + + uuid_utoa_r(MY_UUID, uuid_str); + if (!strcmp(uuid_str, host_uuid)) { + ret = dict_get_str(dict, "source", &filename); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to fetch filename from dict."); + *op_errstr = gf_strdup("command unsuccessful"); + goto out; + } + len = snprintf(abs_filename, sizeof(abs_filename), "%s/%s", + priv->workdir, filename); + if ((len < 0) || (len >= sizeof(abs_filename))) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_COPY_FAIL, NULL); + ret = -1; + goto out; + } + + if (!realpath(priv->workdir, realpath_workdir)) { + len = snprintf(errmsg, sizeof(errmsg), + "Failed to " + "get realpath of %s: %s", + priv->workdir, strerror(errno)); + if (len < 0) { + strcpy(errmsg, "<error>"); + } + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_REALPATH_GET_FAIL, + "Realpath=%s, Reason=%s", priv->workdir, strerror(errno), + NULL); + *op_errstr = gf_strdup(errmsg); + ret = -1; + goto out; + } + + if (!realpath(abs_filename, realpath_filename)) { + snprintf(errmsg, sizeof(errmsg), + "Failed to get " + "realpath of %s: %s", + filename, strerror(errno)); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_REALPATH_GET_FAIL, + "Filename=%s, Reason=%s", filename, strerror(errno), NULL); + *op_errstr = gf_strdup(errmsg); + ret = -1; + goto out; + } + + /* Add Trailing slash to workdir, without slash strncmp + will succeed for /var/lib/glusterd_bad */ + len = snprintf(workdir, sizeof(workdir), "%s/", realpath_workdir); + if ((len < 0) || (len >= sizeof(workdir))) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_COPY_FAIL, NULL); + ret = -1; + goto out; + } + + /* Protect against file copy outside $workdir */ + if (strncmp(workdir, realpath_filename, strlen(workdir))) { + len = snprintf(errmsg, sizeof(errmsg), + "Source file" + " is outside of %s directory", + priv->workdir); + if (len < 0) { + strcpy(errmsg, "<error>"); + } + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_SRC_FILE_ERROR, errmsg, + NULL); + *op_errstr = gf_strdup(errmsg); + ret = -1; + goto out; + } + + ret = sys_lstat(abs_filename, &stbuf); + if (ret) { + len = snprintf(errmsg, sizeof(errmsg), + "Source file" + " does not exist in %s", + 
priv->workdir); + if (len < 0) { + strcpy(errmsg, "<error>"); + } + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_SRC_FILE_ERROR, errmsg, + NULL); + *op_errstr = gf_strdup(errmsg); + goto out; + } + + if (!S_ISREG(stbuf.st_mode)) { + snprintf(errmsg, sizeof(errmsg), + "Source file" + " is not a regular file."); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_SRC_FILE_ERROR, errmsg, + NULL); + *op_errstr = gf_strdup(errmsg); + ret = -1; + goto out; + } + } + + ret = 0; +out: + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; +} + +int +glusterd_get_statefile_name(glusterd_volinfo_t *volinfo, char *slave, + char *conf_path, char **statefile, + gf_boolean_t *is_template_in_use) +{ + char *master = NULL; + char *buf = NULL; + char *working_conf_path = NULL; + char temp_conf_path[PATH_MAX] = ""; + dict_t *confd = NULL; + glusterd_conf_t *priv = NULL; + int ret = -1; + struct stat stbuf = { + 0, + }; + xlator_t *this = NULL; + int32_t len = 0; + + this = THIS; + GF_ASSERT(this); + + GF_ASSERT(this->private); + GF_ASSERT(volinfo); + GF_ASSERT(conf_path); + GF_ASSERT(is_template_in_use); + + master = volinfo->volname; + + confd = dict_new(); + if (!confd) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, + "Unable to create new dict"); + goto out; + } + + priv = THIS->private; + + len = snprintf(temp_conf_path, sizeof(temp_conf_path), + "%s/" GSYNC_CONF_TEMPLATE, priv->workdir); + if ((len < 0) || (len >= sizeof(temp_conf_path))) { + goto out; + } + + ret = sys_lstat(conf_path, &stbuf); + if (!ret) { + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_CONFIG_INFO, + "Using passed config template(%s).", conf_path); + working_conf_path = conf_path; + } else { + gf_msg(this->name, GF_LOG_WARNING, ENOENT, GD_MSG_FILE_OP_FAILED, + "Config file (%s) missing. Looking for template config" + " file (%s)", + conf_path, temp_conf_path); + ret = sys_lstat(temp_conf_path, &stbuf); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, ENOENT, GD_MSG_FILE_OP_FAILED, + "Template " + "config file (%s) missing.", + temp_conf_path); + goto out; + } + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_DEFAULT_TEMP_CONFIG, + "Using default config template(%s).", temp_conf_path); + working_conf_path = temp_conf_path; + *is_template_in_use = _gf_true; + } + +fetch_data: + ret = glusterd_gsync_get_config(master, slave, working_conf_path, confd); + if (ret) { + if (*is_template_in_use == _gf_false) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GET_CONFIG_INFO_FAILED, + "Unable to get configuration data " + "for %s(master), %s(slave). " + "Trying template config.", + master, slave); + working_conf_path = temp_conf_path; + *is_template_in_use = _gf_true; + goto fetch_data; + } else { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GET_CONFIG_INFO_FAILED, + "Unable to get configuration data " + "for %s(master), %s(slave) from " + "template config", + master, slave); + goto out; + } + } + + ret = dict_get_param(confd, "state_file", &buf); + if (ret) { + if (*is_template_in_use == _gf_false) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get state_file's name. 
" + "Trying template config."); + working_conf_path = temp_conf_path; + *is_template_in_use = _gf_true; + goto fetch_data; + } else { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_GET_STATEFILE_NAME_FAILED, + "Unable to get state_file's " + "name from template."); + goto out; + } + } + + ret = 0; +out: + if (buf) { + *statefile = gf_strdup(buf); + if (!*statefile) + ret = -1; + } + + if (confd) + dict_unref(confd); + + gf_msg_debug(this->name, 0, "Returning %d ", ret); + return ret; +} + +int +glusterd_create_status_file(char *master, char *slave, char *slave_host, + char *slave_vol, char *status) +{ + int ret = -1; + runner_t runner = { + 0, + }; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + if (THIS) + priv = THIS->private; + if (priv == NULL) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTERD_PRIV_NOT_FOUND, + "priv of glusterd not present"); + goto out; + } + + if (!status) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STATUS_NULL, "Status Empty"); + goto out; + } + gf_msg_debug(this->name, 0, "slave = %s", slave); + + runinit(&runner); + runner_add_args(&runner, GSYNCD_PREFIX "/gsyncd", "--create", status, "-c", + NULL); + runner_argprintf(&runner, "%s/" GEOREP "/%s_%s_%s/gsyncd.conf", + priv->workdir, master, slave_host, slave_vol); + runner_argprintf(&runner, "--iprefix=%s", DATADIR); + runner_argprintf(&runner, ":%s", master); + runner_add_args(&runner, slave, NULL); + synclock_unlock(&priv->big_lock); + ret = runner_run(&runner); + synclock_lock(&priv->big_lock); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STATUSFILE_CREATE_FAILED, + "Creating status file failed."); + ret = -1; + goto out; + } + + ret = 0; +out: + gf_msg_debug(this->name, 0, "returning %d", ret); + return ret; +} + +static int +glusterd_verify_slave(char *volname, char *slave_url, char *slave_vol, + int ssh_port, char **op_errstr, + gf_boolean_t *is_force_blocker) +{ + int32_t ret = -1; + runner_t runner = { + 0, + }; + char log_file_path[PATH_MAX] = ""; + char buf[PATH_MAX] = ""; + char *tmp = NULL; + char *slave_url_buf = NULL; + char *save_ptr = NULL; + char *slave_user = NULL; + char *slave_ip = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + char *af = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + GF_ASSERT(volname); + GF_ASSERT(slave_url); + GF_ASSERT(slave_vol); + + /* Fetch the slave_user and slave_ip from the slave_url. + * If the slave_user is not present. 
Use "root" + */ + if (strstr(slave_url, "@")) { + slave_url_buf = gf_strdup(slave_url); + if (!slave_url_buf) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_STRDUP_FAILED, + "Slave_url=%s", slave_url, NULL); + goto out; + } + + slave_user = strtok_r(slave_url_buf, "@", &save_ptr); + slave_ip = strtok_r(NULL, "@", &save_ptr); + } else { + slave_user = "root"; + slave_ip = slave_url; + } + + if (!slave_user || !slave_ip) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVE_URL_INVALID, + "Invalid slave url."); + goto out; + } + + snprintf(log_file_path, sizeof(log_file_path), "%s/create_verify_log", + priv->logdir); + + runinit(&runner); + runner_add_args(&runner, GSYNCD_PREFIX "/gverify.sh", NULL); + runner_argprintf(&runner, "%s", volname); + runner_argprintf(&runner, "%s", slave_user); + runner_argprintf(&runner, "%s", slave_ip); + runner_argprintf(&runner, "%s", slave_vol); + runner_argprintf(&runner, "%d", ssh_port); + runner_argprintf(&runner, "%s", log_file_path); + ret = dict_get_str(this->options, "transport.address-family", &af); + if (ret) + af = "-"; + + runner_argprintf(&runner, "%s", af); + + gf_msg_debug(this->name, 0, "gverify Args = %s %s %s %s %s %s %s %s", + runner.argv[0], runner.argv[1], runner.argv[2], runner.argv[3], + runner.argv[4], runner.argv[5], runner.argv[6], + runner.argv[7]); + runner_redir(&runner, STDOUT_FILENO, RUN_PIPE); + synclock_unlock(&priv->big_lock); + ret = runner_run(&runner); + synclock_lock(&priv->big_lock); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_SLAVE, + "Not a valid slave"); + ret = glusterd_gsync_read_frm_status(log_file_path, buf, sizeof(buf)); + if (ret <= 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_READ_ERROR, + "Unable to read from %s", log_file_path); + goto out; + } + + /* Tokenize the error message from gverify.sh to figure out + * if the error is a force blocker or not. */ + tmp = strtok_r(buf, "|", &save_ptr); + if (!tmp) { + ret = -1; + goto out; + } + if (!strcmp(tmp, "FORCE_BLOCKER")) + *is_force_blocker = 1; + else { + /* No FORCE_BLOCKER flag present so all that is + * present is the error message. 
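+ * Copy that message to op_errstr and fail the verification without
+ * marking it as a force blocker.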
*/ + *is_force_blocker = 0; + *op_errstr = gf_strdup(tmp); + ret = -1; + goto out; + } + + /* Copy rest of the error message to op_errstr */ + tmp = strtok_r(NULL, "|", &save_ptr); + if (tmp) + *op_errstr = gf_strdup(tmp); + ret = -1; + goto out; + } + ret = 0; +out: + GF_FREE(slave_url_buf); + sys_unlink(log_file_path); + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; +} + +/** @slave_ip remains unmodified */ +int +glusterd_geo_rep_parse_slave(char *slave_url, char **hostname, char **op_errstr) +{ + int ret = -1; + char *tmp = NULL; + char *save_ptr = NULL; + char *host = NULL; + char errmsg[PATH_MAX] = ""; + char *saved_url = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + GF_ASSERT(slave_url); + GF_ASSERT(*slave_url); + + saved_url = gf_strdup(slave_url); + if (!saved_url) + goto out; + + /* Checking if hostname has user specified */ + host = strstr(saved_url, "@"); + if (!host) { /* no user specified */ + if (hostname) { + *hostname = gf_strdup(saved_url); + if (!*hostname) + goto out; + } + + ret = 0; + goto out; + } else { + /* Moving the host past the '@' and checking if the + * actual hostname also has '@' */ + host++; + if (strstr(host, "@")) { + gf_msg_debug(this->name, 0, "host = %s", host); + ret = snprintf(errmsg, sizeof(errmsg) - 1, "Invalid Hostname (%s).", + host); + errmsg[ret] = '\0'; + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, "%s", + errmsg); + ret = -1; + if (op_errstr) + *op_errstr = gf_strdup(errmsg); + goto out; + } + + ret = -1; + + /** + * preliminary check for valid slave format. + */ + tmp = strtok_r(saved_url, "@", &save_ptr); + tmp = strtok_r(NULL, "@", &save_ptr); + if (!tmp) + goto out; + if (hostname) { + *hostname = gf_strdup(tmp); + if (!*hostname) + goto out; + } + } + + ret = 0; +out: + GF_FREE(saved_url); + if (ret) + if (hostname) + GF_FREE(*hostname); + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; +} + +/* Return -1 only if there is a match in volume uuid */ +static int +get_slavehost_from_voluuid(dict_t *dict, char *key, data_t *value, void *data) +{ + char *slave_info = NULL; + char *tmp = NULL; + char *slave_host = NULL; + xlator_t *this = NULL; + struct slave_vol_config *slave_vol = NULL; + int i = 0; + int ret = -1; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + + slave_vol = data; + slave_info = value->data; + + gf_msg_debug(this->name, 0, "slave_info:%s !", slave_info); + + if (!(slave_info) || strlen(slave_info) == 0) { + /* no slaves present, peace */ + ret = 0; + goto out; + } + + /* slave format: + * master_node_uuid:ssh://slave_host::slave_vol:slave_voluuid */ + while (i++ < 5) { + slave_info = strchr(slave_info, ':'); + if (slave_info) + slave_info++; + else + break; + } + + if (!(slave_info) || strlen(slave_info) == 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVE_VOL_PARSE_FAIL, + "slave_info format is wrong!"); + ret = -2; + goto out; + } else { + if (strcmp(slave_info, slave_vol->slave_voluuid) == 0) { + ret = -1; + + /* get corresponding slave host for reference*/ + slave_host = value->data; + slave_host = strstr(slave_host, "://"); + if (slave_host) { + slave_host += 3; + } else { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVE_VOL_PARSE_FAIL, + "Invalid slave_host format!"); + ret = -2; + goto out; + } + /* To go past username in non-root geo-rep session */ + tmp = strchr(slave_host, '@'); + if (tmp) { + if ((tmp - slave_host) >= LOGIN_NAME_MAX) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_SLAVE_VOL_PARSE_FAIL, + 
"Invalid slave user length in %s", slave_host); + ret = -2; + goto out; + } + strncpy(slave_vol->old_slvuser, slave_host, (tmp - slave_host)); + slave_vol->old_slvuser[(tmp - slave_host) + 1] = '\0'; + slave_host = tmp + 1; + } else + strcpy(slave_vol->old_slvuser, "root"); + + tmp = strchr(slave_host, ':'); + if (!tmp) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVE_VOL_PARSE_FAIL, + "Invalid slave_host!"); + ret = -2; + goto out; + } + + strncpy(slave_vol->old_slvhost, slave_host, (tmp - slave_host)); + slave_vol->old_slvhost[(tmp - slave_host) + 1] = '\0'; + + goto out; + } + } + + ret = 0; +out: + return ret; +} + +/* Given slave host and slave volume, check whether slave volume uuid + * already present. + * If slave volume uuid is present, get corresponding slave host + * for reference */ +static int +glusterd_get_slavehost_from_voluuid(glusterd_volinfo_t *volinfo, + char *slave_host, char *slave_vol, + struct slave_vol_config *slave1) +{ + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + + GF_VALIDATE_OR_GOTO(this->name, volinfo, out); + + ret = dict_foreach(volinfo->gsync_slaves, get_slavehost_from_voluuid, + slave1); +out: + return ret; +} + +int +glusterd_op_stage_gsync_create(dict_t *dict, char **op_errstr) +{ + char *down_peerstr = NULL; + char *slave = NULL; + char *volname = NULL; + char *host_uuid = NULL; + char *statefile = NULL; + char *slave_url = NULL; + char *slave_host = NULL; + char *slave_vol = NULL; + char *conf_path = NULL; + char errmsg[PATH_MAX] = ""; + char common_pem_file[PATH_MAX] = ""; + char hook_script[PATH_MAX] = ""; + char uuid_str[64] = ""; + int ret = -1; + int is_pem_push = -1; + int ssh_port = 22; + gf_boolean_t is_force = -1; + gf_boolean_t is_no_verify = -1; + gf_boolean_t is_force_blocker = -1; + gf_boolean_t is_template_in_use = _gf_false; + glusterd_conf_t *conf = NULL; + glusterd_volinfo_t *volinfo = NULL; + struct stat stbuf = { + 0, + }; + xlator_t *this = NULL; + struct slave_vol_config slave1 = { + {0}, + }; + char old_slave_url[SLAVE_URL_INFO_MAX] = {0}; + char old_confpath[PATH_MAX] = {0}; + gf_boolean_t is_running = _gf_false; + char *statedir = NULL; + char statefiledir[PATH_MAX] = { + 0, + }; + gf_boolean_t is_different_slavehost = _gf_false; + gf_boolean_t is_different_username = _gf_false; + char *slave_user = NULL; + char *save_ptr = NULL; + char *slave_url_buf = NULL; + int32_t len = 0; + + this = THIS; + GF_ASSERT(this); + conf = this->private; + GF_ASSERT(conf); + + ret = glusterd_op_gsync_args_get(dict, op_errstr, &volname, &slave, + &host_uuid); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_ARG_FETCH_ERROR, + "Unable to fetch arguments"); + gf_msg_debug(this->name, 0, "Returning %d", ret); + return -1; + } + + if (conf->op_version < 2) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UNSUPPORTED_VERSION, + "Op Version not supported."); + snprintf(errmsg, sizeof(errmsg), + "One or more nodes do not" + " support the required op version."); + *op_errstr = gf_strdup(errmsg); + ret = -1; + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOL_NOT_FOUND, + "volume name does not exist"); + snprintf(errmsg, sizeof(errmsg), + "Volume name %s does not" + " exist", + volname); + goto out; + } + + ret = glusterd_get_slave_details_confpath(volinfo, dict, &slave_url, + &slave_host, &slave_vol, + &conf_path, op_errstr); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVEINFO_FETCH_ERROR, + "Unable to fetch slave or confpath 
details."); + ret = -1; + goto out; + } + + is_force = dict_get_str_boolean(dict, "force", _gf_false); + + uuid_utoa_r(MY_UUID, uuid_str); + if (!strcmp(uuid_str, host_uuid)) { + ret = glusterd_are_vol_all_peers_up(volinfo, &conf->peers, + &down_peerstr); + if ((ret == _gf_false) && !is_force) { + snprintf(errmsg, sizeof(errmsg), + "Peer %s," + " which is a part of %s volume, is" + " down. Please bring up the peer and" + " retry.", + down_peerstr, volinfo->volname); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PEER_DISCONNECTED, "%s", + errmsg); + *op_errstr = gf_strdup(errmsg); + GF_FREE(down_peerstr); + down_peerstr = NULL; + gf_msg_debug(this->name, 0, "Returning %d", ret); + return -1; + } else if (ret == _gf_false) { + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_PEER_DISCONNECTED, + "Peer %s, which is a part of %s volume, is" + " down. Force creating geo-rep session." + " On bringing up the peer, re-run" + " \"gluster system:: execute" + " gsec_create\" and \"gluster volume" + " geo-replication %s %s create push-pem" + " force\"", + down_peerstr, volinfo->volname, volinfo->volname, slave); + GF_FREE(down_peerstr); + down_peerstr = NULL; + } + + ret = dict_get_int32(dict, "ssh_port", &ssh_port); + if (ret < 0 && ret != -ENOENT) { + snprintf(errmsg, sizeof(errmsg), + "Fetching ssh_port failed while " + "handling " GEOREP " options"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", + errmsg); + goto out; + } + + is_no_verify = dict_get_str_boolean(dict, "no_verify", _gf_false); + + if (!is_no_verify) { + /* Checking if slave host is pingable, has proper passwordless + * ssh login setup, slave volume is created, slave vol is empty, + * and if it has enough memory and bypass in case of force if + * the error is not a force blocker */ + ret = glusterd_verify_slave(volname, slave_url, slave_vol, ssh_port, + op_errstr, &is_force_blocker); + if (ret) { + if (is_force && !is_force_blocker) { + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_INVALID_SLAVE, + "%s is not a valid slave " + "volume. Error: %s. Force " + "creating geo-rep" + " session.", + slave, *op_errstr); + } else { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_SLAVE, + "%s is not a valid slave " + "volume. Error: %s", + slave, *op_errstr); + ret = -1; + + goto out; + } + } + } + + ret = dict_get_int32(dict, "push_pem", &is_pem_push); + if (!ret && is_pem_push) { + ret = snprintf(common_pem_file, sizeof(common_pem_file), + "%s" GLUSTERD_COMMON_PEM_PUB_FILE, conf->workdir); + if ((ret < 0) || (ret >= sizeof(common_pem_file))) { + ret = -1; + goto out; + } + + ret = snprintf(hook_script, sizeof(hook_script), + "%s" GLUSTERD_CREATE_HOOK_SCRIPT, conf->workdir); + if ((ret < 0) || (ret >= sizeof(hook_script))) { + ret = -1; + goto out; + } + + ret = sys_lstat(common_pem_file, &stbuf); + if (ret) { + len = snprintf(errmsg, sizeof(errmsg), + "%s" + " required for push-pem is" + " not present. Please run" + " \"gluster system:: execute" + " gsec_create\"", + common_pem_file); + if (len < 0) { + strcpy(errmsg, "<error>"); + } + gf_msg(this->name, GF_LOG_ERROR, ENOENT, GD_MSG_FILE_OP_FAILED, + "%s", errmsg); + *op_errstr = gf_strdup(errmsg); + ret = -1; + goto out; + } + + ret = sys_lstat(hook_script, &stbuf); + if (ret) { + len = snprintf(errmsg, sizeof(errmsg), + "The hook-script (%s) " + "required for push-pem is not " + "present. 
Please install the " + "hook-script and retry", + hook_script); + if (len < 0) { + strcpy(errmsg, "<error>"); + } + gf_msg(this->name, GF_LOG_ERROR, ENOENT, GD_MSG_FILE_OP_FAILED, + "%s", errmsg); + *op_errstr = gf_strdup(errmsg); + ret = -1; + goto out; + } + + if (!S_ISREG(stbuf.st_mode)) { + len = snprintf(errmsg, sizeof(errmsg), + "%s" + " required for push-pem is" + " not a regular file. Please" + " run \"gluster system:: " + "execute gsec_create\"", + common_pem_file); + if (len < 0) { + strcpy(errmsg, "<error>"); + } + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REG_FILE_MISSING, + "%s", errmsg); + ret = -1; + goto out; + } + } + } + + ret = glusterd_get_statefile_name(volinfo, slave, conf_path, &statefile, + &is_template_in_use); + if (ret) { + if (!strstr(slave, "::")) + snprintf(errmsg, sizeof(errmsg), "%s is not a valid slave url.", + slave); + else + snprintf(errmsg, sizeof(errmsg), + "Please check gsync " + "config file. Unable to get statefile's name"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STATEFILE_NAME_NOT_FOUND, + "%s", errmsg); + ret = -1; + goto out; + } + + ret = dict_set_str(dict, "statefile", statefile); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to store statefile path"); + goto out; + } + + if (snprintf(statefiledir, sizeof(statefiledir), "%s", statefile) >= + sizeof(statefiledir)) { + snprintf(errmsg, sizeof(errmsg), "Failed copying statefiledir"); + goto out; + } + statedir = dirname(statefiledir); + + ret = sys_lstat(statedir, &stbuf); + if (!ret && !is_force) { + snprintf(errmsg, sizeof(errmsg), + "Session between %s" + " and %s is already created.", + volinfo->volname, slave); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SESSION_ALREADY_EXIST, "%s", + errmsg); + ret = -1; + goto out; + } else if (!ret) + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_FORCE_CREATE_SESSION, + "Session between %s and %s is already created. Force" + " creating again.", + volinfo->volname, slave); + + ret = glusterd_get_slave_voluuid(slave_host, slave_vol, + slave1.slave_voluuid); + if ((ret) || (strlen(slave1.slave_voluuid) == 0)) { + snprintf(errmsg, sizeof(errmsg), "Unable to get remote volume uuid."); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REMOTE_VOL_UUID_FAIL, "%s", + errmsg); + ret = -1; + goto out; + } + + ret = dict_set_dynstr_with_alloc(dict, "slave_voluuid", + slave1.slave_voluuid); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to set slave volume uuid in the dict"); + goto out; + } + + /* Check whether session is already created using slave volume uuid */ + ret = glusterd_get_slavehost_from_voluuid(volinfo, slave_host, slave_vol, + &slave1); + if (ret == -1) { + if (!is_force) { + snprintf(errmsg, sizeof(errmsg), + "Session between %s" + " and %s:%s is already created! Cannot create " + "with new slave:%s again!", + volinfo->volname, slave1.old_slvhost, slave_vol, + slave_host); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FORCE_CREATE_SESSION, + "Session between" + " %s and %s:%s is already created! " + "Cannot create with new slave:%s again!", + volinfo->volname, slave1.old_slvhost, slave_vol, slave_host); + goto out; + } + + /* There is a remote possibility that slave_host can be NULL when + control reaches here. Add a check so we wouldn't crash in next + line */ + if (!slave_host) + goto out; + + /* Now, check whether session is already started.If so, warn!*/ + is_different_slavehost = (strcmp(slave_host, slave1.old_slvhost) != 0) + ? 
_gf_true + : _gf_false; + + if (strstr(slave_url, "@")) { + slave_url_buf = gf_strdup(slave_url); + if (!slave_url_buf) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "Unable to allocate memory"); + ret = -1; + goto out; + } + slave_user = strtok_r(slave_url_buf, "@", &save_ptr); + } else + slave_user = "root"; + is_different_username = (strcmp(slave_user, slave1.old_slvuser) != 0) + ? _gf_true + : _gf_false; + + /* Do the check, only if different slave host/slave user */ + if (is_different_slavehost || is_different_username) { + len = snprintf(old_confpath, sizeof(old_confpath), + "%s/" GEOREP "/%s_%s_%s/gsyncd.conf", conf->workdir, + volinfo->volname, slave1.old_slvhost, slave_vol); + if ((len < 0) || (len >= sizeof(old_confpath))) { + ret = -1; + goto out; + } + + /* construct old slave url with (old) slave host */ + len = snprintf(old_slave_url, sizeof(old_slave_url), "%s::%s", + slave1.old_slvhost, slave_vol); + if ((len < 0) || (len >= sizeof(old_slave_url))) { + ret = -1; + goto out; + } + + ret = glusterd_check_gsync_running_local( + volinfo->volname, old_slave_url, old_confpath, &is_running); + if (_gf_true == is_running) { + (void)snprintf(errmsg, sizeof(errmsg), + "Geo" + "-replication session between %s and %s" + " is still active. Please stop the " + "session and retry.", + volinfo->volname, old_slave_url); + ret = -1; + goto out; + } + } + + ret = dict_set_dynstr_with_alloc(dict, "old_slavehost", + slave1.old_slvhost); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to set old_slavehost in the dict"); + goto out; + } + + ret = dict_set_int32(dict, "existing_session", _gf_true); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to set existing_session in the dict"); + goto out; + } + } else if (ret == -2) { + snprintf(errmsg, sizeof(errmsg), + "get_slavehost_from_voluuid" + " failed for %s::%s. Please check the glusterd logs.", + slave_host, slave_vol); + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_FORCE_CREATE_SESSION, + "get_slavehost_from_voluuid failed %s %s!!", slave_host, + slave_vol); + goto out; + } + + ret = glusterd_verify_gsyncd_spawn(volinfo->volname, slave); + if (ret) { + snprintf(errmsg, sizeof(errmsg), "Unable to spawn gsyncd."); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GSYNCD_SPAWN_FAILED, "%s", + errmsg); + goto out; + } + + ret = 0; +out: + + if (ret && errmsg[0] != '\0') + *op_errstr = gf_strdup(errmsg); + + if (slave_url_buf) + GF_FREE(slave_url_buf); + + return ret; +} + +/* pre-condition check for geo-rep pause/resume. + * Return: 0 on success + * -1 on any check failed. 
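+ * The check reads the monitor status from the session statefile; pause is
+ * rejected if the session is already Paused, resume is rejected if it is
+ * not Paused, and op_errstr is set with the reason on failure.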
+ */ +static int +gd_pause_resume_validation(int type, glusterd_volinfo_t *volinfo, char *slave, + char *statefile, char **op_errstr) +{ + int ret = 0; + char errmsg[PATH_MAX] = { + 0, + }; + char monitor_status[NAME_MAX] = { + 0, + }; + + GF_ASSERT(volinfo); + GF_ASSERT(slave); + GF_ASSERT(statefile); + GF_ASSERT(op_errstr); + + ret = glusterd_gsync_read_frm_status(statefile, monitor_status, + sizeof(monitor_status)); + if (ret <= 0) { + snprintf(errmsg, sizeof(errmsg), + "Pause check Failed:" + " Geo-rep session is not setup"); + ret = -1; + goto out; + } + + if (type == GF_GSYNC_OPTION_TYPE_PAUSE && + strstr(monitor_status, "Paused")) { + snprintf(errmsg, sizeof(errmsg), + "Geo-replication" + " session between %s and %s already Paused.", + volinfo->volname, slave); + ret = -1; + goto out; + } + if (type == GF_GSYNC_OPTION_TYPE_RESUME && + !strstr(monitor_status, "Paused")) { + snprintf(errmsg, sizeof(errmsg), + "Geo-replication" + " session between %s and %s is not Paused.", + volinfo->volname, slave); + ret = -1; + goto out; + } + ret = 0; +out: + if (ret && (errmsg[0] != '\0')) { + *op_errstr = gf_strdup(errmsg); + } + return ret; +} + +int +glusterd_op_stage_gsync_set(dict_t *dict, char **op_errstr) +{ + int ret = 0; + int type = 0; + char *volname = NULL; + char *slave = NULL; + char *slave_url = NULL; + char *slave_host = NULL; + char *slave_vol = NULL; + char *down_peerstr = NULL; + char *statefile = NULL; + char statefiledir[PATH_MAX] = { + 0, + }; + char *statedir = NULL; + char *path_list = NULL; + char *conf_path = NULL; + glusterd_volinfo_t *volinfo = NULL; + char errmsg[PATH_MAX] = { + 0, + }; + dict_t *ctx = NULL; + gf_boolean_t is_force = 0; + gf_boolean_t is_running = _gf_false; + gf_boolean_t is_template_in_use = _gf_false; + uuid_t uuid = {0}; + char uuid_str[64] = {0}; + char *host_uuid = NULL; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + struct stat stbuf = { + 0, + }; + + this = THIS; + GF_ASSERT(this); + conf = this->private; + GF_ASSERT(conf); + + ret = dict_get_int32(dict, "type", &type); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED, + "command type not found"); + *op_errstr = gf_strdup("command unsuccessful"); + goto out; + } + + if (type == GF_GSYNC_OPTION_TYPE_STATUS) { + ret = glusterd_verify_gsync_status_opts(dict, op_errstr); + goto out; + } + + ret = glusterd_op_gsync_args_get(dict, op_errstr, &volname, &slave, + &host_uuid); + if (ret) + goto out; + + uuid_utoa_r(MY_UUID, uuid_str); + + if (conf->op_version < 2) { + snprintf(errmsg, sizeof(errmsg), + "One or more nodes do not" + " support the required op version."); + ret = -1; + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + snprintf(errmsg, sizeof(errmsg), + "Volume name %s does not" + " exist", + volname); + goto out; + } + + ret = glusterd_get_slave_details_confpath(volinfo, dict, &slave_url, + &slave_host, &slave_vol, + &conf_path, op_errstr); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVEINFO_FETCH_ERROR, + "Unable to fetch slave or confpath details."); + ret = -1; + goto out; + } + + is_force = dict_get_str_boolean(dict, "force", _gf_false); + + ret = glusterd_get_statefile_name(volinfo, slave, conf_path, &statefile, + &is_template_in_use); + if (ret) { + if (!strstr(slave, "::")) { + snprintf(errmsg, sizeof(errmsg), "%s is not a valid slave url.", + slave); + ret = -1; + goto out; + } else { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVE_URL_INVALID, + "state_file entry missing in config file 
(%s)", conf_path); + + if ((type == GF_GSYNC_OPTION_TYPE_STOP) && is_force) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_STOP_FORCE, + "Allowing stop " + "force to bypass missing statefile " + "entry in config file (%s), and " + "template file", + conf_path); + ret = 0; + } else + goto out; + } + } else { + ret = dict_set_str(dict, "statefile", statefile); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to store statefile path"); + goto out; + } + } + + /* Allowing stop force to bypass the statefile check + * as this command acts as a fail safe method to stop geo-rep + * session. */ + if (!((type == GF_GSYNC_OPTION_TYPE_STOP) && is_force)) { + /* check session directory as statefile may not present + * during upgrade */ + if (snprintf(statefiledir, sizeof(statefiledir), "%s", statefile) >= + sizeof(statefiledir)) { + snprintf(errmsg, sizeof(errmsg), "Failed copying statefiledir"); + ret = -1; + goto out; + } + statedir = dirname(statefiledir); + + ret = sys_lstat(statedir, &stbuf); + if (ret) { + snprintf(errmsg, sizeof(errmsg), + "Geo-replication" + " session between %s and %s does not exist.", + volinfo->volname, slave); + gf_msg(this->name, GF_LOG_ERROR, ENOENT, GD_MSG_FILE_OP_FAILED, + "%s. statefile = %s", errmsg, statefile); + ret = -1; + goto out; + } + } + + /* Check if all peers that are a part of the volume are up or not */ + if ((type == GF_GSYNC_OPTION_TYPE_DELETE) || + ((type == GF_GSYNC_OPTION_TYPE_STOP) && !is_force) || + (type == GF_GSYNC_OPTION_TYPE_PAUSE) || + (type == GF_GSYNC_OPTION_TYPE_RESUME)) { + if (!strcmp(uuid_str, host_uuid)) { + ret = glusterd_are_vol_all_peers_up(volinfo, &conf->peers, + &down_peerstr); + if (ret == _gf_false) { + snprintf(errmsg, sizeof(errmsg), + "Peer %s," + " which is a part of %s volume, is" + " down. 
Please bring up the peer and" + " retry.", + down_peerstr, volinfo->volname); + ret = -1; + GF_FREE(down_peerstr); + down_peerstr = NULL; + goto out; + } + } + } + + switch (type) { + case GF_GSYNC_OPTION_TYPE_START: + if (is_template_in_use) { + snprintf(errmsg, sizeof(errmsg), + "state-file entry " + "missing in the config file(%s).", + conf_path); + ret = -1; + goto out; + } + + ret = glusterd_op_verify_gsync_start_options( + volinfo, slave, conf_path, statefile, op_errstr, is_force); + if (ret) + goto out; + ctx = glusterd_op_get_ctx(); + if (ctx) { + /* gsyncd does a fuse mount to start + * the geo-rep session */ + if (!glusterd_is_fuse_available()) { + gf_msg("glusterd", GF_LOG_ERROR, errno, + GD_MSG_GEO_REP_START_FAILED, + "Unable " + "to open /dev/fuse (%s), " + "geo-replication start failed", + strerror(errno)); + snprintf(errmsg, sizeof(errmsg), "fuse unavailable"); + ret = -1; + goto out; + } + } + break; + + case GF_GSYNC_OPTION_TYPE_STOP: + if (!is_force) { + if (is_template_in_use) { + snprintf(errmsg, sizeof(errmsg), + "state-file entry missing in " + "the config file(%s).", + conf_path); + ret = -1; + goto out; + } + + ret = glusterd_op_verify_gsync_running(volinfo, slave, + conf_path, op_errstr); + if (ret) { + ret = glusterd_get_local_brickpaths(volinfo, &path_list); + if (!path_list && ret == -1) + goto out; + } + + /* Check for geo-rep session is active or not for + * configured user.*/ + ret = glusterd_gsync_get_uuid(slave, volinfo, uuid); + if (ret) { + snprintf(errmsg, sizeof(errmsg), + "Geo-replication session between %s " + "and %s does not exist.", + volinfo->volname, slave); + ret = -1; + goto out; + } + } + break; + + case GF_GSYNC_OPTION_TYPE_PAUSE: + case GF_GSYNC_OPTION_TYPE_RESUME: + if (is_template_in_use) { + snprintf(errmsg, sizeof(errmsg), + "state-file entry missing in " + "the config file(%s).", + conf_path); + ret = -1; + goto out; + } + + ret = glusterd_op_verify_gsync_running(volinfo, slave, conf_path, + op_errstr); + if (ret) { + ret = glusterd_get_local_brickpaths(volinfo, &path_list); + if (!path_list && ret == -1) + goto out; + } + + /* Check for geo-rep session is active or not + * for configured user.*/ + ret = glusterd_gsync_get_uuid(slave, volinfo, uuid); + if (ret) { + snprintf(errmsg, sizeof(errmsg), + "Geo-replication" + " session between %s and %s does not exist.", + volinfo->volname, slave); + ret = -1; + goto out; + } + + if (!is_force) { + ret = gd_pause_resume_validation(type, volinfo, slave, + statefile, op_errstr); + if (ret) { + ret = glusterd_get_local_brickpaths(volinfo, &path_list); + if (!path_list && ret == -1) + goto out; + } + } + break; + + case GF_GSYNC_OPTION_TYPE_CONFIG: + if (is_template_in_use) { + snprintf(errmsg, sizeof(errmsg), + "state-file entry " + "missing in the config file(%s).", + conf_path); + ret = -1; + goto out; + } + + ret = gsync_verify_config_options(dict, op_errstr, volname); + goto out; + break; + + case GF_GSYNC_OPTION_TYPE_DELETE: + /* Check if the gsync session is still running + * If so ask the user to stop geo-replication first.*/ + if (is_template_in_use) { + snprintf(errmsg, sizeof(errmsg), + "state-file entry " + "missing in the config file(%s).", + conf_path); + ret = -1; + goto out; + } + + ret = glusterd_gsync_get_uuid(slave, volinfo, uuid); + if (ret) { + snprintf(errmsg, sizeof(errmsg), + "Geo-replication" + " session between %s and %s does not exist.", + volinfo->volname, slave); + ret = -1; + goto out; + } else { + ret = glusterd_check_gsync_running_local( + volinfo->volname, slave, 
conf_path, &is_running);
+ if (_gf_true == is_running) {
+ snprintf(errmsg, sizeof(errmsg),
+ GEOREP
+ " session between %s & %s is "
+ "still active. Please stop the "
+ "session and retry.",
+ volinfo->volname, slave);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ ret = glusterd_verify_gsyncd_spawn(volinfo->volname, slave);
+ if (ret) {
+ snprintf(errmsg, sizeof(errmsg), "Unable to spawn gsyncd");
+ }
+
+ break;
+ }
+
+out:
+
+ if (path_list)
+ GF_FREE(path_list);
+
+ if (ret && errmsg[0] != '\0') {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GSYNCD_ERROR, "%s", errmsg);
+ *op_errstr = gf_strdup(errmsg);
+ }
+
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+static int
+gd_pause_or_resume_gsync(dict_t *dict, char *master, char *slave,
+ char *slave_host, char *slave_vol, char *conf_path,
+ char **op_errstr, gf_boolean_t is_pause)
+{
+ int32_t ret = 0;
+ int pfd = -1;
+ long pid = 0;
+ char pidfile[PATH_MAX] = {
+ 0,
+ };
+ char errmsg[PATH_MAX] = "";
+ char buf[4096] = {
+ 0,
+ };
+ gf_boolean_t is_template_in_use = _gf_false;
+ char monitor_status[NAME_MAX] = {
+ 0,
+ };
+ char *statefile = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+ GF_ASSERT(master);
+ GF_ASSERT(slave);
+ GF_ASSERT(slave_host);
+ GF_ASSERT(slave_vol);
+ GF_ASSERT(conf_path);
+
+ pfd = gsyncd_getpidfile(master, slave, pidfile, conf_path,
+ &is_template_in_use);
+ if (pfd == -2) {
+ snprintf(errmsg, sizeof(errmsg),
+ "pid-file entry missing in config file and "
+ "template config file.");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PIDFILE_NOT_FOUND, "%s",
+ errmsg);
+ *op_errstr = gf_strdup(errmsg);
+ ret = -1;
+ goto out;
+ }
+
+ if (gsync_status_byfd(pfd) == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GSYNCD_ERROR,
+ "gsyncd b/w %s & %s is not running", master, slave);
+ /* monitor gsyncd already dead */
+ goto out;
+ }
+
+ if (pfd < 0)
+ goto out;
+
+ /* Prepare to update status file*/
+ ret = dict_get_str(dict, "statefile", &statefile);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Pause/Resume Failed: Unable to fetch statefile path");
+ goto out;
+ }
+ ret = glusterd_gsync_read_frm_status(statefile, monitor_status,
+ sizeof(monitor_status));
+ if (ret <= 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STAT_FILE_READ_FAILED,
+ "Pause/Resume Failed: "
+ "Unable to read status file for %s(master)"
+ " %s(slave)",
+ master, slave);
+ goto out;
+ }
+
+ ret = sys_read(pfd, buf, sizeof(buf) - 1);
+ if (ret > 0) {
+ buf[ret] = '\0';
+ pid = strtol(buf, NULL, 10);
+ if (is_pause) {
+ ret = kill(-pid, SIGSTOP);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_PID_KILL_FAIL,
+ "Failed"
+ " to pause gsyncd. Error: %s",
+ strerror(errno));
+ goto out;
+ }
+ /*On pause force, if status is already paused
+ do not update status again*/
+ if (strstr(monitor_status, "Paused"))
+ goto out;
+
+ ret = glusterd_create_status_file(master, slave, slave_host,
+ slave_vol, "Paused");
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_UPDATE_STATEFILE_FAILED,
+ "Unable to update state_file."
+ " Error : %s",
+ strerror(errno));
+ /* If status cannot be updated resume back */
+ if (kill(-pid, SIGCONT)) {
+ snprintf(errmsg, sizeof(errmsg),
+ "Pause successful but could "
+ "not update status file. "
+ "Please use 'resume force' to"
+ " resume back and retry pause"
+ " to reflect in status");
+ gf_msg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_PID_KILL_FAIL,
+ "Resume back Failed. 
Error:" + "%s", + strerror(errno)); + *op_errstr = gf_strdup(errmsg); + } + goto out; + } + } else { + ret = glusterd_create_status_file(master, slave, slave_host, + slave_vol, "Started"); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_UPDATE_STATEFILE_FAILED, + "Resume Failed: Unable to update " + "state_file. Error : %s", + strerror(errno)); + goto out; + } + ret = kill(-pid, SIGCONT); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_PID_KILL_FAIL, + "Resumed Failed: Unable to send" + " SIGCONT. Error: %s", + strerror(errno)); + /* Process can't be resumed, update status + * back to paused. */ + ret = glusterd_create_status_file(master, slave, slave_host, + slave_vol, monitor_status); + if (ret) { + snprintf(errmsg, sizeof(errmsg), + "Resume failed!!! Status " + "inconsistent. Please use " + "'resume force' to resume and" + " reach consistent state"); + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_STATUS_UPDATE_FAILED, + "Updating status back to paused" + " Failed. Error: %s", + strerror(errno)); + *op_errstr = gf_strdup(errmsg); + } + goto out; + } + } + } + ret = 0; + +out: + sys_close(pfd); + /* coverity[INTEGER_OVERFLOW] */ + return ret; +} + +static int +stop_gsync(char *master, char *slave, char **msg, char *conf_path, + char **op_errstr, gf_boolean_t is_force) +{ + int32_t ret = 0; + int pfd = -1; + long pid = 0; + char pidfile[PATH_MAX] = { + 0, + }; + char errmsg[PATH_MAX] = ""; + char buf[4096] = { + 0, + }; + int i = 0; + gf_boolean_t is_template_in_use = _gf_false; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + GF_ASSERT(this->private); + + pfd = gsyncd_getpidfile(master, slave, pidfile, conf_path, + &is_template_in_use); + if (pfd == -2) { + snprintf(errmsg, sizeof(errmsg) - 1, + "pid-file entry mising in config file and " + "template config file."); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PIDFILE_NOT_FOUND, "%s", + errmsg); + *op_errstr = gf_strdup(errmsg); + ret = -1; + goto out; + } + if (gsync_status_byfd(pfd) == -1 && !is_force) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GSYNCD_ERROR, + "gsyncd b/w %s & %s is not running", master, slave); + /* monitor gsyncd already dead */ + goto out; + } + + if (pfd < 0) + goto out; + + ret = sys_read(pfd, buf, sizeof(buf) - 1); + if (ret > 0) { + buf[ret] = '\0'; + pid = strtol(buf, NULL, 10); + ret = kill(-pid, SIGTERM); + if (ret && !is_force) { + gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_PID_KILL_FAIL, + "failed to kill gsyncd"); + goto out; + } + for (i = 0; i < 20; i++) { + if (gsync_status_byfd(pfd) == -1) { + /* monitor gsyncd is dead but worker may + * still be alive, give some more time + * before SIGKILL (hack) + */ + gf_nanosleep(50000 * GF_US_IN_NS); + break; + } + gf_nanosleep(50000 * GF_US_IN_NS); + } + kill(-pid, SIGKILL); + sys_unlink(pidfile); + } + ret = 0; + +out: + sys_close(pfd); + /* coverity[INTEGER_OVERFLOW] */ + return ret; +} + +/* + * glusterd_gsync_op_already_set: + * This function checks whether the op_value is same as in the + * gsyncd.conf file. + * + * RETURN VALUE: + * 0 : op_value matches the conf file. + * 1 : op_value does not matches the conf file or op_param not + * found in conf file. 
+ * -1 : error + */ + +int +glusterd_gsync_op_already_set(char *master, char *slave, char *conf_path, + char *op_name, char *op_value) +{ + dict_t *confd = NULL; + char *op_val_buf = NULL; + int32_t op_val_conf = 0; + int32_t op_val_cli = 0; + int32_t ret = -1; + gf_boolean_t is_bool = _gf_true; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + confd = dict_new(); + if (!confd) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, + "Not able to create dict."); + return -1; + } + + ret = glusterd_gsync_get_config(master, slave, conf_path, confd); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GET_CONFIG_INFO_FAILED, + "Unable to get configuration data for %s(master), " + "%s(slave)", + master, slave); + goto out; + } + + ret = dict_get_param(confd, op_name, &op_val_buf); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get op_value for %s(master), %s(slave). " + "Please check gsync config file.", + master, slave); + ret = 1; + goto out; + } + + gf_msg_debug(this->name, 0, "val_cli:%s val_conf:%s", op_value, + op_val_buf); + + if (!strcmp(op_val_buf, "true") || !strcmp(op_val_buf, "1") || + !strcmp(op_val_buf, "yes")) { + op_val_conf = 1; + } else if (!strcmp(op_val_buf, "false") || !strcmp(op_val_buf, "0") || + !strcmp(op_val_buf, "no")) { + op_val_conf = 0; + } else { + is_bool = _gf_false; + } + + if (is_bool) { + if (op_value && (!strcmp(op_value, "true") || !strcmp(op_value, "1") || + !strcmp(op_value, "yes"))) { + op_val_cli = 1; + } else { + op_val_cli = 0; + } + + if (op_val_cli == op_val_conf) { + ret = 0; + goto out; + } + } else { + if (op_value && !strcmp(op_val_buf, op_value)) { + ret = 0; + goto out; + } + } + + ret = 1; + +out: + dict_unref(confd); + return ret; +} + +static int +glusterd_gsync_configure(glusterd_volinfo_t *volinfo, char *slave, + char *path_list, dict_t *dict, dict_t *resp_dict, + char **op_errstr) +{ + int32_t ret = -1; + char *op_name = NULL; + char *op_value = NULL; + runner_t runner = { + 0, + }; + glusterd_conf_t *priv = NULL; + char *subop = NULL; + char *master = NULL; + char *conf_path = NULL; + char *slave_host = NULL; + char *slave_vol = NULL; + struct stat stbuf = { + 0, + }; + gf_boolean_t restart_required = _gf_true; + char **resopt = NULL; + gf_boolean_t op_already_set = _gf_false; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + GF_ASSERT(slave); + GF_ASSERT(op_errstr); + GF_ASSERT(dict); + GF_ASSERT(resp_dict); + + ret = dict_get_str(dict, "subop", &subop); + if (ret != 0) + goto out; + + if (strcmp(subop, "get") == 0 || strcmp(subop, "get-all") == 0) { + /* deferred to cli */ + gf_msg_debug(this->name, 0, "Returning 0"); + return 0; + } + + ret = dict_get_str(dict, "op_name", &op_name); + if (ret != 0) + goto out; + + if (strtail(subop, "set")) { + ret = dict_get_str(dict, "op_value", &op_value); + if (ret != 0) + goto out; + } + + priv = THIS->private; + if (priv == NULL) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTERD_PRIV_NOT_FOUND, + "priv of glusterd not present"); + *op_errstr = gf_strdup("glusterd defunct"); + goto out; + } + + ret = dict_get_str(dict, "conf_path", &conf_path); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to fetch conf file path."); + goto out; + } + + master = ""; + runinit(&runner); + runner_add_args(&runner, GSYNCD_PREFIX "/gsyncd", "-c", NULL); + runner_argprintf(&runner, "%s", conf_path); + runner_argprintf(&runner, "--iprefix=%s", DATADIR); + if (volinfo) { + master = 
volinfo->volname; + runner_argprintf(&runner, ":%s", master); + } + runner_add_arg(&runner, slave); + runner_argprintf(&runner, "--config-%s", subop); + runner_add_arg(&runner, op_name); + if (op_value) { + runner_argprintf(&runner, "--value=%s", op_value); + } + + if (strcmp(op_name, "checkpoint") != 0 && strtail(subop, "set")) { + ret = glusterd_gsync_op_already_set(master, slave, conf_path, op_name, + op_value); + if (ret == -1) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_GSYNCD_OP_SET_FAILED, + "glusterd_gsync_op_already_set failed."); + gf_asprintf(op_errstr, + GEOREP + " config-%s failed for " + "%s %s", + subop, master, slave); + goto out; + } + if (ret == 0) { + gf_msg_debug(this->name, 0, "op_value is already set"); + op_already_set = _gf_true; + goto out; + } + } + + synclock_unlock(&priv->big_lock); + ret = runner_run(&runner); + synclock_lock(&priv->big_lock); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_GSYNCD_ERROR, + "gsyncd failed to %s %s option for " + "%s %s peers", + subop, op_name, master, slave); + + gf_asprintf(op_errstr, GEOREP " config-%s failed for %s %s", subop, + master, slave); + + goto out; + } + + if ((!strcmp(op_name, "state_file")) && (op_value)) { + ret = sys_lstat(op_value, &stbuf); + if (ret) { + ret = dict_get_str(dict, "slave_host", &slave_host); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to fetch slave host."); + goto out; + } + + ret = dict_get_str(dict, "slave_vol", &slave_vol); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to fetch slave volume name."); + goto out; + } + + ret = glusterd_create_status_file(volinfo->volname, slave, + slave_host, slave_vol, + "Switching Status " + "File"); + if (ret || sys_lstat(op_value, &stbuf)) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "Unable to " + "create %s. Error : %s", + op_value, strerror(errno)); + ret = -1; + goto out; + } + } + } + + ret = 0; + gf_asprintf(op_errstr, "config-%s successful", subop); + +out: + if (!ret && volinfo && !op_already_set) { + for (resopt = gsync_no_restart_opts; *resopt; resopt++) { + restart_required = _gf_true; + if (!strcmp((*resopt), op_name)) { + restart_required = _gf_false; + break; + } + } + + if (restart_required) { + ret = glusterd_check_restart_gsync_session( + volinfo, slave, resp_dict, path_list, conf_path, 0); + if (ret) + *op_errstr = gf_strdup("internal error"); + } + } + + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; +} + +int +glusterd_gsync_read_frm_status(char *path, char *buf, size_t blen) +{ + int ret = 0; + int status_fd = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + GF_ASSERT(path); + GF_ASSERT(buf); + status_fd = open(path, O_RDONLY); + if (status_fd == -1) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED, + "Unable to read gsyncd status file %s", path); + return -1; + } + ret = sys_read(status_fd, buf, blen - 1); + if (ret > 0) { + size_t len = strnlen(buf, ret); + /* Ensure there is a NUL byte and that it's not the first. 
*/ + if (len == 0 || len == blen - 1) { + ret = -1; + } else { + char *p = buf + len - 1; + while (isspace(*p)) + *p-- = '\0'; + } + } else if (ret == 0) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GSYNCD_ERROR, + "Status file of gsyncd is empty"); + else /* ret < 0 */ + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GSYNCD_ERROR, + "Status file of gsyncd is corrupt"); + + sys_close(status_fd); + return ret; +} + +static int +dict_get_param(dict_t *dict, char *key, char **param) +{ + char *dk = NULL; + char *s = NULL; + char x = '\0'; + int ret = 0; + + if (dict_get_str(dict, key, param) == 0) + return 0; + + dk = gf_strdup(key); + if (!dk) + return -1; + + s = strpbrk(dk, "-_"); + if (!s) { + ret = -1; + goto out; + } + x = (*s == '-') ? '_' : '-'; + *s++ = x; + while ((s = strpbrk(s, "-_"))) + *s++ = x; + + ret = dict_get_str(dict, dk, param); +out: + GF_FREE(dk); + return ret; +} + +int +glusterd_fetch_values_from_config(char *master, char *slave, char *confpath, + dict_t *confd, char **statefile, + char **georep_session_wrkng_dir, + char **socketfile) +{ + int ret = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + ret = glusterd_gsync_get_config(master, slave, confpath, confd); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GET_CONFIG_INFO_FAILED, + "Unable to get configuration data for %s(master), " + "%s(slave)", + master, slave); + goto out; + } + + if (statefile) { + ret = dict_get_param(confd, "state_file", statefile); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get state_file's name " + "for %s(master), %s(slave). " + "Please check gsync config file.", + master, slave); + goto out; + } + } + + if (georep_session_wrkng_dir) { + ret = dict_get_param(confd, "georep_session_working_dir", + georep_session_wrkng_dir); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get geo-rep session's " + "working directory name for %s(master), " + "%s(slave). Please check gsync config file.", + master, slave); + goto out; + } + } + + if (socketfile) { + ret = dict_get_param(confd, "state_socket_unencoded", socketfile); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get socket file's name " + "for %s(master), %s(slave). 
" + "Please check gsync config file.", + master, slave); + goto out; + } + } + + ret = 0; +out: + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; +} + +int +glusterd_read_status_file(glusterd_volinfo_t *volinfo, char *slave, + char *conf_path, dict_t *dict, char *node) +{ + char temp_conf_path[PATH_MAX] = ""; + char *working_conf_path = NULL; + char *georep_session_wrkng_dir = NULL; + char *master = NULL; + char sts_val_name[1024] = ""; + char monitor_status[NAME_MAX] = ""; + char *statefile = NULL; + char *socketfile = NULL; + dict_t *confd = NULL; + char *slavekey = NULL; + char *slaveentry = NULL; + char *slaveuser = NULL; + char *saveptr = NULL; + char *temp = NULL; + char *temp_inp = NULL; + char *brick_host_uuid = NULL; + int brick_host_uuid_length = 0; + int gsync_count = 0; + int ret = 0; + glusterd_brickinfo_t *brickinfo = NULL; + gf_gsync_status_t *sts_val = NULL; + gf_boolean_t is_template_in_use = _gf_false; + glusterd_conf_t *priv = NULL; + struct stat stbuf = { + 0, + }; + xlator_t *this = NULL; + int32_t len = 0; + + this = THIS; + GF_ASSERT(this); + + GF_ASSERT(this->private); + GF_ASSERT(volinfo); + GF_ASSERT(conf_path); + + master = volinfo->volname; + + confd = dict_new(); + if (!confd) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, + "Not able to create dict."); + return -1; + } + + priv = THIS->private; + + len = snprintf(temp_conf_path, sizeof(temp_conf_path), + "%s/" GSYNC_CONF_TEMPLATE, priv->workdir); + if ((len < 0) || (len >= sizeof(temp_conf_path))) { + return -1; + } + + ret = sys_lstat(conf_path, &stbuf); + if (!ret) { + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_CONFIG_INFO, + "Using passed config template(%s).", conf_path); + working_conf_path = conf_path; + } else { + gf_msg(this->name, GF_LOG_WARNING, ENOENT, GD_MSG_FILE_OP_FAILED, + "Config file (%s) missing. Looking for template " + "config file (%s)", + conf_path, temp_conf_path); + ret = sys_lstat(temp_conf_path, &stbuf); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, ENOENT, GD_MSG_FILE_OP_FAILED, + "Template " + "config file (%s) missing.", + temp_conf_path); + goto out; + } + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_DEFAULT_TEMP_CONFIG, + "Using default config template(%s).", temp_conf_path); + working_conf_path = temp_conf_path; + is_template_in_use = _gf_true; + } + +fetch_data: + ret = glusterd_fetch_values_from_config( + master, slave, working_conf_path, confd, &statefile, + &georep_session_wrkng_dir, &socketfile); + if (ret) { + if (is_template_in_use == _gf_false) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FETCH_CONFIG_VAL_FAILED, + "Unable to fetch config values " + "for %s(master), %s(slave). 
" + "Trying default config template", + master, slave); + working_conf_path = temp_conf_path; + is_template_in_use = _gf_true; + goto fetch_data; + } else { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FETCH_CONFIG_VAL_FAILED, + "Unable to " + "fetch config values for %s(master), " + "%s(slave)", + master, slave); + goto out; + } + } + + ret = glusterd_gsync_read_frm_status(statefile, monitor_status, + sizeof(monitor_status)); + if (ret <= 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STAT_FILE_READ_FAILED, + "Unable to read the status file for %s(master), " + "%s(slave) statefile: %s", + master, slave, statefile); + snprintf(monitor_status, sizeof(monitor_status), "defunct"); + } + + ret = dict_get_int32(dict, "gsync-count", &gsync_count); + if (ret) + gsync_count = 0; + + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + if (gf_uuid_compare(brickinfo->uuid, MY_UUID)) + continue; + + sts_val = GF_CALLOC(1, sizeof(gf_gsync_status_t), + gf_common_mt_gsync_status_t); + if (!sts_val) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "Out Of Memory"); + goto out; + } + + /* Slave Key */ + ret = glusterd_get_slave(volinfo, slave, &slavekey); + if (ret < 0) { + GF_FREE(sts_val); + goto out; + } + memcpy(sts_val->slavekey, slavekey, strlen(slavekey)); + sts_val->slavekey[strlen(slavekey)] = '\0'; + + /* Master Volume */ + memcpy(sts_val->master, master, strlen(master)); + sts_val->master[strlen(master)] = '\0'; + + /* Master Brick Node */ + memcpy(sts_val->node, brickinfo->hostname, strlen(brickinfo->hostname)); + sts_val->node[strlen(brickinfo->hostname)] = '\0'; + + /* Master Brick Path */ + memcpy(sts_val->brick, brickinfo->path, strlen(brickinfo->path)); + sts_val->brick[strlen(brickinfo->path)] = '\0'; + + /* Brick Host UUID */ + brick_host_uuid = uuid_utoa(brickinfo->uuid); + brick_host_uuid_length = strlen(brick_host_uuid); + memcpy(sts_val->brick_host_uuid, brick_host_uuid, + brick_host_uuid_length); + sts_val->brick_host_uuid[brick_host_uuid_length] = '\0'; + + /* Slave */ + memcpy(sts_val->slave, slave, strlen(slave)); + sts_val->slave[strlen(slave)] = '\0'; + + snprintf(sts_val->slave_node, sizeof(sts_val->slave_node), "N/A"); + + snprintf(sts_val->worker_status, sizeof(sts_val->worker_status), "N/A"); + + snprintf(sts_val->crawl_status, sizeof(sts_val->crawl_status), "N/A"); + + snprintf(sts_val->last_synced, sizeof(sts_val->last_synced), "N/A"); + + snprintf(sts_val->last_synced_utc, sizeof(sts_val->last_synced_utc), + "N/A"); + + snprintf(sts_val->entry, sizeof(sts_val->entry), "N/A"); + + snprintf(sts_val->data, sizeof(sts_val->data), "N/A"); + + snprintf(sts_val->meta, sizeof(sts_val->meta), "N/A"); + + snprintf(sts_val->failures, sizeof(sts_val->failures), "N/A"); + + snprintf(sts_val->checkpoint_time, sizeof(sts_val->checkpoint_time), + "N/A"); + + snprintf(sts_val->checkpoint_time_utc, + sizeof(sts_val->checkpoint_time_utc), "N/A"); + + snprintf(sts_val->checkpoint_completed, + sizeof(sts_val->checkpoint_completed), "N/A"); + + snprintf(sts_val->checkpoint_completion_time, + sizeof(sts_val->checkpoint_completion_time), "N/A"); + + snprintf(sts_val->checkpoint_completion_time_utc, + sizeof(sts_val->checkpoint_completion_time_utc), "N/A"); + + /* Get all the other values from Gsyncd */ + ret = glusterd_gsync_get_status(master, slave, conf_path, + brickinfo->path, sts_val); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GET_STATUS_DATA_FAIL, + "Unable to get status data " + "for %s(master), %s(slave), %s(brick)", + master, 
slave, brickinfo->path); + ret = -1; + goto out; + } + + if (is_template_in_use) { + snprintf(sts_val->worker_status, sizeof(sts_val->worker_status), + "Config Corrupted"); + } + + ret = dict_get_str(volinfo->gsync_slaves, slavekey, &slaveentry); + if (ret < 0) { + GF_FREE(sts_val); + goto out; + } + + memcpy(sts_val->session_slave, slaveentry, strlen(slaveentry)); + sts_val->session_slave[strlen(slaveentry)] = '\0'; + + temp_inp = gf_strdup(slaveentry); + if (!temp_inp) + goto out; + + if (strstr(temp_inp, "@") == NULL) { + slaveuser = "root"; + } else { + temp = strtok_r(temp_inp, "//", &saveptr); + temp = strtok_r(NULL, "/", &saveptr); + slaveuser = strtok_r(temp, "@", &saveptr); + } + memcpy(sts_val->slave_user, slaveuser, strlen(slaveuser)); + sts_val->slave_user[strlen(slaveuser)] = '\0'; + + snprintf(sts_val_name, sizeof(sts_val_name), "status_value%d", + gsync_count); + ret = dict_set_bin(dict, sts_val_name, sts_val, + sizeof(gf_gsync_status_t)); + if (ret) { + GF_FREE(sts_val); + goto out; + } + + gsync_count++; + sts_val = NULL; + } + + ret = dict_set_int32(dict, "gsync-count", gsync_count); + if (ret) + goto out; + +out: + GF_FREE(temp_inp); + dict_unref(confd); + + return 0; +} + +int +glusterd_check_restart_gsync_session(glusterd_volinfo_t *volinfo, char *slave, + dict_t *resp_dict, char *path_list, + char *conf_path, gf_boolean_t is_force) +{ + int ret = 0; + glusterd_conf_t *priv = NULL; + char *status_msg = NULL; + gf_boolean_t is_running = _gf_false; + char *op_errstr = NULL; + char *key = NULL; + xlator_t *this = NULL; + + GF_ASSERT(volinfo); + GF_ASSERT(slave); + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + key = slave; + + ret = glusterd_check_gsync_running_local(volinfo->volname, slave, conf_path, + &is_running); + if (!ret && (_gf_true != is_running)) + /* gsynd not running, nothing to do */ + goto out; + + ret = stop_gsync(volinfo->volname, slave, &status_msg, conf_path, + &op_errstr, is_force); + if (ret == 0 && status_msg) + ret = dict_set_str(resp_dict, "gsync-status", status_msg); + if (ret == 0) { + dict_del(volinfo->gsync_active_slaves, key); + ret = glusterd_start_gsync(volinfo, slave, path_list, conf_path, + uuid_utoa(MY_UUID), NULL, _gf_false); + if (!ret) { + /* Add slave to the dict indicating geo-rep session is + * running.*/ + ret = dict_set_dynstr_with_alloc(volinfo->gsync_active_slaves, key, + "running"); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to set" + " key:%s value:running in dict. 
But " + "the config succeeded.", + key); + goto out; + } + } + } + +out: + gf_msg_debug(this->name, 0, "Returning %d", ret); + if (op_errstr) + GF_FREE(op_errstr); + return ret; +} + +static int32_t +glusterd_marker_changelog_create_volfile(glusterd_volinfo_t *volinfo) +{ + int32_t ret = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + ret = glusterd_create_volfiles_and_notify_services(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL, + "Unable to create volfile for setting of marker " + "while '" GEOREP " start'"); + ret = -1; + goto out; + } + + ret = glusterd_store_volinfo(volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) + goto out; + + if (GLUSTERD_STATUS_STARTED == volinfo->status) { + ret = glusterd_svcs_manager(volinfo); + goto out; + } + ret = 0; +out: + return ret; +} + +static int +glusterd_set_gsync_knob(glusterd_volinfo_t *volinfo, char *key, int *vc) +{ + int ret = -1; + int conf_enabled = _gf_false; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + GF_ASSERT(this->private); + + conf_enabled = glusterd_volinfo_get_boolean(volinfo, key); + if (conf_enabled == -1) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GET_KEY_FAILED, + "failed to get key %s from volinfo", key); + goto out; + } + + ret = 0; + if (conf_enabled == _gf_false) { + *vc = 1; + ret = glusterd_gsync_volinfo_dict_set(volinfo, key, "on"); + } + +out: + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; +} + +static int +glusterd_set_gsync_confs(glusterd_volinfo_t *volinfo) +{ + int ret = -1; + int volfile_changed = 0; + + ret = glusterd_set_gsync_knob(volinfo, VKEY_MARKER_XTIME, &volfile_changed); + if (ret) + goto out; + + /** + * enable ignore-pid-check blindly as it could be needed for + * cascading setups. 
+ */ + ret = glusterd_set_gsync_knob(volinfo, VKEY_MARKER_XTIME_FORCE, + &volfile_changed); + if (ret) + goto out; + + ret = glusterd_set_gsync_knob(volinfo, VKEY_CHANGELOG, &volfile_changed); + if (ret) + goto out; + + if (volfile_changed) + ret = glusterd_marker_changelog_create_volfile(volinfo); + +out: + return ret; +} + +static int +glusterd_get_gsync_status_mst_slv(glusterd_volinfo_t *volinfo, char *slave, + char *conf_path, dict_t *rsp_dict, char *node) +{ + char *statefile = NULL; + uuid_t uuid = { + 0, + }; + int ret = 0; + gf_boolean_t is_template_in_use = _gf_false; + struct stat stbuf = { + 0, + }; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + GF_ASSERT(volinfo); + GF_ASSERT(slave); + GF_ASSERT(this->private); + + ret = glusterd_gsync_get_uuid(slave, volinfo, uuid); + if (ret) { + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_SESSION_INACTIVE, + "geo-replication status %s %s : session is not " + "active", + volinfo->volname, slave); + + ret = glusterd_get_statefile_name(volinfo, slave, conf_path, &statefile, + &is_template_in_use); + if (ret) { + if (!strstr(slave, "::")) + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_SLAVE_URL_INVALID, + "%s is not a valid slave url.", slave); + else + gf_msg(this->name, GF_LOG_INFO, 0, + GD_MSG_GET_STATEFILE_NAME_FAILED, + "Unable to get statefile's name"); + ret = 0; + goto out; + } + + ret = sys_lstat(statefile, &stbuf); + if (ret) { + gf_msg(this->name, GF_LOG_INFO, ENOENT, GD_MSG_FILE_OP_FAILED, + "%s statefile not present.", statefile); + ret = 0; + goto out; + } + } + + ret = glusterd_read_status_file(volinfo, slave, conf_path, rsp_dict, node); +out: + if (statefile) + GF_FREE(statefile); + + gf_msg_debug(this->name, 0, "Returning with %d", ret); + return ret; +} + +int +glusterd_get_gsync_status_mst(glusterd_volinfo_t *volinfo, dict_t *rsp_dict, + char *node) +{ + glusterd_gsync_status_temp_t param = { + 0, + }; + + GF_ASSERT(volinfo); + + param.rsp_dict = rsp_dict; + param.volinfo = volinfo; + param.node = node; + dict_foreach(volinfo->gsync_slaves, _get_status_mst_slv, ¶m); + + return 0; +} + +static int +glusterd_get_gsync_status_all(dict_t *rsp_dict, char *node) +{ + int32_t ret = 0; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + + GF_ASSERT(priv); + + cds_list_for_each_entry(volinfo, &priv->volumes, vol_list) + { + ret = glusterd_get_gsync_status_mst(volinfo, rsp_dict, node); + if (ret) + goto out; + } + +out: + gf_msg_debug(this->name, 0, "Returning with %d", ret); + return ret; +} + +static int +glusterd_get_gsync_status(dict_t *dict, char **op_errstr, dict_t *rsp_dict) +{ + char *slave = NULL; + char *volname = NULL; + char *conf_path = NULL; + char errmsg[PATH_MAX] = { + 0, + }; + glusterd_volinfo_t *volinfo = NULL; + int ret = 0; + char my_hostname[256] = { + 0, + }; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + ret = gethostname(my_hostname, 256); + if (ret) { + /* stick to N/A */ + (void)strcpy(my_hostname, "N/A"); + } + + ret = dict_get_str(dict, "master", &volname); + if (ret < 0) { + ret = glusterd_get_gsync_status_all(rsp_dict, my_hostname); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOL_NOT_FOUND, + "volume name does not exist"); + snprintf(errmsg, sizeof(errmsg), + "Volume name %s does not" + " exist", + volname); + *op_errstr = gf_strdup(errmsg); + goto out; + } + + ret = dict_get_str(dict, 
"slave", &slave); + if (ret < 0) { + ret = glusterd_get_gsync_status_mst(volinfo, rsp_dict, my_hostname); + goto out; + } + + ret = dict_get_str(dict, "conf_path", &conf_path); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to fetch conf file path."); + goto out; + } + + ret = glusterd_get_gsync_status_mst_slv(volinfo, slave, conf_path, rsp_dict, + my_hostname); + +out: + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; +} + +static int +glusterd_gsync_delete(glusterd_volinfo_t *volinfo, char *slave, + char *slave_host, char *slave_vol, char *path_list, + dict_t *dict, dict_t *resp_dict, char **op_errstr) +{ + int32_t ret = -1; + runner_t runner = { + 0, + }; + glusterd_conf_t *priv = NULL; + char *master = NULL; + char *gl_workdir = NULL; + char geo_rep_dir[PATH_MAX] = ""; + char *conf_path = NULL; + xlator_t *this = NULL; + uint32_t reset_sync_time = _gf_false; + + this = THIS; + GF_ASSERT(this); + + GF_ASSERT(slave); + GF_ASSERT(slave_host); + GF_ASSERT(slave_vol); + GF_ASSERT(op_errstr); + GF_ASSERT(dict); + GF_ASSERT(resp_dict); + + if (THIS) + priv = THIS->private; + if (priv == NULL) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTERD_PRIV_NOT_FOUND, + "priv of glusterd not present"); + *op_errstr = gf_strdup("glusterd defunct"); + goto out; + } + + ret = dict_get_str(dict, "conf_path", &conf_path); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to fetch conf file path."); + goto out; + } + + gl_workdir = priv->workdir; + master = ""; + runinit(&runner); + runner_add_args(&runner, GSYNCD_PREFIX "/gsyncd", "--delete", "-c", NULL); + runner_argprintf(&runner, "%s", conf_path); + runner_argprintf(&runner, "--iprefix=%s", DATADIR); + + runner_argprintf(&runner, "--path-list=%s", path_list); + + ret = dict_get_uint32(dict, "reset-sync-time", &reset_sync_time); + if (!ret && reset_sync_time) { + runner_add_args(&runner, "--reset-sync-time", NULL); + } + + if (volinfo) { + master = volinfo->volname; + runner_argprintf(&runner, ":%s", master); + } + runner_add_arg(&runner, slave); + runner_redir(&runner, STDOUT_FILENO, RUN_PIPE); + synclock_unlock(&priv->big_lock); + ret = runner_run(&runner); + synclock_lock(&priv->big_lock); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SESSION_DEL_FAILED, + "gsyncd failed to delete session info for %s and " + "%s peers", + master, slave); + + gf_asprintf(op_errstr, + "gsyncd failed to " + "delete session info for %s and %s peers", + master, slave); + + goto out; + } + + ret = snprintf(geo_rep_dir, sizeof(geo_rep_dir) - 1, + "%s/" GEOREP "/%s_%s_%s", gl_workdir, volinfo->volname, + slave_host, slave_vol); + geo_rep_dir[ret] = '\0'; + + ret = sys_rmdir(geo_rep_dir); + if (ret) { + if (errno == ENOENT) + gf_msg_debug(this->name, 0, "Geo Rep Dir(%s) Not Present.", + geo_rep_dir); + else { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED, + "Unable to delete Geo Rep Dir(%s). 
Error: %s", geo_rep_dir, + strerror(errno)); + goto out; + } + } + + ret = 0; + + gf_asprintf(op_errstr, "delete successful"); + +out: + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; +} + +int +glusterd_op_sys_exec(dict_t *dict, char **op_errstr, dict_t *rsp_dict) +{ + char buf[PATH_MAX] = ""; + char cmd_arg_name[PATH_MAX] = ""; + char output_name[PATH_MAX] = ""; + char errmsg[PATH_MAX] = ""; + char *ptr = NULL; + char *bufp = NULL; + char *command = NULL; + char **cmd_args = NULL; + int ret = -1; + int i = -1; + int cmd_args_count = 0; + int output_count = 0; + glusterd_conf_t *priv = NULL; + runner_t runner = { + 0, + }; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + GF_ASSERT(dict); + GF_ASSERT(op_errstr); + GF_ASSERT(rsp_dict); + + if (THIS) + priv = THIS->private; + if (priv == NULL) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTERD_PRIV_NOT_FOUND, + "priv of glusterd not present"); + *op_errstr = gf_strdup("glusterd defunct"); + goto out; + } + + ret = dict_get_str(dict, "command", &command); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get command from dict"); + goto out; + } + + ret = dict_get_int32(dict, "cmd_args_count", &cmd_args_count); + if (ret) + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_DICT_GET_FAILED, + "No cmd_args_count"); + + if (cmd_args_count) { + cmd_args = GF_CALLOC(cmd_args_count, sizeof(char *), gf_common_mt_char); + if (!cmd_args) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "Unable to calloc. Errno = %s", strerror(errno)); + goto out; + } + + for (i = 1; i <= cmd_args_count; i++) { + snprintf(cmd_arg_name, sizeof(cmd_arg_name), "cmd_arg_%d", i); + ret = dict_get_str(dict, cmd_arg_name, &cmd_args[i - 1]); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get" + " %s in dict", + cmd_arg_name); + goto out; + } + } + } + + runinit(&runner); + runner_argprintf(&runner, GSYNCD_PREFIX "/peer_%s", command); + for (i = 0; i < cmd_args_count; i++) + runner_add_arg(&runner, cmd_args[i]); + runner_redir(&runner, STDOUT_FILENO, RUN_PIPE); + synclock_unlock(&priv->big_lock); + ret = runner_start(&runner); + if (ret == -1) { + snprintf(errmsg, sizeof(errmsg), + "Unable to " + "execute command. Error : %s", + strerror(errno)); + *op_errstr = gf_strdup(errmsg); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CMD_EXEC_FAIL, "%s", errmsg); + ret = -1; + synclock_lock(&priv->big_lock); + goto out; + } + + do { + ptr = fgets(buf, sizeof(buf), runner_chio(&runner, STDOUT_FILENO)); + if (ptr) { + ret = dict_get_int32(rsp_dict, "output_count", &output_count); + if (ret) + output_count = 1; + else + output_count++; + snprintf(output_name, sizeof(output_name), "output_%d", + output_count); + if (buf[strlen(buf) - 1] == '\n') + buf[strlen(buf) - 1] = '\0'; + bufp = gf_strdup(buf); + if (!bufp) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STRDUP_FAILED, + "gf_strdup failed."); + ret = dict_set_dynstr(rsp_dict, output_name, bufp); + if (ret) { + GF_FREE(bufp); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "output set " + "failed."); + } + ret = dict_set_int32(rsp_dict, "output_count", output_count); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "output_count " + "set failed."); + } + } while (ptr); + + ret = runner_end(&runner); + if (ret) { + snprintf(errmsg, sizeof(errmsg), + "Unable to " + "end. 
Error : %s", + strerror(errno)); + *op_errstr = gf_strdup(errmsg); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UNABLE_TO_END, "%s", errmsg); + ret = -1; + synclock_lock(&priv->big_lock); + goto out; + } + synclock_lock(&priv->big_lock); + + ret = 0; +out: + if (cmd_args) { + GF_FREE(cmd_args); + cmd_args = NULL; + } + + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; +} + +int +glusterd_op_copy_file(dict_t *dict, char **op_errstr) +{ + char abs_filename[PATH_MAX] = ""; + char errmsg[PATH_MAX] = ""; + char *filename = NULL; + char *host_uuid = NULL; + char uuid_str[64] = {0}; + char *contents = NULL; + char buf[4096] = ""; + int ret = -1; + int fd = -1; + int bytes_writen = 0; + int bytes_read = 0; + int contents_size = -1; + int file_mode = -1; + glusterd_conf_t *priv = NULL; + struct stat stbuf = { + 0, + }; + gf_boolean_t free_contents = _gf_true; + xlator_t *this = NULL; + int32_t len = 0; + + this = THIS; + GF_ASSERT(this); + + if (THIS) + priv = THIS->private; + if (priv == NULL) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTERD_PRIV_NOT_FOUND, + "priv of glusterd not present"); + *op_errstr = gf_strdup("glusterd defunct"); + goto out; + } + + ret = dict_get_str(dict, "host-uuid", &host_uuid); + if (ret < 0) + goto out; + + ret = dict_get_str(dict, "source", &filename); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to fetch filename from dict."); + *op_errstr = gf_strdup("command unsuccessful"); + goto out; + } + len = snprintf(abs_filename, sizeof(abs_filename), "%s/%s", priv->workdir, + filename); + if ((len < 0) || (len >= sizeof(abs_filename))) { + ret = -1; + goto out; + } + + uuid_utoa_r(MY_UUID, uuid_str); + if (!strcmp(uuid_str, host_uuid)) { + ret = sys_lstat(abs_filename, &stbuf); + if (ret) { + len = snprintf(errmsg, sizeof(errmsg), + "Source file " + "does not exist in %s", + priv->workdir); + if (len < 0) { + strcpy(errmsg, "<error>"); + } + *op_errstr = gf_strdup(errmsg); + gf_msg(this->name, GF_LOG_ERROR, ENOENT, GD_MSG_FILE_OP_FAILED, + "%s", errmsg); + goto out; + } + + contents = GF_CALLOC(1, stbuf.st_size + 1, gf_common_mt_char); + if (!contents) { + snprintf(errmsg, sizeof(errmsg), "Unable to allocate memory"); + *op_errstr = gf_strdup(errmsg); + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, "%s", + errmsg); + ret = -1; + goto out; + } + + fd = open(abs_filename, O_RDONLY); + if (fd < 0) { + len = snprintf(errmsg, sizeof(errmsg), "Unable to open %s", + abs_filename); + if (len < 0) { + strcpy(errmsg, "<error>"); + } + *op_errstr = gf_strdup(errmsg); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED, "%s", + errmsg); + ret = -1; + goto out; + } + + do { + ret = sys_read(fd, buf, sizeof(buf) - 1); + if (ret > 0) { + buf[ret] = '\0'; + memcpy(contents + bytes_read, buf, ret); + bytes_read += ret; + } + } while (ret > 0); + + if (bytes_read != stbuf.st_size) { + len = snprintf(errmsg, sizeof(errmsg), + "Unable to read all the data from %s", abs_filename); + if (len < 0) { + strcpy(errmsg, "<error>"); + } + *op_errstr = gf_strdup(errmsg); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_READ_ERROR, "%s", + errmsg); + ret = -1; + goto out; + } + + ret = dict_set_int32(dict, "contents_size", stbuf.st_size); + if (ret) { + snprintf(errmsg, sizeof(errmsg), + "Unable to set" + " contents size in dict."); + *op_errstr = gf_strdup(errmsg); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "%s", + errmsg); + goto out; + } + + ret = dict_set_int32(dict, "file_mode", 
(int32_t)stbuf.st_mode); + if (ret) { + snprintf(errmsg, sizeof(errmsg), + "Unable to set" + " file mode in dict."); + *op_errstr = gf_strdup(errmsg); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "%s", + errmsg); + goto out; + } + + ret = dict_set_bin(dict, "common_pem_contents", contents, + stbuf.st_size); + if (ret) { + snprintf(errmsg, sizeof(errmsg), + "Unable to set" + " pem contents in dict."); + *op_errstr = gf_strdup(errmsg); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "%s", + errmsg); + goto out; + } + free_contents = _gf_false; + } else { + free_contents = _gf_false; + ret = dict_get_bin(dict, "common_pem_contents", (void **)&contents); + if (ret) { + snprintf(errmsg, sizeof(errmsg), + "Unable to get" + " pem contents in dict."); + *op_errstr = gf_strdup(errmsg); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", + errmsg); + goto out; + } + ret = dict_get_int32(dict, "contents_size", &contents_size); + if (ret) { + snprintf(errmsg, sizeof(errmsg), + "Unable to set" + " contents size in dict."); + *op_errstr = gf_strdup(errmsg); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", + errmsg); + goto out; + } + + ret = dict_get_int32(dict, "file_mode", &file_mode); + if (ret) { + snprintf(errmsg, sizeof(errmsg), + "Unable to get" + " file mode in dict."); + *op_errstr = gf_strdup(errmsg); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", + errmsg); + goto out; + } + + fd = open(abs_filename, O_WRONLY | O_TRUNC | O_CREAT, 0600); + if (fd < 0) { + len = snprintf(errmsg, sizeof(errmsg), "Unable to open %s", + abs_filename); + if (len < 0) { + strcpy(errmsg, "<error>"); + } + *op_errstr = gf_strdup(errmsg); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED, "%s", + errmsg); + ret = -1; + goto out; + } + + bytes_writen = sys_write(fd, contents, contents_size); + + if (bytes_writen != contents_size) { + len = snprintf(errmsg, sizeof(errmsg), "Failed to write to %s", + abs_filename); + if (len < 0) { + strcpy(errmsg, "<error>"); + } + *op_errstr = gf_strdup(errmsg); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED, "%s", + errmsg); + ret = -1; + goto out; + } + + sys_fchmod(fd, file_mode); + } + + ret = 0; +out: + if (fd != -1) + sys_close(fd); + + if (free_contents) + GF_FREE(contents); + + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; +} + +int +glusterd_op_gsync_set(dict_t *dict, char **op_errstr, dict_t *rsp_dict) +{ + int32_t ret = -1; + int32_t type = -1; + char *host_uuid = NULL; + char *slave = NULL; + char *slave_url = NULL; + char *slave_vol = NULL; + char *slave_host = NULL; + char *volname = NULL; + char *path_list = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_conf_t *priv = NULL; + gf_boolean_t is_force = _gf_false; + char *status_msg = NULL; + gf_boolean_t is_running = _gf_false; + char *conf_path = NULL; + char *key = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + GF_ASSERT(dict); + GF_ASSERT(op_errstr); + GF_ASSERT(rsp_dict); + + ret = dict_get_int32(dict, "type", &type); + if (ret < 0) + goto out; + + ret = dict_get_str(dict, "host-uuid", &host_uuid); + if (ret < 0) + goto out; + + if (type == GF_GSYNC_OPTION_TYPE_STATUS) { + ret = glusterd_get_gsync_status(dict, op_errstr, rsp_dict); + goto out; + } + + ret = dict_get_str(dict, "slave", &slave); + if (ret < 0) + goto out; + + key = slave; + + ret = dict_get_str(dict, "slave_url", &slave_url); + if (ret) { + 
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to fetch slave url."); + goto out; + } + + ret = dict_get_str(dict, "slave_host", &slave_host); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to fetch slave hostname."); + goto out; + } + + ret = dict_get_str(dict, "slave_vol", &slave_vol); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to fetch slave volume name."); + goto out; + } + + ret = dict_get_str(dict, "conf_path", &conf_path); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to fetch conf file path."); + goto out; + } + + if (dict_get_str(dict, "master", &volname) == 0) { + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED, + "Volinfo for" + " %s (master) not found", + volname); + goto out; + } + + ret = glusterd_get_local_brickpaths(volinfo, &path_list); + if (!path_list && ret == -1) + goto out; + } + + if (type == GF_GSYNC_OPTION_TYPE_CONFIG) { + ret = glusterd_gsync_configure(volinfo, slave, path_list, dict, + rsp_dict, op_errstr); + if (!ret) { + ret = dict_set_str(rsp_dict, "conf_path", conf_path); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to store conf_file_path."); + goto out; + } + } + goto out; + } + + if (type == GF_GSYNC_OPTION_TYPE_DELETE) { + ret = glusterd_remove_slave_in_info(volinfo, slave, op_errstr); + if (ret && !is_force && path_list) + goto out; + + ret = glusterd_gsync_delete(volinfo, slave, slave_host, slave_vol, + path_list, dict, rsp_dict, op_errstr); + goto out; + } + + if (!volinfo) { + ret = -1; + goto out; + } + + is_force = dict_get_str_boolean(dict, "force", _gf_false); + + if (type == GF_GSYNC_OPTION_TYPE_START) { + /* Add slave to the dict indicating geo-rep session is running*/ + ret = dict_set_dynstr_with_alloc(volinfo->gsync_active_slaves, key, + "running"); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to set key:%s" + " value:running in the dict", + key); + goto out; + } + + /* If slave volume uuid is not present in gsync_slaves + * update it*/ + ret = glusterd_update_slave_voluuid_slaveinfo(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REMOTE_VOL_UUID_FAIL, + "Error in updating" + " slave volume uuid for old slave info"); + goto out; + } + + ret = glusterd_start_gsync(volinfo, slave, path_list, conf_path, + host_uuid, op_errstr, _gf_false); + + /* Delete added slave in the dict if start fails*/ + if (ret) + dict_del(volinfo->gsync_active_slaves, key); + } + + if (type == GF_GSYNC_OPTION_TYPE_STOP || + type == GF_GSYNC_OPTION_TYPE_PAUSE || + type == GF_GSYNC_OPTION_TYPE_RESUME) { + ret = glusterd_check_gsync_running_local(volinfo->volname, slave, + conf_path, &is_running); + if (!ret && !is_force && path_list && (_gf_true != is_running)) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_GSYNCD_OP_SET_FAILED, + GEOREP + " is not " + "set up for %s(master) and %s(slave)", + volname, slave); + *op_errstr = gf_strdup(GEOREP " is not set up"); + goto out; + } + + if (type == GF_GSYNC_OPTION_TYPE_PAUSE) { + ret = gd_pause_or_resume_gsync(dict, volname, slave, slave_host, + slave_vol, conf_path, op_errstr, + _gf_true); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PAUSE_FAILED, + GEOREP " Pause Failed"); + else + dict_del(volinfo->gsync_active_slaves, key); + + } else if (type == GF_GSYNC_OPTION_TYPE_RESUME) { + /* Add slave to the 
dict indicating geo-rep session is + * running*/ + ret = dict_set_dynstr_with_alloc(volinfo->gsync_active_slaves, key, + "running"); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to set " + "key:%s value:running in dict", + key); + goto out; + } + + ret = gd_pause_or_resume_gsync(dict, volname, slave, slave_host, + slave_vol, conf_path, op_errstr, + _gf_false); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RESUME_FAILED, + GEOREP " Resume Failed"); + dict_del(volinfo->gsync_active_slaves, key); + } + } else { + ret = stop_gsync(volname, slave, &status_msg, conf_path, op_errstr, + is_force); + + if (ret == 0 && status_msg) + ret = dict_set_str(rsp_dict, "gsync-status", status_msg); + if (!ret) { + ret = glusterd_create_status_file( + volinfo->volname, slave, slave_host, slave_vol, "Stopped"); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_UPDATE_STATEFILE_FAILED, + "Unable to update state_file. " + "Error : %s", + strerror(errno)); + } + dict_del(volinfo->gsync_active_slaves, key); + } + } + } + +out: + if (path_list) { + GF_FREE(path_list); + path_list = NULL; + } + + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; +} + +int +glusterd_get_slave_details_confpath(glusterd_volinfo_t *volinfo, dict_t *dict, + char **slave_url, char **slave_host, + char **slave_vol, char **conf_path, + char **op_errstr) +{ + int ret = -1; + char confpath[PATH_MAX] = ""; + glusterd_conf_t *priv = NULL; + char *slave = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + GF_ASSERT(priv); + + ret = dict_get_str(dict, "slave", &slave); + if (ret || !slave) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to fetch slave from dict"); + ret = -1; + goto out; + } + + ret = glusterd_get_slave_info(slave, slave_url, slave_host, slave_vol, + op_errstr); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVEINFO_FETCH_ERROR, + "Unable to fetch slave details."); + ret = -1; + goto out; + } + + ret = dict_set_str(dict, "slave_url", *slave_url); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to store slave IP."); + goto out; + } + + ret = dict_set_str(dict, "slave_host", *slave_host); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to store slave hostname"); + goto out; + } + + ret = dict_set_str(dict, "slave_vol", *slave_vol); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to store slave volume name."); + goto out; + } + + ret = snprintf(confpath, sizeof(confpath) - 1, + "%s/" GEOREP "/%s_%s_%s/gsyncd.conf", priv->workdir, + volinfo->volname, *slave_host, *slave_vol); + confpath[ret] = '\0'; + *conf_path = gf_strdup(confpath); + if (!(*conf_path)) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED, + "Unable to gf_strdup. 
Error: %s", strerror(errno)); + ret = -1; + goto out; + } + + ret = dict_set_str(dict, "conf_path", *conf_path); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to store conf_path"); + goto out; + } + +out: + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; +} + +int +glusterd_get_slave_info(char *slave, char **slave_url, char **hostname, + char **slave_vol, char **op_errstr) +{ + char *tmp = NULL; + char *save_ptr = NULL; + char **linearr = NULL; + int32_t ret = -1; + char errmsg[PATH_MAX] = ""; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + ret = glusterd_urltransform_single(slave, "normalize", &linearr); + if ((ret == -1) || (linearr[0] == NULL)) { + ret = snprintf(errmsg, sizeof(errmsg) - 1, "Invalid Url: %s", slave); + errmsg[ret] = '\0'; + *op_errstr = gf_strdup(errmsg); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_NORMALIZE_URL_FAIL, + "Failed to normalize url"); + goto out; + } + + tmp = strtok_r(linearr[0], "/", &save_ptr); + tmp = strtok_r(NULL, "/", &save_ptr); + slave = NULL; + if (tmp != NULL) { + slave = strtok_r(tmp, ":", &save_ptr); + } + if (slave) { + ret = glusterd_geo_rep_parse_slave(slave, hostname, op_errstr); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVE_URL_INVALID, + "Invalid slave url: %s", *op_errstr); + goto out; + } + gf_msg_debug(this->name, 0, "Hostname : %s", *hostname); + + *slave_url = gf_strdup(slave); + if (!*slave_url) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STRDUP_FAILED, + "Failed to gf_strdup"); + ret = -1; + goto out; + } + gf_msg_debug(this->name, 0, "Slave URL : %s", *slave_url); + ret = 0; + } else { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, + "Invalid slave name"); + goto out; + } + + slave = strtok_r(NULL, ":", &save_ptr); + if (slave) { + *slave_vol = gf_strdup(slave); + if (!*slave_vol) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STRDUP_FAILED, + "Failed to gf_strdup"); + ret = -1; + GF_FREE(*slave_url); + goto out; + } + gf_msg_debug(this->name, 0, "Slave Vol : %s", *slave_vol); + ret = 0; + } else { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, + "Invalid slave name"); + goto out; + } + +out: + if (linearr) + glusterd_urltransform_free(linearr, 1); + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; +} + +static void +runinit_gsyncd_setrx(runner_t *runner, char *conf_path) +{ + runinit(runner); + runner_add_args(runner, GSYNCD_PREFIX "/gsyncd", "-c", NULL); + runner_argprintf(runner, "%s", conf_path); + runner_add_arg(runner, "--config-set-rx"); +} + +static int +glusterd_check_gsync_present(int *valid_state) +{ + char buff[PATH_MAX] = { + 0, + }; + runner_t runner = { + 0, + }; + char *ptr = NULL; + int ret = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + runinit(&runner); + runner_add_args(&runner, GSYNCD_PREFIX "/gsyncd", "--version", NULL); + runner_redir(&runner, STDOUT_FILENO, RUN_PIPE); + ret = runner_start(&runner); + if (ret == -1) { + if (errno == ENOENT) { + gf_msg("glusterd", GF_LOG_INFO, ENOENT, GD_MSG_MODULE_NOT_INSTALLED, + GEOREP + " module " + "not installed in the system"); + *valid_state = 0; + } else { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_MODULE_ERROR, + GEOREP " module not working as desired"); + *valid_state = -1; + } + goto out; + } + + ptr = fgets(buff, sizeof(buff), runner_chio(&runner, STDOUT_FILENO)); + if (ptr) { + if (!strstr(buff, "gsyncd")) { + ret = -1; + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_MODULE_ERROR, + GEOREP " module 
not working as desired"); + *valid_state = -1; + goto out; + } + } else { + ret = -1; + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_MODULE_ERROR, + GEOREP " module not working as desired"); + *valid_state = -1; + goto out; + } + + ret = 0; +out: + + runner_end(&runner); + + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; +} + +static int +create_conf_file(glusterd_conf_t *conf, char *conf_path) +#define RUN_GSYNCD_CMD \ + do { \ + ret = runner_run_reuse(&runner); \ + if (ret == -1) { \ + runner_log(&runner, "glusterd", GF_LOG_ERROR, "command failed"); \ + runner_end(&runner); \ + goto out; \ + } \ + runner_end(&runner); \ + } while (0) +{ + int ret = 0; + runner_t runner = { + 0, + }; + char georepdir[PATH_MAX] = { + 0, + }; + int valid_state = 0; + + valid_state = -1; + ret = glusterd_check_gsync_present(&valid_state); + if (-1 == ret) { + ret = valid_state; + goto out; + } + + ret = snprintf(georepdir, sizeof(georepdir) - 1, "%s/" GEOREP, + conf->workdir); + georepdir[ret] = '\0'; + + /************ + * master pre-configuration + ************/ + + /* remote-gsyncd */ + runinit_gsyncd_setrx(&runner, conf_path); + runner_add_args(&runner, "remote-gsyncd", GSYNCD_PREFIX "/gsyncd", ".", ".", + NULL); + RUN_GSYNCD_CMD; + + runinit_gsyncd_setrx(&runner, conf_path); + runner_add_args(&runner, "remote-gsyncd", "/nonexistent/gsyncd", ".", + "^ssh:", NULL); + RUN_GSYNCD_CMD; + + /* gluster-command-dir */ + runinit_gsyncd_setrx(&runner, conf_path); + runner_add_args(&runner, "gluster-command-dir", SBIN_DIR "/", ".", ".", + NULL); + RUN_GSYNCD_CMD; + + /* gluster-params */ + runinit_gsyncd_setrx(&runner, conf_path); + runner_add_args(&runner, "gluster-params", "aux-gfid-mount acl", ".", ".", + NULL); + RUN_GSYNCD_CMD; + + /* ssh-command */ + runinit_gsyncd_setrx(&runner, conf_path); + runner_add_arg(&runner, "ssh-command"); + runner_argprintf(&runner, + "ssh -oPasswordAuthentication=no " + "-oStrictHostKeyChecking=no " + "-i %s/secret.pem", + georepdir); + runner_add_args(&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* ssh-command tar */ + runinit_gsyncd_setrx(&runner, conf_path); + runner_add_arg(&runner, "ssh-command-tar"); + runner_argprintf(&runner, + "ssh -oPasswordAuthentication=no " + "-oStrictHostKeyChecking=no " + "-i %s/tar_ssh.pem", + georepdir); + runner_add_args(&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* pid-file */ + runinit_gsyncd_setrx(&runner, conf_path); + runner_add_arg(&runner, "pid-file"); + runner_argprintf(&runner, + "%s/${mastervol}_${remotehost}_${slavevol}/monitor.pid", + georepdir); + runner_add_args(&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* geo-rep-working-dir */ + runinit_gsyncd_setrx(&runner, conf_path); + runner_add_arg(&runner, "georep-session-working-dir"); + runner_argprintf(&runner, "%s/${mastervol}_${remotehost}_${slavevol}/", + georepdir); + runner_add_args(&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* state-file */ + runinit_gsyncd_setrx(&runner, conf_path); + runner_add_arg(&runner, "state-file"); + runner_argprintf(&runner, + "%s/${mastervol}_${remotehost}_${slavevol}/monitor.status", + georepdir); + runner_add_args(&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* state-detail-file */ + runinit_gsyncd_setrx(&runner, conf_path); + runner_add_arg(&runner, "state-detail-file"); + runner_argprintf( + &runner, + "%s/${mastervol}_${remotehost}_${slavevol}/${eSlave}-detail.status", + georepdir); + runner_add_args(&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* state-socket */ + runinit_gsyncd_setrx(&runner, conf_path); + 
runner_add_arg(&runner, "state-socket-unencoded"); + runner_argprintf( + &runner, "%s/${mastervol}_${remotehost}_${slavevol}/${eSlave}.socket", + georepdir); + runner_add_args(&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* socketdir */ + runinit_gsyncd_setrx(&runner, conf_path); + runner_add_args(&runner, "socketdir", GLUSTERD_SOCK_DIR, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* log-file */ + runinit_gsyncd_setrx(&runner, conf_path); + runner_add_arg(&runner, "log-file"); + runner_argprintf(&runner, "%s/%s/${mastervol}/${eSlave}.log", conf->logdir, + GEOREP); + runner_add_args(&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* changelog-log-file */ + runinit_gsyncd_setrx(&runner, conf_path); + runner_add_arg(&runner, "changelog-log-file"); + runner_argprintf(&runner, + "%s/%s/${mastervol}/${eSlave}${local_id}-changes.log", + conf->logdir, GEOREP); + runner_add_args(&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* gluster-log-file */ + runinit_gsyncd_setrx(&runner, conf_path); + runner_add_arg(&runner, "gluster-log-file"); + runner_argprintf(&runner, + "%s/%s/${mastervol}/${eSlave}${local_id}.gluster.log", + conf->logdir, GEOREP); + runner_add_args(&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* ignore-deletes */ + runinit_gsyncd_setrx(&runner, conf_path); + runner_add_args(&runner, "ignore-deletes", "false", ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* special-sync-mode */ + runinit_gsyncd_setrx(&runner, conf_path); + runner_add_args(&runner, "special-sync-mode", "partial", ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* change-detector == changelog */ + runinit_gsyncd_setrx(&runner, conf_path); + runner_add_args(&runner, "change-detector", "changelog", ".", ".", NULL); + RUN_GSYNCD_CMD; + + runinit_gsyncd_setrx(&runner, conf_path); + runner_add_arg(&runner, "working-dir"); + runner_argprintf(&runner, "%s/${mastervol}/${eSlave}", + DEFAULT_GLUSTERFSD_MISC_DIRETORY); + runner_add_args(&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /************ + * slave pre-configuration + ************/ + + /* slave-gluster-command-dir */ + runinit_gsyncd_setrx(&runner, conf_path); + runner_add_args(&runner, "slave-gluster-command-dir", SBIN_DIR "/", ".", + NULL); + RUN_GSYNCD_CMD; + + /* gluster-params */ + runinit_gsyncd_setrx(&runner, conf_path); + runner_add_args(&runner, "gluster-params", "aux-gfid-mount acl", ".", NULL); + RUN_GSYNCD_CMD; + + /* log-file */ + runinit_gsyncd_setrx(&runner, conf_path); + runner_add_arg(&runner, "log-file"); + runner_argprintf(&runner, + "%s/%s-slaves/" + "${session_owner}:${local_node}${local_id}.${slavevol}." + "log", + conf->logdir, GEOREP); + runner_add_args(&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* MountBroker log-file */ + runinit_gsyncd_setrx(&runner, conf_path); + runner_add_arg(&runner, "log-file-mbr"); + runner_argprintf(&runner, + "%s/%s-slaves/mbr/" + "${session_owner}:${local_node}${local_id}.${slavevol}." + "log", + conf->logdir, GEOREP); + runner_add_args(&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* gluster-log-file */ + runinit_gsyncd_setrx(&runner, conf_path); + runner_add_arg(&runner, "gluster-log-file"); + runner_argprintf(&runner, + "%s/%s-slaves/" + "${session_owner}:${local_node}${local_id}.${slavevol}." + "gluster.log", + conf->logdir, GEOREP); + runner_add_args(&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + +out: + return ret ? 
-1 : 0; +} + +static int +glusterd_create_essential_dir_files(glusterd_volinfo_t *volinfo, dict_t *dict, + char *slave, char *slave_host, + char *slave_vol, char **op_errstr) +{ + int ret = -1; + char *conf_path = NULL; + char *statefile = NULL; + char buf[PATH_MAX] = ""; + char errmsg[PATH_MAX] = ""; + glusterd_conf_t *conf = NULL; + struct stat stbuf = { + 0, + }; + xlator_t *this = NULL; + int32_t len = 0; + + this = THIS; + GF_ASSERT(this); + + conf = this->private; + + ret = dict_get_str(dict, "conf_path", &conf_path); + if (ret) { + snprintf(errmsg, sizeof(errmsg), "Unable to fetch conf file path."); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", + errmsg); + goto out; + } + + ret = dict_get_str(dict, "statefile", &statefile); + if (ret) { + snprintf(errmsg, sizeof(errmsg), "Unable to fetch statefile path."); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", + errmsg); + goto out; + } + + ret = snprintf(buf, sizeof(buf), "%s/" GEOREP "/%s_%s_%s", conf->workdir, + volinfo->volname, slave_host, slave_vol); + if ((ret < 0) || (ret >= sizeof(buf))) { + ret = -1; + goto out; + } + ret = mkdir_p(buf, 0755, _gf_true); + if (ret) { + len = snprintf(errmsg, sizeof(errmsg), + "Unable to create %s" + ". Error : %s", + buf, strerror(errno)); + if (len < 0) { + strcpy(errmsg, "<error>"); + } + *op_errstr = gf_strdup(errmsg); + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED, "%s", + errmsg); + goto out; + } + + ret = snprintf(buf, PATH_MAX, "%s/" GEOREP "/%s", conf->logdir, + volinfo->volname); + if ((ret < 0) || (ret >= PATH_MAX)) { + ret = -1; + goto out; + } + ret = mkdir_p(buf, 0755, _gf_true); + if (ret) { + len = snprintf(errmsg, sizeof(errmsg), + "Unable to create %s" + ". Error : %s", + buf, strerror(errno)); + if (len < 0) { + strcpy(errmsg, "<error>"); + } + *op_errstr = gf_strdup(errmsg); + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED, "%s", + errmsg); + goto out; + } + + ret = sys_lstat(conf_path, &stbuf); + if (!ret) { + gf_msg_debug(this->name, 0, + "Session already running." + " Not creating config file again."); + } else { + ret = create_conf_file(conf, conf_path); + if (ret || sys_lstat(conf_path, &stbuf)) { + snprintf(errmsg, sizeof(errmsg), + "Failed to create" + " config file(%s).", + conf_path); + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, "%s", + errmsg); + goto out; + } + } + + ret = sys_lstat(statefile, &stbuf); + if (!ret) { + gf_msg_debug(this->name, 0, + "Session already running." + " Not creating status file again."); + goto out; + } else { + ret = glusterd_create_status_file(volinfo->volname, slave, slave_host, + slave_vol, "Created"); + if (ret || sys_lstat(statefile, &stbuf)) { + snprintf(errmsg, sizeof(errmsg), + "Unable to create %s" + ". 
Error : %s", + statefile, strerror(errno)); + *op_errstr = gf_strdup(errmsg); + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, "%s", + errmsg); + ret = -1; + goto out; + } + } + +out: + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; +} + +int +glusterd_op_gsync_create(dict_t *dict, char **op_errstr, dict_t *rsp_dict) +{ + char common_pem_file[PATH_MAX] = ""; + char errmsg[PATH_MAX] = { + 0, + }; + char hooks_args[PATH_MAX] = ""; + char uuid_str[64] = ""; + char *host_uuid = NULL; + char *slave_url = NULL; + char *slave_url_buf = NULL; + char *slave_user = NULL; + char *slave_ip = NULL; + char *save_ptr = NULL; + char *slave_host = NULL; + char *slave_vol = NULL; + char *arg_buf = NULL; + char *volname = NULL; + char *slave = NULL; + int32_t ret = -1; + int32_t is_pem_push = -1; + int32_t ssh_port = 22; + gf_boolean_t is_force = -1; + glusterd_conf_t *conf = NULL; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + char old_working_dir[PATH_MAX] = {0}; + char new_working_dir[PATH_MAX] = {0}; + char *slave_voluuid = NULL; + char *old_slavehost = NULL; + gf_boolean_t is_existing_session = _gf_false; + int32_t len = 0; + + this = THIS; + GF_ASSERT(this); + conf = this->private; + GF_ASSERT(conf); + GF_ASSERT(dict); + GF_ASSERT(op_errstr); + + ret = glusterd_op_gsync_args_get(dict, op_errstr, &volname, &slave, + &host_uuid); + if (ret) + goto out; + + len = snprintf(common_pem_file, sizeof(common_pem_file), + "%s" GLUSTERD_COMMON_PEM_PUB_FILE, conf->workdir); + if ((len < 0) || (len >= sizeof(common_pem_file))) { + ret = -1; + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, + "Volinfo for %s (master) not found", volname); + goto out; + } + + ret = dict_get_str(dict, "slave_vol", &slave_vol); + if (ret) { + snprintf(errmsg, sizeof(errmsg), "Unable to fetch slave volume name."); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", + errmsg); + goto out; + } + + ret = dict_get_str(dict, "slave_url", &slave_url); + if (ret) { + snprintf(errmsg, sizeof(errmsg), "Unable to fetch slave IP."); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", + errmsg); + ret = -1; + goto out; + } + + /* Fetch the slave_user and slave_ip from the slave_url. + * If the slave_user is not present. 
Use "root" + */ + if (strstr(slave_url, "@")) { + slave_url_buf = gf_strdup(slave_url); + if (!slave_url_buf) { + ret = -1; + goto out; + } + slave_user = strtok_r(slave_url, "@", &save_ptr); + slave_ip = strtok_r(NULL, "@", &save_ptr); + } else { + slave_user = "root"; + slave_ip = slave_url; + } + + if (!slave_user || !slave_ip) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVE_URL_INVALID, + "Invalid slave url."); + ret = -1; + goto out; + } + + ret = dict_get_str(dict, "slave_host", &slave_host); + if (ret) { + snprintf(errmsg, sizeof(errmsg), "Unable to fetch slave host"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", + errmsg); + ret = -1; + goto out; + } + + ret = dict_get_int32(dict, "ssh_port", &ssh_port); + if (ret < 0 && ret != -ENOENT) { + snprintf(errmsg, sizeof(errmsg), "Fetching ssh_port failed"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", + errmsg); + ret = -1; + goto out; + } + + is_force = dict_get_str_boolean(dict, "force", _gf_false); + + uuid_utoa_r(MY_UUID, uuid_str); + if (!strcmp(uuid_str, host_uuid)) { + ret = dict_get_int32(dict, "push_pem", &is_pem_push); + if (!ret && is_pem_push) { + gf_msg_debug(this->name, 0, + "Trying to setup" + " pem files in slave"); + is_pem_push = 1; + } else + is_pem_push = 0; + + len = snprintf(hooks_args, sizeof(hooks_args), + "is_push_pem=%d,pub_file=%s,slave_user=%s," + "slave_ip=%s,slave_vol=%s,ssh_port=%d", + is_pem_push, common_pem_file, slave_user, slave_ip, + slave_vol, ssh_port); + if ((len < 0) || (len >= sizeof(hooks_args))) { + ret = -1; + goto out; + } + } else + snprintf(hooks_args, sizeof(hooks_args), + "This argument will stop the hooks script"); + + arg_buf = gf_strdup(hooks_args); + if (!arg_buf) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STRDUP_FAILED, + "Failed to gf_strdup"); + if (is_force) { + ret = 0; + goto create_essentials; + } + ret = -1; + goto out; + } + + ret = dict_set_str(dict, "hooks_args", arg_buf); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set hooks_args in dict."); + if (is_force) { + ret = 0; + goto create_essentials; + } + goto out; + } + +create_essentials: + /* Fetch slave volume uuid, to get stored in volume info. */ + ret = dict_get_str(dict, "slave_voluuid", &slave_voluuid); + if (ret) { + snprintf(errmsg, sizeof(errmsg), + "Unable to fetch slave volume uuid from dict"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", + errmsg); + ret = -1; + goto out; + } + + is_existing_session = dict_get_str_boolean(dict, "existing_session", + _gf_false); + if (is_existing_session) { + ret = dict_get_str(dict, "old_slavehost", &old_slavehost); + if (ret) { + snprintf(errmsg, sizeof(errmsg), "Unable to fetch old_slavehost"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", + errmsg); + ret = -1; + goto out; + } + + /* Rename existing geo-rep session with new Slave Host */ + ret = snprintf(old_working_dir, sizeof(old_working_dir) - 1, + "%s/" GEOREP "/%s_%s_%s", conf->workdir, + volinfo->volname, old_slavehost, slave_vol); + + ret = snprintf(new_working_dir, sizeof(new_working_dir) - 1, + "%s/" GEOREP "/%s_%s_%s", conf->workdir, + volinfo->volname, slave_host, slave_vol); + + ret = sys_rename(old_working_dir, new_working_dir); + if (!ret) { + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_FORCE_CREATE_SESSION, + "rename of old working dir %s to " + "new working dir %s is done! 
", + old_working_dir, new_working_dir); + } else { + if (errno == ENOENT) { + /* log error, but proceed with directory + * creation below */ + gf_msg_debug(this->name, 0, + "old_working_dir(%s) " + "not present.", + old_working_dir); + } else { + len = snprintf(errmsg, sizeof(errmsg), + "rename of old working dir %s " + "to new working dir %s " + "failed! Error: %s", + old_working_dir, new_working_dir, + strerror(errno)); + if (len < 0) { + strcpy(errmsg, "<error>"); + } + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_FORCE_CREATE_SESSION, + "rename of old working dir %s to " + "new working dir %s failed! Error: %s!", + old_working_dir, new_working_dir, strerror(errno)); + + ret = -1; + goto out; + } + } + } + + ret = glusterd_create_essential_dir_files(volinfo, dict, slave, slave_host, + slave_vol, op_errstr); + if (ret) + goto out; + + ret = glusterd_store_slave_in_info(volinfo, slave, host_uuid, slave_voluuid, + op_errstr, is_force); + if (ret) { + snprintf(errmsg, sizeof(errmsg), + "Unable to store" + " slave info."); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVEINFO_STORE_ERROR, "%s", + errmsg); + goto out; + } + + /* Enable marker and changelog */ + ret = glusterd_set_gsync_confs(volinfo); + if (ret != 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_MARKER_START_FAIL, + "marker/changelog" + " start failed"); + snprintf(errmsg, sizeof(errmsg), "Index initialization failed"); + + ret = -1; + goto out; + } + +out: + if (ret && errmsg[0] != '\0') { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GSYNCD_ERROR, "%s", errmsg); + *op_errstr = gf_strdup(errmsg); + } + + GF_FREE(slave_url_buf); + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-geo-rep.h b/xlators/mgmt/glusterd/src/glusterd-geo-rep.h new file mode 100644 index 00000000000..7d1318f522c --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-geo-rep.h @@ -0,0 +1,52 @@ +/* + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ +#ifndef _GLUSTERD_GEO_REP_H_ +#define _GLUSTERD_GEO_REP_H_ + +#ifndef GSYNC_CONF_TEMPLATE +#define GSYNC_CONF_TEMPLATE GEOREP "/gsyncd_template.conf" +#endif + +/* <slave host>::<slave volume> */ +#define SLAVE_URL_INFO_MAX (_POSIX_HOST_NAME_MAX + GD_VOLUME_NAME_MAX + 3) + +/* slave info format: + * <master host uuid>:ssh://{<slave_user>@}<slave host>::<slave volume> \ + * :<slave volume uuid> */ +#define VOLINFO_SLAVE_URL_MAX \ + (LOGIN_NAME_MAX + (2 * GF_UUID_BUF_SIZE) + SLAVE_URL_INFO_MAX + 10) + +typedef struct glusterd_gsync_status_temp { + dict_t *rsp_dict; + glusterd_volinfo_t *volinfo; + char *node; +} glusterd_gsync_status_temp_t; + +typedef struct gsync_status_param { + glusterd_volinfo_t *volinfo; + int is_active; +} gsync_status_param_t; + +int +gsync_status(char *master, char *slave, char *conf_path, int *status, + gf_boolean_t *is_template_in_use); + +void +glusterd_check_geo_rep_configured(glusterd_volinfo_t *volinfo, + gf_boolean_t *flag); +int +_get_slave_status(dict_t *dict, char *key, data_t *value, void *data); +int +glusterd_check_geo_rep_running(gsync_status_param_t *param, char **op_errstr); + +int +glusterd_get_gsync_status_mst(glusterd_volinfo_t *volinfo, dict_t *rsp_dict, + char *node); +#endif diff --git a/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc-helper.c new file mode 100644 index 00000000000..319bfa140f3 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc-helper.c @@ -0,0 +1,235 @@ +/* + Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#include "glusterd.h" +#include "glusterd-utils.h" +#include "glusterd-gfproxyd-svc-helper.h" +#include "glusterd-messages.h" +#include <glusterfs/syscall.h> +#include "glusterd-volgen.h" + +void +glusterd_svc_build_gfproxyd_rundir(glusterd_volinfo_t *volinfo, char *path, + int path_len) +{ + char workdir[PATH_MAX] = { + 0, + }; + glusterd_conf_t *priv = THIS->private; + + GLUSTERD_GET_VOLUME_PID_DIR(workdir, volinfo, priv); + + snprintf(path, path_len, "%s", workdir); +} + +void +glusterd_svc_build_gfproxyd_socket_filepath(glusterd_volinfo_t *volinfo, + char *path, int path_len) +{ + char sockfilepath[PATH_MAX] = { + 0, + }; + char rundir[PATH_MAX] = { + 0, + }; + int32_t len = 0; + + glusterd_svc_build_gfproxyd_rundir(volinfo, rundir, sizeof(rundir)); + len = snprintf(sockfilepath, sizeof(sockfilepath), "%s/run-%s", rundir, + uuid_utoa(MY_UUID)); + if ((len < 0) || (len >= sizeof(sockfilepath))) { + sockfilepath[0] = 0; + } + + glusterd_set_socket_filepath(sockfilepath, path, path_len); +} + +void +glusterd_svc_build_gfproxyd_pidfile(glusterd_volinfo_t *volinfo, char *path, + int path_len) +{ + char rundir[PATH_MAX] = { + 0, + }; + + glusterd_svc_build_gfproxyd_rundir(volinfo, rundir, sizeof(rundir)); + + snprintf(path, path_len, "%s/%s.gfproxyd.pid", rundir, volinfo->volname); +} + +void +glusterd_svc_build_gfproxyd_volfile_path(glusterd_volinfo_t *volinfo, + char *path, int path_len) +{ + char workdir[PATH_MAX] = { + 0, + }; + glusterd_conf_t *priv = THIS->private; + + GLUSTERD_GET_VOLUME_DIR(workdir, volinfo, priv); + + snprintf(path, path_len, "%s/%s.gfproxyd.vol", workdir, volinfo->volname); +} + +void +glusterd_svc_build_gfproxyd_logdir(char *logdir, char *volname, size_t len) +{ + glusterd_conf_t *conf = THIS->private; + snprintf(logdir, len, "%s/gfproxy/%s", conf->logdir, volname); +} + +void +glusterd_svc_build_gfproxyd_logfile(char *logfile, char *logdir, size_t len) +{ + snprintf(logfile, len, "%s/gfproxyd.log", logdir); +} + +int +glusterd_is_gfproxyd_enabled(glusterd_volinfo_t *volinfo) +{ + return glusterd_volinfo_get_boolean(volinfo, VKEY_CONFIG_GFPROXY); +} + +static int +glusterd_svc_get_gfproxyd_volfile(glusterd_volinfo_t *volinfo, char *svc_name, + char *orgvol, char **tmpvol, int path_len) +{ + int tmp_fd = -1; + int ret = -1; + int need_unlink = 0; + + glusterd_svc_build_gfproxyd_volfile_path(volinfo, orgvol, path_len); + + ret = gf_asprintf(tmpvol, "/tmp/g%s-XXXXXX", svc_name); + if (ret < 0) { + goto out; + } + + /* coverity[SECURE_TEMP] mkstemp uses 0600 as the mode and is safe */ + tmp_fd = mkstemp(*tmpvol); + if (tmp_fd < 0) { + gf_msg("glusterd", GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED, + "Unable to create temp file" + " %s:(%s)", + *tmpvol, strerror(errno)); + ret = -1; + goto out; + } + + need_unlink = 1; + ret = glusterd_build_gfproxyd_volfile(volinfo, *tmpvol); +out: + if (need_unlink && ret < 0) + sys_unlink(*tmpvol); + + if ((ret < 0) && (*tmpvol != NULL)) { + GF_FREE(*tmpvol); + *tmpvol = NULL; + } + + if (tmp_fd >= 0) + sys_close(tmp_fd); + + return ret; +} + +int +glusterd_svc_check_gfproxyd_volfile_identical(char *svc_name, + glusterd_volinfo_t *volinfo, + gf_boolean_t *identical) +{ + char orgvol[PATH_MAX] = { + 0, + }; + char *tmpvol = NULL; + int ret = -1; + int need_unlink = 0; + + GF_VALIDATE_OR_GOTO("glusterd", identical, out); + + ret = glusterd_svc_get_gfproxyd_volfile(volinfo, svc_name, orgvol, &tmpvol, + PATH_MAX); + if (ret) + goto out; + + need_unlink = 1; + ret = glusterd_check_files_identical(orgvol, tmpvol, identical); + if 
(ret) + goto out; + +out: + if (need_unlink) + sys_unlink(tmpvol); + + if (tmpvol != NULL) + GF_FREE(tmpvol); + + return ret; +} + +int +glusterd_svc_check_gfproxyd_topology_identical(char *svc_name, + glusterd_volinfo_t *volinfo, + gf_boolean_t *identical) +{ + char orgvol[PATH_MAX] = { + 0, + }; + char *tmpvol = NULL; + int ret = -1; + int tmpclean = 0; + + GF_VALIDATE_OR_GOTO("glusterd", identical, out); + + ret = glusterd_svc_get_gfproxyd_volfile(volinfo, svc_name, orgvol, &tmpvol, + PATH_MAX); + if (ret) + goto out; + + tmpclean = 1; /* SET the flag to unlink() tmpfile */ + + /* Compare the topology of volfiles */ + ret = glusterd_check_topology_identical(orgvol, tmpvol, identical); +out: + if (tmpclean) + sys_unlink(tmpvol); + + if (tmpvol != NULL) + GF_FREE(tmpvol); + + return ret; +} + +glusterd_volinfo_t * +glusterd_gfproxyd_volinfo_from_svc(glusterd_svc_t *svc) +{ + glusterd_volinfo_t *volinfo = NULL; + glusterd_gfproxydsvc_t *gfproxyd = NULL; + + /* Get volinfo->gfproxyd from svc object */ + gfproxyd = cds_list_entry(svc, glusterd_gfproxydsvc_t, svc); + if (!gfproxyd) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_SNAPD_OBJ_GET_FAIL, + "Failed to get gfproxyd " + "object from gfproxyd service"); + goto out; + } + + /* Get volinfo from gfproxyd */ + volinfo = cds_list_entry(gfproxyd, glusterd_volinfo_t, gfproxyd); + if (!volinfo) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, + "Failed to get volinfo from " + "from gfproxyd"); + goto out; + } +out: + return volinfo; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc-helper.h b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc-helper.h new file mode 100644 index 00000000000..3aca218a65d --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc-helper.h @@ -0,0 +1,51 @@ +/* + Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#ifndef _GLUSTERD_GFPROXYD_SVC_HELPER_H_ +#define _GLUSTERD_GFPROXYD_SVC_HELPER_H_ + +#include "glusterd.h" + +void +glusterd_svc_build_gfproxyd_rundir(glusterd_volinfo_t *volinfo, char *path, + int path_len); + +void +glusterd_svc_build_gfproxyd_socket_filepath(glusterd_volinfo_t *volinfo, + char *path, int path_len); + +void +glusterd_svc_build_gfproxyd_pidfile(glusterd_volinfo_t *volinfo, char *path, + int path_len); + +void +glusterd_svc_build_gfproxyd_volfile_path(glusterd_volinfo_t *volinfo, + char *path, int path_len); + +void +glusterd_svc_build_gfproxyd_logdir(char *logdir, char *volname, size_t len); + +void +glusterd_svc_build_gfproxyd_logfile(char *logfile, char *logdir, size_t len); + +int +glusterd_svc_check_gfproxyd_volfile_identical(char *svc_name, + glusterd_volinfo_t *volinfo, + gf_boolean_t *identical); +int +glusterd_svc_check_gfproxyd_topology_identical(char *svc_name, + glusterd_volinfo_t *volinfo, + gf_boolean_t *identical); +int +glusterd_is_gfproxyd_enabled(glusterd_volinfo_t *volinfo); + +glusterd_volinfo_t * +glusterd_gfproxyd_volinfo_from_svc(glusterd_svc_t *svc); +#endif diff --git a/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c new file mode 100644 index 00000000000..a0bfea41f0f --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c @@ -0,0 +1,478 @@ +/* + Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include <glusterfs/globals.h> +#include <glusterfs/run.h> +#include "glusterd.h" +#include "glusterd-utils.h" +#include "glusterd-volgen.h" +#include "glusterd-gfproxyd-svc.h" +#include "glusterd-messages.h" +#include "glusterd-svc-helper.h" +#include "glusterd-svc-mgmt.h" +#include "glusterd-gfproxyd-svc-helper.h" +#include <glusterfs/syscall.h> + +void +glusterd_gfproxydsvc_build(glusterd_svc_t *svc) +{ + svc->manager = glusterd_gfproxydsvc_manager; + svc->start = glusterd_gfproxydsvc_start; + svc->stop = glusterd_gfproxydsvc_stop; + svc->reconfigure = glusterd_gfproxydsvc_reconfigure; +} + +int +glusterd_gfproxydsvc_stop(glusterd_svc_t *svc, int sig) +{ + glusterd_volinfo_t *volinfo = NULL; + int ret = 0; + + ret = glusterd_svc_stop(svc, sig); + if (ret) + goto out; + + volinfo = glusterd_gfproxyd_volinfo_from_svc(svc); + volinfo->gfproxyd.port = 0; + +out: + return ret; +} + +int +glusterd_gfproxydsvc_init(glusterd_volinfo_t *volinfo) +{ + int ret = -1; + char rundir[PATH_MAX] = { + 0, + }; + char sockpath[PATH_MAX] = { + 0, + }; + char pidfile[PATH_MAX] = { + 0, + }; + char volfile[PATH_MAX] = { + 0, + }; + char logdir[PATH_MAX] = { + 0, + }; + char logfile[PATH_MAX] = { + 0, + }; + char volfileid[256] = {0}; + glusterd_svc_t *svc = NULL; + glusterd_conf_t *priv = NULL; + glusterd_conn_notify_t notify = NULL; + xlator_t *this = NULL; + char *volfileserver = NULL; + int32_t len = 0; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, out); + + svc = &(volinfo->gfproxyd.svc); + + ret = snprintf(svc->name, sizeof(svc->name), "%s", gfproxyd_svc_name); + if (ret < 0) + goto out; + + notify = glusterd_svc_common_rpc_notify; + + glusterd_svc_build_gfproxyd_rundir(volinfo, rundir, sizeof(rundir)); + 
glusterd_svc_create_rundir(rundir); + + /* Initialize the connection mgmt */ + glusterd_svc_build_gfproxyd_socket_filepath(volinfo, sockpath, + sizeof(sockpath)); + ret = glusterd_conn_init(&(svc->conn), sockpath, 600, notify); + if (ret) + goto out; + + /* Initialize the process mgmt */ + glusterd_svc_build_gfproxyd_pidfile(volinfo, pidfile, sizeof(pidfile)); + glusterd_svc_build_gfproxyd_volfile_path(volinfo, volfile, sizeof(volfile)); + glusterd_svc_build_gfproxyd_logdir(logdir, volinfo->volname, + sizeof(logdir)); + ret = mkdir_p(logdir, 0755, _gf_true); + if ((ret == -1) && (EEXIST != errno)) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_CREATE_DIR_FAILED, + "Unable to create logdir %s", logdir); + goto out; + } + glusterd_svc_build_gfproxyd_logfile(logfile, logdir, sizeof(logfile)); + len = snprintf(volfileid, sizeof(volfileid), "gfproxyd/%s", + volinfo->volname); + if ((len < 0) || (len >= sizeof(volfileid))) { + ret = -1; + goto out; + } + + if (dict_get_strn(this->options, "transport.socket.bind-address", + SLEN("transport.socket.bind-address"), + &volfileserver) != 0) { + volfileserver = "localhost"; + } + ret = glusterd_proc_init(&(svc->proc), gfproxyd_svc_name, pidfile, logdir, + logfile, volfile, volfileid, volfileserver); + if (ret) + goto out; + +out: + gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret); + return ret; +} + +static int +glusterd_gfproxydsvc_create_volfile(glusterd_volinfo_t *volinfo) +{ + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + + ret = glusterd_generate_gfproxyd_volfile(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL, + "Failed to create volfile"); + goto out; + } + +out: + gf_msg_debug(this ? 
this->name : "glusterd", 0, "Returning %d", ret); + + return ret; +} + +int +glusterd_gfproxydsvc_manager(glusterd_svc_t *svc, void *data, int flags) +{ + int ret = -1; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + + volinfo = data; + GF_VALIDATE_OR_GOTO(this->name, data, out); + + if (!svc->inited) { + ret = glusterd_gfproxydsvc_init(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FAILED_INIT_QUOTASVC, + "Failed to init " + "gfproxyd service"); + goto out; + } else { + svc->inited = _gf_true; + gf_msg_debug(this->name, 0, + "gfproxyd service " + "initialized"); + } + } + + ret = glusterd_is_gfproxyd_enabled(volinfo); + if (ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, + "Failed to read volume " + "options"); + goto out; + } + + if (ret) { + if (!glusterd_is_volume_started(volinfo)) { + if (glusterd_proc_is_running(&svc->proc)) { + ret = svc->stop(svc, SIGTERM); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPD_STOP_FAIL, + "Couldn't stop gfproxyd for " + "volume: %s", + volinfo->volname); + } else { + /* Since gfproxyd is not running set ret to 0 */ + ret = 0; + } + goto out; + } + + ret = glusterd_gfproxydsvc_create_volfile(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPD_CREATE_FAIL, + "Couldn't create " + "gfroxyd volfile for volume: %s", + volinfo->volname); + goto out; + } + ret = svc->stop(svc, SIGTERM); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPD_START_FAIL, + "Couldn't stop " + "gfproxyd for volume: %s", + volinfo->volname); + goto out; + } + + ret = svc->start(svc, flags); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPD_START_FAIL, + "Couldn't start " + "gfproxyd for volume: %s", + volinfo->volname); + goto out; + } + + glusterd_volinfo_ref(volinfo); + ret = glusterd_conn_connect(&(svc->conn)); + if (ret) { + glusterd_volinfo_unref(volinfo); + volinfo = NULL; + goto out; + } + + } else if (glusterd_proc_is_running(&svc->proc)) { + ret = svc->stop(svc, SIGTERM); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPD_STOP_FAIL, + "Couldn't stop gfproxyd for volume: %s", volinfo->volname); + goto out; + } + } + +out: + if (ret) { + if (volinfo) { + gf_event(EVENT_SVC_MANAGER_FAILED, "volume=%s;svc_name=%s", + volinfo->volname, svc->name); + } + } + + gf_msg_debug("glusterd", 0, "Returning %d", ret); + + return ret; +} + +int +glusterd_gfproxydsvc_start(glusterd_svc_t *svc, int flags) +{ + int ret = -1; + runner_t runner = { + 0, + }; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + char valgrind_logfile[PATH_MAX] = {0}; + int gfproxyd_port = 0; + char msg[1024] = { + 0, + }; + char gfproxyd_id[PATH_MAX] = { + 0, + }; + glusterd_volinfo_t *volinfo = NULL; + char *localtime_logging = NULL; + int32_t len = 0; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, out); + + volinfo = glusterd_gfproxyd_volinfo_from_svc(svc); + if (!volinfo) + goto out; + + ret = sys_access(svc->proc.volfile, F_OK); + if (ret) { + gf_msg(this->name, GF_LOG_DEBUG, 0, GD_MSG_VOLINFO_GET_FAIL, + "gfproxyd Volfile %s is not present", svc->proc.volfile); + ret = glusterd_gfproxydsvc_create_volfile(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL, + "Couldn't create " + "gfproxyd volfile for volume: %s", + volinfo->volname); + goto out; + } + } + runinit(&runner); + + if 
(this->ctx->cmd_args.vgtool != _gf_none) { + len = snprintf(valgrind_logfile, PATH_MAX, "%s/valgrind-%s", + svc->proc.logdir, svc->proc.logfile); + if ((len < 0) || (len >= PATH_MAX)) { + ret = -1; + goto out; + } + + if (this->ctx->cmd_args.vgtool == _gf_memcheck) + runner_add_args(&runner, "valgrind", "--leak-check=full", + "--trace-children=yes", "--track-origins=yes", + NULL); + else + runner_add_args(&runner, "valgrind", "--tool=drd", NULL); + + runner_argprintf(&runner, "--log-file=%s", valgrind_logfile); + } + + snprintf(gfproxyd_id, sizeof(gfproxyd_id), "gfproxyd-%s", volinfo->volname); + runner_add_args(&runner, SBIN_DIR "/glusterfsd", "-s", + svc->proc.volfileserver, "--volfile-id", + svc->proc.volfileid, "-p", svc->proc.pidfile, "-l", + svc->proc.logfile, "--brick-name", gfproxyd_id, "-S", + svc->conn.sockpath, NULL); + + if (volinfo->memory_accounting) + runner_add_arg(&runner, "--mem-accounting"); + if (dict_get_strn(priv->opts, GLUSTERD_LOCALTIME_LOGGING_KEY, + SLEN(GLUSTERD_LOCALTIME_LOGGING_KEY), + &localtime_logging) == 0) { + if (strcmp(localtime_logging, "enable") == 0) + runner_add_arg(&runner, "--localtime-logging"); + } + + gfproxyd_port = pmap_assign_port(this, volinfo->gfproxyd.port, gfproxyd_id); + volinfo->gfproxyd.port = gfproxyd_port; + + runner_add_arg(&runner, "--brick-port"); + runner_argprintf(&runner, "%d", gfproxyd_port); + runner_add_arg(&runner, "--xlator-option"); + runner_argprintf(&runner, "%s-server.listen-port=%d", volinfo->volname, + gfproxyd_port); + + snprintf(msg, sizeof(msg), "Starting the gfproxyd service for volume %s", + volinfo->volname); + runner_log(&runner, this->name, GF_LOG_DEBUG, msg); + + if (flags == PROC_START_NO_WAIT) { + ret = runner_run_nowait(&runner); + } else { + synclock_unlock(&priv->big_lock); + { + ret = runner_run(&runner); + } + synclock_lock(&priv->big_lock); + } + +out: + return ret; +} + +int +glusterd_gfproxydsvc_restart() +{ + glusterd_volinfo_t *volinfo = NULL; + glusterd_volinfo_t *tmp = NULL; + int ret = -1; + xlator_t *this = THIS; + glusterd_conf_t *conf = NULL; + glusterd_svc_t *svc = NULL; + + GF_VALIDATE_OR_GOTO("glusterd", this, out); + + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, out); + + cds_list_for_each_entry_safe(volinfo, tmp, &conf->volumes, vol_list) + { + /* Start per volume gfproxyd svc */ + if (volinfo->status == GLUSTERD_STATUS_STARTED) { + svc = &(volinfo->gfproxyd.svc); + ret = svc->manager(svc, volinfo, PROC_START_NO_WAIT); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPD_START_FAIL, + "Couldn't resolve gfproxyd for " + "vol: %s on restart", + volinfo->volname); + gf_event(EVENT_SVC_MANAGER_FAILED, "volume=%s;svc_name=%s", + volinfo->volname, svc->name); + goto out; + } + } + } +out: + return ret; +} + +int +glusterd_gfproxydsvc_reconfigure(void *data) +{ + int ret = -1; + xlator_t *this = NULL; + gf_boolean_t identical = _gf_false; + glusterd_volinfo_t *volinfo = NULL; + + volinfo = data; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + + if (!volinfo->gfproxyd.svc.inited) + goto manager; + + if (!glusterd_is_gfproxyd_enabled(volinfo)) + goto manager; + else if (!glusterd_proc_is_running(&volinfo->gfproxyd.svc.proc)) + goto manager; + + /* + * Check both OLD and NEW volfiles, if they are SAME by size + * and cksum i.e. "character-by-character". If YES, then + * NOTHING has been changed, just return. 
+ */ + ret = glusterd_svc_check_gfproxyd_volfile_identical( + volinfo->gfproxyd.svc.name, volinfo, &identical); + if (ret) + goto out; + + if (identical) { + ret = 0; + goto out; + } + + /* + * They are not identical. Find out if the topology is changed + * OR just the volume options. If just the options which got + * changed, then inform the xlator to reconfigure the options. + */ + identical = _gf_false; /* RESET the FLAG */ + ret = glusterd_svc_check_gfproxyd_topology_identical( + volinfo->gfproxyd.svc.name, volinfo, &identical); + if (ret) + goto out; + + /* Topology is not changed, but just the options. But write the + * options to gfproxyd volfile, so that gfproxyd will be reconfigured. + */ + if (identical) { + ret = glusterd_gfproxydsvc_create_volfile(volinfo); + if (ret == 0) { /* Only if above PASSES */ + ret = glusterd_fetchspec_notify(this); + } + goto out; + } +manager: + /* + * gfproxyd volfile's topology has been changed. gfproxyd server needs + * to be RESTARTED to ACT on the changed volfile. + */ + ret = volinfo->gfproxyd.svc.manager(&(volinfo->gfproxyd.svc), volinfo, + PROC_START_NO_WAIT); + +out: + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.h b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.h new file mode 100644 index 00000000000..d396b4015f3 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.h @@ -0,0 +1,47 @@ +/* + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _GLUSTERD_GFPROXYD_SVC_H_ +#define _GLUSTERD_GFPROXYD_SVC_H_ + +#include "glusterd-svc-mgmt.h" + +#define gfproxyd_svc_name "gfproxyd" + +struct glusterd_gfproxydsvc_ { + glusterd_svc_t svc; + gf_store_handle_t *handle; + int port; +}; + +typedef struct glusterd_gfproxydsvc_ glusterd_gfproxydsvc_t; + +void +glusterd_gfproxydsvc_build(glusterd_svc_t *svc); + +int +glusterd_gfproxydsvc_manager(glusterd_svc_t *svc, void *data, int flags); + +int +glusterd_gfproxydsvc_start(glusterd_svc_t *svc, int flags); + +int +glusterd_gfproxydsvc_stop(glusterd_svc_t *svc, int sig); + +int +glusterd_gfproxydsvc_reconfigure(); + +void +glusterd_gfproxydsvc_build_volfile_path(char *server, char *workdir, + char *volfile, size_t len); + +int +glusterd_gfproxydsvc_restart(); +#endif diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c index 5351d4a9d66..1b21c40596d 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-handler.c @@ -1,3811 +1,6713 @@ /* - Copyright (c) 2006-2010 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Affero General Public License for more details. 
- - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ #include <inttypes.h> - -#include "globals.h" -#include "glusterfs.h" -#include "compat.h" -#include "dict.h" +#include <glusterfs/glusterfs.h> +#include <glusterfs/compat.h> +#include <glusterfs/dict.h> #include "protocol-common.h" -#include "xlator.h" -#include "logging.h" -#include "timer.h" -#include "defaults.h" -#include "compat.h" -#include "compat-errno.h" -#include "statedump.h" +#include <glusterfs/xlator.h> +#include <glusterfs/logging.h> +#include <glusterfs/syscall.h> +#include <glusterfs/timer.h> +#include <glusterfs/defaults.h> +#include <glusterfs/compat.h> +#include <glusterfs/compat-errno.h> +#include <glusterfs/statedump.h> +#include <glusterfs/run.h> #include "glusterd-mem-types.h" #include "glusterd.h" #include "glusterd-sm.h" #include "glusterd-op-sm.h" #include "glusterd-utils.h" +#include "glusterd-mgmt.h" +#include "glusterd-server-quorum.h" #include "glusterd-store.h" +#include "glusterd-locks.h" +#include "glusterd-snapshot-utils.h" +#include "glusterd-geo-rep.h" -#include "glusterd1.h" -#include "cli1.h" -#include "rpc-clnt.h" #include "glusterd1-xdr.h" +#include "cli1-xdr.h" +#include "xdr-generic.h" +#include "rpc-clnt.h" #include "glusterd-volgen.h" +#include "glusterd-mountbroker.h" +#include "glusterd-messages.h" +#include "glusterd-errno.h" #include <sys/resource.h> #include <inttypes.h> -#include "defaults.c" -#include "common-utils.h" - -static int -glusterd_handle_friend_req (rpcsvc_request_t *req, uuid_t uuid, - char *hostname, int port, - gd1_mgmt_friend_req *friend_req) -{ - int ret = -1; - glusterd_peerinfo_t *peerinfo = NULL; - glusterd_friend_sm_event_t *event = NULL; - glusterd_friend_req_ctx_t *ctx = NULL; - char rhost[UNIX_PATH_MAX + 1] = {0}; - uuid_t friend_uuid = {0}; - dict_t *dict = NULL; - - uuid_parse (uuid_utoa (uuid), friend_uuid); - if (!port) - port = GF_DEFAULT_BASE_PORT; - - ret = glusterd_remote_hostname_get (req, rhost, sizeof (rhost)); - ret = glusterd_friend_find (uuid, rhost, &peerinfo); - - if (ret) { - ret = glusterd_xfer_friend_add_resp (req, rhost, port, -1, - GF_PROBE_UNKNOWN_PEER); - if (friend_req->vols.vols_val) - free (friend_req->vols.vols_val); - goto out; - } - - ret = glusterd_friend_sm_new_event - (GD_FRIEND_EVENT_RCVD_FRIEND_REQ, &event); - - if (ret) { - gf_log ("", GF_LOG_ERROR, "event generation failed: %d", ret); - return ret; - } - - event->peerinfo = peerinfo; - - ctx = GF_CALLOC (1, sizeof (*ctx), gf_gld_mt_friend_req_ctx_t); +#include <glusterfs/common-utils.h> - if (!ctx) { - gf_log ("", GF_LOG_ERROR, "Unable to allocate memory"); - ret = -1; - goto out; - } +#include "glusterd-syncop.h" +#include "glusterd-messages.h" - uuid_copy (ctx->uuid, uuid); - if (hostname) - ctx->hostname = gf_strdup (hostname); - ctx->req = req; +extern glusterd_op_info_t opinfo; +static int volcount; - dict = dict_new (); - if (!dict) { - ret = -1; - goto out; - } +int +glusterd_big_locked_notify(struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t 
event, void *data, + rpc_clnt_notify_t notify_fn) +{ + glusterd_conf_t *priv = THIS->private; + int ret = -1; - ret = dict_unserialize (friend_req->vols.vols_val, - friend_req->vols.vols_len, - &dict); + synclock_lock(&priv->big_lock); + ret = notify_fn(rpc, mydata, event, data); + synclock_unlock(&priv->big_lock); - if (ret) - goto out; - else - dict->extra_stdfree = friend_req->vols.vols_val; - - ctx->vols = dict; - event->ctx = ctx; + return ret; +} - ret = glusterd_friend_sm_inject_event (event); - if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, "Unable to inject event %d, " - "ret = %d", event->event, ret); - goto out; - } +int +glusterd_big_locked_handler(rpcsvc_request_t *req, rpcsvc_actor actor_fn) +{ + glusterd_conf_t *priv = THIS->private; + int ret = -1; - ret = 0; + synclock_lock(&priv->big_lock); + ret = actor_fn(req); + synclock_unlock(&priv->big_lock); -out: - if (0 != ret) { - if (ctx && ctx->hostname) - GF_FREE (ctx->hostname); - if (ctx) - GF_FREE (ctx); - if (dict) { - if ((!dict->extra_stdfree) && - friend_req->vols.vols_val) - free (friend_req->vols.vols_val); - dict_unref (dict); - } else { - if (friend_req->vols.vols_val) - free (friend_req->vols.vols_val); - } - if (event) - GF_FREE (event); - } else { - if (peerinfo && (0 == peerinfo->connected)) - ret = GLUSTERD_CONNECTION_AWAITED; - } - return ret; + return ret; } static int -glusterd_handle_unfriend_req (rpcsvc_request_t *req, uuid_t uuid, - char *hostname, int port) +glusterd_handle_friend_req(rpcsvc_request_t *req, uuid_t uuid, char *hostname, + int port, gd1_mgmt_friend_req *friend_req) { - int ret = -1; - glusterd_peerinfo_t *peerinfo = NULL; - glusterd_friend_sm_event_t *event = NULL; - glusterd_friend_req_ctx_t *ctx = NULL; + int ret = -1; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_friend_sm_event_t *event = NULL; + glusterd_friend_req_ctx_t *ctx = NULL; + char rhost[UNIX_PATH_MAX + 1] = {0}; + dict_t *dict = NULL; - if (!port) - port = GF_DEFAULT_BASE_PORT; + if (!port) + port = GF_DEFAULT_BASE_PORT; - ret = glusterd_friend_find (uuid, hostname, &peerinfo); + ret = glusterd_remote_hostname_get(req, rhost, sizeof(rhost)); - if (ret) { - gf_log ("glusterd", GF_LOG_CRITICAL, - "Received remove-friend from unknown peer %s", - hostname); - ret = glusterd_xfer_friend_remove_resp (req, hostname, - port); - goto out; - } - - ret = glusterd_friend_sm_new_event - (GD_FRIEND_EVENT_RCVD_REMOVE_FRIEND, &event); - - if (ret) { - gf_log ("", GF_LOG_ERROR, "event generation failed: %d", ret); - return ret; - } + ctx = GF_CALLOC(1, sizeof(*ctx), gf_gld_mt_friend_req_ctx_t); + dict = dict_new(); - event->peerinfo = peerinfo; + RCU_READ_LOCK; - ctx = GF_CALLOC (1, sizeof (*ctx), gf_gld_mt_friend_req_ctx_t); + peerinfo = glusterd_peerinfo_find(uuid, rhost); - if (!ctx) { - gf_log ("", GF_LOG_ERROR, "Unable to allocate memory"); - ret = -1; - goto out; + if (peerinfo == NULL) { + gf_event(EVENT_PEER_REJECT, "peer=%s", hostname); + ret = glusterd_xfer_friend_add_resp(req, hostname, rhost, port, -1, + GF_PROBE_UNKNOWN_PEER); + if (friend_req->vols.vols_val) { + free(friend_req->vols.vols_val); + friend_req->vols.vols_val = NULL; } + goto out; + } - uuid_copy (ctx->uuid, uuid); - if (hostname) - ctx->hostname = gf_strdup (hostname); - ctx->req = req; + ret = glusterd_friend_sm_new_event(GD_FRIEND_EVENT_RCVD_FRIEND_REQ, &event); - event->ctx = ctx; + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_EVENT_NEW_GET_FAIL, + "event generation failed: %d", ret); + goto out; + } - ret = glusterd_friend_sm_inject_event 
(event); + event->peername = gf_strdup(peerinfo->hostname); + gf_uuid_copy(event->peerid, peerinfo->uuid); - if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, "Unable to inject event %d, " - "ret = %d", event->event, ret); - goto out; - } + if (!ctx) { + gf_msg("glusterd", GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "Unable to allocate memory"); + ret = -1; + goto out; + } - ret = 0; + gf_uuid_copy(ctx->uuid, uuid); + if (hostname) + ctx->hostname = gf_strdup(hostname); + ctx->req = req; + + if (!dict) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); + ret = -1; + goto out; + } + + ret = dict_unserialize(friend_req->vols.vols_val, friend_req->vols.vols_len, + &dict); + + if (ret) { + gf_smsg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + NULL); + goto out; + } else + dict->extra_stdfree = friend_req->vols.vols_val; + + ctx->vols = dict; + event->ctx = ctx; + + ret = glusterd_friend_sm_inject_event(event); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_EVENT_INJECT_FAIL, + "Unable to inject event %d, " + "ret = %d", + event->event, ret); + goto out; + } + + ret = 0; + if (peerinfo && (0 == peerinfo->connected)) + ret = GLUSTERD_CONNECTION_AWAITED; out: - if (0 != ret) { - if (ctx && ctx->hostname) - GF_FREE (ctx->hostname); - if (ctx) - GF_FREE (ctx); + RCU_READ_UNLOCK; + + if (ret && (ret != GLUSTERD_CONNECTION_AWAITED)) { + if (ctx && ctx->hostname) + GF_FREE(ctx->hostname); + GF_FREE(ctx); + if (dict) { + if ((!dict->extra_stdfree) && friend_req->vols.vols_val) + free(friend_req->vols.vols_val); + dict_unref(dict); + } else { + free(friend_req->vols.vols_val); } + if (event) + GF_FREE(event->peername); + GF_FREE(event); + } - return ret; + return ret; } static int -glusterd_add_peer_detail_to_dict (glusterd_peerinfo_t *peerinfo, - dict_t *friends, int count) +glusterd_handle_unfriend_req(rpcsvc_request_t *req, uuid_t uuid, char *hostname, + int port) { + int ret = -1; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_friend_sm_event_t *event = NULL; + glusterd_friend_req_ctx_t *ctx = NULL; - int ret = -1; - char key[256] = {0, }; + if (!port) + port = GF_DEFAULT_BASE_PORT; - GF_ASSERT (peerinfo); - GF_ASSERT (friends); + ctx = GF_CALLOC(1, sizeof(*ctx), gf_gld_mt_friend_req_ctx_t); - snprintf (key, 256, "friend%d.uuid", count); - uuid_utoa_r (peerinfo->uuid, peerinfo->uuid_str); - ret = dict_set_str (friends, key, peerinfo->uuid_str); - if (ret) - goto out; + RCU_READ_LOCK; - snprintf (key, 256, "friend%d.hostname", count); - ret = dict_set_str (friends, key, peerinfo->hostname); - if (ret) - goto out; + peerinfo = glusterd_peerinfo_find(uuid, hostname); - snprintf (key, 256, "friend%d.port", count); - ret = dict_set_int32 (friends, key, peerinfo->port); - if (ret) - goto out; + if (peerinfo == NULL) { + RCU_READ_UNLOCK; + gf_msg("glusterd", GF_LOG_CRITICAL, 0, GD_MSG_REQ_FROM_UNKNOWN_PEER, + "Received remove-friend from unknown peer %s", hostname); + ret = glusterd_xfer_friend_remove_resp(req, hostname, port); + goto out; + } - snprintf (key, 256, "friend%d.state", count); - ret = dict_set_str (friends, key, - glusterd_friend_sm_state_name_get(peerinfo->state.state)); - if (ret) - goto out; + ret = glusterd_friend_sm_new_event(GD_FRIEND_EVENT_RCVD_REMOVE_FRIEND, + &event); - snprintf (key, 256, "friend%d.connected", count); - ret = dict_set_int32 (friends, key, (int32_t)peerinfo->connected); - if (ret) - goto out; - -out: - return ret; -} - - -int -glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo, - dict_t *volumes, int 
count) -{ + if (ret) { + RCU_READ_UNLOCK; + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_EVENT_NEW_GET_FAIL, + "event generation failed: %d", ret); + goto out; + } - int ret = -1; - char key[256] = {0, }; - glusterd_brickinfo_t *brickinfo = NULL; - char *buf = NULL; - int i = 1; - data_pair_t *pairs = NULL; - char reconfig_key[256] = {0, }; - dict_t *dict = NULL; - data_t *value = NULL; - int opt_count = 0; - glusterd_conf_t *priv = NULL; + if (hostname) + event->peername = gf_strdup(hostname); + gf_uuid_copy(event->peerid, uuid); - GF_ASSERT (volinfo); - GF_ASSERT (volumes); + if (!ctx) { + RCU_READ_UNLOCK; + ret = -1; + gf_msg("glusterd", GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "Unable to allocate memory"); + goto out; + } - priv = THIS->private; + gf_uuid_copy(ctx->uuid, uuid); + if (hostname) + ctx->hostname = gf_strdup(hostname); + ctx->req = req; - GF_ASSERT (priv); + event->ctx = ctx; - snprintf (key, 256, "volume%d.name", count); - ret = dict_set_str (volumes, key, volinfo->volname); - if (ret) - goto out; + ret = glusterd_friend_sm_inject_event(event); - snprintf (key, 256, "volume%d.type", count); - ret = dict_set_int32 (volumes, key, volinfo->type); - if (ret) - goto out; + if (ret) { + RCU_READ_UNLOCK; + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_EVENT_INJECT_FAIL, + "Unable to inject event %d, " + "ret = %d", + event->event, ret); + goto out; + } - snprintf (key, 256, "volume%d.status", count); - ret = dict_set_int32 (volumes, key, volinfo->status); - if (ret) - goto out; + RCU_READ_UNLOCK; - snprintf (key, 256, "volume%d.brick_count", count); - ret = dict_set_int32 (volumes, key, volinfo->brick_count); - if (ret) - goto out; + return 0; - snprintf (key, 256, "volume%d.sub_count", count); - ret = dict_set_int32 (volumes, key, volinfo->sub_count); - if (ret) - goto out; +out: - snprintf (key, 256, "volume%d.transport", count); - ret = dict_set_int32 (volumes, key, volinfo->transport_type); - if (ret) - goto out; + if (0 != ret) { + if (ctx && ctx->hostname) + GF_FREE(ctx->hostname); + GF_FREE(ctx); + if (event) + GF_FREE(event->peername); + GF_FREE(event); + } - list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { - char brick[1024] = {0,}; - snprintf (key, 256, "volume%d.brick%d", count, i); - snprintf (brick, 1024, "%s:%s", brickinfo->hostname, - brickinfo->path); - buf = gf_strdup (brick); - ret = dict_set_dynstr (volumes, key, buf); - if (ret) - goto out; - i++; - } + return ret; +} - dict = volinfo->dict; - if (!dict) { - ret = 0; - goto out; - } +struct args_pack { + dict_t *dict; + int vol_count; + int opt_count; +}; - pairs = dict->members_list; +static int +_build_option_key(dict_t *d, char *k, data_t *v, void *tmp) +{ + char reconfig_key[256] = { + 0, + }; + int keylen; + struct args_pack *pack = NULL; + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + pack = tmp; + if (strcmp(k, GLUSTERD_GLOBAL_OPT_VERSION) == 0) + return 0; - while (pairs) { - if (1 == glusterd_check_option_exists (pairs->key, NULL)) { - value = pairs->value; - if (!value) - continue; + if (priv->op_version > GD_OP_VERSION_MIN) { + if ((strcmp(k, "features.limit-usage") == 0) || + (strcmp(k, "features.soft-limit") == 0)) + return 0; + } + + /* snap-max-hard-limit and snap-max-soft-limit are system * + * options set and managed by snapshot config option. Hence * + * they should not be displayed in gluster volume info. 
* + */ + if ((strcmp(k, "snap-max-hard-limit") == 0) || + (strcmp(k, "snap-max-soft-limit") == 0)) + return 0; - snprintf (reconfig_key, 256, "volume%d.option.%s", count, - pairs->key); - ret = dict_set_str (volumes, reconfig_key, value->data); - if (!ret) - opt_count++; - } - pairs = pairs->next; - } + keylen = snprintf(reconfig_key, sizeof(reconfig_key), "volume%d.option.%s", + pack->vol_count, k); + ret = dict_set_strn(pack->dict, reconfig_key, keylen, v->data); + if (0 == ret) + pack->opt_count++; - snprintf (key, 256, "volume%d.opt_count", count); - ret = dict_set_int32 (volumes, key, opt_count); -out: - return ret; + return 0; } int -glusterd_friend_find (uuid_t uuid, char *hostname, - glusterd_peerinfo_t **peerinfo) +glusterd_add_arbiter_info_to_bricks(glusterd_volinfo_t *volinfo, + dict_t *volumes, int count) { - int ret = -1; - - if (uuid) { - ret = glusterd_friend_find_by_uuid (uuid, peerinfo); - - if (ret) { - gf_log ("glusterd", GF_LOG_INFO, - "Unable to find peer by uuid"); - } else { - goto out; - } - - } + char key[64] = { + 0, + }; + int keylen; + int i = 0; + int ret = 0; + + if (volinfo->replica_count == 1 || volinfo->arbiter_count != 1) + return 0; + for (i = 1; i <= volinfo->brick_count; i++) { + if (i % volinfo->replica_count != 0) + continue; + keylen = snprintf(key, sizeof(key), "volume%d.brick%d.isArbiter", count, + i); + ret = dict_set_int32n(volumes, key, keylen, 1); + if (ret) + return ret; + } + return 0; +} - if (hostname) { - ret = glusterd_friend_find_by_hostname (hostname, peerinfo); +int +glusterd_add_volume_detail_to_dict(glusterd_volinfo_t *volinfo, dict_t *volumes, + int count) +{ + int ret = -1; + char key[64] = { + 0, + }; + int keylen; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_brickinfo_t *ta_brickinfo = NULL; + char *buf = NULL; + int i = 1; + dict_t *dict = NULL; + glusterd_conf_t *priv = NULL; + char *volume_id_str = NULL; + struct args_pack pack = { + 0, + }; + xlator_t *this = NULL; + int32_t len = 0; + + char ta_brick[4096] = { + 0, + }; + + GF_ASSERT(volinfo); + GF_ASSERT(volumes); + + this = THIS; + priv = this->private; + + GF_ASSERT(priv); + + keylen = snprintf(key, sizeof(key), "volume%d.name", count); + ret = dict_set_strn(volumes, key, keylen, volinfo->volname); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto out; + } + + keylen = snprintf(key, sizeof(key), "volume%d.type", count); + ret = dict_set_int32n(volumes, key, keylen, volinfo->type); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto out; + } + + keylen = snprintf(key, sizeof(key), "volume%d.status", count); + ret = dict_set_int32n(volumes, key, keylen, volinfo->status); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto out; + } + + keylen = snprintf(key, sizeof(key), "volume%d.brick_count", count); + ret = dict_set_int32n(volumes, key, keylen, volinfo->brick_count); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto out; + } + + keylen = snprintf(key, sizeof(key), "volume%d.dist_count", count); + ret = dict_set_int32n(volumes, key, keylen, volinfo->dist_leaf_count); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto out; + } + + keylen = snprintf(key, sizeof(key), "volume%d.stripe_count", count); + ret = dict_set_int32n(volumes, key, keylen, volinfo->stripe_count); + if 
(ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto out; + } + + keylen = snprintf(key, sizeof(key), "volume%d.replica_count", count); + ret = dict_set_int32n(volumes, key, keylen, volinfo->replica_count); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto out; + } + + keylen = snprintf(key, sizeof(key), "volume%d.disperse_count", count); + ret = dict_set_int32n(volumes, key, keylen, volinfo->disperse_count); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto out; + } + + keylen = snprintf(key, sizeof(key), "volume%d.redundancy_count", count); + ret = dict_set_int32n(volumes, key, keylen, volinfo->redundancy_count); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto out; + } + + keylen = snprintf(key, sizeof(key), "volume%d.arbiter_count", count); + ret = dict_set_int32n(volumes, key, keylen, volinfo->arbiter_count); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto out; + } + + keylen = snprintf(key, sizeof(key), "volume%d.transport", count); + ret = dict_set_int32n(volumes, key, keylen, volinfo->transport_type); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto out; + } + + keylen = snprintf(key, sizeof(key), "volume%d.thin_arbiter_count", count); + ret = dict_set_int32n(volumes, key, keylen, volinfo->thin_arbiter_count); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto out; + } + + volume_id_str = gf_strdup(uuid_utoa(volinfo->volume_id)); + if (!volume_id_str) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED, NULL); + goto out; + } + + keylen = snprintf(key, sizeof(key), "volume%d.volume_id", count); + ret = dict_set_dynstrn(volumes, key, keylen, volume_id_str); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto out; + } + + keylen = snprintf(key, sizeof(key), "volume%d.rebalance", count); + ret = dict_set_int32n(volumes, key, keylen, volinfo->rebal.defrag_cmd); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto out; + } + + keylen = snprintf(key, sizeof(key), "volume%d.snap_count", count); + ret = dict_set_int32n(volumes, key, keylen, volinfo->snap_count); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto out; + } + + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + char brick[1024] = { + 0, + }; + char brick_uuid[64] = { + 0, + }; + len = snprintf(brick, sizeof(brick), "%s:%s", brickinfo->hostname, + brickinfo->path); + if ((len < 0) || (len >= sizeof(brick))) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); + ret = -1; + goto out; + } + buf = gf_strdup(brick); + keylen = snprintf(key, sizeof(key), "volume%d.brick%d", count, i); + ret = dict_set_dynstrn(volumes, key, keylen, buf); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto out; + } + keylen = snprintf(key, sizeof(key), "volume%d.brick%d.uuid", count, i); + snprintf(brick_uuid, sizeof(brick_uuid), "%s", + uuid_utoa(brickinfo->uuid)); + buf = gf_strdup(brick_uuid); + if (!buf) { + gf_smsg(this->name, GF_LOG_ERROR, errno, 
GD_MSG_STRDUP_FAILED, + "brick_uuid=%s", brick_uuid, NULL); + goto out; + } + ret = dict_set_dynstrn(volumes, key, keylen, buf); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto out; + } + + i++; + } + if (volinfo->thin_arbiter_count == 1) { + ta_brickinfo = list_first_entry(&volinfo->ta_bricks, + glusterd_brickinfo_t, brick_list); + len = snprintf(ta_brick, sizeof(ta_brick), "%s:%s", + ta_brickinfo->hostname, ta_brickinfo->path); + if ((len < 0) || (len >= sizeof(ta_brick))) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); + ret = -1; + goto out; + } + buf = gf_strdup(ta_brick); + keylen = snprintf(key, sizeof(key), "volume%d.thin_arbiter_brick", + count); + ret = dict_set_dynstrn(volumes, key, keylen, buf); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto out; + } + } + + ret = glusterd_add_arbiter_info_to_bricks(volinfo, volumes, count); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_ARBITER_BRICK_SET_INFO_FAIL, NULL); + goto out; + } + + dict = volinfo->dict; + if (!dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); + ret = 0; + goto out; + } - if (ret) { - gf_log ("glusterd", GF_LOG_INFO, - "Unable to find hostname: %s", hostname); - } else { - goto out; - } - } + pack.dict = volumes; + pack.vol_count = count; + pack.opt_count = 0; + dict_foreach(dict, _build_option_key, (void *)&pack); + dict_foreach(priv->opts, _build_option_key, &pack); + keylen = snprintf(key, sizeof(key), "volume%d.opt_count", pack.vol_count); + ret = dict_set_int32n(volumes, key, keylen, pack.opt_count); out: - return ret; + return ret; } int32_t -glusterd_op_txn_begin () +glusterd_op_txn_begin(rpcsvc_request_t *req, glusterd_op_t op, void *ctx, + char *err_str, size_t err_len) { - int32_t ret = -1; - glusterd_conf_t *priv = NULL; - int32_t locked = 0; - - priv = THIS->private; - GF_ASSERT (priv); - - ret = glusterd_lock (priv->uuid); - + int32_t ret = -1; + dict_t *dict = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + int32_t locked = 0; + char *tmp = NULL; + char *volname = NULL; + uuid_t *txn_id = NULL; + glusterd_op_info_t txn_op_info = { + {0}, + }; + glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; + uint32_t op_errno = 0; + uint32_t timeout = 0; + + GF_ASSERT(req); + GF_ASSERT((op > GD_OP_NONE) && (op < GD_OP_MAX)); + GF_ASSERT(NULL != ctx); + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + dict = ctx; + + /* Generate a transaction-id for this operation and + * save it in the dict. This transaction id distinguishes + * each transaction, and helps separate opinfos in the + * op state machine. */ + ret = glusterd_generate_txn_id(dict, &txn_id); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_IDGEN_FAIL, + "Failed to generate transaction id"); + goto out; + } + + /* Save the MY_UUID as the originator_uuid. This originator_uuid + * will be used by is_origin_glusterd() to determine if a node + * is the originator node for a command. 
*/ + ret = glusterd_set_originator_uuid(dict); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UUID_SET_FAIL, + "Failed to set originator_uuid."); + goto out; + } + + /* Based on the op_version, acquire a cluster or mgmt_v3 lock */ + if (priv->op_version < GD_OP_VERSION_3_6_0) { + ret = glusterd_lock(MY_UUID); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTERD_LOCK_FAIL, + "Unable to acquire lock on localhost, ret: %d", ret); + snprintf(err_str, err_len, + "Another transaction is in progress. " + "Please try again after some time."); + goto out; + } + } else { + /* If no volname is given as a part of the command, locks will + * not be held */ + ret = dict_get_strn(dict, "volname", SLEN("volname"), &tmp); if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, - "Unable to acquire local lock, ret: %d", ret); + gf_msg(this->name, GF_LOG_INFO, errno, GD_MSG_DICT_GET_FAILED, + "No Volume name present. " + "Locks not being held."); + goto local_locking_done; + } else { + /* Use a copy of volname, as cli response will be + * sent before the unlock, and the volname in the + * dict, might be removed */ + volname = gf_strdup(tmp); + if (!volname) goto out; } - locked = 1; - gf_log ("glusterd", GF_LOG_INFO, "Acquired local lock"); - - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_START_LOCK, NULL); + /* Cli will add timeout key to dict if the default timeout is + * other than 2 minutes. Here we use this value to check whether + * mgmt_v3_lock_timeout should be set to default value or we + * need to change the value according to timeout value + * i.e, timeout + 120 seconds. */ + ret = dict_get_uint32(dict, "timeout", &timeout); + if (!ret) + priv->mgmt_v3_lock_timeout = timeout + 120; - gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); + ret = glusterd_mgmt_v3_lock(volname, MY_UUID, &op_errno, "vol"); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_LOCK_GET_FAIL, + "Unable to acquire lock for %s", volname); + snprintf(err_str, err_len, + "Another transaction is in progress for %s. " + "Please try again after some time.", + volname); + goto out; + } + } + + locked = 1; + gf_msg_debug(this->name, 0, "Acquired lock on localhost"); + +local_locking_done: + /* If no volname is given as a part of the command, locks will + * not be held, hence sending stage event. 
*/ + if (volname || (priv->op_version < GD_OP_VERSION_3_6_0)) + event_type = GD_OP_EVENT_START_LOCK; + else { + txn_op_info.state.state = GD_OP_STATE_LOCK_SENT; + event_type = GD_OP_EVENT_ALL_ACC; + } + + /* Save opinfo for this transaction with the transaction id */ + glusterd_txn_opinfo_init(&txn_op_info, NULL, &op, ctx, req); + + ret = glusterd_set_txn_opinfo(txn_id, &txn_op_info); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL, + "Unable to set transaction's opinfo"); + if (ctx) + dict_unref(ctx); + goto out; + } + + ret = glusterd_op_sm_inject_event(event_type, txn_id, ctx); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_EVENT_INJECT_FAIL, + "Failed to acquire cluster" + " lock."); + goto out; + } out: - if (locked && ret) - glusterd_unlock (priv->uuid); - return ret; + if (locked && ret) { + /* Based on the op-version, we release the + * cluster or mgmt_v3 lock */ + if (priv->op_version < GD_OP_VERSION_3_6_0) + glusterd_unlock(MY_UUID); + else { + ret = glusterd_mgmt_v3_unlock(volname, MY_UUID, "vol"); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_UNLOCK_FAIL, + "Unable to release lock for %s", volname); + ret = -1; + } + } + + if (volname) + GF_FREE(volname); + + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; } int -glusterd_handle_cluster_lock (rpcsvc_request_t *req) +__glusterd_handle_cluster_lock(rpcsvc_request_t *req) { - gd1_mgmt_cluster_lock_req lock_req = {{0},}; - int32_t ret = -1; - glusterd_op_lock_ctx_t *ctx = NULL; - - GF_ASSERT (req); - - if (!gd_xdr_to_mgmt_cluster_lock_req (req->msg[0], &lock_req)) { - //failed to decode msg; - req->rpc_err = GARBAGE_ARGS; - goto out; - } - - gf_log ("glusterd", GF_LOG_INFO, - "Received LOCK from uuid: %s", uuid_utoa (lock_req.uuid)); - + dict_t *op_ctx = NULL; + int32_t ret = -1; + gd1_mgmt_cluster_lock_req lock_req = { + {0}, + }; + glusterd_op_lock_ctx_t *ctx = NULL; + glusterd_op_sm_event_type_t op = GD_OP_EVENT_LOCK; + glusterd_op_info_t txn_op_info = { + {0}, + }; + glusterd_conf_t *priv = NULL; + uuid_t *txn_id = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + GF_ASSERT(req); + + txn_id = &priv->global_txn_id; + + ret = xdr_to_generic(req->msg[0], &lock_req, + (xdrproc_t)xdr_gd1_mgmt_cluster_lock_req); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, + "Failed to decode lock " + "request received from peer"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + gf_msg_debug(this->name, 0, "Received LOCK from uuid: %s", + uuid_utoa(lock_req.uuid)); + + RCU_READ_LOCK; + ret = (glusterd_peerinfo_find_by_uuid(lock_req.uuid) == NULL); + RCU_READ_UNLOCK; + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_PEER_NOT_FOUND, + "%s doesn't " + "belong to the cluster. 
Ignoring request.", + uuid_utoa(lock_req.uuid)); + ret = -1; + goto out; + } - ctx = GF_CALLOC (1, sizeof (*ctx), gf_gld_mt_op_lock_ctx_t); + ctx = GF_CALLOC(1, sizeof(*ctx), gf_gld_mt_op_lock_ctx_t); - if (!ctx) { - //respond here - return -1; - } + if (!ctx) { + // respond here + return -1; + } - uuid_copy (ctx->uuid, lock_req.uuid); - ctx->req = req; + gf_uuid_copy(ctx->uuid, lock_req.uuid); + ctx->req = req; + ctx->dict = NULL; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_LOCK, ctx); + op_ctx = dict_new(); + if (!op_ctx) { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_DICT_CREATE_FAIL, + "Unable to set new dict"); + goto out; + } + + glusterd_txn_opinfo_init(&txn_op_info, NULL, &op, op_ctx, req); + + ret = glusterd_set_txn_opinfo(txn_id, &txn_op_info); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL, + "Unable to set transaction's opinfo"); + dict_unref(txn_op_info.op_ctx); + goto out; + } + + ret = glusterd_op_sm_inject_event(GD_OP_EVENT_LOCK, txn_id, ctx); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_EVENT_INJECT_FAIL, + "Failed to inject event GD_OP_EVENT_LOCK"); out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + gf_msg_debug(this->name, 0, "Returning %d", ret); - glusterd_friend_sm (); - glusterd_op_sm (); + glusterd_friend_sm(); + glusterd_op_sm(); - return ret; + if (ret) + GF_FREE(ctx); + + return ret; } int -glusterd_req_ctx_create (rpcsvc_request_t *rpc_req, - glusterd_op_t op, uuid_t uuid, - char *buf_val, size_t buf_len, - gf_gld_mem_types_t mem_type, - glusterd_req_ctx_t **req_ctx_out) +glusterd_handle_cluster_lock(rpcsvc_request_t *req) { - int ret = -1; - glusterd_req_ctx_t *req_ctx = NULL; - char str[50] = {0,}; - dict_t *dict = NULL; - char volname[GLUSTERD_MAX_VOLUME_NAME] = {0}; - char *dup_volname = NULL; - - uuid_unparse (uuid, str); - gf_log ("glusterd", GF_LOG_INFO, - "Received op from uuid: %s", str); - - dict = dict_new (); - if (!dict) - goto out; - req_ctx = GF_CALLOC (1, sizeof (*req_ctx), mem_type); - - if (!req_ctx) { - goto out; - } - - uuid_copy (req_ctx->uuid, uuid); - req_ctx->op = op; - if (GD_OP_DELETE_VOLUME == op) { - strncpy (volname, buf_val, buf_len); - dup_volname = gf_strdup (volname); - if (dup_volname) { - ret = dict_set_dynstr (dict, "volname", dup_volname); - if (ret) { - gf_log ("", GF_LOG_WARNING, - "failed to set volume name from payload"); - goto out; - } - } else { - ret = -1; - goto out; - } - } else { - ret = dict_unserialize (buf_val, buf_len, &dict); - - if (ret) { - gf_log ("", GF_LOG_WARNING, - "failed to unserialize the dictionary"); - goto out; - } - } + return glusterd_big_locked_handler(req, __glusterd_handle_cluster_lock); +} - req_ctx->dict = dict; - req_ctx->req = rpc_req; - *req_ctx_out = req_ctx; - ret = 0; +static int +glusterd_req_ctx_create(rpcsvc_request_t *rpc_req, int op, uuid_t uuid, + char *buf_val, size_t buf_len, + gf_gld_mem_types_t mem_type, + glusterd_req_ctx_t **req_ctx_out) +{ + int ret = -1; + char str[50] = { + 0, + }; + glusterd_req_ctx_t *req_ctx = NULL; + dict_t *dict = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + gf_uuid_unparse(uuid, str); + gf_msg_debug(this->name, 0, "Received op from uuid %s", str); + + dict = dict_new(); + if (!dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); + goto out; + } + + req_ctx = GF_CALLOC(1, sizeof(*req_ctx), mem_type); + if (!req_ctx) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); + goto out; + } + + 
gf_uuid_copy(req_ctx->uuid, uuid); + req_ctx->op = op; + ret = dict_unserialize(buf_val, buf_len, &dict); + if (ret) { + gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + NULL); + goto out; + } + + req_ctx->dict = dict; + req_ctx->req = rpc_req; + *req_ctx_out = req_ctx; + ret = 0; out: - if (ret) { - if (dict) - dict_unref (dict); - if (req_ctx) - GF_FREE (req_ctx); - } - return ret; + if (ret) { + if (dict) + dict_unref(dict); + GF_FREE(req_ctx); + } + return ret; } int -glusterd_handle_stage_op (rpcsvc_request_t *req) +__glusterd_handle_stage_op(rpcsvc_request_t *req) { - int32_t ret = -1; - glusterd_req_ctx_t *req_ctx = NULL; - gd1_mgmt_stage_op_req op_req = {{0},}; - - GF_ASSERT (req); - if (!gd_xdr_to_mgmt_stage_op_req (req->msg[0], &op_req)) { - //failed to decode msg; - req->rpc_err = GARBAGE_ARGS; - goto out; + int32_t ret = -1; + glusterd_req_ctx_t *req_ctx = NULL; + gd1_mgmt_stage_op_req op_req = { + {0}, + }; + xlator_t *this = NULL; + uuid_t *txn_id = NULL; + glusterd_op_info_t txn_op_info = { + {0}, + }; + glusterd_op_sm_state_info_t state = { + 0, + }; + glusterd_conf_t *priv = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + GF_ASSERT(req); + + txn_id = &priv->global_txn_id; + + ret = xdr_to_generic(req->msg[0], &op_req, + (xdrproc_t)xdr_gd1_mgmt_stage_op_req); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, + "Failed to decode stage " + "request received from peer"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + ret = glusterd_req_ctx_create(req, op_req.op, op_req.uuid, + op_req.buf.buf_val, op_req.buf.buf_len, + gf_gld_mt_op_stage_ctx_t, &req_ctx); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_CTX_CREATE_FAIL, + "Failed to create req_ctx"); + goto out; + } + + ret = dict_get_bin(req_ctx->dict, "transaction_id", (void **)&txn_id); + gf_msg_debug(this->name, 0, "transaction ID = %s", uuid_utoa(*txn_id)); + + RCU_READ_LOCK; + ret = (glusterd_peerinfo_find_by_uuid(op_req.uuid) == NULL); + RCU_READ_UNLOCK; + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_PEER_NOT_FOUND, + "%s doesn't " + "belong to the cluster. Ignoring request.", + uuid_utoa(op_req.uuid)); + ret = -1; + goto out; + } + + /* In cases where there is no volname, the receivers won't have a + * transaction opinfo created, as for those operations, the locking + * phase where the transaction opinfos are created, won't be called. + * skip_locking will be true for all such transaction and we clear + * the txn_opinfo after the staging phase, except for geo-replication + * operations where we need to access txn_opinfo in the later phases also. 
+ */ + ret = glusterd_get_txn_opinfo(txn_id, &txn_op_info); + if (ret) { + gf_msg_debug(this->name, 0, "No transaction's opinfo set"); + + state.state = GD_OP_STATE_LOCKED; + glusterd_txn_opinfo_init(&txn_op_info, &state, &op_req.op, + req_ctx->dict, req); + + if (req_ctx->op != GD_OP_GSYNC_SET) + txn_op_info.skip_locking = _gf_true; + ret = glusterd_set_txn_opinfo(txn_id, &txn_op_info); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL, + "Unable to set transaction's opinfo"); + dict_unref(req_ctx->dict); + goto out; } + } - ret = glusterd_req_ctx_create (req, op_req.op, op_req.uuid, - op_req.buf.buf_val, op_req.buf.buf_len, - gf_gld_mt_op_stage_ctx_t, &req_ctx); - if (ret) - goto out; - - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_STAGE_OP, req_ctx); + ret = glusterd_op_sm_inject_event(GD_OP_EVENT_STAGE_OP, txn_id, req_ctx); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_EVENT_INJECT_FAIL, + "Failed to inject event GD_OP_EVENT_STAGE_OP"); - out: - if (op_req.buf.buf_val) - free (op_req.buf.buf_val);//malloced by xdr - glusterd_friend_sm (); - glusterd_op_sm (); - return ret; +out: + free(op_req.buf.buf_val); // malloced by xdr + glusterd_friend_sm(); + glusterd_op_sm(); + return ret; } int -glusterd_handle_commit_op (rpcsvc_request_t *req) +glusterd_handle_stage_op(rpcsvc_request_t *req) { - int32_t ret = -1; - glusterd_req_ctx_t *req_ctx = NULL; - gd1_mgmt_commit_op_req op_req = {{0},}; + return glusterd_big_locked_handler(req, __glusterd_handle_stage_op); +} - GF_ASSERT (req); +int +__glusterd_handle_commit_op(rpcsvc_request_t *req) +{ + int32_t ret = -1; + glusterd_req_ctx_t *req_ctx = NULL; + gd1_mgmt_commit_op_req op_req = { + {0}, + }; + xlator_t *this = NULL; + uuid_t *txn_id = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + GF_ASSERT(req); + + txn_id = &priv->global_txn_id; + + ret = xdr_to_generic(req->msg[0], &op_req, + (xdrproc_t)xdr_gd1_mgmt_commit_op_req); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, + "Failed to decode commit " + "request received from peer"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + RCU_READ_LOCK; + ret = (glusterd_peerinfo_find_by_uuid(op_req.uuid) == NULL); + RCU_READ_UNLOCK; + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_PEER_NOT_FOUND, + "%s doesn't " + "belong to the cluster. 
Ignoring request.", + uuid_utoa(op_req.uuid)); + ret = -1; + goto out; + } - if (!gd_xdr_to_mgmt_commit_op_req (req->msg[0], &op_req)) { - //failed to decode msg; - req->rpc_err = GARBAGE_ARGS; - goto out; - } + // the structures should always be equal + GF_ASSERT(sizeof(gd1_mgmt_commit_op_req) == sizeof(gd1_mgmt_stage_op_req)); + ret = glusterd_req_ctx_create(req, op_req.op, op_req.uuid, + op_req.buf.buf_val, op_req.buf.buf_len, + gf_gld_mt_op_commit_ctx_t, &req_ctx); + if (ret) + goto out; - //the structures should always be equal - GF_ASSERT (sizeof (gd1_mgmt_commit_op_req) == sizeof (gd1_mgmt_stage_op_req)); - ret = glusterd_req_ctx_create (req, op_req.op, op_req.uuid, - op_req.buf.buf_val, op_req.buf.buf_len, - gf_gld_mt_op_commit_ctx_t, &req_ctx); - if (ret) - goto out; + ret = dict_get_bin(req_ctx->dict, "transaction_id", (void **)&txn_id); + gf_msg_debug(this->name, 0, "transaction ID = %s", uuid_utoa(*txn_id)); - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_COMMIT_OP, req_ctx); - if (ret) - goto out; - ret = glusterd_op_init_ctx (op_req.op); + ret = glusterd_op_sm_inject_event(GD_OP_EVENT_COMMIT_OP, txn_id, req_ctx); out: - if (op_req.buf.buf_val) - free (op_req.buf.buf_val);//malloced by xdr - glusterd_friend_sm (); - glusterd_op_sm (); - return ret; + free(op_req.buf.buf_val); // malloced by xdr + glusterd_friend_sm(); + glusterd_op_sm(); + return ret; } + int -glusterd_handle_cli_probe (rpcsvc_request_t *req) +glusterd_handle_commit_op(rpcsvc_request_t *req) { - int32_t ret = -1; - gf1_cli_probe_req cli_req = {0,}; - glusterd_peerinfo_t *peerinfo = NULL; - gf_boolean_t run_fsm = _gf_true; - GF_ASSERT (req); + return glusterd_big_locked_handler(req, __glusterd_handle_commit_op); +} - if (!gf_xdr_to_cli_probe_req (req->msg[0], &cli_req)) { - //failed to decode msg; - gf_log ("", GF_LOG_ERROR, "xdr decoding error"); - req->rpc_err = GARBAGE_ARGS; - goto out; - } +int +__glusterd_handle_cli_probe(rpcsvc_request_t *req) +{ + int32_t ret = -1; + gf_cli_req cli_req = { + { + 0, + }, + }; + glusterd_peerinfo_t *peerinfo = NULL; + gf_boolean_t run_fsm = _gf_true; + xlator_t *this = NULL; + char *bind_name = NULL; + dict_t *dict = NULL; + char *hostname = NULL; + int port = 0; + int op_errno = 0; + + GF_ASSERT(req); + this = THIS; + + ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + // failed to decode msg; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, + "xdr decoding error"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (cli_req.dict.dict_len) { + dict = dict_new(); + + ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + "Failed to " + "unserialize req-buffer to dictionary"); + goto out; + } + } + + ret = dict_get_strn(dict, "hostname", SLEN("hostname"), &hostname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HOSTNAME_NOTFOUND_IN_DICT, + "Failed to get hostname"); + goto out; + } + + ret = dict_get_int32n(dict, "port", SLEN("port"), &port); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PORT_NOTFOUND_IN_DICT, + "Failed to get port"); + goto out; + } + + if (glusterd_is_any_volume_in_server_quorum(this) && + !does_gd_meet_server_quorum(this)) { + glusterd_xfer_cli_probe_resp(req, -1, GF_PROBE_QUORUM_NOT_MET, NULL, + hostname, port, dict); + gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_SERVER_QUORUM_NOT_MET, + "Server quorum not met. 
Rejecting operation."); + ret = 0; + goto out; + } + + gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_CLI_REQ_RECVD, + "Received CLI probe req %s %d", hostname, port); + + if (dict_get_strn(this->options, "transport.socket.bind-address", + SLEN("transport.socket.bind-address"), &bind_name) == 0) { + gf_msg_debug("glusterd", 0, + "only checking probe address vs. bind address"); + ret = gf_is_same_address(bind_name, hostname); + } else { + ret = gf_is_local_addr(hostname); + } + if (ret) { + glusterd_xfer_cli_probe_resp(req, 0, GF_PROBE_LOCALHOST, NULL, hostname, + port, dict); + ret = 0; + goto out; + } - gf_cmd_log ("peer probe", " on host %s:%d", cli_req.hostname, - cli_req.port); - gf_log ("glusterd", GF_LOG_INFO, "Received CLI probe req %s %d", - cli_req.hostname, cli_req.port); + RCU_READ_LOCK; - if (!(ret = glusterd_is_local_addr(cli_req.hostname))) { - glusterd_xfer_cli_probe_resp (req, 0, GF_PROBE_LOCALHOST, - cli_req.hostname, cli_req.port); - goto out; - } + peerinfo = glusterd_peerinfo_find_by_hostname(hostname); + ret = (peerinfo && gd_peer_has_address(peerinfo, hostname)); - if (!(ret = glusterd_friend_find_by_hostname(cli_req.hostname, - &peerinfo))) { - if (strcmp (peerinfo->hostname, cli_req.hostname) == 0) { + RCU_READ_UNLOCK; - gf_log ("glusterd", GF_LOG_DEBUG, "Probe host %s port %d" - " already a peer", cli_req.hostname, cli_req.port); - glusterd_xfer_cli_probe_resp (req, 0, GF_PROBE_FRIEND, - cli_req.hostname, cli_req.port); - goto out; - } - } - ret = glusterd_probe_begin (req, cli_req.hostname, cli_req.port); + if (ret) { + gf_msg_debug("glusterd", 0, + "Probe host %s port %d " + "already a peer", + hostname, port); + glusterd_xfer_cli_probe_resp(req, 0, GF_PROBE_FRIEND, NULL, hostname, + port, dict); + ret = 0; + goto out; + } - gf_cmd_log ("peer probe","on host %s:%d %s",cli_req.hostname, cli_req.port, - (ret) ? 
"FAILED" : "SUCCESS"); + ret = glusterd_probe_begin(req, hostname, port, dict, &op_errno); + + if (ret == GLUSTERD_CONNECTION_AWAITED) { + // fsm should be run after connection establishes + run_fsm = _gf_false; + ret = 0; + + } else if (ret == -1) { + glusterd_xfer_cli_probe_resp(req, -1, op_errno, NULL, hostname, port, + dict); + goto out; + } - if (ret == GLUSTERD_CONNECTION_AWAITED) { - //fsm should be run after connection establishes - run_fsm = _gf_false; - ret = 0; - } out: - if (cli_req.hostname) - free (cli_req.hostname);//its malloced by xdr + free(cli_req.dict.dict_val); - if (run_fsm) { - glusterd_friend_sm (); - glusterd_op_sm (); - } + if (run_fsm) { + glusterd_friend_sm(); + glusterd_op_sm(); + } - return ret; + return ret; } int -glusterd_handle_cli_deprobe (rpcsvc_request_t *req) +glusterd_handle_cli_probe(rpcsvc_request_t *req) { - int32_t ret = -1; - gf1_cli_probe_req cli_req = {0,}; - uuid_t uuid = {0}; - int op_errno = 0; - xlator_t *this = NULL; - glusterd_conf_t *priv = NULL; - - this = THIS; - GF_ASSERT (this); - priv = this->private; - GF_ASSERT (priv); - GF_ASSERT (req); - - if (!gf_xdr_to_cli_probe_req (req->msg[0], &cli_req)) { - //failed to decode msg; - req->rpc_err = GARBAGE_ARGS; - goto out; - } - - gf_log ("glusterd", GF_LOG_INFO, "Received CLI deprobe req"); - - ret = glusterd_hostname_to_uuid (cli_req.hostname, uuid); - if (ret) { - op_errno = GF_DEPROBE_NOT_FRIEND; - goto out; - } - - if (!uuid_compare (uuid, priv->uuid)) { - op_errno = GF_DEPROBE_LOCALHOST; - ret = -1; - goto out; - } + return glusterd_big_locked_handler(req, __glusterd_handle_cli_probe); +} - if (!uuid_is_null (uuid)) { - ret = glusterd_all_volume_cond_check ( - glusterd_friend_brick_belongs, - -1, &uuid); - if (ret) { - op_errno = GF_DEPROBE_BRICK_EXIST; - goto out; - } - } +int +__glusterd_handle_cli_deprobe(rpcsvc_request_t *req) +{ + int32_t ret = -1; + gf_cli_req cli_req = { + { + 0, + }, + }; + uuid_t uuid = {0}; + int op_errno = 0; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + dict_t *dict = NULL; + char *hostname = NULL; + int port = 0; + int flags = 0; + glusterd_volinfo_t *volinfo = NULL; + glusterd_volinfo_t *tmp = NULL; + glusterd_snap_t *snapinfo = NULL; + glusterd_snap_t *tmpsnap = NULL; + gf_boolean_t need_free = _gf_false; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + GF_ASSERT(req); + + ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + // failed to decode msg; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, + "Failed to decode " + "request received from cli"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (cli_req.dict.dict_len) { + dict = dict_new(); - if (!uuid_is_null (uuid)) { - ret = glusterd_deprobe_begin (req, cli_req.hostname, - cli_req.port, uuid); + if (dict) { + need_free = _gf_true; } else { - ret = glusterd_deprobe_begin (req, cli_req.hostname, - cli_req.port, NULL); + ret = -1; + goto out; } - gf_cmd_log ("peer deprobe", "on host %s:%d %s", cli_req.hostname, - cli_req.port, (ret) ? 
"FAILED" : "SUCCESS"); + ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + "Failed to " + "unserialize req-buffer to dictionary"); + goto out; + } + } + + gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_CLI_REQ_RECVD, + "Received CLI deprobe req"); + + ret = dict_get_strn(dict, "hostname", SLEN("hostname"), &hostname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HOSTNAME_NOTFOUND_IN_DICT, + "Failed to get hostname"); + goto out; + } + + ret = dict_get_int32n(dict, "port", SLEN("port"), &port); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PORT_NOTFOUND_IN_DICT, + "Failed to get port"); + goto out; + } + ret = dict_get_int32n(dict, "flags", SLEN("flags"), &flags); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FLAGS_NOTFOUND_IN_DICT, + "Failed to get flags"); + goto out; + } + + ret = glusterd_hostname_to_uuid(hostname, uuid); + if (ret) { + op_errno = GF_DEPROBE_NOT_FRIEND; + goto out; + } + + if (!gf_uuid_compare(uuid, MY_UUID)) { + op_errno = GF_DEPROBE_LOCALHOST; + ret = -1; + goto out; + } + + if (!(flags & GF_CLI_FLAG_OP_FORCE)) { + /* Check if peers are connected, except peer being + * detached*/ + if (!glusterd_chk_peers_connected_befriended(uuid)) { + ret = -1; + op_errno = GF_DEPROBE_FRIEND_DOWN; + goto out; + } + } + + /* Check for if volumes exist with some bricks on the peer being + * detached. It's not a problem if a volume contains none or all + * of its bricks on the peer being detached + */ + cds_list_for_each_entry_safe(volinfo, tmp, &priv->volumes, vol_list) + { + ret = glusterd_friend_contains_vol_bricks(volinfo, uuid); + if (ret == 1) { + op_errno = GF_DEPROBE_BRICK_EXIST; + goto out; + } + } + + cds_list_for_each_entry_safe(snapinfo, tmpsnap, &priv->snapshots, snap_list) + { + ret = glusterd_friend_contains_snap_bricks(snapinfo, uuid); + if (ret == 1) { + op_errno = GF_DEPROBE_SNAP_BRICK_EXIST; + goto out; + } + } + if (!(flags & GF_CLI_FLAG_OP_FORCE)) { + if (glusterd_is_any_volume_in_server_quorum(this) && + !does_gd_meet_server_quorum(this)) { + gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_SERVER_QUORUM_NOT_MET, + "Server quorum not met. 
Rejecting operation."); + ret = -1; + op_errno = GF_DEPROBE_QUORUM_NOT_MET; + goto out; + } + } + + if (!gf_uuid_is_null(uuid)) { + ret = glusterd_deprobe_begin(req, hostname, port, uuid, dict, + &op_errno); + } else { + ret = glusterd_deprobe_begin(req, hostname, port, NULL, dict, + &op_errno); + } + + need_free = _gf_false; + out: - if (ret) { - ret = glusterd_xfer_cli_deprobe_resp (req, ret, op_errno, - cli_req.hostname); - } + free(cli_req.dict.dict_val); - if (cli_req.hostname) - free (cli_req.hostname);//malloced by xdr + if (ret) { + ret = glusterd_xfer_cli_deprobe_resp(req, ret, op_errno, NULL, hostname, + dict); + if (need_free) { + dict_unref(dict); + } + } - glusterd_friend_sm (); - glusterd_op_sm (); + glusterd_friend_sm(); + glusterd_op_sm(); - return ret; + return ret; } int -glusterd_handle_cli_list_friends (rpcsvc_request_t *req) +glusterd_handle_cli_deprobe(rpcsvc_request_t *req) { - int32_t ret = -1; - gf1_cli_peer_list_req cli_req = {0,}; - dict_t *dict = NULL; - - GF_ASSERT (req); - - if (!gf_xdr_to_cli_peer_list_req (req->msg[0], &cli_req)) { - //failed to decode msg; - req->rpc_err = GARBAGE_ARGS; - goto out; - } - - gf_log ("glusterd", GF_LOG_INFO, "Received cli list req"); - - if (cli_req.dict.dict_len) { - /* Unserialize the dictionary */ - dict = dict_new (); + return glusterd_big_locked_handler(req, __glusterd_handle_cli_deprobe); +} - ret = dict_unserialize (cli_req.dict.dict_val, - cli_req.dict.dict_len, - &dict); - if (ret < 0) { - gf_log ("glusterd", GF_LOG_ERROR, - "failed to " - "unserialize req-buffer to dictionary"); - goto out; - } else { - dict->extra_stdfree = cli_req.dict.dict_val; - } +int +__glusterd_handle_cli_list_friends(rpcsvc_request_t *req) +{ + int32_t ret = -1; + gf1_cli_peer_list_req cli_req = { + 0, + }; + dict_t *dict = NULL; + + GF_ASSERT(req); + + ret = xdr_to_generic(req->msg[0], &cli_req, + (xdrproc_t)xdr_gf1_cli_peer_list_req); + if (ret < 0) { + // failed to decode msg; + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, + "Failed to decode " + "request received from cli"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_CLI_REQ_RECVD, + "Received cli list req"); + + if (cli_req.dict.dict_len) { + /* Unserialize the dictionary */ + dict = dict_new(); + + ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + "failed to " + "unserialize req-buffer to dictionary"); + goto out; + } else { + dict->extra_stdfree = cli_req.dict.dict_val; } + } - ret = glusterd_list_friends (req, dict, cli_req.flags); + ret = glusterd_list_friends(req, dict, cli_req.flags); out: - if (dict) - dict_unref (dict); + if (dict) + dict_unref(dict); - glusterd_friend_sm (); - glusterd_op_sm (); + glusterd_friend_sm(); + glusterd_op_sm(); - return ret; + return ret; } int -glusterd_handle_cli_get_volume (rpcsvc_request_t *req) +glusterd_handle_cli_list_friends(rpcsvc_request_t *req) { - int32_t ret = -1; - gf1_cli_get_vol_req cli_req = {0,}; - dict_t *dict = NULL; - - GF_ASSERT (req); - - if (!gf_xdr_to_cli_get_vol_req (req->msg[0], &cli_req)) { - //failed to decode msg; - req->rpc_err = GARBAGE_ARGS; - goto out; - } - - gf_log ("glusterd", GF_LOG_INFO, "Received get vol req"); - - if (cli_req.dict.dict_len) { - /* Unserialize the dictionary */ - dict = dict_new (); + return glusterd_big_locked_handler(req, __glusterd_handle_cli_list_friends); +} - ret = dict_unserialize (cli_req.dict.dict_val, - 
cli_req.dict.dict_len, - &dict); - if (ret < 0) { - gf_log ("glusterd", GF_LOG_ERROR, - "failed to " - "unserialize req-buffer to dictionary"); - goto out; - } else { - dict->extra_stdfree = cli_req.dict.dict_val; - } +static int +__glusterd_handle_cli_get_volume(rpcsvc_request_t *req) +{ + int32_t ret = -1; + gf_cli_req cli_req = {{ + 0, + }}; + int32_t flags = 0; + dict_t *dict = NULL; + xlator_t *this = NULL; + + GF_ASSERT(req); + this = THIS; + + ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + // failed to decode msg; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, + "Failed to decode " + "request received from cli"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + gf_msg(this->name, GF_LOG_DEBUG, 0, GD_MSG_GET_VOL_REQ_RCVD, + "Received get vol req"); + + if (cli_req.dict.dict_len) { + /* Unserialize the dictionary */ + dict = dict_new(); + + ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + "failed to " + "unserialize req-buffer to dictionary"); + goto out; + } else { + dict->extra_stdfree = cli_req.dict.dict_val; } + } - ret = glusterd_get_volumes (req, dict, cli_req.flags); + ret = dict_get_int32n(dict, "flags", SLEN("flags"), &flags); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FLAGS_NOTFOUND_IN_DICT, + "failed to get flags"); + goto out; + } + ret = glusterd_get_volumes(req, dict, flags); out: - if (dict) - dict_unref (dict); + if (dict) + dict_unref(dict); - glusterd_friend_sm (); - glusterd_op_sm (); + glusterd_friend_sm(); + glusterd_op_sm(); - return ret; + return ret; } -int32_t -glusterd_op_begin (rpcsvc_request_t *req, glusterd_op_t op, void *ctx, - gf_boolean_t is_ctx_free) +int +glusterd_handle_cli_get_volume(rpcsvc_request_t *req) { - int ret = -1; - GF_ASSERT (req); - GF_ASSERT ((op > GD_OP_NONE) && (op < GD_OP_MAX)); - GF_ASSERT ((NULL != ctx) || (_gf_false == is_ctx_free)); - - glusterd_op_set_op (op); - glusterd_op_set_ctx (op, ctx); - glusterd_op_set_ctx_free (op, is_ctx_free); - glusterd_op_set_req (req); - - ret = glusterd_op_txn_begin (); - - return ret; + return glusterd_big_locked_handler(req, __glusterd_handle_cli_get_volume); } int -glusterd_handle_create_volume (rpcsvc_request_t *req) -{ - int32_t ret = -1; - gf1_cli_create_vol_req cli_req = {0,}; - dict_t *dict = NULL; - glusterd_brickinfo_t *brickinfo = NULL; - char *brick = NULL; - char *bricks = NULL; - char *volname = NULL; - int brick_count = 0; - char *tmpptr = NULL; - int i = 0; - char *brick_list = NULL; - void *cli_rsp = NULL; - char err_str[2048] = {0,}; - gf1_cli_create_vol_rsp rsp = {0,}; - glusterd_conf_t *priv = NULL; - xlator_t *this = NULL; - char *free_ptr = NULL; - char *trans_type = NULL; - uuid_t volume_id = {0,}; - glusterd_brickinfo_t *tmpbrkinfo = NULL; - glusterd_volinfo_t tmpvolinfo = {{0},}; - int lock_fail = 0; - glusterd_op_t cli_op = GD_OP_CREATE_VOLUME; - - GF_ASSERT (req); - - INIT_LIST_HEAD (&tmpvolinfo.bricks); - - ret = glusterd_op_set_cli_op (cli_op); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to set cli op: %d", - ret); - lock_fail = 1; - snprintf (err_str, sizeof (err_str), "Another operation is in " - "progress, please retry after some time"); - goto out; - } - - this = THIS; - GF_ASSERT(this); - - priv = this->private; - +__glusterd_handle_cli_uuid_reset(rpcsvc_request_t *req) +{ + int ret = -1; + dict_t *dict = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + 
uuid_t uuid = {0}; + gf_cli_rsp rsp = { + 0, + }; + gf_cli_req cli_req = {{ + 0, + }}; + char msg_str[128] = { + 0, + }; + + GF_ASSERT(req); + + this = THIS; + priv = this->private; + GF_ASSERT(priv); + + ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + // failed to decode msg; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, + "Failed to decode " + "request received from cli"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + gf_msg_debug("glusterd", 0, "Received uuid reset req"); + + if (cli_req.dict.dict_len) { + /* Unserialize the dictionary */ + dict = dict_new(); + + ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + "failed to " + "unserialize req-buffer to dictionary"); + snprintf(msg_str, sizeof(msg_str), + "Unable to decode " + "the buffer"); + goto out; + } else { + dict->extra_stdfree = cli_req.dict.dict_val; + } + } + + /* In the above section if dict_unserialize is successful, ret is set + * to zero. + */ + ret = -1; + // Do not allow peer reset if there are any volumes in the cluster + if (!cds_list_empty(&priv->volumes)) { + snprintf(msg_str, sizeof(msg_str), + "volumes are already " + "present in the cluster. Resetting uuid is not " + "allowed"); + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOLS_ALREADY_PRESENT, "%s", + msg_str); + goto out; + } + + // Do not allow peer reset if trusted storage pool is already formed + if (!cds_list_empty(&priv->peers)) { + snprintf(msg_str, sizeof(msg_str), + "trusted storage pool " + "has been already formed. Please detach this peer " + "from the pool and reset its uuid."); + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_TSP_ALREADY_FORMED, "%s", + msg_str); + goto out; + } + + gf_uuid_copy(uuid, priv->uuid); + ret = glusterd_uuid_generate_save(); + + if (!gf_uuid_compare(uuid, MY_UUID)) { + snprintf(msg_str, sizeof(msg_str), + "old uuid and the new uuid" + " are same. 
Try gluster peer reset again"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UUIDS_SAME_RETRY, "%s", + msg_str); ret = -1; - if (!gf_xdr_to_cli_create_vol_req (req->msg[0], &cli_req)) { - //failed to decode msg; - req->rpc_err = GARBAGE_ARGS; - snprintf (err_str, sizeof (err_str), "Garbage args received"); - goto out; - } - - gf_log ("glusterd", GF_LOG_INFO, "Received create volume req"); - - if (cli_req.bricks.bricks_len) { - /* Unserialize the dictionary */ - dict = dict_new (); - - ret = dict_unserialize (cli_req.bricks.bricks_val, - cli_req.bricks.bricks_len, - &dict); - if (ret < 0) { - gf_log ("glusterd", GF_LOG_ERROR, - "failed to " - "unserialize req-buffer to dictionary"); - snprintf (err_str, sizeof (err_str), "Unable to decode " - "the buffer"); - goto out; - } else { - dict->extra_stdfree = cli_req.bricks.bricks_val; - } - } - - ret = dict_get_str (dict, "volname", &volname); - - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); - snprintf (err_str, sizeof (err_str), "Unable to get volume " - "name"); - goto out; - } - gf_cmd_log ("Volume create", "on volname: %s attempted", volname); - - if ((ret = glusterd_check_volume_exists (volname))) { - snprintf(err_str, 2048, "Volume %s already exists", volname); - gf_log ("glusterd", GF_LOG_ERROR, "%s", err_str); - goto out; - } - - ret = dict_get_int32 (dict, "count", &brick_count); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get count"); - snprintf (err_str, sizeof (err_str), "Unable to get volume " - "brick count"); - goto out; - } - - ret = dict_get_str (dict, "transport", &trans_type); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get transport-type"); - snprintf (err_str, sizeof (err_str), "Unable to get volume " - "transport-type"); - goto out; - } - ret = dict_get_str (dict, "bricks", &bricks); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get bricks"); - snprintf (err_str, sizeof (err_str), "Unable to get volume " - "bricks"); - goto out; - } - - uuid_generate (volume_id); - free_ptr = gf_strdup (uuid_utoa (volume_id)); - ret = dict_set_dynstr (dict, "volume-id", free_ptr); - if (ret) { - gf_log ("", GF_LOG_ERROR, "unable to set volume-id"); - snprintf (err_str, sizeof (err_str), "Unable to set volume " - "id"); - goto out; - } - free_ptr = NULL; - - if (bricks) { - brick_list = gf_strdup (bricks); - free_ptr = brick_list; - } - - gf_cmd_log ("Volume create", "on volname: %s type:%s count:%d bricks:%s", - cli_req.volname, ((cli_req.type == 0)? "DEFAULT": - ((cli_req.type == 1)? 
"STRIPE":"REPLICATE")), cli_req.count, - bricks); - + goto out; + } - while ( i < brick_count) { - i++; - brick= strtok_r (brick_list, " \n", &tmpptr); - brick_list = tmpptr; - ret = glusterd_brickinfo_from_brick (brick, &brickinfo); - if (ret) { - snprintf (err_str, sizeof (err_str), "Unable to get " - "brick info from brick %s", brick); - goto out; - } - - ret = glusterd_new_brick_validate (brick, brickinfo, err_str, - sizeof (err_str)); - if (ret) - goto out; - ret = glusterd_volume_brickinfo_get (brickinfo->uuid, - brickinfo->hostname, - brickinfo->path, - &tmpvolinfo, &tmpbrkinfo); - if (!ret) { - ret = -1; - snprintf (err_str, sizeof (err_str), "Brick: %s:%s, %s" - " in the arguments mean the same", - tmpbrkinfo->hostname, tmpbrkinfo->path, - brick); - goto out; - } - list_add_tail (&brickinfo->brick_list, &tmpvolinfo.bricks); - brickinfo = NULL; - } +out: + if (ret) { + rsp.op_ret = -1; + if (msg_str[0] == '\0') + snprintf(msg_str, sizeof(msg_str), + "Operation " + "failed"); + rsp.op_errstr = msg_str; + ret = 0; + } else { + rsp.op_errstr = ""; + } - ret = glusterd_op_begin (req, GD_OP_CREATE_VOLUME, dict, _gf_true); - gf_cmd_log ("Volume create", "on volname: %s %s", volname, - (ret != 0) ? "FAILED": "SUCCESS"); + glusterd_to_cli(req, &rsp, NULL, 0, NULL, (xdrproc_t)xdr_gf_cli_rsp, dict); -out: - if (ret) { - if (dict) - dict_unref (dict); - rsp.op_ret = -1; - rsp.op_errno = 0; - rsp.volname = ""; - if (err_str[0] == '\0') - snprintf (err_str, sizeof (err_str), "Operation failed"); - rsp.op_errstr = err_str; - cli_rsp = &rsp; - glusterd_submit_reply(req, cli_rsp, NULL, 0, NULL, - gf_xdr_serialize_cli_create_vol_rsp); - if (!lock_fail) - (void) glusterd_opinfo_unlock (); - - ret = 0; //Client response sent, prevent second response - } - - if (free_ptr) - GF_FREE(free_ptr); - - glusterd_volume_brickinfos_delete (&tmpvolinfo); - if (brickinfo) - glusterd_brickinfo_delete (brickinfo); - if (cli_req.volname) - free (cli_req.volname); // its a malloced by xdr - - glusterd_friend_sm (); - glusterd_op_sm (); + return ret; +} - return ret; +int +glusterd_handle_cli_uuid_reset(rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler(req, __glusterd_handle_cli_uuid_reset); } int -glusterd_handle_cli_start_volume (rpcsvc_request_t *req) +__glusterd_handle_cli_uuid_get(rpcsvc_request_t *req) { - int32_t ret = -1; - gf1_cli_start_vol_req cli_req = {0,}; - int lock_fail = 0; - char *dup_volname = NULL; - dict_t *dict = NULL; - glusterd_op_t cli_op = GD_OP_START_VOLUME; + int ret = -1; + dict_t *dict = NULL; + dict_t *rsp_dict = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + gf_cli_rsp rsp = { + 0, + }; + gf_cli_req cli_req = {{ + 0, + }}; + char err_str[64] = { + 0, + }; + char uuid_str[64] = { + 0, + }; + + GF_ASSERT(req); + + this = THIS; + priv = this->private; + GF_ASSERT(priv); + + ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, + "Failed to decode " + "request received from cli"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + gf_msg_debug("glusterd", 0, "Received uuid get req"); + + if (cli_req.dict.dict_len) { + dict = dict_new(); + if (!dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, + NULL); + ret = -1; + goto out; + } - GF_ASSERT (req); + ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + "failed to " + "unserialize 
req-buffer to dictionary"); + snprintf(err_str, sizeof(err_str), + "Unable to decode " + "the buffer"); + goto out; - ret = glusterd_op_set_cli_op (cli_op); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to set cli op: %d", ret); - lock_fail = 1; - goto out; + } else { + dict->extra_stdfree = cli_req.dict.dict_val; } + } + rsp_dict = dict_new(); + if (!rsp_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); ret = -1; - if (!gf_xdr_to_cli_start_vol_req (req->msg[0], &cli_req)) { - //failed to decode msg; - req->rpc_err = GARBAGE_ARGS; - goto out; - } - - gf_log ("glusterd", GF_LOG_INFO, "Received start vol req" - "for volume %s", cli_req.volname); - - dict = dict_new (); - - if (!dict) - goto out; - - dup_volname = gf_strdup (cli_req.volname); - if (!dup_volname) - goto out; - - ret = dict_set_dynstr (dict, "volname", dup_volname); - if (ret) - goto out; - - ret = dict_set_int32 (dict, "flags", cli_req.flags); - if (ret) - goto out; - ret = glusterd_op_begin (req, GD_OP_START_VOLUME, dict, _gf_true); - - gf_cmd_log ("volume start","on volname: %s %s", cli_req.volname, - ((ret == 0) ? "SUCCESS": "FAILED")); - + goto out; + } + + uuid_utoa_r(MY_UUID, uuid_str); + ret = dict_set_strn(rsp_dict, "uuid", SLEN("uuid"), uuid_str); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set uuid in " + "dictionary."); + goto out; + } + + ret = dict_allocate_and_serialize(rsp_dict, &rsp.dict.dict_val, + &rsp.dict.dict_len); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); + goto out; + } + ret = 0; out: - if (ret && dict) - dict_unref (dict); - if (cli_req.volname) - free (cli_req.volname); //its malloced by xdr + if (ret) { + rsp.op_ret = -1; + if (err_str[0] == '\0') + snprintf(err_str, sizeof(err_str), + "Operation " + "failed"); + rsp.op_errstr = err_str; - glusterd_friend_sm (); - glusterd_op_sm (); + } else { + rsp.op_errstr = ""; + } - if (ret) { - ret = glusterd_op_send_cli_response (cli_op, ret, 0, req, - NULL, "operation failed"); - if (!lock_fail) - (void) glusterd_opinfo_unlock (); + glusterd_to_cli(req, &rsp, NULL, 0, NULL, (xdrproc_t)xdr_gf_cli_rsp, dict); - } + if (rsp_dict) + dict_unref(rsp_dict); + GF_FREE(rsp.dict.dict_val); - return ret; + return 0; +} +int +glusterd_handle_cli_uuid_get(rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler(req, __glusterd_handle_cli_uuid_get); } - int -glusterd_handle_cli_stop_volume (rpcsvc_request_t *req) +__glusterd_handle_cli_list_volume(rpcsvc_request_t *req) { - int32_t ret = -1; - gf1_cli_stop_vol_req cli_req = {0,}; - int lock_fail = 0; - char *dup_volname = NULL; - dict_t *dict = NULL; - glusterd_op_t cli_op = GD_OP_STOP_VOLUME; + int ret = -1; + dict_t *dict = NULL; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *volinfo = NULL; + int count = 0; + char key[64] = { + 0, + }; + int keylen; + gf_cli_rsp rsp = { + 0, + }; + + GF_ASSERT(req); + + priv = THIS->private; + GF_ASSERT(priv); + + dict = dict_new(); + if (!dict) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); + goto out; + } + + cds_list_for_each_entry(volinfo, &priv->volumes, vol_list) + { + keylen = snprintf(key, sizeof(key), "volume%d", count); + ret = dict_set_strn(dict, key, keylen, volinfo->volname); + if (ret) + goto out; + count++; + } - GF_ASSERT (req); + ret = dict_set_int32n(dict, "count", SLEN("count"), count); + if (ret) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=count", 
NULL); + goto out; + } - ret = glusterd_op_set_cli_op (cli_op); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to set cli op: %d", - ret); - lock_fail = 1; - goto out; - } - - ret = -1; - if (!gf_xdr_to_cli_stop_vol_req (req->msg[0], &cli_req)) { - //failed to decode msg; - req->rpc_err = GARBAGE_ARGS; - goto out; - } + ret = dict_allocate_and_serialize(dict, &rsp.dict.dict_val, + &rsp.dict.dict_len); + if (ret) + goto out; - gf_log ("glusterd", GF_LOG_INFO, "Received stop vol req" - "for volume %s", cli_req.volname); + ret = 0; - dict = dict_new (); +out: + rsp.op_ret = ret; + if (ret) + rsp.op_errstr = "Error listing volumes"; + else + rsp.op_errstr = ""; - if (!dict) - goto out; + glusterd_submit_reply(req, &rsp, NULL, 0, NULL, (xdrproc_t)xdr_gf_cli_rsp); + ret = 0; - dup_volname = gf_strdup (cli_req.volname); - if (!dup_volname) - goto out; + if (dict) + dict_unref(dict); - ret = dict_set_dynstr (dict, "volname", dup_volname); - if (ret) - goto out; + GF_FREE(rsp.dict.dict_val); - ret = dict_set_int32 (dict, "flags", cli_req.flags); - if (ret) - goto out; + glusterd_friend_sm(); + glusterd_op_sm(); - ret = glusterd_op_begin (req, GD_OP_STOP_VOLUME, dict, _gf_true); - gf_cmd_log ("Volume stop","on volname: %s %s", cli_req.volname, - ((ret)?"FAILED":"SUCCESS")); + return ret; +} -out: - if (cli_req.volname) - free (cli_req.volname); //its malloced by xdr +int +glusterd_handle_cli_list_volume(rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler(req, __glusterd_handle_cli_list_volume); +} - glusterd_friend_sm (); - glusterd_op_sm (); +int32_t +glusterd_op_begin(rpcsvc_request_t *req, glusterd_op_t op, void *ctx, + char *err_str, size_t err_len) +{ + int ret = -1; - if (ret) { - if (dict) - dict_unref (dict); - ret = glusterd_op_send_cli_response (cli_op, ret, 0, req, - NULL, "operation failed"); - if (!lock_fail) - (void) glusterd_opinfo_unlock (); - } + ret = glusterd_op_txn_begin(req, op, ctx, err_str, err_len); - return ret; + return ret; } int -glusterd_handle_cli_delete_volume (rpcsvc_request_t *req) +__glusterd_handle_ganesha_cmd(rpcsvc_request_t *req) { - int lock_fail = 0; - int32_t ret = -1; - gf1_cli_delete_vol_req cli_req = {0,}; - glusterd_op_delete_volume_ctx_t *ctx = NULL; - glusterd_op_t cli_op = GD_OP_DELETE_VOLUME; - - GF_ASSERT (req); - - ret = glusterd_op_set_cli_op (cli_op); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to set cli op: %d", - ret); - lock_fail = 1; - goto out; + int32_t ret = -1; + gf_cli_req cli_req = {{ + 0, + }}; + dict_t *dict = NULL; + glusterd_op_t cli_op = GD_OP_GANESHA; + char *op_errstr = NULL; + char err_str[2048] = { + 0, + }; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + GF_ASSERT(req); + + ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + snprintf(err_str, sizeof(err_str), + "Failed to decode " + "request received from cli"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, "%s", + err_str); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (cli_req.dict.dict_len) { + /* Unserialize the dictionary */ + dict = dict_new(); + if (!dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, + NULL); + ret = -1; + goto out; } - ret = -1; - if (!gf_xdr_to_cli_delete_vol_req (req->msg[0], &cli_req)) { - //failed to decode msg; - req->rpc_err = GARBAGE_ARGS; - goto out; + ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + "failed 
to " + "unserialize req-buffer to dictionary"); + snprintf(err_str, sizeof(err_str), + "Unable to decode " + "the command"); + goto out; + } else { + dict->extra_stdfree = cli_req.dict.dict_val; } - gf_cmd_log ("Volume delete","on volname: %s attempted", cli_req.volname); + } - gf_log ("glusterd", GF_LOG_INFO, "Received delete vol req" - "for volume %s", cli_req.volname); + gf_msg_trace(this->name, 0, "Received global option request"); + ret = glusterd_op_begin_synctask(req, GD_OP_GANESHA, dict); +out: + if (ret) { + if (err_str[0] == '\0') + snprintf(err_str, sizeof(err_str), "Operation failed"); + ret = glusterd_op_send_cli_response(cli_op, ret, 0, req, dict, err_str); + } + if (op_errstr) + GF_FREE(op_errstr); + if (dict) + dict_unref(dict); + + return ret; +} - ctx = GF_CALLOC (1, sizeof (*ctx), gf_gld_mt_delete_volume_ctx_t); - if (!ctx) - goto out; - - strncpy (ctx->volume_name, cli_req.volname, GD_VOLUME_NAME_MAX); +int +glusterd_handle_ganesha_cmd(rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler(req, __glusterd_handle_ganesha_cmd); +} - ret = glusterd_op_begin (req, GD_OP_DELETE_VOLUME, ctx, _gf_true); - gf_cmd_log ("Volume delete", "on volname: %s %s", cli_req.volname, - ((ret) ? "FAILED" : "SUCCESS")); +static int +__glusterd_handle_reset_volume(rpcsvc_request_t *req) +{ + int32_t ret = -1; + gf_cli_req cli_req = {{ + 0, + }}; + dict_t *dict = NULL; + glusterd_op_t cli_op = GD_OP_RESET_VOLUME; + char *volname = NULL; + char err_str[64] = { + 0, + }; + xlator_t *this = NULL; + + GF_ASSERT(req); + this = THIS; + GF_ASSERT(this); + + gf_msg(this->name, GF_LOG_INFO, 0, 0, "Received reset vol req"); + + ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + snprintf(err_str, sizeof(err_str), + "Failed to decode request " + "received from cli"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, "%s", + err_str); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (cli_req.dict.dict_len) { + /* Unserialize the dictionary */ + dict = dict_new(); + + ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + "failed to " + "unserialize req-buffer to dictionary"); + snprintf(err_str, sizeof(err_str), + "Unable to decode " + "the command"); + goto out; + } else { + dict->extra_stdfree = cli_req.dict.dict_val; + } + } + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + snprintf(err_str, sizeof(err_str), + "Failed to get volume " + "name"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLNAME_NOTFOUND_IN_DICT, + "%s", err_str); + goto out; + } + gf_msg_debug(this->name, 0, + "Received volume reset request for " + "volume %s", + volname); + + ret = glusterd_op_begin_synctask(req, GD_OP_RESET_VOLUME, dict); out: - if (cli_req.volname) - free (cli_req.volname); //its malloced by xdr + if (ret) { + if (err_str[0] == '\0') + snprintf(err_str, sizeof(err_str), "Operation failed"); + ret = glusterd_op_send_cli_response(cli_op, ret, 0, req, dict, err_str); + } - glusterd_friend_sm (); - glusterd_op_sm (); - - if (ret) { - ret = glusterd_op_send_cli_response (cli_op, ret, 0, req, - NULL, "operation failed"); - if (!lock_fail) - (void) glusterd_opinfo_unlock (); - } + return ret; +} - return ret; +int +glusterd_handle_reset_volume(rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler(req, __glusterd_handle_reset_volume); } int -glusterd_handle_add_brick (rpcsvc_request_t *req) 
+__glusterd_handle_set_volume(rpcsvc_request_t *req) { - int32_t ret = -1; - gf1_cli_add_brick_req cli_req = {0,}; - dict_t *dict = NULL; - glusterd_brickinfo_t *brickinfo = NULL; - char *brick = NULL; - char *bricks = NULL; - char *volname = NULL; - int brick_count = 0; - char *tmpptr = NULL; - int i = 0; - char *brick_list = NULL; - void *cli_rsp = NULL; - char err_str[2048] = {0,}; - gf1_cli_add_brick_rsp rsp = {0,}; - glusterd_volinfo_t *volinfo = NULL; - glusterd_conf_t *priv = NULL; - xlator_t *this = NULL; - char *free_ptr = NULL; - glusterd_brickinfo_t *tmpbrkinfo = NULL; - glusterd_volinfo_t tmpvolinfo = {{0},}; - int lock_fail = 0; - glusterd_op_t cli_op = GD_OP_ADD_BRICK; + int32_t ret = -1; + gf_cli_req cli_req = {{ + 0, + }}; + dict_t *dict = NULL; + glusterd_op_t cli_op = GD_OP_SET_VOLUME; + char *key = NULL; + char *value = NULL; + char *volname = NULL; + char *op_errstr = NULL; + gf_boolean_t help = _gf_false; + char err_str[2048] = { + 0, + }; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + GF_ASSERT(req); + + ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + snprintf(err_str, sizeof(err_str), + "Failed to decode " + "request received from cli"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, "%s", + err_str); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (cli_req.dict.dict_len) { + /* Unserialize the dictionary */ + dict = dict_new(); + + ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_UNSERIALIZE_FAIL, + "failed to " + "unserialize req-buffer to dictionary"); + snprintf(err_str, sizeof(err_str), + "Unable to decode " + "the command"); + goto out; + } else { + dict->extra_stdfree = cli_req.dict.dict_val; + } + } + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + snprintf(err_str, sizeof(err_str), + "Failed to get volume " + "name while handling volume set command"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", + err_str); + goto out; + } + + if (strcmp(volname, "help") == 0 || strcmp(volname, "help-xml") == 0) { + ret = glusterd_volset_help(dict, &op_errstr); + help = _gf_true; + goto out; + } + + ret = dict_get_strn(dict, "key1", SLEN("key1"), &key); + if (ret) { + snprintf(err_str, sizeof(err_str), + "Failed to get key while" + " handling volume set for %s", + volname); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", + err_str); + goto out; + } + + ret = dict_get_strn(dict, "value1", SLEN("value1"), &value); + if (ret) { + snprintf(err_str, sizeof(err_str), + "Failed to get value while" + " handling volume set for %s", + volname); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", + err_str); + goto out; + } + gf_msg_debug(this->name, 0, + "Received volume set request for " + "volume %s", + volname); + + ret = glusterd_op_begin_synctask(req, GD_OP_SET_VOLUME, dict); - this = THIS; - GF_ASSERT(this); +out: + if (help) + ret = glusterd_op_send_cli_response(cli_op, ret, 0, req, dict, + (op_errstr) ? 
op_errstr : ""); + else if (ret) { + if (err_str[0] == '\0') + snprintf(err_str, sizeof(err_str), "Operation failed"); + ret = glusterd_op_send_cli_response(cli_op, ret, 0, req, dict, err_str); + } + if (op_errstr) + GF_FREE(op_errstr); + + return ret; +} - priv = this->private; +int +glusterd_handle_set_volume(rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler(req, __glusterd_handle_set_volume); +} - GF_ASSERT (req); +int +__glusterd_handle_sync_volume(rpcsvc_request_t *req) +{ + int32_t ret = -1; + gf_cli_req cli_req = {{ + 0, + }}; + dict_t *dict = NULL; + gf_cli_rsp cli_rsp = {0.}; + char msg[2048] = { + 0, + }; + char *volname = NULL; + gf1_cli_sync_volume flags = 0; + char *hostname = NULL; + xlator_t *this = NULL; + + GF_ASSERT(req); + this = THIS; + GF_ASSERT(this); + + ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + // failed to decode msg; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, "%s", + "Failed to decode " + "request received from cli"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (cli_req.dict.dict_len) { + /* Unserialize the dictionary */ + dict = dict_new(); + + ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + "failed to " + "unserialize req-buffer to dictionary"); + snprintf(msg, sizeof(msg), + "Unable to decode the " + "command"); + goto out; + } else { + dict->extra_stdfree = cli_req.dict.dict_val; + } + } - INIT_LIST_HEAD (&tmpvolinfo.bricks); + ret = dict_get_strn(dict, "hostname", SLEN("hostname"), &hostname); + if (ret) { + snprintf(msg, sizeof(msg), "Failed to get hostname"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HOSTNAME_NOTFOUND_IN_DICT, + "%s", msg); + goto out; + } - ret = glusterd_op_set_cli_op (cli_op); + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + ret = dict_get_int32n(dict, "flags", SLEN("flags"), (int32_t *)&flags); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to set cli op: %d", - ret); - lock_fail = 1; - snprintf (err_str, sizeof (err_str), "Another operation is in " - "progress, please retry after some time"); - goto out; + snprintf(msg, sizeof(msg), "Failed to get volume name or flags"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FLAGS_NOTFOUND_IN_DICT, + "%s", msg); + goto out; } + } - ret = -1; - if (!gf_xdr_to_cli_add_brick_req (req->msg[0], &cli_req)) { - //failed to decode msg; - req->rpc_err = GARBAGE_ARGS; - snprintf (err_str, sizeof (err_str), "Garbage args received"); - goto out; - } + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_VOL_SYNC_REQ_RCVD, + "Received volume sync req " + "for volume %s", + (flags & GF_CLI_SYNC_ALL) ? 
"all" : volname); - gf_cmd_log ("Volume add-brick", "on volname: %s attempted", - cli_req.volname); - gf_log ("glusterd", GF_LOG_INFO, "Received add brick req"); + if (gf_is_local_addr(hostname)) { + ret = -1; + snprintf(msg, sizeof(msg), + "sync from localhost" + " not allowed"); + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_SYNC_FROM_LOCALHOST_UNALLOWED, "%s", msg); + goto out; + } - if (cli_req.bricks.bricks_len) { - /* Unserialize the dictionary */ - dict = dict_new (); + ret = glusterd_op_begin_synctask(req, GD_OP_SYNC_VOLUME, dict); - ret = dict_unserialize (cli_req.bricks.bricks_val, - cli_req.bricks.bricks_len, - &dict); - if (ret < 0) { - gf_log ("glusterd", GF_LOG_ERROR, - "failed to " - "unserialize req-buffer to dictionary"); - snprintf (err_str, sizeof (err_str), "Unable to decode " - "the buffer"); - goto out; - } else { - dict->extra_stdfree = cli_req.bricks.bricks_val; - } - } +out: + if (ret) { + cli_rsp.op_ret = -1; + cli_rsp.op_errstr = msg; + if (msg[0] == '\0') + snprintf(msg, sizeof(msg), "Operation failed"); + glusterd_to_cli(req, &cli_rsp, NULL, 0, NULL, (xdrproc_t)xdr_gf_cli_rsp, + dict); + + ret = 0; // sent error to cli, prevent second reply + } + + return ret; +} - ret = dict_get_str (dict, "volname", &volname); +int +glusterd_handle_sync_volume(rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler(req, __glusterd_handle_sync_volume); +} - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); - snprintf (err_str, sizeof (err_str), "Unable to get volume " - "name"); - goto out; - } +int +glusterd_fsm_log_send_resp(rpcsvc_request_t *req, int op_ret, char *op_errstr, + dict_t *dict) +{ + int ret = -1; + gf1_cli_fsm_log_rsp rsp = {0}; - if (!(ret = glusterd_check_volume_exists (volname))) { - ret = -1; - snprintf(err_str, 2048, "Volume %s does not exist", volname); - gf_log ("glusterd", GF_LOG_ERROR, "%s", err_str); - goto out; - } + GF_ASSERT(req); + GF_ASSERT(op_errstr); - ret = dict_get_int32 (dict, "count", &brick_count); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get count"); - snprintf (err_str, sizeof (err_str), "Unable to get volume " - "brick count"); - goto out; + rsp.op_ret = op_ret; + rsp.op_errstr = op_errstr; + if (rsp.op_ret == 0) { + ret = dict_allocate_and_serialize(dict, &rsp.fsm_log.fsm_log_val, + &rsp.fsm_log.fsm_log_len); + if (ret < 0) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); + return ret; } + } - if (!(ret = glusterd_volinfo_find (volname, &volinfo))) { - if (volinfo->type == GF_CLUSTER_TYPE_NONE) - goto brick_val; - if (!brick_count || !volinfo->sub_count) - goto brick_val; + ret = glusterd_submit_reply(req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gf1_cli_fsm_log_rsp); + GF_FREE(rsp.fsm_log.fsm_log_val); - /* If the brick count is less than sub_count then, allow add-brick only for - plain replicate volume since in plain stripe brick_count becoming less than - the sub_count is not allowed */ - if (volinfo->brick_count < volinfo->sub_count && (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) ) { - if ((volinfo->sub_count - volinfo->brick_count) == brick_count) - goto brick_val; - } + gf_msg_debug("glusterd", 0, "Responded, ret: %d", ret); - if ((brick_count % volinfo->sub_count) != 0) { - snprintf(err_str, 2048, "Incorrect number of bricks" - " supplied %d for type %s with count %d", - brick_count, (volinfo->type == 1)? 
"STRIPE": - "REPLICATE", volinfo->sub_count); - gf_log("glusterd", GF_LOG_ERROR, "%s", err_str); - ret = -1; - goto out; - } - } else { - snprintf (err_str, sizeof (err_str), "Unable to get volinfo " - "for volume name %s", volname); - gf_log ("glusterd", GF_LOG_ERROR, "%s", err_str); - goto out; - } + return 0; +} -brick_val: - ret = dict_get_str (dict, "bricks", &bricks); - if (ret) { - snprintf (err_str, sizeof (err_str), "Unable to get volume " - "bricks"); - gf_log ("glusterd", GF_LOG_ERROR, "%s", err_str); - goto out; - } +int +__glusterd_handle_fsm_log(rpcsvc_request_t *req) +{ + int32_t ret = -1; + gf1_cli_fsm_log_req cli_req = { + 0, + }; + dict_t *dict = NULL; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + char msg[2048] = {0}; + glusterd_peerinfo_t *peerinfo = NULL; + + GF_ASSERT(req); + + this = THIS; + GF_VALIDATE_OR_GOTO("xlator", (this != NULL), out); + + ret = xdr_to_generic(req->msg[0], &cli_req, + (xdrproc_t)xdr_gf1_cli_fsm_log_req); + if (ret < 0) { + // failed to decode msg; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, + "Failed to decode " + "request received from client."); + req->rpc_err = GARBAGE_ARGS; + snprintf(msg, sizeof(msg), "Garbage request"); + goto out; + } + + dict = dict_new(); + if (!dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); + ret = -1; + goto out; + } - if (bricks) - brick_list = gf_strdup (bricks); - if (!brick_list) { - ret = -1; - snprintf (err_str, sizeof (err_str), "Out of memory"); - goto out; + if (strcmp("", cli_req.name) == 0) { + conf = this->private; + ret = glusterd_sm_tr_log_add_to_dict(dict, &conf->op_sm_log); + } else { + RCU_READ_LOCK; + + peerinfo = glusterd_peerinfo_find_by_hostname(cli_req.name); + if (!peerinfo) { + RCU_READ_UNLOCK; + ret = -1; + snprintf(msg, sizeof(msg), "%s is not a peer", cli_req.name); } else { - free_ptr = brick_list; - } - - gf_cmd_log ("Volume add-brick", "volname: %s type %s count:%d bricks:%s" - ,volname, ((volinfo->type == 0)? "DEFAULT" : ((volinfo->type - == 1)? "STRIPE": "REPLICATE")), brick_count, brick_list); - - - while ( i < brick_count) { - i++; - brick= strtok_r (brick_list, " \n", &tmpptr); - brick_list = tmpptr; - brickinfo = NULL; - ret = glusterd_brickinfo_from_brick (brick, &brickinfo); - if (ret) { - snprintf (err_str, sizeof (err_str), "Unable to get " - "brick info from brick %s", brick); - goto out; - } - ret = glusterd_new_brick_validate (brick, brickinfo, err_str, - sizeof (err_str)); - if (ret) - goto out; - ret = glusterd_volume_brickinfo_get (brickinfo->uuid, - brickinfo->hostname, - brickinfo->path, - &tmpvolinfo, &tmpbrkinfo); - if (!ret) { - ret = -1; - snprintf (err_str, sizeof (err_str), "Brick: %s:%s, %s" - " in the arguments mean the same", - tmpbrkinfo->hostname, tmpbrkinfo->path, - brick); - goto out; - } - list_add_tail (&brickinfo->brick_list, &tmpvolinfo.bricks); - brickinfo = NULL; + ret = glusterd_sm_tr_log_add_to_dict(dict, &peerinfo->sm_log); + RCU_READ_UNLOCK; } - - ret = glusterd_op_begin (req, GD_OP_ADD_BRICK, dict, _gf_true); - gf_cmd_log ("Volume add-brick","on volname: %s %s", volname, - (ret != 0)? 
"FAILED" : "SUCCESS"); + } out: - if (ret) { - if (dict) - dict_unref (dict); - rsp.op_ret = -1; - rsp.op_errno = 0; - rsp.volname = ""; - if (err_str[0] == '\0') - snprintf (err_str, sizeof (err_str), "Operation failed"); - rsp.op_errstr = err_str; - cli_rsp = &rsp; - glusterd_submit_reply(req, cli_rsp, NULL, 0, NULL, - gf_xdr_serialize_cli_add_brick_rsp); - if (!lock_fail) - glusterd_opinfo_unlock(); - ret = 0; //sent error to cli, prevent second reply - } - - glusterd_friend_sm (); - glusterd_op_sm (); - - if (free_ptr) - GF_FREE (free_ptr); - glusterd_volume_brickinfos_delete (&tmpvolinfo); - if (brickinfo) - glusterd_brickinfo_delete (brickinfo); - if (cli_req.volname) - free (cli_req.volname); //its malloced by xdr + (void)glusterd_fsm_log_send_resp(req, ret, msg, dict); + free(cli_req.name); // malloced by xdr + if (dict) + dict_unref(dict); - return ret; + glusterd_friend_sm(); + glusterd_op_sm(); + + return 0; // send 0 to avoid double reply } int -glusterd_handle_replace_brick (rpcsvc_request_t *req) +glusterd_handle_fsm_log(rpcsvc_request_t *req) { - int32_t ret = -1; - gf1_cli_replace_brick_req cli_req = {0,}; - dict_t *dict = NULL; - char *src_brick = NULL; - char *dst_brick = NULL; - int32_t op = 0; - char operation[256]; - int lock_fail = 0; - glusterd_op_t cli_op = GD_OP_REPLACE_BRICK; + return glusterd_big_locked_handler(req, __glusterd_handle_fsm_log); +} - GF_ASSERT (req); +int +glusterd_op_lock_send_resp(rpcsvc_request_t *req, int32_t status) +{ + gd1_mgmt_cluster_lock_rsp rsp = { + {0}, + }; + int ret = -1; - ret = glusterd_op_set_cli_op (cli_op); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to set cli op: %d", - ret); - lock_fail = 1; - goto out; - } + GF_ASSERT(req); + glusterd_get_uuid(&rsp.uuid); + rsp.op_ret = status; - ret = -1; - if (!gf_xdr_to_cli_replace_brick_req (req->msg[0], &cli_req)) { - //failed to decode msg; - req->rpc_err = GARBAGE_ARGS; - goto out; - } + ret = glusterd_submit_reply(req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_cluster_lock_rsp); - gf_log ("glusterd", GF_LOG_INFO, "Received replace brick req"); + gf_msg_debug(THIS->name, 0, "Responded to lock, ret: %d", ret); - if (cli_req.bricks.bricks_len) { - /* Unserialize the dictionary */ - dict = dict_new (); + return 0; +} - ret = dict_unserialize (cli_req.bricks.bricks_val, - cli_req.bricks.bricks_len, - &dict); - if (ret < 0) { - gf_log ("glusterd", GF_LOG_ERROR, - "failed to " - "unserialize req-buffer to dictionary"); - goto out; - } else { - dict->extra_stdfree = cli_req.bricks.bricks_val; - } - } +int +glusterd_op_unlock_send_resp(rpcsvc_request_t *req, int32_t status) +{ + gd1_mgmt_cluster_unlock_rsp rsp = { + {0}, + }; + int ret = -1; - ret = dict_get_int32 (dict, "operation", &op); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "dict_get on operation failed"); - goto out; - } + GF_ASSERT(req); + rsp.op_ret = status; + glusterd_get_uuid(&rsp.uuid); - ret = dict_get_str (dict, "src-brick", &src_brick); + ret = glusterd_submit_reply(req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_cluster_unlock_rsp); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get src brick"); - goto out; - } - gf_log ("", GF_LOG_DEBUG, - "src brick=%s", src_brick); + gf_msg_debug(THIS->name, 0, "Responded to unlock, ret: %d", ret); - ret = dict_get_str (dict, "dst-brick", &dst_brick); + return ret; +} - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get dest brick"); - goto out; - } +int +glusterd_op_mgmt_v3_lock_send_resp(rpcsvc_request_t *req, uuid_t *txn_id, + int32_t status) +{ + 
gd1_mgmt_v3_lock_rsp rsp = {
+ {0},
+ };
+ int ret = -1;
- gf_log ("", GF_LOG_DEBUG,
- "dst brick=%s", dst_brick);
+ GF_ASSERT(req);
+ GF_ASSERT(txn_id);
+ glusterd_get_uuid(&rsp.uuid);
+ rsp.op_ret = status;
+ if (rsp.op_ret)
+ rsp.op_errno = errno;
+ gf_uuid_copy(rsp.txn_id, *txn_id);
- switch (op) {
- case GF_REPLACE_OP_START: strcpy (operation, "start");
- break;
- case GF_REPLACE_OP_COMMIT: strcpy (operation, "commit");
- break;
- case GF_REPLACE_OP_PAUSE: strcpy (operation, "pause");
- break;
- case GF_REPLACE_OP_ABORT: strcpy (operation, "abort");
- break;
- case GF_REPLACE_OP_STATUS: strcpy (operation, "status");
- break;
- case GF_REPLACE_OP_COMMIT_FORCE: strcpy (operation, "commit-force");
- break;
- default:strcpy (operation, "unknown");
- break;
- }
+ ret = glusterd_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_v3_lock_rsp);
- gf_log ("glusterd", GF_LOG_INFO, "Received replace brick %s request", operation);
- gf_cmd_log ("Volume replace-brick","volname: %s src_brick:%s"
- " dst_brick:%s op:%s",cli_req.volname, src_brick, dst_brick
- ,operation);
+ gf_msg_debug(THIS->name, 0, "Responded to mgmt_v3 lock, ret: %d", ret);
- ret = glusterd_op_begin (req, GD_OP_REPLACE_BRICK, dict, _gf_true);
- gf_cmd_log ("Volume replace-brick","on volname: %s %s", cli_req.volname,
- (ret) ? "FAILED" : "SUCCESS");
+ return ret;
+}
-out:
- if (ret && dict)
- dict_unref (dict);
- if (cli_req.volname)
- free (cli_req.volname);//malloced by xdr
+int
+glusterd_op_mgmt_v3_unlock_send_resp(rpcsvc_request_t *req, uuid_t *txn_id,
+ int32_t status)
+{
+ gd1_mgmt_v3_unlock_rsp rsp = {
+ {0},
+ };
+ int ret = -1;
- glusterd_friend_sm ();
- glusterd_op_sm ();
+ GF_ASSERT(req);
+ GF_ASSERT(txn_id);
+ rsp.op_ret = status;
+ if (rsp.op_ret)
+ rsp.op_errno = errno;
+ glusterd_get_uuid(&rsp.uuid);
+ gf_uuid_copy(rsp.txn_id, *txn_id);
- if (ret) {
- ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
- NULL, "operation failed");
- if (!lock_fail)
- (void) glusterd_opinfo_unlock ();
+ ret = glusterd_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_v3_unlock_rsp);
- }
+ gf_msg_debug(THIS->name, 0, "Responded to mgmt_v3 unlock, ret: %d", ret);
- return ret;
+ return ret;
 }
-
-
-
 int
-glusterd_handle_reset_volume (rpcsvc_request_t *req)
+__glusterd_handle_cluster_unlock(rpcsvc_request_t *req)
 {
- int32_t ret = -1;
- gf1_cli_reset_vol_req cli_req = {0,};
- dict_t *dict = NULL;
- int lock_fail = 0;
- glusterd_op_t cli_op = GD_OP_RESET_VOLUME;
-
- GF_ASSERT (req);
-
- ret = glusterd_op_set_cli_op (cli_op);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to set cli op: %d",
- ret);
- lock_fail = 1;
- goto out;
- }
-
+ gd1_mgmt_cluster_unlock_req unlock_req = {
+ {0},
+ };
+ int32_t ret = -1;
+ glusterd_op_lock_ctx_t *ctx = NULL;
+ xlator_t *this = NULL;
+ uuid_t *txn_id = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+ GF_ASSERT(req);
+
+ txn_id = &priv->global_txn_id;
+
+ ret = xdr_to_generic(req->msg[0], &unlock_req,
+ (xdrproc_t)xdr_gd1_mgmt_cluster_unlock_req);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
+ "Failed to decode unlock "
+ "request received from peer");
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ gf_msg_debug(this->name, 0, "Received UNLOCK from uuid: %s",
+ uuid_utoa(unlock_req.uuid));
+
+ RCU_READ_LOCK;
+ ret = (glusterd_peerinfo_find_by_uuid(unlock_req.uuid) == NULL);
+ RCU_READ_UNLOCK;
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0,
GD_MSG_PEER_NOT_FOUND, + "%s doesn't " + "belong to the cluster. Ignoring request.", + uuid_utoa(unlock_req.uuid)); ret = -1; - if (!gf_xdr_to_cli_set_vol_req (req->msg[0], &cli_req)) { - //failed to decode msg; - req->rpc_err = GARBAGE_ARGS; - goto out; - } + goto out; + } - if (cli_req.dict.dict_len) { - /* Unserialize the dictionary */ - dict = dict_new (); + ctx = GF_CALLOC(1, sizeof(*ctx), gf_gld_mt_op_lock_ctx_t); - ret = dict_unserialize (cli_req.dict.dict_val, - cli_req.dict.dict_len, - &dict); - if (ret < 0) { - gf_log ("glusterd", GF_LOG_ERROR, "failed to " - "unserialize req-buffer to dictionary"); - goto out; - } else { - dict->extra_stdfree = cli_req.dict.dict_val; - } - } + if (!ctx) { + // respond here + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "No memory."); + return -1; + } + gf_uuid_copy(ctx->uuid, unlock_req.uuid); + ctx->req = req; + ctx->dict = NULL; - ret = glusterd_op_begin (req, GD_OP_RESET_VOLUME, dict, _gf_true); + ret = glusterd_op_sm_inject_event(GD_OP_EVENT_UNLOCK, txn_id, ctx); out: - if (cli_req.volname) - free (cli_req.volname);//malloced by xdr + glusterd_friend_sm(); + glusterd_op_sm(); - glusterd_friend_sm (); - glusterd_op_sm (); - if (ret) { - if (dict) - dict_unref (dict); - ret = glusterd_op_send_cli_response (cli_op, ret, 0, req, - NULL, "operation failed"); - if (!lock_fail) - (void) glusterd_opinfo_unlock (); - } + return ret; +} - return ret; +int +glusterd_handle_cluster_unlock(rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler(req, __glusterd_handle_cluster_unlock); } int -glusterd_handle_gsync_set (rpcsvc_request_t *req) +glusterd_op_stage_send_resp(rpcsvc_request_t *req, int32_t op, int32_t status, + char *op_errstr, dict_t *rsp_dict) { - int32_t ret = 0; - dict_t *dict = NULL; - gf1_cli_gsync_set_req cli_req = {{0},}; - int lock_fail = 0; - glusterd_op_t cli_op = GD_OP_GSYNC_SET; + gd1_mgmt_stage_op_rsp rsp = { + {0}, + }; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(req); + + rsp.op_ret = status; + glusterd_get_uuid(&rsp.uuid); + rsp.op = op; + if (op_errstr) + rsp.op_errstr = op_errstr; + else + rsp.op_errstr = ""; + + ret = dict_allocate_and_serialize(rsp_dict, &rsp.dict.dict_val, + &rsp.dict.dict_len); + if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); + return ret; + } - GF_ASSERT (req); + ret = glusterd_submit_reply(req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_stage_op_rsp); - ret = glusterd_op_set_cli_op (cli_op); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to set cli op: %d", - ret); - lock_fail = 1; - goto out; - } + gf_msg_debug(this->name, 0, "Responded to stage, ret: %d", ret); + GF_FREE(rsp.dict.dict_val); - ret = -1; - if (!gf_xdr_to_cli_gsync_set_req (req->msg[0], &cli_req)) { - req->rpc_err = GARBAGE_ARGS; - goto out; - } + return ret; +} - if (cli_req.dict.dict_len) { - dict = dict_new (); - if (!dict) - goto out; +int +glusterd_op_commit_send_resp(rpcsvc_request_t *req, int32_t op, int32_t status, + char *op_errstr, dict_t *rsp_dict) +{ + gd1_mgmt_commit_op_rsp rsp = { + {0}, + }; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(req); + rsp.op_ret = status; + glusterd_get_uuid(&rsp.uuid); + rsp.op = op; + + if (op_errstr) + rsp.op_errstr = op_errstr; + else + rsp.op_errstr = ""; - ret = dict_unserialize (cli_req.dict.dict_val, - cli_req.dict.dict_len, - &dict); - if (ret < 0) { - gf_log ("glusterd", GF_LOG_ERROR, "failed to " - 
"unserialize req-buffer to dictionary"); - goto out; - } else { - dict->extra_stdfree = cli_req.dict.dict_val; - } + if (rsp_dict) { + ret = dict_allocate_and_serialize(rsp_dict, &rsp.dict.dict_val, + &rsp.dict.dict_len); + if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); + goto out; } + } - ret = glusterd_op_begin (req, GD_OP_GSYNC_SET, dict, _gf_true); + ret = glusterd_submit_reply(req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_commit_op_rsp); -out: - glusterd_friend_sm (); - glusterd_op_sm (); + gf_msg_debug(this->name, 0, "Responded to commit, ret: %d", ret); - if (ret) { - if (dict) - dict_unref (dict); - ret = glusterd_op_send_cli_response (cli_op, ret, 0, req, - NULL, "operation failed"); - if (!lock_fail) - (void) glusterd_opinfo_unlock (); - } - return ret; +out: + GF_FREE(rsp.dict.dict_val); + return ret; } int -glusterd_handle_quota (rpcsvc_request_t *req) +__glusterd_handle_incoming_friend_req(rpcsvc_request_t *req) { - int32_t ret = -1; - gf1_cli_quota_req cli_req = {0,}; - dict_t *dict = NULL; - int lock_fail = 0; - glusterd_op_t cli_op = GD_OP_QUOTA; + int32_t ret = -1; + gd1_mgmt_friend_req friend_req = { + {0}, + }; + gf_boolean_t run_fsm = _gf_true; + + GF_ASSERT(req); + ret = xdr_to_generic(req->msg[0], &friend_req, + (xdrproc_t)xdr_gd1_mgmt_friend_req); + if (ret < 0) { + // failed to decode msg; + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, + "Failed to decode " + "request received from friend"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_PROBE_RCVD, + "Received probe from uuid: %s", uuid_utoa(friend_req.uuid)); + ret = glusterd_handle_friend_req(req, friend_req.uuid, friend_req.hostname, + friend_req.port, &friend_req); + + if (ret == GLUSTERD_CONNECTION_AWAITED) { + // fsm should be run after connection establishes + run_fsm = _gf_false; + ret = 0; + } - GF_ASSERT (req); +out: + free(friend_req.hostname); // malloced by xdr - ret = glusterd_op_set_cli_op (cli_op); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to set cli op: %d", - ret); - lock_fail = 1; - goto out; - } + if (run_fsm) { + glusterd_friend_sm(); + glusterd_op_sm(); + } - if (!gf_xdr_to_cli_quota_req (req->msg[0], &cli_req)) { - //failed to decode msg; - req->rpc_err = GARBAGE_ARGS; - goto out; - } + return ret; +} - if (cli_req.dict.dict_len) { - /* Unserialize the dictionary */ - dict = dict_new (); +int +glusterd_handle_incoming_friend_req(rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler(req, + __glusterd_handle_incoming_friend_req); +} - ret = dict_unserialize (cli_req.dict.dict_val, - cli_req.dict.dict_len, - &dict); - if (ret < 0) { - gf_log ("glusterd", GF_LOG_ERROR, "failed to " - "unserialize req-buffer to dictionary"); - goto out; - } else { - dict->extra_stdfree = cli_req.dict.dict_val; - } - } - ret = glusterd_op_begin (req, GD_OP_QUOTA, dict, _gf_true); +int +__glusterd_handle_incoming_unfriend_req(rpcsvc_request_t *req) +{ + int32_t ret = -1; + gd1_mgmt_friend_req friend_req = { + {0}, + }; + char remote_hostname[UNIX_PATH_MAX + 1] = { + 0, + }; + + GF_ASSERT(req); + ret = xdr_to_generic(req->msg[0], &friend_req, + (xdrproc_t)xdr_gd1_mgmt_friend_req); + if (ret < 0) { + // failed to decode msg; + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, + "Failed to decode " + "request received."); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_UNFRIEND_REQ_RCVD, + "Received unfriend from uuid: 
%s", uuid_utoa(friend_req.uuid)); + + ret = glusterd_remote_hostname_get(req, remote_hostname, + sizeof(remote_hostname)); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_HOSTNAME_RESOLVE_FAIL, + "Unable to get the remote hostname"); + goto out; + } + ret = glusterd_handle_unfriend_req(req, friend_req.uuid, remote_hostname, + friend_req.port); out: - glusterd_friend_sm (); - glusterd_op_sm (); + free(friend_req.hostname); // malloced by xdr + free(friend_req.vols.vols_val); // malloced by xdr - if (ret) { - if (dict) - dict_unref (dict); - ret = glusterd_op_send_cli_response (cli_op, ret, 0, req, - NULL, "operation failed"); - if (!lock_fail) - (void) glusterd_opinfo_unlock (); - } + glusterd_friend_sm(); + glusterd_op_sm(); - return ret; + return ret; } int -glusterd_handle_set_volume (rpcsvc_request_t *req) +glusterd_handle_incoming_unfriend_req(rpcsvc_request_t *req) { - int32_t ret = -1; - gf1_cli_set_vol_req cli_req = {0,}; - dict_t *dict = NULL; - int lock_fail = 0; - glusterd_op_t cli_op = GD_OP_SET_VOLUME; + return glusterd_big_locked_handler(req, + __glusterd_handle_incoming_unfriend_req); +} - GF_ASSERT (req); +int +glusterd_handle_friend_update_delete(dict_t *dict) +{ + char *hostname = NULL; + int32_t ret = -1; - ret = glusterd_op_set_cli_op (cli_op); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to set cli op: %d", - ret); - lock_fail = 1; - goto out; - } + GF_ASSERT(dict); - ret = -1; - if (!gf_xdr_to_cli_set_vol_req (req->msg[0], &cli_req)) { - //failed to decode msg; - req->rpc_err = GARBAGE_ARGS; - goto out; - } + ret = dict_get_strn(dict, "hostname", SLEN("hostname"), &hostname); + if (ret) + goto out; - if (cli_req.dict.dict_len) { - /* Unserialize the dictionary */ - dict = dict_new (); + ret = glusterd_friend_remove(NULL, hostname); - ret = dict_unserialize (cli_req.dict.dict_val, - cli_req.dict.dict_len, - &dict); - if (ret < 0) { - gf_log ("glusterd", GF_LOG_ERROR, - "failed to " - "unserialize req-buffer to dictionary"); - goto out; - } else { - dict->extra_stdfree = cli_req.dict.dict_val; - } - } +out: + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; +} - ret = glusterd_op_begin (req, GD_OP_SET_VOLUME, dict, _gf_true); +int +glusterd_peer_hostname_update(glusterd_peerinfo_t *peerinfo, + const char *hostname, gf_boolean_t store_update) +{ + int ret = 0; -out: - if (cli_req.volname) - free (cli_req.volname);//malloced by xdr + GF_ASSERT(peerinfo); + GF_ASSERT(hostname); - glusterd_friend_sm (); - glusterd_op_sm (); + ret = gd_add_address_to_peer(peerinfo, hostname); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, + GD_MSG_HOSTNAME_ADD_TO_PEERLIST_FAIL, + "Couldn't add address to the peer info"); + goto out; + } - if (ret) { - if (dict) - dict_unref (dict); - ret = glusterd_op_send_cli_response (cli_op, ret, 0, req, - NULL, "operation failed"); - if (!lock_fail) - (void) glusterd_opinfo_unlock (); - } - return ret; + if (store_update) + ret = glusterd_store_peerinfo(peerinfo); +out: + gf_msg_debug(THIS->name, 0, "Returning %d", ret); + return ret; } int -glusterd_handle_remove_brick (rpcsvc_request_t *req) -{ - int32_t ret = -1; - gf1_cli_remove_brick_req cli_req = {0,}; - dict_t *dict = NULL; - int32_t count = 0; - char *brick = NULL; - char key[256] = {0,}; - char *brick_list = NULL; - int i = 1; - glusterd_volinfo_t *volinfo = NULL; - glusterd_brickinfo_t *brickinfo = NULL; - int32_t pos = 0; - int32_t sub_volume = 0; - int32_t sub_volume_start = 0; - int32_t sub_volume_end = 0; - glusterd_brickinfo_t *tmp = NULL; - char 
err_str[2048] = {0}; - gf1_cli_remove_brick_rsp rsp = {0,}; - void *cli_rsp = NULL; - char vol_type[256] = {0,}; - int lock_fail = 0; - glusterd_op_t cli_op = GD_OP_REMOVE_BRICK; - - GF_ASSERT (req); - - ret = glusterd_op_set_cli_op (cli_op); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to set cli op: %d", - ret); - lock_fail = 1; - goto out; - } - +__glusterd_handle_friend_update(rpcsvc_request_t *req) +{ + int32_t ret = -1; + gd1_mgmt_friend_update friend_req = { + {0}, + }; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + gd1_mgmt_friend_update_rsp rsp = { + {0}, + }; + dict_t *dict = NULL; + char key[32] = { + 0, + }; + int keylen; + char *uuid_buf = NULL; + int i = 1; + int count = 0; + uuid_t uuid = { + 0, + }; + glusterd_peerctx_args_t args = {0}; + int32_t op = 0; + + GF_ASSERT(req); + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + ret = xdr_to_generic(req->msg[0], &friend_req, + (xdrproc_t)xdr_gd1_mgmt_friend_update); + if (ret < 0) { + // failed to decode msg; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, + "Failed to decode " + "request received"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + ret = 0; + RCU_READ_LOCK; + if (glusterd_peerinfo_find(friend_req.uuid, NULL) == NULL) { ret = -1; - if (!gf_xdr_to_cli_remove_brick_req (req->msg[0], &cli_req)) { - //failed to decode msg; - req->rpc_err = GARBAGE_ARGS; - goto out; - } - - gf_cmd_log ("Volume remove-brick","on volname: %s attempted",cli_req.volname); - gf_log ("glusterd", GF_LOG_INFO, "Received rem brick req"); + } + RCU_READ_UNLOCK; + if (ret) { + gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_REQ_FROM_UNKNOWN_PEER, + "Received friend update request " + "from unknown peer %s", + uuid_utoa(friend_req.uuid)); + gf_event(EVENT_UNKNOWN_PEER, "peer=%s", uuid_utoa(friend_req.uuid)); + goto out; + } + + gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_FRIEND_UPDATE_RCVD, + "Received friend update from uuid: %s", uuid_utoa(friend_req.uuid)); + + if (friend_req.friends.friends_len) { + /* Unserialize the dictionary */ + dict = dict_new(); + + ret = dict_unserialize(friend_req.friends.friends_val, + friend_req.friends.friends_len, &dict); + if (ret < 0) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + "failed to " + "unserialize req-buffer to dictionary"); + goto out; + } else { + dict->extra_stdfree = friend_req.friends.friends_val; + } + } + + ret = dict_get_int32n(dict, "count", SLEN("count"), &count); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=count", NULL); + goto out; + } + + ret = dict_get_int32n(dict, "op", SLEN("op"), &op); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=op", NULL); + goto out; + } + + if (GD_FRIEND_UPDATE_DEL == op) { + (void)glusterd_handle_friend_update_delete(dict); + goto out; + } + + args.mode = GD_MODE_ON; + while (i <= count) { + keylen = snprintf(key, sizeof(key), "friend%d.uuid", i); + ret = dict_get_strn(dict, key, keylen, &uuid_buf); + if (ret) + goto out; + gf_uuid_parse(uuid_buf, uuid); + + if (!gf_uuid_compare(uuid, MY_UUID)) { + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_UUID_RECEIVED, + "Received my uuid as Friend"); + i++; + continue; + } + + snprintf(key, sizeof(key), "friend%d", i); + + RCU_READ_LOCK; + peerinfo = glusterd_peerinfo_find(uuid, NULL); + if (peerinfo == NULL) { + /* Create a new peer and add it to the list as there is + * no existing peer with the uuid + */ + peerinfo = 
gd_peerinfo_from_dict(dict, key); + if (peerinfo == NULL) { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PEERINFO_CREATE_FAIL, + "Could not create peerinfo from dict " + "for prefix %s", + key); + goto unlock; + } + + /* As this is a new peer, it should be added as a + * friend. The friend state machine will take care of + * correcting the state as required + */ + peerinfo->state.state = GD_FRIEND_STATE_BEFRIENDED; + + ret = glusterd_friend_add_from_peerinfo(peerinfo, 0, &args); + } else { + /* As an existing peer was found, update it with the new + * information + */ + ret = gd_update_peerinfo_from_dict(peerinfo, dict, key); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_PEER_INFO_UPDATE_FAIL, + "Failed to " + "update peer %s", + peerinfo->hostname); + goto unlock; + } + ret = glusterd_store_peerinfo(peerinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PEERINFO_CREATE_FAIL, + "Failed to store peerinfo"); + gf_event(EVENT_PEER_STORE_FAILURE, "peer=%s", + peerinfo->hostname); + } + } + unlock: + RCU_READ_UNLOCK; + if (ret) + break; - if (cli_req.bricks.bricks_len) { - /* Unserialize the dictionary */ - dict = dict_new (); + peerinfo = NULL; + i++; + } - ret = dict_unserialize (cli_req.bricks.bricks_val, - cli_req.bricks.bricks_len, - &dict); - if (ret < 0) { - gf_log ("glusterd", GF_LOG_ERROR, - "failed to " - "unserialize req-buffer to dictionary"); - goto out; - } else { - dict->extra_stdfree = cli_req.bricks.bricks_val; - } - } +out: + gf_uuid_copy(rsp.uuid, MY_UUID); + ret = glusterd_submit_reply(req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_friend_update_rsp); + if (dict) { + if (!dict->extra_stdfree && friend_req.friends.friends_val) + free(friend_req.friends.friends_val); // malloced by xdr + dict_unref(dict); + } else { + free(friend_req.friends.friends_val); // malloced by xdr + } + + if (peerinfo) + glusterd_peerinfo_cleanup(peerinfo); + + glusterd_friend_sm(); + glusterd_op_sm(); + + return ret; +} - ret = dict_get_int32 (dict, "count", &count); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get count"); - goto out; - } +int +glusterd_handle_friend_update(rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler(req, __glusterd_handle_friend_update); +} - ret = glusterd_volinfo_find (cli_req.volname, &volinfo); +int +__glusterd_handle_probe_query(rpcsvc_request_t *req) +{ + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + gd1_mgmt_probe_req probe_req = { + {0}, + }; + gd1_mgmt_probe_rsp rsp = { + {0}, + }; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_peerctx_args_t args = {0}; + int port = 0; + char remote_hostname[UNIX_PATH_MAX + 1] = { + 0, + }; + + GF_ASSERT(req); + + this = THIS; + GF_VALIDATE_OR_GOTO("xlator", (this != NULL), out); + + ret = xdr_to_generic(req->msg[0], &probe_req, + (xdrproc_t)xdr_gd1_mgmt_probe_req); + if (ret < 0) { + // failed to decode msg; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, + "Failed to decode probe " + "request"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + conf = this->private; + if (probe_req.port) + port = probe_req.port; + else + port = GF_DEFAULT_BASE_PORT; + + gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_PROBE_RCVD, + "Received probe from uuid: %s", uuid_utoa(probe_req.uuid)); + + /* Check for uuid collision and handle it in a user friendly way by + * sending the error. 
+ */ + if (!gf_uuid_compare(probe_req.uuid, MY_UUID)) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_UUIDS_SAME_RETRY, + "Peer uuid %s is same as " + "local uuid. Please check the uuid of both the peers " + "from %s/%s", + uuid_utoa(probe_req.uuid), GLUSTERD_DEFAULT_WORKDIR, + GLUSTERD_INFO_FILE); + rsp.op_ret = -1; + rsp.op_errno = GF_PROBE_SAME_UUID; + rsp.port = port; + goto respond; + } + + ret = glusterd_remote_hostname_get(req, remote_hostname, + sizeof(remote_hostname)); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_HOSTNAME_RESOLVE_FAIL, + "Unable to get the remote hostname"); + goto out; + } + + RCU_READ_LOCK; + peerinfo = glusterd_peerinfo_find(probe_req.uuid, remote_hostname); + if ((peerinfo == NULL) && (!cds_list_empty(&conf->peers))) { + rsp.op_ret = -1; + rsp.op_errno = GF_PROBE_ANOTHER_CLUSTER; + } else if (peerinfo == NULL) { + gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_PEER_NOT_FOUND, + "Unable to find peerinfo" + " for host: %s (%d)", + remote_hostname, port); + args.mode = GD_MODE_ON; + ret = glusterd_friend_add(remote_hostname, port, + GD_FRIEND_STATE_PROBE_RCVD, NULL, &peerinfo, + 0, &args); if (ret) { - snprintf (err_str, 2048, "Volume %s does not exist", - cli_req.volname); - gf_log ("", GF_LOG_ERROR, "%s", err_str); - goto out; - } - - if (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) - strcpy (vol_type, "replica"); - else if (volinfo->type == GF_CLUSTER_TYPE_STRIPE) - strcpy (vol_type, "stripe"); - else - strcpy (vol_type, "distribute"); - - /* Do not allow remove-brick if the volume is plain stripe */ - if ((volinfo->type == GF_CLUSTER_TYPE_STRIPE) && (volinfo->brick_count == volinfo->sub_count)) { - snprintf (err_str, 2048, "Removing brick from a plain stripe is not allowed"); - gf_log ("glusterd", GF_LOG_ERROR, "%s", err_str); - ret = -1; - goto out; - } - - /* Do not allow remove-brick if the bricks given is less than the replica count - or stripe count */ - if (((volinfo->type == GF_CLUSTER_TYPE_REPLICATE) || (volinfo->type == GF_CLUSTER_TYPE_STRIPE)) - && !(volinfo->brick_count <= volinfo->sub_count)) { - if (volinfo->sub_count && (count % volinfo->sub_count != 0)) { - snprintf (err_str, 2048, "Remove brick incorrect" - " brick count of %d for %s %d", - count, vol_type, volinfo->sub_count); - gf_log ("", GF_LOG_ERROR, "%s", err_str); - ret = -1; - goto out; - } + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_PEER_ADD_FAIL, + "Failed to add peer %s", remote_hostname); + rsp.op_errno = GF_PROBE_ADD_FAILED; } + } + RCU_READ_UNLOCK; - brick_list = GF_MALLOC (120000 * sizeof(*brick_list),gf_common_mt_char); - - if (!brick_list) { - ret = -1; - goto out; - } +respond: + gf_uuid_copy(rsp.uuid, MY_UUID); - strcpy (brick_list, " "); - while ( i <= count) { - snprintf (key, 256, "brick%d", i); - ret = dict_get_str (dict, key, &brick); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get %s", key); - goto out; - } - gf_log ("", GF_LOG_DEBUG, "Remove brick count %d brick: %s", - i, brick); + rsp.hostname = probe_req.hostname; + rsp.op_errstr = ""; - ret = glusterd_volume_brickinfo_get_by_brick(brick, volinfo, &brickinfo); - if (ret) { - snprintf(err_str, 2048,"Incorrect brick %s for volume" - " %s", brick, cli_req.volname); - gf_log ("", GF_LOG_ERROR, "%s", err_str); - goto out; - } - strcat(brick_list, brick); - strcat(brick_list, " "); - - i++; - if ((volinfo->type == GF_CLUSTER_TYPE_NONE) || - (volinfo->brick_count <= volinfo->sub_count)) - continue; - - pos = 0; - list_for_each_entry (tmp, &volinfo->bricks, brick_list) { - - if ((!strcmp 
(tmp->hostname,brickinfo->hostname)) && - !strcmp (tmp->path, brickinfo->path)) { - gf_log ("", GF_LOG_INFO, "Found brick"); - if (!sub_volume && volinfo->sub_count) { - sub_volume = (pos / volinfo-> - sub_count) + 1; - sub_volume_start = volinfo->sub_count * - (sub_volume - 1); - sub_volume_end = (volinfo->sub_count * - sub_volume) -1 ; - } else { - if (pos < sub_volume_start || - pos >sub_volume_end) { - ret = -1; - snprintf(err_str, 2048,"Bricks" - " not from same subvol" - " for %s", vol_type); - gf_log ("",GF_LOG_ERROR, - "%s", err_str); - goto out; - } - } - break; - } - pos++; - } - } - gf_cmd_log ("Volume remove-brick","volname: %s count:%d bricks:%s", - cli_req.volname, count, brick_list); + glusterd_submit_reply(req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_probe_rsp); + ret = 0; - ret = glusterd_op_begin (req, GD_OP_REMOVE_BRICK, dict, _gf_true); - gf_cmd_log ("Volume remove-brick","on volname: %s %s",cli_req.volname, - (ret) ? "FAILED" : "SUCCESS"); + gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_RESPONSE_INFO, + "Responded to %s, op_ret: %d, " + "op_errno: %d, ret: %d", + remote_hostname, rsp.op_ret, rsp.op_errno, ret); out: - if (ret) { - if (dict) - dict_unref (dict); - rsp.op_ret = -1; - rsp.op_errno = 0; - rsp.volname = ""; - if (err_str[0] == '\0') - snprintf (err_str, sizeof (err_str), "Operation failed"); - gf_log ("", GF_LOG_ERROR, "%s", err_str); - rsp.op_errstr = err_str; - cli_rsp = &rsp; - glusterd_submit_reply(req, cli_rsp, NULL, 0, NULL, - gf_xdr_serialize_cli_remove_brick_rsp); - if (!lock_fail) - glusterd_opinfo_unlock(); - - ret = 0; //sent error to cli, prevent second reply - - } - if (brick_list) - GF_FREE (brick_list); - if (cli_req.volname) - free (cli_req.volname); //its malloced by xdr - - glusterd_friend_sm (); - glusterd_op_sm (); + free(probe_req.hostname); // malloced by xdr - return ret; + glusterd_friend_sm(); + glusterd_op_sm(); + + return ret; } int -glusterd_handle_log_filename (rpcsvc_request_t *req) +glusterd_handle_probe_query(rpcsvc_request_t *req) { - int32_t ret = -1; - gf1_cli_log_filename_req cli_req = {0,}; - dict_t *dict = NULL; - int lock_fail = 0; - glusterd_op_t cli_op = GD_OP_LOG_FILENAME; - - GF_ASSERT (req); - - ret = glusterd_op_set_cli_op (cli_op); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to set cli op: %d", - ret); - lock_fail = 1; - goto out; - } - - ret = -1; - if (!gf_xdr_to_cli_log_filename_req (req->msg[0], &cli_req)) { - //failed to decode msg; - req->rpc_err = GARBAGE_ARGS; - goto out; - } - - gf_log ("glusterd", GF_LOG_INFO, "Received log filename req " - "for volume %s", cli_req.volname); - - dict = dict_new (); - if (!dict) - goto out; - - ret = dict_set_dynmstr (dict, "volname", cli_req.volname); - if (ret) - goto out; - ret = dict_set_dynmstr (dict, "brick", cli_req.brick); - if (ret) - goto out; - ret = dict_set_dynmstr (dict, "path", cli_req.path); - if (ret) - goto out; + return glusterd_big_locked_handler(req, __glusterd_handle_probe_query); +} - ret = glusterd_op_begin (req, GD_OP_LOG_FILENAME, dict, _gf_true); +int +__glusterd_handle_cli_profile_volume(rpcsvc_request_t *req) +{ + int32_t ret = -1; + gf_cli_req cli_req = {{ + 0, + }}; + dict_t *dict = NULL; + glusterd_op_t cli_op = GD_OP_PROFILE_VOLUME; + char *volname = NULL; + int32_t op = 0; + char err_str[64] = { + 0, + }; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + + GF_ASSERT(req); + this = THIS; + GF_ASSERT(this); + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, out); + + ret = xdr_to_generic(req->msg[0], 
&cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + // failed to decode msg; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, + "Failed to decode " + "request received from cli"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (cli_req.dict.dict_len > 0) { + dict = dict_new(); + if (!dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, + NULL); + goto out; + } + dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len, &dict); + } + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + snprintf(err_str, sizeof(err_str), + "Unable to get volume " + "name"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLNAME_NOTFOUND_IN_DICT, + "%s", err_str); + goto out; + } + + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_VOL_PROFILE_REQ_RCVD, + "Received volume profile req " + "for volume %s", + volname); + ret = dict_get_int32n(dict, "op", SLEN("op"), &op); + if (ret) { + snprintf(err_str, sizeof(err_str), "Unable to get operation"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", + err_str); + goto out; + } + + if (conf->op_version < GD_OP_VERSION_6_0) { + gf_msg_debug(this->name, 0, + "The cluster is operating at " + "version less than %d. Falling back " + "to op-sm framework.", + GD_OP_VERSION_6_0); + ret = glusterd_op_begin(req, cli_op, dict, err_str, sizeof(err_str)); + glusterd_friend_sm(); + glusterd_op_sm(); + } else { + ret = glusterd_mgmt_v3_initiate_all_phases_with_brickop_phase( + req, cli_op, dict); + } out: - if (ret && dict) - dict_unref (dict); - - glusterd_friend_sm (); - glusterd_op_sm (); + free(cli_req.dict.dict_val); - if (ret) { - ret = glusterd_op_send_cli_response (cli_op, ret, 0, req, - NULL, "operation failed"); - if (!lock_fail) - (void) glusterd_opinfo_unlock (); - - } + if (ret) { + if (err_str[0] == '\0') + snprintf(err_str, sizeof(err_str), "Operation failed"); + ret = glusterd_op_send_cli_response(cli_op, ret, 0, req, dict, err_str); + } - return ret; + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; } int -glusterd_handle_log_locate (rpcsvc_request_t *req) +glusterd_handle_cli_profile_volume(rpcsvc_request_t *req) { - int32_t ret = -1; - gf1_cli_log_locate_req cli_req = {0,}; - gf1_cli_log_locate_rsp rsp = {0,}; - glusterd_conf_t *priv = NULL; - glusterd_volinfo_t *volinfo = NULL; - glusterd_brickinfo_t *brickinfo = NULL; - char tmp_str[PATH_MAX] = {0,}; - char *tmp_brick = NULL; - uint32_t found = 0; - glusterd_brickinfo_t *tmpbrkinfo = NULL; - int lock_fail = 0; - glusterd_op_t cli_op = GD_OP_LOG_LOCATE; + return glusterd_big_locked_handler(req, + __glusterd_handle_cli_profile_volume); +} - GF_ASSERT (req); +int +__glusterd_handle_getwd(rpcsvc_request_t *req) +{ + int32_t ret = -1; + gf1_cli_getwd_rsp rsp = { + 0, + }; + glusterd_conf_t *priv = NULL; - priv = THIS->private; + GF_ASSERT(req); - ret = glusterd_op_set_cli_op (cli_op); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to set cli op: %d", - ret); - lock_fail = 1; - goto out; - } + priv = THIS->private; + GF_ASSERT(priv); - ret = -1; - if (!gf_xdr_to_cli_log_locate_req (req->msg[0], &cli_req)) { - //failed to decode msg; - req->rpc_err = GARBAGE_ARGS; - goto out; - } + gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_GETWD_REQ_RCVD, + "Received getwd req"); - gf_log ("glusterd", GF_LOG_INFO, "Received log locate req " - "for volume %s", cli_req.volname); + rsp.wd = priv->workdir; - if (strchr (cli_req.brick, ':')) { - /* TODO: need to get info of only that brick and then - tell what is the exact 
location */ - tmp_brick = gf_strdup (cli_req.brick); - if (!tmp_brick) - goto out; + glusterd_submit_reply(req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gf1_cli_getwd_rsp); + ret = 0; - gf_log ("", GF_LOG_DEBUG, "brick : %s", cli_req.brick); - ret = glusterd_brickinfo_from_brick (tmp_brick, &tmpbrkinfo); - if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, - "Cannot get brickinfo from the brick"); - goto out; - } - } + glusterd_friend_sm(); + glusterd_op_sm(); - ret = glusterd_volinfo_find (cli_req.volname, &volinfo); - if (ret) { - rsp.path = "request sent on non-existent volume"; - goto out; - } + return ret; +} - list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { - if (tmpbrkinfo) { - ret = glusterd_resolve_brick (tmpbrkinfo); - if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, - "cannot resolve the brick"); - goto out; - } - if (uuid_compare (tmpbrkinfo->uuid, brickinfo->uuid) || strcmp (brickinfo->path, tmpbrkinfo->path)) - continue; - } +int +glusterd_handle_getwd(rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler(req, __glusterd_handle_getwd); +} - if (brickinfo->logfile) { - strcpy (tmp_str, brickinfo->logfile); - rsp.path = dirname (tmp_str); - found = 1; - } else { - snprintf (tmp_str, PATH_MAX, "%s/bricks/", - DEFAULT_LOG_FILE_DIRECTORY); - rsp.path = tmp_str; - found = 1; - } - break; +int +__glusterd_handle_mount(rpcsvc_request_t *req) +{ + gf1_cli_mount_req mnt_req = { + 0, + }; + gf1_cli_mount_rsp rsp = { + 0, + }; + dict_t *dict = NULL; + int ret = 0; + glusterd_conf_t *priv = NULL; + + GF_ASSERT(req); + priv = THIS->private; + + ret = xdr_to_generic(req->msg[0], &mnt_req, + (xdrproc_t)xdr_gf1_cli_mount_req); + if (ret < 0) { + // failed to decode msg; + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, + "Failed to decode mount " + "request received"); + req->rpc_err = GARBAGE_ARGS; + rsp.op_ret = -1; + rsp.op_errno = EINVAL; + goto out; + } + + gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_MOUNT_REQ_RCVD, + "Received mount req"); + + if (mnt_req.dict.dict_len) { + /* Unserialize the dictionary */ + dict = dict_new(); + + ret = dict_unserialize(mnt_req.dict.dict_val, mnt_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + "failed to " + "unserialize req-buffer to dictionary"); + rsp.op_ret = -1; + rsp.op_errno = -EINVAL; + goto out; + } else { + dict->extra_stdfree = mnt_req.dict.dict_val; } + } - if (!found) { - snprintf (tmp_str, PATH_MAX, "brick %s:%s does not exitst in the volume %s", - tmpbrkinfo->hostname, tmpbrkinfo->path, cli_req.volname); - rsp.path = tmp_str; - } + synclock_unlock(&priv->big_lock); + rsp.op_ret = glusterd_do_mount(mnt_req.label, dict, &rsp.path, + &rsp.op_errno); + synclock_lock(&priv->big_lock); - ret = 0; out: - if (tmp_brick) - GF_FREE (tmp_brick); - if (tmpbrkinfo) - glusterd_brickinfo_delete (tmpbrkinfo); - rsp.op_ret = ret; - if (!rsp.path) - rsp.path = "Operation failed"; + if (!rsp.path) + rsp.path = gf_strdup(""); - ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - gf_xdr_serialize_cli_log_locate_rsp); + glusterd_submit_reply(req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gf1_cli_mount_rsp); + ret = 0; - if (cli_req.brick) - free (cli_req.brick); //its malloced by xdr - if (cli_req.volname) - free (cli_req.volname); //its malloced by xdr + if (dict) + dict_unref(dict); - glusterd_friend_sm (); - glusterd_op_sm (); + GF_FREE(rsp.path); - if (!lock_fail) - (void) glusterd_opinfo_unlock (); + glusterd_friend_sm(); + glusterd_op_sm(); - return ret; + 
return ret; } int -glusterd_handle_log_rotate (rpcsvc_request_t *req) +glusterd_handle_mount(rpcsvc_request_t *req) { - int32_t ret = -1; - gf1_cli_log_rotate_req cli_req = {0,}; - dict_t *dict = NULL; - int lock_fail = 0; - glusterd_op_t cli_op = GD_OP_LOG_ROTATE; - - GF_ASSERT (req); - - ret = glusterd_op_set_cli_op (cli_op); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to set cli op: %d", - ret); - lock_fail = 1; - goto out; - } - - ret = -1; - if (!gf_xdr_to_cli_log_rotate_req (req->msg[0], &cli_req)) { - //failed to decode msg; - req->rpc_err = GARBAGE_ARGS; - goto out; - } + return glusterd_big_locked_handler(req, __glusterd_handle_mount); +} - gf_log ("glusterd", GF_LOG_INFO, "Received log rotate req " - "for volume %s", cli_req.volname); +int +__glusterd_handle_umount(rpcsvc_request_t *req) +{ + gf1_cli_umount_req umnt_req = { + 0, + }; + gf1_cli_umount_rsp rsp = { + 0, + }; + char *mountbroker_root = NULL; + char mntp[PATH_MAX] = { + 0, + }; + char *path = NULL; + runner_t runner = { + 0, + }; + int ret = 0; + xlator_t *this = THIS; + gf_boolean_t dir_ok = _gf_false; + char *pdir = NULL; + char *t = NULL; + glusterd_conf_t *priv = NULL; + + GF_ASSERT(req); + GF_ASSERT(this); + priv = this->private; + + ret = xdr_to_generic(req->msg[0], &umnt_req, + (xdrproc_t)xdr_gf1_cli_umount_req); + if (ret < 0) { + // failed to decode msg; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, + "Failed to decode umount" + "request"); + req->rpc_err = GARBAGE_ARGS; + rsp.op_ret = -1; + goto out; + } + + gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_UMOUNT_REQ_RCVD, + "Received umount req"); + + if (dict_get_strn(this->options, "mountbroker-root", + SLEN("mountbroker-root"), &mountbroker_root) != 0) { + rsp.op_errno = ENOENT; + goto out; + } + + /* check if it is allowed to umount path */ + path = gf_strdup(umnt_req.path); + if (!path) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED, NULL); + rsp.op_errno = ENOMEM; + goto out; + } + dir_ok = _gf_false; + pdir = dirname(path); + t = strtail(pdir, mountbroker_root); + if (t && *t == '/') { + t = strtail(++t, MB_HIVE); + if (t && !*t) + dir_ok = _gf_true; + } + GF_FREE(path); + if (!dir_ok) { + rsp.op_errno = EACCES; + goto out; + } + + synclock_unlock(&priv->big_lock); + + if (umnt_req.lazy) { + rsp.op_ret = gf_umount_lazy(this->name, umnt_req.path, 0); + } else { + runinit(&runner); + runner_add_args(&runner, _PATH_UMOUNT, umnt_req.path, NULL); + rsp.op_ret = runner_run(&runner); + } + + synclock_lock(&priv->big_lock); + if (rsp.op_ret == 0) { + if (realpath(umnt_req.path, mntp)) + sys_rmdir(mntp); + else { + rsp.op_ret = -1; + rsp.op_errno = errno; + } + if (sys_unlink(umnt_req.path) != 0) { + rsp.op_ret = -1; + rsp.op_errno = errno; + } + } - dict = dict_new (); - if (!dict) - goto out; +out: + if (rsp.op_errno) + rsp.op_ret = -1; - ret = dict_set_dynmstr (dict, "volname", cli_req.volname); - if (ret) - goto out; + glusterd_submit_reply(req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gf1_cli_umount_rsp); + ret = 0; - ret = dict_set_dynmstr (dict, "brick", cli_req.brick); - if (ret) - goto out; + glusterd_friend_sm(); + glusterd_op_sm(); - ret = dict_set_uint64 (dict, "rotate-key", (uint64_t)time (NULL)); - if (ret) - goto out; + return ret; +} - ret = glusterd_op_begin (req, GD_OP_LOG_ROTATE, dict, _gf_true); +int +glusterd_handle_umount(rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler(req, __glusterd_handle_umount); +} +int +glusterd_friend_remove(uuid_t uuid, char *hostname) +{ + int ret = -1; + 
glusterd_peerinfo_t *peerinfo = NULL; + + RCU_READ_LOCK; + + peerinfo = glusterd_peerinfo_find(uuid, hostname); + if (peerinfo == NULL) { + RCU_READ_UNLOCK; + goto out; + } + + ret = glusterd_friend_remove_cleanup_vols(peerinfo->uuid); + RCU_READ_UNLOCK; + if (ret) + gf_msg(THIS->name, GF_LOG_WARNING, 0, GD_MSG_VOL_CLEANUP_FAIL, + "Volumes cleanup failed"); + /* Giving up the critical section here as glusterd_peerinfo_cleanup must + * be called from outside a critical section + */ + ret = glusterd_peerinfo_cleanup(peerinfo); out: - if (ret && dict) - dict_unref (dict); - - glusterd_friend_sm (); - glusterd_op_sm (); + gf_msg_debug(THIS->name, 0, "returning %d", ret); + /* coverity[LOCK] */ + return ret; +} - if (ret) { - ret = glusterd_op_send_cli_response (cli_op, ret, 0, req, - NULL, "operation failed"); - if (!lock_fail) - (void) glusterd_opinfo_unlock (); +int +glusterd_rpc_create(struct rpc_clnt **rpc, dict_t *options, + rpc_clnt_notify_t notify_fn, void *notify_data, + gf_boolean_t force) +{ + struct rpc_clnt *new_rpc = NULL; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + GF_ASSERT(options); + GF_VALIDATE_OR_GOTO(this->name, rpc, out); + + if (force && rpc && *rpc) { + (void)rpc_clnt_unref(*rpc); + *rpc = NULL; + } + + /* TODO: is 32 enough? or more ? */ + new_rpc = rpc_clnt_new(options, this, this->name, 16); + if (!new_rpc) + goto out; + + ret = rpc_clnt_register_notify(new_rpc, notify_fn, notify_data); + if (ret) + goto out; + ret = rpc_clnt_start(new_rpc); +out: + if (ret) { + if (new_rpc) { + (void)rpc_clnt_unref(new_rpc); } + } else { + *rpc = new_rpc; + } - return ret; + gf_msg_debug(this->name, 0, "returning %d", ret); + return ret; } int -glusterd_handle_sync_volume (rpcsvc_request_t *req) +glusterd_transport_inet_options_build(dict_t *dict, const char *hostname, + int port, char *af) { - int32_t ret = -1; - gf1_cli_sync_volume_req cli_req = {0,}; - dict_t *dict = NULL; - gf1_cli_sync_volume_rsp cli_rsp = {0.}; - char msg[2048] = {0,}; - gf_boolean_t free_hostname = _gf_true; - gf_boolean_t free_volname = _gf_true; - glusterd_volinfo_t *volinfo = NULL; - int lock_fail = 0; - glusterd_op_t cli_op = GD_OP_SYNC_VOLUME; - - GF_ASSERT (req); + xlator_t *this = NULL; + int32_t interval = -1; + int32_t time = -1; + int32_t timeout = -1; + int ret = 0; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(dict); + GF_ASSERT(hostname); + + if (!port) + port = GLUSTERD_DEFAULT_PORT; + + /* Build default transport options */ + ret = rpc_transport_inet_options_build(dict, hostname, port, af); + if (ret) + goto out; + + /* Set frame-timeout to 10mins. Default timeout of 30 mins is too long + * when compared to 2 mins for cli timeout. 
This ensures users don't + * wait too long after cli timesout before being able to resume normal + * operations + */ + ret = dict_set_int32n(dict, "frame-timeout", SLEN("frame-timeout"), 600); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set frame-timeout"); + goto out; + } + + /* Set keepalive options */ + ret = dict_get_int32n(this->options, "transport.socket.keepalive-interval", + SLEN("transport.socket.keepalive-interval"), + &interval); + if (ret) { + gf_msg("glusterd", GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get socket keepalive-interval"); + } + ret = dict_get_int32n(this->options, "transport.socket.keepalive-time", + SLEN("transport.socket.keepalive-time"), &time); + if (ret) { + gf_msg("glusterd", GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get socket keepalive-time"); + } + ret = dict_get_int32n(this->options, "transport.tcp-user-timeout", + SLEN("transport.tcp-user-timeout"), &timeout); + if (ret) { + gf_msg("glusterd", GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get tcp-user-timeout"); + } + + if ((interval > 0) || (time > 0)) + ret = rpc_transport_keepalive_options_set(dict, interval, time, + timeout); +out: + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; +} - ret = glusterd_op_set_cli_op (cli_op); +int +glusterd_friend_rpc_create(xlator_t *this, glusterd_peerinfo_t *peerinfo, + glusterd_peerctx_args_t *args) +{ + dict_t *options = NULL; + int ret = -1; + glusterd_peerctx_t *peerctx = NULL; + data_t *data = NULL; + char *af = NULL; + + peerctx = GF_CALLOC(1, sizeof(*peerctx), gf_gld_mt_peerctx_t); + if (!peerctx) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); + goto out; + } + + options = dict_new(); + if (!options) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); + goto out; + } + + if (args) + peerctx->args = *args; + + gf_uuid_copy(peerctx->peerid, peerinfo->uuid); + peerctx->peername = gf_strdup(peerinfo->hostname); + peerctx->peerinfo_gen = peerinfo->generation; /* A peerinfos generation + number can be used to + uniquely identify a + peerinfo */ + + ret = dict_get_str(this->options, "transport.address-family", &af); + if (ret) + gf_log(this->name, GF_LOG_TRACE, + "option transport.address-family is not set in xlator options"); + ret = glusterd_transport_inet_options_build(options, peerinfo->hostname, + peerinfo->port, af); + if (ret) + goto out; + + /* + * For simulated multi-node testing, we need to make sure that we + * create our RPC endpoint with the same address that the peer would + * use to reach us. 
+ */ + + if (this->options) { + data = dict_getn(this->options, "transport.socket.bind-address", + SLEN("transport.socket.bind-address")); + if (data) { + ret = dict_set_sizen(options, "transport.socket.source-addr", data); + } + data = dict_getn(this->options, "ping-timeout", SLEN("ping-timeout")); + if (data) { + ret = dict_set_sizen(options, "ping-timeout", data); + } + } + + /* Enable encryption for the client connection if management encryption + * is enabled + */ + if (this->ctx->secure_mgmt) { + ret = dict_set_nstrn(options, "transport.socket.ssl-enabled", + SLEN("transport.socket.ssl-enabled"), "on", + SLEN("on")); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to set cli op: %d", - ret); - lock_fail = 1; - goto out; - } - - ret = -1; - if (!gf_xdr_to_cli_sync_volume_req (req->msg[0], &cli_req)) { - //failed to decode msg; - req->rpc_err = GARBAGE_ARGS; - goto out; - } - gf_log ("glusterd", GF_LOG_INFO, "Received volume sync req " - "for volume %s", - (cli_req.flags & GF_CLI_SYNC_ALL) ? "all" : cli_req.volname); - - dict = dict_new (); - if (!dict) { - gf_log ("", GF_LOG_ERROR, "Can't allocate sync vol dict"); - goto out; - } + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "failed to set ssl-enabled in dict"); + goto out; + } + + this->ctx->ssl_cert_depth = glusterfs_read_secure_access_file(); + } + + ret = glusterd_rpc_create(&peerinfo->rpc, options, glusterd_peer_rpc_notify, + peerctx, _gf_false); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RPC_CREATE_FAIL, + "failed to create rpc for" + " peer %s", + peerinfo->hostname); + gf_event(EVENT_PEER_RPC_CREATE_FAILED, "peer=%s", peerinfo->hostname); + goto out; + } + peerctx = NULL; + ret = 0; +out: + if (options) + dict_unref(options); - if (!glusterd_is_local_addr (cli_req.hostname)) { - ret = -1; - snprintf (msg, sizeof (msg), "sync from localhost" - " not allowed"); - goto out; - } + GF_FREE(peerctx); + return ret; +} - ret = dict_set_dynmstr (dict, "hostname", cli_req.hostname); - if (ret) { - gf_log ("", GF_LOG_ERROR, "hostname set failed"); - snprintf (msg, sizeof (msg), "hostname set failed"); - goto out; +int +glusterd_friend_add(const char *hoststr, int port, + glusterd_friend_sm_state_t state, uuid_t *uuid, + glusterd_peerinfo_t **friend, gf_boolean_t restore, + glusterd_peerctx_args_t *args) +{ + int ret = 0; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + + this = THIS; + conf = this->private; + GF_ASSERT(conf); + GF_ASSERT(hoststr); + GF_ASSERT(friend); + + *friend = glusterd_peerinfo_new(state, uuid, hoststr, port); + if (*friend == NULL) { + ret = -1; + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_PEER_ADD_FAIL, NULL); + goto out; + } + + /* + * We can't add to the list after calling glusterd_friend_rpc_create, + * even if it succeeds, because by then the callback to take it back + * off and free might have happened already (notably in the case of an + * invalid peer name). That would mean we're adding something that had + * just been free, and we're likely to crash later. + */ + cds_list_add_tail_rcu(&(*friend)->uuid_list, &conf->peers); + + // restore needs to first create the list of peers, then create rpcs + // to keep track of quorum in race-free manner. In restore for each peer + // rpc-create calls rpc_notify when the friend-list is partially + // constructed, leading to wrong quorum calculations. 
+ if (!restore) { + ret = glusterd_store_peerinfo(*friend); + if (ret == 0) { + ret = glusterd_friend_rpc_create(this, *friend, args); } else { - free_hostname = _gf_false; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PEERINFO_CREATE_FAIL, + "Failed to store peerinfo"); + gf_event(EVENT_PEER_STORE_FAILURE, "peer=%s", (*friend)->hostname); } + } - ret = dict_set_int32 (dict, "flags", cli_req.flags); - if (ret) { - gf_log ("", GF_LOG_ERROR, "volume flags set failed"); - snprintf (msg, sizeof (msg), "volume flags set failed"); - goto out; - } + if (ret) { + (void)glusterd_peerinfo_cleanup(*friend); + *friend = NULL; + } - if (!cli_req.flags) { - ret = glusterd_volinfo_find (cli_req.volname, &volinfo); - if (!ret) { - snprintf (msg, sizeof (msg), "please delete the " - "volume: %s before sync", cli_req.volname); - ret = -1; - goto out; - } +out: + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_CONNECT_RETURNED, + "connect returned %d", ret); + return ret; +} - ret = dict_set_dynmstr (dict, "volname", cli_req.volname); - if (ret) { - gf_log ("", GF_LOG_ERROR, "volume name set failed"); - snprintf (msg, sizeof (msg), "volume name set failed"); - goto out; - } else { - free_volname = _gf_false; - } +/* glusterd_friend_add_from_peerinfo() adds a new peer into the local friends + * list from a pre created @peerinfo object. It otherwise works similarly to + * glusterd_friend_add() + */ +int +glusterd_friend_add_from_peerinfo(glusterd_peerinfo_t *friend, + gf_boolean_t restore, + glusterd_peerctx_args_t *args) +{ + int ret = 0; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + + this = THIS; + conf = this->private; + GF_ASSERT(conf); + + GF_VALIDATE_OR_GOTO(this->name, (friend != NULL), out); + + /* + * We can't add to the list after calling glusterd_friend_rpc_create, + * even if it succeeds, because by then the callback to take it back + * off and free might have happened already (notably in the case of an + * invalid peer name). That would mean we're adding something that had + * just been free, and we're likely to crash later. + */ + cds_list_add_tail_rcu(&friend->uuid_list, &conf->peers); + + // restore needs to first create the list of peers, then create rpcs + // to keep track of quorum in race-free manner. In restore for each peer + // rpc-create calls rpc_notify when the friend-list is partially + // constructed, leading to wrong quorum calculations. 
+ if (!restore) { + ret = glusterd_store_peerinfo(friend); + if (ret == 0) { + ret = glusterd_friend_rpc_create(this, friend, args); } else { - free_volname = _gf_false; - if (glusterd_volume_count_get ()) { - snprintf (msg, sizeof (msg), "please delete all the " - "volumes before full sync"); - ret = -1; - goto out; - } + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PEERINFO_CREATE_FAIL, + "Failed to store peerinfo"); + gf_event(EVENT_PEER_STORE_FAILURE, "peer=%s", friend->hostname); } - - ret = glusterd_op_begin (req, GD_OP_SYNC_VOLUME, dict, _gf_true); + } out: - if (ret) { - cli_rsp.op_ret = -1; - cli_rsp.op_errstr = msg; - if (msg[0] == '\0') - snprintf (msg, sizeof (msg), "Operation failed"); - glusterd_submit_reply(req, &cli_rsp, NULL, 0, NULL, - gf_xdr_from_cli_sync_volume_rsp); - if (free_hostname && cli_req.hostname) - free (cli_req.hostname); - if (free_volname && cli_req.volname) - free (cli_req.volname); - if (dict) - dict_unref (dict); - - if (!lock_fail) - (void) glusterd_opinfo_unlock (); - - ret = 0; //sent error to cli, prevent second reply - } - - glusterd_friend_sm (); - glusterd_op_sm (); - - return ret; + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_CONNECT_RETURNED, + "connect returned %d", ret); + return ret; } int -glusterd_fsm_log_send_resp (rpcsvc_request_t *req, int op_ret, - char *op_errstr, dict_t *dict) +glusterd_probe_begin(rpcsvc_request_t *req, const char *hoststr, int port, + dict_t *dict, int *op_errno) { + int ret = -1; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_peerctx_args_t args = {0}; + glusterd_friend_sm_event_t *event = NULL; - int ret = -1; - gf1_cli_fsm_log_rsp rsp = {0}; - - GF_ASSERT (req); - GF_ASSERT (op_errstr); - - rsp.op_ret = op_ret; - rsp.op_errstr = op_errstr; - if (rsp.op_ret == 0) - ret = dict_allocate_and_serialize (dict, &rsp.fsm_log.fsm_log_val, - (size_t *)&rsp.fsm_log.fsm_log_len); + GF_ASSERT(hoststr); - ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - gf_xdr_from_cli_fsm_log_rsp); - if (rsp.fsm_log.fsm_log_val) - GF_FREE (rsp.fsm_log.fsm_log_val); + RCU_READ_LOCK; + peerinfo = glusterd_peerinfo_find(NULL, hoststr); - gf_log ("glusterd", GF_LOG_DEBUG, "Responded, ret: %d", ret); + if (peerinfo == NULL) { + gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_PEER_NOT_FOUND, + "Unable to find peerinfo" + " for host: %s (%d)", + hoststr, port); + args.mode = GD_MODE_ON; + args.req = req; + args.dict = dict; + ret = glusterd_friend_add(hoststr, port, GD_FRIEND_STATE_DEFAULT, NULL, + &peerinfo, 0, &args); + if ((!ret) && (!peerinfo->connected)) { + ret = GLUSTERD_CONNECTION_AWAITED; + } + + } else if (peerinfo->connected && + (GD_FRIEND_STATE_BEFRIENDED == peerinfo->state.state)) { + if (peerinfo->detaching) { + ret = -1; + if (op_errno) + *op_errno = GF_PROBE_FRIEND_DETACHING; + goto out; + } + ret = glusterd_peer_hostname_update(peerinfo, hoststr, _gf_false); + if (ret) + goto out; + // Injecting a NEW_NAME event to update cluster + ret = glusterd_friend_sm_new_event(GD_FRIEND_EVENT_NEW_NAME, &event); + if (!ret) { + event->peername = gf_strdup(peerinfo->hostname); + gf_uuid_copy(event->peerid, peerinfo->uuid); + + ret = glusterd_friend_sm_inject_event(event); + glusterd_xfer_cli_probe_resp(req, 0, GF_PROBE_SUCCESS, NULL, + (char *)hoststr, port, dict); + } + } else { + glusterd_xfer_cli_probe_resp(req, 0, GF_PROBE_FRIEND, NULL, + (char *)hoststr, port, dict); + ret = 0; + } - return 0; +out: + RCU_READ_UNLOCK; + gf_msg_debug("glusterd", 0, "returning %d", ret); + return ret; } int -glusterd_handle_fsm_log (rpcsvc_request_t *req) 
+glusterd_deprobe_begin(rpcsvc_request_t *req, const char *hoststr, int port, + uuid_t uuid, dict_t *dict, int *op_errno) { - int32_t ret = -1; - gf1_cli_fsm_log_req cli_req = {0,}; - dict_t *dict = NULL; - glusterd_sm_tr_log_t *log = NULL; - xlator_t *this = NULL; - glusterd_conf_t *conf = NULL; - char msg[2048] = {0}; - glusterd_peerinfo_t *peerinfo = NULL; + int ret = -1; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_friend_sm_event_t *event = NULL; + glusterd_probe_ctx_t *ctx = NULL; - GF_ASSERT (req); + GF_ASSERT(hoststr); + GF_ASSERT(req); - if (!gf_xdr_to_cli_fsm_log_req (req->msg[0], &cli_req)) { - //failed to decode msg; - req->rpc_err = GARBAGE_ARGS; - snprintf (msg, sizeof (msg), "Garbage request"); - goto out; - } + RCU_READ_LOCK; - if (strcmp ("", cli_req.name) == 0) { - this = THIS; - conf = this->private; - log = &conf->op_sm_log; - } else { - ret = glusterd_friend_find_by_hostname (cli_req.name, - &peerinfo); - if (ret) { - snprintf (msg, sizeof (msg), "%s is not a peer", - cli_req.name); - goto out; - } - log = &peerinfo->sm_log; - } + peerinfo = glusterd_peerinfo_find(uuid, hoststr); + if (peerinfo == NULL) { + ret = -1; + gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_PEER_NOT_FOUND, + "Unable to find peerinfo" + " for host: %s %d", + hoststr, port); + goto out; + } + + if (!peerinfo->rpc) { + // handle this case + goto out; + } + + if (peerinfo->detaching) { + ret = -1; + if (op_errno) + *op_errno = GF_DEPROBE_FRIEND_DETACHING; + goto out; + } - dict = dict_new (); - if (!dict) { - ret = -1; - goto out; - } + ret = glusterd_friend_sm_new_event(GD_FRIEND_EVENT_INIT_REMOVE_FRIEND, + &event); - ret = glusterd_sm_tr_log_add_to_dict (dict, log); -out: - (void)glusterd_fsm_log_send_resp (req, ret, msg, dict); - if (cli_req.name) - free (cli_req.name);//malloced by xdr - if (dict) - dict_unref (dict); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_EVENT_NEW_GET_FAIL, + "Unable to get new event"); + goto out; + } - glusterd_friend_sm (); - glusterd_op_sm (); + ctx = GF_CALLOC(1, sizeof(*ctx), gf_gld_mt_probe_ctx_t); - return 0;//send 0 to avoid double reply -} + if (!ctx) { + goto out; + } -int -glusterd_op_lock_send_resp (rpcsvc_request_t *req, int32_t status) -{ + ctx->hostname = gf_strdup(hoststr); + ctx->port = port; + ctx->req = req; + ctx->dict = dict; - gd1_mgmt_cluster_lock_rsp rsp = {{0},}; - int ret = -1; + event->ctx = ctx; - GF_ASSERT (req); - glusterd_get_uuid (&rsp.uuid); - rsp.op_ret = status; + event->peername = gf_strdup(hoststr); + gf_uuid_copy(event->peerid, uuid); - ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - gd_xdr_serialize_mgmt_cluster_lock_rsp); + ret = glusterd_friend_sm_inject_event(event); - gf_log ("glusterd", GF_LOG_INFO, - "Responded, ret: %d", ret); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_EVENT_INJECT_FAIL, + "Unable to inject event %d, " + "ret = %d", + event->event, ret); + goto out; + } + peerinfo->detaching = _gf_true; - return 0; +out: + RCU_READ_UNLOCK; + return ret; } int -glusterd_op_unlock_send_resp (rpcsvc_request_t *req, int32_t status) +glusterd_xfer_friend_remove_resp(rpcsvc_request_t *req, char *hostname, + int port) { - - gd1_mgmt_cluster_unlock_rsp rsp = {{0},}; - int ret = -1; - - GF_ASSERT (req); - rsp.op_ret = status; - glusterd_get_uuid (&rsp.uuid); - - ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - gd_xdr_serialize_mgmt_cluster_unlock_rsp); - - gf_log ("glusterd", GF_LOG_INFO, - "Responded to unlock, ret: %d", ret); - - return ret; + gd1_mgmt_friend_rsp rsp = { + {0}, + }; + 
int32_t ret = -1; + xlator_t *this = NULL; + + GF_ASSERT(hostname); + + rsp.op_ret = 0; + this = THIS; + GF_ASSERT(this); + + gf_uuid_copy(rsp.uuid, MY_UUID); + rsp.hostname = hostname; + rsp.port = port; + ret = glusterd_submit_reply(req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_friend_rsp); + + gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_RESPONSE_INFO, + "Responded to %s (%d), ret: %d", hostname, port, ret); + return ret; } int -glusterd_handle_cluster_unlock (rpcsvc_request_t *req) +glusterd_xfer_friend_add_resp(rpcsvc_request_t *req, char *myhostname, + char *remote_hostname, int port, int32_t op_ret, + int32_t op_errno) { - gd1_mgmt_cluster_unlock_req unlock_req = {{0}, }; - int32_t ret = -1; - glusterd_op_lock_ctx_t *ctx = NULL; + gd1_mgmt_friend_rsp rsp = { + {0}, + }; + int32_t ret = -1; + xlator_t *this = NULL; + + GF_ASSERT(myhostname); + + this = THIS; + GF_ASSERT(this); + + gf_uuid_copy(rsp.uuid, MY_UUID); + rsp.op_ret = op_ret; + rsp.op_errno = op_errno; + rsp.hostname = gf_strdup(myhostname); + rsp.port = port; + + ret = glusterd_submit_reply(req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_friend_rsp); + + gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_RESPONSE_INFO, + "Responded to %s (%d), ret: %d, op_ret: %d", remote_hostname, port, + ret, op_ret); + GF_FREE(rsp.hostname); + return ret; +} + +static void +set_probe_error_str(int op_ret, int op_errno, char *op_errstr, char *errstr, + size_t len, char *hostname, int port) +{ + if ((op_errstr) && (strcmp(op_errstr, ""))) { + snprintf(errstr, len, "%s", op_errstr); + return; + } + + if (!op_ret) { + switch (op_errno) { + case GF_PROBE_LOCALHOST: + snprintf(errstr, len, + "Probe on localhost not " + "needed"); + break; - GF_ASSERT (req); + case GF_PROBE_FRIEND: + snprintf(errstr, len, + "Host %s port %d already" + " in peer list", + hostname, port); + break; - if (!gd_xdr_to_mgmt_cluster_unlock_req (req->msg[0], &unlock_req)) { - //failed to decode msg; - req->rpc_err = GARBAGE_ARGS; - goto out; + case GF_PROBE_FRIEND_DETACHING: + snprintf(errstr, len, + "Peer is already being " + "detached from cluster.\n" + "Check peer status by running " + "gluster peer status"); + break; + default: + if (op_errno != 0) + snprintf(errstr, len, + "Probe returned " + "with %s", + strerror(op_errno)); + break; } + } else { + switch (op_errno) { + case GF_PROBE_ANOTHER_CLUSTER: + snprintf(errstr, len, + "%s is either already " + "part of another cluster or having " + "volumes configured", + hostname); + break; + case GF_PROBE_VOLUME_CONFLICT: + snprintf(errstr, len, + "At least one volume on " + "%s conflicts with existing volumes " + "in the cluster", + hostname); + break; - gf_log ("glusterd", GF_LOG_INFO, - "Received UNLOCK from uuid: %s", uuid_utoa (unlock_req.uuid)); + case GF_PROBE_UNKNOWN_PEER: + snprintf(errstr, len, + "%s responded with " + "'unknown peer' error, this could " + "happen if %s doesn't have localhost " + "in its peer database", + hostname, hostname); + break; - ctx = GF_CALLOC (1, sizeof (*ctx), gf_gld_mt_op_lock_ctx_t); + case GF_PROBE_ADD_FAILED: + snprintf(errstr, len, + "Failed to add peer " + "information on %s", + hostname); + break; - if (!ctx) { - //respond here - return -1; - } - uuid_copy (ctx->uuid, unlock_req.uuid); - ctx->req = req; + case GF_PROBE_SAME_UUID: + snprintf(errstr, len, + "Peer uuid (host %s) is " + "same as local uuid", + hostname); + break; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_UNLOCK, ctx); + case GF_PROBE_QUORUM_NOT_MET: + snprintf(errstr, len, + "Cluster quorum is not " + "met. 
Changing peers is not allowed " + "in this state"); + break; -out: - glusterd_friend_sm (); - glusterd_op_sm (); + case GF_PROBE_MISSED_SNAP_CONFLICT: + snprintf(errstr, len, + "Failed to update " + "list of missed snapshots from " + "peer %s", + hostname); + break; - return ret; + case GF_PROBE_SNAP_CONFLICT: + snprintf(errstr, len, + "Conflict in comparing " + "list of snapshots from " + "peer %s", + hostname); + break; + + default: + snprintf(errstr, len, + "Probe returned with " + "%s", + strerror(op_errno)); + break; + } + } } int -glusterd_op_stage_send_resp (rpcsvc_request_t *req, - int32_t op, int32_t status, - char *op_errstr, dict_t *rsp_dict) +glusterd_xfer_cli_probe_resp(rpcsvc_request_t *req, int32_t op_ret, + int32_t op_errno, char *op_errstr, char *hostname, + int port, dict_t *dict) { - gd1_mgmt_stage_op_rsp rsp = {{0},}; - int ret = -1; - - GF_ASSERT (req); - rsp.op_ret = status; - glusterd_get_uuid (&rsp.uuid); - rsp.op = op; - if (op_errstr) - rsp.op_errstr = op_errstr; - else - rsp.op_errstr = ""; - - ret = dict_allocate_and_serialize (rsp_dict, - &rsp.dict.dict_val, - (size_t *)&rsp.dict.dict_len); - if (ret < 0) { - gf_log ("", GF_LOG_DEBUG, - "failed to get serialized length of dict"); - return ret; - } + gf_cli_rsp rsp = { + 0, + }; + int32_t ret = -1; + char errstr[2048] = { + 0, + }; + char *cmd_str = NULL; + xlator_t *this = THIS; + + GF_ASSERT(req); + GF_ASSERT(this); + + (void)set_probe_error_str(op_ret, op_errno, op_errstr, errstr, + sizeof(errstr), hostname, port); + + if (dict) { + ret = dict_get_strn(dict, "cmd-str", SLEN("cmd-str"), &cmd_str); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CMDSTR_NOTFOUND_IN_DICT, + "Failed to get " + "command string"); + } - ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - gd_xdr_serialize_mgmt_stage_op_rsp); + rsp.op_ret = op_ret; + rsp.op_errno = op_errno; + rsp.op_errstr = (errstr[0] != '\0') ? errstr : ""; - gf_log ("glusterd", GF_LOG_INFO, - "Responded to stage, ret: %d", ret); - if (rsp.dict.dict_val) - GF_FREE (rsp.dict.dict_val); + gf_cmd_log("", "%s : %s %s %s", cmd_str, (op_ret) ? "FAILED" : "SUCCESS", + (errstr[0] != '\0') ? ":" : " ", + (errstr[0] != '\0') ? 
errstr : " "); - return ret; + ret = glusterd_submit_reply(req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gf_cli_rsp); + + if (dict) + dict_unref(dict); + gf_msg_debug(this->name, 0, "Responded to CLI, ret: %d", ret); + + return ret; } -int -glusterd_op_commit_send_resp (rpcsvc_request_t *req, - int32_t op, int32_t status, char *op_errstr, - dict_t *rsp_dict) +static void +set_deprobe_error_str(int op_ret, int op_errno, char *op_errstr, char *errstr, + size_t len, char *hostname) { - gd1_mgmt_commit_op_rsp rsp = {{0}, }; - int ret = -1; - - GF_ASSERT (req); - rsp.op_ret = status; - glusterd_get_uuid (&rsp.uuid); - rsp.op = op; + if ((op_errstr) && (strcmp(op_errstr, ""))) { + snprintf(errstr, len, "%s", op_errstr); + return; + } + + if (op_ret) { + switch (op_errno) { + case GF_DEPROBE_LOCALHOST: + snprintf(errstr, len, "%s is localhost", hostname); + break; - if (op_errstr) - rsp.op_errstr = op_errstr; - else - rsp.op_errstr = ""; + case GF_DEPROBE_NOT_FRIEND: + snprintf(errstr, len, + "%s is not part of " + "cluster", + hostname); + break; - if (rsp_dict) { - ret = dict_allocate_and_serialize (rsp_dict, - &rsp.dict.dict_val, - (size_t *)&rsp.dict.dict_len); - if (ret < 0) { - gf_log ("", GF_LOG_DEBUG, - "failed to get serialized length of dict"); - goto out; - } - } + case GF_DEPROBE_BRICK_EXIST: + snprintf(errstr, len, + "Peer %s hosts one or more bricks. If the peer is in " + "not recoverable state then use either replace-brick " + "or remove-brick command with force to remove all " + "bricks from the peer and attempt the peer detach " + "again.", + hostname); + break; + case GF_DEPROBE_SNAP_BRICK_EXIST: + snprintf(errstr, len, + "%s is part of existing " + "snapshot. Remove those snapshots " + "before proceeding ", + hostname); + break; - ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - gd_xdr_serialize_mgmt_commit_op_rsp); + case GF_DEPROBE_FRIEND_DOWN: + snprintf(errstr, len, + "One of the peers is " + "probably down. Check with " + "'peer status'"); + break; - gf_log ("glusterd", GF_LOG_INFO, - "Responded to commit, ret: %d", ret); + case GF_DEPROBE_QUORUM_NOT_MET: + snprintf(errstr, len, + "Cluster quorum is not " + "met. 
Changing peers is not allowed " + "in this state"); + break; -out: - if (rsp.dict.dict_val) - GF_FREE (rsp.dict.dict_val); - return ret; + case GF_DEPROBE_FRIEND_DETACHING: + snprintf(errstr, len, + "Peer is already being " + "detached from cluster.\n" + "Check peer status by running " + "gluster peer status"); + break; + default: + snprintf(errstr, len, + "Detach returned with " + "%s", + strerror(op_errno)); + break; + } + } } int -glusterd_handle_incoming_friend_req (rpcsvc_request_t *req) +glusterd_xfer_cli_deprobe_resp(rpcsvc_request_t *req, int32_t op_ret, + int32_t op_errno, char *op_errstr, + char *hostname, dict_t *dict) { - int32_t ret = -1; - gd1_mgmt_friend_req friend_req = {{0},}; - gf_boolean_t run_fsm = _gf_true; - - GF_ASSERT (req); - if (!gd_xdr_to_mgmt_friend_req (req->msg[0], &friend_req)) { - //failed to decode msg; - req->rpc_err = GARBAGE_ARGS; - goto out; - } + gf_cli_rsp rsp = { + 0, + }; + int32_t ret = -1; + char *cmd_str = NULL; + char errstr[2048] = { + 0, + }; + + GF_ASSERT(req); + + (void)set_deprobe_error_str(op_ret, op_errno, op_errstr, errstr, + sizeof(errstr), hostname); + + if (dict) { + ret = dict_get_strn(dict, "cmd-str", SLEN("cmd-str"), &cmd_str); + if (ret) + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_CMDSTR_NOTFOUND_IN_DICT, + "Failed to get " + "command string"); + } - gf_log ("glusterd", GF_LOG_INFO, - "Received probe from uuid: %s", uuid_utoa (friend_req.uuid)); - ret = glusterd_handle_friend_req (req, friend_req.uuid, - friend_req.hostname, friend_req.port, - &friend_req); + rsp.op_ret = op_ret; + rsp.op_errno = op_errno; + rsp.op_errstr = (errstr[0] != '\0') ? errstr : ""; - if (ret == GLUSTERD_CONNECTION_AWAITED) { - //fsm should be run after connection establishes - run_fsm = _gf_false; - ret = 0; - } + gf_cmd_log("", "%s : %s %s %s", cmd_str, (op_ret) ? "FAILED" : "SUCCESS", + (errstr[0] != '\0') ? ":" : " ", + (errstr[0] != '\0') ? 
errstr : " "); -out: - if (friend_req.hostname) - free (friend_req.hostname);//malloced by xdr + ret = glusterd_submit_reply(req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gf_cli_rsp); - if (run_fsm) { - glusterd_friend_sm (); - glusterd_op_sm (); - } + gf_msg_debug(THIS->name, 0, "Responded to CLI, ret: %d", ret); - return ret; + return ret; } -int -glusterd_handle_incoming_unfriend_req (rpcsvc_request_t *req) +int32_t +glusterd_list_friends(rpcsvc_request_t *req, dict_t *dict, int32_t flags) { - int32_t ret = -1; - gd1_mgmt_friend_req friend_req = {{0},}; - char remote_hostname[UNIX_PATH_MAX + 1] = {0,}; - - GF_ASSERT (req); - if (!gd_xdr_to_mgmt_friend_req (req->msg[0], &friend_req)) { - //failed to decode msg; - req->rpc_err = GARBAGE_ARGS; - goto out; + int32_t ret = -1; + glusterd_conf_t *priv = NULL; + glusterd_peerinfo_t *entry = NULL; + int32_t count = 0; + dict_t *friends = NULL; + gf1_cli_peer_list_rsp rsp = { + 0, + }; + char my_uuid_str[64] = { + 0, + }; + char key[64] = { + 0, + }; + int keylen; + + xlator_t *this = THIS; + GF_ASSERT(this); + + priv = this->private; + GF_ASSERT(priv); + + friends = dict_new(); + if (!friends) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); + goto out; + } + + /* Reset ret to 0, needed to prevent failure in case no peers exist */ + ret = 0; + RCU_READ_LOCK; + if (!cds_list_empty(&priv->peers)) { + cds_list_for_each_entry_rcu(entry, &priv->peers, uuid_list) + { + count++; + ret = gd_add_peer_detail_to_dict(entry, friends, count); + if (ret) + goto unlock; + } + } +unlock: + RCU_READ_UNLOCK; + if (ret) + goto out; + + if (flags == GF_CLI_LIST_POOL_NODES) { + count++; + keylen = snprintf(key, sizeof(key), "friend%d.uuid", count); + uuid_utoa_r(MY_UUID, my_uuid_str); + ret = dict_set_strn(friends, key, keylen, my_uuid_str); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto out; } - gf_log ("glusterd", GF_LOG_INFO, - "Received unfriend from uuid: %s", uuid_utoa (friend_req.uuid)); - - ret = glusterd_remote_hostname_get (req, remote_hostname, - sizeof (remote_hostname)); + keylen = snprintf(key, sizeof(key), "friend%d.hostname", count); + ret = dict_set_nstrn(friends, key, keylen, "localhost", + SLEN("localhost")); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get the remote hostname"); - goto out; + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto out; } - ret = glusterd_handle_unfriend_req (req, friend_req.uuid, - remote_hostname, friend_req.port); -out: - if (friend_req.hostname) - free (friend_req.hostname);//malloced by xdr - if (friend_req.vols.vols_val) - free (friend_req.vols.vols_val);//malloced by xdr + keylen = snprintf(key, sizeof(key), "friend%d.connected", count); + ret = dict_set_int32n(friends, key, keylen, 1); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto out; + } + } - glusterd_friend_sm (); - glusterd_op_sm (); + ret = dict_set_int32n(friends, "count", SLEN("count"), count); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=count", NULL); + goto out; + } - return ret; -} + ret = dict_allocate_and_serialize(friends, &rsp.friends.friends_val, + &rsp.friends.friends_len); + if (ret) + goto out; -int -glusterd_handle_friend_update_delete (dict_t *dict) -{ - char *hostname = NULL; - int32_t ret = -1; + ret = 0; +out: - GF_ASSERT (dict); + if (friends) + dict_unref(friends); - ret = dict_get_str (dict, 
"hostname", &hostname); - if (ret) - goto out; + rsp.op_ret = ret; - ret = glusterd_friend_remove (NULL, hostname); + glusterd_submit_reply(req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gf1_cli_peer_list_rsp); + ret = 0; + GF_FREE(rsp.friends.friends_val); -out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + return ret; } -int -glusterd_friend_hostname_update (glusterd_peerinfo_t *peerinfo, - char *hostname, - gf_boolean_t store_update) +int32_t +glusterd_get_volumes(rpcsvc_request_t *req, dict_t *dict, int32_t flags) { - char *new_hostname = NULL; - int ret = 0; - - GF_ASSERT (peerinfo); - GF_ASSERT (hostname); + int32_t ret = -1; + int32_t ret_bkp = 0; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *entry = NULL; + int32_t count = 0; + dict_t *volumes = NULL; + gf_cli_rsp rsp = { + 0, + }; + char *volname = NULL; + + priv = THIS->private; + GF_ASSERT(priv); + volumes = dict_new(); + if (!volumes) { + gf_msg("glusterd", GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "Out of Memory"); + goto out; + } + + if (cds_list_empty(&priv->volumes)) { + if (flags == GF_CLI_GET_VOLUME) + ret_bkp = -1; + ret = 0; + goto respond; + } + if (flags == GF_CLI_GET_VOLUME_ALL) { + cds_list_for_each_entry(entry, &priv->volumes, vol_list) + { + ret = glusterd_add_volume_detail_to_dict(entry, volumes, count); + if (ret) + goto respond; - new_hostname = gf_strdup (hostname); - if (!new_hostname) { - ret = -1; - goto out; + count++; } - GF_FREE (peerinfo->hostname); - peerinfo->hostname = new_hostname; - if (store_update) - ret = glusterd_store_peerinfo (peerinfo); -out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; -} - -int -glusterd_handle_friend_update (rpcsvc_request_t *req) -{ - int32_t ret = -1; - gd1_mgmt_friend_update friend_req = {{0},}; - glusterd_peerinfo_t *peerinfo = NULL; - glusterd_conf_t *priv = NULL; - xlator_t *this = NULL; - glusterd_peerinfo_t *tmp = NULL; - gd1_mgmt_friend_update_rsp rsp = {{0},}; - dict_t *dict = NULL; - char key[100] = {0,}; - char *uuid_buf = NULL; - char *hostname = NULL; - int i = 1; - int count = 0; - uuid_t uuid = {0,}; - glusterd_peerctx_args_t args = {0}; - int32_t op = 0; - - GF_ASSERT (req); - - this = THIS; - GF_ASSERT (this); - priv = this->private; - GF_ASSERT (priv); - - if (!gd_xdr_to_mgmt_friend_update (req->msg[0], &friend_req)) { - //failed to decode msg; - req->rpc_err = GARBAGE_ARGS; - goto out; - } + } else if (flags == GF_CLI_GET_NEXT_VOLUME) { + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); - ret = glusterd_friend_find (friend_req.uuid, NULL, &tmp); if (ret) { - gf_log ("", GF_LOG_CRITICAL, "Received friend update request " - "from unknown peer %s", uuid_utoa (friend_req.uuid)); - goto out; + if (priv->volumes.next) { + entry = cds_list_entry(priv->volumes.next, typeof(*entry), + vol_list); + } + } else { + ret = glusterd_volinfo_find(volname, &entry); + if (ret) + goto respond; + entry = cds_list_entry(entry->vol_list.next, typeof(*entry), + vol_list); } - gf_log ("glusterd", GF_LOG_INFO, - "Received friend update from uuid: %s", uuid_utoa (friend_req.uuid)); - if (friend_req.friends.friends_len) { - /* Unserialize the dictionary */ - dict = dict_new (); + if (&entry->vol_list == &priv->volumes) { + goto respond; + } else { + ret = glusterd_add_volume_detail_to_dict(entry, volumes, count); + if (ret) + goto respond; - ret = dict_unserialize (friend_req.friends.friends_val, - friend_req.friends.friends_len, - &dict); - if (ret < 0) { - gf_log ("glusterd", GF_LOG_ERROR, - "failed to " - "unserialize 
req-buffer to dictionary"); - goto out; - } else { - dict->extra_stdfree = friend_req.friends.friends_val; - } + count++; } + } else if (flags == GF_CLI_GET_VOLUME) { + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); - ret = dict_get_int32 (dict, "count", &count); - if (ret) - goto out; - - ret = dict_get_int32 (dict, "op", &op); if (ret) - goto out; + goto respond; - if (GD_FRIEND_UPDATE_DEL == op) { - ret = glusterd_handle_friend_update_delete (dict); - goto out; + ret = glusterd_volinfo_find(volname, &entry); + if (ret) { + ret_bkp = ret; + goto respond; } - args.mode = GD_MODE_ON; - while ( i <= count) { - snprintf (key, sizeof (key), "friend%d.uuid", i); - ret = dict_get_str (dict, key, &uuid_buf); - if (ret) - goto out; - uuid_parse (uuid_buf, uuid); - snprintf (key, sizeof (key), "friend%d.hostname", i); - ret = dict_get_str (dict, key, &hostname); - if (ret) - goto out; - - gf_log ("", GF_LOG_INFO, "Received uuid: %s, hostname:%s", - uuid_buf, hostname); - - if (!uuid_compare (uuid, priv->uuid)) { - gf_log ("", GF_LOG_INFO, "Received my uuid as Friend"); - i++; - continue; - } - - ret = glusterd_friend_find (uuid, hostname, &tmp); + ret = glusterd_add_volume_detail_to_dict(entry, volumes, count); + if (ret) + goto respond; - if (!ret) { - if (strcmp (hostname, tmp->hostname) != 0) { - glusterd_friend_hostname_update (tmp, hostname, - _gf_true); - } - i++; - continue; - } + count++; + } - ret = glusterd_friend_add (hostname, friend_req.port, - GD_FRIEND_STATE_BEFRIENDED, - &uuid, NULL, &peerinfo, 0, &args); +respond: + ret = dict_set_int32n(volumes, "count", SLEN("count"), count); + if (ret) + goto out; + ret = dict_allocate_and_serialize(volumes, &rsp.dict.dict_val, + &rsp.dict.dict_len); - i++; - } + if (ret) + goto out; + ret = 0; out: - uuid_copy (rsp.uuid, priv->uuid); - ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - gd_xdr_serialize_mgmt_friend_update_rsp); - if (dict) { - if (!dict->extra_stdfree && friend_req.friends.friends_val) - free (friend_req.friends.friends_val);//malloced by xdr - dict_unref (dict); - } else { - if (friend_req.friends.friends_val) - free (friend_req.friends.friends_val);//malloced by xdr - } + if (ret_bkp == -1) { + rsp.op_ret = ret_bkp; + rsp.op_errstr = "Volume does not exist"; + rsp.op_errno = EG_NOVOL; + } else { + rsp.op_ret = ret; + rsp.op_errstr = ""; + } + glusterd_submit_reply(req, &rsp, NULL, 0, NULL, (xdrproc_t)xdr_gf_cli_rsp); + ret = 0; - glusterd_friend_sm (); - glusterd_op_sm (); + if (volumes) + dict_unref(volumes); - return ret; + GF_FREE(rsp.dict.dict_val); + return ret; } int -glusterd_handle_probe_query (rpcsvc_request_t *req) +__glusterd_handle_status_volume(rpcsvc_request_t *req) { - int32_t ret = -1; - xlator_t *this = NULL; - glusterd_conf_t *conf = NULL; - gd1_mgmt_probe_req probe_req = {{0},}; - gd1_mgmt_probe_rsp rsp = {{0},}; - glusterd_peerinfo_t *peerinfo = NULL; - glusterd_peerctx_args_t args = {0}; - int port = 0; - char remote_hostname[UNIX_PATH_MAX + 1] = {0,}; - - GF_ASSERT (req); - - if (!gd_xdr_to_mgmt_probe_req (req->msg[0], &probe_req)) { - //failed to decode msg; - req->rpc_err = GARBAGE_ARGS; - goto out; + int32_t ret = -1; + uint32_t cmd = 0; + dict_t *dict = NULL; + char *volname = 0; + gf_cli_req cli_req = {{ + 0, + }}; + glusterd_op_t cli_op = GD_OP_STATUS_VOLUME; + char err_str[256] = { + 0, + }; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + + GF_ASSERT(req); + this = THIS; + GF_ASSERT(this); + conf = this->private; + GF_ASSERT(conf); + + ret = 
xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + // failed to decode msg; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, + "Failed to decode " + "request received from cli"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (cli_req.dict.dict_len > 0) { + dict = dict_new(); + if (!dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, + NULL); + goto out; } - - - this = THIS; - - conf = this->private; - if (probe_req.port) - port = probe_req.port; - else - port = GF_DEFAULT_BASE_PORT; - - gf_log ("glusterd", GF_LOG_INFO, - "Received probe from uuid: %s", uuid_utoa (probe_req.uuid)); - - ret = glusterd_remote_hostname_get (req, remote_hostname, - sizeof (remote_hostname)); + ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + "failed to " + "unserialize buffer"); + snprintf(err_str, sizeof(err_str), + "Unable to decode " + "the command"); + goto out; + } + } + + ret = dict_get_uint32(dict, "cmd", &cmd); + if (ret) + goto out; + + if (!(cmd & GF_CLI_STATUS_ALL)) { + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get the remote hostname"); - goto out; - } - ret = glusterd_friend_find (probe_req.uuid, remote_hostname, &peerinfo); - if ((ret != 0 ) && (!list_empty (&conf->peers))) { - rsp.op_ret = -1; - rsp.op_errno = GF_PROBE_ANOTHER_CLUSTER; - } else if (ret) { - gf_log ("glusterd", GF_LOG_INFO, "Unable to find peerinfo" - " for host: %s (%d)", remote_hostname, port); - args.mode = GD_MODE_ON; - ret = glusterd_friend_add (remote_hostname, port, - GD_FRIEND_STATE_PROBE_RCVD, - NULL, NULL, &peerinfo, 0, &args); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Failed to add peer %s", - remote_hostname); - rsp.op_errno = GF_PROBE_ADD_FAILED; - } - } - - uuid_copy (rsp.uuid, conf->uuid); - - rsp.hostname = probe_req.hostname; - - ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - gd_xdr_serialize_mgmt_probe_rsp); + snprintf(err_str, sizeof(err_str), + "Unable to get " + "volume name"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, "%s", + err_str); + goto out; + } + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_STATUS_VOL_REQ_RCVD, + "Received status volume req for volume %s", volname); + } + if ((cmd & GF_CLI_STATUS_CLIENT_LIST) && + (conf->op_version < GD_OP_VERSION_3_13_0)) { + snprintf(err_str, sizeof(err_str), + "The cluster is operating " + "at version less than %d. Getting the client-list " + "is not allowed in this state.", + GD_OP_VERSION_3_13_0); + ret = -1; + goto out; + } + + if ((cmd & GF_CLI_STATUS_QUOTAD) && + (conf->op_version == GD_OP_VERSION_MIN)) { + snprintf(err_str, sizeof(err_str), + "The cluster is operating " + "at version 1. Getting the status of quotad is not " + "allowed in this state."); + ret = -1; + goto out; + } + + if ((cmd & GF_CLI_STATUS_SNAPD) && + (conf->op_version < GD_OP_VERSION_3_6_0)) { + snprintf(err_str, sizeof(err_str), + "The cluster is operating " + "at a lesser version than %d. Getting the status of " + "snapd is not allowed in this state", + GD_OP_VERSION_3_6_0); + ret = -1; + goto out; + } + + if ((cmd & GF_CLI_STATUS_BITD) && + (conf->op_version < GD_OP_VERSION_3_7_0)) { + snprintf(err_str, sizeof(err_str), + "The cluster is operating " + "at a lesser version than %d. 
Getting the status of " + "bitd is not allowed in this state", + GD_OP_VERSION_3_7_0); + ret = -1; + goto out; + } + + if ((cmd & GF_CLI_STATUS_SCRUB) && + (conf->op_version < GD_OP_VERSION_3_7_0)) { + snprintf(err_str, sizeof(err_str), + "The cluster is operating " + "at a lesser version than %d. Getting the status of " + "scrub is not allowed in this state", + GD_OP_VERSION_3_7_0); + ret = -1; + goto out; + } - gf_log ("glusterd", GF_LOG_INFO, "Responded to %s, op_ret: %d, " - "op_errno: %d, ret: %d", probe_req.hostname, - rsp.op_ret, rsp.op_errno, ret); + ret = glusterd_op_begin_synctask(req, GD_OP_STATUS_VOLUME, dict); out: - if (probe_req.hostname) - free (probe_req.hostname);//malloced by xdr - glusterd_friend_sm (); - glusterd_op_sm (); + if (ret) { + if (err_str[0] == '\0') + snprintf(err_str, sizeof(err_str), "Operation failed"); + ret = glusterd_op_send_cli_response(cli_op, ret, 0, req, dict, err_str); + } + free(cli_req.dict.dict_val); - return ret; + return ret; } int -glusterd_handle_cli_profile_volume (rpcsvc_request_t *req) +glusterd_handle_status_volume(rpcsvc_request_t *req) { - int32_t ret = -1; - gf1_cli_stats_volume_req cli_req = {0,}; - dict_t *dict = NULL; - char msg[2048] = {0,}; - gf_boolean_t free_volname = _gf_true; - int lock_fail = 0; - glusterd_op_t cli_op = GD_OP_PROFILE_VOLUME; - dict_t *tmp_dict = NULL; - - GF_ASSERT (req); + return glusterd_big_locked_handler(req, __glusterd_handle_status_volume); +} - ret = glusterd_op_set_cli_op (cli_op); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to set cli op: %d", - ret); - lock_fail = 1; - goto out; +int +__glusterd_handle_cli_clearlocks_volume(rpcsvc_request_t *req) +{ + int32_t ret = -1; + gf_cli_req cli_req = {{ + 0, + }}; + glusterd_op_t cli_op = GD_OP_CLEARLOCKS_VOLUME; + char *volname = NULL; + dict_t *dict = NULL; + char err_str[64] = { + 0, + }; + xlator_t *this = NULL; + + GF_ASSERT(req); + this = THIS; + GF_ASSERT(this); + + ret = -1; + ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, + "Failed to decode " + "request received from cli"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (cli_req.dict.dict_len) { + dict = dict_new(); + + ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + "failed to unserialize req-buffer to" + " dictionary"); + snprintf(err_str, sizeof(err_str), + "unable to decode " + "the command"); + goto out; } + } else { ret = -1; - if (!gf_xdr_to_cli_stats_volume_req (req->msg[0], &cli_req)) { - //failed to decode msg; - req->rpc_err = GARBAGE_ARGS; - goto out; - } + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CLI_REQ_EMPTY, + "Empty cli request."); + goto out; + } + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + snprintf(err_str, sizeof(err_str), + "Unable to get volume " + "name"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLNAME_NOTFOUND_IN_DICT, + "%s", err_str); + goto out; + } + + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_CLRCLK_VOL_REQ_RCVD, + "Received clear-locks volume req " + "for volume %s", + volname); + + ret = glusterd_op_begin_synctask(req, GD_OP_CLEARLOCKS_VOLUME, dict); - gf_log ("glusterd", GF_LOG_INFO, "Received volume profile req " - "for volume %s", cli_req.volname); +out: + if (ret) { + if (err_str[0] == '\0') + snprintf(err_str, sizeof(err_str), "Operation failed"); + ret = 
glusterd_op_send_cli_response(cli_op, ret, 0, req, dict, err_str); + } + free(cli_req.dict.dict_val); + + return ret; +} - dict = dict_new (); - if (!dict) - goto out; - ret = dict_set_dynmstr (dict, "volname", cli_req.volname); - if (ret) { - gf_log ("", GF_LOG_ERROR, "volume name set failed"); - snprintf (msg, sizeof (msg), "volume name set failed"); - goto out; - } else { - free_volname = _gf_false; - } +int +glusterd_handle_cli_clearlocks_volume(rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler(req, + __glusterd_handle_cli_clearlocks_volume); +} - ret = dict_set_int32 (dict, "op", cli_req.op); +static int +get_volinfo_from_brickid(char *brickid, glusterd_volinfo_t **volinfo) +{ + int ret = -1; + char *volid_str = NULL; + char *brick = NULL; + char *brickid_dup = NULL; + uuid_t volid = {0}; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(brickid); + + brickid_dup = gf_strdup(brickid); + if (!brickid_dup) + goto out; + + volid_str = brickid_dup; + brick = strchr(brickid_dup, ':'); + if (!brick) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_NOT_FOUND, + "Invalid brickid"); + goto out; + } + + *brick = '\0'; + brick++; + gf_uuid_parse(volid_str, volid); + ret = glusterd_volinfo_find_by_volume_id(volid, volinfo); + if (ret) { + /* Check if it is a snapshot volume */ + ret = glusterd_snap_volinfo_find_by_volume_id(volid, volinfo); if (ret) { - gf_log ("", GF_LOG_ERROR, "op set failed"); - goto out; + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOLINFO_GET_FAIL, + "Failed to find volinfo"); + goto out; } + } - tmp_dict = dict_new(); - if (!tmp_dict) - goto out; - dict_unserialize (cli_req.dict_req.dict_req_val, - cli_req.dict_req.dict_req_len, &tmp_dict); - - dict_copy (tmp_dict, dict); - - ret = glusterd_op_begin (req, cli_op, dict, _gf_true); - + ret = 0; out: - glusterd_friend_sm (); - glusterd_op_sm (); - - if (tmp_dict) - dict_unref (tmp_dict); - - if (ret && dict) - dict_unref (dict); - if (cli_req.dict_req.dict_req_val) - free (cli_req.dict_req.dict_req_val); - if (free_volname) - free (cli_req.volname); // malloced by xdr - if (ret) { - ret = glusterd_op_send_cli_response (cli_op, ret, 0, req, - NULL, "operation failed"); - if (!lock_fail) - (void) glusterd_opinfo_unlock (); - - } - - gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + GF_FREE(brickid_dup); + return ret; } -int -glusterd_friend_remove (uuid_t uuid, char *hostname) +static int +__glusterd_handle_barrier(rpcsvc_request_t *req) { - int ret = 0; - glusterd_peerinfo_t *peerinfo = NULL; + int ret = -1; + xlator_t *this = NULL; + gf_cli_req cli_req = {{ + 0, + }}; + dict_t *dict = NULL; + char *volname = NULL; + + GF_ASSERT(req); + this = THIS; + GF_ASSERT(this); + + ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, + "Failed to decode " + "request received from cli"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (!cli_req.dict.dict_len) { + ret = -1; + goto out; + } - ret = glusterd_friend_find (uuid, hostname, &peerinfo); - if (ret) - goto out; + dict = dict_new(); + if (!dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); + ret = -1; + goto out; + } + ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len, &dict); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + "Failed to unserialize " + "request dictionary."); + goto out; + } + + ret = dict_get_strn(dict, "volname", 
SLEN("volname"), &volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLNAME_NOTFOUND_IN_DICT, + "Volname not present in " + "dict"); + goto out; + } + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_BARRIER_VOL_REQ_RCVD, + "Received barrier volume request for " + "volume %s", + volname); + + ret = glusterd_op_begin_synctask(req, GD_OP_BARRIER, dict); - ret = glusterd_friend_cleanup (peerinfo); out: - gf_log ("", GF_LOG_DEBUG, "returning %d", ret); - return ret; + if (ret) { + ret = glusterd_op_send_cli_response(GD_OP_BARRIER, ret, 0, req, dict, + "Operation failed"); + } + free(cli_req.dict.dict_val); + return ret; } int -glusterd_rpc_create (struct rpc_clnt **rpc, - dict_t *options, - rpc_clnt_notify_t notify_fn, - void *notify_data) +glusterd_handle_barrier(rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler(req, __glusterd_handle_barrier); +} + +static gf_boolean_t +gd_is_global_option(char *opt_key) { - struct rpc_clnt *new_rpc = NULL; - int ret = -1; - xlator_t *this = NULL; + GF_VALIDATE_OR_GOTO(THIS->name, opt_key, out); - this = THIS; - GF_ASSERT (this); + return (strcmp(opt_key, GLUSTERD_SHARED_STORAGE_KEY) == 0 || + strcmp(opt_key, GLUSTERD_QUORUM_RATIO_KEY) == 0 || + strcmp(opt_key, GLUSTERD_GLOBAL_OP_VERSION_KEY) == 0 || + strcmp(opt_key, GLUSTERD_BRICK_MULTIPLEX_KEY) == 0 || + strcmp(opt_key, GLUSTERD_LOCALTIME_LOGGING_KEY) == 0 || + strcmp(opt_key, GLUSTERD_DAEMON_LOG_LEVEL_KEY) == 0 || + strcmp(opt_key, GLUSTERD_MAX_OP_VERSION_KEY) == 0); - GF_ASSERT (options); - new_rpc = rpc_clnt_new (options, this->ctx, this->name); +out: + return _gf_false; +} - if (!new_rpc) +int32_t +glusterd_get_volume_opts(rpcsvc_request_t *req, dict_t *dict) +{ + int32_t ret = -1; + int32_t count = 1; + int exists = 0; + char *key = NULL; + char *orig_key = NULL; + char *key_fixed = NULL; + char *volname = NULL; + char *value = NULL; + char err_str[2048] = { + 0, + }; + char dict_key[50] = { + 0, + }; + int keylen; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *volinfo = NULL; + gf_cli_rsp rsp = { + 0, + }; + char op_version_buff[10] = { + 0, + }; + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + GF_ASSERT(priv); + + GF_ASSERT(req); + GF_ASSERT(dict); + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + snprintf(err_str, sizeof(err_str), + "Failed to get volume " + "name while handling get volume option command"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLNAME_NOTFOUND_IN_DICT, + "%s", err_str); + goto out; + } + + if (strcasecmp(volname, "all") == 0) { + ret = glusterd_get_global_options_for_all_vols(req, dict, + &rsp.op_errstr); + goto out; + } + + ret = dict_get_strn(dict, "key", SLEN("key"), &key); + if (ret) { + snprintf(err_str, sizeof(err_str), + "Failed to get key " + "while handling get volume option for %s", + volname); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", + err_str); + goto out; + } + gf_msg_debug(this->name, 0, + "Received get volume opt request for " + "volume %s", + volname); + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + snprintf(err_str, sizeof(err_str), FMTSTR_CHECK_VOL_EXISTS, volname); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, + FMTSTR_CHECK_VOL_EXISTS, volname); + goto out; + } + if (strcmp(key, "all")) { + if (fnmatch(GD_HOOKS_SPECIFIC_KEY, key, FNM_NOESCAPE) == 0) { + keylen = sprintf(dict_key, "key%d", count); + ret = dict_set_strn(dict, dict_key, keylen, key); + if (ret) { + gf_msg(this->name, 
GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to " + "set %s in dictionary", + key); + goto out; + } + ret = dict_get_str(volinfo->dict, key, &value); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to " + "get %s in dictionary", + key); + goto out; + } + keylen = sprintf(dict_key, "value%d", count); + ret = dict_set_strn(dict, dict_key, keylen, value); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to " + "set %s in dictionary", + key); + goto out; + } + } else { + exists = glusterd_check_option_exists(key, &key_fixed); + if (!exists) { + snprintf(err_str, sizeof(err_str), + "Option " + "with name: %s does not exist", + key); + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_UNKNOWN_KEY, + "%s", err_str); + if (key_fixed) + snprintf(err_str + ret, sizeof(err_str) - ret, + "Did you mean %s?", key_fixed); + ret = -1; goto out; + } + if (key_fixed) { + orig_key = key; + key = key_fixed; + } + + if (gd_is_global_option(key)) { + char warn_str[] = + "Warning: support to get \ + global option value using volume get \ + <volname>` will be deprecated from \ + next release. Consider using `volume \ + get all` instead for global options"; + + ret = dict_set_strn(dict, "warning", SLEN("warning"), warn_str); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set warning " + "message in dictionary"); + goto out; + } + } - ret = rpc_clnt_register_notify (new_rpc, notify_fn, notify_data); - *rpc = new_rpc; - if (ret) - goto out; - ret = rpc_clnt_start (new_rpc); -out: - if (ret) { - if (new_rpc) { - (void) rpc_clnt_unref (new_rpc); + if (strcmp(key, GLUSTERD_MAX_OP_VERSION_KEY) == 0) { + ret = glusterd_get_global_max_op_version(req, dict, 1); + if (ret) + goto out; + } else if (strcmp(key, GLUSTERD_GLOBAL_OP_VERSION_KEY) == 0) { + keylen = sprintf(dict_key, "key%d", count); + ret = dict_set_strn(dict, dict_key, keylen, key); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed" + "to set %s in dictionary", + key); + goto out; + } + keylen = sprintf(dict_key, "value%d", count); + sprintf(op_version_buff, "%d", priv->op_version); + ret = dict_set_strn(dict, dict_key, keylen, op_version_buff); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed" + " to set value for key %s in " + "dictionary", + key); + goto out; + } + } else if (strcmp(key, "config.memory-accounting") == 0) { + keylen = sprintf(dict_key, "key%d", count); + ret = dict_set_strn(dict, dict_key, keylen, key); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed" + " to set %s in dictionary", + key); + goto out; + } + keylen = sprintf(dict_key, "value%d", count); + + if (volinfo->memory_accounting) + ret = dict_set_nstrn(dict, dict_key, keylen, "Enabled", + SLEN("Enabled")); + else + ret = dict_set_nstrn(dict, dict_key, keylen, "Disabled", + SLEN("Disabled")); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed" + " to set value for key %s in " + "dictionary", + key); + goto out; + } + } else if (strcmp(key, "config.transport") == 0) { + keylen = sprintf(dict_key, "key%d", count); + ret = dict_set_strn(dict, dict_key, keylen, key); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set %s in " + "dictionary", + key); + goto out; + } + keylen = sprintf(dict_key, "value%d", count); + + if (volinfo->transport_type == GF_TRANSPORT_RDMA) + ret = dict_set_nstrn(dict, 
dict_key, keylen, "rdma", + SLEN("rdma")); + else if (volinfo->transport_type == GF_TRANSPORT_TCP) + ret = dict_set_nstrn(dict, dict_key, keylen, "tcp", + SLEN("tcp")); + else if (volinfo->transport_type == GF_TRANSPORT_BOTH_TCP_RDMA) + ret = dict_set_nstrn(dict, dict_key, keylen, "tcp,rdma", + SLEN("tcp,rdma")); + else + ret = dict_set_nstrn(dict, dict_key, keylen, "none", + SLEN("none")); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set value for key " + "%s in dictionary", + key); + goto out; } + } else { + keylen = sprintf(dict_key, "key%d", count); + ret = dict_set_strn(dict, dict_key, keylen, key); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set %s in " + "dictionary", + key); + goto out; + } + keylen = sprintf(dict_key, "value%d", count); + ret = dict_get_str(priv->opts, key, &value); + if (!ret) { + ret = dict_set_strn(dict, dict_key, keylen, value); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_DICT_SET_FAILED, + "Failed to set %s in " + " dictionary", + key); + goto out; + } + } else { + ret = glusterd_get_default_val_for_volopt( + dict, _gf_false, key, orig_key, volinfo, + &rsp.op_errstr); + if (ret && !rsp.op_errstr) { + snprintf(err_str, sizeof(err_str), + "Failed to fetch the " + "value of %s, check " + "log file for more" + " details", + key); + } + } + } } + /* Request is for a single option, explicitly set count to 1 + * in the dictionary. + */ + ret = dict_set_int32n(dict, "count", SLEN("count"), 1); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Failed to set count " + "value in the dictionary"); + goto out; + } + } else { + /* Handle the "all" volume option request */ + ret = glusterd_get_default_val_for_volopt(dict, _gf_true, NULL, NULL, + volinfo, &rsp.op_errstr); + if (ret && !rsp.op_errstr) { + snprintf(err_str, sizeof(err_str), + "Failed to fetch the value of all volume " + "options, check log file for more details"); + } + } - gf_log ("", GF_LOG_DEBUG, "returning %d", ret); - return ret; +out: + if (ret) { + if (!rsp.op_errstr) + rsp.op_errstr = err_str; + rsp.op_ret = ret; + } else { + rsp.op_errstr = ""; + rsp.op_ret = 0; + } + + ret = dict_allocate_and_serialize(dict, &rsp.dict.dict_val, + &rsp.dict.dict_len); + + glusterd_submit_reply(req, &rsp, NULL, 0, NULL, (xdrproc_t)xdr_gf_cli_rsp); + GF_FREE(rsp.dict.dict_val); + GF_FREE(key_fixed); + return ret; } int -glusterd_transport_keepalive_options_get (int *interval, int *time) +__glusterd_handle_get_vol_opt(rpcsvc_request_t *req) { - int ret = 0; - xlator_t *this = NULL; + int32_t ret = -1; + gf_cli_req cli_req = {{ + 0, + }}; + dict_t *dict = NULL; + char err_str[64] = { + 0, + }; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + GF_ASSERT(req); + + ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + snprintf(err_str, sizeof(err_str), + "Failed to decode " + "request received from cli"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, "%s", + err_str); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (cli_req.dict.dict_len) { + /* Unserialize the dictionary */ + dict = dict_new(); + + ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + "failed to " + "unserialize req-buffer to dictionary"); + snprintf(err_str, sizeof(err_str), + "Unable to decode " + "the command"); + goto out; + } else { + 
dict->extra_stdfree = cli_req.dict.dict_val; + } + } + ret = glusterd_get_volume_opts(req, dict); - this = THIS; - GF_ASSERT (this); +out: + if (dict) + dict_unref(dict); - ret = dict_get_int32 (this->options, - "transport.socket.keepalive-interval", - interval); - ret = dict_get_int32 (this->options, - "transport.socket.keepalive-time", - time); - return 0; + return ret; } int -glusterd_transport_inet_keepalive_options_build (dict_t **options, - const char *hostname, int port) +glusterd_handle_get_vol_opt(rpcsvc_request_t *req) { - dict_t *dict = NULL; - int32_t interval = -1; - int32_t time = -1; - int ret = 0; + return glusterd_big_locked_handler(req, __glusterd_handle_get_vol_opt); +} - GF_ASSERT (options); - GF_ASSERT (hostname); +extern struct rpc_clnt_program gd_brick_prog; - if (!port) - port = GLUSTERD_DEFAULT_PORT; - ret = rpc_transport_inet_options_build (&dict, hostname, port); - if (ret) - goto out; +static int +glusterd_print_global_options(dict_t *opts, char *key, data_t *val, void *data) +{ + FILE *fp = NULL; + + GF_VALIDATE_OR_GOTO(THIS->name, key, out); + GF_VALIDATE_OR_GOTO(THIS->name, val, out); + GF_VALIDATE_OR_GOTO(THIS->name, data, out); - glusterd_transport_keepalive_options_get (&interval, &time); + if (strcmp(key, GLUSTERD_GLOBAL_OPT_VERSION) == 0) + goto out; - if ((interval > 0) || (time > 0)) - ret = rpc_transport_keepalive_options_set (dict, interval, time); - *options = dict; + fp = (FILE *)data; + fprintf(fp, "%s: %s\n", key, val->data); out: - gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + return 0; } -int -glusterd_friend_add (const char *hoststr, int port, - glusterd_friend_sm_state_t state, - uuid_t *uuid, - struct rpc_clnt *rpc, - glusterd_peerinfo_t **friend, - gf_boolean_t restore, - glusterd_peerctx_args_t *args) +static int +glusterd_print_volume_options(dict_t *opts, char *key, data_t *val, void *data) { - int ret = 0; - glusterd_conf_t *conf = NULL; - glusterd_peerinfo_t *peerinfo = NULL; - glusterd_peerctx_t *peerctx = NULL; - gf_boolean_t is_allocated = _gf_false; - dict_t *options = NULL; + FILE *fp = NULL; - conf = THIS->private; - GF_ASSERT (conf) - GF_ASSERT (hoststr); + GF_VALIDATE_OR_GOTO(THIS->name, key, out); + GF_VALIDATE_OR_GOTO(THIS->name, val, out); + GF_VALIDATE_OR_GOTO(THIS->name, data, out); - peerctx = GF_CALLOC (1, sizeof (*peerctx), gf_gld_mt_peerctx_t); - if (!peerctx) { - ret = -1; - goto out; - } + fp = (FILE *)data; + fprintf(fp, "Volume%d.options.%s: %s\n", volcount, key, val->data); +out: + return 0; +} - if (args) - peerctx->args = *args; +static int +glusterd_print_gsync_status(FILE *fp, dict_t *gsync_dict) +{ + int ret = -1; + int gsync_count = 0; + int i = 0; + gf_gsync_status_t *status_vals = NULL; + char status_val_name[PATH_MAX] = { + 0, + }; - ret = glusterd_peerinfo_new (&peerinfo, state, uuid, hoststr); - if (ret) - goto out; - peerctx->peerinfo = peerinfo; - if (friend) - *friend = peerinfo; + GF_VALIDATE_OR_GOTO(THIS->name, fp, out); + GF_VALIDATE_OR_GOTO(THIS->name, gsync_dict, out); - if (!rpc) { - ret = glusterd_transport_inet_keepalive_options_build (&options, - hoststr, port); - if (ret) - goto out; - ret = glusterd_rpc_create (&rpc, options, - glusterd_peer_rpc_notify, - peerctx); - if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, "failed to create rpc for" - " peer %s", (char*)hoststr); - goto out; - } - is_allocated = _gf_true; - } + ret = dict_get_int32n(gsync_dict, "gsync-count", SLEN("gsync-count"), + &gsync_count); - peerinfo->rpc = rpc; + fprintf(fp, "Volume%d.gsync_count: 
%d\n", volcount, gsync_count); - if (!restore) - ret = glusterd_store_peerinfo (peerinfo); + if (gsync_count == 0) { + ret = 0; + goto out; + } - list_add_tail (&peerinfo->uuid_list, &conf->peers); + for (i = 0; i < gsync_count; i++) { + snprintf(status_val_name, sizeof(status_val_name), "status_value%d", i); + ret = dict_get_bin(gsync_dict, status_val_name, + (void **)&(status_vals)); + if (ret) + goto out; + + fprintf(fp, "Volume%d.pair%d.session_slave: %s\n", volcount, i + 1, + get_struct_variable(21, status_vals)); + fprintf(fp, "Volume%d.pair%d.master_node: %s\n", volcount, i + 1, + get_struct_variable(0, status_vals)); + fprintf(fp, "Volume%d.pair%d.master_volume: %s\n", volcount, i + 1, + get_struct_variable(1, status_vals)); + fprintf(fp, "Volume%d.pair%d.master_brick: %s\n", volcount, i + 1, + get_struct_variable(2, status_vals)); + fprintf(fp, "Volume%d.pair%d.slave_user: %s\n", volcount, i + 1, + get_struct_variable(3, status_vals)); + fprintf(fp, "Volume%d.pair%d.slave: %s\n", volcount, i + 1, + get_struct_variable(4, status_vals)); + fprintf(fp, "Volume%d.pair%d.slave_node: %s\n", volcount, i + 1, + get_struct_variable(5, status_vals)); + fprintf(fp, "Volume%d.pair%d.status: %s\n", volcount, i + 1, + get_struct_variable(6, status_vals)); + fprintf(fp, "Volume%d.pair%d.crawl_status: %s\n", volcount, i + 1, + get_struct_variable(7, status_vals)); + fprintf(fp, "Volume%d.pair%d.last_synced: %s\n", volcount, i + 1, + get_struct_variable(8, status_vals)); + fprintf(fp, "Volume%d.pair%d.entry: %s\n", volcount, i + 1, + get_struct_variable(9, status_vals)); + fprintf(fp, "Volume%d.pair%d.data: %s\n", volcount, i + 1, + get_struct_variable(10, status_vals)); + fprintf(fp, "Volume%d.pair%d.meta: %s\n", volcount, i + 1, + get_struct_variable(11, status_vals)); + fprintf(fp, "Volume%d.pair%d.failures: %s\n", volcount, i + 1, + get_struct_variable(12, status_vals)); + fprintf(fp, "Volume%d.pair%d.checkpoint_time: %s\n", volcount, i + 1, + get_struct_variable(13, status_vals)); + fprintf(fp, "Volume%d.pair%d.checkpoint_completed: %s\n", volcount, + i + 1, get_struct_variable(14, status_vals)); + fprintf(fp, "Volume%d.pair%d.checkpoint_completion_time: %s\n", + volcount, i + 1, get_struct_variable(15, status_vals)); + } out: - if (ret) { - if (peerctx) - GF_FREE (peerctx); - if (is_allocated && rpc) { - (void) rpc_clnt_unref (rpc); - } - if (peerinfo) { - peerinfo->rpc = NULL; - (void) glusterd_friend_cleanup (peerinfo); - } - } - - gf_log ("glusterd", GF_LOG_INFO, "connect returned %d", ret); - return ret; + return ret; } -int -glusterd_probe_begin (rpcsvc_request_t *req, const char *hoststr, int port) +static int +glusterd_print_gsync_status_by_vol(FILE *fp, glusterd_volinfo_t *volinfo) { - int ret = -1; - glusterd_peerinfo_t *peerinfo = NULL; - glusterd_peerctx_args_t args = {0}; - glusterd_friend_sm_event_t *event = NULL; - - GF_ASSERT (hoststr); - - ret = glusterd_friend_find (NULL, (char *)hoststr, &peerinfo); + int ret = -1; + dict_t *gsync_rsp_dict = NULL; + char my_hostname[256] = { + 0, + }; + + xlator_t *this = THIS; + GF_ASSERT(this); + + GF_VALIDATE_OR_GOTO(THIS->name, volinfo, out); + GF_VALIDATE_OR_GOTO(THIS->name, fp, out); + + gsync_rsp_dict = dict_new(); + if (!gsync_rsp_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); + goto out; + } + + ret = gethostname(my_hostname, sizeof(my_hostname)); + if (ret) { + /* stick to N/A */ + (void)strcpy(my_hostname, "N/A"); + } + + ret = glusterd_get_gsync_status_mst(volinfo, gsync_rsp_dict, 
my_hostname); + /* Ignoring ret as above function always returns ret = 0 */ + + ret = glusterd_print_gsync_status(fp, gsync_rsp_dict); +out: + if (gsync_rsp_dict) + dict_unref(gsync_rsp_dict); + return ret; +} +static int +glusterd_print_snapinfo_by_vol(FILE *fp, glusterd_volinfo_t *volinfo, + int volcount) +{ + int ret = -1; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_volinfo_t *tmp_vol = NULL; + glusterd_snap_t *snapinfo = NULL; + int snapcount = 0; + char timestr[GF_TIMESTR_SIZE] = { + 0, + }; + char snap_status_str[STATUS_STRLEN] = { + 0, + }; + + GF_VALIDATE_OR_GOTO(THIS->name, volinfo, out); + GF_VALIDATE_OR_GOTO(THIS->name, fp, out); + + cds_list_for_each_entry_safe(snap_vol, tmp_vol, &volinfo->snap_volumes, + snapvol_list) + { + snapcount++; + snapinfo = snap_vol->snapshot; + + ret = glusterd_get_snap_status_str(snapinfo, snap_status_str); if (ret) { - gf_log ("glusterd", GF_LOG_INFO, "Unable to find peerinfo" - " for host: %s (%d)", hoststr, port); - args.mode = GD_MODE_ON; - args.req = req; - ret = glusterd_friend_add ((char *)hoststr, port, - GD_FRIEND_STATE_DEFAULT, - NULL, NULL, &peerinfo, 0, &args); - if ((!ret) && (!peerinfo->connected)) { - ret = GLUSTERD_CONNECTION_AWAITED; - } + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_STATE_STR_GET_FAILED, + "Failed to get status for snapshot: %s", snapinfo->snapname); - } else if (peerinfo->connected && - (GD_FRIEND_STATE_BEFRIENDED == peerinfo->state.state)) { - ret = glusterd_friend_hostname_update (peerinfo, (char*)hoststr, - _gf_false); - if (ret) - goto out; - //this is just to rename so inject local acc for cluster update - ret = glusterd_friend_sm_new_event (GD_FRIEND_EVENT_LOCAL_ACC, - &event); - if (!ret) { - event->peerinfo = peerinfo; - ret = glusterd_friend_sm_inject_event (event); - glusterd_xfer_cli_probe_resp (req, 0, GF_PROBE_SUCCESS, - (char*)hoststr, port); - } - } else { - glusterd_xfer_cli_probe_resp (req, 0, GF_PROBE_FRIEND, - (char*)hoststr, port); + goto out; } + gf_time_fmt(timestr, sizeof timestr, snapinfo->time_stamp, + gf_timefmt_FT); + + fprintf(fp, "Volume%d.snapshot%d.name: %s\n", volcount, snapcount, + snapinfo->snapname); + fprintf(fp, "Volume%d.snapshot%d.id: %s\n", volcount, snapcount, + uuid_utoa(snapinfo->snap_id)); + fprintf(fp, "Volume%d.snapshot%d.time: %s\n", volcount, snapcount, + timestr); + + if (snapinfo->description) + fprintf(fp, "Volume%d.snapshot%d.description: %s\n", volcount, + snapcount, snapinfo->description); + fprintf(fp, "Volume%d.snapshot%d.status: %s\n", volcount, snapcount, + snap_status_str); + } + ret = 0; out: - gf_log ("", GF_LOG_DEBUG, "returning %d", ret); - return ret; + return ret; } -int -glusterd_deprobe_begin (rpcsvc_request_t *req, const char *hoststr, int port, - uuid_t uuid) +static int +glusterd_print_client_details(FILE *fp, dict_t *dict, + glusterd_volinfo_t *volinfo, int volcount, + glusterd_brickinfo_t *brickinfo, int brickcount) { - int ret = -1; - glusterd_peerinfo_t *peerinfo = NULL; - glusterd_friend_sm_event_t *event = NULL; - glusterd_probe_ctx_t *ctx = NULL; - - GF_ASSERT (hoststr); - GF_ASSERT (req); + int ret = -1; + xlator_t *this = NULL; + int brick_index = -1; + int client_count = 0; + char key[64] = { + 0, + }; + int keylen; + char *clientname = NULL; + uint64_t bytesread = 0; + uint64_t byteswrite = 0; + uint32_t opversion = 0; + + glusterd_pending_node_t *pending_node = NULL; + rpc_clnt_t *rpc = NULL; + struct syncargs args = { + 0, + }; + gd1_mgmt_brick_op_req *brick_req = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, 
out); + + GF_VALIDATE_OR_GOTO(this->name, dict, out); + + if (gf_uuid_compare(brickinfo->uuid, MY_UUID) || + !glusterd_is_brick_started(brickinfo)) { + ret = 0; + goto out; + } - ret = glusterd_friend_find (uuid, (char *)hoststr, &peerinfo); + brick_index++; + pending_node = GF_CALLOC(1, sizeof(*pending_node), + gf_gld_mt_pending_node_t); + if (!pending_node) { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "Unable to allocate memory"); + goto out; + } - if (ret) { - gf_log ("glusterd", GF_LOG_INFO, "Unable to find peerinfo" - " for host: %s %d", hoststr, port); - goto out; - } + pending_node->node = brickinfo; + pending_node->type = GD_NODE_BRICK; + pending_node->index = brick_index; - if (!peerinfo->rpc) { - //handle this case - goto out; - } + rpc = glusterd_pending_node_get_rpc(pending_node); + if (!rpc) { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RPC_FAILURE, + "Failed to retrieve rpc object"); + goto out; + } - ret = glusterd_friend_sm_new_event - (GD_FRIEND_EVENT_INIT_REMOVE_FRIEND, &event); + brick_req = GF_CALLOC(1, sizeof(*brick_req), gf_gld_mt_mop_brick_req_t); + if (!brick_req) { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "Unable to allocate memory"); + goto out; + } + + brick_req->op = GLUSTERD_BRICK_STATUS; + brick_req->name = ""; + brick_req->dict.dict_val = NULL; + brick_req->dict.dict_len = 0; + + ret = dict_set_strn(dict, "brick-name", SLEN("brick-name"), + brickinfo->path); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=brick-name", NULL); + goto out; + } + + ret = dict_set_int32n(dict, "cmd", SLEN("cmd"), GF_CLI_STATUS_CLIENTS); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=cmd", NULL); + goto out; + } + + ret = dict_set_strn(dict, "volname", SLEN("volname"), volinfo->volname); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=volname", NULL); + goto out; + } + + ret = dict_allocate_and_serialize(dict, &brick_req->input.input_val, + &brick_req->input.input_len); + if (ret) + goto out; + + GD_SYNCOP(rpc, (&args), NULL, gd_syncop_brick_op_cbk, brick_req, + &gd_brick_prog, brick_req->op, xdr_gd1_mgmt_brick_op_req); + + if (args.op_ret) + goto out; + + ret = dict_get_int32n(args.dict, "clientcount", SLEN("clientcount"), + &client_count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Couldn't get client count"); + goto out; + } + + fprintf(fp, "Volume%d.Brick%d.client_count: %d\n", volcount, brickcount, + client_count); + + if (client_count == 0) { + ret = 0; + goto out; + } + int i; + for (i = 1; i <= client_count; i++) { + keylen = snprintf(key, sizeof(key), "client%d.hostname", i - 1); + ret = dict_get_strn(args.dict, key, keylen, &clientname); if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, - "Unable to get new event"); - return ret; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get client hostname"); + goto out; } - ctx = GF_CALLOC (1, sizeof(*ctx), gf_gld_mt_probe_ctx_t); + snprintf(key, sizeof(key), "Client%d.hostname", i); + fprintf(fp, "Volume%d.Brick%d.%s: %s\n", volcount, brickcount, key, + clientname); - if (!ctx) { - goto out; + snprintf(key, sizeof(key), "client%d.bytesread", i - 1); + ret = dict_get_uint64(args.dict, key, &bytesread); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get bytesread from client"); + goto out; } - ctx->hostname = gf_strdup (hoststr); - 
ctx->port = port; - ctx->req = req; - - event->ctx = ctx; + snprintf(key, sizeof(key), "Client%d.bytesread", i); + fprintf(fp, "Volume%d.Brick%d.%s: %" PRIu64 "\n", volcount, brickcount, + key, bytesread); - event->peerinfo = peerinfo; + snprintf(key, sizeof(key), "client%d.byteswrite", i - 1); + ret = dict_get_uint64(args.dict, key, &byteswrite); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get byteswrite from client"); + goto out; + } - ret = glusterd_friend_sm_inject_event (event); + snprintf(key, sizeof(key), "Client%d.byteswrite", i); + fprintf(fp, "Volume%d.Brick%d.%s: %" PRIu64 "\n", volcount, brickcount, + key, byteswrite); + snprintf(key, sizeof(key), "client%d.opversion", i - 1); + ret = dict_get_uint32(args.dict, key, &opversion); if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, "Unable to inject event %d, " - "ret = %d", event->event, ret); - goto out; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get client opversion"); + goto out; } + snprintf(key, sizeof(key), "Client%d.opversion", i); + fprintf(fp, "Volume%d.Brick%d.%s: %" PRIu32 "\n", volcount, brickcount, + key, opversion); + } + out: - return ret; + if (pending_node) + GF_FREE(pending_node); + + if (brick_req) { + if (brick_req->input.input_val) + GF_FREE(brick_req->input.input_val); + GF_FREE(brick_req); + } + if (args.dict) + dict_unref(args.dict); + if (args.errstr) + GF_FREE(args.errstr); + + return ret; } - -int -glusterd_xfer_friend_remove_resp (rpcsvc_request_t *req, char *hostname, int port) +static int +glusterd_get_state(rpcsvc_request_t *req, dict_t *dict) { - gd1_mgmt_friend_rsp rsp = {{0}, }; - int32_t ret = -1; - xlator_t *this = NULL; - glusterd_conf_t *conf = NULL; + int32_t ret = -1; + gf_cli_rsp rsp = { + 0, + }; + FILE *fp = NULL; + DIR *dp = NULL; + char err_str[2048] = { + 0, + }; + glusterd_conf_t *priv = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_peer_hostname_t *peer_hostname_info = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + xlator_t *this = NULL; + dict_t *vol_all_opts = NULL; + struct statvfs brickstat = {0}; + char *odir = NULL; + char *filename = NULL; + char *ofilepath = NULL; + char *tmp_str = NULL; + int count = 0; + int count_bkp = 0; + int odirlen = 0; + time_t now = 0; + char timestamp[16] = { + 0, + }; + uint32_t get_state_cmd = 0; + uint64_t memtotal = 0; + uint64_t memfree = 0; + char id_str[64] = { + 0, + }; + + char *vol_type_str = NULL; + + char transport_type_str[STATUS_STRLEN] = { + 0, + }; + char quorum_status_str[STATUS_STRLEN] = { + 0, + }; + char rebal_status_str[STATUS_STRLEN] = { + 0, + }; + char vol_status_str[STATUS_STRLEN] = { + 0, + }; + char brick_status_str[STATUS_STRLEN] = { + 0, + }; + this = THIS; + GF_VALIDATE_OR_GOTO(THIS->name, this, out); + + priv = THIS->private; + GF_VALIDATE_OR_GOTO(this->name, priv, out); + + GF_VALIDATE_OR_GOTO(this->name, dict, out); + + ret = dict_get_strn(dict, "odir", SLEN("odir"), &tmp_str); + if (ret) { + odirlen = gf_asprintf(&odir, "%s", "/var/run/gluster/"); + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_DICT_GET_FAILED, + "Default output directory: %s", odir); + } else { + odirlen = gf_asprintf(&odir, "%s", tmp_str); + } + + dp = sys_opendir(odir); + if (dp) { + sys_closedir(dp); + } else { + if (errno == ENOENT) { + snprintf(err_str, sizeof(err_str), + "Output directory %s does not exist.", odir); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", + err_str); + } else if (errno == 
ENOTDIR) { + snprintf(err_str, sizeof(err_str), + "Output directory " + "does not exist. %s points to a file.", + odir); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", + err_str); + } + + GF_FREE(odir); + ret = -1; + goto out; + } + + ret = dict_get_strn(dict, "filename", SLEN("filename"), &tmp_str); + if (ret) { + now = gf_time(); + strftime(timestamp, sizeof(timestamp), "%Y%m%d_%H%M%S", + localtime(&now)); + gf_asprintf(&filename, "%s_%s", "glusterd_state", timestamp); + + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_DICT_GET_FAILED, + "Default filename: %s", filename); + } else { + gf_asprintf(&filename, "%s", tmp_str); + } + + ret = gf_asprintf(&ofilepath, "%s%s%s", odir, + ((odir[odirlen - 1] != '/') ? "/" : ""), filename); + + if (ret < 0) { + GF_FREE(odir); + GF_FREE(filename); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to get the output path"); + ret = -1; + goto out; + } + GF_FREE(odir); + GF_FREE(filename); + + ret = dict_set_dynstrn(dict, "ofilepath", SLEN("ofilepath"), ofilepath); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to set output path"); + goto out; + } + + fp = fopen(ofilepath, "w"); + if (!fp) { + snprintf(err_str, sizeof(err_str), "Failed to open file at %s", + ofilepath); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", + err_str); + ret = -1; + goto out; + } - GF_ASSERT (hostname); + ret = dict_get_uint32(dict, "getstate-cmd", &get_state_cmd); + if (ret) { + gf_msg_debug(this->name, 0, "get-state command type not set"); + ret = 0; + } - rsp.op_ret = 0; - this = THIS; - GF_ASSERT (this); + if (get_state_cmd == GF_CLI_GET_STATE_VOLOPTS) { + fprintf(fp, "[Volume Options]\n"); + cds_list_for_each_entry(volinfo, &priv->volumes, vol_list) + { + fprintf(fp, "Volume%d.name: %s\n", ++count, volinfo->volname); - conf = this->private; + volcount = count; + vol_all_opts = dict_new(); - uuid_copy (rsp.uuid, conf->uuid); - rsp.hostname = hostname; - rsp.port = port; - ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - gd_xdr_serialize_mgmt_friend_rsp); + ret = glusterd_get_default_val_for_volopt( + vol_all_opts, _gf_true, NULL, NULL, volinfo, &rsp.op_errstr); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_OPTS_IMPORT_FAIL, + "Failed to " + "fetch the value of all volume options " + "for volume %s", + volinfo->volname); + if (vol_all_opts) + dict_unref(vol_all_opts); + continue; + } - gf_log ("glusterd", GF_LOG_INFO, - "Responded to %s (%d), ret: %d", hostname, port, ret); - return ret; -} + dict_foreach(vol_all_opts, glusterd_print_volume_options, fp); + if (vol_all_opts) + dict_unref(vol_all_opts); + } + ret = 0; + goto out; + } -int -glusterd_xfer_friend_add_resp (rpcsvc_request_t *req, char *hostname, int port, - int32_t op_ret, int32_t op_errno) -{ - gd1_mgmt_friend_rsp rsp = {{0}, }; - int32_t ret = -1; - xlator_t *this = NULL; - glusterd_conf_t *conf = NULL; + fprintf(fp, "[Global]\n"); - GF_ASSERT (hostname); + uuid_utoa_r(priv->uuid, id_str); + fprintf(fp, "MYUUID: %s\n", id_str); - this = THIS; - GF_ASSERT (this); + fprintf(fp, "op-version: %d\n", priv->op_version); - conf = this->private; + fprintf(fp, "\n[Global options]\n"); - uuid_copy (rsp.uuid, conf->uuid); - rsp.op_ret = op_ret; - rsp.op_errno = op_errno; - rsp.hostname = gf_strdup (hostname); - rsp.port = port; + if (priv->opts) + dict_foreach(priv->opts, glusterd_print_global_options, fp); - ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - gd_xdr_serialize_mgmt_friend_rsp); + 
fprintf(fp, "\n[Peers]\n"); + RCU_READ_LOCK; - gf_log ("glusterd", GF_LOG_INFO, - "Responded to %s (%d), ret: %d", hostname, port, ret); - if (rsp.hostname) - GF_FREE (rsp.hostname) - return ret; -} + cds_list_for_each_entry_rcu(peerinfo, &priv->peers, uuid_list) + { + fprintf(fp, "Peer%d.primary_hostname: %s\n", ++count, + peerinfo->hostname); + fprintf(fp, "Peer%d.uuid: %s\n", count, gd_peer_uuid_str(peerinfo)); + fprintf(fp, "Peer%d.state: %s\n", count, + glusterd_friend_sm_state_name_get(peerinfo->state.state)); + fprintf(fp, "Peer%d.connected: %s\n", count, + peerinfo->connected ? "Connected" : "Disconnected"); -int -glusterd_xfer_cli_probe_resp (rpcsvc_request_t *req, int32_t op_ret, - int32_t op_errno, char *hostname, int port) -{ - gf1_cli_probe_rsp rsp = {0, }; - int32_t ret = -1; + fprintf(fp, "Peer%d.othernames: ", count); + count_bkp = 0; + cds_list_for_each_entry(peer_hostname_info, &peerinfo->hostnames, + hostname_list) + { + if (strcmp(peerinfo->hostname, peer_hostname_info->hostname) == 0) + continue; - GF_ASSERT (req); + if (count_bkp > 0) + fprintf(fp, ","); - rsp.op_ret = op_ret; - rsp.op_errno = op_errno; - rsp.hostname = hostname; - rsp.port = port; + fprintf(fp, "%s", peer_hostname_info->hostname); + count_bkp++; + } + count_bkp = 0; + fprintf(fp, "\n"); + } + RCU_READ_UNLOCK; - ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - gf_xdr_serialize_cli_probe_rsp); + count = 0; + fprintf(fp, "\n[Volumes]\n"); - gf_log ("glusterd", GF_LOG_INFO, "Responded to CLI, ret: %d",ret); + cds_list_for_each_entry(volinfo, &priv->volumes, vol_list) + { + ret = glusterd_volume_get_type_str(volinfo, &vol_type_str); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STATE_STR_GET_FAILED, + "Failed to get type for volume: %s", volinfo->volname); + goto out; + } - return ret; -} + ret = glusterd_volume_get_status_str(volinfo, vol_status_str); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STATE_STR_GET_FAILED, + "Failed to get status for volume: %s", volinfo->volname); + goto out; + } -int -glusterd_xfer_cli_deprobe_resp (rpcsvc_request_t *req, int32_t op_ret, - int32_t op_errno, char *hostname) -{ - gf1_cli_deprobe_rsp rsp = {0, }; - int32_t ret = -1; + ret = glusterd_volume_get_transport_type_str(volinfo, + transport_type_str); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STATE_STR_GET_FAILED, + "Failed to get transport type for volume: %s", + volinfo->volname); + goto out; + } - GF_ASSERT (req); + ret = glusterd_volume_get_quorum_status_str(volinfo, quorum_status_str); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STATE_STR_GET_FAILED, + "Failed to get quorum status for volume: %s", + volinfo->volname); + goto out; + } - rsp.op_ret = op_ret; - rsp.op_errno = op_errno; - rsp.hostname = hostname; + ret = glusterd_volume_get_rebalance_status_str(volinfo, + rebal_status_str); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STATE_STR_GET_FAILED, + "Failed to get rebalance status for volume: %s", + volinfo->volname); + goto out; + } - ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - gf_xdr_serialize_cli_deprobe_rsp); + fprintf(fp, "Volume%d.name: %s\n", ++count, volinfo->volname); - gf_log ("glusterd", GF_LOG_INFO, "Responded to CLI, ret: %d",ret); + uuid_utoa_r(volinfo->volume_id, id_str); + fprintf(fp, "Volume%d.id: %s\n", count, id_str); - return ret; -} + fprintf(fp, "Volume%d.type: %s\n", count, vol_type_str); + fprintf(fp, "Volume%d.transport_type: %s\n", count, transport_type_str); + fprintf(fp, "Volume%d.status: 
%s\n", count, vol_status_str); + fprintf(fp, "Volume%d.profile_enabled: %d\n", count, + glusterd_is_profile_on(volinfo)); + fprintf(fp, "Volume%d.brickcount: %d\n", count, volinfo->brick_count); -int32_t -glusterd_list_friends (rpcsvc_request_t *req, dict_t *dict, int32_t flags) -{ - int32_t ret = -1; - glusterd_conf_t *priv = NULL; - glusterd_peerinfo_t *entry = NULL; - int32_t count = 0; - dict_t *friends = NULL; - gf1_cli_peer_list_rsp rsp = {0,}; - - priv = THIS->private; - GF_ASSERT (priv); - - if (!list_empty (&priv->peers)) { - friends = dict_new (); - if (!friends) { - gf_log ("", GF_LOG_WARNING, "Out of Memory"); - goto out; + count_bkp = count; + count = 0; + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + fprintf(fp, "Volume%d.Brick%d.path: %s:%s\n", count_bkp, ++count, + brickinfo->hostname, brickinfo->path); + fprintf(fp, "Volume%d.Brick%d.hostname: %s\n", count_bkp, count, + brickinfo->hostname); + /* Determine which one is the arbiter brick */ + if (volinfo->arbiter_count == 1) { + if (count % volinfo->replica_count == 0) { + fprintf(fp, + "Volume%d.Brick%d." + "is_arbiter: 1\n", + count_bkp, count); } - } else { - ret = 0; - goto out; - } + } + /* Add following information only for bricks + * local to current node */ + if (gf_uuid_compare(brickinfo->uuid, MY_UUID)) + continue; + fprintf(fp, "Volume%d.Brick%d.port: %d\n", count_bkp, count, + brickinfo->port); + fprintf(fp, "Volume%d.Brick%d.rdma_port: %d\n", count_bkp, count, + brickinfo->rdma_port); + fprintf(fp, "Volume%d.Brick%d.port_registered: %d\n", count_bkp, + count, brickinfo->port_registered); + glusterd_brick_get_status_str(brickinfo, brick_status_str); + fprintf(fp, "Volume%d.Brick%d.status: %s\n", count_bkp, count, + brick_status_str); + + ret = sys_statvfs(brickinfo->path, &brickstat); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "statfs error: %s ", strerror(errno)); + memfree = 0; + memtotal = 0; + } else { + memfree = brickstat.f_bfree * brickstat.f_bsize; + memtotal = brickstat.f_blocks * brickstat.f_bsize; + } + + fprintf(fp, "Volume%d.Brick%d.spacefree: %" PRIu64 "Bytes\n", + count_bkp, count, memfree); + fprintf(fp, "Volume%d.Brick%d.spacetotal: %" PRIu64 "Bytes\n", + count_bkp, count, memtotal); + + if (get_state_cmd != GF_CLI_GET_STATE_DETAIL) + continue; + + ret = glusterd_print_client_details(fp, dict, volinfo, count_bkp, + brickinfo, count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_CLIENTS_GET_STATE_FAILED, + "Failed to get client details"); + goto out; + } + } + + count = count_bkp; + + ret = glusterd_print_snapinfo_by_vol(fp, volinfo, count); + if (ret) + goto out; + + fprintf(fp, "Volume%d.snap_count: %" PRIu64 "\n", count, + volinfo->snap_count); + fprintf(fp, "Volume%d.stripe_count: %d\n", count, + volinfo->stripe_count); + fprintf(fp, "Volume%d.replica_count: %d\n", count, + volinfo->replica_count); + fprintf(fp, "Volume%d.subvol_count: %d\n", count, + volinfo->subvol_count); + fprintf(fp, "Volume%d.arbiter_count: %d\n", count, + volinfo->arbiter_count); + fprintf(fp, "Volume%d.disperse_count: %d\n", count, + volinfo->disperse_count); + fprintf(fp, "Volume%d.redundancy_count: %d\n", count, + volinfo->redundancy_count); + fprintf(fp, "Volume%d.quorum_status: %s\n", count, quorum_status_str); + + fprintf(fp, "Volume%d.snapd_svc.online_status: %s\n", count, + volinfo->snapd.svc.online ? "Online" : "Offline"); + fprintf(fp, "Volume%d.snapd_svc.inited: %s\n", count, + volinfo->snapd.svc.inited ? 
"True" : "False"); + + uuid_utoa_r(volinfo->rebal.rebalance_id, id_str); + char *rebal_data = gf_uint64_2human_readable( + volinfo->rebal.rebalance_data); + + fprintf(fp, "Volume%d.rebalance.id: %s\n", count, id_str); + fprintf(fp, "Volume%d.rebalance.status: %s\n", count, rebal_status_str); + fprintf(fp, "Volume%d.rebalance.failures: %" PRIu64 "\n", count, + volinfo->rebal.rebalance_failures); + fprintf(fp, "Volume%d.rebalance.skipped: %" PRIu64 "\n", count, + volinfo->rebal.skipped_files); + fprintf(fp, "Volume%d.rebalance.lookedup: %" PRIu64 "\n", count, + volinfo->rebal.lookedup_files); + fprintf(fp, "Volume%d.rebalance.files: %" PRIu64 "\n", count, + volinfo->rebal.rebalance_files); + fprintf(fp, "Volume%d.rebalance.data: %s\n", count, rebal_data); + fprintf(fp, "Volume%d.time_left: %" PRIu64 "\n", count, + volinfo->rebal.time_left); + + GF_FREE(rebal_data); + + fprintf(fp, "Volume%d.shd_svc.online_status: %s\n", count, + volinfo->shd.svc.online ? "Online" : "Offline"); + fprintf(fp, "Volume%d.shd_svc.inited: %s\n", count, + volinfo->shd.svc.inited ? "True" : "False"); + + if (volinfo->rep_brick.src_brick && volinfo->rep_brick.dst_brick) { + fprintf(fp, "Volume%d.replace_brick.src: %s:%s\n", count, + volinfo->rep_brick.src_brick->hostname, + volinfo->rep_brick.src_brick->path); + fprintf(fp, "Volume%d.replace_brick.dest: %s:%s\n", count, + volinfo->rep_brick.dst_brick->hostname, + volinfo->rep_brick.dst_brick->path); + } + + volcount = count; + ret = glusterd_print_gsync_status_by_vol(fp, volinfo); + if (ret) + goto out; - if (flags == GF_CLI_LIST_ALL) { - list_for_each_entry (entry, &priv->peers, uuid_list) { - count++; - ret = glusterd_add_peer_detail_to_dict (entry, - friends, count); - if (ret) - goto out; + if (volinfo->dict) + dict_foreach(volinfo->dict, glusterd_print_volume_options, fp); - } + fprintf(fp, "\n"); + } - ret = dict_set_int32 (friends, "count", count); + count = 0; - if (ret) - goto out; - } + fprintf(fp, "\n[Services]\n"); +#ifdef BUILD_GNFS + if (priv->nfs_svc.inited) { + fprintf(fp, "svc%d.name: %s\n", ++count, priv->nfs_svc.name); + fprintf(fp, "svc%d.online_status: %s\n\n", count, + priv->nfs_svc.online ? "Online" : "Offline"); + } +#endif + if (priv->bitd_svc.inited) { + fprintf(fp, "svc%d.name: %s\n", ++count, priv->bitd_svc.name); + fprintf(fp, "svc%d.online_status: %s\n\n", count, + priv->bitd_svc.online ? "Online" : "Offline"); + } + + if (priv->scrub_svc.inited) { + fprintf(fp, "svc%d.name: %s\n", ++count, priv->scrub_svc.name); + fprintf(fp, "svc%d.online_status: %s\n\n", count, + priv->scrub_svc.online ? "Online" : "Offline"); + } + + if (priv->quotad_svc.inited) { + fprintf(fp, "svc%d.name: %s\n", ++count, priv->quotad_svc.name); + fprintf(fp, "svc%d.online_status: %s\n\n", count, + priv->quotad_svc.online ? 
"Online" : "Offline"); + } + + fprintf(fp, "\n[Misc]\n"); + if (priv->pmap) { + fprintf(fp, "Base port: %d\n", priv->pmap->base_port); + fprintf(fp, "Last allocated port: %d\n", priv->pmap->last_alloc); + } +out: - ret = dict_allocate_and_serialize (friends, &rsp.friends.friends_val, - (size_t *)&rsp.friends.friends_len); + if (fp) + fclose(fp); - if (ret) - goto out; + rsp.op_ret = ret; + if (rsp.op_errstr == NULL) + rsp.op_errstr = err_str; - ret = 0; -out: + ret = dict_allocate_and_serialize(dict, &rsp.dict.dict_val, + &rsp.dict.dict_len); + glusterd_to_cli(req, &rsp, NULL, 0, NULL, (xdrproc_t)xdr_gf_cli_rsp, dict); + GF_FREE(rsp.dict.dict_val); - if (friends) - dict_unref (friends); + return ret; +} - rsp.op_ret = ret; +static int +__glusterd_handle_get_state(rpcsvc_request_t *req) +{ + int32_t ret = -1; + gf_cli_req cli_req = { + { + 0, + }, + }; + dict_t *dict = NULL; + char err_str[64] = { + 0, + }; + xlator_t *this = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO(THIS->name, this, out); + GF_VALIDATE_OR_GOTO(this->name, req, out); + + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_DAEMON_STATE_REQ_RCVD, + "Received request to get state for glusterd"); + + ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + snprintf(err_str, sizeof(err_str), + "Failed to decode " + "request received from cli"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, "%s", + err_str); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (cli_req.dict.dict_len) { + /* Unserialize the dictionary */ + dict = dict_new(); + + ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + "failed to " + "unserialize req-buffer to dictionary"); + snprintf(err_str, sizeof(err_str), + "Unable to decode" + " the command"); + goto out; + } else { + dict->extra_stdfree = cli_req.dict.dict_val; + } + } - ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - gf_xdr_serialize_cli_peer_list_rsp); - if (rsp.friends.friends_val) - GF_FREE (rsp.friends.friends_val); + ret = glusterd_get_state(req, dict); - return ret; +out: + if (dict && ret) { + /* + * When glusterd_to_cli (called from glusterd_get_state) + * succeeds, it frees the dict for us, so this would be a + * double free, but in other cases it's our responsibility. 
+ */ + dict_unref(dict); + } + return ret; } -int32_t -glusterd_get_volumes (rpcsvc_request_t *req, dict_t *dict, int32_t flags) -{ - int32_t ret = -1; - glusterd_conf_t *priv = NULL; - glusterd_volinfo_t *entry = NULL; - int32_t count = 0; - dict_t *volumes = NULL; - gf1_cli_get_vol_rsp rsp = {0,}; - char *volname = NULL; - - priv = THIS->private; - GF_ASSERT (priv); - - volumes = dict_new (); - if (!volumes) { - gf_log ("", GF_LOG_WARNING, "Out of Memory"); - goto out; - } +int +glusterd_handle_get_state(rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler(req, __glusterd_handle_get_state); +} - if (list_empty (&priv->volumes)) { - ret = 0; - goto respond; - } +static int +get_brickinfo_from_brickid(char *brickid, glusterd_brickinfo_t **brickinfo) +{ + glusterd_volinfo_t *volinfo = NULL; + char *volid_str = NULL; + char *brick = NULL; + char *brickid_dup = NULL; + uuid_t volid = {0}; + int ret = -1; + + xlator_t *this = THIS; + GF_ASSERT(this); + + brickid_dup = gf_strdup(brickid); + if (!brickid_dup) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED, + "brick_id=%s", brickid, NULL); + goto out; + } + + volid_str = brickid_dup; + brick = strchr(brickid_dup, ':'); + if (!volid_str) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL); + goto out; + } + + if (!brick) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL); + goto out; + } + + *brick = '\0'; + brick++; + gf_uuid_parse(volid_str, volid); + ret = glusterd_volinfo_find_by_volume_id(volid, &volinfo); + if (ret) { + /* Check if it a snapshot volume */ + ret = glusterd_snap_volinfo_find_by_volume_id(volid, &volinfo); + if (ret) + goto out; + } - if (flags == GF_CLI_GET_VOLUME_ALL) { - list_for_each_entry (entry, &priv->volumes, vol_list) { - ret = glusterd_add_volume_detail_to_dict (entry, - volumes, count); - if (ret) - goto respond; + ret = glusterd_volume_brickinfo_get_by_brick(brick, volinfo, brickinfo, + _gf_false); + if (ret) + goto out; - count++; + ret = 0; +out: + GF_FREE(brickid_dup); + return ret; +} - } +static int gd_stale_rpc_disconnect_log; - } else if (flags == GF_CLI_GET_NEXT_VOLUME) { - ret = dict_get_str (dict, "volname", &volname); +int +__glusterd_brick_rpc_notify(struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data) +{ + char *brickid = NULL; + int ret = 0; + glusterd_conf_t *conf = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + int32_t pid = -1; + glusterd_brickinfo_t *brickinfo_tmp = NULL; + glusterd_brick_proc_t *brick_proc = NULL; + char pidfile[PATH_MAX] = {0}; + char *brickpath = NULL; + gf_boolean_t is_service_running = _gf_true; + + brickid = mydata; + if (!brickid) + return 0; - if (ret) { - if (priv->volumes.next) { - entry = list_entry (priv->volumes.next, - typeof (*entry), - vol_list); - } - } else { - ret = glusterd_volinfo_find (volname, &entry); - if (ret) - goto respond; - entry = list_entry (entry->vol_list.next, - typeof (*entry), - vol_list); - } + ret = get_brickinfo_from_brickid(brickid, &brickinfo); + if (ret) + return 0; - if (&entry->vol_list == &priv->volumes) { - goto respond; - } else { - ret = glusterd_add_volume_detail_to_dict (entry, - volumes, count); - if (ret) - goto respond; + this = THIS; + GF_ASSERT(this); + conf = this->private; + GF_ASSERT(conf); - count++; + switch (event) { + case RPC_CLNT_CONNECT: + ret = get_volinfo_from_brickid(brickid, &volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, 
+ "Failed to get volinfo from " + "brickid(%s)", + brickid); + goto out; + } + /* If a node on coming back up, already starts a brick + * before the handshake, and the notification comes after + * the handshake is done, then we need to check if this + * is a restored brick with a snapshot pending. If so, we + * need to stop the brick + */ + if (brickinfo->snap_status == -1) { + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_SNAPSHOT_PENDING, + "Snapshot is pending on %s:%s. " + "Hence not starting the brick", + brickinfo->hostname, brickinfo->path); + ret = glusterd_brick_stop(volinfo, brickinfo, _gf_false); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_STOP_FAIL, + "Unable to stop %s:%s", brickinfo->hostname, + brickinfo->path); + goto out; } - } else if (flags == GF_CLI_GET_VOLUME) { - ret = dict_get_str (dict, "volname", &volname); - if (ret) - goto respond; - ret = glusterd_volinfo_find (volname, &entry); - if (ret) - goto respond; + break; + } + gf_msg_debug(this->name, 0, "Connected to %s:%s", + brickinfo->hostname, brickinfo->path); - ret = glusterd_add_volume_detail_to_dict (entry, - volumes, count); - if (ret) - goto respond; + glusterd_set_brick_status(brickinfo, GF_BRICK_STARTED); - count++; - } + gf_event(EVENT_BRICK_CONNECTED, "peer=%s;volume=%s;brick=%s", + brickinfo->hostname, volinfo->volname, brickinfo->path); -respond: - ret = dict_set_int32 (volumes, "count", count); - if (ret) - goto out; - ret = dict_allocate_and_serialize (volumes, &rsp.volumes.volumes_val, - (size_t *)&rsp.volumes.volumes_len); + ret = default_notify(this, GF_EVENT_CHILD_UP, NULL); - if (ret) - goto out; - - ret = 0; -out: - rsp.op_ret = ret; + break; - ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - gf_xdr_serialize_cli_peer_list_rsp); + case RPC_CLNT_DISCONNECT: + if (rpc != brickinfo->rpc) { + /* + * There used to be a bunch of races in the volume + * start/stop code that could result in us getting here + * and setting the brick status incorrectly. Many of + * those have been fixed or avoided, but just in case + * any are still left it doesn't hurt to keep the extra + * check and avoid further damage. 
+ */ + GF_LOG_OCCASIONALLY(gd_stale_rpc_disconnect_log, this->name, + GF_LOG_WARNING, + "got disconnect from stale rpc on " + "%s", + brickinfo->path); + break; + } + if (glusterd_is_brick_started(brickinfo)) { + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_BRICK_DISCONNECTED, + "Brick %s:%s has disconnected from glusterd.", + brickinfo->hostname, brickinfo->path); - if (volumes) - dict_unref (volumes); + ret = get_volinfo_from_brickid(brickid, &volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, + "Failed to get volinfo from " + "brickid(%s)", + brickid); + goto out; + } + gf_event(EVENT_BRICK_DISCONNECTED, "peer=%s;volume=%s;brick=%s", + brickinfo->hostname, volinfo->volname, + brickinfo->path); + /* In case of an abrupt shutdown of a brick PMAP_SIGNOUT + * event is not received by glusterd which can lead to a + * stale port entry in glusterd, so forcibly clean up + * the same if the process is not running sometime + * gf_is_service_running true so to ensure about brick instance + * call search_brick_path_from_proc + */ + GLUSTERD_GET_BRICK_PIDFILE(pidfile, volinfo, brickinfo, conf); + is_service_running = gf_is_service_running(pidfile, &pid); + if (pid > 0) + brickpath = search_brick_path_from_proc(pid, + brickinfo->path); + if (!is_service_running || !brickpath) { + ret = pmap_registry_remove( + THIS, brickinfo->port, brickinfo->path, + GF_PMAP_PORT_BRICKSERVER, NULL, _gf_true); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, + GD_MSG_PMAP_REGISTRY_REMOVE_FAIL, 0, + "Failed to remove pmap " + "registry for port %d for " + "brick %s", + brickinfo->port, brickinfo->path); + ret = 0; + } + } + } + + if (brickpath) + GF_FREE(brickpath); + + if (is_brick_mx_enabled() && glusterd_is_brick_started(brickinfo)) { + brick_proc = brickinfo->brick_proc; + if (!brick_proc) + break; + cds_list_for_each_entry(brickinfo_tmp, &brick_proc->bricks, + mux_bricks) + { + glusterd_set_brick_status(brickinfo_tmp, GF_BRICK_STOPPED); + brickinfo_tmp->start_triggered = _gf_false; + /* When bricks are stopped, ports also need to + * be cleaned up + */ + pmap_registry_remove( + THIS, brickinfo_tmp->port, brickinfo_tmp->path, + GF_PMAP_PORT_BRICKSERVER, NULL, _gf_true); + } + } else { + glusterd_set_brick_status(brickinfo, GF_BRICK_STOPPED); + brickinfo->start_triggered = _gf_false; + } + break; + + case RPC_CLNT_DESTROY: + GF_FREE(mydata); + mydata = NULL; + break; + default: + gf_msg_trace(this->name, 0, "got some other RPC event %d", event); + break; + } - if (rsp.volumes.volumes_val) - GF_FREE (rsp.volumes.volumes_val); - return ret; +out: + return ret; } int -glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata, - rpc_clnt_event_t event, - void *data) +glusterd_brick_rpc_notify(struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data) { - xlator_t *this = NULL; - glusterd_conf_t *conf = NULL; - int ret = 0; - glusterd_brickinfo_t *brickinfo = NULL; + return glusterd_big_locked_notify(rpc, mydata, event, data, + __glusterd_brick_rpc_notify); +} - brickinfo = mydata; - if (!brickinfo) - return 0; +int +glusterd_friend_remove_notify(glusterd_peerctx_t *peerctx, int32_t op_errno) +{ + int ret = -1; + glusterd_friend_sm_event_t *new_event = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + rpcsvc_request_t *req = NULL; + char *errstr = NULL; + dict_t *dict = NULL; + + GF_ASSERT(peerctx); + + RCU_READ_LOCK; + peerinfo = glusterd_peerinfo_find_by_generation(peerctx->peerinfo_gen); + if (!peerinfo) { + gf_msg_debug(THIS->name, 0, + "Could not find peer %s(%s). 
" + "Peer could have been deleted.", + peerctx->peername, uuid_utoa(peerctx->peerid)); + ret = 0; + goto out; + } - this = THIS; - GF_ASSERT (this); - conf = this->private; - GF_ASSERT (conf); + req = peerctx->args.req; + dict = peerctx->args.dict; + errstr = peerctx->errstr; - switch (event) { - case RPC_CLNT_CONNECT: - gf_log (this->name, GF_LOG_DEBUG, "got RPC_CLNT_CONNECT"); - glusterd_set_brick_status (brickinfo, GF_BRICK_STARTED); - ret = default_notify (this, GF_EVENT_CHILD_UP, NULL); + ret = glusterd_friend_sm_new_event(GD_FRIEND_EVENT_REMOVE_FRIEND, + &new_event); + if (!ret) { + if (!req) { + gf_msg(THIS->name, GF_LOG_WARNING, 0, GD_MSG_EVENT_NEW_GET_FAIL, + "Unable to find the request for responding " + "to User (%s)", + peerinfo->hostname); + goto out; + } - break; + glusterd_xfer_cli_probe_resp(req, -1, op_errno, errstr, + peerinfo->hostname, peerinfo->port, dict); - case RPC_CLNT_DISCONNECT: - gf_log (this->name, GF_LOG_DEBUG, "got RPC_CLNT_DISCONNECT"); - glusterd_set_brick_status (brickinfo, GF_BRICK_STOPPED); - if (brickinfo->timer && brickinfo->timer->callbk) - brickinfo->timer->callbk (brickinfo->timer->data); - break; + new_event->peername = gf_strdup(peerinfo->hostname); + gf_uuid_copy(new_event->peerid, peerinfo->uuid); + ret = glusterd_friend_sm_inject_event(new_event); - default: - gf_log (this->name, GF_LOG_TRACE, - "got some other RPC event %d", event); - break; - } + } else { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_EVENT_INJECT_FAIL, + "Unable to create event for removing peer %s", + peerinfo->hostname); + } - return ret; +out: + RCU_READ_UNLOCK; + return ret; } int -glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata, - rpc_clnt_event_t event, - void *data) +__glusterd_peer_rpc_notify(struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data) { - xlator_t *this = NULL; - glusterd_conf_t *conf = NULL; - int ret = 0; - glusterd_peerinfo_t *peerinfo = NULL; - glusterd_peerctx_t *peerctx = NULL; - - peerctx = mydata; - if (!peerctx) - return 0; - - peerinfo = peerctx->peerinfo; - this = THIS; - conf = this->private; - - switch (event) { - case RPC_CLNT_CONNECT: - { - gf_log (this->name, GF_LOG_DEBUG, "got RPC_CLNT_CONNECT"); - peerinfo->connected = 1; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + int ret = 0; + int32_t op_errno = ENOTCONN; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_peerctx_t *peerctx = NULL; + gf_boolean_t quorum_action = _gf_false; + glusterd_volinfo_t *volinfo = NULL; + glusterfs_ctx_t *ctx = NULL; + + uuid_t uuid; + + peerctx = mydata; + if (!peerctx) + return 0; - ret = glusterd_peer_handshake (this, rpc, peerctx); - if (ret) - gf_log ("", GF_LOG_ERROR, "glusterd handshake failed"); - break; + this = THIS; + conf = this->private; + + switch (event) { + case RPC_CLNT_DESTROY: + GF_FREE(peerctx->errstr); + GF_FREE(peerctx->peername); + GF_FREE(peerctx); + return 0; + case RPC_CLNT_PING: + return 0; + default: + break; + } + ctx = this->ctx; + GF_VALIDATE_OR_GOTO(this->name, ctx, out); + if (ctx->cleanup_started) { + gf_log(this->name, GF_LOG_INFO, + "glusterd already received a SIGTERM, " + "dropping the event %d for peer %s", + event, peerctx->peername); + return 0; + } + RCU_READ_LOCK; + + peerinfo = glusterd_peerinfo_find_by_generation(peerctx->peerinfo_gen); + if (!peerinfo) { + /* Peerinfo should be available at this point if its a connect + * event. Not finding it means that something terrible has + * happened. 
For non-connect event we might end up having a null + * peerinfo, so log at debug level. + */ + gf_msg(THIS->name, + (RPC_CLNT_CONNECT == event) ? GF_LOG_CRITICAL : GF_LOG_DEBUG, + ENOENT, GD_MSG_PEER_NOT_FOUND, + "Could not find peer " + "%s(%s)", + peerctx->peername, uuid_utoa(peerctx->peerid)); + + if (RPC_CLNT_CONNECT == event) { + gf_event(EVENT_PEER_NOT_FOUND, "peer=%s;uuid=%s", peerctx->peername, + uuid_utoa(peerctx->peerid)); } + ret = -1; + goto out; + } + + switch (event) { + case RPC_CLNT_CONNECT: { + gf_msg_debug(this->name, 0, "got RPC_CLNT_CONNECT"); + peerinfo->connected = 1; + peerinfo->quorum_action = _gf_true; + peerinfo->generation = uatomic_add_return(&conf->generation, 1); + peerctx->peerinfo_gen = peerinfo->generation; + /* EVENT_PEER_CONNECT will only be sent if peerctx->uuid is not + * NULL, otherwise it indicates this RPC_CLNT_CONNECT is from a + * peer probe trigger and given we already generate an event for + * peer probe this would be unnecessary. + */ + if (!gf_uuid_is_null(peerinfo->uuid)) { + gf_event(EVENT_PEER_CONNECT, "host=%s;uuid=%s", + peerinfo->hostname, uuid_utoa(peerinfo->uuid)); + } + ret = glusterd_peer_dump_version(this, rpc, peerctx); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HANDSHAKE_FAILED, + "glusterd handshake failed"); + break; + } + + case RPC_CLNT_DISCONNECT: { + /* If DISCONNECT event is already processed, skip the further + * ones + */ + if (is_rpc_clnt_disconnected(&rpc->conn)) + break; - case RPC_CLNT_DISCONNECT: + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_PEER_DISCONNECTED, + "Peer <%s> (<%s>), in state <%s>, has disconnected " + "from glusterd.", + peerinfo->hostname, uuid_utoa(peerinfo->uuid), + glusterd_friend_sm_state_name_get(peerinfo->state.state)); + gf_event(EVENT_PEER_DISCONNECT, "peer=%s;uuid=%s;state=%s", + peerinfo->hostname, uuid_utoa(peerinfo->uuid), + glusterd_friend_sm_state_name_get(peerinfo->state.state)); + + if (peerinfo->connected) { + if (conf->op_version < GD_OP_VERSION_3_6_0) { + glusterd_get_lock_owner(&uuid); + if (!gf_uuid_is_null(uuid) && + !gf_uuid_compare(peerinfo->uuid, uuid)) + glusterd_unlock(peerinfo->uuid); + } else { + cds_list_for_each_entry(volinfo, &conf->volumes, vol_list) + { + ret = glusterd_mgmt_v3_unlock(volinfo->volname, + peerinfo->uuid, "vol"); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, + GD_MSG_MGMTV3_UNLOCK_FAIL, + "Lock not released " + "for %s", + volinfo->volname); + } + } - //Inject friend disconnected here + op_errno = GF_PROBE_ANOTHER_CLUSTER; + ret = 0; + } - gf_log (this->name, GF_LOG_DEBUG, "got RPC_CLNT_DISCONNECT"); - peerinfo->connected = 0; + if ((peerinfo->quorum_contrib != QUORUM_DOWN) && + (peerinfo->state.state == GD_FRIEND_STATE_BEFRIENDED)) { + peerinfo->quorum_contrib = QUORUM_DOWN; + quorum_action = _gf_true; + peerinfo->quorum_action = _gf_false; + } - //default_notify (this, GF_EVENT_CHILD_DOWN, NULL); - break; + /* Remove peer if it is not a friend and connection/handshake + * fails, and notify cli. Happens only during probe. 
+ */ + if (peerinfo->state.state == GD_FRIEND_STATE_DEFAULT) { + glusterd_friend_remove_notify(peerctx, op_errno); + goto out; + } - default: - gf_log (this->name, GF_LOG_TRACE, - "got some other RPC event %d", event); - ret = 0; - break; + peerinfo->connected = 0; + break; } - return ret; + default: + gf_msg_trace(this->name, 0, "got some other RPC event %d", event); + ret = 0; + break; + } + +out: + RCU_READ_UNLOCK; + + glusterd_friend_sm(); + glusterd_op_sm(); + if (quorum_action) + glusterd_do_quorum_action(); + return ret; } int -glusterd_null (rpcsvc_request_t *req) +glusterd_peer_rpc_notify(struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data) { + return glusterd_big_locked_notify(rpc, mydata, event, data, + __glusterd_peer_rpc_notify); +} - return 0; +int +glusterd_null(rpcsvc_request_t *req) +{ + return 0; } -rpcsvc_actor_t gd_svc_mgmt_actors[] = { - [GLUSTERD_MGMT_NULL] = { "NULL", GLUSTERD_MGMT_NULL, glusterd_null, NULL, NULL}, - [GLUSTERD_MGMT_PROBE_QUERY] = { "PROBE_QUERY", GLUSTERD_MGMT_PROBE_QUERY, glusterd_handle_probe_query, NULL, NULL}, - [GLUSTERD_MGMT_FRIEND_ADD] = { "FRIEND_ADD", GLUSTERD_MGMT_FRIEND_ADD, glusterd_handle_incoming_friend_req, NULL, NULL}, - [GLUSTERD_MGMT_FRIEND_REMOVE] = { "FRIEND_REMOVE", GLUSTERD_MGMT_FRIEND_REMOVE, glusterd_handle_incoming_unfriend_req, NULL, NULL}, - [GLUSTERD_MGMT_FRIEND_UPDATE] = { "FRIEND_UPDATE", GLUSTERD_MGMT_FRIEND_UPDATE, glusterd_handle_friend_update, NULL, NULL}, - [GLUSTERD_MGMT_CLUSTER_LOCK] = { "CLUSTER_LOCK", GLUSTERD_MGMT_CLUSTER_LOCK, glusterd_handle_cluster_lock, NULL, NULL}, - [GLUSTERD_MGMT_CLUSTER_UNLOCK] = { "CLUSTER_UNLOCK", GLUSTERD_MGMT_CLUSTER_UNLOCK, glusterd_handle_cluster_unlock, NULL, NULL}, - [GLUSTERD_MGMT_STAGE_OP] = { "STAGE_OP", GLUSTERD_MGMT_STAGE_OP, glusterd_handle_stage_op, NULL, NULL}, - [GLUSTERD_MGMT_COMMIT_OP] = { "COMMIT_OP", GLUSTERD_MGMT_COMMIT_OP, glusterd_handle_commit_op, NULL, NULL}, +static rpcsvc_actor_t gd_svc_mgmt_actors[GLUSTERD_MGMT_MAXVALUE] = { + [GLUSTERD_MGMT_NULL] = {"NULL", glusterd_null, NULL, GLUSTERD_MGMT_NULL, + DRC_NA, 0}, + [GLUSTERD_MGMT_CLUSTER_LOCK] = {"CLUSTER_LOCK", + glusterd_handle_cluster_lock, NULL, + GLUSTERD_MGMT_CLUSTER_LOCK, DRC_NA, 0}, + [GLUSTERD_MGMT_CLUSTER_UNLOCK] = {"CLUSTER_UNLOCK", + glusterd_handle_cluster_unlock, NULL, + GLUSTERD_MGMT_CLUSTER_UNLOCK, DRC_NA, 0}, + [GLUSTERD_MGMT_STAGE_OP] = {"STAGE_OP", glusterd_handle_stage_op, NULL, + GLUSTERD_MGMT_STAGE_OP, DRC_NA, 0}, + [GLUSTERD_MGMT_COMMIT_OP] = + { + "COMMIT_OP", + glusterd_handle_commit_op, + NULL, + GLUSTERD_MGMT_COMMIT_OP, + DRC_NA, + 0, + }, }; struct rpcsvc_program gd_svc_mgmt_prog = { - .progname = "GlusterD svc mgmt", - .prognum = GD_MGMT_PROGRAM, - .progver = GD_MGMT_VERSION, - .numactors = GD_MGMT_PROCCNT, - .actors = gd_svc_mgmt_actors, + .progname = "GlusterD svc mgmt", + .prognum = GD_MGMT_PROGRAM, + .progver = GD_MGMT_VERSION, + .numactors = GLUSTERD_MGMT_MAXVALUE, + .actors = gd_svc_mgmt_actors, + .synctask = _gf_true, }; -rpcsvc_actor_t gd_svc_cli_actors[] = { - [GLUSTER_CLI_PROBE] = { "CLI_PROBE", GLUSTER_CLI_PROBE, glusterd_handle_cli_probe, NULL, NULL}, - [GLUSTER_CLI_CREATE_VOLUME] = { "CLI_CREATE_VOLUME", GLUSTER_CLI_CREATE_VOLUME, glusterd_handle_create_volume, NULL,NULL}, - [GLUSTER_CLI_DEFRAG_VOLUME] = { "CLI_DEFRAG_VOLUME", GLUSTER_CLI_DEFRAG_VOLUME, glusterd_handle_defrag_volume_v2, NULL,NULL}, - [GLUSTER_CLI_DEPROBE] = { "FRIEND_REMOVE", GLUSTER_CLI_DEPROBE, glusterd_handle_cli_deprobe, NULL, NULL}, - [GLUSTER_CLI_LIST_FRIENDS] = { 
"LIST_FRIENDS", GLUSTER_CLI_LIST_FRIENDS, glusterd_handle_cli_list_friends, NULL, NULL}, - [GLUSTER_CLI_START_VOLUME] = { "START_VOLUME", GLUSTER_CLI_START_VOLUME, glusterd_handle_cli_start_volume, NULL, NULL}, - [GLUSTER_CLI_STOP_VOLUME] = { "STOP_VOLUME", GLUSTER_CLI_STOP_VOLUME, glusterd_handle_cli_stop_volume, NULL, NULL}, - [GLUSTER_CLI_DELETE_VOLUME] = { "DELETE_VOLUME", GLUSTER_CLI_DELETE_VOLUME, glusterd_handle_cli_delete_volume, NULL, NULL}, - [GLUSTER_CLI_GET_VOLUME] = { "GET_VOLUME", GLUSTER_CLI_GET_VOLUME, glusterd_handle_cli_get_volume, NULL, NULL}, - [GLUSTER_CLI_ADD_BRICK] = { "ADD_BRICK", GLUSTER_CLI_ADD_BRICK, glusterd_handle_add_brick, NULL, NULL}, - [GLUSTER_CLI_REPLACE_BRICK] = { "REPLACE_BRICK", GLUSTER_CLI_REPLACE_BRICK, glusterd_handle_replace_brick, NULL, NULL}, - [GLUSTER_CLI_REMOVE_BRICK] = { "REMOVE_BRICK", GLUSTER_CLI_REMOVE_BRICK, glusterd_handle_remove_brick, NULL, NULL}, - [GLUSTER_CLI_LOG_FILENAME] = { "LOG FILENAME", GLUSTER_CLI_LOG_FILENAME, glusterd_handle_log_filename, NULL, NULL}, - [GLUSTER_CLI_LOG_LOCATE] = { "LOG LOCATE", GLUSTER_CLI_LOG_LOCATE, glusterd_handle_log_locate, NULL, NULL}, - [GLUSTER_CLI_LOG_ROTATE] = { "LOG FILENAME", GLUSTER_CLI_LOG_ROTATE, glusterd_handle_log_rotate, NULL, NULL}, - [GLUSTER_CLI_SET_VOLUME] = { "SET_VOLUME", GLUSTER_CLI_SET_VOLUME, glusterd_handle_set_volume, NULL, NULL}, - [GLUSTER_CLI_SYNC_VOLUME] = { "SYNC_VOLUME", GLUSTER_CLI_SYNC_VOLUME, glusterd_handle_sync_volume, NULL, NULL}, - [GLUSTER_CLI_RESET_VOLUME] = { "RESET_VOLUME", GLUSTER_CLI_RESET_VOLUME, glusterd_handle_reset_volume, NULL, NULL}, - [GLUSTER_CLI_FSM_LOG] = { "FSM_LOG", GLUSTER_CLI_FSM_LOG, glusterd_handle_fsm_log, NULL, NULL}, - [GLUSTER_CLI_GSYNC_SET] = { "GSYNC_SET", GLUSTER_CLI_GSYNC_SET, glusterd_handle_gsync_set, NULL, NULL}, - [GLUSTER_CLI_PROFILE_VOLUME] = { "STATS_VOLUME", GLUSTER_CLI_PROFILE_VOLUME, glusterd_handle_cli_profile_volume, NULL, NULL}, - [GLUSTER_CLI_QUOTA] = { "QUOTA", GLUSTER_CLI_QUOTA, glusterd_handle_quota, NULL, NULL}, +static rpcsvc_actor_t gd_svc_peer_actors[GLUSTERD_FRIEND_MAXVALUE] = { + [GLUSTERD_FRIEND_NULL] = {"NULL", glusterd_null, NULL, GLUSTERD_MGMT_NULL, + DRC_NA, 0}, + [GLUSTERD_PROBE_QUERY] = {"PROBE_QUERY", glusterd_handle_probe_query, NULL, + GLUSTERD_PROBE_QUERY, DRC_NA, 0}, + [GLUSTERD_FRIEND_ADD] = {"FRIEND_ADD", glusterd_handle_incoming_friend_req, + NULL, GLUSTERD_FRIEND_ADD, DRC_NA, 0}, + [GLUSTERD_FRIEND_REMOVE] = {"FRIEND_REMOVE", + glusterd_handle_incoming_unfriend_req, NULL, + GLUSTERD_FRIEND_REMOVE, DRC_NA, 0}, + [GLUSTERD_FRIEND_UPDATE] = {"FRIEND_UPDATE", glusterd_handle_friend_update, + NULL, GLUSTERD_FRIEND_UPDATE, DRC_NA, 0}, +}; + +struct rpcsvc_program gd_svc_peer_prog = { + .progname = "GlusterD svc peer", + .prognum = GD_FRIEND_PROGRAM, + .progver = GD_FRIEND_VERSION, + .numactors = GLUSTERD_FRIEND_MAXVALUE, + .actors = gd_svc_peer_actors, + .synctask = _gf_false, +}; +static rpcsvc_actor_t gd_svc_cli_actors[GLUSTER_CLI_MAXVALUE] = { + [GLUSTER_CLI_PROBE] = {"CLI_PROBE", glusterd_handle_cli_probe, NULL, + GLUSTER_CLI_PROBE, DRC_NA, 0}, + [GLUSTER_CLI_CREATE_VOLUME] = {"CLI_CREATE_VOLUME", + glusterd_handle_create_volume, NULL, + GLUSTER_CLI_CREATE_VOLUME, DRC_NA, 0}, + [GLUSTER_CLI_DEFRAG_VOLUME] = {"CLI_DEFRAG_VOLUME", + glusterd_handle_defrag_volume, NULL, + GLUSTER_CLI_DEFRAG_VOLUME, DRC_NA, 0}, + [GLUSTER_CLI_DEPROBE] = {"FRIEND_REMOVE", glusterd_handle_cli_deprobe, NULL, + GLUSTER_CLI_DEPROBE, DRC_NA, 0}, + [GLUSTER_CLI_LIST_FRIENDS] = {"LIST_FRIENDS", + 
glusterd_handle_cli_list_friends, NULL, + GLUSTER_CLI_LIST_FRIENDS, DRC_NA, 0}, + [GLUSTER_CLI_UUID_RESET] = {"UUID_RESET", glusterd_handle_cli_uuid_reset, + NULL, GLUSTER_CLI_UUID_RESET, DRC_NA, 0}, + [GLUSTER_CLI_UUID_GET] = {"UUID_GET", glusterd_handle_cli_uuid_get, NULL, + GLUSTER_CLI_UUID_GET, DRC_NA, 0}, + [GLUSTER_CLI_START_VOLUME] = {"START_VOLUME", + glusterd_handle_cli_start_volume, NULL, + GLUSTER_CLI_START_VOLUME, DRC_NA, 0}, + [GLUSTER_CLI_STOP_VOLUME] = {"STOP_VOLUME", glusterd_handle_cli_stop_volume, + NULL, GLUSTER_CLI_STOP_VOLUME, DRC_NA, 0}, + [GLUSTER_CLI_DELETE_VOLUME] = {"DELETE_VOLUME", + glusterd_handle_cli_delete_volume, NULL, + GLUSTER_CLI_DELETE_VOLUME, DRC_NA, 0}, + [GLUSTER_CLI_GET_VOLUME] = {"GET_VOLUME", glusterd_handle_cli_get_volume, + NULL, GLUSTER_CLI_GET_VOLUME, DRC_NA, 0}, + [GLUSTER_CLI_ADD_BRICK] = {"ADD_BRICK", glusterd_handle_add_brick, NULL, + GLUSTER_CLI_ADD_BRICK, DRC_NA, 0}, + [GLUSTER_CLI_ATTACH_TIER] = {"ATTACH_TIER", glusterd_handle_attach_tier, + NULL, GLUSTER_CLI_ATTACH_TIER, DRC_NA, 0}, + [GLUSTER_CLI_REPLACE_BRICK] = {"REPLACE_BRICK", + glusterd_handle_replace_brick, NULL, + GLUSTER_CLI_REPLACE_BRICK, DRC_NA, 0}, + [GLUSTER_CLI_REMOVE_BRICK] = {"REMOVE_BRICK", glusterd_handle_remove_brick, + NULL, GLUSTER_CLI_REMOVE_BRICK, DRC_NA, 0}, + [GLUSTER_CLI_LOG_ROTATE] = {"LOG FILENAME", glusterd_handle_log_rotate, + NULL, GLUSTER_CLI_LOG_ROTATE, DRC_NA, 0}, + [GLUSTER_CLI_SET_VOLUME] = {"SET_VOLUME", glusterd_handle_set_volume, NULL, + GLUSTER_CLI_SET_VOLUME, DRC_NA, 0}, + [GLUSTER_CLI_SYNC_VOLUME] = {"SYNC_VOLUME", glusterd_handle_sync_volume, + NULL, GLUSTER_CLI_SYNC_VOLUME, DRC_NA, 0}, + [GLUSTER_CLI_RESET_VOLUME] = {"RESET_VOLUME", glusterd_handle_reset_volume, + NULL, GLUSTER_CLI_RESET_VOLUME, DRC_NA, 0}, + [GLUSTER_CLI_FSM_LOG] = {"FSM_LOG", glusterd_handle_fsm_log, NULL, + GLUSTER_CLI_FSM_LOG, DRC_NA, 0}, + [GLUSTER_CLI_GSYNC_SET] = {"GSYNC_SET", glusterd_handle_gsync_set, NULL, + GLUSTER_CLI_GSYNC_SET, DRC_NA, 0}, + [GLUSTER_CLI_PROFILE_VOLUME] = {"STATS_VOLUME", + glusterd_handle_cli_profile_volume, NULL, + GLUSTER_CLI_PROFILE_VOLUME, DRC_NA, 0}, + [GLUSTER_CLI_QUOTA] = {"QUOTA", glusterd_handle_quota, NULL, + GLUSTER_CLI_QUOTA, DRC_NA, 0}, + [GLUSTER_CLI_GETWD] = {"GETWD", glusterd_handle_getwd, NULL, + GLUSTER_CLI_GETWD, DRC_NA, 1}, + [GLUSTER_CLI_STATUS_VOLUME] = {"STATUS_VOLUME", + glusterd_handle_status_volume, NULL, + GLUSTER_CLI_STATUS_VOLUME, DRC_NA, 0}, + [GLUSTER_CLI_MOUNT] = {"MOUNT", glusterd_handle_mount, NULL, + GLUSTER_CLI_MOUNT, DRC_NA, 1}, + [GLUSTER_CLI_UMOUNT] = {"UMOUNT", glusterd_handle_umount, NULL, + GLUSTER_CLI_UMOUNT, DRC_NA, 1}, + [GLUSTER_CLI_HEAL_VOLUME] = {"HEAL_VOLUME", glusterd_handle_cli_heal_volume, + NULL, GLUSTER_CLI_HEAL_VOLUME, DRC_NA, 0}, + [GLUSTER_CLI_STATEDUMP_VOLUME] = {"STATEDUMP_VOLUME", + glusterd_handle_cli_statedump_volume, + NULL, GLUSTER_CLI_STATEDUMP_VOLUME, + DRC_NA, 0}, + [GLUSTER_CLI_LIST_VOLUME] = {"LIST_VOLUME", glusterd_handle_cli_list_volume, + NULL, GLUSTER_CLI_LIST_VOLUME, DRC_NA, 0}, + [GLUSTER_CLI_CLRLOCKS_VOLUME] = {"CLEARLOCKS_VOLUME", + glusterd_handle_cli_clearlocks_volume, + NULL, GLUSTER_CLI_CLRLOCKS_VOLUME, DRC_NA, + 0}, + [GLUSTER_CLI_COPY_FILE] = {"COPY_FILE", glusterd_handle_copy_file, NULL, + GLUSTER_CLI_COPY_FILE, DRC_NA, 0}, + [GLUSTER_CLI_SYS_EXEC] = {"SYS_EXEC", glusterd_handle_sys_exec, NULL, + GLUSTER_CLI_SYS_EXEC, DRC_NA, 0}, + [GLUSTER_CLI_SNAP] = {"SNAP", glusterd_handle_snapshot, NULL, + GLUSTER_CLI_SNAP, DRC_NA, 0}, + [GLUSTER_CLI_BARRIER_VOLUME] = 
{"BARRIER_VOLUME", glusterd_handle_barrier, + NULL, GLUSTER_CLI_BARRIER_VOLUME, DRC_NA, + 0}, + [GLUSTER_CLI_GANESHA] = {"GANESHA", glusterd_handle_ganesha_cmd, NULL, + GLUSTER_CLI_GANESHA, DRC_NA, 0}, + [GLUSTER_CLI_GET_VOL_OPT] = {"GET_VOL_OPT", glusterd_handle_get_vol_opt, + NULL, DRC_NA, 0}, + [GLUSTER_CLI_BITROT] = {"BITROT", glusterd_handle_bitrot, NULL, + GLUSTER_CLI_BITROT, DRC_NA, 0}, + [GLUSTER_CLI_GET_STATE] = {"GET_STATE", glusterd_handle_get_state, NULL, + GLUSTER_CLI_GET_STATE, DRC_NA, 0}, + [GLUSTER_CLI_RESET_BRICK] = {"RESET_BRICK", glusterd_handle_reset_brick, + NULL, GLUSTER_CLI_RESET_BRICK, DRC_NA, 0}, + [GLUSTER_CLI_TIER] = {"TIER", glusterd_handle_tier, NULL, GLUSTER_CLI_TIER, + DRC_NA, 0}, + [GLUSTER_CLI_REMOVE_TIER_BRICK] = {"REMOVE_TIER_BRICK", + glusterd_handle_tier, NULL, + GLUSTER_CLI_REMOVE_TIER_BRICK, DRC_NA, + 0}, + [GLUSTER_CLI_ADD_TIER_BRICK] = {"ADD_TIER_BRICK", + glusterd_handle_add_tier_brick, NULL, + GLUSTER_CLI_ADD_TIER_BRICK, DRC_NA, 0}, }; struct rpcsvc_program gd_svc_cli_prog = { - .progname = "GlusterD svc cli", - .prognum = GLUSTER_CLI_PROGRAM, - .progver = GLUSTER_CLI_VERSION, - .numactors = GLUSTER_CLI_PROCCNT, - .actors = gd_svc_cli_actors, + .progname = "GlusterD svc cli", + .prognum = GLUSTER_CLI_PROGRAM, + .progver = GLUSTER_CLI_VERSION, + .numactors = GLUSTER_CLI_MAXVALUE, + .actors = gd_svc_cli_actors, + .synctask = _gf_true, }; -/* Keeping below programs for backword compatibility */ - -rpcsvc_actor_t glusterd1_mgmt_actors[] = { - [GD_MGMT_NULL] = { "NULL", GD_MGMT_NULL, glusterd_null, NULL, NULL}, - [GD_MGMT_PROBE_QUERY] = { "PROBE_QUERY", GD_MGMT_PROBE_QUERY, glusterd_handle_probe_query, NULL, NULL}, - [GD_MGMT_FRIEND_ADD] = { "FRIEND_ADD", GD_MGMT_FRIEND_ADD, glusterd_handle_incoming_friend_req, NULL, NULL}, - [GD_MGMT_FRIEND_REMOVE] = { "FRIEND_REMOVE", GD_MGMT_FRIEND_REMOVE, glusterd_handle_incoming_unfriend_req, NULL, NULL}, - [GD_MGMT_FRIEND_UPDATE] = { "FRIEND_UPDATE", GD_MGMT_FRIEND_UPDATE, glusterd_handle_friend_update, NULL, NULL}, - [GD_MGMT_CLUSTER_LOCK] = { "CLUSTER_LOCK", GD_MGMT_CLUSTER_LOCK, glusterd_handle_cluster_lock, NULL, NULL}, - [GD_MGMT_CLUSTER_UNLOCK] = { "CLUSTER_UNLOCK", GD_MGMT_CLUSTER_UNLOCK, glusterd_handle_cluster_unlock, NULL, NULL}, - [GD_MGMT_STAGE_OP] = { "STAGE_OP", GD_MGMT_STAGE_OP, glusterd_handle_stage_op, NULL, NULL}, - [GD_MGMT_COMMIT_OP] = { "COMMIT_OP", GD_MGMT_COMMIT_OP, glusterd_handle_commit_op, NULL, NULL}, - [GD_MGMT_CLI_PROBE] = { "CLI_PROBE", GD_MGMT_CLI_PROBE, glusterd_handle_cli_probe, NULL, NULL}, - [GD_MGMT_CLI_CREATE_VOLUME] = { "CLI_CREATE_VOLUME", GD_MGMT_CLI_CREATE_VOLUME, glusterd_handle_create_volume, NULL,NULL}, - [GD_MGMT_CLI_DEFRAG_VOLUME] = { "CLI_DEFRAG_VOLUME", GD_MGMT_CLI_DEFRAG_VOLUME, glusterd_handle_defrag_volume, NULL,NULL}, - [GD_MGMT_CLI_DEPROBE] = { "FRIEND_REMOVE", GD_MGMT_CLI_DEPROBE, glusterd_handle_cli_deprobe, NULL, NULL}, - [GD_MGMT_CLI_LIST_FRIENDS] = { "LIST_FRIENDS", GD_MGMT_CLI_LIST_FRIENDS, glusterd_handle_cli_list_friends, NULL, NULL}, - [GD_MGMT_CLI_START_VOLUME] = { "START_VOLUME", GD_MGMT_CLI_START_VOLUME, glusterd_handle_cli_start_volume, NULL, NULL}, - [GD_MGMT_CLI_STOP_VOLUME] = { "STOP_VOLUME", GD_MGMT_CLI_STOP_VOLUME, glusterd_handle_cli_stop_volume, NULL, NULL}, - [GD_MGMT_CLI_DELETE_VOLUME] = { "DELETE_VOLUME", GD_MGMT_CLI_DELETE_VOLUME, glusterd_handle_cli_delete_volume, NULL, NULL}, - [GD_MGMT_CLI_GET_VOLUME] = { "GET_VOLUME", GD_MGMT_CLI_GET_VOLUME, glusterd_handle_cli_get_volume, NULL, NULL}, - [GD_MGMT_CLI_ADD_BRICK] = { "ADD_BRICK", 
GD_MGMT_CLI_ADD_BRICK, glusterd_handle_add_brick, NULL, NULL}, - [GD_MGMT_CLI_REPLACE_BRICK] = { "REPLACE_BRICK", GD_MGMT_CLI_REPLACE_BRICK, glusterd_handle_replace_brick, NULL, NULL}, - [GD_MGMT_CLI_REMOVE_BRICK] = { "REMOVE_BRICK", GD_MGMT_CLI_REMOVE_BRICK, glusterd_handle_remove_brick, NULL, NULL}, - [GD_MGMT_CLI_LOG_FILENAME] = { "LOG FILENAME", GD_MGMT_CLI_LOG_FILENAME, glusterd_handle_log_filename, NULL, NULL}, - [GD_MGMT_CLI_LOG_LOCATE] = { "LOG LOCATE", GD_MGMT_CLI_LOG_LOCATE, glusterd_handle_log_locate, NULL, NULL}, - [GD_MGMT_CLI_LOG_ROTATE] = { "LOG FILENAME", GD_MGMT_CLI_LOG_ROTATE, glusterd_handle_log_rotate, NULL, NULL}, - [GD_MGMT_CLI_SET_VOLUME] = { "SET_VOLUME", GD_MGMT_CLI_SET_VOLUME, glusterd_handle_set_volume, NULL, NULL}, - [GD_MGMT_CLI_SYNC_VOLUME] = { "SYNC_VOLUME", GD_MGMT_CLI_SYNC_VOLUME, glusterd_handle_sync_volume, NULL, NULL}, - [GD_MGMT_CLI_RESET_VOLUME] = { "RESET_VOLUME", GD_MGMT_CLI_RESET_VOLUME, glusterd_handle_reset_volume, NULL, NULL}, - [GD_MGMT_CLI_FSM_LOG] = { "FSM_LOG", GD_MGMT_CLI_FSM_LOG, glusterd_handle_fsm_log, NULL, NULL}, - [GD_MGMT_CLI_GSYNC_SET] = {"GSYNC_SET", GD_MGMT_CLI_GSYNC_SET, glusterd_handle_gsync_set, NULL, NULL}, - [GD_MGMT_CLI_PROFILE_VOLUME] = { "STATS_VOLUME", GD_MGMT_CLI_PROFILE_VOLUME, glusterd_handle_cli_profile_volume, NULL, NULL} +/** + * This set of RPC progs are deemed to be trusted. Most of the actors support + * read only queries, the only exception being MOUNT/UMOUNT which is required + * by geo-replication to support unprivileged master -> slave sessions. + */ +static rpcsvc_actor_t gd_svc_cli_trusted_actors[GLUSTER_CLI_MAXVALUE] = { + [GLUSTER_CLI_LIST_FRIENDS] = {"LIST_FRIENDS", + glusterd_handle_cli_list_friends, NULL, + GLUSTER_CLI_LIST_FRIENDS, DRC_NA, 0}, + [GLUSTER_CLI_UUID_GET] = {"UUID_GET", glusterd_handle_cli_uuid_get, NULL, + GLUSTER_CLI_UUID_GET, DRC_NA, 0}, + [GLUSTER_CLI_GET_VOLUME] = {"GET_VOLUME", glusterd_handle_cli_get_volume, + NULL, GLUSTER_CLI_GET_VOLUME, DRC_NA, 0}, + [GLUSTER_CLI_GETWD] = {"GETWD", glusterd_handle_getwd, NULL, + GLUSTER_CLI_GETWD, DRC_NA, 1}, + [GLUSTER_CLI_STATUS_VOLUME] = {"STATUS_VOLUME", + glusterd_handle_status_volume, NULL, + GLUSTER_CLI_STATUS_VOLUME, DRC_NA, 0}, + [GLUSTER_CLI_LIST_VOLUME] = {"LIST_VOLUME", glusterd_handle_cli_list_volume, + NULL, GLUSTER_CLI_LIST_VOLUME, DRC_NA, 0}, + [GLUSTER_CLI_MOUNT] = {"MOUNT", glusterd_handle_mount, NULL, + GLUSTER_CLI_MOUNT, DRC_NA, 1}, + [GLUSTER_CLI_UMOUNT] = {"UMOUNT", glusterd_handle_umount, NULL, + GLUSTER_CLI_UMOUNT, DRC_NA, 1}, }; -struct rpcsvc_program glusterd1_mop_prog = { - .progname = "GlusterD0.0.1", - .prognum = GLUSTERD1_MGMT_PROGRAM, - .progver = GLUSTERD1_MGMT_VERSION, - .numactors = GLUSTERD1_MGMT_PROCCNT, - .actors = glusterd1_mgmt_actors, +struct rpcsvc_program gd_svc_cli_trusted_progs = { + .progname = "GlusterD svc cli read-only", + .prognum = GLUSTER_CLI_PROGRAM, + .progver = GLUSTER_CLI_VERSION, + .numactors = GLUSTER_CLI_MAXVALUE, + .actors = gd_svc_cli_trusted_actors, + .synctask = _gf_true, }; + +/* As we cant remove the handlers, I'm moving the tier based + * handlers to this file as we no longer have gluster-tier.c + * and other tier.c files + */ + +int +glusterd_handle_tier(rpcsvc_request_t *req) +{ + return 0; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-handshake.c b/xlators/mgmt/glusterd/src/glusterd-handshake.c index dda8a03bb12..d96e35503dd 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handshake.c +++ b/xlators/mgmt/glusterd/src/glusterd-handshake.c @@ -1,427 +1,2580 @@ /* - Copyright (c) 
2010 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ - + Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ -#include "xlator.h" -#include "defaults.h" -#include "glusterfs.h" -#include "compat-errno.h" +#include <glusterfs/xlator.h> +#include <glusterfs/defaults.h> +#include <glusterfs/glusterfs.h> +#include <glusterfs/syscall.h> +#include <glusterfs/compat-errno.h> #include "glusterd.h" #include "glusterd-utils.h" #include "glusterd-op-sm.h" - +#include "glusterd-store.h" +#include "glusterd-snapshot-utils.h" +#include "glusterd-svc-mgmt.h" +#include "glusterd-snapd-svc-helper.h" +#include "glusterd-volgen.h" +#include "glusterd-quotad-svc.h" +#include "glusterd-messages.h" #include "glusterfs3.h" #include "protocol-common.h" #include "rpcsvc.h" +#include "rpc-common-xdr.h" +#include "glusterd-gfproxyd-svc-helper.h" +#include "glusterd-shd-svc-helper.h" -extern struct rpc_clnt_program glusterd3_1_mgmt_prog; -extern struct rpc_clnt_program gd_clnt_mgmt_prog; +extern struct rpc_clnt_program gd_peer_prog; +extern struct rpc_clnt_program gd_mgmt_prog; +extern struct rpc_clnt_program gd_mgmt_v3_prog; -typedef ssize_t (*gfs_serialize_t) (struct iovec outmsg, void *data); +#define TRUSTED_PREFIX "trusted-" +#define GD_PEER_ID_KEY "peer-id" -static size_t -build_volfile_path (const char *volname, char *path, - size_t path_len) -{ - struct stat stbuf = {0,}; - int32_t ret = -1; - glusterd_conf_t *priv = NULL; - char *vol = NULL; - char *dup_volname = NULL; - char *free_ptr = NULL; - char *tmp = NULL; - glusterd_volinfo_t *volinfo = NULL; +typedef ssize_t (*gfs_serialize_t)(struct iovec outmsg, void *data); - priv = THIS->private; - - if (volname[0] != '/') { - /* Normal behavior */ - dup_volname = gf_strdup (volname); - } else { - /* Bringing in NFS like behavior for mount command, */ - /* With this, one can mount a volume with below cmd */ - /* bash# mount -t glusterfs server:/volume /mnt/pnt */ - dup_volname = gf_strdup (&volname[1]); +static int +get_snap_volname_and_volinfo(const char *volpath, char **volname, + glusterd_volinfo_t **volinfo) +{ + int ret = -1; + char *save_ptr = NULL; + char *str_token = NULL; + char *snapname = NULL; + char *volname_token = NULL; + char *vol = NULL; + glusterd_snap_t *snap = NULL; + xlator_t *this = NULL; + char *tmp_str_token = NULL; + char *volfile_token = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(volpath); + GF_ASSERT(volinfo); + + str_token = gf_strdup(volpath); + if (NULL == str_token) { + goto out; + } + 
+ tmp_str_token = str_token; + + /* Input volname will have below formats: + * /snaps/<snapname>/<volname>.<hostname> + * or + * /snaps/<snapname>/<parent-volname> + * We need to extract snapname and parent_volname */ + + /*split string by "/" */ + strtok_r(str_token, "/", &save_ptr); + snapname = strtok_r(NULL, "/", &save_ptr); + if (!snapname) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, + "Invalid path: %s", volpath); + goto out; + } + + volname_token = strtok_r(NULL, "/", &save_ptr); + if (!volname_token) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, + "Invalid path: %s", volpath); + goto out; + } + + snap = glusterd_find_snap_by_name(snapname); + if (!snap) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_SNAP_NOT_FOUND, + "Failed to " + "fetch snap %s", + snapname); + goto out; + } + + /* Find if its a parent volume name or snap volume + * name. This function will succeed if volname_token + * is a parent volname + */ + ret = glusterd_volinfo_find(volname_token, volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOLINFO_GET_FAIL, + "failed to get the volinfo for the volume %s", volname_token); + + /* Get the actual volfile name. */ + volfile_token = strtok_r(NULL, "/", &save_ptr); + *volname = gf_strdup(volfile_token); + if (NULL == *volname) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED, + "Volname=%s", volfile_token, NULL); + ret = -1; + goto out; } - free_ptr = dup_volname; - - ret = glusterd_volinfo_find (dup_volname, &volinfo); + /* + * Ideally, this should succeed as volname_token now contains + * the name of the snap volume (i.e. name of the volume that + * represents the snapshot). But, if for some reason, volinfo + * for the snap volume is not found, then try to get from the + * name of the volfile. Name of the volfile is like this. 
+ * <snap volume name>.<hostname>.<brick path>.vol + */ + ret = glusterd_snap_volinfo_find(volname_token, snap, volinfo); if (ret) { - /* Split the volume name */ - vol = strtok_r (dup_volname, ".", &tmp); - if (!vol) - goto out; - ret = glusterd_volinfo_find (vol, &volinfo); - if (ret) - goto out; - } - ret = snprintf (path, path_len, "%s/vols/%s/%s.vol", - priv->workdir, volinfo->volname, volname); - if (ret == -1) + /* Split the volume name */ + vol = strtok_r(volfile_token, ".", &save_ptr); + if (!vol) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, + "Invalid " + "volname (%s)", + volfile_token); goto out; - - ret = stat (path, &stbuf); - if ((ret == -1) && (errno == ENOENT)) { - ret = snprintf (path, path_len, "%s/vols/%s/%s-fuse.vol", - priv->workdir, volinfo->volname, volname); - ret = stat (path, &stbuf); + } + + ret = glusterd_snap_volinfo_find(vol, snap, volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_INFO_FAIL, + "Failed to " + "fetch snap volume from volname (%s)", + vol); + goto out; + } } - if ((ret == -1) && (errno == ENOENT)) { - ret = snprintf (path, path_len, "%s/vols/%s/%s-tcp.vol", - priv->workdir, volinfo->volname, volname); + } else { + /*volname_token is parent volname*/ + ret = glusterd_snap_volinfo_find_from_parent_volname(volname_token, + snap, volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_INFO_FAIL, + "Failed to " + "fetch snap volume from parent " + "volname (%s)", + volname_token); + goto out; } - ret = 1; + /* Since volname_token is a parent volname we should + * get the snap volname here*/ + *volname = gf_strdup((*volinfo)->volname); + if (NULL == *volname) { + ret = -1; + goto out; + } + } + out: - if (free_ptr) - GF_FREE (free_ptr); - return ret; + if (ret && NULL != *volname) { + GF_FREE(*volname); + *volname = NULL; + } + + if (tmp_str_token) + GF_FREE(tmp_str_token); + return ret; } -static int -xdr_to_glusterfs_req (rpcsvc_request_t *req, void *arg, gfs_serialize_t sfunc) +int32_t +glusterd_get_client_per_brick_volfile(glusterd_volinfo_t *volinfo, + char *filename, char *path, int path_len) { - int ret = -1; + char workdir[PATH_MAX] = { + 0, + }; + glusterd_conf_t *priv = NULL; + int32_t ret = -1; - if (!req) - return -1; + GF_VALIDATE_OR_GOTO("glusterd", THIS, out); + priv = THIS->private; + GF_VALIDATE_OR_GOTO(THIS->name, priv, out); - ret = sfunc (req->msg[0], arg); + GLUSTERD_GET_VOLUME_DIR(workdir, volinfo, priv); - if (ret > 0) - ret = 0; + snprintf(path, path_len, "%s/%s", workdir, filename); - return ret; + ret = 0; +out: + return ret; } - -int -server_getspec (rpcsvc_request_t *req) +size_t +build_volfile_path(char *volume_id, char *path, size_t path_len, + char *trusted_str, dict_t *dict) { - int32_t ret = -1; - int32_t op_errno = 0; - int32_t spec_fd = -1; - size_t file_len = 0; - char filename[ZR_PATH_MAX] = {0,}; - struct stat stbuf = {0,}; - char *volume = NULL; - int cookie = 0; + struct stat stbuf = { + 0, + }; + int32_t ret = -1; + char *vol = NULL; + char *dup_volname = NULL; + char *save_ptr = NULL; + char *free_ptr = NULL; + char *volname = NULL; + char *volid_ptr = NULL; + char dup_volid[PATH_MAX] = { + 0, + }; + char path_prefix[PATH_MAX] = { + 0, + }; + xlator_t *this = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_conf_t *priv = NULL; + int32_t len = 0; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + GF_ASSERT(volume_id); + GF_ASSERT(path); + + volid_ptr = strstr(volume_id, "snapd/"); + if (volid_ptr) { + volid_ptr = 
strchr(volid_ptr, '/'); + if (!volid_ptr) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL); + ret = -1; + goto out; + } + volid_ptr++; - gf_getspec_req args = {0,}; - gf_getspec_rsp rsp = {0,}; + ret = glusterd_volinfo_find(volid_ptr, &volinfo); + if (ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, + "Couldn't find volinfo"); + goto out; + } + glusterd_svc_build_snapd_volfile(volinfo, path, path_len); + ret = 0; + goto out; + } + + volid_ptr = strstr(volume_id, "gluster/"); + if (volid_ptr) { + volid_ptr = strchr(volid_ptr, '/'); + if (!volid_ptr) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL); + ret = -1; + goto out; + } + volid_ptr++; + glusterd_svc_build_volfile_path(volid_ptr, priv->workdir, path, + path_len); + ret = 0; + goto out; + } + + volid_ptr = strstr(volume_id, "gfproxy-client/"); + if (volid_ptr) { + volid_ptr = strchr(volid_ptr, '/'); + if (!volid_ptr) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL); + ret = -1; + goto out; + } + volid_ptr++; - if (xdr_to_glusterfs_req (req, &args, xdr_to_getspec_req)) { - //failed to decode msg; - req->rpc_err = GARBAGE_ARGS; - goto fail; + ret = glusterd_volinfo_find(volid_ptr, &volinfo); + if (ret == -1) { + gf_log(this->name, GF_LOG_ERROR, "Couldn't find volinfo"); + goto out; } - volume = args.key; + glusterd_get_gfproxy_client_volfile(volinfo, path, path_len); + + ret = 0; + goto out; + } + + volid_ptr = strstr(volume_id, "gfproxyd/"); + if (volid_ptr) { + volid_ptr = strchr(volid_ptr, '/'); + if (!volid_ptr) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL); + ret = -1; + goto out; + } + volid_ptr++; - ret = build_volfile_path (volume, filename, sizeof (filename)); + ret = glusterd_volinfo_find(volid_ptr, &volinfo); + if (ret == -1) { + gf_log(this->name, GF_LOG_ERROR, "Couldn't find volinfo"); + goto out; + } - if (ret > 0) { - /* to allocate the proper buffer to hold the file data */ - ret = stat (filename, &stbuf); - if (ret < 0){ - gf_log ("glusterd", GF_LOG_ERROR, - "Unable to stat %s (%s)", - filename, strerror (errno)); - goto fail; - } + glusterd_svc_build_gfproxyd_volfile_path(volinfo, path, path_len); + ret = 0; + goto out; + } + + volid_ptr = strstr(volume_id, "shd/"); + if (volid_ptr) { + volid_ptr = strchr(volid_ptr, '/'); + if (!volid_ptr) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL); + ret = -1; + goto out; + } + volid_ptr++; - spec_fd = open (filename, O_RDONLY); - if (spec_fd < 0) { - gf_log ("glusterd", GF_LOG_ERROR, - "Unable to open %s (%s)", - filename, strerror (errno)); - goto fail; - } - ret = file_len = stbuf.st_size; - } else { - op_errno = ENOENT; + ret = glusterd_volinfo_find(volid_ptr, &volinfo); + if (ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, + "Couldn't find volinfo for volid=%s", volid_ptr); + goto out; } - if (file_len) { - rsp.spec = CALLOC (file_len+1, sizeof (char)); - if (!rsp.spec) { - ret = -1; - op_errno = ENOMEM; - goto fail; - } - ret = read (spec_fd, rsp.spec, file_len); + glusterd_svc_build_shd_volfile_path(volinfo, path, path_len); - close (spec_fd); + ret = glusterd_svc_set_shd_pidfile(volinfo, dict); + if (ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Couldn't set pidfile in dict for volid=%s", volid_ptr); + goto out; } + ret = 0; + goto out; + } - /* convert to XDR */ -fail: - rsp.op_ret = ret; + volid_ptr = strstr(volume_id, "/snaps/"); + if (volid_ptr) { + ret = 
get_snap_volname_and_volinfo(volid_ptr, &volname, &volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_INFO_FAIL, + "Failed to get snap" + " volinfo from path (%s)", + volume_id); + ret = -1; + goto out; + } - if (op_errno) - rsp.op_errno = gf_errno_to_error (op_errno); - if (cookie) - rsp.op_errno = cookie; + len = snprintf(path_prefix, sizeof(path_prefix), "%s/snaps/%s", + priv->workdir, volinfo->snapshot->snapname); + volid_ptr = volname; + /* this is to ensure that volname recvd from + get_snap_volname_and_volinfo is free'd */ + free_ptr = volname; + if ((len < 0) || (len >= sizeof(path_prefix))) { + ret = -1; + goto out; + } - if (!rsp.spec) - rsp.spec = ""; + goto gotvolinfo; + } + + volid_ptr = strstr(volume_id, "rebalance/"); + if (volid_ptr) { + volid_ptr = strchr(volid_ptr, '/'); + if (!volid_ptr) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL); + ret = -1; + goto out; + } + volid_ptr++; + + ret = glusterd_volinfo_find(volid_ptr, &volinfo); + if (ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, + "Couldn't find volinfo"); + goto out; + } + glusterd_get_rebalance_volfile(volinfo, path, path_len); + ret = 0; + goto out; + } + + volid_ptr = strstr(volume_id, "client_per_brick/"); + if (volid_ptr) { + volid_ptr = strchr(volid_ptr, '/'); + if (!volid_ptr) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL); + ret = -1; + goto out; + } + volid_ptr++; + + dup_volname = gf_strdup(volid_ptr); + if (!dup_volname) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "strdup failed"); + ret = -1; + goto out; + } - glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - (gd_serialize_t)xdr_serialize_getspec_rsp); - if (args.key) - free (args.key);//malloced by xdr - if (rsp.spec && (strcmp (rsp.spec, ""))) - free (rsp.spec); + /* Split the volume name */ + vol = strtok_r(dup_volname, ".", &save_ptr); + if (!vol) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_SPLIT_FAIL, + "Volume name=%s", dup_volname, NULL); + ret = -1; + goto out; + } + ret = glusterd_volinfo_find(vol, &volinfo); + if (ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, + "Couldn't find volinfo"); + goto out; + } + ret = glusterd_get_client_per_brick_volfile(volinfo, volid_ptr, path, + path_len); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_MEMORY, + "failed to get volinfo path"); + goto out; + } - return 0; + ret = sys_access(path, F_OK); + goto out; + } + + if (volume_id[0] == '/') { + /* Normal behavior */ + volid_ptr = volume_id; + volid_ptr++; + + } else { + /* Bringing in NFS like behavior for mount command, */ + /* With this, one can mount a volume with below cmd */ + /* bash# mount -t glusterfs server:/volume /mnt/pnt */ + volid_ptr = volume_id; + } + + len = snprintf(path_prefix, sizeof(path_prefix), "%s/vols", priv->workdir); + if ((len < 0) || (len >= sizeof(path_prefix))) { + ret = -1; + goto out; + } + + ret = glusterd_volinfo_find(volid_ptr, &volinfo); + + if (ret) { + dup_volname = gf_strdup(volid_ptr); + if (!dup_volname) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED, + "Volume name=%s", volid_ptr, NULL); + ret = -1; + goto out; + } + /* Split the volume name */ + vol = strtok_r(dup_volname, ".", &save_ptr); + if (!vol) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_SPLIT_FAIL, + "Volume name=%s", dup_volname, NULL); + ret = -1; + goto out; + } + ret = glusterd_volinfo_find(vol, &volinfo); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 
errno, GD_MSG_VOLINFO_GET_FAIL, + NULL); + goto out; + } + } + +gotvolinfo: + if (!glusterd_auth_get_username(volinfo)) + trusted_str = NULL; + + ret = snprintf(path, path_len, "%s/%s/%s.vol", path_prefix, + volinfo->volname, volid_ptr); + if (ret == -1) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); + goto out; + } + + ret = sys_stat(path, &stbuf); + + if ((ret == -1) && (errno == ENOENT)) { + if (snprintf(dup_volid, PATH_MAX, "%s", volid_ptr) >= PATH_MAX) + goto out; + if (!strchr(dup_volid, '.')) { + switch (volinfo->transport_type) { + case GF_TRANSPORT_TCP: + strcat(dup_volid, ".tcp"); + break; + case GF_TRANSPORT_RDMA: + strcat(dup_volid, ".rdma"); + break; + case GF_TRANSPORT_BOTH_TCP_RDMA: + strcat(dup_volid, ".tcp"); + break; + default: + break; + } + } + snprintf(path, path_len, "%s/%s/%s%s-fuse.vol", path_prefix, + volinfo->volname, (trusted_str ? trusted_str : ""), dup_volid); + ret = sys_stat(path, &stbuf); + } +out: + if (dup_volname) + GF_FREE(dup_volname); + if (free_ptr) + GF_FREE(free_ptr); + return ret; } +/* Get and store op-versions of the clients sending the getspec request + * Clients of versions <= 3.3, don't send op-versions, their op-versions are + * defaulted to 1. Also fetch brick_name. + */ +int32_t +glusterd_get_args_from_dict(gf_getspec_req *args, peer_info_t *peerinfo, + char **brick_name) +{ + dict_t *dict = NULL; + int client_max_op_version = 1; + int client_min_op_version = 1; + int32_t ret = -1; + xlator_t *this = NULL; + char *name = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(args); + GF_ASSERT(peerinfo); + + if (!args->xdata.xdata_len) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); + ret = 0; + goto out; + } + + dict = dict_new(); + if (!dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); + ret = -1; + goto out; + } + + ret = dict_unserialize(args->xdata.xdata_val, args->xdata.xdata_len, &dict); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + "Failed to unserialize request dictionary"); + goto out; + } + + ret = dict_get_int32(dict, "min-op-version", &client_min_op_version); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get client-min-op-version"); + goto out; + } + + ret = dict_get_int32(dict, "max-op-version", &client_max_op_version); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get client-max-op-version"); + goto out; + } + + ret = dict_get_str(dict, "brick_name", &name); + if (ret) { + gf_msg_debug(this->name, 0, "No brick name present"); + ret = 0; + goto out; + } + *brick_name = gf_strdup(name); + if (*brick_name == NULL) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED, + "Brick_name=%s", name, NULL); + ret = -1; + goto out; + } + + gf_msg_debug(this->name, 0, "brick_name = %s", *brick_name); +out: + peerinfo->max_op_version = client_max_op_version; + peerinfo->min_op_version = client_min_op_version; -rpcsvc_actor_t gluster_handshake_actors[] = { - [GF_HNDSK_NULL] = {"NULL", GF_HNDSK_NULL, NULL, NULL, NULL }, - [GF_HNDSK_GETSPEC] = {"GETSPEC", GF_HNDSK_GETSPEC, server_getspec, NULL, NULL }, -}; + if (dict) + dict_unref(dict); + return ret; +} -struct rpcsvc_program gluster_handshake_prog = { - .progname = "GlusterFS Handshake", - .prognum = GLUSTER_HNDSK_PROGRAM, - .progver = GLUSTER_HNDSK_VERSION, - .actors = gluster_handshake_actors, - .numactors = GF_HNDSK_MAXVALUE, -}; +/* Given the missed_snapinfo and snap_opinfo 
take the + * missed lvm snapshot + */ +int32_t +glusterd_create_missed_snap(glusterd_missed_snap_info *missed_snapinfo, + glusterd_snap_op_t *snap_opinfo) +{ + char *device = NULL; + glusterd_conf_t *priv = NULL; + glusterd_snap_t *snap = NULL; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + int32_t ret = -1; + int32_t i = 0; + uuid_t snap_uuid = { + 0, + }; + xlator_t *this = NULL; + char *mnt_device = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + GF_ASSERT(missed_snapinfo); + GF_ASSERT(snap_opinfo); + + gf_uuid_parse(missed_snapinfo->snap_uuid, snap_uuid); + + /* Find the snap-object */ + snap = glusterd_find_snap_by_id(snap_uuid); + if (!snap) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_NOT_FOUND, + "Unable to find the snap with snap_uuid %s", + missed_snapinfo->snap_uuid); + ret = -1; + goto out; + } + + /* Find the snap_vol */ + cds_list_for_each_entry(volinfo, &snap->volumes, vol_list) + { + if (!strcmp(volinfo->volname, snap_opinfo->snap_vol_id)) { + snap_vol = volinfo; + break; + } + } + + if (!snap_vol) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, + "Unable to find the snap_vol(%s) " + "for snap(%s)", + snap_opinfo->snap_vol_id, snap->snapname); + ret = -1; + goto out; + } + + /* Find the missed brick in the snap volume */ + cds_list_for_each_entry(brickinfo, &snap_vol->bricks, brick_list) + { + i++; + if (i == snap_opinfo->brick_num) + break; + } + + if (brickinfo->snap_status != -1) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_STATUS_NOT_PENDING, + "The snap status of the missed " + "brick(%s) is not pending", + brickinfo->path); + goto out; + } + + /* Fetch the device path */ + mnt_device = glusterd_get_brick_mount_device(snap_opinfo->brick_path); + if (!mnt_device) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_GET_INFO_FAIL, + "Getting device name for the" + "brick %s:%s failed", + brickinfo->hostname, snap_opinfo->brick_path); + ret = -1; + goto out; + } + + device = glusterd_build_snap_device_path(mnt_device, snap_vol->volname, + snap_opinfo->brick_num - 1); + if (!device) { + gf_msg(this->name, GF_LOG_ERROR, ENXIO, + GD_MSG_SNAP_DEVICE_NAME_GET_FAIL, + "cannot copy the snapshot " + "device name (volname: %s, snapname: %s)", + snap_vol->volname, snap->snapname); + ret = -1; + goto out; + } + if (snprintf(brickinfo->device_path, sizeof(brickinfo->device_path), "%s", + device) >= sizeof(brickinfo->device_path)) { + gf_msg(this->name, GF_LOG_ERROR, ENXIO, + GD_MSG_SNAP_DEVICE_NAME_GET_FAIL, + "cannot copy the device_path " + "(device_path: %s)", + brickinfo->device_path); + ret = -1; + goto out; + } + + /* Update the backend file-system type of snap brick in + * snap volinfo. */ + ret = glusterd_update_mntopts(snap_opinfo->brick_path, brickinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRK_MOUNTOPTS_FAIL, + "Failed to update " + "mount options for %s brick", + brickinfo->path); + /* We should not fail snapshot operation if we fail to get + * the file-system type */ + } + + ret = glusterd_take_lvm_snapshot(brickinfo, snap_opinfo->brick_path); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPSHOT_OP_FAILED, + "Failed to take snapshot of %s", snap_opinfo->brick_path); + goto out; + } + + /* After the snapshot both the origin brick (LVM brick) and + * the snapshot brick will have the same file-system label. This + * will cause lot of problems at mount time. 
Therefore we must + * generate a new label for the snapshot brick + */ + ret = glusterd_update_fs_label(brickinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_SET_INFO_FAIL, + "Failed to update " + "file-system label for %s brick", + brickinfo->path); + /* Failing to update label should not cause snapshot failure. + * Currently label is updated only for XFS and ext2/ext3/ext4 + * file-system. + */ + } + + /* Create and mount the snap brick */ + ret = glusterd_snap_brick_create(snap_vol, brickinfo, + snap_opinfo->brick_num - 1, 0); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_CREATION_FAIL, + "Failed to " + " create and mount the brick(%s) for the snap %s", + snap_opinfo->brick_path, snap_vol->snapshot->snapname); + goto out; + } + + brickinfo->snap_status = 0; + ret = glusterd_brick_start(snap_vol, brickinfo, _gf_false, _gf_false); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_BRICK_DISCONNECTED, + "starting the " + "brick %s:%s for the snap %s failed", + brickinfo->hostname, brickinfo->path, snap->snapname); + goto out; + } + ret = glusterd_store_volinfo(snap_vol, GLUSTERD_VOLINFO_VER_AC_NONE); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_STORE_FAIL, + "Failed to store snapshot " + "volinfo (%s) for snap %s", + snap_vol->volname, snap->snapname); + goto out; + } -char *glusterd_dump_proc[GF_DUMP_MAXVALUE] = { - [GF_DUMP_NULL] = "NULL", - [GF_DUMP_DUMP] = "DUMP", -}; +out: + if (mnt_device) + GF_FREE(mnt_device); + if (device) + GF_FREE(device); -rpc_clnt_prog_t glusterd_dump_prog = { - .progname = "GLUSTERD-DUMP", - .prognum = GLUSTER_DUMP_PROGRAM, - .progver = GLUSTER_DUMP_VERSION, - .procnames = glusterd_dump_proc, -}; + return ret; +} -static int -glusterd_event_connected_inject (glusterd_peerctx_t *peerctx) +/* Look into missed_snap_list, to see it the given brick_name, + * has any missed snap creates for the local node */ +int32_t +glusterd_take_missing_brick_snapshots(char *brick_name) { - GF_ASSERT (peerctx); + char *my_node_uuid = NULL; + glusterd_conf_t *priv = NULL; + glusterd_missed_snap_info *missed_snapinfo = NULL; + glusterd_snap_op_t *snap_opinfo = NULL; + int32_t ret = -1; + gf_boolean_t update_list = _gf_false; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + GF_ASSERT(brick_name); + + my_node_uuid = uuid_utoa(MY_UUID); + + cds_list_for_each_entry(missed_snapinfo, &priv->missed_snaps_list, + missed_snaps) + { + /* If the missed snap op is not for the local node + * then continue + */ + if (strcmp(my_node_uuid, missed_snapinfo->node_uuid)) + continue; + + cds_list_for_each_entry(snap_opinfo, &missed_snapinfo->snap_ops, + snap_ops_list) + { + /* Check if the missed snap's op is a create for + * the brick name in question + */ + if ((snap_opinfo->op == GF_SNAP_OPTION_TYPE_CREATE) && + (!strcmp(brick_name, snap_opinfo->brick_path))) { + /* Perform a snap create if the + * op is still pending + */ + if (snap_opinfo->status == GD_MISSED_SNAP_PENDING) { + ret = glusterd_create_missed_snap(missed_snapinfo, + snap_opinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_MISSED_SNAP_CREATE_FAIL, + "Failed to create " + "missed snap for %s", + brick_name); + /* At this stage, we will mark + * the entry as done. Because + * of the failure other + * snapshots will not be + * affected, and neither the + * brick. Only the current snap + * brick will always remain as + * pending. 
+ */ + } + snap_opinfo->status = GD_MISSED_SNAP_DONE; + update_list = _gf_true; + } + /* One snap-id won't have more than one missed + * create for the same brick path. Hence + * breaking in search of another missed create + * for the same brick path in the local node + */ + break; + } + } + } - glusterd_friend_sm_event_t *event = NULL; - glusterd_probe_ctx_t *ctx = NULL; - int ret = -1; - glusterd_peerinfo_t *peerinfo = NULL; + if (update_list == _gf_true) { + ret = glusterd_store_update_missed_snaps(); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_MISSED_SNAP_LIST_STORE_FAIL, + "Failed to update missed_snaps_list"); + goto out; + } + } + ret = 0; +out: + return ret; +} - ret = glusterd_friend_sm_new_event - (GD_FRIEND_EVENT_CONNECTED, &event); +/* Checks if the client supports the volume, ie. client can understand all the + * options in the volfile + */ +static gf_boolean_t +_client_supports_volume(peer_info_t *peerinfo, int32_t *op_errno) +{ + gf_boolean_t ret = _gf_true; + glusterd_volinfo_t *volinfo = NULL; + + GF_ASSERT(peerinfo); + GF_ASSERT(op_errno); + + /* Only check when the volfile being requested is a volume. Not finding + * a volinfo implies that the volfile requested for is not of a gluster + * volume. A non volume volfile is requested by the local gluster + * services like shd and nfs-server. These need not be checked as they + * will be running at the same op-version as glusterd and will be able + * to support all the features + */ + if ((glusterd_volinfo_find(peerinfo->volname, &volinfo) == 0) && + ((peerinfo->min_op_version > volinfo->client_op_version) || + (peerinfo->max_op_version < volinfo->client_op_version))) { + ret = _gf_false; + *op_errno = ENOTSUP; + gf_msg("glusterd", GF_LOG_INFO, ENOTSUP, GD_MSG_UNSUPPORTED_VERSION, + "Client %s (%d -> %d) doesn't support required " + "op-version (%d). Rejecting volfile request.", + peerinfo->identifier, peerinfo->min_op_version, + peerinfo->max_op_version, volinfo->client_op_version); + } + + return ret; +} - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get new event"); - goto out; +int +__server_getspec(rpcsvc_request_t *req) +{ + int32_t ret = -1; + int32_t op_ret = -1; + int32_t op_errno = 0; + int32_t spec_fd = -1; + size_t file_len = 0; + char filename[PATH_MAX] = { + 0, + }; + struct stat stbuf = { + 0, + }; + char *brick_name = NULL; + char *volume = NULL; + char *tmp = NULL; + rpc_transport_t *trans = NULL; + gf_getspec_req args = { + 0, + }; + gf_getspec_rsp rsp = { + 0, + }; + char addrstr[RPCSVC_PEER_STRLEN] = {0}; + peer_info_t *peerinfo = NULL; + xlator_t *this = NULL; + dict_t *dict = NULL; + glusterd_peerinfo_t *peer = NULL; + glusterd_conf_t *conf = NULL; + int peer_cnt = 0; + char *peer_hosts = NULL; + char *tmp_str = NULL; + char portstr[10] = { + 0, + }; + int len = 0; + + this = THIS; + GF_ASSERT(this); + + conf = this->private; + ret = xdr_to_generic(req->msg[0], &args, (xdrproc_t)xdr_gf_getspec_req); + if (ret < 0) { + // failed to decode msg; + req->rpc_err = GARBAGE_ARGS; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, + "Failed to decode the message"); + goto fail; + } + + peerinfo = &req->trans->peerinfo; + + volume = args.key; + + if (strlen(volume) >= (NAME_MAX)) { + op_errno = EINVAL; + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_NAME_TOO_LONG, + "volume name too long (%s)", volume); + goto fail; + } + + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_MOUNT_REQ_RCVD, + "Received mount request for volume %s", volume); + + /* Need to strip leading '/' from volnames. 
This was introduced to + * support nfs style mount parameters for native gluster mount + */ + if (volume[0] == '/') + ret = snprintf(peerinfo->volname, sizeof(peerinfo->volname), "%s", + &volume[1]); + else + ret = snprintf(peerinfo->volname, sizeof(peerinfo->volname), "%s", + volume); + if (ret < 0 || ret >= sizeof(peerinfo->volname)) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, + "peerinfo->volname %s truncated or error occurred: " + "(ret: %d)", + peerinfo->volname, ret); + ret = -1; + goto fail; + } + + ret = glusterd_get_args_from_dict(&args, peerinfo, &brick_name); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get args from dict"); + goto fail; + } + + if (!_client_supports_volume(peerinfo, &op_errno)) { + ret = -1; + goto fail; + } + + dict = dict_new(); + if (!dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); + ret = -ENOMEM; + goto fail; + } + + trans = req->trans; + /* addrstr will be empty for cli socket connections */ + ret = rpcsvc_transport_peername(trans, (char *)&addrstr, sizeof(addrstr)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_RPC_TRANSPORT_GET_PEERNAME_FAIL, + "Failed to get the peername"); + goto fail; + } + + tmp = strrchr(addrstr, ':'); + if (tmp) + *tmp = '\0'; + + /* The trusted volfiles are given to the glusterd owned process like NFS + * server, self-heal daemon etc., so that they are not inadvertently + * blocked by a auth.{allow,reject} setting. The trusted volfile is not + * meant for external users. + * For unix domain socket, address will be empty. + */ + if (strlen(addrstr) == 0 || gf_is_local_addr(addrstr)) { + ret = build_volfile_path(volume, filename, sizeof(filename), + TRUSTED_PREFIX, dict); + } else { + ret = build_volfile_path(volume, filename, sizeof(filename), NULL, + dict); + } + + RCU_READ_LOCK; + cds_list_for_each_entry_rcu(peer, &conf->peers, uuid_list) + { + if (!peer->connected) + continue; + if (!peer_hosts) { + if (peer->port) { + snprintf(portstr, sizeof(portstr), "%d", peer->port); + } else { + snprintf(portstr, sizeof(portstr), "%d", GLUSTERD_DEFAULT_PORT); + } + len = strlen(peer->hostname) + strlen(portstr) + 3; + tmp_str = GF_CALLOC(1, len, gf_gld_mt_char); + snprintf(tmp_str, len, "%s%s%s%s", peer->hostname, ":", portstr, + " "); + peer_hosts = tmp_str; + } else { + len = strlen(peer_hosts) + strlen(peer->hostname) + + strlen(portstr) + 3; + tmp_str = GF_CALLOC(1, len, gf_gld_mt_char); + snprintf(tmp_str, len, "%s%s%s%s%s", peer_hosts, peer->hostname, + ":", portstr, " "); + GF_FREE(peer_hosts); + peer_hosts = tmp_str; + } + peer_cnt++; + } + RCU_READ_UNLOCK; + if (peer_cnt) { + op_ret = dict_set_str(dict, GLUSTERD_BRICK_SERVERS, peer_hosts); + if (op_ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "failed to set peer_host in dict"); + ret = op_ret; + goto fail; + } + } + + if (ret == 0) { + if (dict->count > 0) { + ret = dict_allocate_and_serialize(dict, &rsp.xdata.xdata_val, + &rsp.xdata.xdata_len); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); + goto fail; + } } - ctx = GF_CALLOC (1, sizeof(*ctx), gf_gld_mt_probe_ctx_t); + /* to allocate the proper buffer to hold the file data */ + ret = sys_stat(filename, &stbuf); + if (ret < 0) { + gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "Unable to stat %s (%s)", filename, strerror(errno)); + goto fail; + } - if (!ctx) { - ret = -1; - gf_log ("", GF_LOG_ERROR, "Memory not 
available"); - goto out; + spec_fd = open(filename, O_RDONLY); + if (spec_fd < 0) { + gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "Unable to open %s (%s)", filename, strerror(errno)); + goto fail; + } + ret = file_len = stbuf.st_size; + } else { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_PEER_NOT_FOUND, NULL); + op_errno = ENOENT; + goto fail; + } + + if (file_len) { + rsp.spec = CALLOC(file_len + 1, sizeof(char)); + if (!rsp.spec) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); + ret = -1; + op_errno = ENOMEM; + goto fail; } + ret = sys_read(spec_fd, rsp.spec, file_len); + } + + if (brick_name) { + gf_msg_debug(this->name, 0, "Look for missing snap creates for %s", + brick_name); + op_ret = glusterd_take_missing_brick_snapshots(brick_name); + if (op_ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MISSED_SNAP_CREATE_FAIL, + "Failed to take missing brick snapshots"); + ret = -1; + goto fail; + } + } + /* convert to XDR */ +fail: + if (spec_fd >= 0) + sys_close(spec_fd); + + GF_FREE(brick_name); + + rsp.op_ret = ret; + if (rsp.op_ret < 0) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MOUNT_REQ_FAIL, + "Failed to mount the volume"); + + if (op_errno) + rsp.op_errno = gf_errno_to_error(op_errno); + + if (!rsp.spec) + rsp.spec = strdup(""); - peerinfo = peerctx->peerinfo; - ctx->hostname = gf_strdup (peerinfo->hostname); - ctx->port = peerinfo->port; - ctx->req = peerctx->args.req; + glusterd_submit_reply(req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gf_getspec_rsp); + free(args.key); // malloced by xdr + free(rsp.spec); - event->peerinfo = peerinfo; - event->ctx = ctx; + if (peer_hosts) + GF_FREE(peer_hosts); + if (dict) + dict_unref(dict); - ret = glusterd_friend_sm_inject_event (event); + if (args.xdata.xdata_val) + free(args.xdata.xdata_val); + if (rsp.xdata.xdata_val) + GF_FREE(rsp.xdata.xdata_val); + + return 0; +} + +int +server_getspec(rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler(req, __server_getspec); +} + +int32_t +__server_event_notify(rpcsvc_request_t *req) +{ + int32_t ret = -1; + gf_event_notify_req args = { + 0, + }; + gf_event_notify_rsp rsp = { + 0, + }; + dict_t *dict = NULL; + gf_boolean_t need_rsp = _gf_true; + + ret = xdr_to_generic(req->msg[0], &args, + (xdrproc_t)xdr_gf_event_notify_req); + if (ret < 0) { + req->rpc_err = GARBAGE_ARGS; + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL); + goto fail; + } + + if (args.dict.dict_len) { + dict = dict_new(); + if (!dict) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, + NULL); + return ret; + } + ret = dict_unserialize(args.dict.dict_val, args.dict.dict_len, &dict); if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, "Unable to inject " - "EVENT_CONNECTED ret = %d", ret); - goto out; + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + "Failed to unserialize req"); + goto fail; } + } + + switch (args.op) { + case GF_EN_DEFRAG_STATUS: + gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_DEFRAG_STATUS_UPDATED, + "received defrag status updated"); + if (dict) { + glusterd_defrag_event_notify_handle(dict); + need_rsp = _gf_false; + } + break; + default: + gf_msg("glusterd", GF_LOG_ERROR, EINVAL, GD_MSG_OP_UNSUPPORTED, + "Unknown op received in event " + "notify"); + gf_event(EVENT_NOTIFY_UNKNOWN_OP, "op=%d", args.op); + ret = -1; + break; + } + +fail: + rsp.op_ret = ret; + + if (need_rsp) + glusterd_submit_reply(req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gf_event_notify_rsp); + if (dict) + dict_unref(dict); + 
free(args.dict.dict_val); // malloced by xdr + + return 0; +} + +int32_t +server_event_notify(rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler(req, __server_event_notify); +} +int +gd_validate_cluster_op_version(xlator_t *this, int cluster_op_version, + char *peerid) +{ + int ret = -1; + glusterd_conf_t *conf = NULL; + + conf = this->private; + + if (cluster_op_version > GD_OP_VERSION_MAX) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OP_VERSION_MISMATCH, + "operating version %d is more than the maximum " + "supported (%d) on the machine (as per peer request " + "from %s)", + cluster_op_version, GD_OP_VERSION_MAX, peerid); + goto out; + } + + /* The peer can only reduce its op-version when it doesn't have any + * volumes. Reducing op-version when it already contains volumes can + * lead to inconsistencies in the cluster + */ + if ((cluster_op_version < conf->op_version) && + !cds_list_empty(&conf->volumes)) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OP_VERS_ADJUST_FAIL, + "cannot reduce operating version to %d from current " + "version %d as volumes exist (as per peer request from " + "%s)", + cluster_op_version, conf->op_version, peerid); + goto out; + } + + ret = 0; out: - gf_log ("", GF_LOG_DEBUG, "returning %d", ret); - return ret; + return ret; +} + +/* Validate if glusterd can serve the management handshake request + * + * Requests are allowed if, + * - glusterd has no peers & no volumes, or + * - the request came from a known peer + * A known peer is identified using the following steps + * - the dict is checked for a peer uuid, which if present is matched with the + * peer list, else + * - the incoming request address is matched with the peer list + */ +gf_boolean_t +gd_validate_mgmt_hndsk_req(rpcsvc_request_t *req, dict_t *dict) +{ + int ret = -1; + char hostname[UNIX_PATH_MAX + 1] = { + 0, + }; + glusterd_peerinfo_t *peer = NULL; + xlator_t *this = NULL; + char *uuid_str = NULL; + uuid_t peer_uuid = { + 0, + }; + + this = THIS; + GF_ASSERT(this); + + if (!glusterd_have_peers() && !glusterd_have_volumes()) + return _gf_true; + + ret = dict_get_str(dict, GD_PEER_ID_KEY, &uuid_str); + /* Try to match uuid only if available, don't fail as older peers will + * not send a uuid + */ + if (!ret) { + gf_uuid_parse(uuid_str, peer_uuid); + RCU_READ_LOCK; + ret = (glusterd_peerinfo_find(peer_uuid, NULL) != NULL); + RCU_READ_UNLOCK; + if (ret) + return _gf_true; + } + + /* If you cannot get the hostname, you cannot authenticate */ + ret = glusterd_remote_hostname_get(req, hostname, sizeof(hostname)); + if (ret) + return _gf_false; + + /* If peer object is not found it indicates that request is from an + * unknown peer, if its found, validate whether its uuid is also + * available in the peerinfo list. There could be a case where hostname + * is available in the peerinfo list but the uuid has changed of the + * node due to a reinstall, in that case the validation should fail! 
+ */ + RCU_READ_LOCK; + if (!uuid_str) { + ret = (glusterd_peerinfo_find(NULL, hostname) == NULL); + } else { + peer = glusterd_peerinfo_find(NULL, hostname); + if (!peer) { + ret = -1; + } else if (peer && glusterd_peerinfo_find(peer_uuid, NULL) != NULL) { + ret = 0; + } else { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HANDSHAKE_REQ_REJECTED, + "Request from " + "peer %s has an entry in peerinfo, but uuid " + "does not match", + req->trans->peerinfo.identifier); + ret = -1; + } + } + RCU_READ_UNLOCK; + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HANDSHAKE_REQ_REJECTED, + "Rejecting management " + "handshake request from unknown peer %s", + req->trans->peerinfo.identifier); + gf_event(EVENT_PEER_REJECT, "peer=%s", req->trans->peerinfo.identifier); + return _gf_false; + } + + return _gf_true; } int -glusterd_set_clnt_mgmt_program (glusterd_peerinfo_t *peerinfo, - gf_prog_detail *prog) +__glusterd_mgmt_hndsk_versions(rpcsvc_request_t *req) { - gf_prog_detail *trav = NULL; - int ret = -1; + dict_t *dict = NULL; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + int ret = -1; + int op_errno = EINVAL; + gf_mgmt_hndsk_req args = { + { + 0, + }, + }; + gf_mgmt_hndsk_rsp rsp = { + 0, + }; + dict_t *args_dict = NULL; + + this = THIS; + conf = this->private; + + ret = xdr_to_generic(req->msg[0], &args, (xdrproc_t)xdr_gf_mgmt_hndsk_req); + if (ret < 0) { + // failed to decode msg; + req->rpc_err = GARBAGE_ARGS; + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL); + goto out; + } + + GF_PROTOCOL_DICT_UNSERIALIZE(this, args_dict, args.hndsk.hndsk_val, + (args.hndsk.hndsk_len), ret, op_errno, out); + + /* Check if we can service the request */ + if (!gd_validate_mgmt_hndsk_req(req, args_dict)) { + ret = -1; + goto out; + } + + dict = dict_new(); + if (!dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); + goto out; + } + + ret = dict_set_int32(dict, GD_OP_VERSION_KEY, conf->op_version); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_SET_FAILED, + "failed to set operating version"); + rsp.op_ret = ret; + goto out; + } + + ret = dict_set_int32(dict, GD_MIN_OP_VERSION_KEY, GD_OP_VERSION_MIN); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_SET_FAILED, + "failed to set %s", GD_MIN_OP_VERSION_KEY); + rsp.op_ret = ret; + goto out; + } + + ret = dict_set_int32(dict, GD_MAX_OP_VERSION_KEY, GD_OP_VERSION_MAX); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_SET_FAILED, + "failed to set %s", GD_MAX_OP_VERSION_KEY); + rsp.op_ret = ret; + goto out; + } + + ret = 0; + + GF_PROTOCOL_DICT_SERIALIZE(this, dict, (&rsp.hndsk.hndsk_val), + rsp.hndsk.hndsk_len, op_errno, out); +out: - if (!peerinfo || !prog) - goto out; + rsp.op_ret = ret; + rsp.op_errno = op_errno; - trav = prog; + glusterd_submit_reply(req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gf_mgmt_hndsk_rsp); - while (trav) { - /* Select 'programs' */ - if ((gd_clnt_mgmt_prog.prognum == trav->prognum) && - (gd_clnt_mgmt_prog.progver == trav->progver)) { - peerinfo->mgmt = &gd_clnt_mgmt_prog; - ret = 0; - /* Break here, as this gets higher priority */ - break; - } - if ((glusterd3_1_mgmt_prog.prognum == trav->prognum) && - (glusterd3_1_mgmt_prog.progver == trav->progver)) { - peerinfo->mgmt = &glusterd3_1_mgmt_prog; - ret = 0; - } - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "%s (%"PRId64":%"PRId64") not supported", - trav->progname, trav->prognum, - trav->progver); - } - trav = trav->next; - } + ret = 0; - if (!ret && peerinfo->mgmt) { - gf_log 
("", GF_LOG_INFO, - "Using Program %s, Num (%d), Version (%d)", - peerinfo->mgmt->progname, peerinfo->mgmt->prognum, - peerinfo->mgmt->progver); - } + if (dict) + dict_unref(dict); + + if (args.hndsk.hndsk_val) + free(args.hndsk.hndsk_val); + + if (rsp.hndsk.hndsk_val) + GF_FREE(rsp.hndsk.hndsk_val); + + if (args_dict) + dict_unref(args_dict); + + return ret; +} + +int +glusterd_mgmt_hndsk_versions(rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler(req, __glusterd_mgmt_hndsk_versions); +} + +int +__glusterd_mgmt_hndsk_versions_ack(rpcsvc_request_t *req) +{ + dict_t *clnt_dict = NULL; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + int ret = -1; + int op_errno = EINVAL; + int peer_op_version = 0; + gf_mgmt_hndsk_req args = { + { + 0, + }, + }; + gf_mgmt_hndsk_rsp rsp = { + 0, + }; + + this = THIS; + conf = this->private; + + ret = xdr_to_generic(req->msg[0], &args, (xdrproc_t)xdr_gf_mgmt_hndsk_req); + if (ret < 0) { + // failed to decode msg; + req->rpc_err = GARBAGE_ARGS; + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL); + goto out; + } + + GF_PROTOCOL_DICT_UNSERIALIZE(this, clnt_dict, args.hndsk.hndsk_val, + (args.hndsk.hndsk_len), ret, op_errno, out); + + ret = dict_get_int32(clnt_dict, GD_OP_VERSION_KEY, &peer_op_version); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED, + "failed to get the op-version key peer=%s", + req->trans->peerinfo.identifier); + goto out; + } + + ret = gd_validate_cluster_op_version(this, peer_op_version, + req->trans->peerinfo.identifier); + if (ret) + goto out; + + /* As this is ACK from the Cluster for the versions supported, + can set the op-version of 'this' glusterd to the one + received. */ + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_VERS_INFO, + "using the op-version %d", peer_op_version); + conf->op_version = peer_op_version; + ret = glusterd_store_global_info(this); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLOBAL_OP_VERSION_SET_FAIL, + "Failed to store op-version"); out: - return ret; + rsp.op_ret = ret; + rsp.op_errno = op_errno; + + glusterd_submit_reply(req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gf_mgmt_hndsk_rsp); + + ret = 0; + + if (clnt_dict) + dict_unref(clnt_dict); + + if (args.hndsk.hndsk_val) + free(args.hndsk.hndsk_val); + + return ret; } int -glusterd_peer_dump_version_cbk (struct rpc_req *req, struct iovec *iov, - int count, void *myframe) +glusterd_mgmt_hndsk_versions_ack(rpcsvc_request_t *req) { - int ret = -1; - gf_dump_rsp rsp = {0,}; - xlator_t *this = NULL; - gf_prog_detail *trav = NULL; - gf_prog_detail *next = NULL; - call_frame_t *frame = NULL; - glusterd_peerinfo_t *peerinfo = NULL; - glusterd_peerctx_t *peerctx = NULL; - - this = THIS; - frame = myframe; - peerctx = frame->local; - peerinfo = peerctx->peerinfo; - - if (-1 == req->rpc_status) { - gf_log ("", GF_LOG_ERROR, - "error through RPC layer, retry again later"); - goto out; + return glusterd_big_locked_handler(req, __glusterd_mgmt_hndsk_versions_ack); +} + +int +__server_get_volume_info(rpcsvc_request_t *req) +{ + int ret = -1; + int32_t op_errno = ENOENT; + gf_get_volume_info_req vol_info_req = {{ + 0, + }}; + gf_get_volume_info_rsp vol_info_rsp = { + 0, + }; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + dict_t *dict = NULL; + dict_t *dict_rsp = NULL; + char *volume_id_str = NULL; + int32_t flags = 0; + + xlator_t *this = THIS; + GF_ASSERT(this); + + ret = xdr_to_generic(req->msg[0], &vol_info_req, + (xdrproc_t)xdr_gf_get_volume_info_req); + if (ret < 0) { + /* 
failed to decode msg */ + req->rpc_err = GARBAGE_ARGS; + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL); + goto out; + } + gf_smsg(this->name, GF_LOG_INFO, 0, GD_MSG_VOL_INFO_REQ_RECVD, NULL); + + if (vol_info_req.dict.dict_len) { + /* Unserialize the dictionary */ + dict = dict_new(); + if (!dict) { + gf_smsg(this->name, GF_LOG_WARNING, ENOMEM, GD_MSG_DICT_CREATE_FAIL, + NULL); + op_errno = ENOMEM; + ret = -1; + goto out; } - ret = xdr_to_dump_rsp (*iov, &rsp); + ret = dict_unserialize(vol_info_req.dict.dict_val, + vol_info_req.dict.dict_len, &dict); if (ret < 0) { - gf_log ("", GF_LOG_ERROR, "failed to decode XDR"); - goto out; + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + NULL); + op_errno = -ret; + ret = -1; + goto out; + } else { + dict->extra_stdfree = vol_info_req.dict.dict_val; } - if (-1 == rsp.op_ret) { - gf_log (frame->this->name, GF_LOG_ERROR, - "failed to get the 'versions' from remote server"); - goto out; + } + + ret = dict_get_int32(dict, "flags", &flags); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=flags", NULL); + op_errno = -ret; + ret = -1; + goto out; + } + + if (!flags) { + /* Nothing to query about. Just return success */ + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_FLAG_SET, NULL); + ret = 0; + goto out; + } + + ret = dict_get_str(dict, "volname", &volname); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=volname", NULL); + op_errno = EINVAL; + ret = -1; + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_VOLINFO_GET_FAIL, + "Volname=%s", volname, NULL); + op_errno = EINVAL; + ret = -1; + goto out; + } + + if (flags & (int32_t)GF_GET_VOLUME_UUID) { + volume_id_str = gf_strdup(uuid_utoa(volinfo->volume_id)); + if (!volume_id_str) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED, + NULL); + op_errno = ENOMEM; + ret = -1; + goto out; } - /* Make sure we assign the proper program to peer */ - ret = glusterd_set_clnt_mgmt_program (peerinfo, rsp.prog); + dict_rsp = dict_new(); + if (!dict_rsp) { + gf_smsg(this->name, GF_LOG_WARNING, ENOMEM, GD_MSG_DICT_CREATE_FAIL, + NULL); + op_errno = ENOMEM; + GF_FREE(volume_id_str); + ret = -1; + goto out; + } + ret = dict_set_dynstr(dict_rsp, "volume_id", volume_id_str); if (ret) { - gf_log ("", GF_LOG_WARNING, "failed to set the mgmt program"); - goto out; + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=volume_id", NULL); + op_errno = -ret; + ret = -1; + goto out; } + } + ret = dict_allocate_and_serialize(dict_rsp, &vol_info_rsp.dict.dict_val, + &vol_info_rsp.dict.dict_len); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); + op_errno = -ret; + ret = -1; + goto out; + } + +out: + vol_info_rsp.op_ret = ret; + vol_info_rsp.op_errno = op_errno; + vol_info_rsp.op_errstr = ""; + glusterd_submit_reply(req, &vol_info_rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gf_get_volume_info_rsp); + ret = 0; + + if (dict) { + dict_unref(dict); + } + + if (dict_rsp) { + dict_unref(dict_rsp); + } + + if (vol_info_rsp.dict.dict_val) { + GF_FREE(vol_info_rsp.dict.dict_val); + } + return ret; +} - ret = default_notify (this, GF_EVENT_CHILD_UP, NULL); +int +server_get_volume_info(rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler(req, __server_get_volume_info); +} + +/* + * glusterd function to get the list of snapshot names and uuids + */ +int 
+__server_get_snap_info(rpcsvc_request_t *req) +{ + int ret = -1; + int op_errno = ENOENT; + gf_getsnap_name_uuid_req snap_info_req = {{ + 0, + }}; + gf_getsnap_name_uuid_rsp snap_info_rsp = { + 0, + }; + dict_t *dict = NULL; + dict_t *dict_rsp = NULL; + char *volname = NULL; + + GF_ASSERT(req); + + ret = xdr_to_generic(req->msg[0], &snap_info_req, + (xdrproc_t)xdr_gf_getsnap_name_uuid_req); + if (ret < 0) { + req->rpc_err = GARBAGE_ARGS; + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, + "Failed to decode management handshake response"); + goto out; + } + + if (snap_info_req.dict.dict_len) { + dict = dict_new(); + if (!dict) { + gf_smsg("glusterd", GF_LOG_WARNING, ENOMEM, GD_MSG_DICT_CREATE_FAIL, + NULL); + op_errno = ENOMEM; + ret = -1; + goto out; + } - if (GD_MODE_ON == peerctx->args.mode) { - ret = glusterd_event_connected_inject (peerctx); - peerctx->args.req = NULL; - } else if (GD_MODE_SWITCH_ON == peerctx->args.mode) { - peerctx->args.mode = GD_MODE_ON; + ret = dict_unserialize(snap_info_req.dict.dict_val, + snap_info_req.dict.dict_len, &dict); + if (ret < 0) { + gf_msg("glusterd", GF_LOG_ERROR, EINVAL, + GD_MSG_DICT_UNSERIALIZE_FAIL, + "Failed to unserialize dictionary"); + op_errno = EINVAL; + ret = -1; + goto out; } else { - gf_log ("", GF_LOG_WARNING, "unknown mode %d", - peerctx->args.mode); + dict->extra_stdfree = snap_info_req.dict.dict_val; } + } + + ret = dict_get_str(dict, "volname", &volname); + if (ret) { + op_errno = EINVAL; + gf_msg("glusterd", GF_LOG_ERROR, EINVAL, GD_MSG_DICT_GET_FAILED, + "Failed to retrieve volname"); + ret = -1; + goto out; + } + + dict_rsp = dict_new(); + if (!dict_rsp) { + gf_smsg("glusterd", GF_LOG_WARNING, ENOMEM, GD_MSG_DICT_CREATE_FAIL, + NULL); + op_errno = ENOMEM; + ret = -1; + goto out; + } + + ret = glusterd_snapshot_get_volnames_uuids(dict_rsp, volname, + &snap_info_rsp); + + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND, + "Error getting snapshot volume names and uuids : %s", volname); + op_errno = EINVAL; + } - glusterd_friend_sm (); - glusterd_op_sm (); +out: + snap_info_rsp.op_ret = ret; + snap_info_rsp.op_errno = op_errno; + snap_info_rsp.op_errstr = ""; + glusterd_submit_reply(req, &snap_info_rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gf_getsnap_name_uuid_rsp); + + if (dict) { + dict_unref(dict); + } + + if (dict_rsp) { + dict_unref(dict_rsp); + } + + if (snap_info_rsp.dict.dict_val) { + GF_FREE(snap_info_rsp.dict.dict_val); + } + + return 0; +} + +int +server_get_snap_info(rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler(req, __server_get_snap_info); +} + +static rpcsvc_actor_t gluster_handshake_actors[GF_HNDSK_MAXVALUE] = { + [GF_HNDSK_NULL] = {"NULL", NULL, NULL, GF_HNDSK_NULL, DRC_NA, 0}, + [GF_HNDSK_GETSPEC] = {"GETSPEC", server_getspec, NULL, GF_HNDSK_GETSPEC, + DRC_NA, 0}, + [GF_HNDSK_EVENT_NOTIFY] = {"EVENTNOTIFY", server_event_notify, NULL, + GF_HNDSK_EVENT_NOTIFY, DRC_NA, 0}, + [GF_HNDSK_GET_VOLUME_INFO] = {"GETVOLUMEINFO", server_get_volume_info, NULL, + GF_HNDSK_GET_VOLUME_INFO, DRC_NA, 0}, + [GF_HNDSK_GET_SNAPSHOT_INFO] = {"GETSNAPINFO", server_get_snap_info, NULL, + GF_HNDSK_GET_SNAPSHOT_INFO, DRC_NA, 0}, +}; + +struct rpcsvc_program gluster_handshake_prog = { + .progname = "Gluster Handshake", + .prognum = GLUSTER_HNDSK_PROGRAM, + .progver = GLUSTER_HNDSK_VERSION, + .actors = gluster_handshake_actors, + .numactors = GF_HNDSK_MAXVALUE, +}; + +/* A minimal RPC program just for the cli getspec command */ +static rpcsvc_actor_t 
gluster_cli_getspec_actors[GF_HNDSK_MAXVALUE] = { + [GF_HNDSK_GETSPEC] = {"GETSPEC", server_getspec, NULL, GF_HNDSK_GETSPEC, + DRC_NA, 0}, +}; + +struct rpcsvc_program gluster_cli_getspec_prog = { + .progname = "Gluster Handshake (CLI Getspec)", + .prognum = GLUSTER_HNDSK_PROGRAM, + .progver = GLUSTER_HNDSK_VERSION, + .actors = gluster_cli_getspec_actors, + .numactors = GF_HNDSK_MAXVALUE, +}; + +static char *glusterd_dump_proc[GF_DUMP_MAXVALUE] = { + [GF_DUMP_NULL] = "NULL", + [GF_DUMP_DUMP] = "DUMP", + [GF_DUMP_PING] = "PING", +}; + +static rpc_clnt_prog_t glusterd_dump_prog = { + .progname = "GLUSTERD-DUMP", + .prognum = GLUSTER_DUMP_PROGRAM, + .progver = GLUSTER_DUMP_VERSION, + .procnames = glusterd_dump_proc, +}; + +static rpcsvc_actor_t glusterd_mgmt_hndsk_actors[GD_MGMT_HNDSK_MAXVALUE] = { + [GD_MGMT_HNDSK_NULL] = {"NULL", NULL, NULL, GD_MGMT_HNDSK_NULL, DRC_NA, 0}, + [GD_MGMT_HNDSK_VERSIONS] = {"MGMT-VERS", glusterd_mgmt_hndsk_versions, NULL, + GD_MGMT_HNDSK_VERSIONS, DRC_NA, 0}, + [GD_MGMT_HNDSK_VERSIONS_ACK] = {"MGMT-VERS-ACK", + glusterd_mgmt_hndsk_versions_ack, NULL, + GD_MGMT_HNDSK_VERSIONS_ACK, DRC_NA, 0}, +}; + +struct rpcsvc_program glusterd_mgmt_hndsk_prog = { + .progname = "Gluster MGMT Handshake", + .prognum = GD_MGMT_HNDSK_PROGRAM, + .progver = GD_MGMT_HNDSK_VERSION, + .actors = glusterd_mgmt_hndsk_actors, + .numactors = GD_MGMT_HNDSK_MAXVALUE, +}; + +static char *glusterd_mgmt_hndsk_proc[GD_MGMT_HNDSK_MAXVALUE] = { + [GD_MGMT_HNDSK_NULL] = "NULL", + [GD_MGMT_HNDSK_VERSIONS] = "MGMT-VERS", + [GD_MGMT_HNDSK_VERSIONS_ACK] = "MGMT-VERS-ACK", +}; + +static rpc_clnt_prog_t gd_clnt_mgmt_hndsk_prog = { + .progname = "Gluster MGMT Handshake", + .prognum = GD_MGMT_HNDSK_PROGRAM, + .progver = GD_MGMT_HNDSK_VERSION, + .procnames = glusterd_mgmt_hndsk_proc, +}; + +static int +glusterd_event_connected_inject(glusterd_peerctx_t *peerctx) +{ + GF_ASSERT(peerctx); + + glusterd_friend_sm_event_t *event = NULL; + glusterd_probe_ctx_t *ctx = NULL; + int ret = -1; + glusterd_peerinfo_t *peerinfo = NULL; + + ret = glusterd_friend_sm_new_event(GD_FRIEND_EVENT_CONNECTED, &event); + + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_EVENT_NEW_GET_FAIL, + "Unable to get new event"); + goto out; + } + + ctx = GF_CALLOC(1, sizeof(*ctx), gf_gld_mt_probe_ctx_t); + + if (!ctx) { + ret = -1; + gf_msg("glusterd", GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "Memory not available"); + goto out; + } + + RCU_READ_LOCK; + + peerinfo = glusterd_peerinfo_find_by_generation(peerctx->peerinfo_gen); + if (!peerinfo) { + RCU_READ_UNLOCK; + ret = -1; + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_PEER_NOT_FOUND, + "Could not find peer %s(%s)", peerctx->peername, + uuid_utoa(peerctx->peerid)); + GF_FREE(ctx); + goto out; + } + ctx->hostname = gf_strdup(peerinfo->hostname); + ctx->port = peerinfo->port; + ctx->req = peerctx->args.req; + ctx->dict = peerctx->args.dict; + + event->peername = gf_strdup(peerinfo->hostname); + gf_uuid_copy(event->peerid, peerinfo->uuid); + event->ctx = ctx; + + ret = glusterd_friend_sm_inject_event(event); + + RCU_READ_UNLOCK; + + if (ret) + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_EVENT_INJECT_FAIL, + "Unable to inject " + "EVENT_CONNECTED ret = %d", + ret); - ret = 0; out: + gf_msg_debug("glusterd", 0, "returning %d", ret); + return ret; +} - /* don't use GF_FREE, buffer was allocated by libc */ - if (rsp.prog) { - trav = rsp.prog; - while (trav) { - next = trav->next; - free (trav->progname); - free (trav); - trav = next; - } - } +int +gd_validate_peer_op_version(xlator_t *this, 
glusterd_peerinfo_t *peerinfo, + dict_t *dict, char **errstr) +{ + int ret = -1; + glusterd_conf_t *conf = NULL; + int32_t peer_op_version = 0; + int32_t peer_min_op_version = 0; + int32_t peer_max_op_version = 0; + + if (!dict) { + gf_smsg("glusterd", GF_LOG_WARNING, ENOMEM, GD_MSG_DICT_CREATE_FAIL, + NULL); + goto out; + } + + if (!this) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_XLATOR_NOT_DEFINED, + NULL); + goto out; + } + + if (!peerinfo) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); + goto out; + } + + conf = this->private; + + ret = dict_get_int32(dict, GD_OP_VERSION_KEY, &peer_op_version); + if (ret) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=%s", GD_OP_VERSION_KEY, NULL); + goto out; + } + + ret = dict_get_int32(dict, GD_MAX_OP_VERSION_KEY, &peer_max_op_version); + if (ret) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=%s", GD_MAX_OP_VERSION_KEY, NULL); + goto out; + } + + ret = dict_get_int32(dict, GD_MIN_OP_VERSION_KEY, &peer_min_op_version); + if (ret) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=%s", GD_MIN_OP_VERSION_KEY, NULL); + goto out; + } + + ret = -1; + /* Check if peer can support our op_version */ + if ((peer_max_op_version < conf->op_version) || + (peer_min_op_version > conf->op_version)) { + ret = gf_asprintf(errstr, + "Peer %s does not support required " + "op-version", + peerinfo->hostname); + ret = -1; + goto out; + } + + ret = 0; +out: + if (peerinfo) + gf_msg_debug((this ? this->name : "glusterd"), 0, "Peer %s %s", + peerinfo->hostname, ((ret < 0) ? "rejected" : "accepted")); + return ret; +} + +int +__glusterd_mgmt_hndsk_version_ack_cbk(struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int ret = -1; + gf_mgmt_hndsk_rsp rsp = { + 0, + }; + xlator_t *this = NULL; + call_frame_t *frame = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_peerctx_t *peerctx = NULL; + char msg[64] = { + 0, + }; + + this = THIS; + frame = myframe; + peerctx = frame->local; + + RCU_READ_LOCK; + peerinfo = glusterd_peerinfo_find_by_generation(peerctx->peerinfo_gen); + if (!peerinfo) { + gf_msg_debug(this->name, 0, "Could not find peer %s(%s)", + peerctx->peername, uuid_utoa(peerctx->peerid)); + ret = -1; + goto out; + } + + if (-1 == req->rpc_status) { + snprintf(msg, sizeof(msg), + "Error through RPC layer, retry again later"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RPC_LAYER_ERROR, "%s", msg); + peerctx->errstr = gf_strdup(msg); + goto out; + } + + ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_mgmt_hndsk_rsp); + if (ret < 0) { + snprintf(msg, sizeof(msg), "Failed to decode XDR"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, "%s", msg); + peerctx->errstr = gf_strdup(msg); + goto out; + } + + if (-1 == rsp.op_ret) { + ret = -1; + snprintf(msg, sizeof(msg), + "Failed to get handshake ack from remote server"); + gf_msg(frame->this->name, GF_LOG_ERROR, 0, GD_MSG_NO_HANDSHAKE_ACK, + "%s", msg); + peerctx->errstr = gf_strdup(msg); + goto out; + } + + /* TODO: this is hardcoded as of now, but I don't forsee any problems + * with this as long as we are properly handshaking operating versions + */ + peerinfo->mgmt = &gd_mgmt_prog; + peerinfo->peer = &gd_peer_prog; + peerinfo->mgmt_v3 = &gd_mgmt_v3_prog; + + ret = default_notify(this, GF_EVENT_CHILD_UP, NULL); + + if (GD_MODE_ON == peerctx->args.mode) { + (void)glusterd_event_connected_inject(peerctx); + peerctx->args.req = NULL; + } else if 
(GD_MODE_SWITCH_ON == peerctx->args.mode) { + peerctx->args.mode = GD_MODE_ON; + } else { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_UNKNOWN_MODE, + "unknown mode %d", peerctx->args.mode); + } + + ret = 0; +out: + + if (ret != 0 && peerinfo) + rpc_transport_disconnect(peerinfo->rpc->conn.trans, _gf_false); + + RCU_READ_UNLOCK; + + frame->local = NULL; + STACK_DESTROY(frame->root); + + if (rsp.hndsk.hndsk_val) + free(rsp.hndsk.hndsk_val); + + glusterd_friend_sm(); + + return 0; +} + +int +glusterd_mgmt_hndsk_version_ack_cbk(struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk(req, iov, count, myframe, + __glusterd_mgmt_hndsk_version_ack_cbk); +} +int +__glusterd_mgmt_hndsk_version_cbk(struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int ret = -1; + int op_errno = EINVAL; + gf_mgmt_hndsk_rsp rsp = { + 0, + }; + gf_mgmt_hndsk_req arg = {{ + 0, + }}; + xlator_t *this = NULL; + call_frame_t *frame = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_peerctx_t *peerctx = NULL; + dict_t *dict = NULL; + dict_t *rsp_dict = NULL; + glusterd_conf_t *conf = NULL; + char msg[64] = { + 0, + }; + + this = THIS; + conf = this->private; + frame = myframe; + peerctx = frame->local; + + RCU_READ_LOCK; + + peerinfo = glusterd_peerinfo_find_by_generation(peerctx->peerinfo_gen); + if (!peerinfo) { + ret = -1; + gf_msg_debug(this->name, 0, "Could not find peer %s(%s)", + peerctx->peername, uuid_utoa(peerctx->peerid)); + goto out; + } + + if (-1 == req->rpc_status) { + ret = -1; + snprintf(msg, sizeof(msg), + "Error through RPC layer, retry again later"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RPC_LAYER_ERROR, "%s", msg); + peerctx->errstr = gf_strdup(msg); + goto out; + } + + ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_mgmt_hndsk_rsp); + if (ret < 0) { + snprintf(msg, sizeof(msg), + "Failed to decode management " + "handshake response"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, "%s", msg); + peerctx->errstr = gf_strdup(msg); + goto out; + } + + GF_PROTOCOL_DICT_UNSERIALIZE(this, dict, rsp.hndsk.hndsk_val, + rsp.hndsk.hndsk_len, ret, op_errno, out); + + op_errno = rsp.op_errno; + if (-1 == rsp.op_ret) { + gf_msg(this->name, GF_LOG_ERROR, op_errno, GD_MSG_VERS_GET_FAIL, + "failed to get the 'versions' from peer (%s)", + req->conn->trans->peerinfo.identifier); + goto out; + } + + /* Check if peer can be part of cluster */ + ret = gd_validate_peer_op_version(this, peerinfo, dict, &peerctx->errstr); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OP_VERSION_MISMATCH, + "failed to validate the operating version of peer (%s)", + peerinfo->hostname); + goto out; + } + + rsp_dict = dict_new(); + if (!rsp_dict) + goto out; + + ret = dict_set_int32(rsp_dict, GD_OP_VERSION_KEY, conf->op_version); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "failed to set operating version in dict"); + goto out; + } + + GF_PROTOCOL_DICT_SERIALIZE(this, rsp_dict, (&arg.hndsk.hndsk_val), + arg.hndsk.hndsk_len, op_errno, out); + + ret = glusterd_submit_request( + peerinfo->rpc, &arg, frame, &gd_clnt_mgmt_hndsk_prog, + GD_MGMT_HNDSK_VERSIONS_ACK, NULL, this, + glusterd_mgmt_hndsk_version_ack_cbk, (xdrproc_t)xdr_gf_mgmt_hndsk_req); + +out: + if (ret) { frame->local = NULL; - STACK_DESTROY (frame->root); + STACK_DESTROY(frame->root); + if (peerinfo) + rpc_transport_disconnect(peerinfo->rpc->conn.trans, _gf_false); + } - if (ret != 0) - rpc_transport_disconnect 
(peerinfo->rpc->conn.trans); + RCU_READ_UNLOCK; - return 0; + if (rsp.hndsk.hndsk_val) + free(rsp.hndsk.hndsk_val); + + if (arg.hndsk.hndsk_val) + GF_FREE(arg.hndsk.hndsk_val); + + if (dict) + dict_unref(dict); + + if (rsp_dict) + dict_unref(rsp_dict); + + return 0; } +int +glusterd_mgmt_hndsk_version_cbk(struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk(req, iov, count, myframe, + __glusterd_mgmt_hndsk_version_cbk); +} int -glusterd_peer_handshake (xlator_t *this, struct rpc_clnt *rpc, - glusterd_peerctx_t *peerctx) +glusterd_mgmt_handshake(xlator_t *this, glusterd_peerctx_t *peerctx) { - call_frame_t *frame = NULL; - gf_dump_req req = {0,}; - int ret = -1; + call_frame_t *frame = NULL; + gf_mgmt_hndsk_req req = { + { + 0, + }, + }; + glusterd_peerinfo_t *peerinfo = NULL; + dict_t *req_dict = NULL; + int ret = -1; + + frame = create_frame(this, this->ctx->pool); + if (!frame) { + gf_smsg("glusterd", GF_LOG_WARNING, errno, GD_MSG_FRAME_CREATE_FAIL, + NULL); + goto out; + } + + frame->local = peerctx; + + req_dict = dict_new(); + if (!req_dict) { + gf_smsg("glusterd", GF_LOG_WARNING, ENOMEM, GD_MSG_DICT_CREATE_FAIL, + NULL); + goto out; + } + + ret = dict_set_dynstr(req_dict, GD_PEER_ID_KEY, + gf_strdup(uuid_utoa(MY_UUID))); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "failed to set peer ID in dict"); + goto out; + } + + GF_PROTOCOL_DICT_SERIALIZE(this, req_dict, (&req.hndsk.hndsk_val), + req.hndsk.hndsk_len, ret, out); + + RCU_READ_LOCK; + + peerinfo = glusterd_peerinfo_find_by_generation(peerctx->peerinfo_gen); + if (!peerinfo) { + RCU_READ_UNLOCK; + gf_msg_debug(THIS->name, 0, "Could not find peer %s(%s)", + peerctx->peername, uuid_utoa(peerctx->peerid)); + goto out; + } + + ret = glusterd_submit_request( + peerinfo->rpc, &req, frame, &gd_clnt_mgmt_hndsk_prog, + GD_MGMT_HNDSK_VERSIONS, NULL, this, glusterd_mgmt_hndsk_version_cbk, + (xdrproc_t)xdr_gf_mgmt_hndsk_req); + + RCU_READ_UNLOCK; + + ret = 0; - frame = create_frame (this, this->ctx->pool); - if (!frame) - goto out; +out: + if (req_dict) + dict_unref(req_dict); + + if (ret && frame) + STACK_DESTROY(frame->root); + + return ret; +} + +int +glusterd_set_clnt_mgmt_program(glusterd_peerinfo_t *peerinfo, + gf_prog_detail *prog) +{ + gf_prog_detail *trav = NULL; + int ret = -1; + + if (!peerinfo || !prog) + goto out; + + trav = prog; - frame->local = peerctx; + while (trav) { + ret = -1; + if ((gd_mgmt_prog.prognum == trav->prognum) && + (gd_mgmt_prog.progver == trav->progver)) { + peerinfo->mgmt = &gd_mgmt_prog; + ret = 0; + } + + if ((gd_peer_prog.prognum == trav->prognum) && + (gd_peer_prog.progver == trav->progver)) { + peerinfo->peer = &gd_peer_prog; + ret = 0; + } + + if (ret) { + gf_msg_debug("glusterd", 0, + "%s (%" PRId64 ":%" PRId64 ") not supported", + trav->progname, trav->prognum, trav->progver); + } + + trav = trav->next; + } + + if (peerinfo->mgmt) { + gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_VERS_INFO, + "Using Program %s, Num (%d), Version (%d)", + peerinfo->mgmt->progname, peerinfo->mgmt->prognum, + peerinfo->mgmt->progver); + } + + if (peerinfo->peer) { + gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_VERS_INFO, + "Using Program %s, Num (%d), Version (%d)", + peerinfo->peer->progname, peerinfo->peer->prognum, + peerinfo->peer->progver); + } + + if (peerinfo->mgmt_v3) { + gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_VERS_INFO, + "Using Program %s, Num (%d), Version (%d)", + peerinfo->mgmt_v3->progname, peerinfo->mgmt_v3->prognum, + 
peerinfo->mgmt_v3->progver); + } + + ret = 0; +out: + return ret; +} + +static gf_boolean_t +_mgmt_hndsk_prog_present(gf_prog_detail *prog) +{ + gf_boolean_t ret = _gf_false; + gf_prog_detail *trav = NULL; + + GF_ASSERT(prog); + + trav = prog; + + while (trav) { + if ((trav->prognum == GD_MGMT_HNDSK_PROGRAM) && + (trav->progver == GD_MGMT_HNDSK_VERSION)) { + ret = _gf_true; + goto out; + } + trav = trav->next; + } +out: + return ret; +} + +int +__glusterd_peer_dump_version_cbk(struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int ret = -1; + gf_dump_rsp rsp = { + 0, + }; + xlator_t *this = NULL; + gf_prog_detail *trav = NULL; + gf_prog_detail *next = NULL; + call_frame_t *frame = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_peerctx_t *peerctx = NULL; + glusterd_conf_t *conf = NULL; + char msg[1024] = { + 0, + }; + + this = THIS; + conf = this->private; + frame = myframe; + peerctx = frame->local; + + RCU_READ_LOCK; + + peerinfo = glusterd_peerinfo_find_by_generation(peerctx->peerinfo_gen); + if (!peerinfo) { + gf_msg_debug(this->name, 0, "Couldn't find peer %s(%s)", + peerctx->peername, uuid_utoa(peerctx->peerid)); + goto out; + } + + if (-1 == req->rpc_status) { + snprintf(msg, sizeof(msg), + "Error through RPC layer, retry again later"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RPC_LAYER_ERROR, "%s", msg); + peerctx->errstr = gf_strdup(msg); + goto out; + } + + ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_dump_rsp); + if (ret < 0) { + snprintf(msg, sizeof(msg), "Failed to decode XDR"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, "%s", msg); + peerctx->errstr = gf_strdup(msg); + goto out; + } + if (-1 == rsp.op_ret) { + snprintf(msg, sizeof(msg), + "Failed to get the 'versions' from remote server"); + gf_msg(frame->this->name, GF_LOG_ERROR, 0, GD_MSG_VERS_GET_FAIL, "%s", + msg); + peerctx->errstr = gf_strdup(msg); + goto out; + } + + if (_mgmt_hndsk_prog_present(rsp.prog)) { + gf_msg_debug(this->name, 0, + "Proceeding to op-version handshake with peer %s", + peerinfo->hostname); + ret = glusterd_mgmt_handshake(this, peerctx); + goto out; + } else if (conf->op_version > 1) { + ret = -1; + snprintf(msg, sizeof(msg), + "Peer %s does not support required op-version", + peerinfo->hostname); + peerctx->errstr = gf_strdup(msg); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VERSION_UNSUPPORTED, "%s", + msg); + goto out; + } + + /* Make sure we assign the proper program to peer */ + ret = glusterd_set_clnt_mgmt_program(peerinfo, rsp.prog); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_MGMT_PGM_SET_FAIL, + "failed to set the mgmt program"); + goto out; + } + + ret = default_notify(this, GF_EVENT_CHILD_UP, NULL); + + if (GD_MODE_ON == peerctx->args.mode) { + (void)glusterd_event_connected_inject(peerctx); + peerctx->args.req = NULL; + } else if (GD_MODE_SWITCH_ON == peerctx->args.mode) { + peerctx->args.mode = GD_MODE_ON; + } else { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_UNKNOWN_MODE, + "unknown mode %d", peerctx->args.mode); + } + + ret = 0; + +out: + if (ret != 0 && peerinfo) + rpc_transport_disconnect(peerinfo->rpc->conn.trans, _gf_false); + + RCU_READ_UNLOCK; + + glusterd_friend_sm(); + glusterd_op_sm(); + + /* don't use GF_FREE, buffer was allocated by libc */ + if (rsp.prog) { + trav = rsp.prog; + while (trav) { + next = trav->next; + free(trav->progname); + free(trav); + trav = next; + } + } - req.gfs_id = 0xcafe; + frame->local = NULL; + STACK_DESTROY(frame->root); - ret = glusterd_submit_request 
(peerctx->peerinfo->rpc, &req, frame, - &glusterd_dump_prog, GF_DUMP_DUMP, - NULL, xdr_from_dump_req, this, - glusterd_peer_dump_version_cbk); + return 0; +} + +int +glusterd_peer_dump_version_cbk(struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk(req, iov, count, myframe, + __glusterd_peer_dump_version_cbk); +} + +int +glusterd_peer_dump_version(xlator_t *this, struct rpc_clnt *rpc, + glusterd_peerctx_t *peerctx) +{ + call_frame_t *frame = NULL; + gf_dump_req req = { + 0, + }; + glusterd_peerinfo_t *peerinfo = NULL; + int ret = -1; + + frame = create_frame(this, this->ctx->pool); + if (!frame) { + gf_smsg(this->name, GF_LOG_WARNING, errno, GD_MSG_FRAME_CREATE_FAIL, + NULL); + goto out; + } + + frame->local = peerctx; + if (!peerctx) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); + goto out; + } + + RCU_READ_LOCK; + + peerinfo = glusterd_peerinfo_find_by_generation(peerctx->peerinfo_gen); + if (!peerinfo) { + RCU_READ_UNLOCK; + gf_msg_debug(this->name, 0, "Couldn't find peer %s(%s)", + peerctx->peername, uuid_utoa(peerctx->peerid)); + goto out; + } + + req.gfs_id = 0xcafe; + + ret = glusterd_submit_request( + peerinfo->rpc, &req, frame, &glusterd_dump_prog, GF_DUMP_DUMP, NULL, + this, glusterd_peer_dump_version_cbk, (xdrproc_t)xdr_gf_dump_req); + + RCU_READ_UNLOCK; out: - return ret; + if (ret && frame) + STACK_DESTROY(frame->root); + + return ret; } diff --git a/xlators/mgmt/glusterd/src/glusterd-hooks.c b/xlators/mgmt/glusterd/src/glusterd-hooks.c new file mode 100644 index 00000000000..61c0f1c946f --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-hooks.c @@ -0,0 +1,641 @@ +/* + Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#include <glusterfs/glusterfs.h> +#include <glusterfs/dict.h> +#include <glusterfs/xlator.h> +#include <glusterfs/logging.h> +#include <glusterfs/run.h> +#include <glusterfs/defaults.h> +#include <glusterfs/syscall.h> +#include <glusterfs/compat.h> +#include <glusterfs/compat-errno.h> +#include "glusterd.h" +#include "glusterd-sm.h" +#include "glusterd-op-sm.h" +#include "glusterd-utils.h" +#include "glusterd-store.h" +#include "glusterd-hooks.h" +#include "glusterd-messages.h" + +#include <fnmatch.h> + +#define EMPTY "" +char glusterd_hook_dirnames[GD_OP_MAX][256] = { + [GD_OP_NONE] = EMPTY, + [GD_OP_CREATE_VOLUME] = "create", + [GD_OP_START_BRICK] = EMPTY, + [GD_OP_STOP_BRICK] = EMPTY, + [GD_OP_DELETE_VOLUME] = "delete", + [GD_OP_START_VOLUME] = "start", + [GD_OP_STOP_VOLUME] = "stop", + [GD_OP_DEFRAG_VOLUME] = EMPTY, + [GD_OP_ADD_BRICK] = "add-brick", + [GD_OP_REMOVE_BRICK] = "remove-brick", + [GD_OP_REPLACE_BRICK] = EMPTY, + [GD_OP_SET_VOLUME] = "set", + [GD_OP_RESET_VOLUME] = "reset", + [GD_OP_SYNC_VOLUME] = EMPTY, + [GD_OP_LOG_ROTATE] = EMPTY, + [GD_OP_GSYNC_CREATE] = "gsync-create", + [GD_OP_GSYNC_SET] = EMPTY, + [GD_OP_PROFILE_VOLUME] = EMPTY, + [GD_OP_QUOTA] = EMPTY, + [GD_OP_STATUS_VOLUME] = EMPTY, + [GD_OP_REBALANCE] = EMPTY, + [GD_OP_HEAL_VOLUME] = EMPTY, + [GD_OP_STATEDUMP_VOLUME] = EMPTY, + [GD_OP_LIST_VOLUME] = EMPTY, + [GD_OP_CLEARLOCKS_VOLUME] = EMPTY, + [GD_OP_DEFRAG_BRICK_VOLUME] = EMPTY, + [GD_OP_RESET_BRICK] = EMPTY, +}; +#undef EMPTY + +static gf_boolean_t +glusterd_is_hook_enabled(char *script) +{ + return (script[0] == 'S' && (fnmatch("*.rpmsave", script, 0) != 0) && + (fnmatch("*.rpmnew", script, 0) != 0)); +} + +int +glusterd_hooks_create_hooks_directory(char *basedir) +{ + int ret = -1; + int op = GD_OP_NONE; + int type = GD_COMMIT_HOOK_NONE; + char version_dir[PATH_MAX] = { + 0, + }; + char path[PATH_MAX] = { + 0, + }; + char *cmd_subdir = NULL; + char type_subdir[GD_COMMIT_HOOK_MAX][256] = {{ + 0, + }, + "pre", + "post"}; + glusterd_conf_t *priv = NULL; + int32_t len = 0; + + xlator_t *this = NULL; + this = THIS; + GF_ASSERT(this); + priv = this->private; + + snprintf(path, sizeof(path), "%s/hooks", basedir); + ret = mkdir_p(path, 0755, _gf_true); + if (ret) { + gf_smsg(this->name, GF_LOG_CRITICAL, errno, GD_MSG_CREATE_DIR_FAILED, + "Path=%s", path, NULL); + goto out; + } + + GLUSTERD_GET_HOOKS_DIR(version_dir, GLUSTERD_HOOK_VER, priv); + ret = mkdir_p(version_dir, 0755, _gf_true); + if (ret) { + gf_smsg(this->name, GF_LOG_CRITICAL, errno, GD_MSG_CREATE_DIR_FAILED, + "Directory=%s", version_dir, NULL); + goto out; + } + + for (op = GD_OP_NONE + 1; op < GD_OP_MAX; op++) { + cmd_subdir = glusterd_hooks_get_hooks_cmd_subdir(op); + if (strlen(cmd_subdir) == 0) + continue; + + len = snprintf(path, sizeof(path), "%s/%s", version_dir, cmd_subdir); + if ((len < 0) || (len >= sizeof(path))) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); + ret = -1; + goto out; + } + ret = mkdir_p(path, 0755, _gf_true); + if (ret) { + gf_smsg(this->name, GF_LOG_CRITICAL, errno, + GD_MSG_CREATE_DIR_FAILED, "Path=%s", path, NULL); + goto out; + } + + for (type = GD_COMMIT_HOOK_PRE; type < GD_COMMIT_HOOK_MAX; type++) { + len = snprintf(path, sizeof(path), "%s/%s/%s", version_dir, + cmd_subdir, type_subdir[type]); + if ((len < 0) || (len >= sizeof(path))) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, + NULL); + ret = -1; + goto out; + } + ret = mkdir_p(path, 0755, _gf_true); + if (ret) { + gf_smsg(this->name, GF_LOG_CRITICAL, errno, + 
GD_MSG_CREATE_DIR_FAILED, "Path=%s", path, NULL); + goto out; + } + } + } + + ret = 0; +out: + return ret; +} + +char * +glusterd_hooks_get_hooks_cmd_subdir(glusterd_op_t op) +{ + GF_ASSERT((op > GD_OP_NONE) && (op < GD_OP_MAX)); + + return glusterd_hook_dirnames[op]; +} + +void +glusterd_hooks_add_working_dir(runner_t *runner, glusterd_conf_t *priv) +{ + runner_argprintf(runner, "--gd-workdir=%s", priv->workdir); +} + +void +glusterd_hooks_add_op(runner_t *runner, char *op) +{ + runner_argprintf(runner, "--volume-op=%s", op); +} + +void +glusterd_hooks_add_hooks_version(runner_t *runner) +{ + runner_argprintf(runner, "--version=%d", GLUSTERD_HOOK_VER); +} + +static void +glusterd_hooks_add_custom_args(dict_t *dict, runner_t *runner) +{ + char *hooks_args = NULL; + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + GF_VALIDATE_OR_GOTO(this->name, dict, out); + GF_VALIDATE_OR_GOTO(this->name, runner, out); + + ret = dict_get_str(dict, "hooks_args", &hooks_args); + if (ret) + gf_msg_debug(this->name, 0, "No Hooks Arguments."); + else + gf_msg_debug(this->name, 0, "Hooks Args = %s", hooks_args); + + if (hooks_args) + runner_argprintf(runner, "%s", hooks_args); + +out: + return; +} + +int +glusterd_hooks_set_volume_args(dict_t *dict, runner_t *runner) +{ + int i = 0; + int count = 0; + int ret = -1; + int flag = 0; + char query[1024] = { + 0, + }; + char *key = NULL; + char *value = NULL; + char *inet_family = NULL; + xlator_t *this = NULL; + this = THIS; + GF_ASSERT(this); + + ret = dict_get_int32(dict, "count", &count); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=count", NULL); + goto out; + } + + /* This will not happen unless op_ctx + * is corrupted*/ + if (!count) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ENTRY, "count", + NULL); + goto out; + } + + runner_add_arg(runner, "-o"); + for (i = 1; ret == 0; i++) { + snprintf(query, sizeof(query), "key%d", i); + ret = dict_get_str(dict, query, &key); + if (ret) + continue; + + snprintf(query, sizeof(query), "value%d", i); + ret = dict_get_str(dict, query, &value); + if (ret) + continue; + + runner_argprintf(runner, "%s=%s", key, value); + if ((strncmp(key, "cluster.enable-shared-storage", + SLEN("cluster.enable-shared-storage")) == 0 || + strncmp(key, "enable-shared-storage", + SLEN("enable-shared-storage")) == 0) && + strncmp(value, "enable", SLEN("enable")) == 0) + flag = 1; + } + + glusterd_hooks_add_custom_args(dict, runner); + if (flag == 1) { + ret = dict_get_str_sizen(this->options, "transport.address-family", + &inet_family); + if (!ret) { + runner_argprintf(runner, "transport.address-family=%s", + inet_family); + } + } + + ret = 0; +out: + return ret; +} + +static int +glusterd_hooks_add_op_args(runner_t *runner, glusterd_op_t op, dict_t *op_ctx, + glusterd_commit_hook_type_t type) +{ + int vol_count = 0; + gf_boolean_t truth = _gf_false; + glusterd_volinfo_t *voliter = NULL; + glusterd_conf_t *priv = NULL; + int ret = -1; + + priv = THIS->private; + cds_list_for_each_entry(voliter, &priv->volumes, vol_list) + { + if (glusterd_is_volume_started(voliter)) + vol_count++; + } + + ret = 0; + switch (op) { + case GD_OP_START_VOLUME: + if (type == GD_COMMIT_HOOK_PRE && vol_count == 0) + truth = _gf_true; + + else if (type == GD_COMMIT_HOOK_POST && vol_count == 1) + truth = _gf_true; + + else + truth = _gf_false; + + runner_argprintf(runner, "--first=%s", truth ? 
"yes" : "no"); + + glusterd_hooks_add_hooks_version(runner); + glusterd_hooks_add_op(runner, "start"); + glusterd_hooks_add_working_dir(runner, priv); + + break; + + case GD_OP_STOP_VOLUME: + if (type == GD_COMMIT_HOOK_PRE && vol_count == 1) + truth = _gf_true; + + else if (type == GD_COMMIT_HOOK_POST && vol_count == 0) + truth = _gf_true; + + else + truth = _gf_false; + + runner_argprintf(runner, "--last=%s", truth ? "yes" : "no"); + break; + + case GD_OP_SET_VOLUME: + ret = glusterd_hooks_set_volume_args(op_ctx, runner); + glusterd_hooks_add_working_dir(runner, priv); + break; + + case GD_OP_GSYNC_CREATE: + glusterd_hooks_add_custom_args(op_ctx, runner); + break; + + case GD_OP_ADD_BRICK: + glusterd_hooks_add_hooks_version(runner); + glusterd_hooks_add_op(runner, "add-brick"); + glusterd_hooks_add_working_dir(runner, priv); + break; + + case GD_OP_RESET_VOLUME: + glusterd_hooks_add_hooks_version(runner); + glusterd_hooks_add_op(runner, "reset"); + glusterd_hooks_add_working_dir(runner, priv); + break; + + default: + break; + } + + return ret; +} + +int +glusterd_hooks_run_hooks(char *hooks_path, glusterd_op_t op, dict_t *op_ctx, + glusterd_commit_hook_type_t type) +{ + xlator_t *this = NULL; + runner_t runner = { + 0, + }; + DIR *hookdir = NULL; + struct dirent *entry = NULL; + struct dirent scratch[2] = { + { + 0, + }, + }; + char *volname = NULL; + char **lines = NULL; + int N = 8; /*arbitrary*/ + int lineno = 0; + int line_count = 0; + int ret = -1; + + this = THIS; + + ret = dict_get_str(op_ctx, "volname", &volname); + if (ret) { + gf_msg(this->name, GF_LOG_CRITICAL, errno, GD_MSG_DICT_GET_FAILED, + "Failed to get volname " + "from operation context"); + goto out; + } + + hookdir = sys_opendir(hooks_path); + if (!hookdir) { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED, + "Failed to open dir %s", hooks_path); + goto out; + } + + lines = GF_CALLOC(1, N * sizeof(*lines), gf_gld_mt_charptr); + if (!lines) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); + ret = -1; + goto out; + } + + ret = -1; + line_count = 0; + + while ((entry = sys_readdir(hookdir, scratch))) { + if (gf_irrelevant_entry(entry)) + continue; + if (line_count == N - 1) { + N *= 2; + lines = GF_REALLOC(lines, N * sizeof(char *)); + if (!lines) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, + NULL); + goto out; + } + } + + if (glusterd_is_hook_enabled(entry->d_name)) { + lines[line_count] = gf_strdup(entry->d_name); + line_count++; + } + } + + lines[line_count] = NULL; + lines = GF_REALLOC(lines, (line_count + 1) * sizeof(char *)); + if (!lines) + goto out; + + qsort(lines, line_count, sizeof(*lines), glusterd_compare_lines); + + for (lineno = 0; lineno < line_count; lineno++) { + runinit(&runner); + runner_argprintf(&runner, "%s/%s", hooks_path, lines[lineno]); + /*Add future command line arguments to hook scripts below*/ + runner_argprintf(&runner, "--volname=%s", volname); + ret = glusterd_hooks_add_op_args(&runner, op, op_ctx, type); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_ADD_OP_ARGS_FAIL, + "Failed to add " + "command specific arguments"); + goto out; + } + + ret = runner_run_reuse(&runner); + if (ret) { + runner_log(&runner, this->name, GF_LOG_ERROR, + "Failed to execute script"); + } else { + runner_log(&runner, this->name, GF_LOG_INFO, "Ran script"); + } + runner_end(&runner); + } + + ret = 0; +out: + if (lines) { + for (lineno = 0; lineno < line_count + 1; lineno++) + GF_FREE(lines[lineno]); + + GF_FREE(lines); + } + + if 
(hookdir) + sys_closedir(hookdir); + + return ret; +} + +int +glusterd_hooks_post_stub_enqueue(char *scriptdir, glusterd_op_t op, + dict_t *op_ctx) +{ + int ret = -1; + glusterd_hooks_stub_t *stub = NULL; + glusterd_hooks_private_t *hooks_priv = NULL; + glusterd_conf_t *conf = NULL; + + conf = THIS->private; + hooks_priv = conf->hooks_priv; + + ret = glusterd_hooks_stub_init(&stub, scriptdir, op, op_ctx); + if (ret) + goto out; + + pthread_mutex_lock(&hooks_priv->mutex); + { + hooks_priv->waitcount++; + cds_list_add_tail(&stub->all_hooks, &hooks_priv->list); + pthread_cond_signal(&hooks_priv->cond); + } + pthread_mutex_unlock(&hooks_priv->mutex); + + ret = 0; +out: + return ret; +} + +int +glusterd_hooks_stub_init(glusterd_hooks_stub_t **stub, char *scriptdir, + glusterd_op_t op, dict_t *op_ctx) +{ + int ret = -1; + glusterd_hooks_stub_t *hooks_stub = NULL; + + xlator_t *this = NULL; + this = THIS; + GF_ASSERT(this); + GF_ASSERT(stub); + if (!stub) + goto out; + + hooks_stub = GF_CALLOC(1, sizeof(*hooks_stub), gf_gld_mt_hooks_stub_t); + if (!hooks_stub) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); + goto out; + } + + CDS_INIT_LIST_HEAD(&hooks_stub->all_hooks); + hooks_stub->op = op; + hooks_stub->scriptdir = gf_strdup(scriptdir); + if (!hooks_stub->scriptdir) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED, + "scriptdir=%s", scriptdir, NULL); + goto out; + } + + hooks_stub->op_ctx = dict_copy_with_ref(op_ctx, hooks_stub->op_ctx); + if (!hooks_stub->op_ctx) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_COPY_FAIL, NULL); + goto out; + } + + *stub = hooks_stub; + ret = 0; +out: + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_HOOK_STUB_INIT_FAIL, + NULL); + glusterd_hooks_stub_cleanup(hooks_stub); + } + + return ret; +} + +void +glusterd_hooks_stub_cleanup(glusterd_hooks_stub_t *stub) +{ + if (!stub) { + gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, GD_MSG_HOOK_STUB_NULL, + "hooks_stub is NULL"); + return; + } + + if (stub->op_ctx) + dict_unref(stub->op_ctx); + + GF_FREE(stub->scriptdir); + + GF_FREE(stub); +} + +static void * +hooks_worker(void *args) +{ + glusterd_conf_t *conf = NULL; + glusterd_hooks_private_t *hooks_priv = NULL; + glusterd_hooks_stub_t *stub = NULL; + + THIS = args; + conf = THIS->private; + hooks_priv = conf->hooks_priv; + + for (;;) { + pthread_mutex_lock(&hooks_priv->mutex); + { + while (cds_list_empty(&hooks_priv->list)) { + pthread_cond_wait(&hooks_priv->cond, &hooks_priv->mutex); + } + stub = cds_list_entry(hooks_priv->list.next, glusterd_hooks_stub_t, + all_hooks); + cds_list_del_init(&stub->all_hooks); + hooks_priv->waitcount--; + } + pthread_mutex_unlock(&hooks_priv->mutex); + + glusterd_hooks_run_hooks(stub->scriptdir, stub->op, stub->op_ctx, + GD_COMMIT_HOOK_POST); + glusterd_hooks_stub_cleanup(stub); + } + + return NULL; +} + +int +glusterd_hooks_priv_init(glusterd_hooks_private_t **new) +{ + int ret = -1; + glusterd_hooks_private_t *hooks_priv = NULL; + + xlator_t *this = NULL; + this = THIS; + GF_ASSERT(this); + + if (!new) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); + goto out; + } + + hooks_priv = GF_CALLOC(1, sizeof(*hooks_priv), gf_gld_mt_hooks_priv_t); + if (!hooks_priv) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); + goto out; + } + + pthread_mutex_init(&hooks_priv->mutex, NULL); + pthread_cond_init(&hooks_priv->cond, NULL); + CDS_INIT_LIST_HEAD(&hooks_priv->list); + hooks_priv->waitcount = 0; + + *new = hooks_priv; + ret = 0; 
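The post-commit hook machinery shown above (glusterd_hooks_post_stub_enqueue feeding hooks_worker) is a plain producer/consumer queue: the commit path allocates a stub, appends it to a list under a mutex and signals a condition variable, and a dedicated worker thread drains the list and runs the scripts so the commit path never blocks on script execution. The following is a minimal standalone sketch of that same pattern using only pthreads and an illustrative stub type; the names (struct stub, enqueue, worker) are placeholders, not the glusterd structures.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/* Illustrative stand-in for glusterd_hooks_stub_t: one queued hook run. */
struct stub {
    struct stub *next;
    int op;
};

static struct stub *head, *tail;          /* FIFO of pending stubs */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;

/* Producer: called from the commit path; only queues, never runs scripts. */
static void enqueue(int op)
{
    struct stub *s = calloc(1, sizeof(*s));
    if (!s)
        return;
    s->op = op;
    pthread_mutex_lock(&lock);
    if (tail)
        tail->next = s;
    else
        head = s;
    tail = s;
    pthread_cond_signal(&cond);
    pthread_mutex_unlock(&lock);
}

/* Consumer: dedicated worker thread; runs hooks outside the commit path. */
static void *worker(void *arg)
{
    (void)arg;
    for (;;) {
        pthread_mutex_lock(&lock);
        while (!head)
            pthread_cond_wait(&cond, &lock);
        struct stub *s = head;
        head = s->next;
        if (!head)
            tail = NULL;
        pthread_mutex_unlock(&lock);

        printf("running post hooks for op %d\n", s->op); /* run scripts here */
        free(s);
    }
    return NULL;
}

int main(void)
{
    pthread_t t;
    pthread_create(&t, NULL, worker, NULL);
    enqueue(1);
    enqueue(2);
    sleep(1);        /* give the detached-style worker time to drain the queue */
    return 0;        /* process exit ends the forever-looping worker */
}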
+out: + return ret; +} + +int +glusterd_hooks_spawn_worker(xlator_t *this) +{ + int ret = -1; + glusterd_conf_t *conf = NULL; + glusterd_hooks_private_t *hooks_priv = NULL; + + ret = glusterd_hooks_priv_init(&hooks_priv); + if (ret) + goto out; + + conf = this->private; + conf->hooks_priv = hooks_priv; + ret = gf_thread_create(&hooks_priv->worker, NULL, hooks_worker, + (void *)this, "gdhooks"); + if (ret) + gf_msg(this->name, GF_LOG_CRITICAL, errno, GD_MSG_SPAWN_THREADS_FAIL, + "Failed to spawn post " + "hooks worker thread"); +out: + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-hooks.h b/xlators/mgmt/glusterd/src/glusterd-hooks.h new file mode 100644 index 00000000000..f8b887b9bd7 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-hooks.h @@ -0,0 +1,88 @@ +/* + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef _GLUSTERD_HOOKS_H_ +#define _GLUSTERD_HOOKS_H_ + +#include <fnmatch.h> + +#define GLUSTERD_GET_HOOKS_DIR(path, version, priv) \ + do { \ + int32_t len; \ + len = snprintf(path, PATH_MAX, "%s/hooks/%d", priv->workdir, version); \ + if (len < 0) { \ + path[0] = 0; \ + } \ + } while (0) + +#define GLUSTERD_HOOK_VER 1 + +#define GD_HOOKS_SPECIFIC_KEY "user.*" + +typedef enum glusterd_commit_hook_type { + GD_COMMIT_HOOK_NONE = 0, + GD_COMMIT_HOOK_PRE, + GD_COMMIT_HOOK_POST, + GD_COMMIT_HOOK_MAX +} glusterd_commit_hook_type_t; + +typedef struct hooks_private { + struct cds_list_head list; + pthread_mutex_t mutex; + pthread_cond_t cond; + pthread_t worker; + int waitcount; // debug purposes +} glusterd_hooks_private_t; + +typedef struct hooks_stub { + struct cds_list_head all_hooks; + char *scriptdir; + dict_t *op_ctx; + glusterd_op_t op; + +} glusterd_hooks_stub_t; + +static inline gf_boolean_t +is_key_glusterd_hooks_friendly(char *key) +{ + gf_boolean_t is_friendly = _gf_false; + + /* This is very specific to hooks friendly behavior */ + if (fnmatch(GD_HOOKS_SPECIFIC_KEY, key, FNM_NOESCAPE) == 0) { + gf_msg_debug(THIS->name, 0, "user namespace key %s", key); + is_friendly = _gf_true; + } + + return is_friendly; +} + +int +glusterd_hooks_create_hooks_directory(char *basedir); + +char * +glusterd_hooks_get_hooks_cmd_subdir(glusterd_op_t op); + +int +glusterd_hooks_run_hooks(char *hooks_path, glusterd_op_t op, dict_t *op_ctx, + glusterd_commit_hook_type_t type); +int +glusterd_hooks_spawn_worker(xlator_t *this); + +int +glusterd_hooks_stub_init(glusterd_hooks_stub_t **stub, char *scriptdir, + glusterd_op_t op, dict_t *op_ctx); +void +glusterd_hooks_stub_cleanup(glusterd_hooks_stub_t *stub); + +int +glusterd_hooks_post_stub_enqueue(char *scriptdir, glusterd_op_t op, + dict_t *op_ctx); +int +glusterd_hooks_priv_init(glusterd_hooks_private_t **new); +#endif diff --git a/xlators/mgmt/glusterd/src/glusterd-locks.c b/xlators/mgmt/glusterd/src/glusterd-locks.c new file mode 100644 index 00000000000..11523f2854b --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-locks.c @@ -0,0 +1,870 @@ +/* + Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. 
+ + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#include <glusterfs/common-utils.h> +#include "cli1-xdr.h" +#include "xdr-generic.h" +#include "glusterd.h" +#include "glusterd-op-sm.h" +#include "glusterd-store.h" +#include "glusterd-utils.h" +#include "glusterd-volgen.h" +#include "glusterd-locks.h" +#include "glusterd-errno.h" +#include <glusterfs/run.h> +#include <glusterfs/syscall.h> +#include "glusterd-messages.h" + +#include <signal.h> + +#define GF_MAX_LOCKING_ENTITIES 3 + +/* Valid entities that the mgmt_v3 lock can hold locks upon * + * To add newer entities to be locked, we can just add more * + * entries to this table along with the type and default value */ +glusterd_valid_entities valid_types[] = { + {"vol", _gf_true}, + {"snap", _gf_false}, + {"global", _gf_false}, + {NULL}, +}; + +/* Checks if the lock request is for a valid entity */ +static gf_boolean_t +glusterd_mgmt_v3_is_type_valid(char *type) +{ + int i = 0; + + GF_ASSERT(type); + + for (i = 0; valid_types[i].type; i++) { + if (!strcmp(type, valid_types[i].type)) { + return _gf_true; + } + } + + return _gf_false; +} + +/* Initialize the global mgmt_v3 lock list(dict) when + * glusterd is spawned */ +int32_t +glusterd_mgmt_v3_lock_init() +{ + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + priv->mgmt_v3_lock = dict_new(); + if (!priv->mgmt_v3_lock) + goto out; + + ret = 0; +out: + return ret; +} + +/* Destroy the global mgmt_v3 lock list(dict) when + * glusterd cleanup is performed */ +void +glusterd_mgmt_v3_lock_fini() +{ + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + if (priv->mgmt_v3_lock) + dict_unref(priv->mgmt_v3_lock); +} + +/* Initialize the global mgmt_v3_timer lock list(dict) when + * glusterd is spawned */ +int32_t +glusterd_mgmt_v3_lock_timer_init() +{ + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, out); + + priv->mgmt_v3_lock_timer = dict_new(); + if (!priv->mgmt_v3_lock_timer) + goto out; + + ret = 0; +out: + return ret; +} + +/* Destroy the global mgmt_v3_timer lock list(dict) when + * glusterd cleanup is performed */ +void +glusterd_mgmt_v3_lock_timer_fini() +{ + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, out); + + if (priv->mgmt_v3_lock_timer) + dict_unref(priv->mgmt_v3_lock_timer); +out: + return; +} + +static int32_t +glusterd_get_mgmt_v3_lock_owner(char *key, uuid_t *uuid) +{ + int32_t ret = -1; + glusterd_mgmt_v3_lock_obj *lock_obj = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + if (!key || !uuid) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, + "key or uuid is null."); + ret = -1; + goto out; + } + + ret = dict_get_bin(priv->mgmt_v3_lock, key, (void **)&lock_obj); + if (!ret) + gf_uuid_copy(*uuid, lock_obj->lock_owner); + + ret = 0; +out: + gf_msg_trace(this->name, 0, 
"Returning %d", ret); + return ret; +} + +/* This function is called with the locked_count and type, to * + * release all the acquired locks. */ +static int32_t +glusterd_release_multiple_locks_per_entity(dict_t *dict, uuid_t uuid, + int32_t locked_count, char *type) +{ + char name_buf[PATH_MAX] = ""; + char *name = NULL; + int32_t i = -1; + int32_t op_ret = 0; + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(dict); + GF_ASSERT(type); + + if (locked_count == 0) { + gf_msg_debug(this->name, 0, "No %s locked as part of this transaction", + type); + goto out; + } + + /* Release all the locks held */ + for (i = 0; i < locked_count; i++) { + ret = snprintf(name_buf, sizeof(name_buf), "%sname%d", type, i + 1); + + /* Looking for volname1, volname2 or snapname1, * + * as key in the dict snapname2 */ + ret = dict_get_strn(dict, name_buf, ret, &name); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get %s locked_count = %d", name_buf, + locked_count); + op_ret = ret; + continue; + } + + ret = glusterd_mgmt_v3_unlock(name, uuid, type); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_UNLOCK_FAIL, + "Failed to release lock for %s.", name); + op_ret = ret; + } + } + +out: + gf_msg_trace(this->name, 0, "Returning %d", op_ret); + return op_ret; +} + +/* Given the count and type of the entity this function acquires * + * locks on multiple elements of the same entity. For example: * + * If type is "vol" this function tries to acquire locks on multiple * + * volumes */ +static int32_t +glusterd_acquire_multiple_locks_per_entity(dict_t *dict, uuid_t uuid, + uint32_t *op_errno, int32_t count, + char *type) +{ + char name_buf[PATH_MAX] = ""; + char *name = NULL; + int32_t i = -1; + int32_t ret = -1; + int32_t locked_count = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(dict); + GF_ASSERT(type); + + /* Locking one element after other */ + for (i = 0; i < count; i++) { + ret = snprintf(name_buf, sizeof(name_buf), "%sname%d", type, i + 1); + + /* Looking for volname1, volname2 or snapname1, * + * as key in the dict snapname2 */ + ret = dict_get_strn(dict, name_buf, ret, &name); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get %s count = %d", name_buf, count); + break; + } + + ret = glusterd_mgmt_v3_lock(name, uuid, op_errno, type); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_LOCK_GET_FAIL, + "Failed to acquire lock for %s %s " + "on behalf of %s. Reversing " + "this transaction", + type, name, uuid_utoa(uuid)); + break; + } + locked_count++; + } + + if (count == locked_count) { + /* If all locking ops went successfully, return as success */ + ret = 0; + goto out; + } + + /* If we failed to lock one element, unlock others and return failure */ + ret = glusterd_release_multiple_locks_per_entity(dict, uuid, locked_count, + type); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MULTIPLE_LOCK_RELEASE_FAIL, + "Failed to release multiple %s locks", type); + } + ret = -1; +out: + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +/* Given the type of entity, this function figures out if it should unlock a * + * single element of multiple elements of the said entity. 
For example: * + * if the type is "vol", this function will accordingly unlock a single volume * + * or multiple volumes */ +static int32_t +glusterd_mgmt_v3_unlock_entity(dict_t *dict, uuid_t uuid, char *type, + gf_boolean_t default_value) +{ + char name_buf[PATH_MAX] = ""; + char *name = NULL; + int32_t count = -1; + int32_t ret = -1; + gf_boolean_t hold_locks = _gf_false; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(dict); + GF_ASSERT(type); + + snprintf(name_buf, sizeof(name_buf), "hold_%s_locks", type); + hold_locks = dict_get_str_boolean(dict, name_buf, default_value); + + if (hold_locks == _gf_false) { + /* Locks were not held for this particular entity * + * Hence nothing to release */ + ret = 0; + goto out; + } + + /* Looking for volcount or snapcount in the dict */ + ret = snprintf(name_buf, sizeof(name_buf), "%scount", type); + ret = dict_get_int32n(dict, name_buf, ret, &count); + if (ret) { + /* count is not present. Only one * + * element name needs to be unlocked */ + ret = snprintf(name_buf, sizeof(name_buf), "%sname", type); + ret = dict_get_strn(dict, name_buf, ret, &name); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to fetch %sname", type); + goto out; + } + + ret = glusterd_mgmt_v3_unlock(name, uuid, type); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_UNLOCK_FAIL, + "Failed to release lock for %s %s " + "on behalf of %s.", + type, name, uuid_utoa(uuid)); + goto out; + } + } else { + /* Unlocking one element name after another */ + ret = glusterd_release_multiple_locks_per_entity(dict, uuid, count, + type); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_MULTIPLE_LOCK_RELEASE_FAIL, + "Failed to release all %s locks", type); + goto out; + } + } + + ret = 0; +out: + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +/* Given the type of entity, this function figures out if it should lock a * + * single element or multiple elements of the said entity. For example: * + * if the type is "vol", this function will accordingly lock a single volume * + * or multiple volumes */ +static int32_t +glusterd_mgmt_v3_lock_entity(dict_t *dict, uuid_t uuid, uint32_t *op_errno, + char *type, gf_boolean_t default_value) +{ + char name_buf[PATH_MAX] = ""; + char *name = NULL; + int32_t count = -1; + int32_t ret = -1; + gf_boolean_t hold_locks = _gf_false; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(dict); + GF_ASSERT(type); + + snprintf(name_buf, sizeof(name_buf), "hold_%s_locks", type); + hold_locks = dict_get_str_boolean(dict, name_buf, default_value); + + if (hold_locks == _gf_false) { + /* Not holding locks for this particular entity */ + ret = 0; + goto out; + } + + /* Looking for volcount or snapcount in the dict */ + ret = snprintf(name_buf, sizeof(name_buf), "%scount", type); + ret = dict_get_int32n(dict, name_buf, ret, &count); + if (ret) { + /* count is not present. 
Only one * + * element name needs to be locked */ + ret = snprintf(name_buf, sizeof(name_buf), "%sname", type); + ret = dict_get_strn(dict, name_buf, ret, &name); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to fetch %sname", type); + goto out; + } + + ret = glusterd_mgmt_v3_lock(name, uuid, op_errno, type); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_LOCK_GET_FAIL, + "Failed to acquire lock for %s %s " + "on behalf of %s.", + type, name, uuid_utoa(uuid)); + goto out; + } + } else { + /* Locking one element name after another */ + ret = glusterd_acquire_multiple_locks_per_entity(dict, uuid, op_errno, + count, type); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_MULTIPLE_LOCK_ACQUIRE_FAIL, + "Failed to acquire all %s locks", type); + goto out; + } + } + + ret = 0; +out: + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +/* Try to release locks of multiple entities like * + * volume, snaps etc. */ +int32_t +glusterd_multiple_mgmt_v3_unlock(dict_t *dict, uuid_t uuid) +{ + int32_t i = -1; + int32_t ret = -1; + int32_t op_ret = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + if (!dict) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_EMPTY, "dict is null."); + ret = -1; + goto out; + } + + for (i = 0; valid_types[i].type; i++) { + ret = glusterd_mgmt_v3_unlock_entity(dict, uuid, valid_types[i].type, + valid_types[i].default_value); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_MULTIPLE_LOCK_RELEASE_FAIL, "Unable to unlock all %s", + valid_types[i].type); + op_ret = ret; + } + } + + ret = op_ret; +out: + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; +} + +/* Try to acquire locks on multiple entities like * + * volume, snaps etc. 
*/ +int32_t +glusterd_multiple_mgmt_v3_lock(dict_t *dict, uuid_t uuid, uint32_t *op_errno) +{ + int32_t i = -1; + int32_t ret = -1; + int32_t locked_count = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + if (!dict) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_EMPTY, "dict is null."); + ret = -1; + goto out; + } + + /* Locking one entity after other */ + for (i = 0; valid_types[i].type; i++) { + ret = glusterd_mgmt_v3_lock_entity(dict, uuid, op_errno, + valid_types[i].type, + valid_types[i].default_value); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_MULTIPLE_LOCK_ACQUIRE_FAIL, "Unable to lock all %s", + valid_types[i].type); + break; + } + locked_count++; + } + + if (locked_count == GF_MAX_LOCKING_ENTITIES) { + /* If all locking ops went successfully, return as success */ + ret = 0; + goto out; + } + + /* If we failed to lock one entity, unlock others and return failure */ + for (i = 0; i < locked_count; i++) { + ret = glusterd_mgmt_v3_unlock_entity(dict, uuid, valid_types[i].type, + valid_types[i].default_value); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_MULTIPLE_LOCK_RELEASE_FAIL, "Unable to unlock all %s", + valid_types[i].type); + } + } + ret = -1; +out: + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_mgmt_v3_lock(const char *name, uuid_t uuid, uint32_t *op_errno, + char *type) +{ + char key[PATH_MAX] = ""; + int32_t ret = -1; + glusterd_mgmt_v3_lock_obj *lock_obj = NULL; + glusterd_mgmt_v3_lock_timer *mgmt_lock_timer = NULL; + glusterd_conf_t *priv = NULL; + gf_boolean_t is_valid = _gf_true; + uuid_t owner = {0}; + xlator_t *this = NULL; + char *bt = NULL; + struct timespec delay = {0}; + char *key_dup = NULL; + glusterfs_ctx_t *mgmt_lock_timer_ctx = NULL; + xlator_t *mgmt_lock_timer_xl = NULL; + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + GF_ASSERT(priv); + + if (!name || !type) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, + "name or type is null."); + ret = -1; + goto out; + } + + is_valid = glusterd_mgmt_v3_is_type_valid(type); + if (is_valid != _gf_true) { + gf_msg_callingfn(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, + "Invalid entity. 
Cannot perform locking " + "operation on %s types", + type); + ret = -1; + goto out; + } + + ret = snprintf(key, sizeof(key), "%s_%s", name, type); + if (ret != strlen(name) + 1 + strlen(type)) { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CREATE_KEY_FAIL, + "Unable to create key"); + goto out; + } + + gf_msg_debug(this->name, 0, "Trying to acquire lock of %s for %s", key, + uuid_utoa(uuid)); + + ret = glusterd_get_mgmt_v3_lock_owner(key, &owner); + if (ret) { + gf_msg_debug(this->name, 0, "Unable to get mgmt_v3 lock owner"); + goto out; + } + + /* If the lock has already been held for the given volume + * we fail */ + if (!gf_uuid_is_null(owner)) { + gf_msg_callingfn(this->name, GF_LOG_WARNING, 0, + GD_MSG_LOCK_ALREADY_HELD, "Lock for %s held by %s", + name, uuid_utoa(owner)); + ret = -1; + *op_errno = EG_ANOTRANS; + goto out; + } + + lock_obj = GF_MALLOC(sizeof(glusterd_mgmt_v3_lock_obj), + gf_common_mt_mgmt_v3_lock_obj_t); + if (!lock_obj) { + ret = -1; + goto out; + } + + gf_uuid_copy(lock_obj->lock_owner, uuid); + + ret = dict_set_bin(priv->mgmt_v3_lock, key, lock_obj, + sizeof(glusterd_mgmt_v3_lock_obj)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to set lock owner in mgmt_v3 lock"); + GF_FREE(lock_obj); + goto out; + } + + mgmt_lock_timer = GF_CALLOC(1, sizeof(glusterd_mgmt_v3_lock_timer), + gf_common_mt_mgmt_v3_lock_timer_t); + + if (!mgmt_lock_timer) { + ret = -1; + goto out; + } + + mgmt_lock_timer->xl = THIS; + /*changing to default timeout value*/ + priv->mgmt_v3_lock_timeout = GF_LOCK_TIMER; + + ret = -1; + mgmt_lock_timer_xl = mgmt_lock_timer->xl; + if (!mgmt_lock_timer_xl) { + GF_FREE(mgmt_lock_timer); + goto out; + } + + mgmt_lock_timer_ctx = mgmt_lock_timer_xl->ctx; + if (!mgmt_lock_timer_ctx) { + GF_FREE(mgmt_lock_timer); + goto out; + } + + key_dup = gf_strdup(key); + delay.tv_sec = priv->mgmt_v3_lock_timeout; + delay.tv_nsec = 0; + + mgmt_lock_timer->timer = gf_timer_call_after( + mgmt_lock_timer_ctx, delay, gd_mgmt_v3_unlock_timer_cbk, key_dup); + + ret = dict_set_bin(priv->mgmt_v3_lock_timer, key, mgmt_lock_timer, + sizeof(glusterd_mgmt_v3_lock_timer)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to set timer in mgmt_v3 lock"); + GF_FREE(key_dup); + GF_FREE(mgmt_lock_timer); + goto out; + } + + /* Saving the backtrace into the pre-allocated buffer, ctx->btbuf*/ + if ((bt = gf_backtrace_save(NULL))) { + snprintf(key, sizeof(key), "debug.last-success-bt-%s", key_dup); + ret = dict_set_dynstr_with_alloc(priv->mgmt_v3_lock, key, bt); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_SET_FAILED, + "Failed to save " + "the back trace for lock %s granted to %s", + key_dup, uuid_utoa(uuid)); + ret = 0; + } + + gf_msg_debug(this->name, 0, "Lock for %s successfully held by %s", key_dup, + uuid_utoa(uuid)); + + ret = 0; +out: + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +/* + * This call back will ensure to unlock the lock_obj, in case we hit a situation + * where unlocking failed and stale lock exist*/ +void +gd_mgmt_v3_unlock_timer_cbk(void *data) +{ + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + glusterd_mgmt_v3_lock_timer *mgmt_lock_timer = NULL; + char *key = NULL; + int keylen; + char bt_key[PATH_MAX] = ""; + int bt_key_len = 0; + int32_t ret = -1; + glusterfs_ctx_t *mgmt_lock_timer_ctx = NULL; + xlator_t *mgmt_lock_timer_xl = NULL; + gf_timer_t *timer = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + + 
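glusterd_mgmt_v3_lock above keys each lock as "<name>_<type>" in a dict whose value records the owner UUID, and arms a timer (gd_mgmt_v3_unlock_timer_cbk) so that an entry which is never unlocked is removed again after the configured timeout instead of lingering as a stale lock. Below is a rough, self-contained sketch of the same idea using a fixed-size table and explicit expiry timestamps; mgmt_lock, mgmt_unlock, LOCK_TIMEOUT and the string owner are illustrative stand-ins, not the glusterd API.

#include <stdio.h>
#include <string.h>
#include <time.h>

#define MAX_LOCKS 16
#define LOCK_TIMEOUT 180            /* seconds; stands in for GF_LOCK_TIMER */

/* Illustrative lock record: key is "<name>_<type>", owner is a peer UUID. */
struct lock_entry {
    char key[128];
    char owner[64];
    time_t expires;
};

static struct lock_entry table[MAX_LOCKS];

/* Acquire: fail if another owner holds the key and it has not expired yet. */
static int mgmt_lock(const char *name, const char *type, const char *owner)
{
    char key[128];
    int free_slot = -1;
    time_t now = time(NULL);

    snprintf(key, sizeof(key), "%s_%s", name, type);
    for (int i = 0; i < MAX_LOCKS; i++) {
        if (table[i].key[0] == '\0') {
            if (free_slot < 0)
                free_slot = i;
            continue;
        }
        if (strcmp(table[i].key, key) == 0) {
            if (now < table[i].expires)
                return -1;          /* held and not yet stale */
            free_slot = i;          /* stale: reclaim, like the timer cbk */
            break;
        }
    }
    if (free_slot < 0)
        return -1;

    snprintf(table[free_slot].key, sizeof(table[free_slot].key), "%s", key);
    snprintf(table[free_slot].owner, sizeof(table[free_slot].owner), "%s", owner);
    table[free_slot].expires = now + LOCK_TIMEOUT;
    return 0;
}

/* Release: only the recorded owner may drop the lock. */
static int mgmt_unlock(const char *name, const char *type, const char *owner)
{
    char key[128];

    snprintf(key, sizeof(key), "%s_%s", name, type);
    for (int i = 0; i < MAX_LOCKS; i++) {
        if (strcmp(table[i].key, key) == 0 &&
            strcmp(table[i].owner, owner) == 0) {
            memset(&table[i], 0, sizeof(table[i]));
            return 0;
        }
    }
    return -1;                      /* not held, or owner mismatch */
}

int main(void)
{
    printf("lock:   %d\n", mgmt_lock("vol0", "vol", "peer-a"));   /* 0  */
    printf("relock: %d\n", mgmt_lock("vol0", "vol", "peer-b"));   /* -1 */
    printf("unlock: %d\n", mgmt_unlock("vol0", "vol", "peer-a")); /* 0  */
    return 0;
}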
conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, out); + + GF_ASSERT(NULL != data); + key = (char *)data; + + keylen = strlen(key); + dict_deln(conf->mgmt_v3_lock, key, keylen); + + bt_key_len = snprintf(bt_key, PATH_MAX, "debug.last-success-bt-%s", key); + if (bt_key_len != SLEN("debug.last-success-bt-") + keylen) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CREATE_KEY_FAIL, + "Unable to create backtrace " + "key"); + goto out; + } + + dict_deln(conf->mgmt_v3_lock, bt_key, bt_key_len); + + ret = dict_get_bin(conf->mgmt_v3_lock_timer, key, + (void **)&mgmt_lock_timer); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to get lock owner in mgmt_v3 lock"); + } + +out: + if (mgmt_lock_timer && mgmt_lock_timer->timer) { + mgmt_lock_timer_xl = mgmt_lock_timer->xl; + GF_VALIDATE_OR_GOTO(this->name, mgmt_lock_timer_xl, ret_function); + + mgmt_lock_timer_ctx = mgmt_lock_timer_xl->ctx; + GF_VALIDATE_OR_GOTO(this->name, mgmt_lock_timer_ctx, ret_function); + + timer = mgmt_lock_timer->timer; + GF_FREE(timer->data); + gf_timer_call_cancel(mgmt_lock_timer_ctx, mgmt_lock_timer->timer); + dict_deln(conf->mgmt_v3_lock_timer, bt_key, bt_key_len); + mgmt_lock_timer->timer = NULL; + gf_log(this->name, GF_LOG_INFO, + "unlock timer is cancelled for volume_type" + " %s", + key); + } + +ret_function: + + return; +} + +int32_t +glusterd_mgmt_v3_unlock(const char *name, uuid_t uuid, char *type) +{ + char key[PATH_MAX] = ""; + char key_dup[PATH_MAX] = ""; + int keylen; + int32_t ret = -1; + gf_boolean_t is_valid = _gf_true; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_mgmt_v3_lock_timer *mgmt_lock_timer = NULL; + uuid_t owner = {0}; + xlator_t *this = NULL; + glusterfs_ctx_t *mgmt_lock_timer_ctx = NULL; + xlator_t *mgmt_lock_timer_xl = NULL; + gf_timer_t *timer = NULL; + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + GF_ASSERT(priv); + + if (!name || !type) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, + "name is null."); + ret = -1; + goto out; + } + + is_valid = glusterd_mgmt_v3_is_type_valid(type); + if (is_valid != _gf_true) { + gf_msg_callingfn(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, + "Invalid entity. Cannot perform unlocking " + "operation on %s types", + type); + ret = -1; + goto out; + } + + keylen = snprintf(key, sizeof(key), "%s_%s", name, type); + if (keylen != strlen(name) + 1 + strlen(type)) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CREATE_KEY_FAIL, + "Unable to create key"); + ret = -1; + goto out; + } + + gf_msg_debug(this->name, 0, "Trying to release lock of %s %s for %s as %s", + type, name, uuid_utoa(uuid), key); + + ret = glusterd_get_mgmt_v3_lock_owner(key, &owner); + if (ret) { + gf_msg_debug(this->name, 0, "Unable to get mgmt_v3 lock owner"); + goto out; + } + + if (gf_uuid_is_null(owner)) { + gf_msg_callingfn(this->name, GF_LOG_WARNING, 0, GD_MSG_LOCK_NOT_HELD, + "Lock for %s %s not held", type, name); + ret = -1; + goto out; + } + + ret = gf_uuid_compare(uuid, owner); + if (ret) { + gf_msg_callingfn(this->name, GF_LOG_WARNING, 0, + GD_MSG_LOCK_OWNER_MISMATCH, + "Lock owner mismatch. 
" + "Lock for %s %s held by %s", + type, name, uuid_utoa(owner)); + goto out; + } + + /* Removing the mgmt_v3 lock from the global list */ + dict_deln(priv->mgmt_v3_lock, key, keylen); + + ret = dict_get_bin(priv->mgmt_v3_lock_timer, key, + (void **)&mgmt_lock_timer); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to get mgmt lock key in mgmt_v3 lock"); + goto out; + } + + (void)snprintf(key_dup, sizeof(key_dup), "%s", key); + + /* Remove the backtrace key as well */ + ret = snprintf(key, sizeof(key), "debug.last-success-bt-%s", key_dup); + if (ret != SLEN("debug.last-success-bt-") + keylen) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CREATE_KEY_FAIL, + "Unable to create backtrace " + "key"); + ret = -1; + goto out; + } + dict_deln(priv->mgmt_v3_lock, key, ret); + + gf_msg_debug(this->name, 0, "Lock for %s %s successfully released", type, + name); + + /* Release owner reference which was held during lock */ + if (mgmt_lock_timer && mgmt_lock_timer->timer) { + ret = -1; + mgmt_lock_timer_xl = mgmt_lock_timer->xl; + GF_VALIDATE_OR_GOTO(this->name, mgmt_lock_timer_xl, out); + + mgmt_lock_timer_ctx = mgmt_lock_timer_xl->ctx; + GF_VALIDATE_OR_GOTO(this->name, mgmt_lock_timer_ctx, out); + ret = 0; + + timer = mgmt_lock_timer->timer; + GF_FREE(timer->data); + gf_timer_call_cancel(mgmt_lock_timer_ctx, mgmt_lock_timer->timer); + dict_deln(priv->mgmt_v3_lock_timer, key_dup, keylen); + } + ret = glusterd_volinfo_find(name, &volinfo); + if (volinfo && volinfo->stage_deleted) { + /* this indicates a volume still exists and the volume delete + * operation has failed in some of the phases, need to ensure + * stage_deleted flag is set back to false + */ + volinfo->stage_deleted = _gf_false; + gf_log(this->name, GF_LOG_INFO, + "Volume %s still exist, setting " + "stage deleted flag to false for the volume", + volinfo->volname); + } + ret = 0; +out: + + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-locks.h b/xlators/mgmt/glusterd/src/glusterd-locks.h new file mode 100644 index 00000000000..44667cebd3d --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-locks.h @@ -0,0 +1,57 @@ +/* + Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ +#ifndef _GLUSTERD_LOCKS_H_ +#define _GLUSTERD_LOCKS_H_ + +typedef struct glusterd_mgmt_v3_lock_object_ { + uuid_t lock_owner; +} glusterd_mgmt_v3_lock_obj; + +typedef struct glusterd_mgmt_v3_lock_timer_ { + gf_timer_t *timer; + xlator_t *xl; +} glusterd_mgmt_v3_lock_timer; + +typedef struct glusterd_mgmt_v3_lock_valid_entities { + char *type; /* Entity type like vol, snap */ + gf_boolean_t default_value; /* The default value that * + * determines if the locks * + * should be held for that * + * entity */ +} glusterd_valid_entities; + +int32_t +glusterd_mgmt_v3_lock_init(); + +void +glusterd_mgmt_v3_lock_fini(); + +int32_t +glusterd_mgmt_v3_lock_timer_init(); + +void +glusterd_mgmt_v3_lock_timer_fini(); + +int32_t +glusterd_mgmt_v3_lock(const char *key, uuid_t uuid, uint32_t *op_errno, + char *type); + +int32_t +glusterd_mgmt_v3_unlock(const char *key, uuid_t uuid, char *type); + +int32_t +glusterd_multiple_mgmt_v3_lock(dict_t *dict, uuid_t uuid, uint32_t *op_errno); + +int32_t +glusterd_multiple_mgmt_v3_unlock(dict_t *dict, uuid_t uuid); + +void +gd_mgmt_v3_unlock_timer_cbk(void *data); +#endif diff --git a/xlators/mgmt/glusterd/src/glusterd-log-ops.c b/xlators/mgmt/glusterd/src/glusterd-log-ops.c new file mode 100644 index 00000000000..34abf35cb00 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-log-ops.c @@ -0,0 +1,290 @@ +/* + Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#include <glusterfs/common-utils.h> +#include "cli1-xdr.h" +#include "xdr-generic.h" +#include "glusterd.h" +#include "glusterd-op-sm.h" +#include "glusterd-store.h" +#include "glusterd-utils.h" +#include "glusterd-volgen.h" +#include "glusterd-messages.h" +#include <glusterfs/syscall.h> + +#include <signal.h> + +int +__glusterd_handle_log_rotate(rpcsvc_request_t *req) +{ + int32_t ret = -1; + gf_cli_req cli_req = {{ + 0, + }}; + dict_t *dict = NULL; + glusterd_op_t cli_op = GD_OP_LOG_ROTATE; + char *volname = NULL; + char msg[64] = { + 0, + }; + xlator_t *this = NULL; + + GF_ASSERT(req); + this = THIS; + GF_ASSERT(this); + + ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + // failed to decode msg; + req->rpc_err = GARBAGE_ARGS; + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL); + goto out; + } + + if (cli_req.dict.dict_len) { + /* Unserialize the dictionary */ + dict = dict_new(); + + ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + "failed to " + "unserialize req-buffer to dictionary"); + snprintf(msg, sizeof(msg), + "Unable to decode the " + "command"); + goto out; + } + } + + ret = dict_get_str(dict, "volname", &volname); + if (ret) { + snprintf(msg, sizeof(msg), "Failed to get volume name"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", msg); + goto out; + } + + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_LOG_ROTATE_REQ_RECVD, + "Received log rotate req " + "for volume %s", + volname); + + ret = dict_set_uint64(dict, "rotate-key", (uint64_t)gf_time()); + if (ret) + goto out; + + ret = glusterd_op_begin_synctask(req, GD_OP_LOG_ROTATE, dict); + +out: + if (ret) { + if (msg[0] == '\0') + 
snprintf(msg, sizeof(msg), "Operation failed"); + ret = glusterd_op_send_cli_response(cli_op, ret, 0, req, dict, msg); + } + + free(cli_req.dict.dict_val); + return ret; +} + +int +glusterd_handle_log_rotate(rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler(req, __glusterd_handle_log_rotate); +} + +/* op-sm */ +int +glusterd_op_stage_log_rotate(dict_t *dict, char **op_errstr) +{ + int ret = -1; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + char msg[2048] = {0}; + char *brick = NULL; + + ret = dict_get_str(dict, "volname", &volname); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get volume name"); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + snprintf(msg, sizeof(msg), "Volume %s does not exist", volname); + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, "%s", msg); + *op_errstr = gf_strdup(msg); + goto out; + } + + if (_gf_false == glusterd_is_volume_started(volinfo)) { + snprintf(msg, sizeof(msg), + "Volume %s needs to be started before" + " log rotate.", + volname); + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_STARTED, "%s", msg); + *op_errstr = gf_strdup(msg); + ret = -1; + goto out; + } + + ret = dict_get_str(dict, "brick", &brick); + /* If no brick is specified, do log-rotate for + all the bricks in the volume */ + if (ret) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=brick", NULL); + ret = 0; + goto out; + } + + ret = glusterd_volume_brickinfo_get_by_brick(brick, volinfo, NULL, + _gf_false); + if (ret) { + snprintf(msg, sizeof(msg), + "Incorrect brick %s " + "for volume %s", + brick, volname); + gf_msg("glusterd", GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, "%s", + msg); + *op_errstr = gf_strdup(msg); + goto out; + } +out: + gf_msg_debug("glusterd", 0, "Returning %d", ret); + + return ret; +} + +int +glusterd_op_log_rotate(dict_t *dict) +{ + int ret = -1; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + xlator_t *this = NULL; + char *volname = NULL; + char *brick = NULL; + char logfile[PATH_MAX] = { + 0, + }; + char pidfile[PATH_MAX] = { + 0, + }; + FILE *file = NULL; + pid_t pid = 0; + uint64_t key = 0; + int valid_brick = 0; + glusterd_brickinfo_t *tmpbrkinfo = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + ret = dict_get_str(dict, "volname", &volname); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "volname not found"); + goto out; + } + + ret = dict_get_uint64(dict, "rotate-key", &key); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "rotate key not found"); + goto out; + } + + ret = dict_get_str(dict, "brick", &brick); + /* If no brick is specified, do log-rotate for + all the bricks in the volume */ + if (ret) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=brick", NULL); + goto cont; + } + + ret = glusterd_brickinfo_new_from_brick(brick, &tmpbrkinfo, _gf_false, + NULL); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_BRICK_NOT_FOUND, + "cannot get brickinfo from brick"); + goto out; + } + +cont: + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) + goto out; + + ret = -1; + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + if (gf_uuid_compare(brickinfo->uuid, MY_UUID)) + continue; + + if (tmpbrkinfo && brick && + (strcmp(tmpbrkinfo->hostname, brickinfo->hostname) || + 
strcmp(tmpbrkinfo->path, brickinfo->path))) + continue; + + valid_brick = 1; + + GLUSTERD_GET_BRICK_PIDFILE(pidfile, volinfo, brickinfo, priv); + file = fopen(pidfile, "r+"); + if (!file) { + gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "Unable to open pidfile: %s", pidfile); + ret = -1; + goto out; + } + + ret = fscanf(file, "%d", &pid); + if (ret <= 0) { + fclose(file); + gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "Unable to read pidfile: %s", pidfile); + ret = -1; + goto out; + } + fclose(file); + file = NULL; + + snprintf(logfile, PATH_MAX, "%s.%" PRIu64, brickinfo->logfile, key); + + ret = sys_rename(brickinfo->logfile, logfile); + if (ret) + gf_msg("glusterd", GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED, + "rename failed"); + + ret = kill(pid, SIGHUP); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_PID_KILL_FAIL, + "Unable to SIGHUP to %d", pid); + goto out; + } + ret = 0; + + /* If request was for brick, only one iteration is enough */ + if (brick) + break; + } + + if (ret && !valid_brick) + ret = 0; + +out: + if (tmpbrkinfo) + glusterd_brickinfo_delete(tmpbrkinfo); + + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-mem-types.h b/xlators/mgmt/glusterd/src/glusterd-mem-types.h index bd7b1b65fd2..d7257e1a7b5 100644 --- a/xlators/mgmt/glusterd/src/glusterd-mem-types.h +++ b/xlators/mgmt/glusterd/src/glusterd-mem-types.h @@ -1,70 +1,58 @@ /* - Copyright (c) 2008-2010 Gluster, Inc. <http://www.gluster.com> + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
*/ - #ifndef __GLUSTERD_MEM_TYPES_H__ #define __GLUSTERD_MEM_TYPES_H__ -#include "mem-types.h" +#include <glusterfs/mem-types.h> typedef enum gf_gld_mem_types_ { - gf_gld_mt_dir_entry_t = gf_common_mt_end + 1, - gf_gld_mt_volfile_ctx = gf_common_mt_end + 2, - gf_gld_mt_glusterd_state_t = gf_common_mt_end + 3, - gf_gld_mt_glusterd_conf_t = gf_common_mt_end + 4, - gf_gld_mt_locker = gf_common_mt_end + 5, - gf_gld_mt_string = gf_common_mt_end + 6, - gf_gld_mt_lock_table = gf_common_mt_end + 7, - gf_gld_mt_char = gf_common_mt_end + 8, - gf_gld_mt_glusterd_connection_t = gf_common_mt_end + 9, - gf_gld_mt_resolve_comp = gf_common_mt_end + 10, - gf_gld_mt_peerinfo_t = gf_common_mt_end + 11, - gf_gld_mt_friend_sm_event_t = gf_common_mt_end + 12, - gf_gld_mt_friend_req_ctx_t = gf_common_mt_end + 13, - gf_gld_mt_friend_update_ctx_t = gf_common_mt_end + 14, - gf_gld_mt_op_sm_event_t = gf_common_mt_end + 15, - gf_gld_mt_op_lock_ctx_t = gf_common_mt_end + 16, - gf_gld_mt_op_stage_ctx_t = gf_common_mt_end + 17, - gf_gld_mt_op_commit_ctx_t = gf_common_mt_end + 18, - gf_gld_mt_mop_stage_req_t = gf_common_mt_end + 19, - gf_gld_mt_probe_ctx_t = gf_common_mt_end + 20, - gf_gld_mt_create_volume_ctx_t = gf_common_mt_end + 21, - gf_gld_mt_start_volume_ctx_t = gf_common_mt_end + 22, - gf_gld_mt_stop_volume_ctx_t = gf_common_mt_end + 23, - gf_gld_mt_delete_volume_ctx_t = gf_common_mt_end + 24, - gf_gld_mt_glusterd_volinfo_t = gf_common_mt_end + 25, - gf_gld_mt_glusterd_brickinfo_t = gf_common_mt_end + 26, - gf_gld_mt_peer_hostname_t = gf_common_mt_end + 27, - gf_gld_mt_ifreq = gf_common_mt_end + 28, - gf_gld_mt_store_handle_t = gf_common_mt_end + 29, - gf_gld_mt_store_iter_t = gf_common_mt_end + 30, - gf_gld_mt_defrag_info = gf_common_mt_end + 31, - gf_gld_mt_log_filename_ctx_t = gf_common_mt_end + 32, - gf_gld_mt_log_locate_ctx_t = gf_common_mt_end + 33, - gf_gld_mt_log_rotate_ctx_t = gf_common_mt_end + 34, - gf_gld_mt_peerctx_t = gf_common_mt_end + 35, - gf_gld_mt_sm_tr_log_t = gf_common_mt_end + 36, - gf_gld_mt_pending_node_t = gf_common_mt_end + 37, - gf_gld_mt_brick_rsp_ctx_t = gf_common_mt_end + 38, - gf_gld_mt_mop_brick_req_t = gf_common_mt_end + 39, - gf_gld_mt_op_allack_ctx_t = gf_common_mt_end + 40, - gf_gld_mt_end = gf_common_mt_end + 41 + gf_gld_mt_glusterd_conf_t = gf_common_mt_end + 1, + gf_gld_mt_char, + gf_gld_mt_peerinfo_t, + gf_gld_mt_friend_sm_event_t, + gf_gld_mt_friend_req_ctx_t, + gf_gld_mt_friend_update_ctx_t, + gf_gld_mt_op_sm_event_t, + gf_gld_mt_op_lock_ctx_t, + gf_gld_mt_op_stage_ctx_t, + gf_gld_mt_op_commit_ctx_t, + gf_gld_mt_mop_stage_req_t, + gf_gld_mt_probe_ctx_t, + gf_gld_mt_glusterd_volinfo_t, + gf_gld_mt_volinfo_dict_data_t, + gf_gld_mt_glusterd_brickinfo_t, + gf_gld_mt_peer_hostname_t, + gf_gld_mt_defrag_info, + gf_gld_mt_peerctx_t, + gf_gld_mt_sm_tr_log_t, + gf_gld_mt_pending_node_t, + gf_gld_mt_brick_rsp_ctx_t, + gf_gld_mt_mop_brick_req_t, + gf_gld_mt_op_allack_ctx_t, + gf_gld_mt_linearr, + gf_gld_mt_linebuf, + gf_gld_mt_mount_pattern, + gf_gld_mt_mount_comp_container, + gf_gld_mt_mount_spec, + gf_gld_mt_georep_meet_spec, + gf_gld_mt_charptr, + gf_gld_mt_hooks_stub_t, + gf_gld_mt_hooks_priv_t, + gf_gld_mt_mop_commit_req_t, + gf_gld_mt_int, + gf_gld_mt_snap_t, + gf_gld_mt_missed_snapinfo_t, + gf_gld_mt_snap_create_args_t, + gf_gld_mt_glusterd_brick_proc_t, + gf_gld_mt_glusterd_svc_proc_t, + gf_gld_mt_end, } gf_gld_mem_types_t; #endif - diff --git a/xlators/mgmt/glusterd/src/glusterd-messages.h b/xlators/mgmt/glusterd/src/glusterd-messages.h new file mode 100644 index 
00000000000..3a1e600fb03 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-messages.h @@ -0,0 +1,451 @@ +/* + Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _GLUSTERD_MESSAGES_H_ +#define _GLUSTERD_MESSAGES_H_ + +#include <glusterfs/glfs-message-id.h> + +/* To add new message IDs, append new identifiers at the end of the list. + * + * Never remove a message ID. If it's not used anymore, you can rename it or + * leave it as it is, but not delete it. This is to prevent reutilization of + * IDs by other messages. + * + * The component name must match one of the entries defined in + * glfs-message-id.h. + */ + +GLFS_MSGID( + GLUSTERD, GD_MSG_SERVER_QUORUM_NOT_MET, + GD_MSG_SERVER_QUORUM_LOST_STOPPING_BRICKS, + GD_MSG_SERVER_QUORUM_MET_STARTING_BRICKS, GD_MSG_PEER_DISCONNECTED, + GD_MSG_BRICK_DISCONNECTED, GD_MSG_NODE_DISCONNECTED, + GD_MSG_REBALANCE_DISCONNECTED, GD_MSG_VOL_CLEANUP_FAIL, + GD_MSG_VOL_VERS_MISMATCH, GD_MSG_CKSUM_VERS_MISMATCH, + GD_MSG_QUOTA_CONFIG_VERS_MISMATCH, GD_MSG_QUOTA_CONFIG_CKSUM_MISMATCH, + GD_MSG_BRICK_STOP_FAIL, GD_MSG_SVC_KILL_FAIL, GD_MSG_PID_KILL_FAIL, + GD_MSG_REBAL_NO_SOCK_FILE, GD_MSG_UNIX_OP_BUILD_FAIL, + GD_MSG_RPC_CREATE_FAIL, GD_MSG_FAIL_DEFAULT_OPT_SET, + GD_MSG_CLUSTER_UNLOCK_FAILED, GD_MSG_NO_MEMORY, GD_MSG_UNSUPPORTED_VERSION, + GD_MSG_COMMAND_NOT_FOUND, GD_MSG_SNAPSHOT_OP_FAILED, GD_MSG_INVALID_ENTRY, + GD_MSG_VOL_NOT_FOUND, GD_MSG_REG_COMPILE_FAILED, GD_MSG_FILE_OP_FAILED, + GD_MSG_SNAP_CREATION_FAIL, GD_MSG_VOL_OP_FAILED, GD_MSG_CREATE_DIR_FAILED, + GD_MSG_DIR_OP_FAILED, GD_MSG_VOL_STOP_FAILED, GD_MSG_NO_CLI_RESP, + GD_MSG_LOCK_INIT_FAILED, GD_MSG_SNAP_LIST_GET_FAIL, GD_MSG_UNOUNT_FAILED, + GD_MSG_LOCK_DESTROY_FAILED, GD_MSG_SNAP_CLEANUP_FAIL, + GD_MSG_SNAP_ACTIVATE_FAIL, GD_MSG_SNAP_DEACTIVATE_FAIL, + GD_MSG_SNAP_RESTORE_FAIL, GD_MSG_SNAP_REMOVE_FAIL, GD_MSG_SNAP_CONFIG_FAIL, + GD_MSG_SNAP_STATUS_FAIL, GD_MSG_SNAP_INIT_FAIL, GD_MSG_VOLINFO_SET_FAIL, + GD_MSG_VOLINFO_GET_FAIL, GD_MSG_BRICK_CREATION_FAIL, + GD_MSG_BRICK_GET_INFO_FAIL, GD_MSG_BRICK_NEW_INFO_FAIL, GD_MSG_LVS_FAIL, + GD_MSG_SET_XATTR_FAIL, GD_MSG_UMOUNTING_SNAP_BRICK, GD_MSG_OP_UNSUPPORTED, + GD_MSG_SNAP_NOT_FOUND, GD_MSG_FS_LABEL_UPDATE_FAIL, GD_MSG_LVM_MOUNT_FAILED, + GD_MSG_DICT_SET_FAILED, GD_MSG_CANONICALIZE_FAIL, GD_MSG_DICT_GET_FAILED, + GD_MSG_SNAP_INFO_FAIL, GD_MSG_SNAP_VOL_CONFIG_FAIL, + GD_MSG_SNAP_OBJECT_STORE_FAIL, GD_MSG_DICT_UNSERIALIZE_FAIL, + GD_MSG_SNAP_RESTORE_REVERT_FAIL, GD_MSG_SNAP_LIST_SET_FAIL, + GD_MSG_VOLFILE_CREATE_FAIL, GD_MSG_VOLINFO_REMOVE_FAIL, + GD_MSG_VOL_DELETE_FAIL, GD_MSG_SNAPSHOT_PENDING, + GD_MSG_BRICK_PATH_UNMOUNTED, GD_MSG_BRICK_ADD_FAIL, + GD_MSG_BRICK_SET_INFO_FAIL, GD_MSG_LVCREATE_FAIL, GD_MSG_VG_GET_FAIL, + GD_MSG_TPOOL_GET_FAIL, GD_MSG_LVM_REMOVE_FAILED, + GD_MSG_MISSEDSNAP_INFO_SET_FAIL, GD_MSG_BRK_MOUNTOPTS_FAIL, + GD_MSG_MISSED_SNAP_LIST_STORE_FAIL, GD_MSG_INVALID_MISSED_SNAP_ENTRY, + GD_MSG_MISSED_SNAP_GET_FAIL, GD_MSG_MISSED_SNAP_CREATE_FAIL, + GD_MSG_DUP_ENTRY, GD_MSG_MISSED_SNAP_STATUS_DONE, GD_MSG_NO_EXEC_PERMS, + GD_MSG_GLOBAL_OP_VERSION_SET_FAIL, GD_MSG_HARD_LIMIT_SET_FAIL, + GD_MSG_OP_SUCCESS, GD_MSG_STORE_FAIL, GD_MSG_GLOBAL_OP_VERSION_GET_FAIL, + GD_MSG_GEOREP_GET_FAILED, GD_MSG_GLUSTERD_UMOUNT_FAIL, + 
GD_MSG_QUORUM_CHECK_FAIL, GD_MSG_QUORUM_COUNT_IGNORED, + GD_MSG_SNAP_MOUNT_FAIL, GD_MSG_RSP_DICT_USE_FAIL, GD_MSG_SNAP_IMPORT_FAIL, + GD_MSG_SNAP_CONFLICT, GD_MSG_MISSED_SNAP_DELETE, + GD_MSG_QUOTA_CONFIG_IMPORT_FAIL, GD_MSG_SNAPDIR_CREATE_FAIL, + GD_MSG_MISSED_SNAP_PRESENT, GD_MSG_UUID_NULL, GD_MSG_TSTAMP_SET_FAIL, + GD_MSG_RESP_AGGR_FAIL, GD_MSG_DICT_EMPTY, GD_MSG_DICT_CREATE_FAIL, + GD_MSG_SNAPD_STOP_FAIL, GD_MSG_SOFT_LIMIT_REACHED, GD_MSG_SNAPD_START_FAIL, + GD_MSG_SNAPD_CREATE_FAIL, GD_MSG_SNAPD_INIT_FAIL, GD_MSG_MGMTV3_OP_FAIL, + GD_MSG_MGMTV3_PAYLOAD_BUILD_FAIL, GD_MSG_MGMTV3_UNLOCK_FAIL, + GD_MSG_MGMTV3_LOCK_GET_FAIL, GD_MSG_MGMTV3_LOCKDOWN_FAIL, + GD_MSG_POST_VALIDATION_FAIL, GD_MSG_PRE_VALIDATION_FAIL, + GD_MSG_COMMIT_OP_FAIL, GD_MSG_PEER_LIST_CREATE_FAIL, GD_MSG_BRICK_OP_FAIL, + GD_MSG_OPINFO_SET_FAIL, GD_MSG_OP_EVENT_UNLOCK_FAIL, + GD_MSG_MGMTV3_OP_RESP_FAIL, GD_MSG_PEER_NOT_FOUND, GD_MSG_REQ_DECODE_FAIL, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, GD_MSG_ALREADY_STOPPED, + GD_MSG_PRE_VALD_RESP_FAIL, GD_MSG_SVC_GET_FAIL, GD_MSG_VOLFILE_NOT_FOUND, + GD_MSG_OP_EVENT_LOCK_FAIL, GD_MSG_NON_STRIPE_VOL, GD_MSG_SNAPD_OBJ_GET_FAIL, + GD_MSG_QUOTA_DISABLED, GD_MSG_CACHE_MINMAX_SIZE_INVALID, + GD_MSG_QUOTA_GET_STAT_FAIL, GD_MSG_SUBVOLUMES_EXCEED, GD_MSG_BRICK_ADD, + GD_MSG_BRICK_REMOVE, GD_MSG_CREATE_KEY_FAIL, + GD_MSG_MULTIPLE_LOCK_ACQUIRE_FAIL, GD_MSG_MULTIPLE_LOCK_RELEASE_FAIL, + GD_MSG_RESP_FROM_UNKNOWN_PEER, GD_MSG_BRICK_MOUNDIRS_AGGR_FAIL, + GD_MSG_GFID_VALIDATE_SET_FAIL, GD_MSG_PEER_LOCK_FAIL, + GD_MSG_PEER_UNLOCK_FAIL, GD_MSG_MGMT_OP_FAIL, + GD_MSG_TRANS_OPINFO_CLEAR_FAIL, GD_MSG_GLUSTERD_LOCK_FAIL, + GD_MSG_TRANS_OPINFO_SET_FAIL, GD_MSG_TRANS_IDGEN_FAIL, GD_MSG_RPC_FAILURE, + GD_MSG_OP_VERS_ADJUST_FAIL, GD_MSG_SNAP_DEVICE_NAME_GET_FAIL, + GD_MSG_SNAP_STATUS_NOT_PENDING, GD_MSG_MGMT_PGM_SET_FAIL, + GD_MSG_EVENT_INJECT_FAIL, GD_MSG_VERS_INFO, GD_MSG_VOL_INFO_REQ_RECVD, + GD_MSG_VERS_GET_FAIL, GD_MSG_EVENT_NEW_GET_FAIL, GD_MSG_RPC_LAYER_ERROR, + GD_MSG_NO_HANDSHAKE_ACK, GD_MSG_OP_VERSION_MISMATCH, + GD_MSG_HANDSHAKE_REQ_REJECTED, GD_MSG_UNKNOWN_MODE, + GD_MSG_DEFRAG_STATUS_UPDATED, GD_MSG_NO_FLAG_SET, + GD_MSG_VERSION_UNSUPPORTED, GD_MSG_UUID_SET_FAIL, GD_MSG_MOUNT_REQ_FAIL, + GD_MSG_GLUSTERD_GLOBAL_INFO_STORE_FAIL, GD_MSG_OP_VERS_STORE_FAIL, + GD_MSG_SNAP_AUTOMIC_UPDATE_FAIL, GD_MSG_SNAPINFO_WRITE_FAIL, + GD_MSG_SNAPINFO_CREATE_FAIL, GD_MSG_SNAPD_INFO_STORE_FAIL, + GD_MSG_BRK_MNTPATH_MOUNT_FAIL, GD_MSG_BRK_MNTPATH_GET_FAIL, + GD_MSG_SNAP_BRK_MNT_RECREATE_FAIL, GD_MSG_SNAP_RESOLVE_BRICK_FAIL, + GD_MSG_RESOLVE_BRICK_FAIL, GD_MSG_BRK_MNT_RECREATE_FAIL, + GD_MSG_TMP_FILE_UNLINK_FAIL, GD_MSG_VOL_VALS_WRITE_FAIL, + GD_MSG_STORE_HANDLE_GET_FAIL, GD_MSG_STORE_HANDLE_WRITE_FAIL, + GD_MSG_MISSED_SNAP_LIST_STORE_HANDLE_GET_FAIL, + GD_MSG_MISSED_SNAP_LIST_EMPTY, GD_MSG_SNAP_VOL_RETRIEVE_FAIL, + GD_MSG_SNAPSHOT_UPDATE_FAIL, GD_MSG_SNAPD_PORT_STORE_FAIL, + GD_MSG_CKSUM_STORE_FAIL, GD_MSG_STORE_HANDLE_CREATE_FAIL, + GD_MSG_HANDLE_NULL, GD_MSG_VOL_RESTORE_FAIL, GD_MSG_NAME_TOO_LONG, + GD_MSG_UUID_PARSE_FAIL, GD_MSG_UNKNOWN_KEY, GD_MSG_STORE_ITER_DESTROY_FAIL, + GD_MSG_STORE_ITER_GET_FAIL, GD_MSG_VOLINFO_UPDATE_FAIL, + GD_MSG_PARSE_BRICKINFO_FAIL, GD_MSG_VERS_STORE_FAIL, GD_MSG_HEADER_ADD_FAIL, + GD_MSG_QUOTA_CONF_WRITE_FAIL, GD_MSG_QUOTA_CONF_CORRUPT, GD_MSG_FORK_FAIL, + GD_MSG_CKSUM_COMPUTE_FAIL, GD_MSG_VERS_CKSUM_STORE_FAIL, + GD_MSG_GET_XATTR_FAIL, GD_MSG_CONVERSION_FAILED, GD_MSG_VOL_NOT_DISTRIBUTE, + GD_MSG_VOL_STOPPED, GD_MSG_OPCTX_GET_FAIL, GD_MSG_TASKID_GEN_FAIL, + 
GD_MSG_REBALANCE_ID_MISSING, GD_MSG_NO_REBALANCE_PFX_IN_VOLNAME, + GD_MSG_DEFRAG_STATUS_UPDATE_FAIL, GD_MSG_UUID_GEN_STORE_FAIL, + GD_MSG_UUID_STORE_FAIL, GD_MSG_NO_INIT, GD_MSG_MODULE_NOT_INSTALLED, + GD_MSG_MODULE_NOT_WORKING, GD_MSG_WRITE_ACCESS_GRANT_FAIL, + GD_MSG_DIRPATH_TOO_LONG, GD_MSG_LOGGROUP_INVALID, GD_MSG_DIR_PERM_LIBERAL, + GD_MSG_DIR_PERM_STRICT, GD_MSG_MOUNT_SPEC_INSTALL_FAIL, + GD_MSG_GLUSTERD_SOCK_LISTENER_START_FAIL, GD_MSG_DIR_NOT_FOUND, + GD_MSG_FAILED_INIT_SHDSVC, GD_MSG_FAILED_INIT_NFSSVC, + GD_MSG_FAILED_INIT_QUOTASVC, GD_MSG_RPC_INIT_FAIL, + GD_MSG_RPCSVC_REG_NOTIFY_RETURNED, GD_MSG_RPC_TRANSPORT_COUNT_GET_FAIL, + GD_MSG_RPC_LISTENER_CREATE_FAIL, GD_MSG_OP_VERS_RESTORE_FAIL, + GD_MSG_SELF_HEALD_DISABLED, GD_MSG_PRIV_NULL, GD_MSG_GSYNC_VALIDATION_FAIL, + GD_MSG_SLAVE_CONFPATH_DETAILS_FETCH_FAIL, GD_MSG_OP_NOT_PERMITTED_AC_REQD, + GD_MSG_OP_NOT_PERMITTED, GD_MSG_REBALANCE_START_FAIL, + GD_MSG_NFS_RECONF_FAIL, GD_MSG_REMOVE_BRICK_ID_SET_FAIL, + GD_MSG_BRICK_MOUNTDIR_GET_FAIL, GD_MSG_BRICK_NOT_FOUND, + GD_MSG_BRKPATH_TOO_LONG, GD_MSG_CLRLOCKS_CLNT_UMOUNT_FAIL, + GD_MSG_CLRLOCKS_CLNT_MOUNT_FAIL, GD_MSG_CLRLOCKS_MOUNTDIR_CREATE_FAIL, + GD_MSG_BRK_PORT_NUM_GET_FAIL, GD_MSG_BRK_STATEDUMP_FAIL, + GD_MSG_VOL_GRAPH_CHANGE_NOTIFY_FAIL, GD_MSG_INVALID_VG, + GD_MSG_GLUSTERD_OP_FAILED, GD_MSG_HOSTNAME_ADD_TO_PEERLIST_FAIL, + GD_MSG_STALE_PEERINFO_REMOVE_FAIL, GD_MSG_TRANS_ID_GET_FAIL, + GD_MSG_RES_DECODE_FAIL, GD_MSG_VOL_ALREADY_EXIST, GD_MSG_BAD_BRKORDER, + GD_MSG_BAD_BRKORDER_CHECK_FAIL, GD_MSG_BRICK_SELECT_FAIL, + GD_MSG_NO_LOCK_RESP_FROM_PEER, GD_MSG_MGMTV3_LOCK_FROM_UUID_REJCT, + GD_MSG_STAGE_FROM_UUID_REJCT, GD_MSG_UNLOCK_FROM_UUID_REJCT, + GD_MSG_MGMTV3_UNLOCK_FROM_UUID_REJCT, GD_MSG_COMMIT_FROM_UUID_REJCT, + GD_MSG_VOL_NOT_STARTED, GD_MSG_VOL_NOT_REPLICA, GD_MSG_VOL_NOT_DISPERSE, + GD_MSG_OLD_REMOVE_BRICK_EXISTS, GD_MSG_USE_THE_FORCE, GD_MSG_OIP, + GD_MSG_OIP_RETRY_LATER, GD_MSG_GSYNC_RESTART_FAIL, + GD_MSG_LOCK_FROM_UUID_REJCT, GD_MSG_BRICK_OP_PAYLOAD_BUILD_FAIL, + GD_MSG_HOSTNAME_RESOLVE_FAIL, GD_MSG_COUNT_VALIDATE_FAILED, + GD_MSG_SPAWNING_CHILD_FAILED, GD_MSG_READ_CHILD_DATA_FAILED, + GD_MSG_DEFAULT_TEMP_CONFIG, GD_MSG_PIDFILE_CREATE_FAILED, + GD_MSG_GSYNCD_SPAWN_FAILED, GD_MSG_SUBOP_NOT_FOUND, GD_MSG_RESERVED_OPTION, + GD_MSG_GLUSTERD_PRIV_NOT_FOUND, GD_MSG_SLAVEINFO_FETCH_ERROR, + GD_MSG_VALIDATE_FAILED, GD_MSG_INVOKE_ERROR, GD_MSG_SESSION_CREATE_ERROR, + GD_MSG_STOP_FORCE, GD_MSG_GET_CONFIG_INFO_FAILED, + GD_MSG_STAT_FILE_READ_FAILED, GD_MSG_CONF_PATH_ASSIGN_FAILED, + GD_MSG_SESSION_INACTIVE, GD_MSG_PIDFILE_NOT_FOUND, GD_MSG_PEER_CMD_ERROR, + GD_MSG_SRC_FILE_ERROR, GD_MSG_GET_STATEFILE_NAME_FAILED, GD_MSG_STATUS_NULL, + GD_MSG_STATUSFILE_CREATE_FAILED, GD_MSG_SLAVE_URL_INVALID, + GD_MSG_INVALID_SLAVE, GD_MSG_READ_ERROR, GD_MSG_ARG_FETCH_ERROR, + GD_MSG_REG_FILE_MISSING, GD_MSG_STATEFILE_NAME_NOT_FOUND, + GD_MSG_GEO_REP_START_FAILED, GD_MSG_GSYNCD_ERROR, + GD_MSG_UPDATE_STATEFILE_FAILED, GD_MSG_STATUS_UPDATE_FAILED, + GD_MSG_GSYNCD_OP_SET_FAILED, GD_MSG_BUFFER_EMPTY, GD_MSG_CONFIG_INFO, + GD_MSG_FETCH_CONFIG_VAL_FAILED, GD_MSG_GSYNCD_PARSE_ERROR, + GD_MSG_SESSION_ALREADY_EXIST, GD_MSG_FORCE_CREATE_SESSION, + GD_MSG_GET_KEY_FAILED, GD_MSG_SESSION_DEL_FAILED, GD_MSG_CMD_EXEC_FAIL, + GD_MSG_STRDUP_FAILED, GD_MSG_UNABLE_TO_END, GD_MSG_PAUSE_FAILED, + GD_MSG_NORMALIZE_URL_FAIL, GD_MSG_MODULE_ERROR, + GD_MSG_SLAVEINFO_STORE_ERROR, GD_MSG_MARKER_START_FAIL, + GD_MSG_RESUME_FAILED, GD_MSG_GLUSTERFS_START_FAIL, + GD_MSG_GLUSTERFS_STOP_FAIL, GD_MSG_RBOP_STATE_STORE_FAIL, + 
GD_MSG_PUMP_XLATOR_DISABLED, GD_MSG_ABORT_OP_FAIL, GD_MSG_PAUSE_OP_FAIL, + GD_MSG_GLUSTER_SERVICE_START_FAIL, GD_MSG_HANDSHAKE_FAILED, + GD_MSG_CLI_REQ_EMPTY, GD_MSG_PEER_ADD_FAIL, + GD_MSG_SYNC_FROM_LOCALHOST_UNALLOWED, GD_MSG_UUIDS_SAME_RETRY, + GD_MSG_TSP_ALREADY_FORMED, GD_MSG_VOLS_ALREADY_PRESENT, + GD_MSG_REQ_CTX_CREATE_FAIL, GD_MSG_PEER_INFO_UPDATE_FAIL, + GD_MSG_PEERINFO_CREATE_FAIL, GD_MSG_REQ_FROM_UNKNOWN_PEER, + GD_MSG_STATUS_REPLY_STRING_CREATE_FAIL, GD_MSG_TOKENIZE_FAIL, + GD_MSG_LAZY_UMOUNT_FAIL, GD_MSG_NFS_SERVER_START_FAIL, + GD_MSG_GLUSTER_SERVICES_STOP_FAIL, GD_MSG_BRK_CLEANUP_FAIL, + GD_MSG_RB_ALREADY_STARTED, GD_MSG_RB_BRICKINFO_GET_FAIL, GD_MSG_BAD_FORMAT, + GD_MSG_RB_CMD_FAIL, GD_MSG_RB_NOT_STARTED_OR_PAUSED, GD_MSG_RB_NOT_STARTED, + GD_MSG_RB_PAUSED_ALREADY, GD_MSG_NO_FREE_PORTS, + GD_MSG_EVENT_STATE_TRANSITION_FAIL, GD_MSG_HANDLER_RETURNED, + GD_MSG_SNAP_COMPARE_CONFLICT, GD_MSG_PEER_DETACH_CLEANUP_FAIL, + GD_MSG_STALE_VOL_REMOVE_FAIL, GD_MSG_AC_ERROR, GD_MSG_LOCK_FAIL, + GD_MSG_MGMTV3_LOCK_REQ_SEND_FAIL, GD_MSG_GLUSTERD_UNLOCK_FAIL, + GD_MSG_RBOP_START_FAIL, GD_MSG_UNKNOWN_RESPONSE, + GD_MSG_COMMIT_REQ_SEND_FAIL, GD_MSG_OPCTX_UPDATE_FAIL, GD_MSG_OPCTX_NULL, + GD_MSG_DICT_COPY_FAIL, GD_MSG_SHD_STATUS_SET_FAIL, + GD_MSG_REPLICA_INDEX_GET_FAIL, GD_MSG_NFS_SERVER_NOT_RUNNING, + GD_MSG_STAGE_REQ_SEND_FAIL, GD_MSG_LOCK_REQ_SEND_FAIL, + GD_MSG_VOLNAMES_GET_FAIL, GD_MSG_NO_TASK_ID, GD_MSG_ADD_REMOVE_BRICK_FAIL, + GD_MSG_SVC_RESTART_FAIL, GD_MSG_VOL_SET_FAIL, GD_MSG_QUOTAD_NOT_RUNNING, + GD_MSG_XLATOR_COUNT_GET_FAIL, GD_MSG_TRANS_OPINFO_GET_FAIL, + GD_MSG_TRANS_ID_INVALID, GD_MSG_NO_OPTIONS_GIVEN, GD_MSG_SNAPD_NOT_RUNNING, + GD_MSG_ADD_ADDRESS_TO_PEER_FAIL, GD_MSG_PEER_ADDRESS_GET_FAIL, + GD_MSG_GETADDRINFO_FAIL, GD_MSG_PEERINFO_DELETE_FAIL, GD_MSG_KEY_NULL, + GD_MSG_SPAWN_SVCS_FAIL, GD_MSG_DICT_ITER_FAIL, + GD_MSG_TASK_STATUS_UPDATE_FAIL, GD_MSG_VOL_ID_MISMATCH, + GD_MSG_STR_TO_BOOL_FAIL, GD_MSG_RB_MNT_BRICKS_MISMATCH, + GD_MSG_RB_SRC_BRICKS_MISMATCH, GD_MSG_MNTENTRY_GET_FAIL, + GD_MSG_INODE_SIZE_GET_FAIL, GD_MSG_NO_STATEFILE_ENTRY, + GD_MSG_PMAP_UNSET_FAIL, GD_MSG_GLOBAL_OPT_IMPORT_FAIL, + GD_MSD_BRICK_DISCONNECT_FAIL, GD_MSG_SNAP_DETAILS_IMPORT_FAIL, + GD_MSG_BRICKINFO_CREATE_FAIL, GD_MSG_QUOTA_CKSUM_VER_STORE_FAIL, + GD_MSG_CKSUM_GET_FAIL, GD_MSG_BRICKPATH_ROOT_GET_FAIL, + GD_MSG_HOSTNAME_TO_UUID_FAIL, GD_MSG_REPLY_SUBMIT_FAIL, + GD_MSG_SERIALIZE_MSG_FAIL, GD_MSG_ENCODE_FAIL, + GD_MSG_RB_DST_BRICKS_MISMATCH, GD_MSG_XLATOR_VOLOPT_DYNLOAD_ERROR, + GD_MSG_VOLNAME_NOTFOUND_IN_DICT, GD_MSG_FLAGS_NOTFOUND_IN_DICT, + GD_MSG_HOSTNAME_NOTFOUND_IN_DICT, GD_MSG_PORT_NOTFOUND_IN_DICT, + GD_MSG_CMDSTR_NOTFOUND_IN_DICT, GD_MSG_SNAP_OBJ_NEW_FAIL, + GD_MSG_SNAP_BACKEND_MAKE_FAIL, GD_MSG_SNAP_CLONE_FAILED, + GD_MSG_SNAP_CLONE_PREVAL_FAILED, GD_MSG_SNAP_CLONE_POSTVAL_FAILED, + GD_MSG_VOLINFO_STORE_FAIL, GD_MSG_NEW_FRIEND_SM_EVENT_GET_FAIL, + GD_MSG_VOL_TYPE_CHANGING_INFO, GD_MSG_BRKPATH_MNTPNT_MISMATCH, + GD_MSG_TASKS_COUNT_MISMATCH, GD_MSG_WRONG_OPTS_SETTING, + GD_MSG_PATH_ALREADY_PART_OF_VOL, GD_MSG_BRICK_VALIDATE_FAIL, + GD_MSG_READIN_FILE_FAILED, GD_MSG_IMPORT_PRDICT_DICT, + GD_MSG_VOL_OPTS_IMPORT_FAIL, GD_MSG_BRICK_IMPORT_FAIL, + GD_MSG_VOLINFO_IMPORT_FAIL, GD_MSG_BRICK_ID_GEN_FAILED, + GD_MSG_GET_STATUS_DATA_FAIL, GD_MSG_BITROT_NOT_RUNNING, + GD_MSG_SCRUBBER_NOT_RUNNING, GD_MSG_SRC_BRICK_PORT_UNAVAIL, + GD_MSG_BITD_INIT_FAIL, GD_MSG_SCRUB_INIT_FAIL, GD_MSG_VAR_RUN_DIR_INIT_FAIL, + GD_MSG_VAR_RUN_DIR_FIND_FAIL, GD_MSG_SCRUBSVC_RECONF_FAIL, + GD_MSG_BITDSVC_RECONF_FAIL, 
GD_MSG_NFS_GNS_START_FAIL, + GD_MSG_NFS_GNS_SETUP_FAIL, GD_MSG_UNRECOGNIZED_SVC_MNGR, + GD_MSG_NFS_GNS_OP_HANDLE_FAIL, GD_MSG_EXPORT_FILE_CREATE_FAIL, + GD_MSG_NFS_GNS_HOST_FOUND, GD_MSG_REBALANCE_CMD_IN_TIER_VOL, + GD_MSG_INCOMPATIBLE_VALUE, GD_MSG_GENERATED_UUID, + GD_MSG_FILE_DESC_LIMIT_SET, GD_MSG_CURR_WORK_DIR_INFO, + GD_MSG_STRIPE_COUNT_CHANGE_INFO, GD_MSG_REPLICA_COUNT_CHANGE_INFO, + GD_MSG_ADD_BRICK_REQ_RECVD, GD_MSG_VOL_ALREADY_TIER, + GD_MSG_REM_BRICK_REQ_RECVD, GD_MSG_VOL_NOT_TIER, + GD_MSG_LOG_ROTATE_REQ_RECVD, GD_MSG_CLI_REQ_RECVD, GD_MSG_GET_VOL_REQ_RCVD, + GD_MSG_VOL_SYNC_REQ_RCVD, GD_MSG_PROBE_RCVD, GD_MSG_UNFRIEND_REQ_RCVD, + GD_MSG_FRIEND_UPDATE_RCVD, GD_MSG_RESPONSE_INFO, + GD_MSG_VOL_PROFILE_REQ_RCVD, GD_MSG_GETWD_REQ_RCVD, GD_MSG_MOUNT_REQ_RCVD, + GD_MSG_UMOUNT_REQ_RCVD, GD_MSG_CONNECT_RETURNED, GD_MSG_STATUS_VOL_REQ_RCVD, + GD_MSG_CLRCLK_VOL_REQ_RCVD, GD_MSG_BARRIER_VOL_REQ_RCVD, + GD_MSG_UUID_RECEIVED, GD_MSG_REPLACE_BRK_COMMIT_FORCE_REQ_RCVD, + GD_MSG_BRK_PORT_NO_ADD_INDO, GD_MSG_REPLACE_BRK_REQ_RCVD, + GD_MSG_ADD_OP_ARGS_FAIL, GD_MSG_POST_HOOK_STUB_INIT_FAIL, + GD_MSG_HOOK_STUB_NULL, GD_MSG_SPAWN_THREADS_FAIL, + GD_MSG_STALE_VOL_DELETE_INFO, GD_MSG_PROBE_REQ_RESP_RCVD, + GD_MSG_HOST_PRESENT_ALREADY, GD_MSG_OP_VERS_INFO, GD_MSG_OP_VERS_SET_INFO, + GD_MSG_NEW_NODE_STATE_CREATION, GD_MSG_ALREADY_MOUNTED, + GD_MSG_SHARED_STRG_VOL_OPT_VALIDATE_FAIL, GD_MSG_NFS_GNS_STOP_FAIL, + GD_MSG_NFS_GNS_RESET_FAIL, GD_MSG_SHARED_STRG_SET_FAIL, + GD_MSG_VOL_TRANSPORT_TYPE_CHANGE, GD_MSG_PEER_COUNT_GET_FAIL, + GD_MSG_INSUFFICIENT_UP_NODES, GD_MSG_OP_STAGE_STATS_VOL_FAIL, + GD_MSG_VOL_ID_SET_FAIL, GD_MSG_OP_STAGE_RESET_VOL_FAIL, + GD_MSG_OP_STAGE_BITROT_FAIL, GD_MSG_OP_STAGE_QUOTA_FAIL, + GD_MSG_OP_STAGE_DELETE_VOL_FAIL, GD_MSG_HANDLE_HEAL_CMD_FAIL, + GD_MSG_CLRCLK_SND_CMD_FAIL, GD_MSG_DISPERSE_CLUSTER_FOUND, + GD_MSG_HEAL_VOL_REQ_RCVD, GD_MSG_STATEDUMP_VOL_REQ_RCVD, + GD_MSG_THINPOOLS_FOR_THINLVS, GD_MSG_OP_STAGE_CREATE_VOL_FAIL, + GD_MSG_OP_STAGE_START_VOL_FAIL, GD_MSG_NFS_GNS_UNEXPRT_VOL_FAIL, + GD_MSG_TASK_ID_INFO, GD_MSG_DEREGISTER_SUCCESS, GD_MSG_STATEDUMP_OPTS_RCVD, + GD_MSG_STATEDUMP_INFO, GD_MSG_RECOVERING_CORRUPT_CONF, + GD_MSG_RETRIEVED_UUID, GD_MSG_XLATOR_CREATE_FAIL, + GD_MSG_GRAPH_ENTRY_ADD_FAIL, GD_MSG_ERROR_ENCOUNTERED, + GD_MSG_FILTER_RUN_FAILED, GD_MSG_DEFAULT_OPT_INFO, + GD_MSG_MARKER_STATUS_GET_FAIL, GD_MSG_MARKER_DISABLE_FAIL, + GD_MSG_GRAPH_FEATURE_ADD_FAIL, GD_MSG_XLATOR_SET_OPT_FAIL, + GD_MSG_BUILD_GRAPH_FAILED, GD_MSG_XML_TEXT_WRITE_FAIL, + GD_MSG_XML_DOC_START_FAIL, GD_MSG_XML_ELE_CREATE_FAIL, + GD_MSG_VOLUME_INCONSISTENCY, GD_MSG_XLATOR_LINK_FAIL, + GD_MSG_REMOTE_HOST_GET_FAIL, GD_MSG_GRAPH_SET_OPT_FAIL, + GD_MSG_ROOT_SQUASH_ENABLED, GD_MSG_ROOT_SQUASH_FAILED, + GD_MSG_LOCK_OWNER_MISMATCH, GD_MSG_LOCK_NOT_HELD, GD_MSG_LOCK_ALREADY_HELD, + GD_MSG_SVC_START_SUCCESS, GD_MSG_SVC_STOP_SUCCESS, GD_MSG_PARAM_NULL, + GD_MSG_SVC_STOP_FAIL, GD_MSG_SHARED_STORAGE_DOES_NOT_EXIST, + GD_MSG_SNAP_PAUSE_TIER_FAIL, GD_MSG_SNAP_RESUME_TIER_FAIL, + GD_MSG_FILE_NOT_FOUND, GD_MSG_RETRY_WITH_NEW_PORT, + GD_MSG_REMOTE_VOL_UUID_FAIL, GD_MSG_SLAVE_VOL_PARSE_FAIL, + GD_MSG_DICT_GET_SUCCESS, GD_MSG_PMAP_REGISTRY_REMOVE_FAIL, + GD_MSG_MNTBROKER_LABEL_NULL, GD_MSG_MNTBROKER_LABEL_MISS, + GD_MSG_MNTBROKER_SPEC_MISMATCH, GD_MSG_SYSCALL_FAIL, + GD_MSG_DAEMON_STATE_REQ_RCVD, GD_MSG_BRICK_CLEANUP_SUCCESS, + GD_MSG_STATE_STR_GET_FAILED, GD_MSG_RESET_BRICK_COMMIT_FORCE_REQ_RCVD, + GD_MSG_RESET_BRICK_CMD_FAIL, GD_MSG_TIERD_STOP_FAIL, + GD_MSG_TIERD_CREATE_FAIL, GD_MSG_TIERD_START_FAIL, + 
GD_MSG_TIERD_OBJ_GET_FAIL, GD_MSG_TIERD_NOT_RUNNING, GD_MSG_TIERD_INIT_FAIL, + GD_MSG_BRICK_MX_SET_FAIL, GD_MSG_NO_SIG_TO_PID_ZERO, + GD_MSG_TIER_WATERMARK_RESET_FAIL, GD_MSG_CLIENTS_GET_STATE_FAILED, + GD_MSG_GNFS_XLATOR_NOT_INSTALLED, GD_MSG_PIDFILE_UNLINKING, + GD_MSG_VOL_SET_VALIDATION_INFO, GD_MSG_NO_MUX_LIMIT, + GD_MSG_BRICKPROC_REM_BRICK_FAILED, GD_MSG_BRICKPROC_ADD_BRICK_FAILED, + GD_MSG_BRICKPROC_NEW_FAILED, GD_MSG_STATVFS_FAILED, GD_MSG_GARBAGE_ARGS, + GD_MSG_LOCALTIME_LOGGING_VOL_OPT_VALIDATE_FAIL, + GD_MSG_LOCALTIME_LOGGING_ENABLE, GD_MSG_LOCALTIME_LOGGING_DISABLE, + GD_MSG_PORTS_EXHAUSTED, GD_MSG_CHANGELOG_GET_FAIL, + GD_MSG_MANAGER_FUNCTION_FAILED, + GD_MSG_DAEMON_LOG_LEVEL_VOL_OPT_VALIDATE_FAIL, GD_MSG_SHD_START_FAIL, + GD_MSG_SHD_OBJ_GET_FAIL, GD_MSG_SVC_ATTACH_FAIL, GD_MSG_ATTACH_INFO, + GD_MSG_DETACH_INFO, GD_MSG_SVC_DETACH_FAIL, + GD_MSG_RPC_TRANSPORT_GET_PEERNAME_FAIL, GD_MSG_CLUSTER_RC_ENABLE, + GD_MSG_NFS_GANESHA_DISABLED, GD_MSG_GANESHA_NOT_RUNNING, GD_MSG_SNAP_WARN, + GD_MSG_BRICK_SUBVOL_VERIFY_FAIL, GD_MSG_REMOVE_ARBITER_BRICK, + GD_MSG_BRICK_NOT_DECOM, GD_MSG_BRICK_STOPPED, GD_MSG_BRICK_DEAD, + GD_MSG_BRICK_HOST_NOT_FOUND, GD_MSG_BRICK_HOST_DOWN, GD_MSG_BRICK_DELETE, + GD_MSG_BRICK_NO_REMOVE_CMD, GD_MSG_MIGRATION_PROG, GD_MSG_MIGRATION_FAIL, + GD_MSG_COPY_FAIL, GD_MSG_REALPATH_GET_FAIL, + GD_MSG_ARBITER_BRICK_SET_INFO_FAIL, GD_MSG_STRCHR_FAIL, GD_MSG_SPLIT_FAIL, + GD_MSG_ALLOC_AND_COPY_UUID_FAIL, GD_MSG_VOL_SHD_NOT_COMP, + GD_MSG_BITROT_NOT_ENABLED, GD_MSG_CREATE_BRICK_DIR_FAILED, + GD_MSG_CREATE_GLUSTER_DIR_FAILED, GD_MSG_BRICK_CREATE_MNTPNT, + GD_MSG_BRICK_CREATE_ROOT, GD_MSG_SET_XATTR_BRICK_FAIL, + GD_MSG_REMOVE_XATTR_FAIL, GD_MSG_XLATOR_NOT_DEFINED, + GD_MSG_BRICK_NOT_RUNNING, GD_MSG_INCORRECT_BRICK, GD_MSG_UUID_GET_FAIL, + GD_MSG_INVALID_ARGUMENT, GD_MSG_FRAME_CREATE_FAIL, + GD_MSG_SNAPSHOT_NOT_THIN_PROVISIONED, GD_MSG_VOL_STOP_ARGS_GET_FAILED, + GD_MSG_LSTAT_FAIL, GD_MSG_VOLUME_NOT_IMPORTED, + GD_MSG_ADD_BRICK_MNT_INFO_FAIL, GD_MSG_GET_MNT_ENTRY_INFO_FAIL, + GD_MSG_QUORUM_CLUSTER_COUNT_GET_FAIL, GD_MSG_POST_COMMIT_OP_FAIL, + GD_MSG_POST_COMMIT_FROM_UUID_REJCT, GD_MSG_POST_COMMIT_REQ_SEND_FAIL); + +#define GD_MSG_INVALID_ENTRY_STR "Invalid data entry" +#define GD_MSG_INVALID_ARGUMENT_STR \ + "Invalid arguments have been given to function" +#define GD_MSG_GARBAGE_ARGS_STR "Garbage args received" +#define GD_MSG_BRICK_SUBVOL_VERIFY_FAIL_STR "Brick's subvol verification fail" +#define GD_MSG_REMOVE_ARBITER_BRICK_STR "Failed to remove arbiter bricks" +#define GD_MSG_DICT_GET_FAILED_STR "Dict get failed" +#define GD_MSG_DICT_SET_FAILED_STR "Dict set failed" +#define GD_MSG_BRICK_NOT_FOUND_STR "Brick not found in volume" +#define GD_MSG_BRICK_NOT_DECOM_STR "Brick is not decommissoned" +#define GD_MSG_BRICK_STOPPED_STR "Found stopped brick" +#define GD_MSG_BRICK_DEAD_STR "Found dead brick" +#define GD_MSG_BRICK_HOST_NOT_FOUND_STR \ + "Host node of the brick is not a part of cluster" +#define GD_MSG_BRICK_HOST_DOWN_STR "Host node of the brick is down" +#define GD_MSG_BRICK_DELETE_STR \ + "Deleting all the bricks of the volume is not allowed" +#define GD_MSG_BRICK_NO_REMOVE_CMD_STR "No remove-brick command issued" +#define GD_MSG_INCORRECT_BRICK_STR "Incorrect brick for volume" +#define GD_MSG_MIGRATION_PROG_STR "Migration is in progress" +#define GD_MSG_MIGRATION_FAIL_STR "Migration has failed" +#define GD_MSG_XLATOR_NOT_DEFINED_STR "Xlator not defined" +#define GD_MSG_DICT_CREATE_FAIL_STR "Failed to create dictionary" +#define GD_MSG_COPY_FAIL_STR "Failed to copy" 
+#define GD_MSG_UUID_GET_FAIL_STR "Failed to get the uuid of local glusterd" +#define GD_MSG_GEO_REP_START_FAILED_STR "Georep start failed for volume" +#define GD_MSG_REALPATH_GET_FAIL_STR "Failed to get realpath" +#define GD_MSG_FILE_NOT_FOUND_STR "File not found in directory" +#define GD_MSG_SRC_FILE_ERROR_STR "Error in source file" +#define GD_MSG_DICT_UNSERIALIZE_FAIL_STR "Failed to unserialize dict" +#define GD_MSG_VOL_ID_SET_FAIL_STR "Failed to set volume id" +#define GD_MSG_ARBITER_BRICK_SET_INFO_FAIL_STR \ + "Failed to add arbiter info to brick" +#define GD_MSG_NO_MEMORY_STR "Out of memory" +#define GD_MSG_GLUSTERD_UMOUNT_FAIL_STR "Failed to unmount path" +#define GD_MSG_PEER_ADD_FAIL_STR "Failed to add new peer" +#define GD_MSG_BRICK_GET_INFO_FAIL_STR "Failed to get brick info" +#define GD_MSG_STRCHR_FAIL_STR "Failed to get the character" +#define GD_MSG_SPLIT_FAIL_STR "Failed to split" +#define GD_MSG_VOLINFO_GET_FAIL_STR "Failed to get volinfo" +#define GD_MSG_PEER_NOT_FOUND_STR "Failed to find peer info" +#define GD_MSG_DICT_COPY_FAIL_STR "Failed to copy values from dictionary" +#define GD_MSG_ALLOC_AND_COPY_UUID_FAIL_STR \ + "Failed to allocate memory or copy uuid" +#define GD_MSG_VOL_NOT_FOUND_STR "Volume not found" +#define GD_MSG_PEER_DISCONNECTED_STR "Peer is disconnected" +#define GD_MSG_QUOTA_GET_STAT_FAIL_STR "Failed to get quota status" +#define GD_MSG_SNAP_STATUS_FAIL_STR "Failed to get status of snapd" +#define GD_MSG_VALIDATE_FAILED_STR "Failed to validate volume" +#define GD_MSG_VOL_NOT_STARTED_STR "Volume is not started" +#define GD_MSG_VOL_SHD_NOT_COMP_STR "Volume is not Self-heal compatible" +#define GD_MSG_SELF_HEALD_DISABLED_STR "Self-heal daemon is disabled" +#define GD_MSG_NFS_GANESHA_DISABLED_STR "NFS server is disabled" +#define GD_MSG_QUOTA_DISABLED_STR "Quota is disabled" +#define GD_MSG_BITROT_NOT_RUNNING_STR "Bitrot is not enabled" +#define GD_MSG_BITROT_NOT_ENABLED_STR "Volume does not have bitrot enabled" +#define GD_MSG_SNAPD_NOT_RUNNING_STR "Snapd is not enabled" +#define GD_MSG_STRDUP_FAILED_STR "Strdup operation failed" +#define GD_MSG_QUORUM_CLUSTER_COUNT_GET_FAIL_STR \ + "Failed to get quorum cluster counts" +#define GD_MSG_GLUSTER_SERVICE_START_FAIL_STR "Failed to start glusterd service" +#define GD_MSG_PEER_ADDRESS_GET_FAIL_STR "Failed to get the address of peer" +#define GD_MSG_INVALID_SLAVE_STR "Volume is not a slave volume" +#define GD_MSG_BRICK_NOT_RUNNING_STR "One or more bricks are not running" +#define GD_MSG_BRK_MNTPATH_GET_FAIL_STR "Failed to get brick mount device" +#define GD_MSG_SNAPSHOT_NOT_THIN_PROVISIONED_STR \ + "Snapshot is supported only for thin provisioned LV." +#define GD_MSG_SNAP_DEVICE_NAME_GET_FAIL_STR \ + "Failed to copy snapshot device name" +#define GD_MSG_SNAP_NOT_FOUND_STR "Snapshot does not exist" +#define GD_MSG_CREATE_BRICK_DIR_FAILED_STR "Failed to create brick directory" +#define GD_MSG_LSTAT_FAIL_STR "Lstat operation failed" +#define GD_MSG_DIR_OP_FAILED_STR \ + "The provided path is already present. It is not a directory" +#define GD_MSG_BRICK_CREATION_FAIL_STR \ + "Brick isn't allowed to be created inside glusterd's working directory." +#define GD_MSG_BRICK_CREATE_ROOT_STR \ + "The brick is being created in the root partition. It is recommended " \ + "that you don't use the system's root partition for storage backend." +#define GD_MSG_BRICK_CREATE_MNTPNT_STR \ + "The brick is a mount point. Please create a sub-directory under the " \ + "mount point and use that as the brick directory." 
+#define GD_MSG_CREATE_GLUSTER_DIR_FAILED_STR \ + "Failed to create glusterfs directory" +#define GD_MSG_VOLINFO_IMPORT_FAIL_STR "Volume is not yet imported" +#define GD_MSG_BRICK_SET_INFO_FAIL_STR \ + "Failed to add brick mount details to dict" +#define GD_MSG_SET_XATTR_BRICK_FAIL_STR \ + "Glusterfs is not supported on brick. Setting extended attribute failed" +#define GD_MSG_SET_XATTR_FAIL_STR "Failed to set extended attribute" +#define GD_MSG_REMOVE_XATTR_FAIL_STR "Failed to remove extended attribute" +#define GD_MSG_XLATOR_SET_OPT_FAIL_STR "Failed to set xlator type" +#define GD_MSG_XLATOR_LINK_FAIL_STR \ + "Failed to do the link of xlator with children" +#define GD_MSG_READ_ERROR_STR "Failed to read directory" +#define GD_MSG_INCOMPATIBLE_VALUE_STR "Incompatible transport type" +#define GD_MSG_VOL_STOP_ARGS_GET_FAILED_STR "Failed to get volume stop args" +#define GD_MSG_FRAME_CREATE_FAIL_STR "Failed to create frame" +#define GD_MSG_VOLUME_NOT_IMPORTED_STR "Volume has not been imported" +#define GD_MSG_ADD_BRICK_MNT_INFO_FAIL_STR \ + "Failed to add brick mount details to dict" +#define GD_MSG_GET_MNT_ENTRY_INFO_FAIL_STR "Failed to get mount entry details" +#define GD_MSG_BRICKPATH_ROOT_GET_FAIL_STR "failed to get brick root details" +#define GD_MSG_VOL_INFO_REQ_RECVD_STR "Received get volume info req" +#define GD_MSG_NO_FLAG_SET_STR "No flags set" +#define GD_MSG_CREATE_DIR_FAILED_STR "Failed to create directory" +#define GD_MSG_POST_HOOK_STUB_INIT_FAIL_STR \ + "Failed to initialize post hooks stub" +#define GD_MSG_FILE_OP_FAILED_STR "File operation failed" +#define GD_MSG_INODE_SIZE_GET_FAIL_STR "Failed to get inode size" +#define GD_MSG_CMD_EXEC_FAIL_STR "Command execution failed" +#define GD_MSG_XLATOR_CREATE_FAIL_STR "Failed to create xlator" +#define GD_MSG_CLRCLK_VOL_REQ_RCVD_STR "Received clear-locks request for volume" +#define GD_MSG_BRK_PORT_NUM_GET_FAIL_STR \ + "Couldn't get port number of local bricks" +#define GD_MSG_CLRLOCKS_MOUNTDIR_CREATE_FAIL_STR \ + "Creating mount directory for clear-locks failed" +#define GD_MSG_CLRLOCKS_CLNT_MOUNT_FAIL_STR \ + "Failed to mount clear-locks maintenance client" +#define GD_MSG_CLRLOCKS_CLNT_UMOUNT_FAIL_STR \ + "Failed to unmount clear-locks mount point" +#define GD_MSG_CLRCLK_SND_CMD_FAIL_STR "Failed to send command for clear-locks" +#define GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL_STR \ + "Failed to allocate memory or get serialized length of dict" +#define GD_MSG_GET_XATTR_FAIL_STR "Failed to get extended attribute" + +#endif /* !_GLUSTERD_MESSAGES_H_ */ diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c b/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c new file mode 100644 index 00000000000..1069688a89d --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c @@ -0,0 +1,1144 @@ +/* + Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ +/* rpc related syncops */ +#include "rpc-clnt.h" +#include "protocol-common.h" +#include "xdr-generic.h" +#include "glusterd1-xdr.h" +#include "glusterd-syncop.h" + +#include "glusterd.h" +#include "glusterd-utils.h" +#include "glusterd-locks.h" +#include "glusterd-mgmt.h" +#include "glusterd-op-sm.h" +#include "glusterd-messages.h" + +static int +glusterd_mgmt_v3_null(rpcsvc_request_t *req) +{ + return 0; +} + +static int +glusterd_mgmt_v3_lock_send_resp(rpcsvc_request_t *req, int32_t status, + uint32_t op_errno) +{ + gd1_mgmt_v3_lock_rsp rsp = { + {0}, + }; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(req); + + rsp.op_ret = status; + if (rsp.op_ret) + rsp.op_errno = op_errno; + + glusterd_get_uuid(&rsp.uuid); + + ret = glusterd_submit_reply(req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_v3_lock_rsp); + + gf_msg_debug(this->name, 0, "Responded to mgmt_v3 lock, ret: %d", ret); + + return ret; +} + +static int +glusterd_synctasked_mgmt_v3_lock(rpcsvc_request_t *req, + gd1_mgmt_v3_lock_req *lock_req, + glusterd_op_lock_ctx_t *ctx) +{ + int32_t ret = -1; + xlator_t *this = NULL; + uint32_t op_errno = 0; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(req); + GF_ASSERT(ctx); + GF_ASSERT(ctx->dict); + + /* Trying to acquire multiple mgmt_v3 locks */ + ret = glusterd_multiple_mgmt_v3_lock(ctx->dict, ctx->uuid, &op_errno); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_LOCK_GET_FAIL, + "Failed to acquire mgmt_v3 locks for %s", uuid_utoa(ctx->uuid)); + + ret = glusterd_mgmt_v3_lock_send_resp(req, ret, op_errno); + + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +static int +glusterd_op_state_machine_mgmt_v3_lock(rpcsvc_request_t *req, + gd1_mgmt_v3_lock_req *lock_req, + glusterd_op_lock_ctx_t *ctx) +{ + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_op_info_t txn_op_info = { + {0}, + }; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(req); + + glusterd_txn_opinfo_init(&txn_op_info, NULL, &lock_req->op, ctx->dict, req); + + ret = glusterd_set_txn_opinfo(&lock_req->txn_id, &txn_op_info); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OPINFO_SET_FAIL, + "Unable to set transaction's opinfo"); + goto out; + } + + ret = glusterd_op_sm_inject_event(GD_OP_EVENT_LOCK, &lock_req->txn_id, ctx); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OP_EVENT_LOCK_FAIL, + "Failed to inject event GD_OP_EVENT_LOCK"); + +out: + glusterd_friend_sm(); + glusterd_op_sm(); + + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +static int +glusterd_handle_mgmt_v3_lock_fn(rpcsvc_request_t *req) +{ + gd1_mgmt_v3_lock_req lock_req = { + {0}, + }; + int32_t ret = -1; + glusterd_op_lock_ctx_t *ctx = NULL; + xlator_t *this = NULL; + gf_boolean_t is_synctasked = _gf_false; + gf_boolean_t free_ctx = _gf_false; + glusterd_conf_t *conf = NULL; + uint32_t timeout = 0; + + this = THIS; + conf = this->private; + GF_ASSERT(conf); + GF_ASSERT(this); + GF_ASSERT(req); + + ret = xdr_to_generic(req->msg[0], &lock_req, + (xdrproc_t)xdr_gd1_mgmt_v3_lock_req); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, + "Failed to decode lock " + "request received from peer"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + gf_msg_debug(this->name, 0, + "Received mgmt_v3 lock req " + "from uuid: %s", + uuid_utoa(lock_req.uuid)); + + if (glusterd_peerinfo_find_by_uuid(lock_req.uuid) == NULL) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_PEER_NOT_FOUND, + "%s doesn't " + "belong to the 
cluster. Ignoring request.", + uuid_utoa(lock_req.uuid)); + ret = -1; + goto out; + } + + ctx = GF_CALLOC(1, sizeof(*ctx), gf_gld_mt_op_lock_ctx_t); + if (!ctx) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); + ret = -1; + goto out; + } + + gf_uuid_copy(ctx->uuid, lock_req.uuid); + ctx->req = req; + + ctx->dict = dict_new(); + if (!ctx->dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); + ret = -1; + goto out; + } + + ret = dict_unserialize(lock_req.dict.dict_val, lock_req.dict.dict_len, + &ctx->dict); + if (ret) { + gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + NULL); + goto out; + } + + /* Cli will add timeout key to dict if the default timeout is + * other than 2 minutes. Here we use this value to check whether + * mgmt_v3_lock_timeout should be set to default value or we + * need to change the value according to timeout value + * i.e, timeout + 120 seconds. */ + ret = dict_get_uint32(ctx->dict, "timeout", &timeout); + if (!ret) + conf->mgmt_v3_lock_timeout = timeout + 120; + + is_synctasked = dict_get_str_boolean(ctx->dict, "is_synctasked", _gf_false); + if (is_synctasked) { + ret = glusterd_synctasked_mgmt_v3_lock(req, &lock_req, ctx); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_LOCK_GET_FAIL, + "Failed to acquire mgmt_v3_locks"); + /* Ignore the return code, as it shouldn't be propagated + * from the handler function so as to avoid double + * deletion of the req + */ + ret = 0; + } + + /* The above function does not take ownership of ctx. + * Therefore we need to free the ctx explicitly. */ + free_ctx = _gf_true; + } else { + /* Shouldn't ignore the return code here, and it should + * be propagated from the handler function as in failure + * case it doesn't delete the req object + */ + ret = glusterd_op_state_machine_mgmt_v3_lock(req, &lock_req, ctx); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_LOCK_GET_FAIL, + "Failed to acquire mgmt_v3_locks"); + } + +out: + + if (ctx && (ret || free_ctx)) { + if (ctx->dict) + dict_unref(ctx->dict); + + GF_FREE(ctx); + } + + free(lock_req.dict.dict_val); + + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +static int +glusterd_mgmt_v3_pre_validate_send_resp(rpcsvc_request_t *req, int32_t op, + int32_t status, char *op_errstr, + dict_t *rsp_dict, uint32_t op_errno) +{ + gd1_mgmt_v3_pre_val_rsp rsp = { + {0}, + }; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(req); + + rsp.op_ret = status; + glusterd_get_uuid(&rsp.uuid); + rsp.op = op; + rsp.op_errno = op_errno; + if (op_errstr) + rsp.op_errstr = op_errstr; + else + rsp.op_errstr = ""; + + ret = dict_allocate_and_serialize(rsp_dict, &rsp.dict.dict_val, + &rsp.dict.dict_len); + if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, 0, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); + goto out; + } + + ret = glusterd_submit_reply(req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_v3_pre_val_rsp); + + GF_FREE(rsp.dict.dict_val); +out: + gf_msg_debug(this->name, 0, "Responded to pre validation, ret: %d", ret); + return ret; +} + +static int +glusterd_handle_pre_validate_fn(rpcsvc_request_t *req) +{ + int32_t ret = -1; + gd1_mgmt_v3_pre_val_req op_req = { + {0}, + }; + xlator_t *this = NULL; + char *op_errstr = NULL; + dict_t *dict = NULL; + dict_t *rsp_dict = NULL; + uint32_t op_errno = 0; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(req); + + ret = xdr_to_generic(req->msg[0], &op_req, + 
(xdrproc_t)xdr_gd1_mgmt_v3_pre_val_req); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, + "Failed to decode pre validation " + "request received from peer"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (glusterd_peerinfo_find_by_uuid(op_req.uuid) == NULL) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_PEER_NOT_FOUND, + "%s doesn't " + "belong to the cluster. Ignoring request.", + uuid_utoa(op_req.uuid)); + ret = -1; + goto out; + } + + dict = dict_new(); + if (!dict) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL); + goto out; + } + + ret = dict_unserialize(op_req.dict.dict_val, op_req.dict.dict_len, &dict); + if (ret) { + gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + NULL); + goto out; + } + + rsp_dict = dict_new(); + if (!rsp_dict) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL); + return -1; + } + + ret = gd_mgmt_v3_pre_validate_fn(op_req.op, dict, &op_errstr, rsp_dict, + &op_errno); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PRE_VALIDATION_FAIL, + "Pre Validation failed on operation %s", gd_op_list[op_req.op]); + } + + ret = glusterd_mgmt_v3_pre_validate_send_resp( + req, op_req.op, ret, op_errstr, rsp_dict, op_errno); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_OP_RESP_FAIL, + "Failed to send Pre Validation " + "response for operation %s", + gd_op_list[op_req.op]); + goto out; + } + +out: + if (op_errstr && (strcmp(op_errstr, ""))) + GF_FREE(op_errstr); + + free(op_req.dict.dict_val); + + if (dict) + dict_unref(dict); + + if (rsp_dict) + dict_unref(rsp_dict); + + /* Return 0 from handler to avoid double deletion of req obj */ + return 0; +} + +static int +glusterd_mgmt_v3_brick_op_send_resp(rpcsvc_request_t *req, int32_t op, + int32_t status, char *op_errstr, + dict_t *rsp_dict) +{ + gd1_mgmt_v3_brick_op_rsp rsp = { + {0}, + }; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(req); + + rsp.op_ret = status; + glusterd_get_uuid(&rsp.uuid); + rsp.op = op; + if (op_errstr) + rsp.op_errstr = op_errstr; + else + rsp.op_errstr = ""; + + ret = dict_allocate_and_serialize(rsp_dict, &rsp.dict.dict_val, + &rsp.dict.dict_len); + if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, 0, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); + goto out; + } + + ret = glusterd_submit_reply(req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_v3_brick_op_rsp); + + GF_FREE(rsp.dict.dict_val); +out: + gf_msg_debug(this->name, 0, "Responded to brick op, ret: %d", ret); + return ret; +} + +static int +glusterd_handle_brick_op_fn(rpcsvc_request_t *req) +{ + int32_t ret = -1; + gd1_mgmt_v3_brick_op_req op_req = { + {0}, + }; + xlator_t *this = NULL; + char *op_errstr = NULL; + dict_t *dict = NULL; + dict_t *rsp_dict = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(req); + + ret = xdr_to_generic(req->msg[0], &op_req, + (xdrproc_t)xdr_gd1_mgmt_v3_brick_op_req); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, + "Failed to decode brick op " + "request received from peer"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (glusterd_peerinfo_find_by_uuid(op_req.uuid) == NULL) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_PEER_NOT_FOUND, + "%s doesn't " + "belong to the cluster. 
Ignoring request.", + uuid_utoa(op_req.uuid)); + ret = -1; + goto out; + } + + dict = dict_new(); + if (!dict) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL); + goto out; + } + + ret = dict_unserialize(op_req.dict.dict_val, op_req.dict.dict_len, &dict); + if (ret) { + gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + NULL); + goto out; + } + + rsp_dict = dict_new(); + if (!rsp_dict) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL); + return -1; + } + + ret = gd_mgmt_v3_brick_op_fn(op_req.op, dict, &op_errstr, rsp_dict); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_OP_FAIL, + "Brick Op failed on operation %s", gd_op_list[op_req.op]); + } + + ret = glusterd_mgmt_v3_brick_op_send_resp(req, op_req.op, ret, op_errstr, + rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PRE_VALD_RESP_FAIL, + "Failed to send brick op " + "response for operation %s", + gd_op_list[op_req.op]); + goto out; + } + +out: + if (op_errstr && (strcmp(op_errstr, ""))) + GF_FREE(op_errstr); + + free(op_req.dict.dict_val); + + if (dict) + dict_unref(dict); + + if (rsp_dict) + dict_unref(rsp_dict); + + /* Return 0 from handler to avoid double deletion of req obj */ + return 0; +} + +static int +glusterd_mgmt_v3_commit_send_resp(rpcsvc_request_t *req, int32_t op, + int32_t status, char *op_errstr, + uint32_t op_errno, dict_t *rsp_dict) +{ + gd1_mgmt_v3_commit_rsp rsp = { + {0}, + }; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(req); + + rsp.op_ret = status; + glusterd_get_uuid(&rsp.uuid); + rsp.op = op; + rsp.op_errno = op_errno; + if (op_errstr) + rsp.op_errstr = op_errstr; + else + rsp.op_errstr = ""; + + ret = dict_allocate_and_serialize(rsp_dict, &rsp.dict.dict_val, + &rsp.dict.dict_len); + if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, 0, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); + goto out; + } + + ret = glusterd_submit_reply(req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_v3_commit_rsp); + + GF_FREE(rsp.dict.dict_val); +out: + gf_msg_debug(this->name, 0, "Responded to commit, ret: %d", ret); + return ret; +} + +static int +glusterd_handle_commit_fn(rpcsvc_request_t *req) +{ + int32_t ret = -1; + gd1_mgmt_v3_commit_req op_req = { + {0}, + }; + xlator_t *this = NULL; + char *op_errstr = NULL; + dict_t *dict = NULL; + dict_t *rsp_dict = NULL; + uint32_t op_errno = 0; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(req); + + ret = xdr_to_generic(req->msg[0], &op_req, + (xdrproc_t)xdr_gd1_mgmt_v3_commit_req); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, + "Failed to decode commit " + "request received from peer"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (glusterd_peerinfo_find_by_uuid(op_req.uuid) == NULL) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_PEER_NOT_FOUND, + "%s doesn't " + "belong to the cluster. 
Ignoring request.", + uuid_utoa(op_req.uuid)); + ret = -1; + goto out; + } + + dict = dict_new(); + if (!dict) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL); + goto out; + } + + ret = dict_unserialize(op_req.dict.dict_val, op_req.dict.dict_len, &dict); + if (ret) { + gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + NULL); + goto out; + } + + rsp_dict = dict_new(); + if (!rsp_dict) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL); + return -1; + } + + ret = gd_mgmt_v3_commit_fn(op_req.op, dict, &op_errstr, &op_errno, + rsp_dict); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_OP_FAIL, + "commit failed on operation %s", gd_op_list[op_req.op]); + } + + ret = glusterd_mgmt_v3_commit_send_resp(req, op_req.op, ret, op_errstr, + op_errno, rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_OP_RESP_FAIL, + "Failed to send commit " + "response for operation %s", + gd_op_list[op_req.op]); + goto out; + } + +out: + if (op_errstr && (strcmp(op_errstr, ""))) + GF_FREE(op_errstr); + + free(op_req.dict.dict_val); + + if (dict) + dict_unref(dict); + + if (rsp_dict) + dict_unref(rsp_dict); + + /* Return 0 from handler to avoid double deletion of req obj */ + return 0; +} + +static int +glusterd_mgmt_v3_post_commit_send_resp(rpcsvc_request_t *req, int32_t op, + int32_t status, char *op_errstr, + uint32_t op_errno, dict_t *rsp_dict) +{ + gd1_mgmt_v3_post_commit_rsp rsp = { + {0}, + }; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(req); + + rsp.op_ret = status; + glusterd_get_uuid(&rsp.uuid); + rsp.op = op; + rsp.op_errno = op_errno; + if (op_errstr) + rsp.op_errstr = op_errstr; + else + rsp.op_errstr = ""; + + ret = dict_allocate_and_serialize(rsp_dict, &rsp.dict.dict_val, + &rsp.dict.dict_len); + if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, 0, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); + goto out; + } + + ret = glusterd_submit_reply(req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_v3_post_commit_rsp); + + GF_FREE(rsp.dict.dict_val); +out: + gf_msg_debug(this->name, 0, "Responded to post commit, ret: %d", ret); + return ret; +} + +static int +glusterd_handle_post_commit_fn(rpcsvc_request_t *req) +{ + int32_t ret = -1; + gd1_mgmt_v3_post_commit_req op_req = { + {0}, + }; + xlator_t *this = NULL; + char *op_errstr = NULL; + dict_t *dict = NULL; + dict_t *rsp_dict = NULL; + uint32_t op_errno = 0; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(req); + + ret = xdr_to_generic(req->msg[0], &op_req, + (xdrproc_t)xdr_gd1_mgmt_v3_post_commit_req); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, + "Failed to decode post commit " + "request received from peer"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (glusterd_peerinfo_find_by_uuid(op_req.uuid) == NULL) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_PEER_NOT_FOUND, + "%s doesn't " + "belong to the cluster. 
Ignoring request.", + uuid_utoa(op_req.uuid)); + ret = -1; + goto out; + } + + dict = dict_new(); + if (!dict) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL); + goto out; + } + + ret = dict_unserialize(op_req.dict.dict_val, op_req.dict.dict_len, &dict); + if (ret) { + gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + NULL); + goto out; + } + + rsp_dict = dict_new(); + if (!rsp_dict) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL); + return -1; + } + + ret = gd_mgmt_v3_post_commit_fn(op_req.op, dict, &op_errstr, &op_errno, + rsp_dict); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_COMMIT_OP_FAIL, + "post commit failed on operation %s", gd_op_list[op_req.op]); + } + + ret = glusterd_mgmt_v3_post_commit_send_resp(req, op_req.op, ret, op_errstr, + op_errno, rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_OP_RESP_FAIL, + "Failed to send post commit " + "response for operation %s", + gd_op_list[op_req.op]); + goto out; + } + +out: + if (op_errstr && (strcmp(op_errstr, ""))) + GF_FREE(op_errstr); + + free(op_req.dict.dict_val); + + if (dict) + dict_unref(dict); + + if (rsp_dict) + dict_unref(rsp_dict); + + /* Return 0 from handler to avoid double deletion of req obj */ + return 0; +} + +static int +glusterd_mgmt_v3_post_validate_send_resp(rpcsvc_request_t *req, int32_t op, + int32_t status, char *op_errstr, + dict_t *rsp_dict) +{ + gd1_mgmt_v3_post_val_rsp rsp = { + {0}, + }; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(req); + + rsp.op_ret = status; + glusterd_get_uuid(&rsp.uuid); + rsp.op = op; + if (op_errstr) + rsp.op_errstr = op_errstr; + else + rsp.op_errstr = ""; + + ret = dict_allocate_and_serialize(rsp_dict, &rsp.dict.dict_val, + &rsp.dict.dict_len); + if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, 0, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); + goto out; + } + + ret = glusterd_submit_reply(req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_v3_post_val_rsp); + + GF_FREE(rsp.dict.dict_val); +out: + gf_msg_debug(this->name, 0, "Responded to post validation, ret: %d", ret); + return ret; +} + +static int +glusterd_handle_post_validate_fn(rpcsvc_request_t *req) +{ + int32_t ret = -1; + gd1_mgmt_v3_post_val_req op_req = { + {0}, + }; + xlator_t *this = NULL; + char *op_errstr = NULL; + dict_t *dict = NULL; + dict_t *rsp_dict = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(req); + + ret = xdr_to_generic(req->msg[0], &op_req, + (xdrproc_t)xdr_gd1_mgmt_v3_post_val_req); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, + "Failed to decode post validation " + "request received from peer"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (glusterd_peerinfo_find_by_uuid(op_req.uuid) == NULL) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_PEER_NOT_FOUND, + "%s doesn't " + "belong to the cluster. 
Ignoring request.", + uuid_utoa(op_req.uuid)); + ret = -1; + goto out; + } + + dict = dict_new(); + if (!dict) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL); + goto out; + } + + ret = dict_unserialize(op_req.dict.dict_val, op_req.dict.dict_len, &dict); + if (ret) { + gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + NULL); + goto out; + } + + rsp_dict = dict_new(); + if (!rsp_dict) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL); + return -1; + } + + ret = gd_mgmt_v3_post_validate_fn(op_req.op, op_req.op_ret, dict, + &op_errstr, rsp_dict); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_VALIDATION_FAIL, + "Post Validation failed on operation %s", gd_op_list[op_req.op]); + } + + ret = glusterd_mgmt_v3_post_validate_send_resp(req, op_req.op, ret, + op_errstr, rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_OP_RESP_FAIL, + "Failed to send Post Validation " + "response for operation %s", + gd_op_list[op_req.op]); + goto out; + } + +out: + if (op_errstr && (strcmp(op_errstr, ""))) + GF_FREE(op_errstr); + + free(op_req.dict.dict_val); + + if (dict) + dict_unref(dict); + + if (rsp_dict) + dict_unref(rsp_dict); + + /* Return 0 from handler to avoid double deletion of req obj */ + return 0; +} + +static int +glusterd_mgmt_v3_unlock_send_resp(rpcsvc_request_t *req, int32_t status) +{ + gd1_mgmt_v3_unlock_rsp rsp = { + {0}, + }; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(req); + + rsp.op_ret = status; + if (rsp.op_ret) + rsp.op_errno = errno; + + glusterd_get_uuid(&rsp.uuid); + + ret = glusterd_submit_reply(req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_v3_unlock_rsp); + + gf_msg_debug(this->name, 0, "Responded to mgmt_v3 unlock, ret: %d", ret); + + return ret; +} + +static int +glusterd_syctasked_mgmt_v3_unlock(rpcsvc_request_t *req, + gd1_mgmt_v3_unlock_req *unlock_req, + glusterd_op_lock_ctx_t *ctx) +{ + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(req); + GF_ASSERT(ctx); + + /* Trying to release multiple mgmt_v3 locks */ + ret = glusterd_multiple_mgmt_v3_unlock(ctx->dict, ctx->uuid); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_UNLOCK_FAIL, + "Failed to release mgmt_v3 locks for %s", uuid_utoa(ctx->uuid)); + } + + ret = glusterd_mgmt_v3_unlock_send_resp(req, ret); + + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +static int +glusterd_op_state_machine_mgmt_v3_unlock(rpcsvc_request_t *req, + gd1_mgmt_v3_unlock_req *lock_req, + glusterd_op_lock_ctx_t *ctx) +{ + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(req); + + ret = glusterd_op_sm_inject_event(GD_OP_EVENT_UNLOCK, &lock_req->txn_id, + ctx); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OP_EVENT_UNLOCK_FAIL, + "Failed to inject event GD_OP_EVENT_UNLOCK"); + + glusterd_friend_sm(); + glusterd_op_sm(); + + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +static int +glusterd_handle_mgmt_v3_unlock_fn(rpcsvc_request_t *req) +{ + gd1_mgmt_v3_unlock_req lock_req = { + {0}, + }; + int32_t ret = -1; + glusterd_op_lock_ctx_t *ctx = NULL; + xlator_t *this = NULL; + gf_boolean_t is_synctasked = _gf_false; + gf_boolean_t free_ctx = _gf_false; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(req); + + ret = xdr_to_generic(req->msg[0], &lock_req, + (xdrproc_t)xdr_gd1_mgmt_v3_unlock_req); + if (ret < 0) { + gf_msg(this->name, 
GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, + "Failed to decode unlock " + "request received from peer"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + gf_msg_debug(this->name, 0, + "Received volume unlock req " + "from uuid: %s", + uuid_utoa(lock_req.uuid)); + + if (glusterd_peerinfo_find_by_uuid(lock_req.uuid) == NULL) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_PEER_NOT_FOUND, + "%s doesn't " + "belong to the cluster. Ignoring request.", + uuid_utoa(lock_req.uuid)); + ret = -1; + goto out; + } + + ctx = GF_CALLOC(1, sizeof(*ctx), gf_gld_mt_op_lock_ctx_t); + if (!ctx) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_MEMORY, NULL); + ret = -1; + goto out; + } + + gf_uuid_copy(ctx->uuid, lock_req.uuid); + ctx->req = req; + + ctx->dict = dict_new(); + if (!ctx->dict) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL); + ret = -1; + goto out; + } + + ret = dict_unserialize(lock_req.dict.dict_val, lock_req.dict.dict_len, + &ctx->dict); + if (ret) { + gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + NULL); + goto out; + } + + is_synctasked = dict_get_str_boolean(ctx->dict, "is_synctasked", _gf_false); + if (is_synctasked) { + ret = glusterd_syctasked_mgmt_v3_unlock(req, &lock_req, ctx); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_UNLOCK_FAIL, + "Failed to release mgmt_v3_locks"); + /* Ignore the return code, as it shouldn't be propagated + * from the handler function so as to avoid double + * deletion of the req + */ + ret = 0; + } + + /* The above function does not take ownership of ctx. + * Therefore we need to free the ctx explicitly. */ + free_ctx = _gf_true; + } else { + /* Shouldn't ignore the return code here, and it should + * be propagated from the handler function as in failure + * case it doesn't delete the req object + */ + ret = glusterd_op_state_machine_mgmt_v3_unlock(req, &lock_req, ctx); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_UNLOCK_FAIL, + "Failed to release mgmt_v3_locks"); + } + +out: + + if (ctx && (ret || free_ctx)) { + if (ctx->dict) + dict_unref(ctx->dict); + + GF_FREE(ctx); + } + + free(lock_req.dict.dict_val); + + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +int +glusterd_handle_mgmt_v3_lock(rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler(req, glusterd_handle_mgmt_v3_lock_fn); +} + +static int +glusterd_handle_pre_validate(rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler(req, glusterd_handle_pre_validate_fn); +} + +static int +glusterd_handle_brick_op(rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler(req, glusterd_handle_brick_op_fn); +} + +static int +glusterd_handle_commit(rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler(req, glusterd_handle_commit_fn); +} + +static int +glusterd_handle_post_commit(rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler(req, glusterd_handle_post_commit_fn); +} + +static int +glusterd_handle_post_validate(rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler(req, glusterd_handle_post_validate_fn); +} + +int +glusterd_handle_mgmt_v3_unlock(rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler(req, glusterd_handle_mgmt_v3_unlock_fn); +} + +static rpcsvc_actor_t gd_svc_mgmt_v3_actors[GLUSTERD_MGMT_V3_MAXVALUE] = { + [GLUSTERD_MGMT_V3_NULL] = {"NULL", glusterd_mgmt_v3_null, NULL, + GLUSTERD_MGMT_V3_NULL, DRC_NA, 0}, + [GLUSTERD_MGMT_V3_LOCK] = {"MGMT_V3_LOCK", glusterd_handle_mgmt_v3_lock, + NULL, GLUSTERD_MGMT_V3_LOCK, DRC_NA, 0}, + 
[GLUSTERD_MGMT_V3_PRE_VALIDATE] = {"PRE_VAL", glusterd_handle_pre_validate, + NULL, GLUSTERD_MGMT_V3_PRE_VALIDATE, + DRC_NA, 0}, + [GLUSTERD_MGMT_V3_BRICK_OP] = {"BRCK_OP", glusterd_handle_brick_op, NULL, + GLUSTERD_MGMT_V3_BRICK_OP, DRC_NA, 0}, + [GLUSTERD_MGMT_V3_COMMIT] = {"COMMIT", glusterd_handle_commit, NULL, + GLUSTERD_MGMT_V3_COMMIT, DRC_NA, 0}, + [GLUSTERD_MGMT_V3_POST_COMMIT] = {"POST_COMMIT", + glusterd_handle_post_commit, NULL, + GLUSTERD_MGMT_V3_POST_COMMIT, DRC_NA, 0}, + [GLUSTERD_MGMT_V3_POST_VALIDATE] = {"POST_VAL", + glusterd_handle_post_validate, NULL, + GLUSTERD_MGMT_V3_POST_VALIDATE, DRC_NA, + 0}, + [GLUSTERD_MGMT_V3_UNLOCK] = {"MGMT_V3_UNLOCK", + glusterd_handle_mgmt_v3_unlock, NULL, + GLUSTERD_MGMT_V3_UNLOCK, DRC_NA, 0}, +}; + +struct rpcsvc_program gd_svc_mgmt_v3_prog = { + .progname = "GlusterD svc mgmt v3", + .prognum = GD_MGMT_PROGRAM, + .progver = GD_MGMT_V3_VERSION, + .numactors = GLUSTERD_MGMT_V3_MAXVALUE, + .actors = gd_svc_mgmt_v3_actors, + .synctask = _gf_true, +}; diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-mgmt.c new file mode 100644 index 00000000000..bca7221062b --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.c @@ -0,0 +1,3114 @@ +/* + Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +/* rpc related syncops */ +#include "rpc-clnt.h" +#include "protocol-common.h" +#include "xdr-generic.h" +#include "glusterd1-xdr.h" +#include "glusterd-syncop.h" + +#include "glusterd.h" +#include "glusterd-utils.h" +#include "glusterd-locks.h" +#include "glusterd-mgmt.h" +#include "glusterd-op-sm.h" +#include "glusterd-server-quorum.h" +#include "glusterd-volgen.h" +#include "glusterd-store.h" +#include "glusterd-snapshot-utils.h" +#include "glusterd-messages.h" +#include "glusterd-errno.h" +#include "glusterd-hooks.h" + +extern struct rpc_clnt_program gd_mgmt_v3_prog; + +void +gd_mgmt_v3_collate_errors(struct syncargs *args, int op_ret, int op_errno, + char *op_errstr, int op_code, uuid_t peerid, + u_char *uuid) +{ + char *peer_str = NULL; + char err_str[PATH_MAX] = "Please check log file for details."; + char op_err[PATH_MAX] = ""; + xlator_t *this = NULL; + int is_operrstr_blk = 0; + char *err_string = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + int32_t len = 0; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(args); + GF_ASSERT(uuid); + + if (op_ret) { + args->op_ret = op_ret; + args->op_errno = op_errno; + + RCU_READ_LOCK; + peerinfo = glusterd_peerinfo_find(peerid, NULL); + if (peerinfo) + peer_str = gf_strdup(peerinfo->hostname); + else + peer_str = gf_strdup(uuid_utoa(uuid)); + + RCU_READ_UNLOCK; + + is_operrstr_blk = (op_errstr && strcmp(op_errstr, "")); + err_string = (is_operrstr_blk) ? op_errstr : err_str; + + switch (op_code) { + case GLUSTERD_MGMT_V3_LOCK: { + snprintf(op_err, sizeof(op_err), "Locking failed on %s. %s", + peer_str, err_string); + break; + } + case GLUSTERD_MGMT_V3_PRE_VALIDATE: { + snprintf(op_err, sizeof(op_err), + "Pre Validation failed on %s. %s", peer_str, + err_string); + break; + } + case GLUSTERD_MGMT_V3_BRICK_OP: { + snprintf(op_err, sizeof(op_err), "Brick ops failed on %s. 
%s", + peer_str, err_string); + break; + } + case GLUSTERD_MGMT_V3_COMMIT: { + snprintf(op_err, sizeof(op_err), "Commit failed on %s. %s", + peer_str, err_string); + break; + } + case GLUSTERD_MGMT_V3_POST_COMMIT: { + snprintf(op_err, sizeof(op_err), "Post commit failed on %s. %s", + peer_str, err_string); + break; + } + case GLUSTERD_MGMT_V3_POST_VALIDATE: { + snprintf(op_err, sizeof(op_err), + "Post Validation failed on %s. %s", peer_str, + err_string); + break; + } + case GLUSTERD_MGMT_V3_UNLOCK: { + snprintf(op_err, sizeof(op_err), "Unlocking failed on %s. %s", + peer_str, err_string); + break; + } + default: + snprintf(op_err, sizeof(op_err), "Unknown error! on %s. %s", + peer_str, err_string); + } + + if (args->errstr) { + len = snprintf(err_str, sizeof(err_str), "%s\n%s", args->errstr, + op_err); + if (len < 0) { + strcpy(err_str, "<error>"); + } + GF_FREE(args->errstr); + args->errstr = NULL; + } else + snprintf(err_str, sizeof(err_str), "%s", op_err); + + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_OP_FAIL, "%s", + op_err); + args->errstr = gf_strdup(err_str); + } + + GF_FREE(peer_str); + + return; +} + +int32_t +gd_mgmt_v3_pre_validate_fn(glusterd_op_t op, dict_t *dict, char **op_errstr, + dict_t *rsp_dict, uint32_t *op_errno) +{ + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(dict); + GF_ASSERT(op_errstr); + GF_ASSERT(rsp_dict); + GF_VALIDATE_OR_GOTO(this->name, op_errno, out); + + switch (op) { + case GD_OP_SNAP: + ret = glusterd_snapshot_prevalidate(dict, op_errstr, rsp_dict, + op_errno); + + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, + GD_MSG_PRE_VALIDATION_FAIL, + "Snapshot Prevalidate Failed"); + goto out; + } + + break; + + case GD_OP_REPLACE_BRICK: + ret = glusterd_op_stage_replace_brick(dict, op_errstr, rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, + GD_MSG_PRE_VALIDATION_FAIL, + "Replace-brick prevalidation failed."); + goto out; + } + break; + case GD_OP_ADD_BRICK: + ret = glusterd_op_stage_add_brick(dict, op_errstr, rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, + GD_MSG_PRE_VALIDATION_FAIL, + "ADD-brick prevalidation failed."); + goto out; + } + break; + case GD_OP_START_VOLUME: + ret = glusterd_op_stage_start_volume(dict, op_errstr, rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, + GD_MSG_PRE_VALIDATION_FAIL, + "Volume start prevalidation failed."); + goto out; + } + break; + case GD_OP_STOP_VOLUME: + ret = glusterd_op_stage_stop_volume(dict, op_errstr); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, + GD_MSG_PRE_VALIDATION_FAIL, + "Volume stop prevalidation failed."); + goto out; + } + break; + case GD_OP_REMOVE_BRICK: + ret = glusterd_op_stage_remove_brick(dict, op_errstr); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, + GD_MSG_PRE_VALIDATION_FAIL, + "Remove brick prevalidation failed."); + goto out; + } + break; + + case GD_OP_RESET_BRICK: + ret = glusterd_reset_brick_prevalidate(dict, op_errstr, rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, + GD_MSG_PRE_VALIDATION_FAIL, + "Reset brick prevalidation failed."); + goto out; + } + break; + + case GD_OP_PROFILE_VOLUME: + ret = glusterd_op_stage_stats_volume(dict, op_errstr); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, + GD_MSG_PRE_VALIDATION_FAIL, + "prevalidation failed for profile operation."); + goto out; + } + break; + case GD_OP_REBALANCE: + case GD_OP_DEFRAG_BRICK_VOLUME: + ret = glusterd_mgmt_v3_op_stage_rebalance(dict, op_errstr); + if (ret) { + 
gf_log(this->name, GF_LOG_WARNING, + "Rebalance Prevalidate Failed"); + goto out; + } + break; + + case GD_OP_MAX_OPVERSION: + ret = 0; + break; + + default: + break; + } + + ret = 0; +out: + gf_msg_debug(this->name, 0, "OP = %d. Returning %d", op, ret); + return ret; +} + +int32_t +gd_mgmt_v3_brick_op_fn(glusterd_op_t op, dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(dict); + GF_ASSERT(op_errstr); + GF_ASSERT(rsp_dict); + + switch (op) { + case GD_OP_SNAP: { + ret = glusterd_snapshot_brickop(dict, op_errstr, rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_BRICK_OP_FAIL, + "snapshot brickop failed"); + goto out; + } + break; + } + case GD_OP_PROFILE_VOLUME: + case GD_OP_REBALANCE: + case GD_OP_DEFRAG_BRICK_VOLUME: { + ret = gd_brick_op_phase(op, rsp_dict, dict, op_errstr); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, + "%s brickop " + "failed", + gd_op_list[op]); + goto out; + } + break; + } + default: + break; + } + + ret = 0; +out: + gf_msg_trace(this->name, 0, "OP = %d. Returning %d", op, ret); + return ret; +} + +int32_t +gd_mgmt_v3_commit_fn(glusterd_op_t op, dict_t *dict, char **op_errstr, + uint32_t *op_errno, dict_t *rsp_dict) +{ + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(dict); + GF_ASSERT(op_errstr); + GF_VALIDATE_OR_GOTO(this->name, op_errno, out); + GF_ASSERT(rsp_dict); + + glusterd_op_commit_hook(op, dict, GD_COMMIT_HOOK_PRE); + switch (op) { + case GD_OP_SNAP: { + ret = glusterd_snapshot(dict, op_errstr, op_errno, rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_COMMIT_OP_FAIL, + "Snapshot Commit Failed"); + goto out; + } + break; + } + case GD_OP_REPLACE_BRICK: { + ret = glusterd_op_replace_brick(dict, rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_OP_FAIL, + "Replace-brick commit failed."); + goto out; + } + break; + } + case GD_OP_ADD_BRICK: { + ret = glusterd_op_add_brick(dict, op_errstr); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_OP_FAIL, + "Add-brick commit failed."); + goto out; + } + break; + } + case GD_OP_START_VOLUME: { + ret = glusterd_op_start_volume(dict, op_errstr); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_OP_FAIL, + "Volume start commit failed."); + goto out; + } + break; + } + case GD_OP_STOP_VOLUME: { + ret = glusterd_op_stop_volume(dict); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_OP_FAIL, + "Volume stop commit failed."); + goto out; + } + break; + } + case GD_OP_REMOVE_BRICK: { + ret = glusterd_op_remove_brick(dict, op_errstr); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_OP_FAIL, + "Remove-brick commit failed."); + goto out; + } + break; + } + case GD_OP_RESET_BRICK: { + ret = glusterd_op_reset_brick(dict, rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_OP_FAIL, + "Reset-brick commit failed."); + goto out; + } + break; + } + case GD_OP_MAX_OPVERSION: { + ret = glusterd_op_get_max_opversion(op_errstr, rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_OP_FAIL, + "Commit failed."); + goto out; + } + break; + } + case GD_OP_PROFILE_VOLUME: { + ret = glusterd_op_stats_volume(dict, op_errstr, rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_OP_FAIL, + "commit failed for volume profile operation."); + goto out; + } + break; + } + case GD_OP_REBALANCE: + case 
GD_OP_DEFRAG_BRICK_VOLUME: { + ret = glusterd_mgmt_v3_op_rebalance(dict, op_errstr, rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_OP_FAIL, + "Rebalance Commit Failed"); + goto out; + } + break; + } + + default: + break; + } + + ret = 0; +out: + gf_msg_debug(this->name, 0, "OP = %d. Returning %d", op, ret); + return ret; +} + +int32_t +gd_mgmt_v3_post_commit_fn(glusterd_op_t op, dict_t *dict, char **op_errstr, + uint32_t *op_errno, dict_t *rsp_dict) +{ + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(dict); + GF_ASSERT(op_errstr); + GF_VALIDATE_OR_GOTO(this->name, op_errno, out); + GF_ASSERT(rsp_dict); + + switch (op) { + case GD_OP_ADD_BRICK: + ret = glusterd_post_commit_add_brick(dict, op_errstr); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_COMMIT_OP_FAIL, + "Add-brick post commit failed."); + goto out; + } + break; + case GD_OP_REPLACE_BRICK: + ret = glusterd_post_commit_replace_brick(dict, op_errstr); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_COMMIT_OP_FAIL, + "Replace-brick post commit failed."); + goto out; + } + break; + default: + break; + } + + ret = 0; +out: + gf_msg_debug(this->name, 0, "OP = %d. Returning %d", op, ret); + return ret; +} + +int32_t +gd_mgmt_v3_post_validate_fn(glusterd_op_t op, int32_t op_ret, dict_t *dict, + char **op_errstr, dict_t *rsp_dict) +{ + int32_t ret = -1; + xlator_t *this = NULL; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(dict); + GF_ASSERT(op_errstr); + GF_ASSERT(rsp_dict); + + if (op_ret == 0) + glusterd_op_commit_hook(op, dict, GD_COMMIT_HOOK_POST); + + switch (op) { + case GD_OP_SNAP: { + ret = glusterd_snapshot_postvalidate(dict, op_ret, op_errstr, + rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, + GD_MSG_POST_VALIDATION_FAIL, + "postvalidate operation failed"); + goto out; + } + break; + } + case GD_OP_ADD_BRICK: { + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get" + " volume name"); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND, + "Unable to " + "allocate memory"); + goto out; + } + ret = glusterd_create_volfiles_and_notify_services(volinfo); + if (ret) + goto out; + ret = glusterd_store_volinfo(volinfo, + GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) + goto out; + break; + } + case GD_OP_START_VOLUME: { + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get" + " volume name"); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND, + "Unable to " + "allocate memory"); + goto out; + } + + break; + } + case GD_OP_STOP_VOLUME: { + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get" + " volume name"); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND, + "Unable to " + "allocate memory"); + goto out; + } + break; + } + + default: + break; + } + + ret = 0; + +out: + gf_msg_trace(this->name, 0, "OP = %d. 
Returning %d", op, ret); + return ret; +} + +int32_t +gd_mgmt_v3_lock_cbk_fn(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) +{ + int32_t ret = -1; + struct syncargs *args = NULL; + gd1_mgmt_v3_lock_rsp rsp = { + {0}, + }; + call_frame_t *frame = NULL; + int32_t op_ret = -1; + int32_t op_errno = -1; + xlator_t *this = NULL; + uuid_t *peerid = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(req); + GF_ASSERT(myframe); + + /* Even though the lock command has failed, while collating the errors + (gd_mgmt_v3_collate_errors), args->op_ret and args->op_errno will be + used. @args is obtained from frame->local. So before checking the + status of the request and going out if its a failure, args should be + set to frame->local. Otherwise, while collating args will be NULL. + This applies to other phases such as prevalidate, brickop, commit and + postvalidate also. + */ + frame = myframe; + args = frame->local; + peerid = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + GF_VALIDATE_OR_GOTO_WITH_ERROR(this->name, iov, out, op_errno, EINVAL); + + ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_v3_lock_rsp); + if (ret < 0) + goto out; + + gf_uuid_copy(args->uuid, rsp.uuid); + + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + +out: + gd_mgmt_v3_collate_errors(args, op_ret, op_errno, NULL, + GLUSTERD_MGMT_V3_LOCK, *peerid, rsp.uuid); + GF_FREE(peerid); + + if (rsp.dict.dict_val) + free(rsp.dict.dict_val); + /* req->rpc_status set to -1 means, STACK_DESTROY will be called from + * the caller function. + */ + if (req->rpc_status != -1) + STACK_DESTROY(frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_mgmt_v3_lock_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) +{ + return glusterd_big_locked_cbk(req, iov, count, myframe, + gd_mgmt_v3_lock_cbk_fn); +} + +int +gd_mgmt_v3_lock(glusterd_op_t op, dict_t *op_ctx, glusterd_peerinfo_t *peerinfo, + struct syncargs *args, uuid_t my_uuid, uuid_t recv_uuid) +{ + gd1_mgmt_v3_lock_req req = { + {0}, + }; + int32_t ret = -1; + xlator_t *this = NULL; + uuid_t *peerid = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(op_ctx); + GF_ASSERT(peerinfo); + GF_ASSERT(args); + + ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val, + &req.dict.dict_len); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); + goto out; + } + + gf_uuid_copy(req.uuid, my_uuid); + req.op = op; + + GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_ALLOC_AND_COPY_UUID_FAIL, NULL); + goto out; + } + + ret = gd_syncop_submit_request(peerinfo->rpc, &req, args, peerid, + &gd_mgmt_v3_prog, GLUSTERD_MGMT_V3_LOCK, + gd_mgmt_v3_lock_cbk, + (xdrproc_t)xdr_gd1_mgmt_v3_lock_req); +out: + GF_FREE(req.dict.dict_val); + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +int +glusterd_mgmt_v3_initiate_lockdown(glusterd_op_t op, dict_t *dict, + char **op_errstr, uint32_t *op_errno, + gf_boolean_t *is_acquired, + uint32_t txn_generation) +{ + glusterd_peerinfo_t *peerinfo = NULL; + int32_t ret = -1; + int32_t peer_cnt = 0; + struct syncargs args = {0}; + uuid_t peer_uuid = {0}; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + uint32_t timeout = 0; + + this = THIS; + GF_ASSERT(this); + conf = this->private; + GF_ASSERT(conf); + + GF_ASSERT(dict); + GF_ASSERT(op_errstr); + 
GF_ASSERT(is_acquired); + + /* Cli will add timeout key to dict if the default timeout is + * other than 2 minutes. Here we use this value to check whether + * mgmt_v3_lock_timeout should be set to default value or we + * need to change the value according to timeout value + * i.e, timeout + 120 seconds. */ + ret = dict_get_uint32(dict, "timeout", &timeout); + if (!ret) + conf->mgmt_v3_lock_timeout = timeout + 120; + + /* Trying to acquire multiple mgmt_v3 locks on local node */ + ret = glusterd_multiple_mgmt_v3_lock(dict, MY_UUID, op_errno); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_LOCK_GET_FAIL, + "Failed to acquire mgmt_v3 locks on localhost"); + goto out; + } + + *is_acquired = _gf_true; + + /* Sending mgmt_v3 lock req to other nodes in the cluster */ + gd_syncargs_init(&args, NULL); + ret = synctask_barrier_init((&args)); + if (ret) + goto out; + + peer_cnt = 0; + + RCU_READ_LOCK; + cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list) + { + /* Only send requests to peers who were available before the + * transaction started + */ + if (peerinfo->generation > txn_generation) + continue; + + if (!peerinfo->connected) + continue; + if (op != GD_OP_SYNC_VOLUME && + peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) + continue; + + gd_mgmt_v3_lock(op, dict, peerinfo, &args, MY_UUID, peer_uuid); + peer_cnt++; + } + RCU_READ_UNLOCK; + + if (0 == peer_cnt) { + ret = 0; + goto out; + } + + gd_synctask_barrier_wait((&args), peer_cnt); + + if (args.errstr) + *op_errstr = gf_strdup(args.errstr); + + ret = args.op_ret; + *op_errno = args.op_errno; + + gf_msg_debug(this->name, 0, + "Sent lock op req for %s " + "to %d peers. Returning %d", + gd_op_list[op], peer_cnt, ret); +out: + if (ret) { + if (*op_errstr) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_LOCK_GET_FAIL, + "%s", *op_errstr); + + ret = gf_asprintf(op_errstr, + "Another transaction is in progress. 
" + "Please try again after some time."); + + if (ret == -1) + *op_errstr = NULL; + + ret = -1; + } + + return ret; +} + +int +glusterd_pre_validate_aggr_rsp_dict(glusterd_op_t op, dict_t *aggr, dict_t *rsp) +{ + int32_t ret = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(aggr); + GF_ASSERT(rsp); + + switch (op) { + case GD_OP_SNAP: + ret = glusterd_snap_pre_validate_use_rsp_dict(aggr, rsp); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PRE_VALIDATION_FAIL, + "Failed to aggregate prevalidate " + "response dictionaries."); + goto out; + } + break; + case GD_OP_REPLACE_BRICK: + ret = glusterd_rb_use_rsp_dict(aggr, rsp); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PRE_VALIDATION_FAIL, + "Failed to aggregate prevalidate " + "response dictionaries."); + goto out; + } + break; + case GD_OP_START_VOLUME: + case GD_OP_ADD_BRICK: + ret = glusterd_aggr_brick_mount_dirs(aggr, rsp); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_BRICK_MOUNDIRS_AGGR_FAIL, + "Failed to " + "aggregate brick mount dirs"); + goto out; + } + break; + case GD_OP_RESET_BRICK: + ret = glusterd_rb_use_rsp_dict(aggr, rsp); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PRE_VALIDATION_FAIL, + "Failed to aggregate prevalidate " + "response dictionaries."); + goto out; + } + case GD_OP_STOP_VOLUME: + case GD_OP_REMOVE_BRICK: + case GD_OP_PROFILE_VOLUME: + case GD_OP_DEFRAG_BRICK_VOLUME: + case GD_OP_REBALANCE: + break; + case GD_OP_MAX_OPVERSION: + break; + default: + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, + "Invalid op (%s)", gd_op_list[op]); + + break; + } +out: + return ret; +} + +int32_t +gd_mgmt_v3_pre_validate_cbk_fn(struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int32_t ret = -1; + struct syncargs *args = NULL; + gd1_mgmt_v3_pre_val_rsp rsp = { + {0}, + }; + call_frame_t *frame = NULL; + int32_t op_ret = -1; + int32_t op_errno = -1; + dict_t *rsp_dict = NULL; + xlator_t *this = NULL; + uuid_t *peerid = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(req); + GF_ASSERT(myframe); + + frame = myframe; + args = frame->local; + peerid = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + GF_VALIDATE_OR_GOTO_WITH_ERROR(this->name, iov, out, op_errno, EINVAL); + + ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_v3_pre_val_rsp); + if (ret < 0) + goto out; + + if (rsp.dict.dict_len) { + /* Unserialize the dictionary */ + rsp_dict = dict_new(); + + ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &rsp_dict); + if (ret < 0) { + free(rsp.dict.dict_val); + goto out; + } else { + rsp_dict->extra_stdfree = rsp.dict.dict_val; + } + } + + gf_uuid_copy(args->uuid, rsp.uuid); + pthread_mutex_lock(&args->lock_dict); + { + ret = glusterd_pre_validate_aggr_rsp_dict(rsp.op, args->dict, rsp_dict); + } + pthread_mutex_unlock(&args->lock_dict); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RESP_AGGR_FAIL, "%s", + "Failed to aggregate response from " + " node/brick"); + if (!rsp.op_ret) + op_ret = ret; + else { + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + } + } else { + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + } + +out: + if (rsp_dict) + dict_unref(rsp_dict); + + gd_mgmt_v3_collate_errors(args, op_ret, op_errno, rsp.op_errstr, + GLUSTERD_MGMT_V3_PRE_VALIDATE, *peerid, rsp.uuid); + + if (rsp.op_errstr) + free(rsp.op_errstr); + GF_FREE(peerid); + /* req->rpc_status set to -1 
means, STACK_DESTROY will be called from + * the caller function. + */ + if (req->rpc_status != -1) + STACK_DESTROY(frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_mgmt_v3_pre_validate_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) +{ + return glusterd_big_locked_cbk(req, iov, count, myframe, + gd_mgmt_v3_pre_validate_cbk_fn); +} + +int +gd_mgmt_v3_pre_validate_req(glusterd_op_t op, dict_t *op_ctx, + glusterd_peerinfo_t *peerinfo, + struct syncargs *args, uuid_t my_uuid, + uuid_t recv_uuid) +{ + int32_t ret = -1; + gd1_mgmt_v3_pre_val_req req = { + {0}, + }; + xlator_t *this = NULL; + uuid_t *peerid = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(op_ctx); + GF_ASSERT(peerinfo); + GF_ASSERT(args); + + ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val, + &req.dict.dict_len); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); + goto out; + } + + gf_uuid_copy(req.uuid, my_uuid); + req.op = op; + + GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_ALLOC_AND_COPY_UUID_FAIL, NULL); + goto out; + } + + ret = gd_syncop_submit_request( + peerinfo->rpc, &req, args, peerid, &gd_mgmt_v3_prog, + GLUSTERD_MGMT_V3_PRE_VALIDATE, gd_mgmt_v3_pre_validate_cbk, + (xdrproc_t)xdr_gd1_mgmt_v3_pre_val_req); +out: + GF_FREE(req.dict.dict_val); + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +int +glusterd_mgmt_v3_pre_validate(glusterd_op_t op, dict_t *req_dict, + char **op_errstr, uint32_t *op_errno, + uint32_t txn_generation) +{ + int32_t ret = -1; + int32_t peer_cnt = 0; + dict_t *rsp_dict = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + struct syncargs args = {0}; + uuid_t peer_uuid = {0}; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + + this = THIS; + GF_ASSERT(this); + conf = this->private; + GF_ASSERT(conf); + + GF_ASSERT(req_dict); + GF_ASSERT(op_errstr); + GF_VALIDATE_OR_GOTO(this->name, op_errno, out); + + rsp_dict = dict_new(); + if (!rsp_dict) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, + "Failed to create response dictionary"); + goto out; + } + + if (op == GD_OP_PROFILE_VOLUME || op == GD_OP_STOP_VOLUME || + op == GD_OP_REBALANCE || op == GD_OP_REMOVE_BRICK) { + ret = glusterd_validate_quorum(this, op, req_dict, op_errstr); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SERVER_QUORUM_NOT_MET, + "Server quorum not met. Rejecting operation."); + goto out; + } + } + + /* Pre Validation on local node */ + ret = gd_mgmt_v3_pre_validate_fn(op, req_dict, op_errstr, rsp_dict, + op_errno); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PRE_VALIDATION_FAIL, + "Pre Validation failed for " + "operation %s on local node", + gd_op_list[op]); + + if (*op_errstr == NULL) { + ret = gf_asprintf(op_errstr, + "Pre-validation failed " + "on localhost. 
Please " + "check log file for details"); + if (ret == -1) + *op_errstr = NULL; + + ret = -1; + } + goto out; + } + + if (op != GD_OP_MAX_OPVERSION) { + ret = glusterd_pre_validate_aggr_rsp_dict(op, req_dict, rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PRE_VALIDATION_FAIL, + "%s", + "Failed to aggregate response from " + " node/brick"); + goto out; + } + + dict_unref(rsp_dict); + rsp_dict = NULL; + } + + /* Sending Pre Validation req to other nodes in the cluster */ + gd_syncargs_init(&args, req_dict); + ret = synctask_barrier_init((&args)); + if (ret) + goto out; + + peer_cnt = 0; + + RCU_READ_LOCK; + cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list) + { + /* Only send requests to peers who were available before the + * transaction started + */ + if (peerinfo->generation > txn_generation) + continue; + + if (!peerinfo->connected) + continue; + if (op != GD_OP_SYNC_VOLUME && + peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) + continue; + + gd_mgmt_v3_pre_validate_req(op, req_dict, peerinfo, &args, MY_UUID, + peer_uuid); + peer_cnt++; + } + RCU_READ_UNLOCK; + + if (0 == peer_cnt) { + ret = 0; + goto out; + } + + gd_synctask_barrier_wait((&args), peer_cnt); + + if (args.op_ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PRE_VALIDATION_FAIL, + "Pre Validation failed on peers"); + + if (args.errstr) + *op_errstr = gf_strdup(args.errstr); + } + + ret = args.op_ret; + *op_errno = args.op_errno; + + gf_msg_debug(this->name, 0, + "Sent pre valaidation req for %s " + "to %d peers. Returning %d", + gd_op_list[op], peer_cnt, ret); +out: + return ret; +} + +int +glusterd_mgmt_v3_build_payload(dict_t **req, char **op_errstr, dict_t *dict, + glusterd_op_t op) +{ + int32_t ret = -1; + dict_t *req_dict = NULL; + xlator_t *this = NULL; + char *volname = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(req); + GF_ASSERT(op_errstr); + GF_ASSERT(dict); + + req_dict = dict_new(); + if (!req_dict) + goto out; + + switch (op) { + case GD_OP_MAX_OPVERSION: + case GD_OP_SNAP: + dict_copy(dict, req_dict); + break; + case GD_OP_START_VOLUME: + case GD_OP_STOP_VOLUME: + case GD_OP_ADD_BRICK: + case GD_OP_REMOVE_BRICK: + case GD_OP_DEFRAG_BRICK_VOLUME: + case GD_OP_REPLACE_BRICK: + case GD_OP_RESET_BRICK: + case GD_OP_PROFILE_VOLUME: { + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg(this->name, GF_LOG_CRITICAL, errno, + GD_MSG_DICT_GET_FAILED, + "volname is not present in " + "operation ctx"); + goto out; + } + + if (strcasecmp(volname, "all")) { + ret = glusterd_dict_set_volid(dict, volname, op_errstr); + if (ret) + goto out; + } + dict_copy(dict, req_dict); + } break; + + case GD_OP_REBALANCE: { + if (gd_set_commit_hash(dict) != 0) { + ret = -1; + goto out; + } + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg(this->name, GF_LOG_CRITICAL, errno, + GD_MSG_DICT_GET_FAILED, + "volname is not present in " + "operation ctx"); + goto out; + } + + if (strcasecmp(volname, "all")) { + ret = glusterd_dict_set_volid(dict, volname, op_errstr); + if (ret) + goto out; + } + dict_copy(dict, req_dict); + } break; + + default: + break; + } + + *req = req_dict; + ret = 0; +out: + return ret; +} + +int32_t +gd_mgmt_v3_brick_op_cbk_fn(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) +{ + int32_t ret = -1; + struct syncargs *args = NULL; + gd1_mgmt_v3_brick_op_rsp rsp = { + {0}, + }; + call_frame_t *frame = NULL; + int32_t op_ret = -1; + int32_t op_errno = -1; + dict_t *rsp_dict = NULL; + 
xlator_t *this = NULL; + uuid_t *peerid = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(req); + GF_ASSERT(myframe); + + frame = myframe; + args = frame->local; + peerid = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + /* If the operation failed, then iov can be NULL. So better check the + status of the operation and then worry about iov (if the status of + the command is success) + */ + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + GF_VALIDATE_OR_GOTO_WITH_ERROR(this->name, iov, out, op_errno, EINVAL); + + ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_v3_brick_op_rsp); + if (ret < 0) + goto out; + + if (rsp.dict.dict_len) { + /* Unserialize the dictionary */ + rsp_dict = dict_new(); + + ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &rsp_dict); + if (ret < 0) { + goto out; + } else { + rsp_dict->extra_stdfree = rsp.dict.dict_val; + } + } + + gf_uuid_copy(args->uuid, rsp.uuid); + pthread_mutex_lock(&args->lock_dict); + { + if (rsp.op == GD_OP_DEFRAG_BRICK_VOLUME || + rsp.op == GD_OP_PROFILE_VOLUME) + ret = glusterd_syncop_aggr_rsp_dict(rsp.op, args->dict, rsp_dict); + } + pthread_mutex_unlock(&args->lock_dict); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RESP_AGGR_FAIL, "%s", + "Failed to aggregate response from " + " node/brick"); + if (!rsp.op_ret) + op_ret = ret; + else { + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + } + } else { + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + } + +out: + + gd_mgmt_v3_collate_errors(args, op_ret, op_errno, rsp.op_errstr, + GLUSTERD_MGMT_V3_BRICK_OP, *peerid, rsp.uuid); + + if (rsp.op_errstr) + free(rsp.op_errstr); + + if (rsp_dict) + dict_unref(rsp_dict); + + GF_FREE(peerid); + /* req->rpc_status set to -1 means, STACK_DESTROY will be called from + * the caller function. 
+ */ + if (req->rpc_status != -1) + STACK_DESTROY(frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_mgmt_v3_brick_op_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) +{ + return glusterd_big_locked_cbk(req, iov, count, myframe, + gd_mgmt_v3_brick_op_cbk_fn); +} + +int +gd_mgmt_v3_brick_op_req(glusterd_op_t op, dict_t *op_ctx, + glusterd_peerinfo_t *peerinfo, struct syncargs *args, + uuid_t my_uuid, uuid_t recv_uuid) +{ + int32_t ret = -1; + gd1_mgmt_v3_brick_op_req req = { + {0}, + }; + xlator_t *this = NULL; + uuid_t *peerid = { + 0, + }; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(op_ctx); + GF_ASSERT(peerinfo); + GF_ASSERT(args); + + ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val, + &req.dict.dict_len); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); + goto out; + } + + gf_uuid_copy(req.uuid, my_uuid); + req.op = op; + + GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_ALLOC_AND_COPY_UUID_FAIL, NULL); + goto out; + } + + ret = gd_syncop_submit_request(peerinfo->rpc, &req, args, peerid, + &gd_mgmt_v3_prog, GLUSTERD_MGMT_V3_BRICK_OP, + gd_mgmt_v3_brick_op_cbk, + (xdrproc_t)xdr_gd1_mgmt_v3_brick_op_req); +out: + GF_FREE(req.dict.dict_val); + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +int +glusterd_mgmt_v3_brick_op(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, + char **op_errstr, uint32_t txn_generation) +{ + int32_t ret = -1; + int32_t peer_cnt = 0; + dict_t *rsp_dict = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + struct syncargs args = {0}; + uuid_t peer_uuid = {0}; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + + this = THIS; + GF_ASSERT(this); + conf = this->private; + GF_ASSERT(conf); + + GF_ASSERT(req_dict); + GF_ASSERT(op_errstr); + + rsp_dict = dict_new(); + if (!rsp_dict) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, + "Failed to create response dictionary"); + goto out; + } + + /* Perform brick op on local node */ + ret = gd_mgmt_v3_brick_op_fn(op, req_dict, op_errstr, rsp_dict); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_OP_FAIL, + "Brick ops failed for " + "operation %s on local node", + gd_op_list[op]); + + if (*op_errstr == NULL) { + ret = gf_asprintf(op_errstr, + "Brick ops failed " + "on localhost. 
Please " + "check log file for details"); + if (ret == -1) + *op_errstr = NULL; + + ret = -1; + } + goto out; + } + if (op == GD_OP_DEFRAG_BRICK_VOLUME || op == GD_OP_PROFILE_VOLUME) { + ret = glusterd_syncop_aggr_rsp_dict(op, op_ctx, rsp_dict); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, "%s", + "Failed to aggregate response from " + " node/brick"); + goto out; + } + } + + dict_unref(rsp_dict); + rsp_dict = NULL; + + /* Sending brick op req to other nodes in the cluster */ + gd_syncargs_init(&args, op_ctx); + ret = synctask_barrier_init((&args)); + if (ret) + goto out; + + peer_cnt = 0; + + RCU_READ_LOCK; + cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list) + { + /* Only send requests to peers who were available before the + * transaction started + */ + if (peerinfo->generation > txn_generation) + continue; + + if (!peerinfo->connected) + continue; + if (op != GD_OP_SYNC_VOLUME && + peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) + continue; + + gd_mgmt_v3_brick_op_req(op, req_dict, peerinfo, &args, MY_UUID, + peer_uuid); + peer_cnt++; + } + RCU_READ_UNLOCK; + + if (0 == peer_cnt) { + ret = 0; + goto out; + } + + gd_synctask_barrier_wait((&args), peer_cnt); + + if (args.op_ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_OP_FAIL, + "Brick ops failed on peers"); + + if (args.errstr) + *op_errstr = gf_strdup(args.errstr); + } + + ret = args.op_ret; + + gf_msg_debug(this->name, 0, + "Sent brick op req for %s " + "to %d peers. Returning %d", + gd_op_list[op], peer_cnt, ret); +out: + return ret; +} + +int32_t +gd_mgmt_v3_commit_cbk_fn(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) +{ + int32_t ret = -1; + struct syncargs *args = NULL; + gd1_mgmt_v3_commit_rsp rsp = { + {0}, + }; + call_frame_t *frame = NULL; + int32_t op_ret = -1; + int32_t op_errno = -1; + dict_t *rsp_dict = NULL; + xlator_t *this = NULL; + uuid_t *peerid = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(req); + GF_ASSERT(myframe); + + frame = myframe; + args = frame->local; + peerid = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + GF_VALIDATE_OR_GOTO_WITH_ERROR(this->name, iov, out, op_errno, EINVAL); + + ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_v3_commit_rsp); + if (ret < 0) + goto out; + + if (rsp.dict.dict_len) { + /* Unserialize the dictionary */ + rsp_dict = dict_new(); + + ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &rsp_dict); + if (ret < 0) { + free(rsp.dict.dict_val); + goto out; + } else { + rsp_dict->extra_stdfree = rsp.dict.dict_val; + } + } + + gf_uuid_copy(args->uuid, rsp.uuid); + pthread_mutex_lock(&args->lock_dict); + { + ret = glusterd_syncop_aggr_rsp_dict(rsp.op, args->dict, rsp_dict); + } + pthread_mutex_unlock(&args->lock_dict); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RESP_AGGR_FAIL, "%s", + "Failed to aggregate response from " + " node/brick"); + if (!rsp.op_ret) + op_ret = ret; + else { + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + } + } else { + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + } + +out: + if (rsp_dict) + dict_unref(rsp_dict); + + gd_mgmt_v3_collate_errors(args, op_ret, op_errno, rsp.op_errstr, + GLUSTERD_MGMT_V3_COMMIT, *peerid, rsp.uuid); + GF_FREE(peerid); + + if (rsp.op_errstr) + free(rsp.op_errstr); + + /* req->rpc_status set to -1 means, STACK_DESTROY will be called from + * the caller function. 
+ */ + if (req->rpc_status != -1) + STACK_DESTROY(frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_mgmt_v3_commit_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) +{ + return glusterd_big_locked_cbk(req, iov, count, myframe, + gd_mgmt_v3_commit_cbk_fn); +} + +int +gd_mgmt_v3_commit_req(glusterd_op_t op, dict_t *op_ctx, + glusterd_peerinfo_t *peerinfo, struct syncargs *args, + uuid_t my_uuid, uuid_t recv_uuid) +{ + int32_t ret = -1; + gd1_mgmt_v3_commit_req req = { + {0}, + }; + xlator_t *this = NULL; + uuid_t *peerid = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(op_ctx); + GF_ASSERT(peerinfo); + GF_ASSERT(args); + + ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val, + &req.dict.dict_len); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); + goto out; + } + + gf_uuid_copy(req.uuid, my_uuid); + req.op = op; + + GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_ALLOC_AND_COPY_UUID_FAIL, NULL); + goto out; + } + + ret = gd_syncop_submit_request(peerinfo->rpc, &req, args, peerid, + &gd_mgmt_v3_prog, GLUSTERD_MGMT_V3_COMMIT, + gd_mgmt_v3_commit_cbk, + (xdrproc_t)xdr_gd1_mgmt_v3_commit_req); +out: + GF_FREE(req.dict.dict_val); + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +int +glusterd_mgmt_v3_commit(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, + char **op_errstr, uint32_t *op_errno, + uint32_t txn_generation) +{ + int32_t ret = -1; + int32_t peer_cnt = 0; + dict_t *rsp_dict = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + struct syncargs args = {0}; + uuid_t peer_uuid = {0}; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + + this = THIS; + GF_ASSERT(this); + conf = this->private; + GF_ASSERT(conf); + + GF_ASSERT(op_ctx); + GF_ASSERT(req_dict); + GF_ASSERT(op_errstr); + GF_VALIDATE_OR_GOTO(this->name, op_errno, out); + + switch (op) { + case GD_OP_REBALANCE: + case GD_OP_DEFRAG_BRICK_VOLUME: + + ret = glusterd_set_rebalance_id_in_rsp_dict(req_dict, op_ctx); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, + "Failed to set rebalance id in dict."); + } + break; + case GD_OP_REMOVE_BRICK: + ret = glusterd_set_rebalance_id_for_remove_brick(req_dict, op_ctx); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, + "Failed to set rebalance id for remove-brick in dict."); + } + break; + default: + break; + } + rsp_dict = dict_new(); + if (!rsp_dict) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, + "Failed to create response dictionary"); + goto out; + } + + /* Commit on local node */ + ret = gd_mgmt_v3_commit_fn(op, req_dict, op_errstr, op_errno, rsp_dict); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_OP_FAIL, + "Commit failed for " + "operation %s on local node", + gd_op_list[op]); + + if (*op_errstr == NULL) { + ret = gf_asprintf(op_errstr, + "Commit failed " + "on localhost. 
Please " + "check log file for details."); + if (ret == -1) + *op_errstr = NULL; + + ret = -1; + } + goto out; + } + + ret = glusterd_syncop_aggr_rsp_dict(op, op_ctx, rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RESP_AGGR_FAIL, "%s", + "Failed to aggregate response from " + " node/brick"); + goto out; + } + + dict_unref(rsp_dict); + rsp_dict = NULL; + + /* Sending commit req to other nodes in the cluster */ + gd_syncargs_init(&args, op_ctx); + ret = synctask_barrier_init((&args)); + if (ret) + goto out; + peer_cnt = 0; + + RCU_READ_LOCK; + cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list) + { + /* Only send requests to peers who were available before the + * transaction started + */ + if (peerinfo->generation > txn_generation) + continue; + if (!peerinfo->connected) + continue; + + if (op != GD_OP_SYNC_VOLUME && + peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) + continue; + + gd_mgmt_v3_commit_req(op, req_dict, peerinfo, &args, MY_UUID, + peer_uuid); + peer_cnt++; + } + RCU_READ_UNLOCK; + + if (0 == peer_cnt) { + ret = 0; + goto out; + } + + gd_synctask_barrier_wait((&args), peer_cnt); + + if (args.op_ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_OP_FAIL, + "Commit failed on peers"); + + if (args.errstr) + *op_errstr = gf_strdup(args.errstr); + } + + ret = args.op_ret; + *op_errno = args.op_errno; + + gf_msg_debug(this->name, 0, + "Sent commit req for %s to %d " + "peers. Returning %d", + gd_op_list[op], peer_cnt, ret); +out: + glusterd_op_modify_op_ctx(op, op_ctx); + return ret; +} + +int32_t +gd_mgmt_v3_post_commit_cbk_fn(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) +{ + int32_t ret = -1; + struct syncargs *args = NULL; + gd1_mgmt_v3_post_commit_rsp rsp = { + {0}, + }; + call_frame_t *frame = NULL; + int32_t op_ret = -1; + int32_t op_errno = -1; + dict_t *rsp_dict = NULL; + xlator_t *this = NULL; + uuid_t *peerid = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(req); + GF_ASSERT(myframe); + + frame = myframe; + args = frame->local; + peerid = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + GF_VALIDATE_OR_GOTO_WITH_ERROR(this->name, iov, out, op_errno, EINVAL); + + ret = xdr_to_generic(*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_v3_post_commit_rsp); + if (ret < 0) + goto out; + + if (rsp.dict.dict_len) { + /* Unserialize the dictionary */ + rsp_dict = dict_new(); + + ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &rsp_dict); + if (ret < 0) { + free(rsp.dict.dict_val); + goto out; + } else { + rsp_dict->extra_stdfree = rsp.dict.dict_val; + } + } + + gf_uuid_copy(args->uuid, rsp.uuid); + pthread_mutex_lock(&args->lock_dict); + { + ret = glusterd_syncop_aggr_rsp_dict(rsp.op, args->dict, rsp_dict); + } + pthread_mutex_unlock(&args->lock_dict); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RESP_AGGR_FAIL, "%s", + "Failed to aggregate response from " + " node/brick"); + if (!rsp.op_ret) + op_ret = ret; + else { + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + } + } else { + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + } + +out: + if (rsp_dict) + dict_unref(rsp_dict); + + gd_mgmt_v3_collate_errors(args, op_ret, op_errno, rsp.op_errstr, + GLUSTERD_MGMT_V3_POST_COMMIT, *peerid, rsp.uuid); + GF_FREE(peerid); + + if (rsp.op_errstr) + free(rsp.op_errstr); + + /* req->rpc_status set to -1 means, STACK_DESTROY will be called from + * the caller function. 
+ */ + if (req->rpc_status != -1) + STACK_DESTROY(frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_mgmt_v3_post_commit_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) +{ + return glusterd_big_locked_cbk(req, iov, count, myframe, + gd_mgmt_v3_post_commit_cbk_fn); +} + +int +gd_mgmt_v3_post_commit_req(glusterd_op_t op, dict_t *op_ctx, + glusterd_peerinfo_t *peerinfo, struct syncargs *args, + uuid_t my_uuid, uuid_t recv_uuid) +{ + int32_t ret = -1; + gd1_mgmt_v3_post_commit_req req = { + {0}, + }; + xlator_t *this = NULL; + uuid_t *peerid = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(op_ctx); + GF_ASSERT(peerinfo); + GF_ASSERT(args); + + ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val, + &req.dict.dict_len); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); + goto out; + } + + gf_uuid_copy(req.uuid, my_uuid); + req.op = op; + + GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_ALLOC_AND_COPY_UUID_FAIL, NULL); + goto out; + } + + ret = gd_syncop_submit_request( + peerinfo->rpc, &req, args, peerid, &gd_mgmt_v3_prog, + GLUSTERD_MGMT_V3_POST_COMMIT, gd_mgmt_v3_post_commit_cbk, + (xdrproc_t)xdr_gd1_mgmt_v3_post_commit_req); +out: + GF_FREE(req.dict.dict_val); + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +int +glusterd_mgmt_v3_post_commit(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, + char **op_errstr, uint32_t *op_errno, + uint32_t txn_generation) +{ + int32_t ret = -1; + int32_t peer_cnt = 0; + dict_t *rsp_dict = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + struct syncargs args = {0}; + uuid_t peer_uuid = {0}; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + + this = THIS; + GF_ASSERT(this); + conf = this->private; + GF_ASSERT(conf); + + GF_ASSERT(op_ctx); + GF_ASSERT(req_dict); + GF_ASSERT(op_errstr); + GF_VALIDATE_OR_GOTO(this->name, op_errno, out); + + rsp_dict = dict_new(); + if (!rsp_dict) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, + "Failed to create response dictionary"); + goto out; + } + + /* Post commit on local node */ + ret = gd_mgmt_v3_post_commit_fn(op, req_dict, op_errstr, op_errno, + rsp_dict); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_COMMIT_OP_FAIL, + "Post commit failed for " + "operation %s on local node", + gd_op_list[op]); + + if (*op_errstr == NULL) { + ret = gf_asprintf(op_errstr, + "Post commit failed " + "on localhost. 
Please " + "check log file for details."); + if (ret == -1) + *op_errstr = NULL; + + ret = -1; + } + goto out; + } + + ret = glusterd_syncop_aggr_rsp_dict(op, op_ctx, rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RESP_AGGR_FAIL, "%s", + "Failed to aggregate response from " + " node/brick"); + goto out; + } + + dict_unref(rsp_dict); + rsp_dict = NULL; + + /* Sending post commit req to other nodes in the cluster */ + gd_syncargs_init(&args, op_ctx); + ret = synctask_barrier_init((&args)); + if (ret) + goto out; + peer_cnt = 0; + + RCU_READ_LOCK; + cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list) + { + /* Only send requests to peers who were available before the + * transaction started + */ + if (peerinfo->generation > txn_generation) + continue; + if (!peerinfo->connected) + continue; + + if (op != GD_OP_SYNC_VOLUME && + peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) + continue; + + gd_mgmt_v3_post_commit_req(op, req_dict, peerinfo, &args, MY_UUID, + peer_uuid); + peer_cnt++; + } + RCU_READ_UNLOCK; + + if (0 == peer_cnt) { + ret = 0; + goto out; + } + + gd_synctask_barrier_wait((&args), peer_cnt); + + if (args.op_ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_COMMIT_OP_FAIL, + "Post commit failed on peers"); + + if (args.errstr) + *op_errstr = gf_strdup(args.errstr); + } + + ret = args.op_ret; + *op_errno = args.op_errno; + + gf_msg_debug(this->name, 0, + "Sent post commit req for %s to %d " + "peers. Returning %d", + gd_op_list[op], peer_cnt, ret); +out: + glusterd_op_modify_op_ctx(op, op_ctx); + return ret; +} + +int32_t +gd_mgmt_v3_post_validate_cbk_fn(struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int32_t ret = -1; + struct syncargs *args = NULL; + gd1_mgmt_v3_post_val_rsp rsp = { + {0}, + }; + call_frame_t *frame = NULL; + int32_t op_ret = -1; + int32_t op_errno = -1; + xlator_t *this = NULL; + uuid_t *peerid = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(req); + GF_ASSERT(myframe); + + frame = myframe; + args = frame->local; + peerid = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + GF_VALIDATE_OR_GOTO_WITH_ERROR(this->name, iov, out, op_errno, EINVAL); + + ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_v3_post_val_rsp); + if (ret < 0) + goto out; + + gf_uuid_copy(args->uuid, rsp.uuid); + + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + +out: + gd_mgmt_v3_collate_errors(args, op_ret, op_errno, rsp.op_errstr, + GLUSTERD_MGMT_V3_POST_VALIDATE, *peerid, + rsp.uuid); + if (rsp.op_errstr) + free(rsp.op_errstr); + + if (rsp.dict.dict_val) + free(rsp.dict.dict_val); + GF_FREE(peerid); + /* req->rpc_status set to -1 means, STACK_DESTROY will be called from + * the caller function. 
+ */ + if (req->rpc_status != -1) + STACK_DESTROY(frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_mgmt_v3_post_validate_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) +{ + return glusterd_big_locked_cbk(req, iov, count, myframe, + gd_mgmt_v3_post_validate_cbk_fn); +} + +int +gd_mgmt_v3_post_validate_req(glusterd_op_t op, int32_t op_ret, dict_t *op_ctx, + glusterd_peerinfo_t *peerinfo, + struct syncargs *args, uuid_t my_uuid, + uuid_t recv_uuid) +{ + int32_t ret = -1; + gd1_mgmt_v3_post_val_req req = { + {0}, + }; + xlator_t *this = NULL; + uuid_t *peerid = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(op_ctx); + GF_ASSERT(peerinfo); + GF_ASSERT(args); + + ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val, + &req.dict.dict_len); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); + goto out; + } + + gf_uuid_copy(req.uuid, my_uuid); + req.op = op; + req.op_ret = op_ret; + + GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_ALLOC_AND_COPY_UUID_FAIL, NULL); + goto out; + } + + ret = gd_syncop_submit_request( + peerinfo->rpc, &req, args, peerid, &gd_mgmt_v3_prog, + GLUSTERD_MGMT_V3_POST_VALIDATE, gd_mgmt_v3_post_validate_cbk, + (xdrproc_t)xdr_gd1_mgmt_v3_post_val_req); +out: + GF_FREE(req.dict.dict_val); + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +int +glusterd_mgmt_v3_post_validate(glusterd_op_t op, int32_t op_ret, dict_t *dict, + dict_t *req_dict, char **op_errstr, + uint32_t txn_generation) +{ + int32_t ret = -1; + int32_t peer_cnt = 0; + dict_t *rsp_dict = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + struct syncargs args = {0}; + uuid_t peer_uuid = {0}; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + + this = THIS; + GF_ASSERT(this); + conf = this->private; + GF_ASSERT(conf); + + GF_ASSERT(dict); + GF_VALIDATE_OR_GOTO(this->name, req_dict, out); + GF_ASSERT(op_errstr); + + rsp_dict = dict_new(); + if (!rsp_dict) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, + "Failed to create response dictionary"); + goto out; + } + + /* Post Validation on local node */ + ret = gd_mgmt_v3_post_validate_fn(op, op_ret, req_dict, op_errstr, + rsp_dict); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_VALIDATION_FAIL, + "Post Validation failed for " + "operation %s on local node", + gd_op_list[op]); + + if (*op_errstr == NULL) { + ret = gf_asprintf(op_errstr, + "Post-validation failed " + "on localhost. 
Please check " + "log file for details"); + if (ret == -1) + *op_errstr = NULL; + + ret = -1; + } + goto out; + } + + dict_unref(rsp_dict); + rsp_dict = NULL; + + /* Sending Post Validation req to other nodes in the cluster */ + gd_syncargs_init(&args, req_dict); + ret = synctask_barrier_init((&args)); + if (ret) + goto out; + + peer_cnt = 0; + + RCU_READ_LOCK; + cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list) + { + /* Only send requests to peers who were available before the + * transaction started + */ + if (peerinfo->generation > txn_generation) + continue; + + if (!peerinfo->connected) + continue; + if (op != GD_OP_SYNC_VOLUME && + peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) + continue; + + gd_mgmt_v3_post_validate_req(op, op_ret, req_dict, peerinfo, &args, + MY_UUID, peer_uuid); + peer_cnt++; + } + RCU_READ_UNLOCK; + + if (0 == peer_cnt) { + ret = 0; + goto out; + } + + gd_synctask_barrier_wait((&args), peer_cnt); + + if (args.op_ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_VALIDATION_FAIL, + "Post Validation failed on peers"); + + if (args.errstr) + *op_errstr = gf_strdup(args.errstr); + } + + ret = args.op_ret; + + gf_msg_debug(this->name, 0, + "Sent post valaidation req for %s " + "to %d peers. Returning %d", + gd_op_list[op], peer_cnt, ret); +out: + return ret; +} + +int32_t +gd_mgmt_v3_unlock_cbk_fn(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) +{ + int32_t ret = -1; + struct syncargs *args = NULL; + gd1_mgmt_v3_unlock_rsp rsp = { + {0}, + }; + call_frame_t *frame = NULL; + int32_t op_ret = -1; + int32_t op_errno = -1; + xlator_t *this = NULL; + uuid_t *peerid = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(req); + GF_ASSERT(myframe); + + frame = myframe; + args = frame->local; + peerid = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + GF_VALIDATE_OR_GOTO_WITH_ERROR(this->name, iov, out, op_errno, EINVAL); + + ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_v3_unlock_rsp); + if (ret < 0) + goto out; + + gf_uuid_copy(args->uuid, rsp.uuid); + + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + +out: + gd_mgmt_v3_collate_errors(args, op_ret, op_errno, NULL, + GLUSTERD_MGMT_V3_UNLOCK, *peerid, rsp.uuid); + if (rsp.dict.dict_val) + free(rsp.dict.dict_val); + GF_FREE(peerid); + /* req->rpc_status set to -1 means, STACK_DESTROY will be called from + * the caller function. 
+ */ + if (req->rpc_status != -1) + STACK_DESTROY(frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_mgmt_v3_unlock_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) +{ + return glusterd_big_locked_cbk(req, iov, count, myframe, + gd_mgmt_v3_unlock_cbk_fn); +} + +int +gd_mgmt_v3_unlock(glusterd_op_t op, dict_t *op_ctx, + glusterd_peerinfo_t *peerinfo, struct syncargs *args, + uuid_t my_uuid, uuid_t recv_uuid) +{ + int32_t ret = -1; + gd1_mgmt_v3_unlock_req req = { + {0}, + }; + xlator_t *this = NULL; + uuid_t *peerid = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(op_ctx); + GF_ASSERT(peerinfo); + GF_ASSERT(args); + + ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val, + &req.dict.dict_len); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); + goto out; + } + + gf_uuid_copy(req.uuid, my_uuid); + req.op = op; + + GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_ALLOC_AND_COPY_UUID_FAIL, NULL); + goto out; + } + + ret = gd_syncop_submit_request(peerinfo->rpc, &req, args, peerid, + &gd_mgmt_v3_prog, GLUSTERD_MGMT_V3_UNLOCK, + gd_mgmt_v3_unlock_cbk, + (xdrproc_t)xdr_gd1_mgmt_v3_unlock_req); +out: + GF_FREE(req.dict.dict_val); + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +int +glusterd_mgmt_v3_release_peer_locks(glusterd_op_t op, dict_t *dict, + int32_t op_ret, char **op_errstr, + gf_boolean_t is_acquired, + uint32_t txn_generation) +{ + int32_t ret = -1; + int32_t peer_cnt = 0; + uuid_t peer_uuid = {0}; + xlator_t *this = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + struct syncargs args = {0}; + glusterd_conf_t *conf = NULL; + + this = THIS; + GF_ASSERT(this); + conf = this->private; + GF_ASSERT(conf); + + GF_ASSERT(dict); + GF_ASSERT(op_errstr); + + /* If the lock has not been held during this + * transaction, do not send unlock requests */ + if (!is_acquired) + goto out; + + /* Sending mgmt_v3 unlock req to other nodes in the cluster */ + gd_syncargs_init(&args, NULL); + ret = synctask_barrier_init((&args)); + if (ret) + goto out; + peer_cnt = 0; + RCU_READ_LOCK; + cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list) + { + /* Only send requests to peers who were available before the + * transaction started + */ + if (peerinfo->generation > txn_generation) + continue; + + if (!peerinfo->connected) + continue; + if (op != GD_OP_SYNC_VOLUME && + peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) + continue; + + gd_mgmt_v3_unlock(op, dict, peerinfo, &args, MY_UUID, peer_uuid); + peer_cnt++; + } + RCU_READ_UNLOCK; + + if (0 == peer_cnt) { + ret = 0; + goto out; + } + + gd_synctask_barrier_wait((&args), peer_cnt); + + if (args.op_ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_UNLOCK_FAIL, + "Unlock failed on peers"); + + if (!op_ret && args.errstr) + *op_errstr = gf_strdup(args.errstr); + } + + ret = args.op_ret; + + gf_msg_debug(this->name, 0, + "Sent unlock op req for %s " + "to %d peers. 
Returning %d", + gd_op_list[op], peer_cnt, ret); + +out: + return ret; +} + +int32_t +glusterd_mgmt_v3_initiate_all_phases_with_brickop_phase(rpcsvc_request_t *req, + glusterd_op_t op, + dict_t *dict) +{ + int32_t ret = -1; + int32_t op_ret = -1; + dict_t *req_dict = NULL; + dict_t *tmp_dict = NULL; + glusterd_conf_t *conf = NULL; + char *op_errstr = NULL; + xlator_t *this = NULL; + gf_boolean_t is_acquired = _gf_false; + uuid_t *originator_uuid = NULL; + uint32_t txn_generation = 0; + uint32_t op_errno = 0; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(req); + GF_ASSERT(dict); + conf = this->private; + GF_ASSERT(conf); + + /* Save the peer list generation */ + txn_generation = conf->generation; + cmm_smp_rmb(); + /* This read memory barrier makes sure that this assignment happens here + * only and is not reordered and optimized by either the compiler or the + * processor. + */ + + /* Save the MY_UUID as the originator_uuid. This originator_uuid + * will be used by is_origin_glusterd() to determine if a node + * is the originator node for a command. */ + originator_uuid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); + if (!originator_uuid) { + ret = -1; + goto out; + } + + gf_uuid_copy(*originator_uuid, MY_UUID); + ret = dict_set_bin(dict, "originator_uuid", originator_uuid, + sizeof(uuid_t)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set originator_uuid."); + GF_FREE(originator_uuid); + goto out; + } + + /* Marking the operation as complete synctasked */ + ret = dict_set_int32(dict, "is_synctasked", _gf_true); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set synctasked flag."); + goto out; + } + + /* Use a copy at local unlock as cli response will be sent before + * the unlock and the volname in the dict might be removed */ + tmp_dict = dict_new(); + if (!tmp_dict) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, + "Unable to create dict"); + goto out; + } + dict_copy(dict, tmp_dict); + + /* LOCKDOWN PHASE - Acquire mgmt_v3 locks */ + ret = glusterd_mgmt_v3_initiate_lockdown(op, dict, &op_errstr, &op_errno, + &is_acquired, txn_generation); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_LOCKDOWN_FAIL, + "mgmt_v3 lockdown failed."); + goto out; + } + + /* BUILD PAYLOAD */ + ret = glusterd_mgmt_v3_build_payload(&req_dict, &op_errstr, dict, op); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_PAYLOAD_BUILD_FAIL, + LOGSTR_BUILD_PAYLOAD, gd_op_list[op]); + if (op_errstr == NULL) + gf_asprintf(&op_errstr, OPERRSTR_BUILD_PAYLOAD); + goto out; + } + + /* PRE-COMMIT VALIDATE PHASE */ + ret = glusterd_mgmt_v3_pre_validate(op, req_dict, &op_errstr, &op_errno, + txn_generation); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PRE_VALIDATION_FAIL, + "Pre Validation Failed"); + goto out; + } + + /* BRICK-OPS */ + ret = glusterd_mgmt_v3_brick_op(op, dict, req_dict, &op_errstr, + txn_generation); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, "Brick Op Failed"); + goto out; + } + + /* COMMIT OP PHASE */ + ret = glusterd_mgmt_v3_commit(op, dict, req_dict, &op_errstr, &op_errno, + txn_generation); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_OP_FAIL, + "Commit Op Failed"); + goto out; + } + + /* POST-COMMIT VALIDATE PHASE */ + /* As of now, post_validate is not trying to cleanup any failed + commands. So as of now, I am sending 0 (op_ret as 0). 
+ */ + ret = glusterd_mgmt_v3_post_validate(op, 0, dict, req_dict, &op_errstr, + txn_generation); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_VALIDATION_FAIL, + "Post Validation Failed"); + goto out; + } + + ret = 0; +out: + op_ret = ret; + /* UNLOCK PHASE FOR PEERS*/ + (void)glusterd_mgmt_v3_release_peer_locks(op, dict, op_ret, &op_errstr, + is_acquired, txn_generation); + + /* LOCAL VOLUME(S) UNLOCK */ + if (is_acquired) { + /* Trying to release multiple mgmt_v3 locks */ + ret = glusterd_multiple_mgmt_v3_unlock(tmp_dict, MY_UUID); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_UNLOCK_FAIL, + "Failed to release mgmt_v3 locks on localhost"); + op_ret = ret; + } + } + + if (op_ret && (op_errno == 0)) + op_errno = EG_INTRNL; + + if (op != GD_OP_MAX_OPVERSION) { + /* SEND CLI RESPONSE */ + glusterd_op_send_cli_response(op, op_ret, op_errno, req, dict, + op_errstr); + } + + if (req_dict) + dict_unref(req_dict); + + if (tmp_dict) + dict_unref(tmp_dict); + + if (op_errstr) { + GF_FREE(op_errstr); + op_errstr = NULL; + } + + return 0; +} + +int32_t +glusterd_mgmt_v3_initiate_all_phases(rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict) +{ + int32_t ret = -1; + int32_t op_ret = -1; + dict_t *req_dict = NULL; + dict_t *tmp_dict = NULL; + glusterd_conf_t *conf = NULL; + char *op_errstr = NULL; + xlator_t *this = NULL; + gf_boolean_t is_acquired = _gf_false; + uuid_t *originator_uuid = NULL; + uint32_t txn_generation = 0; + uint32_t op_errno = 0; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(req); + GF_ASSERT(dict); + conf = this->private; + GF_ASSERT(conf); + + /* Save the peer list generation */ + txn_generation = conf->generation; + cmm_smp_rmb(); + /* This read memory barrier makes sure that this assignment happens here + * only and is not reordered and optimized by either the compiler or the + * processor. + */ + + /* Save the MY_UUID as the originator_uuid. This originator_uuid + * will be used by is_origin_glusterd() to determine if a node + * is the originator node for a command. 
*/ + originator_uuid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); + if (!originator_uuid) { + ret = -1; + goto out; + } + + gf_uuid_copy(*originator_uuid, MY_UUID); + ret = dict_set_bin(dict, "originator_uuid", originator_uuid, + sizeof(uuid_t)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set originator_uuid."); + GF_FREE(originator_uuid); + goto out; + } + + /* Marking the operation as complete synctasked */ + ret = dict_set_int32(dict, "is_synctasked", _gf_true); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set synctasked flag."); + goto out; + } + + /* Use a copy at local unlock as cli response will be sent before + * the unlock and the volname in the dict might be removed */ + tmp_dict = dict_new(); + if (!tmp_dict) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, + "Unable to create dict"); + goto out; + } + dict_copy(dict, tmp_dict); + + /* LOCKDOWN PHASE - Acquire mgmt_v3 locks */ + ret = glusterd_mgmt_v3_initiate_lockdown(op, dict, &op_errstr, &op_errno, + &is_acquired, txn_generation); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_LOCKDOWN_FAIL, + "mgmt_v3 lockdown failed."); + goto out; + } + + /* BUILD PAYLOAD */ + ret = glusterd_mgmt_v3_build_payload(&req_dict, &op_errstr, dict, op); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_PAYLOAD_BUILD_FAIL, + LOGSTR_BUILD_PAYLOAD, gd_op_list[op]); + if (op_errstr == NULL) + gf_asprintf(&op_errstr, OPERRSTR_BUILD_PAYLOAD); + goto out; + } + + /* PRE-COMMIT VALIDATE PHASE */ + ret = glusterd_mgmt_v3_pre_validate(op, req_dict, &op_errstr, &op_errno, + txn_generation); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PRE_VALIDATION_FAIL, + "Pre Validation Failed"); + goto out; + } + + /* COMMIT OP PHASE */ + ret = glusterd_mgmt_v3_commit(op, dict, req_dict, &op_errstr, &op_errno, + txn_generation); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_OP_FAIL, + "Commit Op Failed"); + goto out; + } + + /* POST COMMIT OP PHASE */ + ret = glusterd_mgmt_v3_post_commit(op, dict, req_dict, &op_errstr, + &op_errno, txn_generation); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_COMMIT_OP_FAIL, + "Post commit Op Failed"); + goto out; + } + + /* POST-COMMIT VALIDATE PHASE */ + /* As of now, post_validate is not trying to cleanup any failed + commands. So as of now, I am sending 0 (op_ret as 0). 
+ */ + ret = glusterd_mgmt_v3_post_validate(op, 0, dict, req_dict, &op_errstr, + txn_generation); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_VALIDATION_FAIL, + "Post Validation Failed"); + goto out; + } + + ret = 0; +out: + op_ret = ret; + /* UNLOCK PHASE FOR PEERS*/ + (void)glusterd_mgmt_v3_release_peer_locks(op, dict, op_ret, &op_errstr, + is_acquired, txn_generation); + + /* LOCAL VOLUME(S) UNLOCK */ + if (is_acquired) { + /* Trying to release multiple mgmt_v3 locks */ + ret = glusterd_multiple_mgmt_v3_unlock(tmp_dict, MY_UUID); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_UNLOCK_FAIL, + "Failed to release mgmt_v3 locks on localhost"); + op_ret = ret; + } + } + + if (op_ret && (op_errno == 0)) + op_errno = EG_INTRNL; + + if (op != GD_OP_MAX_OPVERSION) { + /* SEND CLI RESPONSE */ + glusterd_op_send_cli_response(op, op_ret, op_errno, req, dict, + op_errstr); + } + + if (req_dict) + dict_unref(req_dict); + + if (tmp_dict) + dict_unref(tmp_dict); + + if (op_errstr) { + GF_FREE(op_errstr); + op_errstr = NULL; + } + + return 0; +} + +int32_t +glusterd_set_barrier_value(dict_t *dict, char *option) +{ + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_volinfo_t *vol = NULL; + char *volname = NULL; + + this = THIS; + GF_ASSERT(this); + + GF_ASSERT(dict); + GF_ASSERT(option); + + /* TODO : Change this when we support multiple volume. + * As of now only snapshot of single volume is supported, + * Hence volname1 is directly fetched + */ + ret = dict_get_strn(dict, "volname1", SLEN("volname1"), &volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Volname not present in " + "dict"); + goto out; + } + + ret = glusterd_volinfo_find(volname, &vol); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, + "Volume %s not found ", volname); + goto out; + } + + ret = dict_set_dynstr_with_alloc(dict, "barrier", option); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set barrier op " + "in request dictionary"); + goto out; + } + + ret = dict_set_dynstr_with_alloc(vol->dict, "features.barrier", option); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set barrier op " + "in volume option dict"); + goto out; + } + + gd_update_volume_op_versions(vol); + + ret = glusterd_create_volfiles(vol); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL, + "Failed to create volfiles"); + goto out; + } + + ret = glusterd_store_volinfo(vol, GLUSTERD_VOLINFO_VER_AC_INCREMENT); + +out: + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_mgmt_v3_initiate_snap_phases(rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict) +{ + int32_t ret = -1; + int32_t op_ret = -1; + dict_t *req_dict = NULL; + dict_t *tmp_dict = NULL; + glusterd_conf_t *conf = NULL; + char *op_errstr = NULL; + xlator_t *this = NULL; + gf_boolean_t is_acquired = _gf_false; + uuid_t *originator_uuid = NULL; + gf_boolean_t success = _gf_false; + char *cli_errstr = NULL; + uint32_t txn_generation = 0; + uint32_t op_errno = 0; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(req); + GF_ASSERT(dict); + conf = this->private; + GF_ASSERT(conf); + + /* Save the peer list generation */ + txn_generation = conf->generation; + cmm_smp_rmb(); + /* This read memory barrier makes sure that this assignment happens here + * only and is not reordered and optimized by either the compiler or the + * processor. 
+ */ + + /* Save the MY_UUID as the originator_uuid. This originator_uuid + * will be used by is_origin_glusterd() to determine if a node + * is the originator node for a command. */ + originator_uuid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); + if (!originator_uuid) { + ret = -1; + goto out; + } + + gf_uuid_copy(*originator_uuid, MY_UUID); + ret = dict_set_bin(dict, "originator_uuid", originator_uuid, + sizeof(uuid_t)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set originator_uuid."); + GF_FREE(originator_uuid); + goto out; + } + + /* Marking the operation as complete synctasked */ + ret = dict_set_int32n(dict, "is_synctasked", SLEN("is_synctasked"), + _gf_true); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set synctasked flag."); + goto out; + } + + /* Use a copy at local unlock as cli response will be sent before + * the unlock and the volname in the dict might be removed */ + tmp_dict = dict_new(); + if (!tmp_dict) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, + "Unable to create dict"); + goto out; + } + dict_copy(dict, tmp_dict); + + /* LOCKDOWN PHASE - Acquire mgmt_v3 locks */ + ret = glusterd_mgmt_v3_initiate_lockdown(op, dict, &op_errstr, &op_errno, + &is_acquired, txn_generation); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_LOCKDOWN_FAIL, + "mgmt_v3 lockdown failed."); + goto out; + } + + /* BUILD PAYLOAD */ + ret = glusterd_mgmt_v3_build_payload(&req_dict, &op_errstr, dict, op); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_PAYLOAD_BUILD_FAIL, + LOGSTR_BUILD_PAYLOAD, gd_op_list[op]); + if (op_errstr == NULL) + gf_asprintf(&op_errstr, OPERRSTR_BUILD_PAYLOAD); + goto out; + } + + /* PRE-COMMIT VALIDATE PHASE */ + ret = glusterd_mgmt_v3_pre_validate(op, req_dict, &op_errstr, &op_errno, + txn_generation); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PRE_VALIDATION_FAIL, + "Pre Validation Failed"); + goto out; + } + + /* quorum check of the volume is done here */ + ret = glusterd_snap_quorum_check(req_dict, _gf_false, &op_errstr, + &op_errno); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_QUORUM_CHECK_FAIL, + "Volume quorum check failed"); + goto out; + } + + /* Set the operation type as pre, so that differentiation can be + * made whether the brickop is sent during pre-commit or post-commit + */ + ret = dict_set_dynstr_with_alloc(req_dict, "operation-type", "pre"); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set " + "operation-type in dictionary"); + goto out; + } + + ret = glusterd_mgmt_v3_brick_op(op, dict, req_dict, &op_errstr, + txn_generation); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_OP_FAIL, + "Brick Ops Failed"); + goto unbarrier; + } + + /* COMMIT OP PHASE */ + /* TODO: As of now, the plan is to do quorum check before sending the + commit fop and if the quorum succeeds, then commit is sent to all + the other glusterds. + snap create functionality now creates the in memory and on disk + objects for the snapshot (marking them as incomplete), takes the lvm + snapshot and then updates the status of the in memory and on disk + snap objects as complete. Suppose one of the glusterds goes down + after taking the lvm snapshot, but before updating the snap object, + then treat it as a snapshot create failure and trigger cleanup. 
i.e. the number of commit responses received by the originator
+       glusterd should be the same as the number of peers it has sent the
+       request to (i.e. the npeers variable). If not, then the originator
+       glusterd will initiate cleanup in the post-validate fop.
+       Question: What if one of the other glusterds goes down as explained
+       above and along with it the originator glusterd also goes down?
+       Who will initiate the cleanup?
+    */
+    ret = dict_set_int32n(req_dict, "cleanup", SLEN("cleanup"), 1);
+    if (ret) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+               "failed to set dict");
+        goto unbarrier;
+    }
+
+    ret = glusterd_mgmt_v3_commit(op, dict, req_dict, &op_errstr, &op_errno,
+                                  txn_generation);
+    if (ret) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_OP_FAIL,
+               "Commit Op Failed");
+        /* If the main op fails, we should save the error string,
+           because op_errstr will also be used for the unbarrier and
+           unlock ops and we might otherwise lose the actual error that
+           caused the failure.
+        */
+        cli_errstr = op_errstr;
+        op_errstr = NULL;
+        goto unbarrier;
+    }
+
+    success = _gf_true;
+unbarrier:
+    /* Set the operation type as post, so that differentiation can be
+     * made whether the brickop is sent during pre-commit or post-commit
+     */
+    ret = dict_set_dynstr_with_alloc(req_dict, "operation-type", "post");
+    if (ret) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+               "Failed to set "
+               "operation-type in dictionary");
+        goto out;
+    }
+
+    ret = glusterd_mgmt_v3_brick_op(op, dict, req_dict, &op_errstr,
+                                    txn_generation);
+
+    if (ret) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_OP_FAIL,
+               "Brick Ops Failed");
+        goto out;
+    }
+
+    /*Do a quorum check if the commit phase is successful*/
+    if (success) {
+        // quorum check of the snapshot volume
+        ret = glusterd_snap_quorum_check(dict, _gf_true, &op_errstr, &op_errno);
+        if (ret) {
+            gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_QUORUM_CHECK_FAIL,
+                   "Snapshot Volume quorum check failed");
+            goto out;
+        }
+    }
+
+    ret = 0;
+
+out:
+    op_ret = ret;
+
+    if (success == _gf_false)
+        op_ret = -1;
+
+    /* POST-COMMIT VALIDATE PHASE */
+    ret = glusterd_mgmt_v3_post_validate(op, op_ret, dict, req_dict, &op_errstr,
+                                         txn_generation);
+    if (ret) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_VALIDATION_FAIL,
+               "Post Validation Failed");
+        op_ret = -1;
+    }
+
+    /* UNLOCK PHASE FOR PEERS*/
+    (void)glusterd_mgmt_v3_release_peer_locks(op, dict, op_ret, &op_errstr,
+                                              is_acquired, txn_generation);
+
+    /* If the commit op (snapshot taking) failed, then the error is stored
+       in cli_errstr and unbarrier is called. If unbarrier also fails,
+       the error that happened in unbarrier is logged and freed.
+       The error that happened in the commit op, which is stored in
+       cli_errstr, is sent to the cli.
+ */ + if (cli_errstr) { + GF_FREE(op_errstr); + op_errstr = NULL; + op_errstr = cli_errstr; + } + + /* LOCAL VOLUME(S) UNLOCK */ + if (is_acquired) { + /* Trying to release multiple mgmt_v3 locks */ + ret = glusterd_multiple_mgmt_v3_unlock(tmp_dict, MY_UUID); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_UNLOCK_FAIL, + "Failed to release mgmt_v3 locks on localhost"); + op_ret = ret; + } + } + + if (op_ret && (op_errno == 0)) + op_errno = EG_INTRNL; + + /* SEND CLI RESPONSE */ + glusterd_op_send_cli_response(op, op_ret, op_errno, req, dict, op_errstr); + + if (req_dict) + dict_unref(req_dict); + + if (tmp_dict) + dict_unref(tmp_dict); + + if (op_errstr) { + GF_FREE(op_errstr); + op_errstr = NULL; + } + + return 0; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.h b/xlators/mgmt/glusterd/src/glusterd-mgmt.h new file mode 100644 index 00000000000..27dd1849519 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.h @@ -0,0 +1,97 @@ +/* + Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef _GLUSTERD_MGMT_H_ +#define _GLUSTERD_MGMT_H_ + +void +gd_mgmt_v3_collate_errors(struct syncargs *args, int op_ret, int op_errno, + char *op_errstr, int op_code, uuid_t peerid, + u_char *uuid); + +int32_t +gd_mgmt_v3_pre_validate_fn(glusterd_op_t op, dict_t *dict, char **op_errstr, + dict_t *rsp_dict, uint32_t *op_errno); + +int32_t +gd_mgmt_v3_brick_op_fn(glusterd_op_t op, dict_t *dict, char **op_errstr, + dict_t *rsp_dict); + +int32_t +gd_mgmt_v3_commit_fn(glusterd_op_t op, dict_t *dict, char **op_errstr, + uint32_t *op_errno, dict_t *rsp_dict); + +int32_t +gd_mgmt_v3_post_commit_fn(glusterd_op_t op, dict_t *dict, char **op_errstr, + uint32_t *op_errno, dict_t *rsp_dict); + +int32_t +gd_mgmt_v3_post_validate_fn(glusterd_op_t op, int32_t op_ret, dict_t *dict, + char **op_errstr, dict_t *rsp_dict); + +int32_t +glusterd_mgmt_v3_initiate_all_phases(rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict); + +int32_t +glusterd_mgmt_v3_initiate_all_phases_with_brickop_phase(rpcsvc_request_t *req, + glusterd_op_t op, + dict_t *dict); + +int32_t +glusterd_mgmt_v3_initiate_snap_phases(rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict); + +int +glusterd_snap_pre_validate_use_rsp_dict(dict_t *dst, dict_t *src); + +int32_t +glusterd_set_barrier_value(dict_t *dict, char *option); +int + +glusterd_mgmt_v3_initiate_lockdown(glusterd_op_t op, dict_t *dict, + char **op_errstr, uint32_t *op_errno, + gf_boolean_t *is_acquired, + uint32_t txn_generation); + +int +glusterd_mgmt_v3_build_payload(dict_t **req, char **op_errstr, dict_t *dict, + glusterd_op_t op); + +int +glusterd_mgmt_v3_pre_validate(glusterd_op_t op, dict_t *req_dict, + char **op_errstr, uint32_t *op_errno, + uint32_t txn_generation); + +int +glusterd_mgmt_v3_commit(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, + char **op_errstr, uint32_t *op_errno, + uint32_t txn_generation); + +int +glusterd_mgmt_v3_release_peer_locks(glusterd_op_t op, dict_t *dict, + int32_t op_ret, char **op_errstr, + gf_boolean_t is_acquired, + uint32_t txn_generation); + +int32_t +glusterd_multiple_mgmt_v3_unlock(dict_t *dict, uuid_t uuid); + +int +glusterd_reset_brick_prevalidate(dict_t *dict, char **op_errstr, + dict_t *rsp_dict); 
+int +glusterd_op_reset_brick(dict_t *dict, dict_t *rsp_dict); + +int +glusterd_post_commit_add_brick(dict_t *dict, char **op_errstr); + +int +glusterd_post_commit_replace_brick(dict_t *dict, char **op_errstr); +#endif /* _GLUSTERD_MGMT_H_ */ diff --git a/xlators/mgmt/glusterd/src/glusterd-mountbroker.c b/xlators/mgmt/glusterd/src/glusterd-mountbroker.c new file mode 100644 index 00000000000..645d845ee76 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-mountbroker.c @@ -0,0 +1,721 @@ +/* + Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#include <inttypes.h> +#include <fnmatch.h> +#include <pwd.h> + +#include <glusterfs/globals.h> +#include <glusterfs/glusterfs.h> +#include <glusterfs/compat.h> +#include <glusterfs/dict.h> +#include <glusterfs/list.h> +#include <glusterfs/logging.h> +#include <glusterfs/syscall.h> +#include <glusterfs/defaults.h> +#include <glusterfs/compat.h> +#include <glusterfs/compat-errno.h> +#include <glusterfs/run.h> +#include "glusterd-mem-types.h" +#include "glusterd.h" +#include "glusterd-utils.h" +#include <glusterfs/common-utils.h> +#include "glusterd-mountbroker.h" +#include "glusterd-op-sm.h" +#include "glusterd-messages.h" + +static int +seq_dict_foreach(dict_t *dict, int (*fn)(char *str, void *data), void *data) +{ + char index[] = "4294967296"; // 1<<32 + int i = 0; + char *val = NULL; + int ret = 0; + + for (;; i++) { + snprintf(index, sizeof(index), "%d", i); + ret = dict_get_str(dict, index, &val); + if (ret != 0) + return ret == -ENOENT ? 
0 : ret; + ret = fn(val, data); + if (ret != 0) + return ret; + } +} + +int +parse_mount_pattern_desc(gf_mount_spec_t *mspec, char *pdesc) +#define SYNTAX_ERR -2 +{ + char *curs = NULL; + char *c2 = NULL; + char sc = '\0'; + char **cc = NULL; + gf_mount_pattern_t *pat = NULL; + int pnum = 0; + int ret = 0; + int lastsup = -1; + int incl = -1; + char **pcc = NULL; + int pnc = 0; + + skipwhite(&pdesc); + + /* a bow to theory */ + if (!*pdesc) + return 0; + + /* count number of components, separated by '&' */ + mspec->len = 0; + for (curs = pdesc; *curs; curs++) { + if (*curs == ')') + mspec->len++; + } + + mspec->patterns = GF_CALLOC(mspec->len, sizeof(*mspec->patterns), + gf_gld_mt_mount_pattern); + if (!mspec->patterns) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); + ret = -1; + goto out; + } + + pat = mspec->patterns; + curs = pdesc; + skipwhite(&curs); + for (;;) { + incl = -1; + + /* check for pattern signedness modifier */ + if (*curs == '-') { + pat->negative = _gf_true; + curs++; + } + + /* now should come condition specifier, + * then opening paren + */ + c2 = nwstrtail(curs, "SUB("); + if (c2) { + pat->condition = SET_SUB; + goto got_cond; + } + c2 = nwstrtail(curs, "SUP("); + if (c2) { + pat->condition = SET_SUPER; + lastsup = pat - mspec->patterns; + goto got_cond; + } + c2 = nwstrtail(curs, "EQL("); + if (c2) { + pat->condition = SET_EQUAL; + goto got_cond; + } + c2 = nwstrtail(curs, "MEET("); + if (c2) { + pat->condition = SET_INTERSECT; + goto got_cond; + } + c2 = nwstrtail(curs, "SUB+("); + if (c2) { + pat->condition = SET_SUB; + incl = lastsup; + goto got_cond; + } + + ret = SYNTAX_ERR; + goto out; + + got_cond: + curs = c2; + skipwhite(&curs); + /* count the number of components for pattern */ + pnum = *curs == ')' ? 0 : 1; + for (c2 = curs; *c2 != ')';) { + if (strchr("&|", *c2)) { + ret = SYNTAX_ERR; + goto out; + } + while (!strchr("|&)", *c2) && !isspace(*c2)) + c2++; + skipwhite(&c2); + switch (*c2) { + case ')': + break; + case '\0': + case '&': + ret = SYNTAX_ERR; + goto out; + case '|': + *c2 = ' '; + skipwhite(&c2); + /* fall through */ + default: + pnum++; + } + } + if (incl >= 0) { + pnc = 0; + for (pcc = mspec->patterns[incl].components; *pcc; pcc++) + pnc++; + pnum += pnc; + } + pat->components = GF_CALLOC(pnum + 1, sizeof(*pat->components), + gf_gld_mt_mount_comp_container); + if (!pat->components) { + ret = -1; + goto out; + } + + cc = pat->components; + /* copy over included component set */ + if (incl >= 0) { + memcpy(pat->components, mspec->patterns[incl].components, + pnc * sizeof(*pat->components)); + cc += pnc; + } + /* parse and add components */ + c2 = ""; /* reset c2 */ + while (*c2 != ')') { + c2 = curs; + while (!isspace(*c2) && *c2 != ')') + c2++; + sc = *c2; + *c2 = '\0'; + ; + *cc = gf_strdup(curs); + if (!*cc) { + ret = -1; + goto out; + } + *c2 = sc; + skipwhite(&c2); + curs = c2; + cc++; + } + + curs++; + skipwhite(&curs); + if (*curs == '&') { + curs++; + skipwhite(&curs); + } + + if (!*curs) + break; + pat++; + } + +out: + if (ret == SYNTAX_ERR) { + gf_msg("glusterd", GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, + "cannot parse mount patterns %s", pdesc); + } + + /* We've allocted a lotta stuff here but don't bother with freeing + * on error, in that case we'll terminate anyway + */ + return ret ? 
-1 : 0; +} +#undef SYNTAX_ERR + +const char *georep_mnt_desc_template = + "SUP(" + "aux-gfid-mount " + "acl " + "volfile-server=localhost " + "client-pid=%d " + "user-map-root=%s " + ")" + "SUB+(" + "log-file=%s/" GEOREP + "*/* " + "log-level=* " + "volfile-id=* " + ")" + "MEET(" + "%s" + ")"; + +int +make_georep_mountspec(gf_mount_spec_t *mspec, const char *volnames, char *user, + char *logdir) +{ + char *georep_mnt_desc = NULL; + char *meetspec = NULL; + char *vols = NULL; + char *vol = NULL; + char *p = NULL; + char *savetok = NULL; + char *fa[3] = { + 0, + }; + size_t siz = 0; + int vc = 0; + int i = 0; + int ret = 0; + + vols = gf_strdup((char *)volnames); + if (!vols) { + gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED, + "Volume name=%s", volnames, NULL); + goto out; + } + + for (vc = 1, p = vols; *p; p++) { + if (*p == ',') + vc++; + } + siz = strlen(volnames) + vc * SLEN("volfile-id="); + meetspec = GF_CALLOC(1, siz + 1, gf_gld_mt_georep_meet_spec); + if (!meetspec) { + gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); + goto out; + } + + for (p = vols;;) { + vol = strtok_r(p, ",", &savetok); + if (!vol) { + GF_ASSERT(vc == 0); + break; + } + p = NULL; + strcat(meetspec, "volfile-id="); + strcat(meetspec, vol); + if (--vc > 0) + strcat(meetspec, " "); + } + + ret = gf_asprintf(&georep_mnt_desc, georep_mnt_desc_template, + GF_CLIENT_PID_GSYNCD, user, logdir, meetspec); + if (ret == -1) { + georep_mnt_desc = NULL; + goto out; + } + + ret = parse_mount_pattern_desc(mspec, georep_mnt_desc); + +out: + fa[0] = meetspec; + fa[1] = vols; + fa[2] = georep_mnt_desc; + + for (i = 0; i < 3; i++) { + if (fa[i] == NULL) + ret = -1; + else + GF_FREE(fa[i]); + } + + return ret; +} + +static gf_boolean_t +match_comp(char *str, char *patcomp) +{ + char *c1 = patcomp; + char *c2 = str; + + GF_ASSERT(c1); + GF_ASSERT(c2); + + while (*c1 == *c2) { + if (!*c1) + return _gf_true; + c1++; + c2++; + if (c1[-1] == '=') + break; + } + + return fnmatch(c1, c2, 0) == 0 ? 
_gf_true : _gf_false; +} + +struct gf_set_descriptor { + gf_boolean_t priv[2]; + gf_boolean_t common; +}; + +static int +_gf_set_dict_iter1(char *val, void *data) +{ + void **dataa = data; + struct gf_set_descriptor *sd = dataa[0]; + char **curs = dataa[1]; + gf_boolean_t priv = _gf_true; + + while (*curs) { + if (match_comp(val, *curs)) { + priv = _gf_false; + sd->common = _gf_true; + } + curs++; + } + + if (priv) + sd->priv[0] = _gf_true; + + return 0; +} + +static int +_gf_set_dict_iter2(char *val, void *data) +{ + void **dataa = data; + gf_boolean_t *boo = dataa[0]; + char *comp = dataa[1]; + + if (match_comp(val, comp)) + *boo = _gf_true; + + return 0; +} + +static void +relate_sets(struct gf_set_descriptor *sd, dict_t *argdict, char **complist) +{ + void *dataa[] = {NULL, NULL}; + gf_boolean_t boo = _gf_false; + + memset(sd, 0, sizeof(*sd)); + + dataa[0] = sd; + dataa[1] = complist; + seq_dict_foreach(argdict, _gf_set_dict_iter1, dataa); + + while (*complist) { + boo = _gf_false; + dataa[0] = &boo; + dataa[1] = *complist; + seq_dict_foreach(argdict, _gf_set_dict_iter2, dataa); + + if (boo) + sd->common = _gf_true; + else + sd->priv[1] = _gf_true; + + complist++; + } +} + +static int +_arg_parse_uid(char *val, void *data) +{ + char *user = strtail(val, "user-map-root="); + struct passwd *pw = NULL; + + if (!user) + return 0; + pw = getpwnam(user); + if (!pw) + return -EINVAL; + + if (*(int *)data >= 0) + /* uid ambiguity, already found */ + return -EINVAL; + + *(int *)data = pw->pw_uid; + return 0; +} + +static int +evaluate_mount_request(xlator_t *this, gf_mount_spec_t *mspec, dict_t *argdict) +{ + struct gf_set_descriptor sd = { + { + 0, + }, + }; + int i = 0; + int uid = -1; + int ret = 0; + gf_boolean_t match = _gf_false; + + for (i = 0; i < mspec->len; i++) { + relate_sets(&sd, argdict, mspec->patterns[i].components); + switch (mspec->patterns[i].condition) { + case SET_SUB: + match = !sd.priv[0]; + break; + case SET_SUPER: + match = !sd.priv[1]; + break; + case SET_EQUAL: + match = (!sd.priv[0] && !sd.priv[1]); + break; + case SET_INTERSECT: + match = sd.common; + break; + default: + GF_ASSERT(!"unreached"); + } + if (mspec->patterns[i].negative) + match = !match; + + if (!match) { + gf_msg(this->name, GF_LOG_ERROR, EPERM, + GD_MSG_MNTBROKER_SPEC_MISMATCH, + "Mountbroker spec mismatch!!! SET: %d " + "COMPONENT: %d. Review the mount args passed", + mspec->patterns[i].condition, i); + return -EPERM; + } + } + + ret = seq_dict_foreach(argdict, _arg_parse_uid, &uid); + if (ret != 0) + return ret; + + return uid; +} + +static int +_volname_get(char *val, void *data) +{ + char **volname = data; + + *volname = strtail(val, "volfile-id="); + + return *volname ? 
1 : 0; +} + +static int +_runner_add(char *val, void *data) +{ + runner_t *runner = data; + + runner_argprintf(runner, "--%s", val); + + return 0; +} + +int +glusterd_do_mount(char *label, dict_t *argdict, char **path, int *op_errno) +{ + glusterd_conf_t *priv = NULL; + char *mountbroker_root = NULL; + gf_mount_spec_t *mspec = NULL; + int uid = -ENOENT; + char *volname = NULL; + glusterd_volinfo_t *vol = NULL; + char *mtptemp = NULL; + char *mntlink = NULL; + char *cookieswitch = NULL; + char *cookie = NULL; + char *sla = NULL; + struct stat st = { + 0, + }; + runner_t runner = { + 0, + }; + int ret = 0; + xlator_t *this = THIS; + mode_t orig_umask = 0; + gf_boolean_t found_label = _gf_false; + + priv = this->private; + GF_ASSERT(priv); + + GF_ASSERT(op_errno); + *op_errno = 0; + + if (dict_get_strn(this->options, "mountbroker-root", + SLEN("mountbroker-root"), &mountbroker_root) != 0) { + *op_errno = ENOENT; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "'option mountbroker-root' " + "missing in glusterd vol file"); + goto out; + } + + GF_ASSERT(label); + if (!*label) { + *op_errno = EINVAL; + gf_msg(this->name, GF_LOG_ERROR, *op_errno, GD_MSG_MNTBROKER_LABEL_NULL, + "label is NULL (%s)", strerror(*op_errno)); + goto out; + } + + /* look up spec for label */ + cds_list_for_each_entry(mspec, &priv->mount_specs, speclist) + { + if (strcmp(mspec->label, label) != 0) + continue; + + found_label = _gf_true; + uid = evaluate_mount_request(this, mspec, argdict); + break; + } + if (uid < 0) { + *op_errno = -uid; + if (!found_label) { + gf_msg(this->name, GF_LOG_ERROR, *op_errno, + GD_MSG_MNTBROKER_LABEL_MISS, + "Missing mspec: Check the corresponding option " + "in glusterd vol file for mountbroker user: %s", + label); + } + goto out; + } + + /* some sanity check on arguments */ + seq_dict_foreach(argdict, _volname_get, &volname); + if (!volname) { + *op_errno = EINVAL; + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_DICT_GET_FAILED, + "Dict get failed for the key 'volname'"); + goto out; + } + if (glusterd_volinfo_find(volname, &vol) != 0 || + !glusterd_is_volume_started(vol)) { + *op_errno = ENOENT; + gf_msg(this->name, GF_LOG_ERROR, *op_errno, GD_MSG_MOUNT_REQ_FAIL, + "Either volume is not started or volinfo not found"); + goto out; + } + + /* go do mount */ + + /** create actual mount dir */ + + /*** "overload" string name to be possible to used for cookie + creation, see below */ + ret = gf_asprintf(&mtptemp, "%s/user%d/mtpt-%s-XXXXXX/cookie", + mountbroker_root, uid, label); + if (ret == -1) { + mtptemp = NULL; + *op_errno = ENOMEM; + goto out; + } + /*** hide cookie part */ + cookieswitch = strrchr(mtptemp, '/'); + *cookieswitch = '\0'; + + sla = strrchr(mtptemp, '/'); + *sla = '\0'; + ret = sys_mkdir(mtptemp, 0700); + if (ret == 0) + ret = sys_chown(mtptemp, uid, 0); + else if (errno == EEXIST) + ret = 0; + if (ret == -1) { + *op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, *op_errno, GD_MSG_SYSCALL_FAIL, + "Mountbroker User directory creation failed"); + goto out; + } + ret = sys_lstat(mtptemp, &st); + if (ret == -1) { + *op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, *op_errno, GD_MSG_SYSCALL_FAIL, + "stat on mountbroker user directory failed"); + goto out; + } + if (!(S_ISDIR(st.st_mode) && (st.st_mode & ~S_IFMT) == 0700 && + st.st_uid == uid && st.st_gid == 0)) { + *op_errno = EACCES; + gf_msg(this->name, GF_LOG_ERROR, *op_errno, GD_MSG_MOUNT_REQ_FAIL, + "Incorrect mountbroker user directory attributes"); + goto out; + } + *sla = '/'; + + if 
(!mkdtemp(mtptemp)) { + *op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, *op_errno, GD_MSG_SYSCALL_FAIL, + "Mountbroker mount directory creation failed"); + goto out; + } + + /** create private "cookie" symlink */ + + /*** occupy an entry in the hive dir via mkstemp */ + ret = gf_asprintf(&cookie, "%s/" MB_HIVE "/mntXXXXXX", mountbroker_root); + if (ret == -1) { + cookie = NULL; + *op_errno = ENOMEM; + goto out; + } + orig_umask = umask(S_IRWXG | S_IRWXO); + ret = mkstemp(cookie); + umask(orig_umask); + if (ret == -1) { + *op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, *op_errno, GD_MSG_SYSCALL_FAIL, + "Mountbroker cookie file creation failed"); + goto out; + } + sys_close(ret); + + /*** assembly the path from cookie to mountpoint */ + sla = strchr(sla - 1, '/'); + GF_ASSERT(sla); + ret = gf_asprintf(&mntlink, "../user%d%s", uid, sla); + if (ret == -1) { + *op_errno = ENOMEM; + goto out; + } + + /*** create cookie link in (to-be) mountpoint, + move it over to the final place */ + *cookieswitch = '/'; + ret = sys_symlink(mntlink, mtptemp); + if (ret != -1) + ret = sys_rename(mtptemp, cookie); + *cookieswitch = '\0'; + if (ret == -1) { + *op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, *op_errno, GD_MSG_SYSCALL_FAIL, + "symlink or rename failed"); + goto out; + } + + /** invoke glusterfs on the mountpoint */ + + runinit(&runner); + runner_add_arg(&runner, SBIN_DIR "/glusterfs"); + seq_dict_foreach(argdict, _runner_add, &runner); + runner_add_arg(&runner, mtptemp); + ret = runner_run_reuse(&runner); + if (ret == -1) { + *op_errno = EIO; /* XXX hacky fake */ + runner_log(&runner, "", GF_LOG_ERROR, "command failed"); + } + runner_end(&runner); + +out: + + if (*op_errno) { + ret = -1; + gf_msg(this->name, GF_LOG_WARNING, *op_errno, GD_MSG_MOUNT_REQ_FAIL, + "unsuccessful mount request"); + if (mtptemp) { + *cookieswitch = '/'; + sys_unlink(mtptemp); + *cookieswitch = '\0'; + sys_rmdir(mtptemp); + } + if (cookie) { + sys_unlink(cookie); + GF_FREE(cookie); + } + + } else { + ret = 0; + *path = cookie; + } + + if (mtptemp) + GF_FREE(mtptemp); + if (mntlink) + GF_FREE(mntlink); + + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-mountbroker.h b/xlators/mgmt/glusterd/src/glusterd-mountbroker.h new file mode 100644 index 00000000000..20c1347f52f --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-mountbroker.h @@ -0,0 +1,37 @@ +/* + Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ +#define MB_HIVE "mb_hive" + +typedef enum { SET_SUB = 1, SET_SUPER, SET_EQUAL, SET_INTERSECT } gf_setrel_t; + +struct gf_mount_pattern { + char **components; + gf_setrel_t condition; + gf_boolean_t negative; +}; +typedef struct gf_mount_pattern gf_mount_pattern_t; + +struct gf_mount_spec { + struct cds_list_head speclist; + char *label; + gf_mount_pattern_t *patterns; + size_t len; +}; +typedef struct gf_mount_spec gf_mount_spec_t; + +int +parse_mount_pattern_desc(gf_mount_spec_t *mspec, char *pdesc); + +int +make_georep_mountspec(gf_mount_spec_t *mspec, const char *volname, char *user, + char *logdir); + +int +glusterd_do_mount(char *label, dict_t *argdict, char **path, int *op_errno); diff --git a/xlators/mgmt/glusterd/src/glusterd-nfs-svc.c b/xlators/mgmt/glusterd/src/glusterd-nfs-svc.c new file mode 100644 index 00000000000..4908dbbc213 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-nfs-svc.c @@ -0,0 +1,228 @@ +/* + Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifdef BUILD_GNFS + +#include <glusterfs/globals.h> +#include <glusterfs/run.h> +#include <glusterfs/syscall.h> +#include "glusterd.h" +#include "glusterd-utils.h" +#include "glusterd-volgen.h" +#include "glusterd-nfs-svc.h" +#include "glusterd-messages.h" +#include "glusterd-svc-helper.h" + +static gf_boolean_t +glusterd_nfssvc_need_start() +{ + glusterd_conf_t *priv = NULL; + gf_boolean_t start = _gf_false; + glusterd_volinfo_t *volinfo = NULL; + + priv = THIS->private; + + cds_list_for_each_entry(volinfo, &priv->volumes, vol_list) + { + if (!glusterd_is_volume_started(volinfo)) + continue; + + if (dict_get_str_boolean(volinfo->dict, NFS_DISABLE_MAP_KEY, 1)) + continue; + start = _gf_true; + break; + } + + return start; +} + +static int +glusterd_nfssvc_create_volfile() +{ + char filepath[PATH_MAX] = { + 0, + }; + glusterd_conf_t *conf = THIS->private; + + glusterd_svc_build_volfile_path(conf->nfs_svc.name, conf->workdir, filepath, + sizeof(filepath)); + return glusterd_create_global_volfile(build_nfs_graph, filepath, NULL); +} + +static int +glusterd_nfssvc_manager(glusterd_svc_t *svc, void *data, int flags) +{ + int ret = -1; + + if (!svc->inited) { + ret = glusterd_svc_init(svc, "nfs"); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_FAILED_INIT_NFSSVC, + "Failed to init nfs service"); + goto out; + } else { + svc->inited = _gf_true; + gf_msg_debug(THIS->name, 0, "nfs service initialized"); + } + } + + ret = svc->stop(svc, SIGKILL); + if (ret) + goto out; + + /* not an error, or a (very) soft error at best */ + if (sys_access(XLATORDIR "/nfs/server.so", R_OK) != 0) { + gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_GNFS_XLATOR_NOT_INSTALLED, + "nfs/server.so xlator is not installed"); + goto out; + } + + ret = glusterd_nfssvc_create_volfile(); + if (ret) + goto out; + + if (glusterd_nfssvc_need_start()) { + ret = svc->start(svc, flags); + if (ret) + goto out; + + ret = glusterd_conn_connect(&(svc->conn)); + if (ret) + goto out; + } +out: + if (ret) + gf_event(EVENT_SVC_MANAGER_FAILED, "svc_name=%s", svc->name); + + gf_msg_debug(THIS->name, 0, "Returning %d", ret); + + return ret; +} + +static int +glusterd_nfssvc_start(glusterd_svc_t *svc, int flags) +{ + return glusterd_svc_start(svc, flags, 
NULL); +} + +static int +glusterd_nfssvc_stop(glusterd_svc_t *svc, int sig) +{ + int ret = -1; + gf_boolean_t deregister = _gf_false; + + if (glusterd_proc_is_running(&(svc->proc))) + deregister = _gf_true; + + ret = glusterd_svc_stop(svc, sig); + if (ret) + goto out; + if (deregister) + glusterd_nfs_pmap_deregister(); + +out: + gf_msg_debug(THIS->name, 0, "Returning %d", ret); + + return ret; +} + +void +glusterd_nfssvc_build(glusterd_svc_t *svc) +{ + svc->manager = glusterd_nfssvc_manager; + svc->start = glusterd_nfssvc_start; + svc->stop = glusterd_nfssvc_stop; +} + +int +glusterd_nfssvc_reconfigure() +{ + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + gf_boolean_t identical = _gf_false; + gf_boolean_t vol_started = _gf_false; + glusterd_volinfo_t *volinfo = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, out); + + /* not an error, or a (very) soft error at best */ + if (sys_access(XLATORDIR "/nfs/server.so", R_OK) != 0) { + gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_GNFS_XLATOR_NOT_INSTALLED, + "nfs/server.so xlator is not installed"); + ret = 0; + goto out; + } + + cds_list_for_each_entry(volinfo, &priv->volumes, vol_list) + { + if (GLUSTERD_STATUS_STARTED == volinfo->status) { + vol_started = _gf_true; + break; + } + } + if (!vol_started) { + ret = 0; + goto out; + } + + /* + * Check both OLD and NEW volfiles, if they are SAME by size + * and cksum i.e. "character-by-character". If YES, then + * NOTHING has been changed, just return. + */ + + ret = glusterd_svc_check_volfile_identical(priv->nfs_svc.name, + build_nfs_graph, &identical); + if (ret) + goto out; + + if (identical) { + ret = 0; + goto out; + } + + /* + * They are not identical. Find out if the topology is changed + * OR just the volume options. If just the options which got + * changed, then inform the xlator to reconfigure the options. + */ + identical = _gf_false; /* RESET the FLAG */ + ret = glusterd_svc_check_topology_identical(priv->nfs_svc.name, + build_nfs_graph, &identical); + if (ret) + goto out; + + /* Topology is not changed, but just the options. But write the + * options to NFS volfile, so that NFS will be reconfigured. + */ + if (identical) { + ret = glusterd_nfssvc_create_volfile(); + if (ret == 0) { /* Only if above PASSES */ + ret = glusterd_fetchspec_notify(THIS); + } + goto out; + } + + /* + * NFS volfile's topology has been changed. NFS server needs + * to be RESTARTED to ACT on the changed volfile. + */ + ret = priv->nfs_svc.manager(&(priv->nfs_svc), NULL, PROC_START_NO_WAIT); + +out: + gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret); + return ret; +} +#endif diff --git a/xlators/mgmt/glusterd/src/glusterd-nfs-svc.h b/xlators/mgmt/glusterd/src/glusterd-nfs-svc.h new file mode 100644 index 00000000000..6bfdde95749 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-nfs-svc.h @@ -0,0 +1,27 @@ +/* + Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#ifndef _GLUSTERD_NFS_SVC_H_ +#define _GLUSTERD_NFS_SVC_H_ + +#include "glusterd-svc-mgmt.h" + +#ifdef BUILD_GNFS +void +glusterd_nfssvc_build(glusterd_svc_t *svc); + +int +glusterd_nfssvc_init(glusterd_svc_t *svc); + +int +glusterd_nfssvc_reconfigure(); + +#endif /* BUILD_GNFS */ +#endif diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index 3206b357baa..c537fc33a85 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -1,7938 +1,8164 @@ /* - Copyright (c) 2006-2010 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is GF_FREE software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif #include <time.h> #include <sys/uio.h> #include <sys/resource.h> +#include <sys/mount.h> #include <libgen.h> -#include "uuid.h" +#include <glusterfs/compat-uuid.h> #include "fnmatch.h" -#include "xlator.h" +#include <glusterfs/xlator.h> #include "protocol-common.h" #include "glusterd.h" -#include "call-stub.h" -#include "defaults.h" -#include "list.h" -#include "dict.h" -#include "compat.h" -#include "compat-errno.h" -#include "statedump.h" -#include "glusterd-sm.h" +#include <glusterfs/call-stub.h> +#include <glusterfs/list.h> +#include <glusterfs/dict.h> +#include <glusterfs/compat.h> +#include <glusterfs/compat-errno.h> +#include <glusterfs/statedump.h> #include "glusterd-op-sm.h" #include "glusterd-utils.h" #include "glusterd-store.h" -#include "glusterd-volgen.h" -#include "syscall.h" -#include "cli1.h" -#include "common-utils.h" - +#include "glusterd-locks.h" +#include "glusterd-quota.h" +#include <glusterfs/syscall.h> +#include "cli1-xdr.h" +#include "glusterd-snapshot-utils.h" +#include "glusterd-svc-mgmt.h" +#include "glusterd-svc-helper.h" +#include "glusterd-shd-svc-helper.h" +#include "glusterd-shd-svc.h" +#include "glusterd-quotad-svc.h" +#include "glusterd-server-quorum.h" #include <sys/types.h> #include <signal.h> +#include <sys/wait.h> +#include "glusterd-gfproxyd-svc-helper.h" -#define glusterd_op_start_volume_args_get(dict, volname, flags) \ - glusterd_op_stop_volume_args_get (dict, volname, flags) +#define len_strcmp(key, len, str) \ + ((len == SLEN(str)) && (strcmp(key, str) == 0)) -static struct list_head gd_op_sm_queue; -pthread_mutex_t gd_op_sm_lock; -glusterd_op_info_t opinfo = {{0},}; -static int glusterfs_port = GLUSTERD_DEFAULT_PORT; -static char *glusterd_op_sm_state_names[] = { - "Default", - "Lock sent", - "Locked", - "Stage op sent", - "Staged", - 
"Commit op sent", - "Committed", - "Unlock sent", - "Stage op failed", - "Commit op failed", - "Brick op sent", - "Brick op failed", - "Brick op Committed", - "Brick op Commit failed", - "Invalid", -}; +extern char local_node_hostname[PATH_MAX]; +static int +glusterd_set_shared_storage(dict_t *dict, char *key, char *value, + char **op_errstr); -static char *glusterd_op_sm_event_names[] = { - "GD_OP_EVENT_NONE", - "GD_OP_EVENT_START_LOCK", - "GD_OP_EVENT_LOCK", - "GD_OP_EVENT_RCVD_ACC", - "GD_OP_EVENT_ALL_ACC", - "GD_OP_EVENT_STAGE_ACC", - "GD_OP_EVENT_COMMIT_ACC", - "GD_OP_EVENT_RCVD_RJT", - "GD_OP_EVENT_STAGE_OP", - "GD_OP_EVENT_COMMIT_OP", - "GD_OP_EVENT_UNLOCK", - "GD_OP_EVENT_START_UNLOCK", - "GD_OP_EVENT_ALL_ACK", - "GD_OP_EVENT_INVALID" +/* + * Valid options for all volumes to be listed in the valid_all_vol_opts table. + * To add newer options to all volumes, we can just add more entries to this + * table. + * + * It's important that every value have a default, or have a special handler + * in glusterd_get_global_options_for_all_vols, or else we might crash there. + */ +const glusterd_all_vol_opts valid_all_vol_opts[] = { + {GLUSTERD_QUORUM_RATIO_KEY, "51"}, + {GLUSTERD_SHARED_STORAGE_KEY, "disable"}, + /* This one actually gets filled in dynamically. */ + {GLUSTERD_GLOBAL_OP_VERSION_KEY, "BUG_NO_OP_VERSION"}, + /* + * This one should be filled in dynamically, but it didn't used to be + * (before the defaults were added here) so the value is unclear. + * + * TBD: add a dynamic handler to set the appropriate value + */ + {GLUSTERD_MAX_OP_VERSION_KEY, "BUG_NO_MAX_OP_VERSION"}, + {GLUSTERD_BRICK_MULTIPLEX_KEY, "disable"}, + /* Set this value to 0 by default implying brick-multiplexing + * behaviour with no limit set on the number of brick instances that + * can be attached per process. + * TBD: Discuss the default value for this. 
Maybe this should be a + * dynamic value depending on the memory specifications per node */ + {GLUSTERD_BRICKMUX_LIMIT_KEY, GLUSTERD_BRICKMUX_LIMIT_DFLT_VALUE}, + {GLUSTERD_VOL_CNT_PER_THRD, GLUSTERD_VOL_CNT_PER_THRD_DEFAULT_VALUE}, + {GLUSTERD_LOCALTIME_LOGGING_KEY, "disable"}, + {GLUSTERD_DAEMON_LOG_LEVEL_KEY, "INFO"}, + {NULL}, }; -static char *gsync_opname[] = { - "gluster-command", - "gluster-log-file", - "gluster-log-level", - "log-file", - "log-level", - "remote-gsyncd", - "ssh-command", - "rsync-command", - "timeout", - "sync-jobs", - NULL +static struct cds_list_head gd_op_sm_queue; +synclock_t gd_op_sm_lock; +glusterd_op_info_t opinfo = { + {0}, }; -static int -glusterd_restart_brick_servers (glusterd_volinfo_t *); - -char* -glusterd_op_sm_state_name_get (int state) +int32_t +glusterd_txn_opinfo_dict_init() { - if (state < 0 || state >= GD_OP_STATE_MAX) - return glusterd_op_sm_state_names[GD_OP_STATE_MAX]; - return glusterd_op_sm_state_names[state]; -} + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; -char* -glusterd_op_sm_event_name_get (int event) -{ - if (event < 0 || event >= GD_OP_EVENT_MAX) - return glusterd_op_sm_event_names[GD_OP_EVENT_MAX]; - return glusterd_op_sm_event_names[event]; -} + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); -void -glusterd_destroy_lock_ctx (glusterd_op_lock_ctx_t *ctx) -{ - if (!ctx) - return; - GF_FREE (ctx); -} + priv->glusterd_txn_opinfo = dict_new(); + if (!priv->glusterd_txn_opinfo) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); + ret = -1; + goto out; + } -void -glusterd_set_volume_status (glusterd_volinfo_t *volinfo, - glusterd_volume_status status) -{ - GF_ASSERT (volinfo); - volinfo->status = status; -} + memset(priv->global_txn_id, '\0', sizeof(uuid_t)); -gf_boolean_t -glusterd_is_volume_started (glusterd_volinfo_t *volinfo) -{ - GF_ASSERT (volinfo); - return (volinfo->status == GLUSTERD_STATUS_STARTED); + ret = 0; +out: + return ret; } -gf_boolean_t -glusterd_are_all_volumes_stopped () +void +glusterd_txn_opinfo_dict_fini() { - glusterd_conf_t *priv = NULL; - xlator_t *this = NULL; - glusterd_volinfo_t *voliter = NULL; - - this = THIS; - GF_ASSERT (this); - priv = this->private; - GF_ASSERT (priv); + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; - list_for_each_entry (voliter, &priv->volumes, vol_list) { - if (voliter->status == GLUSTERD_STATUS_STARTED) - return _gf_false; - } - - return _gf_true; - -} + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); -static int -glusterd_op_sm_inject_all_acc () -{ - int32_t ret = -1; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACC, NULL); - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + if (priv->glusterd_txn_opinfo) + dict_unref(priv->glusterd_txn_opinfo); } -int -glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickinfo, - gd1_mgmt_brick_op_req **req, dict_t *dict) +void +glusterd_txn_opinfo_init(glusterd_op_info_t *opinfo, + glusterd_op_sm_state_info_t *state, int *op, + dict_t *op_ctx, rpcsvc_request_t *req) { - int ret = -1; - gd1_mgmt_brick_op_req *brick_req = NULL; - gf1_cli_top_op top_op = 0; - double throughput = 0; - double time = 0; - int32_t blk_size = 0; - int32_t blk_count = 0; + glusterd_conf_t *conf = NULL; - GF_ASSERT (op < GD_OP_MAX); - GF_ASSERT (op > GD_OP_NONE); - GF_ASSERT (req); + GF_ASSERT(opinfo); + conf = THIS->private; + GF_ASSERT(conf); - switch (op) { - case GD_OP_REMOVE_BRICK: - case 
GD_OP_STOP_VOLUME: - brick_req = GF_CALLOC (1, sizeof (*brick_req), - gf_gld_mt_mop_brick_req_t); - if (!brick_req) { - gf_log ("", GF_LOG_ERROR, "Out of Memory"); - goto out; - } - brick_req->op = GF_BRICK_TERMINATE; - brick_req->name = ""; - break; - case GD_OP_PROFILE_VOLUME: - brick_req = GF_CALLOC (1, sizeof (*brick_req), - gf_gld_mt_mop_brick_req_t); + if (state) + opinfo->state = *state; - if (!brick_req) { - gf_log ("", GF_LOG_ERROR, "Out of Memory"); - goto out; - } + if (op) + opinfo->op = *op; - brick_req->op = GF_BRICK_XLATOR_INFO; - brick_req->name = brickinfo->path; + if (op_ctx) + opinfo->op_ctx = dict_ref(op_ctx); + else + opinfo->op_ctx = NULL; - ret = dict_get_int32 (dict, "top-op", (int32_t*)&top_op); - if (ret) - goto cont; - if (top_op == GF_CLI_TOP_READ_PERF || - top_op == GF_CLI_TOP_WRITE_PERF) { + if (req) + opinfo->req = req; - ret = dict_get_int32 (dict, "blk-size", &blk_size); - if (ret) { - goto cont; - } - ret = dict_get_int32 (dict, "blk-cnt", &blk_count); - if (ret) - goto out; - if (top_op == GF_CLI_TOP_READ_PERF) - ret = glusterd_volume_stats_read_perf ( - brickinfo->path, blk_size, blk_count, - &throughput, &time); - else if (!ret && top_op == GF_CLI_TOP_WRITE_PERF) - ret = glusterd_volume_stats_write_perf ( - brickinfo->path, blk_size, blk_count, - &throughput, &time); - - if (ret) - goto out; - - ret = dict_set_double (dict, "throughput", - throughput); - if (ret) - goto out; - ret = dict_set_double (dict, "time", time); - if (ret) - goto out; - } - break; - default: - goto out; - break; - } + opinfo->txn_generation = conf->generation; + cmm_smp_rmb(); -cont: - ret = dict_allocate_and_serialize (dict, &brick_req->input.input_val, - (size_t*)&brick_req->input.input_len); - if (ret) - goto out; - *req = brick_req; - ret = 0; + return; +} +int32_t +glusterd_generate_txn_id(dict_t *dict, uuid_t **txn_id) +{ + int32_t ret = -1; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + GF_ASSERT(dict); + + *txn_id = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); + if (!*txn_id) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); + goto out; + } + + if (priv->op_version < GD_OP_VERSION_3_6_0) + gf_uuid_copy(**txn_id, priv->global_txn_id); + else + gf_uuid_generate(**txn_id); + + ret = dict_set_bin(dict, "transaction_id", *txn_id, sizeof(**txn_id)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set transaction id."); + goto out; + } + + gf_msg_debug(this->name, 0, "Transaction_id = %s", uuid_utoa(**txn_id)); out: - if (ret && brick_req) - GF_FREE (brick_req); - gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + if (ret && *txn_id) { + GF_FREE(*txn_id); + *txn_id = NULL; + } + + return ret; } -static int -glusterd_op_stage_create_volume (dict_t *dict, char **op_errstr) +int32_t +glusterd_get_txn_opinfo(uuid_t *txn_id, glusterd_op_info_t *opinfo) { - int ret = 0; - char *volname = NULL; - gf_boolean_t exists = _gf_false; - char *bricks = NULL; - char *brick_list = NULL; - char *free_ptr = NULL; - glusterd_brickinfo_t *brick_info = NULL; - int32_t brick_count = 0; - int32_t i = 0; - char *brick = NULL; - char *tmpptr = NULL; - char cmd_str[1024]; - xlator_t *this = NULL; - glusterd_conf_t *priv = NULL; - char msg[2048] = {0}; - - this = THIS; - if (!this) { - gf_log ("glusterd", GF_LOG_ERROR, - "this is NULL"); - goto out; - } - - priv = this->private; - if (!priv) { - gf_log ("glusterd", GF_LOG_ERROR, - 
"priv is NULL"); - goto out; - } - - ret = dict_get_str (dict, "volname", &volname); + int32_t ret = -1; + glusterd_txn_opinfo_obj *opinfo_obj = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); - goto out; - } + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); - exists = glusterd_check_volume_exists (volname); - - if (exists) { - snprintf (msg, sizeof (msg), "Volume %s already exists", - volname); - gf_log ("", GF_LOG_ERROR, "%s", msg); - *op_errstr = gf_strdup (msg); - ret = -1; - goto out; - } else { - ret = 0; - } - ret = dict_get_int32 (dict, "count", &brick_count); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get count"); - goto out; - } + if (!txn_id || !opinfo) { + gf_msg_callingfn(this->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_ID_GET_FAIL, + "Empty transaction id or opinfo received."); + ret = -1; + goto out; + } - ret = dict_get_str (dict, "bricks", &bricks); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get bricks"); - goto out; - } + ret = dict_get_bin(priv->glusterd_txn_opinfo, uuid_utoa(*txn_id), + (void **)&opinfo_obj); + if (ret) + goto out; - if (bricks) { - brick_list = gf_strdup (bricks); - if (!brick_list) { - ret = -1; - gf_log ("", GF_LOG_ERROR, "Out of memory"); - goto out; - } else { - free_ptr = brick_list; - } - } + (*opinfo) = opinfo_obj->opinfo; - while ( i < brick_count) { - i++; - brick= strtok_r (brick_list, " \n", &tmpptr); - brick_list = tmpptr; - ret = glusterd_brickinfo_from_brick (brick, &brick_info); - if (ret) - goto out; - snprintf (cmd_str, 1024, "%s", brick_info->path); - ret = glusterd_resolve_brick (brick_info); - if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, "cannot resolve " - "brick: %s:%s", brick_info->hostname, - brick_info->path); - goto out; - } + gf_msg_debug(this->name, 0, + "Successfully got opinfo for transaction ID : %s", + uuid_utoa(*txn_id)); - if (!uuid_compare (brick_info->uuid, priv->uuid)) { - ret = glusterd_brick_create_path (brick_info->hostname, - brick_info->path, - 0777, op_errstr); - if (ret) - goto out; - brick_list = tmpptr; - } - glusterd_brickinfo_delete (brick_info); - brick_info = NULL; - } + ret = 0; out: - if (free_ptr) - GF_FREE (free_ptr); - if (brick_info) - glusterd_brickinfo_delete (brick_info); - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - - return ret; + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; } -static int -glusterd_op_stop_volume_args_get (dict_t *dict, char** volname, - int *flags) +int32_t +glusterd_set_txn_opinfo(uuid_t *txn_id, glusterd_op_info_t *opinfo) { - int ret = -1; + int32_t ret = -1; + glusterd_txn_opinfo_obj *opinfo_obj = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; - if (!dict || !volname || !flags) - goto out; + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); - ret = dict_get_str (dict, "volname", volname); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); - goto out; - } + if (!txn_id) { + gf_msg_callingfn(this->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_ID_GET_FAIL, + "Empty transaction id received."); + ret = -1; + goto out; + } - ret = dict_get_int32 (dict, "flags", flags); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get flags"); - goto out; + ret = dict_get_bin(priv->glusterd_txn_opinfo, uuid_utoa(*txn_id), + (void **)&opinfo_obj); + if (ret) { + opinfo_obj = GF_CALLOC(1, sizeof(glusterd_txn_opinfo_obj), + gf_common_mt_txn_opinfo_obj_t); + if (!opinfo_obj) { + ret = -1; + 
goto out; } -out: - return ret; -} -static int -glusterd_op_stage_start_volume (dict_t *dict, char **op_errstr) -{ - int ret = 0; - char *volname = NULL; - int flags = 0; - gf_boolean_t exists = _gf_false; - glusterd_volinfo_t *volinfo = NULL; - glusterd_brickinfo_t *brickinfo = NULL; - char msg[2048]; - glusterd_conf_t *priv = NULL; - - priv = THIS->private; - if (!priv) { - gf_log ("glusterd", GF_LOG_ERROR, - "priv is NULL"); - ret = -1; - goto out; + ret = dict_set_bin(priv->glusterd_txn_opinfo, uuid_utoa(*txn_id), + opinfo_obj, sizeof(glusterd_txn_opinfo_obj)); + if (ret) { + gf_msg_callingfn(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_SET_FAILED, + "Unable to set opinfo for transaction" + " ID : %s", + uuid_utoa(*txn_id)); + goto out; } + } - ret = glusterd_op_start_volume_args_get (dict, &volname, &flags); - if (ret) - goto out; + opinfo_obj->opinfo = (*opinfo); - exists = glusterd_check_volume_exists (volname); + gf_msg_debug(this->name, 0, + "Successfully set opinfo for transaction ID : %s", + uuid_utoa(*txn_id)); + ret = 0; +out: + if (ret) + if (opinfo_obj) + GF_FREE(opinfo_obj); - if (!exists) { - snprintf (msg, sizeof (msg), "Volume %s does not exist", volname); - gf_log ("", GF_LOG_ERROR, "%s", - msg); - *op_errstr = gf_strdup (msg); - ret = -1; - } else { - ret = 0; - } + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; +} - ret = glusterd_volinfo_find (volname, &volinfo); +int32_t +glusterd_clear_txn_opinfo(uuid_t *txn_id) +{ + int32_t ret = -1; + glusterd_op_info_t txn_op_info = { + {0}, + }; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + if (!txn_id) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_ID_GET_FAIL, + "Empty transaction id received."); + ret = -1; + goto out; + } - if (ret) - goto out; + ret = glusterd_get_txn_opinfo(txn_id, &txn_op_info); + if (ret) { + gf_msg_callingfn(this->name, GF_LOG_ERROR, 0, + GD_MSG_TRANS_OPINFO_GET_FAIL, + "Unable to get transaction opinfo " + "for transaction ID : %s", + uuid_utoa(*txn_id)); + goto out; + } - list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { - ret = glusterd_resolve_brick (brickinfo); - if (ret) { - gf_log ("", GF_LOG_ERROR, - "Unable to resolve brick %s:%s", - brickinfo->hostname, brickinfo->path); - goto out; - } + if (txn_op_info.op_ctx) + dict_unref(txn_op_info.op_ctx); - if (!uuid_compare (brickinfo->uuid, priv->uuid)) { - ret = glusterd_brick_create_path (brickinfo->hostname, - brickinfo->path, - 0777, op_errstr); - if (ret) - goto out; - } + dict_del(priv->glusterd_txn_opinfo, uuid_utoa(*txn_id)); - if (!(flags & GF_CLI_FLAG_OP_FORCE)) { - if (glusterd_is_volume_started (volinfo)) { - snprintf (msg, sizeof (msg), "Volume %s already" - " started", volname); - gf_log ("glusterd", GF_LOG_ERROR, "%s", msg); - *op_errstr = gf_strdup (msg); - ret = -1; - goto out; - } - } - } + gf_msg_debug(this->name, 0, + "Successfully cleared opinfo for transaction ID : %s", + uuid_utoa(*txn_id)); - ret = 0; + ret = 0; out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - - return ret; + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; } -static int -glusterd_op_stage_stop_volume (dict_t *dict, char **op_errstr) -{ - int ret = -1; - char *volname = NULL; - int flags = 0; - gf_boolean_t exists = _gf_false; - glusterd_volinfo_t *volinfo = NULL; - char msg[2048] = {0}; - - - ret = glusterd_op_stop_volume_args_get (dict, &volname, &flags); - if (ret) - goto out; +static int glusterfs_port 
= GLUSTERD_DEFAULT_PORT; +static char *glusterd_op_sm_state_names[] = { + "Default", + "Lock sent", + "Locked", + "Stage op sent", + "Staged", + "Commit op sent", + "Committed", + "Unlock sent", + "Stage op failed", + "Commit op failed", + "Brick op sent", + "Brick op failed", + "Brick op Committed", + "Brick op Commit failed", + "Ack drain", + "Invalid", +}; - exists = glusterd_check_volume_exists (volname); +static char *glusterd_op_sm_event_names[] = { + "GD_OP_EVENT_NONE", "GD_OP_EVENT_START_LOCK", + "GD_OP_EVENT_LOCK", "GD_OP_EVENT_RCVD_ACC", + "GD_OP_EVENT_ALL_ACC", "GD_OP_EVENT_STAGE_ACC", + "GD_OP_EVENT_COMMIT_ACC", "GD_OP_EVENT_RCVD_RJT", + "GD_OP_EVENT_STAGE_OP", "GD_OP_EVENT_COMMIT_OP", + "GD_OP_EVENT_UNLOCK", "GD_OP_EVENT_START_UNLOCK", + "GD_OP_EVENT_ALL_ACK", "GD_OP_EVENT_LOCAL_UNLOCK_NO_RESP", + "GD_OP_EVENT_INVALID"}; - if (!exists) { - snprintf (msg, sizeof (msg), "Volume %s does not exist", volname); - gf_log ("", GF_LOG_ERROR, "%s", msg); - *op_errstr = gf_strdup (msg); - ret = -1; - goto out; - } else { - ret = 0; - } +char * +glusterd_op_sm_state_name_get(int state) +{ + if (state < 0 || state >= GD_OP_STATE_MAX) + return glusterd_op_sm_state_names[GD_OP_STATE_MAX]; + return glusterd_op_sm_state_names[state]; +} - ret = glusterd_volinfo_find (volname, &volinfo); +char * +glusterd_op_sm_event_name_get(int event) +{ + if (event < 0 || event >= GD_OP_EVENT_MAX) + return glusterd_op_sm_event_names[GD_OP_EVENT_MAX]; + return glusterd_op_sm_event_names[event]; +} - if (ret) - goto out; +static void +glusterd_destroy_lock_ctx(glusterd_op_lock_ctx_t *ctx) +{ + if (!ctx) + return; + GF_FREE(ctx); +} - if (!(flags & GF_CLI_FLAG_OP_FORCE)) { - if (_gf_false == glusterd_is_volume_started (volinfo)) { - snprintf (msg, sizeof(msg), "Volume %s " - "is not in the started state", volname); - gf_log ("", GF_LOG_ERROR, "Volume %s " - "has not been started", volname); - *op_errstr = gf_strdup (msg); - ret = -1; - goto out; - } - } +void +glusterd_set_volume_status(glusterd_volinfo_t *volinfo, + glusterd_volume_status status) +{ + GF_ASSERT(volinfo); + volinfo->status = status; +} +static int +glusterd_op_sm_inject_all_acc(uuid_t *txn_id) +{ + int ret = -1; + ret = glusterd_op_sm_inject_event(GD_OP_EVENT_ALL_ACC, txn_id, NULL); + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; +} +static int +glusterd_check_bitrot_cmd(char *key, const int keylen, char *errstr, + const size_t size) +{ + int ret = -1; + + if (len_strcmp(key, keylen, "bitrot") || + len_strcmp(key, keylen, "features.bitrot")) { + snprintf(errstr, size, + " 'gluster volume set <VOLNAME> %s' is invalid command." + " Use 'gluster volume bitrot <VOLNAME> {enable|disable}'" + " instead.", + key); + goto out; + } else if (len_strcmp(key, keylen, "scrub-freq") || + len_strcmp(key, keylen, "features.scrub-freq")) { + snprintf(errstr, size, + " 'gluster volume set <VOLNAME> %s' is invalid command." + " Use 'gluster volume bitrot <VOLNAME> scrub-frequency" + " {hourly|daily|weekly|biweekly|monthly}' instead.", + key); + goto out; + } else if (len_strcmp(key, keylen, "scrub") || + len_strcmp(key, keylen, "features.scrub")) { + snprintf(errstr, size, + " 'gluster volume set <VOLNAME> %s' is invalid command." + " Use 'gluster volume bitrot <VOLNAME> scrub {pause|resume}'" + " instead.", + key); + goto out; + } else if (len_strcmp(key, keylen, "scrub-throttle") || + len_strcmp(key, keylen, "features.scrub-throttle")) { + snprintf(errstr, size, + " 'gluster volume set <VOLNAME> %s' is invalid command." 
+ " Use 'gluster volume bitrot <VOLNAME> scrub-throttle " + " {lazy|normal|aggressive}' instead.", + key); + goto out; + } + + ret = 0; out: - - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - - return ret; + return ret; } static int -glusterd_op_stage_delete_volume (dict_t *dict, char **op_errstr) +glusterd_check_quota_cmd(char *key, const int keylen, char *value, char *errstr, + size_t size) { - int ret = 0; - char *volname = NULL; - gf_boolean_t exists = _gf_false; - glusterd_volinfo_t *volinfo = NULL; - char msg[2048] = {0}; - - ret = dict_get_str (dict, "volname", &volname); + int ret = -1; + gf_boolean_t b = _gf_false; - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); - goto out; - } - - exists = glusterd_check_volume_exists (volname); - - if (!exists) { - snprintf (msg, sizeof (msg), "Volume %s does not exist", - volname); - gf_log ("", GF_LOG_ERROR, "%s", msg); - *op_errstr = gf_strdup (msg); - ret = -1; - goto out; + if (len_strcmp(key, keylen, "quota") || + len_strcmp(key, keylen, "features.quota")) { + ret = gf_string2boolean(value, &b); + if (ret) + goto out; + ret = -1; + if (b) { + snprintf(errstr, size, + " 'gluster volume set <VOLNAME> %s %s' is deprecated." + " Use 'gluster volume quota <VOLNAME> enable' instead.", + key, value); } else { - ret = 0; - } - - ret = glusterd_volinfo_find (volname, &volinfo); - + snprintf(errstr, size, + " 'gluster volume set <VOLNAME> %s %s' is deprecated." + " Use 'gluster volume quota <VOLNAME> disable' instead.", + key, value); + } + goto out; + } else if (len_strcmp(key, keylen, "inode-quota") || + len_strcmp(key, keylen, "features.inode-quota")) { + ret = gf_string2boolean(value, &b); if (ret) - goto out; + goto out; + ret = -1; + if (b) { + snprintf( + errstr, size, + " 'gluster volume set <VOLNAME> %s %s' is deprecated." + " Use 'gluster volume inode-quota <VOLNAME> enable' instead.", + key, value); + } else { + /* inode-quota disable not supported, + * use quota disable + */ + snprintf(errstr, size, + " 'gluster volume set <VOLNAME> %s %s' is deprecated." + " Use 'gluster volume quota <VOLNAME> disable' instead.", + key, value); + } + goto out; + } + + ret = 0; +out: + return ret; +} - if (glusterd_is_volume_started (volinfo)) { - snprintf (msg, sizeof (msg), "Volume %s has been started." 
- "Volume needs to be stopped before deletion.", - volname); - gf_log ("", GF_LOG_ERROR, "%s", msg); - *op_errstr = gf_strdup (msg); - ret = -1; - goto out; - } +int +glusterd_brick_op_build_payload(glusterd_op_t op, + glusterd_brickinfo_t *brickinfo, + gd1_mgmt_brick_op_req **req, dict_t *dict) +{ + int ret = -1; + gd1_mgmt_brick_op_req *brick_req = NULL; + char *volname = NULL; + char name[1024] = { + 0, + }; + gf_xl_afr_op_t heal_op = GF_SHD_OP_INVALID; + xlator_t *this = NULL; + glusterd_volinfo_t *volinfo = NULL; + + this = THIS; + GF_ASSERT(this); + + GF_ASSERT(op < GD_OP_MAX); + GF_ASSERT(op > GD_OP_NONE); + GF_ASSERT(req); + + switch (op) { + case GD_OP_REMOVE_BRICK: + case GD_OP_STOP_VOLUME: + brick_req = GF_CALLOC(1, sizeof(*brick_req), + gf_gld_mt_mop_brick_req_t); + if (!brick_req) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, + NULL); + goto out; + } + brick_req->op = GLUSTERD_BRICK_TERMINATE; + brick_req->name = brickinfo->path; + glusterd_set_brick_status(brickinfo, GF_BRICK_STOPPING); + break; + case GD_OP_PROFILE_VOLUME: + brick_req = GF_CALLOC(1, sizeof(*brick_req), + gf_gld_mt_mop_brick_req_t); + + if (!brick_req) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, + NULL); + goto out; + } + + brick_req->op = GLUSTERD_BRICK_XLATOR_INFO; + brick_req->name = brickinfo->path; + + break; + case GD_OP_HEAL_VOLUME: { + brick_req = GF_CALLOC(1, sizeof(*brick_req), + gf_gld_mt_mop_brick_req_t); + if (!brick_req) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, + NULL); + goto out; + } + + brick_req->op = GLUSTERD_BRICK_XLATOR_OP; + brick_req->name = ""; + ret = dict_get_int32n(dict, "heal-op", SLEN("heal-op"), + (int32_t *)&heal_op); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=heal-op", NULL); + goto out; + } + ret = dict_set_int32n(dict, "xl-op", SLEN("xl-op"), heal_op); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=xl-op", NULL); + goto out; + } + } break; + case GD_OP_STATUS_VOLUME: { + brick_req = GF_CALLOC(1, sizeof(*brick_req), + gf_gld_mt_mop_brick_req_t); + if (!brick_req) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, + NULL); + goto out; + } + brick_req->op = GLUSTERD_BRICK_STATUS; + brick_req->name = ""; + ret = dict_set_strn(dict, "brick-name", SLEN("brick-name"), + brickinfo->path); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=brick-name", NULL); + goto out; + } + } break; + case GD_OP_REBALANCE: + case GD_OP_DEFRAG_BRICK_VOLUME: + brick_req = GF_CALLOC(1, sizeof(*brick_req), + gf_gld_mt_mop_brick_req_t); + if (!brick_req) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, + NULL); + goto out; + } + + brick_req->op = GLUSTERD_BRICK_XLATOR_DEFRAG; + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=volname", NULL); + goto out; + } + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_VOLINFO_GET_FAIL, "Volume=%s", volname, NULL); + goto out; + } + snprintf(name, sizeof(name), "%s-dht", volname); + brick_req->name = gf_strdup(name); + + break; + case GD_OP_SNAP: + case GD_OP_BARRIER: + brick_req = GF_CALLOC(1, sizeof(*brick_req), + gf_gld_mt_mop_brick_req_t); + if (!brick_req) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, + NULL); + goto out; + } + brick_req->op = GLUSTERD_BRICK_BARRIER; + 
brick_req->name = brickinfo->path; + break; - ret = 0; + default: + goto out; + break; + } + + brick_req->dict.dict_len = 0; + brick_req->dict.dict_val = NULL; + ret = dict_allocate_and_serialize(dict, &brick_req->input.input_val, + &brick_req->input.input_len); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); + goto out; + } + *req = brick_req; + ret = 0; out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - - return ret; + if (ret && brick_req) + GF_FREE(brick_req); + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; } -static int -glusterd_op_stage_add_brick (dict_t *dict, char **op_errstr) +int +glusterd_node_op_build_payload(glusterd_op_t op, gd1_mgmt_brick_op_req **req, + dict_t *dict) { - int ret = 0; - char *volname = NULL; - int count = 0; - int i = 0; - char *bricks = NULL; - char *brick_list = NULL; - char *saveptr = NULL; - char *free_ptr = NULL; - char *brick = NULL; - glusterd_brickinfo_t *brickinfo = NULL; - glusterd_volinfo_t *volinfo = NULL; - char cmd_str[1024]; - glusterd_conf_t *priv = NULL; - char msg[2048] = {0,}; - gf_boolean_t brick_alloc = _gf_false; - char *all_bricks = NULL; - char *str_ret = NULL; - - priv = THIS->private; - if (!priv) - goto out; + int ret = -1; + gd1_mgmt_brick_op_req *brick_req = NULL; + char *volname = NULL; - ret = dict_get_str (dict, "volname", &volname); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); - goto out; - } + GF_ASSERT(op < GD_OP_MAX); + GF_ASSERT(op > GD_OP_NONE); + GF_ASSERT(req); + xlator_t *this = NULL; + this = THIS; + GF_ASSERT(this); - ret = glusterd_volinfo_find (volname, &volinfo); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to find volume: %s", volname); + switch (op) { + case GD_OP_PROFILE_VOLUME: + brick_req = GF_CALLOC(1, sizeof(*brick_req), + gf_gld_mt_mop_brick_req_t); + if (!brick_req) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, + NULL); goto out; - } + } - if (glusterd_is_defrag_on(volinfo)) { - snprintf (msg, sizeof(msg), "Volume name %s rebalance is in " - "progress. 
Please retry after completion", volname); - gf_log ("glusterd", GF_LOG_ERROR, "%s", msg); - *op_errstr = gf_strdup (msg); - ret = -1; - goto out; - } - ret = dict_get_int32 (dict, "count", &count); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get count"); - goto out; - } + brick_req->op = GLUSTERD_NODE_PROFILE; + brick_req->name = ""; - ret = dict_get_str (dict, "bricks", &bricks); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get bricks"); + break; + + case GD_OP_STATUS_VOLUME: + brick_req = GF_CALLOC(1, sizeof(*brick_req), + gf_gld_mt_mop_brick_req_t); + if (!brick_req) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, + NULL); goto out; - } + } - if (bricks) { - brick_list = gf_strdup (bricks); - all_bricks = gf_strdup (bricks); - free_ptr = brick_list; - } + brick_req->op = GLUSTERD_NODE_STATUS; + brick_req->name = ""; - /* Check whether any of the bricks given is the destination brick of the - replace brick running */ + break; - str_ret = glusterd_check_brick_rb_part (all_bricks, count, volinfo); - if (str_ret) { - gf_log ("glusterd", GF_LOG_ERROR, - "%s", str_ret); - *op_errstr = gf_strdup (str_ret); - ret = -1; + case GD_OP_SCRUB_STATUS: + case GD_OP_SCRUB_ONDEMAND: + brick_req = GF_CALLOC(1, sizeof(*brick_req), + gf_gld_mt_mop_brick_req_t); + if (!brick_req) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, + NULL); goto out; - } + } - if (count) - brick = strtok_r (brick_list+1, " \n", &saveptr); + brick_req->op = GLUSTERD_NODE_BITROT; + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=volname", NULL); + goto out; + } - while ( i < count) { - ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo, - &brickinfo); - if (!ret) { - gf_log ("", GF_LOG_ERROR, "Adding duplicate brick: %s", - brick); - ret = -1; - goto out; - } else { - ret = glusterd_brickinfo_from_brick (brick, &brickinfo); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Add-brick: Unable" - " to get brickinfo"); - goto out; - } - brick_alloc = _gf_true; - } + brick_req->name = gf_strdup(volname); + break; + default: + goto out; + } - snprintf (cmd_str, 1024, "%s", brickinfo->path); - ret = glusterd_resolve_brick (brickinfo); - if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, - "resolve brick failed"); - goto out; - } + brick_req->dict.dict_len = 0; + brick_req->dict.dict_val = NULL; + ret = dict_allocate_and_serialize(dict, &brick_req->input.input_val, + &brick_req->input.input_len); - if (!uuid_compare (brickinfo->uuid, priv->uuid)) { - ret = glusterd_brick_create_path (brickinfo->hostname, - brickinfo->path, - 0777, op_errstr); - if (ret) - goto out; - } + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); + goto out; + } - glusterd_brickinfo_delete (brickinfo); - brick_alloc = _gf_false; - brickinfo = NULL; - brick = strtok_r (NULL, " \n", &saveptr); - i++; - } + *req = brick_req; + ret = 0; out: - if (free_ptr) - GF_FREE (free_ptr); - if (brick_alloc && brickinfo) - glusterd_brickinfo_delete (brickinfo); - if (str_ret) - GF_FREE (str_ret); - if (all_bricks) - GF_FREE (all_bricks); + if (ret && brick_req) + GF_FREE(brick_req); + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; +} - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); +static int +glusterd_validate_quorum_options(xlator_t *this, char *fullkey, char *value, + char **op_errstr) +{ + int ret = 0; + char *key = NULL; + volume_option_t *opt = NULL; - 
return ret; + if (!glusterd_is_quorum_option(fullkey)) + goto out; + key = strchr(fullkey, '.'); + if (key == NULL) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL); + ret = -1; + goto out; + } + key++; + opt = xlator_volume_option_get(this, key); + if (!opt) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_VOLINFO_GET_FAIL, NULL); + ret = -1; + goto out; + } + ret = xlator_option_validate(this, key, value, opt, op_errstr); +out: + return ret; } -char * -glusterd_check_brick_rb_part (char *bricks, int count, glusterd_volinfo_t *volinfo) +static int +glusterd_validate_brick_mx_options(xlator_t *this, char *fullkey, char *value, + char **op_errstr) { - char *saveptr = NULL; - char *brick = NULL; - char *brick_list = NULL; - int ret = 0; - glusterd_brickinfo_t *brickinfo = NULL; - uint32_t i = 0; - char *str = NULL; - char msg[2048] = {0,}; - - brick_list = gf_strdup (bricks); - if (!brick_list) { - gf_log ("glusterd", GF_LOG_ERROR, - "Out of memory"); - ret = -1; - goto out; - } - - if (count) - brick = strtok_r (brick_list+1, " \n", &saveptr); + int ret = 0; + // Placeholder function for now - while ( i < count) { - ret = glusterd_brickinfo_from_brick (brick, &brickinfo); - if (ret) { - snprintf (msg, sizeof(msg), "Unable to" - " get brickinfo"); - gf_log ("", GF_LOG_ERROR, "%s", msg); - ret = -1; - goto out; - } + return ret; +} - if (glusterd_is_replace_running (volinfo, brickinfo)) { - snprintf (msg, sizeof(msg), "Volume %s: replace brick is running" - " and the brick %s:%s you are trying to add is the destination brick" - " for replace brick", volinfo->volname, brickinfo->hostname, brickinfo->path); - ret = -1; - goto out; - } +static int +glusterd_validate_shared_storage(char *value, char *errstr) +{ + int32_t ret = -1; + int32_t count = -1; + char *op = NULL; + char hook_script[PATH_MAX] = ""; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + int32_t len = 0; + glusterd_volinfo_t *volinfo = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, out); + + GF_VALIDATE_OR_GOTO(this->name, value, out); + GF_VALIDATE_OR_GOTO(this->name, errstr, out); + + if ((strcmp(value, "enable")) && (strcmp(value, "disable"))) { + snprintf(errstr, PATH_MAX, + "Invalid option(%s). Valid options " + "are 'enable' and 'disable'", + value); + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, "%s", + errstr); + ret = -1; + goto out; + } - glusterd_brickinfo_delete (brickinfo); - brickinfo = NULL; - brick = strtok_r (NULL, " \n", &saveptr); - i++; - } + len = snprintf(hook_script, sizeof(hook_script), + "%s" GLUSTERD_SHRD_STRG_HOOK_SCRIPT, conf->workdir); + if ((len < 0) || (len >= sizeof(hook_script))) { + ret = -1; + goto out; + } + + ret = sys_access(hook_script, R_OK | X_OK); + if (ret) { + len = snprintf(errstr, PATH_MAX, + "The hook-script (%s) required " + "for this operation is not present. " + "Please install the hook-script " + "and retry", + hook_script); + if (len < 0) { + strncpy(errstr, "<error>", PATH_MAX); + } + gf_msg(this->name, GF_LOG_ERROR, ENOENT, GD_MSG_FILE_OP_FAILED, "%s", + errstr); + goto out; + } + + if (!strncmp(value, "disable", SLEN("disable"))) { + ret = dict_get_strn(conf->opts, GLUSTERD_SHARED_STORAGE_KEY, + SLEN(GLUSTERD_SHARED_STORAGE_KEY), &op); + if (ret || !strncmp(op, "disable", SLEN("disable"))) { + snprintf(errstr, PATH_MAX, + "Shared storage volume " + "does not exist. 
Please enable shared storage" + " for creating shared storage volume."); + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_SHARED_STORAGE_DOES_NOT_EXIST, "%s", errstr); + ret = -1; + goto out; + } + goto out; + } + + ret = glusterd_volinfo_find(GLUSTER_SHARED_STORAGE, &volinfo); + if (!ret) { + snprintf(errstr, PATH_MAX, + "Shared storage volume(" GLUSTER_SHARED_STORAGE + ") already exists."); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_ALREADY_EXIST, "%s", + errstr); + ret = -1; + goto out; + } + + ret = glusterd_count_connected_peers(&count); + if (ret) { + snprintf(errstr, PATH_MAX, + "Failed to calculate number of connected peers."); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PEER_COUNT_GET_FAIL, "%s", + errstr); + goto out; + } + + if (count <= 1) { + snprintf(errstr, PATH_MAX, + "More than one node should " + "be up/present in the cluster to enable this option"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INSUFFICIENT_UP_NODES, "%s", + errstr); + ret = -1; + goto out; + } out: - if (brick_list) - GF_FREE(brick_list); - if (brickinfo) - glusterd_brickinfo_delete (brickinfo); - if (ret) - str = gf_strdup (msg); - return str; + return ret; } static int -glusterd_get_rb_dst_brickinfo (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t **brickinfo) -{ - int32_t ret = -1; - - if (!volinfo || !brickinfo) - goto out; - - *brickinfo = volinfo->dst_brick; - - ret = 0; +glusterd_validate_localtime_logging(char *value, char *errstr) +{ + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + int already_enabled = 0; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, out); + GF_VALIDATE_OR_GOTO(this->name, value, out); + + already_enabled = gf_log_get_localtime(); + + ret = 0; + if (strcmp(value, "enable") == 0) { + gf_log_set_localtime(1); + if (!already_enabled) + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_LOCALTIME_LOGGING_ENABLE, + "localtime logging enable"); + } else if (strcmp(value, "disable") == 0) { + gf_log_set_localtime(0); + if (already_enabled) + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_LOCALTIME_LOGGING_DISABLE, + "localtime logging disable"); + } else { + ret = -1; + GF_VALIDATE_OR_GOTO(this->name, errstr, out); + snprintf(errstr, PATH_MAX, + "Invalid option(%s). 
Valid options " + "are 'enable' and 'disable'", + value); + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, "%s", + errstr); + } out: - return ret; + return ret; } static int -glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, - dict_t *rsp_dict) +glusterd_validate_daemon_log_level(char *value, char *errstr) { - int ret = 0; - char *src_brick = NULL; - char *dst_brick = NULL; - char *volname = NULL; - int replace_op = 0; - glusterd_volinfo_t *volinfo = NULL; - glusterd_brickinfo_t *src_brickinfo = NULL; - char *host = NULL; - char *path = NULL; - char msg[2048] = {0}; - char *dup_dstbrick = NULL; - glusterd_peerinfo_t *peerinfo = NULL; - glusterd_brickinfo_t *dst_brickinfo = NULL; - - ret = dict_get_str (dict, "src-brick", &src_brick); + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get src brick"); - goto out; - } + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); - gf_log ("", GF_LOG_DEBUG, "src brick=%s", src_brick); + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, out); - ret = dict_get_str (dict, "dst-brick", &dst_brick); + GF_VALIDATE_OR_GOTO(this->name, value, out); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get dest brick"); - goto out; - } + ret = 0; - gf_log ("", GF_LOG_DEBUG, "dst brick=%s", dst_brick); + if ((strcmp(value, "INFO")) && (strcmp(value, "WARNING")) && + (strcmp(value, "DEBUG")) && (strcmp(value, "TRACE")) && + (strcmp(value, "ERROR"))) { + ret = -1; + GF_VALIDATE_OR_GOTO(this->name, errstr, out); + snprintf(errstr, PATH_MAX, + "Invalid option(%s). Valid options " + "are 'INFO' or 'WARNING' or 'ERROR' or 'DEBUG' or " + " 'TRACE'", + value); + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, "%s", + errstr); + } - ret = dict_get_str (dict, "volname", &volname); +out: + return ret; +} - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); - goto out; +static int +glusterd_op_stage_set_volume(dict_t *dict, char **op_errstr) +{ + int ret = -1; + char *volname = NULL; + int exists = 0; + char *key = NULL; + char *key_fixed = NULL; + char *value = NULL; + char *val_dup = NULL; + char keystr[100] = { + 0, + }; + int keystr_len; + int keylen; + char *trash_path = NULL; + int trash_path_len = 0; + int count = 0; + int dict_count = 0; + char errstr[PATH_MAX] = { + 0, + }; + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + dict_t *val_dict = NULL; + gf_boolean_t global_opt = _gf_false; + gf_boolean_t key_matched = _gf_false; /* if a key was processed or not*/ + glusterd_volinfo_t *voliter = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + uint32_t new_op_version = GD_OP_VERSION_MIN; + uint32_t local_new_op_version = GD_OP_VERSION_MIN; + uint32_t local_new_client_op_version = GD_OP_VERSION_MIN; + uint32_t key_op_version = GD_OP_VERSION_MIN; + uint32_t local_key_op_version = GD_OP_VERSION_MIN; + gf_boolean_t origin_glusterd = _gf_true; + gf_boolean_t check_op_version = _gf_true; + gf_boolean_t trash_enabled = _gf_false; + gf_boolean_t all_vol = _gf_false; + struct volopt_map_entry *vmep = NULL; + + GF_ASSERT(dict); + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + /* Check if we can support the required op-version + * This check is not done on the originator glusterd. The originator + * glusterd sets this value. 
+ */ + origin_glusterd = is_origin_glusterd(dict); + + if (!origin_glusterd) { + /* Check for v3.3.x origin glusterd */ + check_op_version = dict_get_str_boolean(dict, "check-op-version", + _gf_false); + + if (check_op_version) { + ret = dict_get_uint32(dict, "new-op-version", &new_op_version); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Key=new-op-version", NULL); + goto out; + } + + if ((new_op_version > GD_OP_VERSION_MAX) || + (new_op_version < GD_OP_VERSION_MIN)) { + ret = -1; + snprintf(errstr, sizeof(errstr), + "Required op_version (%d) is not supported." + " Max supported op version is %d", + new_op_version, priv->op_version); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UNSUPPORTED_VERSION, + "%s", errstr); + goto out; + } + } + } + + ret = dict_get_int32_sizen(dict, "count", &dict_count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Count(dict),not set in Volume-Set"); + goto out; + } + + if (dict_count == 0) { + /*No options would be specified of volume set help */ + if (dict_get_sizen(dict, "help")) { + ret = 0; + goto out; + } + + if (dict_get_sizen(dict, "help-xml")) { +#if (HAVE_LIB_XML) + ret = 0; + goto out; +#else + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MODULE_NOT_INSTALLED, + "libxml not present in the system"); + *op_errstr = gf_strdup( + "Error: xml libraries not present to produce xml-output"); + goto out; +#endif } + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_OPTIONS_GIVEN, + "No options received "); + *op_errstr = gf_strdup("Options not specified"); + ret = -1; + goto out; + } - ret = dict_get_int32 (dict, "operation", (int32_t *)&replace_op); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "dict get on replace-brick operation failed"); - goto out; - } + ret = dict_get_str_sizen(dict, "volname", &volname); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Key=volname", NULL); + goto out; + } - ret = glusterd_volinfo_find (volname, &volinfo); + if (strcasecmp(volname, "all") != 0) { + ret = glusterd_volinfo_find(volname, &volinfo); if (ret) { - snprintf (msg, sizeof (msg), "volume: %s does not exist", - volname); - *op_errstr = gf_strdup (msg); - goto out; + snprintf(errstr, sizeof(errstr), FMTSTR_CHECK_VOL_EXISTS, volname); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, + FMTSTR_CHECK_VOL_EXISTS, volname); + goto out; } - if (GLUSTERD_STATUS_STARTED != volinfo->status) { + ret = glusterd_validate_volume_id(dict, volinfo); + if (ret) + goto out; + + local_new_op_version = volinfo->op_version; + local_new_client_op_version = volinfo->client_op_version; + + } else { + all_vol = _gf_true; + } + + val_dict = dict_new(); + if (!val_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); + goto out; + } + + for (count = 1; ret != 1; count++) { + keystr_len = sprintf(keystr, "key%d", count); + ret = dict_get_strn(dict, keystr, keystr_len, &key); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=%s", keystr, NULL); + break; + } + + keystr_len = sprintf(keystr, "value%d", count); + ret = dict_get_strn(dict, keystr, keystr_len, &value); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "invalid key,value pair in 'volume set'"); + ret = -1; + goto out; + } + + key_matched = _gf_false; + keylen = strlen(key); + if (len_strcmp(key, keylen, "config.memory-accounting")) { + key_matched = _gf_true; + gf_msg_debug(this->name, 0, + "enabling memory accounting for volume 
%s", volname); + ret = 0; + } else if (len_strcmp(key, keylen, "config.transport")) { + key_matched = _gf_true; + gf_msg_debug(this->name, 0, "changing transport-type for volume %s", + volname); + ret = 0; + /* if value is none of 'tcp/rdma/tcp,rdma' error out */ + if (!((strcasecmp(value, "rdma") == 0) || + (strcasecmp(value, "tcp") == 0) || + (strcasecmp(value, "tcp,rdma") == 0) || + (strcasecmp(value, "rdma,tcp") == 0))) { + ret = snprintf(errstr, sizeof(errstr), + "transport-type %s does not exist", value); + /* lets not bother about above return value, + its a failure anyways */ ret = -1; - snprintf (msg, sizeof (msg), "volume: %s is not started", - volname); - *op_errstr = gf_strdup (msg); goto out; + } + } else if (len_strcmp(key, keylen, "ganesha.enable")) { + key_matched = _gf_true; + if (!strcmp(value, "off") == 0) { + ret = ganesha_manage_export(dict, "off", _gf_true, op_errstr); + if (ret) + goto out; + } } - if (glusterd_is_defrag_on(volinfo)) { - snprintf (msg, sizeof(msg), "Volume name %s rebalance is in " - "progress. Please retry after completion", volname); - gf_log ("glusterd", GF_LOG_ERROR, "%s", msg); - *op_errstr = gf_strdup (msg); - ret = -1; + if (!key_matched) { + ret = glusterd_check_bitrot_cmd(key, keylen, errstr, + sizeof(errstr)); + if (ret) goto out; - } - - switch (replace_op) { - case GF_REPLACE_OP_START: - if (glusterd_is_rb_started (volinfo)) { - gf_log ("", GF_LOG_ERROR, "Replace brick is already " - "started for volume "); - ret = -1; - goto out; - } - break; - case GF_REPLACE_OP_PAUSE: - if (glusterd_is_rb_paused (volinfo)) { - gf_log ("", GF_LOG_ERROR, "Replace brick is already " - "paused for volume "); - ret = -1; - goto out; - } else if (!glusterd_is_rb_started(volinfo)) { - gf_log ("", GF_LOG_ERROR, "Replace brick is not" - " started for volume "); - ret = -1; - goto out; - } - break; - - case GF_REPLACE_OP_ABORT: - if ((!glusterd_is_rb_paused (volinfo)) && - (!glusterd_is_rb_started (volinfo))) { - gf_log ("", GF_LOG_ERROR, "Replace brick is not " - " started or paused for volume "); - ret = -1; - goto out; - } - break; - - case GF_REPLACE_OP_COMMIT: - if (!glusterd_is_rb_started (volinfo)) { - gf_log ("", GF_LOG_ERROR, "Replace brick is not " - "started for volume "); - ret = -1; - goto out; - } - break; - - case GF_REPLACE_OP_COMMIT_FORCE: break; - case GF_REPLACE_OP_STATUS: - break; - default: - ret = -1; + ret = glusterd_check_quota_cmd(key, keylen, value, errstr, + sizeof(errstr)); + if (ret) goto out; } - ret = glusterd_volume_brickinfo_get_by_brick (src_brick, volinfo, - &src_brickinfo); - if (ret) { - snprintf (msg, sizeof (msg), "brick: %s does not exist in " - "volume: %s", src_brick, volname); - *op_errstr = gf_strdup (msg); - goto out; - } + if (is_key_glusterd_hooks_friendly(key)) + continue; - if (!glusterd_is_local_addr (src_brickinfo->hostname)) { - gf_log ("", GF_LOG_DEBUG, - "I AM THE SOURCE HOST"); - if (src_brickinfo->port && rsp_dict) { - ret = dict_set_int32 (rsp_dict, "src-brick-port", - src_brickinfo->port); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Could not set src-brick-port=%d", - src_brickinfo->port); - } - } + ret = glusterd_volopt_validate(volinfo, dict, key, value, op_errstr); + if (ret) + goto out; + exists = glusterd_check_option_exists(key, &key_fixed); + if (exists == -1) { + ret = -1; + goto out; } - dup_dstbrick = gf_strdup (dst_brick); - if (!dup_dstbrick) { + if (!exists) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY, + "Option with name: %s does not exist", key); + ret = snprintf(errstr, 
sizeof(errstr), "option : %s does not exist", + key); + if (key_fixed) + snprintf(errstr + ret, sizeof(errstr) - ret, + "\nDid you mean %s?", key_fixed); + ret = -1; + goto out; + } + + if (key_fixed) { + key = key_fixed; + keylen = strlen(key_fixed); + } + + if (len_strcmp(key, keylen, "cluster.granular-entry-heal")) { + /* For granular entry-heal, if the set command was + * invoked through volume-set CLI, then allow the + * command only if the volume is still in 'Created' + * state + */ + if (volinfo && volinfo->status != GLUSTERD_STATUS_NONE && + (dict_get_sizen(dict, "is-special-key") == NULL)) { + snprintf(errstr, sizeof(errstr), + " 'gluster volume set <VOLNAME> %s {enable, disable}'" + " is not supported." + " Use 'gluster volume heal <VOLNAME> " + "granular-entry-heal {enable, disable}' instead.", + key); ret = -1; - gf_log ("", GF_LOG_ERROR, "Memory allocation failed"); goto out; - } - host = strtok (dup_dstbrick, ":"); - path = strtok (NULL, ":"); - - if (!host || !path) { - gf_log ("", GF_LOG_ERROR, - "dst brick %s is not of form <HOSTNAME>:<export-dir>", - dst_brick); + } + } else if (len_strcmp(key, keylen, GLUSTERD_GLOBAL_OP_VERSION_KEY)) { + /* Check if the key is cluster.op-version and set + * local_new_op_version to the value given if possible. + */ + if (!all_vol) { ret = -1; - goto out; - } - if (!glusterd_brickinfo_get (NULL, host, path, NULL)) { - snprintf(msg, sizeof(msg), "Brick: %s:%s already in use", - host, path); - *op_errstr = gf_strdup (msg); + snprintf(errstr, sizeof(errstr), + "Option \"%s\" is not valid for a single volume", key); + goto out; + } + /* Check if cluster.op-version is the only option being + * set + */ + if (count != 1) { ret = -1; + snprintf(errstr, sizeof(errstr), + "Option \"%s\" cannot be set along with other options", + key); + goto out; + } + /* Just reusing the variable, but I'm using it for + * storing the op-version from value + */ + ret = gf_string2uint(value, &local_key_op_version); + if (ret) { + snprintf(errstr, sizeof(errstr), + "invalid number format \"%s\" in option \"%s\"", value, + key); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY, "%s", + errstr); + goto out; + } + + if (local_key_op_version > GD_OP_VERSION_MAX || + local_key_op_version < GD_OP_VERSION_MIN) { + ret = -1; + snprintf(errstr, sizeof(errstr), + "Required op_version (%d) is not supported." 
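The cluster.op-version branch above enforces two rules: the requested version must fall inside the supported [GD_OP_VERSION_MIN, GD_OP_VERSION_MAX] window, and it must be strictly greater than the cluster's current op-version. The following self-contained C restatement is illustrative only; the function name, the MY_OP_VERSION_* bounds and the sample values are hypothetical, and only the comparison logic mirrors the diff.

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical stand-ins for GD_OP_VERSION_MIN/MAX; values are made up. */
    #define MY_OP_VERSION_MIN 1
    #define MY_OP_VERSION_MAX 100000

    /* Returns 0 when 'requested' is an acceptable new cluster op-version,
     * -1 otherwise (out of range, or not strictly above the current one). */
    static int
    check_cluster_op_version_bump(uint32_t requested, uint32_t current)
    {
        if (requested > MY_OP_VERSION_MAX || requested < MY_OP_VERSION_MIN)
            return -1;
        if (requested <= current)
            return -1;
        return 0;
    }

    int
    main(void)
    {
        printf("%d\n", check_cluster_op_version_bump(70200, 70000)); /* 0  */
        printf("%d\n", check_cluster_op_version_bump(70000, 70000)); /* -1 */
        return 0;
    }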
+ " Max supported op version is %d", + local_key_op_version, priv->op_version); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VERSION_UNSUPPORTED, + "%s", errstr); + goto out; + } + if (local_key_op_version > priv->op_version) { + local_new_op_version = local_key_op_version; + } else { + ret = -1; + snprintf(errstr, sizeof(errstr), + "Required op-version (%d) should" + " not be equal or lower than current" + " cluster op-version (%d).", + local_key_op_version, priv->op_version); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VERSION_UNSUPPORTED, + "%s", errstr); goto out; - } + } - if ((volinfo->rb_status ==GF_RB_STATUS_NONE) && - (replace_op == GF_REPLACE_OP_START)) { - ret = glusterd_brickinfo_from_brick (dst_brick, &dst_brickinfo); - volinfo->src_brick = src_brickinfo; - volinfo->dst_brick = dst_brickinfo; - } else { - ret = glusterd_get_rb_dst_brickinfo (volinfo, &dst_brickinfo); + goto cont; } - if (glusterd_rb_check_bricks (volinfo, src_brickinfo, dst_brickinfo)) { - gf_log ("", GF_LOG_ERROR, "replace brick: incorrect source or" - " destination bricks specified"); + ALL_VOLUME_OPTION_CHECK(volname, _gf_false, key, ret, op_errstr, out); + ret = glusterd_validate_quorum_options(this, key, value, op_errstr); + if (ret) + goto out; + + ret = glusterd_validate_brick_mx_options(this, key, value, op_errstr); + if (ret) + goto out; + + vmep = gd_get_vmep(key); + local_key_op_version = glusterd_get_op_version_from_vmep(vmep); + if (local_key_op_version > local_new_op_version) + local_new_op_version = local_key_op_version; + if (gd_is_client_option(vmep) && + (local_key_op_version > local_new_client_op_version)) + local_new_client_op_version = local_key_op_version; + + sprintf(keystr, "op-version%d", count); + if (origin_glusterd) { + ret = dict_set_uint32(dict, keystr, local_key_op_version); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set key-op-version in dict"); + goto out; + } + } else if (check_op_version) { + ret = dict_get_uint32(dict, keystr, &key_op_version); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get key-op-version from dict"); + goto out; + } + if (local_key_op_version != key_op_version) { ret = -1; - goto out; - } - if (!glusterd_is_local_addr (host)) { - ret = glusterd_brick_create_path (host, path, 0777, op_errstr); - if (ret) - goto out; - } else { - ret = glusterd_friend_find (NULL, host, &peerinfo); - if (ret) { - snprintf (msg, sizeof (msg), "%s, is not a friend", - host); - *op_errstr = gf_strdup (msg); + snprintf(errstr, sizeof(errstr), + "option: %s op-version mismatch", key); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OP_VERSION_MISMATCH, + "%s, required op-version = %" PRIu32 + ", available op-version = %" PRIu32, + errstr, key_op_version, local_key_op_version); + goto out; + } + } + + global_opt = glusterd_check_globaloption(key); + + if (len_strcmp(key, keylen, GLUSTERD_SHARED_STORAGE_KEY)) { + ret = glusterd_validate_shared_storage(value, errstr); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_SHARED_STRG_VOL_OPT_VALIDATE_FAIL, + "Failed to validate shared storage volume options"); + goto out; + } + } else if (len_strcmp(key, keylen, GLUSTERD_LOCALTIME_LOGGING_KEY)) { + ret = glusterd_validate_localtime_logging(value, errstr); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_LOCALTIME_LOGGING_VOL_OPT_VALIDATE_FAIL, + "Failed to validate localtime logging volume options"); + goto out; + } + } else if (len_strcmp(key, keylen, 
GLUSTERD_DAEMON_LOG_LEVEL_KEY)) { + ret = glusterd_validate_daemon_log_level(value, errstr); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_DAEMON_LOG_LEVEL_VOL_OPT_VALIDATE_FAIL, + "Failed to validate daemon-log-level volume options"); + goto out; + } + } else if (len_strcmp(key, keylen, "features.trash-dir")) { + if (volinfo) { + ret = glusterd_volinfo_get(volinfo, VKEY_FEATURES_TRASH, + &val_dup); + if (!ret && val_dup) { + ret = gf_string2boolean(val_dup, &trash_enabled); + if (ret) goto out; } - - if (!peerinfo->connected) { - snprintf (msg, sizeof (msg), "%s, is not connected at " - "the moment", host); - *op_errstr = gf_strdup (msg); + } + if (!trash_enabled) { + snprintf(errstr, sizeof(errstr), + "Trash translator is not enabled. " + "Use volume set %s trash on", + volname); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_SET_FAIL, + "Unable to set the options in 'volume set': %s", errstr); + ret = -1; + goto out; + } + if (strchr(value, '/')) { + snprintf(errstr, sizeof(errstr), + "Path is not allowed as option"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_SET_FAIL, + "Unable to set the options in 'volume set': %s", errstr); + ret = -1; + goto out; + } + + list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + /* Check for local brick */ + if (!gf_uuid_compare(brickinfo->uuid, MY_UUID)) { + trash_path_len = strlen(value) + strlen(brickinfo->path) + + 2; + trash_path = GF_MALLOC(trash_path_len, gf_common_mt_char); + snprintf(trash_path, trash_path_len, "%s/%s", + brickinfo->path, value); + + /* Checks whether a directory with + given option exists or not */ + if (!sys_access(trash_path, R_OK)) { + snprintf(errstr, sizeof(errstr), "Path %s exists", + value); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_SET_FAIL, + "Unable to set the options in 'volume set': %s", + errstr); ret = -1; goto out; - } - - if (GD_FRIEND_STATE_BEFRIENDED != peerinfo->state.state) { - snprintf (msg, sizeof (msg), "%s, is not befriended " - "at the moment", host); - *op_errstr = gf_strdup (msg); + } else { + gf_msg_debug(this->name, 0, + "Directory with given name does not exist," + " continuing"); + } + + if (volinfo->status == GLUSTERD_STATUS_STARTED && + brickinfo->status != GF_BRICK_STARTED) { + /* If volume is in started state , checks + whether bricks are online */ + snprintf(errstr, sizeof(errstr), + "One or more bricks are down"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_SET_FAIL, + "Unable to set the options in 'volume set': %s", + errstr); ret = -1; goto out; + } + } + if (trash_path) { + GF_FREE(trash_path); + trash_path = NULL; } + } } - ret = 0; - -out: - if (dup_dstbrick) - GF_FREE (dup_dstbrick); - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; -} + ret = dict_set_strn(val_dict, key, keylen, value); -static int -glusterd_op_stage_log_filename (dict_t *dict, char **op_errstr) -{ - int ret = -1; - char *volname = NULL; - gf_boolean_t exists = _gf_false; - char msg[2048] = {0}; - char *path = NULL; - char hostname[2048] = {0}; - char *brick = NULL; - glusterd_volinfo_t *volinfo = NULL; - - ret = dict_get_str (dict, "volname", &volname); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); - goto out; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to set the options in 'volume set'"); + ret = -1; + goto out; } - exists = glusterd_check_volume_exists (volname); - ret = glusterd_volinfo_find (volname, &volinfo); - if (!exists || ret) { - snprintf (msg, sizeof (msg), "Volume %s does not exist", - 
volname); - gf_log ("", GF_LOG_ERROR, "%s", msg); - *op_errstr = gf_strdup (msg); - ret = -1; - goto out; + *op_errstr = NULL; + if (!global_opt && !all_vol) + ret = glusterd_validate_reconfopts(volinfo, val_dict, op_errstr); + else if (!all_vol) { + voliter = NULL; + cds_list_for_each_entry(voliter, &priv->volumes, vol_list) + { + ret = glusterd_validate_globalopts(voliter, val_dict, + op_errstr); + if (ret) + break; + } } - ret = dict_get_str (dict, "brick", &brick); - if (ret) - goto out; - - if (strchr (brick, ':')) { - ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo, - NULL); - if (ret) { - snprintf (msg, sizeof (msg), "Incorrect brick %s " - "for volume %s", brick, volname); - gf_log ("", GF_LOG_ERROR, "%s", msg); - *op_errstr = gf_strdup (msg); - goto out; - } + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL, + "Could not create temp volfile, some option failed: %s", + *op_errstr); + goto out; } + dict_deln(val_dict, key, keylen); - ret = dict_get_str (dict, "path", &path); - if (ret) { - gf_log ("", GF_LOG_ERROR, "path not found"); - goto out; + if (key_fixed) { + GF_FREE(key_fixed); + key_fixed = NULL; } + } - ret = gethostname (hostname, sizeof (hostname)); + /* Check if all the connected clients support the new client-op-version + */ + ret = glusterd_check_client_op_version_support( + volname, local_new_client_op_version, op_errstr); + if (ret) + goto out; +cont: + if (origin_glusterd) { + ret = dict_set_uint32(dict, "new-op-version", local_new_op_version); if (ret) { - snprintf (msg, sizeof (msg), "Failed to get hostname, error:%s", - strerror (errno)); - gf_log ("glusterd", GF_LOG_ERROR, "%s", msg); - *op_errstr = gf_strdup (msg); - goto out; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set new-op-version in dict"); + goto out; } - - ret = glusterd_brick_create_path (hostname, path, 0777, op_errstr); - if (ret) - goto out; -out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - - return ret; -} - -static int -glusterd_op_stage_log_rotate (dict_t *dict, char **op_errstr) -{ - int ret = -1; - char *volname = NULL; - glusterd_volinfo_t *volinfo = NULL; - gf_boolean_t exists = _gf_false; - char msg[2048] = {0}; - char *brick = NULL; - - ret = dict_get_str (dict, "volname", &volname); + /* Set this value in dict so other peers know to check for + * op-version. 
This is a hack for 3.3.x compatibility + * + * TODO: Remove this and the other places this is referred once + * 3.3.x compatibility is not required + */ + ret = dict_set_int32_sizen(dict, "check-op-version", 1); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); - goto out; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set check-op-version in dict"); + goto out; } + } - exists = glusterd_check_volume_exists (volname); - ret = glusterd_volinfo_find (volname, &volinfo); - if (!exists) { - snprintf (msg, sizeof (msg), "Volume %s does not exist", - volname); - gf_log ("", GF_LOG_ERROR, "%s", msg); - *op_errstr = gf_strdup (msg); - ret = -1; - goto out; - } + ret = 0; - if (_gf_false == glusterd_is_volume_started (volinfo)) { - snprintf (msg, sizeof (msg), "Volume %s needs to be started before" - " log rotate.", volname); - gf_log ("", GF_LOG_ERROR, "%s", msg); - *op_errstr = gf_strdup (msg); - ret = -1; - goto out; - } +out: + if (val_dict) + dict_unref(val_dict); - ret = dict_get_str (dict, "brick", &brick); - if (ret) - goto out; + if (trash_path) + GF_FREE(trash_path); - if (strchr (brick, ':')) { - ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo, - NULL); - if (ret) { - snprintf (msg, sizeof (msg), "Incorrect brick %s " - "for volume %s", brick, volname); - gf_log ("", GF_LOG_ERROR, "%s", msg); - *op_errstr = gf_strdup (msg); - goto out; - } - } -out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + GF_FREE(key_fixed); + if (errstr[0] != '\0') + *op_errstr = gf_strdup(errstr); - return ret; + if (ret) { + if (!(*op_errstr)) { + *op_errstr = gf_strdup("Error, Validation Failed"); + gf_msg_debug(this->name, 0, "Error, Cannot Validate option :%s", + *op_errstr); + } else { + gf_msg_debug(this->name, 0, "Error, Cannot Validate option"); + } + } + return ret; } static int -glusterd_op_stage_set_volume (dict_t *dict, char **op_errstr) +glusterd_op_stage_reset_volume(dict_t *dict, char **op_errstr) { - int ret = 0; - char *volname = NULL; - int exists = 0; - char *key = NULL; - char *key_fixed = NULL; - char *value = NULL; - char str[100] = {0, }; - int count = 0; - int dict_count = 0; - char errstr[2048] = {0, }; - glusterd_volinfo_t *volinfo = NULL; - dict_t *val_dict = NULL; - gf_boolean_t global_opt = _gf_false; - glusterd_volinfo_t *voliter = NULL; - glusterd_conf_t *priv = NULL; - xlator_t *this = NULL; - - GF_ASSERT (dict); - this = THIS; - GF_ASSERT (this); - priv = this->private; - GF_ASSERT (priv); - - val_dict = dict_new(); - if (!val_dict) - goto out; + int ret = 0; + char *volname = NULL; + int exists = 0; + char msg[2048] = {0}; + char *key = NULL; + char *key_fixed = NULL; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + this = THIS; + GF_ASSERT(this); - ret = dict_get_str (dict, "volname", &volname); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); - goto out; - } + priv = this->private; + GF_ASSERT(priv); - exists = glusterd_check_volume_exists (volname); + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); - if (!exists) { - snprintf (errstr, sizeof (errstr), "Volume %s does not exist", - volname); - gf_log ("", GF_LOG_ERROR, "%s", errstr); - *op_errstr = gf_strdup (errstr); - ret = -1; - goto out; - } + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get volume name"); + goto out; + } - ret = glusterd_volinfo_find (volname, &volinfo); + if (strcasecmp(volname, "all") != 0) { + ret = 
glusterd_volinfo_find(volname, &volinfo); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to allocate memory"); - goto out; + snprintf(msg, sizeof(msg), FMTSTR_CHECK_VOL_EXISTS, volname); + goto out; } - ret = dict_get_int32 (dict, "count", &dict_count); - - if (ret) { - gf_log ("", GF_LOG_ERROR, "Count(dict),not set in Volume-Set"); - goto out; + ret = glusterd_validate_volume_id(dict, volinfo); + if (ret) + goto out; + } + + ret = dict_get_strn(dict, "key", SLEN("key"), &key); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get option key"); + goto out; + } + + /* * + * If key ganesha.enable is set, then volume should be unexported from + * ganesha server. Also it is a volume-level option, perform only when + * volume name not equal to "all"(in other words if volinfo != NULL) + */ + if (volinfo && (!strcmp(key, "all") || !strcmp(key, "ganesha.enable"))) { + if (glusterd_check_ganesha_export(volinfo)) { + ret = ganesha_manage_export(dict, "off", _gf_true, op_errstr); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_NFS_GNS_RESET_FAIL, + "Could not reset ganesha.enable key"); + } + } + + if (strcmp(key, "all")) { + exists = glusterd_check_option_exists(key, &key_fixed); + if (exists == -1) { + ret = -1; + goto out; } - if ( dict_count == 1 ) { - if (dict_get (dict, "history" )) { - ret = 0; - goto out; - } - - gf_log ("", GF_LOG_ERROR, "No options received "); - *op_errstr = gf_strdup ("Options not specified"); + if (!exists) { + ret = snprintf(msg, sizeof(msg), "Option %s does not exist", key); + if (key_fixed) + snprintf(msg + ret, sizeof(msg) - ret, "\nDid you mean %s?", + key_fixed); + ret = -1; + goto out; + } else if (exists > 0) { + if (key_fixed) + key = key_fixed; + + /* 'gluster volume set/reset <VOLNAME> + * features.quota/features.inode-quota' should + * not be allowed as it is deprecated. + * Setting and resetting quota/inode-quota features + * should be allowed only through 'gluster volume quota + * <VOLNAME> enable/disable'. + * But, 'gluster volume set features.quota-deem-statfs' + * can be turned on/off when quota is enabled. + */ + + if (strcmp(VKEY_FEATURES_INODE_QUOTA, key) == 0 || + strcmp(VKEY_FEATURES_QUOTA, key) == 0) { + snprintf(msg, sizeof(msg), + "'gluster volume " + "reset <VOLNAME> %s' is deprecated. 
" + "Use 'gluster volume quota <VOLNAME> " + "disable' instead.", + key); ret = -1; goto out; + } + ALL_VOLUME_OPTION_CHECK(volname, _gf_false, key, ret, op_errstr, + out); } + } +out: + GF_FREE(key_fixed); + if (msg[0] != '\0') { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OP_STAGE_RESET_VOL_FAIL, + "%s", msg); + *op_errstr = gf_strdup(msg); + } - for ( count = 1; ret != 1 ; count++ ) { - global_opt = _gf_false; - sprintf (str, "key%d", count); - ret = dict_get_str (dict, str, &key); - - - if (ret) - break; - - exists = glusterd_check_option_exists (key, &key_fixed); - if (exists == -1) { - ret = -1; - goto out; - } - if (!exists) { - gf_log ("", GF_LOG_ERROR, "Option with name: %s " - "does not exist", key); - ret = snprintf (errstr, 2048, - "option : %s does not exist", - key); - if (key_fixed) - snprintf (errstr + ret, 2048 - ret, - "\nDid you mean %s?", key_fixed); - *op_errstr = gf_strdup (errstr); - ret = -1; - goto out; - } - - sprintf (str, "value%d", count); - ret = dict_get_str (dict, str, &value); - - if (ret) { - gf_log ("", GF_LOG_ERROR, - "invalid key,value pair in 'volume set'"); - ret = -1; - goto out; - } - - if (key_fixed) - key = key_fixed; - - ret = glusterd_check_globaloption (key); - if (ret) - global_opt = _gf_true; - - ret = dict_set_str (val_dict, key, value); - - if (ret) { - gf_log ("", GF_LOG_ERROR, - "Unable to set the options in 'volume set'"); - ret = -1; - goto out; - } - - *op_errstr = NULL; - if (!global_opt) - ret = glusterd_validate_reconfopts (volinfo, val_dict, op_errstr); - else { - voliter = NULL; - list_for_each_entry (voliter, &priv->volumes, vol_list) { - ret = glusterd_validate_globalopts (voliter, val_dict, op_errstr); - if (ret) - break; - } - } - - if (ret) { - gf_log ("glusterd", GF_LOG_DEBUG, "Could not create temp " - "volfile, some option failed: %s", *op_errstr); - goto out; - } - dict_del (val_dict, key); - - if (key_fixed) { - GF_FREE (key_fixed); - key_fixed = NULL; - } - } + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; +} - ret = 0; +static int +glusterd_op_stage_sync_volume(dict_t *dict, char **op_errstr) +{ + int ret = -1; + char *volname = NULL; + char *hostname = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + char msg[2048] = { + 0, + }; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + this = THIS; + GF_ASSERT(this); + + ret = dict_get_strn(dict, "hostname", SLEN("hostname"), &hostname); + if (ret) { + snprintf(msg, sizeof(msg), + "hostname couldn't be " + "retrieved from msg"); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=hostname", NULL); + *op_errstr = gf_strdup(msg); + goto out; + } + + if (gf_is_local_addr(hostname)) { + // volname is not present in case of sync all + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (!ret) { + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + snprintf(msg, sizeof(msg), + "Volume %s " + "does not exist", + volname); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_VOL_NOT_FOUND, + "Volume=%s", volname, NULL); + *op_errstr = gf_strdup(msg); + goto out; + } + } + } else { + RCU_READ_LOCK; + + peerinfo = glusterd_peerinfo_find(NULL, hostname); + if (peerinfo == NULL) { + RCU_READ_UNLOCK; + ret = -1; + snprintf(msg, sizeof(msg), "%s, is not a friend", hostname); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_PEER_NOT_FOUND, + "Peer_name=%s", hostname, NULL); + *op_errstr = gf_strdup(msg); + goto out; + + } else if (!peerinfo->connected) { + RCU_READ_UNLOCK; + ret = -1; + snprintf(msg, 
sizeof(msg), + "%s, is not connected at " + "the moment", + hostname); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_PEER_DISCONNECTED, + "Peer_name=%s", hostname, NULL); + *op_errstr = gf_strdup(msg); + goto out; + } + + RCU_READ_UNLOCK; + } out: - if (val_dict) - dict_unref (val_dict); + gf_msg_debug("glusterd", 0, "Returning %d", ret); - if (key_fixed) - GF_FREE (key_fixed); - - if (ret) { - if (!(*op_errstr)) { - *op_errstr = gf_strdup ("Error, Validation Failed"); - gf_log ("glsuterd", GF_LOG_DEBUG, - "Error, Cannot Validate option :%s", - *op_errstr); - } else { - gf_log ("glsuterd", GF_LOG_DEBUG, - "Error, Cannot Validate option"); - } - } - return ret; + return ret; } static int -glusterd_op_stage_reset_volume (dict_t *dict, char **op_errstr) -{ - int ret = 0; - char *volname = NULL; - gf_boolean_t exists = _gf_false; - char msg[2048] = {0}; - - ret = dict_get_str (dict, "volname", &volname); - - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); - goto out; - } +glusterd_op_stage_status_volume(dict_t *dict, char **op_errstr) +{ + int ret = -1; + uint32_t cmd = 0; + char msg[2048] = { + 0, + }; + char *volname = NULL; + char *brick = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_volinfo_t *volinfo = NULL; + dict_t *vol_opts = NULL; +#ifdef BUILD_GNFS + gf_boolean_t nfs_disabled = _gf_false; +#endif + gf_boolean_t shd_enabled = _gf_false; + + GF_ASSERT(dict); + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + ret = dict_get_uint32(dict, "cmd", &cmd); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=cmd", NULL); + goto out; + } + + if (cmd & GF_CLI_STATUS_ALL) + goto out; + + if ((cmd & GF_CLI_STATUS_QUOTAD) && + (priv->op_version == GD_OP_VERSION_MIN)) { + snprintf(msg, sizeof(msg), + "The cluster is operating at " + "version 1. Getting the status of quotad is not " + "allowed in this state."); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_QUOTA_GET_STAT_FAIL, + msg, NULL); + ret = -1; + goto out; + } + + if ((cmd & GF_CLI_STATUS_SNAPD) && + (priv->op_version < GD_OP_VERSION_3_6_0)) { + snprintf(msg, sizeof(msg), + "The cluster is operating at " + "version less than %d. 
Getting the " + "status of snapd is not allowed in this state.", + GD_OP_VERSION_3_6_0); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_SNAP_STATUS_FAIL, msg, + NULL); + ret = -1; + goto out; + } + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get volume name"); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + snprintf(msg, sizeof(msg), FMTSTR_CHECK_VOL_EXISTS, volname); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_VOLINFO_GET_FAIL, + "Volume=%s", volname, NULL); + ret = -1; + goto out; + } + + ret = glusterd_validate_volume_id(dict, volinfo); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_VALIDATE_FAILED, NULL); + goto out; + } + + ret = glusterd_is_volume_started(volinfo); + if (!ret) { + snprintf(msg, sizeof(msg), "Volume %s is not started", volname); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_VOL_NOT_STARTED, + "Volume=%s", volname, NULL); + ret = -1; + goto out; + } - exists = glusterd_check_volume_exists (volname); + vol_opts = volinfo->dict; - if (!exists) { - snprintf (msg, sizeof (msg), "Volume %s does not " - "exist", volname); - gf_log ("", GF_LOG_ERROR, "%s", msg); - *op_errstr = gf_strdup (msg); - ret = -1; - goto out; + if ((cmd & GF_CLI_STATUS_SHD) != 0) { + if (glusterd_is_shd_compatible_volume(volinfo)) { + shd_enabled = gd_is_self_heal_enabled(volinfo, vol_opts); + } else { + ret = -1; + snprintf(msg, sizeof(msg), "Volume %s is not Self-heal compatible", + volname); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_VOL_SHD_NOT_COMP, + "Volume=%s", volname, NULL); + goto out; + } + if (!shd_enabled) { + ret = -1; + snprintf(msg, sizeof(msg), + "Self-heal Daemon is disabled for volume %s", volname); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_SELF_HEALD_DISABLED, + "Volume=%s", volname, NULL); + goto out; + } +#ifdef BUILD_GNFS + } else if ((cmd & GF_CLI_STATUS_NFS) != 0) { + nfs_disabled = dict_get_str_boolean(vol_opts, NFS_DISABLE_MAP_KEY, + _gf_false); + if (nfs_disabled) { + ret = -1; + snprintf(msg, sizeof(msg), "NFS server is disabled for volume %s", + volname); + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_NFS_GANESHA_DISABLED, "Volume=%s", volname, NULL); + goto out; } - +#endif + } else if ((cmd & GF_CLI_STATUS_QUOTAD) != 0) { + if (!glusterd_is_volume_quota_enabled(volinfo)) { + ret = -1; + snprintf(msg, sizeof(msg), + "Volume %s does not have " + "quota enabled", + volname); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_QUOTA_DISABLED, + "Volume=%s", volname, NULL); + goto out; + } + } else if ((cmd & GF_CLI_STATUS_BITD) != 0) { + if (!glusterd_is_bitrot_enabled(volinfo)) { + ret = -1; + snprintf(msg, sizeof(msg), + "Volume %s does not have " + "bitrot enabled", + volname); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_BITROT_NOT_ENABLED, + "Volume=%s", volname, NULL); + goto out; + } + } else if ((cmd & GF_CLI_STATUS_SCRUB) != 0) { + if (!glusterd_is_bitrot_enabled(volinfo)) { + ret = -1; + snprintf(msg, sizeof(msg), + "Volume %s does not have " + "bitrot enabled. 
Scrubber will be enabled " + "automatically if bitrot is enabled", + volname); + gf_smsg( + this->name, GF_LOG_ERROR, errno, GD_MSG_BITROT_NOT_ENABLED, + "Scrubber will be enabled automatically if bitrot is enabled", + "Volume=%s", volname, NULL); + goto out; + } + } else if ((cmd & GF_CLI_STATUS_SNAPD) != 0) { + if (!glusterd_is_snapd_enabled(volinfo)) { + ret = -1; + snprintf(msg, sizeof(msg), + "Volume %s does not have " + "uss enabled", + volname); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_SNAPD_NOT_RUNNING, + "Volume=%s", volname, NULL); + goto out; + } + } else if ((cmd & GF_CLI_STATUS_BRICK) != 0) { + ret = dict_get_strn(dict, "brick", SLEN("brick"), &brick); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Key=brick", NULL); + goto out; + } + + ret = glusterd_volume_brickinfo_get_by_brick(brick, volinfo, &brickinfo, + _gf_false); + if (ret) { + snprintf(msg, sizeof(msg), + "No brick %s in" + " volume %s", + brick, volname); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_BRICK_NOT_FOUND, + "Brick=%s, Volume=%s", brick, volname, NULL); + ret = -1; + goto out; + } + } + + ret = 0; out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + if (ret) { + if (msg[0] != '\0') + *op_errstr = gf_strdup(msg); + else + *op_errstr = gf_strdup("Validation Failed for Status"); + } - return ret; + gf_msg_debug(this->name, 0, "Returning: %d", ret); + return ret; } +int +glusterd_op_stage_stats_volume(dict_t *dict, char **op_errstr) +{ + int ret = -1; + char *volname = NULL; + char msg[2048] = { + 0, + }; + int32_t stats_op = GF_CLI_STATS_NONE; + glusterd_volinfo_t *volinfo = NULL; + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + snprintf(msg, sizeof(msg), "Volume name get failed"); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + snprintf(msg, sizeof(msg), + "Volume %s, " + "doesn't exist", + volname); + goto out; + } + + ret = glusterd_validate_volume_id(dict, volinfo); + if (ret) + goto out; + + ret = dict_get_int32n(dict, "op", SLEN("op"), &stats_op); + if (ret) { + snprintf(msg, sizeof(msg), "Volume profile op get failed"); + goto out; + } + + if (GF_CLI_STATS_START == stats_op) { + if (_gf_true == glusterd_is_profile_on(volinfo)) { + snprintf(msg, sizeof(msg), + "Profile on Volume %s is" + " already started", + volinfo->volname); + ret = -1; + goto out; + } + } else if ((GF_CLI_STATS_STOP == stats_op) || + (GF_CLI_STATS_INFO == stats_op)) { + if (_gf_false == glusterd_is_profile_on(volinfo)) { + snprintf(msg, sizeof(msg), + "Profile on Volume %s is" + " not started", + volinfo->volname); + ret = -1; + + goto out; + } + } + if ((GF_CLI_STATS_TOP == stats_op) || (GF_CLI_STATS_INFO == stats_op)) { + if (_gf_false == glusterd_is_volume_started(volinfo)) { + snprintf(msg, sizeof(msg), "Volume %s is not started.", + volinfo->volname); + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_STARTED, "%s", + msg); + ret = -1; + goto out; + } + } + ret = 0; +out: + if (msg[0] != '\0') { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_OP_STAGE_STATS_VOL_FAIL, + "%s", msg); + *op_errstr = gf_strdup(msg); + } + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; +} static int -glusterd_op_perform_remove_brick (glusterd_volinfo_t *volinfo, char *brick) -{ - - glusterd_brickinfo_t *brickinfo = NULL; - char *dup_brick = NULL; - glusterd_conf_t *priv = NULL; - int32_t ret = -1; - - GF_ASSERT (volinfo); - GF_ASSERT (brick); - - priv = THIS->private; - - dup_brick = gf_strdup (brick); - if 
(!dup_brick) - goto out; - - ret = glusterd_volume_brickinfo_get_by_brick (dup_brick, volinfo, &brickinfo); - if (ret) - goto out; - - ret = glusterd_resolve_brick (brickinfo); - if (ret) - goto out; - - if (GLUSTERD_STATUS_STARTED == volinfo->status) { - ret = glusterd_brick_stop (volinfo, brickinfo); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to stop " - "glusterfs, ret: %d", ret); - goto out; - } - } - glusterd_delete_brick (volinfo, brickinfo); +_delete_reconfig_opt(dict_t *this, char *key, data_t *value, void *data) +{ + int32_t *is_force = 0; + + GF_ASSERT(data); + is_force = (int32_t *)data; + + /* Keys which has the flag VOLOPT_FLAG_NEVER_RESET + * should not be deleted + */ + + if (_gf_true == + glusterd_check_voloption_flags(key, VOLOPT_FLAG_NEVER_RESET)) { + if (*is_force != 1) + *is_force = *is_force | GD_OP_PROTECTED; + goto out; + } + + if (*is_force != 1) { + if (_gf_true == + glusterd_check_voloption_flags(key, VOLOPT_FLAG_FORCE)) { + /* indicate to caller that we don't set the option + * due to being protected + */ + *is_force = *is_force | GD_OP_PROTECTED; + goto out; + } else { + *is_force = *is_force | GD_OP_UNPROTECTED; + } + } + + gf_msg_debug("glusterd", 0, "deleting dict with key=%s,value=%s", key, + value->data); + dict_del(this, key); + /**Delete scrubber (pause/resume) option from the dictionary if bitrot + * option is going to be reset + * */ + if (!strncmp(key, VKEY_FEATURES_BITROT, strlen(VKEY_FEATURES_BITROT))) { + dict_del_sizen(this, VKEY_FEATURES_SCRUB); + } out: - if (dup_brick) - GF_FREE (dup_brick); - - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + return 0; } static int -glusterd_op_perform_replace_brick (glusterd_volinfo_t *volinfo, - char *old_brick, char *new_brick) +_delete_reconfig_global_opt(dict_t *this, char *key, data_t *value, void *data) { - glusterd_brickinfo_t *old_brickinfo = NULL; - glusterd_brickinfo_t *new_brickinfo = NULL; - int32_t ret = -1; - glusterd_conf_t *priv = NULL; - - priv = THIS->private; - - GF_ASSERT (volinfo); - - ret = glusterd_brickinfo_from_brick (new_brick, - &new_brickinfo); - if (ret) - goto out; - - ret = glusterd_volume_brickinfo_get_by_brick (old_brick, volinfo, - &old_brickinfo); - if (ret) - goto out; - - ret = glusterd_resolve_brick (new_brickinfo); - if (ret) - goto out; - - list_add_tail (&new_brickinfo->brick_list, - &old_brickinfo->brick_list); - - volinfo->brick_count++; - - ret = glusterd_op_perform_remove_brick (volinfo, old_brick); - if (ret) - goto out; - - ret = glusterd_create_volfiles_and_notify_services (volinfo); - if (ret) - goto out; - - if (GLUSTERD_STATUS_STARTED == volinfo->status) { - ret = glusterd_brick_start (volinfo, new_brickinfo); - if (ret) - goto out; - } + GF_ASSERT(data); + if (strcmp(GLUSTERD_GLOBAL_OPT_VERSION, key) == 0) + goto out; + _delete_reconfig_opt(this, key, value, data); out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + return 0; } static int -glusterd_op_perform_add_bricks (glusterd_volinfo_t *volinfo, int32_t count, - char *bricks) -{ - glusterd_brickinfo_t *brickinfo = NULL; - char *brick = NULL; - int32_t i = 1; - char *brick_list = NULL; - char *free_ptr1 = NULL; - char *free_ptr2 = NULL; - char *saveptr = NULL; - int32_t ret = -1; - glusterd_conf_t *priv = NULL; - - priv = THIS->private; - - GF_ASSERT (volinfo); - - if (bricks) { - brick_list = gf_strdup (bricks); - free_ptr1 = brick_list; - } - - if (count) - brick = strtok_r (brick_list+1, " \n", &saveptr); - - while ( i <= count) { - ret = 
glusterd_brickinfo_from_brick (brick, &brickinfo); - if (ret) - goto out; - - ret = glusterd_resolve_brick (brickinfo); - if (ret) - goto out; - list_add_tail (&brickinfo->brick_list, &volinfo->bricks); - brick = strtok_r (NULL, " \n", &saveptr); - i++; - volinfo->brick_count++; - - } - - brick_list = gf_strdup (bricks); - free_ptr2 = brick_list; - i = 1; - - if (count) - brick = strtok_r (brick_list+1, " \n", &saveptr); - - ret = glusterd_create_volfiles_and_notify_services (volinfo); +glusterd_options_reset(glusterd_volinfo_t *volinfo, char *key, + int32_t *is_force) +{ + int ret = 0; + data_t *value = NULL; + char *key_fixed = NULL; + xlator_t *this = NULL; + glusterd_svc_t *svc = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(volinfo->dict); + GF_ASSERT(key); + + if (!strncmp(key, "all", 3)) { + dict_foreach(volinfo->dict, _delete_reconfig_opt, is_force); + ret = glusterd_enable_default_options(volinfo, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FAIL_DEFAULT_OPT_SET, + "Failed to set " + "default options on reset for volume %s", + volinfo->volname); + goto out; + } + } else { + value = dict_get(volinfo->dict, key); + if (!value) { + gf_msg_debug(this->name, 0, "no value set for option %s", key); + goto out; + } + _delete_reconfig_opt(volinfo->dict, key, value, is_force); + ret = glusterd_enable_default_options(volinfo, key); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FAIL_DEFAULT_OPT_SET, + "Failed to set " + "default value for option '%s' on reset for " + "volume %s", + key, volinfo->volname); + goto out; + } + } + + gd_update_volume_op_versions(volinfo); + if (!volinfo->is_snap_volume) { + svc = &(volinfo->snapd.svc); + ret = svc->manager(svc, volinfo, PROC_START_NO_WAIT); if (ret) - goto out; + goto out; + } + svc = &(volinfo->gfproxyd.svc); + ret = svc->reconfigure(volinfo); + if (ret) + goto out; + + svc = &(volinfo->shd.svc); + ret = svc->reconfigure(volinfo); + if (ret) + goto out; + + ret = glusterd_create_volfiles_and_notify_services(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL, + "Unable to create volfile for" + " 'volume reset'"); + ret = -1; + goto out; + } - while (i <= count) { + ret = glusterd_store_volinfo(volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) + goto out; - ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo, - &brickinfo); - if (ret) - goto out; + if (GLUSTERD_STATUS_STARTED == volinfo->status) { + ret = glusterd_svcs_reconfigure(volinfo); + if (ret) + goto out; + } - if (GLUSTERD_STATUS_STARTED == volinfo->status) { - ret = glusterd_brick_start (volinfo, brickinfo); - if (ret) - goto out; - } - i++; - brick = strtok_r (NULL, " \n", &saveptr); - } + ret = 0; out: - if (free_ptr1) - GF_FREE (free_ptr1); - if (free_ptr2) - GF_FREE (free_ptr2); - - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + GF_FREE(key_fixed); + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; } - static int -glusterd_op_stage_remove_brick (dict_t *dict) -{ - int ret = -1; - char *volname = NULL; - glusterd_volinfo_t *volinfo = NULL; - dict_t *ctx = NULL; - char *errstr = NULL; - int32_t brick_count = 0; - - ret = dict_get_str (dict, "volname", &volname); - - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); - goto out; - } - - ret = glusterd_volinfo_find (volname, &volinfo); - - if (ret) { - gf_log ("", GF_LOG_ERROR, "Volume %s does not exist", volname); - goto out; - } - - if (glusterd_is_defrag_on(volinfo)) { - ctx = glusterd_op_get_ctx 
(GD_OP_REMOVE_BRICK); - errstr = gf_strdup("Rebalance is in progress. Please retry" - " after completion"); - if (!errstr) { - ret = -1; - goto out; - } - gf_log ("glusterd", GF_LOG_ERROR, "%s", errstr); - ret = dict_set_dynstr (ctx, "errstr", errstr); - if (ret) { - GF_FREE (errstr); - gf_log ("", GF_LOG_DEBUG, - "failed to set errstr ctx"); - goto out; - } - - ret = -1; - goto out; - } - - ret = dict_get_int32 (dict, "count", &brick_count); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get brick count"); - goto out; - } - - if (volinfo->brick_count == brick_count) { - ctx = glusterd_op_get_ctx (GD_OP_REMOVE_BRICK); - if (!ctx) { - gf_log ("", GF_LOG_ERROR, - "Operation Context is not present"); - ret = -1; - goto out; - } - errstr = gf_strdup ("Deleting all the bricks of the " - "volume is not allowed"); - if (!errstr) { - gf_log ("", GF_LOG_ERROR, "Out of memory"); - ret = -1; - goto out; - } - - ret = dict_set_dynstr (ctx, "errstr", errstr); - if (ret) { - GF_FREE (errstr); - gf_log ("", GF_LOG_DEBUG, - "failed to set pump status in ctx"); - goto out; - } - - ret = -1; - goto out; - } - +glusterd_op_reset_all_volume_options(xlator_t *this, dict_t *dict) +{ + char *key = NULL; + char *key_fixed = NULL; + int ret = -1; + int32_t is_force = 0; + glusterd_conf_t *conf = NULL; + dict_t *dup_opt = NULL; + gf_boolean_t all = _gf_false; + char *next_version = NULL; + gf_boolean_t quorum_action = _gf_false; + + conf = this->private; + ret = dict_get_strn(dict, "key", SLEN("key"), &key); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get key"); + goto out; + } + + ret = dict_get_int32n(dict, "force", SLEN("force"), &is_force); + if (ret) + is_force = 0; + + if (strcmp(key, "all")) { + ret = glusterd_check_option_exists(key, &key_fixed); + if (ret <= 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY, + "Option %s does not " + "exist", + key); + ret = -1; + goto out; + } + } else { + all = _gf_true; + } + + if (key_fixed) + key = key_fixed; + + ret = -1; + dup_opt = dict_new(); + if (!dup_opt) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); + goto out; + } + if (!all) { + dict_copy(conf->opts, dup_opt); + dict_del(dup_opt, key); + } + ret = glusterd_get_next_global_opt_version_str(conf->opts, &next_version); + if (ret) + goto out; + + ret = dict_set_strn(dup_opt, GLUSTERD_GLOBAL_OPT_VERSION, + SLEN(GLUSTERD_GLOBAL_OPT_VERSION), next_version); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", GLUSTERD_GLOBAL_OPT_VERSION, NULL); + goto out; + } + + ret = glusterd_store_options(this, dup_opt); + if (ret) + goto out; + + if (glusterd_is_quorum_changed(conf->opts, key, NULL)) + quorum_action = _gf_true; + + ret = dict_set_dynstrn(conf->opts, GLUSTERD_GLOBAL_OPT_VERSION, + SLEN(GLUSTERD_GLOBAL_OPT_VERSION), next_version); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", GLUSTERD_GLOBAL_OPT_VERSION, NULL); + goto out; + } else + next_version = NULL; + + if (!all) { + dict_del(conf->opts, key); + } else { + dict_foreach(conf->opts, _delete_reconfig_global_opt, &is_force); + } out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + GF_FREE(key_fixed); + if (dup_opt) + dict_unref(dup_opt); - return ret; + gf_msg_debug(this->name, 0, "returning %d", ret); + if (quorum_action) + glusterd_do_quorum_action(); + GF_FREE(next_version); + return ret; } static int -glusterd_op_stage_sync_volume (dict_t *dict, char **op_errstr) -{ - int ret = 
-1; - char *volname = NULL; - char *hostname = NULL; - gf_boolean_t exists = _gf_false; - glusterd_peerinfo_t *peerinfo = NULL; - char msg[2048] = {0,}; - - ret = dict_get_str (dict, "hostname", &hostname); - if (ret) { - snprintf (msg, sizeof (msg), "hostname couldn't be " - "retrieved from msg"); - *op_errstr = gf_strdup (msg); - goto out; - } - - ret = glusterd_is_local_addr (hostname); - if (ret) { - ret = glusterd_friend_find (NULL, hostname, &peerinfo); - if (ret) { - snprintf (msg, sizeof (msg), "%s, is not a friend", - hostname); - *op_errstr = gf_strdup (msg); - goto out; - } - - if (!peerinfo->connected) { - snprintf (msg, sizeof (msg), "%s, is not connected at " - "the moment", hostname); - *op_errstr = gf_strdup (msg); - ret = -1; - goto out; - } +glusterd_op_reset_volume(dict_t *dict, char **op_rspstr) +{ + glusterd_volinfo_t *volinfo = NULL; + int ret = -1; + char *volname = NULL; + char *key = NULL; + char *key_fixed = NULL; + int32_t is_force = 0; + gf_boolean_t quorum_action = _gf_false; + xlator_t *this = NULL; + + this = THIS; + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get volume name"); + goto out; + } + + if (strcasecmp(volname, "all") == 0) { + ret = glusterd_op_reset_all_volume_options(this, dict); + goto out; + } + + ret = dict_get_int32n(dict, "force", SLEN("force"), &is_force); + if (ret) + is_force = 0; + + ret = dict_get_strn(dict, "key", SLEN("key"), &key); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get option key"); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, + FMTSTR_CHECK_VOL_EXISTS, volname); + goto out; + } + + if (strcmp(key, "all") && + glusterd_check_option_exists(key, &key_fixed) != 1) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY, + "volinfo dict inconsistency: option %s not found", key); + ret = -1; + goto out; + } + if (key_fixed) + key = key_fixed; + + if (glusterd_is_quorum_changed(volinfo->dict, key, NULL)) + quorum_action = _gf_true; + + ret = glusterd_options_reset(volinfo, key, &is_force); + if (ret == -1) { + gf_asprintf(op_rspstr, "Volume reset : failed"); + } else if (is_force & GD_OP_PROTECTED) { + if (is_force & GD_OP_UNPROTECTED) { + gf_asprintf(op_rspstr, + "All unprotected fields were" + " reset. To reset the protected fields," + " use 'force'."); } else { + ret = -1; + gf_asprintf(op_rspstr, + "'%s' is protected. 
To reset" + " use 'force'.", + key); + } + } - //volname is not present in case of sync all - ret = dict_get_str (dict, "volname", &volname); - if (!ret) { - exists = glusterd_check_volume_exists (volname); - if (!exists) { - snprintf (msg, sizeof (msg), "Volume %s " - "does not exist", volname); - *op_errstr = gf_strdup (msg); - ret = -1; - goto out; - } - } else { - ret = 0; - } + if (!strcmp(key, "ganesha.enable") || !strcmp(key, "all")) { + if (glusterd_check_ganesha_export(volinfo) && + is_origin_glusterd(dict)) { + ret = manage_export_config(volname, "off", op_rspstr); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_NFS_GNS_RESET_FAIL, + "Could not reset ganesha.enable key"); } + } out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - - return ret; -} - -char * -volname_from_master (char *master) -{ - if (master == NULL) - return NULL; + GF_FREE(key_fixed); + if (quorum_action) + glusterd_do_quorum_action(); - return gf_strdup (master+1); + gf_msg_debug(this->name, 0, "'volume reset' returning %d", ret); + return ret; } int -glusterd_gsync_get_pid_file (char *pidfile, char *master, char *slave) +glusterd_stop_bricks(glusterd_volinfo_t *volinfo) { - FILE *in = NULL; - char buff[PATH_MAX] = {0, }; - char cmd[PATH_MAX] = {0, }; - char *ptr = NULL; - char buffer[PATH_MAX] = {0, }; - char pidfolder[PATH_MAX] = {0, }; - glusterd_conf_t *priv = NULL; - int ret = 0; - - GF_ASSERT (THIS); - - priv = THIS->private; - - snprintf (cmd, PATH_MAX, GSYNCD_PREFIX"/gsyncd --canonicalize-escape-url" - " %s %s", master, slave); - if (!(in = popen(cmd, "r"))) { - gf_log ("", GF_LOG_ERROR, "popen failed"); - return -1; - } - - ptr = fgets(buff, sizeof(buff), in); - if (ptr) { - buff[strlen(buff)-1]='\0'; //strip off \n - snprintf (buffer, PATH_MAX, "%s/gsync/%s", priv->workdir, buff); - strncpy (pidfolder, buffer, PATH_MAX); - } else { - ret = -1; - goto out; - } - - memset (buff, 0, PATH_MAX); - memset (buffer, 0, PATH_MAX); + glusterd_brickinfo_t *brickinfo = NULL; - ptr = fgets(buff, sizeof(buff), in); - if (ptr) { - buff[strlen(buff)-1]='\0'; //strip off \n - snprintf (buffer, PATH_MAX, "%s/%s.pid", pidfolder, buff); - strncpy (pidfile, buffer, PATH_MAX); + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + /*TODO: Need to change @del_brick in brick_stop to _gf_true + * once we enable synctask in peer rpc prog */ + if (glusterd_brick_stop(volinfo, brickinfo, _gf_false)) { + gf_event(EVENT_BRICK_STOP_FAILED, "peer=%s;volume=%s;brick=%s", + brickinfo->hostname, volinfo->volname, brickinfo->path); + return -1; } + } - out: - ret |= pclose (in); - - if (ret) - gf_log ("", GF_LOG_ERROR, "popen failed"); - - return ret ? 
-1 : 0; + return 0; } -/* status: return 0 when gsync is running - * return -1 when not running - */ int -gsync_status (char *master, char *slave, int *status) -{ - int ret = -1; - char pidfile[PATH_MAX] = {0,}; - FILE *file = NULL; - - GF_VALIDATE_OR_GOTO ("gsync", master, out); - GF_VALIDATE_OR_GOTO ("gsync", slave, out); - GF_VALIDATE_OR_GOTO ("gsync", status, out); - - ret = glusterd_gsync_get_pid_file (pidfile, master, slave); - if (ret == -1) { - ret = -1; - gf_log ("", GF_LOG_WARNING, "failed to create the pidfile string"); - goto out; - } - - file = fopen (pidfile, "r+"); - if (file) { - ret = lockf (fileno (file), F_TEST, 0); - if (ret == 0) - *status = -1; - else - *status = 0; - } else - *status = -1; - ret = 0; +glusterd_start_bricks(glusterd_volinfo_t *volinfo) + +{ + int ret = -1; + glusterd_brickinfo_t *brickinfo = NULL; + + GF_ASSERT(volinfo); + + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + if (!brickinfo->start_triggered) { + pthread_mutex_lock(&brickinfo->restart_mutex); + { + /* coverity[SLEEP] */ + ret = glusterd_brick_start(volinfo, brickinfo, _gf_false, + _gf_false); + } + pthread_mutex_unlock(&brickinfo->restart_mutex); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_DISCONNECTED, + "Failed to start %s:%s for %s", brickinfo->hostname, + brickinfo->path, volinfo->volname); + gf_event(EVENT_BRICK_START_FAILED, "peer=%s;volume=%s;brick=%s", + brickinfo->hostname, volinfo->volname, + brickinfo->path); + goto out; + } + } + } + ret = 0; out: - return ret; -} - -int -gsync_validate_config_type (int32_t config_type) -{ - switch (config_type) { - case GF_GSYNC_OPTION_TYPE_CONFIG_SET: - case GF_GSYNC_OPTION_TYPE_CONFIG_DEL: - case GF_GSYNC_OPTION_TYPE_CONFIG_GET: - case GF_GSYNC_OPTION_TYPE_CONFIG_GET_ALL:return 0; - default: return -1; - } - return 0; + return ret; } - -int32_t -glusterd_gsync_volinfo_dict_set (glusterd_volinfo_t *volinfo, - char *key, char *value) -{ - int32_t ret = -1; - char *gsync_status = NULL; - - gsync_status = gf_strdup (value); - if (!gsync_status) { - gf_log ("", GF_LOG_ERROR, "Unable to allocate memory"); - goto out; - } - - ret = dict_set_dynstr (volinfo->dict, key, gsync_status); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to set dict"); - goto out; +static int +glusterd_update_volumes_dict(glusterd_volinfo_t *volinfo) +{ + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + char *address_family_str = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, out); + + /* 3.9.0 onwards gNFS will be disabled by default. In case of an upgrade + * from anything below than 3.9.0 to 3.9.x the volume's dictionary will + * not have 'nfs.disable' key set which means the same will not be set + * to on until explicitly done. setnfs.disable to 'on' at op-version + * bump up flow is the ideal way here. The same is also applicable for + * transport.address-family where if the transport type is set to tcp + * then transport.address-family is defaulted to 'inet'. 
+ */ + if (conf->op_version >= GD_OP_VERSION_3_9_0) { + if (dict_get_str_boolean(volinfo->dict, NFS_DISABLE_MAP_KEY, 1)) { + ret = dict_set_dynstr_with_alloc(volinfo->dict, NFS_DISABLE_MAP_KEY, + "on"); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Failed to set " + "option ' NFS_DISABLE_MAP_KEY ' on " + "volume %s", + volinfo->volname); + goto out; + } + } + ret = dict_get_strn(volinfo->dict, "transport.address-family", + SLEN("transport.address-family"), + &address_family_str); + if (ret) { + if (volinfo->transport_type == GF_TRANSPORT_TCP) { + ret = dict_set_dynstr_with_alloc( + volinfo->dict, "transport.address-family", "inet"); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_SET_FAILED, + "failed to set transport." + "address-family on %s", + volinfo->volname); + goto out; + } + } } + } + ret = glusterd_store_volinfo(volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); - ret = 0; out: - return 0; + return ret; } -int -gsync_validate_config_option (dict_t *dict, int32_t config_type, - char **op_errstr) +static int +glusterd_set_brick_mx_opts(dict_t *dict, char *key, char *value, + char **op_errstr) { - int ret = -1; - int i = 0; - char *op_name = NULL; + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; - if (config_type == GF_GSYNC_OPTION_TYPE_CONFIG_GET_ALL) - return 0; + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + GF_VALIDATE_OR_GOTO(this->name, dict, out); + GF_VALIDATE_OR_GOTO(this->name, key, out); + GF_VALIDATE_OR_GOTO(this->name, value, out); + GF_VALIDATE_OR_GOTO(this->name, op_errstr, out); - ret = dict_get_str (dict, "op_name", &op_name); - if (ret < 0) { - gf_log ("", GF_LOG_WARNING, "option not specified"); - *op_errstr = gf_strdup ("Please specify the option"); - goto out; - } - - i = 0; - while (gsync_opname[i] != NULL) { - if (strcmp (gsync_opname[i], op_name) == 0) { - ret = 0; - goto out; - } - i++; - } + ret = 0; - gf_log ("", GF_LOG_WARNING, "Invalid option"); - *op_errstr = gf_strdup ("Invalid option"); + priv = this->private; - ret = -1; + if (!strcmp(key, GLUSTERD_BRICK_MULTIPLEX_KEY)) { + ret = dict_set_dynstrn(priv->opts, GLUSTERD_BRICK_MULTIPLEX_KEY, + SLEN(GLUSTERD_BRICK_MULTIPLEX_KEY), + gf_strdup(value)); + } out: - return ret; + return ret; } -int -gsync_verify_config_options (dict_t *dict, char **op_errstr) +/* This is a hack to prevent client-io-threads from being loaded in the graph + * when the cluster-op-version is bumped up from 3.8.x to 3.13.x. The key is + * deleted subsequently in glusterd_create_volfiles(). 
*/ +static int +glusterd_dict_set_skip_cliot_key(glusterd_volinfo_t *volinfo) { - int ret = -1; - int config_type = 0; - - GF_VALIDATE_OR_GOTO ("gsync", dict, out); - GF_VALIDATE_OR_GOTO ("gsync", op_errstr, out); - - ret = dict_get_int32 (dict, "config_type", &config_type); - if (ret < 0) { - gf_log ("", GF_LOG_WARNING, "config type is missing"); - *op_errstr = gf_strdup ("config-type missing"); - goto out; - } - - ret = gsync_validate_config_type (config_type); - if (ret == -1) { - gf_log ("", GF_LOG_WARNING, "Invalid config type"); - *op_errstr = gf_strdup ("Invalid config type"); - goto out; - } - - ret = gsync_validate_config_option (dict, config_type, op_errstr); - if (ret < 0) - goto out; - - ret = 0; -out: - return ret; + return dict_set_int32n(volinfo->dict, "skip-CLIOT", SLEN("skip-CLIOT"), 1); } static int -glusterd_op_stage_gsync_set (dict_t *dict, char **op_errstr) +glusterd_op_set_all_volume_options(xlator_t *this, dict_t *dict, + char **op_errstr) { - int ret = 0; - int type = 0; - int status = 0; - dict_t *ctx = NULL; - char *volname = NULL; - char *master = NULL; - char *slave = NULL; - gf_boolean_t exists = _gf_false; - glusterd_volinfo_t *volinfo = NULL; - - ctx = glusterd_op_get_ctx (GD_OP_GSYNC_SET); - if (!ctx) { - gf_log ("gsync", GF_LOG_DEBUG, "gsync command doesn't " - "correspond to this glusterd"); - goto out; - } + char *key = NULL; + char *key_fixed = NULL; + char *value = NULL; + char *dup_value = NULL; + int ret = -1; + glusterd_conf_t *conf = NULL; + dict_t *dup_opt = NULL; + char *next_version = NULL; + gf_boolean_t quorum_action = _gf_false; + uint32_t op_version = 0; + glusterd_volinfo_t *volinfo = NULL; + glusterd_svc_t *svc = NULL; + gf_boolean_t svcs_reconfigure = _gf_false; + + conf = this->private; + ret = dict_get_strn(dict, "key1", SLEN("key1"), &key); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=key1", NULL); + goto out; + } + + ret = dict_get_strn(dict, "value1", SLEN("value1"), &value); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "invalid key,value pair in 'volume set'"); + goto out; + } + + ret = glusterd_check_option_exists(key, &key_fixed); + if (ret <= 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UNKNOWN_KEY, + "Invalid key %s", key); + ret = -1; + goto out; + } + + if (key_fixed) + key = key_fixed; + + ret = glusterd_set_shared_storage(dict, key, value, op_errstr); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SHARED_STRG_SET_FAIL, + "Failed to set shared storage option"); + goto out; + } + + ret = glusterd_set_brick_mx_opts(dict, key, value, op_errstr); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_MX_SET_FAIL, + "Failed to set brick multiplexing option"); + goto out; + } + + /* If the key is cluster.op-version, set conf->op_version to the value + * if needed and save it. + */ + if (strcmp(key, GLUSTERD_GLOBAL_OP_VERSION_KEY) == 0) { + ret = 0; - ret = dict_get_str (dict, "master", &master); - if (ret < 0) { - gf_log ("", GF_LOG_WARNING, "master not found"); - *op_errstr = gf_strdup ("master not found"); - ret = -1; - goto out; - } + ret = gf_string2uint(value, &op_version); + if (ret) + goto out; + + if (op_version >= conf->op_version) { + conf->op_version = op_version; + + /* When a bump up happens, update the quota.conf file + * as well. This is because, till 3.7 we had a quota + * conf version v1.1 in quota.conf. 
When inode-quota + * feature is introduced, this needs to be changed to + * v1.2 in quota.conf and 16 bytes uuid in quota.conf + * needs to be changed to 17 bytes. Look + * glusterd_store_quota_config for more details. + */ + cds_list_for_each_entry(volinfo, &conf->volumes, vol_list) + { + ret = glusterd_store_quota_config( + volinfo, NULL, NULL, GF_QUOTA_OPTION_TYPE_UPGRADE, NULL); + if (ret) + goto out; + ret = glusterd_update_volumes_dict(volinfo); + if (ret) + goto out; - ret = dict_get_str (dict, "slave", &slave); - if (ret < 0) { - gf_log ("", GF_LOG_WARNING, "slave not found"); - *op_errstr = gf_strdup ("slave not found"); - ret = -1; - goto out; - } + if (glusterd_dict_set_skip_cliot_key(volinfo)) + goto out; - volname = volname_from_master (master); - if (volname == NULL) { - gf_log ("", GF_LOG_WARNING, "volname couldn't be found"); - *op_errstr = gf_strdup ("volname not found"); - ret = -1; - goto out; - } + if (!volinfo->is_snap_volume) { + svc = &(volinfo->snapd.svc); + ret = svc->manager(svc, volinfo, PROC_START_NO_WAIT); + if (ret) + goto out; + } - exists = glusterd_check_volume_exists (volname); - if (!exists) { - gf_log ("", GF_LOG_WARNING, "volname doesnot exist"); - *op_errstr = gf_strdup ("volname doesnot exist"); - ret = -1; - goto out; - } + svc = &(volinfo->gfproxyd.svc); + ret = svc->reconfigure(volinfo); + if (ret) + goto out; - ret = dict_get_int32 (dict, "type", &type); - if (ret < 0) { - gf_log ("", GF_LOG_WARNING, "command type not found"); - *op_errstr = gf_strdup ("command unsuccessful"); - ret = -1; - goto out; - } + svc = &(volinfo->shd.svc); + ret = svc->reconfigure(volinfo); + if (ret) + goto out; - ret = glusterd_volinfo_find (volname, &volinfo); - if (ret) { - gf_log ("", GF_LOG_WARNING, "volinfo not found " - "for %s", volname); - *op_errstr = gf_strdup ("command unsuccessful"); - ret = -1; - goto out; - } - - if (type == GF_GSYNC_OPTION_TYPE_START) { - if (GLUSTERD_STATUS_STARTED != volinfo->status) { - gf_log ("", GF_LOG_WARNING, "%s volume not started", - volname); - *op_errstr = gf_strdup ("please start the volume"); - ret = -1; - goto out; - } - //check if the gsync is already started - ret = gsync_status (master, slave, &status); - if (ret == 0 && status == 0) { - gf_log ("", GF_LOG_WARNING, "gsync already started"); - *op_errstr = gf_strdup ("gsync already started"); - ret = -1; - goto out; - } else if (ret == -1) { - gf_log ("", GF_LOG_WARNING, "gsync start validation " - " failed"); - *op_errstr = gf_strdup ("command to failed, please " - "check the log file"); - goto out; + ret = glusterd_create_volfiles_and_notify_services(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_VOLFILE_CREATE_FAIL, + "Unable to create volfile for" + " 'volume set'"); + goto out; } - ret = 0; - goto out; - } - - if (type == GF_GSYNC_OPTION_TYPE_STOP) { - ret = gsync_status (master, slave, &status); - if (ret == 0 && status == -1) { - gf_log ("", GF_LOG_WARNING, "gsync not running"); - *op_errstr = gf_strdup ("gsync not running"); - glusterd_gsync_volinfo_dict_set (volinfo, - "features.marker-gsync", "off"); - ret = -1; - goto out; - } else if (ret == -1) { - gf_log ("", GF_LOG_WARNING, "gsync stop validation " - " failed"); - *op_errstr = gf_strdup ("command failed, please " - "check the log file"); - goto out; + if (GLUSTERD_STATUS_STARTED == volinfo->status) { + svcs_reconfigure = _gf_true; } - ret = 0; - goto out; - } - - if (type == GF_GSYNC_OPTION_TYPE_CONFIGURE) { - ret = gsync_verify_config_options (dict, op_errstr); - if (ret < 0) - goto 
out; - } + } + if (svcs_reconfigure) { + ret = glusterd_svcs_reconfigure(NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_RESTART_FAIL, + "Unable to restart " + "services"); + goto out; + } + } + + ret = glusterd_store_global_info(this); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OP_VERS_STORE_FAIL, + "Failed to store op-version."); + } + } + /* No need to save cluster.op-version in conf->opts + */ + goto out; + } + ret = -1; + dup_opt = dict_new(); + if (!dup_opt) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); + goto out; + } + dict_copy(conf->opts, dup_opt); + ret = dict_set_str(dup_opt, key, value); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto out; + } + + ret = glusterd_get_next_global_opt_version_str(conf->opts, &next_version); + if (ret) + goto out; + + ret = dict_set_strn(dup_opt, GLUSTERD_GLOBAL_OPT_VERSION, + SLEN(GLUSTERD_GLOBAL_OPT_VERSION), next_version); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", GLUSTERD_GLOBAL_OPT_VERSION, NULL); + goto out; + } + + ret = glusterd_store_options(this, dup_opt); + if (ret) + goto out; + + if (glusterd_is_quorum_changed(conf->opts, key, value)) + quorum_action = _gf_true; + + ret = dict_set_dynstrn(conf->opts, GLUSTERD_GLOBAL_OPT_VERSION, + SLEN(GLUSTERD_GLOBAL_OPT_VERSION), next_version); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", GLUSTERD_GLOBAL_OPT_VERSION, NULL); + goto out; + } else + next_version = NULL; + + dup_value = gf_strdup(value); + if (!dup_value) + goto out; + + ret = dict_set_dynstr(conf->opts, key, dup_value); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto out; + } else + dup_value = NULL; /* Protect the allocation from GF_FREE */ - ret = 0; out: - if (volname) - GF_FREE (volname); + GF_FREE(dup_value); + GF_FREE(key_fixed); + if (dup_opt) + dict_unref(dup_opt); - return ret; + gf_msg_debug(this->name, 0, "returning %d", ret); + if (quorum_action) + glusterd_do_quorum_action(); + GF_FREE(next_version); + return ret; } -static gf_boolean_t -glusterd_is_profile_on (glusterd_volinfo_t *volinfo) -{ - int ret = -1; - char *latency_key = NULL; - char *fd_stats_key = NULL; - gf_boolean_t is_latency_on = _gf_false; - gf_boolean_t is_fd_stats_on = _gf_false; - - GF_ASSERT (volinfo); - latency_key = "diagnostics.latency-measurement"; - fd_stats_key = "diagnostics.count-fop-hits"; - - ret = dict_get_str_boolean (volinfo->dict, fd_stats_key, - _gf_false); - if (ret != -1) - is_fd_stats_on = ret; - ret = dict_get_str_boolean (volinfo->dict, latency_key, - _gf_false); - if (ret != -1) - is_latency_on = ret; - if ((_gf_true == is_latency_on) && - (_gf_true == is_fd_stats_on)) - return _gf_true; - return _gf_false; -} - -static int -glusterd_op_stage_stats_volume (dict_t *dict, char **op_errstr) +int +glusterd_op_get_max_opversion(char **op_errstr, dict_t *rsp_dict) { - int ret = -1; - char *volname = NULL; - gf_boolean_t exists = _gf_false; - char msg[2048] = {0,}; - int32_t stats_op = GF_CLI_STATS_NONE; - glusterd_volinfo_t *volinfo = NULL; - - ret = dict_get_str (dict, "volname", &volname); - if (ret) { - snprintf (msg, sizeof (msg), "Volume name get failed"); - goto out; - } + int ret = -1; - exists = glusterd_check_volume_exists (volname); - ret = glusterd_volinfo_find (volname, &volinfo); - if ((!exists) || (ret < 0)) { - snprintf (msg, sizeof (msg), 
"Volume %s, " - "doesn't exist", volname); - ret = -1; - goto out; - } + GF_VALIDATE_OR_GOTO(THIS->name, rsp_dict, out); - ret = dict_get_int32 (dict, "op", &stats_op); - if (ret) { - snprintf (msg, sizeof (msg), "Volume profile op get failed"); - goto out; - } + ret = dict_set_int32n(rsp_dict, "max-opversion", SLEN("max-opversion"), + GD_OP_VERSION_MAX); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Setting value for max-opversion to dict failed"); + goto out; + } - if (GF_CLI_STATS_START == stats_op) { - if (_gf_true == glusterd_is_profile_on (volinfo)) { - snprintf (msg, sizeof (msg), "Profile on Volume %s is" - " already started", volinfo->volname); - ret = -1; - goto out; - } - } - if ((GF_CLI_STATS_STOP == stats_op) || - (GF_CLI_STATS_INFO == stats_op)) { - if (_gf_false == glusterd_is_profile_on (volinfo)) { - snprintf (msg, sizeof (msg), "Profile on Volume %s is" - " not started", volinfo->volname); - ret = -1; - goto out; - } - } - if ((GF_CLI_STATS_TOP == stats_op) || - (GF_CLI_STATS_INFO == stats_op)) { - if (_gf_false == glusterd_is_volume_started (volinfo)) { - snprintf (msg, sizeof (msg), "Volume %s is not started.", - volinfo->volname); - gf_log ("glusterd", GF_LOG_ERROR, "%s", msg); - ret = -1; - goto out; - } - } - ret = 0; out: - if (msg[0] != '\0') { - gf_log ("glusterd", GF_LOG_ERROR, "%s", msg); - *op_errstr = gf_strdup (msg); - } - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + gf_msg_debug(THIS->name, 0, "Returning %d", ret); + return ret; } static int -glusterd_op_create_volume (dict_t *dict, char **op_errstr) -{ - int ret = 0; - char *volname = NULL; - glusterd_conf_t *priv = NULL; - glusterd_volinfo_t *volinfo = NULL; - glusterd_brickinfo_t *brickinfo = NULL; - xlator_t *this = NULL; - char *brick = NULL; - int32_t count = 0; - int32_t i = 1; - char *bricks = NULL; - char *brick_list = NULL; - char *free_ptr = NULL; - char *saveptr = NULL; - int32_t sub_count = 0; - char *trans_type = NULL; - char *str = NULL; - - this = THIS; - GF_ASSERT (this); - - priv = this->private; - GF_ASSERT (priv); - - ret = glusterd_volinfo_new (&volinfo); - - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to allocate memory"); - goto out; - } +glusterd_set_shared_storage(dict_t *dict, char *key, char *value, + char **op_errstr) +{ + int32_t ret = -1; + char hooks_args[PATH_MAX] = { + 0, + }; + char errstr[PATH_MAX] = { + 0, + }; + xlator_t *this = NULL; + int32_t len = 0; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + GF_VALIDATE_OR_GOTO(this->name, dict, out); + GF_VALIDATE_OR_GOTO(this->name, key, out); + GF_VALIDATE_OR_GOTO(this->name, value, out); + GF_VALIDATE_OR_GOTO(this->name, op_errstr, out); + + ret = 0; + + if (strcmp(key, GLUSTERD_SHARED_STORAGE_KEY)) { + goto out; + } + + /* Re-create the brick path so as to be * + * able to re-use it * + */ + ret = recursive_rmdir(GLUSTER_SHARED_STORAGE_BRICK_DIR); + if (ret) { + snprintf(errstr, PATH_MAX, + "Failed to remove shared " + "storage brick(%s). " + "Reason: %s", + GLUSTER_SHARED_STORAGE_BRICK_DIR, strerror(errno)); + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED, "%s", + errstr); + ret = -1; + goto out; + } + + ret = mkdir_p(GLUSTER_SHARED_STORAGE_BRICK_DIR, 0755, _gf_true); + if (-1 == ret) { + snprintf(errstr, PATH_MAX, + "Failed to create shared " + "storage brick(%s). 
" + "Reason: %s", + GLUSTER_SHARED_STORAGE_BRICK_DIR, strerror(errno)); + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_CREATE_DIR_FAILED, "%s", + errstr); + goto out; + } + + if (is_origin_glusterd(dict)) { + len = snprintf(hooks_args, sizeof(hooks_args), + "is_originator=1,local_node_hostname=%s", + local_node_hostname); + } else { + len = snprintf(hooks_args, sizeof(hooks_args), + "is_originator=0,local_node_hostname=%s", + local_node_hostname); + } + if ((len < 0) || (len >= sizeof(hooks_args))) { + ret = -1; + goto out; + } - ret = dict_get_str (dict, "volname", &volname); + ret = dict_set_dynstr_with_alloc(dict, "hooks_args", hooks_args); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Failed to set" + " hooks_args in dict."); + goto out; + } - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); - goto out; - } +out: + if (ret && strlen(errstr)) { + *op_errstr = gf_strdup(errstr); + } - strncpy (volinfo->volname, volname, GLUSTERD_MAX_VOLUME_NAME); - GF_ASSERT (volinfo->volname); + return ret; +} - ret = dict_get_int32 (dict, "type", &volinfo->type); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get type"); - goto out; - } +static int +glusterd_op_set_volume(dict_t *dict, char **errstr) +{ + int ret = 0; + glusterd_volinfo_t *volinfo = NULL; + char *volname = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + int count = 1; + char *key = NULL; + char *key_fixed = NULL; + char *value = NULL; + char keystr[50] = { + 0, + }; + int keylen; + gf_boolean_t global_opt = _gf_false; + gf_boolean_t global_opts_set = _gf_false; + glusterd_volinfo_t *voliter = NULL; + int32_t dict_count = 0; + gf_boolean_t check_op_version = _gf_false; + uint32_t new_op_version = 0; + gf_boolean_t quorum_action = _gf_false; + glusterd_svc_t *svc = NULL; + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + GF_ASSERT(priv); + + ret = dict_get_int32n(dict, "count", SLEN("count"), &dict_count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Count(dict),not set in Volume-Set"); + goto out; + } + + if (dict_count == 0) { + ret = glusterd_volset_help(NULL, errstr); + goto out; + } + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get volume name"); + goto out; + } + + if (strcasecmp(volname, "all") == 0) { + ret = glusterd_op_set_all_volume_options(this, dict, errstr); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, + FMTSTR_CHECK_VOL_EXISTS, volname); + goto out; + } + + /* TODO: Remove this once v3.3 compatibility is not required */ + check_op_version = dict_get_str_boolean(dict, "check-op-version", + _gf_false); + + if (check_op_version) { + ret = dict_get_uint32(dict, "new-op-version", &new_op_version); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get new op-version from dict"); + goto out; + } + } + + for (count = 1; ret != -1; count++) { + keylen = snprintf(keystr, sizeof(keystr), "key%d", count); + ret = dict_get_strn(dict, keystr, keylen, &key); + if (ret) + break; - ret = dict_get_int32 (dict, "count", &volinfo->brick_count); + keylen = snprintf(keystr, sizeof(keystr), "value%d", count); + ret = dict_get_strn(dict, keystr, keylen, &value); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get count"); - goto out; + gf_msg(this->name, GF_LOG_ERROR, 0, 
GD_MSG_DICT_GET_FAILED, + "invalid key,value pair in 'volume set'"); + ret = -1; + goto out; } - ret = dict_get_int32 (dict, "port", &volinfo->port); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get port"); + if (strcmp(key, "config.memory-accounting") == 0) { + ret = gf_string2boolean(value, &volinfo->memory_accounting); + if (ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, + "Invalid value in key-value pair."); goto out; + } } - count = volinfo->brick_count; - - ret = dict_get_str (dict, "bricks", &bricks); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get bricks"); + if (strcmp(key, "config.transport") == 0) { + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_VOL_TRANSPORT_TYPE_CHANGE, + "changing transport-type for volume %s to %s", volname, + value); + ret = 0; + if (strcasecmp(value, "rdma") == 0) { + volinfo->transport_type = GF_TRANSPORT_RDMA; + } else if (strcasecmp(value, "tcp") == 0) { + volinfo->transport_type = GF_TRANSPORT_TCP; + } else if ((strcasecmp(value, "tcp,rdma") == 0) || + (strcasecmp(value, "rdma,tcp") == 0)) { + volinfo->transport_type = GF_TRANSPORT_BOTH_TCP_RDMA; + } else { + ret = -1; goto out; + } } - if (GF_CLUSTER_TYPE_REPLICATE == volinfo->type) { - ret = dict_get_int32 (dict, "replica-count", - &sub_count); - if (ret) - goto out; - } else if (GF_CLUSTER_TYPE_STRIPE == volinfo->type) { - ret = dict_get_int32 (dict, "stripe-count", - &sub_count); - if (ret) - goto out; - } - - ret = dict_get_str (dict, "transport", &trans_type); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get transport"); - goto out; - } + ret = glusterd_check_ganesha_cmd(key, value, errstr, dict); + if (ret == -1) + goto out; - ret = dict_get_str (dict, "volume-id", &str); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume-id"); - goto out; - } - ret = uuid_parse (str, volinfo->volume_id); - if (ret) { - gf_log ("", GF_LOG_ERROR, "unable to parse uuid %s", str); + if (!is_key_glusterd_hooks_friendly(key)) { + ret = glusterd_check_option_exists(key, &key_fixed); + GF_ASSERT(ret); + if (ret <= 0) { + key_fixed = NULL; goto out; + } } - if (strcasecmp (trans_type, "rdma") == 0) { - volinfo->transport_type = GF_TRANSPORT_RDMA; - } else if (strcasecmp (trans_type, "tcp") == 0) { - volinfo->transport_type = GF_TRANSPORT_TCP; - } else { - volinfo->transport_type = GF_TRANSPORT_BOTH_TCP_RDMA; + global_opt = _gf_false; + if (glusterd_check_globaloption(key)) { + global_opt = _gf_true; + global_opts_set = _gf_true; } - volinfo->sub_count = sub_count; + if (!global_opt) + value = gf_strdup(value); - if (bricks) { - brick_list = gf_strdup (bricks); - free_ptr = brick_list; + if (!value) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_SET_FAIL, + "Unable to set the options in 'volume set'"); + ret = -1; + goto out; } - if (count) - brick = strtok_r (brick_list+1, " \n", &saveptr); + if (key_fixed) + key = key_fixed; - while ( i <= count) { - ret = glusterd_brickinfo_from_brick (brick, &brickinfo); - if (ret) - goto out; + if (glusterd_is_quorum_changed(volinfo->dict, key, value)) + quorum_action = _gf_true; - ret = glusterd_resolve_brick (brickinfo); + if (global_opt) { + cds_list_for_each_entry(voliter, &priv->volumes, vol_list) + { + value = gf_strdup(value); + ret = dict_set_dynstr(voliter->dict, key, value); if (ret) - goto out; - list_add_tail (&brickinfo->brick_list, &volinfo->bricks); - brick = strtok_r (NULL, " \n", &saveptr); - i++; - } - - ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); - if (ret) { - 
*op_errstr = gf_strdup ("Failed to store the Volume information"); - goto out; - } - - ret = glusterd_create_volfiles_and_notify_services (volinfo); - if (ret) { - *op_errstr = gf_strdup ("Failed to create volume files"); - goto out; - } - - ret = glusterd_volume_compute_cksum (volinfo); - if (ret) { - *op_errstr = gf_strdup ("Failed to compute checksum of volume"); + goto out; + } + } else { + ret = dict_set_dynstr(volinfo->dict, key, value); + if (ret) goto out; } - volinfo->defrag_status = 0; - list_add_tail (&volinfo->vol_list, &priv->volumes); -out: - if (free_ptr) - GF_FREE(free_ptr); - - return ret; -} - -static int -glusterd_op_add_brick (dict_t *dict, char **op_errstr) -{ - int ret = 0; - char *volname = NULL; - glusterd_conf_t *priv = NULL; - glusterd_volinfo_t *volinfo = NULL; - xlator_t *this = NULL; - char *bricks = NULL; - int32_t count = 0; - - this = THIS; - GF_ASSERT (this); - - priv = this->private; - GF_ASSERT (priv); - - ret = dict_get_str (dict, "volname", &volname); - - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); - goto out; + if (key_fixed) { + GF_FREE(key_fixed); + key_fixed = NULL; } + } - ret = glusterd_volinfo_find (volname, &volinfo); + if (count == 1) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_OPTIONS_GIVEN, + "No options received "); + ret = -1; + goto out; + } + /* Update the cluster op-version before regenerating volfiles so that + * correct volfiles are generated + */ + if (new_op_version > priv->op_version) { + priv->op_version = new_op_version; + ret = glusterd_store_global_info(this); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to allocate memory"); - goto out; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OP_VERS_STORE_FAIL, + "Failed to store op-version"); + goto out; } + } + if (!global_opts_set) { + gd_update_volume_op_versions(volinfo); - ret = dict_get_int32 (dict, "count", &count); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get count"); + if (!volinfo->is_snap_volume) { + svc = &(volinfo->snapd.svc); + ret = svc->manager(svc, volinfo, PROC_START_NO_WAIT); + if (ret) goto out; } + svc = &(volinfo->gfproxyd.svc); + ret = svc->reconfigure(volinfo); + if (ret) + goto out; + svc = &(volinfo->shd.svc); + ret = svc->reconfigure(volinfo); + if (ret) + goto out; - ret = dict_get_str (dict, "bricks", &bricks); + ret = glusterd_create_volfiles_and_notify_services(volinfo); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get bricks"); - goto out; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL, + "Unable to create volfile for" + " 'volume set'"); + ret = -1; + goto out; } - ret = glusterd_op_perform_add_bricks (volinfo, count, bricks); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to add bricks"); - goto out; - } - - volinfo->defrag_status = 0; - - ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); + ret = glusterd_store_volinfo(volinfo, + GLUSTERD_VOLINFO_VER_AC_INCREMENT); if (ret) - goto out; - - if (GLUSTERD_STATUS_STARTED == volinfo->status) - ret = glusterd_check_generate_start_nfs (); - -out: - return ret; -} - -static int -rb_regenerate_volfiles (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo, - int32_t pump_needed) -{ - dict_t *dict = NULL; - int ret = 0; + goto out; - dict = volinfo->dict; - - gf_log ("", GF_LOG_DEBUG, - "attempting to set pump value=%d", pump_needed); - - ret = dict_set_int32 (dict, "enable-pump", pump_needed); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "could not dict_set enable-pump"); + if (GLUSTERD_STATUS_STARTED == 
volinfo->status) { + ret = glusterd_svcs_reconfigure(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_RESTART_FAIL, + "Unable to restart services"); goto out; + } } - ret = glusterd_create_rb_volfiles (volinfo, brickinfo); - -out: - return ret; -} - -static int -rb_src_brick_restart (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *src_brickinfo, - int activate_pump) -{ - int ret = 0; + } else { + cds_list_for_each_entry(voliter, &priv->volumes, vol_list) + { + volinfo = voliter; + gd_update_volume_op_versions(volinfo); - gf_log ("", GF_LOG_DEBUG, - "Attempting to kill src"); + if (!volinfo->is_snap_volume) { + svc = &(volinfo->snapd.svc); + ret = svc->manager(svc, volinfo, PROC_START_NO_WAIT); + if (ret) + goto out; + } - ret = glusterd_nfs_server_stop (); + svc = &(volinfo->gfproxyd.svc); + ret = svc->reconfigure(volinfo); + if (ret) + goto out; - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to stop nfs, ret: %d", - ret); - } + svc = &(volinfo->shd.svc); + ret = svc->reconfigure(volinfo); + if (ret) + goto out; - ret = glusterd_volume_stop_glusterfs (volinfo, src_brickinfo); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to stop " - "glusterfs, ret: %d", ret); + ret = glusterd_create_volfiles_and_notify_services(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL, + "Unable to create volfile for" + " 'volume set'"); + ret = -1; goto out; - } + } - glusterd_delete_volfile (volinfo, src_brickinfo); + ret = glusterd_store_volinfo(volinfo, + GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) + goto out; - if (activate_pump) { - ret = rb_regenerate_volfiles (volinfo, src_brickinfo, 1); + if (GLUSTERD_STATUS_STARTED == volinfo->status) { + ret = glusterd_svcs_reconfigure(volinfo); if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Could not regenerate volfiles with pump"); - goto out; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_RESTART_FAIL, + "Unable to restart services"); + goto out; } - } else { - ret = rb_regenerate_volfiles (volinfo, src_brickinfo, 0); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Could not regenerate volfiles without pump"); - goto out; - } - - } - - sleep (2); - ret = glusterd_volume_start_glusterfs (volinfo, src_brickinfo); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to start " - "glusterfs, ret: %d", ret); - goto out; + } } + } out: - ret = glusterd_nfs_server_start (); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to start nfs, ret: %d", - ret); - } - return ret; + GF_FREE(key_fixed); + gf_msg_debug(this->name, 0, "returning %d", ret); + if (quorum_action) + glusterd_do_quorum_action(); + return ret; } static int -rb_send_xattr_command (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *src_brickinfo, - glusterd_brickinfo_t *dst_brickinfo, - const char *xattr_key, - const char *value) -{ - glusterd_conf_t *priv = NULL; - char mount_point_path[PATH_MAX] = {0,}; - struct stat buf; - int ret = -1; - - priv = THIS->private; - - snprintf (mount_point_path, PATH_MAX, "%s/vols/%s/%s", - priv->workdir, volinfo->volname, - RB_CLIENT_MOUNTPOINT); - - ret = stat (mount_point_path, &buf); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "stat failed. 
Could not send " - " %s command", xattr_key); - goto out; - } - - ret = sys_lsetxattr (mount_point_path, xattr_key, - value, - strlen (value) + 1, - 0); - - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "setxattr failed"); - goto out; - } - +glusterd_op_sync_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict) +{ + int ret = -1; + char *volname = NULL; + char *hostname = NULL; + char msg[2048] = { + 0, + }; + int count = 1; + int vol_count = 0; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + ret = dict_get_strn(dict, "hostname", SLEN("hostname"), &hostname); + if (ret) { + snprintf(msg, sizeof(msg), + "hostname couldn't be " + "retrieved from msg"); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=hostname", NULL); + *op_errstr = gf_strdup(msg); + goto out; + } + + if (!gf_is_local_addr(hostname)) { ret = 0; + goto out; + } -out: - return ret; -} - -static int -rb_spawn_dst_brick (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo) -{ - glusterd_conf_t *priv = NULL; - char cmd_str[8192] = {0,}; - int ret = -1; - int32_t port = 0; - - priv = THIS->private; - - port = pmap_registry_alloc (THIS); - brickinfo->port = port; - - GF_ASSERT (port); - - snprintf (cmd_str, 8192, - "%s/sbin/glusterfs -f %s/vols/%s/%s -p %s/vols/%s/%s " - "--xlator-option src-server.listen-port=%d", - GFS_PREFIX, priv->workdir, volinfo->volname, - RB_DSTBRICKVOL_FILENAME, - priv->workdir, volinfo->volname, - RB_DSTBRICK_PIDFILE, - port); - - ret = gf_system (cmd_str); + // volname is not present in case of sync all + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (!ret) { + ret = glusterd_volinfo_find(volname, &volinfo); if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Could not start glusterfs"); - goto out; + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, + "Volume with name: %s " + "not exists", + volname); + goto out; } + } - gf_log ("", GF_LOG_DEBUG, - "Successfully started glusterfs: brick=%s:%s", - brickinfo->hostname, brickinfo->path); - + if (!rsp_dict) { + // this should happen only on source + gf_smsg(this->name, GF_LOG_INFO, errno, GD_MSG_INVALID_ARGUMENT, NULL); ret = 0; + goto out; + } -out: - return ret; -} - -static int -rb_spawn_glusterfs_client (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo) -{ - glusterd_conf_t *priv = NULL; - char cmd_str[8192] = {0,}; - struct stat buf; - int ret = -1; - - priv = THIS->private; - - snprintf (cmd_str, 4096, - "%s/sbin/glusterfs -f %s/vols/%s/%s %s/vols/%s/%s", - GFS_PREFIX, priv->workdir, volinfo->volname, - RB_CLIENTVOL_FILENAME, - priv->workdir, volinfo->volname, - RB_CLIENT_MOUNTPOINT); - - ret = gf_system (cmd_str); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Could not start glusterfs"); + if (volname) { + ret = glusterd_add_volume_to_dict(volinfo, rsp_dict, 1, "volume"); + if (ret) + goto out; + vol_count = 1; + } else { + cds_list_for_each_entry(volinfo, &priv->volumes, vol_list) + { + ret = glusterd_add_volume_to_dict(volinfo, rsp_dict, count, + "volume"); + if (ret) goto out; - } - - gf_log ("", GF_LOG_DEBUG, - "Successfully started glusterfs: brick=%s:%s", - brickinfo->hostname, brickinfo->path); - - memset (cmd_str, 0, sizeof (cmd_str)); - - snprintf (cmd_str, 4096, "%s/vols/%s/%s", - priv->workdir, volinfo->volname, - RB_CLIENT_MOUNTPOINT); - - ret = stat (cmd_str, &buf); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "stat on mountpoint failed"); - 
goto out; - } - gf_log ("", GF_LOG_DEBUG, - "stat on mountpoint succeeded"); - - ret = 0; + vol_count = count++; + } + } + ret = dict_set_int32n(rsp_dict, "count", SLEN("count"), vol_count); out: - return ret; -} + gf_msg_debug("glusterd", 0, "Returning %d", ret); -static const char *client_volfile_str = "volume mnt-client\n" - " type protocol/client\n" - " option remote-host %s\n" - " option remote-subvolume %s\n" - " option remote-port %d\n" - "end-volume\n" - "volume mnt-wb\n" - " type performance/write-behind\n" - " subvolumes mnt-client\n" - "end-volume\n"; + return ret; +} static int -rb_generate_client_volfile (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *src_brickinfo) -{ - glusterd_conf_t *priv = NULL; - FILE *file = NULL; - char filename[PATH_MAX]; - int ret = -1; - - priv = THIS->private; - - gf_log ("", GF_LOG_DEBUG, - "Creating volfile"); - - snprintf (filename, PATH_MAX, "%s/vols/%s/%s", - priv->workdir, volinfo->volname, - RB_CLIENTVOL_FILENAME); - - file = fopen (filename, "w+"); - if (!file) { - gf_log ("", GF_LOG_DEBUG, - "Open of volfile failed"); - ret = -1; - goto out; - } - - GF_ASSERT (src_brickinfo->port); - - fprintf (file, client_volfile_str, src_brickinfo->hostname, - src_brickinfo->path, src_brickinfo->port); - - fclose (file); - - ret = 0; - +glusterd_add_profile_volume_options(glusterd_volinfo_t *volinfo) +{ + int ret = -1; + + GF_ASSERT(volinfo); + + ret = dict_set_nstrn(volinfo->dict, VKEY_DIAG_LAT_MEASUREMENT, + SLEN(VKEY_DIAG_LAT_MEASUREMENT), "on", SLEN("on")); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "failed to set the volume %s " + "option %s value %s", + volinfo->volname, VKEY_DIAG_LAT_MEASUREMENT, "on"); + goto out; + } + + ret = dict_set_nstrn(volinfo->dict, VKEY_DIAG_CNT_FOP_HITS, + SLEN(VKEY_DIAG_CNT_FOP_HITS), "on", SLEN("on")); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "failed to set the volume %s " + "option %s value %s", + volinfo->volname, VKEY_DIAG_CNT_FOP_HITS, "on"); + goto out; + } out: - return ret; + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; } -static const char *dst_brick_volfile_str = "volume src-posix\n" - " type storage/posix\n" - " option directory %s\n" - "end-volume\n" - "volume %s\n" - " type features/locks\n" - " subvolumes src-posix\n" - "end-volume\n" - "volume src-server\n" - " type protocol/server\n" - " option auth.addr.%s.allow *\n" - " option transport-type tcp\n" - " subvolumes %s\n" - "end-volume\n"; - -static int -rb_generate_dst_brick_volfile (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *dst_brickinfo) +static void +glusterd_remove_profile_volume_options(glusterd_volinfo_t *volinfo) { - glusterd_conf_t *priv = NULL; - FILE *file = NULL; - char filename[PATH_MAX]; - int ret = -1; - - priv = THIS->private; + GF_ASSERT(volinfo); - gf_log ("", GF_LOG_DEBUG, - "Creating volfile"); - - snprintf (filename, PATH_MAX, "%s/vols/%s/%s", - priv->workdir, volinfo->volname, - RB_DSTBRICKVOL_FILENAME); + dict_del_sizen(volinfo->dict, VKEY_DIAG_LAT_MEASUREMENT); + dict_del_sizen(volinfo->dict, VKEY_DIAG_CNT_FOP_HITS); +} - file = fopen (filename, "w+"); - if (!file) { - gf_log ("", GF_LOG_DEBUG, - "Open of volfile failed"); - ret = -1; +int +glusterd_op_stats_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict) +{ + int ret = -1; + char *volname = NULL; + char msg[2048] = { + 0, + }; + glusterd_volinfo_t *volinfo = NULL; + int32_t stats_op = GF_CLI_STATS_NONE; + + ret = dict_get_strn(dict, "volname", SLEN("volname"), 
&volname); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "volume name get failed"); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + snprintf(msg, sizeof(msg), "Volume %s does not exists", volname); + + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, "%s", msg); + goto out; + } + + ret = dict_get_int32n(dict, "op", SLEN("op"), &stats_op); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "volume profile op get failed"); + goto out; + } + + switch (stats_op) { + case GF_CLI_STATS_START: + ret = glusterd_add_profile_volume_options(volinfo); + if (ret) goto out; - } + break; + case GF_CLI_STATS_STOP: + glusterd_remove_profile_volume_options(volinfo); + break; + case GF_CLI_STATS_INFO: + case GF_CLI_STATS_TOP: + // info is already collected in brick op. + // just goto out; + ret = 0; + goto out; + break; + default: + GF_ASSERT(0); + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY, + "Invalid profile op: %d", stats_op); + ret = -1; + goto out; + break; + } + ret = glusterd_create_volfiles_and_notify_services(volinfo); + + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL, + "Unable to create volfile for" + " 'volume set'"); + ret = -1; + goto out; + } - fprintf (file, dst_brick_volfile_str, dst_brickinfo->path, - dst_brickinfo->path, dst_brickinfo->path, - dst_brickinfo->path); + ret = glusterd_store_volinfo(volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) + goto out; - fclose (file); + if (GLUSTERD_STATUS_STARTED == volinfo->status) { + ret = glusterd_svcs_reconfigure(volinfo); + if (ret) + goto out; + } - ret = 0; + ret = 0; out: - return ret; -} - -static int -rb_mountpoint_mkdir (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *src_brickinfo) -{ - glusterd_conf_t *priv = NULL; - char mount_point_path[PATH_MAX] = {0,}; - int ret = -1; - - priv = THIS->private; + gf_msg_debug("glusterd", 0, "Returning %d", ret); - snprintf (mount_point_path, PATH_MAX, "%s/vols/%s/%s", - priv->workdir, volinfo->volname, - RB_CLIENT_MOUNTPOINT); - - ret = mkdir (mount_point_path, 0777); - if (ret && (errno != EEXIST)) { - gf_log ("", GF_LOG_DEBUG, "mkdir failed, errno: %d", - errno); - goto out; - } - - ret = 0; - -out: - return ret; + return ret; } static int -rb_mountpoint_rmdir (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *src_brickinfo) -{ - glusterd_conf_t *priv = NULL; - char mount_point_path[PATH_MAX] = {0,}; - int ret = -1; - - priv = THIS->private; - - snprintf (mount_point_path, PATH_MAX, "%s/vols/%s/%s", - priv->workdir, volinfo->volname, - RB_CLIENT_MOUNTPOINT); - - ret = rmdir (mount_point_path); - if (ret) { - gf_log ("", GF_LOG_DEBUG, "rmdir failed"); - goto out; - } - - ret = 0; +_add_remove_bricks_to_dict(dict_t *dict, glusterd_volinfo_t *volinfo, + char *prefix) +{ + int ret = -1; + int count = 0; + int i = 0; + char brick_key[16] = { + 0, + }; + char dict_key[64] = { + /* dict_key is small as prefix is up to 32 chars */ + 0, + }; + int keylen; + char *brick = NULL; + xlator_t *this = NULL; + + GF_ASSERT(dict); + GF_ASSERT(volinfo); + GF_ASSERT(prefix); + + this = THIS; + GF_ASSERT(this); + + ret = dict_get_int32n(volinfo->rebal.dict, "count", SLEN("count"), &count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get brick count"); + goto out; + } + + keylen = snprintf(dict_key, sizeof(dict_key), "%s.count", prefix); + ret = dict_set_int32n(dict, dict_key, keylen, count); + if (ret) { + 
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set brick count in dict"); + goto out; + } + + for (i = 1; i <= count; i++) { + keylen = snprintf(brick_key, sizeof(brick_key), "brick%d", i); + + ret = dict_get_strn(volinfo->rebal.dict, brick_key, keylen, &brick); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get %s", brick_key); + goto out; + } + + keylen = snprintf(dict_key, sizeof(dict_key), "%s.%s", prefix, + brick_key); + if ((keylen < 0) || (keylen >= sizeof(dict_key))) { + ret = -1; + goto out; + } + ret = dict_set_strn(dict, dict_key, keylen, brick); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to add brick to dict"); + goto out; + } + brick = NULL; + } out: - return ret; + return ret; } +/* This adds the respective task-id and all available parameters of a task into + * a dictionary + */ static int -rb_destroy_maintenance_client (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *src_brickinfo) +_add_task_to_dict(dict_t *dict, glusterd_volinfo_t *volinfo, int op, int index) { - glusterd_conf_t *priv = NULL; - char cmd_str[8192] = {0,}; - char filename[PATH_MAX] = {0,}; - struct stat buf; - char mount_point_path[PATH_MAX] = {0,}; - int ret = -1; - - priv = THIS->private; - - snprintf (mount_point_path, PATH_MAX, "%s/vols/%s/%s", - priv->workdir, volinfo->volname, - RB_CLIENT_MOUNTPOINT); - - ret = stat (mount_point_path, &buf); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "stat failed. Cannot destroy maintenance " - "client"); - goto out; - } + int ret = -1; + char key[32] = { + 0, + }; + int keylen; + char *uuid_str = NULL; + int status = 0; + xlator_t *this = NULL; - snprintf (cmd_str, 8192, "/bin/umount -f %s/vols/%s/%s", - priv->workdir, volinfo->volname, - RB_CLIENT_MOUNTPOINT); - - ret = gf_system (cmd_str); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "umount failed on maintenance client"); - goto out; - } - - ret = rb_mountpoint_rmdir (volinfo, src_brickinfo); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "rmdir of mountpoint failed"); - goto out; - } + GF_ASSERT(dict); + GF_ASSERT(volinfo); - snprintf (filename, PATH_MAX, "%s/vols/%s/%s", - priv->workdir, volinfo->volname, - RB_CLIENTVOL_FILENAME); + this = THIS; + GF_ASSERT(this); - ret = unlink (filename); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "unlink failed"); - goto out; - } + switch (op) { + case GD_OP_REMOVE_BRICK: + snprintf(key, sizeof(key), "task%d", index); + ret = _add_remove_bricks_to_dict(dict, volinfo, key); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_ADD_REMOVE_BRICK_FAIL, + "Failed to add remove bricks to dict"); + goto out; + } + case GD_OP_REBALANCE: + uuid_str = gf_strdup(uuid_utoa(volinfo->rebal.rebalance_id)); + status = volinfo->rebal.defrag_status; + break; - ret = 0; + default: + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_TASK_ID, + "%s operation doesn't have a" + " task_id", + gd_op_list[op]); + goto out; + } + + keylen = snprintf(key, sizeof(key), "task%d.type", index); + ret = dict_set_strn(dict, key, keylen, (char *)gd_op_list[op]); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Error setting task type in dict"); + goto out; + } + + keylen = snprintf(key, sizeof(key), "task%d.id", index); + + if (!uuid_str) + goto out; + ret = dict_set_dynstrn(dict, key, keylen, uuid_str); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Error setting task id in dict"); + goto out; + } + uuid_str = NULL; + + keylen = 
snprintf(key, sizeof(key), "task%d.status", index); + ret = dict_set_int32n(dict, key, keylen, status); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Error setting task status in dict"); + goto out; + } out: - return ret; + if (uuid_str) + GF_FREE(uuid_str); + return ret; } static int -rb_spawn_maintenance_client (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *src_brickinfo) +glusterd_aggregate_task_status(dict_t *rsp_dict, glusterd_volinfo_t *volinfo) { - int ret = -1; + int ret = -1; + int tasks = 0; + xlator_t *this = NULL; - ret = rb_generate_client_volfile (volinfo, src_brickinfo); - if (ret) { - gf_log ("", GF_LOG_DEBUG, "Unable to generate client " - "volfile"); - goto out; - } + this = THIS; + GF_ASSERT(this); - ret = rb_mountpoint_mkdir (volinfo, src_brickinfo); - if (ret) { - gf_log ("", GF_LOG_DEBUG, "Unable to mkdir " - "mountpoint"); - goto out; - } + if (!gf_uuid_is_null(volinfo->rebal.rebalance_id)) { + ret = _add_task_to_dict(rsp_dict, volinfo, volinfo->rebal.op, tasks); - ret = rb_spawn_glusterfs_client (volinfo, src_brickinfo); if (ret) { - gf_log ("", GF_LOG_DEBUG, "Unable to start glusterfs"); - goto out; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to add task details to dict"); + goto out; } - - ret = 0; + tasks++; + } + ret = dict_set_int32n(rsp_dict, "tasks", SLEN("tasks"), tasks); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Error setting tasks count in dict"); + goto out; + } out: - return ret; + return ret; } static int -rb_spawn_destination_brick (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *dst_brickinfo) - -{ - int ret = -1; - - ret = rb_generate_dst_brick_volfile (volinfo, dst_brickinfo); - if (ret) { - gf_log ("", GF_LOG_DEBUG, "Unable to generate client " - "volfile"); - goto out; - } - - ret = rb_spawn_dst_brick (volinfo, dst_brickinfo); - if (ret) { - gf_log ("", GF_LOG_DEBUG, "Unable to start glusterfs"); - goto out; - } - - ret = 0; -out: - return ret; -} - -static int -rb_do_operation_start (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *src_brickinfo, - glusterd_brickinfo_t *dst_brickinfo) -{ - char start_value[8192] = {0,}; - int ret = -1; - - - gf_log ("", GF_LOG_DEBUG, - "replace-brick sending start xattr"); - - ret = rb_spawn_maintenance_client (volinfo, src_brickinfo); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Could not spawn maintenance " - "client"); - goto out; - } +glusterd_op_status_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict) +{ + int ret = -1; + int node_count = 0; + int brick_index = -1; + int other_count = 0; + int other_index = 0; + uint32_t cmd = 0; + char *volname = NULL; + char *brick = NULL; + xlator_t *this = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_conf_t *priv = NULL; + dict_t *vol_opts = NULL; +#ifdef BUILD_GNFS + gf_boolean_t nfs_disabled = _gf_false; +#endif + gf_boolean_t shd_enabled = _gf_false; + gf_boolean_t origin_glusterd = _gf_false; + int snapd_enabled, bitrot_enabled, volume_quota_enabled; - gf_log ("", GF_LOG_DEBUG, - "mounted the replace brick client"); + this = THIS; + GF_ASSERT(this); + priv = this->private; - snprintf (start_value, 8192, "%s:%s:%d", - dst_brickinfo->hostname, - dst_brickinfo->path, - dst_brickinfo->port); + GF_ASSERT(priv); + GF_ASSERT(dict); - ret = rb_send_xattr_command (volinfo, src_brickinfo, - dst_brickinfo, RB_PUMP_START_CMD, - start_value); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Failed to send command to 
pump"); - } + origin_glusterd = is_origin_glusterd(dict); - ret = rb_destroy_maintenance_client (volinfo, src_brickinfo); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Failed to destroy maintenance " - "client"); - goto out; - } + ret = dict_get_uint32(dict, "cmd", &cmd); + if (ret) + goto out; - gf_log ("", GF_LOG_DEBUG, - "unmounted the replace brick client"); + if (origin_glusterd) { ret = 0; + if ((cmd & GF_CLI_STATUS_ALL)) { + ret = glusterd_get_all_volnames(rsp_dict); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLNAMES_GET_FAIL, + "failed to get all volume " + "names for status"); + } + } + + ret = dict_set_uint32(rsp_dict, "cmd", cmd); + if (ret) + goto out; + + if (cmd & GF_CLI_STATUS_ALL) + goto out; + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) + goto out; + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, + "Volume with name: %s " + "does not exist", + volname); + goto out; + } + vol_opts = volinfo->dict; + + if ((cmd & GF_CLI_STATUS_QUOTAD) != 0) { + ret = glusterd_add_node_to_dict(priv->quotad_svc.name, rsp_dict, 0, + vol_opts); + if (ret) + goto out; + other_count++; + node_count++; +#ifdef BUILD_GNFS + } else if ((cmd & GF_CLI_STATUS_NFS) != 0) { + ret = glusterd_add_node_to_dict(priv->nfs_svc.name, rsp_dict, 0, + vol_opts); + if (ret) + goto out; + other_count++; + node_count++; +#endif + } else if ((cmd & GF_CLI_STATUS_BITD) != 0) { + ret = glusterd_add_node_to_dict(priv->bitd_svc.name, rsp_dict, 0, + vol_opts); + if (ret) + goto out; + other_count++; + node_count++; + } else if ((cmd & GF_CLI_STATUS_SCRUB) != 0) { + ret = glusterd_add_node_to_dict(priv->scrub_svc.name, rsp_dict, 0, + vol_opts); + if (ret) + goto out; + other_count++; + node_count++; + } else if ((cmd & GF_CLI_STATUS_SNAPD) != 0) { + ret = glusterd_add_snapd_to_dict(volinfo, rsp_dict, other_index); + if (ret) + goto out; + other_count++; + node_count++; + } else if ((cmd & GF_CLI_STATUS_SHD) != 0) { + ret = glusterd_add_shd_to_dict(volinfo, rsp_dict, other_index); + if (ret) + goto out; + other_count++; + node_count++; + } else if ((cmd & GF_CLI_STATUS_BRICK) != 0) { + ret = dict_get_strn(dict, "brick", SLEN("brick"), &brick); + if (ret) + goto out; -out: - return ret; -} - -static int -rb_do_operation_pause (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *src_brickinfo, - glusterd_brickinfo_t *dst_brickinfo) -{ - int ret = -1; - - gf_log ("", GF_LOG_INFO, - "replace-brick send pause xattr"); - - ret = rb_spawn_maintenance_client (volinfo, src_brickinfo); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Could not spawn maintenance " - "client"); - goto out; - } - - gf_log ("", GF_LOG_DEBUG, - "mounted the replace brick client"); + ret = glusterd_volume_brickinfo_get_by_brick(brick, volinfo, &brickinfo, + _gf_false); + if (ret) + goto out; + + if (gf_uuid_compare(brickinfo->uuid, MY_UUID)) + goto out; + + glusterd_add_brick_to_dict(volinfo, brickinfo, rsp_dict, ++brick_index); + if (cmd & GF_CLI_STATUS_DETAIL) + glusterd_add_brick_detail_to_dict(volinfo, brickinfo, rsp_dict, + brick_index); + node_count++; + + } else if ((cmd & GF_CLI_STATUS_TASKS) != 0) { + ret = glusterd_aggregate_task_status(rsp_dict, volinfo); + goto out; + + } else { + snapd_enabled = glusterd_is_snapd_enabled(volinfo); + shd_enabled = gd_is_self_heal_enabled(volinfo, vol_opts); +#ifdef BUILD_GNFS + nfs_disabled = dict_get_str_boolean(vol_opts, NFS_DISABLE_MAP_KEY, + _gf_false); +#endif + volume_quota_enabled = 
glusterd_is_volume_quota_enabled(volinfo); + bitrot_enabled = glusterd_is_bitrot_enabled(volinfo); - ret = rb_send_xattr_command (volinfo, src_brickinfo, - dst_brickinfo, RB_PUMP_PAUSE_CMD, - "jargon"); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Failed to send command to pump"); + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + brick_index++; + if (gf_uuid_compare(brickinfo->uuid, MY_UUID)) + continue; - } + glusterd_add_brick_to_dict(volinfo, brickinfo, rsp_dict, + brick_index); - ret = rb_destroy_maintenance_client (volinfo, src_brickinfo); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Failed to destroy maintenance " - "client"); - goto out; + if (cmd & GF_CLI_STATUS_DETAIL) { + glusterd_add_brick_detail_to_dict(volinfo, brickinfo, rsp_dict, + brick_index); + } + node_count++; } - - gf_log ("", GF_LOG_DEBUG, - "unmounted the replace brick client"); - - ret = 0; + if ((cmd & GF_CLI_STATUS_MASK) == GF_CLI_STATUS_NONE) { + other_index = brick_index + 1; + if (snapd_enabled) { + ret = glusterd_add_snapd_to_dict(volinfo, rsp_dict, + other_index); + if (ret) + goto out; + other_count++; + other_index++; + node_count++; + } + + if (glusterd_is_shd_compatible_volume(volinfo)) { + if (shd_enabled) { + ret = glusterd_add_shd_to_dict(volinfo, rsp_dict, + other_index); + if (ret) + goto out; + other_count++; + other_index++; + node_count++; + } + } +#ifdef BUILD_GNFS + if (!nfs_disabled) { + ret = glusterd_add_node_to_dict(priv->nfs_svc.name, rsp_dict, + other_index, vol_opts); + if (ret) + goto out; + other_index++; + other_count++; + node_count++; + } +#endif + if (volume_quota_enabled) { + ret = glusterd_add_node_to_dict(priv->quotad_svc.name, rsp_dict, + other_index, vol_opts); + if (ret) + goto out; + other_count++; + node_count++; + other_index++; + } + + if (bitrot_enabled) { + ret = glusterd_add_node_to_dict(priv->bitd_svc.name, rsp_dict, + other_index, vol_opts); + if (ret) + goto out; + other_count++; + node_count++; + other_index++; + /* For handling scrub status. 
Scrub daemon will be + * running automatically when bitrot is enable */ + ret = glusterd_add_node_to_dict(priv->scrub_svc.name, rsp_dict, + other_index, vol_opts); + if (ret) + goto out; + other_count++; + node_count++; + } + } + } + + ret = dict_set_int32n(rsp_dict, "type", SLEN("type"), volinfo->type); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=type", NULL); + goto out; + } + + ret = dict_set_int32n(rsp_dict, "brick-index-max", SLEN("brick-index-max"), + brick_index); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Key=brick-index-max", NULL); + goto out; + } + ret = dict_set_int32n(rsp_dict, "other-count", SLEN("other-count"), + other_count); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Key=other-count", NULL); + goto out; + } + ret = dict_set_int32n(rsp_dict, "count", SLEN("count"), node_count); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Key=count", NULL); + goto out; + } + + /* Active tasks */ + /* Tasks are added only for normal volume status request for either a + * single volume or all volumes + */ + if (!glusterd_status_has_tasks(cmd)) + goto out; + + ret = glusterd_aggregate_task_status(rsp_dict, volinfo); + if (ret) + goto out; + ret = 0; out: - if (!glusterd_is_local_addr (src_brickinfo->hostname)) { - ret = rb_src_brick_restart (volinfo, src_brickinfo, - 0); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Could not restart src-brick"); - } - } - return ret; -} - -static int -rb_kill_destination_brick (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *dst_brickinfo) -{ - glusterd_conf_t *priv = NULL; - char pidfile[PATH_MAX] = {0,}; - - priv = THIS->private; + gf_msg_debug(this->name, 0, "Returning %d", ret); - snprintf (pidfile, PATH_MAX, "%s/vols/%s/%s", - priv->workdir, volinfo->volname, - RB_DSTBRICK_PIDFILE); - - return glusterd_service_stop ("brick", pidfile, SIGTERM, _gf_true); + return ret; } static int -rb_do_operation_abort (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *src_brickinfo, - glusterd_brickinfo_t *dst_brickinfo) +glusterd_op_ac_none(glusterd_op_sm_event_t *event, void *ctx) { - int ret = -1; - - gf_log ("", GF_LOG_DEBUG, - "replace-brick sending abort xattr"); - - ret = rb_spawn_maintenance_client (volinfo, src_brickinfo); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Could not spawn maintenance " - "client"); - goto out; - } - - gf_log ("", GF_LOG_DEBUG, - "mounted the replace brick client"); - - ret = rb_send_xattr_command (volinfo, src_brickinfo, - dst_brickinfo, RB_PUMP_ABORT_CMD, - "jargon"); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Failed to send command to pump"); - } - - ret = rb_destroy_maintenance_client (volinfo, src_brickinfo); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Failed to destroy maintenance " - "client"); - goto out; - } + int ret = 0; - gf_log ("", GF_LOG_DEBUG, - "unmounted the replace brick client"); + gf_msg_debug(THIS->name, 0, "Returning with %d", ret); - ret = 0; - -out: - if (!glusterd_is_local_addr (src_brickinfo->hostname)) { - ret = rb_src_brick_restart (volinfo, src_brickinfo, - 0); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Could not restart src-brick"); - } - } - return ret; + return ret; } - static int -rb_get_xattr_command (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *src_brickinfo, - glusterd_brickinfo_t *dst_brickinfo, - const char *xattr_key, - char *value) +glusterd_op_sm_locking_failed(uuid_t *txn_id) { - glusterd_conf_t *priv = NULL; - char 
mount_point_path[PATH_MAX] = {0,}; - struct stat buf; - int ret = -1; - - priv = THIS->private; - - snprintf (mount_point_path, PATH_MAX, "%s/vols/%s/%s", - priv->workdir, volinfo->volname, - RB_CLIENT_MOUNTPOINT); - - ret = stat (mount_point_path, &buf); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "stat failed. Could not send " - " %s command", xattr_key); - goto out; - } - - ret = lgetxattr (mount_point_path, xattr_key, - value, - 8192); - - if (ret < 0) { - gf_log ("", GF_LOG_DEBUG, - "getxattr failed"); - goto out; - } + int ret = -1; - ret = 0; + opinfo.op_ret = -1; + opinfo.op_errstr = gf_strdup("locking failed for one of the peer."); -out: - return ret; + ret = glusterd_set_txn_opinfo(txn_id, &opinfo); + if (ret) + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL, + "Unable to set " + "transaction's opinfo"); + /* Inject a reject event such that unlocking gets triggered right away*/ + ret = glusterd_op_sm_inject_event(GD_OP_EVENT_RCVD_RJT, txn_id, NULL); + + return ret; } static int -rb_do_operation_status (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *src_brickinfo, - glusterd_brickinfo_t *dst_brickinfo) -{ - char status[2048] = {0,}; - char *status_reply = NULL; - dict_t *ctx = NULL; - int ret = 0; - gf_boolean_t origin = _gf_false; - - ctx = glusterd_op_get_ctx (GD_OP_REPLACE_BRICK); - if (!ctx) { - gf_log ("", GF_LOG_ERROR, - "Operation Context is not present"); - goto out; - } - - origin = _gf_true; - - if (origin) { - ret = rb_spawn_maintenance_client (volinfo, src_brickinfo); +glusterd_op_ac_send_lock(glusterd_op_sm_event_t *event, void *ctx) +{ + int ret = 0; + rpc_clnt_procedure_t *proc = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + uint32_t pending_count = 0; + dict_t *dict = NULL; + + this = THIS; + priv = this->private; + GF_ASSERT(priv); + + RCU_READ_LOCK; + cds_list_for_each_entry_rcu(peerinfo, &priv->peers, uuid_list) + { + /* Only send requests to peers who were available before the + * transaction started + */ + if (peerinfo->generation > opinfo.txn_generation) + continue; + + if (!peerinfo->connected || !peerinfo->mgmt) + continue; + if ((peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) && + (glusterd_op_get_op() != GD_OP_SYNC_VOLUME)) + continue; + + /* Based on the op_version, acquire a cluster or mgmt_v3 lock */ + if (priv->op_version < GD_OP_VERSION_3_6_0) { + proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_CLUSTER_LOCK]; + if (proc->fn) { + ret = proc->fn(NULL, this, peerinfo); if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Could not spawn maintenance " - "client"); - goto out; - } - - gf_log ("", GF_LOG_DEBUG, - "mounted the replace brick client"); - - ret = rb_get_xattr_command (volinfo, src_brickinfo, - dst_brickinfo, RB_PUMP_STATUS_CMD, - status); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Failed to get status from pump"); - goto umount; - } - - gf_log ("", GF_LOG_DEBUG, - "pump status is %s", status); - - status_reply = gf_strdup (status); - if (!status_reply) { - gf_log ("", GF_LOG_ERROR, "Out of memory"); - ret = -1; - goto umount; - } + RCU_READ_UNLOCK; + gf_msg(this->name, GF_LOG_WARNING, 0, + GD_MSG_LOCK_REQ_SEND_FAIL, + "Failed to send lock request " + "for operation 'Volume %s' to " + "peer %s", + gd_op_list[opinfo.op], peerinfo->hostname); + goto out; + } + /* Mark the peer as locked*/ + peerinfo->locked = _gf_true; + pending_count++; + } + } else { + dict = glusterd_op_get_ctx(); + dict_ref(dict); - ret = dict_set_dynstr (ctx, "status-reply", - status_reply); + proc 
= &peerinfo->mgmt_v3->proctable[GLUSTERD_MGMT_V3_LOCK]; + if (proc->fn) { + ret = dict_set_static_ptr(dict, "peerinfo", peerinfo); if (ret) { - gf_log ("", GF_LOG_DEBUG, - "failed to set pump status in ctx"); - + RCU_READ_UNLOCK; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "failed to set peerinfo"); + dict_unref(dict); + goto out; } - umount: - ret = rb_destroy_maintenance_client (volinfo, src_brickinfo); + ret = proc->fn(NULL, this, dict); if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Failed to destroy maintenance " - "client"); - goto out; - } - } - - gf_log ("", GF_LOG_DEBUG, - "unmounted the replace brick client"); -out: - return ret; -} - -/* Set src-brick's port number to be used in the maintainance mount - * after all commit acks are received. - */ -static int -rb_update_srcbrick_port (glusterd_brickinfo_t *src_brickinfo, dict_t *rsp_dict, - dict_t *req_dict, int32_t replace_op) -{ - xlator_t *this = NULL; - dict_t *ctx = NULL; - int ret = 0; - int dict_ret = 0; - int src_port = 0; - - this = THIS; - - ctx = glusterd_op_get_ctx (GD_OP_REPLACE_BRICK); - if (ctx) { - dict_ret = dict_get_int32 (req_dict, "src-brick-port", &src_port); - if (src_port) - src_brickinfo->port = src_port; - } - - if (!glusterd_is_local_addr (src_brickinfo->hostname)) { - gf_log ("", GF_LOG_INFO, - "adding src-brick port no"); - - src_brickinfo->port = pmap_registry_search (this, - src_brickinfo->path, GF_PMAP_PORT_BRICKSERVER); - if (!src_brickinfo->port && - replace_op != GF_REPLACE_OP_COMMIT_FORCE ) { - gf_log ("", GF_LOG_ERROR, - "Src brick port not available"); - ret = -1; - goto out; - } - - if (rsp_dict) { - ret = dict_set_int32 (rsp_dict, "src-brick-port", src_brickinfo->port); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Could not set src-brick port no"); - goto out; - } - } - - ctx = glusterd_op_get_ctx (GD_OP_REPLACE_BRICK); - if (ctx) { - ret = dict_set_int32 (ctx, "src-brick-port", src_brickinfo->port); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Could not set src-brick port no"); - goto out; - } - } - - } + RCU_READ_UNLOCK; + gf_msg(this->name, GF_LOG_WARNING, 0, + GD_MSG_MGMTV3_LOCK_REQ_SEND_FAIL, + "Failed to send mgmt_v3 lock " + "request for operation " + "'Volume %s' to peer %s", + gd_op_list[opinfo.op], peerinfo->hostname); + dict_unref(dict); + goto out; + } + /* Mark the peer as locked*/ + peerinfo->locked = _gf_true; + pending_count++; + } + } + } + RCU_READ_UNLOCK; + + opinfo.pending_count = pending_count; + + ret = glusterd_set_txn_opinfo(&event->txn_id, &opinfo); + if (ret) + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL, + "Unable to set " + "transaction's opinfo"); + + if (!opinfo.pending_count) + ret = glusterd_op_sm_inject_all_acc(&event->txn_id); out: - return ret; - -} + if (ret) + ret = glusterd_op_sm_locking_failed(&event->txn_id); -static int -rb_update_dstbrick_port (glusterd_brickinfo_t *dst_brickinfo, dict_t *rsp_dict, - dict_t *req_dict, int32_t replace_op) -{ - dict_t *ctx = NULL; - int ret = 0; - int dict_ret = 0; - int dst_port = 0; - - ctx = glusterd_op_get_ctx (GD_OP_REPLACE_BRICK); - if (ctx) { - dict_ret = dict_get_int32 (req_dict, "dst-brick-port", &dst_port); - if (dst_port) - dst_brickinfo->port = dst_port; - - } - - if (!glusterd_is_local_addr (dst_brickinfo->hostname)) { - gf_log ("", GF_LOG_INFO, - "adding dst-brick port no"); - - if (rsp_dict) { - ret = dict_set_int32 (rsp_dict, "dst-brick-port", - dst_brickinfo->port); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Could not set dst-brick port no in rsp dict"); - goto out; - 
} - } - - ctx = glusterd_op_get_ctx (GD_OP_REPLACE_BRICK); - if (ctx) { - ret = dict_set_int32 (ctx, "dst-brick-port", - dst_brickinfo->port); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Could not set dst-brick port no"); - goto out; - } - } - } -out: - return ret; + gf_msg_debug(this->name, 0, "Returning with %d", ret); + return ret; } - - static int -glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) -{ - int ret = 0; - dict_t *ctx = NULL; - int replace_op = 0; - glusterd_volinfo_t *volinfo = NULL; - char *volname = NULL; - xlator_t *this = NULL; - glusterd_conf_t *priv = NULL; - char *src_brick = NULL; - char *dst_brick = NULL; - glusterd_brickinfo_t *src_brickinfo = NULL; - glusterd_brickinfo_t *dst_brickinfo = NULL; - - this = THIS; - GF_ASSERT (this); - - priv = this->private; - GF_ASSERT (priv); - - ret = dict_get_str (dict, "src-brick", &src_brick); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get src brick"); - goto out; - } - - gf_log (this->name, GF_LOG_DEBUG, - "src brick=%s", src_brick); - - ret = dict_get_str (dict, "dst-brick", &dst_brick); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get dst brick"); - goto out; - } - - gf_log (this->name, GF_LOG_DEBUG, - "dst brick=%s", dst_brick); - - ret = dict_get_str (dict, "volname", &volname); - - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); - goto out; - } - - ret = dict_get_int32 (dict, "operation", (int32_t *)&replace_op); - if (ret) { - gf_log (this->name, GF_LOG_DEBUG, - "dict_get on operation failed"); - goto out; - } - - ret = glusterd_volinfo_find (volname, &volinfo); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to allocate memory"); - goto out; - } - - ret = glusterd_volume_brickinfo_get_by_brick (src_brick, volinfo, &src_brickinfo); - if (ret) { - gf_log ("", GF_LOG_DEBUG, "Unable to get src-brickinfo"); - goto out; - } - - - ret = glusterd_get_rb_dst_brickinfo (volinfo, &dst_brickinfo); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get " - "replace brick destination brickinfo"); - goto out; - } - - ret = glusterd_resolve_brick (dst_brickinfo); - if (ret) { - gf_log ("", GF_LOG_DEBUG, "Unable to resolve dst-brickinfo"); - goto out; - } - - ret = rb_update_srcbrick_port (src_brickinfo, rsp_dict, - dict, replace_op); - if (ret) - goto out; - - if ((GF_REPLACE_OP_START != replace_op)) { - ret = rb_update_dstbrick_port (dst_brickinfo, rsp_dict, - dict, replace_op); - if (ret) - goto out; - } - - switch (replace_op) { - case GF_REPLACE_OP_START: - { - if (!glusterd_is_local_addr (dst_brickinfo->hostname)) { - gf_log ("", GF_LOG_INFO, - "I AM THE DESTINATION HOST"); - if (!glusterd_is_rb_paused (volinfo)) { - ret = rb_spawn_destination_brick (volinfo, dst_brickinfo); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Failed to spawn destination brick"); - goto out; - } - } else { - gf_log ("", GF_LOG_ERROR, "Replace brick is already " - "started=> no need to restart dst brick "); - } - } - - - if (!glusterd_is_local_addr (src_brickinfo->hostname)) { - ret = rb_src_brick_restart (volinfo, src_brickinfo, - 1); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Could not restart src-brick"); - goto out; - } - } - - if (!glusterd_is_local_addr (dst_brickinfo->hostname)) { - gf_log ("", GF_LOG_INFO, - "adding dst-brick port no"); - - ret = rb_update_dstbrick_port (dst_brickinfo, rsp_dict, - dict, replace_op); - if (ret) - goto out; - } - - glusterd_set_rb_status (volinfo, GF_RB_STATUS_STARTED); - break; - } - - case GF_REPLACE_OP_COMMIT: - case GF_REPLACE_OP_COMMIT_FORCE: - { - ret = 
dict_set_int32 (volinfo->dict, "enable-pump", 0); - gf_log ("", GF_LOG_DEBUG, - "Received commit - will be adding dst brick and " - "removing src brick"); - - if (!glusterd_is_local_addr (dst_brickinfo->hostname) && - replace_op != GF_REPLACE_OP_COMMIT_FORCE) { - gf_log ("", GF_LOG_INFO, - "I AM THE DESTINATION HOST"); - ret = rb_kill_destination_brick (volinfo, dst_brickinfo); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Failed to kill destination brick"); - goto out; - } - } - +glusterd_op_ac_send_unlock(glusterd_op_sm_event_t *event, void *ctx) +{ + int ret = 0; + rpc_clnt_procedure_t *proc = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + uint32_t pending_count = 0; + dict_t *dict = NULL; + + this = THIS; + priv = this->private; + GF_ASSERT(priv); + + RCU_READ_LOCK; + cds_list_for_each_entry_rcu(peerinfo, &priv->peers, uuid_list) + { + /* Only send requests to peers who were available before the + * transaction started + */ + if (peerinfo->generation > opinfo.txn_generation) + continue; + + if (!peerinfo->connected || !peerinfo->mgmt || !peerinfo->locked) + continue; + if ((peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) && + (glusterd_op_get_op() != GD_OP_SYNC_VOLUME)) + continue; + /* Based on the op_version, + * release the cluster or mgmt_v3 lock */ + if (priv->op_version < GD_OP_VERSION_3_6_0) { + proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_CLUSTER_UNLOCK]; + if (proc->fn) { + ret = proc->fn(NULL, this, peerinfo); if (ret) { - gf_log ("", GF_LOG_CRITICAL, - "Unable to cleanup dst brick"); - goto out; - } - + opinfo.op_errstr = gf_strdup( + "Unlocking failed for one of " + "the peer."); + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_CLUSTER_UNLOCK_FAILED, + "Unlocking failed for operation" + " volume %s on peer %s", + gd_op_list[opinfo.op], peerinfo->hostname); + continue; + } + pending_count++; + peerinfo->locked = _gf_false; + } + } else { + dict = glusterd_op_get_ctx(); + dict_ref(dict); - ret = glusterd_nfs_server_stop (); + proc = &peerinfo->mgmt_v3->proctable[GLUSTERD_MGMT_V3_UNLOCK]; + if (proc->fn) { + ret = dict_set_static_ptr(dict, "peerinfo", peerinfo); if (ret) { - gf_log ("", GF_LOG_ERROR, - "Unable to stop nfs server, ret: %d", ret); - } - - ret = glusterd_op_perform_replace_brick (volinfo, src_brick, - dst_brick); - if (ret) { - gf_log ("", GF_LOG_CRITICAL, "Unable to add " - "dst-brick: %s to volume: %s", - dst_brick, volinfo->volname); - (void) glusterd_check_generate_start_nfs (); - goto out; - } - - volinfo->defrag_status = 0; - - ret = glusterd_check_generate_start_nfs (); - if (ret) { - gf_log ("", GF_LOG_CRITICAL, - "Failed to generate nfs volume file"); - } - - ret = glusterd_store_volinfo (volinfo, - GLUSTERD_VOLINFO_VER_AC_INCREMENT); - - if (ret) - goto out; - - ret = glusterd_fetchspec_notify (THIS); - glusterd_set_rb_status (volinfo, GF_RB_STATUS_NONE); - glusterd_brickinfo_delete (volinfo->dst_brick); - volinfo->src_brick = volinfo->dst_brick = NULL; - } - break; - - case GF_REPLACE_OP_PAUSE: - { - gf_log ("", GF_LOG_DEBUG, - "Received pause - doing nothing"); - ctx = glusterd_op_get_ctx (GD_OP_REPLACE_BRICK); - if (ctx) { - ret = rb_do_operation_pause (volinfo, src_brickinfo, - dst_brickinfo); - if (ret) { - gf_log ("", GF_LOG_ERROR, - "Pause operation failed"); - goto out; - } - } - - glusterd_set_rb_status (volinfo, GF_RB_STATUS_PAUSED); - } - break; - - case GF_REPLACE_OP_ABORT: - { - - ret = dict_set_int32 (volinfo->dict, "enable-pump", 0); - if (ret) { - gf_log ("", GF_LOG_CRITICAL, 
"Unable to disable pump"); - } - - if (!glusterd_is_local_addr (dst_brickinfo->hostname)) { - gf_log ("", GF_LOG_INFO, - "I AM THE DESTINATION HOST"); - ret = rb_kill_destination_brick (volinfo, dst_brickinfo); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Failed to kill destination brick"); - goto out; - } - } - - ctx = glusterd_op_get_ctx (GD_OP_REPLACE_BRICK); - if (ctx) { - ret = rb_do_operation_abort (volinfo, src_brickinfo, dst_brickinfo); - if (ret) { - gf_log ("", GF_LOG_ERROR, - "Abort operation failed"); - goto out; - } + opinfo.op_errstr = gf_strdup( + "Unlocking failed for one of the " + "peer."); + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_CLUSTER_UNLOCK_FAILED, + "Unlocking failed for operation" + " volume %s on peer %s", + gd_op_list[opinfo.op], peerinfo->hostname); + dict_unref(dict); + continue; + } + + ret = proc->fn(NULL, this, dict); + if (ret) { + opinfo.op_errstr = gf_strdup( + "Unlocking failed for one of the " + "peer."); + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_CLUSTER_UNLOCK_FAILED, + "Unlocking failed for operation" + " volume %s on peer %s", + gd_op_list[opinfo.op], peerinfo->hostname); + dict_unref(dict); + continue; } - glusterd_set_rb_status (volinfo, GF_RB_STATUS_NONE); - glusterd_brickinfo_delete (volinfo->dst_brick); - volinfo->src_brick = volinfo->dst_brick = NULL; + pending_count++; + peerinfo->locked = _gf_false; + } } - break; + } + RCU_READ_UNLOCK; - case GF_REPLACE_OP_STATUS: - { - gf_log ("", GF_LOG_DEBUG, - "received status - doing nothing"); - ctx = glusterd_op_get_ctx (GD_OP_REPLACE_BRICK); - if (ctx) { - ret = rb_do_operation_status (volinfo, src_brickinfo, - dst_brickinfo); - if (ret) - goto out; - } - - } - break; + opinfo.pending_count = pending_count; - default: - ret = -1; - goto out; - } + ret = glusterd_set_txn_opinfo(&event->txn_id, &opinfo); + if (ret) + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL, + "Unable to set " + "transaction's opinfo"); - if (ret) - goto out; + if (!opinfo.pending_count) + ret = glusterd_op_sm_inject_all_acc(&event->txn_id); -out: - return ret; + gf_msg_debug(this->name, 0, "Returning with %d", ret); + return ret; } -void -_delete_reconfig_opt (dict_t *this, char *key, data_t *value, void *data) +static int +glusterd_op_ac_ack_drain(glusterd_op_sm_event_t *event, void *ctx) { + int ret = 0; - int exists = 0; + if (opinfo.pending_count > 0) + opinfo.pending_count--; - exists = glusterd_check_option_exists(key, NULL); + ret = glusterd_set_txn_opinfo(&event->txn_id, &opinfo); + if (ret) + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL, + "Unable to set " + "transaction's opinfo"); - if (exists == 1) { - gf_log ("", GF_LOG_DEBUG, "deleting dict with key=%s,value=%s", - key, value->data); - dict_del (this, key); - } + if (!opinfo.pending_count) + ret = glusterd_op_sm_inject_event(GD_OP_EVENT_ALL_ACK, &event->txn_id, + NULL); + + gf_msg_debug(THIS->name, 0, "Returning with %d", ret); + return ret; } -int -glusterd_options_reset (glusterd_volinfo_t *volinfo) +static int +glusterd_op_ac_send_unlock_drain(glusterd_op_sm_event_t *event, void *ctx) { - int ret = 0; - - gf_log ("", GF_LOG_DEBUG, "Received volume set reset command"); - - GF_ASSERT (volinfo->dict); - - dict_foreach (volinfo->dict, _delete_reconfig_opt, volinfo->dict); - - ret = glusterd_create_volfiles_and_notify_services (volinfo); - - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to create volfile for" - " 'volume set'"); - ret = -1; - goto out; - } - - ret = glusterd_store_volinfo (volinfo, 
GLUSTERD_VOLINFO_VER_AC_INCREMENT); - if (ret) - goto out; + return glusterd_op_ac_ack_drain(event, ctx); +} - if (GLUSTERD_STATUS_STARTED == volinfo->status) - ret = glusterd_check_generate_start_nfs (); +static int +glusterd_op_ac_lock(glusterd_op_sm_event_t *event, void *ctx) +{ + int32_t ret = 0; + char *volname = NULL; + char *globalname = NULL; + glusterd_op_lock_ctx_t *lock_ctx = NULL; + xlator_t *this = NULL; + uint32_t op_errno = 0; + glusterd_conf_t *conf = NULL; + uint32_t timeout = 0; + + GF_ASSERT(event); + GF_ASSERT(ctx); + + this = THIS; + GF_ASSERT(this); + conf = this->private; + GF_ASSERT(conf); + + lock_ctx = (glusterd_op_lock_ctx_t *)ctx; + + /* If the req came from a node running on older op_version + * the dict won't be present. Based on it acquiring a cluster + * or mgmt_v3 lock */ + if (lock_ctx->dict == NULL) { + ret = glusterd_lock(lock_ctx->uuid); + glusterd_op_lock_send_resp(lock_ctx->req, ret); + } else { + /* Cli will add timeout key to dict if the default timeout is + * other than 2 minutes. Here we use this value to check whether + * mgmt_v3_lock_timeout should be set to default value or we + * need to change the value according to timeout value + * i.e, timeout + 120 seconds. */ + ret = dict_get_uint32(lock_ctx->dict, "timeout", &timeout); + if (!ret) + conf->mgmt_v3_lock_timeout = timeout + 120; + + ret = dict_get_strn(lock_ctx->dict, "volname", SLEN("volname"), + &volname); if (ret) - goto out; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to acquire volname"); + else { + ret = glusterd_mgmt_v3_lock(volname, lock_ctx->uuid, &op_errno, + "vol"); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_LOCK_GET_FAIL, + "Unable to acquire lock for %s", volname); + goto out; + } + ret = dict_get_strn(lock_ctx->dict, "globalname", SLEN("globalname"), + &globalname); + if (!ret) { + ret = glusterd_mgmt_v3_lock(globalname, lock_ctx->uuid, &op_errno, + "global"); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_LOCK_GET_FAIL, + "Unable to acquire lock for %s", globalname); + } + out: + glusterd_op_mgmt_v3_lock_send_resp(lock_ctx->req, &event->txn_id, ret); - ret = 0; + dict_unref(lock_ctx->dict); + } -out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + gf_msg_debug(THIS->name, 0, "Lock Returned %d", ret); + return ret; } - static int -glusterd_op_reset_volume (dict_t *dict) -{ - glusterd_volinfo_t *volinfo = NULL; - int ret = -1; - char *volname = NULL; - - ret = dict_get_str (dict, "volname", &volname); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name " ); - goto out; +glusterd_op_ac_unlock(glusterd_op_sm_event_t *event, void *ctx) +{ + int32_t ret = 0; + char *volname = NULL; + char *globalname = NULL; + glusterd_op_lock_ctx_t *lock_ctx = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + GF_ASSERT(event); + GF_ASSERT(ctx); + + this = THIS; + priv = this->private; + + lock_ctx = (glusterd_op_lock_ctx_t *)ctx; + + /* If the req came from a node running on older op_version + * the dict won't be present. 
Based on it releasing the cluster + * or mgmt_v3 lock */ + if (lock_ctx->dict == NULL) { + ret = glusterd_unlock(lock_ctx->uuid); + glusterd_op_unlock_send_resp(lock_ctx->req, ret); + } else { + ret = dict_get_strn(lock_ctx->dict, "volname", SLEN("volname"), + &volname); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to acquire volname"); + else { + ret = glusterd_mgmt_v3_unlock(volname, lock_ctx->uuid, "vol"); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_UNLOCK_FAIL, + "Unable to release lock for %s", volname); + goto out; } - ret = glusterd_volinfo_find (volname, &volinfo); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to allocate memory"); - goto out; + ret = dict_get_strn(lock_ctx->dict, "globalname", SLEN("globalname"), + &globalname); + if (!ret) { + ret = glusterd_mgmt_v3_unlock(globalname, lock_ctx->uuid, "global"); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_UNLOCK_FAIL, + "Unable to release lock for %s", globalname); } + out: + glusterd_op_mgmt_v3_unlock_send_resp(lock_ctx->req, &event->txn_id, + ret); - ret = glusterd_options_reset (volinfo); + dict_unref(lock_ctx->dict); + } -out: - gf_log ("", GF_LOG_DEBUG, "'volume reset' returning %d", ret); - return ret; + gf_msg_debug(this->name, 0, "Unlock Returned %d", ret); + if (priv->pending_quorum_action) + glusterd_do_quorum_action(); + return ret; } -int -stop_gsync (char *master, char *slave, char **op_errstr) +static int +glusterd_op_ac_local_unlock(glusterd_op_sm_event_t *event, void *ctx) { - int32_t ret = -1; - int32_t status = 0; - pid_t pid = 0; - FILE *file = NULL; - char pidfile[PATH_MAX] = {0,}; - char buf [1024] = {0,}; - int i = 0; - - ret = gsync_status (master, slave, &status); - if (ret == 0 && status == -1) { - gf_log ("", GF_LOG_WARNING, "gsync is not running"); - *op_errstr = gf_strdup ("gsync is not running"); - ret = -1; - goto out; - } else if (ret == -1) { - gf_log ("", GF_LOG_WARNING, "gsync stop validation " - " failed"); - *op_errstr = gf_strdup ("command to failed, please " - "check the log file"); - goto out; - } + int ret = 0; + uuid_t *originator = NULL; - ret = glusterd_gsync_get_pid_file (pidfile, master, slave); - if (ret == -1) { - ret = -1; - gf_log ("", GF_LOG_WARNING, "failed to create the pidfile string"); - goto out; - } + GF_ASSERT(event); + GF_ASSERT(ctx); - file = fopen (pidfile, "r+"); - if (!file) { - gf_log ("", GF_LOG_WARNING, "cannot open pid file"); - *op_errstr = gf_strdup ("stop unsuccessful"); - ret = -1; - goto out; - } + originator = (uuid_t *)ctx; - ret = read (fileno(file), buf, 1024); - if (ret > 0) { - pid = strtol (buf, NULL, 10); - ret = kill (-pid, SIGTERM); - if (ret) { - gf_log ("", GF_LOG_WARNING, - "failed to stop gsyncd"); - goto out; - } - for (i = 0; i < 20; i++) { - if (gsync_status (master, slave, &status) == -1 || - status == -1) { - /* monitor gsyncd is dead but worker may - * still be alive, give some more time - * before SIGKILL (hack) - */ - sleep (0.05); - break; - } - sleep (0.05); - } - kill (-pid, SIGKILL); - unlink (pidfile); - } - ret = 0; + ret = glusterd_unlock(*originator); - *op_errstr = gf_strdup ("gsync stopped successfully"); + gf_msg_debug(THIS->name, 0, "Unlock Returned %d", ret); -out: - return ret; + return ret; } -int -gsync_config_set (char *master, char *slave, - dict_t *dict, char **op_errstr) +static int +glusterd_op_ac_rcvd_lock_acc(glusterd_op_sm_event_t *event, void *ctx) { - int32_t ret = -1; - char *op_name = NULL; - char *op_value = NULL; - char cmd[1024] = 
{0,}; - glusterd_conf_t *priv = NULL; - - if (THIS == NULL) { - gf_log ("", GF_LOG_ERROR, "THIS of glusterd not present"); - *op_errstr = gf_strdup ("Error! Glusterd cannot start gsyncd"); - goto out; - } - - priv = THIS->private; - - if (priv == NULL) { - gf_log ("", GF_LOG_ERROR, "priv of glusterd not present"); - *op_errstr = gf_strdup ("Error! Glusterd cannot start gsyncd"); - goto out; - } + int ret = 0; - ret = dict_get_str (dict, "op_name", &op_name); - if (ret < 0) { - gf_log ("", GF_LOG_WARNING, "failed to get the " - "option name for %s %s", master, slave); + GF_ASSERT(event); - *op_errstr = gf_strdup ("configure command failed, " - "please check the log-file\n"); - goto out; - } + if (opinfo.pending_count > 0) + opinfo.pending_count--; - ret = dict_get_str (dict, "op_value", &op_value); - if (ret < 0) { - gf_log ("", GF_LOG_WARNING, "failed to get " - "the option value for %s %s", - master, slave); + ret = glusterd_set_txn_opinfo(&event->txn_id, &opinfo); + if (ret) + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL, + "Unable to set " + "transaction's opinfo"); - *op_errstr = gf_strdup ("configure command " - "failed, please check " - "the log-file\n"); - goto out; - } + if (opinfo.pending_count > 0) + goto out; - ret = snprintf (cmd, 1024, GSYNCD_PREFIX"/gsyncd -c %s/%s %s %s" - " --config-set %s %s", priv->workdir, - GSYNC_CONF, master, slave, op_name, op_value); - if (ret <= 0) { - gf_log ("", GF_LOG_WARNING, "failed to " - "construct the gsyncd command"); + ret = glusterd_op_sm_inject_event(GD_OP_EVENT_ALL_ACC, &event->txn_id, + NULL); - *op_errstr = gf_strdup ("configure command failed, " - "please check the log-file\n"); - goto out; - } - - ret = system (cmd); - if (ret == -1) { - gf_log ("", GF_LOG_WARNING, "gsyncd failed to " - "set %s option for %s %s peer", - op_name, master, slave); - - *op_errstr = gf_strdup ("configure command " - "failed, please check " - "the log-file\n"); - goto out; - } - ret = 0; - *op_errstr = gf_strdup ("config-set successful"); + gf_msg_debug(THIS->name, 0, "Returning %d", ret); out: - return ret; + return ret; } int -gsync_config_del (char *master, char *slave, - dict_t *dict, char **op_errstr) -{ - int32_t ret = -1; - char *op_name = NULL; - char cmd[PATH_MAX] = {0,}; - glusterd_conf_t *priv = NULL; - - if (THIS == NULL) { - gf_log ("", GF_LOG_ERROR, "THIS of glusterd not present"); - *op_errstr = gf_strdup ("Error! Glusterd cannot start gsyncd"); - goto out; - } - - priv = THIS->private; - - if (priv == NULL) { - gf_log ("", GF_LOG_ERROR, "priv of glusterd not present"); - *op_errstr = gf_strdup ("Error! 
Glusterd cannot start gsyncd"); - goto out; - } - - ret = dict_get_str (dict, "op_name", &op_name); - if (ret < 0) { - gf_log ("", GF_LOG_WARNING, "failed to get " - "the option for %s %s", master, slave); - - *op_errstr = gf_strdup ("configure command " - "failed, please check " - "the log-file\n"); - goto out; - } - - ret = snprintf (cmd, PATH_MAX, GSYNCD_PREFIX"/gsyncd -c %s/%s %s %s" - " --config-del %s ", priv->workdir, - GSYNC_CONF, master, slave, op_name); - if (ret <= 0) { - gf_log ("", GF_LOG_WARNING, "failed to " - "construct the gsyncd command"); - *op_errstr = gf_strdup ("configure command " - "failed, please check " - "the log-file\n"); - goto out; - } - - ret = system (cmd); - if (ret == -1) { - gf_log ("", GF_LOG_WARNING, "failed to delete " - "%s option for %s %s peer", op_name, - master, slave); - *op_errstr = gf_strdup ("configure command " - "failed, please check " - "the log-file\n"); - goto out; - } - ret = 0; - *op_errstr = gf_strdup ("config-del successful"); +glusterd_dict_set_volid(dict_t *dict, char *volname, char **op_errstr) +{ + int ret = -1; + glusterd_volinfo_t *volinfo = NULL; + char *volid = NULL; + char msg[1024] = { + 0, + }; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + if (!dict || !volname) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + snprintf(msg, sizeof(msg), FMTSTR_CHECK_VOL_EXISTS, volname); + goto out; + } + volid = gf_strdup(uuid_utoa(volinfo->volume_id)); + if (!volid) { + ret = -1; + goto out; + } + ret = dict_set_dynstrn(dict, "vol-id", SLEN("vol-id"), volid); + if (ret) { + snprintf(msg, sizeof(msg), + "Failed to set volume id of volume" + " %s", + volname); + GF_FREE(volid); + goto out; + } out: - return ret; + if (msg[0] != '\0') { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_ID_SET_FAIL, "%s", msg); + *op_errstr = gf_strdup(msg); + } + return ret; } - int -gsync_configure (char *master, char *slave, - dict_t *dict, char **op_errstr) -{ - int32_t ret = -1; - int32_t config_type = 0; - - ret = dict_get_int32 (dict, "config_type", &config_type); - if (ret < 0) { - gf_log ("", GF_LOG_WARNING, "couldn't get the " - "config-type for %s %s", master, slave); - *op_errstr = gf_strdup ("configure command failed, " - "please check the log-file\n"); - goto out; - } - - if (config_type == GF_GSYNC_OPTION_TYPE_CONFIG_SET) { - ret = gsync_config_set (master, slave, dict, op_errstr); - goto out; - } - - if (config_type == GF_GSYNC_OPTION_TYPE_CONFIG_DEL) { - ret = gsync_config_del (master, slave, dict, op_errstr); - goto out; - } - - if ((config_type == GF_GSYNC_OPTION_TYPE_CONFIG_GET_ALL) || - (config_type == GF_GSYNC_OPTION_TYPE_CONFIG_GET)) - goto out; - else { - gf_log ("", GF_LOG_WARNING, "Invalid config type"); - *op_errstr = gf_strdup ("Invalid config type"); - ret = -1; - } - -out: - return ret; +gd_set_commit_hash(dict_t *dict) +{ + struct timeval tv; + uint32_t hash; + + /* + * We need a commit hash that won't conflict with others we might have + * set, or zero which is the implicit value if we never have. Using + * seconds<<3 like this ensures that we'll only get a collision if two + * consecutive rebalances are separated by exactly 2^29 seconds - about + * 17 years - and even then there's only a 1/8 chance of a collision in + * the low order bits. It's far more likely that this code will have + * changed completely by then. If not, call me in 2031. + * + * P.S. Time zone changes? Yeah, right. 
+ */ + gettimeofday(&tv, NULL); + hash = tv.tv_sec << 3; + + /* + * Make sure at least one of those low-order bits is set. The extra + * shifting is because not all machines have sub-millisecond time + * resolution. + */ + hash |= 1 << ((tv.tv_usec >> 10) % 3); + + return dict_set_uint32(dict, "commit-hash", hash); } int -gsync_command_exec (dict_t *dict, char **op_errstr) -{ - char *master = NULL; - char *slave = NULL; - int32_t ret = -1; - int32_t type = -1; - - GF_VALIDATE_OR_GOTO ("gsync", dict, out); - GF_VALIDATE_OR_GOTO ("gsync", op_errstr, out); - - ret = dict_get_int32 (dict, "type", &type); - if (ret < 0) - goto out; - - ret = dict_get_str (dict, "master", &master); - if (ret < 0) - goto out; - - ret = dict_get_str (dict, "slave", &slave); - if (ret < 0) - goto out; +glusterd_op_build_payload(dict_t **req, char **op_errstr, dict_t *op_ctx) +{ + int ret = -1; + void *ctx = NULL; + dict_t *dict = NULL; + dict_t *req_dict = NULL; + glusterd_op_t op = GD_OP_NONE; + char *volname = NULL; + uint32_t status_cmd = GF_CLI_STATUS_NONE; + xlator_t *this = NULL; + gf_boolean_t do_common = _gf_false; + + GF_ASSERT(req); + + this = THIS; + GF_ASSERT(this); + + req_dict = dict_new(); + if (!req_dict) + goto out; + + if (!op_ctx) { + op = glusterd_op_get_op(); + ctx = (void *)glusterd_op_get_ctx(); + if (!ctx) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_OPTIONS_GIVEN, + "Null Context for " + "op %d", + op); + ret = -1; + goto out; + } + + } else { +#define GD_SYNC_OPCODE_KEY "sync-mgmt-operation" + ret = dict_get_int32(op_ctx, GD_SYNC_OPCODE_KEY, (int32_t *)&op); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get volume" + " operation"); + goto out; + } + ctx = op_ctx; +#undef GD_SYNC_OPCODE_KEY + } + + dict = ctx; + switch (op) { + case GD_OP_CREATE_VOLUME: { + ++glusterfs_port; + ret = dict_set_int32n(dict, "port", SLEN("port"), glusterfs_port); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set port in " + "dictionary"); + goto out; + } + dict_copy(dict, req_dict); + } break; + + case GD_OP_GSYNC_CREATE: + case GD_OP_GSYNC_SET: { + ret = glusterd_op_gsync_args_get(dict, op_errstr, &volname, NULL, + NULL); + if (ret == 0) { + ret = glusterd_dict_set_volid(dict, volname, op_errstr); + if (ret) + goto out; + } + dict_copy(dict, req_dict); + } break; + + case GD_OP_SET_VOLUME: { + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_DICT_GET_FAILED, + "volname is not present in " + "operation ctx"); + goto out; + } + if (strcmp(volname, "help") && strcmp(volname, "help-xml") && + strcasecmp(volname, "all")) { + ret = glusterd_dict_set_volid(dict, volname, op_errstr); + if (ret) + goto out; + } + dict_unref(req_dict); + req_dict = dict_ref(dict); + } break; - if (type == GF_GSYNC_OPTION_TYPE_START) { - ret = 0; + case GD_OP_REMOVE_BRICK: { + dict_t *dict = ctx; + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_DICT_GET_FAILED, + "volname is not present in " + "operation ctx"); goto out; - } + } - if (type == GF_GSYNC_OPTION_TYPE_STOP) { - ret = stop_gsync (master, slave, op_errstr); + ret = glusterd_dict_set_volid(dict, volname, op_errstr); + if (ret) goto out; - } - if (type == GF_GSYNC_OPTION_TYPE_CONFIGURE) { - ret = gsync_configure (master, slave, dict, op_errstr); + if (gd_set_commit_hash(dict) != 0) { goto out; - } else { - gf_log ("", GF_LOG_WARNING, 
"Invalid config type"); - *op_errstr = gf_strdup ("Invalid config type"); - ret = -1; - } -out: - return ret; -} + } -int32_t -glusterd_marker_create_volfile (glusterd_volinfo_t *volinfo) -{ - int32_t ret = 0; - - ret = glusterd_create_volfiles_and_notify_services (volinfo); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to create volfile" - " for setting of marker while 'gsync start'"); - ret = -1; - goto out; - } + dict_unref(req_dict); + req_dict = dict_ref(dict); + } break; - ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); - if (ret) + case GD_OP_STATUS_VOLUME: { + ret = dict_get_uint32(dict, "cmd", &status_cmd); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Status command not present " + "in op ctx"); goto out; + } + if (GF_CLI_STATUS_ALL & status_cmd) { + dict_copy(dict, req_dict); + break; + } + do_common = _gf_true; + } break; - if (GLUSTERD_STATUS_STARTED == volinfo->status) - ret = glusterd_check_generate_start_nfs (); - ret = 0; -out: - return ret; -} - -int -glusterd_set_marker_gsync (char *master, char *value) -{ - char *volname = NULL; - glusterd_volinfo_t *volinfo = NULL; - int ret = -1; - - volname = volname_from_master (master); - - ret = glusterd_volinfo_find (volname, &volinfo); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Volume not Found"); - ret = -1; - goto out; - } + case GD_OP_DELETE_VOLUME: + case GD_OP_START_VOLUME: + case GD_OP_STOP_VOLUME: + case GD_OP_ADD_BRICK: + case GD_OP_REPLACE_BRICK: + case GD_OP_RESET_VOLUME: + case GD_OP_LOG_ROTATE: + case GD_OP_QUOTA: + case GD_OP_PROFILE_VOLUME: + case GD_OP_HEAL_VOLUME: + case GD_OP_STATEDUMP_VOLUME: + case GD_OP_CLEARLOCKS_VOLUME: + case GD_OP_DEFRAG_BRICK_VOLUME: + case GD_OP_BARRIER: + case GD_OP_BITROT: + case GD_OP_SCRUB_STATUS: + case GD_OP_SCRUB_ONDEMAND: + case GD_OP_RESET_BRICK: { + do_common = _gf_true; + } break; + + case GD_OP_REBALANCE: { + if (gd_set_commit_hash(dict) != 0) { + goto out; + } + do_common = _gf_true; + } break; + + case GD_OP_SYNC_VOLUME: + case GD_OP_COPY_FILE: + case GD_OP_SYS_EXEC: + case GD_OP_GANESHA: { + dict_copy(dict, req_dict); + } break; - ret = glusterd_gsync_volinfo_dict_set (volinfo, - "features.marker-gsync", value); - if (ret < 0) - goto out; + default: + break; + } - ret = glusterd_marker_create_volfile (volinfo); + /* + * This has been moved out of the switch so that multiple ops with + * other special needs can all "fall through" to it. 
+ */ + if (do_common) { + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); if (ret) { - gf_log ("", GF_LOG_ERROR, "Setting dict failed"); - goto out; + gf_msg(this->name, GF_LOG_CRITICAL, -ret, GD_MSG_DICT_GET_FAILED, + "volname is not present in " + "operation ctx"); + goto out; } -out: - return ret; - -} - -int -glusterd_op_gsync_set (dict_t *dict) -{ - char *master = NULL; - int32_t ret = -1; - int32_t type = -1; - dict_t *ctx = NULL; - char *gsync_status = NULL; - char *op_errstr = NULL; - - ret = dict_get_int32 (dict, "type", &type); - if (ret < 0) - goto out; - - ret = dict_get_str (dict, "master", &master); - if (ret < 0) - goto out; - - if (type == GF_GSYNC_OPTION_TYPE_START) { - gsync_status = gf_strdup ("on"); - if (gsync_status == NULL) { - ret = -1; - goto out; - } - - ret = glusterd_set_marker_gsync (master, gsync_status); - if (ret != 0) { - gf_log ("", GF_LOG_WARNING, "marker start failed"); - op_errstr = gf_strdup ("gsync start failed"); - ret = -1; - goto out; - } - } - - if (type == GF_GSYNC_OPTION_TYPE_STOP) { - gsync_status = gf_strdup ("off"); - if (gsync_status == NULL) { - ret = -1; - goto out; - } - - ret = glusterd_set_marker_gsync (master, gsync_status); - if (ret != 0) { - gf_log ("", GF_LOG_WARNING, "marker stop failed"); - op_errstr = gf_strdup ("gsync stop failed"); - ret = -1; - goto out; - } - } -out: - ctx = glusterd_op_get_ctx (GD_OP_GSYNC_SET); - if (ctx) { - ret = gsync_command_exec (dict, &op_errstr); - if (op_errstr) { - ret = dict_set_str (ctx, "errstr", op_errstr); - if (ret) { - GF_FREE (op_errstr); - gf_log ("", GF_LOG_WARNING, "failed to set " - "error message in ctx"); - } - } - } - - return ret; -} - -int32_t -glusterd_check_if_quota_trans_enabled (glusterd_volinfo_t *volinfo) -{ - int32_t ret = 0; - char *quota_status = NULL; - gf_boolean_t flag = _gf_false; - - ret = glusterd_volinfo_get (volinfo, "features.quota", "a_status); - if (ret) { - gf_log ("", GF_LOG_ERROR, "failed to get the quota status"); - ret = -1; + if (strcasecmp(volname, "all")) { + ret = glusterd_dict_set_volid(dict, volname, op_errstr); + if (ret) goto out; } + dict_copy(dict, req_dict); + } - ret = gf_string2boolean (quota_status, &flag); - if (ret != 0) - goto out; + *req = req_dict; + ret = 0; - if (flag == _gf_false) { - gf_log ("", GF_LOG_ERROR, "first enable the quota translator"); - ret = -1; - goto out; - } - ret = 0; out: - return ret; + return ret; } -int32_t -_glusterd_quota_remove_limits (char **quota_limits, char *path) -{ - int ret = 0; - int i = 0; - int size = 0; - int len = 0; - int pathlen = 0; - int skiplen = 0; - int flag = 0; - char *limits = NULL; - char *qlimits = NULL; - - if (*quota_limits == NULL) - return -1; - - qlimits = *quota_limits; - - pathlen = strlen (path); - - len = strlen (qlimits); - - limits = GF_CALLOC (len + 1, sizeof (char), gf_gld_mt_char); - - if (!limits) - return -1; - - while (i < len) { - if (!memcmp ((void *) &qlimits [i], (void *)path, pathlen)) - if (qlimits [i + pathlen] == ':') - flag = 1; - - while (qlimits [i + size] != ',' && - qlimits [i + size] != '\0') - size++; - - if (!flag) { - memcpy ((void *) &limits [i], (void *) &qlimits [i], size + 1); - } else { - skiplen = size + 1; - size = len - i - size; - memcpy ((void *) &limits [i], (void *) &qlimits [i + skiplen], size); - break; - } - - i += size + 1; - size = 0; - } - - if (!flag) { - ret = 1; - } else { - len = strlen (limits); - - if (len == 0) { - GF_FREE (qlimits); - - *quota_limits = NULL; - - goto out; - } - - if (limits[len - 1] == ',') { - 
limits[len - 1] = '\0'; - len --; - } - - GF_FREE (qlimits); - - qlimits = GF_CALLOC (len + 1, sizeof (char), gf_gld_mt_char); - - if (!qlimits) { - ret = -1; - goto out; - } - - memcpy ((void *) qlimits, (void *) limits, len + 1); - - *quota_limits = qlimits; - - ret = 0; - } - +static int +glusterd_op_ac_send_stage_op(glusterd_op_sm_event_t *event, void *ctx) +{ + int ret = 0; + int ret1 = 0; + rpc_clnt_procedure_t *proc = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + dict_t *dict = NULL; + dict_t *rsp_dict = NULL; + char *op_errstr = NULL; + glusterd_op_t op = GD_OP_NONE; + uint32_t pending_count = 0; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + op = glusterd_op_get_op(); + + rsp_dict = dict_new(); + if (!rsp_dict) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_DICT_CREATE_FAIL, + "Failed to create rsp_dict"); + ret = -1; + goto out; + } + + ret = glusterd_op_build_payload(&dict, &op_errstr, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_OP_PAYLOAD_BUILD_FAIL, + LOGSTR_BUILD_PAYLOAD, gd_op_list[op]); + if (op_errstr == NULL) + gf_asprintf(&op_errstr, OPERRSTR_BUILD_PAYLOAD); + opinfo.op_errstr = op_errstr; + goto out; + } + + ret = glusterd_validate_quorum(this, op, dict, &op_errstr); + if (ret) { + gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_SERVER_QUORUM_NOT_MET, + "Server quorum not met. Rejecting operation."); + opinfo.op_errstr = op_errstr; + goto out; + } + + ret = glusterd_op_stage_validate(op, dict, &op_errstr, rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VALIDATE_FAILED, + LOGSTR_STAGE_FAIL, gd_op_list[op], "localhost", + (op_errstr) ? ":" : " ", (op_errstr) ? op_errstr : " "); + if (op_errstr == NULL) + gf_asprintf(&op_errstr, OPERRSTR_STAGE_FAIL, "localhost"); + opinfo.op_errstr = op_errstr; + goto out; + } + + RCU_READ_LOCK; + cds_list_for_each_entry_rcu(peerinfo, &priv->peers, uuid_list) + { + /* Only send requests to peers who were available before the + * transaction started + */ + if (peerinfo->generation > opinfo.txn_generation) + continue; + + if (!peerinfo->connected || !peerinfo->mgmt) + continue; + if ((peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) && + (glusterd_op_get_op() != GD_OP_SYNC_VOLUME)) + continue; + + proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_STAGE_OP]; + GF_ASSERT(proc); + if (proc->fn) { + ret = dict_set_static_ptr(dict, "peerinfo", peerinfo); + if (ret) { + RCU_READ_UNLOCK; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "failed to " + "set peerinfo"); + goto out; + } + + ret = proc->fn(NULL, this, dict); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, + GD_MSG_STAGE_REQ_SEND_FAIL, + "Failed to " + "send stage request for operation " + "'Volume %s' to peer %s", + gd_op_list[op], peerinfo->hostname); + continue; + } + pending_count++; + } + } + RCU_READ_UNLOCK; + + opinfo.pending_count = pending_count; out: - if (limits) - GF_FREE (limits); - - return ret; -} + if (ret) + opinfo.op_ret = ret; -void * -glusterd_quota_start_crawl (void *data) -{ - int32_t ret = 0; - char cmd_str [1024] = {0, }; - char *mount = NULL; + ret1 = glusterd_set_txn_opinfo(&event->txn_id, &opinfo); + if (ret1) + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL, + "Unable to set " + "transaction's opinfo"); - mount = (char *) data; + if (rsp_dict) + dict_unref(rsp_dict); - snprintf (cmd_str, 1024, "find %s", mount); + if (dict) + dict_unref(dict); + if (ret) { + 
glusterd_op_sm_inject_event(GD_OP_EVENT_RCVD_RJT, &event->txn_id, NULL); + opinfo.op_ret = ret; + } - gf_log ("quota crawl", GF_LOG_INFO, "crawl started"); + gf_msg_debug(this->name, 0, + "Sent stage op request for " + "'Volume %s' to %d peers", + gd_op_list[op], opinfo.pending_count); - ret = system (cmd_str); - if (ret == -1) - gf_log ("crawl", GF_LOG_ERROR, "quota crawl failed"); + if (!opinfo.pending_count) + ret = glusterd_op_sm_inject_all_acc(&event->txn_id); - gf_log ("quota crawl", GF_LOG_INFO, "crawl ended"); + gf_msg_debug(this->name, 0, "Returning with %d", ret); - return NULL; + return ret; } -int32_t -glusterd_quota_initiate_fs_crawl (glusterd_conf_t *priv, char *volname) +/* This function takes a dict and converts the uuid values of key specified + * into hostnames + */ +static int +glusterd_op_volume_dict_uuid_to_hostname(dict_t *dict, const char *key_fmt, + int idx_min, int idx_max) { - pthread_t th; - int32_t ret = 0; - char mount [1024] = {0, }; - char cmd_str [1024] = {0, }; - - snprintf (mount, 1024, "%s/mount/%s", - priv->workdir, volname); - - snprintf (cmd_str, 1024, "mkdir -p %s", mount); - - ret = system (cmd_str); - if (ret == -1) { - gf_log ("glusterd", GF_LOG_DEBUG, "command: %s failed", cmd_str); - goto out; - } + int ret = -1; + int i = 0; + char key[128]; + int keylen; + char *uuid_str = NULL; + uuid_t uuid = { + 0, + }; + char *hostname = NULL; + xlator_t *this = NULL; - snprintf (cmd_str, 1024, "%s/sbin/glusterfs -s localhost " - "--volfile-id %s %s", GFS_PREFIX, volname, mount); + this = THIS; + GF_ASSERT(this); - ret = system (cmd_str); - if (ret == -1) { - gf_log("glusterd", GF_LOG_DEBUG, "command: %s failed", cmd_str); - goto out; - } + GF_ASSERT(dict); + GF_ASSERT(key_fmt); - ret = pthread_create (&th, NULL, glusterd_quota_start_crawl, mount); + for (i = idx_min; i < idx_max; i++) { + keylen = snprintf(key, sizeof(key), key_fmt, i); + ret = dict_get_strn(dict, key, keylen, &uuid_str); if (ret) { - snprintf (cmd_str, 1024, "umount -l %s", mount); - ret = system (cmd_str); - } -out: - return ret; -} - -char * -glusterd_quota_get_limit_value (char *quota_limits, char *path) -{ - int32_t i, j, k, l, len; - int32_t pat_len, diff; - char *ret_str = NULL; - - len = strlen (quota_limits); - pat_len = strlen (path); - i = 0; - j = 0; - - while (i < len) { - j = i; - k = 0; - while (path [k] == quota_limits [j]) { - j++; - k++; - } - - l = j; - - while (quota_limits [j] != ',' && - quota_limits [j] != '\0') - j++; - - if (quota_limits [l] == ':' && pat_len == (l - i)) { - diff = j - i; - ret_str = GF_CALLOC (diff + 1, sizeof (char), - gf_gld_mt_char); - - strncpy (ret_str, "a_limits [i], diff); - - break; - } - i = ++j; //skip ',' - } - - return ret_str; -} - -char* -_glusterd_quota_get_limit_usages (glusterd_volinfo_t *volinfo, - char *path, char **op_errstr) -{ - int32_t ret = 0; - char *quota_limits = NULL; - char *ret_str = NULL; - - if (volinfo == NULL) - return NULL; - - ret = glusterd_volinfo_get (volinfo, "features.limit-usage", - "a_limits); - if (ret) - return NULL; - if (quota_limits == NULL) { - ret_str = NULL; - *op_errstr = gf_strdup ("Limits not set any directory"); - } else if (path == NULL) - ret_str = gf_strdup (quota_limits); - else - ret_str = glusterd_quota_get_limit_value (quota_limits, path); - - return ret_str; -} - -int32_t -glusterd_quota_get_limit_usages (glusterd_conf_t *priv, - glusterd_volinfo_t *volinfo, - char *volname, - dict_t *dict, - char **op_errstr) -{ - int32_t i = 0; - int32_t ret = 0; - int32_t count = 0; - char *path = 
NULL; - dict_t *ctx = NULL; - char cmd_str [1024] = {0, }; - char *ret_str = NULL; - - ctx = glusterd_op_get_ctx (GD_OP_QUOTA); - if (ctx == NULL) - return -1; - - ret = dict_get_int32 (dict, "count", &count); - if (ret < 0) - goto out; - - if (count == 0) { - ret_str = _glusterd_quota_get_limit_usages (volinfo, NULL, op_errstr); - } else { - i = 0; - while (count--) { - snprintf (cmd_str, 1024, "path%d", i++); - - ret = dict_get_str (dict, cmd_str, &path); - if (ret < 0) - goto out; - - ret_str = _glusterd_quota_get_limit_usages (volinfo, path, op_errstr); - } - } - - if (ret_str) { - ret = dict_set_dynstr (ctx, "limit_list", ret_str); - } -out: - return ret; -} - -int32_t -glusterd_quota_enable (glusterd_volinfo_t *volinfo, char **op_errstr, - gf_boolean_t *crawl) -{ - int32_t ret = -1; - char *status = NULL; - char *quota_status = NULL; - - GF_VALIDATE_OR_GOTO ("glusterd", volinfo, out); - GF_VALIDATE_OR_GOTO ("glusterd", crawl, out); - GF_VALIDATE_OR_GOTO ("glusterd", op_errstr, out); - - ret = glusterd_check_if_quota_trans_enabled (volinfo); - if (ret == 0) { - *op_errstr = gf_strdup ("Quota is already enabled"); - goto out; + ret = 0; + continue; } - quota_status = gf_strdup ("on"); - if (!quota_status) { - gf_log ("", GF_LOG_ERROR, "memory allocation failed"); - *op_errstr = gf_strdup ("Enabling quota has been unsuccessful"); - goto out; - } + gf_msg_debug(this->name, 0, "Got uuid %s", uuid_str); - ret = dict_set_dynstr (volinfo->dict, "features.quota", quota_status); + ret = gf_uuid_parse(uuid_str, uuid); + /* if parsing fails don't error out + * let the original value be retained + */ if (ret) { - gf_log ("", GF_LOG_ERROR, "dict set failed"); - *op_errstr = gf_strdup ("Enabling quota has been unsuccessful"); - goto out; + ret = 0; + continue; } - *op_errstr = gf_strdup ("Enabling quota has been successful"); - - status = gf_strdup ("on"); - if (status == NULL) { - ret = -1; + hostname = glusterd_uuid_to_hostname(uuid); + if (hostname) { + gf_msg_debug(this->name, 0, "%s -> %s", uuid_str, hostname); + ret = dict_set_dynstrn(dict, key, keylen, hostname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Error setting hostname %s to dict", hostname); + GF_FREE(hostname); goto out; + } } + } - *crawl = _gf_true; - - ret = 0; out: - return ret; + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; } -int32_t -glusterd_quota_disable (glusterd_volinfo_t *volinfo, char **op_errstr) +static int +reassign_defrag_status(dict_t *dict, char *key, int keylen, + gf_defrag_status_t *status) { - int32_t ret = -1; - char *quota_status = NULL; - - GF_VALIDATE_OR_GOTO ("glusterd", volinfo, out); - GF_VALIDATE_OR_GOTO ("glusterd", op_errstr, out); - - quota_status = gf_strdup ("off"); - if (!quota_status) { - gf_log ("", GF_LOG_ERROR, "memory allocation failed"); - *op_errstr = gf_strdup ("Disabling quota has been unsuccessful"); - goto out; - } - - ret = dict_set_dynstr (volinfo->dict, "features.quota", quota_status); - if (ret) { - gf_log ("", GF_LOG_ERROR, "dict set failed"); - *op_errstr = gf_strdup ("Disabling quota has been unsuccessful"); - goto out; - } - - *op_errstr = gf_strdup ("Disabling quota has been successful"); + int ret = 0; - dict_del (volinfo->dict, "features.limit-usage"); - - quota_status = gf_strdup ("off"); - if (quota_status == NULL) { - ret = -1; - goto out; - } - -out: + if (!*status) return ret; -} - -int32_t -glusterd_quota_limit_usage (glusterd_volinfo_t *volinfo, dict_t *dict, char **op_errstr) -{ - int32_t ret = -1; - char *path = 
NULL; - char *limit = NULL; - char *value = NULL; - char msg [1024] = {0,}; - char *quota_limits = NULL; - - GF_VALIDATE_OR_GOTO ("glusterd", dict, out); - GF_VALIDATE_OR_GOTO ("glusterd", volinfo, out); - GF_VALIDATE_OR_GOTO ("glusterd", op_errstr, out); - - ret = glusterd_check_if_quota_trans_enabled (volinfo); - if (ret == -1) { - *op_errstr = gf_strdup ("Quota is disabled, " - "please enable to set limit"); - goto out; - } - - ret = glusterd_volinfo_get (volinfo, "features.limit-usage", - "a_limits); - if (ret) { - gf_log ("", GF_LOG_ERROR, "failed to get the quota limits"); - *op_errstr = gf_strdup ("failed to set limit"); - goto out; - } - - ret = dict_get_str (dict, "path", &path); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to fetch quota limits" ); - *op_errstr = gf_strdup ("failed to set limit"); - goto out; - } - ret = dict_get_str (dict, "limit", &limit); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to fetch quota limits" ); - *op_errstr = gf_strdup ("failed to set limit"); - goto out; - } + switch (*status) { + case GF_DEFRAG_STATUS_STARTED: + *status = GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED; + break; - if (quota_limits) { - ret = _glusterd_quota_remove_limits ("a_limits, path); - if (ret == -1) { - gf_log ("", GF_LOG_ERROR, "Unable to allocate memory"); - *op_errstr = gf_strdup ("failed to set limit"); - goto out; - } - } - - if (quota_limits == NULL) { - ret = gf_asprintf (&value, "%s:%s", path, limit); - if (ret == -1) { - gf_log ("", GF_LOG_ERROR, "Unable to allocate memory"); - *op_errstr = gf_strdup ("failed to set limit"); - goto out; - } - } else { - ret = gf_asprintf (&value, "%s,%s:%s", - quota_limits, path, limit); - if (ret == -1) { - gf_log ("", GF_LOG_ERROR, "Unable to allocate memory"); - *op_errstr = gf_strdup ("failed to set limit"); - goto out; - } + case GF_DEFRAG_STATUS_STOPPED: + *status = GF_DEFRAG_STATUS_LAYOUT_FIX_STOPPED; + break; - GF_FREE (quota_limits); - } + case GF_DEFRAG_STATUS_COMPLETE: + *status = GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE; + break; - quota_limits = value; + case GF_DEFRAG_STATUS_FAILED: + *status = GF_DEFRAG_STATUS_LAYOUT_FIX_FAILED; + break; + default: + break; + } - ret = dict_set_str (volinfo->dict, "features.limit-usage", - quota_limits); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to set quota limits" ); - *op_errstr = gf_strdup ("failed to set limit"); - goto out; - } - snprintf (msg, 1024, "limit set on %s", path); - *op_errstr = gf_strdup (msg); + ret = dict_set_int32n(dict, key, keylen, *status); + if (ret) + gf_msg(THIS->name, GF_LOG_WARNING, 0, GD_MSG_DICT_SET_FAILED, + "failed to reset defrag %s in dict", key); - ret = 0; -out: - return ret; + return ret; } -int32_t -glusterd_quota_remove_limits (glusterd_volinfo_t *volinfo, dict_t *dict, char **op_errstr) -{ - int32_t ret = -1; - char str [PATH_MAX + 1024] = {0,}; - char *quota_limits = NULL; - char *path = NULL; - - GF_VALIDATE_OR_GOTO ("glusterd", dict, out); - GF_VALIDATE_OR_GOTO ("glusterd", volinfo, out); - GF_VALIDATE_OR_GOTO ("glusterd", op_errstr, out); - - ret = glusterd_check_if_quota_trans_enabled (volinfo); - if (ret == -1) - goto out; - - ret = glusterd_volinfo_get (volinfo, "features.limit-usage", - "a_limits); - if (ret) { - gf_log ("", GF_LOG_ERROR, "failed to get the quota limits"); - goto out; - } - - ret = dict_get_str (dict, "path", &path); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to fetch quota limits" ); - goto out; - } - - ret = _glusterd_quota_remove_limits ("a_limits, path); - if (ret == -1) { - snprintf (str, sizeof (str), 
"Removing limit on %s has been unsuccessful", path); - *op_errstr = gf_strdup (str); - goto out; - } else { - snprintf (str, sizeof (str), "Removed quota limit on %s", path); - *op_errstr = gf_strdup (str); - } - - if (quota_limits) { - ret = dict_set_str (volinfo->dict, "features.limit-usage", - quota_limits); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to set quota limits" ); - goto out; - } - } else { - dict_del (volinfo->dict, "features.limit-usage"); - } - +/* Check and reassign the defrag_status enum got from the rebalance process + * of all peers so that the rebalance-status CLI command can display if a + * full-rebalance or just a fix-layout was carried out. + */ +static int +glusterd_op_check_peer_defrag_status(dict_t *dict, int count) +{ + glusterd_volinfo_t *volinfo = NULL; + gf_defrag_status_t status = GF_DEFRAG_STATUS_NOT_STARTED; + char key[64] = { + 0, + }; + int keylen; + char *volname = NULL; + int ret = -1; + int i = 1; + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg(THIS->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get volume name"); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + gf_msg(THIS->name, GF_LOG_WARNING, 0, GD_MSG_VOL_NOT_FOUND, + FMTSTR_CHECK_VOL_EXISTS, volname); + goto out; + } + + if (volinfo->rebal.defrag_cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) { + /* Fix layout was not issued; we don't need to reassign + the status */ ret = 0; + goto out; + } -out: - return ret; -} - - -int -glusterd_op_quota (dict_t *dict, char **op_errstr) -{ - glusterd_volinfo_t *volinfo = NULL; - int32_t ret = -1; - char *volname = NULL; - dict_t *ctx = NULL; - int type = -1; - gf_boolean_t start_crawl = _gf_false; - glusterd_conf_t *priv = NULL; - - GF_ASSERT (dict); - GF_ASSERT (op_errstr); - - priv = THIS->private; - - ret = dict_get_str (dict, "volname", &volname); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name " ); - goto out; - } - - ret = glusterd_volinfo_find (volname, &volinfo); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to allocate memory"); - goto out; - } - - ret = dict_get_int32 (dict, "type", &type); - - if (type == GF_QUOTA_OPTION_TYPE_ENABLE) { - ret = glusterd_quota_enable (volinfo, op_errstr, &start_crawl); - if (ret < 0) - goto out; - - goto create_vol; - } - - if (type == GF_QUOTA_OPTION_TYPE_DISABLE) { - ret = glusterd_quota_disable (volinfo, op_errstr); - if (ret < 0) - goto out; - - goto create_vol; - } - - if (type == GF_QUOTA_OPTION_TYPE_LIMIT_USAGE) { - ret = glusterd_quota_limit_usage (volinfo, dict, op_errstr); - if (ret < 0) - goto out; - - goto create_vol; - } - - if (type == GF_QUOTA_OPTION_TYPE_REMOVE) { - ret = glusterd_quota_remove_limits (volinfo, dict, op_errstr); - if (ret < 0) - goto out; - - goto create_vol; - } - - if (type == GF_QUOTA_OPTION_TYPE_LIST) { - ret = glusterd_check_if_quota_trans_enabled (volinfo); - if (ret == -1) { - *op_errstr = gf_strdup ("cannot list the limits, " - "quota feature is disabled"); - goto out; - } - - glusterd_quota_get_limit_usages (priv, volinfo, volname, dict, op_errstr); - - goto out; - } -create_vol: - ret = glusterd_create_volfiles_and_notify_services (volinfo); + do { + keylen = snprintf(key, sizeof(key), "status-%d", i); + ret = dict_get_int32n(dict, key, keylen, (int32_t *)&status); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to re-create volfile for" - " 'quota'"); - ret = -1; - goto out; + gf_msg(THIS->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED, + "failed to get defrag 
%s", key); + goto out; } - - ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); + ret = reassign_defrag_status(dict, key, keylen, &status); if (ret) - goto out; - - if (GLUSTERD_STATUS_STARTED == volinfo->status) - ret = glusterd_check_generate_start_nfs (); - - ret = 0; + goto out; + i++; + } while (i <= count); + ret = 0; out: - if (start_crawl == _gf_true) - glusterd_quota_initiate_fs_crawl (priv, volname); - - ctx = glusterd_op_get_ctx (GD_OP_QUOTA); - if (ctx && *op_errstr) { - ret = dict_set_dynstr (ctx, "errstr", *op_errstr); - if (ret) { - GF_FREE (*op_errstr); - gf_log ("", GF_LOG_DEBUG, - "failed to set error message in ctx"); - } - *op_errstr = NULL; - } - - return ret; -} + return ret; +} + +/* This function is used to verify if op_ctx indeed + requires modification. This is necessary since the + dictionary for certain commands might not have the + necessary keys required for the op_ctx modification + to succeed. + + Special Cases: + - volume status all + - volume status + + Regular Cases: + - volume status <volname> <brick> + - volume status <volname> mem + - volume status <volname> clients + - volume status <volname> inode + - volume status <volname> fd + - volume status <volname> callpool + - volume status <volname> tasks +*/ -int -glusterd_stop_bricks (glusterd_volinfo_t *volinfo) +static gf_boolean_t +glusterd_is_volume_status_modify_op_ctx(uint32_t cmd) { - glusterd_brickinfo_t *brickinfo = NULL; - - list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { - if (glusterd_brick_stop (volinfo, brickinfo)) - return -1; - } - - return 0; + if ((cmd & GF_CLI_STATUS_MASK) == GF_CLI_STATUS_NONE) { + if (cmd & GF_CLI_STATUS_BRICK) + return _gf_false; + if (cmd & GF_CLI_STATUS_ALL) + return _gf_false; + return _gf_true; + } + return _gf_false; } int -glusterd_start_bricks (glusterd_volinfo_t *volinfo) +glusterd_op_modify_port_key(dict_t *op_ctx, int brick_index_max) { - glusterd_brickinfo_t *brickinfo = NULL; + char *port = NULL; + int i = 0; + int ret = -1; + char key[64] = {0}; + int keylen; + char old_key[64] = {0}; + int old_keylen; - list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { - if (glusterd_brick_start (volinfo, brickinfo)) - return -1; - } - - return 0; -} - -static int -glusterd_restart_brick_servers (glusterd_volinfo_t *volinfo) -{ - if (!volinfo) - return -1; - if (glusterd_stop_bricks (volinfo)) { - gf_log ("", GF_LOG_ERROR, "Restart Failed: Unable to " - "stop brick servers"); - return -1; - } - usleep (500000); - if (glusterd_start_bricks (volinfo)) { - gf_log ("", GF_LOG_ERROR, "Restart Failed: Unable to " - "start brick servers"); - return -1; - } - return 0; -} - -static int -glusterd_op_set_volume (dict_t *dict) -{ - int ret = 0; - glusterd_volinfo_t *volinfo = NULL; - char *volname = NULL; - xlator_t *this = NULL; - glusterd_conf_t *priv = NULL; - int count = 1; - int restart_flag = 0; - char *key = NULL; - char *key_fixed = NULL; - char *value = NULL; - char str[50] = {0, }; - gf_boolean_t global_opt = _gf_false; - glusterd_volinfo_t *voliter = NULL; - - this = THIS; - GF_ASSERT (this); - - priv = this->private; - GF_ASSERT (priv); - - ret = dict_get_str (dict, "volname", &volname); + for (i = 0; i <= brick_index_max; i++) { + keylen = snprintf(key, sizeof(key), "brick%d.rdma_port", i); + ret = dict_get_strn(op_ctx, key, keylen, &port); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); + old_keylen = snprintf(old_key, sizeof(old_key), "brick%d.port", i); + ret = dict_get_strn(op_ctx, old_key, 
old_keylen, &port); + if (ret) goto out; - } - ret = glusterd_volinfo_find (volname, &volinfo); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to allocate memory"); + ret = dict_set_strn(op_ctx, key, keylen, port); + if (ret) goto out; - } - - for ( count = 1; ret != -1 ; count++ ) { - - global_opt = _gf_false; - sprintf (str, "key%d", count); - ret = dict_get_str (dict, str, &key); - - if (ret) { - break; - } - - if (!ret) { - ret = glusterd_check_option_exists (key, &key_fixed); - GF_ASSERT (ret); - if (ret == -1) { - key_fixed = NULL; - goto out; - } - ret = 0; - } - - ret = glusterd_check_globaloption (key); - if (ret) - global_opt = _gf_true; - - sprintf (str, "value%d", count); - ret = dict_get_str (dict, str, &value); - if (ret) { - gf_log ("", GF_LOG_ERROR, - "invalid key,value pair in 'volume set'"); - ret = -1; - goto out; - } - - if (!global_opt) - value = gf_strdup (value); - - if (!value) { - gf_log ("", GF_LOG_ERROR, - "Unable to set the options in 'volume set'"); - ret = -1; - goto out; - } - - if (key_fixed) - key = key_fixed; - - if (global_opt) { - list_for_each_entry (voliter, &priv->volumes, vol_list) { - value = gf_strdup (value); - ret = dict_set_dynstr (voliter->dict, key, value); - if (ret) - goto out; - } - } - else { - ret = dict_set_dynstr (volinfo->dict, key, value); - if (ret) - goto out; - } - - if (key_fixed) { - GF_FREE (key_fixed); - - key_fixed = NULL; - } - } - - - if ( count == 1 ) { - gf_log ("", GF_LOG_ERROR, "No options received "); - ret = -1; + ret = dict_set_nstrn(op_ctx, old_key, old_keylen, "\0", SLEN("\0")); + if (ret) goto out; } + } +out: + return ret; +} - if (!global_opt) { - ret = glusterd_create_volfiles_and_notify_services (volinfo); +/* This function is used to modify the op_ctx dict before sending it back + * to cli. This is useful in situations like changing the peer uuids to + * hostnames etc. + */ +void +glusterd_op_modify_op_ctx(glusterd_op_t op, void *ctx) +{ + int ret = -1; + dict_t *op_ctx = NULL; + int brick_index_max = -1; + int other_count = 0; + int count = 0; + uint32_t cmd = GF_CLI_STATUS_NONE; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + char *port = 0; + int i = 0; + char key[64] = { + 0, + }; + int keylen; + + this = THIS; + GF_ASSERT(this); + conf = this->private; + + if (ctx) + op_ctx = ctx; + else + op_ctx = glusterd_op_get_ctx(); + + if (!op_ctx) { + gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_OPCTX_NULL, + "Operation context is not present."); + goto out; + } + + switch (op) { + case GD_OP_STATUS_VOLUME: + ret = dict_get_uint32(op_ctx, "cmd", &cmd); + if (ret) { + gf_msg_debug(this->name, 0, "Failed to get status cmd"); + goto out; + } + + if (!glusterd_is_volume_status_modify_op_ctx(cmd)) { + gf_msg_debug(this->name, 0, + "op_ctx modification not required for status " + "operation being performed"); + goto out; + } + + ret = dict_get_int32n(op_ctx, "brick-index-max", + SLEN("brick-index-max"), &brick_index_max); + if (ret) { + gf_msg_debug(this->name, 0, "Failed to get brick-index-max"); + goto out; + } + + ret = dict_get_int32n(op_ctx, "other-count", SLEN("other-count"), + &other_count); + if (ret) { + gf_msg_debug(this->name, 0, "Failed to get other-count"); + goto out; + } + + count = brick_index_max + other_count + 1; + + /* + * a glusterd lesser than version 3.7 will be sending the + * rdma port in older key. 
Changing that value from here + * to support backward compatibility + */ + ret = dict_get_strn(op_ctx, "volname", SLEN("volname"), &volname); + if (ret) + goto out; + + for (i = 0; i <= brick_index_max; i++) { + keylen = snprintf(key, sizeof(key), "brick%d.rdma_port", i); + ret = dict_get_strn(op_ctx, key, keylen, &port); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to create volfile for" - " 'volume set'"); - ret = -1; + ret = dict_set_nstrn(op_ctx, key, keylen, "\0", SLEN("\0")); + if (ret) goto out; } - - if (restart_flag) { - if (glusterd_restart_brick_servers (volinfo)) { - ret = -1; - goto out; - } - } - - ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); + } + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) + goto out; + if (conf->op_version < GD_OP_VERSION_3_7_0 && + volinfo->transport_type == GF_TRANSPORT_RDMA) { + ret = glusterd_op_modify_port_key(op_ctx, brick_index_max); if (ret) - goto out; - - if (GLUSTERD_STATUS_STARTED == volinfo->status) { - ret = glusterd_check_generate_start_nfs (); - if (ret) { - gf_log ("", GF_LOG_WARNING, - "Unable to restart NFS-Server"); - goto out; + goto out; + } + /* add 'brick%d.peerid' into op_ctx with value of 'brick%d.path'. + nfs/sshd like services have this additional uuid */ + { + char *uuid_str = NULL; + char *uuid = NULL; + int i; + + for (i = brick_index_max + 1; i < count; i++) { + keylen = snprintf(key, sizeof(key), "brick%d.path", i); + ret = dict_get_strn(op_ctx, key, keylen, &uuid_str); + if (!ret) { + keylen = snprintf(key, sizeof(key), "brick%d.peerid", + i); + uuid = gf_strdup(uuid_str); + if (!uuid) { + gf_msg_debug(this->name, 0, + "unable to create dup of" + " uuid_str"); + continue; } - } - - } - else { - list_for_each_entry (voliter, &priv->volumes, vol_list) { - volinfo = voliter; - ret = glusterd_create_volfiles_and_notify_services (volinfo); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to create volfile for" - " 'volume set'"); - ret = -1; - goto out; - } - - if (restart_flag) { - if (glusterd_restart_brick_servers (volinfo)) { - ret = -1; - goto out; - } - } - - ret = glusterd_store_volinfo (volinfo, - GLUSTERD_VOLINFO_VER_AC_INCREMENT); - if (ret) - goto out; - - if (GLUSTERD_STATUS_STARTED == volinfo->status) { - ret = glusterd_check_generate_start_nfs (); - if (ret) { - gf_log ("", GF_LOG_WARNING, - "Unable to restart NFS-Server"); - goto out; - } + ret = dict_set_dynstrn(op_ctx, key, keylen, uuid); + if (ret != 0) { + GF_FREE(uuid); } + } } - } - - - - ret = 0; - -out: - if (key_fixed) - GF_FREE (key_fixed); - gf_log ("", GF_LOG_DEBUG, "returning %d", ret); - return ret; -} - -static int -glusterd_op_remove_brick (dict_t *dict) -{ - int ret = -1; - char *volname = NULL; - glusterd_volinfo_t *volinfo = NULL; - char *brick = NULL; - int32_t count = 0; - int32_t i = 1; - char key[256] = {0,}; - - ret = dict_get_str (dict, "volname", &volname); - - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); - goto out; - } - - ret = glusterd_volinfo_find (volname, &volinfo); - - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to allocate memory"); - goto out; - } + } - ret = dict_get_int32 (dict, "count", &count); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get count"); - goto out; - } + ret = glusterd_op_volume_dict_uuid_to_hostname( + op_ctx, "brick%d.path", 0, count); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_CONVERSION_FAILED, + "Failed uuid to hostname conversion"); + break; - while ( i <= count) { - snprintf (key, 256, "brick%d", i); - ret = 
dict_get_str (dict, key, &brick); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get %s", key); - goto out; + case GD_OP_PROFILE_VOLUME: + ret = dict_get_str_boolean(op_ctx, "nfs", _gf_false); + if (!ret) + goto out; + + ret = dict_get_int32n(op_ctx, "count", SLEN("count"), &count); + if (ret) { + gf_msg_debug(this->name, 0, "Failed to get brick count"); + goto out; + } + + ret = glusterd_op_volume_dict_uuid_to_hostname(op_ctx, "%d-brick", + 1, (count + 1)); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_CONVERSION_FAILED, + "Failed uuid to hostname conversion"); + + break; + + /* For both rebalance and remove-brick status, the glusterd op is the + * same + */ + case GD_OP_DEFRAG_BRICK_VOLUME: + case GD_OP_SCRUB_STATUS: + case GD_OP_SCRUB_ONDEMAND: + ret = dict_get_int32n(op_ctx, "count", SLEN("count"), &count); + if (ret) { + gf_msg_debug(this->name, 0, "Failed to get count"); + goto out; + } + + /* add 'node-name-%d' into op_ctx with value uuid_str. + this will be used to convert to hostname later */ + { + char *uuid_str = NULL; + char *uuid = NULL; + int i; + + for (i = 1; i <= count; i++) { + keylen = snprintf(key, sizeof(key), "node-uuid-%d", i); + ret = dict_get_strn(op_ctx, key, keylen, &uuid_str); + if (!ret) { + keylen = snprintf(key, sizeof(key), "node-name-%d", i); + uuid = gf_strdup(uuid_str); + if (!uuid) { + gf_msg_debug(this->name, 0, + "unable to create dup of" + " uuid_str"); + continue; + } + ret = dict_set_dynstrn(op_ctx, key, keylen, uuid); + if (ret != 0) { + GF_FREE(uuid); + } + } } + } - ret = glusterd_op_perform_remove_brick (volinfo, brick); - if (ret) - goto out; - i++; - } - - ret = glusterd_create_volfiles_and_notify_services (volinfo); - if (ret) - goto out; + ret = glusterd_op_volume_dict_uuid_to_hostname( + op_ctx, "node-name-%d", 1, (count + 1)); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_CONVERSION_FAILED, + "Failed uuid to hostname conversion"); - volinfo->defrag_status = 0; + /* Since Both rebalance and bitrot scrub status/ondemand + * are going to use same code path till here, we should + * break in case of scrub status. 
+ */ + if (op == GD_OP_SCRUB_STATUS || op == GD_OP_SCRUB_ONDEMAND) { + break; + } - ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); + ret = glusterd_op_check_peer_defrag_status(op_ctx, count); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_DEFRAG_STATUS_UPDATE_FAIL, + "Failed to reset defrag status for fix-layout"); + break; - if (ret) - goto out; - - if (GLUSTERD_STATUS_STARTED == volinfo->status) - ret = glusterd_check_generate_start_nfs (); + default: + ret = 0; + gf_msg_debug(this->name, 0, "op_ctx modification not required"); + break; + } out: - return ret; + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_OPCTX_UPDATE_FAIL, + "op_ctx modification failed"); + return; } +int +glusterd_op_commit_hook(glusterd_op_t op, dict_t *op_ctx, + glusterd_commit_hook_type_t type) +{ + glusterd_conf_t *priv = NULL; + char hookdir[PATH_MAX] = { + 0, + }; + char scriptdir[PATH_MAX] = { + 0, + }; + char *type_subdir = ""; + char *cmd_subdir = NULL; + int ret = -1; + int32_t len = 0; + + priv = THIS->private; + switch (type) { + case GD_COMMIT_HOOK_NONE: + case GD_COMMIT_HOOK_MAX: + /*Won't be called*/ + break; + + case GD_COMMIT_HOOK_PRE: + type_subdir = "pre"; + break; + case GD_COMMIT_HOOK_POST: + type_subdir = "post"; + break; + } + + cmd_subdir = glusterd_hooks_get_hooks_cmd_subdir(op); + if (strlen(cmd_subdir) == 0) + return -1; + + GLUSTERD_GET_HOOKS_DIR(hookdir, GLUSTERD_HOOK_VER, priv); + len = snprintf(scriptdir, sizeof(scriptdir), "%s/%s/%s", hookdir, + cmd_subdir, type_subdir); + if ((len < 0) || (len >= sizeof(scriptdir))) { + return -1; + } + + switch (type) { + case GD_COMMIT_HOOK_NONE: + case GD_COMMIT_HOOK_MAX: + /*Won't be called*/ + break; + + case GD_COMMIT_HOOK_PRE: + ret = glusterd_hooks_run_hooks(scriptdir, op, op_ctx, type); + break; + case GD_COMMIT_HOOK_POST: + ret = glusterd_hooks_post_stub_enqueue(scriptdir, op, op_ctx); + break; + } + + return ret; +} static int -glusterd_op_delete_volume (dict_t *dict) -{ - int ret = 0; - char *volname = NULL; - glusterd_conf_t *priv = NULL; - glusterd_volinfo_t *volinfo = NULL; - xlator_t *this = NULL; - - this = THIS; - GF_ASSERT (this); - priv = this->private; - GF_ASSERT (priv); - - ret = dict_get_str (dict, "volname", &volname); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); - goto out; +glusterd_op_ac_send_commit_op(glusterd_op_sm_event_t *event, void *ctx) +{ + int ret = 0; + int ret1 = 0; + rpc_clnt_procedure_t *proc = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + dict_t *dict = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + char *op_errstr = NULL; + glusterd_op_t op = GD_OP_NONE; + uint32_t pending_count = 0; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + op = glusterd_op_get_op(); + + ret = glusterd_op_build_payload(&dict, &op_errstr, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_OP_PAYLOAD_BUILD_FAIL, + LOGSTR_BUILD_PAYLOAD, gd_op_list[op]); + if (op_errstr == NULL) + gf_asprintf(&op_errstr, OPERRSTR_BUILD_PAYLOAD); + opinfo.op_errstr = op_errstr; + goto out; + } + + ret = glusterd_op_commit_perform(op, dict, &op_errstr, + NULL); // rsp_dict invalid for source + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_OP_FAIL, + LOGSTR_COMMIT_FAIL, gd_op_list[op], "localhost", + (op_errstr) ? ":" : " ", (op_errstr) ? 
op_errstr : " "); + if (op_errstr == NULL) + gf_asprintf(&op_errstr, OPERRSTR_COMMIT_FAIL, "localhost"); + opinfo.op_errstr = op_errstr; + goto out; + } + + RCU_READ_LOCK; + cds_list_for_each_entry_rcu(peerinfo, &priv->peers, uuid_list) + { + /* Only send requests to peers who were available before the + * transaction started + */ + if (peerinfo->generation > opinfo.txn_generation) + continue; + + if (!peerinfo->connected || !peerinfo->mgmt) + continue; + if ((peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) && + (glusterd_op_get_op() != GD_OP_SYNC_VOLUME)) + continue; + + proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_COMMIT_OP]; + GF_ASSERT(proc); + if (proc->fn) { + ret = dict_set_static_ptr(dict, "peerinfo", peerinfo); + if (ret) { + RCU_READ_UNLOCK; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "failed to set peerinfo"); + goto out; + } + ret = proc->fn(NULL, this, dict); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, + GD_MSG_COMMIT_REQ_SEND_FAIL, + "Failed to " + "send commit request for operation " + "'Volume %s' to peer %s", + gd_op_list[op], peerinfo->hostname); + continue; + } + pending_count++; + } + } + RCU_READ_UNLOCK; + + opinfo.pending_count = pending_count; + gf_msg_debug(this->name, 0, + "Sent commit op req for 'Volume %s' " + "to %d peers", + gd_op_list[op], opinfo.pending_count); +out: + if (dict) + dict_unref(dict); + + if (ret) + opinfo.op_ret = ret; + + ret1 = glusterd_set_txn_opinfo(&event->txn_id, &opinfo); + if (ret1) + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL, + "Unable to set " + "transaction's opinfo"); + + if (ret) { + glusterd_op_sm_inject_event(GD_OP_EVENT_RCVD_RJT, &event->txn_id, NULL); + opinfo.op_ret = ret; + } + + if (!opinfo.pending_count) { + if (op == GD_OP_REPLACE_BRICK) { + ret = glusterd_op_sm_inject_all_acc(&event->txn_id); + } else { + glusterd_op_modify_op_ctx(op, NULL); + ret = glusterd_op_sm_inject_all_acc(&event->txn_id); } + goto err; + } - ret = glusterd_volinfo_find (volname, &volinfo); - - if (ret) - goto out; +err: + gf_msg_debug(this->name, 0, "Returning with %d", ret); - ret = glusterd_delete_volume (volinfo); -out: - gf_log ("", GF_LOG_DEBUG, "returning %d", ret); - return ret; + return ret; } static int -glusterd_op_start_volume (dict_t *dict, char **op_errstr) +glusterd_op_ac_rcvd_stage_op_acc(glusterd_op_sm_event_t *event, void *ctx) { - int ret = 0; - char *volname = NULL; - int flags = 0; - glusterd_volinfo_t *volinfo = NULL; - glusterd_brickinfo_t *brickinfo = NULL; + int ret = 0; - ret = glusterd_op_start_volume_args_get (dict, &volname, &flags); - if (ret) - goto out; + GF_ASSERT(event); - ret = glusterd_volinfo_find (volname, &volinfo); + if (opinfo.pending_count > 0) + opinfo.pending_count--; - if (ret) - goto out; - list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { - ret = glusterd_brick_start (volinfo, brickinfo); - if (ret) - goto out; - } - - glusterd_set_volume_status (volinfo, GLUSTERD_STATUS_STARTED); + ret = glusterd_set_txn_opinfo(&event->txn_id, &opinfo); + if (ret) + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL, + "Unable to set " + "transaction's opinfo"); - ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); - if (ret) - goto out; + if (opinfo.pending_count > 0) + goto out; - ret = glusterd_check_generate_start_nfs (); + ret = glusterd_op_sm_inject_event(GD_OP_EVENT_STAGE_ACC, &event->txn_id, + NULL); out: - gf_log ("", GF_LOG_DEBUG, "returning %d ", ret); - return ret; + gf_msg_debug(THIS->name, 0, 
"Returning %d", ret); + + return ret; } static int -glusterd_op_log_filename (dict_t *dict) +glusterd_op_ac_stage_op_failed(glusterd_op_sm_event_t *event, void *ctx) { - int ret = -1; - glusterd_conf_t *priv = NULL; - glusterd_volinfo_t *volinfo = NULL; - glusterd_brickinfo_t *brickinfo = NULL; - xlator_t *this = NULL; - char *volname = NULL; - char *brick = NULL; - char *path = NULL; - char logfile[PATH_MAX] = {0,}; - char exp_path[PATH_MAX] = {0,}; - struct stat stbuf = {0,}; - int valid_brick = 0; - glusterd_brickinfo_t *tmpbrkinfo = NULL; - char* new_logdir = NULL; - - this = THIS; - GF_ASSERT (this); - priv = this->private; - GF_ASSERT (priv); - - ret = dict_get_str (dict, "volname", &volname); - if (ret) { - gf_log ("", GF_LOG_ERROR, "volname not found"); - goto out; - } - - ret = dict_get_str (dict, "path", &path); - if (ret) { - gf_log ("", GF_LOG_ERROR, "path not found"); - goto out; - } - - ret = dict_get_str (dict, "brick", &brick); - if (ret) - goto out; + int ret = 0; - ret = glusterd_volinfo_find (volname, &volinfo); - if (ret) - goto out; + GF_ASSERT(event); - if (!strchr (brick, ':')) { - brick = NULL; - ret = stat (path, &stbuf); - if (ret || !S_ISDIR (stbuf.st_mode)) { - ret = -1; - gf_log ("", GF_LOG_ERROR, "not a directory"); - goto out; - } - new_logdir = gf_strdup (path); - if (!new_logdir) { - ret = -1; - gf_log ("", GF_LOG_ERROR, "Out of memory"); - goto out; - } - if (volinfo->logdir) - GF_FREE (volinfo->logdir); - volinfo->logdir = new_logdir; - } else { - ret = glusterd_brickinfo_from_brick (brick, &tmpbrkinfo); - if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, - "cannot get brickinfo from brick"); - goto out; - } - } + if (opinfo.pending_count > 0) + opinfo.pending_count--; + ret = glusterd_set_txn_opinfo(&event->txn_id, &opinfo); + if (ret) + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL, + "Unable to set " + "transaction's opinfo"); - ret = -1; - list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + if (opinfo.pending_count > 0) + goto out; - if (uuid_is_null (brickinfo->uuid)) { - ret = glusterd_resolve_brick (brickinfo); - if (ret) - goto out; - } + ret = glusterd_op_sm_inject_event(GD_OP_EVENT_ALL_ACK, &event->txn_id, + NULL); - /* check if the brickinfo belongs to the 'this' machine */ - if (uuid_compare (brickinfo->uuid, priv->uuid)) - continue; +out: + gf_msg_debug(THIS->name, 0, "Returning %d", ret); - if (brick && strcmp (tmpbrkinfo->path,brickinfo->path)) - continue; + return ret; +} - valid_brick = 1; +static int +glusterd_op_ac_commit_op_failed(glusterd_op_sm_event_t *event, void *ctx) +{ + int ret = 0; - /* If there are more than one brick in 'this' server, its an - * extra check, but it doesn't harm functionality - */ - ret = stat (path, &stbuf); - if (ret || !S_ISDIR (stbuf.st_mode)) { - ret = -1; - gf_log ("", GF_LOG_ERROR, "not a directory"); - goto out; - } + GF_ASSERT(event); - GLUSTERD_REMOVE_SLASH_FROM_PATH (brickinfo->path, exp_path); + if (opinfo.pending_count > 0) + opinfo.pending_count--; - snprintf (logfile, PATH_MAX, "%s/%s.log", path, exp_path); + ret = glusterd_set_txn_opinfo(&event->txn_id, &opinfo); + if (ret) + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL, + "Unable to set " + "transaction's opinfo"); - if (brickinfo->logfile) - GF_FREE (brickinfo->logfile); - brickinfo->logfile = gf_strdup (logfile); - ret = 0; + if (opinfo.pending_count > 0) + goto out; - /* If request was for brick, only one iteration is enough */ - if (brick) - break; - } + ret = 
glusterd_op_sm_inject_event(GD_OP_EVENT_ALL_ACK, &event->txn_id, + NULL); - if (ret && !valid_brick) - ret = 0; out: - if (tmpbrkinfo) - glusterd_brickinfo_delete (tmpbrkinfo); + gf_msg_debug(THIS->name, 0, "Returning %d", ret); - return ret; + return ret; } static int -glusterd_op_log_rotate (dict_t *dict) +glusterd_op_ac_brick_op_failed(glusterd_op_sm_event_t *event, void *ctx) { - int ret = -1; - glusterd_conf_t *priv = NULL; - glusterd_volinfo_t *volinfo = NULL; - glusterd_brickinfo_t *brickinfo = NULL; - xlator_t *this = NULL; - char *volname = NULL; - char *brick = NULL; - char path[PATH_MAX] = {0,}; - char logfile[PATH_MAX] = {0,}; - char pidfile[PATH_MAX] = {0,}; - FILE *file = NULL; - pid_t pid = 0; - uint64_t key = 0; - int valid_brick = 0; - glusterd_brickinfo_t *tmpbrkinfo = NULL; - - this = THIS; - GF_ASSERT (this); - priv = this->private; - GF_ASSERT (priv); - - ret = dict_get_str (dict, "volname", &volname); - if (ret) { - gf_log ("", GF_LOG_ERROR, "volname not found"); - goto out; - } - - ret = dict_get_uint64 (dict, "rotate-key", &key); - if (ret) { - gf_log ("", GF_LOG_ERROR, "rotate key not found"); - goto out; - } + int ret = 0; + glusterd_op_brick_rsp_ctx_t *ev_ctx = NULL; + gf_boolean_t free_errstr = _gf_false; + xlator_t *this = NULL; - ret = dict_get_str (dict, "brick", &brick); - if (ret) - goto out; + this = THIS; + GF_ASSERT(this); - if (!strchr (brick, ':')) - brick = NULL; - else { - ret = glusterd_brickinfo_from_brick (brick, &tmpbrkinfo); - if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, - "cannot get brickinfo from brick"); - goto out; - } - } - - ret = glusterd_volinfo_find (volname, &volinfo); - if (ret) - goto out; + GF_ASSERT(event); + GF_ASSERT(ctx); + ev_ctx = ctx; + ret = glusterd_remove_pending_entry(&opinfo.pending_bricks, + ev_ctx->pending_node->node); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UNKNOWN_RESPONSE, + "unknown response received "); ret = -1; - list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { - if (uuid_compare (brickinfo->uuid, priv->uuid)) - continue; - - if (brick && - (strcmp (tmpbrkinfo->hostname, brickinfo->hostname) || - strcmp (tmpbrkinfo->path,brickinfo->path))) - continue; - - valid_brick = 1; - - GLUSTERD_GET_VOLUME_DIR (path, volinfo, priv); - GLUSTERD_GET_BRICK_PIDFILE (pidfile, path, brickinfo->hostname, - brickinfo->path); - - file = fopen (pidfile, "r+"); - if (!file) { - gf_log ("", GF_LOG_ERROR, "Unable to open pidfile: %s", - pidfile); - ret = -1; - goto out; - } - - ret = fscanf (file, "%d", &pid); - if (ret <= 0) { - gf_log ("", GF_LOG_ERROR, "Unable to read pidfile: %s", - pidfile); - ret = -1; - goto out; - } - fclose (file); - file = NULL; - - snprintf (logfile, PATH_MAX, "%s.%"PRIu64, - brickinfo->logfile, key); - - ret = rename (brickinfo->logfile, logfile); - if (ret) - gf_log ("", GF_LOG_WARNING, "rename failed"); - - ret = kill (pid, SIGHUP); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to SIGHUP to %d", pid); - goto out; - } - ret = 0; - - /* If request was for brick, only one iteration is enough */ - if (brick) - break; - } - - if (ret && !valid_brick) - ret = 0; + free_errstr = _gf_true; + goto out; + } + if (opinfo.brick_pending_count > 0) + opinfo.brick_pending_count--; + if (opinfo.op_ret == 0) + opinfo.op_ret = ev_ctx->op_ret; + + if (opinfo.op_errstr == NULL) + opinfo.op_errstr = ev_ctx->op_errstr; + else + free_errstr = _gf_true; + + ret = glusterd_set_txn_opinfo(&event->txn_id, &opinfo); + if (ret) + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL, 
+ "Unable to set " + "transaction's opinfo"); + + if (opinfo.brick_pending_count > 0) + goto out; + + ret = glusterd_op_sm_inject_event(GD_OP_EVENT_ALL_ACK, &event->txn_id, + ev_ctx->commit_ctx); out: - if (tmpbrkinfo) - glusterd_brickinfo_delete (tmpbrkinfo); + if (ev_ctx->rsp_dict) + dict_unref(ev_ctx->rsp_dict); + if (free_errstr && ev_ctx->op_errstr) + GF_FREE(ev_ctx->op_errstr); + GF_FREE(ctx); + gf_msg_debug(this->name, 0, "Returning %d", ret); - return ret; + return ret; } - static int -glusterd_op_stop_volume (dict_t *dict) +glusterd_op_ac_rcvd_commit_op_acc(glusterd_op_sm_event_t *event, void *ctx) { - int ret = 0; - int flags = 0; - char *volname = NULL; - glusterd_volinfo_t *volinfo = NULL; - glusterd_brickinfo_t *brickinfo = NULL; + int ret = 0; + gf_boolean_t commit_ack_inject = _gf_true; + glusterd_op_t op = GD_OP_NONE; + xlator_t *this = NULL; - ret = glusterd_op_stop_volume_args_get (dict, &volname, &flags); - if (ret) - goto out; + this = THIS; + GF_ASSERT(this); + op = glusterd_op_get_op(); + GF_ASSERT(event); - ret = glusterd_volinfo_find (volname, &volinfo); + if (opinfo.pending_count > 0) + opinfo.pending_count--; - if (ret) - goto out; + ret = glusterd_set_txn_opinfo(&event->txn_id, &opinfo); + if (ret) + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL, + "Unable to set " + "transaction's opinfo"); - list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { - ret = glusterd_brick_stop (volinfo, brickinfo); - if (ret) - goto out; + if (opinfo.pending_count > 0) + goto out; + + if (op == GD_OP_REPLACE_BRICK) { + ret = glusterd_op_sm_inject_all_acc(&event->txn_id); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RBOP_START_FAIL, + "Couldn't start " + "replace-brick operation."); + goto out; } - glusterd_set_volume_status (volinfo, GLUSTERD_STATUS_STOPPED); + commit_ack_inject = _gf_false; + goto out; + } - ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); +out: + if (commit_ack_inject) { if (ret) - goto out; - - if (glusterd_are_all_volumes_stopped ()) { - if (glusterd_is_nfs_started ()) { - ret = glusterd_nfs_server_stop (); - if (ret) - goto out; - } - } else { - ret = glusterd_check_generate_start_nfs (); + ret = glusterd_op_sm_inject_event(GD_OP_EVENT_RCVD_RJT, + &event->txn_id, NULL); + else if (!opinfo.pending_count) { + glusterd_op_modify_op_ctx(op, NULL); + ret = glusterd_op_sm_inject_event(GD_OP_EVENT_COMMIT_ACC, + &event->txn_id, NULL); } + /*else do nothing*/ + } -out: - return ret; + return ret; } static int -glusterd_op_sync_volume (dict_t *dict, char **op_errstr, - dict_t *rsp_dict) +glusterd_op_ac_rcvd_unlock_acc(glusterd_op_sm_event_t *event, void *ctx) { - int ret = -1; - char *volname = NULL; - char *hostname = NULL; - char msg[2048] = {0,}; - int count = 1; - int vol_count = 0; - glusterd_conf_t *priv = NULL; - glusterd_volinfo_t *volinfo = NULL; - xlator_t *this = NULL; - - this = THIS; - GF_ASSERT (this); - priv = this->private; - GF_ASSERT (priv); - - ret = dict_get_str (dict, "hostname", &hostname); - if (ret) { - snprintf (msg, sizeof (msg), "hostname couldn't be " - "retrieved from msg"); - *op_errstr = gf_strdup (msg); - goto out; - } + int ret = 0; - if (glusterd_is_local_addr (hostname)) { - ret = 0; - goto out; - } + GF_ASSERT(event); - //volname is not present in case of sync all - ret = dict_get_str (dict, "volname", &volname); - if (!ret) { - ret = glusterd_volinfo_find (volname, &volinfo); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Volume with name: %s " - "not exists", volname); - 
goto out; - } - } + if (opinfo.pending_count > 0) + opinfo.pending_count--; - if (!rsp_dict) { - //this should happen only on source - ret = 0; - goto out; - } + ret = glusterd_set_txn_opinfo(&event->txn_id, &opinfo); + if (ret) + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL, + "Unable to set " + "transaction's opinfo"); - if (volname) { - ret = glusterd_add_volume_to_dict (volinfo, rsp_dict, - 1); - vol_count = 1; - } else { - list_for_each_entry (volinfo, &priv->volumes, vol_list) { - ret = glusterd_add_volume_to_dict (volinfo, - rsp_dict, count); - if (ret) - goto out; + if (opinfo.pending_count > 0) + goto out; - vol_count = count++; - } - } - ret = dict_set_int32 (rsp_dict, "count", vol_count); + ret = glusterd_op_sm_inject_event(GD_OP_EVENT_ALL_ACC, &event->txn_id, + NULL); -out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + gf_msg_debug(THIS->name, 0, "Returning %d", ret); - return ret; +out: + return ret; } -static int -glusterd_add_profile_volume_options (glusterd_volinfo_t *volinfo) +int32_t +glusterd_op_clear_errstr() { - int ret = -1; - char *latency_key = NULL; - char *fd_stats_key = NULL; - - GF_ASSERT (volinfo); - - latency_key = "diagnostics.latency-measurement"; - fd_stats_key = "diagnostics.count-fop-hits"; - - ret = dict_set_str (volinfo->dict, latency_key, "on"); - if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, "failed to set the volume %s " - "option %s value %s", - volinfo->volname, latency_key, "on"); - goto out; - } - - ret = dict_set_str (volinfo->dict, fd_stats_key, "on"); - if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, "failed to set the volume %s " - "option %s value %s", - volinfo->volname, fd_stats_key, "on"); - goto out; - } -out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + opinfo.op_errstr = NULL; + return 0; } -static void -glusterd_remove_profile_volume_options (glusterd_volinfo_t *volinfo) +int32_t +glusterd_op_set_ctx(void *ctx) { - char *latency_key = NULL; - char *fd_stats_key = NULL; - - GF_ASSERT (volinfo); + opinfo.op_ctx = ctx; - latency_key = "diagnostics.latency-measurement"; - fd_stats_key = "diagnostics.count-fop-hits"; - dict_del (volinfo->dict, latency_key); - dict_del (volinfo->dict, fd_stats_key); + return 0; } -static int -glusterd_op_stats_volume (dict_t *dict, char **op_errstr, - dict_t *rsp_dict) +int32_t +glusterd_op_reset_ctx() { - int ret = -1; - char *volname = NULL; - char msg[2048] = {0,}; - glusterd_volinfo_t *volinfo = NULL; - int32_t stats_op = GF_CLI_STATS_NONE; - - ret = dict_get_str (dict, "volname", &volname); - if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, "volume name get failed"); - goto out; - } + glusterd_op_set_ctx(NULL); - ret = glusterd_volinfo_find (volname, &volinfo); - if (ret) { - snprintf (msg, sizeof (msg), "Volume %s does not exists", - volname); - - gf_log ("", GF_LOG_ERROR, "%s", msg); - goto out; - } - - ret = dict_get_int32 (dict, "op", &stats_op); - if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, "volume profile op get failed"); - goto out; - } - - switch (stats_op) { - case GF_CLI_STATS_START: - ret = glusterd_add_profile_volume_options (volinfo); - if (ret) - goto out; - break; - case GF_CLI_STATS_STOP: - glusterd_remove_profile_volume_options (volinfo); - break; - case GF_CLI_STATS_INFO: - case GF_CLI_STATS_TOP: - //info is already collected in brick op. 
- //just goto out; - ret = 0; - goto out; - break; - default: - GF_ASSERT (0); - gf_log ("glusterd", GF_LOG_ERROR, "Invalid profile op: %d", - stats_op); - ret = -1; - goto out; - break; - } - ret = glusterd_create_volfiles_and_notify_services (volinfo); - - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to create volfile for" - " 'volume set'"); - ret = -1; - goto out; - } + return 0; +} - ret = glusterd_store_volinfo (volinfo, - GLUSTERD_VOLINFO_VER_AC_INCREMENT); +int32_t +glusterd_op_txn_complete(uuid_t *txn_id) +{ + int32_t ret = -1; + glusterd_conf_t *priv = NULL; + int32_t op = -1; + int32_t op_ret = 0; + int32_t op_errno = 0; + rpcsvc_request_t *req = NULL; + void *ctx = NULL; + char *op_errstr = NULL; + char *volname = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + GF_ASSERT(priv); + + op = glusterd_op_get_op(); + ctx = glusterd_op_get_ctx(); + op_ret = opinfo.op_ret; + op_errno = opinfo.op_errno; + req = opinfo.req; + if (opinfo.op_errstr) + op_errstr = opinfo.op_errstr; + + opinfo.op_ret = 0; + opinfo.op_errno = 0; + glusterd_op_clear_op(); + glusterd_op_reset_ctx(); + glusterd_op_clear_errstr(); + + /* Based on the op-version, we release the cluster or mgmt_v3 lock */ + if (priv->op_version < GD_OP_VERSION_3_6_0) { + ret = glusterd_unlock(MY_UUID); + /* unlock can't/shouldn't fail here!! */ if (ret) - goto out; - - if (GLUSTERD_STATUS_STARTED == volinfo->status) - ret = glusterd_check_generate_start_nfs (); + gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_GLUSTERD_UNLOCK_FAIL, + "Unable to clear local lock, ret: %d", ret); + else + gf_msg_debug(this->name, 0, "Cleared local lock"); + } else { + ret = dict_get_strn(ctx, "volname", SLEN("volname"), &volname); + if (ret) + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_DICT_GET_FAILED, + "No Volume name present. 
" + "Locks have not been held."); + if (volname) { + ret = glusterd_mgmt_v3_unlock(volname, MY_UUID, "vol"); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_UNLOCK_FAIL, + "Unable to release lock for %s", volname); + } + } + + ret = glusterd_op_send_cli_response(op, op_ret, op_errno, req, ctx, + op_errstr); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_CLI_RESP, + "Responding to cli failed, " + "ret: %d", + ret); + // Ignore this error, else state machine blocks ret = 0; + } -out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + if (op_errstr && (strcmp(op_errstr, ""))) + GF_FREE(op_errstr); - return ret; -} + if (priv->pending_quorum_action) + glusterd_do_quorum_action(); -static int -glusterd_op_ac_none (glusterd_op_sm_event_t *event, void *ctx) -{ - int ret = 0; + /* Clearing the transaction opinfo */ + ret = glusterd_clear_txn_opinfo(txn_id); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_CLEAR_FAIL, + "Unable to clear transaction's opinfo"); - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); - - return ret; + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; } static int -glusterd_op_ac_send_lock (glusterd_op_sm_event_t *event, void *ctx) +glusterd_op_ac_unlocked_all(glusterd_op_sm_event_t *event, void *ctx) { - int ret = 0; - rpc_clnt_procedure_t *proc = NULL; - glusterd_conf_t *priv = NULL; - xlator_t *this = NULL; - glusterd_peerinfo_t *peerinfo = NULL; - uint32_t pending_count = 0; + int ret = 0; - this = THIS; - priv = this->private; - GF_ASSERT (priv); + GF_ASSERT(event); - list_for_each_entry (peerinfo, &priv->peers, uuid_list) { - GF_ASSERT (peerinfo); + ret = glusterd_op_txn_complete(&event->txn_id); - if (!peerinfo->connected) - continue; - if ((peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) && - (glusterd_op_get_op() != GD_OP_SYNC_VOLUME)) - continue; + gf_msg_debug(THIS->name, 0, "Returning %d", ret); - proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_CLUSTER_LOCK]; - if (proc->fn) { - ret = proc->fn (NULL, this, peerinfo); - if (ret) - continue; - pending_count++; - } - } - - opinfo.pending_count = pending_count; - if (!opinfo.pending_count) - ret = glusterd_op_sm_inject_all_acc (); - - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); - - return ret; + return ret; } static int -glusterd_op_ac_send_unlock (glusterd_op_sm_event_t *event, void *ctx) -{ - int ret = 0; - rpc_clnt_procedure_t *proc = NULL; - glusterd_conf_t *priv = NULL; - xlator_t *this = NULL; - glusterd_peerinfo_t *peerinfo = NULL; - uint32_t pending_count = 0; - - this = THIS; - priv = this->private; - GF_ASSERT (priv); +glusterd_op_ac_stage_op(glusterd_op_sm_event_t *event, void *ctx) +{ + int ret = -1; + glusterd_req_ctx_t *req_ctx = NULL; + int32_t status = 0; + dict_t *rsp_dict = NULL; + char *op_errstr = NULL; + dict_t *dict = NULL; + xlator_t *this = NULL; + uuid_t *txn_id = NULL; + glusterd_op_info_t txn_op_info = { + {0}, + }; + glusterd_conf_t *priv = NULL; + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + GF_ASSERT(priv); + + GF_ASSERT(ctx); + + req_ctx = ctx; + + dict = req_ctx->dict; + + rsp_dict = dict_new(); + if (!rsp_dict) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_DICT_CREATE_FAIL, + "Failed to get new dictionary"); + return -1; + } + + status = glusterd_op_stage_validate(req_ctx->op, dict, &op_errstr, + rsp_dict); + + if (status) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VALIDATE_FAILED, + "Stage failed on operation" + " 'Volume %s', Status : %d", + gd_op_list[req_ctx->op], 
status); + } + + txn_id = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); + + if (txn_id) + gf_uuid_copy(*txn_id, event->txn_id); + else { + ret = -1; + goto out; + } + ret = glusterd_get_txn_opinfo(&event->txn_id, &txn_op_info); + + ret = dict_set_bin(rsp_dict, "transaction_id", txn_id, sizeof(*txn_id)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set transaction id."); + GF_FREE(txn_id); + txn_id = NULL; + goto out; + } + + ret = glusterd_op_stage_send_resp(req_ctx->req, req_ctx->op, status, + op_errstr, rsp_dict); - /*ret = glusterd_unlock (priv->uuid); +out: + if (op_errstr && (strcmp(op_errstr, ""))) + GF_FREE(op_errstr); - if (ret) - goto out; - */ + gf_msg_debug(this->name, 0, "Returning with %d", ret); - list_for_each_entry (peerinfo, &priv->peers, uuid_list) { - GF_ASSERT (peerinfo); + /* for no volname transactions, the txn_opinfo needs to be cleaned up + * as there's no unlock event triggered. However if the originator node of + * this transaction is still running with a version lower than 60000, + * txn_opinfo can't be cleared as that'll lead to a race of referring op_ctx + * after it's being freed. + */ + if (txn_op_info.skip_locking && priv->op_version >= GD_OP_VERSION_6_0 && + txn_id) + ret = glusterd_clear_txn_opinfo(txn_id); - if (!peerinfo->connected) - continue; - if ((peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) && - (glusterd_op_get_op() != GD_OP_SYNC_VOLUME)) - continue; + if (rsp_dict) + dict_unref(rsp_dict); - proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_CLUSTER_UNLOCK]; - if (proc->fn) { - ret = proc->fn (NULL, this, peerinfo); - if (ret) - continue; - pending_count++; - } - } + return ret; +} - opinfo.pending_count = pending_count; - if (!opinfo.pending_count) - ret = glusterd_op_sm_inject_all_acc (); +static gf_boolean_t +glusterd_need_brick_op(glusterd_op_t op) +{ + gf_boolean_t ret = _gf_false; - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + GF_ASSERT(GD_OP_NONE < op && op < GD_OP_MAX); - return ret; + switch (op) { + case GD_OP_PROFILE_VOLUME: + case GD_OP_STATUS_VOLUME: + case GD_OP_DEFRAG_BRICK_VOLUME: + case GD_OP_HEAL_VOLUME: + case GD_OP_SCRUB_STATUS: + case GD_OP_SCRUB_ONDEMAND: + ret = _gf_true; + break; + default: + ret = _gf_false; + } + return ret; } -static int -glusterd_op_ac_lock (glusterd_op_sm_event_t *event, void *ctx) +dict_t * +glusterd_op_init_commit_rsp_dict(glusterd_op_t op) { - int ret = 0; - glusterd_op_lock_ctx_t *lock_ctx = NULL; - int32_t status = 0; + dict_t *rsp_dict = NULL; + dict_t *op_ctx = NULL; + GF_ASSERT(GD_OP_NONE < op && op < GD_OP_MAX); - GF_ASSERT (event); - GF_ASSERT (ctx); + if (glusterd_need_brick_op(op)) { + op_ctx = glusterd_op_get_ctx(); + GF_ASSERT(op_ctx); + rsp_dict = dict_ref(op_ctx); + } else { + rsp_dict = dict_new(); + } - lock_ctx = (glusterd_op_lock_ctx_t *)ctx; - - status = glusterd_lock (lock_ctx->uuid); + return rsp_dict; +} - gf_log ("", GF_LOG_DEBUG, "Lock Returned %d", status); +static int +glusterd_op_ac_commit_op(glusterd_op_sm_event_t *event, void *ctx) +{ + int ret = 0; + glusterd_req_ctx_t *req_ctx = NULL; + int32_t status = 0; + char *op_errstr = NULL; + dict_t *dict = NULL; + dict_t *rsp_dict = NULL; + xlator_t *this = NULL; + uuid_t *txn_id = NULL; + glusterd_op_info_t txn_op_info = { + {0}, + }; + gf_boolean_t need_cleanup = _gf_true; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(ctx); + + req_ctx = ctx; + + dict = req_ctx->dict; + + rsp_dict = glusterd_op_init_commit_rsp_dict(req_ctx->op); + if (NULL == rsp_dict) + return -1; 
+ + if (GD_OP_CLEARLOCKS_VOLUME == req_ctx->op) { + /*clear locks should be run only on + * originator glusterd*/ + status = 0; + + } else { + status = glusterd_op_commit_perform(req_ctx->op, dict, &op_errstr, + rsp_dict); + } + + if (status) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_OP_FAIL, + "Commit of operation " + "'Volume %s' failed: %d", + gd_op_list[req_ctx->op], status); + + txn_id = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); + + if (txn_id) + gf_uuid_copy(*txn_id, event->txn_id); + else { + ret = -1; + goto out; + } + ret = glusterd_get_txn_opinfo(&event->txn_id, &txn_op_info); + if (ret) { + gf_msg_callingfn(this->name, GF_LOG_ERROR, 0, + GD_MSG_TRANS_OPINFO_GET_FAIL, + "Unable to get transaction opinfo " + "for transaction ID : %s", + uuid_utoa(event->txn_id)); + goto out; + } + + ret = dict_set_bin(rsp_dict, "transaction_id", txn_id, sizeof(*txn_id)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set transaction id."); + if (txn_op_info.skip_locking) + ret = glusterd_clear_txn_opinfo(txn_id); + need_cleanup = _gf_false; + GF_FREE(txn_id); + goto out; + } + + ret = glusterd_op_commit_send_resp(req_ctx->req, req_ctx->op, status, + op_errstr, rsp_dict); - ret = glusterd_op_lock_send_resp (lock_ctx->req, status); +out: + if (op_errstr && (strcmp(op_errstr, ""))) + GF_FREE(op_errstr); - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + if (rsp_dict) + dict_unref(rsp_dict); + /* for no volname transactions, the txn_opinfo needs to be cleaned up + * as there's no unlock event triggered + */ + if (need_cleanup && txn_id && txn_op_info.skip_locking) + ret = glusterd_clear_txn_opinfo(txn_id); + gf_msg_debug(this->name, 0, "Returning with %d", ret); - return ret; + return ret; } static int -glusterd_op_ac_unlock (glusterd_op_sm_event_t *event, void *ctx) +glusterd_op_ac_send_commit_failed(glusterd_op_sm_event_t *event, void *ctx) { - int ret = 0; - glusterd_op_lock_ctx_t *lock_ctx = NULL; + int ret = 0; + glusterd_req_ctx_t *req_ctx = NULL; + dict_t *op_ctx = NULL; - GF_ASSERT (event); - GF_ASSERT (ctx); + GF_ASSERT(ctx); - lock_ctx = (glusterd_op_lock_ctx_t *)ctx; + req_ctx = ctx; - ret = glusterd_unlock (lock_ctx->uuid); + op_ctx = glusterd_op_get_ctx(); - gf_log ("", GF_LOG_DEBUG, "Unlock Returned %d", ret); + ret = glusterd_op_commit_send_resp(req_ctx->req, req_ctx->op, opinfo.op_ret, + opinfo.op_errstr, op_ctx); - ret = glusterd_op_unlock_send_resp (lock_ctx->req, ret); + if (opinfo.op_errstr && (strcmp(opinfo.op_errstr, ""))) { + GF_FREE(opinfo.op_errstr); + opinfo.op_errstr = NULL; + } - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + ret = glusterd_set_txn_opinfo(&event->txn_id, &opinfo); + if (ret) + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL, + "Unable to set " + "transaction's opinfo"); - return ret; + gf_msg_debug(THIS->name, 0, "Returning with %d", ret); + return ret; } static int -glusterd_op_ac_rcvd_lock_acc (glusterd_op_sm_event_t *event, void *ctx) +glusterd_op_sm_transition_state(glusterd_op_info_t *opinfo, + glusterd_op_sm_t *state, + glusterd_op_sm_event_type_t event_type) { - int ret = 0; - - GF_ASSERT (event); + glusterd_conf_t *conf = NULL; - if (opinfo.pending_count > 0) - opinfo.pending_count--; - - if (opinfo.pending_count > 0) - goto out; + GF_ASSERT(state); + GF_ASSERT(opinfo); - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACC, NULL); + conf = THIS->private; + GF_ASSERT(conf); - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + 
(void)glusterd_sm_tr_log_transition_add( + &conf->op_sm_log, opinfo->state.state, state[event_type].next_state, + event_type); -out: - return ret; + opinfo->state.state = state[event_type].next_state; + return 0; } -int -glusterd_op_build_payload (glusterd_op_t op, dict_t **req) +int32_t +glusterd_op_stage_validate(glusterd_op_t op, dict_t *dict, char **op_errstr, + dict_t *rsp_dict) { - int ret = -1; - void *ctx = NULL; - dict_t *req_dict = NULL; - - GF_ASSERT (op < GD_OP_MAX); - GF_ASSERT (op > GD_OP_NONE); - GF_ASSERT (req); - - req_dict = dict_new (); - if (!req_dict) - goto out; - - ctx = (void*)glusterd_op_get_ctx (op); - if (!ctx) { - gf_log ("", GF_LOG_ERROR, "Null Context for " - "op %d", op); - ret = -1; - goto out; - } - - switch (op) { - case GD_OP_CREATE_VOLUME: - { - dict_t *dict = ctx; - ++glusterfs_port; - ret = dict_set_int32 (dict, "port", glusterfs_port); - if (ret) - goto out; - dict_copy (dict, req_dict); - } - break; - - case GD_OP_DELETE_VOLUME: - { - glusterd_op_delete_volume_ctx_t *ctx1 = ctx; - ret = dict_set_str (req_dict, "volname", - ctx1->volume_name); - if (ret) - goto out; - } - break; - - case GD_OP_START_VOLUME: - case GD_OP_STOP_VOLUME: - case GD_OP_ADD_BRICK: - case GD_OP_REPLACE_BRICK: - case GD_OP_SET_VOLUME: - case GD_OP_RESET_VOLUME: - case GD_OP_REMOVE_BRICK: - case GD_OP_LOG_FILENAME: - case GD_OP_LOG_ROTATE: - case GD_OP_SYNC_VOLUME: - case GD_OP_QUOTA: - case GD_OP_GSYNC_SET: - case GD_OP_PROFILE_VOLUME: - { - dict_t *dict = ctx; - dict_copy (dict, req_dict); - } - break; - - default: - break; - } - - *req = req_dict; - ret = 0; + int ret = -1; + xlator_t *this = THIS; -out: - return ret; -} + switch (op) { + case GD_OP_CREATE_VOLUME: + ret = glusterd_op_stage_create_volume(dict, op_errstr, rsp_dict); + break; -static int -glusterd_op_ac_send_stage_op (glusterd_op_sm_event_t *event, void *ctx) -{ - int ret = 0; - rpc_clnt_procedure_t *proc = NULL; - glusterd_conf_t *priv = NULL; - xlator_t *this = NULL; - glusterd_peerinfo_t *peerinfo = NULL; - dict_t *dict = NULL; - char *op_errstr = NULL; - int i = 0; - uint32_t pending_count = 0; - - this = THIS; - GF_ASSERT (this); - priv = this->private; - GF_ASSERT (priv); - - for ( i = GD_OP_NONE; i < GD_OP_MAX; i++) { - if (opinfo.pending_op[i]) - break; - } - - if (GD_OP_MAX == i) { - //No pending ops, inject stage_acc - ret = glusterd_op_sm_inject_event - (GD_OP_EVENT_STAGE_ACC, NULL); + case GD_OP_START_VOLUME: + ret = glusterd_op_stage_start_volume(dict, op_errstr, rsp_dict); + break; - return ret; - } + case GD_OP_STOP_VOLUME: + ret = glusterd_op_stage_stop_volume(dict, op_errstr); + break; - glusterd_op_clear_pending_op (i); + case GD_OP_DELETE_VOLUME: + ret = glusterd_op_stage_delete_volume(dict, op_errstr); + break; - ret = glusterd_op_build_payload (i, &dict); - if (ret) - goto out; + case GD_OP_ADD_BRICK: + ret = glusterd_op_stage_add_brick(dict, op_errstr, rsp_dict); + break; - /* rsp_dict NULL from source */ - ret = glusterd_op_stage_validate (i, dict, &op_errstr, NULL); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Staging failed"); - opinfo.op_errstr = op_errstr; - goto out; - } + case GD_OP_REPLACE_BRICK: + ret = glusterd_op_stage_replace_brick(dict, op_errstr, rsp_dict); + break; - list_for_each_entry (peerinfo, &priv->peers, uuid_list) { - GF_ASSERT (peerinfo); + case GD_OP_SET_VOLUME: + ret = glusterd_op_stage_set_volume(dict, op_errstr); + break; - if (!peerinfo->connected) - continue; - if ((peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) && - (glusterd_op_get_op() != 
GD_OP_SYNC_VOLUME)) - continue; + case GD_OP_GANESHA: + ret = glusterd_op_stage_set_ganesha(dict, op_errstr); + break; - proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_STAGE_OP]; - GF_ASSERT (proc); - if (proc->fn) { - ret = dict_set_static_ptr (dict, "peerinfo", peerinfo); - if (ret) { - gf_log ("", GF_LOG_ERROR, "failed to set peerinfo"); - goto out; - } + case GD_OP_RESET_VOLUME: + ret = glusterd_op_stage_reset_volume(dict, op_errstr); + break; + case GD_OP_REMOVE_BRICK: + ret = glusterd_op_stage_remove_brick(dict, op_errstr); + break; - ret = proc->fn (NULL, this, dict); - if (ret) - continue; - pending_count++; - } - } + case GD_OP_LOG_ROTATE: + ret = glusterd_op_stage_log_rotate(dict, op_errstr); + break; - opinfo.pending_count = pending_count; -out: - if (dict) - dict_unref (dict); - if (ret) { - glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, NULL); - opinfo.op_ret = ret; - } + case GD_OP_SYNC_VOLUME: + ret = glusterd_op_stage_sync_volume(dict, op_errstr); + break; - gf_log ("glusterd", GF_LOG_INFO, "Sent op req to %d peers", - opinfo.pending_count); + case GD_OP_GSYNC_CREATE: + ret = glusterd_op_stage_gsync_create(dict, op_errstr); + break; - if (!opinfo.pending_count) - ret = glusterd_op_sm_inject_all_acc (); + case GD_OP_GSYNC_SET: + ret = glusterd_op_stage_gsync_set(dict, op_errstr); + break; - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + case GD_OP_PROFILE_VOLUME: + ret = glusterd_op_stage_stats_volume(dict, op_errstr); + break; + + case GD_OP_QUOTA: + ret = glusterd_op_stage_quota(dict, op_errstr, rsp_dict); + break; + + case GD_OP_STATUS_VOLUME: + ret = glusterd_op_stage_status_volume(dict, op_errstr); + break; + + case GD_OP_REBALANCE: + case GD_OP_DEFRAG_BRICK_VOLUME: + ret = glusterd_op_stage_rebalance(dict, op_errstr); + break; + + case GD_OP_HEAL_VOLUME: + ret = glusterd_op_stage_heal_volume(dict, op_errstr); + break; + + case GD_OP_STATEDUMP_VOLUME: + ret = glusterd_op_stage_statedump_volume(dict, op_errstr); + break; + case GD_OP_CLEARLOCKS_VOLUME: + ret = glusterd_op_stage_clearlocks_volume(dict, op_errstr); + break; + + case GD_OP_COPY_FILE: + ret = glusterd_op_stage_copy_file(dict, op_errstr); + break; + + case GD_OP_SYS_EXEC: + ret = glusterd_op_stage_sys_exec(dict, op_errstr); + break; + + case GD_OP_BARRIER: + ret = glusterd_op_stage_barrier(dict, op_errstr); + break; + + case GD_OP_BITROT: + case GD_OP_SCRUB_STATUS: + case GD_OP_SCRUB_ONDEMAND: + ret = glusterd_op_stage_bitrot(dict, op_errstr, rsp_dict); + break; - return ret; + default: + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY, + "Unknown op %s", gd_op_list[op]); + } + gf_msg_debug(this->name, 0, "OP = %d. 
Returning %d", op, ret); + return ret; } -static int32_t -glusterd_op_start_rb_timer (dict_t *dict) +static void +glusterd_wait_for_blockers(glusterd_conf_t *priv) { - int32_t op = 0; - struct timeval timeout = {0, }; - glusterd_conf_t *priv = NULL; - int32_t ret = -1; - - GF_ASSERT (dict); - priv = THIS->private; - - ret = dict_get_int32 (dict, "operation", &op); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "dict_get on operation failed"); - goto out; - } - - if (op == GF_REPLACE_OP_START || - op == GF_REPLACE_OP_ABORT) - timeout.tv_sec = 5; - else - timeout.tv_sec = 1; - - timeout.tv_usec = 0; - - - priv->timer = gf_timer_call_after (THIS->ctx, timeout, - glusterd_do_replace_brick, - (void *) dict); - - ret = 0; - -out: - return ret; + while (GF_ATOMIC_GET(priv->blockers)) { + synccond_wait(&priv->cond_blockers, &priv->big_lock); + } } -static int -glusterd_op_ac_send_commit_op (glusterd_op_sm_event_t *event, void *ctx) +int32_t +glusterd_op_commit_perform(glusterd_op_t op, dict_t *dict, char **op_errstr, + dict_t *rsp_dict) { - int ret = 0; - rpc_clnt_procedure_t *proc = NULL; - glusterd_conf_t *priv = NULL; - xlator_t *this = NULL; - dict_t *dict = NULL; - dict_t *op_dict = NULL; - glusterd_peerinfo_t *peerinfo = NULL; - char *op_errstr = NULL; - int i = 0; - uint32_t pending_count = 0; - - this = THIS; - GF_ASSERT (this); - priv = this->private; - GF_ASSERT (priv); + int ret = -1; + xlator_t *this = THIS; - for ( i = GD_OP_NONE; i < GD_OP_MAX; i++) { - if (opinfo.commit_op[i]) - break; - } + glusterd_op_commit_hook(op, dict, GD_COMMIT_HOOK_PRE); + switch (op) { + case GD_OP_CREATE_VOLUME: + ret = glusterd_op_create_volume(dict, op_errstr); + break; - if (GD_OP_MAX == i) { - //No pending ops, return - return 0; - } + case GD_OP_START_VOLUME: + ret = glusterd_op_start_volume(dict, op_errstr); + break; - glusterd_op_clear_commit_op (i); + case GD_OP_STOP_VOLUME: + ret = glusterd_op_stop_volume(dict); + break; + + case GD_OP_DELETE_VOLUME: + glusterd_wait_for_blockers(this->private); + ret = glusterd_op_delete_volume(dict); + break; + + case GD_OP_ADD_BRICK: + glusterd_wait_for_blockers(this->private); + ret = glusterd_op_add_brick(dict, op_errstr); + break; + + case GD_OP_REPLACE_BRICK: + glusterd_wait_for_blockers(this->private); + ret = glusterd_op_replace_brick(dict, rsp_dict); + break; + + case GD_OP_SET_VOLUME: + ret = glusterd_op_set_volume(dict, op_errstr); + break; + case GD_OP_GANESHA: + ret = glusterd_op_set_ganesha(dict, op_errstr); + break; + case GD_OP_RESET_VOLUME: + ret = glusterd_op_reset_volume(dict, op_errstr); + break; - ret = glusterd_op_build_payload (i, &dict); + case GD_OP_REMOVE_BRICK: + glusterd_wait_for_blockers(this->private); + ret = glusterd_op_remove_brick(dict, op_errstr); + break; - if (ret) - goto out; + case GD_OP_LOG_ROTATE: + ret = glusterd_op_log_rotate(dict); + break; - ret = glusterd_op_commit_perform (i, dict, &op_errstr, NULL); //rsp_dict invalid for source - if (ret) { - gf_log ("", GF_LOG_ERROR, "Commit failed"); - opinfo.op_errstr = op_errstr; - goto out; - } + case GD_OP_SYNC_VOLUME: + ret = glusterd_op_sync_volume(dict, op_errstr, rsp_dict); + break; - list_for_each_entry (peerinfo, &priv->peers, uuid_list) { - GF_ASSERT (peerinfo); + case GD_OP_GSYNC_CREATE: + ret = glusterd_op_gsync_create(dict, op_errstr, rsp_dict); + break; - if (!peerinfo->connected) - continue; - if ((peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) && - (glusterd_op_get_op() != GD_OP_SYNC_VOLUME)) - continue; + case GD_OP_GSYNC_SET: + ret = 
glusterd_op_gsync_set(dict, op_errstr, rsp_dict); + break; - proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_COMMIT_OP]; - GF_ASSERT (proc); - if (proc->fn) { - ret = dict_set_static_ptr (dict, "peerinfo", peerinfo); - if (ret) { - gf_log ("", GF_LOG_ERROR, "failed to set peerinfo"); - goto out; - } - ret = proc->fn (NULL, this, dict); - if (ret) - continue; - pending_count++; - } - } + case GD_OP_PROFILE_VOLUME: + ret = glusterd_op_stats_volume(dict, op_errstr, rsp_dict); + break; - opinfo.pending_count = pending_count; - gf_log ("glusterd", GF_LOG_INFO, "Sent op req to %d peers", - opinfo.pending_count); -out: - if (dict) - dict_unref (dict); - if (ret) { - glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, NULL); - opinfo.op_ret = ret; - } + case GD_OP_QUOTA: + ret = glusterd_op_quota(dict, op_errstr, rsp_dict); + break; - if (!opinfo.pending_count) { - op_dict = glusterd_op_get_ctx (GD_OP_REPLACE_BRICK); - if (!op_dict) { - ret = glusterd_op_sm_inject_all_acc (); - goto err; - } + case GD_OP_STATUS_VOLUME: + ret = glusterd_op_status_volume(dict, op_errstr, rsp_dict); + break; - op_dict = dict_ref (op_dict); - ret = glusterd_op_start_rb_timer (op_dict); - } + case GD_OP_REBALANCE: + case GD_OP_DEFRAG_BRICK_VOLUME: + ret = glusterd_op_rebalance(dict, op_errstr, rsp_dict); + break; -err: - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + case GD_OP_HEAL_VOLUME: + ret = glusterd_op_heal_volume(dict, op_errstr); + break; - return ret; + case GD_OP_STATEDUMP_VOLUME: + ret = glusterd_op_statedump_volume(dict, op_errstr); + break; -} + case GD_OP_CLEARLOCKS_VOLUME: + ret = glusterd_op_clearlocks_volume(dict, op_errstr, rsp_dict); + break; -static int -glusterd_op_ac_rcvd_stage_op_acc (glusterd_op_sm_event_t *event, void *ctx) -{ - int ret = 0; + case GD_OP_COPY_FILE: + ret = glusterd_op_copy_file(dict, op_errstr); + break; - GF_ASSERT (event); + case GD_OP_SYS_EXEC: + ret = glusterd_op_sys_exec(dict, op_errstr, rsp_dict); + break; - if (opinfo.pending_count > 0) - opinfo.pending_count--; + case GD_OP_BARRIER: + ret = glusterd_op_barrier(dict, op_errstr); + break; - if (opinfo.pending_count > 0) - goto out; + case GD_OP_BITROT: + case GD_OP_SCRUB_STATUS: + case GD_OP_SCRUB_ONDEMAND: + ret = glusterd_op_bitrot(dict, op_errstr, rsp_dict); + break; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_STAGE_ACC, NULL); + default: + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY, + "Unknown op %s", gd_op_list[op]); + break; + } -out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + if (ret == 0) + glusterd_op_commit_hook(op, dict, GD_COMMIT_HOOK_POST); - return ret; + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; } static int -glusterd_op_ac_stage_op_failed (glusterd_op_sm_event_t *event, void *ctx) -{ - int ret = 0; - - GF_ASSERT (event); - - if (opinfo.pending_count > 0) - opinfo.pending_count--; - - if (opinfo.pending_count > 0) +glusterd_bricks_select_stop_volume(dict_t *dict, char **op_errstr, + struct cds_list_head *selected) +{ + int ret = 0; + int flags = 0; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_pending_node_t *pending_node = NULL; + + ret = glusterd_op_stop_volume_args_get(dict, &volname, &flags); + if (ret) + goto out; + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, + FMTSTR_CHECK_VOL_EXISTS, volname); + gf_asprintf(op_errstr, FMTSTR_CHECK_VOL_EXISTS, volname); + goto out; + } + + 
cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + if (glusterd_is_brick_started(brickinfo)) { + pending_node = GF_CALLOC(1, sizeof(*pending_node), + gf_gld_mt_pending_node_t); + if (!pending_node) { + ret = -1; goto out; - - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, NULL); + } else { + pending_node->node = brickinfo; + pending_node->type = GD_NODE_BRICK; + cds_list_add_tail(&pending_node->list, selected); + pending_node = NULL; + } + /* + * This is not really the right place to do it, but + * it's the most convenient. + * TBD: move this to *after* the RPC + */ + brickinfo->status = GF_BRICK_STOPPED; + } + } out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - - return ret; + return ret; } static int -glusterd_op_ac_commit_op_failed (glusterd_op_sm_event_t *event, void *ctx) -{ - int ret = 0; - - GF_ASSERT (event); - - if (opinfo.pending_count > 0) - opinfo.pending_count--; - - if (opinfo.pending_count > 0) - goto out; - - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, NULL); - -out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); +glusterd_bricks_select_remove_brick(dict_t *dict, char **op_errstr, + struct cds_list_head *selected) +{ + int ret = -1; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + char *brick = NULL; + int32_t count = 0; + int32_t i = 1; + char key[64] = { + 0, + }; + int keylen; + glusterd_pending_node_t *pending_node = NULL; + int32_t command = 0; + int32_t force = 0; + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get volume name"); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, + "Unable to allocate memory"); + goto out; + } + + ret = dict_get_int32n(dict, "count", SLEN("count"), &count); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, -ret, GD_MSG_DICT_GET_FAILED, + "Unable to get count"); + goto out; + } + + ret = dict_get_int32n(dict, "command", SLEN("command"), &command); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, -ret, GD_MSG_DICT_GET_FAILED, + "Unable to get command"); + goto out; + } + + ret = dict_get_int32n(dict, "force", SLEN("force"), &force); + if (ret) { + gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_DICT_GET_FAILED, + "force flag is not set"); + ret = 0; + goto out; + } - return ret; -} + while (i <= count) { + keylen = snprintf(key, sizeof(key), "brick%d", i); -static int -glusterd_op_ac_brick_op_failed (glusterd_op_sm_event_t *event, void *ctx) -{ - int ret = 0; - glusterd_op_brick_rsp_ctx_t *ev_ctx = NULL; - glusterd_brickinfo_t *brickinfo = NULL; - gf_boolean_t free_errstr = _gf_false; - - GF_ASSERT (event); - GF_ASSERT (ctx); - ev_ctx = ctx; - brickinfo = ev_ctx->brickinfo; - GF_ASSERT (brickinfo); - - ret = glusterd_remove_pending_entry (&opinfo.pending_bricks, brickinfo); + ret = dict_get_strn(dict, key, keylen, &brick); if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, "unknown response received " - "from %s:%s", brickinfo->hostname, brickinfo->path); - ret = -1; - free_errstr = _gf_true; - goto out; + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get brick"); + goto out; } - if (opinfo.brick_pending_count > 0) - opinfo.brick_pending_count--; - if (opinfo.op_ret == 0) - opinfo.op_ret = ev_ctx->op_ret; - if (opinfo.op_errstr == NULL) - opinfo.op_errstr = ev_ctx->op_errstr; - else - free_errstr = _gf_true; + ret = 
glusterd_volume_brickinfo_get_by_brick(brick, volinfo, &brickinfo, + _gf_false); - if (opinfo.brick_pending_count > 0) - goto out; + if (ret) + goto out; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, ev_ctx->commit_ctx); + if (glusterd_is_brick_started(brickinfo)) { + pending_node = GF_CALLOC(1, sizeof(*pending_node), + gf_gld_mt_pending_node_t); + if (!pending_node) { + ret = -1; + goto out; + } else { + pending_node->node = brickinfo; + pending_node->type = GD_NODE_BRICK; + cds_list_add_tail(&pending_node->list, selected); + pending_node = NULL; + } + /* + * This is not really the right place to do it, but + * it's the most convenient. + * TBD: move this to *after* the RPC + */ + brickinfo->status = GF_BRICK_STOPPED; + } + i++; + } out: - if (ev_ctx->rsp_dict) - dict_unref (ev_ctx->rsp_dict); - if (free_errstr && ev_ctx->op_errstr) - GF_FREE (ev_ctx->op_errstr); - GF_FREE (ctx); - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - - return ret; + return ret; } -void -glusterd_op_brick_disconnect (void *data) -{ - glusterd_brickinfo_t *brickinfo = NULL; - glusterd_op_brick_rsp_ctx_t *ev_ctx = NULL; - - ev_ctx = data; - GF_ASSERT (ev_ctx); - brickinfo = ev_ctx->brickinfo; - GF_ASSERT (brickinfo); - - if (brickinfo->timer) { - gf_timer_call_cancel (THIS->ctx, brickinfo->timer); - brickinfo->timer = NULL; - gf_log ("", GF_LOG_DEBUG, - "Cancelled timer thread"); - } - - glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_ACC, ev_ctx); - glusterd_op_sm (); -} - -void -glusterd_do_replace_brick (void *data) -{ - glusterd_volinfo_t *volinfo = NULL; - int32_t op = 0; - int32_t src_port = 0; - int32_t dst_port = 0; - dict_t *dict = NULL; - char *src_brick = NULL; - char *dst_brick = NULL; - char *volname = NULL; - glusterd_brickinfo_t *src_brickinfo = NULL; - glusterd_brickinfo_t *dst_brickinfo = NULL; - glusterd_conf_t *priv = NULL; - - int ret = 0; - - dict = data; - - GF_ASSERT (THIS); - - priv = THIS->private; - - if (priv->timer) { - gf_timer_call_cancel (THIS->ctx, priv->timer); - priv->timer = NULL; - gf_log ("", GF_LOG_DEBUG, - "Cancelled timer thread"); - } - - gf_log ("", GF_LOG_DEBUG, - "Replace brick operation detected"); - - ret = dict_get_int32 (dict, "operation", &op); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "dict_get on operation failed"); - goto out; - } - ret = dict_get_str (dict, "src-brick", &src_brick); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get src brick"); - goto out; - } - - gf_log ("", GF_LOG_DEBUG, - "src brick=%s", src_brick); - - ret = dict_get_str (dict, "dst-brick", &dst_brick); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get dst brick"); - goto out; - } - - gf_log ("", GF_LOG_DEBUG, - "dst brick=%s", dst_brick); - - ret = dict_get_str (dict, "volname", &volname); - - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); - goto out; - } - - ret = glusterd_volinfo_find (volname, &volinfo); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to allocate memory"); - goto out; - } - - ret = glusterd_volume_brickinfo_get_by_brick (src_brick, volinfo, &src_brickinfo); - if (ret) { - gf_log ("", GF_LOG_DEBUG, "Unable to get src-brickinfo"); - goto out; - } +static int +glusterd_bricks_select_profile_volume(dict_t *dict, char **op_errstr, + struct cds_list_head *selected) +{ + int ret = -1; + char *volname = NULL; + char msg[2048] = { + 0, + }; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + int32_t stats_op = GF_CLI_STATS_NONE; + glusterd_brickinfo_t *brickinfo = NULL; + 
glusterd_pending_node_t *pending_node = NULL; + char *brick = NULL; + int32_t pid = -1; + char pidfile[PATH_MAX] = {0}; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "volume name get failed"); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + snprintf(msg, sizeof(msg), "Volume %s does not exists", volname); + + *op_errstr = gf_strdup(msg); + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, "%s", msg); + goto out; + } + + ret = dict_get_int32n(dict, "op", SLEN("op"), &stats_op); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "volume profile op get failed"); + goto out; + } + + switch (stats_op) { + case GF_CLI_STATS_START: + case GF_CLI_STATS_STOP: + goto out; + break; + case GF_CLI_STATS_INFO: +#ifdef BUILD_GNFS + ret = dict_get_str_boolean(dict, "nfs", _gf_false); + if (ret) { + if (!priv->nfs_svc.online) { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_NFS_SERVER_NOT_RUNNING, + "NFS server" + " is not running"); + goto out; + } + pending_node = GF_CALLOC(1, sizeof(*pending_node), + gf_gld_mt_pending_node_t); + if (!pending_node) { + ret = -1; + goto out; + } + pending_node->node = &(priv->nfs_svc); + pending_node->type = GD_NODE_NFS; + cds_list_add_tail(&pending_node->list, selected); + pending_node = NULL; - ret = glusterd_get_rb_dst_brickinfo (volinfo, &dst_brickinfo); - if (!dst_brickinfo) { - gf_log ("", GF_LOG_DEBUG, "Unable to get dst-brickinfo"); + ret = 0; goto out; - } + } +#endif + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + if (glusterd_is_brick_started(brickinfo)) { + /* + * In normal use, glusterd_is_brick_started + * will give us the answer we need. However, + * in our tests the brick gets detached behind + * our back, so we need to double-check this + * way. 
+ */ + GLUSTERD_GET_BRICK_PIDFILE(pidfile, volinfo, brickinfo, + priv); + if (!gf_is_service_running(pidfile, &pid)) { + continue; + } + pending_node = GF_CALLOC(1, sizeof(*pending_node), + gf_gld_mt_pending_node_t); + if (!pending_node) { + ret = -1; + goto out; + } else { + pending_node->node = brickinfo; + pending_node->type = GD_NODE_BRICK; + cds_list_add_tail(&pending_node->list, selected); + pending_node = NULL; + } + } + } + break; - ret = glusterd_resolve_brick (dst_brickinfo); - if (ret) { - gf_log ("", GF_LOG_DEBUG, "Unable to resolve dst-brickinfo"); - goto out; - } + case GF_CLI_STATS_TOP: +#ifdef BUILD_GNFS + ret = dict_get_str_boolean(dict, "nfs", _gf_false); + if (ret) { + if (!priv->nfs_svc.online) { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_NFS_SERVER_NOT_RUNNING, + "NFS server" + " is not running"); + goto out; + } + pending_node = GF_CALLOC(1, sizeof(*pending_node), + gf_gld_mt_pending_node_t); + if (!pending_node) { + ret = -1; + goto out; + } + pending_node->node = &(priv->nfs_svc); + pending_node->type = GD_NODE_NFS; + cds_list_add_tail(&pending_node->list, selected); + pending_node = NULL; - ret = dict_get_int32 (dict, "src-brick-port", &src_port); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get src-brick port"); + ret = 0; goto out; - } - - ret = dict_get_int32 (dict, "dst-brick-port", &dst_port); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get dst-brick port"); - } + } +#endif + ret = dict_get_strn(dict, "brick", SLEN("brick"), &brick); + if (!ret) { + ret = glusterd_volume_brickinfo_get_by_brick( + brick, volinfo, &brickinfo, _gf_true); + if (ret) + goto out; - dst_brickinfo->port = dst_port; - src_brickinfo->port = src_port; + if (!glusterd_is_brick_started(brickinfo)) + goto out; - switch (op) { - case GF_REPLACE_OP_START: - if (!dst_port) { + pending_node = GF_CALLOC(1, sizeof(*pending_node), + gf_gld_mt_pending_node_t); + if (!pending_node) { + ret = -1; + goto out; + } else { + pending_node->node = brickinfo; + pending_node->type = GD_NODE_BRICK; + cds_list_add_tail(&pending_node->list, selected); + pending_node = NULL; + goto out; + } + } + ret = 0; + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + if (glusterd_is_brick_started(brickinfo)) { + pending_node = GF_CALLOC(1, sizeof(*pending_node), + gf_gld_mt_pending_node_t); + if (!pending_node) { ret = -1; goto out; + } else { + pending_node->node = brickinfo; + pending_node->type = GD_NODE_BRICK; + cds_list_add_tail(&pending_node->list, selected); + pending_node = NULL; + } } + } + break; - ret = rb_do_operation_start (volinfo, src_brickinfo, dst_brickinfo); - if (ret) { - glusterd_set_rb_status (volinfo, GF_RB_STATUS_NONE); - goto out; - } - break; - case GF_REPLACE_OP_PAUSE: - case GF_REPLACE_OP_ABORT: - case GF_REPLACE_OP_COMMIT: - case GF_REPLACE_OP_COMMIT_FORCE: - case GF_REPLACE_OP_STATUS: - break; default: - ret = -1; - goto out; - } + GF_ASSERT(0); + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY, + "Invalid profile op: %d", stats_op); + ret = -1; + goto out; + break; + } out: - if (ret) - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, NULL); - else - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_COMMIT_ACC, NULL); + gf_msg_debug("glusterd", 0, "Returning %d", ret); -// if (dict) -// dict_unref (dict); - - glusterd_op_sm (); + return ret; } - - -static int -glusterd_op_ac_rcvd_commit_op_acc (glusterd_op_sm_event_t *event, void *ctx) +int +_get_hxl_children_count(glusterd_volinfo_t *volinfo) { - glusterd_conf_t *priv = NULL; - 
dict_t *dict = NULL; - int ret = 0; - gf_boolean_t commit_ack_inject = _gf_false; - - priv = THIS->private; - GF_ASSERT (event); - - if (opinfo.pending_count > 0) - opinfo.pending_count--; - - if (opinfo.pending_count > 0) - goto out; - - dict = glusterd_op_get_ctx (GD_OP_REPLACE_BRICK); - if (dict) { - ret = glusterd_op_start_rb_timer (dict); - if (ret) - goto out; - commit_ack_inject = _gf_false; - goto out; - } - - commit_ack_inject = _gf_true; -out: - if (commit_ack_inject) { - if (ret) - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, NULL); - else - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_COMMIT_ACC, NULL); - } - - return ret; + if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE) { + return volinfo->disperse_count; + } else { + return volinfo->replica_count; + } } static int -glusterd_op_ac_rcvd_unlock_acc (glusterd_op_sm_event_t *event, void *ctx) -{ - int ret = 0; - - GF_ASSERT (event); - - if (opinfo.pending_count > 0) - opinfo.pending_count--; - - if (opinfo.pending_count > 0) - goto out; - - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACC, NULL); - - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - +_add_hxlator_to_dict(dict_t *dict, glusterd_volinfo_t *volinfo, int index, + int count) +{ + int ret = -1; + char key[64] = { + 0, + }; + int keylen; + char *xname = NULL; + char *xl_type = 0; + + if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE) { + xl_type = "disperse"; + } else { + xl_type = "replicate"; + } + keylen = snprintf(key, sizeof(key), "xl-%d", count); + ret = gf_asprintf(&xname, "%s-%s-%d", volinfo->volname, xl_type, index); + if (ret == -1) + goto out; + + ret = dict_set_dynstrn(dict, key, keylen, xname); + if (ret) + goto out; + + ret = dict_set_int32(dict, xname, index); out: - return ret; -} - - -int32_t -glusterd_op_clear_errstr() { - opinfo.op_errstr = NULL; - return 0; + return ret; } -int32_t -glusterd_op_set_ctx (glusterd_op_t op, void *ctx) -{ - - GF_ASSERT (op < GD_OP_MAX); - GF_ASSERT (op > GD_OP_NONE); - - opinfo.op_ctx[op] = ctx; - - return 0; - -} - -int32_t -glusterd_op_reset_ctx (glusterd_op_t op) +int +get_replica_index_for_per_replica_cmd(glusterd_volinfo_t *volinfo, dict_t *dict) { + int ret = 0; + char *hostname = NULL; + char *path = NULL; + int index = 0; + glusterd_brickinfo_t *brickinfo = NULL; + int cmd_replica_index = -1; + int replica_count = -1; - GF_ASSERT (op < GD_OP_MAX); - GF_ASSERT (op > GD_OP_NONE); + if (!dict) { + ret = -1; + goto out; + } + + ret = dict_get_strn(dict, "per-replica-cmd-hostname", + SLEN("per-replica-cmd-hostname"), &hostname); + if (ret) + goto out; + ret = dict_get_strn(dict, "per-replica-cmd-path", + SLEN("per-replica-cmd-path"), &path); + if (ret) + goto out; + + replica_count = volinfo->replica_count; + + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + if (gf_uuid_is_null(brickinfo->uuid)) + (void)glusterd_resolve_brick(brickinfo); + if (!strcmp(brickinfo->path, path) && + !strcmp(brickinfo->hostname, hostname)) { + cmd_replica_index = index / (replica_count); + goto out; + } + index++; + } - glusterd_op_set_ctx (op, NULL); +out: + if (ret) + cmd_replica_index = -1; - return 0; + return cmd_replica_index; } -int32_t -glusterd_op_txn_complete () +int +_select_hxlator_with_matching_brick(xlator_t *this, glusterd_volinfo_t *volinfo, + dict_t *dict, int *index) { - int32_t ret = -1; - glusterd_conf_t *priv = NULL; - int32_t op = -1; - int32_t op_ret = 0; - int32_t op_errno = 0; - int32_t cli_op = 0; - rpcsvc_request_t *req = NULL; - void *ctx = NULL; - gf_boolean_t ctx_free = 
_gf_false; - char *op_errstr = NULL; + char *path = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + int hxl_children = 0; + if (!dict || dict_get_strn(dict, "per-replica-cmd-path", + SLEN("per-replica-cmd-path"), &path)) + return -1; - priv = THIS->private; - GF_ASSERT (priv); + hxl_children = _get_hxl_children_count(volinfo); + if ((*index) == 0) + (*index)++; - ret = glusterd_unlock (priv->uuid); + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + if (gf_uuid_is_null(brickinfo->uuid)) + (void)glusterd_resolve_brick(brickinfo); - if (ret) { - gf_log ("glusterd", GF_LOG_CRITICAL, - "Unable to clear local lock, ret: %d", ret); - goto out; + if ((!gf_uuid_compare(MY_UUID, brickinfo->uuid)) && + (!strncmp(brickinfo->path, path, strlen(path)))) { + _add_hxlator_to_dict(dict, volinfo, ((*index) - 1) / hxl_children, + 0); + return 1; } + (*index)++; + } - gf_log ("glusterd", GF_LOG_INFO, "Cleared local lock"); - - op_ret = opinfo.op_ret; - op_errno = opinfo.op_errno; - cli_op = opinfo.cli_op; - req = opinfo.req; - if (opinfo.op_errstr) - op_errstr = opinfo.op_errstr; - + return 0; +} +void +_select_hxlators_with_local_bricks(xlator_t *this, glusterd_volinfo_t *volinfo, + dict_t *dict, int *index, int *hxlator_count) +{ + glusterd_brickinfo_t *brickinfo = NULL; + int hxl_children = 0; + gf_boolean_t add = _gf_false; - opinfo.op_ret = 0; - opinfo.op_errno = 0; + hxl_children = _get_hxl_children_count(volinfo); - op = glusterd_op_get_op (); + if ((*index) == 0) + (*index)++; - if (op != -1) { - glusterd_op_clear_pending_op (op); - glusterd_op_clear_commit_op (op); - glusterd_op_clear_op (op); - ctx = glusterd_op_get_ctx (op); - ctx_free = glusterd_op_get_ctx_free (op); - glusterd_op_reset_ctx (op); - glusterd_op_clear_ctx_free (op); - glusterd_op_clear_errstr (); - } + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + if (gf_uuid_is_null(brickinfo->uuid)) + (void)glusterd_resolve_brick(brickinfo); -out: - pthread_mutex_unlock (&opinfo.lock); - ret = glusterd_op_send_cli_response (cli_op, op_ret, - op_errno, req, ctx, op_errstr); + if (!gf_uuid_compare(MY_UUID, brickinfo->uuid)) + add = _gf_true; - if (ret) { - gf_log ("", GF_LOG_ERROR, "Responding to cli failed, ret: %d", - ret); - //Ignore this error, else state machine blocks - ret = 0; + if ((*index) % hxl_children == 0) { + if (add) { + _add_hxlator_to_dict(dict, volinfo, + ((*index) - 1) / hxl_children, + (*hxlator_count)); + (*hxlator_count)++; + } + add = _gf_false; } - if (ctx_free && ctx && (op != -1)) - glusterd_op_free_ctx (op, ctx, ctx_free); - if (op_errstr && (strcmp (op_errstr, ""))) - GF_FREE (op_errstr); - - gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + (*index)++; + } } -static int -glusterd_op_ac_unlocked_all (glusterd_op_sm_event_t *event, void *ctx) -{ - int ret = 0; - - GF_ASSERT (event); - - ret = glusterd_op_txn_complete (); - - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - - return ret; +int +_select_hxlators_for_full_self_heal(xlator_t *this, glusterd_volinfo_t *volinfo, + dict_t *dict, int *index, + int *hxlator_count) +{ + glusterd_brickinfo_t *brickinfo = NULL; + int hxl_children = 0; + uuid_t candidate = {0}; + int brick_index = 0; + glusterd_peerinfo_t *peerinfo = NULL; + int delta = 0; + uuid_t candidate_max = {0}; + + if ((*index) == 0) + (*index)++; + if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE) { + hxl_children = volinfo->disperse_count; + } else { + hxl_children = volinfo->replica_count; + } + + cds_list_for_each_entry(brickinfo, 
&volinfo->bricks, brick_list) + { + if (gf_uuid_compare(brickinfo->uuid, candidate_max) > 0) { + if (!gf_uuid_compare(MY_UUID, brickinfo->uuid)) { + gf_uuid_copy(candidate_max, brickinfo->uuid); + } else { + peerinfo = glusterd_peerinfo_find(brickinfo->uuid, NULL); + if (peerinfo && peerinfo->connected) { + gf_uuid_copy(candidate_max, brickinfo->uuid); + } + } + } + } + + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + if (gf_uuid_is_null(brickinfo->uuid)) + (void)glusterd_resolve_brick(brickinfo); + + delta %= hxl_children; + if ((*index + delta) == (brick_index + hxl_children)) { + if (!gf_uuid_compare(MY_UUID, brickinfo->uuid)) { + gf_uuid_copy(candidate, brickinfo->uuid); + } else { + peerinfo = glusterd_peerinfo_find(brickinfo->uuid, NULL); + if (peerinfo && peerinfo->connected) { + gf_uuid_copy(candidate, brickinfo->uuid); + } else if (peerinfo && + (!gf_uuid_compare(candidate_max, MY_UUID))) { + _add_hxlator_to_dict(dict, volinfo, + ((*index) - 1) / hxl_children, + (*hxlator_count)); + (*hxlator_count)++; + } + } + + if (!gf_uuid_compare(MY_UUID, candidate)) { + _add_hxlator_to_dict(dict, volinfo, + ((*index) - 1) / hxl_children, + (*hxlator_count)); + (*hxlator_count)++; + } + gf_uuid_clear(candidate); + brick_index += hxl_children; + delta++; + } + + (*index)++; + } + return *hxlator_count; } static int -glusterd_op_stage_quota (dict_t *dict, char **op_errstr) -{ - int ret = 0; - char *volname = NULL; - gf_boolean_t exists = _gf_false; - - GF_ASSERT (dict); - GF_ASSERT (op_errstr); - - ret = dict_get_str (dict, "volname", &volname); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); - goto out; - } - - exists = glusterd_check_volume_exists (volname); - if (!exists) { - gf_log ("", GF_LOG_ERROR, "Volume with name: %s " - "does not exist", - volname); - *op_errstr = gf_strdup ("Invalid volume name"); - ret = -1; - goto out; - } +glusterd_bricks_select_snap(dict_t *dict, char **op_errstr, + struct cds_list_head *selected) +{ + int ret = -1; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + glusterd_pending_node_t *pending_node = NULL; + glusterd_volinfo_t *volinfo = NULL; + char *volname = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + int brick_index = -1; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get" + " volname"); + goto out; + } + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) + goto out; + + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + brick_index++; + if (gf_uuid_compare(brickinfo->uuid, MY_UUID) || + !glusterd_is_brick_started(brickinfo)) { + continue; + } + pending_node = GF_CALLOC(1, sizeof(*pending_node), + gf_gld_mt_pending_node_t); + if (!pending_node) { + ret = -1; + goto out; + } + pending_node->node = brickinfo; + pending_node->type = GD_NODE_BRICK; + pending_node->index = brick_index; + cds_list_add_tail(&pending_node->list, selected); + pending_node = NULL; + } + + ret = 0; out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - - return ret; + gf_msg_debug(THIS->name, 0, "Returning ret %d", ret); + return ret; } static int -glusterd_op_ac_stage_op (glusterd_op_sm_event_t *event, void *ctx) -{ - int ret = -1; - glusterd_req_ctx_t *req_ctx = NULL; - int32_t status = 0; - dict_t *rsp_dict = NULL; - char *op_errstr = NULL; - dict_t *dict = NULL; - - GF_ASSERT (ctx); - - req_ctx = ctx; - 
- dict = req_ctx->dict; - - rsp_dict = dict_new (); - if (!rsp_dict) { - gf_log ("", GF_LOG_DEBUG, - "Out of memory"); - return -1; - } +fill_shd_status_for_local_bricks(dict_t *dict, glusterd_volinfo_t *volinfo, + cli_cmd_type type, int *index, + dict_t *req_dict) +{ + glusterd_brickinfo_t *brickinfo = NULL; + static char *msg = "self-heal-daemon is not running on"; + char key[32] = { + 0, + }; + int keylen; + char value[128] = { + 0, + }; + int ret = 0; + xlator_t *this = NULL; + int cmd_replica_index = -1; + + this = THIS; + + if (type == PER_HEAL_XL) { + cmd_replica_index = get_replica_index_for_per_replica_cmd(volinfo, + req_dict); + if (cmd_replica_index == -1) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_REPLICA_INDEX_GET_FAIL, + "Could not find the " + "replica index for per replica type command"); + ret = -1; + goto out; + } + } + + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + if (gf_uuid_is_null(brickinfo->uuid)) + (void)glusterd_resolve_brick(brickinfo); + + if (gf_uuid_compare(MY_UUID, brickinfo->uuid)) { + (*index)++; + continue; + } + + if (type == PER_HEAL_XL) { + if (cmd_replica_index != ((*index) / volinfo->replica_count)) { + (*index)++; + continue; + } + } + keylen = snprintf(key, sizeof(key), "%d-status", (*index)); + snprintf(value, sizeof(value), "%s %s", msg, uuid_utoa(MY_UUID)); + ret = dict_set_dynstrn(dict, key, keylen, gf_strdup(value)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to" + "set the dictionary for shd status msg"); + goto out; + } + keylen = snprintf(key, sizeof(key), "%d-shd-status", (*index)); + ret = dict_set_nstrn(dict, key, keylen, "off", SLEN("off")); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to" + " set dictionary for shd status msg"); + goto out; + } + + (*index)++; + } - status = glusterd_op_stage_validate (req_ctx->op, dict, &op_errstr, - rsp_dict); - - if (status) { - gf_log ("", GF_LOG_ERROR, "Validate failed: %d", status); - } - - ret = glusterd_op_stage_send_resp (req_ctx->req, req_ctx->op, - status, op_errstr, rsp_dict); - - if (op_errstr && (strcmp (op_errstr, ""))) - GF_FREE (op_errstr); - - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); - - if (rsp_dict) - dict_unref (rsp_dict); - - return ret; +out: + return ret; } +int +glusterd_shd_select_brick_xlator(dict_t *dict, gf_xl_afr_op_t heal_op, + glusterd_volinfo_t *volinfo, int *index, + int *hxlator_count, dict_t *rsp_dict) +{ + int ret = -1; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + glusterd_svc_t *svc = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + svc = &(volinfo->shd.svc); + + switch (heal_op) { + case GF_SHD_OP_INDEX_SUMMARY: + case GF_SHD_OP_STATISTICS_HEAL_COUNT: + if (!svc->online) { + if (!rsp_dict) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OPCTX_NULL, + "Received " + "empty ctx."); + goto out; + } + + ret = fill_shd_status_for_local_bricks( + rsp_dict, volinfo, ALL_HEAL_XL, index, dict); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_SHD_STATUS_SET_FAIL, + "Unable to " + "fill the shd status for the local " + "bricks"); + goto out; + } + break; + + case GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA: + if (!svc->online) { + if (!rsp_dict) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OPCTX_NULL, + "Received " + "empty ctx."); + goto out; + } + ret = fill_shd_status_for_local_bricks( + rsp_dict, volinfo, PER_HEAL_XL, index, dict); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, + 
GD_MSG_SHD_STATUS_SET_FAIL, + "Unable to " + "fill the shd status for the local" + " bricks."); + goto out; + } + break; -static int -glusterd_op_ac_commit_op (glusterd_op_sm_event_t *event, void *ctx) -{ - int ret = 0; - glusterd_req_ctx_t *req_ctx = NULL; - int32_t status = 0; - char *op_errstr = NULL; - dict_t *op_ctx = NULL; - dict_t *dict = NULL; - - GF_ASSERT (ctx); - - req_ctx = ctx; - - dict = req_ctx->dict; - - op_ctx = glusterd_op_get_ctx (req_ctx->op); - status = glusterd_op_commit_perform (req_ctx->op, dict, &op_errstr, - op_ctx); - - if (status) { - gf_log ("", GF_LOG_ERROR, "Commit failed: %d", status); - } - - ret = glusterd_op_commit_send_resp (req_ctx->req, req_ctx->op, - status, op_errstr, op_ctx); - - glusterd_op_fini_ctx (req_ctx->op); - if (op_errstr && (strcmp (op_errstr, ""))) - GF_FREE (op_errstr); - - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); - - return ret; + default: + break; + } + + switch (heal_op) { + case GF_SHD_OP_HEAL_FULL: + _select_hxlators_for_full_self_heal(this, volinfo, dict, index, + hxlator_count); + break; + case GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA: + (*hxlator_count) += _select_hxlator_with_matching_brick( + this, volinfo, dict, index); + break; + default: + _select_hxlators_with_local_bricks(this, volinfo, dict, index, + hxlator_count); + break; + } + ret = (*hxlator_count); +out: + return ret; } static int -glusterd_op_ac_send_commit_failed (glusterd_op_sm_event_t *event, void *ctx) -{ - int ret = 0; - glusterd_req_ctx_t *req_ctx = NULL; - dict_t *op_ctx = NULL; - - GF_ASSERT (ctx); - - req_ctx = ctx; - - op_ctx = glusterd_op_get_ctx (req_ctx->op); - - ret = glusterd_op_commit_send_resp (req_ctx->req, req_ctx->op, - opinfo.op_ret, opinfo.op_errstr, - op_ctx); - - glusterd_op_fini_ctx (req_ctx->op); - if (opinfo.op_errstr && (strcmp (opinfo.op_errstr, ""))) { - GF_FREE (opinfo.op_errstr); - opinfo.op_errstr = NULL; - } +glusterd_bricks_select_heal_volume(dict_t *dict, char **op_errstr, + struct cds_list_head *selected, + dict_t *rsp_dict) +{ + int ret = -1; + char *volname = NULL; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + char msg[2048] = { + 0, + }; + glusterd_pending_node_t *pending_node = NULL; + gf_xl_afr_op_t heal_op = GF_SHD_OP_INVALID; + int hxlator_count = 0; + int index = 0; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "volume name get failed"); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + snprintf(msg, sizeof(msg), "Volume %s does not exist", volname); + + *op_errstr = gf_strdup(msg); + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, "%s", msg); + goto out; + } + + ret = dict_get_int32n(dict, "heal-op", SLEN("heal-op"), + (int32_t *)&heal_op); + if (ret || (heal_op == GF_SHD_OP_INVALID)) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "heal op invalid"); + goto out; + } + ret = glusterd_shd_select_brick_xlator(dict, heal_op, volinfo, &index, + &hxlator_count, rsp_dict); + if (ret < 0) { + goto out; + } + + if (!hxlator_count) + goto out; + if (hxlator_count == -1) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_XLATOR_COUNT_GET_FAIL, + "Could not determine the" + "translator count"); + ret = -1; + goto out; + } + + ret = dict_set_int32n(dict, "count", SLEN("count"), hxlator_count); + if (ret) + goto out; + pending_node = 
GF_CALLOC(1, sizeof(*pending_node), + gf_gld_mt_pending_node_t); + if (!pending_node) { + ret = -1; + goto out; + } else { + pending_node->node = &(volinfo->shd.svc); + pending_node->type = GD_NODE_SHD; + cds_list_add_tail(&pending_node->list, selected); + pending_node = NULL; + } - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); - return ret; +out: + gf_msg_debug(THIS->name, 0, "Returning ret %d", ret); + return ret; } static int -glusterd_op_sm_transition_state (glusterd_op_info_t *opinfo, - glusterd_op_sm_t *state, - glusterd_op_sm_event_type_t event_type) -{ - glusterd_conf_t *conf = NULL; - - GF_ASSERT (state); - GF_ASSERT (opinfo); - - conf = THIS->private; - GF_ASSERT (conf); - - (void) glusterd_sm_tr_log_transition_add (&conf->op_sm_log, - opinfo->state.state, - state[event_type].next_state, - event_type); - - opinfo->state.state = state[event_type].next_state; - return 0; -} - -int32_t -glusterd_op_stage_validate (glusterd_op_t op, dict_t *dict, char **op_errstr, - dict_t *rsp_dict) -{ - int ret = -1; - - switch (op) { - case GD_OP_CREATE_VOLUME: - ret = glusterd_op_stage_create_volume (dict, op_errstr); - break; - - case GD_OP_START_VOLUME: - ret = glusterd_op_stage_start_volume (dict, op_errstr); - break; - - case GD_OP_STOP_VOLUME: - ret = glusterd_op_stage_stop_volume (dict, op_errstr); - break; - - case GD_OP_DELETE_VOLUME: - ret = glusterd_op_stage_delete_volume (dict, op_errstr); - break; - - case GD_OP_ADD_BRICK: - ret = glusterd_op_stage_add_brick (dict, op_errstr); - break; - - case GD_OP_REPLACE_BRICK: - ret = glusterd_op_stage_replace_brick (dict, op_errstr, - rsp_dict); - break; - - case GD_OP_SET_VOLUME: - ret = glusterd_op_stage_set_volume (dict, op_errstr); - break; - - case GD_OP_RESET_VOLUME: - ret = glusterd_op_stage_reset_volume (dict, op_errstr); - break; - - case GD_OP_REMOVE_BRICK: - ret = glusterd_op_stage_remove_brick (dict); - break; - - case GD_OP_LOG_FILENAME: - ret = glusterd_op_stage_log_filename (dict, op_errstr); - break; - - case GD_OP_LOG_ROTATE: - ret = glusterd_op_stage_log_rotate (dict, op_errstr); - break; - - case GD_OP_SYNC_VOLUME: - ret = glusterd_op_stage_sync_volume (dict, op_errstr); - break; - - case GD_OP_GSYNC_SET: - ret = glusterd_op_stage_gsync_set (dict, op_errstr); - break; - - case GD_OP_PROFILE_VOLUME: - ret = glusterd_op_stage_stats_volume (dict, op_errstr); - break; - - case GD_OP_QUOTA: - ret = glusterd_op_stage_quota (dict, op_errstr); - break; - - default: - gf_log ("", GF_LOG_ERROR, "Unknown op %d", - op); - } - - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); +glusterd_bricks_select_rebalance_volume(dict_t *dict, char **op_errstr, + struct cds_list_head *selected) +{ + int ret = -1; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + char msg[2048] = { + 0, + }; + glusterd_pending_node_t *pending_node = NULL; + + this = THIS; + GF_ASSERT(this); + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "volume name get failed"); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + snprintf(msg, sizeof(msg), "Volume %s does not exist", volname); + + *op_errstr = gf_strdup(msg); + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, "%s", msg); + goto out; + } + pending_node = GF_CALLOC(1, sizeof(*pending_node), + gf_gld_mt_pending_node_t); + if (!pending_node) { + ret = -1; + goto out; + } else { + pending_node->node = volinfo; + pending_node->type = 
GD_NODE_REBALANCE; + cds_list_add_tail(&pending_node->list, selected); + pending_node = NULL; + } - return ret; +out: + return ret; } +static int +glusterd_bricks_select_status_volume(dict_t *dict, char **op_errstr, + struct cds_list_head *selected) +{ + int ret = -1; + int cmd = 0; + int brick_index = -1; + char *volname = NULL; + char *brickname = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_pending_node_t *pending_node = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + glusterd_svc_t *svc = NULL; + + GF_ASSERT(dict); + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + ret = dict_get_int32n(dict, "cmd", SLEN("cmd"), &cmd); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get status type"); + goto out; + } + + if (cmd & GF_CLI_STATUS_ALL) + goto out; + + switch (cmd & GF_CLI_STATUS_MASK) { + case GF_CLI_STATUS_MEM: + case GF_CLI_STATUS_CLIENTS: + case GF_CLI_STATUS_INODE: + case GF_CLI_STATUS_FD: + case GF_CLI_STATUS_CALLPOOL: + case GF_CLI_STATUS_NFS: + case GF_CLI_STATUS_SHD: + case GF_CLI_STATUS_QUOTAD: + case GF_CLI_STATUS_SNAPD: + case GF_CLI_STATUS_BITD: + case GF_CLI_STATUS_SCRUB: + case GF_CLI_STATUS_CLIENT_LIST: + break; + default: + goto out; + } + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get volname"); + goto out; + } + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + goto out; + } + + if ((cmd & GF_CLI_STATUS_BRICK) != 0) { + ret = dict_get_strn(dict, "brick", SLEN("brick"), &brickname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get brick"); + goto out; + } + ret = glusterd_volume_brickinfo_get_by_brick(brickname, volinfo, + &brickinfo, _gf_false); + if (ret) + goto out; -int32_t -glusterd_op_commit_perform (glusterd_op_t op, dict_t *dict, char **op_errstr, - dict_t *rsp_dict) -{ - int ret = -1; - - switch (op) { - case GD_OP_CREATE_VOLUME: - ret = glusterd_op_create_volume (dict, op_errstr); - break; - - case GD_OP_START_VOLUME: - ret = glusterd_op_start_volume (dict, op_errstr); - break; - - case GD_OP_STOP_VOLUME: - ret = glusterd_op_stop_volume (dict); - break; - - case GD_OP_DELETE_VOLUME: - ret = glusterd_op_delete_volume (dict); - break; - - case GD_OP_ADD_BRICK: - ret = glusterd_op_add_brick (dict, op_errstr); - break; - - case GD_OP_REPLACE_BRICK: - ret = glusterd_op_replace_brick (dict, rsp_dict); - break; - - case GD_OP_SET_VOLUME: - ret = glusterd_op_set_volume (dict); - break; - - case GD_OP_RESET_VOLUME: - ret = glusterd_op_reset_volume (dict); - break; - - case GD_OP_REMOVE_BRICK: - ret = glusterd_op_remove_brick (dict); - break; - - case GD_OP_LOG_FILENAME: - ret = glusterd_op_log_filename (dict); - break; - - case GD_OP_LOG_ROTATE: - ret = glusterd_op_log_rotate (dict); - break; - - case GD_OP_SYNC_VOLUME: - ret = glusterd_op_sync_volume (dict, op_errstr, rsp_dict); - break; - - case GD_OP_GSYNC_SET: - ret = glusterd_op_gsync_set (dict); - break; - - case GD_OP_PROFILE_VOLUME: - ret = glusterd_op_stats_volume (dict, op_errstr, - rsp_dict); - break; - - case GD_OP_QUOTA: - ret = glusterd_op_quota (dict, op_errstr); - break; + if (gf_uuid_compare(brickinfo->uuid, MY_UUID) || + !glusterd_is_brick_started(brickinfo)) + goto out; - default: - gf_log ("", GF_LOG_ERROR, "Unknown op %d", - op); - break; + pending_node = GF_CALLOC(1, sizeof(*pending_node), + 
gf_gld_mt_pending_node_t); + if (!pending_node) { + ret = -1; + goto out; } + pending_node->node = brickinfo; + pending_node->type = GD_NODE_BRICK; + pending_node->index = 0; + cds_list_add_tail(&pending_node->list, selected); - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - - return ret; -} + ret = 0; +#ifdef BUILD_GNFS + } else if ((cmd & GF_CLI_STATUS_NFS) != 0) { + if (!priv->nfs_svc.online) { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_NFS_SERVER_NOT_RUNNING, + "NFS server is not running"); + goto out; + } + pending_node = GF_CALLOC(1, sizeof(*pending_node), + gf_gld_mt_pending_node_t); + if (!pending_node) { + ret = -1; + goto out; + } + pending_node->node = &(priv->nfs_svc); + pending_node->type = GD_NODE_NFS; + pending_node->index = 0; + cds_list_add_tail(&pending_node->list, selected); -void -_profile_volume_add_brick_rsp (dict_t *this, char *key, data_t *value, - void *data) -{ - char new_key[256] = {0}; - glusterd_pr_brick_rsp_conv_t *rsp_ctx = NULL; - data_t *new_value = NULL; - - rsp_ctx = data; - new_value = data_copy (value); - GF_ASSERT (new_value); - snprintf (new_key, sizeof (new_key), "%d-%s", rsp_ctx->count, key); - dict_set (rsp_ctx->dict, new_key, new_value); -} + ret = 0; +#endif + } else if ((cmd & GF_CLI_STATUS_SHD) != 0) { + svc = &(volinfo->shd.svc); + if (!svc->online) { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SELF_HEALD_DISABLED, + "Self-heal daemon is not running"); + goto out; + } + pending_node = GF_CALLOC(1, sizeof(*pending_node), + gf_gld_mt_pending_node_t); + if (!pending_node) { + ret = -1; + goto out; + } + pending_node->node = svc; + pending_node->type = GD_NODE_SHD; + pending_node->index = 0; + cds_list_add_tail(&pending_node->list, selected); -int -glusterd_profile_volume_brick_rsp (glusterd_brickinfo_t *brickinfo, - dict_t *rsp_dict, dict_t *op_ctx, - char **op_errstr) -{ - int ret = 0; - glusterd_pr_brick_rsp_conv_t rsp_ctx = {0}; - int32_t count = 0; - char brick[PATH_MAX+1024] = {0}; - char key[256] = {0}; - char *full_brick = NULL; - - GF_ASSERT (rsp_dict); - GF_ASSERT (op_ctx); - GF_ASSERT (op_errstr); - GF_ASSERT (brickinfo); - - ret = dict_get_int32 (op_ctx, "count", &count); - if (ret) { - count = 1; - } else { - count++; - } - snprintf (key, sizeof (key), "%d-brick", count); - snprintf (brick, sizeof (brick), "%s:%s", brickinfo->hostname, - brickinfo->path); - full_brick = gf_strdup (brick); - GF_ASSERT (full_brick); - ret = dict_set_dynstr (op_ctx, key, full_brick); - - rsp_ctx.count = count; - rsp_ctx.dict = op_ctx; - dict_foreach (rsp_dict, _profile_volume_add_brick_rsp, &rsp_ctx); - dict_del (op_ctx, "count"); - ret = dict_set_int32 (op_ctx, "count", count); - return ret; -} + ret = 0; + } else if ((cmd & GF_CLI_STATUS_QUOTAD) != 0) { + if (!priv->quotad_svc.online) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_QUOTAD_NOT_RUNNING, + "Quotad is not " + "running"); + ret = -1; + goto out; + } + pending_node = GF_CALLOC(1, sizeof(*pending_node), + gf_gld_mt_pending_node_t); + if (!pending_node) { + ret = -1; + goto out; + } + pending_node->node = &(priv->quotad_svc); + pending_node->type = GD_NODE_QUOTAD; + pending_node->index = 0; + cds_list_add_tail(&pending_node->list, selected); -int32_t -glusterd_handle_brick_rsp (glusterd_brickinfo_t *brickinfo, - glusterd_op_t op, dict_t *rsp_dict, dict_t *op_ctx, - char **op_errstr) -{ - int ret = 0; + ret = 0; + } else if ((cmd & GF_CLI_STATUS_BITD) != 0) { + if (!priv->bitd_svc.online) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BITROT_NOT_RUNNING, + "Bitrot 
is not " + "running"); + ret = -1; + goto out; + } + pending_node = GF_CALLOC(1, sizeof(*pending_node), + gf_gld_mt_pending_node_t); + if (!pending_node) { + ret = -1; + goto out; + } + pending_node->node = &(priv->bitd_svc); + pending_node->type = GD_NODE_BITD; + pending_node->index = 0; + cds_list_add_tail(&pending_node->list, selected); - GF_ASSERT (op_errstr); + ret = 0; + } else if ((cmd & GF_CLI_STATUS_SCRUB) != 0) { + if (!priv->scrub_svc.online) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SCRUBBER_NOT_RUNNING, + "Scrubber is not " + "running"); + ret = -1; + goto out; + } + pending_node = GF_CALLOC(1, sizeof(*pending_node), + gf_gld_mt_pending_node_t); + if (!pending_node) { + ret = -1; + goto out; + } + pending_node->node = &(priv->scrub_svc); + pending_node->type = GD_NODE_SCRUB; + pending_node->index = 0; + cds_list_add_tail(&pending_node->list, selected); - switch (op) { - case GD_OP_PROFILE_VOLUME: - ret = glusterd_profile_volume_brick_rsp (brickinfo, rsp_dict, - op_ctx, op_errstr); - break; + ret = 0; + } else if ((cmd & GF_CLI_STATUS_SNAPD) != 0) { + if (!volinfo->snapd.svc.online) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPD_NOT_RUNNING, + "snapd is not " + "running"); + ret = -1; + goto out; + } + pending_node = GF_CALLOC(1, sizeof(*pending_node), + gf_gld_mt_pending_node_t); + if (!pending_node) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "failed to allocate " + "memory for pending node"); + ret = -1; + goto out; + } + + pending_node->node = (void *)(&volinfo->snapd); + pending_node->type = GD_NODE_SNAPD; + pending_node->index = 0; + cds_list_add_tail(&pending_node->list, selected); - default: - break; + ret = 0; + } else { + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + brick_index++; + if (gf_uuid_compare(brickinfo->uuid, MY_UUID) || + !glusterd_is_brick_started(brickinfo)) { + continue; + } + pending_node = GF_CALLOC(1, sizeof(*pending_node), + gf_gld_mt_pending_node_t); + if (!pending_node) { + ret = -1; + gf_msg(THIS->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "Unable to allocate memory"); + goto out; + } + pending_node->node = brickinfo; + pending_node->type = GD_NODE_BRICK; + pending_node->index = brick_index; + cds_list_add_tail(&pending_node->list, selected); + pending_node = NULL; } - - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + } +out: + return ret; } static int -glusterd_bricks_select_stop_volume (dict_t *dict, char **op_errstr) -{ - int ret = 0; - int flags = 0; - char *volname = NULL; - glusterd_volinfo_t *volinfo = NULL; - glusterd_brickinfo_t *brickinfo = NULL; - glusterd_pending_node_t *pending_node = NULL; - - - ret = glusterd_op_stop_volume_args_get (dict, &volname, &flags); - if (ret) - goto out; - - ret = glusterd_volinfo_find (volname, &volinfo); +glusterd_bricks_select_scrub(dict_t *dict, char **op_errstr, + struct cds_list_head *selected) +{ + int ret = -1; + char *volname = NULL; + char msg[2048] = { + 0, + }; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_pending_node_t *pending_node = NULL; + + this = THIS; + priv = this->private; + GF_ASSERT(this); + GF_ASSERT(priv); + + GF_ASSERT(dict); + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get" + " volname"); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + snprintf(msg, sizeof(msg), "Volume %s does not exist", 
volname); + + *op_errstr = gf_strdup(msg); + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND, "%s", + msg); + goto out; + } + + if (!priv->scrub_svc.online) { + ret = 0; + snprintf(msg, sizeof(msg), "Scrubber daemon is not running"); - if (ret) - goto out; + gf_msg_debug(this->name, 0, "%s", msg); + goto out; + } - list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { - if (glusterd_is_brick_started (brickinfo)) { - pending_node = GF_CALLOC (1, sizeof (*pending_node), - gf_gld_mt_pending_node_t); - if (!pending_node) { - ret = -1; - goto out; - } else { - pending_node->node = brickinfo; - list_add_tail (&pending_node->list, &opinfo.pending_bricks); - pending_node = NULL; - } - } - } + pending_node = GF_CALLOC(1, sizeof(*pending_node), + gf_gld_mt_pending_node_t); + if (!pending_node) { + ret = -1; + goto out; + } + pending_node->node = &(priv->scrub_svc); + pending_node->type = GD_NODE_SCRUB; + cds_list_add_tail(&pending_node->list, selected); + pending_node = NULL; out: - return ret; + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; } - +/* Select the bricks to send the barrier request to. + * This selects the bricks of the given volume which are present on this peer + * and are running + */ static int -glusterd_bricks_select_remove_brick (dict_t *dict, char **op_errstr) -{ - int ret = -1; - char *volname = NULL; - glusterd_volinfo_t *volinfo = NULL; - glusterd_brickinfo_t *brickinfo = NULL; - char *brick = NULL; - int32_t count = 0; - int32_t i = 1; - char key[256] = {0,}; - glusterd_pending_node_t *pending_node = NULL; - - ret = dict_get_str (dict, "volname", &volname); - - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); - goto out; - } - - ret = glusterd_volinfo_find (volname, &volinfo); - - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to allocate memory"); - goto out; - } - - ret = dict_get_int32 (dict, "count", &count); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get count"); - goto out; - } - - - while ( i <= count) { - snprintf (key, 256, "brick%d", i); - ret = dict_get_str (dict, key, &brick); - if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, "Unable to get brick"); - goto out; - } - - ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo, - &brickinfo); - if (ret) - goto out; - if (glusterd_is_brick_started (brickinfo)) { - pending_node = GF_CALLOC (1, sizeof (*pending_node), - gf_gld_mt_pending_node_t); - if (!pending_node) { - ret = -1; - goto out; - } else { - pending_node->node = brickinfo; - list_add_tail (&pending_node->list, &opinfo.pending_bricks); - pending_node = NULL; - } - } - i++; - } +glusterd_bricks_select_barrier(dict_t *dict, struct cds_list_head *selected) +{ + int ret = -1; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_pending_node_t *pending_node = NULL; + + GF_ASSERT(dict); + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get volname"); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, + "Failed to find volume %s", volname); + goto out; + } + + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + if (gf_uuid_compare(brickinfo->uuid, MY_UUID) || + !glusterd_is_brick_started(brickinfo)) { + continue; + } + pending_node = GF_CALLOC(1, sizeof(*pending_node), + gf_gld_mt_pending_node_t); + if (!pending_node) 
{ + ret = -1; + goto out; + } + pending_node->node = brickinfo; + pending_node->type = GD_NODE_BRICK; + cds_list_add_tail(&pending_node->list, selected); + pending_node = NULL; + } out: - return ret; + gf_msg_debug(THIS->name, 0, "Returning %d", ret); + return ret; } static int -glusterd_bricks_select_profile_volume (dict_t *dict, char **op_errstr) +glusterd_op_ac_send_brick_op(glusterd_op_sm_event_t *event, void *ctx) { - int ret = -1; - char *volname = NULL; - char msg[2048] = {0,}; - glusterd_conf_t *priv = NULL; - glusterd_volinfo_t *volinfo = NULL; - xlator_t *this = NULL; - int32_t stats_op = GF_CLI_STATS_NONE; - glusterd_brickinfo_t *brickinfo = NULL; - glusterd_pending_node_t *pending_node = NULL; - char *brick = NULL; - - this = THIS; - GF_ASSERT (this); - priv = this->private; - GF_ASSERT (priv); - - ret = dict_get_str (dict, "volname", &volname); - if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, "volume name get failed"); - goto out; - } - - ret = glusterd_volinfo_find (volname, &volinfo); - if (ret) { - snprintf (msg, sizeof (msg), "Volume %s does not exists", - volname); - - *op_errstr = gf_strdup (msg); - gf_log ("", GF_LOG_ERROR, "%s", msg); - goto out; - } - - ret = dict_get_int32 (dict, "op", &stats_op); - if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, "volume profile op get failed"); - goto out; - } - - switch (stats_op) { - case GF_CLI_STATS_START: - case GF_CLI_STATS_STOP: - goto out; - break; - case GF_CLI_STATS_INFO: - list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { - if (glusterd_is_brick_started (brickinfo)) { - pending_node = GF_CALLOC (1, sizeof (*pending_node), - gf_gld_mt_pending_node_t); - if (!pending_node) { - ret = -1; - goto out; - } else { - pending_node->node = brickinfo; - list_add_tail (&pending_node->list, - &opinfo.pending_bricks); - pending_node = NULL; - } - } - } - break; + int ret = 0; + rpc_clnt_procedure_t *proc = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + glusterd_op_t op = GD_OP_NONE; + glusterd_req_ctx_t *req_ctx = NULL; + char *op_errstr = NULL; + gf_boolean_t free_req_ctx = _gf_false; - case GF_CLI_STATS_TOP: - ret = dict_get_str (dict, "brick", &brick); - if (!ret) { - ret = glusterd_volume_brickinfo_get_by_brick (brick, - volinfo, &brickinfo); - if (ret) - goto out; - - pending_node = GF_CALLOC (1, sizeof (*pending_node), - gf_gld_mt_pending_node_t); - if (!pending_node) { - ret = -1; - goto out; - } else { - pending_node->node = brickinfo; - list_add_tail (&pending_node->list, - &opinfo.pending_bricks); - pending_node = NULL; - goto out; - } - } - ret = 0; - list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { - if (glusterd_is_brick_started (brickinfo)) { - pending_node = GF_CALLOC (1, sizeof (*pending_node), - gf_gld_mt_pending_node_t); - if (!pending_node) { - ret = -1; - goto out; - } else { - pending_node->node = brickinfo; - list_add_tail (&pending_node->list, - &opinfo.pending_bricks); - pending_node = NULL; - } - } - } - break; + this = THIS; + priv = this->private; - default: - GF_ASSERT (0); - gf_log ("glusterd", GF_LOG_ERROR, "Invalid profile op: %d", - stats_op); - ret = -1; - goto out; - break; - } + if (ctx) { + req_ctx = ctx; + } else { + req_ctx = GF_CALLOC(1, sizeof(*req_ctx), gf_gld_mt_op_allack_ctx_t); + if (!req_ctx) + goto out; + free_req_ctx = _gf_true; + op = glusterd_op_get_op(); + req_ctx->op = op; + gf_uuid_copy(req_ctx->uuid, MY_UUID); + ret = glusterd_op_build_payload(&req_ctx->dict, &op_errstr, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + 
GD_MSG_BRICK_OP_PAYLOAD_BUILD_FAIL, LOGSTR_BUILD_PAYLOAD, + gd_op_list[op]); + if (op_errstr == NULL) + gf_asprintf(&op_errstr, OPERRSTR_BUILD_PAYLOAD); + opinfo.op_errstr = op_errstr; + goto out; + } + } + + proc = &priv->gfs_mgmt->proctable[GLUSTERD_BRICK_OP]; + if (proc->fn) { + ret = proc->fn(NULL, this, req_ctx); + if (ret) + goto out; + } + if (!opinfo.pending_count && !opinfo.brick_pending_count) { + glusterd_clear_pending_nodes(&opinfo.pending_bricks); + ret = glusterd_op_sm_inject_event(GD_OP_EVENT_ALL_ACK, &event->txn_id, + req_ctx); + } out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + if (ret && free_req_ctx) + GF_FREE(req_ctx); + gf_msg_debug(this->name, 0, "Returning with %d", ret); - return ret; + return ret; } static int -glusterd_op_ac_send_brick_op (glusterd_op_sm_event_t *event, void *ctx) -{ - int ret = 0; - rpc_clnt_procedure_t *proc = NULL; - glusterd_conf_t *priv = NULL; - xlator_t *this = NULL; - glusterd_op_t op = GD_OP_NONE; - glusterd_req_ctx_t *req_ctx = NULL; - - this = THIS; - priv = this->private; - - if (ctx) { - req_ctx = ctx; - } else { - req_ctx = GF_CALLOC (1, sizeof (*req_ctx), - gf_gld_mt_op_allack_ctx_t); - op = glusterd_op_get_op (); - req_ctx->op = op; - uuid_copy (req_ctx->uuid, priv->uuid); - ret = glusterd_op_build_payload (op, &req_ctx->dict); - if (ret)//TODO:what to do?? - goto out; - } - - proc = &priv->gfs_mgmt->proctable[GD_MGMT_BRICK_OP]; - if (proc->fn) { - ret = proc->fn (NULL, this, req_ctx); - if (ret) - goto out; - } - - if (!opinfo.pending_count && !opinfo.brick_pending_count) { - glusterd_clear_pending_nodes (&opinfo.pending_bricks); - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, req_ctx); - } - -out: - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); - - return ret; -} +glusterd_op_ac_rcvd_brick_op_acc(glusterd_op_sm_event_t *event, void *ctx) +{ + int ret = -1; + glusterd_op_brick_rsp_ctx_t *ev_ctx = NULL; + char *op_errstr = NULL; + glusterd_op_t op = GD_OP_NONE; + gd_node_type type = GD_NODE_NONE; + dict_t *op_ctx = NULL; + glusterd_req_ctx_t *req_ctx = NULL; + void *pending_entry = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + GF_VALIDATE_OR_GOTO(this->name, event, out); + GF_VALIDATE_OR_GOTO(this->name, ctx, out); + ev_ctx = ctx; + GF_VALIDATE_OR_GOTO(this->name, ev_ctx, out); + + req_ctx = ev_ctx->commit_ctx; + GF_VALIDATE_OR_GOTO(this->name, req_ctx, out); + + op = req_ctx->op; + op_ctx = glusterd_op_get_ctx(); + pending_entry = ev_ctx->pending_node->node; + type = ev_ctx->pending_node->type; + + ret = glusterd_remove_pending_entry(&opinfo.pending_bricks, pending_entry); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UNKNOWN_RESPONSE, + "unknown response received "); + ret = -1; + goto out; + } + if (opinfo.brick_pending_count > 0) + opinfo.brick_pending_count--; -static int -glusterd_op_ac_rcvd_brick_op_acc (glusterd_op_sm_event_t *event, void *ctx) -{ - int ret = 0; - glusterd_op_brick_rsp_ctx_t *ev_ctx = NULL; - glusterd_brickinfo_t *brickinfo = NULL; - char *op_errstr = NULL; - glusterd_op_t op = GD_OP_NONE; - dict_t *op_ctx = NULL; - gf_boolean_t free_errstr = _gf_true; - glusterd_req_ctx_t *req_ctx = NULL; - - GF_ASSERT (event); - GF_ASSERT (ctx); - ev_ctx = ctx; - - req_ctx = ev_ctx->commit_ctx; - GF_ASSERT (req_ctx); - - brickinfo = ev_ctx->brickinfo; - GF_ASSERT (brickinfo); - - ret = glusterd_remove_pending_entry (&opinfo.pending_bricks, brickinfo); - if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, "unknown response received " - "from 
%s:%s", brickinfo->hostname, brickinfo->path); - ret = -1; - free_errstr = _gf_true; - goto out; - } + ret = glusterd_set_txn_opinfo(&event->txn_id, &opinfo); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL, + "Unable to set " + "transaction's opinfo"); - if (opinfo.brick_pending_count > 0) - opinfo.brick_pending_count--; - op = req_ctx->op; - op_ctx = glusterd_op_get_ctx (op); + glusterd_handle_node_rsp(req_ctx->dict, pending_entry, op, ev_ctx->rsp_dict, + op_ctx, &op_errstr, type); - glusterd_handle_brick_rsp (brickinfo, op, ev_ctx->rsp_dict, - op_ctx, &op_errstr); - if (opinfo.brick_pending_count > 0) - goto out; + if (opinfo.brick_pending_count > 0) + goto out; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, ev_ctx->commit_ctx); + ret = glusterd_op_sm_inject_event(GD_OP_EVENT_ALL_ACK, &event->txn_id, + ev_ctx->commit_ctx); out: - if (ev_ctx->rsp_dict) - dict_unref (ev_ctx->rsp_dict); - GF_FREE (ev_ctx); - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - - return ret; + if (ev_ctx && ev_ctx->rsp_dict) + dict_unref(ev_ctx->rsp_dict); + GF_FREE(ev_ctx); + gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret); + return ret; } int32_t -glusterd_op_bricks_select (glusterd_op_t op, dict_t *dict, char **op_errstr) +glusterd_op_bricks_select(glusterd_op_t op, dict_t *dict, char **op_errstr, + struct cds_list_head *selected, dict_t *rsp_dict) { - int ret = 0; + int ret = 0; - GF_ASSERT (dict); - GF_ASSERT (op_errstr); - GF_ASSERT (op > GD_OP_NONE); - GF_ASSERT (op < GD_OP_MAX); + GF_ASSERT(dict); + GF_ASSERT(op_errstr); + GF_ASSERT(op > GD_OP_NONE); + GF_ASSERT(op < GD_OP_MAX); - switch (op) { + switch (op) { case GD_OP_STOP_VOLUME: - ret = glusterd_bricks_select_stop_volume (dict, op_errstr); - break; - + ret = glusterd_bricks_select_stop_volume(dict, op_errstr, selected); + break; case GD_OP_REMOVE_BRICK: - ret = glusterd_bricks_select_remove_brick (dict, op_errstr); - break; + ret = glusterd_bricks_select_remove_brick(dict, op_errstr, + selected); + break; case GD_OP_PROFILE_VOLUME: - ret = glusterd_bricks_select_profile_volume (dict, op_errstr); - break; - + ret = glusterd_bricks_select_profile_volume(dict, op_errstr, + selected); + break; + + case GD_OP_HEAL_VOLUME: + ret = glusterd_bricks_select_heal_volume(dict, op_errstr, selected, + rsp_dict); + break; + + case GD_OP_STATUS_VOLUME: + ret = glusterd_bricks_select_status_volume(dict, op_errstr, + selected); + break; + case GD_OP_DEFRAG_BRICK_VOLUME: + ret = glusterd_bricks_select_rebalance_volume(dict, op_errstr, + selected); + break; + + case GD_OP_BARRIER: + ret = glusterd_bricks_select_barrier(dict, selected); + break; + case GD_OP_SNAP: + ret = glusterd_bricks_select_snap(dict, op_errstr, selected); + break; + case GD_OP_SCRUB_STATUS: + case GD_OP_SCRUB_ONDEMAND: + ret = glusterd_bricks_select_scrub(dict, op_errstr, selected); + break; default: - break; - } - - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - - return ret; -} - -glusterd_op_sm_t glusterd_op_state_default [] = { - {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, //EVENT_NONE - {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_send_lock},//EVENT_START_LOCK - {GD_OP_STATE_LOCKED, glusterd_op_ac_lock}, //EVENT_LOCK - {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, //EVENT_RCVD_ACC - {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, //EVENT_ALL_ACC - {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, //EVENT_STAGE_ACC - {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, //EVENT_COMMIT_ACC - {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, //EVENT_RCVD_RJT - 
{GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, //EVENT_STAGE_OP - {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, //EVENT_COMMIT_OP - {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK - {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, //EVENT_START_UNLOCK - {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, //EVENT_ALL_ACK - {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, //EVENT_MAX + break; + } + + gf_msg_debug(THIS->name, 0, "Returning %d", ret); + + return ret; +} + +glusterd_op_sm_t glusterd_op_state_default[] = { + {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, // EVENT_NONE + {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_send_lock}, // EVENT_START_LOCK + {GD_OP_STATE_LOCKED, glusterd_op_ac_lock}, // EVENT_LOCK + {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, // EVENT_RCVD_ACC + {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, // EVENT_ALL_ACC + {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, // EVENT_STAGE_ACC + {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, // EVENT_COMMIT_ACC + {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, // EVENT_RCVD_RJT + {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, // EVENT_STAGE_OP + {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, // EVENT_COMMIT_OP + {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, // EVENT_UNLOCK + {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, // EVENT_START_UNLOCK + {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, // EVENT_ALL_ACK + {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, // EVENT_LOCAL_UNLOCK_NO_RESP + {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, // EVENT_MAX }; -glusterd_op_sm_t glusterd_op_state_lock_sent [] = { - {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, //EVENT_NONE - {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none},//EVENT_START_LOCK - {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, //EVENT_LOCK - {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_rcvd_lock_acc}, //EVENT_RCVD_ACC - {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_send_stage_op}, //EVENT_ALL_ACC - {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, //EVENT_STAGE_ACC - {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, //EVENT_COMMIT_ACC - {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_send_unlock}, //EVENT_RCVD_RJT - {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, //EVENT_STAGE_OP - {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, //EVENT_COMMIT_OP - {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK - {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, //EVENT_START_UNLOCK - {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, //EVENT_ALL_ACK - {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, //EVENT_MAX +glusterd_op_sm_t glusterd_op_state_lock_sent[] = { + {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, // EVENT_NONE + {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, // EVENT_START_LOCK + {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_lock}, // EVENT_LOCK + {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_rcvd_lock_acc}, // EVENT_RCVD_ACC + {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_send_stage_op}, // EVENT_ALL_ACC + {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, // EVENT_STAGE_ACC + {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, // EVENT_COMMIT_ACC + {GD_OP_STATE_ACK_DRAIN, + glusterd_op_ac_send_unlock_drain}, // EVENT_RCVD_RJT + {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, // EVENT_STAGE_OP + {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, // EVENT_COMMIT_OP + {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, // EVENT_UNLOCK + {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, // EVENT_START_UNLOCK + {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, // EVENT_ALL_ACK + {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, // EVENT_LOCAL_UNLOCK_NO_RESP + {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, // 
EVENT_MAX }; -glusterd_op_sm_t glusterd_op_state_locked [] = { - {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_NONE - {GD_OP_STATE_LOCKED, glusterd_op_ac_none},//EVENT_START_LOCK - {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_LOCK - {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_RCVD_ACC - {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_ALL_ACC - {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_STAGE_ACC - {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_COMMIT_ACC - {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_RCVD_RJT - {GD_OP_STATE_STAGED, glusterd_op_ac_stage_op}, //EVENT_STAGE_OP - {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_COMMIT_OP - {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK - {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_START_UNLOCK - {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_ALL_ACK - {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_MAX +glusterd_op_sm_t glusterd_op_state_locked[] = { + {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, // EVENT_NONE + {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, // EVENT_START_LOCK + {GD_OP_STATE_LOCKED, glusterd_op_ac_lock}, // EVENT_LOCK + {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, // EVENT_RCVD_ACC + {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, // EVENT_ALL_ACC + {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, // EVENT_STAGE_ACC + {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, // EVENT_COMMIT_ACC + {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, // EVENT_RCVD_RJT + {GD_OP_STATE_STAGED, glusterd_op_ac_stage_op}, // EVENT_STAGE_OP + {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, // EVENT_COMMIT_OP + {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, // EVENT_UNLOCK + {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, // EVENT_START_UNLOCK + {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, // EVENT_ALL_ACK + {GD_OP_STATE_DEFAULT, + glusterd_op_ac_local_unlock}, // EVENT_LOCAL_UNLOCK_NO_RESP + {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, // EVENT_MAX }; -glusterd_op_sm_t glusterd_op_state_stage_op_sent [] = { - {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, //EVENT_NONE - {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none},//EVENT_START_LOCK - {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, //EVENT_LOCK - {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_rcvd_stage_op_acc}, //EVENT_RCVD_ACC - {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_send_brick_op}, //EVENT_ALL_ACC - {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_send_brick_op}, //EVENT_STAGE_ACC - {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, //EVENT_COMMIT_ACC - {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_stage_op_failed}, //EVENT_RCVD_RJT - {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, //EVENT_STAGE_OP - {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, //EVENT_COMMIT_OP - {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK - {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, //EVENT_START_UNLOCK - {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, //EVENT_ALL_ACK - {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, //EVENT_MAX +glusterd_op_sm_t glusterd_op_state_stage_op_sent[] = { + {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, // EVENT_NONE + {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, // EVENT_START_LOCK + {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_lock}, // EVENT_LOCK + {GD_OP_STATE_STAGE_OP_SENT, + glusterd_op_ac_rcvd_stage_op_acc}, // EVENT_RCVD_ACC + {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_send_brick_op}, // EVENT_ALL_ACC + {GD_OP_STATE_BRICK_OP_SENT, + glusterd_op_ac_send_brick_op}, // EVENT_STAGE_ACC + {GD_OP_STATE_STAGE_OP_SENT, 
glusterd_op_ac_none}, // EVENT_COMMIT_ACC + {GD_OP_STATE_STAGE_OP_FAILED, + glusterd_op_ac_stage_op_failed}, // EVENT_RCVD_RJT + {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, // EVENT_STAGE_OP + {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, // EVENT_COMMIT_OP + {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, // EVENT_UNLOCK + {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, // EVENT_START_UNLOCK + {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, // EVENT_ALL_ACK + {GD_OP_STATE_STAGE_OP_SENT, + glusterd_op_ac_none}, // EVENT_LOCAL_UNLOCK_NO_RESP + {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, // EVENT_MAX }; -glusterd_op_sm_t glusterd_op_state_stage_op_failed [] = { - {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, //EVENT_NONE - {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none},//EVENT_START_LOCK - {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, //EVENT_LOCK - {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_stage_op_failed}, //EVENT_RCVD_ACC - {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, //EVENT_ALL_ACC - {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, //EVENT_STAGE_ACC - {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, //EVENT_COMMIT_ACC - {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_stage_op_failed}, //EVENT_RCVD_RJT - {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, //EVENT_STAGE_OP - {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, //EVENT_COMMIT_OP - {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK - {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, //EVENT_START_UNLOCK - {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_send_unlock}, //EVENT_ALL_ACK - {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, //EVENT_MAX +glusterd_op_sm_t glusterd_op_state_stage_op_failed[] = { + {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, // EVENT_NONE + {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, // EVENT_START_LOCK + {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_lock}, // EVENT_LOCK + {GD_OP_STATE_STAGE_OP_FAILED, + glusterd_op_ac_stage_op_failed}, // EVENT_RCVD_ACC + {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, // EVENT_ALL_ACC + {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, // EVENT_STAGE_ACC + {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, // EVENT_COMMIT_ACC + {GD_OP_STATE_STAGE_OP_FAILED, + glusterd_op_ac_stage_op_failed}, // EVENT_RCVD_RJT + {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, // EVENT_STAGE_OP + {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, // EVENT_COMMIT_OP + {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, // EVENT_UNLOCK + {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, // EVENT_START_UNLOCK + {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_send_unlock}, // EVENT_ALL_ACK + {GD_OP_STATE_STAGE_OP_FAILED, + glusterd_op_ac_none}, // EVENT_LOCAL_UNLOCK_NO_RESP + {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, // EVENT_MAX }; -glusterd_op_sm_t glusterd_op_state_staged [] = { - {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_NONE - {GD_OP_STATE_STAGED, glusterd_op_ac_none},//EVENT_START_LOCK - {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_LOCK - {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_RCVD_ACC - {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_ALL_ACC - {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_STAGE_ACC - {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_COMMIT_ACC - {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_RCVD_RJT - {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_STAGE_OP - {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_send_brick_op}, //EVENT_COMMIT_OP - 
{GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK - {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_START_UNLOCK - {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_ALL_ACK - {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_MAX +glusterd_op_sm_t glusterd_op_state_staged[] = { + {GD_OP_STATE_STAGED, glusterd_op_ac_none}, // EVENT_NONE + {GD_OP_STATE_STAGED, glusterd_op_ac_none}, // EVENT_START_LOCK + {GD_OP_STATE_STAGED, glusterd_op_ac_lock}, // EVENT_LOCK + {GD_OP_STATE_STAGED, glusterd_op_ac_none}, // EVENT_RCVD_ACC + {GD_OP_STATE_STAGED, glusterd_op_ac_none}, // EVENT_ALL_ACC + {GD_OP_STATE_STAGED, glusterd_op_ac_none}, // EVENT_STAGE_ACC + {GD_OP_STATE_STAGED, glusterd_op_ac_none}, // EVENT_COMMIT_ACC + {GD_OP_STATE_STAGED, glusterd_op_ac_none}, // EVENT_RCVD_RJT + {GD_OP_STATE_STAGED, glusterd_op_ac_none}, // EVENT_STAGE_OP + {GD_OP_STATE_BRICK_COMMITTED, + glusterd_op_ac_send_brick_op}, // EVENT_COMMIT_OP + {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, // EVENT_UNLOCK + {GD_OP_STATE_STAGED, glusterd_op_ac_none}, // EVENT_START_UNLOCK + {GD_OP_STATE_STAGED, glusterd_op_ac_none}, // EVENT_ALL_ACK + {GD_OP_STATE_DEFAULT, + glusterd_op_ac_local_unlock}, // EVENT_LOCAL_UNLOCK_NO_RESP + {GD_OP_STATE_STAGED, glusterd_op_ac_none}, // EVENT_MAX }; -glusterd_op_sm_t glusterd_op_state_brick_op_sent [] = { - {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, //EVENT_NONE - {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none},//EVENT_START_LOCK - {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, //EVENT_LOCK - {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_rcvd_brick_op_acc}, //EVENT_RCVD_ACC - {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, //EVENT_ALL_ACC - {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, //EVENT_STAGE_ACC - {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, //EVENT_COMMIT_ACC - {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_brick_op_failed}, //EVENT_RCVD_RJT - {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, //EVENT_BRICK_OP - {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, //EVENT_COMMIT_OP - {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK - {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, //EVENT_START_UNLOCK - {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_send_commit_op}, //EVENT_ALL_ACK - {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, //EVENT_MAX +glusterd_op_sm_t glusterd_op_state_brick_op_sent[] = { + {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, // EVENT_NONE + {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, // EVENT_START_LOCK + {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_lock}, // EVENT_LOCK + {GD_OP_STATE_BRICK_OP_SENT, + glusterd_op_ac_rcvd_brick_op_acc}, // EVENT_RCVD_ACC + {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, // EVENT_ALL_ACC + {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, // EVENT_STAGE_ACC + {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, // EVENT_COMMIT_ACC + {GD_OP_STATE_BRICK_OP_FAILED, + glusterd_op_ac_brick_op_failed}, // EVENT_RCVD_RJT + {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, // EVENT_BRICK_OP + {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, // EVENT_COMMIT_OP + {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, // EVENT_UNLOCK + {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, // EVENT_START_UNLOCK + {GD_OP_STATE_COMMIT_OP_SENT, + glusterd_op_ac_send_commit_op}, // EVENT_ALL_ACK + {GD_OP_STATE_BRICK_OP_SENT, + glusterd_op_ac_none}, // EVENT_LOCAL_UNLOCK_NO_RESP + {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, // EVENT_MAX }; -glusterd_op_sm_t glusterd_op_state_brick_op_failed [] = { - 
{GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, //EVENT_NONE - {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none},//EVENT_START_LOCK - {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, //EVENT_LOCK - {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_brick_op_failed}, //EVENT_RCVD_ACC - {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, //EVENT_ALL_ACC - {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, //EVENT_STAGE_ACC - {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, //EVENT_COMMIT_ACC - {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_brick_op_failed}, //EVENT_RCVD_RJT - {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, //EVENT_BRICK_OP - {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, //EVENT_COMMIT_OP - {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK - {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, //EVENT_START_UNLOCK - {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_send_unlock}, //EVENT_ALL_ACK - {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, //EVENT_MAX +glusterd_op_sm_t glusterd_op_state_brick_op_failed[] = { + {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, // EVENT_NONE + {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, // EVENT_START_LOCK + {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_lock}, // EVENT_LOCK + {GD_OP_STATE_BRICK_OP_FAILED, + glusterd_op_ac_brick_op_failed}, // EVENT_RCVD_ACC + {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, // EVENT_ALL_ACC + {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, // EVENT_STAGE_ACC + {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, // EVENT_COMMIT_ACC + {GD_OP_STATE_BRICK_OP_FAILED, + glusterd_op_ac_brick_op_failed}, // EVENT_RCVD_RJT + {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, // EVENT_BRICK_OP + {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, // EVENT_COMMIT_OP + {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, // EVENT_UNLOCK + {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, // EVENT_START_UNLOCK + {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_send_unlock}, // EVENT_ALL_ACK + {GD_OP_STATE_BRICK_OP_FAILED, + glusterd_op_ac_none}, // EVENT_LOCAL_UNLOCK_NO_RESP + {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, // EVENT_MAX }; -glusterd_op_sm_t glusterd_op_state_brick_committed [] = { - {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, //EVENT_NONE - {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none},//EVENT_START_LOCK - {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, //EVENT_LOCK - {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_rcvd_brick_op_acc}, //EVENT_RCVD_ACC - {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, //EVENT_ALL_ACC - {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, //EVENT_STAGE_ACC - {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, //EVENT_COMMIT_ACC - {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_brick_op_failed}, //EVENT_RCVD_RJT - {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, //EVENT_STAGE_OP - {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, //EVENT_COMMIT_OP - {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK - {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, //EVENT_START_UNLOCK - {GD_OP_STATE_COMMITED, glusterd_op_ac_commit_op}, //EVENT_ALL_ACK - {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, //EVENT_MAX +glusterd_op_sm_t glusterd_op_state_brick_committed[] = { + {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, // EVENT_NONE + {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, // EVENT_START_LOCK + {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_lock}, // EVENT_LOCK + {GD_OP_STATE_BRICK_COMMITTED, + 
glusterd_op_ac_rcvd_brick_op_acc}, // EVENT_RCVD_ACC + {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, // EVENT_ALL_ACC + {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, // EVENT_STAGE_ACC + {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, // EVENT_COMMIT_ACC + {GD_OP_STATE_BRICK_COMMIT_FAILED, + glusterd_op_ac_brick_op_failed}, // EVENT_RCVD_RJT + {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, // EVENT_STAGE_OP + {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, // EVENT_COMMIT_OP + {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, // EVENT_UNLOCK + {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, // EVENT_START_UNLOCK + {GD_OP_STATE_COMMITED, glusterd_op_ac_commit_op}, // EVENT_ALL_ACK + {GD_OP_STATE_DEFAULT, + glusterd_op_ac_local_unlock}, // EVENT_LOCAL_UNLOCK_NO_RESP + {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, // EVENT_MAX }; -glusterd_op_sm_t glusterd_op_state_brick_commit_failed [] = { - {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, //EVENT_NONE - {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none},//EVENT_START_LOCK - {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, //EVENT_LOCK - {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_brick_op_failed}, //EVENT_RCVD_ACC - {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, //EVENT_ALL_ACC - {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, //EVENT_STAGE_ACC - {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, //EVENT_COMMIT_ACC - {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_brick_op_failed}, //EVENT_RCVD_RJT - {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, //EVENT_STAGE_OP - {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, //EVENT_COMMIT_OP - {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK - {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, //EVENT_START_UNLOCK - {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_send_commit_failed}, //EVENT_ALL_ACK - {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, //EVENT_MAX +glusterd_op_sm_t glusterd_op_state_brick_commit_failed[] = { + {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, // EVENT_NONE + {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, // EVENT_START_LOCK + {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_lock}, // EVENT_LOCK + {GD_OP_STATE_BRICK_COMMIT_FAILED, + glusterd_op_ac_brick_op_failed}, // EVENT_RCVD_ACC + {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, // EVENT_ALL_ACC + {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, // EVENT_STAGE_ACC + {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, // EVENT_COMMIT_ACC + {GD_OP_STATE_BRICK_COMMIT_FAILED, + glusterd_op_ac_brick_op_failed}, // EVENT_RCVD_RJT + {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, // EVENT_STAGE_OP + {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, // EVENT_COMMIT_OP + {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, // EVENT_UNLOCK + {GD_OP_STATE_BRICK_COMMIT_FAILED, + glusterd_op_ac_none}, // EVENT_START_UNLOCK + {GD_OP_STATE_BRICK_COMMIT_FAILED, + glusterd_op_ac_send_commit_failed}, // EVENT_ALL_ACK + {GD_OP_STATE_DEFAULT, + glusterd_op_ac_local_unlock}, // EVENT_LOCAL_UNLOCK_NO_RESP + {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, // EVENT_MAX }; -glusterd_op_sm_t glusterd_op_state_commit_op_failed [] = { - {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, //EVENT_NONE - {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none},//EVENT_START_LOCK - {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, //EVENT_LOCK - {GD_OP_STATE_COMMIT_OP_FAILED, 
glusterd_op_ac_commit_op_failed}, //EVENT_RCVD_ACC - {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, //EVENT_ALL_ACC - {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, //EVENT_STAGE_ACC - {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, //EVENT_COMMIT_ACC - {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_commit_op_failed}, //EVENT_RCVD_RJT - {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, //EVENT_STAGE_OP - {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, //EVENT_COMMIT_OP - {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK - {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, //EVENT_START_UNLOCK - {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_send_unlock}, //EVENT_ALL_ACK - {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, //EVENT_MAX +glusterd_op_sm_t glusterd_op_state_commit_op_failed[] = { + {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, // EVENT_NONE + {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, // EVENT_START_LOCK + {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_lock}, // EVENT_LOCK + {GD_OP_STATE_COMMIT_OP_FAILED, + glusterd_op_ac_commit_op_failed}, // EVENT_RCVD_ACC + {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, // EVENT_ALL_ACC + {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, // EVENT_STAGE_ACC + {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, // EVENT_COMMIT_ACC + {GD_OP_STATE_COMMIT_OP_FAILED, + glusterd_op_ac_commit_op_failed}, // EVENT_RCVD_RJT + {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, // EVENT_STAGE_OP + {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, // EVENT_COMMIT_OP + {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, // EVENT_UNLOCK + {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, // EVENT_START_UNLOCK + {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_send_unlock}, // EVENT_ALL_ACK + {GD_OP_STATE_COMMIT_OP_FAILED, + glusterd_op_ac_none}, // EVENT_LOCAL_UNLOCK_NO_RESP + {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, // EVENT_MAX }; -glusterd_op_sm_t glusterd_op_state_commit_op_sent [] = { - {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, //EVENT_NONE - {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none},//EVENT_START_LOCK - {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, //EVENT_LOCK - {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_rcvd_commit_op_acc}, //EVENT_RCVD_ACC - {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_send_unlock}, //EVENT_ALL_ACC - {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, //EVENT_STAGE_ACC - {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_send_unlock}, //EVENT_COMMIT_ACC - {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_commit_op_failed}, //EVENT_RCVD_RJT - {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, //EVENT_STAGE_OP - {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, //EVENT_COMMIT_OP - {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK - {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_send_unlock}, //EVENT_START_UNLOCK - {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, //EVENT_ALL_ACK - {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, //EVENT_MAX +glusterd_op_sm_t glusterd_op_state_commit_op_sent[] = { + {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, // EVENT_NONE + {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, // EVENT_START_LOCK + {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_lock}, // EVENT_LOCK + {GD_OP_STATE_COMMIT_OP_SENT, + glusterd_op_ac_rcvd_commit_op_acc}, // EVENT_RCVD_ACC + {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_send_unlock}, // EVENT_ALL_ACC + {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, // EVENT_STAGE_ACC + {GD_OP_STATE_UNLOCK_SENT, 
glusterd_op_ac_send_unlock}, // EVENT_COMMIT_ACC + {GD_OP_STATE_COMMIT_OP_FAILED, + glusterd_op_ac_commit_op_failed}, // EVENT_RCVD_RJT + {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, // EVENT_STAGE_OP + {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, // EVENT_COMMIT_OP + {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, // EVENT_UNLOCK + {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, // EVENT_START_UNLOCK + {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, // EVENT_ALL_ACK + {GD_OP_STATE_COMMIT_OP_SENT, + glusterd_op_ac_none}, // EVENT_LOCAL_UNLOCK_NO_RESP + {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, // EVENT_MAX }; -glusterd_op_sm_t glusterd_op_state_committed [] = { - {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_NONE - {GD_OP_STATE_COMMITED, glusterd_op_ac_none},//EVENT_START_LOCK - {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_LOCK - {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_RCVD_ACC - {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_ALL_ACC - {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_STAGE_ACC - {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_COMMIT_ACC - {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_RCVD_RJT - {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_STAGE_OP - {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_COMMIT_OP - {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK - {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_START_UNLOCK - {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_ALL_ACK - {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_MAX +glusterd_op_sm_t glusterd_op_state_committed[] = { + {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, // EVENT_NONE + {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, // EVENT_START_LOCK + {GD_OP_STATE_COMMITED, glusterd_op_ac_lock}, // EVENT_LOCK + {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, // EVENT_RCVD_ACC + {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, // EVENT_ALL_ACC + {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, // EVENT_STAGE_ACC + {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, // EVENT_COMMIT_ACC + {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, // EVENT_RCVD_RJT + {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, // EVENT_STAGE_OP + {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, // EVENT_COMMIT_OP + {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, // EVENT_UNLOCK + {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, // EVENT_START_UNLOCK + {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, // EVENT_ALL_ACK + {GD_OP_STATE_DEFAULT, + glusterd_op_ac_local_unlock}, // EVENT_LOCAL_UNLOCK_NO_RESP + {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, // EVENT_MAX }; -glusterd_op_sm_t glusterd_op_state_unlock_sent [] = { - {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, //EVENT_NONE - {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none},//EVENT_START_LOCK - {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, //EVENT_LOCK - {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_rcvd_unlock_acc}, //EVENT_RCVD_ACC - {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlocked_all}, //EVENT_ALL_ACC - {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, //EVENT_STAGE_ACC - {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, //EVENT_COMMIT_ACC - {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, //EVENT_RCVD_RJT - {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, //EVENT_STAGE_OP - {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, //EVENT_COMMIT_OP - {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK - {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, //EVENT_START_UNLOCK - {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, 
//EVENT_ALL_ACK - {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, //EVENT_MAX +glusterd_op_sm_t glusterd_op_state_unlock_sent[] = { + {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, // EVENT_NONE + {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, // EVENT_START_LOCK + {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_lock}, // EVENT_LOCK + {GD_OP_STATE_UNLOCK_SENT, + glusterd_op_ac_rcvd_unlock_acc}, // EVENT_RCVD_ACC + {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlocked_all}, // EVENT_ALL_ACC + {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, // EVENT_STAGE_ACC + {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, // EVENT_COMMIT_ACC + {GD_OP_STATE_UNLOCK_SENT, + glusterd_op_ac_rcvd_unlock_acc}, // EVENT_RCVD_RJT + {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, // EVENT_STAGE_OP + {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, // EVENT_COMMIT_OP + {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, // EVENT_UNLOCK + {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, // EVENT_START_UNLOCK + {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, // EVENT_ALL_ACK + {GD_OP_STATE_UNLOCK_SENT, + glusterd_op_ac_none}, // EVENT_LOCAL_UNLOCK_NO_RESP + {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, // EVENT_MAX }; - -glusterd_op_sm_t *glusterd_op_state_table [] = { - glusterd_op_state_default, - glusterd_op_state_lock_sent, - glusterd_op_state_locked, - glusterd_op_state_stage_op_sent, - glusterd_op_state_staged, - glusterd_op_state_commit_op_sent, - glusterd_op_state_committed, - glusterd_op_state_unlock_sent, - glusterd_op_state_stage_op_failed, - glusterd_op_state_commit_op_failed, - glusterd_op_state_brick_op_sent, - glusterd_op_state_brick_op_failed, - glusterd_op_state_brick_committed, - glusterd_op_state_brick_commit_failed +glusterd_op_sm_t glusterd_op_state_ack_drain[] = { + {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, // EVENT_NONE + {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, // EVENT_START_LOCK + {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_lock}, // EVENT_LOCK + {GD_OP_STATE_ACK_DRAIN, + glusterd_op_ac_send_unlock_drain}, // EVENT_RCVD_ACC + {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, // EVENT_ALL_ACC + {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, // EVENT_STAGE_ACC + {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, // EVENT_COMMIT_ACC + {GD_OP_STATE_ACK_DRAIN, + glusterd_op_ac_send_unlock_drain}, // EVENT_RCVD_RJT + {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, // EVENT_STAGE_OP + {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, // EVENT_COMMIT_OP + {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, // EVENT_UNLOCK + {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, // EVENT_START_UNLOCK + {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_send_unlock}, // EVENT_ALL_ACK + {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, // EVENT_LOCAL_UNLOCK_NO_RESP + {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, // EVENT_MAX }; +glusterd_op_sm_t *glusterd_op_state_table[] = { + glusterd_op_state_default, glusterd_op_state_lock_sent, + glusterd_op_state_locked, glusterd_op_state_stage_op_sent, + glusterd_op_state_staged, glusterd_op_state_commit_op_sent, + glusterd_op_state_committed, glusterd_op_state_unlock_sent, + glusterd_op_state_stage_op_failed, glusterd_op_state_commit_op_failed, + glusterd_op_state_brick_op_sent, glusterd_op_state_brick_op_failed, + glusterd_op_state_brick_committed, glusterd_op_state_brick_commit_failed, + glusterd_op_state_ack_drain}; + int -glusterd_op_sm_new_event (glusterd_op_sm_event_type_t event_type, - glusterd_op_sm_event_t **new_event) +glusterd_op_sm_new_event(glusterd_op_sm_event_type_t event_type, + glusterd_op_sm_event_t 
**new_event) { - glusterd_op_sm_event_t *event = NULL; + glusterd_op_sm_event_t *event = NULL; - GF_ASSERT (new_event); - GF_ASSERT (GD_OP_EVENT_NONE <= event_type && - GD_OP_EVENT_MAX > event_type); + GF_ASSERT(new_event); + GF_ASSERT(GD_OP_EVENT_NONE <= event_type && GD_OP_EVENT_MAX > event_type); - event = GF_CALLOC (1, sizeof (*event), gf_gld_mt_op_sm_event_t); + event = GF_CALLOC(1, sizeof(*event), gf_gld_mt_op_sm_event_t); - if (!event) - return -1; + if (!event) + return -1; - *new_event = event; - event->event = event_type; - INIT_LIST_HEAD (&event->list); + *new_event = event; + event->event = event_type; + CDS_INIT_LIST_HEAD(&event->list); - return 0; + return 0; } int -glusterd_op_sm_inject_event (glusterd_op_sm_event_type_t event_type, - void *ctx) +glusterd_op_sm_inject_event(glusterd_op_sm_event_type_t event_type, + uuid_t *txn_id, void *ctx) { - int32_t ret = -1; - glusterd_op_sm_event_t *event = NULL; + int32_t ret = -1; + glusterd_op_sm_event_t *event = NULL; - GF_ASSERT (event_type < GD_OP_EVENT_MAX && - event_type >= GD_OP_EVENT_NONE); + GF_ASSERT(event_type < GD_OP_EVENT_MAX && event_type >= GD_OP_EVENT_NONE); - ret = glusterd_op_sm_new_event (event_type, &event); + ret = glusterd_op_sm_new_event(event_type, &event); - if (ret) - goto out; + if (ret) + goto out; - event->ctx = ctx; + event->ctx = ctx; - gf_log ("glusterd", GF_LOG_DEBUG, "Enqueuing event: '%s'", - glusterd_op_sm_event_name_get (event->event)); - list_add_tail (&event->list, &gd_op_sm_queue); + if (txn_id) + gf_uuid_copy(event->txn_id, *txn_id); + + gf_msg_debug(THIS->name, 0, "Enqueue event: '%s'", + glusterd_op_sm_event_name_get(event->event)); + cds_list_add_tail(&event->list, &gd_op_sm_queue); out: - return ret; + return ret; } void -glusterd_destroy_req_ctx (glusterd_req_ctx_t *ctx) +glusterd_destroy_req_ctx(glusterd_req_ctx_t *ctx) { - if (!ctx) - return; - if (ctx->dict) - dict_unref (ctx->dict); - GF_FREE (ctx); + if (!ctx) + return; + if (ctx->dict) + dict_unref(ctx->dict); + GF_FREE(ctx); } void -glusterd_destroy_op_event_ctx (glusterd_op_sm_event_t *event) +glusterd_destroy_local_unlock_ctx(uuid_t *ctx) { - if (!event) - return; + if (!ctx) + return; + GF_FREE(ctx); +} - switch (event->event) { +void +glusterd_destroy_op_event_ctx(glusterd_op_sm_event_t *event) +{ + if (!event) + return; + + switch (event->event) { case GD_OP_EVENT_LOCK: case GD_OP_EVENT_UNLOCK: - glusterd_destroy_lock_ctx (event->ctx); - break; + glusterd_destroy_lock_ctx(event->ctx); + break; case GD_OP_EVENT_STAGE_OP: case GD_OP_EVENT_ALL_ACK: - glusterd_destroy_req_ctx (event->ctx); - break; + glusterd_destroy_req_ctx(event->ctx); + break; + case GD_OP_EVENT_LOCAL_UNLOCK_NO_RESP: + glusterd_destroy_local_unlock_ctx(event->ctx); + break; default: - break; - } + break; + } } int -glusterd_op_sm () -{ - glusterd_op_sm_event_t *event = NULL; - glusterd_op_sm_event_t *tmp = NULL; - int ret = -1; - glusterd_op_sm_ac_fn handler = NULL; - glusterd_op_sm_t *state = NULL; - glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; - - (void ) pthread_mutex_lock (&gd_op_sm_lock); - - while (!list_empty (&gd_op_sm_queue)) { - - list_for_each_entry_safe (event, tmp, &gd_op_sm_queue, list) { - - list_del_init (&event->list); - event_type = event->event; - gf_log ("", GF_LOG_DEBUG, "Dequeued event of type: '%s'", - glusterd_op_sm_event_name_get(event_type)); - - state = glusterd_op_state_table[opinfo.state.state]; - - GF_ASSERT (state); - - handler = state[event_type].handler; - GF_ASSERT (handler); - - ret = handler (event, event->ctx); 
- - if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, - "handler returned: %d", ret); - glusterd_destroy_op_event_ctx (event); - GF_FREE (event); - continue; - } - - ret = glusterd_op_sm_transition_state (&opinfo, state, - event_type); - - if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, - "Unable to transition" - "state from '%s' to '%s'", - glusterd_op_sm_state_name_get(opinfo.state.state), - glusterd_op_sm_state_name_get(state[event_type].next_state)); - (void ) pthread_mutex_unlock (&gd_op_sm_lock); - return ret; - } +glusterd_op_sm() +{ + glusterd_op_sm_event_t *event = NULL; + glusterd_op_sm_event_t *tmp = NULL; + int ret = -1; + int lock_err = 0; + glusterd_op_sm_ac_fn handler = NULL; + glusterd_op_sm_t *state = NULL; + glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; + xlator_t *this = NULL; + glusterd_op_info_t txn_op_info; + glusterd_conf_t *priv = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + ret = synclock_trylock(&gd_op_sm_lock); + if (ret) { + lock_err = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_LOCK_FAIL, + "lock failed due to %s", strerror(lock_err)); + goto lock_failed; + } + + while (!cds_list_empty(&gd_op_sm_queue)) { + cds_list_for_each_entry_safe(event, tmp, &gd_op_sm_queue, list) + { + cds_list_del_init(&event->list); + event_type = event->event; + gf_msg_debug(this->name, 0, + "Dequeued event of " + "type: '%s'", + glusterd_op_sm_event_name_get(event_type)); + + gf_msg_debug(this->name, 0, "transaction ID = %s", + uuid_utoa(event->txn_id)); + + ret = glusterd_get_txn_opinfo(&event->txn_id, &txn_op_info); + if (ret) { + gf_msg_callingfn(this->name, GF_LOG_ERROR, 0, + GD_MSG_TRANS_OPINFO_GET_FAIL, + "Unable to get transaction " + "opinfo for transaction ID :" + "%s", + uuid_utoa(event->txn_id)); + glusterd_destroy_op_event_ctx(event); + GF_FREE(event); + continue; + } else + opinfo = txn_op_info; + + state = glusterd_op_state_table[opinfo.state.state]; + + GF_ASSERT(state); + + handler = state[event_type].handler; + GF_ASSERT(handler); + + ret = handler(event, event->ctx); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HANDLER_RETURNED, + "handler returned: %d", ret); + glusterd_destroy_op_event_ctx(event); + GF_FREE(event); + continue; + } + + ret = glusterd_op_sm_transition_state(&opinfo, state, event_type); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_EVENT_STATE_TRANSITION_FAIL, + "Unable to transition" + "state from '%s' to '%s'", + glusterd_op_sm_state_name_get(opinfo.state.state), + glusterd_op_sm_state_name_get( + state[event_type].next_state)); + (void)synclock_unlock(&gd_op_sm_lock); + return ret; + } - glusterd_destroy_op_event_ctx (event); - GF_FREE (event); + if ((state[event_type].next_state == GD_OP_STATE_DEFAULT) && + (event_type == GD_OP_EVENT_UNLOCK)) { + /* Clearing the transaction opinfo */ + ret = glusterd_clear_txn_opinfo(&event->txn_id); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_TRANS_OPINFO_CLEAR_FAIL, + "Unable to clear " + "transaction's opinfo"); + } else { + if ((priv->op_version < GD_OP_VERSION_6_0) || + !(event_type == GD_OP_EVENT_STAGE_OP && + opinfo.state.state == GD_OP_STATE_STAGED && + opinfo.skip_locking)) { + ret = glusterd_set_txn_opinfo(&event->txn_id, &opinfo); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_TRANS_OPINFO_SET_FAIL, + "Unable to set " + "transaction's opinfo"); } - } + } - - (void ) pthread_mutex_unlock (&gd_op_sm_lock); - ret = 0; - - return ret; -} - -int32_t -glusterd_op_set_op (glusterd_op_t op) 
-{ - - GF_ASSERT (op < GD_OP_MAX); - GF_ASSERT (op > GD_OP_NONE); - - opinfo.op[op] = 1; - opinfo.pending_op[op] = 1; - opinfo.commit_op[op] = 1; - - return 0; - -} - -int32_t -glusterd_op_get_op () -{ - - int i = 0; - int32_t ret = 0; - - for ( i = 0; i < GD_OP_MAX; i++) { - if (opinfo.op[i]) - break; + glusterd_destroy_op_event_ctx(event); + GF_FREE(event); } + } - if ( i == GD_OP_MAX) - ret = -1; - else - ret = i; - - return ret; - -} - - -int32_t -glusterd_op_set_cli_op (glusterd_op_t op) -{ - - int32_t ret = 0; - - ret = pthread_mutex_trylock (&opinfo.lock); - - if (ret) - goto out; - - opinfo.cli_op = op; + (void)synclock_unlock(&gd_op_sm_lock); + ret = 0; -out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; -} +lock_failed: -int32_t -glusterd_op_set_req (rpcsvc_request_t *req) -{ - - GF_ASSERT (req); - opinfo.req = req; - return 0; + return ret; } int32_t -glusterd_op_clear_pending_op (glusterd_op_t op) +glusterd_op_set_op(glusterd_op_t op) { + GF_ASSERT(op < GD_OP_MAX); + GF_ASSERT(op > GD_OP_NONE); - GF_ASSERT (op < GD_OP_MAX); - GF_ASSERT (op > GD_OP_NONE); - - opinfo.pending_op[op] = 0; - - return 0; + opinfo.op = op; + return 0; } int32_t -glusterd_op_clear_commit_op (glusterd_op_t op) +glusterd_op_get_op() { - - GF_ASSERT (op < GD_OP_MAX); - GF_ASSERT (op > GD_OP_NONE); - - opinfo.commit_op[op] = 0; - - return 0; - + return opinfo.op; } int32_t -glusterd_op_clear_op (glusterd_op_t op) +glusterd_op_set_req(rpcsvc_request_t *req) { - - GF_ASSERT (op < GD_OP_MAX); - GF_ASSERT (op > GD_OP_NONE); - - opinfo.op[op] = 0; - - return 0; - + GF_ASSERT(req); + opinfo.req = req; + return 0; } int32_t -glusterd_op_init_ctx (glusterd_op_t op) +glusterd_op_clear_op(glusterd_op_t op) { - int ret = 0; - dict_t *dict = NULL; + opinfo.op = GD_OP_NONE; - if (GD_OP_PROFILE_VOLUME != op) { - gf_log ("", GF_LOG_DEBUG, "Received op: %d, returning", op); - goto out; - } - dict = dict_new (); - if (dict == NULL) { - ret = -1; - goto out; - } - ret = glusterd_op_set_ctx (op, dict); - if (ret) - goto out; - ret = glusterd_op_set_ctx_free (op, _gf_true); - if (ret) - goto out; -out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + return 0; } - - int32_t -glusterd_op_fini_ctx (glusterd_op_t op) +glusterd_op_free_ctx(glusterd_op_t op, void *ctx) { - dict_t *dict = NULL; - - if (glusterd_op_get_ctx_free (op)) { - dict = glusterd_op_get_ctx (op); - if (dict) - dict_unref (dict); - } - glusterd_op_reset_ctx (op); - return 0; -} - - - -int32_t -glusterd_op_free_ctx (glusterd_op_t op, void *ctx, gf_boolean_t ctx_free) -{ - - GF_ASSERT (op < GD_OP_MAX); - GF_ASSERT (op > GD_OP_NONE); - - if (ctx && ctx_free) { - switch (op) { - case GD_OP_CREATE_VOLUME: - case GD_OP_STOP_VOLUME: - case GD_OP_ADD_BRICK: - case GD_OP_REMOVE_BRICK: - case GD_OP_REPLACE_BRICK: - case GD_OP_LOG_FILENAME: - case GD_OP_LOG_ROTATE: - case GD_OP_SYNC_VOLUME: - case GD_OP_SET_VOLUME: - case GD_OP_START_VOLUME: - case GD_OP_RESET_VOLUME: - case GD_OP_GSYNC_SET: - case GD_OP_QUOTA: - case GD_OP_PROFILE_VOLUME: - dict_unref (ctx); - break; - case GD_OP_DELETE_VOLUME: - GF_FREE (ctx); - break; - default: - GF_ASSERT (0); - break; - } + if (ctx) { + switch (op) { + case GD_OP_CREATE_VOLUME: + case GD_OP_DELETE_VOLUME: + case GD_OP_STOP_VOLUME: + case GD_OP_ADD_BRICK: + case GD_OP_REMOVE_BRICK: + case GD_OP_REPLACE_BRICK: + case GD_OP_LOG_ROTATE: + case GD_OP_SYNC_VOLUME: + case GD_OP_SET_VOLUME: + case GD_OP_START_VOLUME: + case GD_OP_RESET_VOLUME: + case GD_OP_GSYNC_SET: + case GD_OP_QUOTA: + case 
GD_OP_PROFILE_VOLUME: + case GD_OP_STATUS_VOLUME: + case GD_OP_REBALANCE: + case GD_OP_HEAL_VOLUME: + case GD_OP_STATEDUMP_VOLUME: + case GD_OP_CLEARLOCKS_VOLUME: + case GD_OP_DEFRAG_BRICK_VOLUME: + case GD_OP_MAX_OPVERSION: + dict_unref(ctx); + break; + default: + GF_ASSERT(0); + break; } - return 0; + } + glusterd_op_reset_ctx(); + return 0; } void * -glusterd_op_get_ctx (glusterd_op_t op) -{ - GF_ASSERT (op < GD_OP_MAX); - GF_ASSERT (op > GD_OP_NONE); - - return opinfo.op_ctx[op]; - -} - -int32_t -glusterd_op_set_ctx_free (glusterd_op_t op, gf_boolean_t ctx_free) -{ - - GF_ASSERT (op < GD_OP_MAX); - GF_ASSERT (op > GD_OP_NONE); - - opinfo.ctx_free[op] = ctx_free; - - return 0; - -} - -int32_t -glusterd_op_clear_ctx_free (glusterd_op_t op) +glusterd_op_get_ctx() { - - GF_ASSERT (op < GD_OP_MAX); - GF_ASSERT (op > GD_OP_NONE); - - opinfo.ctx_free[op] = _gf_false; - - return 0; - -} - -gf_boolean_t -glusterd_op_get_ctx_free (glusterd_op_t op) -{ - GF_ASSERT (op < GD_OP_MAX); - GF_ASSERT (op > GD_OP_NONE); - - return opinfo.ctx_free[op]; - + return opinfo.op_ctx; } int -glusterd_op_sm_init () +glusterd_op_sm_init() { - INIT_LIST_HEAD (&gd_op_sm_queue); - pthread_mutex_init (&gd_op_sm_lock, NULL); - return 0; + CDS_INIT_LIST_HEAD(&gd_op_sm_queue); + synclock_init(&gd_op_sm_lock, SYNC_LOCK_DEFAULT); + return 0; } - -int32_t -glusterd_opinfo_unlock(){ - return (pthread_mutex_unlock(&opinfo.lock)); -} -int32_t -glusterd_volume_stats_write_perf (char *brick_path, int32_t blk_size, - int32_t blk_count, double *throughput, double *time) -{ - int32_t fd = -1; - int32_t input_fd = -1; - char export_path[1024]; - char *buf = NULL; - int32_t iter = 0; - int32_t ret = -1; - int64_t total_blks = 0; - struct timeval begin, end = {0, }; - - - GF_VALIDATE_OR_GOTO ("stripe", brick_path, out); - - snprintf (export_path, sizeof(export_path), "%s/%s", - brick_path, ".gf_tmp_stats_perf"); - fd = open (export_path, O_CREAT|O_RDWR, S_IRWXU); - if (fd == -1) - return errno; - buf = GF_MALLOC (blk_size * sizeof(*buf), gf_common_mt_char); - - if (!buf) - return ret; - - input_fd = open("/dev/zero", O_RDONLY); - if (input_fd == -1) - return errno; - gettimeofday (&begin, NULL); - for (iter = 0; iter < blk_count; iter++) { - ret = read (input_fd, buf, blk_size); - if (ret != blk_size) { - ret = -1; - goto out; - } - ret = write (fd, buf, blk_size); - if (ret != blk_size) { - ret = -1; - goto out; - } - total_blks += ret; - } - ret = 0; - if (total_blks != (blk_size * blk_count)) { - gf_log ("glusterd", GF_LOG_WARNING, "Errors in write"); - ret = -1; - goto out; - } - - gettimeofday (&end, NULL); - *time = (end.tv_sec - begin.tv_sec) * 1e6 - + (end.tv_usec - begin.tv_usec); - - *throughput = total_blks / *time; - gf_log ("glusterd", GF_LOG_INFO, "Throughput %.2f MBps time %.2f secs bytes " - "written %"PRId64, *throughput, *time / 1e6, total_blks); -out: - if (fd >= 0) - close (fd); - if (input_fd >= 0) - close (input_fd); - if (buf) - GF_FREE (buf); - unlink (export_path); - return ret; -} - -int32_t -glusterd_volume_stats_read_perf (char *brick_path, int32_t blk_size, - int32_t blk_count, double *throughput, double *time) -{ - int32_t fd = -1; - int32_t output_fd = -1; - int32_t input_fd = -1; - char export_path[1024]; - char *buf = NULL; - int32_t iter = 0; - int32_t ret = -1; - int64_t total_blks = 0; - struct timeval begin, end = {0, }; - - - GF_VALIDATE_OR_GOTO ("glusterd", brick_path, out); - - snprintf (export_path, sizeof(export_path), "%s/%s", - brick_path, ".gf_tmp_stats_perf"); - fd = open (export_path, 
O_CREAT|O_RDWR, S_IRWXU); - if (fd == -1) - return errno; - buf = GF_MALLOC (blk_size * sizeof(*buf), gf_common_mt_char); - - if (!buf) - return ret; - - output_fd = open("/dev/null", O_RDWR); - if (output_fd == -1) - return errno; - input_fd = open("/dev/zero", O_RDONLY); - if (input_fd == -1) - return errno; - for (iter = 0; iter < blk_count; iter++) { - ret = read (input_fd, buf, blk_size); - if (ret != blk_size) { - ret = -1; - goto out; - } - ret = write (fd, buf, blk_size); - if (ret != blk_size) { - ret = -1; - goto out; - } - } - - - lseek (fd, 0L, 0); - gettimeofday (&begin, NULL); - for (iter = 0; iter < blk_count; iter++) { - ret = read (fd, buf, blk_size); - if (ret != blk_size) { - ret = -1; - goto out; - } - ret = write (output_fd, buf, blk_size); - if (ret != blk_size) { - ret = -1; - goto out; - } - total_blks += ret; - } - ret = 0; - if (total_blks != (blk_size * blk_count)) { - gf_log ("glusterd", GF_LOG_WARNING, "Errors in write"); - ret = -1; - goto out; - } - - gettimeofday (&end, NULL); - *time = (end.tv_sec - begin.tv_sec) * 1e6 - + (end.tv_usec - begin.tv_usec); - - *throughput = total_blks / *time; - gf_log ("glusterd", GF_LOG_INFO, "Throughput %.2f MBps time %.2f secs bytes " - "read %"PRId64, *throughput, *time / 1e6, total_blks); -out: - if (fd >= 0) - close (fd); - if (input_fd >= 0) - close (input_fd); - if (output_fd >= 0) - close (output_fd); - if (buf) - GF_FREE (buf); - unlink (export_path); - return ret; -} - diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.h b/xlators/mgmt/glusterd/src/glusterd-op-sm.h index 5e1106bc1e2..8a24b16612a 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.h +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.h @@ -1,289 +1,313 @@ /* - Copyright (c) 2006-2010 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ #ifndef _GLUSTERD_OP_SM_H_ #define _GLUSTERD_OP_SM_H_ -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - -#ifndef GSYNC_CONF -#define GSYNC_CONF "gsync/gsyncd.conf" -#endif - #include <pthread.h> -#include "uuid.h" - -#include "glusterfs.h" -#include "xlator.h" -#include "logging.h" -#include "call-stub.h" -#include "fd.h" -#include "byte-order.h" +#include <glusterfs/compat-uuid.h> + +#include <glusterfs/glusterfs.h> +#include <glusterfs/xlator.h> +#include <glusterfs/logging.h> +#include <glusterfs/call-stub.h> +#include <glusterfs/byte-order.h> #include "glusterd.h" #include "protocol-common.h" +#include "glusterd-hooks.h" -#define GD_VOLUME_NAME_MAX 256 +#define GD_OP_PROTECTED (0x02) +#define GD_OP_UNPROTECTED (0x04) typedef enum glusterd_op_sm_state_ { - GD_OP_STATE_DEFAULT = 0, - GD_OP_STATE_LOCK_SENT, - GD_OP_STATE_LOCKED, - GD_OP_STATE_STAGE_OP_SENT, - GD_OP_STATE_STAGED, - GD_OP_STATE_COMMIT_OP_SENT, - GD_OP_STATE_COMMITED, - GD_OP_STATE_UNLOCK_SENT, - GD_OP_STATE_STAGE_OP_FAILED, - GD_OP_STATE_COMMIT_OP_FAILED, - GD_OP_STATE_BRICK_OP_SENT, - GD_OP_STATE_BRICK_OP_FAILED, - GD_OP_STATE_BRICK_COMMITTED, - GD_OP_STATE_BRICK_COMMIT_FAILED, - GD_OP_STATE_MAX, + GD_OP_STATE_DEFAULT = 0, + GD_OP_STATE_LOCK_SENT, + GD_OP_STATE_LOCKED, + GD_OP_STATE_STAGE_OP_SENT, + GD_OP_STATE_STAGED, + GD_OP_STATE_COMMIT_OP_SENT, + GD_OP_STATE_COMMITED, + GD_OP_STATE_UNLOCK_SENT, + GD_OP_STATE_STAGE_OP_FAILED, + GD_OP_STATE_COMMIT_OP_FAILED, + GD_OP_STATE_BRICK_OP_SENT, + GD_OP_STATE_BRICK_OP_FAILED, + GD_OP_STATE_BRICK_COMMITTED, + GD_OP_STATE_BRICK_COMMIT_FAILED, + GD_OP_STATE_ACK_DRAIN, + GD_OP_STATE_MAX, } glusterd_op_sm_state_t; typedef enum glusterd_op_sm_event_type_ { - GD_OP_EVENT_NONE = 0, - GD_OP_EVENT_START_LOCK, - GD_OP_EVENT_LOCK, - GD_OP_EVENT_RCVD_ACC, - GD_OP_EVENT_ALL_ACC, - GD_OP_EVENT_STAGE_ACC, - GD_OP_EVENT_COMMIT_ACC, - GD_OP_EVENT_RCVD_RJT, - GD_OP_EVENT_STAGE_OP, - GD_OP_EVENT_COMMIT_OP, - GD_OP_EVENT_UNLOCK, - GD_OP_EVENT_START_UNLOCK, - GD_OP_EVENT_ALL_ACK, - GD_OP_EVENT_MAX + GD_OP_EVENT_NONE = 0, + GD_OP_EVENT_START_LOCK, + GD_OP_EVENT_LOCK, + GD_OP_EVENT_RCVD_ACC, + GD_OP_EVENT_ALL_ACC, + GD_OP_EVENT_STAGE_ACC, + GD_OP_EVENT_COMMIT_ACC, + GD_OP_EVENT_RCVD_RJT, + GD_OP_EVENT_STAGE_OP, + GD_OP_EVENT_COMMIT_OP, + GD_OP_EVENT_UNLOCK, + GD_OP_EVENT_START_UNLOCK, + GD_OP_EVENT_ALL_ACK, + GD_OP_EVENT_LOCAL_UNLOCK_NO_RESP, + GD_OP_EVENT_MAX } glusterd_op_sm_event_type_t; - struct glusterd_op_sm_event_ { - struct list_head list; - void *ctx; - glusterd_op_sm_event_type_t event; + struct cds_list_head list; + void *ctx; + glusterd_op_sm_event_type_t event; + uuid_t txn_id; }; typedef struct glusterd_op_sm_event_ glusterd_op_sm_event_t; -typedef int (*glusterd_op_sm_ac_fn) (glusterd_op_sm_event_t *, void *); +typedef int (*glusterd_op_sm_ac_fn)(glusterd_op_sm_event_t *, void *); typedef struct glusterd_op_sm_ { - glusterd_op_sm_state_t next_state; - glusterd_op_sm_ac_fn handler; + glusterd_op_sm_state_t next_state; + glusterd_op_sm_ac_fn handler; } glusterd_op_sm_t; typedef struct glusterd_op_sm_state_info_ { - glusterd_op_sm_state_t state; - struct timeval time; + glusterd_op_sm_state_t state; + struct timeval time; } glusterd_op_sm_state_info_t; struct glusterd_op_info_ { - glusterd_op_sm_state_info_t state; - int32_t pending_count; - int32_t brick_pending_count; - int32_t op_count; - glusterd_op_t op[GD_OP_MAX]; - glusterd_op_t pending_op[GD_OP_MAX]; - glusterd_op_t commit_op[GD_OP_MAX]; - struct list_head op_peers; - void *op_ctx[GD_OP_MAX]; - rpcsvc_request_t 
*req; - int32_t op_ret; - int32_t op_errno; - pthread_mutex_t lock; - int32_t cli_op; - gf_boolean_t ctx_free[GD_OP_MAX]; - char *op_errstr; - struct list_head pending_bricks; + glusterd_op_sm_state_info_t state; + int32_t pending_count; + int32_t brick_pending_count; + int32_t op_count; + /* op is an enum, glusterd_op_t or glusterd_op_sm_state_info_t */ + int op; + struct cds_list_head op_peers; + void *op_ctx; + rpcsvc_request_t *req; + int32_t op_ret; + int32_t op_errno; + char *op_errstr; + struct cds_list_head pending_bricks; + uint32_t txn_generation; + gf_boolean_t skip_locking; }; typedef struct glusterd_op_info_ glusterd_op_info_t; -struct glusterd_op_delete_volume_ctx_ { - char volume_name[GD_VOLUME_NAME_MAX]; -}; - -typedef struct glusterd_op_delete_volume_ctx_ glusterd_op_delete_volume_ctx_t; - struct glusterd_op_log_filename_ctx_ { - char volume_name[GD_VOLUME_NAME_MAX]; - char brick[GD_VOLUME_NAME_MAX]; - char path[PATH_MAX]; + char volume_name[GD_VOLUME_NAME_MAX]; + char brick[GD_VOLUME_NAME_MAX]; + char path[PATH_MAX]; }; typedef struct glusterd_op_log_filename_ctx_ glusterd_op_log_filename_ctx_t; struct glusterd_op_lock_ctx_ { - uuid_t uuid; - rpcsvc_request_t *req; + uuid_t uuid; + dict_t *dict; + rpcsvc_request_t *req; }; typedef struct glusterd_op_lock_ctx_ glusterd_op_lock_ctx_t; struct glusterd_req_ctx_ { - rpcsvc_request_t *req; - u_char uuid[16]; - int op; - dict_t *dict; + rpcsvc_request_t *req; + u_char uuid[16]; + int op; + dict_t *dict; }; typedef struct glusterd_req_ctx_ glusterd_req_ctx_t; typedef struct glusterd_op_brick_rsp_ctx_ { - int op_ret; - char *op_errstr; - dict_t *rsp_dict; - glusterd_req_ctx_t *commit_ctx; - glusterd_brickinfo_t *brickinfo; + int op_ret; + char *op_errstr; + dict_t *rsp_dict; + glusterd_req_ctx_t *commit_ctx; + glusterd_pending_node_t *pending_node; } glusterd_op_brick_rsp_ctx_t; typedef struct glusterd_pr_brick_rsp_conv_t { - int count; - dict_t *dict; + int count; + dict_t *dict; } glusterd_pr_brick_rsp_conv_t; +typedef struct glusterd_heal_rsp_conv_ { + dict_t *dict; + glusterd_volinfo_t *volinfo; + xlator_t *this; +} glusterd_heal_rsp_conv_t; + +typedef struct glusterd_status_rsp_conv_ { + int count; + int brick_index_max; + int other_count; + dict_t *dict; +} glusterd_status_rsp_conv_t; + +typedef struct glusterd_txn_opinfo_object_ { + glusterd_op_info_t opinfo; +} glusterd_txn_opinfo_obj; + +typedef enum cli_cmd_type_ { + PER_HEAL_XL, + ALL_HEAL_XL, +} cli_cmd_type; + +typedef struct glusterd_all_volume_options { + char *option; + char *dflt_val; +} glusterd_all_vol_opts; + int -glusterd_op_sm_new_event (glusterd_op_sm_event_type_t event_type, - glusterd_op_sm_event_t **new_event); +glusterd_op_commit_hook(glusterd_op_t op, dict_t *op_ctx, + glusterd_commit_hook_type_t type); + +int +glusterd_op_sm_new_event(glusterd_op_sm_event_type_t event_type, + glusterd_op_sm_event_t **new_event); int -glusterd_op_sm_inject_event (glusterd_op_sm_event_type_t event_type, - void *ctx); +glusterd_op_sm_inject_event(glusterd_op_sm_event_type_t event_type, + uuid_t *txn_id, void *ctx); int -glusterd_op_sm_init (); +glusterd_op_sm_init(); int -glusterd_op_sm (); +glusterd_op_sm(); int32_t -glusterd_op_set_ctx (glusterd_op_t op, void *ctx); +glusterd_op_set_ctx(void *ctx); int32_t -glusterd_op_set_op (glusterd_op_t op); +glusterd_op_set_op(glusterd_op_t op); -int32_t -glusterd_op_clear_pending_op (glusterd_op_t op); +int +glusterd_op_build_payload(dict_t **req, char **op_errstr, dict_t *op_ctx); int32_t -glusterd_op_clear_commit_op 
(glusterd_op_t op); +glusterd_op_stage_validate(glusterd_op_t op, dict_t *req, char **op_errstr, + dict_t *rsp_dict); -int -glusterd_op_build_payload (glusterd_op_t op, dict_t **req); +int32_t +glusterd_op_commit_perform(glusterd_op_t op, dict_t *req, char **op_errstr, + dict_t *dict); int32_t -glusterd_op_stage_validate (glusterd_op_t op, dict_t *req, char **op_errstr, - dict_t *rsp_dict); +glusterd_op_txn_begin(rpcsvc_request_t *req, glusterd_op_t op, void *ctx, + char *err_str, size_t err_len); int32_t -glusterd_op_commit_perform (glusterd_op_t op, dict_t *req, char **op_errstr, - dict_t* dict); +glusterd_op_txn_complete(); void * -glusterd_op_get_ctx (glusterd_op_t op); - -int32_t -glusterd_op_set_req (rpcsvc_request_t *req); +glusterd_op_get_ctx(); int32_t -glusterd_op_set_cli_op (glusterd_op_t op); +glusterd_op_set_req(rpcsvc_request_t *req); int32_t -glusterd_op_send_cli_response (glusterd_op_t op, int32_t op_ret, - int32_t op_errno, rpcsvc_request_t *req, - void *ctx, char *op_errstr); +glusterd_op_send_cli_response(glusterd_op_t op, int32_t op_ret, + int32_t op_errno, rpcsvc_request_t *req, + void *ctx, char *op_errstr); int32_t -glusterd_op_get_op (); +glusterd_op_get_op(); int32_t -glusterd_op_clear_pending_op (glusterd_op_t op); +glusterd_op_clear_op(); int32_t -glusterd_op_clear_commit_op (glusterd_op_t op); +glusterd_op_free_ctx(glusterd_op_t op, void *ctx); -int32_t -glusterd_op_clear_op (glusterd_op_t op); +int +glusterd_check_option_exists(char *optstring, char **completion); -int32_t -glusterd_op_free_ctx (glusterd_op_t op, void *ctx, gf_boolean_t ctx_free); +int +set_xlator_option(dict_t *dict, char *key, char *value); -int32_t -glusterd_opinfo_unlock(); +char * +glusterd_op_sm_state_name_get(int state); +char * +glusterd_op_sm_event_name_get(int event); int32_t -glusterd_op_set_ctx_free (glusterd_op_t op, gf_boolean_t ctx_free); - +glusterd_op_bricks_select(glusterd_op_t op, dict_t *dict, char **op_errstr, + struct cds_list_head *selected, dict_t *rsp_dict); +int +glusterd_brick_op_build_payload(glusterd_op_t op, + glusterd_brickinfo_t *brickinfo, + gd1_mgmt_brick_op_req **req, dict_t *dict); +int +glusterd_node_op_build_payload(glusterd_op_t op, gd1_mgmt_brick_op_req **req, + dict_t *dict); int32_t -glusterd_op_clear_ctx_free (glusterd_op_t op); +glusterd_handle_brick_rsp(void *pending_entry, glusterd_op_t op, + dict_t *rsp_dict, dict_t *ctx_dict, char **op_errstr, + gd_node_type type); -gf_boolean_t -glusterd_op_get_ctx_free (glusterd_op_t op); +dict_t * +glusterd_op_init_commit_rsp_dict(glusterd_op_t op); -int -glusterd_check_option_exists(char *optstring, char **completion); +void +glusterd_op_modify_op_ctx(glusterd_op_t op, void *op_ctx); int -set_xlator_option (dict_t *dict, char *key, char *value); +glusterd_set_detach_bricks(dict_t *dict, glusterd_volinfo_t *volinfo); -char * -glusterd_check_brick_rb_part (char *bricks, int count, glusterd_volinfo_t *volinfo); +int32_t +glusterd_volume_stats_read_perf(char *brick_path, int32_t blk_size, + int32_t blk_count, double *throughput, + double *time); +int32_t +glusterd_volume_stats_write_perf(char *brick_path, int32_t blk_size, + int32_t blk_count, double *throughput, + double *time); +gf_boolean_t +glusterd_is_volume_started(glusterd_volinfo_t *volinfo); -void -glusterd_do_replace_brick (void *data); int -glusterd_options_reset (glusterd_volinfo_t *volinfo); +glusterd_start_bricks(glusterd_volinfo_t *volinfo); -char* -glusterd_op_sm_state_name_get (int state); - -char* -glusterd_op_sm_event_name_get (int event); 
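This header hunk threads a transaction ID through the op state machine: glusterd_op_sm_inject_event() now takes a uuid_t *txn_id, and the per-transaction opinfo is fetched, stored and cleared with glusterd_get_txn_opinfo(), glusterd_set_txn_opinfo() and glusterd_clear_txn_opinfo(). A minimal sketch of how a caller might drive that API, assuming only the prototypes shown in this hunk (the helper name and the req_ctx argument are illustrative, not taken from the patch):

/* Illustrative sketch only: inject a STAGE_OP event for one transaction
 * and run the state machine. */
static int
example_inject_stage(uuid_t *txn_id, glusterd_req_ctx_t *req_ctx)
{
    int ret;

    /* Queue a STAGE_OP event tagged with this transaction's ID. */
    ret = glusterd_op_sm_inject_event(GD_OP_EVENT_STAGE_OP, txn_id, req_ctx);
    if (ret)
        return ret;

    /* Run the state machine; for each dequeued event it fetches the
     * per-transaction opinfo via glusterd_get_txn_opinfo(&event->txn_id, ...)
     * and clears it with glusterd_clear_txn_opinfo() once the transaction
     * returns to GD_OP_STATE_DEFAULT on EVENT_UNLOCK. */
    return glusterd_op_sm();
}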
-int32_t -glusterd_op_bricks_select (glusterd_op_t op, dict_t *dict, char **op_errstr); +gf_boolean_t +glusterd_are_all_volumes_stopped(); int -glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickinfo, - gd1_mgmt_brick_op_req **req, dict_t *dict); -int32_t -glusterd_handle_brick_rsp (glusterd_brickinfo_t *brickinfo, - glusterd_op_t op, dict_t *rsp_dict, dict_t *ctx_dict, - char **op_errstr); -void glusterd_op_brick_disconnect (void *data); +glusterd_stop_bricks(glusterd_volinfo_t *volinfo); +int +glusterd_defrag_volume_node_rsp(dict_t *req_dict, dict_t *rsp_dict, + dict_t *op_ctx); + int32_t -glusterd_op_init_ctx (glusterd_op_t op); +glusterd_get_txn_opinfo(uuid_t *txn_id, glusterd_op_info_t *opinfo); + int32_t -glusterd_op_fini_ctx (glusterd_op_t op); +glusterd_set_txn_opinfo(uuid_t *txn_id, glusterd_op_info_t *opinfo); + int32_t -glusterd_volume_stats_read_perf (char *brick_path, int32_t blk_size, - int32_t blk_count, double *throughput, double *time); +glusterd_clear_txn_opinfo(uuid_t *txn_id); + int32_t -glusterd_volume_stats_write_perf (char *brick_path, int32_t blk_size, - int32_t blk_count, double *throughput, double *time); -gf_boolean_t -glusterd_is_volume_started (glusterd_volinfo_t *volinfo); +glusterd_generate_txn_id(dict_t *dict, uuid_t **txn_id); + +void +glusterd_set_opinfo(char *errstr, int32_t op_errno, int32_t op_ret); + int -glusterd_start_bricks (glusterd_volinfo_t *volinfo); -gf_boolean_t -glusterd_are_all_volumes_stopped (); +glusterd_dict_set_volid(dict_t *dict, char *volname, char **op_errstr); + +int +glusterd_op_stats_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict); + +int +glusterd_op_stage_stats_volume(dict_t *dict, char **op_errstr); + int -glusterd_stop_bricks (glusterd_volinfo_t *volinfo); +gd_set_commit_hash(dict_t *dict); #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-peer-utils.c b/xlators/mgmt/glusterd/src/glusterd-peer-utils.c new file mode 100644 index 00000000000..18d355cb186 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-peer-utils.c @@ -0,0 +1,1058 @@ +/* + Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include "glusterd-peer-utils.h" +#include "glusterd-store.h" +#include "glusterd-server-quorum.h" +#include "glusterd-messages.h" +#include <glusterfs/common-utils.h> +#include "glusterd-utils.h" + +void +glusterd_peerinfo_destroy(struct rcu_head *head) +{ + int32_t ret = -1; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_peer_hostname_t *hostname = NULL; + glusterd_peer_hostname_t *tmp = NULL; + + /* This works as rcu_head is the first member of gd_rcu_head */ + peerinfo = caa_container_of((gd_rcu_head *)head, glusterd_peerinfo_t, + rcu_head); + + /* Set THIS to the saved this. 
Needed by some functions below */ + THIS = peerinfo->rcu_head.this; + + CDS_INIT_LIST_HEAD(&peerinfo->uuid_list); + + ret = glusterd_store_delete_peerinfo(peerinfo); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_PEERINFO_DELETE_FAIL, + "Deleting peer info failed"); + } + + GF_FREE(peerinfo->hostname); + peerinfo->hostname = NULL; + + cds_list_for_each_entry_safe(hostname, tmp, &peerinfo->hostnames, + hostname_list) + { + glusterd_peer_hostname_free(hostname); + } + + glusterd_sm_tr_log_delete(&peerinfo->sm_log); + pthread_mutex_unlock(&peerinfo->delete_lock); + pthread_mutex_destroy(&peerinfo->delete_lock); + GF_FREE(peerinfo); + + peerinfo = NULL; + + return; +} + +int32_t +glusterd_peerinfo_cleanup(glusterd_peerinfo_t *peerinfo) +{ + GF_ASSERT(peerinfo); + gf_boolean_t quorum_action = _gf_false; + glusterd_conf_t *priv = THIS->private; + + if (pthread_mutex_trylock(&peerinfo->delete_lock)) { + /* Someone else is already deleting the peer, so give up */ + return 0; + } + + if (peerinfo->quorum_contrib != QUORUM_NONE) + quorum_action = _gf_true; + if (peerinfo->rpc) { + peerinfo->rpc = glusterd_rpc_clnt_unref(priv, peerinfo->rpc); + peerinfo->rpc = NULL; + } + + cds_list_del_rcu(&peerinfo->uuid_list); + /* Saving THIS, as it is needed by the callback function */ + peerinfo->rcu_head.this = THIS; + call_rcu(&peerinfo->rcu_head.head, glusterd_peerinfo_destroy); + + if (quorum_action) + /* coverity[SLEEP] */ + glusterd_do_quorum_action(); + return 0; +} + +/* gd_peerinfo_find_from_hostname iterates over all the addresses saved for each + * peer and matches it to @hoststr. + * Returns the matched peer if found else returns NULL + */ +static glusterd_peerinfo_t * +gd_peerinfo_find_from_hostname(const char *hoststr) +{ + xlator_t *this = THIS; + glusterd_conf_t *priv = NULL; + glusterd_peerinfo_t *peer = NULL; + glusterd_peerinfo_t *found = NULL; + glusterd_peer_hostname_t *tmphost = NULL; + + GF_ASSERT(this != NULL); + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, (priv != NULL), out); + + GF_VALIDATE_OR_GOTO(this->name, (hoststr != NULL), out); + + RCU_READ_LOCK; + cds_list_for_each_entry_rcu(peer, &priv->peers, uuid_list) + { + cds_list_for_each_entry_rcu(tmphost, &peer->hostnames, hostname_list) + { + if (!strncasecmp(tmphost->hostname, hoststr, 1024)) { + gf_msg_debug(this->name, 0, "Friend %s found.. state: %d", + tmphost->hostname, peer->state.state); + found = peer; /* Probably needs to be + dereferenced*/ + goto unlock; + } + } + } +unlock: + RCU_READ_UNLOCK; +out: + return found; +} + +/* gd_peerinfo_find_from_addrinfo iterates over all the addresses saved for each + * peer, resolves them and compares them to @addr. + * + * + * NOTE: As getaddrinfo is a blocking call and is being performed multiple times + * in this function, it could lead to the calling thread to be blocked for + * significant amounts of time. 
+ * + * Returns the matched peer if found else returns NULL + */ +static glusterd_peerinfo_t * +gd_peerinfo_find_from_addrinfo(const struct addrinfo *addr) +{ + xlator_t *this = THIS; + glusterd_conf_t *conf = NULL; + glusterd_peerinfo_t *peer = NULL; + glusterd_peerinfo_t *found = NULL; + glusterd_peer_hostname_t *address = NULL; + int ret = 0; + struct addrinfo *paddr = NULL; + struct addrinfo *tmp = NULL; + + GF_ASSERT(this != NULL); + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, (conf != NULL), out); + + RCU_READ_LOCK; + cds_list_for_each_entry_rcu(peer, &conf->peers, uuid_list) + { + cds_list_for_each_entry_rcu(address, &peer->hostnames, hostname_list) + { + /* TODO: Cache the resolved addrinfos to improve + * performance + */ + ret = getaddrinfo(address->hostname, NULL, NULL, &paddr); + if (ret) { + /* Don't fail if getaddrinfo fails, continue + * onto the next address + */ + gf_msg_trace(this->name, 0, "getaddrinfo for %s failed (%s)", + address->hostname, gai_strerror(ret)); + continue; + } + + for (tmp = paddr; tmp != NULL; tmp = tmp->ai_next) { + if (gf_compare_sockaddr(addr->ai_addr, tmp->ai_addr)) { + found = peer; /* (de)referenced? */ + break; + } + } + + freeaddrinfo(paddr); + if (found) + goto unlock; + } + } +unlock: + RCU_READ_UNLOCK; +out: + return found; +} + +/* glusterd_peerinfo_find_by_hostname searches for a peer which matches the + * hostname @hoststr and if found returns the pointer to peerinfo object. + * Returns NULL otherwise. + * + * It first attempts a quick search by string matching @hoststr. If that fails, + * it'll attempt a more thorough match by resolving the addresses and matching + * the resolved addrinfos. + */ +glusterd_peerinfo_t * +glusterd_peerinfo_find_by_hostname(const char *hoststr) +{ + int ret = -1; + struct addrinfo *addr = NULL; + struct addrinfo *p = NULL; + xlator_t *this = THIS; + glusterd_peerinfo_t *peerinfo = NULL; + + GF_ASSERT(hoststr); + + peerinfo = gd_peerinfo_find_from_hostname(hoststr); + if (peerinfo) + return peerinfo; + + ret = getaddrinfo(hoststr, NULL, NULL, &addr); + if (ret != 0) { + gf_msg(this->name, GF_LOG_ERROR, ret, GD_MSG_GETADDRINFO_FAIL, + "error in getaddrinfo: %s\n", gai_strerror(ret)); + goto out; + } + + for (p = addr; p != NULL; p = p->ai_next) { + peerinfo = gd_peerinfo_find_from_addrinfo(p); + if (peerinfo) { + freeaddrinfo(addr); + return peerinfo; + } + } + +out: + gf_msg_debug(this->name, 0, "Unable to find friend: %s", hoststr); + if (addr) + freeaddrinfo(addr); + return NULL; +} + +int +glusterd_hostname_to_uuid(char *hostname, uuid_t uuid) +{ + GF_ASSERT(hostname); + GF_ASSERT(uuid); + + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_conf_t *priv = NULL; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + peerinfo = glusterd_peerinfo_find_by_hostname(hostname); + if (peerinfo) { + ret = 0; + gf_uuid_copy(uuid, peerinfo->uuid); + } else { + if (gf_is_local_addr(hostname)) { + gf_uuid_copy(uuid, MY_UUID); + ret = 0; + } else { + ret = -1; + } + } + + gf_msg_debug(this->name, 0, "returning %d", ret); + return ret; +} + +/* glusterd_peerinfo_find_by_uuid searches for a peer which matches the + * uuid @uuid and if found returns the pointer to peerinfo object. + * Returns NULL otherwise. 
+ */ +glusterd_peerinfo_t * +glusterd_peerinfo_find_by_uuid(uuid_t uuid) +{ + glusterd_conf_t *priv = NULL; + glusterd_peerinfo_t *entry = NULL; + glusterd_peerinfo_t *found = NULL; + xlator_t *this = THIS; + glusterd_friend_sm_state_t state; + + GF_ASSERT(this); + + if (gf_uuid_is_null(uuid)) + return NULL; + + priv = this->private; + + GF_ASSERT(priv); + + RCU_READ_LOCK; + cds_list_for_each_entry_rcu(entry, &priv->peers, uuid_list) + { + if (!gf_uuid_compare(entry->uuid, uuid)) { + found = entry; /* Probably should be rcu_dereferenced */ + state = found->state.state; + break; + } + } + RCU_READ_UNLOCK; + + if (found) + gf_msg_debug(this->name, 0, "Friend found... state: %s", + glusterd_friend_sm_state_name_get(state)); + else + gf_msg_debug(this->name, 0, "Friend with uuid: %s, not found", + uuid_utoa(uuid)); + return found; +} + +/* glusterd_peerinfo_find will search for a peer matching either @uuid or + * @hostname and return a pointer to the peerinfo object + * Returns NULL otherwise. + */ +glusterd_peerinfo_t * +glusterd_peerinfo_find(uuid_t uuid, const char *hostname) +{ + glusterd_peerinfo_t *peerinfo = NULL; + xlator_t *this = THIS; + + GF_ASSERT(this); + + if (uuid) { + peerinfo = glusterd_peerinfo_find_by_uuid(uuid); + + if (peerinfo) { + return peerinfo; + } else { + gf_msg_debug(this->name, 0, "Unable to find peer by uuid: %s", + uuid_utoa(uuid)); + } + } + + if (hostname) { + peerinfo = glusterd_peerinfo_find_by_hostname(hostname); + + if (peerinfo) { + return peerinfo; + } else { + gf_msg_debug(this->name, 0, "Unable to find hostname: %s", + hostname); + } + } + return NULL; +} + +/* glusterd_peerinfo_new will create a new peerinfo object and set it's members + * values using the passed parameters. + * @hostname is added as the first entry in peerinfo->hostnames list and also + * set to peerinfo->hostname. + * It returns a pointer to peerinfo object if successful and returns NULL + * otherwise. The caller should take care of freeing the created peerinfo + * object. + */ +glusterd_peerinfo_t * +glusterd_peerinfo_new(glusterd_friend_sm_state_t state, uuid_t *uuid, + const char *hostname, int port) +{ + glusterd_peerinfo_t *new_peer = NULL; + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + + this = THIS; + GF_ASSERT(this); + conf = this->private; + GF_ASSERT(conf); + + new_peer = GF_CALLOC(1, sizeof(*new_peer), gf_gld_mt_peerinfo_t); + if (!new_peer) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); + goto out; + } + + CDS_INIT_LIST_HEAD(&new_peer->uuid_list); + + new_peer->state.state = state; + + CDS_INIT_LIST_HEAD(&new_peer->hostnames); + if (hostname) { + ret = gd_add_address_to_peer(new_peer, hostname); + if (ret) + goto out; + /* Also set it to peerinfo->hostname. 
Doing this as we use + * peerinfo->hostname in a lot of places and is really hard to + * get everything right + */ + new_peer->hostname = gf_strdup(hostname); + } + + if (uuid) { + gf_uuid_copy(new_peer->uuid, *uuid); + } + + ret = glusterd_sm_tr_log_init( + &new_peer->sm_log, glusterd_friend_sm_state_name_get, + glusterd_friend_sm_event_name_get, GLUSTERD_TR_LOG_SIZE); + if (ret) + goto out; + + if (new_peer->state.state == GD_FRIEND_STATE_BEFRIENDED) + new_peer->quorum_contrib = QUORUM_WAITING; + new_peer->port = port; + + pthread_mutex_init(&new_peer->delete_lock, NULL); + + new_peer->generation = uatomic_add_return(&conf->generation, 1); +out: + if (ret && new_peer) { + glusterd_peerinfo_cleanup(new_peer); + new_peer = NULL; + } + return new_peer; +} + +/* Check if the all peers are connected and befriended, except the peer + * specified (the peer being detached) + */ +gf_boolean_t +glusterd_chk_peers_connected_befriended(uuid_t skip_uuid) +{ + gf_boolean_t ret = _gf_true; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_conf_t *priv = NULL; + + priv = THIS->private; + GF_ASSERT(priv); + + RCU_READ_LOCK; + cds_list_for_each_entry_rcu(peerinfo, &priv->peers, uuid_list) + { + if (!gf_uuid_is_null(skip_uuid) && + !gf_uuid_compare(skip_uuid, peerinfo->uuid)) + continue; + + if ((GD_FRIEND_STATE_BEFRIENDED != peerinfo->state.state) || + !(peerinfo->connected)) { + ret = _gf_false; + break; + } + } + RCU_READ_UNLOCK; + + gf_msg_debug(THIS->name, 0, "Returning %s", (ret ? "TRUE" : "FALSE")); + return ret; +} + +/* Return hostname for given uuid if it exists + * else return NULL + */ +char * +glusterd_uuid_to_hostname(uuid_t uuid) +{ + char *hostname = NULL; + glusterd_conf_t *priv = NULL; + glusterd_peerinfo_t *entry = NULL; + + priv = THIS->private; + GF_ASSERT(priv); + + if (!gf_uuid_compare(MY_UUID, uuid)) { + hostname = gf_strdup("localhost"); + return hostname; + } + RCU_READ_LOCK; + if (!cds_list_empty(&priv->peers)) { + cds_list_for_each_entry_rcu(entry, &priv->peers, uuid_list) + { + if (!gf_uuid_compare(entry->uuid, uuid)) { + hostname = gf_strdup(entry->hostname); + break; + } + } + } + RCU_READ_UNLOCK; + + return hostname; +} + +char * +gd_peer_uuid_str(glusterd_peerinfo_t *peerinfo) +{ + if ((peerinfo == NULL) || gf_uuid_is_null(peerinfo->uuid)) + return NULL; + + if (peerinfo->uuid_str[0] == '\0') + uuid_utoa_r(peerinfo->uuid, peerinfo->uuid_str); + + return peerinfo->uuid_str; +} + +gf_boolean_t +glusterd_are_all_peers_up() +{ + glusterd_peerinfo_t *peerinfo = NULL; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + gf_boolean_t peers_up = _gf_false; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, out); + + RCU_READ_LOCK; + cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list) + { + if (!peerinfo->connected) { + RCU_READ_UNLOCK; + goto out; + } + } + RCU_READ_UNLOCK; + + peers_up = _gf_true; + +out: + return peers_up; +} + +gf_boolean_t +glusterd_are_vol_all_peers_up(glusterd_volinfo_t *volinfo, + struct cds_list_head *peers, char **down_peerstr) +{ + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + gf_boolean_t ret = _gf_false; + + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + if (!gf_uuid_compare(brickinfo->uuid, MY_UUID)) + continue; + + RCU_READ_LOCK; + cds_list_for_each_entry_rcu(peerinfo, peers, uuid_list) + { + if (gf_uuid_compare(peerinfo->uuid, brickinfo->uuid)) + continue; + + /*Found peer who owns the brick, 
return false + * if peer is not connected or not friend */ + if (!(peerinfo->connected) || + (peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED)) { + *down_peerstr = gf_strdup(peerinfo->hostname); + RCU_READ_UNLOCK; + gf_msg_debug(THIS->name, 0, "Peer %s is down. ", *down_peerstr); + goto out; + } + } + RCU_READ_UNLOCK; + } + + ret = _gf_true; +out: + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_peer_hostname_new(const char *hostname, + glusterd_peer_hostname_t **name) +{ + glusterd_peer_hostname_t *peer_hostname = NULL; + int32_t ret = -1; + + GF_ASSERT(hostname); + GF_ASSERT(name); + xlator_t *this = THIS; + GF_ASSERT(this); + + peer_hostname = GF_CALLOC(1, sizeof(*peer_hostname), + gf_gld_mt_peer_hostname_t); + + if (!peer_hostname) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); + goto out; + } + + peer_hostname->hostname = gf_strdup(hostname); + CDS_INIT_LIST_HEAD(&peer_hostname->hostname_list); + + *name = peer_hostname; + ret = 0; + +out: + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; +} + +void +glusterd_peer_hostname_free(glusterd_peer_hostname_t *name) +{ + if (!name) + return; + + cds_list_del_init(&name->hostname_list); + + GF_FREE(name->hostname); + name->hostname = NULL; + + GF_FREE(name); + + return; +} + +gf_boolean_t +gd_peer_has_address(glusterd_peerinfo_t *peerinfo, const char *address) +{ + glusterd_peer_hostname_t *hostname = NULL; + + GF_VALIDATE_OR_GOTO("glusterd", (peerinfo != NULL), out); + GF_VALIDATE_OR_GOTO("glusterd", (address != NULL), out); + + cds_list_for_each_entry(hostname, &peerinfo->hostnames, hostname_list) + { + if (strcmp(hostname->hostname, address) == 0) { + return _gf_true; + } + } + +out: + return _gf_false; +} + +int +gd_add_address_to_peer(glusterd_peerinfo_t *peerinfo, const char *address) +{ + int ret = -1; + glusterd_peer_hostname_t *hostname = NULL; + + GF_VALIDATE_OR_GOTO("glusterd", (peerinfo != NULL), out); + GF_VALIDATE_OR_GOTO("glusterd", (address != NULL), out); + + if (gd_peer_has_address(peerinfo, address)) { + ret = 0; + goto out; + } + + ret = glusterd_peer_hostname_new(address, &hostname); + if (ret) + goto out; + + cds_list_add_tail_rcu(&hostname->hostname_list, &peerinfo->hostnames); + + ret = 0; +out: + return ret; +} + +/* gd_add_friend_to_dict() adds details of @friend into @dict with the given + * @prefix. All the parameters are compulsory. 
+ * + * The complete address list is added to the dict only if the cluster op-version + * is >= GD_OP_VERSION_3_6_0 + */ +int +gd_add_friend_to_dict(glusterd_peerinfo_t *friend, dict_t *dict, + const char *prefix) +{ + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + char key[100] = { + 0, + }; + glusterd_peer_hostname_t *address = NULL; + int count = 0; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", (this != NULL), out); + + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, (conf != NULL), out); + + GF_VALIDATE_OR_GOTO(this->name, (friend != NULL), out); + GF_VALIDATE_OR_GOTO(this->name, (dict != NULL), out); + GF_VALIDATE_OR_GOTO(this->name, (prefix != NULL), out); + + snprintf(key, sizeof(key), "%s.uuid", prefix); + ret = dict_set_dynstr_with_alloc(dict, key, uuid_utoa(friend->uuid)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set key %s in dict", key); + goto out; + } + + /* Setting the first hostname from the list with this key for backward + * compatibility + */ + snprintf(key, sizeof(key), "%s.hostname", prefix); + address = cds_list_entry(&friend->hostnames, glusterd_peer_hostname_t, + hostname_list); + ret = dict_set_dynstr_with_alloc(dict, key, address->hostname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set key %s in dict", key); + goto out; + } + + if (conf->op_version < GD_OP_VERSION_3_6_0) { + ret = 0; + goto out; + } + + address = NULL; + count = 0; + cds_list_for_each_entry(address, &friend->hostnames, hostname_list) + { + GF_VALIDATE_OR_GOTO(this->name, (address != NULL), out); + + snprintf(key, sizeof(key), "%s.hostname%d", prefix, count); + ret = dict_set_dynstr_with_alloc(dict, key, address->hostname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set key %s in dict", key); + goto out; + } + count++; + } + ret = snprintf(key, sizeof(key), "%s.address-count", prefix); + ret = dict_set_int32n(dict, key, ret, count); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set key %s in dict", key); + +out: + gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret); + return ret; +} + +/* gd_update_peerinfo_from_dict will update the hostnames for @peerinfo from + * peer details with @prefix in @dict. + * Returns 0 on success and -1 on failure. 
+ */ +int +gd_update_peerinfo_from_dict(glusterd_peerinfo_t *peerinfo, dict_t *dict, + const char *prefix) +{ + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + char key[100] = { + 0, + }; + char *hostname = NULL; + int count = 0; + int i = 0; + + this = THIS; + GF_ASSERT(this != NULL); + + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, (conf != NULL), out); + + GF_VALIDATE_OR_GOTO(this->name, (peerinfo != NULL), out); + GF_VALIDATE_OR_GOTO(this->name, (dict != NULL), out); + GF_VALIDATE_OR_GOTO(this->name, (prefix != NULL), out); + + ret = snprintf(key, sizeof(key), "%s.hostname", prefix); + ret = dict_get_strn(dict, key, ret, &hostname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Key %s not present in " + "dictionary", + key); + goto out; + } + ret = gd_add_address_to_peer(peerinfo, hostname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_ADD_ADDRESS_TO_PEER_FAIL, + "Could not add address to peer"); + goto out; + } + /* Also set peerinfo->hostname to the first address */ + if (peerinfo->hostname != NULL) + GF_FREE(peerinfo->hostname); + peerinfo->hostname = gf_strdup(hostname); + + if (conf->op_version < GD_OP_VERSION_3_6_0) { + ret = 0; + goto out; + } + + ret = snprintf(key, sizeof(key), "%s.address-count", prefix); + ret = dict_get_int32n(dict, key, ret, &count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Key %s not present in " + "dictionary", + key); + goto out; + } + hostname = NULL; + for (i = 0; i < count; i++) { + ret = snprintf(key, sizeof(key), "%s.hostname%d", prefix, i); + ret = dict_get_strn(dict, key, ret, &hostname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Key %s not present " + "in dictionary", + key); + goto out; + } + ret = gd_add_address_to_peer(peerinfo, hostname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_ADD_ADDRESS_TO_PEER_FAIL, + "Could not add address to peer"); + goto out; + } + + hostname = NULL; + } + +out: + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; +} + +/* gd_peerinfo_from_dict creates a peerinfo object from details of peer with + * @prefix in @dict. + * Returns a pointer to the created peerinfo object on success, and NULL on + * failure. 
+ */ +glusterd_peerinfo_t * +gd_peerinfo_from_dict(dict_t *dict, const char *prefix) +{ + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + glusterd_peerinfo_t *new_peer = NULL; + char key[64] = { + 0, + }; + char *uuid_str = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", (this != NULL), out); + + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, (conf != NULL), out); + + GF_VALIDATE_OR_GOTO(this->name, (dict != NULL), out); + GF_VALIDATE_OR_GOTO(this->name, (prefix != NULL), out); + + new_peer = glusterd_peerinfo_new(GD_FRIEND_STATE_DEFAULT, NULL, NULL, 0); + if (new_peer == NULL) { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PEERINFO_CREATE_FAIL, + "Could not create peerinfo " + "object"); + goto out; + } + + ret = snprintf(key, sizeof(key), "%s.uuid", prefix); + ret = dict_get_strn(dict, key, ret, &uuid_str); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Key %s not present in " + "dictionary", + key); + goto out; + } + gf_uuid_parse(uuid_str, new_peer->uuid); + + ret = gd_update_peerinfo_from_dict(new_peer, dict, prefix); + +out: + if ((ret != 0) && (new_peer != NULL)) { + glusterd_peerinfo_cleanup(new_peer); + new_peer = NULL; + } + + return new_peer; +} + +static int +gd_add_peer_hostnames_to_dict(glusterd_peerinfo_t *peerinfo, dict_t *dict, + const char *prefix) +{ + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + char key[64] = { + 0, + }; + glusterd_peer_hostname_t *addr = NULL; + int count = 0; + + this = THIS; + GF_ASSERT(this != NULL); + + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, (conf != NULL), out); + + if (conf->op_version < GD_OP_VERSION_3_6_0) { + ret = 0; + goto out; + } + + GF_VALIDATE_OR_GOTO(this->name, (peerinfo != NULL), out); + GF_VALIDATE_OR_GOTO(this->name, (dict != NULL), out); + GF_VALIDATE_OR_GOTO(this->name, (prefix != NULL), out); + + cds_list_for_each_entry(addr, &peerinfo->hostnames, hostname_list) + { + snprintf(key, sizeof(key), "%s.hostname%d", prefix, count); + ret = dict_set_dynstr_with_alloc(dict, key, addr->hostname); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto out; + } + count++; + } + + ret = snprintf(key, sizeof(key), "%s.hostname_count", prefix); + ret = dict_set_int32n(dict, key, ret, count); + +out: + return ret; +} + +int +gd_add_peer_detail_to_dict(glusterd_peerinfo_t *peerinfo, dict_t *friends, + int count) +{ + int ret = -1; + char key[32] = { + 0, + }; + int keylen; + char *peer_uuid_str = NULL; + + xlator_t *this = THIS; + GF_ASSERT(this); + GF_ASSERT(peerinfo); + GF_ASSERT(friends); + + peer_uuid_str = gd_peer_uuid_str(peerinfo); + keylen = snprintf(key, sizeof(key), "friend%d.uuid", count); + ret = dict_set_strn(friends, key, keylen, peer_uuid_str); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s", + key, NULL); + goto out; + } + + keylen = snprintf(key, sizeof(key), "friend%d.hostname", count); + ret = dict_set_strn(friends, key, keylen, peerinfo->hostname); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s", + key, NULL); + goto out; + } + + keylen = snprintf(key, sizeof(key), "friend%d.port", count); + ret = dict_set_int32n(friends, key, keylen, peerinfo->port); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s", + key, NULL); + goto out; + } + + keylen = snprintf(key, sizeof(key), "friend%d.stateId", count); + ret = dict_set_int32n(friends, key, 
keylen, peerinfo->state.state); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Key=%s in dict", key, NULL); + goto out; + } + + keylen = snprintf(key, sizeof(key), "friend%d.state", count); + ret = dict_set_strn( + friends, key, keylen, + glusterd_friend_sm_state_name_get(peerinfo->state.state)); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "key=%s", + key, NULL); + goto out; + } + + keylen = snprintf(key, sizeof(key), "friend%d.connected", count); + ret = dict_set_int32n(friends, key, keylen, (int32_t)peerinfo->connected); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s", + key, NULL); + goto out; + } + + snprintf(key, sizeof(key), "friend%d", count); + ret = gd_add_peer_hostnames_to_dict(peerinfo, friends, key); + +out: + return ret; +} + +/* glusterd_peerinfo_find_by_generation searches for a peer which has the + * generation number @generation and if found returns the pointer to peerinfo + * object. Returns NULL otherwise. + */ +glusterd_peerinfo_t * +glusterd_peerinfo_find_by_generation(uint32_t generation) +{ + glusterd_conf_t *priv = NULL; + glusterd_peerinfo_t *entry = NULL; + glusterd_peerinfo_t *found = NULL; + xlator_t *this = THIS; + glusterd_friend_sm_state_t state; + + GF_ASSERT(this); + + priv = this->private; + + GF_ASSERT(priv); + + RCU_READ_LOCK; + cds_list_for_each_entry_rcu(entry, &priv->peers, uuid_list) + { + if (entry->generation == generation) { + found = entry; /* Probably should be rcu_dereferenced */ + state = found->state.state; + break; + } + } + RCU_READ_UNLOCK; + + if (found) + gf_msg_debug(this->name, 0, "Friend found... state: %s", + glusterd_friend_sm_state_name_get(state)); + else + gf_msg_debug(this->name, 0, + "Friend with generation: %" PRIu32 ", not found", + generation); + return found; +} + +int +glusterd_get_peers_count() +{ + int count = 0; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + glusterd_peerinfo_t *peer = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, out); + + RCU_READ_LOCK; + cds_list_for_each_entry_rcu(peer, &conf->peers, uuid_list) count++; + RCU_READ_UNLOCK; + +out: + return count; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-peer-utils.h b/xlators/mgmt/glusterd/src/glusterd-peer-utils.h new file mode 100644 index 00000000000..fd254d57391 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-peer-utils.h @@ -0,0 +1,82 @@ +/* + Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#ifndef _GLUSTERD_PEER_UTILS_H +#define _GLUSTERD_PEER_UTILS_H + +#include "glusterd.h" + +int32_t +glusterd_peerinfo_cleanup(glusterd_peerinfo_t *peerinfo); + +glusterd_peerinfo_t * +glusterd_peerinfo_find_by_hostname(const char *hoststr); + +int +glusterd_hostname_to_uuid(char *hostname, uuid_t uuid); + +glusterd_peerinfo_t * +glusterd_peerinfo_find_by_uuid(uuid_t uuid); + +glusterd_peerinfo_t * +glusterd_peerinfo_find(uuid_t uuid, const char *hostname); + +glusterd_peerinfo_t * +glusterd_peerinfo_new(glusterd_friend_sm_state_t state, uuid_t *uuid, + const char *hostname, int port); + +gf_boolean_t +glusterd_chk_peers_connected_befriended(uuid_t skip_uuid); + +char * +glusterd_uuid_to_hostname(uuid_t uuid); + +char * +gd_peer_uuid_str(glusterd_peerinfo_t *peerinfo); + +gf_boolean_t +glusterd_are_all_peers_up(); + +gf_boolean_t +glusterd_are_vol_all_peers_up(glusterd_volinfo_t *volinfo, + struct cds_list_head *peers, char **down_peerstr); + +int32_t +glusterd_peer_hostname_new(const char *hostname, + glusterd_peer_hostname_t **name); +void +glusterd_peer_hostname_free(glusterd_peer_hostname_t *name); + +gf_boolean_t +gd_peer_has_address(glusterd_peerinfo_t *peerinfo, const char *address); + +int +gd_add_address_to_peer(glusterd_peerinfo_t *peerinfo, const char *address); + +int +gd_add_friend_to_dict(glusterd_peerinfo_t *friend, dict_t *dict, + const char *prefix); + +int +gd_update_peerinfo_from_dict(glusterd_peerinfo_t *peerinfo, dict_t *dict, + const char *prefix); + +glusterd_peerinfo_t * +gd_peerinfo_from_dict(dict_t *dict, const char *prefix); + +int +gd_add_peer_detail_to_dict(glusterd_peerinfo_t *peerinfo, dict_t *friends, + int count); +glusterd_peerinfo_t * +glusterd_peerinfo_find_by_generation(uint32_t generation); + +int +glusterd_get_peers_count(); +#endif /* _GLUSTERD_PEER_UTILS_H */ diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.c b/xlators/mgmt/glusterd/src/glusterd-pmap.c index f1f49fc60b1..16ac628ab82 100644 --- a/xlators/mgmt/glusterd/src/glusterd-pmap.c +++ b/xlators/mgmt/glusterd/src/glusterd-pmap.c @@ -1,487 +1,666 @@ /* - Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ - + Copyright (c) 2010-2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ -#include "xlator.h" -#include "glusterfs.h" -#include "compat-errno.h" +#include <glusterfs/xlator.h> +#include <glusterfs/glusterfs.h> +#include <glusterfs/syscall.h> +#include <glusterfs/compat-errno.h> #include "glusterd.h" #include "glusterd-utils.h" -#include "portmap.h" +#include "portmap-xdr.h" +#include "xdr-generic.h" #include "protocol-common.h" +#include "glusterd-messages.h" #include "rpcsvc.h" #include <sys/socket.h> #include <sys/types.h> #include <netinet/in.h> - -int -pmap_port_isfree (int port) +static int +pmap_port_isfree(int port) { - struct sockaddr_in sin; - int sock = -1; - int ret = 0; + struct sockaddr_in sin; + int sock = -1; + int ret = 0; - memset (&sin, 0, sizeof (sin)); - sin.sin_family = PF_INET; - sin.sin_port = hton16 (port); + memset(&sin, 0, sizeof(sin)); + sin.sin_family = PF_INET; + sin.sin_port = hton16(port); - sock = socket (PF_INET, SOCK_STREAM, 0); - if (sock == -1) - return -1; + sock = socket(PF_INET, SOCK_STREAM, 0); + if (sock == -1) + return -1; - ret = bind (sock, (struct sockaddr *)&sin, sizeof (sin)); - close (sock); + ret = bind(sock, (struct sockaddr *)&sin, sizeof(sin)); + sys_close(sock); - return (ret == 0) ? 1 : 0; + return (ret == 0) ? 1 : 0; } - -struct pmap_registry * -pmap_registry_new (void) +static struct pmap_registry * +pmap_registry_new(xlator_t *this) { - struct pmap_registry *pmap = NULL; - int i = 0; - - pmap = CALLOC (sizeof (*pmap), 1); - if (!pmap) - return NULL; - - for (i = 0; i < 65536; i++) { - if (pmap_port_isfree (i)) - pmap->ports[i].type = GF_PMAP_PORT_FREE; - else - pmap->ports[i].type = GF_PMAP_PORT_FOREIGN; - } - - pmap->base_port = GF_DEFAULT_BASE_PORT + 2; - pmap->last_alloc = GF_DEFAULT_BASE_PORT + 2; - - return pmap; + struct pmap_registry *pmap = NULL; + int i = 0; + + pmap = CALLOC(sizeof(*pmap), 1); + if (!pmap) + return NULL; + + pmap->base_port = pmap->last_alloc = ((glusterd_conf_t *)(this->private)) + ->base_port; + pmap->max_port = ((glusterd_conf_t *)(this->private))->max_port; + for (i = pmap->base_port; i <= pmap->max_port; i++) { + if (pmap_port_isfree(i)) + pmap->ports[i].type = GF_PMAP_PORT_FREE; + else + pmap->ports[i].type = GF_PMAP_PORT_FOREIGN; + } + + return pmap; } - struct pmap_registry * -pmap_registry_get (xlator_t *this) +pmap_registry_get(xlator_t *this) { - glusterd_conf_t *priv = NULL; - struct pmap_registry *pmap = NULL; - - priv = this->private; - - pmap = priv->pmap; - if (!pmap) { - pmap = pmap_registry_new (); - if (!pmap) - return NULL; - priv->pmap = pmap; - } - - return pmap; -} + glusterd_conf_t *priv = NULL; + struct pmap_registry *pmap = NULL; + priv = this->private; -static char* -nextword (char *str) -{ - while (*str && !isspace (*str)) - str++; - while (*str && isspace (*str)) - str++; + pmap = priv->pmap; + if (!pmap) { + pmap = pmap_registry_new(this); + if (!pmap) + return NULL; + priv->pmap = pmap; + } - return str; + return pmap; } +/* + * The "destroy" argument avoids a double search in pmap_registry_remove - one + * to find the entry in the table, and the other to find the particular + * brickname within that entry (which might cover multiple bricks). We do the + * actual deletion here by "whiting out" the brick name with spaces. It's up + * to pmap_registry_remove to figure out what to do from there. 
+ */ int -pmap_registry_search (xlator_t *this, const char *brickname, - gf_pmap_port_type_t type) +pmap_registry_search(xlator_t *this, const char *brickname, + gf_pmap_port_type_t type, gf_boolean_t destroy) { - struct pmap_registry *pmap = NULL; - int p = 0; - char *brck = NULL; - char *nbrck = NULL; - - pmap = pmap_registry_get (this); - - for (p = pmap->base_port; p <= pmap->last_alloc; p++) { - if (!pmap->ports[p].brickname || pmap->ports[p].type != type) - continue; - - for (brck = pmap->ports[p].brickname;;) { - nbrck = strtail (brck, brickname); - if (nbrck && (!*nbrck || isspace (*nbrck))) - return p; - brck = nextword (brck); - if (!*brck) - break; + struct pmap_registry *pmap = NULL; + int p = 0; + char *brck = NULL; + size_t i; + + pmap = pmap_registry_get(this); + + for (p = pmap->last_alloc; p >= pmap->base_port; p--) { + if (!pmap->ports[p].brickname || pmap->ports[p].type != type) + continue; + + brck = pmap->ports[p].brickname; + for (;;) { + for (i = 0; brck[i] && !isspace(brck[i]); ++i) + ; + if (i == 0 && brck[i] == '\0') + break; + + if (strncmp(brck, brickname, i) == 0) { + /* + * Without this check, we'd break when brck + * is merely a substring of brickname. + */ + if (brickname[i] == '\0') { + if (destroy) + do { + *(brck++) = ' '; + } while (--i); + return p; } + } + + brck += i; + + /* + * Skip over *any* amount of whitespace, including + * none (if we're already at the end of the string). + */ + while (isspace(*brck)) + ++brck; + /* + * We're either at the end of the string (which will be + * handled above strncmp on the next iteration) or at + * the next non-whitespace substring (which will be + * handled by strncmp itself). + */ } + } - return 0; + return 0; } -int -pmap_registry_search_by_xprt (xlator_t *this, void *xprt, - gf_pmap_port_type_t type) +static int +pmap_registry_search_by_xprt(xlator_t *this, void *xprt, + gf_pmap_port_type_t type) { - struct pmap_registry *pmap = NULL; - int p = 0; - int port = 0; - - pmap = pmap_registry_get (this); - - for (p = pmap->base_port; p <= pmap->last_alloc; p++) { - if (!pmap->ports[p].xprt) - continue; - if (pmap->ports[p].xprt == xprt && - pmap->ports[p].type == type) { - port = p; - break; - } + struct pmap_registry *pmap = NULL; + int p = 0; + int port = 0; + + pmap = pmap_registry_get(this); + + for (p = pmap->last_alloc; p >= pmap->base_port; p--) { + if (!pmap->ports[p].xprt) + continue; + if (pmap->ports[p].xprt == xprt) { + if (pmap->ports[p].type == type || type == GF_PMAP_PORT_ANY) { + port = p; + break; + } } + } - return port; + return port; } - -char * -pmap_registry_search_by_port (xlator_t *this, int port) +static char * +pmap_registry_search_by_port(xlator_t *this, int port) { - struct pmap_registry *pmap = NULL; - char *brickname = NULL; + struct pmap_registry *pmap = NULL; + char *brickname = NULL; + int max_port = 0; - if (port > 65535) - goto out; + max_port = ((glusterd_conf_t *)(this->private))->max_port; + if (port > max_port) + goto out; - pmap = pmap_registry_get (this); + pmap = pmap_registry_get(this); - if (pmap->ports[port].type == GF_PMAP_PORT_BRICKSERVER) - brickname = pmap->ports[port].brickname; + if (pmap->ports[port].type == GF_PMAP_PORT_BRICKSERVER) + brickname = pmap->ports[port].brickname; out: - return brickname; + return brickname; } - int -pmap_registry_alloc (xlator_t *this) +pmap_registry_alloc(xlator_t *this) { - struct pmap_registry *pmap = NULL; - int p = 0; - int port = 0; - - pmap = pmap_registry_get (this); - - for (p = pmap->last_alloc; p < 65535; p++) { - if 
(pmap->ports[p].type != GF_PMAP_PORT_FREE) - continue; - - if (pmap_port_isfree (p)) { - pmap->ports[p].type = GF_PMAP_PORT_LEASED; - port = p; - break; - } + struct pmap_registry *pmap = NULL; + int p = 0; + int port = 0; + + pmap = pmap_registry_get(this); + + for (p = pmap->base_port; p <= pmap->max_port; p++) { + /* GF_PMAP_PORT_FOREIGN may be freed up ? */ + if ((pmap->ports[p].type == GF_PMAP_PORT_FREE) || + (pmap->ports[p].type == GF_PMAP_PORT_FOREIGN)) { + if (pmap_port_isfree(p)) { + pmap->ports[p].type = GF_PMAP_PORT_LEASED; + port = p; + break; + } } + } - if (port) - pmap->last_alloc = port; + if (port > pmap->last_alloc) + pmap->last_alloc = port; - return port; + return port; } +/* pmap_assign_port does a pmap_registry_remove followed by pmap_registry_alloc, + * the reason for the former is to ensure we don't end up with stale ports + */ int -pmap_registry_bind (xlator_t *this, int port, const char *brickname, - gf_pmap_port_type_t type, void *xprt) +pmap_assign_port(xlator_t *this, int old_port, const char *path) { - struct pmap_registry *pmap = NULL; - int p = 0; - - pmap = pmap_registry_get (this); - - if (port > 65535) - goto out; - - p = port; - pmap->ports[p].type = type; - if (pmap->ports[p].brickname) - free (pmap->ports[p].brickname); - pmap->ports[p].brickname = strdup (brickname); - pmap->ports[p].type = type; - pmap->ports[p].xprt = xprt; - - gf_log ("pmap", GF_LOG_INFO, "adding brick %s on port %d", - brickname, port); - - if (pmap->last_alloc < p) - pmap->last_alloc = p; -out: - return 0; + int ret = -1; + int new_port = 0; + + if (old_port) { + ret = pmap_registry_remove(this, 0, path, GF_PMAP_PORT_BRICKSERVER, + NULL, _gf_false); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, GD_MSG_PMAP_REGISTRY_REMOVE_FAIL, + 0, + "Failed to" + "remove pmap registry for older signin for path" + " %s", + path); + } + } + new_port = pmap_registry_alloc(this); + return new_port; } int -pmap_registry_remove (xlator_t *this, int port, const char *brickname, - gf_pmap_port_type_t type, void *xprt) +pmap_registry_bind(xlator_t *this, int port, const char *brickname, + gf_pmap_port_type_t type, void *xprt) { - struct pmap_registry *pmap = NULL; - int p = 0; - glusterd_conf_t *priv = NULL; - - priv = this->private; - pmap = priv->pmap; - if (!pmap) - goto out; + struct pmap_registry *pmap = NULL; + int p = 0; - if (port) { - if (port > 65535) - goto out; - - p = port; - goto remove; - } - - if (brickname && strchr (brickname, '/')) { - p = pmap_registry_search (this, brickname, type); - if (p) - goto remove; - } - - if (xprt) { - p = pmap_registry_search_by_xprt (this, xprt, type); - if (p) - goto remove; - } + pmap = pmap_registry_get(this); + if (port > pmap->max_port) goto out; -remove: - gf_log ("pmap", GF_LOG_INFO, "removing brick %s on port %d", - pmap->ports[p].brickname, p); - - if (pmap->ports[p].brickname) - free (pmap->ports[p].brickname); - - pmap->ports[p].brickname = NULL; - pmap->ports[p].xprt = NULL; + p = port; + if (pmap->ports[p].type == GF_PMAP_PORT_FREE) { + /* Because of some crazy race in volume start code path because + * of friend handshaking with volumes with quorum enabled we + * might end up into a situation where glusterd would start a + * brick and get a disconnect and then immediately try to start + * the same brick instance based on another friend update + * request. 
And then if for the very first brick even if the + * process doesn't come up at the end sign in event gets sent + * and we end up having two duplicate portmap entries for the + * same brick. Since in brick start we mark the previous port as + * free, its better to consider a sign in request as no op if + * the corresponding port type is marked as free + */ + goto out; + } + if (pmap->ports[p].brickname) { + char *tmp = pmap->ports[p].brickname; + asprintf(&pmap->ports[p].brickname, "%s %s", tmp, brickname); + free(tmp); + } else { + pmap->ports[p].brickname = strdup(brickname); + } + pmap->ports[p].type = type; + pmap->ports[p].xprt = xprt; + + gf_msg("pmap", GF_LOG_INFO, 0, GD_MSG_BRICK_ADD, + "adding brick %s on port %d", brickname, port); + + if (pmap->last_alloc < p) + pmap->last_alloc = p; out: - return 0; + return 0; } - -typedef ssize_t (*gfs_serialize_t) (struct iovec outmsg, void *data); - - -static int -xdr_to_glusterfs_req (rpcsvc_request_t *req, void *arg, gfs_serialize_t sfunc) +int +pmap_registry_extend(xlator_t *this, int port, const char *brickname) { - int ret = -1; - - if (!req) - return -1; + struct pmap_registry *pmap = NULL; + char *old_bn; + char *new_bn; + size_t bn_len; + char *entry; + int found = 0; + + pmap = pmap_registry_get(this); + + if (port > pmap->max_port) { + return -1; + } + + switch (pmap->ports[port].type) { + case GF_PMAP_PORT_LEASED: + case GF_PMAP_PORT_BRICKSERVER: + break; + default: + return -1; + } + + old_bn = pmap->ports[port].brickname; + if (old_bn) { + bn_len = strlen(brickname); + entry = strstr(old_bn, brickname); + while (entry) { + found = 1; + if ((entry != old_bn) && (entry[-1] != ' ')) { + found = 0; + } + if ((entry[bn_len] != ' ') && (entry[bn_len] != '\0')) { + found = 0; + } + if (found) { + return 0; + } + entry = strstr(entry + bn_len, brickname); + } + asprintf(&new_bn, "%s %s", old_bn, brickname); + } else { + new_bn = strdup(brickname); + } - ret = sfunc (req->msg[0], arg); + if (!new_bn) { + return -1; + } - if (ret > 0) - ret = 0; + pmap->ports[port].brickname = new_bn; + free(old_bn); - return ret; + return 0; } - int -gluster_pmap_portbybrick (rpcsvc_request_t *req) +pmap_registry_remove(xlator_t *this, int port, const char *brickname, + gf_pmap_port_type_t type, void *xprt, + gf_boolean_t brick_disconnect) { - pmap_port_by_brick_req args = {0,}; - pmap_port_by_brick_rsp rsp = {0,}; - char *brick = NULL; - int port = 0; - - if (xdr_to_glusterfs_req (req, &args, xdr_to_pmap_port_by_brick_req)) { - req->rpc_err = GARBAGE_ARGS; - goto fail; - } + struct pmap_registry *pmap = NULL; + int p = 0; + glusterd_conf_t *priv = NULL; + char *brick_str; + + priv = this->private; + pmap = priv->pmap; + if (!pmap) + goto out; - brick = args.brick; + if (port) { + if (port > pmap->max_port) + goto out; + } - port = pmap_registry_search (THIS, brick, GF_PMAP_PORT_BRICKSERVER); + if (brickname) { + p = pmap_registry_search(this, brickname, type, _gf_true); + if (p) + goto remove; + } - if (!port) - rsp.op_ret = -1; + if (xprt) { + p = pmap_registry_search_by_xprt(this, xprt, type); + if (p) + goto remove; + } - rsp.port = port; + goto out; +remove: + gf_msg("pmap", GF_LOG_INFO, 0, GD_MSG_BRICK_REMOVE, + "removing brick %s on port %d", brickname, p); -fail: - glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - (gd_serialize_t)xdr_from_pmap_port_by_brick_rsp); - if (args.brick) - free (args.brick);//malloced by xdr + if (xprt && (xprt == pmap->ports[p].xprt)) { + pmap->ports[p].xprt = NULL; + } + + /* + * This is where we garbage-collect. 
If all of the brick names have + * been "whited out" by pmap_registry_search(...,destroy=_gf_true) and + * there's no xprt either, then we have nothing left worth saving and + * can delete the entire entry. + */ + if (brick_disconnect || !pmap->ports[p].xprt) { + /* If the signout call is being triggered by brick disconnect + * then clean up all the bricks (in case of brick mux) + */ + if (!brick_disconnect) { + brick_str = pmap->ports[p].brickname; + if (brick_str) { + while (*brick_str != '\0') { + if (*(brick_str++) != ' ') { + goto out; + } + } + } + } + free(pmap->ports[p].brickname); + pmap->ports[p].brickname = NULL; + pmap->ports[p].type = GF_PMAP_PORT_FREE; + } - return 0; +out: + return 0; } - int -gluster_pmap_brickbyport (rpcsvc_request_t *req) +__gluster_pmap_portbybrick(rpcsvc_request_t *req) { - pmap_brick_by_port_req args = {0,}; - pmap_brick_by_port_rsp rsp = {0,}; + pmap_port_by_brick_req args = { + 0, + }; + pmap_port_by_brick_rsp rsp = { + 0, + }; + char *brick = NULL; + int port = 0; + int ret = -1; + xlator_t *this = THIS; + GF_ASSERT(this); + + ret = xdr_to_generic(req->msg[0], &args, + (xdrproc_t)xdr_pmap_port_by_brick_req); + if (ret < 0) { + req->rpc_err = GARBAGE_ARGS; + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL); + goto fail; + } + + brick = args.brick; + + port = pmap_registry_search(this, brick, GF_PMAP_PORT_BRICKSERVER, + _gf_false); + + if (!port) + rsp.op_ret = -1; + + rsp.port = port; - if (xdr_to_glusterfs_req (req, &args, xdr_to_pmap_brick_by_port_req)) { - req->rpc_err = GARBAGE_ARGS; - goto fail; - } - - rsp.brick = pmap_registry_search_by_port (THIS, args.port); - if (!rsp.brick) { - rsp.op_ret = -1; - rsp.brick = ""; - } fail: + glusterd_submit_reply(req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_pmap_port_by_brick_rsp); + free(args.brick); // malloced by xdr - glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - (gd_serialize_t)xdr_from_pmap_brick_by_port_rsp); - - return 0; + return 0; } -static int -glusterd_brick_update_signin (glusterd_brickinfo_t *brickinfo, - gf_boolean_t value) +int +gluster_pmap_portbybrick(rpcsvc_request_t *req) { - brickinfo->signed_in = value; - - return 0; + return glusterd_big_locked_handler(req, __gluster_pmap_portbybrick); } int -gluster_pmap_signup (rpcsvc_request_t *req) +__gluster_pmap_brickbyport(rpcsvc_request_t *req) { - pmap_signup_req args = {0,}; - pmap_signup_rsp rsp = {0,}; - - - if (xdr_to_glusterfs_req (req, &args, xdr_to_pmap_signup_req)) { - req->rpc_err = GARBAGE_ARGS; - goto fail; - } - - rsp.op_ret = pmap_registry_bind (THIS, args.port, args.brick, - GF_PMAP_PORT_BRICKSERVER, req->trans); - + pmap_brick_by_port_req args = { + 0, + }; + pmap_brick_by_port_rsp rsp = { + 0, + }; + int ret = -1; + xlator_t *this = THIS; + GF_ASSERT(this); + + ret = xdr_to_generic(req->msg[0], &args, + (xdrproc_t)xdr_pmap_brick_by_port_req); + if (ret < 0) { + req->rpc_err = GARBAGE_ARGS; + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL); + goto fail; + } + + rsp.brick = pmap_registry_search_by_port(THIS, args.port); + if (!rsp.brick) { + rsp.op_ret = -1; + rsp.brick = ""; + } fail: - glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - (gd_serialize_t)xdr_from_pmap_signup_rsp); - if (args.brick) - free (args.brick);//malloced by xdr - return 0; + glusterd_submit_reply(req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_pmap_brick_by_port_rsp); + + return 0; } int -gluster_pmap_signin (rpcsvc_request_t *req) +gluster_pmap_brickbyport(rpcsvc_request_t *req) { - pmap_signin_req args = {0,}; - 
pmap_signin_rsp rsp = {0,}; - glusterd_brickinfo_t *brickinfo = NULL; - int ret = -1; - - if (xdr_to_glusterfs_req (req, &args, xdr_to_pmap_signin_req)) { - req->rpc_err = GARBAGE_ARGS; - goto fail; - } + return glusterd_big_locked_handler(req, __gluster_pmap_brickbyport); +} - rsp.op_ret = pmap_registry_bind (THIS, args.port, args.brick, - GF_PMAP_PORT_BRICKSERVER, req->trans); +int +__gluster_pmap_signin(rpcsvc_request_t *req) +{ + pmap_signin_req args = { + 0, + }; + pmap_signin_rsp rsp = { + 0, + }; + int ret = -1; + glusterd_brickinfo_t *brickinfo = NULL; + xlator_t *this = THIS; + GF_ASSERT(this); + + ret = xdr_to_generic(req->msg[0], &args, (xdrproc_t)xdr_pmap_signin_req); + if (ret < 0) { + req->rpc_err = GARBAGE_ARGS; + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL); + goto fail; + } + + rsp.op_ret = pmap_registry_bind(THIS, args.port, args.brick, + GF_PMAP_PORT_BRICKSERVER, req->trans); + + ret = glusterd_get_brickinfo(THIS, args.brick, args.port, &brickinfo); + /* Update portmap status in brickinfo */ + if (brickinfo) + brickinfo->port_registered = _gf_true; - ret = glusterd_get_brickinfo (THIS, args.brick, args.port, _gf_true, - &brickinfo); fail: - glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - (gd_serialize_t)xdr_from_pmap_signin_rsp); - if (args.brick) - free (args.brick);//malloced by xdr - - if (!ret) - glusterd_brick_update_signin (brickinfo, _gf_true); + glusterd_submit_reply(req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_pmap_signin_rsp); + free(args.brick); // malloced by xdr - return 0; + return 0; } - +int +gluster_pmap_signin(rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler(req, __gluster_pmap_signin); +} int -gluster_pmap_signout (rpcsvc_request_t *req) +__gluster_pmap_signout(rpcsvc_request_t *req) { - pmap_signout_req args = {0,}; - pmap_signout_rsp rsp = {0,}; - int ret = -1; - glusterd_brickinfo_t *brickinfo = NULL; - - if (xdr_to_glusterfs_req (req, &args, xdr_to_pmap_signout_req)) { - //failed to decode msg; - req->rpc_err = GARBAGE_ARGS; - goto fail; + pmap_signout_req args = { + 0, + }; + pmap_signout_rsp rsp = { + 0, + }; + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + char pidfile[PATH_MAX] = {0}; + char brick_path[PATH_MAX] = { + 0, + }; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, fail); + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, fail); + + ret = xdr_to_generic(req->msg[0], &args, (xdrproc_t)xdr_pmap_signout_req); + if (ret < 0) { + // failed to decode msg; + req->rpc_err = GARBAGE_ARGS; + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL); + goto fail; + } + rsp.op_ret = pmap_registry_remove(THIS, args.port, args.brick, + GF_PMAP_PORT_BRICKSERVER, req->trans, + _gf_false); + + ret = glusterd_get_brickinfo(THIS, args.brick, args.port, &brickinfo); + if (args.rdma_port) { + snprintf(brick_path, PATH_MAX, "%s.rdma", args.brick); + rsp.op_ret = pmap_registry_remove(THIS, args.rdma_port, brick_path, + GF_PMAP_PORT_BRICKSERVER, req->trans, + _gf_false); + } + /* Update portmap status on brickinfo */ + if (brickinfo) + brickinfo->port_registered = _gf_false; + + /* Clean up the pidfile for this brick given glusterfsd doesn't clean it + * any more. 
This is required to ensure we don't end up with having + * stale pid files in case a brick is killed from the backend + */ + ret = glusterd_get_volinfo_from_brick(args.brick, &volinfo); + if (!ret) { + if (volinfo && brickinfo) { + GLUSTERD_GET_BRICK_PIDFILE(pidfile, volinfo, brickinfo, conf); + sys_unlink(pidfile); + + /* Setting the brick status to GF_BRICK_STOPPED to + * ensure correct brick status is maintained on the + * glusterd end when a brick is killed from the + * backend */ + brickinfo->status = GF_BRICK_STOPPED; + + /* Remove brick from brick process if not already + * removed in the brick op phase. This situation would + * arise when the brick is killed explicitly from the + * backend */ + ret = glusterd_brick_process_remove_brick(brickinfo, NULL); + if (ret) { + gf_msg_debug(this->name, 0, + "Couldn't remove " + "brick %s:%s from brick process", + brickinfo->hostname, brickinfo->path); + /* Ignore 'ret' here since the brick might + * have already been deleted in brick op phase + */ + ret = 0; + } } + } - rsp.op_ret = pmap_registry_remove (THIS, args.port, args.brick, - GF_PMAP_PORT_BRICKSERVER, req->trans); - - ret = glusterd_get_brickinfo (THIS, args.brick, args.port, _gf_true, - &brickinfo); fail: - glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - (gd_serialize_t)xdr_from_pmap_signout_rsp); - if (args.brick) - free (args.brick);//malloced by xdr + glusterd_submit_reply(req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_pmap_signout_rsp); + free(args.brick); // malloced by xdr - if (!ret) - glusterd_brick_update_signin (brickinfo, _gf_false); + return 0; +} - return 0; +int +gluster_pmap_signout(rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler(req, __gluster_pmap_signout); } -rpcsvc_actor_t gluster_pmap_actors[] = { - [GF_PMAP_NULL] = {"NULL", GF_HNDSK_NULL, NULL, NULL, NULL }, - [GF_PMAP_PORTBYBRICK] = {"PORTBYBRICK", GF_PMAP_PORTBYBRICK, - gluster_pmap_portbybrick, NULL, NULL }, - [GF_PMAP_BRICKBYPORT] = {"BRICKBYPORT", GF_PMAP_BRICKBYPORT, - gluster_pmap_brickbyport, NULL, NULL }, - [GF_PMAP_SIGNIN] = {"SIGNIN", GF_PMAP_SIGNIN, - gluster_pmap_signin, NULL, NULL }, - [GF_PMAP_SIGNOUT] = {"SIGNOUT", GF_PMAP_SIGNOUT, - gluster_pmap_signout, NULL, NULL }, - [GF_PMAP_SIGNUP] = {"SIGNUP", GF_PMAP_SIGNUP, - gluster_pmap_signup, NULL, NULL }, +static rpcsvc_actor_t gluster_pmap_actors[GF_PMAP_MAXVALUE] = { + [GF_PMAP_NULL] = {"NULL", NULL, NULL, GF_PMAP_NULL, DRC_NA, 0}, + [GF_PMAP_PORTBYBRICK] = {"PORTBYBRICK", gluster_pmap_portbybrick, NULL, + GF_PMAP_PORTBYBRICK, DRC_NA, 0}, + [GF_PMAP_BRICKBYPORT] = {"BRICKBYPORT", gluster_pmap_brickbyport, NULL, + GF_PMAP_BRICKBYPORT, DRC_NA, 0}, + [GF_PMAP_SIGNIN] = {"SIGNIN", gluster_pmap_signin, NULL, GF_PMAP_SIGNIN, + DRC_NA, 0}, + [GF_PMAP_SIGNOUT] = {"SIGNOUT", gluster_pmap_signout, NULL, GF_PMAP_SIGNOUT, + DRC_NA, 0}, }; - struct rpcsvc_program gluster_pmap_prog = { - .progname = "Gluster Portmap", - .prognum = GLUSTER_PMAP_PROGRAM, - .progver = GLUSTER_PMAP_VERSION, - .actors = gluster_pmap_actors, - .numactors = GF_PMAP_MAXVALUE, + .progname = "Gluster Portmap", + .prognum = GLUSTER_PMAP_PROGRAM, + .progver = GLUSTER_PMAP_VERSION, + .actors = gluster_pmap_actors, + .numactors = GF_PMAP_MAXVALUE, }; diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.h b/xlators/mgmt/glusterd/src/glusterd-pmap.h index a87efed5a8d..51d75361431 100644 --- a/xlators/mgmt/glusterd/src/glusterd-pmap.h +++ b/xlators/mgmt/glusterd/src/glusterd-pmap.h @@ -1,62 +1,57 @@ /* - Copyright (c) 2010 Gluster, Inc. 
<http://www.gluster.com> - This file is part of GlusterFS. + Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ - #ifndef _GLUSTERD_PMAP_H_ #define _GLUSTERD_PMAP_H_ -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - #include <pthread.h> -#include "uuid.h" +#include <glusterfs/compat-uuid.h> -#include "glusterfs.h" -#include "xlator.h" -#include "logging.h" -#include "call-stub.h" -#include "fd.h" -#include "byte-order.h" -#include "glusterd.h" +#include <glusterfs/glusterfs.h> +#include <glusterfs/xlator.h> +#include <glusterfs/logging.h> +#include <glusterfs/call-stub.h> +#include <glusterfs/byte-order.h> #include "rpcsvc.h" - struct pmap_port_status { - gf_pmap_port_type_t type; - char *brickname; - void *xprt; + char *brickname; + void *xprt; + gf_pmap_port_type_t type; }; struct pmap_registry { - int base_port; - int last_alloc; - struct pmap_port_status ports[65536]; + struct pmap_port_status ports[GF_PORT_MAX + 1]; + int base_port; + int max_port; + int last_alloc; }; -int pmap_registry_alloc (xlator_t *this); -int pmap_registry_bind (xlator_t *this, int port, const char *brickname, - gf_pmap_port_type_t type, void *xprt); -int pmap_registry_remove (xlator_t *this, int port, const char *brickname, - gf_pmap_port_type_t type, void *xprt); -int pmap_registry_search (xlator_t *this, const char *brickname, - gf_pmap_port_type_t type); -struct pmap_registry *pmap_registry_get (xlator_t *this); +int +pmap_assign_port(xlator_t *this, int port, const char *path); +int +pmap_mark_port_leased(xlator_t *this, int port); +int +pmap_registry_alloc(xlator_t *this); +int +pmap_registry_bind(xlator_t *this, int port, const char *brickname, + gf_pmap_port_type_t type, void *xprt); +int +pmap_registry_extend(xlator_t *this, int port, const char *brickname); +int +pmap_registry_remove(xlator_t *this, int port, const char *brickname, + gf_pmap_port_type_t type, void *xprt, + gf_boolean_t brick_disconnect); +int +pmap_registry_search(xlator_t *this, const char *brickname, + gf_pmap_port_type_t type, gf_boolean_t destroy); +struct pmap_registry * +pmap_registry_get(xlator_t *this); #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c new file mode 100644 index 00000000000..a05c90d7b10 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c @@ -0,0 +1,152 @@ +/* + Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. 
+ + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include <stdio.h> +#include <limits.h> +#include <signal.h> + +#include "glusterd.h" +#include "glusterd-utils.h" +#include <glusterfs/common-utils.h> +#include <glusterfs/xlator.h> +#include <glusterfs/logging.h> +#include "glusterd-messages.h" +#include "glusterd-proc-mgmt.h" + +int +glusterd_proc_init(glusterd_proc_t *proc, char *name, char *pidfile, + char *logdir, char *logfile, char *volfile, char *volfileid, + char *volfileserver) +{ + int ret = -1; + + ret = snprintf(proc->name, sizeof(proc->name), "%s", name); + if (ret < 0) + goto out; + + ret = snprintf(proc->pidfile, sizeof(proc->pidfile), "%s", pidfile); + if (ret < 0) + goto out; + + ret = snprintf(proc->logdir, sizeof(proc->logdir), "%s", logdir); + if (ret < 0) + goto out; + + ret = snprintf(proc->logfile, sizeof(proc->logfile), "%s", logfile); + if (ret < 0) + goto out; + + ret = snprintf(proc->volfile, sizeof(proc->volfile), "%s", volfile); + if (ret < 0) + goto out; + + ret = snprintf(proc->volfileid, sizeof(proc->volfileid), "%s", volfileid); + if (ret < 0) + goto out; + + ret = snprintf(proc->volfileserver, sizeof(proc->volfileserver), "%s", + volfileserver); + if (ret < 0) + goto out; + +out: + if (ret > 0) + ret = 0; + + return ret; +} + +int +glusterd_proc_stop(glusterd_proc_t *proc, int sig, int flags) +{ + /* NB: Copy-paste code from glusterd_service_stop, the source may be + * removed once all daemon management use proc */ + + int32_t ret = -1; + pid_t pid = -1; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + + this = THIS; + GF_ASSERT(this); + + conf = this->private; + GF_ASSERT(conf); + + if (!gf_is_service_running(proc->pidfile, &pid)) { + ret = 0; + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_ALREADY_STOPPED, + "%s already stopped", proc->name); + goto out; + } + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_SVC_STOP_SUCCESS, + "Stopping %s daemon running in pid: " + "%d", + proc->name, pid); + + ret = kill(pid, sig); + if (ret) { + switch (errno) { + case ESRCH: + gf_msg_debug(this->name, 0, + "%s is already " + "stopped", + proc->name); + ret = 0; + goto out; + default: + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_SVC_KILL_FAIL, + "Unable to kill %s " + "service, reason:%s", + proc->name, strerror(errno)); + } + } else { + (void)glusterd_unlink_file(proc->pidfile); + } + if (flags != PROC_STOP_FORCE) + goto out; + + synclock_unlock(&conf->big_lock); + synctask_sleep(1); + synclock_lock(&conf->big_lock); + if (gf_is_service_running(proc->pidfile, &pid)) { + ret = kill(pid, SIGKILL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_PID_KILL_FAIL, + "Unable to kill pid:%d, " + "reason:%s", + pid, strerror(errno)); + goto out; + } + ret = glusterd_unlink_file(proc->pidfile); + if (ret) + goto out; + } + + ret = 0; +out: + return ret; +} + +int +glusterd_proc_get_pid(glusterd_proc_t *proc) +{ + int pid = -1; + (void)gf_is_service_running(proc->pidfile, &pid); + return pid; +} + +int +glusterd_proc_is_running(glusterd_proc_t *proc) +{ + int pid = -1; + + return gf_is_service_running(proc->pidfile, &pid); +} diff --git a/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.h b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.h new file mode 100644 index 00000000000..e8e9ffc5082 --- /dev/null +++ 
b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.h @@ -0,0 +1,44 @@ +/* + Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _GLUSTERD_PROC_MGMT_H_ +#define _GLUSTERD_PROC_MGMT_H_ + +typedef struct glusterd_proc_ glusterd_proc_t; + +enum proc_flags { + PROC_NONE = 0, + PROC_START, + PROC_START_NO_WAIT, + PROC_STOP, + PROC_STOP_FORCE +}; + +struct glusterd_proc_ { + char name[NAME_MAX]; + char pidfile[PATH_MAX]; + char logdir[PATH_MAX]; + char logfile[PATH_MAX]; + char volfile[PATH_MAX]; + char volfileserver[PATH_MAX]; + char volfileid[256]; +}; + +int +glusterd_proc_init(glusterd_proc_t *proc, char *name, char *pidfile, + char *logdir, char *logfile, char *volfile, char *volfileid, + char *volfileserver); + +int +glusterd_proc_stop(glusterd_proc_t *proc, int sig, int flags); + +int +glusterd_proc_is_running(glusterd_proc_t *proc); +#endif diff --git a/xlators/mgmt/glusterd/src/glusterd-quota.c b/xlators/mgmt/glusterd/src/glusterd-quota.c new file mode 100644 index 00000000000..8370c174ce3 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-quota.c @@ -0,0 +1,2259 @@ +/* + Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#include <glusterfs/common-utils.h> +#include "cli1-xdr.h" +#include "xdr-generic.h" +#include "glusterd.h" +#include "glusterd-op-sm.h" +#include "glusterd-store.h" +#include "glusterd-utils.h" +#include "glusterd-quotad-svc.h" +#include "glusterd-volgen.h" +#include "glusterd-messages.h" +#include <glusterfs/run.h> +#include <glusterfs/syscall.h> +#include <glusterfs/byte-order.h> +#include <glusterfs/compat-errno.h> +#include <glusterfs/quota-common-utils.h> +#include "glusterd-quota.h" + +#include <sys/wait.h> +#include <dlfcn.h> + +#ifndef _PATH_SETFATTR +#ifdef GF_LINUX_HOST_OS +#define _PATH_SETFATTR "setfattr" +#endif +#ifdef __NetBSD__ +#define _PATH_SETFATTR "/usr/pkg/bin/setfattr" +#endif +#endif + +/* Any negative pid to make it special client */ +#define QUOTA_CRAWL_PID "-100" + +#define GLUSTERFS_GET_QUOTA_LIMIT_MOUNT_PIDFILE(pidfile, volname) \ + { \ + snprintf(pidfile, PATH_MAX - 1, \ + DEFAULT_VAR_RUN_DIRECTORY "/%s_quota_limit.pid", volname); \ + } + +#define GLUSTERFS_GET_QUOTA_LIST_MOUNT_PIDFILE(pidfile, volname) \ + { \ + snprintf(pidfile, PATH_MAX - 1, \ + DEFAULT_VAR_RUN_DIRECTORY "/%s_quota_list.pid", volname); \ + } + +#define GLUSTERD_GET_QUOTA_CRAWL_PIDDIR(piddir, volinfo, type) \ + do { \ + char _volpath[PATH_MAX] = { \ + 0, \ + }; \ + int32_t _crawl_pid_len; \ + GLUSTERD_GET_VOLUME_DIR(_volpath, volinfo, priv); \ + if (type == GF_QUOTA_OPTION_TYPE_ENABLE || \ + type == GF_QUOTA_OPTION_TYPE_ENABLE_OBJECTS) \ + _crawl_pid_len = snprintf(piddir, PATH_MAX, "%s/run/quota/enable", \ + _volpath); \ + else \ + _crawl_pid_len = snprintf(piddir, PATH_MAX, \ + "%s/run/quota/disable", _volpath); \ + if ((_crawl_pid_len < 0) || (_crawl_pid_len >= PATH_MAX)) { \ + piddir[0] = 0; \ + } \ + } while (0) + +#define 
GLUSTERD_GET_TMP_PATH(abspath, path) \ + do { \ + snprintf(abspath, sizeof(abspath) - 1, \ + DEFAULT_VAR_RUN_DIRECTORY "/tmp%s", path); \ + } while (0) + +#define GLUSTERD_GET_QUOTA_LIST_MOUNT_PATH(abspath, volname, path) \ + do { \ + snprintf(abspath, sizeof(abspath) - 1, \ + DEFAULT_VAR_RUN_DIRECTORY "/%s_quota_list%s", volname, path); \ + } while (0) + +const char *gd_quota_op_list[GF_QUOTA_OPTION_TYPE_MAX + 1] = { + [GF_QUOTA_OPTION_TYPE_NONE] = "none", + [GF_QUOTA_OPTION_TYPE_ENABLE] = "enable", + [GF_QUOTA_OPTION_TYPE_DISABLE] = "disable", + [GF_QUOTA_OPTION_TYPE_LIMIT_USAGE] = "limit-usage", + [GF_QUOTA_OPTION_TYPE_REMOVE] = "remove", + [GF_QUOTA_OPTION_TYPE_LIST] = "list", + [GF_QUOTA_OPTION_TYPE_VERSION] = "version", + [GF_QUOTA_OPTION_TYPE_ALERT_TIME] = "alert-time", + [GF_QUOTA_OPTION_TYPE_SOFT_TIMEOUT] = "soft-timeout", + [GF_QUOTA_OPTION_TYPE_HARD_TIMEOUT] = "hard-timeout", + [GF_QUOTA_OPTION_TYPE_DEFAULT_SOFT_LIMIT] = "default-soft-limit", + [GF_QUOTA_OPTION_TYPE_LIMIT_OBJECTS] = "limit-objects", + [GF_QUOTA_OPTION_TYPE_LIST_OBJECTS] = "list-objects", + [GF_QUOTA_OPTION_TYPE_REMOVE_OBJECTS] = "remove-objects", + [GF_QUOTA_OPTION_TYPE_ENABLE_OBJECTS] = "enable-objects", + [GF_QUOTA_OPTION_TYPE_UPGRADE] = "upgrade", + [GF_QUOTA_OPTION_TYPE_MAX] = NULL}; + +gf_boolean_t +glusterd_is_quota_supported(int32_t type, char **op_errstr) +{ + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + gf_boolean_t supported = _gf_false; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, out); + + if ((conf->op_version == GD_OP_VERSION_MIN) && + (type > GF_QUOTA_OPTION_TYPE_VERSION)) + goto out; + + if ((conf->op_version < GD_OP_VERSION_3_7_0) && + (type > GF_QUOTA_OPTION_TYPE_VERSION_OBJECTS)) + goto out; + + /* Quota Operations that change quota.conf shouldn't + * be allowed as the quota.conf format changes in 3.7 + */ + if ((conf->op_version < GD_OP_VERSION_3_7_0) && + (type == GF_QUOTA_OPTION_TYPE_ENABLE || + type == GF_QUOTA_OPTION_TYPE_LIMIT_USAGE || + type == GF_QUOTA_OPTION_TYPE_REMOVE)) + goto out; + + /* Quota xattr version implemented in 3.7.6 + * quota-version is incremented when quota is enabled + * Quota enable and disable performance enhancement has been done + * in version 3.7.12. + * so don't allow enabling/disabling quota in heterogeneous + * cluster during upgrade + */ + if (type == GF_QUOTA_OPTION_TYPE_ENABLE || + type == GF_QUOTA_OPTION_TYPE_ENABLE_OBJECTS || + type == GF_QUOTA_OPTION_TYPE_DISABLE) { + if (conf->op_version < GD_OP_VERSION_3_7_12) + goto out; + } + + supported = _gf_true; + +out: + if (!supported && op_errstr != NULL && conf) + gf_asprintf(op_errstr, + "Volume quota failed. The cluster is " + "operating at version %d. 
Quota command" + " %s is unavailable in this version.", + conf->op_version, gd_quota_op_list[type]); + + return supported; +} + +int +__glusterd_handle_quota(rpcsvc_request_t *req) +{ + int32_t ret = -1; + gf_cli_req cli_req = {{ + 0, + }}; + dict_t *dict = NULL; + glusterd_op_t cli_op = GD_OP_QUOTA; + char *volname = NULL; + int32_t type = 0; + char msg[2048] = { + 0, + }; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + + GF_ASSERT(req); + this = THIS; + GF_ASSERT(this); + conf = this->private; + GF_ASSERT(conf); + + ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + // failed to decode msg; + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (cli_req.dict.dict_len) { + /* Unserialize the dictionary */ + dict = dict_new(); + + ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + "failed to " + "unserialize req-buffer to dictionary"); + snprintf(msg, sizeof(msg), + "Unable to decode the " + "command"); + goto out; + } else { + dict->extra_stdfree = cli_req.dict.dict_val; + } + } + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + snprintf(msg, sizeof(msg), "Unable to get volume name"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get volume name, " + "while handling quota command"); + goto out; + } + + ret = dict_get_int32n(dict, "type", SLEN("type"), &type); + if (ret) { + snprintf(msg, sizeof(msg), "Unable to get type of command"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get type of cmd, " + "while handling quota command"); + goto out; + } + + if (!glusterd_is_quota_supported(type, NULL)) { + snprintf(msg, sizeof(msg), + "Volume quota failed. The cluster " + "is operating at version %d. 
Quota command" + " %s is unavailable in this version.", + conf->op_version, gd_quota_op_list[type]); + ret = -1; + goto out; + } + + ret = glusterd_op_begin_synctask(req, GD_OP_QUOTA, dict); + +out: + if (ret) { + if (msg[0] == '\0') + snprintf(msg, sizeof(msg), "Operation failed"); + ret = glusterd_op_send_cli_response(cli_op, ret, 0, req, dict, msg); + } + + return ret; +} + +int +glusterd_handle_quota(rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler(req, __glusterd_handle_quota); +} + +int32_t +glusterd_check_if_quota_trans_enabled(glusterd_volinfo_t *volinfo) +{ + int32_t ret = 0; + int flag = _gf_false; + + flag = glusterd_volinfo_get_boolean(volinfo, VKEY_FEATURES_QUOTA); + if (flag == -1) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_QUOTA_GET_STAT_FAIL, + "failed to get the quota status"); + ret = -1; + goto out; + } + + if (flag == _gf_false) { + ret = -1; + goto out; + } + ret = 0; +out: + return ret; +} + +int32_t +_glusterd_quota_initiate_fs_crawl(glusterd_conf_t *priv, + glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brick, int type, + char *pid_dir) +{ + pid_t pid; + int32_t ret = -1; + int status = 0; + char mountdir[PATH_MAX] = { + 0, + }; + char logfile[PATH_MAX] = { + 0, + }; + char brickpath[PATH_MAX] = { + 0, + }; + char vol_id[PATH_MAX] = { + 0, + }; + char pidfile[PATH_MAX] = { + 0, + }; + runner_t runner = {0}; + char *volfileserver = NULL; + FILE *pidfp = NULL; + int32_t len = 0; + + GF_VALIDATE_OR_GOTO("glusterd", THIS, out); + + GLUSTERD_GET_TMP_PATH(mountdir, "/"); + ret = sys_mkdir(mountdir, 0755); + if (ret && errno != EEXIST) { + gf_msg(THIS->name, GF_LOG_WARNING, errno, GD_MSG_MOUNT_REQ_FAIL, + "failed to create temporary " + "directory %s", + mountdir); + ret = -1; + goto out; + } + + strcat(mountdir, "mntXXXXXX"); + if (mkdtemp(mountdir) == NULL) { + gf_msg(THIS->name, GF_LOG_WARNING, errno, GD_MSG_MOUNT_REQ_FAIL, + "failed to create a temporary " + "mount directory: %s", + mountdir); + ret = -1; + goto out; + } + + GLUSTERD_REMOVE_SLASH_FROM_PATH(brick->path, brickpath); + len = snprintf(logfile, sizeof(logfile), + DEFAULT_QUOTA_CRAWL_LOG_DIRECTORY "/%s.log", brickpath); + if ((len < 0) || (len >= sizeof(vol_id))) { + ret = -1; + goto out; + } + + if (dict_get_strn(THIS->options, "transport.socket.bind-address", + SLEN("transport.socket.bind-address"), + &volfileserver) != 0) + volfileserver = "localhost"; + + len = snprintf(vol_id, sizeof(vol_id), "client_per_brick/%s.%s.%s.%s.vol", + volinfo->volname, "client", brick->hostname, brickpath); + if ((len < 0) || (len >= sizeof(vol_id))) { + ret = -1; + goto out; + } + + runinit(&runner); + + if (type == GF_QUOTA_OPTION_TYPE_ENABLE || + type == GF_QUOTA_OPTION_TYPE_ENABLE_OBJECTS) + runner_add_args(&runner, SBIN_DIR "/glusterfs", "-s", volfileserver, + "--volfile-id", vol_id, "--use-readdirp=yes", + "--client-pid", QUOTA_CRAWL_PID, "-l", logfile, + mountdir, NULL); + else + runner_add_args(&runner, SBIN_DIR "/glusterfs", "-s", volfileserver, + "--volfile-id", vol_id, "--use-readdirp=no", + "--client-pid", QUOTA_CRAWL_PID, "-l", logfile, + mountdir, NULL); + + synclock_unlock(&priv->big_lock); + ret = runner_run_reuse(&runner); + synclock_lock(&priv->big_lock); + if (ret == -1) { + runner_log(&runner, "glusterd", GF_LOG_DEBUG, "command failed"); + runner_end(&runner); + goto out; + } + runner_end(&runner); + + if ((pid = fork()) < 0) { + gf_msg(THIS->name, GF_LOG_WARNING, 0, GD_MSG_FORK_FAIL, + "fork from parent failed"); + gf_umount_lazy("glusterd", mountdir, 1); + ret = -1; + goto out; + 
} else if (pid == 0) { // first child + /* fork one more to not hold back main process on + * blocking call below + */ + pid = fork(); + if (pid < 0) { + gf_umount_lazy("glusterd", mountdir, 1); + _exit(EXIT_FAILURE); + } else if (pid > 0) { + _exit(EXIT_SUCCESS); + } + + ret = chdir(mountdir); + if (ret == -1) { + gf_msg(THIS->name, GF_LOG_WARNING, errno, GD_MSG_DIR_OP_FAILED, + "chdir %s failed", mountdir); + gf_umount_lazy("glusterd", mountdir, 1); + exit(EXIT_FAILURE); + } + runinit(&runner); + + if (type == GF_QUOTA_OPTION_TYPE_ENABLE || + type == GF_QUOTA_OPTION_TYPE_ENABLE_OBJECTS) + runner_add_args(&runner, "/usr/bin/find", ".", "-exec", + "/usr/bin/stat", "{}", "\\", ";", NULL); + + else if (type == GF_QUOTA_OPTION_TYPE_DISABLE) { +#if defined(GF_DARWIN_HOST_OS) + runner_add_args( + &runner, "/usr/bin/find", ".", "-exec", "/usr/bin/xattr", "-w", + VIRTUAL_QUOTA_XATTR_CLEANUP_KEY, "1", "{}", "\\", ";", NULL); +#elif defined(__FreeBSD__) + runner_add_args(&runner, "/usr/bin/find", ".", "-exec", + "/usr/sbin/setextattr", EXTATTR_NAMESPACE_USER, + VIRTUAL_QUOTA_XATTR_CLEANUP_KEY, "1", "{}", "\\", + ";", NULL); +#else + runner_add_args(&runner, "find", ".", "-exec", _PATH_SETFATTR, "-n", + VIRTUAL_QUOTA_XATTR_CLEANUP_KEY, "-v", "1", "{}", + "\\", ";", NULL); +#endif + } + + if (runner_start(&runner) == -1) { + gf_umount_lazy("glusterd", mountdir, 1); + _exit(EXIT_FAILURE); + } + + len = snprintf(pidfile, sizeof(pidfile), "%s/%s.pid", pid_dir, + brickpath); + if ((len >= 0) && (len < sizeof(pidfile))) { + pidfp = fopen(pidfile, "w"); + if (pidfp != NULL) { + fprintf(pidfp, "%d\n", runner.chpid); + fflush(pidfp); + fclose(pidfp); + } + } + +#ifndef GF_LINUX_HOST_OS + runner_end(&runner); /* blocks in waitpid */ +#endif + gf_umount_lazy("glusterd", mountdir, 1); + + _exit(EXIT_SUCCESS); + } + ret = (waitpid(pid, &status, 0) == pid && WIFEXITED(status) && + WEXITSTATUS(status) == EXIT_SUCCESS) + ? 
0 + : -1; + +out: + return ret; +} + +void +glusterd_stop_all_quota_crawl_service(glusterd_conf_t *priv, + glusterd_volinfo_t *volinfo, int type) +{ + DIR *dir = NULL; + struct dirent *entry = NULL; + struct dirent scratch[2] = { + { + 0, + }, + }; + char pid_dir[PATH_MAX] = { + 0, + }; + char pidfile[PATH_MAX] = { + 0, + }; + int32_t len = 0; + + GLUSTERD_GET_QUOTA_CRAWL_PIDDIR(pid_dir, volinfo, type); + + dir = sys_opendir(pid_dir); + if (dir == NULL) + return; + + while ((entry = sys_readdir(dir, scratch))) { + if (gf_irrelevant_entry(entry)) + continue; + len = snprintf(pidfile, sizeof(pidfile), "%s/%s", pid_dir, + entry->d_name); + if ((len >= 0) && (len < sizeof(pidfile))) { + glusterd_service_stop_nolock("quota_crawl", pidfile, SIGKILL, + _gf_true); + sys_unlink(pidfile); + } + } + sys_closedir(dir); +} + +int32_t +glusterd_quota_initiate_fs_crawl(glusterd_conf_t *priv, + glusterd_volinfo_t *volinfo, int type) +{ + int32_t ret = -1; + glusterd_brickinfo_t *brick = NULL; + char pid_dir[PATH_MAX] = { + 0, + }; + + GF_VALIDATE_OR_GOTO("glusterd", THIS, out); + + ret = glusterd_generate_client_per_brick_volfile(volinfo); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTERD_OP_FAILED, + "failed to generate client volume file"); + goto out; + } + + ret = mkdir_p(DEFAULT_QUOTA_CRAWL_LOG_DIRECTORY, 0755, _gf_true); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_GLUSTERD_OP_FAILED, + "failed to create dir %s: %s", DEFAULT_QUOTA_CRAWL_LOG_DIRECTORY, + strerror(errno)); + goto out; + } + + GLUSTERD_GET_QUOTA_CRAWL_PIDDIR(pid_dir, volinfo, type); + ret = mkdir_p(pid_dir, 0755, _gf_true); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_GLUSTERD_OP_FAILED, + "failed to create dir %s: %s", pid_dir, strerror(errno)); + goto out; + } + + /* When quota enable is performed, stop alreday running enable crawl + * process and start fresh crawl process. 
let disable process continue + * if running to cleanup the older xattrs + * When quota disable is performed, stop both enable/disable crawl + * process and start fresh crawl process to cleanup the xattrs + */ + glusterd_stop_all_quota_crawl_service(priv, volinfo, + GF_QUOTA_OPTION_TYPE_ENABLE); + if (type == GF_QUOTA_OPTION_TYPE_DISABLE) + glusterd_stop_all_quota_crawl_service(priv, volinfo, + GF_QUOTA_OPTION_TYPE_DISABLE); + + cds_list_for_each_entry(brick, &volinfo->bricks, brick_list) + { + if (gf_uuid_compare(brick->uuid, MY_UUID)) + continue; + + ret = _glusterd_quota_initiate_fs_crawl(priv, volinfo, brick, type, + pid_dir); + + if (ret) + goto out; + } + + ret = 0; +out: + return ret; +} + +int32_t +glusterd_quota_get_default_soft_limit(glusterd_volinfo_t *volinfo, + dict_t *rsp_dict) +{ + int32_t ret = 0; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + char *default_limit = NULL; + char *val = NULL; + + if (rsp_dict == NULL) + return -1; + + this = THIS; + GF_ASSERT(this); + conf = this->private; + GF_ASSERT(conf); + + ret = glusterd_volinfo_get(volinfo, "features.default-soft-limit", + &default_limit); + if (default_limit) + val = gf_strdup(default_limit); + else + val = gf_strdup("80%"); + + ret = dict_set_dynstr_sizen(rsp_dict, "default-soft-limit", val); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set default " + "soft-limit into dict"); + goto out; + } + ret = 0; + +out: + return ret; +} + +int32_t +glusterd_inode_quota_enable(glusterd_volinfo_t *volinfo, char **op_errstr, + gf_boolean_t *crawl) +{ + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + GF_VALIDATE_OR_GOTO(this->name, volinfo, out); + GF_VALIDATE_OR_GOTO(this->name, crawl, out); + GF_VALIDATE_OR_GOTO(this->name, op_errstr, out); + + if (glusterd_is_volume_started(volinfo) == 0) { + *op_errstr = gf_strdup( + "Volume is stopped, start volume " + "to enable inode quota."); + ret = -1; + goto out; + } + + ret = glusterd_check_if_quota_trans_enabled(volinfo); + if (ret != 0) { + *op_errstr = gf_strdup( + "Quota is disabled. 
Enabling quota " + "will enable inode quota"); + ret = -1; + goto out; + } + + if (glusterd_is_volume_inode_quota_enabled(volinfo)) { + *op_errstr = gf_strdup("Inode Quota is already enabled"); + ret = -1; + goto out; + } + + ret = dict_set_dynstr_with_alloc(volinfo->dict, VKEY_FEATURES_INODE_QUOTA, + "on"); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "dict set failed"); + goto out; + } + + *crawl = _gf_true; + + ret = glusterd_store_quota_config( + volinfo, NULL, NULL, GF_QUOTA_OPTION_TYPE_ENABLE_OBJECTS, op_errstr); + + ret = 0; +out: + if (ret && op_errstr && !*op_errstr) + gf_asprintf(op_errstr, + "Enabling inode quota on volume %s has " + "been unsuccessful", + volinfo->volname); + return ret; +} + +int32_t +glusterd_quota_enable(glusterd_volinfo_t *volinfo, char **op_errstr, + gf_boolean_t *crawl) +{ + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + GF_VALIDATE_OR_GOTO(this->name, volinfo, out); + GF_VALIDATE_OR_GOTO(this->name, crawl, out); + GF_VALIDATE_OR_GOTO(this->name, op_errstr, out); + + if (glusterd_is_volume_started(volinfo) == 0) { + *op_errstr = gf_strdup( + "Volume is stopped, start volume " + "to enable quota."); + ret = -1; + goto out; + } + + ret = glusterd_check_if_quota_trans_enabled(volinfo); + if (ret == 0) { + *op_errstr = gf_strdup("Quota is already enabled"); + ret = -1; + goto out; + } + + ret = dict_set_dynstr_with_alloc(volinfo->dict, VKEY_FEATURES_QUOTA, "on"); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "dict set failed"); + goto out; + } + + ret = dict_set_dynstr_with_alloc(volinfo->dict, VKEY_FEATURES_INODE_QUOTA, + "on"); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "dict set failed"); + goto out; + } + + ret = dict_set_dynstr_with_alloc(volinfo->dict, + "features.quota-deem-statfs", "on"); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "setting quota-deem-statfs" + "in volinfo failed"); + goto out; + } + + *crawl = _gf_true; + + ret = glusterd_store_quota_config(volinfo, NULL, NULL, + GF_QUOTA_OPTION_TYPE_ENABLE, op_errstr); + + ret = 0; +out: + if (ret && op_errstr && !*op_errstr) + gf_asprintf(op_errstr, + "Enabling quota on volume %s has been " + "unsuccessful", + volinfo->volname); + return ret; +} + +int32_t +glusterd_quota_disable(glusterd_volinfo_t *volinfo, char **op_errstr, + gf_boolean_t *crawl) +{ + int32_t ret = -1; + int i = 0; + char *value = NULL; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + char *quota_options[] = {"features.soft-timeout", + "features.hard-timeout", + "features.alert-time", + "features.default-soft-limit", + "features.quota-deem-statfs", + "features.quota-timeout", + NULL}; + + this = THIS; + GF_ASSERT(this); + conf = this->private; + GF_ASSERT(conf); + + GF_VALIDATE_OR_GOTO(this->name, volinfo, out); + GF_VALIDATE_OR_GOTO(this->name, op_errstr, out); + + ret = glusterd_check_if_quota_trans_enabled(volinfo); + if (ret == -1) { + *op_errstr = gf_strdup("Quota is already disabled"); + goto out; + } + + ret = dict_set_dynstr_with_alloc(volinfo->dict, VKEY_FEATURES_QUOTA, "off"); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "dict set failed"); + goto out; + } + + ret = dict_set_dynstr_with_alloc(volinfo->dict, VKEY_FEATURES_INODE_QUOTA, + "off"); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "dict set failed"); + goto out; + } + + for (i = 0; quota_options[i]; i++) { + ret 
= glusterd_volinfo_get(volinfo, quota_options[i], &value); + if (ret) { + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_VOLINFO_GET_FAIL, + "failed to get option" + " %s", + quota_options[i]); + } else { + dict_del(volinfo->dict, quota_options[i]); + } + } + + *crawl = _gf_true; + + (void)glusterd_clean_up_quota_store(volinfo); + + ret = 0; +out: + if (ret && op_errstr && !*op_errstr) + gf_asprintf(op_errstr, + "Disabling quota on volume %s has been " + "unsuccessful", + volinfo->volname); + return ret; +} + +static int +glusterd_set_quota_limit(char *volname, char *path, char *hard_limit, + char *soft_limit, char *key, char **op_errstr) +{ + int ret = -1; + xlator_t *this = NULL; + char abspath[PATH_MAX] = { + 0, + }; + glusterd_conf_t *priv = NULL; + quota_limits_t existing_limit = { + 0, + }; + quota_limits_t new_limit = { + 0, + }; + double soft_limit_double = 0; + int64_t local_hl = 0; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + GLUSTERD_GET_QUOTA_LIMIT_MOUNT_PATH(abspath, volname, path); + ret = gf_lstat_dir(abspath, NULL); + if (ret) { + gf_asprintf(op_errstr, + "Failed to find the directory %s. " + "Reason : %s", + abspath, strerror(errno)); + goto out; + } + + if (!soft_limit) { + ret = sys_lgetxattr(abspath, key, (void *)&existing_limit, + sizeof(existing_limit)); + if (ret < 0) { + switch (errno) { +#if defined(ENOATTR) && (ENOATTR != ENODATA) + case ENODATA: /* FALLTHROUGH */ +#endif + case ENOATTR: + existing_limit.sl = -1; + break; + default: + gf_asprintf(op_errstr, + "Failed to get the " + "xattr %s from %s. Reason : %s", + key, abspath, strerror(errno)); + goto out; + } + } else { + existing_limit.hl = ntoh64(existing_limit.hl); + existing_limit.sl = ntoh64(existing_limit.sl); + } + new_limit.sl = existing_limit.sl; + + } else { + ret = gf_string2percent(soft_limit, &soft_limit_double); + if (ret) + goto out; + new_limit.sl = soft_limit_double; + } + + new_limit.sl = hton64(new_limit.sl); + + ret = gf_string2bytesize_int64(hard_limit, &local_hl); + if (ret) + goto out; + + new_limit.hl = hton64(local_hl); + + ret = sys_lsetxattr(abspath, key, (char *)(void *)&new_limit, + sizeof(new_limit), 0); + if (ret == -1) { + gf_asprintf(op_errstr, + "setxattr of %s failed on %s." + " Reason : %s", + key, abspath, strerror(errno)); + goto out; + } + ret = 0; + +out: + return ret; +} + +static int +glusterd_update_quota_conf_version(glusterd_volinfo_t *volinfo) +{ + volinfo->quota_conf_version++; + return 0; +} + +/*The function glusterd_find_gfid_match () does the following: + * Given a buffer of gfids, the number of bytes read and the key gfid that needs + * to be found, the function compares 16 bytes at a time from @buf against + * @gfid. + * + * What happens when the match is found: + * i. If the function was called as part of 'limit-usage' operation, the call + * returns with write_byte_count = bytes_read + *ii. If the function as called as part of 'quota remove' operation, @buf + * is modified in memory such that the match is deleted from the buffer, and + * also @write_byte_count is set to original buf size minus the sixteen bytes + * that was deleted as part of 'remove'. + * + * What happens when the match is not found in the current buffer: + * The function returns with write_byte_count = bytes_read, which means to say + * that the caller of this function must write the entire buffer to the tmp file + * and continue the search. 
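+ * As a hypothetical illustration: if a 48-byte read holds three 16-byte
+ * gfids and the key matches the second one, a 'limit-usage' call reports
+ * write_byte_count = bytes_read = 48, while a 'remove' call shifts the third
+ * gfid over the second and reports write_byte_count = 32 (48 minus the 16
+ * deleted bytes).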
+ */ +static gf_boolean_t +glusterd_find_gfid_match_3_6(uuid_t gfid, unsigned char *buf, size_t bytes_read, + int opcode, size_t *write_byte_count) +{ + int gfid_index = 0; + int shift_count = 0; + unsigned char tmp_buf[17] = { + 0, + }; + + /* This function if for backward compatibility */ + + while (gfid_index != bytes_read) { + memcpy((void *)tmp_buf, (void *)&buf[gfid_index], 16); + if (!gf_uuid_compare(gfid, tmp_buf)) { + if (opcode == GF_QUOTA_OPTION_TYPE_REMOVE) { + shift_count = bytes_read - (gfid_index + 16); + memmove((void *)&buf[gfid_index], (void *)&buf[gfid_index + 16], + shift_count); + *write_byte_count = bytes_read - 16; + } else { + *write_byte_count = bytes_read; + } + return _gf_true; + } else { + gfid_index += 16; + } + } + if (gfid_index == bytes_read) + *write_byte_count = bytes_read; + + return _gf_false; +} + +static gf_boolean_t +glusterd_find_gfid_match(uuid_t gfid, char gfid_type, unsigned char *buf, + size_t bytes_read, int opcode, + size_t *write_byte_count) +{ + int gfid_index = 0; + int shift_count = 0; + unsigned char tmp_buf[17] = { + 0, + }; + char type = 0; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, out); + + if (conf->op_version < GD_OP_VERSION_3_7_0) + return glusterd_find_gfid_match_3_6(gfid, buf, bytes_read, opcode, + write_byte_count); + + while (gfid_index != bytes_read) { + memcpy((void *)tmp_buf, (void *)&buf[gfid_index], 16); + type = buf[gfid_index + 16]; + + if (!gf_uuid_compare(gfid, tmp_buf) && type == gfid_type) { + if (opcode == GF_QUOTA_OPTION_TYPE_REMOVE || + opcode == GF_QUOTA_OPTION_TYPE_REMOVE_OBJECTS) { + shift_count = bytes_read - (gfid_index + 17); + memmove((void *)&buf[gfid_index], (void *)&buf[gfid_index + 17], + shift_count); + *write_byte_count = bytes_read - 17; + } else { + *write_byte_count = bytes_read; + } + return _gf_true; + } else { + gfid_index += 17; + } + } + if (gfid_index == bytes_read) + *write_byte_count = bytes_read; + +out: + + return _gf_false; +} + +/* The function glusterd_copy_to_tmp_file() reads the "remaining" bytes from + * the source fd and writes them to destination fd, at the rate of 1000 entries + * a time (qconf_line_sz is the size of an entry) + */ + +static int +glusterd_copy_to_tmp_file(int src_fd, int dst_fd, int qconf_line_sz) +{ + int ret = 0; + ssize_t bytes_read = 0; + xlator_t *this = NULL; + unsigned char *buf = 0; + int buf_sz = qconf_line_sz * 1000; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(buf_sz > 0); + + buf = GF_CALLOC(buf_sz, 1, gf_common_mt_char); + if (!buf) { + ret = -1; + goto out; + } + + while ((bytes_read = sys_read(src_fd, buf, buf_sz)) > 0) { + if (bytes_read % qconf_line_sz != 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_QUOTA_CONF_CORRUPT, + "quota.conf " + "corrupted"); + ret = -1; + goto out; + } + ret = sys_write(dst_fd, (void *)buf, bytes_read); + if (ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, errno, + GD_MSG_QUOTA_CONF_WRITE_FAIL, + "write into quota.conf failed."); + goto out; + } + } + ret = 0; + +out: + if (buf) + GF_FREE(buf); + return ret; +} + +int +glusterd_store_quota_conf_upgrade(glusterd_volinfo_t *volinfo) +{ + int ret = -1; + int fd = -1; + int conf_fd = -1; + unsigned char gfid[17] = { + 0, + }; + xlator_t *this = NULL; + char type = 0; + + this = THIS; + GF_ASSERT(this); + + fd = gf_store_mkstemp(volinfo->quota_conf_shandle); + if (fd < 0) { + ret = -1; + goto out; + } + + conf_fd = 
open(volinfo->quota_conf_shandle->path, O_RDONLY); + if (conf_fd == -1) { + ret = -1; + goto out; + } + + ret = quota_conf_skip_header(conf_fd); + if (ret) + goto out; + + ret = glusterd_quota_conf_write_header(fd); + if (ret) + goto out; + + while (1) { + ret = quota_conf_read_gfid(conf_fd, gfid, &type, 1.1); + if (ret == 0) + break; + else if (ret < 0) + goto out; + + ret = glusterd_quota_conf_write_gfid(fd, gfid, + GF_QUOTA_CONF_TYPE_USAGE); + if (ret < 0) + goto out; + } + +out: + if (conf_fd != -1) + sys_close(conf_fd); + + if (ret && (fd > 0)) { + gf_store_unlink_tmppath(volinfo->quota_conf_shandle); + } else if (!ret) { + ret = gf_store_rename_tmppath(volinfo->quota_conf_shandle); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "Failed to rename " + "quota conf file"); + return ret; + } + + ret = glusterd_compute_cksum(volinfo, _gf_true); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CKSUM_COMPUTE_FAIL, + "Failed to " + "compute cksum for quota conf file"); + return ret; + } + + ret = glusterd_store_save_quota_version_and_cksum(volinfo); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_QUOTA_CKSUM_VER_STORE_FAIL, + "Failed to " + "store quota version and cksum"); + } + + return ret; +} + +int +glusterd_store_quota_config(glusterd_volinfo_t *volinfo, char *path, + char *gfid_str, int opcode, char **op_errstr) +{ + int ret = -1; + int fd = -1; + int conf_fd = -1; + ssize_t bytes_read = 0; + size_t bytes_to_write = 0; + uuid_t gfid = { + 0, + }; + xlator_t *this = NULL; + gf_boolean_t found = _gf_false; + gf_boolean_t modified = _gf_false; + gf_boolean_t is_file_empty = _gf_false; + gf_boolean_t is_first_read = _gf_true; + glusterd_conf_t *conf = NULL; + float version = 0.0f; + char type = 0; + int quota_conf_line_sz = 16; + unsigned char *buf = 0; + int buf_sz = 0; + + this = THIS; + GF_ASSERT(this); + conf = this->private; + GF_ASSERT(conf); + + glusterd_store_create_quota_conf_sh_on_absence(volinfo); + + conf_fd = open(volinfo->quota_conf_shandle->path, O_RDONLY); + if (conf_fd == -1) { + ret = -1; + goto out; + } + + ret = quota_conf_read_version(conf_fd, &version); + if (ret) + goto out; + + if (version < 1.2f && conf->op_version >= GD_OP_VERSION_3_7_0) { + /* Upgrade quota.conf file to newer format */ + sys_close(conf_fd); + conf_fd = -1; + + ret = glusterd_store_quota_conf_upgrade(volinfo); + if (ret) + goto out; + + if (GF_QUOTA_OPTION_TYPE_UPGRADE == opcode) { + /* Nothing more to be done here */ + goto out; + } + + conf_fd = open(volinfo->quota_conf_shandle->path, O_RDONLY); + if (conf_fd == -1) { + ret = -1; + goto out; + } + + ret = quota_conf_skip_header(conf_fd); + if (ret) + goto out; + } else if (GF_QUOTA_OPTION_TYPE_UPGRADE == opcode) { + /* No change to be done in quota_conf*/ + goto out; + } + + /* If op-ver is gt 3.7, then quota.conf will be upgraded, and 17 bytes + * storted in the new format. 
16 bytes uuid and + * 1 byte type (usage/object) + */ + if (conf->op_version >= GD_OP_VERSION_3_7_0) + quota_conf_line_sz++; + + buf_sz = quota_conf_line_sz * 1000; + + buf = GF_CALLOC(buf_sz, 1, gf_common_mt_char); + if (!buf) { + ret = -1; + goto out; + } + + fd = gf_store_mkstemp(volinfo->quota_conf_shandle); + if (fd < 0) { + ret = -1; + goto out; + } + + ret = glusterd_quota_conf_write_header(fd); + if (ret) + goto out; + + /* Just create empty quota.conf file if create */ + if (GF_QUOTA_OPTION_TYPE_ENABLE == opcode || + GF_QUOTA_OPTION_TYPE_ENABLE_OBJECTS == opcode) { + modified = _gf_true; + goto out; + } + + /* Check if gfid_str is given for opts other than ENABLE */ + if (!gfid_str) { + ret = -1; + goto out; + } + gf_uuid_parse(gfid_str, gfid); + + if (opcode > GF_QUOTA_OPTION_TYPE_VERSION_OBJECTS) + type = GF_QUOTA_CONF_TYPE_OBJECTS; + else + type = GF_QUOTA_CONF_TYPE_USAGE; + + for (;;) { + bytes_read = sys_read(conf_fd, buf, buf_sz); + if (bytes_read <= 0) { + /*The flag @is_first_read is TRUE when the loop is + * entered, and is set to false if the first read + * reads non-zero bytes of data. The flag is used to + * detect if quota.conf is an empty file, but for the + * header. This is done to log appropriate error message + * when 'quota remove' is attempted when there are no + * limits set on the given volume. + */ + if (is_first_read) + is_file_empty = _gf_true; + break; + } + if ((bytes_read % quota_conf_line_sz) != 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_QUOTA_CONF_CORRUPT, + "quota.conf " + "corrupted"); + ret = -1; + goto out; + } + found = glusterd_find_gfid_match(gfid, type, buf, bytes_read, opcode, + &bytes_to_write); + + ret = sys_write(fd, (void *)buf, bytes_to_write); + if (ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, errno, + GD_MSG_QUOTA_CONF_WRITE_FAIL, + "write into quota.conf failed."); + goto out; + } + + /*If the match is found in this iteration, copy the rest of + * quota.conf into quota.conf.tmp and break. + * Else continue with the search. + */ + if (found) { + ret = glusterd_copy_to_tmp_file(conf_fd, fd, quota_conf_line_sz); + if (ret) + goto out; + break; + } + is_first_read = _gf_false; + } + + switch (opcode) { + case GF_QUOTA_OPTION_TYPE_LIMIT_USAGE: + if (!found) { + ret = glusterd_quota_conf_write_gfid(fd, gfid, + GF_QUOTA_CONF_TYPE_USAGE); + if (ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, errno, + GD_MSG_QUOTA_CONF_WRITE_FAIL, + "write into quota.conf failed. "); + goto out; + } + modified = _gf_true; + } + break; + case GF_QUOTA_OPTION_TYPE_LIMIT_OBJECTS: + if (!found) { + ret = glusterd_quota_conf_write_gfid( + fd, gfid, GF_QUOTA_CONF_TYPE_OBJECTS); + if (ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, errno, + GD_MSG_QUOTA_CONF_WRITE_FAIL, + "write into quota.conf failed. "); + goto out; + } + modified = _gf_true; + } + break; + + case GF_QUOTA_OPTION_TYPE_REMOVE: + case GF_QUOTA_OPTION_TYPE_REMOVE_OBJECTS: + if (is_file_empty) { + gf_asprintf(op_errstr, + "Cannot remove limit on" + " %s. The quota configuration file" + " for volume %s is empty.", + path, volinfo->volname); + ret = -1; + goto out; + } else { + if (!found) { + gf_asprintf(op_errstr, + "Error. 
gfid %s" + " for path %s not found in" + " store", + gfid_str, path); + ret = -1; + goto out; + } else { + modified = _gf_true; + } + } + break; + + default: + ret = 0; + break; + } + + if (modified) + glusterd_update_quota_conf_version(volinfo); + + ret = 0; +out: + if (conf_fd != -1) { + sys_close(conf_fd); + } + + if (buf) + GF_FREE(buf); + + if (ret && (fd > 0)) { + gf_store_unlink_tmppath(volinfo->quota_conf_shandle); + } else if (!ret && GF_QUOTA_OPTION_TYPE_UPGRADE != opcode) { + ret = gf_store_rename_tmppath(volinfo->quota_conf_shandle); + if (modified) { + ret = glusterd_compute_cksum(volinfo, _gf_true); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CKSUM_COMPUTE_FAIL, + "Failed to " + "compute cksum for quota conf file"); + return ret; + } + + ret = glusterd_store_save_quota_version_and_cksum(volinfo); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_VERS_CKSUM_STORE_FAIL, + "Failed to " + "store quota version and cksum"); + } + } + return ret; +} + +int32_t +glusterd_quota_limit_usage(glusterd_volinfo_t *volinfo, dict_t *dict, + int opcode, char **op_errstr) +{ + int32_t ret = -1; + char *path = NULL; + char *hard_limit = NULL; + char *soft_limit = NULL; + char *gfid_str = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + GF_VALIDATE_OR_GOTO(this->name, dict, out); + GF_VALIDATE_OR_GOTO(this->name, volinfo, out); + GF_VALIDATE_OR_GOTO(this->name, op_errstr, out); + + ret = glusterd_check_if_quota_trans_enabled(volinfo); + if (ret == -1) { + *op_errstr = gf_strdup( + "Quota is disabled, please enable " + "quota"); + goto out; + } + + ret = dict_get_strn(dict, "path", SLEN("path"), &path); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to fetch path"); + goto out; + } + ret = gf_canonicalize_path(path); + if (ret) + goto out; + + ret = dict_get_strn(dict, "hard-limit", SLEN("hard-limit"), &hard_limit); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to fetch hard limit"); + goto out; + } + + if (dict_getn(dict, "soft-limit", SLEN("soft-limit"))) { + ret = dict_get_strn(dict, "soft-limit", SLEN("soft-limit"), + &soft_limit); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to fetch " + "soft limit"); + goto out; + } + } + + if (is_origin_glusterd(dict)) { + if (opcode == GF_QUOTA_OPTION_TYPE_LIMIT_USAGE) { + ret = glusterd_set_quota_limit(volinfo->volname, path, hard_limit, + soft_limit, QUOTA_LIMIT_KEY, + op_errstr); + } else { + ret = glusterd_set_quota_limit(volinfo->volname, path, hard_limit, + soft_limit, QUOTA_LIMIT_OBJECTS_KEY, + op_errstr); + } + if (ret) + goto out; + } + + ret = dict_get_strn(dict, "gfid", SLEN("gfid"), &gfid_str); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get gfid of path " + "%s", + path); + goto out; + } + + ret = glusterd_store_quota_config(volinfo, path, gfid_str, opcode, + op_errstr); + if (ret) + goto out; + + ret = 0; +out: + + if (ret && op_errstr && !*op_errstr) + gf_asprintf(op_errstr, + "Failed to set hard limit on path %s " + "for volume %s", + path, volinfo->volname); + return ret; +} + +static int +glusterd_remove_quota_limit(char *volname, char *path, char **op_errstr, + int type) +{ + int ret = -1; + xlator_t *this = NULL; + char abspath[PATH_MAX] = { + 0, + }; + glusterd_conf_t *priv = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + GLUSTERD_GET_QUOTA_LIMIT_MOUNT_PATH(abspath, volname, path); + ret = 
gf_lstat_dir(abspath, NULL); + if (ret) { + gf_asprintf(op_errstr, + "Failed to find the directory %s. " + "Reason : %s", + abspath, strerror(errno)); + goto out; + } + + if (type == GF_QUOTA_OPTION_TYPE_REMOVE) { + ret = sys_lremovexattr(abspath, QUOTA_LIMIT_KEY); + if (ret) { + gf_asprintf(op_errstr, + "removexattr failed on %s. " + "Reason : %s", + abspath, strerror(errno)); + goto out; + } + } + + if (type == GF_QUOTA_OPTION_TYPE_REMOVE_OBJECTS) { + ret = sys_lremovexattr(abspath, QUOTA_LIMIT_OBJECTS_KEY); + if (ret) { + gf_asprintf(op_errstr, + "removexattr failed on %s. " + "Reason : %s", + abspath, strerror(errno)); + goto out; + } + } + ret = 0; + +out: + return ret; +} + +int32_t +glusterd_quota_remove_limits(glusterd_volinfo_t *volinfo, dict_t *dict, + int opcode, char **op_errstr, int type) +{ + int32_t ret = -1; + char *path = NULL; + char *gfid_str = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + GF_VALIDATE_OR_GOTO(this->name, dict, out); + GF_VALIDATE_OR_GOTO(this->name, volinfo, out); + GF_VALIDATE_OR_GOTO(this->name, op_errstr, out); + + ret = glusterd_check_if_quota_trans_enabled(volinfo); + if (ret == -1) { + *op_errstr = gf_strdup( + "Quota is disabled, please enable " + "quota"); + goto out; + } + + ret = dict_get_strn(dict, "path", SLEN("path"), &path); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to fetch path"); + goto out; + } + + ret = gf_canonicalize_path(path); + if (ret) + goto out; + + if (is_origin_glusterd(dict)) { + ret = glusterd_remove_quota_limit(volinfo->volname, path, op_errstr, + type); + if (ret) + goto out; + } + + ret = dict_get_strn(dict, "gfid", SLEN("gfid"), &gfid_str); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get gfid of path " + "%s", + path); + goto out; + } + + ret = glusterd_store_quota_config(volinfo, path, gfid_str, opcode, + op_errstr); + if (ret) + goto out; + + ret = 0; + +out: + return ret; +} + +int +glusterd_set_quota_option(glusterd_volinfo_t *volinfo, dict_t *dict, char *key, + char **op_errstr) +{ + int ret = 0; + char *value = NULL; + xlator_t *this = NULL; + char *option = NULL; + + this = THIS; + GF_ASSERT(this); + + ret = glusterd_check_if_quota_trans_enabled(volinfo); + if (ret == -1) { + gf_asprintf(op_errstr, + "Cannot set %s. 
Quota on volume %s is " + "disabled", + key, volinfo->volname); + return -1; + } + + ret = dict_get_strn(dict, "value", SLEN("value"), &value); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Option value absent."); + return -1; + } + + option = gf_strdup(value); + ret = dict_set_dynstr(volinfo->dict, key, option); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to set option %s", key); + return -1; + } + + return 0; +} + +static int +glusterd_quotad_op(int opcode) +{ + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + GF_ASSERT(priv); + + switch (opcode) { + case GF_QUOTA_OPTION_TYPE_ENABLE: + case GF_QUOTA_OPTION_TYPE_DISABLE: + + if (glusterd_all_volumes_with_quota_stopped()) + ret = glusterd_svc_stop(&(priv->quotad_svc), SIGTERM); + else + ret = priv->quotad_svc.manager(&(priv->quotad_svc), NULL, + PROC_START); + break; + + default: + ret = 0; + break; + } + return ret; +} + +int +glusterd_op_quota(dict_t *dict, char **op_errstr, dict_t *rsp_dict) +{ + glusterd_volinfo_t *volinfo = NULL; + int32_t ret = -1; + char *volname = NULL; + int type = -1; + gf_boolean_t start_crawl = _gf_false; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + GF_ASSERT(dict); + GF_ASSERT(op_errstr); + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get volume name"); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + gf_asprintf(op_errstr, FMTSTR_CHECK_VOL_EXISTS, volname); + goto out; + } + + ret = dict_get_int32n(dict, "type", SLEN("type"), &type); + + if (!glusterd_is_quota_supported(type, op_errstr)) { + ret = -1; + goto out; + } + + switch (type) { + case GF_QUOTA_OPTION_TYPE_ENABLE: + ret = glusterd_quota_enable(volinfo, op_errstr, &start_crawl); + if (ret < 0) + goto out; + break; + + case GF_QUOTA_OPTION_TYPE_ENABLE_OBJECTS: + ret = glusterd_inode_quota_enable(volinfo, op_errstr, &start_crawl); + if (ret < 0) + goto out; + break; + + case GF_QUOTA_OPTION_TYPE_DISABLE: + ret = glusterd_quota_disable(volinfo, op_errstr, &start_crawl); + if (ret < 0) + goto out; + + break; + + case GF_QUOTA_OPTION_TYPE_LIMIT_USAGE: + case GF_QUOTA_OPTION_TYPE_LIMIT_OBJECTS: + ret = glusterd_quota_limit_usage(volinfo, dict, type, op_errstr); + goto out; + + case GF_QUOTA_OPTION_TYPE_REMOVE: + case GF_QUOTA_OPTION_TYPE_REMOVE_OBJECTS: + ret = glusterd_quota_remove_limits(volinfo, dict, type, op_errstr, + type); + goto out; + + case GF_QUOTA_OPTION_TYPE_LIST: + case GF_QUOTA_OPTION_TYPE_LIST_OBJECTS: + ret = glusterd_check_if_quota_trans_enabled(volinfo); + if (ret == -1) { + *op_errstr = gf_strdup( + "Cannot list limits, " + "quota is disabled"); + goto out; + } + ret = glusterd_quota_get_default_soft_limit(volinfo, rsp_dict); + goto out; + + case GF_QUOTA_OPTION_TYPE_SOFT_TIMEOUT: + ret = glusterd_set_quota_option(volinfo, dict, + "features.soft-timeout", op_errstr); + if (ret) + goto out; + break; + + case GF_QUOTA_OPTION_TYPE_HARD_TIMEOUT: + ret = glusterd_set_quota_option(volinfo, dict, + "features.hard-timeout", op_errstr); + if (ret) + goto out; + break; + + case GF_QUOTA_OPTION_TYPE_ALERT_TIME: + ret = glusterd_set_quota_option(volinfo, dict, + "features.alert-time", op_errstr); + if (ret) + goto out; + break; + + case 
GF_QUOTA_OPTION_TYPE_DEFAULT_SOFT_LIMIT: + ret = glusterd_set_quota_option( + volinfo, dict, "features.default-soft-limit", op_errstr); + if (ret) + goto out; + break; + + default: + gf_asprintf(op_errstr, + "Quota command failed. Invalid " + "opcode"); + ret = -1; + goto out; + } + + if (priv->op_version > GD_OP_VERSION_MIN) { + ret = glusterd_quotad_op(type); + if (ret) + goto out; + } + + if (GF_QUOTA_OPTION_TYPE_ENABLE == type) + volinfo->quota_xattr_version++; + ret = glusterd_store_volinfo(volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + if (GF_QUOTA_OPTION_TYPE_ENABLE == type) + volinfo->quota_xattr_version--; + goto out; + } + + ret = glusterd_create_volfiles_and_notify_services(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL, + "Unable to re-create " + "volfiles"); + if (GF_QUOTA_OPTION_TYPE_ENABLE == type) { + /* rollback volinfo */ + volinfo->quota_xattr_version--; + ret = glusterd_store_volinfo(volinfo, + GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_SET_FAIL, + "Failed to store volinfo for volume %s", + volinfo->volname); + } + } + + ret = -1; + goto out; + } + +#if BUILD_GNFS + if (GLUSTERD_STATUS_STARTED == volinfo->status) { + if (priv->op_version == GD_OP_VERSION_MIN) + (void)priv->nfs_svc.manager(&(priv->nfs_svc), NULL, 0); + } +#endif + + if (rsp_dict && start_crawl == _gf_true) + glusterd_quota_initiate_fs_crawl(priv, volinfo, type); + + ret = 0; +out: + if (type == GF_QUOTA_OPTION_TYPE_LIMIT_USAGE || + type == GF_QUOTA_OPTION_TYPE_LIMIT_OBJECTS || + type == GF_QUOTA_OPTION_TYPE_REMOVE || + type == GF_QUOTA_OPTION_TYPE_REMOVE_OBJECTS) { + /* During a list operation we need the aux mount to be + * accessible until the listing is done at the cli + */ + glusterd_remove_auxiliary_mount(volinfo->volname); + } + + return ret; +} + +/* + * glusterd_get_gfid_from_brick() fetches the 'trusted.gfid' attribute of @path + * from each brick in the backend and places the same in the rsp_dict with the + * keys being gfid0, gfid1, gfid2 and so on. The absence of @path in the backend + * is not treated as error. 
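+ *
+ * For illustration, on a peer that hosts two local bricks which both
+ * contain @path, rsp_dict would end up roughly as (values hypothetical):
+ *   gfid0 = <gfid of @path on the first local brick>
+ *   gfid1 = <gfid of @path on the second local brick>
+ *   count = 2
+ * Bricks on other peers, bricks that do not contain @path, and bricks
+ * whose gfid xattr cannot be read are skipped without failing the call.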
+ */ +static int +glusterd_get_gfid_from_brick(dict_t *dict, glusterd_volinfo_t *volinfo, + dict_t *rsp_dict, char **op_errstr) +{ + int ret = -1; + int count = 0; + char *path = NULL; + char backend_path[PATH_MAX] = { + 0, + }; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + char key[64] = { + 0, + }; + int keylen; + char *gfid_str = NULL; + uuid_t gfid; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + ret = dict_get_strn(dict, "path", SLEN("path"), &path); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get path"); + goto out; + } + + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + ret = glusterd_resolve_brick(brickinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RESOLVE_BRICK_FAIL, + FMTSTR_RESOLVE_BRICK, brickinfo->hostname, brickinfo->path); + goto out; + } + + if (gf_uuid_compare(brickinfo->uuid, MY_UUID)) + continue; + + if (brickinfo->vg[0]) + continue; + + snprintf(backend_path, sizeof(backend_path), "%s%s", brickinfo->path, + path); + + ret = gf_lstat_dir(backend_path, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_INFO, errno, GD_MSG_DIR_OP_FAILED, + "Failed to find " + "directory %s.", + backend_path); + ret = 0; + continue; + } + ret = sys_lgetxattr(backend_path, GFID_XATTR_KEY, gfid, 16); + if (ret < 0) { + gf_smsg(this->name, GF_LOG_INFO, errno, GD_MSG_GET_XATTR_FAIL, + "Attribute=%s, Directory=%s", GFID_XATTR_KEY, backend_path, + NULL); + ret = 0; + continue; + } + keylen = snprintf(key, sizeof(key), "gfid%d", count); + + gfid_str = gf_strdup(uuid_utoa(gfid)); + if (!gfid_str) { + ret = -1; + goto out; + } + + ret = dict_set_dynstrn(rsp_dict, key, keylen, gfid_str); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to place " + "gfid of %s in dict", + backend_path); + GF_FREE(gfid_str); + goto out; + } + count++; + } + + ret = dict_set_int32n(rsp_dict, "count", SLEN("count"), count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set count"); + goto out; + } + + ret = 0; +out: + return ret; +} + +static int +_glusterd_validate_quota_opts(dict_t *dict, int type, char **errstr) +{ + int ret = -1; + xlator_t *this = THIS; + void *quota_xl = NULL; + volume_opt_list_t opt_list = { + {0}, + }; + volume_option_t *opt = NULL; + char *key = NULL; + char *value = NULL; + + GF_ASSERT(dict); + GF_ASSERT(this); + + ret = xlator_volopt_dynload("features/quota", "a_xl, &opt_list); + if (ret) + goto out; + + switch (type) { + case GF_QUOTA_OPTION_TYPE_SOFT_TIMEOUT: + case GF_QUOTA_OPTION_TYPE_HARD_TIMEOUT: + case GF_QUOTA_OPTION_TYPE_ALERT_TIME: + case GF_QUOTA_OPTION_TYPE_DEFAULT_SOFT_LIMIT: + key = (char *)gd_quota_op_list[type]; + break; + default: + ret = -1; + goto out; + } + + opt = xlator_volume_option_get_list(&opt_list, key); + if (!opt) { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_UNKNOWN_KEY, + "Unknown option: %s", key); + goto out; + } + ret = dict_get_strn(dict, "value", SLEN("value"), &value); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Value not found for key %s", key); + goto out; + } + + ret = xlator_option_validate(this, key, value, opt, errstr); + +out: + if (quota_xl) { + dlclose(quota_xl); + quota_xl = NULL; + } + return ret; +} + +static int +glusterd_create_quota_auxiliary_mount(xlator_t *this, char *volname, int type) +{ + int ret = -1; + char mountdir[PATH_MAX] = { + 0, + 
}; + char pidfile_path[PATH_MAX] = { + 0, + }; + char logfile[PATH_MAX] = { + 0, + }; + char qpid[16] = { + 0, + }; + char *volfileserver = NULL; + glusterd_conf_t *priv = NULL; + struct stat buf = { + 0, + }; + FILE *file = NULL; + + GF_VALIDATE_OR_GOTO("glusterd", this, out); + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, out); + + if (type == GF_QUOTA_OPTION_TYPE_LIST || + type == GF_QUOTA_OPTION_TYPE_LIST_OBJECTS) { + GLUSTERFS_GET_QUOTA_LIST_MOUNT_PIDFILE(pidfile_path, volname); + GLUSTERD_GET_QUOTA_LIST_MOUNT_PATH(mountdir, volname, "/"); + } else { + GLUSTERFS_GET_QUOTA_LIMIT_MOUNT_PIDFILE(pidfile_path, volname); + GLUSTERD_GET_QUOTA_LIMIT_MOUNT_PATH(mountdir, volname, "/"); + } + + file = fopen(pidfile_path, "r"); + if (file) { + /* Previous command did not clean up pid file. + * remove aux mount if it exists*/ + gf_umount_lazy(this->name, mountdir, 1); + fclose(file); + } + + ret = sys_mkdir(mountdir, 0755); + if (ret && errno != EEXIST) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_MOUNT_REQ_FAIL, + "Failed to create auxiliary " + "mount directory %s", + mountdir); + goto out; + } + snprintf(logfile, PATH_MAX - 1, "%s/quota-mount-%s.log", priv->logdir, + volname); + snprintf(qpid, 15, "%d", GF_CLIENT_PID_QUOTA_MOUNT); + + if (dict_get_strn(this->options, "transport.socket.bind-address", + SLEN("transport.socket.bind-address"), + &volfileserver) != 0) + volfileserver = "localhost"; + + synclock_unlock(&priv->big_lock); + ret = runcmd(SBIN_DIR "/glusterfs", "--volfile-server", volfileserver, + "--volfile-id", volname, "-l", logfile, "-p", pidfile_path, + "--client-pid", qpid, mountdir, NULL); + if (ret == 0) { + /* Block here till mount process is ready to accept FOPs. + * Else, if glusterd acquires biglock below before + * mount process is ready, then glusterd and mount process + * can get into a deadlock situation. + */ + ret = sys_stat(mountdir, &buf); + if (ret < 0) + ret = -errno; + } else { + ret = -errno; + } + + synclock_lock(&priv->big_lock); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, GD_MSG_MOUNT_REQ_FAIL, + "Failed to mount glusterfs " + "client. 
Please check the log file %s for more details", + logfile); + ret = -1; + goto out; + } + + ret = 0; + +out: + return ret; +} + +int +glusterd_op_stage_quota(dict_t *dict, char **op_errstr, dict_t *rsp_dict) +{ + int ret = 0; + char *volname = NULL; + int type = 0; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *volinfo = NULL; + char *hard_limit_str = NULL; + int64_t hard_limit = 0; + gf_boolean_t get_gfid = _gf_false; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + GF_ASSERT(dict); + GF_ASSERT(op_errstr); + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get volume name"); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + gf_asprintf(op_errstr, FMTSTR_CHECK_VOL_EXISTS, volname); + goto out; + } + + if (!glusterd_is_volume_started(volinfo)) { + *op_errstr = gf_strdup( + "Volume is stopped, start volume " + "before executing quota command."); + ret = -1; + goto out; + } + + ret = dict_get_int32n(dict, "type", SLEN("type"), &type); + if (ret) { + *op_errstr = gf_strdup( + "Volume quota failed, internal error, " + "unable to get type of operation"); + goto out; + } + + if ((!glusterd_is_volume_quota_enabled(volinfo)) && + (type != GF_QUOTA_OPTION_TYPE_ENABLE)) { + *op_errstr = gf_strdup( + "Quota is disabled, please enable " + "quota"); + ret = -1; + goto out; + } + + if (type > GF_QUOTA_OPTION_TYPE_VERSION_OBJECTS) { + if (!glusterd_is_volume_inode_quota_enabled(volinfo) && + type != GF_QUOTA_OPTION_TYPE_ENABLE_OBJECTS) { + *op_errstr = gf_strdup( + "Inode Quota is disabled, " + "please enable inode quota"); + ret = -1; + goto out; + } + } + + if (!glusterd_is_quota_supported(type, op_errstr)) { + ret = -1; + goto out; + } + + if ((GF_QUOTA_OPTION_TYPE_ENABLE != type) && + (glusterd_check_if_quota_trans_enabled(volinfo) != 0)) { + ret = -1; + gf_asprintf(op_errstr, "Quota is not enabled on volume %s", volname); + goto out; + } + + switch (type) { + case GF_QUOTA_OPTION_TYPE_LIST: + case GF_QUOTA_OPTION_TYPE_LIST_OBJECTS: + case GF_QUOTA_OPTION_TYPE_LIMIT_USAGE: + case GF_QUOTA_OPTION_TYPE_LIMIT_OBJECTS: + case GF_QUOTA_OPTION_TYPE_REMOVE: + case GF_QUOTA_OPTION_TYPE_REMOVE_OBJECTS: + /* Quota auxiliary mount is needed by CLI + * for list command and need by glusterd for + * setting/removing limit + */ + if (is_origin_glusterd(dict)) { + ret = glusterd_create_quota_auxiliary_mount(this, volname, + type); + if (ret) { + *op_errstr = gf_strdup( + "Failed to start aux " + "mount"); + goto out; + } + } + break; + } + + switch (type) { + case GF_QUOTA_OPTION_TYPE_LIMIT_USAGE: + ret = dict_get_strn(dict, "hard-limit", SLEN("hard-limit"), + &hard_limit_str); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get hard-limit from dict"); + goto out; + } + ret = gf_string2bytesize_int64(hard_limit_str, &hard_limit); + if (ret) { + if (errno == ERANGE || hard_limit < 0) + gf_asprintf(op_errstr, + "Hard-limit " + "value out of range (0 - %" PRId64 "): %s", + hard_limit, hard_limit_str); + else + gf_msg(this->name, GF_LOG_ERROR, errno, + GD_MSG_CONVERSION_FAILED, + "Failed to convert hard-limit " + "string to value"); + goto out; + } + get_gfid = _gf_true; + break; + case GF_QUOTA_OPTION_TYPE_LIMIT_OBJECTS: + get_gfid = _gf_true; + break; + + case GF_QUOTA_OPTION_TYPE_REMOVE: + case GF_QUOTA_OPTION_TYPE_REMOVE_OBJECTS: + get_gfid = _gf_true; + break; + + case 
GF_QUOTA_OPTION_TYPE_SOFT_TIMEOUT: + case GF_QUOTA_OPTION_TYPE_HARD_TIMEOUT: + case GF_QUOTA_OPTION_TYPE_ALERT_TIME: + case GF_QUOTA_OPTION_TYPE_DEFAULT_SOFT_LIMIT: + ret = _glusterd_validate_quota_opts(dict, type, op_errstr); + if (ret) + goto out; + break; + + default: + break; + } + + if (get_gfid == _gf_true) { + ret = glusterd_get_gfid_from_brick(dict, volinfo, rsp_dict, op_errstr); + if (ret) + goto out; + } + + ret = 0; + +out: + if (ret && op_errstr && *op_errstr) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OP_STAGE_QUOTA_FAIL, "%s", + *op_errstr); + gf_msg_debug(this->name, 0, "Returning %d", ret); + + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-quota.h b/xlators/mgmt/glusterd/src/glusterd-quota.h new file mode 100644 index 00000000000..ab2092a9c6a --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-quota.h @@ -0,0 +1,17 @@ +/* + Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef _GLUSTERD_QUOTA_ +#define _GLUSTERD_QUOTA_ + +int +glusterd_store_quota_config(glusterd_volinfo_t *volinfo, char *path, + char *gfid_str, int opcode, char **op_errstr); + +#endif diff --git a/xlators/mgmt/glusterd/src/glusterd-quotad-svc.c b/xlators/mgmt/glusterd/src/glusterd-quotad-svc.c new file mode 100644 index 00000000000..f26d832a06d --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-quotad-svc.c @@ -0,0 +1,217 @@ +/* + Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#include <glusterfs/globals.h> +#include <glusterfs/run.h> +#include "glusterd.h" +#include "glusterd-utils.h" +#include "glusterd-volgen.h" +#include "glusterd-quotad-svc.h" +#include "glusterd-messages.h" +#include "glusterd-svc-helper.h" + +char *quotad_svc_name = "quotad"; + +void +glusterd_quotadsvc_build(glusterd_svc_t *svc) +{ + svc->manager = glusterd_quotadsvc_manager; + svc->start = glusterd_quotadsvc_start; + svc->stop = glusterd_svc_stop; +} + +int +glusterd_quotadsvc_init(glusterd_svc_t *svc) +{ + int ret = -1; + + ret = glusterd_svc_init(svc, quotad_svc_name); + if (ret) + goto out; + +out: + return ret; +} + +static int +glusterd_quotadsvc_create_volfile() +{ + char filepath[PATH_MAX] = { + 0, + }; + glusterd_conf_t *conf = THIS->private; + + glusterd_svc_build_volfile_path(quotad_svc_name, conf->workdir, filepath, + sizeof(filepath)); + return glusterd_create_global_volfile(build_quotad_graph, filepath, NULL); +} + +int +glusterd_quotadsvc_manager(glusterd_svc_t *svc, void *data, int flags) +{ + int ret = 0; + glusterd_volinfo_t *volinfo = NULL; + + if (!svc->inited) { + ret = glusterd_quotadsvc_init(svc); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_FAILED_INIT_QUOTASVC, + "Failed to init " + "quotad service"); + goto out; + } else { + svc->inited = _gf_true; + gf_msg_debug(THIS->name, 0, + "quotad service " + "initialized"); + } + } + + volinfo = data; + + /* If all the volumes are stopped or all shd compatible volumes + * are stopped then stop the service if: + * - volinfo is NULL or + * - volinfo is present and volume is shd compatible + * Otherwise create volfile and restart service if: + * - volinfo is NULL or + * - volinfo is present and volume is shd compatible + */ + if (glusterd_are_all_volumes_stopped() || + glusterd_all_volumes_with_quota_stopped()) { + if (!(volinfo && !glusterd_is_volume_quota_enabled(volinfo))) { + ret = svc->stop(svc, SIGTERM); + } + } else { + if (!(volinfo && !glusterd_is_volume_quota_enabled(volinfo))) { + ret = glusterd_quotadsvc_create_volfile(); + if (ret) + goto out; + + ret = svc->stop(svc, SIGTERM); + if (ret) + goto out; + + ret = svc->start(svc, flags); + if (ret) + goto out; + + ret = glusterd_conn_connect(&(svc->conn)); + if (ret) + goto out; + } + } +out: + if (ret) + gf_event(EVENT_SVC_MANAGER_FAILED, "svc_name=%s", svc->name); + + gf_msg_debug(THIS->name, 0, "Returning %d", ret); + + return ret; +} + +int +glusterd_quotadsvc_start(glusterd_svc_t *svc, int flags) +{ + int i = 0; + int ret = -1; + dict_t *cmdline = NULL; + char key[16] = {0}; + char *options[] = {svc->name, "--process-name", NULL}; + + cmdline = dict_new(); + if (!cmdline) { + gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); + goto out; + } + + for (i = 0; options[i]; i++) { + ret = snprintf(key, sizeof(key), "arg%d", i); + ret = dict_set_strn(cmdline, key, ret, options[i]); + if (ret) + goto out; + } + + ret = glusterd_svc_start(svc, flags, cmdline); + +out: + if (cmdline) + dict_unref(cmdline); + + gf_msg_debug(THIS->name, 0, "Returning %d", ret); + + return ret; +} + +int +glusterd_quotadsvc_reconfigure() +{ + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + gf_boolean_t identical = _gf_false; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, out); + + if (glusterd_all_volumes_with_quota_stopped()) + goto manager; + + /* + * Check both OLD and NEW volfiles, if they are SAME by size + * and cksum i.e. 
"character-by-character". If YES, then + * NOTHING has been changed, just return. + */ + ret = glusterd_svc_check_volfile_identical(priv->quotad_svc.name, + build_quotad_graph, &identical); + if (ret) + goto out; + + if (identical) { + ret = 0; + goto out; + } + + /* + * They are not identical. Find out if the topology is changed + * OR just the volume options. If just the options which got + * changed, then inform the xlator to reconfigure the options. + */ + identical = _gf_false; /* RESET the FLAG */ + ret = glusterd_svc_check_topology_identical(priv->quotad_svc.name, + build_quotad_graph, &identical); + if (ret) + goto out; + + /* Topology is not changed, but just the options. But write the + * options to quotad volfile, so that quotad will be reconfigured. + */ + if (identical) { + ret = glusterd_quotadsvc_create_volfile(); + if (ret == 0) { /* Only if above PASSES */ + ret = glusterd_fetchspec_notify(THIS); + } + goto out; + } +manager: + /* + * quotad volfile's topology has been changed. quotad server needs + * to be RESTARTED to ACT on the changed volfile. + */ + ret = priv->quotad_svc.manager(&(priv->quotad_svc), NULL, + PROC_START_NO_WAIT); + +out: + gf_msg_debug(this ? this->name : "Quotad", 0, "Returning %d", ret); + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-quotad-svc.h b/xlators/mgmt/glusterd/src/glusterd-quotad-svc.h new file mode 100644 index 00000000000..e8d9bbee964 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-quotad-svc.h @@ -0,0 +1,31 @@ +/* + Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _GLUSTERD_QUOTAD_SVC_H_ +#define _GLUSTERD_QUOTAD_SVC_H_ + +#include "glusterd-svc-mgmt.h" + +void +glusterd_quotadsvc_build(glusterd_svc_t *svc); + +int +glusterd_quotadsvc_init(glusterd_svc_t *svc); + +int +glusterd_quotadsvc_start(glusterd_svc_t *svc, int flags); + +int +glusterd_quotadsvc_manager(glusterd_svc_t *svc, void *data, int flags); + +int +glusterd_quotadsvc_reconfigure(); + +#endif diff --git a/xlators/mgmt/glusterd/src/glusterd-rcu.h b/xlators/mgmt/glusterd/src/glusterd-rcu.h new file mode 100644 index 00000000000..c85f9bea8f8 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-rcu.h @@ -0,0 +1,36 @@ +/* + Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _GLUSTERD_RCU_H +#define _GLUSTERD_RCU_H + +#include <urcu-bp.h> +#include <urcu/rculist.h> +#include <urcu/compiler.h> +#include <urcu/uatomic.h> +#include <urcu-call-rcu.h> + +#ifdef URCU_OLD +#include "rculist-extra.h" +#endif + +#include <glusterfs/xlator.h> + +/* gd_rcu_head is a composite struct, composed of struct rcu_head and a this + * pointer, which is used to pass the THIS pointer to call_rcu callbacks. 
+ * + * Use this in place of struct rcu_head when embedding into another struct + */ +typedef struct glusterd_rcu_head_ { + struct rcu_head head; + xlator_t *this; +} gd_rcu_head; + +#endif /* _GLUSTERD_RCU_H */ diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c index fec0a1b2e19..458bf168ede 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c +++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c @@ -1,745 +1,1422 @@ /* - Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ + Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ #include <inttypes.h> +#include <sys/types.h> +#include <unistd.h> #include <sys/resource.h> +#include <sys/statvfs.h> -#include "globals.h" -#include "compat.h" +#include <glusterfs/compat.h> #include "protocol-common.h" -#include "xlator.h" -#include "logging.h" -#include "timer.h" +#include <glusterfs/xlator.h> +#include <glusterfs/logging.h> +#include <glusterfs/timer.h> #include "glusterd-mem-types.h" #include "glusterd.h" #include "glusterd-sm.h" #include "glusterd-op-sm.h" #include "glusterd-utils.h" +#include "glusterd-mgmt.h" +#include "glusterd-messages.h" #include "glusterd-store.h" - -#include "syscall.h" -#include "cli1.h" - +#include <glusterfs/run.h> +#include "glusterd-volgen.h" +#include "glusterd-messages.h" + +#include <glusterfs/syscall.h> +#include "cli1-xdr.h" +#include "xdr-generic.h" + +#define GLUSTERD_GET_DEFRAG_SOCK_FILE(path, volinfo) \ + do { \ + int32_t _defrag_sockfile_len; \ + char tmppath[PATH_MAX] = { \ + 0, \ + }; \ + _defrag_sockfile_len = snprintf( \ + tmppath, PATH_MAX, \ + DEFAULT_VAR_RUN_DIRECTORY "/gluster-%s-%s-%s.sock", "rebalance", \ + volinfo->volname, uuid_utoa(MY_UUID)); \ + if ((_defrag_sockfile_len < 0) || \ + (_defrag_sockfile_len >= PATH_MAX)) { \ + path[0] = 0; \ + } else { \ + glusterd_set_socket_filepath(tmppath, path, sizeof(path)); \ + } \ + } while (0) + +int32_t +glusterd_brick_op_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe); int -gf_glusterd_rebalance_move_data (glusterd_volinfo_t *volinfo, const char *dir) +glusterd_defrag_start_validate(glusterd_volinfo_t *volinfo, char *op_errstr, + size_t len, glusterd_op_t op) { - int ret = -1; - int dst_fd = -1; - int src_fd = -1; - DIR *fd = NULL; - glusterd_defrag_info_t *defrag = NULL; - struct dirent *entry = NULL; - struct stat stbuf = {0,}; - struct stat new_stbuf = {0,}; - char full_path[PATH_MAX] = {0,}; - char 
tmp_filename[PATH_MAX] = {0,}; - char value[16] = {0,}; - char linkinfo[PATH_MAX] = {0,}; - - if (!volinfo->defrag) - goto out; - - defrag = volinfo->defrag; - - fd = opendir (dir); - if (!fd) - goto out; - while ((entry = readdir (fd))) { - if (!entry) - break; - - if (!strcmp (entry->d_name, ".") || !strcmp (entry->d_name, "..")) - continue; - - snprintf (full_path, PATH_MAX, "%s/%s", dir, entry->d_name); - - ret = stat (full_path, &stbuf); - if (ret == -1) - continue; - - if (!S_ISREG (stbuf.st_mode)) - continue; - - defrag->num_files_lookedup += 1; - - if (stbuf.st_nlink > 1) - continue; - - /* if distribute is present, it will honor this key. - -1 is returned if distribute is not present or file doesn't - have a link-file. If file has link-file, the path of - link-file will be the value */ - ret = sys_lgetxattr (full_path, GF_XATTR_LINKINFO_KEY, - &linkinfo, PATH_MAX); - if (ret <= 0) - continue; - - /* If the file is open, don't run rebalance on it */ - ret = sys_lgetxattr (full_path, GLUSTERFS_OPEN_FD_COUNT, - &value, 16); - if ((ret < 0) || !strncmp (value, "1", 1)) - continue; - - /* If its a regular file, and sticky bit is set, we need to - rebalance that */ - snprintf (tmp_filename, PATH_MAX, "%s/.%s.gfs%llu", dir, - entry->d_name, - (unsigned long long)stbuf.st_size); - - dst_fd = creat (tmp_filename, stbuf.st_mode); - if (dst_fd == -1) - continue; - - src_fd = open (full_path, O_RDONLY); - if (src_fd == -1) { - close (dst_fd); - continue; - } - - while (1) { - ret = read (src_fd, defrag->databuf, 131072); - if (!ret || (ret < 0)) { - break; - } - ret = write (dst_fd, defrag->databuf, ret); - if (ret < 0) { - break; - } - } - - ret = stat (full_path, &new_stbuf); - if (ret < 0) { - close (dst_fd); - close (src_fd); - continue; - } - /* No need to rebalance, if there is some - activity on source file */ - if (new_stbuf.st_mtime != stbuf.st_mtime) { - close (dst_fd); - close (src_fd); - continue; - } - - ret = fchown (dst_fd, stbuf.st_uid, stbuf.st_gid); - if (ret) { - gf_log ("", GF_LOG_WARNING, - "failed to set the uid/gid of file %s: %s", - tmp_filename, strerror (errno)); - } - - ret = rename (tmp_filename, full_path); - if (ret != -1) { - LOCK (&defrag->lock); - { - defrag->total_files += 1; - defrag->total_data += stbuf.st_size; - } - UNLOCK (&defrag->lock); - } - - close (dst_fd); - close (src_fd); - - if (volinfo->defrag_status == GF_DEFRAG_STATUS_STOPED) { - closedir (fd); - ret = -1; - goto out; - } - } - closedir (fd); - - fd = opendir (dir); - if (!fd) - goto out; - while ((entry = readdir (fd))) { - if (!entry) - break; - - if (!strcmp (entry->d_name, ".") || !strcmp (entry->d_name, "..")) - continue; - - snprintf (full_path, 1024, "%s/%s", dir, entry->d_name); - - ret = stat (full_path, &stbuf); - if (ret == -1) - continue; - - if (!S_ISDIR (stbuf.st_mode)) - continue; - - ret = gf_glusterd_rebalance_move_data (volinfo, full_path); - if (ret) - break; - } - closedir (fd); - - if (!entry) - ret = 0; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + /* Check only if operation is not remove-brick */ + if ((GD_OP_REMOVE_BRICK != op) && !gd_is_remove_brick_committed(volinfo)) { + gf_msg_debug(this->name, 0, + "A remove-brick task on " + "volume %s is not yet committed", + volinfo->volname); + snprintf(op_errstr, len, + "A remove-brick task on volume %s is" + " not yet committed. 
Either commit or stop the " + "remove-brick task.", + volinfo->volname); + goto out; + } + + if (glusterd_is_defrag_on(volinfo)) { + gf_msg_debug(this->name, 0, "rebalance on volume %s already started", + volinfo->volname); + snprintf(op_errstr, len, "Rebalance on %s is already started", + volinfo->volname); + goto out; + } + + ret = 0; out: - return ret; + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; } -int -gf_glusterd_rebalance_fix_layout (glusterd_volinfo_t *volinfo, const char *dir) +int32_t +__glusterd_defrag_notify(struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data) { - int ret = -1; - char value[128] = {0,}; - char full_path[1024] = {0,}; - struct stat stbuf = {0,}; - DIR *fd = NULL; - struct dirent *entry = NULL; - - if (!volinfo->defrag) - goto out; - - fd = opendir (dir); - if (!fd) - goto out; + glusterd_volinfo_t *volinfo = NULL; + glusterd_defrag_info_t *defrag = NULL; + int ret = 0; + char pidfile[PATH_MAX]; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + int pid = -1; + + this = THIS; + if (!this) + return 0; - while ((entry = readdir (fd))) { - if (!entry) - break; + priv = this->private; + if (!priv) + return 0; - if (!strcmp (entry->d_name, ".") || !strcmp (entry->d_name, "..")) - continue; + volinfo = mydata; + if (!volinfo) + return 0; - snprintf (full_path, 1024, "%s/%s", dir, entry->d_name); + defrag = volinfo->rebal.defrag; + if (!defrag) + return 0; - ret = stat (full_path, &stbuf); - if (ret == -1) - continue; + if ((event == RPC_CLNT_DISCONNECT) && defrag->connected) + volinfo->rebal.defrag = NULL; - if (S_ISDIR (stbuf.st_mode)) { - /* Fix the layout of the directory */ - sys_lgetxattr (full_path, "trusted.distribute.fix.layout", - &value, 128); + GLUSTERD_GET_DEFRAG_PID_FILE(pidfile, volinfo, priv); - volinfo->defrag->total_files += 1; + switch (event) { + case RPC_CLNT_CONNECT: { + if (defrag->connected) + return 0; - /* Traverse into subdirectory */ - ret = gf_glusterd_rebalance_fix_layout (volinfo, - full_path); - if (ret) - break; - } + LOCK(&defrag->lock); + { + defrag->connected = 1; + } + UNLOCK(&defrag->lock); - if (volinfo->defrag_status == GF_DEFRAG_STATUS_STOPED) { - closedir (fd); - ret = -1; - goto out; - } + gf_msg_debug(this->name, 0, "%s got RPC_CLNT_CONNECT", + rpc->conn.name); + break; } - closedir (fd); - if (!entry) - ret = 0; + case RPC_CLNT_DISCONNECT: { + if (!defrag->connected) + return 0; -out: - return ret; -} - -void * -glusterd_defrag_start (void *data) -{ - glusterd_volinfo_t *volinfo = data; - glusterd_defrag_info_t *defrag = NULL; - char cmd_str[1024] = {0,}; - int ret = -1; - struct stat stbuf = {0,}; - char value[128] = {0,}; - - defrag = volinfo->defrag; - if (!defrag) - goto out; + LOCK(&defrag->lock); + { + defrag->connected = 0; + } + UNLOCK(&defrag->lock); - sleep (1); - ret = stat (defrag->mount, &stbuf); - if ((ret == -1) && (errno == ENOTCONN)) { - /* Wait for some more time before starting rebalance */ - sleep (2); - ret = stat (defrag->mount, &stbuf); - if (ret == -1) { - volinfo->defrag_status = GF_DEFRAG_STATUS_FAILED; - volinfo->rebalance_files = 0; - volinfo->rebalance_data = 0; - volinfo->lookedup_files = 0; - goto out; + if (!gf_is_service_running(pidfile, &pid)) { + if (volinfo->rebal.defrag_status == GF_DEFRAG_STATUS_STARTED) { + volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_FAILED; } - } + } - /* Fix the root ('/') first */ - sys_lgetxattr (defrag->mount, "trusted.distribute.fix.layout", - &value, 128); + glusterd_store_perform_node_state_store(volinfo); - 
if ((defrag->cmd == GF_DEFRAG_CMD_START) || - (defrag->cmd == GF_DEFRAG_CMD_START_LAYOUT_FIX)) { - /* root's layout got fixed */ - defrag->total_files = 1; + rpc_clnt_disable(defrag->rpc); + glusterd_defrag_rpc_put(defrag); + if (defrag->cbk_fn) + defrag->cbk_fn(volinfo, volinfo->rebal.defrag_status); - /* Step 1: Fix layout of all the directories */ - ret = gf_glusterd_rebalance_fix_layout (volinfo, defrag->mount); - if (ret) { - volinfo->defrag_status = GF_DEFRAG_STATUS_FAILED; - goto out; - } - - /* Completed first step */ - volinfo->defrag_status = GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE; + GF_FREE(defrag); + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_REBALANCE_DISCONNECTED, + "Rebalance process for volume %s has disconnected.", + volinfo->volname); + break; } + case RPC_CLNT_DESTROY: + glusterd_volinfo_unref(volinfo); + break; + default: + gf_msg_trace(this->name, 0, "got some other RPC event %d", event); + ret = 0; + break; + } - if ((defrag->cmd == GF_DEFRAG_CMD_START) || - (defrag->cmd == GF_DEFRAG_CMD_START_MIGRATE_DATA)) { - /* It was used by number of layout fixes on directories */ - defrag->total_files = 0; - - volinfo->defrag_status = GF_DEFRAG_STATUS_MIGRATE_DATA_STARTED; - - /* Step 2: Iterate over directories to move data */ - ret = gf_glusterd_rebalance_move_data (volinfo, defrag->mount); - if (ret) { - volinfo->defrag_status = GF_DEFRAG_STATUS_FAILED; - goto out; - } + return ret; +} - /* Completed second step */ - volinfo->defrag_status = GF_DEFRAG_STATUS_MIGRATE_DATA_COMPLETE; - } +int32_t +glusterd_defrag_notify(struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data) +{ + return glusterd_big_locked_notify(rpc, mydata, event, data, + __glusterd_defrag_notify); +} - /* Completed whole process */ - if (defrag->cmd == GF_DEFRAG_CMD_START) - volinfo->defrag_status = GF_DEFRAG_STATUS_COMPLETE; +int +glusterd_handle_defrag_start(glusterd_volinfo_t *volinfo, char *op_errstr, + size_t len, int cmd, defrag_cbk_fn_t cbk, + glusterd_op_t op) +{ + xlator_t *this = NULL; + int ret = -1; + glusterd_defrag_info_t *defrag = NULL; + runner_t runner = { + 0, + }; + glusterd_conf_t *priv = NULL; + char defrag_path[PATH_MAX]; + char sockfile[PATH_MAX] = { + 0, + }; + char pidfile[PATH_MAX] = { + 0, + }; + char logfile[PATH_MAX] = { + 0, + }; + char volname[PATH_MAX] = { + 0, + }; + char valgrind_logfile[PATH_MAX] = { + 0, + }; + char msg[1024] = { + 0, + }; + char *volfileserver = NULL; + char *localtime_logging = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + + priv = this->private; + GF_VALIDATE_OR_GOTO("glusterd", priv, out); + + GF_ASSERT(volinfo); + GF_ASSERT(op_errstr); + + ret = glusterd_defrag_start_validate(volinfo, op_errstr, len, op); + if (ret) + goto out; + if (!volinfo->rebal.defrag) + volinfo->rebal.defrag = GF_CALLOC(1, sizeof(*volinfo->rebal.defrag), + gf_gld_mt_defrag_info); + if (!volinfo->rebal.defrag) + goto out; + + defrag = volinfo->rebal.defrag; + + defrag->cmd = cmd; + + volinfo->rebal.defrag_cmd = cmd; + volinfo->rebal.op = op; + + LOCK_INIT(&defrag->lock); + + volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_STARTED; + + glusterd_volinfo_reset_defrag_stats(volinfo); + glusterd_store_perform_node_state_store(volinfo); + + GLUSTERD_GET_DEFRAG_DIR(defrag_path, volinfo, priv); + ret = mkdir_p(defrag_path, 0755, _gf_true); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_CREATE_DIR_FAILED, + "Failed to create " + "directory %s", + defrag_path); + goto out; + } + + GLUSTERD_GET_DEFRAG_SOCK_FILE(sockfile, volinfo); + 
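/* derive the pid/log file locations and build the rebalance command line */ +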
GLUSTERD_GET_DEFRAG_PID_FILE(pidfile, volinfo, priv); + snprintf(logfile, PATH_MAX, "%s/%s-%s.log", priv->logdir, volinfo->volname, + "rebalance"); + runinit(&runner); + + if (this->ctx->cmd_args.vgtool != _gf_none) { + snprintf(valgrind_logfile, PATH_MAX, "%s/valgrind-%s-rebalance.log", + priv->logdir, volinfo->volname); + + if (this->ctx->cmd_args.vgtool == _gf_memcheck) + runner_add_args(&runner, "valgrind", "--leak-check=full", + "--trace-children=yes", "--track-origins=yes", + NULL); + else + runner_add_args(&runner, "valgrind", "--tool=drd", NULL); + + runner_argprintf(&runner, "--log-file=%s", valgrind_logfile); + } + + snprintf(volname, sizeof(volname), "rebalance/%s", volinfo->volname); + + if (dict_get_strn(this->options, "transport.socket.bind-address", + SLEN("transport.socket.bind-address"), + &volfileserver) != 0) { + volfileserver = "localhost"; + } + + runner_add_args( + &runner, SBIN_DIR "/glusterfs", "-s", volfileserver, "--volfile-id", + volname, "--xlator-option", "*dht.use-readdirp=yes", "--xlator-option", + "*dht.lookup-unhashed=yes", "--xlator-option", + "*dht.assert-no-child-down=yes", "--xlator-option", + "*dht.readdir-optimize=on", "--process-name", "rebalance", NULL); + + runner_add_arg(&runner, "--xlator-option"); + runner_argprintf(&runner, "*dht.rebalance-cmd=%d", cmd); + runner_add_arg(&runner, "--xlator-option"); + runner_argprintf(&runner, "*dht.node-uuid=%s", uuid_utoa(MY_UUID)); + runner_add_arg(&runner, "--xlator-option"); + runner_argprintf(&runner, "*dht.commit-hash=%u", + volinfo->rebal.commit_hash); + runner_add_arg(&runner, "--socket-file"); + runner_argprintf(&runner, "%s", sockfile); + runner_add_arg(&runner, "--pid-file"); + runner_argprintf(&runner, "%s", pidfile); + runner_add_arg(&runner, "-l"); + runner_argprintf(&runner, "%s", logfile); + if (volinfo->memory_accounting) + runner_add_arg(&runner, "--mem-accounting"); + if (dict_get_strn(priv->opts, GLUSTERD_LOCALTIME_LOGGING_KEY, + SLEN(GLUSTERD_LOCALTIME_LOGGING_KEY), + &localtime_logging) == 0) { + if (strcmp(localtime_logging, "enable") == 0) + runner_add_arg(&runner, "--localtime-logging"); + } + + snprintf(msg, sizeof(msg), "Starting the rebalance service for volume %s", + volinfo->volname); + runner_log(&runner, this->name, GF_LOG_DEBUG, msg); + + ret = runner_run_nowait(&runner); + if (ret) { + gf_msg_debug("glusterd", 0, "rebalance command failed"); + goto out; + } + + sleep(5); + + ret = glusterd_rebalance_rpc_create(volinfo); + + // FIXME: this cbk is passed as NULL in all occurrences. May be + // we never needed it. 
+ if (cbk) + defrag->cbk_fn = cbk; - volinfo->rebalance_files = defrag->total_files; - volinfo->rebalance_data = defrag->total_data; - volinfo->lookedup_files = defrag->num_files_lookedup; out: - volinfo->defrag = NULL; - if (defrag) { - gf_log ("rebalance", GF_LOG_INFO, "rebalance on %s complete", - defrag->mount); - - snprintf (cmd_str, 1024, "umount -l %s", defrag->mount); - ret = system (cmd_str); - LOCK_DESTROY (&defrag->lock); - GF_FREE (defrag); - } - - return NULL; + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; } int -glusterd_defrag_stop_validate (glusterd_volinfo_t *volinfo, - char *op_errstr, size_t len) +glusterd_rebalance_defrag_init(glusterd_volinfo_t *volinfo, defrag_cbk_fn_t cbk) + { - int ret = -1; - if (glusterd_is_defrag_on (volinfo) == 0) { - snprintf (op_errstr, len, "Rebalance on %s is either Completed " - "or not yet started", volinfo->volname); - goto out; - } + glusterd_defrag_info_t *defrag = NULL; + int ret = -1; + + if (!volinfo->rebal.defrag) { + volinfo->rebal.defrag = GF_CALLOC(1, sizeof(*volinfo->rebal.defrag), + gf_gld_mt_defrag_info); + } else { + /* + * if defrag variable is already initialized, + * we skip the initialization. + */ ret = 0; + goto out; + } + + if (!volinfo->rebal.defrag) + goto out; + defrag = volinfo->rebal.defrag; + + defrag->cmd = volinfo->rebal.defrag_cmd; + LOCK_INIT(&defrag->lock); + if (cbk) + defrag->cbk_fn = cbk; + ret = 0; out: - gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + return ret; } int -glusterd_defrag_stop (glusterd_volinfo_t *volinfo, u_quad_t *files, - u_quad_t *size, char *op_errstr, size_t len) +glusterd_rebalance_rpc_create(glusterd_volinfo_t *volinfo) { - /* TODO: set a variaeble 'stop_defrag' here, it should be checked - in defrag loop */ - int ret = -1; - GF_ASSERT (volinfo); - GF_ASSERT (files); - GF_ASSERT (size); - GF_ASSERT (op_errstr); - - ret = glusterd_defrag_stop_validate (volinfo, op_errstr, len); - if (ret) - goto out; - if (!volinfo || !volinfo->defrag) { - ret = -1; - goto out; - } - - LOCK (&volinfo->defrag->lock); - { - volinfo->defrag_status = GF_DEFRAG_STATUS_STOPED; - *files = volinfo->defrag->total_files; - *size = volinfo->defrag->total_data; - } - UNLOCK (&volinfo->defrag->lock); - - ret = 0; + dict_t *options = NULL; + char sockfile[PATH_MAX] = { + 0, + }; + int ret = -1; + glusterd_defrag_info_t *defrag = volinfo->rebal.defrag; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + // rebalance process is not started + if (!defrag) + goto out; + + options = dict_new(); + if (!options) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); + goto out; + } + + GLUSTERD_GET_DEFRAG_SOCK_FILE(sockfile, volinfo); + + /* Setting frame-timeout to 10mins (600seconds). + * Unix domain sockets ensures that the connection is reliable. The + * default timeout of 30mins used for unreliable network connections is + * too long for unix domain socket connections. 
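+ *
+ * (Concretely, the 600 passed as the third argument to
+ * rpc_transport_unix_options_build() below is this frame-timeout in
+ * seconds: 600s = 10 minutes, versus the 1800s / 30 minute default.)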
+ */ + ret = rpc_transport_unix_options_build(options, sockfile, 600); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_UNIX_OP_BUILD_FAIL, + "Unix options build failed"); + goto out; + } + + glusterd_volinfo_ref(volinfo); + ret = glusterd_rpc_create(&defrag->rpc, options, glusterd_defrag_notify, + volinfo, _gf_true); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_RPC_CREATE_FAIL, + "Glusterd RPC creation failed"); + goto out; + } + ret = 0; out: - gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + if (options) + dict_unref(options); + return ret; } int -glusterd_defrag_status_get_v2 (glusterd_volinfo_t *volinfo, - gf2_cli_defrag_vol_rsp *rsp) +glusterd_rebalance_cmd_validate(int cmd, char *volname, + glusterd_volinfo_t **volinfo, char *op_errstr, + size_t len) { - if (!volinfo) - goto out; - - if (volinfo->defrag) { - LOCK (&volinfo->defrag->lock); - { - rsp->files = volinfo->defrag->total_files; - rsp->size = volinfo->defrag->total_data; - rsp->lookedup_files = volinfo->defrag->num_files_lookedup; - } - UNLOCK (&volinfo->defrag->lock); - } else { - rsp->files = volinfo->rebalance_files; - rsp->size = volinfo->rebalance_data; - rsp->lookedup_files = volinfo->lookedup_files; - } + int ret = -1; + + if (glusterd_volinfo_find(volname, volinfo)) { + gf_msg("glusterd", GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND, + "Received rebalance on invalid" + " volname %s", + volname); + snprintf(op_errstr, len, "Volume %s does not exist", volname); + goto out; + } + if ((*volinfo)->brick_count <= (*volinfo)->dist_leaf_count) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_DISTRIBUTE, + "Volume %s is not a " + "distribute type or contains only 1 brick", + volname); + snprintf(op_errstr, len, + "Volume %s is not a distribute " + "volume or contains only 1 brick.\n" + "Not performing rebalance", + volname); + goto out; + } + + if ((*volinfo)->status != GLUSTERD_STATUS_STARTED) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOL_STOPPED, + "Received rebalance on stopped" + " volname %s", + volname); + snprintf(op_errstr, len, + "Volume %s needs to " + "be started to perform rebalance", + volname); + goto out; + } + + ret = 0; - rsp->op_errno = volinfo->defrag_status; - rsp->op_ret = 0; out: - return 0; + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; } int -glusterd_defrag_status_get (glusterd_volinfo_t *volinfo, - gf1_cli_defrag_vol_rsp *rsp) +__glusterd_handle_defrag_volume(rpcsvc_request_t *req) { - if (!volinfo) - goto out; - - if (volinfo->defrag) { - LOCK (&volinfo->defrag->lock); - { - rsp->files = volinfo->defrag->total_files; - rsp->size = volinfo->defrag->total_data; - rsp->lookedup_files = volinfo->defrag->num_files_lookedup; - } - UNLOCK (&volinfo->defrag->lock); - } else { - rsp->files = volinfo->rebalance_files; - rsp->size = volinfo->rebalance_data; - rsp->lookedup_files = volinfo->lookedup_files; + int32_t ret = -1; + gf_cli_req cli_req = {{ + 0, + }}; + glusterd_conf_t *priv = NULL; + int32_t op = GD_OP_NONE; + dict_t *dict = NULL; + char *volname = NULL; + gf_cli_defrag_type cmd = 0; + char msg[2048] = { + 0, + }; + xlator_t *this = NULL; + + GF_ASSERT(req); + this = THIS; + GF_ASSERT(this); + + priv = this->private; + GF_ASSERT(priv); + + ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + // failed to decode msg; + req->rpc_err = GARBAGE_ARGS; + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL); + goto out; + } + + if (cli_req.dict.dict_len) { + /* Unserialize the 
dictionary */ + dict = dict_new(); + + ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + "failed to " + "unserialize req-buffer to dictionary"); + snprintf(msg, sizeof(msg), + "Unable to decode the " + "command"); + goto out; } - - rsp->op_errno = volinfo->defrag_status; - rsp->op_ret = 0; + } + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + snprintf(msg, sizeof(msg), "Failed to get volume name"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", msg); + goto out; + } + + ret = dict_get_int32n(dict, "rebalance-command", SLEN("rebalance-command"), + (int32_t *)&cmd); + if (ret) { + snprintf(msg, sizeof(msg), "Failed to get command"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", msg); + goto out; + } + + ret = dict_set_static_bin(dict, "node-uuid", MY_UUID, 16); + if (ret) + goto out; + + if ((cmd == GF_DEFRAG_CMD_STATUS) || (cmd == GF_DEFRAG_CMD_STOP)) { + op = GD_OP_DEFRAG_BRICK_VOLUME; + } else + op = GD_OP_REBALANCE; + + if (priv->op_version < GD_OP_VERSION_6_0) { + gf_msg_debug(this->name, 0, + "The cluster is operating at " + "version less than %d. Falling back " + "to op-sm framework.", + GD_OP_VERSION_6_0); + ret = glusterd_op_begin(req, op, dict, msg, sizeof(msg)); + glusterd_friend_sm(); + glusterd_op_sm(); + } else { + ret = glusterd_mgmt_v3_initiate_all_phases_with_brickop_phase(req, op, + dict); + } out: - return 0; + if (ret) { + if (msg[0] == '\0') + snprintf(msg, sizeof(msg), "Operation failed"); + ret = glusterd_op_send_cli_response(GD_OP_REBALANCE, ret, 0, req, dict, + msg); + } + + free(cli_req.dict.dict_val); // malloced by xdr + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; } -void -glusterd_rebalance_cmd_attempted_log (int cmd, char *volname) +int +glusterd_handle_defrag_volume(rpcsvc_request_t *req) { - switch (cmd) { - case GF_DEFRAG_CMD_START_LAYOUT_FIX: - gf_cmd_log ("Volume rebalance"," on volname: %s " - "cmd: start fix layout , attempted", - volname); - break; - case GF_DEFRAG_CMD_START_MIGRATE_DATA: - gf_cmd_log ("Volume rebalance"," on volname: %s " - "cmd: start data migrate attempted", - volname); - break; - case GF_DEFRAG_CMD_START: - gf_cmd_log ("Volume rebalance"," on volname: %s " - "cmd: start, attempted", volname); - break; - case GF_DEFRAG_CMD_STOP: - gf_cmd_log ("Volume rebalance"," on volname: %s " - "cmd: stop, attempted", volname); - break; - default: - break; - } - - gf_log ("glusterd", GF_LOG_INFO, "Received rebalance volume %d on %s", - cmd, volname); + return glusterd_big_locked_handler(req, __glusterd_handle_defrag_volume); } -void -glusterd_rebalance_cmd_log (int cmd, char *volname, int status) +static int +glusterd_brick_validation(dict_t *dict, char *key, data_t *value, void *data) { - if (cmd != GF_DEFRAG_CMD_STATUS) { - gf_cmd_log ("volume rebalance"," on volname: %s %d %s", - volname, cmd, ((status)?"FAILED":"SUCCESS")); - } + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_volinfo_t *volinfo = data; + glusterd_brickinfo_t *brickinfo = NULL; + + this = THIS; + GF_ASSERT(this); + + ret = glusterd_volume_brickinfo_get_by_brick(value->data, volinfo, + &brickinfo, _gf_false); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_BRICK_NOT_FOUND, + "Incorrect brick %s for " + "volume %s", + value->data, volinfo->volname); + return ret; + } + + if (!brickinfo->decommissioned) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, 
GD_MSG_BRICK_NOT_FOUND, + "Incorrect brick %s for " + "volume %s", + value->data, volinfo->volname); + ret = -1; + return ret; + } + + return ret; } int -glusterd_defrag_start_validate (glusterd_volinfo_t *volinfo, char *op_errstr, - size_t len) +glusterd_set_rebalance_id_in_rsp_dict(dict_t *req_dict, dict_t *rsp_dict) { - int ret = -1; - - if (glusterd_is_defrag_on (volinfo)) { - gf_log ("glusterd", GF_LOG_DEBUG, - "rebalance on volume %s already started", - volinfo->volname); - snprintf (op_errstr, len, "Rebalance on %s is already started", - volinfo->volname); - goto out; + int ret = -1; + int32_t cmd = 0; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + char msg[2048] = {0}; + char *task_id_str = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + GF_ASSERT(rsp_dict); + GF_ASSERT(req_dict); + + ret = dict_get_strn(rsp_dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg_debug(this->name, 0, "volname not found"); + goto out; + } + + ret = dict_get_int32n(rsp_dict, "rebalance-command", + SLEN("rebalance-command"), &cmd); + if (ret) { + gf_msg_debug(this->name, 0, "cmd not found"); + goto out; + } + + ret = glusterd_rebalance_cmd_validate(cmd, volname, &volinfo, msg, + sizeof(msg)); + if (ret) { + gf_msg_debug(this->name, 0, "failed to validate"); + goto out; + } + + /* reblance id is generted in glusterd_mgmt_v3_op_stage_rebalance(), but + * rsp_dict is unavailable there. So copying it to rsp_dict from req_dict + * here. So that cli can display the rebalance id.*/ + if ((cmd == GF_DEFRAG_CMD_START) || + (cmd == GF_DEFRAG_CMD_START_LAYOUT_FIX) || + (cmd == GF_DEFRAG_CMD_START_FORCE)) { + if (is_origin_glusterd(rsp_dict)) { + ret = dict_get_strn(req_dict, GF_REBALANCE_TID_KEY, + SLEN(GF_REBALANCE_TID_KEY), &task_id_str); + if (ret) { + snprintf(msg, sizeof(msg), "Missing rebalance-id"); + gf_msg(this->name, GF_LOG_WARNING, 0, + GD_MSG_REBALANCE_ID_MISSING, "%s", msg); + ret = 0; + } else { + gf_uuid_parse(task_id_str, volinfo->rebal.rebalance_id); + ret = glusterd_copy_uuid_to_dict(volinfo->rebal.rebalance_id, + rsp_dict, GF_REBALANCE_TID_KEY, + SLEN(GF_REBALANCE_TID_KEY)); + if (ret) { + snprintf(msg, sizeof(msg), + "Failed to set rebalance id for volume %s", + volname); + gf_msg(this->name, GF_LOG_WARNING, 0, + GD_MSG_DICT_SET_FAILED, "%s", msg); + } + } } - - if (glusterd_is_rb_started (volinfo) || - glusterd_is_rb_paused (volinfo)) { - gf_log ("glusterd", GF_LOG_DEBUG, - "Replace brick is in progress on volume %s", - volinfo->volname); - snprintf (op_errstr, len, "Replace brick is in progress on " - "volume %s", volinfo->volname); + } + + /* Set task-id, if available, in rsp_dict for operations other than + * start. 
This is needed when we want rebalance id in xml output + */ + if (cmd == GF_DEFRAG_CMD_STATUS || cmd == GF_DEFRAG_CMD_STOP) { + if (!gf_uuid_is_null(volinfo->rebal.rebalance_id)) { + if (GD_OP_REMOVE_BRICK == volinfo->rebal.op) + ret = glusterd_copy_uuid_to_dict( + volinfo->rebal.rebalance_id, rsp_dict, + GF_REMOVE_BRICK_TID_KEY, SLEN(GF_REMOVE_BRICK_TID_KEY)); + else + ret = glusterd_copy_uuid_to_dict(volinfo->rebal.rebalance_id, + rsp_dict, GF_REBALANCE_TID_KEY, + SLEN(GF_REBALANCE_TID_KEY)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set task-id for volume %s", volname); goto out; + } } - ret = 0; + } out: - gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + return ret; } int -glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, - size_t len, int cmd) +glusterd_mgmt_v3_op_stage_rebalance(dict_t *dict, char **op_errstr) { - int ret = -1; - glusterd_defrag_info_t *defrag = NULL; - char cmd_str[4096] = {0,}; - glusterd_conf_t *priv = NULL; - - priv = THIS->private; - - GF_ASSERT (volinfo); - GF_ASSERT (op_errstr); - - ret = glusterd_defrag_start_validate (volinfo, op_errstr, len); - if (ret) + char *volname = NULL; + char *cmd_str = NULL; + int ret = 0; + int32_t cmd = 0; + char msg[2048] = {0}; + glusterd_volinfo_t *volinfo = NULL; + char *task_id_str = NULL; + xlator_t *this = 0; + + this = THIS; + GF_ASSERT(this); + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg_debug(this->name, 0, "volname not found"); + goto out; + } + + ret = dict_get_int32n(dict, "rebalance-command", SLEN("rebalance-command"), + &cmd); + if (ret) { + gf_msg_debug(this->name, 0, "cmd not found"); + goto out; + } + + ret = glusterd_rebalance_cmd_validate(cmd, volname, &volinfo, msg, + sizeof(msg)); + if (ret) { + gf_msg_debug(this->name, 0, "failed to validate"); + goto out; + } + switch (cmd) { + case GF_DEFRAG_CMD_START: + case GF_DEFRAG_CMD_START_LAYOUT_FIX: + /* Check if the connected clients are all of version + * glusterfs-3.6 and higher. This is needed to prevent some data + * loss issues that could occur when older clients are connected + * when rebalance is run. This check can be bypassed by using + * 'force' + */ + ret = glusterd_check_client_op_version_support( + volname, GD_OP_VERSION_3_6_0, NULL); + if (ret) { + ret = gf_asprintf(op_errstr, + "Volume %s has one or " + "more connected clients of a version" + " lower than GlusterFS-v3.6.0. 
" + "Starting rebalance in this state " + "could lead to data loss.\nPlease " + "disconnect those clients before " + "attempting this command again.", + volname); goto out; - if (!volinfo->defrag) - volinfo->defrag = GF_CALLOC (1, sizeof (glusterd_defrag_info_t), - gf_gld_mt_defrag_info); - if (!volinfo->defrag) + } + /* Fall through */ + case GF_DEFRAG_CMD_START_FORCE: + if (is_origin_glusterd(dict)) { + ret = glusterd_generate_and_set_task_id( + dict, GF_REBALANCE_TID_KEY, SLEN(GF_REBALANCE_TID_KEY)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TASKID_GEN_FAIL, + "Failed to generate task-id"); + goto out; + } + } else { + ret = dict_get_strn(dict, GF_REBALANCE_TID_KEY, + SLEN(GF_REBALANCE_TID_KEY), &task_id_str); + if (ret) { + snprintf(msg, sizeof(msg), "Missing rebalance-id"); + gf_msg(this->name, GF_LOG_WARNING, 0, + GD_MSG_REBALANCE_ID_MISSING, "%s", msg); + ret = 0; + } + } + ret = glusterd_defrag_start_validate(volinfo, msg, sizeof(msg), + GD_OP_REBALANCE); + if (ret) { + gf_msg_debug(this->name, 0, + "defrag start validate " + "failed for volume %s.", + volinfo->volname); goto out; + } + break; + case GF_DEFRAG_CMD_STATUS: + case GF_DEFRAG_CMD_STOP: - defrag = volinfo->defrag; - - defrag->cmd = cmd; - - LOCK_INIT (&defrag->lock); - snprintf (defrag->mount, 1024, "%s/mount/%s", - priv->workdir, volinfo->volname); - /* Create a directory, mount glusterfs over it, start glusterfs-defrag */ - snprintf (cmd_str, sizeof (cmd_str), "mkdir -p %s", defrag->mount); - ret = system (cmd_str); - - if (ret) { - gf_log("glusterd", GF_LOG_DEBUG, "command: %s failed", cmd_str); + ret = dict_get_strn(dict, "cmd-str", SLEN("cmd-str"), &cmd_str); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get " + "command string"); + ret = -1; goto out; - } - - snprintf (cmd_str, sizeof (cmd_str), - "%s/sbin/glusterfs -s localhost --volfile-id %s " - "--xlator-option *dht.use-readdirp=yes " - "--xlator-option *dht.lookup-unhashed=yes %s", - GFS_PREFIX, volinfo->volname, - defrag->mount); - ret = gf_system (cmd_str); - if (ret) { - gf_log("glusterd", GF_LOG_DEBUG, "command: %s failed", cmd_str); + } + if ((strstr(cmd_str, "rebalance") != NULL) && + (volinfo->rebal.op != GD_OP_REBALANCE)) { + snprintf(msg, sizeof(msg), + "Rebalance not started " + "for volume %s.", + volinfo->volname); + ret = -1; goto out; - } + } + + if (strstr(cmd_str, "remove-brick") != NULL) { + if (volinfo->rebal.op != GD_OP_REMOVE_BRICK) { + snprintf(msg, sizeof(msg), + "remove-brick not " + "started for volume %s.", + volinfo->volname); + ret = -1; + goto out; + } - volinfo->defrag_status = GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED; + /* For remove-brick status/stop command check whether + * given input brick is part of volume or not.*/ - ret = pthread_create (&defrag->th, NULL, glusterd_defrag_start, - volinfo); - if (ret) { - snprintf (cmd_str, sizeof (cmd_str), "umount -l %s", defrag->mount); - if (system (cmd_str)) - gf_log("glusterd", GF_LOG_DEBUG, "command: %s " - "failed", cmd_str); - } + ret = dict_foreach_fnmatch(dict, "brick*", + glusterd_brick_validation, volinfo); + if (ret == -1) { + snprintf(msg, sizeof(msg), + "Incorrect brick" + " for volume %s", + volinfo->volname); + goto out; + } + } + break; + + default: + break; + } + + ret = 0; out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + if (ret && op_errstr && msg[0]) + *op_errstr = gf_strdup(msg); + + return ret; } int -glusterd_rebalance_cmd_validate (int cmd, char *volname, - glusterd_volinfo_t **volinfo, - 
char *op_errstr, size_t len) +glusterd_mgmt_v3_op_rebalance(dict_t *dict, char **op_errstr, dict_t *rsp_dict) { - int ret = -1; + char *volname = NULL; + int ret = 0; + int32_t cmd = 0; + char msg[2048] = {0}; + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_brickinfo_t *tmp = NULL; + gf_boolean_t volfile_update = _gf_false; + char *task_id_str = NULL; + xlator_t *this = NULL; + uint32_t commit_hash; + int32_t is_force = 0; + + this = THIS; + GF_ASSERT(this); + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg_debug(this->name, 0, "volname not given"); + goto out; + } + + ret = dict_get_int32n(dict, "rebalance-command", SLEN("rebalance-command"), + &cmd); + if (ret) { + gf_msg_debug(this->name, 0, "command not given"); + goto out; + } + + ret = glusterd_rebalance_cmd_validate(cmd, volname, &volinfo, msg, + sizeof(msg)); + if (ret) { + gf_msg_debug(this->name, 0, "cmd validate failed"); + goto out; + } + + switch (cmd) { + case GF_DEFRAG_CMD_START: + case GF_DEFRAG_CMD_START_LAYOUT_FIX: + case GF_DEFRAG_CMD_START_FORCE: + + ret = dict_get_int32n(dict, "force", SLEN("force"), &is_force); + if (ret) + is_force = 0; + if (!is_force) { + /* Reset defrag status to 'NOT STARTED' whenever a + * remove-brick/rebalance command is issued to remove + * stale information from previous run. + */ + volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_NOT_STARTED; + + ret = dict_get_strn(dict, GF_REBALANCE_TID_KEY, + SLEN(GF_REBALANCE_TID_KEY), &task_id_str); + if (ret) { + gf_msg_debug(this->name, 0, + "Missing rebalance" + " id"); + ret = 0; + } else { + gf_uuid_parse(task_id_str, volinfo->rebal.rebalance_id); + volinfo->rebal.op = GD_OP_REBALANCE; + } + if (!gd_should_i_start_rebalance(volinfo)) { + /* Store the rebalance-id and rebalance command + * even if the peer isn't starting a rebalance + * process. On peers where a rebalance process + * is started, glusterd_handle_defrag_start + * performs the storing. + * Storing this is needed for having + * 'volume status' work correctly. + */ + glusterd_store_perform_node_state_store(volinfo); + break; + } + if (dict_get_uint32(dict, "commit-hash", &commit_hash) == 0) { + volinfo->rebal.commit_hash = commit_hash; + } + ret = glusterd_handle_defrag_start(volinfo, msg, sizeof(msg), + cmd, NULL, GD_OP_REBALANCE); + break; + } else { + /* Reset defrag status to 'STARTED' so that the + * pid is checked and restarted accordingly. + * If the pid is not running it executes the + * "NOT_STARTED" case and restarts the process + */ + volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_STARTED; + volinfo->rebal.defrag_cmd = cmd; + volinfo->rebal.op = GD_OP_REBALANCE; + + ret = dict_get_strn(dict, GF_REBALANCE_TID_KEY, + SLEN(GF_REBALANCE_TID_KEY), &task_id_str); + if (ret) { + gf_msg_debug(this->name, 0, + "Missing rebalance" + " id"); + ret = 0; + } else { + gf_uuid_parse(task_id_str, volinfo->rebal.rebalance_id); + volinfo->rebal.op = GD_OP_REBALANCE; + } + if (dict_get_uint32(dict, "commit-hash", &commit_hash) == 0) { + volinfo->rebal.commit_hash = commit_hash; + } + ret = glusterd_restart_rebalance_for_volume(volinfo); + break; + } + case GF_DEFRAG_CMD_STOP: + /* Clear task-id only on explicitly stopping rebalance. 
+ * Also clear the stored operation, so it doesn't cause trouble + * with future rebalance/remove-brick starts + */ + gf_uuid_clear(volinfo->rebal.rebalance_id); + volinfo->rebal.op = GD_OP_NONE; + + /* Fall back to the old volume file in case of decommission*/ + cds_list_for_each_entry_safe(brickinfo, tmp, &volinfo->bricks, + brick_list) + { + if (!brickinfo->decommissioned) + continue; + brickinfo->decommissioned = 0; + volfile_update = _gf_true; + } + + if (volfile_update == _gf_false) { + ret = 0; + break; + } - if (glusterd_volinfo_find(volname, volinfo)) { - gf_log ("glusterd", GF_LOG_ERROR, "Received rebalance on invalid" - " volname %s", volname); - snprintf (op_errstr, len, "Volume %s does not exist", - volname); + ret = glusterd_create_volfiles_and_notify_services(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, + GD_MSG_VOLFILE_CREATE_FAIL, "failed to create volfiles"); goto out; - } + } - if ((*volinfo)->status != GLUSTERD_STATUS_STARTED) { - gf_log ("glusterd", GF_LOG_ERROR, "Received rebalance on stopped" - " volname %s", volname); - snprintf (op_errstr, len, "Volume %s needs to " - "be started to perform rebalance", volname); + ret = glusterd_store_volinfo(volinfo, + GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOLINFO_SET_FAIL, + "failed to store volinfo"); goto out; - } - ret = 0; + } + + ret = 0; + break; + + case GF_DEFRAG_CMD_STATUS: + break; + default: + break; + } + out: - gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + if (ret && op_errstr && msg[0]) + *op_errstr = gf_strdup(msg); + + return ret; } int -glusterd_handle_defrag_volume_v2 (rpcsvc_request_t *req) +glusterd_op_stage_rebalance(dict_t *dict, char **op_errstr) { - int32_t ret = -1; - gf1_cli_defrag_vol_req cli_req = {0,}; - glusterd_volinfo_t *volinfo = NULL; - gf2_cli_defrag_vol_rsp rsp = {0,}; - char msg[2048] = {0}; - glusterd_conf_t *priv = NULL; - - GF_ASSERT (req); - - priv = THIS->private; - if (!gf_xdr_to_cli_defrag_vol_req (req->msg[0], &cli_req)) { - //failed to decode msg; - req->rpc_err = GARBAGE_ARGS; + char *volname = NULL; + char *cmd_str = NULL; + int ret = 0; + int32_t cmd = 0; + char msg[2048] = {0}; + glusterd_volinfo_t *volinfo = NULL; + char *task_id_str = NULL; + dict_t *op_ctx = NULL; + xlator_t *this = 0; + + this = THIS; + GF_ASSERT(this); + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg_debug(this->name, 0, "volname not found"); + goto out; + } + + ret = dict_get_int32n(dict, "rebalance-command", SLEN("rebalance-command"), + &cmd); + if (ret) { + gf_msg_debug(this->name, 0, "cmd not found"); + goto out; + } + + ret = glusterd_rebalance_cmd_validate(cmd, volname, &volinfo, msg, + sizeof(msg)); + if (ret) { + gf_msg_debug(this->name, 0, "failed to validate"); + goto out; + } + switch (cmd) { + case GF_DEFRAG_CMD_START: + case GF_DEFRAG_CMD_START_LAYOUT_FIX: + /* Check if the connected clients are all of version + * glusterfs-3.6 and higher. This is needed to prevent some data + * loss issues that could occur when older clients are connected + * when rebalance is run. This check can be bypassed by using + * 'force' + */ + ret = glusterd_check_client_op_version_support( + volname, GD_OP_VERSION_3_6_0, NULL); + if (ret) { + ret = gf_asprintf(op_errstr, + "Volume %s has one or " + "more connected clients of a version" + " lower than GlusterFS-v3.6.0. 
" + "Starting rebalance in this state " + "could lead to data loss.\nPlease " + "disconnect those clients before " + "attempting this command again.", + volname); goto out; - } - - glusterd_rebalance_cmd_attempted_log (cli_req.cmd, cli_req.volname); + } + /* Fall through */ + case GF_DEFRAG_CMD_START_FORCE: + if (is_origin_glusterd(dict)) { + op_ctx = glusterd_op_get_ctx(); + if (!op_ctx) { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OPCTX_GET_FAIL, + "Failed to get op_ctx"); + goto out; + } - rsp.volname = cli_req.volname; - rsp.op_ret = -1; - rsp.op_errstr = msg; + ret = glusterd_generate_and_set_task_id( + op_ctx, GF_REBALANCE_TID_KEY, SLEN(GF_REBALANCE_TID_KEY)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TASKID_GEN_FAIL, + "Failed to generate task-id"); + goto out; + } + } else { + ret = dict_get_strn(dict, GF_REBALANCE_TID_KEY, + SLEN(GF_REBALANCE_TID_KEY), &task_id_str); + if (ret) { + snprintf(msg, sizeof(msg), "Missing rebalance-id"); + gf_msg(this->name, GF_LOG_WARNING, 0, + GD_MSG_REBALANCE_ID_MISSING, "%s", msg); + ret = 0; + } + } + ret = glusterd_defrag_start_validate(volinfo, msg, sizeof(msg), + GD_OP_REBALANCE); + if (ret) { + gf_msg_debug(this->name, 0, + "defrag start validate " + "failed for volume %s.", + volinfo->volname); + goto out; + } + break; + case GF_DEFRAG_CMD_STATUS: + case GF_DEFRAG_CMD_STOP: - ret = glusterd_rebalance_cmd_validate (cli_req.cmd, cli_req.volname, - &volinfo, msg, sizeof (msg)); - if (ret) + ret = dict_get_strn(dict, "cmd-str", SLEN("cmd-str"), &cmd_str); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get " + "command string"); + ret = -1; goto out; + } + if ((strstr(cmd_str, "rebalance") != NULL) && + (volinfo->rebal.op != GD_OP_REBALANCE)) { + snprintf(msg, sizeof(msg), + "Rebalance not started " + "for volume %s.", + volinfo->volname); + ret = -1; + goto out; + } + + if (strstr(cmd_str, "remove-brick") != NULL) { + if (volinfo->rebal.op != GD_OP_REMOVE_BRICK) { + snprintf(msg, sizeof(msg), + "remove-brick not " + "started for volume %s.", + volinfo->volname); + ret = -1; + goto out; + } + + /* For remove-brick status/stop command check whether + * given input brick is part of volume or not.*/ + + ret = dict_foreach_fnmatch(dict, "brick*", + glusterd_brick_validation, volinfo); + if (ret == -1) { + snprintf(msg, sizeof(msg), + "Incorrect brick" + " for volume %s", + volinfo->volname); + goto out; + } + } + break; - switch (cli_req.cmd) { - case GF_DEFRAG_CMD_START: - case GF_DEFRAG_CMD_START_LAYOUT_FIX: - case GF_DEFRAG_CMD_START_MIGRATE_DATA: - ret = glusterd_handle_defrag_start (volinfo, msg, sizeof (msg), - cli_req.cmd); - rsp.op_ret = ret; - break; - case GF_DEFRAG_CMD_STOP: - ret = glusterd_defrag_stop (volinfo, &rsp.files, &rsp.size, - msg, sizeof (msg)); - rsp.op_ret = ret; - break; - case GF_DEFRAG_CMD_STATUS: - ret = glusterd_defrag_status_get_v2 (volinfo, &rsp); - break; default: - break; - } - glusterd_rebalance_cmd_log (cli_req.cmd, cli_req.volname, rsp.op_ret); -out: + break; + } - ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - gf_xdr_serialize_cli_defrag_vol_rsp_v2); - if (cli_req.volname) - free (cli_req.volname);//malloced by xdr + ret = 0; +out: + if (ret && op_errstr && msg[0]) + *op_errstr = gf_strdup(msg); - return 0; + return ret; } int -glusterd_handle_defrag_volume (rpcsvc_request_t *req) +glusterd_op_rebalance(dict_t *dict, char **op_errstr, dict_t *rsp_dict) { - int32_t ret = -1; - gf1_cli_defrag_vol_req cli_req = {0,}; - glusterd_conf_t 
*priv = NULL; - char cmd_str[4096] = {0,}; - glusterd_volinfo_t *volinfo = NULL; - gf1_cli_defrag_vol_rsp rsp = {0,}; - char msg[2048] = {0}; - - GF_ASSERT (req); - - priv = THIS->private; - - if (!gf_xdr_to_cli_defrag_vol_req (req->msg[0], &cli_req)) { - //failed to decode msg; - req->rpc_err = GARBAGE_ARGS; + char *volname = NULL; + int ret = 0; + int32_t cmd = 0; + char msg[2048] = {0}; + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_brickinfo_t *tmp = NULL; + gf_boolean_t volfile_update = _gf_false; + char *task_id_str = NULL; + dict_t *ctx = NULL; + xlator_t *this = NULL; + uint32_t commit_hash; + int32_t is_force = 0; + + this = THIS; + GF_ASSERT(this); + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg_debug(this->name, 0, "volname not given"); + goto out; + } + + ret = dict_get_int32n(dict, "rebalance-command", SLEN("rebalance-command"), + &cmd); + if (ret) { + gf_msg_debug(this->name, 0, "command not given"); + goto out; + } + + ret = glusterd_rebalance_cmd_validate(cmd, volname, &volinfo, msg, + sizeof(msg)); + if (ret) { + gf_msg_debug(this->name, 0, "cmd validate failed"); + goto out; + } + + /* Set task-id, if available, in op_ctx dict for operations other than + * start + */ + if (cmd == GF_DEFRAG_CMD_STATUS || cmd == GF_DEFRAG_CMD_STOP) { + if (!gf_uuid_is_null(volinfo->rebal.rebalance_id)) { + ctx = glusterd_op_get_ctx(); + if (!ctx) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OPCTX_GET_FAIL, + "Failed to get op_ctx"); + ret = -1; goto out; + } + + if (GD_OP_REMOVE_BRICK == volinfo->rebal.op) + ret = glusterd_copy_uuid_to_dict(volinfo->rebal.rebalance_id, + ctx, GF_REMOVE_BRICK_TID_KEY, + SLEN(GF_REMOVE_BRICK_TID_KEY)); + else + ret = glusterd_copy_uuid_to_dict(volinfo->rebal.rebalance_id, + ctx, GF_REBALANCE_TID_KEY, + SLEN(GF_REBALANCE_TID_KEY)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TASKID_GEN_FAIL, + "Failed to set task-id"); + goto out; + } } + } - glusterd_rebalance_cmd_attempted_log (cli_req.cmd, cli_req.volname); - - rsp.volname = cli_req.volname; - rsp.op_ret = -1; - - ret = glusterd_rebalance_cmd_validate (cli_req.cmd, cli_req.volname, - &volinfo, msg, sizeof (msg)); - if (ret) - goto out; - switch (cli_req.cmd) { + switch (cmd) { case GF_DEFRAG_CMD_START: case GF_DEFRAG_CMD_START_LAYOUT_FIX: - case GF_DEFRAG_CMD_START_MIGRATE_DATA: - { - ret = glusterd_handle_defrag_start (volinfo, msg, sizeof (msg), - cli_req.cmd); - rsp.op_ret = ret; + case GF_DEFRAG_CMD_START_FORCE: + + ret = dict_get_int32n(dict, "force", SLEN("force"), &is_force); + if (ret) + is_force = 0; + if (!is_force) { + /* Reset defrag status to 'NOT STARTED' whenever a + * remove-brick/rebalance command is issued to remove + * stale information from previous run. + */ + volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_NOT_STARTED; + + ret = dict_get_strn(dict, GF_REBALANCE_TID_KEY, + SLEN(GF_REBALANCE_TID_KEY), &task_id_str); + if (ret) { + gf_msg_debug(this->name, 0, + "Missing rebalance" + " id"); + ret = 0; + } else { + gf_uuid_parse(task_id_str, volinfo->rebal.rebalance_id); + volinfo->rebal.op = GD_OP_REBALANCE; + } + if (!gd_should_i_start_rebalance(volinfo)) { + /* Store the rebalance-id and rebalance command + * even if the peer isn't starting a rebalance + * process. On peers where a rebalance process + * is started, glusterd_handle_defrag_start + * performs the storing. + * Storing this is needed for having + * 'volume status' work correctly. 
+ */ + glusterd_store_perform_node_state_store(volinfo); + break; + } + if (dict_get_uint32(dict, "commit-hash", &commit_hash) == 0) { + volinfo->rebal.commit_hash = commit_hash; + } + ret = glusterd_handle_defrag_start(volinfo, msg, sizeof(msg), + cmd, NULL, GD_OP_REBALANCE); break; - } + } else { + /* Reset defrag status to 'STARTED' so that the + * pid is checked and restarted accordingly. + * If the pid is not running it executes the + * "NOT_STARTED" case and restarts the process + */ + volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_STARTED; + volinfo->rebal.defrag_cmd = cmd; + volinfo->rebal.op = GD_OP_REBALANCE; + + ret = dict_get_strn(dict, GF_REBALANCE_TID_KEY, + SLEN(GF_REBALANCE_TID_KEY), &task_id_str); + if (ret) { + gf_msg_debug(this->name, 0, + "Missing rebalance" + " id"); + ret = 0; + } else { + gf_uuid_parse(task_id_str, volinfo->rebal.rebalance_id); + volinfo->rebal.op = GD_OP_REBALANCE; + } + if (dict_get_uint32(dict, "commit-hash", &commit_hash) == 0) { + volinfo->rebal.commit_hash = commit_hash; + } + ret = glusterd_restart_rebalance_for_volume(volinfo); + break; + } case GF_DEFRAG_CMD_STOP: - ret = glusterd_defrag_stop (volinfo, &rsp.files, &rsp.size, - msg, sizeof (msg)); - rsp.op_ret = ret; + /* Clear task-id only on explicitly stopping rebalance. + * Also clear the stored operation, so it doesn't cause trouble + * with future rebalance/remove-brick starts + */ + gf_uuid_clear(volinfo->rebal.rebalance_id); + volinfo->rebal.op = GD_OP_NONE; + + /* Fall back to the old volume file in case of decommission*/ + cds_list_for_each_entry_safe(brickinfo, tmp, &volinfo->bricks, + brick_list) + { + if (!brickinfo->decommissioned) + continue; + brickinfo->decommissioned = 0; + volfile_update = _gf_true; + } + + if (volfile_update == _gf_false) { + ret = 0; break; + } + + ret = glusterd_create_volfiles_and_notify_services(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, + GD_MSG_VOLFILE_CREATE_FAIL, "failed to create volfiles"); + goto out; + } + + ret = glusterd_store_volinfo(volinfo, + GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOLINFO_SET_FAIL, + "failed to store volinfo"); + goto out; + } + + ret = 0; + break; + case GF_DEFRAG_CMD_STATUS: - ret = glusterd_defrag_status_get (volinfo, &rsp); - break; + break; default: - break; - } - if (ret) - gf_log("glusterd", GF_LOG_DEBUG, "command: %s failed",cmd_str); - - if (cli_req.cmd != GF_DEFRAG_CMD_STATUS) { - gf_cmd_log ("volume rebalance"," on volname: %s %d %s", - cli_req.volname, - cli_req.cmd, ((ret)?"FAILED":"SUCCESS")); - } + break; + } out: - ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - gf_xdr_serialize_cli_defrag_vol_rsp); - if (cli_req.volname) - free (cli_req.volname);//malloced by xdr + if (ret && op_errstr && msg[0]) + *op_errstr = gf_strdup(msg); - return 0; + return ret; +} + +int32_t +glusterd_defrag_event_notify_handle(dict_t *dict) +{ + glusterd_volinfo_t *volinfo = NULL; + char *volname = NULL; + char *volname_ptr = NULL; + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(dict); + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get volname"); + return ret; + } + + volname_ptr = strstr(volname, "rebalance/"); + if (volname_ptr) { + volname_ptr = strchr(volname_ptr, '/'); + volname = volname_ptr + 1; + } else { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_REBALANCE_PFX_IN_VOLNAME, + 
"volname received (%s) is not prefixed with rebalance.", + volname); + ret = -1; + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, + "Failed to get volinfo for %s", volname); + return ret; + } + + ret = glusterd_defrag_volume_status_update(volinfo, dict, 0); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DEFRAG_STATUS_UPDATE_FAIL, + "Failed to update status"); + gf_event(EVENT_REBALANCE_STATUS_UPDATE_FAILED, "volume=%s", + volinfo->volname); + } + +out: + return ret; } diff --git a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c new file mode 100644 index 00000000000..43c2f4373e0 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c @@ -0,0 +1,716 @@ +/* + Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#include <glusterfs/common-utils.h> +#include "cli1-xdr.h" +#include "xdr-generic.h" +#include <glusterfs/glusterfs.h> +#include "glusterd.h" +#include "glusterd-op-sm.h" +#include "glusterd-geo-rep.h" +#include "glusterd-store.h" +#include "glusterd-utils.h" +#include "glusterd-svc-mgmt.h" +#include "glusterd-svc-helper.h" +#include "glusterd-volgen.h" +#include "glusterd-messages.h" +#include "glusterd-server-quorum.h" +#include "glusterd-mgmt.h" +#include <glusterfs/run.h> +#include <glusterfs/syscall.h> + +#include <signal.h> + +int +glusterd_mgmt_v3_initiate_replace_brick_cmd_phases(rpcsvc_request_t *req, + glusterd_op_t op, + dict_t *dict); +int +__glusterd_handle_replace_brick(rpcsvc_request_t *req) +{ + int32_t ret = -1; + gf_cli_req cli_req = {{ + 0, + }}; + dict_t *dict = NULL; + char *src_brick = NULL; + char *dst_brick = NULL; + char *cli_op = NULL; + glusterd_op_t op = -1; + char *volname = NULL; + char msg[256] = { + 0, + }; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + + GF_ASSERT(req); + this = THIS; + GF_ASSERT(this); + conf = this->private; + + ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + // failed to decode msg; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, + "Failed to decode " + "request received from cli"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_REPLACE_BRK_REQ_RCVD, + "Received replace brick req"); + + if (cli_req.dict.dict_len) { + /* Unserialize the dictionary */ + dict = dict_new(); + + ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + "failed to " + "unserialize req-buffer to dictionary"); + snprintf(msg, sizeof(msg), + "Unable to decode the " + "command"); + goto out; + } + } + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + snprintf(msg, sizeof(msg), "Could not get volume name"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", msg); + goto out; + } + + ret = dict_get_strn(dict, "operation", SLEN("operation"), &cli_op); + if (ret) { + gf_msg_debug(this->name, 0, "dict_get on operation failed"); + snprintf(msg, sizeof(msg), "Could not get operation"); + goto out; + } + + op = 
gd_cli_to_gd_op(cli_op); + + if (conf->op_version < GD_OP_VERSION_3_9_0 && + strcmp(cli_op, "GF_REPLACE_OP_COMMIT_FORCE")) { + snprintf(msg, sizeof(msg), + "Cannot execute command. The " + "cluster is operating at version %d. reset-brick " + "command %s is unavailable in this version.", + conf->op_version, gd_rb_op_to_str(cli_op)); + ret = -1; + goto out; + } + + ret = dict_get_strn(dict, "src-brick", SLEN("src-brick"), &src_brick); + + if (ret) { + snprintf(msg, sizeof(msg), "Failed to get src brick"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", msg); + goto out; + } + gf_msg_debug(this->name, 0, "src brick=%s", src_brick); + + if (!strcmp(cli_op, "GF_RESET_OP_COMMIT") || + !strcmp(cli_op, "GF_RESET_OP_COMMIT_FORCE") || + !strcmp(cli_op, "GF_REPLACE_OP_COMMIT_FORCE")) { + ret = dict_get_strn(dict, "dst-brick", SLEN("dst-brick"), &dst_brick); + + if (ret) { + snprintf(msg, sizeof(msg), + "Failed to get" + "dest brick"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", + msg); + goto out; + } + + gf_msg_debug(this->name, 0, "dst brick=%s", dst_brick); + } + + gf_msg(this->name, GF_LOG_INFO, 0, + (op == GD_OP_REPLACE_BRICK) + ? GD_MSG_REPLACE_BRK_COMMIT_FORCE_REQ_RCVD + : GD_MSG_RESET_BRICK_COMMIT_FORCE_REQ_RCVD, + "Received %s request.", gd_rb_op_to_str(cli_op)); + + ret = glusterd_mgmt_v3_initiate_replace_brick_cmd_phases(req, op, dict); + +out: + if (ret) { + glusterd_op_send_cli_response(op, ret, 0, req, dict, msg); + } + ret = 0; + free(cli_req.dict.dict_val); // malloced by xdr + + return ret; +} + +int +glusterd_handle_reset_brick(rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler(req, __glusterd_handle_replace_brick); +} + +int +glusterd_handle_replace_brick(rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler(req, __glusterd_handle_replace_brick); +} + +int +glusterd_op_stage_replace_brick(dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + int ret = 0; + char *src_brick = NULL; + char *dst_brick = NULL; + char *volname = NULL; + char *op = NULL; + glusterd_op_t gd_op = -1; + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *src_brickinfo = NULL; + char *host = NULL; + char msg[2048] = {0}; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_brickinfo_t *dst_brickinfo = NULL; + glusterd_conf_t *priv = NULL; + char pidfile[PATH_MAX] = {0}; + xlator_t *this = NULL; + gf_boolean_t is_force = _gf_false; + char *dup_dstbrick = NULL; + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + GF_ASSERT(priv); + + ret = glusterd_brick_op_prerequisites(dict, &op, &gd_op, &volname, &volinfo, + &src_brick, &src_brickinfo, pidfile, + op_errstr, rsp_dict); + if (ret) + goto out; + + if (volinfo->type == GF_CLUSTER_TYPE_NONE) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OP_NOT_PERMITTED, + "replace-brick is not permitted on distribute only " + "volumes"); + gf_asprintf(op_errstr, + "replace-brick is not permitted on " + "distribute only volumes. Please use add-brick " + "and remove-brick operations instead."); + ret = -1; + goto out; + } + ret = glusterd_validate_quorum(this, gd_op, dict, op_errstr); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SERVER_QUORUM_NOT_MET, + "Server quorum not met. Rejecting operation."); + goto out; + } + + if (strcmp(op, "GF_REPLACE_OP_COMMIT_FORCE")) { + ret = -1; + goto out; + } else { + is_force = _gf_true; + } + + if (volinfo->snap_count > 0 || !cds_list_empty(&volinfo->snap_volumes)) { + snprintf(msg, sizeof(msg), + "Volume %s has %" PRIu64 + " snapshots. 
" + "Changing the volume configuration will not effect snapshots." + "But the snapshot brick mount should be intact to " + "make them function.", + volname, volinfo->snap_count); + gf_msg("glusterd", GF_LOG_WARNING, 0, GD_MSG_SNAP_WARN, "%s", msg); + msg[0] = '\0'; + } + + glusterd_add_peers_to_auth_list(volname); + + ret = glusterd_get_dst_brick_info(&dst_brick, volname, op_errstr, + &dst_brickinfo, &host, dict, + &dup_dstbrick); + if (ret) + goto out; + + ret = glusterd_new_brick_validate(dst_brick, dst_brickinfo, msg, + sizeof(msg), op); + /* fail if brick being replaced with itself */ + if (ret) { + *op_errstr = gf_strdup(msg); + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_VALIDATE_FAIL, "%s", + *op_errstr); + goto out; + } + + volinfo->rep_brick.src_brick = src_brickinfo; + volinfo->rep_brick.dst_brick = dst_brickinfo; + + if (glusterd_rb_check_bricks(volinfo, src_brickinfo, dst_brickinfo)) { + ret = -1; + *op_errstr = gf_strdup( + "Incorrect source or " + "destination brick"); + if (*op_errstr) + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_BRICK_NOT_FOUND, + "%s", *op_errstr); + goto out; + } + + if (gf_is_local_addr(host)) { + ret = glusterd_validate_and_create_brickpath( + dst_brickinfo, volinfo->volume_id, volinfo->volname, op_errstr, + is_force, _gf_false); + if (ret) + goto out; + } + + if (!gf_is_local_addr(host)) { + RCU_READ_LOCK; + + peerinfo = glusterd_peerinfo_find(NULL, host); + if (peerinfo == NULL) { + RCU_READ_UNLOCK; + ret = -1; + snprintf(msg, sizeof(msg), "%s, is not a friend", host); + *op_errstr = gf_strdup(msg); + goto out; + + } else if (!peerinfo->connected) { + RCU_READ_UNLOCK; + ret = -1; + snprintf(msg, sizeof(msg), + "%s, is not connected at " + "the moment", + host); + *op_errstr = gf_strdup(msg); + goto out; + + } else if (GD_FRIEND_STATE_BEFRIENDED != peerinfo->state.state) { + RCU_READ_UNLOCK; + ret = -1; + snprintf(msg, sizeof(msg), + "%s, is not befriended " + "at the moment", + host); + *op_errstr = gf_strdup(msg); + goto out; + } + RCU_READ_UNLOCK; + + } else if (priv->op_version >= GD_OP_VERSION_3_6_0) { + /* A bricks mount dir is required only by snapshots which were + * introduced in gluster-3.6.0 + */ + + if (!(gf_uuid_compare(dst_brickinfo->uuid, MY_UUID))) { + ret = glusterd_get_brick_mount_dir(dst_brickinfo->path, + dst_brickinfo->hostname, + dst_brickinfo->mount_dir); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_BRICK_MOUNTDIR_GET_FAIL, + "Failed to get brick mount_dir"); + goto out; + } + ret = dict_set_dynstr_with_alloc(rsp_dict, "brick1.mount_dir", + dst_brickinfo->mount_dir); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set brick.mount_dir"); + goto out; + } + } + + ret = dict_set_int32n(rsp_dict, "brick_count", SLEN("brick_count"), 1); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set local_brick_count"); + goto out; + } + } + + ret = 0; + +out: + GF_FREE(dup_dstbrick); + gf_msg_debug(this->name, 0, "Returning %d", ret); + + return ret; +} + +int +glusterd_op_perform_replace_brick(glusterd_volinfo_t *volinfo, char *old_brick, + char *new_brick, dict_t *dict) +{ + char *brick_mount_dir = NULL; + glusterd_brickinfo_t *old_brickinfo = NULL; + glusterd_brickinfo_t *new_brickinfo = NULL; + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + struct statvfs brickstat = { + 0, + }; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(dict); + GF_ASSERT(volinfo); + + conf = this->private; + GF_ASSERT(conf); 
+ + ret = glusterd_brickinfo_new_from_brick(new_brick, &new_brickinfo, _gf_true, + NULL); + if (ret) + goto out; + + ret = glusterd_resolve_brick(new_brickinfo); + if (ret) + goto out; + + if (!gf_uuid_compare(new_brickinfo->uuid, MY_UUID)) { + ret = sys_statvfs(new_brickinfo->path, &brickstat); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_STATVFS_FAILED, + "Failed to fetch disk utilization " + "from the brick (%s:%s). Please check the health of " + "the brick. Error code was %s", + new_brickinfo->hostname, new_brickinfo->path, + strerror(errno)); + + goto out; + } + new_brickinfo->statfs_fsid = brickstat.f_fsid; + } + + ret = glusterd_volume_brickinfo_get_by_brick(old_brick, volinfo, + &old_brickinfo, _gf_false); + if (ret) + goto out; + + (void)snprintf(new_brickinfo->brick_id, sizeof(new_brickinfo->brick_id), + "%s", old_brickinfo->brick_id); + new_brickinfo->port = old_brickinfo->port; + + /* A bricks mount dir is required only by snapshots which were + * introduced in gluster-3.6.0 + */ + if (conf->op_version >= GD_OP_VERSION_3_6_0) { + ret = dict_get_strn(dict, "brick1.mount_dir", SLEN("brick1.mount_dir"), + &brick_mount_dir); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, + GD_MSG_BRICK_MOUNTDIR_GET_FAIL, + "brick1.mount_dir not present"); + goto out; + } + (void)snprintf(new_brickinfo->mount_dir, + sizeof(new_brickinfo->mount_dir), "%s", brick_mount_dir); + } + + cds_list_add(&new_brickinfo->brick_list, &old_brickinfo->brick_list); + + volinfo->brick_count++; + + ret = glusterd_op_perform_remove_brick(volinfo, old_brick, 1, NULL); + if (ret) + goto out; + + /* if the volume is a replicate volume, do: */ + if (glusterd_is_volume_replicate(volinfo)) { + if (!gf_uuid_compare(new_brickinfo->uuid, MY_UUID)) { + ret = glusterd_handle_replicate_brick_ops(volinfo, new_brickinfo, + GD_OP_REPLACE_BRICK); + if (ret < 0) + goto out; + } + } + + ret = glusterd_create_volfiles_and_notify_services(volinfo); + if (ret) + goto out; + + if (GLUSTERD_STATUS_STARTED == volinfo->status) { + ret = glusterd_brick_start(volinfo, new_brickinfo, _gf_false, + _gf_false); + if (ret) + goto out; + } + +out: + + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; +} + +int +glusterd_op_replace_brick(dict_t *dict, dict_t *rsp_dict) +{ + int ret = 0; + char *replace_op = NULL; + glusterd_volinfo_t *volinfo = NULL; + char *volname = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + char *src_brick = NULL; + char *dst_brick = NULL; + glusterd_brickinfo_t *src_brickinfo = NULL; + glusterd_brickinfo_t *dst_brickinfo = NULL; + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + GF_ASSERT(priv); + + ret = dict_get_strn(dict, "src-brick", SLEN("src-brick"), &src_brick); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get src brick"); + goto out; + } + + gf_msg_debug(this->name, 0, "src brick=%s", src_brick); + + ret = dict_get_strn(dict, "dst-brick", SLEN("dst-brick"), &dst_brick); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get dst brick"); + goto out; + } + + gf_msg_debug(this->name, 0, "dst brick=%s", dst_brick); + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get volume name"); + goto out; + } + + ret = dict_get_strn(dict, "operation", SLEN("operation"), &replace_op); + if (ret) { + gf_msg_debug(this->name, 0, "dict_get on operation failed"); + goto out; + } + + ret 
= glusterd_volinfo_find(volname, &volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "Unable to allocate memory"); + goto out; + } + + ret = glusterd_volume_brickinfo_get_by_brick(src_brick, volinfo, + &src_brickinfo, _gf_false); + if (ret) { + gf_msg_debug(this->name, 0, "Unable to get src-brickinfo"); + goto out; + } + + ret = glusterd_get_rb_dst_brickinfo(volinfo, &dst_brickinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RB_BRICKINFO_GET_FAIL, + "Unable to get " + "replace brick destination brickinfo"); + goto out; + } + + ret = glusterd_resolve_brick(dst_brickinfo); + if (ret) { + gf_msg_debug(this->name, 0, "Unable to resolve dst-brickinfo"); + goto out; + } + + ret = rb_update_dstbrick_port(dst_brickinfo, rsp_dict, dict); + if (ret) + goto out; + + if (strcmp(replace_op, "GF_REPLACE_OP_COMMIT_FORCE")) { + ret = -1; + goto out; + } + + ret = glusterd_svcs_stop(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTER_SERVICES_STOP_FAIL, + "Unable to stop gluster services, ret: %d", ret); + } + + ret = glusterd_op_perform_replace_brick(volinfo, src_brick, dst_brick, + dict); + if (ret) { + gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_BRICK_ADD_FAIL, + "Unable to add dst-brick: " + "%s to volume: %s", + dst_brick, volinfo->volname); + (void)glusterd_svcs_manager(volinfo); + goto out; + } + + volinfo->rebal.defrag_status = 0; + + ret = glusterd_svcs_manager(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_CRITICAL, 0, + GD_MSG_GLUSTER_SERVICE_START_FAIL, + "Failed to start one or more gluster services."); + } + + ret = glusterd_fetchspec_notify(THIS); + glusterd_brickinfo_delete(volinfo->rep_brick.dst_brick); + volinfo->rep_brick.src_brick = NULL; + volinfo->rep_brick.dst_brick = NULL; + + if (!ret) + ret = glusterd_store_volinfo(volinfo, + GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RBOP_STATE_STORE_FAIL, + "Couldn't store" + " replace brick operation's state"); + +out: + return ret; +} + +int +glusterd_mgmt_v3_initiate_replace_brick_cmd_phases(rpcsvc_request_t *req, + glusterd_op_t op, + dict_t *dict) +{ + int32_t ret = -1; + int32_t op_ret = -1; + uint32_t txn_generation = 0; + uint32_t op_errno = 0; + char *op_errstr = NULL; + dict_t *req_dict = NULL; + dict_t *tmp_dict = NULL; + uuid_t *originator_uuid = NULL; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + gf_boolean_t is_acquired = _gf_false; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(req); + GF_ASSERT(dict); + conf = this->private; + GF_ASSERT(conf); + + txn_generation = conf->generation; + originator_uuid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); + if (!originator_uuid) { + ret = -1; + goto out; + } + + gf_uuid_copy(*originator_uuid, MY_UUID); + ret = dict_set_bin(dict, "originator_uuid", originator_uuid, + sizeof(uuid_t)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set originator_uuid."); + GF_FREE(originator_uuid); + goto out; + } + + ret = dict_set_int32n(dict, "is_synctasked", SLEN("is_synctasked"), + _gf_true); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set synctasked flag to true."); + goto out; + } + + tmp_dict = dict_new(); + if (!tmp_dict) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, + "Unable to create dict"); + goto out; + } + dict_copy(dict, tmp_dict); + + ret = glusterd_mgmt_v3_initiate_lockdown(op, dict, &op_errstr, &op_errno, + &is_acquired, txn_generation); + if 
(ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_LOCKDOWN_FAIL, + "mgmt_v3 lockdown failed."); + goto out; + } + + ret = glusterd_mgmt_v3_build_payload(&req_dict, &op_errstr, dict, op); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_PAYLOAD_BUILD_FAIL, + LOGSTR_BUILD_PAYLOAD, gd_op_list[op]); + if (op_errstr == NULL) + gf_asprintf(&op_errstr, OPERRSTR_BUILD_PAYLOAD); + goto out; + } + + ret = glusterd_mgmt_v3_pre_validate(op, req_dict, &op_errstr, &op_errno, + txn_generation); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PRE_VALIDATION_FAIL, + "Pre Validation Failed"); + goto out; + } + + ret = glusterd_mgmt_v3_commit(op, dict, req_dict, &op_errstr, &op_errno, + txn_generation); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_OP_FAIL, + "Commit Op Failed"); + goto out; + } + + ret = 0; + +out: + op_ret = ret; + + (void)glusterd_mgmt_v3_release_peer_locks(op, dict, op_ret, &op_errstr, + is_acquired, txn_generation); + + if (is_acquired) { + ret = glusterd_multiple_mgmt_v3_unlock(tmp_dict, MY_UUID); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_UNLOCK_FAIL, + "Failed to release mgmt_v3 locks on " + "localhost."); + op_ret = ret; + } + } + /* SEND CLI RESPONSE */ + glusterd_op_send_cli_response(op, op_ret, op_errno, req, dict, op_errstr); + + if (req_dict) + dict_unref(req_dict); + + if (tmp_dict) + dict_unref(tmp_dict); + + if (op_errstr) { + GF_FREE(op_errstr); + op_errstr = NULL; + } + + return 0; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-reset-brick.c b/xlators/mgmt/glusterd/src/glusterd-reset-brick.c new file mode 100644 index 00000000000..e4d247a1d6c --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-reset-brick.c @@ -0,0 +1,376 @@ +/* + Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ +#include <glusterfs/common-utils.h> +#include "cli1-xdr.h" +#include "xdr-generic.h" +#include <glusterfs/glusterfs.h> +#include "glusterd.h" +#include "glusterd-op-sm.h" +#include "glusterd-geo-rep.h" +#include "glusterd-store.h" +#include "glusterd-utils.h" +#include "glusterd-svc-mgmt.h" +#include "glusterd-svc-helper.h" +#include "glusterd-volgen.h" +#include "glusterd-messages.h" +#include "glusterd-mgmt.h" +#include <glusterfs/run.h> +#include <glusterfs/syscall.h> + +#include <signal.h> + +int +glusterd_reset_brick_prevalidate(dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + int ret = 0; + char *src_brick = NULL; + char *dst_brick = NULL; + char *volname = NULL; + char *op = NULL; + glusterd_op_t gd_op = -1; + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *src_brickinfo = NULL; + char *host = NULL; + char msg[2048] = {0}; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_brickinfo_t *dst_brickinfo = NULL; + glusterd_conf_t *priv = NULL; + char pidfile[PATH_MAX] = {0}; + xlator_t *this = NULL; + gf_boolean_t is_force = _gf_false; + int32_t ignore_partition = 0; + pid_t pid = -1; + uuid_t volume_id = { + 0, + }; + char *dup_dstbrick = NULL; + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + GF_ASSERT(priv); + + ret = glusterd_brick_op_prerequisites(dict, &op, &gd_op, &volname, &volinfo, + &src_brick, &src_brickinfo, pidfile, + op_errstr, rsp_dict); + if (ret) + goto out; + + if (!strcmp(op, "GF_RESET_OP_START")) + goto done; + + if (!strcmp(op, "GF_RESET_OP_COMMIT_FORCE")) + is_force = _gf_true; + + ret = glusterd_get_dst_brick_info(&dst_brick, volname, op_errstr, + &dst_brickinfo, &host, dict, + &dup_dstbrick); + if (ret) + goto out; + + ret = glusterd_new_brick_validate(dst_brick, dst_brickinfo, msg, + sizeof(msg), op); + /* if bricks are not same and reset brick was used, fail command. + * Only replace brick should be used to replace with new bricks + * to the volume. + */ + if (ret == 0) { + if (!gf_uuid_compare(MY_UUID, dst_brickinfo->uuid)) { + ret = -1; + *op_errstr = gf_strdup( + "When destination brick is new," + " please use" + " gluster volume " + "replace-brick <volname> " + "<src-brick> <dst-brick> " + "commit force"); + if (*op_errstr) + gf_msg(this->name, GF_LOG_ERROR, EPERM, + GD_MSG_BRICK_VALIDATE_FAIL, "%s", *op_errstr); + goto out; + } + } else if (ret == 1) { + if (gf_is_service_running(pidfile, &pid)) { + ret = -1; + *op_errstr = gf_strdup( + "Source brick" + " must be stopped." + " Please use " + "gluster volume " + "reset-brick <volname> " + "<dst-brick> start."); + if (*op_errstr) + gf_msg(this->name, GF_LOG_ERROR, EPERM, + GD_MSG_BRICK_VALIDATE_FAIL, "%s", *op_errstr); + goto out; + } + ret = sys_lgetxattr(dst_brickinfo->path, GF_XATTR_VOL_ID_KEY, volume_id, + 16); + if (gf_uuid_compare(dst_brickinfo->uuid, src_brickinfo->uuid) || + (ret >= 0 && is_force == _gf_false)) { + ret = -1; + *op_errstr = gf_strdup( + "Brick not available." + "It may be containing " + "or be contained " + "by an existing brick." 
+ "Use 'force' option to " + "override this."); + if (*op_errstr) + gf_msg(this->name, GF_LOG_ERROR, EPERM, + GD_MSG_BRICK_VALIDATE_FAIL, "%s", *op_errstr); + goto out; + } + ret = 0; + } else { + *op_errstr = gf_strdup(msg); + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_VALIDATE_FAIL, "%s", + *op_errstr); + goto out; + } + + volinfo->rep_brick.src_brick = src_brickinfo; + volinfo->rep_brick.dst_brick = dst_brickinfo; + + ret = dict_get_int32n(dict, "ignore-partition", SLEN("ignore-partition"), + &ignore_partition); + ret = 0; + if (gf_is_local_addr(host)) { + ret = glusterd_validate_and_create_brickpath( + dst_brickinfo, volinfo->volume_id, volinfo->volname, op_errstr, + is_force, ignore_partition); + if (ret) + goto out; + } else { + RCU_READ_LOCK; + + peerinfo = glusterd_peerinfo_find(NULL, host); + if (peerinfo == NULL) { + RCU_READ_UNLOCK; + ret = -1; + snprintf(msg, sizeof(msg), "%s, is not a friend.", host); + *op_errstr = gf_strdup(msg); + goto out; + + } else if (!peerinfo->connected) { + RCU_READ_UNLOCK; + ret = -1; + snprintf(msg, sizeof(msg), + "%s," + "is not connected at " + "the moment.", + host); + *op_errstr = gf_strdup(msg); + goto out; + + } else if (GD_FRIEND_STATE_BEFRIENDED != peerinfo->state.state) { + RCU_READ_UNLOCK; + ret = -1; + snprintf(msg, sizeof(msg), + "%s, is not befriended " + "at the moment.", + host); + *op_errstr = gf_strdup(msg); + goto out; + } + RCU_READ_UNLOCK; + } + + if (!(gf_uuid_compare(dst_brickinfo->uuid, MY_UUID))) { + ret = glusterd_get_brick_mount_dir(dst_brickinfo->path, + dst_brickinfo->hostname, + dst_brickinfo->mount_dir); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_MOUNTDIR_GET_FAIL, + "Failed to get brick mount_dir"); + goto out; + } + ret = dict_set_dynstr_with_alloc(rsp_dict, "brick1.mount_dir", + dst_brickinfo->mount_dir); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set brick.mount_dir"); + goto out; + } + } + + ret = dict_set_int32n(rsp_dict, "brick_count", SLEN("brick_count"), 1); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set local_brick_count."); + goto out; + } + +done: + ret = 0; +out: + GF_FREE(dup_dstbrick); + gf_msg_debug(this->name, 0, "Returning %d.", ret); + + return ret; +} + +int +glusterd_op_reset_brick(dict_t *dict, dict_t *rsp_dict) +{ + int ret = 0; + char *op = NULL; + glusterd_volinfo_t *volinfo = NULL; + char *volname = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + char *src_brick = NULL; + char *dst_brick = NULL; + glusterd_brickinfo_t *src_brickinfo = NULL; + glusterd_brickinfo_t *dst_brickinfo = NULL; + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + GF_ASSERT(priv); + + ret = dict_get_strn(dict, "operation", SLEN("operation"), &op); + if (ret) { + gf_msg_debug(this->name, 0, "dict_get on operation failed"); + goto out; + } + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get volume name"); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) + goto out; + + ret = dict_get_strn(dict, "src-brick", SLEN("src-brick"), &src_brick); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get src brick"); + goto out; + } + + gf_msg_debug(this->name, 0, "src brick=%s", src_brick); + + ret = glusterd_volume_brickinfo_get_by_brick(src_brick, volinfo, + &src_brickinfo, _gf_false); + if (ret) { + 
gf_msg_debug(this->name, 0, "Unable to get src-brickinfo"); + goto out; + } + + if (!strcmp(op, "GF_RESET_OP_START")) { + ret = glusterd_volume_stop_glusterfs(volinfo, src_brickinfo, _gf_false); + if (ret) { + gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_BRICK_STOP_FAIL, + "Unable to stop" + " brick: %s:%s", + src_brickinfo->hostname, src_brickinfo->path); + } + + goto out; + + } else if (!strcmp(op, "GF_RESET_OP_COMMIT") || + !strcmp(op, "GF_RESET_OP_COMMIT_FORCE")) { + ret = dict_get_strn(dict, "dst-brick", SLEN("dst-brick"), &dst_brick); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get dst brick"); + goto out; + } + + gf_msg_debug(this->name, 0, "dst brick=%s", dst_brick); + + ret = glusterd_get_rb_dst_brickinfo(volinfo, &dst_brickinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RB_BRICKINFO_GET_FAIL, + "Unable to get " + "reset brick " + "destination brickinfo"); + goto out; + } + + ret = glusterd_resolve_brick(dst_brickinfo); + if (ret) { + gf_msg_debug(this->name, 0, "Unable to resolve dst-brickinfo"); + goto out; + } + + ret = rb_update_dstbrick_port(dst_brickinfo, rsp_dict, dict); + if (ret) + goto out; + + if (gf_uuid_compare(dst_brickinfo->uuid, MY_UUID)) { + gf_msg_debug(this->name, 0, "I AM THE DESTINATION HOST"); + ret = glusterd_volume_stop_glusterfs(volinfo, src_brickinfo, + _gf_false); + if (ret) { + gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_BRICK_STOP_FAIL, + "Unable to stop brick: %s:%s", src_brickinfo->hostname, + src_brickinfo->path); + goto out; + } + } + + ret = glusterd_svcs_stop(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_GLUSTER_SERVICES_STOP_FAIL, + "Unable to stop gluster services, ret: %d", ret); + goto out; + } + ret = glusterd_op_perform_replace_brick(volinfo, src_brick, dst_brick, + dict); + if (ret) { + gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_BRICK_ADD_FAIL, + "Unable to add dst-brick: " + "%s to volume: %s", + dst_brick, volinfo->volname); + (void)glusterd_svcs_manager(volinfo); + goto out; + } + + volinfo->rebal.defrag_status = 0; + + ret = glusterd_svcs_manager(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_CRITICAL, 0, + GD_MSG_GLUSTER_SERVICE_START_FAIL, + "Failed to start one or more gluster services."); + } + + ret = glusterd_fetchspec_notify(THIS); + glusterd_brickinfo_delete(volinfo->rep_brick.dst_brick); + volinfo->rep_brick.src_brick = NULL; + volinfo->rep_brick.dst_brick = NULL; + + if (!ret) + ret = glusterd_store_volinfo(volinfo, + GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RBOP_STATE_STORE_FAIL, + "Couldn't store" + " reset brick operation's state."); + } + } else { + ret = -1; + goto out; + } + +out: + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c index 2b0f87fb00e..88662e3bbae 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c @@ -1,1823 +1,2448 @@ /* - Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. 
- - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ - + Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ #include "rpc-clnt.h" #include "glusterd1-xdr.h" -#include "glusterd1.h" -#include "cli1.h" +#include "cli1-xdr.h" + +#include "xdr-generic.h" -#include "compat-errno.h" +#include <glusterfs/compat-errno.h> #include "glusterd-op-sm.h" #include "glusterd-sm.h" #include "glusterd.h" #include "protocol-common.h" #include "glusterd-utils.h" -#include "common-utils.h" +#include <glusterfs/common-utils.h> +#include "glusterd-messages.h" +#include "glusterd-snapshot-utils.h" #include <sys/uio.h> +#define SERVER_PATH_MAX (16 * 1024) -#define SERVER_PATH_MAX (16 * 1024) - +#define GLUSTERD_STACK_DESTROY(frame) \ + do { \ + frame->local = NULL; \ + STACK_DESTROY(frame->root); \ + } while (0) extern glusterd_op_info_t opinfo; +extern uuid_t global_txn_id; int32_t -glusterd3_1_brick_op (call_frame_t *frame, xlator_t *this, - void *data); -int32_t -glusterd_op_send_cli_response (glusterd_op_t op, int32_t op_ret, - int32_t op_errno, rpcsvc_request_t *req, - void *op_ctx, char *op_errstr) +glusterd_op_send_cli_response(glusterd_op_t op, int32_t op_ret, + int32_t op_errno, rpcsvc_request_t *req, + void *op_ctx, char *op_errstr) { - int32_t ret = -1; - gd_serialize_t sfunc = NULL; - void *cli_rsp = NULL; - dict_t *ctx = NULL; - char *free_ptr = NULL; - glusterd_conf_t *conf = NULL; - - GF_ASSERT (THIS); - - conf = THIS->private; - - GF_ASSERT (conf); - - switch (op) { - case GD_OP_CREATE_VOLUME: - { - gf1_cli_create_vol_rsp rsp = {0,}; - rsp.op_ret = op_ret; - rsp.op_errno = op_errno; - rsp.volname = ""; - if (op_errstr) - rsp.op_errstr = op_errstr; - else - rsp.op_errstr = ""; - cli_rsp = &rsp; - sfunc = gf_xdr_serialize_cli_create_vol_rsp; - break; + int32_t ret = -1; + void *cli_rsp = NULL; + dict_t *ctx = NULL; + char *free_ptr = NULL; + glusterd_conf_t *conf = NULL; + xdrproc_t xdrproc = NULL; + char *errstr = NULL; + int32_t status = 0; + int32_t count = 0; + gf_cli_rsp rsp = { + 0, + }; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + conf = this->private; + + GF_ASSERT(conf); + + ctx = op_ctx; + + switch (op) { + case GD_OP_REMOVE_BRICK: { + if (ctx) + ret = dict_get_strn(ctx, "errstr", SLEN("errstr"), &errstr); + break; + } + case GD_OP_RESET_VOLUME: { + if (op_ret && !op_errstr) + errstr = "Error while resetting options"; + break; + } + case GD_OP_REBALANCE: + case GD_OP_DEFRAG_BRICK_VOLUME: { + if (ctx) { + ret = dict_get_int32n(ctx, "status", SLEN("status"), &status); + if (ret) { + gf_msg_trace(this->name, 0, "failed to get status"); + } + } + break; + } + case GD_OP_GSYNC_CREATE: + case GD_OP_GSYNC_SET: { + if (ctx) { + ret = dict_get_strn(ctx, "errstr", SLEN("errstr"), &errstr); + ret = dict_set_strn(ctx, "glusterd_workdir", + SLEN("glusterd_workdir"), 
conf->workdir); + /* swallow error here, that will be re-triggered in cli */ + } + break; + } + case GD_OP_PROFILE_VOLUME: { + if (ctx && dict_get_int32n(ctx, "count", SLEN("count"), &count)) { + ret = dict_set_int32n(ctx, "count", SLEN("count"), 0); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "failed to set count in dictionary"); + } + } + break; } - - case GD_OP_START_VOLUME: - { - gf1_cli_start_vol_rsp rsp = {0,}; - rsp.op_ret = op_ret; - rsp.op_errno = op_errno; - rsp.volname = ""; - if (op_errstr) - rsp.op_errstr = op_errstr; - else - rsp.op_errstr = ""; - cli_rsp = &rsp; - sfunc = gf_xdr_serialize_cli_start_vol_rsp; - break; + case GD_OP_START_BRICK: + case GD_OP_STOP_BRICK: { + gf_msg_debug(this->name, 0, "op '%s' not supported", + gd_op_list[op]); + break; } - - case GD_OP_STOP_VOLUME: - { - gf1_cli_stop_vol_rsp rsp = {0,}; - rsp.op_ret = op_ret; - rsp.op_errno = op_errno; - rsp.volname = ""; - if (op_errstr) - rsp.op_errstr = op_errstr; - else - rsp.op_errstr = ""; - cli_rsp = &rsp; - sfunc = gf_xdr_serialize_cli_stop_vol_rsp; - break; + case GD_OP_NONE: + case GD_OP_MAX: { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_OP_UNSUPPORTED, + "invalid operation"); + break; } - + case GD_OP_CREATE_VOLUME: + case GD_OP_START_VOLUME: + case GD_OP_STOP_VOLUME: case GD_OP_DELETE_VOLUME: - { - gf1_cli_delete_vol_rsp rsp = {0,}; - rsp.op_ret = op_ret; - rsp.op_errno = op_errno; - rsp.volname = ""; - if (op_errstr) - rsp.op_errstr = op_errstr; - else - rsp.op_errstr = ""; - cli_rsp = &rsp; - sfunc = gf_xdr_serialize_cli_delete_vol_rsp; - break; - } - case GD_OP_DEFRAG_VOLUME: - { - gf1_cli_defrag_vol_rsp rsp = {0,}; - rsp.op_ret = op_ret; - rsp.op_errno = op_errno; - //rsp.volname = ""; - cli_rsp = &rsp; - sfunc = gf_xdr_serialize_cli_defrag_vol_rsp; - break; - } - case GD_OP_ADD_BRICK: - { - gf1_cli_add_brick_rsp rsp = {0,}; - rsp.op_ret = op_ret; - rsp.op_errno = op_errno; - rsp.volname = ""; - if (op_errstr) - rsp.op_errstr = op_errstr; - else - rsp.op_errstr = ""; - cli_rsp = &rsp; - sfunc = gf_xdr_serialize_cli_add_brick_rsp; - break; - } - - case GD_OP_REMOVE_BRICK: - { - gf1_cli_remove_brick_rsp rsp = {0,}; - ctx = op_ctx; - if (ctx && - dict_get_str (ctx, "errstr", &rsp.op_errstr)) - rsp.op_errstr = ""; - rsp.op_ret = op_ret; - rsp.op_errno = op_errno; - rsp.volname = ""; - cli_rsp = &rsp; - sfunc = gf_xdr_serialize_cli_remove_brick_rsp; - break; - } - - case GD_OP_REPLACE_BRICK: - { - gf1_cli_replace_brick_rsp rsp = {0,}; - ctx = op_ctx; - if (ctx && - dict_get_str (ctx, "status-reply", &rsp.status)) - rsp.status = ""; - rsp.op_ret = op_ret; - rsp.op_errno = op_errno; - if (op_errstr) - rsp.op_errstr = op_errstr; - else - rsp.op_errstr = ""; - rsp.volname = ""; - cli_rsp = &rsp; - sfunc = gf_xdr_serialize_cli_replace_brick_rsp; - break; - } - - case GD_OP_SET_VOLUME: - { - gf1_cli_set_vol_rsp rsp = {0,}; - rsp.op_ret = op_ret; - rsp.op_errno = op_errno; - rsp.volname = ""; - if (op_errstr) - rsp.op_errstr = op_errstr; - else - rsp.op_errstr = ""; - cli_rsp = &rsp; - sfunc = gf_xdr_serialize_cli_set_vol_rsp; - break; - } - - case GD_OP_RESET_VOLUME: - { - gf_log ("", GF_LOG_DEBUG, "Return value to CLI"); - gf1_cli_reset_vol_rsp rsp = {0,}; - rsp.op_ret = op_ret; - rsp.op_errno = 1; - rsp.volname = ""; - if (op_errstr) - rsp.op_errstr = op_errstr; - else - rsp.op_errstr = "Error while resetting options"; - cli_rsp = &rsp; - sfunc = gf_xdr_serialize_cli_reset_vol_rsp; - break; - } - - case GD_OP_LOG_FILENAME: - { - gf1_cli_log_filename_rsp rsp = {0,}; 
- rsp.op_ret = op_ret; - rsp.op_errno = op_errno; - if (op_errstr) - rsp.errstr = op_errstr; - else - rsp.errstr = ""; - cli_rsp = &rsp; - sfunc = gf_xdr_serialize_cli_log_filename_rsp; - break; - } case GD_OP_LOG_ROTATE: - { - gf1_cli_log_rotate_rsp rsp = {0,}; - rsp.op_ret = op_ret; - rsp.op_errno = op_errno; - if (op_errstr) - rsp.errstr = op_errstr; - else - rsp.errstr = ""; - cli_rsp = &rsp; - sfunc = gf_xdr_serialize_cli_log_rotate_rsp; - break; - } case GD_OP_SYNC_VOLUME: - { - gf1_cli_sync_volume_rsp rsp = {0,}; - rsp.op_ret = op_ret; - rsp.op_errno = op_errno; - if (op_errstr) - rsp.op_errstr = op_errstr; - else - rsp.op_errstr = ""; - cli_rsp = &rsp; - sfunc = gf_xdr_from_cli_sync_volume_rsp; - break; - } - case GD_OP_GSYNC_SET: - { - int type = 0; - int config_type = 0; - char *str = NULL; - char *master = NULL; - char *slave = NULL; - char *op_name = NULL; - gf1_cli_gsync_set_rsp rsp = {0,}; - ctx = op_ctx; - rsp.op_ret = op_ret; - rsp.op_errno = op_errno; - rsp.op_errstr = ""; - rsp.op_name = ""; - rsp.master = ""; - rsp.slave = ""; - rsp.glusterd_workdir = gf_strdup (conf->workdir); - rsp.gsync_prefix = gf_strdup (GSYNCD_PREFIX); - if (ctx) { - ret = dict_get_str (ctx, "errstr", - &str); - if (ret == 0) - rsp.op_errstr = gf_strdup (str); - ret = dict_get_int32 (ctx, "type", - &type); - if (ret == 0) - rsp.type = type; - ret = dict_get_int32 (ctx, "config_type", - &config_type); - if (ret == 0) - rsp.config_type = config_type; - ret = dict_get_str (ctx, "master", - &master); - if (ret == 0) - rsp.master = gf_strdup (master); - - ret = dict_get_str (ctx, "slave", - &slave); - if (ret == 0) - rsp.slave = gf_strdup (slave); - - if (config_type == - GF_GSYNC_OPTION_TYPE_CONFIG_GET) { - ret = dict_get_str (ctx, "op_name", - &op_name); - if (ret == 0) - rsp.op_name = - gf_strdup (op_name); - } - } else if (op_errstr) - rsp.op_errstr = op_errstr; - cli_rsp = &rsp; - sfunc = gf_xdr_serialize_cli_gsync_set_rsp; - break; - } - case GD_OP_RENAME_VOLUME: - case GD_OP_START_BRICK: - case GD_OP_STOP_BRICK: - case GD_OP_LOG_LOCATE: - { - gf_log ("", GF_LOG_DEBUG, "not supported op %d", op); - break; - } - case GD_OP_PROFILE_VOLUME: - { - gf1_cli_stats_volume_rsp rsp = {0,}; - rsp.op_ret = op_ret; - rsp.op_errno = op_errno; - if (op_errstr) - rsp.op_errstr = op_errstr; - else - rsp.op_errstr = ""; - ctx = op_ctx; - dict_allocate_and_serialize (ctx, - &rsp.stats_info.stats_info_val, - (size_t*)&rsp.stats_info.stats_info_len); - free_ptr = rsp.stats_info.stats_info_val; - cli_rsp = &rsp; - sfunc = gf_xdr_from_cli_stats_volume_rsp; - break; - } - + case GD_OP_STATEDUMP_VOLUME: + case GD_OP_REPLACE_BRICK: + case GD_OP_STATUS_VOLUME: + case GD_OP_SET_VOLUME: + case GD_OP_LIST_VOLUME: + case GD_OP_CLEARLOCKS_VOLUME: + case GD_OP_HEAL_VOLUME: case GD_OP_QUOTA: - { - int32_t type; - char *str = NULL; - char *errstr = NULL; - gf1_cli_quota_rsp rsp = {0,}; - - rsp.op_ret = op_ret; - rsp.op_errno = op_errno; - rsp.volname = ""; - - ctx = op_ctx; - - if (op_errstr) - rsp.op_errstr = gf_strdup (op_errstr); - else { - ret = dict_get_str (ctx, "errstr", &errstr); - if (ret == 0) - rsp.op_errstr = gf_strdup (errstr); - else - rsp.op_errstr = ""; - } + case GD_OP_SNAP: + case GD_OP_BARRIER: + case GD_OP_BITROT: + case GD_OP_SCRUB_STATUS: + case GD_OP_SCRUB_ONDEMAND: + case GD_OP_RESET_BRICK: + case GD_OP_MAX_OPVERSION: + case GD_OP_DETACH_NOT_STARTED: + case GD_OP_GANESHA: + case GD_OP_DETACH_TIER: + case GD_OP_TIER_MIGRATE: + case GD_OP_TIER_START_STOP: + case GD_OP_TIER_STATUS: + case 
GD_OP_DETACH_TIER_STATUS: + case GD_OP_REMOVE_TIER_BRICK: + case GD_OP_ADD_TIER_BRICK: - rsp.limit_list = ""; + { + /*nothing specific to be done*/ + break; + } + case GD_OP_COPY_FILE: { + if (ctx) + ret = dict_get_strn(ctx, "errstr", SLEN("errstr"), &errstr); + break; + } + case GD_OP_SYS_EXEC: { + if (ctx) { + ret = dict_get_strn(ctx, "errstr", SLEN("errstr"), &errstr); + ret = dict_set_strn(ctx, "glusterd_workdir", + SLEN("glusterd_workdir"), conf->workdir); + } + break; + } + } + + rsp.op_ret = op_ret; + rsp.op_errno = op_errno; + + if (errstr) + rsp.op_errstr = errstr; + else if (op_errstr) + rsp.op_errstr = op_errstr; + + if (!rsp.op_errstr) + rsp.op_errstr = ""; + + if (ctx) { + ret = dict_allocate_and_serialize(ctx, &rsp.dict.dict_val, + &rsp.dict.dict_len); + if (ret < 0) + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); + else + free_ptr = rsp.dict.dict_val; + } - if (op_ret == 0 && ctx) { - ret = dict_get_str (ctx, "volname", &str); - if (ret == 0) - rsp.volname = gf_strdup (str); + /* needed by 'rebalance status' */ + if (status) + rsp.op_errno = status; - ret = dict_get_int32 - (ctx, "type", &type); - if (ret == 0) - rsp.type = type; - else - rsp.type = 0; + cli_rsp = &rsp; + xdrproc = (xdrproc_t)xdr_gf_cli_rsp; - if (type == GF_QUOTA_OPTION_TYPE_LIST) { - ret = dict_get_str - (ctx,"limit_list", &str); + glusterd_to_cli(req, cli_rsp, NULL, 0, NULL, xdrproc, ctx); + ret = 0; - if (ret == 0) - rsp.limit_list = - gf_strdup (str); - } - } - cli_rsp = &rsp; - sfunc = gf_xdr_serialize_cli_quota_rsp; - break; - } + GF_FREE(free_ptr); + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; +} - case GD_OP_NONE: - case GD_OP_MAX: - { - gf_log ("", GF_LOG_ERROR, "invalid operation %d", op); - break; - } - } +int +glusterd_big_locked_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe, fop_cbk_fn_t fn) +{ + glusterd_conf_t *priv = THIS->private; + int ret = -1; - ret = glusterd_submit_reply (req, cli_rsp, NULL, 0, NULL, - sfunc); + synclock_lock(&priv->big_lock); + ret = fn(req, iov, count, myframe); + synclock_unlock(&priv->big_lock); - if (free_ptr) - GF_FREE (free_ptr); - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + return ret; } int -glusterd3_1_probe_cbk (struct rpc_req *req, struct iovec *iov, - int count, void *myframe) +__glusterd_probe_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) { - gd1_mgmt_probe_rsp rsp = {{0},}; - glusterd_conf_t *conf = NULL; - int ret = 0; - glusterd_peerinfo_t *peerinfo = NULL; - glusterd_friend_sm_event_t *event = NULL; - glusterd_probe_ctx_t *ctx = NULL; - - conf = THIS->private; - - if (-1 == req->rpc_status) { - goto out; - } + gd1_mgmt_probe_rsp rsp = { + {0}, + }; + int ret = 0; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_friend_sm_event_t *event = NULL; + glusterd_probe_ctx_t *ctx = NULL; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + + if (-1 == req->rpc_status) { + goto out; + } + + this = THIS; + GF_ASSERT(this != NULL); + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, (conf != NULL), out); + + ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_probe_rsp); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RES_DECODE_FAIL, "error"); + // rsp.op_ret = -1; + // rsp.op_errno = EINVAL; + goto out; + } + + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_PROBE_REQ_RESP_RCVD, + "Received probe resp from uuid: %s, host: %s", uuid_utoa(rsp.uuid), + rsp.hostname); + if (rsp.op_ret != 0) { + ctx = 
((call_frame_t *)myframe)->local; + ((call_frame_t *)myframe)->local = NULL; - ret = gd_xdr_to_mgmt_probe_rsp (*iov, &rsp); - if (ret < 0) { - gf_log ("", GF_LOG_ERROR, "error"); - //rsp.op_ret = -1; - //rsp.op_errno = EINVAL; - goto out; + GF_ASSERT(ctx); + + if (ctx->req) { + glusterd_xfer_cli_probe_resp(ctx->req, rsp.op_ret, rsp.op_errno, + rsp.op_errstr, ctx->hostname, + ctx->port, ctx->dict); + } + + glusterd_destroy_probe_ctx(ctx); + (void)glusterd_friend_remove(rsp.uuid, rsp.hostname); + ret = rsp.op_ret; + goto out; + } + + RCU_READ_LOCK; + peerinfo = glusterd_peerinfo_find(rsp.uuid, rsp.hostname); + if (peerinfo == NULL) { + RCU_READ_UNLOCK + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PEER_NOT_FOUND, + "Could not find peerd %s(%s)", rsp.hostname, + uuid_utoa(rsp.uuid)); + goto out; + } + + /* + * In the case of a fresh probe rsp.uuid and peerinfo.uuid will not + * match, as peerinfo->uuid will be NULL. + * + * In the case of a peer probe being done to add a new network to a + * peer, rsp.uuid will match an existing peerinfo.uuid. If we have this + * stage it means that the current address/hostname being used isn't + * present in the found peerinfo. If it were, we would have found out + * earlier in the probe process and wouldn't even reach till here. So, + * we need to add the new hostname to the peer. + * + * This addition should only be done for cluster op-version >= + * GD_OP_VERSION_3_6_0 as address lists are only supported from then on. + * Also, this update should only be done when an explicit CLI probe + * command was used to begin the probe process. + */ + if ((conf->op_version >= GD_OP_VERSION_3_6_0) && + (gf_uuid_compare(rsp.uuid, peerinfo->uuid) == 0)) { + ctx = ((call_frame_t *)myframe)->local; + /* Presence of ctx->req implies this probe was started by a cli + * probe command + */ + if (ctx->req == NULL) + goto cont; + + gf_msg_debug(this->name, 0, + "Adding address '%s' to " + "existing peer %s", + rsp.hostname, uuid_utoa(rsp.uuid)); + + ret = glusterd_friend_remove(NULL, rsp.hostname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_STALE_PEERINFO_REMOVE_FAIL, + "Could not remove " + "stale peerinfo with name %s", + rsp.hostname); + goto reply; } - gf_log ("glusterd", GF_LOG_INFO, - "Received probe resp from uuid: %s, host: %s", - uuid_utoa (rsp.uuid), rsp.hostname); - if (rsp.op_ret != 0) { - ctx = ((call_frame_t *)myframe)->local; - ((call_frame_t *)myframe)->local = NULL; - - GF_ASSERT (ctx); - - if (ctx->req) { - glusterd_xfer_cli_probe_resp (ctx->req, rsp.op_ret, - rsp.op_errno, - ctx->hostname, ctx->port); - } - - glusterd_destroy_probe_ctx (ctx); - (void) glusterd_friend_remove (rsp.uuid, rsp.hostname); - ret = rsp.op_ret; - goto out; - } - ret = glusterd_friend_find (rsp.uuid, rsp.hostname, &peerinfo); + ret = gd_add_address_to_peer(peerinfo, rsp.hostname); if (ret) { - GF_ASSERT (0); + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_HOSTNAME_ADD_TO_PEERLIST_FAIL, + "Couldn't add hostname to peer list"); + goto reply; } - uuid_copy (peerinfo->uuid, rsp.uuid); - - ret = glusterd_friend_sm_new_event - (GD_FRIEND_EVENT_INIT_FRIEND_REQ, &event); + /* Injecting EVENT_NEW_NAME to send update */ + ret = glusterd_friend_sm_new_event(GD_FRIEND_EVENT_NEW_NAME, &event); + if (!ret) { + event->peername = gf_strdup(peerinfo->hostname); + gf_uuid_copy(event->peerid, peerinfo->uuid); - if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, - "Unable to get event"); - goto out; + ret = glusterd_friend_sm_inject_event(event); } + rsp.op_errno = GF_PROBE_FRIEND; 
- event->peerinfo = peerinfo; - event->ctx = ((call_frame_t *)myframe)->local; + reply: + ctx = ((call_frame_t *)myframe)->local; ((call_frame_t *)myframe)->local = NULL; - ret = glusterd_friend_sm_inject_event (event); + if (!ctx) { + ret = -1; + goto unlock; + } - if (!ret) { - glusterd_friend_sm (); - glusterd_op_sm (); + if (ctx->req) { + glusterd_xfer_cli_probe_resp(ctx->req, ret, rsp.op_errno, + rsp.op_errstr, ctx->hostname, + ctx->port, ctx->dict); } - gf_log ("glusterd", GF_LOG_INFO, "Received resp to probe req"); + glusterd_destroy_probe_ctx(ctx); -out: - if (rsp.hostname) - free (rsp.hostname);//malloced by xdr - GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe)); - return ret; -} + goto unlock; -int -glusterd3_1_friend_add_cbk (struct rpc_req * req, struct iovec *iov, - int count, void *myframe) -{ - gd1_mgmt_friend_rsp rsp = {{0},}; - glusterd_conf_t *conf = NULL; - int ret = -1; - glusterd_friend_sm_event_t *event = NULL; - glusterd_friend_sm_event_type_t event_type = GD_FRIEND_EVENT_NONE; - glusterd_peerinfo_t *peerinfo = NULL; - int32_t op_ret = -1; - int32_t op_errno = -1; - glusterd_probe_ctx_t *ctx = NULL; - glusterd_friend_update_ctx_t *ev_ctx = NULL; - - conf = THIS->private; - - if (-1 == req->rpc_status) { - rsp.op_ret = -1; - rsp.op_errno = EINVAL; - goto out; + } else if (strncasecmp(rsp.hostname, peerinfo->hostname, 1024)) { + gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_HOST_PRESENT_ALREADY, + "Host: %s with uuid: %s " + "already present in cluster with alias hostname: %s", + rsp.hostname, uuid_utoa(rsp.uuid), peerinfo->hostname); + + ctx = ((call_frame_t *)myframe)->local; + ((call_frame_t *)myframe)->local = NULL; + + if (!ctx) { + ret = -1; + goto unlock; } - ret = gd_xdr_to_mgmt_friend_rsp (*iov, &rsp); - if (ret < 0) { - gf_log ("", GF_LOG_ERROR, "error"); - rsp.op_ret = -1; - rsp.op_errno = EINVAL; - goto out; + rsp.op_errno = GF_PROBE_FRIEND; + if (ctx->req) { + glusterd_xfer_cli_probe_resp(ctx->req, rsp.op_ret, rsp.op_errno, + rsp.op_errstr, ctx->hostname, + ctx->port, ctx->dict); } - op_ret = rsp.op_ret; - op_errno = rsp.op_errno; + glusterd_destroy_probe_ctx(ctx); + (void)glusterd_friend_remove(NULL, rsp.hostname); + ret = rsp.op_ret; - gf_log ("glusterd", GF_LOG_INFO, - "Received %s from uuid: %s, host: %s, port: %d", - (op_ret)?"RJT":"ACC", uuid_utoa (rsp.uuid), rsp.hostname, rsp.port); + goto unlock; + } - ret = glusterd_friend_find (rsp.uuid, rsp.hostname, &peerinfo); +cont: + gf_uuid_copy(peerinfo->uuid, rsp.uuid); - if (ret) { - gf_log ("", GF_LOG_ERROR, "received friend add response from" - " unknown peer uuid: %s", uuid_utoa (rsp.uuid)); - goto out; - } + ret = glusterd_friend_sm_new_event(GD_FRIEND_EVENT_INIT_FRIEND_REQ, &event); - if (op_ret) - event_type = GD_FRIEND_EVENT_RCVD_RJT; - else - event_type = GD_FRIEND_EVENT_RCVD_ACC; + if (ret) { + RCU_READ_UNLOCK; + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_NEW_FRIEND_SM_EVENT_GET_FAIL, + "Unable to get event"); + goto out; + } - ret = glusterd_friend_sm_new_event (event_type, &event); + event->peername = gf_strdup(peerinfo->hostname); + gf_uuid_copy(event->peerid, peerinfo->uuid); - if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, - "Unable to get event"); - goto out; - } - event->peerinfo = peerinfo; - ev_ctx = GF_CALLOC (1, sizeof (*ev_ctx), - gf_gld_mt_friend_update_ctx_t); - if (!ev_ctx) { - ret = -1; - goto out; - } - - uuid_copy (ev_ctx->uuid, rsp.uuid); - ev_ctx->hostname = gf_strdup (rsp.hostname); + event->ctx = ((call_frame_t *)myframe)->local; + ((call_frame_t *)myframe)->local = NULL; + 
ret = glusterd_friend_sm_inject_event(event); - event->ctx = ev_ctx; - ret = glusterd_friend_sm_inject_event (event); + gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_PROBE_REQ_RESP_RCVD, + "Received resp to probe req"); - if (ret) - goto out; +unlock: + RCU_READ_UNLOCK; out: - ctx = ((call_frame_t *)myframe)->local; - ((call_frame_t *)myframe)->local = NULL; - - GF_ASSERT (ctx); - - if (ctx->req)//reverse probe doesnt have req - ret = glusterd_xfer_cli_probe_resp (ctx->req, op_ret, op_errno, - ctx->hostname, ctx->port); - if (!ret) { - glusterd_friend_sm (); - glusterd_op_sm (); - } - if (ctx) - glusterd_destroy_probe_ctx (ctx); - if (rsp.hostname) - free (rsp.hostname);//malloced by xdr - GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe)); - return ret; + free(rsp.hostname); // malloced by xdr + GLUSTERD_STACK_DESTROY(((call_frame_t *)myframe)); + + /* Attempt to start the state machine. Needed as no state machine could + * be running at time this RPC reply was received + */ + if (!ret) { + glusterd_friend_sm(); + glusterd_op_sm(); + } + + return ret; } int -glusterd3_1_friend_remove_cbk (struct rpc_req * req, struct iovec *iov, - int count, void *myframe) +glusterd_probe_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) { - gd1_mgmt_friend_rsp rsp = {{0},}; - glusterd_conf_t *conf = NULL; - int ret = -1; - glusterd_friend_sm_event_t *event = NULL; - glusterd_friend_sm_event_type_t event_type = GD_FRIEND_EVENT_NONE; - glusterd_peerinfo_t *peerinfo = NULL; - int32_t op_ret = -1; - int32_t op_errno = -1; - glusterd_probe_ctx_t *ctx = NULL; - - conf = THIS->private; - GF_ASSERT (conf); - - ctx = ((call_frame_t *)myframe)->local; - ((call_frame_t *)myframe)->local = NULL; - GF_ASSERT (ctx); - - if (-1 == req->rpc_status) { - rsp.op_ret = -1; - rsp.op_errno = EINVAL; - goto inject; - } + return glusterd_big_locked_cbk(req, iov, count, myframe, + __glusterd_probe_cbk); +} - ret = gd_xdr_to_mgmt_friend_rsp (*iov, &rsp); - if (ret < 0) { - gf_log ("", GF_LOG_ERROR, "error"); - rsp.op_ret = -1; - rsp.op_errno = EINVAL; - goto respond; - } +int +__glusterd_friend_add_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) +{ + gd1_mgmt_friend_rsp rsp = { + {0}, + }; + int ret = -1; + glusterd_friend_sm_event_t *event = NULL; + glusterd_friend_sm_event_type_t event_type = GD_FRIEND_EVENT_NONE; + glusterd_peerinfo_t *peerinfo = NULL; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + glusterd_probe_ctx_t *ctx = NULL; + glusterd_friend_update_ctx_t *ev_ctx = NULL; + + if (-1 == req->rpc_status) { + rsp.op_ret = -1; + rsp.op_errno = EINVAL; + goto out; + } + + ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_friend_rsp); + if (ret < 0) { + gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_RES_DECODE_FAIL, + "error"); + rsp.op_ret = -1; + rsp.op_errno = EINVAL; + goto out; + } + + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + + gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_RESPONSE_INFO, + "Received %s from uuid: %s, host: %s, port: %d", + (op_ret) ? 
"RJT" : "ACC", uuid_utoa(rsp.uuid), rsp.hostname, + rsp.port); + + RCU_READ_LOCK; + + peerinfo = glusterd_peerinfo_find(rsp.uuid, rsp.hostname); + if (peerinfo == NULL) { + RCU_READ_UNLOCK + ret = -1; + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_RESP_FROM_UNKNOWN_PEER, + "received friend add response from" + " unknown peer uuid: %s", + uuid_utoa(rsp.uuid)); + goto out; + } + + if (op_ret) + event_type = GD_FRIEND_EVENT_RCVD_RJT; + else + event_type = GD_FRIEND_EVENT_RCVD_ACC; + + ret = glusterd_friend_sm_new_event(event_type, &event); + + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_EVENT_NEW_GET_FAIL, + "Unable to get event"); + goto unlock; + } + + ev_ctx = GF_CALLOC(1, sizeof(*ev_ctx), gf_gld_mt_friend_update_ctx_t); + if (!ev_ctx) { + ret = -1; + goto unlock; + } + + gf_uuid_copy(ev_ctx->uuid, rsp.uuid); + ev_ctx->hostname = gf_strdup(rsp.hostname); + + event->peername = gf_strdup(peerinfo->hostname); + gf_uuid_copy(event->peerid, peerinfo->uuid); + event->ctx = ev_ctx; + ret = glusterd_friend_sm_inject_event(event); + +unlock: + RCU_READ_UNLOCK; +out: + ctx = ((call_frame_t *)myframe)->local; + ((call_frame_t *)myframe)->local = NULL; + + if (ctx && ctx->req) { + /*reverse probe doesn't have req*/ + ret = glusterd_xfer_cli_probe_resp(ctx->req, op_ret, op_errno, NULL, + ctx->hostname, ctx->port, ctx->dict); + } + if (!ret) { + glusterd_friend_sm(); + glusterd_op_sm(); + } + + if (ctx) + glusterd_destroy_probe_ctx(ctx); + free(rsp.hostname); // malloced by xdr + GLUSTERD_STACK_DESTROY(((call_frame_t *)myframe)); + return ret; +} - op_ret = rsp.op_ret; - op_errno = rsp.op_errno; +int +glusterd_friend_add_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) +{ + return glusterd_big_locked_cbk(req, iov, count, myframe, + __glusterd_friend_add_cbk); +} - gf_log ("glusterd", GF_LOG_INFO, - "Received %s from uuid: %s, host: %s, port: %d", - (op_ret)?"RJT":"ACC", uuid_utoa (rsp.uuid), rsp.hostname, rsp.port); +int +__glusterd_friend_remove_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) +{ + gd1_mgmt_friend_rsp rsp = { + {0}, + }; + glusterd_conf_t *conf = NULL; + int ret = -1; + glusterd_friend_sm_event_t *event = NULL; + glusterd_friend_sm_event_type_t event_type = GD_FRIEND_EVENT_NONE; + glusterd_peerinfo_t *peerinfo = NULL; + int32_t op_ret = -1; + int32_t op_errno = 0; + glusterd_probe_ctx_t *ctx = NULL; + gf_boolean_t move_sm_now = _gf_true; + + conf = THIS->private; + GF_ASSERT(conf); + + ctx = ((call_frame_t *)myframe)->local; + ((call_frame_t *)myframe)->local = NULL; + if (!ctx) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_EVENT_NEW_GET_FAIL, + "Unable to get glusterd probe context"); + goto out; + } + if (-1 == req->rpc_status) { + rsp.op_ret = -1; + rsp.op_errno = EINVAL; + move_sm_now = _gf_false; + goto inject; + } + + ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_friend_rsp); + if (ret < 0) { + gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_RES_DECODE_FAIL, + "error"); + rsp.op_ret = -1; + rsp.op_errno = EINVAL; + goto respond; + } + + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + + gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_RESPONSE_INFO, + "Received %s from uuid: %s, host: %s, port: %d", + (op_ret) ? 
"RJT" : "ACC", uuid_utoa(rsp.uuid), rsp.hostname, + rsp.port); inject: - ret = glusterd_friend_find (rsp.uuid, ctx->hostname, &peerinfo); + RCU_READ_LOCK; - if (ret) { - //can happen as part of rpc clnt connection cleanup - //when the frame timeout happens after 30 minutes - goto respond; - } + peerinfo = glusterd_peerinfo_find(rsp.uuid, ctx->hostname); + if (peerinfo == NULL) { + // can happen as part of rpc clnt connection cleanup + // when the frame timeout happens after 30 minutes + goto unlock; + } - event_type = GD_FRIEND_EVENT_REMOVE_FRIEND; + event_type = GD_FRIEND_EVENT_REMOVE_FRIEND; - ret = glusterd_friend_sm_new_event (event_type, &event); + ret = glusterd_friend_sm_new_event(event_type, &event); - if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, - "Unable to get event"); - goto respond; - } - event->peerinfo = peerinfo; - - ret = glusterd_friend_sm_inject_event (event); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_EVENT_NEW_GET_FAIL, + "Unable to get event"); + goto unlock; + } + event->peername = gf_strdup(peerinfo->hostname); + gf_uuid_copy(event->peerid, peerinfo->uuid); - if (ret) - goto respond; + ret = glusterd_friend_sm_inject_event(event); - glusterd_friend_sm (); - glusterd_op_sm (); + if (ret) + goto unlock; - op_ret = 0; + /*friend_sm would be moved on CLNT_DISCONNECT, consequently + cleaning up peerinfo. Else, we run the risk of triggering + a clnt_destroy within saved_frames_unwind. + */ + op_ret = 0; +unlock: + RCU_READ_UNLOCK; respond: - ret = glusterd_xfer_cli_deprobe_resp (ctx->req, op_ret, op_errno, - ctx->hostname); - if (!ret) { - glusterd_friend_sm (); - glusterd_op_sm (); - } - - if (ctx) { - glusterd_broadcast_friend_delete (ctx->hostname, NULL); - glusterd_destroy_probe_ctx (ctx); - } + ret = glusterd_xfer_cli_deprobe_resp(ctx->req, op_ret, op_errno, NULL, + ctx->hostname, ctx->dict); + if (!ret && move_sm_now) { + glusterd_friend_sm(); + glusterd_op_sm(); + } + + glusterd_broadcast_friend_delete(ctx->hostname, NULL); + glusterd_destroy_probe_ctx(ctx); +out: + free(rsp.hostname); // malloced by xdr + GLUSTERD_STACK_DESTROY(((call_frame_t *)myframe)); + return ret; +} - if (rsp.hostname) - free (rsp.hostname);//malloced by xdr - GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe)); - return ret; +int +glusterd_friend_remove_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) +{ + return glusterd_big_locked_cbk(req, iov, count, myframe, + __glusterd_friend_remove_cbk); } int32_t -glusterd3_1_friend_update_cbk (struct rpc_req *req, struct iovec *iov, - int count, void *myframe) +__glusterd_friend_update_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) { - gd1_mgmt_cluster_lock_rsp rsp = {{0},}; - int ret = -1; - int32_t op_ret = 0; - char str[50] = {0,}; - - GF_ASSERT (req); - - if (-1 == req->rpc_status) { - rsp.op_ret = -1; - rsp.op_errno = EINVAL; - goto out; - } - -/* ret = gd_xdr_to_mgmt_friend_update_rsp (*iov, &rsp); - if (ret < 0) { - gf_log ("", GF_LOG_ERROR, "error"); - rsp.op_ret = -1; - rsp.op_errno = EINVAL; - goto out; - } - uuid_unparse (rsp.uuid, str); + int ret = -1; + gd1_mgmt_friend_update_rsp rsp = { + {0}, + }; + xlator_t *this = NULL; + + GF_ASSERT(req); + this = THIS; + + if (-1 == req->rpc_status) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RPC_FAILURE, "RPC Error"); + goto out; + } + + ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_friend_update_rsp); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RES_DECODE_FAIL, + "Failed to serialize friend" + " 
update response"); + goto out; + } + + ret = 0; +out: + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_RESPONSE_INFO, + "Received %s from uuid: %s", (ret) ? "RJT" : "ACC", + uuid_utoa(rsp.uuid)); - op_ret = rsp.op_ret; -*/ - gf_log ("glusterd", GF_LOG_INFO, - "Received %s from uuid: %s", - (op_ret)?"RJT":"ACC", str); + GLUSTERD_STACK_DESTROY(((call_frame_t *)myframe)); + return ret; +} -out: - GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe)); - return ret; +int +glusterd_friend_update_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) +{ + return glusterd_big_locked_cbk(req, iov, count, myframe, + __glusterd_friend_update_cbk); } int32_t -glusterd3_1_cluster_lock_cbk (struct rpc_req *req, struct iovec *iov, - int count, void *myframe) +__glusterd_cluster_lock_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) { - gd1_mgmt_cluster_lock_rsp rsp = {{0},}; - int ret = -1; - int32_t op_ret = -1; - glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; - glusterd_peerinfo_t *peerinfo = NULL; - - GF_ASSERT (req); - - if (-1 == req->rpc_status) { - rsp.op_ret = -1; - rsp.op_errno = EINVAL; - goto out; - } + gd1_mgmt_cluster_lock_rsp rsp = { + {0}, + }; + int ret = -1; + int32_t op_ret = -1; + glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; + xlator_t *this = NULL; + uuid_t *txn_id = NULL; + glusterd_conf_t *priv = NULL; + char *err_str = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + GF_ASSERT(req); + + txn_id = &priv->global_txn_id; + + if (-1 == req->rpc_status) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_LOCK_RESP_FROM_PEER, + "Lock response is not " + "received from one of the peer"); + err_str = "Lock response is not received from one of the peer"; + glusterd_set_opinfo(err_str, ENETRESET, -1); + event_type = GD_OP_EVENT_RCVD_RJT; + goto out; + } + + ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_cluster_lock_rsp); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RES_DECODE_FAIL, + "Failed to decode " + "cluster lock response received from peer"); + err_str = + "Failed to decode cluster lock response received from" + " peer"; + glusterd_set_opinfo(err_str, EINVAL, -1); + event_type = GD_OP_EVENT_RCVD_RJT; + goto out; + } + + op_ret = rsp.op_ret; + + if (op_ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_LOCK_FROM_UUID_REJCT, + "Received lock RJT from uuid: %s", uuid_utoa(rsp.uuid)); + } else { + gf_msg_debug(this->name, 0, "Received lock ACC from uuid: %s", + uuid_utoa(rsp.uuid)); + } + + RCU_READ_LOCK; + ret = (glusterd_peerinfo_find(rsp.uuid, NULL) == NULL); + RCU_READ_UNLOCK; + + if (ret) { + gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_RESP_FROM_UNKNOWN_PEER, + "cluster lock response received from unknown peer: %s." + "Ignoring response", + uuid_utoa(rsp.uuid)); + err_str = "cluster lock response received from unknown peer"; + goto out; + } + + if (op_ret) { + event_type = GD_OP_EVENT_RCVD_RJT; + opinfo.op_ret = op_ret; + opinfo.op_errstr = gf_strdup( + "Another transaction could be in " + "progress. 
Please try again after" + " some time."); + } else { + event_type = GD_OP_EVENT_RCVD_ACC; + } - ret = gd_xdr_to_mgmt_cluster_lock_rsp (*iov, &rsp); - if (ret < 0) { - gf_log ("", GF_LOG_ERROR, "error"); - rsp.op_ret = -1; - rsp.op_errno = EINVAL; - goto out; - } - - op_ret = rsp.op_ret; - - gf_log ("glusterd", GF_LOG_INFO, - "Received %s from uuid: %s", - (op_ret)?"RJT":"ACC", uuid_utoa (rsp.uuid)); +out: - ret = glusterd_friend_find (rsp.uuid, NULL, &peerinfo); + ret = glusterd_set_txn_opinfo(txn_id, &opinfo); + if (ret) + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL, + "Unable to set " + "transaction's opinfo"); - if (ret) { - gf_log ("", GF_LOG_CRITICAL, "Lock response received from " - "unknown peer: %s", uuid_utoa (rsp.uuid)); - } + ret = glusterd_op_sm_inject_event(event_type, txn_id, NULL); - if (op_ret) { - event_type = GD_OP_EVENT_RCVD_RJT; - opinfo.op_ret = op_ret; - } else { - event_type = GD_OP_EVENT_RCVD_ACC; - } + if (!ret) { + glusterd_friend_sm(); + glusterd_op_sm(); + } - ret = glusterd_op_sm_inject_event (event_type, NULL); - - if (!ret) { - glusterd_friend_sm (); - glusterd_op_sm (); - } - -out: - GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe)); - return ret; + GLUSTERD_STACK_DESTROY(((call_frame_t *)myframe)); + return ret; } int32_t -glusterd3_1_cluster_unlock_cbk (struct rpc_req *req, struct iovec *iov, - int count, void *myframe) +glusterd_cluster_lock_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) { - gd1_mgmt_cluster_lock_rsp rsp = {{0},}; - int ret = -1; - int32_t op_ret = -1; - glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; - glusterd_peerinfo_t *peerinfo = NULL; - - - GF_ASSERT (req); - - if (-1 == req->rpc_status) { - rsp.op_ret = -1; - rsp.op_errno = EINVAL; - goto out; - } - - ret = gd_xdr_to_mgmt_cluster_unlock_rsp (*iov, &rsp); - if (ret < 0) { - gf_log ("", GF_LOG_ERROR, "error"); - rsp.op_ret = -1; - rsp.op_errno = EINVAL; - goto out; - } - - op_ret = rsp.op_ret; - - gf_log ("glusterd", GF_LOG_INFO, - "Received %s from uuid: %s", - (op_ret)?"RJT":"ACC", uuid_utoa (rsp.uuid)); - - ret = glusterd_friend_find (rsp.uuid, NULL, &peerinfo); - - if (ret) { - gf_log ("", GF_LOG_CRITICAL, "Unlock response received from " - "unknown peer %s", uuid_utoa (rsp.uuid)); - } - - if (op_ret) { - event_type = GD_OP_EVENT_RCVD_RJT; - opinfo.op_ret = op_ret; - } else { - event_type = GD_OP_EVENT_RCVD_ACC; - } - - ret = glusterd_op_sm_inject_event (event_type, NULL); - - if (!ret) { - glusterd_friend_sm (); - glusterd_op_sm (); - } + return glusterd_big_locked_cbk(req, iov, count, myframe, + __glusterd_cluster_lock_cbk); +} -out: - GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe)); - return ret; +void +glusterd_set_opinfo(char *errstr, int32_t op_errno, int32_t op_ret) +{ + opinfo.op_errstr = gf_strdup(errstr); + opinfo.op_errno = op_errno; + opinfo.op_ret = op_ret; } static int32_t -glusterd_rb_use_rsp_dict (dict_t *rsp_dict) +glusterd_mgmt_v3_lock_peers_cbk_fn(struct rpc_req *req, struct iovec *iov, + int count, void *myframe) { - int32_t src_port = 0; - int32_t dst_port = 0; - int ret = 0; - dict_t *ctx = NULL; - - - ctx = glusterd_op_get_ctx (GD_OP_REPLACE_BRICK); - if (!ctx) { - gf_log ("", GF_LOG_ERROR, - "Operation Context is not present"); - GF_ASSERT (0); - } - - if (rsp_dict) { - ret = dict_get_int32 (rsp_dict, "src-brick-port", &src_port); - if (ret == 0) { - gf_log ("", GF_LOG_DEBUG, - "src-brick-port=%d found", src_port); - } - - ret = dict_get_int32 (rsp_dict, "dst-brick-port", &dst_port); - if (ret == 
0) { - gf_log ("", GF_LOG_DEBUG, - "dst-brick-port=%d found", dst_port); - } - - } - - if (src_port) { - ret = dict_set_int32 (ctx, "src-brick-port", - src_port); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Could not set src-brick"); - goto out; - } - } - - if (dst_port) { - ret = dict_set_int32 (ctx, "dst-brick-port", - dst_port); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Could not set dst-brick"); - goto out; - } - - } + gd1_mgmt_v3_lock_rsp rsp = { + {0}, + }; + int ret = -1; + int32_t op_ret = -1; + glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; + xlator_t *this = NULL; + call_frame_t *frame = NULL; + uuid_t *txn_id = NULL; + char *err_str = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(req); + + frame = myframe; + txn_id = frame->cookie; + frame->cookie = NULL; + + if (-1 == req->rpc_status) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_LOCK_RESP_FROM_PEER, + "Lock response is not " + "received from one of the peer"); + err_str = "Lock response is not received from one of the peer"; + glusterd_set_opinfo(err_str, ENETRESET, -1); + event_type = GD_OP_EVENT_RCVD_RJT; + goto out; + } + + ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_v3_lock_rsp); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RES_DECODE_FAIL, + "Failed to decode " + "mgmt_v3 lock response received from peer"); + err_str = + "Failed to decode mgmt_v3 lock response received from" + " peer"; + glusterd_set_opinfo(err_str, EINVAL, -1); + event_type = GD_OP_EVENT_RCVD_RJT; + goto out; + } + + op_ret = rsp.op_ret; + + txn_id = &rsp.txn_id; + + if (op_ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_LOCK_FROM_UUID_REJCT, + "Received mgmt_v3 lock RJT from uuid: %s", uuid_utoa(rsp.uuid)); + } else { + gf_msg_debug(this->name, 0, "Received mgmt_v3 lock ACC from uuid: %s", + uuid_utoa(rsp.uuid)); + } + + RCU_READ_LOCK; + ret = (glusterd_peerinfo_find(rsp.uuid, NULL) == NULL); + RCU_READ_UNLOCK; + + if (ret) { + gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_RESP_FROM_UNKNOWN_PEER, + "mgmt_v3 lock response received " + "from unknown peer: %s. Ignoring response", + uuid_utoa(rsp.uuid)); + goto out; + } + + if (op_ret) { + event_type = GD_OP_EVENT_RCVD_RJT; + opinfo.op_ret = op_ret; + opinfo.op_errstr = gf_strdup( + "Another transaction could be in " + "progress. 
Please try again after" + " some time."); + } else { + event_type = GD_OP_EVENT_RCVD_ACC; + } out: - return ret; + ret = glusterd_set_txn_opinfo(txn_id, &opinfo); + if (ret) + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL, + "Unable to set " + "transaction's opinfo"); + + ret = glusterd_op_sm_inject_event(event_type, txn_id, NULL); + if (!ret) { + glusterd_friend_sm(); + glusterd_op_sm(); + } + + GF_FREE(frame->cookie); + GLUSTERD_STACK_DESTROY(frame); + return ret; } int32_t -glusterd3_1_stage_op_cbk (struct rpc_req *req, struct iovec *iov, - int count, void *myframe) +glusterd_mgmt_v3_lock_peers_cbk(struct rpc_req *req, struct iovec *iov, + int count, void *myframe) { - gd1_mgmt_stage_op_rsp rsp = {{0},}; - int ret = -1; - int32_t op_ret = -1; - glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; - glusterd_peerinfo_t *peerinfo = NULL; - dict_t *dict = NULL; - char err_str[2048] = {0}; - char *peer_str = NULL; - - GF_ASSERT (req); - - if (-1 == req->rpc_status) { - rsp.op_ret = -1; - rsp.op_errno = EINVAL; - rsp.op_errstr = "error"; - goto out; - } - - ret = gd_xdr_to_mgmt_stage_op_rsp (*iov, &rsp); - if (ret < 0) { - gf_log ("", GF_LOG_ERROR, "error"); - rsp.op_ret = -1; - rsp.op_errno = EINVAL; - rsp.op_errstr = "error"; - goto out; - } - - if (rsp.dict.dict_len) { - /* Unserialize the dictionary */ - dict = dict_new (); - - ret = dict_unserialize (rsp.dict.dict_val, - rsp.dict.dict_len, - &dict); - if (ret < 0) { - gf_log ("glusterd", GF_LOG_ERROR, - "failed to " - "unserialize rsp-buffer to dictionary"); - event_type = GD_OP_EVENT_RCVD_RJT; - goto out; - } else { - dict->extra_stdfree = rsp.dict.dict_val; - } - } - - op_ret = rsp.op_ret; - - gf_log ("glusterd", GF_LOG_INFO, - "Received %s from uuid: %s", - (op_ret)?"RJT":"ACC", uuid_utoa (rsp.uuid)); + return glusterd_big_locked_cbk(req, iov, count, myframe, + glusterd_mgmt_v3_lock_peers_cbk_fn); +} - ret = glusterd_friend_find (rsp.uuid, NULL, &peerinfo); +static int32_t +glusterd_mgmt_v3_unlock_peers_cbk_fn(struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + gd1_mgmt_v3_unlock_rsp rsp = { + {0}, + }; + int ret = -1; + int32_t op_ret = -1; + glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; + xlator_t *this = NULL; + call_frame_t *frame = NULL; + uuid_t *txn_id = NULL; + char *err_str = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(req); + + frame = myframe; + txn_id = frame->cookie; + frame->cookie = NULL; + + if (-1 == req->rpc_status) { + err_str = "Unlock response not received from one of the peer."; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CLUSTER_UNLOCK_FAILED, + "UnLock response is not received from one of the peer"); + glusterd_set_opinfo(err_str, 0, 0); + event_type = GD_OP_EVENT_RCVD_RJT; + goto out; + } + + ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_v3_unlock_rsp); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CLUSTER_UNLOCK_FAILED, + "Failed to decode mgmt_v3 unlock response received from" + "peer"); + err_str = + "Failed to decode mgmt_v3 unlock response received " + "from peer"; + glusterd_set_opinfo(err_str, 0, 0); + event_type = GD_OP_EVENT_RCVD_RJT; + goto out; + } + + op_ret = rsp.op_ret; + + txn_id = &rsp.txn_id; + + if (op_ret) { + gf_msg( + this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_UNLOCK_FROM_UUID_REJCT, + "Received mgmt_v3 unlock RJT from uuid: %s", uuid_utoa(rsp.uuid)); + } else { + gf_msg_debug(this->name, 0, "Received mgmt_v3 unlock ACC from uuid: %s", + uuid_utoa(rsp.uuid)); + } + + RCU_READ_LOCK; + ret = 
(glusterd_peerinfo_find(rsp.uuid, NULL) == NULL); + RCU_READ_UNLOCK; + + if (ret) { + gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_CLUSTER_UNLOCK_FAILED, + "mgmt_v3 unlock response received " + "from unknown peer: %s. Ignoring response", + uuid_utoa(rsp.uuid)); + goto out; + } + + if (op_ret) { + event_type = GD_OP_EVENT_RCVD_RJT; + opinfo.op_ret = op_ret; + opinfo.op_errstr = gf_strdup( + "Another transaction could be in " + "progress. Please try again after" + " some time."); + } else { + event_type = GD_OP_EVENT_RCVD_ACC; + } - if (ret) { - gf_log ("", GF_LOG_CRITICAL, "Stage response received from " - "unknown peer: %s", uuid_utoa (rsp.uuid)); - } +out: - if (op_ret) { - event_type = GD_OP_EVENT_RCVD_RJT; - opinfo.op_ret = op_ret; - if (strcmp ("", rsp.op_errstr)) { - opinfo.op_errstr = gf_strdup (rsp.op_errstr); - } else { - if (peerinfo) - peer_str = peerinfo->hostname; - else - peer_str = uuid_utoa (rsp.uuid); - snprintf (err_str, sizeof (err_str), "Operation failed " - "on %s", peer_str); - opinfo.op_errstr = gf_strdup (err_str); - } - if (!opinfo.op_errstr) { - gf_log ("", GF_LOG_ERROR, "memory allocation failed"); - ret = -1; - goto out; - } - } else { - event_type = GD_OP_EVENT_RCVD_ACC; - } + ret = glusterd_set_txn_opinfo(txn_id, &opinfo); + if (ret) + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL, + "Unable to set " + "transaction's opinfo"); - switch (rsp.op) { - case GD_OP_REPLACE_BRICK: - glusterd_rb_use_rsp_dict (dict); - break; - } + ret = glusterd_op_sm_inject_event(event_type, txn_id, NULL); - ret = glusterd_op_sm_inject_event (event_type, NULL); + if (!ret) { + glusterd_friend_sm(); + glusterd_op_sm(); + } - if (!ret) { - glusterd_friend_sm (); - glusterd_op_sm (); - } + GF_FREE(frame->cookie); + GLUSTERD_STACK_DESTROY(frame); + return ret; +} -out: - if (rsp.op_errstr && strcmp (rsp.op_errstr, "error")) - free (rsp.op_errstr); //malloced by xdr - if (dict) { - if (!dict->extra_stdfree && rsp.dict.dict_val) - free (rsp.dict.dict_val); //malloced by xdr - dict_unref (dict); - } else { - if (rsp.dict.dict_val) - free (rsp.dict.dict_val); //malloced by xdr - } - GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe)); - return ret; +int32_t +glusterd_mgmt_v3_unlock_peers_cbk(struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk(req, iov, count, myframe, + glusterd_mgmt_v3_unlock_peers_cbk_fn); } -static int32_t -glusterd_sync_use_rsp_dict (dict_t *rsp_dict) +int32_t +__glusterd_cluster_unlock_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) { - int ret = 0; + gd1_mgmt_cluster_lock_rsp rsp = { + {0}, + }; + int ret = -1; + int32_t op_ret = -1; + glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; + xlator_t *this = NULL; + uuid_t *txn_id = NULL; + glusterd_conf_t *priv = NULL; + char *err_str = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + GF_ASSERT(req); + + txn_id = &priv->global_txn_id; + + if (-1 == req->rpc_status) { + err_str = "Unlock response not received from one of the peer."; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CLUSTER_UNLOCK_FAILED, + "UnLock response is not received from one of the peer"); + glusterd_set_opinfo(err_str, 0, 0); + event_type = GD_OP_EVENT_RCVD_RJT; + goto out; + } + + ret = xdr_to_generic(*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_cluster_unlock_rsp); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CLUSTER_UNLOCK_FAILED, + "Failed to decode unlock response received from peer"); + err_str = + 
"Failed to decode cluster unlock response received " + "from peer"; + glusterd_set_opinfo(err_str, 0, 0); + event_type = GD_OP_EVENT_RCVD_RJT; + goto out; + } + + op_ret = rsp.op_ret; + + if (op_ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UNLOCK_FROM_UUID_REJCT, + "Received unlock RJT from uuid: %s", uuid_utoa(rsp.uuid)); + } else { + gf_msg_debug(this->name, 0, "Received unlock ACC from uuid: %s", + uuid_utoa(rsp.uuid)); + } + + RCU_READ_LOCK; + ret = (glusterd_peerinfo_find(rsp.uuid, NULL) == NULL); + RCU_READ_UNLOCK; + + if (ret) { + gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_CLUSTER_UNLOCK_FAILED, + "Unlock response received from unknown peer %s", + uuid_utoa(rsp.uuid)); + goto out; + } + + if (op_ret) { + event_type = GD_OP_EVENT_RCVD_RJT; + opinfo.op_ret = op_ret; + } else { + event_type = GD_OP_EVENT_RCVD_ACC; + } + +out: - GF_ASSERT (rsp_dict); + ret = glusterd_set_txn_opinfo(txn_id, &opinfo); + if (ret) + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL, + "Unable to set " + "transaction's opinfo"); - if (!rsp_dict) { - goto out; - } + ret = glusterd_op_sm_inject_event(event_type, txn_id, NULL); - ret = glusterd_import_friend_volumes (rsp_dict); -out: - return ret; + if (!ret) { + glusterd_friend_sm(); + glusterd_op_sm(); + } + GLUSTERD_STACK_DESTROY(((call_frame_t *)myframe)); + return ret; } -void -_profile_volume_add_friend_rsp (dict_t *this, char *key, data_t *value, - void *data) +int32_t +glusterd_cluster_unlock_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) { - char new_key[256] = {0}; - glusterd_pr_brick_rsp_conv_t *rsp_ctx = NULL; - data_t *new_value = NULL; - int brick_count = 0; - char brick_key[256]; - - if (strcmp (key, "count") == 0) - return; - sscanf (key, "%d%s", &brick_count, brick_key); - rsp_ctx = data; - new_value = data_copy (value); - GF_ASSERT (new_value); - snprintf (new_key, sizeof (new_key), "%d%s", - rsp_ctx->count + brick_count, brick_key); - dict_set (rsp_ctx->dict, new_key, new_value); + return glusterd_big_locked_cbk(req, iov, count, myframe, + __glusterd_cluster_unlock_cbk); } -int -glusterd_profile_volume_use_rsp_dict (dict_t *rsp_dict) +int32_t +__glusterd_stage_op_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) { - int ret = 0; - glusterd_pr_brick_rsp_conv_t rsp_ctx = {0}; - int32_t brick_count = 0; - int32_t count = 0; - dict_t *ctx_dict = NULL; - glusterd_op_t op = GD_OP_NONE; - - GF_ASSERT (rsp_dict); - - ret = dict_get_int32 (rsp_dict, "count", &brick_count); - if (ret) { - ret = 0; //no bricks in the rsp - goto out; + gd1_mgmt_stage_op_rsp rsp = { + {0}, + }; + int ret = -1; + int32_t op_ret = -1; + glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; + glusterd_peerinfo_t *peerinfo = NULL; + dict_t *dict = NULL; + char *peer_str = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + uuid_t *txn_id = NULL; + call_frame_t *frame = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(req); + priv = this->private; + GF_ASSERT(priv); + GF_ASSERT(myframe); + + frame = myframe; + txn_id = frame->cookie; + + if (-1 == req->rpc_status) { + rsp.op_ret = -1; + rsp.op_errno = EINVAL; + /* use standard allocation because to keep uniformity + in freeing it */ + rsp.op_errstr = strdup("error"); + goto out; + } + + ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_stage_op_rsp); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RES_DECODE_FAIL, + "Failed to decode stage " + "response received from peer"); + rsp.op_ret = -1; + rsp.op_errno = 
EINVAL; + /* use standard allocation because to keep uniformity + in freeing it */ + rsp.op_errstr = strdup( + "Failed to decode stage response " + "received from peer."); + goto out; + } + + if (rsp.dict.dict_len) { + /* Unserialize the dictionary */ + dict = dict_new(); + + ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + "failed to " + "unserialize rsp-buffer to dictionary"); + event_type = GD_OP_EVENT_RCVD_RJT; + goto out; + } else { + dict->extra_stdfree = rsp.dict.dict_val; } + } - op = glusterd_op_get_op (); - GF_ASSERT (GD_OP_PROFILE_VOLUME == op); - ctx_dict = glusterd_op_get_ctx (op); - - ret = dict_get_int32 (ctx_dict, "count", &count); - rsp_ctx.count = count; - rsp_ctx.dict = ctx_dict; - dict_foreach (rsp_dict, _profile_volume_add_friend_rsp, &rsp_ctx); - dict_del (ctx_dict, "count"); - ret = dict_set_int32 (ctx_dict, "count", count + brick_count); out: - return ret; + op_ret = rsp.op_ret; + + if (op_ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STAGE_FROM_UUID_REJCT, + "Received stage RJT from uuid: %s", uuid_utoa(rsp.uuid)); + } else { + gf_msg_debug(this->name, 0, "Received stage ACC from uuid: %s", + uuid_utoa(rsp.uuid)); + } + + RCU_READ_LOCK; + peerinfo = glusterd_peerinfo_find(rsp.uuid, NULL); + if (peerinfo == NULL) { + gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_RESP_FROM_UNKNOWN_PEER, + "Stage response received " + "from unknown peer: %s. Ignoring response.", + uuid_utoa(rsp.uuid)); + } + + if (op_ret) { + event_type = GD_OP_EVENT_RCVD_RJT; + opinfo.op_ret = op_ret; + if (strcmp("", rsp.op_errstr)) { + opinfo.op_errstr = gf_strdup(rsp.op_errstr); + } else { + if (peerinfo) + peer_str = peerinfo->hostname; + else + peer_str = uuid_utoa(rsp.uuid); + char err_str[2048]; + snprintf(err_str, sizeof(err_str), OPERRSTR_STAGE_FAIL, peer_str); + opinfo.op_errstr = gf_strdup(err_str); + } + } else { + event_type = GD_OP_EVENT_RCVD_ACC; + } + + RCU_READ_UNLOCK; + + ret = glusterd_set_txn_opinfo(txn_id, &opinfo); + if (ret) + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL, + "Unable to set " + "transaction's opinfo"); + + ret = glusterd_op_sm_inject_event(event_type, txn_id, NULL); + + if (!ret) { + glusterd_friend_sm(); + glusterd_op_sm(); + } + + free(rsp.op_errstr); // malloced by xdr + if (dict) { + if (!dict->extra_stdfree && rsp.dict.dict_val) + free(rsp.dict.dict_val); // malloced by xdr + dict_unref(dict); + } else { + free(rsp.dict.dict_val); // malloced by xdr + } + GF_FREE(frame->cookie); + GLUSTERD_STACK_DESTROY(((call_frame_t *)myframe)); + return ret; } int32_t -glusterd3_1_commit_op_cbk (struct rpc_req *req, struct iovec *iov, - int count, void *myframe) +glusterd_stage_op_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) { - gd1_mgmt_commit_op_rsp rsp = {{0},}; - int ret = -1; - int32_t op_ret = -1; - glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; - glusterd_peerinfo_t *peerinfo = NULL; - dict_t *dict = NULL; - char err_str[2048] = {0}; - char *peer_str = NULL; - - - GF_ASSERT (req); - - if (-1 == req->rpc_status) { - rsp.op_ret = -1; - rsp.op_errno = EINVAL; - rsp.op_errstr = "error"; - event_type = GD_OP_EVENT_RCVD_RJT; - goto out; - } + return glusterd_big_locked_cbk(req, iov, count, myframe, + __glusterd_stage_op_cbk); +} - ret = gd_xdr_to_mgmt_commit_op_rsp (*iov, &rsp); +int32_t +__glusterd_commit_op_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) +{ + 
gd1_mgmt_commit_op_rsp rsp = { + {0}, + }; + int ret = -1; + int32_t op_ret = -1; + glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; + glusterd_peerinfo_t *peerinfo = NULL; + dict_t *dict = NULL; + char *peer_str = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + uuid_t *txn_id = NULL; + glusterd_op_info_t txn_op_info = { + {0}, + }; + call_frame_t *frame = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(req); + priv = this->private; + GF_ASSERT(priv); + GF_ASSERT(myframe); + + frame = myframe; + txn_id = frame->cookie; + + if (-1 == req->rpc_status) { + rsp.op_ret = -1; + rsp.op_errno = EINVAL; + /* use standard allocation because to keep uniformity + in freeing it */ + rsp.op_errstr = strdup("error"); + event_type = GD_OP_EVENT_RCVD_RJT; + goto out; + } + + ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_commit_op_rsp); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RES_DECODE_FAIL, + "Failed to decode commit " + "response received from peer"); + rsp.op_ret = -1; + rsp.op_errno = EINVAL; + /* use standard allocation because to keep uniformity + in freeing it */ + rsp.op_errstr = strdup( + "Failed to decode commit response " + "received from peer."); + event_type = GD_OP_EVENT_RCVD_RJT; + goto out; + } + + if (rsp.dict.dict_len) { + /* Unserialize the dictionary */ + dict = dict_new(); + + ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict); if (ret < 0) { - gf_log ("", GF_LOG_ERROR, "error"); - rsp.op_ret = -1; - rsp.op_errno = EINVAL; - rsp.op_errstr = "error"; - event_type = GD_OP_EVENT_RCVD_RJT; - goto out; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + "failed to " + "unserialize rsp-buffer to dictionary"); + event_type = GD_OP_EVENT_RCVD_RJT; + goto out; + } else { + dict->extra_stdfree = rsp.dict.dict_val; + } + } + + op_ret = rsp.op_ret; + + if (op_ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_FROM_UUID_REJCT, + "Received commit RJT from uuid: %s", uuid_utoa(rsp.uuid)); + } else { + gf_msg_debug(this->name, 0, "Received commit ACC from uuid: %s", + uuid_utoa(rsp.uuid)); + } + + ret = glusterd_get_txn_opinfo(txn_id, &txn_op_info); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_GET_FAIL, + "Failed to get txn_op_info " + "for txn_id = %s", + uuid_utoa(*txn_id)); + } + + RCU_READ_LOCK; + peerinfo = glusterd_peerinfo_find(rsp.uuid, NULL); + if (peerinfo == NULL) { + gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_RESP_FROM_UNKNOWN_PEER, + "Commit response for " + "'Volume %s' received from unknown peer: %s", + gd_op_list[opinfo.op], uuid_utoa(rsp.uuid)); + } + + if (op_ret) { + event_type = GD_OP_EVENT_RCVD_RJT; + opinfo.op_ret = op_ret; + if (strcmp("", rsp.op_errstr)) { + opinfo.op_errstr = gf_strdup(rsp.op_errstr); + } else { + if (peerinfo) + peer_str = peerinfo->hostname; + else + peer_str = uuid_utoa(rsp.uuid); + char err_str[2048]; + snprintf(err_str, sizeof(err_str), OPERRSTR_COMMIT_FAIL, peer_str); + opinfo.op_errstr = gf_strdup(err_str); } - - if (rsp.dict.dict_len) { - /* Unserialize the dictionary */ - dict = dict_new (); - - ret = dict_unserialize (rsp.dict.dict_val, - rsp.dict.dict_len, - &dict); - if (ret < 0) { - gf_log ("glusterd", GF_LOG_ERROR, - "failed to " - "unserialize rsp-buffer to dictionary"); - event_type = GD_OP_EVENT_RCVD_RJT; - goto out; - } else { - dict->extra_stdfree = rsp.dict.dict_val; - } + if (!opinfo.op_errstr) { + goto unlock; } + } else { + event_type = GD_OP_EVENT_RCVD_ACC; + GF_ASSERT(rsp.op == txn_op_info.op); - 
op_ret = rsp.op_ret; - - gf_log ("glusterd", GF_LOG_INFO, - "Received %s from uuid: %s", - (op_ret)?"RJT":"ACC", uuid_utoa (rsp.uuid)); - - ret = glusterd_friend_find (rsp.uuid, NULL, &peerinfo); - - if (ret) { - gf_log ("", GF_LOG_CRITICAL, "Commit response received from " - "unknown peer: %s", uuid_utoa (rsp.uuid)); - } - - if (op_ret) { - event_type = GD_OP_EVENT_RCVD_RJT; - opinfo.op_ret = op_ret; - if (strcmp ("", rsp.op_errstr)) { - opinfo.op_errstr = gf_strdup(rsp.op_errstr); - } else { - if (peerinfo) - peer_str = peerinfo->hostname; - else - peer_str = uuid_utoa (rsp.uuid); - snprintf (err_str, sizeof (err_str), "Operation failed " - "on %s", peer_str); - opinfo.op_errstr = gf_strdup (err_str); - } - if (!opinfo.op_errstr) { - gf_log ("", GF_LOG_ERROR, "memory allocation failed"); - ret = -1; - goto out; - } - } else { - event_type = GD_OP_EVENT_RCVD_ACC; - switch (rsp.op) { - case GD_OP_REPLACE_BRICK: - ret = glusterd_rb_use_rsp_dict (dict); - if (ret) - goto out; - break; - - case GD_OP_SYNC_VOLUME: - ret = glusterd_sync_use_rsp_dict (dict); - if (ret) - goto out; + switch (rsp.op) { + case GD_OP_PROFILE_VOLUME: + ret = glusterd_profile_volume_use_rsp_dict(txn_op_info.op_ctx, + dict); + if (ret) + goto unlock; break; - case GD_OP_PROFILE_VOLUME: - ret = glusterd_profile_volume_use_rsp_dict (dict); - if (ret) - goto out; + case GD_OP_REBALANCE: + case GD_OP_DEFRAG_BRICK_VOLUME: + ret = glusterd_volume_rebalance_use_rsp_dict(txn_op_info.op_ctx, + dict); + if (ret) + goto unlock; break; - default: + default: break; - } } + } +unlock: + RCU_READ_UNLOCK; out: - ret = glusterd_op_sm_inject_event (event_type, NULL); - if (!ret) { - glusterd_friend_sm (); - glusterd_op_sm (); - } - - if (dict) - dict_unref (dict); - if (rsp.op_errstr && strcmp (rsp.op_errstr, "error")) - free (rsp.op_errstr); //malloced by xdr - GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe)); - return ret; + ret = glusterd_set_txn_opinfo(txn_id, &opinfo); + if (ret) + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL, + "Unable to set " + "transaction's opinfo"); + + ret = glusterd_op_sm_inject_event(event_type, txn_id, NULL); + + if (!ret) { + glusterd_friend_sm(); + glusterd_op_sm(); + } + + if (dict) + dict_unref(dict); + free(rsp.op_errstr); // malloced by xdr + GF_FREE(frame->cookie); + GLUSTERD_STACK_DESTROY(((call_frame_t *)myframe)); + return ret; } - - int32_t -glusterd3_1_probe (call_frame_t *frame, xlator_t *this, - void *data) +glusterd_commit_op_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) { - gd1_mgmt_probe_req req = {{0},}; - int ret = 0; - int port = 0; - char *hostname = NULL; - glusterd_peerinfo_t *peerinfo = NULL; - glusterd_conf_t *priv = NULL; - dict_t *dict = NULL; - - if (!frame || !this || !data) { - ret = -1; - goto out; - } - - dict = data; - priv = this->private; - - GF_ASSERT (priv); - ret = dict_get_str (dict, "hostname", &hostname); - if (ret) - goto out; - ret = dict_get_int32 (dict, "port", &port); - if (ret) - port = GF_DEFAULT_BASE_PORT; - - ret = dict_get_ptr (dict, "peerinfo", VOID (&peerinfo)); - if (ret) - goto out; - - uuid_copy (req.uuid, priv->uuid); - req.hostname = gf_strdup (hostname); - req.port = port; + return glusterd_big_locked_cbk(req, iov, count, myframe, + __glusterd_commit_op_cbk); +} - ret = glusterd_submit_request (peerinfo->rpc, &req, frame, peerinfo->mgmt, - GD_MGMT_PROBE_QUERY, - NULL, gd_xdr_from_mgmt_probe_req, - this, glusterd3_1_probe_cbk); +int32_t +glusterd_rpc_probe(call_frame_t *frame, xlator_t *this, void 
*data) +{ + gd1_mgmt_probe_req req = { + {0}, + }; + int ret = 0; + int port = 0; + char *hostname = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_conf_t *priv = NULL; + dict_t *dict = NULL; + + if (!frame || !this || !data) { + gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); + ret = -1; + goto out; + } + + dict = data; + priv = this->private; + + GF_ASSERT(priv); + ret = dict_get_strn(dict, "hostname", SLEN("hostname"), &hostname); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=hostname", NULL); + goto out; + } + ret = dict_get_int32n(dict, "port", SLEN("port"), &port); + if (ret) { + gf_smsg(this->name, GF_LOG_DEBUG, errno, GD_MSG_DICT_GET_FAILED, + "Key=port", NULL); + port = GF_DEFAULT_BASE_PORT; + } + + ret = dict_get_ptr(dict, "peerinfo", VOID(&peerinfo)); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=peerinfo", NULL); + goto out; + } + + gf_uuid_copy(req.uuid, MY_UUID); + req.hostname = gf_strdup(hostname); + req.port = port; + + ret = glusterd_submit_request( + peerinfo->rpc, &req, frame, peerinfo->peer, GLUSTERD_PROBE_QUERY, NULL, + this, glusterd_probe_cbk, (xdrproc_t)xdr_gd1_mgmt_probe_req); out: - if (req.hostname) - GF_FREE (req.hostname); - gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + GF_FREE(req.hostname); + gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret); + return ret; } - int32_t -glusterd3_1_friend_add (call_frame_t *frame, xlator_t *this, - void *data) +glusterd_rpc_friend_add(call_frame_t *frame, xlator_t *this, void *data) { - gd1_mgmt_friend_req req = {{0},}; - int ret = 0; - glusterd_peerinfo_t *peerinfo = NULL; - glusterd_conf_t *priv = NULL; - glusterd_friend_sm_event_t *event = NULL; - glusterd_friend_req_ctx_t *ctx = NULL; - dict_t *vols = NULL; - - - if (!frame || !this || !data) { - ret = -1; - goto out; + gd1_mgmt_friend_req req = { + {0}, + }; + int ret = 0; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_conf_t *priv = NULL; + glusterd_friend_sm_event_t *event = NULL; + dict_t *peer_data = NULL; + + if (!frame || !this || !data) { + gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); + ret = -1; + goto out; + } + + event = data; + priv = this->private; + + GF_ASSERT(priv); + + RCU_READ_LOCK; + + peerinfo = glusterd_peerinfo_find(event->peerid, event->peername); + if (!peerinfo) { + RCU_READ_UNLOCK; + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PEER_NOT_FOUND, + "Could not find peer %s(%s)", event->peername, + uuid_utoa(event->peerid)); + goto out; + } + + req.hostname = gf_strdup(peerinfo->hostname); + req.port = peerinfo->port; + + RCU_READ_UNLOCK; + + gf_uuid_copy(req.uuid, MY_UUID); + + peer_data = dict_new(); + if (!peer_data) { + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_DICT_CREATE_FAIL, + NULL); + errno = ENOMEM; + goto out; + } + + ret = dict_set_dynstr_with_alloc(peer_data, "hostname_in_cluster", + peerinfo->hostname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Unable to add hostname of the peer"); + goto out; + } + + if (priv->op_version >= GD_OP_VERSION_3_6_0) { + ret = glusterd_add_missed_snaps_to_export_dict(peer_data); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_MISSED_SNAP_LIST_STORE_FAIL, + "Unable to add list of missed snapshots " + "in the peer_data dict for handshake"); + goto out; } - event = data; - priv = this->private; - - GF_ASSERT (priv); - - ctx = event->ctx; - - peerinfo = 
event->peerinfo; - - ret = glusterd_build_volume_dict (&vols); - if (ret) - goto out; - - uuid_copy (req.uuid, priv->uuid); - req.hostname = peerinfo->hostname; - req.port = peerinfo->port; - - ret = dict_allocate_and_serialize (vols, &req.vols.vols_val, - (size_t *)&req.vols.vols_len); - if (ret) - goto out; - - ret = glusterd_submit_request (peerinfo->rpc, &req, frame, peerinfo->mgmt, - GD_MGMT_FRIEND_ADD, - NULL, gd_xdr_from_mgmt_friend_req, - this, glusterd3_1_friend_add_cbk); + ret = glusterd_add_snapshots_to_export_dict(peer_data); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_LIST_SET_FAIL, + "Unable to add list of snapshots " + "in the peer_data dict for handshake"); + goto out; + } + } + + /* Don't add any key-value in peer_data dictionary after call this function + */ + ret = glusterd_add_volumes_to_export_dict(peer_data, &req.vols.vols_val, + &req.vols.vols_len); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to add list of volumes " + "in the peer_data dict for handshake"); + goto out; + } + + if (!req.vols.vols_len) { + ret = dict_allocate_and_serialize(peer_data, &req.vols.vols_val, + &req.vols.vols_len); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); + goto out; + } + } + ret = glusterd_submit_request( + peerinfo->rpc, &req, frame, peerinfo->peer, GLUSTERD_FRIEND_ADD, NULL, + this, glusterd_friend_add_cbk, (xdrproc_t)xdr_gd1_mgmt_friend_req); out: - if (req.vols.vols_val) - GF_FREE (req.vols.vols_val); + GF_FREE(req.vols.vols_val); + GF_FREE(req.hostname); - if (vols) - dict_unref (vols); + if (peer_data) + dict_unref(peer_data); - gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret); + return ret; } int32_t -glusterd3_1_friend_remove (call_frame_t *frame, xlator_t *this, - void *data) +glusterd_rpc_friend_remove(call_frame_t *frame, xlator_t *this, void *data) { - gd1_mgmt_friend_req req = {{0},}; - int ret = 0; - glusterd_peerinfo_t *peerinfo = NULL; - glusterd_conf_t *priv = NULL; - glusterd_friend_sm_event_t *event = NULL; - - if (!frame || !this || !data) { - ret = -1; - goto out; - } + gd1_mgmt_friend_req req = { + {0}, + }; + int ret = 0; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_conf_t *priv = NULL; + glusterd_friend_sm_event_t *event = NULL; + + if (!frame || !this || !data) { + ret = -1; + goto out; + } + + event = data; + priv = this->private; + + GF_ASSERT(priv); + + RCU_READ_LOCK; + + peerinfo = glusterd_peerinfo_find(event->peerid, event->peername); + if (!peerinfo) { + RCU_READ_UNLOCK; + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PEER_NOT_FOUND, + "Could not find peer %s(%s)", event->peername, + uuid_utoa(event->peerid)); + goto out; + } + + gf_uuid_copy(req.uuid, MY_UUID); + req.hostname = gf_strdup(peerinfo->hostname); + req.port = peerinfo->port; + + ret = glusterd_submit_request(peerinfo->rpc, &req, frame, peerinfo->peer, + GLUSTERD_FRIEND_REMOVE, NULL, this, + glusterd_friend_remove_cbk, + (xdrproc_t)xdr_gd1_mgmt_friend_req); + + RCU_READ_UNLOCK; +out: + GF_FREE(req.hostname); - event = data; - priv = this->private; + gf_msg_debug(this ? 
this->name : "glusterd", 0, "Returning %d", ret); + return ret; +} - GF_ASSERT (priv); +int32_t +glusterd_rpc_friend_update(call_frame_t *frame, xlator_t *this, void *data) +{ + gd1_mgmt_friend_update req = { + {0}, + }; + int ret = 0; + glusterd_conf_t *priv = NULL; + dict_t *friends = NULL; + call_frame_t *dummy_frame = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + + priv = this->private; + GF_ASSERT(priv); + + friends = data; + if (!friends) + goto out; + + ret = dict_get_ptr(friends, "peerinfo", VOID(&peerinfo)); + if (ret) + goto out; + /* Don't want to send the pointer over */ + dict_deln(friends, "peerinfo", SLEN("peerinfo")); + + ret = dict_allocate_and_serialize(friends, &req.friends.friends_val, + &req.friends.friends_len); + if (ret) + goto out; + + gf_uuid_copy(req.uuid, MY_UUID); + + dummy_frame = create_frame(this, this->ctx->pool); + ret = glusterd_submit_request(peerinfo->rpc, &req, dummy_frame, + peerinfo->peer, GLUSTERD_FRIEND_UPDATE, NULL, + this, glusterd_friend_update_cbk, + (xdrproc_t)xdr_gd1_mgmt_friend_update); - peerinfo = event->peerinfo; +out: + GF_FREE(req.friends.friends_val); - uuid_copy (req.uuid, priv->uuid); - req.hostname = peerinfo->hostname; - req.port = peerinfo->port; - ret = glusterd_submit_request (peerinfo->rpc, &req, frame, peerinfo->mgmt, - GD_MGMT_FRIEND_REMOVE, - NULL, gd_xdr_from_mgmt_friend_req, - this, glusterd3_1_friend_remove_cbk); + if (ret && dummy_frame) + STACK_DESTROY(dummy_frame->root); -out: - gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret); + return ret; } - int32_t -glusterd3_1_friend_update (call_frame_t *frame, xlator_t *this, - void *data) +glusterd_cluster_lock(call_frame_t *frame, xlator_t *this, void *data) { - gd1_mgmt_friend_update req = {{0},}; - int ret = 0; - glusterd_conf_t *priv = NULL; - dict_t *friends = NULL; - char *dict_buf = NULL; - size_t len = -1; - call_frame_t *dummy_frame = NULL; - glusterd_peerinfo_t *peerinfo = NULL; - - priv = this->private; - GF_ASSERT (priv); - - friends = data; - if (!friends) - goto out; + gd1_mgmt_cluster_lock_req req = { + {0}, + }; + int ret = -1; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_conf_t *priv = NULL; + call_frame_t *dummy_frame = NULL; - ret = dict_get_ptr (friends, "peerinfo", VOID(&peerinfo)); - if (ret) - goto out; + if (!this) + goto out; - ret = dict_allocate_and_serialize (friends, &dict_buf, (size_t *)&len); - if (ret) - goto out; + peerinfo = data; - req.friends.friends_val = dict_buf; - req.friends.friends_len = len; + priv = this->private; + GF_ASSERT(priv); - uuid_copy (req.uuid, priv->uuid); + glusterd_get_uuid(&req.uuid); - dummy_frame = create_frame (this, this->ctx->pool); - ret = glusterd_submit_request (peerinfo->rpc, &req, dummy_frame, - peerinfo->mgmt, - GD_MGMT_FRIEND_UPDATE, - NULL, gd_xdr_from_mgmt_friend_update, - this, glusterd3_1_friend_update_cbk); + dummy_frame = create_frame(this, this->ctx->pool); + if (!dummy_frame) + goto out; + ret = glusterd_submit_request(peerinfo->rpc, &req, dummy_frame, + peerinfo->mgmt, GLUSTERD_MGMT_CLUSTER_LOCK, + NULL, this, glusterd_cluster_lock_cbk, + (xdrproc_t)xdr_gd1_mgmt_cluster_lock_req); out: - if (req.friends.friends_val) - GF_FREE (req.friends.friends_val); + gf_msg_debug(this ? 
this->name : "glusterd", 0, "Returning %d", ret); - gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + if (ret && dummy_frame) + STACK_DESTROY(dummy_frame->root); + return ret; } int32_t -glusterd3_1_cluster_lock (call_frame_t *frame, xlator_t *this, - void *data) +glusterd_mgmt_v3_lock_peers(call_frame_t *frame, xlator_t *this, void *data) { - gd1_mgmt_cluster_lock_req req = {{0},}; - int ret = -1; - glusterd_peerinfo_t *peerinfo = NULL; - glusterd_conf_t *priv = NULL; - call_frame_t *dummy_frame = NULL; - - if (!this) - goto out; - - peerinfo = data; - - priv = this->private; - GF_ASSERT (priv); - - glusterd_get_uuid (&req.uuid); - - dummy_frame = create_frame (this, this->ctx->pool); - if (!dummy_frame) - goto out; - - ret = glusterd_submit_request (peerinfo->rpc, &req, dummy_frame, - peerinfo->mgmt, GD_MGMT_CLUSTER_LOCK, - NULL, - gd_xdr_from_mgmt_cluster_lock_req, - this, glusterd3_1_cluster_lock_cbk); + gd1_mgmt_v3_lock_req req = { + {0}, + }; + int ret = -1; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_conf_t *priv = NULL; + dict_t *dict = NULL; + uuid_t *txn_id = NULL; + + if (!this) + goto out; + + dict = data; + + priv = this->private; + GF_ASSERT(priv); + + ret = dict_get_ptr(dict, "peerinfo", VOID(&peerinfo)); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=peerinfo", NULL); + goto out; + } + + // peerinfo should not be in payload + dict_deln(dict, "peerinfo", SLEN("peerinfo")); + + glusterd_get_uuid(&req.uuid); + + ret = dict_allocate_and_serialize(dict, &req.dict.dict_val, + &req.dict.dict_len); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); + goto out; + } + + /* Sending valid transaction ID to peers */ + ret = dict_get_bin(dict, "transaction_id", (void **)&txn_id); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_ID_GET_FAIL, + "Failed to get transaction id."); + goto out; + } else { + gf_msg_debug(this->name, 0, "Transaction_id = %s", uuid_utoa(*txn_id)); + gf_uuid_copy(req.txn_id, *txn_id); + } + + if (!frame) + frame = create_frame(this, this->ctx->pool); + + if (!frame) { + ret = -1; + goto out; + } + frame->cookie = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); + if (!frame->cookie) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); + ret = -1; + goto out; + } + gf_uuid_copy(frame->cookie, req.txn_id); + + ret = glusterd_submit_request(peerinfo->rpc, &req, frame, peerinfo->mgmt_v3, + GLUSTERD_MGMT_V3_LOCK, NULL, this, + glusterd_mgmt_v3_lock_peers_cbk, + (xdrproc_t)xdr_gd1_mgmt_v3_lock_req); out: - gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + gf_msg_debug(this ? 
this->name : "glusterd", 0, "Returning %d", ret); + if (dict) + dict_unref(dict); + if (req.dict.dict_val) + GF_FREE(req.dict.dict_val); + return ret; } int32_t -glusterd3_1_cluster_unlock (call_frame_t *frame, xlator_t *this, - void *data) +glusterd_mgmt_v3_unlock_peers(call_frame_t *frame, xlator_t *this, void *data) { - gd1_mgmt_cluster_lock_req req = {{0},}; - int ret = -1; - glusterd_peerinfo_t *peerinfo = NULL; - glusterd_conf_t *priv = NULL; - call_frame_t *dummy_frame = NULL; - - if (!this ) { - ret = -1; - goto out; - } - peerinfo = data; - priv = this->private; - GF_ASSERT (priv); - - glusterd_get_uuid (&req.uuid); - - dummy_frame = create_frame (this, this->ctx->pool); - if (!dummy_frame) - goto out; - - ret = glusterd_submit_request (peerinfo->rpc, &req, dummy_frame, - peerinfo->mgmt, GD_MGMT_CLUSTER_UNLOCK, - NULL, - gd_xdr_from_mgmt_cluster_unlock_req, - this, glusterd3_1_cluster_unlock_cbk); + gd1_mgmt_v3_unlock_req req = { + {0}, + }; + int ret = -1; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_conf_t *priv = NULL; + dict_t *dict = NULL; + uuid_t *txn_id = NULL; + + if (!this) + goto out; + + dict = data; + + priv = this->private; + GF_ASSERT(priv); + + ret = dict_get_ptr(dict, "peerinfo", VOID(&peerinfo)); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=peerinfo", NULL); + goto out; + } + + // peerinfo should not be in payload + dict_deln(dict, "peerinfo", SLEN("peerinfo")); + + glusterd_get_uuid(&req.uuid); + + ret = dict_allocate_and_serialize(dict, &req.dict.dict_val, + &req.dict.dict_len); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); + goto out; + } + + /* Sending valid transaction ID to peers */ + ret = dict_get_bin(dict, "transaction_id", (void **)&txn_id); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_ID_GET_FAIL, + "Failed to get transaction id."); + goto out; + } else { + gf_msg_debug(this->name, 0, "Transaction_id = %s", uuid_utoa(*txn_id)); + gf_uuid_copy(req.txn_id, *txn_id); + } + + if (!frame) + frame = create_frame(this, this->ctx->pool); + + if (!frame) { + ret = -1; + goto out; + } + frame->cookie = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); + if (!frame->cookie) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); + ret = -1; + goto out; + } + gf_uuid_copy(frame->cookie, req.txn_id); + + ret = glusterd_submit_request(peerinfo->rpc, &req, frame, peerinfo->mgmt_v3, + GLUSTERD_MGMT_V3_UNLOCK, NULL, this, + glusterd_mgmt_v3_unlock_peers_cbk, + (xdrproc_t)xdr_gd1_mgmt_v3_unlock_req); out: - gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + gf_msg_debug(this ? 
this->name : "glusterd", 0, "Returning %d", ret); + if (dict) + dict_unref(dict); + + if (req.dict.dict_val) + GF_FREE(req.dict.dict_val); + return ret; } int32_t -glusterd3_1_stage_op (call_frame_t *frame, xlator_t *this, - void *data) +glusterd_cluster_unlock(call_frame_t *frame, xlator_t *this, void *data) { - gd1_mgmt_stage_op_req req = {{0,},}; - int ret = -1; - glusterd_peerinfo_t *peerinfo = NULL; - glusterd_conf_t *priv = NULL; - call_frame_t *dummy_frame = NULL; - dict_t *dict = NULL; - gf_boolean_t is_alloc = _gf_true; - - if (!this) { - goto out; - } - - dict = data; - - priv = this->private; - GF_ASSERT (priv); - - ret = dict_get_ptr (dict, "peerinfo", VOID (&peerinfo)); - if (ret) - goto out; + gd1_mgmt_cluster_lock_req req = { + {0}, + }; + int ret = -1; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_conf_t *priv = NULL; + call_frame_t *dummy_frame = NULL; + + if (!this) { + ret = -1; + goto out; + } + peerinfo = data; + priv = this->private; + GF_ASSERT(priv); + + glusterd_get_uuid(&req.uuid); + + dummy_frame = create_frame(this, this->ctx->pool); + if (!dummy_frame) + goto out; + + ret = glusterd_submit_request(peerinfo->rpc, &req, dummy_frame, + peerinfo->mgmt, GLUSTERD_MGMT_CLUSTER_UNLOCK, + NULL, this, glusterd_cluster_unlock_cbk, + (xdrproc_t)xdr_gd1_mgmt_cluster_unlock_req); +out: + gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret); - //peerinfo should not be in payload - dict_del (dict, "peerinfo"); + if (ret && dummy_frame) + STACK_DESTROY(dummy_frame->root); - glusterd_get_uuid (&req.uuid); - req.op = glusterd_op_get_op (); + return ret; +} - if (GD_OP_DELETE_VOLUME == req.op) { - ret = dict_get_str (dict, "volname", &req.buf.buf_val); - if (ret) - goto out; - req.buf.buf_len = strlen (req.buf.buf_val); - is_alloc = _gf_false; - } else { - ret = dict_allocate_and_serialize (dict, &req.buf.buf_val, - (size_t *)&req.buf.buf_len); +int32_t +glusterd_stage_op(call_frame_t *frame, xlator_t *this, void *data) +{ + gd1_mgmt_stage_op_req req = { + { + 0, + }, + }; + int ret = -1; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_conf_t *priv = NULL; + dict_t *dict = NULL; + uuid_t *txn_id = NULL; + + if (!this) { + goto out; + } + + dict = data; + + priv = this->private; + GF_ASSERT(priv); + + ret = dict_get_ptr(dict, "peerinfo", VOID(&peerinfo)); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=peerinfo", NULL); + goto out; + } + + // peerinfo should not be in payload + dict_deln(dict, "peerinfo", SLEN("peerinfo")); + + glusterd_get_uuid(&req.uuid); + req.op = glusterd_op_get_op(); + + ret = dict_allocate_and_serialize(dict, &req.buf.buf_val, &req.buf.buf_len); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); + goto out; + } + /* Sending valid transaction ID to peers */ + ret = dict_get_bin(dict, "transaction_id", (void **)&txn_id); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_ID_GET_FAIL, + "Failed to get transaction id."); + goto out; + } else { + gf_msg_debug(this->name, 0, "Transaction_id = %s", uuid_utoa(*txn_id)); + } + + if (!frame) + frame = create_frame(this, this->ctx->pool); + + if (!frame) { + ret = -1; + goto out; + } + frame->cookie = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); + if (!frame->cookie) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); + ret = -1; + goto out; + } + gf_uuid_copy(frame->cookie, *txn_id); + + ret = glusterd_submit_request(peerinfo->rpc, &req, frame, peerinfo->mgmt, + 
GLUSTERD_MGMT_STAGE_OP, NULL, this, + glusterd_stage_op_cbk, + (xdrproc_t)xdr_gd1_mgmt_stage_op_req); - if (ret) - goto out; - } +out: + if (req.buf.buf_val) + GF_FREE(req.buf.buf_val); - dummy_frame = create_frame (this, this->ctx->pool); - if (!dummy_frame) - goto out; + gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret); + return ret; +} - ret = glusterd_submit_request (peerinfo->rpc, &req, dummy_frame, - peerinfo->mgmt, GD_MGMT_STAGE_OP, - NULL, - gd_xdr_from_mgmt_stage_op_req, - this, glusterd3_1_stage_op_cbk); +int32_t +glusterd_commit_op(call_frame_t *frame, xlator_t *this, void *data) +{ + gd1_mgmt_commit_op_req req = { + { + 0, + }, + }; + int ret = -1; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_conf_t *priv = NULL; + dict_t *dict = NULL; + uuid_t *txn_id = NULL; + + if (!this) { + goto out; + } + + dict = data; + priv = this->private; + GF_ASSERT(priv); + + ret = dict_get_ptr(dict, "peerinfo", VOID(&peerinfo)); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=peerinfo", NULL); + goto out; + } + + // peerinfo should not be in payload + dict_deln(dict, "peerinfo", SLEN("peerinfo")); + + glusterd_get_uuid(&req.uuid); + req.op = glusterd_op_get_op(); + + ret = dict_allocate_and_serialize(dict, &req.buf.buf_val, &req.buf.buf_len); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); + goto out; + } + /* Sending valid transaction ID to peers */ + ret = dict_get_bin(dict, "transaction_id", (void **)&txn_id); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_ID_GET_FAIL, + "Failed to get transaction id."); + goto out; + } else { + gf_msg_debug(this->name, 0, "Transaction_id = %s", uuid_utoa(*txn_id)); + } + + if (!frame) + frame = create_frame(this, this->ctx->pool); + + if (!frame) { + ret = -1; + goto out; + } + frame->cookie = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); + if (!frame->cookie) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); + ret = -1; + goto out; + } + gf_uuid_copy(frame->cookie, *txn_id); + + ret = glusterd_submit_request(peerinfo->rpc, &req, frame, peerinfo->mgmt, + GLUSTERD_MGMT_COMMIT_OP, NULL, this, + glusterd_commit_op_cbk, + (xdrproc_t)xdr_gd1_mgmt_commit_op_req); out: - if ((_gf_true == is_alloc) && req.buf.buf_val) - GF_FREE (req.buf.buf_val); + if (req.buf.buf_val) + GF_FREE(req.buf.buf_val); - gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + gf_msg_debug(this ? 
this->name : "glusterd", 0, "Returning %d", ret); + return ret; } int32_t -glusterd3_1_commit_op (call_frame_t *frame, xlator_t *this, - void *data) +__glusterd_brick_op_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) { - gd1_mgmt_commit_op_req req = {{0,},}; - int ret = -1; - glusterd_peerinfo_t *peerinfo = NULL; - glusterd_conf_t *priv = NULL; - call_frame_t *dummy_frame = NULL; - dict_t *dict = NULL; - gf_boolean_t is_alloc = _gf_true; - - if (!this) { - goto out; + gd1_mgmt_brick_op_rsp rsp = {0}; + int ret = -1; + int32_t op_ret = -1; + glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; + call_frame_t *frame = NULL; + glusterd_op_brick_rsp_ctx_t *ev_ctx = NULL; + dict_t *dict = NULL; + int index = 0; + glusterd_req_ctx_t *req_ctx = NULL; + glusterd_pending_node_t *node = NULL; + xlator_t *this = NULL; + uuid_t *txn_id = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + GF_ASSERT(req); + + txn_id = &priv->global_txn_id; + frame = myframe; + req_ctx = frame->local; + + if (-1 == req->rpc_status) { + rsp.op_ret = -1; + rsp.op_errno = EINVAL; + /* use standard allocation because to keep uniformity + in freeing it */ + rsp.op_errstr = strdup("error"); + event_type = GD_OP_EVENT_RCVD_RJT; + goto out; + } + + ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RES_DECODE_FAIL, + "Failed to decode brick op " + "response received"); + rsp.op_ret = -1; + rsp.op_errno = EINVAL; + rsp.op_errstr = strdup("Unable to decode brick op response"); + event_type = GD_OP_EVENT_RCVD_RJT; + goto out; + } + + if (rsp.output.output_len) { + /* Unserialize the dictionary */ + dict = dict_new(); + + ret = dict_unserialize(rsp.output.output_val, rsp.output.output_len, + &dict); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + "Failed to " + "unserialize rsp-buffer to dictionary"); + event_type = GD_OP_EVENT_RCVD_RJT; + goto out; + } else { + dict->extra_stdfree = rsp.output.output_val; } + } - dict = data; - priv = this->private; - GF_ASSERT (priv); - - ret = dict_get_ptr (dict, "peerinfo", VOID (&peerinfo)); - if (ret) - goto out; + op_ret = rsp.op_ret; - //peerinfo should not be in payload - dict_del (dict, "peerinfo"); + /* Add index to rsp_dict for GD_OP_STATUS_VOLUME */ + if (GD_OP_STATUS_VOLUME == req_ctx->op) { + node = frame->cookie; + index = node->index; + ret = dict_set_int32n(dict, "index", SLEN("index"), index); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Error setting index on brick status rsp dict"); + rsp.op_ret = -1; + event_type = GD_OP_EVENT_RCVD_RJT; + goto out; + } + } +out: - glusterd_get_uuid (&req.uuid); - req.op = glusterd_op_get_op (); + if (req_ctx && req_ctx->dict) { + ret = dict_get_bin(req_ctx->dict, "transaction_id", (void **)&txn_id); + gf_msg_debug(this->name, -ret, "transaction ID = %s", + uuid_utoa(*txn_id)); + } - if (GD_OP_DELETE_VOLUME == req.op) { - ret = dict_get_str (dict, "volname", &req.buf.buf_val); - if (ret) - goto out; - req.buf.buf_len = strlen (req.buf.buf_val); - is_alloc = _gf_false; + ev_ctx = GF_CALLOC(1, sizeof(*ev_ctx), gf_gld_mt_brick_rsp_ctx_t); + if (ev_ctx) { + if (op_ret) { + event_type = GD_OP_EVENT_RCVD_RJT; + ev_ctx->op_ret = op_ret; + ev_ctx->op_errstr = gf_strdup(rsp.op_errstr); } else { - ret = dict_allocate_and_serialize (dict, &req.buf.buf_val, - (size_t *)&req.buf.buf_len); - - if (ret) - goto 
out; + event_type = GD_OP_EVENT_RCVD_ACC; } - - dummy_frame = create_frame (this, this->ctx->pool); - if (!dummy_frame) - goto out; - - ret = glusterd_submit_request (peerinfo->rpc, &req, dummy_frame, - peerinfo->mgmt, GD_MGMT_COMMIT_OP, - NULL, - gd_xdr_from_mgmt_commit_op_req, - this, glusterd3_1_commit_op_cbk); - -out: - if ((_gf_true == is_alloc) && req.buf.buf_val) - GF_FREE (req.buf.buf_val); - - gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + ev_ctx->pending_node = frame->cookie; + ev_ctx->rsp_dict = dict; + ev_ctx->commit_ctx = frame->local; + ret = glusterd_op_sm_inject_event(event_type, txn_id, ev_ctx); + } + if (!ret) { + glusterd_friend_sm(); + glusterd_op_sm(); + } + + if (ret) { + if (dict) { + dict_unref(dict); + } + if (ev_ctx) { + GF_FREE(ev_ctx->op_errstr); + GF_FREE(ev_ctx); + } + } + free(rsp.op_errstr); // malloced by xdr + GLUSTERD_STACK_DESTROY(frame); + return ret; } int32_t -glusterd_start_brick_disconnect_timer (glusterd_op_brick_rsp_ctx_t *ev_ctx) +glusterd_brick_op_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) { - struct timeval timeout = {0, }; - int32_t ret = -1; - xlator_t *this = NULL; - glusterd_brickinfo_t *brickinfo = NULL; - - timeout.tv_sec = 5; - timeout.tv_usec = 0; - brickinfo = ev_ctx->brickinfo; - GF_ASSERT (brickinfo); - this = THIS; - GF_ASSERT (this); - - brickinfo->timer = gf_timer_call_after (this->ctx, timeout, - glusterd_op_brick_disconnect, - (void *) ev_ctx); - - ret = 0; - - return ret; + return glusterd_big_locked_cbk(req, iov, count, myframe, + __glusterd_brick_op_cbk); } int32_t -glusterd3_1_brick_op_cbk (struct rpc_req *req, struct iovec *iov, - int count, void *myframe) +glusterd_brick_op(call_frame_t *frame, xlator_t *this, void *data) { - gd1_mgmt_brick_op_rsp rsp = {0}; - int ret = -1; - int32_t op_ret = -1; - glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; - call_frame_t *frame = NULL; - glusterd_op_brick_rsp_ctx_t *ev_ctx = NULL; - int32_t op = -1; - dict_t *dict = NULL; - - GF_ASSERT (req); - frame = myframe; - - if (-1 == req->rpc_status) { - rsp.op_ret = -1; - rsp.op_errno = EINVAL; - rsp.op_errstr = "error"; - event_type = GD_OP_EVENT_RCVD_RJT; - goto out; - } + gd1_mgmt_brick_op_req *req = NULL; + int ret = 0; + int ret1 = 0; + glusterd_conf_t *priv = NULL; + call_frame_t *dummy_frame = NULL; + char *op_errstr = NULL; + int pending_bricks = 0; + glusterd_pending_node_t *pending_node; + glusterd_req_ctx_t *req_ctx = NULL; + struct rpc_clnt *rpc = NULL; + dict_t *op_ctx = NULL; + uuid_t *txn_id = NULL; + + if (!this) { + ret = -1; + goto out; + } + priv = this->private; + GF_ASSERT(priv); + + txn_id = &priv->global_txn_id; + + req_ctx = data; + GF_ASSERT(req_ctx); + CDS_INIT_LIST_HEAD(&opinfo.pending_bricks); + + ret = dict_get_bin(req_ctx->dict, "transaction_id", (void **)&txn_id); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_BRICK_SELECT_FAIL, + "Could not get transaction ID from dict, global" + "transaction ID = %s", + uuid_utoa(*txn_id)); + } else { + gf_msg_debug(this->name, 0, "transaction ID = %s", uuid_utoa(*txn_id)); + } + ret = glusterd_op_bricks_select(req_ctx->op, req_ctx->dict, &op_errstr, + &opinfo.pending_bricks, NULL); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_SELECT_FAIL, + "Failed to select bricks " + "while performing brick op during 'Volume %s'", + gd_op_list[opinfo.op]); + opinfo.op_errstr = op_errstr; + goto out; + } + + cds_list_for_each_entry(pending_node, &opinfo.pending_bricks, list) + { + dummy_frame 
= create_frame(this, this->ctx->pool); + if (!dummy_frame) + continue; + + if ((pending_node->type == GD_NODE_NFS) || + (pending_node->type == GD_NODE_QUOTAD) || + (pending_node->type == GD_NODE_SNAPD) || + (pending_node->type == GD_NODE_SCRUB) || + ((pending_node->type == GD_NODE_SHD) && + (req_ctx->op == GD_OP_STATUS_VOLUME))) { + ret = glusterd_node_op_build_payload( + req_ctx->op, (gd1_mgmt_brick_op_req **)&req, req_ctx->dict); + } else { + ret = glusterd_brick_op_build_payload( + req_ctx->op, pending_node->node, (gd1_mgmt_brick_op_req **)&req, + req_ctx->dict); + } + if (ret || !req) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_BRICK_OP_PAYLOAD_BUILD_FAIL, + "Failed to " + "build op payload during " + "'Volume %s'", + gd_op_list[req_ctx->op]); + goto out; + } + + dummy_frame->local = data; + dummy_frame->cookie = pending_node; + + rpc = glusterd_pending_node_get_rpc(pending_node); + if (!rpc) { + if (pending_node->type == GD_NODE_REBALANCE) { + opinfo.brick_pending_count = 0; + ret = 0; + GF_FREE(req->input.input_val); + GF_FREE(req); + req = NULL; + GLUSTERD_STACK_DESTROY(dummy_frame); + + op_ctx = glusterd_op_get_ctx(); + if (!op_ctx) + goto out; + glusterd_defrag_volume_node_rsp(req_ctx->dict, NULL, op_ctx); - ret = gd_xdr_to_mgmt_brick_op_rsp (*iov, &rsp); - if (ret < 0) { - gf_log ("", GF_LOG_ERROR, "error"); - rsp.op_ret = -1; - rsp.op_errno = EINVAL; - rsp.op_errstr = "error"; - event_type = GD_OP_EVENT_RCVD_RJT; goto out; - } + } - if (rsp.output.output_len) { - /* Unserialize the dictionary */ - dict = dict_new (); - - ret = dict_unserialize (rsp.output.output_val, - rsp.output.output_len, - &dict); - if (ret < 0) { - gf_log ("glusterd", GF_LOG_ERROR, - "failed to " - "unserialize rsp-buffer to dictionary"); - event_type = GD_OP_EVENT_RCVD_RJT; - goto out; - } else { - dict->extra_stdfree = rsp.output.output_val; - } + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RPC_FAILURE, + "Brick Op failed " + "due to rpc failure."); + goto out; } - op_ret = rsp.op_ret; + ret = glusterd_submit_request( + rpc, req, dummy_frame, priv->gfs_mgmt, req->op, NULL, this, + glusterd_brick_op_cbk, (xdrproc_t)xdr_gd1_mgmt_brick_op_req); + GF_FREE(req->input.input_val); + GF_FREE(req); + req = NULL; + + if (!ret) + pending_bricks++; + + glusterd_pending_node_put_rpc(pending_node); + } + + gf_msg_trace(this->name, 0, + "Sent brick op req for operation " + "'Volume %s' to %d bricks", + gd_op_list[req_ctx->op], pending_bricks); + opinfo.brick_pending_count = pending_bricks; out: - ev_ctx = GF_CALLOC (1, sizeof (*ev_ctx), gf_gld_mt_brick_rsp_ctx_t); - GF_ASSERT (ev_ctx); - if (op_ret) { - event_type = GD_OP_EVENT_RCVD_RJT; - ev_ctx->op_ret = op_ret; - ev_ctx->op_errstr = gf_strdup(rsp.op_errstr); - } else { - event_type = GD_OP_EVENT_RCVD_ACC; - } - ev_ctx->brickinfo = frame->cookie; - ev_ctx->rsp_dict = dict; - ev_ctx->commit_ctx = frame->local; - op = glusterd_op_get_op (); - if ((op == GD_OP_STOP_VOLUME) || - (op == GD_OP_REMOVE_BRICK)) { - ret = glusterd_start_brick_disconnect_timer (ev_ctx); - } else { - ret = glusterd_op_sm_inject_event (event_type, ev_ctx); - if (!ret) { - glusterd_friend_sm (); - glusterd_op_sm (); - } - } - if (ret && dict) - dict_unref (dict); - if (rsp.op_errstr && strcmp (rsp.op_errstr, "error")) - free (rsp.op_errstr); //malloced by xdr - GLUSTERD_STACK_DESTROY (frame); - return ret; -} + if (ret) + opinfo.op_ret = ret; + ret1 = glusterd_set_txn_opinfo(txn_id, &opinfo); + if (ret1) + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL, + "Unable 
to set " + "transaction's opinfo"); -struct rpc_clnt_procedure glusterd3_1_clnt_mgmt_actors[GD_MGMT_MAXVALUE] = { - [GD_MGMT_NULL] = {"NULL", NULL }, - [GD_MGMT_PROBE_QUERY] = { "PROBE_QUERY", glusterd3_1_probe}, - [GD_MGMT_FRIEND_ADD] = { "FRIEND_ADD", glusterd3_1_friend_add }, - [GD_MGMT_CLUSTER_LOCK] = {"CLUSTER_LOCK", glusterd3_1_cluster_lock}, - [GD_MGMT_CLUSTER_UNLOCK] = {"CLUSTER_UNLOCK", glusterd3_1_cluster_unlock}, - [GD_MGMT_STAGE_OP] = {"STAGE_OP", glusterd3_1_stage_op}, - [GD_MGMT_COMMIT_OP] = {"COMMIT_OP", glusterd3_1_commit_op}, - [GD_MGMT_FRIEND_REMOVE] = { "FRIEND_REMOVE", glusterd3_1_friend_remove}, - [GD_MGMT_FRIEND_UPDATE] = { "FRIEND_UPDATE", glusterd3_1_friend_update}, -}; + if (ret) { + glusterd_op_sm_inject_event(GD_OP_EVENT_RCVD_RJT, txn_id, data); + opinfo.op_ret = ret; + } -struct rpc_clnt_procedure glusterd3_1_fs_mgmt_actors[GD_MGMT_MAXVALUE] = { - [GD_MGMT_NULL] = {"NULL", NULL }, - [GD_MGMT_BRICK_OP] = {"BRICK_OP", glusterd3_1_brick_op}, -}; + gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret); + return ret; +} -struct rpc_clnt_program glusterd3_1_mgmt_prog = { - .progname = "Mgmt 3.1", - .prognum = GLUSTERD1_MGMT_PROGRAM, - .progver = GLUSTERD1_MGMT_VERSION, - .proctable = glusterd3_1_clnt_mgmt_actors, - .numproc = GLUSTERD1_MGMT_PROCCNT, +struct rpc_clnt_procedure gd_brick_actors[GLUSTERD_BRICK_MAXVALUE] = { + [GLUSTERD_BRICK_NULL] = {"NULL", NULL}, + [GLUSTERD_BRICK_OP] = {"BRICK_OP", glusterd_brick_op}, }; -struct rpc_clnt_procedure gd_clnt_mgmt_actors[GLUSTERD_MGMT_MAXVALUE] = { - [GLUSTERD_MGMT_NULL] = {"NULL", NULL }, - [GLUSTERD_MGMT_PROBE_QUERY] = {"PROBE_QUERY", glusterd3_1_probe}, - [GLUSTERD_MGMT_FRIEND_ADD] = {"FRIEND_ADD", glusterd3_1_friend_add}, - [GLUSTERD_MGMT_CLUSTER_LOCK] = {"CLUSTER_LOCK", glusterd3_1_cluster_lock}, - [GLUSTERD_MGMT_CLUSTER_UNLOCK] = {"CLUSTER_UNLOCK", glusterd3_1_cluster_unlock}, - [GLUSTERD_MGMT_STAGE_OP] = {"STAGE_OP", glusterd3_1_stage_op}, - [GLUSTERD_MGMT_COMMIT_OP] = {"COMMIT_OP", glusterd3_1_commit_op}, - [GLUSTERD_MGMT_FRIEND_REMOVE] = {"FRIEND_REMOVE", glusterd3_1_friend_remove}, - [GLUSTERD_MGMT_FRIEND_UPDATE] = {"FRIEND_UPDATE", glusterd3_1_friend_update}, +struct rpc_clnt_procedure gd_peer_actors[GLUSTERD_FRIEND_MAXVALUE] = { + [GLUSTERD_FRIEND_NULL] = {"NULL", NULL}, + [GLUSTERD_PROBE_QUERY] = {"PROBE_QUERY", glusterd_rpc_probe}, + [GLUSTERD_FRIEND_ADD] = {"FRIEND_ADD", glusterd_rpc_friend_add}, + [GLUSTERD_FRIEND_REMOVE] = {"FRIEND_REMOVE", glusterd_rpc_friend_remove}, + [GLUSTERD_FRIEND_UPDATE] = {"FRIEND_UPDATE", glusterd_rpc_friend_update}, }; -struct rpc_clnt_program gd_clnt_mgmt_prog = { - .progname = "glusterd clnt mgmt", - .prognum = GD_MGMT_PROGRAM, - .progver = GD_MGMT_VERSION, - .numproc = GD_MGMT_PROCCNT, - .proctable = gd_clnt_mgmt_actors, +struct rpc_clnt_procedure gd_mgmt_actors[GLUSTERD_MGMT_MAXVALUE] = { + [GLUSTERD_MGMT_NULL] = {"NULL", NULL}, + [GLUSTERD_MGMT_CLUSTER_LOCK] = {"CLUSTER_LOCK", glusterd_cluster_lock}, + [GLUSTERD_MGMT_CLUSTER_UNLOCK] = {"CLUSTER_UNLOCK", + glusterd_cluster_unlock}, + [GLUSTERD_MGMT_STAGE_OP] = {"STAGE_OP", glusterd_stage_op}, + [GLUSTERD_MGMT_COMMIT_OP] = {"COMMIT_OP", glusterd_commit_op}, }; -struct rpc_clnt_program glusterd_glusterfs_3_1_mgmt_prog = { - .progname = "GlusterFS Mops", - .prognum = GLUSTERFS_PROGRAM, - .progver = GLUSTERFS_VERSION, - .proctable = glusterd3_1_fs_mgmt_actors, - .numproc = GLUSTERFS_PROCCNT, +struct rpc_clnt_procedure gd_mgmt_v3_actors[GLUSTERD_MGMT_V3_MAXVALUE] = { + [GLUSTERD_MGMT_V3_NULL] = {"NULL", NULL}, + 
[GLUSTERD_MGMT_V3_LOCK] = {"MGMT_V3_LOCK", glusterd_mgmt_v3_lock_peers}, + [GLUSTERD_MGMT_V3_UNLOCK] = {"MGMT_V3_UNLOCK", + glusterd_mgmt_v3_unlock_peers}, }; -int32_t -glusterd3_1_brick_op (call_frame_t *frame, xlator_t *this, - void *data) -{ - gd1_mgmt_brick_op_req *req = NULL; - int ret = 0; - glusterd_conf_t *priv = NULL; - call_frame_t *dummy_frame = NULL; - char *op_errstr = NULL; - int pending_bricks = 0; - glusterd_pending_node_t *pending_brick; - glusterd_brickinfo_t *brickinfo = NULL; - glusterd_req_ctx_t *req_ctx = NULL; - - if (!this) { - ret = -1; - goto out; - } - priv = this->private; - GF_ASSERT (priv); - - req_ctx = data; - GF_ASSERT (req_ctx); - INIT_LIST_HEAD (&opinfo.pending_bricks); - ret = glusterd_op_bricks_select (req_ctx->op, req_ctx->dict, &op_errstr); - - if (ret) { - gf_log ("", GF_LOG_ERROR, "Brick Op failed"); - opinfo.op_errstr = op_errstr; - goto out; - } - - list_for_each_entry (pending_brick, &opinfo.pending_bricks, list) { - dummy_frame = create_frame (this, this->ctx->pool); - brickinfo = pending_brick->node; - - if (!dummy_frame) - continue; - if (_gf_false == glusterd_is_brick_started (brickinfo)) - continue; - - ret = glusterd_brick_op_build_payload (req_ctx->op, brickinfo, - (gd1_mgmt_brick_op_req **)&req, - req_ctx->dict); +struct rpc_clnt_program gd_mgmt_prog = { + .progname = "glusterd mgmt", + .prognum = GD_MGMT_PROGRAM, + .progver = GD_MGMT_VERSION, + .proctable = gd_mgmt_actors, + .numproc = GLUSTERD_MGMT_MAXVALUE, +}; - if (ret) - goto out; - - dummy_frame->local = data; - dummy_frame->cookie = brickinfo; - ret = glusterd_submit_request (brickinfo->rpc, req, dummy_frame, - &glusterd_glusterfs_3_1_mgmt_prog, - req->op, NULL, - gd_xdr_from_mgmt_brick_op_req, - this, glusterd3_1_brick_op_cbk); - if (req) { - if (req->input.input_val) - GF_FREE (req->input.input_val); - GF_FREE (req); - req = NULL; - } - if (!ret) - pending_bricks++; - } +struct rpc_clnt_program gd_brick_prog = { + .progname = "brick operations", + .prognum = GD_BRICK_PROGRAM, + .progver = GD_BRICK_VERSION, + .proctable = gd_brick_actors, + .numproc = GLUSTERD_BRICK_MAXVALUE, +}; - gf_log ("glusterd", GF_LOG_DEBUG, "Sent op req to %d bricks", - pending_bricks); - opinfo.brick_pending_count = pending_bricks; +struct rpc_clnt_program gd_peer_prog = { + .progname = "Peer mgmt", + .prognum = GD_FRIEND_PROGRAM, + .progver = GD_FRIEND_VERSION, + .proctable = gd_peer_actors, + .numproc = GLUSTERD_FRIEND_MAXVALUE, +}; -out: - if (ret) { - glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, data); - opinfo.op_ret = ret; - } - gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); - return ret; -} +struct rpc_clnt_program gd_mgmt_v3_prog = { + .progname = "glusterd mgmt v3", + .prognum = GD_MGMT_PROGRAM, + .progver = GD_MGMT_V3_VERSION, + .proctable = gd_mgmt_v3_actors, + .numproc = GLUSTERD_MGMT_V3_MAXVALUE, +}; diff --git a/xlators/mgmt/glusterd/src/glusterd-scrub-svc.c b/xlators/mgmt/glusterd/src/glusterd-scrub-svc.c new file mode 100644 index 00000000000..c49a0eefba5 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-scrub-svc.c @@ -0,0 +1,207 @@ +/* + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#include <glusterfs/globals.h> +#include <glusterfs/run.h> +#include "glusterd.h" +#include "glusterd-utils.h" +#include "glusterd-volgen.h" +#include "glusterd-scrub-svc.h" +#include "glusterd-svc-helper.h" + +char *scrub_svc_name = "scrub"; + +void +glusterd_scrubsvc_build(glusterd_svc_t *svc) +{ + svc->manager = glusterd_scrubsvc_manager; + svc->start = glusterd_scrubsvc_start; + svc->stop = glusterd_scrubsvc_stop; +} + +int +glusterd_scrubsvc_init(glusterd_svc_t *svc) +{ + return glusterd_svc_init(svc, scrub_svc_name); +} + +static int +glusterd_scrubsvc_create_volfile() +{ + char filepath[PATH_MAX] = { + 0, + }; + int ret = -1; + glusterd_conf_t *conf = NULL; + xlator_t *this = NULL; + + this = THIS; + conf = this->private; + GF_ASSERT(conf); + + glusterd_svc_build_volfile_path(scrub_svc_name, conf->workdir, filepath, + sizeof(filepath)); + + ret = glusterd_create_global_volfile(build_scrub_graph, filepath, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL, + "Failed to create volfile"); + goto out; + } + +out: + gf_msg_debug(this->name, 0, "Returning %d", ret); + + return ret; +} + +int +glusterd_scrubsvc_manager(glusterd_svc_t *svc, void *data, int flags) +{ + int ret = -EINVAL; + + if (!svc->inited) { + ret = glusterd_scrubsvc_init(svc); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SCRUB_INIT_FAIL, + "Failed to init " + "scrub service"); + goto out; + } else { + svc->inited = _gf_true; + gf_msg_debug(THIS->name, 0, + "scrub service " + "initialized"); + } + } + + if (glusterd_should_i_stop_bitd()) { + ret = svc->stop(svc, SIGTERM); + } else { + ret = glusterd_scrubsvc_create_volfile(); + if (ret) + goto out; + + ret = svc->stop(svc, SIGKILL); + if (ret) + goto out; + + ret = svc->start(svc, flags); + if (ret) + goto out; + + ret = glusterd_conn_connect(&(svc->conn)); + if (ret) + goto out; + } + +out: + if (ret) + gf_event(EVENT_SVC_MANAGER_FAILED, "svc_name=%s", svc->name); + gf_msg_debug(THIS->name, 0, "Returning %d", ret); + + return ret; +} + +int +glusterd_scrubsvc_start(glusterd_svc_t *svc, int flags) +{ + int ret = -1; + dict_t *cmdict = NULL; + + cmdict = dict_new(); + if (!cmdict) { + gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); + goto error_return; + } + + ret = dict_set_str(cmdict, "cmdarg0", "--global-timer-wheel"); + if (ret) + goto dealloc_dict; + + ret = glusterd_svc_start(svc, flags, cmdict); + +dealloc_dict: + dict_unref(cmdict); +error_return: + return ret; +} + +int +glusterd_scrubsvc_stop(glusterd_svc_t *svc, int sig) +{ + return glusterd_svc_stop(svc, sig); +} + +int +glusterd_scrubsvc_reconfigure() +{ + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + gf_boolean_t identical = _gf_false; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, out); + + if (glusterd_should_i_stop_bitd()) + goto manager; + + /* + * Check both OLD and NEW volfiles, if they are SAME by size + * and cksum i.e. "character-by-character". If YES, then + * NOTHING has been changed, just return. + */ + ret = glusterd_svc_check_volfile_identical(priv->scrub_svc.name, + build_scrub_graph, &identical); + if (ret) + goto out; + + if (identical) { + ret = 0; + goto out; + } + + /* + * They are not identical. Find out if the topology is changed + * OR just the volume options. If just the options which got + * changed, then inform the xlator to reconfigure the options. 
+ */ + identical = _gf_false; /* RESET the FLAG */ + ret = glusterd_svc_check_topology_identical(priv->scrub_svc.name, + build_scrub_graph, &identical); + if (ret) + goto out; + + /* Topology is not changed, but just the options. But write the + * options to scrub volfile, so that scrub will be reconfigured. + */ + if (identical) { + ret = glusterd_scrubsvc_create_volfile(); + if (ret == 0) { /* Only if above PASSES */ + ret = glusterd_fetchspec_notify(THIS); + } + goto out; + } + +manager: + /* + * scrub volfile's topology has been changed. scrub server needs + * to be RESTARTED to ACT on the changed volfile. + */ + ret = priv->scrub_svc.manager(&(priv->scrub_svc), NULL, PROC_START_NO_WAIT); + +out: + gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret); + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-scrub-svc.h b/xlators/mgmt/glusterd/src/glusterd-scrub-svc.h new file mode 100644 index 00000000000..514b1de96a0 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-scrub-svc.h @@ -0,0 +1,45 @@ +/* + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _GLUSTERD_SCRUB_SVC_H_ +#define _GLUSTERD_SCRUB_SVC_H_ + +#include "glusterd-svc-mgmt.h" + +typedef struct glusterd_scrubsvc_ glusterd_scrubsvc_t; + +struct glusterd_scrubsvc_ { + glusterd_svc_t svc; + gf_store_handle_t *handle; +}; + +void +glusterd_scrubsvc_build(glusterd_svc_t *svc); + +int +glusterd_scrubsvc_init(glusterd_svc_t *svc); + +int +glusterd_scrubsvc_manager(glusterd_svc_t *svc, void *data, int flags); + +int +glusterd_scrubsvc_start(glusterd_svc_t *svc, int flags); + +int +glusterd_scrubsvc_stop(glusterd_svc_t *svc, int sig); + +int +glusterd_scrubsvc_reconfigure(); + +void +glusterd_scrubsvc_build_volfile_path(char *server, char *workdir, char *volfile, + size_t len); + +#endif diff --git a/xlators/mgmt/glusterd/src/glusterd-server-quorum.c b/xlators/mgmt/glusterd/src/glusterd-server-quorum.c new file mode 100644 index 00000000000..b0b8a2e4018 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-server-quorum.c @@ -0,0 +1,486 @@ +/* + Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#include <glusterfs/common-utils.h> +#include "glusterd.h" +#include "glusterd-utils.h" +#include "glusterd-messages.h" +#include "glusterd-server-quorum.h" +#include "glusterd-store.h" +#include "glusterd-syncop.h" +#include "glusterd-op-sm.h" + +#define CEILING_POS(X) (((X) - (int)(X)) > 0 ? 
(int)((X) + 1) : (int)(X)) + +static gf_boolean_t +glusterd_is_get_op(xlator_t *this, glusterd_op_t op, dict_t *dict) +{ + char *key = NULL; + char *volname = NULL; + int ret = 0; + + if (op == GD_OP_STATUS_VOLUME) + return _gf_true; + + if (op == GD_OP_SET_VOLUME) { + /*check for set volume help*/ + ret = dict_get_str(dict, "volname", &volname); + if (volname && ((strcmp(volname, "help") == 0) || + (strcmp(volname, "help-xml") == 0))) { + ret = dict_get_str(dict, "key1", &key); + if (ret < 0) + return _gf_true; + } + } + return _gf_false; +} + +gf_boolean_t +glusterd_is_quorum_validation_required(xlator_t *this, glusterd_op_t op, + dict_t *dict) +{ + gf_boolean_t required = _gf_true; + char *key = NULL; + char *key_fixed = NULL; + int ret = -1; + + if (glusterd_is_get_op(this, op, dict)) { + required = _gf_false; + goto out; + } + if ((op != GD_OP_SET_VOLUME) && (op != GD_OP_RESET_VOLUME)) + goto out; + if (op == GD_OP_SET_VOLUME) + ret = dict_get_str(dict, "key1", &key); + else if (op == GD_OP_RESET_VOLUME) + ret = dict_get_str(dict, "key", &key); + if (ret) + goto out; + ret = glusterd_check_option_exists(key, &key_fixed); + if (ret <= 0) + goto out; + if (key_fixed) + key = key_fixed; + if (glusterd_is_quorum_option(key)) + required = _gf_false; +out: + GF_FREE(key_fixed); + return required; +} + +int +glusterd_validate_quorum(xlator_t *this, glusterd_op_t op, dict_t *dict, + char **op_errstr) +{ + int ret = 0; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + char *errstr = NULL; + + errstr = "Quorum not met. Volume operation not allowed."; + if (!glusterd_is_quorum_validation_required(this, op, dict)) + goto out; + + ret = dict_get_str(dict, "volname", &volname); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=volname", NULL); + ret = 0; + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_VOLINFO_GET_FAIL, NULL); + ret = 0; + goto out; + } + + if (!glusterd_is_volume_in_server_quorum(volinfo)) { + ret = 0; + goto out; + } + + if (does_gd_meet_server_quorum(this)) { + ret = 0; + goto out; + } + + ret = -1; + *op_errstr = gf_strdup(errstr); + +out: + return ret; +} + +gf_boolean_t +glusterd_is_quorum_option(char *option) +{ + gf_boolean_t res = _gf_false; + int i = 0; + static const char *const keys[] = {GLUSTERD_QUORUM_TYPE_KEY, + GLUSTERD_QUORUM_RATIO_KEY, NULL}; + + for (i = 0; keys[i]; i++) { + if (strcmp(option, keys[i]) == 0) { + res = _gf_true; + break; + } + } + return res; +} + +gf_boolean_t +glusterd_is_quorum_changed(dict_t *options, char *option, char *value) +{ + int ret = 0; + gf_boolean_t reconfigured = _gf_false; + gf_boolean_t all = _gf_false; + char *oldquorum = NULL; + char *newquorum = NULL; + char *oldratio = NULL; + char *newratio = NULL; + xlator_t *this = NULL; + + this = THIS; + + if ((strcmp("all", option) != 0) && !glusterd_is_quorum_option(option)) + goto out; + + if (strcmp("all", option) == 0) + all = _gf_true; + + if (all || (strcmp(GLUSTERD_QUORUM_TYPE_KEY, option) == 0)) { + newquorum = value; + ret = dict_get_str(options, GLUSTERD_QUORUM_TYPE_KEY, &oldquorum); + if (ret) + gf_msg(this->name, GF_LOG_DEBUG, 0, GD_MSG_DICT_GET_FAILED, + "dict_get_str failed on %s", GLUSTERD_QUORUM_TYPE_KEY); + } + + if (all || (strcmp(GLUSTERD_QUORUM_RATIO_KEY, option) == 0)) { + newratio = value; + ret = dict_get_str(options, GLUSTERD_QUORUM_RATIO_KEY, &oldratio); + if (ret) + gf_msg(this->name, GF_LOG_DEBUG, 0, GD_MSG_DICT_GET_FAILED, + 
"dict_get_str failed on %s", GLUSTERD_QUORUM_RATIO_KEY); + } + + reconfigured = _gf_true; + + if (oldquorum && newquorum && (strcmp(oldquorum, newquorum) == 0)) + reconfigured = _gf_false; + if (oldratio && newratio && (strcmp(oldratio, newratio) == 0)) + reconfigured = _gf_false; + + if ((oldratio == NULL) && (newratio == NULL) && (oldquorum == NULL) && + (newquorum == NULL)) + reconfigured = _gf_false; +out: + return reconfigured; +} + +static gf_boolean_t +_is_contributing_to_quorum(gd_quorum_contrib_t contrib) +{ + if ((contrib == QUORUM_UP) || (contrib == QUORUM_DOWN)) + return _gf_true; + return _gf_false; +} + +gf_boolean_t +does_quorum_meet(int active_count, int quorum_count) +{ + return (active_count >= quorum_count); +} + +int +glusterd_get_quorum_cluster_counts(xlator_t *this, int *active_count, + int *quorum_count) +{ + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_conf_t *conf = NULL; + int ret = -1; + int inquorum_count = 0; + char *val = NULL; + double quorum_percentage = 0.0; + gf_boolean_t ratio = _gf_false; + int count = 0; + + conf = this->private; + + /* Start with counting self */ + inquorum_count = 1; + if (active_count) + *active_count = 1; + + RCU_READ_LOCK; + cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list) + { + if (_is_contributing_to_quorum(peerinfo->quorum_contrib)) + inquorum_count = inquorum_count + 1; + if (active_count && (peerinfo->quorum_contrib == QUORUM_UP)) + *active_count = *active_count + 1; + } + RCU_READ_UNLOCK; + + ret = dict_get_str(conf->opts, GLUSTERD_QUORUM_RATIO_KEY, &val); + if (ret == 0) { + ret = gf_string2percent(val, &quorum_percentage); + if (ret == 0) + ratio = _gf_true; + } + if (ratio) + count = CEILING_POS(inquorum_count * quorum_percentage / 100.0); + else + count = (inquorum_count * 50 / 100) + 1; + + *quorum_count = count; + ret = 0; + + return ret; +} + +gf_boolean_t +glusterd_is_volume_in_server_quorum(glusterd_volinfo_t *volinfo) +{ + gf_boolean_t res = _gf_false; + char *quorum_type = NULL; + int ret = 0; + + ret = dict_get_str(volinfo->dict, GLUSTERD_QUORUM_TYPE_KEY, &quorum_type); + if (ret) { + gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=%s", GLUSTERD_QUORUM_TYPE_KEY, NULL); + goto out; + } + + if (strcmp(quorum_type, GLUSTERD_SERVER_QUORUM) == 0) + res = _gf_true; +out: + return res; +} + +gf_boolean_t +glusterd_is_any_volume_in_server_quorum(xlator_t *this) +{ + glusterd_conf_t *conf = NULL; + glusterd_volinfo_t *volinfo = NULL; + + conf = this->private; + list_for_each_entry(volinfo, &conf->volumes, vol_list) + { + if (glusterd_is_volume_in_server_quorum(volinfo)) { + return _gf_true; + } + } + return _gf_false; +} + +gf_boolean_t +does_gd_meet_server_quorum(xlator_t *this) +{ + int quorum_count = 0; + int active_count = 0; + gf_boolean_t in = _gf_false; + int ret = -1; + + ret = glusterd_get_quorum_cluster_counts(this, &active_count, + &quorum_count); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_QUORUM_CLUSTER_COUNT_GET_FAIL, NULL); + goto out; + } + + if (!does_quorum_meet(active_count, quorum_count)) { + goto out; + } + + in = _gf_true; +out: + return in; +} + +void +glusterd_do_volume_quorum_action(xlator_t *this, glusterd_volinfo_t *volinfo, + gf_boolean_t meets_quorum) +{ + int ret = -1; + glusterd_brickinfo_t *brickinfo = NULL; + gd_quorum_status_t quorum_status = NOT_APPLICABLE_QUORUM; + gf_boolean_t follows_quorum = _gf_false; + gf_boolean_t quorum_status_unchanged = _gf_false; + + if (volinfo->status != GLUSTERD_STATUS_STARTED) { + 
volinfo->quorum_status = NOT_APPLICABLE_QUORUM; + goto out; + } + + follows_quorum = glusterd_is_volume_in_server_quorum(volinfo); + if (follows_quorum) { + if (meets_quorum) + quorum_status = MEETS_QUORUM; + else + quorum_status = DOESNT_MEET_QUORUM; + } else { + quorum_status = NOT_APPLICABLE_QUORUM; + } + + /* + * The following check is added to prevent spurious brick starts when + * events occur that affect quorum. + * Example: + * There is a cluster of 10 peers. Volume is in quorum. User + * takes down one brick from the volume to perform maintenance. + * Suddenly one of the peers go down. Cluster is still in quorum. But + * because of this 'peer going down' event, quorum is calculated and + * the bricks that are down are brought up again. In this process it + * also brings up the brick that is purposefully taken down. + */ + if (volinfo->quorum_status == quorum_status) { + quorum_status_unchanged = _gf_true; + goto out; + } + + if (quorum_status == MEETS_QUORUM) { + gf_msg(this->name, GF_LOG_CRITICAL, 0, + GD_MSG_SERVER_QUORUM_MET_STARTING_BRICKS, + "Server quorum regained for volume %s. Starting local " + "bricks.", + volinfo->volname); + gf_event(EVENT_QUORUM_REGAINED, "volume=%s", volinfo->volname); + } else if (quorum_status == DOESNT_MEET_QUORUM) { + gf_msg(this->name, GF_LOG_CRITICAL, 0, + GD_MSG_SERVER_QUORUM_LOST_STOPPING_BRICKS, + "Server quorum lost for volume %s. Stopping local " + "bricks.", + volinfo->volname); + gf_event(EVENT_QUORUM_LOST, "volume=%s", volinfo->volname); + } + + list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + if (!glusterd_is_local_brick(this, volinfo, brickinfo)) + continue; + if (quorum_status == DOESNT_MEET_QUORUM) { + ret = glusterd_brick_stop(volinfo, brickinfo, _gf_false); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_STOP_FAIL, + "Failed to " + "stop brick %s:%s", + brickinfo->hostname, brickinfo->path); + } + } else { + if (!brickinfo->start_triggered) { + pthread_mutex_lock(&brickinfo->restart_mutex); + { + /* coverity[SLEEP] */ + ret = glusterd_brick_start(volinfo, brickinfo, _gf_false, + _gf_false); + } + pthread_mutex_unlock(&brickinfo->restart_mutex); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_BRICK_DISCONNECTED, "Failed to start %s:%s", + brickinfo->hostname, brickinfo->path); + } + } + } + } + volinfo->quorum_status = quorum_status; + if (quorum_status == MEETS_QUORUM) { + /* bricks might have been restarted and so as the port change + * might have happened + */ + ret = glusterd_store_volinfo(volinfo, GLUSTERD_VOLINFO_VER_AC_NONE); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_STORE_FAIL, + "Failed to write volinfo for volume %s", volinfo->volname); + goto out; + } + } +out: + if (quorum_status_unchanged) { + list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + if (!glusterd_is_local_brick(this, volinfo, brickinfo)) + continue; + ret = glusterd_brick_start(volinfo, brickinfo, _gf_false, _gf_true); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_DISCONNECTED, + "Failed to " + "connect to %s:%s", + brickinfo->hostname, brickinfo->path); + } + } + } + return; +} + +int +glusterd_do_quorum_action() +{ + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + glusterd_volinfo_t *volinfo = NULL; + int ret = 0; + int active_count = 0; + int quorum_count = 0; + gf_boolean_t meets = _gf_false; + + this = THIS; + conf = this->private; + + conf->pending_quorum_action = _gf_true; + ret = glusterd_lock(conf->uuid); + if (ret) + goto out; + + { + 
ret = glusterd_get_quorum_cluster_counts(this, &active_count, + &quorum_count); + if (ret) + goto unlock; + + if (does_quorum_meet(active_count, quorum_count)) + meets = _gf_true; + list_for_each_entry(volinfo, &conf->volumes, vol_list) + { + glusterd_do_volume_quorum_action(this, volinfo, meets); + } + } +unlock: + (void)glusterd_unlock(conf->uuid); + conf->pending_quorum_action = _gf_false; +out: + return ret; +} + +/* ret = 0 represents quorum is not met + * ret = 1 represents quorum is met + * ret = 2 represents quorum not applicable + */ + +int +check_quorum_for_brick_start(glusterd_volinfo_t *volinfo, + gf_boolean_t node_quorum) +{ + gf_boolean_t volume_quorum = _gf_false; + int ret = 0; + + volume_quorum = glusterd_is_volume_in_server_quorum(volinfo); + if (volume_quorum) { + if (node_quorum) + ret = 1; + } else { + ret = 2; + } + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-server-quorum.h b/xlators/mgmt/glusterd/src/glusterd-server-quorum.h new file mode 100644 index 00000000000..e11bf1a9206 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-server-quorum.h @@ -0,0 +1,46 @@ +/* + Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef _GLUSTERD_SERVER_QUORUM_H +#define _GLUSTERD_SERVER_QUORUM_H + +int +glusterd_validate_quorum(xlator_t *this, glusterd_op_t op, dict_t *dict, + char **op_errstr); + +gf_boolean_t +glusterd_is_quorum_changed(dict_t *options, char *option, char *value); + +int +glusterd_do_quorum_action(); + +int +glusterd_get_quorum_cluster_counts(xlator_t *this, int *active_count, + int *quorum_count); + +gf_boolean_t +glusterd_is_quorum_option(char *option); + +gf_boolean_t +glusterd_is_volume_in_server_quorum(glusterd_volinfo_t *volinfo); + +gf_boolean_t +glusterd_is_any_volume_in_server_quorum(xlator_t *this); + +gf_boolean_t +does_gd_meet_server_quorum(xlator_t *this); + +int +check_quorum_for_brick_start(glusterd_volinfo_t *volinfo, + gf_boolean_t node_quorum); + +gf_boolean_t +does_quorum_meet(int active_count, int quorum_count); + +#endif /* _GLUSTERD_SERVER_QUORUM_H */ diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c new file mode 100644 index 00000000000..5661e391a9c --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c @@ -0,0 +1,153 @@ +/* + Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#include "glusterd.h" +#include "glusterd-utils.h" +#include "glusterd-shd-svc-helper.h" +#include "glusterd-messages.h" +#include "glusterd-volgen.h" + +void +glusterd_svc_build_shd_socket_filepath(glusterd_volinfo_t *volinfo, char *path, + int path_len) +{ + char sockfilepath[PATH_MAX] = { + 0, + }; + char rundir[PATH_MAX] = { + 0, + }; + int32_t len = 0; + glusterd_conf_t *priv = THIS->private; + + if (!priv) + return; + + GLUSTERD_GET_SHD_RUNDIR(rundir, volinfo, priv); + len = snprintf(sockfilepath, sizeof(sockfilepath), "%s/run-%s", rundir, + uuid_utoa(MY_UUID)); + if ((len < 0) || (len >= sizeof(sockfilepath))) { + sockfilepath[0] = 0; + } + + glusterd_set_socket_filepath(sockfilepath, path, path_len); +} + +void +glusterd_svc_build_shd_pidfile(glusterd_volinfo_t *volinfo, char *path, + int path_len) +{ + char rundir[PATH_MAX] = { + 0, + }; + glusterd_conf_t *priv = THIS->private; + + if (!priv) + return; + + GLUSTERD_GET_SHD_RUNDIR(rundir, volinfo, priv); + + snprintf(path, path_len, "%s/%s-shd.pid", rundir, volinfo->volname); +} + +void +glusterd_svc_build_shd_volfile_path(glusterd_volinfo_t *volinfo, char *path, + int path_len) +{ + char workdir[PATH_MAX] = { + 0, + }; + glusterd_conf_t *priv = THIS->private; + + if (!priv) + return; + + GLUSTERD_GET_VOLUME_DIR(workdir, volinfo, priv); + + snprintf(path, path_len, "%s/%s-shd.vol", workdir, volinfo->volname); +} + +void +glusterd_shd_svcproc_cleanup(glusterd_shdsvc_t *shd) +{ + glusterd_svc_proc_t *svc_proc = NULL; + glusterd_svc_t *svc = NULL; + glusterd_conf_t *conf = NULL; + gf_boolean_t need_unref = _gf_false; + rpc_clnt_t *rpc = NULL; + + conf = THIS->private; + if (!conf) + return; + + GF_VALIDATE_OR_GOTO(THIS->name, conf, out); + GF_VALIDATE_OR_GOTO(THIS->name, shd, out); + + svc = &shd->svc; + shd->attached = _gf_false; + + if (svc->conn.rpc) { + rpc_clnt_unref(svc->conn.rpc); + svc->conn.rpc = NULL; + } + + pthread_mutex_lock(&conf->attach_lock); + { + svc_proc = svc->svc_proc; + svc->svc_proc = NULL; + svc->inited = _gf_false; + cds_list_del_init(&svc->mux_svc); + glusterd_unlink_file(svc->proc.pidfile); + + if (svc_proc && cds_list_empty(&svc_proc->svcs)) { + cds_list_del_init(&svc_proc->svc_proc_list); + /* We cannot free svc_proc list from here. Because + * if there are pending events on the rpc, it will + * try to access the corresponding svc_proc, so unrefing + * rpc request and then cleaning up the memory is carried + * from the notify function upon RPC_CLNT_DESTROY destroy. 
+ */ + need_unref = _gf_true; + rpc = svc_proc->rpc; + svc_proc->rpc = NULL; + } + } + pthread_mutex_unlock(&conf->attach_lock); + /*rpc unref has to be performed outside the lock*/ + if (need_unref && rpc) + rpc_clnt_unref(rpc); +out: + return; +} + +int +glusterd_svc_set_shd_pidfile(glusterd_volinfo_t *volinfo, dict_t *dict) +{ + int ret = -1; + glusterd_svc_t *svc = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + GF_VALIDATE_OR_GOTO(this->name, volinfo, out); + GF_VALIDATE_OR_GOTO(this->name, dict, out); + + svc = &(volinfo->shd.svc); + + ret = dict_set_dynstr_with_alloc(dict, "pidfile", svc->proc.pidfile); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set pidfile %s in dict", svc->proc.pidfile); + goto out; + } + ret = 0; +out: + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h new file mode 100644 index 00000000000..1f0984ba857 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h @@ -0,0 +1,42 @@ +/* + Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _GLUSTERD_SHD_SVC_HELPER_H_ +#define _GLUSTERD_SHD_SVC_HELPER_H_ + +#include "glusterd.h" +#include "glusterd-svc-mgmt.h" + +void +glusterd_svc_build_shd_socket_filepath(glusterd_volinfo_t *volinfo, char *path, + int path_len); + +void +glusterd_svc_build_shd_pidfile(glusterd_volinfo_t *volinfo, char *path, + int path_len); + +void +glusterd_svc_build_shd_volfile_path(glusterd_volinfo_t *volinfo, char *path, + int path_len); + +void +glusterd_shd_svcproc_cleanup(glusterd_shdsvc_t *shd); + +int +glusterd_recover_shd_attach_failure(glusterd_volinfo_t *volinfo, + glusterd_svc_t *svc, int flags); + +int +glusterd_shdsvc_create_volfile(glusterd_volinfo_t *volinfo); + +int +glusterd_svc_set_shd_pidfile(glusterd_volinfo_t *volinfo, dict_t *dict); + +#endif diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c new file mode 100644 index 00000000000..1c56384a14b --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c @@ -0,0 +1,796 @@ +/* + Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#include <glusterfs/globals.h> +#include <glusterfs/run.h> +#include "glusterd.h" +#include "glusterd-utils.h" +#include "glusterd-volgen.h" +#include "glusterd-shd-svc.h" +#include "glusterd-shd-svc-helper.h" +#include "glusterd-svc-helper.h" +#include "glusterd-store.h" + +#define GD_SHD_PROCESS_NAME "--process-name" +char *shd_svc_name = "glustershd"; + +void +glusterd_shdsvc_build(glusterd_svc_t *svc) +{ + int ret = -1; + ret = snprintf(svc->name, sizeof(svc->name), "%s", shd_svc_name); + if (ret < 0) + return; + + CDS_INIT_LIST_HEAD(&svc->mux_svc); + svc->manager = glusterd_shdsvc_manager; + svc->start = glusterd_shdsvc_start; + svc->stop = glusterd_shdsvc_stop; + svc->reconfigure = glusterd_shdsvc_reconfigure; +} + +int +glusterd_shdsvc_init(void *data, glusterd_conn_t *mux_conn, + glusterd_svc_proc_t *mux_svc) +{ + int ret = -1; + char rundir[PATH_MAX] = { + 0, + }; + char sockpath[PATH_MAX] = { + 0, + }; + char pidfile[PATH_MAX] = { + 0, + }; + char volfile[PATH_MAX] = { + 0, + }; + char logdir[PATH_MAX] = { + 0, + }; + char logfile[PATH_MAX] = { + 0, + }; + char volfileid[256] = {0}; + glusterd_svc_t *svc = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_conf_t *priv = NULL; + glusterd_muxsvc_conn_notify_t notify = NULL; + xlator_t *this = NULL; + char *volfileserver = NULL; + int32_t len = 0; + + this = THIS; + GF_VALIDATE_OR_GOTO(THIS->name, this, out); + + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, out); + + volinfo = data; + GF_VALIDATE_OR_GOTO(this->name, data, out); + GF_VALIDATE_OR_GOTO(this->name, mux_svc, out); + + svc = &(volinfo->shd.svc); + + ret = snprintf(svc->name, sizeof(svc->name), "%s", shd_svc_name); + if (ret < 0) + goto out; + + notify = glusterd_muxsvc_common_rpc_notify; + glusterd_store_perform_node_state_store(volinfo); + + GLUSTERD_GET_SHD_RUNDIR(rundir, volinfo, priv); + glusterd_svc_create_rundir(rundir); + + glusterd_svc_build_logfile_path(shd_svc_name, priv->logdir, logfile, + sizeof(logfile)); + + /* Initialize the connection mgmt */ + if (mux_conn && mux_svc->rpc) { + /* multiplexed svc */ + svc->conn.frame_timeout = mux_conn->frame_timeout; + /* This will be unrefed from glusterd_shd_svcproc_cleanup*/ + svc->conn.rpc = rpc_clnt_ref(mux_svc->rpc); + ret = snprintf(svc->conn.sockpath, sizeof(svc->conn.sockpath), "%s", + mux_conn->sockpath); + if (ret < 0) + goto out; + } else { + ret = mkdir_p(priv->logdir, 0755, _gf_true); + if ((ret == -1) && (EEXIST != errno)) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_CREATE_DIR_FAILED, + "Unable to create logdir %s", logdir); + goto out; + } + + glusterd_svc_build_shd_socket_filepath(volinfo, sockpath, + sizeof(sockpath)); + ret = glusterd_muxsvc_conn_init(&(svc->conn), mux_svc, sockpath, 600, + notify); + if (ret) + goto out; + /* This will be unrefed when the last svcs is detached from the list */ + if (!mux_svc->rpc) + mux_svc->rpc = rpc_clnt_ref(svc->conn.rpc); + } + + /* Initialize the process mgmt */ + glusterd_svc_build_shd_pidfile(volinfo, pidfile, sizeof(pidfile)); + glusterd_svc_build_shd_volfile_path(volinfo, volfile, PATH_MAX); + len = snprintf(volfileid, sizeof(volfileid), "shd/%s", volinfo->volname); + if ((len < 0) || (len >= sizeof(volfileid))) { + ret = -1; + goto out; + } + + if (dict_get_strn(this->options, "transport.socket.bind-address", + SLEN("transport.socket.bind-address"), + &volfileserver) != 0) { + volfileserver = "localhost"; + } + ret = glusterd_proc_init(&(svc->proc), shd_svc_name, pidfile, logdir, + logfile, volfile, volfileid, 
volfileserver); + if (ret) + goto out; + +out: + gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret); + return ret; +} + +int +glusterd_shdsvc_create_volfile(glusterd_volinfo_t *volinfo) +{ + char filepath[PATH_MAX] = { + 0, + }; + + int ret = -1; + dict_t *mod_dict = NULL; + xlator_t *this = THIS; + GF_ASSERT(this); + + glusterd_svc_build_shd_volfile_path(volinfo, filepath, PATH_MAX); + if (!glusterd_is_shd_compatible_volume(volinfo)) { + /* If volfile exist, delete it. This case happens when we + * change from replica/ec to distribute. + */ + (void)glusterd_unlink_file(filepath); + ret = 0; + goto out; + } + mod_dict = dict_new(); + if (!mod_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); + goto out; + } + + ret = dict_set_uint32(mod_dict, "cluster.background-self-heal-count", 0); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=cluster.background-self-heal-count", NULL); + goto out; + } + + ret = dict_set_str(mod_dict, "cluster.data-self-heal", "on"); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=cluster.data-self-heal", NULL); + goto out; + } + + ret = dict_set_str(mod_dict, "cluster.metadata-self-heal", "on"); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=cluster.metadata-self-heal", NULL); + goto out; + } + + ret = dict_set_str(mod_dict, "cluster.entry-self-heal", "on"); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=cluster.entry-self-heal", NULL); + goto out; + } + + ret = glusterd_shdsvc_generate_volfile(volinfo, filepath, mod_dict); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL, + "Failed to create volfile"); + goto out; + } + +out: + if (mod_dict) + dict_unref(mod_dict); + gf_msg_debug(this->name, 0, "Returning %d", ret); + + return ret; +} + +gf_boolean_t +glusterd_svcs_shd_compatible_volumes_stopped(glusterd_svc_t *svc) +{ + glusterd_svc_proc_t *svc_proc = NULL; + glusterd_shdsvc_t *shd = NULL; + glusterd_svc_t *temp_svc = NULL; + glusterd_volinfo_t *volinfo = NULL; + gf_boolean_t comp = _gf_false; + glusterd_conf_t *conf = THIS->private; + + GF_VALIDATE_OR_GOTO("glusterd", conf, out); + GF_VALIDATE_OR_GOTO("glusterd", svc, out); + pthread_mutex_lock(&conf->attach_lock); + { + svc_proc = svc->svc_proc; + if (!svc_proc) + goto unlock; + cds_list_for_each_entry(temp_svc, &svc_proc->svcs, mux_svc) + { + /* Get volinfo->shd from svc object */ + shd = cds_list_entry(svc, glusterd_shdsvc_t, svc); + if (!shd) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_SHD_OBJ_GET_FAIL, + "Failed to get shd object " + "from shd service"); + goto unlock; + } + + /* Get volinfo from shd */ + volinfo = cds_list_entry(shd, glusterd_volinfo_t, shd); + if (!volinfo) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, + "Failed to get volinfo from " + "from shd"); + goto unlock; + } + if (!glusterd_is_shd_compatible_volume(volinfo)) + continue; + if (volinfo->status == GLUSTERD_STATUS_STARTED) + goto unlock; + } + comp = _gf_true; + } +unlock: + pthread_mutex_unlock(&conf->attach_lock); +out: + return comp; +} + +int +glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags) +{ + int ret = -1; + glusterd_volinfo_t *volinfo = NULL; + glusterd_conf_t *conf = NULL; + gf_boolean_t shd_restart = _gf_false; + + conf = THIS->private; + GF_VALIDATE_OR_GOTO("glusterd", conf, out); + GF_VALIDATE_OR_GOTO("glusterd", svc, out); + volinfo = data; + 
GF_VALIDATE_OR_GOTO("glusterd", volinfo, out); + + if (volinfo->is_snap_volume) { + /* healing of a snap volume is not supported yet*/ + ret = 0; + goto out; + } + + while (conf->restart_shd) { + synccond_wait(&conf->cond_restart_shd, &conf->big_lock); + } + conf->restart_shd = _gf_true; + shd_restart = _gf_true; + + if (volinfo) + glusterd_volinfo_ref(volinfo); + + if (!glusterd_is_shd_compatible_volume(volinfo)) { + ret = 0; + if (svc->inited) { + /* This means glusterd was running for this volume and now + * it was converted to a non-shd volume. So just stop the shd + */ + ret = svc->stop(svc, SIGTERM); + } + goto out; + } + ret = glusterd_shdsvc_create_volfile(volinfo); + if (ret) + goto out; + + ret = glusterd_shd_svc_mux_init(volinfo, svc); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_FAILED_INIT_SHDSVC, + "Failed to init shd service"); + goto out; + } + + /* If all the volumes are stopped or all shd compatible volumes + * are stopped then stop the service if: + * - volinfo is NULL or + * - volinfo is present and volume is shd compatible + * Otherwise create volfile and restart service if: + * - volinfo is NULL or + * - volinfo is present and volume is shd compatible + */ + if (glusterd_svcs_shd_compatible_volumes_stopped(svc)) { + /* TODO + * Take a lock and detach all svc's to stop the process + * also reset the init flag + */ + ret = svc->stop(svc, SIGTERM); + } else if (volinfo) { + if (volinfo->status != GLUSTERD_STATUS_STARTED) { + ret = svc->stop(svc, SIGTERM); + if (ret) + goto out; + } + if (volinfo->status == GLUSTERD_STATUS_STARTED) { + ret = svc->start(svc, flags); + if (ret) + goto out; + } + } +out: + if (shd_restart) { + conf->restart_shd = _gf_false; + synccond_broadcast(&conf->cond_restart_shd); + } + if (volinfo) + glusterd_volinfo_unref(volinfo); + if (ret) + gf_event(EVENT_SVC_MANAGER_FAILED, "svc_name=%s", svc->name); + gf_msg_debug(THIS->name, 0, "Returning %d", ret); + + return ret; +} + +int +glusterd_new_shd_svc_start(glusterd_svc_t *svc, int flags) +{ + int ret = -1; + char glusterd_uuid_option[PATH_MAX] = {0}; + char client_pid[32] = {0}; + dict_t *cmdline = NULL; + xlator_t *this = THIS; + GF_ASSERT(this); + + cmdline = dict_new(); + if (!cmdline) + goto out; + + ret = snprintf(glusterd_uuid_option, sizeof(glusterd_uuid_option), + "*replicate*.node-uuid=%s", uuid_utoa(MY_UUID)); + if (ret < 0) + goto out; + + ret = snprintf(client_pid, sizeof(client_pid), "--client-pid=%d", + GF_CLIENT_PID_SELF_HEALD); + if (ret < 0) + goto out; + + ret = dict_set_str(cmdline, "arg", client_pid); + if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=arg", NULL); + goto out; + } + + /* Pass cmdline arguments as key-value pair. The key is merely + * a carrier and is not used. 
Since dictionary follows LIFO the value + * should be put in reverse order*/ + ret = dict_set_str(cmdline, "arg4", svc->name); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=arg4", NULL); + goto out; + } + + ret = dict_set_str(cmdline, "arg3", GD_SHD_PROCESS_NAME); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=arg3", NULL); + goto out; + } + + ret = dict_set_str(cmdline, "arg2", glusterd_uuid_option); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=arg2", NULL); + goto out; + } + + ret = dict_set_str(cmdline, "arg1", "--xlator-option"); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=arg1", NULL); + goto out; + } + + ret = glusterd_svc_start(svc, flags, cmdline); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_GLUSTER_SERVICE_START_FAIL, NULL); + goto out; + } + + ret = glusterd_conn_connect(&(svc->conn)); +out: + if (cmdline) + dict_unref(cmdline); + return ret; +} + +int +glusterd_recover_shd_attach_failure(glusterd_volinfo_t *volinfo, + glusterd_svc_t *svc, int flags) +{ + int ret = -1; + glusterd_svc_proc_t *mux_proc = NULL; + glusterd_conf_t *conf = NULL; + + conf = THIS->private; + + if (!conf || !volinfo || !svc) + return -1; + glusterd_shd_svcproc_cleanup(&volinfo->shd); + mux_proc = glusterd_svcprocess_new(); + if (!mux_proc) { + return -1; + } + ret = glusterd_shdsvc_init(volinfo, NULL, mux_proc); + if (ret) + return -1; + pthread_mutex_lock(&conf->attach_lock); + { + cds_list_add_tail(&mux_proc->svc_proc_list, &conf->shd_procs); + svc->svc_proc = mux_proc; + cds_list_del_init(&svc->mux_svc); + cds_list_add_tail(&svc->mux_svc, &mux_proc->svcs); + } + pthread_mutex_unlock(&conf->attach_lock); + + ret = glusterd_new_shd_svc_start(svc, flags); + if (!ret) { + volinfo->shd.attached = _gf_true; + } + return ret; +} + +int +glusterd_shdsvc_start(glusterd_svc_t *svc, int flags) +{ + int ret = -1; + glusterd_shdsvc_t *shd = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_conf_t *conf = NULL; + + GF_VALIDATE_OR_GOTO("glusterd", svc, out); + conf = THIS->private; + GF_VALIDATE_OR_GOTO("glusterd", conf, out); + + /* Get volinfo->shd from svc object */ + shd = cds_list_entry(svc, glusterd_shdsvc_t, svc); + if (!shd) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_SHD_OBJ_GET_FAIL, + "Failed to get shd object " + "from shd service"); + return -1; + } + + /* Get volinfo from shd */ + volinfo = cds_list_entry(shd, glusterd_volinfo_t, shd); + if (!volinfo) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, + "Failed to get volinfo from " + "from shd"); + return -1; + } + + if (volinfo->status != GLUSTERD_STATUS_STARTED) + return -1; + + glusterd_volinfo_ref(volinfo); + + if (!svc->inited) { + ret = glusterd_shd_svc_mux_init(volinfo, svc); + if (ret) + goto out; + } + + if (shd->attached) { + glusterd_volinfo_ref(volinfo); + /* Unref will happen from glusterd_svc_attach_cbk */ + ret = glusterd_attach_svc(svc, volinfo, flags); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, + "Failed to attach shd svc(volume=%s) to pid=%d", + volinfo->volname, glusterd_proc_get_pid(&svc->proc)); + glusterd_shd_svcproc_cleanup(&volinfo->shd); + glusterd_volinfo_unref(volinfo); + goto out1; + } + goto out; + } + ret = glusterd_new_shd_svc_start(svc, flags); + if (!ret) { + shd->attached = _gf_true; + } +out: + if (ret && volinfo) + glusterd_shd_svcproc_cleanup(&volinfo->shd); + if (volinfo) + 
glusterd_volinfo_unref(volinfo); +out1: + gf_msg_debug(THIS->name, 0, "Returning %d", ret); + + return ret; +} + +int +glusterd_shdsvc_reconfigure(glusterd_volinfo_t *volinfo) +{ + int ret = -1; + xlator_t *this = NULL; + gf_boolean_t identical = _gf_false; + dict_t *mod_dict = NULL; + glusterd_svc_t *svc = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + + if (!volinfo) { + /* reconfigure will be called separately*/ + ret = 0; + goto out; + } + + glusterd_volinfo_ref(volinfo); + svc = &(volinfo->shd.svc); + if (glusterd_svcs_shd_compatible_volumes_stopped(svc)) + goto manager; + + /* + * Check both OLD and NEW volfiles, if they are SAME by size + * and cksum i.e. "character-by-character". If YES, then + * NOTHING has been changed, just return. + */ + + if (!glusterd_is_shd_compatible_volume(volinfo)) { + if (svc->inited) + goto manager; + + /* Nothing to do if not shd compatible */ + ret = 0; + goto out; + } + mod_dict = dict_new(); + if (!mod_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); + goto out; + } + + ret = dict_set_uint32(mod_dict, "cluster.background-self-heal-count", 0); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=cluster.background-self-heal-count", NULL); + goto out; + } + + ret = dict_set_str(mod_dict, "cluster.data-self-heal", "on"); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=cluster.data-self-heal", NULL); + goto out; + } + + ret = dict_set_str(mod_dict, "cluster.metadata-self-heal", "on"); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=cluster.metadata-self-heal", NULL); + goto out; + } + + ret = dict_set_int32(mod_dict, "graph-check", 1); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=graph-check", NULL); + goto out; + } + + ret = dict_set_str(mod_dict, "cluster.entry-self-heal", "on"); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=cluster.entry-self-heal", NULL); + goto out; + } + + ret = glusterd_volume_svc_check_volfile_identical( + "glustershd", mod_dict, volinfo, glusterd_shdsvc_generate_volfile, + &identical); + if (ret) + goto out; + + if (identical) { + ret = 0; + goto out; + } + + /* + * They are not identical. Find out if the topology is changed + * OR just the volume options. If just the options which got + * changed, then inform the xlator to reconfigure the options. + */ + identical = _gf_false; /* RESET the FLAG */ + ret = glusterd_volume_svc_check_topology_identical( + "glustershd", mod_dict, volinfo, glusterd_shdsvc_generate_volfile, + &identical); + if (ret) + goto out; + + /* Topology is not changed, but just the options. But write the + * options to shd volfile, so that shd will be reconfigured. + */ + if (identical) { + ret = glusterd_shdsvc_create_volfile(volinfo); + if (ret == 0) { /* Only if above PASSES */ + ret = glusterd_fetchspec_notify(THIS); + } + goto out; + } +manager: + /* + * shd volfile's topology has been changed. volfile needs + * to be RECONFIGURED to ACT on the changed volfile. + */ + ret = svc->manager(svc, volinfo, PROC_START_NO_WAIT); + +out: + if (volinfo) + glusterd_volinfo_unref(volinfo); + if (mod_dict) + dict_unref(mod_dict); + gf_msg_debug(this ? 
this->name : "glusterd", 0, "Returning %d", ret); + return ret; +} + +int +glusterd_shdsvc_restart() +{ + glusterd_volinfo_t *volinfo = NULL; + glusterd_volinfo_t *tmp = NULL; + int ret = -1; + xlator_t *this = THIS; + glusterd_conf_t *conf = NULL; + glusterd_svc_t *svc = NULL; + + GF_VALIDATE_OR_GOTO("glusterd", this, out); + + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, out); + + pthread_mutex_lock(&conf->volume_lock); + cds_list_for_each_entry_safe(volinfo, tmp, &conf->volumes, vol_list) + { + glusterd_volinfo_ref(volinfo); + pthread_mutex_unlock(&conf->volume_lock); + /* Start per volume shd svc */ + if (volinfo->status == GLUSTERD_STATUS_STARTED) { + svc = &(volinfo->shd.svc); + ret = svc->manager(svc, volinfo, PROC_START_NO_WAIT); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SHD_START_FAIL, + "Couldn't start shd for " + "vol: %s on restart", + volinfo->volname); + gf_event(EVENT_SVC_MANAGER_FAILED, "volume=%s;svc_name=%s", + volinfo->volname, svc->name); + glusterd_volinfo_unref(volinfo); + goto out; + } + } + glusterd_volinfo_unref(volinfo); + pthread_mutex_lock(&conf->volume_lock); + } + pthread_mutex_unlock(&conf->volume_lock); +out: + return ret; +} + +int +glusterd_shdsvc_stop(glusterd_svc_t *svc, int sig) +{ + int ret = -1; + glusterd_svc_proc_t *svc_proc = NULL; + glusterd_shdsvc_t *shd = NULL; + glusterd_volinfo_t *volinfo = NULL; + gf_boolean_t empty = _gf_false; + glusterd_conf_t *conf = NULL; + int pid = -1; + + conf = THIS->private; + GF_VALIDATE_OR_GOTO("glusterd", conf, out); + GF_VALIDATE_OR_GOTO("glusterd", svc, out); + svc_proc = svc->svc_proc; + if (!svc_proc) { + /* + * This can happen when stop was called on a volume that is not shd + * compatible. + */ + gf_msg_debug("glusterd", 0, "svc_proc is null, ie shd already stopped"); + ret = 0; + goto out; + } + + /* Get volinfo->shd from svc object */ + shd = cds_list_entry(svc, glusterd_shdsvc_t, svc); + if (!shd) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_SHD_OBJ_GET_FAIL, + "Failed to get shd object " + "from shd service"); + return -1; + } + + /* Get volinfo from shd */ + volinfo = cds_list_entry(shd, glusterd_volinfo_t, shd); + if (!volinfo) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, + "Failed to get volinfo from " + "from shd"); + return -1; + } + + glusterd_volinfo_ref(volinfo); + pthread_mutex_lock(&conf->attach_lock); + { + if (!gf_is_service_running(svc->proc.pidfile, &pid)) { + gf_msg_debug(THIS->name, 0, "shd isn't running"); + } + cds_list_del_init(&svc->mux_svc); + empty = cds_list_empty(&svc_proc->svcs); + if (empty) { + svc_proc->status = GF_SVC_STOPPING; + cds_list_del_init(&svc_proc->svc_proc_list); + } + } + pthread_mutex_unlock(&conf->attach_lock); + if (empty) { + /* Unref will happen when destroying the connection */ + glusterd_volinfo_ref(volinfo); + svc_proc->data = volinfo; + ret = glusterd_svc_stop(svc, sig); + if (ret) { + glusterd_volinfo_unref(volinfo); + goto out; + } + } + if (!empty && pid != -1) { + ret = glusterd_detach_svc(svc, volinfo, sig); + if (ret) + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SVC_STOP_FAIL, + "shd service is failed to detach volume %s from pid %d", + volinfo->volname, glusterd_proc_get_pid(&svc->proc)); + else + gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_SVC_STOP_SUCCESS, + "Shd service is detached for volume %s from pid %d", + volinfo->volname, glusterd_proc_get_pid(&svc->proc)); + } + svc->online = _gf_false; + (void)glusterd_unlink_file((char *)svc->proc.pidfile); + glusterd_shd_svcproc_cleanup(shd); + ret 
= 0; + glusterd_volinfo_unref(volinfo); +out: + gf_msg_debug(THIS->name, 0, "Returning %d", ret); + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.h b/xlators/mgmt/glusterd/src/glusterd-shd-svc.h new file mode 100644 index 00000000000..55b409f4b69 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.h @@ -0,0 +1,45 @@ +/* + Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _GLUSTERD_SHD_SVC_H_ +#define _GLUSTERD_SHD_SVC_H_ + +#include "glusterd-svc-mgmt.h" +#include "glusterd.h" + +typedef struct glusterd_shdsvc_ glusterd_shdsvc_t; +struct glusterd_shdsvc_ { + glusterd_svc_t svc; + gf_boolean_t attached; +}; + +void +glusterd_shdsvc_build(glusterd_svc_t *svc); + +int +glusterd_shdsvc_init(void *data, glusterd_conn_t *mux_conn, + glusterd_svc_proc_t *svc_proc); + +int +glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags); + +int +glusterd_shdsvc_start(glusterd_svc_t *svc, int flags); + +int +glusterd_shdsvc_reconfigure(); + +int +glusterd_shdsvc_restart(); + +int +glusterd_shdsvc_stop(glusterd_svc_t *svc, int sig); + +#endif diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.c b/xlators/mgmt/glusterd/src/glusterd-sm.c index 94e7ca08a09..bf2d81b644a 100644 --- a/xlators/mgmt/glusterd/src/glusterd-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-sm.c @@ -1,580 +1,933 @@ /* - Copyright (c) 2006-2010 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is GF_FREE software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif #include <time.h> #include <sys/uio.h> #include <sys/resource.h> #include <libgen.h> -#include "uuid.h" +#include <glusterfs/compat-uuid.h> #include "fnmatch.h" -#include "xlator.h" +#include <glusterfs/xlator.h> #include "protocol-common.h" #include "glusterd.h" -#include "call-stub.h" -#include "defaults.h" -#include "list.h" -#include "dict.h" -#include "compat.h" -#include "compat-errno.h" -#include "statedump.h" +#include <glusterfs/call-stub.h> +#include <glusterfs/defaults.h> +#include <glusterfs/list.h> +#include "glusterd-messages.h" +#include <glusterfs/dict.h> +#include <glusterfs/compat.h> +#include <glusterfs/compat-errno.h> +#include <glusterfs/statedump.h> #include "glusterd-sm.h" #include "glusterd-op-sm.h" #include "glusterd-utils.h" #include "glusterd-store.h" +#include "glusterd-svc-helper.h" +#include "glusterd-snapshot-utils.h" +#include "glusterd-server-quorum.h" +#include "glusterd-gfproxyd-svc-helper.h" -static struct list_head gd_friend_sm_queue; - -static char *glusterd_friend_sm_state_names[] = { - "Establishing Connection", - "Probe Sent to Peer", - "Probe Received from Peer", - "Peer in Cluster", - "Accepted peer request", - "Sent and Received peer request", - "Peer Rejected", - "Peer detach in progress", - "Probe Received from peer", - "Connected to Peer", - "Peer is connected and Accepted", - "Invalid State" +char local_node_hostname[PATH_MAX] = { + 0, }; -static char *glusterd_friend_sm_event_names[] = { - "GD_FRIEND_EVENT_NONE", - "GD_FRIEND_EVENT_PROBE", - "GD_FRIEND_EVENT_INIT_FRIEND_REQ", - "GD_FRIEND_EVENT_RCVD_ACC", - "GD_FRIEND_EVENT_LOCAL_ACC", - "GD_FRIEND_EVENT_RCVD_RJT", - "GD_FRIEND_EVENT_LOCAL_RJT", - "GD_FRIEND_EVENT_RCVD_FRIEND_REQ", - "GD_FRIEND_EVENT_INIT_REMOVE_FRIEND", - "GD_FRIEND_EVENT_RCVD_REMOVE_FRIEND", - "GD_FRIEND_EVENT_REMOVE_FRIEND", - "GD_FRIEND_EVENT_CONNECTED", - "GD_FRIEND_EVENT_MAX" -}; +static struct cds_list_head gd_friend_sm_queue; + +static char *glusterd_friend_sm_state_names[] = { + "Establishing Connection", + "Probe Sent to Peer", + "Probe Received from Peer", + "Peer in Cluster", + "Accepted peer request", + "Sent and Received peer request", + "Peer Rejected", + "Peer detach in progress", + "Probe Received from peer", + "Connected to Peer", + "Peer is connected and Accepted", + "Invalid State"}; -char* -glusterd_friend_sm_state_name_get (int state) +static char *glusterd_friend_sm_event_names[] = { + "GD_FRIEND_EVENT_NONE", + "GD_FRIEND_EVENT_PROBE", + "GD_FRIEND_EVENT_INIT_FRIEND_REQ", + "GD_FRIEND_EVENT_RCVD_ACC", + "GD_FRIEND_EVENT_LOCAL_ACC", + "GD_FRIEND_EVENT_RCVD_RJT", + "GD_FRIEND_EVENT_LOCAL_RJT", + "GD_FRIEND_EVENT_RCVD_FRIEND_REQ", + "GD_FRIEND_EVENT_INIT_REMOVE_FRIEND", + "GD_FRIEND_EVENT_RCVD_REMOVE_FRIEND", + "GD_FRIEND_EVENT_REMOVE_FRIEND", + "GD_FRIEND_EVENT_CONNECTED", + "GD_FRIEND_EVENT_NEW_NAME", + "GD_FRIEND_EVENT_MAX"}; + +char * +glusterd_friend_sm_state_name_get(int state) { - if (state < 0 || state >= GD_FRIEND_STATE_MAX) - return glusterd_friend_sm_state_names[GD_FRIEND_STATE_MAX]; - return glusterd_friend_sm_state_names[state]; + if (state < 0 || state >= GD_FRIEND_STATE_MAX) + return glusterd_friend_sm_state_names[GD_FRIEND_STATE_MAX]; + return glusterd_friend_sm_state_names[state]; } -char* -glusterd_friend_sm_event_name_get (int event) +char * +glusterd_friend_sm_event_name_get(int event) { - if (event < 0 || event >= GD_FRIEND_EVENT_MAX) - return glusterd_friend_sm_event_names[GD_FRIEND_EVENT_MAX]; - return 
glusterd_friend_sm_event_names[event]; + if (event < 0 || event >= GD_FRIEND_EVENT_MAX) + return glusterd_friend_sm_event_names[GD_FRIEND_EVENT_MAX]; + return glusterd_friend_sm_event_names[event]; } void -glusterd_destroy_probe_ctx (glusterd_probe_ctx_t *ctx) +glusterd_destroy_probe_ctx(glusterd_probe_ctx_t *ctx) { - if (!ctx) - return; + if (!ctx) + return; - if (ctx->hostname) - GF_FREE (ctx->hostname); - GF_FREE (ctx); + GF_FREE(ctx->hostname); + GF_FREE(ctx); } void -glusterd_destroy_friend_req_ctx (glusterd_friend_req_ctx_t *ctx) +glusterd_destroy_friend_req_ctx(glusterd_friend_req_ctx_t *ctx) { - if (!ctx) - return; - - if (ctx->vols) - dict_unref (ctx->vols); - if (ctx->hostname) - GF_FREE (ctx->hostname); - GF_FREE (ctx); + if (!ctx) + return; + + if (ctx->vols) + dict_unref(ctx->vols); + GF_FREE(ctx->hostname); + GF_FREE(ctx); } void -glusterd_destroy_friend_update_ctx (glusterd_friend_update_ctx_t *ctx) +glusterd_destroy_friend_update_ctx(glusterd_friend_update_ctx_t *ctx) { - if (!ctx) - return; - if (ctx->hostname) - GF_FREE (ctx->hostname); - GF_FREE (ctx); + if (!ctx) + return; + GF_FREE(ctx->hostname); + GF_FREE(ctx); } int -glusterd_broadcast_friend_delete (char *hostname, uuid_t uuid) +glusterd_broadcast_friend_delete(char *hostname, uuid_t uuid) { - int ret = 0; - rpc_clnt_procedure_t *proc = NULL; - xlator_t *this = NULL; - glusterd_friend_update_ctx_t ctx = {{0},}; - glusterd_peerinfo_t *peerinfo = NULL; - glusterd_conf_t *priv = NULL; - dict_t *friends = NULL; - char key[100] = {0,}; - int32_t count = 0; - - this = THIS; - priv = this->private; - - GF_ASSERT (priv); - - ctx.hostname = hostname; - ctx.op = GD_FRIEND_UPDATE_DEL; - - friends = dict_new (); - if (!friends) - goto out; - - snprintf (key, sizeof (key), "op"); - ret = dict_set_int32 (friends, key, ctx.op); - if (ret) - goto out; - - snprintf (key, sizeof (key), "hostname"); - ret = dict_set_str (friends, key, hostname); - if (ret) - goto out; - - ret = dict_set_int32 (friends, "count", count); - if (ret) - goto out; - - list_for_each_entry (peerinfo, &priv->peers, uuid_list) { - if (!peerinfo->connected || !peerinfo->mgmt) - continue; - - ret = dict_set_static_ptr (friends, "peerinfo", peerinfo); - if (ret) { - gf_log ("", GF_LOG_ERROR, "failed to set peerinfo"); - goto out; - } - - proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_FRIEND_UPDATE]; - if (proc->fn) { - ret = proc->fn (NULL, this, friends); - } + int ret = 0; + rpc_clnt_procedure_t *proc = NULL; + xlator_t *this = NULL; + glusterd_friend_update_ctx_t ctx = { + {0}, + }; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_conf_t *priv = NULL; + dict_t *friends = NULL; + char key[64] = { + 0, + }; + int keylen; + int32_t count = 0; + + this = THIS; + priv = this->private; + + GF_ASSERT(priv); + + ctx.hostname = hostname; + ctx.op = GD_FRIEND_UPDATE_DEL; + + friends = dict_new(); + if (!friends) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); + goto out; + } + + keylen = snprintf(key, sizeof(key), "op"); + ret = dict_set_int32n(friends, key, keylen, ctx.op); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto out; + } + + keylen = snprintf(key, sizeof(key), "hostname"); + ret = dict_set_strn(friends, key, keylen, hostname); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto out; + } + + ret = dict_set_int32n(friends, "count", SLEN("count"), count); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, 
GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto out; + } + + RCU_READ_LOCK; + cds_list_for_each_entry_rcu(peerinfo, &priv->peers, uuid_list) + { + if (!peerinfo->connected || !peerinfo->peer) + continue; + + /* Setting a direct reference to peerinfo in the dict is okay as + * it is only going to be used within this read critical section + * (in glusterd_rpc_friend_update) + */ + ret = dict_set_static_ptr(friends, "peerinfo", peerinfo); + if (ret) { + RCU_READ_UNLOCK; + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "failed to set peerinfo"); + goto out; } - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + proc = &peerinfo->peer->proctable[GLUSTERD_FRIEND_UPDATE]; + if (proc->fn) { + ret = proc->fn(NULL, this, friends); + } + } + RCU_READ_UNLOCK; out: - if (friends) - dict_unref (friends); + if (friends) + dict_unref(friends); - return ret; + gf_msg_debug("glusterd", 0, "Returning with %d", ret); + return ret; } - static int -glusterd_ac_none (glusterd_friend_sm_event_t *event, void *ctx) +glusterd_ac_none(glusterd_friend_sm_event_t *event, void *ctx) { - int ret = 0; + int ret = 0; - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + gf_msg_debug("glusterd", 0, "Returning with %d", ret); - return ret; + return ret; } static int -glusterd_ac_error (glusterd_friend_sm_event_t *event, void *ctx) +glusterd_ac_error(glusterd_friend_sm_event_t *event, void *ctx) { - int ret = 0; + int ret = 0; - gf_log ("", GF_LOG_ERROR, "Received event %d ", event->event); + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_AC_ERROR, "Received event %d ", + event->event); - return ret; + return ret; } static int -glusterd_ac_reverse_probe_begin (glusterd_friend_sm_event_t *event, void *ctx) +glusterd_ac_reverse_probe_begin(glusterd_friend_sm_event_t *event, void *ctx) { - int ret = 0; - glusterd_peerinfo_t *peerinfo = NULL; - glusterd_friend_sm_event_t *new_event = NULL; - glusterd_probe_ctx_t *new_ev_ctx = NULL; - - GF_ASSERT (event); - GF_ASSERT (ctx); - - peerinfo = event->peerinfo; - ret = glusterd_friend_sm_new_event - (GD_FRIEND_EVENT_PROBE, &new_event); - - if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, "Unable to get new new_event"); - ret = -1; - goto out; - } - - new_ev_ctx = GF_CALLOC (1, sizeof(*new_ev_ctx), gf_gld_mt_probe_ctx_t); - - if (!new_ev_ctx) { - ret = -1; - goto out; - } - - new_ev_ctx->hostname = gf_strdup (peerinfo->hostname); - new_ev_ctx->port = peerinfo->port; - new_ev_ctx->req = NULL; - - new_event->peerinfo = peerinfo; - new_event->ctx = new_ev_ctx; - - ret = glusterd_friend_sm_inject_event (new_event); - - if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, "Unable to inject new_event %d, " - "ret = %d", new_event->event, ret); - } + int ret = 0; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_friend_sm_event_t *new_event = NULL; + glusterd_probe_ctx_t *new_ev_ctx = NULL; + + GF_ASSERT(event); + GF_ASSERT(ctx); + + new_ev_ctx = GF_CALLOC(1, sizeof(*new_ev_ctx), gf_gld_mt_probe_ctx_t); + + RCU_READ_LOCK; + + peerinfo = glusterd_peerinfo_find(event->peerid, event->peername); + if (!peerinfo) { + RCU_READ_UNLOCK; + ret = -1; + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_PEER_NOT_FOUND, + "Could not find peer %s(%s)", event->peername, + uuid_utoa(event->peerid)); + goto out; + } + + ret = glusterd_friend_sm_new_event(GD_FRIEND_EVENT_PROBE, &new_event); + + if (ret) { + RCU_READ_UNLOCK; + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_EVENT_NEW_GET_FAIL, + "Unable to get new new_event"); + ret = -1; + goto out; + } + + if (!new_ev_ctx) { + RCU_READ_UNLOCK; + ret = -1; 
+ goto out; + } + + new_ev_ctx->hostname = gf_strdup(peerinfo->hostname); + new_ev_ctx->port = peerinfo->port; + new_ev_ctx->req = NULL; + + new_event->peername = gf_strdup(peerinfo->hostname); + gf_uuid_copy(new_event->peerid, peerinfo->uuid); + new_event->ctx = new_ev_ctx; + + ret = glusterd_friend_sm_inject_event(new_event); + + RCU_READ_UNLOCK; + + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_EVENT_INJECT_FAIL, + "Unable to inject new_event %d, " + "ret = %d", + new_event->event, ret); + } out: - if (ret) { - if (new_event) - GF_FREE (new_event); - if (new_ev_ctx->hostname) - GF_FREE (new_ev_ctx->hostname); - if (new_ev_ctx) - GF_FREE (new_ev_ctx); - } - gf_log ("", GF_LOG_DEBUG, "returning with %d", ret); - return ret; + if (ret) { + if (new_event) + GF_FREE(new_event->peername); + GF_FREE(new_event); + if (new_ev_ctx) + GF_FREE(new_ev_ctx->hostname); + GF_FREE(new_ev_ctx); + } + gf_msg_debug("glusterd", 0, "returning with %d", ret); + return ret; } static int -glusterd_ac_friend_add (glusterd_friend_sm_event_t *event, void *ctx) +glusterd_ac_friend_add(glusterd_friend_sm_event_t *event, void *ctx) { - int ret = 0; - glusterd_peerinfo_t *peerinfo = NULL; - rpc_clnt_procedure_t *proc = NULL; - call_frame_t *frame = NULL; - glusterd_conf_t *conf = NULL; - xlator_t *this = NULL; - - GF_ASSERT (event); - peerinfo = event->peerinfo; - - this = THIS; - conf = this->private; - - GF_ASSERT (conf); - - if (!peerinfo->mgmt) - goto out; - proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_FRIEND_ADD]; - if (proc->fn) { - frame = create_frame (this, this->ctx->pool); - if (!frame) { - goto out; - } - frame->local = ctx; - ret = proc->fn (frame, this, event); + int ret = 0; + glusterd_peerinfo_t *peerinfo = NULL; + rpc_clnt_procedure_t *proc = NULL; + call_frame_t *frame = NULL; + glusterd_conf_t *conf = NULL; + xlator_t *this = NULL; + + GF_ASSERT(event); + + this = THIS; + conf = this->private; + + GF_ASSERT(conf); + + RCU_READ_LOCK; + + peerinfo = glusterd_peerinfo_find(event->peerid, event->peername); + if (!peerinfo) { + RCU_READ_UNLOCK; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PEER_NOT_FOUND, + "Could not find peer %s(%s)", event->peername, + uuid_utoa(event->peerid)); + goto out; + } + + if (!peerinfo->peer) { + RCU_READ_UNLOCK; + goto out; + } + proc = &peerinfo->peer->proctable[GLUSTERD_FRIEND_ADD]; + if (proc->fn) { + frame = create_frame(this, this->ctx->pool); + if (!frame) { + RCU_READ_UNLOCK; + goto out; } + frame->local = ctx; + ret = proc->fn(frame, this, event); + } + RCU_READ_UNLOCK; out: - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + if (ret && frame) + STACK_DESTROY(frame->root); - return ret; + gf_msg_debug("glusterd", 0, "Returning with %d", ret); + return ret; } static int -glusterd_ac_friend_probe (glusterd_friend_sm_event_t *event, void *ctx) +glusterd_ac_friend_probe(glusterd_friend_sm_event_t *event, void *ctx) { - int ret = -1; - rpc_clnt_procedure_t *proc = NULL; - call_frame_t *frame = NULL; - glusterd_conf_t *conf = NULL; - xlator_t *this = NULL; - glusterd_probe_ctx_t *probe_ctx = NULL; - glusterd_peerinfo_t *peerinfo = NULL; - dict_t *dict = NULL; + int ret = -1; + rpc_clnt_procedure_t *proc = NULL; + call_frame_t *frame = NULL; + glusterd_conf_t *conf = NULL; + xlator_t *this = NULL; + glusterd_probe_ctx_t *probe_ctx = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + dict_t *dict = NULL; + + GF_ASSERT(ctx); + + probe_ctx = ctx; + + this = THIS; + + GF_ASSERT(this); + + conf = this->private; + + GF_ASSERT(conf); + + RCU_READ_LOCK; + peerinfo = 
glusterd_peerinfo_find(NULL, probe_ctx->hostname); + if (peerinfo == NULL) { + // We should not reach this state ideally + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_PEER_NOT_FOUND, NULL); + ret = -1; + goto unlock; + } + + if (!peerinfo->peer) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_PEER_ADDRESS_GET_FAIL, + NULL); + goto unlock; + } + proc = &peerinfo->peer->proctable[GLUSTERD_PROBE_QUERY]; + if (proc->fn) { + frame = create_frame(this, this->ctx->pool); + if (!frame) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_FRAME_CREATE_FAIL, + NULL); + goto unlock; + } + frame->local = ctx; + dict = dict_new(); + if (!dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, + NULL); + goto unlock; + } + ret = dict_set_strn(dict, "hostname", SLEN("hostname"), + probe_ctx->hostname); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=hostname", NULL); + goto unlock; + } - GF_ASSERT (ctx); + ret = dict_set_int32n(dict, "port", SLEN("port"), probe_ctx->port); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=port", NULL); + goto unlock; + } - probe_ctx = ctx; + /* The peerinfo reference being set here is going to be used + * only within this critical section, in glusterd_rpc_probe + * (ie. proc->fn). + */ + ret = dict_set_static_ptr(dict, "peerinfo", peerinfo); + if (ret) { + RCU_READ_UNLOCK; + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "failed to set peerinfo"); + goto out; + } - this = THIS; + ret = proc->fn(frame, this, dict); + if (ret) + goto unlock; + } +unlock: + RCU_READ_UNLOCK; +out: - GF_ASSERT (this); + if (dict) + dict_unref(dict); + gf_msg_debug("glusterd", 0, "Returning with %d", ret); - conf = this->private; + if (ret && frame) + STACK_DESTROY(frame->root); - GF_ASSERT (conf); + return ret; +} - ret = glusterd_friend_find (NULL, probe_ctx->hostname, &peerinfo); - if (ret) { - //We should not reach this state ideally - GF_ASSERT (0); - goto out; +static int +glusterd_ac_send_friend_remove_req(glusterd_friend_sm_event_t *event, + void *data) +{ + int ret = 0; + glusterd_peerinfo_t *peerinfo = NULL; + rpc_clnt_procedure_t *proc = NULL; + call_frame_t *frame = NULL; + glusterd_conf_t *conf = NULL; + xlator_t *this = NULL; + glusterd_friend_sm_event_type_t event_type = GD_FRIEND_EVENT_NONE; + glusterd_probe_ctx_t *ctx = NULL; + glusterd_friend_sm_event_t *new_event = NULL; + + GF_ASSERT(event); + + this = THIS; + conf = this->private; + + GF_ASSERT(conf); + + RCU_READ_LOCK; + + peerinfo = glusterd_peerinfo_find(event->peerid, event->peername); + if (!peerinfo) { + RCU_READ_UNLOCK; + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PEER_NOT_FOUND, + "Could not find peer %s(%s)", event->peername, + uuid_utoa(event->peerid)); + goto out; + } + ctx = event->ctx; + + if (!peerinfo->connected) { + event_type = GD_FRIEND_EVENT_REMOVE_FRIEND; + + ret = glusterd_friend_sm_new_event(event_type, &new_event); + + if (!ret) { + new_event->peername = peerinfo->hostname; + gf_uuid_copy(new_event->peerid, peerinfo->uuid); + ret = glusterd_friend_sm_inject_event(new_event); + } else { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_EVENT_NEW_GET_FAIL, + "Unable to get event"); } - if (!peerinfo->mgmt) - goto out; - proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_PROBE_QUERY]; - if (proc->fn) { - frame = create_frame (this, this->ctx->pool); - if (!frame) { - goto out; - } - frame->local = ctx; - dict = dict_new (); - if (!dict) - goto out; - ret = dict_set_str (dict, 
"hostname", probe_ctx->hostname); - if (ret) - goto out; - - ret = dict_set_int32 (dict, "port", probe_ctx->port); - if (ret) - goto out; - - ret = dict_set_static_ptr (dict, "peerinfo", peerinfo); - if (ret) { - gf_log ("", GF_LOG_ERROR, "failed to set peerinfo"); - goto out; - } - - ret = proc->fn (frame, this, dict); - if (ret) - goto out; - + if (ctx) { + ret = glusterd_xfer_cli_deprobe_resp(ctx->req, ret, 0, NULL, + ctx->hostname, ctx->dict); + glusterd_broadcast_friend_delete(ctx->hostname, NULL); + glusterd_destroy_probe_ctx(ctx); } + goto unlock; + } + + if (!peerinfo->peer) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_PEER_ADDRESS_GET_FAIL, + NULL); + goto unlock; + } + proc = &peerinfo->peer->proctable[GLUSTERD_FRIEND_REMOVE]; + if (proc->fn) { + frame = create_frame(this, this->ctx->pool); + if (!frame) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_FRAME_CREATE_FAIL, + NULL); + goto unlock; + } + frame->local = data; + ret = proc->fn(frame, this, event); + } - +unlock: + RCU_READ_UNLOCK; out: - if (dict) - dict_unref (dict); - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); - - return ret; -} -static int -glusterd_ac_send_friend_remove_req (glusterd_friend_sm_event_t *event, - void *data) -{ - int ret = 0; - glusterd_peerinfo_t *peerinfo = NULL; - rpc_clnt_procedure_t *proc = NULL; - call_frame_t *frame = NULL; - glusterd_conf_t *conf = NULL; - xlator_t *this = NULL; - glusterd_friend_sm_event_type_t event_type = GD_FRIEND_EVENT_NONE; - glusterd_probe_ctx_t *ctx = NULL; - glusterd_friend_sm_event_t *new_event = NULL; + gf_msg_debug("glusterd", 0, "Returning with %d", ret); - GF_ASSERT (event); - peerinfo = event->peerinfo; + if (ret && frame) + STACK_DESTROY(frame->root); - this = THIS; - conf = this->private; + return ret; +} - GF_ASSERT (conf); +static gf_boolean_t +glusterd_should_update_peer(glusterd_peerinfo_t *peerinfo, + glusterd_peerinfo_t *cur_peerinfo) +{ + if ((peerinfo == cur_peerinfo) || + (peerinfo->state.state == GD_FRIEND_STATE_BEFRIENDED)) + return _gf_true; - ctx = event->ctx; + return _gf_false; +} - if (!peerinfo->connected) { - event_type = GD_FRIEND_EVENT_REMOVE_FRIEND; +static int +glusterd_ac_send_friend_update(glusterd_friend_sm_event_t *event, void *ctx) +{ + int ret = 0; + glusterd_peerinfo_t *cur_peerinfo = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + rpc_clnt_procedure_t *proc = NULL; + xlator_t *this = NULL; + glusterd_friend_update_ctx_t ev_ctx = {{0}}; + glusterd_conf_t *priv = NULL; + dict_t *friends = NULL; + char key[64] = { + 0, + }; + int keylen; + int32_t count = 0; + + GF_ASSERT(event); + + this = THIS; + priv = this->private; + + GF_ASSERT(priv); + + keylen = snprintf(key, sizeof(key), "op"); + friends = dict_new(); + + RCU_READ_LOCK; + + cur_peerinfo = glusterd_peerinfo_find(event->peerid, event->peername); + if (!cur_peerinfo) { + RCU_READ_UNLOCK; + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PEER_NOT_FOUND, + "Could not find peer %s(%s)", event->peername, + uuid_utoa(event->peerid)); + goto out; + } + + if (!friends) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); + goto unlock; + } + + ev_ctx.op = GD_FRIEND_UPDATE_ADD; + ret = dict_set_int32n(friends, key, keylen, ev_ctx.op); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto unlock; + } + + cds_list_for_each_entry_rcu(peerinfo, &priv->peers, uuid_list) + { + if (!glusterd_should_update_peer(peerinfo, cur_peerinfo)) + continue; + + count++; + + snprintf(key, 
sizeof(key), "friend%d", count); + ret = gd_add_friend_to_dict(peerinfo, friends, key); + if (ret) + goto unlock; + } - ret = glusterd_friend_sm_new_event (event_type, &new_event); + ret = dict_set_int32n(friends, "count", SLEN("count"), count); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=count", NULL); + goto unlock; + } - if (!ret) { - new_event->peerinfo = peerinfo; - ret = glusterd_friend_sm_inject_event (new_event); - } else { - gf_log ("glusterd", GF_LOG_ERROR, - "Unable to get event"); - } + cds_list_for_each_entry_rcu(peerinfo, &priv->peers, uuid_list) + { + if (!peerinfo->connected || !peerinfo->peer) + continue; - if (ctx) - ret = glusterd_xfer_cli_deprobe_resp (ctx->req, ret, 0, - ctx->hostname); - glusterd_friend_sm (); - glusterd_op_sm (); + if (!glusterd_should_update_peer(peerinfo, cur_peerinfo)) + continue; - if (ctx) { - glusterd_broadcast_friend_delete (ctx->hostname, NULL); - glusterd_destroy_probe_ctx (ctx); - } - goto out; + ret = dict_set_static_ptr(friends, "peerinfo", peerinfo); + if (ret) { + RCU_READ_UNLOCK; + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "failed to set peerinfo"); + goto out; } - if (!peerinfo->mgmt) - goto out; - proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_FRIEND_REMOVE]; + proc = &peerinfo->peer->proctable[GLUSTERD_FRIEND_UPDATE]; if (proc->fn) { - frame = create_frame (this, this->ctx->pool); - if (!frame) { - goto out; - } - frame->local = data; - ret = proc->fn (frame, this, event); + ret = proc->fn(NULL, this, friends); } + } +unlock: + RCU_READ_UNLOCK; out: - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); - return ret; + if (friends) + dict_unref(friends); + + gf_msg_debug("glusterd", 0, "Returning with %d", ret); + return ret; } +/* ac_update_friend only sends friend update to the friend that caused this + * event to happen + */ static int -glusterd_ac_send_friend_update (glusterd_friend_sm_event_t *event, void *ctx) +glusterd_ac_update_friend(glusterd_friend_sm_event_t *event, void *ctx) { - int ret = 0; - glusterd_peerinfo_t *peerinfo = NULL; - rpc_clnt_procedure_t *proc = NULL; - xlator_t *this = NULL; - glusterd_friend_update_ctx_t ev_ctx = {{0}}; - glusterd_conf_t *priv = NULL; - dict_t *friends = NULL; - char key[100] = {0,}; - char *dup_buf = NULL; - int32_t count = 0; - - GF_ASSERT (event); - peerinfo = event->peerinfo; - - this = THIS; - priv = this->private; - - GF_ASSERT (priv); - - ev_ctx.op = GD_FRIEND_UPDATE_ADD; - - friends = dict_new (); - if (!friends) - goto out; - - snprintf (key, sizeof (key), "op"); - ret = dict_set_int32 (friends, key, ev_ctx.op); + int ret = 0; + glusterd_peerinfo_t *cur_peerinfo = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + rpc_clnt_procedure_t *proc = NULL; + xlator_t *this = NULL; + glusterd_friend_update_ctx_t ev_ctx = {{0}}; + glusterd_conf_t *priv = NULL; + dict_t *friends = NULL; + char key[64] = { + 0, + }; + int keylen; + int32_t count = 0; + + GF_ASSERT(event); + + this = THIS; + priv = this->private; + + GF_ASSERT(priv); + + friends = dict_new(); + keylen = snprintf(key, sizeof(key), "op"); + + RCU_READ_LOCK; + + cur_peerinfo = glusterd_peerinfo_find(event->peerid, event->peername); + if (!cur_peerinfo) { + RCU_READ_UNLOCK; + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PEER_NOT_FOUND, + "Could not find peer %s(%s)", event->peername, + uuid_utoa(event->peerid)); + goto out; + } + + /* Bail out early if peer is not connected. 
+ * We cannot send requests to the peer until we have established our + * client connection to it. + */ + if (!cur_peerinfo->connected || !cur_peerinfo->peer) { + ret = 0; + goto unlock; + } + + if (!friends) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); + goto out; + } + + ev_ctx.op = GD_FRIEND_UPDATE_ADD; + ret = dict_set_int32n(friends, key, keylen, ev_ctx.op); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto unlock; + } + + cds_list_for_each_entry_rcu(peerinfo, &priv->peers, uuid_list) + { + if (!glusterd_should_update_peer(peerinfo, cur_peerinfo)) + continue; + + count++; + + snprintf(key, sizeof(key), "friend%d", count); + ret = gd_add_friend_to_dict(peerinfo, friends, key); if (ret) - goto out; + goto unlock; + } + + ret = dict_set_int32n(friends, "count", SLEN("count"), count); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=count", NULL); + goto unlock; + } + + ret = dict_set_static_ptr(friends, "peerinfo", cur_peerinfo); + if (ret) { + RCU_READ_UNLOCK; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "failed to set peerinfo"); + goto out; + } + + proc = &cur_peerinfo->peer->proctable[GLUSTERD_FRIEND_UPDATE]; + if (proc->fn) + ret = proc->fn(NULL, this, friends); + + gf_msg_debug(this->name, 0, "Returning with %d", ret); + +unlock: + RCU_READ_UNLOCK; +out: - list_for_each_entry (peerinfo, &priv->peers, uuid_list) { - count++; - snprintf (key, sizeof (key), "friend%d.uuid", count); - dup_buf = gf_strdup (uuid_utoa (peerinfo->uuid)); - ret = dict_set_dynstr (friends, key, dup_buf); - if (ret) - goto out; - snprintf (key, sizeof (key), "friend%d.hostname", count); - ret = dict_set_str (friends, key, peerinfo->hostname); - if (ret) - goto out; - gf_log ("", GF_LOG_INFO, "Added uuid: %s, host: %s", - dup_buf, peerinfo->hostname); - } + if (friends) + dict_unref(friends); - ret = dict_set_int32 (friends, "count", count); - if (ret) - goto out; + return ret; +} - list_for_each_entry (peerinfo, &priv->peers, uuid_list) { - if (!peerinfo->connected || !peerinfo->mgmt) - continue; +/* Clean up stale volumes on the peer being detached. The volumes which have + * bricks on other peers are stale with respect to the detached peer. + */ +static void +glusterd_peer_detach_cleanup(glusterd_conf_t *priv) +{ + int ret = -1; + glusterd_volinfo_t *volinfo = NULL; + glusterd_volinfo_t *tmp_volinfo = NULL; + glusterd_svc_t *svc = NULL; + + GF_ASSERT(priv); + + cds_list_for_each_entry_safe(volinfo, tmp_volinfo, &priv->volumes, vol_list) + { + /* The peer detach checks make sure that, at this point in the + * detach process, there are only volumes contained completely + * within or completely outside the detached peer. + * The only stale volumes at this point are the ones + * completely outside the peer and can be safely deleted. 
+ */ + if (!glusterd_friend_contains_vol_bricks(volinfo, MY_UUID)) { + gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_STALE_VOL_DELETE_INFO, + "Deleting stale volume %s", volinfo->volname); + + /*Stop snapd daemon service if snapd daemon is running*/ + if (!volinfo->is_snap_volume) { + svc = &(volinfo->snapd.svc); + ret = svc->stop(svc, SIGTERM); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SVC_STOP_FAIL, + "Failed " + "to stop snapd daemon service"); + } + } - ret = dict_set_static_ptr (friends, "peerinfo", peerinfo); + if (glusterd_is_shd_compatible_volume(volinfo)) { + svc = &(volinfo->shd.svc); + ret = svc->stop(svc, SIGTERM); if (ret) { - gf_log ("", GF_LOG_ERROR, "failed to set peerinfo"); - goto out; + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SVC_STOP_FAIL, + "Failed " + "to stop shd daemon service"); } + } - proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_FRIEND_UPDATE]; - if (proc->fn) { - ret = proc->fn (NULL, this, friends); + if (glusterd_is_gfproxyd_enabled(volinfo)) { + svc = &(volinfo->gfproxyd.svc); + ret = svc->stop(svc, SIGTERM); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SVC_STOP_FAIL, + "Failed " + "to stop gfproxyd daemon service"); } + } + + ret = glusterd_cleanup_snaps_for_volume(volinfo); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_VOL_DELETE_FAIL, + "Error deleting snapshots for volume %s", + volinfo->volname); + } + + ret = glusterd_delete_volume(volinfo); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, + GD_MSG_STALE_VOL_REMOVE_FAIL, + "Error deleting stale volume"); + } } - - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); - -out: - if (friends) - dict_unref (friends); - - return ret; + } + + /*Reconfigure all daemon services upon peer detach*/ + ret = glusterd_svcs_reconfigure(NULL); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SVC_STOP_FAIL, + "Failed to reconfigure all daemon services."); + } } - static int -glusterd_ac_handle_friend_remove_req (glusterd_friend_sm_event_t *event, - void *ctx) +glusterd_ac_handle_friend_remove_req(glusterd_friend_sm_event_t *event, + void *ctx) { - int ret = 0; - glusterd_peerinfo_t *peerinfo = NULL; - glusterd_friend_req_ctx_t *ev_ctx = NULL; - glusterd_friend_sm_event_t *new_event = NULL; - glusterd_conf_t *priv = NULL; - - GF_ASSERT (ctx); - ev_ctx = ctx; - peerinfo = event->peerinfo; - GF_ASSERT (peerinfo); - - priv = THIS->private; - GF_ASSERT (priv); - - ret = glusterd_xfer_friend_remove_resp (ev_ctx->req, ev_ctx->hostname, - ev_ctx->port); - - list_for_each_entry (peerinfo, &priv->peers, uuid_list) { - - ret = glusterd_friend_sm_new_event (GD_FRIEND_EVENT_REMOVE_FRIEND, - &new_event); - if (ret) - goto out; + int ret = 0; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_friend_req_ctx_t *ev_ctx = NULL; + glusterd_friend_sm_event_t *new_event = NULL; + glusterd_conf_t *priv = NULL; + + GF_ASSERT(ctx); + ev_ctx = ctx; + + priv = THIS->private; + GF_ASSERT(priv); + + ret = glusterd_xfer_friend_remove_resp(ev_ctx->req, ev_ctx->hostname, + ev_ctx->port); + + RCU_READ_LOCK; + cds_list_for_each_entry_rcu(peerinfo, &priv->peers, uuid_list) + { + ret = glusterd_friend_sm_new_event(GD_FRIEND_EVENT_REMOVE_FRIEND, + &new_event); + if (ret) { + RCU_READ_UNLOCK; + goto out; + } - new_event->peerinfo = peerinfo; + new_event->peername = gf_strdup(peerinfo->hostname); + gf_uuid_copy(new_event->peerid, peerinfo->uuid); - ret = glusterd_friend_sm_inject_event (new_event); - if (ret) - goto out; + ret = glusterd_friend_sm_inject_event(new_event); + if (ret) { + RCU_READ_UNLOCK; 
+ goto out; } + new_event = NULL; + } + RCU_READ_UNLOCK; + + glusterd_peer_detach_cleanup(priv); out: - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + if (new_event) + GF_FREE(new_event->peername); + GF_FREE(new_event); - return ret; + gf_msg_debug(THIS->name, 0, "Returning with %d", ret); + return ret; } static int -glusterd_ac_friend_remove (glusterd_friend_sm_event_t *event, void *ctx) +glusterd_ac_friend_remove(glusterd_friend_sm_event_t *event, void *ctx) { - int ret = -1; - - ret = glusterd_friend_cleanup (event->peerinfo); - - if (ret) { - gf_log ("", GF_LOG_ERROR, "Cleanup returned: %d", ret); - } - - return 0; + int ret = -1; + glusterd_peerinfo_t *peerinfo = NULL; + + GF_ASSERT(event); + + RCU_READ_LOCK; + + peerinfo = glusterd_peerinfo_find(event->peerid, event->peername); + if (!peerinfo) { + RCU_READ_UNLOCK; + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_PEER_NOT_FOUND, + "Could not find peer %s(%s)", event->peername, + uuid_utoa(event->peerid)); + goto out; + } + ret = glusterd_friend_remove_cleanup_vols(peerinfo->uuid); + RCU_READ_UNLOCK; + if (ret) + gf_msg(THIS->name, GF_LOG_WARNING, 0, GD_MSG_VOL_CLEANUP_FAIL, + "Volumes cleanup failed"); + + /* Exiting read critical section as glusterd_peerinfo_cleanup calls + * synchronize_rcu before freeing the peerinfo + */ + + ret = glusterd_peerinfo_cleanup(peerinfo); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_PEER_DETACH_CLEANUP_FAIL, + "Cleanup returned: %d", ret); + } +out: + return 0; } /*static int @@ -588,426 +941,682 @@ glusterd_ac_none (void *ctx) }*/ static int -glusterd_ac_handle_friend_add_req (glusterd_friend_sm_event_t *event, void *ctx) +glusterd_ac_handle_friend_add_req(glusterd_friend_sm_event_t *event, void *ctx) { - int ret = 0; - uuid_t uuid; - glusterd_peerinfo_t *peerinfo = NULL; - glusterd_friend_req_ctx_t *ev_ctx = NULL; - glusterd_friend_update_ctx_t *new_ev_ctx = NULL; - glusterd_friend_sm_event_t *new_event = NULL; - glusterd_friend_sm_event_type_t event_type = GD_FRIEND_EVENT_NONE; - int status = 0; - int32_t op_ret = -1; - int32_t op_errno = 0; - - GF_ASSERT (ctx); - ev_ctx = ctx; - uuid_copy (uuid, ev_ctx->uuid); - peerinfo = event->peerinfo; - GF_ASSERT (peerinfo); - uuid_copy (peerinfo->uuid, ev_ctx->uuid); - - //Build comparison logic here. - ret = glusterd_compare_friend_data (ev_ctx->vols, &status); - if (ret) - goto out; + int ret = 0; + uuid_t uuid; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_friend_req_ctx_t *ev_ctx = NULL; + glusterd_friend_update_ctx_t *new_ev_ctx = NULL; + glusterd_friend_sm_event_t *new_event = NULL; + glusterd_friend_sm_event_type_t event_type = GD_FRIEND_EVENT_NONE; + glusterd_conf_t *conf = NULL; + int status = 0; + int32_t op_ret = -1; + int32_t op_errno = 0; + xlator_t *this = NULL; + char *hostname = NULL; + + this = THIS; + GF_ASSERT(this); + + GF_ASSERT(ctx); + ev_ctx = ctx; + gf_uuid_copy(uuid, ev_ctx->uuid); + + RCU_READ_LOCK; + peerinfo = glusterd_peerinfo_find(event->peerid, event->peername); + if (!peerinfo) { + RCU_READ_UNLOCK; + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PEER_NOT_FOUND, + "Could not find peer %s(%s)", event->peername, + uuid_utoa(event->peerid)); + goto out; + } + + /* TODO: How do you do an atomic copy of uuid_t */ + /* TODO: Updating within a read-critical section is also invalid + * Update properly with updater synchronization + */ + gf_uuid_copy(peerinfo->uuid, ev_ctx->uuid); + + RCU_READ_UNLOCK; + + conf = this->private; + GF_ASSERT(conf); + + /* Passing the peername from the event. 
glusterd_compare_friend_data
+     * updates volumes and will use synchronize_rcu. If we were to pass
+     * peerinfo->hostname, we would have to do it under a read critical
+     * section which would lead to a deadlock
+     */
+
+    // Build comparison logic here.
+    pthread_mutex_lock(&conf->import_volumes);
+    {
+        ret = glusterd_compare_friend_data(ev_ctx->vols, &status,
+                                           event->peername);
+        if (ret) {
+            pthread_mutex_unlock(&conf->import_volumes);
+            goto out;
+        }
 if (GLUSTERD_VOL_COMP_RJT != status) {
-        event_type = GD_FRIEND_EVENT_LOCAL_ACC;
-        op_ret = 0;
+            event_type = GD_FRIEND_EVENT_LOCAL_ACC;
+            op_ret = 0;
 } else {
-        event_type = GD_FRIEND_EVENT_LOCAL_RJT;
-        op_errno = GF_PROBE_VOLUME_CONFLICT;
-        op_ret = -1;
-    }
-
-    ret = glusterd_friend_sm_new_event (event_type, &new_event);
-
-    if (ret) {
-        gf_log ("", GF_LOG_ERROR, "Out of Memory");
+            event_type = GD_FRIEND_EVENT_LOCAL_RJT;
+            op_errno = GF_PROBE_VOLUME_CONFLICT;
+            op_ret = -1;
 }
-    new_event->peerinfo = peerinfo;
-
-    new_ev_ctx = GF_CALLOC (1, sizeof (*new_ev_ctx),
-                            gf_gld_mt_friend_update_ctx_t);
-    if (!new_ev_ctx) {
-        ret = -1;
-        goto out;
+        /* Compare missed_snapshot list with the peer *
+         * if volume comparison is successful */
+        if ((op_ret == 0) && (conf->op_version >= GD_OP_VERSION_3_6_0)) {
+            ret = glusterd_import_friend_missed_snap_list(ev_ctx->vols);
+            if (ret) {
+                gf_msg(this->name, GF_LOG_ERROR, 0,
+                       GD_MSG_MISSED_SNAP_LIST_STORE_FAIL,
+                       "Failed to import peer's "
+                       "missed_snaps_list.");
+                event_type = GD_FRIEND_EVENT_LOCAL_RJT;
+                op_errno = GF_PROBE_MISSED_SNAP_CONFLICT;
+                op_ret = -1;
+            }
+
+            /* glusterd_compare_friend_snapshots and functions only require
+             * a peer's hostname and uuid. It also does updates, which
+             * require use of synchronize_rcu. So we pass the hostname and
+             * id from the event instead of the peerinfo object to prevent
+             * deadlocks as above.
+ */ + ret = glusterd_compare_friend_snapshots( + ev_ctx->vols, event->peername, event->peerid); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_SNAP_COMPARE_CONFLICT, + "Conflict in comparing peer's snapshots"); + event_type = GD_FRIEND_EVENT_LOCAL_RJT; + op_errno = GF_PROBE_SNAP_CONFLICT; + op_ret = -1; + } } - - uuid_copy (new_ev_ctx->uuid, ev_ctx->uuid); - new_ev_ctx->hostname = gf_strdup (ev_ctx->hostname); - new_ev_ctx->op = GD_FRIEND_UPDATE_ADD; - - new_event->ctx = new_ev_ctx; - - glusterd_friend_sm_inject_event (new_event); - - ret = glusterd_xfer_friend_add_resp (ev_ctx->req, ev_ctx->hostname, - ev_ctx->port, op_ret, op_errno); + } + pthread_mutex_unlock(&conf->import_volumes); + ret = glusterd_friend_sm_new_event(event_type, &new_event); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "Out of Memory"); + goto out; + } + + new_event->peername = gf_strdup(event->peername); + gf_uuid_copy(new_event->peerid, event->peerid); + + new_ev_ctx = GF_CALLOC(1, sizeof(*new_ev_ctx), + gf_gld_mt_friend_update_ctx_t); + if (!new_ev_ctx) { + ret = -1; + goto out; + } + + gf_uuid_copy(new_ev_ctx->uuid, ev_ctx->uuid); + new_ev_ctx->hostname = gf_strdup(ev_ctx->hostname); + new_ev_ctx->op = GD_FRIEND_UPDATE_ADD; + + new_event->ctx = new_ev_ctx; + + ret = dict_get_strn(ev_ctx->vols, "hostname_in_cluster", + SLEN("hostname_in_cluster"), &hostname); + if (ret || !hostname) { + gf_msg_debug(this->name, 0, "Unable to fetch local hostname from peer"); + } else if (snprintf(local_node_hostname, sizeof(local_node_hostname), "%s", + hostname) >= sizeof(local_node_hostname)) { + gf_msg_debug(this->name, 0, "local_node_hostname truncated"); + ret = -1; + goto out; + } + + glusterd_friend_sm_inject_event(new_event); + new_event = NULL; + + ret = glusterd_xfer_friend_add_resp(ev_ctx->req, ev_ctx->hostname, + event->peername, ev_ctx->port, op_ret, + op_errno); out: - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + if (new_event) + GF_FREE(new_event->peername); + GF_FREE(new_event); - return ret; + gf_msg_debug("glusterd", 0, "Returning with %d", ret); + return ret; } static int -glusterd_friend_sm_transition_state (glusterd_peerinfo_t *peerinfo, - glusterd_sm_t *state, - glusterd_friend_sm_event_type_t event_type) +glusterd_friend_sm_transition_state(uuid_t peerid, char *peername, + glusterd_sm_t *state, + glusterd_friend_sm_event_type_t event_type) { + int ret = -1; + glusterd_peerinfo_t *peerinfo = NULL; - GF_ASSERT (state); - GF_ASSERT (peerinfo); + GF_ASSERT(state); + GF_ASSERT(peername); - (void) glusterd_sm_tr_log_transition_add (&peerinfo->sm_log, - peerinfo->state.state, - state[event_type].next_state, - event_type); + RCU_READ_LOCK; + peerinfo = glusterd_peerinfo_find(peerid, peername); + if (!peerinfo) { + gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_PEER_NOT_FOUND, NULL); + goto out; + } - peerinfo->state.state = state[event_type].next_state; - return 0; -} + (void)glusterd_sm_tr_log_transition_add( + &peerinfo->sm_log, peerinfo->state.state, state[event_type].next_state, + event_type); + uatomic_set(&peerinfo->state.state, state[event_type].next_state); -glusterd_sm_t glusterd_state_default [] = { - {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, - {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_probe},//EV_PROBE - {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_friend_add}, //EV_INIT_FRIEND_REQ - {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, //EVENT_RCVD_ACC - {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, //EVENT_RCVD_LOCAL_ACC - {GD_FRIEND_STATE_DEFAULT, 
glusterd_ac_none}, //EVENT_RCVD_RJT - {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, //EVENT_RCVD_LOCAL_RJT - {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_handle_friend_add_req}, //EVENT_RCV_FRIEND_REQ - {GD_FRIEND_STATE_DEFAULT, glusterd_ac_send_friend_remove_req}, //EV_INIT_REMOVE_FRIEND - {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, //EVENT_RCVD_REMOVE_FRIEND - {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND - {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_probe}, //EVENT_CONNECTED - {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, //EVENT_MAX -}; + ret = 0; +out: + RCU_READ_UNLOCK; + return ret; +} -glusterd_sm_t glusterd_state_probe_rcvd [] = { - {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none}, - {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none}, //EV_PROBE - {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none}, //EV_INIT_FRIEND_REQ - {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none}, //EVENT_RCVD_ACC - {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none}, //EVENT_RCVD_LOCAL_ACC - {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none}, //EVENT_RCVD_RJT - {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none}, //EVENT_RCVD_LOCAL_RJT - {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_handle_friend_add_req}, //EVENT_RCV_FRIEND_REQ - {GD_FRIEND_STATE_DEFAULT, glusterd_ac_send_friend_remove_req}, //EV_INIT_REMOVE_FRIEND - {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, //EVENT_RCVD_REMOVE_FRIEND - {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND - {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, //EVENT_CONNECTED - {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, //EVENT_MAX +glusterd_sm_t glusterd_state_default[] = { + {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, + {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_probe}, // EV_PROBE + {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_friend_add}, // EV_INIT_FRIEND_REQ + {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, // EVENT_RCVD_ACC + {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, // EVENT_RCVD_LOCAL_ACC + {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, // EVENT_RCVD_RJT + {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, // EVENT_RCVD_LOCAL_RJT + {GD_FRIEND_STATE_REQ_RCVD, + glusterd_ac_handle_friend_add_req}, // EVENT_RCV_FRIEND_REQ + {GD_FRIEND_STATE_DEFAULT, + glusterd_ac_send_friend_remove_req}, // EV_INIT_REMOVE_FRIEND + {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, // EVENT_RCVD_REMOVE_FRIEND + {GD_FRIEND_STATE_DEFAULT, + glusterd_ac_friend_remove}, // EVENT_REMOVE_FRIEND + {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_probe}, // EVENT_CONNECTED + {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, // EVENT_NEW_NAME + {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, // EVENT_MAX }; -glusterd_sm_t glusterd_state_connected_rcvd [] = { - {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, - {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, //EV_PROBE - {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, //EV_INIT_FRIEND_REQ - {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, //EVENT_RCVD_ACC - {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_reverse_probe_begin}, //EVENT_RCVD_LOCAL_ACC - {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, //EVENT_RCVD_RJT - {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, //EVENT_RCVD_LOCAL_RJT - {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_handle_friend_add_req}, //EVENT_RCV_FRIEND_REQ - {GD_FRIEND_STATE_DEFAULT, glusterd_ac_send_friend_remove_req}, //EV_INIT_REMOVE_FRIEND - {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, //EVENT_RCVD_REMOVE_FRIEND - {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND - 
{GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, //EVENT_CONNECTED - {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, //EVENT_MAX +glusterd_sm_t glusterd_state_probe_rcvd[] = { + {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none}, + {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none}, // EV_PROBE + {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none}, // EV_INIT_FRIEND_REQ + {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none}, // EVENT_RCVD_ACC + {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none}, // EVENT_RCVD_LOCAL_ACC + {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none}, // EVENT_RCVD_RJT + {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none}, // EVENT_RCVD_LOCAL_RJT + {GD_FRIEND_STATE_REQ_RCVD, + glusterd_ac_handle_friend_add_req}, // EVENT_RCV_FRIEND_REQ + {GD_FRIEND_STATE_DEFAULT, + glusterd_ac_send_friend_remove_req}, // EV_INIT_REMOVE_FRIEND + {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, // EVENT_RCVD_REMOVE_FRIEND + {GD_FRIEND_STATE_DEFAULT, + glusterd_ac_friend_remove}, // EVENT_REMOVE_FRIEND + {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, // EVENT_CONNECTED + {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, // EVENT_NEW_NAME + {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, // EVENT_MAX }; -glusterd_sm_t glusterd_state_connected_accepted [] = { - {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, - {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_friend_probe}, //EV_PROBE - {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_friend_add}, //EV_INIT_FRIEND_REQ - {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, //EVENT_RCVD_ACC - {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, //EVENT_RCVD_LOCAL_ACC - {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, //EVENT_RCVD_RJT - {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, //EVENT_RCVD_LOCAL_RJT - {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, //EVENT_RCV_FRIEND_REQ - {GD_FRIEND_STATE_DEFAULT, glusterd_ac_send_friend_remove_req}, //EV_INIT_REMOVE_FRIEND - {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, //EVENT_RCVD_REMOVE_FRIEND - {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND - {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, //EVENT_CONNECTED - {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, //EVENT_MAX +glusterd_sm_t glusterd_state_connected_rcvd[] = { + {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, + {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, // EV_PROBE + {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, // EV_INIT_FRIEND_REQ + {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, // EVENT_RCVD_ACC + {GD_FRIEND_STATE_CONNECTED_ACCEPTED, + glusterd_ac_reverse_probe_begin}, // EVENT_RCVD_LOCAL_ACC + {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, // EVENT_RCVD_RJT + {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, // EVENT_RCVD_LOCAL_RJT + {GD_FRIEND_STATE_CONNECTED_RCVD, + glusterd_ac_handle_friend_add_req}, // EVENT_RCV_FRIEND_REQ + {GD_FRIEND_STATE_DEFAULT, + glusterd_ac_send_friend_remove_req}, // EV_INIT_REMOVE_FRIEND + {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, // EVENT_RCVD_REMOVE_FRIEND + {GD_FRIEND_STATE_DEFAULT, + glusterd_ac_friend_remove}, // EVENT_REMOVE_FRIEND + {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, // EVENT_CONNECTED + {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, // EVENT_NEW_NAME + {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, // EVENT_MAX }; -glusterd_sm_t glusterd_state_req_sent [] = { - {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none}, //EVENT_NONE, - {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none}, //EVENT_PROBE, - 
{GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none}, //EVENT_INIT_FRIEND_REQ, - {GD_FRIEND_STATE_REQ_ACCEPTED, glusterd_ac_none}, //EVENT_RCVD_ACC - {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none}, //EVENT_RCVD_LOCAL_ACC - {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, //EVENT_RCVD_RJT - {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none}, //EVENT_RCVD_LOCAL_RJT - {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_handle_friend_add_req}, //EVENT_RCV_FRIEND_REQ - {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_send_friend_remove_req}, //EVENT_INIT_REMOVE_FRIEND, - {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none}, //EVENT_RCVD_REMOVE_FRIEND - {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND - {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none},//EVENT_CONNECTED - {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none},//EVENT_MAX +glusterd_sm_t glusterd_state_connected_accepted[] = { + {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, + {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_friend_probe}, // EV_PROBE + {GD_FRIEND_STATE_REQ_SENT_RCVD, + glusterd_ac_friend_add}, // EV_INIT_FRIEND_REQ + {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, // EVENT_RCVD_ACC + {GD_FRIEND_STATE_CONNECTED_ACCEPTED, + glusterd_ac_none}, // EVENT_RCVD_LOCAL_ACC + {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, // EVENT_RCVD_RJT + {GD_FRIEND_STATE_CONNECTED_ACCEPTED, + glusterd_ac_none}, // EVENT_RCVD_LOCAL_RJT + {GD_FRIEND_STATE_CONNECTED_ACCEPTED, + glusterd_ac_none}, // EVENT_RCV_FRIEND_REQ + {GD_FRIEND_STATE_DEFAULT, + glusterd_ac_send_friend_remove_req}, // EV_INIT_REMOVE_FRIEND + {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, // EVENT_RCVD_REMOVE_FRIEND + {GD_FRIEND_STATE_DEFAULT, + glusterd_ac_friend_remove}, // EVENT_REMOVE_FRIEND + {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, // EVENT_CONNECTED + {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, // EVENT_NEW_NAME + {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, // EVENT_MAX }; -glusterd_sm_t glusterd_state_req_rcvd [] = { - {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none}, //EVENT_NONE, - {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none}, //EVENT_PROBE, - {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none}, //EVENT_INIT_FRIEND_REQ, - {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none}, //EVENT_RCVD_ACC - {GD_FRIEND_STATE_REQ_ACCEPTED, glusterd_ac_none}, //EVENT_RCVD_LOCAL_ACC - {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none}, //EVENT_RCVD_RJT - {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, //EVENT_RCVD_LOCAL_RJT - {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none}, //EVENT_RCV_FRIEND_REQ - {GD_FRIEND_STATE_DEFAULT, glusterd_ac_send_friend_remove_req}, //EVENT_INIT_REMOVE_FRIEND, - {GD_FRIEND_STATE_DEFAULT, glusterd_ac_handle_friend_remove_req}, //EVENT_RCVD_REMOVE_FRIEND - {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND - {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none},//EVENT_CONNECTED - {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none},//EVENT_MAX +glusterd_sm_t glusterd_state_req_sent[] = { + {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none}, // EVENT_NONE, + {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none}, // EVENT_PROBE, + {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none}, // EVENT_INIT_FRIEND_REQ, + {GD_FRIEND_STATE_REQ_ACCEPTED, glusterd_ac_none}, // EVENT_RCVD_ACC + {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none}, // EVENT_RCVD_LOCAL_ACC + {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, // EVENT_RCVD_RJT + {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none}, // EVENT_RCVD_LOCAL_RJT + {GD_FRIEND_STATE_REQ_SENT_RCVD, + 
glusterd_ac_handle_friend_add_req}, // EVENT_RCV_FRIEND_REQ + {GD_FRIEND_STATE_UNFRIEND_SENT, + glusterd_ac_send_friend_remove_req}, // EVENT_INIT_REMOVE_FRIEND, + {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none}, // EVENT_RCVD_REMOVE_FRIEND + {GD_FRIEND_STATE_DEFAULT, + glusterd_ac_friend_remove}, // EVENT_REMOVE_FRIEND + {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none}, // EVENT_CONNECTED + {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none}, // EVENT_NEW_NAME + {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none}, // EVENT_MAX }; -glusterd_sm_t glusterd_state_befriended [] = { - {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_none}, //EVENT_NONE, - {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_none}, //EVENT_PROBE, - {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_none}, //EVENT_INIT_FRIEND_REQ, - {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_none}, //EVENT_RCVD_ACC - {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_send_friend_update}, //EVENT_RCVD_LOCAL_ACC - {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, //EVENT_RCVD_RJT - {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, //EVENT_RCVD_LOCAL_RJT - {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_handle_friend_add_req}, //EVENT_RCV_FRIEND_REQ - {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_send_friend_remove_req}, //EVENT_INIT_REMOVE_FRIEND, - {GD_FRIEND_STATE_DEFAULT, glusterd_ac_handle_friend_remove_req}, //EVENT_RCVD_REMOVE_FRIEND - {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND - {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_friend_add},//EVENT_CONNECTED - {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_none},//EVENT_MAX +glusterd_sm_t glusterd_state_req_rcvd[] = { + {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none}, // EVENT_NONE, + {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none}, // EVENT_PROBE, + {GD_FRIEND_STATE_REQ_SENT_RCVD, + glusterd_ac_none}, // EVENT_INIT_FRIEND_REQ, + {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none}, // EVENT_RCVD_ACC + {GD_FRIEND_STATE_REQ_ACCEPTED, glusterd_ac_none}, // EVENT_RCVD_LOCAL_ACC + {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none}, // EVENT_RCVD_RJT + {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, // EVENT_RCVD_LOCAL_RJT + {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none}, // EVENT_RCV_FRIEND_REQ + {GD_FRIEND_STATE_DEFAULT, + glusterd_ac_send_friend_remove_req}, // EVENT_INIT_REMOVE_FRIEND, + {GD_FRIEND_STATE_DEFAULT, + glusterd_ac_handle_friend_remove_req}, // EVENT_RCVD_REMOVE_FRIEND + {GD_FRIEND_STATE_DEFAULT, + glusterd_ac_friend_remove}, // EVENT_REMOVE_FRIEND + {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, // EVENT_CONNECTED + {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, // EVENT_NEW_NAME + {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none}, // EVENT_MAX }; -glusterd_sm_t glusterd_state_req_sent_rcvd [] = { - {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none}, //EVENT_NONE, - {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none}, //EVENT_PROBE, - {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none}, //EVENT_INIT_FRIEND_REQ, - {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_send_friend_update}, //EVENT_RCVD_ACC - {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none}, //EVENT_RCVD_LOCAL_ACC - {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, //EVENT_RCVD_RJT - {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none}, //EVENT_RCVD_LOCAL_RJT - {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none}, //EVENT_RCV_FRIEND_REQ - {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_send_friend_remove_req}, //EVENT_INIT_REMOVE_FRIEND, - {GD_FRIEND_STATE_DEFAULT, glusterd_ac_handle_friend_remove_req}, //EVENT_RCVD_REMOVE_FRIEND - {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND - 
{GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none},//EVENT_CONNECTED - {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none},//EVENT_MAX +glusterd_sm_t glusterd_state_befriended[] = { + {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_none}, // EVENT_NONE, + {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_none}, // EVENT_PROBE, + {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_none}, // EVENT_INIT_FRIEND_REQ, + {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_update_friend}, // EVENT_RCVD_ACC + {GD_FRIEND_STATE_BEFRIENDED, + glusterd_ac_update_friend}, // EVENT_RCVD_LOCAL_ACC + {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, // EVENT_RCVD_RJT + {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, // EVENT_RCVD_LOCAL_RJT + {GD_FRIEND_STATE_BEFRIENDED, + glusterd_ac_handle_friend_add_req}, // EVENT_RCV_FRIEND_REQ + {GD_FRIEND_STATE_UNFRIEND_SENT, + glusterd_ac_send_friend_remove_req}, // EVENT_INIT_REMOVE_FRIEND, + {GD_FRIEND_STATE_DEFAULT, + glusterd_ac_handle_friend_remove_req}, // EVENT_RCVD_REMOVE_FRIEND + {GD_FRIEND_STATE_DEFAULT, + glusterd_ac_friend_remove}, // EVENT_REMOVE_FRIEND + {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_friend_add}, // EVENT_CONNECTED + {GD_FRIEND_STATE_BEFRIENDED, + glusterd_ac_send_friend_update}, // EVENT_NEW_NAME + {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_none}, // EVENT_MAX }; -glusterd_sm_t glusterd_state_rejected [] = { - {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, //EVENT_NONE, - {GD_FRIEND_STATE_REJECTED, glusterd_ac_friend_probe}, //EVENT_PROBE, - {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_friend_add}, //EVENT_INIT_FRIEND_REQ, - {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_none}, //EVENT_RCVD_ACC - {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_none}, //EVENT_RCVD_LOCAL_ACC - {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, //EVENT_RCVD_RJT - {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, //EVENT_RCVD_LOCAL_RJT - {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_handle_friend_add_req}, //EVENT_RCV_FRIEND_REQ - {GD_FRIEND_STATE_DEFAULT, glusterd_ac_send_friend_remove_req}, //EVENT_INIT_REMOVE_FRIEND - {GD_FRIEND_STATE_DEFAULT, glusterd_ac_handle_friend_remove_req}, //EVENT_RCVD_REMOVE_FRIEND - {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND - {GD_FRIEND_STATE_REJECTED, glusterd_ac_friend_add},//EVENT_CONNECTED - {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none},//EVENT_MAX +glusterd_sm_t glusterd_state_req_sent_rcvd[] = { + {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none}, // EVENT_NONE, + {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none}, // EVENT_PROBE, + {GD_FRIEND_STATE_REQ_SENT_RCVD, + glusterd_ac_none}, // EVENT_INIT_FRIEND_REQ, + {GD_FRIEND_STATE_BEFRIENDED, + glusterd_ac_send_friend_update}, // EVENT_RCVD_ACC + {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none}, // EVENT_RCVD_LOCAL_ACC + {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, // EVENT_RCVD_RJT + {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none}, // EVENT_RCVD_LOCAL_RJT + {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none}, // EVENT_RCV_FRIEND_REQ + {GD_FRIEND_STATE_UNFRIEND_SENT, + glusterd_ac_send_friend_remove_req}, // EVENT_INIT_REMOVE_FRIEND, + {GD_FRIEND_STATE_DEFAULT, + glusterd_ac_handle_friend_remove_req}, // EVENT_RCVD_REMOVE_FRIEND + {GD_FRIEND_STATE_DEFAULT, + glusterd_ac_friend_remove}, // EVENT_REMOVE_FRIEND + {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none}, // EVENT_CONNECTED + {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none}, // EVENT_NEW_NAME + {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none}, // EVENT_MAX }; -glusterd_sm_t glusterd_state_req_accepted [] = { - {GD_FRIEND_STATE_REQ_ACCEPTED, glusterd_ac_none}, 
//EVENT_NONE, - {GD_FRIEND_STATE_REQ_ACCEPTED, glusterd_ac_none}, //EVENT_PROBE, - {GD_FRIEND_STATE_REQ_ACCEPTED, glusterd_ac_none}, //EVENT_INIT_FRIEND_REQ, - {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_send_friend_update}, //EVENT_RCVD_ACC - {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_send_friend_update}, //EVENT_RCVD_LOCAL_ACC - {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, //EVENT_RCVD_RJT - {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, //EVENT_RCVD_LOCAL_RJT - {GD_FRIEND_STATE_REQ_ACCEPTED, glusterd_ac_handle_friend_add_req}, //EVENT_RCV_FRIEND_REQ - {GD_FRIEND_STATE_REQ_ACCEPTED, glusterd_ac_send_friend_remove_req}, //EVENT_INIT_REMOVE_FRIEND - {GD_FRIEND_STATE_DEFAULT, glusterd_ac_handle_friend_remove_req}, //EVENT_RCVD_REMOVE_FRIEND - {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND - {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_reverse_probe_begin},//EVENT_CONNECTED - {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none},//EVENT_MAX +glusterd_sm_t glusterd_state_rejected[] = { + {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, // EVENT_NONE, + {GD_FRIEND_STATE_REJECTED, glusterd_ac_friend_probe}, // EVENT_PROBE, + {GD_FRIEND_STATE_REQ_SENT, + glusterd_ac_friend_add}, // EVENT_INIT_FRIEND_REQ, + {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_none}, // EVENT_RCVD_ACC + {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_none}, // EVENT_RCVD_LOCAL_ACC + {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, // EVENT_RCVD_RJT + {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, // EVENT_RCVD_LOCAL_RJT + {GD_FRIEND_STATE_REQ_RCVD, + glusterd_ac_handle_friend_add_req}, // EVENT_RCV_FRIEND_REQ + {GD_FRIEND_STATE_DEFAULT, + glusterd_ac_send_friend_remove_req}, // EVENT_INIT_REMOVE_FRIEND + {GD_FRIEND_STATE_DEFAULT, + glusterd_ac_handle_friend_remove_req}, // EVENT_RCVD_REMOVE_FRIEND + {GD_FRIEND_STATE_DEFAULT, + glusterd_ac_friend_remove}, // EVENT_REMOVE_FRIEND + {GD_FRIEND_STATE_REJECTED, glusterd_ac_friend_add}, // EVENT_CONNECTED + {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, // EVENT_NEW_NAME + {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none}, // EVENT_MAX }; -glusterd_sm_t glusterd_state_unfriend_sent [] = { - {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_none}, //EVENT_NONE, - {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_error}, //EVENT_PROBE, - {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_none}, //EVENT_INIT_FRIEND_REQ, - {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_none}, //EVENT_RCVD_ACC - {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_none}, //EVENT_RCVD_LOCAL_ACC - {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_error}, //EVENT_RCVD_RJT - {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_error}, //EVENT_RCVD_LOCAL_RJT - {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_error}, //EVENT_RCV_FRIEND_REQ - {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_none}, //EVENT_INIT_REMOVE_FRIEND - {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_none}, //EVENT_RCVD_REMOVE_FRIEND - {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND - {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_none},//EVENT_CONNECTED - {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_none},//EVENT_MAX +glusterd_sm_t glusterd_state_req_accepted[] = { + {GD_FRIEND_STATE_REQ_ACCEPTED, glusterd_ac_none}, // EVENT_NONE, + {GD_FRIEND_STATE_REQ_ACCEPTED, glusterd_ac_none}, // EVENT_PROBE, + {GD_FRIEND_STATE_REQ_ACCEPTED, glusterd_ac_none}, // EVENT_INIT_FRIEND_REQ, + {GD_FRIEND_STATE_BEFRIENDED, + glusterd_ac_send_friend_update}, // EVENT_RCVD_ACC + {GD_FRIEND_STATE_BEFRIENDED, + glusterd_ac_send_friend_update}, // EVENT_RCVD_LOCAL_ACC + {GD_FRIEND_STATE_REJECTED, 
glusterd_ac_none}, // EVENT_RCVD_RJT + {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, // EVENT_RCVD_LOCAL_RJT + {GD_FRIEND_STATE_REQ_ACCEPTED, + glusterd_ac_handle_friend_add_req}, // EVENT_RCV_FRIEND_REQ + {GD_FRIEND_STATE_REQ_ACCEPTED, + glusterd_ac_send_friend_remove_req}, // EVENT_INIT_REMOVE_FRIEND + {GD_FRIEND_STATE_DEFAULT, + glusterd_ac_handle_friend_remove_req}, // EVENT_RCVD_REMOVE_FRIEND + {GD_FRIEND_STATE_DEFAULT, + glusterd_ac_friend_remove}, // EVENT_REMOVE_FRIEND + {GD_FRIEND_STATE_CONNECTED_ACCEPTED, + glusterd_ac_reverse_probe_begin}, // EVENT_CONNECTED + {GD_FRIEND_STATE_REQ_ACCEPTED, glusterd_ac_none}, // EVENT_NEW_NAME + {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none}, // EVENT_MAX }; -glusterd_sm_t *glusterd_friend_state_table [] = { - glusterd_state_default, - glusterd_state_req_sent, - glusterd_state_req_rcvd, - glusterd_state_befriended, - glusterd_state_req_accepted, - glusterd_state_req_sent_rcvd, - glusterd_state_rejected, - glusterd_state_unfriend_sent, - glusterd_state_probe_rcvd, - glusterd_state_connected_rcvd, - glusterd_state_connected_accepted +glusterd_sm_t glusterd_state_unfriend_sent[] = { + {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_none}, // EVENT_NONE, + {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_error}, // EVENT_PROBE, + {GD_FRIEND_STATE_UNFRIEND_SENT, + glusterd_ac_none}, // EVENT_INIT_FRIEND_REQ, + {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_none}, // EVENT_RCVD_ACC + {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_none}, // EVENT_RCVD_LOCAL_ACC + {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_error}, // EVENT_RCVD_RJT + {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_error}, // EVENT_RCVD_LOCAL_RJT + {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_error}, // EVENT_RCV_FRIEND_REQ + {GD_FRIEND_STATE_UNFRIEND_SENT, + glusterd_ac_none}, // EVENT_INIT_REMOVE_FRIEND + {GD_FRIEND_STATE_UNFRIEND_SENT, + glusterd_ac_none}, // EVENT_RCVD_REMOVE_FRIEND + {GD_FRIEND_STATE_DEFAULT, + glusterd_ac_friend_remove}, // EVENT_REMOVE_FRIEND + {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_none}, // EVENT_CONNECTED + {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_none}, // EVENT_NEW_NAME + {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_none}, // EVENT_MAX }; +glusterd_sm_t *glusterd_friend_state_table[] = { + glusterd_state_default, glusterd_state_req_sent, + glusterd_state_req_rcvd, glusterd_state_befriended, + glusterd_state_req_accepted, glusterd_state_req_sent_rcvd, + glusterd_state_rejected, glusterd_state_unfriend_sent, + glusterd_state_probe_rcvd, glusterd_state_connected_rcvd, + glusterd_state_connected_accepted}; + int -glusterd_friend_sm_new_event (glusterd_friend_sm_event_type_t event_type, - glusterd_friend_sm_event_t **new_event) +glusterd_friend_sm_new_event(glusterd_friend_sm_event_type_t event_type, + glusterd_friend_sm_event_t **new_event) { - glusterd_friend_sm_event_t *event = NULL; + glusterd_friend_sm_event_t *event = NULL; - GF_ASSERT (new_event); - GF_ASSERT (GD_FRIEND_EVENT_NONE <= event_type && - GD_FRIEND_EVENT_MAX > event_type); + GF_ASSERT(new_event); + GF_ASSERT(GD_FRIEND_EVENT_NONE <= event_type && + GD_FRIEND_EVENT_MAX > event_type); - event = GF_CALLOC (1, sizeof (*event), gf_gld_mt_friend_sm_event_t); + event = GF_CALLOC(1, sizeof(*event), gf_gld_mt_friend_sm_event_t); - if (!event) - return -1; + if (!event) + return -1; - *new_event = event; - event->event = event_type; - INIT_LIST_HEAD (&event->list); + *new_event = event; + event->event = event_type; + CDS_INIT_LIST_HEAD(&event->list); - return 0; + return 0; } int -glusterd_friend_sm_inject_event 
(glusterd_friend_sm_event_t *event) +glusterd_friend_sm_inject_event(glusterd_friend_sm_event_t *event) { - GF_ASSERT (event); - gf_log ("glusterd", GF_LOG_DEBUG, "Enqueuing event: '%s'", - glusterd_friend_sm_event_name_get (event->event)); - list_add_tail (&event->list, &gd_friend_sm_queue); + GF_ASSERT(event); + gf_msg_debug("glusterd", 0, "Enqueue event: '%s'", + glusterd_friend_sm_event_name_get(event->event)); + cds_list_add_tail(&event->list, &gd_friend_sm_queue); - return 0; + return 0; } void -glusterd_destroy_friend_event_context (glusterd_friend_sm_event_t *event) +glusterd_destroy_friend_event_context(glusterd_friend_sm_event_t *event) { - if (!event) - return; + if (!event) + return; - switch (event->event) { + switch (event->event) { case GD_FRIEND_EVENT_RCVD_FRIEND_REQ: case GD_FRIEND_EVENT_RCVD_REMOVE_FRIEND: - glusterd_destroy_friend_req_ctx (event->ctx); - break; + glusterd_destroy_friend_req_ctx(event->ctx); + break; case GD_FRIEND_EVENT_LOCAL_ACC: case GD_FRIEND_EVENT_LOCAL_RJT: case GD_FRIEND_EVENT_RCVD_ACC: case GD_FRIEND_EVENT_RCVD_RJT: - glusterd_destroy_friend_update_ctx (event->ctx); - break; + glusterd_destroy_friend_update_ctx(event->ctx); + break; default: - break; - } + break; + } +} + +gf_boolean_t +gd_does_peer_affect_quorum(glusterd_friend_sm_state_t old_state, + glusterd_friend_sm_event_type_t event_type, + glusterd_peerinfo_t *peerinfo) +{ + gf_boolean_t affects = _gf_false; + + // When glusterd comes up with friends in BEFRIENDED state in store, + // wait until compare-data happens. + if ((old_state == GD_FRIEND_STATE_BEFRIENDED) && + (event_type != GD_FRIEND_EVENT_RCVD_ACC) && + (event_type != GD_FRIEND_EVENT_LOCAL_ACC)) + goto out; + if ((peerinfo->state.state == GD_FRIEND_STATE_BEFRIENDED) && + peerinfo->connected) { + affects = _gf_true; + } +out: + return affects; } int -glusterd_friend_sm () +glusterd_friend_sm() { - glusterd_friend_sm_event_t *event = NULL; - glusterd_friend_sm_event_t *tmp = NULL; - int ret = -1; - glusterd_friend_sm_ac_fn handler = NULL; - glusterd_sm_t *state = NULL; - glusterd_peerinfo_t *peerinfo = NULL; - glusterd_friend_sm_event_type_t event_type = 0; - gf_boolean_t is_await_conn = _gf_false; - - while (!list_empty (&gd_friend_sm_queue)) { - list_for_each_entry_safe (event, tmp, &gd_friend_sm_queue, list) { - - list_del_init (&event->list); - event_type = event->event; - peerinfo = event->peerinfo; - if (!peerinfo) { - gf_log ("glusterd", GF_LOG_CRITICAL, "Received" - " event %s with empty peer info", - glusterd_friend_sm_event_name_get (event_type)); - - GF_FREE (event); - continue; - } - gf_log ("", GF_LOG_DEBUG, "Dequeued event of type: '%s'", - glusterd_friend_sm_event_name_get (event_type)); - - - state = glusterd_friend_state_table[peerinfo->state.state]; - - GF_ASSERT (state); - - handler = state[event_type].handler; - GF_ASSERT (handler); - - ret = handler (event, event->ctx); - if (ret == GLUSTERD_CONNECTION_AWAITED) { - is_await_conn = _gf_true; - ret = 0; - } - - if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, "handler returned: " - "%d", ret); - glusterd_destroy_friend_event_context (event); - GF_FREE (event); - continue; - } - - if ((GD_FRIEND_EVENT_REMOVE_FRIEND == event_type) || - (GD_FRIEND_EVENT_INIT_REMOVE_FRIEND == event_type)){ - glusterd_destroy_friend_event_context (event); - GF_FREE (event); - continue; - } - - ret = glusterd_friend_sm_transition_state (peerinfo, - state, event_type); - - if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, "Unable to transition" - " state from '%s' to '%s' for event '%s'", - 
glusterd_friend_sm_state_name_get(peerinfo->state.state), - glusterd_friend_sm_state_name_get(state[event_type].next_state), - glusterd_friend_sm_event_name_get(event_type)); - goto out; - } - - ret = glusterd_store_peerinfo (peerinfo); - - glusterd_destroy_friend_event_context (event); - GF_FREE (event); - if (is_await_conn) - break; + glusterd_friend_sm_event_t *event = NULL; + glusterd_friend_sm_event_t *tmp = NULL; + int ret = -1; + glusterd_friend_sm_ac_fn handler = NULL; + glusterd_sm_t *state = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_friend_sm_event_type_t event_type = 0; + gf_boolean_t is_await_conn = _gf_false; + gf_boolean_t quorum_action = _gf_false; + glusterd_friend_sm_state_t old_state = GD_FRIEND_STATE_DEFAULT; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + while (!cds_list_empty(&gd_friend_sm_queue)) { + cds_list_for_each_entry_safe(event, tmp, &gd_friend_sm_queue, list) + { + cds_list_del_init(&event->list); + event_type = event->event; + + RCU_READ_LOCK; + + peerinfo = glusterd_peerinfo_find(event->peerid, event->peername); + if (!peerinfo) { + RCU_READ_UNLOCK; + gf_msg("glusterd", GF_LOG_CRITICAL, 0, GD_MSG_PEER_NOT_FOUND, + "Received" + " event %s with empty peer info", + glusterd_friend_sm_event_name_get(event_type)); + + GF_FREE(event); + continue; + } + old_state = peerinfo->state.state; + RCU_READ_UNLOCK; + gf_msg_debug("glusterd", 0, "Dequeued event of type: '%s'", + glusterd_friend_sm_event_name_get(event_type)); + + /* Giving up read-critical section here as we only need + * the current state to call the handler. + * + * We cannot continue into the handler in a read + * critical section as there are handlers who do + * updates, and could cause deadlocks. + */ + + state = glusterd_friend_state_table[old_state]; + + GF_ASSERT(state); + + handler = state[event_type].handler; + GF_ASSERT(handler); + + ret = handler(event, event->ctx); + if (ret == GLUSTERD_CONNECTION_AWAITED) { + is_await_conn = _gf_true; + ret = 0; + } + + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_HANDLER_RETURNED, + "handler returned: " + "%d", + ret); + glusterd_destroy_friend_event_context(event); + GF_FREE(event); + continue; + } + + if ((GD_FRIEND_EVENT_REMOVE_FRIEND == event_type) || + (GD_FRIEND_EVENT_INIT_REMOVE_FRIEND == event_type)) { + glusterd_destroy_friend_event_context(event); + GF_FREE(event); + continue; + } + + ret = glusterd_friend_sm_transition_state( + event->peerid, event->peername, state, event_type); + + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, + GD_MSG_EVENT_STATE_TRANSITION_FAIL, + "Unable to transition" + " state from '%s' to '%s' for event '%s'", + glusterd_friend_sm_state_name_get(old_state), + glusterd_friend_sm_state_name_get( + state[event_type].next_state), + glusterd_friend_sm_event_name_get(event_type)); + goto out; + } + + peerinfo = NULL; + /* We need to obtain peerinfo reference once again as we + * had exited the read critical section above. + */ + RCU_READ_LOCK; + peerinfo = glusterd_peerinfo_find(event->peerid, event->peername); + if (!peerinfo) { + RCU_READ_UNLOCK; + /* A peer can only be deleted as a effect of + * this state machine, and two such state + * machines can never run at the same time. + * So if we cannot find the peerinfo here, + * something has gone terribly wrong. 
+ */ + ret = -1; + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_PEER_NOT_FOUND, + "Cannot find peer %s(%s)", event->peername, + uuid_utoa(event->peerid)); + goto out; + } + if (gd_does_peer_affect_quorum(old_state, event_type, peerinfo)) { + peerinfo->quorum_contrib = QUORUM_UP; + if (peerinfo->quorum_action) { + peerinfo->quorum_action = _gf_false; + quorum_action = _gf_true; } - if (is_await_conn) - break; + } + + ret = glusterd_store_peerinfo(peerinfo); + RCU_READ_UNLOCK; + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PEERINFO_CREATE_FAIL, + "Failed to store peerinfo"); + } + + glusterd_destroy_friend_event_context(event); + GF_FREE(event); + if (is_await_conn) + break; } + if (is_await_conn) + break; + } - ret = 0; + ret = 0; out: - return ret; + if (quorum_action) { + /* When glusterd is restarted, it needs to wait until the 'friends' view + * of the volumes settle, before it starts any of the internal daemons. + * + * Every friend that was part of the cluster, would send its + * cluster-view, 'our' way. For every friend, who belongs to + * a partition which has a different cluster-view from our + * partition, we may update our cluster-view. For subsequent + * friends from that partition would agree with us, if the first + * friend wasn't rejected. For every first friend, whom we agreed with, + * we would need to start internal daemons/bricks belonging to the + * new volumes. + * glusterd_spawn_daemons calls functions that are idempotent. ie, + * the functions spawn process(es) only if they are not started yet. + * + * */ + synclock_unlock(&priv->big_lock); + glusterd_launch_synctask(glusterd_spawn_daemons, NULL); + synclock_lock(&priv->big_lock); + glusterd_do_quorum_action(); + } + return ret; } - int -glusterd_friend_sm_init () +glusterd_friend_sm_init() { - INIT_LIST_HEAD (&gd_friend_sm_queue); - return 0; + CDS_INIT_LIST_HEAD(&gd_friend_sm_queue); + return 0; } diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.h b/xlators/mgmt/glusterd/src/glusterd-sm.h index f607b33d876..11cbd85b3e3 100644 --- a/xlators/mgmt/glusterd/src/glusterd-sm.h +++ b/xlators/mgmt/glusterd/src/glusterd-sm.h @@ -1,210 +1,216 @@ /* - Copyright (c) 2006-2010 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ #ifndef _GLUSTERD_SM_H_ #define _GLUSTERD_SM_H_ -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - #include <pthread.h> -#include "uuid.h" +#include <glusterfs/compat-uuid.h> #include "rpc-clnt.h" -#include "glusterfs.h" -#include "xlator.h" -#include "logging.h" -#include "call-stub.h" -#include "fd.h" -#include "byte-order.h" -//#include "glusterd.h" +#include <glusterfs/glusterfs.h> +#include <glusterfs/call-stub.h> +#include <glusterfs/byte-order.h> #include "rpcsvc.h" +#include <glusterfs/store.h> -struct glusterd_store_handle_ { - char *path; - int fd; - FILE *read; -}; +#include "glusterd-rcu.h" -typedef struct glusterd_store_handle_ glusterd_store_handle_t; +typedef enum gd_quorum_contribution_ { + QUORUM_NONE, + QUORUM_WAITING, + QUORUM_DOWN, + QUORUM_UP +} gd_quorum_contrib_t; typedef enum glusterd_friend_sm_state_ { - GD_FRIEND_STATE_DEFAULT = 0, - GD_FRIEND_STATE_REQ_SENT, - GD_FRIEND_STATE_REQ_RCVD, - GD_FRIEND_STATE_BEFRIENDED, - GD_FRIEND_STATE_REQ_ACCEPTED, - GD_FRIEND_STATE_REQ_SENT_RCVD, - GD_FRIEND_STATE_REJECTED, - GD_FRIEND_STATE_UNFRIEND_SENT, - GD_FRIEND_STATE_PROBE_RCVD, - GD_FRIEND_STATE_CONNECTED_RCVD, - GD_FRIEND_STATE_CONNECTED_ACCEPTED, - GD_FRIEND_STATE_MAX + GD_FRIEND_STATE_DEFAULT = 0, + GD_FRIEND_STATE_REQ_SENT, + GD_FRIEND_STATE_REQ_RCVD, + GD_FRIEND_STATE_BEFRIENDED, + GD_FRIEND_STATE_REQ_ACCEPTED, + GD_FRIEND_STATE_REQ_SENT_RCVD, + GD_FRIEND_STATE_REJECTED, + GD_FRIEND_STATE_UNFRIEND_SENT, + GD_FRIEND_STATE_PROBE_RCVD, + GD_FRIEND_STATE_CONNECTED_RCVD, + GD_FRIEND_STATE_CONNECTED_ACCEPTED, + GD_FRIEND_STATE_MAX } glusterd_friend_sm_state_t; typedef struct glusterd_peer_state_info_ { - glusterd_friend_sm_state_t state; - struct timeval transition_time; -}glusterd_peer_state_info_t; + glusterd_friend_sm_state_t state; + struct timeval transition_time; +} glusterd_peer_state_info_t; typedef struct glusterd_peer_hostname_ { - char *hostname; - struct list_head hostname_list; -}glusterd_peer_hostname_t; + char *hostname; + struct cds_list_head hostname_list; +} glusterd_peer_hostname_t; typedef struct glusterd_sm_transition_ { - int old_state; - int event; - int new_state; - time_t time; + int old_state; + int event; + int new_state; + time_t time; } glusterd_sm_transition_t; typedef struct glusterd_sm_tr_log_ { - glusterd_sm_transition_t *transitions; - size_t current; - size_t size; - size_t count; - char* (*state_name_get) (int); - char* (*event_name_get) (int); + glusterd_sm_transition_t *transitions; + size_t current; + size_t size; + size_t count; + char *(*state_name_get)(int); + char *(*event_name_get)(int); } glusterd_sm_tr_log_t; struct glusterd_peerinfo_ { - uuid_t uuid; - char uuid_str[50]; - glusterd_peer_state_info_t state; - char *hostname; - int port; - struct list_head uuid_list; - struct list_head op_peers_list; - struct rpc_clnt *rpc; - rpc_clnt_prog_t *mgmt; - int connected; - glusterd_store_handle_t *shandle; - glusterd_sm_tr_log_t sm_log; + uuid_t uuid; + char uuid_str[50]; /* Retrieve this using + * gd_peer_uuid_str () + */ + glusterd_peer_state_info_t state; + char *hostname; + struct cds_list_head hostnames; + int port; + struct cds_list_head uuid_list; + struct cds_list_head op_peers_list; + struct rpc_clnt *rpc; + rpc_clnt_prog_t *mgmt; + rpc_clnt_prog_t *peer; + rpc_clnt_prog_t *mgmt_v3; + int connected; + gf_store_handle_t *shandle; + glusterd_sm_tr_log_t sm_log; + gf_boolean_t quorum_action; + gd_quorum_contrib_t quorum_contrib; + gf_boolean_t locked; + gf_boolean_t detaching; + /* Members required for 
proper cleanup using RCU */ + gd_rcu_head rcu_head; + pthread_mutex_t delete_lock; + uint32_t generation; }; typedef struct glusterd_peerinfo_ glusterd_peerinfo_t; +typedef struct glusterd_local_peers_ { + glusterd_peerinfo_t *peerinfo; + struct cds_list_head op_peers_list; +} glusterd_local_peers_t; + typedef enum glusterd_ev_gen_mode_ { - GD_MODE_OFF, - GD_MODE_ON, - GD_MODE_SWITCH_ON + GD_MODE_OFF, + GD_MODE_ON, + GD_MODE_SWITCH_ON } glusterd_ev_gen_mode_t; typedef struct glusterd_peer_ctx_args_ { - rpcsvc_request_t *req; - glusterd_ev_gen_mode_t mode; + rpcsvc_request_t *req; + glusterd_ev_gen_mode_t mode; + dict_t *dict; } glusterd_peerctx_args_t; typedef struct glusterd_peer_ctx_ { - glusterd_peerctx_args_t args; - glusterd_peerinfo_t *peerinfo; + glusterd_peerctx_args_t args; + uuid_t peerid; + char *peername; + uint32_t peerinfo_gen; + char *errstr; } glusterd_peerctx_t; typedef enum glusterd_friend_sm_event_type_ { - GD_FRIEND_EVENT_NONE = 0, - GD_FRIEND_EVENT_PROBE, - GD_FRIEND_EVENT_INIT_FRIEND_REQ, - GD_FRIEND_EVENT_RCVD_ACC, - GD_FRIEND_EVENT_LOCAL_ACC, - GD_FRIEND_EVENT_RCVD_RJT, - GD_FRIEND_EVENT_LOCAL_RJT, - GD_FRIEND_EVENT_RCVD_FRIEND_REQ, - GD_FRIEND_EVENT_INIT_REMOVE_FRIEND, - GD_FRIEND_EVENT_RCVD_REMOVE_FRIEND, - GD_FRIEND_EVENT_REMOVE_FRIEND, - GD_FRIEND_EVENT_CONNECTED, - GD_FRIEND_EVENT_MAX + GD_FRIEND_EVENT_NONE = 0, + GD_FRIEND_EVENT_PROBE, + GD_FRIEND_EVENT_INIT_FRIEND_REQ, + GD_FRIEND_EVENT_RCVD_ACC, + GD_FRIEND_EVENT_LOCAL_ACC, + GD_FRIEND_EVENT_RCVD_RJT, + GD_FRIEND_EVENT_LOCAL_RJT, + GD_FRIEND_EVENT_RCVD_FRIEND_REQ, + GD_FRIEND_EVENT_INIT_REMOVE_FRIEND, + GD_FRIEND_EVENT_RCVD_REMOVE_FRIEND, + GD_FRIEND_EVENT_REMOVE_FRIEND, + GD_FRIEND_EVENT_CONNECTED, + GD_FRIEND_EVENT_NEW_NAME, + GD_FRIEND_EVENT_MAX } glusterd_friend_sm_event_type_t; - typedef enum glusterd_friend_update_op_ { - GD_FRIEND_UPDATE_NONE = 0, - GD_FRIEND_UPDATE_ADD, - GD_FRIEND_UPDATE_DEL, + GD_FRIEND_UPDATE_NONE = 0, + GD_FRIEND_UPDATE_ADD, + GD_FRIEND_UPDATE_DEL, } glusterd_friend_update_op_t; - struct glusterd_friend_sm_event_ { - struct list_head list; - glusterd_peerinfo_t *peerinfo; - void *ctx; - glusterd_friend_sm_event_type_t event; + struct cds_list_head list; + uuid_t peerid; + char *peername; + void *ctx; + glusterd_friend_sm_event_type_t event; }; typedef struct glusterd_friend_sm_event_ glusterd_friend_sm_event_t; -typedef int (*glusterd_friend_sm_ac_fn) (glusterd_friend_sm_event_t *, void *); +typedef int (*glusterd_friend_sm_ac_fn)(glusterd_friend_sm_event_t *, void *); typedef struct glusterd_sm_ { - glusterd_friend_sm_state_t next_state; - glusterd_friend_sm_ac_fn handler; + glusterd_friend_sm_state_t next_state; + glusterd_friend_sm_ac_fn handler; } glusterd_sm_t; typedef struct glusterd_friend_req_ctx_ { - uuid_t uuid; - char *hostname; - rpcsvc_request_t *req; - int port; - dict_t *vols; + uuid_t uuid; + char *hostname; + rpcsvc_request_t *req; + int port; + dict_t *vols; } glusterd_friend_req_ctx_t; typedef struct glusterd_friend_update_ctx_ { - uuid_t uuid; - char *hostname; - int op; + uuid_t uuid; + char *hostname; + int op; } glusterd_friend_update_ctx_t; typedef struct glusterd_probe_ctx_ { - char *hostname; - rpcsvc_request_t *req; - int port; + char *hostname; + rpcsvc_request_t *req; + int port; + dict_t *dict; } glusterd_probe_ctx_t; int -glusterd_friend_sm_new_event (glusterd_friend_sm_event_type_t event_type, - glusterd_friend_sm_event_t **new_event); +glusterd_friend_sm_new_event(glusterd_friend_sm_event_type_t event_type, + glusterd_friend_sm_event_t 
**new_event); int -glusterd_friend_sm_inject_event (glusterd_friend_sm_event_t *event); +glusterd_friend_sm_inject_event(glusterd_friend_sm_event_t *event); int -glusterd_friend_sm_init (); +glusterd_friend_sm_init(); int -glusterd_friend_sm (); +glusterd_friend_sm(); void -glusterd_destroy_probe_ctx (glusterd_probe_ctx_t *ctx); +glusterd_destroy_probe_ctx(glusterd_probe_ctx_t *ctx); void -glusterd_destroy_friend_req_ctx (glusterd_friend_req_ctx_t *ctx); +glusterd_destroy_friend_req_ctx(glusterd_friend_req_ctx_t *ctx); -char* -glusterd_friend_sm_state_name_get (int state); +char * +glusterd_friend_sm_state_name_get(int state); -char* -glusterd_friend_sm_event_name_get (int event); +char * +glusterd_friend_sm_event_name_get(int event); int -glusterd_broadcast_friend_delete (char *hostname, uuid_t uuid); +glusterd_broadcast_friend_delete(char *hostname, uuid_t uuid); void -glusterd_destroy_friend_update_ctx (glusterd_friend_update_ctx_t *ctx); +glusterd_destroy_friend_update_ctx(glusterd_friend_update_ctx_t *ctx); #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-snapd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-snapd-svc-helper.c new file mode 100644 index 00000000000..42ef51b01b4 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-snapd-svc-helper.c @@ -0,0 +1,75 @@ +/* + Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include "glusterd.h" +#include "glusterd-utils.h" +#include "glusterd-snapd-svc-helper.h" + +void +glusterd_svc_build_snapd_rundir(glusterd_volinfo_t *volinfo, char *path, + int path_len) +{ + char workdir[PATH_MAX] = { + 0, + }; + glusterd_conf_t *priv = THIS->private; + + GLUSTERD_GET_VOLUME_PID_DIR(workdir, volinfo, priv); + snprintf(path, path_len, "%s", workdir); +} + +void +glusterd_svc_build_snapd_socket_filepath(glusterd_volinfo_t *volinfo, + char *path, int path_len) +{ + char sockfilepath[PATH_MAX] = { + 0, + }; + char rundir[PATH_MAX] = { + 0, + }; + int32_t len = 0; + + glusterd_svc_build_snapd_rundir(volinfo, rundir, sizeof(rundir)); + len = snprintf(sockfilepath, sizeof(sockfilepath), "%s/run-%s", rundir, + uuid_utoa(MY_UUID)); + if ((len < 0) || (len >= sizeof(sockfilepath))) { + sockfilepath[0] = 0; + } + + glusterd_set_socket_filepath(sockfilepath, path, path_len); +} + +void +glusterd_svc_build_snapd_pidfile(glusterd_volinfo_t *volinfo, char *path, + int path_len) +{ + char rundir[PATH_MAX] = { + 0, + }; + + glusterd_svc_build_snapd_rundir(volinfo, rundir, sizeof(rundir)); + + snprintf(path, path_len, "%s/%s-snapd.pid", rundir, volinfo->volname); +} + +void +glusterd_svc_build_snapd_volfile(glusterd_volinfo_t *volinfo, char *path, + int path_len) +{ + char workdir[PATH_MAX] = { + 0, + }; + glusterd_conf_t *priv = THIS->private; + + GLUSTERD_GET_VOLUME_DIR(workdir, volinfo, priv); + + snprintf(path, path_len, "%s/%s-snapd.vol", workdir, volinfo->volname); +} diff --git a/xlators/mgmt/glusterd/src/glusterd-snapd-svc-helper.h b/xlators/mgmt/glusterd/src/glusterd-snapd-svc-helper.h new file mode 100644 index 00000000000..3e23c2ce942 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-snapd-svc-helper.h @@ -0,0 +1,32 @@ +/* + Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. 
+ + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _GLUSTERD_SNAPD_SVC_HELPER_H_ +#define _GLUSTERD_SNAPD_SVC_HELPER_H_ + +#include "glusterd.h" + +void +glusterd_svc_build_snapd_rundir(glusterd_volinfo_t *volinfo, char *path, + int path_len); + +void +glusterd_svc_build_snapd_socket_filepath(glusterd_volinfo_t *volinfo, + char *path, int path_len); + +void +glusterd_svc_build_snapd_pidfile(glusterd_volinfo_t *volinfo, char *path, + int path_len); + +void +glusterd_svc_build_snapd_volfile(glusterd_volinfo_t *volinfo, char *path, + int path_len); + +#endif diff --git a/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c b/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c new file mode 100644 index 00000000000..d75f249b29e --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c @@ -0,0 +1,478 @@ +/* + Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include <glusterfs/globals.h> +#include <glusterfs/run.h> +#include "glusterd-utils.h" +#include "glusterd-volgen.h" +#include "glusterd-messages.h" +#include "glusterd-svc-mgmt.h" +#include "glusterd-svc-helper.h" +#include "glusterd-conn-mgmt.h" +#include "glusterd-proc-mgmt.h" +#include "glusterd-snapd-svc.h" +#include "glusterd-snapd-svc-helper.h" +#include "glusterd-snapshot-utils.h" +#include <glusterfs/syscall.h> + +char *snapd_svc_name = "snapd"; + +static void +glusterd_svc_build_snapd_logdir(char *logdir, char *volname, size_t len) +{ + glusterd_conf_t *priv = THIS->private; + snprintf(logdir, len, "%s/snaps/%s", priv->logdir, volname); +} + +static void +glusterd_svc_build_snapd_logfile(char *logfile, char *logdir, size_t len) +{ + snprintf(logfile, len, "%s/snapd.log", logdir); +} + +void +glusterd_snapdsvc_build(glusterd_svc_t *svc) +{ + svc->manager = glusterd_snapdsvc_manager; + svc->start = glusterd_snapdsvc_start; + svc->stop = glusterd_svc_stop; +} + +int +glusterd_snapdsvc_init(void *data) +{ + int ret = -1; + char rundir[PATH_MAX] = { + 0, + }; + char sockpath[PATH_MAX] = { + 0, + }; + char pidfile[PATH_MAX] = { + 0, + }; + char volfile[PATH_MAX] = { + 0, + }; + char logdir[PATH_MAX] = { + 0, + }; + char logfile[PATH_MAX] = { + 0, + }; + char volfileid[256] = {0}; + glusterd_svc_t *svc = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_conf_t *priv = NULL; + glusterd_conn_notify_t notify = NULL; + xlator_t *this = NULL; + char *volfileserver = NULL; + int32_t len = 0; + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + GF_ASSERT(priv); + + volinfo = data; + + svc = &(volinfo->snapd.svc); + + ret = snprintf(svc->name, sizeof(svc->name), "%s", snapd_svc_name); + if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); + goto out; + } + + notify = glusterd_snapdsvc_rpc_notify; + + glusterd_svc_build_snapd_rundir(volinfo, rundir, sizeof(rundir)); + glusterd_svc_create_rundir(rundir); + + /* Initialize the connection mgmt */ + glusterd_svc_build_snapd_socket_filepath(volinfo, sockpath, + sizeof(sockpath)); + ret = glusterd_conn_init(&(svc->conn), sockpath, 
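    /* For reference: sockpath was built just above from the per-volume run
       directory plus this node's UUID, and notify was set to
       glusterd_snapdsvc_rpc_notify, so connect/disconnect events for the
       snapd process are delivered back to this service object. The numeric
       argument that follows is assumed to be the timeout (in seconds) handed
       to the RPC connection. */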
600, notify); + if (ret) + goto out; + + /* Initialize the process mgmt */ + glusterd_svc_build_snapd_pidfile(volinfo, pidfile, sizeof(pidfile)); + glusterd_svc_build_snapd_volfile(volinfo, volfile, sizeof(volfile)); + glusterd_svc_build_snapd_logdir(logdir, volinfo->volname, sizeof(logdir)); + ret = mkdir_p(logdir, 0755, _gf_true); + if ((ret == -1) && (EEXIST != errno)) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_CREATE_DIR_FAILED, + "Unable to create logdir %s", logdir); + goto out; + } + glusterd_svc_build_snapd_logfile(logfile, logdir, sizeof(logfile)); + len = snprintf(volfileid, sizeof(volfileid), "snapd/%s", volinfo->volname); + if ((len < 0) || (len >= sizeof(volfileid))) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); + ret = -1; + goto out; + } + + if (dict_get_str(this->options, "transport.socket.bind-address", + &volfileserver) != 0) { + volfileserver = "localhost"; + } + ret = glusterd_proc_init(&(svc->proc), snapd_svc_name, pidfile, logdir, + logfile, volfile, volfileid, volfileserver); + if (ret) + goto out; + +out: + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; +} + +int +glusterd_snapdsvc_manager(glusterd_svc_t *svc, void *data, int flags) +{ + int ret = 0; + xlator_t *this = THIS; + glusterd_volinfo_t *volinfo = NULL; + + volinfo = data; + + if (!svc->inited) { + ret = glusterd_snapdsvc_init(volinfo); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SNAPD_INIT_FAIL, + "Failed to initialize " + "snapd service for volume %s", + volinfo->volname); + goto out; + } else { + svc->inited = _gf_true; + gf_msg_debug(THIS->name, 0, + "snapd service " + "initialized"); + } + } + + ret = glusterd_is_snapd_enabled(volinfo); + if (ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, + "Failed to read volume " + "options"); + goto out; + } + + if (ret) { + if (!glusterd_is_volume_started(volinfo)) { + if (glusterd_proc_is_running(&svc->proc)) { + ret = svc->stop(svc, SIGTERM); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPD_STOP_FAIL, + "Couldn't stop snapd for " + "volume: %s", + volinfo->volname); + } else { + /* Since snapd is not running set ret to 0 */ + ret = 0; + } + goto out; + } + + ret = glusterd_snapdsvc_create_volfile(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPD_CREATE_FAIL, + "Couldn't create " + "snapd volfile for volume: %s", + volinfo->volname); + goto out; + } + + ret = svc->start(svc, flags); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPD_START_FAIL, + "Couldn't start " + "snapd for volume: %s", + volinfo->volname); + goto out; + } + + glusterd_volinfo_ref(volinfo); + ret = glusterd_conn_connect(&(svc->conn)); + if (ret) { + glusterd_volinfo_unref(volinfo); + goto out; + } + + } else if (glusterd_proc_is_running(&svc->proc)) { + ret = svc->stop(svc, SIGTERM); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPD_STOP_FAIL, + "Couldn't stop snapd for volume: %s", volinfo->volname); + goto out; + } + volinfo->snapd.port = 0; + } + +out: + if (ret) { + gf_event(EVENT_SVC_MANAGER_FAILED, "volume=%s;svc_name=%s", + volinfo->volname, svc->name); + } + gf_msg_debug(THIS->name, 0, "Returning %d", ret); + + return ret; +} + +int32_t +glusterd_snapdsvc_start(glusterd_svc_t *svc, int flags) +{ + int ret = -1; + runner_t runner = { + 0, + }; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + char valgrind_logfile[PATH_MAX] = {0}; + int snapd_port = 0; + char msg[1024] = { + 0, + }; + char snapd_id[PATH_MAX] = { + 0, + }; + 
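    /* Summary of glusterd_snapdsvc_manager() above, for readability: it
       lazily runs glusterd_snapdsvc_init() on first use, then consults
       glusterd_is_snapd_enabled(). If snapd is wanted and the volume is
       started, it (re)writes the snapd volfile, starts the daemon and
       connects to it; if snapd is wanted but the volume is stopped, or snapd
       is not wanted at all, any running snapd is stopped with SIGTERM and,
       in the disabled case, the recorded port is reset to 0. */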
glusterd_volinfo_t *volinfo = NULL; + glusterd_snapdsvc_t *snapd = NULL; + char *localtime_logging = NULL; + int32_t len = 0; + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + GF_ASSERT(priv); + + if (glusterd_proc_is_running(&svc->proc)) { + ret = 0; + goto out; + } + + /* Get volinfo->snapd from svc object */ + snapd = cds_list_entry(svc, glusterd_snapdsvc_t, svc); + if (!snapd) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPD_OBJ_GET_FAIL, + "Failed to get snapd object " + "from snapd service"); + goto out; + } + + /* Get volinfo from snapd */ + volinfo = cds_list_entry(snapd, glusterd_volinfo_t, snapd); + if (!volinfo) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, + "Failed to get volinfo from " + "from snapd"); + goto out; + } + + ret = sys_access(svc->proc.volfile, F_OK); + if (ret) { + gf_msg(this->name, GF_LOG_DEBUG, 0, GD_MSG_VOLINFO_GET_FAIL, + "snapd Volfile %s is not present", svc->proc.volfile); + /* If glusterd is down on one of the nodes and during + * that time "USS is enabled" for the first time. After some + * time when the glusterd which was down comes back it tries + * to look for the snapd volfile and it does not find snapd + * volfile and because of this starting of snapd fails. + * Therefore, if volfile is not present then create a fresh + * volfile. + */ + ret = glusterd_snapdsvc_create_volfile(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL, + "Couldn't create " + "snapd volfile for volume: %s", + volinfo->volname); + goto out; + } + } + runinit(&runner); + + if (this->ctx->cmd_args.vgtool != _gf_none) { + len = snprintf(valgrind_logfile, PATH_MAX, "%s/valgrind-snapd.log", + svc->proc.logdir); + if ((len < 0) || (len >= PATH_MAX)) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); + ret = -1; + goto out; + } + + if (this->ctx->cmd_args.vgtool == _gf_memcheck) + runner_add_args(&runner, "valgrind", "--leak-check=full", + "--trace-children=yes", "--track-origins=yes", + NULL); + else + runner_add_args(&runner, "valgrind", "--tool=drd", NULL); + + runner_argprintf(&runner, "--log-file=%s", valgrind_logfile); + } + + snprintf(snapd_id, sizeof(snapd_id), "snapd-%s", volinfo->volname); + runner_add_args(&runner, SBIN_DIR "/glusterfsd", "-s", + svc->proc.volfileserver, "--volfile-id", + svc->proc.volfileid, "-p", svc->proc.pidfile, "-l", + svc->proc.logfile, "--brick-name", snapd_id, "-S", + svc->conn.sockpath, "--process-name", svc->name, NULL); + if (dict_get_str(priv->opts, GLUSTERD_LOCALTIME_LOGGING_KEY, + &localtime_logging) == 0) { + if (strcmp(localtime_logging, "enable") == 0) + runner_add_arg(&runner, "--localtime-logging"); + } + + snapd_port = pmap_assign_port(THIS, volinfo->snapd.port, snapd_id); + if (!snapd_port) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PORTS_EXHAUSTED, + "All the ports in the range are exhausted, can't start " + "snapd for volume %s", + volinfo->volname); + ret = -1; + goto out; + } + + volinfo->snapd.port = snapd_port; + + runner_add_arg(&runner, "--brick-port"); + runner_argprintf(&runner, "%d", snapd_port); + runner_add_arg(&runner, "--xlator-option"); + runner_argprintf(&runner, "%s-server.listen-port=%d", volinfo->volname, + snapd_port); + runner_add_arg(&runner, "--no-mem-accounting"); + + snprintf(msg, sizeof(msg), "Starting the snapd service for volume %s", + volinfo->volname); + runner_log(&runner, this->name, GF_LOG_DEBUG, msg); + + if (flags == PROC_START_NO_WAIT) { + ret = runner_run_nowait(&runner); + } else { + 
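        /* Presumably the big_lock is dropped around runner_run() below
           because the call blocks until the spawned glusterfsd finishes its
           startup phase; holding glusterd's global lock for that long would
           stall every other management operation. */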
synclock_unlock(&priv->big_lock); + { + ret = runner_run(&runner); + } + synclock_lock(&priv->big_lock); + } + +out: + return ret; +} + +int +glusterd_snapdsvc_restart() +{ + glusterd_volinfo_t *volinfo = NULL; + glusterd_volinfo_t *tmp = NULL; + int ret = 0; + xlator_t *this = THIS; + glusterd_conf_t *conf = NULL; + glusterd_svc_t *svc = NULL; + + GF_ASSERT(this); + + conf = this->private; + GF_ASSERT(conf); + + cds_list_for_each_entry_safe(volinfo, tmp, &conf->volumes, vol_list) + { + /* Start per volume snapd svc */ + if (volinfo->status == GLUSTERD_STATUS_STARTED) { + svc = &(volinfo->snapd.svc); + ret = svc->manager(svc, volinfo, PROC_START_NO_WAIT); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPD_START_FAIL, + "Couldn't resolve snapd for " + "vol: %s on restart", + volinfo->volname); + gf_event(EVENT_SVC_MANAGER_FAILED, "volume=%s;svc_name=%s", + volinfo->volname, svc->name); + goto out; + } + } + } +out: + return ret; +} + +int +glusterd_snapdsvc_rpc_notify(glusterd_conn_t *conn, rpc_clnt_event_t event) +{ + int ret = 0; + glusterd_svc_t *svc = NULL; + xlator_t *this = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_snapdsvc_t *snapd = NULL; + + this = THIS; + GF_ASSERT(this); + + svc = cds_list_entry(conn, glusterd_svc_t, conn); + if (!svc) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_GET_FAIL, + "Failed to get the service"); + return -1; + } + snapd = cds_list_entry(svc, glusterd_snapdsvc_t, svc); + if (!snapd) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPD_OBJ_GET_FAIL, + "Failed to get the " + "snapd object"); + return -1; + } + + volinfo = cds_list_entry(snapd, glusterd_volinfo_t, snapd); + if (!volinfo) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, + "Failed to get the " + "volinfo object"); + return -1; + } + + switch (event) { + case RPC_CLNT_CONNECT: + gf_msg_debug(this->name, 0, + "%s has connected with " + "glusterd.", + svc->name); + gf_event(EVENT_SVC_CONNECTED, "volume=%s;svc_name=%s", + volinfo->volname, svc->name); + svc->online = _gf_true; + break; + + case RPC_CLNT_DISCONNECT: + if (svc->online) { + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_NODE_DISCONNECTED, + "%s has disconnected " + "from glusterd.", + svc->name); + gf_event(EVENT_SVC_DISCONNECTED, "volume=%s;svc_name=%s", + volinfo->volname, svc->name); + svc->online = _gf_false; + } + break; + + case RPC_CLNT_DESTROY: + glusterd_volinfo_unref(volinfo); + break; + + default: + gf_msg_trace(this->name, 0, "got some other RPC event %d", event); + break; + } + + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-snapd-svc.h b/xlators/mgmt/glusterd/src/glusterd-snapd-svc.h new file mode 100644 index 00000000000..e15dbf54315 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-snapd-svc.h @@ -0,0 +1,42 @@ +/* + Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#ifndef _GLUSTERD_SNAPD_SVC_H_ +#define _GLUSTERD_SNAPD_SVC_H_ + +#include "glusterd-svc-mgmt.h" + +typedef struct glusterd_snapdsvc_ glusterd_snapdsvc_t; + +struct glusterd_snapdsvc_ { + glusterd_svc_t svc; + gf_store_handle_t *handle; + int port; +}; + +void +glusterd_snapdsvc_build(glusterd_svc_t *svc); + +int +glusterd_snapdsvc_init(void *data); + +int +glusterd_snapdsvc_manager(glusterd_svc_t *svc, void *data, int flags); + +int +glusterd_snapdsvc_start(glusterd_svc_t *svc, int flags); + +int +glusterd_snapdsvc_restart(); + +int +glusterd_snapdsvc_rpc_notify(glusterd_conn_t *conn, rpc_clnt_event_t event); + +#endif diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c new file mode 100644 index 00000000000..995268b796d --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c @@ -0,0 +1,4290 @@ +/* + Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#include <inttypes.h> + +#if defined(GF_LINUX_HOST_OS) +#include <mntent.h> +#else +#include "mntent_compat.h" +#endif +#include <dlfcn.h> + +#include <glusterfs/dict.h> +#include <glusterfs/syscall.h> +#include "glusterd-op-sm.h" +#include "glusterd-utils.h" +#include "glusterd-messages.h" +#include "glusterd-store.h" +#include "glusterd-volgen.h" +#include "glusterd-snapd-svc.h" +#include "glusterd-svc-helper.h" +#include "glusterd-snapd-svc-helper.h" +#include "glusterd-snapshot-utils.h" +#include "glusterd-server-quorum.h" +#include "glusterd-messages.h" +#include "glusterd-errno.h" + +/* + * glusterd_snap_geo_rep_restore: + * This function restores the atime and mtime of marker.tstamp + * if present from snapped marker.tstamp file. + */ + +int32_t +glusterd_snapobject_delete(glusterd_snap_t *snap) +{ + if (snap == NULL) { + gf_msg(THIS->name, GF_LOG_WARNING, 0, GD_MSG_PARAM_NULL, + "snap is NULL"); + return -1; + } + + cds_list_del_init(&snap->snap_list); + cds_list_del_init(&snap->volumes); + if (LOCK_DESTROY(&snap->lock)) + gf_msg(THIS->name, GF_LOG_WARNING, 0, GD_MSG_LOCK_DESTROY_FAILED, + "Failed destroying lock" + "of snap %s", + snap->snapname); + + GF_FREE(snap->description); + GF_FREE(snap); + + return 0; +} + +/* + * This function is to be called only from glusterd_peer_detach_cleanup() + * as this continues to delete snaps in spite of faiure while deleting + * one, as we don't want to fail peer_detach in such a case. 
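 *
 * In practice the loop below, for every snap volume of the given volume,
 * removes the snap from the store, deletes the in-memory snap object,
 * removes the snap volume from the store and frees its volinfo; failures
 * are recorded in op_ret but iteration continues, so the caller gets the
 * last error while as much cleanup as possible is still performed.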
+ */ +int +glusterd_cleanup_snaps_for_volume(glusterd_volinfo_t *volinfo) +{ + int32_t op_ret = 0; + int32_t ret = 0; + xlator_t *this = NULL; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_volinfo_t *dummy_snap_vol = NULL; + glusterd_snap_t *snap = NULL; + + this = THIS; + GF_ASSERT(this); + + cds_list_for_each_entry_safe(snap_vol, dummy_snap_vol, + &volinfo->snap_volumes, snapvol_list) + { + snap = snap_vol->snapshot; + ret = glusterd_store_delete_snap(snap); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOL_DELETE_FAIL, + "Failed to remove " + "snap %s from store", + snap->snapname); + op_ret = ret; + continue; + } + + ret = glusterd_snapobject_delete(snap); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOL_DELETE_FAIL, + "Failed to delete " + "snap object %s", + snap->snapname); + op_ret = ret; + continue; + } + + ret = glusterd_store_delete_volume(snap_vol); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOL_DELETE_FAIL, + "Failed to remove " + "volume %s from store", + snap_vol->volname); + op_ret = ret; + continue; + } + + ret = glusterd_volinfo_delete(snap_vol); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOL_DELETE_FAIL, + "Failed to remove " + "volinfo %s ", + snap_vol->volname); + op_ret = ret; + continue; + } + } + + return op_ret; +} + +int +glusterd_snap_geo_rep_restore(glusterd_volinfo_t *snap_volinfo, + glusterd_volinfo_t *new_volinfo) +{ + char vol_tstamp_file[PATH_MAX] = { + 0, + }; + char snap_tstamp_file[PATH_MAX] = { + 0, + }; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + int geo_rep_indexing_on = 0; + int ret = 0; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(snap_volinfo); + GF_ASSERT(new_volinfo); + + priv = this->private; + GF_ASSERT(priv); + + /* Check if geo-rep indexing is enabled, if yes, we need restore + * back the mtime of 'marker.tstamp' file. + */ + geo_rep_indexing_on = glusterd_volinfo_get_boolean(new_volinfo, + VKEY_MARKER_XTIME); + if (geo_rep_indexing_on == -1) { + gf_msg_debug(this->name, 0, + "Failed" + " to check whether geo-rep-indexing enabled or not"); + ret = 0; + goto out; + } + + if (geo_rep_indexing_on == 1) { + GLUSTERD_GET_VOLUME_DIR(vol_tstamp_file, new_volinfo, priv); + strncat(vol_tstamp_file, "/marker.tstamp", + PATH_MAX - strlen(vol_tstamp_file) - 1); + GLUSTERD_GET_VOLUME_DIR(snap_tstamp_file, snap_volinfo, priv); + strncat(snap_tstamp_file, "/marker.tstamp", + PATH_MAX - strlen(snap_tstamp_file) - 1); + ret = gf_set_timestamp(snap_tstamp_file, vol_tstamp_file); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TSTAMP_SET_FAIL, + "Unable to set atime and mtime of %s as of %s", + vol_tstamp_file, snap_tstamp_file); + goto out; + } + } + +out: + return ret; +} + +/* This function will copy snap volinfo to the new + * passed volinfo and regenerate backend store files + * for the restored snap. + * + * @param new_volinfo new volinfo + * @param snap_volinfo volinfo of snap volume + * + * @return 0 on success and -1 on failure + * + * TODO: Duplicate all members of volinfo, e.g. 
geo-rep sync slaves + */ +int32_t +glusterd_snap_volinfo_restore(dict_t *dict, dict_t *rsp_dict, + glusterd_volinfo_t *new_volinfo, + glusterd_volinfo_t *snap_volinfo, + int32_t volcount) +{ + char *value = NULL; + char key[64] = ""; + int32_t brick_count = -1; + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_brickinfo_t *new_brickinfo = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(dict); + GF_ASSERT(rsp_dict); + + GF_VALIDATE_OR_GOTO(this->name, new_volinfo, out); + GF_VALIDATE_OR_GOTO(this->name, snap_volinfo, out); + + brick_count = 0; + cds_list_for_each_entry(brickinfo, &snap_volinfo->bricks, brick_list) + { + brick_count++; + ret = glusterd_brickinfo_new(&new_brickinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_NEW_INFO_FAIL, + "Failed to create " + "new brickinfo"); + goto out; + } + + /* Duplicate brickinfo */ + ret = glusterd_brickinfo_dup(brickinfo, new_brickinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_SET_INFO_FAIL, + "Failed to dup " + "brickinfo"); + goto out; + } + + /* Fetch values if present in dict These values won't + * be present in case of a missed restore. In that case + * it's fine to use the local node's value + */ + snprintf(key, sizeof(key), "snap%d.brick%d.path", volcount, + brick_count); + ret = dict_get_str(dict, key, &value); + if (!ret) + gf_strncpy(new_brickinfo->path, value, sizeof(new_brickinfo->path)); + + snprintf(key, sizeof(key), "snap%d.brick%d.snap_status", volcount, + brick_count); + ret = dict_get_int32(dict, key, &new_brickinfo->snap_status); + + snprintf(key, sizeof(key), "snap%d.brick%d.device_path", volcount, + brick_count); + ret = dict_get_str(dict, key, &value); + if (!ret) + gf_strncpy(new_brickinfo->device_path, value, + sizeof(new_brickinfo->device_path)); + + snprintf(key, sizeof(key), "snap%d.brick%d.fs_type", volcount, + brick_count); + ret = dict_get_str(dict, key, &value); + if (!ret) + gf_strncpy(new_brickinfo->fstype, value, + sizeof(new_brickinfo->fstype)); + + snprintf(key, sizeof(key), "snap%d.brick%d.mnt_opts", volcount, + brick_count); + ret = dict_get_str(dict, key, &value); + if (!ret) + gf_strncpy(new_brickinfo->mnt_opts, value, + sizeof(new_brickinfo->mnt_opts)); + + /* If the brick is not of this peer, or snapshot is missed * + * for the brick do not replace the xattr for it */ + if ((!gf_uuid_compare(brickinfo->uuid, MY_UUID)) && + (brickinfo->snap_status != -1)) { + /* We need to replace the volume id of all the bricks + * to the volume id of the origin volume. 
new_volinfo + * has the origin volume's volume id*/ + ret = sys_lsetxattr(new_brickinfo->path, GF_XATTR_VOL_ID_KEY, + new_volinfo->volume_id, + sizeof(new_volinfo->volume_id), XATTR_REPLACE); + if (ret == -1) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_SET_XATTR_FAIL, + "Attribute=%s, Path=%s, Reason=%s, Snap=%s", + GF_XATTR_VOL_ID_KEY, new_brickinfo->path, + strerror(errno), new_volinfo->volname, NULL); + goto out; + } + } + + /* If a snapshot is pending for this brick then + * restore should also be pending + */ + if (brickinfo->snap_status == -1) { + /* Adding missed delete to the dict */ + ret = glusterd_add_missed_snaps_to_dict( + rsp_dict, snap_volinfo, brickinfo, brick_count, + GF_SNAP_OPTION_TYPE_RESTORE); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_MISSEDSNAP_INFO_SET_FAIL, + "Failed to add missed snapshot info " + "for %s:%s in the rsp_dict", + brickinfo->hostname, brickinfo->path); + goto out; + } + } + + cds_list_add_tail(&new_brickinfo->brick_list, &new_volinfo->bricks); + /* ownership of new_brickinfo is passed to new_volinfo */ + new_brickinfo = NULL; + } + + /* Regenerate all volfiles */ + ret = glusterd_create_volfiles_and_notify_services(new_volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL, + "Failed to regenerate volfiles"); + goto out; + } + + /* Restore geo-rep marker.tstamp's timestamp */ + ret = glusterd_snap_geo_rep_restore(snap_volinfo, new_volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TSTAMP_SET_FAIL, + "Geo-rep: marker.tstamp's timestamp restoration failed"); + goto out; + } + +out: + if (ret && (NULL != new_brickinfo)) { + (void)glusterd_brickinfo_delete(new_brickinfo); + } + + return ret; +} + +int +glusterd_snap_volinfo_find_by_volume_id(uuid_t volume_id, + glusterd_volinfo_t **volinfo) +{ + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_volinfo_t *voliter = NULL; + glusterd_snap_t *snap = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + priv = this->private; + GF_ASSERT(priv); + GF_ASSERT(volinfo); + + if (gf_uuid_is_null(volume_id)) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_UUID_NULL, + "Volume UUID is NULL"); + goto out; + } + + cds_list_for_each_entry(snap, &priv->snapshots, snap_list) + { + cds_list_for_each_entry(voliter, &snap->volumes, vol_list) + { + if (gf_uuid_compare(volume_id, voliter->volume_id)) + continue; + *volinfo = voliter; + ret = 0; + goto out; + } + } + + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_NOT_FOUND, + "Snap volume not found"); +out: + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_snap_volinfo_find(char *snap_volname, glusterd_snap_t *snap, + glusterd_volinfo_t **volinfo) +{ + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + priv = this->private; + GF_ASSERT(priv); + GF_ASSERT(snap); + GF_ASSERT(snap_volname); + + cds_list_for_each_entry(snap_vol, &snap->volumes, vol_list) + { + if (!strcmp(snap_vol->volname, snap_volname)) { + ret = 0; + *volinfo = snap_vol; + goto out; + } + } + + gf_msg(this->name, GF_LOG_WARNING, EINVAL, GD_MSG_SNAP_NOT_FOUND, + "Snap volume %s not found", snap_volname); +out: + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_snap_volinfo_find_from_parent_volname(char *origin_volname, + glusterd_snap_t *snap, + glusterd_volinfo_t **volinfo) +{ + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_volinfo_t *snap_vol = NULL; + 
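    /* Note: the three lookup helpers in this block (by volume UUID, by snap
       volume name, and by parent volume name) differ only in the key used
       for the search; each walks priv->snapshots / snap->volumes and returns
       a borrowed pointer through *volinfo without taking a reference. */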
glusterd_conf_t *priv = NULL; + + this = THIS; + priv = this->private; + GF_ASSERT(priv); + GF_ASSERT(snap); + GF_ASSERT(origin_volname); + + cds_list_for_each_entry(snap_vol, &snap->volumes, vol_list) + { + if (!strcmp(snap_vol->parent_volname, origin_volname)) { + ret = 0; + *volinfo = snap_vol; + goto out; + } + } + + gf_msg_debug(this->name, 0, + "Snap volume not found(snap: %s, " + "origin-volume: %s", + snap->snapname, origin_volname); + +out: + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +/* Exports a bricks snapshot details only if required + * + * The details will be exported only if the cluster op-version is greater than + * 4, ie. snapshot is supported in the cluster + */ +int +gd_add_brick_snap_details_to_dict(dict_t *dict, char *prefix, + glusterd_brickinfo_t *brickinfo) +{ + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + char key[256] = { + 0, + }; + + this = THIS; + GF_ASSERT(this != NULL); + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, (conf != NULL), out); + + GF_VALIDATE_OR_GOTO(this->name, (dict != NULL), out); + GF_VALIDATE_OR_GOTO(this->name, (prefix != NULL), out); + GF_VALIDATE_OR_GOTO(this->name, (brickinfo != NULL), out); + + if (conf->op_version < GD_OP_VERSION_3_6_0) { + ret = 0; + goto out; + } + + snprintf(key, sizeof(key), "%s.snap_status", prefix); + ret = dict_set_int32(dict, key, brickinfo->snap_status); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_STATUS_FAIL, + "Failed to set snap_status for %s:%s", brickinfo->hostname, + brickinfo->path); + goto out; + } + + snprintf(key, sizeof(key), "%s.device_path", prefix); + ret = dict_set_str(dict, key, brickinfo->device_path); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set snap_device for %s:%s", brickinfo->hostname, + brickinfo->path); + goto out; + } + + snprintf(key, sizeof(key), "%s.fs_type", prefix); + ret = dict_set_str(dict, key, brickinfo->fstype); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set fstype for %s:%s", brickinfo->hostname, + brickinfo->path); + goto out; + } + + snprintf(key, sizeof(key), "%s.mnt_opts", prefix); + ret = dict_set_str(dict, key, brickinfo->mnt_opts); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRK_MOUNTOPTS_FAIL, + "Failed to set mnt_opts for %s:%s", brickinfo->hostname, + brickinfo->path); + goto out; + } + + snprintf(key, sizeof(key), "%s.mount_dir", prefix); + ret = dict_set_str(dict, key, brickinfo->mount_dir); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to set mount_dir for %s:%s", brickinfo->hostname, + brickinfo->path); + +out: + return ret; +} + +/* Exports a volumes snapshot details only if required. + * + * The snapshot details will only be exported if the cluster op-version is + * greater than 4, ie. 
snapshot is supported in the cluster + */ +int +gd_add_vol_snap_details_to_dict(dict_t *dict, char *prefix, + glusterd_volinfo_t *volinfo) +{ + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + char key[256] = { + 0, + }; + + this = THIS; + GF_ASSERT(this != NULL); + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, (conf != NULL), out); + + GF_VALIDATE_OR_GOTO(this->name, (dict != NULL), out); + GF_VALIDATE_OR_GOTO(this->name, (volinfo != NULL), out); + GF_VALIDATE_OR_GOTO(this->name, (prefix != NULL), out); + + if (conf->op_version < GD_OP_VERSION_3_6_0) { + ret = 0; + goto out; + } + + snprintf(key, sizeof(key), "%s.restored_from_snap", prefix); + ret = dict_set_dynstr_with_alloc(dict, key, + uuid_utoa(volinfo->restored_from_snap)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to set %s for volume" + "%s", + key, volinfo->volname); + goto out; + } + + if (strlen(volinfo->parent_volname) > 0) { + snprintf(key, sizeof(key), "%s.parent_volname", prefix); + ret = dict_set_dynstr_with_alloc(dict, key, volinfo->parent_volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to set %s " + "for volume %s", + key, volinfo->volname); + goto out; + } + } + + snprintf(key, sizeof(key), "%s.is_snap_volume", prefix); + ret = dict_set_uint32(dict, key, volinfo->is_snap_volume); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to set %s for volume" + "%s", + key, volinfo->volname); + goto out; + } + + snprintf(key, sizeof(key), "%s.snap-max-hard-limit", prefix); + ret = dict_set_uint64(dict, key, volinfo->snap_max_hard_limit); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to set %s for volume" + "%s", + key, volinfo->volname); + } + +out: + return ret; +} + +int32_t +glusterd_add_missed_snaps_to_export_dict(dict_t *peer_data) +{ + char name_buf[PATH_MAX] = ""; + char value[PATH_MAX] = ""; + int32_t missed_snap_count = 0; + int32_t ret = -1; + glusterd_conf_t *priv = NULL; + glusterd_missed_snap_info *missed_snapinfo = NULL; + glusterd_snap_op_t *snap_opinfo = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(peer_data); + + priv = this->private; + GF_ASSERT(priv); + + /* Add the missed_entries in the dict */ + cds_list_for_each_entry(missed_snapinfo, &priv->missed_snaps_list, + missed_snaps) + { + cds_list_for_each_entry(snap_opinfo, &missed_snapinfo->snap_ops, + snap_ops_list) + { + snprintf(name_buf, sizeof(name_buf), "missed_snaps_%d", + missed_snap_count); + snprintf(value, sizeof(value), "%s:%s=%s:%d:%s:%d:%d", + missed_snapinfo->node_uuid, missed_snapinfo->snap_uuid, + snap_opinfo->snap_vol_id, snap_opinfo->brick_num, + snap_opinfo->brick_path, snap_opinfo->op, + snap_opinfo->status); + + ret = dict_set_dynstr_with_alloc(peer_data, name_buf, value); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to set %s", name_buf); + goto out; + } + missed_snap_count++; + } + } + + ret = dict_set_int32(peer_data, "missed_snap_count", missed_snap_count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to set missed_snap_count"); + goto out; + } + +out: + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_add_snap_to_dict(glusterd_snap_t *snap, dict_t *peer_data, + int32_t snap_count) +{ + char buf[64] = ""; + char prefix[32] = ""; + int32_t ret = -1; + int32_t volcount = 0; + glusterd_volinfo_t 
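    /* For reference, the export format used by
       glusterd_add_missed_snaps_to_export_dict() above: each pending
       operation is stored under the key "missed_snaps_<n>" with a value of

           <node-uuid>:<snap-uuid>=<snap-vol-id>:<brick-num>:<brick-path>:<op>:<status>

       where <op> is the integer GF_SNAP_OPTION_TYPE_* code of the missed
       operation and <status> the integer GD_MISSED_SNAP_* state, matching
       the "%s:%s=%s:%d:%s:%d:%d" format string used when building it. */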
*volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + gf_boolean_t host_bricks = _gf_false; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(snap); + GF_ASSERT(peer_data); + + snprintf(prefix, sizeof(prefix), "snap%d", snap_count); + + cds_list_for_each_entry(volinfo, &snap->volumes, vol_list) + { + volcount++; + ret = glusterd_add_volume_to_dict(volinfo, peer_data, volcount, prefix); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to add snap:%s volume:%s " + "to peer_data dict for handshake", + snap->snapname, volinfo->volname); + goto out; + } + + if (glusterd_is_volume_quota_enabled(volinfo)) { + ret = glusterd_vol_add_quota_conf_to_dict(volinfo, peer_data, + volcount, prefix); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to add quota conf for " + "snap:%s volume:%s to peer_data " + "dict for handshake", + snap->snapname, volinfo->volname); + goto out; + } + } + + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + if (!gf_uuid_compare(brickinfo->uuid, MY_UUID)) { + host_bricks = _gf_true; + break; + } + } + } + + snprintf(buf, sizeof(buf), "%s.host_bricks", prefix); + ret = dict_set_int8(peer_data, buf, (int8_t)host_bricks); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to set host_bricks for snap %s", snap->snapname); + goto out; + } + + snprintf(buf, sizeof(buf), "%s.volcount", prefix); + ret = dict_set_int32(peer_data, buf, volcount); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to set volcount for snap %s", snap->snapname); + goto out; + } + + snprintf(buf, sizeof(buf), "%s.snapname", prefix); + ret = dict_set_dynstr_with_alloc(peer_data, buf, snap->snapname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to set snapname for snap %s", snap->snapname); + goto out; + } + + snprintf(buf, sizeof(buf), "%s.snap_id", prefix); + ret = dict_set_dynstr_with_alloc(peer_data, buf, uuid_utoa(snap->snap_id)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to set snap_id for snap %s", snap->snapname); + goto out; + } + + if (snap->description) { + snprintf(buf, sizeof(buf), "%s.description", prefix); + ret = dict_set_dynstr_with_alloc(peer_data, buf, snap->description); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to set description for snap %s", snap->snapname); + goto out; + } + } + + snprintf(buf, sizeof(buf), "%s.time_stamp", prefix); + ret = dict_set_int64(peer_data, buf, (int64_t)snap->time_stamp); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to set time_stamp for snap %s", snap->snapname); + goto out; + } + + snprintf(buf, sizeof(buf), "%s.snap_restored", prefix); + ret = dict_set_int8(peer_data, buf, snap->snap_restored); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to set snap_restored for snap %s", snap->snapname); + goto out; + } + + snprintf(buf, sizeof(buf), "%s.snap_status", prefix); + ret = dict_set_int32(peer_data, buf, snap->snap_status); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to set snap_status for snap %s", snap->snapname); + goto out; + } +out: + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_add_snapshots_to_export_dict(dict_t *peer_data) +{ + int32_t snap_count = 0; + int32_t ret = -1; + 
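    /* For reference: for every snapshot exported during handshake,
       glusterd_add_snap_to_dict() above populates peer_data under a
       "snap<N>." prefix, i.e.

           snap<N>.volcount, snap<N>.snapname, snap<N>.snap_id,
           snap<N>.description (only if present), snap<N>.time_stamp,
           snap<N>.snap_restored, snap<N>.snap_status, snap<N>.host_bricks

       plus one full volume description per snap volume (via
       glusterd_add_volume_to_dict() with the same prefix, and quota conf if
       enabled). This function then records the total under the top-level
       key "snap_count". */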
glusterd_conf_t *priv = NULL; + glusterd_snap_t *snap = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + GF_ASSERT(peer_data); + + cds_list_for_each_entry(snap, &priv->snapshots, snap_list) + { + snap_count++; + ret = glusterd_add_snap_to_dict(snap, peer_data, snap_count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to add snap(%s) to the " + " peer_data dict for handshake", + snap->snapname); + goto out; + } + } + + ret = dict_set_int32(peer_data, "snap_count", snap_count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set snap_count"); + goto out; + } + +out: + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +/* Imports the snapshot details of a brick if required and available + * + * Snapshot details will be imported only if the cluster op-version is >= 4 + */ +int +gd_import_new_brick_snap_details(dict_t *dict, char *prefix, + glusterd_brickinfo_t *brickinfo) +{ + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + char key[512] = { + 0, + }; + char *snap_device = NULL; + char *fs_type = NULL; + char *mnt_opts = NULL; + char *mount_dir = NULL; + + this = THIS; + GF_ASSERT(this != NULL); + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, (conf != NULL), out); + + GF_VALIDATE_OR_GOTO(this->name, (dict != NULL), out); + GF_VALIDATE_OR_GOTO(this->name, (prefix != NULL), out); + GF_VALIDATE_OR_GOTO(this->name, (brickinfo != NULL), out); + + if (conf->op_version < GD_OP_VERSION_3_6_0) { + ret = 0; + goto out; + } + + snprintf(key, sizeof(key), "%s.snap_status", prefix); + ret = dict_get_int32(dict, key, &brickinfo->snap_status); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "%s missing in payload", key); + goto out; + } + + snprintf(key, sizeof(key), "%s.device_path", prefix); + ret = dict_get_str(dict, key, &snap_device); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "%s missing in payload", key); + goto out; + } + gf_strncpy(brickinfo->device_path, snap_device, + sizeof(brickinfo->device_path)); + snprintf(key, sizeof(key), "%s.fs_type", prefix); + ret = dict_get_str(dict, key, &fs_type); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "%s missing in payload", key); + goto out; + } + gf_strncpy(brickinfo->fstype, fs_type, sizeof(brickinfo->fstype)); + + snprintf(key, sizeof(key), "%s.mnt_opts", prefix); + ret = dict_get_str(dict, key, &mnt_opts); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "%s missing in payload", key); + goto out; + } + gf_strncpy(brickinfo->mnt_opts, mnt_opts, sizeof(brickinfo->mnt_opts)); + + snprintf(key, sizeof(key), "%s.mount_dir", prefix); + ret = dict_get_str(dict, key, &mount_dir); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "%s missing in payload", key); + goto out; + } + gf_strncpy(brickinfo->mount_dir, mount_dir, sizeof(brickinfo->mount_dir)); + +out: + return ret; +} + +/* + * Imports the snapshot details of a volume if required and available + * + * Snapshot details will be imported only if cluster.op_version is greater than + * or equal to GD_OP_VERSION_3_6_0, the op-version from which volume snapshot is + * supported. 
+ */ +int +gd_import_volume_snap_details(dict_t *dict, glusterd_volinfo_t *volinfo, + char *prefix, char *volname) +{ + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + char key[256] = { + 0, + }; + char *restored_snap = NULL; + + this = THIS; + GF_ASSERT(this != NULL); + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, (conf != NULL), out); + + GF_VALIDATE_OR_GOTO(this->name, (dict != NULL), out); + GF_VALIDATE_OR_GOTO(this->name, (volinfo != NULL), out); + GF_VALIDATE_OR_GOTO(this->name, (prefix != NULL), out); + GF_VALIDATE_OR_GOTO(this->name, (volname != NULL), out); + + if (conf->op_version < GD_OP_VERSION_3_6_0) { + ret = 0; + goto out; + } + + snprintf(key, sizeof(key), "%s.is_snap_volume", prefix); + uint32_t is_snap_int; + ret = dict_get_uint32(dict, key, &is_snap_int); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "%s missing in payload " + "for %s", + key, volname); + goto out; + } + volinfo->is_snap_volume = (is_snap_int != 0); + + snprintf(key, sizeof(key), "%s.restored_from_snap", prefix); + ret = dict_get_str(dict, key, &restored_snap); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "%s missing in payload " + "for %s", + key, volname); + goto out; + } + + gf_uuid_parse(restored_snap, volinfo->restored_from_snap); + + snprintf(key, sizeof(key), "%s.snap-max-hard-limit", prefix); + ret = dict_get_uint64(dict, key, &volinfo->snap_max_hard_limit); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "%s missing in payload " + "for %s", + key, volname); +out: + return ret; +} + +int32_t +glusterd_perform_missed_op(glusterd_snap_t *snap, int32_t op) +{ + dict_t *dict = NULL; + int32_t ret = -1; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *snap_volinfo = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_volinfo_t *tmp = NULL; + xlator_t *this = NULL; + uuid_t null_uuid = {0}; + char *parent_volname = NULL; + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + GF_ASSERT(priv); + GF_ASSERT(snap); + + dict = dict_new(); + if (!dict) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, + "Unable to create dict"); + ret = -1; + goto out; + } + + switch (op) { + case GF_SNAP_OPTION_TYPE_DELETE: + ret = glusterd_snap_remove(dict, snap, _gf_true, _gf_false, + _gf_false); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_REMOVE_FAIL, + "Failed to remove snap"); + goto out; + } + + break; + case GF_SNAP_OPTION_TYPE_RESTORE: + cds_list_for_each_entry_safe(snap_volinfo, tmp, &snap->volumes, + vol_list) + { + parent_volname = gf_strdup(snap_volinfo->parent_volname); + if (!parent_volname) + goto out; + + ret = glusterd_volinfo_find(parent_volname, &volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, + "Could not get volinfo of %s", parent_volname); + goto out; + } + + volinfo->version--; + gf_uuid_copy(volinfo->restored_from_snap, null_uuid); + + /* gd_restore_snap_volume() uses the dict and volcount + * to fetch snap brick info from other nodes, which were + * collected during prevalidation. As this is an ad-hoc + * op and only local node's data matter, hence sending + * volcount as 0 and re-using the same dict because we + * need not record any missed creates in the rsp_dict. 
+ */ + ret = gd_restore_snap_volume(dict, dict, volinfo, snap_volinfo, + 0); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_SNAP_RESTORE_FAIL, + "Failed to restore snap for %s", snap->snapname); + volinfo->version++; + goto out; + } + + /* Restore is successful therefore delete the original + * volume's volinfo. If the volinfo is already restored + * then we should delete the backend LVMs */ + if (!gf_uuid_is_null(volinfo->restored_from_snap)) { + ret = glusterd_lvm_snapshot_remove(dict, volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_SNAP_REMOVE_FAIL, + "Failed to remove LVM backend"); + goto out; + } + } + + /* Detach the volinfo from priv->volumes, so that no new + * command can ref it any more and then unref it. + */ + cds_list_del_init(&volinfo->vol_list); + glusterd_volinfo_unref(volinfo); + + ret = glusterd_snapshot_restore_cleanup(dict, parent_volname, + snap); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_SNAP_CLEANUP_FAIL, + "Failed to perform snapshot restore " + "cleanup for %s volume", + parent_volname); + goto out; + } + + GF_FREE(parent_volname); + parent_volname = NULL; + } + + break; + default: + /* The entry must be a create, delete, or + * restore entry + */ + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, + "Invalid missed snap entry"); + ret = -1; + goto out; + } + +out: + dict_unref(dict); + if (parent_volname) { + GF_FREE(parent_volname); + parent_volname = NULL; + } + + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +/* Perform missed deletes and restores on this node */ +int32_t +glusterd_perform_missed_snap_ops() +{ + int32_t ret = -1; + int32_t op_status = -1; + glusterd_conf_t *priv = NULL; + glusterd_missed_snap_info *missed_snapinfo = NULL; + glusterd_snap_op_t *snap_opinfo = NULL; + glusterd_snap_t *snap = NULL; + uuid_t snap_uuid = { + 0, + }; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + GF_ASSERT(priv); + + cds_list_for_each_entry(missed_snapinfo, &priv->missed_snaps_list, + missed_snaps) + { + /* If the pending snap_op is not for this node then continue */ + if (strcmp(missed_snapinfo->node_uuid, uuid_utoa(MY_UUID))) + continue; + + /* Find the snap id */ + gf_uuid_parse(missed_snapinfo->snap_uuid, snap_uuid); + snap = NULL; + snap = glusterd_find_snap_by_id(snap_uuid); + if (!snap) { + /* If the snap is not found, then a delete or a + * restore can't be pending on that snap_uuid. + */ + gf_msg_debug(this->name, 0, "Not a pending delete or restore op"); + continue; + } + + op_status = GD_MISSED_SNAP_PENDING; + cds_list_for_each_entry(snap_opinfo, &missed_snapinfo->snap_ops, + snap_ops_list) + { + /* If the snap_op is create or its status is + * GD_MISSED_SNAP_DONE then continue + */ + if ((snap_opinfo->status == GD_MISSED_SNAP_DONE) || + (snap_opinfo->op == GF_SNAP_OPTION_TYPE_CREATE)) + continue; + + /* Perform the actual op for the first time for + * this snap, and mark the snap_status as + * GD_MISSED_SNAP_DONE. For other entries for the same + * snap, just mark the entry as done. 
+ */ + if (op_status == GD_MISSED_SNAP_PENDING) { + ret = glusterd_perform_missed_op(snap, snap_opinfo->op); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_SNAPSHOT_OP_FAILED, + "Failed to perform missed snap op"); + goto out; + } + op_status = GD_MISSED_SNAP_DONE; + } + + snap_opinfo->status = GD_MISSED_SNAP_DONE; + } + } + + ret = 0; +out: + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +/* Import friend volumes missed_snap_list and update * + * missed_snap_list if need be */ +int32_t +glusterd_import_friend_missed_snap_list(dict_t *peer_data) +{ + int32_t missed_snap_count = -1; + int32_t ret = -1; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(peer_data); + + priv = this->private; + GF_ASSERT(priv); + + /* Add the friends missed_snaps entries to the in-memory list */ + ret = dict_get_int32(peer_data, "missed_snap_count", &missed_snap_count); + if (ret) { + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_MISSED_SNAP_GET_FAIL, + "No missed snaps"); + ret = 0; + goto out; + } + + ret = glusterd_add_missed_snaps_to_list(peer_data, missed_snap_count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MISSED_SNAP_LIST_STORE_FAIL, + "Failed to add missed snaps to list"); + goto out; + } + + ret = glusterd_perform_missed_snap_ops(); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPSHOT_OP_FAILED, + "Failed to perform snap operations"); + /* Not going to out at this point coz some * + * missed ops might have been performed. We * + * need to persist the current list * + */ + } + + ret = glusterd_store_update_missed_snaps(); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MISSED_SNAP_LIST_STORE_FAIL, + "Failed to update missed_snaps_list"); + goto out; + } + +out: + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +/* + * This function will set boolean "conflict" to true if peer snap + * has a version greater than snap version of local node. Otherwise + * boolean "conflict" will be set to false. + */ +int +glusterd_check_peer_has_higher_snap_version(dict_t *peer_data, + char *peer_snap_name, int volcount, + gf_boolean_t *conflict, + char *prefix, glusterd_snap_t *snap, + char *hostname) +{ + glusterd_volinfo_t *snap_volinfo = NULL; + char key[256] = {0}; + int version = 0, i = 0; + int ret = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(snap); + GF_ASSERT(peer_data); + + for (i = 1; i <= volcount; i++) { + snprintf(key, sizeof(key), "%s%d.version", prefix, i); + ret = dict_get_int32(peer_data, key, &version); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "failed to get " + "version of snap volume = %s", + peer_snap_name); + return -1; + } + + /* TODO : As of now there is only one volume in snapshot. + * Change this when multiple volume snapshot is introduced + */ + snap_volinfo = cds_list_entry(snap->volumes.next, glusterd_volinfo_t, + vol_list); + if (!snap_volinfo) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, + "Failed to get snap " + "volinfo %s", + snap->snapname); + return -1; + } + + if (version > snap_volinfo->version) { + /* Mismatch detected */ + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_VOL_VERS_MISMATCH, + "Version of volume %s differ. 
" + "local version = %d, remote version = %d " + "on peer %s", + snap_volinfo->volname, snap_volinfo->version, version, + hostname); + *conflict = _gf_true; + break; + } else { + *conflict = _gf_false; + } + } + return 0; +} + +/* Check for the peer_snap_name in the list of existing snapshots. + * If a snap exists with the same name and a different snap_id, then + * there is a conflict. Set conflict as _gf_true, and snap to the + * conflicting snap object. If a snap exists with the same name, and the + * same snap_id, then there is no conflict. Set conflict as _gf_false + * and snap to the existing snap object. If no snap exists with the + * peer_snap_name, then there is no conflict. Set conflict as _gf_false + * and snap to NULL. + */ +void +glusterd_is_peer_snap_conflicting(char *peer_snap_name, char *peer_snap_id, + gf_boolean_t *conflict, + glusterd_snap_t **snap, char *hostname) +{ + uuid_t peer_snap_uuid = { + 0, + }; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(peer_snap_name); + GF_ASSERT(peer_snap_id); + GF_ASSERT(conflict); + GF_ASSERT(snap); + GF_ASSERT(hostname); + + *snap = glusterd_find_snap_by_name(peer_snap_name); + if (*snap) { + gf_uuid_parse(peer_snap_id, peer_snap_uuid); + if (!gf_uuid_compare(peer_snap_uuid, (*snap)->snap_id)) { + /* Current node contains the same snap having + * the same snapname and snap_id + */ + gf_msg_debug(this->name, 0, + "Snapshot %s from peer %s present in " + "localhost", + peer_snap_name, hostname); + *conflict = _gf_false; + } else { + /* Current node contains the same snap having + * the same snapname but different snap_id + */ + gf_msg_debug(this->name, 0, + "Snapshot %s from peer %s conflicts with " + "snapshot in localhost", + peer_snap_name, hostname); + *conflict = _gf_true; + } + } else { + /* Peer contains snapshots missing on the current node */ + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_MISSED_SNAP_PRESENT, + "Snapshot %s from peer %s missing on localhost", peer_snap_name, + hostname); + *conflict = _gf_false; + } +} + +/* Check if the local node is hosting any bricks for the given snapshot */ +gf_boolean_t +glusterd_are_snap_bricks_local(glusterd_snap_t *snap) +{ + gf_boolean_t is_local = _gf_false; + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(snap); + + cds_list_for_each_entry(volinfo, &snap->volumes, vol_list) + { + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + if (!gf_uuid_compare(brickinfo->uuid, MY_UUID)) { + is_local = _gf_true; + goto out; + } + } + } + +out: + gf_msg_trace(this->name, 0, "Returning %d", is_local); + return is_local; +} + +/* Check if the peer has missed any snap delete + * or restore for the given snap_id + */ +gf_boolean_t +glusterd_peer_has_missed_snap_delete(uuid_t peerid, char *peer_snap_id) +{ + char *peer_uuid = NULL; + gf_boolean_t missed_delete = _gf_false; + glusterd_conf_t *priv = NULL; + glusterd_missed_snap_info *missed_snapinfo = NULL; + glusterd_snap_op_t *snap_opinfo = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + GF_ASSERT(peer_snap_id); + + peer_uuid = uuid_utoa(peerid); + + cds_list_for_each_entry(missed_snapinfo, &priv->missed_snaps_list, + missed_snaps) + { + /* Look for missed snap for the same peer, and + * the same snap_id + */ + if ((!strcmp(peer_uuid, missed_snapinfo->node_uuid)) && + (!strcmp(peer_snap_id, missed_snapinfo->snap_uuid))) { + /* Check if the 
missed snap's op is delete and the + * status is pending + */ + cds_list_for_each_entry(snap_opinfo, &missed_snapinfo->snap_ops, + snap_ops_list) + { + if (((snap_opinfo->op == GF_SNAP_OPTION_TYPE_DELETE) || + (snap_opinfo->op == GF_SNAP_OPTION_TYPE_RESTORE)) && + (snap_opinfo->status == GD_MISSED_SNAP_PENDING)) { + missed_delete = _gf_true; + goto out; + } + } + } + } + +out: + gf_msg_trace(this->name, 0, "Returning %d", missed_delete); + return missed_delete; +} + +/* Generate and store snap volfiles for imported snap object */ +int32_t +glusterd_gen_snap_volfiles(glusterd_volinfo_t *snap_vol, char *peer_snap_name) +{ + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_volinfo_t *parent_volinfo = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(snap_vol); + GF_ASSERT(peer_snap_name); + + ret = glusterd_store_volinfo(snap_vol, GLUSTERD_VOLINFO_VER_AC_NONE); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_SET_FAIL, + "Failed to store snapshot " + "volinfo (%s) for snap %s", + snap_vol->volname, peer_snap_name); + goto out; + } + + ret = generate_brick_volfiles(snap_vol); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL, + "generating the brick volfiles for the " + "snap %s failed", + peer_snap_name); + goto out; + } + + ret = generate_client_volfiles(snap_vol, GF_CLIENT_TRUSTED); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL, + "generating the trusted client volfiles for " + "the snap %s failed", + peer_snap_name); + goto out; + } + + ret = generate_client_volfiles(snap_vol, GF_CLIENT_OTHER); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL, + "generating the client volfiles for the " + "snap %s failed", + peer_snap_name); + goto out; + } + + ret = glusterd_volinfo_find(snap_vol->parent_volname, &parent_volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, + "Parent volinfo " + "not found for %s volume of snap %s", + snap_vol->volname, peer_snap_name); + goto out; + } + + glusterd_list_add_snapvol(parent_volinfo, snap_vol); + + ret = glusterd_store_volinfo(snap_vol, GLUSTERD_VOLINFO_VER_AC_NONE); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_SET_FAIL, + "Failed to store snap volinfo"); + goto out; + } +out: + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +/* Import snapshot info from peer_data and add it to priv */ +int32_t +glusterd_import_friend_snap(dict_t *peer_data, int32_t snap_count, + char *peer_snap_name, char *peer_snap_id) +{ + char buf[64] = ""; + char prefix[32] = ""; + char *description = NULL; + dict_t *dict = NULL; + glusterd_snap_t *snap = NULL; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_conf_t *priv = NULL; + int32_t ret = -1; + int32_t volcount = -1; + int32_t i = -1; + xlator_t *this = NULL; + int64_t time_stamp; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + GF_ASSERT(peer_data); + GF_ASSERT(peer_snap_name); + GF_ASSERT(peer_snap_id); + + snprintf(prefix, sizeof(prefix), "snap%d", snap_count); + + snap = glusterd_new_snap_object(); + if (!snap) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_CREATION_FAIL, + "Could not create " + "the snap object for snap %s", + peer_snap_name); + goto out; + } + + dict = dict_new(); + if (!dict) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, + "Failed to create dict"); + ret = -1; + goto out; + } + + gf_strncpy(snap->snapname, peer_snap_name, sizeof(snap->snapname)); + 
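    /* The "snap<N>." keys read below mirror the ones written by
       glusterd_add_snap_to_dict() on the exporting peer, so the import side
       walks the same schema: description, time_stamp, snap_restored,
       snap_status, volcount, then one volinfo per snap volume. */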
gf_uuid_parse(peer_snap_id, snap->snap_id); + + snprintf(buf, sizeof(buf), "%s.description", prefix); + ret = dict_get_str(peer_data, buf, &description); + if (ret == 0 && description) { + snap->description = gf_strdup(description); + if (snap->description == NULL) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_CREATION_FAIL, + "Saving the Snapshot Description Failed"); + ret = -1; + goto out; + } + } + + snprintf(buf, sizeof(buf), "%s.time_stamp", prefix); + ret = dict_get_int64(peer_data, buf, &time_stamp); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get time_stamp for snap %s", peer_snap_name); + goto out; + } + snap->time_stamp = (time_t)time_stamp; + + snprintf(buf, sizeof(buf), "%s.snap_restored", prefix); + ret = dict_get_int8(peer_data, buf, (int8_t *)&snap->snap_restored); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get snap_restored for snap %s", peer_snap_name); + goto out; + } + + snprintf(buf, sizeof(buf), "%s.snap_status", prefix); + ret = dict_get_int32(peer_data, buf, (int32_t *)&snap->snap_status); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get snap_status for snap %s", peer_snap_name); + goto out; + } + + /* If the snap is scheduled to be decommissioned, then + * don't accept the snap */ + if (snap->snap_status == GD_SNAP_STATUS_DECOMMISSION) { + gf_msg_debug(this->name, 0, + "The snap(%s) is scheduled to be decommissioned " + "Not accepting the snap.", + peer_snap_name); + glusterd_snap_remove(dict, snap, _gf_true, _gf_true, _gf_false); + ret = 0; + goto out; + } + + snprintf(buf, sizeof(buf), "%s.volcount", prefix); + ret = dict_get_int32(peer_data, buf, &volcount); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get volcount for snap %s", peer_snap_name); + goto out; + } + + ret = glusterd_store_create_snap_dir(snap); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SNAPDIR_CREATE_FAIL, + "Failed to create snap dir"); + goto out; + } + + glusterd_list_add_order(&snap->snap_list, &priv->snapshots, + glusterd_compare_snap_time); + + for (i = 1; i <= volcount; i++) { + ret = glusterd_import_volinfo(peer_data, i, &snap_vol, prefix); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_SET_FAIL, + "Failed to import snap volinfo for " + "snap %s", + peer_snap_name); + goto out; + } + + snap_vol->snapshot = snap; + + ret = glusterd_gen_snap_volfiles(snap_vol, peer_snap_name); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL, + "Failed to generate snap vol files " + "for snap %s", + peer_snap_name); + goto out; + } + /* During handshake, after getting updates from friend mount + * point for activated snapshot should exist and should not + * for deactivated snapshot. 
+ */ + if (glusterd_is_volume_started(snap_vol)) { + ret = glusterd_recreate_vol_brick_mounts(this, snap_vol); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_BRK_MNT_RECREATE_FAIL, + "Failed to recreate brick mounts" + " for %s", + snap->snapname); + goto out; + } + + (void)glusterd_start_bricks(snap_vol); + ret = glusterd_store_volinfo(snap_vol, + GLUSTERD_VOLINFO_VER_AC_NONE); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_STORE_FAIL, + "Failed to " + "write volinfo for volume %s", + snap_vol->volname); + goto out; + } + } else { + (void)glusterd_stop_bricks(snap_vol); + ret = glusterd_snap_unmount(this, snap_vol); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTERD_UMOUNT_FAIL, + "Failed to unmounts for %s", snap->snapname); + } + } + + ret = glusterd_import_quota_conf(peer_data, i, snap_vol, prefix); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_QUOTA_CONFIG_IMPORT_FAIL, + "Failed to import quota conf " + "for snap %s", + peer_snap_name); + goto out; + } + + snap_vol = NULL; + } + + ret = glusterd_store_snap(snap); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_CREATION_FAIL, + "Could not store snap" + "object %s", + peer_snap_name); + goto out; + } + glusterd_fetchsnap_notify(this); + +out: + if (ret) + glusterd_snap_remove(dict, snap, _gf_true, _gf_true, _gf_false); + + if (dict) + dict_unref(dict); + + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +/* During a peer-handshake, after the volumes have synced, and the list of + * missed snapshots have synced, the node will perform the pending deletes + * and restores on this list. At this point, the current snapshot list in + * the node will be updated, and hence in case of conflicts arising during + * snapshot handshake, the peer hosting the bricks will be given precedence + * Likewise, if there will be a conflict, and both peers will be in the same + * state, i.e either both would be hosting bricks or both would not be hosting + * bricks, then a decision can't be taken and a peer-reject will happen. + * + * glusterd_compare_snap() & glusterd_update_snaps () implement the following + * algorithm to perform the above task. Please note the former function tries to + * iterate over the snaps one at a time and updating the relevant fields in the + * dictionary and then glusterd_update_snaps () go over all the snaps and update + * them at one go as part of a synctask. + * Step 1: Start. + * Step 2: Check if the peer is missing a delete or restore on the said snap. + * If yes, goto step 6. + * Step 3: Check if there is a conflict between the peer's data and the + * local snap. If no, goto step 5. + * Step 4: As there is a conflict, check if both the peer and the local nodes + * are hosting bricks. Based on the results perform the following: + * Peer Hosts Bricks Local Node Hosts Bricks Action + * Yes Yes Goto Step 8 + * No No Goto Step 8 + * Yes No Goto Step 9 + * No Yes Goto Step 7 + * Step 5: Check if the local node is missing the peer's data. + * If yes, goto step 10. + * Step 6: Check if the snap volume version is lesser than peer_data + * if yes goto step 9 + * Step 7: It's a no-op. Goto step 11 + * Step 8: Peer Reject. Goto step 11 + * Step 9: Delete local node's data. + * Step 10: Accept Peer Data. 
+ * Step 11: Stop + * + */ +int32_t +glusterd_compare_snap(dict_t *peer_data, int32_t snap_count, char *peername, + uuid_t peerid) +{ + char buf[64] = ""; + char prefix[32] = ""; + char *peer_snap_name = NULL; + char *peer_snap_id = NULL; + glusterd_snap_t *snap = NULL; + gf_boolean_t conflict = _gf_false; + gf_boolean_t is_local = _gf_false; + gf_boolean_t is_hosted = _gf_false; + gf_boolean_t missed_delete = _gf_false; + int32_t ret = -1; + int32_t volcount = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(peer_data); + GF_ASSERT(peername); + + snprintf(prefix, sizeof(prefix), "snap%d", snap_count); + + ret = dict_set_uint32(peer_data, buf, 0); + snprintf(buf, sizeof(buf), "%s.accept_peer_data", prefix); + ret = dict_set_uint32(peer_data, buf, 0); + snprintf(buf, sizeof(buf), "%s.remove_lvm", prefix); + ret = dict_set_uint32(peer_data, buf, 0); + snprintf(buf, sizeof(buf), "%s.remove_my_data", prefix); + ret = dict_set_uint32(peer_data, buf, 0); + + /* Fetch the peer's snapname */ + snprintf(buf, sizeof(buf), "%s.snapname", prefix); + ret = dict_get_str(peer_data, buf, &peer_snap_name); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to fetch snapname from peer: %s", peername); + goto out; + } + + /* Fetch the peer's snap_id */ + snprintf(buf, sizeof(buf), "%s.snap_id", prefix); + ret = dict_get_str(peer_data, buf, &peer_snap_id); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to fetch snap_id from peer: %s", peername); + goto out; + } + + snprintf(buf, sizeof(buf), "%s.volcount", prefix); + ret = dict_get_int32(peer_data, buf, &volcount); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get volcount for snap %s", peer_snap_name); + goto out; + } + + /* Check if the peer has missed a snap delete or restore + * resulting in stale data for the snap in question + */ + missed_delete = glusterd_peer_has_missed_snap_delete(peerid, peer_snap_id); + if (missed_delete == _gf_true) { + /* Peer has missed delete on the missing/conflicting snap_id */ + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_MISSED_SNAP_DELETE, + "Peer %s has missed a delete " + "on snap %s", + peername, peer_snap_name); + ret = 0; + goto out; + } + + /* Check if there is a conflict, and if the + * peer data is already present + */ + glusterd_is_peer_snap_conflicting(peer_snap_name, peer_snap_id, &conflict, + &snap, peername); + if (conflict == _gf_false) { + if (!snap) { + /* Peer has snap with the same snapname + * and snap_id, which local node doesn't have. + */ + snprintf(buf, sizeof(buf), "%s.accept_peer_data", prefix); + ret = dict_set_uint32(peer_data, buf, 1); + goto out; + } + /* Peer has snap with the same snapname + * and snap_id. Now check if peer has a + * snap with higher snap version than local + * node has. + */ + ret = glusterd_check_peer_has_higher_snap_version( + peer_data, peer_snap_name, volcount, &conflict, prefix, snap, + peername); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOL_VERS_MISMATCH, + "Failed " + "to check version of snap volume"); + goto out; + } + if (conflict == _gf_true) { + /* + * Snap version of peer is higher than snap + * version of local node. + * + * Remove data in local node and accept peer data. + * We just need to heal snap info of local node, So + * When removing data from local node, make sure + * we are not removing backend lvm of the snap. 
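+ * That is done by leaving <prefix>.remove_lvm at 0 while setting
+ * <prefix>.remove_my_data and <prefix>.accept_peer_data to 1; these
+ * flags are acted upon later by glusterd_update_snaps_synctask().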
+ */ + snprintf(buf, sizeof(buf), "%s.remove_lvm", prefix); + ret = dict_set_uint32(peer_data, buf, 0); + snprintf(buf, sizeof(buf), "%s.remove_my_data", prefix); + ret = dict_set_uint32(peer_data, buf, 1); + snprintf(buf, sizeof(buf), "%s.accept_peer_data", prefix); + ret = dict_set_uint32(peer_data, buf, 1); + + } else { + ret = 0; + } + goto out; + } + + /* There is a conflict. Check if the current node is + * hosting bricks for the conflicted snap. + */ + is_local = glusterd_are_snap_bricks_local(snap); + + /* Check if the peer is hosting any bricks for the + * conflicting snap + */ + snprintf(buf, sizeof(buf), "%s.host_bricks", prefix); + ret = dict_get_int8(peer_data, buf, (int8_t *)&is_hosted); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to fetch host_bricks from peer: %s " + "for %s", + peername, peer_snap_name); + goto out; + } + + /* As there is a conflict at this point of time, the data of the + * node that hosts a brick takes precedence. If both the local + * node and the peer are in the same state, i.e if both of them + * are either hosting or not hosting the bricks, for the snap, + * then it's a peer reject + */ + if (is_hosted == is_local) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_CONFLICT, + "Conflict in snapshot %s with peer %s", peer_snap_name, + peername); + ret = -1; + goto out; + } + + if (is_hosted == _gf_false) { + /* If there was a conflict, and the peer is not hosting + * any brick, then don't accept peer data + */ + gf_msg_debug(this->name, 0, + "Peer doesn't hosts bricks for conflicting " + "snap(%s). Not accepting peer data.", + peer_snap_name); + ret = 0; + goto out; + } + + /* The peer is hosting a brick in case of conflict + * And local node isn't. Hence remove local node's + * data and accept peer data + */ + gf_msg_debug(this->name, 0, + "Peer hosts bricks for conflicting " + "snap(%s). Removing local data. 
Accepting peer data.", + peer_snap_name); + snprintf(buf, sizeof(buf), "%s.remove_lvm", prefix); + ret = dict_set_uint32(peer_data, buf, 1); + snprintf(buf, sizeof(buf), "%s.remove_my_data", prefix); + ret = dict_set_uint32(peer_data, buf, 1); + snprintf(buf, sizeof(buf), "%s.accept_peer_data", prefix); + ret = dict_set_uint32(peer_data, buf, 1); + +out: + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_update_snaps_synctask(void *opaque) +{ + int32_t ret = -1; + int32_t snap_count = 0; + int i = 1; + xlator_t *this = NULL; + dict_t *peer_data = NULL; + char buf[64] = ""; + char prefix[32] = ""; + char *peer_snap_name = NULL; + char *peer_snap_id = NULL; + char *peername = NULL; + gf_boolean_t remove_lvm = _gf_false; + gf_boolean_t remove_my_data = _gf_false; + gf_boolean_t accept_peer_data = _gf_false; + int32_t val = 0; + glusterd_snap_t *snap = NULL; + dict_t *dict = NULL; + glusterd_conf_t *conf = NULL; + + this = THIS; + GF_ASSERT(this); + + conf = this->private; + GF_ASSERT(conf); + + peer_data = (dict_t *)opaque; + GF_ASSERT(peer_data); + + synclock_lock(&conf->big_lock); + + while (conf->restart_bricks) { + synccond_wait(&conf->cond_restart_bricks, &conf->big_lock); + } + conf->restart_bricks = _gf_true; + + ret = dict_get_int32(peer_data, "snap_count", &snap_count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to fetch snap_count"); + goto out; + } + ret = dict_get_str(peer_data, "peername", &peername); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to fetch peername"); + goto out; + } + + for (i = 1; i <= snap_count; i++) { + snprintf(prefix, sizeof(prefix), "snap%d", i); + + /* Fetch the peer's snapname */ + snprintf(buf, sizeof(buf), "%s.snapname", prefix); + ret = dict_get_str(peer_data, buf, &peer_snap_name); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to fetch snapname from peer: %s", peername); + goto out; + } + + /* Fetch the peer's snap_id */ + snprintf(buf, sizeof(buf), "%s.snap_id", prefix); + ret = dict_get_str(peer_data, buf, &peer_snap_id); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to fetch snap_id from peer: %s", peername); + goto out; + } + + /* remove_my_data */ + snprintf(buf, sizeof(buf), "%s.remove_my_data", prefix); + ret = dict_get_int32(peer_data, buf, &val); + if (val) + remove_my_data = _gf_true; + else + remove_my_data = _gf_false; + + if (remove_my_data) { + snprintf(buf, sizeof(buf), "%s.remove_lvm", prefix); + ret = dict_get_int32(peer_data, buf, &val); + if (val) + remove_lvm = _gf_true; + else + remove_lvm = _gf_false; + + dict = dict_new(); + if (!dict) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, + "Unable to create dict"); + ret = -1; + goto out; + } + snap = glusterd_find_snap_by_name(peer_snap_name); + if (!snap) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MISSED_SNAP_PRESENT, + "Snapshot %s from peer %s missing on " + "localhost", + peer_snap_name, peername); + ret = -1; + goto out; + } + + ret = glusterd_snap_remove(dict, snap, remove_lvm, _gf_false, + _gf_false); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_REMOVE_FAIL, + "Failed to remove snap %s", snap->snapname); + goto out; + } + + dict_unref(dict); + dict = NULL; + } + snprintf(buf, sizeof(buf), "%s.accept_peer_data", prefix); + ret = dict_get_int32(peer_data, buf, &val); + if (val) + accept_peer_data = _gf_true; + else + accept_peer_data = 
_gf_false; + + if (accept_peer_data) { + /* Accept Peer Data */ + ret = glusterd_import_friend_snap(peer_data, i, peer_snap_name, + peer_snap_id); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_IMPORT_FAIL, + "Failed to import snap %s from peer %s", peer_snap_name, + peername); + goto out; + } + } + } + +out: + if (peer_data) + dict_unref(peer_data); + if (dict) + dict_unref(dict); + conf->restart_bricks = _gf_false; + synccond_broadcast(&conf->cond_restart_bricks); + + return ret; +} + +/* Compare snapshots present in peer_data, with the snapshots in + * the current node + */ +int32_t +glusterd_compare_friend_snapshots(dict_t *peer_data, char *peername, + uuid_t peerid) +{ + int32_t ret = -1; + int32_t snap_count = 0; + int i = 1; + xlator_t *this = NULL; + dict_t *peer_data_copy = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(peer_data); + GF_ASSERT(peername); + + ret = dict_get_int32(peer_data, "snap_count", &snap_count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to fetch snap_count"); + goto out; + } + + if (!snap_count) + goto out; + + for (i = 1; i <= snap_count; i++) { + /* Compare one snapshot from peer_data at a time */ + ret = glusterd_compare_snap(peer_data, i, peername, peerid); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPSHOT_OP_FAILED, + "Failed to compare snapshots with peer %s", peername); + goto out; + } + } + /* Update the snaps at one go */ + peer_data_copy = dict_copy_with_ref(peer_data, NULL); + ret = dict_set_str(peer_data_copy, "peername", peername); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set peername into the dict"); + if (peer_data_copy) + dict_unref(peer_data_copy); + goto out; + } + glusterd_launch_synctask(glusterd_update_snaps_synctask, peer_data_copy); + +out: + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_add_snapd_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict, + int32_t count) +{ + int ret = -1; + int32_t pid = -1; + int32_t brick_online = -1; + char key[64] = {0}; + char base_key[32] = {0}; + char pidfile[PATH_MAX] = {0}; + xlator_t *this = NULL; + + GF_ASSERT(volinfo); + GF_ASSERT(dict); + + this = THIS; + GF_ASSERT(this); + + snprintf(base_key, sizeof(base_key), "brick%d", count); + snprintf(key, sizeof(key), "%s.hostname", base_key); + ret = dict_set_str(dict, key, "Snapshot Daemon"); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s", + key, NULL); + goto out; + } + + snprintf(key, sizeof(key), "%s.path", base_key); + ret = dict_set_dynstr(dict, key, gf_strdup(uuid_utoa(MY_UUID))); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s", + key, NULL); + goto out; + } + + snprintf(key, sizeof(key), "%s.port", base_key); + ret = dict_set_int32(dict, key, volinfo->snapd.port); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s", + key, NULL); + goto out; + } + + glusterd_svc_build_snapd_pidfile(volinfo, pidfile, sizeof(pidfile)); + + brick_online = gf_is_service_running(pidfile, &pid); + if (brick_online == _gf_false) + pid = -1; + + snprintf(key, sizeof(key), "%s.pid", base_key); + ret = dict_set_int32(dict, key, pid); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s", + key, NULL); + goto out; + } + + snprintf(key, sizeof(key), "%s.status", base_key); + ret = dict_set_int32(dict, key, brick_online); + +out: + if (ret) + gf_msg_debug(this->name, 0, 
"Returning %d", ret); + + return ret; +} + +int +glusterd_snap_config_use_rsp_dict(dict_t *dst, dict_t *src) +{ + char buf[PATH_MAX] = ""; + char *volname = NULL; + int ret = -1; + int config_command = 0; + uint64_t i = 0; + uint64_t hard_limit = GLUSTERD_SNAPS_MAX_HARD_LIMIT; + uint64_t soft_limit = GLUSTERD_SNAPS_DEF_SOFT_LIMIT_PERCENT; + uint64_t value = 0; + uint64_t voldisplaycount = 0; + + if (!dst || !src) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_EMPTY, + "Source or Destination " + "dict is empty."); + goto out; + } + + ret = dict_get_int32(dst, "config-command", &config_command); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "failed to get config-command type"); + goto out; + } + + switch (config_command) { + case GF_SNAP_CONFIG_DISPLAY: + ret = dict_get_uint64(src, GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT, + &hard_limit); + if (!ret) { + ret = dict_set_uint64( + dst, GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT, hard_limit); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to set snap_max_hard_limit"); + goto out; + } + } else { + /* Received dummy response from other nodes */ + ret = 0; + goto out; + } + + ret = dict_get_uint64(src, GLUSTERD_STORE_KEY_SNAP_MAX_SOFT_LIMIT, + &soft_limit); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get snap_max_soft_limit"); + goto out; + } + + ret = dict_set_uint64(dst, GLUSTERD_STORE_KEY_SNAP_MAX_SOFT_LIMIT, + soft_limit); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to set snap_max_soft_limit"); + goto out; + } + + ret = dict_get_uint64(src, "voldisplaycount", &voldisplaycount); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get voldisplaycount"); + goto out; + } + + ret = dict_set_uint64(dst, "voldisplaycount", voldisplaycount); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to set voldisplaycount"); + goto out; + } + + for (i = 0; i < voldisplaycount; i++) { + snprintf(buf, sizeof(buf), "volume%" PRIu64 "-volname", i); + ret = dict_get_str(src, buf, &volname); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get %s", buf); + goto out; + } + ret = dict_set_str(dst, buf, volname); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to set %s", buf); + goto out; + } + + snprintf(buf, sizeof(buf), + "volume%" PRIu64 "-snap-max-hard-limit", i); + ret = dict_get_uint64(src, buf, &value); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get %s", buf); + goto out; + } + ret = dict_set_uint64(dst, buf, value); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to set %s", buf); + goto out; + } + + snprintf(buf, sizeof(buf), + "volume%" PRIu64 "-active-hard-limit", i); + ret = dict_get_uint64(src, buf, &value); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get %s", buf); + goto out; + } + ret = dict_set_uint64(dst, buf, value); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to set %s", buf); + goto out; + } + + snprintf(buf, sizeof(buf), + "volume%" PRIu64 "-snap-max-soft-limit", i); + ret = dict_get_uint64(src, buf, &value); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get %s", buf); + goto out; + } + ret = dict_set_uint64(dst, buf, value); + if (ret) { + 
gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to set %s", buf); + goto out; + } + } + + break; + default: + break; + } + + ret = 0; +out: + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; +} + +int +glusterd_merge_brick_status(dict_t *dst, dict_t *src) +{ + int64_t volume_count = 0; + int64_t index = 0; + int64_t j = 0; + int64_t brick_count = 0; + int64_t brick_order = 0; + char key[64] = { + 0, + }; + char key_prefix[16] = { + 0, + }; + char snapbrckcnt[PATH_MAX] = { + 0, + }; + char snapbrckord[PATH_MAX] = { + 0, + }; + char *clonename = NULL; + int ret = -1; + int32_t brick_online = 0; + xlator_t *this = NULL; + int32_t snap_command = 0; + + this = THIS; + GF_ASSERT(this); + + if (!dst || !src) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_EMPTY, + "Source or Destination " + "dict is empty."); + goto out; + } + + ret = dict_get_int32(dst, "type", &snap_command); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "unable to get the type of " + "the snapshot command"); + goto out; + } + + if (snap_command == GF_SNAP_OPTION_TYPE_DELETE) { + gf_msg_debug(this->name, 0, + "snapshot delete command." + " Need not merge the status of the bricks"); + ret = 0; + goto out; + } + + /* Try and fetch clonename. If present set status with clonename * + * else do so as snap-vol */ + ret = dict_get_str(dst, "clonename", &clonename); + if (ret) { + snprintf(key_prefix, sizeof(key_prefix), "snap-vol"); + } else + snprintf(key_prefix, sizeof(key_prefix), "clone"); + + ret = dict_get_int64(src, "volcount", &volume_count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "failed to " + "get the volume count"); + goto out; + } + + for (index = 0; index < volume_count; index++) { + ret = snprintf(snapbrckcnt, sizeof(snapbrckcnt) - 1, + "snap-vol%" PRId64 "_brickcount", index + 1); + ret = dict_get_int64(src, snapbrckcnt, &brick_count); + if (ret) { + gf_msg_trace(this->name, 0, + "No bricks for this volume in this dict (%s)", + snapbrckcnt); + continue; + } + + for (j = 0; j < brick_count; j++) { + /* Fetching data from source dict */ + snprintf(snapbrckord, sizeof(snapbrckord) - 1, + "snap-vol%" PRId64 ".brick%" PRId64 ".order", index + 1, + j); + + ret = dict_get_int64(src, snapbrckord, &brick_order); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get brick order (%s)", snapbrckord); + goto out; + } + + snprintf(key, sizeof(key), "%s%" PRId64 ".brick%" PRId64 ".status", + key_prefix, index + 1, brick_order); + ret = dict_get_int32(src, key, &brick_online); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "failed to " + "get the brick status (%s)", + key); + goto out; + } + + ret = dict_set_int32(dst, key, brick_online); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "failed to " + "set the brick status (%s)", + key); + goto out; + } + brick_online = 0; + } + } + + ret = 0; + +out: + return ret; +} + +/* Aggregate missed_snap_counts from different nodes and save it * + * in the req_dict of the originator node */ +int +glusterd_snap_create_use_rsp_dict(dict_t *dst, dict_t *src) +{ + char *buf = NULL; + char *tmp_str = NULL; + char name_buf[PATH_MAX] = ""; + int32_t i = -1; + int32_t ret = -1; + int32_t src_missed_snap_count = -1; + int32_t dst_missed_snap_count = -1; + xlator_t *this = NULL; + int8_t soft_limit_flag = -1; + + this = THIS; + GF_ASSERT(this); + + if (!dst || !src) { + gf_msg(this->name, 
GF_LOG_ERROR, 0, GD_MSG_DICT_EMPTY, + "Source or Destination " + "dict is empty."); + goto out; + } + + ret = glusterd_merge_brick_status(dst, src); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_SET_INFO_FAIL, + "failed to merge brick " + "status"); + goto out; + } + + ret = dict_get_str(src, "snapuuid", &buf); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "failed to get snap UUID"); + goto out; + } + + ret = dict_set_dynstr_with_alloc(dst, "snapuuid", buf); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set snap uuid in dict"); + goto out; + } + + /* set in dst dictionary soft-limit-reach only if soft-limit-reach + * is present src dictionary */ + ret = dict_get_int8(src, "soft-limit-reach", &soft_limit_flag); + if (!ret) { + ret = dict_set_int8(dst, "soft-limit-reach", soft_limit_flag); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set " + "soft_limit_flag"); + goto out; + } + } + + ret = dict_get_int32(src, "missed_snap_count", &src_missed_snap_count); + if (ret) { + gf_msg_debug(this->name, 0, "No missed snaps"); + ret = 0; + goto out; + } + + ret = dict_get_int32(dst, "missed_snap_count", &dst_missed_snap_count); + if (ret) { + /* Initialize dst_missed_count for the first time */ + dst_missed_snap_count = 0; + } + + for (i = 0; i < src_missed_snap_count; i++) { + snprintf(name_buf, sizeof(name_buf), "missed_snaps_%d", i); + ret = dict_get_str(src, name_buf, &buf); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to fetch %s", name_buf); + goto out; + } + + snprintf(name_buf, sizeof(name_buf), "missed_snaps_%d", + dst_missed_snap_count); + + tmp_str = gf_strdup(buf); + if (!tmp_str) { + ret = -1; + goto out; + } + + ret = dict_set_dynstr(dst, name_buf, tmp_str); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to set %s", name_buf); + goto out; + } + + tmp_str = NULL; + dst_missed_snap_count++; + } + + ret = dict_set_int32(dst, "missed_snap_count", dst_missed_snap_count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to set dst_missed_snap_count"); + goto out; + } + +out: + if (ret && tmp_str) + GF_FREE(tmp_str); + + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +int +glusterd_snap_use_rsp_dict(dict_t *dst, dict_t *src) +{ + int ret = -1; + int32_t snap_command = 0; + + if (!dst || !src) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_EMPTY, + "Source or Destination " + "dict is empty."); + goto out; + } + + ret = dict_get_int32(dst, "type", &snap_command); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "unable to get the type of " + "the snapshot command"); + goto out; + } + + switch (snap_command) { + case GF_SNAP_OPTION_TYPE_CREATE: + case GF_SNAP_OPTION_TYPE_DELETE: + case GF_SNAP_OPTION_TYPE_CLONE: + ret = glusterd_snap_create_use_rsp_dict(dst, src); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_RSP_DICT_USE_FAIL, + "Unable to use rsp dict"); + goto out; + } + break; + case GF_SNAP_OPTION_TYPE_CONFIG: + ret = glusterd_snap_config_use_rsp_dict(dst, src); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_RSP_DICT_USE_FAIL, + "Unable to use rsp dict"); + goto out; + } + break; + default: + /* copy the response dictinary's contents to the dict to be + * sent back to the cli */ + dict_copy(src, dst); + break; + } + + ret = 0; +out: + gf_msg_debug("glusterd", 0, "Returning 
%d", ret); + return ret; +} + +int +glusterd_compare_snap_time(struct cds_list_head *list1, + struct cds_list_head *list2) +{ + glusterd_snap_t *snap1 = NULL; + glusterd_snap_t *snap2 = NULL; + double diff_time = 0; + + GF_ASSERT(list1); + GF_ASSERT(list2); + + snap1 = cds_list_entry(list1, glusterd_snap_t, snap_list); + snap2 = cds_list_entry(list2, glusterd_snap_t, snap_list); + diff_time = difftime(snap1->time_stamp, snap2->time_stamp); + + return (int)diff_time; +} + +int +glusterd_compare_snap_vol_time(struct cds_list_head *list1, + struct cds_list_head *list2) +{ + glusterd_volinfo_t *snapvol1 = NULL; + glusterd_volinfo_t *snapvol2 = NULL; + double diff_time = 0; + + GF_ASSERT(list1); + GF_ASSERT(list2); + + snapvol1 = cds_list_entry(list1, glusterd_volinfo_t, snapvol_list); + snapvol2 = cds_list_entry(list2, glusterd_volinfo_t, snapvol_list); + diff_time = difftime(snapvol1->snapshot->time_stamp, + snapvol2->snapshot->time_stamp); + + return (int)diff_time; +} + +int32_t +glusterd_missed_snapinfo_new(glusterd_missed_snap_info **missed_snapinfo) +{ + glusterd_missed_snap_info *new_missed_snapinfo = NULL; + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(missed_snapinfo); + + new_missed_snapinfo = GF_CALLOC(1, sizeof(*new_missed_snapinfo), + gf_gld_mt_missed_snapinfo_t); + + if (!new_missed_snapinfo) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); + goto out; + } + + CDS_INIT_LIST_HEAD(&new_missed_snapinfo->missed_snaps); + CDS_INIT_LIST_HEAD(&new_missed_snapinfo->snap_ops); + + *missed_snapinfo = new_missed_snapinfo; + + ret = 0; + +out: + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_missed_snap_op_new(glusterd_snap_op_t **snap_op) +{ + glusterd_snap_op_t *new_snap_op = NULL; + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(snap_op); + + new_snap_op = GF_CALLOC(1, sizeof(*new_snap_op), + gf_gld_mt_missed_snapinfo_t); + + if (!new_snap_op) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); + goto out; + } + + new_snap_op->brick_num = -1; + new_snap_op->op = -1; + new_snap_op->status = -1; + CDS_INIT_LIST_HEAD(&new_snap_op->snap_ops_list); + + *snap_op = new_snap_op; + + ret = 0; +out: + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +gf_boolean_t +mntopts_exists(const char *str, const char *opts) +{ + char *dup_val = NULL; + char *savetok = NULL; + char *token = NULL; + gf_boolean_t exists = _gf_false; + + GF_ASSERT(opts); + + if (!str || !strlen(str)) + goto out; + + dup_val = gf_strdup(str); + if (!dup_val) + goto out; + + token = strtok_r(dup_val, ",", &savetok); + while (token) { + if (!strcmp(token, opts)) { + exists = _gf_true; + goto out; + } + token = strtok_r(NULL, ",", &savetok); + } + +out: + GF_FREE(dup_val); + return exists; +} + +int32_t +glusterd_mount_lvm_snapshot(glusterd_brickinfo_t *brickinfo, + char *brick_mount_path) +{ + char msg[NAME_MAX] = ""; + char mnt_opts[1024] = ""; + int32_t ret = -1; + runner_t runner = { + 0, + }; + xlator_t *this = NULL; + int32_t len = 0; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(brick_mount_path); + GF_ASSERT(brickinfo); + + runinit(&runner); + len = snprintf(msg, sizeof(msg), "mount %s %s", brickinfo->device_path, + brick_mount_path); + if (len < 0) { + strcpy(msg, "<error>"); + } + + gf_strncpy(mnt_opts, brickinfo->mnt_opts, sizeof(mnt_opts)); + + /* XFS file-system does not allow to mount file-system with duplicate + * UUID. 
File-system UUID of snapshot and its origin volume is same. + * Therefore to mount such a snapshot in XFS we need to pass nouuid + * option + */ + if (!strcmp(brickinfo->fstype, "xfs") && + !mntopts_exists(mnt_opts, "nouuid")) { + if (strlen(mnt_opts) > 0) + strcat(mnt_opts, ","); + strcat(mnt_opts, "nouuid"); + } + + if (strlen(mnt_opts) > 0) { + runner_add_args(&runner, "mount", "-o", mnt_opts, + brickinfo->device_path, brick_mount_path, NULL); + } else { + runner_add_args(&runner, "mount", brickinfo->device_path, + brick_mount_path, NULL); + } + + runner_log(&runner, this->name, GF_LOG_DEBUG, msg); + ret = runner_run(&runner); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_MOUNT_FAIL, + "mounting the snapshot " + "logical device %s failed (error: %s)", + brickinfo->device_path, strerror(errno)); + goto out; + } else + gf_msg_debug(this->name, 0, + "mounting the snapshot " + "logical device %s successful", + brickinfo->device_path); + +out: + gf_msg_trace(this->name, 0, "Returning with %d", ret); + return ret; +} + +gf_boolean_t +glusterd_volume_quorum_calculate(glusterd_volinfo_t *volinfo, dict_t *dict, + int down_count, gf_boolean_t first_brick_on, + int8_t snap_force, int quorum_count, + char *quorum_type, char **op_errstr, + uint32_t *op_errno) +{ + gf_boolean_t quorum_met = _gf_false; + const char err_str[] = "One or more bricks may be down."; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_VALIDATE_OR_GOTO(this->name, op_errno, out); + + if (!volinfo || !dict) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_INVALID_ENTRY, + "input parameters NULL"); + goto out; + } + + /* In a n-way replication where n >= 3 we should not take a snapshot + * if even one brick is down, irrespective of the quorum being met. + * TODO: Remove this restriction once n-way replication is + * supported with snapshot. 
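+ * As it stands, quorum_met therefore stays false whenever down_count
+ * is non-zero, and quorum_count/quorum_type are not consulted here.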
+ */ + if (down_count) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_DISCONNECTED, "%s", + err_str); + *op_errstr = gf_strdup(err_str); + *op_errno = EG_BRCKDWN; + } else { + quorum_met = _gf_true; + } + + /* TODO : Support for n-way relication in snapshot*/ +out: + return quorum_met; +} + +static int32_t +glusterd_volume_quorum_check(glusterd_volinfo_t *volinfo, int64_t index, + dict_t *dict, const char *key_prefix, + int8_t snap_force, int quorum_count, + char *quorum_type, char **op_errstr, + uint32_t *op_errno) +{ + int ret = 0; + xlator_t *this = NULL; + int64_t i = 0; + int64_t j = 0; + char key[128] = { + 0, + }; /* key_prefix is passed from above, but is really quite small */ + int keylen; + int down_count = 0; + gf_boolean_t first_brick_on = _gf_true; + glusterd_conf_t *priv = NULL; + gf_boolean_t quorum_met = _gf_false; + int distribute_subvols = 0; + int32_t brick_online = 0; + const char err_str[] = "quorum is not met"; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + GF_VALIDATE_OR_GOTO(this->name, op_errno, out); + + if (!volinfo || !dict) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_INVALID_ENTRY, + "input parameters NULL"); + goto out; + } + + if ((!glusterd_is_volume_replicate(volinfo) || + volinfo->replica_count < 3) && + (GF_CLUSTER_TYPE_DISPERSE != volinfo->type)) { + for (i = 0; i < volinfo->brick_count; i++) { + /* for a pure distribute volume, and replica volume + with replica count 2, quorum is not met if even + one of its subvolumes is down + */ + keylen = snprintf(key, sizeof(key), + "%s%" PRId64 ".brick%" PRId64 ".status", + key_prefix, index, i); + ret = dict_get_int32n(dict, key, keylen, &brick_online); + if (ret || !brick_online) { + ret = 1; + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_SERVER_QUORUM_NOT_MET, "%s", err_str); + *op_errstr = gf_strdup(err_str); + *op_errno = EG_BRCKDWN; + goto out; + } + } + ret = 0; + quorum_met = _gf_true; + } else { + distribute_subvols = volinfo->brick_count / volinfo->dist_leaf_count; + for (j = 0; j < distribute_subvols; j++) { + /* by default assume quorum is not met + TODO: Handle distributed striped replicate volumes + Currently only distributed replicate volumes are + handled. 
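+ * For each distribute subvolume the "<key_prefix><index>.brick<N>.status"
+ * keys are read from the dict and the resulting down_count is handed
+ * to glusterd_volume_quorum_calculate().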
+ */ + ret = 1; + quorum_met = _gf_false; + for (i = 0; i < volinfo->dist_leaf_count; i++) { + keylen = snprintf( + key, sizeof(key), "%s%" PRId64 ".brick%" PRId64 ".status", + key_prefix, index, (j * volinfo->dist_leaf_count) + i); + ret = dict_get_int32n(dict, key, keylen, &brick_online); + if (ret || !brick_online) { + if (i == 0) + first_brick_on = _gf_false; + down_count++; + } + } + + quorum_met = glusterd_volume_quorum_calculate( + volinfo, dict, down_count, first_brick_on, snap_force, + quorum_count, quorum_type, op_errstr, op_errno); + /* goto out if quorum is not met */ + if (!quorum_met) { + ret = -1; + goto out; + } + + down_count = 0; + first_brick_on = _gf_true; + } + } + + if (quorum_met) { + gf_msg_debug(this->name, 0, "volume %s is in quorum", volinfo->volname); + ret = 0; + } + +out: + return ret; +} + +static int32_t +glusterd_snap_common_quorum_calculate(glusterd_volinfo_t *volinfo, dict_t *dict, + int64_t index, const char *key_prefix, + int8_t snap_force, + gf_boolean_t snap_volume, + char **op_errstr, uint32_t *op_errno) +{ + int quorum_count = 0; + char *quorum_type = NULL; + int32_t tmp = 0; + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_VALIDATE_OR_GOTO(this->name, op_errno, out); + GF_VALIDATE_OR_GOTO(this->name, volinfo, out); + + /* for replicate volumes with replica count equal to or + greater than 3, do quorum check by getting what type + of quorum rule has been set by getting the volume + option set. If getting the option fails, then assume + default. + AFR does this: + if quorum type is "auto": + - for odd number of bricks (n), n/2 + 1 + bricks should be present + - for even number of bricks n, n/2 bricks + should be present along with the 1st + subvolume + if quorum type is not "auto": + - get the quorum count from dict with the + help of the option "cluster.quorum-count" + if the option is not there in the dict, + then assume quorum type is auto and follow + the above method. + For non replicate volumes quorum is met only if all + the bricks of the volume are online + */ + + if (GF_CLUSTER_TYPE_REPLICATE == volinfo->type) { + if (volinfo->replica_count % 2 == 0) + quorum_count = volinfo->replica_count / 2; + else + quorum_count = volinfo->replica_count / 2 + 1; + } else if (GF_CLUSTER_TYPE_DISPERSE == volinfo->type) { + quorum_count = volinfo->disperse_count - volinfo->redundancy_count; + } else { + quorum_count = volinfo->brick_count; + } + + ret = dict_get_str_sizen(volinfo->dict, "cluster.quorum-type", + &quorum_type); + if (!ret && !strcmp(quorum_type, "fixed")) { + ret = dict_get_int32_sizen(volinfo->dict, "cluster.quorum-count", &tmp); + /* if quorum-type option is not found in the + dict assume auto quorum type. i.e n/2 + 1. + The same assumption is made when quorum-count + option cannot be obtained from the dict (even + if the quorum-type option is not set to auto, + the behavior is set to the default behavior) + */ + if (!ret) { + /* for dispersed volumes, only allow quorums + equal or larger than minimum functional + value. 
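+ * i.e. a configured cluster.quorum-count smaller than
+ * (disperse_count - redundancy_count) is ignored and the computed
+ * minimum is retained.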
+ */ + if ((GF_CLUSTER_TYPE_DISPERSE != volinfo->type) || + (tmp >= quorum_count)) { + quorum_count = tmp; + } else { + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_QUORUM_COUNT_IGNORED, + "Ignoring small quorum-count " + "(%d) on dispersed volume", + tmp); + quorum_type = NULL; + } + } else + quorum_type = NULL; + } + + ret = glusterd_volume_quorum_check(volinfo, index, dict, key_prefix, + snap_force, quorum_count, quorum_type, + op_errstr, op_errno); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOL_NOT_FOUND, + "volume %s " + "is not in quorum", + volinfo->volname); + goto out; + } + +out: + return ret; +} + +static int32_t +glusterd_snap_quorum_check_for_clone(dict_t *dict, gf_boolean_t snap_volume, + char **op_errstr, uint32_t *op_errno) +{ + const char err_str[] = "glusterds are not in quorum"; + char key_prefix[16] = { + 0, + }; + char *snapname = NULL; + glusterd_snap_t *snap = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_volinfo_t *tmp_volinfo = NULL; + char *volname = NULL; + int64_t volcount = 0; + int64_t i = 0; + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_VALIDATE_OR_GOTO(this->name, op_errno, out); + + if (!dict) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_EMPTY, "dict is NULL"); + goto out; + } + + if (snap_volume) { + ret = dict_get_str_sizen(dict, "snapname", &snapname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "failed to " + "get snapname"); + goto out; + } + + snap = glusterd_find_snap_by_name(snapname); + if (!snap) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_NOT_FOUND, + "failed to " + "get the snapshot %s", + snapname); + ret = -1; + goto out; + } + } + + /* Do a quorum check of glusterds also. Because, the missed snapshot + * information will be saved by glusterd and if glusterds are not in + * quorum, then better fail the snapshot + */ + if (!does_gd_meet_server_quorum(this)) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SERVER_QUORUM_NOT_MET, + "%s", err_str); + *op_errstr = gf_strdup(err_str); + *op_errno = EG_NODEDWN; + ret = -1; + goto out; + } else + gf_msg_debug(this->name, 0, "glusterds are in quorum"); + + ret = dict_get_int64(dict, "volcount", &volcount); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "failed to get " + "volcount"); + goto out; + } + + for (i = 1; i <= volcount; i++) { + ret = dict_get_str_sizen(dict, "clonename", &volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "failed to " + "get clonename"); + goto out; + } + + if (snap_volume && snap) { + cds_list_for_each_entry(tmp_volinfo, &snap->volumes, vol_list) + { + if (!tmp_volinfo) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_NOT_FOUND, + "failed to get snap volume " + "for snap %s", + snapname); + ret = -1; + goto out; + } + volinfo = tmp_volinfo; + } + } else { + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, + "failed to find the volume %s", volname); + goto out; + } + } + + snprintf(key_prefix, sizeof(key_prefix), "%s", + snap_volume ? 
"vol" : "clone"); + + ret = glusterd_snap_common_quorum_calculate( + volinfo, dict, i, key_prefix, 0, snap_volume, op_errstr, op_errno); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOL_NOT_FOUND, + "volume %s " + "is not in quorum", + volname); + goto out; + } + } +out: + return ret; +} + +static int32_t +glusterd_snap_quorum_check_for_create(dict_t *dict, gf_boolean_t snap_volume, + char **op_errstr, uint32_t *op_errno) +{ + int8_t snap_force = 0; + int32_t force = 0; + const char err_str[] = "glusterds are not in quorum"; + char key_prefix[16] = { + 0, + }; + char *snapname = NULL; + glusterd_snap_t *snap = NULL; + glusterd_volinfo_t *volinfo = NULL; + char *volname = NULL; + int64_t volcount = 0; + char key[32] = { + 0, + }; + int64_t i = 0; + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_VALIDATE_OR_GOTO(this->name, op_errno, out); + + if (!dict) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_EMPTY, "dict is NULL"); + goto out; + } + + if (snap_volume) { + ret = dict_get_str(dict, "snapname", &snapname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "failed to " + "get snapname"); + goto out; + } + + snap = glusterd_find_snap_by_name(snapname); + if (!snap) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_NOT_FOUND, + "failed to " + "get the snapshot %s", + snapname); + ret = -1; + goto out; + } + } + + ret = dict_get_int32(dict, "flags", &force); + if (!ret && (force & GF_CLI_FLAG_OP_FORCE)) + snap_force = 1; + + /* Do a quorum check of glusterds also. Because, the missed snapshot + * information will be saved by glusterd and if glusterds are not in + * quorum, then better fail the snapshot + */ + if (!does_gd_meet_server_quorum(this)) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SERVER_QUORUM_NOT_MET, + "%s", err_str); + *op_errstr = gf_strdup(err_str); + *op_errno = EG_NODEDWN; + ret = -1; + goto out; + } else + gf_msg_debug(this->name, 0, "glusterds are in quorum"); + + ret = dict_get_int64(dict, "volcount", &volcount); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "failed to get " + "volcount"); + goto out; + } + + for (i = 1; i <= volcount; i++) { + snprintf(key, sizeof(key), "%s%" PRId64, + snap_volume ? "snap-volname" : "volname", i); + ret = dict_get_str(dict, key, &volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "failed to " + "get volname"); + goto out; + } + + if (snap_volume) { + ret = glusterd_snap_volinfo_find(volname, snap, &volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_NOT_FOUND, + "failed to get snap volume %s " + "for snap %s", + volname, snapname); + goto out; + } + } else { + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, + "failed to find the volume %s", volname); + goto out; + } + } + + snprintf(key_prefix, sizeof(key_prefix), "%s", + snap_volume ? 
"snap-vol" : "vol"); + + ret = glusterd_snap_common_quorum_calculate( + volinfo, dict, i, key_prefix, snap_force, snap_volume, op_errstr, + op_errno); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOL_NOT_FOUND, + "volume %s " + "is not in quorum", + volinfo->volname); + goto out; + } + } +out: + return ret; +} + +int32_t +glusterd_snap_quorum_check(dict_t *dict, gf_boolean_t snap_volume, + char **op_errstr, uint32_t *op_errno) +{ + int32_t ret = -1; + xlator_t *this = NULL; + int32_t snap_command = 0; + const char err_str[] = "glusterds are not in quorum"; + + this = THIS; + GF_ASSERT(this); + GF_VALIDATE_OR_GOTO(this->name, op_errno, out); + + if (!dict) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_EMPTY, "dict is NULL"); + goto out; + } + + ret = dict_get_int32_sizen(dict, "type", &snap_command); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "unable to get the type of " + "the snapshot command"); + goto out; + } + + switch (snap_command) { + case GF_SNAP_OPTION_TYPE_CREATE: + ret = glusterd_snap_quorum_check_for_create(dict, snap_volume, + op_errstr, op_errno); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_QUORUM_CHECK_FAIL, + "Quorum check" + "failed during snapshot create command"); + goto out; + } + break; + case GF_SNAP_OPTION_TYPE_CLONE: + ret = glusterd_snap_quorum_check_for_clone(dict, !snap_volume, + op_errstr, op_errno); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_QUORUM_CHECK_FAIL, + "Quorum check" + "failed during snapshot clone command"); + goto out; + } + break; + case GF_SNAP_OPTION_TYPE_DELETE: + case GF_SNAP_OPTION_TYPE_RESTORE: + if (!does_gd_meet_server_quorum(this)) { + ret = -1; + gf_msg(this->name, GF_LOG_WARNING, 0, + GD_MSG_SERVER_QUORUM_NOT_MET, "%s", err_str); + *op_errstr = gf_strdup(err_str); + *op_errno = EG_NODEDWN; + goto out; + } + + gf_msg_debug(this->name, 0, + "glusterds are in " + "quorum"); + break; + default: + break; + } + + ret = 0; + +out: + return ret; +} + +int +glusterd_is_path_mounted(const char *path) +{ + FILE *mtab = NULL; + struct mntent *part = NULL; + int is_mounted = 0; + + if ((mtab = setmntent("/etc/mtab", "r")) != NULL) { + while ((part = getmntent(mtab)) != NULL) { + if ((part->mnt_fsname != NULL) && + (strcmp(part->mnt_dir, path)) == 0) { + is_mounted = 1; + break; + } + } + endmntent(mtab); + } + return is_mounted; +} +/* This function will do unmount for snaps. + */ +int32_t +glusterd_snap_unmount(xlator_t *this, glusterd_volinfo_t *volinfo) +{ + char *brick_mount_path = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + int32_t ret = -1; + int retry_count = 0; + + GF_ASSERT(this); + GF_ASSERT(volinfo); + + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + /* If the brick is not of this node, we continue */ + if (gf_uuid_compare(brickinfo->uuid, MY_UUID)) { + continue; + } + /* If snapshot is pending, we continue */ + if (brickinfo->snap_status == -1) { + continue; + } + + ret = glusterd_find_brick_mount_path(brickinfo->path, + &brick_mount_path); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRK_MNTPATH_GET_FAIL, + "Failed to find brick_mount_path for %s", brickinfo->path); + goto out; + } + /* unmount cannot be done when the brick process is still in + * the process of shutdown, so give three re-tries + */ + retry_count = 0; + while (retry_count <= 2) { + retry_count++; + /* umount2 system call doesn't cleanup mtab entry + * after un-mount, using external umount command. 
+ */ + ret = glusterd_umount(brick_mount_path); + if (!ret) + break; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTERD_UMOUNT_FAIL, + "umount failed " + "for path %s (brick: %s): %s. Retry(%d)", + brick_mount_path, brickinfo->path, strerror(errno), + retry_count); + sleep(3); + } + } + +out: + if (brick_mount_path) + GF_FREE(brick_mount_path); + + return ret; +} + +int32_t +glusterd_umount(const char *path) +{ + char msg[NAME_MAX] = ""; + int32_t ret = -1; + runner_t runner = { + 0, + }; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(path); + + if (!glusterd_is_path_mounted(path)) { + return 0; + } + + runinit(&runner); + snprintf(msg, sizeof(msg), "umount path %s", path); + runner_add_args(&runner, _PATH_UMOUNT, "-f", path, NULL); + runner_log(&runner, this->name, GF_LOG_DEBUG, msg); + ret = runner_run(&runner); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_GLUSTERD_UMOUNT_FAIL, + "umounting %s failed (%s)", path, strerror(errno)); + + gf_msg_trace(this->name, 0, "Returning with %d", ret); + return ret; +} + +int32_t +glusterd_copy_file(const char *source, const char *destination) +{ + int32_t ret = -1; + xlator_t *this = NULL; + char buffer[1024] = ""; + int src_fd = -1; + int dest_fd = -1; + int read_len = -1; + struct stat stbuf = { + 0, + }; + mode_t dest_mode = 0; + + this = THIS; + GF_ASSERT(this); + + GF_ASSERT(source); + GF_ASSERT(destination); + + /* Here is stat is made to get the file permission of source file*/ + ret = sys_lstat(source, &stbuf); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "%s not found", source); + goto out; + } + + dest_mode = stbuf.st_mode & 0777; + + src_fd = open(source, O_RDONLY); + if (src_fd == -1) { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "Unable to open file %s", source); + goto out; + } + + dest_fd = sys_creat(destination, dest_mode); + if (dest_fd < 0) { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED, + "Unble to open a file %s", destination); + goto out; + } + + do { + ret = sys_read(src_fd, buffer, sizeof(buffer)); + if (ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "Error reading file " + "%s", + source); + goto out; + } + read_len = ret; + if (read_len == 0) + break; + + ret = sys_write(dest_fd, buffer, read_len); + if (ret != read_len) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED, + "Writing in " + "file %s failed with error %s", + destination, strerror(errno)); + goto out; + } + } while (ret > 0); +out: + if (src_fd != -1) + sys_close(src_fd); + + if (dest_fd > 0) + sys_close(dest_fd); + return ret; +} + +int32_t +glusterd_copy_folder(const char *source, const char *destination) +{ + int32_t ret = -1; + xlator_t *this = NULL; + DIR *dir_ptr = NULL; + struct dirent *entry = NULL; + struct dirent scratch[2] = { + { + 0, + }, + }; + char src_path[PATH_MAX] = { + 0, + }; + char dest_path[PATH_MAX] = { + 0, + }; + + this = THIS; + GF_ASSERT(this); + + GF_ASSERT(source); + GF_ASSERT(destination); + + dir_ptr = sys_opendir(source); + if (!dir_ptr) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED, + "Unable to open %s", source); + goto out; + } + + for (;;) { + errno = 0; + entry = sys_readdir(dir_ptr, scratch); + if (!entry || errno != 0) + break; + + if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) + continue; + ret = snprintf(src_path, sizeof(src_path), "%s/%s", source, + entry->d_name); + if (ret < 0) { + 
gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); + goto out; + } + + ret = snprintf(dest_path, sizeof(dest_path), "%s/%s", destination, + entry->d_name); + if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); + goto out; + } + + ret = glusterd_copy_file(src_path, dest_path); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "Could not copy " + "%s to %s", + src_path, dest_path); + goto out; + } + } +out: + if (dir_ptr) + (void)sys_closedir(dir_ptr); + + return ret; +} + +int32_t +glusterd_get_geo_rep_session(char *slave_key, char *origin_volname, + dict_t *gsync_slaves_dict, char *session, + char *slave) +{ + int32_t ret = -1; + int32_t len = 0; + char *token = NULL; + char *tok = NULL; + char *temp = NULL; + char *ip = NULL; + char *ip_i = NULL; + char *ip_temp = NULL; + char *buffer = NULL; + xlator_t *this = NULL; + char *slave_temp = NULL; + char *save_ptr = NULL; + + this = THIS; + GF_ASSERT(this); + + GF_ASSERT(slave_key); + GF_ASSERT(origin_volname); + GF_ASSERT(gsync_slaves_dict); + + ret = dict_get_str(gsync_slaves_dict, slave_key, &buffer); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to " + "get value for key %s", + slave_key); + goto out; + } + + temp = gf_strdup(buffer); + if (!temp) { + ret = -1; + goto out; + } + + /* geo-rep session string format being parsed: + * "master_node_uuid:ssh://slave_host::slave_vol:slave_voluuid" + */ + token = strtok_r(temp, "/", &save_ptr); + + token = strtok_r(NULL, ":", &save_ptr); + if (!token) { + ret = -1; + goto out; + } + token++; + + ip = gf_strdup(token); + if (!ip) { + ret = -1; + goto out; + } + ip_i = ip; + + token = strtok_r(NULL, ":", &save_ptr); + if (!token) { + ret = -1; + goto out; + } + + slave_temp = gf_strdup(token); + if (!slave) { + ret = -1; + goto out; + } + + /* If 'ip' has 'root@slavehost', point to 'slavehost' as + * working directory for root users are created without + * 'root@' */ + ip_temp = gf_strdup(ip); + tok = strtok_r(ip_temp, "@", &save_ptr); + len = strlen(tok); + tok = strtok_r(NULL, "@", &save_ptr); + if (tok != NULL) + ip_i = ip + len + 1; + + ret = snprintf(session, PATH_MAX, "%s_%s_%s", origin_volname, ip_i, + slave_temp); + if (ret < 0) /* Negative value is an error */ + goto out; + + ret = snprintf(slave, PATH_MAX, "%s::%s", ip, slave_temp); + if (ret < 0) { + goto out; + } + + ret = 0; /* Success */ + +out: + if (temp) + GF_FREE(temp); + + if (ip) + GF_FREE(ip); + + if (ip_temp) + GF_FREE(ip_temp); + + if (slave_temp) + GF_FREE(slave_temp); + + return ret; +} + +int32_t +glusterd_copy_quota_files(glusterd_volinfo_t *src_vol, + glusterd_volinfo_t *dest_vol, + gf_boolean_t *conf_present) +{ + int32_t ret = -1; + char src_dir[PATH_MAX] = ""; + char dest_dir[PATH_MAX] = ""; + char src_path[PATH_MAX] = ""; + char dest_path[PATH_MAX] = ""; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + struct stat stbuf = { + 0, + }; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + GF_ASSERT(src_vol); + GF_ASSERT(dest_vol); + + GLUSTERD_GET_VOLUME_DIR(src_dir, src_vol, priv); + + GLUSTERD_GET_VOLUME_DIR(dest_dir, dest_vol, priv); + + ret = snprintf(src_path, sizeof(src_path), "%s/quota.conf", src_dir); + if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); + goto out; + } + + /* quota.conf is not present if quota is not enabled, Hence ignoring + * the absence of this file + */ + ret = sys_lstat(src_path, &stbuf); + if (ret) { + ret = 
0; + gf_msg_debug(this->name, 0, "%s not found", src_path); + goto out; + } + + ret = snprintf(dest_path, sizeof(dest_path), "%s/quota.conf", dest_dir); + if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); + goto out; + } + + ret = glusterd_copy_file(src_path, dest_path); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "Failed to copy %s in %s", src_path, dest_path); + goto out; + } + + ret = snprintf(src_path, sizeof(src_path), "%s/quota.cksum", src_dir); + if (ret < 0) + goto out; + + /* if quota.conf is present, quota.cksum has to be present. * + * Fail snapshot operation if file is absent * + */ + ret = sys_lstat(src_path, &stbuf); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_NOT_FOUND, + "%s not found", src_path); + goto out; + } + + ret = snprintf(dest_path, sizeof(dest_path), "%s/quota.cksum", dest_dir); + if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); + goto out; + } + + ret = glusterd_copy_file(src_path, dest_path); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "Failed to copy %s in %s", src_path, dest_path); + goto out; + } + + *conf_present = _gf_true; +out: + return ret; +} + +/* * + * Here there are two possibilities, either destination is snaphot or + * clone. In the case of snapshot nfs_ganesha export file will be copied + * to snapdir. If it is clone , then new export file will be created for + * the clone in the GANESHA_EXPORT_DIRECTORY, replacing occurences of + * volname with clonename + */ +int +glusterd_copy_nfs_ganesha_file(glusterd_volinfo_t *src_vol, + glusterd_volinfo_t *dest_vol) +{ + int32_t ret = -1; + char snap_dir[PATH_MAX] = { + 0, + }; + char src_path[PATH_MAX] = { + 0, + }; + char dest_path[PATH_MAX] = { + 0, + }; + char buffer[BUFSIZ] = { + 0, + }; + char *find_ptr = NULL; + char *buff_ptr = NULL; + char *tmp_ptr = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + struct stat stbuf = { + 0, + }; + FILE *src = NULL; + FILE *dest = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO("snapshot", this, out); + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, out); + + GF_VALIDATE_OR_GOTO(this->name, src_vol, out); + GF_VALIDATE_OR_GOTO(this->name, dest_vol, out); + + if (glusterd_check_ganesha_export(src_vol) == _gf_false) { + gf_msg_debug(this->name, 0, + "%s is not exported via " + "NFS-Ganesha. Skipping copy of export conf.", + src_vol->volname); + ret = 0; + goto out; + } + + if (src_vol->is_snap_volume) { + GLUSTERD_GET_SNAP_DIR(snap_dir, src_vol->snapshot, priv); + ret = snprintf(src_path, PATH_MAX, "%s/export.%s.conf", snap_dir, + src_vol->snapshot->snapname); + } else { + ret = snprintf(src_path, PATH_MAX, "%s/export.%s.conf", + GANESHA_EXPORT_DIRECTORY, src_vol->volname); + } + if (ret < 0 || ret >= PATH_MAX) + goto out; + + ret = sys_lstat(src_path, &stbuf); + if (ret) { + /* + * This code path is hit, only when the src_vol is being * + * exported via NFS-Ganesha. So if the conf file is not * + * available, we fail the snapshot operation. 
* + */ + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "Stat on %s failed with %s", src_path, strerror(errno)); + goto out; + } + + if (dest_vol->is_snap_volume) { + memset(snap_dir, 0, PATH_MAX); + GLUSTERD_GET_SNAP_DIR(snap_dir, dest_vol->snapshot, priv); + ret = snprintf(dest_path, sizeof(dest_path), "%s/export.%s.conf", + snap_dir, dest_vol->snapshot->snapname); + if (ret < 0) + goto out; + + ret = glusterd_copy_file(src_path, dest_path); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "Failed to copy %s in %s", src_path, dest_path); + goto out; + } + + } else { + ret = snprintf(dest_path, sizeof(dest_path), "%s/export.%s.conf", + GANESHA_EXPORT_DIRECTORY, dest_vol->volname); + if (ret < 0) + goto out; + + src = fopen(src_path, "r"); + dest = fopen(dest_path, "w"); + + if (!src || !dest) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED, + "Failed to open %s", dest ? src_path : dest_path); + ret = -1; + goto out; + } + + /* * + * if the source volume is snapshot, the export conf file + * consists of orginal volname + */ + if (src_vol->is_snap_volume) + find_ptr = gf_strdup(src_vol->parent_volname); + else + find_ptr = gf_strdup(src_vol->volname); + + if (!find_ptr) + goto out; + + /* Replacing volname with clonename */ + while (fgets(buffer, BUFSIZ, src)) { + buff_ptr = buffer; + while ((tmp_ptr = strstr(buff_ptr, find_ptr))) { + while (buff_ptr < tmp_ptr) + fputc((int)*buff_ptr++, dest); + fputs(dest_vol->volname, dest); + buff_ptr += strlen(find_ptr); + } + fputs(buff_ptr, dest); + memset(buffer, 0, BUFSIZ); + } + } +out: + if (src) + fclose(src); + if (dest) + fclose(dest); + if (find_ptr) + GF_FREE(find_ptr); + + return ret; +} + +int32_t +glusterd_restore_geo_rep_files(glusterd_volinfo_t *snap_vol) +{ + int32_t ret = -1; + char src_path[PATH_MAX] = ""; + char dest_path[PATH_MAX] = ""; + xlator_t *this = NULL; + char *origin_volname = NULL; + glusterd_volinfo_t *origin_vol = NULL; + int i = 0; + char key[32] = ""; + char session[PATH_MAX] = ""; + char slave[PATH_MAX] = ""; + char snapgeo_dir[PATH_MAX] = ""; + glusterd_conf_t *priv = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + GF_ASSERT(snap_vol); + + origin_volname = gf_strdup(snap_vol->parent_volname); + if (!origin_volname) { + ret = -1; + goto out; + } + + ret = glusterd_volinfo_find(origin_volname, &origin_vol); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, + "Unable to fetch " + "volinfo for volname %s", + origin_volname); + goto out; + } + + for (i = 1; i <= snap_vol->gsync_slaves->count; i++) { + ret = snprintf(key, sizeof(key), "slave%d", i); + if (ret < 0) { + goto out; + } + + /* "origin_vol" is used here because geo-replication saves + * the session in the form of master_ip_slave. + * As we need the master volume to be same even after + * restore, we are passing the origin volume name. + * + * "snap_vol->gsync_slaves" contain the slave information + * when the snapshot was taken, hence we have to restore all + * those slaves information when we do snapshot restore. 
+ */ + ret = glusterd_get_geo_rep_session( + key, origin_vol->volname, snap_vol->gsync_slaves, session, slave); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GEOREP_GET_FAILED, + "Failed to get geo-rep session"); + goto out; + } + + GLUSTERD_GET_SNAP_GEO_REP_DIR(snapgeo_dir, snap_vol->snapshot, priv); + ret = snprintf(src_path, sizeof(src_path), "%s/%s", snapgeo_dir, + session); + if (ret < 0) + goto out; + + ret = snprintf(dest_path, sizeof(dest_path), "%s/%s/%s", priv->workdir, + GEOREP, session); + if (ret < 0) + goto out; + + ret = glusterd_copy_folder(src_path, dest_path); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DIR_OP_FAILED, + "Could not copy " + "%s to %s", + src_path, dest_path); + goto out; + } + } +out: + if (origin_volname) + GF_FREE(origin_volname); + + return ret; +} + +int +glusterd_restore_nfs_ganesha_file(glusterd_volinfo_t *src_vol, + glusterd_snap_t *snap) +{ + int32_t ret = -1; + char snap_dir[PATH_MAX] = ""; + char src_path[PATH_MAX] = ""; + char dest_path[PATH_MAX] = ""; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + struct stat stbuf = { + 0, + }; + + this = THIS; + GF_VALIDATE_OR_GOTO("snapshot", this, out); + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, out); + + GF_VALIDATE_OR_GOTO(this->name, src_vol, out); + GF_VALIDATE_OR_GOTO(this->name, snap, out); + + GLUSTERD_GET_SNAP_DIR(snap_dir, snap, priv); + + ret = snprintf(src_path, sizeof(src_path), "%s/export.%s.conf", snap_dir, + snap->snapname); + if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); + goto out; + } + + ret = sys_lstat(src_path, &stbuf); + if (ret) { + if (errno == ENOENT) { + ret = 0; + gf_msg_debug(this->name, 0, "%s not found", src_path); + } else + gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED, + "Stat on %s failed with %s", src_path, strerror(errno)); + goto out; + } + + ret = snprintf(dest_path, sizeof(dest_path), "%s/export.%s.conf", + GANESHA_EXPORT_DIRECTORY, src_vol->volname); + if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); + goto out; + } + + ret = glusterd_copy_file(src_path, dest_path); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "Failed to copy %s in %s", src_path, dest_path); + +out: + return ret; +} + +/* Snapd functions */ +int +glusterd_is_snapd_enabled(glusterd_volinfo_t *volinfo) +{ + int ret = 0; + xlator_t *this = THIS; + + ret = dict_get_str_boolean(volinfo->dict, "features.uss", -2); + if (ret == -2) { + gf_msg_debug(this->name, 0, + "Key features.uss not " + "present in the dict for volume %s", + volinfo->volname); + ret = 0; + + } else if (ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get 'features.uss'" + " from dict for volume %s", + volinfo->volname); + } + + return ret; +} + +int32_t +glusterd_is_snap_soft_limit_reached(glusterd_volinfo_t *volinfo, dict_t *dict) +{ + int32_t ret = -1; + uint64_t opt_max_hard = GLUSTERD_SNAPS_MAX_HARD_LIMIT; + uint64_t opt_max_soft = GLUSTERD_SNAPS_DEF_SOFT_LIMIT_PERCENT; + uint64_t limit = 0; + int auto_delete = 0; + uint64_t effective_max_limit = 0; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + + GF_ASSERT(volinfo); + GF_ASSERT(dict); + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + /* config values snap-max-hard-limit and snap-max-soft-limit are + * optional and hence we are not erroring out if values are not + * present + */ + gd_get_snap_conf_values_if_present(priv->opts, 
&opt_max_hard, + &opt_max_soft); + + /* "auto-delete" might not be set by user explicitly, + * in that case it's better to consider the default value. + * Hence not erroring out if Key is not found. + */ + auto_delete = dict_get_str_boolean( + priv->opts, GLUSTERD_STORE_KEY_SNAP_AUTO_DELETE, _gf_false); + + if (volinfo->snap_max_hard_limit < opt_max_hard) + effective_max_limit = volinfo->snap_max_hard_limit; + else + effective_max_limit = opt_max_hard; + + limit = (opt_max_soft * effective_max_limit) / 100; + + if (volinfo->snap_count >= limit && auto_delete != _gf_true) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SOFT_LIMIT_REACHED, + "Soft-limit " + "(value = %" PRIu64 + ") of volume %s is reached. " + "Snapshot creation is not possible once effective " + "hard-limit (value = %" PRIu64 ") is reached.", + limit, volinfo->volname, effective_max_limit); + + ret = dict_set_int8(dict, "soft-limit-reach", _gf_true); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to " + "set soft limit exceed flag in " + "response dictionary"); + } + + goto out; + } + ret = 0; +out: + return ret; +} + +/* This function initializes the parameter sys_hard_limit, + * sys_soft_limit and auto_delete value to the value set + * in dictionary, If value is not present then it is + * initialized to default values. Hence this function does not + * return any values. + */ +void +gd_get_snap_conf_values_if_present(dict_t *dict, uint64_t *sys_hard_limit, + uint64_t *sys_soft_limit) +{ + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + GF_ASSERT(dict); + + /* "snap-max-hard-limit" might not be set by user explicitly, + * in that case it's better to consider the default value. + * Hence not erroring out if Key is not found. + */ + if (dict_get_uint64(dict, GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT, + sys_hard_limit)) { + gf_msg_debug(this->name, 0, + "%s is not present in" + "dictionary", + GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT); + } + + /* "snap-max-soft-limit" might not be set by user explicitly, + * in that case it's better to consider the default value. + * Hence not erroring out if Key is not found. + */ + if (dict_get_uint64(dict, GLUSTERD_STORE_KEY_SNAP_MAX_SOFT_LIMIT, + sys_soft_limit)) { + gf_msg_debug(this->name, 0, + "%s is not present in" + "dictionary", + GLUSTERD_STORE_KEY_SNAP_MAX_SOFT_LIMIT); + } +} + +int +glusterd_get_snap_status_str(glusterd_snap_t *snapinfo, char *snap_status_str) +{ + int ret = -1; + + GF_VALIDATE_OR_GOTO(THIS->name, snapinfo, out); + GF_VALIDATE_OR_GOTO(THIS->name, snap_status_str, out); + + switch (snapinfo->snap_status) { + case GD_SNAP_STATUS_NONE: + sprintf(snap_status_str, "%s", "none"); + break; + case GD_SNAP_STATUS_INIT: + sprintf(snap_status_str, "%s", "init"); + break; + case GD_SNAP_STATUS_IN_USE: + sprintf(snap_status_str, "%s", "in_use"); + break; + case GD_SNAP_STATUS_DECOMMISSION: + sprintf(snap_status_str, "%s", "decommissioned"); + break; + case GD_SNAP_STATUS_UNDER_RESTORE: + sprintf(snap_status_str, "%s", "under_restore"); + break; + case GD_SNAP_STATUS_RESTORED: + sprintf(snap_status_str, "%s", "restored"); + break; + default: + goto out; + } + ret = 0; +out: + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.h b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.h new file mode 100644 index 00000000000..5762999bba7 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.h @@ -0,0 +1,169 @@ +/* + Copyright (c) 2015 Red Hat, Inc. 
<http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef _GLUSTERD_SNAP_UTILS_H +#define _GLUSTERD_SNAP_UTILS_H + +#define GLUSTERD_GET_SNAP_DIR(path, snap, priv) \ + do { \ + int32_t _snap_dir_len; \ + _snap_dir_len = snprintf(path, PATH_MAX, "%s/snaps/%s", priv->workdir, \ + snap->snapname); \ + if ((_snap_dir_len < 0) || (_snap_dir_len >= PATH_MAX)) { \ + path[0] = 0; \ + } \ + } while (0) + +int32_t +glusterd_snap_volinfo_find(char *volname, glusterd_snap_t *snap, + glusterd_volinfo_t **volinfo); + +int32_t +glusterd_snap_volinfo_find_from_parent_volname(char *origin_volname, + glusterd_snap_t *snap, + glusterd_volinfo_t **volinfo); + +int +glusterd_snap_volinfo_find_by_volume_id(uuid_t volume_id, + glusterd_volinfo_t **volinfo); + +int32_t +glusterd_add_snapd_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict, + int32_t count); + +int +glusterd_compare_snap_time(struct cds_list_head *, struct cds_list_head *); + +int +glusterd_compare_snap_vol_time(struct cds_list_head *, struct cds_list_head *); + +int32_t +glusterd_snap_volinfo_restore(dict_t *dict, dict_t *rsp_dict, + glusterd_volinfo_t *new_volinfo, + glusterd_volinfo_t *snap_volinfo, + int32_t volcount); +int32_t +glusterd_snapobject_delete(glusterd_snap_t *snap); + +int32_t +glusterd_cleanup_snaps_for_volume(glusterd_volinfo_t *volinfo); + +int32_t +glusterd_missed_snapinfo_new(glusterd_missed_snap_info **missed_snapinfo); + +int32_t +glusterd_missed_snap_op_new(glusterd_snap_op_t **snap_op); + +int32_t +glusterd_add_missed_snaps_to_dict(dict_t *rsp_dict, + glusterd_volinfo_t *snap_vol, + glusterd_brickinfo_t *brickinfo, + int32_t brick_number, int32_t op); + +int32_t +glusterd_add_missed_snaps_to_export_dict(dict_t *peer_data); + +int32_t +glusterd_import_friend_missed_snap_list(dict_t *peer_data); + +int +gd_restore_snap_volume(dict_t *dict, dict_t *rsp_dict, + glusterd_volinfo_t *orig_vol, + glusterd_volinfo_t *snap_vol, int32_t volcount); + +int32_t +glusterd_mount_lvm_snapshot(glusterd_brickinfo_t *brickinfo, + char *brick_mount_path); + +int32_t +glusterd_umount(const char *path); + +int32_t +glusterd_snap_unmount(xlator_t *this, glusterd_volinfo_t *volinfo); + +int32_t +glusterd_add_snapshots_to_export_dict(dict_t *peer_data); + +int32_t +glusterd_compare_friend_snapshots(dict_t *peer_data, char *peername, + uuid_t peerid); + +int32_t +glusterd_store_create_snap_dir(glusterd_snap_t *snap); + +int32_t +glusterd_copy_file(const char *source, const char *destination); + +int32_t +glusterd_copy_folder(const char *source, const char *destination); + +int32_t +glusterd_get_geo_rep_session(char *slave_key, char *origin_volname, + dict_t *gsync_slaves_dict, char *session, + char *slave); + +int32_t +glusterd_restore_geo_rep_files(glusterd_volinfo_t *snap_vol); + +int32_t +glusterd_copy_quota_files(glusterd_volinfo_t *src_vol, + glusterd_volinfo_t *dest_vol, + gf_boolean_t *conf_present); + +int +glusterd_snap_use_rsp_dict(dict_t *aggr, dict_t *rsp_dict); + +int +gd_add_vol_snap_details_to_dict(dict_t *dict, char *prefix, + glusterd_volinfo_t *volinfo); + +int +gd_add_brick_snap_details_to_dict(dict_t *dict, char *prefix, + glusterd_brickinfo_t *brickinfo); + +int +gd_import_new_brick_snap_details(dict_t *dict, char *prefix, + glusterd_brickinfo_t *brickinfo); 
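The GLUSTERD_GET_SNAP_DIR macro defined at the top of this header builds the per-snapshot state directory as "<workdir>/snaps/<snapname>" and clears the buffer whenever snprintf fails or the result would not fit in PATH_MAX. A minimal caller-side sketch follows; the example_get_snap_dir helper and its arguments are hypothetical (nothing in this patch declares them), and /var/lib/glusterd is only the usual default workdir, not something set here.

    /* Hypothetical helper, not part of the patch: shows the intended use of
     * GLUSTERD_GET_SNAP_DIR. 'snap' and 'priv' are supplied by the caller. */
    static int
    example_get_snap_dir(glusterd_snap_t *snap, glusterd_conf_t *priv,
                         char *snap_dir /* buffer of at least PATH_MAX bytes */)
    {
        snap_dir[0] = '\0';
        GLUSTERD_GET_SNAP_DIR(snap_dir, snap, priv);
        if (snap_dir[0] == '\0') {
            /* snprintf failed or the path was truncated; the macro clears
             * the buffer in that case, so report it as an error. */
            return -1;
        }
        /* snap_dir now holds "<priv->workdir>/snaps/<snap->snapname>",
         * e.g. "/var/lib/glusterd/snaps/snap1" with the usual default workdir. */
        return 0;
    }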
+ +int +gd_import_volume_snap_details(dict_t *dict, glusterd_volinfo_t *volinfo, + char *prefix, char *volname); + +int32_t +glusterd_snap_quorum_check(dict_t *dict, gf_boolean_t snap_volume, + char **op_errstr, uint32_t *op_errno); + +int32_t +glusterd_snap_brick_create(glusterd_volinfo_t *snap_volinfo, + glusterd_brickinfo_t *brickinfo, int32_t brick_count, + int32_t clone); + +int +glusterd_snapshot_restore_cleanup(dict_t *rsp_dict, char *volname, + glusterd_snap_t *snap); + +void +glusterd_get_snapd_dir(glusterd_volinfo_t *volinfo, char *path, int path_len); + +int +glusterd_is_snapd_enabled(glusterd_volinfo_t *volinfo); + +int32_t +glusterd_check_and_set_config_limit(glusterd_conf_t *priv); + +int32_t +glusterd_is_snap_soft_limit_reached(glusterd_volinfo_t *volinfo, dict_t *dict); + +void +gd_get_snap_conf_values_if_present(dict_t *opts, uint64_t *sys_hard_limit, + uint64_t *sys_soft_limit); +int +glusterd_get_snap_status_str(glusterd_snap_t *snapinfo, char *snap_status_str); + +#endif diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot.c b/xlators/mgmt/glusterd/src/glusterd-snapshot.c new file mode 100644 index 00000000000..aeaa8d15214 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-snapshot.c @@ -0,0 +1,10087 @@ +/* + Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#include <inttypes.h> +#include <sys/types.h> +#include <unistd.h> +#include <sys/resource.h> +#include <sys/statvfs.h> +#include <sys/mount.h> +#include <signal.h> +#include "glusterd-messages.h" +#include "glusterd-errno.h" + +#if defined(GF_LINUX_HOST_OS) +#include <mntent.h> +#else +#include "mntent_compat.h" +#endif + +#ifdef __NetBSD__ +#define umount2(dir, flags) unmount(dir, ((flags) != 0) ? MNT_FORCE : 0) +#endif + +#if defined(GF_DARWIN_HOST_OS) || defined(__FreeBSD__) +#include <sys/param.h> +#include <sys/mount.h> +#define umount2(dir, flags) unmount(dir, ((flags) != 0) ? 
MNT_FORCE : 0) +#endif + +#include <regex.h> + +#include <glusterfs/compat.h> +#include "protocol-common.h" +#include <glusterfs/xlator.h> +#include <glusterfs/logging.h> +#include <glusterfs/timer.h> +#include "glusterd-mem-types.h" +#include "glusterd.h" +#include "glusterd-sm.h" +#include "glusterd-op-sm.h" +#include "glusterd-utils.h" +#include "glusterd-store.h" +#include <glusterfs/run.h> +#include "glusterd-volgen.h" +#include "glusterd-mgmt.h" +#include "glusterd-syncop.h" +#include "glusterd-snapshot-utils.h" +#include "glusterd-snapd-svc.h" + +#include "glusterfs3.h" + +#include <glusterfs/syscall.h> +#include "cli1-xdr.h" +#include "xdr-generic.h" + +#include <glusterfs/lvm-defaults.h> +#include <glusterfs/events.h> + +#define GLUSTERD_GET_UUID_NOHYPHEN(ret_string, uuid) \ + do { \ + char *snap_volname_ptr = ret_string; \ + char tmp_uuid[64]; \ + char *snap_volid_ptr = uuid_utoa_r(uuid, tmp_uuid); \ + while (*snap_volid_ptr) { \ + if (*snap_volid_ptr == '-') { \ + snap_volid_ptr++; \ + } else { \ + (*snap_volname_ptr++) = (*snap_volid_ptr++); \ + } \ + } \ + *snap_volname_ptr = '\0'; \ + } while (0) + +char snap_mount_dir[VALID_GLUSTERD_PATHMAX]; +struct snap_create_args_ { + xlator_t *this; + dict_t *dict; + dict_t *rsp_dict; + glusterd_volinfo_t *snap_vol; + glusterd_brickinfo_t *brickinfo; + struct syncargs *args; + int32_t volcount; + int32_t brickcount; + int32_t brickorder; +}; + +/* This structure is used to store unsupported options and their values + * for snapshotted volume. + */ +struct gd_snap_unsupported_opt_t { + char *key; + char *value; +}; + +typedef struct snap_create_args_ snap_create_args_t; + +/* This function is called to get the device path of the snap lvm. Usually + if /dev/mapper/<group-name>-<lvm-name> is the device for the lvm, + then the snap device will be /dev/<group-name>/<snapname>. + This function takes care of building the path for the snap device. 
+*/ + +char * +glusterd_build_snap_device_path(char *device, char *snapname, + int32_t brickcount) +{ + char snap[PATH_MAX] = ""; + char msg[1024] = ""; + char volgroup[PATH_MAX] = ""; + char *snap_device = NULL; + xlator_t *this = NULL; + runner_t runner = { + 0, + }; + char *ptr = NULL; + int ret = -1; + + this = THIS; + GF_ASSERT(this); + if (!device) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, + "device is NULL"); + goto out; + } + if (!snapname) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, + "snapname is NULL"); + goto out; + } + + runinit(&runner); + runner_add_args(&runner, "lvs", "--noheadings", "-o", "vg_name", device, + NULL); + runner_redir(&runner, STDOUT_FILENO, RUN_PIPE); + snprintf(msg, sizeof(msg), "Get volume group for device %s", device); + runner_log(&runner, this->name, GF_LOG_DEBUG, msg); + ret = runner_start(&runner); + if (ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_VG_GET_FAIL, + "Failed to get volume group " + "for device %s", + device); + runner_end(&runner); + goto out; + } + ptr = fgets(volgroup, sizeof(volgroup), + runner_chio(&runner, STDOUT_FILENO)); + if (!ptr || !strlen(volgroup)) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_VG_GET_FAIL, + "Failed to get volume group " + "for snap %s", + snapname); + runner_end(&runner); + ret = -1; + goto out; + } + runner_end(&runner); + + snprintf(snap, sizeof(snap), "/dev/%s/%s_%d", gf_trim(volgroup), snapname, + brickcount); + snap_device = gf_strdup(snap); + if (!snap_device) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, GD_MSG_NO_MEMORY, + "Cannot copy the snapshot device name for snapname: %s", + snapname); + } + +out: + return snap_device; +} + +/* Look for disconnected peers, for missed snap creates or deletes */ +static int32_t +glusterd_find_missed_snap(dict_t *rsp_dict, glusterd_volinfo_t *vol, + struct cds_list_head *peers, int32_t op) +{ + int32_t brick_count = -1; + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(rsp_dict); + GF_ASSERT(peers); + GF_ASSERT(vol); + + brick_count = 0; + cds_list_for_each_entry(brickinfo, &vol->bricks, brick_list) + { + if (!gf_uuid_compare(brickinfo->uuid, MY_UUID)) { + /* If the brick belongs to the same node */ + brick_count++; + continue; + } + + RCU_READ_LOCK; + cds_list_for_each_entry_rcu(peerinfo, peers, uuid_list) + { + if (gf_uuid_compare(peerinfo->uuid, brickinfo->uuid)) { + /* If the brick doesn't belong to this peer */ + continue; + } + + /* Found peer who owns the brick, * + * if peer is not connected or not * + * friend add it to missed snap list */ + if (!(peerinfo->connected) || + (peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED)) { + ret = glusterd_add_missed_snaps_to_dict( + rsp_dict, vol, brickinfo, brick_count + 1, op); + if (ret) { + RCU_READ_UNLOCK; + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_MISSED_SNAP_CREATE_FAIL, + "Failed to add missed snapshot " + "info for %s:%s in the " + "rsp_dict", + brickinfo->hostname, brickinfo->path); + goto out; + } + } + } + RCU_READ_UNLOCK; + brick_count++; + } + + ret = 0; +out: + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +int +snap_max_limits_display_commit(dict_t *rsp_dict, char *volname, char *op_errstr, + int len) +{ + char err_str[PATH_MAX] = ""; + char key[64] = ""; + int keylen; + glusterd_conf_t *conf = NULL; + glusterd_volinfo_t *volinfo = NULL; + int ret = -1; + uint64_t active_hard_limit 
= 0; + uint64_t snap_max_limit = 0; + uint64_t soft_limit_value = -1; + uint64_t count = 0; + xlator_t *this = NULL; + uint64_t opt_hard_max = GLUSTERD_SNAPS_MAX_HARD_LIMIT; + uint64_t opt_soft_max = GLUSTERD_SNAPS_DEF_SOFT_LIMIT_PERCENT; + char *auto_delete = "disable"; + char *snap_activate = "disable"; + + this = THIS; + + GF_ASSERT(this); + GF_ASSERT(rsp_dict); + GF_ASSERT(op_errstr); + + conf = this->private; + + GF_ASSERT(conf); + + /* config values snap-max-hard-limit and snap-max-soft-limit are + * optional and hence we are not erroring out if values are not + * present + */ + gd_get_snap_conf_values_if_present(conf->opts, &opt_hard_max, + &opt_soft_max); + + if (!volname) { + /* For system limit */ + cds_list_for_each_entry(volinfo, &conf->volumes, vol_list) + { + if (volinfo->is_snap_volume == _gf_true) + continue; + + snap_max_limit = volinfo->snap_max_hard_limit; + if (snap_max_limit > opt_hard_max) + active_hard_limit = opt_hard_max; + else + active_hard_limit = snap_max_limit; + + soft_limit_value = (opt_soft_max * active_hard_limit) / 100; + + keylen = snprintf(key, sizeof(key), "volume%" PRId64 "-volname", + count); + ret = dict_set_strn(rsp_dict, key, keylen, volinfo->volname); + if (ret) { + len = snprintf(err_str, PATH_MAX, "Failed to set %s", key); + if (len < 0) { + strcpy(err_str, "<error>"); + } + goto out; + } + + snprintf(key, sizeof(key), "volume%" PRId64 "-snap-max-hard-limit", + count); + ret = dict_set_uint64(rsp_dict, key, snap_max_limit); + if (ret) { + len = snprintf(err_str, PATH_MAX, "Failed to set %s", key); + if (len < 0) { + strcpy(err_str, "<error>"); + } + goto out; + } + + snprintf(key, sizeof(key), "volume%" PRId64 "-active-hard-limit", + count); + ret = dict_set_uint64(rsp_dict, key, active_hard_limit); + if (ret) { + len = snprintf(err_str, PATH_MAX, "Failed to set %s", key); + if (len < 0) { + strcpy(err_str, "<error>"); + } + goto out; + } + + snprintf(key, sizeof(key), "volume%" PRId64 "-snap-max-soft-limit", + count); + ret = dict_set_uint64(rsp_dict, key, soft_limit_value); + if (ret) { + len = snprintf(err_str, PATH_MAX, "Failed to set %s", key); + if (len < 0) { + strcpy(err_str, "<error>"); + } + goto out; + } + count++; + } + + ret = dict_set_uint64(rsp_dict, "voldisplaycount", count); + if (ret) { + snprintf(err_str, PATH_MAX, "Failed to set voldisplaycount"); + goto out; + } + } else { + /* For one volume */ + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + snprintf(err_str, PATH_MAX, + "Volume (%s) does not " + "exist", + volname); + goto out; + } + + snap_max_limit = volinfo->snap_max_hard_limit; + if (snap_max_limit > opt_hard_max) + active_hard_limit = opt_hard_max; + else + active_hard_limit = snap_max_limit; + + soft_limit_value = (opt_soft_max * active_hard_limit) / 100; + + keylen = snprintf(key, sizeof(key), "volume%" PRId64 "-volname", count); + ret = dict_set_strn(rsp_dict, key, keylen, volinfo->volname); + if (ret) { + len = snprintf(err_str, PATH_MAX, "Failed to set %s", key); + if (len < 0) { + strcpy(err_str, "<error>"); + } + goto out; + } + + snprintf(key, sizeof(key), "volume%" PRId64 "-snap-max-hard-limit", + count); + ret = dict_set_uint64(rsp_dict, key, snap_max_limit); + if (ret) { + len = snprintf(err_str, PATH_MAX, "Failed to set %s", key); + if (len < 0) { + strcpy(err_str, "<error>"); + } + goto out; + } + + snprintf(key, sizeof(key), "volume%" PRId64 "-active-hard-limit", + count); + ret = dict_set_uint64(rsp_dict, key, active_hard_limit); + if (ret) { + len = snprintf(err_str, PATH_MAX, "Failed 
to set %s", key); + if (len < 0) { + strcpy(err_str, "<error>"); + } + goto out; + } + + snprintf(key, sizeof(key), "volume%" PRId64 "-snap-max-soft-limit", + count); + ret = dict_set_uint64(rsp_dict, key, soft_limit_value); + if (ret) { + len = snprintf(err_str, PATH_MAX, "Failed to set %s", key); + if (len < 0) { + strcpy(err_str, "<error>"); + } + goto out; + } + + count++; + + ret = dict_set_uint64(rsp_dict, "voldisplaycount", count); + if (ret) { + snprintf(err_str, PATH_MAX, "Failed to set voldisplaycount"); + goto out; + } + } + + ret = dict_set_uint64(rsp_dict, GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT, + opt_hard_max); + if (ret) { + snprintf(err_str, PATH_MAX, "Failed to set %s in response dictionary", + GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT); + goto out; + } + + ret = dict_set_uint64(rsp_dict, GLUSTERD_STORE_KEY_SNAP_MAX_SOFT_LIMIT, + opt_soft_max); + if (ret) { + snprintf(err_str, PATH_MAX, "Failed to set %s in response dictionary", + GLUSTERD_STORE_KEY_SNAP_MAX_SOFT_LIMIT); + goto out; + } + + /* "auto-delete" might not be set by user explicitly, + * in that case it's better to consider the default value. + * Hence not erroring out if Key is not found. + */ + ret = dict_get_strn(conf->opts, GLUSTERD_STORE_KEY_SNAP_AUTO_DELETE, + SLEN(GLUSTERD_STORE_KEY_SNAP_AUTO_DELETE), + &auto_delete); + + ret = dict_set_dynstr_with_alloc( + rsp_dict, GLUSTERD_STORE_KEY_SNAP_AUTO_DELETE, auto_delete); + if (ret) { + snprintf(err_str, PATH_MAX, "Failed to set %s in response dictionary", + GLUSTERD_STORE_KEY_SNAP_AUTO_DELETE); + goto out; + } + + /* "snap-activate-on-create" might not be set by user explicitly, + * in that case it's better to consider the default value. + * Hence not erroring out if Key is not found. + */ + ret = dict_get_strn(conf->opts, GLUSTERD_STORE_KEY_SNAP_ACTIVATE, + SLEN(GLUSTERD_STORE_KEY_SNAP_ACTIVATE), &snap_activate); + + ret = dict_set_dynstr_with_alloc(rsp_dict, GLUSTERD_STORE_KEY_SNAP_ACTIVATE, + snap_activate); + if (ret) { + snprintf(err_str, PATH_MAX, "Failed to set %s in response dictionary", + GLUSTERD_STORE_KEY_SNAP_ACTIVATE); + goto out; + } + + ret = 0; +out: + if (ret) { + strncpy(op_errstr, err_str, len); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "%s", + err_str); + } + return ret; +} + +/* Third argument of scandir(used in glusterd_copy_geo_rep_session_files) + * is filter function. As we don't want "." and ".." files present in the + * directory, we are excliding these 2 files. + * "file_select" function here does the job of filtering. 
+ */ +int +file_select(const struct dirent *entry) +{ + if (entry == NULL) + return (FALSE); + + if ((strcmp(entry->d_name, ".") == 0) || (strcmp(entry->d_name, "..") == 0)) + return (FALSE); + else + return (TRUE); +} + +int32_t +glusterd_copy_geo_rep_session_files(char *session, glusterd_volinfo_t *snap_vol) +{ + int32_t ret = -1; + char snap_session_dir[PATH_MAX] = ""; + char georep_session_dir[PATH_MAX] = ""; + regex_t *reg_exp = NULL; + int file_count = -1; + struct dirent **files = { + 0, + }; + xlator_t *this = NULL; + int i = 0; + char src_path[PATH_MAX] = ""; + char dest_path[PATH_MAX] = ""; + glusterd_conf_t *priv = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + GF_ASSERT(session); + GF_ASSERT(snap_vol); + + ret = snprintf(georep_session_dir, sizeof(georep_session_dir), "%s/%s/%s", + priv->workdir, GEOREP, session); + if (ret < 0) { /* Negative value is an error */ + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_COPY_FAIL, NULL); + goto out; + } + + ret = snprintf(snap_session_dir, sizeof(snap_session_dir), "%s/%s/%s/%s/%s", + priv->workdir, GLUSTERD_VOL_SNAP_DIR_PREFIX, + snap_vol->snapshot->snapname, GEOREP, session); + if (ret < 0) { /* Negative value is an error */ + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_COPY_FAIL, NULL); + goto out; + } + + ret = mkdir_p(snap_session_dir, 0755, _gf_true); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED, + "Creating directory %s failed", snap_session_dir); + goto out; + } + + /* TODO : good to have - Allocate in stack instead of heap */ + reg_exp = GF_CALLOC(1, sizeof(regex_t), gf_common_mt_regex_t); + if (!reg_exp) { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "Failed to allocate memory for regular expression"); + goto out; + } + + ret = regcomp(reg_exp, "(.*status$)|(.*conf$)\0", REG_EXTENDED); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REG_COMPILE_FAILED, + "Failed to compile the regular expression"); + goto out; + } + + /* If there are no files in a particular session then fail it*/ + file_count = scandir(georep_session_dir, &files, file_select, alphasort); + if (file_count <= 0) { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, ENOENT, GD_MSG_FILE_OP_FAILED, + "Session files not present " + "in %s", + georep_session_dir); + goto out; + } + + /* Now compare the file name with regular expression to see if + * there is a match + */ + for (i = 0; i < file_count; i++) { + if (regexec(reg_exp, files[i]->d_name, 0, NULL, 0)) + continue; + + ret = snprintf(src_path, sizeof(src_path), "%s/%s", georep_session_dir, + files[i]->d_name); + if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_COPY_FAIL, NULL); + goto out; + } + + ret = snprintf(dest_path, sizeof(dest_path), "%s/%s", snap_session_dir, + files[i]->d_name); + if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_COPY_FAIL, NULL); + goto out; + } + + ret = glusterd_copy_file(src_path, dest_path); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "Could not copy file %s of session %s", files[i]->d_name, + session); + goto out; + } + } +out: + /* files are malloc'd by scandir, free them */ + if (file_count > 0) { + while (file_count--) { + free(files[file_count]); + } + free(files); + } + + if (reg_exp) + GF_FREE(reg_exp); + + return ret; +} + +/* This function will take backup of the volume store + * of the to-be restored volume. This will help us to + * revert the operation if it fails. 
+ * + * @param volinfo volinfo of the origin volume + * + * @return 0 on success and -1 on failure + */ +int +glusterd_snapshot_backup_vol(glusterd_volinfo_t *volinfo) +{ + char pathname[PATH_MAX] = ""; + int ret = -1; + int op_ret = 0; + char delete_path[PATH_MAX] = ""; + char trashdir[PATH_MAX] = ""; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + int32_t len = 0; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + GF_ASSERT(volinfo); + + GLUSTERD_GET_VOLUME_DIR(pathname, volinfo, priv); + + len = snprintf(delete_path, sizeof(delete_path), + "%s/" GLUSTERD_TRASH "/vols-%s.deleted", priv->workdir, + volinfo->volname); + if ((len < 0) || (len >= sizeof(delete_path))) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_COPY_FAIL, NULL); + goto out; + } + + len = snprintf(trashdir, sizeof(trashdir), "%s/" GLUSTERD_TRASH, + priv->workdir); + if ((len < 0) || (len >= sizeof(trashdir))) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_COPY_FAIL, NULL); + goto out; + } + + /* Create trash folder if it is not there */ + ret = sys_mkdir(trashdir, 0755); + if (ret && errno != EEXIST) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED, + "Failed to create trash directory, reason : %s", + strerror(errno)); + ret = -1; + goto out; + } + + /* Move the origin volume volder to the backup location */ + ret = sys_rename(pathname, delete_path); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "Failed to rename snap " + "directory %s to %s", + pathname, delete_path); + goto out; + } + + /* Re-create an empty origin volume folder so that restore can + * happen. */ + ret = sys_mkdir(pathname, 0755); + if (ret && errno != EEXIST) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED, + "Failed to create origin " + "volume directory (%s), reason : %s", + pathname, strerror(errno)); + ret = -1; + goto out; + } + + ret = 0; +out: + /* Save the actual return value */ + op_ret = ret; + if (ret) { + /* Revert the changes in case of failure */ + ret = sys_rmdir(pathname); + if (ret) { + gf_msg_debug(this->name, 0, "Failed to rmdir: %s,err: %s", pathname, + strerror(errno)); + } + + ret = sys_rename(delete_path, pathname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "Failed to rename directory %s to %s", delete_path, + pathname); + } + + ret = sys_rmdir(trashdir); + if (ret) { + gf_msg_debug(this->name, 0, "Failed to rmdir: %s, Reason: %s", + trashdir, strerror(errno)); + } + } + + gf_msg_trace(this->name, 0, "Returning %d", op_ret); + + return op_ret; +} + +static int32_t +glusterd_copy_geo_rep_files(glusterd_volinfo_t *origin_vol, + glusterd_volinfo_t *snap_vol, dict_t *rsp_dict) +{ + int32_t ret = -1; + int i = 0; + xlator_t *this = NULL; + char key[32] = ""; + char session[PATH_MAX] = ""; + char slave[PATH_MAX] = ""; + char snapgeo_dir[PATH_MAX] = ""; + glusterd_conf_t *priv = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + GF_ASSERT(origin_vol); + GF_ASSERT(snap_vol); + GF_ASSERT(rsp_dict); + + /* This condition is not satisfied if the volume + * is slave volume. 
+ */ + if (!origin_vol->gsync_slaves) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_SLAVE, NULL); + ret = 0; + goto out; + } + + GLUSTERD_GET_SNAP_GEO_REP_DIR(snapgeo_dir, snap_vol->snapshot, priv); + + ret = sys_mkdir(snapgeo_dir, 0755); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED, + "Creating directory %s failed", snapgeo_dir); + goto out; + } + + for (i = 1; i <= origin_vol->gsync_slaves->count; i++) { + ret = snprintf(key, sizeof(key), "slave%d", i); + if (ret < 0) /* Negative value is an error */ + goto out; + + ret = glusterd_get_geo_rep_session( + key, origin_vol->volname, origin_vol->gsync_slaves, session, slave); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GEOREP_GET_FAILED, + "Failed to get geo-rep session"); + goto out; + } + + ret = glusterd_copy_geo_rep_session_files(session, snap_vol); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED, + "Failed to copy files" + " related to session %s", + session); + goto out; + } + } + +out: + return ret; +} + +/* This function will restore a snapshot volumes + * + * @param dict dictionary containing snapshot restore request + * @param op_errstr In case of any failure error message will be returned + * in this variable + * @return Negative value on Failure and 0 in success + */ +int +glusterd_snapshot_restore(dict_t *dict, char **op_errstr, dict_t *rsp_dict) +{ + int ret = -1; + int32_t volcount = -1; + char *snapname = NULL; + xlator_t *this = NULL; + glusterd_volinfo_t *snap_volinfo = NULL; + glusterd_volinfo_t *tmp = NULL; + glusterd_volinfo_t *parent_volinfo = NULL; + glusterd_snap_t *snap = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + + GF_ASSERT(this); + GF_ASSERT(dict); + GF_ASSERT(op_errstr); + GF_ASSERT(rsp_dict); + + priv = this->private; + GF_ASSERT(priv); + + ret = dict_get_strn(dict, "snapname", SLEN("snapname"), &snapname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get snap name"); + goto out; + } + + snap = glusterd_find_snap_by_name(snapname); + if (NULL == snap) { + ret = gf_asprintf(op_errstr, "Snapshot (%s) does not exist", snapname); + if (ret < 0) { + goto out; + } + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_NOT_FOUND, "%s", + *op_errstr); + ret = -1; + goto out; + } + + volcount = 0; + cds_list_for_each_entry_safe(snap_volinfo, tmp, &snap->volumes, vol_list) + { + volcount++; + ret = glusterd_volinfo_find(snap_volinfo->parent_volname, + &parent_volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND, + "Could not get volinfo of %s", snap_volinfo->parent_volname); + goto out; + } + + ret = dict_set_dynstr_with_alloc(rsp_dict, "snapuuid", + uuid_utoa(snap->snap_id)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set snap " + "uuid in response dictionary for %s snapshot", + snap->snapname); + goto out; + } + + ret = dict_set_dynstr_with_alloc(rsp_dict, "volname", + snap_volinfo->parent_volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set snap " + "uuid in response dictionary for %s snapshot", + snap->snapname); + goto out; + } + + ret = dict_set_dynstr_with_alloc(rsp_dict, "volid", + uuid_utoa(parent_volinfo->volume_id)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set snap " + "uuid in response dictionary for %s snapshot", + snap->snapname); + goto out; + } + + if (is_origin_glusterd(dict) == _gf_true) { + /* From 
origin glusterd check if * + * any peers with snap bricks is down */ + ret = glusterd_find_missed_snap(rsp_dict, snap_volinfo, + &priv->peers, + GF_SNAP_OPTION_TYPE_RESTORE); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MISSED_SNAP_GET_FAIL, + "Failed to find missed snap restores"); + goto out; + } + } + /* During snapshot restore, mount point for stopped snap + * should exist as it is required to set extended attribute. + */ + ret = glusterd_recreate_vol_brick_mounts(this, snap_volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRK_MNT_RECREATE_FAIL, + "Failed to recreate brick mounts for %s", snap->snapname); + goto out; + } + + ret = gd_restore_snap_volume(dict, rsp_dict, parent_volinfo, + snap_volinfo, volcount); + if (ret) { + /* No need to update op_errstr because it is assumed + * that the called function will do that in case of + * failure. + */ + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_RESTORE_FAIL, + "Failed to restore " + "snap for %s", + snapname); + goto out; + } + + /* Detach the volinfo from priv->volumes, so that no new + * command can ref it any more and then unref it. + */ + cds_list_del_init(&parent_volinfo->vol_list); + glusterd_volinfo_unref(parent_volinfo); + } + + ret = 0; + + /* TODO: Need to check if we need to delete the snap after the + * operation is successful or not. Also need to persist the state + * of restore operation in the store. + */ +out: + return ret; +} + +/* This function is called before actual restore is taken place. This function + * will validate whether the snapshot volumes are ready to be restored or not. + * + * @param dict dictionary containing snapshot restore request + * @param op_errstr In case of any failure error message will be returned + * in this variable + * @param rsp_dict response dictionary + * @return Negative value on Failure and 0 in success + */ +int32_t +glusterd_snapshot_restore_prevalidate(dict_t *dict, char **op_errstr, + uint32_t *op_errno, dict_t *rsp_dict) +{ + int ret = -1; + int32_t i = 0; + int32_t volcount = 0; + int32_t brick_count = 0; + gf_boolean_t snap_restored = _gf_false; + char key[64] = ""; + int keylen; + char *volname = NULL; + char *snapname = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_snap_t *snap = NULL; + xlator_t *this = NULL; + + this = THIS; + + GF_ASSERT(this); + GF_ASSERT(dict); + GF_ASSERT(op_errstr); + GF_VALIDATE_OR_GOTO(this->name, op_errno, out); + GF_ASSERT(rsp_dict); + + ret = dict_get_strn(dict, "snapname", SLEN("snapname"), &snapname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get " + "snap name"); + goto out; + } + + snap = glusterd_find_snap_by_name(snapname); + if (NULL == snap) { + ret = gf_asprintf(op_errstr, "Snapshot (%s) does not exist", snapname); + *op_errno = EG_SNAPEXST; + if (ret < 0) { + goto out; + } + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_SNAP_NOT_FOUND, "%s", + *op_errstr); + ret = -1; + goto out; + } + + snap_restored = snap->snap_restored; + + if (snap_restored) { + ret = gf_asprintf(op_errstr, + "Snapshot (%s) is already " + "restored", + snapname); + if (ret < 0) { + goto out; + } + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPSHOT_OP_FAILED, "%s", + *op_errstr); + ret = -1; + goto out; + } + + ret = dict_set_strn(rsp_dict, "snapname", SLEN("snapname"), snapname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set " + "snap name(%s)", + snapname); + goto out; + } + + ret = 
dict_get_int32n(dict, "volcount", SLEN("volcount"), &volcount); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get volume count"); + goto out; + } + + /* Snapshot restore will only work if all the volumes, + that are part of the snapshot, are stopped. */ + for (i = 1; i <= volcount; ++i) { + keylen = snprintf(key, sizeof(key), "volname%d", i); + ret = dict_get_strn(dict, key, keylen, &volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to " + "get volume name"); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + ret = gf_asprintf(op_errstr, + "Volume (%s) " + "does not exist", + volname); + *op_errno = EG_NOVOL; + if (ret < 0) { + goto out; + } + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND, "%s", + *op_errstr); + ret = -1; + goto out; + } + + if (glusterd_is_volume_started(volinfo)) { + ret = gf_asprintf( + op_errstr, + "Volume (%s) has been " + "started. Volume needs to be stopped before restoring " + "a snapshot.", + volname); + *op_errno = EG_VOLRUN; + if (ret < 0) { + goto out; + } + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPSHOT_OP_FAILED, "%s", + *op_errstr); + ret = -1; + goto out; + } + + /* Take backup of the volinfo folder */ + ret = glusterd_snapshot_backup_vol(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_OP_FAILED, + "Failed to backup " + "volume backend files for %s volume", + volinfo->volname); + goto out; + } + } + + /* Get brickinfo for snap_volumes */ + volcount = 0; + cds_list_for_each_entry(volinfo, &snap->volumes, vol_list) + { + volcount++; + brick_count = 0; + + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + brick_count++; + if (gf_uuid_compare(brickinfo->uuid, MY_UUID)) + continue; + + keylen = snprintf(key, sizeof(key), "snap%d.brick%d.path", volcount, + brick_count); + ret = dict_set_strn(rsp_dict, key, keylen, brickinfo->path); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set %s", key); + goto out; + } + + keylen = snprintf(key, sizeof(key), "snap%d.brick%d.snap_status", + volcount, brick_count); + ret = dict_set_int32n(rsp_dict, key, keylen, + brickinfo->snap_status); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set %s", key); + goto out; + } + + keylen = snprintf(key, sizeof(key), "snap%d.brick%d.device_path", + volcount, brick_count); + ret = dict_set_strn(rsp_dict, key, keylen, brickinfo->device_path); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set %s", key); + goto out; + } + + keylen = snprintf(key, sizeof(key), "snap%d.brick%d.fs_type", + volcount, brick_count); + ret = dict_set_strn(rsp_dict, key, keylen, brickinfo->fstype); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set %s", key); + goto out; + } + + keylen = snprintf(key, sizeof(key), "snap%d.brick%d.mnt_opts", + volcount, brick_count); + ret = dict_set_strn(rsp_dict, key, keylen, brickinfo->mnt_opts); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set %s", key); + goto out; + } + } + + keylen = snprintf(key, sizeof(key), "snap%d.brick_count", volcount); + ret = dict_set_int32n(rsp_dict, key, keylen, brick_count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set %s", key); + goto out; + } + } + + ret = dict_set_int32n(rsp_dict, "volcount", SLEN("volcount"), 
volcount); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set %s", key); + goto out; + } + +out: + return ret; +} + +int +snap_max_hard_limits_validate(dict_t *dict, char *volname, uint64_t value, + char **op_errstr) +{ + char err_str[PATH_MAX] = ""; + glusterd_conf_t *conf = NULL; + glusterd_volinfo_t *volinfo = NULL; + int ret = -1; + uint64_t max_limit = GLUSTERD_SNAPS_MAX_HARD_LIMIT; + xlator_t *this = NULL; + uint64_t opt_hard_max = GLUSTERD_SNAPS_MAX_HARD_LIMIT; + + this = THIS; + + GF_ASSERT(this); + GF_ASSERT(dict); + GF_ASSERT(op_errstr); + + conf = this->private; + + GF_ASSERT(conf); + + if (volname) { + ret = glusterd_volinfo_find(volname, &volinfo); + if (!ret) { + if (volinfo->is_snap_volume) { + ret = -1; + snprintf(err_str, PATH_MAX, + "%s is a snap volume. Configuring " + "snap-max-hard-limit for a snap " + "volume is prohibited.", + volname); + goto out; + } + } + } + + /* "snap-max-hard-limit" might not be set by user explicitly, + * in that case it's better to use the default value. + * Hence not erroring out if Key is not found. + */ + ret = dict_get_uint64(conf->opts, GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT, + &opt_hard_max); + if (ret) { + ret = 0; + gf_msg_debug(this->name, 0, + "%s is not present in " + "opts dictionary", + GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT); + } + + /* volume snap-max-hard-limit cannot exceed system snap-max-hard-limit. + * Hence during prevalidate following checks are made to ensure the + * snap-max-hard-limit set on one particular volume does not + * exceed snap-max-hard-limit set globally (system limit). + */ + if (value && volname) { + max_limit = opt_hard_max; + } + + if (value > max_limit) { + ret = -1; + snprintf(err_str, PATH_MAX, + "Invalid snap-max-hard-limit " + "%" PRIu64 ". 
Expected range 1 - %" PRIu64, + value, max_limit); + goto out; + } + + ret = 0; +out: + if (ret) { + *op_errstr = gf_strdup(err_str); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPSHOT_OP_FAILED, "%s", + err_str); + } + return ret; +} + +int +glusterd_snapshot_config_prevalidate(dict_t *dict, char **op_errstr, + uint32_t *op_errno) +{ + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + int ret = -1; + int config_command = 0; + char err_str[PATH_MAX] = ""; + glusterd_conf_t *conf = NULL; + uint64_t hard_limit = 0; + uint64_t soft_limit = 0; + gf_loglevel_t loglevel = GF_LOG_ERROR; + uint64_t max_limit = GLUSTERD_SNAPS_MAX_HARD_LIMIT; + int32_t cur_auto_delete = 0; + int32_t req_auto_delete = 0; + int32_t cur_snap_activate = 0; + int32_t req_snap_activate = 0; + + this = THIS; + + GF_ASSERT(this); + GF_ASSERT(dict); + GF_ASSERT(op_errstr); + GF_VALIDATE_OR_GOTO(this->name, op_errno, out); + + conf = this->private; + + GF_ASSERT(conf); + + ret = dict_get_int32n(dict, "config-command", SLEN("config-command"), + &config_command); + if (ret) { + snprintf(err_str, sizeof(err_str), "failed to get config-command type"); + goto out; + } + + if (config_command != GF_SNAP_CONFIG_TYPE_SET) { + ret = 0; + goto out; + } + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (volname) { + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + snprintf(err_str, sizeof(err_str), "Volume (%s) does not exist.", + volname); + *op_errno = EG_NOVOL; + goto out; + } + } + + /* config values snap-max-hard-limit and snap-max-soft-limit are + * optional and hence we are not erroring out if values are not + * present + */ + gd_get_snap_conf_values_if_present(dict, &hard_limit, &soft_limit); + + if (hard_limit) { + /* Validations for snap-max-hard-limits */ + ret = snap_max_hard_limits_validate(dict, volname, hard_limit, + op_errstr); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HARD_LIMIT_SET_FAIL, + "snap-max-hard-limit validation failed."); + *op_errno = EINVAL; + goto out; + } + } + + if (soft_limit) { + max_limit = GLUSTERD_SNAPS_MAX_SOFT_LIMIT_PERCENT; + if (soft_limit > max_limit) { + ret = -1; + snprintf(err_str, PATH_MAX, + "Invalid " + "snap-max-soft-limit " + "%" PRIu64 ". 
Expected range 1 - %" PRIu64, + soft_limit, max_limit); + *op_errno = EINVAL; + goto out; + } + } + + if (hard_limit || soft_limit) { + ret = 0; + goto out; + } + + if (dict_getn(dict, GLUSTERD_STORE_KEY_SNAP_AUTO_DELETE, + SLEN(GLUSTERD_STORE_KEY_SNAP_AUTO_DELETE))) { + req_auto_delete = dict_get_str_boolean( + dict, GLUSTERD_STORE_KEY_SNAP_AUTO_DELETE, _gf_false); + if (req_auto_delete < 0) { + ret = -1; + snprintf(err_str, sizeof(err_str), + "Please enter a " + "valid boolean value for auto-delete"); + *op_errno = EINVAL; + goto out; + } + + /* Ignoring the error as the auto-delete is optional and + might not be present in the options dictionary.*/ + cur_auto_delete = dict_get_str_boolean( + conf->opts, GLUSTERD_STORE_KEY_SNAP_AUTO_DELETE, _gf_false); + + if (cur_auto_delete == req_auto_delete) { + ret = -1; + if (cur_auto_delete == _gf_true) + snprintf(err_str, sizeof(err_str), + "auto-delete is already enabled"); + else + snprintf(err_str, sizeof(err_str), + "auto-delete is already disabled"); + *op_errno = EINVAL; + goto out; + } + } else if (dict_getn(dict, GLUSTERD_STORE_KEY_SNAP_ACTIVATE, + SLEN(GLUSTERD_STORE_KEY_SNAP_ACTIVATE))) { + req_snap_activate = dict_get_str_boolean( + dict, GLUSTERD_STORE_KEY_SNAP_ACTIVATE, _gf_false); + if (req_snap_activate < 0) { + ret = -1; + snprintf(err_str, sizeof(err_str), + "Please enter a " + "valid boolean value for activate-on-create"); + *op_errno = EINVAL; + goto out; + } + + /* Ignoring the error as the activate-on-create is optional and + might not be present in the options dictionary.*/ + cur_snap_activate = dict_get_str_boolean( + conf->opts, GLUSTERD_STORE_KEY_SNAP_ACTIVATE, _gf_false); + + if (cur_snap_activate == req_snap_activate) { + ret = -1; + if (cur_snap_activate == _gf_true) + snprintf(err_str, sizeof(err_str), + "activate-on-create is already enabled"); + else + snprintf(err_str, sizeof(err_str), + "activate-on-create is already disabled"); + *op_errno = EINVAL; + goto out; + } + } else { + ret = -1; + snprintf(err_str, sizeof(err_str), "Invalid option"); + *op_errno = EINVAL; + goto out; + } + + ret = 0; +out: + + if (ret && err_str[0] != '\0') { + gf_msg(this->name, loglevel, 0, GD_MSG_SNAPSHOT_OP_FAILED, "%s", + err_str); + *op_errstr = gf_strdup(err_str); + } + + return ret; +} + +/* This function will be called from RPC handler routine. + * This function is responsible for getting the requested + * snapshot config into the dictionary. + * + * @param req RPC request object. Required for sending a response back. + * @param op glusterd operation. Required for sending a response back. + * @param dict pointer to dictionary which will contain both + * request and response key-pair values. 
+ * @return -1 on error and 0 on success + */ +int +glusterd_handle_snapshot_config(rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict, char *err_str, size_t len) +{ + int32_t ret = -1; + char *volname = NULL; + xlator_t *this = NULL; + int config_command = 0; + + this = THIS; + GF_ASSERT(this); + + GF_VALIDATE_OR_GOTO(this->name, req, out); + GF_VALIDATE_OR_GOTO(this->name, dict, out); + + /* TODO : Type of lock to be taken when we are setting + * limits system wide + */ + ret = dict_get_int32n(dict, "config-command", SLEN("config-command"), + &config_command); + if (ret) { + snprintf(err_str, len, "Failed to get config-command type"); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Key=config-command", NULL); + goto out; + } + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + + switch (config_command) { + case GF_SNAP_CONFIG_TYPE_SET: + if (!volname) { + ret = dict_set_int32n(dict, "hold_vol_locks", + SLEN("hold_vol_locks"), _gf_false); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to set hold_vol_locks value " + "as _gf_false"); + goto out; + } + } + ret = glusterd_mgmt_v3_initiate_all_phases(req, op, dict); + break; + case GF_SNAP_CONFIG_DISPLAY: + /* Reading data from local node only */ + ret = snap_max_limits_display_commit(dict, volname, err_str, len); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HARD_LIMIT_SET_FAIL, + "snap-max-limit " + "display commit failed."); + goto out; + } + + /* If everything is successful then send the response + * back to cli + */ + ret = glusterd_op_send_cli_response(op, 0, 0, req, dict, err_str); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_CLI_RESP, + "Failed to send cli " + "response"); + goto out; + } + + break; + default: + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_COMMAND_NOT_FOUND, + "Unknown config type"); + ret = -1; + break; + } +out: + return ret; +} +int +glusterd_snap_create_clone_pre_val_use_rsp_dict(dict_t *dst, dict_t *src) +{ + char *snap_brick_dir = NULL; + char *snap_device = NULL; + char key[64] = ""; + int keylen; + char *value = ""; + char snapbrckcnt[PATH_MAX] = ""; + char snapbrckord[PATH_MAX] = ""; + int ret = -1; + int64_t i = -1; + int64_t j = -1; + int64_t volume_count = 0; + int64_t brick_count = 0; + int64_t brick_order = 0; + xlator_t *this = NULL; + int32_t brick_online = 0; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(dst); + GF_ASSERT(src); + + ret = dict_get_int64(src, "volcount", &volume_count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "failed to " + "get the volume count"); + goto out; + } + + for (i = 0; i < volume_count; i++) { + ret = snprintf(snapbrckcnt, sizeof(snapbrckcnt) - 1, + "vol%" PRId64 "_brickcount", i + 1); + ret = dict_get_int64(src, snapbrckcnt, &brick_count); + if (ret) { + gf_msg_trace(this->name, 0, + "No bricks for this volume in this dict"); + continue; + } + + for (j = 0; j < brick_count; j++) { + /* Fetching data from source dict */ + snprintf(key, sizeof(key), "vol%" PRId64 ".brickdir%" PRId64, i + 1, + j); + ret = dict_get_ptr(src, key, (void **)&snap_brick_dir); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED, + "Unable to fetch %s", key); + continue; + } + + /* Fetching brick order from source dict */ + snprintf(snapbrckord, sizeof(snapbrckord) - 1, + "vol%" PRId64 ".brick%" PRId64 ".order", i + 1, j); + ret = dict_get_int64(src, snapbrckord, &brick_order); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, 
GD_MSG_DICT_GET_FAILED, + "Failed to get brick order"); + goto out; + } + + snprintf(key, sizeof(key), "vol%" PRId64 ".brickdir%" PRId64, i + 1, + brick_order); + ret = dict_set_dynstr_with_alloc(dst, key, snap_brick_dir); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set %s", key); + goto out; + } + + keylen = snprintf(key, sizeof(key), "vol%" PRId64 ".fstype%" PRId64, + i + 1, j); + ret = dict_get_strn(src, key, keylen, &value); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED, + "Unable to fetch %s", key); + continue; + } + + snprintf(key, sizeof(key), "vol%" PRId64 ".fstype%" PRId64, i + 1, + brick_order); + ret = dict_set_dynstr_with_alloc(dst, key, value); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set %s", key); + goto out; + } + + keylen = snprintf(key, sizeof(key), + "vol%" PRId64 ".mnt_opts%" PRId64, i + 1, j); + ret = dict_get_strn(src, key, keylen, &value); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED, + "Unable to fetch %s", key); + continue; + } + + snprintf(key, sizeof(key), "vol%" PRId64 ".mnt_opts%" PRId64, i + 1, + brick_order); + ret = dict_set_dynstr_with_alloc(dst, key, value); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set %s", key); + goto out; + } + + snprintf(key, sizeof(key), + "vol%" PRId64 ".brick_snapdevice%" PRId64, i + 1, j); + ret = dict_get_ptr(src, key, (void **)&snap_device); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to fetch snap_device"); + goto out; + } + + snprintf(key, sizeof(key), + "vol%" PRId64 ".brick_snapdevice%" PRId64, i + 1, + brick_order); + ret = dict_set_dynstr_with_alloc(dst, key, snap_device); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set %s", key); + goto out; + } + + keylen = snprintf(key, sizeof(key), + "vol%" PRId64 ".brick%" PRId64 ".status", i + 1, + brick_order); + ret = dict_get_int32n(src, key, keylen, &brick_online); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "failed to " + "get the brick status"); + goto out; + } + + ret = dict_set_int32n(dst, key, keylen, brick_online); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "failed to " + "set the brick status"); + goto out; + } + brick_online = 0; + } + } + ret = 0; +out: + + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +/* Aggregate brickinfo's of the snap volumes to be restored from */ +int32_t +glusterd_snap_restore_use_rsp_dict(dict_t *dst, dict_t *src) +{ + char key[64] = ""; + int keylen; + char *strvalue = NULL; + int32_t value = -1; + int32_t i = -1; + int32_t j = -1; + int32_t vol_count = -1; + int32_t brickcount = -1; + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + if (!dst || !src) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, + "Source or Destination " + "dict is empty."); + goto out; + } + + ret = dict_get_int32(src, "volcount", &vol_count); + if (ret) { + gf_msg_debug(this->name, 0, "No volumes"); + ret = 0; + goto out; + } + + for (i = 1; i <= vol_count; i++) { + keylen = snprintf(key, sizeof(key), "snap%d.brick_count", i); + ret = dict_get_int32n(src, key, keylen, &brickcount); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get %s", key); + goto out; + } + + for (j = 1; j <= brickcount; j++) { + keylen = 
snprintf(key, sizeof(key), "snap%d.brick%d.path", i, j); + ret = dict_get_strn(src, key, keylen, &strvalue); + if (ret) { + /* The brickinfo will be present in + * another rsp_dict */ + gf_msg_debug(this->name, 0, "%s not present", key); + ret = 0; + continue; + } + ret = dict_set_dynstr_with_alloc(dst, key, strvalue); + if (ret) { + gf_msg_debug(this->name, 0, "Failed to set %s", key); + goto out; + } + + keylen = snprintf(key, sizeof(key), "snap%d.brick%d.snap_status", i, + j); + ret = dict_get_int32n(src, key, keylen, &value); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get %s", key); + goto out; + } + ret = dict_set_int32n(dst, key, keylen, value); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set %s", key); + goto out; + } + + keylen = snprintf(key, sizeof(key), "snap%d.brick%d.device_path", i, + j); + ret = dict_get_strn(src, key, keylen, &strvalue); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get %s", key); + goto out; + } + ret = dict_set_dynstr_with_alloc(dst, key, strvalue); + if (ret) { + gf_msg_debug(this->name, 0, "Failed to set %s", key); + goto out; + } + + keylen = snprintf(key, sizeof(key), "snap%d.brick%d.fs_type", i, j); + ret = dict_get_strn(src, key, keylen, &strvalue); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get %s", key); + goto out; + } + ret = dict_set_dynstr_with_alloc(dst, key, strvalue); + if (ret) { + gf_msg_debug(this->name, 0, "Failed to set %s", key); + goto out; + } + + keylen = snprintf(key, sizeof(key), "snap%d.brick%d.mnt_opts", i, + j); + ret = dict_get_strn(src, key, keylen, &strvalue); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get %s", key); + goto out; + } + ret = dict_set_dynstr_with_alloc(dst, key, strvalue); + if (ret) { + gf_msg_debug(this->name, 0, "Failed to set %s", key); + goto out; + } + } + } + +out: + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +int +glusterd_snap_pre_validate_use_rsp_dict(dict_t *dst, dict_t *src) +{ + int ret = -1; + int32_t snap_command = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + if (!dst || !src) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY, + "Source or Destination " + "dict is empty."); + goto out; + } + + ret = dict_get_int32n(dst, "type", SLEN("type"), &snap_command); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "unable to get the type of " + "the snapshot command"); + goto out; + } + + switch (snap_command) { + case GF_SNAP_OPTION_TYPE_CREATE: + case GF_SNAP_OPTION_TYPE_CLONE: + ret = glusterd_snap_create_clone_pre_val_use_rsp_dict(dst, src); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to use " + "rsp dict"); + goto out; + } + break; + case GF_SNAP_OPTION_TYPE_RESTORE: + ret = glusterd_snap_restore_use_rsp_dict(dst, src); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RSP_DICT_USE_FAIL, + "Unable to use " + "rsp dict"); + goto out; + } + break; + default: + break; + } + + ret = 0; +out: + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; +} + +int +glusterd_add_brick_status_to_dict(dict_t *dict, glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, + char *key_prefix) +{ + char pidfile[PATH_MAX] = ""; + int32_t brick_online = 0; + pid_t pid = 0; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + int ret = -1; + + 
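glusterd_snap_create_clone_pre_val_use_rsp_dict() and glusterd_snap_restore_use_rsp_dict() above follow the same aggregation pattern: per-brick entries are read from a peer's response dictionary under snprintf-formatted keys and written back into the aggregate dictionary, re-indexed by the brick's global order so that bricks reported by different peers land in consistent slots. A minimal stand-alone sketch of that re-keying, with a toy in-memory map standing in for glusterd's dict_t API (toy_get/toy_set are illustrative helpers, not part of glusterd):

    /* Sketch: re-index a per-brick entry from a peer-local index to the
     * brick's global order, as the pre-validation rsp-dict merge does. */
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct toy_kv { char key[64]; char val[256]; };

    static struct toy_kv src[16], dst[16];
    static int src_n, dst_n;

    static const char *
    toy_get(const struct toy_kv *d, int n, const char *k)
    {
        for (int i = 0; i < n; i++)
            if (strcmp(d[i].key, k) == 0)
                return d[i].val;
        return NULL;
    }

    static void
    toy_set(struct toy_kv *d, int *n, const char *k, const char *v)
    {
        snprintf(d[*n].key, sizeof(d[*n].key), "%s", k);
        snprintf(d[*n].val, sizeof(d[*n].val), "%s", v);
        (*n)++;
    }

    int
    main(void)
    {
        char key[64];

        /* A peer reported one brick of volume 1: local index 0, global order 2. */
        toy_set(src, &src_n, "vol1.brickdir0", "/run/gluster/snaps/xyz/brick3");
        toy_set(src, &src_n, "vol1.brick0.order", "2");

        /* Read with the peer-local index ... */
        snprintf(key, sizeof(key), "vol1.brick%d.order", 0);
        int brick_order = atoi(toy_get(src, src_n, key));

        snprintf(key, sizeof(key), "vol1.brickdir%d", 0);
        const char *brick_dir = toy_get(src, src_n, key);

        /* ... and write back re-indexed by the brick's global order. */
        snprintf(key, sizeof(key), "vol1.brickdir%d", brick_order);
        toy_set(dst, &dst_n, key, brick_dir);

        printf("%s = %s\n", key, toy_get(dst, dst_n, key));
        return 0;
    }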
GF_ASSERT(dict); + GF_ASSERT(volinfo); + GF_ASSERT(brickinfo); + + this = THIS; + GF_ASSERT(this); + conf = this->private; + GF_ASSERT(conf); + + if (!key_prefix) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, + "key prefix is NULL"); + goto out; + } + + GLUSTERD_GET_BRICK_PIDFILE(pidfile, volinfo, brickinfo, conf); + + brick_online = gf_is_service_running(pidfile, &pid); + + ret = dict_set_int32(dict, key_prefix, brick_online); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set %s", key_prefix); + goto out; + } + brick_online = 0; + + ret = 0; + +out: + return ret; +} + +/* This function will check whether the given device + * is a thinly provisioned LV or not. + * + * @param device LV device path + * + * @return _gf_true if LV is thin else _gf_false + */ +gf_boolean_t +glusterd_is_thinp_brick(char *device, uint32_t *op_errno) +{ + int ret = -1; + char msg[1024] = ""; + char pool_name[PATH_MAX] = ""; + char *ptr = NULL; + xlator_t *this = NULL; + runner_t runner = { + 0, + }; + gf_boolean_t is_thin = _gf_false; + + this = THIS; + + GF_VALIDATE_OR_GOTO("glusterd", this, out); + GF_VALIDATE_OR_GOTO(this->name, device, out); + GF_VALIDATE_OR_GOTO(this->name, op_errno, out); + + snprintf(msg, sizeof(msg), "Get thin pool name for device %s", device); + + runinit(&runner); + + runner_add_args(&runner, "lvs", "--noheadings", "-o", "pool_lv", device, + NULL); + runner_redir(&runner, STDOUT_FILENO, RUN_PIPE); + runner_log(&runner, this->name, GF_LOG_DEBUG, msg); + + ret = runner_start(&runner); + if (ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_TPOOL_GET_FAIL, + "Failed to get thin pool " + "name for device %s", + device); + runner_end(&runner); + goto out; + } + + ptr = fgets(pool_name, sizeof(pool_name), + runner_chio(&runner, STDOUT_FILENO)); + if (!ptr || !strlen(pool_name)) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_TPOOL_GET_FAIL, + "Failed to get pool name " + "for device %s", + device); + runner_end(&runner); + ret = -1; + goto out; + } + + runner_end(&runner); + + /* Trim all the whitespaces. 
*/ + ptr = gf_trim(pool_name); + + /* If the LV has thin pool associated with this + * then it is a thinly provisioned LV else it is + * regular LV */ + if (0 != ptr[0]) { + is_thin = _gf_true; + } + +out: + if (!is_thin) + *op_errno = EG_NOTTHINP; + + return is_thin; +} + +int +glusterd_snap_create_clone_common_prevalidate( + dict_t *rsp_dict, int flags, char *snapname, char *err_str, + char *snap_volname, int64_t volcount, glusterd_volinfo_t *volinfo, + gf_loglevel_t *loglevel, int clone, uint32_t *op_errno) +{ + char *device = NULL; + char *orig_device = NULL; + char key[128] = ""; + int ret = -1; + int64_t i = 1; + int64_t brick_order = 0; + int64_t brick_count = 0; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + int32_t len = 0; + + this = THIS; + conf = this->private; + GF_ASSERT(conf); + GF_VALIDATE_OR_GOTO(this->name, op_errno, out); + + if (!snapname || !volinfo) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, + "Failed to validate " + "snapname or volume information"); + ret = -1; + goto out; + } + + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + if (gf_uuid_compare(brickinfo->uuid, MY_UUID)) { + brick_order++; + continue; + } + + if (!glusterd_is_brick_started(brickinfo)) { + if (!clone && (flags & GF_CLI_FLAG_OP_FORCE)) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_BRICK_DISCONNECTED, + "brick %s:%s is not started", brickinfo->hostname, + brickinfo->path); + brick_order++; + brick_count++; + continue; + } + if (!clone) { + snprintf(err_str, PATH_MAX, + "One or more bricks are not running. " + "Please run volume status command to see " + "brick status.\n" + "Please start the stopped brick " + "and then issue snapshot create " + "command or use [force] option in " + "snapshot create to override this " + "behavior."); + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_BRICK_NOT_RUNNING, + "Please run volume status command to see brick " + "status.Please start the stopped brick and then issue " + "snapshot create command or use 'force' option in " + "snapshot create to override this behavior.", + NULL); + } else { + snprintf(err_str, PATH_MAX, + "One or more bricks are not running. " + "Please run snapshot status command to see " + "brick status.\n" + "Please start the stopped brick " + "and then issue snapshot clone " + "command "); + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_BRICK_NOT_RUNNING, + "Please run snapshot status command to see brick " + "status. Please start the stopped brick and then issue " + "snapshot clone command.", + NULL); + } + *op_errno = EG_BRCKDWN; + ret = -1; + goto out; + } + + orig_device = glusterd_get_brick_mount_device(brickinfo->path); + if (!orig_device) { + len = snprintf(err_str, PATH_MAX, + "getting device name for the brick " + "%s:%s failed", + brickinfo->hostname, brickinfo->path); + if (len < 0) { + strcpy(err_str, "<error>"); + } + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_BRK_MNTPATH_GET_FAIL, + "Brick_hostname=%s, Brick_path=%s", brickinfo->hostname, + brickinfo->path, NULL); + ret = -1; + goto out; + } + if (!clone) { + if (!glusterd_is_thinp_brick(orig_device, op_errno)) { + snprintf(err_str, PATH_MAX, + "Snapshot is supported only for " + "thin provisioned LV. 
Ensure that " + "all bricks of %s are thinly " + "provisioned LV.", + volinfo->volname); + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_SNAPSHOT_NOT_THIN_PROVISIONED, + "Ensure that all bricks of volume are thinly " + "provisioned LV, Volume=%s", + volinfo->volname, NULL); + ret = -1; + goto out; + } + } + + device = glusterd_build_snap_device_path(orig_device, snap_volname, + brick_count); + if (!device) { + snprintf(err_str, PATH_MAX, + "cannot copy the snapshot device " + "name (volname: %s, snapname: %s)", + volinfo->volname, snapname); + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_SNAP_DEVICE_NAME_GET_FAIL, "Volname=%s, Snapname=%s", + volinfo->volname, snapname, NULL); + *loglevel = GF_LOG_WARNING; + ret = -1; + goto out; + } + + GF_FREE(orig_device); + orig_device = NULL; + + snprintf(key, sizeof(key), "vol%" PRId64 ".brick_snapdevice%" PRId64, i, + brick_count); + ret = dict_set_dynstr_with_alloc(rsp_dict, key, device); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set %s", key); + goto out; + } + + ret = glusterd_update_mntopts(brickinfo->path, brickinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRK_MOUNTOPTS_FAIL, + "Failed to " + "update mount options for %s brick", + brickinfo->path); + } + + snprintf(key, sizeof(key), "vol%" PRId64 ".fstype%" PRId64, i, + brick_count); + ret = dict_set_dynstr_with_alloc(rsp_dict, key, brickinfo->fstype); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set %s", key); + goto out; + } + + snprintf(key, sizeof(key), "vol%" PRId64 ".mnt_opts%" PRId64, i, + brick_count); + ret = dict_set_dynstr_with_alloc(rsp_dict, key, brickinfo->mnt_opts); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set %s", key); + goto out; + } + + snprintf(key, sizeof(key), "vol%" PRId64 ".brickdir%" PRId64, i, + brick_count); + ret = dict_set_dynstr_with_alloc(rsp_dict, key, brickinfo->mount_dir); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set %s", key); + goto out; + } + + snprintf(key, sizeof(key) - 1, "vol%" PRId64 ".brick%" PRId64 ".order", + i, brick_count); + ret = dict_set_int64(rsp_dict, key, brick_order); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set %s", key); + goto out; + } + + snprintf(key, sizeof(key), "vol%" PRId64 ".brick%" PRId64 ".status", i, + brick_order); + + ret = glusterd_add_brick_status_to_dict(rsp_dict, volinfo, brickinfo, + key); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "failed to " + "add brick status to dict"); + goto out; + } + brick_count++; + brick_order++; + if (device) { + GF_FREE(device); + device = NULL; + } + } + snprintf(key, sizeof(key) - 1, "vol%" PRId64 "_brickcount", volcount); + ret = dict_set_int64(rsp_dict, key, brick_count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set %s", key); + goto out; + } + ret = 0; +out: + if (orig_device) + GF_FREE(orig_device); + + if (device) + GF_FREE(device); + + return ret; +} + +int +glusterd_snapshot_clone_prevalidate(dict_t *dict, char **op_errstr, + dict_t *rsp_dict, uint32_t *op_errno) +{ + char *clonename = NULL; + char *snapname = NULL; + char device_name[64] = ""; + glusterd_snap_t *snap = NULL; + char err_str[PATH_MAX] = ""; + int ret = -1; + int64_t volcount = 1; + glusterd_volinfo_t *snap_vol = NULL; + xlator_t *this = NULL; + uuid_t *snap_volid = NULL; + 
gf_loglevel_t loglevel = GF_LOG_ERROR; + glusterd_volinfo_t *volinfo = NULL; + + this = THIS; + GF_ASSERT(op_errstr); + GF_ASSERT(dict); + GF_VALIDATE_OR_GOTO(this->name, op_errno, out); + + ret = dict_get_strn(dict, "clonename", SLEN("clonename"), &clonename); + if (ret) { + snprintf(err_str, sizeof(err_str), + "Failed to " + "get the clone name"); + goto out; + } + + ret = dict_get_strn(dict, "snapname", SLEN("snapname"), &snapname); + if (ret) { + snprintf(err_str, sizeof(err_str), "Failed to get snapname"); + goto out; + } + + ret = glusterd_volinfo_find(clonename, &volinfo); + if (!ret) { + ret = -1; + snprintf(err_str, sizeof(err_str), + "Volume with name:%s " + "already exists", + clonename); + *op_errno = EG_VOLEXST; + goto out; + } + /* need to find snap volinfo*/ + snap = glusterd_find_snap_by_name(snapname); + if (!snap) { + ret = -1; + snprintf(err_str, sizeof(err_str), + "Failed to find :%s " + "snap", + snapname); + goto out; + } + + /* TODO : As of now there is only one volume in snapshot. + * Change this when multiple volume snapshot is introduced + */ + snap_vol = list_entry(snap->volumes.next, glusterd_volinfo_t, vol_list); + if (!snap_vol) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, + "Failed to get snap " + "volinfo %s", + snap->snapname); + goto out; + } + + if (!glusterd_is_volume_started(snap_vol)) { + snprintf(err_str, sizeof(err_str), + "Snapshot %s is " + "not activated", + snap->snapname); + loglevel = GF_LOG_WARNING; + *op_errno = EG_VOLSTP; + goto out; + } + + ret = dict_get_bin(dict, "vol1_volid", (void **)&snap_volid); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to fetch snap_volid"); + goto out; + } + + GLUSTERD_GET_UUID_NOHYPHEN(device_name, *snap_volid); + + /* Adding snap bricks mount paths to the dict */ + ret = glusterd_snap_create_clone_common_prevalidate( + rsp_dict, 0, snapname, err_str, device_name, 1, snap_vol, &loglevel, 1, + op_errno); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PRE_VALIDATION_FAIL, + "Failed to pre validate"); + goto out; + } + + ret = dict_set_int64(rsp_dict, "volcount", volcount); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set volcount"); + goto out; + } + +out: + + if (ret && err_str[0] != '\0') { + gf_msg(this->name, loglevel, 0, GD_MSG_SNAP_CLONE_PREVAL_FAILED, "%s", + err_str); + *op_errstr = gf_strdup(err_str); + } + + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +int +glusterd_snapshot_create_prevalidate(dict_t *dict, char **op_errstr, + dict_t *rsp_dict, uint32_t *op_errno) +{ + char *volname = NULL; + char *snapname = NULL; + char key[64] = ""; + int keylen; + char snap_volname[64] = ""; + char err_str[PATH_MAX] = ""; + int ret = -1; + int64_t i = 0; + int64_t volcount = 0; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + uuid_t *snap_volid = NULL; + gf_loglevel_t loglevel = GF_LOG_ERROR; + glusterd_conf_t *conf = NULL; + int64_t effective_max_limit = 0; + int flags = 0; + uint64_t opt_hard_max = GLUSTERD_SNAPS_MAX_HARD_LIMIT; + char *description = NULL; + + this = THIS; + GF_ASSERT(op_errstr); + conf = this->private; + GF_ASSERT(conf); + GF_VALIDATE_OR_GOTO(this->name, op_errno, out); + + ret = dict_get_int64(dict, "volcount", &volcount); + if (ret) { + snprintf(err_str, sizeof(err_str), + "Failed to " + "get the volume count"); + goto out; + } + if (volcount <= 0) { + snprintf(err_str, sizeof(err_str), + "Invalid volume count %" PRId64 " supplied", volcount); 
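The clone prevalidation above turns the snap volume's UUID into a hyphen-free string (GLUSTERD_GET_UUID_NOHYPHEN) so it can be used directly as the LVM snapshot volume name, sidestepping any restrictions a user-chosen name might run into. A small sketch of that conversion using libuuid (<uuid/uuid.h>, link with -luuid); uuid_nohyphen() here is illustrative, not a glusterd helper:

    #include <stdio.h>
    #include <uuid/uuid.h>

    static void
    uuid_nohyphen(uuid_t u, char *out /* at least 33 bytes */)
    {
        char with_hyphens[37]; /* 36 characters + NUL */
        int j = 0;

        uuid_unparse(u, with_hyphens);
        for (int i = 0; with_hyphens[i]; i++)
            if (with_hyphens[i] != '-')
                out[j++] = with_hyphens[i];
        out[j] = '\0';
    }

    int
    main(void)
    {
        uuid_t id;
        char name[33];

        uuid_generate(id);
        uuid_nohyphen(id, name);
        printf("snap volume / LV name: %s\n", name);
        return 0;
    }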
+ ret = -1; + goto out; + } + + ret = dict_get_strn(dict, "snapname", SLEN("snapname"), &snapname); + if (ret) { + snprintf(err_str, sizeof(err_str), "Failed to get snapname"); + goto out; + } + + ret = dict_get_strn(dict, "description", SLEN("description"), &description); + if (description && !(*description)) { + /* description should have a non-null value */ + ret = -1; + snprintf(err_str, sizeof(err_str), + "Snapshot cannot be " + "created with empty description"); + goto out; + } + + ret = dict_get_int32n(dict, "flags", SLEN("flags"), &flags); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get flags"); + goto out; + } + + if (glusterd_find_snap_by_name(snapname)) { + ret = -1; + snprintf(err_str, sizeof(err_str), + "Snapshot %s already " + "exists", + snapname); + *op_errno = EG_SNAPEXST; + goto out; + } + + for (i = 1; i <= volcount; i++) { + keylen = snprintf(key, sizeof(key), "volname%" PRId64, i); + ret = dict_get_strn(dict, key, keylen, &volname); + if (ret) { + snprintf(err_str, sizeof(err_str), "failed to get volume name"); + goto out; + } + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + snprintf(err_str, sizeof(err_str), "Volume (%s) does not exist ", + volname); + *op_errno = EG_NOVOL; + goto out; + } + + ret = -1; + if (!glusterd_is_volume_started(volinfo)) { + snprintf(err_str, sizeof(err_str), + "volume %s is " + "not started", + volinfo->volname); + loglevel = GF_LOG_WARNING; + *op_errno = EG_VOLSTP; + goto out; + } + + if (glusterd_is_defrag_on(volinfo)) { + snprintf(err_str, sizeof(err_str), + "rebalance process is running for the " + "volume %s", + volname); + loglevel = GF_LOG_WARNING; + *op_errno = EG_RBALRUN; + goto out; + } + + if (gd_vol_is_geo_rep_active(volinfo)) { + snprintf(err_str, sizeof(err_str), + "geo-replication session is running for " + "the volume %s. Session needs to be " + "stopped before taking a snapshot.", + volname); + loglevel = GF_LOG_WARNING; + *op_errno = EG_GEOREPRUN; + goto out; + } + + if (volinfo->is_snap_volume == _gf_true) { + snprintf(err_str, sizeof(err_str), "Volume %s is a snap volume", + volname); + loglevel = GF_LOG_WARNING; + *op_errno = EG_ISSNAP; + goto out; + } + + /* "snap-max-hard-limit" might not be set by user explicitly, + * in that case it's better to consider the default value. + * Hence not erroring out if Key is not found. + */ + ret = dict_get_uint64( + conf->opts, GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT, &opt_hard_max); + if (ret) { + ret = 0; + gf_msg_debug(this->name, 0, + "%s is not present " + "in opts dictionary", + GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT); + } + + if (volinfo->snap_max_hard_limit < opt_hard_max) + effective_max_limit = volinfo->snap_max_hard_limit; + else + effective_max_limit = opt_hard_max; + + if (volinfo->snap_count >= effective_max_limit) { + ret = -1; + snprintf(err_str, sizeof(err_str), + "The number of existing snaps has reached " + "the effective maximum limit of %" PRIu64 + ", " + "for the volume (%s). Please delete few " + "snapshots before taking further snapshots.", + effective_max_limit, volname); + loglevel = GF_LOG_WARNING; + *op_errno = EG_HRDLMT; + goto out; + } + + snprintf(key, sizeof(key), "vol%" PRId64 "_volid", i); + ret = dict_get_bin(dict, key, (void **)&snap_volid); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to fetch snap_volid"); + goto out; + } + + /* snap volume uuid is used as lvm snapshot name. 
+ This will avoid restrictions on snapshot names + provided by user */ + GLUSTERD_GET_UUID_NOHYPHEN(snap_volname, *snap_volid); + + ret = glusterd_snap_create_clone_common_prevalidate( + rsp_dict, flags, snapname, err_str, snap_volname, i, volinfo, + &loglevel, 0, op_errno); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PRE_VALIDATION_FAIL, + "Failed to pre validate"); + goto out; + } + } + + ret = dict_set_int64(rsp_dict, "volcount", volcount); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set volcount"); + goto out; + } + + ret = 0; + +out: + if (ret && err_str[0] != '\0') { + gf_msg(this->name, loglevel, 0, GD_MSG_SNAPSHOT_OP_FAILED, "%s", + err_str); + *op_errstr = gf_strdup(err_str); + } + + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +glusterd_snap_t * +glusterd_new_snap_object() +{ + glusterd_snap_t *snap = NULL; + + snap = GF_CALLOC(1, sizeof(*snap), gf_gld_mt_snap_t); + + if (snap) { + if (LOCK_INIT(&snap->lock)) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_LOCK_INIT_FAILED, + "Failed initiating" + " snap lock"); + GF_FREE(snap); + return NULL; + } + + CDS_INIT_LIST_HEAD(&snap->snap_list); + CDS_INIT_LIST_HEAD(&snap->volumes); + snap->snapname[0] = 0; + snap->snap_status = GD_SNAP_STATUS_INIT; + } + + return snap; +}; + +/* Function glusterd_list_add_snapvol adds the volinfo object (snapshot volume) + to the snapshot object list and to the parent volume list */ +int32_t +glusterd_list_add_snapvol(glusterd_volinfo_t *origin_vol, + glusterd_volinfo_t *snap_vol) +{ + int ret = -1; + glusterd_snap_t *snap = NULL; + + GF_VALIDATE_OR_GOTO("glusterd", origin_vol, out); + GF_VALIDATE_OR_GOTO("glusterd", snap_vol, out); + + snap = snap_vol->snapshot; + GF_ASSERT(snap); + + cds_list_add_tail(&snap_vol->vol_list, &snap->volumes); + LOCK(&origin_vol->lock); + { + glusterd_list_add_order(&snap_vol->snapvol_list, + &origin_vol->snap_volumes, + glusterd_compare_snap_vol_time); + + origin_vol->snap_count++; + } + UNLOCK(&origin_vol->lock); + + gf_msg_debug(THIS->name, 0, "Snapshot %s added to the list", + snap->snapname); + ret = 0; +out: + return ret; +} + +glusterd_snap_t * +glusterd_find_snap_by_name(char *snapname) +{ + glusterd_snap_t *snap = NULL; + glusterd_conf_t *priv = NULL; + + priv = THIS->private; + GF_ASSERT(priv); + GF_ASSERT(snapname); + + cds_list_for_each_entry(snap, &priv->snapshots, snap_list) + { + if (!strcmp(snap->snapname, snapname)) { + gf_msg_debug(THIS->name, 0, + "Found " + "snap %s (%s)", + snap->snapname, uuid_utoa(snap->snap_id)); + goto out; + } + } + snap = NULL; +out: + return snap; +} + +glusterd_snap_t * +glusterd_find_snap_by_id(uuid_t snap_id) +{ + glusterd_snap_t *snap = NULL; + glusterd_conf_t *priv = NULL; + + priv = THIS->private; + GF_ASSERT(priv); + + if (gf_uuid_is_null(snap_id)) + goto out; + + cds_list_for_each_entry(snap, &priv->snapshots, snap_list) + { + if (!gf_uuid_compare(snap->snap_id, snap_id)) { + gf_msg_debug(THIS->name, 0, + "Found " + "snap %s (%s)", + snap->snapname, uuid_utoa(snap->snap_id)); + goto out; + } + } + snap = NULL; +out: + return snap; +} + +int +glusterd_do_lvm_snapshot_remove(glusterd_volinfo_t *snap_vol, + glusterd_brickinfo_t *brickinfo, + const char *mount_pt, const char *snap_device) +{ + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + runner_t runner = { + 0, + }; + char msg[1024] = ""; + char pidfile[PATH_MAX] = ""; + pid_t pid = -1; + int retry_count = 0; + char *mnt_pt = NULL; + gf_boolean_t unmount = _gf_true; 
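The create prevalidation above enforces the lower of the volume's snap-max-hard-limit and the cluster-wide option, and the same minimum later drives the "snaps-available" figure in the info paths. A tiny worked sketch of that arithmetic, with made-up numbers:

    #include <inttypes.h>
    #include <stdio.h>

    int
    main(void)
    {
        uint64_t vol_limit = 256;  /* volume snap-max-hard-limit */
        uint64_t sys_limit = 100;  /* cluster-wide option, if set */
        uint64_t snap_count = 97;  /* snapshots that already exist */

        uint64_t effective = vol_limit < sys_limit ? vol_limit : sys_limit;
        uint64_t available = snap_count < effective ? effective - snap_count : 0;

        printf("effective limit: %" PRIu64 "\n", effective);
        printf("snaps available: %" PRIu64 "\n", available);
        /* snapshot create is refused once snap_count >= effective. */
        return 0;
    }

The same comparison appears again in glusterd_snapshot_get_snapvol_detail() and glusterd_snapshot_get_info_by_volume() further down when computing snaps-available.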
+ int32_t len = 0; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + if (!brickinfo) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY, + "brickinfo NULL"); + goto out; + } + GF_ASSERT(snap_vol); + GF_ASSERT(mount_pt); + GF_ASSERT(snap_device); + + GLUSTERD_GET_BRICK_PIDFILE(pidfile, snap_vol, brickinfo, priv); + if (gf_is_service_running(pidfile, &pid)) { + (void)send_attach_req(this, brickinfo->rpc, brickinfo->path, NULL, NULL, + GLUSTERD_BRICK_TERMINATE); + brickinfo->status = GF_BRICK_STOPPED; + } + + /* Check if the brick is mounted and then try unmounting the brick */ + ret = glusterd_get_brick_root(brickinfo->path, &mnt_pt); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_BRICK_PATH_UNMOUNTED, + "Getting the root " + "of the brick for volume %s (snap %s) failed. " + "Removing lv (%s).", + snap_vol->volname, snap_vol->snapshot->snapname, snap_device); + /* The brick path is already unmounted. Remove the lv only * + * Need not fail the operation */ + ret = 0; + unmount = _gf_false; + } + + if ((unmount == _gf_true) && (strcmp(mnt_pt, mount_pt))) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_BRICK_PATH_UNMOUNTED, + "Lvm is not mounted for brick %s:%s. " + "Removing lv (%s).", + brickinfo->hostname, brickinfo->path, snap_device); + /* The brick path is already unmounted. Remove the lv only * + * Need not fail the operation */ + unmount = _gf_false; + } + + /* umount cannot be done when the brick process is still in the process + of shutdown, so give three re-tries */ + while ((unmount == _gf_true) && (retry_count < 3)) { + retry_count++; + /*umount2 system call doesn't cleanup mtab entry after un-mount. + So use external umount command*/ + ret = glusterd_umount(mount_pt); + if (!ret) + break; + + gf_msg_debug(this->name, 0, + "umount failed for " + "path %s (brick: %s): %s. Retry(%d)", + mount_pt, brickinfo->path, strerror(errno), retry_count); + + /* + * This used to be one second, but that wasn't long enough + * to get past the spurious EPERM errors that prevent some + * tests (especially bug-1162462.t) from passing reliably. + * + * TBD: figure out where that garbage is coming from + */ + sleep(3); + } + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UNOUNT_FAILED, + "umount failed for " + "path %s (brick: %s): %s.", + mount_pt, brickinfo->path, strerror(errno)); + /* + * This is cheating, but necessary until we figure out how to + * shut down a brick within a still-living brick daemon so that + * random translators aren't keeping the mountpoint alive. 
+ * + * TBD: figure out a real solution + */ + ret = 0; + goto out; + } + + runinit(&runner); + len = snprintf(msg, sizeof(msg), + "remove snapshot of the brick %s:%s, " + "device: %s", + brickinfo->hostname, brickinfo->path, snap_device); + if (len < 0) { + strcpy(msg, "<error>"); + } + runner_add_args(&runner, LVM_REMOVE, "-f", snap_device, NULL); + runner_log(&runner, "", GF_LOG_DEBUG, msg); + + ret = runner_run(&runner); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_REMOVE_FAIL, + "removing snapshot of the " + "brick (%s:%s) of device %s failed", + brickinfo->hostname, brickinfo->path, snap_device); + goto out; + } + +out: + if (mnt_pt) + GF_FREE(mnt_pt); + + return ret; +} + +int32_t +glusterd_lvm_snapshot_remove(dict_t *rsp_dict, glusterd_volinfo_t *snap_vol) +{ + int32_t brick_count = -1; + int32_t ret = -1; + int32_t err = 0; + glusterd_brickinfo_t *brickinfo = NULL; + xlator_t *this = NULL; + char brick_dir[PATH_MAX] = ""; + char snap_path[PATH_MAX] = ""; + char *tmp = NULL; + char *brick_mount_path = NULL; + gf_boolean_t is_brick_dir_present = _gf_false; + struct stat stbuf = { + 0, + }; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(snap_vol); + + if ((snap_vol->is_snap_volume == _gf_false) && + (gf_uuid_is_null(snap_vol->restored_from_snap))) { + gf_msg_debug(this->name, 0, + "Not a snap volume, or a restored snap volume."); + ret = 0; + goto out; + } + + brick_count = -1; + cds_list_for_each_entry(brickinfo, &snap_vol->bricks, brick_list) + { + brick_count++; + if (gf_uuid_compare(brickinfo->uuid, MY_UUID)) { + gf_msg_debug(this->name, 0, "%s:%s belongs to a different node", + brickinfo->hostname, brickinfo->path); + continue; + } + + /* Fetch the brick mount path from the brickinfo->path */ + ret = glusterd_find_brick_mount_path(brickinfo->path, + &brick_mount_path); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_GET_INFO_FAIL, + "Failed to find brick_mount_path for %s", brickinfo->path); + ret = 0; + continue; + } + + /* As deactivated snapshot have no active mount point we + * check only for activated snapshot. + */ + if (snap_vol->status == GLUSTERD_STATUS_STARTED) { + ret = sys_lstat(brick_mount_path, &stbuf); + if (ret) { + gf_msg_debug(this->name, 0, "Brick %s:%s already deleted.", + brickinfo->hostname, brickinfo->path); + ret = 0; + continue; + } + } + + if (brickinfo->snap_status == -1) { + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_SNAPSHOT_PENDING, + "snapshot was pending. lvm not present " + "for brick %s:%s of the snap %s.", + brickinfo->hostname, brickinfo->path, + snap_vol->snapshot->snapname); + + if (rsp_dict && (snap_vol->is_snap_volume == _gf_true)) { + /* Adding missed delete to the dict */ + ret = glusterd_add_missed_snaps_to_dict( + rsp_dict, snap_vol, brickinfo, brick_count + 1, + GF_SNAP_OPTION_TYPE_DELETE); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_MISSED_SNAP_CREATE_FAIL, + "Failed to add missed snapshot " + "info for %s:%s in the " + "rsp_dict", + brickinfo->hostname, brickinfo->path); + goto out; + } + } + + continue; + } + + /* Check if the brick has a LV associated with it */ + if (strlen(brickinfo->device_path) == 0) { + gf_msg_debug(this->name, 0, + "Brick (%s:%s) does not have a LV " + "associated with it. Removing the brick path", + brickinfo->hostname, brickinfo->path); + goto remove_brick_path; + } + + /* Verify if the device path exists or not */ + ret = sys_stat(brickinfo->device_path, &stbuf); + if (ret) { + gf_msg_debug(this->name, 0, + "LV (%s) for brick (%s:%s) not present. 
" + "Removing the brick path", + brickinfo->device_path, brickinfo->hostname, + brickinfo->path); + /* Making ret = 0 as absence of device path should * + * not fail the remove operation */ + ret = 0; + goto remove_brick_path; + } + + ret = glusterd_do_lvm_snapshot_remove( + snap_vol, brickinfo, brick_mount_path, brickinfo->device_path); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_REMOVE_FAIL, + "Failed to " + "remove the snapshot %s (%s)", + brickinfo->path, brickinfo->device_path); + err = -1; /* We need to record this failure */ + } + + remove_brick_path: + /* After removing the brick dir fetch the parent path + * i.e /var/run/gluster/snaps/<snap-vol-id>/ + */ + if (is_brick_dir_present == _gf_false) { + /* Need to fetch brick_dir to be removed from + * brickinfo->path, as in a restored volume, + * snap_vol won't have the non-hyphenated snap_vol_id + */ + tmp = strstr(brick_mount_path, "brick"); + if (!tmp) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, + "Invalid brick %s", brickinfo->path); + GF_FREE(brick_mount_path); + brick_mount_path = NULL; + continue; + } + + strncpy(brick_dir, brick_mount_path, + (size_t)(tmp - brick_mount_path)); + + /* Peers not hosting bricks will have _gf_false */ + is_brick_dir_present = _gf_true; + } + + GF_FREE(brick_mount_path); + brick_mount_path = NULL; + } + + if (is_brick_dir_present == _gf_true) { + ret = recursive_rmdir(brick_dir); + if (ret) { + if (errno == ENOTEMPTY) { + /* Will occur when multiple glusterds + * are running in the same node + */ + gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_DIR_OP_FAILED, + "Failed to rmdir: %s, err: %s. " + "More than one glusterd running " + "on this node.", + brick_dir, strerror(errno)); + ret = 0; + goto out; + } else + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED, + "Failed to rmdir: %s, err: %s", brick_dir, + strerror(errno)); + goto out; + } + + /* After removing brick_dir, fetch and remove snap path + * i.e. /var/run/gluster/snaps/<snap-name>. 
+ */ + if (!snap_vol->snapshot) { + gf_msg(this->name, GF_LOG_WARNING, EINVAL, GD_MSG_INVALID_ENTRY, + "snapshot not" + "present in snap_vol"); + ret = -1; + goto out; + } + + snprintf(snap_path, sizeof(snap_path), "%s/%s", snap_mount_dir, + snap_vol->snapshot->snapname); + ret = recursive_rmdir(snap_path); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED, + "Failed to remove " + "%s directory : error : %s", + snap_path, strerror(errno)); + goto out; + } + } + + ret = 0; +out: + if (err) { + ret = err; + } + GF_FREE(brick_mount_path); + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_snap_volume_remove(dict_t *rsp_dict, glusterd_volinfo_t *snap_vol, + gf_boolean_t remove_lvm, gf_boolean_t force) +{ + int ret = -1; + int save_ret = 0; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_volinfo_t *origin_vol = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(rsp_dict); + GF_ASSERT(snap_vol); + + if (!snap_vol) { + gf_msg(this->name, GF_LOG_WARNING, EINVAL, GD_MSG_INVALID_ENTRY, + "snap_vol in NULL"); + ret = -1; + goto out; + } + + cds_list_for_each_entry(brickinfo, &snap_vol->bricks, brick_list) + { + if (gf_uuid_compare(brickinfo->uuid, MY_UUID)) + continue; + + ret = glusterd_brick_stop(snap_vol, brickinfo, _gf_false); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_BRICK_STOP_FAIL, + "Failed to stop " + "brick for volume %s", + snap_vol->volname); + save_ret = ret; + + /* Don't clean up the snap on error when + force flag is disabled */ + if (!force) + goto out; + } + } + + /* Only remove the backend lvm when required */ + if (remove_lvm) { + ret = glusterd_lvm_snapshot_remove(rsp_dict, snap_vol); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_REMOVE_FAIL, + "Failed to remove " + "lvm snapshot volume %s", + snap_vol->volname); + save_ret = ret; + if (!force) + goto out; + } + } + + ret = glusterd_store_delete_volume(snap_vol); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOL_DELETE_FAIL, + "Failed to remove volume %s " + "from store", + snap_vol->volname); + save_ret = ret; + if (!force) + goto out; + } + + if (!cds_list_empty(&snap_vol->snapvol_list)) { + ret = glusterd_volinfo_find(snap_vol->parent_volname, &origin_vol); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND, + "Failed to get " + "parent volinfo %s for volume %s", + snap_vol->parent_volname, snap_vol->volname); + save_ret = ret; + if (!force) + goto out; + } + origin_vol->snap_count--; + } + + glusterd_volinfo_unref(snap_vol); + + if (save_ret) + ret = save_ret; +out: + gf_msg_trace(this->name, 0, "returning %d", ret); + return ret; +} + +int32_t +glusterd_snap_remove(dict_t *rsp_dict, glusterd_snap_t *snap, + gf_boolean_t remove_lvm, gf_boolean_t force, + gf_boolean_t is_clone) +{ + int ret = -1; + int save_ret = 0; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_volinfo_t *tmp = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(rsp_dict); + GF_ASSERT(snap); + + if (!snap) { + gf_msg(this->name, GF_LOG_WARNING, EINVAL, GD_MSG_INVALID_ENTRY, + "snap is NULL"); + ret = -1; + goto out; + } + + cds_list_for_each_entry_safe(snap_vol, tmp, &snap->volumes, vol_list) + { + ret = glusterd_snap_volume_remove(rsp_dict, snap_vol, remove_lvm, + force); + if (ret && !force) { + /* Don't clean up the snap on error when + force flag is disabled */ + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_REMOVE_FAIL, + "Failed to remove " + 
"volinfo %s for snap %s", + snap_vol->volname, snap->snapname); + save_ret = ret; + goto out; + } + } + + /* A clone does not persist snap info in /var/lib/glusterd/snaps/ * + * and hence there is no snap info to be deleted from there * + */ + if (!is_clone) { + ret = glusterd_store_delete_snap(snap); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_REMOVE_FAIL, + "Failed to remove snap %s from store", snap->snapname); + save_ret = ret; + if (!force) + goto out; + } + } + + ret = glusterd_snapobject_delete(snap); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_REMOVE_FAIL, + "Failed to delete " + "snap object %s", + snap->snapname); + + if (save_ret) + ret = save_ret; +out: + gf_msg_trace(THIS->name, 0, "returning %d", ret); + return ret; +} + +static int +glusterd_snapshot_get_snapvol_detail(dict_t *dict, glusterd_volinfo_t *snap_vol, + const char *keyprefix, const int detail) +{ + int ret = -1; + int snap_limit = 0; + char key[64] = ""; /* keyprefix is quite small, up to 32 byts */ + int keylen; + char *value = NULL; + glusterd_volinfo_t *origin_vol = NULL; + glusterd_conf_t *conf = NULL; + xlator_t *this = NULL; + uint64_t opt_hard_max = GLUSTERD_SNAPS_MAX_HARD_LIMIT; + + this = THIS; + conf = this->private; + GF_ASSERT(conf); + + GF_ASSERT(dict); + GF_ASSERT(snap_vol); + GF_ASSERT(keyprefix); + + /* Volume Name */ + value = gf_strdup(snap_vol->volname); + if (!value) + goto out; + + keylen = snprintf(key, sizeof(key), "%s.volname", keyprefix); + ret = dict_set_dynstrn(dict, key, keylen, value); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set " + "volume name in dictionary: %s", + key); + goto out; + } + + /* Volume ID */ + value = gf_strdup(uuid_utoa(snap_vol->volume_id)); + if (NULL == value) { + ret = -1; + goto out; + } + + keylen = snprintf(key, sizeof(key), "%s.vol-id", keyprefix); + ret = dict_set_dynstrn(dict, key, keylen, value); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_MEMORY, + "Failed to set " + "volume id in dictionary: %s", + key); + goto out; + } + value = NULL; + + /* volume status */ + keylen = snprintf(key, sizeof(key), "%s.vol-status", keyprefix); + switch (snap_vol->status) { + case GLUSTERD_STATUS_STARTED: + ret = dict_set_nstrn(dict, key, keylen, "Started", SLEN("Started")); + break; + case GLUSTERD_STATUS_STOPPED: + ret = dict_set_nstrn(dict, key, keylen, "Stopped", SLEN("Stopped")); + break; + case GD_SNAP_STATUS_NONE: + ret = dict_set_nstrn(dict, key, keylen, "None", SLEN("None")); + break; + default: + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, + "Invalid volume status"); + ret = -1; + goto out; + } + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set volume status" + " in dictionary: %s", + key); + goto out; + } + + ret = glusterd_volinfo_find(snap_vol->parent_volname, &origin_vol); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND, + "failed to get the parent " + "volinfo for the volume %s", + snap_vol->volname); + goto out; + } + + /* "snap-max-hard-limit" might not be set by user explicitly, + * in that case it's better to consider the default value. + * Hence not erroring out if Key is not found. 
+ */ + ret = dict_get_uint64(conf->opts, GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT, + &opt_hard_max); + if (ret) { + ret = 0; + gf_msg_debug(this->name, 0, + "%s is not present in " + "opts dictionary", + GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT); + } + + if (opt_hard_max < origin_vol->snap_max_hard_limit) { + snap_limit = opt_hard_max; + gf_msg_debug(this->name, 0, + "system snap-max-hard-limit is" + " lesser than volume snap-max-hard-limit, " + "snap-max-hard-limit value is set to %d", + snap_limit); + } else { + snap_limit = origin_vol->snap_max_hard_limit; + gf_msg_debug(this->name, 0, + "volume snap-max-hard-limit is" + " lesser than system snap-max-hard-limit, " + "snap-max-hard-limit value is set to %d", + snap_limit); + } + + keylen = snprintf(key, sizeof(key), "%s.snaps-available", keyprefix); + if (snap_limit > origin_vol->snap_count) + ret = dict_set_int32n(dict, key, keylen, + snap_limit - origin_vol->snap_count); + else + ret = dict_set_int32(dict, key, 0); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set available snaps"); + goto out; + } + + keylen = snprintf(key, sizeof(key), "%s.snapcount", keyprefix); + ret = dict_set_int32n(dict, key, keylen, origin_vol->snap_count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Could not save snapcount"); + goto out; + } + + if (!detail) + goto out; + + /* Parent volume name */ + value = gf_strdup(snap_vol->parent_volname); + if (!value) + goto out; + + keylen = snprintf(key, sizeof(key), "%s.origin-volname", keyprefix); + ret = dict_set_dynstrn(dict, key, keylen, value); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set parent " + "volume name in dictionary: %s", + key); + goto out; + } + value = NULL; + + ret = 0; +out: + if (value) + GF_FREE(value); + + return ret; +} + +static int +glusterd_snapshot_get_snap_detail(dict_t *dict, glusterd_snap_t *snap, + const char *keyprefix, + glusterd_volinfo_t *volinfo) +{ + int ret = -1; + int volcount = 0; + char key[32] = ""; /* keyprefix is quite small, up to 16 bytes */ + int keylen; + char timestr[GF_TIMESTR_SIZE] = ""; + char *value = NULL; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_volinfo_t *tmp_vol = NULL; + xlator_t *this = NULL; + + this = THIS; + + GF_ASSERT(dict); + GF_ASSERT(snap); + GF_ASSERT(keyprefix); + + /* Snap Name */ + value = gf_strdup(snap->snapname); + if (!value) + goto out; + + keylen = snprintf(key, sizeof(key), "%s.snapname", keyprefix); + ret = dict_set_dynstrn(dict, key, keylen, value); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set " + "snap name in dictionary"); + goto out; + } + + /* Snap ID */ + value = gf_strdup(uuid_utoa(snap->snap_id)); + if (NULL == value) { + ret = -1; + goto out; + } + + keylen = snprintf(key, sizeof(key), "%s.snap-id", keyprefix); + ret = dict_set_dynstrn(dict, key, keylen, value); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set " + "snap id in dictionary"); + goto out; + } + value = NULL; + + gf_time_fmt(timestr, sizeof timestr, snap->time_stamp, gf_timefmt_FT); + value = gf_strdup(timestr); + + if (NULL == value) { + ret = -1; + goto out; + } + + keylen = snprintf(key, sizeof(key), "%s.snap-time", keyprefix); + ret = dict_set_dynstrn(dict, key, keylen, value); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set " + "snap time stamp in dictionary"); + goto out; + } + value = NULL; + + /* If 
snap description is provided then add that into dictionary */ + if (NULL != snap->description) { + value = gf_strdup(snap->description); + if (NULL == value) { + ret = -1; + goto out; + } + + keylen = snprintf(key, sizeof(key), "%s.snap-desc", keyprefix); + ret = dict_set_dynstrn(dict, key, keylen, value); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set " + "snap description in dictionary"); + goto out; + } + value = NULL; + } + + keylen = snprintf(key, sizeof(key), "%s.snap-status", keyprefix); + switch (snap->snap_status) { + case GD_SNAP_STATUS_INIT: + ret = dict_set_nstrn(dict, key, keylen, "Init", SLEN("Init")); + break; + case GD_SNAP_STATUS_IN_USE: + ret = dict_set_nstrn(dict, key, keylen, "In-use", SLEN("In-use")); + break; + case GD_SNAP_STATUS_DECOMMISSION: + ret = dict_set_nstrn(dict, key, keylen, "Decommisioned", + SLEN("Decommisioned")); + break; + case GD_SNAP_STATUS_RESTORED: + ret = dict_set_nstrn(dict, key, keylen, "Restored", + SLEN("Restored")); + break; + case GD_SNAP_STATUS_NONE: + ret = dict_set_nstrn(dict, key, keylen, "None", SLEN("None")); + break; + default: + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, + "Invalid snap status"); + ret = -1; + goto out; + } + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set snap status " + "in dictionary"); + goto out; + } + + if (volinfo) { + volcount = 1; + snprintf(key, sizeof(key), "%s.vol%d", keyprefix, volcount); + ret = glusterd_snapshot_get_snapvol_detail(dict, volinfo, key, 0); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_DICT_GET_FAILED, + "Failed to " + "get volume detail %s for snap %s", + snap_vol->volname, snap->snapname); + goto out; + } + goto done; + } + + cds_list_for_each_entry_safe(snap_vol, tmp_vol, &snap->volumes, vol_list) + { + volcount++; + snprintf(key, sizeof(key), "%s.vol%d", keyprefix, volcount); + ret = glusterd_snapshot_get_snapvol_detail(dict, snap_vol, key, 1); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to " + "get volume detail %s for snap %s", + snap_vol->volname, snap->snapname); + goto out; + } + } + +done: + keylen = snprintf(key, sizeof(key), "%s.vol-count", keyprefix); + ret = dict_set_int32n(dict, key, keylen, volcount); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set %s", key); + goto out; + } + + ret = 0; +out: + if (value) + GF_FREE(value); + + return ret; +} + +static int +glusterd_snapshot_get_all_snap_info(dict_t *dict) +{ + int ret = -1; + int snapcount = 0; + char key[16] = ""; + glusterd_snap_t *snap = NULL; + glusterd_snap_t *tmp_snap = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + this = THIS; + priv = this->private; + GF_ASSERT(priv); + + /* General parameter validation */ + GF_ASSERT(dict); + + cds_list_for_each_entry_safe(snap, tmp_snap, &priv->snapshots, snap_list) + { + snapcount++; + snprintf(key, sizeof(key), "snap%d", snapcount); + ret = glusterd_snapshot_get_snap_detail(dict, snap, key, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get " + "snapdetail for snap %s", + snap->snapname); + goto out; + } + } + + ret = dict_set_int32n(dict, "snapcount", SLEN("snapcount"), snapcount); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set snapcount"); + goto out; + } + + ret = 0; +out: + return ret; +} + +int +glusterd_snapshot_get_info_by_volume(dict_t *dict, char 
*volname, char *err_str, + size_t len) +{ + int ret = -1; + int snapcount = 0; + int snap_limit = 0; + char *value = NULL; + char key[16] = ""; + glusterd_volinfo_t *volinfo = NULL; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_volinfo_t *tmp_vol = NULL; + glusterd_conf_t *conf = NULL; + xlator_t *this = NULL; + uint64_t opt_hard_max = GLUSTERD_SNAPS_MAX_HARD_LIMIT; + + this = THIS; + conf = this->private; + GF_ASSERT(conf); + + GF_ASSERT(dict); + GF_ASSERT(volname); + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + snprintf(err_str, len, "Volume (%s) does not exist", volname); + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND, "%s", + err_str); + goto out; + } + + /* "snap-max-hard-limit" might not be set by user explicitly, + * in that case it's better to consider the default value. + * Hence not erroring out if Key is not found. + */ + ret = dict_get_uint64(conf->opts, GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT, + &opt_hard_max); + if (ret) { + ret = 0; + gf_msg_debug(this->name, 0, + "%s is not present in " + "opts dictionary", + GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT); + } + + if (opt_hard_max < volinfo->snap_max_hard_limit) { + snap_limit = opt_hard_max; + gf_msg_debug(this->name, 0, + "system snap-max-hard-limit is" + " lesser than volume snap-max-hard-limit, " + "snap-max-hard-limit value is set to %d", + snap_limit); + } else { + snap_limit = volinfo->snap_max_hard_limit; + gf_msg_debug(this->name, 0, + "volume snap-max-hard-limit is" + " lesser than system snap-max-hard-limit, " + "snap-max-hard-limit value is set to %d", + snap_limit); + } + + if (snap_limit > volinfo->snap_count) + ret = dict_set_int32n(dict, "snaps-available", SLEN("snaps-available"), + snap_limit - volinfo->snap_count); + else + ret = dict_set_int32n(dict, "snaps-available", SLEN("snaps-available"), + 0); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set available snaps"); + goto out; + } + + /* Origin volume name */ + value = gf_strdup(volinfo->volname); + if (!value) + goto out; + + ret = dict_set_dynstrn(dict, "origin-volname", SLEN("origin-volname"), + value); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set parent " + "volume name in dictionary: %s", + value); + goto out; + } + value = NULL; + + cds_list_for_each_entry_safe(snap_vol, tmp_vol, &volinfo->snap_volumes, + snapvol_list) + { + snapcount++; + snprintf(key, sizeof(key), "snap%d", snapcount); + ret = glusterd_snapshot_get_snap_detail(dict, snap_vol->snapshot, key, + snap_vol); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get " + "snapdetail for snap %s", + snap_vol->snapshot->snapname); + goto out; + } + } + ret = dict_set_int32n(dict, "snapcount", SLEN("snapcount"), snapcount); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set snapcount"); + goto out; + } + + ret = 0; +out: + if (value) + GF_FREE(value); + + return ret; +} + +/* This function will be called from RPC handler routine. + * This function is responsible for getting the requested + * snapshot info into the dictionary. + * + * @param req RPC request object. Required for sending a response back. + * @param op glusterd operation. Required for sending a response back. + * @param dict pointer to dictionary which will contain both + * request and response key-pair values. 
+ * @return -1 on error and 0 on success + */ +int +glusterd_handle_snapshot_info(rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict, char *err_str, size_t len) +{ + int ret = -1; + int8_t snap_driven = 1; + char *volname = NULL; + char *snapname = NULL; + glusterd_snap_t *snap = NULL; + xlator_t *this = NULL; + int32_t cmd = GF_SNAP_INFO_TYPE_ALL; + + this = THIS; + GF_ASSERT(this); + + GF_VALIDATE_OR_GOTO(this->name, req, out); + GF_VALIDATE_OR_GOTO(this->name, dict, out); + + ret = dict_get_int32n(dict, "sub-cmd", SLEN("sub-cmd"), &cmd); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get type " + "of snapshot info"); + goto out; + } + + switch (cmd) { + case GF_SNAP_INFO_TYPE_ALL: { + ret = glusterd_snapshot_get_all_snap_info(dict); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get info of all snaps"); + goto out; + } + break; + } + + case GF_SNAP_INFO_TYPE_SNAP: { + ret = dict_get_strn(dict, "snapname", SLEN("snapname"), &snapname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get snap name"); + goto out; + } + + ret = dict_set_int32n(dict, "snapcount", SLEN("snapcount"), 1); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set snapcount"); + goto out; + } + + snap = glusterd_find_snap_by_name(snapname); + if (!snap) { + snprintf(err_str, len, "Snapshot (%s) does not exist", + snapname); + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_SNAP_NOT_FOUND, + "%s", err_str); + ret = -1; + goto out; + } + ret = glusterd_snapshot_get_snap_detail(dict, snap, "snap1", NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_NOT_FOUND, + "Failed to get snap detail of snap " + "%s", + snap->snapname); + goto out; + } + break; + } + + case GF_SNAP_INFO_TYPE_VOL: { + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, + "Failed to get volname"); + goto out; + } + ret = glusterd_snapshot_get_info_by_volume(dict, volname, err_str, + len); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND, + "Failed to get volume info of volume " + "%s", + volname); + goto out; + } + snap_driven = 0; + break; + } + } + + ret = dict_set_int8(dict, "snap-driven", snap_driven); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set snap-driven"); + goto out; + } + + /* If everything is successful then send the response back to cli. 
+ * In case of failure the caller of this function will take care + of the response */ + ret = glusterd_op_send_cli_response(op, 0, 0, req, dict, err_str); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_CLI_RESP, + "Failed to send cli " + "response"); + goto out; + } + + ret = 0; + +out: + return ret; +} + +/* This function sets all the snapshot names in the dictionary */ +int +glusterd_snapshot_get_all_snapnames(dict_t *dict) +{ + int ret = -1; + int snapcount = 0; + char *snapname = NULL; + char key[64] = ""; + int keylen; + glusterd_snap_t *snap = NULL; + glusterd_snap_t *tmp_snap = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + this = THIS; + priv = this->private; + GF_ASSERT(priv); + GF_ASSERT(dict); + + cds_list_for_each_entry_safe(snap, tmp_snap, &priv->snapshots, snap_list) + { + snapcount++; + snapname = gf_strdup(snap->snapname); + if (!snapname) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "strdup failed"); + ret = -1; + goto out; + } + keylen = snprintf(key, sizeof(key), "snapname%d", snapcount); + ret = dict_set_dynstrn(dict, key, keylen, snapname); + if (ret) { + GF_FREE(snapname); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set %s", key); + goto out; + } + } + + ret = dict_set_int32n(dict, "snapcount", SLEN("snapcount"), snapcount); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set snapcount"); + goto out; + } + + ret = 0; +out: + + return ret; +} + +/* This function sets all the snapshot names + under a given volume in the dictionary */ +int +glusterd_snapshot_get_vol_snapnames(dict_t *dict, glusterd_volinfo_t *volinfo) +{ + int ret = -1; + int snapcount = 0; + char *snapname = NULL; + char key[32] = ""; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_volinfo_t *tmp_vol = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(dict); + GF_ASSERT(volinfo); + + cds_list_for_each_entry_safe(snap_vol, tmp_vol, &volinfo->snap_volumes, + snapvol_list) + { + snapcount++; + snprintf(key, sizeof(key), "snapname%d", snapcount); + + ret = dict_set_dynstr_with_alloc(dict, key, + snap_vol->snapshot->snapname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to " + "set %s", + key); + GF_FREE(snapname); + goto out; + } + } + + ret = dict_set_int32n(dict, "snapcount", SLEN("snapcount"), snapcount); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set snapcount"); + goto out; + } + + ret = 0; +out: + + return ret; +} + +int +glusterd_handle_snapshot_list(rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict, char *err_str, size_t len, + uint32_t *op_errno) +{ + int ret = -1; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + + this = THIS; + + GF_VALIDATE_OR_GOTO(this->name, req, out); + GF_VALIDATE_OR_GOTO(this->name, dict, out); + GF_VALIDATE_OR_GOTO(this->name, op_errno, out); + + /* Ignore error for getting volname as it is optional */ + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + + if (NULL == volname) { + ret = glusterd_snapshot_get_all_snapnames(dict); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_SNAP_LIST_GET_FAIL, + "Failed to get snapshot list"); + goto out; + } + } else { + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + snprintf(err_str, len, "Volume (%s) does not exist", volname); + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND, "%s", + err_str); + *op_errno = 
EG_NOVOL; + goto out; + } + + ret = glusterd_snapshot_get_vol_snapnames(dict, volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_SNAP_LIST_GET_FAIL, + "Failed to get snapshot list for volume %s", volname); + goto out; + } + } + + /* If everything is successful then send the response back to cli. + In case of failure the caller of this function will take of response.*/ + ret = glusterd_op_send_cli_response(op, 0, 0, req, dict, err_str); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_CLI_RESP, + "Failed to send cli " + "response"); + goto out; + } + + ret = 0; + +out: + return ret; +} + +/* This is a snapshot create handler function. This function will be + * executed in the originator node. This function is responsible for + * calling mgmt_v3 framework to do the actual snap creation on all the bricks + * + * @param req RPC request object + * @param op gluster operation + * @param dict dictionary containing snapshot restore request + * @param err_str In case of an err this string should be populated + * @param len length of err_str buffer + * + * @return Negative value on Failure and 0 in success + */ +int +glusterd_handle_snapshot_create(rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict, char *err_str, size_t len) +{ + int ret = -1; + char *volname = NULL; + char *snapname = NULL; + int64_t volcount = 0; + xlator_t *this = NULL; + char key[64] = ""; + int keylen; + char *username = NULL; + char *password = NULL; + uuid_t *uuid_ptr = NULL; + uuid_t tmp_uuid = {0}; + int i = 0; + int timestamp = 0; + char snap_volname[GD_VOLUME_NAME_MAX] = ""; + char new_snapname[GLUSTERD_MAX_SNAP_NAME] = ""; + char gmt_snaptime[GLUSTERD_MAX_SNAP_NAME] = ""; + time_t snap_time; + this = THIS; + GF_ASSERT(this); + GF_ASSERT(req); + GF_ASSERT(dict); + GF_ASSERT(err_str); + + ret = dict_get_int64(dict, "volcount", &volcount); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "failed to " + "get the volume count"); + goto out; + } + if (volcount <= 0) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, + "Invalid volume count %" PRId64 " supplied", volcount); + ret = -1; + goto out; + } + + ret = dict_get_strn(dict, "snapname", SLEN("snapname"), &snapname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "failed to get the snapname"); + goto out; + } + + timestamp = dict_get_str_boolean(dict, "no-timestamp", _gf_false); + if (timestamp == -1) { + gf_log(this->name, GF_LOG_ERROR, + "Failed to get " + "no-timestamp flag "); + goto out; + } + + snap_time = gf_time(); + ret = dict_set_int64(dict, "snap-time", (int64_t)snap_time); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to set snap-time"); + goto out; + } + + if (!timestamp) { + strftime(gmt_snaptime, sizeof(gmt_snaptime), "_GMT-%Y.%m.%d-%H.%M.%S", + gmtime(&snap_time)); + snprintf(new_snapname, sizeof(new_snapname), "%s%s", snapname, + gmt_snaptime); + ret = dict_set_dynstr_with_alloc(dict, "snapname", new_snapname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to update " + "snap-name"); + goto out; + } + snapname = new_snapname; + } + + if (strlen(snapname) >= GLUSTERD_MAX_SNAP_NAME) { + snprintf(err_str, len, + "snapname cannot exceed 255 " + "characters"); + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, "%s", + err_str); + ret = -1; + goto out; + } + + uuid_ptr = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); + if (!uuid_ptr) { + gf_msg(this->name, 
GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "Out Of Memory"); + ret = -1; + goto out; + } + + gf_uuid_generate(*uuid_ptr); + ret = dict_set_bin(dict, "snap-id", uuid_ptr, sizeof(uuid_t)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to set snap-id"); + GF_FREE(uuid_ptr); + goto out; + } + uuid_ptr = NULL; + + for (i = 1; i <= volcount; i++) { + keylen = snprintf(key, sizeof(key), "volname%d", i); + ret = dict_get_strn(dict, key, keylen, &volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get volume name"); + goto out; + } + + /* generate internal username and password for the snap*/ + gf_uuid_generate(tmp_uuid); + username = gf_strdup(uuid_utoa(tmp_uuid)); + keylen = snprintf(key, sizeof(key), "volume%d_username", i); + ret = dict_set_dynstrn(dict, key, keylen, username); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set snap " + "username for volume %s", + volname); + GF_FREE(username); + goto out; + } + + gf_uuid_generate(tmp_uuid); + password = gf_strdup(uuid_utoa(tmp_uuid)); + keylen = snprintf(key, sizeof(key), "volume%d_password", i); + ret = dict_set_dynstrn(dict, key, keylen, password); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set snap " + "password for volume %s", + volname); + GF_FREE(password); + goto out; + } + + uuid_ptr = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); + if (!uuid_ptr) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "Out Of Memory"); + ret = -1; + goto out; + } + + snprintf(key, sizeof(key), "vol%d_volid", i); + gf_uuid_generate(*uuid_ptr); + ret = dict_set_bin(dict, key, uuid_ptr, sizeof(uuid_t)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to set snap_volid"); + GF_FREE(uuid_ptr); + goto out; + } + GLUSTERD_GET_UUID_NOHYPHEN(snap_volname, *uuid_ptr); + snprintf(key, sizeof(key), "snap-volname%d", i); + ret = dict_set_dynstr_with_alloc(dict, key, snap_volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to set snap volname"); + GF_FREE(uuid_ptr); + goto out; + } + } + + ret = glusterd_mgmt_v3_initiate_snap_phases(req, op, dict); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_INIT_FAIL, + "Failed to initiate snap " + "phases"); + } + +out: + return ret; +} + +/* This is a snapshot status handler function. This function will be + * executed in a originator node. This function is responsible for + * calling mgmt v3 framework to get the actual snapshot status from + * all the bricks + * + * @param req RPC request object + * @param op gluster operation + * @param dict dictionary containing snapshot status request + * @param err_str In case of an err this string should be populated + * @param len length of err_str buffer + * + * return : 0 in case of success. + * -1 in case of failure. + * + */ +int +glusterd_handle_snapshot_status(rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict, char *err_str, size_t len) +{ + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + GF_ASSERT(req); + GF_ASSERT(dict); + GF_ASSERT(err_str); + + ret = glusterd_mgmt_v3_initiate_snap_phases(req, op, dict); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_INIT_FAIL, + "Failed to initiate " + "snap phases"); + goto out; + } + + ret = 0; +out: + return ret; +} + +/* This is a snapshot clone handler function. 
This function will be + * executed in the originator node. This function is responsible for + * calling mgmt_v3 framework to do the actual snap clone on all the bricks + * + * @param req RPC request object + * @param op gluster operation + * @param dict dictionary containing snapshot restore request + * @param err_str In case of an err this string should be populated + * @param len length of err_str buffer + * + * @return Negative value on Failure and 0 in success + */ +int +glusterd_handle_snapshot_clone(rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict, char *err_str, size_t len) +{ + int ret = -1; + char *clonename = NULL; + char *snapname = NULL; + xlator_t *this = NULL; + char key[64] = ""; + int keylen; + char *username = NULL; + char *password = NULL; + char *volname = NULL; + uuid_t *uuid_ptr = NULL; + uuid_t tmp_uuid = {0}; + int i = 0; + char snap_volname[GD_VOLUME_NAME_MAX] = ""; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(req); + GF_ASSERT(dict); + GF_ASSERT(err_str); + + ret = dict_get_strn(dict, "clonename", SLEN("clonename"), &clonename); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "failed to " + "get the clone name"); + goto out; + } + /*We need to take a volume lock on clone name*/ + volname = gf_strdup(clonename); + keylen = snprintf(key, sizeof(key), "volname1"); + ret = dict_set_dynstrn(dict, key, keylen, volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set clone " + "name for volume locking"); + GF_FREE(volname); + goto out; + } + + ret = dict_get_strn(dict, "snapname", SLEN("snapname"), &snapname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "failed to get the snapname"); + goto out; + } + + uuid_ptr = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); + if (!uuid_ptr) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "Out Of Memory"); + ret = -1; + goto out; + } + + gf_uuid_generate(*uuid_ptr); + ret = dict_set_bin(dict, "clone-id", uuid_ptr, sizeof(uuid_t)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to set clone-id"); + GF_FREE(uuid_ptr); + goto out; + } + uuid_ptr = NULL; + + ret = dict_get_strn(dict, "snapname", SLEN("snapname"), &snapname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get snapname name"); + goto out; + } + + gf_uuid_generate(tmp_uuid); + username = gf_strdup(uuid_utoa(tmp_uuid)); + keylen = snprintf(key, sizeof(key), "volume1_username"); + ret = dict_set_dynstrn(dict, key, keylen, username); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set clone " + "username for volume %s", + clonename); + GF_FREE(username); + goto out; + } + + gf_uuid_generate(tmp_uuid); + password = gf_strdup(uuid_utoa(tmp_uuid)); + keylen = snprintf(key, sizeof(key), "volume1_password"); + ret = dict_set_dynstrn(dict, key, keylen, password); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set clone " + "password for volume %s", + clonename); + GF_FREE(password); + goto out; + } + + uuid_ptr = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); + if (!uuid_ptr) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "Out Of Memory"); + ret = -1; + goto out; + } + + snprintf(key, sizeof(key), "vol1_volid"); + gf_uuid_generate(*uuid_ptr); + ret = dict_set_bin(dict, key, uuid_ptr, sizeof(uuid_t)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + 
"Unable to set clone_volid"); + GF_FREE(uuid_ptr); + goto out; + } + snprintf(key, sizeof(key), "clone-volname%d", i); + ret = dict_set_dynstr_with_alloc(dict, key, snap_volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to set snap volname"); + GF_FREE(uuid_ptr); + goto out; + } + + ret = glusterd_mgmt_v3_initiate_snap_phases(req, op, dict); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_INIT_FAIL, + "Failed to initiate " + "snap phases"); + } + +out: + return ret; +} + +/* This is a snapshot restore handler function. This function will be + * executed in the originator node. This function is responsible for + * calling mgmt_v3 framework to do the actual restore on all the bricks + * + * @param req RPC request object + * @param op gluster operation + * @param dict dictionary containing snapshot restore request + * @param err_str In case of an err this string should be populated + * @param len length of err_str buffer + * + * @return Negative value on Failure and 0 in success + */ +int +glusterd_handle_snapshot_restore(rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict, char *err_str, + uint32_t *op_errno, size_t len) +{ + int ret = -1; + char *snapname = NULL; + char *buf = NULL; + glusterd_conf_t *conf = NULL; + xlator_t *this = NULL; + glusterd_snap_t *snap = NULL; + glusterd_volinfo_t *snap_volinfo = NULL; + int32_t i = 0; + char key[64] = ""; + int keylen; + + this = THIS; + GF_ASSERT(this); + conf = this->private; + + GF_ASSERT(conf); + GF_ASSERT(req); + GF_ASSERT(dict); + GF_ASSERT(err_str); + + ret = dict_get_strn(dict, "snapname", SLEN("snapname"), &snapname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to " + "get snapname"); + goto out; + } + + snap = glusterd_find_snap_by_name(snapname); + if (!snap) { + snprintf(err_str, len, "Snapshot (%s) does not exist", snapname); + *op_errno = EG_NOSNAP; + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_SNAP_NOT_FOUND, "%s", + err_str); + ret = -1; + goto out; + } + + list_for_each_entry(snap_volinfo, &snap->volumes, vol_list) + { + i++; + keylen = snprintf(key, sizeof(key), "volname%d", i); + buf = gf_strdup(snap_volinfo->parent_volname); + if (!buf) { + ret = -1; + goto out; + } + ret = dict_set_dynstrn(dict, key, keylen, buf); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Could not set " + "parent volume name %s in the dict", + snap_volinfo->parent_volname); + GF_FREE(buf); + goto out; + } + buf = NULL; + } + + ret = dict_set_int32n(dict, "volcount", SLEN("volcount"), i); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Could not save volume count"); + goto out; + } + + ret = glusterd_mgmt_v3_initiate_snap_phases(req, op, dict); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_INIT_FAIL, + "Failed to initiate snap phases"); + goto out; + } + + ret = 0; + +out: + return ret; +} + +glusterd_snap_t * +glusterd_create_snap_object(dict_t *dict, dict_t *rsp_dict) +{ + char *snapname = NULL; + uuid_t *snap_id = NULL; + char *description = NULL; + glusterd_snap_t *snap = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + int ret = -1; + int64_t time_stamp = 0; + + this = THIS; + priv = this->private; + + GF_ASSERT(dict); + GF_ASSERT(rsp_dict); + + /* Fetch snapname, description, id and time from dict */ + ret = dict_get_strn(dict, "snapname", SLEN("snapname"), &snapname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + 
"Unable to fetch snapname"); + goto out; + } + + /* Ignore ret value for description*/ + ret = dict_get_strn(dict, "description", SLEN("description"), &description); + + ret = dict_get_bin(dict, "snap-id", (void **)&snap_id); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to fetch snap_id"); + goto out; + } + + ret = dict_get_int64(dict, "snap-time", &time_stamp); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to fetch snap-time"); + goto out; + } + if (time_stamp <= 0) { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, + "Invalid time-stamp: %" PRId64, time_stamp); + goto out; + } + + cds_list_for_each_entry(snap, &priv->snapshots, snap_list) + { + if (!strcmp(snap->snapname, snapname) || + !gf_uuid_compare(snap->snap_id, *snap_id)) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_CREATION_FAIL, + "Found duplicate snap %s (%s)", snap->snapname, + uuid_utoa(snap->snap_id)); + ret = -1; + break; + } + } + if (ret) { + snap = NULL; + goto out; + } + + snap = glusterd_new_snap_object(); + if (!snap) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_CREATION_FAIL, + "Could not create " + "the snap object for snap %s", + snapname); + goto out; + } + + gf_strncpy(snap->snapname, snapname, sizeof(snap->snapname)); + gf_uuid_copy(snap->snap_id, *snap_id); + snap->time_stamp = (time_t)time_stamp; + /* Set the status as GD_SNAP_STATUS_INIT and once the backend snapshot + is taken and snap is really ready to use, set the status to + GD_SNAP_STATUS_IN_USE. This helps in identifying the incomplete + snapshots and cleaning them up. + */ + snap->snap_status = GD_SNAP_STATUS_INIT; + if (description) { + snap->description = gf_strdup(description); + if (snap->description == NULL) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_CREATION_FAIL, + "Saving the Snapshot Description Failed"); + ret = -1; + goto out; + } + } + + ret = glusterd_store_snap(snap); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_CREATION_FAIL, + "Could not store snap" + "object %s", + snap->snapname); + goto out; + } + + glusterd_list_add_order(&snap->snap_list, &priv->snapshots, + glusterd_compare_snap_time); + + gf_msg_trace(this->name, 0, "Snapshot %s added to the list", + snap->snapname); + + ret = 0; + +out: + if (ret) { + if (snap) + glusterd_snap_remove(rsp_dict, snap, _gf_true, _gf_true, _gf_false); + snap = NULL; + } + + return snap; +} + +/* Added missed_snap_entry to rsp_dict */ +int32_t +glusterd_add_missed_snaps_to_dict(dict_t *rsp_dict, + glusterd_volinfo_t *snap_vol, + glusterd_brickinfo_t *brickinfo, + int32_t brick_number, int32_t op) +{ + char *snap_uuid = NULL; + char missed_snap_entry[PATH_MAX] = ""; + char name_buf[PATH_MAX] = ""; + int32_t missed_snap_count = -1; + int32_t ret = -1; + xlator_t *this = NULL; + int32_t len = 0; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(rsp_dict); + GF_ASSERT(snap_vol); + GF_ASSERT(brickinfo); + + snap_uuid = gf_strdup(uuid_utoa(snap_vol->snapshot->snap_id)); + if (!snap_uuid) { + ret = -1; + goto out; + } + + len = snprintf(missed_snap_entry, sizeof(missed_snap_entry), + "%s:%s=%s:%d:%s:%d:%d", uuid_utoa(brickinfo->uuid), + snap_uuid, snap_vol->volname, brick_number, brickinfo->path, + op, GD_MISSED_SNAP_PENDING); + if ((len < 0) || (len >= sizeof(missed_snap_entry))) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_COPY_FAIL, NULL); + goto out; + } + + /* Fetch the missed_snap_count from the dict */ + ret = dict_get_int32n(rsp_dict, 
"missed_snap_count", + SLEN("missed_snap_count"), &missed_snap_count); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Key=missed_snap_count", NULL); + /* Initialize the missed_snap_count for the first time */ + missed_snap_count = 0; + } + + /* Setting the missed_snap_entry in the rsp_dict */ + snprintf(name_buf, sizeof(name_buf), "missed_snaps_%d", missed_snap_count); + ret = dict_set_dynstr_with_alloc(rsp_dict, name_buf, missed_snap_entry); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set missed_snap_entry (%s) " + "in the rsp_dict.", + missed_snap_entry); + goto out; + } + missed_snap_count++; + + /* Setting the new missed_snap_count in the dict */ + ret = dict_set_int32n(rsp_dict, "missed_snap_count", + SLEN("missed_snap_count"), missed_snap_count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set missed_snap_count for %s " + "in the rsp_dict.", + missed_snap_entry); + goto out; + } + +out: + if (snap_uuid) + GF_FREE(snap_uuid); + + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +/* This function actually calls the command (or the API) for taking the + snapshot of the backend brick filesystem. If this is successful, + then call the glusterd_snap_create function to create the snap object + for glusterd +*/ +int32_t +glusterd_take_lvm_snapshot(glusterd_brickinfo_t *brickinfo, + char *origin_brick_path) +{ + char msg[NAME_MAX] = ""; + char buf[PATH_MAX] = ""; + char *ptr = NULL; + char *origin_device = NULL; + int ret = -1; + gf_boolean_t match = _gf_false; + runner_t runner = { + 0, + }; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(brickinfo); + GF_ASSERT(origin_brick_path); + + origin_device = glusterd_get_brick_mount_device(origin_brick_path); + if (!origin_device) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_GET_INFO_FAIL, + "getting device name for " + "the brick %s failed", + origin_brick_path); + goto out; + } + + /* Figuring out if setactivationskip flag is supported or not */ + runinit(&runner); + snprintf(msg, sizeof(msg), "running lvcreate help"); + runner_add_args(&runner, LVM_CREATE, "--help", NULL); + runner_log(&runner, "", GF_LOG_DEBUG, msg); + runner_redir(&runner, STDOUT_FILENO, RUN_PIPE); + ret = runner_start(&runner); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_LVCREATE_FAIL, + "Failed to run lvcreate help"); + runner_end(&runner); + goto out; + } + + /* Looking for setactivationskip in lvcreate --help */ + do { + ptr = fgets(buf, sizeof(buf), runner_chio(&runner, STDOUT_FILENO)); + if (ptr) { + if (strstr(buf, "setactivationskip")) { + match = _gf_true; + break; + } + } + } while (ptr != NULL); + runner_end(&runner); + + /* Taking the actual snapshot */ + runinit(&runner); + snprintf(msg, sizeof(msg), "taking snapshot of the brick %s", + origin_brick_path); + if (match == _gf_true) + runner_add_args(&runner, LVM_CREATE, "-s", origin_device, + "--setactivationskip", "n", "--name", + brickinfo->device_path, NULL); + else + runner_add_args(&runner, LVM_CREATE, "-s", origin_device, "--name", + brickinfo->device_path, NULL); + runner_log(&runner, this->name, GF_LOG_DEBUG, msg); + ret = runner_run(&runner); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_CREATION_FAIL, + "taking snapshot of the " + "brick (%s) of device %s failed", + origin_brick_path, origin_device); + } + +out: + if (origin_device) + GF_FREE(origin_device); + + return ret; +} + +int32_t 
+glusterd_snap_brick_create(glusterd_volinfo_t *snap_volinfo, + glusterd_brickinfo_t *brickinfo, int32_t brick_count, + int32_t clone) +{ + int32_t ret = -1; + xlator_t *this = NULL; + char snap_brick_mount_path[PATH_MAX] = ""; + char clone_uuid[64] = ""; + struct stat statbuf = { + 0, + }; + int32_t len = 0; + + this = THIS; + + GF_ASSERT(snap_volinfo); + GF_ASSERT(brickinfo); + + if (clone) { + GLUSTERD_GET_UUID_NOHYPHEN(clone_uuid, snap_volinfo->volume_id); + len = snprintf(snap_brick_mount_path, sizeof(snap_brick_mount_path), + "%s/%s/brick%d", snap_mount_dir, clone_uuid, + brick_count + 1); + } else { + len = snprintf(snap_brick_mount_path, sizeof(snap_brick_mount_path), + "%s/%s/brick%d", snap_mount_dir, snap_volinfo->volname, + brick_count + 1); + } + if ((len < 0) || (len >= sizeof(snap_brick_mount_path))) { + goto out; + } + + ret = mkdir_p(snap_brick_mount_path, 0755, _gf_true); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED, + "creating the brick directory" + " %s for the snapshot %s(device: %s) failed", + snap_brick_mount_path, snap_volinfo->volname, + brickinfo->device_path); + goto out; + } + /* mount the snap logical device on the directory inside + /run/gluster/snaps/<snapname>/@snap_brick_mount_path + Way to mount the snap brick via mount api is this. + ret = mount (device, snap_brick_mount_path, entry->mnt_type, + MS_MGC_VAL, "nouuid"); + But for now, mounting using runner apis. + */ + ret = glusterd_mount_lvm_snapshot(brickinfo, snap_brick_mount_path); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_LVM_MOUNT_FAILED, + "Failed to mount lvm snapshot."); + goto out; + } + + ret = sys_stat(brickinfo->path, &statbuf); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED, + "stat of the brick %s" + "(brick mount: %s) failed (%s)", + brickinfo->path, snap_brick_mount_path, strerror(errno)); + goto out; + } + ret = sys_lsetxattr(brickinfo->path, GF_XATTR_VOL_ID_KEY, + snap_volinfo->volume_id, 16, XATTR_REPLACE); + if (ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_SET_XATTR_FAIL, + "Failed to set " + "extended attribute %s on %s. Reason: " + "%s, snap: %s", + GF_XATTR_VOL_ID_KEY, brickinfo->path, strerror(errno), + snap_volinfo->volname); + goto out; + } + +out: + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_UMOUNTING_SNAP_BRICK, + "unmounting the snap brick" + " mount %s", + snap_brick_mount_path); + /*umount2 system call doesn't cleanup mtab entry after un-mount. 
+ So use external umount command*/ + glusterd_umount(snap_brick_mount_path); + } + + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +static int32_t +glusterd_add_brick_to_snap_volume(dict_t *dict, dict_t *rsp_dict, + glusterd_volinfo_t *snap_vol, + glusterd_brickinfo_t *original_brickinfo, + int64_t volcount, int32_t brick_count, + int clone) +{ + char key[64] = ""; + int keylen; + char *value = NULL; + char *snap_brick_dir = NULL; + char snap_brick_path[PATH_MAX] = ""; + char clone_uuid[64] = ""; + char *snap_device = NULL; + glusterd_brickinfo_t *snap_brickinfo = NULL; + gf_boolean_t add_missed_snap = _gf_false; + int32_t ret = -1; + xlator_t *this = NULL; + char abspath[PATH_MAX] = ""; + int32_t len = 0; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(dict); + GF_ASSERT(rsp_dict); + GF_ASSERT(snap_vol); + GF_ASSERT(original_brickinfo); + + snprintf(key, sizeof(key), "vol%" PRId64 ".origin_brickpath%d", volcount, + brick_count); + ret = dict_set_dynstr_with_alloc(dict, key, original_brickinfo->path); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set %s", key); + goto out; + } + + ret = glusterd_brickinfo_new(&snap_brickinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_NEW_INFO_FAIL, + "initializing the brick for the snap " + "volume failed (snapname: %s)", + snap_vol->snapshot->snapname); + goto out; + } + + keylen = snprintf(key, sizeof(key), "vol%" PRId64 ".fstype%d", volcount, + brick_count); + ret = dict_get_strn(dict, key, keylen, &value); + if (!ret) { + /* Update the fstype in original brickinfo as well */ + gf_strncpy(original_brickinfo->fstype, value, + sizeof(original_brickinfo->fstype)); + gf_strncpy(snap_brickinfo->fstype, value, + sizeof(snap_brickinfo->fstype)); + } else { + if (is_origin_glusterd(dict) == _gf_true) + add_missed_snap = _gf_true; + } + + keylen = snprintf(key, sizeof(key), "vol%" PRId64 ".mnt_opts%d", volcount, + brick_count); + ret = dict_get_strn(dict, key, keylen, &value); + if (!ret) { + /* Update the mnt_opts in original brickinfo as well */ + gf_strncpy(original_brickinfo->mnt_opts, value, + sizeof(original_brickinfo->mnt_opts)); + gf_strncpy(snap_brickinfo->mnt_opts, value, + sizeof(snap_brickinfo->mnt_opts)); + } else { + if (is_origin_glusterd(dict) == _gf_true) + add_missed_snap = _gf_true; + } + + keylen = snprintf(key, sizeof(key), "vol%" PRId64 ".brickdir%d", volcount, + brick_count); + ret = dict_get_strn(dict, key, keylen, &snap_brick_dir); + if (ret) { + /* Using original brickinfo here because it will be a + * pending snapshot and storing the original brickinfo + * will help in mapping while recreating the missed snapshot + */ + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_NOT_FOUND, + "Unable to fetch " + "snap mount path(%s). Adding to missed_snap_list", + key); + snap_brickinfo->snap_status = -1; + + snap_brick_dir = original_brickinfo->mount_dir; + + /* In origiator node add snaps missed + * from different nodes to the dict + */ + if (is_origin_glusterd(dict) == _gf_true) + add_missed_snap = _gf_true; + } + + if ((snap_brickinfo->snap_status != -1) && + (!gf_uuid_compare(original_brickinfo->uuid, MY_UUID)) && + (!glusterd_is_brick_started(original_brickinfo))) { + /* In case if the brick goes down after prevalidate. 
*/ + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_BRICK_DISCONNECTED, + "brick %s:%s is not" + " started (snap: %s)", + original_brickinfo->hostname, original_brickinfo->path, + snap_vol->snapshot->snapname); + + snap_brickinfo->snap_status = -1; + add_missed_snap = _gf_true; + } + + if (add_missed_snap) { + ret = glusterd_add_missed_snaps_to_dict( + rsp_dict, snap_vol, original_brickinfo, brick_count + 1, + GF_SNAP_OPTION_TYPE_CREATE); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MISSEDSNAP_INFO_SET_FAIL, + "Failed to add missed" + " snapshot info for %s:%s in the rsp_dict", + original_brickinfo->hostname, original_brickinfo->path); + goto out; + } + } + + /* Create brick-path in the format /var/run/gluster/snaps/ * + * <snap-uuid>/<original-brick#>/snap-brick-dir * + */ + if (clone) { + GLUSTERD_GET_UUID_NOHYPHEN(clone_uuid, snap_vol->volume_id); + len = snprintf(snap_brick_path, sizeof(snap_brick_path), + "%s/%s/brick%d%s", snap_mount_dir, clone_uuid, + brick_count + 1, snap_brick_dir); + } else { + len = snprintf(snap_brick_path, sizeof(snap_brick_path), + "%s/%s/brick%d%s", snap_mount_dir, snap_vol->volname, + brick_count + 1, snap_brick_dir); + } + if ((len < 0) || (len >= sizeof(snap_brick_path))) { + ret = -1; + goto out; + } + + keylen = snprintf(key, sizeof(key), "vol%" PRId64 ".brick_snapdevice%d", + volcount, brick_count); + ret = dict_get_strn(dict, key, keylen, &snap_device); + if (ret) { + /* If the device name is empty, so will be the brick path + * Hence the missed snap has already been added above + */ + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_NOT_FOUND, + "Unable to fetch " + "snap device (%s). Leaving empty", + key); + } else + gf_strncpy(snap_brickinfo->device_path, snap_device, + sizeof(snap_brickinfo->device_path)); + + ret = gf_canonicalize_path(snap_brick_path); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CANONICALIZE_FAIL, + "Failed to canonicalize path"); + goto out; + } + + gf_strncpy(snap_brickinfo->hostname, original_brickinfo->hostname, + sizeof(snap_brickinfo->hostname)); + gf_strncpy(snap_brickinfo->path, snap_brick_path, + sizeof(snap_brickinfo->path)); + + if (!realpath(snap_brick_path, abspath)) { + /* ENOENT indicates that brick path has not been created which + * is a valid scenario */ + if (errno != ENOENT) { + gf_msg(this->name, GF_LOG_CRITICAL, errno, + GD_MSG_BRICKINFO_CREATE_FAIL, + "realpath () " + "failed for brick %s. The underlying filesystem" + " may be in bad state", + snap_brick_path); + ret = -1; + goto out; + } + } + gf_strncpy(snap_brickinfo->real_path, abspath, + sizeof(snap_brickinfo->real_path)); + + gf_strncpy(snap_brickinfo->mount_dir, original_brickinfo->mount_dir, + sizeof(snap_brickinfo->mount_dir)); + gf_uuid_copy(snap_brickinfo->uuid, original_brickinfo->uuid); + /* AFR changelog names are based on brick_id and hence the snap + * volume's bricks must retain the same ID */ + cds_list_add_tail(&snap_brickinfo->brick_list, &snap_vol->bricks); + + if (clone) { + GLUSTERD_ASSIGN_BRICKID_TO_BRICKINFO(snap_brickinfo, snap_vol, + brick_count); + } else + gf_strncpy(snap_brickinfo->brick_id, original_brickinfo->brick_id, + sizeof(snap_brickinfo->brick_id)); + +out: + if (ret && snap_brickinfo) + GF_FREE(snap_brickinfo); + + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +/* This function will update the file-system label of the + * backend snapshot brick. 
+ * + * @param brickinfo brickinfo of the snap volume + * + * @return 0 on success and -1 on failure + */ +int +glusterd_update_fs_label(glusterd_brickinfo_t *brickinfo) +{ + int32_t ret = -1; + char msg[PATH_MAX] = ""; + char label[NAME_MAX] = ""; + uuid_t uuid = { + 0, + }; + runner_t runner = { + 0, + }; + xlator_t *this = NULL; + int32_t len = 0; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(brickinfo); + + /* Generate a new UUID */ + gf_uuid_generate(uuid); + + GLUSTERD_GET_UUID_NOHYPHEN(label, uuid); + + runinit(&runner); + + /* Call the file-system specific tools to update the file-system + * label. Currently we are only supporting xfs and ext2/ext3/ext4 + * file-system. + */ + if (0 == strcmp(brickinfo->fstype, "xfs")) { + /* XFS label is of size 12. Therefore we should truncate the + * label to 12 bytes*/ + label[12] = '\0'; + len = snprintf(msg, sizeof(msg), + "Changing filesystem label " + "of %s brick to %s", + brickinfo->path, label); + if (len < 0) { + strcpy(msg, "<error>"); + } + /* Run the run xfs_admin tool to change the label + * of the file-system */ + runner_add_args(&runner, "xfs_admin", "-L", label, + brickinfo->device_path, NULL); + } else if (0 == strcmp(brickinfo->fstype, "ext4") || + 0 == strcmp(brickinfo->fstype, "ext3") || + 0 == strcmp(brickinfo->fstype, "ext2")) { + /* Ext2/Ext3/Ext4 label is of size 16. Therefore we should + * truncate the label to 16 bytes*/ + label[16] = '\0'; + len = snprintf(msg, sizeof(msg), + "Changing filesystem label " + "of %s brick to %s", + brickinfo->path, label); + if (len < 0) { + strcpy(msg, "<error>"); + } + /* For ext2/ext3/ext4 run tune2fs to change the + * file-system label */ + runner_add_args(&runner, "tune2fs", "-L", label, brickinfo->device_path, + NULL); + } else { + gf_msg(this->name, GF_LOG_WARNING, EOPNOTSUPP, GD_MSG_OP_UNSUPPORTED, + "Changing file-system " + "label of %s file-system is not supported as of now", + brickinfo->fstype); + runner_end(&runner); + ret = -1; + goto out; + } + + runner_log(&runner, this->name, GF_LOG_DEBUG, msg); + ret = runner_run(&runner); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FS_LABEL_UPDATE_FAIL, + "Failed to change " + "filesystem label of %s brick to %s", + brickinfo->path, label); + goto out; + } + + ret = 0; +out: + return ret; +} + +static int32_t +glusterd_take_brick_snapshot(dict_t *dict, glusterd_volinfo_t *snap_vol, + glusterd_brickinfo_t *brickinfo, int32_t volcount, + int32_t brick_count, int32_t clone) +{ + char *origin_brick_path = NULL; + char key[64] = ""; + int keylen; + int32_t ret = -1; + gf_boolean_t snap_activate = _gf_false; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + priv = this->private; + GF_ASSERT(this); + GF_ASSERT(dict); + GF_ASSERT(snap_vol); + GF_ASSERT(brickinfo); + GF_ASSERT(priv); + + if (strlen(brickinfo->device_path) == 0) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, + "Device path is empty " + "brick %s:%s", + brickinfo->hostname, brickinfo->path); + ret = -1; + goto out; + } + + keylen = snprintf(key, sizeof(key), "vol%d.origin_brickpath%d", volcount, + brick_count); + ret = dict_get_strn(dict, key, keylen, &origin_brick_path); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED, + "Unable to fetch " + "brick path (%s)", + key); + goto out; + } + + ret = glusterd_take_lvm_snapshot(brickinfo, origin_brick_path); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_CREATION_FAIL, + "Failed to take snapshot of " + "brick %s:%s", + 
brickinfo->hostname, origin_brick_path); + goto out; + } + + /* After the snapshot both the origin brick (LVM brick) and + * the snapshot brick will have the same file-system label. This + * will cause lot of problems at mount time. Therefore we must + * generate a new label for the snapshot brick + */ + ret = glusterd_update_fs_label(brickinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FS_LABEL_UPDATE_FAIL, + "Failed to update " + "file-system label for %s brick", + brickinfo->path); + /* Failing to update label should not cause snapshot failure. + * Currently label is updated only for XFS and ext2/ext3/ext4 + * file-system. + */ + } + + /* create the complete brick here in case of clone and + * activate-on-create configuration. + */ + snap_activate = dict_get_str_boolean( + priv->opts, GLUSTERD_STORE_KEY_SNAP_ACTIVATE, _gf_false); + if (clone || snap_activate) { + ret = glusterd_snap_brick_create(snap_vol, brickinfo, brick_count, + clone); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_CREATION_FAIL, + "not able to " + "create the brick for the snap %s, volume %s", + snap_vol->snapshot->snapname, snap_vol->volname); + goto out; + } + } + +out: + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +static int +glusterd_snap_clear_unsupported_opt( + glusterd_volinfo_t *volinfo, + struct gd_snap_unsupported_opt_t *unsupported_opt) +{ + int ret = -1; + int i = 0; + + GF_VALIDATE_OR_GOTO("glusterd", volinfo, out); + + for (i = 0; unsupported_opt[i].key; i++) { + glusterd_volinfo_get(volinfo, unsupported_opt[i].key, + &unsupported_opt[i].value); + + if (unsupported_opt[i].value) { + unsupported_opt[i].value = gf_strdup(unsupported_opt[i].value); + if (!unsupported_opt[i].value) { + ret = -1; + goto out; + } + dict_del(volinfo->dict, unsupported_opt[i].key); + } + } + + ret = 0; +out: + return ret; +} + +static int +glusterd_snap_set_unsupported_opt( + glusterd_volinfo_t *volinfo, + struct gd_snap_unsupported_opt_t *unsupported_opt) +{ + int ret = -1; + int i = 0; + + GF_VALIDATE_OR_GOTO("glusterd", volinfo, out); + + for (i = 0; unsupported_opt[i].key; i++) { + if (!unsupported_opt[i].value) + continue; + + ret = dict_set_dynstr(volinfo->dict, unsupported_opt[i].key, + unsupported_opt[i].value); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "dict set failed"); + goto out; + } + unsupported_opt[i].value = NULL; + } + + ret = 0; +out: + return ret; +} + +glusterd_volinfo_t * +glusterd_do_snap_vol(glusterd_volinfo_t *origin_vol, glusterd_snap_t *snap, + dict_t *dict, dict_t *rsp_dict, int64_t volcount, + int clone) +{ + char key[64] = ""; + int keylen; + char *username = NULL; + char *password = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *snap_vol = NULL; + uuid_t *snap_volid = NULL; + int32_t ret = -1; + int32_t brick_count = 0; + xlator_t *this = NULL; + char *clonename = NULL; + gf_boolean_t conf_present = _gf_false; + int i = 0; + + struct gd_snap_unsupported_opt_t unsupported_opt[] = { + {.key = VKEY_FEATURES_QUOTA, .value = NULL}, + {.key = VKEY_FEATURES_INODE_QUOTA, .value = NULL}, + {.key = "feature.deem-statfs", .value = NULL}, + {.key = "features.quota-deem-statfs", .value = NULL}, + {.key = NULL, .value = NULL}}; + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + GF_ASSERT(priv); + GF_ASSERT(dict); + GF_ASSERT(origin_vol); + GF_ASSERT(rsp_dict); + + /* fetch username, password and vol_id from dict*/ + keylen = snprintf(key, 
sizeof(key), "volume%" PRId64 "_username", volcount); + ret = dict_get_strn(dict, key, keylen, &username); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get %s for " + "snap %s", + key, snap->snapname); + goto out; + } + keylen = snprintf(key, sizeof(key), "volume%" PRId64 "_password", volcount); + ret = dict_get_strn(dict, key, keylen, &password); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get %s for " + "snap %s", + key, snap->snapname); + goto out; + } + + snprintf(key, sizeof(key), "vol%" PRId64 "_volid", volcount); + ret = dict_get_bin(dict, key, (void **)&snap_volid); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to fetch snap_volid"); + goto out; + } + + /* We are not setting the username and password here as + * we need to set the user name and password passed in + * the dictionary + */ + ret = glusterd_volinfo_dup(origin_vol, &snap_vol, _gf_false); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_OP_FAILED, + "Failed to duplicate volinfo " + "for the snapshot %s", + snap->snapname); + goto out; + } + + /* uuid is used as lvm snapshot name. + This will avoid restrictions on snapshot names provided by user */ + gf_uuid_copy(snap_vol->volume_id, *snap_volid); + snap_vol->is_snap_volume = _gf_true; + snap_vol->snapshot = snap; + + if (clone) { + snap_vol->is_snap_volume = _gf_false; + ret = dict_get_strn(dict, "clonename", SLEN("clonename"), &clonename); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get %s " + "for snap %s", + key, snap->snapname); + goto out; + } + cds_list_add_tail(&snap_vol->vol_list, &snap->volumes); + gf_strncpy(snap_vol->volname, clonename, sizeof(snap_vol->volname)); + gf_uuid_copy(snap_vol->restored_from_snap, + origin_vol->snapshot->snap_id); + + } else { + GLUSTERD_GET_UUID_NOHYPHEN(snap_vol->volname, *snap_volid); + gf_strncpy(snap_vol->parent_volname, origin_vol->volname, + sizeof(snap_vol->parent_volname)); + ret = glusterd_list_add_snapvol(origin_vol, snap_vol); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_LIST_SET_FAIL, + "could not add the " + "snap volume %s to the list", + snap_vol->volname); + goto out; + } + /* TODO : Sync before taking a snapshot */ + /* Copy the status and config files of geo-replication before + * taking a snapshot. During restore operation these files needs + * to be copied back in /var/lib/glusterd/georeplication/ + */ + ret = glusterd_copy_geo_rep_files(origin_vol, snap_vol, rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_OP_FAILED, + "Failed to copy " + "geo-rep config and status files for volume %s", + origin_vol->volname); + goto out; + } + } + + glusterd_auth_set_username(snap_vol, username); + glusterd_auth_set_password(snap_vol, password); + + /* Adding snap brickinfos to the snap volinfo */ + brick_count = 0; + cds_list_for_each_entry(brickinfo, &origin_vol->bricks, brick_list) + { + ret = glusterd_add_brick_to_snap_volume( + dict, rsp_dict, snap_vol, brickinfo, volcount, brick_count, clone); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, + "Failed to add the snap brick for " + "%s:%s to the snap volume", + brickinfo->hostname, brickinfo->path); + goto out; + } + brick_count++; + } + + /* During snapshot creation if I/O is in progress, + * then barrier value is enabled. Hence during snapshot create + * and in-turn snapshot restore the barrier value is set to enable. 
+ * Because of this further I/O on the mount point fails. + * Hence remove the barrier key from newly created snap volinfo + * before storing and generating the brick volfiles. Also update + * the snap vol's version after removing the barrier key. + */ + dict_deln(snap_vol->dict, "features.barrier", SLEN("features.barrier")); + gd_update_volume_op_versions(snap_vol); + + /* * + * Create the export file from the node where ganesha.enable "on" + * is executed + * */ + if (glusterd_is_ganesha_cluster() && + glusterd_check_ganesha_export(snap_vol)) { + if (is_origin_glusterd(dict)) { + ret = manage_export_config(clonename, "on", NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_EXPORT_FILE_CREATE_FAIL, + "Failed to create" + "export file for NFS-Ganesha\n"); + goto out; + } + } + + ret = dict_set_dynstr_with_alloc(snap_vol->dict, + "features.cache-invalidation", "on"); + ret = gd_ganesha_send_dbus(clonename, "on"); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_EXPORT_FILE_CREATE_FAIL, + "Dynamic export addition/deletion failed." + " Please see log file for details. Clone name = %s", + clonename); + goto out; + } + } + if (!glusterd_is_ganesha_cluster() && + glusterd_check_ganesha_export(snap_vol)) { + /* This happens when a snapshot was created when Ganesha was + * enabled globally. Then Ganesha disabled from the cluster. + * In such cases, we will have the volume level option set + * on dict, So we have to disable it as it doesn't make sense + * to keep the option. + */ + + ret = dict_set_dynstr(snap_vol->dict, "ganesha.enable", "off"); + if (ret) + goto out; + } + + ret = glusterd_store_volinfo(snap_vol, GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_SET_FAIL, + "Failed to store snapshot " + "volinfo (%s) for snap %s", + snap_vol->volname, snap->snapname); + goto out; + } + + ret = glusterd_copy_quota_files(origin_vol, snap_vol, &conf_present); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_VOL_CONFIG_FAIL, + "Failed to copy quota " + "config and cksum for volume %s", + origin_vol->volname); + goto out; + } + + if (snap_vol->is_snap_volume) { + ret = glusterd_snap_clear_unsupported_opt(snap_vol, unsupported_opt); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_OP_FAILED, + "Failed to clear quota " + "option for the snap %s (volume: %s)", + snap->snapname, origin_vol->volname); + goto out; + } + } + + ret = generate_brick_volfiles(snap_vol); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL, + "generating the brick " + "volfiles for the snap %s (volume: %s) failed", + snap->snapname, origin_vol->volname); + goto reset_option; + } + + ret = generate_client_volfiles(snap_vol, GF_CLIENT_TRUSTED); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL, + "generating the trusted " + "client volfiles for the snap %s (volume: %s) failed", + snap->snapname, origin_vol->volname); + goto reset_option; + } + + ret = generate_client_volfiles(snap_vol, GF_CLIENT_OTHER); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL, + "generating the client " + "volfiles for the snap %s (volume: %s) failed", + snap->snapname, origin_vol->volname); + goto reset_option; + } + +reset_option: + if (snap_vol->is_snap_volume) { + if (glusterd_snap_set_unsupported_opt(snap_vol, unsupported_opt)) { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_OP_FAILED, + "Failed to reset quota " + "option for the snap %s (volume: %s)", 
+ snap->snapname, origin_vol->volname); + } + } +out: + if (ret) { + for (i = 0; unsupported_opt[i].key; i++) + GF_FREE(unsupported_opt[i].value); + + if (snap_vol) { + if (glusterd_is_ganesha_cluster() && + glusterd_check_ganesha_export(snap_vol)) { + if (is_origin_glusterd(dict)) { + ret = manage_export_config(clonename, "on", NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_EXPORT_FILE_CREATE_FAIL, + "Failed to create " + "export file for NFS-Ganesha\n"); + } + } + + ret = gd_ganesha_send_dbus(clonename, "off"); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_EXPORT_FILE_CREATE_FAIL, + "Dynamic export addition/deletion failed." + " Please see log file for details. Clone name = %s", + clonename); + } + } + + glusterd_snap_volume_remove(rsp_dict, snap_vol, _gf_true, _gf_true); + } + snap_vol = NULL; + } + + return snap_vol; +} + +/* This is the prevalidate function for both activate and deactivate of snap + * For Activate operation pass is_op_activate as _gf_true + * For Deactivate operation pass is_op_activate as _gf_false + * */ +int +glusterd_snapshot_activate_deactivate_prevalidate(dict_t *dict, + char **op_errstr, + uint32_t *op_errno, + dict_t *rsp_dict, + gf_boolean_t is_op_activate) +{ + int32_t ret = -1; + char *snapname = NULL; + xlator_t *this = NULL; + glusterd_snap_t *snap = NULL; + glusterd_volinfo_t *snap_volinfo = NULL; + char err_str[PATH_MAX] = ""; + gf_loglevel_t loglevel = GF_LOG_ERROR; + glusterd_volume_status volume_status = GLUSTERD_STATUS_STOPPED; + int flags = 0; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + GF_VALIDATE_OR_GOTO(this->name, op_errno, out); + + if (!dict || !op_errstr) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY, + "input parameters NULL"); + goto out; + } + + ret = dict_get_strn(dict, "snapname", SLEN("snapname"), &snapname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Getting the snap name " + "failed"); + goto out; + } + + snap = glusterd_find_snap_by_name(snapname); + if (!snap) { + snprintf(err_str, sizeof(err_str), + "Snapshot (%s) does not " + "exist.", + snapname); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_NOT_FOUND, + "Snapname=%s", snapname, NULL); + *op_errno = EG_NOSNAP; + ret = -1; + goto out; + } + + /* If it is activation of snap then fetch the flags */ + if (is_op_activate) { + ret = dict_get_int32n(dict, "flags", SLEN("flags"), &flags); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get flags"); + goto out; + } + } + + /* TODO : As of now there is only volume in snapshot. + * Change this when multiple volume snapshot is introduced + */ + snap_volinfo = cds_list_entry(snap->volumes.next, glusterd_volinfo_t, + vol_list); + if (!snap_volinfo) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOLINFO_GET_FAIL, + "Unable to fetch snap_volinfo"); + ret = -1; + goto out; + } + + /* TODO: When multiple snap volumes are involved a cumulative + * logic is required to tell whether the snapshot is + * started/partially started/stopped */ + if (is_op_activate) { + volume_status = GLUSTERD_STATUS_STARTED; + } + + if (snap_volinfo->status == volume_status) { + if (is_op_activate) { + /* if flag is set to GF_CLI_FLAG_OP_FORCE + * try to start the snap volume, even + * if the volume_status is GLUSTERD_STATUS_STARTED. 
+ * By doing so we try to bring + * back the brick processes that are down*/ + if (!(flags & GF_CLI_FLAG_OP_FORCE)) { + snprintf(err_str, sizeof(err_str), + "Snapshot %s is already activated.", snapname); + *op_errno = EINVAL; + ret = -1; + } + } else { + snprintf(err_str, sizeof(err_str), + "Snapshot %s is already deactivated.", snapname); + *op_errno = EINVAL; + ret = -1; + } + goto out; + } + ret = 0; +out: + + if (ret && err_str[0] != '\0' && op_errstr) { + gf_msg(this->name, loglevel, 0, GD_MSG_SNAPSHOT_OP_FAILED, "%s", + err_str); + *op_errstr = gf_strdup(err_str); + } + + return ret; +} + +int32_t +glusterd_handle_snapshot_delete_vol(dict_t *dict, char *err_str, + uint32_t *op_errno, int len) +{ + int32_t ret = -1; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + char *volname = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(dict); + GF_VALIDATE_OR_GOTO(this->name, op_errno, out); + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get " + "volume name"); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + snprintf(err_str, len, "Volume (%s) does not exist", volname); + *op_errno = EG_NOVOL; + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND, + "Failed to get volinfo of " + "volume %s", + volname); + goto out; + } + + ret = glusterd_snapshot_get_vol_snapnames(dict, volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_LIST_GET_FAIL, + "Failed to get snapshot list for volume %s", volname); + goto out; + } + + ret = 0; +out: + return ret; +} + +static int32_t +glusterd_handle_snapshot_delete_all(dict_t *dict) +{ + int32_t ret = -1; + int32_t i = 0; + char key[32] = ""; + glusterd_conf_t *priv = NULL; + glusterd_snap_t *snap = NULL; + glusterd_snap_t *tmp_snap = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + GF_ASSERT(dict); + + cds_list_for_each_entry_safe(snap, tmp_snap, &priv->snapshots, snap_list) + { + /* indexing from 1 to n, to keep it uniform with other code + * paths + */ + i++; + ret = snprintf(key, sizeof(key), "snapname%d", i); + if (ret < 0) { + goto out; + } + + ret = dict_set_dynstr_with_alloc(dict, key, snap->snapname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Could not save " + "snap name"); + goto out; + } + } + + ret = dict_set_int32n(dict, "snapcount", SLEN("snapcount"), i); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Could not save snapcount"); + goto out; + } + + ret = 0; + +out: + return ret; +} + +int32_t +glusterd_handle_snapshot_delete_type_snap(rpcsvc_request_t *req, + glusterd_op_t op, dict_t *dict, + char *err_str, uint32_t *op_errno, + size_t len) +{ + int32_t ret = -1; + int64_t volcount = 0; + char *snapname = NULL; + char *volname = NULL; + char key[64] = ""; + int keylen; + glusterd_snap_t *snap = NULL; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_volinfo_t *tmp = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + GF_ASSERT(req); + GF_ASSERT(dict); + GF_ASSERT(err_str); + + ret = dict_get_strn(dict, "snapname", SLEN("snapname"), &snapname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get snapname"); + goto out; + } + + snap = glusterd_find_snap_by_name(snapname); + if (!snap) { + snprintf(err_str, len, "Snapshot (%s) does not exist", snapname); + *op_errno = 
EG_NOSNAP; + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_SNAP_NOT_FOUND, "%s", + err_str); + ret = -1; + goto out; + } + + /* Set volnames in the dict to get mgmt_v3 lock */ + cds_list_for_each_entry_safe(snap_vol, tmp, &snap->volumes, vol_list) + { + volcount++; + volname = gf_strdup(snap_vol->parent_volname); + if (!volname) { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "strdup failed"); + goto out; + } + + keylen = snprintf(key, sizeof(key), "volname%" PRId64, volcount); + ret = dict_set_dynstrn(dict, key, keylen, volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set " + "volume name in dictionary"); + GF_FREE(volname); + goto out; + } + volname = NULL; + } + ret = dict_set_int64(dict, "volcount", volcount); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set volcount"); + goto out; + } + + ret = glusterd_mgmt_v3_initiate_snap_phases(req, op, dict); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_INIT_FAIL, + "Failed to initiate snap " + "phases"); + goto out; + } + + ret = 0; + +out: + return ret; +} + +/* This is a snapshot remove handler function. This function will be + * executed in the originator node. This function is responsible for + * calling mgmt v3 framework to do the actual remove on all the bricks + * + * @param req RPC request object + * @param op gluster operation + * @param dict dictionary containing snapshot remove request + * @param err_str In case of an err this string should be populated + * @param len length of err_str buffer + * + * @return Negative value on Failure and 0 in success + */ +int +glusterd_handle_snapshot_delete(rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict, char *err_str, uint32_t *op_errno, + size_t len) +{ + int ret = -1; + xlator_t *this = NULL; + int32_t delete_cmd = -1; + + this = THIS; + + GF_ASSERT(this); + + GF_ASSERT(req); + GF_ASSERT(dict); + GF_ASSERT(err_str); + GF_VALIDATE_OR_GOTO(this->name, op_errno, out); + + ret = dict_get_int32n(dict, "sub-cmd", SLEN("sub-cmd"), &delete_cmd); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMAND_NOT_FOUND, + "Failed to get sub-cmd"); + goto out; + } + + switch (delete_cmd) { + case GF_SNAP_DELETE_TYPE_SNAP: + case GF_SNAP_DELETE_TYPE_ITER: + ret = glusterd_handle_snapshot_delete_type_snap( + req, op, dict, err_str, op_errno, len); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_REMOVE_FAIL, + "Failed to handle " + "snapshot delete for type SNAP"); + goto out; + } + break; + + case GF_SNAP_DELETE_TYPE_ALL: + ret = glusterd_handle_snapshot_delete_all(dict); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_REMOVE_FAIL, + "Failed to handle " + "snapshot delete for type ALL"); + goto out; + } + break; + + case GF_SNAP_DELETE_TYPE_VOL: + ret = glusterd_handle_snapshot_delete_vol(dict, err_str, op_errno, + len); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_REMOVE_FAIL, + "Failed to handle " + "snapshot delete for type VOL"); + goto out; + } + break; + + default: + *op_errno = EINVAL; + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, + "Wrong snapshot delete type"); + break; + } + + if (ret == 0 && (delete_cmd == GF_SNAP_DELETE_TYPE_ALL || + delete_cmd == GF_SNAP_DELETE_TYPE_VOL)) { + ret = glusterd_op_send_cli_response(op, 0, 0, req, dict, err_str); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_CLI_RESP, + "Failed to send cli " + "response"); + goto out; + } + } + ret = 0; 
+out: + return ret; +} + +int +glusterd_snapshot_remove_prevalidate(dict_t *dict, char **op_errstr, + uint32_t *op_errno, dict_t *rsp_dict) +{ + int32_t ret = -1; + char *snapname = NULL; + xlator_t *this = NULL; + glusterd_snap_t *snap = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + GF_VALIDATE_OR_GOTO(this->name, op_errno, out); + + if (!dict || !op_errstr) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY, + "input parameters NULL"); + goto out; + } + + ret = dict_get_strn(dict, "snapname", SLEN("snapname"), &snapname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Getting the snap name " + "failed"); + goto out; + } + + snap = glusterd_find_snap_by_name(snapname); + if (!snap) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_SNAP_NOT_FOUND, + "Snapshot (%s) does not exist", snapname); + *op_errno = EG_NOSNAP; + ret = -1; + goto out; + } + + ret = dict_set_dynstr_with_alloc(dict, "snapuuid", + uuid_utoa(snap->snap_id)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set snap " + "uuid in response dictionary for %s snapshot", + snap->snapname); + goto out; + } + + ret = 0; +out: + return ret; +} + +int +glusterd_snapshot_status_prevalidate(dict_t *dict, char **op_errstr, + uint32_t *op_errno, dict_t *rsp_dict) +{ + int ret = -1; + char *snapname = NULL; + glusterd_conf_t *conf = NULL; + xlator_t *this = NULL; + int32_t cmd = -1; + glusterd_volinfo_t *volinfo = NULL; + char *volname = NULL; + + this = THIS; + GF_ASSERT(this); + conf = this->private; + GF_ASSERT(conf); + GF_ASSERT(op_errstr); + GF_VALIDATE_OR_GOTO(this->name, op_errno, out); + + if (!dict) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY, + "Input dict is NULL"); + goto out; + } + + ret = dict_get_int32n(dict, "sub-cmd", SLEN("sub-cmd"), &cmd); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Could not fetch status cmd"); + goto out; + } + + switch (cmd) { + case GF_SNAP_STATUS_TYPE_ALL: { + break; + } + case GF_SNAP_STATUS_TYPE_ITER: + case GF_SNAP_STATUS_TYPE_SNAP: { + ret = dict_get_strn(dict, "snapname", SLEN("snapname"), &snapname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Could not fetch snapname"); + goto out; + } + + if (!glusterd_find_snap_by_name(snapname)) { + ret = gf_asprintf(op_errstr, + "Snapshot (%s) " + "does not exist", + snapname); + *op_errno = EG_NOSNAP; + if (ret < 0) { + goto out; + } + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_SNAP_NOT_FOUND, + "Snapshot (%s) does not exist", snapname); + goto out; + } + break; + } + case GF_SNAP_STATUS_TYPE_VOL: { + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Could not fetch volname"); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + ret = gf_asprintf(op_errstr, + "Volume (%s) " + "does not exist", + volname); + *op_errno = EG_NOVOL; + if (ret < 0) { + goto out; + } + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND, + "Volume " + "%s not present", + volname); + goto out; + } + break; + } + default: { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_COMMAND_NOT_FOUND, + "Invalid command"); + *op_errno = EINVAL; + break; + } + } + ret = 0; + +out: + return ret; +} + +int32_t +glusterd_snapshot_activate_commit(dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + int32_t ret = -1; + char *snapname = 
NULL; + glusterd_snap_t *snap = NULL; + glusterd_volinfo_t *snap_volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + xlator_t *this = NULL; + int flags = 0; + int brick_count = -1; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(dict); + GF_ASSERT(rsp_dict); + GF_ASSERT(op_errstr); + + if (!dict || !op_errstr) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY, + "input parameters NULL"); + goto out; + } + + ret = dict_get_strn(dict, "snapname", SLEN("snapname"), &snapname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Getting the snap name " + "failed"); + goto out; + } + + ret = dict_get_int32n(dict, "flags", SLEN("flags"), &flags); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get flags"); + goto out; + } + + snap = glusterd_find_snap_by_name(snapname); + if (!snap) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_SNAP_NOT_FOUND, + "Snapshot (%s) does not exist", snapname); + ret = -1; + goto out; + } + + /* TODO : As of now there is only volume in snapshot. + * Change this when multiple volume snapshot is introduced + */ + snap_volinfo = cds_list_entry(snap->volumes.next, glusterd_volinfo_t, + vol_list); + if (!snap_volinfo) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, + "Unable to fetch snap_volinfo"); + ret = -1; + goto out; + } + + /* create the complete brick here */ + cds_list_for_each_entry(brickinfo, &snap_volinfo->bricks, brick_list) + { + brick_count++; + if (gf_uuid_compare(brickinfo->uuid, MY_UUID)) + continue; + ret = glusterd_snap_brick_create(snap_volinfo, brickinfo, brick_count, + _gf_false); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_CREATION_FAIL, + "not able to " + "create the brick for the snap %s, volume %s", + snap_volinfo->snapshot->snapname, snap_volinfo->volname); + goto out; + } + } + + ret = glusterd_start_volume(snap_volinfo, flags, _gf_true); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_ACTIVATE_FAIL, + "Failed to activate snap volume %s of the snap %s", + snap_volinfo->volname, snap->snapname); + goto out; + } + + ret = dict_set_dynstr_with_alloc(rsp_dict, "snapuuid", + uuid_utoa(snap->snap_id)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set snap " + "uuid in response dictionary for %s snapshot", + snap->snapname); + goto out; + } + + ret = 0; +out: + return ret; +} + +int32_t +glusterd_snapshot_deactivate_commit(dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + int32_t ret = -1; + char *snapname = NULL; + glusterd_snap_t *snap = NULL; + glusterd_volinfo_t *snap_volinfo = NULL; + xlator_t *this = NULL; + char snap_path[PATH_MAX] = ""; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(dict); + GF_ASSERT(rsp_dict); + GF_ASSERT(op_errstr); + + if (!dict || !op_errstr) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY, + "input parameters NULL"); + goto out; + } + + ret = dict_get_strn(dict, "snapname", SLEN("snapname"), &snapname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Getting the snap name " + "failed"); + goto out; + } + + snap = glusterd_find_snap_by_name(snapname); + if (!snap) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_SNAP_NOT_FOUND, + "Snapshot (%s) does not exist", snapname); + ret = -1; + goto out; + } + + /* TODO : As of now there is only volume in snapshot. 
+ * Change this when multiple volume snapshot is introduced + */ + snap_volinfo = cds_list_entry(snap->volumes.next, glusterd_volinfo_t, + vol_list); + if (!snap_volinfo) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, + "Unable to fetch snap_volinfo"); + ret = -1; + goto out; + } + + ret = glusterd_stop_volume(snap_volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_DEACTIVATE_FAIL, + "Failed to deactivate" + "snap %s", + snapname); + goto out; + } + + ret = glusterd_snap_unmount(this, snap_volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTERD_UMOUNT_FAIL, + "Failed to unmounts for %s", snap->snapname); + } + + /*Remove /var/run/gluster/snaps/<snap-name> entry for deactivated snaps. + * This entry will be created again during snap activate. + */ + snprintf(snap_path, sizeof(snap_path), "%s/%s", snap_mount_dir, snapname); + ret = recursive_rmdir(snap_path); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED, + "Failed to remove " + "%s directory : error : %s", + snap_path, strerror(errno)); + goto out; + } + + ret = dict_set_dynstr_with_alloc(rsp_dict, "snapuuid", + uuid_utoa(snap->snap_id)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set snap " + "uuid in response dictionary for %s snapshot", + snap->snapname); + goto out; + } + + ret = 0; +out: + return ret; +} + +int32_t +glusterd_snapshot_remove_commit(dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + int32_t ret = -1; + char *snapname = NULL; + char *dup_snapname = NULL; + glusterd_snap_t *snap = NULL; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *snap_volinfo = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(dict); + GF_ASSERT(rsp_dict); + GF_ASSERT(op_errstr); + + priv = this->private; + GF_ASSERT(priv); + + if (!dict || !op_errstr) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY, + "input parameters NULL"); + goto out; + } + + ret = dict_get_strn(dict, "snapname", SLEN("snapname"), &snapname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Getting the snap name " + "failed"); + goto out; + } + + snap = glusterd_find_snap_by_name(snapname); + if (!snap) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_SNAP_NOT_FOUND, + "Snapshot (%s) does not exist", snapname); + ret = -1; + goto out; + } + + ret = dict_set_dynstr_with_alloc(rsp_dict, "snapuuid", + uuid_utoa(snap->snap_id)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set snap uuid in " + "response dictionary for %s snapshot", + snap->snapname); + goto out; + } + + /* Save the snap status as GD_SNAP_STATUS_DECOMMISSION so + * that if the node goes down the snap would be removed + */ + snap->snap_status = GD_SNAP_STATUS_DECOMMISSION; + ret = glusterd_store_snap(snap); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_OBJECT_STORE_FAIL, + "Failed to " + "store snap object %s", + snap->snapname); + goto out; + } else + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_OP_SUCCESS, + "Successfully marked " + "snap %s for decommission.", + snap->snapname); + + if (is_origin_glusterd(dict) == _gf_true) { + /* TODO : As of now there is only volume in snapshot. 
+ * Change this when multiple volume snapshot is introduced + */ + snap_volinfo = cds_list_entry(snap->volumes.next, glusterd_volinfo_t, + vol_list); + if (!snap_volinfo) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, + "Unable to fetch snap_volinfo"); + ret = -1; + goto out; + } + + /* From origin glusterd check if * + * any peers with snap bricks is down */ + ret = glusterd_find_missed_snap(rsp_dict, snap_volinfo, &priv->peers, + GF_SNAP_OPTION_TYPE_DELETE); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MISSED_SNAP_GET_FAIL, + "Failed to find missed snap deletes"); + goto out; + } + } + + ret = glusterd_snap_remove(rsp_dict, snap, _gf_true, _gf_false, _gf_false); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_REMOVE_FAIL, + "Failed to remove snap %s", snapname); + goto out; + } + + dup_snapname = gf_strdup(snapname); + if (!dup_snapname) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "Strdup failed"); + ret = -1; + goto out; + } + + ret = dict_set_dynstr(rsp_dict, "snapname", dup_snapname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set the snapname"); + GF_FREE(dup_snapname); + goto out; + } + + ret = 0; +out: + return ret; +} + +int32_t +glusterd_do_snap_cleanup(dict_t *dict, char **op_errstr, dict_t *rsp_dict) +{ + int32_t ret = -1; + char *name = NULL; + char *volname = NULL; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + glusterd_snap_t *snap = NULL; + + this = THIS; + GF_ASSERT(this); + conf = this->private; + GF_ASSERT(conf); + + if (!dict || !op_errstr) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY, + "input parameters NULL"); + goto out; + } + + /* As of now snapshot of multiple volumes are not supported */ + ret = dict_get_strn(dict, "volname1", SLEN("volname1"), &volname); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get" + " volume name"); + goto out; + } + + ret = dict_get_strn(dict, "snapname", SLEN("snapname"), &name); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "getting the snap " + "name failed (volume: %s)", + volname); + goto out; + } + + /* + If the snapname is not found that means the failure happened at + staging, or in commit, before the snap object is created, in which + case there is nothing to cleanup. So set ret to 0. 
+ */ + snap = glusterd_find_snap_by_name(name); + if (!snap) { + gf_msg(this->name, GF_LOG_INFO, EINVAL, GD_MSG_SNAP_NOT_FOUND, + "Snapshot (%s) does not exist", name); + ret = 0; + goto out; + } + + ret = glusterd_snap_remove(rsp_dict, snap, _gf_true, _gf_true, _gf_false); + if (ret) { + /* Ignore failure as this is a cleanup of half cooked + snapshot */ + gf_msg_debug(this->name, 0, "removing the snap %s failed", name); + ret = 0; + } + + name = NULL; + + ret = 0; + +out: + + return ret; +} + +/* In case of a successful, delete or create operation, during post_validate * + * look for missed snap operations and update the missed snap lists */ +int32_t +glusterd_snapshot_update_snaps_post_validate(dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + int32_t ret = -1; + int32_t missed_snap_count = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(dict); + GF_ASSERT(rsp_dict); + GF_ASSERT(op_errstr); + + ret = dict_get_int32n(dict, "missed_snap_count", SLEN("missed_snap_count"), + &missed_snap_count); + if (ret) { + gf_msg_debug(this->name, 0, "No missed snaps"); + ret = 0; + goto out; + } + + ret = glusterd_add_missed_snaps_to_list(dict, missed_snap_count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MISSEDSNAP_INFO_SET_FAIL, + "Failed to add missed snaps to list"); + goto out; + } + + ret = glusterd_store_update_missed_snaps(); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MISSEDSNAP_INFO_SET_FAIL, + "Failed to update missed_snaps_list"); + goto out; + } + +out: + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +int +glusterd_take_brick_snapshot_task(void *opaque) +{ + int ret = 0; + int32_t clone = 0; + snap_create_args_t *snap_args = NULL; + char *clonename = NULL; + char key[64] = ""; + int keylen; + + GF_ASSERT(opaque); + + snap_args = (snap_create_args_t *)opaque; + THIS = snap_args->this; + + /* Try and fetch clonename. If present set status with clonename * + * else do so as snap-vol */ + ret = dict_get_strn(snap_args->dict, "clonename", SLEN("clonename"), + &clonename); + if (ret) { + keylen = snprintf(key, sizeof(key), "snap-vol%d.brick%d.status", + snap_args->volcount, snap_args->brickorder); + } else { + keylen = snprintf(key, sizeof(key), "clone%d.brick%d.status", + snap_args->volcount, snap_args->brickorder); + clone = 1; + } + + ret = glusterd_take_brick_snapshot( + snap_args->dict, snap_args->snap_vol, snap_args->brickinfo, + snap_args->volcount, snap_args->brickorder, clone); + + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_CREATION_FAIL, + "Failed to " + "take backend snapshot for brick " + "%s:%s volume(%s)", + snap_args->brickinfo->hostname, snap_args->brickinfo->path, + snap_args->snap_vol->volname); + } + + if (dict_set_int32n(snap_args->rsp_dict, key, keylen, (ret) ? 
0 : 1)) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "failed to " + "add %s to dict", + key); + ret = -1; + goto out; + } + +out: + return ret; +} + +int32_t +glusterd_take_brick_snapshot_cbk(int ret, call_frame_t *frame, void *opaque) +{ + snap_create_args_t *snap_args = NULL; + struct syncargs *args = NULL; + + GF_ASSERT(opaque); + + snap_args = (snap_create_args_t *)opaque; + args = snap_args->args; + + if (ret) + args->op_ret = ret; + + GF_FREE(opaque); + synctask_barrier_wake(args); + return 0; +} + +int32_t +glusterd_schedule_brick_snapshot(dict_t *dict, dict_t *rsp_dict, + glusterd_snap_t *snap) +{ + int ret = -1; + int32_t volcount = 0; + int32_t brickcount = 0; + int32_t brickorder = 0; + int32_t taskcount = 0; + char key[64] = ""; + int keylen; + xlator_t *this = NULL; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + struct syncargs args = {0}; + snap_create_args_t *snap_args = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(dict); + GF_ASSERT(snap); + + ret = synctask_barrier_init((&args)); + if (ret) + goto out; + cds_list_for_each_entry(snap_vol, &snap->volumes, vol_list) + { + volcount++; + brickcount = 0; + brickorder = 0; + cds_list_for_each_entry(brickinfo, &snap_vol->bricks, brick_list) + { + keylen = snprintf(key, sizeof(key), "snap-vol%d.brick%d.order", + volcount, brickcount); + ret = dict_set_int32n(rsp_dict, key, keylen, brickorder); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set %s", key); + goto out; + } + + if ((gf_uuid_compare(brickinfo->uuid, MY_UUID)) || + (brickinfo->snap_status == -1)) { + if (!gf_uuid_compare(brickinfo->uuid, MY_UUID)) { + brickcount++; + keylen = snprintf(key, sizeof(key), + "snap-vol%d.brick%d.status", volcount, + brickorder); + ret = dict_set_int32n(rsp_dict, key, keylen, 0); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_DICT_SET_FAILED, + "failed to add %s to " + "dict", + key); + goto out; + } + } + brickorder++; + continue; + } + + snap_args = GF_CALLOC(1, sizeof(*snap_args), + gf_gld_mt_snap_create_args_t); + if (!snap_args) { + ret = -1; + goto out; + } + + snap_args->this = this; + snap_args->dict = dict; + snap_args->rsp_dict = rsp_dict; + snap_args->snap_vol = snap_vol; + snap_args->brickinfo = brickinfo; + snap_args->volcount = volcount; + snap_args->brickcount = brickcount; + snap_args->brickorder = brickorder; + snap_args->args = &args; + + ret = synctask_new( + this->ctx->env, glusterd_take_brick_snapshot_task, + glusterd_take_brick_snapshot_cbk, NULL, snap_args); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_CREATION_FAIL, + "Failed to " + "spawn task for snapshot create"); + GF_FREE(snap_args); + goto out; + } + taskcount++; + brickcount++; + brickorder++; + } + + snprintf(key, sizeof(key), "snap-vol%d_brickcount", volcount); + ret = dict_set_int64(rsp_dict, key, brickcount); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "failed to " + "add %s to dict", + key); + goto out; + } + } + synctask_barrier_wait((&args), taskcount); + taskcount = 0; + + if (args.op_ret) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_CREATION_FAIL, + "Failed to create snapshot"); + + ret = args.op_ret; +out: + if (ret && taskcount) + synctask_barrier_wait((&args), taskcount); + + return ret; +} + +glusterd_snap_t * +glusterd_create_snap_object_for_clone(dict_t *dict, dict_t *rsp_dict) +{ + char *snapname = NULL; + uuid_t *snap_id = NULL; + glusterd_snap_t *snap = NULL; + xlator_t 
*this = NULL; + int ret = -1; + + this = THIS; + + GF_ASSERT(dict); + GF_ASSERT(rsp_dict); + + /* Fetch snapname, description, id and time from dict */ + ret = dict_get_strn(dict, "clonename", SLEN("clonename"), &snapname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to fetch clonename"); + goto out; + } + + ret = dict_get_bin(dict, "clone-id", (void **)&snap_id); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to fetch clone_id"); + goto out; + } + + snap = glusterd_new_snap_object(); + if (!snap) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_OBJ_NEW_FAIL, + "Could not create " + "the snap object for snap %s", + snapname); + goto out; + } + + gf_strncpy(snap->snapname, snapname, sizeof(snap->snapname)); + gf_uuid_copy(snap->snap_id, *snap_id); + + ret = 0; + +out: + if (ret) { + snap = NULL; + } + + return snap; +} + +int32_t +glusterd_snapshot_clone_commit(dict_t *dict, char **op_errstr, dict_t *rsp_dict) +{ + int ret = -1; + int64_t volcount = 0; + char *snapname = NULL; + char *volname = NULL; + char *tmp_name = NULL; + xlator_t *this = NULL; + glusterd_snap_t *snap_parent = NULL; + glusterd_snap_t *snap = NULL; + glusterd_volinfo_t *origin_vol = NULL; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(dict); + GF_ASSERT(op_errstr); + GF_ASSERT(rsp_dict); + priv = this->private; + GF_ASSERT(priv); + + ret = dict_get_strn(dict, "clonename", SLEN("clonename"), &snapname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to fetch clonename"); + goto out; + } + tmp_name = gf_strdup(snapname); + if (!tmp_name) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "Out of memory"); + ret = -1; + goto out; + } + + ret = dict_set_dynstr(rsp_dict, "clonename", tmp_name); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to set clonename in rsp_dict"); + GF_FREE(tmp_name); + goto out; + } + tmp_name = NULL; + + ret = dict_get_strn(dict, "snapname", SLEN("snapname"), &volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "failed to get snap name"); + goto out; + } + + snap_parent = glusterd_find_snap_by_name(volname); + if (!snap_parent) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_SNAP_NOT_FOUND, + "Failed to " + "fetch snap %s", + volname); + goto out; + } + + /* TODO : As of now there is only one volume in snapshot. 
+ * Change this when multiple volume snapshot is introduced + */ + origin_vol = cds_list_entry(snap_parent->volumes.next, glusterd_volinfo_t, + vol_list); + if (!origin_vol) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, + "Failed to get snap " + "volinfo %s", + snap_parent->snapname); + goto out; + } + + snap = glusterd_create_snap_object_for_clone(dict, rsp_dict); + if (!snap) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_OBJ_NEW_FAIL, + "creating the" + "snap object %s failed", + snapname); + ret = -1; + goto out; + } + + snap_vol = glusterd_do_snap_vol(origin_vol, snap, dict, rsp_dict, 1, 1); + if (!snap_vol) { + ret = -1; + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_CREATION_FAIL, + "taking the " + "snapshot of the volume %s failed", + volname); + goto out; + } + + volcount = 1; + ret = dict_set_int64(rsp_dict, "volcount", volcount); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set volcount"); + goto out; + } + + ret = glusterd_schedule_brick_snapshot(dict, rsp_dict, snap); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_BACKEND_MAKE_FAIL, + "Failed to take backend " + "snapshot %s", + snap->snapname); + goto out; + } + + cds_list_del_init(&snap_vol->vol_list); + ret = dict_set_dynstr_with_alloc(rsp_dict, "snapuuid", + uuid_utoa(snap_vol->volume_id)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set snap " + "uuid in response dictionary for %s snapshot", + snap->snapname); + goto out; + } + + glusterd_list_add_order(&snap_vol->vol_list, &priv->volumes, + glusterd_compare_volume_name); + + ret = 0; + +out: + if (ret) { + if (snap) + glusterd_snap_remove(rsp_dict, snap, _gf_true, _gf_true, _gf_true); + snap = NULL; + } + + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_snapshot_create_commit(dict_t *dict, char **op_errstr, + uint32_t *op_errno, dict_t *rsp_dict) +{ + int ret = -1; + int64_t i = 0; + int64_t volcount = 0; + int32_t snap_activate = 0; + int32_t flags = 0; + char *snapname = NULL; + char *volname = NULL; + char *tmp_name = NULL; + char key[64] = ""; + int keylen; + xlator_t *this = NULL; + glusterd_snap_t *snap = NULL; + glusterd_volinfo_t *origin_vol = NULL; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(dict); + GF_ASSERT(op_errstr); + GF_VALIDATE_OR_GOTO(this->name, op_errno, out); + GF_ASSERT(rsp_dict); + priv = this->private; + GF_ASSERT(priv); + + ret = dict_get_int64(dict, "volcount", &volcount); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "failed to " + "get the volume count"); + goto out; + } + + ret = dict_get_strn(dict, "snapname", SLEN("snapname"), &snapname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to fetch snapname"); + goto out; + } + tmp_name = gf_strdup(snapname); + if (!tmp_name) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "Out of memory"); + ret = -1; + goto out; + } + + ret = dict_set_dynstr(rsp_dict, "snapname", tmp_name); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to set snapname in rsp_dict"); + GF_FREE(tmp_name); + goto out; + } + tmp_name = NULL; + + snap = glusterd_create_snap_object(dict, rsp_dict); + if (!snap) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_CREATION_FAIL, + "creating the" + "snap object %s failed", + snapname); + ret = -1; + goto out; + } + + 
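+ /* For each volume named in the request: look up its volinfo, check the
+  * snapshot soft limit on the originator node, and build the snapshot
+  * volume via glusterd_do_snap_vol() before the backend brick snapshots
+  * are scheduled below. */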
for (i = 1; i <= volcount; i++) { + keylen = snprintf(key, sizeof(key), "volname%" PRId64, i); + ret = dict_get_strn(dict, key, keylen, &volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "failed to get volume name"); + goto out; + } + + ret = glusterd_volinfo_find(volname, &origin_vol); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND, + "failed to get the volinfo for " + "the volume %s", + volname); + goto out; + } + + if (is_origin_glusterd(dict)) { + ret = glusterd_is_snap_soft_limit_reached(origin_vol, rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPSHOT_OP_FAILED, + "Failed to " + "check soft limit exceeded or not, " + "for volume %s ", + origin_vol->volname); + goto out; + } + } + + snap_vol = glusterd_do_snap_vol(origin_vol, snap, dict, rsp_dict, i, 0); + if (!snap_vol) { + ret = -1; + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_CREATION_FAIL, + "taking the " + "snapshot of the volume %s failed", + volname); + goto out; + } + } + ret = dict_set_int64(rsp_dict, "volcount", volcount); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set volcount"); + goto out; + } + + ret = glusterd_schedule_brick_snapshot(dict, rsp_dict, snap); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_CREATION_FAIL, + "Failed to take backend " + "snapshot %s", + snap->snapname); + goto out; + } + + ret = dict_set_dynstr_with_alloc(rsp_dict, "snapuuid", + uuid_utoa(snap->snap_id)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set snap " + "uuid in response dictionary for %s snapshot", + snap->snapname); + goto out; + } + + snap_activate = dict_get_str_boolean( + priv->opts, GLUSTERD_STORE_KEY_SNAP_ACTIVATE, _gf_false); + if (!snap_activate) { + cds_list_for_each_entry(snap_vol, &snap->volumes, vol_list) + { + snap_vol->status = GLUSTERD_STATUS_STOPPED; + ret = glusterd_store_volinfo(snap_vol, + GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_SET_FAIL, + "Failed to store snap volinfo %s", snap_vol->volname); + goto out; + } + } + + goto out; + } + + /* Activate created bricks in case of activate-on-create config. 
*/ + ret = dict_get_int32n(dict, "flags", SLEN("flags"), &flags); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get flags"); + goto out; + } + + cds_list_for_each_entry(snap_vol, &snap->volumes, vol_list) + { + ret = glusterd_start_volume(snap_vol, flags, _gf_true); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_ACTIVATE_FAIL, + "Failed to activate snap volume %s of the " + "snap %s", + snap_vol->volname, snap->snapname); + goto out; + } + } + + ret = 0; + +out: + if (ret) { + if (snap) + glusterd_snap_remove(rsp_dict, snap, _gf_true, _gf_true, _gf_false); + snap = NULL; + } + + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +int +snap_max_hard_limit_set_commit(dict_t *dict, uint64_t value, char *volname, + char **op_errstr) +{ + char err_str[PATH_MAX] = ""; + glusterd_conf_t *conf = NULL; + glusterd_volinfo_t *volinfo = NULL; + int ret = -1; + xlator_t *this = NULL; + char *next_version = NULL; + + this = THIS; + + GF_ASSERT(this); + GF_ASSERT(dict); + GF_ASSERT(op_errstr); + + conf = this->private; + + GF_ASSERT(conf); + + /* TODO: Initiate auto deletion when there is a limit change */ + if (!volname) { + /* For system limit */ + ret = dict_set_uint64(conf->opts, + GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT, value); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to store " + "%s in the options", + GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT); + goto out; + } + + ret = glusterd_get_next_global_opt_version_str(conf->opts, + &next_version); + if (ret) + goto out; + + ret = dict_set_strn(conf->opts, GLUSTERD_GLOBAL_OPT_VERSION, + SLEN(GLUSTERD_GLOBAL_OPT_VERSION), next_version); + if (ret) + goto out; + + ret = glusterd_store_options(this, conf->opts); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_FAIL, + "Failed to store " + "options"); + goto out; + } + } else { + /* For one volume */ + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + snprintf(err_str, PATH_MAX, + "Failed to get the" + " volinfo for volume %s", + volname); + goto out; + } + + volinfo->snap_max_hard_limit = value; + + ret = glusterd_store_volinfo(volinfo, + GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + snprintf(err_str, PATH_MAX, + "Failed to store " + "snap-max-hard-limit for volume %s", + volname); + goto out; + } + } + + ret = 0; +out: + if (ret) { + *op_errstr = gf_strdup(err_str); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPSHOT_OP_FAILED, "%s", + err_str); + } + return ret; +} + +int +glusterd_snapshot_config_commit(dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + char *volname = NULL; + xlator_t *this = NULL; + int ret = -1; + glusterd_conf_t *conf = NULL; + int config_command = 0; + uint64_t hard_limit = 0; + uint64_t soft_limit = 0; + char *next_version = NULL; + char *auto_delete = NULL; + char *snap_activate = NULL; + gf_boolean_t system_conf = _gf_false; + + this = THIS; + + GF_ASSERT(this); + GF_ASSERT(dict); + GF_ASSERT(op_errstr); + + conf = this->private; + + GF_ASSERT(conf); + + ret = dict_get_int32n(dict, "config-command", SLEN("config-command"), + &config_command); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMAND_NOT_FOUND, + "failed to get config-command type"); + goto out; + } + if (config_command != GF_SNAP_CONFIG_TYPE_SET) { + ret = 0; + goto out; + } + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + + /* config values snap-max-hard-limit and snap-max-soft-limit are + * optional and hence we are not erroring 
out if values are not + * present + */ + gd_get_snap_conf_values_if_present(dict, &hard_limit, &soft_limit); + + if (hard_limit) { + /* Commit ops for snap-max-hard-limit */ + ret = snap_max_hard_limit_set_commit(dict, hard_limit, volname, + op_errstr); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HARD_LIMIT_SET_FAIL, + "snap-max-hard-limit set commit failed."); + goto out; + } + } + + if (soft_limit) { + /* For system limit */ + system_conf = _gf_true; + ret = dict_set_uint64( + conf->opts, GLUSTERD_STORE_KEY_SNAP_MAX_SOFT_LIMIT, soft_limit); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to save %s in the dictionary", + GLUSTERD_STORE_KEY_SNAP_MAX_SOFT_LIMIT); + goto out; + } + } + + if (hard_limit || soft_limit) { + ret = 0; + goto done; + } + + if (!dict_get_strn(dict, GLUSTERD_STORE_KEY_SNAP_AUTO_DELETE, + SLEN(GLUSTERD_STORE_KEY_SNAP_AUTO_DELETE), + &auto_delete)) { + system_conf = _gf_true; + ret = dict_set_dynstr_with_alloc( + conf->opts, GLUSTERD_STORE_KEY_SNAP_AUTO_DELETE, auto_delete); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Could not " + "save auto-delete value in conf->opts"); + goto out; + } + } else if (!dict_get_strn(dict, GLUSTERD_STORE_KEY_SNAP_ACTIVATE, + SLEN(GLUSTERD_STORE_KEY_SNAP_ACTIVATE), + &snap_activate)) { + system_conf = _gf_true; + ret = dict_set_dynstr_with_alloc( + conf->opts, GLUSTERD_STORE_KEY_SNAP_ACTIVATE, snap_activate); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Could not save " + "snap-activate-on-create value in conf->opts"); + goto out; + } + } else { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, + "Invalid option"); + goto out; + } + +done: + if (system_conf) { + ret = glusterd_get_next_global_opt_version_str(conf->opts, + &next_version); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_GLOBAL_OP_VERSION_GET_FAIL, + "Failed to get next global opt-version"); + goto out; + } + + ret = dict_set_strn(conf->opts, GLUSTERD_GLOBAL_OPT_VERSION, + SLEN(GLUSTERD_GLOBAL_OPT_VERSION), next_version); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_GLOBAL_OP_VERSION_SET_FAIL, + "Failed to set next global opt-version"); + goto out; + } + + ret = glusterd_store_options(this, conf->opts); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_FAIL, + "Failed to store options"); + goto out; + } + } + +out: + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +static int +glusterd_get_brick_lvm_details(dict_t *rsp_dict, + glusterd_brickinfo_t *brickinfo, char *volname, + char *device, const char *key_prefix) +{ + int ret = -1; + glusterd_conf_t *priv = NULL; + runner_t runner = { + 0, + }; + xlator_t *this = NULL; + char msg[PATH_MAX] = ""; + char buf[PATH_MAX] = ""; + char *ptr = NULL; + char *token = NULL; + char key[160] = ""; /* key_prefix is 128 bytes at most */ + char *value = NULL; + + GF_ASSERT(rsp_dict); + GF_ASSERT(brickinfo); + GF_ASSERT(volname); + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + device = glusterd_get_brick_mount_device(brickinfo->path); + if (!device) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_GET_INFO_FAIL, + "Getting device name for " + "the brick %s:%s failed", + brickinfo->hostname, brickinfo->path); + goto out; + } + runinit(&runner); + snprintf(msg, sizeof(msg), + "running lvs command, " + "for getting snap status"); + /* Using lvs command fetch the Volume Group name, + * Percentage of data 
filled and Logical Volume size + * + * "-o" argument is used to get the desired information, + * example : "lvs /dev/VolGroup/thin_vol -o vgname,lv_size", + * will get us Volume Group name and Logical Volume size. + * + * Here separator used is ":", + * for the above given command with separator ":", + * The output will be "vgname:lvsize" + */ + runner_add_args(&runner, LVS, device, "--noheading", "-o", + "vg_name,data_percent,lv_size", "--separator", ":", NULL); + runner_redir(&runner, STDOUT_FILENO, RUN_PIPE); + runner_log(&runner, "", GF_LOG_DEBUG, msg); + ret = runner_start(&runner); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_LVS_FAIL, + "Could not perform lvs action"); + goto end; + } + do { + ptr = fgets(buf, sizeof(buf), runner_chio(&runner, STDOUT_FILENO)); + + if (ptr == NULL) + break; + token = strtok(buf, ":"); + if (token != NULL) { + while (token[0] == ' ') + token++; + value = gf_strdup(token); + if (!value) { + ret = -1; + goto end; + } + ret = snprintf(key, sizeof(key), "%s.vgname", key_prefix); + if (ret < 0) { + goto end; + } + + ret = dict_set_dynstr(rsp_dict, key, value); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Could not save vgname "); + goto end; + } + } + + token = strtok(NULL, ":"); + if (token != NULL) { + value = gf_strdup(token); + if (!value) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED, + "token=%s", token, NULL); + ret = -1; + goto end; + } + ret = snprintf(key, sizeof(key), "%s.data", key_prefix); + if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, + NULL); + goto end; + } + + ret = dict_set_dynstr(rsp_dict, key, value); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Could not save data percent "); + goto end; + } + } + token = strtok(NULL, ":"); + if (token != NULL) { + value = gf_strdup(token); + if (!value) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED, + "token=%s", token, NULL); + ret = -1; + goto end; + } + ret = snprintf(key, sizeof(key), "%s.lvsize", key_prefix); + if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, + NULL); + goto end; + } + + ret = dict_set_dynstr(rsp_dict, key, value); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Could not save meta data percent "); + goto end; + } + } + + } while (ptr != NULL); + + ret = 0; + +end: + runner_end(&runner); + +out: + if (ret && value) { + GF_FREE(value); + } + + if (device) + GF_FREE(device); + + return ret; +} + +static int +glusterd_get_single_brick_status(char **op_errstr, dict_t *rsp_dict, + const char *keyprefix, int index, + glusterd_volinfo_t *snap_volinfo, + glusterd_brickinfo_t *brickinfo) +{ + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + char key[128] = ""; /* keyprefix is not longer than 64 bytes */ + int keylen; + char *device = NULL; + char *value = NULL; + char brick_path[PATH_MAX] = ""; + char pidfile[PATH_MAX] = ""; + pid_t pid = -1; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + GF_ASSERT(op_errstr); + GF_ASSERT(rsp_dict); + GF_ASSERT(keyprefix); + GF_ASSERT(snap_volinfo); + GF_ASSERT(brickinfo); + + keylen = snprintf(key, sizeof(key), "%s.brick%d.path", keyprefix, index); + if (keylen < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); + ret = -1; + goto out; + } + + ret = snprintf(brick_path, sizeof(brick_path), "%s:%s", brickinfo->hostname, + brickinfo->path); + if (ret < 0) { + 
gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); + goto out; + } + + value = gf_strdup(brick_path); + if (!value) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED, + "brick_path=%s", brick_path, NULL); + ret = -1; + goto out; + } + + ret = dict_set_dynstrn(rsp_dict, key, keylen, value); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to store " + "brick_path %s", + brickinfo->path); + goto out; + } + + if (brickinfo->snap_status == -1) { + /* Setting vgname as "Pending Snapshot" */ + value = gf_strdup("Pending Snapshot"); + if (!value) { + ret = -1; + goto out; + } + + keylen = snprintf(key, sizeof(key), "%s.brick%d.vgname", keyprefix, + index); + ret = dict_set_dynstrn(rsp_dict, key, keylen, value); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Could not save vgname "); + goto out; + } + + ret = 0; + goto out; + } + value = NULL; + + keylen = snprintf(key, sizeof(key), "%s.brick%d.status", keyprefix, index); + if (keylen < 0) { + ret = -1; + goto out; + } + + if (brickinfo->status == GF_BRICK_STOPPED) { + value = gf_strdup("No"); + if (!value) { + ret = -1; + goto out; + } + ret = dict_set_strn(rsp_dict, key, keylen, value); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Could not save brick status"); + goto out; + } + value = NULL; + } else { + value = gf_strdup("Yes"); + if (!value) { + ret = -1; + goto out; + } + ret = dict_set_strn(rsp_dict, key, keylen, value); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Could not save brick status"); + goto out; + } + value = NULL; + + GLUSTERD_GET_BRICK_PIDFILE(pidfile, snap_volinfo, brickinfo, priv); + + if (gf_is_service_running(pidfile, &pid)) { + keylen = snprintf(key, sizeof(key), "%s.brick%d.pid", keyprefix, + index); + if (keylen < 0) { + ret = -1; + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, + NULL); + goto out; + } + + ret = dict_set_int32n(rsp_dict, key, keylen, pid); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Could not save pid %d", pid); + goto out; + } + } + } + + keylen = snprintf(key, sizeof(key), "%s.brick%d", keyprefix, index); + if (keylen < 0) { + ret = -1; + goto out; + } + /* While getting snap status we should show relevant information + * for deactivated snaps. 
+ */ + if (snap_volinfo->status == GLUSTERD_STATUS_STOPPED) { + /* Setting vgname as "Deactivated Snapshot" */ + value = gf_strdup("N/A (Deactivated Snapshot)"); + if (!value) { + ret = -1; + goto out; + } + + keylen = snprintf(key, sizeof(key), "%s.brick%d.vgname", keyprefix, + index); + ret = dict_set_dynstrn(rsp_dict, key, keylen, value); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Could not save vgname "); + goto out; + } + + ret = 0; + goto out; + } + + ret = glusterd_get_brick_lvm_details(rsp_dict, brickinfo, + snap_volinfo->volname, device, key); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_GET_INFO_FAIL, + "Failed to get " + "brick LVM details"); + goto out; + } +out: + if (ret && value) { + GF_FREE(value); + } + + return ret; +} + +static int +glusterd_get_single_snap_status(char **op_errstr, dict_t *rsp_dict, + const char *keyprefix, glusterd_snap_t *snap) +{ + int ret = -1; + xlator_t *this = NULL; + char key[64] = ""; /* keyprefix is "status.snap0" */ + int keylen; + char brickkey[PATH_MAX] = ""; + glusterd_volinfo_t *snap_volinfo = NULL; + glusterd_volinfo_t *tmp_volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + int volcount = 0; + int brickcount = 0; + + this = THIS; + GF_ASSERT(this); + + GF_ASSERT(op_errstr); + GF_ASSERT(rsp_dict); + GF_ASSERT(keyprefix); + GF_ASSERT(snap); + + cds_list_for_each_entry_safe(snap_volinfo, tmp_volinfo, &snap->volumes, + vol_list) + { + keylen = snprintf(key, sizeof(key), "%s.vol%d", keyprefix, volcount); + if (keylen < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); + ret = -1; + goto out; + } + cds_list_for_each_entry(brickinfo, &snap_volinfo->bricks, brick_list) + { + if (!glusterd_is_local_brick(this, snap_volinfo, brickinfo)) { + brickcount++; + continue; + } + + ret = glusterd_get_single_brick_status( + op_errstr, rsp_dict, key, brickcount, snap_volinfo, brickinfo); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_STATUS_FAIL, + "Getting " + "single snap status failed"); + goto out; + } + brickcount++; + } + keylen = snprintf(brickkey, sizeof(brickkey), "%s.brickcount", key); + if (keylen < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); + goto out; + } + + ret = dict_set_int32n(rsp_dict, brickkey, keylen, brickcount); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Could not save brick count"); + goto out; + } + volcount++; + } + + keylen = snprintf(key, sizeof(key), "%s.volcount", keyprefix); + if (keylen < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); + ret = -1; + goto out; + } + + ret = dict_set_int32n(rsp_dict, key, keylen, volcount); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Could not save volcount"); + goto out; + } + +out: + + return ret; +} + +static int +glusterd_get_each_snap_object_status(char **op_errstr, dict_t *rsp_dict, + glusterd_snap_t *snap, + const char *keyprefix) +{ + int ret = -1; + char key[32] = ""; /* keyprefix is "status.snap0" */ + int keylen; + char *temp = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(op_errstr); + GF_ASSERT(rsp_dict); + GF_ASSERT(snap); + GF_ASSERT(keyprefix); + + /* TODO : Get all the snap volume info present in snap object, + * as of now, There will be only one snapvolinfo per snap object + */ + keylen = snprintf(key, sizeof(key), "%s.snapname", keyprefix); + if (keylen < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, 
NULL); + ret = -1; + goto out; + } + + temp = gf_strdup(snap->snapname); + if (temp == NULL) { + ret = -1; + goto out; + } + ret = dict_set_dynstrn(rsp_dict, key, keylen, temp); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Could not save " + "snap name"); + goto out; + } + + temp = NULL; + + keylen = snprintf(key, sizeof(key), "%s.uuid", keyprefix); + if (keylen < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); + ret = -1; + goto out; + } + + temp = gf_strdup(uuid_utoa(snap->snap_id)); + if (temp == NULL) { + ret = -1; + goto out; + } + + ret = dict_set_dynstrn(rsp_dict, key, keylen, temp); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Could not save " + "snap UUID"); + goto out; + } + + temp = NULL; + + ret = glusterd_get_single_snap_status(op_errstr, rsp_dict, keyprefix, snap); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_STATUS_FAIL, + "Could not get single snap status"); + goto out; + } + + keylen = snprintf(key, sizeof(key), "%s.volcount", keyprefix); + if (keylen < 0) { + ret = keylen; + goto out; + } + + ret = dict_set_int32n(rsp_dict, key, keylen, 1); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Could not save volcount"); + goto out; + } +out: + if (ret && temp) + GF_FREE(temp); + + return ret; +} + +int +glusterd_get_snap_status_of_volume(char **op_errstr, dict_t *rsp_dict, + char *volname, char *keyprefix) +{ + int ret = -1; + glusterd_volinfo_t *snap_volinfo = NULL; + glusterd_volinfo_t *temp_volinfo = NULL; + glusterd_volinfo_t *volinfo = NULL; + char key[64] = ""; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + int i = 0; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + GF_ASSERT(op_errstr); + GF_ASSERT(rsp_dict); + GF_ASSERT(volname); + GF_ASSERT(keyprefix); + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND, + "Failed to get volinfo of " + "volume %s", + volname); + goto out; + } + + cds_list_for_each_entry_safe(snap_volinfo, temp_volinfo, + &volinfo->snap_volumes, snapvol_list) + { + ret = snprintf(key, sizeof(key), "status.snap%d.snapname", i); + if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); + goto out; + } + + ret = dict_set_dynstr_with_alloc(rsp_dict, key, + snap_volinfo->snapshot->snapname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Could not save " + "snap name"); + goto out; + } + + i++; + } + + ret = dict_set_int32n(rsp_dict, "status.snapcount", + SLEN("status.snapcount"), i); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to save snapcount"); + ret = -1; + goto out; + } +out: + return ret; +} + +int +glusterd_get_all_snapshot_status(dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + int32_t i = 0; + int ret = -1; + char key[64] = ""; + glusterd_conf_t *priv = NULL; + glusterd_snap_t *snap = NULL; + glusterd_snap_t *tmp_snap = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + GF_ASSERT(dict); + GF_ASSERT(op_errstr); + + cds_list_for_each_entry_safe(snap, tmp_snap, &priv->snapshots, snap_list) + { + ret = snprintf(key, sizeof(key), "status.snap%d.snapname", i); + if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); + goto out; + } + + ret = dict_set_dynstr_with_alloc(rsp_dict, key, snap->snapname); + 
if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Could not save " + "snap name"); + goto out; + } + + i++; + } + + ret = dict_set_int32n(rsp_dict, "status.snapcount", + SLEN("status.snapcount"), i); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Could not save snapcount"); + goto out; + } + + ret = 0; +out: + return ret; +} + +int +glusterd_snapshot_status_commit(dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + xlator_t *this = NULL; + int ret = -1; + glusterd_conf_t *conf = NULL; + int32_t cmd = -1; + char *snapname = NULL; + glusterd_snap_t *snap = NULL; + char *volname = NULL; + + this = THIS; + + GF_ASSERT(this); + GF_ASSERT(dict); + GF_ASSERT(op_errstr); + + conf = this->private; + + GF_ASSERT(conf); + ret = dict_get_int32n(dict, "sub-cmd", SLEN("sub-cmd"), &cmd); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get status cmd type"); + goto out; + } + + ret = dict_set_int32n(rsp_dict, "sub-cmd", SLEN("sub-cmd"), cmd); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Could not save status cmd in rsp dictionary"); + goto out; + } + switch (cmd) { + case GF_SNAP_STATUS_TYPE_ALL: { + ret = glusterd_get_all_snapshot_status(dict, op_errstr, rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_STATUS_FAIL, + "Unable to " + "get snapshot status"); + goto out; + } + break; + } + case GF_SNAP_STATUS_TYPE_ITER: + case GF_SNAP_STATUS_TYPE_SNAP: { + ret = dict_get_strn(dict, "snapname", SLEN("snapname"), &snapname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to " + "get snap name"); + goto out; + } + + snap = glusterd_find_snap_by_name(snapname); + if (!snap) { + ret = gf_asprintf(op_errstr, + "Snapshot (%s) " + "does not exist", + snapname); + if (ret < 0) { + goto out; + } + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, + "Unable to " + "get snap volinfo"); + goto out; + } + ret = glusterd_get_each_snap_object_status(op_errstr, rsp_dict, + snap, "status.snap0"); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_STATUS_FAIL, + "Unable to " + "get status of snap"); + goto out; + } + + ret = dict_set_int32n(rsp_dict, "status.snapcount", + SLEN("status.snapcount"), 1); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to " + "set snapcount to 1"); + goto out; + } + break; + } + case GF_SNAP_STATUS_TYPE_VOL: { + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to" + " get volume name"); + goto out; + } + + ret = glusterd_get_snap_status_of_volume(op_errstr, rsp_dict, + volname, "status.vol0"); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_STATUS_FAIL, + "Function :" + " glusterd_get_snap_status_of_volume " + "failed"); + goto out; + } + } + } + ret = 0; +out: + return ret; +} + +int32_t +glusterd_handle_snap_limit(dict_t *dict, dict_t *rsp_dict) +{ + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + uint64_t effective_max_limit = 0; + int64_t volcount = 0; + int i = 0; + char *volname = NULL; + char key[64] = ""; + int keylen; + char msg[PATH_MAX] = ""; + glusterd_volinfo_t *volinfo = NULL; + uint64_t limit = 0; + int64_t count = 0; + glusterd_snap_t *snap = NULL; + glusterd_volinfo_t *tmp_volinfo = NULL; + uint64_t opt_max_hard = GLUSTERD_SNAPS_MAX_HARD_LIMIT; + uint64_t opt_max_soft = 
GLUSTERD_SNAPS_DEF_SOFT_LIMIT_PERCENT; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(dict); + GF_ASSERT(rsp_dict); + + priv = this->private; + GF_ASSERT(priv); + + ret = dict_get_int64(dict, "volcount", &volcount); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "failed to get the volcount"); + goto out; + } + + for (i = 1; i <= volcount; i++) { + keylen = snprintf(key, sizeof(key), "volname%d", i); + ret = dict_get_strn(dict, key, keylen, &volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "failed to get the " + "volname"); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND, + "volinfo for %s " + "not found", + volname); + goto out; + } + + /* config values snap-max-hard-limit and snap-max-soft-limit are + * optional and hence we are not erroring out if values are not + * present + */ + gd_get_snap_conf_values_if_present(priv->opts, &opt_max_hard, + &opt_max_soft); + + /* The minimum of the 2 limits i.e system wide limit and + volume wide limit will be considered + */ + if (volinfo->snap_max_hard_limit < opt_max_hard) + effective_max_limit = volinfo->snap_max_hard_limit; + else + effective_max_limit = opt_max_hard; + + limit = (opt_max_soft * effective_max_limit) / 100; + + count = volinfo->snap_count - limit; + if (count <= 0) + goto out; + + tmp_volinfo = cds_list_entry(volinfo->snap_volumes.next, + glusterd_volinfo_t, snapvol_list); + snap = tmp_volinfo->snapshot; + GF_ASSERT(snap); + + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SOFT_LIMIT_REACHED, + "Soft-limit " + "(value = %" PRIu64 + ") of volume %s is reached. " + "Deleting snapshot %s.", + limit, volinfo->volname, snap->snapname); + + snprintf(msg, sizeof(msg), + "snapshot_name=%s;" + "snapshot_uuid=%s", + snap->snapname, uuid_utoa(snap->snap_id)); + + LOCK(&snap->lock); + { + snap->snap_status = GD_SNAP_STATUS_DECOMMISSION; + ret = glusterd_store_snap(snap); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_SNAP_OBJECT_STORE_FAIL, + "could " + "not store snap object %s", + snap->snapname); + goto unlock; + } + + ret = glusterd_snap_remove(rsp_dict, snap, _gf_true, _gf_true, + _gf_false); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_REMOVE_FAIL, + "failed to remove snap %s", snap->snapname); + } + unlock: + UNLOCK(&snap->lock); + if (is_origin_glusterd(dict) == _gf_true) { + if (ret) + gf_event(EVENT_SNAPSHOT_DELETE_FAILED, "%s", msg); + else + gf_event(EVENT_SNAPSHOT_DELETED, "%s", msg); + } + } + +out: + return ret; +} + +int32_t +glusterd_snapshot_clone_postvalidate(dict_t *dict, int32_t op_ret, + char **op_errstr, dict_t *rsp_dict) +{ + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + int ret = -1; + int32_t cleanup = 0; + glusterd_snap_t *snap = NULL; + glusterd_volinfo_t *snap_vol = NULL; + char *clonename = NULL; + + this = THIS; + + GF_ASSERT(this); + GF_ASSERT(dict); + GF_ASSERT(rsp_dict); + + priv = this->private; + GF_ASSERT(priv); + + ret = dict_get_strn(dict, "clonename", SLEN("clonename"), &clonename); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to fetch " + "clonename"); + goto out; + } + + ret = glusterd_volinfo_find(clonename, &snap_vol); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, + "unable to find clone " + "%s volinfo", + clonename); + goto out; + } + + if (snap_vol) + snap = snap_vol->snapshot; + else { + ret = -1; + gf_msg(this->name, 
GF_LOG_ERROR, 0, GD_MSG_SNAP_NOT_FOUND, + "Snapshot volume is null"); + goto out; + } + + /* Fetch snap object from snap_vol and delete it all in case of * + * a failure, or else, just delete the snap object as it is not * + * needed in case of a clone * + */ + if (op_ret) { + ret = dict_get_int32n(dict, "cleanup", SLEN("cleanup"), &cleanup); + if (!ret && cleanup && snap) { + glusterd_snap_remove(rsp_dict, snap, _gf_true, _gf_true, _gf_true); + } + /* Irrespective of status of cleanup its better + * to return from this function. As the functions + * following this block is not required to be + * executed in case of failure scenario. + */ + ret = 0; + goto out; + } + + ret = glusterd_snapobject_delete(snap); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_REMOVE_FAIL, + "Failed to delete " + "snap object %s", + snap->snapname); + goto out; + } + snap_vol->snapshot = NULL; + +out: + return ret; +} + +int32_t +glusterd_snapshot_create_postvalidate(dict_t *dict, int32_t op_ret, + char **op_errstr, dict_t *rsp_dict) +{ + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + int ret = -1; + int32_t cleanup = 0; + glusterd_snap_t *snap = NULL; + char *snapname = NULL; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + uint64_t opt_hard_max = GLUSTERD_SNAPS_MAX_HARD_LIMIT; + uint64_t opt_max_soft = GLUSTERD_SNAPS_DEF_SOFT_LIMIT_PERCENT; + int64_t effective_max_limit = 0; + int64_t soft_limit = 0; + int32_t snap_activate = _gf_false; + + this = THIS; + + GF_ASSERT(this); + GF_ASSERT(dict); + GF_ASSERT(rsp_dict); + + priv = this->private; + GF_ASSERT(priv); + + if (op_ret) { + ret = dict_get_int32n(dict, "cleanup", SLEN("cleanup"), &cleanup); + if (!ret && cleanup) { + ret = glusterd_do_snap_cleanup(dict, op_errstr, rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_CLEANUP_FAIL, + "cleanup " + "operation failed"); + goto out; + } + } + /* Irrespective of status of cleanup its better + * to return from this function. As the functions + * following this block is not required to be + * executed in case of failure scenario. + */ + ret = 0; + goto out; + } + + ret = dict_get_strn(dict, "snapname", SLEN("snapname"), &snapname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to fetch " + "snapname"); + goto out; + } + + snap = glusterd_find_snap_by_name(snapname); + if (!snap) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_NOT_FOUND, + "unable to find snap " + "%s", + snapname); + goto out; + } + + snap->snap_status = GD_SNAP_STATUS_IN_USE; + ret = glusterd_store_snap(snap); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_OBJECT_STORE_FAIL, + "Could not store snap" + "object %s", + snap->snapname); + goto out; + } + + ret = glusterd_snapshot_update_snaps_post_validate(dict, op_errstr, + rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_CREATION_FAIL, + "Failed to " + "create snapshot"); + goto out; + } + + /* + * If activate_on_create was enabled, and we have reached this * + * section of the code, that means, that after successfully * + * creating the snapshot, we have also successfully started the * + * snapshot bricks on all nodes. So from originator node we can * + * send EVENT_SNAPSHOT_ACTIVATED event. 
* + * * + * Also check, if hard limit and soft limit is reached in case * + * of successfully creating the snapshot, and generate the event * + */ + if (is_origin_glusterd(dict) == _gf_true) { + snap_activate = dict_get_str_boolean( + priv->opts, GLUSTERD_STORE_KEY_SNAP_ACTIVATE, _gf_false); + + if (snap_activate == _gf_true) { + gf_event(EVENT_SNAPSHOT_ACTIVATED, + "snapshot_name=%s;" + "snapshot_uuid=%s", + snap->snapname, uuid_utoa(snap->snap_id)); + } + + ret = dict_get_strn(dict, "volname1", SLEN("volname1"), &volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get volname."); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, + "Failed to get volinfo."); + goto out; + } + + /* config values snap-max-hard-limit and snap-max-soft-limit are + * optional and hence we are not erroring out if values are not + * present + */ + gd_get_snap_conf_values_if_present(priv->opts, &opt_hard_max, + &opt_max_soft); + + if (volinfo->snap_max_hard_limit < opt_hard_max) + effective_max_limit = volinfo->snap_max_hard_limit; + else + effective_max_limit = opt_hard_max; + + /* + * Check for hard limit. If it is reached after taking * + * this snapshot, then generate event for the same. If * + * it is not reached, then check for the soft limit, * + * and generate event accordingly. * + */ + if (volinfo->snap_count >= effective_max_limit) { + gf_event(EVENT_SNAPSHOT_HARD_LIMIT_REACHED, + "volume_name=%s;volume_id=%s", volname, + uuid_utoa(volinfo->volume_id)); + } else { + soft_limit = (opt_max_soft * effective_max_limit) / 100; + if (volinfo->snap_count >= soft_limit) { + gf_event(EVENT_SNAPSHOT_SOFT_LIMIT_REACHED, + "volume_name=%s;volume_id=%s", volname, + uuid_utoa(volinfo->volume_id)); + } + } + } + + /* "auto-delete" might not be set by user explicitly, + * in that case it's better to consider the default value. + * Hence not erroring out if Key is not found. 
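+ * When auto-delete is enabled, glusterd_handle_snap_limit() below marks an
+ * existing snapshot of the volume for decommission and removes it once the
+ * soft limit is crossed; errors from that auto-delete are ignored.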
+ */ + ret = dict_get_str_boolean(priv->opts, GLUSTERD_STORE_KEY_SNAP_AUTO_DELETE, + _gf_false); + if (_gf_true == ret) { + ret = glusterd_handle_snap_limit(dict, rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_REMOVE_FAIL, + "failed to remove snap"); + /* ignore the errors of autodelete */ + ret = 0; + } + } + +out: + return ret; +} + +int32_t +glusterd_snapshot(dict_t *dict, char **op_errstr, uint32_t *op_errno, + dict_t *rsp_dict) +{ + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + int32_t snap_command = 0; + char *snap_name = NULL; + char temp[PATH_MAX] = ""; + int ret = -1; + + this = THIS; + + GF_ASSERT(this); + GF_ASSERT(dict); + GF_ASSERT(rsp_dict); + GF_VALIDATE_OR_GOTO(this->name, op_errno, out); + + priv = this->private; + GF_ASSERT(priv); + + ret = dict_get_int32n(dict, "type", SLEN("type"), &snap_command); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMAND_NOT_FOUND, + "unable to get the type of " + "the snapshot command"); + goto out; + } + + switch (snap_command) { + case (GF_SNAP_OPTION_TYPE_CREATE): + ret = glusterd_snapshot_create_commit(dict, op_errstr, op_errno, + rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_CREATION_FAIL, + "Failed to " + "create snapshot"); + goto out; + } + break; + + case (GF_SNAP_OPTION_TYPE_CLONE): + ret = glusterd_snapshot_clone_commit(dict, op_errstr, rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_CLONE_FAILED, + "Failed to " + "clone snapshot"); + goto out; + } + break; + + case GF_SNAP_OPTION_TYPE_CONFIG: + ret = glusterd_snapshot_config_commit(dict, op_errstr, rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_CONFIG_FAIL, + "snapshot config failed"); + goto out; + } + break; + + case GF_SNAP_OPTION_TYPE_DELETE: + ret = glusterd_snapshot_remove_commit(dict, op_errstr, rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_REMOVE_FAIL, + "Failed to " + "delete snapshot"); + if (*op_errstr) { + /* If error string is already set + * then goto out */ + goto out; + } + + ret = dict_get_strn(dict, "snapname", SLEN("snapname"), + &snap_name); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get snapname"); + snap_name = "NA"; + } + + snprintf(temp, sizeof(temp), + "Snapshot %s might " + "not be in an usable state.", + snap_name); + + *op_errstr = gf_strdup(temp); + ret = -1; + goto out; + } + break; + + case GF_SNAP_OPTION_TYPE_RESTORE: + ret = glusterd_snapshot_restore(dict, op_errstr, rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_RESTORE_FAIL, + "Failed to " + "restore snapshot"); + goto out; + } + + break; + case GF_SNAP_OPTION_TYPE_ACTIVATE: + ret = glusterd_snapshot_activate_commit(dict, op_errstr, rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_ACTIVATE_FAIL, + "Failed to " + "activate snapshot"); + goto out; + } + + break; + + case GF_SNAP_OPTION_TYPE_DEACTIVATE: + ret = glusterd_snapshot_deactivate_commit(dict, op_errstr, + rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, + GD_MSG_SNAP_DEACTIVATE_FAIL, + "Failed to " + "deactivate snapshot"); + goto out; + } + + break; + + case GF_SNAP_OPTION_TYPE_STATUS: + ret = glusterd_snapshot_status_commit(dict, op_errstr, rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_STATUS_FAIL, + "Failed to " + "show snapshot status"); + goto out; + } + break; + + default: + gf_msg(this->name, GF_LOG_WARNING, EINVAL, 
GD_MSG_INVALID_ENTRY, + "invalid snap command"); + goto out; + break; + } + + ret = 0; + +out: + return ret; +} + +int +glusterd_snapshot_brickop(dict_t *dict, char **op_errstr, dict_t *rsp_dict) +{ + int ret = -1; + int64_t vol_count = 0; + int64_t count = 1; + char key[64] = ""; + int keylen; + char *volname = NULL; + int32_t snap_command = 0; + xlator_t *this = NULL; + char *op_type = NULL; + + this = THIS; + + GF_ASSERT(this); + GF_ASSERT(dict); + GF_ASSERT(rsp_dict); + + ret = dict_get_int32n(dict, "type", SLEN("type"), &snap_command); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMAND_NOT_FOUND, + "unable to get the type of " + "the snapshot command"); + goto out; + } + + switch (snap_command) { + case GF_SNAP_OPTION_TYPE_CREATE: + + /* op_type with tell us whether its pre-commit operation + * or post-commit + */ + ret = dict_get_strn(dict, "operation-type", SLEN("operation-type"), + &op_type); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to fetch " + "operation type"); + goto out; + } + + if (strcmp(op_type, "pre") == 0) { + /* BRICK OP PHASE for enabling barrier, Enable barrier + * if its a pre-commit operation + */ + ret = glusterd_set_barrier_value(dict, "enable"); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to " + "set barrier value as enable in dict"); + goto out; + } + } else if (strcmp(op_type, "post") == 0) { + /* BRICK OP PHASE for disabling barrier, Disable barrier + * if its a post-commit operation + */ + ret = glusterd_set_barrier_value(dict, "disable"); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to " + "set barrier value as disable in " + "dict"); + goto out; + } + } else { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, + "Invalid op_type"); + goto out; + } + + ret = dict_get_int64(dict, "volcount", &vol_count); + if (ret) + goto out; + while (count <= vol_count) { + keylen = snprintf(key, sizeof(key), "volname%" PRId64, count); + ret = dict_get_strn(dict, key, keylen, &volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get volname"); + goto out; + } + ret = dict_set_strn(dict, "volname", SLEN("volname"), volname); + if (ret) + goto out; + + ret = gd_brick_op_phase(GD_OP_SNAP, NULL, dict, op_errstr); + if (ret) + goto out; + volname = NULL; + count++; + } + + dict_deln(dict, "volname", SLEN("volname")); + ret = 0; + break; + case GF_SNAP_OPTION_TYPE_DELETE: + break; + default: + break; + } + +out: + return ret; +} + +int +glusterd_snapshot_prevalidate(dict_t *dict, char **op_errstr, dict_t *rsp_dict, + uint32_t *op_errno) +{ + int snap_command = 0; + xlator_t *this = NULL; + int ret = -1; + + this = THIS; + + GF_ASSERT(this); + GF_ASSERT(dict); + GF_ASSERT(rsp_dict); + GF_VALIDATE_OR_GOTO(this->name, op_errno, out); + + ret = dict_get_int32n(dict, "type", SLEN("type"), &snap_command); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMAND_NOT_FOUND, + "unable to get the type of " + "the snapshot command"); + goto out; + } + + switch (snap_command) { + case (GF_SNAP_OPTION_TYPE_CREATE): + ret = glusterd_snapshot_create_prevalidate(dict, op_errstr, + rsp_dict, op_errno); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_CREATION_FAIL, + "Snapshot create " + "pre-validation failed"); + goto out; + } + break; + + case (GF_SNAP_OPTION_TYPE_CLONE): + ret = glusterd_snapshot_clone_prevalidate(dict, op_errstr, rsp_dict, + op_errno); + if 
(ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, + GD_MSG_SNAP_CLONE_PREVAL_FAILED, + "Snapshot clone " + "pre-validation failed"); + goto out; + } + break; + + case (GF_SNAP_OPTION_TYPE_CONFIG): + ret = glusterd_snapshot_config_prevalidate(dict, op_errstr, + op_errno); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_CONFIG_FAIL, + "Snapshot config " + "pre-validation failed"); + goto out; + } + break; + + case GF_SNAP_OPTION_TYPE_RESTORE: + ret = glusterd_snapshot_restore_prevalidate(dict, op_errstr, + op_errno, rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_RESTORE_FAIL, + "Snapshot restore " + "validation failed"); + goto out; + } + break; + + case GF_SNAP_OPTION_TYPE_ACTIVATE: + ret = glusterd_snapshot_activate_deactivate_prevalidate( + dict, op_errstr, op_errno, rsp_dict, _gf_true); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_ACTIVATE_FAIL, + "Snapshot activate " + "validation failed"); + goto out; + } + break; + case GF_SNAP_OPTION_TYPE_DEACTIVATE: + ret = glusterd_snapshot_activate_deactivate_prevalidate( + dict, op_errstr, op_errno, rsp_dict, _gf_false); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, + GD_MSG_SNAP_DEACTIVATE_FAIL, + "Snapshot deactivate validation failed"); + goto out; + } + break; + case GF_SNAP_OPTION_TYPE_DELETE: + ret = glusterd_snapshot_remove_prevalidate(dict, op_errstr, + op_errno, rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_REMOVE_FAIL, + "Snapshot remove " + "validation failed"); + goto out; + } + break; + + case GF_SNAP_OPTION_TYPE_STATUS: + ret = glusterd_snapshot_status_prevalidate(dict, op_errstr, + op_errno, rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_STATUS_FAIL, + "Snapshot status " + "validation failed"); + goto out; + } + break; + + default: + gf_msg(this->name, GF_LOG_WARNING, EINVAL, GD_MSG_COMMAND_NOT_FOUND, + "invalid snap command"); + *op_errno = EINVAL; + goto out; + } + + ret = 0; +out: + return ret; +} + +/* This function is called to remove the trashpath, in cases + * when the restore operation is successful and we don't need + * the backup, and incases when the restore op is failed before + * commit, and we don't need to revert the backup. 
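+ * The backup is expected at <workdir>/GLUSTERD_TRASH/vols-<volname>.deleted; + * if that directory is absent the function treats it as already cleaned up + * and returns success, otherwise the tree is removed with recursive_rmdir().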
+ * + * @param volname name of the volume which is being restored + * + * @return 0 on success or -1 on failure + */ +int +glusterd_remove_trashpath(char *volname) +{ + int ret = -1; + char delete_path[PATH_MAX] = { + 0, + }; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + struct stat stbuf = { + 0, + }; + int32_t len = 0; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + + GF_ASSERT(volname); + + len = snprintf(delete_path, sizeof(delete_path), + "%s/" GLUSTERD_TRASH "/vols-%s.deleted", priv->workdir, + volname); + if ((len < 0) || (len >= sizeof(delete_path))) { + goto out; + } + + ret = sys_lstat(delete_path, &stbuf); + if (ret) { + /* If the trash dir does not exist, return * + * without failure * + */ + if (errno == ENOENT) { + ret = 0; + goto out; + } else { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED, + "Failed to lstat " + "backup dir (%s)", + delete_path); + goto out; + } + } + + /* Delete the backup copy of volume folder */ + ret = recursive_rmdir(delete_path); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED, + "Failed to remove " + "backup dir (%s)", + delete_path); + goto out; + } + + ret = 0; +out: + return ret; +} + +/* This function is called if snapshot restore operation + * is successful. It will cleanup the backup files created + * during the restore operation. + * + * @param rsp_dict Response dictionary + * @param volinfo volinfo of the volume which is being restored + * @param snap snap object + * + * @return 0 on success or -1 on failure + */ +int +glusterd_snapshot_restore_cleanup(dict_t *rsp_dict, char *volname, + glusterd_snap_t *snap) +{ + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + GF_ASSERT(rsp_dict); + GF_ASSERT(volname); + GF_ASSERT(snap); + + /* Now delete the snap entry. */ + ret = glusterd_snap_remove(rsp_dict, snap, _gf_false, _gf_true, _gf_false); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_REMOVE_FAIL, + "Failed to delete " + "snap %s", + snap->snapname); + goto out; + } + + /* Delete the backup copy of volume folder */ + ret = glusterd_remove_trashpath(volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED, + "Failed to remove " + "backup dir"); + goto out; + } + + ret = 0; +out: + return ret; +} + +/* This function is called when the snapshot restore operation failed + * for some reasons. In such case we revert the restore operation. + * + * @param volinfo volinfo of the origin volume + * + * @return 0 on success and -1 on failure + */ +int +glusterd_snapshot_revert_partial_restored_vol(glusterd_volinfo_t *volinfo) +{ + int ret = 0; + char pathname[PATH_MAX] = ""; + char trash_path[PATH_MAX] = ""; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_volinfo_t *reverted_vol = NULL; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_volinfo_t *tmp_vol = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + int32_t len = 0; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + GF_ASSERT(volinfo); + + GLUSTERD_GET_VOLUME_DIR(pathname, volinfo, priv); + + len = snprintf(trash_path, sizeof(trash_path), + "%s/" GLUSTERD_TRASH "/vols-%s.deleted", priv->workdir, + volinfo->volname); + if ((len < 0) || (len >= sizeof(trash_path))) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); + ret = -1; + goto out; + } + + /* Since snapshot restore failed we cannot rely on the volume + * data stored under vols folder. 
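+ * (a failed restore may have left that directory partially overwritten).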
Therefore delete the origin + * volume's backend folder.*/ + ret = recursive_rmdir(pathname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED, + "Failed to remove " + "%s directory", + pathname); + goto out; + } + + /* Now move the backup copy of the vols to its original + * location.*/ + ret = sys_rename(trash_path, pathname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED, + "Failed to rename folder " + "from %s to %s", + trash_path, pathname); + goto out; + } + + /* Retrieve the volume from the store */ + reverted_vol = glusterd_store_retrieve_volume(volinfo->volname, NULL); + if (NULL == reverted_vol) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_OP_FAILED, + "Failed to load restored " + "%s volume", + volinfo->volname); + goto out; + } + + /* Retrieve the snap_volumes list from the older volinfo */ + reverted_vol->snap_count = volinfo->snap_count; + cds_list_for_each_entry_safe(snap_vol, tmp_vol, &volinfo->snap_volumes, + snapvol_list) + { + cds_list_add_tail(&snap_vol->snapvol_list, &reverted_vol->snap_volumes); + + cds_list_for_each_entry(brickinfo, &snap_vol->bricks, brick_list) + { + /* + * If the brick is not of this peer, or snapshot is * + * missed for the brick don't restore the xattr for it * + */ + if ((!gf_uuid_compare(brickinfo->uuid, MY_UUID)) && + (brickinfo->snap_status != -1)) { + /* + * We need to restore volume id of all snap * + * bricks to volume id of the snap volume. * + */ + ret = sys_lsetxattr(brickinfo->path, GF_XATTR_VOL_ID_KEY, + snap_vol->volume_id, + sizeof(snap_vol->volume_id), XATTR_REPLACE); + if (ret == -1) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_SET_XATTR_FAIL, + "Attribute=%s, Path=%s, Reason=%s, Snap=%s", + GF_XATTR_VOL_ID_KEY, brickinfo->path, + strerror(errno), snap_vol->volname, NULL); + goto out; + } + } + } + } + + /* Since we retrieved the volinfo from store now we don't + * want the older volinfo. Therefore delete the older volinfo */ + glusterd_volinfo_unref(volinfo); + ret = 0; +out: + return ret; +} + +/* This function is called when glusterd is started and we need + * to revert a failed snapshot restore. + * + * @param snap snapshot object of the restored snap + * + * @return 0 on success and -1 on failure + */ +int +glusterd_snapshot_revert_restore_from_snap(glusterd_snap_t *snap) +{ + int ret = -1; + char volname[PATH_MAX] = ""; + glusterd_volinfo_t *snap_volinfo = NULL; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + + this = THIS; + + GF_ASSERT(this); + GF_ASSERT(snap); + + /* TODO : As of now there is only one volume in snapshot. + * Change this when multiple volume snapshot is introduced + */ + snap_volinfo = cds_list_entry(snap->volumes.next, glusterd_volinfo_t, + vol_list); + + gf_strncpy(volname, snap_volinfo->parent_volname, sizeof(volname)); + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND, + "Could not get volinfo of " + "%s", + snap_volinfo->parent_volname); + goto out; + } + + ret = glusterd_snapshot_revert_partial_restored_vol(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_RESTORE_REVERT_FAIL, + "Failed to revert snapshot " + "restore operation for %s volume", + volname); + goto out; + } +out: + return ret; +} + +/* This function is called from post-validation. Based on the op_ret + * it will take a decision on whether to revert the operation or + * perform cleanup. 
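+ * On success the snap object and the backed-up volume directory are removed + * via glusterd_snapshot_restore_cleanup(); on failure the partially restored + * volume is reverted from the backup only when the "cleanup" flag is set in + * dict, otherwise the backup directory is simply discarded.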
+ * + * @param dict dictionary object + * @param op_ret return value of the restore operation + * @param op_errstr error string + * @param rsp_dict Response dictionary + * + * @return 0 on success and -1 on failure + */ +int +glusterd_snapshot_restore_postop(dict_t *dict, int32_t op_ret, char **op_errstr, + dict_t *rsp_dict) +{ + int ret = -1; + char *name = NULL; + char *volname = NULL; + int cleanup = 0; + glusterd_snap_t *snap = NULL; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + + this = THIS; + + GF_ASSERT(this); + GF_ASSERT(dict); + GF_ASSERT(rsp_dict); + + ret = dict_get_strn(dict, "snapname", SLEN("snapname"), &name); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "getting the snap " + "name failed (volume: %s)", + name); + goto out; + } + + snap = glusterd_find_snap_by_name(name); + if (!snap) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_NOT_FOUND, + "Snapshot (%s) does not exist", name); + ret = -1; + goto out; + } + + /* TODO: fix this when multiple volume support will come */ + ret = dict_get_strn(dict, "volname1", SLEN("volname1"), &volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "failed to get volume name"); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, + "Volume (%s) does not exist ", volname); + goto out; + } + + ret = dict_get_strn(dict, "snapname", SLEN("snapname"), &name); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "getting the snap " + "name failed (volume: %s)", + volinfo->volname); + goto out; + } + + snap = glusterd_find_snap_by_name(name); + if (!snap) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_SNAP_NOT_FOUND, + "snap %s is not found", name); + ret = -1; + goto out; + } + + /* On success perform the cleanup operation */ + if (0 == op_ret) { + ret = glusterd_snapshot_restore_cleanup(rsp_dict, volname, snap); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_CLEANUP_FAIL, + "Failed to perform " + "snapshot restore cleanup for %s volume", + volname); + goto out; + } + } else { /* On failure revert snapshot restore */ + ret = dict_get_int32n(dict, "cleanup", SLEN("cleanup"), &cleanup); + /* Perform cleanup only when required */ + if (ret || (0 == cleanup)) { + /* Delete the backup copy of volume folder */ + ret = glusterd_remove_trashpath(volinfo->volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED, + "Failed to remove backup dir"); + goto out; + } + ret = 0; + goto out; + } + + ret = glusterd_snapshot_revert_partial_restored_vol(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_RESTORE_REVERT_FAIL, + "Failed to revert " + "restore operation for %s volume", + volname); + goto out; + } + + snap->snap_status = GD_SNAP_STATUS_IN_USE; + /* We need to save this in disk */ + ret = glusterd_store_snap(snap); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_OBJECT_STORE_FAIL, + "Could not store snap object for %s snap", snap->snapname); + goto out; + } + + /* After restore fails, we have to remove mount point for + * deactivated snaps which was created at start of restore op. 
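+ * glusterd_snap_unmount() is attempted only for a stopped volume, and its + * failure is logged without failing the post-op.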
+ */ + if (volinfo->status == GLUSTERD_STATUS_STOPPED) { + ret = glusterd_snap_unmount(this, volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTERD_UMOUNT_FAIL, + "Failed to unmounts for %s", snap->snapname); + } + } + } + + ret = 0; +out: + return ret; +} + +int +glusterd_snapshot_postvalidate(dict_t *dict, int32_t op_ret, char **op_errstr, + dict_t *rsp_dict) +{ + int snap_command = 0; + xlator_t *this = NULL; + int ret = -1; + + this = THIS; + + GF_ASSERT(this); + GF_ASSERT(dict); + GF_ASSERT(rsp_dict); + + ret = dict_get_int32n(dict, "type", SLEN("type"), &snap_command); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMAND_NOT_FOUND, + "unable to get the type of " + "the snapshot command"); + goto out; + } + + switch (snap_command) { + case GF_SNAP_OPTION_TYPE_CREATE: + ret = glusterd_snapshot_create_postvalidate(dict, op_ret, op_errstr, + rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_CREATION_FAIL, + "Snapshot create " + "post-validation failed"); + goto out; + } + glusterd_fetchsnap_notify(this); + break; + case GF_SNAP_OPTION_TYPE_CLONE: + ret = glusterd_snapshot_clone_postvalidate(dict, op_ret, op_errstr, + rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, + GD_MSG_SNAP_CLONE_POSTVAL_FAILED, + "Snapshot create " + "post-validation failed"); + goto out; + } + glusterd_fetchsnap_notify(this); + break; + case GF_SNAP_OPTION_TYPE_DELETE: + if (op_ret) { + gf_msg_debug(this->name, 0, + "op_ret = %d. Not performing delete " + "post_validate", + op_ret); + ret = 0; + goto out; + } + ret = glusterd_snapshot_update_snaps_post_validate(dict, op_errstr, + rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_MISSED_SNAP_LIST_STORE_FAIL, + "Failed to " + "update missed snaps list"); + goto out; + } + glusterd_fetchsnap_notify(this); + break; + case GF_SNAP_OPTION_TYPE_RESTORE: + ret = glusterd_snapshot_update_snaps_post_validate(dict, op_errstr, + rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_RESTORE_FAIL, + "Failed to " + "update missed snaps list"); + goto out; + } + + ret = glusterd_snapshot_restore_postop(dict, op_ret, op_errstr, + rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_RESTORE_FAIL, + "Failed to " + "perform snapshot restore post-op"); + goto out; + } + glusterd_fetchsnap_notify(this); + break; + case GF_SNAP_OPTION_TYPE_ACTIVATE: + case GF_SNAP_OPTION_TYPE_DEACTIVATE: + glusterd_fetchsnap_notify(this); + break; + case GF_SNAP_OPTION_TYPE_STATUS: + case GF_SNAP_OPTION_TYPE_CONFIG: + case GF_SNAP_OPTION_TYPE_INFO: + case GF_SNAP_OPTION_TYPE_LIST: + /*Nothing to be done. But want to + * avoid the default case warning*/ + ret = 0; + break; + default: + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_COMMAND_NOT_FOUND, + "invalid snap command"); + goto out; + } + + ret = 0; +out: + return ret; +} + +/* + Verify availability of lvm commands +*/ + +static gf_boolean_t +glusterd_is_lvm_cmd_available(char *lvm_cmd) +{ + int32_t ret = 0; + struct stat buf = { + 0, + }; + + if (!lvm_cmd) + return _gf_false; + + ret = sys_stat(lvm_cmd, &buf); + if (ret != 0) { + gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "stat fails on %s, exiting. 
(errno = %d (%s))", lvm_cmd, errno, + strerror(errno)); + return _gf_false; + } + + if ((!ret) && (!S_ISREG(buf.st_mode))) { + gf_msg(THIS->name, GF_LOG_CRITICAL, EINVAL, GD_MSG_COMMAND_NOT_FOUND, + "Provided command %s is not a regular file," + "exiting", + lvm_cmd); + return _gf_false; + } + + if ((!ret) && (!(buf.st_mode & S_IXUSR))) { + gf_msg(THIS->name, GF_LOG_CRITICAL, 0, GD_MSG_NO_EXEC_PERMS, + "Provided command %s has no exec permissions," + "exiting", + lvm_cmd); + return _gf_false; + } + + return _gf_true; +} + +int +glusterd_handle_snapshot_fn(rpcsvc_request_t *req) +{ + int32_t ret = 0; + dict_t *dict = NULL; + gf_cli_req cli_req = { + {0}, + }; + glusterd_op_t cli_op = GD_OP_SNAP; + int type = 0; + glusterd_conf_t *conf = NULL; + char *host_uuid = NULL; + char err_str[2048] = ""; + xlator_t *this = NULL; + uint32_t op_errno = 0; + + GF_ASSERT(req); + + this = THIS; + GF_ASSERT(this); + conf = this->private; + GF_ASSERT(conf); + + ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + req->rpc_err = GARBAGE_ARGS; + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL); + goto out; + } + + if (cli_req.dict.dict_len > 0) { + dict = dict_new(); + if (!dict) + goto out; + + ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + "failed to " + "unserialize req-buffer to dictionary"); + snprintf(err_str, sizeof(err_str), + "Unable to decode " + "the command"); + goto out; + } + + dict->extra_stdfree = cli_req.dict.dict_val; + + host_uuid = gf_strdup(uuid_utoa(MY_UUID)); + if (host_uuid == NULL) { + snprintf(err_str, sizeof(err_str), + "Failed to get " + "the uuid of local glusterd"); + ret = -1; + goto out; + } + ret = dict_set_dynstrn(dict, "host-uuid", SLEN("host-uuid"), host_uuid); + if (ret) { + GF_FREE(host_uuid); + goto out; + } + + } else { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, + "request dict length is %d", cli_req.dict.dict_len); + goto out; + } + + if (conf->op_version < GD_OP_VERSION_3_6_0) { + snprintf(err_str, sizeof(err_str), + "Cluster operating version" + " is lesser than the supported version " + "for a snapshot"); + op_errno = EG_OPNOTSUP; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UNSUPPORTED_VERSION, + "%s (%d < %d)", err_str, conf->op_version, GD_OP_VERSION_3_6_0); + ret = -1; + goto out; + } + + ret = dict_get_int32n(dict, "type", SLEN("type"), &type); + if (ret < 0) { + snprintf(err_str, sizeof(err_str), "Command type not found"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMAND_NOT_FOUND, "%s", + err_str); + goto out; + } + + if (!glusterd_is_lvm_cmd_available(LVM_CREATE)) { + snprintf(err_str, sizeof(err_str), + "LVM commands not found," + " snapshot functionality is disabled"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMAND_NOT_FOUND, "%s", + err_str); + ret = -1; + goto out; + } + + switch (type) { + case GF_SNAP_OPTION_TYPE_CREATE: + ret = glusterd_handle_snapshot_create(req, cli_op, dict, err_str, + sizeof(err_str)); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_CREATION_FAIL, + "Snapshot create failed: %s", err_str); + } + break; + + case GF_SNAP_OPTION_TYPE_CLONE: + ret = glusterd_handle_snapshot_clone(req, cli_op, dict, err_str, + sizeof(err_str)); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_CLONE_FAILED, + "Snapshot clone " + "failed: %s", + err_str); + } + break; + + case GF_SNAP_OPTION_TYPE_RESTORE: + ret = 
glusterd_handle_snapshot_restore(req, cli_op, dict, err_str, + &op_errno, sizeof(err_str)); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_RESTORE_FAIL, + "Snapshot restore failed: %s", err_str); + } + + break; + case GF_SNAP_OPTION_TYPE_INFO: + ret = glusterd_handle_snapshot_info(req, cli_op, dict, err_str, + sizeof(err_str)); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_INFO_FAIL, + "Snapshot info failed"); + } + break; + case GF_SNAP_OPTION_TYPE_LIST: + ret = glusterd_handle_snapshot_list(req, cli_op, dict, err_str, + sizeof(err_str), &op_errno); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_LIST_GET_FAIL, + "Snapshot list failed"); + } + break; + case GF_SNAP_OPTION_TYPE_CONFIG: + ret = glusterd_handle_snapshot_config(req, cli_op, dict, err_str, + sizeof(err_str)); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_CONFIG_FAIL, + "snapshot config failed"); + } + break; + case GF_SNAP_OPTION_TYPE_DELETE: + ret = glusterd_handle_snapshot_delete(req, cli_op, dict, err_str, + &op_errno, sizeof(err_str)); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_REMOVE_FAIL, + "Snapshot delete failed: %s", err_str); + } + break; + case GF_SNAP_OPTION_TYPE_ACTIVATE: + ret = glusterd_mgmt_v3_initiate_snap_phases(req, cli_op, dict); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_ACTIVATE_FAIL, + "Snapshot activate failed: %s", err_str); + } + break; + case GF_SNAP_OPTION_TYPE_DEACTIVATE: + ret = glusterd_mgmt_v3_initiate_snap_phases(req, cli_op, dict); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, + GD_MSG_SNAP_DEACTIVATE_FAIL, + "Snapshot deactivate failed: %s", err_str); + } + break; + case GF_SNAP_OPTION_TYPE_STATUS: + ret = glusterd_handle_snapshot_status(req, cli_op, dict, err_str, + sizeof(err_str)); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_STATUS_FAIL, + "Snapshot status failed: %s", err_str); + } + break; + default: + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_COMMAND_NOT_FOUND, + "Unknown snapshot request " + "type (%d)", + type); + ret = -1; /* Failure */ + } + +out: + if (ret) { + if (err_str[0] == '\0') + snprintf(err_str, sizeof(err_str), "Operation failed"); + + if (ret && (op_errno == 0)) + op_errno = EG_INTRNL; + + ret = glusterd_op_send_cli_response(cli_op, ret, op_errno, req, dict, + err_str); + } + + return ret; +} + +int +glusterd_handle_snapshot(rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler(req, glusterd_handle_snapshot_fn); +} + +static void +glusterd_free_snap_op(glusterd_snap_op_t *snap_op) +{ + if (snap_op) { + if (snap_op->brick_path) + GF_FREE(snap_op->brick_path); + + GF_FREE(snap_op); + } +} + +static void +glusterd_free_missed_snapinfo(glusterd_missed_snap_info *missed_snapinfo) +{ + glusterd_snap_op_t *snap_opinfo = NULL; + glusterd_snap_op_t *tmp = NULL; + + if (missed_snapinfo) { + cds_list_for_each_entry_safe(snap_opinfo, tmp, + &missed_snapinfo->snap_ops, snap_ops_list) + { + glusterd_free_snap_op(snap_opinfo); + snap_opinfo = NULL; + } + + if (missed_snapinfo->node_uuid) + GF_FREE(missed_snapinfo->node_uuid); + + if (missed_snapinfo->snap_uuid) + GF_FREE(missed_snapinfo->snap_uuid); + + GF_FREE(missed_snapinfo); + } +} + +/* Look for duplicates and accordingly update the list */ +int32_t +glusterd_update_missed_snap_entry(glusterd_missed_snap_info *missed_snapinfo, + glusterd_snap_op_t *missed_snap_op) +{ + int32_t ret = -1; + glusterd_snap_op_t *snap_opinfo = NULL; + gf_boolean_t match = _gf_false; + xlator_t 
*this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(missed_snapinfo); + GF_ASSERT(missed_snap_op); + + cds_list_for_each_entry(snap_opinfo, &missed_snapinfo->snap_ops, + snap_ops_list) + { + /* If the entry is not for the same snap_vol_id + * then continue + */ + if (strcmp(snap_opinfo->snap_vol_id, missed_snap_op->snap_vol_id)) + continue; + + if ((!strcmp(snap_opinfo->brick_path, missed_snap_op->brick_path)) && + (snap_opinfo->op == missed_snap_op->op)) { + /* If two entries have conflicting status + * GD_MISSED_SNAP_DONE takes precedence + */ + if ((snap_opinfo->status == GD_MISSED_SNAP_PENDING) && + (missed_snap_op->status == GD_MISSED_SNAP_DONE)) { + snap_opinfo->status = GD_MISSED_SNAP_DONE; + gf_msg(this->name, GF_LOG_INFO, 0, + GD_MSG_MISSED_SNAP_STATUS_DONE, + "Updating missed snap status " + "for %s:%s=%s:%d:%s:%d as DONE", + missed_snapinfo->node_uuid, missed_snapinfo->snap_uuid, + snap_opinfo->snap_vol_id, snap_opinfo->brick_num, + snap_opinfo->brick_path, snap_opinfo->op); + ret = 0; + glusterd_free_snap_op(missed_snap_op); + goto out; + } + match = _gf_true; + break; + } else if ((snap_opinfo->brick_num == missed_snap_op->brick_num) && + (snap_opinfo->op == GF_SNAP_OPTION_TYPE_CREATE) && + ((missed_snap_op->op == GF_SNAP_OPTION_TYPE_DELETE) || + (missed_snap_op->op == GF_SNAP_OPTION_TYPE_RESTORE))) { + /* Optimizing create and delete entries for the same + * brick and same node + */ + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_MISSED_SNAP_STATUS_DONE, + "Updating missed snap status " + "for %s:%s=%s:%d:%s:%d as DONE", + missed_snapinfo->node_uuid, missed_snapinfo->snap_uuid, + snap_opinfo->snap_vol_id, snap_opinfo->brick_num, + snap_opinfo->brick_path, snap_opinfo->op); + snap_opinfo->status = GD_MISSED_SNAP_DONE; + ret = 0; + glusterd_free_snap_op(missed_snap_op); + goto out; + } + } + + if (match == _gf_true) { + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_DUP_ENTRY, + "Duplicate entry. Not updating"); + glusterd_free_snap_op(missed_snap_op); + } else { + cds_list_add_tail(&missed_snap_op->snap_ops_list, + &missed_snapinfo->snap_ops); + } + + ret = 0; +out: + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +/* Add new missed snap entry to the missed_snaps list. 
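+ * missed_info is of the form <node-uuid>:<snap-uuid>; if an entry for that + * node/snap pair already exists the new op is merged into it by + * glusterd_update_missed_snap_entry(), otherwise a fresh + * glusterd_missed_snap_info is created and appended to priv->missed_snaps_list.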
*/ +int32_t +glusterd_add_new_entry_to_list(char *missed_info, char *snap_vol_id, + int32_t brick_num, char *brick_path, + int32_t snap_op, int32_t snap_status) +{ + char *buf = NULL; + char *save_ptr = NULL; + char node_snap_info[PATH_MAX] = ""; + int32_t ret = -1; + glusterd_missed_snap_info *missed_snapinfo = NULL; + glusterd_snap_op_t *missed_snap_op = NULL; + glusterd_conf_t *priv = NULL; + gf_boolean_t match = _gf_false; + gf_boolean_t free_missed_snap_info = _gf_false; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(missed_info); + GF_ASSERT(snap_vol_id); + GF_ASSERT(brick_path); + + priv = this->private; + GF_ASSERT(priv); + + /* Create the snap_op object consisting of the * + * snap id and the op */ + ret = glusterd_missed_snap_op_new(&missed_snap_op); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MISSED_SNAP_CREATE_FAIL, + "Failed to create new missed snap object."); + ret = -1; + goto out; + } + + missed_snap_op->snap_vol_id = gf_strdup(snap_vol_id); + if (!missed_snap_op->snap_vol_id) { + ret = -1; + goto out; + } + missed_snap_op->brick_path = gf_strdup(brick_path); + if (!missed_snap_op->brick_path) { + ret = -1; + goto out; + } + missed_snap_op->brick_num = brick_num; + missed_snap_op->op = snap_op; + missed_snap_op->status = snap_status; + + /* Look for other entries for the same node and same snap */ + cds_list_for_each_entry(missed_snapinfo, &priv->missed_snaps_list, + missed_snaps) + { + snprintf(node_snap_info, sizeof(node_snap_info), "%s:%s", + missed_snapinfo->node_uuid, missed_snapinfo->snap_uuid); + if (!strcmp(node_snap_info, missed_info)) { + /* Found missed snapshot info for * + * the same node and same snap */ + match = _gf_true; + break; + } + } + + if (match == _gf_false) { + /* First snap op missed for the brick */ + ret = glusterd_missed_snapinfo_new(&missed_snapinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MISSED_SNAP_CREATE_FAIL, + "Failed to create missed snapinfo"); + goto out; + } + free_missed_snap_info = _gf_true; + buf = strtok_r(missed_info, ":", &save_ptr); + if (!buf) { + ret = -1; + goto out; + } + missed_snapinfo->node_uuid = gf_strdup(buf); + if (!missed_snapinfo->node_uuid) { + ret = -1; + goto out; + } + + buf = strtok_r(NULL, ":", &save_ptr); + if (!buf) { + ret = -1; + goto out; + } + missed_snapinfo->snap_uuid = gf_strdup(buf); + if (!missed_snapinfo->snap_uuid) { + ret = -1; + goto out; + } + + cds_list_add_tail(&missed_snap_op->snap_ops_list, + &missed_snapinfo->snap_ops); + cds_list_add_tail(&missed_snapinfo->missed_snaps, + &priv->missed_snaps_list); + + ret = 0; + goto out; + } else { + ret = glusterd_update_missed_snap_entry(missed_snapinfo, + missed_snap_op); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MISSED_SNAP_CREATE_FAIL, + "Failed to update existing missed snap entry."); + goto out; + } + } + +out: + if (ret) { + glusterd_free_snap_op(missed_snap_op); + + if (missed_snapinfo && (free_missed_snap_info == _gf_true)) + glusterd_free_missed_snapinfo(missed_snapinfo); + } + + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +/* Add missing snap entries to the in-memory conf->missed_snap_list */ +int32_t +glusterd_add_missed_snaps_to_list(dict_t *dict, int32_t missed_snap_count) +{ + char *buf = NULL; + char *tmp = NULL; + char *save_ptr = NULL; + char *nodeid = NULL; + char *snap_uuid = NULL; + char *snap_vol_id = NULL; + char *brick_path = NULL; + char missed_info[PATH_MAX] = ""; + char key[64] = ""; + int keylen; + int32_t i = -1; + int32_t 
ret = -1; + int32_t brick_num = -1; + int32_t snap_op = -1; + int32_t snap_status = -1; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(dict); + + priv = this->private; + GF_ASSERT(priv); + + /* We can update the missed_snaps_list without acquiring * + * any additional locks as big lock will be held. */ + for (i = 0; i < missed_snap_count; i++) { + keylen = snprintf(key, sizeof(key), "missed_snaps_%d", i); + ret = dict_get_strn(dict, key, keylen, &buf); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to fetch %s", key); + goto out; + } + + gf_msg_debug(this->name, 0, "missed_snap_entry = %s", buf); + + /* Need to make a duplicate string coz the same dictionary * + * is resent to the non-originator nodes */ + tmp = gf_strdup(buf); + if (!tmp) { + ret = -1; + goto out; + } + + /* Fetch the node-id, snap-id, brick_num, + * brick_path, snap_op and snap status + */ + nodeid = strtok_r(tmp, ":", &save_ptr); + snap_uuid = strtok_r(NULL, "=", &save_ptr); + snap_vol_id = strtok_r(NULL, ":", &save_ptr); + brick_num = atoi(strtok_r(NULL, ":", &save_ptr)); + brick_path = strtok_r(NULL, ":", &save_ptr); + snap_op = atoi(strtok_r(NULL, ":", &save_ptr)); + snap_status = atoi(strtok_r(NULL, ":", &save_ptr)); + + if (!nodeid || !snap_uuid || !brick_path || !snap_vol_id || + brick_num < 1 || snap_op < 1 || snap_status < 1) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_INVALID_MISSED_SNAP_ENTRY, + "Invalid missed_snap_entry"); + ret = -1; + goto out; + } + + snprintf(missed_info, sizeof(missed_info), "%s:%s", nodeid, snap_uuid); + + ret = glusterd_add_new_entry_to_list(missed_info, snap_vol_id, + brick_num, brick_path, snap_op, + snap_status); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_MISSED_SNAP_LIST_STORE_FAIL, + "Failed to store missed snaps_list"); + goto out; + } + + GF_FREE(tmp); + tmp = NULL; + } + + ret = 0; +out: + if (tmp) + GF_FREE(tmp); + + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +/* This function will restore origin volume to it's snap. + * The restore operation will simply replace the Gluster origin + * volume with the snap volume. + * TODO: Multi-volume delete to be done. + * Cleanup in case of restore failure is pending. + * + * @param orig_vol volinfo of origin volume + * @param snap_vol volinfo of snapshot volume + * + * @return 0 on success and negative value on error + */ +int +gd_restore_snap_volume(dict_t *dict, dict_t *rsp_dict, + glusterd_volinfo_t *orig_vol, + glusterd_volinfo_t *snap_vol, int32_t volcount) +{ + int ret = -1; + glusterd_volinfo_t *new_volinfo = NULL; + glusterd_snap_t *snap = NULL; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + glusterd_volinfo_t *temp_volinfo = NULL; + glusterd_volinfo_t *voliter = NULL; + gf_boolean_t conf_present = _gf_false; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(dict); + GF_ASSERT(rsp_dict); + conf = this->private; + GF_ASSERT(conf); + + GF_VALIDATE_OR_GOTO(this->name, orig_vol, out); + GF_VALIDATE_OR_GOTO(this->name, snap_vol, out); + snap = snap_vol->snapshot; + GF_VALIDATE_OR_GOTO(this->name, snap, out); + + /* Set the status to under restore so that if the + * the node goes down during restore and comes back + * the state of the volume can be reverted correctly + */ + snap->snap_status = GD_SNAP_STATUS_UNDER_RESTORE; + + /* We need to save this in disk so that if node goes + * down the status is in updated state. 
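+ * Persisting GD_SNAP_STATUS_UNDER_RESTORE lets a restarted glusterd detect + * and revert a half-finished restore.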
+ */ + ret = glusterd_store_snap(snap); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED, + "Could not store snap " + "object for %s snap of %s volume", + snap_vol->volname, snap_vol->parent_volname); + goto out; + } + + /* Snap volume must be stopped before performing the + * restore operation. + */ + ret = glusterd_stop_volume(snap_vol); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_STOP_FAILED, + "Failed to stop " + "snap volume"); + goto out; + } + + /* Create a new volinfo for the restored volume */ + ret = glusterd_volinfo_dup(snap_vol, &new_volinfo, _gf_true); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_OP_FAILED, + "Failed to create volinfo"); + goto out; + } + + /* Following entries need to be derived from origin volume. */ + gf_strncpy(new_volinfo->volname, orig_vol->volname, + sizeof(new_volinfo->volname)); + gf_uuid_copy(new_volinfo->volume_id, orig_vol->volume_id); + new_volinfo->snap_count = orig_vol->snap_count; + gf_uuid_copy(new_volinfo->restored_from_snap, snap_vol->snapshot->snap_id); + + /* Use the same version as the original version */ + new_volinfo->version = orig_vol->version; + + /* Copy the snap vol info to the new_volinfo.*/ + ret = glusterd_snap_volinfo_restore(dict, rsp_dict, new_volinfo, snap_vol, + volcount); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_RESTORE_FAIL, + "Failed to restore snap"); + goto out; + } + + /* In case a new node is added to the peer, after a snapshot was + * taken, the geo-rep files are not synced to that node. This + * leads to the failure of snapshot restore. Hence, ignoring the + * missing geo-rep files in the new node, and proceeding with + * snapshot restore. Once the restore is successful, the missing + * geo-rep files can be generated with "gluster volume geo-rep + * <master-vol> <slave-vol> create push-pem force" + */ + ret = glusterd_restore_geo_rep_files(snap_vol); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_RESTORE_FAIL, + "Failed to restore " + "geo-rep files for snap %s", + snap_vol->snapshot->snapname); + } + + /* Need not save cksum, as we will copy cksum file in * + * this function * + */ + ret = glusterd_copy_quota_files(snap_vol, orig_vol, &conf_present); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_RESTORE_FAIL, + "Failed to restore " + "quota files for snap %s", + snap_vol->snapshot->snapname); + goto out; + } + + /* New volinfo always shows the status as created. Therefore + * set the status to the original volume's status. */ + glusterd_set_volume_status(new_volinfo, orig_vol->status); + + cds_list_add_tail(&new_volinfo->vol_list, &conf->volumes); + + ret = glusterd_store_volinfo(new_volinfo, + GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_OP_FAILED, + "Failed to store volinfo"); + goto out; + } + + ret = 0; +out: + if (ret) { + /* In case of any failure we should free new_volinfo. Doing + * this will also remove the entry we added in conf->volumes + * if it was added there. 
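+ * On success, the snapshot volinfos tracked by the origin volume are moved + * onto new_volinfo->snap_volumes instead.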
+ */ + if (new_volinfo) + (void)glusterd_volinfo_delete(new_volinfo); + } else { + cds_list_for_each_entry_safe(voliter, temp_volinfo, + &orig_vol->snap_volumes, snapvol_list) + { + cds_list_add_tail(&voliter->snapvol_list, + &new_volinfo->snap_volumes); + } + } + + return ret; +} + +int +glusterd_snapshot_get_volnames_uuids(dict_t *dict, char *volname, + gf_getsnap_name_uuid_rsp *snap_info_rsp) +{ + int ret = -1; + int snapcount = 0; + char key[32] = ""; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_volinfo_t *tmp_vol = NULL; + xlator_t *this = NULL; + int op_errno = 0; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(volname); + GF_VALIDATE_OR_GOTO_WITH_ERROR(this->name, dict, out, op_errno, EINVAL); + GF_VALIDATE_OR_GOTO_WITH_ERROR(this->name, volname, out, op_errno, EINVAL); + GF_VALIDATE_OR_GOTO_WITH_ERROR(this->name, snap_info_rsp, out, op_errno, + EINVAL); + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND, + "Failed to get volinfo of volume %s", volname); + op_errno = EINVAL; + goto out; + } + + cds_list_for_each_entry_safe(snap_vol, tmp_vol, &volinfo->snap_volumes, + snapvol_list) + { + if (GLUSTERD_STATUS_STARTED != snap_vol->status) + continue; + + snapcount++; + + /* Set Snap Name */ + snprintf(key, sizeof(key), "snapname.%d", snapcount); + ret = dict_set_dynstr_with_alloc(dict, key, + snap_vol->snapshot->snapname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set " + "snap name in dictionary"); + goto out; + } + + /* Set Snap ID */ + snprintf(key, sizeof(key), "snap-id.%d", snapcount); + ret = dict_set_dynstr_with_alloc( + dict, key, uuid_utoa(snap_vol->snapshot->snap_id)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set " + "snap id in dictionary"); + goto out; + } + + /* Snap Volname which is used to activate the snap vol */ + snprintf(key, sizeof(key), "snap-volname.%d", snapcount); + ret = dict_set_dynstr_with_alloc(dict, key, snap_vol->volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set " + "snap id in dictionary"); + goto out; + } + } + + ret = dict_set_int32n(dict, "snap-count", SLEN("snap-count"), snapcount); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set snapcount"); + op_errno = -ret; + goto out; + } + + ret = dict_allocate_and_serialize(dict, &snap_info_rsp->dict.dict_val, + &snap_info_rsp->dict.dict_len); + if (ret) { + op_errno = -ret; + ret = -1; + goto out; + } + + ret = 0; + +out: + snap_info_rsp->op_ret = ret; + snap_info_rsp->op_errno = op_errno; + snap_info_rsp->op_errstr = ""; + + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-statedump.c b/xlators/mgmt/glusterd/src/glusterd-statedump.c new file mode 100644 index 00000000000..225d10cc546 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-statedump.c @@ -0,0 +1,243 @@ +/* + Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#include <glusterfs/statedump.h> +#include "glusterd.h" +#include "glusterd-shd-svc.h" +#include "glusterd-quotad-svc.h" +#include "glusterd-locks.h" +#include "glusterd-messages.h" + +static void +glusterd_dump_peer(glusterd_peerinfo_t *peerinfo, char *input_key, int index, + gf_boolean_t xpeers) +{ + char subkey[GF_DUMP_MAX_BUF_LEN + 11] = ""; + char key[GF_DUMP_MAX_BUF_LEN] = ""; + + strncpy(key, input_key, sizeof(key) - 1); + + snprintf(subkey, sizeof(subkey), "%s%d", key, index); + + gf_proc_dump_build_key(key, subkey, "uuid"); + gf_proc_dump_write(key, "%s", uuid_utoa(peerinfo->uuid)); + + gf_proc_dump_build_key(key, subkey, "hostname"); + gf_proc_dump_write(key, "%s", peerinfo->hostname); + + gf_proc_dump_build_key(key, subkey, "port"); + gf_proc_dump_write(key, "%d", peerinfo->port); + + gf_proc_dump_build_key(key, subkey, "state"); + gf_proc_dump_write(key, "%d", peerinfo->state.state); + + gf_proc_dump_build_key(key, subkey, "quorum-action"); + gf_proc_dump_write(key, "%d", peerinfo->quorum_action); + + gf_proc_dump_build_key(key, subkey, "quorum-contrib"); + gf_proc_dump_write(key, "%d", peerinfo->quorum_contrib); + + gf_proc_dump_build_key(key, subkey, "detaching"); + gf_proc_dump_write(key, "%d", peerinfo->detaching); + + gf_proc_dump_build_key(key, subkey, "locked"); + gf_proc_dump_write(key, "%d", peerinfo->locked); +} + +static void +glusterd_dump_peer_rpcstat(glusterd_peerinfo_t *peerinfo, char *input_key, + int index) +{ + rpc_clnt_connection_t *conn = NULL; + int ret = -1; + rpc_clnt_t *rpc = NULL; + char rpcsvc_peername[RPCSVC_PEER_STRLEN] = ""; + char subkey[GF_DUMP_MAX_BUF_LEN + 11] = ""; + char key[GF_DUMP_MAX_BUF_LEN] = ""; + + strncpy(key, input_key, sizeof(key) - 1); + + /* Dump the rpc connection statistics */ + rpc = peerinfo->rpc; + if (rpc) { + conn = &rpc->conn; + snprintf(subkey, sizeof(subkey), "%s%d", key, index); + ret = rpcsvc_transport_peername(conn->trans, (char *)&rpcsvc_peername, + sizeof(rpcsvc_peername)); + if (!ret) { + gf_proc_dump_build_key(key, subkey, "rpc.peername"); + gf_proc_dump_write(key, "%s", rpcsvc_peername); + } + gf_proc_dump_build_key(key, subkey, "rpc.connected"); + gf_proc_dump_write(key, "%d", conn->connected); + + gf_proc_dump_build_key(key, subkey, "rpc.total-bytes-read"); + gf_proc_dump_write(key, "%" PRIu64, conn->trans->total_bytes_read); + + gf_proc_dump_build_key(key, subkey, "rpc.total-bytes-written"); + gf_proc_dump_write(key, "%" PRIu64, conn->trans->total_bytes_write); + + gf_proc_dump_build_key(key, subkey, "rpc.ping_msgs_sent"); + gf_proc_dump_write(key, "%" PRIu64, conn->pingcnt); + + gf_proc_dump_build_key(key, subkey, "rpc.msgs_sent"); + gf_proc_dump_write(key, "%" PRIu64, conn->msgcnt); + } +} + +static void +glusterd_dump_client_details(glusterd_conf_t *conf) +{ + rpc_transport_t *xprt = NULL; + char key[GF_DUMP_MAX_BUF_LEN] = ""; + char subkey[50] = ""; + int index = 1; + + pthread_mutex_lock(&conf->xprt_lock); + { + list_for_each_entry(xprt, &conf->xprt_list, list) + { + snprintf(subkey, sizeof(subkey), "glusterd.client%d", index); + + gf_proc_dump_build_key(key, subkey, "identifier"); + gf_proc_dump_write(key, "%s", xprt->peerinfo.identifier); + + gf_proc_dump_build_key(key, subkey, "volname"); + gf_proc_dump_write(key, "%s", xprt->peerinfo.volname); + + gf_proc_dump_build_key(key, subkey, "max-op-version"); + gf_proc_dump_write(key, "%u", xprt->peerinfo.max_op_version); + + gf_proc_dump_build_key(key, subkey, "min-op-version"); + gf_proc_dump_write(key, "%u", xprt->peerinfo.min_op_version); + index++; 
+ } + } + pthread_mutex_unlock(&conf->xprt_lock); +} + +/* The following function is just for dumping mgmt_v3_lock dictionary, any other + * dict passed to this API will not work */ + +static void +glusterd_dict_mgmt_v3_lock_statedump(dict_t *dict) +{ + int ret = 0; + int dumplen = 0; + data_pair_t *trav = NULL; + char key[GF_DUMP_MAX_BUF_LEN] = ""; + char dump[64 * 1024] = ""; + + if (!dict) { + gf_msg_callingfn("glusterd", GF_LOG_WARNING, EINVAL, GD_MSG_DICT_EMPTY, + "dict NULL"); + goto out; + } + for (trav = dict->members_list; trav; trav = trav->next) { + if (strstr(trav->key, "debug.last-success-bt") != NULL) { + ret = snprintf(&dump[dumplen], sizeof(dump) - dumplen, "\n\t%s:%s", + trav->key, trav->value->data); + } else { + ret = snprintf( + &dump[dumplen], sizeof(dump) - dumplen, "\n\t%s:%s", trav->key, + uuid_utoa(((glusterd_mgmt_v3_lock_obj *)(trav->value->data)) + ->lock_owner)); + } + if ((ret == -1) || !ret) + return; + dumplen += ret; + } + + if (dumplen) { + gf_proc_dump_build_key(key, "glusterd", "mgmt_v3_lock"); + gf_proc_dump_write(key, "%s", dump); + } + +out: + return; +} + +int +glusterd_dump_priv(xlator_t *this) +{ + glusterd_conf_t *priv = NULL; + char key[GF_DUMP_MAX_BUF_LEN] = ""; + int port = 0; + struct pmap_registry *pmap = NULL; + + GF_VALIDATE_OR_GOTO("glusterd", this, out); + + priv = this->private; + if (!priv) + return 0; + + gf_proc_dump_build_key(key, "xlator.glusterd", "priv"); + gf_proc_dump_add_section("%s", key); + + pthread_mutex_lock(&priv->mutex); + { + gf_proc_dump_build_key(key, "glusterd", "my-uuid"); + gf_proc_dump_write(key, "%s", uuid_utoa(priv->uuid)); + + gf_proc_dump_build_key(key, "glusterd", "working-directory"); + gf_proc_dump_write(key, "%s", priv->workdir); + + gf_proc_dump_build_key(key, "glusterd", "max-op-version"); + gf_proc_dump_write(key, "%d", GD_OP_VERSION_MAX); + + gf_proc_dump_build_key(key, "glusterd", "min-op-version"); + gf_proc_dump_write(key, "%d", GD_OP_VERSION_MIN); + + gf_proc_dump_build_key(key, "glusterd", "current-op-version"); + gf_proc_dump_write(key, "%d", priv->op_version); + + gf_proc_dump_build_key(key, "glusterd", "ping-timeout"); + gf_proc_dump_write(key, "%d", priv->ping_timeout); +#ifdef BUILD_GNFS + gf_proc_dump_build_key(key, "glusterd", "nfs.online"); + gf_proc_dump_write(key, "%d", priv->nfs_svc.online); +#endif + gf_proc_dump_build_key(key, "glusterd", "quotad.online"); + gf_proc_dump_write(key, "%d", priv->quotad_svc.online); + + gf_proc_dump_build_key(key, "glusterd", "bitd.online"); + gf_proc_dump_write(key, "%d", priv->bitd_svc.online); + + gf_proc_dump_build_key(key, "glusterd", "scrub.online"); + gf_proc_dump_write(key, "%d", priv->scrub_svc.online); + + /* Dump peer details */ + GLUSTERD_DUMP_PEERS(&priv->peers, uuid_list, _gf_false); + + /* Dump pmap data structure from base port to last alloc */ + pmap = priv->pmap; + if (pmap) { + for (port = pmap->base_port; port <= pmap->last_alloc; port++) { + gf_proc_dump_build_key(key, "glusterd", "pmap_port"); + gf_proc_dump_write(key, "%d", port); + gf_proc_dump_build_key(key, "glusterd", "pmap[%d].type", port); + gf_proc_dump_write(key, "%d", pmap->ports[port].type); + gf_proc_dump_build_key(key, "glusterd", "pmap[%d].brickname", + port); + gf_proc_dump_write(key, "%s", pmap->ports[port].brickname); + } + } + /* Dump client details */ + glusterd_dump_client_details(priv); + + /* Dump mgmt_v3_lock from the dictionary if any */ + glusterd_dict_mgmt_v3_lock_statedump(priv->mgmt_v3_lock); + dict_dump_to_statedump(priv->opts, "options", "glusterd"); + } 
+ pthread_mutex_unlock(&priv->mutex); + +out: + return 0; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-statedump.h b/xlators/mgmt/glusterd/src/glusterd-statedump.h new file mode 100644 index 00000000000..b5ef1f48e82 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-statedump.h @@ -0,0 +1,18 @@ +/* + Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _GLUSTERD_STATEDUMP_H_ +#define _GLUSTERD_STATEDUMP_H_ + +#include <glusterfs/xlator.h> + +int +glusterd_dump_priv(xlator_t *this); +#endif diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c index 8058adcd0f9..d94dceb10b7 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.c +++ b/xlators/mgmt/glusterd/src/glusterd-store.c @@ -1,1985 +1,5125 @@ /* - Copyright (c) 2007-2010 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + Copyright (c) 2007-2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
*/ -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif #include "glusterd-op-sm.h" #include <inttypes.h> - -#include "globals.h" -#include "glusterfs.h" -#include "compat.h" -#include "dict.h" +#include <glusterfs/glusterfs.h> +#include <glusterfs/compat.h> +#include <glusterfs/dict.h> #include "protocol-common.h" -#include "xlator.h" -#include "logging.h" -#include "timer.h" -#include "defaults.h" -#include "compat.h" -#include "compat-errno.h" -#include "statedump.h" +#include <glusterfs/xlator.h> +#include <glusterfs/logging.h> +#include <glusterfs/timer.h> +#include <glusterfs/syscall.h> +#include <glusterfs/defaults.h> +#include <glusterfs/compat.h> +#include <glusterfs/compat-errno.h> +#include <glusterfs/statedump.h> #include "glusterd-mem-types.h" #include "glusterd.h" #include "glusterd-sm.h" #include "glusterd-op-sm.h" #include "glusterd-utils.h" +#include "glusterd-hooks.h" +#include <glusterfs/store.h> #include "glusterd-store.h" +#include "glusterd-snapshot-utils.h" +#include "glusterd-messages.h" -#include "glusterd1.h" -#include "cli1.h" #include "rpc-clnt.h" -#include "common-utils.h" +#include <glusterfs/common-utils.h> +#include <glusterfs/quota-common-utils.h> #include <sys/resource.h> #include <inttypes.h> #include <dirent.h> -static int32_t -glusterd_store_mkdir (char *path) -{ - int32_t ret = -1; - - ret = mkdir (path, 0777); - - if ((-1 == ret) && (EEXIST != errno)) { - gf_log ("", GF_LOG_ERROR, "mkdir() failed on path %s," - "errno: %s", path, strerror (errno)); - } else { - ret = 0; - } +#if defined(GF_LINUX_HOST_OS) +#include <mntent.h> +#else +#include "mntent_compat.h" +#endif - return ret; -} +#define GLUSTERD_GET_BRICK_DIR(path, volinfo, priv) \ + do { \ + int32_t _brick_len; \ + if (volinfo->is_snap_volume) { \ + _brick_len = snprintf(path, PATH_MAX, "%s/snaps/%s/%s/%s", \ + priv->workdir, volinfo->snapshot->snapname, \ + volinfo->volname, GLUSTERD_BRICK_INFO_DIR); \ + } else { \ + _brick_len = snprintf(path, PATH_MAX, "%s/%s/%s/%s", \ + priv->workdir, GLUSTERD_VOLUME_DIR_PREFIX, \ + volinfo->volname, GLUSTERD_BRICK_INFO_DIR); \ + } \ + if ((_brick_len < 0) || (_brick_len >= PATH_MAX)) { \ + path[0] = 0; \ + } \ + } while (0) -int32_t -glusterd_store_handle_create_on_absence (glusterd_store_handle_t **shandle, - char *path) +void +glusterd_replace_slash_with_hyphen(char *str) { - GF_ASSERT (shandle); - int32_t ret = 0; + char *ptr = NULL; - if (*shandle == NULL) { - ret = glusterd_store_handle_new (path, shandle); + ptr = strchr(str, '/'); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to create store" - " handle for path: %s", path); - } - } - return ret; + while (ptr) { + *ptr = '-'; + ptr = strchr(ptr, '/'); + } } int32_t -glusterd_store_mkstemp (glusterd_store_handle_t *shandle) +glusterd_store_create_brick_dir(glusterd_volinfo_t *volinfo) { - int fd = -1; - char tmppath[PATH_MAX] = {0,}; + int32_t ret = -1; + char brickdirpath[PATH_MAX] = { + 0, + }; + glusterd_conf_t *priv = NULL; - GF_ASSERT (shandle); - GF_ASSERT (shandle->path); + GF_ASSERT(volinfo); - snprintf (tmppath, sizeof (tmppath), "%s.tmp", shandle->path); - fd = open (tmppath, O_RDWR | O_CREAT | O_TRUNC, 0644); - if (fd <= 0) { - gf_log ("glusterd", GF_LOG_ERROR, "Failed to open %s, " - "error: %s", tmppath, strerror (errno)); - } + priv = THIS->private; + GF_ASSERT(priv); - return fd; + GLUSTERD_GET_BRICK_DIR(brickdirpath, volinfo, priv); + ret = gf_store_mkdir(brickdirpath); + + return ret; } -int32_t -glusterd_store_rename_tmppath (glusterd_store_handle_t *shandle) 
+static void +glusterd_store_key_vol_brick_set(glusterd_brickinfo_t *brickinfo, + char *key_vol_brick, size_t len) { - int32_t ret = -1; - char tmppath[PATH_MAX] = {0,}; - - GF_ASSERT (shandle); - GF_ASSERT (shandle->path); - - snprintf (tmppath, sizeof (tmppath), "%s.tmp", shandle->path); - ret = rename (tmppath, shandle->path); - if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, "Failed to mv %s to %s, " - "error: %s", tmppath, shandle->path, strerror (errno)); - } + GF_ASSERT(brickinfo); + GF_ASSERT(key_vol_brick); + GF_ASSERT(len >= PATH_MAX); - return ret; + snprintf(key_vol_brick, len, "%s", brickinfo->path); + glusterd_replace_slash_with_hyphen(key_vol_brick); } -int32_t -glusterd_store_unlink_tmppath (glusterd_store_handle_t *shandle) +static void +glusterd_store_brickinfofname_set(glusterd_brickinfo_t *brickinfo, + char *brickfname, size_t len) { - int32_t ret = -1; - char tmppath[PATH_MAX] = {0,}; + char key_vol_brick[PATH_MAX] = {0}; - GF_ASSERT (shandle); - GF_ASSERT (shandle->path); + GF_ASSERT(brickfname); + GF_ASSERT(brickinfo); + GF_ASSERT(len >= PATH_MAX); - snprintf (tmppath, sizeof (tmppath), "%s.tmp", shandle->path); - ret = unlink (tmppath); - if (ret && (errno != ENOENT)) { - gf_log ("glusterd", GF_LOG_ERROR, "Failed to mv %s to %s, " - "error: %s", tmppath, shandle->path, strerror (errno)); - } else { - ret = 0; - } + glusterd_store_key_vol_brick_set(brickinfo, key_vol_brick, + sizeof(key_vol_brick)); + snprintf(brickfname, len, "%s:%s", brickinfo->hostname, key_vol_brick); +} - return ret; +static void +glusterd_store_brickinfopath_set(glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, + char *brickpath, size_t len) +{ + char brickfname[PATH_MAX] = {0}; + char brickdirpath[PATH_MAX] = { + 0, + }; + glusterd_conf_t *priv = NULL; + + GF_ASSERT(brickpath); + GF_ASSERT(brickinfo); + GF_ASSERT(len >= PATH_MAX); + + priv = THIS->private; + GF_ASSERT(priv); + + GLUSTERD_GET_BRICK_DIR(brickdirpath, volinfo, priv); + glusterd_store_brickinfofname_set(brickinfo, brickfname, + sizeof(brickfname)); + snprintf(brickpath, len, "%s/%s", brickdirpath, brickfname); } static void -glusterd_replace_slash_with_hipen (char *str) +glusterd_store_snapd_path_set(glusterd_volinfo_t *volinfo, char *snapd_path, + size_t len) { - char *ptr = NULL; + char volpath[PATH_MAX] = { + 0, + }; + glusterd_conf_t *priv = NULL; - ptr = strchr (str, '/'); + GF_ASSERT(volinfo); + GF_ASSERT(len >= PATH_MAX); - while (ptr) { - *ptr = '-'; - ptr = strchr (str, '/'); - } + priv = THIS->private; + GF_ASSERT(priv); + + GLUSTERD_GET_VOLUME_DIR(volpath, volinfo, priv); + + snprintf(snapd_path, len, "%s/snapd.info", volpath); } -int32_t -glusterd_store_create_brick_dir (glusterd_volinfo_t *volinfo) +gf_boolean_t +glusterd_store_is_valid_brickpath(char *volname, char *brick) { - int32_t ret = -1; - char brickdirpath[PATH_MAX] = {0,}; - glusterd_conf_t *priv = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_volinfo_t *volinfo = NULL; + int32_t ret = 0; + size_t volname_len = strlen(volname); + xlator_t *this = NULL; + int bpath_len = 0; + const char delim[2] = "/"; + char *sub_dir = NULL; + char *saveptr = NULL; + char *brickpath_ptr = NULL; + + this = THIS; + GF_ASSERT(this); + + ret = glusterd_brickinfo_new_from_brick(brick, &brickinfo, _gf_false, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_BRICK_CREATION_FAIL, + "Failed to create brick " + "info for brick %s", + brick); + ret = 0; + goto out; + } + ret = glusterd_volinfo_new(&volinfo); + if (ret) { + gf_msg(this->name, 
GF_LOG_WARNING, 0, GD_MSG_VOLFILE_CREATE_FAIL, + "Failed to create volinfo"); + ret = 0; + goto out; + } + if (volname_len >= sizeof(volinfo->volname)) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_NAME_TOO_LONG, + "volume name too long"); + ret = 0; + goto out; + } + memcpy(volinfo->volname, volname, volname_len + 1); + + /* Check whether brickpath is less than PATH_MAX */ + ret = 1; + bpath_len = strlen(brickinfo->path); + + if (brickinfo->path[bpath_len - 1] != '/') { + if (bpath_len >= PATH_MAX) { + ret = 0; + goto out; + } + } else { + /* Path has a trailing "/" which should not be considered in + * length check validation + */ + if (bpath_len >= PATH_MAX + 1) { + ret = 0; + goto out; + } + } - GF_ASSERT (volinfo); + /* The following validation checks whether each sub directories in the + * brick path meets the POSIX max length validation + */ - priv = THIS->private; - GF_ASSERT (priv); + brickpath_ptr = brickinfo->path; + sub_dir = strtok_r(brickpath_ptr, delim, &saveptr); - GLUSTERD_GET_BRICK_DIR (brickdirpath, volinfo, priv); - ret = glusterd_store_mkdir (brickdirpath); + while (sub_dir != NULL) { + if (strlen(sub_dir) >= _POSIX_PATH_MAX) { + ret = 0; + goto out; + } + sub_dir = strtok_r(NULL, delim, &saveptr); + } - return ret; +out: + if (brickinfo) + glusterd_brickinfo_delete(brickinfo); + if (volinfo) + glusterd_volinfo_unref(volinfo); + + return ret; } -static void -glusterd_store_key_vol_brick_set (glusterd_brickinfo_t *brickinfo, - char *key_vol_brick, size_t len) +int32_t +glusterd_store_volinfo_brick_fname_write(int vol_fd, + glusterd_brickinfo_t *brickinfo, + int32_t brick_count, + int is_thin_arbiter) { - GF_ASSERT (brickinfo); - GF_ASSERT (key_vol_brick); - GF_ASSERT (len >= PATH_MAX); + char key[64] = { + 0, + }; + char brickfname[PATH_MAX] = { + 0, + }; + int32_t ret = -1; + + if (!is_thin_arbiter) { + snprintf(key, sizeof(key), "%s-%d", GLUSTERD_STORE_KEY_VOL_BRICK, + brick_count); + } else { + snprintf(key, sizeof(key), "%s-%d", GLUSTERD_STORE_KEY_VOL_TA_BRICK, + brick_count); + } + glusterd_store_brickinfofname_set(brickinfo, brickfname, + sizeof(brickfname)); + ret = gf_store_save_value(vol_fd, key, brickfname); + return ret; +} - snprintf (key_vol_brick, len, "%s", brickinfo->path); - glusterd_replace_slash_with_hipen (key_vol_brick); +int32_t +glusterd_store_create_brick_shandle_on_absence(glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo) +{ + char brickpath[PATH_MAX] = { + 0, + }; + int32_t ret = 0; + + GF_ASSERT(volinfo); + GF_ASSERT(brickinfo); + + glusterd_store_brickinfopath_set(volinfo, brickinfo, brickpath, + sizeof(brickpath)); + ret = gf_store_handle_create_on_absence(&brickinfo->shandle, brickpath); + return ret; } -static void -glusterd_store_brickinfofname_set (glusterd_brickinfo_t *brickinfo, - char *brickfname, size_t len) +int32_t +glusterd_store_create_snapd_shandle_on_absence(glusterd_volinfo_t *volinfo) { - char key_vol_brick[PATH_MAX] = {0}; + char snapd_path[PATH_MAX] = { + 0, + }; + int32_t ret = 0; - GF_ASSERT (brickfname); - GF_ASSERT (brickinfo); - GF_ASSERT (len >= PATH_MAX); + GF_ASSERT(volinfo); - glusterd_store_key_vol_brick_set (brickinfo, key_vol_brick, - sizeof (key_vol_brick)); - snprintf (brickfname, len, "%s:%s", brickinfo->hostname, key_vol_brick); + glusterd_store_snapd_path_set(volinfo, snapd_path, sizeof(snapd_path)); + ret = gf_store_handle_create_on_absence(&volinfo->snapd.handle, snapd_path); + return ret; } -static void -glusterd_store_brickinfopath_set (glusterd_volinfo_t *volinfo, - 
glusterd_brickinfo_t *brickinfo, - char *brickpath, size_t len) +/* Store the bricks snapshot details only if required + * + * The snapshot details will be stored only if the cluster op-version is + * greater than or equal to 4 + */ +static int +gd_store_brick_snap_details_write(int fd, glusterd_brickinfo_t *brickinfo) { - char brickfname[PATH_MAX] = {0}; - char brickdirpath[PATH_MAX] = {0,}; - glusterd_conf_t *priv = NULL; + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + char value[5 * PATH_MAX]; + uint total_len = 0; - GF_ASSERT (brickpath); - GF_ASSERT (brickinfo); - GF_ASSERT (len >= PATH_MAX); + this = THIS; + GF_ASSERT(this != NULL); + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, (conf != NULL), out); - priv = THIS->private; - GF_ASSERT (priv); + GF_VALIDATE_OR_GOTO(this->name, (fd > 0), out); + GF_VALIDATE_OR_GOTO(this->name, (brickinfo != NULL), out); - GLUSTERD_GET_BRICK_DIR (brickdirpath, volinfo, priv); - glusterd_store_brickinfofname_set (brickinfo, brickfname, - sizeof (brickfname)); - snprintf (brickpath, len, "%s/%s", brickdirpath, brickfname); + if (conf->op_version < GD_OP_VERSION_3_6_0) { + ret = 0; + goto out; + } + + if (brickinfo->device_path[0] != '\0') { + ret = snprintf(value + total_len, sizeof(value) - total_len, "%s=%s\n", + GLUSTERD_STORE_KEY_BRICK_DEVICE_PATH, + brickinfo->device_path); + if (ret < 0 || ret >= sizeof(value) - total_len) { + ret = -1; + goto err; + } + total_len += ret; + } + + if (brickinfo->mount_dir[0] != '\0') { + ret = snprintf(value + total_len, sizeof(value) - total_len, "%s=%s\n", + GLUSTERD_STORE_KEY_BRICK_MOUNT_DIR, + brickinfo->mount_dir); + if (ret < 0 || ret >= sizeof(value) - total_len) { + ret = -1; + goto err; + } + total_len += ret; + } + + if (brickinfo->fstype[0] != '\0') { + ret = snprintf(value + total_len, sizeof(value) - total_len, "%s=%s\n", + GLUSTERD_STORE_KEY_BRICK_FSTYPE, brickinfo->fstype); + if (ret < 0 || ret >= sizeof(value) - total_len) { + ret = -1; + goto err; + } + total_len += ret; + } + + if (brickinfo->mnt_opts[0] != '\0') { + ret = snprintf(value + total_len, sizeof(value) - total_len, "%s=%s\n", + GLUSTERD_STORE_KEY_BRICK_MNTOPTS, brickinfo->mnt_opts); + if (ret < 0 || ret >= sizeof(value) - total_len) { + ret = -1; + goto err; + } + total_len += ret; + } + + ret = snprintf(value + total_len, sizeof(value) - total_len, "%s=%d\n", + GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS, + brickinfo->snap_status); + if (ret < 0 || ret >= sizeof(value) - total_len) { + ret = -1; + goto err; + } + total_len += ret; + + ret = snprintf(value + total_len, sizeof(value) - total_len, + "%s=%" PRIu64 "\n", GLUSTERD_STORE_KEY_BRICK_FSID, + brickinfo->statfs_fsid); + if (ret < 0 || ret >= sizeof(value) - total_len) { + ret = -1; + goto err; + } + + ret = gf_store_save_items(fd, value); +err: + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FS_LABEL_UPDATE_FAIL, + "Failed to save " + "snap detils of brick %s", + brickinfo->path); + } +out: + return ret; } -int32_t -glusterd_store_volinfo_brick_fname_write (int vol_fd, - glusterd_brickinfo_t *brickinfo, - int32_t brick_count) +static int32_t +glusterd_store_brickinfo_write(int fd, glusterd_brickinfo_t *brickinfo) { - char key[PATH_MAX] = {0,}; - char brickfname[PATH_MAX] = {0,}; - int32_t ret = -1; - - snprintf (key, sizeof (key), "%s-%d", GLUSTERD_STORE_KEY_VOL_BRICK, - brick_count); - glusterd_store_brickinfofname_set (brickinfo, brickfname, - sizeof (brickfname)); - ret = glusterd_store_save_value (vol_fd, key, brickfname); - return ret; 
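+    /* The rewritten body below batches every brickinfo field into a single
+     * "key=value\n" buffer and persists it with one gf_store_save_items()
+     * call, instead of issuing one store write per key as the old code did.
+     */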
+ char value[5 * PATH_MAX]; + int32_t ret = -1; + + GF_ASSERT(brickinfo); + GF_ASSERT(fd > 0); + + ret = snprintf(value, sizeof(value), + "%s=%s\n%s=%s\n%s=%s\n%s=%s\n%s=%d\n%s=%d\n%s=%d\n%s=%s\n", + GLUSTERD_STORE_KEY_BRICK_UUID, uuid_utoa(brickinfo->uuid), + GLUSTERD_STORE_KEY_BRICK_HOSTNAME, brickinfo->hostname, + GLUSTERD_STORE_KEY_BRICK_PATH, brickinfo->path, + GLUSTERD_STORE_KEY_BRICK_REAL_PATH, brickinfo->path, + GLUSTERD_STORE_KEY_BRICK_PORT, brickinfo->port, + GLUSTERD_STORE_KEY_BRICK_RDMA_PORT, brickinfo->rdma_port, + GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED, + brickinfo->decommissioned, GLUSTERD_STORE_KEY_BRICK_ID, + brickinfo->brick_id); + + if (ret < 0 || ret >= sizeof(value)) { + ret = -1; + goto out; + } + + ret = gf_store_save_items(fd, value); + if (ret) + goto out; + + ret = gd_store_brick_snap_details_write(fd, brickinfo); + if (ret) + goto out; + + if (!brickinfo->vg[0]) + goto out; + + ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_BRICK_VGNAME, + brickinfo->vg); +out: + gf_msg_debug(THIS->name, 0, "Returning %d", ret); + return ret; } int32_t -glusterd_store_create_brick_shandle_on_absence (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo) +glusterd_store_snapd_write(int fd, glusterd_volinfo_t *volinfo) { - char brickpath[PATH_MAX] = {0,}; - int32_t ret = 0; + char value[64] = { + 0, + }; + int32_t ret = 0; + xlator_t *this = NULL; + + GF_ASSERT(volinfo); + GF_ASSERT(fd > 0); + + this = THIS; + GF_ASSERT(this); + + snprintf(value, sizeof(value), "%d", volinfo->snapd.port); + ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_SNAPD_PORT, value); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPD_PORT_STORE_FAIL, + "failed to store the snapd " + "port of volume %s", + volinfo->volname); + + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; +} - GF_ASSERT (volinfo); - GF_ASSERT (brickinfo); +static int32_t +glusterd_store_perform_brick_store(glusterd_brickinfo_t *brickinfo) +{ + int fd = -1; + int32_t ret = -1; + GF_ASSERT(brickinfo); + + fd = gf_store_mkstemp(brickinfo->shandle); + if (fd <= 0) { + ret = -1; + goto out; + } + ret = glusterd_store_brickinfo_write(fd, brickinfo); + if (ret) + goto out; - glusterd_store_brickinfopath_set (volinfo, brickinfo, brickpath, - sizeof (brickpath)); - ret = glusterd_store_handle_create_on_absence (&brickinfo->shandle, - brickpath); - return ret; +out: + if (ret && (fd > 0)) { + gf_store_unlink_tmppath(brickinfo->shandle); + } + gf_msg_debug(THIS->name, 0, "Returning %d", ret); + return ret; } int32_t -glusterd_store_brickinfo_write (int fd, glusterd_brickinfo_t *brickinfo) +glusterd_store_perform_snapd_store(glusterd_volinfo_t *volinfo) { - char value[256] = {0,}; - int32_t ret = 0; + int fd = -1; + int32_t ret = -1; + xlator_t *this = NULL; + + GF_ASSERT(volinfo); + + this = THIS; + GF_ASSERT(this); + + fd = gf_store_mkstemp(volinfo->snapd.handle); + if (fd <= 0) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "failed to create the " + "temporary file for the snapd store handle of volume " + "%s", + volinfo->volname); + goto out; + } + + ret = glusterd_store_snapd_write(fd, volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPD_PORT_STORE_FAIL, + "failed to write snapd port " + "info to store handle (volume: %s", + volinfo->volname); + goto out; + } + + ret = gf_store_rename_tmppath(volinfo->snapd.handle); - GF_ASSERT (brickinfo); - GF_ASSERT (fd > 0); +out: + if (ret && (fd > 0)) + gf_store_unlink_tmppath(volinfo->snapd.handle); + 
gf_msg_debug(THIS->name, 0, "Returning %d", ret); + return ret; +} - ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_HOSTNAME, - brickinfo->hostname); - if (ret) - goto out; +static int32_t +glusterd_store_brickinfo(glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, int32_t brick_count, + int vol_fd, int is_thin_arbiter) +{ + int32_t ret = -1; - ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_PATH, - brickinfo->path); - if (ret) - goto out; + GF_ASSERT(volinfo); + GF_ASSERT(brickinfo); + + ret = glusterd_store_volinfo_brick_fname_write( + vol_fd, brickinfo, brick_count, is_thin_arbiter); + if (ret) + goto out; - snprintf (value, sizeof(value), "%d", brickinfo->port); - ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_PORT, - value); + ret = glusterd_store_create_brick_shandle_on_absence(volinfo, brickinfo); + if (ret) + goto out; + ret = glusterd_store_perform_brick_store(brickinfo); out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + gf_msg_debug(THIS->name, 0, "Returning with %d", ret); + return ret; } int32_t -glusterd_store_perform_brick_store (glusterd_brickinfo_t *brickinfo) +glusterd_store_snapd_info(glusterd_volinfo_t *volinfo) { - int fd = -1; - int32_t ret = -1; - GF_ASSERT (brickinfo); - - fd = glusterd_store_mkstemp (brickinfo->shandle); - if (fd <= 0) { - ret = -1; - goto out; - } - - ret = glusterd_store_brickinfo_write (fd, brickinfo); - if (ret) - goto out; + int32_t ret = -1; + xlator_t *this = NULL; + + GF_ASSERT(volinfo); + + this = THIS; + GF_ASSERT(this); + + ret = glusterd_store_create_snapd_shandle_on_absence(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_HANDLE_CREATE_FAIL, + "failed to create store " + "handle for snapd (volume: %s)", + volinfo->volname); + goto out; + } + + ret = glusterd_store_perform_snapd_store(volinfo); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPD_INFO_STORE_FAIL, + "failed to store snapd info " + "of the volume %s", + volinfo->volname); - ret = glusterd_store_rename_tmppath (brickinfo->shandle); out: - if (ret && (fd > 0)) - glusterd_store_unlink_tmppath (brickinfo->shandle); - if (fd > 0) - close (fd); - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + if (ret) + gf_store_unlink_tmppath(volinfo->snapd.handle); + + gf_msg_debug(this->name, 0, "Returning with %d", ret); + return ret; } int32_t -glusterd_store_brickinfo (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo, int32_t brick_count, - int vol_fd) +glusterd_store_delete_brick(glusterd_brickinfo_t *brickinfo, char *delete_path) { - int32_t ret = -1; + int32_t ret = -1; + glusterd_conf_t *priv = NULL; + char brickpath[PATH_MAX] = { + 0, + }; + char *ptr = NULL; + char *tmppath = NULL; + xlator_t *this = NULL; - GF_ASSERT (volinfo); - GF_ASSERT (brickinfo); + this = THIS; + GF_ASSERT(this); + GF_ASSERT(brickinfo); - ret = glusterd_store_volinfo_brick_fname_write (vol_fd, brickinfo, - brick_count); - if (ret) - goto out; - - ret = glusterd_store_create_brick_dir (volinfo); - if (ret) - goto out; + priv = this->private; + GF_ASSERT(priv); - ret = glusterd_store_create_brick_shandle_on_absence (volinfo, - brickinfo); - if (ret) - goto out; + tmppath = gf_strdup(brickinfo->path); - ret = glusterd_store_perform_brick_store (brickinfo); -out: - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); - return ret; -} + ptr = strchr(tmppath, '/'); -int32_t -glusterd_store_delete_brick (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo) -{ - 
int32_t ret = -1; - glusterd_conf_t *priv = NULL; - char path[PATH_MAX] = {0,}; - char brickpath[PATH_MAX] = {0,}; - char *ptr = NULL; - char *tmppath = NULL; + while (ptr) { + *ptr = '-'; + ptr = strchr(tmppath, '/'); + } - GF_ASSERT (volinfo); - GF_ASSERT (brickinfo); + snprintf(brickpath, sizeof(brickpath), + "%s/" GLUSTERD_BRICK_INFO_DIR "/%s:%s", delete_path, + brickinfo->hostname, tmppath); - priv = THIS->private; + GF_FREE(tmppath); - GF_ASSERT (priv); + ret = sys_unlink(brickpath); - GLUSTERD_GET_BRICK_DIR (path, volinfo, priv); + if ((ret < 0) && (errno != ENOENT)) { + gf_msg_debug(this->name, 0, "Unlink failed on %s", brickpath); + ret = -1; + goto out; + } else { + ret = 0; + } - tmppath = gf_strdup (brickinfo->path); +out: + if (brickinfo->shandle) { + gf_store_handle_destroy(brickinfo->shandle); + brickinfo->shandle = NULL; + } + gf_msg_debug(this->name, 0, "Returning with %d", ret); + return ret; +} - ptr = strchr (tmppath, '/'); +static int +_storeopts(dict_t *dict_value, char *key, data_t *value, void *data) +{ + int32_t ret = 0; + int32_t exists = 0; + int32_t option_len = 0; + gf_store_handle_t *shandle = NULL; + glusterd_volinfo_data_store_t *dict_data = NULL; + xlator_t *this = NULL; - while (ptr) { - *ptr = '-'; - ptr = strchr (tmppath, '/'); - } + this = THIS; + GF_ASSERT(this); - snprintf (brickpath, sizeof (brickpath), "%s/%s:%s", - path, brickinfo->hostname, tmppath); + dict_data = (glusterd_volinfo_data_store_t *)data; + shandle = dict_data->shandle; - GF_FREE (tmppath); + GF_ASSERT(shandle); + GF_ASSERT(shandle->fd > 0); + GF_ASSERT(key); + GF_ASSERT(value); + GF_ASSERT(value->data); - ret = unlink (brickpath); + if (dict_data->key_check == 1) { + if (is_key_glusterd_hooks_friendly(key)) { + exists = 1; - if ((ret < 0) && (errno != ENOENT)) { - gf_log ("", GF_LOG_ERROR, "Unlink failed on %s, reason: %s", - brickpath, strerror(errno)); - ret = -1; - goto out; } else { - ret = 0; + exists = glusterd_check_option_exists(key, NULL); } - -out: - if (brickinfo->shandle) { - glusterd_store_handle_destroy (brickinfo->shandle); - brickinfo->shandle = NULL; + } + if (exists == 1 || dict_data->key_check == 0) { + gf_msg_debug(this->name, 0, + "Storing in buffer for volinfo:key= %s, " + "val=%s", + key, value->data); + } else { + gf_msg_debug(this->name, 0, "Discarding:key= %s, val=%s", key, + value->data); + return 0; + } + + /* + * The option_len considers the length of the key value + * pair and along with that '=' and '\n', but as value->len + * already considers a NULL at the end of the data, adding + * just 1. 
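+     * For example (hypothetical values), key "performance.cache-size"
+     * (strlen 22) with value data "256MB" (value->len 6, including the
+     * trailing NUL) gives option_len = 22 + 6 + 1 = 29, which is exactly
+     * the length of "performance.cache-size=256MB\n".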
+ */ + option_len = strlen(key) + value->len + 1; + + if ((VOLINFO_BUFFER_SIZE - dict_data->buffer_len - 1) < option_len) { + ret = gf_store_save_items(shandle->fd, dict_data->buffer); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED, NULL); + return -1; } - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); - return ret; + dict_data->buffer_len = 0; + dict_data->buffer[0] = '\0'; + } + ret = snprintf(dict_data->buffer + dict_data->buffer_len, option_len + 1, + "%s=%s\n", key, value->data); + if (ret < 0 || ret > option_len + 1) { + gf_smsg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_COPY_FAIL, NULL); + return -1; + } + + dict_data->buffer_len += ret; + + return 0; } -int32_t -glusterd_store_remove_bricks (glusterd_volinfo_t *volinfo) +/* Store the volumes snapshot details only if required + * + * The snapshot details will be stored only if the cluster op-version is + * greater than or equal to 4 + */ +static int +glusterd_volume_write_snap_details(int fd, glusterd_volinfo_t *volinfo) { - int32_t ret = 0; - glusterd_brickinfo_t *tmp = NULL; - glusterd_conf_t *priv = NULL; - char brickdir [PATH_MAX] = {0,}; - DIR *dir = NULL; - struct dirent *entry = NULL; - char path[PATH_MAX] = {0,}; - - GF_ASSERT (volinfo); + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + char buf[PATH_MAX] = { + 0, + }; + + this = THIS; + GF_ASSERT(this != NULL); + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, (conf != NULL), out); + + GF_VALIDATE_OR_GOTO(this->name, (fd > 0), out); + GF_VALIDATE_OR_GOTO(this->name, (volinfo != NULL), out); + + if (conf->op_version < GD_OP_VERSION_3_6_0) { + ret = 0; + goto out; + } + + ret = snprintf(buf, sizeof(buf), "%s=%s\n%s=%s\n%s=%" PRIu64 "\n", + GLUSTERD_STORE_KEY_PARENT_VOLNAME, volinfo->parent_volname, + GLUSTERD_STORE_KEY_VOL_RESTORED_SNAP, + uuid_utoa(volinfo->restored_from_snap), + GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT, + volinfo->snap_max_hard_limit); + if (ret < 0 || ret >= sizeof(buf)) { + ret = -1; + goto err; + } + + ret = gf_store_save_items(fd, buf); + if (ret) { + goto err; + } + ret = glusterd_store_snapd_info(volinfo); +err: + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPINFO_WRITE_FAIL, + "Failed to write snap details" + " for volume %s", + volinfo->volname); +out: + return ret; +} - list_for_each_entry (tmp, &volinfo->bricks, brick_list) { - ret = glusterd_store_delete_brick (volinfo, tmp); - if (ret) - goto out; +static int32_t +glusterd_volume_exclude_options_write(int fd, glusterd_volinfo_t *volinfo) +{ + char *str = NULL; + char buf[PATH_MAX]; + uint total_len = 0; + int32_t ret = -1; + xlator_t *this = THIS; + glusterd_conf_t *conf = NULL; + + GF_ASSERT(this); + GF_ASSERT(fd > 0); + GF_ASSERT(volinfo); + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, (conf != NULL), out); + + ret = snprintf(buf + total_len, sizeof(buf) - total_len, + "%s=%d\n%s=%d\n%s=%d\n%s=%d\n%s=%d\n%s=%d\n", + GLUSTERD_STORE_KEY_VOL_TYPE, volinfo->type, + GLUSTERD_STORE_KEY_VOL_COUNT, volinfo->brick_count, + GLUSTERD_STORE_KEY_VOL_STATUS, volinfo->status, + GLUSTERD_STORE_KEY_VOL_SUB_COUNT, volinfo->sub_count, + GLUSTERD_STORE_KEY_VOL_STRIPE_CNT, volinfo->stripe_count, + GLUSTERD_STORE_KEY_VOL_REPLICA_CNT, volinfo->replica_count); + if (ret < 0 || ret >= sizeof(buf) - total_len) { + ret = -1; + goto out; + } + total_len += ret; + + if ((conf->op_version >= GD_OP_VERSION_3_7_6) && volinfo->arbiter_count) { + ret = snprintf(buf + total_len, sizeof(buf) - total_len, "%s=%d\n", + 
GLUSTERD_STORE_KEY_VOL_ARBITER_CNT, + volinfo->arbiter_count); + if (ret < 0 || ret >= sizeof(buf) - total_len) { + ret = -1; + goto out; + } + total_len += ret; + } + + if (conf->op_version >= GD_OP_VERSION_3_6_0) { + ret = snprintf( + buf + total_len, sizeof(buf) - total_len, "%s=%d\n%s=%d\n", + GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT, volinfo->disperse_count, + GLUSTERD_STORE_KEY_VOL_REDUNDANCY_CNT, volinfo->redundancy_count); + if (ret < 0 || ret >= sizeof(buf) - total_len) { + ret = -1; + goto out; + } + total_len += ret; + } + + ret = snprintf(buf + total_len, sizeof(buf) - total_len, + "%s=%d\n%s=%d\n%s=%s\n", GLUSTERD_STORE_KEY_VOL_VERSION, + volinfo->version, GLUSTERD_STORE_KEY_VOL_TRANSPORT, + volinfo->transport_type, GLUSTERD_STORE_KEY_VOL_ID, + uuid_utoa(volinfo->volume_id)); + if (ret < 0 || ret >= sizeof(buf) - total_len) { + ret = -1; + goto out; + } + total_len += ret; + + str = glusterd_auth_get_username(volinfo); + if (str) { + ret = snprintf(buf + total_len, sizeof(buf) - total_len, "%s=%s\n", + GLUSTERD_STORE_KEY_USERNAME, str); + if (ret < 0 || ret >= sizeof(buf) - total_len) { + ret = -1; + goto out; + } + total_len += ret; + } + + str = glusterd_auth_get_password(volinfo); + if (str) { + ret = snprintf(buf + total_len, sizeof(buf) - total_len, "%s=%s\n", + GLUSTERD_STORE_KEY_PASSWORD, str); + if (ret < 0 || ret >= sizeof(buf) - total_len) { + ret = -1; + goto out; + } + total_len += ret; + } + + ret = snprintf(buf + total_len, sizeof(buf) - total_len, "%s=%d\n%s=%d\n", + GLUSTERD_STORE_KEY_VOL_OP_VERSION, volinfo->op_version, + GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION, + volinfo->client_op_version); + if (ret < 0 || ret >= sizeof(buf) - total_len) { + ret = -1; + goto out; + } + total_len += ret; + + if (conf->op_version >= GD_OP_VERSION_3_7_6) { + ret = snprintf(buf + total_len, sizeof(buf) - total_len, "%s=%d\n", + GLUSTERD_STORE_KEY_VOL_QUOTA_VERSION, + volinfo->quota_xattr_version); + if (ret < 0 || ret >= sizeof(buf) - total_len) { + ret = -1; + goto out; } + total_len += ret; + } + if (conf->op_version >= GD_OP_VERSION_3_10_0) { + ret = snprintf(buf + total_len, sizeof(buf) - total_len, "%s=0\n", + GF_TIER_ENABLED); + if (ret < 0 || ret >= sizeof(buf) - total_len) { + ret = -1; + goto out; + } + total_len += ret; + } + + if ((conf->op_version >= GD_OP_VERSION_7_0) && + volinfo->thin_arbiter_count) { + ret = snprintf(buf + total_len, sizeof(buf) - total_len, "%s=%d\n", + GLUSTERD_STORE_KEY_VOL_THIN_ARBITER_CNT, + volinfo->thin_arbiter_count); + if (ret < 0 || ret >= sizeof(buf) - total_len) { + ret = -1; + goto out; + } + total_len += ret; + } - priv = THIS->private; - GF_ASSERT (priv); + ret = gf_store_save_items(fd, buf); + if (ret) + goto out; - GLUSTERD_GET_BRICK_DIR (brickdir, volinfo, priv); + ret = glusterd_volume_write_snap_details(fd, volinfo); - dir = opendir (brickdir); +out: + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_VALS_WRITE_FAIL, + "Unable to write volume " + "values for %s", + volinfo->volname); + return ret; +} - glusterd_for_each_entry (entry, dir); +static void +glusterd_store_voldirpath_set(glusterd_volinfo_t *volinfo, char *voldirpath) +{ + glusterd_conf_t *priv = NULL; - while (entry) { - snprintf (path, sizeof (path), "%s/%s", - brickdir, entry->d_name); - ret = unlink (path); - if (ret && errno != ENOENT) { - gf_log ("", GF_LOG_ERROR, "Unable to unlink %s, " - "reason: %s", path, strerror(errno)); - } - glusterd_for_each_entry (entry, dir); - } + GF_ASSERT(volinfo); + priv = THIS->private; + GF_ASSERT(priv); - closedir 
(dir); + GLUSTERD_GET_VOLUME_DIR(voldirpath, volinfo, priv); +} - ret = rmdir (brickdir); +static void +glusterd_store_piddirpath_set(glusterd_volinfo_t *volinfo, char *piddirpath) +{ + glusterd_conf_t *priv = NULL; -out: - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); - return ret; + GF_ASSERT(volinfo); + priv = THIS->private; + GF_ASSERT(priv); + + GLUSTERD_GET_VOLUME_PID_DIR(piddirpath, volinfo, priv); } -void _storeopts (dict_t *this, char *key, data_t *value, void *data) +static int32_t +glusterd_store_create_volume_dirs(glusterd_volinfo_t *volinfo) { - int32_t ret = 0; - int32_t exists = 0; - glusterd_store_handle_t *shandle = NULL; + int32_t ret = -1; + char dirpath[PATH_MAX] = { + 0, + }; - shandle = (glusterd_store_handle_t*)data; + GF_ASSERT(volinfo); - GF_ASSERT (shandle); - GF_ASSERT (shandle->fd > 0); - GF_ASSERT (shandle->path); - GF_ASSERT (key); - GF_ASSERT (value && value->data); + glusterd_store_voldirpath_set(volinfo, dirpath); + ret = gf_store_mkdir(dirpath); + if (ret) + goto out; - if ((!shandle) || (shandle->fd <= 0) || (!shandle->path)) - return; + glusterd_store_piddirpath_set(volinfo, dirpath); + ret = gf_store_mkdir(dirpath); + if (ret) + goto out; - if (!key) - return; - if (!value || !value->data) - return; +out: + gf_msg_debug(THIS->name, 0, "Returning with %d", ret); + return ret; +} - exists = glusterd_check_option_exists (key, NULL); - if (1 == exists) { - gf_log ("", GF_LOG_DEBUG, "Storing in volinfo:key= %s, val=%s", - key, value->data); - } else { - gf_log ("", GF_LOG_DEBUG, "Discarding:key= %s, val=%s", - key, value->data); - return; - } +int32_t +glusterd_store_create_snap_dir(glusterd_snap_t *snap) +{ + int32_t ret = -1; + char snapdirpath[PATH_MAX] = { + 0, + }; + glusterd_conf_t *priv = NULL; + + priv = THIS->private; + GF_ASSERT(priv); + GF_ASSERT(snap); + + GLUSTERD_GET_SNAP_DIR(snapdirpath, snap, priv); + + ret = mkdir_p(snapdirpath, 0755, _gf_true); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_CREATE_DIR_FAILED, + "Failed to create snaps dir " + "%s", + snapdirpath); + } + return ret; +} - ret = glusterd_store_save_value (shandle->fd, key, (char*)value->data); +static int32_t +glusterd_store_volinfo_write(int fd, glusterd_volinfo_t *volinfo) +{ + int32_t ret = -1; + gf_store_handle_t *shandle = NULL; + GF_ASSERT(fd > 0); + GF_ASSERT(volinfo); + GF_ASSERT(volinfo->shandle); + xlator_t *this = NULL; + glusterd_volinfo_data_store_t *dict_data = NULL; + + this = THIS; + GF_ASSERT(this); + + shandle = volinfo->shandle; + + dict_data = GF_CALLOC(1, sizeof(glusterd_volinfo_data_store_t), + gf_gld_mt_volinfo_dict_data_t); + if (dict_data == NULL) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_MEMORY, NULL); + return -1; + } + + ret = glusterd_volume_exclude_options_write(fd, volinfo); + if (ret) { + goto out; + } + + dict_data->shandle = shandle; + dict_data->key_check = 1; + + shandle->fd = fd; + dict_foreach(volinfo->dict, _storeopts, (void *)dict_data); + + dict_data->key_check = 0; + dict_foreach(volinfo->gsync_slaves, _storeopts, (void *)dict_data); + + if (dict_data->buffer_len > 0) { + ret = gf_store_save_items(fd, dict_data->buffer); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to write into store" - " handle for path: %s", shandle->path); - return; + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED, NULL); + goto out; } + } + + shandle->fd = 0; +out: + GF_FREE(dict_data); + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; } -int32_t -glusterd_volume_exclude_options_write (int fd, 
glusterd_volinfo_t *volinfo) +static int32_t +glusterd_store_snapinfo_write(glusterd_snap_t *snap) { - GF_ASSERT (fd > 0); - GF_ASSERT (volinfo); + int32_t ret = -1; + int fd = 0; + char buf[PATH_MAX]; + uint total_len = 0; + + GF_ASSERT(snap); + + fd = gf_store_mkstemp(snap->shandle); + if (fd <= 0) + goto out; + + ret = snprintf(buf + total_len, sizeof(buf) - total_len, + "%s=%s\n%s=%d\n%s=%d\n", GLUSTERD_STORE_KEY_SNAP_ID, + uuid_utoa(snap->snap_id), GLUSTERD_STORE_KEY_SNAP_STATUS, + snap->snap_status, GLUSTERD_STORE_KEY_SNAP_RESTORED, + snap->snap_restored); + if (ret < 0 || ret >= sizeof(buf) - total_len) { + ret = -1; + goto out; + } + total_len += ret; + + if (snap->description) { + ret = snprintf(buf + total_len, sizeof(buf) - total_len, "%s=%s\n", + GLUSTERD_STORE_KEY_SNAP_DESC, snap->description); + if (ret < 0 || ret >= sizeof(buf) - total_len) { + ret = -1; + goto out; + } + total_len += ret; + } - char buf[4096] = {0,}; - int32_t ret = -1; + ret = snprintf(buf + total_len, sizeof(buf) - total_len, "%s=%ld\n", + GLUSTERD_STORE_KEY_SNAP_TIMESTAMP, snap->time_stamp); + if (ret < 0 || ret >= sizeof(buf) - total_len) { + ret = -1; + goto out; + } + ret = gf_store_save_items(fd, buf); - snprintf (buf, sizeof (buf), "%d", volinfo->type); - ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_TYPE, buf); - if (ret) - goto out; - - snprintf (buf, sizeof (buf), "%d", volinfo->brick_count); - ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_COUNT, buf); - if (ret) - goto out; +out: + gf_msg_debug(THIS->name, 0, "Returning %d", ret); + return ret; +} - snprintf (buf, sizeof (buf), "%d", volinfo->status); - ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_STATUS, buf); - if (ret) - goto out; +static void +glusterd_store_volfpath_set(glusterd_volinfo_t *volinfo, char *volfpath, + size_t len) +{ + char voldirpath[PATH_MAX] = { + 0, + }; + GF_ASSERT(volinfo); + GF_ASSERT(volfpath); + GF_ASSERT(len <= PATH_MAX); + + glusterd_store_voldirpath_set(volinfo, voldirpath); + snprintf(volfpath, len, "%s/%s", voldirpath, GLUSTERD_VOLUME_INFO_FILE); +} - snprintf (buf, sizeof (buf), "%d", volinfo->sub_count); - ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_SUB_COUNT, - buf); - if (ret) - goto out; +static void +glusterd_store_node_state_path_set(glusterd_volinfo_t *volinfo, + char *node_statepath, size_t len) +{ + char voldirpath[PATH_MAX] = { + 0, + }; + GF_ASSERT(volinfo); + GF_ASSERT(node_statepath); + GF_ASSERT(len <= PATH_MAX); + + glusterd_store_voldirpath_set(volinfo, voldirpath); + snprintf(node_statepath, len, "%s/%s", voldirpath, + GLUSTERD_NODE_STATE_FILE); +} - snprintf (buf, sizeof (buf), "%d", volinfo->version); - ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_VERSION, - buf); - if (ret) - goto out; +static void +glusterd_store_quota_conf_path_set(glusterd_volinfo_t *volinfo, + char *quota_conf_path, size_t len) +{ + char voldirpath[PATH_MAX] = { + 0, + }; + GF_ASSERT(volinfo); + GF_ASSERT(quota_conf_path); + GF_ASSERT(len <= PATH_MAX); + + glusterd_store_voldirpath_set(volinfo, voldirpath); + snprintf(quota_conf_path, len, "%s/%s", voldirpath, + GLUSTERD_VOLUME_QUOTA_CONFIG); +} - snprintf (buf, sizeof (buf), "%d", volinfo->transport_type); - ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_TRANSPORT, - buf); - if (ret) - goto out; +static void +glusterd_store_missed_snaps_list_path_set(char *missed_snaps_list, size_t len) +{ + glusterd_conf_t *priv = NULL; - ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_ID, 
- uuid_utoa (volinfo->volume_id)); - if (ret) - goto out; + priv = THIS->private; + GF_ASSERT(priv); + GF_ASSERT(missed_snaps_list); + GF_ASSERT(len <= PATH_MAX); -out: - if (ret) - gf_log ("", GF_LOG_ERROR, "Unable to write volume values" - " for %s", volinfo->volname); - return ret; + snprintf(missed_snaps_list, len, + "%s/snaps/" GLUSTERD_MISSED_SNAPS_LIST_FILE, priv->workdir); } static void -glusterd_store_voldirpath_set (glusterd_volinfo_t *volinfo, char *voldirpath, - size_t len) +glusterd_store_snapfpath_set(glusterd_snap_t *snap, char *snap_fpath, + size_t len) { - glusterd_conf_t *priv = NULL; + glusterd_conf_t *priv = NULL; + priv = THIS->private; + GF_ASSERT(priv); + GF_ASSERT(snap); + GF_ASSERT(snap_fpath); + GF_ASSERT(len <= PATH_MAX); + + snprintf(snap_fpath, len, "%s/snaps/%s/%s", priv->workdir, snap->snapname, + GLUSTERD_SNAP_INFO_FILE); +} - GF_ASSERT (volinfo); - priv = THIS->private; - GF_ASSERT (priv); +int32_t +glusterd_store_create_vol_shandle_on_absence(glusterd_volinfo_t *volinfo) +{ + char volfpath[PATH_MAX] = {0}; + int32_t ret = 0; - snprintf (voldirpath, len, "%s/%s/%s", priv->workdir, - GLUSTERD_VOLUME_DIR_PREFIX, volinfo->volname); + GF_ASSERT(volinfo); + + glusterd_store_volfpath_set(volinfo, volfpath, sizeof(volfpath)); + ret = gf_store_handle_create_on_absence(&volinfo->shandle, volfpath); + return ret; } -static int32_t -glusterd_store_create_volume_dir (glusterd_volinfo_t *volinfo) +int32_t +glusterd_store_create_nodestate_sh_on_absence(glusterd_volinfo_t *volinfo) { - int32_t ret = -1; - char voldirpath[PATH_MAX] = {0,}; + char node_state_path[PATH_MAX] = {0}; + int32_t ret = 0; - GF_ASSERT (volinfo); + GF_ASSERT(volinfo); - glusterd_store_voldirpath_set (volinfo, voldirpath, - sizeof (voldirpath)); - ret = glusterd_store_mkdir (voldirpath); - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); - return ret; + glusterd_store_node_state_path_set(volinfo, node_state_path, + sizeof(node_state_path)); + ret = gf_store_handle_create_on_absence(&volinfo->node_state_shandle, + node_state_path); + + return ret; } int32_t -glusterd_store_volinfo_write (int fd, glusterd_volinfo_t *volinfo) +glusterd_store_create_quota_conf_sh_on_absence(glusterd_volinfo_t *volinfo) { - int32_t ret = -1; - glusterd_store_handle_t *shandle = NULL; - GF_ASSERT (fd > 0); - GF_ASSERT (volinfo); - GF_ASSERT (volinfo->shandle); + char quota_conf_path[PATH_MAX] = {0}; + int32_t ret = 0; - shandle = volinfo->shandle; - ret = glusterd_volume_exclude_options_write (fd, volinfo); - if (ret) - goto out; + GF_ASSERT(volinfo); - shandle->fd = fd; - dict_foreach (volinfo->dict, _storeopts, shandle); - shandle->fd = 0; -out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + glusterd_store_quota_conf_path_set(volinfo, quota_conf_path, + sizeof(quota_conf_path)); + ret = gf_store_handle_create_on_absence(&volinfo->quota_conf_shandle, + quota_conf_path); + + return ret; } -static void -glusterd_store_volfpath_set (glusterd_volinfo_t *volinfo, char *volfpath, - size_t len) +static int32_t +glusterd_store_create_missed_snaps_list_shandle_on_absence() { - char voldirpath[PATH_MAX] = {0,}; - GF_ASSERT (volinfo); - GF_ASSERT (volfpath); - GF_ASSERT (len >= PATH_MAX); + char missed_snaps_list[PATH_MAX] = ""; + int32_t ret = -1; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); - glusterd_store_voldirpath_set (volinfo, voldirpath, - sizeof (voldirpath)); - snprintf (volfpath, len, "%s/%s", voldirpath, GLUSTERD_VOLUME_INFO_FILE); + priv = 
this->private; + GF_ASSERT(priv); + + glusterd_store_missed_snaps_list_path_set(missed_snaps_list, + sizeof(missed_snaps_list)); + + ret = gf_store_handle_create_on_absence(&priv->missed_snaps_list_shandle, + missed_snaps_list); + return ret; } int32_t -glusterd_store_create_vol_shandle_on_absence (glusterd_volinfo_t *volinfo) +glusterd_store_create_snap_shandle_on_absence(glusterd_snap_t *snap) { - char volfpath[PATH_MAX] = {0}; - int32_t ret = 0; + char snapfpath[PATH_MAX] = {0}; + int32_t ret = 0; - GF_ASSERT (volinfo); + GF_ASSERT(snap); - glusterd_store_volfpath_set (volinfo, volfpath, sizeof (volfpath)); - ret = glusterd_store_handle_create_on_absence (&volinfo->shandle, - volfpath); - return ret; + glusterd_store_snapfpath_set(snap, snapfpath, sizeof(snapfpath)); + ret = gf_store_handle_create_on_absence(&snap->shandle, snapfpath); + return ret; +} + +static int32_t +glusterd_store_brickinfos(glusterd_volinfo_t *volinfo, int vol_fd) +{ + int32_t ret = 0; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_brickinfo_t *ta_brickinfo = NULL; + int32_t brick_count = 0; + int32_t ta_brick_count = 0; + + GF_ASSERT(volinfo); + + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + ret = glusterd_store_brickinfo(volinfo, brickinfo, brick_count, vol_fd, + 0); + if (ret) + goto out; + brick_count++; + } + if (volinfo->thin_arbiter_count == 1) { + ta_brickinfo = list_first_entry(&volinfo->ta_bricks, + glusterd_brickinfo_t, brick_list); + ret = glusterd_store_brickinfo(volinfo, ta_brickinfo, ta_brick_count, + vol_fd, 1); + if (ret) + goto out; + } + +out: + gf_msg_debug(THIS->name, 0, "Returning %d", ret); + return ret; } int32_t -glusterd_store_brickinfos (glusterd_volinfo_t *volinfo, int vol_fd) +glusterd_store_node_state_write(int fd, glusterd_volinfo_t *volinfo) { - int32_t ret = 0; - glusterd_brickinfo_t *brickinfo = NULL; - int32_t brick_count = 0; + int ret = -1; + char buf[PATH_MAX]; + char uuid[UUID_SIZE + 1]; + uint total_len = 0; + glusterd_volinfo_data_store_t *dict_data = NULL; + gf_store_handle_t shandle; + xlator_t *this = NULL; - GF_ASSERT (volinfo); + this = THIS; + GF_ASSERT(this); - list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { - ret = glusterd_store_brickinfo (volinfo, brickinfo, - brick_count, vol_fd); - if (ret) - goto out; - brick_count++; + GF_ASSERT(fd > 0); + GF_ASSERT(volinfo); + + if (volinfo->rebal.defrag_cmd == GF_DEFRAG_CMD_STATUS) { + ret = 0; + goto out; + } + + gf_uuid_unparse(volinfo->rebal.rebalance_id, uuid); + ret = snprintf(buf + total_len, sizeof(buf) - total_len, + "%s=%d\n%s=%d\n%s=%d\n%s=%s\n", + GLUSTERD_STORE_KEY_VOL_DEFRAG, volinfo->rebal.defrag_cmd, + GLUSTERD_STORE_KEY_VOL_DEFRAG_STATUS, + volinfo->rebal.defrag_status, GLUSTERD_STORE_KEY_DEFRAG_OP, + volinfo->rebal.op, GF_REBALANCE_TID_KEY, uuid); + if (ret < 0 || ret >= sizeof(buf) - total_len) { + ret = -1; + goto out; + } + total_len += ret; + + ret = snprintf( + buf + total_len, sizeof(buf) - total_len, + "%s=%" PRIu64 "\n%s=%" PRIu64 "\n%s=%" PRIu64 "\n%s=%" PRIu64 + "\n%s=%" PRIu64 "\n%s=%lf\n", + GLUSTERD_STORE_KEY_VOL_DEFRAG_REB_FILES, volinfo->rebal.rebalance_files, + GLUSTERD_STORE_KEY_VOL_DEFRAG_SIZE, volinfo->rebal.rebalance_data, + GLUSTERD_STORE_KEY_VOL_DEFRAG_SCANNED, volinfo->rebal.lookedup_files, + GLUSTERD_STORE_KEY_VOL_DEFRAG_FAILURES, + volinfo->rebal.rebalance_failures, + GLUSTERD_STORE_KEY_VOL_DEFRAG_SKIPPED, volinfo->rebal.skipped_files, + GLUSTERD_STORE_KEY_VOL_DEFRAG_RUN_TIME, volinfo->rebal.rebalance_time); + if (ret < 0 || ret >= 
sizeof(buf) - total_len) { + ret = -1; + goto out; + } + + ret = gf_store_save_items(fd, buf); + if (ret) { + goto out; + } + + if (volinfo->rebal.dict) { + dict_data = GF_CALLOC(1, sizeof(glusterd_volinfo_data_store_t), + gf_gld_mt_volinfo_dict_data_t); + if (dict_data == NULL) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_MEMORY, NULL); + return -1; + } + dict_data->shandle = &shandle; + shandle.fd = fd; + dict_foreach(volinfo->rebal.dict, _storeopts, (void *)dict_data); + if (dict_data->buffer_len > 0) { + ret = gf_store_save_items(fd, dict_data->buffer); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED, + NULL); + goto out; + ; + } } + } out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + GF_FREE(dict_data); + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; } int32_t -glusterd_store_perform_volume_store (glusterd_volinfo_t *volinfo) +glusterd_store_perform_node_state_store(glusterd_volinfo_t *volinfo) { - int fd = -1; - int32_t ret = -1; - GF_ASSERT (volinfo); + int fd = -1; + int32_t ret = -1; + GF_ASSERT(volinfo); - fd = glusterd_store_mkstemp (volinfo->shandle); - if (fd <= 0) { - ret = -1; - goto out; - } + fd = gf_store_mkstemp(volinfo->node_state_shandle); + if (fd <= 0) { + ret = -1; + goto out; + } - ret = glusterd_store_volinfo_write (fd, volinfo); - if (ret) - goto out; + ret = glusterd_store_node_state_write(fd, volinfo); + if (ret) + goto out; - ret = glusterd_store_brickinfos (volinfo, fd); - if (ret) - goto out; + ret = gf_store_rename_tmppath(volinfo->node_state_shandle); + if (ret) + goto out; - ret = glusterd_store_rename_tmppath (volinfo->shandle); out: - if (ret && (fd > 0)) - glusterd_store_unlink_tmppath (volinfo->shandle); - if (fd > 0) - close (fd); - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + if (ret && (fd > 0)) + gf_store_unlink_tmppath(volinfo->node_state_shandle); + gf_msg_debug(THIS->name, 0, "Returning %d", ret); + return ret; +} + +static int32_t +glusterd_store_perform_volume_store(glusterd_volinfo_t *volinfo) +{ + int fd = -1; + int32_t ret = -1; + GF_ASSERT(volinfo); + + fd = gf_store_mkstemp(volinfo->shandle); + if (fd <= 0) { + ret = -1; + goto out; + } + + ret = glusterd_store_volinfo_write(fd, volinfo); + if (ret) + goto out; + + ret = glusterd_store_create_brick_dir(volinfo); + if (ret) + goto out; + + ret = glusterd_store_brickinfos(volinfo, fd); + if (ret) + goto out; + +out: + if (ret && (fd > 0)) + gf_store_unlink_tmppath(volinfo->shandle); + gf_msg_debug(THIS->name, 0, "Returning %d", ret); + return ret; } void -glusterd_perform_volinfo_version_action (glusterd_volinfo_t *volinfo, - glusterd_volinfo_ver_ac_t ac) +glusterd_perform_volinfo_version_action(glusterd_volinfo_t *volinfo, + glusterd_volinfo_ver_ac_t ac) { - GF_ASSERT (volinfo); + GF_ASSERT(volinfo); - switch (ac) { + switch (ac) { case GLUSTERD_VOLINFO_VER_AC_NONE: - break; + break; case GLUSTERD_VOLINFO_VER_AC_INCREMENT: - volinfo->version++; - break; - } + volinfo->version++; + break; + case GLUSTERD_VOLINFO_VER_AC_DECREMENT: + volinfo->version--; + break; + } +} + +void +glusterd_store_bricks_cleanup_tmp(glusterd_volinfo_t *volinfo) +{ + glusterd_brickinfo_t *brickinfo = NULL; + + GF_ASSERT(volinfo); + + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + gf_store_unlink_tmppath(brickinfo->shandle); + } +} + +void +glusterd_store_volume_cleanup_tmp(glusterd_volinfo_t *volinfo) +{ + GF_ASSERT(volinfo); + + glusterd_store_bricks_cleanup_tmp(volinfo); + + 
gf_store_unlink_tmppath(volinfo->shandle); + + gf_store_unlink_tmppath(volinfo->node_state_shandle); + + gf_store_unlink_tmppath(volinfo->snapd.handle); } int32_t -glusterd_store_volinfo (glusterd_volinfo_t *volinfo, glusterd_volinfo_ver_ac_t ac) +glusterd_store_brickinfos_atomic_update(glusterd_volinfo_t *volinfo) { - int32_t ret = -1; + int ret = -1; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_brickinfo_t *ta_brickinfo = NULL; - GF_ASSERT (volinfo); + GF_ASSERT(volinfo); - glusterd_perform_volinfo_version_action (volinfo, ac); - ret = glusterd_store_create_volume_dir (volinfo); + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + ret = gf_store_rename_tmppath(brickinfo->shandle); if (ret) - goto out; + goto out; + } - ret = glusterd_store_create_vol_shandle_on_absence (volinfo); + if (volinfo->thin_arbiter_count == 1) { + ta_brickinfo = list_first_entry(&volinfo->ta_bricks, + glusterd_brickinfo_t, brick_list); + ret = gf_store_rename_tmppath(ta_brickinfo->shandle); if (ret) - goto out; + goto out; + } - ret = glusterd_store_perform_volume_store (volinfo); - if (ret) - goto out; - //checksum should be computed at the end - ret = glusterd_volume_compute_cksum (volinfo); - if (ret) - goto out; out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - - return ret; + return ret; } - int32_t -glusterd_store_delete_volume (glusterd_volinfo_t *volinfo) +glusterd_store_volinfo_atomic_update(glusterd_volinfo_t *volinfo) { - char pathname[PATH_MAX] = {0,}; - int32_t ret = 0; - glusterd_conf_t *priv = NULL; - DIR *dir = NULL; - struct dirent *entry = NULL; - char path[PATH_MAX] = {0,}; - struct stat st = {0, }; + int ret = -1; + GF_ASSERT(volinfo); - GF_ASSERT (volinfo); - priv = THIS->private; + ret = gf_store_rename_tmppath(volinfo->shandle); + if (ret) + goto out; - GF_ASSERT (priv); - snprintf (pathname, sizeof (pathname), "%s/vols/%s", priv->workdir, - volinfo->volname); - - dir = opendir (pathname); - if (!dir) - goto out; - ret = glusterd_store_remove_bricks (volinfo); +out: + if (ret) + gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "Couldn't rename " + "temporary file(s)"); + return ret; +} - if (ret) { - gf_log ("", GF_LOG_ERROR, "Remove bricks failed for %s", - volinfo->volname); - } +int32_t +glusterd_store_volume_atomic_update(glusterd_volinfo_t *volinfo) +{ + int ret = -1; + GF_ASSERT(volinfo); - glusterd_for_each_entry (entry, dir); - while (entry) { + ret = glusterd_store_brickinfos_atomic_update(volinfo); + if (ret) + goto out; - snprintf (path, PATH_MAX, "%s/%s", pathname, entry->d_name); - ret = stat (path, &st); - if (ret == -1) { - gf_log ("", GF_LOG_ERROR, "Failed to stat entry: %s:%s", - path, strerror (errno)); - goto stat_failed; - } + ret = glusterd_store_volinfo_atomic_update(volinfo); - if (S_ISDIR (st.st_mode)) - ret = rmdir (path); - else - ret = unlink (path); +out: + return ret; +} - gf_log ("", GF_LOG_INFO, "%s %s", - ret?"Failed to remove":"Removed", - entry->d_name); - if (ret) - gf_log ("", GF_LOG_INFO, "errno:%d", errno); -stat_failed: - memset (path, 0, sizeof(path)); - glusterd_for_each_entry (entry, dir); - } +int32_t +glusterd_store_snap_atomic_update(glusterd_snap_t *snap) +{ + int ret = -1; + GF_ASSERT(snap); - ret = closedir (dir); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Failed to close dir, errno:%d", - errno); - } + ret = gf_store_rename_tmppath(snap->shandle); + if (ret) + gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "Couldn't rename " + "temporary file(s)"); - ret = rmdir (pathname); 
- if (ret) { - gf_log ("", GF_LOG_ERROR, "Failed to rmdir: %s, err: %s", - pathname, strerror (errno)); - } + return ret; +} +int32_t +glusterd_store_snap(glusterd_snap_t *snap) +{ + int32_t ret = -1; + + GF_ASSERT(snap); + + ret = glusterd_store_create_snap_dir(snap); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SNAPDIR_CREATE_FAIL, + "Failed to create snap dir"); + goto out; + } + + ret = glusterd_store_create_snap_shandle_on_absence(snap); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SNAPINFO_CREATE_FAIL, + "Failed to create snap info " + "file"); + goto out; + } + + ret = glusterd_store_snapinfo_write(snap); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SNAPINFO_WRITE_FAIL, + "Failed to write snap info"); + goto out; + } + + ret = glusterd_store_snap_atomic_update(snap); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_AUTOMIC_UPDATE_FAIL, + "Failed to do automic update"); + goto out; + } out: - if (volinfo->shandle) { - glusterd_store_handle_destroy (volinfo->shandle); - volinfo->shandle = NULL; - } - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + if (ret && snap->shandle) + gf_store_unlink_tmppath(snap->shandle); - return ret; + gf_msg_trace(THIS->name, 0, "Returning %d", ret); + return ret; } - - int32_t -glusterd_store_retrieve_value (glusterd_store_handle_t *handle, - char *key, char **value) +glusterd_store_volinfo(glusterd_volinfo_t *volinfo, + glusterd_volinfo_ver_ac_t ac) { - int32_t ret = -1; - char scan_str[4096] = {0,}; - char *iter_key = NULL; - char *iter_val = NULL; - char *str = NULL; - char *free_str = NULL; + int32_t ret = -1; + glusterfs_ctx_t *ctx = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + ctx = this->ctx; + GF_ASSERT(ctx); + GF_ASSERT(volinfo); + + pthread_mutex_lock(&ctx->cleanup_lock); + pthread_mutex_lock(&volinfo->store_volinfo_lock); + { + glusterd_perform_volinfo_version_action(volinfo, ac); + + ret = glusterd_store_create_volume_dirs(volinfo); + if (ret) + goto unlock; - GF_ASSERT (handle); + ret = glusterd_store_create_vol_shandle_on_absence(volinfo); + if (ret) + goto unlock; - handle->fd = open (handle->path, O_RDWR); + ret = glusterd_store_create_nodestate_sh_on_absence(volinfo); + if (ret) + goto unlock; - if (!handle->read) - handle->read = fdopen (handle->fd, "r"); + ret = glusterd_store_perform_volume_store(volinfo); + if (ret) + goto unlock; - if (!handle->read) { - gf_log ("", GF_LOG_ERROR, "Unable to open file %s errno: %d", - handle->path, errno); - goto out; + ret = glusterd_store_volume_atomic_update(volinfo); + if (ret) { + glusterd_perform_volinfo_version_action( + volinfo, GLUSTERD_VOLINFO_VER_AC_DECREMENT); + goto unlock; } - ret = fscanf (handle->read, "%s", scan_str); - - while (ret != EOF) { - if (free_str) { - GF_FREE (free_str); - free_str = NULL; - } - str = gf_strdup (scan_str); - if (!str) - goto out; - else - free_str = str; - iter_key = strtok (str, "="); - gf_log ("", GF_LOG_DEBUG, "key %s read", iter_key); - - if (!strcmp (key, iter_key)) { - gf_log ("", GF_LOG_DEBUG, "key %s found", key); - iter_val = strtok (NULL, "="); - ret = 0; - if (iter_val) - *value = gf_strdup (iter_val); - goto out; - } + ret = glusterd_store_perform_node_state_store(volinfo); + if (ret) + goto unlock; - ret = fscanf (handle->read, "%s", scan_str); - } + /* checksum should be computed at the end */ + ret = glusterd_compute_cksum(volinfo, _gf_false); + if (ret) + goto unlock; + } +unlock: + pthread_mutex_unlock(&volinfo->store_volinfo_lock); + 
pthread_mutex_unlock(&ctx->cleanup_lock); - if (EOF == ret) - ret = -1; -out: - if (handle->fd > 0) { - close (handle->fd); - handle->read = NULL; - } + if (ret) + glusterd_store_volume_cleanup_tmp(volinfo); - if (free_str) - GF_FREE (free_str); + gf_msg_debug(THIS->name, 0, "Returning %d", ret); - return ret; + return ret; } int32_t -glusterd_store_save_value (int fd, char *key, char *value) +glusterd_store_delete_volume(glusterd_volinfo_t *volinfo) { - int32_t ret = -1; - char buf[4096] = {0,}; + char pathname[PATH_MAX] = { + 0, + }; + int32_t ret = 0; + glusterd_conf_t *priv = NULL; + char delete_path[PATH_MAX] = { + 0, + }; + char trashdir[PATH_MAX] = { + 0, + }; + xlator_t *this = NULL; + gf_boolean_t rename_fail = _gf_false; + int32_t len = 0; + + this = THIS; + GF_ASSERT(this); + + GF_ASSERT(volinfo); + priv = this->private; + + GF_ASSERT(priv); + + GLUSTERD_GET_VOLUME_DIR(pathname, volinfo, priv); + + len = snprintf(delete_path, sizeof(delete_path), + "%s/" GLUSTERD_TRASH "/%s.deleted", priv->workdir, + uuid_utoa(volinfo->volume_id)); + if ((len < 0) || (len >= sizeof(delete_path))) { + goto out; + } + + len = snprintf(trashdir, sizeof(trashdir), "%s/" GLUSTERD_TRASH, + priv->workdir); + if ((len < 0) || (len >= sizeof(trashdir))) { + goto out; + } + + ret = sys_mkdir(trashdir, 0755); + if (ret && errno != EEXIST) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_CREATE_DIR_FAILED, + "Failed to create trash " + "directory"); + goto out; + } + + ret = sys_rename(pathname, delete_path); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED, + "Failed to rename volume " + "directory for volume %s", + volinfo->volname); + rename_fail = _gf_true; + goto out; + } + + ret = recursive_rmdir(trashdir); + if (ret) { + gf_msg_debug(this->name, 0, "Failed to rmdir: %s", trashdir); + } - GF_ASSERT (fd > 0); - GF_ASSERT (key); - GF_ASSERT (value); +out: + if (volinfo->shandle) { + gf_store_handle_destroy(volinfo->shandle); + volinfo->shandle = NULL; + } + ret = (rename_fail == _gf_true) ? 
-1 : 0; + + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; +} - snprintf (buf, sizeof (buf), "%s=%s\n", key, value); - ret = write (fd, buf, strlen (buf)); +/*TODO: cleanup the duplicate code and implement a generic function for + * deleting snap/volume depending on the parameter flag */ +int32_t +glusterd_store_delete_snap(glusterd_snap_t *snap) +{ + char pathname[PATH_MAX] = { + 0, + }; + int32_t ret = 0; + glusterd_conf_t *priv = NULL; + DIR *dir = NULL; + struct dirent *entry = NULL; + struct dirent scratch[2] = { + { + 0, + }, + }; + char path[PATH_MAX] = { + 0, + }; + char delete_path[PATH_MAX] = { + 0, + }; + char trashdir[PATH_MAX] = { + 0, + }; + struct stat st = { + 0, + }; + xlator_t *this = NULL; + gf_boolean_t rename_fail = _gf_false; + int32_t len = 0; + + this = THIS; + priv = this->private; + GF_ASSERT(priv); + + GF_ASSERT(snap); + GLUSTERD_GET_SNAP_DIR(pathname, snap, priv); + + len = snprintf(delete_path, sizeof(delete_path), + "%s/" GLUSTERD_TRASH "/snap-%s.deleted", priv->workdir, + uuid_utoa(snap->snap_id)); + if ((len < 0) || (len >= sizeof(delete_path))) { + goto out; + } + + len = snprintf(trashdir, sizeof(trashdir), "%s/" GLUSTERD_TRASH, + priv->workdir); + if ((len < 0) || (len >= sizeof(trashdir))) { + goto out; + } + + ret = sys_mkdir(trashdir, 0755); + if (ret && errno != EEXIST) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_CREATE_DIR_FAILED, + "Failed to create trash " + "directory"); + goto out; + } + + ret = sys_rename(pathname, delete_path); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED, + "Failed to rename snap " + "directory %s to %s", + pathname, delete_path); + rename_fail = _gf_true; + goto out; + } + + dir = sys_opendir(delete_path); + if (!dir) { + gf_msg_debug(this->name, 0, "Failed to open directory %s.", + delete_path); + goto out; + } + + while ((entry = sys_readdir(dir, scratch))) { + if (gf_irrelevant_entry(entry)) + continue; + len = snprintf(path, PATH_MAX, "%s/%s", delete_path, entry->d_name); + if ((len < 0) || (len >= PATH_MAX)) { + goto stat_failed; + } + ret = sys_stat(path, &st); + if (ret == -1) { + gf_msg_debug(this->name, 0, + "Failed to stat " + "entry %s", + path); + goto stat_failed; + } - if (ret < 0) { - gf_log ("", GF_LOG_CRITICAL, "Unable to store key: %s," - "value: %s, error: %s", key, value, - strerror (errno)); - ret = -1; - goto out; + if (S_ISDIR(st.st_mode)) + ret = sys_rmdir(path); + else + ret = sys_unlink(path); + + if (ret) { + gf_msg_debug(this->name, 0, + " Failed to remove " + "%s", + path); } - ret = 0; + gf_msg_debug(this->name, 0, "%s %s", + ret ? "Failed to remove" : "Removed", entry->d_name); + stat_failed: + memset(path, 0, sizeof(path)); + } + + ret = sys_closedir(dir); + if (ret) { + gf_msg_debug(this->name, 0, "Failed to close dir %s.", delete_path); + } + + ret = sys_rmdir(delete_path); + if (ret) { + gf_msg_debug(this->name, 0, "Failed to rmdir: %s", delete_path); + } + ret = sys_rmdir(trashdir); + if (ret) { + gf_msg_debug(this->name, 0, "Failed to rmdir: %s", trashdir); + } out: - - gf_log ("", GF_LOG_DEBUG, "returning: %d", ret); - return ret; + if (snap->shandle) { + gf_store_handle_destroy(snap->shandle); + snap->shandle = NULL; + } + ret = (rename_fail == _gf_true) ? 
-1 : 0; + + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; } -int32_t -glusterd_store_handle_new (char *path, glusterd_store_handle_t **handle) +int +glusterd_store_global_info(xlator_t *this) { - int32_t ret = -1; - glusterd_store_handle_t *shandle = NULL; - int fd = -1; - char *spath = NULL; - - shandle = GF_CALLOC (1, sizeof (*shandle), gf_gld_mt_store_handle_t); - if (!shandle) - goto out; + int ret = -1; + glusterd_conf_t *conf = NULL; + char buf[PATH_MAX]; + uint total_len = 0; + gf_store_handle_t *handle = NULL; + char *uuid_str = NULL; + + conf = this->private; + + uuid_str = gf_strdup(uuid_utoa(MY_UUID)); + if (!uuid_str) + goto out; + + if (!conf->handle) { + ret = snprintf(buf, sizeof(buf), "%s/%s", conf->workdir, + GLUSTERD_INFO_FILE); + if ((ret < 0) || (ret >= sizeof(buf))) { + ret = -1; + goto out; + } + ret = gf_store_handle_new(buf, &handle); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_HANDLE_GET_FAIL, + "Unable to get store handle"); + goto out; + } - spath = gf_strdup (path); + conf->handle = handle; + } else + handle = conf->handle; + + /* These options need to be available for all users */ + ret = sys_chmod(handle->path, 0644); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "chmod error for %s", GLUSTERD_INFO_FILE); + goto out; + } + + handle->fd = gf_store_mkstemp(handle); + if (handle->fd < 0) { + ret = -1; + goto out; + } + + ret = snprintf(buf, sizeof(buf), "%s=%s\n", GLUSTERD_STORE_UUID_KEY, + uuid_str); + if (ret < 0 || ret >= sizeof(buf)) { + ret = -1; + goto out; + } + total_len += ret; + + ret = snprintf(buf + total_len, sizeof(buf) - total_len, "%s=%d\n", + GD_OP_VERSION_KEY, conf->op_version); + if (ret < 0 || ret >= sizeof(buf) - total_len) { + ret = -1; + goto out; + } + + ret = gf_store_save_items(handle->fd, buf); + if (ret) { + gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_OP_VERS_STORE_FAIL, + "Storing glusterd global-info failed ret = %d", ret); + goto out; + } + + ret = gf_store_rename_tmppath(handle); +out: + if (handle) { + if (ret && (handle->fd >= 0)) + gf_store_unlink_tmppath(handle); + } - if (!spath) - goto out; + if (uuid_str) + GF_FREE(uuid_str); - fd = open (path, O_RDWR | O_CREAT | O_APPEND, 0644); - if (fd <= 0) { - gf_log ("glusterd", GF_LOG_ERROR, "Failed to open file: %s, " - "error: %s", path, strerror (errno)); - goto out; - } + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_GLUSTERD_GLOBAL_INFO_STORE_FAIL, + "Failed to store glusterd global-info"); - shandle->path = spath; - *handle = shandle; + return ret; +} - ret = 0; +int +glusterd_store_max_op_version(xlator_t *this) +{ + int ret = -1; + glusterd_conf_t *conf = NULL; + char op_version_str[15] = { + 0, + }; + char path[PATH_MAX] = { + 0, + }; + gf_store_handle_t *handle = NULL; + int32_t len = 0; + + conf = this->private; + + len = snprintf(path, PATH_MAX, "%s/%s", conf->workdir, + GLUSTERD_UPGRADE_FILE); + if ((len < 0) || (len >= PATH_MAX)) { + goto out; + } + ret = gf_store_handle_new(path, &handle); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_HANDLE_GET_FAIL, + "Unable to get store handle"); + goto out; + } + + /* These options need to be available for all users */ + ret = sys_chmod(handle->path, 0644); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "chmod error for %s", GLUSTERD_UPGRADE_FILE); + goto out; + } + + handle->fd = gf_store_mkstemp(handle); + if (handle->fd < 0) { + ret = -1; + goto out; + } + + snprintf(op_version_str, 
sizeof(op_version_str), "%d", GD_OP_VERSION_MAX); + ret = gf_store_save_value(handle->fd, GD_MAX_OP_VERSION_KEY, + op_version_str); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OP_VERS_STORE_FAIL, + "Storing op-version failed ret = %d", ret); + goto out; + } + + ret = gf_store_rename_tmppath(handle); +out: + if (handle) { + if (ret && (handle->fd >= 0)) + gf_store_unlink_tmppath(handle); + } + + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_GLUSTERD_GLOBAL_INFO_STORE_FAIL, + "Failed to store max op-version"); + if (handle) + gf_store_handle_destroy(handle); + return ret; +} +int +glusterd_retrieve_max_op_version(xlator_t *this, int *op_version) +{ + char *op_version_str = NULL; + glusterd_conf_t *priv = NULL; + int ret = -1; + int tmp_version = 0; + char *tmp = NULL; + char path[PATH_MAX] = { + 0, + }; + gf_store_handle_t *handle = NULL; + int32_t len = 0; + + priv = this->private; + + len = snprintf(path, PATH_MAX, "%s/%s", priv->workdir, + GLUSTERD_UPGRADE_FILE); + if ((len < 0) || (len >= PATH_MAX)) { + goto out; + } + ret = gf_store_handle_retrieve(path, &handle); + + if (ret) { + gf_msg_debug(this->name, 0, + "Unable to get store " + "handle!"); + goto out; + } + + ret = gf_store_retrieve_value(handle, GD_MAX_OP_VERSION_KEY, + &op_version_str); + if (ret) { + gf_msg_debug(this->name, 0, "No previous op_version present"); + goto out; + } + + tmp_version = strtol(op_version_str, &tmp, 10); + if ((tmp_version <= 0) || (tmp && strlen(tmp) > 1)) { + gf_msg(this->name, GF_LOG_WARNING, EINVAL, GD_MSG_UNSUPPORTED_VERSION, + "invalid version number"); + goto out; + } + + *op_version = tmp_version; + + ret = 0; out: - if (fd > 0) - close (fd); + if (op_version_str) + GF_FREE(op_version_str); + if (handle) + gf_store_handle_destroy(handle); + return ret; +} - if (ret == -1) { - if (spath) - GF_FREE (spath); - if (shandle) { - GF_FREE (shandle); - } +int +glusterd_retrieve_op_version(xlator_t *this, int *op_version) +{ + char *op_version_str = NULL; + glusterd_conf_t *priv = NULL; + int ret = -1; + int tmp_version = 0; + char *tmp = NULL; + char path[PATH_MAX] = { + 0, + }; + gf_store_handle_t *handle = NULL; + int32_t len = 0; + + priv = this->private; + + if (!priv->handle) { + len = snprintf(path, PATH_MAX, "%s/%s", priv->workdir, + GLUSTERD_INFO_FILE); + if ((len < 0) || (len >= PATH_MAX)) { + goto out; } + ret = gf_store_handle_retrieve(path, &handle); - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + if (ret) { + gf_msg_debug(this->name, 0, + "Unable to get store " + "handle!"); + goto out; + } + + priv->handle = handle; + } + + ret = gf_store_retrieve_value(priv->handle, GD_OP_VERSION_KEY, + &op_version_str); + if (ret) { + gf_msg_debug(this->name, 0, "No previous op_version present"); + goto out; + } + + tmp_version = strtol(op_version_str, &tmp, 10); + if ((tmp_version <= 0) || (tmp && strlen(tmp) > 1)) { + gf_msg(this->name, GF_LOG_WARNING, EINVAL, GD_MSG_UNSUPPORTED_VERSION, + "invalid version number"); + goto out; + } + + *op_version = tmp_version; + + ret = 0; +out: + if (op_version_str) + GF_FREE(op_version_str); + + return ret; } int -glusterd_store_handle_retrieve (char *path, glusterd_store_handle_t **handle) +glusterd_restore_op_version(xlator_t *this) { - int32_t ret = -1; - struct stat statbuf = {0}; - - ret = stat (path, &statbuf); - if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, "Unable to retrieve store " - "handle for %s, error: %s", path, strerror (errno)); - goto out; + glusterd_conf_t *conf = NULL; + int ret = 0; + int op_version = 
0; + + conf = this->private; + + ret = glusterd_retrieve_op_version(this, &op_version); + if (!ret) { + if ((op_version < GD_OP_VERSION_MIN) || + (op_version > GD_OP_VERSION_MAX)) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_UNSUPPORTED_VERSION, + "wrong op-version (%d) retrieved", op_version); + ret = -1; + goto out; } - ret = glusterd_store_handle_new (path, handle); + conf->op_version = op_version; + gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_OP_VERS_INFO, + "retrieved op-version: %d", conf->op_version); + goto out; + } + + /* op-version can be missing from the store file in 2 cases, + * 1. This is a new install of glusterfs + * 2. This is an upgrade of glusterfs from a version without op-version + * to a version with op-version (eg. 3.3 -> 3.4) + * + * Detection of a new install or an upgrade from an older install can be + * done by checking for the presence of the its peer-id in the store + * file. If peer-id is present, the installation is an upgrade else, it + * is a new install. + * + * For case 1, set op-version to GD_OP_VERSION_MAX. + * For case 2, set op-version to GD_OP_VERSION_MIN. + */ + ret = glusterd_retrieve_uuid(); + if (ret) { + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_OP_VERS_SET_INFO, + "Detected new install. Setting" + " op-version to maximum : %d", + GD_OP_VERSION_MAX); + conf->op_version = GD_OP_VERSION_MAX; + } else { + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_OP_VERS_SET_INFO, + "Upgrade detected. Setting" + " op-version to minimum : %d", + GD_OP_VERSION_MIN); + conf->op_version = GD_OP_VERSION_MIN; + } + ret = 0; out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + return ret; } int32_t -glusterd_store_handle_destroy (glusterd_store_handle_t *handle) +glusterd_retrieve_uuid() { - int32_t ret = -1; + char *uuid_str = NULL; + int32_t ret = -1; + gf_store_handle_t *handle = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + char path[PATH_MAX] = { + 0, + }; + int32_t len = 0; + + this = THIS; + priv = this->private; + + if (!priv->handle) { + len = snprintf(path, PATH_MAX, "%s/%s", priv->workdir, + GLUSTERD_INFO_FILE); + if ((len < 0) || (len >= PATH_MAX)) { + goto out; + } + ret = gf_store_handle_retrieve(path, &handle); - if (!handle) { - ret = 0; - goto out; + if (ret) { + gf_msg_debug(this->name, 0, + "Unable to get store" + "handle!"); + goto out; } - GF_FREE (handle->path); + priv->handle = handle; + } + pthread_mutex_lock(&priv->mutex); + { + ret = gf_store_retrieve_value(priv->handle, GLUSTERD_STORE_UUID_KEY, + &uuid_str); + } + pthread_mutex_unlock(&priv->mutex); + if (ret) { + gf_msg_debug(this->name, 0, "No previous uuid is present"); + goto out; + } + + gf_uuid_parse(uuid_str, priv->uuid); - GF_FREE (handle); +out: + GF_FREE(uuid_str); + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; +} +int +glusterd_store_retrieve_snapd(glusterd_volinfo_t *volinfo) +{ + int ret = -1; + char *key = NULL; + char *value = NULL; + char volpath[PATH_MAX] = { + 0, + }; + char path[PATH_MAX] = { + 0, + }; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + gf_store_iter_t *iter = NULL; + gf_store_op_errno_t op_errno = GD_STORE_SUCCESS; + int32_t len = 0; + + this = THIS; + GF_ASSERT(this); + conf = THIS->private; + GF_ASSERT(volinfo); + + if (conf->op_version < GD_OP_VERSION_3_6_0) { + ret = 0; + goto out; + } + + /* + * This is needed for upgrade situations. Say a volume is created with + * older version of glusterfs and upgraded to a glusterfs version equal + * to or greater than GD_OP_VERSION_3_6_0. 
The older glusterd would not + * have created the snapd.info file related to snapshot daemon for user + * serviceable snapshots. So as part of upgrade when the new glusterd + * starts, as part of restore (restoring the volume to be precise), it + * tries to snapd related info from snapd.info file. But since there was + * no such file till now, the restore operation fails. Thus, to prevent + * it from happening check whether user serviceable snapshots features + * is enabled before restoring snapd. If its disabled, then simply + * exit by returning success (without even checking for the snapd.info). + */ + + if (!dict_get_str_boolean(volinfo->dict, "features.uss", _gf_false)) { ret = 0; + goto out; + } + + GLUSTERD_GET_VOLUME_DIR(volpath, volinfo, conf); + + len = snprintf(path, sizeof(path), "%s/%s", volpath, + GLUSTERD_VOLUME_SNAPD_INFO_FILE); + if ((len < 0) || (len >= sizeof(path))) { + goto out; + } + + ret = gf_store_handle_retrieve(path, &volinfo->snapd.handle); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HANDLE_NULL, + "volinfo handle is NULL"); + goto out; + } + + ret = gf_store_iter_new(volinfo->snapd.handle, &iter); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_GET_FAIL, + "Failed to get new store " + "iter"); + goto out; + } + + ret = gf_store_iter_get_next(iter, &key, &value, &op_errno); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_GET_FAIL, + "Failed to get next store " + "iter"); + goto out; + } + + while (!ret) { + if (!strncmp(key, GLUSTERD_STORE_KEY_SNAPD_PORT, + SLEN(GLUSTERD_STORE_KEY_SNAPD_PORT))) { + volinfo->snapd.port = atoi(value); + } + + ret = gf_store_iter_get_next(iter, &key, &value, &op_errno); + } + + if (op_errno != GD_STORE_EOF) + goto out; + + ret = 0; out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + if (gf_store_iter_destroy(&iter)) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL, + "Failed to destroy store iter"); + ret = -1; + } - return ret; + return ret; } int32_t -glusterd_store_uuid () +glusterd_store_retrieve_bricks(glusterd_volinfo_t *volinfo) { - glusterd_conf_t *priv = NULL; - char path[PATH_MAX] = {0,}; - int32_t ret = -1; - glusterd_store_handle_t *handle = NULL; - - priv = THIS->private; + int32_t ret = 0; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_brickinfo_t *ta_brickinfo = NULL; + gf_store_iter_t *iter = NULL; + char *key = NULL; + char *value = NULL; + char brickdir[PATH_MAX] = { + 0, + }; + char path[PATH_MAX] = { + 0, + }; + glusterd_conf_t *priv = NULL; + int32_t brick_count = 0; + int32_t ta_brick_count = 0; + char tmpkey[32] = { + 0, + }; + gf_store_iter_t *tmpiter = NULL; + char *tmpvalue = NULL; + char abspath[PATH_MAX] = {0}; + struct pmap_registry *pmap = NULL; + xlator_t *this = NULL; + int brickid = 0; + /* ta_brick_id initialization with 2 since ta-brick id starts with + * volname-ta-2 + */ + int ta_brick_id = 2; + gf_store_op_errno_t op_errno = GD_STORE_SUCCESS; + int32_t len = 0; + + GF_ASSERT(volinfo); + GF_ASSERT(volinfo->volname); + + this = THIS; + priv = this->private; + + GLUSTERD_GET_BRICK_DIR(brickdir, volinfo, priv); + + ret = gf_store_iter_new(volinfo->shandle, &tmpiter); + + if (ret) + goto out; + + while (brick_count < volinfo->brick_count) { + ret = glusterd_brickinfo_new(&brickinfo); - snprintf (path, PATH_MAX, "%s/%s", priv->workdir, - GLUSTERD_INFO_FILE); + if (ret) + goto out; + snprintf(tmpkey, sizeof(tmpkey), "%s-%d", GLUSTERD_STORE_KEY_VOL_BRICK, + brick_count); + ret = gf_store_iter_get_matching(tmpiter, 
tmpkey, &tmpvalue); + len = snprintf(path, sizeof(path), "%s/%s", brickdir, tmpvalue); + GF_FREE(tmpvalue); + tmpvalue = NULL; + if ((len < 0) || (len >= sizeof(path))) { + ret = -1; + goto out; + } - if (!priv->handle) { - ret = glusterd_store_handle_new (path, &handle); + ret = gf_store_handle_retrieve(path, &brickinfo->shandle); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get store" - " handle!"); - goto out; - } + if (ret) + goto out; - priv->handle = handle; - } else { - handle = priv->handle; - } + ret = gf_store_iter_new(brickinfo->shandle, &iter); - handle->fd = open (handle->path, O_RDWR | O_CREAT | O_TRUNC, 0644); - if (handle->fd <= 0) { - ret = -1; - goto out; - } - ret = glusterd_store_save_value (handle->fd, GLUSTERD_STORE_UUID_KEY, - uuid_utoa (priv->uuid)); + if (ret) + goto out; + ret = gf_store_iter_get_next(iter, &key, &value, &op_errno); if (ret) { - gf_log ("", GF_LOG_CRITICAL, "Storing uuid failed" - "ret = %d", ret); - goto out; - } - - -out: - if (handle->fd > 0) { - close (handle->fd); - handle->fd = 0; + gf_msg("glusterd", GF_LOG_ERROR, op_errno, + GD_MSG_STORE_ITER_GET_FAIL, + "Unable to iterate " + "the store for brick: %s", + path); + goto out; } - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; -} + while (!ret) { + if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_HOSTNAME, + SLEN(GLUSTERD_STORE_KEY_BRICK_HOSTNAME))) { + if (snprintf(brickinfo->hostname, sizeof(brickinfo->hostname), + "%s", value) >= sizeof(brickinfo->hostname)) { + gf_msg("glusterd", GF_LOG_ERROR, op_errno, + GD_MSG_PARSE_BRICKINFO_FAIL, + "brick hostname truncated: %s", brickinfo->hostname); + goto out; + } + } else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_PATH, + SLEN(GLUSTERD_STORE_KEY_BRICK_PATH))) { + if (snprintf(brickinfo->path, sizeof(brickinfo->path), "%s", + value) >= sizeof(brickinfo->path)) { + gf_msg("glusterd", GF_LOG_ERROR, op_errno, + GD_MSG_PARSE_BRICKINFO_FAIL, + "brick path truncated: %s", brickinfo->path); + goto out; + } + } else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_REAL_PATH, + SLEN(GLUSTERD_STORE_KEY_BRICK_REAL_PATH))) { + if (snprintf(brickinfo->real_path, sizeof(brickinfo->real_path), + "%s", value) >= sizeof(brickinfo->real_path)) { + gf_msg("glusterd", GF_LOG_ERROR, op_errno, + GD_MSG_PARSE_BRICKINFO_FAIL, + "real_path truncated: %s", brickinfo->real_path); + goto out; + } + } else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_PORT, + SLEN(GLUSTERD_STORE_KEY_BRICK_PORT))) { + ret = gf_string2int(value, &brickinfo->port); + if (ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, + GD_MSG_INCOMPATIBLE_VALUE, + "Failed to convert " + "string to integer"); + } -int32_t -glusterd_retrieve_uuid () -{ - char *uuid_str = NULL; - int32_t ret = -1; - glusterd_store_handle_t *handle = NULL; - glusterd_conf_t *priv = NULL; - char path[PATH_MAX] = {0,}; + if (brickinfo->port < priv->base_port) { + /* This is required to adhere to the + IANA standards */ + brickinfo->port = 0; + } else { + /* This is required to have proper ports + assigned to bricks after restart */ + pmap = pmap_registry_get(THIS); + if (pmap->last_alloc <= brickinfo->port) + pmap->last_alloc = brickinfo->port + 1; + } + } else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_RDMA_PORT, + SLEN(GLUSTERD_STORE_KEY_BRICK_RDMA_PORT))) { + ret = gf_string2int(value, &brickinfo->rdma_port); + if (ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, + GD_MSG_INCOMPATIBLE_VALUE, + "Failed to convert " + "string to integer"); + } - priv = THIS->private; + if (brickinfo->rdma_port < priv->base_port) 
{ + /* This is required to adhere to the + IANA standards */ + brickinfo->rdma_port = 0; + } else { + /* This is required to have proper ports + assigned to bricks after restart */ + pmap = pmap_registry_get(THIS); + if (pmap->last_alloc <= brickinfo->rdma_port) + pmap->last_alloc = brickinfo->rdma_port + 1; + } + } else if (!strncmp( + key, GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED, + SLEN(GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED))) { + ret = gf_string2int(value, &brickinfo->decommissioned); + if (ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, + GD_MSG_INCOMPATIBLE_VALUE, + "Failed to convert " + "string to integer"); + } - if (!priv->handle) { - snprintf (path, PATH_MAX, "%s/%s", priv->workdir, - GLUSTERD_INFO_FILE); - ret = glusterd_store_handle_retrieve (path, &handle); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_DEVICE_PATH, + SLEN(GLUSTERD_STORE_KEY_BRICK_DEVICE_PATH))) { + if (snprintf(brickinfo->device_path, + sizeof(brickinfo->device_path), "%s", + value) >= sizeof(brickinfo->device_path)) { + gf_msg("glusterd", GF_LOG_ERROR, op_errno, + GD_MSG_PARSE_BRICKINFO_FAIL, + "device_path truncated: %s", brickinfo->device_path); + goto out; + } + } else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_MOUNT_DIR, + SLEN(GLUSTERD_STORE_KEY_BRICK_MOUNT_DIR))) { + if (snprintf(brickinfo->mount_dir, sizeof(brickinfo->mount_dir), + "%s", value) >= sizeof(brickinfo->mount_dir)) { + gf_msg("glusterd", GF_LOG_ERROR, op_errno, + GD_MSG_PARSE_BRICKINFO_FAIL, + "mount_dir truncated: %s", brickinfo->mount_dir); + goto out; + } + } else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS, + SLEN(GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS))) { + ret = gf_string2int(value, &brickinfo->snap_status); + if (ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, + GD_MSG_INCOMPATIBLE_VALUE, + "Failed to convert " + "string to integer"); + } + } else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_FSTYPE, + SLEN(GLUSTERD_STORE_KEY_BRICK_FSTYPE))) { + if (snprintf(brickinfo->fstype, sizeof(brickinfo->fstype), "%s", + value) >= sizeof(brickinfo->fstype)) { + gf_msg("glusterd", GF_LOG_ERROR, op_errno, + GD_MSG_PARSE_BRICKINFO_FAIL, "fstype truncated: %s", + brickinfo->fstype); + goto out; + } + } else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_MNTOPTS, + SLEN(GLUSTERD_STORE_KEY_BRICK_MNTOPTS))) { + if (snprintf(brickinfo->mnt_opts, sizeof(brickinfo->mnt_opts), + "%s", value) >= sizeof(brickinfo->mnt_opts)) { + gf_msg("glusterd", GF_LOG_ERROR, op_errno, + GD_MSG_PARSE_BRICKINFO_FAIL, + "mnt_opts truncated: %s", brickinfo->mnt_opts); + goto out; + } + } else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_VGNAME, + SLEN(GLUSTERD_STORE_KEY_BRICK_VGNAME))) { + if (snprintf(brickinfo->vg, sizeof(brickinfo->vg), "%s", + value) >= sizeof(brickinfo->vg)) { + gf_msg("glusterd", GF_LOG_ERROR, op_errno, + GD_MSG_PARSE_BRICKINFO_FAIL, + "brickinfo->vg truncated: %s", brickinfo->vg); + goto out; + } + } else if (!strcmp(key, GLUSTERD_STORE_KEY_BRICK_ID)) { + if (snprintf(brickinfo->brick_id, sizeof(brickinfo->brick_id), + "%s", value) >= sizeof(brickinfo->brick_id)) { + gf_msg("glusterd", GF_LOG_ERROR, op_errno, + GD_MSG_PARSE_BRICKINFO_FAIL, + "brick_id truncated: %s", brickinfo->brick_id); + goto out; + } + } else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_FSID, + SLEN(GLUSTERD_STORE_KEY_BRICK_FSID))) { + ret = gf_string2uint64(value, &brickinfo->statfs_fsid); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get store " - "handle!"); - goto out; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY, + "%s " + "is not a 
valid uint64_t value", + value); } - priv->handle = handle; + } else if (!strcmp(key, GLUSTERD_STORE_KEY_BRICK_UUID)) { + gf_uuid_parse(value, brickinfo->uuid); + } else { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UNKNOWN_KEY, + "Unknown key: %s", key); + } + + GF_FREE(key); + GF_FREE(value); + key = NULL; + value = NULL; + + ret = gf_store_iter_get_next(iter, &key, &value, &op_errno); } - ret = glusterd_store_retrieve_value (priv->handle, - GLUSTERD_STORE_UUID_KEY, - &uuid_str); + if (op_errno != GD_STORE_EOF) { + gf_msg(this->name, GF_LOG_ERROR, op_errno, + GD_MSG_PARSE_BRICKINFO_FAIL, + "Error parsing brickinfo: " + "op_errno=%d", + op_errno); + goto out; + } - if (ret) { - gf_log ("", GF_LOG_INFO, "No previous uuid is present"); - goto out; + if (brickinfo->brick_id[0] == '\0') { + /* This is an old volume upgraded to op_version 4 */ + GLUSTERD_ASSIGN_BRICKID_TO_BRICKINFO(brickinfo, volinfo, brickid++); + } + /* Populate brickinfo->real_path for normal volumes, for + * snapshot or snapshot restored volume this would be done post + * creating the brick mounts + */ + if (gf_uuid_is_null(brickinfo->uuid)) + (void)glusterd_resolve_brick(brickinfo); + if (brickinfo->real_path[0] == '\0' && !volinfo->is_snap_volume && + gf_uuid_is_null(volinfo->restored_from_snap)) { + /* By now if the brick is a local brick then it will be + * able to resolve which is the only thing we want now + * for checking whether the brickinfo->uuid matches + * with MY_UUID for realpath check. Hence do not handle + * error + */ + if (!gf_uuid_compare(brickinfo->uuid, MY_UUID)) { + if (!realpath(brickinfo->path, abspath)) { + gf_msg(this->name, GF_LOG_CRITICAL, errno, + GD_MSG_BRICKINFO_CREATE_FAIL, + "realpath() failed for brick %s" + ". The underlying file system " + "may be in bad state", + brickinfo->path); + ret = -1; + goto out; + } + if (strlen(abspath) >= sizeof(brickinfo->real_path)) { + ret = -1; + goto out; + } + (void)strncpy(brickinfo->real_path, abspath, + sizeof(brickinfo->real_path)); + } } - uuid_parse (uuid_str, priv->uuid); + /* Handle upgrade case of shared_brick_count 'fsid' */ + /* Ideally statfs_fsid should never be 0 if done right */ + if (!gf_uuid_compare(brickinfo->uuid, MY_UUID) && + brickinfo->statfs_fsid == 0) { + struct statvfs brickstat = { + 0, + }; + ret = sys_statvfs(brickinfo->path, &brickstat); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, + GD_MSG_BRICKINFO_CREATE_FAIL, + "failed to get statfs() call on brick %s", + brickinfo->path); + /* No need for treating it as an error, lets continue + with just a message */ + } else { + brickinfo->statfs_fsid = brickstat.f_fsid; + } + } -out: - if (uuid_str) - GF_FREE (uuid_str); - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; -} + cds_list_add_tail(&brickinfo->brick_list, &volinfo->bricks); + brick_count++; + } -int32_t -glusterd_store_iter_new (glusterd_store_handle_t *shandle, - glusterd_store_iter_t **iter) -{ - int32_t ret = -1; - glusterd_store_iter_t *tmp_iter = NULL; - int fd = -1; + if (gf_store_iter_destroy(&tmpiter)) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL, + "Failed to destroy store iter"); + ret = -1; + goto out; + } - GF_ASSERT (shandle); - GF_ASSERT (iter); + ret = gf_store_iter_new(volinfo->shandle, &tmpiter); - tmp_iter = GF_CALLOC (1, sizeof (*tmp_iter), - gf_gld_mt_store_iter_t); + if (ret) + goto out; - if (!tmp_iter) { - gf_log ("", GF_LOG_ERROR, "Out of Memory"); + if (volinfo->thin_arbiter_count == 1) { + snprintf(tmpkey, sizeof(tmpkey), "%s-%d", + 
GLUSTERD_STORE_KEY_VOL_TA_BRICK, 0); + while (ta_brick_count < volinfo->subvol_count) { + ret = glusterd_brickinfo_new(&ta_brickinfo); + if (ret) goto out; - } - fd = open (shandle->path, O_RDWR); + ret = gf_store_iter_get_matching(tmpiter, tmpkey, &tmpvalue); - if (fd < 0) { - gf_log ("", GF_LOG_ERROR, "Unable to open %s, errno: %d", - shandle->path, errno); + len = snprintf(path, sizeof(path), "%s/%s", brickdir, tmpvalue); + GF_FREE(tmpvalue); + tmpvalue = NULL; + if ((len < 0) || (len >= sizeof(path))) { + ret = -1; + goto out; + } + + ret = gf_store_handle_retrieve(path, &ta_brickinfo->shandle); + + if (ret) goto out; - } - tmp_iter->fd = fd; + ret = gf_store_iter_new(ta_brickinfo->shandle, &iter); - tmp_iter->file = fdopen (tmp_iter->fd, "r"); + if (ret) + goto out; - if (!tmp_iter->file) { - gf_log ("", GF_LOG_ERROR, "Unable to open file %s errno: %d", - shandle->path, errno); + ret = gf_store_iter_get_next(iter, &key, &value, &op_errno); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, op_errno, + GD_MSG_STORE_ITER_GET_FAIL, + "Unable to iterate " + "the store for brick: %s", + path); goto out; + } + + while (!ret) { + if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_HOSTNAME, + SLEN(GLUSTERD_STORE_KEY_BRICK_HOSTNAME))) { + if (snprintf(ta_brickinfo->hostname, + sizeof(ta_brickinfo->hostname), "%s", + value) >= sizeof(ta_brickinfo->hostname)) { + gf_msg("glusterd", GF_LOG_ERROR, op_errno, + GD_MSG_PARSE_BRICKINFO_FAIL, + "brick hostname truncated: %s", + ta_brickinfo->hostname); + goto out; + } + } else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_PATH, + SLEN(GLUSTERD_STORE_KEY_BRICK_PATH))) { + if (snprintf(ta_brickinfo->path, sizeof(ta_brickinfo->path), + "%s", value) >= sizeof(ta_brickinfo->path)) { + gf_msg("glusterd", GF_LOG_ERROR, op_errno, + GD_MSG_PARSE_BRICKINFO_FAIL, + "brick path truncated: %s", ta_brickinfo->path); + goto out; + } + } else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_REAL_PATH, + SLEN(GLUSTERD_STORE_KEY_BRICK_REAL_PATH))) { + if (snprintf(ta_brickinfo->real_path, + sizeof(ta_brickinfo->real_path), "%s", + value) >= sizeof(ta_brickinfo->real_path)) { + gf_msg("glusterd", GF_LOG_ERROR, op_errno, + GD_MSG_PARSE_BRICKINFO_FAIL, + "real_path truncated: %s", + ta_brickinfo->real_path); + goto out; + } + } else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_PORT, + SLEN(GLUSTERD_STORE_KEY_BRICK_PORT))) { + ret = gf_string2int(value, &ta_brickinfo->port); + if (ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, + GD_MSG_INCOMPATIBLE_VALUE, + "Failed to convert " + "string to integer"); + } + + if (ta_brickinfo->port < priv->base_port) { + /* This is required to adhere to the + IANA standards */ + ta_brickinfo->port = 0; + } + } else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_RDMA_PORT, + SLEN(GLUSTERD_STORE_KEY_BRICK_RDMA_PORT))) { + ret = gf_string2int(value, &ta_brickinfo->rdma_port); + if (ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, + GD_MSG_INCOMPATIBLE_VALUE, + "Failed to convert " + "string to integer"); + } + + if (ta_brickinfo->rdma_port < priv->base_port) { + /* This is required to adhere to the + IANA standards */ + ta_brickinfo->rdma_port = 0; + } + } else if (!strncmp( + key, GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED, + SLEN(GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED))) { + ret = gf_string2int(value, &ta_brickinfo->decommissioned); + if (ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, + GD_MSG_INCOMPATIBLE_VALUE, + "Failed to convert " + "string to integer"); + } + + } else if (!strcmp(key, GLUSTERD_STORE_KEY_BRICK_ID)) { + if 
(snprintf(ta_brickinfo->brick_id, + sizeof(ta_brickinfo->brick_id), "%s", + value) >= sizeof(ta_brickinfo->brick_id)) { + gf_msg("glusterd", GF_LOG_ERROR, op_errno, + GD_MSG_PARSE_BRICKINFO_FAIL, + "brick_id truncated: %s", + ta_brickinfo->brick_id); + goto out; + } + } else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_FSID, + SLEN(GLUSTERD_STORE_KEY_BRICK_FSID))) { + ret = gf_string2uint64(value, &ta_brickinfo->statfs_fsid); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_INVALID_ENTRY, + "%s " + "is not a valid uint64_t value", + value); + } + } else if (!strcmp(key, GLUSTERD_STORE_KEY_BRICK_UUID)) { + gf_uuid_parse(value, brickinfo->uuid); + } else if (!strncmp( + key, GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS, + SLEN(GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS))) { + ret = gf_string2int(value, &ta_brickinfo->snap_status); + if (ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, + GD_MSG_INCOMPATIBLE_VALUE, + "Failed to convert " + "string to integer"); + } + + } else { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UNKNOWN_KEY, + "Unknown key: %s", key); + } + + GF_FREE(key); + GF_FREE(value); + key = NULL; + value = NULL; + ret = gf_store_iter_get_next(iter, &key, &value, &op_errno); + } + + GLUSTERD_ASSIGN_BRICKID_TO_TA_BRICKINFO(ta_brickinfo, volinfo, + ta_brick_id); + ta_brick_id += 3; + + cds_list_add_tail(&ta_brickinfo->brick_list, &volinfo->ta_bricks); + ta_brick_count++; } + } - strncpy (tmp_iter->filepath, shandle->path, sizeof (tmp_iter->filepath)); - *iter = tmp_iter; - ret = 0; + assign_brick_groups(volinfo); + ret = 0; out: - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); - return ret; + if (gf_store_iter_destroy(&tmpiter)) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL, + "Failed to destroy store iter"); + ret = -1; + } + + if (gf_store_iter_destroy(&iter)) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL, + "Failed to destroy store iter"); + ret = -1; + } + + gf_msg_debug(this->name, 0, "Returning with %d", ret); + + return ret; } int32_t -glusterd_store_validate_key_value (char *storepath, char *key, char*val, - glusterd_store_op_errno_t *op_errno) +glusterd_store_retrieve_node_state(glusterd_volinfo_t *volinfo) { - int ret = 0; - - GF_ASSERT (op_errno); - GF_ASSERT (storepath); - - if ((key == NULL) && (val == NULL)) { - ret = -1; - gf_log ("glusterd", GF_LOG_ERROR, "Glusterd store may be " - "corrupted, Invalid key and value (null) in %s", - storepath); - *op_errno = GD_STORE_KEY_VALUE_NULL; - } else if (key == NULL) { - ret = -1; - gf_log ("glusterd", GF_LOG_ERROR, "Glusterd store may be " - "corrupted, Invalid key (null) in %s", storepath); - *op_errno = GD_STORE_KEY_NULL; - } else if (val == NULL) { - ret = -1; - gf_log ("glusterd", GF_LOG_ERROR, "Glusterd store may be " - "corrupted, Invalid value (null) for key %s in %s", - key, storepath); - *op_errno = GD_STORE_VALUE_NULL; + int32_t ret = -1; + gf_store_iter_t *iter = NULL; + char *key = NULL; + char *value = NULL; + char *dup_value = NULL; + char volpath[PATH_MAX] = { + 0, + }; + glusterd_conf_t *priv = NULL; + char path[PATH_MAX] = { + 0, + }; + gf_store_op_errno_t op_errno = GD_STORE_SUCCESS; + dict_t *tmp_dict = NULL; + xlator_t *this = NULL; + int32_t len = 0; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + GF_ASSERT(volinfo); + + GLUSTERD_GET_VOLUME_DIR(volpath, volinfo, priv); + len = snprintf(path, sizeof(path), "%s/%s", volpath, + GLUSTERD_NODE_STATE_FILE); + if ((len < 0) || (len >= PATH_MAX)) { + goto out; + } + + 
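/*
 * Illustrative sketch (not part of this patch): the truncation-checked
 * snprintf() idiom used just above and throughout this file when building
 * store paths. snprintf() returns the length the string *would* have had,
 * so a negative value means an encoding error and a value >= the buffer
 * size means the result was silently truncated and must not be used.
 * The helper name below is hypothetical; only the idiom is taken from
 * the surrounding code.
 */
#include <stdio.h>

static int
build_store_path(char *buf, size_t buflen, const char *dir, const char *file)
{
    int len = snprintf(buf, buflen, "%s/%s", dir, file);

    /* Negative: encoding error. >= buflen: truncated, path is unusable. */
    if (len < 0 || (size_t)len >= buflen)
        return -1;
    return 0;
}
/* Usage mirrors the check above: char path[PATH_MAX];
 * if (build_store_path(path, sizeof(path), volpath,
 *                      GLUSTERD_NODE_STATE_FILE)) goto out; */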
ret = gf_store_handle_retrieve(path, &volinfo->node_state_shandle); + if (ret) + goto out; + + ret = gf_store_iter_new(volinfo->node_state_shandle, &iter); + + if (ret) + goto out; + + ret = gf_store_iter_get_next(iter, &key, &value, &op_errno); + + if (ret) + goto out; + + while (ret == 0) { + if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG, + SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG))) { + volinfo->rebal.defrag_cmd = atoi(value); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_STATUS, + SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_STATUS))) { + volinfo->rebal.defrag_status = atoi(value); + } else if (!strncmp(key, GF_REBALANCE_TID_KEY, + SLEN(GF_REBALANCE_TID_KEY))) { + gf_uuid_parse(value, volinfo->rebal.rebalance_id); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_DEFRAG_OP, + SLEN(GLUSTERD_STORE_KEY_DEFRAG_OP))) { + volinfo->rebal.op = atoi(value); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_REB_FILES, + SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_REB_FILES))) { + sscanf(value, "%" PRIu64, &volinfo->rebal.rebalance_files); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_SIZE, + SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_SIZE))) { + sscanf(value, "%" PRIu64, &volinfo->rebal.rebalance_data); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_SCANNED, + SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_SCANNED))) { + sscanf(value, "%" PRIu64, &volinfo->rebal.lookedup_files); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_FAILURES, + SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_FAILURES))) { + sscanf(value, "%" PRIu64, &volinfo->rebal.rebalance_failures); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_SKIPPED, + SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_SKIPPED))) { + sscanf(value, "%" PRIu64, &volinfo->rebal.skipped_files); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_RUN_TIME, + SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_RUN_TIME))) { + volinfo->rebal.rebalance_time = atoi(value); } else { - ret = 0; - *op_errno = GD_STORE_SUCCESS; + if (!tmp_dict) { + tmp_dict = dict_new(); + if (!tmp_dict) { + ret = -1; + goto out; + } + } + dup_value = gf_strdup(value); + if (!dup_value) { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "Failed to strdup value string"); + goto out; + } + ret = dict_set_str(tmp_dict, key, dup_value); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Error setting data in rebal " + "dict."); + goto out; + } + dup_value = NULL; } - return ret; -} + GF_FREE(key); + GF_FREE(value); + key = NULL; + value = NULL; -int32_t -glusterd_store_iter_get_next (glusterd_store_iter_t *iter, - char **key, char **value, - glusterd_store_op_errno_t *op_errno) -{ - int32_t ret = -1; - char scan_str[4096] = {0,}; - char *str = NULL; - char *free_str = NULL; - char *iter_key = NULL; - char *iter_val = NULL; - glusterd_store_op_errno_t store_errno = GD_STORE_SUCCESS; + ret = gf_store_iter_get_next(iter, &key, &value, &op_errno); + } + if (tmp_dict) { + volinfo->rebal.dict = dict_ref(tmp_dict); + } - GF_ASSERT (iter); - GF_ASSERT (iter->file); - GF_ASSERT (key); - GF_ASSERT (value); + if (op_errno != GD_STORE_EOF) { + ret = -1; + goto out; + } - *key = NULL; - *value = NULL; + ret = 0; - ret = fscanf (iter->file, "%s", scan_str); +out: + if (gf_store_iter_destroy(&iter)) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL, + "Failed to destroy store iter"); + ret = -1; + } + + if (dup_value) + GF_FREE(dup_value); + if (ret) { + if (volinfo->rebal.dict) + dict_unref(volinfo->rebal.dict); + } + if (tmp_dict) + 
dict_unref(tmp_dict); + + gf_msg_trace(this->name, 0, "Returning with %d", ret); + + return ret; +} - if (ret <= 0) { - ret = -1; - store_errno = GD_STORE_EOF; +int +glusterd_store_update_volinfo(glusterd_volinfo_t *volinfo) +{ + int ret = -1; + int exists = 0; + char *key = NULL; + char *value = NULL; + char volpath[PATH_MAX] = { + 0, + }; + char path[PATH_MAX] = { + 0, + }; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + gf_store_iter_t *iter = NULL; + gf_store_op_errno_t op_errno = GD_STORE_SUCCESS; + int32_t len = 0; + + this = THIS; + GF_ASSERT(this); + conf = THIS->private; + GF_ASSERT(volinfo); + + GLUSTERD_GET_VOLUME_DIR(volpath, volinfo, conf); + + len = snprintf(path, sizeof(path), "%s/%s", volpath, + GLUSTERD_VOLUME_INFO_FILE); + if ((len < 0) || (len >= sizeof(path))) { + goto out; + } + + ret = gf_store_handle_retrieve(path, &volinfo->shandle); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HANDLE_NULL, + "volinfo handle is NULL"); + goto out; + } + + ret = gf_store_iter_new(volinfo->shandle, &iter); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_GET_FAIL, + "Failed to get new store " + "iter"); + goto out; + } + + ret = gf_store_iter_get_next(iter, &key, &value, &op_errno); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_GET_FAIL, + "Failed to get next store " + "iter"); + goto out; + } + + while (!ret) { + gf_msg_debug(this->name, 0, "key = %s value = %s", key, value); + if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_TYPE, + SLEN(GLUSTERD_STORE_KEY_VOL_TYPE))) { + volinfo->type = atoi(value); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_COUNT, + SLEN(GLUSTERD_STORE_KEY_VOL_COUNT))) { + volinfo->brick_count = atoi(value); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_STATUS, + SLEN(GLUSTERD_STORE_KEY_VOL_STATUS))) { + volinfo->status = atoi(value); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_VERSION, + SLEN(GLUSTERD_STORE_KEY_VOL_VERSION))) { + volinfo->version = atoi(value); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_PORT, + SLEN(GLUSTERD_STORE_KEY_VOL_PORT))) { + volinfo->port = atoi(value); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_SUB_COUNT, + SLEN(GLUSTERD_STORE_KEY_VOL_SUB_COUNT))) { + volinfo->sub_count = atoi(value); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_STRIPE_CNT, + SLEN(GLUSTERD_STORE_KEY_VOL_STRIPE_CNT))) { + volinfo->stripe_count = atoi(value); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_REPLICA_CNT, + SLEN(GLUSTERD_STORE_KEY_VOL_REPLICA_CNT))) { + volinfo->replica_count = atoi(value); + } else if (!strcmp(key, GLUSTERD_STORE_KEY_VOL_ARBITER_CNT)) { + volinfo->arbiter_count = atoi(value); + } else if (!strcmp(key, GLUSTERD_STORE_KEY_VOL_THIN_ARBITER_CNT)) { + volinfo->thin_arbiter_count = atoi(value); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT, + SLEN(GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT))) { + volinfo->disperse_count = atoi(value); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_REDUNDANCY_CNT, + SLEN(GLUSTERD_STORE_KEY_VOL_REDUNDANCY_CNT))) { + volinfo->redundancy_count = atoi(value); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_TRANSPORT, + SLEN(GLUSTERD_STORE_KEY_VOL_TRANSPORT))) { + volinfo->transport_type = atoi(value); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_ID, + SLEN(GLUSTERD_STORE_KEY_VOL_ID))) { + ret = gf_uuid_parse(value, volinfo->volume_id); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_UUID_PARSE_FAIL, + "failed to parse uuid"); + + } else if (!strncmp(key, GLUSTERD_STORE_KEY_USERNAME, + 
SLEN(GLUSTERD_STORE_KEY_USERNAME))) { + glusterd_auth_set_username(volinfo, value); + + } else if (!strncmp(key, GLUSTERD_STORE_KEY_PASSWORD, + SLEN(GLUSTERD_STORE_KEY_PASSWORD))) { + glusterd_auth_set_password(volinfo, value); + + } else if (strstr(key, "slave")) { + ret = dict_set_dynstr(volinfo->gsync_slaves, key, gf_strdup(value)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Error in " + "dict_set_str"); goto out; - } - - str = gf_strdup (scan_str); - if (!str) { - ret = -1; - store_errno = GD_STORE_ENOMEM; + } + gf_msg_debug(this->name, 0, + "Parsed as " GEOREP + " " + " slave:key=%s,value:%s", + key, value); + + } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_OP_VERSION, + SLEN(GLUSTERD_STORE_KEY_VOL_OP_VERSION))) { + volinfo->op_version = atoi(value); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION, + SLEN(GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION))) { + volinfo->client_op_version = atoi(value); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT, + SLEN(GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT))) { + volinfo->snap_max_hard_limit = (uint64_t)atoll(value); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_RESTORED_SNAP, + SLEN(GLUSTERD_STORE_KEY_VOL_RESTORED_SNAP))) { + ret = gf_uuid_parse(value, volinfo->restored_from_snap); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_UUID_PARSE_FAIL, + "failed to parse restored snap's uuid"); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_PARENT_VOLNAME, + SLEN(GLUSTERD_STORE_KEY_PARENT_VOLNAME))) { + if (snprintf(volinfo->parent_volname, + sizeof(volinfo->parent_volname), "%s", + value) >= sizeof(volinfo->parent_volname)) { + gf_msg("glusterd", GF_LOG_ERROR, op_errno, + GD_MSG_PARSE_BRICKINFO_FAIL, + "parent_volname truncated: %s", volinfo->parent_volname); goto out; + } + } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_QUOTA_VERSION, + SLEN(GLUSTERD_STORE_KEY_VOL_QUOTA_VERSION))) { + volinfo->quota_xattr_version = atoi(value); } else { - free_str = str; + if (is_key_glusterd_hooks_friendly(key)) { + exists = 1; + + } else { + exists = glusterd_check_option_exists(key, NULL); + } + + switch (exists) { + case -1: + ret = -1; + goto out; + + case 0: + /*Ignore GLUSTERD_STORE_KEY_VOL_BRICK since + glusterd_store_retrieve_bricks gets it later. + also, ignore tier-enabled key as we deprecated + tier xlator*/ + if (!strstr(key, GLUSTERD_STORE_KEY_VOL_BRICK) || + !strstr(key, GF_TIER_ENABLED)) + gf_msg(this->name, GF_LOG_WARNING, 0, + GD_MSG_UNKNOWN_KEY, "Unknown key: %s", key); + break; + + case 1: + /*The following strcmp check is to ensure that + * glusterd does not restore the quota limits + * into volinfo->dict post upgradation from 3.3 + * to 3.4 as the same limits will now be stored + * in xattrs on the respective directories. 
+ */ + if (!strcmp(key, "features.limit-usage")) + break; + ret = dict_set_str(volinfo->dict, key, gf_strdup(value)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_DICT_SET_FAILED, + "Error in " + "dict_set_str"); + goto out; + } + gf_msg_debug(this->name, 0, + "Parsed as Volume-" + "set:key=%s,value:%s", + key, value); + break; + } } - iter_key = strtok (str, "="); - iter_val = strtok (NULL, "="); + GF_FREE(key); + GF_FREE(value); + key = NULL; + value = NULL; + + ret = gf_store_iter_get_next(iter, &key, &value, &op_errno); + } + + /* backward compatibility */ + { + switch (volinfo->type) { + case GF_CLUSTER_TYPE_NONE: + volinfo->stripe_count = 1; + volinfo->replica_count = 1; + break; + + case GF_CLUSTER_TYPE_REPLICATE: + volinfo->stripe_count = 1; + volinfo->replica_count = volinfo->sub_count; + break; + + case GF_CLUSTER_TYPE_DISPERSE: + GF_ASSERT(volinfo->disperse_count > 0); + GF_ASSERT(volinfo->redundancy_count > 0); + break; + + case GF_CLUSTER_TYPE_STRIPE: + case GF_CLUSTER_TYPE_STRIPE_REPLICATE: + gf_msg(this->name, GF_LOG_CRITICAL, ENOTSUP, + GD_MSG_VOLINFO_STORE_FAIL, + "The volume type is no more supported. Please refer to " + "glusterfs-6.0 release-notes for how to migrate from " + "this volume type"); + break; + + default: + GF_ASSERT(0); + break; + } - ret = glusterd_store_validate_key_value (iter->filepath, iter_key, - iter_val, &store_errno); - if (ret) - goto out; + volinfo->dist_leaf_count = glusterd_get_dist_leaf_count(volinfo); - *value = gf_strdup (iter_val); + volinfo->subvol_count = (volinfo->brick_count / + volinfo->dist_leaf_count); - *key = gf_strdup (iter_key); - if (!iter_key || !iter_val) { - ret = -1; - store_errno = GD_STORE_ENOMEM; - goto out; - } + /* Only calculate volume op-versions if they are not found */ + if (!volinfo->op_version && !volinfo->client_op_version) + gd_update_volume_op_versions(volinfo); + } - ret = 0; + if (op_errno != GD_STORE_EOF) + goto out; + + ret = 0; out: - if (ret) { - if (*key) { - GF_FREE (*key); - *key = NULL; - } - if (*value) { - GF_FREE (*value); - *value = NULL; - } - } - if (free_str) - GF_FREE (free_str); - if (op_errno) - *op_errno = store_errno; + if (gf_store_iter_destroy(&iter)) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL, + "Failed to destroy store iter"); + ret = -1; + } - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); - return ret; + return ret; } -int32_t -glusterd_store_iter_get_matching (glusterd_store_iter_t *iter, - char *key, char **value) +glusterd_volinfo_t * +glusterd_store_retrieve_volume(char *volname, glusterd_snap_t *snap) { - int32_t ret = -1; - char *tmp_key = NULL; - char *tmp_value = NULL; - - ret = glusterd_store_iter_get_next (iter, &tmp_key, &tmp_value, - NULL); - while (!ret) { - if (!strncmp (key, tmp_key, strlen (key))){ - *value = tmp_value; - GF_FREE (tmp_key); - goto out; - } - GF_FREE (tmp_key); - GF_FREE (tmp_value); - ret = glusterd_store_iter_get_next (iter, &tmp_key, - &tmp_value, NULL); + int32_t ret = -1; + glusterd_volinfo_t *volinfo = NULL; + glusterd_volinfo_t *origin_volinfo = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + GF_ASSERT(volname); + + ret = glusterd_volinfo_new(&volinfo); + if (ret) + goto out; + + if (snprintf(volinfo->volname, NAME_MAX + 1, "%s", volname) >= NAME_MAX + 1) + goto out; + volinfo->snapshot = snap; + if (snap) + volinfo->is_snap_volume = _gf_true; + + ret = glusterd_store_update_volinfo(volinfo); + if (ret) 
{ + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_UPDATE_FAIL, + "Failed to update volinfo " + "for %s volume", + volname); + goto out; + } + + ret = glusterd_store_retrieve_bricks(volinfo); + if (ret) + goto out; + + ret = glusterd_store_retrieve_snapd(volinfo); + if (ret) + goto out; + + ret = glusterd_compute_cksum(volinfo, _gf_false); + if (ret) + goto out; + + ret = glusterd_store_retrieve_quota_version(volinfo); + if (ret) + goto out; + + ret = glusterd_store_create_quota_conf_sh_on_absence(volinfo); + if (ret) + goto out; + + ret = glusterd_compute_cksum(volinfo, _gf_true); + if (ret) + goto out; + + ret = glusterd_store_save_quota_version_and_cksum(volinfo); + if (ret) + goto out; + + if (!snap) { + glusterd_list_add_order(&volinfo->vol_list, &priv->volumes, + glusterd_compare_volume_name); + + } else { + ret = glusterd_volinfo_find(volinfo->parent_volname, &origin_volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, + "Parent volinfo " + "not found for %s volume", + volname); + goto out; } + glusterd_list_add_snapvol(origin_volinfo, volinfo); + } + out: - return ret; -} + if (ret) { + if (volinfo) + glusterd_volinfo_unref(volinfo); + volinfo = NULL; + } -int32_t -glusterd_store_iter_destroy (glusterd_store_iter_t *iter) -{ - int32_t ret = -1; + gf_msg_trace(this->name, 0, "Returning with %d", ret); - GF_ASSERT (iter); - GF_ASSERT (iter->fd > 0); + return volinfo; +} - ret = fclose (iter->file); +static void +glusterd_store_set_options_path(glusterd_conf_t *conf, char *path, size_t len) +{ + snprintf(path, len, "%s/options", conf->workdir); +} +int32_t +glusterd_store_options(xlator_t *this, dict_t *opts) +{ + gf_store_handle_t *shandle = NULL; + glusterd_conf_t *conf = NULL; + char path[PATH_MAX] = {0}; + int fd = -1; + int32_t ret = -1; + glusterd_volinfo_data_store_t *dict_data = NULL; + + conf = this->private; + glusterd_store_set_options_path(conf, path, sizeof(path)); + + ret = gf_store_handle_new(path, &shandle); + if (ret) { + goto out; + } + + fd = gf_store_mkstemp(shandle); + if (fd <= 0) { + ret = -1; + goto out; + } + + dict_data = GF_CALLOC(1, sizeof(glusterd_volinfo_data_store_t), + gf_gld_mt_volinfo_dict_data_t); + if (dict_data == NULL) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_MEMORY, NULL); + return -1; + } + dict_data->shandle = shandle; + shandle->fd = fd; + dict_foreach(opts, _storeopts, (void *)dict_data); + if (dict_data->buffer_len > 0) { + ret = gf_store_save_items(fd, dict_data->buffer); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to close fd: %d, ret: %d, " - "errno: %d" ,iter->fd, ret, errno); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED, NULL); + goto out; } + } - GF_FREE (iter); - - return ret; + ret = gf_store_rename_tmppath(shandle); +out: + shandle->fd = 0; + GF_FREE(dict_data); + if ((ret < 0) && (fd > 0)) { + gf_store_unlink_tmppath(shandle); + } + gf_store_handle_destroy(shandle); + return ret; } -char* -glusterd_store_strerror (glusterd_store_op_errno_t op_errno) +int32_t +glusterd_store_retrieve_options(xlator_t *this) { - switch (op_errno) { - case GD_STORE_SUCCESS: - return "Success"; - case GD_STORE_KEY_NULL: - return "Invalid Key"; - case GD_STORE_VALUE_NULL: - return "Invalid Value"; - case GD_STORE_KEY_VALUE_NULL: - return "Invalid Key and Value"; - case GD_STORE_EOF: - return "No data"; - case GD_STORE_ENOMEM: - return "No memory"; - default: - return "Invalid errno"; + char path[PATH_MAX] = {0}; + glusterd_conf_t *conf = NULL; + gf_store_handle_t *shandle = NULL; + 
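/*
 * Illustrative sketch (not part of this patch): the canonical read loop
 * this file applies to every key=value store file — retrieve a handle,
 * create an iterator, walk key/value pairs until GD_STORE_EOF, then tear
 * both down. The gf_store_* calls and types are the ones already used in
 * the surrounding functions; handle_kv is a hypothetical callback standing
 * in for the per-key switch ladders seen above.
 */
static int
read_store_file(char *path, int (*handle_kv)(char *key, char *value))
{
    gf_store_handle_t *shandle = NULL;
    gf_store_iter_t *iter = NULL;
    char *key = NULL;
    char *value = NULL;
    gf_store_op_errno_t op_errno = GD_STORE_SUCCESS;
    int ret = -1;

    ret = gf_store_handle_retrieve(path, &shandle);
    if (ret)
        goto out;

    ret = gf_store_iter_new(shandle, &iter);
    if (ret)
        goto out;

    ret = gf_store_iter_get_next(iter, &key, &value, &op_errno);
    while (!ret) {
        ret = handle_kv(key, value); /* callback owns and frees key/value */
        if (ret)
            goto out;
        key = NULL;
        value = NULL;
        ret = gf_store_iter_get_next(iter, &key, &value, &op_errno);
    }

    /* The walk only ends cleanly when the iterator reports end-of-file. */
    ret = (op_errno == GD_STORE_EOF) ? 0 : -1;
out:
    if (iter && gf_store_iter_destroy(&iter))
        ret = -1;
    if (shandle)
        gf_store_handle_destroy(shandle);
    return ret;
}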
gf_store_iter_t *iter = NULL; + char *key = NULL; + char *value = NULL; + gf_store_op_errno_t op_errno = 0; + int ret = -1; + + conf = this->private; + glusterd_store_set_options_path(conf, path, sizeof(path)); + + ret = gf_store_handle_retrieve(path, &shandle); + if (ret) + goto out; + + ret = gf_store_iter_new(shandle, &iter); + if (ret) + goto out; + + ret = gf_store_iter_get_next(iter, &key, &value, &op_errno); + while (!ret) { + ret = dict_set_dynstr(conf->opts, key, value); + if (ret) { + GF_FREE(key); + GF_FREE(value); + goto out; } - return "Invalid errno"; + GF_FREE(key); + key = NULL; + value = NULL; + + ret = gf_store_iter_get_next(iter, &key, &value, &op_errno); + } + if (op_errno != GD_STORE_EOF) + goto out; + ret = 0; +out: + (void)gf_store_iter_destroy(&iter); + gf_store_handle_destroy(shandle); + return ret; } int32_t -glusterd_store_retrieve_bricks (glusterd_volinfo_t *volinfo) +glusterd_store_retrieve_volumes(xlator_t *this, glusterd_snap_t *snap) { + int32_t ret = -1; + char path[PATH_MAX] = { + 0, + }; + glusterd_conf_t *priv = NULL; + DIR *dir = NULL; + struct dirent *entry = NULL; + struct dirent scratch[2] = { + { + 0, + }, + }; + glusterd_volinfo_t *volinfo = NULL; + struct stat st = { + 0, + }; + char entry_path[PATH_MAX] = { + 0, + }; + int32_t len = 0; + + GF_ASSERT(this); + priv = this->private; + + GF_ASSERT(priv); + + if (snap) + len = snprintf(path, PATH_MAX, "%s/snaps/%s", priv->workdir, + snap->snapname); + else + len = snprintf(path, PATH_MAX, "%s/%s", priv->workdir, + GLUSTERD_VOLUME_DIR_PREFIX); + if ((len < 0) || (len >= PATH_MAX)) { + goto out; + } + + dir = sys_opendir(path); + + if (!dir) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED, + "Unable to open dir %s", path); + goto out; + } + + while ((entry = sys_readdir(dir, scratch))) { + if (gf_irrelevant_entry(entry)) + continue; + if (snap && ((!strcmp(entry->d_name, "geo-replication")) || + (!strcmp(entry->d_name, "info")))) + continue; + + len = snprintf(entry_path, PATH_MAX, "%s/%s", path, entry->d_name); + if ((len < 0) || (len >= PATH_MAX)) + continue; + + ret = sys_lstat(entry_path, &st); + if (ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY, + "Failed to stat entry %s : %s", path, strerror(errno)); + continue; + } - int32_t ret = 0; - glusterd_brickinfo_t *brickinfo = NULL; - glusterd_store_iter_t *iter = NULL; - char *key = NULL; - char *value = NULL; - char brickdir[PATH_MAX] = {0,}; - char path[PATH_MAX] = {0,}; - glusterd_conf_t *priv = NULL; - int32_t brick_count = 0; - char tmpkey[4096] = {0,}; - glusterd_store_iter_t *tmpiter = NULL; - char *tmpvalue = NULL; - struct pmap_registry *pmap = NULL; - glusterd_store_op_errno_t op_errno = GD_STORE_SUCCESS; - - GF_ASSERT (volinfo); - GF_ASSERT (volinfo->volname); - - priv = THIS->private; - - GLUSTERD_GET_BRICK_DIR (brickdir, volinfo, priv) + if (!S_ISDIR(st.st_mode)) { + gf_msg_debug(this->name, 0, "%s is not a valid volume", + entry->d_name); + continue; + } - ret = glusterd_store_iter_new (volinfo->shandle, &tmpiter); + volinfo = glusterd_store_retrieve_volume(entry->d_name, snap); + if (!volinfo) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_RESTORE_FAIL, + "Unable to restore " + "volume: %s", + entry->d_name); + ret = -1; + goto out; + } - if (ret) - goto out; + ret = glusterd_store_retrieve_node_state(volinfo); + if (ret) { + /* Backward compatibility */ + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_NEW_NODE_STATE_CREATION, + "Creating a new node_state " + "for volume: %s.", + entry->d_name); 
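/*
 * Illustrative sketch (not part of this patch): the backward-compatibility
 * step taken at this point of the restore loop. Volumes created by an older
 * glusterd have no node_state.info file, so a failed retrieve is treated as
 * "first start after upgrade": a fresh node_state store is created and then
 * persisted instead of failing the whole volume restore. The function names
 * are the ones called in the surrounding loop.
 */
static void
restore_or_create_node_state(glusterd_volinfo_t *volinfo)
{
    if (glusterd_store_retrieve_node_state(volinfo) == 0)
        return; /* node_state.info existed and was parsed */

    /* Older install: synthesize the file, then write the current state. */
    glusterd_store_create_nodestate_sh_on_absence(volinfo);
    glusterd_store_perform_node_state_store(volinfo);
}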
+ glusterd_store_create_nodestate_sh_on_absence(volinfo); + glusterd_store_perform_node_state_store(volinfo); + } + } - while (brick_count < volinfo->brick_count) { - ret = glusterd_brickinfo_new (&brickinfo); + ret = 0; - if (ret) - goto out; - snprintf (tmpkey, sizeof (tmpkey), "%s-%d", - GLUSTERD_STORE_KEY_VOL_BRICK,brick_count); - ret = glusterd_store_iter_get_matching (tmpiter, tmpkey, - &tmpvalue); - snprintf (path, sizeof (path), "%s/%s", brickdir, tmpvalue); - - GF_FREE (tmpvalue); +out: + if (dir) + sys_closedir(dir); + gf_msg_debug(this->name, 0, "Returning with %d", ret); - tmpvalue = NULL; + return ret; +} - ret = glusterd_store_handle_retrieve (path, &brickinfo->shandle); +/* Figure out the brick mount path, from the brick path */ +int32_t +glusterd_find_brick_mount_path(char *brick_path, char **brick_mount_path) +{ + char *ptr = NULL; + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(brick_path); + GF_ASSERT(brick_mount_path); + + *brick_mount_path = gf_strdup(brick_path); + if (!*brick_mount_path) { + ret = -1; + goto out; + } + + /* Finding the pointer to the end of + * /var/run/gluster/snaps/<snap-uuid> + */ + ptr = strstr(*brick_mount_path, "brick"); + if (!ptr) { + /* Snapshot bricks must have brick num as part + * of the brickpath + */ + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, + "Invalid brick path(%s)", brick_path); + ret = -1; + goto out; + } + + /* Moving the pointer to the end of + * /var/run/gluster/snaps/<snap-uuid>/<brick_num> + * and assigning '\0' to it. + */ + while ((*ptr != '\0') && (*ptr != '/')) + ptr++; + + if (*ptr == '/') { + *ptr = '\0'; + } + + ret = 0; +out: + if (ret && *brick_mount_path) { + GF_FREE(*brick_mount_path); + *brick_mount_path = NULL; + } + gf_msg_trace(this->name, 0, "Returning with %d", ret); + return ret; +} - if (ret) - goto out; +/* Check if brick_mount_path is already mounted. If not, mount the device_path + * at the brick_mount_path + */ +int32_t +glusterd_mount_brick_paths(char *brick_mount_path, + glusterd_brickinfo_t *brickinfo) +{ + int32_t ret = -1; + runner_t runner = { + 0, + }; + char buff[PATH_MAX] = { + 0, + }; + struct mntent save_entry = { + 0, + }; + struct mntent *entry = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(brick_mount_path); + GF_ASSERT(brickinfo); + + priv = this->private; + GF_ASSERT(priv); + + /* Check if the brick_mount_path is already mounted */ + entry = glusterd_get_mnt_entry_info(brick_mount_path, buff, sizeof(buff), + &save_entry); + if (entry) { + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_ALREADY_MOUNTED, + "brick_mount_path (%s) already mounted.", brick_mount_path); + ret = 0; + goto out; + } + + /* TODO RHEL 6.5 has the logical volumes inactive by default + * on reboot. Hence activating the logical vol. 
Check behaviour + * on other systems + */ + /* Activate the snapshot */ + runinit(&runner); + runner_add_args(&runner, "lvchange", "-ay", brickinfo->device_path, NULL); + ret = runner_run(&runner); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_SNAP_ACTIVATE_FAIL, + "Failed to activate %s.", brickinfo->device_path); + goto out; + } else + gf_msg_debug(this->name, 0, "Activating %s successful", + brickinfo->device_path); + + /* Mount the snapshot */ + ret = glusterd_mount_lvm_snapshot(brickinfo, brick_mount_path); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_MOUNT_FAIL, + "Failed to mount lvm snapshot."); + goto out; + } - ret = glusterd_store_iter_new (brickinfo->shandle, &iter); +out: + gf_msg_trace(this->name, 0, "Returning with %d", ret); + return ret; +} - if (ret) - goto out; +int32_t +glusterd_recreate_vol_brick_mounts(xlator_t *this, glusterd_volinfo_t *volinfo) +{ + char *brick_mount_path = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + int32_t ret = -1; + struct stat st_buf = { + 0, + }; + char abspath[PATH_MAX] = {0}; + + GF_ASSERT(this); + GF_ASSERT(volinfo); + + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + /* If the brick is not of this node, or its + * snapshot is pending, or the brick is not + * a snapshotted brick, we continue + */ + if ((gf_uuid_compare(brickinfo->uuid, MY_UUID)) || + (brickinfo->snap_status == -1) || + (strlen(brickinfo->device_path) == 0)) + continue; + + /* Fetch the brick mount path from the brickinfo->path */ + ret = glusterd_find_brick_mount_path(brickinfo->path, + &brick_mount_path); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRK_MNTPATH_GET_FAIL, + "Failed to find brick_mount_path for %s", brickinfo->path); + goto out; + } - ret = glusterd_store_iter_get_next (iter, &key, &value, - &op_errno); + /* Check if the brickinfo path is present. + * If not create the brick_mount_path */ + ret = sys_lstat(brickinfo->path, &st_buf); + if (ret) { + if (errno == ENOENT) { + ret = mkdir_p(brick_mount_path, 0755, _gf_true); if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, "Unable to iterate " - "the store for brick: %s, reason: %s", path, - glusterd_store_strerror (op_errno)); - goto out; - } - while (!ret) { - if (!strncmp (key, GLUSTERD_STORE_KEY_BRICK_HOSTNAME, - strlen (GLUSTERD_STORE_KEY_BRICK_HOSTNAME))) { - strncpy (brickinfo->hostname, value, 1024); - } else if (!strncmp (key, GLUSTERD_STORE_KEY_BRICK_PATH, - strlen (GLUSTERD_STORE_KEY_BRICK_PATH))) { - strncpy (brickinfo->path, value, - sizeof (brickinfo->path)); - } else if (!strncmp (key, GLUSTERD_STORE_KEY_BRICK_PORT, - strlen (GLUSTERD_STORE_KEY_BRICK_PORT))) { - gf_string2int (value, &brickinfo->port); - /* This is required to have proper ports - assigned to bricks after restart */ - pmap = pmap_registry_get (THIS); - if (pmap->last_alloc <= brickinfo->port) - pmap->last_alloc = brickinfo->port + 1; - } else { - gf_log ("", GF_LOG_ERROR, "Unknown key: %s", - key); - } - - GF_FREE (key); - GF_FREE (value); - key = NULL; - value = NULL; - - ret = glusterd_store_iter_get_next (iter, &key, &value, - &op_errno); + gf_msg(this->name, GF_LOG_ERROR, errno, + GD_MSG_CREATE_DIR_FAILED, "Failed to create %s. ", + brick_mount_path); + goto out; } + } else { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "Brick Path(%s) not valid. 
", brickinfo->path); + goto out; + } + } - if (op_errno != GD_STORE_EOF) - goto out; - ret = glusterd_store_iter_destroy (iter); - - if (ret) - goto out; + /* Check if brick_mount_path is already mounted. + * If not, mount the device_path at the brick_mount_path */ + ret = glusterd_mount_brick_paths(brick_mount_path, brickinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRK_MNTPATH_MOUNT_FAIL, + "Failed to mount brick_mount_path"); + } + if (!gf_uuid_compare(brickinfo->uuid, MY_UUID)) { + if (brickinfo->real_path[0] == '\0') { + if (!realpath(brickinfo->path, abspath)) { + gf_msg(this->name, GF_LOG_CRITICAL, errno, + GD_MSG_BRICKINFO_CREATE_FAIL, + "realpath() failed for brick %s" + ". The underlying file system " + "may be in bad state", + brickinfo->path); + ret = -1; + goto out; + } + if (strlen(abspath) >= sizeof(brickinfo->real_path)) { + ret = -1; + goto out; + } + (void)strncpy(brickinfo->real_path, abspath, + sizeof(brickinfo->real_path)); + } + } - list_add_tail (&brickinfo->brick_list, &volinfo->bricks); - brick_count++; + if (brick_mount_path) { + GF_FREE(brick_mount_path); + brick_mount_path = NULL; } + } - ret = glusterd_store_iter_destroy (tmpiter); - if (ret) - goto out; + ret = 0; out: - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + if (ret && brick_mount_path) + GF_FREE(brick_mount_path); - return ret; + gf_msg_trace(this->name, 0, "Returning with %d", ret); + return ret; } - int32_t -glusterd_store_retrieve_volume (char *volname) +glusterd_resolve_snap_bricks(xlator_t *this, glusterd_snap_t *snap) { - int32_t ret = -1; - glusterd_volinfo_t *volinfo = NULL; - glusterd_store_iter_t *iter = NULL; - char *key = NULL; - char *value = NULL; - char volpath[PATH_MAX] = {0,}; - glusterd_conf_t *priv = NULL; - char path[PATH_MAX] = {0,}; - int exists = 0; - glusterd_store_op_errno_t op_errno = GD_STORE_SUCCESS; + int32_t ret = -1; + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + + GF_ASSERT(this); + GF_VALIDATE_OR_GOTO(this->name, snap, out); + + cds_list_for_each_entry(volinfo, &snap->volumes, vol_list) + { + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + ret = glusterd_resolve_brick(brickinfo); + if (ret) { + gf_event(EVENT_BRICKPATH_RESOLVE_FAILED, + "peer=%s;volume=%s;brick=%s", brickinfo->hostname, + volinfo->volname, brickinfo->path); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RESOLVE_BRICK_FAIL, + "resolve brick failed in restore"); + goto out; + } + } + } - ret = glusterd_volinfo_new (&volinfo); + ret = 0; - if (ret) - goto out; +out: + gf_msg_trace(this->name, 0, "Returning with %d", ret); - strncpy (volinfo->volname, volname, GLUSTERD_MAX_VOLUME_NAME); + return ret; +} - priv = THIS->private; +int +glusterd_store_update_snap(glusterd_snap_t *snap) +{ + int ret = -1; + char *key = NULL; + char *value = NULL; + char snappath[PATH_MAX] = { + 0, + }; + char path[PATH_MAX] = { + 0, + }; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + gf_store_iter_t *iter = NULL; + gf_store_op_errno_t op_errno = GD_STORE_SUCCESS; + int32_t len = 0; + + this = THIS; + conf = this->private; + GF_ASSERT(snap); + + GLUSTERD_GET_SNAP_DIR(snappath, snap, conf); + + len = snprintf(path, sizeof(path), "%s/%s", snappath, + GLUSTERD_SNAP_INFO_FILE); + if ((len < 0) || (len >= sizeof(path))) { + goto out; + } + + ret = gf_store_handle_retrieve(path, &snap->shandle); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HANDLE_NULL, + "snap handle is NULL"); + goto out; + } + + ret = 
gf_store_iter_new(snap->shandle, &iter); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_GET_FAIL, + "Failed to get new store " + "iter"); + goto out; + } + + ret = gf_store_iter_get_next(iter, &key, &value, &op_errno); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_GET_FAIL, + "Failed to get next store " + "iter"); + goto out; + } + + while (!ret) { + gf_msg_debug(this->name, 0, "key = %s value = %s", key, value); + + if (!strncmp(key, GLUSTERD_STORE_KEY_SNAP_ID, + SLEN(GLUSTERD_STORE_KEY_SNAP_ID))) { + ret = gf_uuid_parse(value, snap->snap_id); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_UUID_PARSE_FAIL, + "Failed to parse uuid"); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_SNAP_RESTORED, + SLEN(GLUSTERD_STORE_KEY_SNAP_RESTORED))) { + snap->snap_restored = atoi(value); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_SNAP_STATUS, + SLEN(GLUSTERD_STORE_KEY_SNAP_STATUS))) { + snap->snap_status = atoi(value); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_SNAP_DESC, + SLEN(GLUSTERD_STORE_KEY_SNAP_DESC))) { + snap->description = gf_strdup(value); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_SNAP_TIMESTAMP, + SLEN(GLUSTERD_STORE_KEY_SNAP_TIMESTAMP))) { + snap->time_stamp = atoi(value); + } - GLUSTERD_GET_VOLUME_DIR(volpath, volinfo, priv); - snprintf (path, sizeof (path), "%s/%s", volpath, - GLUSTERD_VOLUME_INFO_FILE); + GF_FREE(key); + GF_FREE(value); + key = NULL; + value = NULL; - ret = glusterd_store_handle_retrieve (path, &volinfo->shandle); + ret = gf_store_iter_get_next(iter, &key, &value, &op_errno); + } - if (ret) - goto out; + if (op_errno != GD_STORE_EOF) + goto out; - ret = glusterd_store_iter_new (volinfo->shandle, &iter); + ret = 0; - if (ret) - goto out; +out: + if (gf_store_iter_destroy(&iter)) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL, + "Failed to destroy store iter"); + ret = -1; + } - ret = glusterd_store_iter_get_next (iter, &key, &value, &op_errno); - if (ret) - goto out; + return ret; +} - while (!ret) { - if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_TYPE, - strlen (GLUSTERD_STORE_KEY_VOL_TYPE))) { - volinfo->type = atoi (value); - } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_COUNT, - strlen (GLUSTERD_STORE_KEY_VOL_COUNT))) { - volinfo->brick_count = atoi (value); - } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_STATUS, - strlen (GLUSTERD_STORE_KEY_VOL_STATUS))) { - volinfo->status = atoi (value); - } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_VERSION, - strlen (GLUSTERD_STORE_KEY_VOL_VERSION))) { - volinfo->version = atoi (value); - } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_PORT, - strlen (GLUSTERD_STORE_KEY_VOL_PORT))) { - volinfo->port = atoi (value); - } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_SUB_COUNT, - strlen (GLUSTERD_STORE_KEY_VOL_SUB_COUNT))) { - volinfo->sub_count = atoi (value); - } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_TRANSPORT, - strlen (GLUSTERD_STORE_KEY_VOL_TRANSPORT))) { - volinfo->transport_type = atoi (value); - } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_ID, - strlen (GLUSTERD_STORE_KEY_VOL_ID))) { - ret = uuid_parse (value, volinfo->volume_id); - if (ret) - gf_log ("", GF_LOG_WARNING, - "failed to parse uuid"); - } else { - exists = glusterd_check_option_exists (key, NULL); - if (exists == -1) { - ret = -1; - goto out; - } - if (exists) { - ret = dict_set_str(volinfo->dict, key, - gf_strdup (value)); - if (ret) { - gf_log ("",GF_LOG_ERROR, "Error in " - "dict_set_str"); - goto out; - } - gf_log ("", GF_LOG_DEBUG, "Parsed as 
Volume-" - "set:key=%s,value:%s", - key, value); - } - else - gf_log ("", GF_LOG_ERROR, "Unknown key: %s", - key); - } +int32_t +glusterd_store_retrieve_snap(char *snapname) +{ + int32_t ret = -1; + glusterd_snap_t *snap = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + this = THIS; + priv = this->private; + GF_ASSERT(priv); + GF_ASSERT(snapname); + + snap = glusterd_new_snap_object(); + if (!snap) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_OBJECT_STORE_FAIL, + "Failed to create " + " snap object"); + goto out; + } + + if (snprintf(snap->snapname, sizeof(snap->snapname), "%s", snapname) >= + sizeof(snap->snapname)) + goto out; + ret = glusterd_store_update_snap(snap); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPSHOT_UPDATE_FAIL, + "Failed to update snapshot " + "for %s snap", + snapname); + goto out; + } + + ret = glusterd_store_retrieve_volumes(this, snap); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_VOL_RETRIEVE_FAIL, + "Failed to retrieve " + "snap volumes for snap %s", + snapname); + goto out; + } + + /* TODO: list_add_order can do 'N-square' comparisons and + is not efficient. Find a better solution to store the snap + in order */ + glusterd_list_add_order(&snap->snap_list, &priv->snapshots, + glusterd_compare_snap_time); - GF_FREE (key); - GF_FREE (value); - key = NULL; - value = NULL; +out: + gf_msg_trace(this->name, 0, "Returning with %d", ret); + return ret; +} - ret = glusterd_store_iter_get_next (iter, &key, &value, - &op_errno); +/* Read the missed_snap_list and update the in-memory structs */ +int32_t +glusterd_store_retrieve_missed_snaps_list(xlator_t *this) +{ + char path[PATH_MAX] = ""; + char *snap_vol_id = NULL; + char *missed_node_info = NULL; + char *brick_path = NULL; + char *value = NULL; + char *save_ptr = NULL; + FILE *fp = NULL; + int32_t brick_num = -1; + int32_t snap_op = -1; + int32_t snap_status = -1; + int32_t ret = -1; + glusterd_conf_t *priv = NULL; + gf_store_op_errno_t store_errno = GD_STORE_SUCCESS; + + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + /* Get the path of the missed_snap_list */ + glusterd_store_missed_snaps_list_path_set(path, sizeof(path)); + + fp = fopen(path, "r"); + if (!fp) { + /* If errno is ENOENT then there are no missed snaps yet */ + if (errno != ENOENT) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "Failed to open %s. 
", path); + } else { + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_MISSED_SNAP_LIST_EMPTY, + "No missed snaps list."); + ret = 0; } - if (op_errno != GD_STORE_EOF) - goto out; - - ret = glusterd_store_iter_destroy (iter); + goto out; + } - if (ret) - goto out; + do { + ret = gf_store_read_and_tokenize(fp, &missed_node_info, &value, + &store_errno); + if (ret) { + if (store_errno == GD_STORE_EOF) { + gf_msg_debug(this->name, 0, "EOF for missed_snap_list"); + ret = 0; + break; + } + gf_msg(this->name, GF_LOG_ERROR, store_errno, + GD_MSG_MISSED_SNAP_GET_FAIL, + "Failed to fetch data from " + "missed_snaps_list."); + goto out; + } - ret = glusterd_store_retrieve_bricks (volinfo); - if (ret) - goto out; + /* Fetch the brick_num, brick_path, snap_op and snap status */ + snap_vol_id = strtok_r(value, ":", &save_ptr); + brick_num = atoi(strtok_r(NULL, ":", &save_ptr)); + brick_path = strtok_r(NULL, ":", &save_ptr); + snap_op = atoi(strtok_r(NULL, ":", &save_ptr)); + snap_status = atoi(strtok_r(NULL, ":", &save_ptr)); + + if (!missed_node_info || !brick_path || !snap_vol_id || brick_num < 1 || + snap_op < 1 || snap_status < 1) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, + GD_MSG_INVALID_MISSED_SNAP_ENTRY, + "Invalid missed_snap_entry"); + ret = -1; + goto out; + } - ret = glusterd_volume_compute_cksum (volinfo); - if (ret) - goto out; + ret = glusterd_add_new_entry_to_list(missed_node_info, snap_vol_id, + brick_num, brick_path, snap_op, + snap_status); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_MISSED_SNAP_LIST_STORE_FAIL, + "Failed to store missed snaps_list"); + goto out; + } - list_add_tail (&volinfo->vol_list, &priv->volumes); + } while (store_errno == GD_STORE_SUCCESS); + ret = 0; out: - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + if (fp) + fclose(fp); - return ret; + gf_msg_trace(this->name, 0, "Returning with %d", ret); + return ret; } - int32_t -glusterd_store_retrieve_volumes (xlator_t *this) +glusterd_store_retrieve_snaps(xlator_t *this) { - int32_t ret = 0; - char path[PATH_MAX] = {0,}; - glusterd_conf_t *priv = NULL; - DIR *dir = NULL; - struct dirent *entry = NULL; - - GF_ASSERT (this); - priv = this->private; - - GF_ASSERT (priv); - - snprintf (path, PATH_MAX, "%s/%s", priv->workdir, - GLUSTERD_VOLUME_DIR_PREFIX); - - dir = opendir (path); - - if (!dir) { - gf_log ("", GF_LOG_ERROR, "Unable to open dir %s", path); - ret = -1; + int32_t ret = 0; + char path[PATH_MAX] = { + 0, + }; + glusterd_conf_t *priv = NULL; + DIR *dir = NULL; + struct dirent *entry = NULL; + struct dirent scratch[2] = { + { + 0, + }, + }; + int32_t len = 0; + + GF_ASSERT(this); + priv = this->private; + + GF_ASSERT(priv); + + len = snprintf(path, PATH_MAX, "%s/snaps", priv->workdir); + if ((len < 0) || (len >= PATH_MAX)) { + ret = -1; + goto out; + } + + dir = sys_opendir(path); + + if (!dir) { + /* If snaps dir doesn't exists ignore the error for + backward compatibility */ + if (errno != ENOENT) { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED, + "Unable to open dir %s", path); + } + goto out; + } + + while ((entry = sys_readdir(dir, scratch))) { + if (gf_irrelevant_entry(entry)) + continue; + if (strcmp(entry->d_name, GLUSTERD_MISSED_SNAPS_LIST_FILE)) { + ret = glusterd_store_retrieve_snap(entry->d_name); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_RESTORE_FAIL, + "Unable to restore snapshot: %s", entry->d_name); goto out; + } } + } - glusterd_for_each_entry (entry, dir); - - while (entry) { - ret = glusterd_store_retrieve_volume 
(entry->d_name); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to restore " - "volume: %s", entry->d_name); - goto out; - } - glusterd_for_each_entry (entry, dir); - } + /* Retrieve missed_snaps_list */ + ret = glusterd_store_retrieve_missed_snaps_list(this); + if (ret) { + gf_msg_debug(this->name, 0, "Failed to retrieve missed_snaps_list"); + goto out; + } out: - if (dir) - closedir (dir); - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + if (dir) + sys_closedir(dir); + gf_msg_debug(this->name, 0, "Returning with %d", ret); - return ret; + return ret; } +/* Writes all the contents of conf->missed_snap_list */ int32_t -glusterd_store_delete_peerinfo (glusterd_peerinfo_t *peerinfo) +glusterd_store_write_missed_snapinfo(int32_t fd) { - int32_t ret = -1; - glusterd_conf_t *priv = NULL; - char peerdir[PATH_MAX] = {0,}; - char filepath[PATH_MAX] = {0,}; - char hostname_path[PATH_MAX] = {0,}; - - - if (!peerinfo) { - ret = 0; + char key[(UUID_SIZE * 2) + 2]; + char value[PATH_MAX]; + int32_t ret = -1; + glusterd_conf_t *priv = NULL; + glusterd_missed_snap_info *missed_snapinfo = NULL; + glusterd_snap_op_t *snap_opinfo = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + GF_ASSERT(priv); + + /* Write the missed_snap_entry */ + cds_list_for_each_entry(missed_snapinfo, &priv->missed_snaps_list, + missed_snaps) + { + cds_list_for_each_entry(snap_opinfo, &missed_snapinfo->snap_ops, + snap_ops_list) + { + snprintf(key, sizeof(key), "%s:%s", missed_snapinfo->node_uuid, + missed_snapinfo->snap_uuid); + snprintf(value, sizeof(value), "%s:%d:%s:%d:%d", + snap_opinfo->snap_vol_id, snap_opinfo->brick_num, + snap_opinfo->brick_path, snap_opinfo->op, + snap_opinfo->status); + ret = gf_store_save_value(fd, key, value); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_MISSEDSNAP_INFO_SET_FAIL, + "Failed to write missed snapinfo"); goto out; + } } + } - priv = THIS->private; - - snprintf (peerdir, PATH_MAX, "%s/peers", priv->workdir); + ret = 0; +out: + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} +/* Adds the missed snap entries to the in-memory conf->missed_snap_list * + * and writes them to disk */ +int32_t +glusterd_store_update_missed_snaps() +{ + int32_t fd = -1; + int32_t ret = -1; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + GF_ASSERT(priv); + + ret = glusterd_store_create_missed_snaps_list_shandle_on_absence(); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_MISSED_SNAP_LIST_STORE_HANDLE_GET_FAIL, + "Unable to obtain " + "missed_snaps_list store handle."); + goto out; + } + + fd = gf_store_mkstemp(priv->missed_snaps_list_shandle); + if (fd <= 0) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "Failed to create tmp file"); + ret = -1; + goto out; + } + + ret = glusterd_store_write_missed_snapinfo(fd); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MISSED_SNAP_CREATE_FAIL, + "Failed to write missed snaps to disk"); + goto out; + } + + ret = gf_store_rename_tmppath(priv->missed_snaps_list_shandle); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "Failed to rename the tmp file"); + goto out; + } +out: + if (ret && (fd > 0)) { + ret = gf_store_unlink_tmppath(priv->missed_snaps_list_shandle); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TMP_FILE_UNLINK_FAIL, + "Failed to unlink the tmp file"); + } + ret = -1; + } - if (uuid_is_null (peerinfo->uuid)) { + 
gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} - if (peerinfo->hostname) { - snprintf (filepath, PATH_MAX, "%s/%s", peerdir, - peerinfo->hostname); - } else { - ret = 0; - goto out; - } +int32_t +glusterd_store_delete_peerinfo(glusterd_peerinfo_t *peerinfo) +{ + int32_t ret = -1; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + char peerdir[PATH_MAX] = { + 0, + }; + char filepath[PATH_MAX] = { + 0, + }; + char hostname_path[PATH_MAX] = { + 0, + }; + int32_t len = 0; + + if (!peerinfo) { + ret = 0; + goto out; + } + + this = THIS; + priv = this->private; + + len = snprintf(peerdir, PATH_MAX, "%s/peers", priv->workdir); + if ((len < 0) || (len >= PATH_MAX)) { + goto out; + } + + if (gf_uuid_is_null(peerinfo->uuid)) { + if (peerinfo->hostname) { + len = snprintf(filepath, PATH_MAX, "%s/%s", peerdir, + peerinfo->hostname); + if ((len < 0) || (len >= PATH_MAX)) { + goto out; + } } else { + ret = 0; + goto out; + } + } else { + len = snprintf(filepath, PATH_MAX, "%s/%s", peerdir, + uuid_utoa(peerinfo->uuid)); + if ((len < 0) || (len >= PATH_MAX)) { + goto out; + } + len = snprintf(hostname_path, PATH_MAX, "%s/%s", peerdir, + peerinfo->hostname); + if ((len < 0) || (len >= PATH_MAX)) { + goto out; + } - snprintf (filepath, PATH_MAX, "%s/%s", peerdir, - uuid_utoa (peerinfo->uuid)); - snprintf (hostname_path, PATH_MAX, "%s/%s", - peerdir, peerinfo->hostname); - - ret = unlink (hostname_path); + ret = sys_unlink(hostname_path); - if (!ret) - goto out; - } + if (!ret) + goto out; + } - ret = unlink (filepath); - if (ret && (errno == ENOENT)) - ret = 0; + ret = sys_unlink(filepath); + if (ret && (errno == ENOENT)) + ret = 0; out: - if (peerinfo->shandle) { - glusterd_store_handle_destroy (peerinfo->shandle); - peerinfo->shandle = NULL; - } - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + if (peerinfo && peerinfo->shandle) { + gf_store_handle_destroy(peerinfo->shandle); + peerinfo->shandle = NULL; + } + gf_msg_debug((this ? 
this->name : "glusterd"), 0, "Returning with %d", ret); - return ret; + return ret; } void -glusterd_store_peerinfo_dirpath_set (char *path, size_t len) +glusterd_store_peerinfo_dirpath_set(char *path, size_t len) { - glusterd_conf_t *priv = NULL; - GF_ASSERT (path); - GF_ASSERT (len >= PATH_MAX); + glusterd_conf_t *priv = NULL; + GF_ASSERT(path); + GF_ASSERT(len >= PATH_MAX); - priv = THIS->private; - snprintf (path, len, "%s/peers", priv->workdir); + priv = THIS->private; + snprintf(path, len, "%s/peers", priv->workdir); } int32_t -glusterd_store_create_peer_dir () +glusterd_store_create_peer_dir() { - int32_t ret = 0; - char path[PATH_MAX]; + int32_t ret = 0; + char path[PATH_MAX]; - glusterd_store_peerinfo_dirpath_set (path, sizeof (path)); - ret = glusterd_store_mkdir (path); + glusterd_store_peerinfo_dirpath_set(path, sizeof(path)); + ret = gf_store_mkdir(path); - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); - return ret; + gf_msg_debug("glusterd", 0, "Returning with %d", ret); + return ret; } static void -glusterd_store_uuid_peerpath_set (glusterd_peerinfo_t *peerinfo, char *peerfpath, - size_t len) +glusterd_store_uuid_peerpath_set(glusterd_peerinfo_t *peerinfo, char *peerfpath, + size_t len) { - char peerdir[PATH_MAX]; - char str[50] = {0}; + char peerdir[PATH_MAX]; + char str[50] = {0}; - GF_ASSERT (peerinfo); - GF_ASSERT (peerfpath); - GF_ASSERT (len >= PATH_MAX); + GF_ASSERT(peerinfo); + GF_ASSERT(peerfpath); + GF_ASSERT(len >= PATH_MAX); - glusterd_store_peerinfo_dirpath_set (peerdir, sizeof (peerdir)); - uuid_unparse (peerinfo->uuid, str); - snprintf (peerfpath, len, "%s/%s", peerdir, str); + glusterd_store_peerinfo_dirpath_set(peerdir, sizeof(peerdir)); + gf_uuid_unparse(peerinfo->uuid, str); + snprintf(peerfpath, len, "%s/%s", peerdir, str); } static void -glusterd_store_hostname_peerpath_set (glusterd_peerinfo_t *peerinfo, - char *peerfpath, size_t len) +glusterd_store_hostname_peerpath_set(glusterd_peerinfo_t *peerinfo, + char *peerfpath, size_t len) { - char peerdir[PATH_MAX]; + char peerdir[PATH_MAX]; - GF_ASSERT (peerinfo); - GF_ASSERT (peerfpath); - GF_ASSERT (len >= PATH_MAX); + GF_ASSERT(peerinfo); + GF_ASSERT(peerfpath); + GF_ASSERT(len >= PATH_MAX); - glusterd_store_peerinfo_dirpath_set (peerdir, sizeof (peerdir)); - snprintf (peerfpath, len, "%s/%s", peerdir, peerinfo->hostname); + glusterd_store_peerinfo_dirpath_set(peerdir, sizeof(peerdir)); + snprintf(peerfpath, len, "%s/%s", peerdir, peerinfo->hostname); } int32_t -glusterd_store_peerinfo_hostname_shandle_create (glusterd_peerinfo_t *peerinfo) +glusterd_store_peerinfo_hostname_shandle_create(glusterd_peerinfo_t *peerinfo) { - char peerfpath[PATH_MAX]; - int32_t ret = -1; + char peerfpath[PATH_MAX]; + int32_t ret = -1; - glusterd_store_hostname_peerpath_set (peerinfo, peerfpath, - sizeof (peerfpath)); - ret = glusterd_store_handle_create_on_absence (&peerinfo->shandle, - peerfpath); - return ret; + glusterd_store_hostname_peerpath_set(peerinfo, peerfpath, + sizeof(peerfpath)); + ret = gf_store_handle_create_on_absence(&peerinfo->shandle, peerfpath); + return ret; } int32_t -glusterd_store_peerinfo_uuid_shandle_create (glusterd_peerinfo_t *peerinfo) +glusterd_store_peerinfo_uuid_shandle_create(glusterd_peerinfo_t *peerinfo) { - char peerfpath[PATH_MAX]; - int32_t ret = -1; + char peerfpath[PATH_MAX]; + int32_t ret = -1; - glusterd_store_uuid_peerpath_set (peerinfo, peerfpath, - sizeof (peerfpath)); - ret = glusterd_store_handle_create_on_absence (&peerinfo->shandle, - peerfpath); - return ret; + 
glusterd_store_uuid_peerpath_set(peerinfo, peerfpath, sizeof(peerfpath)); + ret = gf_store_handle_create_on_absence(&peerinfo->shandle, peerfpath); + return ret; } int32_t -glusterd_peerinfo_hostname_shandle_check_destroy (glusterd_peerinfo_t *peerinfo) +glusterd_peerinfo_hostname_shandle_check_destroy(glusterd_peerinfo_t *peerinfo) { - char peerfpath[PATH_MAX]; - int32_t ret = -1; - struct stat stbuf = {0,}; - - glusterd_store_hostname_peerpath_set (peerinfo, peerfpath, - sizeof (peerfpath)); - ret = stat (peerfpath, &stbuf); - if (!ret) { - if (peerinfo->shandle) - glusterd_store_handle_destroy (peerinfo->shandle); - peerinfo->shandle = NULL; - ret = unlink (peerfpath); - } - return ret; + char peerfpath[PATH_MAX]; + int32_t ret = -1; + struct stat stbuf = { + 0, + }; + + glusterd_store_hostname_peerpath_set(peerinfo, peerfpath, + sizeof(peerfpath)); + ret = sys_stat(peerfpath, &stbuf); + if (!ret) { + if (peerinfo->shandle) + gf_store_handle_destroy(peerinfo->shandle); + peerinfo->shandle = NULL; + ret = sys_unlink(peerfpath); + } + return ret; } int32_t -glusterd_store_create_peer_shandle (glusterd_peerinfo_t *peerinfo) +glusterd_store_create_peer_shandle(glusterd_peerinfo_t *peerinfo) { - int32_t ret = 0; + int32_t ret = 0; - GF_ASSERT (peerinfo); + GF_ASSERT(peerinfo); - if (glusterd_peerinfo_is_uuid_unknown (peerinfo)) { - ret = glusterd_store_peerinfo_hostname_shandle_create (peerinfo); - } else { - ret = glusterd_peerinfo_hostname_shandle_check_destroy (peerinfo); - ret = glusterd_store_peerinfo_uuid_shandle_create (peerinfo); + if (gf_uuid_is_null(peerinfo->uuid)) { + ret = glusterd_store_peerinfo_hostname_shandle_create(peerinfo); + } else { + ret = glusterd_peerinfo_hostname_shandle_check_destroy(peerinfo); + ret = glusterd_store_peerinfo_uuid_shandle_create(peerinfo); + } + return ret; +} + +static int32_t +glusterd_store_peer_write(int fd, glusterd_peerinfo_t *peerinfo) +{ + char buf[PATH_MAX]; + uint total_len = 0; + int32_t ret = 0; + int32_t i = 1; + glusterd_peer_hostname_t *hostname = NULL; + + ret = snprintf(buf + total_len, sizeof(buf) - total_len, "%s=%s\n%s=%d\n", + GLUSTERD_STORE_KEY_PEER_UUID, uuid_utoa(peerinfo->uuid), + GLUSTERD_STORE_KEY_PEER_STATE, peerinfo->state.state); + if (ret < 0 || ret >= sizeof(buf) - total_len) { + ret = -1; + goto out; + } + total_len += ret; + + cds_list_for_each_entry(hostname, &peerinfo->hostnames, hostname_list) + { + ret = snprintf(buf + total_len, sizeof(buf) - total_len, + GLUSTERD_STORE_KEY_PEER_HOSTNAME "%d=%s\n", i, + hostname->hostname); + if (ret < 0 || ret >= sizeof(buf) - total_len) { + ret = -1; + goto out; } - return ret; + total_len += ret; + i++; + } + + ret = gf_store_save_items(fd, buf); +out: + gf_msg_debug("glusterd", 0, "Returning with %d", ret); + return ret; } int32_t -glusterd_store_peer_write (int fd, glusterd_peerinfo_t *peerinfo) +glusterd_store_perform_peer_store(glusterd_peerinfo_t *peerinfo) { - char buf[50] = {0}; - int32_t ret = 0; + int fd = -1; + int32_t ret = -1; - ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_PEER_UUID, - uuid_utoa (peerinfo->uuid)); - if (ret) - goto out; + GF_ASSERT(peerinfo); - snprintf (buf, sizeof (buf), "%d", peerinfo->state.state); - ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_PEER_STATE, buf); - if (ret) - goto out; + fd = gf_store_mkstemp(peerinfo->shandle); + if (fd <= 0) { + ret = -1; + goto out; + } - ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_PEER_HOSTNAME "1", - peerinfo->hostname); + ret = glusterd_store_peer_write(fd, peerinfo); + 
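For reference, the peer record that glusterd_store_peer_write() serializes above is a flat key=value file kept under <workdir>/peers/, one key per line, with hostnames written as indexed keys so every known address of the peer survives a restart. A hypothetical example of such a file (UUID, state and hostnames are illustrative only, not taken from this patch):

    uuid=3a2d5d8f-16e2-42dd-a1a8-2f6a2c4f9a11
    state=3
    hostname1=gfs-node02.example.com
    hostname2=10.0.0.12

glusterd_store_perform_peer_store() then writes the new contents to a temporary file (gf_store_mkstemp) and renames it over the previous one (gf_store_rename_tmppath), so a crash mid-write never leaves a truncated peer file behind.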
if (ret) + goto out; + + ret = gf_store_rename_tmppath(peerinfo->shandle); out: - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); - return ret; + if (ret && (fd > 0)) + gf_store_unlink_tmppath(peerinfo->shandle); + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; } int32_t -glusterd_store_perform_peer_store (glusterd_peerinfo_t *peerinfo) +glusterd_store_peerinfo(glusterd_peerinfo_t *peerinfo) { - int fd = -1; - int32_t ret = -1; + int32_t ret = -1; - GF_ASSERT (peerinfo); + GF_ASSERT(peerinfo); - fd = glusterd_store_mkstemp (peerinfo->shandle); - if (fd <= 0) { - ret = -1; - goto out; - } + ret = glusterd_store_create_peer_dir(); + if (ret) + goto out; - ret = glusterd_store_peer_write (fd, peerinfo); - if (ret) - goto out; + ret = glusterd_store_create_peer_shandle(peerinfo); + if (ret) + goto out; - ret = glusterd_store_rename_tmppath (peerinfo->shandle); + ret = glusterd_store_perform_peer_store(peerinfo); out: - if (ret && (fd > 0)) - glusterd_store_unlink_tmppath (peerinfo->shandle); - if (fd > 0) - close (fd); - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + gf_msg_debug("glusterd", 0, "Returning with %d", ret); + return ret; } int32_t -glusterd_store_peerinfo (glusterd_peerinfo_t *peerinfo) +glusterd_store_retrieve_peers(xlator_t *this) { - int32_t ret = -1; - - GF_ASSERT (peerinfo); - - ret = glusterd_store_create_peer_dir (); + int32_t ret = 0; + glusterd_conf_t *priv = NULL; + DIR *dir = NULL; + struct dirent *entry = NULL; + struct dirent scratch[2] = { + { + 0, + }, + }; + char path[PATH_MAX] = { + 0, + }; + glusterd_peerinfo_t *peerinfo = NULL; + gf_store_handle_t *shandle = NULL; + char filepath[PATH_MAX] = { + 0, + }; + gf_store_iter_t *iter = NULL; + char *key = NULL; + char *value = NULL; + glusterd_peerctx_args_t args = {0}; + gf_store_op_errno_t op_errno = GD_STORE_SUCCESS; + glusterd_peer_hostname_t *address = NULL; + uuid_t tmp_uuid; + gf_boolean_t is_ok; + int32_t len; + + GF_ASSERT(this); + priv = this->private; + + GF_ASSERT(priv); + + len = snprintf(path, PATH_MAX, "%s/%s", priv->workdir, + GLUSTERD_PEER_DIR_PREFIX); + if ((len < 0) || (len >= PATH_MAX)) { + ret = -1; + goto out; + } + + dir = sys_opendir(path); + + if (!dir) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED, + "Unable to open dir %s", path); + ret = -1; + goto out; + } + + while ((entry = sys_readdir(dir, scratch))) { + if (gf_irrelevant_entry(entry)) + continue; + if (gf_uuid_parse(entry->d_name, tmp_uuid) != 0) { + gf_log(this->name, GF_LOG_WARNING, "skipping non-peer file %s", + entry->d_name); + continue; + } + is_ok = _gf_false; + len = snprintf(filepath, PATH_MAX, "%s/%s", path, entry->d_name); + if ((len < 0) || (len >= PATH_MAX)) { + goto next; + } + ret = gf_store_handle_retrieve(filepath, &shandle); if (ret) - goto out; + goto next; - ret = glusterd_store_create_peer_shandle (peerinfo); + ret = gf_store_iter_new(shandle, &iter); if (ret) - goto out; + goto next; - ret = glusterd_store_perform_peer_store (peerinfo); -out: - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); - return ret; -} + ret = gf_store_iter_get_next(iter, &key, &value, &op_errno); + if (ret) { + goto next; + } -int32_t -glusterd_store_retrieve_peers (xlator_t *this) -{ - int32_t ret = 0; - glusterd_conf_t *priv = NULL; - DIR *dir = NULL; - struct dirent *entry = NULL; - char path[PATH_MAX] = {0,}; - glusterd_peerinfo_t *peerinfo = NULL; - uuid_t uuid = {0,}; - char *hostname = NULL; - int32_t state = 0; - glusterd_store_handle_t *shandle = NULL; - char 
filepath[PATH_MAX] = {0,}; - glusterd_store_iter_t *iter = NULL; - char *key = NULL; - char *value = NULL; - glusterd_peerctx_args_t args = {0}; - glusterd_store_op_errno_t op_errno = GD_STORE_SUCCESS; - - GF_ASSERT (this); - priv = this->private; - - GF_ASSERT (priv); - - snprintf (path, PATH_MAX, "%s/%s", priv->workdir, - GLUSTERD_PEER_DIR_PREFIX); - - dir = opendir (path); - - if (!dir) { - gf_log ("", GF_LOG_ERROR, "Unable to open dir %s", path); - ret = -1; - goto out; + /* Create an empty peerinfo object before reading in the + * details + */ + peerinfo = glusterd_peerinfo_new(GD_FRIEND_STATE_DEFAULT, NULL, NULL, + 0); + if (peerinfo == NULL) { + ret = -1; + goto next; } - glusterd_for_each_entry (entry, dir); + while (!ret) { + if (!strncmp(GLUSTERD_STORE_KEY_PEER_UUID, key, + SLEN(GLUSTERD_STORE_KEY_PEER_UUID))) { + if (value) + gf_uuid_parse(value, peerinfo->uuid); + } else if (!strncmp(GLUSTERD_STORE_KEY_PEER_STATE, key, + SLEN(GLUSTERD_STORE_KEY_PEER_STATE))) { + peerinfo->state.state = atoi(value); + } else if (!strncmp(GLUSTERD_STORE_KEY_PEER_HOSTNAME, key, + SLEN(GLUSTERD_STORE_KEY_PEER_HOSTNAME))) { + ret = gd_add_address_to_peer(peerinfo, value); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_ADD_ADDRESS_TO_PEER_FAIL, + "Could not add address to peer"); + } + } else { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UNKNOWN_KEY, + "Unknown key: %s", key); + } - while (entry) { - snprintf (filepath, PATH_MAX, "%s/%s", path, entry->d_name); - ret = glusterd_store_handle_retrieve (filepath, &shandle); - if (ret) - goto out; + GF_FREE(key); + GF_FREE(value); + key = NULL; + value = NULL; - ret = glusterd_store_iter_new (shandle, &iter); - if (ret) - goto out; + ret = gf_store_iter_get_next(iter, &key, &value, &op_errno); + } + if (op_errno != GD_STORE_EOF) { + goto next; + } - ret = glusterd_store_iter_get_next (iter, &key, &value, - &op_errno); - if (ret) - goto out; + if (gf_uuid_is_null(peerinfo->uuid)) { + gf_log("", GF_LOG_ERROR, + "Null UUID while attempting to read peer from '%s'", + filepath); + goto next; + } - while (!ret) { - - if (!strncmp (GLUSTERD_STORE_KEY_PEER_UUID, key, - strlen (GLUSTERD_STORE_KEY_PEER_UUID))) { - if (value) - uuid_parse (value, uuid); - } else if (!strncmp (GLUSTERD_STORE_KEY_PEER_STATE, - key, - strlen (GLUSTERD_STORE_KEY_PEER_STATE))) { - state = atoi (value); - } else if (!strncmp (GLUSTERD_STORE_KEY_PEER_HOSTNAME, - key, - strlen (GLUSTERD_STORE_KEY_PEER_HOSTNAME))) { - hostname = gf_strdup (value); - } else { - gf_log ("", GF_LOG_ERROR, "Unknown key: %s", - key); - } - - GF_FREE (key); - GF_FREE (value); - key = NULL; - value = NULL; - - ret = glusterd_store_iter_get_next (iter, &key, &value, - &op_errno); - } - if (op_errno != GD_STORE_EOF) - goto out; + /* Set first hostname from peerinfo->hostnames to + * peerinfo->hostname + */ + address = cds_list_entry(peerinfo->hostnames.next, + glusterd_peer_hostname_t, hostname_list); + peerinfo->hostname = gf_strdup(address->hostname); - (void) glusterd_store_iter_destroy (iter); + ret = glusterd_friend_add_from_peerinfo(peerinfo, 1, NULL); + if (ret) + goto next; - args.mode = GD_MODE_SWITCH_ON; - ret = glusterd_friend_add (hostname, 0, state, &uuid, - NULL, &peerinfo, 1, &args); + peerinfo->shandle = shandle; + is_ok = _gf_true; - GF_FREE (hostname); - if (ret) - goto out; + next: + (void)gf_store_iter_destroy(&iter); - peerinfo->shandle = shandle; - glusterd_for_each_entry (entry, dir); + if (!is_ok) { + gf_log(this->name, GF_LOG_WARNING, + "skipping malformed peer file %s", 
entry->d_name); + if (peerinfo) { + glusterd_peerinfo_cleanup(peerinfo); + } } + peerinfo = NULL; + } + + args.mode = GD_MODE_ON; + + RCU_READ_LOCK; + cds_list_for_each_entry_rcu(peerinfo, &priv->peers, uuid_list) + { + ret = glusterd_friend_rpc_create(this, peerinfo, &args); + if (ret) + break; + } + RCU_READ_UNLOCK; + peerinfo = NULL; out: - if (dir) - closedir (dir); - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); - return ret; + if (dir) + sys_closedir(dir); + gf_msg_debug(this->name, 0, "Returning with %d", ret); + + return ret; } +/* Bricks for snap volumes are hosted at /var/run/gluster/snaps + * When a volume is restored, it points to the bricks of the snap + * volume it was restored from. Hence on a node restart these + * paths need to be recreated and re-mounted + */ int32_t -glusterd_resolve_all_bricks (xlator_t *this) -{ - int32_t ret = 0; - glusterd_conf_t *priv = NULL; - glusterd_volinfo_t *volinfo = NULL; - glusterd_brickinfo_t *brickinfo = NULL; - - GF_ASSERT (this); - priv = this->private; - - GF_ASSERT (priv); - - list_for_each_entry (volinfo, &priv->volumes, vol_list) { - list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { - ret = glusterd_resolve_brick (brickinfo); - if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, - "resolve brick failed in restore"); - goto out; - } +glusterd_recreate_all_snap_brick_mounts(xlator_t *this) +{ + int32_t ret = 0; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_snap_t *snap = NULL; + + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + /* Recreate bricks of volumes restored from snaps */ + cds_list_for_each_entry(volinfo, &priv->volumes, vol_list) + { + /* If the volume is not a restored volume then continue */ + if (gf_uuid_is_null(volinfo->restored_from_snap)) + continue; + + ret = glusterd_recreate_vol_brick_mounts(this, volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRK_MNT_RECREATE_FAIL, + "Failed to recreate brick mounts " + "for %s", + volinfo->volname); + goto out; + } + } + + /* Recreate bricks of snapshot volumes + * We are not creating brick mounts for stopped snaps. + */ + cds_list_for_each_entry(snap, &priv->snapshots, snap_list) + { + cds_list_for_each_entry(volinfo, &snap->volumes, vol_list) + { + if (volinfo->status != GLUSTERD_STATUS_STOPPED) { + ret = glusterd_recreate_vol_brick_mounts(this, volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_BRK_MNT_RECREATE_FAIL, + "Failed to recreate brick " + "mounts for %s", + snap->snapname); + goto out; } + } + } + } + +out: + gf_msg_trace(this->name, 0, "Returning with %d", ret); + return ret; +} + +/* When the snapshot command from cli is received, the on disk and + * in memory structures for the snapshot are created (with the status) + * being marked as GD_SNAP_STATUS_INIT. Once the backend snapshot is + * taken, the status is changed to GD_SNAP_STATUS_IN_USE. If glusterd + * dies after taking the backend snapshot, but before updating the + * status, then when glusterd comes up, it should treat that snapshot + * as a failed snapshot and clean it up. + * + * Restore operation starts by setting the status to + * GD_SNAP_STATUS_RESTORED. If the server goes down before changing + * the status the status back we need to revert the partial snapshot + * taken. 
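The recovery rules spelled out in this comment map onto a single branch per snapshot in glusterd_snap_cleanup() below. A condensed, non-normative sketch of that decision, with error handling and logging omitted:

    switch (snap->snap_status) {
    case GD_SNAP_STATUS_RESTORED:
        /* restore was interrupted: roll back to the pre-restore state */
        ret = glusterd_snapshot_revert_restore_from_snap(snap);
        break;
    case GD_SNAP_STATUS_IN_USE:
        /* backend snapshot completed and the status was persisted:
         * nothing to clean up */
        break;
    default:
        /* INIT or any other transient status: the snapshot never
         * completed, so remove it as a failed snapshot */
        ret = glusterd_snap_remove(dict, snap, _gf_true, _gf_true, _gf_false);
        break;
    }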
+ */ +int32_t +glusterd_snap_cleanup(xlator_t *this) +{ + dict_t *dict = NULL; + int32_t ret = 0; + glusterd_conf_t *priv = NULL; + glusterd_snap_t *snap = NULL; + glusterd_snap_t *tmp_snap = NULL; + + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + dict = dict_new(); + if (!dict) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, + "Failed to create dict"); + ret = -1; + goto out; + } + + cds_list_for_each_entry_safe(snap, tmp_snap, &priv->snapshots, snap_list) + { + if (snap->snap_status == GD_SNAP_STATUS_RESTORED) { + ret = glusterd_snapshot_revert_restore_from_snap(snap); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, + GD_MSG_SNAP_RESTORE_REVERT_FAIL, + "Failed to " + "revert partially restored snapshot " + "(%s)", + snap->snapname); + goto out; + } + } else if (snap->snap_status != GD_SNAP_STATUS_IN_USE) { + ret = glusterd_snap_remove(dict, snap, _gf_true, _gf_true, + _gf_false); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_REMOVE_FAIL, + "Failed to remove the snapshot %s", snap->snapname); + goto out; + } + } + } +out: + if (dict) + dict_unref(dict); + + gf_msg_trace(this->name, 0, "Returning with %d", ret); + return ret; +} + +int32_t +glusterd_resolve_all_bricks(xlator_t *this) +{ + int32_t ret = 0; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_snap_t *snap = NULL; + + GF_ASSERT(this); + priv = this->private; + + GF_ASSERT(priv); + + /* Resolve bricks of volumes */ + cds_list_for_each_entry(volinfo, &priv->volumes, vol_list) + { + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + ret = glusterd_resolve_brick(brickinfo); + if (ret) { + gf_event(EVENT_BRICKPATH_RESOLVE_FAILED, + "peer=%s;volume=%s;brick=%s", brickinfo->hostname, + volinfo->volname, brickinfo->path); + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_RESOLVE_BRICK_FAIL, + "Failed to resolve brick %s with host %s of volume %s" + " in restore", + brickinfo->path, brickinfo->hostname, volinfo->volname); + goto out; + } + } + } + + /* Resolve bricks of snapshot volumes */ + cds_list_for_each_entry(snap, &priv->snapshots, snap_list) + { + ret = glusterd_resolve_snap_bricks(this, snap); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_RESOLVE_BRICK_FAIL, + "resolving the snap bricks" + " failed for snap: %s", + snap->snapname); + goto out; } + } out: - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + gf_msg_trace(this->name, 0, "Returning with %d", ret); + return ret; +} - return ret; +int32_t +glusterd_restore() +{ + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + + ret = glusterd_options_init(this); + if (ret < 0) + goto out; + + ret = glusterd_store_retrieve_volumes(this, NULL); + if (ret) + goto out; + + ret = glusterd_store_retrieve_peers(this); + if (ret) + goto out; + + /* While retrieving snapshots, if the snapshot status + is not GD_SNAP_STATUS_IN_USE, then the snapshot is + cleaned up. To do that, the snap volume has to be + stopped by stopping snapshot volume's bricks. And for + that the snapshot bricks should be resolved. But without + retrieving the peers, resolving bricks will fail. So + do retrieving of snapshots after retrieving peers. 
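Read together with the comment above, this function's body is easiest to follow as an ordered pipeline in which each step supplies what the next one needs. A condensed restatement of the calls made here (error handling omitted; this mirrors the existing code rather than adding behaviour):

    ret = glusterd_options_init(this);                   /* global options */
    ret = glusterd_store_retrieve_volumes(this, NULL);   /* regular volumes */
    ret = glusterd_store_retrieve_peers(this);           /* peers first ... */
    ret = glusterd_store_retrieve_snaps(this);           /* ... then snaps */
    ret = glusterd_resolve_all_bricks(this);             /* needs peers */
    ret = glusterd_snap_cleanup(this);                   /* needs resolved bricks */
    ret = glusterd_recreate_all_snap_brick_mounts(this); /* remount snap bricks */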
+ */ + ret = glusterd_store_retrieve_snaps(this); + if (ret) + goto out; + + ret = glusterd_resolve_all_bricks(this); + if (ret) + goto out; + + ret = glusterd_snap_cleanup(this); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_CLEANUP_FAIL, + "Failed to perform " + "a cleanup of the snapshots"); + goto out; + } + + ret = glusterd_recreate_all_snap_brick_mounts(this); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_BRK_MNT_RECREATE_FAIL, + "Failed to recreate " + "all snap brick mounts"); + goto out; + } + +out: + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; +} + +int +glusterd_store_retrieve_quota_version(glusterd_volinfo_t *volinfo) +{ + int ret = -1; + uint32_t version = 0; + char cksum_path[PATH_MAX] = { + 0, + }; + char path[PATH_MAX] = { + 0, + }; + char *version_str = NULL; + char *tmp = NULL; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + gf_store_handle_t *handle = NULL; + int32_t len = 0; + + this = THIS; + GF_ASSERT(this); + conf = this->private; + GF_ASSERT(conf); + + GLUSTERD_GET_VOLUME_DIR(path, volinfo, conf); + len = snprintf(cksum_path, sizeof(cksum_path), "%s/%s", path, + GLUSTERD_VOL_QUOTA_CKSUM_FILE); + if ((len < 0) || (len >= sizeof(cksum_path))) { + goto out; + } + + ret = gf_store_handle_new(cksum_path, &handle); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_HANDLE_GET_FAIL, + "Unable to get store handle " + "for %s", + cksum_path); + goto out; + } + + ret = gf_store_retrieve_value(handle, "version", &version_str); + if (ret) { + gf_msg_debug(this->name, 0, "Version absent"); + ret = 0; + goto out; + } + + version = strtoul(version_str, &tmp, 10); + if ((errno == ERANGE) || (errno == EINVAL)) { + gf_msg_debug(this->name, 0, "Invalid version number"); + goto out; + } + volinfo->quota_conf_version = version; + ret = 0; + +out: + if (version_str) + GF_FREE(version_str); + gf_store_handle_destroy(handle); + return ret; +} + +int +glusterd_store_save_quota_version_and_cksum(glusterd_volinfo_t *volinfo) +{ + gf_store_handle_t *shandle = NULL; + glusterd_conf_t *conf = NULL; + xlator_t *this = NULL; + char path[PATH_MAX] = {0}; + char cksum_path[PATH_MAX + 32] = { + 0, + }; + char buf[64] = {0}; + int fd = -1; + int32_t ret = -1; + int32_t len = 0; + + this = THIS; + conf = this->private; + + GLUSTERD_GET_VOLUME_DIR(path, volinfo, conf); + len = snprintf(cksum_path, sizeof(cksum_path), "%s/%s", path, + GLUSTERD_VOL_QUOTA_CKSUM_FILE); + if ((len < 0) || (len >= sizeof(cksum_path))) { + goto out; + } + + ret = gf_store_handle_new(cksum_path, &shandle); + if (ret) + goto out; + + fd = gf_store_mkstemp(shandle); + if (fd <= 0) { + ret = -1; + goto out; + } + + ret = snprintf(buf, sizeof(buf), "cksum=%u\nversion=%u\n", + volinfo->quota_conf_cksum, volinfo->quota_conf_version); + if (ret < 0 || ret >= sizeof(buf)) { + ret = -1; + goto out; + } + + ret = gf_store_save_items(fd, buf); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CKSUM_STORE_FAIL, + "Failed to store quota cksum and version"); + goto out; + } + + ret = gf_store_rename_tmppath(shandle); + if (ret) + goto out; + +out: + if ((ret < 0) && (fd > 0)) + gf_store_unlink_tmppath(shandle); + gf_store_handle_destroy(shandle); + return ret; } int32_t -glusterd_restore () +glusterd_quota_conf_write_header(int fd) { - int32_t ret = -1; - xlator_t *this = NULL; + int header_len = 0; + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; - this = THIS; + this = THIS; + GF_VALIDATE_OR_GOTO("quota", this, out); - ret = 
glusterd_store_retrieve_volumes (this); + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, out); - if (ret) - goto out; + if (conf->op_version < GD_OP_VERSION_3_7_0) { + header_len = SLEN(QUOTA_CONF_HEADER_1_1); + ret = gf_nwrite(fd, QUOTA_CONF_HEADER_1_1, header_len); + } else { + header_len = SLEN(QUOTA_CONF_HEADER); + ret = gf_nwrite(fd, QUOTA_CONF_HEADER, header_len); + } - ret = glusterd_store_retrieve_peers (this); - if (ret) - goto out; + if (ret != header_len) { + ret = -1; + goto out; + } - ret = glusterd_resolve_all_bricks (this); - if (ret) - goto out; + ret = 0; out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + if (ret < 0) + gf_msg_callingfn("quota", GF_LOG_ERROR, 0, GD_MSG_QUOTA_CONF_WRITE_FAIL, + "failed to write " + "header to a quota conf"); + + return ret; +} + +int32_t +glusterd_quota_conf_write_gfid(int fd, void *buf, char type) +{ + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO("quota", this, out); + + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, out); + + ret = gf_nwrite(fd, buf, 16); + if (ret != 16) { + ret = -1; + goto out; + } + + if (conf->op_version >= GD_OP_VERSION_3_7_0) { + ret = gf_nwrite(fd, &type, 1); + if (ret != 1) { + ret = -1; + goto out; + } + } + + ret = 0; + +out: + if (ret < 0) + gf_msg_callingfn("quota", GF_LOG_ERROR, 0, GD_MSG_QUOTA_CONF_WRITE_FAIL, + "failed to write " + "gfid %s to a quota conf", + uuid_utoa(buf)); + + return ret; } diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h index 0403c10f955..83f4df0783e 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.h +++ b/xlators/mgmt/glusterd/src/glusterd-store.h @@ -1,128 +1,216 @@ /* - Copyright (c) 2006-2010 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ #ifndef _GLUSTERD_HA_H_ #define _GLUSTERD_HA_H_ -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - #include <pthread.h> -#include "uuid.h" - -#include "glusterfs.h" -#include "xlator.h" -#include "logging.h" -#include "call-stub.h" -#include "fd.h" -#include "byte-order.h" +#include <glusterfs/compat-uuid.h> + +#include <glusterfs/glusterfs.h> +#include <glusterfs/xlator.h> +#include <glusterfs/run.h> +#include <glusterfs/logging.h> +#include <glusterfs/call-stub.h> +#include <glusterfs/byte-order.h> #include "glusterd.h" #include "rpcsvc.h" -typedef enum glusterd_store_ver_ac_{ - GLUSTERD_VOLINFO_VER_AC_NONE = 0, - GLUSTERD_VOLINFO_VER_AC_INCREMENT = 1, +typedef enum glusterd_store_ver_ac_ { + GLUSTERD_VOLINFO_VER_AC_NONE = 0, + GLUSTERD_VOLINFO_VER_AC_INCREMENT = 1, + GLUSTERD_VOLINFO_VER_AC_DECREMENT = 2, } glusterd_volinfo_ver_ac_t; +#define UUID_SIZE 36 +#define VOLINFO_BUFFER_SIZE 4093 +#define GLUSTERD_STORE_UUID_KEY "UUID" + +#define GLUSTERD_STORE_KEY_VOL_TYPE "type" +#define GLUSTERD_STORE_KEY_VOL_COUNT "count" +#define GLUSTERD_STORE_KEY_VOL_STATUS "status" +#define GLUSTERD_STORE_KEY_VOL_PORT "port" +#define GLUSTERD_STORE_KEY_VOL_SUB_COUNT "sub_count" +#define GLUSTERD_STORE_KEY_VOL_STRIPE_CNT "stripe_count" +#define GLUSTERD_STORE_KEY_VOL_REPLICA_CNT "replica_count" +#define GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT "disperse_count" +#define GLUSTERD_STORE_KEY_VOL_REDUNDANCY_CNT "redundancy_count" +#define GLUSTERD_STORE_KEY_VOL_ARBITER_CNT "arbiter_count" +#define GLUSTERD_STORE_KEY_VOL_THIN_ARBITER_CNT "thin_arbiter_count" +#define GLUSTERD_STORE_KEY_VOL_BRICK "brick" +#define GLUSTERD_STORE_KEY_VOL_TA_BRICK "ta-brick" +#define GLUSTERD_STORE_KEY_VOL_VERSION "version" +#define GLUSTERD_STORE_KEY_VOL_TRANSPORT "transport-type" +#define GLUSTERD_STORE_KEY_VOL_ID "volume-id" +#define GLUSTERD_STORE_KEY_VOL_RESTORED_SNAP "restored_from_snap" +#define GLUSTERD_STORE_KEY_RB_STATUS "rb_status" +#define GLUSTERD_STORE_KEY_RB_SRC_BRICK "rb_src" +#define GLUSTERD_STORE_KEY_RB_DST_BRICK "rb_dst" +#define GLUSTERD_STORE_KEY_RB_DST_PORT "rb_port" +#define GLUSTERD_STORE_KEY_VOL_DEFRAG "rebalance_status" +#define GLUSTERD_STORE_KEY_VOL_DEFRAG_STATUS "status" +#define GLUSTERD_STORE_KEY_DEFRAG_OP "rebalance_op" +#define GLUSTERD_STORE_KEY_USERNAME "username" +#define GLUSTERD_STORE_KEY_PASSWORD "password" +#define GLUSTERD_STORE_KEY_PARENT_VOLNAME "parent_volname" +#define GLUSTERD_STORE_KEY_VOL_OP_VERSION "op-version" +#define GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION "client-op-version" +#define GLUSTERD_STORE_KEY_VOL_QUOTA_VERSION "quota-version" + +#define GLUSTERD_STORE_KEY_SNAP_NAME "name" +#define GLUSTERD_STORE_KEY_SNAP_ID "snap-id" +#define GLUSTERD_STORE_KEY_SNAP_DESC "desc" +#define GLUSTERD_STORE_KEY_SNAP_TIMESTAMP "time-stamp" +#define GLUSTERD_STORE_KEY_SNAP_STATUS "status" +#define GLUSTERD_STORE_KEY_SNAP_RESTORED "snap-restored" +#define GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT "snap-max-hard-limit" +#define GLUSTERD_STORE_KEY_SNAP_AUTO_DELETE "auto-delete" +#define GLUSTERD_STORE_KEY_SNAP_MAX_SOFT_LIMIT "snap-max-soft-limit" +#define GLUSTERD_STORE_KEY_SNAPD_PORT "snapd-port" +#define GLUSTERD_STORE_KEY_SNAP_ACTIVATE "snap-activate-on-create" -#define GLUSTERD_STORE_UUID_KEY "UUID" +#define GLUSTERD_STORE_KEY_BRICK_HOSTNAME "hostname" +#define GLUSTERD_STORE_KEY_BRICK_PATH "path" +#define GLUSTERD_STORE_KEY_BRICK_REAL_PATH "real_path" +#define GLUSTERD_STORE_KEY_BRICK_PORT "listen-port" +#define GLUSTERD_STORE_KEY_BRICK_RDMA_PORT "rdma.listen-port" +#define 
GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED "decommissioned" +#define GLUSTERD_STORE_KEY_BRICK_VGNAME "vg" +#define GLUSTERD_STORE_KEY_BRICK_DEVICE_PATH "device_path" +#define GLUSTERD_STORE_KEY_BRICK_MOUNT_DIR "mount_dir" +#define GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS "snap-status" +#define GLUSTERD_STORE_KEY_BRICK_FSTYPE "fs-type" +#define GLUSTERD_STORE_KEY_BRICK_MNTOPTS "mnt-opts" +#define GLUSTERD_STORE_KEY_BRICK_ID "brick-id" +#define GLUSTERD_STORE_KEY_BRICK_FSID "brick-fsid" +#define GLUSTERD_STORE_KEY_BRICK_UUID "uuid" + +#define GLUSTERD_STORE_KEY_PEER_UUID "uuid" +#define GLUSTERD_STORE_KEY_PEER_HOSTNAME "hostname" +#define GLUSTERD_STORE_KEY_PEER_STATE "state" +#define GLUSTERD_STORE_KEY_VOL_CAPS "caps" /* left just for backward compat */ + +#define GLUSTERD_STORE_KEY_VOL_DEFRAG_REB_FILES "rebalanced-files" +#define GLUSTERD_STORE_KEY_VOL_DEFRAG_SIZE "size" +#define GLUSTERD_STORE_KEY_VOL_DEFRAG_SCANNED "scanned" +#define GLUSTERD_STORE_KEY_VOL_DEFRAG_FAILURES "failures" +#define GLUSTERD_STORE_KEY_VOL_DEFRAG_SKIPPED "skipped" +#define GLUSTERD_STORE_KEY_VOL_DEFRAG_RUN_TIME "run-time" + +#define GLUSTERD_STORE_KEY_VOL_MIGRATED_FILES "migrated-files" +#define GLUSTERD_STORE_KEY_VOL_MIGRATED_SIZE "migration-size" +#define GLUSTERD_STORE_KEY_VOL_MIGRATIONS_SCANNED "migration-scanned" +#define GLUSTERD_STORE_KEY_VOL_MIGRATIONS_FAILURES "migration-failures" +#define GLUSTERD_STORE_KEY_VOL_MIGRATIONS_SKIPPED "migration-skipped" +#define GLUSTERD_STORE_KEY_VOL_MIGRATION_RUN_TIME "migration-run-time" + +#define GLUSTERD_STORE_KEY_GANESHA_GLOBAL "nfs-ganesha" -#define GLUSTERD_STORE_KEY_VOL_TYPE "type" -#define GLUSTERD_STORE_KEY_VOL_COUNT "count" -#define GLUSTERD_STORE_KEY_VOL_STATUS "status" -#define GLUSTERD_STORE_KEY_VOL_PORT "port" -#define GLUSTERD_STORE_KEY_VOL_SUB_COUNT "sub_count" -#define GLUSTERD_STORE_KEY_VOL_BRICK "brick" -#define GLUSTERD_STORE_KEY_VOL_VERSION "version" -#define GLUSTERD_STORE_KEY_VOL_TRANSPORT "transport-type" -#define GLUSTERD_STORE_KEY_VOL_ID "volume-id" +/* + * The structure is responsible for handling the parameter for writes into + * the buffer before it is finally written to the file. The writes will be + * of the form of key-value pairs. 
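Each of these keys is written as one key=value pair per line into the corresponding info file under the glusterd working directory and read back through the gf_store_iter helpers used throughout glusterd-store.c. A hypothetical fragment of a volume info file, using only keys defined above (all values are illustrative):

    type=2
    count=2
    status=1
    version=7
    transport-type=0
    volume-id=9f3c8a2e-6b1d-4f4e-9a51-0c5f0e6d7a42
    op-version=30700

Per-brick attributes (the GLUSTERD_STORE_KEY_BRICK_* keys) are stored separately and reloaded by glusterd_store_retrieve_bricks().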
+ */ +struct glusterd_volinfo_data_store_ { + gf_store_handle_t *shandle; /*Contains fd and path of the file */ + int16_t buffer_len; + char key_check; /* flag to check if key is to be validated before write*/ + char buffer[VOLINFO_BUFFER_SIZE]; +}; +typedef struct glusterd_volinfo_data_store_ glusterd_volinfo_data_store_t; -#define GLUSTERD_STORE_KEY_BRICK_HOSTNAME "hostname" -#define GLUSTERD_STORE_KEY_BRICK_PATH "path" -#define GLUSTERD_STORE_KEY_BRICK_PORT "listen-port" +int32_t +glusterd_store_volinfo(glusterd_volinfo_t *volinfo, + glusterd_volinfo_ver_ac_t ac); + +int32_t +glusterd_store_delete_volume(glusterd_volinfo_t *volinfo); -#define GLUSTERD_STORE_KEY_PEER_UUID "uuid" -#define GLUSTERD_STORE_KEY_PEER_HOSTNAME "hostname" -#define GLUSTERD_STORE_KEY_PEER_STATE "state" +int32_t +glusterd_store_delete_snap(glusterd_snap_t *snap); -#define glusterd_for_each_entry(entry, dir) \ - do {\ - entry = NULL;\ - if (dir) {\ - entry = readdir (dir);\ - while (entry && (!strcmp (entry->d_name, ".") ||\ - !strcmp (entry->d_name, ".."))) {\ - entry = readdir (dir);\ - }\ - }\ - } while (0); \ +int32_t +glusterd_retrieve_uuid(); + +int32_t +glusterd_store_peerinfo(glusterd_peerinfo_t *peerinfo); -typedef enum { - GD_STORE_SUCCESS, - GD_STORE_KEY_NULL, - GD_STORE_VALUE_NULL, - GD_STORE_KEY_VALUE_NULL, - GD_STORE_EOF, - GD_STORE_ENOMEM -} glusterd_store_op_errno_t; +int32_t +glusterd_store_delete_peerinfo(glusterd_peerinfo_t *peerinfo); int32_t -glusterd_store_volinfo (glusterd_volinfo_t *volinfo, glusterd_volinfo_ver_ac_t ac); +glusterd_store_delete_brick(glusterd_brickinfo_t *brickinfo, char *delete_path); int32_t -glusterd_store_delete_volume (glusterd_volinfo_t *volinfo); +glusterd_restore(); + +void +glusterd_perform_volinfo_version_action(glusterd_volinfo_t *volinfo, + glusterd_volinfo_ver_ac_t ac); +gf_boolean_t +glusterd_store_is_valid_brickpath(char *volname, char *brick); int32_t -glusterd_store_uuid (); +glusterd_store_perform_node_state_store(glusterd_volinfo_t *volinfo); + +int +glusterd_retrieve_op_version(xlator_t *this, int *op_version); + +int +glusterd_retrieve_max_op_version(xlator_t *this, int *op_version); + +int +glusterd_store_max_op_version(xlator_t *this); + +int +glusterd_store_global_info(xlator_t *this); int32_t -glusterd_store_handle_new (char *path, glusterd_store_handle_t **handle); +glusterd_store_retrieve_options(xlator_t *this); int32_t -glusterd_store_save_value (int fd, char *key, char *value); +glusterd_store_retrieve_bricks(glusterd_volinfo_t *volinfo); int32_t -glusterd_store_retrieve_value (glusterd_store_handle_t *handle, - char *key, char **value); +glusterd_store_options(xlator_t *this, dict_t *opts); + +void +glusterd_replace_slash_with_hyphen(char *str); int32_t -glusterd_retrieve_uuid (); +glusterd_store_create_quota_conf_sh_on_absence(glusterd_volinfo_t *volinfo); + +int +glusterd_store_retrieve_quota_version(glusterd_volinfo_t *volinfo); + +int +glusterd_store_save_quota_version_and_cksum(glusterd_volinfo_t *volinfo); int32_t -glusterd_store_peerinfo (glusterd_peerinfo_t *peerinfo); +glusterd_store_snap(glusterd_snap_t *snap); int32_t -glusterd_store_delete_peerinfo (glusterd_peerinfo_t *peerinfo); +glusterd_store_update_missed_snaps(); + +glusterd_volinfo_t * +glusterd_store_retrieve_volume(char *volname, glusterd_snap_t *snap); + +int +glusterd_restore_op_version(xlator_t *this); int32_t -glusterd_store_delete_brick (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo); +glusterd_quota_conf_write_header(int fd); int32_t 
-glusterd_store_handle_destroy (glusterd_store_handle_t *handle); +glusterd_quota_conf_write_gfid(int fd, void *buf, char type); int32_t -glusterd_restore (); +glusterd_recreate_vol_brick_mounts(xlator_t *this, glusterd_volinfo_t *volinfo); -void -glusterd_perform_volinfo_version_action (glusterd_volinfo_t *volinfo, - glusterd_volinfo_ver_ac_t ac); #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c new file mode 100644 index 00000000000..ca845903c4f --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c @@ -0,0 +1,1047 @@ +/* + Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#include <signal.h> + +#include <glusterfs/globals.h> +#include <glusterfs/run.h> +#include "glusterd.h" +#include <glusterfs/glusterfs.h> +#include "glusterd-utils.h" +#include "glusterd-svc-mgmt.h" +#include "glusterd-shd-svc.h" +#include "glusterd-quotad-svc.h" +#ifdef BUILD_GNFS +#include "glusterd-nfs-svc.h" +#endif +#include "glusterd-bitd-svc.h" +#include "glusterd-shd-svc-helper.h" +#include "glusterd-scrub-svc.h" +#include "glusterd-svc-helper.h" +#include <glusterfs/syscall.h> +#include "glusterd-snapshot-utils.h" + +int +glusterd_svcs_reconfigure(glusterd_volinfo_t *volinfo) +{ + int ret = 0; + xlator_t *this = THIS; + glusterd_conf_t *conf = NULL; + char *svc_name = NULL; + + GF_ASSERT(this); + + conf = this->private; + GF_ASSERT(conf); + +#ifdef BUILD_GNFS + svc_name = "nfs"; + ret = glusterd_nfssvc_reconfigure(); + if (ret) + goto out; +#endif + svc_name = "self-heald"; + if (volinfo) { + ret = glusterd_shdsvc_reconfigure(volinfo); + if (ret) + goto out; + } + + if (conf->op_version == GD_OP_VERSION_MIN) + goto out; + + svc_name = "quotad"; + ret = glusterd_quotadsvc_reconfigure(); + if (ret) + goto out; + + svc_name = "bitd"; + ret = glusterd_bitdsvc_reconfigure(); + if (ret) + goto out; + + svc_name = "scrubber"; + ret = glusterd_scrubsvc_reconfigure(); +out: + if (ret && svc_name) + gf_event(EVENT_SVC_RECONFIGURE_FAILED, "svc_name=%s", svc_name); + return ret; +} + +int +glusterd_svcs_stop(glusterd_volinfo_t *volinfo) +{ + int ret = 0; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + GF_ASSERT(priv); + +#ifdef BUILD_GNFS + ret = priv->nfs_svc.stop(&(priv->nfs_svc), SIGKILL); + if (ret) + goto out; +#endif + ret = priv->quotad_svc.stop(&(priv->quotad_svc), SIGTERM); + if (ret) + goto out; + + if (volinfo) { + ret = volinfo->shd.svc.stop(&(volinfo->shd.svc), SIGTERM); + if (ret) + goto out; + } + + ret = priv->bitd_svc.stop(&(priv->bitd_svc), SIGTERM); + if (ret) + goto out; + + ret = priv->scrub_svc.stop(&(priv->scrub_svc), SIGTERM); + +out: + return ret; +} + +int +glusterd_svcs_manager(glusterd_volinfo_t *volinfo) +{ + int ret = 0; + xlator_t *this = THIS; + glusterd_conf_t *conf = NULL; + + GF_ASSERT(this); + + conf = this->private; + GF_ASSERT(conf); + + if (volinfo && volinfo->is_snap_volume) + return 0; + +#if BUILD_GNFS + ret = conf->nfs_svc.manager(&(conf->nfs_svc), NULL, PROC_START_NO_WAIT); + if (ret) + goto out; +#endif + if (conf->op_version == GD_OP_VERSION_MIN) + goto out; + + ret = conf->quotad_svc.manager(&(conf->quotad_svc), volinfo, 
+ PROC_START_NO_WAIT); + if (ret == -EINVAL) + ret = 0; + if (ret) + goto out; + + ret = conf->bitd_svc.manager(&(conf->bitd_svc), NULL, PROC_START_NO_WAIT); + if (ret == -EINVAL) + ret = 0; + if (ret) + goto out; + + if (volinfo) { + ret = volinfo->shd.svc.manager(&(volinfo->shd.svc), volinfo, + PROC_START_NO_WAIT); + if (ret == -EINVAL) + ret = 0; + if (ret) + goto out; + } + + ret = conf->scrub_svc.manager(&(conf->scrub_svc), NULL, PROC_START_NO_WAIT); + if (ret == -EINVAL) + ret = 0; +out: + return ret; +} + +int +glusterd_svc_check_volfile_identical(char *svc_name, + glusterd_graph_builder_t builder, + gf_boolean_t *identical) +{ + char orgvol[PATH_MAX] = { + 0, + }; + char *tmpvol = NULL; + glusterd_conf_t *conf = NULL; + xlator_t *this = NULL; + int ret = -1; + int need_unlink = 0; + int tmp_fd = -1; + + this = THIS; + + GF_ASSERT(this); + GF_ASSERT(identical); + conf = this->private; + + glusterd_svc_build_volfile_path(svc_name, conf->workdir, orgvol, + sizeof(orgvol)); + + ret = gf_asprintf(&tmpvol, "/tmp/g%s-XXXXXX", svc_name); + if (ret < 0) { + goto out; + } + + /* coverity[SECURE_TEMP] mkstemp uses 0600 as the mode and is safe */ + tmp_fd = mkstemp(tmpvol); + if (tmp_fd < 0) { + gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED, + "Unable to create temp file" + " %s:(%s)", + tmpvol, strerror(errno)); + ret = -1; + goto out; + } + + need_unlink = 1; + + ret = glusterd_create_global_volfile(builder, tmpvol, NULL); + if (ret) + goto out; + + ret = glusterd_check_files_identical(orgvol, tmpvol, identical); +out: + if (need_unlink) + sys_unlink(tmpvol); + + if (tmpvol != NULL) + GF_FREE(tmpvol); + + if (tmp_fd >= 0) + sys_close(tmp_fd); + + return ret; +} + +int +glusterd_svc_check_topology_identical(char *svc_name, + glusterd_graph_builder_t builder, + gf_boolean_t *identical) +{ + char orgvol[PATH_MAX] = { + 0, + }; + char *tmpvol = NULL; + glusterd_conf_t *conf = NULL; + xlator_t *this = THIS; + int ret = -1; + int tmpclean = 0; + int tmpfd = -1; + + if ((!identical) || (!this) || (!this->private)) { + gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); + goto out; + } + + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, out); + + /* Fetch the original volfile */ + glusterd_svc_build_volfile_path(svc_name, conf->workdir, orgvol, + sizeof(orgvol)); + + /* Create the temporary volfile */ + ret = gf_asprintf(&tmpvol, "/tmp/g%s-XXXXXX", svc_name); + if (ret < 0) { + goto out; + } + + /* coverity[SECURE_TEMP] mkstemp uses 0600 as the mode and is safe */ + tmpfd = mkstemp(tmpvol); + if (tmpfd < 0) { + gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED, + "Unable to create temp file" + " %s:(%s)", + tmpvol, strerror(errno)); + ret = -1; + goto out; + } + + tmpclean = 1; /* SET the flag to unlink() tmpfile */ + + ret = glusterd_create_global_volfile(builder, tmpvol, NULL); + if (ret) + goto out; + + /* Compare the topology of volfiles */ + ret = glusterd_check_topology_identical(orgvol, tmpvol, identical); +out: + if (tmpfd >= 0) + sys_close(tmpfd); + if (tmpclean) + sys_unlink(tmpvol); + if (tmpvol != NULL) + GF_FREE(tmpvol); + return ret; +} + +int +glusterd_volume_svc_check_volfile_identical( + char *svc_name, dict_t *mode_dict, glusterd_volinfo_t *volinfo, + glusterd_vol_graph_builder_t builder, gf_boolean_t *identical) +{ + char orgvol[PATH_MAX] = { + 0, + }; + char *tmpvol = NULL; + xlator_t *this = NULL; + int ret = -1; + int need_unlink = 0; + int tmp_fd = -1; + + this = THIS; + + GF_VALIDATE_OR_GOTO("glusterd", this, 
out); + GF_VALIDATE_OR_GOTO(this->name, identical, out); + + /* This builds volfile for volume level dameons */ + glusterd_volume_svc_build_volfile_path(svc_name, volinfo, orgvol, + sizeof(orgvol)); + + ret = gf_asprintf(&tmpvol, "/tmp/g%s-XXXXXX", svc_name); + if (ret < 0) { + goto out; + } + + /* coverity[SECURE_TEMP] mkstemp uses 0600 as the mode and is safe */ + tmp_fd = mkstemp(tmpvol); + if (tmp_fd < 0) { + gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED, + "Unable to create temp file" + " %s:(%s)", + tmpvol, strerror(errno)); + ret = -1; + goto out; + } + + need_unlink = 1; + + ret = builder(volinfo, tmpvol, mode_dict); + if (ret) + goto out; + + ret = glusterd_check_files_identical(orgvol, tmpvol, identical); +out: + if (need_unlink) + sys_unlink(tmpvol); + + if (tmpvol != NULL) + GF_FREE(tmpvol); + + if (tmp_fd >= 0) + sys_close(tmp_fd); + + return ret; +} + +int +glusterd_volume_svc_check_topology_identical( + char *svc_name, dict_t *mode_dict, glusterd_volinfo_t *volinfo, + glusterd_vol_graph_builder_t builder, gf_boolean_t *identical) +{ + char orgvol[PATH_MAX] = { + 0, + }; + char *tmpvol = NULL; + glusterd_conf_t *conf = NULL; + xlator_t *this = THIS; + int ret = -1; + int tmpclean = 0; + int tmpfd = -1; + + if ((!identical) || (!this) || (!this->private)) { + gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); + goto out; + } + + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, out); + + /* This builds volfile for volume level dameons */ + glusterd_volume_svc_build_volfile_path(svc_name, volinfo, orgvol, + sizeof(orgvol)); + /* Create the temporary volfile */ + ret = gf_asprintf(&tmpvol, "/tmp/g%s-XXXXXX", svc_name); + if (ret < 0) { + goto out; + } + + /* coverity[SECURE_TEMP] mkstemp uses 0600 as the mode and is safe */ + tmpfd = mkstemp(tmpvol); + if (tmpfd < 0) { + gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED, + "Unable to create temp file" + " %s:(%s)", + tmpvol, strerror(errno)); + ret = -1; + goto out; + } + + tmpclean = 1; /* SET the flag to unlink() tmpfile */ + + ret = builder(volinfo, tmpvol, mode_dict); + if (ret) + goto out; + + /* Compare the topology of volfiles */ + ret = glusterd_check_topology_identical(orgvol, tmpvol, identical); +out: + if (tmpfd >= 0) + sys_close(tmpfd); + if (tmpclean) + sys_unlink(tmpvol); + if (tmpvol != NULL) + GF_FREE(tmpvol); + return ret; +} + +gf_boolean_t +glusterd_is_svcproc_attachable(glusterd_svc_proc_t *svc_proc) +{ + int pid = -1; + glusterd_svc_t *parent_svc = NULL; + + if (!svc_proc) + return _gf_false; + + if (svc_proc->status == GF_SVC_STARTING) + return _gf_true; + + if (svc_proc->status == GF_SVC_STARTED || + svc_proc->status == GF_SVC_DISCONNECTED) { + parent_svc = cds_list_entry(svc_proc->svcs.next, glusterd_svc_t, + mux_svc); + if (parent_svc && gf_is_service_running(parent_svc->proc.pidfile, &pid)) + return _gf_true; + } + + if (svc_proc->status == GF_SVC_DIED || svc_proc->status == GF_SVC_STOPPING) + return _gf_false; + + return _gf_false; +} + +void * +__gf_find_compatible_svc(gd_node_type daemon) +{ + glusterd_svc_proc_t *svc_proc = NULL; + struct cds_list_head *svc_procs = NULL; + glusterd_conf_t *conf = NULL; + + conf = THIS->private; + GF_VALIDATE_OR_GOTO("glusterd", conf, out); + + switch (daemon) { + case GD_NODE_SHD: { + svc_procs = &conf->shd_procs; + if (!svc_procs) + goto out; + } break; + default: + /* Add support for other client daemons here */ + goto out; + } + + cds_list_for_each_entry(svc_proc, svc_procs, svc_proc_list) + { + if 
(glusterd_is_svcproc_attachable(svc_proc)) + return (void *)svc_proc; + /* + * Logic to select one process goes here. Currently there is only one + * shd_proc. So selecting the first one; + */ + } +out: + return NULL; +} + +glusterd_svc_proc_t * +glusterd_svcprocess_new() +{ + glusterd_svc_proc_t *new_svcprocess = NULL; + + new_svcprocess = GF_CALLOC(1, sizeof(*new_svcprocess), + gf_gld_mt_glusterd_svc_proc_t); + + if (!new_svcprocess) + return NULL; + + CDS_INIT_LIST_HEAD(&new_svcprocess->svc_proc_list); + CDS_INIT_LIST_HEAD(&new_svcprocess->svcs); + new_svcprocess->notify = glusterd_muxsvc_common_rpc_notify; + new_svcprocess->status = GF_SVC_STARTING; + return new_svcprocess; +} + +int +glusterd_shd_svc_mux_init(glusterd_volinfo_t *volinfo, glusterd_svc_t *svc) +{ + int ret = -1; + glusterd_svc_proc_t *mux_proc = NULL; + glusterd_conn_t *mux_conn = NULL; + glusterd_conf_t *conf = NULL; + glusterd_svc_t *parent_svc = NULL; + int pid = -1; + gf_boolean_t stop_daemon = _gf_false; + char pidfile[PATH_MAX] = { + 0, + }; + + GF_VALIDATE_OR_GOTO("glusterd", svc, out); + GF_VALIDATE_OR_GOTO("glusterd", volinfo, out); + conf = THIS->private; + GF_VALIDATE_OR_GOTO("glusterd", conf, out); + GF_VALIDATE_OR_GOTO("glusterd", svc, out); + + pthread_mutex_lock(&conf->attach_lock); + { + if (svc->inited && !glusterd_proc_is_running(&(svc->proc))) { + /* This is the case when shd process was abnormally killed */ + pthread_mutex_unlock(&conf->attach_lock); + glusterd_shd_svcproc_cleanup(&volinfo->shd); + pthread_mutex_lock(&conf->attach_lock); + } + + if (!svc->inited) { + glusterd_svc_build_shd_pidfile(volinfo, pidfile, sizeof(pidfile)); + ret = snprintf(svc->proc.name, sizeof(svc->proc.name), "%s", + "glustershd"); + if (ret < 0) + goto unlock; + + ret = snprintf(svc->proc.pidfile, sizeof(svc->proc.pidfile), "%s", + pidfile); + if (ret < 0) + goto unlock; + + if (gf_is_service_running(pidfile, &pid)) { + /* Just connect is required, but we don't know what happens + * during the disconnect. So better to reattach. + */ + mux_proc = __gf_find_compatible_svc_from_pid(GD_NODE_SHD, pid); + } + + if (!mux_proc) { + if (pid != -1 && sys_access(pidfile, R_OK) == 0) { + /* stale pid file, stop and unlink it. This has to be + * done outside the attach_lock. 
+ */ + stop_daemon = _gf_true; + } + mux_proc = __gf_find_compatible_svc(GD_NODE_SHD); + } + if (mux_proc) { + /* Take first entry from the process */ + parent_svc = cds_list_entry(mux_proc->svcs.next, glusterd_svc_t, + mux_svc); + mux_conn = &parent_svc->conn; + if (volinfo) + volinfo->shd.attached = _gf_true; + } else { + mux_proc = glusterd_svcprocess_new(); + if (!mux_proc) { + ret = -1; + goto unlock; + } + cds_list_add_tail(&mux_proc->svc_proc_list, &conf->shd_procs); + } + svc->svc_proc = mux_proc; + cds_list_del_init(&svc->mux_svc); + cds_list_add_tail(&svc->mux_svc, &mux_proc->svcs); + ret = glusterd_shdsvc_init(volinfo, mux_conn, mux_proc); + if (ret) { + pthread_mutex_unlock(&conf->attach_lock); + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_FAILED_INIT_SHDSVC, + "Failed to init shd " + "service"); + goto out; + } + gf_msg_debug(THIS->name, 0, "shd service initialized"); + svc->inited = _gf_true; + } + ret = 0; + } +unlock: + pthread_mutex_unlock(&conf->attach_lock); +out: + if (stop_daemon) { + glusterd_proc_stop(&svc->proc, SIGTERM, PROC_STOP_FORCE); + glusterd_unlink_file(pidfile); + } + return ret; +} + +void * +__gf_find_compatible_svc_from_pid(gd_node_type daemon, pid_t pid) +{ + glusterd_svc_proc_t *svc_proc = NULL; + struct cds_list_head *svc_procs = NULL; + glusterd_svc_t *svc = NULL; + pid_t mux_pid = -1; + glusterd_conf_t *conf = NULL; + + conf = THIS->private; + if (!conf) + return NULL; + + switch (daemon) { + case GD_NODE_SHD: { + svc_procs = &conf->shd_procs; + if (!svc_procs) + return NULL; + } break; + default: + /* Add support for other client daemons here */ + return NULL; + } + + cds_list_for_each_entry(svc_proc, svc_procs, svc_proc_list) + { + cds_list_for_each_entry(svc, &svc_proc->svcs, mux_svc) + { + if (gf_is_service_running(svc->proc.pidfile, &mux_pid)) { + if (mux_pid == pid && + glusterd_is_svcproc_attachable(svc_proc)) { + /*TODO + * inefficient loop, but at the moment, there is only + * one shd. 
+ */ + return svc_proc; + } + } + } + } + return NULL; +} + +static int32_t +my_callback(struct rpc_req *req, struct iovec *iov, int count, void *v_frame) +{ + call_frame_t *frame = v_frame; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + + GF_VALIDATE_OR_GOTO("glusterd", frame, out); + this = frame->this; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, out); + + if (GF_ATOMIC_DEC(conf->blockers) == 0) { + synccond_broadcast(&conf->cond_blockers); + } + + STACK_DESTROY(frame->root); +out: + return 0; +} + +static int32_t +glusterd_svc_attach_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *v_frame) +{ + call_frame_t *frame = v_frame; + glusterd_volinfo_t *volinfo = NULL; + glusterd_shdsvc_t *shd = NULL; + glusterd_svc_t *svc = frame->cookie; + glusterd_conf_t *conf = NULL; + int *flag = (int *)frame->local; + xlator_t *this = THIS; + int ret = -1; + gf_getspec_rsp rsp = { + 0, + }; + + GF_VALIDATE_OR_GOTO("glusterd", this, out); + conf = this->private; + GF_VALIDATE_OR_GOTO("glusterd", conf, out); + GF_VALIDATE_OR_GOTO("glusterd", frame, out); + GF_VALIDATE_OR_GOTO("glusterd", svc, out); + + frame->local = NULL; + frame->cookie = NULL; + + if (!strcmp(svc->name, "glustershd")) { + /* Get volinfo->shd from svc object */ + shd = cds_list_entry(svc, glusterd_shdsvc_t, svc); + if (!shd) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_SHD_OBJ_GET_FAIL, + "Failed to get shd object " + "from shd service"); + goto out; + } + + /* Get volinfo from shd */ + volinfo = cds_list_entry(shd, glusterd_volinfo_t, shd); + if (!volinfo) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, + "Failed to get volinfo " + "from shd"); + goto out; + } + } + + if (!iov) { + gf_msg(frame->this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, + "iov is NULL"); + ret = -1; + goto out; + } + + ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_getspec_rsp); + if (ret < 0) { + gf_msg(frame->this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, + "XDR decoding error"); + ret = -1; + goto out; + } + + if (rsp.op_ret == 0) { + svc->online = _gf_true; + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_SVC_ATTACH_FAIL, + "svc %s of volume %s attached successfully to pid %d", svc->name, + volinfo->volname, glusterd_proc_get_pid(&svc->proc)); + } else { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_ATTACH_FAIL, + "svc %s of volume %s failed to attach to pid %d", svc->name, + volinfo->volname, glusterd_proc_get_pid(&svc->proc)); + if (!strcmp(svc->name, "glustershd")) { + glusterd_shd_svcproc_cleanup(&volinfo->shd); + } + } +out: + if (flag) { + GF_FREE(flag); + } + + if (volinfo) + glusterd_volinfo_unref(volinfo); + + if (GF_ATOMIC_DEC(conf->blockers) == 0) { + synccond_broadcast(&conf->cond_blockers); + } + STACK_DESTROY(frame->root); + return 0; +} + +extern size_t +build_volfile_path(char *volume_id, char *path, size_t path_len, + char *trusted_str, dict_t *dict); + +int +__glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flags, + struct rpc_clnt *rpc, char *volfile_id, + int op) +{ + int ret = -1; + struct iobuf *iobuf = NULL; + struct iobref *iobref = NULL; + struct iovec iov = { + 0, + }; + char path[PATH_MAX] = { + '\0', + }; + struct stat stbuf = { + 0, + }; + int32_t spec_fd = -1; + size_t file_len = -1; + char *volfile_content = NULL; + ssize_t req_size = 0; + call_frame_t *frame = NULL; + gd1_mgmt_brick_op_req brick_req; + dict_t *dict = NULL; + void *req = &brick_req; + void *errlbl = &&err; + struct 
rpc_clnt_connection *conn; + xlator_t *this = THIS; + glusterd_conf_t *conf = THIS->private; + extern struct rpc_clnt_program gd_brick_prog; + fop_cbk_fn_t cbkfn = my_callback; + + if (!rpc) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_PARAM_NULL, + "called with null rpc"); + return -1; + } + + conn = &rpc->conn; + if (!conn->connected || conn->disconnected) { + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_CONNECT_RETURNED, + "not connected yet"); + return -1; + } + + brick_req.op = op; + brick_req.name = volfile_id; + brick_req.input.input_val = NULL; + brick_req.input.input_len = 0; + brick_req.dict.dict_val = NULL; + brick_req.dict.dict_len = 0; + + frame = create_frame(this, this->ctx->pool); + if (!frame) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_FRAME_CREATE_FAIL, + NULL); + goto *errlbl; + } + + if (op == GLUSTERD_SVC_ATTACH) { + dict = dict_new(); + if (!dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, + NULL); + ret = -ENOMEM; + goto *errlbl; + } + + (void)build_volfile_path(volfile_id, path, sizeof(path), NULL, dict); + + ret = sys_stat(path, &stbuf); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_ATTACH_FAIL, + "Unable to stat %s (%s)", path, strerror(errno)); + ret = -EINVAL; + goto *errlbl; + } + + file_len = stbuf.st_size; + volfile_content = GF_MALLOC(file_len + 1, gf_common_mt_char); + if (!volfile_content) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); + ret = -ENOMEM; + goto *errlbl; + } + spec_fd = open(path, O_RDONLY); + if (spec_fd < 0) { + gf_msg(THIS->name, GF_LOG_WARNING, 0, GD_MSG_SVC_ATTACH_FAIL, + "failed to read volfile %s", path); + ret = -EIO; + goto *errlbl; + } + ret = sys_read(spec_fd, volfile_content, file_len); + if (ret == file_len) { + brick_req.input.input_val = volfile_content; + brick_req.input.input_len = file_len; + } else { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_ATTACH_FAIL, + "read failed on path %s. File size=%" GF_PRI_SIZET + "read size=%d", + path, file_len, ret); + ret = -EIO; + goto *errlbl; + } + if (dict->count > 0) { + ret = dict_allocate_and_serialize(dict, &brick_req.dict.dict_val, + &brick_req.dict.dict_len); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); + goto *errlbl; + } + } + + frame->cookie = svc; + frame->local = GF_CALLOC(1, sizeof(int), gf_gld_mt_int); + *((int *)frame->local) = flags; + cbkfn = glusterd_svc_attach_cbk; + } + + req_size = xdr_sizeof((xdrproc_t)xdr_gd1_mgmt_brick_op_req, req); + iobuf = iobuf_get2(rpc->ctx->iobuf_pool, req_size); + if (!iobuf) { + goto *errlbl; + } + errlbl = &&maybe_free_iobuf; + + iov.iov_base = iobuf->ptr; + iov.iov_len = iobuf_pagesize(iobuf); + + iobref = iobref_new(); + if (!iobref) { + goto *errlbl; + } + errlbl = &&free_iobref; + + iobref_add(iobref, iobuf); + /* + * Drop our reference to the iobuf. The iobref should already have + * one after iobref_add, so when we unref that we'll free the iobuf as + * well. This allows us to pass just the iobref as frame->local. + */ + iobuf_unref(iobuf); + /* Set the pointer to null so we don't free it on a later error. 
*/ + iobuf = NULL; + + /* Create the xdr payload */ + ret = xdr_serialize_generic(iov, req, (xdrproc_t)xdr_gd1_mgmt_brick_op_req); + if (ret == -1) { + goto *errlbl; + } + iov.iov_len = ret; + + /* Send the msg */ + GF_ATOMIC_INC(conf->blockers); + ret = rpc_clnt_submit(rpc, &gd_brick_prog, op, cbkfn, &iov, 1, NULL, 0, + iobref, frame, NULL, 0, NULL, 0, NULL); + if (dict) + dict_unref(dict); + GF_FREE(volfile_content); + if (spec_fd >= 0) + sys_close(spec_fd); + return ret; + +free_iobref: + iobref_unref(iobref); +maybe_free_iobuf: + if (iobuf) { + iobuf_unref(iobuf); + } +err: + if (dict) + dict_unref(dict); + if (brick_req.dict.dict_val) + GF_FREE(brick_req.dict.dict_val); + + GF_FREE(volfile_content); + if (spec_fd >= 0) + sys_close(spec_fd); + if (frame) + STACK_DESTROY(frame->root); + return -1; +} + +int +glusterd_attach_svc(glusterd_svc_t *svc, glusterd_volinfo_t *volinfo, int flags) +{ + glusterd_conf_t *conf = THIS->private; + int ret = -1; + int tries; + rpc_clnt_t *rpc = NULL; + + GF_VALIDATE_OR_GOTO("glusterd", conf, out); + GF_VALIDATE_OR_GOTO("glusterd", svc, out); + GF_VALIDATE_OR_GOTO("glusterd", volinfo, out); + + gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_ATTACH_INFO, + "adding svc %s (volume=%s) to existing " + "process with pid %d", + svc->name, volinfo->volname, glusterd_proc_get_pid(&svc->proc)); + + rpc = rpc_clnt_ref(svc->conn.rpc); + for (tries = 15; tries > 0; --tries) { + /* There might be a case where the volume for which we're attempting to + * attach a shd svc has become stale and is in the process of deletion. + * Given that the volinfo object was passed in before that sequence of + * operations happened, we might be operating on a stale volume. At + * every sync task switch we should therefore check for the existence + * of the volume. + */ + if (!glusterd_volume_exists(volinfo->volname)) { + gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_SVC_ATTACH_FAIL, + "Volume %s" + " is marked as stale, not attempting further shd svc attach " + "attempts", + volinfo->volname); + ret = 0; + goto out; + } + if (rpc) { + pthread_mutex_lock(&conf->attach_lock); + { + ret = __glusterd_send_svc_configure_req( + svc, flags, rpc, svc->proc.volfileid, GLUSTERD_SVC_ATTACH); + } + pthread_mutex_unlock(&conf->attach_lock); + if (!ret) { + volinfo->shd.attached = _gf_true; + goto out; + } + } + /* + * It might not actually be safe to manipulate the lock + * like this, but if we don't then the connection can + * never actually complete and retries are useless. + * Unfortunately, all of the alternatives (e.g. doing + * all of this in a separate thread) are much more + * complicated and risky. 
+ * TBD: see if there's a better way + */ + synclock_unlock(&conf->big_lock); + synctask_sleep(1); + synclock_lock(&conf->big_lock); + } + ret = -1; + gf_msg("glusterd", GF_LOG_WARNING, 0, GD_MSG_SVC_ATTACH_FAIL, + "attach failed for %s(volume=%s)", svc->name, volinfo->volname); +out: + if (rpc) + rpc_clnt_unref(rpc); + return ret; +} + +int +glusterd_detach_svc(glusterd_svc_t *svc, glusterd_volinfo_t *volinfo, int sig) +{ + glusterd_conf_t *conf = THIS->private; + int ret = -1; + int tries; + rpc_clnt_t *rpc = NULL; + + GF_VALIDATE_OR_GOTO(THIS->name, conf, out); + GF_VALIDATE_OR_GOTO(THIS->name, svc, out); + GF_VALIDATE_OR_GOTO(THIS->name, volinfo, out); + + gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_DETACH_INFO, + "removing svc %s (volume=%s) from existing " + "process with pid %d", + svc->name, volinfo->volname, glusterd_proc_get_pid(&svc->proc)); + + rpc = rpc_clnt_ref(svc->conn.rpc); + for (tries = 15; tries > 0; --tries) { + if (rpc) { + /*For detach there is no flags, and we are not using sig.*/ + pthread_mutex_lock(&conf->attach_lock); + { + ret = __glusterd_send_svc_configure_req(svc, 0, svc->conn.rpc, + svc->proc.volfileid, + GLUSTERD_SVC_DETACH); + } + pthread_mutex_unlock(&conf->attach_lock); + if (!ret) { + goto out; + } + } + /* + * It might not actually be safe to manipulate the lock + * like this, but if we don't then the connection can + * never actually complete and retries are useless. + * Unfortunately, all of the alternatives (e.g. doing + * all of this in a separate thread) are much more + * complicated and risky. + * TBD: see if there's a better way + */ + synclock_unlock(&conf->big_lock); + synctask_sleep(1); + synclock_lock(&conf->big_lock); + } + ret = -1; + gf_msg("glusterd", GF_LOG_WARNING, 0, GD_MSG_SVC_DETACH_FAIL, + "detach failed for %s(volume=%s)", svc->name, volinfo->volname); +out: + if (rpc) + rpc_clnt_unref(rpc); + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.h b/xlators/mgmt/glusterd/src/glusterd-svc-helper.h new file mode 100644 index 00000000000..12717dc58ac --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.h @@ -0,0 +1,72 @@ +/* + Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#ifndef _GLUSTERD_SVC_HELPER_H_ +#define _GLUSTERD_SVC_HELPER_H_ + +#include "glusterd.h" +#include "glusterd-svc-mgmt.h" +#include "glusterd-volgen.h" + +int +glusterd_svcs_reconfigure(glusterd_volinfo_t *volinfo); + +int +glusterd_svcs_stop(glusterd_volinfo_t *vol); + +int +glusterd_svcs_manager(glusterd_volinfo_t *volinfo); + +int +glusterd_svc_check_volfile_identical(char *svc_name, + glusterd_graph_builder_t builder, + gf_boolean_t *identical); +int +glusterd_svc_check_topology_identical(char *svc_name, + glusterd_graph_builder_t builder, + gf_boolean_t *identical); +int +glusterd_volume_svc_check_volfile_identical(char *svc_name, dict_t *mode_dict, + glusterd_volinfo_t *volinfo, + glusterd_vol_graph_builder_t, + gf_boolean_t *identical); +int +glusterd_volume_svc_check_topology_identical(char *svc_name, dict_t *mode_dict, + glusterd_volinfo_t *volinfo, + glusterd_vol_graph_builder_t, + gf_boolean_t *identical); +void +glusterd_volume_svc_build_volfile_path(char *server, glusterd_volinfo_t *vol, + char *volfile, size_t len); +void * +__gf_find_compatible_svc(gd_node_type daemon); + +glusterd_svc_proc_t * +glusterd_svcprocess_new(); + +int +glusterd_shd_svc_mux_init(glusterd_volinfo_t *volinfo, glusterd_svc_t *svc); + +void * +__gf_find_compatible_svc_from_pid(gd_node_type daemon, pid_t pid); + +int +glusterd_attach_svc(glusterd_svc_t *svc, glusterd_volinfo_t *volinfo, + int flags); + +int +glusterd_detach_svc(glusterd_svc_t *svc, glusterd_volinfo_t *volinfo, int sig); + +int +__glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flag, + struct rpc_clnt *rpc, char *volfile_id, + int op); + +#endif diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c new file mode 100644 index 00000000000..18b3fb13630 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c @@ -0,0 +1,536 @@ +/* + Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#include <glusterfs/globals.h> +#include <glusterfs/run.h> +#include "glusterd.h" +#include <glusterfs/glusterfs.h> +#include "glusterd-utils.h" +#include "glusterd-svc-mgmt.h" +#include "glusterd-proc-mgmt.h" +#include "glusterd-conn-mgmt.h" +#include "glusterd-messages.h" +#include <glusterfs/syscall.h> +#include "glusterd-shd-svc-helper.h" + +int +glusterd_svc_create_rundir(char *rundir) +{ + int ret = -1; + + ret = mkdir_p(rundir, 0755, _gf_true); + if ((ret == -1) && (EEXIST != errno)) { + gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_CREATE_DIR_FAILED, + "Unable to create rundir %s", rundir); + } + return ret; +} + +void +glusterd_svc_build_logfile_path(char *server, char *logdir, char *logfile, + size_t len) +{ + snprintf(logfile, len, "%s/%s.log", logdir, server); +} + +void +glusterd_svc_build_volfileid_path(char *server, char *volfileid, size_t len) +{ + snprintf(volfileid, len, "gluster/%s", server); +} + +static int +glusterd_svc_init_common(glusterd_svc_t *svc, char *svc_name, char *workdir, + char *rundir, char *logdir, + glusterd_conn_notify_t notify) +{ + int ret = -1; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + char pidfile[PATH_MAX] = { + 0, + }; + char logfile[PATH_MAX] = { + 0, + }; + char volfile[PATH_MAX] = { + 0, + }; + char sockfpath[PATH_MAX] = { + 0, + }; + char volfileid[256] = {0}; + char *volfileserver = NULL; + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + GF_ASSERT(priv); + + ret = snprintf(svc->name, sizeof(svc->name), "%s", svc_name); + if (ret < 0) + goto out; + + if (!notify) + notify = glusterd_svc_common_rpc_notify; + + glusterd_svc_create_rundir(rundir); + + /* Initialize the connection mgmt */ + glusterd_conn_build_socket_filepath(rundir, MY_UUID, sockfpath, + sizeof(sockfpath)); + + ret = glusterd_conn_init(&(svc->conn), sockfpath, 600, notify); + if (ret) + goto out; + + /* Initialize the process mgmt */ + glusterd_svc_build_pidfile_path(svc_name, priv->rundir, pidfile, + sizeof(pidfile)); + + glusterd_svc_build_volfile_path(svc_name, workdir, volfile, + sizeof(volfile)); + + glusterd_svc_build_logfile_path(svc_name, logdir, logfile, sizeof(logfile)); + glusterd_svc_build_volfileid_path(svc_name, volfileid, sizeof(volfileid)); + + if (dict_get_strn(this->options, "transport.socket.bind-address", + SLEN("transport.socket.bind-address"), + &volfileserver) != 0) { + volfileserver = "localhost"; + } + + ret = glusterd_proc_init(&(svc->proc), svc_name, pidfile, logdir, logfile, + volfile, volfileid, volfileserver); + if (ret) + goto out; + +out: + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; +} + +static int +svc_add_args(dict_t *cmdline, char *arg, data_t *value, void *data) +{ + runner_t *runner = data; + runner_add_arg(runner, value->data); + return 0; +} + +int +glusterd_svc_init(glusterd_svc_t *svc, char *svc_name) +{ + int ret = -1; + char rundir[PATH_MAX] = { + 0, + }; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + GF_ASSERT(priv); + + glusterd_svc_build_rundir(svc_name, priv->rundir, rundir, sizeof(rundir)); + ret = glusterd_svc_init_common(svc, svc_name, priv->workdir, rundir, + priv->logdir, NULL); + + return ret; +} + +int +glusterd_svc_start(glusterd_svc_t *svc, int flags, dict_t *cmdline) +{ + int ret = -1; + runner_t runner = { + 0, + }; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + char valgrind_logfile[PATH_MAX] = {0}; + char *localtime_logging = NULL; + char *log_level = NULL; + char 
daemon_log_level[30] = {0}; + char msg[1024] = { + 0, + }; + int32_t len = 0; + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + GF_VALIDATE_OR_GOTO("glusterd", priv, out); + GF_VALIDATE_OR_GOTO("glusterd", svc, out); + + pthread_mutex_lock(&priv->attach_lock); + { + if (glusterd_proc_is_running(&(svc->proc))) { + ret = 0; + goto unlock; + } + + ret = sys_access(svc->proc.volfile, F_OK); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_NOT_FOUND, + "Volfile %s is not present", svc->proc.volfile); + goto unlock; + } + + runinit(&runner); + + if (this->ctx->cmd_args.vgtool != _gf_none) { + len = snprintf(valgrind_logfile, PATH_MAX, "%s/valgrind-%s.log", + svc->proc.logdir, svc->name); + if ((len < 0) || (len >= PATH_MAX)) { + ret = -1; + goto unlock; + } + + if (this->ctx->cmd_args.vgtool == _gf_memcheck) + runner_add_args(&runner, "valgrind", "--leak-check=full", + "--trace-children=yes", "--track-origins=yes", + NULL); + else + runner_add_args(&runner, "valgrind", "--tool=drd", NULL); + + runner_argprintf(&runner, "--log-file=%s", valgrind_logfile); + } + + runner_add_args(&runner, SBIN_DIR "/glusterfs", "-s", + svc->proc.volfileserver, "--volfile-id", + svc->proc.volfileid, "-p", svc->proc.pidfile, "-l", + svc->proc.logfile, "-S", svc->conn.sockpath, NULL); + + if (dict_get_strn(priv->opts, GLUSTERD_LOCALTIME_LOGGING_KEY, + SLEN(GLUSTERD_LOCALTIME_LOGGING_KEY), + &localtime_logging) == 0) { + if (strcmp(localtime_logging, "enable") == 0) + runner_add_arg(&runner, "--localtime-logging"); + } + if (dict_get_strn(priv->opts, GLUSTERD_DAEMON_LOG_LEVEL_KEY, + SLEN(GLUSTERD_DAEMON_LOG_LEVEL_KEY), + &log_level) == 0) { + snprintf(daemon_log_level, 30, "--log-level=%s", log_level); + runner_add_arg(&runner, daemon_log_level); + } + + if (this->ctx->cmd_args.global_threading) { + runner_add_arg(&runner, "--global-threading"); + } + + if (cmdline) + dict_foreach(cmdline, svc_add_args, (void *)&runner); + + snprintf(msg, sizeof(msg), "Starting %s service", svc->name); + runner_log(&runner, this->name, GF_LOG_DEBUG, msg); + + if (flags == PROC_START_NO_WAIT) { + ret = runner_run_nowait(&runner); + } else { + synclock_unlock(&priv->big_lock); + { + ret = runner_run(&runner); + } + synclock_lock(&priv->big_lock); + } + } +unlock: + pthread_mutex_unlock(&priv->attach_lock); +out: + gf_msg_debug(this->name, 0, "Returning %d", ret); + + return ret; +} + +int +glusterd_svc_stop(glusterd_svc_t *svc, int sig) +{ + int ret = -1; + + ret = glusterd_proc_stop(&(svc->proc), sig, PROC_STOP_FORCE); + if (ret) + goto out; + glusterd_conn_disconnect(&(svc->conn)); + + if (ret == 0) { + svc->online = _gf_false; + (void)glusterd_unlink_file((char *)svc->conn.sockpath); + } + gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_SVC_STOP_SUCCESS, + "%s service is stopped", svc->name); +out: + gf_msg_debug(THIS->name, 0, "Returning %d", ret); + + return ret; +} + +void +glusterd_svc_build_pidfile_path(char *server, char *workdir, char *path, + size_t len) +{ + char dir[PATH_MAX] = {0}; + + GF_ASSERT(len == PATH_MAX); + + glusterd_svc_build_rundir(server, workdir, dir, sizeof(dir)); + snprintf(path, len, "%s/%s.pid", dir, server); +} + +void +glusterd_svc_build_volfile_path(char *server, char *workdir, char *volfile, + size_t len) +{ + char dir[PATH_MAX] = { + 0, + }; + + GF_ASSERT(len == PATH_MAX); + + glusterd_svc_build_svcdir(server, workdir, dir, sizeof(dir)); + + if (!strcmp(server, "quotad")) + /*quotad has different volfile name*/ + snprintf(volfile, len, "%s/%s.vol", dir, server); + else + 
snprintf(volfile, len, "%s/%s-server.vol", dir, server); +} + +void +glusterd_svc_build_svcdir(char *server, char *workdir, char *path, size_t len) +{ + GF_ASSERT(len == PATH_MAX); + + snprintf(path, len, "%s/%s", workdir, server); +} + +void +glusterd_svc_build_rundir(char *server, char *workdir, char *path, size_t len) +{ + char dir[PATH_MAX] = {0}; + + GF_ASSERT(len == PATH_MAX); + + glusterd_svc_build_svcdir(server, workdir, dir, sizeof(dir)); + snprintf(path, len, "%s", dir); +} + +int +glusterd_svc_reconfigure(int (*create_volfile)()) +{ + int ret = -1; + + ret = create_volfile(); + if (ret) + goto out; + + ret = glusterd_fetchspec_notify(THIS); +out: + return ret; +} + +int +glusterd_svc_common_rpc_notify(glusterd_conn_t *conn, rpc_clnt_event_t event) +{ + int ret = 0; + glusterd_svc_t *svc = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + /* Get the parent object i.e. svc using the list_entry macro */ + svc = cds_list_entry(conn, glusterd_svc_t, conn); + if (!svc) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_GET_FAIL, + "Failed to get the service"); + return -1; + } + + switch (event) { + case RPC_CLNT_CONNECT: + gf_msg_debug(this->name, 0, + "%s has connected with " + "glusterd.", + svc->name); + gf_event(EVENT_SVC_CONNECTED, "svc_name=%s", svc->name); + svc->online = _gf_true; + break; + + case RPC_CLNT_DISCONNECT: + if (svc->online) { + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_NODE_DISCONNECTED, + "%s has disconnected " + "from glusterd.", + svc->name); + gf_event(EVENT_SVC_DISCONNECTED, "svc_name=%s", svc->name); + svc->online = _gf_false; + } + break; + + default: + gf_msg_trace(this->name, 0, "got some other RPC event %d", event); + break; + } + + return ret; +} + +void +glusterd_volume_svc_build_volfile_path(char *server, glusterd_volinfo_t *vol, + char *volfile, size_t len) +{ + GF_ASSERT(len == PATH_MAX); + + if (!strcmp(server, "glustershd")) { + glusterd_svc_build_shd_volfile_path(vol, volfile, len); + } +} + +int +glusterd_muxsvc_common_rpc_notify(glusterd_svc_proc_t *mux_proc, + rpc_clnt_event_t event) +{ + int ret = 0; + glusterd_svc_t *svc = NULL; + glusterd_svc_t *tmp = NULL; + xlator_t *this = NULL; + gf_boolean_t need_logging = _gf_false; + + this = THIS; + GF_ASSERT(this); + + if (!mux_proc) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_GET_FAIL, + "Failed to get the svc proc data"); + return -1; + } + + /* Currently this function is used only for the shd svc; if it is used + * for another svc, change the glustershd reference. 
We can get + * the svc name from any of the attached svc's + */ + switch (event) { + case RPC_CLNT_CONNECT: + gf_msg_debug(this->name, 0, + "glustershd has connected with glusterd."); + gf_event(EVENT_SVC_CONNECTED, "svc_name=glustershd"); + cds_list_for_each_entry_safe(svc, tmp, &mux_proc->svcs, mux_svc) + { + if (svc->online) + continue; + svc->online = _gf_true; + } + if (mux_proc->status != GF_SVC_STARTED) + mux_proc->status = GF_SVC_STARTED; + + break; + + case RPC_CLNT_DISCONNECT: + cds_list_for_each_entry_safe(svc, tmp, &mux_proc->svcs, mux_svc) + { + if (svc->online) { + if (!need_logging) + need_logging = _gf_true; + svc->online = _gf_false; + } + } + if (mux_proc->status != GF_SVC_DIED) { + svc = cds_list_entry(mux_proc->svcs.next, glusterd_svc_t, + mux_svc); + if (svc && !glusterd_proc_is_running(&svc->proc)) { + mux_proc->status = GF_SVC_DIED; + } else { + mux_proc->status = GF_SVC_DISCONNECTED; + } + } + + if (need_logging) { + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_NODE_DISCONNECTED, + "glustershd has disconnected from glusterd."); + gf_event(EVENT_SVC_DISCONNECTED, "svc_name=glustershd"); + } + break; + + default: + gf_msg_trace(this->name, 0, "got some other RPC event %d", event); + break; + } + + return ret; +} + +int +glusterd_muxsvc_conn_init(glusterd_conn_t *conn, glusterd_svc_proc_t *mux_proc, + char *sockpath, int frame_timeout, + glusterd_muxsvc_conn_notify_t notify) +{ + int ret = -1; + dict_t *options = NULL; + struct rpc_clnt *rpc = NULL; + xlator_t *this = THIS; + glusterd_svc_t *svc = NULL; + + options = dict_new(); + if (!this || !options) + goto out; + + svc = cds_list_entry(conn, glusterd_svc_t, conn); + if (!svc) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_GET_FAIL, + "Failed to get the service"); + goto out; + } + + ret = rpc_transport_unix_options_build(options, sockpath, frame_timeout); + if (ret) + goto out; + + ret = dict_set_int32n(options, "transport.socket.ignore-enoent", + SLEN("transport.socket.ignore-enoent"), 1); + if (ret) + goto out; + + /* @options is free'd by rpc_transport when destroyed */ + rpc = rpc_clnt_new(options, this, (char *)svc->name, 16); + if (!rpc) { + ret = -1; + goto out; + } + + ret = rpc_clnt_register_notify(rpc, glusterd_muxsvc_conn_common_notify, + mux_proc); + if (ret) + goto out; + + ret = snprintf(conn->sockpath, sizeof(conn->sockpath), "%s", sockpath); + if (ret < 0) + goto out; + else + ret = 0; + + conn->frame_timeout = frame_timeout; + conn->rpc = rpc; + mux_proc->notify = notify; +out: + if (options) + dict_unref(options); + if (ret) { + if (rpc) { + rpc_clnt_unref(rpc); + rpc = NULL; + } + } + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h new file mode 100644 index 00000000000..5daee993833 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h @@ -0,0 +1,112 @@ +/* + Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#ifndef _GLUSTERD_SVC_MGMT_H_ +#define _GLUSTERD_SVC_MGMT_H_ + +#include "glusterd-proc-mgmt.h" +#include "glusterd-conn-mgmt.h" +#include "glusterd-rcu.h" + +struct glusterd_svc_; + +typedef struct glusterd_svc_ glusterd_svc_t; +typedef struct glusterd_svc_proc_ glusterd_svc_proc_t; + +typedef void (*glusterd_svc_build_t)(glusterd_svc_t *svc); + +typedef int (*glusterd_svc_manager_t)(glusterd_svc_t *svc, void *data, + int flags); +typedef int (*glusterd_svc_start_t)(glusterd_svc_t *svc, int flags); +typedef int (*glusterd_svc_stop_t)(glusterd_svc_t *svc, int sig); +typedef int (*glusterd_svc_reconfigure_t)(void *data); + +typedef int (*glusterd_muxsvc_conn_notify_t)(glusterd_svc_proc_t *mux_proc, + rpc_clnt_event_t event); + +typedef enum gf_svc_status { + GF_SVC_STARTING, + GF_SVC_STARTED, + GF_SVC_STOPPING, + GF_SVC_DISCONNECTED, + GF_SVC_DIED, +} gf_svc_status_t; + +struct glusterd_svc_proc_ { + struct cds_list_head svc_proc_list; + struct cds_list_head svcs; + glusterd_muxsvc_conn_notify_t notify; + rpc_clnt_t *rpc; + void *data; + gf_svc_status_t status; +}; + +struct glusterd_svc_ { + glusterd_conn_t conn; + glusterd_svc_manager_t manager; + glusterd_svc_start_t start; + glusterd_svc_stop_t stop; + glusterd_svc_reconfigure_t reconfigure; + glusterd_svc_proc_t *svc_proc; + struct cds_list_head mux_svc; + glusterd_proc_t proc; + char name[NAME_MAX]; + gf_boolean_t online; + gf_boolean_t inited; +}; + +int +glusterd_svc_create_rundir(char *rundir); + +int +glusterd_svc_init(glusterd_svc_t *svc, char *svc_name); + +int +glusterd_svc_start(glusterd_svc_t *svc, int flags, dict_t *cmdline); + +int +glusterd_svc_stop(glusterd_svc_t *svc, int sig); + +void +glusterd_svc_build_pidfile_path(char *server, char *workdir, char *path, + size_t len); + +void +glusterd_svc_build_volfile_path(char *server, char *workdir, char *volfile, + size_t len); + +void +glusterd_svc_build_logfile_path(char *server, char *logdir, char *logfile, + size_t len); + +void +glusterd_svc_build_svcdir(char *server, char *workdir, char *path, size_t len); + +void +glusterd_svc_build_rundir(char *server, char *workdir, char *path, size_t len); + +int +glusterd_svc_reconfigure(int (*create_volfile)()); + +int +glusterd_svc_common_rpc_notify(glusterd_conn_t *conn, rpc_clnt_event_t event); + +int +glusterd_muxsvc_common_rpc_notify(glusterd_svc_proc_t *conn, + rpc_clnt_event_t event); + +int +glusterd_proc_get_pid(glusterd_proc_t *proc); + +int +glusterd_muxsvc_conn_init(glusterd_conn_t *conn, glusterd_svc_proc_t *mux_proc, + char *sockpath, int frame_timeout, + glusterd_muxsvc_conn_notify_t notify); +#endif diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c new file mode 100644 index 00000000000..b73d37ad08e --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c @@ -0,0 +1,2043 @@ +/* + Copyright (c) 2012-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ +/* rpc related syncops */ +#include "rpc-clnt.h" +#include "protocol-common.h" +#include "xdr-generic.h" +#include "glusterd1-xdr.h" +#include "glusterd-syncop.h" +#include "glusterd-mgmt.h" + +#include "glusterd.h" +#include "glusterd-op-sm.h" +#include "glusterd-utils.h" +#include "glusterd-server-quorum.h" +#include "glusterd-locks.h" +#include "glusterd-snapshot-utils.h" +#include "glusterd-messages.h" +#include "glusterd-errno.h" + +extern glusterd_op_info_t opinfo; + +void +gd_synctask_barrier_wait(struct syncargs *args, int count) +{ + glusterd_conf_t *conf = THIS->private; + + synclock_unlock(&conf->big_lock); + synctask_barrier_wait(args, count); + synclock_lock(&conf->big_lock); + + syncbarrier_destroy(&args->barrier); +} + +static void +gd_collate_errors(struct syncargs *args, int op_ret, int op_errno, + char *op_errstr, int op_code, uuid_t peerid, u_char *uuid) +{ + char err_str[PATH_MAX] = "Please check log file for details."; + char op_err[PATH_MAX] = ""; + int len = -1; + char *peer_str = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + + if (op_ret) { + args->op_ret = op_ret; + args->op_errno = op_errno; + + RCU_READ_LOCK; + peerinfo = glusterd_peerinfo_find(peerid, NULL); + if (peerinfo) + peer_str = gf_strdup(peerinfo->hostname); + else + peer_str = gf_strdup(uuid_utoa(uuid)); + RCU_READ_UNLOCK; + + if (op_errstr && strcmp(op_errstr, "")) { + len = snprintf(err_str, sizeof(err_str) - 1, "Error: %s", + op_errstr); + err_str[len] = '\0'; + } + + switch (op_code) { + case GLUSTERD_MGMT_CLUSTER_LOCK: { + len = snprintf(op_err, sizeof(op_err) - 1, + "Locking failed on %s. %s", peer_str, err_str); + break; + } + case GLUSTERD_MGMT_CLUSTER_UNLOCK: { + len = snprintf(op_err, sizeof(op_err) - 1, + "Unlocking failed on %s. %s", peer_str, err_str); + break; + } + case GLUSTERD_MGMT_STAGE_OP: { + len = snprintf(op_err, sizeof(op_err) - 1, + "Staging failed on %s. %s", peer_str, err_str); + break; + } + case GLUSTERD_MGMT_COMMIT_OP: { + len = snprintf(op_err, sizeof(op_err) - 1, + "Commit failed on %s. 
%s", peer_str, err_str); + break; + } + } + + if (len > 0) + op_err[len] = '\0'; + + if (args->errstr) { + len = snprintf(err_str, sizeof(err_str) - 1, "%s\n%s", args->errstr, + op_err); + GF_FREE(args->errstr); + args->errstr = NULL; + } else + len = snprintf(err_str, sizeof(err_str) - 1, "%s", op_err); + err_str[len] = '\0'; + + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_MGMT_OP_FAIL, "%s", op_err); + args->errstr = gf_strdup(err_str); + } + + GF_FREE(peer_str); + + return; +} + +void +gd_syncargs_init(struct syncargs *args, dict_t *op_ctx) +{ + args->dict = op_ctx; + pthread_mutex_init(&args->lock_dict, NULL); +} + +static void +gd_stage_op_req_free(gd1_mgmt_stage_op_req *req) +{ + if (!req) + return; + + GF_FREE(req->buf.buf_val); + GF_FREE(req); +} + +static void +gd_commit_op_req_free(gd1_mgmt_commit_op_req *req) +{ + if (!req) + return; + + GF_FREE(req->buf.buf_val); + GF_FREE(req); +} + +static void +gd_brick_op_req_free(gd1_mgmt_brick_op_req *req) +{ + if (!req) + return; + + if (req->dict.dict_val) + GF_FREE(req->dict.dict_val); + GF_FREE(req->input.input_val); + GF_FREE(req); +} + +int +gd_syncop_submit_request(struct rpc_clnt *rpc, void *req, void *local, + void *cookie, rpc_clnt_prog_t *prog, int procnum, + fop_cbk_fn_t cbkfn, xdrproc_t xdrproc) +{ + int ret = -1; + struct iobuf *iobuf = NULL; + struct iobref *iobref = NULL; + int count = 0; + struct iovec iov = { + 0, + }; + ssize_t req_size = 0; + call_frame_t *frame = NULL; + + GF_ASSERT(rpc); + if (!req) + goto out; + + req_size = xdr_sizeof(xdrproc, req); + iobuf = iobuf_get2(rpc->ctx->iobuf_pool, req_size); + if (!iobuf) + goto out; + + iobref = iobref_new(); + if (!iobref) + goto out; + + frame = create_frame(THIS, THIS->ctx->pool); + if (!frame) + goto out; + + iobref_add(iobref, iobuf); + + iov.iov_base = iobuf->ptr; + iov.iov_len = iobuf_pagesize(iobuf); + + /* Create the xdr payload */ + ret = xdr_serialize_generic(iov, req, xdrproc); + if (ret == -1) + goto out; + + iov.iov_len = ret; + count = 1; + + frame->local = local; + frame->cookie = cookie; + + /* Send the msg */ + ret = rpc_clnt_submit(rpc, prog, procnum, cbkfn, &iov, count, NULL, 0, + iobref, frame, NULL, 0, NULL, 0, NULL); + + /* TODO: do we need to start ping also? 
*/ + +out: + iobref_unref(iobref); + iobuf_unref(iobuf); + + if (ret && frame) + STACK_DESTROY(frame->root); + return ret; +} + +/* Defined in glusterd-rpc-ops.c */ +extern struct rpc_clnt_program gd_mgmt_prog; +extern struct rpc_clnt_program gd_brick_prog; +extern struct rpc_clnt_program gd_mgmt_v3_prog; + +int +glusterd_syncop_aggr_rsp_dict(glusterd_op_t op, dict_t *aggr, dict_t *rsp) +{ + int ret = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + switch (op) { + case GD_OP_CREATE_VOLUME: + case GD_OP_ADD_BRICK: + case GD_OP_START_VOLUME: + ret = glusterd_aggr_brick_mount_dirs(aggr, rsp); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_BRICK_MOUNDIRS_AGGR_FAIL, + "Failed to " + "aggregate brick mount dirs"); + goto out; + } + break; + + case GD_OP_REPLACE_BRICK: + case GD_OP_RESET_BRICK: + ret = glusterd_rb_use_rsp_dict(aggr, rsp); + if (ret) + goto out; + break; + + case GD_OP_SYNC_VOLUME: + ret = glusterd_sync_use_rsp_dict(aggr, rsp); + if (ret) + goto out; + break; + + case GD_OP_GSYNC_CREATE: + break; + + case GD_OP_GSYNC_SET: + ret = glusterd_gsync_use_rsp_dict(aggr, rsp, NULL); + if (ret) + goto out; + break; + + case GD_OP_STATUS_VOLUME: + ret = glusterd_volume_status_copy_to_op_ctx_dict(aggr, rsp); + if (ret) + goto out; + break; + + case GD_OP_HEAL_VOLUME: + ret = glusterd_volume_heal_use_rsp_dict(aggr, rsp); + if (ret) + goto out; + + break; + + case GD_OP_CLEARLOCKS_VOLUME: + ret = glusterd_use_rsp_dict(aggr, rsp); + if (ret) + goto out; + break; + + case GD_OP_QUOTA: + ret = glusterd_volume_quota_copy_to_op_ctx_dict(aggr, rsp); + if (ret) + goto out; + break; + + case GD_OP_SYS_EXEC: + ret = glusterd_sys_exec_output_rsp_dict(aggr, rsp); + if (ret) + goto out; + break; + + case GD_OP_SNAP: + ret = glusterd_snap_use_rsp_dict(aggr, rsp); + if (ret) + goto out; + break; + + case GD_OP_SCRUB_STATUS: + ret = glusterd_volume_bitrot_scrub_use_rsp_dict(aggr, rsp); + break; + + case GD_OP_SCRUB_ONDEMAND: + break; + + case GD_OP_MAX_OPVERSION: + ret = glusterd_max_opversion_use_rsp_dict(aggr, rsp); + break; + + case GD_OP_PROFILE_VOLUME: + ret = glusterd_profile_volume_use_rsp_dict(aggr, rsp); + break; + + case GD_OP_REBALANCE: + case GD_OP_DEFRAG_BRICK_VOLUME: + ret = glusterd_volume_rebalance_use_rsp_dict(aggr, rsp); + break; + + default: + break; + } +out: + return ret; +} + +int32_t +gd_syncop_mgmt_v3_lock_cbk_fn(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) +{ + int ret = -1; + struct syncargs *args = NULL; + gd1_mgmt_v3_lock_rsp rsp = { + {0}, + }; + call_frame_t *frame = NULL; + int op_ret = -1; + int op_errno = -1; + xlator_t *this = NULL; + uuid_t *peerid = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(req); + GF_ASSERT(myframe); + + frame = myframe; + args = frame->local; + peerid = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + GF_VALIDATE_OR_GOTO_WITH_ERROR(this->name, iov, out, op_errno, EINVAL); + + ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_v3_lock_rsp); + if (ret < 0) + goto out; + + gf_uuid_copy(args->uuid, rsp.uuid); + + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; +out: + gd_mgmt_v3_collate_errors(args, op_ret, op_errno, NULL, + GLUSTERD_MGMT_V3_LOCK, *peerid, rsp.uuid); + + GF_FREE(peerid); + /* req->rpc_status set to -1 means, STACK_DESTROY will be called from + * the caller function. 
+ */ + if (req->rpc_status != -1) + STACK_DESTROY(frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_syncop_mgmt_v3_lock_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) +{ + return glusterd_big_locked_cbk(req, iov, count, myframe, + gd_syncop_mgmt_v3_lock_cbk_fn); +} + +int +gd_syncop_mgmt_v3_lock(glusterd_op_t op, dict_t *op_ctx, + glusterd_peerinfo_t *peerinfo, struct syncargs *args, + uuid_t my_uuid, uuid_t recv_uuid, uuid_t txn_id) +{ + int ret = -1; + gd1_mgmt_v3_lock_req req = { + {0}, + }; + uuid_t *peerid = NULL; + + GF_ASSERT(op_ctx); + GF_ASSERT(peerinfo); + GF_ASSERT(args); + + ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val, + &req.dict.dict_len); + if (ret) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); + goto out; + } + + gf_uuid_copy(req.uuid, my_uuid); + gf_uuid_copy(req.txn_id, txn_id); + req.op = op; + + GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret); + if (ret) + goto out; + + ret = gd_syncop_submit_request(peerinfo->rpc, &req, args, peerid, + &gd_mgmt_v3_prog, GLUSTERD_MGMT_V3_LOCK, + gd_syncop_mgmt_v3_lock_cbk, + (xdrproc_t)xdr_gd1_mgmt_v3_lock_req); +out: + GF_FREE(req.dict.dict_val); + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; +} + +int32_t +gd_syncop_mgmt_v3_unlock_cbk_fn(struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int ret = -1; + struct syncargs *args = NULL; + gd1_mgmt_v3_unlock_rsp rsp = { + {0}, + }; + call_frame_t *frame = NULL; + int op_ret = -1; + int op_errno = -1; + xlator_t *this = NULL; + uuid_t *peerid = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(req); + GF_ASSERT(myframe); + + frame = myframe; + args = frame->local; + peerid = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + GF_VALIDATE_OR_GOTO_WITH_ERROR(this->name, iov, out, op_errno, EINVAL); + + ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_v3_unlock_rsp); + if (ret < 0) + goto out; + + gf_uuid_copy(args->uuid, rsp.uuid); + + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; +out: + gd_mgmt_v3_collate_errors(args, op_ret, op_errno, NULL, + GLUSTERD_MGMT_V3_UNLOCK, *peerid, rsp.uuid); + + GF_FREE(peerid); + /* req->rpc_status set to -1 means, STACK_DESTROY will be called from + * the caller function. 
+ */ + if (req->rpc_status != -1) + STACK_DESTROY(frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_syncop_mgmt_v3_unlock_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) +{ + return glusterd_big_locked_cbk(req, iov, count, myframe, + gd_syncop_mgmt_v3_unlock_cbk_fn); +} + +int +gd_syncop_mgmt_v3_unlock(dict_t *op_ctx, glusterd_peerinfo_t *peerinfo, + struct syncargs *args, uuid_t my_uuid, + uuid_t recv_uuid, uuid_t txn_id) +{ + int ret = -1; + gd1_mgmt_v3_unlock_req req = { + {0}, + }; + uuid_t *peerid = NULL; + + GF_ASSERT(op_ctx); + GF_ASSERT(peerinfo); + GF_ASSERT(args); + + ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val, + &req.dict.dict_len); + if (ret) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); + goto out; + } + + gf_uuid_copy(req.uuid, my_uuid); + gf_uuid_copy(req.txn_id, txn_id); + + GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret); + if (ret) + goto out; + + ret = gd_syncop_submit_request(peerinfo->rpc, &req, args, peerid, + &gd_mgmt_v3_prog, GLUSTERD_MGMT_V3_UNLOCK, + gd_syncop_mgmt_v3_unlock_cbk, + (xdrproc_t)xdr_gd1_mgmt_v3_unlock_req); +out: + GF_FREE(req.dict.dict_val); + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; +} + +int32_t +_gd_syncop_mgmt_lock_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) +{ + int ret = -1; + struct syncargs *args = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + gd1_mgmt_cluster_lock_rsp rsp = { + {0}, + }; + call_frame_t *frame = NULL; + int op_ret = -1; + int op_errno = -1; + xlator_t *this = NULL; + uuid_t *peerid = NULL; + + this = THIS; + GF_ASSERT(this); + + frame = myframe; + args = frame->local; + peerid = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + GF_VALIDATE_OR_GOTO_WITH_ERROR(this->name, iov, out, op_errno, EINVAL); + + ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_cluster_lock_rsp); + if (ret < 0) + goto out; + + gf_uuid_copy(args->uuid, rsp.uuid); + + RCU_READ_LOCK; + peerinfo = glusterd_peerinfo_find(*peerid, NULL); + if (peerinfo) { + /* Set peer as locked, so we unlock only the locked peers */ + if (rsp.op_ret == 0) + peerinfo->locked = _gf_true; + RCU_READ_UNLOCK; + } else { + RCU_READ_UNLOCK; + rsp.op_ret = -1; + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_PEER_NOT_FOUND, + "Could not find peer with " + "ID %s", + uuid_utoa(*peerid)); + } + + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; +out: + gd_collate_errors(args, op_ret, op_errno, NULL, GLUSTERD_MGMT_CLUSTER_LOCK, + *peerid, rsp.uuid); + + GF_FREE(peerid); + /* req->rpc_status set to -1 means, STACK_DESTROY will be called from + * the caller function. 
+ */ + if (req->rpc_status != -1) + STACK_DESTROY(frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_syncop_mgmt_lock_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) +{ + return glusterd_big_locked_cbk(req, iov, count, myframe, + _gd_syncop_mgmt_lock_cbk); +} + +int +gd_syncop_mgmt_lock(glusterd_peerinfo_t *peerinfo, struct syncargs *args, + uuid_t my_uuid, uuid_t recv_uuid) +{ + int ret = -1; + gd1_mgmt_cluster_lock_req req = { + {0}, + }; + uuid_t *peerid = NULL; + + gf_uuid_copy(req.uuid, my_uuid); + GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret); + if (ret) + goto out; + + ret = gd_syncop_submit_request(peerinfo->rpc, &req, args, peerid, + &gd_mgmt_prog, GLUSTERD_MGMT_CLUSTER_LOCK, + gd_syncop_mgmt_lock_cbk, + (xdrproc_t)xdr_gd1_mgmt_cluster_lock_req); +out: + return ret; +} + +int32_t +_gd_syncop_mgmt_unlock_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) +{ + int ret = -1; + struct syncargs *args = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + gd1_mgmt_cluster_unlock_rsp rsp = { + {0}, + }; + call_frame_t *frame = NULL; + int op_ret = -1; + int op_errno = -1; + xlator_t *this = NULL; + uuid_t *peerid = NULL; + + this = THIS; + GF_ASSERT(this); + + frame = myframe; + args = frame->local; + peerid = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + GF_VALIDATE_OR_GOTO_WITH_ERROR(this->name, iov, out, op_errno, EINVAL); + + ret = xdr_to_generic(*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_cluster_unlock_rsp); + if (ret < 0) + goto out; + + gf_uuid_copy(args->uuid, rsp.uuid); + + RCU_READ_LOCK; + peerinfo = glusterd_peerinfo_find(*peerid, NULL); + if (peerinfo) { + peerinfo->locked = _gf_false; + RCU_READ_UNLOCK; + } else { + RCU_READ_UNLOCK; + rsp.op_ret = -1; + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_PEER_NOT_FOUND, + "Could not find peer with " + "ID %s", + uuid_utoa(*peerid)); + } + + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; +out: + gd_collate_errors(args, op_ret, op_errno, NULL, + GLUSTERD_MGMT_CLUSTER_UNLOCK, *peerid, rsp.uuid); + + GF_FREE(peerid); + /* req->rpc_status set to -1 means, STACK_DESTROY will be called from + * the caller function. 
+ */ + if (req->rpc_status != -1) + STACK_DESTROY(frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_syncop_mgmt_unlock_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) +{ + return glusterd_big_locked_cbk(req, iov, count, myframe, + _gd_syncop_mgmt_unlock_cbk); +} + +int +gd_syncop_mgmt_unlock(glusterd_peerinfo_t *peerinfo, struct syncargs *args, + uuid_t my_uuid, uuid_t recv_uuid) +{ + int ret = -1; + gd1_mgmt_cluster_unlock_req req = { + {0}, + }; + uuid_t *peerid = NULL; + + gf_uuid_copy(req.uuid, my_uuid); + GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret); + if (ret) + goto out; + + ret = gd_syncop_submit_request(peerinfo->rpc, &req, args, peerid, + &gd_mgmt_prog, GLUSTERD_MGMT_CLUSTER_UNLOCK, + gd_syncop_mgmt_unlock_cbk, + (xdrproc_t)xdr_gd1_mgmt_cluster_lock_req); +out: + return ret; +} + +int32_t +_gd_syncop_stage_op_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) +{ + int ret = -1; + gd1_mgmt_stage_op_rsp rsp = { + {0}, + }; + struct syncargs *args = NULL; + xlator_t *this = NULL; + dict_t *rsp_dict = NULL; + call_frame_t *frame = NULL; + int op_ret = -1; + int op_errno = -1; + uuid_t *peerid = NULL; + + this = THIS; + GF_ASSERT(this); + + frame = myframe; + args = frame->local; + peerid = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + GF_VALIDATE_OR_GOTO_WITH_ERROR(this->name, iov, out, op_errno, EINVAL); + + ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_stage_op_rsp); + if (ret < 0) + goto out; + + if (rsp.dict.dict_len) { + /* Unserialize the dictionary */ + rsp_dict = dict_new(); + + ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &rsp_dict); + if (ret < 0) { + GF_FREE(rsp.dict.dict_val); + goto out; + } else { + rsp_dict->extra_stdfree = rsp.dict.dict_val; + } + } + + RCU_READ_LOCK; + ret = (glusterd_peerinfo_find(rsp.uuid, NULL) == NULL); + RCU_READ_UNLOCK; + if (ret) { + ret = -1; + gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_RESP_FROM_UNKNOWN_PEER, + "Staging response " + "for 'Volume %s' received from unknown " + "peer: %s", + gd_op_list[rsp.op], uuid_utoa(rsp.uuid)); + goto out; + } + + gf_uuid_copy(args->uuid, rsp.uuid); + if (rsp.op == GD_OP_REPLACE_BRICK || rsp.op == GD_OP_QUOTA || + rsp.op == GD_OP_CREATE_VOLUME || rsp.op == GD_OP_ADD_BRICK || + rsp.op == GD_OP_START_VOLUME) { + pthread_mutex_lock(&args->lock_dict); + { + ret = glusterd_syncop_aggr_rsp_dict(rsp.op, args->dict, rsp_dict); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RESP_AGGR_FAIL, "%s", + "Failed to aggregate response from " + " node/brick"); + } + pthread_mutex_unlock(&args->lock_dict); + } + + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + +out: + gd_collate_errors(args, op_ret, op_errno, rsp.op_errstr, + GLUSTERD_MGMT_STAGE_OP, *peerid, rsp.uuid); + + if (rsp_dict) + dict_unref(rsp_dict); + GF_FREE(peerid); + /* req->rpc_status set to -1 means, STACK_DESTROY will be called from + * the caller function. 
+ */ + if (req->rpc_status != -1) + STACK_DESTROY(frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_syncop_stage_op_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) +{ + return glusterd_big_locked_cbk(req, iov, count, myframe, + _gd_syncop_stage_op_cbk); +} + +int +gd_syncop_mgmt_stage_op(glusterd_peerinfo_t *peerinfo, struct syncargs *args, + uuid_t my_uuid, uuid_t recv_uuid, int op, + dict_t *dict_out, dict_t *op_ctx) +{ + gd1_mgmt_stage_op_req *req = NULL; + int ret = -1; + uuid_t *peerid = NULL; + + req = GF_CALLOC(1, sizeof(*req), gf_gld_mt_mop_stage_req_t); + if (!req) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); + goto out; + } + + gf_uuid_copy(req->uuid, my_uuid); + req->op = op; + + ret = dict_allocate_and_serialize(dict_out, &req->buf.buf_val, + &req->buf.buf_len); + if (ret) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); + goto out; + } + + GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret); + if (ret) + goto out; + + ret = gd_syncop_submit_request( + peerinfo->rpc, req, args, peerid, &gd_mgmt_prog, GLUSTERD_MGMT_STAGE_OP, + gd_syncop_stage_op_cbk, (xdrproc_t)xdr_gd1_mgmt_stage_op_req); +out: + gd_stage_op_req_free(req); + return ret; +} + +int32_t +_gd_syncop_brick_op_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) +{ + struct syncargs *args = NULL; + gd1_mgmt_brick_op_rsp rsp = { + 0, + }; + int ret = -1; + call_frame_t *frame = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + frame = myframe; + args = frame->local; + frame->local = NULL; + + /* initialize */ + args->op_ret = -1; + args->op_errno = EINVAL; + + if (-1 == req->rpc_status) { + args->op_errno = ENOTCONN; + goto out; + } + + GF_VALIDATE_OR_GOTO_WITH_ERROR(this->name, iov, out, args->op_errno, + EINVAL); + + ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp); + if (ret < 0) + goto out; + + if (rsp.output.output_len) { + args->dict = dict_new(); + if (!args->dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, + NULL); + ret = -1; + args->op_errno = ENOMEM; + goto out; + } + + ret = dict_unserialize(rsp.output.output_val, rsp.output.output_len, + &args->dict); + if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_UNSERIALIZE_FAIL, NULL); + goto out; + } + } + + args->op_ret = rsp.op_ret; + args->op_errno = rsp.op_errno; + args->errstr = gf_strdup(rsp.op_errstr); + +out: + if ((rsp.op_errstr) && (strcmp(rsp.op_errstr, "") != 0)) + free(rsp.op_errstr); + free(rsp.output.output_val); + + /* req->rpc_status set to -1 means, STACK_DESTROY will be called from + * the caller function. 
+ */ + if (req->rpc_status != -1) + STACK_DESTROY(frame->root); + __wake(args); + + return 0; +} + +int32_t +gd_syncop_brick_op_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) +{ + return glusterd_big_locked_cbk(req, iov, count, myframe, + _gd_syncop_brick_op_cbk); +} + +int +gd_syncop_mgmt_brick_op(struct rpc_clnt *rpc, glusterd_pending_node_t *pnode, + int op, dict_t *dict_out, dict_t *op_ctx, char **errstr) +{ + struct syncargs args = { + 0, + }; + gd1_mgmt_brick_op_req *req = NULL; + int ret = 0; + xlator_t *this = NULL; + + this = THIS; + args.op_ret = -1; + args.op_errno = ENOTCONN; + + if ((pnode->type == GD_NODE_NFS) || (pnode->type == GD_NODE_QUOTAD) || + (pnode->type == GD_NODE_SCRUB) || + ((pnode->type == GD_NODE_SHD) && (op == GD_OP_STATUS_VOLUME))) { + ret = glusterd_node_op_build_payload(op, &req, dict_out); + + } else { + ret = glusterd_brick_op_build_payload(op, pnode->node, &req, dict_out); + } + + if (ret) + goto out; + + GD_SYNCOP(rpc, (&args), NULL, gd_syncop_brick_op_cbk, req, &gd_brick_prog, + req->op, xdr_gd1_mgmt_brick_op_req); + + if (args.errstr) { + if ((strlen(args.errstr) > 0) && errstr) + *errstr = args.errstr; + else + GF_FREE(args.errstr); + } + + if (GD_OP_STATUS_VOLUME == op) { + ret = dict_set_int32(args.dict, "index", pnode->index); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Error setting index on brick status" + " rsp dict"); + args.op_ret = -1; + goto out; + } + } + + if (req->op == GLUSTERD_BRICK_TERMINATE) { + if (args.op_ret && (args.op_errno == ENOTCONN)) { + /* + * This is actually OK. It happens when the target + * brick process exits and we saw the closed connection + * before we read the response. If we didn't read the + * response quickly enough that's kind of our own + * fault, and the fact that the process exited means + * that our goal of terminating the brick was achieved. + */ + args.op_ret = 0; + } + } + + if (args.op_ret == 0) + glusterd_handle_node_rsp(dict_out, pnode->node, op, args.dict, op_ctx, + errstr, pnode->type); + +out: + errno = args.op_errno; + if (args.dict) + dict_unref(args.dict); + if (args.op_ret && errstr && (*errstr == NULL)) { + if (op == GD_OP_HEAL_VOLUME) { + gf_asprintf(errstr, + "Glusterd Syncop Mgmt brick op '%s' failed." + " Please check glustershd log file for details.", + gd_op_list[op]); + } else { + gf_asprintf(errstr, + "Glusterd Syncop Mgmt brick op '%s' failed." 
+ " Please check brick log file for details.", + gd_op_list[op]); + } + } + gd_brick_op_req_free(req); + return args.op_ret; +} + +int32_t +_gd_syncop_commit_op_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) +{ + int ret = -1; + gd1_mgmt_commit_op_rsp rsp = { + {0}, + }; + struct syncargs *args = NULL; + xlator_t *this = NULL; + dict_t *rsp_dict = NULL; + call_frame_t *frame = NULL; + int op_ret = -1; + int op_errno = -1; + int type = GF_QUOTA_OPTION_TYPE_NONE; + uuid_t *peerid = NULL; + + this = THIS; + GF_ASSERT(this); + + frame = myframe; + args = frame->local; + peerid = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + GF_VALIDATE_OR_GOTO_WITH_ERROR(this->name, iov, out, op_errno, EINVAL); + + ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_commit_op_rsp); + if (ret < 0) { + goto out; + } + + if (rsp.dict.dict_len) { + /* Unserialize the dictionary */ + rsp_dict = dict_new(); + + ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &rsp_dict); + if (ret < 0) { + GF_FREE(rsp.dict.dict_val); + goto out; + } else { + rsp_dict->extra_stdfree = rsp.dict.dict_val; + } + } + + RCU_READ_LOCK; + ret = (glusterd_peerinfo_find(rsp.uuid, NULL) == 0); + RCU_READ_UNLOCK; + if (ret) { + ret = -1; + gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_RESP_FROM_UNKNOWN_PEER, + "Commit response " + "for 'Volume %s' received from unknown " + "peer: %s", + gd_op_list[rsp.op], uuid_utoa(rsp.uuid)); + goto out; + } + + gf_uuid_copy(args->uuid, rsp.uuid); + if (rsp.op == GD_OP_QUOTA) { + ret = dict_get_int32(args->dict, "type", &type); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get " + "opcode"); + goto out; + } + } + + if ((rsp.op != GD_OP_QUOTA) || (type == GF_QUOTA_OPTION_TYPE_LIST)) { + pthread_mutex_lock(&args->lock_dict); + { + ret = glusterd_syncop_aggr_rsp_dict(rsp.op, args->dict, rsp_dict); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RESP_AGGR_FAIL, "%s", + "Failed to aggregate response from " + " node/brick"); + } + pthread_mutex_unlock(&args->lock_dict); + } + + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + +out: + gd_collate_errors(args, op_ret, op_errno, rsp.op_errstr, + GLUSTERD_MGMT_COMMIT_OP, *peerid, rsp.uuid); + if (rsp_dict) + dict_unref(rsp_dict); + GF_FREE(peerid); + /* req->rpc_status set to -1 means, STACK_DESTROY will be called from + * the caller function. 
+ */ + if (req->rpc_status != -1) + STACK_DESTROY(frame->root); + synctask_barrier_wake(args); + + return 0; +} + +int32_t +gd_syncop_commit_op_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) +{ + return glusterd_big_locked_cbk(req, iov, count, myframe, + _gd_syncop_commit_op_cbk); +} + +int +gd_syncop_mgmt_commit_op(glusterd_peerinfo_t *peerinfo, struct syncargs *args, + uuid_t my_uuid, uuid_t recv_uuid, int op, + dict_t *dict_out, dict_t *op_ctx) +{ + gd1_mgmt_commit_op_req *req = NULL; + int ret = -1; + uuid_t *peerid = NULL; + + req = GF_CALLOC(1, sizeof(*req), gf_gld_mt_mop_commit_req_t); + if (!req) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); + goto out; + } + + gf_uuid_copy(req->uuid, my_uuid); + req->op = op; + + ret = dict_allocate_and_serialize(dict_out, &req->buf.buf_val, + &req->buf.buf_len); + if (ret) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, + GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL); + goto out; + } + + GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret); + if (ret) + goto out; + + ret = gd_syncop_submit_request(peerinfo->rpc, req, args, peerid, + &gd_mgmt_prog, GLUSTERD_MGMT_COMMIT_OP, + gd_syncop_commit_op_cbk, + (xdrproc_t)xdr_gd1_mgmt_commit_op_req); +out: + gd_commit_op_req_free(req); + return ret; +} + +int +gd_lock_op_phase(glusterd_conf_t *conf, glusterd_op_t op, dict_t *op_ctx, + char **op_errstr, uuid_t txn_id, + glusterd_op_info_t *txn_opinfo, gf_boolean_t cluster_lock) +{ + int ret = -1; + int peer_cnt = 0; + uuid_t peer_uuid = {0}; + xlator_t *this = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + struct syncargs args = {0}; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + + ret = synctask_barrier_init((&args)); + if (ret) + goto out; + + peer_cnt = 0; + + RCU_READ_LOCK; + cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list) + { + /* Only send requests to peers who were available before the + * transaction started + */ + if (peerinfo->generation > txn_opinfo->txn_generation) + continue; + + if (!peerinfo->connected) + continue; + if (op != GD_OP_SYNC_VOLUME && + peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) + continue; + + if (cluster_lock) { + /* Reset lock status */ + peerinfo->locked = _gf_false; + gd_syncop_mgmt_lock(peerinfo, &args, MY_UUID, peer_uuid); + } else + gd_syncop_mgmt_v3_lock(op, op_ctx, peerinfo, &args, MY_UUID, + peer_uuid, txn_id); + peer_cnt++; + } + RCU_READ_UNLOCK; + + if (0 == peer_cnt) { + ret = 0; + goto out; + } + + gd_synctask_barrier_wait((&args), peer_cnt); + + if (args.op_ret) { + if (args.errstr) + *op_errstr = gf_strdup(args.errstr); + else { + ret = gf_asprintf(op_errstr, + "Another transaction " + "could be in progress. Please try " + "again after some time."); + if (ret == -1) + *op_errstr = NULL; + + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PEER_LOCK_FAIL, + "Failed to acquire lock"); + } + } + + ret = args.op_ret; + + gf_msg_debug(this->name, 0, + "Sent lock op req for 'Volume %s' " + "to %d peers. 
Returning %d", + gd_op_list[op], peer_cnt, ret); +out: + return ret; +} + +int +gd_stage_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, + char **op_errstr, glusterd_op_info_t *txn_opinfo) +{ + int ret = -1; + int peer_cnt = 0; + dict_t *rsp_dict = NULL; + char *hostname = NULL; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + uuid_t tmp_uuid = {0}; + char *errstr = NULL; + struct syncargs args = {0}; + dict_t *aggr_dict = NULL; + + this = THIS; + GF_ASSERT(this); + conf = this->private; + GF_ASSERT(conf); + + rsp_dict = dict_new(); + if (!rsp_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); + goto out; + } + + if ((op == GD_OP_CREATE_VOLUME) || (op == GD_OP_ADD_BRICK) || + (op == GD_OP_START_VOLUME)) + aggr_dict = req_dict; + else + aggr_dict = op_ctx; + + ret = glusterd_validate_quorum(this, op, req_dict, op_errstr); + if (ret) { + gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_SERVER_QUORUM_NOT_MET, + "Server quorum not met. Rejecting operation."); + goto out; + } + + ret = glusterd_op_stage_validate(op, req_dict, op_errstr, rsp_dict); + if (ret) { + hostname = "localhost"; + goto stage_done; + } + + if ((op == GD_OP_REPLACE_BRICK || op == GD_OP_QUOTA || + op == GD_OP_CREATE_VOLUME || op == GD_OP_ADD_BRICK || + op == GD_OP_START_VOLUME)) { + ret = glusterd_syncop_aggr_rsp_dict(op, aggr_dict, rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RESP_AGGR_FAIL, "%s", + "Failed to aggregate response from node/brick"); + goto out; + } + } + dict_unref(rsp_dict); + rsp_dict = NULL; + +stage_done: + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VALIDATE_FAILED, + LOGSTR_STAGE_FAIL, gd_op_list[op], hostname, + (*op_errstr) ? ":" : " ", (*op_errstr) ? 
*op_errstr : " "); + if (*op_errstr == NULL) + gf_asprintf(op_errstr, OPERRSTR_STAGE_FAIL, hostname); + goto out; + } + + gd_syncargs_init(&args, aggr_dict); + ret = synctask_barrier_init((&args)); + if (ret) + goto out; + + peer_cnt = 0; + + RCU_READ_LOCK; + cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list) + { + /* Only send requests to peers who were available before the + * transaction started + */ + if (peerinfo->generation > txn_opinfo->txn_generation) + continue; + + if (!peerinfo->connected) + continue; + if (op != GD_OP_SYNC_VOLUME && + peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) + continue; + + (void)gd_syncop_mgmt_stage_op(peerinfo, &args, MY_UUID, tmp_uuid, op, + req_dict, op_ctx); + peer_cnt++; + } + RCU_READ_UNLOCK; + + if (0 == peer_cnt) { + ret = 0; + goto out; + } + + gf_msg_debug(this->name, 0, + "Sent stage op req for 'Volume %s' " + "to %d peers", + gd_op_list[op], peer_cnt); + + gd_synctask_barrier_wait((&args), peer_cnt); + + if (args.errstr) + *op_errstr = gf_strdup(args.errstr); + else if (dict_get_str(aggr_dict, "errstr", &errstr) == 0) + *op_errstr = gf_strdup(errstr); + + ret = args.op_ret; + +out: + if ((ret == 0) && (op == GD_OP_QUOTA)) { + ret = glusterd_validate_and_set_gfid(op_ctx, req_dict, op_errstr); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GFID_VALIDATE_SET_FAIL, + "Failed to validate and set gfid"); + } + + if (rsp_dict) + dict_unref(rsp_dict); + return ret; +} + +int +gd_commit_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, + char **op_errstr, glusterd_op_info_t *txn_opinfo) +{ + dict_t *rsp_dict = NULL; + int peer_cnt = -1; + int ret = -1; + char *hostname = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + uuid_t tmp_uuid = {0}; + char *errstr = NULL; + struct syncargs args = {0}; + int type = GF_QUOTA_OPTION_TYPE_NONE; + uint32_t cmd = 0; + gf_boolean_t origin_glusterd = _gf_false; + + this = THIS; + GF_ASSERT(this); + conf = this->private; + GF_ASSERT(conf); + + rsp_dict = dict_new(); + if (!rsp_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); + ret = -1; + goto out; + } + + ret = glusterd_op_commit_perform(op, req_dict, op_errstr, rsp_dict); + if (ret) { + hostname = "localhost"; + goto commit_done; + } + + if (op == GD_OP_QUOTA) { + ret = dict_get_int32(op_ctx, "type", &type); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get " + "opcode"); + goto out; + } + } + + if (((op == GD_OP_QUOTA) && + ((type == GF_QUOTA_OPTION_TYPE_LIST) || + (type == GF_QUOTA_OPTION_TYPE_LIST_OBJECTS))) || + ((op != GD_OP_SYNC_VOLUME) && (op != GD_OP_QUOTA))) { + ret = glusterd_syncop_aggr_rsp_dict(op, op_ctx, rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RESP_AGGR_FAIL, "%s", + "Failed to aggregate " + "response from node/brick"); + goto out; + } + } + + dict_unref(rsp_dict); + rsp_dict = NULL; + +commit_done: + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_OP_FAIL, + LOGSTR_COMMIT_FAIL, gd_op_list[op], hostname, + (*op_errstr) ? ":" : " ", (*op_errstr) ? 
*op_errstr : " "); + if (*op_errstr == NULL) + gf_asprintf(op_errstr, OPERRSTR_COMMIT_FAIL, hostname); + goto out; + } + + gd_syncargs_init(&args, op_ctx); + ret = synctask_barrier_init((&args)); + if (ret) + goto out; + + peer_cnt = 0; + origin_glusterd = is_origin_glusterd(req_dict); + + if (op == GD_OP_STATUS_VOLUME) { + ret = dict_get_uint32(req_dict, "cmd", &cmd); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=cmd", NULL); + goto out; + } + + if (origin_glusterd) { + if ((cmd & GF_CLI_STATUS_ALL)) { + ret = 0; + goto out; + } + } + } + + RCU_READ_LOCK; + cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list) + { + /* Only send requests to peers who were available before the + * transaction started + */ + if (peerinfo->generation > txn_opinfo->txn_generation) + continue; + + if (!peerinfo->connected) + continue; + if (op != GD_OP_SYNC_VOLUME && + peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) + continue; + + (void)gd_syncop_mgmt_commit_op(peerinfo, &args, MY_UUID, tmp_uuid, op, + req_dict, op_ctx); + peer_cnt++; + } + RCU_READ_UNLOCK; + + if (0 == peer_cnt) { + ret = 0; + goto out; + } + + gd_synctask_barrier_wait((&args), peer_cnt); + ret = args.op_ret; + if (args.errstr) + *op_errstr = gf_strdup(args.errstr); + else if (dict_get_str(op_ctx, "errstr", &errstr) == 0) + *op_errstr = gf_strdup(errstr); + + gf_msg_debug(this->name, 0, + "Sent commit op req for 'Volume %s' " + "to %d peers", + gd_op_list[op], peer_cnt); +out: + if (!ret) + glusterd_op_modify_op_ctx(op, op_ctx); + + if (rsp_dict) + dict_unref(rsp_dict); + + GF_FREE(args.errstr); + args.errstr = NULL; + + return ret; +} + +int +gd_unlock_op_phase(glusterd_conf_t *conf, glusterd_op_t op, int *op_ret, + rpcsvc_request_t *req, dict_t *op_ctx, char *op_errstr, + char *volname, gf_boolean_t is_acquired, uuid_t txn_id, + glusterd_op_info_t *txn_opinfo, gf_boolean_t cluster_lock) +{ + glusterd_peerinfo_t *peerinfo = NULL; + uuid_t tmp_uuid = {0}; + int peer_cnt = 0; + int ret = -1; + xlator_t *this = NULL; + struct syncargs args = {0}; + int32_t global = 0; + char *type = NULL; + + this = THIS; + GF_ASSERT(this); + + /* If the lock has not been held during this + * transaction, do not send unlock requests */ + if (!is_acquired) { + ret = 0; + goto out; + } + + ret = synctask_barrier_init((&args)); + if (ret) + goto out; + + peer_cnt = 0; + + if (cluster_lock) { + RCU_READ_LOCK; + cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list) + { + /* Only send requests to peers who were available before + * the transaction started + */ + if (peerinfo->generation > txn_opinfo->txn_generation) + continue; + + if (!peerinfo->connected) + continue; + if (op != GD_OP_SYNC_VOLUME && + peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) + continue; + + /* Only unlock peers that were locked */ + if (peerinfo->locked) { + gd_syncop_mgmt_unlock(peerinfo, &args, MY_UUID, tmp_uuid); + peer_cnt++; + } + } + RCU_READ_UNLOCK; + } else { + ret = dict_get_int32(op_ctx, "hold_global_locks", &global); + if (!ret && global) + type = "global"; + else + type = "vol"; + if (volname || global) { + RCU_READ_LOCK; + cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list) + { + /* Only send requests to peers who were + * available before the transaction started + */ + if (peerinfo->generation > txn_opinfo->txn_generation) + continue; + + if (!peerinfo->connected) + continue; + if (op != GD_OP_SYNC_VOLUME && + peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) + continue; + + 
gd_syncop_mgmt_v3_unlock(op_ctx, peerinfo, &args, MY_UUID, + tmp_uuid, txn_id); + peer_cnt++; + } + RCU_READ_UNLOCK; + } + } + + if (0 == peer_cnt) { + ret = 0; + goto out; + } + + gd_synctask_barrier_wait((&args), peer_cnt); + + ret = args.op_ret; + + gf_msg_debug(this->name, 0, + "Sent unlock op req for 'Volume %s' " + "to %d peers. Returning %d", + gd_op_list[op], peer_cnt, ret); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PEER_UNLOCK_FAIL, + "Failed to unlock " + "on some peer(s)"); + } + +out: + /* If unlock failed, and op_ret was previously set + * priority is given to the op_ret. If op_ret was + * not set, and unlock failed, then set op_ret */ + if (!*op_ret) + *op_ret = ret; + + if (is_acquired) { + /* Based on the op-version, + * we release the cluster or mgmt_v3 lock + * and clear the op */ + + glusterd_op_clear_op(op); + if (cluster_lock) + glusterd_unlock(MY_UUID); + else { + if (type) { + ret = glusterd_mgmt_v3_unlock(volname, MY_UUID, type); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_MGMTV3_UNLOCK_FAIL, + "Unable to release lock for %s", volname); + } + } + } + + if (!*op_ret) + *op_ret = ret; + + /* + * If there are any quorum events while the OP is in progress, process + * them. + */ + if (conf->pending_quorum_action) + glusterd_do_quorum_action(); + + return 0; +} + +int +gd_get_brick_count(struct cds_list_head *bricks) +{ + glusterd_pending_node_t *pending_node = NULL; + int npeers = 0; + cds_list_for_each_entry(pending_node, bricks, list) { npeers++; } + return npeers; +} + +int +gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, + char **op_errstr) +{ + glusterd_pending_node_t *pending_node = NULL; + glusterd_pending_node_t *tmp = NULL; + struct cds_list_head selected = { + 0, + }; + xlator_t *this = NULL; + int brick_count = 0; + int ret = -1; + rpc_clnt_t *rpc = NULL; + dict_t *rsp_dict = NULL; + int32_t cmd = GF_OP_CMD_NONE; + glusterd_volinfo_t *volinfo = NULL; + + this = THIS; + rsp_dict = dict_new(); + if (!rsp_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); + ret = -1; + goto out; + } + + CDS_INIT_LIST_HEAD(&selected); + ret = glusterd_op_bricks_select(op, req_dict, op_errstr, &selected, + rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_OP_FAIL, "%s", + (*op_errstr) ? *op_errstr + : "Brick op failed. Check " + "glusterd log file for more details."); + goto out; + } + + if (op == GD_OP_HEAL_VOLUME) { + ret = glusterd_syncop_aggr_rsp_dict(op, op_ctx, rsp_dict); + if (ret) + goto out; + } + dict_unref(rsp_dict); + rsp_dict = NULL; + + brick_count = 0; + cds_list_for_each_entry_safe(pending_node, tmp, &selected, list) + { + rpc = glusterd_pending_node_get_rpc(pending_node); + /* In the case of rebalance if the rpc object is null, we try to + * create the rpc object. if the rebalance daemon is down, it returns + * -1. otherwise, rpc object will be created and referenced. 
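+ * The reference taken on the rebalance rpc is dropped again by
+ * glusterd_pending_node_put_rpc() at the end of the iteration.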
+ */ + if (!rpc) { + if (pending_node->type == GD_NODE_REBALANCE && pending_node->node) { + volinfo = pending_node->node; + ret = glusterd_rebalance_rpc_create(volinfo); + if (ret) { + ret = 0; + glusterd_defrag_volume_node_rsp(req_dict, NULL, op_ctx); + goto out; + } else { + rpc = glusterd_defrag_rpc_get(volinfo->rebal.defrag); + } + } else { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RPC_FAILURE, + "Brick Op failed " + "due to rpc failure."); + goto out; + } + } + + ret = gd_syncop_mgmt_brick_op(rpc, pending_node, op, req_dict, op_ctx, + op_errstr); + if (op == GD_OP_STATUS_VOLUME) { + /* for client-list its enough to quit the loop + * once we get the value from one brick + * */ + ret = dict_get_int32(req_dict, "cmd", &cmd); + if (!ret && (cmd & GF_CLI_STATUS_CLIENT_LIST)) { + if (dict_get(op_ctx, "client-count")) + break; + } + } + if (ret) + goto out; + + brick_count++; + glusterd_pending_node_put_rpc(pending_node); + GF_FREE(pending_node); + } + + pending_node = NULL; + ret = 0; +out: + if (pending_node && pending_node->node) + glusterd_pending_node_put_rpc(pending_node); + + if (rsp_dict) + dict_unref(rsp_dict); + gf_msg_debug(this->name, 0, "Sent op req to %d bricks", brick_count); + return ret; +} + +void +gd_sync_task_begin(dict_t *op_ctx, rpcsvc_request_t *req) +{ + int ret = -1; + int op_ret = -1; + dict_t *req_dict = NULL; + glusterd_conf_t *conf = NULL; + glusterd_op_t op = GD_OP_NONE; + int32_t tmp_op = 0; + char *op_errstr = NULL; + char *tmp = NULL; + char *global = NULL; + char *volname = NULL; + xlator_t *this = NULL; + gf_boolean_t is_acquired = _gf_false; + gf_boolean_t is_global = _gf_false; + uuid_t *txn_id = NULL; + glusterd_op_info_t txn_opinfo = { + {0}, + }; + uint32_t op_errno = 0; + gf_boolean_t cluster_lock = _gf_false; + uint32_t timeout = 0; + + this = THIS; + GF_ASSERT(this); + conf = this->private; + GF_ASSERT(conf); + + ret = dict_get_int32(op_ctx, GD_SYNC_OPCODE_KEY, &tmp_op); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get volume " + "operation"); + goto out; + } + op = tmp_op; + + /* Generate a transaction-id for this operation and + * save it in the dict */ + ret = glusterd_generate_txn_id(op_ctx, &txn_id); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_IDGEN_FAIL, + "Failed to generate transaction id"); + goto out; + } + + /* Save opinfo for this transaction with the transaction id */ + glusterd_txn_opinfo_init(&txn_opinfo, NULL, &op, NULL, NULL); + ret = glusterd_set_txn_opinfo(txn_id, &txn_opinfo); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL, + "Unable to set transaction's opinfo"); + + gf_msg_debug(this->name, 0, "Transaction ID : %s", uuid_utoa(*txn_id)); + + /* Save the MY_UUID as the originator_uuid */ + ret = glusterd_set_originator_uuid(op_ctx); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UUID_SET_FAIL, + "Failed to set originator_uuid."); + goto out; + } + + if (conf->op_version < GD_OP_VERSION_3_6_0) + cluster_lock = _gf_true; + + /* Based on the op_version, acquire a cluster or mgmt_v3 lock */ + if (cluster_lock) { + ret = glusterd_lock(MY_UUID); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTERD_LOCK_FAIL, + "Unable to acquire lock"); + gf_asprintf(&op_errstr, + "Another transaction is in progress. " + "Please try again after some time."); + goto out; + } + } else { + /* Cli will add timeout key to dict if the default timeout is + * other than 2 minutes. 
Here we use this value to check whether + * mgmt_v3_lock_timeout should be set to default value or we + * need to change the value according to timeout value + * i.e, timeout + 120 seconds. */ + ret = dict_get_uint32(op_ctx, "timeout", &timeout); + if (!ret) + conf->mgmt_v3_lock_timeout = timeout + 120; + + ret = dict_get_str(op_ctx, "globalname", &global); + if (!ret) { + is_global = _gf_true; + goto global; + } + + /* If no volname is given as a part of the command, locks will + * not be held */ + ret = dict_get_str(op_ctx, "volname", &tmp); + if (ret) { + gf_msg_debug("glusterd", 0, + "Failed to get volume " + "name"); + goto local_locking_done; + } else { + /* Use a copy of volname, as cli response will be + * sent before the unlock, and the volname in the + * dict, might be removed */ + volname = gf_strdup(tmp); + if (!volname) + goto out; + } + + ret = glusterd_mgmt_v3_lock(volname, MY_UUID, &op_errno, "vol"); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_LOCK_GET_FAIL, + "Unable to acquire lock for %s", volname); + gf_asprintf(&op_errstr, + "Another transaction is in progress " + "for %s. Please try again after some time.", + volname); + goto out; + } + } + +global: + if (is_global) { + ret = glusterd_mgmt_v3_lock(global, MY_UUID, &op_errno, "global"); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_LOCK_GET_FAIL, + "Unable to acquire lock for %s", global); + gf_asprintf(&op_errstr, + "Another transaction is in progress " + "for %s. Please try again after some time.", + global); + is_global = _gf_false; + goto out; + } + } + + is_acquired = _gf_true; + +local_locking_done: + + /* If no volname is given as a part of the command, locks will + * not be held */ + if (volname || cluster_lock || is_global) { + ret = gd_lock_op_phase(conf, op, op_ctx, &op_errstr, *txn_id, + &txn_opinfo, cluster_lock); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PEER_LOCK_FAIL, + "Locking Peers Failed."); + goto out; + } + } + + ret = glusterd_op_build_payload(&req_dict, &op_errstr, op_ctx); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_OP_PAYLOAD_BUILD_FAIL, + LOGSTR_BUILD_PAYLOAD, gd_op_list[op]); + if (op_errstr == NULL) + gf_asprintf(&op_errstr, OPERRSTR_BUILD_PAYLOAD); + goto out; + } + + ret = gd_stage_op_phase(op, op_ctx, req_dict, &op_errstr, &txn_opinfo); + if (ret) + goto out; + + ret = gd_brick_op_phase(op, op_ctx, req_dict, &op_errstr); + if (ret) + goto out; + + ret = gd_commit_op_phase(op, op_ctx, req_dict, &op_errstr, &txn_opinfo); + if (ret) + goto out; + + ret = 0; +out: + op_ret = ret; + if (txn_id) { + if (global) + (void)gd_unlock_op_phase(conf, op, &op_ret, req, op_ctx, op_errstr, + global, is_acquired, *txn_id, &txn_opinfo, + cluster_lock); + else + (void)gd_unlock_op_phase(conf, op, &op_ret, req, op_ctx, op_errstr, + volname, is_acquired, *txn_id, &txn_opinfo, + cluster_lock); + + /* Clearing the transaction opinfo */ + ret = glusterd_clear_txn_opinfo(txn_id); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_CLEAR_FAIL, + "Unable to clear transaction's " + "opinfo for transaction ID : %s", + uuid_utoa(*txn_id)); + } + + if (op_ret && (op_errno == 0)) + op_errno = EG_INTRNL; + + glusterd_op_send_cli_response(op, op_ret, op_errno, req, op_ctx, op_errstr); + + if (volname) + GF_FREE(volname); + + if (req_dict) + dict_unref(req_dict); + + if (op_errstr) { + GF_FREE(op_errstr); + op_errstr = NULL; + } + + return; +} + +int32_t +glusterd_op_begin_synctask(rpcsvc_request_t *req, glusterd_op_t op, void 
*dict) +{ + int ret = 0; + + ret = dict_set_int32(dict, GD_SYNC_OPCODE_KEY, op); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "dict set failed for setting operations"); + goto out; + } + + gd_sync_task_begin(dict, req); + ret = 0; +out: + + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.h b/xlators/mgmt/glusterd/src/glusterd-syncop.h new file mode 100644 index 00000000000..a265f2135c6 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-syncop.h @@ -0,0 +1,93 @@ +/* + Copyright (c) 2012-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef __RPC_SYNCOP_H +#define __RPC_SYNCOP_H + +#include <glusterfs/syncop.h> +#include "glusterd-sm.h" +#include "glusterd.h" + +#define GD_SYNC_OPCODE_KEY "sync-mgmt-operation" + +/* gd_syncop_* */ +#define GD_SYNCOP(rpc, stb, cookie, cbk, req, prog, procnum, xdrproc) \ + do { \ + int ret = 0; \ + struct synctask *task = NULL; \ + glusterd_conf_t *conf = THIS->private; \ + \ + task = synctask_get(); \ + stb->task = task; \ + \ + /*This is to ensure that the brick_op_cbk is able to \ + * take the big lock*/ \ + synclock_unlock(&conf->big_lock); \ + ret = gd_syncop_submit_request(rpc, req, stb, cookie, prog, procnum, \ + cbk, (xdrproc_t)xdrproc); \ + if (!ret) \ + synctask_yield(stb->task, NULL); \ + else \ + gf_asprintf(&stb->errstr, \ + "%s failed. Check log file" \ + " for more details", \ + (prog)->progname); \ + synclock_lock(&conf->big_lock); \ + } while (0) + +#define GD_ALLOC_COPY_UUID(dst_ptr, uuid, ret) \ + do { \ + dst_ptr = GF_MALLOC(sizeof(*dst_ptr), gf_common_mt_uuid_t); \ + if (dst_ptr) { \ + gf_uuid_copy(*dst_ptr, uuid); \ + ret = 0; \ + } else { \ + ret = -1; \ + } \ + } while (0) + +int32_t +gd_syncop_brick_op_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe); + +int +gd_syncop_submit_request(struct rpc_clnt *rpc, void *req, void *local, + void *cookie, rpc_clnt_prog_t *prog, int procnum, + fop_cbk_fn_t cbkfn, xdrproc_t xdrproc); +int +gd_syncop_mgmt_lock(glusterd_peerinfo_t *peerinfo, struct syncargs *arg, + uuid_t my_uuid, uuid_t recv_uuid); + +int +gd_syncop_mgmt_unlock(glusterd_peerinfo_t *peerinfo, struct syncargs *arg, + uuid_t my_uuid, uuid_t recv_uuid); + +int +gd_syncop_mgmt_stage_op(glusterd_peerinfo_t *peerinfo, struct syncargs *arg, + uuid_t my_uuid, uuid_t recv_uuid, int op, + dict_t *dict_out, dict_t *op_ctx); + +int +gd_syncop_mgmt_commit_op(glusterd_peerinfo_t *peerinfo, struct syncargs *arg, + uuid_t my_uuid, uuid_t recv_uuid, int op, + dict_t *dict_out, dict_t *op_ctx); + +void +gd_synctask_barrier_wait(struct syncargs *args, int count); + +int +gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, + char **op_errstr); + +int +glusterd_syncop_aggr_rsp_dict(glusterd_op_t op, dict_t *aggr, dict_t *rsp); + +void +gd_syncargs_init(struct syncargs *args, dict_t *op_ctx); +#endif /* __RPC_SYNCOP_H */ diff --git a/xlators/mgmt/glusterd/src/glusterd-tierd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-tierd-svc-helper.c new file mode 100644 index 00000000000..035795b3deb --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-tierd-svc-helper.c @@ -0,0 +1,207 @@ +/* + Copyright (c) 2016 Red Hat, Inc. 
<http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include "glusterd.h" +#include "glusterd-utils.h" +#include "glusterd-tierd-svc-helper.h" +#include "glusterd-messages.h" +#include <glusterfs/syscall.h> +#include "glusterd-volgen.h" + +void +glusterd_svc_build_tierd_rundir(glusterd_volinfo_t *volinfo, char *path, + int path_len) +{ + char workdir[PATH_MAX] = { + 0, + }; + glusterd_conf_t *priv = THIS->private; + + GLUSTERD_GET_TIER_DIR(workdir, volinfo, priv); + + snprintf(path, path_len, "%s/run", workdir); +} + +void +glusterd_svc_build_tierd_socket_filepath(glusterd_volinfo_t *volinfo, + char *path, int path_len) +{ + char sockfilepath[PATH_MAX] = { + 0, + }; + char rundir[PATH_MAX] = { + 0, + }; + int32_t len = 0; + + glusterd_svc_build_tierd_rundir(volinfo, rundir, sizeof(rundir)); + len = snprintf(sockfilepath, sizeof(sockfilepath), "%s/run-%s", rundir, + uuid_utoa(MY_UUID)); + if ((len < 0) || (len >= sizeof(sockfilepath))) { + sockfilepath[0] = 0; + } + + glusterd_set_socket_filepath(sockfilepath, path, path_len); +} + +void +glusterd_svc_build_tierd_pidfile(glusterd_volinfo_t *volinfo, char *path, + int path_len) +{ + char rundir[PATH_MAX] = { + 0, + }; + + glusterd_svc_build_tierd_rundir(volinfo, rundir, sizeof(rundir)); + + snprintf(path, path_len, "%s/%s-tierd.pid", rundir, volinfo->volname); +} + +void +glusterd_svc_build_tierd_volfile_path(glusterd_volinfo_t *volinfo, char *path, + int path_len) +{ + char workdir[PATH_MAX] = { + 0, + }; + glusterd_conf_t *priv = THIS->private; + + GLUSTERD_GET_VOLUME_DIR(workdir, volinfo, priv); + + snprintf(path, path_len, "%s/%s-tierd.vol", workdir, volinfo->volname); +} + +void +glusterd_svc_build_tierd_logdir(char *logdir, char *volname, size_t len) +{ + glusterd_conf_t *conf = THIS->private; + snprintf(logdir, len, "%s/tier/%s", priv->logdir, volname); +} + +void +glusterd_svc_build_tierd_logfile(char *logfile, char *logdir, size_t len) +{ + snprintf(logfile, len, "%s/tierd.log", logdir); +} + +int +glusterd_svc_check_tier_volfile_identical(char *svc_name, + glusterd_volinfo_t *volinfo, + gf_boolean_t *identical) +{ + char orgvol[PATH_MAX] = { + 0, + }; + char *tmpvol = NULL; + xlator_t *this = NULL; + int ret = -1; + int need_unlink = 0; + int tmp_fd = -1; + + this = THIS; + + GF_VALIDATE_OR_GOTO(THIS->name, this, out); + GF_VALIDATE_OR_GOTO(this->name, identical, out); + + glusterd_svc_build_tierd_volfile_path(volinfo, orgvol, sizeof(orgvol)); + + ret = gf_asprintf(&tmpvol, "/tmp/g%s-XXXXXX", svc_name); + if (ret < 0) { + goto out; + } + + /* coverity[SECURE_TEMP] mkstemp uses 0600 as the mode and is safe */ + tmp_fd = mkstemp(tmpvol); + if (tmp_fd < 0) { + gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED, + "Unable to create temp file" + " %s:(%s)", + tmpvol, strerror(errno)); + ret = -1; + goto out; + } + + need_unlink = 1; + ret = build_rebalance_volfile(volinfo, tmpvol, NULL); + if (ret) + goto out; + + ret = glusterd_check_files_identical(orgvol, tmpvol, identical); + if (ret) + goto out; + +out: + if (need_unlink) + sys_unlink(tmpvol); + + if (tmpvol != NULL) + GF_FREE(tmpvol); + + if (tmp_fd >= 0) + sys_close(tmp_fd); + + return ret; +} + +int +glusterd_svc_check_tier_topology_identical(char *svc_name, + glusterd_volinfo_t *volinfo, + 
gf_boolean_t *identical) +{ + char orgvol[PATH_MAX] = { + 0, + }; + char *tmpvol = NULL; + glusterd_conf_t *conf = NULL; + xlator_t *this = THIS; + int ret = -1; + int tmpclean = 0; + int tmpfd = -1; + + if ((!identical) || (!this) || (!this->private)) + goto out; + + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, out); + + glusterd_svc_build_tierd_volfile_path(volinfo, orgvol, sizeof(orgvol)); + + ret = gf_asprintf(&tmpvol, "/tmp/g%s-XXXXXX", svc_name); + if (ret < 0) { + goto out; + } + + /* coverity[SECURE_TEMP] mkstemp uses 0600 as the mode and is safe */ + tmpfd = mkstemp(tmpvol); + if (tmpfd < 0) { + gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED, + "Unable to create temp file" + " %s:(%s)", + tmpvol, strerror(errno)); + ret = -1; + goto out; + } + + tmpclean = 1; /* SET the flag to unlink() tmpfile */ + ret = build_rebalance_volfile(volinfo, tmpvol, NULL); + if (ret) + goto out; + + /* Compare the topology of volfiles */ + ret = glusterd_check_topology_identical(orgvol, tmpvol, identical); +out: + if (tmpfd >= 0) + sys_close(tmpfd); + if (tmpclean) + sys_unlink(tmpvol); + if (tmpvol != NULL) + GF_FREE(tmpvol); + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index e8d107db7b8..90ef2cf4c9c 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -1,3129 +1,15046 @@ /* - Copyright (c) 2006-2010 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
*/ +#include <inttypes.h> -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" +#if defined(GF_LINUX_HOST_OS) +#include <mntent.h> +#else +#include "mntent_compat.h" +#endif +#include <dlfcn.h> +#if (HAVE_LIB_XML) +#include <libxml/encoding.h> +#include <libxml/xmlwriter.h> #endif -#include <inttypes.h> -#include "globals.h" -#include "glusterfs.h" -#include "compat.h" -#include "dict.h" -#include "xlator.h" -#include "logging.h" -#include "timer.h" -#include "defaults.h" -#include "compat.h" -#include "md5.h" -#include "compat-errno.h" -#include "statedump.h" +#include <glusterfs/glusterfs.h> +#include <glusterfs/compat.h> +#include <glusterfs/dict.h> +#include <glusterfs/xlator.h> +#include <glusterfs/logging.h> +#include "glusterd-messages.h" +#include <glusterfs/timer.h> +#include <glusterfs/defaults.h> +#include <glusterfs/compat.h> +#include <glusterfs/syncop.h> +#include <glusterfs/run.h> +#include <glusterfs/compat-errno.h> +#include <glusterfs/statedump.h> +#include <glusterfs/syscall.h> #include "glusterd-mem-types.h" #include "glusterd.h" #include "glusterd-op-sm.h" +#include "glusterd-geo-rep.h" #include "glusterd-sm.h" #include "glusterd-utils.h" #include "glusterd-store.h" #include "glusterd-volgen.h" #include "glusterd-pmap.h" - +#include <glusterfs/glusterfs-acl.h> +#include "glusterd-syncop.h" +#include "glusterd-mgmt.h" +#include "glusterd-locks.h" +#include "glusterd-messages.h" +#include "glusterd-volgen.h" +#include "glusterd-snapshot-utils.h" +#include "glusterd-svc-mgmt.h" +#include "glusterd-svc-helper.h" +#include "glusterd-shd-svc.h" +#include "glusterd-quotad-svc.h" +#include "glusterd-snapd-svc.h" +#include "glusterd-bitd-svc.h" +#include "glusterd-gfproxyd-svc.h" +#include "glusterd-server-quorum.h" +#include <glusterfs/quota-common-utils.h> +#include <glusterfs/common-utils.h> +#include "glusterd-shd-svc-helper.h" + +#include "xdr-generic.h" #include <sys/resource.h> #include <inttypes.h> #include <signal.h> #include <sys/types.h> -#include <net/if.h> #include <sys/ioctl.h> #include <sys/socket.h> #include <rpc/pmap_clnt.h> +#include <unistd.h> +#include <fnmatch.h> +#include <sys/statvfs.h> +#include <ifaddrs.h> #ifdef GF_SOLARIS_HOST_OS #include <sys/sockio.h> #endif -#define MOUNT_PROGRAM 100005 +#ifdef __FreeBSD__ +#include <sys/sysctl.h> +#include <sys/param.h> +#include <sys/queue.h> +#include <libprocstat.h> +#include <libutil.h> +#endif + #define NFS_PROGRAM 100003 #define NFSV3_VERSION 3 + +#define MOUNT_PROGRAM 100005 #define MOUNTV3_VERSION 3 #define MOUNTV1_VERSION 1 -char *glusterd_sock_dir = "/tmp"; -static glusterd_lock_t lock; +#define NLM_PROGRAM 100021 +#define NLMV4_VERSION 4 +#define NLMV1_VERSION 1 + +#ifdef BUILD_GNFS +#define GLUSTERD_GET_NFS_PIDFILE(pidfile, priv) \ + do { \ + int32_t _nfs_pid_len; \ + _nfs_pid_len = snprintf(pidfile, PATH_MAX, "%s/nfs/nfs.pid", \ + priv->rundir); \ + if ((_nfs_pid_len < 0) || (_nfs_pid_len >= PATH_MAX)) { \ + pidfile[0] = 0; \ + } \ + } while (0) +#endif -static int32_t -glusterd_get_lock_owner (uuid_t *uuid) -{ - uuid_copy (*uuid, lock.owner) ; - return 0; -} +#define GLUSTERD_GET_QUOTAD_PIDFILE(pidfile, priv) \ + do { \ + int32_t _quotad_pid_len; \ + _quotad_pid_len = snprintf(pidfile, PATH_MAX, "%s/quotad/quotad.pid", \ + priv->rundir); \ + if ((_quotad_pid_len < 0) || (_quotad_pid_len >= PATH_MAX)) { \ + pidfile[0] = 0; \ + } \ + } while (0) -static int32_t -glusterd_set_lock_owner (uuid_t owner) +gf_boolean_t +is_brick_mx_enabled(void) { - uuid_copy (lock.owner, owner); - //TODO: set 
timestamp - return 0; -} + char *value = NULL; + int ret = 0; + gf_boolean_t enabled = _gf_false; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; -static int32_t -glusterd_unset_lock_owner (uuid_t owner) -{ - uuid_clear (lock.owner); - //TODO: set timestamp - return 0; + this = THIS; + + priv = this->private; + + ret = dict_get_strn(priv->opts, GLUSTERD_BRICK_MULTIPLEX_KEY, + SLEN(GLUSTERD_BRICK_MULTIPLEX_KEY), &value); + + if (!ret) + ret = gf_string2boolean(value, &enabled); + + return ret ? _gf_false : enabled; } -gf_boolean_t -glusterd_is_loopback_localhost (const struct sockaddr *sa, char *hostname) +int +get_mux_limit_per_process(int *mux_limit) { - GF_ASSERT (sa); + char *value = NULL; + int ret = -1; + int max_bricks_per_proc = 0; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; - gf_boolean_t is_local = _gf_false; - const struct in_addr *addr4 = NULL; - const struct in6_addr *addr6 = NULL; - uint8_t *ap = NULL; - struct in6_addr loopbackaddr6 = IN6ADDR_LOOPBACK_INIT; + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); - switch (sa->sa_family) { - case AF_INET: - addr4 = &(((struct sockaddr_in *)sa)->sin_addr); - ap = (uint8_t*)&addr4->s_addr; - if (ap[0] == 127) - is_local = _gf_true; - break; + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, out); - case AF_INET6: - addr6 = &(((struct sockaddr_in6 *)sa)->sin6_addr); - if (memcmp (addr6, &loopbackaddr6, - sizeof (loopbackaddr6)) == 0) - is_local = _gf_true; - break; + if (!is_brick_mx_enabled()) { + max_bricks_per_proc = 1; + ret = 0; + goto out; + } + + ret = dict_get_strn(priv->opts, GLUSTERD_BRICKMUX_LIMIT_KEY, + SLEN(GLUSTERD_BRICKMUX_LIMIT_KEY), &value); + if (ret) { + value = GLUSTERD_BRICKMUX_LIMIT_DFLT_VALUE; + } + ret = gf_string2int(value, &max_bricks_per_proc); + if (ret) + goto out; - default: - if (hostname) - gf_log ("glusterd", GF_LOG_ERROR, - "unknown address family %d for %s", - sa->sa_family, hostname); - break; - } +out: + *mux_limit = max_bricks_per_proc; - return is_local; + gf_msg_debug("glusterd", 0, "Mux limit set to %d bricks per process", + *mux_limit); + + return ret; } -int32_t -glusterd_is_local_addr (char *hostname) -{ - int32_t ret = -1; - struct addrinfo *result = NULL; - struct addrinfo *res = NULL; - int32_t found = 0; - struct ifconf buf = {0,}; - int sd = -1; - struct ifreq *ifr = NULL; - struct ifreq *ifr_end = NULL; - int32_t size = 0; - char buff[1024] = {0,}; - gf_boolean_t need_free = _gf_false; - - ret = getaddrinfo (hostname, NULL, NULL, &result); +int +get_gd_vol_thread_limit(int *thread_limit) +{ + char *value = NULL; + int ret = -1; + int vol_per_thread_limit = 0; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; - if (ret != 0) { - gf_log ("", GF_LOG_ERROR, "error in getaddrinfo: %s\n", - gai_strerror(ret)); - goto out; - } + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); - for (res = result; res != NULL; res = res->ai_next) { - found = glusterd_is_loopback_localhost (res->ai_addr, hostname); - if (found) - goto out; - } + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, out); + if (!is_brick_mx_enabled()) { + vol_per_thread_limit = 1; + ret = 0; + goto out; + } + + ret = dict_get_strn(priv->opts, GLUSTERD_VOL_CNT_PER_THRD, + SLEN(GLUSTERD_VOL_CNT_PER_THRD), &value); + if (ret) { + value = GLUSTERD_VOL_CNT_PER_THRD_DEFAULT_VALUE; + } + ret = gf_string2int(value, &vol_per_thread_limit); + if (ret) + goto out; - sd = socket (AF_INET, SOCK_DGRAM, 0); - if (sd == -1) - goto out; +out: + *thread_limit = 
vol_per_thread_limit; - buf.ifc_len = sizeof (buff); - buf.ifc_buf = buff; - size = buf.ifc_len; + gf_msg_debug("glusterd", 0, + "Per Thread volume limit set to %d glusterd to populate dict " + "data parallel", + *thread_limit); - ret = ioctl (sd, SIOCGIFCONF, &buf); - if (ret) { - goto out; - } + return ret; +} - while (size <= buf.ifc_len) { - size += sizeof (struct ifreq); - buf.ifc_len = size; - if (need_free) - GF_FREE (buf.ifc_req); - buf.ifc_req = GF_CALLOC (1, size, gf_gld_mt_ifreq); - need_free = 1; - ret = ioctl (sd, SIOCGIFCONF, &buf); - if (ret) { - goto out; - } - } +extern struct volopt_map_entry glusterd_volopt_map[]; +extern glusterd_all_vol_opts valid_all_vol_opts[]; - ifr_end = (struct ifreq *)&buf.ifc_buf[buf.ifc_len]; +static glusterd_lock_t lock; - for (res = result; res != NULL; res = res->ai_next) { - ifr = buf.ifc_req; - while (ifr < ifr_end) { - if ((ifr->ifr_addr.sa_family == res->ai_addr->sa_family) - && (memcmp (&ifr->ifr_addr, res->ai_addr, - res->ai_addrlen) == 0)) { - found = 1; - goto out; - } - ifr++; - } +static int +_brick_for_each(glusterd_volinfo_t *volinfo, dict_t *mod_dict, void *data, + int (*fn)(glusterd_volinfo_t *, glusterd_brickinfo_t *, + dict_t *mod_dict, void *)) +{ + int ret = 0; + glusterd_brickinfo_t *brickinfo = NULL; + xlator_t *this = THIS; + + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + gf_msg_debug(this->name, 0, "Found a brick - %s:%s", + brickinfo->hostname, brickinfo->path); + ret = fn(volinfo, brickinfo, mod_dict, data); + if (ret) + goto out; + } +out: + return ret; +} + +/* This is going to be a O(n^2) operation as we have to pick a brick, + make sure it belong to this machine, and compare another brick belonging + to this machine (if exists), is sharing the backend */ +static void +gd_set_shared_brick_count(glusterd_volinfo_t *volinfo) +{ + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_brickinfo_t *trav = NULL; + + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + if (gf_uuid_compare(brickinfo->uuid, MY_UUID)) + continue; + brickinfo->fs_share_count = 0; + cds_list_for_each_entry(trav, &volinfo->bricks, brick_list) + { + if (!gf_uuid_compare(trav->uuid, MY_UUID) && + (trav->statfs_fsid == brickinfo->statfs_fsid)) { + brickinfo->fs_share_count++; + } } + } -out: - if (sd >= 0) - close (sd); + return; +} - if (result) - freeaddrinfo (result); +int +glusterd_volume_brick_for_each(glusterd_volinfo_t *volinfo, void *data, + int (*fn)(glusterd_volinfo_t *, + glusterd_brickinfo_t *, + dict_t *mod_dict, void *)) +{ + gd_set_shared_brick_count(volinfo); - if (need_free) - GF_FREE (buf.ifc_req); + return _brick_for_each(volinfo, NULL, data, fn); +} - if (found) - gf_log ("glusterd", GF_LOG_DEBUG, "%s is local", hostname); - else - gf_log ("glusterd", GF_LOG_DEBUG, "%s is not local", hostname); +int32_t +glusterd_get_lock_owner(uuid_t *uuid) +{ + gf_uuid_copy(*uuid, lock.owner); + return 0; +} - return !found; +static int32_t +glusterd_set_lock_owner(uuid_t owner) +{ + gf_uuid_copy(lock.owner, owner); + // TODO: set timestamp + return 0; +} + +static int32_t +glusterd_unset_lock_owner(uuid_t owner) +{ + gf_uuid_clear(lock.owner); + // TODO: set timestamp + return 0; +} + +gf_boolean_t +glusterd_is_fuse_available() +{ + int fd = 0; + +#ifdef __NetBSD__ + fd = open("/dev/puffs", O_RDWR); +#else + fd = open("/dev/fuse", O_RDWR); +#endif + + if (fd > -1 && !sys_close(fd)) + return _gf_true; + else + return _gf_false; } int32_t -glusterd_lock (uuid_t uuid) +glusterd_lock(uuid_t uuid) { + 
uuid_t owner; + char new_owner_str[50] = ""; + char owner_str[50] = ""; + int ret = -1; + xlator_t *this = NULL; - uuid_t owner; - char new_owner_str[50]; - char owner_str[50]; - int ret = -1; + this = THIS; + GF_ASSERT(this); - GF_ASSERT (uuid); + GF_ASSERT(uuid); - glusterd_get_lock_owner (&owner); + glusterd_get_lock_owner(&owner); - if (!uuid_is_null (owner)) { - gf_log ("glusterd", GF_LOG_ERROR, "Unable to get lock" - " for uuid: %s, lock held by: %s", - uuid_utoa_r (uuid, new_owner_str), - uuid_utoa_r (owner, owner_str)); - goto out; - } + if (!gf_uuid_is_null(owner)) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTERD_LOCK_FAIL, + "Unable to get lock" + " for uuid: %s, lock held by: %s", + uuid_utoa_r(uuid, new_owner_str), uuid_utoa_r(owner, owner_str)); + goto out; + } - ret = glusterd_set_lock_owner (uuid); + ret = glusterd_set_lock_owner(uuid); - if (!ret) { - gf_log ("glusterd", GF_LOG_INFO, "Cluster lock held by" - " %s", uuid_utoa (uuid)); - } + if (!ret) { + gf_msg_debug(this->name, 0, + "Cluster lock held by" + " %s", + uuid_utoa(uuid)); + } out: - return ret; + return ret; } - int32_t -glusterd_unlock (uuid_t uuid) +glusterd_unlock(uuid_t uuid) { - uuid_t owner; - char new_owner_str[50]; - char owner_str[50]; - int32_t ret = -1; + uuid_t owner; + char new_owner_str[50] = ""; + char owner_str[50] = ""; + int32_t ret = -1; + xlator_t *this = NULL; - GF_ASSERT (uuid); + this = THIS; + GF_ASSERT(this); - glusterd_get_lock_owner (&owner); + GF_ASSERT(uuid); - if (NULL == owner) { - gf_log ("glusterd", GF_LOG_ERROR, "Cluster lock not held!"); - goto out; - } + glusterd_get_lock_owner(&owner); - ret = uuid_compare (uuid, owner); + if (gf_uuid_is_null(owner)) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTERD_LOCK_FAIL, + "Cluster lock not held!"); + goto out; + } - if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, "Cluster lock held by %s" - " ,unlock req from %s!", uuid_utoa_r (owner ,owner_str) - , uuid_utoa_r (uuid, new_owner_str)); - goto out; - } + ret = gf_uuid_compare(uuid, owner); - ret = glusterd_unset_lock_owner (uuid); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTERD_LOCK_FAIL, + "Cluster lock held by %s ," + "unlock req from %s!", + uuid_utoa_r(owner, owner_str), uuid_utoa_r(uuid, new_owner_str)); + goto out; + } - if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, "Unable to clear cluster " - "lock"); - goto out; - } + ret = glusterd_unset_lock_owner(uuid); - ret = 0; + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTERD_UNLOCK_FAIL, + "Unable to clear cluster " + "lock"); + goto out; + } + + ret = 0; out: - return ret; + return ret; } - int -glusterd_get_uuid (uuid_t *uuid) +glusterd_get_uuid(uuid_t *uuid) { - glusterd_conf_t *priv = NULL; + glusterd_conf_t *priv = NULL; - priv = THIS->private; + priv = THIS->private; - GF_ASSERT (priv); + GF_ASSERT(priv); - uuid_copy (*uuid, priv->uuid); + gf_uuid_copy(*uuid, MY_UUID); - return 0; + return 0; } int -glusterd_submit_request (struct rpc_clnt *rpc, void *req, - call_frame_t *frame, rpc_clnt_prog_t *prog, - int procnum, struct iobref *iobref, - gd_serialize_t sfunc, xlator_t *this, - fop_cbk_fn_t cbkfn) +glusterd_submit_request(struct rpc_clnt *rpc, void *req, call_frame_t *frame, + rpc_clnt_prog_t *prog, int procnum, + struct iobref *iobref, xlator_t *this, + fop_cbk_fn_t cbkfn, xdrproc_t xdrproc) { - int ret = -1; - struct iobuf *iobuf = NULL; - int count = 0; - char new_iobref = 0, start_ping = 0; - struct iovec iov = {0, }; - - GF_ASSERT (rpc); - GF_ASSERT (this); - - iobuf = 
iobuf_get (this->ctx->iobuf_pool); + char new_iobref = 0; + int ret = -1; + int count = 0; + ssize_t req_size = 0; + struct iobuf *iobuf = NULL; + struct iovec iov = { + 0, + }; + + GF_ASSERT(rpc); + GF_ASSERT(this); + + if (req) { + req_size = xdr_sizeof(xdrproc, req); + iobuf = iobuf_get2(this->ctx->iobuf_pool, req_size); if (!iobuf) { - goto out; + goto out; }; if (!iobref) { - iobref = iobref_new (); - if (!iobref) { - goto out; - } + iobref = iobref_new(); + if (!iobref) { + gf_smsg("glusterd", GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + NULL); + goto out; + } - new_iobref = 1; + new_iobref = 1; } - iobref_add (iobref, iobuf); + iobref_add(iobref, iobuf); iov.iov_base = iobuf->ptr; - iov.iov_len = 128 * GF_UNIT_KB; + iov.iov_len = iobuf_pagesize(iobuf); /* Create the xdr payload */ - if (req && sfunc) { - ret = sfunc (iov, req); - if (ret == -1) { - goto out; - } - iov.iov_len = ret; - count = 1; - } - /* Send the msg */ - ret = rpc_clnt_submit (rpc, prog, procnum, cbkfn, - &iov, count, - NULL, 0, iobref, frame, NULL, 0, NULL, 0, NULL); - - if (ret == 0) { - pthread_mutex_lock (&rpc->conn.lock); - { - if (!rpc->conn.ping_started) { - start_ping = 1; - } - } - pthread_mutex_unlock (&rpc->conn.lock); + ret = xdr_serialize_generic(iov, req, xdrproc); + if (ret == -1) { + goto out; } - - if (start_ping) - //client_start_ping ((void *) this); - - ret = 0; + iov.iov_len = ret; + count = 1; + } + + /* Send the msg */ + rpc_clnt_submit(rpc, prog, procnum, cbkfn, &iov, count, NULL, 0, iobref, + frame, NULL, 0, NULL, 0, NULL); + + /* Unconditionally set ret to 0 here. This is to guard against a double + * STACK_DESTROY in case of a failure in rpc_clnt_submit AFTER the + * request is sent over the wire: once in the callback function of the + * request and once in the error codepath of some of the callers of + * glusterd_submit_request(). + */ + ret = 0; out: - if (new_iobref) { - iobref_unref (iobref); - } + if (new_iobref) { + iobref_unref(iobref); + } - iobuf_unref (iobuf); + iobuf_unref(iobuf); - return ret; + return ret; } struct iobuf * -glusterd_serialize_reply (rpcsvc_request_t *req, void *arg, - gd_serialize_t sfunc, struct iovec *outmsg) +glusterd_serialize_reply(rpcsvc_request_t *req, void *arg, struct iovec *outmsg, + xdrproc_t xdrproc) { - struct iobuf *iob = NULL; - ssize_t retlen = -1; + struct iobuf *iob = NULL; + ssize_t retlen = -1; + ssize_t rsp_size = 0; + + /* First, get the io buffer into which the reply in arg will + * be serialized. + */ + rsp_size = xdr_sizeof(xdrproc, arg); + iob = iobuf_get2(req->svc->ctx->iobuf_pool, rsp_size); + if (!iob) { + gf_msg("glusterd", GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "Failed to get iobuf"); + goto ret; + } + + iobuf_to_iovec(iob, outmsg); + /* Use the given serializer to translate the give C structure in arg + * to XDR format which will be written into the buffer in outmsg. + */ + /* retlen is used to received the error since size_t is unsigned and we + * need -1 for error notification during encoding. + */ + retlen = xdr_serialize_generic(*outmsg, arg, xdrproc); + if (retlen == -1) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_ENCODE_FAIL, + "Failed to encode message"); + goto ret; + } + + outmsg->iov_len = retlen; +ret: + if (retlen == -1) { + iobuf_unref(iob); + iob = NULL; + } - /* First, get the io buffer into which the reply in arg will - * be serialized. 
- */ - iob = iobuf_get (req->svc->ctx->iobuf_pool); - if (!iob) { - gf_log ("", GF_LOG_ERROR, "Failed to get iobuf"); - goto ret; - } + return iob; +} - iobuf_to_iovec (iob, outmsg); - /* Use the given serializer to translate the give C structure in arg - * to XDR format which will be written into the buffer in outmsg. - */ - /* retlen is used to received the error since size_t is unsigned and we - * need -1 for error notification during encoding. - */ - retlen = sfunc (*outmsg, arg); - if (retlen == -1) { - gf_log ("", GF_LOG_ERROR, "Failed to encode message"); - goto ret; +int +glusterd_submit_reply(rpcsvc_request_t *req, void *arg, struct iovec *payload, + int payloadcount, struct iobref *iobref, + xdrproc_t xdrproc) +{ + struct iobuf *iob = NULL; + int ret = -1; + struct iovec rsp = { + 0, + }; + char new_iobref = 0; + + if (!req) { + GF_ASSERT(req); + goto out; + } + + if (!iobref) { + iobref = iobref_new(); + if (!iobref) { + gf_msg("glusterd", GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "out of memory"); + goto out; } - outmsg->iov_len = retlen; -ret: - if (retlen == -1) { - iobuf_unref (iob); - iob = NULL; + new_iobref = 1; + } + + iob = glusterd_serialize_reply(req, arg, &rsp, xdrproc); + if (!iob) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_SERIALIZE_MSG_FAIL, + "Failed to serialize reply"); + } else { + iobref_add(iobref, iob); + } + + ret = rpcsvc_submit_generic(req, &rsp, 1, payload, payloadcount, iobref); + + /* Now that we've done our job of handing the message to the RPC layer + * we can safely unref the iob in the hope that RPC layer must have + * ref'ed the iob on receiving into the txlist. + */ + if (ret == -1) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_REPLY_SUBMIT_FAIL, + "Reply submission failed"); + goto out; + } + + ret = 0; +out: + + if (new_iobref) { + iobref_unref(iobref); + } + + if (iob) + iobuf_unref(iob); + return ret; +} + +glusterd_volinfo_t * +glusterd_volinfo_unref(glusterd_volinfo_t *volinfo) +{ + int refcnt = -1; + glusterd_conf_t *conf = THIS->private; + + pthread_mutex_lock(&conf->volume_lock); + { + pthread_mutex_lock(&volinfo->reflock); + { + refcnt = --volinfo->refcnt; } + pthread_mutex_unlock(&volinfo->reflock); + } + pthread_mutex_unlock(&conf->volume_lock); + if (!refcnt) { + glusterd_volinfo_delete(volinfo); + return NULL; + } + + return volinfo; +} - return iob; +glusterd_volinfo_t * +glusterd_volinfo_ref(glusterd_volinfo_t *volinfo) +{ + pthread_mutex_lock(&volinfo->reflock); + { + ++volinfo->refcnt; + } + pthread_mutex_unlock(&volinfo->reflock); + + return volinfo; } -int -glusterd_submit_reply (rpcsvc_request_t *req, void *arg, - struct iovec *payload, int payloadcount, - struct iobref *iobref, gd_serialize_t sfunc) +int32_t +glusterd_volinfo_new(glusterd_volinfo_t **volinfo) { - struct iobuf *iob = NULL; - int ret = -1; - struct iovec rsp = {0,}; - char new_iobref = 0; + glusterd_volinfo_t *new_volinfo = NULL; + int32_t ret = -1; - if (!req) { - GF_ASSERT (req); - goto out; - } + GF_ASSERT(volinfo); + new_volinfo = GF_CALLOC(1, sizeof(*new_volinfo), + gf_gld_mt_glusterd_volinfo_t); - if (!iobref) { - iobref = iobref_new (); - if (!iobref) { - gf_log ("", GF_LOG_ERROR, "out of memory"); - goto out; - } + if (!new_volinfo) + goto out; - new_iobref = 1; - } + LOCK_INIT(&new_volinfo->lock); + CDS_INIT_LIST_HEAD(&new_volinfo->vol_list); + CDS_INIT_LIST_HEAD(&new_volinfo->snapvol_list); + CDS_INIT_LIST_HEAD(&new_volinfo->bricks); + CDS_INIT_LIST_HEAD(&new_volinfo->ta_bricks); + CDS_INIT_LIST_HEAD(&new_volinfo->snap_volumes); - iob = 
glusterd_serialize_reply (req, arg, sfunc, &rsp); - if (!iob) { - gf_log ("", GF_LOG_ERROR, "Failed to serialize reply"); - goto out; - } + new_volinfo->dict = dict_new(); + if (!new_volinfo->dict) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); + GF_FREE(new_volinfo); - iobref_add (iobref, iob); + goto out; + } - ret = rpcsvc_submit_generic (req, &rsp, 1, payload, payloadcount, - iobref); + new_volinfo->gsync_slaves = dict_new(); + if (!new_volinfo->gsync_slaves) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); + dict_unref(new_volinfo->dict); + GF_FREE(new_volinfo); + goto out; + } - /* Now that we've done our job of handing the message to the RPC layer - * we can safely unref the iob in the hope that RPC layer must have - * ref'ed the iob on receiving into the txlist. - */ - iobuf_unref (iob); - if (ret == -1) { - gf_log ("", GF_LOG_ERROR, "Reply submission failed"); - goto out; - } + new_volinfo->gsync_active_slaves = dict_new(); + if (!new_volinfo->gsync_active_slaves) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); + dict_unref(new_volinfo->dict); + dict_unref(new_volinfo->gsync_slaves); + GF_FREE(new_volinfo); + goto out; + } - ret = 0; -out: + snprintf(new_volinfo->parent_volname, GD_VOLUME_NAME_MAX, "N/A"); - if (new_iobref) { - iobref_unref (iobref); - } + new_volinfo->snap_max_hard_limit = GLUSTERD_SNAPS_MAX_HARD_LIMIT; - return ret; -} + new_volinfo->xl = THIS; -gf_boolean_t -glusterd_check_volume_exists (char *volname) -{ - char pathname[1024] = {0,}; - struct stat stbuf = {0,}; - int32_t ret = -1; - glusterd_conf_t *priv = NULL; + glusterd_snapdsvc_build(&new_volinfo->snapd.svc); + glusterd_gfproxydsvc_build(&new_volinfo->gfproxyd.svc); + glusterd_shdsvc_build(&new_volinfo->shd.svc); - priv = THIS->private; + pthread_mutex_init(&new_volinfo->store_volinfo_lock, NULL); + pthread_mutex_init(&new_volinfo->reflock, NULL); - snprintf (pathname, 1024, "%s/vols/%s", priv->workdir, - volname); + *volinfo = glusterd_volinfo_ref(new_volinfo); - ret = stat (pathname, &stbuf); + ret = 0; - if (ret) { - gf_log ("", GF_LOG_DEBUG, "Volume %s does not exist." - "stat failed with errno : %d on path: %s", - volname, errno, pathname); - return _gf_false; - } +out: + gf_msg_debug(THIS->name, 0, "Returning %d", ret); + return ret; +} - return _gf_true; +/* This function will create a new volinfo and then + * dup the entries from volinfo to the new_volinfo. 
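+ * Counts, op-versions and the option dictionaries are copied over;
+ * the brick list itself is not duplicated here.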
+ * + * @param volinfo volinfo which will be duplicated + * @param dup_volinfo new volinfo which will be created + * @param set_userauth if this true then auth info is also set + * + * @return 0 on success else -1 + */ +int32_t +glusterd_volinfo_dup(glusterd_volinfo_t *volinfo, + glusterd_volinfo_t **dup_volinfo, + gf_boolean_t set_userauth) +{ + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_volinfo_t *new_volinfo = NULL; + + this = THIS; + GF_ASSERT(this); + GF_VALIDATE_OR_GOTO(this->name, volinfo, out); + GF_VALIDATE_OR_GOTO(this->name, dup_volinfo, out); + + ret = glusterd_volinfo_new(&new_volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_SET_FAIL, + "not able to create the " + "duplicate volinfo for the volume %s", + volinfo->volname); + goto out; + } + + new_volinfo->type = volinfo->type; + new_volinfo->replica_count = volinfo->replica_count; + new_volinfo->arbiter_count = volinfo->arbiter_count; + new_volinfo->stripe_count = volinfo->stripe_count; + new_volinfo->disperse_count = volinfo->disperse_count; + new_volinfo->redundancy_count = volinfo->redundancy_count; + new_volinfo->dist_leaf_count = volinfo->dist_leaf_count; + new_volinfo->sub_count = volinfo->sub_count; + new_volinfo->subvol_count = volinfo->subvol_count; + new_volinfo->transport_type = volinfo->transport_type; + new_volinfo->brick_count = volinfo->brick_count; + new_volinfo->quota_conf_version = volinfo->quota_conf_version; + new_volinfo->quota_xattr_version = volinfo->quota_xattr_version; + new_volinfo->snap_max_hard_limit = volinfo->snap_max_hard_limit; + new_volinfo->quota_conf_cksum = volinfo->quota_conf_cksum; + + dict_copy(volinfo->dict, new_volinfo->dict); + dict_copy(volinfo->gsync_slaves, new_volinfo->gsync_slaves); + dict_copy(volinfo->gsync_active_slaves, new_volinfo->gsync_active_slaves); + gd_update_volume_op_versions(new_volinfo); + + if (set_userauth) { + glusterd_auth_set_username(new_volinfo, volinfo->auth.username); + glusterd_auth_set_password(new_volinfo, volinfo->auth.password); + } + + *dup_volinfo = new_volinfo; + ret = 0; +out: + if (ret && (NULL != new_volinfo)) { + (void)glusterd_volinfo_delete(new_volinfo); + } + return ret; } +/* This function will duplicate brickinfo + * + * @param brickinfo Source brickinfo + * @param dup_brickinfo Destination brickinfo + * + * @return 0 on success else -1 + */ int32_t -glusterd_volinfo_new (glusterd_volinfo_t **volinfo) +glusterd_brickinfo_dup(glusterd_brickinfo_t *brickinfo, + glusterd_brickinfo_t *dup_brickinfo) { - glusterd_volinfo_t *new_volinfo = NULL; - int32_t ret = -1; + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + GF_VALIDATE_OR_GOTO(this->name, brickinfo, out); + GF_VALIDATE_OR_GOTO(this->name, dup_brickinfo, out); + + strcpy(dup_brickinfo->hostname, brickinfo->hostname); + strcpy(dup_brickinfo->path, brickinfo->path); + strcpy(dup_brickinfo->real_path, brickinfo->real_path); + strcpy(dup_brickinfo->device_path, brickinfo->device_path); + strcpy(dup_brickinfo->fstype, brickinfo->fstype); + strcpy(dup_brickinfo->mnt_opts, brickinfo->mnt_opts); + ret = gf_canonicalize_path(dup_brickinfo->path); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_CANONICALIZE_FAIL, + "Failed to canonicalize " + "brick path"); + goto out; + } + gf_uuid_copy(dup_brickinfo->uuid, brickinfo->uuid); + + dup_brickinfo->port = brickinfo->port; + dup_brickinfo->rdma_port = brickinfo->rdma_port; + if (NULL != brickinfo->logfile) { + dup_brickinfo->logfile = gf_strdup(brickinfo->logfile); + if 
(NULL == dup_brickinfo->logfile) { + ret = -1; + goto out; + } + } + strcpy(dup_brickinfo->brick_id, brickinfo->brick_id); + strcpy(dup_brickinfo->mount_dir, brickinfo->mount_dir); + dup_brickinfo->status = brickinfo->status; + dup_brickinfo->snap_status = brickinfo->snap_status; +out: + return ret; +} - GF_ASSERT (volinfo); +/* + * gd_vol_is_geo_rep_active: + * This function checks for any running geo-rep session for + * the volume given. + * + * Return Value: + * _gf_true : If any running geo-rep session. + * _gf_false: If no running geo-rep session. + */ - new_volinfo = GF_CALLOC (1, sizeof(*new_volinfo), - gf_gld_mt_glusterd_volinfo_t); +gf_boolean_t +gd_vol_is_geo_rep_active(glusterd_volinfo_t *volinfo) +{ + gf_boolean_t active = _gf_false; - if (!new_volinfo) - goto out; + GF_ASSERT(volinfo); - INIT_LIST_HEAD (&new_volinfo->vol_list); - INIT_LIST_HEAD (&new_volinfo->bricks); + if (volinfo->gsync_active_slaves && volinfo->gsync_active_slaves->count > 0) + active = _gf_true; - new_volinfo->dict = dict_new (); - if (!new_volinfo->dict) { - if (new_volinfo) - GF_FREE (new_volinfo); + return active; +} - goto out; - } +void +glusterd_auth_cleanup(glusterd_volinfo_t *volinfo) +{ + GF_ASSERT(volinfo); - *volinfo = new_volinfo; + GF_FREE(volinfo->auth.username); - ret = 0; + GF_FREE(volinfo->auth.password); +} -out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; +char * +glusterd_auth_get_username(glusterd_volinfo_t *volinfo) +{ + GF_ASSERT(volinfo); + + return volinfo->auth.username; } -int32_t -glusterd_brickinfo_delete (glusterd_brickinfo_t *brickinfo) +char * +glusterd_auth_get_password(glusterd_volinfo_t *volinfo) { - int32_t ret = -1; + GF_ASSERT(volinfo); - GF_ASSERT (brickinfo); + return volinfo->auth.password; +} - list_del_init (&brickinfo->brick_list); +int32_t +glusterd_auth_set_username(glusterd_volinfo_t *volinfo, char *username) +{ + GF_ASSERT(volinfo); + GF_ASSERT(username); - if (brickinfo->logfile) - GF_FREE (brickinfo->logfile); - GF_FREE (brickinfo); + volinfo->auth.username = gf_strdup(username); + return 0; +} - ret = 0; +int32_t +glusterd_auth_set_password(glusterd_volinfo_t *volinfo, char *password) +{ + GF_ASSERT(volinfo); + GF_ASSERT(password); - return ret; + volinfo->auth.password = gf_strdup(password); + return 0; } int32_t -glusterd_volume_brickinfos_delete (glusterd_volinfo_t *volinfo) +glusterd_brickinfo_delete(glusterd_brickinfo_t *brickinfo) { - glusterd_brickinfo_t *brickinfo = NULL; - glusterd_brickinfo_t *tmp = NULL; - int32_t ret = 0; + int32_t ret = -1; - GF_ASSERT (volinfo); + GF_ASSERT(brickinfo); - list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks, - brick_list) { - ret = glusterd_brickinfo_delete (brickinfo); - if (ret) - goto out; - } + cds_list_del_init(&brickinfo->brick_list); -out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + (void)gf_store_handle_destroy(brickinfo->shandle); + + GF_FREE(brickinfo->logfile); + GF_FREE(brickinfo); + + ret = 0; + + return ret; } int32_t -glusterd_volinfo_delete (glusterd_volinfo_t *volinfo) +glusterd_volume_brickinfos_delete(glusterd_volinfo_t *volinfo) { - int32_t ret = -1; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_brickinfo_t *tmp = NULL; + int32_t ret = 0; - GF_ASSERT (volinfo); + GF_ASSERT(volinfo); - list_del_init (&volinfo->vol_list); - - ret = glusterd_volume_brickinfos_delete (volinfo); + cds_list_for_each_entry_safe(brickinfo, tmp, &volinfo->bricks, brick_list) + { + ret = glusterd_brickinfo_delete(brickinfo); if (ret) - goto out; - dict_unref 
(volinfo->dict); - if (volinfo->logdir) - GF_FREE (volinfo->logdir); - - GF_FREE (volinfo); - ret = 0; + goto out; + } out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + gf_msg_debug(THIS->name, 0, "Returning %d", ret); + return ret; } +int +glusterd_volinfo_remove(glusterd_volinfo_t *volinfo) +{ + cds_list_del_init(&volinfo->vol_list); + glusterd_volinfo_unref(volinfo); + return 0; +} int32_t -glusterd_brickinfo_new (glusterd_brickinfo_t **brickinfo) +glusterd_volinfo_delete(glusterd_volinfo_t *volinfo) { - glusterd_brickinfo_t *new_brickinfo = NULL; - int32_t ret = -1; + int32_t ret = -1; + + GF_ASSERT(volinfo); + + cds_list_del_init(&volinfo->vol_list); + cds_list_del_init(&volinfo->snapvol_list); + + ret = glusterd_volume_brickinfos_delete(volinfo); + if (ret) + goto out; + if (volinfo->dict) + dict_unref(volinfo->dict); + if (volinfo->gsync_slaves) + dict_unref(volinfo->gsync_slaves); + if (volinfo->gsync_active_slaves) + dict_unref(volinfo->gsync_active_slaves); + GF_FREE(volinfo->logdir); + if (volinfo->rebal.dict) + dict_unref(volinfo->rebal.dict); + + /* Destroy the connection object for per volume svc daemons */ + glusterd_conn_term(&volinfo->snapd.svc.conn); + glusterd_conn_term(&volinfo->gfproxyd.svc.conn); + + gf_store_handle_destroy(volinfo->quota_conf_shandle); + gf_store_handle_destroy(volinfo->shandle); + gf_store_handle_destroy(volinfo->node_state_shandle); + gf_store_handle_destroy(volinfo->snapd.handle); + + glusterd_auth_cleanup(volinfo); + glusterd_shd_svcproc_cleanup(&volinfo->shd); + + pthread_mutex_destroy(&volinfo->store_volinfo_lock); + pthread_mutex_destroy(&volinfo->reflock); + LOCK_DESTROY(&volinfo->lock); + + GF_FREE(volinfo); + ret = 0; +out: + gf_msg_debug(THIS->name, 0, "Returning %d", ret); + return ret; +} - GF_ASSERT (brickinfo); +int32_t +glusterd_brickprocess_new(glusterd_brick_proc_t **brickprocess) +{ + glusterd_brick_proc_t *new_brickprocess = NULL; + int32_t ret = -1; - new_brickinfo = GF_CALLOC (1, sizeof(*new_brickinfo), - gf_gld_mt_glusterd_brickinfo_t); + GF_VALIDATE_OR_GOTO(THIS->name, brickprocess, out); - if (!new_brickinfo) - goto out; + new_brickprocess = GF_CALLOC(1, sizeof(*new_brickprocess), + gf_gld_mt_glusterd_brick_proc_t); - INIT_LIST_HEAD (&new_brickinfo->brick_list); + if (!new_brickprocess) + goto out; - *brickinfo = new_brickinfo; + CDS_INIT_LIST_HEAD(&new_brickprocess->bricks); + CDS_INIT_LIST_HEAD(&new_brickprocess->brick_proc_list); - ret = 0; + new_brickprocess->brick_count = 0; + *brickprocess = new_brickprocess; + + ret = 0; out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + gf_msg_debug(THIS->name, 0, "Returning %d", ret); + return ret; } int32_t -glusterd_resolve_brick (glusterd_brickinfo_t *brickinfo) +glusterd_brickinfo_new(glusterd_brickinfo_t **brickinfo) { - int32_t ret = -1; + glusterd_brickinfo_t *new_brickinfo = NULL; + int32_t ret = -1; - GF_ASSERT (brickinfo); + GF_ASSERT(brickinfo); - ret = glusterd_hostname_to_uuid (brickinfo->hostname, brickinfo->uuid); - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; -} + new_brickinfo = GF_CALLOC(1, sizeof(*new_brickinfo), + gf_gld_mt_glusterd_brickinfo_t); -int32_t -glusterd_brickinfo_from_brick (char *brick, - glusterd_brickinfo_t **brickinfo) -{ - int32_t ret = -1; - glusterd_brickinfo_t *new_brickinfo = NULL; - char *hostname = NULL; - char *path = NULL; - char *tmp = NULL; - char *tmpstr = NULL; - - GF_ASSERT (brick); - GF_ASSERT (brickinfo); - - tmp = gf_strdup (brick); - if (!tmp) { - gf_log ("glusterd", 
GF_LOG_ERROR, - "Out of memory"); - goto out; - } + if (!new_brickinfo) + goto out; - hostname = strtok_r (tmp, ":", &tmpstr); - path = strtok_r (NULL, ":", &tmpstr); + CDS_INIT_LIST_HEAD(&new_brickinfo->brick_list); + CDS_INIT_LIST_HEAD(&new_brickinfo->mux_bricks); + pthread_mutex_init(&new_brickinfo->restart_mutex, NULL); + *brickinfo = new_brickinfo; - GF_ASSERT (hostname); - GF_ASSERT (path); + ret = 0; - ret = glusterd_brickinfo_new (&new_brickinfo); +out: + gf_msg_debug(THIS->name, 0, "Returning %d", ret); + return ret; +} - if (ret) - goto out; +int +glusterd_get_next_available_brickid(glusterd_volinfo_t *volinfo) +{ + glusterd_brickinfo_t *brickinfo = NULL; + char *token = NULL; + int brickid = 0; + int max_brickid = -1; + int ret = -1; + + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + token = strrchr(brickinfo->brick_id, '-'); + ret = gf_string2int32(++token, &brickid); + if (ret < 0) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ID_GEN_FAILED, + "Unable to generate brick ID"); + return ret; + } + if (brickid > max_brickid) + max_brickid = brickid; + } - strncpy (new_brickinfo->hostname, hostname, 1024); - strncpy (new_brickinfo->path, path, 1024); + return max_brickid + 1; +} + +int32_t +glusterd_resolve_brick(glusterd_brickinfo_t *brickinfo) +{ + int32_t ret = -1; + xlator_t *this = NULL; - *brickinfo = new_brickinfo; + this = THIS; + GF_ASSERT(this); + GF_ASSERT(brickinfo); + if (!gf_uuid_compare(brickinfo->uuid, MY_UUID) || + (glusterd_peerinfo_find_by_uuid(brickinfo->uuid) != NULL)) { ret = 0; + goto out; + } + + ret = glusterd_hostname_to_uuid(brickinfo->hostname, brickinfo->uuid); + out: - if (tmp) - GF_FREE (tmp); - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; } int32_t -glusterd_volume_brickinfo_get (uuid_t uuid, char *hostname, char *path, - glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t **brickinfo) +glusterd_get_brick_mount_dir(char *brickpath, char *hostname, char *mount_dir) { - glusterd_brickinfo_t *brickiter = NULL; - uuid_t peer_uuid = {0}; - int32_t ret = -1; + char *mnt_pt = NULL; + char *brick_dir = NULL; + int32_t ret = -1; + uuid_t brick_uuid = { + 0, + }; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(brickpath); + GF_ASSERT(hostname); + GF_ASSERT(mount_dir); + + ret = glusterd_hostname_to_uuid(hostname, brick_uuid); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HOSTNAME_TO_UUID_FAIL, + "Failed to convert hostname %s to uuid", hostname); + goto out; + } + + if (!gf_uuid_compare(brick_uuid, MY_UUID)) { + ret = glusterd_get_brick_root(brickpath, &mnt_pt); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, + GD_MSG_BRICKPATH_ROOT_GET_FAIL, + "Could not get the root of the brick path %s", brickpath); + goto out; + } - if (uuid) { - uuid_copy (peer_uuid, uuid); - } else { - ret = glusterd_hostname_to_uuid (hostname, peer_uuid); - if (ret) - goto out; + if (strncmp(brickpath, mnt_pt, strlen(mnt_pt))) { + gf_msg(this->name, GF_LOG_WARNING, 0, + GD_MSG_BRKPATH_MNTPNT_MISMATCH, "brick: %s brick mount: %s", + brickpath, mnt_pt); + ret = -1; + goto out; } + + brick_dir = &brickpath[strlen(mnt_pt)]; + if (brick_dir[0] == '/') + brick_dir++; + + snprintf(mount_dir, VALID_GLUSTERD_PATHMAX, "/%s", brick_dir); + } + +out: + if (mnt_pt) + GF_FREE(mnt_pt); + + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_brickinfo_new_from_brick(char *brick, glusterd_brickinfo_t **brickinfo, + 
gf_boolean_t construct_real_path, + char **op_errstr) +{ + char *hostname = NULL; + char *path = NULL; + char *tmp_host = NULL; + char *tmp_path = NULL; + int32_t ret = -1; + glusterd_brickinfo_t *new_brickinfo = NULL; + xlator_t *this = NULL; + char abspath[PATH_MAX] = ""; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(brick); + GF_ASSERT(brickinfo); + + tmp_host = gf_strdup(brick); + if (tmp_host && !get_host_name(tmp_host, &hostname)) + goto out; + tmp_path = gf_strdup(brick); + if (tmp_path && !get_path_name(tmp_path, &path)) + goto out; + + GF_ASSERT(hostname); + GF_ASSERT(path); + + ret = glusterd_brickinfo_new(&new_brickinfo); + if (ret) + goto out; + + ret = gf_canonicalize_path(path); + if (ret) + goto out; + ret = snprintf(new_brickinfo->hostname, sizeof(new_brickinfo->hostname), + "%s", hostname); + if (ret < 0 || ret >= sizeof(new_brickinfo->hostname)) { + ret = -1; + goto out; + } + ret = snprintf(new_brickinfo->path, sizeof(new_brickinfo->path), "%s", + path); + if (ret < 0 || ret >= sizeof(new_brickinfo->path)) { ret = -1; - list_for_each_entry (brickiter, &volinfo->bricks, brick_list) { + goto out; + } - if (uuid_is_null (brickiter->uuid)) { - ret = glusterd_resolve_brick (brickiter); - if (ret) - goto out; - } - if ((!uuid_compare (peer_uuid, brickiter->uuid)) && - !strcmp (brickiter->path, path)) { - gf_log ("", GF_LOG_INFO, "Found brick"); - ret = 0; - if (brickinfo) - *brickinfo = brickiter; - break; - } else { - ret = -1; - } + if (construct_real_path) { + ret = glusterd_hostname_to_uuid(new_brickinfo->hostname, + new_brickinfo->uuid); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HOSTNAME_TO_UUID_FAIL, + "Failed to convert hostname %s to uuid", hostname); + if (op_errstr) + gf_asprintf(op_errstr, + "Host %s is not in " + "\'Peer in Cluster\' state", + new_brickinfo->hostname); + goto out; + } + } + + if (construct_real_path && !gf_uuid_compare(new_brickinfo->uuid, MY_UUID) && + new_brickinfo->real_path[0] == '\0') { + if (!realpath(new_brickinfo->path, abspath)) { + /* ENOENT indicates that brick path has not been created + * which is a valid scenario */ + if (errno != ENOENT) { + gf_msg(this->name, GF_LOG_CRITICAL, errno, + GD_MSG_BRICKINFO_CREATE_FAIL, + "realpath" + " () failed for brick %s. The " + "underlying filesystem may be in bad " + "state. Error - %s", + new_brickinfo->path, strerror(errno)); + ret = -1; + goto out; + } + } + if (strlen(abspath) >= sizeof(new_brickinfo->real_path)) { + ret = -1; + goto out; } + (void)strncpy(new_brickinfo->real_path, abspath, + sizeof(new_brickinfo->real_path)); + } + *brickinfo = new_brickinfo; + + ret = 0; out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + GF_FREE(tmp_host); + if (tmp_host) + GF_FREE(tmp_path); + + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; } -int32_t -glusterd_volume_brickinfo_get_by_brick (char *brick, - glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t **brickinfo) +static gf_boolean_t +_is_prefix(char *str1, char *str2) { - int32_t ret = -1; - char *hostname = NULL; - char *path = NULL; - char *dup_brick = NULL; - char *free_ptr = NULL; + GF_ASSERT(str1); + GF_ASSERT(str2); + + int i = 0; + int len1 = 0; + int len2 = 0; + int small_len = 0; + char *bigger = NULL; + gf_boolean_t prefix = _gf_true; + + len1 = strlen(str1); + len2 = strlen(str2); + small_len = min(len1, len2); + + /* + * If either one (not both) of the strings are 0-length, they are not + * prefixes of each other. 
+ */ + if ((small_len == 0) && (len1 != len2)) { + return _gf_false; + } - GF_ASSERT (brick); - GF_ASSERT (volinfo); + for (i = 0; i < small_len; i++) { + if (str1[i] != str2[i]) { + prefix = _gf_false; + break; + } + } - gf_log ("", GF_LOG_INFO, "brick: %s", brick); + if (len1 < len2) + bigger = str2; - dup_brick = gf_strdup (brick); - if (!dup_brick) { - gf_log ("", GF_LOG_ERROR, - "Out of memory"); - ret = -1; + else if (len1 > len2) + bigger = str1; + + else + return prefix; + + if (bigger[small_len] != '/') + prefix = _gf_false; + + return prefix; +} + +/* Checks if @path is available in the peer identified by @uuid + * 'availability' is determined by querying current state of volumes + * in the cluster. */ +gf_boolean_t +glusterd_is_brickpath_available(uuid_t uuid, char *path) +{ + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_conf_t *priv = NULL; + gf_boolean_t available = _gf_false; + char tmp_path[PATH_MAX] = ""; + + priv = THIS->private; + + if (snprintf(tmp_path, PATH_MAX, "%s", path) >= PATH_MAX) + goto out; + /* path may not yet exist */ + if (!realpath(path, tmp_path)) { + if (errno != ENOENT) { + gf_msg(THIS->name, GF_LOG_CRITICAL, errno, + GD_MSG_BRICKINFO_CREATE_FAIL, + "realpath" + " () failed for brick %s. The " + "underlying filesystem may be in bad " + "state. Error - %s", + path, strerror(errno)); + goto out; + } + /* When realpath(3) fails, tmp_path is undefined. */ + (void)snprintf(tmp_path, sizeof(tmp_path), "%s", path); + } + + cds_list_for_each_entry(volinfo, &priv->volumes, vol_list) + { + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + if (gf_uuid_compare(uuid, brickinfo->uuid)) + continue; + if (_is_prefix(brickinfo->real_path, tmp_path)) { + gf_msg(THIS->name, GF_LOG_CRITICAL, 0, + GD_MSG_BRICKINFO_CREATE_FAIL, + "_is_prefix call failed for brick %s " + "against brick %s", + tmp_path, brickinfo->real_path); goto out; - } else { - free_ptr = dup_brick; + } } + } + available = _gf_true; +out: + return available; +} - hostname = strtok (dup_brick, ":"); - path = strtok (NULL, ":"); +int +glusterd_validate_and_create_brickpath(glusterd_brickinfo_t *brickinfo, + uuid_t volume_id, char *volname, + char **op_errstr, gf_boolean_t is_force, + gf_boolean_t ignore_partition) +{ + int ret = -1; + char parentdir[PATH_MAX] = ""; + struct stat parent_st = { + 0, + }; + struct stat brick_st = { + 0, + }; + struct stat root_st = { + 0, + }; + char msg[2048] = ""; + gf_boolean_t is_created = _gf_false; + char glusterfs_dir_path[PATH_MAX] = ""; + int32_t len = 0; + + ret = sys_mkdir(brickinfo->path, 0755); + if (ret) { + if (errno != EEXIST) { + len = snprintf(msg, sizeof(msg), + "Failed to create " + "brick directory for brick %s:%s. " + "Reason : %s ", + brickinfo->hostname, brickinfo->path, + strerror(errno)); + gf_smsg( + "glusterd", GF_LOG_ERROR, errno, GD_MSG_CREATE_BRICK_DIR_FAILED, + "Brick_hostname=%s, Brick_path=%s, Reason=%s", + brickinfo->hostname, brickinfo->path, strerror(errno), NULL); + goto out; + } + } else { + is_created = _gf_true; + } + + ret = sys_lstat(brickinfo->path, &brick_st); + if (ret) { + len = snprintf(msg, sizeof(msg), + "lstat failed on %s. 
" + "Reason : %s", + brickinfo->path, strerror(errno)); + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_LSTAT_FAIL, + "Failed on Brick_path=%s, Reason=%s", brickinfo->path, + strerror(errno), NULL); + goto out; + } + + if ((!is_created) && (!S_ISDIR(brick_st.st_mode))) { + len = snprintf(msg, sizeof(msg), + "The provided path %s " + "which is already present, is not a directory", + brickinfo->path); + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED, + "Brick_path=%s", brickinfo->path, NULL); + ret = -1; + goto out; + } - if (!hostname || !path) { - gf_log ("", GF_LOG_ERROR, - "brick %s is not of form <HOSTNAME>:<export-dir>", - brick); + len = snprintf(parentdir, sizeof(parentdir), "%s/..", brickinfo->path); + if ((len < 0) || (len >= sizeof(parentdir))) { + ret = -1; + goto out; + } + + ret = sys_lstat("/", &root_st); + if (ret) { + len = snprintf(msg, sizeof(msg), + "lstat failed on /. " + "Reason : %s", + strerror(errno)); + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_LSTAT_FAIL, + "Failed on /, Reason=%s", strerror(errno), NULL); + goto out; + } + + ret = sys_lstat(parentdir, &parent_st); + if (ret) { + len = snprintf(msg, sizeof(msg), + "lstat failed on %s. " + "Reason : %s", + parentdir, strerror(errno)); + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_LSTAT_FAIL, + "Failed on parentdir=%s, Reason=%s", parentdir, strerror(errno), + NULL); + goto out; + } + if (strncmp(volname, GLUSTER_SHARED_STORAGE, + SLEN(GLUSTER_SHARED_STORAGE)) && + sizeof(GLUSTERD_DEFAULT_WORKDIR) <= (strlen(brickinfo->path) + 1) && + !strncmp(brickinfo->path, GLUSTERD_DEFAULT_WORKDIR, + (sizeof(GLUSTERD_DEFAULT_WORKDIR) - 1))) { + len = snprintf(msg, sizeof(msg), + "Brick isn't allowed to be " + "created inside glusterd's working directory."); + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_BRICK_CREATION_FAIL, + NULL); + ret = -1; + goto out; + } + + if (!is_force) { + if (brick_st.st_dev != parent_st.st_dev) { + len = snprintf(msg, sizeof(msg), + "The brick %s:%s " + "is a mount point. Please create a " + "sub-directory under the mount point " + "and use that as the brick directory. " + "Or use 'force' at the end of the " + "command if you want to override this " + "behavior.", + brickinfo->hostname, brickinfo->path); + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_BRICK_CREATE_MNTPNT, + "Use 'force' at the end of the command if you want to " + "override this behavior, Brick_hostname=%s, Brick_path=%s", + brickinfo->hostname, brickinfo->path, NULL); + ret = -1; + goto out; + } else if (parent_st.st_dev == root_st.st_dev) { + len = snprintf(msg, sizeof(msg), + "The brick %s:%s " + "is being created in the root " + "partition. It is recommended that " + "you don't use the system's root " + "partition for storage backend. 
Or " + "use 'force' at the end of the " + "command if you want to override this " + "behavior.", + brickinfo->hostname, brickinfo->path); + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_BRICK_CREATE_ROOT, + "Use 'force' at the end of the command if you want to " + "override this behavior, Brick_hostname=%s, Brick_path=%s", + brickinfo->hostname, brickinfo->path, NULL); + + /* If --wignore-partition flag is used, ignore warnings + * related to bricks being on root partition when 'force' + * is not used */ + if ((len < 0) || (len >= sizeof(msg)) || !ignore_partition) { ret = -1; goto out; + } } + } - ret = glusterd_volume_brickinfo_get (NULL, hostname, path, volinfo, - brickinfo); -out: - if (free_ptr) - GF_FREE (free_ptr); + ret = glusterd_check_and_set_brick_xattr( + brickinfo->hostname, brickinfo->path, volume_id, op_errstr, is_force); + if (ret) + goto out; - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + /* create .glusterfs directory */ + len = snprintf(glusterfs_dir_path, sizeof(glusterfs_dir_path), "%s/%s", + brickinfo->path, ".glusterfs"); + if ((len < 0) || (len >= sizeof(glusterfs_dir_path))) { + ret = -1; + goto out; + } + + ret = sys_mkdir(glusterfs_dir_path, 0600); + if (ret && (errno != EEXIST)) { + len = snprintf(msg, sizeof(msg), + "Failed to create " + ".glusterfs directory for brick %s:%s. " + "Reason : %s ", + brickinfo->hostname, brickinfo->path, strerror(errno)); + gf_smsg("glusterd", GF_LOG_ERROR, errno, + GD_MSG_CREATE_GLUSTER_DIR_FAILED, + "Brick_hostname=%s, Brick_path=%s, Reason=%s", + brickinfo->hostname, brickinfo->path, strerror(errno), NULL); + goto out; + } + + ret = 0; + +out: + if (len < 0) { + ret = -1; + } + if (ret && is_created) { + (void)recursive_rmdir(brickinfo->path); + } + if (ret && !*op_errstr && msg[0] != '\0') + *op_errstr = gf_strdup(msg); + + return ret; } int32_t -glusterd_friend_cleanup (glusterd_peerinfo_t *peerinfo) +glusterd_volume_brickinfo_get(uuid_t uuid, char *hostname, char *path, + glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t **brickinfo) { - GF_ASSERT (peerinfo); - glusterd_peerctx_t *peerctx = NULL; + glusterd_brickinfo_t *brickiter = NULL; + uuid_t peer_uuid = {0}; + int32_t ret = -1; + xlator_t *this = NULL; - if (peerinfo->rpc) { - peerctx = peerinfo->rpc->mydata; - peerinfo->rpc->mydata = NULL; - peerinfo->rpc = rpc_clnt_unref (peerinfo->rpc); - peerinfo->rpc = NULL; - if (peerctx) - GF_FREE (peerctx); + this = THIS; + + if (uuid) { + gf_uuid_copy(peer_uuid, uuid); + } else { + ret = glusterd_hostname_to_uuid(hostname, peer_uuid); + if (ret) + goto out; + } + ret = -1; + cds_list_for_each_entry(brickiter, &volinfo->bricks, brick_list) + { + if ((gf_uuid_is_null(brickiter->uuid)) && + (glusterd_resolve_brick(brickiter) != 0)) + goto out; + if (gf_uuid_compare(peer_uuid, brickiter->uuid)) + continue; + + if (strcmp(brickiter->path, path) == 0) { + gf_msg_debug(this->name, 0, LOGSTR_FOUND_BRICK, brickiter->hostname, + brickiter->path, volinfo->volname); + ret = 0; + if (brickinfo) + *brickinfo = brickiter; + break; } - glusterd_peer_destroy (peerinfo); + } - return 0; +out: + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; } int32_t -glusterd_volinfo_find (char *volname, glusterd_volinfo_t **volinfo) +glusterd_volume_ta_brickinfo_get(uuid_t uuid, char *hostname, char *path, + glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t **ta_brickinfo) { - glusterd_volinfo_t *tmp_volinfo = NULL; - int32_t ret = -1; - xlator_t *this = NULL; - glusterd_conf_t *priv = NULL; + glusterd_brickinfo_t 
*ta_brickiter = NULL; + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + + ret = -1; + + cds_list_for_each_entry(ta_brickiter, &volinfo->ta_bricks, brick_list) + { + if (strcmp(ta_brickiter->path, path) == 0 && + strcmp(ta_brickiter->hostname, hostname) == 0) { + gf_msg_debug(this->name, 0, LOGSTR_FOUND_BRICK, + ta_brickiter->hostname, ta_brickiter->path, + volinfo->volname); + ret = 0; + if (ta_brickinfo) + *ta_brickinfo = ta_brickiter; + break; + } + } - GF_ASSERT (volname); + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; +} - this = THIS; - GF_ASSERT (this); +int32_t +glusterd_volume_brickinfo_get_by_brick(char *brick, glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t **brickinfo, + gf_boolean_t construct_real_path) +{ + int32_t ret = -1; + glusterd_brickinfo_t *tmp_brickinfo = NULL; - priv = this->private; + GF_ASSERT(brick); + GF_ASSERT(volinfo); - list_for_each_entry (tmp_volinfo, &priv->volumes, vol_list) { - if (!strcmp (tmp_volinfo->volname, volname)) { - gf_log ("", GF_LOG_DEBUG, "Volume %s found", volname); - ret = 0; - *volinfo = tmp_volinfo; - break; - } - } + ret = glusterd_brickinfo_new_from_brick(brick, &tmp_brickinfo, + construct_real_path, NULL); + if (ret) + goto out; + ret = glusterd_volume_brickinfo_get( + NULL, tmp_brickinfo->hostname, tmp_brickinfo->path, volinfo, brickinfo); + (void)glusterd_brickinfo_delete(tmp_brickinfo); +out: + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; +} - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; +gf_boolean_t +glusterd_is_brick_decommissioned(glusterd_volinfo_t *volinfo, char *hostname, + char *path) +{ + gf_boolean_t decommissioned = _gf_false; + glusterd_brickinfo_t *brickinfo = NULL; + int ret = -1; + + ret = glusterd_volume_brickinfo_get(NULL, hostname, path, volinfo, + &brickinfo); + if (ret) + goto out; + decommissioned = brickinfo->decommissioned; +out: + return decommissioned; } +int +glusterd_volinfo_find_by_volume_id(uuid_t volume_id, + glusterd_volinfo_t **volinfo) +{ + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_volinfo_t *voliter = NULL; + glusterd_conf_t *priv = NULL; + + if (!volume_id) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); + return -1; + } + + this = THIS; + priv = this->private; + + cds_list_for_each_entry(voliter, &priv->volumes, vol_list) + { + if (gf_uuid_compare(volume_id, voliter->volume_id)) + continue; + *volinfo = voliter; + ret = 0; + gf_msg_debug(this->name, 0, "Volume %s found", voliter->volname); + break; + } + return ret; +} int32_t -glusterd_service_stop (const char *service, char *pidfile, int sig, - gf_boolean_t force_kill) -{ - int32_t ret = -1; - pid_t pid = -1; - FILE *file = NULL; - gf_boolean_t is_locked = _gf_false; - - file = fopen (pidfile, "r+"); - - if (!file) { - gf_log ("", GF_LOG_ERROR, "Unable to open pidfile: %s", - pidfile); - if (errno == ENOENT) { - gf_log ("",GF_LOG_TRACE, "%s may not be running", - service); - ret = 0; - goto out; - } - ret = -1; - goto out; +glusterd_volinfo_find(const char *volname, glusterd_volinfo_t **volinfo) +{ + glusterd_volinfo_t *tmp_volinfo = NULL; + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + + GF_ASSERT(volname); + this = THIS; + GF_ASSERT(this); + + priv = this->private; + GF_ASSERT(priv); + + cds_list_for_each_entry(tmp_volinfo, &priv->volumes, vol_list) + { + if (!strcmp(tmp_volinfo->volname, volname)) { + gf_msg_debug(this->name, 0, "Volume %s found", volname); + ret = 0; + *volinfo = tmp_volinfo; + 
break; } - ret = lockf (fileno (file), F_TLOCK, 0); - if (!ret) { - is_locked = _gf_true; - ret = unlink (pidfile); - if (ret && (ENOENT != errno)) { - gf_log ("", GF_LOG_ERROR, "Unable to " - "unlink stale pidfile: %s", pidfile); - } - goto out; + } + + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; +} + +gf_boolean_t +glusterd_volume_exists(const char *volname) +{ + glusterd_volinfo_t *tmp_volinfo = NULL; + gf_boolean_t volume_found = _gf_false; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + + GF_ASSERT(volname); + this = THIS; + GF_ASSERT(this); + + priv = this->private; + GF_ASSERT(priv); + + cds_list_for_each_entry(tmp_volinfo, &priv->volumes, vol_list) + { + if (!strcmp(tmp_volinfo->volname, volname)) { + gf_msg_debug(this->name, 0, "Volume %s found", volname); + volume_found = _gf_true; + break; } + } + return volume_found; +} - ret = fscanf (file, "%d", &pid); - if (ret <= 0) { - gf_log ("", GF_LOG_ERROR, "Unable to read pidfile: %s", - pidfile); - ret = -1; +int32_t +glusterd_service_stop(const char *service, char *pidfile, int sig, + gf_boolean_t force_kill) +{ + int32_t ret = -1; + pid_t pid = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + if (!gf_is_service_running(pidfile, &pid)) { + ret = 0; + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_ALREADY_STOPPED, + "%s already stopped", service); + goto out; + } + gf_msg_debug(this->name, 0, + "Stopping gluster %s running in pid: " + "%d", + service, pid); + + ret = kill(pid, sig); + if (ret) { + switch (errno) { + case ESRCH: + gf_msg_debug(this->name, 0, "%s is already stopped", service); + ret = 0; goto out; + default: + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_SVC_KILL_FAIL, + "Unable to kill %s " + "service, reason:%s", + service, strerror(errno)); } - fclose (file); - file = NULL; + } + if (!force_kill) + goto out; - gf_log ("", GF_LOG_INFO, "Stopping gluster %s running in pid: %d", - service, pid); + sleep(1); + if (gf_is_service_running(pidfile, &pid)) { + ret = kill(pid, SIGKILL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_PID_KILL_FAIL, + "Unable to kill pid:%d, " + "reason:%s", + pid, strerror(errno)); + goto out; + } + } - ret = kill (pid, sig); + ret = 0; +out: + return ret; +} - if (force_kill) { - sleep (1); - file = fopen (pidfile, "r+"); - if (!file) { - ret = 0; - goto out; - } - ret = lockf (fileno (file), F_TLOCK, 0); - if (ret && ((EAGAIN == errno) || (EACCES == errno))) { - ret = kill (pid, SIGKILL); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to " - "kill pid %d reason: %s", pid, - strerror(errno)); - goto out; - } - - } else if (0 == ret){ - is_locked = _gf_true; - } - ret = unlink (pidfile); - if (ret && (ENOENT != errno)) { - gf_log ("", GF_LOG_ERROR, "Unable to " - "unlink pidfile: %s", pidfile); - goto out; - } +int32_t +glusterd_service_stop_nolock(const char *service, char *pidfile, int sig, + gf_boolean_t force_kill) +{ + int32_t ret = -1; + pid_t pid = -1; + xlator_t *this = NULL; + FILE *file = NULL; + + this = THIS; + GF_ASSERT(this); + + file = fopen(pidfile, "r+"); + if (file) { + ret = fscanf(file, "%d", &pid); + if (ret <= 0) { + gf_msg_debug(this->name, 0, "Unable to read pidfile: %s", pidfile); + goto out; } + } + if (kill(pid, 0) < 0) { ret = 0; + gf_msg_debug(this->name, 0, "%s process not running: (%d) %s", service, + pid, strerror(errno)); + goto out; + } + gf_msg_debug(this->name, 0, + "Stopping gluster %s service running with " + "pid: %d", + service, pid); + + ret = kill(pid, sig); + if (ret) { + switch (errno) { 
+ case ESRCH: + gf_msg_debug(this->name, 0, "%s is already stopped", service); + ret = 0; + goto out; + default: + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_SVC_KILL_FAIL, + "Unable to kill %s " + "service, reason:%s", + service, strerror(errno)); + } + } + if (!force_kill) + goto out; + + sleep(1); + if (kill(pid, 0) == 0) { + ret = kill(pid, SIGKILL); + if (ret) { + /* Process is already dead, don't fail */ + if (errno == ESRCH) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_PID_KILL_FAIL, + "Unable to find pid:%d, " + "must be dead already. Ignoring.", + pid); + } else { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_PID_KILL_FAIL, + "Unable to kill pid:%d, " + "reason:%s", + pid, strerror(errno)); + goto out; + } + } + } + + ret = 0; + out: - if (is_locked && file) - lockf (fileno (file), F_ULOCK, 0); - if (file) - fclose (file); - return ret; -} + if (file) + fclose(file); + return ret; +} void -glusterd_set_brick_socket_filepath (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo, - char *sockpath, size_t len) +glusterd_set_socket_filepath(char *sock_filepath, char *sockpath, size_t len) { - char export_path[PATH_MAX] = {0,}; - char sock_filepath[PATH_MAX] = {0,}; - char md5_sum[MD5_DIGEST_LEN*2+1] = {0,}; - char volume_dir[PATH_MAX] = {0,}; - xlator_t *this = NULL; - glusterd_conf_t *priv = NULL; - int expected_file_len = 0; - - expected_file_len = strlen (glusterd_sock_dir) + strlen ("/") + - MD5_DIGEST_LEN*2 + strlen (".socket") + 1; - GF_ASSERT (len >= expected_file_len); - this = THIS; - GF_ASSERT (this); + char xxh64[GF_XXH64_DIGEST_LENGTH * 2 + 1] = { + 0, + }; - priv = this->private; - - GLUSTERD_GET_VOLUME_DIR (volume_dir, volinfo, priv); - GLUSTERD_REMOVE_SLASH_FROM_PATH (brickinfo->path, export_path); - snprintf (sock_filepath, PATH_MAX, "%s/run/%s-%s", - volume_dir, brickinfo->hostname, export_path); - _get_md5_str (md5_sum, sizeof (md5_sum), - (uint8_t*)sock_filepath, strlen (sock_filepath)); + gf_xxh64_wrapper((unsigned char *)sock_filepath, strlen(sock_filepath), + GF_XXHSUM64_DEFAULT_SEED, xxh64); + snprintf(sockpath, len, "%s/%s.socket", GLUSTERD_SOCK_DIR, xxh64); +} - snprintf (sockpath, len, "%s/%s.socket", glusterd_sock_dir, md5_sum); +void +glusterd_set_brick_socket_filepath(glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, + char *sockpath, size_t len) +{ + char volume_dir[PATH_MAX] = ""; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + int expected_file_len = 0; + char export_path[PATH_MAX] = ""; + char sock_filepath[PATH_MAX] = ""; + int32_t slen = 0; + + expected_file_len = SLEN(GLUSTERD_SOCK_DIR) + SLEN("/") + + SHA256_DIGEST_LENGTH * 2 + SLEN(".socket") + 1; + GF_ASSERT(len >= expected_file_len); + this = THIS; + GF_ASSERT(this); + + priv = this->private; + + GLUSTERD_GET_VOLUME_PID_DIR(volume_dir, volinfo, priv); + GLUSTERD_REMOVE_SLASH_FROM_PATH(brickinfo->path, export_path); + slen = snprintf(sock_filepath, PATH_MAX, "%s/run/%s-%s", volume_dir, + brickinfo->hostname, export_path); + if (slen < 0) { + sock_filepath[0] = 0; + } + glusterd_set_socket_filepath(sock_filepath, sockpath, len); } -/* connection happens only if it is not aleady connected, +/* connection happens only if it is not already connected, * reconnections are taken care by rpc-layer */ int32_t -glusterd_brick_connect (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo) -{ - int ret = 0; - char socketpath[PATH_MAX] = {0}; - dict_t *options = NULL; - struct rpc_clnt *rpc = NULL; - - GF_ASSERT (volinfo); - GF_ASSERT (brickinfo); - - 
if (brickinfo->rpc == NULL) { - glusterd_set_brick_socket_filepath (volinfo, brickinfo, - socketpath, - sizeof (socketpath)); - ret = rpc_clnt_transport_unix_options_build (&options, socketpath); - if (ret) - goto out; - ret = glusterd_rpc_create (&rpc, options, - glusterd_brick_rpc_notify, - brickinfo); - if (ret) - goto out; - brickinfo->rpc = rpc; +glusterd_brick_connect(glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, char *socketpath) +{ + int ret = 0; + char volume_id_str[64] = ""; + char *brickid = NULL; + dict_t *options = NULL; + struct rpc_clnt *rpc = NULL; + + GF_ASSERT(volinfo); + GF_ASSERT(brickinfo); + GF_ASSERT(socketpath); + + if (brickinfo->rpc == NULL) { + /* Setting frame-timeout to 10mins (600seconds). + * Unix domain sockets ensures that the connection is reliable. + * The default timeout of 30mins used for unreliable network + * connections is too long for unix domain socket connections. + */ + options = dict_new(); + if (!options) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, + NULL); + goto out; + } + + ret = rpc_transport_unix_options_build(options, socketpath, 600); + if (ret) + goto out; + + uuid_utoa_r(volinfo->volume_id, volume_id_str); + ret = gf_asprintf(&brickid, "%s:%s:%s", volume_id_str, + brickinfo->hostname, brickinfo->path); + if (ret < 0) + goto out; + + ret = glusterd_rpc_create(&rpc, options, glusterd_brick_rpc_notify, + brickid, _gf_false); + if (ret) { + GF_FREE(brickid); + goto out; } + brickinfo->rpc = rpc; + } out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + if (options) + dict_unref(options); + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; +} + +static int +_mk_rundir_p(glusterd_volinfo_t *volinfo) +{ + char rundir[PATH_MAX] = ""; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + int ret = -1; + + this = THIS; + priv = this->private; + GLUSTERD_GET_VOLUME_PID_DIR(rundir, volinfo, priv); + ret = mkdir_p(rundir, 0755, _gf_true); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_CREATE_DIR_FAILED, + "Failed to create rundir"); + return ret; } int32_t -glusterd_volume_start_glusterfs (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo) -{ - int32_t ret = -1; - xlator_t *this = NULL; - glusterd_conf_t *priv = NULL; - char pidfile[PATH_MAX] = {0,}; - char volfile[PATH_MAX] = {0,}; - char path[PATH_MAX] = {0,}; - char cmd_str[8192] = {0,}; - char rundir[PATH_MAX] = {0,}; - char exp_path[PATH_MAX] = {0,}; - char logfile[PATH_MAX] = {0,}; - int port = 0; - FILE *file = NULL; - gf_boolean_t is_locked = _gf_false; - char socketpath[PATH_MAX] = {0}; - - GF_ASSERT (volinfo); - GF_ASSERT (brickinfo); - - this = THIS; - GF_ASSERT (this); - - priv = this->private; - - GLUSTERD_GET_VOLUME_DIR (path, volinfo, priv); - snprintf (rundir, PATH_MAX, "%s/run", path); - ret = mkdir (rundir, 0777); - - if ((ret == -1) && (EEXIST != errno)) { - gf_log ("", GF_LOG_ERROR, "Unable to create rundir %s", - rundir); - goto out; +glusterd_volume_start_glusterfs(glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, + gf_boolean_t wait) +{ + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + char pidfile[PATH_MAX + 1] = ""; + char volfile[PATH_MAX] = ""; + runner_t runner = { + 0, + }; + char exp_path[PATH_MAX] = ""; + char logfile[PATH_MAX] = ""; + int port = 0; + int rdma_port = 0; + char *bind_address = NULL; + char *localtime_logging = NULL; + char socketpath[PATH_MAX] = ""; + char glusterd_uuid[1024] = ""; + char 
valgrind_logfile[PATH_MAX] = ""; + char rdma_brick_path[PATH_MAX] = ""; + struct rpc_clnt *rpc = NULL; + rpc_clnt_connection_t *conn = NULL; + int pid = -1; + int32_t len = 0; + glusterd_brick_proc_t *brick_proc = NULL; + char *inet_family = NULL; + char *global_threading = NULL; + bool threading = false; + + GF_ASSERT(volinfo); + GF_ASSERT(brickinfo); + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + GF_ASSERT(priv); + + if (brickinfo->snap_status == -1) { + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_SNAPSHOT_PENDING, + "Snapshot is pending on %s:%s. " + "Hence not starting the brick", + brickinfo->hostname, brickinfo->path); + ret = 0; + goto out; + } + + GLUSTERD_GET_BRICK_PIDFILE(pidfile, volinfo, brickinfo, priv); + if (gf_is_service_running(pidfile, &pid)) { + goto connect; + } + + /* + * There are all sorts of races in the start/stop code that could leave + * a UNIX-domain socket or RPC-client object associated with a + * long-dead incarnation of this brick, while the new incarnation is + * listening on a new socket at the same path and wondering why we + * haven't shown up. To avoid the whole mess and be on the safe side, + * we just blow away anything that might have been left over, and start + * over again. + */ + glusterd_set_brick_socket_filepath(volinfo, brickinfo, socketpath, + sizeof(socketpath)); + (void)glusterd_unlink_file(socketpath); + rpc = brickinfo->rpc; + if (rpc) { + brickinfo->rpc = NULL; + conn = &rpc->conn; + pthread_mutex_lock(&conn->lock); + if (conn->reconnect) { + (void)gf_timer_call_cancel(rpc->ctx, conn->reconnect); + conn->reconnect = NULL; } - - glusterd_set_brick_socket_filepath (volinfo, brickinfo, socketpath, - sizeof (socketpath)); - GLUSTERD_GET_BRICK_PIDFILE (pidfile, path, brickinfo->hostname, - brickinfo->path); - - file = fopen (pidfile, "r+"); - if (file) { - ret = lockf (fileno (file), F_TLOCK, 0); - if (ret && ((EAGAIN == errno) || (EACCES == errno))) { - ret = 0; - gf_log ("", GF_LOG_INFO, "brick %s:%s " - "already started", brickinfo->hostname, - brickinfo->path); - goto connect; - } + pthread_mutex_unlock(&conn->lock); + rpc_clnt_unref(rpc); + } + + port = pmap_assign_port(THIS, brickinfo->port, brickinfo->path); + if (!port) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PORTS_EXHAUSTED, + "All the ports in the range are exhausted, can't start " + "brick %s for volume %s", + brickinfo->path, volinfo->volname); + ret = -1; + goto out; + } + /* Build the exp_path, before starting the glusterfsd even in + valgrind mode. Otherwise all the glusterfsd processes start + writing the valgrind log to the same file. + */ + GLUSTERD_REMOVE_SLASH_FROM_PATH(brickinfo->path, exp_path); + +retry: + runinit(&runner); + + if (this->ctx->cmd_args.vgtool != _gf_none) { + /* Run bricks with valgrind. 
*/ + if (volinfo->logdir) { + len = snprintf(valgrind_logfile, PATH_MAX, "%s/valgrind-%s-%s.log", + volinfo->logdir, volinfo->volname, exp_path); + } else { + len = snprintf(valgrind_logfile, PATH_MAX, + "%s/bricks/valgrind-%s-%s.log", priv->logdir, + volinfo->volname, exp_path); + } + if ((len < 0) || (len >= PATH_MAX)) { + ret = -1; + goto out; } - ret = pmap_registry_search (this, brickinfo->path, - GF_PMAP_PORT_BRICKSERVER); - if (ret) { - ret = 0; - file = fopen (pidfile, "r+"); - if (file) { - ret = lockf (fileno (file), F_TLOCK, 0); - if (ret && ((EAGAIN == errno) || (EACCES == errno))) { - ret = 0; - gf_log ("", GF_LOG_INFO, "brick %s:%s " - "already started", brickinfo->hostname, - brickinfo->path); - goto connect; - } else if (0 == ret) { - is_locked = _gf_true; - } - } - /* This means, pmap has the entry, remove it */ - ret = pmap_registry_remove (this, 0, brickinfo->path, - GF_PMAP_PORT_BRICKSERVER, NULL); - } - unlink (pidfile); - - gf_log ("", GF_LOG_INFO, "About to start glusterfs" - " for brick %s:%s", brickinfo->hostname, - brickinfo->path); - GLUSTERD_REMOVE_SLASH_FROM_PATH (brickinfo->path, exp_path); - snprintf (volfile, PATH_MAX, "%s.%s.%s", volinfo->volname, - brickinfo->hostname, exp_path); - - if (!brickinfo->logfile && volinfo->logdir) { - snprintf (logfile, PATH_MAX, "%s/%s.log", volinfo->logdir, - exp_path); - brickinfo->logfile = gf_strdup (logfile); - } else if (!brickinfo->logfile) { - snprintf (logfile, PATH_MAX, "%s/bricks/%s.log", - DEFAULT_LOG_FILE_DIRECTORY, exp_path); - brickinfo->logfile = gf_strdup (logfile); - } - - port = brickinfo->port; - if (!port) - port = pmap_registry_alloc (THIS); - - snprintf (cmd_str, 8192, - "%s/sbin/glusterfsd --xlator-option %s-server.listen-port=%d " - "-s localhost --volfile-id %s -p %s -S %s --brick-name %s " - "--brick-port %d -l %s", GFS_PREFIX, volinfo->volname, - port, volfile, pidfile, socketpath, brickinfo->path, port, - brickinfo->logfile); - - gf_log ("",GF_LOG_DEBUG,"Starting GlusterFS Command Executed: \n %s \n", cmd_str); - ret = gf_system (cmd_str); - - if (ret == 0) { - //pmap_registry_bind (THIS, port, brickinfo->path); - brickinfo->port = port; + if (this->ctx->cmd_args.vgtool == _gf_memcheck) + runner_add_args(&runner, "valgrind", "--leak-check=full", + "--trace-children=yes", "--track-origins=yes", + NULL); + else + runner_add_args(&runner, "valgrind", "--tool=drd", NULL); + + runner_argprintf(&runner, "--log-file=%s", valgrind_logfile); + } + + if (volinfo->is_snap_volume) { + len = snprintf(volfile, PATH_MAX, "/%s/%s/%s/%s.%s.%s", + GLUSTERD_VOL_SNAP_DIR_PREFIX, + volinfo->snapshot->snapname, volinfo->volname, + volinfo->volname, brickinfo->hostname, exp_path); + } else { + len = snprintf(volfile, PATH_MAX, "%s.%s.%s", volinfo->volname, + brickinfo->hostname, exp_path); + } + if ((len < 0) || (len >= PATH_MAX)) { + ret = -1; + goto out; + } + + if (volinfo->logdir) { + len = snprintf(logfile, PATH_MAX, "%s/%s.log", volinfo->logdir, + exp_path); + } else { + len = snprintf(logfile, PATH_MAX, "%s/bricks/%s.log", priv->logdir, + exp_path); + } + if ((len < 0) || (len >= PATH_MAX)) { + ret = -1; + goto out; + } + + if (!brickinfo->logfile) + brickinfo->logfile = gf_strdup(logfile); + + (void)snprintf(glusterd_uuid, 1024, "*-posix.glusterd-uuid=%s", + uuid_utoa(MY_UUID)); + runner_add_args(&runner, SBIN_DIR "/glusterfsd", "-s", brickinfo->hostname, + "--volfile-id", volfile, "-p", pidfile, "-S", socketpath, + "--brick-name", brickinfo->path, "-l", brickinfo->logfile, + "--xlator-option", glusterd_uuid, 
"--process-name", "brick", + NULL); + + if (dict_get_strn(priv->opts, GLUSTERD_LOCALTIME_LOGGING_KEY, + SLEN(GLUSTERD_LOCALTIME_LOGGING_KEY), + &localtime_logging) == 0) { + if (strcmp(localtime_logging, "enable") == 0) + runner_add_arg(&runner, "--localtime-logging"); + } + + runner_add_arg(&runner, "--brick-port"); + if (volinfo->transport_type != GF_TRANSPORT_BOTH_TCP_RDMA) { + runner_argprintf(&runner, "%d", port); + } else { + len = snprintf(rdma_brick_path, sizeof(rdma_brick_path), "%s.rdma", + brickinfo->path); + if ((len < 0) || (len >= sizeof(rdma_brick_path))) { + ret = -1; + goto out; + } + rdma_port = pmap_assign_port(THIS, brickinfo->rdma_port, + rdma_brick_path); + if (!rdma_port) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PORTS_EXHAUSTED, + "All rdma ports in the " + "range are exhausted, can't start brick %s for " + "volume %s", + rdma_brick_path, volinfo->volname); + ret = -1; + goto out; } + runner_argprintf(&runner, "%d,%d", port, rdma_port); + runner_add_arg(&runner, "--xlator-option"); + runner_argprintf(&runner, "%s-server.transport.rdma.listen-port=%d", + volinfo->volname, rdma_port); + } + + if (dict_get_strn(volinfo->dict, VKEY_CONFIG_GLOBAL_THREADING, + SLEN(VKEY_CONFIG_GLOBAL_THREADING), + &global_threading) == 0) { + if ((gf_string2boolean(global_threading, &threading) == 0) && + threading) { + runner_add_arg(&runner, "--global-threading"); + } + } + + runner_add_arg(&runner, "--xlator-option"); + runner_argprintf(&runner, "%s-server.listen-port=%d", volinfo->volname, + port); + + if (dict_get_strn(this->options, "transport.socket.bind-address", + SLEN("transport.socket.bind-address"), + &bind_address) == 0) { + runner_add_arg(&runner, "--xlator-option"); + runner_argprintf(&runner, "transport.socket.bind-address=%s", + bind_address); + } + + if (volinfo->transport_type == GF_TRANSPORT_RDMA) + runner_argprintf(&runner, "--volfile-server-transport=rdma"); + else if (volinfo->transport_type == GF_TRANSPORT_BOTH_TCP_RDMA) + runner_argprintf(&runner, "--volfile-server-transport=socket,rdma"); + + ret = dict_get_str(this->options, "transport.address-family", &inet_family); + if (!ret) { + runner_add_arg(&runner, "--xlator-option"); + runner_argprintf(&runner, "transport.address-family=%s", inet_family); + } + + if (volinfo->memory_accounting) + runner_add_arg(&runner, "--mem-accounting"); + + if (is_brick_mx_enabled()) + runner_add_arg(&runner, "--brick-mux"); + + runner_log(&runner, "", GF_LOG_DEBUG, "Starting GlusterFS"); + + brickinfo->port = port; + brickinfo->rdma_port = rdma_port; + brickinfo->status = GF_BRICK_STARTING; + brickinfo->port_registered = _gf_false; + + if (wait) { + synclock_unlock(&priv->big_lock); + errno = 0; + ret = runner_run(&runner); + if (errno != 0) + ret = errno; + synclock_lock(&priv->big_lock); + + if (ret == EADDRINUSE) { + /* retry after getting a new port */ + gf_msg(this->name, GF_LOG_WARNING, -ret, + GD_MSG_SRC_BRICK_PORT_UNAVAIL, + "Port %d is used by other process", port); + + port = pmap_registry_alloc(this); + if (!port) { + gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_NO_FREE_PORTS, + "Couldn't allocate a port"); + ret = -1; + goto out; + } + gf_msg(this->name, GF_LOG_NOTICE, 0, GD_MSG_RETRY_WITH_NEW_PORT, + "Retrying to start brick %s with new port %d", + brickinfo->path, port); + goto retry; + } + } else { + ret = runner_run_nowait(&runner); + } + + if (ret) { + brickinfo->port = 0; + brickinfo->rdma_port = 0; + goto out; + } + + ret = glusterd_brickprocess_new(&brick_proc); + if (ret) { + gf_msg(this->name, 
GF_LOG_ERROR, 0, GD_MSG_BRICKPROC_NEW_FAILED, + "Failed to create " + "new brick process instance"); + goto out; + } + + brick_proc->port = brickinfo->port; + cds_list_add_tail(&brick_proc->brick_proc_list, &priv->brick_procs); + brickinfo->brick_proc = brick_proc; + cds_list_add_tail(&brickinfo->mux_bricks, &brick_proc->bricks); + brickinfo->brick_proc = brick_proc; + brick_proc->brick_count++; connect: - ret = glusterd_brick_connect (volinfo, brickinfo); - if (ret) - goto out; + ret = glusterd_brick_connect(volinfo, brickinfo, socketpath); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_DISCONNECTED, + "Failed to connect to brick %s:%s on %s", brickinfo->hostname, + brickinfo->path, socketpath); + goto out; + } + out: - if (is_locked && file) - lockf (fileno (file), F_ULOCK, 0); - if (file) - fclose (file); - return ret; + if (ret) + brickinfo->status = GF_BRICK_STOPPED; + return ret; } int32_t -glusterd_brick_unlink_socket_file (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo) -{ - char path[PATH_MAX] = {0,}; - char socketpath[PATH_MAX] = {0}; - xlator_t *this = NULL; - glusterd_conf_t *priv = NULL; - int ret = 0; - - GF_ASSERT (volinfo); - GF_ASSERT (brickinfo); - - this = THIS; - GF_ASSERT (this); - - priv = this->private; - GLUSTERD_GET_VOLUME_DIR (path, volinfo, priv); - glusterd_set_brick_socket_filepath (volinfo, brickinfo, socketpath, - sizeof (socketpath)); - ret = unlink (socketpath); - if (ret && (ENOENT == errno)) { - ret = 0; - } else { - gf_log ("glusterd", GF_LOG_ERROR, "Failed to remove %s" - " error: %s", socketpath, strerror (errno)); - } +glusterd_brick_unlink_socket_file(glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo) +{ + char path[PATH_MAX] = ""; + char socketpath[PATH_MAX] = ""; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; - return ret; + GF_ASSERT(volinfo); + GF_ASSERT(brickinfo); + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + GLUSTERD_GET_VOLUME_DIR(path, volinfo, priv); + glusterd_set_brick_socket_filepath(volinfo, brickinfo, socketpath, + sizeof(socketpath)); + + return glusterd_unlink_file(socketpath); } int32_t -glusterd_brick_disconnect (glusterd_brickinfo_t *brickinfo) +glusterd_brick_disconnect(glusterd_brickinfo_t *brickinfo) { - GF_ASSERT (brickinfo); + rpc_clnt_t *rpc = NULL; + glusterd_conf_t *priv = THIS->private; - if (brickinfo->rpc) { - rpc_clnt_unref (brickinfo->rpc); - brickinfo->rpc = NULL; - } - return 0; + GF_ASSERT(brickinfo); + + if (!brickinfo) { + gf_msg_callingfn("glusterd", GF_LOG_WARNING, EINVAL, + GD_MSG_BRICK_NOT_FOUND, "!brickinfo"); + return -1; + } + + rpc = brickinfo->rpc; + brickinfo->rpc = NULL; + + if (rpc) { + glusterd_rpc_clnt_unref(priv, rpc); + } + + return 0; } -int32_t -glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo) +static gf_boolean_t +unsafe_option(dict_t *this, char *key, data_t *value, void *arg) { - xlator_t *this = NULL; - glusterd_conf_t *priv = NULL; - char pidfile[PATH_MAX] = {0,}; - char path[PATH_MAX] = {0,}; - int ret = 0; + /* + * Certain options are safe because they're already being handled other + * ways, such as being copied down to the bricks (all auth options) or + * being made irrelevant (event-threads). All others are suspect and + * must be checked in the next function. 
+ */ + if (fnmatch("*auth*", key, 0) == 0) { + return _gf_false; + } - GF_ASSERT (volinfo); - GF_ASSERT (brickinfo); + if (fnmatch("*event-threads", key, 0) == 0) { + return _gf_false; + } - this = THIS; - GF_ASSERT (this); + if (fnmatch("*diagnostics.brick-log*", key, 0) == 0) { + return _gf_false; + } - priv = this->private; - (void) glusterd_brick_disconnect (brickinfo); + if (fnmatch("*diagnostics.client-log*", key, 0) == 0) { + return _gf_false; + } + if (fnmatch("user.*", key, 0) == 0) { + return _gf_false; + } - GLUSTERD_GET_VOLUME_DIR (path, volinfo, priv); - GLUSTERD_GET_BRICK_PIDFILE (pidfile, path, brickinfo->hostname, - brickinfo->path); + return _gf_true; +} - ret = glusterd_service_stop ("brick", pidfile, SIGTERM, _gf_false); - if (ret == 0) { - glusterd_set_brick_status (brickinfo, GF_BRICK_STOPPED); - ret = glusterd_brick_unlink_socket_file (volinfo, brickinfo); +static int +opts_mismatch(dict_t *dict1, char *key, data_t *value1, void *dict2) +{ + data_t *value2 = dict_get(dict2, key); + int32_t min_len; + + /* + * If the option is only present on one, we can either look at the + * default or assume a mismatch. Looking at the default is pretty + * hard, because that's part of a structure within each translator and + * there's no dlopen interface to get at it, so we assume a mismatch. + * If the user really wants them to match (and for their bricks to be + * multiplexed, they can always reset the option). + */ + if (!value2) { + gf_log(THIS->name, GF_LOG_DEBUG, "missing option %s", key); + return -1; + } + + min_len = MIN(value1->len, value2->len); + if (strncmp(value1->data, value2->data, min_len) != 0) { + gf_log(THIS->name, GF_LOG_DEBUG, "option mismatch, %s, %s != %s", key, + value1->data, value2->data); + return -1; + } + + return 0; +} + +int +glusterd_brickprocess_delete(glusterd_brick_proc_t *brick_proc) +{ + cds_list_del_init(&brick_proc->brick_proc_list); + cds_list_del_init(&brick_proc->bricks); + + GF_FREE(brick_proc); + + return 0; +} + +int +glusterd_brick_process_remove_brick(glusterd_brickinfo_t *brickinfo, + int *last_brick) +{ + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + glusterd_brick_proc_t *brick_proc = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, out); + GF_VALIDATE_OR_GOTO(this->name, brickinfo, out); + + brick_proc = brickinfo->brick_proc; + if (!brick_proc) { + if (brickinfo->status != GF_BRICK_STARTED) { + /* this function will be called from gluster_pmap_signout and + * glusterd_volume_stop_glusterfs. So it is possible to have + * brick_proc set as null. 
+ */ + ret = 0; } - return ret; + goto out; + } + + GF_VALIDATE_OR_GOTO(this->name, (brick_proc->brick_count > 0), out); + + cds_list_del_init(&brickinfo->mux_bricks); + brick_proc->brick_count--; + + /* If all bricks have been removed, delete the brick process */ + if (brick_proc->brick_count == 0) { + if (last_brick != NULL) + *last_brick = 1; + ret = glusterd_brickprocess_delete(brick_proc); + if (ret) + goto out; + } + brickinfo->brick_proc = NULL; + ret = 0; +out: + return ret; } -int32_t -glusterd_peer_hostname_new (char *hostname, glusterd_peer_hostname_t **name) +int +glusterd_brick_process_add_brick(glusterd_brickinfo_t *brickinfo, + glusterd_brickinfo_t *parent_brickinfo) { - glusterd_peer_hostname_t *peer_hostname = NULL; - int32_t ret = -1; + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + glusterd_brick_proc_t *brick_proc = NULL; - GF_ASSERT (hostname); - GF_ASSERT (name); + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); - peer_hostname = GF_CALLOC (1, sizeof (*peer_hostname), - gf_gld_mt_peer_hostname_t); + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, out); + GF_VALIDATE_OR_GOTO(this->name, brickinfo, out); - if (!peer_hostname) + if (!parent_brickinfo) { + ret = glusterd_brick_proc_for_port(brickinfo->port, &brick_proc); + if (ret) { + ret = glusterd_brickprocess_new(&brick_proc); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICKPROC_NEW_FAILED, + "Failed to create " + "new brick process instance"); goto out; + } - peer_hostname->hostname = gf_strdup (hostname); - INIT_LIST_HEAD (&peer_hostname->hostname_list); + brick_proc->port = brickinfo->port; - *name = peer_hostname; + cds_list_add_tail(&brick_proc->brick_proc_list, &priv->brick_procs); + } + } else { ret = 0; + brick_proc = parent_brickinfo->brick_proc; + } + cds_list_add_tail(&brickinfo->mux_bricks, &brick_proc->bricks); + brickinfo->brick_proc = brick_proc; + brick_proc->brick_count++; out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + return ret; } +/* ret = 0 only when you get a brick process associated with the port + * ret = -1 otherwise + */ int -glusterd_volume_compute_cksum (glusterd_volinfo_t *volinfo) +glusterd_brick_proc_for_port(int port, glusterd_brick_proc_t **brickprocess) { - int32_t ret = -1; - glusterd_conf_t *priv = NULL; - char path[PATH_MAX] = {0,}; - char cksum_path[PATH_MAX] = {0,}; - char filepath[PATH_MAX] = {0,}; - int fd = -1; - uint32_t cksum = 0; - char buf[4096] = {0,}; - char sort_filepath[PATH_MAX] = {0}; - gf_boolean_t unlink_sortfile = _gf_false; - char sort_cmd[2*PATH_MAX + 32]; - int sort_fd = 0; + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + glusterd_brick_proc_t *brick_proc = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, out); + + cds_list_for_each_entry(brick_proc, &priv->brick_procs, brick_proc_list) + { + if (brick_proc->port == port) { + *brickprocess = brick_proc; + ret = 0; + break; + } + } +out: + return ret; +} - GF_ASSERT (volinfo); +int32_t +glusterd_volume_stop_glusterfs(glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, + gf_boolean_t del_brick) +{ + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + int ret = -1; + char *op_errstr = NULL; + char pidfile[PATH_MAX] = ""; + int last_brick = -1; + + GF_ASSERT(volinfo); + GF_ASSERT(brickinfo); + + this = THIS; + GF_ASSERT(this); + + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, out); + 
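The helpers above (glusterd_brick_proc_for_port, glusterd_brick_process_add_brick, glusterd_brick_process_remove_brick) maintain one glusterd_brick_proc_t per brick daemon, keyed by port, holding a membership list and a brick_count; the stop path below relies on that count reaching zero to know when the last multiplexed brick is gone. A minimal sketch of that bookkeeping, with simplified stand-in types rather than the real glusterd structures:

    #include <stdlib.h>

    /* Illustrative sketch only: simplified stand-ins, not the glusterd types. */
    struct brick_proc {
        int port;
        int brick_count;   /* bricks currently multiplexed into this daemon */
    };

    static struct brick_proc *
    proc_add_brick(struct brick_proc *proc, int port)
    {
        if (!proc) {                        /* first brick on this port */
            proc = calloc(1, sizeof(*proc));
            if (!proc)
                return NULL;
            proc->port = port;
        }
        proc->brick_count++;                /* every added brick bumps the count */
        return proc;
    }

    static int
    proc_remove_brick(struct brick_proc *proc)
    {
        if (--proc->brick_count == 0) {     /* last brick: tear the tracker down */
            free(proc);
            return 1;                       /* caller may now stop the daemon */
        }
        return 0;
    }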
+ ret = glusterd_brick_process_remove_brick(brickinfo, &last_brick); + if (ret) { + gf_msg_debug(this->name, 0, + "Couldn't remove brick from" + " brick process"); + goto out; + } + + if (del_brick) + cds_list_del_init(&brickinfo->brick_list); + + if (GLUSTERD_STATUS_STARTED == volinfo->status) { + /* + * In a post-multiplexing world, even if we're not actually + * doing any multiplexing, just dropping the RPC connection + * isn't enough. There might be many such connections during + * the brick daemon's lifetime, even if we only consider the + * management RPC port (because tests etc. might be manually + * attaching and detaching bricks). Therefore, we have to send + * an actual signal instead. + */ + if (is_brick_mx_enabled() && last_brick != 1) { + ret = send_attach_req(this, brickinfo->rpc, brickinfo->path, NULL, + NULL, GLUSTERD_BRICK_TERMINATE); + if (ret && brickinfo->status == GF_BRICK_STARTED) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_STOP_FAIL, + "Failed to send" + " detach request for brick %s", + brickinfo->path); + goto out; + } + gf_log(this->name, GF_LOG_INFO, + "Detach request for " + "brick %s:%s is sent successfully", + brickinfo->hostname, brickinfo->path); - priv = THIS->private; - GF_ASSERT (priv); + } else { + gf_msg_debug(this->name, 0, + "About to stop glusterfsd" + " for brick %s:%s", + brickinfo->hostname, brickinfo->path); + ret = glusterd_brick_terminate(volinfo, brickinfo, NULL, 0, + &op_errstr); + if (ret && brickinfo->status == GF_BRICK_STARTED) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_STOP_FAIL, + "Failed to kill" + " the brick %s", + brickinfo->path); + goto out; + } + + if (op_errstr) { + GF_FREE(op_errstr); + } + if (is_brick_mx_enabled()) { + /* In case of brick multiplexing we need to make + * sure the port is cleaned up from here as the + * RPC connection may not have been originated + * for the same brick instance + */ + pmap_registry_remove(THIS, brickinfo->port, brickinfo->path, + GF_PMAP_PORT_BRICKSERVER, NULL, _gf_true); + } + } - GLUSTERD_GET_VOLUME_DIR (path, volinfo, priv); + (void)glusterd_brick_disconnect(brickinfo); + ret = 0; + } - snprintf (cksum_path, sizeof (cksum_path), "%s/%s", - path, GLUSTERD_CKSUM_FILE); + GLUSTERD_GET_BRICK_PIDFILE(pidfile, volinfo, brickinfo, conf); + gf_msg_debug(this->name, 0, "Unlinking pidfile %s", pidfile); + (void)sys_unlink(pidfile); - fd = open (cksum_path, O_RDWR | O_APPEND | O_CREAT| O_TRUNC, 0644); + brickinfo->status = GF_BRICK_STOPPED; + brickinfo->start_triggered = _gf_false; + brickinfo->brick_proc = NULL; + if (del_brick) + glusterd_delete_brick(volinfo, brickinfo); +out: + return ret; +} - if (-1 == fd) { - gf_log ("", GF_LOG_ERROR, "Unable to open %s, errno: %d", - cksum_path, errno); - ret = -1; +/* Free LINE[0..N-1] and then the LINE buffer. 
*/ +static void +free_lines(char **line, size_t n) +{ + size_t i; + for (i = 0; i < n; i++) + GF_FREE(line[i]); + GF_FREE(line); +} + +static char ** +glusterd_readin_file(const char *filepath, int *line_count) +{ + int ret = -1; + int n = 8; + int counter = 0; + char buffer[PATH_MAX + 256] = ""; + char **lines = NULL; + FILE *fp = NULL; + void *p; + + fp = fopen(filepath, "r"); + if (!fp) + goto out; + + lines = GF_CALLOC(1, n * sizeof(*lines), gf_gld_mt_charptr); + if (!lines) + goto out; + + for (counter = 0; fgets(buffer, sizeof(buffer), fp); counter++) { + if (counter == n - 1) { + n *= 2; + p = GF_REALLOC(lines, n * sizeof(char *)); + if (!p) { + free_lines(lines, n / 2); + lines = NULL; goto out; + } + lines = p; } - snprintf (filepath, sizeof (filepath), "%s/%s", path, - GLUSTERD_VOLUME_INFO_FILE); - snprintf (sort_filepath, sizeof (sort_filepath), "/tmp/%s.XXXXXX", - volinfo->volname); - sort_fd = mkstemp (sort_filepath); - if (sort_fd < 0) { - gf_log ("", GF_LOG_ERROR, "Could not generate temp file, " - "reason: %s for volume: %s", strerror (errno), - volinfo->volname); - goto out; + lines[counter] = gf_strdup(buffer); + } + + lines[counter] = NULL; + /* Reduce allocation to minimal size. */ + p = GF_REALLOC(lines, (counter + 1) * sizeof(char *)); + if (!p) { + /* coverity[TAINTED_SCALAR] */ + free_lines(lines, counter); + lines = NULL; + goto out; + } + lines = p; + + *line_count = counter; + ret = 0; + +out: + if (ret) + gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_READIN_FILE_FAILED, "%s", + strerror(errno)); + if (fp) + fclose(fp); + + return lines; +} + +int +glusterd_compare_lines(const void *a, const void *b) +{ + return strcmp(*(char *const *)a, *(char *const *)b); +} + +static int +glusterd_sort_and_redirect(const char *src_filepath, int dest_fd) +{ + int ret = -1; + int line_count = 0; + int counter = 0; + char **lines = NULL; + + if (!src_filepath || dest_fd < 0) + goto out; + + lines = glusterd_readin_file(src_filepath, &line_count); + if (!lines) + goto out; + + qsort(lines, line_count, sizeof(*lines), glusterd_compare_lines); + + for (counter = 0; lines[counter]; counter++) { + ret = sys_write(dest_fd, lines[counter], strlen(lines[counter])); + if (ret < 0) + goto out; + + GF_FREE(lines[counter]); + } + + ret = 0; +out: + GF_FREE(lines); + + return ret; +} + +static int +glusterd_volume_compute_cksum(glusterd_volinfo_t *volinfo, char *cksum_path, + char *filepath, gf_boolean_t is_quota_conf, + uint32_t *cs) +{ + int32_t ret = -1; + uint32_t cksum = 0; + int fd = -1; + int sort_fd = 0; + char sort_filepath[PATH_MAX] = ""; + char buf[32]; + gf_boolean_t unlink_sortfile = _gf_false; + glusterd_conf_t *priv = THIS->private; + xlator_t *this = THIS; + mode_t orig_umask = 0; + + GF_ASSERT(volinfo); + GF_ASSERT(priv); + + fd = open(cksum_path, O_RDWR | O_APPEND | O_CREAT | O_TRUNC, 0600); + if (-1 == fd) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "Unable to open %s," + " errno: %d", + cksum_path, errno); + ret = -1; + goto out; + } + + if (!is_quota_conf) { + snprintf(sort_filepath, sizeof(sort_filepath), "/tmp/%s.XXXXXX", + volinfo->volname); + + orig_umask = umask(S_IRWXG | S_IRWXO); + sort_fd = mkstemp(sort_filepath); + umask(orig_umask); + if (-1 == sort_fd) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "Could not generate " + "temp file, reason: %s for volume: %s", + strerror(errno), volinfo->volname); + goto out; } else { - unlink_sortfile = _gf_true; + unlink_sortfile = _gf_true; } - snprintf (sort_cmd, sizeof 
(sort_cmd), "sort %s -o %s", - filepath, sort_filepath); - ret = system (sort_cmd); + /* sort the info file, result in sort_filepath */ + + ret = glusterd_sort_and_redirect(filepath, sort_fd); if (ret) { - gf_log ("", GF_LOG_ERROR, "failed to sort file %s to %s", - filepath, sort_filepath); - goto out; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED, + "sorting info file " + "failed"); + goto out; } - ret = get_checksum_for_path (sort_filepath, &cksum); + ret = sys_close(sort_fd); + if (ret) + goto out; + + ret = get_checksum_for_path(sort_filepath, &cksum, priv->op_version); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get checksum" - " for path: %s", sort_filepath); - goto out; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CKSUM_GET_FAIL, + "unable to get " + "checksum for path: %s", + sort_filepath); + goto out; } - snprintf (buf, sizeof (buf), "%s=%u\n", "info", cksum); - ret = write (fd, buf, strlen (buf)); - + ret = snprintf(buf, sizeof(buf), "info=%u\n", cksum); + ret = sys_write(fd, buf, ret); if (ret <= 0) { - ret = -1; - goto out; + ret = -1; + goto out; } + } else if (priv->op_version < GD_OP_VERSION_7_0) { + ret = get_checksum_for_path(filepath, &cksum, priv->op_version); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CKSUM_GET_FAIL, + "unable to get " + "checksum for path: %s", + filepath); + goto out; + } + } - ret = get_checksum_for_file (fd, &cksum); - - if (ret) - goto out; + ret = get_checksum_for_file(fd, &cksum, priv->op_version); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CKSUM_GET_FAIL, + "unable to get checksum for path: %s", filepath); + goto out; + } - volinfo->cksum = cksum; + *cs = cksum; out: - if (fd > 0) - close (fd); - if (sort_fd > 0) - close (sort_fd); - if (unlink_sortfile) - unlink (sort_filepath); - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + if (fd != -1) + sys_close(fd); + if (unlink_sortfile) + sys_unlink(sort_filepath); + gf_msg_debug(this->name, 0, "Returning with %d", ret); - return ret; + return ret; } -void -_add_volume_option_to_dict (dict_t *this, char *key, data_t *value, void *data) +int +glusterd_compute_cksum(glusterd_volinfo_t *volinfo, gf_boolean_t is_quota_conf) { - int exists = 0; - glusterd_volopt_ctx_t *ctx = NULL; - char optkey[512] = {0,}; - int ret = -1; - - exists = glusterd_check_option_exists (key, NULL); - if (0 == exists) - return; + int ret = -1; + uint32_t cs = 0; + char cksum_path[PATH_MAX] = ""; + char path[PATH_MAX] = ""; + char filepath[PATH_MAX] = ""; + glusterd_conf_t *conf = NULL; + xlator_t *this = NULL; + int32_t len1 = 0; + int32_t len2 = 0; + + this = THIS; + GF_ASSERT(this); + conf = this->private; + GF_ASSERT(conf); + + GLUSTERD_GET_VOLUME_DIR(path, volinfo, conf); + + if (is_quota_conf) { + len1 = snprintf(cksum_path, sizeof(cksum_path), "%s/%s", path, + GLUSTERD_VOL_QUOTA_CKSUM_FILE); + len2 = snprintf(filepath, sizeof(filepath), "%s/%s", path, + GLUSTERD_VOLUME_QUOTA_CONFIG); + } else { + len1 = snprintf(cksum_path, sizeof(cksum_path), "%s/%s", path, + GLUSTERD_CKSUM_FILE); + len2 = snprintf(filepath, sizeof(filepath), "%s/%s", path, + GLUSTERD_VOLUME_INFO_FILE); + } + if ((len1 < 0) || (len2 < 0) || (len1 >= sizeof(cksum_path)) || + (len2 >= sizeof(filepath))) { + goto out; + } + + ret = glusterd_volume_compute_cksum(volinfo, cksum_path, filepath, + is_quota_conf, &cs); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CKSUM_COMPUTE_FAIL, + "Failed to compute checksum " + "for volume %s", + volinfo->volname); + goto out; + } + + if (is_quota_conf) 
+ volinfo->quota_conf_cksum = cs; + else + volinfo->cksum = cs; + + ret = 0; +out: + return ret; +} - ctx = data; - snprintf (optkey, sizeof (optkey), "volume%d.key%d", ctx->count, - ctx->opt_count); - ret = dict_set_str (ctx->dict, optkey, key); - if (ret) - gf_log ("", GF_LOG_ERROR, "option add for key%d %s", - ctx->count, key); - snprintf (optkey, sizeof (optkey), "volume%d.value%d", ctx->count, - ctx->opt_count); - ret = dict_set_str (ctx->dict, optkey, value->data); - if (ret) - gf_log ("", GF_LOG_ERROR, "option add for value%d %s", - ctx->count, value->data); - ctx->opt_count++; +static int +_add_dict_to_prdict(dict_t *this, char *key, data_t *value, void *data) +{ + glusterd_dict_ctx_t *ctx = data; + char optkey[64]; /* optkey are usually quite small */ + int ret = -1; - return; + ret = snprintf(optkey, sizeof(optkey), "%s.%s%d", ctx->prefix, + ctx->key_name, ctx->opt_count); + if (ret < 0 || ret >= sizeof(optkey)) + return -1; + ret = dict_set_strn(ctx->dict, optkey, ret, key); + if (ret) + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "option add for %s%d %s", ctx->key_name, ctx->opt_count, key); + ret = snprintf(optkey, sizeof(optkey), "%s.%s%d", ctx->prefix, + ctx->val_name, ctx->opt_count); + if (ret < 0 || ret >= sizeof(optkey)) + return -1; + ret = dict_set_strn(ctx->dict, optkey, ret, value->data); + if (ret) + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "option add for %s%d %s", ctx->val_name, ctx->opt_count, + value->data); + ctx->opt_count++; + + return ret; } int32_t -glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, - dict_t *dict, int32_t count) +glusterd_add_bricks_hname_path_to_dict(dict_t *dict, + glusterd_volinfo_t *volinfo) { - int32_t ret = -1; - char key[512] = {0,}; - glusterd_brickinfo_t *brickinfo = NULL; - int32_t i = 1; - char *volume_id_str = NULL; - glusterd_volopt_ctx_t ctx = {0}; + glusterd_brickinfo_t *brickinfo = NULL; + int ret = 0; + char key[64] = ""; + int index = 0; + + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + ret = snprintf(key, sizeof(key), "%d-hostname", index); + ret = dict_set_strn(dict, key, ret, brickinfo->hostname); + if (ret) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto out; + } - GF_ASSERT (dict); - GF_ASSERT (volinfo); + ret = snprintf(key, sizeof(key), "%d-path", index); + ret = dict_set_strn(dict, key, ret, brickinfo->path); + if (ret) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto out; + } - snprintf (key, sizeof (key), "volume%d.name", count); - ret = dict_set_str (dict, key, volinfo->volname); + index++; + } +out: + return ret; +} + +/* The prefix represents the type of volume to be added. 
+ * It will be "volume" for normal volumes, and snap# like + * snap1, snap2, for snapshot volumes + */ +int32_t +glusterd_add_volume_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict, + int32_t count, char *prefix) +{ + int32_t ret = -1; + char pfx[32] = ""; /* prefix should be quite small */ + char key[64] = ""; + int keylen; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_brickinfo_t *ta_brickinfo = NULL; + int32_t i = 1; + char *volume_id_str = NULL; + char *str = NULL; + glusterd_dict_ctx_t ctx = {0}; + char *rebalance_id_str = NULL; + char *rb_id_str = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(dict); + GF_ASSERT(volinfo); + GF_ASSERT(prefix); + + ret = snprintf(pfx, sizeof(pfx), "%s%d", prefix, count); + if (ret < 0 || ret >= sizeof(pfx)) { + ret = -1; + goto out; + } + + keylen = snprintf(key, sizeof(key), "%s.name", pfx); + ret = dict_set_strn(dict, key, keylen, volinfo->volname); + if (ret) + goto out; + + keylen = snprintf(key, sizeof(key), "%s.type", pfx); + ret = dict_set_int32n(dict, key, keylen, volinfo->type); + if (ret) + goto out; + + keylen = snprintf(key, sizeof(key), "%s.brick_count", pfx); + ret = dict_set_int32n(dict, key, keylen, volinfo->brick_count); + if (ret) + goto out; + + keylen = snprintf(key, sizeof(key), "%s.version", pfx); + ret = dict_set_int32n(dict, key, keylen, volinfo->version); + if (ret) + goto out; + + keylen = snprintf(key, sizeof(key), "%s.status", pfx); + ret = dict_set_int32n(dict, key, keylen, volinfo->status); + if (ret) + goto out; + + keylen = snprintf(key, sizeof(key), "%s.sub_count", pfx); + ret = dict_set_int32n(dict, key, keylen, volinfo->sub_count); + if (ret) + goto out; + + keylen = snprintf(key, sizeof(key), "%s.subvol_count", pfx); + ret = dict_set_int32n(dict, key, keylen, volinfo->subvol_count); + if (ret) + goto out; + + keylen = snprintf(key, sizeof(key), "%s.stripe_count", pfx); + ret = dict_set_int32n(dict, key, keylen, volinfo->stripe_count); + if (ret) + goto out; + + keylen = snprintf(key, sizeof(key), "%s.replica_count", pfx); + ret = dict_set_int32n(dict, key, keylen, volinfo->replica_count); + if (ret) + goto out; + + keylen = snprintf(key, sizeof(key), "%s.arbiter_count", pfx); + ret = dict_set_int32n(dict, key, keylen, volinfo->arbiter_count); + if (ret) + goto out; + + keylen = snprintf(key, sizeof(key), "%s.thin_arbiter_count", pfx); + ret = dict_set_int32n(dict, key, keylen, volinfo->thin_arbiter_count); + if (ret) + goto out; + + keylen = snprintf(key, sizeof(key), "%s.disperse_count", pfx); + ret = dict_set_int32n(dict, key, keylen, volinfo->disperse_count); + if (ret) + goto out; + + keylen = snprintf(key, sizeof(key), "%s.redundancy_count", pfx); + ret = dict_set_int32n(dict, key, keylen, volinfo->redundancy_count); + if (ret) + goto out; + + keylen = snprintf(key, sizeof(key), "%s.dist_count", pfx); + ret = dict_set_int32n(dict, key, keylen, volinfo->dist_leaf_count); + if (ret) + goto out; + + snprintf(key, sizeof(key), "%s.ckusm", pfx); + ret = dict_set_int64(dict, key, volinfo->cksum); + if (ret) + goto out; + + snprintf(key, sizeof(key), "%s.transport_type", pfx); + ret = dict_set_uint32(dict, key, volinfo->transport_type); + if (ret) + goto out; + + snprintf(key, sizeof(key), "%s.stage_deleted", pfx); + ret = dict_set_uint32(dict, key, (uint32_t)volinfo->stage_deleted); + if (ret) + goto out; + + ret = gd_add_vol_snap_details_to_dict(dict, pfx, volinfo); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "vol snap details", NULL); + 
goto out; + } + + volume_id_str = gf_strdup(uuid_utoa(volinfo->volume_id)); + if (!volume_id_str) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED, + "volume id=%s", volinfo->volume_id, NULL); + ret = -1; + goto out; + } + keylen = snprintf(key, sizeof(key), "%s.volume_id", pfx); + ret = dict_set_dynstrn(dict, key, keylen, volume_id_str); + if (ret) + goto out; + volume_id_str = NULL; + + keylen = snprintf(key, sizeof(key), "%s.username", pfx); + str = glusterd_auth_get_username(volinfo); + if (str) { + ret = dict_set_dynstrn(dict, key, keylen, gf_strdup(str)); if (ret) - goto out; + goto out; + } - memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.type", count); - ret = dict_set_int32 (dict, key, volinfo->type); + keylen = snprintf(key, sizeof(key), "%s.password", pfx); + str = glusterd_auth_get_password(volinfo); + if (str) { + ret = dict_set_dynstrn(dict, key, keylen, gf_strdup(str)); if (ret) - goto out; + goto out; + } + + keylen = snprintf(key, sizeof(key), "%s.rebalance", pfx); + ret = dict_set_int32n(dict, key, keylen, volinfo->rebal.defrag_cmd); + if (ret) + goto out; + + rebalance_id_str = gf_strdup(uuid_utoa(volinfo->rebal.rebalance_id)); + if (!rebalance_id_str) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED, + "rebalance_id=%s", volinfo->rebal.rebalance_id, NULL); + ret = -1; + goto out; + } + keylen = snprintf(key, sizeof(key), "%s.rebalance-id", pfx); + ret = dict_set_dynstrn(dict, key, keylen, rebalance_id_str); + if (ret) + goto out; + rebalance_id_str = NULL; + + snprintf(key, sizeof(key), "%s.rebalance-op", pfx); + ret = dict_set_uint32(dict, key, volinfo->rebal.op); + if (ret) + goto out; + + if (volinfo->rebal.dict) { + ctx.dict = dict; + ctx.prefix = pfx; + ctx.opt_count = 1; + ctx.key_name = "rebal-dict-key"; + ctx.val_name = "rebal-dict-value"; - memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.brick_count", count); - ret = dict_set_int32 (dict, key, volinfo->brick_count); + dict_foreach(volinfo->rebal.dict, _add_dict_to_prdict, &ctx); + ctx.opt_count--; + keylen = snprintf(key, sizeof(key), "volume%d.rebal-dict-count", count); + ret = dict_set_int32n(dict, key, keylen, ctx.opt_count); if (ret) - goto out; + goto out; + } + + ctx.dict = dict; + ctx.prefix = pfx; + ctx.opt_count = 1; + ctx.key_name = "key"; + ctx.val_name = "value"; + GF_ASSERT(volinfo->dict); + + dict_foreach(volinfo->dict, _add_dict_to_prdict, &ctx); + ctx.opt_count--; + keylen = snprintf(key, sizeof(key), "%s.opt-count", pfx); + ret = dict_set_int32n(dict, key, keylen, ctx.opt_count); + if (ret) + goto out; + + ctx.dict = dict; + ctx.prefix = pfx; + ctx.opt_count = 1; + ctx.key_name = "slave-num"; + ctx.val_name = "slave-val"; + GF_ASSERT(volinfo->gsync_slaves); + + dict_foreach(volinfo->gsync_slaves, _add_dict_to_prdict, &ctx); + ctx.opt_count--; + + keylen = snprintf(key, sizeof(key), "%s.gsync-count", pfx); + ret = dict_set_int32n(dict, key, keylen, ctx.opt_count); + if (ret) + goto out; + + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + keylen = snprintf(key, sizeof(key), "%s.brick%d.hostname", pfx, i); + ret = dict_set_strn(dict, key, keylen, brickinfo->hostname); + if (ret) + goto out; - memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.version", count); - ret = dict_set_int32 (dict, key, volinfo->version); + keylen = snprintf(key, sizeof(key), "%s.brick%d.path", pfx, i); + ret = dict_set_strn(dict, key, keylen, brickinfo->path); if (ret) - goto out; + goto out; - 
memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.status", count); - ret = dict_set_int32 (dict, key, volinfo->status); + keylen = snprintf(key, sizeof(key), "%s.brick%d.decommissioned", pfx, + i); + ret = dict_set_int32n(dict, key, keylen, brickinfo->decommissioned); if (ret) - goto out; + goto out; - memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.sub_count", count); - ret = dict_set_int32 (dict, key, volinfo->sub_count); + keylen = snprintf(key, sizeof(key), "%s.brick%d.brick_id", pfx, i); + ret = dict_set_strn(dict, key, keylen, brickinfo->brick_id); if (ret) - goto out; + goto out; - memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.ckusm", count); - ret = dict_set_int64 (dict, key, volinfo->cksum); + snprintf(key, sizeof(key), "%s.brick%d.uuid", pfx, i); + ret = dict_set_dynstr_with_alloc(dict, key, uuid_utoa(brickinfo->uuid)); if (ret) - goto out; + goto out; - memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.transport_type", count); - ret = dict_set_uint32 (dict, key, volinfo->transport_type); + snprintf(key, sizeof(key), "%s.brick%d", pfx, i); + ret = gd_add_brick_snap_details_to_dict(dict, key, brickinfo); if (ret) + goto out; + + i++; + } + + i = 1; + if (volinfo->thin_arbiter_count == 1) { + cds_list_for_each_entry(ta_brickinfo, &volinfo->ta_bricks, brick_list) + { + keylen = snprintf(key, sizeof(key), "%s.ta-brick%d.hostname", pfx, + i); + ret = dict_set_strn(dict, key, keylen, ta_brickinfo->hostname); + if (ret) goto out; - volume_id_str = gf_strdup (uuid_utoa (volinfo->volume_id)); - if (!volume_id_str) + keylen = snprintf(key, sizeof(key), "%s.ta-brick%d.path", pfx, i); + ret = dict_set_strn(dict, key, keylen, ta_brickinfo->path); + if (ret) goto out; - memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.volume_id", count); - ret = dict_set_dynstr (dict, key, volume_id_str); - if (ret) + keylen = snprintf(key, sizeof(key), "%s.ta-brick%d.decommissioned", + pfx, i); + ret = dict_set_int32n(dict, key, keylen, + ta_brickinfo->decommissioned); + if (ret) goto out; - ctx.dict = dict; - ctx.count = count; - ctx.opt_count = 1; - GF_ASSERT (volinfo->dict); + keylen = snprintf(key, sizeof(key), "%s.ta-brick%d.brick_id", pfx, + i); + ret = dict_set_strn(dict, key, keylen, ta_brickinfo->brick_id); + if (ret) + goto out; - dict_foreach (volinfo->dict, _add_volume_option_to_dict, &ctx); - ctx.opt_count--; - memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.opt-count", count); - ret = dict_set_int32 (dict, key, ctx.opt_count); - if (ret) + snprintf(key, sizeof(key), "%s.ta-brick%d.uuid", pfx, i); + ret = dict_set_dynstr_with_alloc(dict, key, + uuid_utoa(ta_brickinfo->uuid)); + if (ret) goto out; - list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { - memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.brick%d.hostname", - count, i); - ret = dict_set_str (dict, key, brickinfo->hostname); - if (ret) - goto out; + i++; + } + } + + /* Add volume op-versions to dict. 
This prevents volume inconsistencies + * in the cluster + */ + keylen = snprintf(key, sizeof(key), "%s.op-version", pfx); + ret = dict_set_int32n(dict, key, keylen, volinfo->op_version); + if (ret) + goto out; + keylen = snprintf(key, sizeof(key), "%s.client-op-version", pfx); + ret = dict_set_int32n(dict, key, keylen, volinfo->client_op_version); + if (ret) + goto out; + + keylen = snprintf(key, sizeof(key), "%s.quota-xattr-version", pfx); + ret = dict_set_int32n(dict, key, keylen, volinfo->quota_xattr_version); +out: + GF_FREE(volume_id_str); + GF_FREE(rebalance_id_str); + GF_FREE(rb_id_str); + + if (key[0] != '\0' && ret != 0) + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + gf_msg_debug(this->name, 0, "Returning with %d", ret); + return ret; +} - memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.brick%d.path", - count, i); - ret = dict_set_str (dict, key, brickinfo->path); - if (ret) - goto out; +/* The prefix represents the type of volume to be added. + * It will be "volume" for normal volumes, and snap# like + * snap1, snap2, for snapshot volumes + */ +int +glusterd_vol_add_quota_conf_to_dict(glusterd_volinfo_t *volinfo, dict_t *load, + int vol_idx, char *prefix) +{ + int fd = -1; + unsigned char buf[16] = ""; + char key[64]; + char key_prefix[32]; + int gfid_idx = 0; + int ret = -1; + xlator_t *this = NULL; + char type = 0; + float version = 0.0f; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(prefix); + + ret = glusterd_store_create_quota_conf_sh_on_absence(volinfo); + if (ret) + goto out; + + fd = open(volinfo->quota_conf_shandle->path, O_RDONLY); + if (fd == -1) { + ret = -1; + goto out; + } + + ret = quota_conf_read_version(fd, &version); + if (ret) + goto out; - i++; + ret = snprintf(key_prefix, sizeof(key_prefix), "%s%d", prefix, vol_idx); + if (ret < 0 || ret >= sizeof(key_prefix)) { + ret = -1; + goto out; + } + for (gfid_idx = 0;; gfid_idx++) { + ret = quota_conf_read_gfid(fd, buf, &type, version); + if (ret == 0) { + break; + } else if (ret < 0) { + gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_QUOTA_CONF_CORRUPT, + "Quota " + "configuration store may be corrupt."); + goto out; } + snprintf(key, sizeof(key) - 1, "%s.gfid%d", key_prefix, gfid_idx); + ret = dict_set_dynstr_with_alloc(load, key, uuid_utoa(buf)); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto out; + } + snprintf(key, sizeof(key) - 1, "%s.gfid-type%d", key_prefix, gfid_idx); + ret = dict_set_int8(load, key, type); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto out; + } + } + + ret = snprintf(key, sizeof(key), "%s.gfid-count", key_prefix); + ret = dict_set_int32n(load, key, ret, gfid_idx); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto out; + } + + snprintf(key, sizeof(key), "%s.quota-cksum", key_prefix); + ret = dict_set_uint32(load, key, volinfo->quota_conf_cksum); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto out; + } + + snprintf(key, sizeof(key), "%s.quota-version", key_prefix); + ret = dict_set_uint32(load, key, volinfo->quota_conf_version); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto out; + } + + ret = 0; out: - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + if (fd != -1) + sys_close(fd); + return ret; +} - return 
ret; +void * +glusterd_add_bulk_volumes_create_thread(void *data) +{ + int32_t ret = -1; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *volinfo = NULL; + int32_t count = 0; + xlator_t *this = NULL; + glusterd_add_dict_args_t *arg = NULL; + dict_t *dict = NULL; + int start = 0; + int end = 0; + + GF_ASSERT(data); + + arg = data; + dict = arg->voldict; + start = arg->start; + end = arg->end; + this = arg->this; + THIS = arg->this; + priv = this->private; + GF_ASSERT(priv); + + cds_list_for_each_entry(volinfo, &priv->volumes, vol_list) + { + count++; + + /* Skip volumes if index count is less than start + index to handle volume for specific thread + */ + if (count < start) + continue; + + /* No need to process volume if index count is greater + than end index + */ + if (count > end) + break; + + ret = glusterd_add_volume_to_dict(volinfo, dict, count, "volume"); + if (ret) + goto out; + if (!dict_get_sizen(volinfo->dict, VKEY_FEATURES_QUOTA)) + continue; + ret = glusterd_vol_add_quota_conf_to_dict(volinfo, dict, count, + "volume"); + if (ret) + goto out; + } + +out: + GF_ATOMIC_DEC(priv->thread_count); + free(arg); + return NULL; } -int32_t -glusterd_build_volume_dict (dict_t **vols) +int +glusterd_dict_searialize(dict_t *dict_arr[], int count, int totcount, char *buf) { - int32_t ret = -1; - dict_t *dict = NULL; - glusterd_conf_t *priv = NULL; - glusterd_volinfo_t *volinfo = NULL; - int32_t count = 0; + int i = 0; + int32_t keylen = 0; + int64_t netword = 0; + data_pair_t *pair = NULL; + int dict_count = 0; + int ret = 0; + + netword = hton32(totcount); + memcpy(buf, &netword, sizeof(netword)); + buf += DICT_HDR_LEN; + + for (i = 0; i < count; i++) { + if (dict_arr[i]) { + dict_count = dict_arr[i]->count; + pair = dict_arr[i]->members_list; + while (dict_count) { + if (!pair) { + gf_msg("glusterd", GF_LOG_ERROR, 0, + LG_MSG_PAIRS_LESS_THAN_COUNT, + "less than count data pairs found!"); + ret = -1; + goto out; + } - priv = THIS->private; + if (!pair->key) { + gf_msg("glusterd", GF_LOG_ERROR, 0, LG_MSG_NULL_PTR, + "pair->key is null!"); + ret = -1; + goto out; + } - dict = dict_new (); + keylen = strlen(pair->key); + netword = hton32(keylen); + memcpy(buf, &netword, sizeof(netword)); + buf += DICT_DATA_HDR_KEY_LEN; + if (!pair->value) { + gf_msg("glusterd", GF_LOG_ERROR, 0, LG_MSG_NULL_PTR, + "pair->value is null!"); + ret = -1; + goto out; + } - if (!dict) - goto out; + netword = hton32(pair->value->len); + memcpy(buf, &netword, sizeof(netword)); + buf += DICT_DATA_HDR_VAL_LEN; - list_for_each_entry (volinfo, &priv->volumes, vol_list) { - count++; - ret = glusterd_add_volume_to_dict (volinfo, dict, count); - if (ret) - goto out; + memcpy(buf, pair->key, keylen); + buf += keylen; + *buf++ = '\0'; + + if (pair->value->data) { + memcpy(buf, pair->value->data, pair->value->len); + buf += pair->value->len; + } + + pair = pair->next; + dict_count--; + } } + } +out: + for (i = 0; i < count; i++) { + if (dict_arr[i]) + dict_unref(dict_arr[i]); + } + return ret; +} - ret = dict_set_int32 (dict, "count", count); - if (ret) +int +glusterd_dict_arr_serialize(dict_t *dict_arr[], int count, char **buf, + u_int *length) +{ + ssize_t len = 0; + int i = 0; + int totcount = 0; + int ret = 0; + + for (i = 0; i < count; i++) { + if (dict_arr[i]) { + len += dict_serialized_length_lk(dict_arr[i]); + totcount += dict_arr[i]->count; + } + } + + // Subtract HDR_LEN except one dictionary + len = len - ((count - 1) * DICT_HDR_LEN); + + *buf = GF_MALLOC(len, gf_common_mt_char); + if (*buf == NULL) { + ret = 
-ENOMEM; + goto out; + } + + if (length != NULL) { + *length = len; + } + + ret = glusterd_dict_searialize(dict_arr, count, totcount, *buf); + +out: + return ret; +} + +int32_t +glusterd_add_volumes_to_export_dict(dict_t *peer_data, char **buf, + u_int *length) +{ + int32_t ret = -1; + dict_t *dict_arr[128] = { + 0, + }; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *volinfo = NULL; + int32_t count = 0; + glusterd_dict_ctx_t ctx = {0}; + xlator_t *this = NULL; + int totthread = 0; + int volcnt = 0; + int start = 1; + int endindex = 0; + int vol_per_thread_limit = 0; + glusterd_add_dict_args_t *arg = NULL; + pthread_t th_id = { + 0, + }; + int th_ret = 0; + int i = 0; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + /* Count the total number of volumes */ + cds_list_for_each_entry(volinfo, &priv->volumes, vol_list) volcnt++; + + get_gd_vol_thread_limit(&vol_per_thread_limit); + + if ((vol_per_thread_limit == 1) || (vol_per_thread_limit == 0) || + (vol_per_thread_limit > 100)) { + totthread = 0; + } else { + totthread = volcnt / vol_per_thread_limit; + if (totthread) { + endindex = volcnt % vol_per_thread_limit; + if (endindex) + totthread++; + } + } + + if (totthread == 0) { + cds_list_for_each_entry(volinfo, &priv->volumes, vol_list) + { + count++; + ret = glusterd_add_volume_to_dict(volinfo, peer_data, count, + "volume"); + if (ret) goto out; - *vols = dict; + if (!dict_get_sizen(volinfo->dict, VKEY_FEATURES_QUOTA)) + continue; + + ret = glusterd_vol_add_quota_conf_to_dict(volinfo, peer_data, count, + "volume"); + if (ret) + goto out; + } + } else { + for (i = 0; i < totthread; i++) { + arg = calloc(1, sizeof(*arg)); + dict_arr[i] = dict_new(); + arg->this = this; + arg->voldict = dict_arr[i]; + arg->start = start; + if ((i + 1) != totthread) { + arg->end = ((i + 1) * vol_per_thread_limit); + } else { + arg->end = (((i + 1) * vol_per_thread_limit) + endindex); + } + th_ret = gf_thread_create_detached( + &th_id, glusterd_add_bulk_volumes_create_thread, arg, + "bulkvoldict"); + if (th_ret) { + gf_log(this->name, GF_LOG_ERROR, + "glusterd_add_bulk_volume %s" + " thread creation failed", + "bulkvoldict"); + free(arg); + goto out; + } + + start = start + vol_per_thread_limit; + GF_ATOMIC_INC(priv->thread_count); + gf_log(this->name, GF_LOG_INFO, + "Create thread %d to populate dict data for volume" + " start index is %d end index is %d", + (i + 1), arg->start, arg->end); + } + while (GF_ATOMIC_GET(priv->thread_count)) { + sleep(1); + } + + gf_log(this->name, GF_LOG_INFO, + "Finished dictionary population in all threads"); + } + + ret = dict_set_int32n(peer_data, "count", SLEN("count"), volcnt); + if (ret) + goto out; + + ctx.dict = peer_data; + ctx.prefix = "global"; + ctx.opt_count = 1; + ctx.key_name = "key"; + ctx.val_name = "val"; + dict_foreach(priv->opts, _add_dict_to_prdict, &ctx); + ctx.opt_count--; + ret = dict_set_int32n(peer_data, "global-opt-count", + SLEN("global-opt-count"), ctx.opt_count); + if (ret) + goto out; + + if (totthread) { + gf_log(this->name, GF_LOG_INFO, + "Merged multiple dictionaries into a single one"); + dict_arr[totthread++] = dict_ref(peer_data); + ret = glusterd_dict_arr_serialize(dict_arr, totthread, buf, length); + gf_log(this->name, GF_LOG_INFO, "Serialize dictionary data returned %d", + ret); + } + out: - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); - if (ret) - dict_unref (dict); - return ret; + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; } -int32_t -glusterd_compare_friend_volume 
(dict_t *vols, int32_t count, int32_t *status) +static int32_t +glusterd_compare_friend_volume(dict_t *peer_data, int32_t count, + int32_t *status, char *hostname) +{ + int32_t ret = -1; + char key[64] = ""; + char key_prefix[32]; + int keylen; + glusterd_volinfo_t *volinfo = NULL; + char *volname = NULL; + uint32_t cksum = 0; + uint32_t quota_cksum = 0; + uint32_t quota_version = 0; + uint32_t stage_deleted = 0; + int32_t version = 0; + xlator_t *this = NULL; + + GF_ASSERT(peer_data); + GF_ASSERT(status); + + this = THIS; + GF_ASSERT(this); + + snprintf(key_prefix, sizeof(key_prefix), "volume%d", count); + keylen = snprintf(key, sizeof(key), "%s.name", key_prefix); + ret = dict_get_strn(peer_data, key, keylen, &volname); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=%s", key, NULL); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + snprintf(key, sizeof(key), "%s.stage_deleted", key_prefix); + ret = dict_get_uint32(peer_data, key, &stage_deleted); + /* stage_deleted = 1 means the volume is still in the process of + * deleting a volume, so we shouldn't be trying to create a + * fresh volume here which would lead to a stale entry + */ + if (!ret && stage_deleted == 0) + *status = GLUSTERD_VOL_COMP_UPDATE_REQ; + goto out; + } + + keylen = snprintf(key, sizeof(key), "%s.version", key_prefix); + ret = dict_get_int32n(peer_data, key, keylen, &version); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=%s", key, NULL); + goto out; + } + + if (version > volinfo->version) { + // Mismatch detected + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_VOL_VERS_MISMATCH, + "Version of volume %s differ. local version = %d, " + "remote version = %d on peer %s", + volinfo->volname, volinfo->version, version, hostname); + GF_ATOMIC_INIT(volinfo->volpeerupdate, 1); + *status = GLUSTERD_VOL_COMP_UPDATE_REQ; + goto out; + } else if (version < volinfo->version) { + *status = GLUSTERD_VOL_COMP_SCS; + goto out; + } + + // Now, versions are same, compare cksums. + // + snprintf(key, sizeof(key), "%s.ckusm", key_prefix); + ret = dict_get_uint32(peer_data, key, &cksum); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=%s", key, NULL); + goto out; + } + + if (cksum != volinfo->cksum) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CKSUM_VERS_MISMATCH, + "Version of Cksums %s differ. local cksum = %u, remote " + "cksum = %u on peer %s", + volinfo->volname, volinfo->cksum, cksum, hostname); + *status = GLUSTERD_VOL_COMP_RJT; + goto out; + } + + if (!dict_get_sizen(volinfo->dict, VKEY_FEATURES_QUOTA)) + goto skip_quota; + + snprintf(key, sizeof(key), "%s.quota-version", key_prefix); + ret = dict_get_uint32(peer_data, key, "a_version); + if (ret) { + gf_msg_debug(this->name, 0, + "quota-version key absent for" + " volume %s in peer %s's response", + volinfo->volname, hostname); + } else { + if (quota_version > volinfo->quota_conf_version) { + // Mismatch detected + gf_msg(this->name, GF_LOG_INFO, 0, + GD_MSG_QUOTA_CONFIG_VERS_MISMATCH, + "Quota configuration versions of volume %s " + "differ. local version = %d, remote version = " + "%d on peer %s", + volinfo->volname, volinfo->quota_conf_version, quota_version, + hostname); + *status = GLUSTERD_VOL_COMP_UPDATE_REQ; + goto out; + } else if (quota_version < volinfo->quota_conf_version) { + *status = GLUSTERD_VOL_COMP_SCS; + goto out; + } + } + + // Now, versions are same, compare cksums. 
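The comparison logic above amounts to a small decision table: a newer remote version forces an update request, an older one means the peer will update from us, and equal versions fall back to checksum comparison, where any mismatch rejects the peer. A condensed sketch of that ordering, using stand-in status constants rather than the GLUSTERD_VOL_COMP_* values:

    /* Sketch of the friend-volume comparison; names are simplified stand-ins. */
    enum comp_status { COMP_IN_SYNC, COMP_UPDATE_REQ, COMP_REJECT };

    static enum comp_status
    compare_volume(int local_version, unsigned local_cksum,
                   int remote_version, unsigned remote_cksum)
    {
        if (remote_version > local_version)
            return COMP_UPDATE_REQ;   /* peer is newer: import its definition */
        if (remote_version < local_version)
            return COMP_IN_SYNC;      /* peer is older: it will update from us */
        if (remote_cksum != local_cksum)
            return COMP_REJECT;       /* same version, different content */
        return COMP_IN_SYNC;
    }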
+ // + snprintf(key, sizeof(key), "%s.quota-cksum", key_prefix); + ret = dict_get_uint32(peer_data, key, "a_cksum); + if (ret) { + gf_msg_debug(this->name, 0, + "quota checksum absent for " + "volume %s in peer %s's response", + volinfo->volname, hostname); + } else { + if (quota_cksum != volinfo->quota_conf_cksum) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_QUOTA_CONFIG_CKSUM_MISMATCH, + "Cksums of " + "quota configuration of volume %s differ. local" + " cksum = %u, remote cksum = %u on peer %s", + volinfo->volname, volinfo->quota_conf_cksum, quota_cksum, + hostname); + *status = GLUSTERD_VOL_COMP_RJT; + goto out; + } + } + +skip_quota: + *status = GLUSTERD_VOL_COMP_SCS; + +out: + keylen = snprintf(key, sizeof(key), "%s.update", key_prefix); + + if (*status == GLUSTERD_VOL_COMP_UPDATE_REQ) { + ret = dict_set_int32n(peer_data, key, keylen, 1); + } else { + ret = dict_set_int32n(peer_data, key, keylen, 0); + } + if (*status == GLUSTERD_VOL_COMP_RJT) { + gf_event(EVENT_COMPARE_FRIEND_VOLUME_FAILED, "volume=%s", + volinfo->volname); + } + gf_msg_debug(this->name, 0, "Returning with ret: %d, status: %d", ret, + *status); + return ret; +} + +static int32_t +import_prdict_dict(dict_t *peer_data, dict_t *dst_dict, char *key_prefix, + char *value_prefix, int opt_count, char *prefix) +{ + char key[512] = ""; + int keylen; + int32_t ret = 0; + int i = 1; + char *opt_key = NULL; + char *opt_val = NULL; + char *dup_opt_val = NULL; + char msg[2048] = ""; + + while (i <= opt_count) { + keylen = snprintf(key, sizeof(key), "%s.%s%d", prefix, key_prefix, i); + ret = dict_get_strn(peer_data, key, keylen, &opt_key); + if (ret) { + snprintf(msg, sizeof(msg), + "Volume dict key not " + "specified"); + goto out; + } + + keylen = snprintf(key, sizeof(key), "%s.%s%d", prefix, value_prefix, i); + ret = dict_get_strn(peer_data, key, keylen, &opt_val); + if (ret) { + snprintf(msg, sizeof(msg), + "Volume dict value not " + "specified"); + goto out; + } + dup_opt_val = gf_strdup(opt_val); + if (!dup_opt_val) { + ret = -1; + goto out; + } + ret = dict_set_dynstr(dst_dict, opt_key, dup_opt_val); + if (ret) { + snprintf(msg, sizeof(msg), + "Volume set %s %s " + "unsuccessful", + opt_key, dup_opt_val); + goto out; + } + i++; + } + +out: + if (msg[0]) + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_IMPORT_PRDICT_DICT, "%s", + msg); + gf_msg_debug("glusterd", 0, "Returning with %d", ret); + return ret; +} + +int +glusterd_spawn_daemons(void *opaque) +{ + glusterd_conf_t *conf = THIS->private; + int ret = -1; + + /* glusterd_restart_brick() will take the sync_lock. 
*/ + glusterd_restart_bricks(NULL); + glusterd_restart_gsyncds(conf); + glusterd_restart_rebalance(conf); + ret = glusterd_snapdsvc_restart(); + ret = glusterd_gfproxydsvc_restart(); + ret = glusterd_shdsvc_restart(); + return ret; +} + +static int32_t +glusterd_import_friend_volume_opts(dict_t *peer_data, int count, + glusterd_volinfo_t *volinfo, char *prefix) +{ + char key[64]; + int keylen; + int32_t ret = -1; + int opt_count = 0; + char msg[2048] = ""; + char volume_prefix[32]; + + GF_ASSERT(peer_data); + GF_ASSERT(volinfo); + + snprintf(volume_prefix, sizeof(volume_prefix), "%s%d", prefix, count); + + keylen = snprintf(key, sizeof(key), "%s.opt-count", volume_prefix); + ret = dict_get_int32n(peer_data, key, keylen, &opt_count); + if (ret) { + snprintf(msg, sizeof(msg), + "Volume option count not " + "specified for %s", + volinfo->volname); + goto out; + } + + ret = import_prdict_dict(peer_data, volinfo->dict, "key", "value", + opt_count, volume_prefix); + if (ret) { + snprintf(msg, sizeof(msg), + "Unable to import options dict " + "specified for %s", + volinfo->volname); + goto out; + } + + keylen = snprintf(key, sizeof(key), "%s.gsync-count", volume_prefix); + ret = dict_get_int32n(peer_data, key, keylen, &opt_count); + if (ret) { + snprintf(msg, sizeof(msg), + "Gsync count not " + "specified for %s", + volinfo->volname); + goto out; + } + + ret = import_prdict_dict(peer_data, volinfo->gsync_slaves, "slave-num", + "slave-val", opt_count, volume_prefix); + if (ret) { + snprintf(msg, sizeof(msg), + "Unable to import gsync sessions " + "specified for %s", + volinfo->volname); + goto out; + } + +out: + if (msg[0]) + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOL_OPTS_IMPORT_FAIL, "%s", + msg); + gf_msg_debug("glusterd", 0, "Returning with %d", ret); + return ret; +} + +static int32_t +glusterd_import_new_ta_brick(dict_t *peer_data, int32_t vol_count, + int32_t brick_count, + glusterd_brickinfo_t **ta_brickinfo, char *prefix) { + char key[128]; + char key_prefix[64]; + int keylen; + int ret = -1; + char *hostname = NULL; + char *path = NULL; + char *brick_id = NULL; + int decommissioned = 0; + glusterd_brickinfo_t *new_ta_brickinfo = NULL; + char msg[256] = ""; + char *brick_uuid_str = NULL; + + GF_ASSERT(peer_data); + GF_ASSERT(vol_count >= 0); + GF_ASSERT(ta_brickinfo); + GF_ASSERT(prefix); + + ret = snprintf(key_prefix, sizeof(key_prefix), "%s%d.ta-brick%d", prefix, + vol_count, brick_count); + + if (ret < 0 || ret >= sizeof(key_prefix)) { + ret = -1; + snprintf(msg, sizeof(msg), "key_prefix too long"); + goto out; + } + + keylen = snprintf(key, sizeof(key), "%s.hostname", key_prefix); + ret = dict_get_strn(peer_data, key, keylen, &hostname); + if (ret) { + snprintf(msg, sizeof(msg), "%s missing in payload", key); + goto out; + } + + keylen = snprintf(key, sizeof(key), "%s.path", key_prefix); + ret = dict_get_strn(peer_data, key, keylen, &path); + if (ret) { + snprintf(msg, sizeof(msg), "%s missing in payload", key); + goto out; + } + + keylen = snprintf(key, sizeof(key), "%s.brick_id", key_prefix); + ret = dict_get_strn(peer_data, key, keylen, &brick_id); + + keylen = snprintf(key, sizeof(key), "%s.decommissioned", key_prefix); + ret = dict_get_int32n(peer_data, key, keylen, &decommissioned); + if (ret) { + /* For backward compatibility */ + ret = 0; + } + + ret = glusterd_brickinfo_new(&new_ta_brickinfo); + if (ret) + goto out; - int32_t ret = -1; - char key[512] = {0,}; - glusterd_volinfo_t *volinfo = NULL; - char *volname = NULL; - uint32_t cksum = 0; - int32_t version = 0; + ret = 
snprintf(new_ta_brickinfo->path, sizeof(new_ta_brickinfo->path), "%s", + path); + if (ret < 0 || ret >= sizeof(new_ta_brickinfo->path)) { + ret = -1; + goto out; + } + ret = snprintf(new_ta_brickinfo->hostname, + sizeof(new_ta_brickinfo->hostname), "%s", hostname); + if (ret < 0 || ret >= sizeof(new_ta_brickinfo->hostname)) { + ret = -1; + goto out; + } + new_ta_brickinfo->decommissioned = decommissioned; + if (brick_id) + (void)snprintf(new_ta_brickinfo->brick_id, + sizeof(new_ta_brickinfo->brick_id), "%s", brick_id); + keylen = snprintf(key, sizeof(key), "%s.uuid", key_prefix); + ret = dict_get_strn(peer_data, key, keylen, &brick_uuid_str); + if (ret) + goto out; + gf_uuid_parse(brick_uuid_str, new_ta_brickinfo->uuid); + + *ta_brickinfo = new_ta_brickinfo; + +out: + if (msg[0]) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_BRICK_IMPORT_FAIL, "%s", + msg); + gf_event(EVENT_IMPORT_BRICK_FAILED, "peer=%s;ta-brick=%s", + new_ta_brickinfo->hostname, new_ta_brickinfo->path); + } + gf_msg_debug("glusterd", 0, "Returning with %d", ret); + return ret; +} + +/* The prefix represents the type of volume to be added. + * It will be "volume" for normal volumes, and snap# like + * snap1, snap2, for snapshot volumes + */ +static int32_t +glusterd_import_new_brick(dict_t *peer_data, int32_t vol_count, + int32_t brick_count, glusterd_brickinfo_t **brickinfo, + char *prefix) +{ + char key[128]; + char key_prefix[64]; + int keylen; + int ret = -1; + char *hostname = NULL; + char *path = NULL; + char *brick_id = NULL; + int decommissioned = 0; + glusterd_brickinfo_t *new_brickinfo = NULL; + char msg[256] = ""; + char *brick_uuid_str = NULL; + + GF_ASSERT(peer_data); + GF_ASSERT(vol_count >= 0); + GF_ASSERT(brickinfo); + GF_ASSERT(prefix); + + ret = snprintf(key_prefix, sizeof(key_prefix), "%s%d.brick%d", prefix, + vol_count, brick_count); + if (ret < 0 || ret >= sizeof(key_prefix)) { + ret = -1; + snprintf(msg, sizeof(msg), "key_prefix too long"); + goto out; + } + keylen = snprintf(key, sizeof(key), "%s.hostname", key_prefix); + ret = dict_get_strn(peer_data, key, keylen, &hostname); + if (ret) { + snprintf(msg, sizeof(msg), "%s missing in payload", key); + goto out; + } + + keylen = snprintf(key, sizeof(key), "%s.path", key_prefix); + ret = dict_get_strn(peer_data, key, keylen, &path); + if (ret) { + snprintf(msg, sizeof(msg), "%s missing in payload", key); + goto out; + } + + keylen = snprintf(key, sizeof(key), "%s.brick_id", key_prefix); + ret = dict_get_strn(peer_data, key, keylen, &brick_id); + + keylen = snprintf(key, sizeof(key), "%s.decommissioned", key_prefix); + ret = dict_get_int32n(peer_data, key, keylen, &decommissioned); + if (ret) { + /* For backward compatibility */ + ret = 0; + } - GF_ASSERT (vols); - GF_ASSERT (status); + ret = glusterd_brickinfo_new(&new_brickinfo); + if (ret) + goto out; - snprintf (key, sizeof (key), "volume%d.name", count); - ret = dict_get_str (vols, key, &volname); + ret = snprintf(new_brickinfo->path, sizeof(new_brickinfo->path), "%s", + path); + if (ret < 0 || ret >= sizeof(new_brickinfo->path)) { + ret = -1; + goto out; + } + ret = snprintf(new_brickinfo->hostname, sizeof(new_brickinfo->hostname), + "%s", hostname); + if (ret < 0 || ret >= sizeof(new_brickinfo->hostname)) { + ret = -1; + goto out; + } + new_brickinfo->decommissioned = decommissioned; + if (brick_id) + (void)snprintf(new_brickinfo->brick_id, sizeof(new_brickinfo->brick_id), + "%s", brick_id); + + ret = gd_import_new_brick_snap_details(peer_data, key_prefix, + new_brickinfo); + if (ret) + goto out; 
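All of these import helpers address a single flat peer dictionary through hierarchical key names such as "volume2.brick1.hostname" or "snap1.ta-brick1.path", built from the prefix, the volume index, and the brick index; that naming is what lets one dict carry every volume and brick in the cluster. A small sketch of the key construction, using an illustrative helper name:

    #include <stdio.h>

    /* Illustrative only: builds the kind of key the import code reads,
     * e.g. "volume2.brick1.hostname" or "snap1.ta-brick1.path". */
    static int
    build_brick_key(char *key, size_t len, const char *prefix, int vol_idx,
                    const char *brick_kind, int brick_idx, const char *field)
    {
        int n = snprintf(key, len, "%s%d.%s%d.%s", prefix, vol_idx,
                         brick_kind, brick_idx, field);
        return (n < 0 || (size_t)n >= len) ? -1 : n;  /* mirror the overflow checks above */
    }

    /* build_brick_key(key, sizeof(key), "volume", 2, "brick", 1, "hostname")
     *   yields "volume2.brick1.hostname" */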
+ + keylen = snprintf(key, sizeof(key), "%s.uuid", key_prefix); + ret = dict_get_strn(peer_data, key, keylen, &brick_uuid_str); + if (ret) + goto out; + gf_uuid_parse(brick_uuid_str, new_brickinfo->uuid); + + *brickinfo = new_brickinfo; +out: + if (msg[0]) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_BRICK_IMPORT_FAIL, "%s", + msg); + if (new_brickinfo) + gf_event(EVENT_IMPORT_BRICK_FAILED, "peer=%s;brick=%s", + new_brickinfo->hostname, new_brickinfo->path); + } + gf_msg_debug("glusterd", 0, "Returning with %d", ret); + return ret; +} + +/* The prefix represents the type of volume to be added. + * It will be "volume" for normal volumes, and snap# like + * snap1, snap2, for snapshot volumes + */ +static int32_t +glusterd_import_bricks(dict_t *peer_data, int32_t vol_count, + glusterd_volinfo_t *new_volinfo, char *prefix) +{ + int ret = -1; + int brick_count = 1; + int ta_brick_count = 1; + int brickid = 0; + glusterd_brickinfo_t *new_brickinfo = NULL; + glusterd_brickinfo_t *new_ta_brickinfo = NULL; + + GF_ASSERT(peer_data); + GF_ASSERT(vol_count >= 0); + GF_ASSERT(new_volinfo); + GF_ASSERT(prefix); + while (brick_count <= new_volinfo->brick_count) { + ret = glusterd_import_new_brick(peer_data, vol_count, brick_count, + &new_brickinfo, prefix); if (ret) + goto out; + if (new_brickinfo->brick_id[0] == '\0') + /*We were probed from a peer having op-version + less than GD_OP_VER_PERSISTENT_AFR_XATTRS*/ + GLUSTERD_ASSIGN_BRICKID_TO_BRICKINFO(new_brickinfo, new_volinfo, + brickid++); + cds_list_add_tail(&new_brickinfo->brick_list, &new_volinfo->bricks); + brick_count++; + } + + if (new_volinfo->thin_arbiter_count == 1) { + while (ta_brick_count <= new_volinfo->subvol_count) { + ret = glusterd_import_new_ta_brick(peer_data, vol_count, + ta_brick_count, + &new_ta_brickinfo, prefix); + if (ret) goto out; + cds_list_add_tail(&new_ta_brickinfo->brick_list, + &new_volinfo->ta_bricks); + ta_brick_count++; + } + } + ret = 0; +out: + gf_msg_debug("glusterd", 0, "Returning with %d", ret); + return ret; +} - ret = glusterd_volinfo_find (volname, &volinfo); +/* The prefix represents the type of volume to be added. 
+ * It will be "volume" for normal volumes, and snap# like + * snap1, snap2, for snapshot volumes + */ +int +glusterd_import_quota_conf(dict_t *peer_data, int vol_idx, + glusterd_volinfo_t *new_volinfo, char *prefix) +{ + int gfid_idx = 0; + int gfid_count = 0; + int ret = -1; + int fd = -1; + char key[128]; + char key_prefix[64]; + int keylen; + char *gfid_str = NULL; + uuid_t gfid = { + 0, + }; + xlator_t *this = NULL; + int8_t gfid_type = 0; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(peer_data); + GF_ASSERT(prefix); + + if (!glusterd_is_volume_quota_enabled(new_volinfo)) { + (void)glusterd_clean_up_quota_store(new_volinfo); + return 0; + } + + ret = glusterd_store_create_quota_conf_sh_on_absence(new_volinfo); + if (ret) + goto out; + + fd = gf_store_mkstemp(new_volinfo->quota_conf_shandle); + if (fd < 0) { + ret = -1; + goto out; + } + ret = snprintf(key_prefix, sizeof(key_prefix), "%s%d", prefix, vol_idx); + if (ret < 0 || ret >= sizeof(key_prefix)) { + ret = -1; + gf_msg_debug(this->name, 0, "Failed to set key_prefix for quota conf"); + goto out; + } + snprintf(key, sizeof(key), "%s.quota-cksum", key_prefix); + ret = dict_get_uint32(peer_data, key, &new_volinfo->quota_conf_cksum); + if (ret) + gf_msg_debug(this->name, 0, "Failed to get quota cksum"); + + snprintf(key, sizeof(key), "%s.quota-version", key_prefix); + ret = dict_get_uint32(peer_data, key, &new_volinfo->quota_conf_version); + if (ret) + gf_msg_debug(this->name, 0, + "Failed to get quota " + "version"); + + keylen = snprintf(key, sizeof(key), "%s.gfid-count", key_prefix); + ret = dict_get_int32n(peer_data, key, keylen, &gfid_count); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=%s", key, NULL); + goto out; + } + + ret = glusterd_quota_conf_write_header(fd); + if (ret) + goto out; + + for (gfid_idx = 0; gfid_idx < gfid_count; gfid_idx++) { + keylen = snprintf(key, sizeof(key) - 1, "%s.gfid%d", key_prefix, + gfid_idx); + ret = dict_get_strn(peer_data, key, keylen, &gfid_str); if (ret) { - *status = GLUSTERD_VOL_COMP_UPDATE_REQ; - ret = 0; - goto out; + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=%s", key, NULL); + goto out; } - memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.version", count); - ret = dict_get_int32 (vols, key, &version); + snprintf(key, sizeof(key) - 1, "%s.gfid-type%d", key_prefix, gfid_idx); + ret = dict_get_int8(peer_data, key, &gfid_type); if (ret) - goto out; + gfid_type = GF_QUOTA_CONF_TYPE_USAGE; + + gf_uuid_parse(gfid_str, gfid); + ret = glusterd_quota_conf_write_gfid(fd, gfid, (char)gfid_type); + if (ret < 0) { + gf_msg(this->name, GF_LOG_CRITICAL, errno, + GD_MSG_QUOTA_CONF_WRITE_FAIL, + "Unable to write " + "gfid %s into quota.conf for %s", + gfid_str, new_volinfo->volname); + ret = -1; + goto out; + } + } - if (version > volinfo->version) { - //Mismatch detected - ret = 0; - gf_log ("", GF_LOG_ERROR, "Version of volume %s differ." - "local version = %d, remote version = %d", - volinfo->volname, volinfo->version, version); - *status = GLUSTERD_VOL_COMP_UPDATE_REQ; - goto out; - } else if (version < volinfo->version) { - *status = GLUSTERD_VOL_COMP_SCS; - goto out; - } - - //Now, versions are same, compare cksums. 
- // - memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.ckusm", count); - ret = dict_get_uint32 (vols, key, &cksum); - if (ret) - goto out; + ret = gf_store_rename_tmppath(new_volinfo->quota_conf_shandle); - if (cksum != volinfo->cksum) { - ret = 0; - gf_log ("", GF_LOG_ERROR, "Cksums of volume %s differ." - " local cksum = %d, remote cksum = %d", - volinfo->volname, volinfo->cksum, cksum); - *status = GLUSTERD_VOL_COMP_RJT; - goto out; + ret = 0; + +out: + if (!ret) { + ret = glusterd_compute_cksum(new_volinfo, _gf_true); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CKSUM_COMPUTE_FAIL, + "Failed to compute checksum"); + goto clear_quota_conf; } - *status = GLUSTERD_VOL_COMP_SCS; + ret = glusterd_store_save_quota_version_and_cksum(new_volinfo); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_QUOTA_CKSUM_VER_STORE_FAIL, + "Failed to save quota version and checksum"); + } + +clear_quota_conf: + if (ret && (fd > 0)) { + gf_store_unlink_tmppath(new_volinfo->quota_conf_shandle); + (void)gf_store_handle_destroy(new_volinfo->quota_conf_shandle); + new_volinfo->quota_conf_shandle = NULL; + } + + return ret; +} + +int +gd_import_friend_volume_rebal_dict(dict_t *dict, int count, + glusterd_volinfo_t *volinfo) +{ + int ret = -1; + char key[64] = ""; + int dict_count = 0; + char key_prefix[32]; + + GF_ASSERT(dict); + GF_ASSERT(volinfo); + xlator_t *this = THIS; + GF_ASSERT(this); + + snprintf(key_prefix, sizeof(key_prefix), "volume%d", count); + ret = snprintf(key, sizeof(key), "%s.rebal-dict-count", key_prefix); + ret = dict_get_int32n(dict, key, ret, &dict_count); + if (ret) { + /* Older peers will not have this dict */ + gf_smsg(this->name, GF_LOG_INFO, errno, GD_MSG_DICT_GET_FAILED, + "Key=%s", key, NULL); + ret = 0; + goto out; + } + + volinfo->rebal.dict = dict_new(); + if (!volinfo->rebal.dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); + ret = -1; + goto out; + } + ret = import_prdict_dict(dict, volinfo->rebal.dict, "rebal-dict-key", + "rebal-dict-value", dict_count, key_prefix); out: - gf_log ("", GF_LOG_DEBUG, "Returning with ret: %d, status: %d", - ret, *status); - return ret; + if (ret && volinfo->rebal.dict) + dict_unref(volinfo->rebal.dict); + gf_msg_debug(this->name, 0, "Returning with %d", ret); + return ret; } +/* The prefix represents the type of volume to be added. 
+ * It will be "volume" for normal volumes, and snap# like + * snap1, snap2, for snapshot volumes + */ int32_t -glusterd_import_friend_volume_opts (dict_t *vols, int count, - glusterd_volinfo_t *volinfo) -{ - char key[512] = {0,}; - int32_t ret = -1; - int i = 1; - int opt_count = 0; - char *opt_key = NULL; - char *opt_val = NULL; - char *dup_opt_val = NULL; - char msg[2048] = {0}; - - memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.opt-count", count); - ret = dict_get_int32 (vols, key, &opt_count); - if (ret) { - snprintf (msg, sizeof (msg), "Volume option count not " - "specified for %s", volinfo->volname); - goto out; +glusterd_import_volinfo(dict_t *peer_data, int count, + glusterd_volinfo_t **volinfo, char *prefix) +{ + int ret = -1; + char key[64] = ""; + char key_prefix[32]; + int keylen; + char *parent_volname = NULL; + char *volname = NULL; + glusterd_volinfo_t *new_volinfo = NULL; + char *volume_id_str = NULL; + char msg[2048] = ""; + char *str = NULL; + char *rebalance_id_str = NULL; + int op_version = 0; + int client_op_version = 0; + uint32_t stage_deleted = 0; + + GF_ASSERT(peer_data); + GF_ASSERT(volinfo); + GF_ASSERT(prefix); + + ret = snprintf(key_prefix, sizeof(key_prefix), "%s%d", prefix, count); + if (ret < 0 || ret >= sizeof(key_prefix)) { + ret = -1; + snprintf(msg, sizeof(msg), "key_prefix too big"); + goto out; + } + + keylen = snprintf(key, sizeof(key), "%s.name", key_prefix); + ret = dict_get_strn(peer_data, key, keylen, &volname); + if (ret) { + snprintf(msg, sizeof(msg), "%s missing in payload", key); + goto out; + } + + snprintf(key, sizeof(key), "%s.stage_deleted", key_prefix); + ret = dict_get_uint32(peer_data, key, &stage_deleted); + /* stage_deleted = 1 means the volume is still in the process of + * deleting a volume, so we shouldn't be trying to create a + * fresh volume here which would lead to a stale entry + */ + if (stage_deleted) { + goto out; + } + + ret = glusterd_volinfo_new(&new_volinfo); + if (ret) + goto out; + ret = snprintf(new_volinfo->volname, sizeof(new_volinfo->volname), "%s", + volname); + if (ret < 0 || ret >= sizeof(new_volinfo->volname)) { + ret = -1; + goto out; + } + keylen = snprintf(key, sizeof(key), "%s.type", key_prefix); + ret = dict_get_int32n(peer_data, key, keylen, &new_volinfo->type); + if (ret) { + snprintf(msg, sizeof(msg), "%s missing in payload for %s", key, + volname); + goto out; + } + + keylen = snprintf(key, sizeof(key), "%s.parent_volname", key_prefix); + ret = dict_get_strn(peer_data, key, keylen, &parent_volname); + if (!ret) { + ret = snprintf(new_volinfo->parent_volname, + sizeof(new_volinfo->parent_volname), "%s", + parent_volname); + if (ret < 0 || ret >= sizeof(new_volinfo->volname)) { + ret = -1; + goto out; } - while (i <= opt_count) { - memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.key%d", - count, i); - ret = dict_get_str (vols, key, &opt_key); + } + keylen = snprintf(key, sizeof(key), "%s.brick_count", key_prefix); + ret = dict_get_int32n(peer_data, key, keylen, &new_volinfo->brick_count); + if (ret) { + snprintf(msg, sizeof(msg), "%s missing in payload for %s", key, + volname); + goto out; + } + + keylen = snprintf(key, sizeof(key), "%s.version", key_prefix); + ret = dict_get_int32n(peer_data, key, keylen, &new_volinfo->version); + if (ret) { + snprintf(msg, sizeof(msg), "%s missing in payload for %s", key, + volname); + goto out; + } + + keylen = snprintf(key, sizeof(key), "%s.status", key_prefix); + ret = dict_get_int32n(peer_data, key, keylen, + 
(int32_t *)&new_volinfo->status); + if (ret) { + snprintf(msg, sizeof(msg), "%s missing in payload for %s", key, + volname); + goto out; + } + + keylen = snprintf(key, sizeof(key), "%s.sub_count", key_prefix); + ret = dict_get_int32n(peer_data, key, keylen, &new_volinfo->sub_count); + if (ret) { + snprintf(msg, sizeof(msg), "%s missing in payload for %s", key, + volname); + goto out; + } + + keylen = snprintf(key, sizeof(key), "%s.subvol_count", key_prefix); + ret = dict_get_int32n(peer_data, key, keylen, &new_volinfo->subvol_count); + if (ret) { + snprintf(msg, sizeof(msg), "%s missing in payload for %s", key, + volname); + goto out; + } + + /* not having a 'stripe_count' key is not a error + (as peer may be of old version) */ + keylen = snprintf(key, sizeof(key), "%s.stripe_count", key_prefix); + ret = dict_get_int32n(peer_data, key, keylen, &new_volinfo->stripe_count); + if (ret) + gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_DICT_GET_FAILED, + "peer is possibly old version"); + + /* not having a 'replica_count' key is not a error + (as peer may be of old version) */ + keylen = snprintf(key, sizeof(key), "%s.replica_count", key_prefix); + ret = dict_get_int32n(peer_data, key, keylen, &new_volinfo->replica_count); + if (ret) + gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_DICT_GET_FAILED, + "peer is possibly old version"); + + /* not having a 'arbiter_count' key is not a error + (as peer may be of old version) */ + keylen = snprintf(key, sizeof(key), "%s.arbiter_count", key_prefix); + ret = dict_get_int32n(peer_data, key, keylen, &new_volinfo->arbiter_count); + if (ret) + gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_DICT_GET_FAILED, + "peer is possibly old version"); + + /* not having a 'thin_arbiter_count' key is not a error + (as peer may be of old version) */ + keylen = snprintf(key, sizeof(key), "%s.thin_arbiter_count", key_prefix); + ret = dict_get_int32n(peer_data, key, keylen, + &new_volinfo->thin_arbiter_count); + if (ret) + gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_DICT_GET_FAILED, + "peer is possibly old version"); + + /* not having a 'disperse_count' key is not a error + (as peer may be of old version) */ + keylen = snprintf(key, sizeof(key), "%s.disperse_count", key_prefix); + ret = dict_get_int32n(peer_data, key, keylen, &new_volinfo->disperse_count); + if (ret) + gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_DICT_GET_FAILED, + "peer is possibly old version"); + + /* not having a 'redundancy_count' key is not a error + (as peer may be of old version) */ + keylen = snprintf(key, sizeof(key), "%s.redundancy_count", key_prefix); + ret = dict_get_int32n(peer_data, key, keylen, + &new_volinfo->redundancy_count); + if (ret) + gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_DICT_GET_FAILED, + "peer is possibly old version"); + + /* not having a 'dist_count' key is not a error + (as peer may be of old version) */ + keylen = snprintf(key, sizeof(key), "%s.dist_count", key_prefix); + ret = dict_get_int32n(peer_data, key, keylen, + &new_volinfo->dist_leaf_count); + if (ret) + gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_DICT_GET_FAILED, + "peer is possibly old version"); + + new_volinfo->subvol_count = new_volinfo->brick_count / + glusterd_get_dist_leaf_count(new_volinfo); + snprintf(key, sizeof(key), "%s.ckusm", key_prefix); + ret = dict_get_uint32(peer_data, key, &new_volinfo->cksum); + if (ret) { + snprintf(msg, sizeof(msg), "%s missing in payload for %s", key, + volname); + goto out; + } + + keylen = snprintf(key, sizeof(key), "%s.volume_id", key_prefix); + ret = dict_get_strn(peer_data, key, keylen, 
&volume_id_str); + if (ret) { + snprintf(msg, sizeof(msg), "%s missing in payload for %s", key, + volname); + goto out; + } + + gf_uuid_parse(volume_id_str, new_volinfo->volume_id); + + keylen = snprintf(key, sizeof(key), "%s.username", key_prefix); + ret = dict_get_strn(peer_data, key, keylen, &str); + if (!ret) { + ret = glusterd_auth_set_username(new_volinfo, str); + if (ret) + goto out; + } + + keylen = snprintf(key, sizeof(key), "%s.password", key_prefix); + ret = dict_get_strn(peer_data, key, keylen, &str); + if (!ret) { + ret = glusterd_auth_set_password(new_volinfo, str); + if (ret) + goto out; + } + + snprintf(key, sizeof(key), "%s.transport_type", key_prefix); + ret = dict_get_uint32(peer_data, key, &new_volinfo->transport_type); + if (ret) { + snprintf(msg, sizeof(msg), "%s missing in payload for %s", key, + volname); + goto out; + } + + snprintf(key, sizeof(key), "%s.rebalance", key_prefix); + ret = dict_get_uint32(peer_data, key, &new_volinfo->rebal.defrag_cmd); + if (ret) { + snprintf(msg, sizeof(msg), "%s missing in payload for %s", key, + volname); + goto out; + } + + keylen = snprintf(key, sizeof(key), "%s.rebalance-id", key_prefix); + ret = dict_get_strn(peer_data, key, keylen, &rebalance_id_str); + if (ret) { + /* This is not present in older glusterfs versions, + * so don't error out + */ + ret = 0; + } else { + gf_uuid_parse(rebalance_id_str, new_volinfo->rebal.rebalance_id); + } + + snprintf(key, sizeof(key), "%s.rebalance-op", key_prefix); + /* This is not present in older glusterfs versions, + * so don't error out + */ + ret = dict_get_uint32(peer_data, key, (uint32_t *)&new_volinfo->rebal.op); + + ret = gd_import_friend_volume_rebal_dict(peer_data, count, new_volinfo); + if (ret) { + snprintf(msg, sizeof(msg), + "Failed to import rebalance dict " + "for volume."); + goto out; + } + + ret = gd_import_volume_snap_details(peer_data, new_volinfo, key_prefix, + volname); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_SNAP_DETAILS_IMPORT_FAIL, + "Failed to import snapshot " + "details for volume %s", + volname); + goto out; + } + + ret = glusterd_import_friend_volume_opts(peer_data, count, new_volinfo, + prefix); + if (ret) + goto out; + + /* Import the volume's op-versions if available else set it to 1. + * Not having op-versions implies this informtation was obtained from a + * op-version 1 friend (gluster-3.3), ergo the cluster is at op-version + * 1 and all volumes are at op-versions 1. + * + * Either both the volume op-versions should be absent or both should be + * present. 
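 * Concretely, the checks below resolve to three cases:
 *   - op-version and client-op-version both present : use the sent values
 *   - both absent                                    : default both to 1
 *   - exactly one present                            : reject the import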
Only one being present is a failure + */ + keylen = snprintf(key, sizeof(key), "%s.op-version", key_prefix); + ret = dict_get_int32n(peer_data, key, keylen, &op_version); + if (ret) + ret = 0; + keylen = snprintf(key, sizeof(key), "%s.client-op-version", key_prefix); + ret = dict_get_int32n(peer_data, key, keylen, &client_op_version); + if (ret) + ret = 0; + + if (op_version && client_op_version) { + new_volinfo->op_version = op_version; + new_volinfo->client_op_version = client_op_version; + } else if (((op_version == 0) && (client_op_version != 0)) || + ((op_version != 0) && (client_op_version == 0))) { + ret = -1; + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Only one volume op-version found"); + goto out; + } else { + new_volinfo->op_version = 1; + new_volinfo->client_op_version = 1; + } + + keylen = snprintf(key, sizeof(key), "%s.quota-xattr-version", key_prefix); + /*This is not present in older glusterfs versions, so ignore ret value*/ + ret = dict_get_int32n(peer_data, key, keylen, + &new_volinfo->quota_xattr_version); + + ret = glusterd_import_bricks(peer_data, count, new_volinfo, prefix); + if (ret) + goto out; + + *volinfo = new_volinfo; +out: + if (msg[0]) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_IMPORT_FAIL, "%s", + msg); + gf_event(EVENT_IMPORT_VOLUME_FAILED, "volume=%s", new_volinfo->volname); + } + gf_msg_debug("glusterd", 0, "Returning with %d", ret); + return ret; +} + +int32_t +glusterd_volume_disconnect_all_bricks(glusterd_volinfo_t *volinfo) +{ + int ret = 0; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_brick_proc_t *brick_proc = NULL; + int brick_count = 0; + + GF_ASSERT(volinfo); + + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + if (glusterd_is_brick_started(brickinfo)) { + /* If brick multiplexing is enabled then we can't + * blindly set brickinfo->rpc to NULL as it might impact + * the other attached bricks. 
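 * The guard below therefore tears the connection down only when
 * multiplexing is disabled, or when no brick process with bricks accounted
 * against it is found for this port (brick_count stays 0), i.e. no other
 * brick can be sharing the rpc.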
+ */ + ret = glusterd_brick_proc_for_port(brickinfo->port, &brick_proc); + if (!ret) { + brick_count = brick_proc->brick_count; + } + if (!is_brick_mx_enabled() || brick_count == 0) { + ret = glusterd_brick_disconnect(brickinfo); if (ret) { - snprintf (msg, sizeof (msg), "Volume option key not " - "specified for %s", volinfo->volname); - goto out; + gf_msg("glusterd", GF_LOG_ERROR, 0, + GD_MSD_BRICK_DISCONNECT_FAIL, + "Failed to " + "disconnect %s:%s", + brickinfo->hostname, brickinfo->path); + break; } + } + } + } - memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.value%d", - count, i); - ret = dict_get_str (vols, key, &opt_val); - if (ret) { - snprintf (msg, sizeof (msg), "Volume option value not " - "specified for %s", volinfo->volname); - goto out; + return ret; +} + +int32_t +glusterd_volinfo_copy_brickinfo(glusterd_volinfo_t *old_volinfo, + glusterd_volinfo_t *new_volinfo) +{ + glusterd_brickinfo_t *new_brickinfo = NULL; + glusterd_brickinfo_t *old_brickinfo = NULL; + glusterd_brickinfo_t *new_ta_brickinfo = NULL; + glusterd_brickinfo_t *old_ta_brickinfo = NULL; + glusterd_conf_t *priv = NULL; + int ret = 0; + xlator_t *this = NULL; + char abspath[PATH_MAX] = ""; + + GF_ASSERT(new_volinfo); + GF_ASSERT(old_volinfo); + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + cds_list_for_each_entry(new_brickinfo, &new_volinfo->bricks, brick_list) + { + ret = glusterd_volume_brickinfo_get( + new_brickinfo->uuid, new_brickinfo->hostname, new_brickinfo->path, + old_volinfo, &old_brickinfo); + if (ret == 0) { + new_brickinfo->port = old_brickinfo->port; + + if (old_brickinfo->real_path[0] == '\0') { + if (!realpath(new_brickinfo->path, abspath)) { + /* Here an ENOENT should also be a + * failure as the brick is expected to + * be in existence + */ + gf_msg(this->name, GF_LOG_CRITICAL, errno, + GD_MSG_BRICKINFO_CREATE_FAIL, + "realpath () failed for brick " + "%s. The underlying filesystem " + "may be in bad state", + new_brickinfo->path); + ret = -1; + goto out; + } + if (strlen(abspath) >= sizeof(new_brickinfo->real_path)) { + ret = -1; + goto out; } - dup_opt_val = gf_strdup (opt_val); - if (!dup_opt_val) { + (void)strncpy(new_brickinfo->real_path, abspath, + sizeof(new_brickinfo->real_path)); + } else { + (void)strncpy(new_brickinfo->real_path, + old_brickinfo->real_path, + sizeof(new_brickinfo->real_path)); + } + } + } + if (new_volinfo->thin_arbiter_count == 1) { + cds_list_for_each_entry(new_ta_brickinfo, &new_volinfo->ta_bricks, + brick_list) + { + ret = glusterd_volume_ta_brickinfo_get( + new_ta_brickinfo->uuid, new_ta_brickinfo->hostname, + new_ta_brickinfo->path, old_volinfo, &old_ta_brickinfo); + if (ret == 0) { + new_ta_brickinfo->port = old_ta_brickinfo->port; + + if (old_ta_brickinfo->real_path[0] == '\0') { + if (!realpath(new_ta_brickinfo->path, abspath)) { + /* Here an ENOENT should also be a + * failure as the brick is expected to + * be in existence + */ + gf_msg(this->name, GF_LOG_CRITICAL, errno, + GD_MSG_BRICKINFO_CREATE_FAIL, + "realpath () failed for brick " + "%s. 
The underlying filesystem " + "may be in bad state", + new_brickinfo->path); ret = -1; goto out; - } - ret = dict_set_dynstr (volinfo->dict, opt_key, dup_opt_val); - if (ret) { - snprintf (msg, sizeof (msg), "Volume set %s %s " - "unsuccessful for %s", opt_key, dup_opt_val, - volinfo->volname); + } + if (strlen(abspath) >= + sizeof(new_ta_brickinfo->real_path)) { + ret = -1; goto out; + } + (void)strncpy(new_ta_brickinfo->real_path, abspath, + sizeof(new_ta_brickinfo->real_path)); + } else { + (void)strncpy(new_ta_brickinfo->real_path, + old_ta_brickinfo->real_path, + sizeof(new_ta_brickinfo->real_path)); } - i++; + } } + } + ret = 0; + out: - if (msg[0]) - gf_log ("glusterd", GF_LOG_ERROR, "%s", msg); - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); - return ret; + return ret; } int32_t -glusterd_import_new_brick (dict_t *vols, int32_t vol_count, - int32_t brick_count, - glusterd_brickinfo_t **brickinfo) -{ - char key[512] = {0,}; - int ret = -1; - char *hostname = NULL; - char *path = NULL; - glusterd_brickinfo_t *new_brickinfo = NULL; - char msg[2048] = {0}; - - GF_ASSERT (vols); - GF_ASSERT (vol_count >= 0); - GF_ASSERT (brickinfo); - - memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.brick%d.hostname", - vol_count, brick_count); - ret = dict_get_str (vols, key, &hostname); - if (ret) { - snprintf (msg, sizeof (msg), "%s missing in payload", key); - goto out; +glusterd_volinfo_stop_stale_bricks(glusterd_volinfo_t *new_volinfo, + glusterd_volinfo_t *old_volinfo) +{ + glusterd_brickinfo_t *new_brickinfo = NULL; + glusterd_brickinfo_t *old_brickinfo = NULL; + + int ret = 0; + GF_ASSERT(new_volinfo); + GF_ASSERT(old_volinfo); + if (_gf_false == glusterd_is_volume_started(old_volinfo)) + goto out; + cds_list_for_each_entry(old_brickinfo, &old_volinfo->bricks, brick_list) + { + ret = glusterd_volume_brickinfo_get( + old_brickinfo->uuid, old_brickinfo->hostname, old_brickinfo->path, + new_volinfo, &new_brickinfo); + /* If the brick is stale, i.e it's not a part of the new volume + * or if it's part of the new volume and is pending a snap or if it's + * brick multiplexing enabled, then stop the brick process + */ + if (ret || (new_brickinfo->snap_status == -1) || + GF_ATOMIC_GET(old_volinfo->volpeerupdate)) { + /*TODO: may need to switch to 'atomic' flavour of + * brick_stop, once we make peer rpc program also + * synctask enabled*/ + ret = glusterd_brick_stop(old_volinfo, old_brickinfo, _gf_false); + if (ret) + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_BRICK_STOP_FAIL, + "Failed to stop" + " brick %s:%s", + old_brickinfo->hostname, old_brickinfo->path); } + } + ret = 0; +out: + gf_msg_debug("glusterd", 0, "Returning with %d", ret); + return ret; +} - memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.brick%d.path", - vol_count, brick_count); - ret = dict_get_str (vols, key, &path); +int32_t +glusterd_delete_stale_volume(glusterd_volinfo_t *stale_volinfo, + glusterd_volinfo_t *valid_volinfo) +{ + int32_t ret = -1; + glusterd_volinfo_t *temp_volinfo = NULL; + glusterd_volinfo_t *voliter = NULL; + xlator_t *this = NULL; + glusterd_svc_t *svc = NULL; + + GF_ASSERT(stale_volinfo); + GF_ASSERT(valid_volinfo); + this = THIS; + GF_ASSERT(this); + + /* Copy snap_volumes list from stale_volinfo to valid_volinfo */ + valid_volinfo->snap_count = 0; + cds_list_for_each_entry_safe(voliter, temp_volinfo, + &stale_volinfo->snap_volumes, snapvol_list) + { + cds_list_add_tail(&voliter->snapvol_list, &valid_volinfo->snap_volumes); + valid_volinfo->snap_count++; 
+ } + + if ((!gf_uuid_is_null(stale_volinfo->restored_from_snap)) && + (gf_uuid_compare(stale_volinfo->restored_from_snap, + valid_volinfo->restored_from_snap))) { + ret = glusterd_lvm_snapshot_remove(NULL, stale_volinfo); if (ret) { - snprintf (msg, sizeof (msg), "%s missing in payload", key); - goto out; + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_REMOVE_FAIL, + "Failed to remove lvm snapshot for " + "restored volume %s", + stale_volinfo->volname); } + } + + /* If stale volume is in started state, stop the stale bricks if the new + * volume is started else, stop all bricks. + * We don't want brick_rpc_notify to access already deleted brickinfo, + * so disconnect all bricks from stale_volinfo (unconditionally), since + * they are being deleted subsequently. + */ + if (glusterd_is_volume_started(stale_volinfo)) { + if (glusterd_is_volume_started(valid_volinfo)) { + (void)glusterd_volinfo_stop_stale_bricks(valid_volinfo, + stale_volinfo); + + } else { + (void)glusterd_stop_bricks(stale_volinfo); + } + + (void)glusterd_volume_disconnect_all_bricks(stale_volinfo); + } + /* Delete all the bricks and stores and vol files. They will be created + * again by the valid_volinfo. Volume store delete should not be + * performed because some of the bricks could still be running, + * keeping pid files under run directory + */ + (void)glusterd_delete_all_bricks(stale_volinfo); + if (stale_volinfo->shandle) { + sys_unlink(stale_volinfo->shandle->path); + (void)gf_store_handle_destroy(stale_volinfo->shandle); + stale_volinfo->shandle = NULL; + } + + /* Marking volume as stopped, so that svc manager stops snapd + * and we are deleting the volume. + */ + stale_volinfo->status = GLUSTERD_STATUS_STOPPED; + + if (!stale_volinfo->is_snap_volume) { + svc = &(stale_volinfo->snapd.svc); + (void)svc->manager(svc, stale_volinfo, PROC_START_NO_WAIT); + } + svc = &(stale_volinfo->shd.svc); + (void)svc->manager(svc, stale_volinfo, PROC_START_NO_WAIT); + + (void)glusterd_volinfo_remove(stale_volinfo); + + return 0; +} - ret = glusterd_brickinfo_new (&new_brickinfo); +/* This function updates the rebalance information of the new volinfo using the + * information from the old volinfo. 
+ */ +int +gd_check_and_update_rebalance_info(glusterd_volinfo_t *old_volinfo, + glusterd_volinfo_t *new_volinfo) +{ + int ret = -1; + glusterd_rebalance_t *old = NULL; + glusterd_rebalance_t *new = NULL; + + GF_ASSERT(old_volinfo); + GF_ASSERT(new_volinfo); + + old = &(old_volinfo->rebal); + new = &(new_volinfo->rebal); + + // Disconnect from rebalance process + if (glusterd_defrag_rpc_get(old->defrag)) { + rpc_transport_disconnect(old->defrag->rpc->conn.trans, _gf_false); + glusterd_defrag_rpc_put(old->defrag); + } + + if (!gf_uuid_is_null(old->rebalance_id) && + gf_uuid_compare(old->rebalance_id, new->rebalance_id)) { + (void)gd_stop_rebalance_process(old_volinfo); + goto out; + } + + /* If the tasks match, copy the status and other information of the + * rebalance process from old_volinfo to new_volinfo + */ + new->defrag_status = old->defrag_status; + new->rebalance_files = old->rebalance_files; + new->rebalance_data = old->rebalance_data; + new->lookedup_files = old->lookedup_files; + new->skipped_files = old->skipped_files; + new->rebalance_failures = old->rebalance_failures; + new->rebalance_time = old->rebalance_time; + + /* glusterd_rebalance_t.{op, id, defrag_cmd} are copied during volume + * import a new defrag object should come to life with rebalance being + * restarted + */ +out: + return ret; +} + +static int32_t +glusterd_import_friend_volume(dict_t *peer_data, int count) +{ + int32_t ret = -1; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + glusterd_volinfo_t *old_volinfo = NULL; + glusterd_volinfo_t *new_volinfo = NULL; + glusterd_svc_t *svc = NULL; + int32_t update = 0; + char key[64] = ""; + + GF_ASSERT(peer_data); + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + ret = snprintf(key, sizeof(key), "volume%d.update", count); + ret = dict_get_int32n(peer_data, key, ret, &update); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=%s", key, NULL); + goto out; + } + + if (!update) { + /* if update is 0 that means the volume is not imported */ + gf_smsg(this->name, GF_LOG_INFO, 0, GD_MSG_VOLUME_NOT_IMPORTED, NULL); + goto out; + } + + ret = glusterd_import_volinfo(peer_data, count, &new_volinfo, "volume"); + if (ret) + goto out; + + if (!new_volinfo) { + gf_msg_debug(this->name, 0, "Not importing snap volume"); + goto out; + } + + ret = glusterd_volinfo_find(new_volinfo->volname, &old_volinfo); + if (0 == ret) { + if (new_volinfo->version <= old_volinfo->version) { + /* When this condition is true, it already means that + * the other synctask thread of import volume has + * already up to date volume, so just ignore this volume + * now + */ + goto out; + } + /* Ref count the old_volinfo such that deleting it doesn't crash + * if its been already in use by other thread + */ + glusterd_volinfo_ref(old_volinfo); + (void)gd_check_and_update_rebalance_info(old_volinfo, new_volinfo); + + /* Copy brick ports & real_path from the old volinfo always. 
+ * The old_volinfo will be cleaned up and this information + * could be lost + */ + (void)glusterd_volinfo_copy_brickinfo(old_volinfo, new_volinfo); + + (void)glusterd_delete_stale_volume(old_volinfo, new_volinfo); + glusterd_volinfo_unref(old_volinfo); + } + + ret = glusterd_store_volinfo(new_volinfo, GLUSTERD_VOLINFO_VER_AC_NONE); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_STORE_FAIL, + "Failed to store " + "volinfo for volume %s", + new_volinfo->volname); + goto out; + } + + ret = glusterd_create_volfiles(new_volinfo); + if (ret) + goto out; + + glusterd_list_add_order(&new_volinfo->vol_list, &priv->volumes, + glusterd_compare_volume_name); + + if (glusterd_is_volume_started(new_volinfo)) { + (void)glusterd_start_bricks(new_volinfo); + if (glusterd_is_snapd_enabled(new_volinfo)) { + svc = &(new_volinfo->snapd.svc); + if (svc->manager(svc, new_volinfo, PROC_START_NO_WAIT)) { + gf_event(EVENT_SVC_MANAGER_FAILED, "svc_name=%s", svc->name); + } + } + svc = &(new_volinfo->shd.svc); + if (svc->manager(svc, new_volinfo, PROC_START_NO_WAIT)) { + gf_event(EVENT_SVC_MANAGER_FAILED, "svc_name=%s", svc->name); + } + } + + ret = glusterd_import_quota_conf(peer_data, count, new_volinfo, "volume"); + if (ret) { + gf_event(EVENT_IMPORT_QUOTA_CONF_FAILED, "volume=%s", + new_volinfo->volname); + goto out; + } + + ret = glusterd_fetchspec_notify(this); +out: + gf_msg_debug("glusterd", 0, "Returning with ret: %d", ret); + return ret; +} + +int32_t +glusterd_import_friend_volumes_synctask(void *opaque) +{ + int32_t ret = -1; + int32_t count = 0; + int i = 1; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + dict_t *peer_data = NULL; + glusterd_friend_synctask_args_t *arg = NULL; + + this = THIS; + GF_ASSERT(this); + + conf = this->private; + GF_ASSERT(conf); + + arg = opaque; + if (!arg) + goto out; + + peer_data = dict_new(); + if (!peer_data) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); + goto out; + } + + ret = dict_unserialize(arg->dict_buf, arg->dictlen, &peer_data); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_UNSERIALIZE_FAIL, + NULL); + errno = ENOMEM; + goto out; + } + + ret = dict_get_int32n(peer_data, "count", SLEN("count"), &count); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=count", NULL); + goto out; + } + + synclock_lock(&conf->big_lock); + + /* We need to ensure that importing a volume shouldn't race with an + * other thread where as part of restarting glusterd, bricks are + * restarted (refer glusterd_restart_bricks ()) + */ + while (conf->restart_bricks) { + synccond_wait(&conf->cond_restart_bricks, &conf->big_lock); + } + conf->restart_bricks = _gf_true; + + while (i <= count) { + ret = glusterd_import_friend_volume(peer_data, i); + if (ret) { + break; + } + i++; + } + if (i > count) { + glusterd_svcs_manager(NULL); + } + conf->restart_bricks = _gf_false; + synccond_broadcast(&conf->cond_restart_bricks); +out: + if (peer_data) + dict_unref(peer_data); + if (arg) { + if (arg->dict_buf) + GF_FREE(arg->dict_buf); + GF_FREE(arg); + } + + gf_msg_debug("glusterd", 0, "Returning with %d", ret); + return ret; +} + +int32_t +glusterd_import_friend_volumes(dict_t *peer_data) +{ + int32_t ret = -1; + int32_t count = 0; + int i = 1; + + GF_ASSERT(peer_data); + + ret = dict_get_int32n(peer_data, "count", SLEN("count"), &count); + if (ret) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=count", NULL); + goto out; + } + + while (i <= count) { + 
ret = glusterd_import_friend_volume(peer_data, i); if (ret) - goto out; + goto out; + i++; + } + +out: + gf_msg_debug("glusterd", 0, "Returning with %d", ret); + return ret; +} + +int +glusterd_get_global_server_quorum_ratio(dict_t *opts, double *quorum) +{ + int ret = -1; + char *quorum_str = NULL; + xlator_t *this = THIS; + GF_ASSERT(this); + + ret = dict_get_strn(opts, GLUSTERD_QUORUM_RATIO_KEY, + SLEN(GLUSTERD_QUORUM_RATIO_KEY), &quorum_str); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=%s", GLUSTERD_QUORUM_RATIO_KEY, NULL); + goto out; + } + + ret = gf_string2percent(quorum_str, quorum); + if (ret) + goto out; + ret = 0; +out: + return ret; +} + +int +glusterd_get_global_opt_version(dict_t *opts, uint32_t *version) +{ + int ret = -1; + char *version_str = NULL; + xlator_t *this = THIS; + GF_ASSERT(this); + + ret = dict_get_strn(opts, GLUSTERD_GLOBAL_OPT_VERSION, + SLEN(GLUSTERD_GLOBAL_OPT_VERSION), &version_str); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=%s", GLUSTERD_GLOBAL_OPT_VERSION, NULL); + goto out; + } + + ret = gf_string2uint(version_str, version); + if (ret) + goto out; + ret = 0; +out: + return ret; +} - strcpy (new_brickinfo->path, path); - strcpy (new_brickinfo->hostname, hostname); - //peerinfo might not be added yet - (void) glusterd_resolve_brick (new_brickinfo); +int +glusterd_get_next_global_opt_version_str(dict_t *opts, char **version_str) +{ + int ret = -1; + char version_string[64] = ""; + uint32_t version = 0; + + ret = glusterd_get_global_opt_version(opts, &version); + if (ret) + goto out; + version++; + snprintf(version_string, sizeof(version_string), "%" PRIu32, version); + *version_str = gf_strdup(version_string); + if (*version_str) ret = 0; - *brickinfo = new_brickinfo; out: - if (msg[0]) - gf_log ("glusterd", GF_LOG_ERROR, "%s", msg); - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); - return ret; + return ret; } int32_t -glusterd_import_bricks (dict_t *vols, int32_t vol_count, - glusterd_volinfo_t *new_volinfo) +glusterd_import_global_opts(dict_t *friend_data) { - int ret = -1; - int brick_count = 1; - glusterd_brickinfo_t *new_brickinfo = NULL; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + int ret = -1; + dict_t *import_options = NULL; + int count = 0; + uint32_t local_version = 0; + uint32_t remote_version = 0; + double old_quorum = 0.0; + double new_quorum = 0.0; + + this = THIS; + conf = this->private; + + ret = dict_get_int32n(friend_data, "global-opt-count", + SLEN("global-opt-count"), &count); + if (ret) { + // old version peer + gf_smsg(this->name, GF_LOG_INFO, errno, GD_MSG_DICT_GET_FAILED, + "Key=global-opt-count", NULL); + ret = 0; + goto out; + } + + import_options = dict_new(); + if (!import_options) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); + goto out; + } + ret = import_prdict_dict(friend_data, import_options, "key", "val", count, + "global"); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLOBAL_OPT_IMPORT_FAIL, + "Failed to import" + " global options"); + goto out; + } + + /* Not handling ret since server-quorum-ratio might not yet be set */ + ret = glusterd_get_global_server_quorum_ratio(conf->opts, &old_quorum); + ret = glusterd_get_global_server_quorum_ratio(import_options, &new_quorum); + + ret = glusterd_get_global_opt_version(conf->opts, &local_version); + if (ret) + goto out; + ret = glusterd_get_global_opt_version(import_options, &remote_version); + if (ret) + goto out; + + if 
(remote_version > local_version) { + ret = glusterd_store_options(this, import_options); + if (ret) + goto out; + dict_unref(conf->opts); + conf->opts = dict_ref(import_options); - GF_ASSERT (vols); - GF_ASSERT (vol_count >= 0); - GF_ASSERT (new_volinfo); - while (brick_count <= new_volinfo->brick_count) { + /* If server quorum ratio has changed, restart bricks to + * recompute if quorum is met. If quorum is not met bricks are + * not started and those already running are stopped + */ + if (old_quorum != new_quorum) { + glusterd_launch_synctask(glusterd_restart_bricks, NULL); + } + } - ret = glusterd_import_new_brick (vols, vol_count, brick_count, - &new_brickinfo); - if (ret) - goto out; - list_add_tail (&new_brickinfo->brick_list, &new_volinfo->bricks); - brick_count++; + ret = 0; +out: + if (import_options) + dict_unref(import_options); + return ret; +} + +int32_t +glusterd_compare_friend_data(dict_t *peer_data, int32_t *status, char *hostname) +{ + int32_t ret = -1; + int32_t count = 0; + int i = 1; + gf_boolean_t update = _gf_false; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + glusterd_friend_synctask_args_t *arg = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(peer_data); + GF_ASSERT(status); + + priv = this->private; + GF_ASSERT(priv); + ret = glusterd_import_global_opts(peer_data); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLOBAL_OPT_IMPORT_FAIL, + "Importing global " + "options failed"); + goto out; + } + + ret = dict_get_int32n(peer_data, "count", SLEN("count"), &count); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=count", NULL); + goto out; + } + + while (i <= count) { + ret = glusterd_compare_friend_volume(peer_data, i, status, hostname); + if (ret) + goto out; + + if (GLUSTERD_VOL_COMP_RJT == *status) { + ret = 0; + goto out; } - ret = 0; + if (GLUSTERD_VOL_COMP_UPDATE_REQ == *status) { + update = _gf_true; + } + i++; + } + + if (update) { + /* Launch the import friend volume as a separate synctask as it + * has to trigger start bricks where we may need to wait for the + * first brick to come up before attaching the subsequent bricks + * in case brick multiplexing is enabled + */ + arg = GF_CALLOC(1, sizeof(*arg), gf_common_mt_char); + ret = dict_allocate_and_serialize(peer_data, &arg->dict_buf, + &arg->dictlen); + if (ret < 0) { + gf_log(this->name, GF_LOG_ERROR, + "dict_serialize failed while handling " + " import friend volume request"); + goto out; + } + + glusterd_launch_synctask(glusterd_import_friend_volumes_synctask, arg); + } + out: - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); - return ret; + if (ret && arg) { + GF_FREE(arg); + } + gf_msg_debug(this->name, 0, "Returning with ret: %d, status: %d", ret, + *status); + return ret; +} + +struct rpc_clnt * +glusterd_defrag_rpc_get(glusterd_defrag_info_t *defrag) +{ + struct rpc_clnt *rpc = NULL; + + if (!defrag) + return NULL; + + LOCK(&defrag->lock); + { + rpc = rpc_clnt_ref(defrag->rpc); + } + UNLOCK(&defrag->lock); + return rpc; +} + +struct rpc_clnt * +glusterd_defrag_rpc_put(glusterd_defrag_info_t *defrag) +{ + struct rpc_clnt *rpc = NULL; + + if (!defrag) + return NULL; + + LOCK(&defrag->lock); + { + rpc = rpc_clnt_unref(defrag->rpc); + defrag->rpc = rpc; + } + UNLOCK(&defrag->lock); + return rpc; +} + +struct rpc_clnt * +glusterd_pending_node_get_rpc(glusterd_pending_node_t *pending_node) +{ + struct rpc_clnt *rpc = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_svc_t *svc = NULL; 
+ + GF_VALIDATE_OR_GOTO(THIS->name, pending_node, out); + GF_VALIDATE_OR_GOTO(THIS->name, pending_node->node, out); + + if (pending_node->type == GD_NODE_BRICK) { + brickinfo = pending_node->node; + rpc = brickinfo->rpc; + + } else if (pending_node->type == GD_NODE_SHD || + pending_node->type == GD_NODE_NFS || + pending_node->type == GD_NODE_QUOTAD || + pending_node->type == GD_NODE_SCRUB) { + svc = pending_node->node; + rpc = svc->conn.rpc; + } else if (pending_node->type == GD_NODE_REBALANCE) { + volinfo = pending_node->node; + rpc = glusterd_defrag_rpc_get(volinfo->rebal.defrag); + + } else if (pending_node->type == GD_NODE_SNAPD) { + volinfo = pending_node->node; + rpc = volinfo->snapd.svc.conn.rpc; + } else { + GF_ASSERT(0); + } + +out: + return rpc; +} + +void +glusterd_pending_node_put_rpc(glusterd_pending_node_t *pending_node) +{ + glusterd_volinfo_t *volinfo = NULL; + + switch (pending_node->type) { + case GD_NODE_REBALANCE: + volinfo = pending_node->node; + glusterd_defrag_rpc_put(volinfo->rebal.defrag); + break; + + default: + break; + } } int32_t -glusterd_import_volinfo (dict_t *vols, int count, - glusterd_volinfo_t **volinfo) +glusterd_unlink_file(char *sockfpath) { - int ret = -1; - char key[256] = {0}; - char *volname = NULL; - glusterd_volinfo_t *new_volinfo = NULL; - char *volume_id_str = NULL; - char msg[2048] = {0}; + int ret = 0; - GF_ASSERT (vols); - GF_ASSERT (volinfo); + ret = sys_unlink(sockfpath); + if (ret) { + if (ENOENT == errno) + ret = 0; + else + gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "Failed to remove %s" + " error: %s", + sockfpath, strerror(errno)); + } - snprintf (key, sizeof (key), "volume%d.name", count); - ret = dict_get_str (vols, key, &volname); - if (ret) { - snprintf (msg, sizeof (msg), "%s missing in payload", key); - goto out; - } + return ret; +} - ret = glusterd_volinfo_new (&new_volinfo); - if (ret) +void +glusterd_nfs_pmap_deregister() +{ + if (pmap_unset(MOUNT_PROGRAM, MOUNTV3_VERSION)) + gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_DEREGISTER_SUCCESS, + "De-registered MOUNTV3 successfully"); + else + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_PMAP_UNSET_FAIL, + "De-register MOUNTV3 is unsuccessful"); + + if (pmap_unset(MOUNT_PROGRAM, MOUNTV1_VERSION)) + gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_DEREGISTER_SUCCESS, + "De-registered MOUNTV1 successfully"); + else + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_PMAP_UNSET_FAIL, + "De-register MOUNTV1 is unsuccessful"); + + if (pmap_unset(NFS_PROGRAM, NFSV3_VERSION)) + gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_DEREGISTER_SUCCESS, + "De-registered NFSV3 successfully"); + else + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_PMAP_UNSET_FAIL, + "De-register NFSV3 is unsuccessful"); + + if (pmap_unset(NLM_PROGRAM, NLMV4_VERSION)) + gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_DEREGISTER_SUCCESS, + "De-registered NLM v4 successfully"); + else + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_PMAP_UNSET_FAIL, + "De-registration of NLM v4 failed"); + + if (pmap_unset(NLM_PROGRAM, NLMV1_VERSION)) + gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_DEREGISTER_SUCCESS, + "De-registered NLM v1 successfully"); + else + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_PMAP_UNSET_FAIL, + "De-registration of NLM v1 failed"); + + if (pmap_unset(ACL_PROGRAM, ACLV3_VERSION)) + gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_DEREGISTER_SUCCESS, + "De-registered ACL v3 successfully"); + else + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_PMAP_UNSET_FAIL, + "De-registration of ACL v3 failed"); +} + +int 
+glusterd_add_node_to_dict(char *server, dict_t *dict, int count, + dict_t *vol_opts) +{ + int ret = -1; + char pidfile[PATH_MAX] = ""; + gf_boolean_t running = _gf_false; + int pid = -1; + int port = 0; + glusterd_svc_t *svc = NULL; + char key[64] = ""; + int keylen; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + GF_ASSERT(priv); + + if (!strcmp(server, "")) { + ret = 0; + goto out; + } + + glusterd_svc_build_pidfile_path(server, priv->rundir, pidfile, + sizeof(pidfile)); + + if (strcmp(server, priv->quotad_svc.name) == 0) + svc = &(priv->quotad_svc); +#ifdef BUILD_GNFS + else if (strcmp(server, priv->nfs_svc.name) == 0) + svc = &(priv->nfs_svc); +#endif + else if (strcmp(server, priv->bitd_svc.name) == 0) + svc = &(priv->bitd_svc); + else if (strcmp(server, priv->scrub_svc.name) == 0) + svc = &(priv->scrub_svc); + else { + ret = 0; + goto out; + } + + // Consider service to be running only when glusterd sees it Online + if (svc->online) + running = gf_is_service_running(pidfile, &pid); + + /* For nfs-servers/self-heal-daemon setting + * brick<n>.hostname = "NFS Server" / "Self-heal Daemon" + * brick<n>.path = uuid + * brick<n>.port = 0 + * + * This might be confusing, but cli displays the name of + * the brick as hostname+path, so this will make more sense + * when output. + */ + + keylen = snprintf(key, sizeof(key), "brick%d.hostname", count); + if (!strcmp(server, priv->quotad_svc.name)) + ret = dict_set_nstrn(dict, key, keylen, "Quota Daemon", + SLEN("Quota Daemon")); +#ifdef BUILD_GNFS + else if (!strcmp(server, priv->nfs_svc.name)) + ret = dict_set_nstrn(dict, key, keylen, "NFS Server", + SLEN("NFS Server")); +#endif + else if (!strcmp(server, priv->bitd_svc.name)) + ret = dict_set_nstrn(dict, key, keylen, "Bitrot Daemon", + SLEN("Bitrot Daemon")); + else if (!strcmp(server, priv->scrub_svc.name)) + ret = dict_set_nstrn(dict, key, keylen, "Scrubber Daemon", + SLEN("Scrubber Daemon")); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto out; + } + + keylen = snprintf(key, sizeof(key), "brick%d.path", count); + ret = dict_set_dynstrn(dict, key, keylen, gf_strdup(uuid_utoa(MY_UUID))); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto out; + } + +#ifdef BUILD_GNFS + /* Port is available only for the NFS server. + * Self-heal daemon doesn't provide any port for access + * by entities other than gluster. 
+ */ + if (!strcmp(server, priv->nfs_svc.name)) { + if (dict_getn(vol_opts, "nfs.port", SLEN("nfs.port"))) { + ret = dict_get_int32n(vol_opts, "nfs.port", SLEN("nfs.port"), + &port); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=nfs.port", NULL); goto out; - strncpy (new_volinfo->volname, volname, sizeof (new_volinfo->volname)); + } + } else + port = GF_NFS3_PORT; + } +#endif + keylen = snprintf(key, sizeof(key), "brick%d.port", count); + ret = dict_set_int32n(dict, key, keylen, port); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto out; + } + + keylen = snprintf(key, sizeof(key), "brick%d.pid", count); + ret = dict_set_int32n(dict, key, keylen, pid); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto out; + } + + keylen = snprintf(key, sizeof(key), "brick%d.status", count); + ret = dict_set_int32n(dict, key, keylen, running); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto out; + } + +out: + gf_msg_debug(THIS->name, 0, "Returning %d", ret); + return ret; +} +int +glusterd_remote_hostname_get(rpcsvc_request_t *req, char *remote_host, int len) +{ + GF_ASSERT(req); + GF_ASSERT(remote_host); + GF_ASSERT(req->trans); + + char *name = NULL; + char *hostname = NULL; + char *tmp_host = NULL; + char *canon = NULL; + int ret = 0; + + name = req->trans->peerinfo.identifier; + tmp_host = gf_strdup(name); + if (tmp_host) + get_host_name(tmp_host, &hostname); + + GF_ASSERT(hostname); + if (!hostname) { + memset(remote_host, 0, len); + ret = -1; + goto out; + } + + if ((gf_get_hostname_from_ip(hostname, &canon) == 0) && canon) { + GF_FREE(tmp_host); + tmp_host = hostname = canon; + } + + (void)snprintf(remote_host, len, "%s", hostname); - memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.type", count); - ret = dict_get_int32 (vols, key, &new_volinfo->type); +out: + GF_FREE(tmp_host); + return ret; +} + +gf_boolean_t +glusterd_are_all_volumes_stopped() +{ + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + glusterd_volinfo_t *voliter = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + cds_list_for_each_entry(voliter, &priv->volumes, vol_list) + { + if (voliter->status == GLUSTERD_STATUS_STARTED) + return _gf_false; + } + + return _gf_true; +} + +gf_boolean_t +glusterd_all_shd_compatible_volumes_stopped() +{ + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + glusterd_volinfo_t *voliter = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + cds_list_for_each_entry(voliter, &priv->volumes, vol_list) + { + if (!glusterd_is_shd_compatible_volume(voliter)) + continue; + if (voliter->status == GLUSTERD_STATUS_STARTED) + return _gf_false; + } + + return _gf_true; +} + +gf_boolean_t +glusterd_all_volumes_with_quota_stopped() +{ + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + glusterd_volinfo_t *voliter = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + cds_list_for_each_entry(voliter, &priv->volumes, vol_list) + { + if (!glusterd_is_volume_quota_enabled(voliter)) + continue; + if (voliter->status == GLUSTERD_STATUS_STARTED) + return _gf_false; + } + + return _gf_true; +} + +gf_boolean_t +glusterd_have_volumes() +{ + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + gf_boolean_t volumes_exist = _gf_false; + + this = THIS; + 
GF_VALIDATE_OR_GOTO("glusterd", (this != NULL), out); + + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, (priv != NULL), out); + + volumes_exist = !cds_list_empty(&priv->volumes); +out: + return volumes_exist; +} + +int +glusterd_volume_count_get(void) +{ + glusterd_volinfo_t *tmp_volinfo = NULL; + int32_t ret = 0; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + + cds_list_for_each_entry(tmp_volinfo, &priv->volumes, vol_list) { ret++; } + + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; +} + +int +glusterd_brickinfo_get(uuid_t uuid, char *hostname, char *path, + glusterd_brickinfo_t **brickinfo) +{ + glusterd_volinfo_t *volinfo = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + int ret = -1; + + GF_ASSERT(path); + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + + cds_list_for_each_entry(volinfo, &priv->volumes, vol_list) + { + ret = glusterd_volume_brickinfo_get(uuid, hostname, path, volinfo, + brickinfo); + if (ret == 0) + /*Found*/ + goto out; + } +out: + return ret; +} + +static int32_t +my_callback(struct rpc_req *req, struct iovec *iov, int count, void *v_frame) +{ + call_frame_t *frame = v_frame; + glusterd_conf_t *conf = frame->this->private; + + if (GF_ATOMIC_DEC(conf->blockers) == 0) { + synccond_broadcast(&conf->cond_blockers); + } + + STACK_DESTROY(frame->root); + return 0; +} + +static int32_t +attach_brick_callback(struct rpc_req *req, struct iovec *iov, int count, + void *v_frame) +{ + call_frame_t *frame = v_frame; + glusterd_conf_t *conf = frame->this->private; + glusterd_brickinfo_t *brickinfo = frame->local; + glusterd_brickinfo_t *other_brick = frame->cookie; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = THIS; + int ret = -1; + char pidfile1[PATH_MAX] = ""; + char pidfile2[PATH_MAX] = ""; + gf_getspec_rsp rsp = { + 0, + }; + int last_brick = -1; + + frame->local = NULL; + frame->cookie = NULL; + + if (!iov) { + gf_log(frame->this->name, GF_LOG_ERROR, "iov is NULL"); + ret = -1; + goto out; + } + + ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_getspec_rsp); + if (ret < 0) { + gf_log(frame->this->name, GF_LOG_ERROR, "XDR decoding error"); + ret = -1; + goto out; + } + + ret = glusterd_get_volinfo_from_brick(other_brick->path, &volinfo); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, + "Failed to get volinfo" + " from brick(%s) so pidfile copying/unlink will fail", + other_brick->path); + goto out; + } + GLUSTERD_GET_BRICK_PIDFILE(pidfile1, volinfo, other_brick, conf); + volinfo = NULL; + + ret = glusterd_get_volinfo_from_brick(brickinfo->path, &volinfo); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, + "Failed to get volinfo" + " from brick(%s) so pidfile copying/unlink will fail", + brickinfo->path); + goto out; + } + GLUSTERD_GET_BRICK_PIDFILE(pidfile2, volinfo, brickinfo, conf); + + if (rsp.op_ret == 0) { + brickinfo->port_registered = _gf_true; + + /* PID file is copied once brick has attached + successfully + */ + ret = glusterd_copy_file(pidfile1, pidfile2); if (ret) { - snprintf (msg, sizeof (msg), "%s missing in payload for %s", - key, volname); - goto out; + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "Could not copy file %s to %s", pidfile1, pidfile2); + goto out; } - memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.brick_count", count); - ret = dict_get_int32 (vols, key, &new_volinfo->brick_count); + brickinfo->status = 
GF_BRICK_STARTED; + brickinfo->rpc = rpc_clnt_ref(other_brick->rpc); + gf_log(THIS->name, GF_LOG_INFO, "brick %s is attached successfully", + brickinfo->path); + } else { + gf_log(THIS->name, GF_LOG_INFO, + "attach_brick failed pidfile" + " is %s for brick_path %s", + pidfile2, brickinfo->path); + brickinfo->port = 0; + brickinfo->status = GF_BRICK_STOPPED; + ret = glusterd_brick_process_remove_brick(brickinfo, &last_brick); + if (ret) + gf_msg_debug(this->name, 0, + "Couldn't remove brick from" + " brick process"); + LOCK(&volinfo->lock); + ret = glusterd_store_volinfo(volinfo, GLUSTERD_VOLINFO_VER_AC_NONE); + UNLOCK(&volinfo->lock); if (ret) { - snprintf (msg, sizeof (msg), "%s missing in payload for %s", - key, volname); - goto out; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_SET_FAIL, + "Failed to store volinfo of " + "%s volume", + volinfo->volname); + goto out; } + } +out: + if (GF_ATOMIC_DEC(conf->blockers) == 0) { + synccond_broadcast(&conf->cond_blockers); + } + STACK_DESTROY(frame->root); + return 0; +} - memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.version", count); - ret = dict_get_int32 (vols, key, &new_volinfo->version); - if (ret) { - snprintf (msg, sizeof (msg), "%s missing in payload for %s", - key, volname); - goto out; +int +send_attach_req(xlator_t *this, struct rpc_clnt *rpc, char *path, + glusterd_brickinfo_t *brickinfo, + glusterd_brickinfo_t *other_brick, int op) +{ + int ret = -1; + struct iobuf *iobuf = NULL; + struct iobref *iobref = NULL; + struct iovec iov = { + 0, + }; + ssize_t req_size = 0; + call_frame_t *frame = NULL; + gd1_mgmt_brick_op_req brick_req; + void *req = &brick_req; + void *errlbl = &&err; + struct rpc_clnt_connection *conn; + glusterd_conf_t *conf = this->private; + extern struct rpc_clnt_program gd_brick_prog; + fop_cbk_fn_t cbkfn = my_callback; + + if (!rpc) { + gf_log(this->name, GF_LOG_ERROR, "called with null rpc"); + return -1; + } + + conn = &rpc->conn; + if (!conn->connected || conn->disconnected) { + gf_log(this->name, GF_LOG_INFO, "not connected yet"); + return -1; + } + + brick_req.op = op; + brick_req.name = path; + brick_req.input.input_val = NULL; + brick_req.input.input_len = 0; + brick_req.dict.dict_val = NULL; + brick_req.dict.dict_len = 0; + + req_size = xdr_sizeof((xdrproc_t)xdr_gd1_mgmt_brick_op_req, req); + iobuf = iobuf_get2(rpc->ctx->iobuf_pool, req_size); + if (!iobuf) { + goto *errlbl; + } + errlbl = &&maybe_free_iobuf; + + iov.iov_base = iobuf->ptr; + iov.iov_len = iobuf_pagesize(iobuf); + + iobref = iobref_new(); + if (!iobref) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); + goto *errlbl; + } + errlbl = &&free_iobref; + + frame = create_frame(this, this->ctx->pool); + if (!frame) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_FRAME_CREATE_FAIL, + NULL); + goto *errlbl; + } + + iobref_add(iobref, iobuf); + /* + * Drop our reference to the iobuf. The iobref should already have + * one after iobref_add, so when we unref that we'll free the iobuf as + * well. This allows us to pass just the iobref as frame->local. + */ + iobuf_unref(iobuf); + /* Set the pointer to null so we don't free it on a later error. 
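 * (Illustrative ownership walk-through: iobuf_get2() hands back the buffer
 * with the reference this function owns, iobref_add() takes its own
 * reference, and the iobuf_unref() above drops ours, so from here on the
 * buffer lives exactly as long as the iobref.  The iobref itself is always
 * released at free_iobref below; rpc_clnt_submit() is expected to hold
 * whatever references it needs while the request is in flight.)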
*/ + iobuf = NULL; + + /* Create the xdr payload */ + ret = xdr_serialize_generic(iov, req, (xdrproc_t)xdr_gd1_mgmt_brick_op_req); + if (ret == -1) { + goto *errlbl; + } + + iov.iov_len = ret; + + if (op == GLUSTERD_BRICK_ATTACH) { + frame->local = brickinfo; + frame->cookie = other_brick; + cbkfn = attach_brick_callback; + } + /* Send the msg */ + GF_ATOMIC_INC(conf->blockers); + ret = rpc_clnt_submit(rpc, &gd_brick_prog, op, cbkfn, &iov, 1, NULL, 0, + iobref, frame, NULL, 0, NULL, 0, NULL); + +free_iobref: + iobref_unref(iobref); +maybe_free_iobuf: + if (iobuf) { + iobuf_unref(iobuf); + } +err: + return ret; +} + +extern size_t +build_volfile_path(char *volume_id, char *path, size_t path_len, + char *trusted_str, dict_t *dict); + +static int +attach_brick(xlator_t *this, glusterd_brickinfo_t *brickinfo, + glusterd_brickinfo_t *other_brick, glusterd_volinfo_t *volinfo, + glusterd_volinfo_t *other_vol) +{ + glusterd_conf_t *conf = this->private; + char unslashed[PATH_MAX] = { + '\0', + }; + char full_id[PATH_MAX] = { + '\0', + }; + char path[PATH_MAX] = { + '\0', + }; + int ret = -1; + int tries; + rpc_clnt_t *rpc; + int32_t len; + + gf_log(this->name, GF_LOG_INFO, "add brick %s to existing process for %s", + brickinfo->path, other_brick->path); + + GLUSTERD_REMOVE_SLASH_FROM_PATH(brickinfo->path, unslashed); + + if (volinfo->is_snap_volume) { + len = snprintf(full_id, sizeof(full_id), "/%s/%s/%s/%s.%s.%s", + GLUSTERD_VOL_SNAP_DIR_PREFIX, + volinfo->snapshot->snapname, volinfo->volname, + volinfo->volname, brickinfo->hostname, unslashed); + } else { + len = snprintf(full_id, sizeof(full_id), "%s.%s.%s", volinfo->volname, + brickinfo->hostname, unslashed); + } + if ((len < 0) || (len >= sizeof(full_id))) { + goto out; + } + + (void)build_volfile_path(full_id, path, sizeof(path), NULL, NULL); + + for (tries = 15; tries > 0; --tries) { + rpc = rpc_clnt_ref(other_brick->rpc); + if (rpc) { + ret = send_attach_req(this, rpc, path, brickinfo, other_brick, + GLUSTERD_BRICK_ATTACH); + rpc_clnt_unref(rpc); + if (!ret) { + ret = pmap_registry_extend(this, other_brick->port, + brickinfo->path); + if (ret != 0) { + gf_log(this->name, GF_LOG_ERROR, + "adding brick to process failed"); + goto out; + } + brickinfo->port = other_brick->port; + ret = glusterd_brick_process_add_brick(brickinfo, other_brick); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_BRICKPROC_ADD_BRICK_FAILED, + "Adding brick %s:%s to brick " + "process failed", + brickinfo->hostname, brickinfo->path); + return ret; + } + return 0; + } } + /* + * It might not actually be safe to manipulate the lock + * like this, but if we don't then the connection can + * never actually complete and retries are useless. + * Unfortunately, all of the alternatives (e.g. doing + * all of this in a separate thread) are much more + * complicated and risky. + * TBD: see if there's a better way + */ + synclock_unlock(&conf->big_lock); + synctask_sleep(1); + synclock_lock(&conf->big_lock); + } - memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.status", count); - ret = dict_get_int32 (vols, key, (int32_t *)&new_volinfo->status); - if (ret) { - snprintf (msg, sizeof (msg), "%s missing in payload for %s", - key, volname); - goto out; +out: + gf_log(this->name, GF_LOG_WARNING, "attach failed for %s", brickinfo->path); + return ret; +} + +/* This name was just getting too long, hence the abbreviations. 
*/ +static glusterd_brickinfo_t * +find_compat_brick_in_vol(glusterd_conf_t *conf, + glusterd_volinfo_t *srch_vol, /* volume to search */ + glusterd_volinfo_t *comp_vol, /* volume to compare */ + glusterd_brickinfo_t *brickinfo) +{ + xlator_t *this = THIS; + glusterd_brickinfo_t *other_brick = NULL; + glusterd_brick_proc_t *brick_proc = NULL; + char pidfile2[PATH_MAX] = ""; + int32_t pid2 = -1; + int16_t retries = 15; + int mux_limit = -1; + int ret = -1; + gf_boolean_t brick_status = _gf_false; + gf_boolean_t is_shared_storage = _gf_false; + + /* + * If comp_vol is provided, we have to check *volume* compatibility + * before we can check *brick* compatibility. + */ + if (comp_vol) { + /* + * We should not attach bricks of a normal volume to bricks + * of shared storage volume. + */ + if (!strcmp(srch_vol->volname, GLUSTER_SHARED_STORAGE)) + is_shared_storage = _gf_true; + + if (!strcmp(comp_vol->volname, GLUSTER_SHARED_STORAGE)) { + if (!is_shared_storage) + return NULL; + } else if (is_shared_storage) + return NULL; + + /* + * It's kind of a shame that we have to do this check in both + * directions, but an option might only exist on one of the two + * dictionaries and dict_foreach_match will only find that one. + */ + + gf_log(THIS->name, GF_LOG_DEBUG, "comparing options for %s and %s", + comp_vol->volname, srch_vol->volname); + + if (dict_foreach_match(comp_vol->dict, unsafe_option, NULL, + opts_mismatch, srch_vol->dict) < 0) { + gf_log(THIS->name, GF_LOG_DEBUG, "failure forward"); + return NULL; } - memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.sub_count", count); - ret = dict_get_int32 (vols, key, &new_volinfo->sub_count); - if (ret) { - snprintf (msg, sizeof (msg), "%s missing in payload for %s", - key, volname); - goto out; + if (dict_foreach_match(srch_vol->dict, unsafe_option, NULL, + opts_mismatch, comp_vol->dict) < 0) { + gf_log(THIS->name, GF_LOG_DEBUG, "failure backward"); + return NULL; } - memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.ckusm", count); - ret = dict_get_uint32 (vols, key, &new_volinfo->cksum); - if (ret) { - snprintf (msg, sizeof (msg), "%s missing in payload for %s", - key, volname); - goto out; + gf_log(THIS->name, GF_LOG_DEBUG, "all options match"); + } + + ret = get_mux_limit_per_process(&mux_limit); + if (ret) { + gf_msg_debug(THIS->name, 0, + "Retrieving brick mux " + "limit failed. 
Returning NULL"); + return NULL; + } + + cds_list_for_each_entry(other_brick, &srch_vol->bricks, brick_list) + { + if (other_brick == brickinfo) { + continue; + } + if (gf_uuid_compare(brickinfo->uuid, other_brick->uuid)) { + continue; + } + if (other_brick->status != GF_BRICK_STARTED && + other_brick->status != GF_BRICK_STARTING) { + continue; } - memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.volume_id", count); - ret = dict_get_str (vols, key, &volume_id_str); + ret = glusterd_brick_proc_for_port(other_brick->port, &brick_proc); if (ret) { - snprintf (msg, sizeof (msg), "%s missing in payload for %s", - key, volname); - goto out; + gf_msg_debug(THIS->name, 0, + "Couldn't get brick " + "process corresponding to brick %s:%s", + other_brick->hostname, other_brick->path); + continue; } - memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.transport_type", count); - ret = dict_get_uint32 (vols, key, &new_volinfo->transport_type); - if (ret) { - snprintf (msg, sizeof (msg), "%s missing in payload for %s", - key, volname); - goto out; + if (mux_limit != 0) { + if (brick_proc->brick_count >= mux_limit) + continue; + } else { + /* This means that the "cluster.max-bricks-per-process" + * options hasn't yet been explicitly set. Continue + * as if there's no limit set + */ + gf_msg(THIS->name, GF_LOG_WARNING, 0, GD_MSG_NO_MUX_LIMIT, + "cluster.max-bricks-per-process options isn't " + "set. Continuing with no limit set for " + "brick multiplexing."); + } + /* The first brick process might take some time to finish its + * handshake with glusterd and prepare the graph. We can't + * afford to send attach_req for other bricks till that time. + * brick process sends PMAP_SIGNIN event after processing the + * volfile and hence it's safe to assume that if glusterd has + * received a pmap signin request for the same brick, we are + * good for subsequent attach requests. 
+ */ + retries = 15; + while (retries > 0) { + if (other_brick->port_registered) { + GLUSTERD_GET_BRICK_PIDFILE(pidfile2, srch_vol, other_brick, + conf); + if (sys_access(pidfile2, F_OK) == 0 && + gf_is_service_running(pidfile2, &pid2)) { + gf_msg_debug(this->name, 0, + "brick %s is running as a pid %d ", + other_brick->path, pid2); + brick_status = _gf_true; + break; + } + } + + synclock_unlock(&conf->big_lock); + gf_msg_debug(this->name, 0, + "brick %s is still" + " starting, waiting for 2 seconds ", + other_brick->path); + synctask_sleep(2); + synclock_lock(&conf->big_lock); + retries--; } - uuid_parse (volume_id_str, new_volinfo->volume_id); + if (!brick_status) { + gf_log(this->name, GF_LOG_INFO, + "brick has not come up so cleaning up dead brick %s:%s", + other_brick->hostname, other_brick->path); + other_brick->status = GF_BRICK_STOPPED; + if (pidfile2[0]) + sys_unlink(pidfile2); + continue; + } + return other_brick; + } - ret = glusterd_import_friend_volume_opts (vols, count, new_volinfo); - if (ret) - goto out; - ret = glusterd_import_bricks (vols, count, new_volinfo); - if (ret) - goto out; - *volinfo = new_volinfo; -out: - if (msg[0]) - gf_log ("glusterd", GF_LOG_ERROR, "%s", msg); - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); - return ret; + return NULL; } -int32_t -glusterd_volume_disconnect_all_bricks (glusterd_volinfo_t *volinfo) -{ - int ret = 0; - glusterd_brickinfo_t *brickinfo = NULL; - GF_ASSERT (volinfo); - - list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { - if (glusterd_is_brick_started (brickinfo)) { - ret = glusterd_brick_disconnect (brickinfo); - if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, "Failed to " - "disconnect %s:%s", brickinfo->hostname, - brickinfo->path); - break; - } +static glusterd_brickinfo_t * +find_compatible_brick(glusterd_conf_t *conf, glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, + glusterd_volinfo_t **other_vol_p) +{ + glusterd_brickinfo_t *other_brick = NULL; + glusterd_volinfo_t *other_vol = NULL; + glusterd_snap_t *snap = NULL; + + /* Just return NULL here if multiplexing is disabled. */ + if (!is_brick_mx_enabled()) { + return NULL; + } + + other_brick = find_compat_brick_in_vol(conf, volinfo, NULL, brickinfo); + if (other_brick) { + *other_vol_p = volinfo; + return other_brick; + } + + /* + * This check is necessary because changes to a volume's + * transport options aren't propagated to snapshots. Such a + * change might break compatibility between the two, but we + * have no way to "evict" a brick from the process it's + * currently in. If we keep it separate from the start, we + * avoid the problem. Note that snapshot bricks can still be + * colocated with one another, even if they're for different + * volumes, because the only thing likely to differ is their + * auth options and those are not a factor in determining + * compatibility. + * + * The very same immutability of snapshot bricks' transport + * options, which can make them incompatible with their parent + * volumes, ensures that once-compatible snapshot bricks will + * remain compatible. However, the same is not true for bricks + * belonging to two non-snapshot volumes. In that case, a + * change to one might break compatibility and require them to + * be separated, which is not yet done. 
+ * + * TBD: address the option-change issue for non-snapshot bricks + */ + if (!volinfo->is_snap_volume) { + cds_list_for_each_entry(other_vol, &conf->volumes, vol_list) + { + if (other_vol == volinfo) { + continue; + } + other_brick = find_compat_brick_in_vol(conf, other_vol, volinfo, + brickinfo); + if (other_brick) { + *other_vol_p = other_vol; + return other_brick; + } + } + } else { + cds_list_for_each_entry(snap, &conf->snapshots, snap_list) + { + cds_list_for_each_entry(other_vol, &snap->volumes, vol_list) + { + if (other_vol == volinfo) { + continue; + } + other_brick = find_compat_brick_in_vol(conf, other_vol, volinfo, + brickinfo); + if (other_brick) { + *other_vol_p = other_vol; + return other_brick; } + } + } + } + + return NULL; +} + +/* Below function is use to populate sockpath based on passed pid + value as a argument after check the value from proc and also + check if passed pid is match with running glusterfs process +*/ + +int +glusterd_get_sock_from_brick_pid(int pid, char *sockpath, size_t len) +{ + char buf[1024] = ""; + char cmdline[2048] = ""; + xlator_t *this = NULL; + int fd = -1; + int i = 0, j = 0; + char *ptr = NULL; + char *brptr = NULL; + char tmpsockpath[PATH_MAX] = ""; + size_t blen = 0; + int ret = -1; + + this = THIS; + GF_ASSERT(this); + +#ifdef __FreeBSD__ + blen = sizeof(buf); + int mib[4]; + + mib[0] = CTL_KERN; + mib[1] = KERN_PROC; + mib[2] = KERN_PROC_ARGS; + mib[3] = pid; + + if (sys_sysctl(mib, 4, buf, &blen, NULL, blen) != 0) { + gf_log(this->name, GF_LOG_ERROR, "brick process %d is not running", + pid); + return ret; + } +#else + char fname[128] = ""; + snprintf(fname, sizeof(fname), "/proc/%d/cmdline", pid); + + if (sys_access(fname, R_OK) != 0) { + gf_log(this->name, GF_LOG_ERROR, "brick process %d is not running", + pid); + return ret; + } + + fd = open(fname, O_RDONLY); + if (fd != -1) { + blen = (int)sys_read(fd, buf, 1024); + } else { + gf_log(this->name, GF_LOG_ERROR, "open failed %s to open a file %s", + strerror(errno), fname); + return ret; + } +#endif + + /* convert cmdline to single string */ + for (i = 0, j = 0; i < blen; i++) { + if (buf[i] == '\0') + cmdline[j++] = ' '; + else if (buf[i] < 32 || buf[i] > 126) /* remove control char */ + continue; + else if (buf[i] == '"' || buf[i] == '\\') { + cmdline[j++] = '\\'; + cmdline[j++] = buf[i]; + } else { + cmdline[j++] = buf[i]; } + } + cmdline[j] = '\0'; + if (fd) + sys_close(fd); + if (!strstr(cmdline, "glusterfs")) + return ret; + ptr = strstr(cmdline, "-S "); + if (!ptr) + return ret; + ptr = strchr(ptr, '/'); + if (!ptr) return ret; + brptr = strstr(ptr, "--brick-name"); + if (!brptr) + return ret; + i = 0; + + while (ptr < brptr) { + if (*ptr != 32) + tmpsockpath[i++] = *ptr; + ptr++; + } + + if (tmpsockpath[0]) { + strncpy(sockpath, tmpsockpath, i); + ret = 0; + } + + return ret; } -int32_t -glusterd_volinfo_copy_brick_portinfo (glusterd_volinfo_t *new_volinfo, - glusterd_volinfo_t *old_volinfo) +char * +search_brick_path_from_proc(pid_t brick_pid, char *brickpath) { - glusterd_brickinfo_t *new_brickinfo = NULL; - glusterd_brickinfo_t *old_brickinfo = NULL; + char *brick_path = NULL; +#ifdef __FreeBSD__ + struct filestat *fst; + struct procstat *ps; + struct kinfo_proc *kp; + struct filestat_list *head; + + ps = procstat_open_sysctl(); + if (ps == NULL) + goto out; + + kp = kinfo_getproc(brick_pid); + if (kp == NULL) + goto out; + + head = procstat_getfiles(ps, (void *)kp, 0); + if (head == NULL) + goto out; + + STAILQ_FOREACH(fst, head, next) + { + if (fst->fs_fd < 0) + 
continue; + + if (!strcmp(fst->fs_path, brickpath)) { + brick_path = gf_strdup(fst->fs_path); + break; + } + } - int ret = 0; - GF_ASSERT (new_volinfo); - GF_ASSERT (old_volinfo); - if (_gf_false == glusterd_is_volume_started (new_volinfo)) - goto out; - list_for_each_entry (new_brickinfo, &new_volinfo->bricks, brick_list) { - ret = glusterd_volume_brickinfo_get (new_brickinfo->uuid, - new_brickinfo->hostname, - new_brickinfo->path, - old_volinfo, &old_brickinfo); - if ((0 == ret) && glusterd_is_brick_started (old_brickinfo)) { - new_brickinfo->port = old_brickinfo->port; - } +out: + if (head != NULL) + procstat_freefiles(ps, head); + if (kp != NULL) + free(kp); + procstat_close(ps); +#else + struct dirent *dp = NULL; + DIR *dirp = NULL; + size_t len = 0; + int fd = -1; + char path[PATH_MAX] = ""; + struct dirent scratch[2] = { + { + 0, + }, + }; + + if (!brickpath) + goto out; + + len = sprintf(path, "/proc/%d/fd/", brick_pid); + if (len >= (sizeof(path) - 2)) + goto out; + + dirp = sys_opendir(path); + if (!dirp) + goto out; + + fd = dirfd(dirp); + if (fd < 0) + goto out; + + while ((dp = sys_readdir(dirp, scratch))) { + if (!strcmp(dp->d_name, ".") || !strcmp(dp->d_name, "..")) + continue; + + /* check for non numerical descriptors */ + if (!strtol(dp->d_name, (char **)NULL, 10)) + continue; + + len = readlinkat(fd, dp->d_name, path, sizeof(path) - 1); + /* TODO: handle len == -1 -> error condition in readlinkat */ + if (len > 1) { + path[len] = '\0'; + if (!strcmp(path, brickpath)) { + brick_path = gf_strdup(path); + break; + } } + } out: + if (dirp) + sys_closedir(dirp); +#endif + return brick_path; +} + +int +glusterd_brick_start(glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, gf_boolean_t wait, + gf_boolean_t only_connect) +{ + int ret = -1; + xlator_t *this = NULL; + glusterd_brickinfo_t *other_brick; + glusterd_conf_t *conf = NULL; + int32_t pid = -1; + char pidfile[PATH_MAX] = ""; + char socketpath[PATH_MAX] = ""; + char *brickpath = NULL; + glusterd_volinfo_t *other_vol; + gf_boolean_t is_service_running = _gf_false; + uuid_t volid = { + 0, + }; + ssize_t size = -1; + + this = THIS; + GF_ASSERT(this); + conf = this->private; + + if ((!brickinfo) || (!volinfo)) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); + goto out; + } + + if (gf_uuid_is_null(brickinfo->uuid)) { + ret = glusterd_resolve_brick(brickinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RESOLVE_BRICK_FAIL, + FMTSTR_RESOLVE_BRICK, brickinfo->hostname, brickinfo->path); + gf_event(EVENT_BRICKPATH_RESOLVE_FAILED, + "peer=%s;volume=%s;brick=%s", brickinfo->hostname, + volinfo->volname, brickinfo->path); + goto out; + } + } + + if (gf_uuid_compare(brickinfo->uuid, MY_UUID)) { ret = 0; - return ret; + goto out; + } + + /* If a trigger to start the brick is already initiated then no need for + * a reattempt as it's an overkill. With glusterd_brick_start () + * function being used in multiple places, when glusterd restarts we see + * three different triggers for an attempt to start the brick process + * due to the quorum handling code in glusterd_friend_sm. 
+ */ + if (brickinfo->status == GF_BRICK_STARTING || brickinfo->start_triggered || + GF_ATOMIC_GET(volinfo->volpeerupdate)) { + gf_msg_debug(this->name, 0, + "brick %s is already in starting " + "phase", + brickinfo->path); + ret = 0; + goto out; + } + if (!only_connect) + brickinfo->start_triggered = _gf_true; + + GLUSTERD_GET_BRICK_PIDFILE(pidfile, volinfo, brickinfo, conf); + + /* Compare volume-id xattr is helpful to ensure the existence of a + brick_root path before the start/attach a brick + */ + size = sys_lgetxattr(brickinfo->path, GF_XATTR_VOL_ID_KEY, volid, 16); + if (size != 16) { + gf_log(this->name, GF_LOG_ERROR, + "Missing %s extended attribute on brick root (%s)," + " brick is deemed not to be a part of the volume (%s) ", + GF_XATTR_VOL_ID_KEY, brickinfo->path, volinfo->volname); + goto out; + } + + if (strncmp(uuid_utoa(volinfo->volume_id), uuid_utoa(volid), + GF_UUID_BUF_SIZE)) { + gf_log(this->name, GF_LOG_ERROR, + "Mismatching %s extended attribute on brick root (%s)," + " brick is deemed not to be a part of the volume (%s)", + GF_XATTR_VOL_ID_KEY, brickinfo->path, volinfo->volname); + goto out; + } + is_service_running = gf_is_service_running(pidfile, &pid); + if (is_service_running) { + if (is_brick_mx_enabled()) { + brickpath = search_brick_path_from_proc(pid, brickinfo->path); + if (!brickpath) { + if (only_connect) + return 0; + gf_log(this->name, GF_LOG_INFO, + "Either pid %d is not running or brick" + " path %s is not consumed so cleanup pidfile", + pid, brickinfo->path); + /* brick isn't running,so unlink stale pidfile + * if any. + */ + if (sys_access(pidfile, R_OK) == 0) { + sys_unlink(pidfile); + } + goto run; + } + GF_FREE(brickpath); + ret = glusterd_get_sock_from_brick_pid(pid, socketpath, + sizeof(socketpath)); + if (ret) { + if (only_connect) + return 0; + gf_log(this->name, GF_LOG_INFO, + "Either pid %d is not running or does " + "not match with any running brick " + "processes", + pid); + /* Fetch unix socket is failed so unlink pidfile */ + if (sys_access(pidfile, R_OK) == 0) { + sys_unlink(pidfile); + } + goto run; + } + } + if (brickinfo->status != GF_BRICK_STARTING && + brickinfo->status != GF_BRICK_STARTED) { + gf_log(this->name, GF_LOG_INFO, + "discovered already-running brick %s", brickinfo->path); + (void)pmap_registry_bind(this, brickinfo->port, brickinfo->path, + GF_PMAP_PORT_BRICKSERVER, NULL); + brickinfo->port_registered = _gf_true; + /* + * This will unfortunately result in a separate RPC + * connection per brick, even though they're all in + * the same process. It works, but it would be nicer + * if we could find a pre-existing connection to that + * same port (on another brick) and re-use that. + * TBD: re-use RPC connection across bricks + */ + if (!is_brick_mx_enabled()) { + glusterd_set_brick_socket_filepath( + volinfo, brickinfo, socketpath, sizeof(socketpath)); + } + gf_log(this->name, GF_LOG_DEBUG, + "Using %s as sockfile for brick %s of volume %s ", + socketpath, brickinfo->path, volinfo->volname); + + (void)glusterd_brick_connect(volinfo, brickinfo, socketpath); + + ret = glusterd_brick_process_add_brick(brickinfo, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_BRICKPROC_ADD_BRICK_FAILED, + "Adding brick %s:%s to brick process " + "failed.", + brickinfo->hostname, brickinfo->path); + goto out; + } + /* We need to set the status back to STARTING so that + * while the other (re)start brick requests come in for + * other bricks, this brick can be considered as + * compatible. 
+ */ + brickinfo->status = GF_BRICK_STARTING; + } + return 0; + } + if (only_connect) + return 0; + +run: + ret = _mk_rundir_p(volinfo); + if (ret) + goto out; + + other_brick = find_compatible_brick(conf, volinfo, brickinfo, &other_vol); + if (other_brick) { + /* mark the brick to starting as send_attach_req might take few + * iterations to successfully attach the brick and we don't want + * to get into a state where another needless trigger to start + * the brick is processed + */ + brickinfo->status = GF_BRICK_STARTING; + ret = attach_brick(this, brickinfo, other_brick, volinfo, other_vol); + if (ret == 0) { + goto out; + } + /* Attach_brick is failed so unlink pidfile */ + if (sys_access(pidfile, R_OK) == 0) { + sys_unlink(pidfile); + } + } + + /* + * This hack is necessary because our brick-process management is a + * total nightmare. We expect a brick process's socket and pid files + * to be ready *immediately* after we start it. Ditto for it calling + * back to bind its port. Unfortunately, none of that is realistic. + * Any process takes non-zero time to start up. This has *always* been + * racy and unsafe; it just became more visible with multiplexing. + * + * The right fix would be to do all of this setup *in the parent*, + * which would include (among other things) getting the PID back from + * the "runner" code. That's all prohibitively difficult and risky. + * To work around the more immediate problems, we create a stub pidfile + * here to let gf_is_service_running know that we expect the process to + * be there shortly, and then it gets filled in with a real PID when + * the process does finish starting up. + * + * TBD: pray for GlusterD 2 to be ready soon. + */ + gf_log(this->name, GF_LOG_INFO, + "starting a fresh brick process for " + "brick %s", + brickinfo->path); + ret = glusterd_volume_start_glusterfs(volinfo, brickinfo, wait); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_DISCONNECTED, + "Unable to start brick %s:%s", brickinfo->hostname, + brickinfo->path); + gf_event(EVENT_BRICK_START_FAILED, "peer=%s;volume=%s;brick=%s", + brickinfo->hostname, volinfo->volname, brickinfo->path); + goto out; + } + +out: + if (ret && brickinfo) { + brickinfo->start_triggered = _gf_false; + } + gf_msg_debug(this->name, 0, "returning %d ", ret); + return ret; } -int32_t -glusterd_volinfo_stop_stale_bricks (glusterd_volinfo_t *new_volinfo, - glusterd_volinfo_t *old_volinfo) +int +glusterd_restart_bricks(void *opaque) { - glusterd_brickinfo_t *new_brickinfo = NULL; - glusterd_brickinfo_t *old_brickinfo = NULL; + int ret = 0; + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_snap_t *snap = NULL; + gf_boolean_t start_svcs = _gf_false; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + int active_count = 0; + int quorum_count = 0; + gf_boolean_t node_quorum = _gf_false; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, return_block); + + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, return_block); + + synclock_lock(&conf->big_lock); + + /* We need to ensure that restarting the bricks during glusterd restart + * shouldn't race with the import volume thread (refer + * glusterd_compare_friend_data ()) + */ + while (conf->restart_bricks) { + synccond_wait(&conf->cond_restart_bricks, &conf->big_lock); + } + conf->restart_bricks = _gf_true; + + GF_ATOMIC_INC(conf->blockers); + ret = glusterd_get_quorum_cluster_counts(this, &active_count, + &quorum_count); + if (ret) + goto out; + + if 
(does_quorum_meet(active_count, quorum_count)) + node_quorum = _gf_true; + + cds_list_for_each_entry(volinfo, &conf->volumes, vol_list) + { + if (volinfo->status != GLUSTERD_STATUS_STARTED) { + continue; + } + gf_msg_debug(this->name, 0, "starting the volume %s", volinfo->volname); - int ret = 0; - GF_ASSERT (new_volinfo); - GF_ASSERT (old_volinfo); - if (_gf_false == glusterd_is_volume_started (old_volinfo)) + /* Check the quorum, if quorum is not met, don't start the + bricks. Stop bricks in case they are running. + */ + ret = check_quorum_for_brick_start(volinfo, node_quorum); + if (ret == 0) { + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_SERVER_QUORUM_NOT_MET, + "Skipping brick " + "restart for volume %s as quorum is not met", + volinfo->volname); + (void)glusterd_stop_bricks(volinfo); + continue; + } else if (ret == 2 && conf->restart_done == _gf_true) { + /* If glusterd has been restarted and quorum is not + * applicable then do not restart the bricks as this + * might start bricks brought down purposely, say for + * maintenance + */ + continue; + } else { + start_svcs = _gf_true; + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + if (!brickinfo->start_triggered) { + pthread_mutex_lock(&brickinfo->restart_mutex); + { + glusterd_brick_start(volinfo, brickinfo, _gf_false, + _gf_false); + } + pthread_mutex_unlock(&brickinfo->restart_mutex); + } + } + ret = glusterd_store_volinfo(volinfo, GLUSTERD_VOLINFO_VER_AC_NONE); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_STORE_FAIL, + "Failed to " + "write volinfo for volume %s", + volinfo->volname); goto out; - list_for_each_entry (old_brickinfo, &old_volinfo->bricks, brick_list) { - ret = glusterd_volume_brickinfo_get (old_brickinfo->uuid, - old_brickinfo->hostname, - old_brickinfo->path, - new_volinfo, &new_brickinfo); - if (ret) { - ret = glusterd_brick_stop (old_volinfo, old_brickinfo); - if (ret) - gf_log ("glusterd", GF_LOG_ERROR, "Failed to " - "stop brick %s:%s", old_brickinfo->hostname, - old_brickinfo->path); + } + } + } + + cds_list_for_each_entry(snap, &conf->snapshots, snap_list) + { + cds_list_for_each_entry(volinfo, &snap->volumes, vol_list) + { + if (volinfo->status != GLUSTERD_STATUS_STARTED) + continue; + /* Check the quorum, if quorum is not met, don't start + * the bricks + */ + ret = check_quorum_for_brick_start(volinfo, node_quorum); + if (ret == 0) { + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_SERVER_QUORUM_NOT_MET, + "Skipping" + " brick restart for volume %s as " + "quorum is not met", + volinfo->volname); + continue; + } + start_svcs = _gf_true; + gf_msg_debug(this->name, 0, + "starting the snap " + "volume %s", + volinfo->volname); + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + if (!brickinfo->start_triggered) { + pthread_mutex_lock(&brickinfo->restart_mutex); + { + /* coverity[SLEEP] */ + glusterd_brick_start(volinfo, brickinfo, _gf_false, + _gf_false); + } + pthread_mutex_unlock(&brickinfo->restart_mutex); } + } + ret = glusterd_store_volinfo(volinfo, GLUSTERD_VOLINFO_VER_AC_NONE); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_STORE_FAIL, + "Failed to " + "write volinfo for volume %s", + volinfo->volname); + goto out; + } } + } + if (start_svcs == _gf_true) { + glusterd_svcs_manager(NULL); + } + + ret = 0; + +out: + conf->restart_done = _gf_true; + conf->restart_bricks = _gf_false; + if (GF_ATOMIC_DEC(conf->blockers) == 0) { + synccond_broadcast(&conf->cond_blockers); + } + synccond_broadcast(&conf->cond_restart_bricks); + 
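+ /* Descriptive note (editorial comment, not part of the original change):
+ * the two broadcasts above wake, respectively, any thread waiting for
+ * conf->blockers to drop to zero and any thread parked on
+ * cond_restart_bricks (e.g. the volume-import path referred to at the
+ * top of this function), now that conf->restart_bricks has been cleared.
+ */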
+return_block: + return ret; +} + +int +_local_gsyncd_start(dict_t *this, char *key, data_t *value, void *data) +{ + char *path_list = NULL; + char *slave = NULL; + char *slave_url = NULL; + char *slave_vol = NULL; + char *slave_host = NULL; + char *statefile = NULL; + char buf[1024] = "faulty"; + int ret = 0; + int op_ret = 0; + int ret_status = 0; + char uuid_str[64] = ""; + glusterd_volinfo_t *volinfo = NULL; + char confpath[PATH_MAX] = ""; + char *op_errstr = NULL; + glusterd_conf_t *priv = NULL; + gf_boolean_t is_template_in_use = _gf_false; + gf_boolean_t is_paused = _gf_false; + char key1[1024] = ""; + xlator_t *this1 = NULL; + + this1 = THIS; + GF_ASSERT(this1); + priv = this1->private; + GF_ASSERT(priv); + GF_ASSERT(data); + + volinfo = data; + slave = strchr(value->data, ':'); + if (slave) + slave++; + else + return 0; + + (void)snprintf(uuid_str, sizeof(uuid_str), "%s", (char *)value->data); + + /* Getting Local Brickpaths */ + ret = glusterd_get_local_brickpaths(volinfo, &path_list); + + /*Generating the conf file path needed by gsyncd */ + ret = glusterd_get_slave_info(slave, &slave_url, &slave_host, &slave_vol, + &op_errstr); + if (ret) { + gf_msg(this1->name, GF_LOG_ERROR, 0, GD_MSG_SLAVEINFO_FETCH_ERROR, + "Unable to fetch slave details."); + ret = -1; + goto out; + } + + ret = snprintf(confpath, sizeof(confpath) - 1, + "%s/" GEOREP "/%s_%s_%s/gsyncd.conf", priv->workdir, + volinfo->volname, slave_host, slave_vol); + confpath[ret] = '\0'; + + /* Fetching the last status of the node */ + ret = glusterd_get_statefile_name(volinfo, slave, confpath, &statefile, + &is_template_in_use); + if (ret) { + if (!strstr(slave, "::")) + gf_msg(this1->name, GF_LOG_INFO, 0, GD_MSG_SLAVE_URL_INVALID, + "%s is not a valid slave url.", slave); + else + gf_msg(this1->name, GF_LOG_INFO, 0, + GD_MSG_GET_STATEFILE_NAME_FAILED, + "Unable to get" + " statefile's name"); + goto out; + } + + /* If state-file entry is missing from the config file, + * do not start gsyncd on restart */ + if (is_template_in_use) { + gf_msg(this1->name, GF_LOG_INFO, 0, GD_MSG_NO_STATEFILE_ENTRY, + "state-file entry is missing in config file." + "Not Restarting"); + goto out; + } + + is_template_in_use = _gf_false; + + ret = gsync_status(volinfo->volname, slave, confpath, &ret_status, + &is_template_in_use); + if (ret == -1) { + gf_msg(this1->name, GF_LOG_INFO, 0, GD_MSG_GSYNC_VALIDATION_FAIL, + GEOREP " start option validation failed "); + ret = 0; + goto out; + } + + if (is_template_in_use == _gf_true) { + gf_msg(this1->name, GF_LOG_INFO, 0, GD_MSG_PIDFILE_NOT_FOUND, + "pid-file entry is missing in config file." + "Not Restarting"); ret = 0; + goto out; + } + + ret = glusterd_gsync_read_frm_status(statefile, buf, sizeof(buf)); + if (ret <= 0) { + gf_msg(this1->name, GF_LOG_ERROR, 0, GD_MSG_STAT_FILE_READ_FAILED, + "Unable to read the status"); + goto out; + } + + /* Form key1 which is "<user@><slave_host>::<slavevol>" */ + snprintf(key1, sizeof(key1), "%s::%s", slave_url, slave_vol); + + /* Looks for the last status, to find if the session was running + * when the node went down. If the session was just created or + * stopped, do not restart the geo-rep session */ + if ((!strcmp(buf, "Created")) || (!strcmp(buf, "Stopped"))) { + gf_msg(this1->name, GF_LOG_INFO, 0, GD_MSG_GEO_REP_START_FAILED, + "Geo-Rep Session was not started between " + "%s and %s::%s. 
Not Restarting", + volinfo->volname, slave_url, slave_vol); + goto out; + } else if (strstr(buf, "Paused")) { + is_paused = _gf_true; + } else if ((!strcmp(buf, "Config Corrupted"))) { + gf_msg(this1->name, GF_LOG_INFO, 0, GD_MSG_RECOVERING_CORRUPT_CONF, + "Recovering from a corrupted config. " + "Not Restarting. Use start (force) to " + "start the session between %s and %s::%s.", + volinfo->volname, slave_url, slave_vol); + goto out; + } + + if (is_paused) { + glusterd_start_gsync(volinfo, slave, path_list, confpath, uuid_str, + NULL, _gf_true); + } else { + /* Add slave to the dict indicating geo-rep session is running*/ + ret = dict_set_dynstr_with_alloc(volinfo->gsync_active_slaves, key1, + "running"); + if (ret) { + gf_msg(this1->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to set key:%s" + " value:running in the dict", + key1); + goto out; + } + ret = glusterd_start_gsync(volinfo, slave, path_list, confpath, + uuid_str, NULL, _gf_false); + if (ret) + dict_del(volinfo->gsync_active_slaves, key1); + } + out: - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); - return ret; + if (statefile) + GF_FREE(statefile); + if (slave_url) + GF_FREE(slave_url); + + if (is_template_in_use) { + op_ret = glusterd_create_status_file( + volinfo->volname, slave, slave_host, slave_vol, "Config Corrupted"); + if (op_ret) { + gf_msg(this1->name, GF_LOG_ERROR, 0, + GD_MSG_STATUSFILE_CREATE_FAILED, + "Unable to create status file" + ". Error : %s", + strerror(errno)); + ret = op_ret; + } + } + if (slave_vol) + GF_FREE(slave_vol); + GF_FREE(path_list); + GF_FREE(op_errstr); + + return ret; } -int32_t -glusterd_delete_stale_volume (glusterd_volinfo_t *stale_volinfo, - glusterd_volinfo_t *valid_volinfo) -{ - GF_ASSERT (stale_volinfo); - GF_ASSERT (valid_volinfo); - - /* If stale volume is in started state, copy the port numbers of the - * local bricks if they exist in the valid volume information. - * stop stale bricks. Stale volume information is going to be deleted. - * Which deletes the valid brick information inside stale volinfo. - * We dont want brick_rpc_notify to access already deleted brickinfo. - * Disconnect valid bricks. - */ - if (glusterd_is_volume_started (stale_volinfo)) { - if (glusterd_is_volume_started (valid_volinfo)) { - (void) glusterd_volinfo_stop_stale_bricks (valid_volinfo, - stale_volinfo); - //Only valid bricks will be running now. - (void) glusterd_volinfo_copy_brick_portinfo (valid_volinfo, - stale_volinfo); - (void) glusterd_volume_disconnect_all_bricks (stale_volinfo); - } else { - (void) glusterd_stop_bricks (stale_volinfo); +int +glusterd_volume_restart_gsyncds(glusterd_volinfo_t *volinfo) +{ + GF_ASSERT(volinfo); + + dict_foreach(volinfo->gsync_slaves, _local_gsyncd_start, volinfo); + return 0; +} + +int +glusterd_restart_gsyncds(glusterd_conf_t *conf) +{ + glusterd_volinfo_t *volinfo = NULL; + int ret = 0; + + cds_list_for_each_entry(volinfo, &conf->volumes, vol_list) + { + glusterd_volume_restart_gsyncds(volinfo); + } + return ret; +} + +int +glusterd_calc_dist_leaf_count(int rcount, int scount) +{ + return (rcount ? rcount : 1) * (scount ? 
scount : 1); +} + +int +glusterd_get_dist_leaf_count(glusterd_volinfo_t *volinfo) +{ + int rcount = volinfo->replica_count; + int scount = volinfo->stripe_count; + + if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE) + return volinfo->disperse_count; + + return glusterd_calc_dist_leaf_count(rcount, scount); +} + +int +glusterd_get_brickinfo(xlator_t *this, const char *brickname, int port, + glusterd_brickinfo_t **brickinfo) +{ + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *tmpbrkinfo = NULL; + glusterd_snap_t *snap = NULL; + int ret = -1; + + GF_ASSERT(brickname); + GF_ASSERT(this); + + priv = this->private; + cds_list_for_each_entry(volinfo, &priv->volumes, vol_list) + { + cds_list_for_each_entry(tmpbrkinfo, &volinfo->bricks, brick_list) + { + if (gf_uuid_compare(tmpbrkinfo->uuid, MY_UUID)) + continue; + if (!strcmp(tmpbrkinfo->path, brickname) && + (tmpbrkinfo->port == port)) { + *brickinfo = tmpbrkinfo; + return 0; + } + } + } + /* In case normal volume is not found, check for snapshot volumes */ + cds_list_for_each_entry(snap, &priv->snapshots, snap_list) + { + cds_list_for_each_entry(volinfo, &snap->volumes, vol_list) + { + cds_list_for_each_entry(tmpbrkinfo, &volinfo->bricks, brick_list) + { + if (gf_uuid_compare(tmpbrkinfo->uuid, MY_UUID)) + continue; + if (!strcmp(tmpbrkinfo->path, brickname)) { + *brickinfo = tmpbrkinfo; + return 0; } + } } - /* Delete all the bricks and stores and vol files. They will be created - * again by the valid_volinfo. Volume store delete should not be - * performed because some of the bricks could still be running, - * keeping pid files under run directory - */ - (void) glusterd_delete_all_bricks (stale_volinfo); - if (stale_volinfo->shandle) { - unlink (stale_volinfo->shandle->path); - (void) glusterd_store_handle_destroy (stale_volinfo->shandle); - stale_volinfo->shandle = NULL; + } + + return ret; +} + +glusterd_brickinfo_t * +glusterd_get_brickinfo_by_position(glusterd_volinfo_t *volinfo, uint32_t pos) +{ + glusterd_brickinfo_t *tmpbrkinfo = NULL; + + cds_list_for_each_entry(tmpbrkinfo, &volinfo->bricks, brick_list) + { + if (pos == 0) + return tmpbrkinfo; + pos--; + } + return NULL; +} + +void +glusterd_set_brick_status(glusterd_brickinfo_t *brickinfo, + gf_brick_status_t status) +{ + GF_ASSERT(brickinfo); + brickinfo->status = status; + if (GF_BRICK_STARTED == status) { + gf_msg_debug("glusterd", 0, + "Setting brick %s:%s status " + "to started", + brickinfo->hostname, brickinfo->path); + } else { + gf_msg_debug("glusterd", 0, + "Setting brick %s:%s status " + "to stopped", + brickinfo->hostname, brickinfo->path); + } +} + +gf_boolean_t +glusterd_is_brick_started(glusterd_brickinfo_t *brickinfo) +{ + GF_ASSERT(brickinfo); + return (brickinfo->status == GF_BRICK_STARTED); +} + +int +glusterd_friend_brick_belongs(glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, void *uuid) +{ + int ret = -1; + + GF_ASSERT(volinfo); + GF_ASSERT(brickinfo); + GF_ASSERT(uuid); + + if (gf_uuid_is_null(brickinfo->uuid)) { + ret = glusterd_resolve_brick(brickinfo); + if (ret) { + GF_ASSERT(0); + goto out; } - (void) glusterd_volinfo_delete (stale_volinfo); + } + if (!gf_uuid_compare(brickinfo->uuid, *((uuid_t *)uuid))) return 0; +out: + return -1; } -int32_t -glusterd_import_friend_volume (dict_t *vols, size_t count) +int +glusterd_get_brick_root(char *path, char **mount_point) { + char *ptr = NULL; + char *mnt_pt = NULL; + struct stat brickstat = {0}; + struct stat buf = {0}; + xlator_t *this = THIS; + 
GF_ASSERT(this); + + if (!path) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); + goto err; + } + mnt_pt = gf_strdup(path); + if (!mnt_pt) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); + goto err; + } + if (sys_stat(mnt_pt, &brickstat)) + goto err; + + while ((ptr = strrchr(mnt_pt, '/')) && ptr != mnt_pt) { + *ptr = '\0'; + if (sys_stat(mnt_pt, &buf)) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "Error in stat=%s", strerror(errno), NULL); + goto err; + } - int32_t ret = -1; - glusterd_conf_t *priv = NULL; - xlator_t *this = NULL; - glusterd_volinfo_t *old_volinfo = NULL; - glusterd_volinfo_t *new_volinfo = NULL; + if (brickstat.st_dev != buf.st_dev) { + *ptr = '/'; + break; + } + } - GF_ASSERT (vols); + if (ptr == mnt_pt) { + if (sys_stat("/", &buf)) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "Error in stat=%s", strerror(errno), NULL); + goto err; + } + if (brickstat.st_dev == buf.st_dev) + strcpy(mnt_pt, "/"); + } - this = THIS; - GF_ASSERT (this); - priv = this->private; - GF_ASSERT (priv); - ret = glusterd_import_volinfo (vols, count, &new_volinfo); - if (ret) - goto out; + *mount_point = mnt_pt; + return 0; - ret = glusterd_volinfo_find (new_volinfo->volname, &old_volinfo); - if (0 == ret) { - (void) glusterd_delete_stale_volume (old_volinfo, new_volinfo); - } +err: + GF_FREE(mnt_pt); + return -1; +} + +static char * +glusterd_parse_inode_size(char *stream, char *pattern) +{ + char *needle = NULL; + char *trail = NULL; + + needle = strstr(stream, pattern); + if (!needle) + goto out; - if (glusterd_is_volume_started (new_volinfo)) { - (void) glusterd_start_bricks (new_volinfo); + needle = nwstrtail(needle, pattern); + + trail = needle; + while (trail && isdigit(*trail)) + trail++; + if (trail) + *trail = '\0'; + +out: + return needle; +} + +static struct fs_info { + char *fs_type_name; + char *fs_tool_name; + char *fs_tool_arg; + char *fs_tool_pattern; + char *fs_tool_pkg; +} glusterd_fs[] = {{"xfs", "xfs_info", NULL, "isize=", "xfsprogs"}, + {"ext3", "tune2fs", "-l", "Inode size:", "e2fsprogs"}, + {"ext4", "tune2fs", "-l", "Inode size:", "e2fsprogs"}, + {"btrfs", NULL, NULL, NULL, NULL}, + {"zfs", NULL, NULL, NULL, NULL}, + {NULL, NULL, NULL, NULL, NULL}}; + +static int +glusterd_add_inode_size_to_dict(dict_t *dict, int count) +{ + int ret = -1; + char key[64]; + char buffer[4096] = ""; + char *device = NULL; + char *fs_name = NULL; + char *cur_word = NULL; + char *trail = NULL; + runner_t runner = { + 0, + }; + struct fs_info *fs = NULL; + static dict_t *cached_fs = NULL; + xlator_t *this = THIS; + GF_ASSERT(this); + + ret = snprintf(key, sizeof(key), "brick%d.device", count); + ret = dict_get_strn(dict, key, ret, &device); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=%s", key, NULL); + goto out; + } + + if (cached_fs) { + if (dict_get_str(cached_fs, device, &cur_word) == 0) { + goto cached; + } + } else { + cached_fs = dict_new(); + } + + ret = snprintf(key, sizeof(key), "brick%d.fs_name", count); + ret = dict_get_strn(dict, key, ret, &fs_name); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=%s", key, NULL); + goto out; + } + + runinit(&runner); + runner_redir(&runner, STDOUT_FILENO, RUN_PIPE); + + for (fs = glusterd_fs; fs->fs_type_name; fs++) { + if (strcmp(fs_name, fs->fs_type_name) == 0) { + if (!fs->fs_tool_name) { + /* dynamic inodes */ + gf_smsg(this->name, GF_LOG_INFO, 0, 
GD_MSG_INODE_SIZE_GET_FAIL, + "The brick on device uses dynamic inode sizes", + "Device=%s (%s)", device, fs_name, NULL); + cur_word = "N/A"; + goto cached; + } + runner_add_arg(&runner, fs->fs_tool_name); + break; } + } + + if (runner.argv[0]) { + if (fs->fs_tool_arg) + runner_add_arg(&runner, fs->fs_tool_arg); + runner_add_arg(&runner, device); + } else { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_INODE_SIZE_GET_FAIL, + "Could not find tool to get inode size for device", "Tool=%s", + fs->fs_tool_name, "Device=%s (%s)", device, fs_name, + "Missing package=%s ?", fs->fs_tool_pkg, NULL); + goto out; + } + + ret = runner_start(&runner); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_CMD_EXEC_FAIL, + "Failed to execute \"%s\"", fs->fs_tool_name, NULL); + /* + * Runner_start might return an error after the child has + * been forked, e.g. if the program isn't there. In that + * case, we still need to call runner_end to reap the + * child and free resources. Fortunately, that seems to + * be harmless for other kinds of failures. + */ + (void)runner_end(&runner); + goto out; + } + + for (;;) { + if (fgets(buffer, sizeof(buffer), + runner_chio(&runner, STDOUT_FILENO)) == NULL) + break; + trail = strrchr(buffer, '\n'); + if (trail) + *trail = '\0'; + + cur_word = glusterd_parse_inode_size(buffer, fs->fs_tool_pattern); + + if (cur_word) + break; + } + + ret = runner_end(&runner); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_CMD_EXEC_FAIL, + "Tool exited with non-zero exit status", "Tool=%s", + fs->fs_tool_name, NULL); + + goto out; + } + if (!cur_word) { + ret = -1; + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_INODE_SIZE_GET_FAIL, + "Using Tool=%s", fs->fs_tool_name, NULL); + goto out; + } - ret = glusterd_store_volinfo (new_volinfo, GLUSTERD_VOLINFO_VER_AC_NONE); - ret = glusterd_create_volfiles_and_notify_services (new_volinfo); - if (ret) - goto out; + if (dict_set_dynstr_with_alloc(cached_fs, device, cur_word)) { + /* not fatal if not entered into the cache */ + gf_msg_debug(this->name, 0, "failed to cache fs inode size for %s", + device); + } + +cached: + snprintf(key, sizeof(key), "brick%d.inode_size", count); + + ret = dict_set_dynstr_with_alloc(dict, key, cur_word); - list_add_tail (&new_volinfo->vol_list, &priv->volumes); out: - gf_log ("", GF_LOG_DEBUG, "Returning with ret: %d", ret); - return ret; + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INODE_SIZE_GET_FAIL, NULL); + return ret; } -int32_t -glusterd_import_friend_volumes (dict_t *vols) +struct mntent * +glusterd_get_mnt_entry_info(char *mnt_pt, char *buff, int buflen, + struct mntent *entry_ptr) { - int32_t ret = -1; - int32_t count = 0; - int i = 1; + struct mntent *entry = NULL; + FILE *mtab = NULL; + char abspath[PATH_MAX] = ""; - GF_ASSERT (vols); + GF_ASSERT(mnt_pt); + GF_ASSERT(buff); + GF_ASSERT(entry_ptr); - ret = dict_get_int32 (vols, "count", &count); - if (ret) - goto out; + mtab = setmntent(_PATH_MOUNTED, "r"); + if (!mtab) + goto out; - while (i <= count) { - ret = glusterd_import_friend_volume (vols, i); - if (ret) - goto out; - i++; + if (!realpath(mnt_pt, abspath)) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_MNTENTRY_GET_FAIL, + "realpath () failed for path %s", mnt_pt); + goto out; + } + + entry = getmntent_r(mtab, entry_ptr, buff, buflen); + + while (1) { + if (!entry) + goto out; + + if (!strcmp(entry->mnt_dir, abspath) && + strcmp(entry->mnt_type, "rootfs")) + break; + entry = getmntent_r(mtab, entry_ptr, buff, buflen); + } + +out: + if (NULL != mtab) { + 
endmntent(mtab); + } + return entry; +} + +static int +glusterd_add_brick_mount_details(glusterd_brickinfo_t *brickinfo, dict_t *dict, + int count) +{ + int ret = -1; + char key[64] = ""; + char buff[PATH_MAX] = ""; + char base_key[32] = ""; + struct mntent save_entry = {0}; + char *mnt_pt = NULL; + struct mntent *entry = NULL; + xlator_t *this = THIS; + GF_ASSERT(this); + + snprintf(base_key, sizeof(base_key), "brick%d", count); + + ret = glusterd_get_brick_root(brickinfo->path, &mnt_pt); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_BRICKPATH_ROOT_GET_FAIL, + NULL); + goto out; + } + + entry = glusterd_get_mnt_entry_info(mnt_pt, buff, sizeof(buff), + &save_entry); + if (!entry) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GET_MNT_ENTRY_INFO_FAIL, + NULL); + ret = -1; + goto out; + } + + /* get device file */ + snprintf(key, sizeof(key), "%s.device", base_key); + + ret = dict_set_dynstr_with_alloc(dict, key, entry->mnt_fsname); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto out; + } + + /* fs type */ + snprintf(key, sizeof(key), "%s.fs_name", base_key); + + ret = dict_set_dynstr_with_alloc(dict, key, entry->mnt_type); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto out; + } + + /* mount options */ + snprintf(key, sizeof(key), "%s.mnt_options", base_key); + + ret = dict_set_dynstr_with_alloc(dict, key, entry->mnt_opts); + +out: + if (mnt_pt) + GF_FREE(mnt_pt); + + return ret; +} + +char * +glusterd_get_brick_mount_device(char *brick_path) +{ + int ret = -1; + char *mnt_pt = NULL; + char *device = NULL; + char buff[PATH_MAX] = ""; + struct mntent *entry = NULL; + struct mntent save_entry = { + 0, + }; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(brick_path); + + ret = glusterd_get_brick_root(brick_path, &mnt_pt); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICKPATH_ROOT_GET_FAIL, + "Failed to get mount point " + "for %s brick", + brick_path); + goto out; + } + + entry = glusterd_get_mnt_entry_info(mnt_pt, buff, sizeof(buff), + &save_entry); + if (NULL == entry) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MNTENTRY_GET_FAIL, + "Failed to get mnt entry " + "for %s mount path", + mnt_pt); + goto out; + } + + /* get the fs_name/device */ + device = gf_strdup(entry->mnt_fsname); + +out: + if (mnt_pt) + GF_FREE(mnt_pt); + + return device; +} + +int +glusterd_add_brick_detail_to_dict(glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, dict_t *dict, + int count) +{ + int ret = -1; + uint64_t memtotal = 0; + uint64_t memfree = 0; + uint64_t inodes_total = 0; + uint64_t inodes_free = 0; + uint64_t block_size = 0; + char key[64]; + char base_key[32]; + struct statvfs brickstat = {0}; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(volinfo); + GF_ASSERT(brickinfo); + GF_ASSERT(dict); + + snprintf(base_key, sizeof(base_key), "brick%d", count); + + ret = sys_statvfs(brickinfo->path, &brickstat); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "statfs error: %s ", strerror(errno)); + goto out; + } + + /* file system block size */ + block_size = brickstat.f_bsize; + snprintf(key, sizeof(key), "%s.block_size", base_key); + ret = dict_set_uint64(dict, key, block_size); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto out; + } + + /* free space in brick */ + memfree = brickstat.f_bfree * brickstat.f_bsize; 
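+ /* Descriptive note (editorial comment, not part of the original change):
+ * free and total capacity are reported in bytes by multiplying the
+ * statvfs block counts by f_bsize. POSIX defines these counts in
+ * f_frsize units, so the figures are exact only where the two sizes
+ * match, as they typically do on local filesystems.
+ */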
+ snprintf(key, sizeof(key), "%s.free", base_key); + ret = dict_set_uint64(dict, key, memfree); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto out; + } + + /* total space of brick */ + memtotal = brickstat.f_blocks * brickstat.f_bsize; + snprintf(key, sizeof(key), "%s.total", base_key); + ret = dict_set_uint64(dict, key, memtotal); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto out; + } + + /* inodes: total and free counts only for ext2/3/4 and xfs */ + inodes_total = brickstat.f_files; + if (inodes_total) { + snprintf(key, sizeof(key), "%s.total_inodes", base_key); + ret = dict_set_uint64(dict, key, inodes_total); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto out; } + } + inodes_free = brickstat.f_ffree; + if (inodes_free) { + snprintf(key, sizeof(key), "%s.free_inodes", base_key); + ret = dict_set_uint64(dict, key, inodes_free); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto out; + } + } + + ret = glusterd_add_brick_mount_details(brickinfo, dict, count); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_ADD_BRICK_MNT_INFO_FAIL, + NULL); + goto out; + } + + ret = glusterd_add_inode_size_to_dict(dict, count); out: - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); - return ret; + if (ret) + gf_msg_debug(this->name, 0, + "Error adding brick" + " detail to dict: %s", + strerror(errno)); + return ret; } int32_t -glusterd_compare_friend_data (dict_t *vols, int32_t *status) +glusterd_add_brick_to_dict(glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, dict_t *dict, + int32_t count) { - int32_t ret = -1; - int32_t count = 0; - int i = 1; - gf_boolean_t update = _gf_false; - gf_boolean_t stale_nfs = _gf_false; + int ret = -1; + int32_t pid = -1; + char key[64]; + int keylen; + char base_key[32]; + char pidfile[PATH_MAX] = ""; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + gf_boolean_t brick_online = _gf_false; + char *brickpath = NULL; + + GF_ASSERT(volinfo); + GF_ASSERT(brickinfo); + GF_ASSERT(dict); + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + + snprintf(base_key, sizeof(base_key), "brick%d", count); + keylen = snprintf(key, sizeof(key), "%s.hostname", base_key); + + ret = dict_set_strn(dict, key, keylen, brickinfo->hostname); + if (ret) + goto out; + + keylen = snprintf(key, sizeof(key), "%s.path", base_key); + ret = dict_set_strn(dict, key, keylen, brickinfo->path); + if (ret) + goto out; + + /* add peer uuid */ + snprintf(key, sizeof(key), "%s.peerid", base_key); + ret = dict_set_dynstr_with_alloc(dict, key, uuid_utoa(brickinfo->uuid)); + if (ret) { + goto out; + } + + keylen = snprintf(key, sizeof(key), "%s.port", base_key); + ret = dict_set_int32n( + dict, key, keylen, + (volinfo->transport_type == GF_TRANSPORT_RDMA) ? 
0 : brickinfo->port); + if (ret) + goto out; + + keylen = snprintf(key, sizeof(key), "%s.rdma_port", base_key); + if (volinfo->transport_type == GF_TRANSPORT_RDMA) { + ret = dict_set_int32n(dict, key, keylen, brickinfo->port); + } else if (volinfo->transport_type == GF_TRANSPORT_BOTH_TCP_RDMA) { + ret = dict_set_int32n(dict, key, keylen, brickinfo->rdma_port); + } else + ret = dict_set_int32n(dict, key, keylen, 0); + + if (ret) + goto out; + + GLUSTERD_GET_BRICK_PIDFILE(pidfile, volinfo, brickinfo, priv); + + if (glusterd_is_brick_started(brickinfo)) { + if (gf_is_service_running(pidfile, &pid) && + brickinfo->port_registered) { + if (!is_brick_mx_enabled()) { + brick_online = _gf_true; + } else { + brickpath = search_brick_path_from_proc(pid, brickinfo->path); + if (!brickpath) { + gf_log(this->name, GF_LOG_INFO, + "brick path %s is not consumed", brickinfo->path); + brick_online = _gf_false; + } else { + brick_online = _gf_true; + GF_FREE(brickpath); + } + } + } else { + pid = -1; + } + } - GF_ASSERT (vols); - GF_ASSERT (status); + keylen = snprintf(key, sizeof(key), "%s.pid", base_key); + ret = dict_set_int32n(dict, key, keylen, pid); + if (ret) + goto out; - ret = dict_get_int32 (vols, "count", &count); + keylen = snprintf(key, sizeof(key), "%s.status", base_key); + ret = dict_set_int32n(dict, key, keylen, brick_online); + +out: + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + gf_msg_debug(this->name, 0, "Returning %d", ret); + } + + return ret; +} + +int32_t +glusterd_get_all_volnames(dict_t *dict) +{ + int ret = -1; + int32_t vol_count = 0; + char key[64] = ""; + int keylen; + glusterd_volinfo_t *entry = NULL; + glusterd_conf_t *priv = NULL; + + priv = THIS->private; + GF_ASSERT(priv); + + cds_list_for_each_entry(entry, &priv->volumes, vol_list) + { + keylen = snprintf(key, sizeof(key), "vol%d", vol_count); + ret = dict_set_strn(dict, key, keylen, entry->volname); if (ret) + goto out; + + vol_count++; + } + + ret = dict_set_int32n(dict, "vol_count", SLEN("vol_count"), vol_count); + +out: + if (ret) + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "failed to get all " + "volume names for status"); + return ret; +} + +int +glusterd_all_volume_cond_check(glusterd_condition_func func, int status, + void *ctx) +{ + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + priv = this->private; + + cds_list_for_each_entry(volinfo, &priv->volumes, vol_list) + { + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + ret = func(volinfo, brickinfo, ctx); + if (ret != status) { + ret = -1; goto out; + } + } + } + ret = 0; +out: + gf_msg_debug("glusterd", 0, "returning %d", ret); + return ret; +} - while (i <= count) { - ret = glusterd_compare_friend_volume (vols, i, status); - if (ret) - goto out; +int +glusterd_brick_stop(glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, gf_boolean_t del_brick) +{ + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + + this = THIS; + GF_ASSERT(this); + conf = this->private; + GF_ASSERT(conf); + + if ((!brickinfo) || (!volinfo)) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); + goto out; + } + + if (gf_uuid_is_null(brickinfo->uuid)) { + ret = glusterd_resolve_brick(brickinfo); + if (ret) { + gf_event(EVENT_BRICKPATH_RESOLVE_FAILED, + "peer=%s;volume=%s;brick=%s", brickinfo->hostname, + 
volinfo->volname, brickinfo->path); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RESOLVE_BRICK_FAIL, + FMTSTR_RESOLVE_BRICK, brickinfo->hostname, brickinfo->path); + goto out; + } + } - if (GLUSTERD_VOL_COMP_RJT == *status) { - ret = 0; - goto out; - } - if (GLUSTERD_VOL_COMP_UPDATE_REQ == *status) - update = _gf_true; + if (gf_uuid_compare(brickinfo->uuid, MY_UUID)) { + ret = 0; + if (del_brick) + glusterd_delete_brick(volinfo, brickinfo); + goto out; + } + + ret = glusterd_volume_stop_glusterfs(volinfo, brickinfo, del_brick); + if (ret) { + gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_BRICK_STOP_FAIL, + "Unable to stop" + " brick: %s:%s", + brickinfo->hostname, brickinfo->path); + goto out; + } - i++; +out: + gf_msg_debug(this->name, 0, "returning %d ", ret); + return ret; +} + +int +glusterd_is_defrag_on(glusterd_volinfo_t *volinfo) +{ + return (volinfo->rebal.defrag != NULL); +} + +int +glusterd_new_brick_validate(char *brick, glusterd_brickinfo_t *brickinfo, + char *op_errstr, size_t len, char *op) +{ + glusterd_brickinfo_t *newbrickinfo = NULL; + int ret = -1; + gf_boolean_t is_allocated = _gf_false; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + GF_ASSERT(brick); + GF_ASSERT(op_errstr); + + if (!brickinfo) { + ret = glusterd_brickinfo_new_from_brick(brick, &newbrickinfo, _gf_true, + NULL); + if (ret) + goto out; + is_allocated = _gf_true; + } else { + newbrickinfo = brickinfo; + } + + ret = glusterd_resolve_brick(newbrickinfo); + if (ret) { + snprintf(op_errstr, len, + "Host %s is not in \'Peer " + "in Cluster\' state", + newbrickinfo->hostname); + goto out; + } + + if (!gf_uuid_compare(MY_UUID, newbrickinfo->uuid)) { + /* brick is local */ + if (!glusterd_is_brickpath_available(newbrickinfo->uuid, + newbrickinfo->path)) { + snprintf(op_errstr, len, + "Brick: %s not available." 
+ " Brick may be containing or be contained " + "by an existing brick.", + brick); + if (op && (!strcmp(op, "GF_RESET_OP_COMMIT") || + !strcmp(op, "GF_RESET_OP_COMMIT_FORCE"))) + ret = 1; + else + ret = -1; + goto out; } - if (update) { - if (glusterd_is_nfs_started ()) - stale_nfs = _gf_true; - ret = glusterd_import_friend_volumes (vols); - if (ret) - goto out; - if (_gf_false == glusterd_are_all_volumes_stopped ()) { - ret = glusterd_check_generate_start_nfs (); - } else { - if (stale_nfs) - glusterd_nfs_server_stop (); - } + } else { + peerinfo = glusterd_peerinfo_find_by_uuid(newbrickinfo->uuid); + if (peerinfo == NULL) { + ret = -1; + snprintf(op_errstr, len, "Failed to find host %s", + newbrickinfo->hostname); + goto out; } + if ((!peerinfo->connected)) { + snprintf(op_errstr, len, "Host %s not connected", + newbrickinfo->hostname); + ret = -1; + goto out; + } + + if (peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) { + snprintf(op_errstr, len, + "Host %s is not in \'Peer " + "in Cluster\' state", + newbrickinfo->hostname); + ret = -1; + goto out; + } + } + + ret = 0; out: - gf_log ("", GF_LOG_DEBUG, "Returning with ret: %d, status: %d", - ret, *status); + if (is_allocated) + glusterd_brickinfo_delete(newbrickinfo); + if (op_errstr[0] != '\0') + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_VALIDATE_FAIL, "%s", + op_errstr); + gf_msg_debug(this->name, 0, "returning %d ", ret); + return ret; +} - return ret; +int +glusterd_rb_check_bricks(glusterd_volinfo_t *volinfo, glusterd_brickinfo_t *src, + glusterd_brickinfo_t *dst) +{ + glusterd_replace_brick_t *rb = NULL; + + GF_ASSERT(volinfo); + + rb = &volinfo->rep_brick; + + if (!rb->src_brick || !rb->dst_brick) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); + return -1; + } + + if (strcmp(rb->src_brick->hostname, src->hostname) || + strcmp(rb->src_brick->path, src->path)) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_RB_SRC_BRICKS_MISMATCH, + "Replace brick src bricks differ"); + return -1; + } + + if (strcmp(rb->dst_brick->hostname, dst->hostname) || + strcmp(rb->dst_brick->path, dst->path)) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_RB_DST_BRICKS_MISMATCH, + "Replace brick dst bricks differ"); + return -1; + } + + return 0; } -gf_boolean_t -glusterd_is_nfs_started () +/*path needs to be absolute; works only on gfid, volume-id*/ +static int +glusterd_is_uuid_present(char *path, char *xattr, gf_boolean_t *present) { - int32_t ret = -1; - xlator_t *this = NULL; - glusterd_conf_t *priv = NULL; - char pidfile[PATH_MAX] = {0,}; + GF_ASSERT(path); + GF_ASSERT(xattr); + GF_ASSERT(present); - this = THIS; - GF_ASSERT(this); + int ret = -1; + uuid_t uid = { + 0, + }; - priv = this->private; + if (!path || !xattr || !present) + goto out; - GLUSTERD_GET_NFS_PIDFILE(pidfile); - ret = access (pidfile, F_OK); + ret = sys_lgetxattr(path, xattr, &uid, 16); - if (ret == 0) - return _gf_true; - else - return _gf_false; + if (ret >= 0) { + *present = _gf_true; + ret = 0; + goto out; + } + + switch (errno) { +#if defined(ENODATA) + case ENODATA: /* FALLTHROUGH */ +#endif +#if defined(ENOATTR) && (ENOATTR != ENODATA) + case ENOATTR: /* FALLTHROUGH */ +#endif + case ENOTSUP: + *present = _gf_false; + ret = 0; + break; + default: + break; + } +out: + return ret; } -int32_t -glusterd_nfs_server_start () -{ - int32_t ret = -1; - xlator_t *this = NULL; - glusterd_conf_t *priv = NULL; - char pidfile[PATH_MAX] = {0,}; - char logfile[PATH_MAX] = {0,}; - char volfile[PATH_MAX] = {0,}; - char path[PATH_MAX] = {0,}; - char 
cmd_str[8192] = {0,}; - char rundir[PATH_MAX] = {0,}; - - this = THIS; - GF_ASSERT(this); - - priv = this->private; - - GLUSTERD_GET_NFS_DIR(path, priv); - snprintf (rundir, PATH_MAX, "%s/run", path); - ret = mkdir (rundir, 0777); - - if ((ret == -1) && (EEXIST != errno)) { - gf_log ("", GF_LOG_ERROR, "Unable to create rundir %s", - rundir); +/*path needs to be absolute*/ +static int +glusterd_is_path_in_use(char *path, gf_boolean_t *in_use, char **op_errstr) +{ + int i = 0; + int ret = -1; + gf_boolean_t used = _gf_false; + char dir[PATH_MAX] = ""; + char *curdir = NULL; + char msg[2048] = ""; + char *keys[3] = {GFID_XATTR_KEY, GF_XATTR_VOL_ID_KEY, NULL}; + + GF_ASSERT(path); + if (!path) + goto out; + + if (snprintf(dir, PATH_MAX, "%s", path) >= PATH_MAX) + goto out; + + curdir = dir; + do { + for (i = 0; !used && keys[i]; i++) { + ret = glusterd_is_uuid_present(curdir, keys[i], &used); + if (ret) goto out; } - GLUSTERD_GET_NFS_PIDFILE(pidfile); - glusterd_get_nfs_filepath (volfile); + if (used) + break; - ret = access (volfile, F_OK); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Nfs Volfile %s is not present", - volfile); + curdir = dirname(curdir); + if (!strcmp(curdir, ".")) + goto out; + + } while (strcmp(curdir, "/")); + + if (!strcmp(curdir, "/")) { + for (i = 0; !used && keys[i]; i++) { + ret = glusterd_is_uuid_present(curdir, keys[i], &used); + if (ret) goto out; } + } - snprintf (logfile, PATH_MAX, "%s/nfs.log", DEFAULT_LOG_FILE_DIRECTORY); + ret = 0; + *in_use = used; +out: + if (ret) { + snprintf(msg, sizeof(msg), + "Failed to get extended " + "attribute %s, reason: %s", + keys[i], strerror(errno)); + } + + if (*in_use) { + if (path && curdir && !strcmp(path, curdir)) { + snprintf(msg, sizeof(msg), + "%s is already part of a " + "volume", + path); + } else { + snprintf(msg, sizeof(msg), + "parent directory %s is " + "already part of a volume", + curdir); + } + } - snprintf (cmd_str, 8192, - "%s/sbin/glusterfs -f %s -p %s -l %s", - GFS_PREFIX, volfile, pidfile, logfile); - ret = gf_system (cmd_str); + if (strlen(msg)) { + gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_PATH_ALREADY_PART_OF_VOL, + "%s", msg); + *op_errstr = gf_strdup(msg); + } + return ret; +} + +int +glusterd_check_and_set_brick_xattr(char *host, char *path, uuid_t uuid, + char **op_errstr, gf_boolean_t is_force) +{ + int ret = -1; + char msg[2048] = ""; + gf_boolean_t in_use = _gf_false; + int flags = 0; + xlator_t *this = THIS; + GF_ASSERT(this); + + /* Check for xattr support in backend fs */ + ret = sys_lsetxattr(path, "trusted.glusterfs.test", "working", 8, 0); + if (ret == -1) { + snprintf(msg, sizeof(msg), + "Glusterfs is not" + " supported on brick: %s:%s.\nSetting" + " extended attributes failed, reason:" + " %s.", + host, path, strerror(errno)); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_SET_XATTR_BRICK_FAIL, + "Host=%s, Path=%s", host, path, NULL); + goto out; + + } else { + ret = sys_lremovexattr(path, "trusted.glusterfs.test"); + if (ret) { + snprintf(msg, sizeof(msg), + "Removing test extended" + " attribute failed, reason: %s", + strerror(errno)); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_REMOVE_XATTR_FAIL, + NULL); + goto out; + } + } + + ret = glusterd_is_path_in_use(path, &in_use, op_errstr); + if (ret) + goto out; + + if (in_use && !is_force) { + ret = -1; + goto out; + } + + if (!is_force) + flags = XATTR_CREATE; + + ret = sys_lsetxattr(path, GF_XATTR_VOL_ID_KEY, uuid, 16, flags); + if (ret == -1) { + snprintf(msg, sizeof(msg), + "Failed to set extended " + "attributes %s, reason: 
%s", + GF_XATTR_VOL_ID_KEY, strerror(errno)); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_SET_XATTR_FAIL, + "Attriutes=%s", GF_XATTR_VOL_ID_KEY, NULL); + goto out; + } + + ret = 0; out: - return ret; + if (strlen(msg)) + *op_errstr = gf_strdup(msg); + + return ret; +} + +static int +glusterd_sm_tr_log_transition_add_to_dict(dict_t *dict, + glusterd_sm_tr_log_t *log, int i, + int count) +{ + int ret = -1; + char key[64] = ""; + int keylen; + char timestr[GF_TIMESTR_SIZE] = ""; + char *str = NULL; + + GF_ASSERT(dict); + GF_ASSERT(log); + + keylen = snprintf(key, sizeof(key), "log%d-old-state", count); + str = log->state_name_get(log->transitions[i].old_state); + ret = dict_set_strn(dict, key, keylen, str); + if (ret) + goto out; + + keylen = snprintf(key, sizeof(key), "log%d-event", count); + str = log->event_name_get(log->transitions[i].event); + ret = dict_set_strn(dict, key, keylen, str); + if (ret) + goto out; + + keylen = snprintf(key, sizeof(key), "log%d-new-state", count); + str = log->state_name_get(log->transitions[i].new_state); + ret = dict_set_strn(dict, key, keylen, str); + if (ret) + goto out; + + snprintf(key, sizeof(key), "log%d-time", count); + gf_time_fmt(timestr, sizeof timestr, log->transitions[i].time, + gf_timefmt_FT); + ret = dict_set_dynstr_with_alloc(dict, key, timestr); + if (ret) + goto out; + +out: + if (key[0] != '\0' && ret != 0) + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + gf_msg_debug("glusterd", 0, "returning %d", ret); + return ret; +} + +int +glusterd_sm_tr_log_add_to_dict(dict_t *dict, glusterd_sm_tr_log_t *circular_log) +{ + int ret = -1; + int i = 0; + int start = 0; + int end = 0; + int index = 0; + char key[16] = {0}; + glusterd_sm_tr_log_t *log = NULL; + int count = 0; + + GF_ASSERT(dict); + GF_ASSERT(circular_log); + + log = circular_log; + if (!log->count) + return 0; + + if (log->count == log->size) + start = log->current + 1; + + end = start + log->count; + for (i = start; i < end; i++, count++) { + index = i % log->count; + ret = glusterd_sm_tr_log_transition_add_to_dict(dict, log, index, + count); + if (ret) + goto out; + } + + ret = snprintf(key, sizeof(key), "count"); + ret = dict_set_int32n(dict, key, ret, log->count); + +out: + gf_msg_debug("glusterd", 0, "returning %d", ret); + return ret; +} + +int +glusterd_sm_tr_log_init(glusterd_sm_tr_log_t *log, char *(*state_name_get)(int), + char *(*event_name_get)(int), size_t size) +{ + glusterd_sm_transition_t *transitions = NULL; + int ret = -1; + + GF_ASSERT(size > 0); + GF_ASSERT(log && state_name_get && event_name_get); + + if (!log || !state_name_get || !event_name_get || (size <= 0)) + goto out; + + transitions = GF_CALLOC(size, sizeof(*transitions), gf_gld_mt_sm_tr_log_t); + if (!transitions) + goto out; + + log->transitions = transitions; + log->size = size; + log->state_name_get = state_name_get; + log->event_name_get = event_name_get; + ret = 0; + +out: + gf_msg_debug("glusterd", 0, "returning %d", ret); + return ret; } void -glusterd_nfs_pmap_deregister () +glusterd_sm_tr_log_delete(glusterd_sm_tr_log_t *log) { - if (pmap_unset (MOUNT_PROGRAM, MOUNTV3_VERSION)) - gf_log ("", GF_LOG_INFO, "De-registered MOUNTV3 successfully"); - else - gf_log ("", GF_LOG_ERROR, "De-register MOUNTV3 is unsuccessful"); + if (!log) + return; + GF_FREE(log->transitions); + return; +} - if (pmap_unset (MOUNT_PROGRAM, MOUNTV1_VERSION)) - gf_log ("", GF_LOG_INFO, "De-registered MOUNTV1 successfully"); - else - gf_log ("", GF_LOG_ERROR, "De-register MOUNTV1 
is unsuccessful"); +int +glusterd_sm_tr_log_transition_add(glusterd_sm_tr_log_t *log, int old_state, + int new_state, int event) +{ + glusterd_sm_transition_t *transitions = NULL; + int ret = -1; + int next = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + GF_ASSERT(log); + if (!log) + goto out; + + transitions = log->transitions; + if (!transitions) + goto out; + + if (log->count) + next = (log->current + 1) % log->size; + else + next = 0; + + transitions[next].old_state = old_state; + transitions[next].new_state = new_state; + transitions[next].event = event; + transitions[next].time = gf_time(); + + log->current = next; + if (log->count < log->size) + log->count++; + ret = 0; + gf_msg_debug(this->name, 0, + "Transitioning from '%s' to '%s' " + "due to event '%s'", + log->state_name_get(old_state), log->state_name_get(new_state), + log->event_name_get(event)); +out: + gf_msg_debug(this->name, 0, "returning %d", ret); + return ret; +} - if (pmap_unset (NFS_PROGRAM, NFSV3_VERSION)) - gf_log ("", GF_LOG_INFO, "De-registered NFSV3 successfully"); - else - gf_log ("", GF_LOG_ERROR, "De-register NFSV3 is unsuccessful"); +int +glusterd_remove_pending_entry(struct cds_list_head *list, void *elem) +{ + glusterd_pending_node_t *pending_node = NULL; + glusterd_pending_node_t *tmp = NULL; + int ret = 0; + + cds_list_for_each_entry_safe(pending_node, tmp, list, list) + { + if (elem == pending_node->node) { + cds_list_del_init(&pending_node->list); + GF_FREE(pending_node); + ret = 0; + goto out; + } + } +out: + gf_msg_debug(THIS->name, 0, "returning %d", ret); + return ret; +} + +int +glusterd_clear_pending_nodes(struct cds_list_head *list) +{ + glusterd_pending_node_t *pending_node = NULL; + glusterd_pending_node_t *tmp = NULL; + cds_list_for_each_entry_safe(pending_node, tmp, list, list) + { + cds_list_del_init(&pending_node->list); + GF_FREE(pending_node); + } + + return 0; } int32_t -glusterd_nfs_server_stop () +glusterd_delete_volume(glusterd_volinfo_t *volinfo) { - xlator_t *this = NULL; - glusterd_conf_t *priv = NULL; - char pidfile[PATH_MAX] = {0,}; - char path[PATH_MAX] = {0,}; + int ret = -1; + GF_ASSERT(volinfo); - this = THIS; - GF_ASSERT(this); + ret = glusterd_store_delete_volume(volinfo); - priv = this->private; + if (ret) + goto out; - GLUSTERD_GET_NFS_DIR(path, priv); - GLUSTERD_GET_NFS_PIDFILE(pidfile); + glusterd_volinfo_remove(volinfo); +out: + gf_msg_debug(THIS->name, 0, "returning %d", ret); + return ret; +} - glusterd_service_stop ("nfsd", pidfile, SIGKILL, _gf_true); - glusterd_nfs_pmap_deregister (); +int32_t +glusterd_delete_brick(glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo) +{ + int ret = 0; + char voldir[PATH_MAX] = ""; + glusterd_conf_t *priv = THIS->private; + GF_ASSERT(volinfo); + GF_ASSERT(brickinfo); + + GLUSTERD_GET_VOLUME_DIR(voldir, volinfo, priv); + + glusterd_delete_volfile(volinfo, brickinfo); + glusterd_store_delete_brick(brickinfo, voldir); + glusterd_brickinfo_delete(brickinfo); + volinfo->brick_count--; + return ret; +} - return 0; +int32_t +glusterd_delete_all_bricks(glusterd_volinfo_t *volinfo) +{ + int ret = 0; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_brickinfo_t *tmp = NULL; + + GF_ASSERT(volinfo); + + cds_list_for_each_entry_safe(brickinfo, tmp, &volinfo->bricks, brick_list) + { + ret = glusterd_delete_brick(volinfo, brickinfo); + } + return ret; } int -glusterd_remote_hostname_get (rpcsvc_request_t *req, char *remote_host, int len) +glusterd_get_local_brickpaths(glusterd_volinfo_t *volinfo, char 
**pathlist) { - GF_ASSERT (req); - GF_ASSERT (remote_host); - GF_ASSERT (req->trans); + char **path_tokens = NULL; + char *tmp_path_list = NULL; + char path[PATH_MAX] = ""; + int32_t count = 0; + int32_t pathlen = 0; + int32_t total_len = 0; + int32_t ret = 0; + int i = 0; + glusterd_brickinfo_t *brickinfo = NULL; + + if ((!volinfo) || (!pathlist)) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); + goto out; + } + + path_tokens = GF_CALLOC(sizeof(char *), volinfo->brick_count, + gf_gld_mt_charptr); + if (!path_tokens) { + gf_msg("glusterd", GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "Could not allocate memory."); + ret = -1; + goto out; + } - char *name = NULL; - char *delimiter = NULL; + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + if (gf_uuid_compare(brickinfo->uuid, MY_UUID)) + continue; - name = req->trans->peerinfo.identifier; - strncpy (remote_host, name, len); - delimiter = strchr (remote_host, ':'); + pathlen = snprintf(path, sizeof(path), "--path=%s ", brickinfo->path); + if (pathlen < sizeof(path)) + path[pathlen] = '\0'; + else + path[sizeof(path) - 1] = '\0'; + path_tokens[count] = gf_strdup(path); + if (!path_tokens[count]) { + gf_msg("glusterd", GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "Could not allocate memory."); + ret = -1; + goto out; + } + count++; + total_len += pathlen; + } + + tmp_path_list = GF_CALLOC(sizeof(char), total_len + 1, gf_gld_mt_char); + if (!tmp_path_list) { + gf_msg("glusterd", GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "Could not allocate memory."); + ret = -1; + goto out; + } + + for (i = 0; i < count; i++) + strcat(tmp_path_list, path_tokens[i]); - GF_ASSERT (delimiter); - if (!delimiter) { - memset (remote_host, 0, len); - return -1; + if (count) + *pathlist = tmp_path_list; + + ret = count; +out: + if (path_tokens) { + for (i = 0; i < count; i++) { + GF_FREE(path_tokens[i]); } + } - *delimiter = '\0'; + GF_FREE(path_tokens); + path_tokens = NULL; - return 0; + if (ret == 0) { + gf_msg_debug("glusterd", 0, "No Local Bricks Present."); + GF_FREE(tmp_path_list); + tmp_path_list = NULL; + } + + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; } int -glusterd_check_generate_start_nfs () +glusterd_start_gsync(glusterd_volinfo_t *master_vol, char *slave, + char *path_list, char *conf_path, char *glusterd_uuid_str, + char **op_errstr, gf_boolean_t is_pause) { - int ret = -1; + int32_t ret = 0; + int32_t status = 0; + char uuid_str[64] = ""; + runner_t runner = { + 0, + }; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + int errcode = 0; + gf_boolean_t is_template_in_use = _gf_false; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + uuid_utoa_r(MY_UUID, uuid_str); + + if (!path_list) { + ret = 0; + gf_msg_debug("glusterd", 0, + "No Bricks in this node." 
+ " Not starting gsyncd."); + goto out; + } + + ret = gsync_status(master_vol->volname, slave, conf_path, &status, + &is_template_in_use); + if (status == 0) + goto out; + + if (is_template_in_use == _gf_true) { + gf_asprintf(op_errstr, + GEOREP + " start failed for %s %s : " + "pid-file entry missing in config file", + master_vol->volname, slave); + ret = -1; + goto out; + } + + uuid_utoa_r(master_vol->volume_id, uuid_str); + runinit(&runner); + runner_add_args(&runner, GSYNCD_PREFIX "/gsyncd", path_list, "-c", NULL); + runner_argprintf(&runner, "%s", conf_path); + runner_argprintf(&runner, ":%s", master_vol->volname); + runner_add_args(&runner, slave, "--config-set", "session-owner", NULL); + runner_argprintf(&runner, "--value=%s", uuid_str); + synclock_unlock(&priv->big_lock); + ret = runner_run(&runner); + synclock_lock(&priv->big_lock); + if (ret == -1) { + errcode = -1; + goto out; + } + + runinit(&runner); + runner_add_args(&runner, GSYNCD_PREFIX "/gsyncd", path_list, "--monitor", + "-c", NULL); + runner_argprintf(&runner, "%s", conf_path); + runner_argprintf(&runner, "--iprefix=%s", DATADIR); + runner_argprintf(&runner, ":%s", master_vol->volname); + runner_argprintf(&runner, "--glusterd-uuid=%s", uuid_utoa(priv->uuid)); + runner_add_arg(&runner, slave); + if (is_pause) + runner_add_arg(&runner, "--pause-on-start"); + synclock_unlock(&priv->big_lock); + ret = runner_run(&runner); + synclock_lock(&priv->big_lock); + if (ret == -1) { + gf_asprintf(op_errstr, GEOREP " start failed for %s %s", + master_vol->volname, slave); + goto out; + } + + ret = 0; - ret = glusterd_create_nfs_volfile (); - if (ret) - goto out; +out: + if ((ret != 0) && errcode == -1) { + if (op_errstr) + *op_errstr = gf_strdup( + "internal error, cannot start " + "the " GEOREP " session"); + } + + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; +} - if (glusterd_is_nfs_started ()) { - ret = glusterd_nfs_server_stop (); - if (ret) - goto out; +int32_t +glusterd_recreate_volfiles(glusterd_conf_t *conf) +{ + glusterd_volinfo_t *volinfo = NULL; + int ret = 0; + int op_ret = 0; + + GF_ASSERT(conf); + + cds_list_for_each_entry(volinfo, &conf->volumes, vol_list) + { + ret = generate_brick_volfiles(volinfo); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL, + "Failed to " + "regenerate brick volfiles for %s", + volinfo->volname); + op_ret = ret; + } + ret = generate_client_volfiles(volinfo, GF_CLIENT_TRUSTED); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL, + "Failed to " + "regenerate trusted client volfiles for %s", + volinfo->volname); + op_ret = ret; } + ret = generate_client_volfiles(volinfo, GF_CLIENT_OTHER); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL, + "Failed to " + "regenerate client volfiles for %s", + volinfo->volname); + op_ret = ret; + } + } + return op_ret; +} + +int32_t +glusterd_handle_upgrade_downgrade(dict_t *options, glusterd_conf_t *conf, + gf_boolean_t upgrade, gf_boolean_t downgrade) +{ + int ret = 0; + gf_boolean_t regenerate_volfiles = _gf_false; + gf_boolean_t terminate = _gf_false; - ret = glusterd_nfs_server_start (); + if (_gf_true == upgrade) + regenerate_volfiles = _gf_true; + + if (upgrade && downgrade) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_WRONG_OPTS_SETTING, + "Both upgrade and downgrade" + " options are set. 
Only one should be on"); + ret = -1; + goto out; + } + + if (!upgrade && !downgrade) + ret = 0; + else + terminate = _gf_true; + if (regenerate_volfiles) { + ret = glusterd_recreate_volfiles(conf); + } out: - return ret; + if (terminate && (ret == 0)) + kill(getpid(), SIGTERM); + return ret; +} + +static inline int +glusterd_is_replica_volume(int type) +{ + if (type == GF_CLUSTER_TYPE_REPLICATE) + return 1; + return 0; +} +gf_boolean_t +glusterd_is_volume_replicate(glusterd_volinfo_t *volinfo) +{ + return glusterd_is_replica_volume((volinfo->type)); +} + +gf_boolean_t +glusterd_is_shd_compatible_type(int type) +{ + switch (type) { + case GF_CLUSTER_TYPE_REPLICATE: + case GF_CLUSTER_TYPE_DISPERSE: + return _gf_true; + } + return _gf_false; +} + +gf_boolean_t +glusterd_is_shd_compatible_volume(glusterd_volinfo_t *volinfo) +{ + return glusterd_is_shd_compatible_type(volinfo->type); } int -glusterd_volume_count_get (void) +glusterd_set_dump_options(char *dumpoptions_path, char *options, int option_cnt) { - glusterd_volinfo_t *tmp_volinfo = NULL; - int32_t ret = 0; - xlator_t *this = NULL; - glusterd_conf_t *priv = NULL; + int ret = 0; + char *dup_options = NULL; + char *option = NULL; + char *tmpptr = NULL; + FILE *fp = NULL; + int nfs_cnt = 0; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + GF_ASSERT(priv); + + if (0 == option_cnt || (option_cnt == 1 && (!strcmp(options, "nfs ")))) { + ret = 0; + goto out; + } - this = THIS; - GF_ASSERT (this); + fp = fopen(dumpoptions_path, "w"); + if (!fp) { + ret = -1; + goto out; + } + dup_options = gf_strdup(options); + + if (!dup_options) { + goto out; + } + gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_STATEDUMP_OPTS_RCVD, + "Received following statedump options: %s", dup_options); + option = strtok_r(dup_options, " ", &tmpptr); + while (option) { + if (!strcmp(option, priv->nfs_svc.name)) { + if (nfs_cnt > 0) { + sys_unlink(dumpoptions_path); + ret = 0; + goto out; + } + nfs_cnt++; + option = strtok_r(NULL, " ", &tmpptr); + continue; + } + fprintf(fp, "%s=yes\n", option); + option = strtok_r(NULL, " ", &tmpptr); + } - priv = this->private; +out: + if (fp) + fclose(fp); + GF_FREE(dup_options); + return ret; +} - list_for_each_entry (tmp_volinfo, &priv->volumes, vol_list) { - ret++; +static int +glusterd_brick_signal(glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, char *options, + int option_cnt, char **op_errstr, int sig) +{ + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + char pidfile_path[PATH_MAX] = ""; + char dumpoptions_path[PATH_MAX] = ""; + FILE *pidfile = NULL; + pid_t pid = -1; + + this = THIS; + GF_ASSERT(this); + conf = this->private; + GF_ASSERT(conf); + + if (gf_uuid_is_null(brickinfo->uuid)) { + ret = glusterd_resolve_brick(brickinfo); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_RESOLVE_BRICK_FAIL, + "Cannot resolve brick %s:%s", brickinfo->hostname, + brickinfo->path); + goto out; } + } + + if (gf_uuid_compare(brickinfo->uuid, MY_UUID)) { + ret = 0; + goto out; + } + GLUSTERD_GET_BRICK_PIDFILE(pidfile_path, volinfo, brickinfo, conf); - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + /* TBD: use gf_is_service_running instead of almost-identical code? 
*/ + pidfile = fopen(pidfile_path, "r"); + if (!pidfile) { + gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "Unable to open pidfile: %s", pidfile_path); + ret = -1; + goto out; + } + + ret = fscanf(pidfile, "%d", &pid); + if (ret <= 0) { + gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "Unable to get pid of brick process"); + ret = -1; + goto out; + } + + if (pid == 0) { + gf_msg("glusterd", GF_LOG_WARNING, 0, GD_MSG_NO_SIG_TO_PID_ZERO, + "refusing to send signal %d to pid zero", sig); + goto out; + } + + if (sig == SIGUSR1) { + snprintf(dumpoptions_path, sizeof(dumpoptions_path), + DEFAULT_VAR_RUN_DIRECTORY "/glusterdump.%d.options", pid); + ret = glusterd_set_dump_options(dumpoptions_path, options, option_cnt); + if (ret < 0) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_BRK_STATEDUMP_FAIL, + "error while parsing the statedump " + "options"); + ret = -1; + goto out; + } + } + + gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_STATEDUMP_INFO, + "sending signal %d to brick with pid %d", sig, pid); + kill(pid, sig); + + sleep(1); + sys_unlink(dumpoptions_path); + ret = 0; +out: + if (pidfile) + fclose(pidfile); + return ret; } int -glusterd_brickinfo_get (uuid_t uuid, char *hostname, char *path, - glusterd_brickinfo_t **brickinfo) +glusterd_brick_statedump(glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, char *options, + int option_cnt, char **op_errstr) { - glusterd_volinfo_t *volinfo = NULL; - glusterd_conf_t *priv = NULL; - xlator_t *this = NULL; - int ret = -1; + return glusterd_brick_signal(volinfo, brickinfo, options, option_cnt, + op_errstr, SIGUSR1); +} - GF_ASSERT (path); +int +glusterd_brick_terminate(glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, char *options, + int option_cnt, char **op_errstr) +{ + return glusterd_brick_signal(volinfo, brickinfo, options, option_cnt, + op_errstr, SIGTERM); +} - this = THIS; - GF_ASSERT (this); +#ifdef BUILD_GNFS +int +glusterd_nfs_statedump(char *options, int option_cnt, char **op_errstr) +{ + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + char pidfile_path[PATH_MAX] = ""; + FILE *pidfile = NULL; + pid_t pid = -1; + char dumpoptions_path[PATH_MAX] = ""; + char *option = NULL; + char *tmpptr = NULL; + char *dup_options = NULL; + char msg[256] = ""; + + this = THIS; + GF_ASSERT(this); + conf = this->private; + GF_ASSERT(conf); + + dup_options = gf_strdup(options); + + if (!dup_options) { + goto out; + } + option = strtok_r(dup_options, " ", &tmpptr); + if (strcmp(option, conf->nfs_svc.name)) { + snprintf(msg, sizeof(msg), + "for nfs statedump, options should" + " be after the key nfs"); + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_INVALID_ENTRY, + "Options misplaced", NULL); + *op_errstr = gf_strdup(msg); + ret = -1; + goto out; + } - priv = this->private; + GLUSTERD_GET_NFS_PIDFILE(pidfile_path, conf); - list_for_each_entry (volinfo, &priv->volumes, vol_list) { + pidfile = fopen(pidfile_path, "r"); + if (!pidfile) { + gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "Unable to open pidfile: %s", pidfile_path); + ret = -1; + goto out; + } - ret = glusterd_volume_brickinfo_get (uuid, hostname, path, - volinfo, - brickinfo); - if (!ret) - goto out; - } + ret = fscanf(pidfile, "%d", &pid); + if (ret <= 0) { + gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "Unable to get pid of brick process"); + ret = -1; + goto out; + } + + snprintf(dumpoptions_path, sizeof(dumpoptions_path), + DEFAULT_VAR_RUN_DIRECTORY 
"/glusterdump.%d.options", pid); + ret = glusterd_set_dump_options(dumpoptions_path, options, option_cnt); + if (ret < 0) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_BRK_STATEDUMP_FAIL, + "error while parsing the statedump " + "options"); + ret = -1; + goto out; + } + + gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_STATEDUMP_INFO, + "Performing statedump on nfs server with " + "pid %d", + pid); + + kill(pid, SIGUSR1); + + sleep(1); + /* coverity[TAINTED_STRING] */ + sys_unlink(dumpoptions_path); + ret = 0; out: - return ret; + if (pidfile) + fclose(pidfile); + GF_FREE(dup_options); + return ret; } +#endif int -glusterd_brick_start (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo) +glusterd_client_statedump(char *volname, char *options, int option_cnt, + char **op_errstr) { - int ret = -1; - xlator_t *this = NULL; - glusterd_conf_t *conf = NULL; + int ret = 0; + char *dup_options = NULL; + char *option = NULL; + char *tmpptr = NULL; + char msg[256] = ""; + char *target_ip = NULL; + char *pid = NULL; + + dup_options = gf_strdup(options); + if (!dup_options) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED, + "options=%s", options, NULL); + goto out; + } + option = strtok_r(dup_options, " ", &tmpptr); + if (strcmp(option, "client")) { + snprintf(msg, sizeof(msg), + "for gluster client statedump, options " + "should be after the key 'client'"); + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_INVALID_ENTRY, + "Options misplaced", NULL); + *op_errstr = gf_strdup(msg); + ret = -1; + goto out; + } + target_ip = strtok_r(NULL, " ", &tmpptr); + if (target_ip == NULL) { + snprintf(msg, sizeof(msg), "ip address not specified"); + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_INVALID_ENTRY, msg, + NULL); + *op_errstr = gf_strdup(msg); + ret = -1; + goto out; + } + + pid = strtok_r(NULL, " ", &tmpptr); + if (pid == NULL) { + snprintf(msg, sizeof(msg), "pid not specified"); + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_INVALID_ENTRY, msg, + NULL); + *op_errstr = gf_strdup(msg); + ret = -1; + goto out; + } - if ((!brickinfo) || (!volinfo)) - goto out; + ret = glusterd_client_statedump_submit_req(volname, target_ip, pid); +out: + GF_FREE(dup_options); + return ret; +} - this = THIS; - GF_ASSERT (this); - conf = this->private; - GF_ASSERT (conf); +int +glusterd_quotad_statedump(char *options, int option_cnt, char **op_errstr) +{ + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + char pidfile_path[PATH_MAX] = ""; + FILE *pidfile = NULL; + pid_t pid = -1; + char dumpoptions_path[PATH_MAX] = ""; + char *option = NULL; + char *tmpptr = NULL; + char *dup_options = NULL; + char msg[256] = ""; + + this = THIS; + GF_ASSERT(this); + conf = this->private; + GF_ASSERT(conf); + + dup_options = gf_strdup(options); + if (!dup_options) { + goto out; + } + option = strtok_r(dup_options, " ", &tmpptr); + if (strcmp(option, conf->quotad_svc.name)) { + snprintf(msg, sizeof(msg), + "for quotad statedump, options " + "should be after the key 'quotad'"); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ENTRY, + "Options misplaced", NULL); + *op_errstr = gf_strdup(msg); + ret = -1; + goto out; + } + + GLUSTERD_GET_QUOTAD_PIDFILE(pidfile_path, conf); - if (uuid_is_null (brickinfo->uuid)) { - ret = glusterd_resolve_brick (brickinfo); + pidfile = fopen(pidfile_path, "r"); + if (!pidfile) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "Unable to open pidfile: %s", pidfile_path); + ret = -1; + goto out; + } + + ret = fscanf(pidfile, "%d", 
&pid); + if (ret <= 0) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "Unable to get pid of quotad " + "process"); + ret = -1; + goto out; + } + + snprintf(dumpoptions_path, sizeof(dumpoptions_path), + DEFAULT_VAR_RUN_DIRECTORY "/glusterdump.%d.options", pid); + ret = glusterd_set_dump_options(dumpoptions_path, options, option_cnt); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRK_STATEDUMP_FAIL, + "error while parsing " + "statedump options"); + ret = -1; + goto out; + } + + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_STATEDUMP_INFO, + "Performing statedump on quotad with " + "pid %d", + pid); + + kill(pid, SIGUSR1); + + sleep(1); + + /* coverity[TAINTED_STRING] */ + sys_unlink(dumpoptions_path); + ret = 0; +out: + if (pidfile) + fclose(pidfile); + GF_FREE(dup_options); + return ret; +} + +/* Checks if the given peer contains bricks belonging to the given volume. + * Returns, + * 2 - if peer contains all the bricks + * 1 - if peer contains at least 1 brick + * 0 - if peer contains no bricks + */ +int +glusterd_friend_contains_vol_bricks(glusterd_volinfo_t *volinfo, + uuid_t friend_uuid) +{ + int ret = 0; + glusterd_brickinfo_t *brickinfo = NULL; + int count = 0; + + GF_ASSERT(volinfo); + + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + if (!gf_uuid_compare(brickinfo->uuid, friend_uuid)) { + count++; + } + } + + if (count) { + if (count == volinfo->brick_count) + ret = 2; + else + ret = 1; + } + gf_msg_debug(THIS->name, 0, "Returning %d", ret); + return ret; +} + +/* Checks if the given peer contains bricks belonging to the given volume. + * Returns, + * 2 - if peer contains all the bricks + * 1 - if peer contains at least 1 brick + * 0 - if peer contains no bricks + */ +int +glusterd_friend_contains_snap_bricks(glusterd_snap_t *snapinfo, + uuid_t friend_uuid) +{ + int ret = -1; + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + int count = 0; + + GF_VALIDATE_OR_GOTO("glusterd", snapinfo, out); + + cds_list_for_each_entry(volinfo, &snapinfo->volumes, vol_list) + { + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + if (!gf_uuid_compare(brickinfo->uuid, friend_uuid)) { + count++; + } + } + } + + if (count > 0) + ret = 1; + else + ret = 0; + +out: + gf_msg_debug(THIS->name, 0, "Returning %d", ret); + return ret; +} + +/* Cleanup the stale volumes left behind in the cluster. The volumes which are + * contained completely within the detached peer are stale with respect to the + * cluster. + */ +int +glusterd_friend_remove_cleanup_vols(uuid_t uuid) +{ + int ret = -1; + glusterd_conf_t *priv = NULL; + glusterd_svc_t *svc = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_volinfo_t *tmp_volinfo = NULL; + + priv = THIS->private; + GF_ASSERT(priv); + + cds_list_for_each_entry_safe(volinfo, tmp_volinfo, &priv->volumes, vol_list) + { + if (!glusterd_friend_contains_vol_bricks(volinfo, MY_UUID)) { + /*Stop snapd daemon service if snapd daemon is running*/ + if (!volinfo->is_snap_volume) { + svc = &(volinfo->snapd.svc); + ret = svc->stop(svc, SIGTERM); if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, - "cannot resolve brick: %s:%s", - brickinfo->hostname, brickinfo->path); - goto out; + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SVC_STOP_FAIL, + "Failed " + "to stop snapd daemon service"); + } + } + + if (glusterd_is_shd_compatible_volume(volinfo)) { + /* + * Sending stop request for all volumes. 
So it is fine + * to send stop for mux shd + */ + svc = &(volinfo->shd.svc); + ret = svc->stop(svc, SIGTERM); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SVC_STOP_FAIL, + "Failed " + "to stop shd daemon service"); } + } } - if (uuid_compare (brickinfo->uuid, conf->uuid)) { - ret = 0; + if (glusterd_friend_contains_vol_bricks(volinfo, uuid) == 2) { + gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_STALE_VOL_DELETE_INFO, + "Deleting stale volume %s", volinfo->volname); + ret = glusterd_delete_volume(volinfo); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, + GD_MSG_STALE_VOL_REMOVE_FAIL, + "Error deleting stale volume"); goto out; + } } - ret = glusterd_volume_start_glusterfs (volinfo, brickinfo); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to start " - "glusterfs, ret: %d", ret); - goto out; - } - + } + + /* Reconfigure all daemon services upon peer detach */ + ret = glusterd_svcs_reconfigure(NULL); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SVC_STOP_FAIL, + "Failed to reconfigure all daemon services."); + } + ret = 0; out: - gf_log ("", GF_LOG_DEBUG, "returning %d ", ret); - return ret; + gf_msg_debug(THIS->name, 0, "Returning %d", ret); + return ret; } int -glusterd_restart_bricks (glusterd_conf_t *conf) +glusterd_get_bitd_filepath(char *filepath, glusterd_volinfo_t *volinfo) { - glusterd_volinfo_t *volinfo = NULL; - glusterd_brickinfo_t *brickinfo = NULL; - int ret = -1; + int ret = 0; + char path[PATH_MAX] = ""; + glusterd_conf_t *priv = NULL; + int32_t len = 0; - GF_ASSERT (conf); + priv = THIS->private; - list_for_each_entry (volinfo, &conf->volumes, vol_list) { - //If volume status is not started, do not proceed - if (volinfo->status == GLUSTERD_STATUS_STARTED) { - list_for_each_entry (brickinfo, &volinfo->bricks, - brick_list) { - glusterd_brick_start (volinfo, brickinfo); - } - glusterd_check_generate_start_nfs (); - } - } - return ret; + GLUSTERD_GET_VOLUME_DIR(path, volinfo, priv); + + len = snprintf(filepath, PATH_MAX, "%s/%s-bitd.vol", path, + volinfo->volname); + if ((len < 0) || (len >= PATH_MAX)) { + ret = -1; + } + + return ret; } int -glusterd_get_brickinfo (xlator_t *this, const char *brickname, int port, - gf_boolean_t localhost, glusterd_brickinfo_t **brickinfo) +glusterd_get_client_filepath(char *filepath, glusterd_volinfo_t *volinfo, + gf_transport_type type) { - glusterd_conf_t *priv = NULL; - glusterd_volinfo_t *volinfo = NULL; - glusterd_brickinfo_t *tmpbrkinfo = NULL; - int ret = -1; + int ret = 0; + char path[PATH_MAX] = ""; + glusterd_conf_t *priv = NULL; + int32_t len = 0; + + priv = THIS->private; + + GLUSTERD_GET_VOLUME_DIR(path, volinfo, priv); + + switch (type) { + case GF_TRANSPORT_TCP: + len = snprintf(filepath, PATH_MAX, "%s/%s.tcp-fuse.vol", path, + volinfo->volname); + break; + + case GF_TRANSPORT_RDMA: + len = snprintf(filepath, PATH_MAX, "%s/%s.rdma-fuse.vol", path, + volinfo->volname); + break; + default: + ret = -1; + break; + } + if ((len < 0) || (len >= PATH_MAX)) { + ret = -1; + } - GF_ASSERT (brickname); - GF_ASSERT (this); + return ret; +} - priv = this->private; - list_for_each_entry (volinfo, &priv->volumes, vol_list) { - list_for_each_entry (tmpbrkinfo, &volinfo->bricks, - brick_list) { - if (localhost && glusterd_is_local_addr (tmpbrkinfo->hostname)) - continue; - if (!strcmp(tmpbrkinfo->path, brickname) && - (tmpbrkinfo->port == port)) { - *brickinfo = tmpbrkinfo; - return 0; - } - } - } +int +glusterd_get_trusted_client_filepath(char *filepath, + glusterd_volinfo_t *volinfo, + gf_transport_type type) +{ + 
int ret = 0; + char path[PATH_MAX] = ""; + glusterd_conf_t *priv = NULL; + int32_t len = 0; + + priv = THIS->private; + + GLUSTERD_GET_VOLUME_DIR(path, volinfo, priv); + + switch (type) { + case GF_TRANSPORT_TCP: + len = snprintf(filepath, PATH_MAX, "%s/trusted-%s.tcp-fuse.vol", + path, volinfo->volname); + break; + + case GF_TRANSPORT_RDMA: + len = snprintf(filepath, PATH_MAX, "%s/trusted-%s.rdma-fuse.vol", + path, volinfo->volname); + break; + default: + ret = -1; + break; + } + if ((len < 0) || (len >= PATH_MAX)) { + ret = -1; + } + + return ret; +} + +int +glusterd_get_dummy_client_filepath(char *filepath, glusterd_volinfo_t *volinfo, + gf_transport_type type) +{ + int ret = 0; + + switch (type) { + case GF_TRANSPORT_TCP: + case GF_TRANSPORT_BOTH_TCP_RDMA: + snprintf(filepath, PATH_MAX, "/tmp/%s.tcp-fuse.vol", + volinfo->volname); + break; + + case GF_TRANSPORT_RDMA: + snprintf(filepath, PATH_MAX, "/tmp/%s.rdma-fuse.vol", + volinfo->volname); + break; + default: + ret = -1; + break; + } + + return ret; +} + +int +glusterd_volume_defrag_restart(glusterd_volinfo_t *volinfo, char *op_errstr, + size_t len, int cmd, defrag_cbk_fn_t cbk) +{ + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + char pidfile[PATH_MAX] = ""; + int ret = -1; + pid_t pid = 0; + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + if (!priv) return ret; + + /* Don't start the rebalance process if the status is already + * completed, stopped or failed. If the status is started, check if + * there is an existing process already and connect to it. If not, then + * start the rebalance process + */ + + switch (volinfo->rebal.defrag_status) { + case GF_DEFRAG_STATUS_COMPLETE: + case GF_DEFRAG_STATUS_STOPPED: + case GF_DEFRAG_STATUS_FAILED: + break; + case GF_DEFRAG_STATUS_STARTED: + GLUSTERD_GET_DEFRAG_PID_FILE(pidfile, volinfo, priv); + if (gf_is_service_running(pidfile, &pid)) { + ret = glusterd_rebalance_defrag_init(volinfo, cbk); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_REBALANCE_START_FAIL, + "Failed to initialize defrag." + "Not starting rebalance process for " + "%s.", + volinfo->volname); + gf_event(EVENT_REBALANCE_START_FAILED, "volume=%s", + volinfo->volname); + goto out; + } + ret = glusterd_rebalance_rpc_create(volinfo); + break; + } + case GF_DEFRAG_STATUS_NOT_STARTED: + ret = glusterd_handle_defrag_start(volinfo, op_errstr, len, cmd, + cbk, volinfo->rebal.op); + if (ret) { + volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_FAILED; + gf_event(EVENT_REBALANCE_START_FAILED, "volume=%s", + volinfo->volname); + } + break; + default: + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REBALANCE_START_FAIL, + "Unknown defrag status (%d)." 
+ "Not starting rebalance process for %s.", + volinfo->rebal.defrag_status, volinfo->volname); + break; + } +out: + return ret; } void -glusterd_set_brick_status (glusterd_brickinfo_t *brickinfo, - gf_brick_status_t status) -{ - GF_ASSERT (brickinfo); - brickinfo->status = status; - if (GF_BRICK_STARTED == status) { - gf_log ("glusterd", GF_LOG_DEBUG, "Setting brick %s:%s status " - "to started", brickinfo->hostname, brickinfo->path); - } else { - gf_log ("glusterd", GF_LOG_DEBUG, "Setting brick %s:%s status " - "to stopped", brickinfo->hostname, brickinfo->path); +glusterd_defrag_info_set(glusterd_volinfo_t *volinfo, dict_t *dict, int cmd, + int status, int op) +{ + xlator_t *this = NULL; + int ret = -1; + char *task_id_str = NULL; + glusterd_rebalance_t *rebal = NULL; + + this = THIS; + rebal = &volinfo->rebal; + + rebal->defrag_cmd = cmd; + rebal->defrag_status = status; + rebal->op = op; + + if (gf_uuid_is_null(rebal->rebalance_id)) + return; + + if (is_origin_glusterd(dict)) { + ret = glusterd_generate_and_set_task_id(dict, GF_REBALANCE_TID_KEY, + SLEN(GF_REBALANCE_TID_KEY)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TASKID_GEN_FAIL, + "Failed to generate task-id"); + goto out; + } + } + ret = dict_get_strn(dict, GF_REBALANCE_TID_KEY, SLEN(GF_REBALANCE_TID_KEY), + &task_id_str); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_REBALANCE_ID_MISSING, + "Missing rebalance-id"); + ret = 0; + goto out; + } + + gf_uuid_parse(task_id_str, rebal->rebalance_id); +out: + + if (ret) { + gf_msg_debug(this->name, 0, "Rebalance start validate failed"); + } + return; +} + +int +glusterd_restart_rebalance_for_volume(glusterd_volinfo_t *volinfo) +{ + int ret = -1; + char op_errstr[PATH_MAX] = ""; + + if (!gd_should_i_start_rebalance(volinfo)) { + /* Store the rebalance-id and rebalance command even if + * the peer isn't starting a rebalance process. On peers + * where a rebalance process is started, + * glusterd_handle_defrag_start performs the storing. + * + * Storing this is needed for having 'volume status' + * work correctly. + */ + volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_NOT_STARTED; + return 0; + } + if (!volinfo->rebal.defrag_cmd) { + volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_FAILED; + return -1; + } + + ret = glusterd_volume_defrag_restart(volinfo, op_errstr, PATH_MAX, + volinfo->rebal.defrag_cmd, + volinfo->rebal.op == GD_OP_REMOVE_BRICK + ? glusterd_remove_brick_migrate_cbk + : NULL); + if (!ret) { + /* If remove brick is started then ensure that on a glusterd + * restart decommission_is_in_progress is set to avoid remove + * brick commit to happen when rebalance is not completed. 
+ */ + if (volinfo->rebal.op == GD_OP_REMOVE_BRICK && + volinfo->rebal.defrag_status == GF_DEFRAG_STATUS_STARTED) { + volinfo->decommission_in_progress = 1; } + } + return ret; +} +int +glusterd_restart_rebalance(glusterd_conf_t *conf) +{ + glusterd_volinfo_t *volinfo = NULL; + int ret = 0; + + cds_list_for_each_entry(volinfo, &conf->volumes, vol_list) + { + glusterd_restart_rebalance_for_volume(volinfo); + } + return ret; +} + +void +glusterd_volinfo_reset_defrag_stats(glusterd_volinfo_t *volinfo) +{ + glusterd_rebalance_t *rebal = NULL; + GF_ASSERT(volinfo); + + rebal = &volinfo->rebal; + rebal->rebalance_files = 0; + rebal->rebalance_data = 0; + rebal->lookedup_files = 0; + rebal->rebalance_failures = 0; + rebal->rebalance_time = 0; + rebal->skipped_files = 0; } gf_boolean_t -glusterd_is_brick_started (glusterd_brickinfo_t *brickinfo) +glusterd_is_local_brick(xlator_t *this, glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo) +{ + gf_boolean_t local = _gf_false; + int ret = 0; + + if (gf_uuid_is_null(brickinfo->uuid)) { + ret = glusterd_resolve_brick(brickinfo); + if (ret) + goto out; + } + local = !gf_uuid_compare(brickinfo->uuid, MY_UUID); +out: + return local; +} +int +glusterd_validate_volume_id(dict_t *op_dict, glusterd_volinfo_t *volinfo) +{ + int ret = -1; + char *volid_str = NULL; + uuid_t vol_uid = { + 0, + }; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + ret = dict_get_strn(op_dict, "vol-id", SLEN("vol-id"), &volid_str); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get volume id for " + "volume %s", + volinfo->volname); + goto out; + } + ret = gf_uuid_parse(volid_str, vol_uid); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UUID_PARSE_FAIL, + "Failed to parse volume id " + "for volume %s", + volinfo->volname); + goto out; + } + + if (gf_uuid_compare(vol_uid, volinfo->volume_id)) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_ID_MISMATCH, + "Volume ids of volume %s - %s" + " and %s - are different. 
Possibly a split brain among " + "peers.", + volinfo->volname, volid_str, uuid_utoa(volinfo->volume_id)); + ret = -1; + goto out; + } + +out: + return ret; +} + +int +glusterd_defrag_volume_status_update(glusterd_volinfo_t *volinfo, + dict_t *rsp_dict, int32_t cmd) { - GF_ASSERT (brickinfo); - return (brickinfo->status == GF_BRICK_STARTED); + int ret = 0; + int ret2 = 0; + uint64_t files = 0; + uint64_t size = 0; + uint64_t lookup = 0; + gf_defrag_status_t status = GF_DEFRAG_STATUS_NOT_STARTED; + uint64_t failures = 0; + uint64_t skipped = 0; + xlator_t *this = NULL; + double run_time = 0; + uint64_t promoted = 0; + uint64_t demoted = 0; + uint64_t time_left = 0; + + this = THIS; + + ret = dict_get_uint64(rsp_dict, "files", &files); + if (ret) + gf_msg_trace(this->name, 0, "failed to get file count"); + + ret = dict_get_uint64(rsp_dict, "size", &size); + if (ret) + gf_msg_trace(this->name, 0, "failed to get size of xfer"); + + ret = dict_get_uint64(rsp_dict, "lookups", &lookup); + if (ret) + gf_msg_trace(this->name, 0, "failed to get lookedup file count"); + + ret = dict_get_int32n(rsp_dict, "status", SLEN("status"), + (int32_t *)&status); + if (ret) + gf_msg_trace(this->name, 0, "failed to get status"); + + ret = dict_get_uint64(rsp_dict, "failures", &failures); + if (ret) + gf_msg_trace(this->name, 0, "failed to get failure count"); + + ret = dict_get_uint64(rsp_dict, "skipped", &skipped); + if (ret) + gf_msg_trace(this->name, 0, "failed to get skipped count"); + + ret = dict_get_uint64(rsp_dict, "promoted", &promoted); + if (ret) + gf_msg_trace(this->name, 0, "failed to get promoted count"); + + ret = dict_get_uint64(rsp_dict, "demoted", &demoted); + if (ret) + gf_msg_trace(this->name, 0, "failed to get demoted count"); + + ret = dict_get_double(rsp_dict, "run-time", &run_time); + if (ret) + gf_msg_trace(this->name, 0, "failed to get run-time"); + + ret2 = dict_get_uint64(rsp_dict, "time-left", &time_left); + if (ret2) + gf_msg_trace(this->name, 0, "failed to get time left"); + + if (files) + volinfo->rebal.rebalance_files = files; + if (size) + volinfo->rebal.rebalance_data = size; + if (lookup) + volinfo->rebal.lookedup_files = lookup; + if (status) + volinfo->rebal.defrag_status = status; + if (failures) + volinfo->rebal.rebalance_failures = failures; + if (skipped) + volinfo->rebal.skipped_files = skipped; + if (run_time) + volinfo->rebal.rebalance_time = run_time; + if (!ret2) + volinfo->rebal.time_left = time_left; + + return ret; } int -glusterd_friend_brick_belongs (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo, void* uuid) +glusterd_check_topology_identical(const char *filename1, const char *filename2, + gf_boolean_t *identical) { - int ret = -1; + int ret = -1; /* FAILURE */ + xlator_t *this = THIS; + FILE *fp1 = NULL; + FILE *fp2 = NULL; + glusterfs_graph_t *grph1 = NULL; + glusterfs_graph_t *grph2 = NULL; + + /* Invalid xlator, Nothing to do */ + if (!this) + return (-1); + + /* Sanitize the inputs */ + GF_VALIDATE_OR_GOTO(this->name, filename1, out); + GF_VALIDATE_OR_GOTO(this->name, filename2, out); + GF_VALIDATE_OR_GOTO(this->name, identical, out); + + /* fopen() the volfile1 to create the graph */ + fp1 = fopen(filename1, "r"); + if (fp1 == NULL) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "fopen() on file: %s failed " + "(%s)", + filename1, strerror(errno)); + goto out; + } + + /* fopen() the volfile2 to create the graph */ + fp2 = fopen(filename2, "r"); + if (fp2 == NULL) { + gf_msg(this->name, GF_LOG_ERROR, errno, 
GD_MSG_FILE_OP_FAILED, + "fopen() on file: %s failed " + "(%s)", + filename2, strerror(errno)); + goto out; + } + + /* create the graph for filename1 */ + grph1 = glusterfs_graph_construct(fp1); + if (grph1 == NULL) + goto out; + + /* create the graph for filename2 */ + grph2 = glusterfs_graph_construct(fp2); + if (grph2 == NULL) + goto out; + + /* compare the graph topology */ + *identical = is_graph_topology_equal(grph1, grph2); + ret = 0; /* SUCCESS */ +out: + if (fp1) + fclose(fp1); + if (fp2) + fclose(fp2); + if (grph1) + glusterfs_graph_destroy(grph1); + if (grph2) + glusterfs_graph_destroy(grph2); + + gf_msg_debug(this->name, 0, "Returning with %d", ret); + return ret; +} - GF_ASSERT (volinfo); - GF_ASSERT (brickinfo); - GF_ASSERT (uuid); +int +glusterd_check_files_identical(char *filename1, char *filename2, + gf_boolean_t *identical) +{ + int ret = -1; + struct stat buf1 = { + 0, + }; + struct stat buf2 = { + 0, + }; + uint32_t cksum1 = 0; + uint32_t cksum2 = 0; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + + GF_ASSERT(filename1); + GF_ASSERT(filename2); + GF_ASSERT(identical); + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, out); + + ret = sys_stat(filename1, &buf1); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "stat on file: %s failed " + "(%s)", + filename1, strerror(errno)); + goto out; + } + + ret = sys_stat(filename2, &buf2); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "stat on file: %s failed " + "(%s)", + filename2, strerror(errno)); + goto out; + } + + if (buf1.st_size != buf2.st_size) { + *identical = _gf_false; + goto out; + } + + ret = get_checksum_for_path(filename1, &cksum1, priv->op_version); + if (ret) + goto out; + + ret = get_checksum_for_path(filename2, &cksum2, priv->op_version); + if (ret) + goto out; + + if (cksum1 != cksum2) + *identical = _gf_false; + else + *identical = _gf_true; - if (uuid_is_null (brickinfo->uuid)) { - ret = glusterd_resolve_brick (brickinfo); - if (ret) { - GF_ASSERT (0); - goto out; - } - } - if (!uuid_compare (brickinfo->uuid, *((uuid_t *)uuid))) - return 0; out: - return -1; + gf_msg_debug(this->name, 0, "Returning with %d", ret); + return ret; } int -glusterd_all_volume_cond_check (glusterd_condition_func func, int status, - void *ctx) +glusterd_volset_help(dict_t *dict, char **op_errstr) { - glusterd_conf_t *priv = NULL; - glusterd_volinfo_t *volinfo = NULL; - glusterd_brickinfo_t *brickinfo = NULL; - int ret = -1; - xlator_t *this = NULL; + int ret = -1; + gf_boolean_t xml_out = _gf_false; +#if (!HAVE_LIB_XML) + xlator_t *this = NULL; - this = THIS; - priv = this->private; + this = THIS; +#endif - list_for_each_entry (volinfo, &priv->volumes, vol_list) { - list_for_each_entry (brickinfo, &volinfo->bricks, - brick_list) { - ret = func (volinfo, brickinfo, ctx); - if (ret != status) { - ret = -1; - goto out; - } - } + if (!dict) { + if (!(dict = glusterd_op_get_ctx())) { + ret = 0; + goto out; } + } + + if (dict_getn(dict, "help", SLEN("help"))) { + xml_out = _gf_false; + + } else if (dict_getn(dict, "help-xml", SLEN("help-xml"))) { + xml_out = _gf_true; +#if (HAVE_LIB_XML) ret = 0; +#else + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MODULE_NOT_INSTALLED, + "libxml not present in the system"); + if (op_errstr) + *op_errstr = gf_strdup( + "Error: xml libraries not " + "present to produce " + "xml-output"); + goto out; +#endif + + } else { + goto out; + } + + ret = 
glusterd_get_volopt_content(dict, xml_out); + if (ret && op_errstr) + *op_errstr = gf_strdup("Failed to get volume options help"); out: - gf_log ("", GF_LOG_DEBUG, "returning %d", ret); - return ret; + + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; } int -glusterd_friend_find_by_uuid (uuid_t uuid, - glusterd_peerinfo_t **peerinfo) +glusterd_to_cli(rpcsvc_request_t *req, gf_cli_rsp *arg, struct iovec *payload, + int payloadcount, struct iobref *iobref, xdrproc_t xdrproc, + dict_t *dict) { - int ret = -1; - glusterd_conf_t *priv = NULL; - glusterd_peerinfo_t *entry = NULL; + int ret = -1; + char *cmd = NULL; + int op_ret = 0; + char *op_errstr = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + op_ret = arg->op_ret; + op_errstr = arg->op_errstr; + + ret = dict_get_strn(dict, "cmd-str", SLEN("cmd-str"), &cmd); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get command " + "string"); + + if (cmd) { + if (op_ret) + gf_cmd_log("", "%s : FAILED %s %s", cmd, (op_errstr) ? ":" : " ", + (op_errstr) ? op_errstr : " "); + else + gf_cmd_log("", "%s : SUCCESS", cmd); + } - GF_ASSERT (peerinfo); + glusterd_submit_reply(req, arg, payload, payloadcount, iobref, + (xdrproc_t)xdrproc); - *peerinfo = NULL; - priv = THIS->private; + if (dict) { + dict_unref(dict); + } + return ret; +} - GF_ASSERT (priv); +static int32_t +glusterd_append_gsync_status(dict_t *dst, dict_t *src) +{ + int ret = 0; + char *stop_msg = NULL; + + ret = dict_get_strn(src, "gsync-status", SLEN("gsync-status"), &stop_msg); + if (ret) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=gsync-status", NULL); + ret = 0; + goto out; + } + + ret = dict_set_dynstr_with_alloc(dst, "gsync-status", stop_msg); + if (ret) { + gf_msg("glusterd", GF_LOG_WARNING, 0, GD_MSG_DICT_SET_FAILED, + "Unable to set the stop" + "message in the ctx dictionary"); + goto out; + } + + ret = 0; +out: + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; +} - if (uuid_is_null (uuid)) - return -1; +int32_t +glusterd_append_status_dicts(dict_t *dst, dict_t *src) +{ + char sts_val_name[PATH_MAX] = ""; + int dst_count = 0; + int src_count = 0; + int i = 0; + int ret = 0; + gf_gsync_status_t *sts_val = NULL; + gf_gsync_status_t *dst_sts_val = NULL; + + GF_ASSERT(dst); + + if (src == NULL) + goto out; + + ret = dict_get_int32n(dst, "gsync-count", SLEN("gsync-count"), &dst_count); + if (ret) + dst_count = 0; + + ret = dict_get_int32n(src, "gsync-count", SLEN("gsync-count"), &src_count); + if (ret || !src_count) { + gf_msg_debug("glusterd", 0, "Source brick empty"); + ret = 0; + goto out; + } - list_for_each_entry (entry, &priv->peers, uuid_list) { - if (!uuid_compare (entry->uuid, uuid)) { + for (i = 0; i < src_count; i++) { + snprintf(sts_val_name, sizeof(sts_val_name), "status_value%d", i); - gf_log ("glusterd", GF_LOG_INFO, - "Friend found... 
state: %s", - glusterd_friend_sm_state_name_get (entry->state.state)); - *peerinfo = entry; - return 0; - } + ret = dict_get_bin(src, sts_val_name, (void **)&sts_val); + if (ret) + goto out; + + dst_sts_val = GF_MALLOC(sizeof(gf_gsync_status_t), + gf_common_mt_gsync_status_t); + if (!dst_sts_val) { + gf_msg("glusterd", GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "Out Of Memory"); + goto out; } - return ret; + memcpy(dst_sts_val, sts_val, sizeof(gf_gsync_status_t)); + + snprintf(sts_val_name, sizeof(sts_val_name), "status_value%d", + i + dst_count); + + ret = dict_set_bin(dst, sts_val_name, dst_sts_val, + sizeof(gf_gsync_status_t)); + if (ret) { + GF_FREE(dst_sts_val); + goto out; + } + } + + ret = dict_set_int32n(dst, "gsync-count", SLEN("gsync-count"), + dst_count + src_count); + +out: + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; } +int32_t +glusterd_aggr_brick_mount_dirs(dict_t *aggr, dict_t *rsp_dict) +{ + char key[64] = ""; + int keylen; + char *brick_mount_dir = NULL; + int32_t brick_count = -1; + int32_t ret = -1; + int32_t i = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(aggr); + GF_ASSERT(rsp_dict); + + ret = dict_get_int32n(rsp_dict, "brick_count", SLEN("brick_count"), + &brick_count); + if (ret) { + gf_msg_debug(this->name, 0, "No brick_count present"); + ret = 0; + goto out; + } -int -glusterd_friend_find_by_hostname (const char *hoststr, - glusterd_peerinfo_t **peerinfo) + for (i = 1; i <= brick_count; i++) { + brick_mount_dir = NULL; + keylen = snprintf(key, sizeof(key), "brick%d.mount_dir", i); + ret = dict_get_strn(rsp_dict, key, keylen, &brick_mount_dir); + if (ret) { + /* Coz the info will come from a different node */ + gf_msg_debug(this->name, 0, "%s not present", key); + continue; + } + + ret = dict_set_dynstr_with_alloc(aggr, key, brick_mount_dir); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set %s", key); + goto out; + } + } + + ret = 0; +out: + gf_msg_trace(this->name, 0, "Returning %d ", ret); + return ret; +} + +int32_t +glusterd_gsync_use_rsp_dict(dict_t *aggr, dict_t *rsp_dict, char *op_errstr) { - int ret = -1; - glusterd_conf_t *priv = NULL; - glusterd_peerinfo_t *entry = NULL; - struct addrinfo *addr = NULL; - struct addrinfo *p = NULL; - char *host = NULL; - struct sockaddr_in6 *s6 = NULL; - struct sockaddr_in *s4 = NULL; - struct in_addr *in_addr = NULL; - char hname[1024] = {0,}; + dict_t *ctx = NULL; + int ret = 0; + char *conf_path = NULL; + + if (aggr) { + ctx = aggr; + + } else { + ctx = glusterd_op_get_ctx(); + if (!ctx) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_OPCTX_GET_FAIL, + "Operation Context is not present"); + GF_ASSERT(0); + } + } - GF_ASSERT (hoststr); - GF_ASSERT (peerinfo); + if (rsp_dict) { + ret = glusterd_append_status_dicts(ctx, rsp_dict); + if (ret) + goto out; - *peerinfo = NULL; - priv = THIS->private; + ret = glusterd_append_gsync_status(ctx, rsp_dict); + if (ret) + goto out; + + ret = dict_get_strn(rsp_dict, "conf_path", SLEN("conf_path"), + &conf_path); + if (!ret && conf_path) { + ret = dict_set_dynstr_with_alloc(ctx, "conf_path", conf_path); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to store conf path."); + goto out; + } + } + } + if ((op_errstr) && (strcmp("", op_errstr))) { + ret = dict_set_dynstr_with_alloc(ctx, "errstr", op_errstr); + if (ret) + goto out; + } - GF_ASSERT (priv); + ret = 0; +out: + gf_msg_debug("glusterd", 0, "Returning %d ", ret); + return ret; +} - 
list_for_each_entry (entry, &priv->peers, uuid_list) { - if (!strncmp (entry->hostname, hoststr, - 1024)) { +int32_t +glusterd_rb_use_rsp_dict(dict_t *aggr, dict_t *rsp_dict) +{ + int32_t src_port = 0; + int32_t dst_port = 0; + int ret = 0; + dict_t *ctx = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + if (aggr) { + ctx = aggr; + + } else { + ctx = glusterd_op_get_ctx(); + if (!ctx) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_OPCTX_GET_FAIL, + "Operation Context is not present"); + GF_ASSERT(0); + } + } - gf_log ("glusterd", GF_LOG_INFO, - "Friend %s found.. state: %d", hoststr, - entry->state.state); - *peerinfo = entry; - return 0; - } + if (rsp_dict) { + ret = dict_get_int32n(rsp_dict, "src-brick-port", + SLEN("src-brick-port"), &src_port); + if (ret == 0) { + gf_msg_debug("glusterd", 0, "src-brick-port=%d found", src_port); } - ret = getaddrinfo(hoststr, NULL, NULL, &addr); - if (ret != 0) { - gf_log ("", GF_LOG_ERROR, "error in getaddrinfo: %s\n", - gai_strerror(ret)); - goto out; + ret = dict_get_int32n(rsp_dict, "dst-brick-port", + SLEN("dst-brick-port"), &dst_port); + if (ret == 0) { + gf_msg_debug("glusterd", 0, "dst-brick-port=%d found", dst_port); } - for (p = addr; p != NULL; p = p->ai_next) { - switch (p->ai_family) { - case AF_INET: - s4 = (struct sockaddr_in *) p->ai_addr; - in_addr = &s4->sin_addr; - break; - case AF_INET6: - s6 = (struct sockaddr_in6 *) p->ai_addr; - in_addr =(struct in_addr *) &s6->sin6_addr; - break; - default: ret = -1; - goto out; - } - host = inet_ntoa(*in_addr); + ret = glusterd_aggr_brick_mount_dirs(ctx, rsp_dict); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_MOUNDIRS_AGGR_FAIL, + "Failed to " + "aggregate brick mount dirs"); + goto out; + } + } - ret = getnameinfo (p->ai_addr, p->ai_addrlen, hname, - 1024, NULL, 0, 0); - if (ret) - goto out; + if (src_port) { + ret = dict_set_int32n(ctx, "src-brick-port", SLEN("src-brick-port"), + src_port); + if (ret) { + gf_msg_debug("glusterd", 0, "Could not set src-brick"); + goto out; + } + } - list_for_each_entry (entry, &priv->peers, uuid_list) { - if (!strncmp (entry->hostname, host, - 1024) || !strncmp (entry->hostname,hname, - 1024)) { - gf_log ("glusterd", GF_LOG_INFO, - "Friend %s found.. 
state: %d", - hoststr, entry->state.state); - *peerinfo = entry; - freeaddrinfo (addr); - return 0; - } - } + if (dst_port) { + ret = dict_set_int32n(ctx, "dst-brick-port", SLEN("dst-brick-port"), + dst_port); + if (ret) { + gf_msg_debug("glusterd", 0, "Could not set dst-brick"); + goto out; } + } out: - if (addr) - freeaddrinfo (addr); - return -1; + return ret; +} + +int32_t +glusterd_sync_use_rsp_dict(dict_t *aggr, dict_t *rsp_dict) +{ + int ret = 0; + + GF_ASSERT(rsp_dict); + xlator_t *this = THIS; + GF_ASSERT(this); + + if (!rsp_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); + goto out; + } + + ret = glusterd_import_friend_volumes(rsp_dict); +out: + return ret; +} + +static int +_profile_volume_add_friend_rsp(dict_t *this, char *key, data_t *value, + void *data) +{ + char new_key[264] = ""; + int new_key_len; + glusterd_pr_brick_rsp_conv_t *rsp_ctx = NULL; + data_t *new_value = NULL; + int brick_count = 0; + char brick_key[256] = ""; + + if (strcmp(key, "count") == 0) + return 0; + sscanf(key, "%d%s", &brick_count, brick_key); + rsp_ctx = data; + new_value = data_copy(value); + GF_ASSERT(new_value); + new_key_len = snprintf(new_key, sizeof(new_key), "%d%s", + rsp_ctx->count + brick_count, brick_key); + dict_setn(rsp_ctx->dict, new_key, new_key_len, new_value); + return 0; } int -glusterd_hostname_to_uuid (char *hostname, uuid_t uuid) +glusterd_profile_volume_use_rsp_dict(dict_t *aggr, dict_t *rsp_dict) +{ + int ret = 0; + glusterd_pr_brick_rsp_conv_t rsp_ctx = {0}; + int32_t brick_count = 0; + int32_t count = 0; + dict_t *ctx_dict = NULL; + xlator_t *this = NULL; + + GF_ASSERT(rsp_dict); + this = THIS; + GF_ASSERT(this); + + ret = dict_get_int32n(rsp_dict, "count", SLEN("count"), &brick_count); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=count", NULL); + ret = 0; // no bricks in the rsp + goto out; + } + if (aggr) { + ctx_dict = aggr; + + } else { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OPCTX_GET_FAIL, + "Operation Context is not present"); + ret = -1; + goto out; + } + + ret = dict_get_int32n(ctx_dict, "count", SLEN("count"), &count); + rsp_ctx.count = count; + rsp_ctx.dict = ctx_dict; + dict_foreach(rsp_dict, _profile_volume_add_friend_rsp, &rsp_ctx); + ret = dict_set_int32n(ctx_dict, "count", SLEN("count"), + count + brick_count); +out: + return ret; +} + +static int +glusterd_volume_status_add_peer_rsp(dict_t *this, char *key, data_t *value, + void *data) +{ + glusterd_status_rsp_conv_t *rsp_ctx = NULL; + data_t *new_value = NULL; + char brick_key[1024] = ""; + char new_key[1024] = ""; + int32_t index = 0; + int32_t ret = -1; + int32_t len = 0; + + /* Skip the following keys, they are already present in the ctx_dict */ + /* Also, skip all the task related pairs. 
They will be added to the + * ctx_dict later + */ + if (!strcmp(key, "count") || !strcmp(key, "cmd") || + !strcmp(key, "brick-index-max") || !strcmp(key, "other-count") || + !strncmp(key, "task", 4)) + return 0; + + rsp_ctx = data; + new_value = data_copy(value); + GF_ASSERT(new_value); + + sscanf(key, "brick%d.%s", &index, brick_key); + + if (index > rsp_ctx->brick_index_max) { + len = snprintf(new_key, sizeof(new_key), "brick%d.%s", + index + rsp_ctx->other_count, brick_key); + } else { + len = snprintf(new_key, sizeof(new_key), "%s", key); + } + if (len < 0 || len >= sizeof(new_key)) + goto out; + + ret = dict_setn(rsp_ctx->dict, new_key, len, new_value); +out: + if (ret) { + data_unref(new_value); + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to set key: %s in dict", key); + } + + return 0; +} + +static int +glusterd_volume_status_copy_tasks_to_ctx_dict(dict_t *this, char *key, + data_t *value, void *data) { - GF_ASSERT (hostname); - GF_ASSERT (uuid); + int ret = 0; + dict_t *ctx_dict = NULL; + data_t *new_value = NULL; + + if (strncmp(key, "task", 4)) + return 0; - glusterd_peerinfo_t *peerinfo = NULL; - glusterd_conf_t *priv = NULL; - int ret = -1; - xlator_t *this = NULL; + ctx_dict = data; + GF_ASSERT(ctx_dict); - this = THIS; - GF_ASSERT (this); - priv = this->private; - GF_ASSERT (priv); + new_value = data_copy(value); + GF_ASSERT(new_value); - ret = glusterd_friend_find_by_hostname (hostname, &peerinfo); + ret = dict_set(ctx_dict, key, new_value); + + return ret; +} + +int +glusterd_volume_status_aggregate_tasks_status(dict_t *ctx_dict, + dict_t *rsp_dict) +{ + int ret = -1; + xlator_t *this = NULL; + int local_count = 0; + int remote_count = 0; + int i = 0; + int j = 0; + char key[128] = ""; + int keylen; + char *task_type = NULL; + int local_status = 0; + int remote_status = 0; + char *local_task_id = NULL; + char *remote_task_id = NULL; + + GF_ASSERT(ctx_dict); + GF_ASSERT(rsp_dict); + + this = THIS; + GF_ASSERT(this); + + ret = dict_get_int32n(rsp_dict, "tasks", SLEN("tasks"), &remote_count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get remote task count"); + goto out; + } + /* Local count will not be present when this is called for the first + * time with the origins rsp_dict + */ + ret = dict_get_int32n(ctx_dict, "tasks", SLEN("tasks"), &local_count); + if (ret) { + ret = dict_foreach( + rsp_dict, glusterd_volume_status_copy_tasks_to_ctx_dict, ctx_dict); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to copy tasks" + "to ctx_dict."); + goto out; + } + + if (local_count != remote_count) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TASKS_COUNT_MISMATCH, + "Local tasks count (%d) and " + "remote tasks count (%d) do not match. Not aggregating " + "tasks status.", + local_count, remote_count); + ret = -1; + goto out; + } + + /* Update the tasks statuses. For every remote tasks, search for the + * local task, and update the local task status based on the remote + * status. 
+ */ + for (i = 0; i < remote_count; i++) { + keylen = snprintf(key, sizeof(key), "task%d.type", i); + ret = dict_get_strn(rsp_dict, key, keylen, &task_type); if (ret) { - ret = glusterd_is_local_addr (hostname); - if (ret) - goto out; - else - uuid_copy (uuid, priv->uuid); - } else { - uuid_copy (uuid, peerinfo->uuid); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get task type from rsp dict"); + goto out; + } + + /* Skip replace-brick status as it is going to be the same on + * all peers. rb_status is set by the replace brick commit + * function on all peers based on the replace brick command. + * We return the value of rb_status as the status for a + * replace-brick task in a 'volume status' command. + */ + if (!strcmp(task_type, "Replace brick")) + continue; + + keylen = snprintf(key, sizeof(key), "task%d.status", i); + ret = dict_get_int32n(rsp_dict, key, keylen, &remote_status); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get task status from rsp dict"); + goto out; + } + keylen = snprintf(key, sizeof(key), "task%d.id", i); + ret = dict_get_strn(rsp_dict, key, keylen, &remote_task_id); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get task id from rsp dict"); + goto out; + } + for (j = 0; j < local_count; j++) { + keylen = snprintf(key, sizeof(key), "task%d.id", j); + ret = dict_get_strn(ctx_dict, key, keylen, &local_task_id); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get local task-id"); + goto out; + } + + if (strncmp(remote_task_id, local_task_id, + strlen(remote_task_id))) { + /* Quit if a matching local task is not found */ + if (j == (local_count - 1)) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_TASKS_COUNT_MISMATCH, + "Could not find matching local " + "task for task %s", + remote_task_id); + goto out; + } + continue; + } + + keylen = snprintf(key, sizeof(key), "task%d.status", j); + ret = dict_get_int32n(ctx_dict, key, keylen, &local_status); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get local task status"); + goto out; + } + + /* Rebalance has 5 states, + * NOT_STARTED, STARTED, STOPPED, COMPLETE, FAILED + * The precedence used to determine the aggregate status + * is as below, + * STARTED > FAILED > STOPPED > COMPLETE > NOT_STARTED + */ + /* TODO: Move this to a common place utilities that both + * CLI and glusterd need. 
+ * Till then if the below algorithm is changed, change + * it in cli_xml_output_vol_rebalance_status in + * cli-xml-output.c + */ + ret = 0; + int rank[] = {[GF_DEFRAG_STATUS_STARTED] = 1, + [GF_DEFRAG_STATUS_FAILED] = 2, + [GF_DEFRAG_STATUS_STOPPED] = 3, + [GF_DEFRAG_STATUS_COMPLETE] = 4, + [GF_DEFRAG_STATUS_NOT_STARTED] = 5}; + if (rank[remote_status] <= rank[local_status]) + ret = dict_set_int32n(ctx_dict, key, keylen, remote_status); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_TASK_STATUS_UPDATE_FAIL, + "Failed to " + "update task status"); + goto out; + } + break; } + } out: - gf_log ("", GF_LOG_DEBUG, "returning %d", ret); - return ret; + return ret; } -int -glusterd_brick_stop (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo) +gf_boolean_t +glusterd_status_has_tasks(int cmd) { - int ret = -1; - xlator_t *this = NULL; - glusterd_conf_t *conf = NULL; + if (((cmd & GF_CLI_STATUS_MASK) == GF_CLI_STATUS_NONE) && + (cmd & GF_CLI_STATUS_VOL)) + return _gf_true; + return _gf_false; +} - if ((!brickinfo) || (!volinfo)) +int +glusterd_volume_status_copy_to_op_ctx_dict(dict_t *aggr, dict_t *rsp_dict) +{ + int ret = 0; + glusterd_status_rsp_conv_t rsp_ctx = {0}; + int32_t cmd = GF_CLI_STATUS_NONE; + int32_t node_count = 0; + int32_t other_count = 0; + int32_t brick_index_max = -1; + int32_t hot_brick_count = -1; + int32_t type = -1; + int32_t rsp_node_count = 0; + int32_t rsp_other_count = 0; + int vol_count = -1; + int i = 0; + dict_t *ctx_dict = NULL; + char key[64] = ""; + int keylen; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + + GF_ASSERT(rsp_dict); + xlator_t *this = THIS; + GF_ASSERT(this); + + if (aggr) { + ctx_dict = aggr; + + } else { + ctx_dict = glusterd_op_get_ctx(GD_OP_STATUS_VOLUME); + } + + ret = dict_get_int32n(ctx_dict, "cmd", SLEN("cmd"), &cmd); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "Key=cmd", + NULL); + goto out; + } + + if (cmd & GF_CLI_STATUS_ALL && is_origin_glusterd(ctx_dict)) { + ret = dict_get_int32n(rsp_dict, "vol_count", SLEN("vol_count"), + &vol_count); + if (ret == 0) { + ret = dict_set_int32n(ctx_dict, "vol_count", SLEN("vol_count"), + vol_count); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Key=vol_count", NULL); goto out; + } - this = THIS; - GF_ASSERT (this); - conf = this->private; - GF_ASSERT (conf); + for (i = 0; i < vol_count; i++) { + keylen = snprintf(key, sizeof(key), "vol%d", i); + ret = dict_get_strn(rsp_dict, key, keylen, &volname); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Key=%s", key, NULL); + goto out; + } - if (uuid_is_null (brickinfo->uuid)) { - ret = glusterd_resolve_brick (brickinfo); + ret = dict_set_strn(ctx_dict, key, keylen, volname); if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, - "cannot resolve brick: %s:%s", - brickinfo->hostname, brickinfo->path); - goto out; + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Key=%s", key, NULL); + goto out; } + } + } else { + /* Ignore the error as still the aggregation applies in + * case its a task sub command */ + ret = 0; } - - if (uuid_compare (brickinfo->uuid, conf->uuid)) { - ret = 0; - goto out; + } + + if ((cmd & GF_CLI_STATUS_TASKS) != 0) + goto aggregate_tasks; + + ret = dict_get_int32n(rsp_dict, "count", SLEN("count"), &rsp_node_count); + if (ret) { + gf_smsg(this->name, GF_LOG_INFO, 0, GD_MSG_DICT_GET_FAILED, "Key=count", + NULL); + ret = 0; // no bricks in the rsp + goto out; + } + + ret = 
dict_get_int32n(rsp_dict, "other-count", SLEN("other-count"), + &rsp_other_count); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Key=other-count", NULL); + goto out; + } + + ret = dict_get_int32n(ctx_dict, "count", SLEN("count"), &node_count); + ret = dict_get_int32n(ctx_dict, "other-count", SLEN("other-count"), + &other_count); + if (!dict_getn(ctx_dict, "brick-index-max", SLEN("brick-index-max"))) { + ret = dict_get_int32n(rsp_dict, "brick-index-max", + SLEN("brick-index-max"), &brick_index_max); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Key=brick-index-max", NULL); + goto out; + } + ret = dict_set_int32n(ctx_dict, "brick-index-max", + SLEN("brick-index-max"), brick_index_max); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Key=brick-index-max", NULL); + goto out; } - gf_log ("", GF_LOG_INFO, "About to stop glusterfs" - " for brick %s:%s", brickinfo->hostname, - brickinfo->path); - ret = glusterd_volume_stop_glusterfs (volinfo, brickinfo); + } else { + ret = dict_get_int32n(ctx_dict, "brick-index-max", + SLEN("brick-index-max"), &brick_index_max); if (ret) { - gf_log ("", GF_LOG_CRITICAL, "Unable to remove" - " brick: %s:%s", brickinfo->hostname, - brickinfo->path); - goto out; + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Key=brick-index-max", NULL); + goto out; } + } + + rsp_ctx.count = node_count; + rsp_ctx.brick_index_max = brick_index_max; + rsp_ctx.other_count = other_count; + rsp_ctx.dict = ctx_dict; + + dict_foreach(rsp_dict, glusterd_volume_status_add_peer_rsp, &rsp_ctx); + + ret = dict_set_int32n(ctx_dict, "count", SLEN("count"), + node_count + rsp_node_count); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Key=count", NULL); + goto out; + } + + ret = dict_set_int32n(ctx_dict, "other-count", SLEN("other-count"), + (other_count + rsp_other_count)); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Key=other-count", NULL); + goto out; + } + + ret = dict_get_strn(ctx_dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Key=volname", NULL); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, + "Volume=%s", volname, NULL); + goto out; + } + + ret = dict_set_int32n(ctx_dict, "hot_brick_count", SLEN("hot_brick_count"), + hot_brick_count); + if (ret) { + gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=hot_brick_count", NULL); + goto out; + } + + ret = dict_set_int32n(ctx_dict, "type", SLEN("type"), type); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=type", NULL); + goto out; + } + +aggregate_tasks: + /* Tasks are only present for a normal status command for a volume or + * for an explicit tasks status command for a volume + */ + if (!(cmd & GF_CLI_STATUS_ALL) && + (((cmd & GF_CLI_STATUS_TASKS) != 0) || glusterd_status_has_tasks(cmd))) + ret = glusterd_volume_status_aggregate_tasks_status(ctx_dict, rsp_dict); out: - gf_log ("", GF_LOG_DEBUG, "returning %d ", ret); - return ret; + return ret; } int -glusterd_is_defrag_on (glusterd_volinfo_t *volinfo) +glusterd_max_opversion_use_rsp_dict(dict_t *dst, dict_t *src) { - return ((volinfo->defrag_status == GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED) || - (volinfo->defrag_status == GF_DEFRAG_STATUS_MIGRATE_DATA_STARTED)); + int ret = -1; + int 
src_max_opversion = -1; + int max_opversion = -1; + + GF_VALIDATE_OR_GOTO(THIS->name, dst, out); + GF_VALIDATE_OR_GOTO(THIS->name, src, out); + + ret = dict_get_int32n(dst, "max-opversion", SLEN("max-opversion"), + &max_opversion); + if (ret) + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Maximum supported op-version not set in destination " + "dictionary"); + + ret = dict_get_int32n(src, "max-opversion", SLEN("max-opversion"), + &src_max_opversion); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get maximum supported op-version from source"); + goto out; + } + + if (max_opversion == -1 || src_max_opversion < max_opversion) + max_opversion = src_max_opversion; + + ret = dict_set_int32n(dst, "max-opversion", SLEN("max-opversion"), + max_opversion); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set max op-version"); + goto out; + } +out: + return ret; } int -glusterd_is_replace_running (glusterd_volinfo_t *volinfo, glusterd_brickinfo_t *brickinfo) +glusterd_volume_bitrot_scrub_use_rsp_dict(dict_t *aggr, dict_t *rsp_dict) { - int ret = 0; - char *src_hostname = NULL; - char *brick_hostname = NULL; + int ret = -1; + int j = 0; + uint64_t value = 0; + char key[64] = ""; + int keylen; + char *last_scrub_time = NULL; + char *scrub_time = NULL; + char *volname = NULL; + char *node_uuid = NULL; + char *node_uuid_str = NULL; + char *bitd_log = NULL; + char *scrub_log = NULL; + char *scrub_freq = NULL; + char *scrub_state = NULL; + char *scrub_impact = NULL; + char *bad_gfid_str = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *volinfo = NULL; + int src_count = 0; + int dst_count = 0; + int8_t scrub_running = 0; + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + GF_ASSERT(priv); + + ret = dict_get_strn(aggr, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get volume name"); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, + "Unable to find volinfo for volume: %s", volname); + goto out; + } + + ret = dict_get_int32n(aggr, "count", SLEN("count"), &dst_count); + + ret = dict_get_int32n(rsp_dict, "count", SLEN("count"), &src_count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "failed to get count value"); + ret = 0; + goto out; + } + + ret = dict_set_int32n(aggr, "count", SLEN("count"), src_count + dst_count); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set count in dictionary"); + + keylen = snprintf(key, sizeof(key), "node-uuid-%d", src_count); + ret = dict_get_strn(rsp_dict, key, keylen, &node_uuid); + if (!ret) { + node_uuid_str = gf_strdup(node_uuid); + keylen = snprintf(key, sizeof(key), "node-uuid-%d", + src_count + dst_count); + ret = dict_set_dynstrn(aggr, key, keylen, node_uuid_str); + if (ret) { + gf_msg_debug(this->name, 0, "failed to set node-uuid"); + } + } - if (volinfo->src_brick) { - src_hostname = gf_strdup (volinfo->src_brick->hostname); - if (!src_hostname) { - ret = -1; - goto out; + snprintf(key, sizeof(key), "scrub-running-%d", src_count); + ret = dict_get_int8(rsp_dict, key, &scrub_running); + if (!ret) { + snprintf(key, sizeof(key), "scrub-running-%d", src_count + dst_count); + ret = dict_set_int8(aggr, key, scrub_running); + if (ret) { + gf_msg_debug(this->name, 0, + "Failed to set " + 
"scrub-running value"); + } + } + + snprintf(key, sizeof(key), "scrubbed-files-%d", src_count); + ret = dict_get_uint64(rsp_dict, key, &value); + if (!ret) { + snprintf(key, sizeof(key), "scrubbed-files-%d", src_count + dst_count); + ret = dict_set_uint64(aggr, key, value); + if (ret) { + gf_msg_debug(this->name, 0, + "Failed to set " + "scrubbed-file value"); + } + } + + snprintf(key, sizeof(key), "unsigned-files-%d", src_count); + ret = dict_get_uint64(rsp_dict, key, &value); + if (!ret) { + snprintf(key, sizeof(key), "unsigned-files-%d", src_count + dst_count); + ret = dict_set_uint64(aggr, key, value); + if (ret) { + gf_msg_debug(this->name, 0, + "Failed to set " + "unsigned-file value"); + } + } + + keylen = snprintf(key, sizeof(key), "last-scrub-time-%d", src_count); + ret = dict_get_strn(rsp_dict, key, keylen, &last_scrub_time); + if (!ret) { + scrub_time = gf_strdup(last_scrub_time); + keylen = snprintf(key, sizeof(key), "last-scrub-time-%d", + src_count + dst_count); + ret = dict_set_dynstrn(aggr, key, keylen, scrub_time); + if (ret) { + gf_msg_debug(this->name, 0, + "Failed to set " + "last scrub time value"); + } + } + + snprintf(key, sizeof(key), "scrub-duration-%d", src_count); + ret = dict_get_uint64(rsp_dict, key, &value); + if (!ret) { + snprintf(key, sizeof(key), "scrub-duration-%d", src_count + dst_count); + ret = dict_set_uint64(aggr, key, value); + if (ret) { + gf_msg_debug(this->name, 0, + "Failed to set " + "scrubbed-duration value"); + } + } + + snprintf(key, sizeof(key), "error-count-%d", src_count); + ret = dict_get_uint64(rsp_dict, key, &value); + if (!ret) { + snprintf(key, sizeof(key), "error-count-%d", src_count + dst_count); + ret = dict_set_uint64(aggr, key, value); + if (ret) { + gf_msg_debug(this->name, 0, + "Failed to set error " + "count value"); + } + + /* Storing all the bad files in the dictionary */ + for (j = 0; j < value; j++) { + keylen = snprintf(key, sizeof(key), "quarantine-%d-%d", j, + src_count); + ret = dict_get_strn(rsp_dict, key, keylen, &bad_gfid_str); + if (!ret) { + snprintf(key, sizeof(key), "quarantine-%d-%d", j, + src_count + dst_count); + ret = dict_set_dynstr_with_alloc(aggr, key, bad_gfid_str); + if (ret) { + gf_msg_debug(this->name, 0, + "Failed to set " + "bad file gfid"); } - } else { - gf_log ("glusterd", GF_LOG_DEBUG, - "replace brick is not running"); - goto out; + } } + } - brick_hostname = gf_strdup (brickinfo->hostname); - if (!brick_hostname) { - ret = -1; - goto out; + ret = dict_get_strn(rsp_dict, "bitrot_log_file", SLEN("bitrot_log_file"), + &bitd_log); + if (!ret) { + ret = dict_set_dynstr_with_alloc(aggr, "bitrot_log_file", bitd_log); + if (ret) { + gf_msg_debug(this->name, 0, + "Failed to set " + "bitrot log file location"); + goto out; } - if (!glusterd_is_local_addr (src_hostname) && !glusterd_is_local_addr (brick_hostname)) { - if (glusterd_is_rb_started (volinfo) || glusterd_is_rb_paused (volinfo)) - ret = -1; + } + + ret = dict_get_strn(rsp_dict, "scrub_log_file", SLEN("scrub_log_file"), + &scrub_log); + if (!ret) { + ret = dict_set_dynstr_with_alloc(aggr, "scrub_log_file", scrub_log); + if (ret) { + gf_msg_debug(this->name, 0, + "Failed to set " + "scrubber log file location"); + goto out; + } + } + + ret = dict_get_strn(rsp_dict, "features.scrub-freq", + SLEN("features.scrub-freq"), &scrub_freq); + if (!ret) { + ret = dict_set_dynstr_with_alloc(aggr, "features.scrub-freq", + scrub_freq); + if (ret) { + gf_msg_debug(this->name, 0, + "Failed to set " + "scrub-frequency value to dictionary"); + goto out; + } + } + + 
ret = dict_get_strn(rsp_dict, "features.scrub-throttle", + SLEN("features.scrub-throttle"), &scrub_impact); + if (!ret) { + ret = dict_set_dynstr_with_alloc(aggr, "features.scrub-throttle", + scrub_impact); + if (ret) { + gf_msg_debug(this->name, 0, + "Failed to set " + "scrub-throttle value to dictionary"); + goto out; + } + } + + ret = dict_get_strn(rsp_dict, "features.scrub", SLEN("features.scrub"), + &scrub_state); + if (!ret) { + ret = dict_set_dynstr_with_alloc(aggr, "features.scrub", scrub_state); + if (ret) { + gf_msg_debug(this->name, 0, + "Failed to set " + "scrub state value to dictionary"); + goto out; } + } + ret = 0; out: - if (src_hostname) - GF_FREE (src_hostname); - if (brick_hostname) - GF_FREE (brick_hostname); - return ret; + return ret; } int -glusterd_new_brick_validate (char *brick, glusterd_brickinfo_t *brickinfo, - char *op_errstr, size_t len) +glusterd_bitrot_volume_node_rsp(dict_t *aggr, dict_t *rsp_dict) { - glusterd_brickinfo_t *newbrickinfo = NULL; - glusterd_brickinfo_t *tmpbrkinfo = NULL; - int ret = -1; - gf_boolean_t is_allocated = _gf_false; - glusterd_peerinfo_t *peerinfo = NULL; - glusterd_conf_t *priv = NULL; - xlator_t *this = NULL; + int ret = -1; + uint64_t value = 0; + char key[64] = ""; + int keylen; + char buf[1024] = ""; + int32_t i = 0; + int32_t j = 0; + char *last_scrub_time = NULL; + char *scrub_time = NULL; + char *volname = NULL; + char *scrub_freq = NULL; + char *scrub_state = NULL; + char *scrub_impact = NULL; + char *bad_gfid_str = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *volinfo = NULL; + int8_t scrub_running = 0; + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + GF_ASSERT(priv); + + ret = dict_set_strn(aggr, "bitrot_log_file", SLEN("bitrot_log_file"), + priv->bitd_svc.proc.logfile); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set bitrot log file location"); + goto out; + } + + ret = dict_set_strn(aggr, "scrub_log_file", SLEN("scrub_log_file"), + priv->scrub_svc.proc.logfile); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set scrubber log file location"); + goto out; + } + + ret = dict_get_strn(aggr, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get volume name"); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, + "Unable to find volinfo for volume: %s", volname); + goto out; + } + + ret = dict_get_int32n(aggr, "count", SLEN("count"), &i); + i++; + + ret = dict_set_int32n(aggr, "count", SLEN("count"), i); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set count"); + + snprintf(buf, sizeof(buf), "%s", uuid_utoa(MY_UUID)); + + snprintf(key, sizeof(key), "node-uuid-%d", i); + ret = dict_set_dynstr_with_alloc(aggr, key, buf); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "failed to set node-uuid"); + + ret = dict_get_strn(volinfo->dict, "features.scrub-freq", + SLEN("features.scrub-freq"), &scrub_freq); + if (!ret) { + ret = dict_set_strn(aggr, "features.scrub-freq", + SLEN("features.scrub-freq"), scrub_freq); + if (ret) { + gf_msg_debug(this->name, 0, + "Failed to set " + "scrub-frequency value to dictionary"); + } + } else { + /* By Default scrub-frequency is bi-weekly. 
So when user + * enable bitrot then scrub-frequency value will not be + * present in volinfo->dict. Setting by-default value of + * scrub-frequency explicitly for presenting it to scrub + * status. + */ + ret = dict_set_dynstr_with_alloc(aggr, "features.scrub-freq", + "biweekly"); + if (ret) { + gf_msg_debug(this->name, 0, + "Failed to set " + "scrub-frequency value to dictionary"); + } + } - this = THIS; - GF_ASSERT (this); - priv = this->private; - GF_ASSERT (priv); + ret = dict_get_strn(volinfo->dict, "features.scrub-throttle", + SLEN("features.scrub-throttle"), &scrub_impact); + if (!ret) { + ret = dict_set_strn(aggr, "features.scrub-throttle", + SLEN("features.scrub-throttle"), scrub_impact); + if (ret) { + gf_msg_debug(this->name, 0, + "Failed to set " + "scrub-throttle value to dictionary"); + } + } else { + /* By Default scrub-throttle is lazy. So when user + * enable bitrot then scrub-throttle value will not be + * present in volinfo->dict. Setting by-default value of + * scrub-throttle explicitly for presenting it to + * scrub status. + */ + ret = dict_set_dynstr_with_alloc(aggr, "features.scrub-throttle", + "lazy"); + if (ret) { + gf_msg_debug(this->name, 0, + "Failed to set " + "scrub-throttle value to dictionary"); + } + } + ret = dict_get_strn(volinfo->dict, "features.scrub", SLEN("features.scrub"), + &scrub_state); + if (!ret) { + ret = dict_set_strn(aggr, "features.scrub", SLEN("features.scrub"), + scrub_state); + if (ret) { + gf_msg_debug(this->name, 0, + "Failed to set " + "scrub state value to dictionary"); + } + } - GF_ASSERT (brick); - GF_ASSERT (op_errstr); + ret = dict_get_int8(rsp_dict, "scrub-running", &scrub_running); + if (!ret) { + snprintf(key, sizeof(key), "scrub-running-%d", i); + ret = dict_set_uint64(aggr, key, scrub_running); + if (ret) { + gf_msg_debug(this->name, 0, + "Failed to set " + "scrub-running value"); + } + } - if (!brickinfo) { - ret = glusterd_brickinfo_from_brick (brick, &newbrickinfo); - if (ret) - goto out; - is_allocated = _gf_true; - } else { - newbrickinfo = brickinfo; + ret = dict_get_uint64(rsp_dict, "scrubbed-files", &value); + if (!ret) { + snprintf(key, sizeof(key), "scrubbed-files-%d", i); + ret = dict_set_uint64(aggr, key, value); + if (ret) { + gf_msg_debug(this->name, 0, + "Failed to set " + "scrubbed-file value"); } + } - ret = glusterd_resolve_brick (newbrickinfo); + ret = dict_get_uint64(rsp_dict, "unsigned-files", &value); + if (!ret) { + snprintf(key, sizeof(key), "unsigned-files-%d", i); + ret = dict_set_uint64(aggr, key, value); if (ret) { - snprintf (op_errstr, len, "Host %s not a friend", - newbrickinfo->hostname); - gf_log ("glusterd", GF_LOG_ERROR, "%s", op_errstr); - goto out; + gf_msg_debug(this->name, 0, + "Failed to set " + "unsigned-file value"); } + } - if (!uuid_compare (priv->uuid, newbrickinfo->uuid)) - goto brick_validation; - ret = glusterd_friend_find_by_uuid (newbrickinfo->uuid, &peerinfo); - if (ret) - goto out; - if ((!peerinfo->connected) || - (peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED)) { - snprintf(op_errstr, len, "Host %s not connected", - newbrickinfo->hostname); - gf_log ("glusterd", GF_LOG_ERROR, "%s", op_errstr); - ret = -1; - goto out; + ret = dict_get_strn(rsp_dict, "last-scrub-time", SLEN("last-scrub-time"), + &last_scrub_time); + if (!ret) { + keylen = snprintf(key, sizeof(key), "last-scrub-time-%d", i); + + scrub_time = gf_strdup(last_scrub_time); + ret = dict_set_dynstrn(aggr, key, keylen, scrub_time); + if (ret) { + gf_msg_debug(this->name, 0, + "Failed to set " + "last scrub 
time value" } -brick_validation: - ret = glusterd_brickinfo_get (newbrickinfo->uuid, - newbrickinfo->hostname, - newbrickinfo->path, &tmpbrkinfo); - if (!ret) { - snprintf(op_errstr, len, "Brick: %s already in use", - brick); - gf_log ("", GF_LOG_ERROR, "%s", op_errstr); - ret = -1; - goto out; - } else { - ret = 0; + } + + ret = dict_get_uint64(rsp_dict, "scrub-duration", &value); + if (!ret) { + snprintf(key, sizeof(key), "scrub-duration-%d", i); + ret = dict_set_uint64(aggr, key, value); + if (ret) { + gf_msg_debug(this->name, 0, + "Failed to set " + "scrubbed-duration value"); + } + } + + ret = dict_get_uint64(rsp_dict, "total-count", &value); + if (!ret) { + snprintf(key, sizeof(key), "error-count-%d", i); + ret = dict_set_uint64(aggr, key, value); + if (ret) { + gf_msg_debug(this->name, 0, + "Failed to set error " + "count value"); } + + /* Storing all the bad files in the dictionary */ + for (j = 0; j < value; j++) { + keylen = snprintf(key, sizeof(key), "quarantine-%d", j); + ret = dict_get_strn(rsp_dict, key, keylen, &bad_gfid_str); + if (!ret) { + snprintf(key, sizeof(key), "quarantine-%d-%d", j, i); + ret = dict_set_dynstr_with_alloc(aggr, key, bad_gfid_str); + if (ret) { + gf_msg_debug(this->name, 0, + "Failed to set " + "bad file gfid"); + } + } + } + } + + ret = 0; out: - if (is_allocated && newbrickinfo) - glusterd_brickinfo_delete (newbrickinfo); - gf_log ("", GF_LOG_DEBUG, "returning %d ", ret); - return ret; + return ret; } int -glusterd_is_rb_started(glusterd_volinfo_t *volinfo) +glusterd_volume_rebalance_use_rsp_dict(dict_t *aggr, dict_t *rsp_dict) { - gf_log ("", GF_LOG_DEBUG, - "is_rb_started:status=%d", volinfo->rb_status); - return (volinfo->rb_status == GF_RB_STATUS_STARTED); + char key[64] = ""; + int keylen; + char *node_uuid = NULL; + char *node_uuid_str = NULL; + char *volname = NULL; + dict_t *ctx_dict = NULL; + double elapsed_time = 0; + glusterd_conf_t *conf = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_volinfo_t *volinfo = NULL; + int ret = 0; + int32_t index = 0; + int32_t count = 0; + int32_t current_index = 1; + int32_t value32 = 0; + uint64_t value = 0; + char *peer_uuid_str = NULL; + xlator_t *this = NULL; + + GF_ASSERT(rsp_dict); + this = THIS; + GF_ASSERT(this); + conf = this->private; + + if (conf->op_version < GD_OP_VERSION_6_0) + current_index = 2; + if (aggr) { + ctx_dict = aggr; + + } else { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OPCTX_GET_FAIL, + "Operation Context is not present"); + goto out; + } + + ret = dict_get_strn(ctx_dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get volume name"); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + + if (ret) + goto out; + + ret = dict_get_int32n(rsp_dict, "count", SLEN("count"), &index); + if (ret) + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "failed to get index from rsp dict"); + + keylen = snprintf(key, sizeof(key), "node-uuid-%d", index); + ret = dict_get_strn(rsp_dict, key, keylen, &node_uuid); + if (!ret) { + node_uuid_str = gf_strdup(node_uuid); + + /* Finding the index of the node-uuid in the peer-list */ + RCU_READ_LOCK; + cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list) + { + peer_uuid_str = gd_peer_uuid_str(peerinfo); + if (strcmp(peer_uuid_str, node_uuid_str) == 0) + break; + + current_index++; + } + RCU_READ_UNLOCK; + + /* Setting the largest index value as the total count. 
*/ + ret = dict_get_int32n(ctx_dict, "count", SLEN("count"), &count); + if (count < current_index) { + ret = dict_set_int32n(ctx_dict, "count", SLEN("count"), + current_index); + if (ret) + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set count"); + } + /* Setting the same index for the node, as is in the peerlist.*/ + keylen = snprintf(key, sizeof(key), "node-uuid-%d", current_index); + ret = dict_set_dynstrn(ctx_dict, key, keylen, node_uuid_str); + if (ret) { + gf_msg_debug(THIS->name, 0, "failed to set node-uuid"); + } + } + + snprintf(key, sizeof(key), "files-%d", index); + ret = dict_get_uint64(rsp_dict, key, &value); + if (!ret) { + snprintf(key, sizeof(key), "files-%d", current_index); + ret = dict_set_uint64(ctx_dict, key, value); + if (ret) { + gf_msg_debug(THIS->name, 0, "failed to set the file count"); + } + } + + snprintf(key, sizeof(key), "size-%d", index); + ret = dict_get_uint64(rsp_dict, key, &value); + if (!ret) { + snprintf(key, sizeof(key), "size-%d", current_index); + ret = dict_set_uint64(ctx_dict, key, value); + if (ret) { + gf_msg_debug(THIS->name, 0, "failed to set the size of migration"); + } + } + + snprintf(key, sizeof(key), "lookups-%d", index); + ret = dict_get_uint64(rsp_dict, key, &value); + if (!ret) { + snprintf(key, sizeof(key), "lookups-%d", current_index); + ret = dict_set_uint64(ctx_dict, key, value); + if (ret) { + gf_msg_debug(THIS->name, 0, "failed to set looked up file count"); + } + } + + keylen = snprintf(key, sizeof(key), "status-%d", index); + ret = dict_get_int32n(rsp_dict, key, keylen, &value32); + if (!ret) { + keylen = snprintf(key, sizeof(key), "status-%d", current_index); + ret = dict_set_int32n(ctx_dict, key, keylen, value32); + if (ret) { + gf_msg_debug(THIS->name, 0, "failed to set status"); + } + } + + snprintf(key, sizeof(key), "failures-%d", index); + ret = dict_get_uint64(rsp_dict, key, &value); + if (!ret) { + snprintf(key, sizeof(key), "failures-%d", current_index); + ret = dict_set_uint64(ctx_dict, key, value); + if (ret) { + gf_msg_debug(THIS->name, 0, "failed to set failure count"); + } + } + + snprintf(key, sizeof(key), "skipped-%d", index); + ret = dict_get_uint64(rsp_dict, key, &value); + if (!ret) { + snprintf(key, sizeof(key), "skipped-%d", current_index); + ret = dict_set_uint64(ctx_dict, key, value); + if (ret) { + gf_msg_debug(THIS->name, 0, "failed to set skipped count"); + } + } + snprintf(key, sizeof(key), "run-time-%d", index); + ret = dict_get_double(rsp_dict, key, &elapsed_time); + if (!ret) { + snprintf(key, sizeof(key), "run-time-%d", current_index); + ret = dict_set_double(ctx_dict, key, elapsed_time); + if (ret) { + gf_msg_debug(THIS->name, 0, "failed to set run-time"); + } + } + + snprintf(key, sizeof(key), "time-left-%d", index); + ret = dict_get_uint64(rsp_dict, key, &value); + if (!ret) { + snprintf(key, sizeof(key), "time-left-%d", current_index); + ret = dict_set_uint64(ctx_dict, key, value); + if (ret) { + gf_msg_debug(THIS->name, 0, "failed to set time-left"); + } + } + snprintf(key, sizeof(key), "demoted-%d", index); + ret = dict_get_uint64(rsp_dict, key, &value); + if (!ret) { + snprintf(key, sizeof(key), "demoted-%d", current_index); + ret = dict_set_uint64(ctx_dict, key, value); + if (ret) { + gf_msg_debug(THIS->name, 0, "failed to set demoted count"); + } + } + snprintf(key, sizeof(key), "promoted-%d", index); + ret = dict_get_uint64(rsp_dict, key, &value); + if (!ret) { + snprintf(key, sizeof(key), "promoted-%d", current_index); + ret = dict_set_uint64(ctx_dict, key, 
value); + if (ret) { + gf_msg_debug(THIS->name, 0, "failed to set promoted count"); + } + } + + ret = 0; + +out: + return ret; +} + +int +glusterd_sys_exec_output_rsp_dict(dict_t *dst, dict_t *src) +{ + char output_name[64] = ""; + char *output = NULL; + int ret = 0; + int i = 0; + int keylen; + int src_output_count = 0; + int dst_output_count = 0; + + if (!dst || !src) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_EMPTY, + "Source or Destination " + "dict is empty."); + goto out; + } + + ret = dict_get_int32n(dst, "output_count", SLEN("output_count"), + &dst_output_count); + + ret = dict_get_int32n(src, "output_count", SLEN("output_count"), + &src_output_count); + if (ret) { + gf_msg_debug("glusterd", 0, "No output from source"); + ret = 0; + goto out; + } + + for (i = 1; i <= src_output_count; i++) { + keylen = snprintf(output_name, sizeof(output_name), "output_%d", i); + if (keylen <= 0 || keylen >= sizeof(output_name)) { + ret = -1; + goto out; + } + ret = dict_get_strn(src, output_name, keylen, &output); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to fetch %s", output_name); + goto out; + } + + keylen = snprintf(output_name, sizeof(output_name), "output_%d", + i + dst_output_count); + if (keylen <= 0 || keylen >= sizeof(output_name)) { + ret = -1; + goto out; + } + + ret = dict_set_dynstrn(dst, output_name, keylen, gf_strdup(output)); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Unable to set %s", output_name); + goto out; + } + } + + ret = dict_set_int32n(dst, "output_count", SLEN("output_count"), + dst_output_count + src_output_count); +out: + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; } int -glusterd_is_rb_paused ( glusterd_volinfo_t *volinfo) +glusterd_use_rsp_dict(dict_t *aggr, dict_t *rsp_dict) { - gf_log ("", GF_LOG_DEBUG, - "is_rb_paused:status=%d", volinfo->rb_status); + int ret = 0; + + GF_ASSERT(aggr); + GF_ASSERT(rsp_dict); - return (volinfo->rb_status == GF_RB_STATUS_PAUSED); + if (!aggr) + goto out; + dict_copy(rsp_dict, aggr); +out: + return ret; } -inline int -glusterd_set_rb_status (glusterd_volinfo_t *volinfo, gf_rb_status_t status) +int +glusterd_volume_heal_use_rsp_dict(dict_t *aggr, dict_t *rsp_dict) { - gf_log ("", GF_LOG_DEBUG, - "setting status from %d to %d", - volinfo->rb_status, - status); + int ret = 0; + dict_t *ctx_dict = NULL; + uuid_t *txn_id = NULL; + glusterd_op_info_t txn_op_info = { + {0}, + }; + glusterd_op_t op = GD_OP_NONE; + + GF_ASSERT(rsp_dict); + + ret = dict_get_bin(aggr, "transaction_id", (void **)&txn_id); + if (ret) + goto out; + gf_msg_debug(THIS->name, 0, "transaction ID = %s", uuid_utoa(*txn_id)); + + ret = glusterd_get_txn_opinfo(txn_id, &txn_op_info); + if (ret) { + gf_msg_callingfn(THIS->name, GF_LOG_ERROR, 0, + GD_MSG_TRANS_OPINFO_GET_FAIL, + "Unable to get transaction opinfo " + "for transaction ID : %s", + uuid_utoa(*txn_id)); + goto out; + } + + op = txn_op_info.op; + GF_ASSERT(GD_OP_HEAL_VOLUME == op); + + if (aggr) { + ctx_dict = aggr; + + } else { + ctx_dict = txn_op_info.op_ctx; + } + + if (!ctx_dict) + goto out; + dict_copy(rsp_dict, ctx_dict); +out: + return ret; +} - volinfo->rb_status = status; - return 0; +int +_profile_volume_add_brick_rsp(dict_t *this, char *key, data_t *value, + void *data) +{ + char new_key[256] = ""; + int keylen; + glusterd_pr_brick_rsp_conv_t *rsp_ctx = NULL; + data_t *new_value = NULL; + + rsp_ctx = data; + new_value = data_copy(value); + GF_ASSERT(new_value); + keylen = snprintf(new_key, 
sizeof(new_key), "%d-%s", rsp_ctx->count, key); + dict_setn(rsp_ctx->dict, new_key, keylen, new_value); + return 0; } -inline int -glusterd_rb_check_bricks (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *src, glusterd_brickinfo_t *dst) +int +glusterd_volume_quota_copy_to_op_ctx_dict(dict_t *dict, dict_t *rsp_dict) { - if (!volinfo->src_brick || !volinfo->dst_brick) - return -1; + int ret = -1; + int i = 0; + int count = 0; + int rsp_dict_count = 0; + char *uuid_str = NULL; + char *uuid_str_dup = NULL; + char key[64] = ""; + int keylen; + xlator_t *this = NULL; + int type = GF_QUOTA_OPTION_TYPE_NONE; + + this = THIS; + GF_ASSERT(this); + + ret = dict_get_int32n(dict, "type", SLEN("type"), &type); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get quota opcode"); + goto out; + } + + if ((type != GF_QUOTA_OPTION_TYPE_LIMIT_USAGE) && + (type != GF_QUOTA_OPTION_TYPE_LIMIT_OBJECTS) && + (type != GF_QUOTA_OPTION_TYPE_REMOVE) && + (type != GF_QUOTA_OPTION_TYPE_REMOVE_OBJECTS)) { + dict_copy(rsp_dict, dict); + ret = 0; + goto out; + } + + ret = dict_get_int32n(rsp_dict, "count", SLEN("count"), &rsp_dict_count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get the count of " + "gfids from the rsp dict"); + goto out; + } + + ret = dict_get_int32n(dict, "count", SLEN("count"), &count); + if (ret) + /* The key "count" is absent in op_ctx when this function is + * called after self-staging on the originator. This must not + * be treated as error. + */ + gf_msg_debug(this->name, 0, + "Failed to get count of gfids" + " from req dict. This could be because count is not yet" + " copied from rsp_dict into op_ctx"); + + for (i = 0; i < rsp_dict_count; i++) { + keylen = snprintf(key, sizeof(key), "gfid%d", i); + ret = dict_get_strn(rsp_dict, key, keylen, &uuid_str); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get gfid " + "from rsp dict"); + goto out; + } - if (strcmp (volinfo->src_brick->hostname, src->hostname) || - strcmp (volinfo->src_brick->path, src->path)) { - gf_log("", GF_LOG_ERROR, "Replace brick src bricks differ"); - return -1; + uuid_str_dup = gf_strdup(uuid_str); + if (!uuid_str_dup) { + ret = -1; + goto out; } - if (strcmp (volinfo->dst_brick->hostname, dst->hostname) || - strcmp (volinfo->dst_brick->path, dst->path)) { - gf_log ("", GF_LOG_ERROR, "Replace brick dst bricks differ"); - return -1; + + keylen = snprintf(key, sizeof(key), "gfid%d", i + count); + ret = dict_set_dynstrn(dict, key, keylen, uuid_str_dup); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set gfid " + "from rsp dict into req dict"); + GF_FREE(uuid_str_dup); + goto out; } - return 0; + } + + ret = dict_set_int32n(dict, "count", SLEN("count"), rsp_dict_count + count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set aggregated " + "count in req dict"); + goto out; + } + +out: + return ret; } int -glusterd_brick_create_path (char *host, char *path, mode_t mode, - char **op_errstr) +glusterd_profile_volume_brick_rsp(void *pending_entry, dict_t *rsp_dict, + dict_t *op_ctx, char **op_errstr, + gd_node_type type) { - int ret = -1; - char msg[2048] = {0}; - struct stat st_buf = {0}; + int ret = 0; + glusterd_pr_brick_rsp_conv_t rsp_ctx = {0}; + int32_t count = 0; + char brick[PATH_MAX + 1024] = ""; + char key[64] = ""; + int keylen; + char *full_brick = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + xlator_t *this 
= NULL; + glusterd_conf_t *priv = NULL; + + GF_ASSERT(rsp_dict); + GF_ASSERT(op_ctx); + GF_ASSERT(op_errstr); + GF_ASSERT(pending_entry); + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + ret = dict_get_int32n(op_ctx, "count", SLEN("count"), &count); + if (ret) { + count = 1; + } else { + count++; + } + if (type == GD_NODE_BRICK) { + brickinfo = pending_entry; + snprintf(brick, sizeof(brick), "%s:%s", brickinfo->hostname, + brickinfo->path); + } else if (type == GD_NODE_NFS) { + snprintf(brick, sizeof(brick), "%s", uuid_utoa(MY_UUID)); + } + full_brick = gf_strdup(brick); + GF_ASSERT(full_brick); + keylen = snprintf(key, sizeof(key), "%d-brick", count); + ret = dict_set_dynstrn(op_ctx, key, keylen, full_brick); + + rsp_ctx.count = count; + rsp_ctx.dict = op_ctx; + dict_foreach(rsp_dict, _profile_volume_add_brick_rsp, &rsp_ctx); + ret = dict_set_int32n(op_ctx, "count", SLEN("count"), count); + return ret; +} - ret = stat (path, &st_buf); - if ((!ret) && (!S_ISDIR (st_buf.st_mode))) { - snprintf (msg, sizeof (msg), "brick %s:%s, " - "path %s is not a directory", host, path, path); - gf_log ("", GF_LOG_ERROR, "%s", msg); - *op_errstr = gf_strdup (msg); - ret = -1; - goto out; - } else if (!ret) { - goto out; +// input-key: <replica-id>:<child-id>-* +// output-key: <brick-id>-* +int +_heal_volume_add_shd_rsp(dict_t *this, char *key, data_t *value, void *data) +{ + char new_key[256] = ""; + char int_str[16] = ""; + data_t *new_value = NULL; + char *rxl_end = NULL; + int rxl_end_len; + char *rxl_child_end = NULL; + glusterd_volinfo_t *volinfo = NULL; + int rxl_id = 0; + int rxl_child_id = 0; + int brick_id = 0; + int int_len = 0; + int ret = 0; + glusterd_heal_rsp_conv_t *rsp_ctx = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + + rsp_ctx = data; + rxl_end = strchr(key, '-'); + if (!rxl_end) + goto out; + + rxl_child_end = strchr(rxl_end + 1, '-'); + if (!rxl_child_end) + goto out; + + rxl_end_len = strlen(rxl_end); + int_len = strlen(key) - rxl_end_len; + (void)memcpy(int_str, key, int_len); + int_str[int_len] = '\0'; + + ret = gf_string2int(int_str, &rxl_id); + if (ret) + goto out; + + int_len = rxl_end_len - strlen(rxl_child_end) - 1; + (void)memcpy(int_str, rxl_end + 1, int_len); + int_str[int_len] = '\0'; + + ret = gf_string2int(int_str, &rxl_child_id); + if (ret) + goto out; + + volinfo = rsp_ctx->volinfo; + brick_id = rxl_id * volinfo->replica_count + rxl_child_id; + + if (!strcmp(rxl_child_end, "-status")) { + brickinfo = glusterd_get_brickinfo_by_position(volinfo, brick_id); + if (!brickinfo) + goto out; + if (!glusterd_is_local_brick(rsp_ctx->this, volinfo, brickinfo)) + goto out; + } + new_value = data_copy(value); + int_len = snprintf(new_key, sizeof(new_key), "%d%s", brick_id, + rxl_child_end); + dict_setn(rsp_ctx->dict, new_key, int_len, new_value); + +out: + return 0; +} + +int +_heal_volume_add_shd_rsp_of_statistics(dict_t *this, char *key, data_t *value, + void *data) +{ + char new_key[256] = ""; + char int_str[16] = ""; + char key_begin_string[128] = ""; + data_t *new_value = NULL; + char *rxl_end = NULL; + int rxl_end_len; + char *rxl_child_end = NULL; + glusterd_volinfo_t *volinfo = NULL; + char *key_begin_str = NULL; + int key_begin_strlen; + int rxl_id = 0; + int rxl_child_id = 0; + int brick_id = 0; + int int_len = 0; + int ret = 0; + glusterd_heal_rsp_conv_t *rsp_ctx = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + + rsp_ctx = data; + key_begin_str = strchr(key, '-'); + if (!key_begin_str) + goto out; + + rxl_end = 
strchr(key_begin_str + 1, '-'); + if (!rxl_end) + goto out; + + rxl_child_end = strchr(rxl_end + 1, '-'); + if (!rxl_child_end) + goto out; + + key_begin_strlen = strlen(key_begin_str); + int_len = strlen(key) - key_begin_strlen; + + (void)memcpy(key_begin_string, key, int_len); + key_begin_string[int_len] = '\0'; + + rxl_end_len = strlen(rxl_end); + int_len = key_begin_strlen - rxl_end_len - 1; + (void)memcpy(int_str, key_begin_str + 1, int_len); + int_str[int_len] = '\0'; + ret = gf_string2int(int_str, &rxl_id); + if (ret) + goto out; + + int_len = rxl_end_len - strlen(rxl_child_end) - 1; + (void)memcpy(int_str, rxl_end + 1, int_len); + int_str[int_len] = '\0'; + ret = gf_string2int(int_str, &rxl_child_id); + if (ret) + goto out; + + volinfo = rsp_ctx->volinfo; + brick_id = rxl_id * volinfo->replica_count + rxl_child_id; + + brickinfo = glusterd_get_brickinfo_by_position(volinfo, brick_id); + if (!brickinfo) + goto out; + if (!glusterd_is_local_brick(rsp_ctx->this, volinfo, brickinfo)) + goto out; + + new_value = data_copy(value); + int_len = snprintf(new_key, sizeof(new_key), "%s-%d%s", key_begin_string, + brick_id, rxl_child_end); + dict_setn(rsp_ctx->dict, new_key, int_len, new_value); + +out: + return 0; +} + +int +glusterd_heal_volume_brick_rsp(dict_t *req_dict, dict_t *rsp_dict, + dict_t *op_ctx, char **op_errstr) +{ + int ret = 0; + glusterd_heal_rsp_conv_t rsp_ctx = {0}; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + int heal_op = -1; + + GF_ASSERT(rsp_dict); + GF_ASSERT(op_ctx); + GF_ASSERT(op_errstr); + + ret = dict_get_strn(req_dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get volume name"); + goto out; + } + + ret = dict_get_int32n(req_dict, "heal-op", SLEN("heal-op"), &heal_op); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get heal_op"); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + + if (ret) + goto out; + + rsp_ctx.dict = op_ctx; + rsp_ctx.volinfo = volinfo; + rsp_ctx.this = THIS; + if (heal_op == GF_SHD_OP_STATISTICS) + dict_foreach(rsp_dict, _heal_volume_add_shd_rsp_of_statistics, + &rsp_ctx); + else + dict_foreach(rsp_dict, _heal_volume_add_shd_rsp, &rsp_ctx); + +out: + return ret; +} + +int +_status_volume_add_brick_rsp(dict_t *this, char *key, data_t *value, void *data) +{ + char new_key[256] = ""; + int keylen; + data_t *new_value = 0; + glusterd_pr_brick_rsp_conv_t *rsp_ctx = NULL; + + rsp_ctx = data; + new_value = data_copy(value); + keylen = snprintf(new_key, sizeof(new_key), "brick%d.%s", rsp_ctx->count, + key); + dict_setn(rsp_ctx->dict, new_key, keylen, new_value); + + return 0; +} + +int +glusterd_status_volume_brick_rsp(dict_t *rsp_dict, dict_t *op_ctx, + char **op_errstr) +{ + int ret = 0; + glusterd_pr_brick_rsp_conv_t rsp_ctx = {0}; + int32_t count = 0; + int index = 0; + + GF_ASSERT(rsp_dict); + GF_ASSERT(op_ctx); + GF_ASSERT(op_errstr); + + ret = dict_get_int32n(op_ctx, "count", SLEN("count"), &count); + if (ret) { + count = 0; + } else { + count++; + } + ret = dict_get_int32n(rsp_dict, "index", SLEN("index"), &index); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Couldn't get node index"); + goto out; + } + dict_deln(rsp_dict, "index", SLEN("index")); + + rsp_ctx.count = index; + rsp_ctx.dict = op_ctx; + dict_foreach(rsp_dict, _status_volume_add_brick_rsp, &rsp_ctx); + ret = dict_set_int32n(op_ctx, "count", SLEN("count"), count); + +out: + return ret; +} 
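+/* glusterd_status_volume_client_list: copies the client list received in rsp_dict into op_ctx and counts the connected clients per process type (fuse, gfapi, rebalance, glustershd, quotad, snapd), storing the per-type totals back in op_ctx. */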
+ +int +glusterd_status_volume_client_list(dict_t *rsp_dict, dict_t *op_ctx, + char **op_errstr) +{ + int ret = 0; + char *process = 0; + int32_t count = 0; + int32_t fuse_count = 0; + int32_t gfapi_count = 0; + int32_t rebalance_count = 0; + int32_t glustershd_count = 0; + int32_t quotad_count = 0; + int32_t snapd_count = 0; + int32_t client_count = 0; + int i = 0; + char key[64] = ""; + + GF_ASSERT(rsp_dict); + GF_ASSERT(op_ctx); + GF_ASSERT(op_errstr); + + ret = dict_get_int32n(rsp_dict, "clientcount", SLEN("clientcount"), + &client_count); + if (ret) { + gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_DICT_GET_FAILED, + "Couldn't get client count"); + } + ret = dict_set_int32n(op_ctx, "client-count", SLEN("client-count"), + client_count); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Couldn't set client-count"); + goto out; + } + for (i = 0; i < client_count; i++) { + count = 0; + ret = snprintf(key, sizeof(key), "client%d.name", i); + ret = dict_get_strn(rsp_dict, key, ret, &process); + if (ret) { + gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_DICT_GET_FAILED, + "Couldn't get client name"); + goto out; } + ret = dict_add_dynstr_with_alloc(op_ctx, key, process); + if (ret) { + gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_DICT_SET_FAILED, + "Couldn't set client name"); + } + if (!strncmp(process, "fuse", 4)) { + ret = dict_get_int32n(op_ctx, "fuse-count", SLEN("fuse-count"), + &count); + if (ret) { + gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_DICT_GET_FAILED, + "Couldn't get fuse-count"); + } + fuse_count++; + continue; + } else if (!strncmp(process, "gfapi", 5)) { + ret = dict_get_int32n(op_ctx, "gfapi-count", SLEN("gfapi-count"), + &count); + if (ret) { + gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_DICT_GET_FAILED, + "Couldn't get gfapi-count"); + } + gfapi_count++; + continue; + + } else if (!strcmp(process, "rebalance")) { + ret = dict_get_int32n(op_ctx, "rebalance-count", + SLEN("rebalance-count"), &count); + if (ret) { + gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_DICT_GET_FAILED, + "Couldn't get rebalance-count"); + } + rebalance_count++; + continue; + } else if (!strcmp(process, "glustershd")) { + ret = dict_get_int32n(op_ctx, "glustershd-count", + SLEN("glustershd-count"), &count); + if (ret) { + gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_DICT_GET_FAILED, + "Couldn't get glustershd-count"); + } + glustershd_count++; + continue; + } else if (!strcmp(process, "quotad")) { + ret = dict_get_int32n(op_ctx, "quotad-count", SLEN("quotad-count"), + &count); + if (ret) { + gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_DICT_GET_FAILED, + "Couldn't get quotad-count"); + } + quotad_count++; + continue; + } else if (!strcmp(process, "snapd")) { + ret = dict_get_int32n(op_ctx, "snapd-count", SLEN("snapd-count"), + &count); + if (ret) { + gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_DICT_GET_FAILED, + "Couldn't get snapd-count"); + } + snapd_count++; + } + } - ret = mkdir (path, mode); - if ((ret == -1) && (EEXIST != errno)) { - snprintf (msg, sizeof (msg), "brick: %s:%s, path " - "creation failed, reason: %s", - host, path, strerror(errno)); - gf_log ("glusterd",GF_LOG_ERROR, "%s", msg); - *op_errstr = gf_strdup (msg); - } else { - ret = 0; + if (fuse_count) { + ret = dict_set_int32n(op_ctx, "fuse-count", SLEN("fuse-count"), + fuse_count); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Couldn't set fuse-count"); + goto out; + } + } + if (gfapi_count) { + ret = dict_set_int32n(op_ctx, "gfapi-count", SLEN("gfapi-count"), + gfapi_count); + if (ret) { + 
gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Couldn't set gfapi-count"); + goto out; + } + } + if (rebalance_count) { + ret = dict_set_int32n(op_ctx, "rebalance-count", + SLEN("rebalance-count"), rebalance_count); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Couldn't set rebalance-count"); + goto out; + } + } + if (glustershd_count) { + ret = dict_set_int32n(op_ctx, "glustershd-count", + SLEN("glustershd-count"), glustershd_count); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Couldn't set glustershd-count"); + goto out; + } + } + if (quotad_count) { + ret = dict_set_int32n(op_ctx, "quotad-count", SLEN("quotad-count"), + quotad_count); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Couldn't set quotad-count"); + goto out; } + } + if (snapd_count) { + ret = dict_set_int32n(op_ctx, "snapd-count", SLEN("snapd-count"), + snapd_count); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Couldn't set snapd-count"); + goto out; + } + } out: - gf_log ("", GF_LOG_DEBUG, "returning %d", ret); - return ret; + return ret; } int -glusterd_sm_tr_log_transition_add_to_dict (dict_t *dict, - glusterd_sm_tr_log_t *log, int i, - int count) +glusterd_rebalance_rsp(dict_t *op_ctx, glusterd_rebalance_t *index, int32_t i) { - int ret = -1; - char key[512] = {0}; - char timestr[256] = {0,}; - char *str = NULL; - struct tm tm = {0}; + int ret = 0; + char key[64] = ""; + int keylen; + + snprintf(key, sizeof(key), "files-%d", i); + ret = dict_set_uint64(op_ctx, key, index->rebalance_files); + if (ret) + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "failed to set file count"); + + snprintf(key, sizeof(key), "size-%d", i); + ret = dict_set_uint64(op_ctx, key, index->rebalance_data); + if (ret) + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "failed to set size of xfer"); + + snprintf(key, sizeof(key), "lookups-%d", i); + ret = dict_set_uint64(op_ctx, key, index->lookedup_files); + if (ret) + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "failed to set lookedup file count"); + + keylen = snprintf(key, sizeof(key), "status-%d", i); + ret = dict_set_int32n(op_ctx, key, keylen, index->defrag_status); + if (ret) + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "failed to set status"); + + snprintf(key, sizeof(key), "failures-%d", i); + ret = dict_set_uint64(op_ctx, key, index->rebalance_failures); + if (ret) + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "failed to set failure count"); + + snprintf(key, sizeof(key), "skipped-%d", i); + ret = dict_set_uint64(op_ctx, key, index->skipped_files); + if (ret) + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "failed to set skipped count"); + + snprintf(key, sizeof(key), "run-time-%d", i); + ret = dict_set_double(op_ctx, key, index->rebalance_time); + if (ret) + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "failed to set run-time"); + + return ret; +} - GF_ASSERT (dict); - GF_ASSERT (log); +int +glusterd_defrag_volume_node_rsp(dict_t *req_dict, dict_t *rsp_dict, + dict_t *op_ctx) +{ + int ret = 0; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + char key[64] = ""; + int keylen; + int32_t i = 0; + char buf[64] = ""; + char *node_str = NULL; + int32_t cmd = 0; + + GF_ASSERT(req_dict); + + ret = dict_get_strn(req_dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, 
GD_MSG_DICT_GET_FAILED, + "Unable to get volume name"); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + + ret = dict_get_int32n(req_dict, "rebalance-command", + SLEN("rebalance-command"), &cmd); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Unable to get the cmd"); + goto out; + } + + if (rsp_dict) { + ret = glusterd_defrag_volume_status_update(volinfo, rsp_dict, cmd); + } + + if (!op_ctx) { + dict_copy(rsp_dict, op_ctx); + goto out; + } + + ret = dict_get_int32n(op_ctx, "count", SLEN("count"), &i); + i++; + + ret = dict_set_int32n(op_ctx, "count", SLEN("count"), i); + if (ret) + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set count"); + + snprintf(buf, sizeof(buf), "%s", uuid_utoa(MY_UUID)); + node_str = gf_strdup(buf); + + keylen = snprintf(key, sizeof(key), "node-uuid-%d", i); + ret = dict_set_dynstrn(op_ctx, key, keylen, node_str); + if (ret) + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "failed to set node-uuid"); + + glusterd_rebalance_rsp(op_ctx, &volinfo->rebal, i); + + snprintf(key, sizeof(key), "time-left-%d", i); + ret = dict_set_uint64(op_ctx, key, volinfo->rebal.time_left); + if (ret) + gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "failed to set time left"); - memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "log%d-old-state", count); - str = log->state_name_get (log->transitions[i].old_state); - ret = dict_set_str (dict, key, str); - if (ret) - goto out; +out: + return ret; +} +int32_t +glusterd_handle_node_rsp(dict_t *req_dict, void *pending_entry, + glusterd_op_t op, dict_t *rsp_dict, dict_t *op_ctx, + char **op_errstr, gd_node_type type) +{ + int ret = 0; + int32_t cmd = GF_OP_CMD_NONE; + + GF_ASSERT(op_errstr); + + switch (op) { + case GD_OP_PROFILE_VOLUME: + ret = glusterd_profile_volume_brick_rsp(pending_entry, rsp_dict, + op_ctx, op_errstr, type); + break; + case GD_OP_STATUS_VOLUME: + ret = dict_get_int32n(req_dict, "cmd", SLEN("cmd"), &cmd); + if (!ret && (cmd & GF_CLI_STATUS_CLIENT_LIST)) { + ret = glusterd_status_volume_client_list(rsp_dict, op_ctx, + op_errstr); + } else + ret = glusterd_status_volume_brick_rsp(rsp_dict, op_ctx, + op_errstr); + break; + case GD_OP_DEFRAG_BRICK_VOLUME: + glusterd_defrag_volume_node_rsp(req_dict, rsp_dict, op_ctx); + break; + + case GD_OP_HEAL_VOLUME: + ret = glusterd_heal_volume_brick_rsp(req_dict, rsp_dict, op_ctx, + op_errstr); + break; + case GD_OP_SCRUB_STATUS: + ret = glusterd_bitrot_volume_node_rsp(op_ctx, rsp_dict); + + break; + default: + break; + } + + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; +} - memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "log%d-event", count); - str = log->event_name_get (log->transitions[i].event); - ret = dict_set_str (dict, key, str); - if (ret) - goto out; +int32_t +glusterd_set_originator_uuid(dict_t *dict) +{ + int ret = -1; + uuid_t *originator_uuid = NULL; - memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "log%d-new-state", count); - str = log->state_name_get (log->transitions[i].new_state); - ret = dict_set_str (dict, key, str); - if (ret) - goto out; + GF_ASSERT(dict); + originator_uuid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); + if (!originator_uuid) { + ret = -1; + goto out; + } + + gf_uuid_copy(*originator_uuid, MY_UUID); + ret = dict_set_bin(dict, "originator_uuid", originator_uuid, + sizeof(uuid_t)); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set 
originator_uuid."); + goto out; + } - memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "log%d-time", count); - localtime_r ((const time_t*)&log->transitions[i].time, &tm); - memset (timestr, 0, sizeof (timestr)); - strftime (timestr, 256, "%Y-%m-%d %H:%M:%S", &tm); - str = gf_strdup (timestr); - ret = dict_set_dynstr (dict, key, str); - if (ret) - goto out; +out: + if (ret && originator_uuid) + GF_FREE(originator_uuid); + + return ret; +} + +/* Should be used only when an operation is in progress, as that is the only + * time a lock_owner is set + */ +gf_boolean_t +is_origin_glusterd(dict_t *dict) +{ + gf_boolean_t ret = _gf_false; + uuid_t lock_owner = { + 0, + }; + uuid_t *originator_uuid = NULL; + + GF_ASSERT(dict); + + ret = dict_get_bin(dict, "originator_uuid", (void **)&originator_uuid); + if (ret) { + /* If not originator_uuid has been set, then the command + * has been originated from a glusterd running on older version + * Hence fetching the lock owner */ + ret = glusterd_get_lock_owner(&lock_owner); + if (ret) { + ret = _gf_false; + goto out; + } + ret = !gf_uuid_compare(MY_UUID, lock_owner); + } else + ret = !gf_uuid_compare(MY_UUID, *originator_uuid); out: - gf_log ("", GF_LOG_DEBUG, "returning %d", ret); - return ret; + return ret; } int -glusterd_sm_tr_log_add_to_dict (dict_t *dict, - glusterd_sm_tr_log_t *circular_log) +glusterd_generate_and_set_task_id(dict_t *dict, char *key, const int keylen) { - int ret = -1; - int i = 0; - int start = 0; - int end = 0; - int index = 0; - char key[256] = {0}; - glusterd_sm_tr_log_t *log = NULL; - int count = 0; + int ret = -1; + uuid_t task_id = { + 0, + }; + char *uuid_str = NULL; + xlator_t *this = NULL; - GF_ASSERT (dict); - GF_ASSERT (circular_log); + GF_ASSERT(dict); - log = circular_log; - if (!log->count) - return 0; + this = THIS; + GF_ASSERT(this); - if (log->count == log->size) - start = log->current + 1; + gf_uuid_generate(task_id); + uuid_str = gf_strdup(uuid_utoa(task_id)); + if (!uuid_str) { + ret = -1; + goto out; + } + + ret = dict_set_dynstrn(dict, key, keylen, uuid_str); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set %s in dict", key); + goto out; + } + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_TASK_ID_INFO, + "Generated task-id %s for key %s", uuid_str, key); - end = start + log->count; - for (i = start; i < end; i++, count++) { - index = i % log->count; - ret = glusterd_sm_tr_log_transition_add_to_dict (dict, log, index, - count); - if (ret) - goto out; +out: + if (ret) + GF_FREE(uuid_str); + return ret; +} + +int +glusterd_copy_uuid_to_dict(uuid_t uuid, dict_t *dict, char *key, + const int keylen) +{ + int ret = -1; + char tmp_str[40] = ""; + char *task_id_str = NULL; + + GF_ASSERT(dict); + GF_ASSERT(key); + + gf_uuid_unparse(uuid, tmp_str); + task_id_str = gf_strdup(tmp_str); + if (!task_id_str) + return -1; + + ret = dict_set_dynstrn(dict, key, keylen, task_id_str); + if (ret) { + GF_FREE(task_id_str); + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Error setting uuid in dict with key %s", key); + } + + return 0; +} + +static int +_update_volume_op_versions(dict_t *this, char *key, data_t *value, void *data) +{ + int op_version = 0; + glusterd_volinfo_t *ctx = NULL; + gf_boolean_t enabled = _gf_true; + int ret = -1; + struct volopt_map_entry *vmep = NULL; + + GF_ASSERT(data); + ctx = data; + + vmep = gd_get_vmep(key); + op_version = glusterd_get_op_version_from_vmep(vmep); + + if (gd_is_xlator_option(vmep) || gd_is_boolean_option(vmep)) { + 
ret = gf_string2boolean(value->data, &enabled); + if (ret) + return 0; + + if (!enabled) + return 0; + } + + if (op_version > ctx->op_version) + ctx->op_version = op_version; + + if (gd_is_client_option(vmep) && (op_version > ctx->client_op_version)) + ctx->client_op_version = op_version; + + return 0; +} + +void +gd_update_volume_op_versions(glusterd_volinfo_t *volinfo) +{ + glusterd_conf_t *conf = NULL; + gf_boolean_t ob_enabled = _gf_false; + + GF_ASSERT(volinfo); + + conf = THIS->private; + GF_ASSERT(conf); + + /* Reset op-versions to minimum */ + volinfo->op_version = 1; + volinfo->client_op_version = 1; + + dict_foreach(volinfo->dict, _update_volume_op_versions, volinfo); + + /* Special case for open-behind + * If cluster op-version >= 2 and open-behind hasn't been explicitly + * disabled, volume op-versions must be updated to account for it + */ + + /* TODO: Remove once we have a general way to update automatically + * enabled features + */ + if (conf->op_version >= 2) { + ob_enabled = dict_get_str_boolean(volinfo->dict, + "performance.open-behind", _gf_true); + if (ob_enabled) { + if (volinfo->op_version < 2) + volinfo->op_version = 2; + if (volinfo->client_op_version < 2) + volinfo->client_op_version = 2; } + } - memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "count"); - ret = dict_set_int32 (dict, key, log->count); + if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE) { + if (volinfo->op_version < GD_OP_VERSION_3_6_0) + volinfo->op_version = GD_OP_VERSION_3_6_0; + if (volinfo->client_op_version < GD_OP_VERSION_3_6_0) + volinfo->client_op_version = GD_OP_VERSION_3_6_0; + } -out: - gf_log ("", GF_LOG_DEBUG, "returning %d", ret); - return ret; + return; } int -glusterd_sm_tr_log_init (glusterd_sm_tr_log_t *log, - char * (*state_name_get) (int), - char * (*event_name_get) (int), - size_t size) +op_version_check(xlator_t *this, int min_op_version, char *msg, int msglen) { - glusterd_sm_transition_t *transitions = NULL; - int ret = -1; + int ret = 0; + glusterd_conf_t *priv = NULL; + + GF_ASSERT(this); + GF_ASSERT(msg); + + priv = this->private; + if (priv->op_version < min_op_version) { + snprintf(msg, msglen, + "One or more nodes do not support " + "the required op-version. 
Cluster op-version must " + "at least be %d.", + min_op_version); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UNSUPPORTED_VERSION, "%s", + msg); + ret = -1; + } + return ret; +} - GF_ASSERT (size > 0); - GF_ASSERT (log && state_name_get && event_name_get); +/* A task is committed/completed once the task-id for it is cleared */ +gf_boolean_t +gd_is_remove_brick_committed(glusterd_volinfo_t *volinfo) +{ + GF_ASSERT(volinfo); - if (!log || !state_name_get || !event_name_get || (size <= 0)) - goto out; + if ((GD_OP_REMOVE_BRICK == volinfo->rebal.op) && + !gf_uuid_is_null(volinfo->rebal.rebalance_id)) + return _gf_false; + + return _gf_true; +} + +gf_boolean_t +glusterd_is_status_tasks_op(glusterd_op_t op, dict_t *dict) +{ + int ret = -1; + uint32_t cmd = GF_CLI_STATUS_NONE; + gf_boolean_t is_status_tasks = _gf_false; + + if (op != GD_OP_STATUS_VOLUME) + goto out; + + ret = dict_get_uint32(dict, "cmd", &cmd); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get opcode"); + goto out; + } + + if (cmd & GF_CLI_STATUS_TASKS) + is_status_tasks = _gf_true; - transitions = GF_CALLOC (size, sizeof (*transitions), - gf_gld_mt_sm_tr_log_t); - if (!transitions) +out: + return is_status_tasks; +} + +/* Tells if rebalance needs to be started for the given volume on the peer + * + * Rebalance should be started on a peer only if an involved brick is present on + * the peer. + * + * For a normal rebalance, if any one brick of the given volume is present on + * the peer, the rebalance process should be started. + * + * For a rebalance as part of a remove-brick operation, the rebalance process + * should be started only if one of the bricks being removed is present on the + * peer + */ +gf_boolean_t +gd_should_i_start_rebalance(glusterd_volinfo_t *volinfo) +{ + gf_boolean_t retval = _gf_false; + int ret = -1; + glusterd_brickinfo_t *brick = NULL; + int count = 0; + int i = 0; + char key[64] = ""; + int keylen; + char *brickname = NULL; + + switch (volinfo->rebal.op) { + case GD_OP_REBALANCE: + cds_list_for_each_entry(brick, &volinfo->bricks, brick_list) + { + if (gf_uuid_compare(MY_UUID, brick->uuid) == 0) { + retval = _gf_true; + break; + } + } + break; + case GD_OP_REMOVE_BRICK: + ret = dict_get_int32n(volinfo->rebal.dict, "count", SLEN("count"), + &count); + if (ret) { goto out; + } + for (i = 1; i <= count; i++) { + keylen = snprintf(key, sizeof(key), "brick%d", i); + ret = dict_get_strn(volinfo->rebal.dict, key, keylen, + &brickname); + if (ret) + goto out; + ret = glusterd_volume_brickinfo_get_by_brick(brickname, volinfo, + &brick, _gf_false); + if (ret) + goto out; + if (gf_uuid_compare(MY_UUID, brick->uuid) == 0) { + retval = _gf_true; + break; + } + } + break; + default: + break; + } + +out: + return retval; +} + +int +glusterd_is_volume_quota_enabled(glusterd_volinfo_t *volinfo) +{ + return (glusterd_volinfo_get_boolean(volinfo, VKEY_FEATURES_QUOTA)); +} - log->transitions = transitions; - log->size = size; - log->state_name_get = state_name_get; - log->event_name_get = event_name_get; +int +glusterd_is_volume_inode_quota_enabled(glusterd_volinfo_t *volinfo) +{ + return (glusterd_volinfo_get_boolean(volinfo, VKEY_FEATURES_INODE_QUOTA)); +} + +int +glusterd_is_bitrot_enabled(glusterd_volinfo_t *volinfo) +{ + return glusterd_volinfo_get_boolean(volinfo, VKEY_FEATURES_BITROT); +} + +int +glusterd_validate_and_set_gfid(dict_t *op_ctx, dict_t *req_dict, + char **op_errstr) +{ + int ret = -1; + int count = 0; + int i = 0; + int op_code = GF_QUOTA_OPTION_TYPE_NONE; + 
uuid_t uuid1 = {0}; + uuid_t uuid2 = { + 0, + }; + char *path = NULL; + char key[64] = ""; + int keylen; + char *uuid1_str = NULL; + char *uuid1_str_dup = NULL; + char *uuid2_str = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + ret = dict_get_int32n(op_ctx, "type", SLEN("type"), &op_code); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get quota opcode"); + goto out; + } + + if ((op_code != GF_QUOTA_OPTION_TYPE_LIMIT_USAGE) && + (op_code != GF_QUOTA_OPTION_TYPE_LIMIT_OBJECTS) && + (op_code != GF_QUOTA_OPTION_TYPE_REMOVE) && + (op_code != GF_QUOTA_OPTION_TYPE_REMOVE_OBJECTS)) { ret = 0; + goto out; + } + + ret = dict_get_strn(op_ctx, "path", SLEN("path"), &path); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get path"); + goto out; + } + + ret = dict_get_int32n(op_ctx, "count", SLEN("count"), &count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get count"); + goto out; + } + + /* If count is 0, fail the command with ENOENT. + * + * If count is 1, treat gfid0 as the gfid on which the operation + * is to be performed and resume the command. + * + * if count > 1, get the 0th gfid from the op_ctx and, + * compare it with the remaining 'count -1' gfids. + * If they are found to be the same, set gfid0 in the op_ctx and + * resume the operation, else error out. + */ + + if (count == 0) { + gf_asprintf(op_errstr, + "Failed to get trusted.gfid attribute " + "on path %s. Reason : %s", + path, strerror(ENOENT)); + ret = -ENOENT; + goto out; + } + + keylen = snprintf(key, sizeof(key), "gfid%d", 0); + + ret = dict_get_strn(op_ctx, key, keylen, &uuid1_str); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get key '%s'", key); + goto out; + } + + gf_uuid_parse(uuid1_str, uuid1); + + for (i = 1; i < count; i++) { + keylen = snprintf(key, sizeof(key), "gfid%d", i); + + ret = dict_get_strn(op_ctx, key, keylen, &uuid2_str); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get key " + "'%s'", + key); + goto out; + } + + gf_uuid_parse(uuid2_str, uuid2); + + if (gf_uuid_compare(uuid1, uuid2)) { + gf_asprintf(op_errstr, + "gfid mismatch between %s and " + "%s for path %s", + uuid1_str, uuid2_str, path); + ret = -1; + goto out; + } + } + + if (i == count) { + uuid1_str_dup = gf_strdup(uuid1_str); + if (!uuid1_str_dup) { + ret = -1; + goto out; + } + + ret = dict_set_dynstrn(req_dict, "gfid", SLEN("gfid"), uuid1_str_dup); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set gfid"); + GF_FREE(uuid1_str_dup); + goto out; + } + } else { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_ITER_FAIL, + "Failed to iterate through %d" + " entries in the req dict", + count); + ret = -1; + goto out; + } + ret = 0; out: - gf_log ("", GF_LOG_DEBUG, "returning %d", ret); - return ret; + return ret; } void -glusterd_sm_tr_log_delete (glusterd_sm_tr_log_t *log) +glusterd_clean_up_quota_store(glusterd_volinfo_t *volinfo) { - if (!log) - return; - if (log->transitions) - GF_FREE (log->transitions); - return; + char voldir[PATH_MAX] = ""; + char quota_confpath[PATH_MAX] = ""; + char cksum_path[PATH_MAX] = ""; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + int32_t len = 0; + + this = THIS; + GF_ASSERT(this); + conf = this->private; + GF_ASSERT(conf); + + GLUSTERD_GET_VOLUME_DIR(voldir, volinfo, conf); + + len = snprintf(quota_confpath, sizeof(quota_confpath), 
"%s/%s", voldir, + GLUSTERD_VOLUME_QUOTA_CONFIG); + if ((len < 0) || (len >= sizeof(quota_confpath))) { + quota_confpath[0] = 0; + } + len = snprintf(cksum_path, sizeof(cksum_path), "%s/%s", voldir, + GLUSTERD_VOL_QUOTA_CKSUM_FILE); + if ((len < 0) || (len >= sizeof(cksum_path))) { + cksum_path[0] = 0; + } + + sys_unlink(quota_confpath); + sys_unlink(cksum_path); + + gf_store_handle_destroy(volinfo->quota_conf_shandle); + volinfo->quota_conf_shandle = NULL; + volinfo->quota_conf_version = 0; } int -glusterd_sm_tr_log_transition_add (glusterd_sm_tr_log_t *log, - int old_state, int new_state, - int event) +glusterd_remove_auxiliary_mount(char *volname) { - glusterd_sm_transition_t *transitions = NULL; - int ret = -1; - int next = 0; + int ret = -1; + char mountdir[PATH_MAX] = ""; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + GLUSTERD_GET_QUOTA_LIMIT_MOUNT_PATH(mountdir, volname, "/"); + ret = gf_umount_lazy(this->name, mountdir, 1); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_LAZY_UMOUNT_FAIL, + "umount on %s failed, " + "reason : %s", + mountdir, strerror(errno)); + + /* Hide EBADF as it means the mount is already gone */ + if (errno == EBADF) + ret = 0; + } + + return ret; +} - GF_ASSERT (log); - if (!log) - goto out; +/* Stops the rebalance process of the given volume + */ +int +gd_stop_rebalance_process(glusterd_volinfo_t *volinfo) +{ + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + char pidfile[PATH_MAX] = ""; - transitions = log->transitions; - if (!transitions) - goto out; + GF_ASSERT(volinfo); - if (log->count) - next = (log->current + 1) % log->size; - else - next = 0; - - transitions[next].old_state = old_state; - transitions[next].new_state = new_state; - transitions[next].event = event; - time (&transitions[next].time); - log->current = next; - if (log->count < log->size) - log->count++; - ret = 0; - gf_log ("glusterd", GF_LOG_DEBUG, "Transitioning from '%s' to '%s' " - "due to event '%s'", log->state_name_get (old_state), - log->state_name_get (new_state), log->event_name_get (event)); -out: - gf_log ("", GF_LOG_DEBUG, "returning %d", ret); - return ret; + this = THIS; + GF_ASSERT(this); + + conf = this->private; + GF_ASSERT(conf); + + GLUSTERD_GET_DEFRAG_PID_FILE(pidfile, volinfo, conf); + ret = glusterd_service_stop("rebalance", pidfile, SIGTERM, _gf_true); + + return ret; +} + +rpc_clnt_t * +glusterd_rpc_clnt_unref(glusterd_conf_t *conf, rpc_clnt_t *rpc) +{ + rpc_clnt_t *ret = NULL; + + GF_ASSERT(conf); + GF_ASSERT(rpc); + synclock_unlock(&conf->big_lock); + (void)rpc_clnt_reconnect_cleanup(&rpc->conn); + ret = rpc_clnt_unref(rpc); + synclock_lock(&conf->big_lock); + + return ret; +} + +int32_t +glusterd_compare_volume_name(struct cds_list_head *list1, + struct cds_list_head *list2) +{ + glusterd_volinfo_t *volinfo1 = NULL; + glusterd_volinfo_t *volinfo2 = NULL; + + volinfo1 = cds_list_entry(list1, glusterd_volinfo_t, vol_list); + volinfo2 = cds_list_entry(list2, glusterd_volinfo_t, vol_list); + return strcmp(volinfo1->volname, volinfo2->volname); +} + +static int +gd_default_synctask_cbk(int ret, call_frame_t *frame, void *opaque) +{ + glusterd_conf_t *priv = THIS->private; + synclock_unlock(&priv->big_lock); + return ret; } +void +glusterd_launch_synctask(synctask_fn_t fn, void *opaque) +{ + xlator_t *this = NULL; + int ret = -1; + + this = THIS; + + /* synclock_lock must be called from within synctask, @fn must call it + * before it starts with its work*/ + ret = synctask_new(this->ctx->env, fn, 
gd_default_synctask_cbk, NULL, + opaque); + if (ret) + gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_SPAWN_SVCS_FAIL, + "Failed to spawn bricks" + " and other volume related services"); +} + +/* + * glusterd_enable_default_options enable certain options by default on the + * given volume based on the cluster op-version. This is called only during + * volume create or during volume reset + * + * @volinfo - volume on which to enable the default options + * @option - option to be set to default. If NULL, all possible options will be + * set to default + * + * Returns 0 on success and -1 on failure. If @option is given, but doesn't + * match any of the options that could be set, it is a success. + */ +/* + * TODO: Make this able to parse the volume-set table to set options + * Currently, the check and set for any option which wants to make use of this + * 'framework' needs to be done here manually. This would mean more work for the + * developer. This little extra work can be avoided if we make it possible to + * parse the volume-set table to get the options which could be set and their + * default values + */ int -glusterd_peerinfo_new (glusterd_peerinfo_t **peerinfo, - glusterd_friend_sm_state_t state, - uuid_t *uuid, const char *hostname) +glusterd_enable_default_options(glusterd_volinfo_t *volinfo, char *option) { - glusterd_peerinfo_t *new_peer = NULL; - int ret = -1; + int ret = 0; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; +#ifdef IPV6_DEFAULT + char *addr_family = "inet6"; +#else + char *addr_family = "inet"; +#endif - GF_ASSERT (peerinfo); - if (!peerinfo) - goto out; + this = THIS; + GF_ASSERT(this); + + GF_VALIDATE_OR_GOTO(this->name, volinfo, out); + + conf = this->private; + GF_ASSERT(conf); - new_peer = GF_CALLOC (1, sizeof (*new_peer), gf_gld_mt_peerinfo_t); - if (!new_peer) +#ifdef GD_OP_VERSION_3_8_0 + if (conf->op_version >= GD_OP_VERSION_3_8_0) { + /* nfs.disable needs to be enabled for new volumes with + * >= gluster version 3.7 (for now) 3.8 later + */ + if (!option || !strcmp(NFS_DISABLE_MAP_KEY, option)) { + ret = dict_set_dynstr_with_alloc(volinfo->dict, NFS_DISABLE_MAP_KEY, + "on"); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Failed to set option '" NFS_DISABLE_MAP_KEY + "' on volume " + "%s", + volinfo->volname); goto out; + } + } + } +#endif + + if (conf->op_version >= GD_OP_VERSION_3_7_0) { + /* Set needed volume options in volinfo->dict + * For ex., + * + * if (!option || !strcmp("someoption", option) { + * ret = dict_set_str(volinfo->dict, "someoption", "on"); + * ... + * } + * */ + + /* Option 'features.quota-deem-statfs' should not be turned off + * with 'gluster volume reset <VOLNAME>', since quota features + * can be reset only with 'gluster volume quota <VOLNAME> + * disable'. 
+ */ - new_peer->state.state = state; - if (hostname) - new_peer->hostname = gf_strdup (hostname); + if (!option || !strcmp("features.quota-deem-statfs", option)) { + if (glusterd_is_volume_quota_enabled(volinfo)) { + ret = dict_set_dynstr_with_alloc( + volinfo->dict, "features.quota-deem-statfs", "on"); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_SET_FAILED, + "Failed to set option " + "'features.quota-deem-statfs' " + "on volume %s", + volinfo->volname); + goto out; + } + } + } + } - INIT_LIST_HEAD (&new_peer->uuid_list); + if (conf->op_version >= GD_OP_VERSION_3_9_0) { + if (!option || !strcmp("transport.address-family", option)) { + if (volinfo->transport_type == GF_TRANSPORT_TCP) { + ret = dict_set_dynstr_with_alloc( + volinfo->dict, "transport.address-family", addr_family); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, + GD_MSG_DICT_SET_FAILED, + "failed to set transport." + "address-family on %s", + volinfo->volname); + goto out; + } + } + } + } - if (uuid) { - uuid_copy (new_peer->uuid, *uuid); + if (conf->op_version >= GD_OP_VERSION_7_0) { + ret = dict_set_dynstr_with_alloc(volinfo->dict, + "storage.fips-mode-rchecksum", "on"); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Failed to set option 'storage.fips-mode-rchecksum' " + "on volume %s", + volinfo->volname); + goto out; } + } +out: + return ret; +} - ret = glusterd_sm_tr_log_init (&new_peer->sm_log, - glusterd_friend_sm_state_name_get, - glusterd_friend_sm_event_name_get, - GLUSTERD_TR_LOG_SIZE); - if (ret) - goto out; +void +glusterd_get_gfproxy_client_volfile(glusterd_volinfo_t *volinfo, char *path, + int path_len) +{ + char workdir[PATH_MAX] = ""; + glusterd_conf_t *priv = THIS->private; + + GLUSTERD_GET_VOLUME_DIR(workdir, volinfo, priv); + + switch (volinfo->transport_type) { + case GF_TRANSPORT_TCP: + case GF_TRANSPORT_BOTH_TCP_RDMA: + snprintf(path, path_len, "%s/trusted-%s.tcp-gfproxy-fuse.vol", + workdir, volinfo->volname); + break; + + case GF_TRANSPORT_RDMA: + snprintf(path, path_len, "%s/trusted-%s.rdma-gfproxy-fuse.vol", + workdir, volinfo->volname); + break; + default: + break; + } +} + +void +glusterd_get_rebalance_volfile(glusterd_volinfo_t *volinfo, char *path, + int path_len) +{ + char workdir[PATH_MAX] = ""; + glusterd_conf_t *priv = THIS->private; + + GLUSTERD_GET_VOLUME_DIR(workdir, volinfo, priv); - *peerinfo = new_peer; + snprintf(path, path_len, "%s/%s-rebalance.vol", workdir, volinfo->volname); +} + +/* This function will update the backend file-system + * type and the mount options in origin and snap brickinfo. + * This will be later used to perform file-system specific operation + * during LVM snapshot. 
+ * + * @param brick_path brickpath for which fstype to be found + * @param brickinfo brickinfo of snap/origin volume + * @return 0 on success and -1 on failure + */ +int +glusterd_update_mntopts(char *brick_path, glusterd_brickinfo_t *brickinfo) +{ + int32_t ret = -1; + char *mnt_pt = NULL; + char buff[PATH_MAX] = ""; + struct mntent *entry = NULL; + struct mntent save_entry = { + 0, + }; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(brick_path); + GF_ASSERT(brickinfo); + + ret = glusterd_get_brick_root(brick_path, &mnt_pt); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICKPATH_ROOT_GET_FAIL, + "getting the root " + "of the brick (%s) failed ", + brick_path); + goto out; + } + + entry = glusterd_get_mnt_entry_info(mnt_pt, buff, sizeof(buff), + &save_entry); + if (!entry) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MNTENTRY_GET_FAIL, + "getting the mount entry for " + "the brick (%s) failed", + brick_path); + ret = -1; + goto out; + } + + if (snprintf(brickinfo->fstype, sizeof(brickinfo->fstype), "%s", + entry->mnt_type) >= sizeof(brickinfo->fstype)) { + ret = -1; + goto out; + } + (void)snprintf(brickinfo->mnt_opts, sizeof(brickinfo->mnt_opts), "%s", + entry->mnt_opts); + + gf_strncpy(brickinfo->mnt_opts, entry->mnt_opts, + sizeof(brickinfo->mnt_opts)); + + ret = 0; out: - if (ret && new_peer) - glusterd_friend_cleanup (new_peer); - gf_log ("", GF_LOG_DEBUG, "returning %d", ret); - return ret; + if (mnt_pt) + GF_FREE(mnt_pt); + return ret; } -int32_t -glusterd_peer_destroy (glusterd_peerinfo_t *peerinfo) +int +glusterd_get_value_for_vme_entry(struct volopt_map_entry *vme, char **def_val) { - int32_t ret = -1; + int ret = -1; + char *key = NULL; + xlator_t *this = NULL; + char *descr = NULL; + char *local_def_val = NULL; + void *dl_handle = NULL; + volume_opt_list_t vol_opt_handle = { + {0}, + }; + + this = THIS; + GF_ASSERT(this); + + CDS_INIT_LIST_HEAD(&vol_opt_handle.list); + + if (_get_xlator_opt_key_from_vme(vme, &key)) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GET_KEY_FAILED, + "Failed to get %s key from " + "volume option entry", + vme->key); + goto out; + } + + ret = xlator_volopt_dynload(vme->voltype, &dl_handle, &vol_opt_handle); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_XLATOR_VOLOPT_DYNLOAD_ERROR, + "xlator_volopt_dynload error " + "(%d)", + ret); + ret = -2; + goto cont; + } + + ret = xlator_option_info_list(&vol_opt_handle, key, &local_def_val, &descr); + if (ret) { + /*Swallow Error if option not found*/ + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_GET_KEY_FAILED, + "Failed to get option for %s " + "key", + key); + ret = -2; + goto cont; + } + if (!local_def_val) + local_def_val = "(null)"; + + *def_val = gf_strdup(local_def_val); + +cont: + if (dl_handle) { + dlclose(dl_handle); + dl_handle = NULL; + vol_opt_handle.given_opt = NULL; + } + if (key) { + _free_xlator_opt_key(key); + key = NULL; + } + + if (ret) + goto out; - if (!peerinfo) +out: + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; +} + +int +glusterd_get_global_max_op_version(rpcsvc_request_t *req, dict_t *ctx, + int count) +{ + int ret = -1; + char *def_val = NULL; + char dict_key[50] = ""; + int keylen; + + ret = glusterd_mgmt_v3_initiate_all_phases(req, GD_OP_MAX_OPVERSION, ctx); + + ret = dict_get_strn(ctx, "max-opversion", SLEN("max-opversion"), &def_val); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get max-opversion value from" + " dictionary"); + goto out; + } + + keylen = 
sprintf(dict_key, "key%d", count); + ret = dict_set_nstrn(ctx, dict_key, keylen, GLUSTERD_MAX_OP_VERSION_KEY, + SLEN(GLUSTERD_MAX_OP_VERSION_KEY)); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set %s in " + "dictionary", + GLUSTERD_MAX_OP_VERSION_KEY); + goto out; + } + + sprintf(dict_key, "value%d", count); + ret = dict_set_dynstr_with_alloc(ctx, dict_key, def_val); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set %s for key %s in dictionary", def_val, + GLUSTERD_MAX_OP_VERSION_KEY); + goto out; + } + +out: + return ret; +} + +int +glusterd_get_global_options_for_all_vols(rpcsvc_request_t *req, dict_t *ctx, + char **op_errstr) +{ + int ret = -1; + int count = 0; + gf_boolean_t all_opts = _gf_false; + gf_boolean_t key_found = _gf_false; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + char *key = NULL; + char *key_fixed = NULL; + char dict_key[50] = ""; + char *def_val = NULL; + char err_str[PATH_MAX] = ""; + char *allvolopt = NULL; + int32_t i = 0; + gf_boolean_t exists = _gf_false; + gf_boolean_t need_free = _gf_false; + + this = THIS; + GF_VALIDATE_OR_GOTO(THIS->name, this, out); + + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, out); + + GF_VALIDATE_OR_GOTO(this->name, ctx, out); + + ret = dict_get_strn(ctx, "key", SLEN("key"), &key); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get option key from dictionary"); + goto out; + } + + if (strcasecmp(key, "all") == 0) + all_opts = _gf_true; + else { + exists = glusterd_check_option_exists(key, &key_fixed); + if (!exists) { + snprintf(err_str, sizeof(err_str), + "Option " + "with name: %s does not exist", + key); + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_UNKNOWN_KEY, "%s", + err_str); + if (key_fixed) + snprintf(err_str, sizeof(err_str), "Did you mean %s?", + key_fixed); + ret = -1; + goto out; + } + if (key_fixed) + key = key_fixed; + } + /* coverity[CONSTANT_EXPRESSION_RESULT] */ + ALL_VOLUME_OPTION_CHECK("all", _gf_true, key, ret, op_errstr, out); + + for (i = 0; valid_all_vol_opts[i].option; i++) { + allvolopt = valid_all_vol_opts[i].option; + + if (!all_opts && strcmp(key, allvolopt) != 0) + continue; + + /* Found global option */ + if (strcmp(allvolopt, GLUSTERD_MAX_OP_VERSION_KEY) == 0) { + count++; + ret = glusterd_get_global_max_op_version(req, ctx, count); + if (ret) goto out; + else + continue; + } + + ret = dict_get_str(priv->opts, allvolopt, &def_val); - ret = glusterd_store_delete_peerinfo (peerinfo); + /* If global option isn't set explicitly */ + if (!def_val) { + if (!strcmp(allvolopt, GLUSTERD_GLOBAL_OP_VERSION_KEY)) { + gf_asprintf(&def_val, "%d", priv->op_version); + need_free = _gf_true; + } else { + gf_asprintf(&def_val, "%s (DEFAULT)", + valid_all_vol_opts[i].dflt_val); + need_free = _gf_true; + } + } + + count++; + ret = sprintf(dict_key, "key%d", count); + ret = dict_set_strn(ctx, dict_key, ret, allvolopt); if (ret) { - gf_log ("", GF_LOG_ERROR, "Deleting peer info failed"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set %s in dictionary", allvolopt); + goto out; } - list_del_init (&peerinfo->uuid_list); - if (peerinfo->hostname) - GF_FREE (peerinfo->hostname); - glusterd_sm_tr_log_delete (&peerinfo->sm_log); - GF_FREE (peerinfo); - peerinfo = NULL; + sprintf(dict_key, "value%d", count); + ret = dict_set_dynstr_with_alloc(ctx, dict_key, def_val); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + 
"Failed to set %s for key %s in dictionary", def_val, + allvolopt); + goto out; + } - ret = 0; + if (need_free) { + GF_FREE(def_val); + need_free = _gf_false; + } + def_val = NULL; + allvolopt = NULL; + + if (!all_opts) + break; + } + + ret = dict_set_int32n(ctx, "count", SLEN("count"), count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set count in dictionary"); + } out: - return ret; + if (ret && !all_opts && !key_found) { + if (err_str[0] == 0) + snprintf(err_str, sizeof(err_str), "option %s does not exist", key); + if (*op_errstr == NULL) + *op_errstr = gf_strdup(err_str); + } + + if (ret && need_free) { + GF_FREE(def_val); + } + GF_FREE(key_fixed); + gf_msg_debug(THIS->name, 0, "Returning %d", ret); + + return ret; } -int -glusterd_remove_pending_entry (struct list_head *list, void *elem) +char * +glusterd_get_option_value(glusterd_volinfo_t *volinfo, char *key) { - glusterd_pending_node_t *pending_node = NULL; - glusterd_pending_node_t *tmp = NULL; - int ret = -1; + char *value = NULL; + + if (!glusterd_is_volume_replicate(volinfo)) + goto ret; - list_for_each_entry_safe (pending_node, tmp, list, list) { - if (elem == pending_node->node) { - list_del_init (&pending_node->list); - GF_FREE (pending_node); - ret = 0; + if (!strcmp(key, "performance.client-io-threads")) { + value = "off"; + } else if (!strcmp(key, "cluster.quorum-type")) { + if (volinfo->replica_count % 2) { + value = "auto"; + } + } +ret: + return value; +} + +int +glusterd_get_default_val_for_volopt(dict_t *ctx, gf_boolean_t all_opts, + char *input_key, char *orig_key, + glusterd_volinfo_t *volinfo, + char **op_errstr) +{ + struct volopt_map_entry *vme = NULL; + int ret = -1; + int count = 0; + xlator_t *this = NULL; + char *def_val = NULL; + char *def_val_str = NULL; + char dict_key[50] = ""; + int keylen; + gf_boolean_t key_found = _gf_false; + gf_boolean_t get_value_vme = _gf_false; + glusterd_conf_t *priv = NULL; + dict_t *vol_dict = NULL; + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, out); + + vol_dict = volinfo->dict; + GF_VALIDATE_OR_GOTO(this->name, vol_dict, out); + + /* Check whether key is passed for a single option */ + if (!all_opts && !input_key) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_KEY_NULL, "Key is NULL"); + goto out; + } + + for (vme = &glusterd_volopt_map[0]; vme->key; vme++) { + if (!all_opts && strcmp(vme->key, input_key)) + continue; + key_found = _gf_true; + get_value_vme = _gf_false; + /* First look for the key in the priv->opts for global option + * and then into vol_dict, if its not present then look for + * translator default value */ + keylen = strlen(vme->key); + ret = dict_get_strn(priv->opts, vme->key, keylen, &def_val); + if (!def_val) { + ret = dict_get_strn(vol_dict, vme->key, keylen, &def_val); + if (ret == -ENOENT) + def_val = glusterd_get_option_value(volinfo, vme->key); + if (!def_val) { + if (vme->value) { + def_val = vme->value; + } else { + ret = glusterd_get_value_for_vme_entry(vme, &def_val); + get_value_vme = _gf_true; + if (!all_opts && ret) goto out; + else if (ret == -2) + continue; } + } } -out: - gf_log ("", GF_LOG_DEBUG, "returning %d", ret); - return ret; + count++; + keylen = sprintf(dict_key, "key%d", count); + ret = dict_set_strn(ctx, dict_key, keylen, vme->key); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to " + "set %s in dictionary", + vme->key); + goto out; + } + sprintf(dict_key, "value%d", count); + if 
(get_value_vme) { // the value was never changed - DEFAULT is used + gf_asprintf(&def_val_str, "%s (DEFAULT)", def_val); + ret = dict_set_dynstr_with_alloc(ctx, dict_key, def_val_str); + GF_FREE(def_val_str); + def_val_str = NULL; + } else + ret = dict_set_dynstr_with_alloc(ctx, dict_key, def_val); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to " + "set %s for key %s in dictionary", + def_val, vme->key); + goto out; + } + if (get_value_vme) + GF_FREE(def_val); + + def_val = NULL; + if (!all_opts) + break; + } + if (!all_opts && !key_found) + goto out; + + ret = dict_set_int32n(ctx, "count", SLEN("count"), count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set count " + "in dictionary"); + } +out: + if (ret && !all_opts && !key_found) { + char err_str[PATH_MAX]; + snprintf(err_str, sizeof(err_str), "option %s does not exist", + orig_key); + *op_errstr = gf_strdup(err_str); + } + if (def_val) + GF_FREE(def_val); + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; } int -glusterd_clear_pending_nodes (struct list_head *list) +glusterd_get_volopt_content(dict_t *ctx, gf_boolean_t xml_out) { - glusterd_pending_node_t *pending_node = NULL; - glusterd_pending_node_t *tmp = NULL; + void *dl_handle = NULL; + volume_opt_list_t vol_opt_handle = { + {0}, + }; + char *key = NULL; + struct volopt_map_entry *vme = NULL; + int ret = -1; + char *def_val = NULL; + char *descr = NULL; + char *output = NULL; + size_t size = 0; + size_t used = 0; +#if (HAVE_LIB_XML) + xmlTextWriterPtr writer = NULL; + xmlBufferPtr buf = NULL; + + if (xml_out) { + ret = init_sethelp_xml_doc(&writer, &buf); + if (ret) /*logging done in init_xml_lib*/ + goto out; + } +#endif - list_for_each_entry_safe (pending_node, tmp, list, list) { - list_del_init (&pending_node->list); - GF_FREE (pending_node); + if (!xml_out) { + size = 65536; + output = GF_MALLOC(size, gf_common_mt_char); + if (output == NULL) { + ret = -1; + goto out; } + } - return 0; + CDS_INIT_LIST_HEAD(&vol_opt_handle.list); + + for (vme = &glusterd_volopt_map[0]; vme->key; vme++) { + if ((vme->type == NO_DOC) || (vme->type == GLOBAL_NO_DOC)) + continue; + + if (vme->description) { + descr = vme->description; + def_val = vme->value; + } else { + if (_get_xlator_opt_key_from_vme(vme, &key)) { + gf_msg_debug("glusterd", 0, + "Failed to " + "get %s key from volume option entry", + vme->key); + goto out; /*Some error while getting key*/ + } + + ret = xlator_volopt_dynload(vme->voltype, &dl_handle, + &vol_opt_handle); + + if (ret) { + gf_msg_debug("glusterd", 0, "xlator_volopt_dynload error(%d)", + ret); + ret = 0; + goto cont; + } + + ret = xlator_option_info_list(&vol_opt_handle, key, &def_val, + &descr); + if (ret) { /*Swallow Error i.e if option not found*/ + gf_msg_debug("glusterd", 0, "Failed to get option for %s key", + key); + ret = 0; + goto cont; + } + } + + if (xml_out) { +#if (HAVE_LIB_XML) + if (xml_add_volset_element(writer, vme->key, def_val, descr)) { + ret = -1; + goto cont; + } +#else + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_MODULE_NOT_INSTALLED, + "Libxml not present"); +#endif + } else { + void *tmp; + int len; + + do { + len = snprintf(output + used, size - used, + "Option: %s\nDefault Value: %s\n" + "Description: %s\n\n", + vme->key, def_val, descr); + if (len < 0) { + ret = -1; + goto cont; + } + if (used + len < size) { + used += len; + break; + } + + size += (len + 65536) & ~65535; + tmp = GF_REALLOC(output, size); + if (tmp == NULL) { + ret = -1; 
+ goto cont; + } + output = tmp; + } while (1); + } + cont: + if (dl_handle) { + dlclose(dl_handle); + dl_handle = NULL; + vol_opt_handle.given_opt = NULL; + } + if (key) { + _free_xlator_opt_key(key); + key = NULL; + } + if (ret) + goto out; + } + +#if (HAVE_LIB_XML) + if ((xml_out) && (ret = end_sethelp_xml_doc(writer))) + goto out; +#else + if (xml_out) + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_MODULE_NOT_INSTALLED, + "Libxml not present"); +#endif + + if (xml_out) { +#if (HAVE_LIB_XML) + output = gf_strdup((char *)buf->content); + if (NULL == output) { + ret = -1; + goto out; + } +#else + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_MODULE_NOT_INSTALLED, + "Libxml not present"); +#endif + } + + ret = dict_set_dynstrn(ctx, "help-str", SLEN("help-str"), output); + if (ret >= 0) { + output = NULL; + } +out: + GF_FREE(output); + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; +} + +int +glusterd_check_client_op_version_support(char *volname, uint32_t op_version, + char **op_errstr) +{ + int ret = 0; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + rpc_transport_t *xprt = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + pthread_mutex_lock(&priv->xprt_lock); + list_for_each_entry(xprt, &priv->xprt_list, list) + { + if ((!strcmp(volname, xprt->peerinfo.volname)) && + ((op_version > xprt->peerinfo.max_op_version) || + (op_version < xprt->peerinfo.min_op_version))) { + ret = -1; + break; + } + } + pthread_mutex_unlock(&priv->xprt_lock); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UNSUPPORTED_VERSION, + "Client %s is running with min_op_version as %d and " + "max_op_version as %d and don't support the required " + "op-version %d", + xprt->peerinfo.identifier, xprt->peerinfo.min_op_version, + xprt->peerinfo.max_op_version, op_version); + if (op_errstr) + ret = gf_asprintf(op_errstr, + "One of the client %s is " + "running with op-version %d and " + "doesn't support the required " + "op-version %d. 
This client needs to" + " be upgraded or disconnected " + "before running this command again", + xprt->peerinfo.identifier, + xprt->peerinfo.max_op_version, op_version); + + return -1; + } + return 0; } gf_boolean_t -glusterd_peerinfo_is_uuid_unknown (glusterd_peerinfo_t *peerinfo) +glusterd_have_peers() { - GF_ASSERT (peerinfo); + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; - if (uuid_is_null (peerinfo->uuid)) - return _gf_true; - return _gf_false; + this = THIS; + GF_ASSERT(this); + conf = this->private; + GF_ASSERT(conf); + + return !cds_list_empty(&conf->peers); +} + +gf_boolean_t +glusterd_is_volume_started(glusterd_volinfo_t *volinfo) +{ + GF_ASSERT(volinfo); + return (volinfo->status == GLUSTERD_STATUS_STARTED); +} + +int +glusterd_volume_get_type_str(glusterd_volinfo_t *volinfo, char **voltype_str) +{ + int ret = -1; + int type = 0; + + GF_VALIDATE_OR_GOTO(THIS->name, volinfo, out); + + type = get_vol_type(volinfo->type, volinfo->dist_leaf_count, + volinfo->brick_count); + + *voltype_str = vol_type_str[type]; + + ret = 0; +out: + return ret; +} + +int +glusterd_volume_get_status_str(glusterd_volinfo_t *volinfo, char *status_str) +{ + int ret = -1; + + GF_VALIDATE_OR_GOTO(THIS->name, volinfo, out); + GF_VALIDATE_OR_GOTO(THIS->name, status_str, out); + + switch (volinfo->status) { + case GLUSTERD_STATUS_NONE: + sprintf(status_str, "%s", "Created"); + break; + case GLUSTERD_STATUS_STARTED: + sprintf(status_str, "%s", "Started"); + break; + case GLUSTERD_STATUS_STOPPED: + sprintf(status_str, "%s", "Stopped"); + break; + default: + goto out; + } + ret = 0; +out: + return ret; +} + +void +glusterd_brick_get_status_str(glusterd_brickinfo_t *brickinfo, char *status_str) +{ + GF_VALIDATE_OR_GOTO(THIS->name, brickinfo, out); + GF_VALIDATE_OR_GOTO(THIS->name, status_str, out); + + switch (brickinfo->status) { + case GF_BRICK_STOPPED: + sprintf(status_str, "%s", "Stopped"); + break; + case GF_BRICK_STARTED: + sprintf(status_str, "%s", "Started"); + break; + case GF_BRICK_STARTING: + sprintf(status_str, "%s", "Starting"); + break; + case GF_BRICK_STOPPING: + sprintf(status_str, "%s", "Stopping"); + break; + default: + sprintf(status_str, "%s", "None"); + break; + } + +out: + return; +} + +int +glusterd_volume_get_transport_type_str(glusterd_volinfo_t *volinfo, + char *transport_type_str) +{ + int ret = -1; + + GF_VALIDATE_OR_GOTO(THIS->name, volinfo, out); + GF_VALIDATE_OR_GOTO(THIS->name, transport_type_str, out); + + switch (volinfo->transport_type) { + case GF_TRANSPORT_TCP: + sprintf(transport_type_str, "%s", "tcp"); + break; + case GF_TRANSPORT_RDMA: + sprintf(transport_type_str, "%s", "rdma"); + break; + case GF_TRANSPORT_BOTH_TCP_RDMA: + sprintf(transport_type_str, "%s", "tcp_rdma_both"); + break; + default: + goto out; + } + ret = 0; +out: + return ret; +} + +int +glusterd_volume_get_quorum_status_str(glusterd_volinfo_t *volinfo, + char *quorum_status_str) +{ + int ret = -1; + + GF_VALIDATE_OR_GOTO(THIS->name, volinfo, out); + GF_VALIDATE_OR_GOTO(THIS->name, quorum_status_str, out); + + switch (volinfo->quorum_status) { + case NOT_APPLICABLE_QUORUM: + sprintf(quorum_status_str, "%s", "not_applicable"); + break; + case MEETS_QUORUM: + sprintf(quorum_status_str, "%s", "meets"); + break; + case DOESNT_MEET_QUORUM: + sprintf(quorum_status_str, "%s", "does_not_meet"); + break; + default: + goto out; + } + ret = 0; +out: + return ret; +} + +int +glusterd_volume_get_rebalance_status_str(glusterd_volinfo_t *volinfo, + char *rebal_status_str) +{ + int ret = -1; + + 
GF_VALIDATE_OR_GOTO(THIS->name, volinfo, out); + GF_VALIDATE_OR_GOTO(THIS->name, rebal_status_str, out); + + switch (volinfo->rebal.defrag_status) { + case GF_DEFRAG_STATUS_NOT_STARTED: + sprintf(rebal_status_str, "%s", "not_started"); + break; + case GF_DEFRAG_STATUS_STARTED: + sprintf(rebal_status_str, "%s", "started"); + break; + case GF_DEFRAG_STATUS_STOPPED: + sprintf(rebal_status_str, "%s", "stopped"); + break; + case GF_DEFRAG_STATUS_COMPLETE: + sprintf(rebal_status_str, "%s", "completed"); + break; + case GF_DEFRAG_STATUS_FAILED: + sprintf(rebal_status_str, "%s", "failed"); + break; + case GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED: + sprintf(rebal_status_str, "%s", "layout_fix_started"); + break; + case GF_DEFRAG_STATUS_LAYOUT_FIX_STOPPED: + sprintf(rebal_status_str, "%s", "layout_fix_stopped"); + break; + case GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE: + sprintf(rebal_status_str, "%s", "layout_fix_complete"); + break; + case GF_DEFRAG_STATUS_LAYOUT_FIX_FAILED: + sprintf(rebal_status_str, "%s", "layout_fix_failed"); + break; + default: + goto out; + } + ret = 0; +out: + return ret; +} + +/* This function will insert the element to the list in a order. + Order will be based on the compare function provided as a input. + If element to be inserted in ascending order compare should return: + 0: if both the arguments are equal + >0: if first argument is greater than second argument + <0: if first argument is less than second argument */ +void +glusterd_list_add_order(struct cds_list_head *new, struct cds_list_head *head, + int (*compare)(struct cds_list_head *, + struct cds_list_head *)) +{ + struct cds_list_head *pos = NULL; + + cds_list_for_each_rcu(pos, head) + { + if (compare(new, pos) <= 0) + break; + } + + cds_list_add_rcu(new, rcu_dereference(pos->prev)); } int32_t -glusterd_delete_volume (glusterd_volinfo_t *volinfo) +glusterd_count_connected_peers(int32_t *count) { - int ret = -1; - GF_ASSERT (volinfo); + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_conf_t *conf = NULL; + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, out); + GF_VALIDATE_OR_GOTO(this->name, count, out); + + *count = 1; + + RCU_READ_LOCK; + cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list) + { + /* Find peer who is connected and is a friend */ + if ((peerinfo->connected) && + (peerinfo->state.state == GD_FRIEND_STATE_BEFRIENDED)) { + (*count)++; + } + } + RCU_READ_UNLOCK; - ret = glusterd_store_delete_volume (volinfo); + ret = 0; +out: + return ret; +} - if (ret) +char * +gd_get_shd_key(int type) +{ + char *key = NULL; + + switch (type) { + case GF_CLUSTER_TYPE_REPLICATE: + key = "cluster.self-heal-daemon"; + break; + case GF_CLUSTER_TYPE_DISPERSE: + key = "cluster.disperse-self-heal-daemon"; + break; + default: + key = NULL; + break; + } + return key; +} + +int +glusterd_handle_replicate_brick_ops(glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, + glusterd_op_t op) +{ + int32_t ret = -1; + char tmpmount[] = "/tmp/mntXXXXXX"; + char logfile[PATH_MAX] = ""; + int dirty[3] = { + 0, + }; + runner_t runner = {0}; + glusterd_conf_t *priv = NULL; + char *pid = NULL; + char vpath[PATH_MAX] = ""; + char *volfileserver = NULL; + + xlator_t *this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, out); + + dirty[2] = hton32(1); + + ret = sys_lsetxattr(brickinfo->path, GF_AFR_DIRTY, dirty, sizeof(dirty), 0); + if (ret == -1) { + 
gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_SET_XATTR_FAIL, + "Attribute=%s", GF_AFR_DIRTY, "Reason=%s", strerror(errno), + NULL); + goto out; + } + + if (mkdtemp(tmpmount) == NULL) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_CREATE_DIR_FAILED, + NULL); + ret = -1; + goto out; + } + + ret = gf_asprintf(&pid, "%d", GF_CLIENT_PID_ADD_REPLICA_MOUNT); + if (ret < 0) + goto out; + + switch (op) { + case GD_OP_REPLACE_BRICK: + if (dict_get_strn(this->options, "transport.socket.bind-address", + SLEN("transport.socket.bind-address"), + &volfileserver) != 0) + volfileserver = "localhost"; + + snprintf(logfile, sizeof(logfile), "%s/%s-replace-brick-mount.log", + priv->logdir, volinfo->volname); + if (!*logfile) { + ret = -1; + goto out; + } + runinit(&runner); + runner_add_args(&runner, SBIN_DIR "/glusterfs", "-s", volfileserver, + "--volfile-id", volinfo->volname, "--client-pid", + pid, "-l", logfile, tmpmount, NULL); + break; + + case GD_OP_ADD_BRICK: + snprintf(logfile, sizeof(logfile), "%s/%s-add-brick-mount.log", + priv->logdir, volinfo->volname); + if (!*logfile) { + ret = -1; goto out; + } + ret = glusterd_get_dummy_client_filepath(vpath, volinfo, + volinfo->transport_type); + if (ret) { + gf_log("", GF_LOG_ERROR, + "Failed to get " + "volfile path"); + goto out; + } + runinit(&runner); + runner_add_args(&runner, SBIN_DIR "/glusterfs", "--volfile", vpath, + "--client-pid", pid, "-l", logfile, tmpmount, NULL); + break; + default: + break; + } + synclock_unlock(&priv->big_lock); + ret = runner_run(&runner); + + if (ret) { + gf_log(this->name, GF_LOG_ERROR, + "mount command" + " failed."); + goto lock; + } + ret = sys_lsetxattr( + tmpmount, + (op == GD_OP_REPLACE_BRICK) ? GF_AFR_REPLACE_BRICK : GF_AFR_ADD_BRICK, + brickinfo->brick_id, sizeof(brickinfo->brick_id), 0); + if (ret == -1) + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_SET_XATTR_FAIL, + "Attribute=%s, Reason=%s", + (op == GD_OP_REPLACE_BRICK) ? 
GF_AFR_REPLACE_BRICK + : GF_AFR_ADD_BRICK, + strerror(errno), NULL); + gf_umount_lazy(this->name, tmpmount, 1); +lock: + synclock_lock(&priv->big_lock); +out: + if (pid) + GF_FREE(pid); + gf_msg_debug(this->name, 0, "Returning with ret"); + return ret; +} + +void +assign_brick_groups(glusterd_volinfo_t *volinfo) +{ + glusterd_brickinfo_t *brickinfo = NULL; + uint16_t group_num = 0; + int in_group = 0; + + list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + brickinfo->group = group_num; + if (++in_group >= volinfo->replica_count) { + in_group = 0; + ++group_num; + } + } +} + +glusterd_brickinfo_t * +get_last_brick_of_brick_group(glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo) +{ + glusterd_brickinfo_t *next = NULL; + glusterd_brickinfo_t *last = NULL; + + last = brickinfo; + for (;;) { + next = list_next(last, &volinfo->bricks, glusterd_brickinfo_t, + brick_list); + if (!next || (next->group != brickinfo->group)) { + break; + } + last = next; + } + + return last; +} + +int +glusterd_get_rb_dst_brickinfo(glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t **brickinfo) +{ + int32_t ret = -1; + + if (!volinfo || !brickinfo) + goto out; + + *brickinfo = volinfo->rep_brick.dst_brick; + + ret = 0; - ret = glusterd_volinfo_delete (volinfo); out: - gf_log ("", GF_LOG_DEBUG, "returning %d", ret); - return ret; + return ret; +} + +int +rb_update_dstbrick_port(glusterd_brickinfo_t *dst_brickinfo, dict_t *rsp_dict, + dict_t *req_dict) +{ + int ret = 0; + int dict_ret = 0; + int dst_port = 0; + + dict_ret = dict_get_int32n(req_dict, "dst-brick-port", + SLEN("dst-brick-port"), &dst_port); + if (!dict_ret) + dst_brickinfo->port = dst_port; + + if (gf_is_local_addr(dst_brickinfo->hostname)) { + gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_BRK_PORT_NO_ADD_INDO, + "adding dst-brick port no %d", dst_port); + + if (rsp_dict) { + ret = dict_set_int32n(rsp_dict, "dst-brick-port", + SLEN("dst-brick-port"), dst_brickinfo->port); + if (ret) { + gf_msg_debug("glusterd", 0, + "Could not set dst-brick port no in rsp dict"); + goto out; + } + } + + if (req_dict && !dict_ret) { + ret = dict_set_int32n(req_dict, "dst-brick-port", + SLEN("dst-brick-port"), dst_brickinfo->port); + if (ret) { + gf_msg_debug("glusterd", 0, "Could not set dst-brick port no"); + goto out; + } + } + } +out: + return ret; +} + +int +glusterd_brick_op_prerequisites(dict_t *dict, char **op, glusterd_op_t *gd_op, + char **volname, glusterd_volinfo_t **volinfo, + char **src_brick, + glusterd_brickinfo_t **src_brickinfo, + char *pidfile, char **op_errstr, + dict_t *rsp_dict) +{ + int ret = 0; + char msg[2048] = ""; + gsync_status_param_t param = { + 0, + }; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *v = NULL; + glusterd_brickinfo_t *b = NULL; + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + GF_ASSERT(priv); + + ret = dict_get_strn(dict, "operation", SLEN("operation"), op); + if (ret) { + gf_msg_debug(this->name, 0, "dict get on operation type failed"); + goto out; + } + + *gd_op = gd_cli_to_gd_op(*op); + if (*gd_op < 0) + goto out; + + ret = dict_get_strn(dict, "volname", SLEN("volname"), volname); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get volume name"); + goto out; + } + + ret = glusterd_volinfo_find(*volname, volinfo); + if (ret) { + snprintf(msg, sizeof(msg), "volume: %s does not exist", *volname); + *op_errstr = gf_strdup(msg); + goto out; + } + + if (GLUSTERD_STATUS_STARTED != (*volinfo)->status) { + ret = -1; + 
snprintf(msg, sizeof(msg), "volume: %s is not started", *volname); + *op_errstr = gf_strdup(msg); + goto out; + } + + /* If geo-rep is configured, for this volume, it should be stopped. */ + param.volinfo = *volinfo; + ret = glusterd_check_geo_rep_running(¶m, op_errstr); + if (ret || param.is_active) { + ret = -1; + goto out; + } + + if (glusterd_is_defrag_on(*volinfo)) { + snprintf(msg, sizeof(msg), + "Volume name %s rebalance is in " + "progress. Please retry after completion", + *volname); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OIP_RETRY_LATER, "%s", msg); + *op_errstr = gf_strdup(msg); + ret = -1; + goto out; + } + + if (dict) { + if (!glusterd_is_fuse_available()) { + gf_msg(this->name, GF_LOG_ERROR, 0, + (*gd_op == GD_OP_REPLACE_BRICK) + ? GD_MSG_RB_CMD_FAIL + : GD_MSG_RESET_BRICK_CMD_FAIL, + "Unable to open /dev/" + "fuse (%s), %s command failed", + strerror(errno), gd_rb_op_to_str(*op)); + snprintf(msg, sizeof(msg), + "Fuse unavailable\n " + "%s failed", + gd_rb_op_to_str(*op)); + *op_errstr = gf_strdup(msg); + ret = -1; + goto out; + } + } + + ret = dict_get_strn(dict, "src-brick", SLEN("src-brick"), src_brick); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get src brick"); + goto out; + } + + gf_msg_debug(this->name, 0, "src brick=%s", *src_brick); + + ret = glusterd_volume_brickinfo_get_by_brick(*src_brick, *volinfo, + src_brickinfo, _gf_false); + if (ret) { + snprintf(msg, sizeof(msg), + "brick: %s does not exist in " + "volume: %s", + *src_brick, *volname); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_NOT_FOUND, + "Brick=%s, Volume=%s", *src_brick, *volname, NULL); + *op_errstr = gf_strdup(msg); + goto out; + } + + if (gf_is_local_addr((*src_brickinfo)->hostname)) { + gf_msg_debug(this->name, 0, "I AM THE SOURCE HOST"); + if ((*src_brickinfo)->port && rsp_dict) { + ret = dict_set_int32n(rsp_dict, "src-brick-port", + SLEN("src-brick-port"), + (*src_brickinfo)->port); + if (ret) { + gf_msg_debug(this->name, 0, "Could not set src-brick-port=%d", + (*src_brickinfo)->port); + } + } + + v = *volinfo; + b = *src_brickinfo; + GLUSTERD_GET_BRICK_PIDFILE(pidfile, v, b, priv); + } + + ret = 0; +out: + return ret; +} + +int +glusterd_get_dst_brick_info(char **dst_brick, char *volname, char **op_errstr, + glusterd_brickinfo_t **dst_brickinfo, char **host, + dict_t *dict, char **dup_dstbrick) +{ + char *path = NULL; + char *c = NULL; + char msg[2048] = ""; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + int ret = 0; + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + GF_ASSERT(priv); + + ret = dict_get_strn(dict, "dst-brick", SLEN("dst-brick"), dst_brick); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get dest brick."); + goto out; + } + + gf_msg_debug(this->name, 0, "dst brick=%s", *dst_brick); + + if (!glusterd_store_is_valid_brickpath(volname, *dst_brick) || + !glusterd_is_valid_volfpath(volname, *dst_brick)) { + snprintf(msg, sizeof(msg), + "brick path %s is too " + "long.", + *dst_brick); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRKPATH_TOO_LONG, "%s", msg); + *op_errstr = gf_strdup(msg); + + ret = -1; + goto out; + } + + *dup_dstbrick = gf_strdup(*dst_brick); + if (!*dup_dstbrick) { + ret = -1; + goto out; + } + + /* + * IPv4 address contains '.' 
and ipv6 addresses contains ':' + * So finding the last occurrence of ':' to + * mark the start of brick path + */ + c = strrchr(*dup_dstbrick, ':'); + if (c != NULL) { + c[0] = '\0'; + *host = *dup_dstbrick; + path = c++; + } + + if (!host || !path) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BAD_FORMAT, + "dst brick %s is not of " + "form <HOSTNAME>:<export-dir>", + *dst_brick); + ret = -1; + goto out; + } + + ret = glusterd_brickinfo_new_from_brick(*dst_brick, dst_brickinfo, _gf_true, + NULL); + if (ret) + goto out; + + ret = 0; +out: + return ret; +} + +int +glusterd_get_volinfo_from_brick(char *brick, glusterd_volinfo_t **volinfo) +{ + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + glusterd_volinfo_t *voliter = NULL; + glusterd_brickinfo_t *brickiter = NULL; + glusterd_snap_t *snap = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, out); + + /* First check for normal volumes */ + cds_list_for_each_entry(voliter, &conf->volumes, vol_list) + { + cds_list_for_each_entry(brickiter, &voliter->bricks, brick_list) + { + if (gf_uuid_compare(brickiter->uuid, MY_UUID)) + continue; + if (!strcmp(brickiter->path, brick)) { + *volinfo = voliter; + return 0; + } + } + } + /* In case normal volume is not found, check for snapshot volumes */ + cds_list_for_each_entry(snap, &conf->snapshots, snap_list) + { + cds_list_for_each_entry(voliter, &snap->volumes, vol_list) + { + cds_list_for_each_entry(brickiter, &voliter->bricks, brick_list) + { + if (gf_uuid_compare(brickiter->uuid, MY_UUID)) + continue; + if (!strcmp(brickiter->path, brick)) { + *volinfo = voliter; + return 0; + } + } + } + } + +out: + return ret; +} + +glusterd_op_t +gd_cli_to_gd_op(char *cli_op) +{ + if (!strcmp(cli_op, "GF_RESET_OP_START") || + !strcmp(cli_op, "GF_RESET_OP_COMMIT") || + !strcmp(cli_op, "GF_RESET_OP_COMMIT_FORCE")) { + return GD_OP_RESET_BRICK; + } + + if (!strcmp(cli_op, "GF_REPLACE_OP_COMMIT_FORCE")) + return GD_OP_REPLACE_BRICK; + + return -1; +} + +char * +gd_rb_op_to_str(char *op) +{ + if (!strcmp(op, "GF_RESET_OP_START")) + return "reset-brick start"; + if (!strcmp(op, "GF_RESET_OP_COMMIT")) + return "reset-brick commit"; + if (!strcmp(op, "GF_RESET_OP_COMMIT_FORCE")) + return "reset-brick commit force"; + if (!strcmp(op, "GF_REPLACE_OP_COMMIT_FORCE")) + return "replace-brick commit force"; + return NULL; +} + +gf_boolean_t +glusterd_is_profile_on(glusterd_volinfo_t *volinfo) +{ + int ret = -1; + gf_boolean_t is_latency_on = _gf_false; + gf_boolean_t is_fd_stats_on = _gf_false; + + GF_ASSERT(volinfo); + + ret = glusterd_volinfo_get_boolean(volinfo, VKEY_DIAG_CNT_FOP_HITS); + if (ret != -1) + is_fd_stats_on = ret; + ret = glusterd_volinfo_get_boolean(volinfo, VKEY_DIAG_LAT_MEASUREMENT); + if (ret != -1) + is_latency_on = ret; + if ((_gf_true == is_latency_on) && (_gf_true == is_fd_stats_on)) + return _gf_true; + return _gf_false; } int32_t -glusterd_delete_brick (glusterd_volinfo_t* volinfo, - glusterd_brickinfo_t *brickinfo) -{ - int ret = 0; - GF_ASSERT (volinfo); - GF_ASSERT (brickinfo); - -#ifdef DEBUG - ret = glusterd_volume_brickinfo_get (brickinfo->uuid, - brickinfo->hostname, - brickinfo->path, volinfo, NULL); - GF_ASSERT (0 == ret); -#endif - glusterd_delete_volfile (volinfo, brickinfo); - glusterd_store_delete_brick (volinfo, brickinfo); - glusterd_brickinfo_delete (brickinfo); - volinfo->brick_count--; - return ret; +glusterd_add_shd_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict, + int32_t 
count) +{ + int ret = -1; + int32_t pid = -1; + int32_t brick_online = -1; + char key[64] = {0}; + int keylen; + char *pidfile = NULL; + xlator_t *this = NULL; + char *uuid_str = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO(THIS->name, this, out); + + GF_VALIDATE_OR_GOTO(this->name, volinfo, out); + GF_VALIDATE_OR_GOTO(this->name, dict, out); + + keylen = snprintf(key, sizeof(key), "brick%d.hostname", count); + ret = dict_set_nstrn(dict, key, keylen, "Self-heal Daemon", + SLEN("Self-heal Daemon")); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s", + key, NULL); + goto out; + } + + keylen = snprintf(key, sizeof(key), "brick%d.path", count); + uuid_str = gf_strdup(uuid_utoa(MY_UUID)); + if (!uuid_str) { + ret = -1; + goto out; + } + ret = dict_set_dynstrn(dict, key, keylen, uuid_str); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s", + key, NULL); + goto out; + } + uuid_str = NULL; + + /* shd doesn't have a port. but the cli needs a port key with + * a zero value to parse. + * */ + + keylen = snprintf(key, sizeof(key), "brick%d.port", count); + ret = dict_set_int32n(dict, key, keylen, 0); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s", + key, NULL); + goto out; + } + + pidfile = volinfo->shd.svc.proc.pidfile; + + brick_online = gf_is_service_running(pidfile, &pid); + + /* If shd is not running, then don't print the pid */ + if (!brick_online) + pid = -1; + keylen = snprintf(key, sizeof(key), "brick%d.pid", count); + ret = dict_set_int32n(dict, key, keylen, pid); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s", + key, NULL); + goto out; + } + + keylen = snprintf(key, sizeof(key), "brick%d.status", count); + ret = dict_set_int32n(dict, key, keylen, brick_online); + +out: + if (uuid_str) + GF_FREE(uuid_str); + if (ret) + gf_msg(this ? this->name : "glusterd", GF_LOG_ERROR, 0, + GD_MSG_DICT_SET_FAILED, + "Returning %d. 
adding values to dict failed", ret); + + return ret; } +static gf_ai_compare_t +glusterd_compare_addrinfo(struct addrinfo *first, struct addrinfo *next) +{ + int ret = -1; + struct addrinfo *tmp1 = NULL; + struct addrinfo *tmp2 = NULL; + char firstip[NI_MAXHOST] = {0.}; + char nextip[NI_MAXHOST] = { + 0, + }; + + for (tmp1 = first; tmp1 != NULL; tmp1 = tmp1->ai_next) { + ret = getnameinfo(tmp1->ai_addr, tmp1->ai_addrlen, firstip, NI_MAXHOST, + NULL, 0, NI_NUMERICHOST); + if (ret) + return GF_AI_COMPARE_ERROR; + for (tmp2 = next; tmp2 != NULL; tmp2 = tmp2->ai_next) { + ret = getnameinfo(tmp2->ai_addr, tmp2->ai_addrlen, nextip, + NI_MAXHOST, NULL, 0, NI_NUMERICHOST); + if (ret) + return GF_AI_COMPARE_ERROR; + if (!strcmp(firstip, nextip)) { + return GF_AI_COMPARE_MATCH; + } + } + } + return GF_AI_COMPARE_NO_MATCH; +} + +/* Check for non optimal brick order for Replicate/Disperse : + * Checks if bricks belonging to a replicate or disperse + * volume are present on the same server + */ int32_t -glusterd_delete_all_bricks (glusterd_volinfo_t* volinfo) +glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type, + char **volname, char **brick_list, + int32_t *brick_count, int32_t sub_count) { - int ret = 0; - glusterd_brickinfo_t *brickinfo = NULL; - glusterd_brickinfo_t *tmp = NULL; + int ret = -1; + int i = 0; + int j = 0; + int k = 0; + xlator_t *this = NULL; + addrinfo_list_t *ai_list = NULL; + addrinfo_list_t *ai_list_tmp1 = NULL; + addrinfo_list_t *ai_list_tmp2 = NULL; + char *brick = NULL; + char *brick_list_dup = NULL; + char *brick_list_ptr = NULL; + char *tmpptr = NULL; + struct addrinfo *ai_info = NULL; + char brick_addr[128] = { + 0, + }; + int addrlen = 0; + + const char failed_string[2048] = + "Failed to perform brick order " + "check. Use 'force' at the end of the command" + " if you want to override this behavior. "; + const char found_string[2048] = + "Multiple bricks of a %s " + "volume are present on the same server. This " + "setup is not optimal. Bricks should be on " + "different nodes to have best fault tolerant " + "configuration. Use 'force' at the end of the " + "command if you want to override this " + "behavior. 
"; + + this = THIS; + + GF_ASSERT(this); + + ai_list = MALLOC(sizeof(addrinfo_list_t)); + ai_list->info = NULL; + CDS_INIT_LIST_HEAD(&ai_list->list); + + if (!(*volname)) { + ret = dict_get_strn(dict, "volname", SLEN("volname"), &(*volname)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get volume name"); + goto out; + } + } - GF_ASSERT (volinfo); + if (!(*brick_list)) { + ret = dict_get_strn(dict, "bricks", SLEN("bricks"), &(*brick_list)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Bricks check : Could not " + "retrieve bricks list"); + goto out; + } + } - list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks, brick_list) { - ret = glusterd_delete_brick (volinfo, brickinfo); + if (!(*brick_count)) { + ret = dict_get_int32n(dict, "count", SLEN("count"), &(*brick_count)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Bricks check : Could not " + "retrieve brick count"); + goto out; } - return ret; + } + + brick_list_dup = brick_list_ptr = gf_strdup(*brick_list); + /* Resolve hostnames and get addrinfo */ + while (i < *brick_count) { + ++i; + brick = strtok_r(brick_list_dup, " \n", &tmpptr); + brick_list_dup = tmpptr; + if (brick == NULL) + goto check_failed; + tmpptr = strrchr(brick, ':'); + if (tmpptr == NULL) + goto check_failed; + addrlen = strlen(brick) - strlen(tmpptr); + strncpy(brick_addr, brick, addrlen); + brick_addr[addrlen] = '\0'; + ret = getaddrinfo(brick_addr, NULL, NULL, &ai_info); + if (ret != 0) { + ret = 0; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HOSTNAME_RESOLVE_FAIL, + "unable to resolve host name for addr %s", brick_addr); + goto out; + } + ai_list_tmp1 = MALLOC(sizeof(addrinfo_list_t)); + if (ai_list_tmp1 == NULL) { + ret = 0; + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "failed to allocate " + "memory"); + freeaddrinfo(ai_info); + goto out; + } + ai_list_tmp1->info = ai_info; + cds_list_add_tail(&ai_list_tmp1->list, &ai_list->list); + ai_list_tmp1 = NULL; + } + + i = 0; + ai_list_tmp1 = cds_list_entry(ai_list->list.next, addrinfo_list_t, list); + + if (*brick_count < sub_count) { + sub_count = *brick_count; + } + + /* Check for bad brick order */ + while (i < *brick_count) { + ++i; + ai_info = ai_list_tmp1->info; + ai_list_tmp1 = cds_list_entry(ai_list_tmp1->list.next, addrinfo_list_t, + list); + if (0 == i % sub_count) { + j = 0; + continue; + } + ai_list_tmp2 = ai_list_tmp1; + k = j; + while (k < sub_count - 1) { + ++k; + ret = glusterd_compare_addrinfo(ai_info, ai_list_tmp2->info); + if (GF_AI_COMPARE_ERROR == ret) + goto check_failed; + if (GF_AI_COMPARE_MATCH == ret) + goto found_bad_brick_order; + ai_list_tmp2 = cds_list_entry(ai_list_tmp2->list.next, + addrinfo_list_t, list); + } + ++j; + } + gf_msg_debug(this->name, 0, "Brick order okay"); + ret = 0; + goto out; + +check_failed: + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BAD_BRKORDER_CHECK_FAIL, + "Failed bad brick order check"); + snprintf(err_str, sizeof(failed_string), failed_string); + ret = -1; + goto out; + +found_bad_brick_order: + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_BAD_BRKORDER, + "Bad brick order found"); + if (type == GF_CLUSTER_TYPE_DISPERSE) { + snprintf(err_str, sizeof(found_string), found_string, "disperse"); + } else { + snprintf(err_str, sizeof(found_string), found_string, "replicate"); + } + + ret = -1; +out: + ai_list_tmp2 = NULL; + GF_FREE(brick_list_ptr); + cds_list_for_each_entry(ai_list_tmp1, &ai_list->list, list) + { + if (ai_list_tmp1->info) 
+ freeaddrinfo(ai_list_tmp1->info); + free(ai_list_tmp2); + ai_list_tmp2 = ai_list_tmp1; + } + free(ai_list); + free(ai_list_tmp2); + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; +} + +static gf_boolean_t +search_peer_in_auth_list(char *peer_hostname, char *auth_allow_list) +{ + if (strstr(auth_allow_list, peer_hostname)) { + return _gf_true; + } + + return _gf_false; +} + +/* glusterd_add_peers_to_auth_list() adds peers into auth.allow list + * if auth.allow list is not empty. This is called for add-brick and + * replica brick operations to avoid failing the temporary mount. New + * volfiles will be generated and clients are notified reg new volfiles. + */ +void +glusterd_add_peers_to_auth_list(char *volname) +{ + int ret = 0; + glusterd_volinfo_t *volinfo = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + int32_t len = 0; + char *auth_allow_list = NULL; + char *new_auth_allow_list = NULL; + + this = THIS; + GF_ASSERT(this); + conf = this->private; + GF_ASSERT(conf); + + GF_VALIDATE_OR_GOTO(this->name, volname, out); + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, + "Unable to find volume: %s", volname); + goto out; + } + + ret = dict_get_str_sizen(volinfo->dict, "auth.allow", &auth_allow_list); + if (ret) { + gf_msg(this->name, GF_LOG_INFO, errno, GD_MSG_DICT_GET_FAILED, + "auth allow list is not set"); + goto out; + } + cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list) + { + len += strlen(peerinfo->hostname); + } + len += strlen(auth_allow_list) + 1; + + new_auth_allow_list = GF_CALLOC(1, len, gf_common_mt_char); + + new_auth_allow_list = strncat(new_auth_allow_list, auth_allow_list, + strlen(auth_allow_list)); + cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list) + { + ret = search_peer_in_auth_list(peerinfo->hostname, new_auth_allow_list); + if (!ret) { + gf_log(this->name, GF_LOG_DEBUG, + "peer %s not found in auth.allow list", peerinfo->hostname); + new_auth_allow_list = strcat(new_auth_allow_list, ","); + new_auth_allow_list = strncat(new_auth_allow_list, + peerinfo->hostname, + strlen(peerinfo->hostname)); + } + } + if (strcmp(new_auth_allow_list, auth_allow_list) != 0) { + /* In case, new_auth_allow_list is not same as auth_allow_list, + * we need to update the volinfo->dict with new_auth_allow_list. + * we delete the auth_allow_list and replace it with + * new_auth_allow_list. for reverting the changes in post commit, we + * keep the copy of auth_allow_list as old_auth_allow_list in + * volinfo->dict. 
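A quick illustration of the grouping logic in glusterd_check_brick_order() above: bricks are taken sub_count at a time (one replica or disperse set per group) and every brick's resolved addresses are compared against each later brick in the same group; any match means two members of one set would land on the same host. The sketch below mirrors that loop structure on plain hostname strings instead of getaddrinfo() results; the names and the strcmp() comparison are illustrative only.

#include <stdio.h>
#include <string.h>

/* Sketch only: same grouping as glusterd_check_brick_order(), but
 * comparing hostname strings instead of resolved addrinfo lists. */
static int
example_check_brick_order(const char *hosts[], int brick_count, int sub_count)
{
    int i, j, k, last;

    for (i = 0; i < brick_count; i += sub_count) {
        last = i + sub_count;
        if (last > brick_count)
            last = brick_count;
        for (j = i; j < last; j++) {
            for (k = j + 1; k < last; k++) {
                if (strcmp(hosts[j], hosts[k]) == 0)
                    return -1; /* same host twice in one subvolume */
            }
        }
    }
    return 0;
}

int
main(void)
{
    /* replica 2: h1,h1 | h2,h2 is a bad order; h1,h2 | h1,h2 is fine */
    const char *bad[] = { "h1", "h1", "h2", "h2" };
    const char *good[] = { "h1", "h2", "h1", "h2" };

    printf("bad=%d good=%d\n", example_check_brick_order(bad, 4, 2),
           example_check_brick_order(good, 4, 2));
    return 0;
}
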
+ */ + dict_del_sizen(volinfo->dict, "auth.allow"); + ret = dict_set_strn(volinfo->dict, "auth.allow", SLEN("auth.allow"), + new_auth_allow_list); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Unable to set new auth.allow list"); + goto out; + } + ret = dict_set_strn(volinfo->dict, "old.auth.allow", + SLEN("old.auth.allow"), auth_allow_list); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Unable to set old auth.allow list"); + goto out; + } + ret = glusterd_create_volfiles_and_notify_services(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOLFILE_CREATE_FAIL, + "failed to create volfiles"); + goto out; + } + } +out: + GF_FREE(new_auth_allow_list); + return; +} + +int +glusterd_replace_old_auth_allow_list(char *volname) +{ + int ret = 0; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + char *old_auth_allow_list = NULL; + + this = THIS; + GF_ASSERT(this); + + GF_VALIDATE_OR_GOTO(this->name, volname, out); + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, + "Unable to find volume: %s", volname); + goto out; + } + + ret = dict_get_str_sizen(volinfo->dict, "old.auth.allow", + &old_auth_allow_list); + if (ret) { + gf_msg(this->name, GF_LOG_INFO, errno, GD_MSG_DICT_GET_FAILED, + "old auth allow list is not set, no need to replace the list"); + ret = 0; + goto out; + } + + dict_del_sizen(volinfo->dict, "auth.allow"); + ret = dict_set_strn(volinfo->dict, "auth.allow", SLEN("auth.allow"), + old_auth_allow_list); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Unable to replace auth.allow list"); + goto out; + } + + dict_del_sizen(volinfo->dict, "old.auth.allow"); + + ret = glusterd_create_volfiles_and_notify_services(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOLFILE_CREATE_FAIL, + "failed to create volfiles"); + goto out; + } + ret = glusterd_store_volinfo(volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOLINFO_STORE_FAIL, + "failed to store volinfo"); + goto out; + } +out: + return ret; } diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index ffb9e971aff..bf6ac295e26 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -1,272 +1,865 @@ /* - Copyright (c) 2006-2010 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. 
+ This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ #ifndef _GLUSTERD_UTILS_H -#define _GLUSTERD_UTILS_H_ - -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif +#define _GLUSTERD_UTILS_H #include <pthread.h> -#include "uuid.h" - -#include "glusterfs.h" -#include "xlator.h" -#include "logging.h" -#include "call-stub.h" -#include "fd.h" -#include "byte-order.h" +#include <glusterfs/compat-uuid.h> + +#include <glusterfs/glusterfs.h> +#include <glusterfs/xlator.h> +#include <glusterfs/logging.h> +#include <glusterfs/call-stub.h> +#include <glusterfs/byte-order.h> #include "glusterd.h" #include "rpc-clnt.h" #include "protocol-common.h" +#include "glusterfs3-xdr.h" +#include "glusterd-peer-utils.h" + +#define GLUSTERD_SOCK_DIR "/var/run/gluster" +#define GLUSTERD_ASSIGN_BRICKID_TO_BRICKINFO(brickinfo, volinfo, brickid) \ + do { \ + sprintf(brickinfo->brick_id, "%s-client-%d", volinfo->volname, \ + brickid); \ + } while (0) + +#define GLUSTERD_ASSIGN_BRICKID_TO_TA_BRICKINFO(ta_brickinfo, volinfo, \ + brickid) \ + do { \ + sprintf(ta_brickinfo->brick_id, "%s-ta-%d", volinfo->volname, \ + brickid); \ + } while (0) + +#define ALL_VOLUME_OPTION_CHECK(volname, get_opt, key, ret, op_errstr, label) \ + do { \ + gf_boolean_t _all = !strcmp("all", volname); \ + gf_boolean_t _key_all = !strcmp(key, "all"); \ + gf_boolean_t _is_valid_opt = _gf_false; \ + int32_t i = 0; \ + \ + if (!get_opt && \ + (_key_all || !strcmp(key, GLUSTERD_MAX_OP_VERSION_KEY))) { \ + ret = -1; \ + *op_errstr = gf_strdup("Not a valid option to set"); \ + goto out; \ + } \ + if (_key_all) { \ + _is_valid_opt = _gf_true; \ + } else { \ + for (i = 0; valid_all_vol_opts[i].option; i++) { \ + if (!strcmp(key, valid_all_vol_opts[i].option)) { \ + _is_valid_opt = _gf_true; \ + break; \ + } \ + } \ + } \ + if (_all && !_is_valid_opt) { \ + ret = -1; \ + *op_errstr = gf_strdup("Not a valid option for all volumes"); \ + goto label; \ + } else if (!_all && _is_valid_opt) { \ + ret = -1; \ + *op_errstr = gf_strdup("Not a valid option for single volume"); \ + goto label; \ + } \ + } while (0) + struct glusterd_lock_ { - uuid_t owner; - time_t timestamp; + uuid_t owner; + time_t timestamp; }; -typedef struct glusterd_volopt_ctx_ { - dict_t *dict; - int count; - int opt_count; -} glusterd_volopt_ctx_t; +typedef struct glusterd_dict_ctx_ { + dict_t *dict; + int opt_count; + char *key_name; + char *val_name; + char *prefix; +} glusterd_dict_ctx_t; -typedef int (*glusterd_condition_func) (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo, - void *ctx); +gf_boolean_t +is_brick_mx_enabled(void); +int +glusterd_compare_lines(const void *a, const void *b); + +typedef int (*glusterd_condition_func)(glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, + void *ctx); typedef struct glusterd_lock_ glusterd_lock_t; int32_t -glusterd_lock (uuid_t new_owner); +glusterd_get_lock_owner(uuid_t *cur_owner); int32_t -glusterd_unlock (uuid_t owner); +glusterd_lock(uuid_t new_owner); int32_t -glusterd_get_uuid (uuid_t *uuid); +glusterd_unlock(uuid_t owner); + +int32_t +glusterd_get_uuid(uuid_t *uuid); + +char * +gd_get_shd_key(int type); int -glusterd_submit_reply (rpcsvc_request_t *req, void *arg, - struct iovec *payload, int payloadcount, - struct iobref *iobref, gd_serialize_t sfunc); 
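The glusterd_submit_reply() change at this point in the header swaps the old gd_serialize_t callback for a libgfxdr xdrproc_t, so a handler now passes the XDR encoder of its response type directly. A minimal sketch of the new calling convention follows; the wrapper function is hypothetical, gf_cli_rsp comes from the surrounding prototypes, and xdr_gf_cli_rsp is assumed to be its generated XDR routine.

/* Sketch only: replying to a CLI request through the new signature. */
static int
example_reply_to_cli(rpcsvc_request_t *req, int op_ret, int op_errno,
                     const char *errstr)
{
    gf_cli_rsp rsp = {
        0,
    };

    rsp.op_ret = op_ret;
    rsp.op_errno = op_errno;
    rsp.op_errstr = (char *)(errstr ? errstr : "");

    /* the xdrproc_t argument replaces the old gd_serialize_t callback */
    return glusterd_submit_reply(req, &rsp, NULL, 0, NULL,
                                 (xdrproc_t)xdr_gf_cli_rsp);
}
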
+glusterd_submit_reply(rpcsvc_request_t *req, void *arg, struct iovec *payload, + int payloadcount, struct iobref *iobref, + xdrproc_t xdrproc); int -glusterd_submit_request (struct rpc_clnt *rpc, void *req, - call_frame_t *frame, rpc_clnt_prog_t *prog, - int procnum, struct iobref *iobref, - gd_serialize_t sfunc, xlator_t *this, - fop_cbk_fn_t cbkfn); +glusterd_to_cli(rpcsvc_request_t *req, gf_cli_rsp *arg, struct iovec *payload, + int payloadcount, struct iobref *iobref, xdrproc_t xdrproc, + dict_t *dict); + +int +glusterd_submit_request(struct rpc_clnt *rpc, void *req, call_frame_t *frame, + rpc_clnt_prog_t *prog, int procnum, + struct iobref *iobref, xlator_t *this, + fop_cbk_fn_t cbkfn, xdrproc_t xdrproc); int32_t -glusterd_volinfo_new (glusterd_volinfo_t **volinfo); +glusterd_volinfo_new(glusterd_volinfo_t **volinfo); -gf_boolean_t -glusterd_check_volume_exists (char *volname); +int32_t +glusterd_volinfo_dup(glusterd_volinfo_t *volinfo, + glusterd_volinfo_t **dup_volinfo, + gf_boolean_t set_userauth); + +char * +glusterd_auth_get_username(glusterd_volinfo_t *volinfo); + +char * +glusterd_auth_get_password(glusterd_volinfo_t *volinfo); int32_t -glusterd_brickinfo_new (glusterd_brickinfo_t **brickinfo); +glusterd_auth_set_username(glusterd_volinfo_t *volinfo, char *username); int32_t -glusterd_brickinfo_from_brick (char *brick, glusterd_brickinfo_t **brickinfo); +glusterd_auth_set_password(glusterd_volinfo_t *volinfo, char *password); + +void +glusterd_auth_cleanup(glusterd_volinfo_t *volinfo); int32_t -glusterd_friend_cleanup (glusterd_peerinfo_t *peerinfo); +glusterd_brickprocess_new(glusterd_brick_proc_t **brickprocess); int32_t -glusterd_peer_destroy (glusterd_peerinfo_t *peerinfo); +glusterd_brickinfo_new(glusterd_brickinfo_t **brickinfo); int32_t -glusterd_peer_hostname_new (char *hostname, glusterd_peer_hostname_t **name); +glusterd_brickinfo_new_from_brick(char *brick, glusterd_brickinfo_t **brickinfo, + gf_boolean_t construct_real_path, + char **op_errstr); int32_t -glusterd_volinfo_find (char *volname, glusterd_volinfo_t **volinfo); +glusterd_volinfo_find(const char *volname, glusterd_volinfo_t **volinfo); + +gf_boolean_t +glusterd_volume_exists(const char *volname); + +int +glusterd_volinfo_find_by_volume_id(uuid_t volume_id, + glusterd_volinfo_t **volinfo); int32_t glusterd_service_stop(const char *service, char *pidfile, int sig, gf_boolean_t force_kill); int32_t -glusterd_resolve_brick (glusterd_brickinfo_t *brickinfo); +glusterd_service_stop_nolock(const char *service, char *pidfile, int sig, + gf_boolean_t force_kill); + +int +glusterd_get_next_available_brickid(glusterd_volinfo_t *volinfo); int32_t -glusterd_volume_start_glusterfs (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo); +glusterd_resolve_brick(glusterd_brickinfo_t *brickinfo); + +int +glusterd_brick_process_add_brick(glusterd_brickinfo_t *brickinfo, + glusterd_brickinfo_t *parent_brickinfo); + +int +glusterd_brick_process_remove_brick(glusterd_brickinfo_t *brickinfo, + int *last_brick); + +int +glusterd_brick_proc_for_port(int port, glusterd_brick_proc_t **brickprocess); + +int32_t +glusterd_volume_start_glusterfs(glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, + gf_boolean_t wait); int32_t -glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo); +glusterd_volume_stop_glusterfs(glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, + gf_boolean_t del_brick); + +int +send_attach_req(xlator_t *this, struct rpc_clnt *rpc, char 
*path, + glusterd_brickinfo_t *brick, glusterd_brickinfo_t *other_brick, + int op); + +glusterd_volinfo_t * +glusterd_volinfo_ref(glusterd_volinfo_t *volinfo); + +glusterd_volinfo_t * +glusterd_volinfo_unref(glusterd_volinfo_t *volinfo); int32_t -glusterd_volinfo_delete (glusterd_volinfo_t *volinfo); +glusterd_volinfo_delete(glusterd_volinfo_t *volinfo); int32_t -glusterd_brickinfo_delete (glusterd_brickinfo_t *brickinfo); +glusterd_brickinfo_delete(glusterd_brickinfo_t *brickinfo); gf_boolean_t -glusterd_is_cli_op_req (int32_t op); +glusterd_is_cli_op_req(int32_t op); int32_t -glusterd_volume_brickinfo_get_by_brick (char *brick, - glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t **brickinfo); +glusterd_volume_brickinfo_get_by_brick(char *brick, glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t **brickinfo, + gf_boolean_t construct_real_path); + int32_t -glusterd_is_local_addr (char *hostname); +glusterd_add_volumes_to_export_dict(dict_t *peer_data, char **buf, + u_int *length); int32_t -glusterd_build_volume_dict (dict_t **vols); +glusterd_compare_friend_data(dict_t *peer_data, int32_t *status, + char *hostname); + +int +glusterd_compute_cksum(glusterd_volinfo_t *volinfo, gf_boolean_t is_quota_conf); + +void +glusterd_set_socket_filepath(char *sock_filepath, char *sockpath, size_t len); + +struct rpc_clnt * +glusterd_pending_node_get_rpc(glusterd_pending_node_t *pending_node); + +void +glusterd_pending_node_put_rpc(glusterd_pending_node_t *pending_node); + +int +glusterd_remote_hostname_get(rpcsvc_request_t *req, char *remote_host, int len); int32_t -glusterd_compare_friend_data (dict_t *vols, int32_t *status); +glusterd_import_friend_volumes_synctask(void *opaque); +int32_t +glusterd_import_friend_volumes(dict_t *peer_data); +void +glusterd_set_volume_status(glusterd_volinfo_t *volinfo, + glusterd_volume_status status); + +int32_t +glusterd_volume_count_get(void); +int32_t +glusterd_add_volume_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict, + int32_t count, char *prefix); int -glusterd_volume_compute_cksum (glusterd_volinfo_t *volinfo); +glusterd_get_brickinfo(xlator_t *this, const char *brickname, int port, + glusterd_brickinfo_t **brickinfo); + +void +glusterd_set_brick_status(glusterd_brickinfo_t *brickinfo, + gf_brick_status_t status); gf_boolean_t -glusterd_is_nfs_started (); +glusterd_is_brick_started(glusterd_brickinfo_t *brickinfo); -int32_t -glusterd_nfs_server_start (); +int +glusterd_friend_brick_belongs(glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, void *uuid); +int +glusterd_all_volume_cond_check(glusterd_condition_func func, int status, + void *ctx); +int +glusterd_brick_start(glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, gf_boolean_t wait, + gf_boolean_t only_connect); +int +glusterd_brick_stop(glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, gf_boolean_t del_brick); + +int +glusterd_is_defrag_on(glusterd_volinfo_t *volinfo); int32_t -glusterd_nfs_server_stop (); +glusterd_volinfo_bricks_delete(glusterd_volinfo_t *volinfo); int -glusterd_remote_hostname_get (rpcsvc_request_t *req, - char *remote_host, int len); +glusterd_new_brick_validate(char *brick, glusterd_brickinfo_t *brickinfo, + char *op_errstr, size_t len, char *op); +int32_t +glusterd_volume_brickinfos_delete(glusterd_volinfo_t *volinfo); + int32_t -glusterd_import_friend_volumes (dict_t *vols); +glusterd_volume_brickinfo_get(uuid_t uuid, char *hostname, char *path, + glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t **brickinfo); + +int 
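glusterd_volinfo_ref() and glusterd_volinfo_unref(), declared just above, add reference counting to volinfo objects so they can safely outlive the lookup that produced them. A hedged usage sketch follows; the helper and its flow are illustrative, only the prototypes come from this header.

/* Sketch only: hold a reference while working on the volinfo, then
 * drop it; unref may free the object when the last reference goes. */
static int
example_with_volinfo(const char *volname)
{
    glusterd_volinfo_t *volinfo = NULL;
    int ret = glusterd_volinfo_find(volname, &volinfo);

    if (ret)
        return ret;

    volinfo = glusterd_volinfo_ref(volinfo);

    /* ... use volinfo, e.g. inspect volinfo->volname ... */

    glusterd_volinfo_unref(volinfo);
    return 0;
}
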
+glusterd_brickinfo_get(uuid_t uuid, char *hostname, char *path, + glusterd_brickinfo_t **brickinfo); + +int +glusterd_rb_check_bricks(glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *src_brick, + glusterd_brickinfo_t *dst_brick); + +int +glusterd_check_and_set_brick_xattr(char *host, char *path, uuid_t uuid, + char **op_errstr, gf_boolean_t is_force); + +int +glusterd_validate_and_create_brickpath(glusterd_brickinfo_t *brickinfo, + uuid_t volume_id, char *volname, + char **op_errstr, gf_boolean_t is_force, + gf_boolean_t ignore_partition); +int +glusterd_sm_tr_log_transition_add(glusterd_sm_tr_log_t *log, int old_state, + int new_state, int event); +int +glusterd_sm_tr_log_init(glusterd_sm_tr_log_t *log, char *(*state_name_get)(int), + char *(*event_name_get)(int), size_t size); void -glusterd_set_volume_status (glusterd_volinfo_t *volinfo, - glusterd_volume_status status); +glusterd_sm_tr_log_delete(glusterd_sm_tr_log_t *log); + +int +glusterd_sm_tr_log_add_to_dict(dict_t *dict, + glusterd_sm_tr_log_t *circular_log); int -glusterd_check_generate_start_nfs (void); +glusterd_remove_pending_entry(struct cds_list_head *list, void *elem); +int +glusterd_clear_pending_nodes(struct cds_list_head *list); +int32_t +glusterd_brick_connect(glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, char *socketpath); int32_t -glusterd_volume_count_get (void); +glusterd_brick_disconnect(glusterd_brickinfo_t *brickinfo); int32_t -glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, - dict_t *dict, int32_t count); +glusterd_delete_volume(glusterd_volinfo_t *volinfo); +int32_t +glusterd_delete_brick(glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo); + +int32_t +glusterd_delete_all_bricks(glusterd_volinfo_t *volinfo); + int -glusterd_get_brickinfo (xlator_t *this, const char *brickname, - int port, gf_boolean_t localhost, - glusterd_brickinfo_t **brickinfo); +glusterd_spawn_daemons(void *opaque); -void -glusterd_set_brick_status (glusterd_brickinfo_t *brickinfo, - gf_brick_status_t status); +int +glusterd_restart_gsyncds(glusterd_conf_t *conf); + +int +glusterd_start_gsync(glusterd_volinfo_t *master_vol, char *slave, + char *path_list, char *conf_path, char *glusterd_uuid_str, + char **op_errstr, gf_boolean_t is_pause); +int +glusterd_get_local_brickpaths(glusterd_volinfo_t *volinfo, char **pathlist); + +int32_t +glusterd_recreate_bricks(glusterd_conf_t *conf); +int32_t +glusterd_handle_upgrade_downgrade(dict_t *options, glusterd_conf_t *conf, + gf_boolean_t upgrade, gf_boolean_t downgrade); + +int +glusterd_add_brick_detail_to_dict(glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, dict_t *dict, + int32_t count); + +int32_t +glusterd_add_brick_to_dict(glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, dict_t *dict, + int32_t count); + +int32_t +glusterd_get_all_volnames(dict_t *dict); gf_boolean_t -glusterd_is_brick_started (glusterd_brickinfo_t *brickinfo); +glusterd_is_fuse_available(); int -glusterd_friend_find_by_hostname (const char *hoststr, - glusterd_peerinfo_t **peerinfo); +glusterd_brick_statedump(glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, char *options, + int option_cnt, char **op_errstr); + int -glusterd_hostname_to_uuid (char *hostname, uuid_t uuid); +glusterd_brick_terminate(glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, char *options, + int option_cnt, char **op_errstr); +#ifdef BUILD_GNFS int -glusterd_friend_brick_belongs (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo, void 
*uuid); +glusterd_nfs_statedump(char *options, int option_cnt, char **op_errstr); +#endif + int -glusterd_all_volume_cond_check (glusterd_condition_func func, int status, - void *ctx); +glusterd_client_statedump(char *volname, char *options, int option_cnt, + char **op_errstr); + int -glusterd_brick_start (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo); +glusterd_quotad_statedump(char *options, int option_cnt, char **op_errstr); + +gf_boolean_t +glusterd_is_volume_replicate(glusterd_volinfo_t *volinfo); + +gf_boolean_t +glusterd_is_brick_decommissioned(glusterd_volinfo_t *volinfo, char *hostname, + char *path); int -glusterd_brick_stop (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo); +glusterd_friend_contains_vol_bricks(glusterd_volinfo_t *volinfo, + uuid_t friend_uuid); int -glusterd_is_defrag_on (glusterd_volinfo_t *volinfo); +glusterd_friend_contains_snap_bricks(glusterd_snap_t *snapinfo, + uuid_t friend_uuid); +int +glusterd_friend_remove_cleanup_vols(uuid_t uuid); + +int +glusterd_get_client_filepath(char *filepath, glusterd_volinfo_t *volinfo, + gf_transport_type type); +int +glusterd_get_trusted_client_filepath(char *filepath, + glusterd_volinfo_t *volinfo, + gf_transport_type type); +int +glusterd_restart_rebalance(glusterd_conf_t *conf); + +int +glusterd_restart_rebalance_for_volume(glusterd_volinfo_t *volinfo); + +void +glusterd_defrag_info_set(glusterd_volinfo_t *volinfo, dict_t *dict, int cmd, + int status, int op); int32_t -glusterd_volinfo_bricks_delete (glusterd_volinfo_t *volinfo); +glusterd_add_bricks_hname_path_to_dict(dict_t *dict, + glusterd_volinfo_t *volinfo); + +int +glusterd_add_node_to_dict(char *server, dict_t *dict, int count, + dict_t *vol_opts); + +int +glusterd_calc_dist_leaf_count(int rcount, int scount); + int -glusterd_friend_find_by_uuid (uuid_t uuid, - glusterd_peerinfo_t **peerinfo); +glusterd_get_dist_leaf_count(glusterd_volinfo_t *volinfo); + +glusterd_brickinfo_t * +glusterd_get_brickinfo_by_position(glusterd_volinfo_t *volinfo, uint32_t pos); + +gf_boolean_t +glusterd_is_local_brick(xlator_t *this, glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo); +int +glusterd_validate_volume_id(dict_t *op_dict, glusterd_volinfo_t *volinfo); + +int +glusterd_defrag_volume_status_update(glusterd_volinfo_t *volinfo, + dict_t *rsp_dict, int32_t cmd); + +int +glusterd_check_files_identical(char *filename1, char *filename2, + gf_boolean_t *identical); + int -glusterd_new_brick_validate (char *brick, glusterd_brickinfo_t *brickinfo, - char *op_errstr, size_t len); +glusterd_check_topology_identical(const char *filename1, const char *filename2, + gf_boolean_t *identical); + +void +glusterd_volinfo_reset_defrag_stats(glusterd_volinfo_t *volinfo); +int +glusterd_volset_help(dict_t *dict, char **op_errstr); + +int32_t +glusterd_sync_use_rsp_dict(dict_t *aggr, dict_t *rsp_dict); int32_t -glusterd_volume_brickinfos_delete (glusterd_volinfo_t *volinfo); +glusterd_gsync_use_rsp_dict(dict_t *aggr, dict_t *rsp_dict, char *op_errstr); int32_t -glusterd_volume_brickinfo_get (uuid_t uuid, char *hostname, char *path, - glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t **brickinfo); +glusterd_rb_use_rsp_dict(dict_t *aggr, dict_t *rsp_dict); int -glusterd_brickinfo_get (uuid_t uuid, char *hostname, char *path, - glusterd_brickinfo_t **brickinfo); +glusterd_profile_volume_use_rsp_dict(dict_t *aggr, dict_t *rsp_dict); +int +glusterd_volume_status_copy_to_op_ctx_dict(dict_t *aggr, dict_t *rsp_dict); +int 
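glusterd_calc_dist_leaf_count() and glusterd_get_dist_leaf_count(), declared above, report how many bricks make up one distribute subvolume (a "leaf"). Their bodies are not part of this hunk; the sketch below spells out the assumed arithmetic, replica count times stripe count with 0 treated as 1, so a 6-brick replica-3 volume has 6 / 3 = 2 distribute subvolumes.

/* Assumed behaviour, not copied from the patch: bricks per distribute
 * leaf = replica count * stripe count (0 meaning "not set", i.e. 1). */
static int
example_dist_leaf_count(int replica_count, int stripe_count)
{
    if (replica_count == 0)
        replica_count = 1;
    if (stripe_count == 0)
        stripe_count = 1;
    return replica_count * stripe_count;
}
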
+glusterd_volume_rebalance_use_rsp_dict(dict_t *aggr, dict_t *rsp_dict); +int +glusterd_volume_heal_use_rsp_dict(dict_t *aggr, dict_t *rsp_dict); +int +glusterd_use_rsp_dict(dict_t *aggr, dict_t *rsp_dict); +int +glusterd_sys_exec_output_rsp_dict(dict_t *aggr, dict_t *rsp_dict); +int32_t +glusterd_handle_node_rsp(dict_t *req_ctx, void *pending_entry, glusterd_op_t op, + dict_t *rsp_dict, dict_t *op_ctx, char **op_errstr, + gd_node_type type); +int +glusterd_max_opversion_use_rsp_dict(dict_t *dst, dict_t *src); + +int +glusterd_volume_bitrot_scrub_use_rsp_dict(dict_t *aggr, dict_t *rsp_dict); + +int +glusterd_volume_heal_use_rsp_dict(dict_t *aggr, dict_t *rsp_dict); + +int32_t +glusterd_check_if_quota_trans_enabled(glusterd_volinfo_t *volinfo); + +int +glusterd_volume_quota_copy_to_op_ctx_dict(dict_t *aggr, dict_t *rsp); +int +_profile_volume_add_brick_rsp(dict_t *this, char *key, data_t *value, + void *data); +int +glusterd_profile_volume_brick_rsp(void *pending_entry, dict_t *rsp_dict, + dict_t *op_ctx, char **op_errstr, + gd_node_type type); + +int32_t +glusterd_set_originator_uuid(dict_t *dict); + +/* Should be used only when an operation is in progress, as that is the only + * time a lock_owner is set + */ +gf_boolean_t +is_origin_glusterd(dict_t *dict); + +int +glusterd_get_next_global_opt_version_str(dict_t *opts, char **version_str); + +int +glusterd_generate_and_set_task_id(dict_t *dict, char *key, const int keylen); + +int +glusterd_validate_and_set_gfid(dict_t *op_ctx, dict_t *req_dict, + char **op_errstr); + +int +glusterd_copy_uuid_to_dict(uuid_t uuid, dict_t *dict, char *key, + const int keylen); + +gf_boolean_t +glusterd_is_same_address(char *name1, char *name2); + +void +gd_update_volume_op_versions(glusterd_volinfo_t *volinfo); + int -glusterd_is_rb_started (glusterd_volinfo_t *volinfo); +op_version_check(xlator_t *this, int min_op_version, char *msg, int msglen); + +gf_boolean_t +gd_is_remove_brick_committed(glusterd_volinfo_t *volinfo); int -glusterd_is_rb_paused (glusterd_volinfo_t *volinfo); +glusterd_remove_brick_validate_bricks(gf1_op_commands cmd, int32_t brick_count, + dict_t *dict, glusterd_volinfo_t *volinfo, + char **errstr, gf_cli_defrag_type); +int +glusterd_get_slave_details_confpath(glusterd_volinfo_t *volinfo, dict_t *dict, + char **slave_url, char **slave_host, + char **slave_vol, char **conf_path, + char **op_errstr); int -glusterd_set_rb_status (glusterd_volinfo_t *volinfo, gf_rb_status_t status); +glusterd_get_slave_info(char *slave, char **slave_url, char **hostname, + char **slave_vol, char **op_errstr); int -glusterd_is_replace_running (glusterd_volinfo_t *volinfo, glusterd_brickinfo_t *brickinfo); +glusterd_get_statefile_name(glusterd_volinfo_t *volinfo, char *slave, + char *conf_path, char **statefile, + gf_boolean_t *is_template_in_use); int -glusterd_rb_check_bricks (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *src_brick, glusterd_brickinfo_t *dst_brick); +glusterd_gsync_read_frm_status(char *path, char *buf, size_t blen); + int -glusterd_brick_create_path (char *host, char *path, mode_t mode, - char **op_errstr); +glusterd_create_status_file(char *master, char *slave, char *slave_url, + char *slave_vol, char *status); + int -glusterd_sm_tr_log_transition_add (glusterd_sm_tr_log_t *log, - int old_state, int new_state, - int event); +glusterd_check_restart_gsync_session(glusterd_volinfo_t *volinfo, char *slave, + dict_t *resp_dict, char *path_list, + char *conf_path, gf_boolean_t is_force); + int -glusterd_peerinfo_new (glusterd_peerinfo_t 
**peerinfo, - glusterd_friend_sm_state_t state, - uuid_t *uuid, const char *hostname); +glusterd_check_gsync_running_local(char *master, char *slave, char *conf_path, + gf_boolean_t *is_run); + +gf_boolean_t +glusterd_is_status_tasks_op(glusterd_op_t op, dict_t *dict); + +gf_boolean_t +gd_should_i_start_rebalance(glusterd_volinfo_t *volinfo); + int -glusterd_sm_tr_log_init (glusterd_sm_tr_log_t *log, - char * (*state_name_get) (int), - char * (*event_name_get) (int), - size_t size); +glusterd_is_volume_quota_enabled(glusterd_volinfo_t *volinfo); + +int +glusterd_is_volume_inode_quota_enabled(glusterd_volinfo_t *volinfo); + +int +glusterd_is_bitrot_enabled(glusterd_volinfo_t *volinfo); + +gf_boolean_t +glusterd_all_volumes_with_quota_stopped(); + void -glusterd_sm_tr_log_delete (glusterd_sm_tr_log_t *log); +glusterd_clean_up_quota_store(glusterd_volinfo_t *volinfo); + +int +glusterd_remove_auxiliary_mount(char *volname); + +gf_boolean_t +glusterd_status_has_tasks(int cmd); + +int +gd_stop_rebalance_process(glusterd_volinfo_t *volinfo); + +rpc_clnt_t * +glusterd_rpc_clnt_unref(glusterd_conf_t *conf, rpc_clnt_t *rpc); + +int32_t +glusterd_compare_volume_name(struct cds_list_head *, struct cds_list_head *); + +char * +glusterd_get_brick_mount_device(char *brick_path); + +struct mntent * +glusterd_get_mnt_entry_info(char *mnt_pt, char *buff, int buflen, + struct mntent *entry_ptr); + +int +glusterd_get_brick_root(char *path, char **mount_point); + +int32_t +glusterd_lvm_snapshot_remove(dict_t *rsp_dict, glusterd_volinfo_t *snap_vol); + +gf_boolean_t +gd_vol_is_geo_rep_active(glusterd_volinfo_t *volinfo); + +int32_t +glusterd_get_brick_mount_dir(char *brickpath, char *hostname, char *mount_dir); + +int32_t +glusterd_aggr_brick_mount_dirs(dict_t *aggr, dict_t *rsp_dict); + +int32_t +glusterd_take_lvm_snapshot(glusterd_brickinfo_t *brickinfo, + char *origin_brick_path); + +void +glusterd_launch_synctask(synctask_fn_t fn, void *opaque); + +int +glusterd_enable_default_options(glusterd_volinfo_t *volinfo, char *option); + +int +glusterd_unlink_file(char *sock_file_path); + +int32_t +glusterd_find_brick_mount_path(char *brick_path, char **brick_mount_path); + +/* + * Function to retrieve list of snap volnames and their uuids + */ +int +glusterd_snapshot_get_volnames_uuids(dict_t *dict, char *volname, + gf_getsnap_name_uuid_rsp *snap_info_rsp); + +int +glusterd_update_mntopts(char *brick_path, glusterd_brickinfo_t *brickinfo); + +int +glusterd_update_fs_label(glusterd_brickinfo_t *brickinfo); + +int +glusterd_get_volopt_content(dict_t *dict, gf_boolean_t xml_out); int -glusterd_sm_tr_log_add_to_dict (dict_t *dict, - glusterd_sm_tr_log_t *circular_log); +glusterd_get_global_max_op_version(rpcsvc_request_t *req, dict_t *ctx, + int count); + int -glusterd_remove_pending_entry (struct list_head *list, void *elem); +glusterd_get_global_options_for_all_vols(rpcsvc_request_t *req, dict_t *dict, + char **op_errstr); + int -glusterd_clear_pending_nodes (struct list_head *list); +glusterd_get_default_val_for_volopt(dict_t *dict, gf_boolean_t all_opts, + char *key, char *orig_key, + glusterd_volinfo_t *volinfo, + char **err_str); + +int +glusterd_check_client_op_version_support(char *volname, uint32_t op_version, + char **op_errstr); + gf_boolean_t -glusterd_peerinfo_is_uuid_unknown (glusterd_peerinfo_t *peerinfo); +glusterd_have_peers(); + +gf_boolean_t +glusterd_have_volumes(); + +void +glusterd_get_rebalance_volfile(glusterd_volinfo_t *volinfo, char *path, + int path_len); + +void 
+glusterd_get_gfproxy_client_volfile(glusterd_volinfo_t *volinfo, char *path, + int path_len); + int32_t -glusterd_brick_connect (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo); +glusterd_brickinfo_dup(glusterd_brickinfo_t *brickinfo, + glusterd_brickinfo_t *dup_brickinfo); + +int +glusterd_vol_add_quota_conf_to_dict(glusterd_volinfo_t *volinfo, dict_t *load, + int vol_idx, char *prefix); + int32_t -glusterd_brick_disconnect (glusterd_brickinfo_t *brickinfo); +glusterd_import_volinfo(dict_t *peer_data, int count, + glusterd_volinfo_t **volinfo, char *prefix); + +int +glusterd_import_quota_conf(dict_t *peer_data, int vol_idx, + glusterd_volinfo_t *new_volinfo, char *prefix); + +gf_boolean_t +glusterd_is_shd_compatible_volume(glusterd_volinfo_t *volinfo); + +gf_boolean_t +glusterd_is_shd_compatible_type(int type); + +gf_boolean_t +glusterd_are_all_volumes_stopped(); + +gf_boolean_t +glusterd_all_shd_compatible_volumes_stopped(); + +void +glusterd_nfs_pmap_deregister(); + +gf_boolean_t +glusterd_is_volume_started(glusterd_volinfo_t *volinfo); + +int +glusterd_volume_get_type_str(glusterd_volinfo_t *volinfo, char **vol_type_str); + +int +glusterd_volume_get_status_str(glusterd_volinfo_t *volinfo, char *status_str); + +void +glusterd_brick_get_status_str(glusterd_brickinfo_t *brickinfo, + char *status_str); + +int +glusterd_volume_get_transport_type_str(glusterd_volinfo_t *volinfo, + char *transport_type_str); + +int +glusterd_volume_get_quorum_status_str(glusterd_volinfo_t *volinfo, + char *quorum_status_str); + +int +glusterd_volume_get_rebalance_status_str(glusterd_volinfo_t *volinfo, + char *rebal_status_str); + +void +glusterd_list_add_order(struct cds_list_head *new, struct cds_list_head *head, + int (*compare)(struct cds_list_head *, + struct cds_list_head *)); + +struct rpc_clnt * +glusterd_defrag_rpc_get(glusterd_defrag_info_t *defrag); + +struct rpc_clnt * +glusterd_defrag_rpc_put(glusterd_defrag_info_t *defrag); + int32_t -glusterd_delete_volume (glusterd_volinfo_t *volinfo); +glusterd_count_connected_peers(int32_t *count); + +int +glusterd_volume_brick_for_each(glusterd_volinfo_t *volinfo, void *data, + int (*fn)(glusterd_volinfo_t *, + glusterd_brickinfo_t *, + dict_t *mod_dict, void *)); + +int +glusterd_get_dummy_client_filepath(char *filepath, glusterd_volinfo_t *volinfo, + gf_transport_type type); + +int +glusterd_handle_replicate_brick_ops(glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, + glusterd_op_t op); +void +assign_brick_groups(glusterd_volinfo_t *volinfo); + +glusterd_brickinfo_t * +get_last_brick_of_brick_group(glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo); +int +glusterd_get_rb_dst_brickinfo(glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t **brickinfo); +int +rb_update_dstbrick_port(glusterd_brickinfo_t *dst_brickinfo, dict_t *rsp_dict, + dict_t *req_dict); +int +glusterd_op_perform_replace_brick(glusterd_volinfo_t *volinfo, char *old_brick, + char *new_brick, dict_t *dict); +int32_t +glusterd_brick_unlink_socket_file(glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo); +char * +gd_rb_op_to_str(char *op); + +glusterd_op_t +gd_cli_to_gd_op(char *cli_op); + +int +glusterd_get_dst_brick_info(char **dst_brick, char *volname, char **op_errstr, + glusterd_brickinfo_t **dst_brickinfo, char **host, + dict_t *dict, char **dup_dstbrick); + +int +glusterd_brick_op_prerequisites(dict_t *dict, char **op, glusterd_op_t *gd_op, + char **volname, glusterd_volinfo_t **volinfo, + char **src_brick, + glusterd_brickinfo_t 
**src_brickinfo, + char *pidfile, char **op_errstr, + dict_t *rsp_dict); + +int +glusterd_get_volinfo_from_brick(char *brick, glusterd_volinfo_t **volinfo); + +gf_boolean_t +glusterd_is_profile_on(glusterd_volinfo_t *volinfo); + +char * +search_brick_path_from_proc(pid_t brick_pid, char *brickpath); + int32_t -glusterd_delete_brick (glusterd_volinfo_t* volinfo, - glusterd_brickinfo_t *brickinfo); +glusterd_add_shd_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict, + int32_t count); int32_t -glusterd_delete_all_bricks (glusterd_volinfo_t* volinfo); +glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type, + char **volname, char **bricks, int32_t *brick_count, + int32_t sub_count); + #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index 5844c7c7e8c..8d6fb5e0fac 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -1,178 +1,73 @@ /* - Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is GF_FREE software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ - + Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ #include <fnmatch.h> +#include <sys/wait.h> +#include <dlfcn.h> +#include <utime.h> -#include "xlator.h" +#include <glusterfs/xlator.h> #include "glusterd.h" -#include "defaults.h" -#include "logging.h" -#include "dict.h" -#include "graph-utils.h" -#include "trie.h" +#include <glusterfs/defaults.h> +#include <glusterfs/syscall.h> +#include <glusterfs/logging.h> +#include <glusterfs/dict.h> +#include <glusterfs/graph-utils.h> +#include <glusterfs/common-utils.h> +#include "glusterd-store.h" +#include "glusterd-hooks.h" +#include <glusterfs/trie.h> #include "glusterd-mem-types.h" -#include "cli1.h" +#include "cli1-xdr.h" #include "glusterd-volgen.h" - - -/* dispatch table for VOLUME SET - * ----------------------------- - * - * Format of entries: - * - * First field is the <key>, for the purpose of looking it up - * in volume dictionary. Each <key> is of the format "<domain>.<specifier>". - * - * Second field is <voltype>. - * - * Third field is <option>, if its unset, it's assumed to be - * the same as <specifier>. - * - * Fourth field is <value>. In this context they are used to specify - * a default. That is, even the volume dict doesn't have a value, - * we procced as if the default value were set for it. - * - * There are two type of entries: basic and special. - * - * - Basic entries are the ones where the <option> does _not_ start with - * the bang! character ('!'). 
- * - * In their case, <option> is understood as an option for an xlator of - * type <voltype>. Their effect is to copy over the volinfo->dict[<key>] - * value to all graph nodes of type <voltype> (if such a value is set). - * - * You are free to add entries of this type, they will become functional - * just by being present in the table. - * - * - Special entries where the <option> starts with the bang!. - * - * They are not applied to all graphs during generation, and you cannot - * extend them in a trivial way which could be just picked up. Better - * not touch them unless you know what you do. - * - * "NODOC" entries are not part of the public interface and are subject - * to change at any time. - */ -typedef enum { DOC, NO_DOC, GLOBAL_DOC, GLOBAL_NO_DOC } option_type_t; - - -struct volopt_map_entry { - char *key; - char *voltype; - char *option; - char *value; - option_type_t type; +#include "glusterd-geo-rep.h" +#include "glusterd-utils.h" +#include "glusterd-messages.h" +#include <glusterfs/run.h> +#include <glusterfs/options.h> +#include "glusterd-snapshot-utils.h" +#include "glusterd-svc-mgmt.h" +#include "glusterd-svc-helper.h" +#include "glusterd-snapd-svc-helper.h" +#include "glusterd-shd-svc-helper.h" +#include "glusterd-gfproxyd-svc-helper.h" + +struct gd_validate_reconf_opts { + dict_t *options; + char **op_errstr; }; -static struct volopt_map_entry glusterd_volopt_map[] = { - - {"cluster.lookup-unhashed", "cluster/distribute", NULL, NULL, NO_DOC }, /* NODOC */ - {"cluster.min-free-disk", "cluster/distribute", NULL, NULL, NO_DOC }, /* NODOC */ - - {"cluster.entry-change-log", "cluster/replicate", NULL, NULL, NO_DOC }, /* NODOC */ - {"cluster.read-subvolume", "cluster/replicate", NULL, NULL, NO_DOC }, /* NODOC */ - {"cluster.background-self-heal-count", "cluster/replicate", NULL, NULL, NO_DOC }, /* NODOC */ - {"cluster.metadata-self-heal", "cluster/replicate", NULL, NULL, NO_DOC }, /* NODOC */ - {"cluster.data-self-heal", "cluster/replicate", NULL, NULL, NO_DOC }, /* NODOC */ - {"cluster.entry-self-heal", "cluster/replicate", NULL, NULL, NO_DOC }, /* NODOC */ - {"cluster.strict-readdir", "cluster/replicate", NULL, NULL, NO_DOC }, /* NODOC */ - {"cluster.self-heal-window-size", "cluster/replicate", "data-self-heal-window-size", NULL, DOC}, - {"cluster.data-change-log", "cluster/replicate", NULL, NULL, NO_DOC }, /* NODOC */ - {"cluster.metadata-change-log", "cluster/replicate", NULL, NULL, NO_DOC }, /* NODOC */ - {"cluster.data-self-heal-algorithm", "cluster/replicate", "data-self-heal-algorithm", NULL,DOC}, - - {"cluster.stripe-block-size", "cluster/stripe", "block-size", NULL, DOC}, - - {"diagnostics.latency-measurement", "debug/io-stats", NULL, NULL, NO_DOC }, - {"diagnostics.dump-fd-stats", "debug/io-stats", NULL, NULL, NO_DOC }, - {"diagnostics.count-fop-hits", "debug/io-stats", NULL, NULL, NO_DOC }, - {"diagnostics.brick-log-level", "debug/io-stats", "!log-level", NULL, DOC}, - {"diagnostics.client-log-level", "debug/io-stats", "!log-level", NULL, DOC}, - - {"performance.cache-max-file-size", "performance/io-cache", "max-file-size", NULL, DOC}, - {"performance.cache-min-file-size", "performance/io-cache", "min-file-size", NULL, DOC}, - {"performance.cache-refresh-timeout", "performance/io-cache", "cache-timeout", NULL, DOC}, - {"performance.cache-priority", "performance/io-cache", "priority", NULL, DOC}, /* NODOC */ - {"performance.cache-size", "performance/io-cache", NULL, NULL, NO_DOC }, - {"performance.cache-size", "performance/quick-read", NULL, NULL, NO_DOC }, - 
{"performance.flush-behind", "performance/write-behind", "flush-behind", NULL, DOC}, - - {"performance.io-thread-count", "performance/io-threads", "thread-count", DOC}, - - {"performance.disk-usage-limit", "performance/quota", NULL, NULL, NO_DOC }, /* NODOC */ - {"performance.min-free-disk-limit", "performance/quota", NULL, NULL, NO_DOC }, /* NODOC */ - - {"performance.write-behind-window-size", "performance/write-behind", "cache-size", NULL, DOC}, - - {"network.frame-timeout", "protocol/client", NULL, NULL, NO_DOC }, - {"network.ping-timeout", "protocol/client", NULL, NULL, NO_DOC }, - {"network.inode-lru-limit", "protocol/server", NULL, NULL, NO_DOC }, /* NODOC */ - - {"auth.allow", "protocol/server", "!server-auth", "*", DOC}, - {"auth.reject", "protocol/server", "!server-auth", NULL, DOC}, - - {"transport.keepalive", "protocol/server", "transport.socket.keepalive", NULL, NO_DOC}, - {"server.allow-insecure", "protocol/server", "rpc-auth-allow-insecure", NULL, NO_DOC}, - - {"performance.write-behind", "performance/write-behind", "!perf", "on", NO_DOC}, /* NODOC */ - {"performance.read-ahead", "performance/read-ahead", "!perf", "on", NO_DOC}, /* NODOC */ - {"performance.io-cache", "performance/io-cache", "!perf", "on", NO_DOC}, /* NODOC */ - {"performance.quick-read", "performance/quick-read", "!perf", "on", NO_DOC}, /* NODOC */ - {"performance.stat-prefetch", "performance/stat-prefetch", "!perf", "on", NO_DOC}, /* NODOC */ - - {"features.marker-gsync", "features/marker", "gsync", "off", NO_DOC}, - - {"nfs.enable-ino32", "nfs/server", "nfs.enable-ino32", NULL, GLOBAL_DOC}, - {"nfs.mem-factor", "nfs/server", "nfs.mem-factor", NULL, GLOBAL_DOC}, - {"nfs.export-dirs", "nfs/server", "nfs3.export-dirs", NULL, GLOBAL_DOC}, - {"nfs.export-volumes", "nfs/server", "nfs3.export-volumes", NULL, GLOBAL_DOC}, - {"nfs.addr-namelookup", "nfs/server", "rpc-auth.addr.namelookup", NULL, GLOBAL_DOC}, - {"nfs.dynamic-volumes", "nfs/server", "nfs.dynamic-volumes", NULL, GLOBAL_DOC}, - {"nfs.register-with-portmap", "nfs/server", "rpc.register-with-portmap", NULL, GLOBAL_DOC}, - {"nfs.port", "nfs/server", "nfs.port", NULL, GLOBAL_DOC}, - - {"nfs.rpc-auth-unix", "nfs/server", "!nfs.rpc-auth-auth-unix", NULL, DOC}, - {"nfs.rpc-auth-null", "nfs/server", "!nfs.rpc-auth-auth-null", NULL, DOC}, - {"nfs.rpc-auth-allow", "nfs/server", "!nfs.rpc-auth.addr.allow", NULL, DOC}, - {"nfs.rpc-auth-reject", "nfs/server", "!nfs.rpc-auth.addr.reject", NULL, DOC}, - {"nfs.ports-insecure", "nfs/server", "!nfs.auth.ports.insecure", NULL, DOC}, - - {"nfs.trusted-sync", "nfs/server", "!nfs-trusted-sync", NULL, DOC}, - {"nfs.trusted-write", "nfs/server", "!nfs-trusted-write", NULL, DOC}, - {"nfs.volume-access", "nfs/server", "!nfs-volume-access", NULL, DOC}, - {"nfs.export-dir", "nfs/server", "!nfs-export-dir", NULL, DOC}, - {"nfs.disable", "nfs/server", "!nfs-disable", NULL, DOC}, - - {"features.quota", "features/quota", "quota", "off", NO_DOC}, - {"features.quota", "features/marker", "quota", "off", NO_DOC}, - {"features.limit-usage", "features/quota", "limit-set", NULL, NO_DOC}, - - {NULL, } -}; +extern struct volopt_map_entry glusterd_volopt_map[]; + +#define RPC_SET_OPT(XL, CLI_OPT, XLATOR_OPT, ERROR_CMD) \ + do { \ + char *_value = NULL; \ + \ + if (dict_get_str_sizen(set_dict, CLI_OPT, &_value) == 0) { \ + if (xlator_set_fixed_option(XL, "transport.socket." 
XLATOR_OPT, \ + _value) != 0) { \ + gf_msg("glusterd", GF_LOG_WARNING, errno, \ + GD_MSG_XLATOR_SET_OPT_FAIL, \ + "failed to set " XLATOR_OPT); \ + ERROR_CMD; \ + } \ + } \ + } while (0 /* CONSTCOND */) +static int +volgen_graph_build_clients(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *set_dict, void *param); +static int +build_client_graph(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *mod_dict); /********************************************* * @@ -180,180 +75,189 @@ static struct volopt_map_entry glusterd_volopt_map[] = { * *********************************************/ - - - -static xlator_t * -xlator_instantiate_va (const char *type, const char *format, va_list arg) +static void +set_graph_errstr(volgen_graph_t *graph, const char *str) { - xlator_t *xl = NULL; - char *volname = NULL; - int ret = 0; - - ret = gf_vasprintf (&volname, format, arg); - if (ret < 0) { - volname = NULL; - - goto error; - } - - xl = GF_CALLOC (1, sizeof (*xl), gf_common_mt_xlator_t); - if (!xl) - goto error; - ret = xlator_set_type_virtual (xl, type); - if (ret) - goto error; - xl->options = get_new_dict(); - if (!xl->options) - goto error; - xl->name = volname; - INIT_LIST_HEAD (&xl->volume_options); - - return xl; - - error: - gf_log ("", GF_LOG_ERROR, "creating xlator of type %s failed", - type); - if (volname) - GF_FREE (volname); - if (xl) - xlator_destroy (xl); + if (!graph->errstr) + return; - return NULL; + *graph->errstr = gf_strdup(str); } -#ifdef __not_used_as_of_now_ static xlator_t * -xlator_instantiate (const char *type, const char *format, ...) +xlator_instantiate_va(const char *type, const char *format, va_list arg) { - va_list arg; - xlator_t *xl; - - va_start (arg, format); - xl = xlator_instantiate_va (type, format, arg); - va_end (arg); - - return xl; + xlator_t *xl = NULL; + char *volname = NULL; + int ret = 0; + xlator_t *this = THIS; + GF_ASSERT(this); + + ret = gf_vasprintf(&volname, format, arg); + if (ret < 0) { + volname = NULL; + + goto error; + } + + xl = GF_CALLOC(1, sizeof(*xl), gf_common_mt_xlator_t); + if (!xl) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); + goto error; + } + ret = xlator_set_type_virtual(xl, type); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_XLATOR_SET_OPT_FAIL, + NULL); + goto error; + } + xl->options = dict_new(); + if (!xl->options) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); + goto error; + } + xl->name = volname; + CDS_INIT_LIST_HEAD(&xl->volume_options); + + xl->ctx = THIS->ctx; + + return xl; + +error: + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_XLATOR_CREATE_FAIL, "Type=%s", + type, NULL); + GF_FREE(volname); + if (xl) + xlator_destroy(xl); + + return NULL; } -#endif static int -volgen_xlator_link (xlator_t *pxl, xlator_t *cxl) +volgen_xlator_link(xlator_t *pxl, xlator_t *cxl) { - int ret = 0; + int ret = 0; - ret = glusterfs_xlator_link (pxl, cxl); - if (ret == -1) { - gf_log ("", GF_LOG_ERROR, - "Out of memory, cannot link xlators %s <- %s", - pxl->name, cxl->name); - } + ret = glusterfs_xlator_link(pxl, cxl); + if (ret == -1) { + gf_msg("glusterd", GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "Out of memory, cannot link xlators %s <- %s", pxl->name, + cxl->name); + } - return ret; + return ret; } static int -volgen_graph_link (glusterfs_graph_t *graph, xlator_t *xl) +volgen_graph_link(volgen_graph_t *graph, xlator_t *xl) { - int ret = 0; + int ret = 0; - /* no need to care about graph->top here */ - if (graph->first) - ret = volgen_xlator_link 
(xl, graph->first); - if (ret == -1) { - gf_log ("", GF_LOG_ERROR, "failed to add graph entry %s", - xl->name); + /* no need to care about graph->top here */ + if (graph->graph.first) + ret = volgen_xlator_link(xl, graph->graph.first); + if (ret == -1) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_GRAPH_ENTRY_ADD_FAIL, + "failed to add graph entry %s", xl->name); - return -1; - } + return -1; + } - return 0; + return 0; } static xlator_t * -volgen_graph_add_as (glusterfs_graph_t *graph, const char *type, - const char *format, ...) +volgen_graph_add_as(volgen_graph_t *graph, const char *type, const char *format, + ...) { - va_list arg; - xlator_t *xl = NULL; + va_list arg; + xlator_t *xl = NULL; - va_start (arg, format); - xl = xlator_instantiate_va (type, format, arg); - va_end (arg); + va_start(arg, format); + xl = xlator_instantiate_va(type, format, arg); + va_end(arg); - if (!xl) - return NULL; + if (!xl) + return NULL; - if (volgen_graph_link (graph, xl)) { - xlator_destroy (xl); + if (volgen_graph_link(graph, xl)) { + xlator_destroy(xl); - return NULL; - } else - glusterfs_graph_set_first (graph, xl); + return NULL; + } else + glusterfs_graph_set_first(&graph->graph, xl); - return xl; + return xl; } static xlator_t * -volgen_graph_add_nolink (glusterfs_graph_t *graph, const char *type, - const char *format, ...) +volgen_graph_add_nolink(volgen_graph_t *graph, const char *type, + const char *format, ...) { - va_list arg; - xlator_t *xl = NULL; + va_list arg; + xlator_t *xl = NULL; - va_start (arg, format); - xl = xlator_instantiate_va (type, format, arg); - va_end (arg); + va_start(arg, format); + xl = xlator_instantiate_va(type, format, arg); + va_end(arg); - if (!xl) - return NULL; + if (!xl) + return NULL; - glusterfs_graph_set_first (graph, xl); + glusterfs_graph_set_first(&graph->graph, xl); - return xl; + return xl; } static xlator_t * -volgen_graph_add (glusterfs_graph_t *graph, char *type, char *volname) +volgen_graph_add(volgen_graph_t *graph, char *type, char *volname) { - char *shorttype = NULL; + char *shorttype = NULL; - shorttype = strrchr (type, '/'); - GF_ASSERT (shorttype); - shorttype++; - GF_ASSERT (*shorttype); + shorttype = strrchr(type, '/'); + GF_ASSERT(shorttype); + shorttype++; + GF_ASSERT(*shorttype); - return volgen_graph_add_as (graph, type, "%s-%s", volname, shorttype); + return volgen_graph_add_as(graph, type, "%s-%s", volname, shorttype); } +#define xlator_set_fixed_option(xl, key, value) \ + xlator_set_option(xl, key, SLEN(key), value) + /* XXX Seems there is no such generic routine? * Maybe should put to xlator.c ?? 
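The volgen_graph_add_as()/volgen_graph_add() helpers above build volfile graphs bottom-up: each call instantiates an xlator named "<volname>-<shorttype>", links it above the current graph.first and then makes it the new first. A hedged sketch of how a caller stacks two xlators; the volume name and the choice of xlators are hypothetical.

/* Sketch only: protocol/client goes in first and ends up at the
 * bottom; write-behind is added next and becomes the new top. */
static int
example_build_tiny_graph(volgen_graph_t *graph)
{
    if (!volgen_graph_add(graph, "protocol/client", "demo-vol"))
        return -1; /* would be named "demo-vol-client" */

    if (!volgen_graph_add(graph, "performance/write-behind", "demo-vol"))
        return -1; /* "demo-vol-write-behind", linked above the client */

    return 0;
}
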
*/ static int -xlator_set_option (xlator_t *xl, char *key, char *value) +xlator_set_option(xlator_t *xl, char *key, const int keylen, char *value) { - char *dval = NULL; + char *dval = gf_strdup(value); - dval = gf_strdup (value); - if (!dval) { - gf_log ("", GF_LOG_ERROR, - "failed to set xlator opt: %s[%s] = %s", - xl->name, key, value); + if (!dval) { + gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, + "failed to set xlator opt: %s[%s] = %s", xl->name, key, value); - return -1; - } + return -1; + } - return dict_set_dynstr (xl->options, key, dval); + return dict_set_dynstrn(xl->options, key, keylen, dval); } -static inline xlator_t * -first_of (glusterfs_graph_t *graph) +#define xlator_get_fixed_option(xl, key, value) \ + xlator_get_option(xl, key, SLEN(key), value) + +static int +xlator_get_option(xlator_t *xl, char *key, const int keylen, char **value) { - return (xlator_t *)graph->first; + GF_ASSERT(xl); + return dict_get_strn(xl->options, key, keylen, value); } - - +static xlator_t * +first_of(volgen_graph_t *graph) +{ + return (xlator_t *)graph->graph.first; +} /************************** * @@ -361,1839 +265,6490 @@ first_of (glusterfs_graph_t *graph) * *************************/ - static int -volopt_selector (int lvl, char **patt, void *param, - int (*optcbk)(char *word, void *param)) +volopt_selector(int lvl, char **patt, void *param, + int (*optcbk)(char *word, void *param)) { - struct volopt_map_entry *vme = NULL; - char *w = NULL; - int i = 0; - int len = 0; - int ret = 0; - char *dot = NULL; - - for (vme = glusterd_volopt_map; vme->key; vme++) { - w = vme->key; - - for (i = 0; i < lvl; i++) { - if (patt[i]) { - w = strtail (w, patt[i]); - GF_ASSERT (!w || *w); - if (!w || *w != '.') - goto next; - } else { - w = strchr (w, '.'); - GF_ASSERT (w); - } - w++; - } + struct volopt_map_entry *vme = NULL; + char *w = NULL; + int i = 0; + int len = 0; + int ret = 0; + char *dot = NULL; + + for (vme = glusterd_volopt_map; vme->key; vme++) { + w = vme->key; + + for (i = 0; i < lvl; i++) { + if (patt[i]) { + w = strtail(w, patt[i]); + GF_ASSERT(!w || *w); + if (!w || *w != '.') + goto next; + } else { + w = strchr(w, '.'); + GF_ASSERT(w); + } + w++; + } - dot = strchr (w, '.'); - if (dot) { - len = dot - w; - w = gf_strdup (w); - if (!w) - return -1; - w[len] = '\0'; - } - ret = optcbk (w, param); - if (dot) - GF_FREE (w); - if (ret) - return -1; - next: - continue; + dot = strchr(w, '.'); + if (dot) { + len = dot - w; + w = gf_strdup(w); + if (!w) + return -1; + w[len] = '\0'; } + ret = optcbk(w, param); + if (dot) + GF_FREE(w); + if (ret) + return -1; + next: + continue; + } - return 0; + return 0; } static int -volopt_trie_cbk (char *word, void *param) +volopt_trie_cbk(char *word, void *param) { - return trie_add ((trie_t *)param, word); + return trie_add((trie_t *)param, word); } static int -process_nodevec (struct trienodevec *nodevec, char **hint) +process_nodevec(struct trienodevec *nodevec, char **outputhint, char *inputhint) { - int ret = 0; - char *hint1 = NULL; - char *hint2 = NULL; - char *hintinfx = ""; - trienode_t **nodes = nodevec->nodes; - - if (!nodes[0]) { - *hint = NULL; - return 0; - } + int ret = 0; + char *hint1 = NULL; + char *hint2 = NULL; + char *hintinfx = ""; + trienode_t **nodes = nodevec->nodes; + + if (!nodes[0]) { + *outputhint = NULL; + return 0; + } #if 0 /* Limit as in git */ if (trienode_get_dist (nodes[0]) >= 6) { - *hint = NULL; + *outputhint = NULL; return 0; } #endif - if (trienode_get_word (nodes[0], &hint1)) - return -1; + if 
(trienode_get_word(nodes[0], &hint1)) + return -1; - if (nodevec->cnt < 2 || !nodes[1]) { - *hint = hint1; - return 0; - } + if (nodevec->cnt < 2 || !nodes[1]) { + *outputhint = hint1; + return 0; + } - if (trienode_get_word (nodes[1], &hint2)) - return -1; + if (trienode_get_word(nodes[1], &hint2)) { + GF_FREE(hint1); + return -1; + } - if (*hint) - hintinfx = *hint; - ret = gf_asprintf (hint, "%s or %s%s", hint1, hintinfx, hint2); - if (ret > 0) - ret = 0; - return ret; + if (inputhint) + hintinfx = inputhint; + ret = gf_asprintf(outputhint, "%s or %s%s", hint1, hintinfx, hint2); + if (ret > 0) + ret = 0; + if (hint1) + GF_FREE(hint1); + if (hint2) + GF_FREE(hint2); + return ret; } static int -volopt_trie_section (int lvl, char **patt, char *word, char **hint, int hints) +volopt_trie_section(int lvl, char **patt, char *word, char **outputhint, + char *inputhint, int hints) { - trienode_t *nodes[] = { NULL, NULL }; - struct trienodevec nodevec = { nodes, 2}; - trie_t *trie = NULL; - int ret = 0; + trienode_t *nodes[] = {NULL, NULL}; + struct trienodevec nodevec = {nodes, 2}; + trie_t *trie = NULL; + int ret = 0; - trie = trie_new (); - if (!trie) - return -1; + trie = trie_new(); + if (!trie) + return -1; - if (volopt_selector (lvl, patt, trie, &volopt_trie_cbk)) { - trie_destroy (trie); + if (volopt_selector(lvl, patt, trie, &volopt_trie_cbk)) { + trie_destroy(trie); - return -1; - } + return -1; + } - GF_ASSERT (hints <= 2); - nodevec.cnt = hints; - ret = trie_measure_vec (trie, word, &nodevec); - if (ret || !nodevec.nodes[0]) - trie_destroy (trie); + GF_ASSERT(hints <= 2); + nodevec.cnt = hints; + ret = trie_measure_vec(trie, word, &nodevec); + if (!ret && nodevec.nodes[0]) + ret = process_nodevec(&nodevec, outputhint, inputhint); - ret = process_nodevec (&nodevec, hint); - trie_destroy (trie); + trie_destroy(trie); - return ret; + return ret; } static int -volopt_trie (char *key, char **hint) +volopt_trie(char *key, char **hint) { - char *patt[] = { NULL }; - char *fullhint = NULL; - char *dot = NULL; - char *dom = NULL; - int len = 0; - int ret = 0; - - *hint = NULL; - - dot = strchr (key, '.'); - if (!dot) - return volopt_trie_section (1, patt, key, hint, 2); - - len = dot - key; - dom = gf_strdup (key); - if (!dom) - return -1; - dom[len] = '\0'; - - ret = volopt_trie_section (0, NULL, dom, patt, 1); - GF_FREE (dom); - if (ret) { - patt[0] = NULL; - goto out; - } - if (!patt[0]) - goto out; - - *hint = "..."; - ret = volopt_trie_section (1, patt, dot + 1, hint, 2); - if (ret) - goto out; - if (*hint) { - ret = gf_asprintf (&fullhint, "%s.%s", patt[0], *hint); - GF_FREE (*hint); - if (ret >= 0) { - ret = 0; - *hint = fullhint; - } + char *patt[] = {NULL}; + char *fullhint = NULL; + char *inputhint = NULL; + char *dot = NULL; + char *dom = NULL; + int len = 0; + int ret = 0; + + *hint = NULL; + + dot = strchr(key, '.'); + if (!dot) + return volopt_trie_section(1, patt, key, hint, inputhint, 2); + + len = dot - key; + dom = gf_strdup(key); + if (!dom) + return -1; + dom[len] = '\0'; + + ret = volopt_trie_section(0, NULL, dom, patt, inputhint, 1); + GF_FREE(dom); + if (ret) { + patt[0] = NULL; + goto out; + } + if (!patt[0]) + goto out; + + inputhint = "..."; + ret = volopt_trie_section(1, patt, dot + 1, hint, inputhint, 2); + if (ret) + goto out; + if (*hint) { + ret = gf_asprintf(&fullhint, "%s.%s", patt[0], *hint); + GF_FREE(*hint); + if (ret >= 0) { + ret = 0; + *hint = fullhint; } + } - out: - if (patt[0]) - GF_FREE (patt[0]); - if (ret) - *hint = NULL; +out: + GF_FREE(patt[0]); + 
if (ret) + *hint = NULL; - return ret; + return ret; } - - - /************************** * * Volume generation engine * **************************/ - -typedef int (*volgen_opthandler_t) (glusterfs_graph_t *graph, - struct volopt_map_entry *vme, - void *param); +typedef int (*volgen_opthandler_t)(volgen_graph_t *graph, + struct volopt_map_entry *vme, void *param); struct opthandler_data { - glusterfs_graph_t *graph; - volgen_opthandler_t handler; - struct volopt_map_entry *vme; - gf_boolean_t found; - gf_boolean_t data_t_fake; - int rv; - char *volname; - void *param; + volgen_graph_t *graph; + volgen_opthandler_t handler; + struct volopt_map_entry *vme; + gf_boolean_t found; + gf_boolean_t data_t_fake; + int rv; + char *volname; + void *param; }; -#define pattern_match_options 0 - - static void -process_option (dict_t *dict, char *key, data_t *value, void *param) +process_option(char *key, data_t *value, void *param) { - struct opthandler_data *odt = param; - struct volopt_map_entry vme = {0,}; - - if (odt->rv) - return; -#if pattern_match_options - if (fnmatch (odt->vme->key, key, 0) != 0) - return; -#endif - odt->found = _gf_true; - - vme.key = key; - vme.voltype = odt->vme->voltype; - vme.option = odt->vme->option; - if (!vme.option) { - vme.option = strrchr (key, '.'); - if (vme.option) - vme.option++; - else - vme.option = key; - } - if (odt->data_t_fake) - vme.value = (char *)value; + struct opthandler_data *odt = param; + struct volopt_map_entry vme = { + 0, + }; + + if (odt->rv) + return; + odt->found = _gf_true; + + vme.key = key; + vme.voltype = odt->vme->voltype; + vme.option = odt->vme->option; + vme.op_version = odt->vme->op_version; + + if (!vme.option) { + vme.option = strrchr(key, '.'); + if (vme.option) + vme.option++; else - vme.value = value->data; - - odt->rv = odt->handler (odt->graph, &vme, odt->param); + vme.option = key; + } + if (odt->data_t_fake) + vme.value = (char *)value; + else + vme.value = value->data; + + odt->rv = odt->handler(odt->graph, &vme, odt->param); + return; } static int -volgen_graph_set_options_generic (glusterfs_graph_t *graph, dict_t *dict, - void *param, volgen_opthandler_t handler) +volgen_graph_set_options_generic(volgen_graph_t *graph, dict_t *dict, + void *param, volgen_opthandler_t handler) { - struct volopt_map_entry *vme = NULL; - struct opthandler_data odt = {0,}; - data_t *data = NULL; - - odt.graph = graph; - odt.handler = handler; - odt.param = param; - (void)data; - - for (vme = glusterd_volopt_map; vme->key; vme++) { - odt.vme = vme; - odt.found = _gf_false; - odt.data_t_fake = _gf_false; - -#if pattern_match_options - dict_foreach (dict, process_option, &odt); -#else - data = dict_get (dict, vme->key); + struct volopt_map_entry *vme = NULL; + struct opthandler_data odt = { + 0, + }; + data_t *data = NULL; + int keylen; + + odt.graph = graph; + odt.handler = handler; + odt.param = param; + (void)data; + + for (vme = glusterd_volopt_map; vme->key; vme++) { + keylen = strlen(vme->key); + if (keylen == SLEN("performance.client-io-threads") && + !strcmp(vme->key, "performance.client-io-threads") && + dict_get_str_boolean(dict, "skip-CLIOT", _gf_false) == _gf_true) { + continue; + } - if (data) - process_option (dict, vme->key, data, &odt); -#endif - if (odt.rv) - return odt.rv; - - if (odt.found) - continue; - - /* check for default value */ - - if (vme->value) { - /* stupid hack to be able to reuse dict iterator - * in this context - */ - odt.data_t_fake = _gf_true; - process_option (NULL, vme->key, (data_t *)vme->value, - &odt); - 
if (odt.rv) - return odt.rv; - } + odt.vme = vme; + odt.found = _gf_false; + odt.data_t_fake = _gf_false; + data = dict_getn(dict, vme->key, keylen); + if (data) + process_option(vme->key, data, &odt); + if (odt.rv) + return odt.rv; + + if (odt.found) + continue; + + /* check for default value */ + + if (vme->value) { + /* stupid hack to be able to reuse dict iterator + * in this context + */ + odt.data_t_fake = _gf_true; + process_option(vme->key, (data_t *)vme->value, &odt); + if (odt.rv) + return odt.rv; } + } - return 0; + return 0; } static int -basic_option_handler (glusterfs_graph_t *graph, struct volopt_map_entry *vme, - void *param) +no_filter_option_handler(volgen_graph_t *graph, struct volopt_map_entry *vme, + void *param) { - xlator_t *trav; - int ret = 0; - - if (vme->option[0] == '!') - return 0; + xlator_t *trav; + int ret = 0; + + for (trav = first_of(graph); trav; trav = trav->next) { + if (strcmp(trav->type, vme->voltype) != 0) + continue; + if (strcmp(vme->option, "ta-remote-port") == 0) { + if (strstr(trav->name, "-ta-") != NULL) { + ret = xlator_set_option(trav, "remote-port", + strlen(vme->option), vme->value); + } + continue; + } + ret = xlator_set_option(trav, vme->option, strlen(vme->option), + vme->value); + if (ret) + break; + } + return ret; +} - for (trav = first_of (graph); trav; trav = trav->next) { - if (strcmp (trav->type, vme->voltype) != 0) - continue; +static int +basic_option_handler(volgen_graph_t *graph, struct volopt_map_entry *vme, + void *param) +{ + int ret = 0; - ret = xlator_set_option (trav, vme->option, vme->value); - if (ret) - return -1; - } + if (vme->option[0] == '!') + goto out; - return 0; + ret = no_filter_option_handler(graph, vme, param); +out: + return ret; } static int -volgen_graph_set_options (glusterfs_graph_t *graph, dict_t *dict) +volgen_graph_set_options(volgen_graph_t *graph, dict_t *dict) { - return volgen_graph_set_options_generic (graph, dict, NULL, - &basic_option_handler); + return volgen_graph_set_options_generic(graph, dict, NULL, + &basic_option_handler); } static int -optget_option_handler (glusterfs_graph_t *graph, struct volopt_map_entry *vme, - void *param) +optget_option_handler(volgen_graph_t *graph, struct volopt_map_entry *vme, + void *param) { - struct volopt_map_entry *vme2 = param; + struct volopt_map_entry *vme2 = param; - if (strcmp (vme->key, vme2->key) == 0) - vme2->value = vme->value; + if (strcmp(vme->key, vme2->key) == 0) + vme2->value = vme->value; - return 0; + return 0; } /* This getter considers defaults also. 
*/ static int -volgen_dict_get (dict_t *dict, char *key, char **value) +volgen_dict_get(dict_t *dict, char *key, char **value) { - struct volopt_map_entry vme = {0,}; - int ret = 0; + struct volopt_map_entry vme = { + 0, + }; + int ret = 0; - vme.key = key; + vme.key = key; - ret = volgen_graph_set_options_generic (NULL, dict, &vme, - &optget_option_handler); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Out of memory"); + ret = volgen_graph_set_options_generic(NULL, dict, &vme, + &optget_option_handler); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "Out of memory"); - return -1; - } + return -1; + } - *value = vme.value; + *value = vme.value; - return 0; + return 0; } static int -option_complete (char *key, char **completion) +option_complete(char *key, char **completion) { - struct volopt_map_entry *vme = NULL; + struct volopt_map_entry *vme = NULL; - *completion = NULL; - for (vme = glusterd_volopt_map; vme->key; vme++) { - if (strcmp (strchr (vme->key, '.') + 1, key) != 0) - continue; + *completion = NULL; + for (vme = glusterd_volopt_map; vme->key; vme++) { + if (strcmp(strchr(vme->key, '.') + 1, key) != 0) + continue; - if (*completion && strcmp (*completion, vme->key) != 0) { - /* cancel on non-unique match */ - *completion = NULL; + if (*completion && strcmp(*completion, vme->key) != 0) { + /* cancel on non-unique match */ + *completion = NULL; - return 0; - } else - *completion = vme->key; - } + return 0; + } else + *completion = vme->key; + } - if (*completion) { - /* For sake of unified API we want - * have the completion to be a to-be-freed - * string. - */ - *completion = gf_strdup (*completion); - return -!*completion; - } + if (*completion) { + /* For sake of unified API we want + * have the completion to be a to-be-freed + * string. 
+ */ + *completion = gf_strdup(*completion); + return -!*completion; + } - return 0; + return 0; } int -glusterd_volinfo_get (glusterd_volinfo_t *volinfo, char *key, char **value) +glusterd_volinfo_get(glusterd_volinfo_t *volinfo, char *key, char **value) { - return volgen_dict_get (volinfo->dict, key, value); + return volgen_dict_get(volinfo->dict, key, value); } -gf_boolean_t -glusterd_check_globaloption (char *key) +int +glusterd_volinfo_get_boolean(glusterd_volinfo_t *volinfo, char *key) { - char *completion = NULL; - struct volopt_map_entry *vmep = NULL; - int ret = 0; + char *val = NULL; + gf_boolean_t enabled = _gf_false; + int ret = 0; - if (!strchr (key, '.')) { - ret = option_complete (key, &completion); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Out of memory"); - return _gf_false; - } + ret = glusterd_volinfo_get(volinfo, key, &val); + if (ret) + return -1; - if (!completion) { - gf_log ("", GF_LOG_ERROR, "option %s does not exist", - key); - return _gf_false; - } - } + if (val) + ret = gf_string2boolean(val, &enabled); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, + "value for %s option is not valid", key); - for (vmep = glusterd_volopt_map; vmep->key; vmep++) { - if (strcmp (vmep->key, key) == 0) { - if ((vmep->type == GLOBAL_DOC) || - (vmep->type == GLOBAL_NO_DOC)) - return _gf_true; - else - return _gf_false; - } - } + return -1; + } - return _gf_false; + return enabled; } gf_boolean_t -glusterd_check_localoption (char *key) +glusterd_check_voloption_flags(char *key, int32_t flags) { - char *completion = NULL; - struct volopt_map_entry *vmep = NULL; - int ret = 0; + char *completion = NULL; + struct volopt_map_entry *vmep = NULL; + int ret = 0; + + COMPLETE_OPTION(key, completion, ret); + for (vmep = glusterd_volopt_map; vmep->key; vmep++) { + if (strcmp(vmep->key, key) == 0) { + if (vmep->flags & flags) + return _gf_true; + else + return _gf_false; + } + } - if (!strchr (key, '.')) { - ret = option_complete (key, &completion); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Out of memory"); - return _gf_false; - } + return _gf_false; +} - if (!completion) { - gf_log ("", GF_LOG_ERROR, "option %s does not exist", - key); - return _gf_false; - } +gf_boolean_t +glusterd_check_globaloption(char *key) +{ + char *completion = NULL; + struct volopt_map_entry *vmep = NULL; + int ret = 0; + + COMPLETE_OPTION(key, completion, ret); + for (vmep = glusterd_volopt_map; vmep->key; vmep++) { + if (strcmp(vmep->key, key) == 0) { + if ((vmep->type == GLOBAL_DOC) || (vmep->type == GLOBAL_NO_DOC)) + return _gf_true; + else + return _gf_false; } + } - for (vmep = glusterd_volopt_map; vmep->key; vmep++) { - if (strcmp (vmep->key, key) == 0) { - if ((vmep->type == DOC) || - (vmep->type == NO_DOC)) - return _gf_true; - else - return _gf_false; - } + return _gf_false; +} + +gf_boolean_t +glusterd_check_localoption(char *key) +{ + char *completion = NULL; + struct volopt_map_entry *vmep = NULL; + int ret = 0; + + COMPLETE_OPTION(key, completion, ret); + for (vmep = glusterd_volopt_map; vmep->key; vmep++) { + if (strcmp(vmep->key, key) == 0) { + if ((vmep->type == DOC) || (vmep->type == NO_DOC)) + return _gf_true; + else + return _gf_false; } + } - return _gf_false; + return _gf_false; } int -glusterd_check_voloption (char *key) +glusterd_check_option_exists(char *key, char **completion) { - char *completion = NULL; - struct volopt_map_entry *vmep = NULL; - int ret = 0; + struct volopt_map_entry vme = { + 0, + }; + struct volopt_map_entry *vmep = NULL; + int ret = 0; + 
xlator_t *this = THIS; + + (void)vme; + (void)vmep; + + if (!strchr(key, '.')) { + if (completion) { + ret = option_complete(key, completion); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "Out of memory"); + return -1; + } - if (!strchr (key, '.')) { - ret = option_complete (key, &completion); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Out of memory"); - return _gf_false; - } + ret = !!*completion; + if (ret) + return ret; + else + goto trie; + } else + return 0; + } - if (!completion) { - gf_log ("", GF_LOG_ERROR, "option %s does not exist", - key); - return _gf_false; - } + for (vmep = glusterd_volopt_map; vmep->key; vmep++) { + if (strcmp(vmep->key, key) == 0) { + ret = 1; + break; } + } - for (vmep = glusterd_volopt_map; vmep->key; vmep++) { - if (strcmp (vmep->key, key) == 0) { - if ((vmep->type == DOC) || - (vmep->type == DOC)) - return _gf_true; - else - return _gf_false; - } - } + if (ret || !completion) + return ret; - return _gf_false; +trie: + ret = volopt_trie(key, completion); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_ERROR_ENCOUNTERED, + "Some error occurred during keyword hinting"); + } + return ret; } int -glusterd_check_option_exists (char *key, char **completion) -{ - dict_t *dict = NULL; - struct volopt_map_entry vme = {0,}; - struct volopt_map_entry *vmep = NULL; - int ret = 0; - - (void)vme; - (void)vmep; - (void)dict; - - if (!strchr (key, '.')) { - if (completion) { - ret = option_complete (key, completion); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Out of memory"); - return -1; - } - - ret = !!*completion; - if (ret) - return ret; - else - goto trie; - } else - return 0; - } - -#if !pattern_match_options - for (vmep = glusterd_volopt_map; vmep->key; vmep++) { - if (strcmp (vmep->key, key) == 0) { - ret = 1; - break; - } +glusterd_volopt_validate(glusterd_volinfo_t *volinfo, dict_t *dict, char *key, + char *value, char **op_errstr) +{ + struct volopt_map_entry *vme = NULL; + int ret = 0; + xlator_t *this = THIS; + + if (!dict || !key || !value) { + gf_msg_callingfn(this->name, GF_LOG_WARNING, EINVAL, + GD_MSG_INVALID_ENTRY, + "Invalid " + "Arguments (dict=%p, key=%s, value=%s)", + dict, key, value); + return -1; + } + + for (vme = &glusterd_volopt_map[0]; vme->key; vme++) { + if ((vme->validate_fn) && ((!strcmp(key, vme->key)) || + (!strcmp(key, strchr(vme->key, '.') + 1)))) { + if ((vme->type != GLOBAL_DOC && vme->type != GLOBAL_NO_DOC) && + !volinfo) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY, + "%s is not" + " a global option", + vme->key); + ret = -1; + goto out; + } + ret = vme->validate_fn(volinfo, dict, key, value, op_errstr); + if (ret) + goto out; + break; } -#else - vme.key = key; + } +out: + return ret; +} - /* We are getting a bit anal here to avoid typing - * fnmatch one more time. Orthogonality foremost! - * The internal logic of looking up in the volopt_map table - * should be coded exactly once. 
- * - * [[Ha-ha-ha, so now if I ever change the internals then I'll - * have to update the fnmatch in this comment also :P ]] - */ - dict = get_new_dict (); - if (!dict || dict_set_str (dict, key, "")) { - gf_log ("", GF_LOG_ERROR, "Out of memory"); +char * +glusterd_get_trans_type_rb(gf_transport_type ttype) +{ + char *trans_type = NULL; - return -1; - } + switch (ttype) { + case GF_TRANSPORT_RDMA: + gf_asprintf(&trans_type, "rdma"); + break; + case GF_TRANSPORT_TCP: + case GF_TRANSPORT_BOTH_TCP_RDMA: + gf_asprintf(&trans_type, "tcp"); + break; + default: + gf_msg(THIS->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, + "Unknown " + "transport type"); + } + + return trans_type; +} - ret = volgen_graph_set_options_generic (NULL, dict, &vme, - &optget_option_handler); - dict_destroy (dict); +static int +_xl_link_children(xlator_t *parent, xlator_t *children, size_t child_count) +{ + xlator_t *trav = NULL; + size_t seek = 0; + int ret = -1; + xlator_t *this = THIS; + GF_ASSERT(this); + + if (child_count == 0) + goto out; + seek = child_count; + for (trav = children; --seek; trav = trav->next) + ; + for (; child_count--; trav = trav->prev) { + ret = volgen_xlator_link(parent, trav); + gf_msg_debug(this->name, 0, "%s:%s", parent->name, trav->name); if (ret) { - gf_log ("", GF_LOG_ERROR, "Out of memory"); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_XLATOR_LINK_FAIL, + NULL); + goto out; + } + } + ret = 0; +out: + return ret; +} - return -1; +static int +volgen_graph_merge_sub(volgen_graph_t *dgraph, volgen_graph_t *sgraph, + size_t child_count) +{ + xlator_t *trav = NULL; + int ret = 0; + + GF_ASSERT(dgraph->graph.first); + + ret = _xl_link_children(first_of(dgraph), first_of(sgraph), child_count); + if (ret) + goto out; + + for (trav = first_of(dgraph); trav->next; trav = trav->next) + ; + + trav->next = first_of(sgraph); + trav->next->prev = trav; + dgraph->graph.xl_count += sgraph->graph.xl_count; + +out: + return ret; +} + +static void +volgen_apply_filters(char *orig_volfile) +{ + DIR *filterdir = NULL; + struct dirent *entry = NULL; + struct dirent scratch[2] = { + { + 0, + }, + }; + struct stat statbuf = { + 0, + }; + char filterpath[PATH_MAX] = { + 0, + }; + + filterdir = sys_opendir(FILTERDIR); + + if (!filterdir) + return; + + for (;;) { + errno = 0; + + entry = sys_readdir(filterdir, scratch); + + if (!entry || errno != 0) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_READ_ERROR, NULL); + break; + } + + if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) + continue; + /* + * d_type isn't guaranteed to be present/valid on all systems, + * so do an explicit stat instead. + */ + (void)snprintf(filterpath, sizeof(filterpath), "%s/%s", FILTERDIR, + entry->d_name); + + /* Deliberately use stat instead of lstat to allow symlinks. */ + if (sys_stat(filterpath, &statbuf) == -1) + continue; + + if (!S_ISREG(statbuf.st_mode)) + continue; + /* + * We could check the mode in statbuf directly, or just skip + * this entirely and check for EPERM after exec fails, but this + * is cleaner. 
+ */ + if (sys_access(filterpath, X_OK) != 0) + continue; + + if (runcmd(filterpath, orig_volfile, NULL)) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_FILTER_RUN_FAILED, + "failed to run filter %s", entry->d_name); } + } + + (void)sys_closedir(filterdir); +} + +static int +volgen_write_volfile(volgen_graph_t *graph, char *filename) +{ + char *ftmp = NULL; + FILE *f = NULL; + int fd = 0; + xlator_t *this = NULL; + + this = THIS; + + if (gf_asprintf(&ftmp, "%s.tmp", filename) == -1) { + ftmp = NULL; + goto error; + } + + fd = sys_creat(ftmp, S_IRUSR | S_IWUSR); + if (fd < 0) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "file creation failed"); + goto error; + } + + sys_close(fd); + + f = fopen(ftmp, "w"); + if (!f) + goto error; + + if (glusterfs_graph_print_file(f, &graph->graph) == -1) + goto error; + + if (fclose(f) != 0) { + gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "fclose on the file %s " + "failed", + ftmp); + /* + * Even though fclose has failed here, we have to set f to NULL. + * Otherwise when the code path goes to error, there again we + * try to close it which might cause undefined behavior such as + * process crash. + */ + f = NULL; + goto error; + } + + f = NULL; + + if (sys_rename(ftmp, filename) == -1) + goto error; + + GF_FREE(ftmp); + + volgen_apply_filters(filename); + + return 0; + +error: + + GF_FREE(ftmp); + if (f) + fclose(f); + + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL, + "failed to create volfile %s", filename); + + return -1; +} + +static void +volgen_graph_free(volgen_graph_t *graph) +{ + xlator_t *trav = NULL; + xlator_t *trav_old = NULL; + + for (trav = first_of(graph);; trav = trav->next) { + if (trav_old) + xlator_destroy(trav_old); + + trav_old = trav; + + if (!trav) + break; + } +} + +static int +build_graph_generic(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *mod_dict, void *param, + int (*builder)(volgen_graph_t *graph, + glusterd_volinfo_t *volinfo, + dict_t *set_dict, void *param)) +{ + dict_t *set_dict = NULL; + int ret = 0; + + if (mod_dict) { + set_dict = dict_copy_with_ref(volinfo->dict, NULL); + if (!set_dict) + return -1; + dict_copy(mod_dict, set_dict); + /* XXX dict_copy swallows errors */ + } else { + set_dict = volinfo->dict; + } + + ret = builder(graph, volinfo, set_dict, param); + if (!ret) + ret = volgen_graph_set_options(graph, set_dict); + + if (mod_dict) + dict_unref(set_dict); + + return ret; +} + +static gf_transport_type +transport_str_to_type(char *tt) +{ + gf_transport_type type = GF_TRANSPORT_TCP; + + if (!strcmp("tcp", tt)) + type = GF_TRANSPORT_TCP; + else if (!strcmp("rdma", tt)) + type = GF_TRANSPORT_RDMA; + else if (!strcmp("tcp,rdma", tt)) + type = GF_TRANSPORT_BOTH_TCP_RDMA; + return type; +} + +static void +transport_type_to_str(gf_transport_type type, char *tt) +{ + switch (type) { + case GF_TRANSPORT_RDMA: + strcpy(tt, "rdma"); + break; + case GF_TRANSPORT_TCP: + strcpy(tt, "tcp"); + break; + case GF_TRANSPORT_BOTH_TCP_RDMA: + strcpy(tt, "tcp,rdma"); + break; + } +} + +static void +get_vol_transport_type(glusterd_volinfo_t *volinfo, char *tt) +{ + transport_type_to_str(volinfo->transport_type, tt); +} - ret = !!vme.value; +#ifdef BUILD_GNFS +/* If no value has specified for tcp,rdma volume from cli + * use tcp as default value.Otherwise, use transport type + * mentioned in volinfo + */ +static void +get_vol_nfs_transport_type(glusterd_volinfo_t *volinfo, char *tt) +{ + if (volinfo->transport_type == GF_TRANSPORT_BOTH_TCP_RDMA) { + 
strcpy(tt, "tcp"); + gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_DEFAULT_OPT_INFO, + "The default transport type for tcp,rdma volume " + "is tcp if option is not defined by the user "); + } else + transport_type_to_str(volinfo->transport_type, tt); +} #endif - if (ret || !completion) - return ret; +/* gets the volinfo, dict, a character array for filling in + * the transport type and a boolean option which says whether + * the transport type is required for nfs or not. If its not + * for nfs, then it is considered as the client transport + * and client transport type is filled in the character array + */ +static void +get_transport_type(glusterd_volinfo_t *volinfo, dict_t *set_dict, char *transt, + gf_boolean_t is_nfs) +{ + int ret = -1; + char *tt = NULL; - trie: - ret = volopt_trie (key, completion); - if (ret) { - gf_log ("", GF_LOG_ERROR, - "Some error occured during keyword hinting"); - } + if (is_nfs == _gf_false) { + ret = dict_get_str_sizen(set_dict, "client-transport-type", &tt); + if (ret) + get_vol_transport_type(volinfo, transt); + } else { +#ifdef BUILD_GNFS + ret = dict_get_str_sizen(set_dict, "nfs.transport-type", &tt); + if (ret) + get_vol_nfs_transport_type(volinfo, transt); +#endif + } - return ret; + if (!ret) + strcpy(transt, tt); } static int -volgen_graph_merge_sub (glusterfs_graph_t *dgraph, glusterfs_graph_t *sgraph) +server_auth_option_handler(volgen_graph_t *graph, struct volopt_map_entry *vme, + void *param) { - xlator_t *trav = NULL; + xlator_t *xl = NULL; + char *aa = NULL; + int ret = 0; + char *key = NULL; + char *auth_path = NULL; - GF_ASSERT (dgraph->first); + if (strcmp(vme->option, "!server-auth") != 0) + return 0; - if (volgen_xlator_link (first_of (dgraph), first_of (sgraph)) == -1) - return -1; + xl = first_of(graph); + + /* from 'auth.allow' -> 'allow', and 'auth.reject' -> 'reject' */ + key = strchr(vme->key, '.') + 1; + + ret = xlator_get_fixed_option(xl, "auth-path", &auth_path); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DEFAULT_OPT_INFO, + "Failed to get auth-path from server graph"); + return -1; + } + ret = gf_asprintf(&aa, "auth.addr.%s.%s", auth_path, key); + if (ret != -1) { + ret = xlator_set_option(xl, aa, ret, vme->value); + GF_FREE(aa); + } + if (ret) + return -1; - for (trav = first_of (dgraph); trav->next; trav = trav->next); + return 0; +} + +static int +loglevel_option_handler(volgen_graph_t *graph, struct volopt_map_entry *vme, + void *param) +{ + char *role = param; + struct volopt_map_entry vme2 = { + 0, + }; + + if ((strcmp(vme->option, "!client-log-level") != 0 && + strcmp(vme->option, "!brick-log-level") != 0) || + !strstr(vme->key, role)) + return 0; - trav->next = sgraph->first; - trav->next->prev = trav; - dgraph->xl_count += sgraph->xl_count; + memcpy(&vme2, vme, sizeof(vme2)); + vme2.option = "log-level"; + return basic_option_handler(graph, &vme2, NULL); +} + +static int +threads_option_handler(volgen_graph_t *graph, struct volopt_map_entry *vme, + void *param) +{ + char *role = param; + struct volopt_map_entry vme2 = { + 0, + }; + + if ((strcmp(vme->option, "!client-threads") != 0 && + strcmp(vme->option, "!brick-threads") != 0) || + !strstr(vme->key, role)) return 0; + + memcpy(&vme2, vme, sizeof(vme2)); + vme2.option = "threads"; + + return basic_option_handler(graph, &vme2, NULL); } static int -volgen_write_volfile (glusterfs_graph_t *graph, char *filename) +server_check_changelog_off(volgen_graph_t *graph, struct volopt_map_entry *vme, + glusterd_volinfo_t *volinfo) { - char *ftmp = NULL; - FILE *f = 
NULL; + gf_boolean_t enabled = _gf_false; + int ret = 0; + + GF_ASSERT(volinfo); + GF_ASSERT(vme); - if (gf_asprintf (&ftmp, "%s.tmp", filename) == -1) { - ftmp = NULL; + if (strcmp(vme->option, "changelog") != 0) + return 0; - goto error; + ret = gf_string2boolean(vme->value, &enabled); + if (ret || enabled) + goto out; + + ret = glusterd_volinfo_get_boolean(volinfo, VKEY_CHANGELOG); + if (ret < 0) { + gf_msg("glusterd", GF_LOG_WARNING, 0, GD_MSG_CHANGELOG_GET_FAIL, + "failed to get the changelog status"); + ret = -1; + goto out; + } + + if (ret) { + enabled = _gf_false; + glusterd_check_geo_rep_configured(volinfo, &enabled); + + if (enabled) { + gf_msg("glusterd", GF_LOG_WARNING, 0, GD_MSG_XLATOR_SET_OPT_FAIL, + GEOREP + " sessions active" + "for the volume %s, cannot disable changelog ", + volinfo->volname); + set_graph_errstr(graph, VKEY_CHANGELOG + " cannot be disabled " + "while " GEOREP " sessions exist"); + ret = -1; + goto out; } + } - f = fopen (ftmp, "w"); - if (!f) - goto error; + ret = 0; +out: + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; +} - if (glusterfs_graph_print_file (f, graph) == -1) - goto error; +static int +server_check_marker_off(volgen_graph_t *graph, struct volopt_map_entry *vme, + glusterd_volinfo_t *volinfo) +{ + gf_boolean_t enabled = _gf_false; + int ret = 0; - if (fclose (f) == -1) - goto error; - f = NULL; + GF_ASSERT(volinfo); + GF_ASSERT(vme); + + if (strcmp(vme->option, "!xtime") != 0) + return 0; + + ret = gf_string2boolean(vme->value, &enabled); + if (ret || enabled) + goto out; + + ret = glusterd_volinfo_get_boolean(volinfo, VKEY_MARKER_XTIME); + if (ret < 0) { + gf_msg("glusterd", GF_LOG_WARNING, 0, GD_MSG_MARKER_STATUS_GET_FAIL, + "failed to get the marker status"); + ret = -1; + goto out; + } + + if (ret) { + enabled = _gf_false; + glusterd_check_geo_rep_configured(volinfo, &enabled); + + if (enabled) { + gf_msg("glusterd", GF_LOG_WARNING, 0, GD_MSG_MARKER_DISABLE_FAIL, + GEOREP + " sessions active" + "for the volume %s, cannot disable marker ", + volinfo->volname); + set_graph_errstr(graph, VKEY_MARKER_XTIME + " cannot be disabled " + "while " GEOREP " sessions exist"); + ret = -1; + goto out; + } + } + + ret = 0; +out: + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; +} - if (rename (ftmp, filename) == -1) - goto error; +static int +sys_loglevel_option_handler(volgen_graph_t *graph, struct volopt_map_entry *vme, + void *param) +{ + char *role = NULL; + struct volopt_map_entry vme2 = { + 0, + }; - GF_FREE (ftmp); + role = (char *)param; + if (strcmp(vme->option, "!sys-log-level") != 0 || !strstr(vme->key, role)) return 0; - error: + memcpy(&vme2, vme, sizeof(vme2)); + vme2.option = "sys-log-level"; - if (ftmp) - GF_FREE (ftmp); - if (f) - fclose (f); + return basic_option_handler(graph, &vme2, NULL); +} - gf_log ("", GF_LOG_ERROR, "failed to create volfile %s", filename); +static int +logger_option_handler(volgen_graph_t *graph, struct volopt_map_entry *vme, + void *param) +{ + char *role = NULL; + struct volopt_map_entry vme2 = { + 0, + }; - return -1; + role = (char *)param; + + if (strcmp(vme->option, "!logger") != 0 || !strstr(vme->key, role)) + return 0; + + memcpy(&vme2, vme, sizeof(vme2)); + vme2.option = "logger"; + + return basic_option_handler(graph, &vme2, NULL); } -static void -volgen_graph_free (glusterfs_graph_t *graph) +static int +log_format_option_handler(volgen_graph_t *graph, struct volopt_map_entry *vme, + void *param) { - xlator_t *trav = NULL; - xlator_t *trav_old = NULL; + char *role = 
NULL; + struct volopt_map_entry vme2 = { + 0, + }; - for (trav = first_of (graph) ;; trav = trav->next) { - if (trav_old) - xlator_destroy (trav_old); + role = (char *)param; - trav_old = trav; + if (strcmp(vme->option, "!log-format") != 0 || !strstr(vme->key, role)) + return 0; - if (!trav) - break; - } + memcpy(&vme2, vme, sizeof(vme2)); + vme2.option = "log-format"; + + return basic_option_handler(graph, &vme2, NULL); } static int -build_graph_generic (glusterfs_graph_t *graph, glusterd_volinfo_t *volinfo, - dict_t *mod_dict, void *param, - int (*builder) (glusterfs_graph_t *graph, - glusterd_volinfo_t *volinfo, - dict_t *set_dict, void *param)) +log_localtime_logging_option_handler(volgen_graph_t *graph, + struct volopt_map_entry *vme, void *param) { - dict_t *set_dict = NULL; - int ret = 0; + char *role = NULL; + struct volopt_map_entry vme2 = { + 0, + }; - if (mod_dict) { - set_dict = dict_copy (volinfo->dict, NULL); - if (!set_dict) - return -1; - dict_copy (mod_dict, set_dict); - /* XXX dict_copy swallows errors */ - } else - set_dict = volinfo->dict; + role = (char *)param; - ret = builder (graph, volinfo, set_dict, param); - if (!ret) - ret = volgen_graph_set_options (graph, set_dict); + if (strcmp(vme->option, "!cluster.localtime-logging") != 0 || + !strstr(vme->key, role)) + return 0; - if (mod_dict) - dict_destroy (set_dict); + memcpy(&vme2, vme, sizeof(vme2)); + vme2.option = GLUSTERD_LOCALTIME_LOGGING_KEY; - return ret; + return basic_option_handler(graph, &vme2, NULL); } -static void -get_vol_transport_type (glusterd_volinfo_t *volinfo, char *tt) +static int +log_buf_size_option_handler(volgen_graph_t *graph, struct volopt_map_entry *vme, + void *param) { - switch (volinfo->transport_type) { - case GF_TRANSPORT_RDMA: - strcpy (tt, "rdma"); - break; - case GF_TRANSPORT_TCP: - strcpy (tt, "tcp"); - break; - case GF_TRANSPORT_BOTH_TCP_RDMA: - strcpy (tt, "tcp,rdma"); - break; - } + char *role = NULL; + struct volopt_map_entry vme2 = { + 0, + }; + + role = (char *)param; + + if (strcmp(vme->option, "!log-buf-size") != 0 || !strstr(vme->key, role)) + return 0; + + memcpy(&vme2, vme, sizeof(vme2)); + vme2.option = "log-buf-size"; + + return basic_option_handler(graph, &vme2, NULL); } static int -server_auth_option_handler (glusterfs_graph_t *graph, - struct volopt_map_entry *vme, void *param) +log_flush_timeout_option_handler(volgen_graph_t *graph, + struct volopt_map_entry *vme, void *param) { - xlator_t *xl = NULL; - xlator_list_t *trav = NULL; - char *aa = NULL; - int ret = 0; - char *key = NULL; + char *role = NULL; + struct volopt_map_entry vme2 = { + 0, + }; - if (strcmp (vme->option, "!server-auth") != 0) - return 0; + role = (char *)param; - xl = first_of (graph); + if (strcmp(vme->option, "!log-flush-timeout") != 0 || + !strstr(vme->key, role)) + return 0; - /* from 'auth.allow' -> 'allow', and 'auth.reject' -> 'reject' */ - key = strchr (vme->key, '.') + 1; + memcpy(&vme2, vme, sizeof(vme2)); + vme2.option = "log-flush-timeout"; - for (trav = xl->children; trav; trav = trav->next) { - ret = gf_asprintf (&aa, "auth.addr.%s.%s", trav->xlator->name, - key); - if (ret != -1) { - ret = xlator_set_option (xl, aa, vme->value); - GF_FREE (aa); - } - if (ret) - return -1; + return basic_option_handler(graph, &vme2, NULL); +} + +static int +volgen_graph_set_xl_options(volgen_graph_t *graph, dict_t *dict) +{ + int32_t ret = -1; + char *xlator = NULL; + char xlator_match[1024] = { + 0, + }; /* for posix* -> *posix* */ + char *loglevel = NULL; + xlator_t *trav = NULL; + xlator_t 
*this = THIS; + GF_ASSERT(this); + + ret = dict_get_str_sizen(dict, "xlator", &xlator); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=xlator", NULL); + goto out; + } + + ret = dict_get_str_sizen(dict, "loglevel", &loglevel); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=loglevel", NULL); + goto out; + } + + snprintf(xlator_match, 1024, "*%s", xlator); + + for (trav = first_of(graph); trav; trav = trav->next) { + if (fnmatch(xlator_match, trav->type, FNM_NOESCAPE) == 0) { + gf_msg_debug("glusterd", 0, "Setting log level for xlator: %s", + trav->type); + ret = xlator_set_fixed_option(trav, "log-level", loglevel); + if (ret) + break; } + } - return 0; +out: + return ret; } static int -loglevel_option_handler (glusterfs_graph_t *graph, - struct volopt_map_entry *vme, void *param) +server_spec_option_handler(volgen_graph_t *graph, struct volopt_map_entry *vme, + void *param) { - char *role = param; - struct volopt_map_entry vme2 = {0,}; + int ret = 0; + glusterd_volinfo_t *volinfo = NULL; - if (strcmp (vme->option, "!log-level") != 0 || - !strstr (vme->key, role)) - return 0; + volinfo = param; + + ret = server_auth_option_handler(graph, vme, NULL); + if (!ret) + ret = server_check_marker_off(graph, vme, volinfo); + + if (!ret) + ret = server_check_changelog_off(graph, vme, volinfo); + + if (!ret) + ret = loglevel_option_handler(graph, vme, "brick"); - memcpy (&vme2, vme, sizeof (vme2)); - vme2.option = "log-level"; + if (!ret) + ret = sys_loglevel_option_handler(graph, vme, "brick"); - return basic_option_handler (graph, &vme2, NULL); + if (!ret) + ret = logger_option_handler(graph, vme, "brick"); + + if (!ret) + ret = log_format_option_handler(graph, vme, "brick"); + + if (!ret) + ret = log_buf_size_option_handler(graph, vme, "brick"); + + if (!ret) + ret = log_flush_timeout_option_handler(graph, vme, "brick"); + + if (!ret) + ret = log_localtime_logging_option_handler(graph, vme, "brick"); + + if (!ret) + ret = threads_option_handler(graph, vme, "brick"); + + return ret; } static int -server_spec_option_handler (glusterfs_graph_t *graph, - struct volopt_map_entry *vme, void *param) +server_spec_extended_option_handler(volgen_graph_t *graph, + struct volopt_map_entry *vme, void *param) { - int ret = 0; + int ret = 0; + dict_t *dict = NULL; - ret = server_auth_option_handler (graph, vme, NULL); - if (!ret) - ret = loglevel_option_handler (graph, vme, "brick"); + GF_ASSERT(param); + dict = (dict_t *)param; - return ret; + ret = server_auth_option_handler(graph, vme, NULL); + if (!ret) + ret = volgen_graph_set_xl_options(graph, dict); + + return ret; } static void -get_vol_tstamp_file (char *filename, glusterd_volinfo_t *volinfo) +get_vol_tstamp_file(char *filename, glusterd_volinfo_t *volinfo); + +static int +gfproxy_server_graph_builder(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *set_dict, void *param) { - glusterd_conf_t *priv = NULL; + xlator_t *xl = NULL; + /*char *value = NULL;*/ + char transt[16] = { + 0, + }; + char key[1024] = { + 0, + }; + int keylen; + /*char port_str[7] = {0, };*/ + int ret = 0; + char *username = NULL; + char *password = NULL; + /*int rclusters = 0;*/ + + xlator_t *this = THIS; + GF_ASSERT(this); + /* We are a trusted client */ + ret = dict_set_uint32(set_dict, "trusted-client", GF_CLIENT_TRUSTED); + if (ret != 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=trusted-client", NULL); + goto out; + } + + ret = dict_set_int32_sizen(set_dict, 
"gfproxy-server", 1); + if (ret != 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=gfproxy-server", NULL); + goto out; + } + + /* Build the client section of the graph first */ + build_client_graph(graph, volinfo, set_dict); + + /* Clear this setting so that future users of set_dict do not end up + * thinking they are a gfproxy server */ + dict_del_sizen(set_dict, "gfproxy-server"); + dict_del_sizen(set_dict, "trusted-client"); + + /* Then add the server to it */ + get_vol_transport_type(volinfo, transt); + xl = volgen_graph_add(graph, "protocol/server", volinfo->volname); + if (!xl) + goto out; + + ret = xlator_set_fixed_option(xl, "transport-type", transt); + if (ret != 0) + goto out; + + /* Set username and password */ + username = glusterd_auth_get_username(volinfo); + password = glusterd_auth_get_password(volinfo); + if (username) { + keylen = snprintf(key, sizeof(key), "auth.login.gfproxyd-%s.allow", + volinfo->volname); + ret = xlator_set_option(xl, key, keylen, username); + if (ret) + return -1; + } - priv = THIS->private; + if (password) { + keylen = snprintf(key, sizeof(key), "auth.login.%s.password", username); + ret = xlator_set_option(xl, key, keylen, password); + if (ret != 0) + goto out; + } - GLUSTERD_GET_VOLUME_DIR (filename, volinfo, priv); - strncat (filename, "/marker.tstamp", - PATH_MAX - strlen(filename) - 1); + snprintf(key, sizeof(key), "gfproxyd-%s", volinfo->volname); + ret = xlator_set_fixed_option(xl, "auth-path", key); + +out: + return ret; } -static int32_t -glusterd_gsync_option_set (glusterd_volinfo_t *volinfo, - xlator_t *xl, dict_t *set_dict) +static int +brick_graph_add_posix(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *set_dict, glusterd_brickinfo_t *brickinfo) { - int32_t ret = -1; - char *gsync_val = NULL; - gf_boolean_t gsync = _gf_false; - char volume_id [64] = {0, }; - char tstamp_file[PATH_MAX] = {0,}; + char tmpstr[10] = { + 0, + }; + int ret = -1; + gf_boolean_t quota_enabled = _gf_true; + gf_boolean_t trash_enabled = _gf_false; + gf_boolean_t pgfid_feat = _gf_false; + char *value = NULL; + xlator_t *xl = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + + if (!graph || !volinfo || !set_dict || !brickinfo) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); + goto out; + } + + priv = this->private; + GF_VALIDATE_OR_GOTO("glusterd", priv, out); + + ret = glusterd_volinfo_get(volinfo, VKEY_FEATURES_QUOTA, &value); + if (value) { + ret = gf_string2boolean(value, "a_enabled); + if (ret) + goto out; + } - GF_VALIDATE_OR_GOTO ("glusterd", volinfo, out); - GF_VALIDATE_OR_GOTO ("glusterd", xl, out); - GF_VALIDATE_OR_GOTO ("glusterd", set_dict, out); + ret = glusterd_volinfo_get(volinfo, VKEY_FEATURES_TRASH, &value); + if (value) { + ret = gf_string2boolean(value, &trash_enabled); + if (ret) + goto out; + } - ret = volgen_dict_get (set_dict, "features.marker-gsync", - &gsync_val); + ret = glusterd_volinfo_get(volinfo, "update-link-count-parent", &value); + if (value) { + ret = gf_string2boolean(value, &pgfid_feat); if (ret) - return -1; + goto out; + } - if (gsync_val) - ret = gf_string2boolean (gsync_val, &gsync); + ret = -1; + + xl = volgen_graph_add(graph, "storage/posix", volinfo->volname); + if (!xl) + goto out; + + ret = xlator_set_fixed_option(xl, "directory", brickinfo->path); + if (ret) + goto out; + + ret = xlator_set_fixed_option(xl, "volume-id", + uuid_utoa(volinfo->volume_id)); + if (ret) + goto out; + + if (quota_enabled || pgfid_feat || 
trash_enabled) { + ret = xlator_set_fixed_option(xl, "update-link-count-parent", "on"); if (ret) { - gf_log ("", GF_LOG_ERROR, - "value for marker-gsync option is junk"); - return -1; - } - get_vol_tstamp_file (tstamp_file, volinfo); - if (gsync == _gf_false) { - ret = unlink (tstamp_file); - if (ret == -1 && errno == ENOENT) - ret = 0; - if (ret == -1) { - gf_log ("", GF_LOG_ERROR, "failed to unlink %s (%s)", - tstamp_file, strerror (errno)); - return -1; - } - goto out; + goto out; } + } - ret = open (tstamp_file, O_WRONLY|O_CREAT|O_EXCL, 0644); - if (ret == -1 && errno == EEXIST) { - gf_log ("", GF_LOG_DEBUG, "timestamp file exist"); - ret = -2; - } - if (ret == -1) { - gf_log ("", GF_LOG_WARNING, "failed to create %s (%s)", - tstamp_file, strerror (errno)); - return -1; + if (priv->op_version >= GD_OP_VERSION_7_0) { + ret = xlator_set_fixed_option(xl, "fips-mode-rchecksum", "on"); + if (ret) { + goto out; } - if (ret >= 0) - close (ret); + } + snprintf(tmpstr, sizeof(tmpstr), "%d", brickinfo->fs_share_count); + ret = xlator_set_fixed_option(xl, "shared-brick-count", tmpstr); +out: + return ret; +} - uuid_unparse (volinfo->volume_id, volume_id); - ret = xlator_set_option (xl, "volume-uuid", volume_id); - if (ret) - return -1; +static int +brick_graph_add_selinux(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *set_dict, glusterd_brickinfo_t *brickinfo) +{ + xlator_t *xl = NULL; + int ret = -1; + xlator_t *this = THIS; + GF_ASSERT(this); - ret = xlator_set_option (xl, "timestamp-file", tstamp_file); - if (ret) - return -1; + if (!graph || !volinfo) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); + goto out; + } - ret = 0; + xl = volgen_graph_add(graph, "features/selinux", volinfo->volname); + if (!xl) + goto out; + + ret = 0; out: - return ret; + return ret; } static int -server_graph_builder (glusterfs_graph_t *graph, glusterd_volinfo_t *volinfo, - dict_t *set_dict, void *param) +brick_graph_add_trash(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *set_dict, glusterd_brickinfo_t *brickinfo) { - char *volname = NULL; - char *path = NULL; - int pump = 0; - xlator_t *xl = NULL; - xlator_t *txl = NULL; - xlator_t *rbxl = NULL; - char transt[16] = {0,}; - int ret = 0; + int ret = -1; + xlator_t *xl = NULL; + + xl = volgen_graph_add(graph, "features/trash", volinfo->volname); + if (!xl) + goto out; + ret = xlator_set_fixed_option(xl, "trash-dir", ".trashcan"); + if (ret) + goto out; + ret = xlator_set_fixed_option(xl, "brick-path", brickinfo->path); + if (ret) + goto out; + ret = xlator_set_fixed_option(xl, "trash-internal-op", "off"); + if (ret) + goto out; +out: + return ret; +} - path = param; - volname = volinfo->volname; - get_vol_transport_type (volinfo, transt); +static int +brick_graph_add_arbiter(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *set_dict, glusterd_brickinfo_t *brickinfo) +{ + xlator_t *xl = NULL; + glusterd_brickinfo_t *last = NULL; + int ret = -1; - xl = volgen_graph_add (graph, "storage/posix", volname); - if (!xl) - return -1; + if (volinfo->arbiter_count != 1) + return 0; - ret = xlator_set_option (xl, "directory", path); - if (ret) - return -1; + /* Add arbiter only if it is the last (i.e. 3rd) brick. 
*/ + last = get_last_brick_of_brick_group(volinfo, brickinfo); + if (last != brickinfo) + return 0; - xl = volgen_graph_add (graph, "features/access-control", volname); - if (!xl) - return -1; + xl = volgen_graph_add(graph, "features/arbiter", volinfo->volname); + if (!xl) + goto out; + ret = 0; +out: + return ret; +} - xl = volgen_graph_add (graph, "features/locks", volname); - if (!xl) - return -1; +static int +brick_graph_add_bitrot_stub(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *set_dict, glusterd_brickinfo_t *brickinfo) +{ + xlator_t *xl = NULL; + int ret = -1; + char *value = NULL; + xlator_t *this = THIS; + + if (!graph || !volinfo || !set_dict || !brickinfo) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); + goto out; + } + + xl = volgen_graph_add(graph, "features/bitrot-stub", volinfo->volname); + if (!xl) + goto out; + + ret = xlator_set_fixed_option(xl, "export", brickinfo->path); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, + "failed to set the export " + "option in bit-rot-stub"); + goto out; + } + + ret = glusterd_volinfo_get(volinfo, VKEY_FEATURES_BITROT, &value); + ret = xlator_set_fixed_option(xl, "bitrot", value); + if (ret) + gf_log(this->name, GF_LOG_WARNING, + "failed to set bitrot " + "enable option in bit-rot-stub"); - ret = dict_get_int32 (volinfo->dict, "enable-pump", &pump); - if (ret == -ENOENT) - ret = pump = 0; +out: + return ret; +} + +static int +brick_graph_add_changelog(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *set_dict, glusterd_brickinfo_t *brickinfo) +{ + xlator_t *xl = NULL; + char changelog_basepath[PATH_MAX] = { + 0, + }; + int ret = -1; + int32_t len = 0; + xlator_t *this = THIS; + GF_ASSERT(this); + + if (!graph || !volinfo || !set_dict || !brickinfo) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); + goto out; + } + + xl = volgen_graph_add(graph, "features/changelog", volinfo->volname); + if (!xl) + goto out; + + ret = xlator_set_fixed_option(xl, "changelog-brick", brickinfo->path); + if (ret) + goto out; + + len = snprintf(changelog_basepath, sizeof(changelog_basepath), "%s/%s", + brickinfo->path, ".glusterfs/changelogs"); + if ((len < 0) || (len >= sizeof(changelog_basepath))) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); + ret = -1; + goto out; + } + ret = xlator_set_fixed_option(xl, "changelog-dir", changelog_basepath); + if (ret) + goto out; + + ret = glusterd_is_bitrot_enabled(volinfo); + if (ret == -1) { + goto out; + } else if (ret) { + ret = xlator_set_fixed_option(xl, "changelog-notification", "on"); if (ret) - return -1; - if (pump) { - txl = first_of (graph); - - rbxl = volgen_graph_add_nolink (graph, "protocol/client", - "%s-replace-brick", volname); - if (!rbxl) - return -1; - ret = xlator_set_option (rbxl, "transport-type", transt); - if (ret) - return -1; - xl = volgen_graph_add_nolink (graph, "cluster/pump", "%s-pump", - volname); - if (!xl) - return -1; - ret = volgen_xlator_link (xl, txl); - if (ret) - return -1; - ret = volgen_xlator_link (xl, rbxl); - if (ret) - return -1; - } + goto out; + } else { + ret = xlator_set_fixed_option(xl, "changelog-notification", "off"); + if (ret) + goto out; + } +out: + return ret; +} - xl = volgen_graph_add (graph, "performance/io-threads", volname); - if (!xl) - return -1; +static int +brick_graph_add_acl(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *set_dict, glusterd_brickinfo_t *brickinfo) +{ + xlator_t *xl = NULL; + int ret = -1; + xlator_t *this = 
THIS; + GF_ASSERT(this); + + if (!graph || !volinfo || !set_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); + goto out; + } + + ret = dict_get_str_boolean(set_dict, "features.acl", 1); + if (!ret) { + /* Skip creating this volume if option is disabled */ + /* By default, this is 'true' */ + goto out; + } else if (ret < 0) { + /* lets not treat this as error, as this option is not critical, + and implemented for debug help */ + gf_log(THIS->name, GF_LOG_INFO, + "failed to get 'features.acl' flag from dict"); + } + + xl = volgen_graph_add(graph, "features/access-control", volinfo->volname); + if (!xl) { + ret = -1; + goto out; + } + ret = 0; +out: + return ret; +} - xl = volgen_graph_add (graph, "features/marker", volname); - if (!xl) - return -1; +static int +brick_graph_add_locks(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *set_dict, glusterd_brickinfo_t *brickinfo) +{ + xlator_t *xl = NULL; + int ret = -1; + xlator_t *this = THIS; + GF_ASSERT(this); - ret = glusterd_gsync_option_set (volinfo, xl, set_dict); - if (ret < 0) - return -1; + if (!graph || !volinfo || !set_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); + goto out; + } - xl = volgen_graph_add_as (graph, "debug/io-stats", path); - if (!xl) - return -1; + xl = volgen_graph_add(graph, "features/locks", volinfo->volname); + if (!xl) + goto out; + + ret = 0; +out: + return ret; +} + +static int +brick_graph_add_iot(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *set_dict, glusterd_brickinfo_t *brickinfo) +{ + xlator_t *xl = NULL; + int ret = -1; + xlator_t *this = THIS; + GF_ASSERT(this); + + if (!graph || !volinfo || !set_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); + goto out; + } + + xl = volgen_graph_add(graph, "performance/io-threads", volinfo->volname); + if (!xl) + goto out; + ret = 0; +out: + return ret; +} + +static int +brick_graph_add_barrier(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *set_dict, glusterd_brickinfo_t *brickinfo) +{ + xlator_t *xl = NULL; + int ret = -1; + xlator_t *this = THIS; + + if (!graph || !volinfo) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); + goto out; + } - xl = volgen_graph_add (graph, "protocol/server", volname); + xl = volgen_graph_add(graph, "features/barrier", volinfo->volname); + if (!xl) + goto out; + + ret = 0; +out: + return ret; +} + +static int +brick_graph_add_sdfs(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *set_dict, glusterd_brickinfo_t *brickinfo) +{ + xlator_t *xl = NULL; + int ret = -1; + xlator_t *this = THIS; + GF_ASSERT(this); + + if (!graph || !volinfo) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); + goto out; + } + + if (!dict_get_str_boolean(set_dict, "features.sdfs", 0)) { + /* update only if option is enabled */ + ret = 0; + goto out; + } + + xl = volgen_graph_add(graph, "features/sdfs", volinfo->volname); + if (!xl) + goto out; + /* If we don't set this option here, the translator by default marks + it 'pass-through' */ + ret = xlator_set_fixed_option(xl, "pass-through", "false"); + if (ret) + goto out; + + ret = 0; +out: + return ret; +} + +static int +brick_graph_add_namespace(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *set_dict, glusterd_brickinfo_t *brickinfo) +{ + xlator_t *xl = NULL; + int ret = -1; + xlator_t *this = THIS; + GF_ASSERT(this); + + if (!graph || !volinfo || !set_dict) { + gf_smsg(this->name, 
GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); + goto out; + } + + ret = dict_get_str_boolean(set_dict, "features.tag-namespaces", 0); + if (ret == -1) + goto out; + + if (ret) { + xl = volgen_graph_add(graph, "features/namespace", volinfo->volname); if (!xl) - return -1; - ret = xlator_set_option (xl, "transport-type", transt); - if (ret) - return -1; + goto out; + } - ret = volgen_graph_set_options_generic (graph, set_dict, NULL, - &server_spec_option_handler); + ret = 0; +out: + return ret; +} - return ret; +xlator_t * +add_one_peer(volgen_graph_t *graph, glusterd_brickinfo_t *peer, char *volname, + uint16_t index) +{ + xlator_t *kid; + + kid = volgen_graph_add_nolink(graph, "protocol/client", "%s-client-%u", + volname, index++); + if (!kid) { + return NULL; + } + + /* TBD: figure out where to get the proper transport list */ + if (xlator_set_fixed_option(kid, "transport-type", "socket")) { + return NULL; + } + if (xlator_set_fixed_option(kid, "remote-host", peer->hostname)) { + return NULL; + } + if (xlator_set_fixed_option(kid, "remote-subvolume", peer->path)) { + return NULL; + } + /* TBD: deal with RDMA, SSL */ + + return kid; +} + +static int +brick_graph_add_index(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *set_dict, glusterd_brickinfo_t *brickinfo) +{ + xlator_t *xl = NULL; + char *pending_xattr = NULL; + char index_basepath[PATH_MAX] = {0}; + int ret = -1; + int32_t len = 0; + xlator_t *this = THIS; + GF_ASSERT(this); + + if (!graph || !volinfo || !brickinfo || !set_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); + goto out; + } + + xl = volgen_graph_add(graph, "features/index", volinfo->volname); + if (!xl) + goto out; + + len = snprintf(index_basepath, sizeof(index_basepath), "%s/%s", + brickinfo->path, ".glusterfs/indices"); + if ((len < 0) || (len >= sizeof(index_basepath))) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); + goto out; + } + + ret = xlator_set_fixed_option(xl, "index-base", index_basepath); + if (ret) + goto out; + if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE) { + ret = xlator_set_fixed_option(xl, "xattrop64-watchlist", + "trusted.ec.dirty"); + if (ret) + goto out; + } + if ((volinfo->type == GF_CLUSTER_TYPE_REPLICATE || + volinfo->type == GF_CLUSTER_TYPE_NONE)) { + ret = xlator_set_fixed_option(xl, "xattrop-dirty-watchlist", + "trusted.afr.dirty"); + if (ret) + goto out; + ret = gf_asprintf(&pending_xattr, "trusted.afr.%s-", volinfo->volname); + if (ret < 0) + goto out; + ret = xlator_set_fixed_option(xl, "xattrop-pending-watchlist", + pending_xattr); + if (ret) + goto out; + } +out: + GF_FREE(pending_xattr); + return ret; } +static int +brick_graph_add_marker(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *set_dict, glusterd_brickinfo_t *brickinfo) +{ + int ret = -1; + xlator_t *xl = NULL; + char tstamp_file[PATH_MAX] = { + 0, + }; + char volume_id[64] = { + 0, + }; + char buf[32] = { + 0, + }; + xlator_t *this = THIS; + GF_ASSERT(this); + + if (!graph || !volinfo || !set_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); + goto out; + } + + xl = volgen_graph_add(graph, "features/marker", volinfo->volname); + if (!xl) + goto out; + + gf_uuid_unparse(volinfo->volume_id, volume_id); + ret = xlator_set_fixed_option(xl, "volume-uuid", volume_id); + if (ret) + goto out; + get_vol_tstamp_file(tstamp_file, volinfo); + ret = xlator_set_fixed_option(xl, "timestamp-file", tstamp_file); + if (ret) + goto out; + + snprintf(buf, sizeof(buf), 
"%d", volinfo->quota_xattr_version); + ret = xlator_set_fixed_option(xl, "quota-version", buf); + if (ret) + goto out; + +out: + return ret; +} -/* builds a graph for server role , with option overrides in mod_dict */ static int -build_server_graph (glusterfs_graph_t *graph, glusterd_volinfo_t *volinfo, - dict_t *mod_dict, char *path) +brick_graph_add_quota(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *set_dict, glusterd_brickinfo_t *brickinfo) { - return build_graph_generic (graph, volinfo, mod_dict, path, - &server_graph_builder); + int ret = -1; + xlator_t *xl = NULL; + char *value = NULL; + xlator_t *this = THIS; + GF_ASSERT(this); + + if (!graph || !volinfo || !set_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); + goto out; + } + + xl = volgen_graph_add(graph, "features/quota", volinfo->volname); + if (!xl) + goto out; + + ret = xlator_set_fixed_option(xl, "volume-uuid", volinfo->volname); + if (ret) + goto out; + + ret = glusterd_volinfo_get(volinfo, VKEY_FEATURES_QUOTA, &value); + if (value) { + ret = xlator_set_fixed_option(xl, "server-quota", value); + if (ret) + goto out; + } +out: + return ret; } static int -perfxl_option_handler (glusterfs_graph_t *graph, struct volopt_map_entry *vme, - void *param) +brick_graph_add_ro(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *set_dict, glusterd_brickinfo_t *brickinfo) { - char *volname = NULL; - gf_boolean_t enabled = _gf_false; + int ret = -1; + xlator_t *xl = NULL; + xlator_t *this = THIS; + GF_ASSERT(this); + + if (!graph || !volinfo || !set_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); + goto out; + } + + if (dict_get_str_boolean(set_dict, "features.read-only", 0) && + (dict_get_str_boolean(set_dict, "features.worm", 0) || + dict_get_str_boolean(set_dict, "features.worm-file-level", 0))) { + gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "read-only and worm cannot be set together"); + ret = -1; + goto out; + } + + xl = volgen_graph_add(graph, "features/read-only", volinfo->volname); + if (!xl) + return -1; + ret = xlator_set_fixed_option(xl, "read-only", "off"); + if (ret) + return -1; - volname = param; + ret = 0; - if (strcmp (vme->option, "!perf") != 0) - return 0; +out: + return ret; +} - if (gf_string2boolean (vme->value, &enabled) == -1) - return -1; - if (!enabled) - return 0; +static int +brick_graph_add_worm(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *set_dict, glusterd_brickinfo_t *brickinfo) +{ + int ret = -1; + xlator_t *xl = NULL; + xlator_t *this = THIS; + GF_ASSERT(this); + + if (!graph || !volinfo || !set_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); + goto out; + } + + if (dict_get_str_boolean(set_dict, "features.read-only", 0) && + (dict_get_str_boolean(set_dict, "features.worm", 0) || + dict_get_str_boolean(set_dict, "features.worm-file-level", 0))) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_INCOMPATIBLE_VALUE, + "read-only and worm cannot be set together"); + ret = -1; + goto out; + } + + xl = volgen_graph_add(graph, "features/worm", volinfo->volname); + if (!xl) + return -1; - if (volgen_graph_add (graph, vme->voltype, volname)) - return 0; - else - return -1; + ret = 0; + +out: + return ret; } static int -client_graph_builder (glusterfs_graph_t *graph, glusterd_volinfo_t *volinfo, - dict_t *set_dict, void *param) -{ - int dist_count = 0; - char transt[16] = {0,}; - char *tt = NULL; - char *volname = NULL; - dict_t *dict = NULL; - 
glusterd_brickinfo_t *brick = NULL; - char *replicate_args[] = {"cluster/replicate", - "%s-replicate-%d"}; - char *stripe_args[] = {"cluster/stripe", - "%s-stripe-%d"}; - char **cluster_args = NULL; - int i = 0; - int j = 0; - int ret = 0; - xlator_t *xl = NULL; - xlator_t *txl = NULL; - xlator_t *trav = NULL; - char *quota_val = NULL; - gf_boolean_t quota = _gf_false; +brick_graph_add_cdc(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *set_dict, glusterd_brickinfo_t *brickinfo) +{ + int ret = -1; + xlator_t *xl = NULL; + xlator_t *this = THIS; + GF_ASSERT(this); + + if (!graph || !volinfo || !set_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); + goto out; + } + + /* Check for compress volume option, and add it to the graph on + * server side */ + ret = dict_get_str_boolean(set_dict, "network.compression", 0); + if (ret == -1) + goto out; + if (ret) { + xl = volgen_graph_add(graph, "features/cdc", volinfo->volname); + if (!xl) { + ret = -1; + goto out; + } + ret = xlator_set_fixed_option(xl, "mode", "server"); + if (ret) + goto out; + } +out: + return ret; +} - volname = volinfo->volname; - dict = volinfo->dict; - GF_ASSERT (dict); +static int +brick_graph_add_io_stats(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *set_dict, glusterd_brickinfo_t *brickinfo) +{ + int ret = -1; + xlator_t *xl = NULL; + xlator_t *this = THIS; + glusterd_conf_t *priv = this->private; + + if (!graph || !set_dict || !brickinfo) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); + goto out; + } + + xl = volgen_graph_add_as(graph, "debug/io-stats", brickinfo->path); + if (!xl) + goto out; + + ret = xlator_set_fixed_option(xl, "unique-id", brickinfo->path); + if (ret) + goto out; + + if (priv->op_version >= GD_OP_VERSION_7_1) { + ret = xlator_set_fixed_option(xl, "volume-id", + uuid_utoa(volinfo->volume_id)); + if (ret) + goto out; + } - if (volinfo->brick_count == 0) { - gf_log ("", GF_LOG_ERROR, - "volume inconsistency: brick count is 0"); + ret = 0; +out: + return ret; +} - return -1; +static int +brick_graph_add_upcall(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *set_dict, glusterd_brickinfo_t *brickinfo) +{ + xlator_t *xl = NULL; + int ret = -1; + xlator_t *this = THIS; + GF_ASSERT(this); + + if (!graph || !volinfo || !set_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); + goto out; + } + + xl = volgen_graph_add(graph, "features/upcall", volinfo->volname); + if (!xl) { + gf_msg("glusterd", GF_LOG_WARNING, 0, GD_MSG_GRAPH_FEATURE_ADD_FAIL, + "failed to add features/upcall to graph"); + goto out; + } + + ret = 0; +out: + return ret; +} + +static int +brick_graph_add_leases(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *set_dict, glusterd_brickinfo_t *brickinfo) +{ + xlator_t *xl = NULL; + int ret = -1; + xlator_t *this = THIS; + GF_ASSERT(this); + + if (!graph || !volinfo || !set_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); + goto out; + } + + xl = volgen_graph_add(graph, "features/leases", volinfo->volname); + if (!xl) { + gf_msg("glusterd", GF_LOG_WARNING, 0, GD_MSG_GRAPH_FEATURE_ADD_FAIL, + "failed to add features/leases to graph"); + goto out; + } + + ret = 0; +out: + return ret; +} + +static int +brick_graph_add_server(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *set_dict, glusterd_brickinfo_t *brickinfo) +{ + int ret = -1; + xlator_t *xl = NULL; + char transt[16] = { + 0, + }; + char *username = 
NULL; + char *password = NULL; + char key[1024] = {0}; + char *ssl_user = NULL; + char *volname = NULL; + char *address_family_data = NULL; + int32_t len = 0; + xlator_t *this = THIS; + GF_ASSERT(this); + + if (!graph || !volinfo || !set_dict || !brickinfo) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); + goto out; + } + + get_vol_transport_type(volinfo, transt); + + username = glusterd_auth_get_username(volinfo); + password = glusterd_auth_get_password(volinfo); + + xl = volgen_graph_add(graph, "protocol/server", volinfo->volname); + if (!xl) + goto out; + + ret = xlator_set_fixed_option(xl, "transport-type", transt); + if (ret) + goto out; + + /*In the case of running multiple glusterds on a single machine, + * we should ensure that bricks don't listen on all IPs on that + * machine and break the IP based separation being brought about.*/ + if (dict_get_sizen(THIS->options, "transport.socket.bind-address")) { + ret = xlator_set_fixed_option(xl, "transport.socket.bind-address", + brickinfo->hostname); + if (ret) + return -1; + } + + RPC_SET_OPT(xl, SSL_OWN_CERT_OPT, "ssl-own-cert", return -1); + RPC_SET_OPT(xl, SSL_PRIVATE_KEY_OPT, "ssl-private-key", return -1); + RPC_SET_OPT(xl, SSL_CA_LIST_OPT, "ssl-ca-list", return -1); + RPC_SET_OPT(xl, SSL_CRL_PATH_OPT, "ssl-crl-path", return -1); + RPC_SET_OPT(xl, SSL_CERT_DEPTH_OPT, "ssl-cert-depth", return -1); + RPC_SET_OPT(xl, SSL_CIPHER_LIST_OPT, "ssl-cipher-list", return -1); + RPC_SET_OPT(xl, SSL_DH_PARAM_OPT, "ssl-dh-param", return -1); + RPC_SET_OPT(xl, SSL_EC_CURVE_OPT, "ssl-ec-curve", return -1); + + if (dict_get_str_sizen(volinfo->dict, "transport.address-family", + &address_family_data) == 0) { + ret = xlator_set_fixed_option(xl, "transport.address-family", + address_family_data); + if (ret) { + gf_log("glusterd", GF_LOG_WARNING, + "failed to set transport.address-family"); + return -1; } - if (volinfo->sub_count && volinfo->sub_count < volinfo->brick_count && - volinfo->brick_count % volinfo->sub_count != 0) { - gf_log ("", GF_LOG_ERROR, - "volume inconsistency: " - "total number of bricks (%d) is not divisible with " - "number of bricks per cluster (%d) in a multi-cluster " - "setup", - volinfo->brick_count, volinfo->sub_count); - return -1; + } + + if (username) { + len = snprintf(key, sizeof(key), "auth.login.%s.allow", + brickinfo->path); + if ((len < 0) || (len >= sizeof(key))) { + return -1; } - ret = dict_get_str (set_dict, "client-transport-type", &tt); + ret = xlator_set_option(xl, key, len, username); if (ret) - get_vol_transport_type (volinfo, transt); - if (!ret) - strcpy (transt, tt); - - i = 0; - list_for_each_entry (brick, &volinfo->bricks, brick_list) { - xl = volgen_graph_add_nolink (graph, "protocol/client", - "%s-client-%d", volname, i); - if (!xl) - return -1; - ret = xlator_set_option (xl, "remote-host", brick->hostname); - if (ret) - return -1; - ret = xlator_set_option (xl, "remote-subvolume", brick->path); - if (ret) - return -1; - ret = xlator_set_option (xl, "transport-type", transt); - if (ret) - return -1; + return -1; + } - i++; + if (password) { + len = snprintf(key, sizeof(key), "auth.login.%s.password", username); + if ((len < 0) || (len >= sizeof(key))) { + return -1; } - if (i != volinfo->brick_count) { - gf_log ("", GF_LOG_ERROR, - "volume inconsistency: actual number of bricks (%d) " - "differs from brick count (%d)", i, - volinfo->brick_count); + ret = xlator_set_option(xl, key, len, password); + if (ret) + return -1; + } - return -1; + ret = xlator_set_fixed_option(xl, 
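The auth.login keys that brick_graph_add_server() writes ("auth.login.<brick-path>.allow", "auth.login.<username>.password") are assembled with snprintf() and rejected when the return value signals truncation. A standalone sketch of that key construction and length check; the brick path is illustrative:

#include <stdio.h>
#include <string.h>

int main(void)
{
    char key[1024];
    const char *brick_path = "/bricks/brick1";   /* illustrative path */
    int len = snprintf(key, sizeof(key), "auth.login.%s.allow", brick_path);

    /* Same rejection criterion as the patch: error or would-be truncation. */
    if (len < 0 || (size_t)len >= sizeof(key)) {
        fprintf(stderr, "key would be truncated\n");
        return 1;
    }
    printf("%s (%d bytes)\n", key, len);
    return 0;
}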
"auth-path", brickinfo->path); + if (ret) + return -1; + + volname = volinfo->is_snap_volume ? volinfo->parent_volname + : volinfo->volname; + + if (volname && !strcmp(volname, GLUSTER_SHARED_STORAGE)) { + ret = xlator_set_fixed_option(xl, "strict-auth-accept", "true"); + if (ret) + return -1; + } + + if (dict_get_str_sizen(volinfo->dict, "auth.ssl-allow", &ssl_user) == 0) { + len = snprintf(key, sizeof(key), "auth.login.%s.ssl-allow", + brickinfo->path); + if ((len < 0) || (len >= sizeof(key))) { + return -1; } - if (volinfo->sub_count > 1) { - switch (volinfo->type) { - case GF_CLUSTER_TYPE_REPLICATE: - cluster_args = replicate_args; - break; - case GF_CLUSTER_TYPE_STRIPE: - cluster_args = stripe_args; - break; - default: - gf_log ("", GF_LOG_ERROR, "volume inconsistency: " - "unrecognized clustering type"); - return -1; - } + ret = xlator_set_option(xl, key, len, ssl_user); + if (ret) + return -1; + } - i = 0; - j = 0; - txl = first_of (graph); - for (trav = txl; trav->next; trav = trav->next); - for (;; trav = trav->prev) { - if (i % volinfo->sub_count == 0) { - xl = volgen_graph_add_nolink (graph, - cluster_args[0], - cluster_args[1], - volname, j); - if (!xl) - return -1; - j++; - } - - ret = volgen_xlator_link (xl, trav); - if (ret) - return -1; - - if (trav == txl) - break; - i++; - } +out: + return ret; +} + +static int +brick_graph_add_pump(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *set_dict, glusterd_brickinfo_t *brickinfo) +{ + int ret = -1; + int pump = 0; + xlator_t *xl = NULL; + xlator_t *txl = NULL; + xlator_t *rbxl = NULL; + char *username = NULL; + char *password = NULL; + char *ptranst = NULL; + char *address_family_data = NULL; + xlator_t *this = THIS; + GF_ASSERT(this); + + if (!graph || !volinfo || !set_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); + goto out; + } + + ret = dict_get_int32(volinfo->dict, "enable-pump", &pump); + if (ret == -ENOENT) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=enable-pump", NULL); + ret = pump = 0; + } + if (ret) + return -1; + + username = glusterd_auth_get_username(volinfo); + password = glusterd_auth_get_password(volinfo); + + if (pump) { + txl = first_of(graph); + + rbxl = volgen_graph_add_nolink(graph, "protocol/client", + "%s-replace-brick", volinfo->volname); + if (!rbxl) + return -1; + + ptranst = glusterd_get_trans_type_rb(volinfo->transport_type); + if (NULL == ptranst) + return -1; + + RPC_SET_OPT(rbxl, SSL_OWN_CERT_OPT, "ssl-own-cert", return -1); + RPC_SET_OPT(rbxl, SSL_PRIVATE_KEY_OPT, "ssl-private-key", return -1); + RPC_SET_OPT(rbxl, SSL_CA_LIST_OPT, "ssl-ca-list", return -1); + RPC_SET_OPT(rbxl, SSL_CRL_PATH_OPT, "ssl-crl-path", return -1); + RPC_SET_OPT(rbxl, SSL_CERT_DEPTH_OPT, "ssl-cert-depth", return -1); + RPC_SET_OPT(rbxl, SSL_CIPHER_LIST_OPT, "ssl-cipher-list", return -1); + RPC_SET_OPT(rbxl, SSL_DH_PARAM_OPT, "ssl-dh-param", return -1); + RPC_SET_OPT(rbxl, SSL_EC_CURVE_OPT, "ssl-ec-curve", return -1); + + if (username) { + ret = xlator_set_fixed_option(rbxl, "username", username); + if (ret) + return -1; } - if (volinfo->sub_count) - dist_count = volinfo->brick_count / volinfo->sub_count; - else - dist_count = volinfo->brick_count; - if (dist_count > 1) { - xl = volgen_graph_add_nolink (graph, "cluster/distribute", - "%s-dht", volname); - if (!xl) - return -1; - - trav = xl; - for (i = 0; i < dist_count; i++) - trav = trav->next; - for (; trav != xl; trav = trav->prev) { - ret = volgen_xlator_link (xl, trav); - if (ret) - 
return -1; - } + if (password) { + ret = xlator_set_fixed_option(rbxl, "password", password); + if (ret) + return -1; } - ret = glusterd_volinfo_get (volinfo, "features.quota", "a_val); + ret = xlator_set_fixed_option(rbxl, "transport-type", ptranst); + GF_FREE(ptranst); if (ret) + return -1; + + if (dict_get_str_sizen(volinfo->dict, "transport.address-family", + &address_family_data) == 0) { + ret = xlator_set_fixed_option(rbxl, "transport.address-family", + address_family_data); + if (ret) { + gf_log("glusterd", GF_LOG_WARNING, + "failed to set transport.address-family"); return -1; + } + } - if (quota_val) - ret = gf_string2boolean (quota_val, "a); - if (ret) { - gf_log ("", GF_LOG_ERROR, "value for quota option is not valid"); + xl = volgen_graph_add_nolink(graph, "cluster/pump", "%s-pump", + volinfo->volname); + if (!xl) + return -1; + ret = volgen_xlator_link(xl, txl); + if (ret) + return -1; + ret = volgen_xlator_link(xl, rbxl); + if (ret) + return -1; + } - return -1; +out: + return ret; +} + +/* The order of xlator definition here determines + * the topology of the brick graph */ +static volgen_brick_xlator_t server_graph_table[] = { + {brick_graph_add_server, NULL}, + {brick_graph_add_io_stats, "NULL"}, + {brick_graph_add_sdfs, "sdfs"}, + {brick_graph_add_namespace, "namespace"}, + {brick_graph_add_cdc, NULL}, + {brick_graph_add_quota, "quota"}, + {brick_graph_add_index, "index"}, + {brick_graph_add_barrier, NULL}, + {brick_graph_add_marker, "marker"}, + {brick_graph_add_selinux, "selinux"}, + {brick_graph_add_iot, "io-threads"}, + {brick_graph_add_upcall, "upcall"}, + {brick_graph_add_leases, "leases"}, + {brick_graph_add_pump, NULL}, + {brick_graph_add_ro, NULL}, + {brick_graph_add_worm, NULL}, + {brick_graph_add_locks, "locks"}, + {brick_graph_add_acl, "acl"}, + {brick_graph_add_bitrot_stub, "bitrot-stub"}, + {brick_graph_add_changelog, "changelog"}, + {brick_graph_add_trash, "trash"}, + {brick_graph_add_arbiter, "arbiter"}, + {brick_graph_add_posix, "posix"}, +}; + +static glusterd_server_xlator_t +get_server_xlator(char *xlator) +{ + int i = 0; + int size = sizeof(server_graph_table) / sizeof(server_graph_table[0]); + + for (i = 0; i < size; i++) { + if (!server_graph_table[i].dbg_key) + continue; + if (strcmp(xlator, server_graph_table[i].dbg_key)) + return GF_XLATOR_SERVER; + } + + return GF_XLATOR_NONE; +} + +static glusterd_client_xlator_t +get_client_xlator(char *xlator) +{ + glusterd_client_xlator_t subvol = GF_CLNT_XLATOR_NONE; + + if (strcmp(xlator, "client") == 0) + subvol = GF_CLNT_XLATOR_FUSE; + + return subvol; +} + +static int +debugxl_option_handler(volgen_graph_t *graph, struct volopt_map_entry *vme, + void *param) +{ + char *volname = NULL; + gf_boolean_t enabled = _gf_false; + + volname = param; + + if (strcmp(vme->option, "!debug") != 0) + return 0; + + if (!strcmp(vme->key, "debug.trace") || + !strcmp(vme->key, "debug.error-gen") || + !strcmp(vme->key, "debug.delay-gen")) { + if (get_server_xlator(vme->value) == GF_XLATOR_NONE && + get_client_xlator(vme->value) == GF_CLNT_XLATOR_NONE) + return 0; + } + + if (gf_string2boolean(vme->value, &enabled) == -1) + goto add_graph; + if (!enabled) + return 0; + +add_graph: + if (strcmp(vme->value, "off") == 0) + return 0; + if (volgen_graph_add(graph, vme->voltype, volname)) + return 0; + else + return -1; +} + +int +check_and_add_debug_xl(volgen_graph_t *graph, dict_t *set_dict, char *volname, + char *xlname) +{ + int i = 0; + int ret = 0; + char *value_str = NULL; + static char *xls[] = {"debug.trace", 
"debug.error-gen", "debug.delay-gen", + NULL}; + + if (!xlname) + goto out; + + while (xls[i]) { + ret = dict_get_str(set_dict, xls[i], &value_str); + if (!ret) { + if (strcmp(xlname, value_str) == 0) { + ret = volgen_graph_set_options_generic(graph, set_dict, volname, + &debugxl_option_handler); + if (ret) + goto out; + } } + i++; + } + ret = 0; + +out: + return ret; +} + +static int +server_graph_builder(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *set_dict, void *param) +{ + int ret = 0; + char *xlator = NULL; + char *loglevel = NULL; + int i = 0; - if (quota) { - xl = volgen_graph_add (graph, "features/quota", volname); + i = sizeof(server_graph_table) / sizeof(server_graph_table[0]) - 1; - if (!xl) - return -1; + while (i >= 0) { + ret = server_graph_table[i].builder(graph, volinfo, set_dict, param); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_BUILD_GRAPH_FAILED, + "Builing graph " + "failed for server graph table entry: %d", + i); + goto out; } - ret = volgen_graph_set_options_generic (graph, set_dict, volname, - &perfxl_option_handler); + ret = check_and_add_debug_xl(graph, set_dict, volinfo->volname, + server_graph_table[i].dbg_key); if (ret) - return -1; + goto out; - xl = volgen_graph_add_as (graph, "debug/io-stats", volname); - if (!xl) - return -1; + i--; + } - ret = volgen_graph_set_options_generic (graph, set_dict, "client", - &loglevel_option_handler); + ret = dict_get_str_sizen(set_dict, "xlator", &xlator); - return ret; + /* got a cli log level request */ + if (!ret) { + ret = dict_get_str_sizen(set_dict, "loglevel", &loglevel); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "could not get both" + " translator name and loglevel for log level request"); + goto out; + } + } + + ret = volgen_graph_set_options_generic( + graph, set_dict, (xlator && loglevel) ? (void *)set_dict : volinfo, + (xlator && loglevel) ? 
&server_spec_extended_option_handler + : &server_spec_option_handler); + +out: + return ret; } +/* builds a graph for server role , with option overrides in mod_dict */ +static int +build_server_graph(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *mod_dict, glusterd_brickinfo_t *brickinfo) +{ + return build_graph_generic(graph, volinfo, mod_dict, brickinfo, + &server_graph_builder); +} -/* builds a graph for client role , with option overrides in mod_dict */ static int -build_client_graph (glusterfs_graph_t *graph, glusterd_volinfo_t *volinfo, - dict_t *mod_dict) +perfxl_option_handler(volgen_graph_t *graph, struct volopt_map_entry *vme, + void *param) { - return build_graph_generic (graph, volinfo, mod_dict, NULL, - &client_graph_builder); + gf_boolean_t enabled = _gf_false; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + + GF_VALIDATE_OR_GOTO("glusterd", param, out); + volinfo = param; + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + priv = this->private; + GF_VALIDATE_OR_GOTO("glusterd", priv, out); + + if (strcmp(vme->option, "!perf") != 0) + return 0; + + if (gf_string2boolean(vme->value, &enabled) == -1) + return -1; + if (!enabled) + return 0; + + /* Check op-version before adding the 'open-behind' xlator in the graph + */ + if (!strcmp(vme->key, "performance.open-behind") && + (vme->op_version > volinfo->client_op_version)) + return 0; + + if (priv->op_version < GD_OP_VERSION_3_12_2) { + /* For replicate volumes do not load io-threads as it affects + * performance + */ + if (!strcmp(vme->key, "performance.client-io-threads") && + (GF_CLUSTER_TYPE_REPLICATE == volinfo->type)) + return 0; + } + + /* if VKEY_READDIR_AHEAD is enabled and parallel readdir is + * not enabled then load readdir-ahead here else it will be + * loaded as a child of dht */ + if (!strcmp(vme->key, VKEY_READDIR_AHEAD) && + glusterd_volinfo_get_boolean(volinfo, VKEY_PARALLEL_READDIR)) + return 0; + + if (volgen_graph_add(graph, vme->voltype, volinfo->volname)) + return 0; +out: + return -1; } static int -nfs_option_handler (glusterfs_graph_t *graph, - struct volopt_map_entry *vme, void *param) +gfproxy_server_perfxl_option_handler(volgen_graph_t *graph, + struct volopt_map_entry *vme, void *param) { - xlator_t *xl = NULL; - char *aa = NULL; - int ret = 0; - glusterd_volinfo_t *volinfo = NULL; + GF_ASSERT(param); + + /* write-behind is the *not* allowed for gfproxy-servers */ + if (strstr(vme->key, "write-behind")) { + return 0; + } - volinfo = param; + perfxl_option_handler(graph, vme, param); - xl = first_of (graph); + return 0; +} -/* if (vme->type == GLOBAL_DOC || vme->type == GLOBAL_NO_DOC) { +static int +gfproxy_client_perfxl_option_handler(volgen_graph_t *graph, + struct volopt_map_entry *vme, void *param) +{ + GF_ASSERT(param); - ret = xlator_set_option (xl, vme->key, vme->value); - }*/ - if ( !volinfo || !volinfo->volname) - return 0; + /* write-behind is the only allowed "perf" for gfproxy-clients */ + if (!strstr(vme->key, "write-behind")) + return 0; - if (! 
strcmp (vme->option, "!nfs.rpc-auth-addr-allow")) { - ret = gf_asprintf (&aa, "rpc-auth.addr.%s.allow", - volinfo->volname); + perfxl_option_handler(graph, vme, param); - if (ret != -1) { - ret = xlator_set_option (xl, aa, vme->value); - GF_FREE (aa); - } + return 0; +} - if (ret) - return -1; +#ifdef BUILD_GNFS +static int +nfsperfxl_option_handler(volgen_graph_t *graph, struct volopt_map_entry *vme, + void *param) +{ + char *volname = NULL; + gf_boolean_t enabled = _gf_false; + + volname = param; + + if (strcmp(vme->option, "!nfsperf") != 0) + return 0; + + if (gf_string2boolean(vme->value, &enabled) == -1) + return -1; + if (!enabled) + return 0; + + if (volgen_graph_add(graph, vme->voltype, volname)) + return 0; + else + return -1; +} +#endif + +#if (HAVE_LIB_XML) +int +end_sethelp_xml_doc(xmlTextWriterPtr writer) +{ + int ret = -1; + + ret = xmlTextWriterEndElement(writer); + if (ret < 0) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_XML_TEXT_WRITE_FAIL, + "Could not end an " + "xmlElement"); + ret = -1; + goto out; + } + ret = xmlTextWriterEndDocument(writer); + if (ret < 0) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_XML_TEXT_WRITE_FAIL, + "Could not end an " + "xmlDocument"); + ret = -1; + goto out; + } + ret = 0; +out: + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; +} + +int +init_sethelp_xml_doc(xmlTextWriterPtr *writer, xmlBufferPtr *buf) +{ + int ret = -1; + + if (!writer || !buf) + goto out; + + *buf = xmlBufferCreateSize(8192); + if (buf == NULL) { + gf_msg("glusterd", GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "Error creating the xml " + "buffer"); + ret = -1; + goto out; + } + + xmlBufferSetAllocationScheme(*buf, XML_BUFFER_ALLOC_DOUBLEIT); + + *writer = xmlNewTextWriterMemory(*buf, 0); + if (writer == NULL) { + gf_msg("glusterd", GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + " Error creating the xml " + "writer"); + ret = -1; + goto out; + } + + ret = xmlTextWriterStartDocument(*writer, "1.0", "UTF-8", "yes"); + if (ret < 0) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_XML_DOC_START_FAIL, + "Error While starting the " + "xmlDoc"); + goto out; + } + + ret = xmlTextWriterStartElement(*writer, (xmlChar *)"options"); + if (ret < 0) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_XML_ELE_CREATE_FAIL, + "Could not create an " + "xmlElement"); + ret = -1; + goto out; + } + + ret = 0; + +out: + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; +} + +int +xml_add_volset_element(xmlTextWriterPtr writer, const char *name, + const char *def_val, const char *dscrpt) +{ + int ret = -1; + + GF_ASSERT(name); + + ret = xmlTextWriterStartElement(writer, (xmlChar *)"option"); + if (ret < 0) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_XML_ELE_CREATE_FAIL, + "Could not create an " + "xmlElemetnt"); + ret = -1; + goto out; + } + + ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"defaultValue", + "%s", def_val); + if (ret < 0) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_XML_ELE_CREATE_FAIL, + "Could not create an " + "xmlElemetnt"); + ret = -1; + goto out; + } + + ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"description", + "%s", dscrpt); + if (ret < 0) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_XML_ELE_CREATE_FAIL, + "Could not create an " + "xmlElemetnt"); + ret = -1; + goto out; + } + + ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"name", "%s", + name); + if (ret < 0) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_XML_ELE_CREATE_FAIL, + "Could not create an " + "xmlElemetnt"); + ret = -1; + goto out; + } + + ret = 
xmlTextWriterEndElement(writer); + if (ret < 0) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_XML_ELE_CREATE_FAIL, + "Could not end an " + "xmlElemetnt"); + ret = -1; + goto out; + } + + ret = 0; +out: + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; +} + +#endif + +int +_get_xlator_opt_key_from_vme(struct volopt_map_entry *vme, char **key) +{ + int ret = 0; + + GF_ASSERT(vme); + GF_ASSERT(key); + + if (!strcmp(vme->key, AUTH_ALLOW_MAP_KEY)) + *key = gf_strdup(AUTH_ALLOW_OPT_KEY); + else if (!strcmp(vme->key, AUTH_REJECT_MAP_KEY)) + *key = gf_strdup(AUTH_REJECT_OPT_KEY); +#ifdef BUILD_GNFS + else if (!strcmp(vme->key, NFS_DISABLE_MAP_KEY)) + *key = gf_strdup(NFS_DISABLE_OPT_KEY); +#endif + else { + if (vme->option) { + if (vme->option[0] == '!') { + *key = vme->option + 1; + if (!*key[0]) + ret = -1; + } else { + *key = vme->option; + } + } else { + *key = strchr(vme->key, '.'); + if (*key) { + (*key)++; + if (!*key[0]) + ret = -1; + } else { + ret = -1; + } } + } + if (ret) + gf_msg("glusterd", GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, + "Wrong entry found in " + "glusterd_volopt_map entry %s", + vme->key); + else + gf_msg_debug("glusterd", 0, "Returning %d", ret); + + return ret; +} - if (! strcmp (vme->option, "!nfs.rpc-auth-addr-reject")) { - ret = gf_asprintf (&aa, "rpc-auth.addr.%s.reject", - volinfo->volname); +void +_free_xlator_opt_key(char *key) +{ + GF_ASSERT(key); - if (ret != -1) { - ret = xlator_set_option (xl, aa, vme->value); - GF_FREE (aa); - } + if (!strcmp(key, AUTH_ALLOW_OPT_KEY) || !strcmp(key, AUTH_REJECT_OPT_KEY) || + !strcmp(key, NFS_DISABLE_OPT_KEY)) + GF_FREE(key); - if (ret) - return -1; + return; +} + +static xlator_t * +volgen_graph_build_client(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + char *hostname, char *port, char *subvol, char *xl_id, + char *transt, dict_t *set_dict) +{ + xlator_t *xl = NULL; + int ret = -2; + uint32_t client_type = GF_CLIENT_OTHER; + char *str = NULL; + char *ssl_str = NULL; + gf_boolean_t ssl_bool = _gf_false; + char *address_family_data = NULL; + + GF_ASSERT(graph); + GF_ASSERT(subvol); + GF_ASSERT(xl_id); + GF_ASSERT(transt); + + xl = volgen_graph_add_nolink(graph, "protocol/client", "%s", xl_id); + if (!xl) + goto err; + + ret = xlator_set_fixed_option(xl, "ping-timeout", "42"); + if (ret) + goto err; + + if (hostname) { + ret = xlator_set_fixed_option(xl, "remote-host", hostname); + if (ret) + goto err; + } + + if (port) { + ret = xlator_set_fixed_option(xl, "remote-port", port); + if (ret) + goto err; + } + + ret = xlator_set_fixed_option(xl, "remote-subvolume", subvol); + if (ret) + goto err; + + ret = xlator_set_fixed_option(xl, "transport-type", transt); + if (ret) + goto err; + + if (dict_get_str_sizen(volinfo->dict, "transport.address-family", + &address_family_data) == 0) { + ret = xlator_set_fixed_option(xl, "transport.address-family", + address_family_data); + if (ret) { + gf_log("glusterd", GF_LOG_WARNING, + "failed to set transport.address-family"); + goto err; + } + } + + ret = dict_get_uint32(set_dict, "trusted-client", &client_type); + + if (!ret && (client_type == GF_CLIENT_TRUSTED || + client_type == GF_CLIENT_TRUSTED_PROXY)) { + str = NULL; + str = glusterd_auth_get_username(volinfo); + if (str) { + ret = xlator_set_fixed_option(xl, "username", str); + if (ret) + goto err; } - if (! 
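When a volopt map entry carries no explicit backend option, _get_xlator_opt_key_from_vme() falls back to everything after the first '.' of the cli key. A tiny standalone illustration of that fallback; the key is an arbitrary example:

#include <stdio.h>
#include <string.h>

int main(void)
{
    const char *cli_key = "performance.cache-size";   /* "subsystem.key" style */
    const char *xl_key = strchr(cli_key, '.');

    if (!xl_key || !*(xl_key + 1))
        return 1;             /* malformed entry, as the helper reports */
    printf("%s -> %s\n", cli_key, xl_key + 1);
    return 0;
}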
strcmp (vme->option, "!nfs.rpc-auth-auth-unix")) { - ret = gf_asprintf (&aa, "rpc-auth.auth.unix.%s", - volinfo->volname); + str = glusterd_auth_get_password(volinfo); + if (str) { + ret = xlator_set_fixed_option(xl, "password", str); + if (ret) + goto err; + } + } - if (ret != -1) { - ret = xlator_set_option (xl, aa, vme->value); - GF_FREE (aa); + if (dict_get_str_sizen(set_dict, "client.ssl", &ssl_str) == 0) { + if (gf_string2boolean(ssl_str, &ssl_bool) == 0) { + if (ssl_bool) { + ret = xlator_set_fixed_option( + xl, "transport.socket.ssl-enabled", "true"); + if (ret) { + goto err; } + } + } + } + + RPC_SET_OPT(xl, SSL_OWN_CERT_OPT, "ssl-own-cert", goto err); + RPC_SET_OPT(xl, SSL_PRIVATE_KEY_OPT, "ssl-private-key", goto err); + RPC_SET_OPT(xl, SSL_CA_LIST_OPT, "ssl-ca-list", goto err); + RPC_SET_OPT(xl, SSL_CRL_PATH_OPT, "ssl-crl-path", goto err); + RPC_SET_OPT(xl, SSL_CERT_DEPTH_OPT, "ssl-cert-depth", goto err); + RPC_SET_OPT(xl, SSL_CIPHER_LIST_OPT, "ssl-cipher-list", goto err); + RPC_SET_OPT(xl, SSL_DH_PARAM_OPT, "ssl-dh-param", goto err); + RPC_SET_OPT(xl, SSL_EC_CURVE_OPT, "ssl-ec-curve", goto err); + + return xl; +err: + return NULL; +} - if (ret) - return -1; +static int +volgen_graph_build_clients(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *set_dict, void *param) +{ + int i = 0; + int ret = -1; + char transt[16] = { + 0, + }; + glusterd_brickinfo_t *brick = NULL; + glusterd_brickinfo_t *ta_brick = NULL; + xlator_t *xl = NULL; + int subvol_index = 0; + int thin_arbiter_index = 0; + + if (volinfo->brick_count == 0) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLUME_INCONSISTENCY, + "volume inconsistency: brick count is 0"); + goto out; + } + + if ((volinfo->dist_leaf_count < volinfo->brick_count) && + ((volinfo->brick_count % volinfo->dist_leaf_count) != 0)) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLUME_INCONSISTENCY, + "volume inconsistency: " + "total number of bricks (%d) is not divisible with " + "number of bricks per cluster (%d) in a multi-cluster " + "setup", + volinfo->brick_count, volinfo->dist_leaf_count); + goto out; + } + + get_transport_type(volinfo, set_dict, transt, _gf_false); + + if (!strcmp(transt, "tcp,rdma")) + strcpy(transt, "tcp"); + + i = 0; + cds_list_for_each_entry(brick, &volinfo->bricks, brick_list) + { + /* insert ta client xlator entry. + * eg - If subvol count is > 1, then after every two client xlator + * entries there should be a ta client xlator entry in the volfile. ta + * client xlator indexes are - 2, 5, 8 etc depending on the index of + * subvol. + */ + if (volinfo->thin_arbiter_count && + (i + 1) % (volinfo->replica_count + 1) == 0) { + thin_arbiter_index = 0; + cds_list_for_each_entry(ta_brick, &volinfo->ta_bricks, brick_list) + { + if (thin_arbiter_index == subvol_index) { + xl = volgen_graph_build_client( + graph, volinfo, ta_brick->hostname, NULL, + ta_brick->path, ta_brick->brick_id, transt, set_dict); + if (!xl) { + ret = -1; + goto out; + } + } + thin_arbiter_index++; + } + subvol_index++; + } + xl = volgen_graph_build_client(graph, volinfo, brick->hostname, NULL, + brick->path, brick->brick_id, transt, + set_dict); + if (!xl) { + ret = -1; + goto out; } - if (! 
strcmp (vme->option, "!nfs.rpc-auth-auth-null")) { - ret = gf_asprintf (&aa, "rpc-auth.auth.null.%s", - volinfo->volname); - if (ret != -1) { - ret = xlator_set_option (xl, aa, vme->value); - GF_FREE (aa); + i++; + } + + /* Add ta client xlator entry for last subvol + * Above loop will miss out on making the ta client + * xlator entry for the last subvolume in the volfile + */ + if (volinfo->thin_arbiter_count) { + thin_arbiter_index = 0; + cds_list_for_each_entry(ta_brick, &volinfo->ta_bricks, brick_list) + { + if (thin_arbiter_index == subvol_index) { + xl = volgen_graph_build_client( + graph, volinfo, ta_brick->hostname, NULL, ta_brick->path, + ta_brick->brick_id, transt, set_dict); + if (!xl) { + ret = -1; + goto out; } + } - if (ret) - return -1; + thin_arbiter_index++; } + } + + if (i != volinfo->brick_count) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLUME_INCONSISTENCY, + "volume inconsistency: actual number of bricks (%d) " + "differs from brick count (%d)", + i, volinfo->brick_count); + + ret = -1; + goto out; + } + ret = 0; +out: + return ret; +} - if (! strcmp (vme->option, "!nfs-trusted-sync")) { - ret = gf_asprintf (&aa, "nfs3.%s.trusted-sync", - volinfo->volname); +static int +volgen_link_bricks(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + char *xl_type, char *xl_namefmt, size_t child_count, + size_t sub_count, size_t start_count, xlator_t *trav) +{ + int i = 0; + int j = start_count; + xlator_t *xl = NULL; + char *volname = NULL; + int ret = -1; + + if (child_count == 0) + goto out; + volname = volinfo->volname; + + for (;; trav = trav->prev) { + if ((i % sub_count) == 0) { + xl = volgen_graph_add_nolink(graph, xl_type, xl_namefmt, volname, + j); + j++; + } - if (ret != -1) { - ret = xlator_set_option (xl, aa, vme->value); - GF_FREE (aa); - } + if (!xl) { + ret = -1; + goto out; + } - if (ret) - return -1; + if (strncmp(xl_type, "performance/readdir-ahead", + SLEN("performance/readdir-ahead")) == 0) { + ret = xlator_set_fixed_option(xl, "performance.readdir-ahead", + "on"); + if (ret) + goto out; } - if (! strcmp (vme->option, "!nfs-trusted-write")) { - ret = gf_asprintf (&aa, "nfs3.%s.trusted-write", - volinfo->volname); + ret = volgen_xlator_link(xl, trav); + if (ret) + goto out; - if (ret != -1) { - ret = xlator_set_option (xl, aa, vme->value); - GF_FREE (aa); - } + i++; + if (i == child_count) + break; + } - if (ret) - return -1; - } + ret = j - start_count; +out: + return ret; +} - if (! strcmp (vme->option, "!nfs-volume-access")) { - ret = gf_asprintf (&aa, "nfs3.%s.volume-access", - volinfo->volname); +static int +volgen_link_bricks_from_list_tail_start(volgen_graph_t *graph, + glusterd_volinfo_t *volinfo, + char *xl_type, char *xl_namefmt, + size_t child_count, size_t sub_count, + size_t start_count) +{ + xlator_t *trav = NULL; + size_t cnt = child_count; - if (ret != -1) { - ret = xlator_set_option (xl, aa, vme->value); - GF_FREE (aa); - } + if (!cnt) + return -1; - if (ret) - return -1; - } + for (trav = first_of(graph); --cnt; trav = trav->next) + ; - if (! 
strcmp (vme->option, "!nfs-export-dir")) { - ret = gf_asprintf (&aa, "nfs3.%s.export-dir", - volinfo->volname); + return volgen_link_bricks(graph, volinfo, xl_type, xl_namefmt, child_count, + sub_count, start_count, trav); +} - if (ret != -1) { - ret = xlator_set_option (xl, aa, vme->value); - GF_FREE (aa); - } +static int +volgen_link_bricks_from_list_tail(volgen_graph_t *graph, + glusterd_volinfo_t *volinfo, char *xl_type, + char *xl_namefmt, size_t child_count, + size_t sub_count) +{ + xlator_t *trav = NULL; + size_t cnt = child_count; - if (ret) - return -1; + if (!cnt) + return -1; + + for (trav = first_of(graph); --cnt; trav = trav->next) + ; + + return volgen_link_bricks(graph, volinfo, xl_type, xl_namefmt, child_count, + sub_count, 0, trav); +} + +/** + * This is the build graph function for user-serviceable snapshots. + * Generates snapview-client + */ +static int +volgen_graph_build_snapview_client(volgen_graph_t *graph, + glusterd_volinfo_t *volinfo, char *volname, + dict_t *set_dict) +{ + int ret = 0; + xlator_t *prev_top = NULL; + xlator_t *prot_clnt = NULL; + xlator_t *svc = NULL; + char transt[16] = { + 0, + }; + char *svc_args[] = {"features/snapview-client", "%s-snapview-client"}; + char subvol[1024] = { + 0, + }; + char xl_id[1024] = { + 0, + }; + + prev_top = (xlator_t *)(graph->graph.first); + + snprintf(subvol, sizeof(subvol), "snapd-%s", volinfo->volname); + snprintf(xl_id, sizeof(xl_id), "%s-snapd-client", volinfo->volname); + + get_transport_type(volinfo, set_dict, transt, _gf_false); + + prot_clnt = volgen_graph_build_client(graph, volinfo, NULL, NULL, subvol, + xl_id, transt, set_dict); + if (!prot_clnt) { + ret = -1; + goto out; + } + + svc = volgen_graph_add_nolink(graph, svc_args[0], svc_args[1], volname); + if (!svc) { + ret = -1; + goto out; + } + + /** + * Ordering the below two traslators (cur_top & prot_clnt) is important + * as snapview client implementation is built on the policy that + * normal volume path goes to FIRST_CHILD and snap world operations + * goes to SECOND_CHILD + **/ + ret = volgen_xlator_link(graph->graph.first, prev_top); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_XLATOR_LINK_FAIL, + "failed to link the " + "snapview-client to distribute"); + goto out; + } + + ret = volgen_xlator_link(graph->graph.first, prot_clnt); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_XLATOR_LINK_FAIL, + "failed to link the " + "snapview-client to snapview-server"); + goto out; + } + +out: + return ret; +} + +gf_boolean_t +_xl_is_client_decommissioned(xlator_t *xl, glusterd_volinfo_t *volinfo) +{ + int ret = 0; + gf_boolean_t decommissioned = _gf_false; + char *hostname = NULL; + char *path = NULL; + + GF_ASSERT(!strcmp(xl->type, "protocol/client")); + ret = xlator_get_fixed_option(xl, "remote-host", &hostname); + if (ret) { + GF_ASSERT(0); + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_REMOTE_HOST_GET_FAIL, + "Failed to get remote-host " + "from client %s", + xl->name); + goto out; + } + ret = xlator_get_fixed_option(xl, "remote-subvolume", &path); + if (ret) { + GF_ASSERT(0); + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_REMOTE_HOST_GET_FAIL, + "Failed to get remote-host " + "from client %s", + xl->name); + goto out; + } + + decommissioned = glusterd_is_brick_decommissioned(volinfo, hostname, path); +out: + return decommissioned; +} + +gf_boolean_t +_xl_has_decommissioned_clients(xlator_t *xl, glusterd_volinfo_t *volinfo) +{ + xlator_list_t *xl_child = NULL; + gf_boolean_t decommissioned = _gf_false; + xlator_t *cxl = NULL; + + if 
(!xl) + goto out; + + if (!strcmp(xl->type, "protocol/client")) { + decommissioned = _xl_is_client_decommissioned(xl, volinfo); + goto out; + } + + xl_child = xl->children; + while (xl_child) { + cxl = xl_child->xlator; + /* this can go into 2 depths if the volume type + is stripe-replicate */ + decommissioned = _xl_has_decommissioned_clients(cxl, volinfo); + if (decommissioned) + break; + + xl_child = xl_child->next; + } +out: + return decommissioned; +} + +static int +_graph_get_decommissioned_children(xlator_t *dht, glusterd_volinfo_t *volinfo, + char **children) +{ + int ret = -1; + xlator_list_t *xl_child = NULL; + xlator_t *cxl = NULL; + gf_boolean_t comma = _gf_false; + + *children = NULL; + xl_child = dht->children; + while (xl_child) { + cxl = xl_child->xlator; + if (_xl_has_decommissioned_clients(cxl, volinfo)) { + if (!*children) { + *children = GF_CALLOC(16 * GF_UNIT_KB, 1, gf_common_mt_char); + if (!*children) + goto out; + } + + if (comma) + strcat(*children, ","); + strcat(*children, cxl->name); + comma = _gf_true; } + xl_child = xl_child->next; + } + ret = 0; +out: + return ret; +} +static int +volgen_graph_build_readdir_ahead(volgen_graph_t *graph, + glusterd_volinfo_t *volinfo, + size_t child_count) +{ + int32_t clusters = 0; - if (! strcmp (vme->option, "!nfs.ports-insecure")) { - ret = gf_asprintf (&aa, "rpc-auth.ports.%s.insecure", - volinfo->volname); + if (graph->type == GF_QUOTAD || graph->type == GF_SNAPD || + !glusterd_volinfo_get_boolean(volinfo, VKEY_PARALLEL_READDIR)) + goto out; - if (ret != -1) { - ret = xlator_set_option (xl, aa, vme->value); - GF_FREE (aa); - } + clusters = volgen_link_bricks_from_list_tail( + graph, volinfo, "performance/readdir-ahead", "%s-readdir-ahead-%d", + child_count, 1); - if (ret) - return -1; +out: + return clusters; +} + +static int +volgen_graph_build_dht_cluster(volgen_graph_t *graph, + glusterd_volinfo_t *volinfo, size_t child_count, + gf_boolean_t is_quotad) +{ + int32_t clusters = 0; + int ret = -1; + char *decommissioned_children = NULL; + xlator_t *dht = NULL; + char *voltype = "cluster/distribute"; + char *name_fmt = NULL; + + /* NUFA and Switch section */ + if (dict_get_str_boolean(volinfo->dict, "cluster.nufa", 0) && + dict_get_str_boolean(volinfo->dict, "cluster.switch", 0)) { + gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "nufa and switch cannot be set together"); + ret = -1; + goto out; + } + + /* Check for NUFA volume option, and change the voltype */ + if (dict_get_str_boolean(volinfo->dict, "cluster.nufa", 0)) + voltype = "cluster/nufa"; + + /* Check for switch volume option, and change the voltype */ + if (dict_get_str_boolean(volinfo->dict, "cluster.switch", 0)) + voltype = "cluster/switch"; + + if (is_quotad) + name_fmt = "%s"; + else + name_fmt = "%s-dht"; + + clusters = volgen_link_bricks_from_list_tail( + graph, volinfo, voltype, name_fmt, child_count, child_count); + if (clusters < 0) + goto out; + + dht = first_of(graph); + ret = _graph_get_decommissioned_children(dht, volinfo, + &decommissioned_children); + if (ret) + goto out; + if (decommissioned_children) { + ret = xlator_set_fixed_option(dht, "decommissioned-bricks", + decommissioned_children); + if (ret) + goto out; + } + ret = 0; +out: + GF_FREE(decommissioned_children); + return ret; +} + +static int +volgen_graph_build_ec_clusters(volgen_graph_t *graph, + glusterd_volinfo_t *volinfo) +{ + int i = 0; + int ret = 0; + int clusters = 0; + char *disperse_args[] = {"cluster/disperse", "%s-disperse-%d"}; + xlator_t *ec = NULL; + 
char option[32] = {0}; + int start_count = 0; + + clusters = volgen_link_bricks_from_list_tail_start( + graph, volinfo, disperse_args[0], disperse_args[1], + volinfo->brick_count, volinfo->disperse_count, start_count); + if (clusters < 0) + goto out; + + sprintf(option, "%d", volinfo->redundancy_count); + ec = first_of(graph); + for (i = 0; i < clusters; i++) { + ret = xlator_set_fixed_option(ec, "redundancy", option); + if (ret) { + clusters = -1; + goto out; } + ec = ec->next; + } +out: + return clusters; +} - if (! strcmp (vme->option, "!nfs-disable")) { - ret = gf_asprintf (&aa, "nfs.%s.disable", - volinfo->volname); +static int +set_afr_pending_xattrs_option(volgen_graph_t *graph, + glusterd_volinfo_t *volinfo, int clusters) +{ + xlator_t *xlator = NULL; + xlator_t **afr_xlators_list = NULL; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + glusterd_brickinfo_t *brick = NULL; + glusterd_brickinfo_t *ta_brick = NULL; + char *ptr = NULL; + int i = 0; + int index = -1; + int ret = 0; + char *afr_xattrs_list = NULL; + int list_size = -1; + int ta_brick_index = 0; + int subvol_index = 0; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, out); + + if (conf->op_version < GD_OP_VERSION_3_9_0) + return ret; - if (ret != -1) { - ret = xlator_set_option (xl, aa, vme->value); - GF_FREE (aa); + /* (brick_id x rep.count) + (rep.count-1 commas) + NULL*/ + list_size = (1024 * volinfo->replica_count) + (volinfo->replica_count - 1) + + 1; + afr_xattrs_list = GF_CALLOC(1, list_size, gf_common_mt_char); + if (!afr_xattrs_list) + goto out; + + ptr = afr_xattrs_list; + afr_xlators_list = GF_CALLOC(clusters, sizeof(xlator_t *), + gf_common_mt_xlator_t); + if (!afr_xlators_list) + goto out; + + xlator = first_of(graph); + + for (i = 0, index = clusters - 1; i < clusters; i++) { + afr_xlators_list[index--] = xlator; + xlator = xlator->next; + } + + i = 1; + index = 0; + + cds_list_for_each_entry(brick, &volinfo->bricks, brick_list) + { + if (index == clusters) + break; + strncat(ptr, brick->brick_id, strlen(brick->brick_id)); + if (i == volinfo->replica_count) { + /* add ta client xlator in afr-pending-xattrs before making entries + * for client xlators in volfile. + * ta client xlator indexes are - 2, 5, 8 depending on the index of + * subvol. e.g- For first subvol ta client xlator id is volname-ta-2 + * For pending-xattr, ta name would be + * 'volname-ta-2.{{volume-uuid}}' from GD_OP_VERSION_7_3. 
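set_afr_pending_xattrs_option() sizes its scratch buffer as (1024 * replica_count) + (replica_count - 1) + 1: room for one brick id per replica, the separating commas and the terminating NUL, then fills it with a comma-separated id list per replica set. A standalone sketch with made-up brick ids:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
    int replica_count = 3;
    size_t list_size = (1024 * replica_count) + (replica_count - 1) + 1;
    char *list = calloc(1, list_size);
    const char *brick_ids[] = {"demo-client-0", "demo-client-1", "demo-client-2"};

    if (!list)
        return 1;
    for (int i = 0; i < replica_count; i++) {
        if (i)
            strcat(list, ",");
        strcat(list, brick_ids[i]);
    }
    printf("afr-pending-xattr = %s\n", list);   /* one value per replica set */
    free(list);
    return 0;
}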
+ */ + ta_brick_index = 0; + if (volinfo->thin_arbiter_count == 1) { + ptr[strlen(brick->brick_id)] = ','; + cds_list_for_each_entry(ta_brick, &volinfo->ta_bricks, + brick_list) + { + if (ta_brick_index == subvol_index) { + break; + } + ta_brick_index++; + } + if (conf->op_version < GD_OP_VERSION_7_3) { + strncat(ptr, ta_brick->brick_id, + strlen(ta_brick->brick_id)); + } else { + char ta_volname[PATH_MAX] = ""; + int len = snprintf(ta_volname, PATH_MAX, "%s.%s", + ta_brick->brick_id, + uuid_utoa(volinfo->volume_id)); + strncat(ptr, ta_volname, len); } + } - if (ret) - return -1; + ret = xlator_set_fixed_option(afr_xlators_list[index++], + "afr-pending-xattr", afr_xattrs_list); + if (ret) + goto out; + memset(afr_xattrs_list, 0, list_size); + ptr = afr_xattrs_list; + i = 1; + subvol_index++; + continue; } + ptr[strlen(brick->brick_id)] = ','; + ptr += strlen(brick->brick_id) + 1; + i++; + } + +out: + GF_FREE(afr_xattrs_list); + GF_FREE(afr_xlators_list); + return ret; +} + +static int +set_volfile_id_option(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + int clusters) +{ + xlator_t *xlator = NULL; + int i = 0; + int ret = -1; + glusterd_conf_t *conf = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, out); + + if (conf->op_version < GD_OP_VERSION_9_0) + return 0; + xlator = first_of(graph); + for (i = 0; i < clusters; i++) { + ret = xlator_set_fixed_option(xlator, "volume-id", + uuid_utoa(volinfo->volume_id)); + if (ret) + goto out; + + xlator = xlator->next; + } - /*key = strchr (vme->key, '.') + 1; +out: + return ret; +} + +static int +volgen_graph_build_afr_clusters(volgen_graph_t *graph, + glusterd_volinfo_t *volinfo) +{ + int i = 0; + int ret = 0; + int clusters = 0; + char *replicate_type = "cluster/replicate"; + char *replicate_name = "%s-replicate-%d"; + xlator_t *afr = NULL; + char option[32] = {0}; + glusterd_brickinfo_t *ta_brick = NULL; + int ta_brick_index = 0; + int ta_replica_offset = 0; + int ta_brick_offset = 0; + char ta_option[4096] = { + 0, + }; + + /* In thin-arbiter case brick count and replica count remain same + * but due to additional entries of ta client xlators in the volfile, + * GD1 is manipulated to include these client xlators while linking them to + * afr/cluster entry in the volfile. 
+ */ + if (volinfo->thin_arbiter_count == 1) { + ta_replica_offset = 1; + ta_brick_offset = volinfo->subvol_count; + } + + clusters = volgen_link_bricks_from_list_tail( + graph, volinfo, replicate_type, replicate_name, + volinfo->brick_count + ta_brick_offset, + volinfo->replica_count + ta_replica_offset); + + if (clusters < 0) + goto out; + + ret = set_afr_pending_xattrs_option(graph, volinfo, clusters); + if (ret) { + clusters = -1; + goto out; + } + + ret = set_volfile_id_option(graph, volinfo, clusters); + if (ret) { + clusters = -1; + goto out; + } + + if (!volinfo->arbiter_count && !volinfo->thin_arbiter_count) + goto out; + + afr = first_of(graph); + + if (volinfo->arbiter_count) { + sprintf(option, "%d", volinfo->arbiter_count); + for (i = 0; i < clusters; i++) { + ret = xlator_set_fixed_option(afr, "arbiter-count", option); + if (ret) { + clusters = -1; + goto out; + } - for (trav = xl->children; trav; trav = trav->next) { - ret = gf_asprintf (&aa, "auth.addr.%s.%s", trav->xlator->name, - key); - if (ret != -1) { - ret = xlator_set_option (xl, aa, vme->value); - GF_FREE (aa); + afr = afr->next; + } + } + + if (volinfo->thin_arbiter_count == 1) { + for (i = 0; i < clusters; i++) { + ta_brick_index = 0; + cds_list_for_each_entry(ta_brick, &volinfo->ta_bricks, brick_list) + { + if (ta_brick_index == i) { + break; } - if (ret) - return -1; - }*/ + ta_brick_index++; + } + snprintf(ta_option, sizeof(ta_option), "%s:%s", ta_brick->hostname, + ta_brick->path); + ret = xlator_set_fixed_option(afr, "thin-arbiter", ta_option); + if (ret) { + clusters = -1; + goto out; + } + afr = afr->next; + } + } +out: + return clusters; +} - return 0; +static int +volume_volgen_graph_build_clusters(volgen_graph_t *graph, + glusterd_volinfo_t *volinfo, + gf_boolean_t is_quotad) +{ + int clusters = 0; + int dist_count = 0; + int ret = -1; + + if (!volinfo->dist_leaf_count) + goto out; + + if (volinfo->dist_leaf_count == 1) + goto build_distribute; + + /* All other cases, it will have one or the other cluster type */ + switch (volinfo->type) { + case GF_CLUSTER_TYPE_REPLICATE: + clusters = volgen_graph_build_afr_clusters(graph, volinfo); + if (clusters < 0) + goto out; + break; + case GF_CLUSTER_TYPE_DISPERSE: + clusters = volgen_graph_build_ec_clusters(graph, volinfo); + if (clusters < 0) + goto out; + + break; + default: + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLUME_INCONSISTENCY, + "volume inconsistency: " + "unrecognized clustering type"); + goto out; + } + +build_distribute: + dist_count = volinfo->brick_count / volinfo->dist_leaf_count; + if (!dist_count) { + ret = -1; + goto out; + } + clusters = volgen_graph_build_readdir_ahead(graph, volinfo, dist_count); + if (clusters < 0) + goto out; + + ret = volgen_graph_build_dht_cluster(graph, volinfo, dist_count, is_quotad); + if (ret) + goto out; + + ret = 0; +out: + return ret; } static int -nfs_spec_option_handler (glusterfs_graph_t *graph, - struct volopt_map_entry *vme, void *param) +client_graph_set_rda_options(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *set_dict) { - int ret = 0; + char *rda_cache_s = NULL; + int32_t ret = 0; + uint64_t rda_cache_size = 0; + char *rda_req_s = NULL; + uint64_t rda_req_size = 0; + uint64_t new_cache_size = 0; + char new_cache_size_str[50] = { + 0, + }; + char new_req_size_str[50] = { + 0, + }; + int dist_count = 0; + + dist_count = volinfo->brick_count / volinfo->dist_leaf_count; + if (dist_count <= 1) + goto out; + + if (graph->type == GF_QUOTAD || graph->type == GF_SNAPD || + 
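For a thin-arbiter volume, volgen_graph_build_afr_clusters() hands each replicate subvolume its arbiter as a single "host:path" string through the thin-arbiter option. A standalone sketch of that formatting; hostname and path are placeholders:

#include <stdio.h>

int main(void)
{
    char ta_option[4096];
    const char *hostname = "ta-node.example.com";   /* placeholder */
    const char *path = "/bricks/ta";                /* placeholder */

    snprintf(ta_option, sizeof(ta_option), "%s:%s", hostname, path);
    printf("thin-arbiter = %s\n", ta_option);
    return 0;
}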
!glusterd_volinfo_get_boolean(volinfo, VKEY_PARALLEL_READDIR) || + !glusterd_volinfo_get_boolean(volinfo, VKEY_READDIR_AHEAD)) + goto out; + + /* glusterd_volinfo_get() will get the default value if nothing set + * explicitly. Hence it is important to check set_dict before checking + * glusterd_volinfo_get, so that we consider key value of the in + * progress volume set option. + */ + ret = dict_get_str_sizen(set_dict, VKEY_RDA_CACHE_LIMIT, &rda_cache_s); + if (ret < 0) { + ret = glusterd_volinfo_get(volinfo, VKEY_RDA_CACHE_LIMIT, &rda_cache_s); + if (ret < 0) + goto out; + } + ret = gf_string2bytesize_uint64(rda_cache_s, &rda_cache_size); + if (ret < 0) { + set_graph_errstr( + graph, "invalid number format in option " VKEY_RDA_CACHE_LIMIT); + goto out; + } + + ret = dict_get_str_sizen(set_dict, VKEY_RDA_REQUEST_SIZE, &rda_req_s); + if (ret < 0) { + ret = glusterd_volinfo_get(volinfo, VKEY_RDA_REQUEST_SIZE, &rda_req_s); + if (ret < 0) + goto out; + } + ret = gf_string2bytesize_uint64(rda_req_s, &rda_req_size); + if (ret < 0) { + set_graph_errstr( + graph, "invalid number format in option " VKEY_RDA_REQUEST_SIZE); + goto out; + } + + if (rda_cache_size == 0 || rda_req_size == 0) { + set_graph_errstr(graph, "Value cannot be 0"); + ret = -1; + goto out; + } + + new_cache_size = rda_cache_size / dist_count; + if (new_cache_size < rda_req_size) { + if (new_cache_size < 4 * 1024) + new_cache_size = rda_req_size = 4 * 1024; + else + rda_req_size = new_cache_size; + + snprintf(new_req_size_str, sizeof(new_req_size_str), "%" PRId64 "%s", + rda_req_size, "B"); + ret = dict_set_dynstr_with_alloc(set_dict, VKEY_RDA_REQUEST_SIZE, + new_req_size_str); + if (ret < 0) + goto out; + } + + snprintf(new_cache_size_str, sizeof(new_cache_size_str), "%" PRId64 "%s", + new_cache_size, "B"); + ret = dict_set_dynstr_with_alloc(set_dict, VKEY_RDA_CACHE_LIMIT, + new_cache_size_str); + if (ret < 0) + goto out; - ret = nfs_option_handler (graph, vme, param); - if (!ret) - return basic_option_handler (graph, vme, NULL); +out: + return ret; +} + +static int +client_graph_set_perf_options(volgen_graph_t *graph, + glusterd_volinfo_t *volinfo, dict_t *set_dict) +{ + int ret = 0; + + /* + * Logic to make sure gfproxy-client gets custom performance translators + */ + ret = dict_get_str_boolean(set_dict, "gfproxy-client", 0); + if (ret == 1) { + return volgen_graph_set_options_generic( + graph, set_dict, volinfo, &gfproxy_client_perfxl_option_handler); + } + + /* + * Logic to make sure gfproxy-server gets custom performance translators + */ + ret = dict_get_str_boolean(set_dict, "gfproxy-server", 0); + if (ret == 1) { + return volgen_graph_set_options_generic( + graph, set_dict, volinfo, &gfproxy_server_perfxl_option_handler); + } + + /* + * Logic to make sure NFS doesn't have performance translators by + * default for a volume + */ + ret = client_graph_set_rda_options(graph, volinfo, set_dict); + if (ret < 0) return ret; + +#ifdef BUILD_GNFS + data_t *tmp_data = NULL; + char *volname = NULL; + + tmp_data = dict_get_sizen(set_dict, "nfs-volume-file"); + if (tmp_data) { + volname = volinfo->volname; + return volgen_graph_set_options_generic(graph, set_dict, volname, + &nfsperfxl_option_handler); + } else +#endif + return volgen_graph_set_options_generic(graph, set_dict, volinfo, + &perfxl_option_handler); } -/* builds a graph for nfs server role, with option overrides in mod_dict */ static int -build_nfs_graph (glusterfs_graph_t *graph, dict_t *mod_dict) -{ - glusterfs_graph_t cgraph = {{0,},}; - glusterd_volinfo_t *voliter = 
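client_graph_set_rda_options() divides the configured readdir-ahead cache across dht subvolumes and clamps the per-subvolume share against the request size, with 4 KiB as the floor. The clamp logic, lifted into a standalone example with illustrative sizes:

#include <stdio.h>
#include <inttypes.h>

int main(void)
{
    uint64_t rda_cache_size = 10 * 1024 * 1024;   /* volume-wide cache limit */
    uint64_t rda_req_size = 128 * 1024;
    int dist_count = 4;                           /* dht subvolumes          */

    uint64_t new_cache_size = rda_cache_size / dist_count;
    if (new_cache_size < rda_req_size) {
        if (new_cache_size < 4 * 1024)
            new_cache_size = rda_req_size = 4 * 1024;   /* 4 KiB floor */
        else
            rda_req_size = new_cache_size;
    }
    printf("per-subvol cache %" PRIu64 "B, request size %" PRIu64 "B\n",
           new_cache_size, rda_req_size);
    return 0;
}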
NULL; - xlator_t *this = NULL; - glusterd_conf_t *priv = NULL; - dict_t *set_dict = NULL; - xlator_t *nfsxl = NULL; - char *skey = NULL; - int ret = 0; - - this = THIS; - GF_ASSERT (this); - priv = this->private; - GF_ASSERT (priv); - - set_dict = dict_new (); - if (!set_dict) { - gf_log ("", GF_LOG_ERROR, "Out of memory"); - return -1; +graph_set_generic_options(xlator_t *this, volgen_graph_t *graph, + dict_t *set_dict, char *identifier) +{ + int ret = 0; + + ret = volgen_graph_set_options_generic(graph, set_dict, "client", + &loglevel_option_handler); + + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_GRAPH_SET_OPT_FAIL, + "changing %s log level" + " failed", + identifier); + + ret = volgen_graph_set_options_generic(graph, set_dict, "client", + &sys_loglevel_option_handler); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_GRAPH_SET_OPT_FAIL, + "changing %s syslog " + "level failed", + identifier); + + ret = volgen_graph_set_options_generic(graph, set_dict, "client", + &logger_option_handler); + + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_GRAPH_SET_OPT_FAIL, + "changing %s logger" + " failed", + identifier); + + ret = volgen_graph_set_options_generic(graph, set_dict, "client", + &log_format_option_handler); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_GRAPH_SET_OPT_FAIL, + "changing %s log format" + " failed", + identifier); + + ret = volgen_graph_set_options_generic(graph, set_dict, "client", + &log_buf_size_option_handler); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_GRAPH_SET_OPT_FAIL, + "Failed to change " + "log-buf-size option"); + + ret = volgen_graph_set_options_generic(graph, set_dict, "client", + &log_flush_timeout_option_handler); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_GRAPH_SET_OPT_FAIL, + "Failed to change " + "log-flush-timeout option"); + + ret = volgen_graph_set_options_generic( + graph, set_dict, "client", &log_localtime_logging_option_handler); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_GRAPH_SET_OPT_FAIL, + "Failed to change " + "log-localtime-logging option"); + + ret = volgen_graph_set_options_generic(graph, set_dict, "client", + &threads_option_handler); + + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_GRAPH_SET_OPT_FAIL, + "changing %s threads failed", identifier); + + return 0; +} + +static int +client_graph_builder(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *set_dict, void *param) +{ + int ret = 0; + xlator_t *xl = NULL; + char *volname = NULL; + glusterd_conf_t *conf = THIS->private; + char *tmp = NULL; + gf_boolean_t var = _gf_false; + gf_boolean_t ob = _gf_false; + int uss_enabled = -1; + xlator_t *this = THIS; + char *subvol = NULL; + size_t namelen = 0; + char *xl_id = NULL; + gf_boolean_t gfproxy_clnt = _gf_false; + + GF_ASSERT(this); + GF_ASSERT(conf); + + ret = dict_get_str_boolean(set_dict, "gfproxy-client", 0); + if (ret == -1) + goto out; + + volname = volinfo->volname; + if (ret == 0) { + ret = volgen_graph_build_clients(graph, volinfo, set_dict, param); + if (ret) + goto out; + + else + ret = volume_volgen_graph_build_clusters(graph, volinfo, _gf_false); + + if (ret == -1) + goto out; + } else { + gfproxy_clnt = _gf_true; + namelen = strlen(volinfo->volname) + SLEN("gfproxyd-") + 1; + subvol = alloca(namelen); + snprintf(subvol, namelen, "gfproxyd-%s", volinfo->volname); + + namelen = strlen(volinfo->volname) + SLEN("-gfproxy-client") + 1; + xl_id = alloca(namelen); + snprintf(xl_id, namelen, "%s-gfproxy-client", 
volinfo->volname); + volgen_graph_build_client(graph, volinfo, NULL, NULL, subvol, xl_id, + "tcp", set_dict); + } + + ret = dict_get_str_boolean(set_dict, "features.cloudsync", _gf_false); + if (ret == -1) + goto out; + + if (ret) { + xl = volgen_graph_add(graph, "features/cloudsync", volname); + if (!xl) { + ret = -1; + goto out; } + } - ret = dict_set_str (set_dict, "performance.stat-prefetch", - "off"); + ret = dict_get_str_boolean(set_dict, "features.shard", _gf_false); + if (ret == -1) + goto out; + + if (ret) { + xl = volgen_graph_add(graph, "features/shard", volname); + if (!xl) { + ret = -1; + goto out; + } + } + /* a. ret will be -1 if features.ctime is not set in the volinfo->dict which + * means ctime should be loaded into the graph. + * b. ret will be 1 if features.ctime is explicitly turned on through + * volume set and in that case ctime should be loaded into the graph. + * c. ret will be 0 if features.ctime is explicitly turned off and in that + * case ctime shouldn't be loaded into the graph. + */ + ret = dict_get_str_boolean(set_dict, "features.ctime", -1); + if (conf->op_version >= GD_OP_VERSION_5_0 && ret) { + xl = volgen_graph_add(graph, "features/utime", volname); + if (!xl) { + ret = -1; + goto out; + } + } + + /* As of now snapshot volume is read-only. Read-only xlator is loaded + * in client graph so that AFR & DHT healing can be done in server. + */ + if (volinfo->is_snap_volume) { + xl = volgen_graph_add(graph, "features/read-only", volname); + if (!xl) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GRAPH_FEATURE_ADD_FAIL, + "Failed to add " + "read-only feature to the graph of %s " + "snapshot with %s origin volume", + volname, volinfo->parent_volname); + ret = -1; + goto out; + } + ret = xlator_set_fixed_option(xl, "read-only", "on"); if (ret) - goto out; + goto out; + } + + /* Check for compress volume option, and add it to the graph on client side + */ + ret = dict_get_str_boolean(set_dict, "network.compression", 0); + if (ret == -1) + goto out; + if (ret) { + xl = volgen_graph_add(graph, "features/cdc", volname); + if (!xl) { + ret = -1; + goto out; + } + ret = xlator_set_fixed_option(xl, "mode", "client"); + if (ret) + goto out; + } + + /* gfproxy needs the quiesce translator */ + if (gfproxy_clnt) { + xl = volgen_graph_add(graph, "features/quiesce", volname); + if (!xl) { + ret = -1; + goto out; + } + } - nfsxl = volgen_graph_add_as (graph, "nfs/server", "nfs-server"); - if (!nfsxl) { + if (conf->op_version == GD_OP_VERSION_MIN) { + ret = glusterd_volinfo_get_boolean(volinfo, VKEY_FEATURES_QUOTA); + if (ret == -1) + goto out; + if (ret) { + xl = volgen_graph_add(graph, "features/quota", volname); + if (!xl) { ret = -1; goto out; + } } - ret = xlator_set_option (nfsxl, "nfs.dynamic-volumes", "on"); + } + + /* Do not allow changing read-after-open option if root-squash is + enabled. + */ + ret = dict_get_str_sizen(set_dict, "performance.read-after-open", &tmp); + if (!ret) { + ret = dict_get_str_sizen(volinfo->dict, "server.root-squash", &tmp); + if (!ret) { + ob = _gf_false; + ret = gf_string2boolean(tmp, &ob); + if (!ret && ob) { + gf_msg(this->name, GF_LOG_WARNING, 0, + GD_MSG_ROOT_SQUASH_ENABLED, + "root-squash is enabled. Please turn it" + " off to change read-after-open " + "option"); + ret = -1; + goto out; + } + } + } + + /* open behind causes problems when root-squash is enabled + (by allowing reads to happen even though the squashed user + does not have permissions to do so) as it fakes open to be + successful and later sends reads on anonymous fds. 
So when + root-squash is enabled, open-behind's option to read after + open is done is also enabled. + */ + ret = dict_get_str_sizen(set_dict, "server.root-squash", &tmp); + if (!ret) { + ret = gf_string2boolean(tmp, &var); if (ret) - goto out;; + goto out; + + if (var) { + ret = dict_get_str_sizen(volinfo->dict, + "performance.read-after-open", &tmp); + if (!ret) { + ret = gf_string2boolean(tmp, &ob); + /* go ahead with turning read-after-open on + even if string2boolean conversion fails, + OR if read-after-open option is turned off + */ + if (ret || !ob) + ret = dict_set_sizen_str_sizen( + set_dict, "performance.read-after-open", "yes"); + } else { + ret = dict_set_sizen_str_sizen( + set_dict, "performance.read-after-open", "yes"); + } + } else { + /* When root-squash has to be turned off, open-behind's + read-after-open option should be reset to what was + there before root-squash was turned on. If the option + cannot be found in volinfo's dict, it means that + option was not set before turning on root-squash. + */ + ob = _gf_false; + ret = dict_get_str_sizen(volinfo->dict, + "performance.read-after-open", &tmp); + if (!ret) { + ret = gf_string2boolean(tmp, &ob); + + if (!ret && ob) { + ret = dict_set_sizen_str_sizen( + set_dict, "performance.read-after-open", "yes"); + } + } + /* consider operation is failure only if read-after-open + option is enabled and could not set into set_dict + */ + if (!ob) + ret = 0; + } + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_ROOT_SQUASH_FAILED, + "setting " + "open behind option as part of root " + "squash failed"); + goto out; + } + } - list_for_each_entry (voliter, &priv->volumes, vol_list) { - if (voliter->status != GLUSTERD_STATUS_STARTED) - continue; + ret = dict_get_str_boolean(set_dict, "server.manage-gids", _gf_false); + if (ret != -1) { + ret = dict_set_str_sizen(set_dict, "client.send-gids", + ret ? 
"false" : "true"); + if (ret) + gf_msg(THIS->name, GF_LOG_WARNING, errno, GD_MSG_DICT_SET_FAILED, + "changing client" + " protocol option failed"); + } + + ret = client_graph_set_perf_options(graph, volinfo, set_dict); + if (ret) + goto out; + + uss_enabled = dict_get_str_boolean(set_dict, "features.uss", _gf_false); + if (uss_enabled == -1) + goto out; + if (uss_enabled && !volinfo->is_snap_volume) { + ret = volgen_graph_build_snapview_client(graph, volinfo, volname, + set_dict); + if (ret == -1) + goto out; + } + + /* add debug translators depending on the options */ + ret = check_and_add_debug_xl(graph, set_dict, volname, "client"); + if (ret) + return -1; - if (dict_get_str_boolean (voliter->dict, "nfs.disable", 0)) - continue; + /* if the client is part of 'gfproxyd' server, then we need to keep the + volume name as 'gfproxyd-<volname>', for better portmapper options */ + subvol = volname; + ret = dict_get_str_boolean(set_dict, "gfproxy-server", 0); + if (ret > 0) { + namelen = strlen(volinfo->volname) + SLEN("gfproxyd-") + 1; + subvol = alloca(namelen); + snprintf(subvol, namelen, "gfproxyd-%s", volname); + } + + ret = -1; + xl = volgen_graph_add_as(graph, "debug/io-stats", subvol); + if (!xl) { + goto out; + } + + ret = graph_set_generic_options(this, graph, set_dict, "client"); +out: + return ret; +} - ret = gf_asprintf (&skey, "rpc-auth.addr.%s.allow", - voliter->volname); - if (ret == -1) { - gf_log ("", GF_LOG_ERROR, "Out of memory"); - goto out; - } - ret = xlator_set_option (nfsxl, skey, "*"); - GF_FREE (skey); - if (ret) - goto out; +/* builds a graph for client role , with option overrides in mod_dict */ +static int +build_client_graph(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *mod_dict) +{ + return build_graph_generic(graph, volinfo, mod_dict, NULL, + &client_graph_builder); +} - ret = gf_asprintf (&skey, "nfs3.%s.volume-id", - voliter->volname); - if (ret == -1) { - gf_log ("", GF_LOG_ERROR, "Out of memory"); - goto out; - } - ret = xlator_set_option (nfsxl, skey, uuid_utoa (voliter->volume_id)); - GF_FREE (skey); - if (ret) - goto out; +char *gd_shd_options[] = {"!self-heal-daemon", "!heal-timeout", NULL}; - /* If both RDMA and TCP are the transport_type, use RDMA - for NFS client protocols */ - if (voliter->transport_type == GF_TRANSPORT_BOTH_TCP_RDMA) { - ret = dict_set_str (set_dict, "client-transport-type", - "rdma"); - if (ret) - goto out; - } +char * +gd_get_matching_option(char **options, char *option) +{ + while (*options && strcmp(*options, option)) + options++; + return *options; +} - memset (&cgraph, 0, sizeof (cgraph)); - ret = build_client_graph (&cgraph, voliter, mod_dict); - if (ret) - goto out;; - ret = volgen_graph_merge_sub (graph, &cgraph); - if (ret) - goto out; +static int +bitrot_option_handler(volgen_graph_t *graph, struct volopt_map_entry *vme, + void *param) +{ + xlator_t *xl = NULL; + int ret = 0; - if (mod_dict) { - dict_copy (mod_dict, set_dict); - ret = volgen_graph_set_options_generic (graph, set_dict, voliter, - nfs_spec_option_handler); - } - else - ret = volgen_graph_set_options_generic (graph, voliter->dict, voliter, - nfs_spec_option_handler); + xl = first_of(graph); + + if (!strcmp(vme->option, "expiry-time")) { + ret = xlator_set_fixed_option(xl, "expiry-time", vme->value); + if (ret) + return -1; + } + + if (!strcmp(vme->option, "signer-threads")) { + ret = xlator_set_fixed_option(xl, "signer-threads", vme->value); + if (ret) + return -1; + } + + return ret; +} + +static int +scrubber_option_handler(volgen_graph_t 
*graph, struct volopt_map_entry *vme, + void *param) +{ + xlator_t *xl = NULL; + int ret = 0; + + xl = first_of(graph); + + if (!strcmp(vme->option, "scrub-throttle")) { + ret = xlator_set_fixed_option(xl, "scrub-throttle", vme->value); + if (ret) + return -1; + } + + if (!strcmp(vme->option, "scrub-frequency")) { + ret = xlator_set_fixed_option(xl, "scrub-freq", vme->value); + if (ret) + return -1; + } + if (!strcmp(vme->option, "scrubber")) { + if (!strcmp(vme->value, "pause")) { + ret = xlator_set_fixed_option(xl, "scrub-state", vme->value); + if (ret) + return -1; } + } + return ret; +} +static int +shd_option_handler(volgen_graph_t *graph, struct volopt_map_entry *vme, + void *param) +{ + int ret = 0; + struct volopt_map_entry new_vme = {0}; + char *shd_option = NULL; + + shd_option = gd_get_matching_option(gd_shd_options, vme->option); + if ((vme->option[0] == '!') && !shd_option) + goto out; + new_vme = *vme; + if (shd_option) { + new_vme.option = shd_option + 1; // option with out '!' + } + + ret = no_filter_option_handler(graph, &new_vme, param); +out: + return ret; +} - out: - dict_destroy (set_dict); +#ifdef BUILD_GNFS +static int +nfs_option_handler(volgen_graph_t *graph, struct volopt_map_entry *vme, + void *param) +{ + static struct nfs_opt nfs_opts[] = { + /* {pattern, printf_pattern} */ + {"!rpc-auth.addr.*.allow", "rpc-auth.addr.%s.allow"}, + {"!rpc-auth.addr.*.reject", "rpc-auth.addr.%s.reject"}, + {"!rpc-auth.auth-unix.*", "rpc-auth.auth-unix.%s"}, + {"!rpc-auth.auth-null.*", "rpc-auth.auth-null.%s"}, + {"!nfs3.*.trusted-sync", "nfs3.%s.trusted-sync"}, + {"!nfs3.*.trusted-write", "nfs3.%s.trusted-write"}, + {"!nfs3.*.volume-access", "nfs3.%s.volume-access"}, + {"!rpc-auth.ports.*.insecure", "rpc-auth.ports.%s.insecure"}, + {"!nfs-disable", "nfs.%s.disable"}, + {NULL, NULL}}; + xlator_t *xl = NULL; + char *aa = NULL; + int ret = 0; + glusterd_volinfo_t *volinfo = NULL; + int keylen; + struct nfs_opt *opt = NULL; + + volinfo = param; + + if (!volinfo || (volinfo->volname[0] == '\0')) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); + return 0; + } - return ret; + if (!vme || !(vme->option)) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); + return 0; + } + + xl = first_of(graph); + + for (opt = nfs_opts; opt->pattern; opt++) { + if (!strcmp(vme->option, opt->pattern)) { + keylen = gf_asprintf(&aa, opt->printf_pattern, volinfo->volname); + + if (keylen == -1) { + return -1; + } + + ret = xlator_set_option(xl, aa, keylen, vme->value); + GF_FREE(aa); + + if (ret) + return -1; + + goto out; + } + } + + if (!strcmp(vme->option, "!nfs3.*.export-dir")) { + keylen = gf_asprintf(&aa, "nfs3.%s.export-dir", volinfo->volname); + + if (keylen == -1) { + return -1; + } + + ret = gf_canonicalize_path(vme->value); + if (ret) { + GF_FREE(aa); + return -1; + } + ret = xlator_set_option(xl, aa, keylen, vme->value); + GF_FREE(aa); + + if (ret) + return -1; + } else if ((strcmp(vme->voltype, "nfs/server") == 0) && + (vme->option[0] != '!')) { + ret = xlator_set_option(xl, vme->option, strlen(vme->option), + vme->value); + if (ret) + return -1; + } + +out: + return 0; +} + +#endif +char * +volgen_get_shd_key(int type) +{ + char *key = NULL; + + switch (type) { + case GF_CLUSTER_TYPE_REPLICATE: + key = "cluster.self-heal-daemon"; + break; + case GF_CLUSTER_TYPE_DISPERSE: + key = "cluster.disperse-self-heal-daemon"; + break; + default: + key = NULL; + break; + } + + return key; +} + +static int +volgen_set_shd_key_enable(dict_t *set_dict, 
const int type) +{ + int ret = 0; + + switch (type) { + case GF_CLUSTER_TYPE_REPLICATE: + ret = dict_set_sizen_str_sizen(set_dict, "cluster.self-heal-daemon", + "enable"); + break; + case GF_CLUSTER_TYPE_DISPERSE: + ret = dict_set_sizen_str_sizen( + set_dict, "cluster.disperse-self-heal-daemon", "enable"); + break; + default: + break; + } + + return ret; +} + +static gf_boolean_t +volgen_is_shd_compatible_xl(char *xl_type) +{ + char *shd_xls[] = {"cluster/replicate", "cluster/disperse", NULL}; + if (gf_get_index_by_elem(shd_xls, xl_type) != -1) + return _gf_true; + + return _gf_false; +} + +static int +volgen_graph_set_iam_shd(volgen_graph_t *graph) +{ + xlator_t *trav; + int ret = 0; + + for (trav = first_of(graph); trav; trav = trav->next) { + if (!volgen_is_shd_compatible_xl(trav->type)) + continue; + + ret = xlator_set_fixed_option(trav, "iam-self-heal-daemon", "yes"); + if (ret) + break; + } + return ret; +} + +static int +prepare_shd_volume_options(glusterd_volinfo_t *volinfo, dict_t *mod_dict, + dict_t *set_dict) +{ + int ret = 0; + + ret = volgen_set_shd_key_enable(set_dict, volinfo->type); + if (ret) + goto out; + + ret = dict_set_uint32(set_dict, "trusted-client", GF_CLIENT_TRUSTED); + if (ret) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=trusted-client", NULL); + goto out; + } + + dict_copy(volinfo->dict, set_dict); + if (mod_dict) + dict_copy(mod_dict, set_dict); +out: + return ret; } +static int +build_afr_ec_clusters(volgen_graph_t *graph, glusterd_volinfo_t *volinfo) +{ + int clusters = -1; + switch (volinfo->type) { + case GF_CLUSTER_TYPE_REPLICATE: + clusters = volgen_graph_build_afr_clusters(graph, volinfo); + break; + + case GF_CLUSTER_TYPE_DISPERSE: + clusters = volgen_graph_build_ec_clusters(graph, volinfo); + break; + } + return clusters; +} + +static int +build_shd_clusters(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *set_dict) +{ + int ret = 0; + int clusters = -1; + + ret = volgen_graph_build_clients(graph, volinfo, set_dict, NULL); + if (ret) + goto out; + clusters = build_afr_ec_clusters(graph, volinfo); + +out: + return clusters; +} + +gf_boolean_t +gd_is_self_heal_enabled(glusterd_volinfo_t *volinfo, dict_t *dict) +{ + char *shd_key = NULL; + gf_boolean_t shd_enabled = _gf_false; + + GF_VALIDATE_OR_GOTO("glusterd", volinfo, out); + + switch (volinfo->type) { + case GF_CLUSTER_TYPE_REPLICATE: + case GF_CLUSTER_TYPE_DISPERSE: + shd_key = volgen_get_shd_key(volinfo->type); + shd_enabled = dict_get_str_boolean(dict, shd_key, _gf_true); + break; + default: + break; + } +out: + return shd_enabled; +} + +int +build_rebalance_volfile(glusterd_volinfo_t *volinfo, char *filepath, + dict_t *mod_dict) +{ + volgen_graph_t graph = { + 0, + }; + xlator_t *xl = NULL; + int ret = -1; + xlator_t *this = NULL; + dict_t *set_dict = NULL; + + this = THIS; + + graph.type = GF_REBALANCED; + + if (volinfo->brick_count <= volinfo->dist_leaf_count) { + /* + * Volume is not a distribute volume or + * contains only 1 brick, no need to create + * the volfiles. 
+ */ + return 0; + } + + set_dict = dict_copy_with_ref(volinfo->dict, NULL); + if (!set_dict) + return -1; + + if (mod_dict) { + dict_copy(mod_dict, set_dict); + /* XXX dict_copy swallows errors */ + } + + /* Rebalance is always a trusted client*/ + ret = dict_set_uint32(set_dict, "trusted-client", GF_CLIENT_TRUSTED); + if (ret) + return -1; + + ret = volgen_graph_build_clients(&graph, volinfo, set_dict, NULL); + if (ret) + goto out; + + ret = volume_volgen_graph_build_clusters(&graph, volinfo, _gf_false); + if (ret) + goto out; + + xl = volgen_graph_add_as(&graph, "debug/io-stats", volinfo->volname); + if (!xl) { + ret = -1; + goto out; + } + + ret = graph_set_generic_options(this, &graph, set_dict, "rebalance-daemon"); + if (ret) + goto out; + + ret = volgen_graph_set_options_generic(&graph, set_dict, volinfo, + basic_option_handler); + + if (!ret) + ret = volgen_write_volfile(&graph, filepath); + +out: + volgen_graph_free(&graph); + + dict_unref(set_dict); + + return ret; +} + +static int +build_shd_volume_graph(xlator_t *this, volgen_graph_t *graph, + glusterd_volinfo_t *volinfo, dict_t *mod_dict, + dict_t *set_dict, gf_boolean_t graph_check) +{ + volgen_graph_t cgraph = {0}; + int ret = 0; + int clusters = -1; + + if (!glusterd_is_shd_compatible_volume(volinfo)) + goto out; + + ret = prepare_shd_volume_options(volinfo, mod_dict, set_dict); + if (ret) + goto out; + + clusters = build_shd_clusters(&cgraph, volinfo, set_dict); + if (clusters < 0) { + ret = -1; + goto out; + } + + ret = volgen_graph_set_options_generic(&cgraph, set_dict, volinfo, + shd_option_handler); + if (ret) + goto out; + + ret = volgen_graph_set_iam_shd(&cgraph); + if (ret) + goto out; + + ret = volgen_graph_merge_sub(graph, &cgraph, clusters); + if (ret) + goto out; + + ret = graph_set_generic_options(this, graph, set_dict, "self-heal daemon"); +out: + return ret; +} + +int +build_shd_graph(glusterd_volinfo_t *volinfo, volgen_graph_t *graph, + dict_t *mod_dict) +{ + xlator_t *this = NULL; + dict_t *set_dict = NULL; + int ret = 0; + xlator_t *iostxl = NULL; + gf_boolean_t graph_check = _gf_false; + + this = THIS; + + set_dict = dict_new(); + if (!set_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); + ret = -ENOMEM; + goto out; + } + + if (mod_dict) + graph_check = dict_get_str_boolean(mod_dict, "graph-check", 0); + iostxl = volgen_graph_add_as(graph, "debug/io-stats", volinfo->volname); + if (!iostxl) { + ret = -1; + goto out; + } + + ret = build_shd_volume_graph(this, graph, volinfo, mod_dict, set_dict, + graph_check); + +out: + if (set_dict) + dict_unref(set_dict); + return ret; +} + +#ifdef BUILD_GNFS + +static int +volgen_graph_set_iam_nfsd(const volgen_graph_t *graph) +{ + xlator_t *trav; + int ret = 0; + + for (trav = first_of((volgen_graph_t *)graph); trav; trav = trav->next) { + if (strcmp(trav->type, "cluster/replicate") != 0) + continue; + + ret = xlator_set_fixed_option(trav, "iam-nfs-daemon", "yes"); + if (ret) + break; + } + return ret; +} + +/* builds a graph for nfs server role, with option overrides in mod_dict */ +int +build_nfs_graph(volgen_graph_t *graph, dict_t *mod_dict) +{ + volgen_graph_t cgraph = { + 0, + }; + glusterd_volinfo_t *voliter = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + dict_t *set_dict = NULL; + xlator_t *nfsxl = NULL; + char *skey = NULL; + int ret = 0; + char nfs_xprt[16] = { + 0, + }; + char *volname = NULL; + data_t *data = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + set_dict 
= dict_new(); + if (!set_dict) { + gf_msg("glusterd", GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "Out of memory"); + return -1; + } + + nfsxl = volgen_graph_add_as(graph, "nfs/server", "nfs-server"); + if (!nfsxl) { + ret = -1; + goto out; + } + ret = xlator_set_fixed_option(nfsxl, "nfs.dynamic-volumes", "on"); + if (ret) + goto out; + + ret = xlator_set_fixed_option(nfsxl, "nfs.nlm", "on"); + if (ret) + goto out; + + ret = xlator_set_fixed_option(nfsxl, "nfs.drc", "off"); + if (ret) + goto out; + + cds_list_for_each_entry(voliter, &priv->volumes, vol_list) + { + if (voliter->status != GLUSTERD_STATUS_STARTED) + continue; + + if (dict_get_str_boolean(voliter->dict, NFS_DISABLE_MAP_KEY, 0)) + continue; + + ret = gf_asprintf(&skey, "rpc-auth.addr.%s.allow", voliter->volname); + if (ret == -1) { + gf_msg("glusterd", GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "Out of memory"); + goto out; + } + ret = xlator_set_option(nfsxl, skey, ret, "*"); + GF_FREE(skey); + if (ret) + goto out; + + ret = gf_asprintf(&skey, "nfs3.%s.volume-id", voliter->volname); + if (ret == -1) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_NO_MEMORY, + "Out of memory"); + goto out; + } + ret = xlator_set_option(nfsxl, skey, ret, + uuid_utoa(voliter->volume_id)); + GF_FREE(skey); + if (ret) + goto out; + + /* If both RDMA and TCP are the transport_type, use TCP for NFS + * client protocols, because tcp,rdma volume can be created in + * servers which does not have rdma supported hardware + * The transport type specified here is client transport type + * which is used for communication between gluster-nfs and brick + * processes. + * User can specify client transport for tcp,rdma volume using + * nfs.transport-type, if it is not set by user default + * one will be tcp. + */ + memset(&cgraph, 0, sizeof(cgraph)); + if (mod_dict) + get_transport_type(voliter, mod_dict, nfs_xprt, _gf_true); + else + get_transport_type(voliter, voliter->dict, nfs_xprt, _gf_true); + + ret = dict_set_sizen_str_sizen(set_dict, "performance.stat-prefetch", + "off"); + if (ret) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=performance.stat-prefetch", NULL); + goto out; + } + + ret = dict_set_sizen_str_sizen(set_dict, + "performance.client-io-threads", "off"); + if (ret) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=performance.client-io-threads", NULL); + goto out; + } + + ret = dict_set_str_sizen(set_dict, "client-transport-type", nfs_xprt); + if (ret) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=client-transport-type", NULL); + goto out; + } + + ret = dict_set_uint32(set_dict, "trusted-client", GF_CLIENT_TRUSTED); + if (ret) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=trusted-client", NULL); + goto out; + } + + ret = dict_set_sizen_str_sizen(set_dict, "nfs-volume-file", "yes"); + if (ret) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=nfs-volume-file", NULL); + goto out; + } + + if (mod_dict && (data = dict_get_sizen(mod_dict, "volume-name"))) { + volname = data->data; + if (strcmp(volname, voliter->volname) == 0) + dict_copy(mod_dict, set_dict); + } + + ret = build_client_graph(&cgraph, voliter, set_dict); + if (ret) + goto out; + + if (mod_dict) { + dict_copy(mod_dict, set_dict); + ret = volgen_graph_set_options_generic(&cgraph, set_dict, voliter, + basic_option_handler); + } else { + ret = volgen_graph_set_options_generic( + &cgraph, voliter->dict, voliter, basic_option_handler); + } + + if (ret) + 
goto out; + + ret = volgen_graph_set_iam_nfsd(&cgraph); + if (ret) + goto out; + ret = volgen_graph_merge_sub(graph, &cgraph, 1); + if (ret) + goto out; + ret = dict_reset(set_dict); + if (ret) + goto out; + } + + cds_list_for_each_entry(voliter, &priv->volumes, vol_list) + { + if (mod_dict) { + ret = volgen_graph_set_options_generic(graph, mod_dict, voliter, + nfs_option_handler); + } else { + ret = volgen_graph_set_options_generic(graph, voliter->dict, + voliter, nfs_option_handler); + } + + if (ret) + gf_msg("glusterd", GF_LOG_WARNING, 0, GD_MSG_GRAPH_SET_OPT_FAIL, + "Could not set " + "vol-options for the volume %s", + voliter->volname); + } +out: + gf_msg_debug("glusterd", 0, "Returning %d", ret); + dict_unref(set_dict); + return ret; +} +#endif /**************************** * * Volume generation interface * ****************************/ - static void -get_brick_filepath (char *filename, glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo) +get_brick_filepath(char *filename, glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, char *prefix) { - char path[PATH_MAX] = {0,}; - char brick[PATH_MAX] = {0,}; - glusterd_conf_t *priv = NULL; + char path[PATH_MAX] = { + 0, + }; + char brick[PATH_MAX] = { + 0, + }; + glusterd_conf_t *priv = NULL; + int32_t len = 0; + + priv = THIS->private; + + GLUSTERD_REMOVE_SLASH_FROM_PATH(brickinfo->path, brick); + GLUSTERD_GET_VOLUME_DIR(path, volinfo, priv); + + if (prefix) + len = snprintf(filename, PATH_MAX, "%s/%s.%s.%s.%s.vol", path, + volinfo->volname, prefix, brickinfo->hostname, brick); + else + len = snprintf(filename, PATH_MAX, "%s/%s.%s.%s.vol", path, + volinfo->volname, brickinfo->hostname, brick); + if ((len < 0) || (len >= PATH_MAX)) { + filename[0] = 0; + } +} - priv = THIS->private; +gf_boolean_t +glusterd_is_valid_volfpath(char *volname, char *brick) +{ + char volfpath[PATH_MAX] = { + 0, + }; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_volinfo_t *volinfo = NULL; + int32_t ret = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + ret = glusterd_brickinfo_new_from_brick(brick, &brickinfo, _gf_false, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_BRICKINFO_CREATE_FAIL, + "Failed to create brickinfo" + " for brick %s", + brick); + ret = 0; + goto out; + } + ret = glusterd_volinfo_new(&volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOLINFO_STORE_FAIL, + "Failed to create volinfo"); + ret = 0; + goto out; + } + (void)snprintf(volinfo->volname, sizeof(volinfo->volname), "%s", volname); + get_brick_filepath(volfpath, volinfo, brickinfo, NULL); - GLUSTERD_REMOVE_SLASH_FROM_PATH (brickinfo->path, brick); - GLUSTERD_GET_VOLUME_DIR (path, volinfo, priv); + ret = ((strlen(volfpath) < PATH_MAX) && + strlen(strrchr(volfpath, '/')) < _POSIX_PATH_MAX); - snprintf (filename, PATH_MAX, "%s/%s.%s.%s.vol", - path, volinfo->volname, - brickinfo->hostname, - brick); +out: + if (brickinfo) + glusterd_brickinfo_delete(brickinfo); + if (volinfo) + glusterd_volinfo_unref(volinfo); + return ret; } -static int -glusterd_generate_brick_volfile (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo) +int +glusterd_build_gfproxyd_volfile(glusterd_volinfo_t *volinfo, char *filename) { - glusterfs_graph_t graph = {{0,},}; - char filename[PATH_MAX] = {0,}; - int ret = -1; + volgen_graph_t graph = { + 0, + }; + int ret = -1; + + ret = build_graph_generic(&graph, volinfo, NULL, NULL, + &gfproxy_server_graph_builder); + if (ret == 0) + ret = volgen_write_volfile(&graph, 
filename); - GF_ASSERT (volinfo); - GF_ASSERT (brickinfo); + volgen_graph_free(&graph); - get_brick_filepath (filename, volinfo, brickinfo); + return ret; +} - ret = build_server_graph (&graph, volinfo, NULL, brickinfo->path); - if (!ret) - ret = volgen_write_volfile (&graph, filename); +int +glusterd_generate_gfproxyd_volfile(glusterd_volinfo_t *volinfo) +{ + char filename[PATH_MAX] = { + 0, + }; + int ret = -1; - volgen_graph_free (&graph); + GF_ASSERT(volinfo); - return ret; + glusterd_svc_build_gfproxyd_volfile_path(volinfo, filename, PATH_MAX - 1); + + ret = glusterd_build_gfproxyd_volfile(volinfo, filename); + + return ret; } static int -generate_brick_volfiles (glusterd_volinfo_t *volinfo) +glusterd_generate_brick_volfile(glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, + dict_t *mod_dict, void *data) { - glusterd_brickinfo_t *brickinfo = NULL; - int ret = -1; + volgen_graph_t graph = { + 0, + }; + char filename[PATH_MAX] = { + 0, + }; + int ret = -1; - list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { - gf_log ("", GF_LOG_DEBUG, - "Found a brick - %s:%s", brickinfo->hostname, - brickinfo->path); + GF_ASSERT(volinfo); + GF_ASSERT(brickinfo); - ret = glusterd_generate_brick_volfile (volinfo, brickinfo); - if (ret) - goto out; + get_brick_filepath(filename, volinfo, brickinfo, NULL); + + ret = build_server_graph(&graph, volinfo, mod_dict, brickinfo); + if (!ret) + ret = volgen_write_volfile(&graph, filename); + volgen_graph_free(&graph); + + return ret; +} + +int +build_quotad_graph(volgen_graph_t *graph, dict_t *mod_dict) +{ + volgen_graph_t cgraph = {0}; + glusterd_volinfo_t *voliter = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + dict_t *set_dict = NULL; + int ret = 0; + xlator_t *quotad_xl = NULL; + char *skey = NULL; + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + GF_ASSERT(priv); + + graph->type = GF_QUOTAD; + + set_dict = dict_new(); + if (!set_dict) { + ret = -ENOMEM; + goto out; + } + + quotad_xl = volgen_graph_add_as(graph, "features/quotad", "quotad"); + if (!quotad_xl) { + ret = -1; + goto out; + } + + cds_list_for_each_entry(voliter, &priv->volumes, vol_list) + { + if (voliter->status != GLUSTERD_STATUS_STARTED) + continue; + + if (1 != glusterd_is_volume_quota_enabled(voliter)) + continue; + + ret = dict_set_uint32(set_dict, "trusted-client", GF_CLIENT_TRUSTED); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=trusted-client", NULL); + goto out; } - ret = 0; + dict_copy(voliter->dict, set_dict); + if (mod_dict) + dict_copy(mod_dict, set_dict); + + ret = gf_asprintf(&skey, "%s.volume-id", voliter->volname); + if (ret == -1) { + gf_msg("glusterd", GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "Out of memory"); + goto out; + } + ret = xlator_set_option(quotad_xl, skey, ret, voliter->volname); + GF_FREE(skey); + if (ret) + goto out; + + memset(&cgraph, 0, sizeof(cgraph)); + ret = volgen_graph_build_clients(&cgraph, voliter, set_dict, NULL); + if (ret) + goto out; + + ret = volume_volgen_graph_build_clusters(&cgraph, voliter, _gf_true); + if (ret) { + ret = -1; + goto out; + } + + if (mod_dict) { + dict_copy(mod_dict, set_dict); + ret = volgen_graph_set_options_generic(&cgraph, set_dict, voliter, + basic_option_handler); + } else { + ret = volgen_graph_set_options_generic( + &cgraph, voliter->dict, voliter, basic_option_handler); + } + if (ret) + goto out; + + ret = volgen_graph_merge_sub(graph, &cgraph, 1); + if (ret) + goto out; + + ret = dict_reset(set_dict); + if (ret) + goto 
out; + } out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + if (set_dict) + dict_unref(set_dict); + return ret; } static void -get_client_filepath (char *filename, glusterd_volinfo_t *volinfo) +get_vol_tstamp_file(char *filename, glusterd_volinfo_t *volinfo) { - char path[PATH_MAX] = {0,}; - glusterd_conf_t *priv = NULL; - - priv = THIS->private; + glusterd_conf_t *priv = NULL; - GLUSTERD_GET_VOLUME_DIR (path, volinfo, priv); + priv = THIS->private; - snprintf (filename, PATH_MAX, "%s/%s-fuse.vol", - path, volinfo->volname); + GLUSTERD_GET_VOLUME_DIR(filename, volinfo, priv); + strncat(filename, "/marker.tstamp", PATH_MAX - strlen(filename) - 1); } static void -get_rdma_client_filepath (char *filename, glusterd_volinfo_t *volinfo) +get_parent_vol_tstamp_file(char *filename, glusterd_volinfo_t *volinfo) { - char path[PATH_MAX] = {0,}; - glusterd_conf_t *priv = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + int32_t len = 0; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + len = snprintf(filename, PATH_MAX, "%s/vols/%s/marker.tstamp", + priv->workdir, volinfo->parent_volname); + if ((len < 0) || (len >= PATH_MAX)) { + filename[0] = 0; + } +} + +int +generate_brick_volfiles(glusterd_volinfo_t *volinfo) +{ + char tstamp_file[PATH_MAX] = { + 0, + }; + char parent_tstamp_file[PATH_MAX] = { + 0, + }; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + ret = glusterd_volinfo_get_boolean(volinfo, VKEY_MARKER_XTIME); + if (ret == -1) + return -1; + + assign_brick_groups(volinfo); + get_vol_tstamp_file(tstamp_file, volinfo); - priv = THIS->private; + if (ret) { + ret = open(tstamp_file, O_WRONLY | O_CREAT | O_EXCL, 0600); + if (ret == -1 && errno == EEXIST) { + gf_msg_debug(this->name, 0, "timestamp file exist"); + ret = -2; + } + if (ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "failed to create " + "%s", + tstamp_file); + return -1; + } + if (ret >= 0) { + sys_close(ret); + /* If snap_volume, retain timestamp for marker.tstamp + * from parent. Geo-replication depends on mtime of + * 'marker.tstamp' to decide the volume-mark, i.e., + * geo-rep start time just after session is created. 
+ */ + if (volinfo->is_snap_volume) { + get_parent_vol_tstamp_file(parent_tstamp_file, volinfo); + ret = gf_set_timestamp(parent_tstamp_file, tstamp_file); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TSTAMP_SET_FAIL, + "Unable to set atime and mtime" + " of %s as of %s", + tstamp_file, parent_tstamp_file); + goto out; + } + } + } + } else { + ret = sys_unlink(tstamp_file); + if (ret == -1 && errno == ENOENT) + ret = 0; + if (ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "failed to unlink " + "%s", + tstamp_file); + return -1; + } + } - GLUSTERD_GET_VOLUME_DIR (path, volinfo, priv); + ret = glusterd_volume_brick_for_each(volinfo, NULL, + glusterd_generate_brick_volfile); + if (ret) + goto out; - snprintf (filename, PATH_MAX, "%s/%s-rdma-fuse.vol", - path, volinfo->volname); + ret = 0; + +out: + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; } static int -generate_client_volfile (glusterd_volinfo_t *volinfo) +generate_single_transport_client_volfile(glusterd_volinfo_t *volinfo, + char *filepath, dict_t *dict) { - glusterfs_graph_t graph = {{0,},}; - char filename[PATH_MAX] = {0,}; - int ret = -1; - dict_t *dict = NULL; + volgen_graph_t graph = { + 0, + }; + int ret = -1; - get_client_filepath (filename, volinfo); + ret = build_client_graph(&graph, volinfo, dict); + if (!ret) + ret = volgen_write_volfile(&graph, filepath); - if (volinfo->transport_type == GF_TRANSPORT_BOTH_TCP_RDMA) { - dict = dict_new (); - if (!dict) - goto out; - ret = dict_set_str (dict, "client-transport-type", "tcp"); - if (ret) - goto out; + volgen_graph_free(&graph); + + return ret; +} + +int +glusterd_generate_client_per_brick_volfile(glusterd_volinfo_t *volinfo) +{ + char filepath[PATH_MAX] = { + 0, + }; + glusterd_brickinfo_t *brick = NULL; + volgen_graph_t graph = { + 0, + }; + dict_t *dict = NULL; + xlator_t *xl = NULL; + int ret = -1; + char *ssl_str = NULL; + gf_boolean_t ssl_bool = _gf_false; + xlator_t *this = THIS; + GF_ASSERT(this); + + dict = dict_new(); + if (!dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); + goto out; + } + + ret = dict_set_uint32(dict, "trusted-client", GF_CLIENT_TRUSTED); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=trusted-client", NULL); + goto free_dict; + } + + if (dict_get_str_sizen(volinfo->dict, "client.ssl", &ssl_str) == 0) { + if (gf_string2boolean(ssl_str, &ssl_bool) == 0) { + if (ssl_bool) { + if (dict_set_dynstr_with_alloc(dict, "client.ssl", "on") != 0) { + ret = -1; + goto free_dict; + } + } + } else { + ret = -1; + goto free_dict; } + } + + cds_list_for_each_entry(brick, &volinfo->bricks, brick_list) + { + xl = volgen_graph_build_client(&graph, volinfo, brick->hostname, NULL, + brick->path, brick->brick_id, "tcp", + dict); + if (!xl) { + ret = -1; + goto out; + } + + get_brick_filepath(filepath, volinfo, brick, "client"); + ret = volgen_write_volfile(&graph, filepath); + if (ret < 0) + goto out; - ret = build_client_graph (&graph, volinfo, dict); - if (!ret) - ret = volgen_write_volfile (&graph, filename); + volgen_graph_free(&graph); + memset(&graph, 0, sizeof(graph)); + } - volgen_graph_free (&graph); + ret = 0; +out: + if (ret) + volgen_graph_free(&graph); - if (dict) { - /* This means, transport type is both RDMA and TCP */ +free_dict: - memset (&graph, 0, sizeof (graph)); - get_rdma_client_filepath (filename, volinfo); + if (dict) + dict_unref(dict); - ret = dict_set_str (dict, "client-transport-type", "rdma"); - if (ret) - 
goto out; + return ret; +} + +static void +enumerate_transport_reqs(gf_transport_type type, char **types) +{ + switch (type) { + case GF_TRANSPORT_TCP: + types[0] = "tcp"; + break; + case GF_TRANSPORT_RDMA: + types[0] = "rdma"; + break; + case GF_TRANSPORT_BOTH_TCP_RDMA: + types[0] = "tcp"; + types[1] = "rdma"; + break; + } +} - ret = build_client_graph (&graph, volinfo, dict); - if (!ret) - ret = volgen_write_volfile (&graph, filename); +int +generate_dummy_client_volfiles(glusterd_volinfo_t *volinfo) +{ + int i = 0; + int ret = -1; + char filepath[PATH_MAX] = { + 0, + }; + char *types[] = {NULL, NULL, NULL}; + dict_t *dict = NULL; + xlator_t *this = NULL; + gf_transport_type type = GF_TRANSPORT_TCP; + + this = THIS; + + enumerate_transport_reqs(volinfo->transport_type, types); + dict = dict_new(); + if (!dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); + goto out; + } + for (i = 0; types[i]; i++) { + ret = dict_set_str(dict, "client-transport-type", types[i]); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=client-transport-type", NULL); + goto out; + } + type = transport_str_to_type(types[i]); - volgen_graph_free (&graph); + ret = dict_set_uint32(dict, "trusted-client", GF_CLIENT_OTHER); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=trusted-client", NULL); + goto out; + } - dict_unref (dict); + ret = glusterd_get_dummy_client_filepath(filepath, volinfo, type); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, + "Received invalid transport-type."); + goto out; } + ret = generate_single_transport_client_volfile(volinfo, filepath, dict); + if (ret) + goto out; + } + out: - return ret; + if (dict) + dict_unref(dict); + + gf_msg_trace("glusterd", 0, "Returning %d", ret); + return ret; } int -glusterd_create_rb_volfiles (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo) +generate_client_volfiles(glusterd_volinfo_t *volinfo, + glusterd_client_type_t client_type) { - int ret = -1; + int i = 0; + int ret = -1; + char filepath[PATH_MAX] = { + 0, + }; + char *volname = NULL; + char *types[] = {NULL, NULL, NULL}; + dict_t *dict = NULL; + xlator_t *this = NULL; + gf_transport_type type = GF_TRANSPORT_TCP; + + this = THIS; + + volname = volinfo->is_snap_volume ? volinfo->parent_volname + : volinfo->volname; + + if (volname && !strcmp(volname, GLUSTER_SHARED_STORAGE) && + client_type != GF_CLIENT_TRUSTED) { + /* + * shared storage volume cannot be mounted from non trusted + * nodes. So we are not creating volfiles for non-trusted + * clients for shared volumes as well as snapshot of shared + * volumes. + */ - ret = glusterd_generate_brick_volfile (volinfo, brickinfo); - if (!ret) - ret = generate_client_volfile (volinfo); - if (!ret) - ret = glusterd_fetchspec_notify (THIS); + ret = 0; + gf_msg_debug("glusterd", 0, + "Skipping the non-trusted volfile" + "creation for shared storage volume. 
Volume %s", + volname); + goto out; + } + + enumerate_transport_reqs(volinfo->transport_type, types); + dict = dict_new(); + if (!dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); + goto out; + } + for (i = 0; types[i]; i++) { + ret = dict_set_str(dict, "client-transport-type", types[i]); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=client-transport-type", NULL); + goto out; + } + type = transport_str_to_type(types[i]); - return ret; + ret = dict_set_uint32(dict, "trusted-client", client_type); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=trusted-client", NULL); + goto out; + } + + if (client_type == GF_CLIENT_TRUSTED) { + ret = glusterd_get_trusted_client_filepath(filepath, volinfo, type); + } else if (client_type == GF_CLIENT_TRUSTED_PROXY) { + glusterd_get_gfproxy_client_volfile(volinfo, filepath, PATH_MAX); + ret = dict_set_int32_sizen(dict, "gfproxy-client", 1); + } else { + ret = glusterd_get_client_filepath(filepath, volinfo, type); + } + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, + "Received invalid transport-type"); + goto out; + } + + ret = generate_single_transport_client_volfile(volinfo, filepath, dict); + if (ret) + goto out; + } + + /* Generate volfile for rebalance process */ + glusterd_get_rebalance_volfile(volinfo, filepath, PATH_MAX); + ret = build_rebalance_volfile(volinfo, filepath, dict); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL, + "Failed to create rebalance volfile for %s", volinfo->volname); + goto out; + } + +out: + if (dict) + dict_unref(dict); + + gf_msg_trace("glusterd", 0, "Returning %d", ret); + return ret; } int -glusterd_create_volfiles_and_notify_services (glusterd_volinfo_t *volinfo) +glusterd_snapdsvc_generate_volfile(volgen_graph_t *graph, + glusterd_volinfo_t *volinfo) { - int ret = -1; + xlator_t *xl = NULL; + char *username = NULL; + char *passwd = NULL; + int ret = 0; + char key[PATH_MAX] = { + 0, + }; + dict_t *set_dict = NULL; + char *loglevel = NULL; + char *xlator = NULL; + char *ssl_str = NULL; + gf_boolean_t ssl_bool = _gf_false; + + set_dict = dict_copy(volinfo->dict, NULL); + if (!set_dict) + return -1; - ret = generate_brick_volfiles (volinfo); + ret = dict_get_str_sizen(set_dict, "xlator", &xlator); + if (!ret) { + ret = dict_get_str_sizen(set_dict, "loglevel", &loglevel); if (ret) { - gf_log ("", GF_LOG_ERROR, - "Could not generate volfiles for bricks"); - goto out; + gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "could not get both" + " translator name and loglevel for log level " + "request"); + return -1; } + } - ret = generate_client_volfile (volinfo); - if (ret) { - gf_log ("", GF_LOG_ERROR, - "Could not generate volfile for client"); - goto out; + xl = volgen_graph_add(graph, "features/snapview-server", volinfo->volname); + if (!xl) + return -1; + + ret = xlator_set_fixed_option(xl, "volname", volinfo->volname); + if (ret) + return -1; + + xl = volgen_graph_add(graph, "performance/io-threads", volinfo->volname); + if (!xl) + return -1; + + snprintf(key, sizeof(key), "snapd-%s", volinfo->volname); + xl = volgen_graph_add_as(graph, "debug/io-stats", key); + if (!xl) + return -1; + + xl = volgen_graph_add(graph, "protocol/server", volinfo->volname); + if (!xl) + return -1; + + ret = xlator_set_fixed_option(xl, "transport-type", "tcp"); + if (ret) + return -1; + + if (dict_get_str_sizen(set_dict, "server.ssl", &ssl_str) == 0) { + if 
(gf_string2boolean(ssl_str, &ssl_bool) == 0) { + if (ssl_bool) { + ret = xlator_set_fixed_option( + xl, "transport.socket.ssl-enabled", "true"); + if (ret) { + return -1; + } + } } + } + + RPC_SET_OPT(xl, SSL_OWN_CERT_OPT, "ssl-own-cert", return -1); + RPC_SET_OPT(xl, SSL_PRIVATE_KEY_OPT, "ssl-private-key", return -1); + RPC_SET_OPT(xl, SSL_CA_LIST_OPT, "ssl-ca-list", return -1); + RPC_SET_OPT(xl, SSL_CRL_PATH_OPT, "ssl-crl-path", return -1); + RPC_SET_OPT(xl, SSL_CERT_DEPTH_OPT, "ssl-cert-depth", return -1); + RPC_SET_OPT(xl, SSL_CIPHER_LIST_OPT, "ssl-cipher-list", return -1); + RPC_SET_OPT(xl, SSL_DH_PARAM_OPT, "ssl-dh-param", return -1); + RPC_SET_OPT(xl, SSL_EC_CURVE_OPT, "ssl-ec-curve", return -1); + + username = glusterd_auth_get_username(volinfo); + passwd = glusterd_auth_get_password(volinfo); + + ret = snprintf(key, sizeof(key), "auth.login.snapd-%s.allow", + volinfo->volname); + ret = xlator_set_option(xl, key, ret, username); + if (ret) + return -1; + + ret = snprintf(key, sizeof(key), "auth.login.%s.password", username); + ret = xlator_set_option(xl, key, ret, passwd); + if (ret) + return -1; + + snprintf(key, sizeof(key), "snapd-%s", volinfo->volname); + ret = xlator_set_fixed_option(xl, "auth-path", key); + if (ret) + return -1; + + ret = volgen_graph_set_options_generic( + graph, set_dict, (xlator && loglevel) ? (void *)set_dict : volinfo, + (xlator && loglevel) ? &server_spec_extended_option_handler + : &server_spec_option_handler); + + return ret; +} + +static int +prepare_bitrot_scrub_volume_options(glusterd_volinfo_t *volinfo, + dict_t *mod_dict, dict_t *set_dict) +{ + int ret = 0; + xlator_t *this = THIS; + GF_ASSERT(this); + + ret = dict_set_uint32(set_dict, "trusted-client", GF_CLIENT_TRUSTED); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=trusted-client", NULL); + goto out; + } - ret = glusterd_fetchspec_notify (THIS); + dict_copy(volinfo->dict, set_dict); + if (mod_dict) + dict_copy(mod_dict, set_dict); out: - return ret; + return ret; } -void -glusterd_get_nfs_filepath (char *filename) +static int +build_bitd_clusters(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *set_dict, int brick_count, unsigned int numbricks) +{ + int ret = -1; + int clusters = 0; + xlator_t *xl = NULL; + char *brick_hint = NULL; + char *bitrot_args[] = {"features/bit-rot", "%s-bit-rot-%d"}; + + ret = volgen_link_bricks_from_list_tail(graph, volinfo, bitrot_args[0], + bitrot_args[1], brick_count, + brick_count); + clusters = ret; + + xl = first_of(graph); + + ret = gf_asprintf(&brick_hint, "%d", numbricks); + if (ret < 0) + goto out; + + ret = xlator_set_fixed_option(xl, "brick-count", brick_hint); + if (ret) + goto out; + + ret = clusters; + +out: + GF_FREE(brick_hint); + brick_hint = NULL; + return ret; +} + +static int +build_bitd_volume_graph(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *mod_dict, unsigned int numbricks) { - char path[PATH_MAX] = {0,}; - glusterd_conf_t *priv = NULL; + volgen_graph_t cgraph = {0}; + xlator_t *this = NULL; + xlator_t *xl = NULL; + dict_t *set_dict = NULL; + glusterd_conf_t *priv = NULL; + int ret = 0; + int clusters = -1; + glusterd_brickinfo_t *brickinfo = NULL; + int brick_count = 0; + char transt[16] = { + 0, + }; + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + GF_ASSERT(priv); + + set_dict = dict_new(); + if (!set_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); + ret = -1; + goto out; + } + + ret = 
prepare_bitrot_scrub_volume_options(volinfo, mod_dict, set_dict); + if (ret) + goto out; + + get_transport_type(volinfo, set_dict, transt, _gf_false); + if (!strncmp(transt, "tcp,rdma", SLEN("tcp,rdma"))) + (void)snprintf(transt, sizeof(transt), "%s", "tcp"); + + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + if (!glusterd_is_local_brick(this, volinfo, brickinfo)) + continue; + + xl = volgen_graph_build_client(&cgraph, volinfo, brickinfo->hostname, + NULL, brickinfo->path, + brickinfo->brick_id, transt, set_dict); + if (!xl) { + ret = -1; + goto out; + } + brick_count++; + } + + if (brick_count == 0) { + ret = 0; + goto out; + } - priv = THIS->private; + clusters = build_bitd_clusters(&cgraph, volinfo, set_dict, brick_count, + numbricks); + if (clusters < 0) { + ret = -1; + goto out; + } - GLUSTERD_GET_NFS_DIR (path, priv); + ret = volgen_graph_set_options_generic(&cgraph, set_dict, volinfo, + bitrot_option_handler); + if (ret) + goto out; - snprintf (filename, PATH_MAX, "%s/nfs-server.vol", path); + ret = volgen_graph_merge_sub(graph, &cgraph, clusters); + if (ret) + goto out; + + ret = graph_set_generic_options(this, graph, set_dict, "Bitrot"); + +out: + if (set_dict) + dict_unref(set_dict); + + return ret; } int -glusterd_create_nfs_volfile () +build_bitd_graph(volgen_graph_t *graph, dict_t *mod_dict) { - glusterfs_graph_t graph = {{0,},}; - char filename[PATH_MAX] = {0,}; - int ret = -1; + glusterd_volinfo_t *voliter = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + int ret = 0; + xlator_t *iostxl = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + unsigned int numbricks = 0; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + iostxl = volgen_graph_add_as(graph, "debug/io-stats", "bitd"); + if (!iostxl) { + ret = -1; + goto out; + } + + /* TODO: do way with this extra loop _if possible_ */ + cds_list_for_each_entry(voliter, &priv->volumes, vol_list) + { + if (voliter->status != GLUSTERD_STATUS_STARTED) + continue; + if (!glusterd_is_bitrot_enabled(voliter)) + continue; + + cds_list_for_each_entry(brickinfo, &voliter->bricks, brick_list) + { + if (!glusterd_is_local_brick(this, voliter, brickinfo)) + continue; + numbricks++; + } + } - glusterd_get_nfs_filepath (filename); + cds_list_for_each_entry(voliter, &priv->volumes, vol_list) + { + if (voliter->status != GLUSTERD_STATUS_STARTED) + continue; - ret = build_nfs_graph (&graph, NULL); - if (!ret) - ret = volgen_write_volfile (&graph, filename); + if (!glusterd_is_bitrot_enabled(voliter)) + continue; - volgen_graph_free (&graph); + ret = build_bitd_volume_graph(graph, voliter, mod_dict, numbricks); + } +out: + return ret; +} - return ret; +static int +build_scrub_clusters(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *set_dict, int brick_count) +{ + int ret = -1; + int clusters = 0; + xlator_t *xl = NULL; + char *scrub_args[] = {"features/bit-rot", "%s-bit-rot-%d"}; + + ret = volgen_link_bricks_from_list_tail( + graph, volinfo, scrub_args[0], scrub_args[1], brick_count, brick_count); + clusters = ret; + + xl = first_of(graph); + + ret = xlator_set_fixed_option(xl, "scrubber", "true"); + if (ret) + goto out; + + ret = clusters; + +out: + return ret; } -int -glusterd_delete_volfile (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo) +static int +build_scrub_volume_graph(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *mod_dict) { - int ret = 0; - char filename[PATH_MAX] = {0,}; + volgen_graph_t cgraph = {0}; + dict_t *set_dict 
= NULL; + xlator_t *this = NULL; + xlator_t *xl = NULL; + glusterd_conf_t *priv = NULL; + int ret = 0; + int clusters = -1; + int brick_count = 0; + char transt[16] = { + 0, + }; + glusterd_brickinfo_t *brickinfo = NULL; + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + GF_ASSERT(priv); + + set_dict = dict_new(); + if (!set_dict) { + ret = -1; + goto out; + } + + ret = prepare_bitrot_scrub_volume_options(volinfo, mod_dict, set_dict); + if (ret) + goto out; + + get_transport_type(volinfo, set_dict, transt, _gf_false); + if (!strncmp(transt, "tcp,rdma", SLEN("tcp,rdma"))) + (void)snprintf(transt, sizeof(transt), "%s", "tcp"); + + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + if (!glusterd_is_local_brick(this, volinfo, brickinfo)) + continue; + + xl = volgen_graph_build_client(&cgraph, volinfo, brickinfo->hostname, + NULL, brickinfo->path, + brickinfo->brick_id, transt, set_dict); + if (!xl) { + ret = -1; + goto out; + } + brick_count++; + } - GF_ASSERT (volinfo); - GF_ASSERT (brickinfo); + if (brick_count == 0) { + ret = 0; + goto out; + } - get_brick_filepath (filename, volinfo, brickinfo); - ret = unlink (filename); - if (ret) - gf_log ("glusterd", GF_LOG_ERROR, "failed to delete file: %s, " - "reason: %s", filename, strerror (errno)); - return ret; + clusters = build_scrub_clusters(&cgraph, volinfo, set_dict, brick_count); + if (clusters < 0) { + ret = -1; + goto out; + } + + ret = volgen_graph_set_options_generic(&cgraph, set_dict, volinfo, + scrubber_option_handler); + if (ret) + goto out; + + ret = volgen_graph_merge_sub(graph, &cgraph, clusters); + if (ret) + goto out; + + ret = graph_set_generic_options(this, graph, set_dict, "Scrubber"); +out: + if (set_dict) + dict_unref(set_dict); + + return ret; } int -validate_nfsopts (glusterd_volinfo_t *volinfo, - dict_t *val_dict, - char **op_errstr) +build_scrub_graph(volgen_graph_t *graph, dict_t *mod_dict) +{ + glusterd_volinfo_t *voliter = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + int ret = 0; + xlator_t *iostxl = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + iostxl = volgen_graph_add_as(graph, "debug/io-stats", "scrub"); + if (!iostxl) { + ret = -1; + goto out; + } + + cds_list_for_each_entry(voliter, &priv->volumes, vol_list) + { + if (voliter->status != GLUSTERD_STATUS_STARTED) + continue; + + if (!glusterd_is_bitrot_enabled(voliter)) + continue; + + ret = build_scrub_volume_graph(graph, voliter, mod_dict); + } +out: + return ret; +} + +int +glusterd_snapdsvc_create_volfile(glusterd_volinfo_t *volinfo) { - glusterfs_graph_t graph = {{0,},}; - int ret = -1; + volgen_graph_t graph = { + 0, + }; + int ret = -1; + char filename[PATH_MAX] = { + 0, + }; - ret = build_nfs_graph (&graph, val_dict); - if (!ret) - ret = graph_reconf_validateopt (&graph, op_errstr); + graph.type = GF_SNAPD; + glusterd_svc_build_snapd_volfile(volinfo, filename, PATH_MAX); - volgen_graph_free (&graph); + ret = glusterd_snapdsvc_generate_volfile(&graph, volinfo); + if (!ret) + ret = volgen_write_volfile(&graph, filename); - gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + volgen_graph_free(&graph); + + return ret; } int -validate_clientopts (glusterd_volinfo_t *volinfo, - dict_t *val_dict, - char **op_errstr) +glusterd_create_rb_volfiles(glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo) { - glusterfs_graph_t graph = {{0,},}; - int ret = -1; - - GF_ASSERT (volinfo); + int ret = -1; + ret = 
glusterd_generate_brick_volfile(volinfo, brickinfo, NULL, NULL); + if (!ret) + ret = generate_client_volfiles(volinfo, GF_CLIENT_TRUSTED); + if (!ret) + ret = glusterd_fetchspec_notify(THIS); - ret = build_client_graph (&graph, volinfo, val_dict); - if (!ret) - ret = graph_reconf_validateopt (&graph, op_errstr); + return ret; +} - volgen_graph_free (&graph); +int +glusterd_create_volfiles(glusterd_volinfo_t *volinfo) +{ + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + + ret = generate_brick_volfiles(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL, + "Could not generate volfiles for bricks"); + goto out; + } + + ret = generate_client_volfiles(volinfo, GF_CLIENT_TRUSTED); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL, + "Could not generate trusted client volfiles"); + goto out; + } + + ret = generate_client_volfiles(volinfo, GF_CLIENT_TRUSTED_PROXY); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, + "Could not generate gfproxy client volfiles"); + goto out; + } + + ret = generate_client_volfiles(volinfo, GF_CLIENT_OTHER); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL, + "Could not generate client volfiles"); + + ret = glusterd_generate_gfproxyd_volfile(volinfo); + if (ret) + gf_log(this->name, GF_LOG_ERROR, "Could not generate gfproxy volfiles"); + + ret = glusterd_shdsvc_create_volfile(volinfo); + if (ret) + gf_log(this->name, GF_LOG_ERROR, "Could not generate shd volfiles"); + + dict_del_sizen(volinfo->dict, "skip-CLIOT"); - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; +out: + return ret; } int -validate_brickopts (glusterd_volinfo_t *volinfo, - char *brickinfo_path, - dict_t *val_dict, - char **op_errstr) +glusterd_create_volfiles_and_notify_services(glusterd_volinfo_t *volinfo) { - glusterfs_graph_t graph = {{0,},}; - int ret = -1; + int ret = -1; + xlator_t *this = NULL; - GF_ASSERT (volinfo); + this = THIS; + ret = glusterd_create_volfiles(volinfo); + if (ret) + goto out; + ret = glusterd_fetchspec_notify(this); - ret = build_server_graph (&graph, volinfo, val_dict, brickinfo_path); - if (!ret) - ret = graph_reconf_validateopt (&graph, op_errstr); +out: + return ret; +} + +int +glusterd_create_global_volfile(glusterd_graph_builder_t builder, char *filepath, + dict_t *mod_dict) +{ + volgen_graph_t graph = { + 0, + }; + int ret = -1; - volgen_graph_free (&graph); + ret = builder(&graph, mod_dict); + if (!ret) + ret = volgen_write_volfile(&graph, filepath); - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + volgen_graph_free(&graph); + + return ret; } int -glusterd_validate_brickreconf (glusterd_volinfo_t *volinfo, - dict_t *val_dict, - char **op_errstr) +glusterd_delete_volfile(glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo) { - glusterd_brickinfo_t *brickinfo = NULL; - int ret = -1; + int ret = 0; + char filename[PATH_MAX] = { + 0, + }; + + GF_ASSERT(volinfo); + GF_ASSERT(brickinfo); + + get_brick_filepath(filename, volinfo, brickinfo, NULL); + ret = sys_unlink(filename); + if (ret) + gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "failed to delete file: %s", filename); + return ret; +} - list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { - gf_log ("", GF_LOG_DEBUG, - "Validating %s", brickinfo->hostname); +int +validate_shdopts(glusterd_volinfo_t *volinfo, dict_t *val_dict, + char **op_errstr) +{ + volgen_graph_t graph = { + 0, + }; + int ret = -1; - ret = validate_brickopts (volinfo, 
brickinfo->path, val_dict, - op_errstr); - if (ret) - goto out; - } + graph.errstr = op_errstr; + if (!glusterd_is_shd_compatible_volume(volinfo)) { ret = 0; + goto out; + } + ret = dict_set_int32_sizen(val_dict, "graph-check", 1); + if (ret) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=graph-check", NULL); + goto out; + } + ret = build_shd_graph(volinfo, &graph, val_dict); + if (!ret) + ret = graph_reconf_validateopt(&graph.graph, op_errstr); + + volgen_graph_free(&graph); + + gf_msg_debug("glusterd", 0, "Returning %d", ret); out: + dict_del_sizen(val_dict, "graph-check"); + return ret; +} - return ret; +#ifdef BUILD_GNFS +static int +validate_nfsopts(glusterd_volinfo_t *volinfo, dict_t *val_dict, + char **op_errstr) +{ + volgen_graph_t graph = { + 0, + }; + int ret = -1; + char transport_type[16] = { + 0, + }; + char *tt = NULL; + char err_str[128] = { + 0, + }; + xlator_t *this = THIS; + + GF_ASSERT(this); + + graph.errstr = op_errstr; + + get_vol_transport_type(volinfo, transport_type); + ret = dict_get_str_sizen(val_dict, "nfs.transport-type", &tt); + if (!ret) { + if (volinfo->transport_type != GF_TRANSPORT_BOTH_TCP_RDMA) { + snprintf(err_str, sizeof(err_str), + "Changing nfs " + "transport type is allowed only for volumes " + "of transport type tcp,rdma"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OP_UNSUPPORTED, "%s", + err_str); + *op_errstr = gf_strdup(err_str); + ret = -1; + goto out; + } + if (strcmp(tt, "tcp") && strcmp(tt, "rdma")) { + snprintf(err_str, sizeof(err_str), + "wrong transport " + "type %s", + tt); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_INCOMPATIBLE_VALUE, + "Type=%s", tt, NULL); + *op_errstr = gf_strdup(err_str); + ret = -1; + goto out; + } + } + + ret = dict_set_str_sizen(val_dict, "volume-name", volinfo->volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Failed to set volume name"); + goto out; + } + + ret = build_nfs_graph(&graph, val_dict); + if (!ret) + ret = graph_reconf_validateopt(&graph.graph, op_errstr); + + volgen_graph_free(&graph); + +out: + if (dict_get_sizen(val_dict, "volume-name")) + dict_del_sizen(val_dict, "volume-name"); + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; } +#endif -static void -_check_globalopt (dict_t *this, char *key, data_t *value, void *ret_val) +int +validate_clientopts(glusterd_volinfo_t *volinfo, dict_t *val_dict, + char **op_errstr) { - int *ret = NULL; + volgen_graph_t graph = { + 0, + }; + int ret = -1; - ret = ret_val; - if (*ret) - return; - if (!glusterd_check_globaloption (key)) - *ret = 1; + GF_ASSERT(volinfo); + + graph.errstr = op_errstr; + + ret = build_client_graph(&graph, volinfo, val_dict); + if (!ret) + ret = graph_reconf_validateopt(&graph.graph, op_errstr); + + volgen_graph_free(&graph); + + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; } int -glusterd_validate_globalopts (glusterd_volinfo_t *volinfo, - dict_t *val_dict, char **op_errstr) +validate_brickopts(glusterd_volinfo_t *volinfo, glusterd_brickinfo_t *brickinfo, + dict_t *mod_dict, void *reconf) { - int ret = 0; + volgen_graph_t graph = { + 0, + }; + int ret = -1; + struct gd_validate_reconf_opts *brickreconf = reconf; + dict_t *val_dict = brickreconf->options; + char **op_errstr = brickreconf->op_errstr; + dict_t *full_dict = NULL; - dict_foreach (val_dict, _check_globalopt, &ret); - if (ret) { - *op_errstr = gf_strdup ( "option specified is not a global option"); - return -1; - } - ret = glusterd_validate_brickreconf (volinfo, 
val_dict, op_errstr); + GF_ASSERT(volinfo); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Could not Validate bricks"); - goto out; - } + graph.errstr = op_errstr; + full_dict = dict_new(); + if (!full_dict) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); + ret = -1; + goto out; + } - ret = validate_clientopts (volinfo, val_dict, op_errstr); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Could not Validate client"); - goto out; - } + if (mod_dict) + dict_copy(mod_dict, full_dict); + + if (val_dict) + dict_copy(val_dict, full_dict); - ret = validate_nfsopts (volinfo, val_dict, op_errstr); + ret = build_server_graph(&graph, volinfo, full_dict, brickinfo); + if (!ret) + ret = graph_reconf_validateopt(&graph.graph, op_errstr); + volgen_graph_free(&graph); out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + if (full_dict) + dict_unref(full_dict); + + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; } -static void -_check_localopt (dict_t *this, char *key, data_t *value, void *ret_val) +int +glusterd_validate_brickreconf(glusterd_volinfo_t *volinfo, dict_t *val_dict, + char **op_errstr) { - int *ret = NULL; + int ret = -1; + struct gd_validate_reconf_opts brickreconf = {0}; + + brickreconf.options = val_dict; + brickreconf.op_errstr = op_errstr; + ret = glusterd_volume_brick_for_each(volinfo, &brickreconf, + validate_brickopts); + return ret; +} - ret = ret_val; - if (*ret) - return; - if (!glusterd_check_localoption (key)) - *ret = 1; +static int +_check_globalopt(dict_t *this, char *key, data_t *value, void *ret_val) +{ + int *ret = NULL; + + ret = ret_val; + if (*ret) + return 0; + if (!glusterd_check_globaloption(key)) + *ret = 1; + + return 0; } int -glusterd_validate_reconfopts (glusterd_volinfo_t *volinfo, dict_t *val_dict, - char **op_errstr) +glusterd_validate_globalopts(glusterd_volinfo_t *volinfo, dict_t *val_dict, + char **op_errstr) { - int ret = 0; + int ret = 0; - dict_foreach (val_dict, _check_localopt, &ret); - if (ret) { - *op_errstr = gf_strdup ( "option specified is not a local option"); - return -1; - } - ret = glusterd_validate_brickreconf (volinfo, val_dict, op_errstr); + dict_foreach(val_dict, _check_globalopt, &ret); + if (ret) { + *op_errstr = gf_strdup("option specified is not a global option"); + return -1; + } + ret = glusterd_validate_brickreconf(volinfo, val_dict, op_errstr); + + if (ret) { + gf_msg_debug("glusterd", 0, "Could not Validate bricks"); + goto out; + } + + ret = validate_clientopts(volinfo, val_dict, op_errstr); + if (ret) { + gf_msg_debug("glusterd", 0, "Could not Validate client"); + goto out; + } +#ifdef BUILD_GNFS + ret = validate_nfsopts(volinfo, val_dict, op_errstr); + if (ret) { + gf_msg_debug("glusterd", 0, "Could not Validate nfs"); + goto out; + } +#endif + ret = validate_shdopts(volinfo, val_dict, op_errstr); + if (ret) { + gf_msg_debug("glusterd", 0, "Could not Validate self-heald"); + goto out; + } - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Could not Validate bricks"); - goto out; - } +out: + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; +} - ret = validate_clientopts (volinfo, val_dict, op_errstr); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Could not Validate client"); - goto out; - } +static int +_check_localopt(dict_t *this, char *key, data_t *value, void *ret_val) +{ + int *ret = NULL; + + ret = ret_val; + if (*ret) + return 0; + if (!glusterd_check_localoption(key)) + *ret = 1; + + return 0; +} + +int +glusterd_validate_reconfopts(glusterd_volinfo_t *volinfo, 
dict_t *val_dict, + char **op_errstr) +{ + int ret = 0; + + dict_foreach(val_dict, _check_localopt, &ret); + if (ret) { + *op_errstr = gf_strdup("option specified is not a local option"); + return -1; + } + ret = glusterd_validate_brickreconf(volinfo, val_dict, op_errstr); + + if (ret) { + gf_msg_debug("glusterd", 0, "Could not Validate bricks"); + goto out; + } + + ret = validate_clientopts(volinfo, val_dict, op_errstr); + if (ret) { + gf_msg_debug("glusterd", 0, "Could not Validate client"); + goto out; + } + +#ifdef BUILD_GNFS + ret = validate_nfsopts(volinfo, val_dict, op_errstr); + if (ret) { + gf_msg_debug("glusterd", 0, "Could not Validate nfs"); + goto out; + } +#endif + ret = validate_shdopts(volinfo, val_dict, op_errstr); + if (ret) { + gf_msg_debug("glusterd", 0, "Could not Validate self-heald"); + goto out; + } + +out: + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; +} + +struct volopt_map_entry * +gd_get_vmep(const char *key) +{ + char *completion = NULL; + struct volopt_map_entry *vmep = NULL; + int ret = 0; + + if (!key) + return NULL; + + COMPLETE_OPTION((char *)key, completion, ret); + for (vmep = glusterd_volopt_map; vmep->key; vmep++) { + if (strcmp(vmep->key, key) == 0) + return vmep; + } + + return NULL; +} + +uint32_t +glusterd_get_op_version_from_vmep(struct volopt_map_entry *vmep) +{ + if (vmep) + return vmep->op_version; + + return 0; +} + +gf_boolean_t +gd_is_client_option(struct volopt_map_entry *vmep) +{ + if (vmep && (vmep->flags & VOLOPT_FLAG_CLIENT_OPT)) + return _gf_true; + + return _gf_false; +} + +gf_boolean_t +gd_is_xlator_option(struct volopt_map_entry *vmep) +{ + if (vmep && (vmep->flags & VOLOPT_FLAG_XLATOR_OPT)) + return _gf_true; + + return _gf_false; +} - ret = validate_nfsopts (volinfo, val_dict, op_errstr); +static volume_option_type_t +_gd_get_option_type(struct volopt_map_entry *vmep) +{ + void *dl_handle = NULL; + volume_opt_list_t vol_opt_list = { + {0}, + }; + int ret = -1; + volume_option_t *opt = NULL; + char *xlopt_key = NULL; + volume_option_type_t opt_type = GF_OPTION_TYPE_MAX; + + if (vmep) { + CDS_INIT_LIST_HEAD(&vol_opt_list.list); + ret = xlator_volopt_dynload(vmep->voltype, &dl_handle, &vol_opt_list); + if (ret) + goto out; + if (_get_xlator_opt_key_from_vme(vmep, &xlopt_key)) + goto out; + + opt = xlator_volume_option_get_list(&vol_opt_list, xlopt_key); + _free_xlator_opt_key(xlopt_key); + + if (opt) + opt_type = opt->type; + } out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + if (dl_handle) { + dlclose(dl_handle); + dl_handle = NULL; + } + + return opt_type; +} + +gf_boolean_t +gd_is_boolean_option(struct volopt_map_entry *vmep) +{ + if (GF_OPTION_TYPE_BOOL == _gd_get_option_type(vmep)) + return _gf_true; + + return _gf_false; +} + +int +glusterd_shdsvc_generate_volfile(glusterd_volinfo_t *volinfo, char *filename, + dict_t *mode_dict) +{ + int ret = -1; + volgen_graph_t graph = { + 0, + }; + + graph.type = GF_SHD; + ret = build_shd_graph(volinfo, &graph, mode_dict); + if (!ret) + ret = volgen_write_volfile(&graph, filename); + + volgen_graph_free(&graph); + + return ret; } diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.h b/xlators/mgmt/glusterd/src/glusterd-volgen.h index 26861150147..cd4d0c7d0cc 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.h +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.h @@ -1,49 +1,338 @@ /* - Copyright (c) 2006-2010 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. 
- - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ #ifndef _GLUSTERD_VOLGEN_H_ #define _GLUSTERD_VOLGEN_H_ -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" +#if (HAVE_LIB_XML) +#include <libxml/encoding.h> +#include <libxml/xmlwriter.h> #endif #include "glusterd.h" +#include "glusterd-messages.h" + +/* volopt map key name definitions */ + +#define VKEY_DIAG_CNT_FOP_HITS "diagnostics.count-fop-hits" +#define VKEY_DIAG_LAT_MEASUREMENT "diagnostics.latency-measurement" +#define VKEY_FEATURES_LIMIT_USAGE "features.limit-usage" +#define VKEY_FEATURES_SOFT_LIMIT "features.soft-limit" +#define VKEY_MARKER_XTIME GEOREP ".indexing" +#define VKEY_MARKER_XTIME_FORCE GEOREP ".ignore-pid-check" +#define VKEY_CHANGELOG "changelog.changelog" +#define VKEY_FEATURES_QUOTA "features.quota" +#define VKEY_FEATURES_INODE_QUOTA "features.inode-quota" +#define VKEY_FEATURES_TRASH "features.trash" +#define VKEY_FEATURES_BITROT "features.bitrot" +#define VKEY_FEATURES_SCRUB "features.scrub" +#define VKEY_FEATURES_SELINUX "features.selinux" +#define VKEY_PARALLEL_READDIR "performance.parallel-readdir" +#define VKEY_READDIR_AHEAD "performance.readdir-ahead" +#define VKEY_RDA_CACHE_LIMIT "performance.rda-cache-limit" +#define VKEY_RDA_REQUEST_SIZE "performance.rda-request-size" +#define VKEY_CONFIG_GFPROXY "config.gfproxyd" +#define VKEY_CONFIG_GLOBAL_THREADING "config.global-threading" +#define VKEY_CONFIG_CLIENT_THREADS "config.client-threads" +#define VKEY_CONFIG_BRICK_THREADS "config.brick-threads" + +#define AUTH_ALLOW_MAP_KEY "auth.allow" +#define AUTH_REJECT_MAP_KEY "auth.reject" +#define NFS_DISABLE_MAP_KEY "nfs.disable" +#define AUTH_ALLOW_OPT_KEY "auth.addr.*.allow" +#define AUTH_REJECT_OPT_KEY "auth.addr.*.reject" +#define NFS_DISABLE_OPT_KEY "nfs.*.disable" + +#define SSL_OWN_CERT_OPT "ssl.own-cert" +#define SSL_PRIVATE_KEY_OPT "ssl.private-key" +#define SSL_CA_LIST_OPT "ssl.ca-list" +#define SSL_CRL_PATH_OPT "ssl.crl-path" +#define SSL_CERT_DEPTH_OPT "ssl.certificate-depth" +#define SSL_CIPHER_LIST_OPT "ssl.cipher-list" +#define SSL_DH_PARAM_OPT "ssl.dh-param" +#define SSL_EC_CURVE_OPT "ssl.ec-curve" + +typedef enum { + GF_CLIENT_TRUSTED, + GF_CLIENT_OTHER, + GF_CLIENT_TRUSTED_PROXY, +} glusterd_client_type_t; + +/* It indicates the type of volfile that the graph is built for */ +typedef enum { + GF_REBALANCED = 1, + GF_QUOTAD, + GF_SNAPD, + GF_SHD, +} glusterd_graph_type_t; + +struct volgen_graph { + char **errstr; + glusterfs_graph_t graph; + glusterd_graph_type_t type; +}; +typedef struct volgen_graph volgen_graph_t; + +typedef int 
(*glusterd_graph_builder_t)(volgen_graph_t *graph, + dict_t *mod_dict); +typedef int (*glusterd_vol_graph_builder_t)(glusterd_volinfo_t *, + char *filename, dict_t *mod_dict); + +#define COMPLETE_OPTION(key, completion, ret) \ + do { \ + if (!strchr(key, '.')) { \ + ret = option_complete(key, &completion); \ + if (ret) { \ + gf_msg("", GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, \ + "Out of memory"); \ + return _gf_false; \ + } \ + \ + if (!completion) { \ + gf_msg("", GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY, \ + "option %s does not" \ + "exist", \ + key); \ + return _gf_false; \ + } \ + } \ + \ + if (completion) \ + GF_FREE(completion); \ + } while (0); + +typedef enum gd_volopt_flags_ { + VOLOPT_FLAG_NONE, + VOLOPT_FLAG_FORCE = 0x01, /* option needs force to be reset */ + VOLOPT_FLAG_XLATOR_OPT = 0x02, /* option enables/disables xlators */ + VOLOPT_FLAG_CLIENT_OPT = 0x04, /* option affects clients */ + VOLOPT_FLAG_NEVER_RESET = 0x08, /* option which should not be reset */ +} gd_volopt_flags_t; + +typedef enum { + GF_XLATOR_POSIX = 0, + GF_XLATOR_ACL, + GF_XLATOR_LOCKS, + GF_XLATOR_LEASES, + GF_XLATOR_UPCALL, + GF_XLATOR_IOT, + GF_XLATOR_INDEX, + GF_XLATOR_MARKER, + GF_XLATOR_IO_STATS, + GF_XLATOR_BD, + GF_XLATOR_SERVER, + GF_XLATOR_NONE, +} glusterd_server_xlator_t; + +/* As of now debug xlators can be loaded only below fuse in the client + * graph via cli. More xlators can be added below when the cli option + * for adding debug xlators anywhere in the client graph has to be made + * available. + */ +typedef enum { + GF_CLNT_XLATOR_FUSE = 0, + GF_CLNT_XLATOR_NONE, +} glusterd_client_xlator_t; + +typedef enum { DOC, NO_DOC, GLOBAL_DOC, GLOBAL_NO_DOC } option_type_t; + +typedef int (*vme_option_validation)(glusterd_volinfo_t *volinfo, dict_t *dict, + char *key, char *value, char **op_errstr); + +struct volopt_map_entry { + char *key; + char *voltype; + char *option; + char *value; + option_type_t type; + uint32_t flags; + uint32_t op_version; + char *description; + vme_option_validation validate_fn; + /* If client_option is true, the option affects clients. 
+ * this is used to calculate client-op-version of volumes + */ + // gf_boolean_t client_option; +}; + +typedef int (*brick_xlator_builder)(volgen_graph_t *graph, + glusterd_volinfo_t *volinfo, + dict_t *set_dict, + glusterd_brickinfo_t *brickinfo); + +struct volgen_brick_xlator { + /* function that builds a xlator */ + brick_xlator_builder builder; + /* debug key for a xlator that + * gets used for adding debug translators like trace, error-gen, + * delay-gen before this xlator */ + char *dbg_key; +}; + +struct nfs_opt { + const char *pattern; + const char *printf_pattern; +}; + +typedef struct volgen_brick_xlator volgen_brick_xlator_t; + +int +glusterd_snapdsvc_create_volfile(glusterd_volinfo_t *volinfo); + +int +glusterd_snapdsvc_generate_volfile(volgen_graph_t *graph, + glusterd_volinfo_t *volinfo); + +int +glusterd_create_global_volfile(glusterd_graph_builder_t builder, char *filepath, + dict_t *mod_dict); + +int +glusterd_create_rb_volfiles(glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo); + +int +glusterd_create_volfiles(glusterd_volinfo_t *volinfo); + +int +glusterd_create_volfiles_and_notify_services(glusterd_volinfo_t *volinfo); -int glusterd_create_rb_volfiles (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo); +int +glusterd_generate_client_per_brick_volfile(glusterd_volinfo_t *volinfo); -int glusterd_create_volfiles_and_notify_services (glusterd_volinfo_t *volinfo); +void +glusterd_get_nfs_filepath(char *filename); -void glusterd_get_nfs_filepath (char *filename); +void +glusterd_get_shd_filepath(char *filename); -int glusterd_create_nfs_volfile (); +int +build_shd_graph(glusterd_volinfo_t *volinfo, volgen_graph_t *graph, + dict_t *mod_dict); -int glusterd_delete_volfile (glusterd_volinfo_t *volinfo, +#ifdef BUILD_GNFS +int +build_nfs_graph(volgen_graph_t *graph, dict_t *mod_dict); +#endif +int +build_quotad_graph(volgen_graph_t *graph, dict_t *mod_dict); + +int +build_rebalance_volfile(glusterd_volinfo_t *volinfo, char *filepath, + dict_t *mod_dict); + +int +build_bitd_graph(volgen_graph_t *graph, dict_t *mod_dict); + +int +build_scrub_graph(volgen_graph_t *graph, dict_t *mod_dict); + +int +glusterd_delete_volfile(glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo); +int +glusterd_delete_snap_volfile(glusterd_volinfo_t *volinfo, + glusterd_volinfo_t *snap_volinfo, glusterd_brickinfo_t *brickinfo); -int glusterd_volinfo_get (glusterd_volinfo_t *volinfo, char *key, char **value); +int +glusterd_volinfo_get(glusterd_volinfo_t *volinfo, char *key, char **value); + +int +glusterd_volinfo_get_boolean(glusterd_volinfo_t *volinfo, char *key); + +int +glusterd_validate_globalopts(glusterd_volinfo_t *volinfo, dict_t *val_dict, + char **op_errstr); + +int +glusterd_validate_localopts(dict_t *val_dict, char **op_errstr); + +gf_boolean_t +glusterd_check_globaloption(char *key); + +gf_boolean_t +glusterd_check_voloption_flags(char *key, int32_t flags); + +gf_boolean_t +glusterd_is_valid_volfpath(char *volname, char *brick); + +int +generate_brick_volfiles(glusterd_volinfo_t *volinfo); + +int +generate_snap_brick_volfiles(glusterd_volinfo_t *volinfo, + glusterd_volinfo_t *snap_volinfo); +int +generate_client_volfiles(glusterd_volinfo_t *volinfo, + glusterd_client_type_t client_type); +int +generate_snap_client_volfiles(glusterd_volinfo_t *actual_volinfo, + glusterd_volinfo_t *snap_volinfo, + glusterd_client_type_t client_type, + gf_boolean_t vol_restore); + +int +_get_xlator_opt_key_from_vme(struct volopt_map_entry *vme, char **key); + +void 
+_free_xlator_opt_key(char *key); + +#if (HAVE_LIB_XML) +int +init_sethelp_xml_doc(xmlTextWriterPtr *writer, xmlBufferPtr *buf); + +int +xml_add_volset_element(xmlTextWriterPtr writer, const char *name, + const char *def_val, const char *dscrpt); +int +end_sethelp_xml_doc(xmlTextWriterPtr writer); +#endif /* HAVE_LIB_XML */ + +char * +glusterd_get_trans_type_rb(gf_transport_type ttype); + +struct volopt_map_entry * +gd_get_vmep(const char *key); + +uint32_t +glusterd_get_op_version_from_vmep(struct volopt_map_entry *vmep); + +gf_boolean_t +gd_is_client_option(struct volopt_map_entry *vmep); + +gf_boolean_t +gd_is_xlator_option(struct volopt_map_entry *vmep); + +gf_boolean_t +gd_is_boolean_option(struct volopt_map_entry *vmep); + +char * +volgen_get_shd_key(int type); + +int +glusterd_volopt_validate(glusterd_volinfo_t *volinfo, dict_t *dict, char *key, + char *value, char **op_errstr); +gf_boolean_t +gd_is_self_heal_enabled(glusterd_volinfo_t *volinfo, dict_t *dict); + +int +generate_dummy_client_volfiles(glusterd_volinfo_t *volinfo); + +int +glusterd_generate_gfproxyd_volfile(glusterd_volinfo_t *volinfo); -int glusterd_validate_globalopts (glusterd_volinfo_t *volinfo, dict_t *val_dict, char **op_errstr); +int +glusterd_build_gfproxyd_volfile(glusterd_volinfo_t *volinfo, char *filename); -int glusterd_validate_localopts (dict_t *val_dict, char **op_errstr); -gf_boolean_t glusterd_check_globaloption (char *key); +int +glusterd_shdsvc_generate_volfile(glusterd_volinfo_t *volinfo, char *filename, + dict_t *mode_dict); #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c new file mode 100644 index 00000000000..814ab14fb27 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c @@ -0,0 +1,3033 @@ +/* + Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ +#include <glusterfs/common-utils.h> +#include <glusterfs/syscall.h> +#include "cli1-xdr.h" +#include "xdr-generic.h" +#include "glusterd.h" +#include "glusterd-op-sm.h" +#include "glusterd-geo-rep.h" +#include "glusterd-store.h" +#include "glusterd-utils.h" +#include "glusterd-volgen.h" +#include "glusterd-messages.h" +#include <glusterfs/run.h> +#include "glusterd-snapshot-utils.h" +#include "glusterd-svc-mgmt.h" +#include "glusterd-svc-helper.h" +#include "glusterd-shd-svc.h" +#include "glusterd-snapd-svc.h" +#include "glusterd-mgmt.h" +#include "glusterd-server-quorum.h" + +#include <stdint.h> +#include <sys/socket.h> +#include <netdb.h> +#include <sys/types.h> +#include <netinet/in.h> +#include <stdlib.h> + +#define glusterd_op_start_volume_args_get(dict, volname, flags) \ + glusterd_op_stop_volume_args_get(dict, volname, flags) + +int +__glusterd_handle_create_volume(rpcsvc_request_t *req) +{ + int32_t ret = -1; + gf_cli_req cli_req = {{ + 0, + }}; + dict_t *dict = NULL; + char *bricks = NULL; + char *volname = NULL; + int brick_count = 0; + int thin_arbiter_count = 0; + void *cli_rsp = NULL; + char err_str[2048] = { + 0, + }; + gf_cli_rsp rsp = { + 0, + }; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + char *free_ptr = NULL; + char *trans_type = NULL; + char *address_family_str = NULL; + uuid_t volume_id = { + 0, + }; + uuid_t tmp_uuid = {0}; + int32_t type = 0; + char *username = NULL; + char *password = NULL; +#ifdef IPV6_DEFAULT + char *addr_family = "inet6"; +#else + char *addr_family = "inet"; +#endif + glusterd_volinfo_t *volinfo = NULL; + + GF_ASSERT(req); + + this = THIS; + GF_ASSERT(this); + + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, out); + + ret = -1; + ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + req->rpc_err = GARBAGE_ARGS; + snprintf(err_str, sizeof(err_str), + "Failed to decode request " + "received from cli"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, "%s", + err_str); + goto out; + } + + gf_msg_debug(this->name, 0, "Received create volume req"); + + if (cli_req.dict.dict_len) { + /* Unserialize the dictionary */ + dict = dict_new(); + + ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + "failed to " + "unserialize req-buffer to dictionary"); + snprintf(err_str, sizeof(err_str), + "Unable to decode " + "the command"); + goto out; + } else { + dict->extra_stdfree = cli_req.dict.dict_val; + } + } + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + + if (ret) { + snprintf(err_str, sizeof(err_str), + "Unable to get volume " + "name"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", + err_str); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (!ret) { + ret = -1; + snprintf(err_str, sizeof(err_str), "Volume %s already exists", volname); + gf_msg(this->name, GF_LOG_ERROR, EEXIST, GD_MSG_VOL_ALREADY_EXIST, "%s", + err_str); + goto out; + } + + ret = dict_get_int32n(dict, "count", SLEN("count"), &brick_count); + if (ret) { + snprintf(err_str, sizeof(err_str), + "Unable to get brick count" + " for volume %s", + volname); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", + err_str); + goto out; + } + + ret = dict_get_int32n(dict, "type", SLEN("type"), &type); + if (ret) { + snprintf(err_str, sizeof(err_str), + "Unable to get type of " + "volume %s", + volname); + gf_msg(this->name, 
GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", + err_str); + goto out; + } + + ret = dict_get_strn(dict, "transport", SLEN("transport"), &trans_type); + if (ret) { + snprintf(err_str, sizeof(err_str), + "Unable to get " + "transport-type of volume %s", + volname); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", + err_str); + goto out; + } + + ret = dict_get_strn(this->options, "transport.address-family", + SLEN("transport.address-family"), &address_family_str); + + if (!ret) { + ret = dict_set_dynstr_with_alloc(dict, "transport.address-family", + address_family_str); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, + "failed to set transport.address-family"); + goto out; + } + } else if (!strcmp(trans_type, "tcp")) { + /* Setting default as inet for trans_type tcp if the op-version + * is >= 3.8.0 + */ + if (conf->op_version >= GD_OP_VERSION_3_8_0) { + ret = dict_set_dynstr_with_alloc(dict, "transport.address-family", + addr_family); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, + "failed to set " + "transport.address-family " + "to %s", + addr_family); + goto out; + } + } + } + ret = dict_get_strn(dict, "bricks", SLEN("bricks"), &bricks); + if (ret) { + snprintf(err_str, sizeof(err_str), + "Unable to get bricks for " + "volume %s", + volname); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", + err_str); + goto out; + } + + ret = dict_get_int32n(dict, "thin-arbiter-count", + SLEN("thin-arbiter-count"), &thin_arbiter_count); + if (thin_arbiter_count && conf->op_version < GD_OP_VERSION_7_0) { + snprintf(err_str, sizeof(err_str), + "Cannot execute command. " + "The cluster is operating at version %d. " + "Thin-arbiter volume creation is unavailable in " + "this version", + conf->op_version); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTERD_OP_FAILED, "%s", + err_str); + ret = -1; + goto out; + } + + if (!dict_getn(dict, "force", SLEN("force"))) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get 'force' flag"); + goto out; + } + + gf_uuid_generate(volume_id); + free_ptr = gf_strdup(uuid_utoa(volume_id)); + ret = dict_set_dynstrn(dict, "volume-id", SLEN("volume-id"), free_ptr); + if (ret) { + snprintf(err_str, sizeof(err_str), + "Unable to set volume " + "id of volume %s", + volname); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "%s", + err_str); + goto out; + } + free_ptr = NULL; + + /* generate internal username and password */ + + gf_uuid_generate(tmp_uuid); + username = gf_strdup(uuid_utoa(tmp_uuid)); + ret = dict_set_dynstrn(dict, "internal-username", SLEN("internal-username"), + username); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set username for " + "volume %s", + volname); + goto out; + } + + gf_uuid_generate(tmp_uuid); + password = gf_strdup(uuid_utoa(tmp_uuid)); + ret = dict_set_dynstrn(dict, "internal-password", SLEN("internal-password"), + password); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set password for " + "volume %s", + volname); + goto out; + } + + ret = glusterd_op_begin_synctask(req, GD_OP_CREATE_VOLUME, dict); + +out: + if (ret) { + rsp.op_ret = -1; + rsp.op_errno = 0; + if (err_str[0] == '\0') + snprintf(err_str, sizeof(err_str), "Operation failed"); + rsp.op_errstr = err_str; + cli_rsp = &rsp; + glusterd_to_cli(req, cli_rsp, NULL, 0, NULL, (xdrproc_t)xdr_gf_cli_rsp, + dict); + ret = 0; // Client response sent, prevent second response + } + + GF_FREE(free_ptr); + + return ret; +} + +int 
+glusterd_handle_create_volume(rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler(req, __glusterd_handle_create_volume); +} + +int +__glusterd_handle_cli_start_volume(rpcsvc_request_t *req) +{ + int32_t ret = -1; + gf_cli_req cli_req = {{ + 0, + }}; + char *volname = NULL; + dict_t *dict = NULL; + glusterd_op_t cli_op = GD_OP_START_VOLUME; + char errstr[2048] = { + 0, + }; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(req); + + conf = this->private; + GF_ASSERT(conf); + ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + snprintf(errstr, sizeof(errstr), + "Failed to decode message " + "received from cli"); + req->rpc_err = GARBAGE_ARGS; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, "%s", + errstr); + goto out; + } + + if (cli_req.dict.dict_len) { + /* Unserialize the dictionary */ + dict = dict_new(); + + ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + "failed to " + "unserialize req-buffer to dictionary"); + snprintf(errstr, sizeof(errstr), + "Unable to decode " + "the command"); + goto out; + } + } + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + snprintf(errstr, sizeof(errstr), "Unable to get volume name"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", + errstr); + goto out; + } + + gf_msg_debug(this->name, 0, + "Received start vol req" + " for volume %s", + volname); + + if (conf->op_version <= GD_OP_VERSION_3_7_6) { + gf_msg_debug(this->name, 0, + "The cluster is operating at " + "version less than or equal to %d. Volume start " + "falling back to syncop framework.", + GD_OP_VERSION_3_7_6); + ret = glusterd_op_begin_synctask(req, GD_OP_START_VOLUME, dict); + } else { + ret = glusterd_mgmt_v3_initiate_all_phases(req, GD_OP_START_VOLUME, + dict); + } +out: + free(cli_req.dict.dict_val); // its malloced by xdr + + if (ret) { + if (errstr[0] == '\0') + snprintf(errstr, sizeof(errstr), "Operation failed"); + ret = glusterd_op_send_cli_response(cli_op, ret, 0, req, dict, errstr); + } + + return ret; +} + +int +glusterd_handle_cli_start_volume(rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler(req, __glusterd_handle_cli_start_volume); +} + +int +__glusterd_handle_cli_stop_volume(rpcsvc_request_t *req) +{ + int32_t ret = -1; + gf_cli_req cli_req = {{ + 0, + }}; + char *dup_volname = NULL; + dict_t *dict = NULL; + glusterd_op_t cli_op = GD_OP_STOP_VOLUME; + xlator_t *this = NULL; + char err_str[64] = { + 0, + }; + glusterd_conf_t *conf = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(req); + conf = this->private; + GF_ASSERT(conf); + + ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + snprintf(err_str, sizeof(err_str), + "Failed to decode message " + "received from cli"); + req->rpc_err = GARBAGE_ARGS; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, "%s", + err_str); + goto out; + } + if (cli_req.dict.dict_len) { + /* Unserialize the dictionary */ + dict = dict_new(); + + ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + "failed to " + "unserialize req-buffer to dictionary"); + snprintf(err_str, sizeof(err_str), + "Unable to decode " + "the command"); + goto out; + } + } + + ret = dict_get_strn(dict, "volname", 
SLEN("volname"), &dup_volname); + + if (ret) { + snprintf(err_str, sizeof(err_str), + "Failed to get volume " + "name"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", + err_str); + goto out; + } + + gf_msg_debug(this->name, 0, + "Received stop vol req " + "for volume %s", + dup_volname); + + if (conf->op_version < GD_OP_VERSION_4_1_0) { + gf_msg_debug(this->name, 0, + "The cluster is operating at " + "version less than %d. Volume start " + "falling back to syncop framework.", + GD_OP_VERSION_4_1_0); + ret = glusterd_op_begin_synctask(req, GD_OP_STOP_VOLUME, dict); + } else { + ret = glusterd_mgmt_v3_initiate_all_phases(req, GD_OP_STOP_VOLUME, + dict); + } + +out: + free(cli_req.dict.dict_val); // its malloced by xdr + + if (ret) { + if (err_str[0] == '\0') + snprintf(err_str, sizeof(err_str), "Operation failed"); + ret = glusterd_op_send_cli_response(cli_op, ret, 0, req, dict, err_str); + } + + return ret; +} + +int +glusterd_handle_cli_stop_volume(rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler(req, __glusterd_handle_cli_stop_volume); +} + +int +__glusterd_handle_cli_delete_volume(rpcsvc_request_t *req) +{ + int32_t ret = -1; + gf_cli_req cli_req = { + { + 0, + }, + }; + glusterd_op_t cli_op = GD_OP_DELETE_VOLUME; + dict_t *dict = NULL; + char *volname = NULL; + char err_str[64] = { + 0, + }; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + GF_ASSERT(req); + + ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + snprintf(err_str, sizeof(err_str), + "Failed to decode request " + "received from cli"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, "%s", + err_str); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (cli_req.dict.dict_len) { + /* Unserialize the dictionary */ + dict = dict_new(); + + ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + "failed to " + "unserialize req-buffer to dictionary"); + snprintf(err_str, sizeof(err_str), + "Unable to decode " + "the command"); + goto out; + } + } + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + snprintf(err_str, sizeof(err_str), + "Failed to get volume " + "name"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", + err_str); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + gf_msg_debug(this->name, 0, + "Received delete vol req" + "for volume %s", + volname); + + ret = glusterd_op_begin_synctask(req, GD_OP_DELETE_VOLUME, dict); + +out: + free(cli_req.dict.dict_val); // its malloced by xdr + + if (ret) { + if (err_str[0] == '\0') + snprintf(err_str, sizeof(err_str), "Operation failed"); + ret = glusterd_op_send_cli_response(cli_op, ret, 0, req, dict, err_str); + } + + return ret; +} +int +glusterd_handle_cli_delete_volume(rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler(req, + __glusterd_handle_cli_delete_volume); +} +static int +glusterd_handle_heal_options_enable_disable(rpcsvc_request_t *req, dict_t *dict, + glusterd_volinfo_t *volinfo) +{ + gf_xl_afr_op_t heal_op = GF_SHD_OP_INVALID; + int ret = 0; + char *key = NULL; + char *value = NULL; + xlator_t *this = THIS; + GF_ASSERT(this); + + ret = dict_get_int32n(dict, "heal-op", SLEN("heal-op"), + (int32_t *)&heal_op); + if (ret || (heal_op == GF_SHD_OP_INVALID)) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=heal-op", NULL); + ret = -1; + goto out; + } + + if ((heal_op != 
GF_SHD_OP_HEAL_ENABLE) && + (heal_op != GF_SHD_OP_HEAL_DISABLE) && + (heal_op != GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE) && + (heal_op != GF_SHD_OP_GRANULAR_ENTRY_HEAL_DISABLE)) { + ret = -EINVAL; + goto out; + } + + if (((heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE) || + (heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_DISABLE)) && + (volinfo->type == GF_CLUSTER_TYPE_DISPERSE)) { + ret = -1; + goto out; + } + + if ((heal_op == GF_SHD_OP_HEAL_ENABLE) || + (heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE)) { + value = "enable"; + } else if ((heal_op == GF_SHD_OP_HEAL_DISABLE) || + (heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_DISABLE)) { + value = "disable"; + } + + if ((heal_op == GF_SHD_OP_HEAL_ENABLE) || + (heal_op == GF_SHD_OP_HEAL_DISABLE)) { + key = volgen_get_shd_key(volinfo->type); + if (!key) { + ret = -1; + goto out; + } + } else { + key = "cluster.granular-entry-heal"; + ret = dict_set_int8(dict, "is-special-key", 1); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=is-special-key", NULL); + goto out; + } + } + + ret = dict_set_strn(dict, "key1", SLEN("key1"), key); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=key1", NULL); + goto out; + } + + ret = dict_set_strn(dict, "value1", SLEN("value1"), value); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=value1", NULL); + goto out; + } + + ret = dict_set_int32n(dict, "count", SLEN("count"), 1); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=count", NULL); + goto out; + } + + ret = glusterd_op_begin_synctask(req, GD_OP_SET_VOLUME, dict); + +out: + return ret; +} + +int +__glusterd_handle_cli_heal_volume(rpcsvc_request_t *req) +{ + int32_t ret = -1; + gf_cli_req cli_req = {{ + 0, + }}; + dict_t *dict = NULL; + glusterd_op_t cli_op = GD_OP_HEAL_VOLUME; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + char op_errstr[2048] = { + 0, + }; + + this = THIS; + GF_ASSERT(this); + + GF_ASSERT(req); + + ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + // failed to decode msg; + req->rpc_err = GARBAGE_ARGS; + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL); + goto out; + } + + if (cli_req.dict.dict_len) { + /* Unserialize the dictionary */ + dict = dict_new(); + + ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + "failed to " + "unserialize req-buffer to dictionary"); + snprintf(op_errstr, sizeof(op_errstr), + "Unable to decode the command"); + goto out; + } else { + dict->extra_stdfree = cli_req.dict.dict_val; + } + } + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + snprintf(op_errstr, sizeof(op_errstr), + "Unable to find " + "volume name"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", + op_errstr); + goto out; + } + + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_HEAL_VOL_REQ_RCVD, + "Received heal vol req " + "for volume %s", + volname); + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + snprintf(op_errstr, sizeof(op_errstr), "Volume %s does not exist", + volname); + goto out; + } + + ret = glusterd_handle_heal_options_enable_disable(req, dict, volinfo); + if (ret == -EINVAL) { + ret = 0; + } else { + /* + * If the return value is -ve but not -EINVAL then the command + * failed. 
If the return value is 0 then the synctask for the + * op has begun, so in both cases just 'goto out'. If there was + * a failure it will respond with an error, otherwise the + * synctask will take the responsibility of sending the + * response. + */ + goto out; + } + + ret = glusterd_add_bricks_hname_path_to_dict(dict, volinfo); + if (ret) + goto out; + + ret = dict_set_int32n(dict, "count", SLEN("count"), volinfo->brick_count); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=count", NULL); + goto out; + } + + ret = glusterd_op_begin_synctask(req, GD_OP_HEAL_VOLUME, dict); + +out: + if (ret) { + if (op_errstr[0] == '\0') + snprintf(op_errstr, sizeof(op_errstr), "operation failed"); + gf_msg((this ? this->name : "glusterd"), GF_LOG_ERROR, 0, + GD_MSG_GLUSTERD_OP_FAILED, "%s", op_errstr); + ret = glusterd_op_send_cli_response(cli_op, ret, 0, req, dict, + op_errstr); + } + + return ret; +} + +int +glusterd_handle_cli_heal_volume(rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler(req, __glusterd_handle_cli_heal_volume); +} + +int +__glusterd_handle_cli_statedump_volume(rpcsvc_request_t *req) +{ + int32_t ret = -1; + gf_cli_req cli_req = {{ + 0, + }}; + char *volname = NULL; + char *options = NULL; + dict_t *dict = NULL; + int32_t option_cnt = 0; + glusterd_op_t cli_op = GD_OP_STATEDUMP_VOLUME; + char err_str[128] = { + 0, + }; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + GF_ASSERT(req); + + ret = -1; + ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + req->rpc_err = GARBAGE_ARGS; + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL); + goto out; + } + if (cli_req.dict.dict_len) { + /* Unserialize the dictionary */ + dict = dict_new(); + + ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL, + "failed to " + "unserialize req-buffer to dictionary"); + snprintf(err_str, sizeof(err_str), + "Unable to " + "decode the command"); + goto out; + } + } + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + snprintf(err_str, sizeof(err_str), "Unable to get the volume name"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", + err_str); + goto out; + } + + ret = dict_get_strn(dict, "options", SLEN("options"), &options); + if (ret) { + snprintf(err_str, sizeof(err_str), "Unable to get options"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", + err_str); + goto out; + } + + ret = dict_get_int32n(dict, "option_cnt", SLEN("option_cnt"), &option_cnt); + if (ret) { + snprintf(err_str, sizeof(err_str), + "Unable to get option " + "count"); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", + err_str); + goto out; + } + + if (priv->op_version == GD_OP_VERSION_MIN && strstr(options, "quotad")) { + snprintf(err_str, sizeof(err_str), + "The cluster is operating " + "at op-version 1. 
Taking quotad's statedump is " + "disallowed in this state"); + ret = -1; + goto out; + } + + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_STATEDUMP_VOL_REQ_RCVD, + "Received statedump request for " + "volume %s with options %s", + volname, options); + + ret = glusterd_op_begin_synctask(req, GD_OP_STATEDUMP_VOLUME, dict); + +out: + if (ret) { + if (err_str[0] == '\0') + snprintf(err_str, sizeof(err_str), "Operation failed"); + ret = glusterd_op_send_cli_response(cli_op, ret, 0, req, dict, err_str); + } + free(cli_req.dict.dict_val); + + return ret; +} + +int +glusterd_handle_cli_statedump_volume(rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler(req, + __glusterd_handle_cli_statedump_volume); +} + +/* op-sm */ +int +glusterd_op_stage_create_volume(dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + int ret = 0; + char *volname = NULL; + char *bricks = NULL; + char *brick_list = NULL; + char *free_ptr = NULL; + char key[64] = ""; + glusterd_brickinfo_t *brick_info = NULL; + int32_t brick_count = 0; + int32_t local_brick_count = 0; + int32_t i = 0; + int32_t type = 0; + int32_t replica_count = 0; + int32_t disperse_count = 0; + char *brick = NULL; + char *tmpptr = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + char msg[2048] = {0}; + uuid_t volume_uuid; + char *volume_uuid_str; + gf_boolean_t is_force = _gf_false; + glusterd_volinfo_t *volinfo = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + GF_ASSERT(rsp_dict); + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get volume name"); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (!ret) { + snprintf(msg, sizeof(msg), "Volume %s already exists", volname); + ret = -1; + goto out; + } + + ret = dict_get_int32n(dict, "count", SLEN("count"), &brick_count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get brick count " + "for volume %s", + volname); + goto out; + } + + ret = dict_get_strn(dict, "volume-id", SLEN("volume-id"), &volume_uuid_str); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get volume id of " + "volume %s", + volname); + goto out; + } + + ret = gf_uuid_parse(volume_uuid_str, volume_uuid); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UUID_PARSE_FAIL, + "Unable to parse volume id of" + " volume %s", + volname); + goto out; + } + + ret = dict_get_strn(dict, "bricks", SLEN("bricks"), &bricks); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get bricks for " + "volume %s", + volname); + goto out; + } + + is_force = dict_get_str_boolean(dict, "force", _gf_false); + + if (bricks) { + brick_list = gf_strdup(bricks); + if (!brick_list) { + ret = -1; + goto out; + } else { + free_ptr = brick_list; + } + } + + /*Check brick order if the volume type is replicate or disperse. If + * force at the end of command not given then check brick order. 
+ */ + if (is_origin_glusterd(dict)) { + ret = dict_get_int32n(dict, "type", SLEN("type"), &type); + if (ret) { + snprintf(msg, sizeof(msg), + "Unable to get type of " + "volume %s", + volname); + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED, "%s", + msg); + goto out; + } + + if (!is_force) { + if (type == GF_CLUSTER_TYPE_REPLICATE) { + ret = dict_get_int32n(dict, "replica-count", + SLEN("replica-count"), &replica_count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Bricks check : Could" + " not retrieve replica count"); + goto out; + } + gf_msg_debug(this->name, 0, + "Replicate cluster type " + "found. Checking brick order."); + ret = glusterd_check_brick_order(dict, msg, type, &volname, + &bricks, &brick_count, + replica_count); + } else if (type == GF_CLUSTER_TYPE_DISPERSE) { + ret = dict_get_int32n(dict, "disperse-count", + SLEN("disperse-count"), &disperse_count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Bricks check : Could" + " not retrieve disperse count"); + goto out; + } + gf_msg_debug(this->name, 0, + "Disperse cluster type" + " found. Checking brick order."); + ret = glusterd_check_brick_order(dict, msg, type, &volname, + &bricks, &brick_count, + disperse_count); + } + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BAD_BRKORDER, + "Not creating the volume because of " + "bad brick order. %s", + msg); + *op_errstr = gf_strdup(msg); + goto out; + } + } + } + + while (i < brick_count) { + i++; + brick = strtok_r(brick_list, " \n", &tmpptr); + brick_list = tmpptr; + + if (!glusterd_store_is_valid_brickpath(volname, brick)) { + snprintf(msg, sizeof(msg), + "brick path %s is too " + "long.", + brick); + ret = -1; + goto out; + } + + if (!glusterd_is_valid_volfpath(volname, brick)) { + snprintf(msg, sizeof(msg), + "Volume file path for " + "volume %s and brick path %s is too long.", + volname, brick); + ret = -1; + goto out; + } + + ret = glusterd_brickinfo_new_from_brick(brick, &brick_info, _gf_true, + op_errstr); + if (ret) + goto out; + + ret = glusterd_new_brick_validate(brick, brick_info, msg, sizeof(msg), + NULL); + if (ret) + goto out; + + ret = glusterd_resolve_brick(brick_info); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RESOLVE_BRICK_FAIL, + FMTSTR_RESOLVE_BRICK, brick_info->hostname, + brick_info->path); + goto out; + } + + if (!gf_uuid_compare(brick_info->uuid, MY_UUID)) { + ret = glusterd_validate_and_create_brickpath( + brick_info, volume_uuid, volname, op_errstr, is_force, + _gf_false); + if (ret) + goto out; + + /* A bricks mount dir is required only by snapshots which were + * introduced in gluster-3.6.0 + */ + if (priv->op_version >= GD_OP_VERSION_3_6_0) { + ret = glusterd_get_brick_mount_dir(brick_info->path, + brick_info->hostname, + brick_info->mount_dir); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_BRICK_MOUNTDIR_GET_FAIL, + "Failed to get brick mount_dir"); + goto out; + } + + snprintf(key, sizeof(key), "brick%d.mount_dir", i); + ret = dict_set_dynstr_with_alloc(rsp_dict, key, + brick_info->mount_dir); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set %s", key); + goto out; + } + } + local_brick_count = i; + + brick_list = tmpptr; + } + glusterd_brickinfo_delete(brick_info); + brick_info = NULL; + } + + ret = dict_set_int32n(rsp_dict, "brick_count", SLEN("brick_count"), + local_brick_count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set 
local_brick_count"); + goto out; + } +out: + GF_FREE(free_ptr); + if (brick_info) + glusterd_brickinfo_delete(brick_info); + + if (msg[0] != '\0') { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OP_STAGE_CREATE_VOL_FAIL, + "%s", msg); + *op_errstr = gf_strdup(msg); + } + gf_msg_debug(this->name, 0, "Returning %d", ret); + + return ret; +} + +int +glusterd_op_stop_volume_args_get(dict_t *dict, char **volname, int *flags) +{ + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + if (!dict) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ARGUMENT, NULL); + goto out; + } + + if (!volname) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ARGUMENT, NULL); + goto out; + } + + if (!flags) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ARGUMENT, NULL); + goto out; + } + + ret = dict_get_strn(dict, "volname", SLEN("volname"), volname); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Key=volname", NULL); + goto out; + } + + ret = dict_get_int32n(dict, "flags", SLEN("flags"), flags); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Key=flags", NULL); + goto out; + } +out: + return ret; +} + +int +glusterd_op_statedump_volume_args_get(dict_t *dict, char **volname, + char **options, int *option_cnt) +{ + int ret = -1; + + if (!dict || !volname || !options || !option_cnt) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); + goto out; + } + + ret = dict_get_strn(dict, "volname", SLEN("volname"), volname); + if (ret) { + gf_smsg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Key=volname", NULL); + goto out; + } + + ret = dict_get_strn(dict, "options", SLEN("options"), options); + if (ret) { + gf_smsg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Key=options", NULL); + goto out; + } + + ret = dict_get_int32n(dict, "option_cnt", SLEN("option_cnt"), option_cnt); + if (ret) { + gf_smsg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Key=option_cnt", NULL); + goto out; + } + +out: + return ret; +} + +int +glusterd_op_stage_start_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict) +{ + int ret = 0; + char *volname = NULL; + char key[64] = ""; + int flags = 0; + int32_t brick_count = 0; + int32_t local_brick_count = 0; + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + char msg[2048] = { + 0, + }; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + uuid_t volume_id = { + 0, + }; + char volid[50] = { + 0, + }; + char xattr_volid[50] = { + 0, + }; + int32_t len = 0; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + GF_ASSERT(rsp_dict); + + ret = glusterd_op_start_volume_args_get(dict, &volname, &flags); + if (ret) + goto out; + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + snprintf(msg, sizeof(msg), FMTSTR_CHECK_VOL_EXISTS, volname); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, + FMTSTR_CHECK_VOL_EXISTS, volname); + goto out; + } + + /* This is an incremental approach to have all the volinfo objects ref + * count. The first attempt is made in volume start transaction to + * ensure it doesn't race with import volume where stale volume is + * deleted. There are multiple instances of GlusterD crashing in + * bug-948686.t because of this. Once this approach is full proof, all + * other volinfo objects will be refcounted. 
+ */ + glusterd_volinfo_ref(volinfo); + + if (priv->op_version > GD_OP_VERSION_3_7_5) { + ret = glusterd_validate_quorum(this, GD_OP_START_VOLUME, dict, + op_errstr); + if (ret) { + gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_SERVER_QUORUM_NOT_MET, + "Server quorum not met. Rejecting operation."); + goto out; + } + } + + ret = glusterd_validate_volume_id(dict, volinfo); + if (ret) + goto out; + + if (!(flags & GF_CLI_FLAG_OP_FORCE)) { + if (glusterd_is_volume_started(volinfo)) { + snprintf(msg, sizeof(msg), + "Volume %s already " + "started", + volname); + ret = -1; + goto out; + } + } + + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + brick_count++; + ret = glusterd_resolve_brick(brickinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RESOLVE_BRICK_FAIL, + FMTSTR_RESOLVE_BRICK, brickinfo->hostname, brickinfo->path); + goto out; + } + + if ((gf_uuid_compare(brickinfo->uuid, MY_UUID)) || + (brickinfo->snap_status == -1)) + continue; + + ret = gf_lstat_dir(brickinfo->path, NULL); + if (ret && (flags & GF_CLI_FLAG_OP_FORCE)) { + continue; + } else if (ret) { + len = snprintf(msg, sizeof(msg), + "Failed to find " + "brick directory %s for volume %s. " + "Reason : %s", + brickinfo->path, volname, strerror(errno)); + if (len < 0) { + strcpy(msg, "<error>"); + } + goto out; + } + ret = sys_lgetxattr(brickinfo->path, GF_XATTR_VOL_ID_KEY, volume_id, + 16); + if (ret < 0 && (!(flags & GF_CLI_FLAG_OP_FORCE))) { + len = snprintf(msg, sizeof(msg), + "Failed to get " + "extended attribute %s for brick dir " + "%s. Reason : %s", + GF_XATTR_VOL_ID_KEY, brickinfo->path, + strerror(errno)); + if (len < 0) { + strcpy(msg, "<error>"); + } + ret = -1; + goto out; + } else if (ret < 0) { + ret = sys_lsetxattr(brickinfo->path, GF_XATTR_VOL_ID_KEY, + volinfo->volume_id, 16, XATTR_CREATE); + if (ret == -1) { + len = snprintf(msg, sizeof(msg), + "Failed to " + "set extended attribute %s on " + "%s. Reason: %s", + GF_XATTR_VOL_ID_KEY, brickinfo->path, + strerror(errno)); + if (len < 0) { + strcpy(msg, "<error>"); + } + goto out; + } else { + continue; + } + } + if (gf_uuid_compare(volinfo->volume_id, volume_id)) { + len = snprintf(msg, sizeof(msg), + "Volume id " + "mismatch for brick %s:%s. 
Expected " + "volume id %s, volume id %s found", + brickinfo->hostname, brickinfo->path, + uuid_utoa_r(volinfo->volume_id, volid), + uuid_utoa_r(volume_id, xattr_volid)); + if (len < 0) { + strcpy(msg, "<error>"); + } + ret = -1; + goto out; + } + + /* A bricks mount dir is required only by snapshots which were + * introduced in gluster-3.6.0 + */ + if (priv->op_version >= GD_OP_VERSION_3_6_0) { + if (strlen(brickinfo->mount_dir) < 1) { + ret = glusterd_get_brick_mount_dir( + brickinfo->path, brickinfo->hostname, brickinfo->mount_dir); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_BRICK_MOUNTDIR_GET_FAIL, + "Failed to get brick mount_dir"); + goto out; + } + + snprintf(key, sizeof(key), "brick%d.mount_dir", brick_count); + ret = dict_set_dynstr_with_alloc(rsp_dict, key, + brickinfo->mount_dir); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set %s", key); + goto out; + } + local_brick_count = brick_count; + } + } + } + + ret = dict_set_int32n(rsp_dict, "brick_count", SLEN("brick_count"), + local_brick_count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set local_brick_count"); + goto out; + } + + ret = 0; +out: + if (volinfo) + glusterd_volinfo_unref(volinfo); + + if (ret && (msg[0] != '\0')) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OP_STAGE_START_VOL_FAIL, + "%s", msg); + *op_errstr = gf_strdup(msg); + } + return ret; +} + +int +glusterd_op_stage_stop_volume(dict_t *dict, char **op_errstr) +{ + int ret = -1; + char *volname = NULL; + int flags = 0; + glusterd_volinfo_t *volinfo = NULL; + char msg[2048] = {0}; + xlator_t *this = NULL; + gsync_status_param_t param = { + 0, + }; + + this = THIS; + GF_ASSERT(this); + + ret = glusterd_op_stop_volume_args_get(dict, &volname, &flags); + if (ret) { + snprintf(msg, sizeof(msg), "Failed to get details of volume %s", + volname); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_STOP_ARGS_GET_FAILED, + "Volume name=%s", volname, NULL); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + snprintf(msg, sizeof(msg), FMTSTR_CHECK_VOL_EXISTS, volname); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, "%s", msg); + goto out; + } + + ret = glusterd_validate_volume_id(dict, volinfo); + if (ret) + goto out; + + /* If 'force' flag is given, no check is required */ + if (flags & GF_CLI_FLAG_OP_FORCE) + goto out; + + if (_gf_false == glusterd_is_volume_started(volinfo)) { + snprintf(msg, sizeof(msg), + "Volume %s " + "is not in the started state", + volname); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_STARTED, "%s", msg); + ret = -1; + goto out; + } + + /* If geo-rep is configured, for this volume, it should be stopped. 
*/ + param.volinfo = volinfo; + ret = glusterd_check_geo_rep_running(¶m, op_errstr); + if (ret || param.is_active) { + ret = -1; + goto out; + } + + ret = glusterd_check_ganesha_export(volinfo); + if (ret) { + ret = ganesha_manage_export(dict, "off", _gf_false, op_errstr); + if (ret) { + gf_msg(THIS->name, GF_LOG_WARNING, 0, + GD_MSG_NFS_GNS_UNEXPRT_VOL_FAIL, + "Could not " + "unexport volume via NFS-Ganesha"); + ret = 0; + } + } + + if (glusterd_is_defrag_on(volinfo)) { + snprintf(msg, sizeof(msg), + "rebalance session is " + "in progress for the volume '%s'", + volname); + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_OIP, "%s", msg); + ret = -1; + goto out; + } + +out: + if (msg[0] != 0) + *op_errstr = gf_strdup(msg); + gf_msg_debug(this->name, 0, "Returning %d", ret); + + return ret; +} + +int +glusterd_op_stage_delete_volume(dict_t *dict, char **op_errstr) +{ + int ret = 0; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + char msg[2048] = {0}; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get volume name"); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + snprintf(msg, sizeof(msg), FMTSTR_CHECK_VOL_EXISTS, volname); + goto out; + } + + ret = glusterd_validate_volume_id(dict, volinfo); + if (ret) + goto out; + + if (glusterd_is_volume_started(volinfo)) { + snprintf(msg, sizeof(msg), + "Volume %s has been started." + "Volume needs to be stopped before deletion.", + volname); + ret = -1; + goto out; + } + + if (volinfo->snap_count > 0 || !cds_list_empty(&volinfo->snap_volumes)) { + snprintf(msg, sizeof(msg), + "Cannot delete Volume %s ," + "as it has %" PRIu64 + " snapshots. " + "To delete the volume, " + "first delete all the snapshots under it.", + volname, volinfo->snap_count); + ret = -1; + goto out; + } + + if (!glusterd_are_all_peers_up()) { + ret = -1; + snprintf(msg, sizeof(msg), "Some of the peers are down"); + goto out; + } + volinfo->stage_deleted = _gf_true; + gf_log(this->name, GF_LOG_INFO, + "Setting stage deleted flag to true for " + "volume %s", + volinfo->volname); + ret = 0; + +out: + if (msg[0] != '\0') { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OP_STAGE_DELETE_VOL_FAIL, + "%s", msg); + *op_errstr = gf_strdup(msg); + } + gf_msg_debug(this->name, 0, "Returning %d", ret); + + return ret; +} + +static int +glusterd_handle_heal_cmd(xlator_t *this, glusterd_volinfo_t *volinfo, + dict_t *dict, char **op_errstr) +{ + glusterd_svc_t *svc = NULL; + gf_xl_afr_op_t heal_op = GF_SHD_OP_INVALID; + int ret = 0; + char msg[2408] = { + 0, + }; + char *offline_msg = + "Self-heal daemon is not running. 
" + "Check self-heal daemon log file."; + + ret = dict_get_int32n(dict, "heal-op", SLEN("heal-op"), + (int32_t *)&heal_op); + if (ret) { + ret = -1; + *op_errstr = gf_strdup("Heal operation not specified"); + goto out; + } + + svc = &(volinfo->shd.svc); + switch (heal_op) { + case GF_SHD_OP_INVALID: + case GF_SHD_OP_HEAL_ENABLE: /* This op should be handled in volume-set*/ + case GF_SHD_OP_HEAL_DISABLE: /* This op should be handled in + volume-set*/ + case GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE: /* This op should be handled + in volume-set */ + case GF_SHD_OP_GRANULAR_ENTRY_HEAL_DISABLE: /* This op should be handled + in volume-set */ + case GF_SHD_OP_HEAL_SUMMARY: /*glfsheal cmd*/ + case GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE: /*glfsheal cmd*/ + case GF_SHD_OP_SBRAIN_HEAL_FROM_LATEST_MTIME: /*glfsheal cmd*/ + case GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK: /*glfsheal cmd*/ + ret = -1; + *op_errstr = gf_strdup("Invalid heal-op"); + goto out; + + case GF_SHD_OP_HEAL_INDEX: + case GF_SHD_OP_HEAL_FULL: + if (!glusterd_is_shd_compatible_volume(volinfo)) { + ret = -1; + snprintf(msg, sizeof(msg), + "Volume %s is not of type " + "replicate or disperse", + volinfo->volname); + *op_errstr = gf_strdup(msg); + goto out; + } + + if (!svc->online) { + ret = -1; + *op_errstr = gf_strdup(offline_msg); + goto out; + } + break; + case GF_SHD_OP_INDEX_SUMMARY: + case GF_SHD_OP_SPLIT_BRAIN_FILES: + case GF_SHD_OP_STATISTICS: + case GF_SHD_OP_STATISTICS_HEAL_COUNT: + case GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA: + if (!glusterd_is_volume_replicate(volinfo)) { + ret = -1; + snprintf(msg, sizeof(msg), + "This command is supported " + "for only volume of replicated " + "type. Volume %s is not of type " + "replicate", + volinfo->volname); + *op_errstr = gf_strdup(msg); + goto out; + } + + if (!svc->online) { + ret = -1; + *op_errstr = gf_strdup(offline_msg); + goto out; + } + break; + case GF_SHD_OP_HEALED_FILES: + case GF_SHD_OP_HEAL_FAILED_FILES: + ret = -1; + snprintf(msg, sizeof(msg), + "Command not supported. 
" + "Please use \"gluster volume heal %s info\" " + "and logs to find the heal information.", + volinfo->volname); + *op_errstr = gf_strdup(msg); + goto out; + } +out: + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_HANDLE_HEAL_CMD_FAIL, "%s", + *op_errstr); + return ret; +} + +int +glusterd_op_stage_heal_volume(dict_t *dict, char **op_errstr) +{ + int ret = 0; + char *volname = NULL; + gf_boolean_t enabled = _gf_false; + glusterd_volinfo_t *volinfo = NULL; + char msg[2048]; + glusterd_conf_t *priv = NULL; + dict_t *opt_dict = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + if (!priv) { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PRIV_NULL, "priv is NULL"); + goto out; + } + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get volume name"); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + ret = -1; + snprintf(msg, sizeof(msg), "Volume %s does not exist", volname); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, "%s", msg); + *op_errstr = gf_strdup(msg); + goto out; + } + + ret = glusterd_validate_volume_id(dict, volinfo); + if (ret) + goto out; + + if (!glusterd_is_volume_started(volinfo)) { + ret = -1; + snprintf(msg, sizeof(msg), "Volume %s is not started.", volname); + gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOL_NOT_STARTED, + "Volume=%s", volname, NULL); + *op_errstr = gf_strdup(msg); + goto out; + } + + opt_dict = volinfo->dict; + if (!opt_dict) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, NULL); + ret = 0; + goto out; + } + enabled = gd_is_self_heal_enabled(volinfo, opt_dict); + if (!enabled) { + ret = -1; + snprintf(msg, sizeof(msg), + "Self-heal-daemon is " + "disabled. Heal will not be triggered on volume %s", + volname); + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SELF_HEALD_DISABLED, "%s", + msg); + *op_errstr = gf_strdup(msg); + goto out; + } + + ret = glusterd_handle_heal_cmd(this, volinfo, dict, op_errstr); + if (ret) + goto out; + + ret = 0; +out: + gf_msg_debug("glusterd", 0, "Returning %d", ret); + + return ret; +} + +int +glusterd_op_stage_statedump_volume(dict_t *dict, char **op_errstr) +{ + int ret = -1; + char *volname = NULL; + char *options = NULL; + int option_cnt = 0; + gf_boolean_t is_running = _gf_false; + glusterd_volinfo_t *volinfo = NULL; + char msg[2408] = { + 0, + }; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + ret = glusterd_op_statedump_volume_args_get(dict, &volname, &options, + &option_cnt); + if (ret) + goto out; + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + snprintf(msg, sizeof(msg), FMTSTR_CHECK_VOL_EXISTS, volname); + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_VOLINFO_GET_FAIL, + "Volume=%s", volname, NULL); + goto out; + } + + ret = glusterd_validate_volume_id(dict, volinfo); + if (ret) + goto out; + + is_running = glusterd_is_volume_started(volinfo); + if (!is_running) { + snprintf(msg, sizeof(msg), + "Volume %s is not in the started" + " state", + volname); + ret = -1; + goto out; + } + + if (priv->op_version == GD_OP_VERSION_MIN && strstr(options, "quotad")) { + snprintf(msg, sizeof(msg), + "The cluster is operating " + "at op-version 1. 
Taking quotad's statedump is " + "disallowed in this state"); + ret = -1; + goto out; + } + if ((strstr(options, "quotad")) && + (!glusterd_is_volume_quota_enabled(volinfo))) { + snprintf(msg, sizeof(msg), + "Quota is not enabled on " + "volume %s", + volname); + ret = -1; + goto out; + } +out: + if (ret && msg[0] != '\0') + *op_errstr = gf_strdup(msg); + gf_msg_debug(this->name, 0, "Returning %d", ret); + return ret; +} + +int +glusterd_op_stage_clearlocks_volume(dict_t *dict, char **op_errstr) +{ + int ret = -1; + char *volname = NULL; + char *path = NULL; + char *type = NULL; + char *kind = NULL; + glusterd_volinfo_t *volinfo = NULL; + char msg[2048] = { + 0, + }; + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + snprintf(msg, sizeof(msg), "Failed to get volume name"); + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", msg); + *op_errstr = gf_strdup(msg); + goto out; + } + + ret = dict_get_strn(dict, "path", SLEN("path"), &path); + if (ret) { + snprintf(msg, sizeof(msg), "Failed to get path"); + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", msg); + *op_errstr = gf_strdup(msg); + goto out; + } + + ret = dict_get_strn(dict, "kind", SLEN("kind"), &kind); + if (ret) { + snprintf(msg, sizeof(msg), "Failed to get kind"); + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", msg); + *op_errstr = gf_strdup(msg); + goto out; + } + + ret = dict_get_strn(dict, "type", SLEN("type"), &type); + if (ret) { + snprintf(msg, sizeof(msg), "Failed to get type"); + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", msg); + *op_errstr = gf_strdup(msg); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + snprintf(msg, sizeof(msg), "Volume %s does not exist", volname); + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, "%s", msg); + *op_errstr = gf_strdup(msg); + goto out; + } + + ret = glusterd_validate_volume_id(dict, volinfo); + if (ret) + goto out; + + if (!glusterd_is_volume_started(volinfo)) { + snprintf(msg, sizeof(msg), "Volume %s is not started", volname); + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_STARTED, "%s", msg); + *op_errstr = gf_strdup(msg); + goto out; + } + + ret = 0; +out: + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; +} + +int +glusterd_op_create_volume(dict_t *dict, char **op_errstr) +{ + int ret = 0; + char *volname = NULL; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *volinfo = NULL; + gf_boolean_t vol_added = _gf_false; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_brickinfo_t *ta_brickinfo = NULL; + xlator_t *this = NULL; + char *brick = NULL; + char *ta_brick = NULL; + int32_t count = 0; + int32_t i = 1; + char *bricks = NULL; + char *ta_bricks = NULL; + char *brick_list = NULL; + char *ta_brick_list = NULL; + char *free_ptr = NULL; + char *ta_free_ptr = NULL; + char *saveptr = NULL; + char *ta_saveptr = NULL; + char *trans_type = NULL; + char *str = NULL; + char *username = NULL; + char *password = NULL; + int brickid = 0; + char msg[1024] __attribute__((unused)) = { + 0, + }; + char *brick_mount_dir = NULL; + char key[64] = ""; + char *address_family_str = NULL; + struct statvfs brickstat = { + 0, + }; + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + GF_ASSERT(priv); + + ret = glusterd_volinfo_new(&volinfo); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "Unable to allocate memory for volinfo"); + goto out; + } + + ret = dict_get_strn(dict, "volname", 
SLEN("volname"), &volname); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get volume name"); + goto out; + } + + if (snprintf(volinfo->volname, sizeof(volinfo->volname), "%s", volname) >= + sizeof(volinfo->volname)) { + ret = -1; + goto out; + } + + GF_ASSERT(volinfo->volname); + + ret = dict_get_int32n(dict, "type", SLEN("type"), &volinfo->type); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get type of volume" + " %s", + volname); + goto out; + } + + ret = dict_get_int32n(dict, "count", SLEN("count"), &volinfo->brick_count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get brick count of" + " volume %s", + volname); + goto out; + } + + ret = dict_get_int32n(dict, "port", SLEN("port"), &volinfo->port); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get port"); + goto out; + } + + ret = dict_get_strn(dict, "bricks", SLEN("bricks"), &bricks); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get bricks for " + "volume %s", + volname); + goto out; + } + + /* replica-count 1 means, no replication, file is in one brick only */ + volinfo->replica_count = 1; + /* stripe-count 1 means, no striping, file is present as a whole */ + volinfo->stripe_count = 1; + + if (GF_CLUSTER_TYPE_REPLICATE == volinfo->type) { + /* performance.client-io-threads is turned on to default, + * however this has adverse effects on replicate volumes due to + * replication design issues, till that get addressed + * performance.client-io-threads option is turned off for all + * replicate volumes + */ + if (priv->op_version >= GD_OP_VERSION_3_12_2) { + ret = dict_set_nstrn(volinfo->dict, "performance.client-io-threads", + SLEN("performance.client-io-threads"), "off", + SLEN("off")); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set " + "performance.client-io-threads to off"); + goto out; + } + } + ret = dict_get_int32n(dict, "replica-count", SLEN("replica-count"), + &volinfo->replica_count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get " + "replica count for volume %s", + volname); + goto out; + } + + /* coverity[unused_value] arbiter count is optional */ + ret = dict_get_int32n(dict, "arbiter-count", SLEN("arbiter-count"), + &volinfo->arbiter_count); + ret = dict_get_int32n(dict, "thin-arbiter-count", + SLEN("thin-arbiter-count"), + &volinfo->thin_arbiter_count); + if (volinfo->thin_arbiter_count) { + ret = dict_get_strn(dict, "ta-brick", SLEN("ta-brick"), &ta_bricks); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get thin arbiter brick for " + "volume %s", + volname); + goto out; + } + } + + } else if (GF_CLUSTER_TYPE_DISPERSE == volinfo->type) { + ret = dict_get_int32n(dict, "disperse-count", SLEN("disperse-count"), + &volinfo->disperse_count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get " + "disperse count for volume %s", + volname); + goto out; + } + ret = dict_get_int32n(dict, "redundancy-count", + SLEN("redundancy-count"), + &volinfo->redundancy_count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get " + "redundancy count for volume %s", + volname); + goto out; + } + if (priv->op_version < GD_OP_VERSION_3_6_0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UNSUPPORTED_VERSION, + "Disperse 
volume " + "needs op-version 3.6.0 or higher"); + ret = -1; + goto out; + } + } + + /* dist-leaf-count is the count of brick nodes for a given + subvolume of distribute */ + volinfo->dist_leaf_count = glusterd_get_dist_leaf_count(volinfo); + + /* subvol_count is the count of number of subvolumes present + for a given distribute volume */ + volinfo->subvol_count = (volinfo->brick_count / volinfo->dist_leaf_count); + + /* Keep sub-count same as earlier, for the sake of backward + compatibility */ + if (volinfo->dist_leaf_count > 1) + volinfo->sub_count = volinfo->dist_leaf_count; + + ret = dict_get_strn(dict, "transport", SLEN("transport"), &trans_type); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get transport type of volume %s", volname); + goto out; + } + + ret = dict_get_strn(dict, "volume-id", SLEN("volume-id"), &str); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get volume-id of volume %s", volname); + goto out; + } + ret = gf_uuid_parse(str, volinfo->volume_id); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UUID_PARSE_FAIL, + "unable to parse uuid %s of volume %s", str, volname); + goto out; + } + + ret = dict_get_strn(dict, "internal-username", SLEN("internal-username"), + &username); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "unable to get internal username of volume %s", volname); + goto out; + } + glusterd_auth_set_username(volinfo, username); + + ret = dict_get_strn(dict, "internal-password", SLEN("internal-password"), + &password); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "unable to get internal password of volume %s", volname); + goto out; + } + glusterd_auth_set_password(volinfo, password); + + if (strcasecmp(trans_type, "rdma") == 0) { + volinfo->transport_type = GF_TRANSPORT_RDMA; + } else if (strcasecmp(trans_type, "tcp") == 0) { + volinfo->transport_type = GF_TRANSPORT_TCP; + } else { + volinfo->transport_type = GF_TRANSPORT_BOTH_TCP_RDMA; + } + + if (ta_bricks) { + ta_brick_list = gf_strdup(ta_bricks); + ta_free_ptr = ta_brick_list; + } + + if (volinfo->thin_arbiter_count) { + ta_brick = strtok_r(ta_brick_list + 1, " \n", &ta_saveptr); + + count = 1; + brickid = volinfo->replica_count; + /* assign brickid to ta_bricks + * Following loop runs for number of subvols times. Although + * there is only one ta-brick for a volume but the volume fuse volfile + * requires an entry of ta-brick for each subvolume. Also, the ta-brick + * id needs to be adjusted according to the subvol count. + * For eg- For first subvolume ta-brick id is volname-ta-2, for second + * subvol ta-brick id is volname-ta-5. 
+ */ + while (count <= volinfo->subvol_count) { + ret = glusterd_brickinfo_new_from_brick(ta_brick, &ta_brickinfo, + _gf_false, op_errstr); + if (ret) + goto out; + + GLUSTERD_ASSIGN_BRICKID_TO_TA_BRICKINFO(ta_brickinfo, volinfo, + brickid); + cds_list_add_tail(&ta_brickinfo->brick_list, &volinfo->ta_bricks); + count++; + brickid += volinfo->replica_count + 1; + } + } + + if (bricks) { + brick_list = gf_strdup(bricks); + free_ptr = brick_list; + } + + count = volinfo->brick_count; + + if (count) + brick = strtok_r(brick_list + 1, " \n", &saveptr); + + brickid = glusterd_get_next_available_brickid(volinfo); + if (brickid < 0) + goto out; + while (i <= count) { + ret = glusterd_brickinfo_new_from_brick(brick, &brickinfo, _gf_true, + op_errstr); + if (ret) + goto out; + if (volinfo->thin_arbiter_count == 1 && + (brickid + 1) % (volinfo->replica_count + 1) == 0) { + brickid = brickid + 1; + } + GLUSTERD_ASSIGN_BRICKID_TO_BRICKINFO(brickinfo, volinfo, brickid++); + + ret = glusterd_resolve_brick(brickinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RESOLVE_BRICK_FAIL, + FMTSTR_RESOLVE_BRICK, brickinfo->hostname, brickinfo->path); + goto out; + } + + /* A bricks mount dir is required only by snapshots which were + * introduced in gluster-3.6.0 + */ + if (priv->op_version >= GD_OP_VERSION_3_6_0) { + brick_mount_dir = NULL; + ret = snprintf(key, sizeof(key), "brick%d.mount_dir", i); + ret = dict_get_strn(dict, key, ret, &brick_mount_dir); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "%s not present", key); + goto out; + } + snprintf(brickinfo->mount_dir, sizeof(brickinfo->mount_dir), "%s", + brick_mount_dir); + } + + if (!gf_uuid_compare(brickinfo->uuid, MY_UUID)) { + ret = sys_statvfs(brickinfo->path, &brickstat); + if (ret) { + gf_log("brick-op", GF_LOG_ERROR, + "Failed to fetch disk" + " utilization from the brick (%s:%s). Please " + "check health of the brick. 
Error code was %s", + brickinfo->hostname, brickinfo->path, strerror(errno)); + goto out; + } + brickinfo->statfs_fsid = brickstat.f_fsid; + } + + cds_list_add_tail(&brickinfo->brick_list, &volinfo->bricks); + brick = strtok_r(NULL, " \n", &saveptr); + i++; + } + + ret = glusterd_enable_default_options(volinfo, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FAIL_DEFAULT_OPT_SET, + "Failed to set default " + "options on create for volume %s", + volinfo->volname); + goto out; + } + + ret = dict_get_strn(dict, "transport.address-family", + SLEN("transport.address-family"), &address_family_str); + + if (!ret) { + ret = dict_set_dynstr_with_alloc( + volinfo->dict, "transport.address-family", address_family_str); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, + "Failed to set transport.address-family for %s", + volinfo->volname); + goto out; + } + } + + gd_update_volume_op_versions(volinfo); + + ret = glusterd_store_volinfo(volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + glusterd_store_delete_volume(volinfo); + *op_errstr = gf_strdup( + "Failed to store the " + "Volume information"); + goto out; + } + + ret = glusterd_create_volfiles_and_notify_services(volinfo); + if (ret) { + *op_errstr = gf_strdup("Failed to create volume files"); + goto out; + } + + volinfo->rebal.defrag_status = 0; + glusterd_list_add_order(&volinfo->vol_list, &priv->volumes, + glusterd_compare_volume_name); + vol_added = _gf_true; + +out: + GF_FREE(free_ptr); + GF_FREE(ta_free_ptr); + if (!vol_added && volinfo) + glusterd_volinfo_unref(volinfo); + return ret; +} + +int +glusterd_start_volume(glusterd_volinfo_t *volinfo, int flags, gf_boolean_t wait) + +{ + int ret = 0; + glusterd_brickinfo_t *brickinfo = NULL; + xlator_t *this = NULL; + glusterd_volinfo_ver_ac_t verincrement = 0; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(volinfo); + + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + /* Mark start_triggered to false so that in case if this brick + * was brought down through gf_attach utility, the + * brickinfo->start_triggered wouldn't have been updated to + * _gf_false + */ + if (flags & GF_CLI_FLAG_OP_FORCE) { + brickinfo->start_triggered = _gf_false; + } + ret = glusterd_brick_start(volinfo, brickinfo, wait, _gf_false); + /* If 'force' try to start all bricks regardless of success or + * failure + */ + if (!(flags & GF_CLI_FLAG_OP_FORCE) && ret) + goto out; + } + + /* Increment the volinfo version only if there is a + * change in status. Force option can be used to start + * dead bricks even if the volume is in started state. + * In such case volume status will be GLUSTERD_STATUS_STARTED. 
+ * Therefore we should not increment the volinfo version.*/ + if (GLUSTERD_STATUS_STARTED != volinfo->status) { + verincrement = GLUSTERD_VOLINFO_VER_AC_INCREMENT; + } else { + verincrement = GLUSTERD_VOLINFO_VER_AC_NONE; + } + + glusterd_set_volume_status(volinfo, GLUSTERD_STATUS_STARTED); + /* Update volinfo on disk in critical section because + attach_brick_callback can also call store_volinfo for same + volume to update volinfo on disk + */ + /* coverity[ORDER_REVERSAL] */ + LOCK(&volinfo->lock); + ret = glusterd_store_volinfo(volinfo, verincrement); + UNLOCK(&volinfo->lock); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_SET_FAIL, + "Failed to store volinfo of " + "%s volume", + volinfo->volname); + goto out; + } +out: + gf_msg_trace(this->name, 0, "returning %d ", ret); + return ret; +} + +int +glusterd_op_start_volume(dict_t *dict, char **op_errstr) +{ + int ret = 0; + int32_t brick_count = 0; + char *brick_mount_dir = NULL; + char key[64] = ""; + char *volname = NULL; + int flags = 0; + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + glusterd_svc_t *svc = NULL; + char *str = NULL; + gf_boolean_t option = _gf_false; + + this = THIS; + GF_ASSERT(this); + conf = this->private; + GF_ASSERT(conf); + + ret = glusterd_op_start_volume_args_get(dict, &volname, &flags); + if (ret) + goto out; + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, + FMTSTR_CHECK_VOL_EXISTS, volname); + goto out; + } + + /* This is an incremental approach to have all the volinfo objects ref + * count. The first attempt is made in volume start transaction to + * ensure it doesn't race with import volume where stale volume is + * deleted. There are multiple instances of GlusterD crashing in + * bug-948686.t because of this. Once this approach is full proof, all + * other volinfo objects will be refcounted. 
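+ * The reference taken just below is released with glusterd_volinfo_unref() + * at the out: label of this function.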
+ */ + glusterd_volinfo_ref(volinfo); + + /* A bricks mount dir is required only by snapshots which were + * introduced in gluster-3.6.0 + */ + if (conf->op_version >= GD_OP_VERSION_3_6_0) { + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + brick_count++; + /* Don't check bricks that are not owned by you + */ + if (gf_uuid_compare(brickinfo->uuid, MY_UUID)) + continue; + if (strlen(brickinfo->mount_dir) < 1) { + brick_mount_dir = NULL; + ret = snprintf(key, sizeof(key), "brick%d.mount_dir", + brick_count); + ret = dict_get_strn(dict, key, ret, &brick_mount_dir); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "%s not present", key); + goto out; + } + if (snprintf(brickinfo->mount_dir, sizeof(brickinfo->mount_dir), + "%s", + brick_mount_dir) >= sizeof(brickinfo->mount_dir)) { + ret = -1; + goto out; + } + } + } + } + + ret = dict_get_str(conf->opts, GLUSTERD_STORE_KEY_GANESHA_GLOBAL, &str); + if (ret != 0) { + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_DICT_GET_FAILED, + "Global dict not present."); + ret = 0; + + } else { + ret = gf_string2boolean(str, &option); + /* Check if the feature is enabled and set nfs-disable to true */ + if (option) { + gf_msg_debug(this->name, 0, "NFS-Ganesha is enabled"); + /* Gluster-nfs should not start when NFS-Ganesha is enabled*/ + ret = dict_set_str(volinfo->dict, NFS_DISABLE_MAP_KEY, "on"); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Failed to set nfs.disable for" + "volume %s", + volname); + goto out; + } + } + } + + ret = glusterd_start_volume(volinfo, flags, _gf_true); + if (ret) + goto out; + + if (!volinfo->is_snap_volume) { + svc = &(volinfo->snapd.svc); + ret = svc->manager(svc, volinfo, PROC_START_NO_WAIT); + if (ret) + goto out; + } + + svc = &(volinfo->gfproxyd.svc); + ret = svc->manager(svc, volinfo, PROC_START_NO_WAIT); + ret = glusterd_svcs_manager(volinfo); + +out: + if (volinfo) + glusterd_volinfo_unref(volinfo); + + gf_msg_trace(this->name, 0, "returning %d ", ret); + return ret; +} + +int +glusterd_stop_volume(glusterd_volinfo_t *volinfo) +{ + int ret = -1; + glusterd_brickinfo_t *brickinfo = NULL; + xlator_t *this = NULL; + glusterd_svc_t *svc = NULL; + + this = THIS; + GF_ASSERT(this); + + GF_VALIDATE_OR_GOTO(this->name, volinfo, out); + + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + ret = glusterd_brick_stop(volinfo, brickinfo, _gf_false); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_STOP_FAIL, + "Failed to stop " + "brick (%s)", + brickinfo->path); + goto out; + } + } + + glusterd_set_volume_status(volinfo, GLUSTERD_STATUS_STOPPED); + + ret = glusterd_store_volinfo(volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_SET_FAIL, + "Failed to store volinfo of " + "%s volume", + volinfo->volname); + goto out; + } + + if (!volinfo->is_snap_volume) { + svc = &(volinfo->snapd.svc); + ret = svc->manager(svc, volinfo, PROC_START_NO_WAIT); + if (ret) + goto out; + } + + ret = glusterd_svcs_manager(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_GRAPH_CHANGE_NOTIFY_FAIL, + "Failed to notify graph " + "change for %s volume", + volinfo->volname); + + goto out; + } + +out: + return ret; +} + +int +glusterd_op_stop_volume(dict_t *dict) +{ + int ret = 0; + int flags = 0; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + ret = glusterd_op_stop_volume_args_get(dict, 
&volname, &flags); + if (ret) + goto out; + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, + FMTSTR_CHECK_VOL_EXISTS, volname); + goto out; + } + + ret = glusterd_stop_volume(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_STOP_FAILED, + "Failed to stop %s volume", volname); + goto out; + } +out: + gf_msg_trace(this->name, 0, "returning %d ", ret); + return ret; +} + +int +glusterd_op_delete_volume(dict_t *dict) +{ + int ret = 0; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to get volume name"); + goto out; + } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, + FMTSTR_CHECK_VOL_EXISTS, volname); + goto out; + } + + if (glusterd_check_ganesha_export(volinfo) && is_origin_glusterd(dict)) { + ret = manage_export_config(volname, "off", NULL); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, 0, + "Could not delete ganesha export conf file " + "for %s", + volname); + } + + ret = glusterd_delete_volume(volinfo); +out: + gf_msg_debug(this->name, 0, "returning %d", ret); + return ret; +} + +int +glusterd_op_heal_volume(dict_t *dict, char **op_errstr) +{ + int ret = 0; + /* Necessary subtasks of heal are completed in brick op */ + + return ret; +} + +int +glusterd_op_statedump_volume(dict_t *dict, char **op_errstr) +{ + int ret = 0; + char *volname = NULL; + char *options = NULL; + int option_cnt = 0; + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + + ret = glusterd_op_statedump_volume_args_get(dict, &volname, &options, + &option_cnt); + if (ret) + goto out; + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) + goto out; + gf_msg_debug("glusterd", 0, "Performing statedump on volume %s", volname); + if (strstr(options, "quotad")) { + ret = glusterd_quotad_statedump(options, option_cnt, op_errstr); + if (ret) + goto out; +#ifdef BUILD_GNFS + } else if (strstr(options, "nfs") != NULL) { + ret = glusterd_nfs_statedump(options, option_cnt, op_errstr); + if (ret) + goto out; +#endif + } else if (strstr(options, "client")) { + ret = glusterd_client_statedump(volname, options, option_cnt, + op_errstr); + if (ret) + goto out; + + } else { + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + ret = glusterd_brick_statedump(volinfo, brickinfo, options, + option_cnt, op_errstr); + /* Let us take the statedump of other bricks instead of + * exiting, if statedump of this brick fails. + */ + if (ret) + gf_msg(THIS->name, GF_LOG_WARNING, 0, GD_MSG_BRK_STATEDUMP_FAIL, + "could not " + "take the statedump of the brick %s:%s." + " Proceeding to other bricks", + brickinfo->hostname, brickinfo->path); + } + } + +out: + return ret; +} + +int +glusterd_clearlocks_send_cmd(glusterd_volinfo_t *volinfo, char *cmd, char *path, + char *result, char *errstr, int err_len, + char *mntpt) +{ + int ret = -1; + char abspath[PATH_MAX] = { + 0, + }; + + snprintf(abspath, sizeof(abspath), "%s/%s", mntpt, path); + ret = sys_lgetxattr(abspath, cmd, result, PATH_MAX); + if (ret < 0) { + snprintf(errstr, err_len, + "clear-locks getxattr command " + "failed. 
Reason: %s", + strerror(errno)); + gf_msg_debug(THIS->name, 0, "%s", errstr); + goto out; + } + + ret = 0; +out: + return ret; +} + +int +glusterd_clearlocks_rmdir_mount(glusterd_volinfo_t *volinfo, char *mntpt) +{ + int ret = -1; + + ret = sys_rmdir(mntpt); + if (ret) { + gf_msg_debug(THIS->name, 0, "rmdir failed"); + goto out; + } + + ret = 0; +out: + return ret; +} + +void +glusterd_clearlocks_unmount(glusterd_volinfo_t *volinfo, char *mntpt) +{ + glusterd_conf_t *priv = NULL; + runner_t runner = { + 0, + }; + int ret = 0; + + priv = THIS->private; + + /*umount failures are ignored. Using stat we could have avoided + * attempting to unmount a non-existent filesystem. But a failure of + * stat() on mount can be due to network failures.*/ + + runinit(&runner); + runner_add_args(&runner, _PATH_UMOUNT, "-f", NULL); + runner_argprintf(&runner, "%s", mntpt); + + synclock_unlock(&priv->big_lock); + ret = runner_run(&runner); + synclock_lock(&priv->big_lock); + if (ret) { + ret = 0; + gf_msg_debug("glusterd", 0, "umount failed on maintenance client"); + } + + return; +} + +int +glusterd_clearlocks_create_mount(glusterd_volinfo_t *volinfo, char **mntpt) +{ + int ret = -1; + char template[PATH_MAX] = { + 0, + }; + char *tmpl = NULL; + + snprintf(template, sizeof(template), "/tmp/%s.XXXXXX", volinfo->volname); + tmpl = mkdtemp(template); + if (!tmpl) { + gf_msg_debug(THIS->name, 0, + "Couldn't create temporary " + "mount directory. Reason %s", + strerror(errno)); + goto out; + } + + *mntpt = gf_strdup(tmpl); + ret = 0; +out: + return ret; +} + +int +glusterd_clearlocks_mount(glusterd_volinfo_t *volinfo, char **xl_opts, + char *mntpt) +{ + int ret = -1; + int i = 0; + glusterd_conf_t *priv = NULL; + runner_t runner = { + 0, + }; + char client_volfpath[PATH_MAX] = { + 0, + }; + char self_heal_opts[3][1024] = {"*replicate*.data-self-heal=off", + "*replicate*.metadata-self-heal=off", + "*replicate*.entry-self-heal=off"}; + + priv = THIS->private; + + runinit(&runner); + glusterd_get_trusted_client_filepath(client_volfpath, volinfo, + volinfo->transport_type); + runner_add_args(&runner, SBIN_DIR "/glusterfs", "-f", NULL); + runner_argprintf(&runner, "%s", client_volfpath); + runner_add_arg(&runner, "-l"); + runner_argprintf(&runner, "%s/%s-clearlocks-mnt.log", priv->logdir, + volinfo->volname); + if (volinfo->memory_accounting) + runner_add_arg(&runner, "--mem-accounting"); + + for (i = 0; i < volinfo->brick_count && xl_opts[i]; i++) { + runner_add_arg(&runner, "--xlator-option"); + runner_argprintf(&runner, "%s", xl_opts[i]); + } + + for (i = 0; i < 3; i++) { + runner_add_args(&runner, "--xlator-option", self_heal_opts[i], NULL); + } + + runner_argprintf(&runner, "%s", mntpt); + synclock_unlock(&priv->big_lock); + ret = runner_run(&runner); + synclock_lock(&priv->big_lock); + if (ret) { + gf_msg_debug(THIS->name, 0, "Could not start glusterfs"); + goto out; + } + gf_msg_debug(THIS->name, 0, "Started glusterfs successfully"); + +out: + return ret; +} + +int +glusterd_clearlocks_get_local_client_ports(glusterd_volinfo_t *volinfo, + char **xl_opts) +{ + glusterd_brickinfo_t *brickinfo = NULL; + char brickname[PATH_MAX] = { + 0, + }; + int index = 0; + int ret = -1; + int i = 0; + int port = 0; + int32_t len = 0; + + GF_ASSERT(xl_opts); + if (!xl_opts) { + gf_msg_debug(THIS->name, 0, + "Should pass non-NULL " + "xl_opts"); + goto out; + } + + index = -1; + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + index++; + if (gf_uuid_compare(brickinfo->uuid, MY_UUID)) + continue; + + if 
(volinfo->transport_type == GF_TRANSPORT_RDMA) { + len = snprintf(brickname, sizeof(brickname), "%s.rdma", + brickinfo->path); + } else + len = snprintf(brickname, sizeof(brickname), "%s", brickinfo->path); + if ((len < 0) || (len >= sizeof(brickname))) { + ret = -1; + goto out; + } + + port = pmap_registry_search(THIS, brickname, GF_PMAP_PORT_BRICKSERVER, + _gf_false); + if (!port) { + ret = -1; + gf_msg_debug(THIS->name, 0, + "Couldn't get port " + " for brick %s:%s", + brickinfo->hostname, brickinfo->path); + goto out; + } + + ret = gf_asprintf(&xl_opts[i], "%s-client-%d.remote-port=%d", + volinfo->volname, index, port); + if (ret == -1) { + xl_opts[i] = NULL; + goto out; + } + i++; + } + + ret = 0; +out: + return ret; +} + +int +glusterd_op_clearlocks_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict) +{ + int32_t ret = -1; + int i = 0; + char *volname = NULL; + char *path = NULL; + char *kind = NULL; + char *type = NULL; + char *opts = NULL; + char *cmd_str = NULL; + char *free_ptr = NULL; + char msg[PATH_MAX] = { + 0, + }; + char result[PATH_MAX] = { + 0, + }; + char *mntpt = NULL; + char **xl_opts = NULL; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = THIS; + GF_ASSERT(this); + + ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Key=volname", NULL); + goto out; + } + gf_msg_debug("glusterd", 0, "Performing clearlocks on volume %s", volname); + + ret = dict_get_strn(dict, "path", SLEN("path"), &path); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "Key=path", + NULL); + goto out; + } + + ret = dict_get_strn(dict, "kind", SLEN("kind"), &kind); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "Key=kind", + NULL); + goto out; + } + + ret = dict_get_strn(dict, "type", SLEN("type"), &type); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "Key=type", + NULL); + goto out; + } + + ret = dict_get_strn(dict, "opts", SLEN("opts"), &opts); + if (ret) + ret = 0; + + gf_smsg(this->name, GF_LOG_INFO, 0, GD_MSG_CLRCLK_VOL_REQ_RCVD, + "Volume=%s, Kind=%s, Type=%s, Options=%s", volname, kind, type, + opts, NULL); + + if (opts) + ret = gf_asprintf(&cmd_str, GF_XATTR_CLRLK_CMD ".t%s.k%s.%s", type, + kind, opts); + else + ret = gf_asprintf(&cmd_str, GF_XATTR_CLRLK_CMD ".t%s.k%s", type, kind); + if (ret == -1) + goto out; + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { + snprintf(msg, sizeof(msg), "Volume %s doesn't exist.", volname); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, "Volume=%s", + volname, NULL); + goto out; + } + + xl_opts = GF_CALLOC(volinfo->brick_count + 1, sizeof(char *), + gf_gld_mt_charptr); + if (!xl_opts) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); + goto out; + } + + ret = glusterd_clearlocks_get_local_client_ports(volinfo, xl_opts); + if (ret) { + snprintf(msg, sizeof(msg), + "Couldn't get port numbers of " + "local bricks"); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRK_PORT_NUM_GET_FAIL, + NULL); + goto out; + } + + ret = glusterd_clearlocks_create_mount(volinfo, &mntpt); + if (ret) { + snprintf(msg, sizeof(msg), + "Creating mount directory " + "for clear-locks failed."); + gf_smsg(this->name, GF_LOG_ERROR, 0, + GD_MSG_CLRLOCKS_MOUNTDIR_CREATE_FAIL, NULL); + goto out; + } + + ret = glusterd_clearlocks_mount(volinfo, xl_opts, mntpt); + if (ret) { + snprintf(msg, sizeof(msg), + "Failed to mount clear-locks " + "maintenance 
client."); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_CLRLOCKS_CLNT_MOUNT_FAIL, + NULL); + goto out; + } + + ret = glusterd_clearlocks_send_cmd(volinfo, cmd_str, path, result, msg, + sizeof(msg), mntpt); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_CLRCLK_SND_CMD_FAIL, NULL); + goto umount; + } + + free_ptr = gf_strdup(result); + if (dict_set_dynstrn(rsp_dict, "lk-summary", SLEN("lk-summary"), + free_ptr)) { + GF_FREE(free_ptr); + snprintf(msg, sizeof(msg), + "Failed to set clear-locks " + "result"); + gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "Key=lk-summary", NULL); + } + +umount: + glusterd_clearlocks_unmount(volinfo, mntpt); + + if (glusterd_clearlocks_rmdir_mount(volinfo, mntpt)) + gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_CLRLOCKS_CLNT_UMOUNT_FAIL, + NULL); + +out: + if (ret) + *op_errstr = gf_strdup(msg); + + if (xl_opts) { + for (i = 0; i < volinfo->brick_count && xl_opts[i]; i++) + GF_FREE(xl_opts[i]); + GF_FREE(xl_opts); + } + + GF_FREE(cmd_str); + + GF_FREE(mntpt); + + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c new file mode 100644 index 00000000000..398b4d76f52 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -0,0 +1,3146 @@ +/* +Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> +This file is part of GlusterFS. + +This file is licensed to you under your choice of the GNU Lesser +General Public License, version 3 or any later version (LGPLv3 or +later), or the GNU General Public License, version 2 (GPLv2), in all +cases as published by the Free Software Foundation. +*/ + +#include <glusterfs/syscall.h> +#include "glusterd-volgen.h" +#include "glusterd-utils.h" + +static int +validate_cache_max_min_size(glusterd_volinfo_t *volinfo, dict_t *dict, + char *key, char *value, char **op_errstr) +{ + char *current_max_value = NULL; + char *current_min_value = NULL; + char errstr[2048] = ""; + glusterd_conf_t *priv = NULL; + int ret = 0; + uint64_t max_value = 0; + uint64_t min_value = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + if ((!strcmp(key, "performance.cache-min-file-size")) || + (!strcmp(key, "cache-min-file-size"))) { + glusterd_volinfo_get(volinfo, "performance.cache-max-file-size", + ¤t_max_value); + if (current_max_value) { + gf_string2bytesize_uint64(current_max_value, &max_value); + gf_string2bytesize_uint64(value, &min_value); + current_min_value = value; + } + } else if ((!strcmp(key, "performance.cache-max-file-size")) || + (!strcmp(key, "cache-max-file-size"))) { + glusterd_volinfo_get(volinfo, "performance.cache-min-file-size", + ¤t_min_value); + if (current_min_value) { + gf_string2bytesize_uint64(current_min_value, &min_value); + gf_string2bytesize_uint64(value, &max_value); + current_max_value = value; + } + } + + if (min_value > max_value) { + snprintf(errstr, sizeof(errstr), + "cache-min-file-size (%s) is greater than " + "cache-max-file-size (%s)", + current_min_value, current_max_value); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CACHE_MINMAX_SIZE_INVALID, + "%s", errstr); + *op_errstr = gf_strdup(errstr); + ret = -1; + goto out; + } + +out: + gf_msg_debug(this->name, 0, "Returning %d", ret); + + return ret; +} + +static int +validate_defrag_throttle_option(glusterd_volinfo_t *volinfo, dict_t *dict, + char *key, char *value, char **op_errstr) +{ + char errstr[2048] = ""; + int ret = 0; + xlator_t *this = NULL; + int thread_count = 0; + long 
int cores_available = 0; + + this = THIS; + GF_ASSERT(this); + + cores_available = sysconf(_SC_NPROCESSORS_ONLN); + + /* Throttle option should be one of lazy|normal|aggressive or a number + * configured by user max up to the number of cores in the machine */ + + if (!strcasecmp(value, "lazy") || !strcasecmp(value, "normal") || + !strcasecmp(value, "aggressive")) { + ret = 0; + } else if ((gf_string2int(value, &thread_count) == 0)) { + if ((thread_count > 0) && (thread_count <= cores_available)) { + ret = 0; + } else { + ret = -1; + snprintf(errstr, sizeof(errstr), + "%s should be within" + " range of 0 and maximum number of cores " + "available (cores available - %ld)", + key, cores_available); + + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, "%s", + errstr); + + *op_errstr = gf_strdup(errstr); + } + } else { + ret = -1; + snprintf(errstr, sizeof(errstr), + "%s should be " + "{lazy|normal|aggressive} or a number up to number of" + " cores available (cores available - %ld)", + key, cores_available); + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, "%s", + errstr); + *op_errstr = gf_strdup(errstr); + } + + gf_msg_debug(this->name, 0, "Returning %d", ret); + + return ret; +} + +static int +validate_quota(glusterd_volinfo_t *volinfo, dict_t *dict, char *key, + char *value, char **op_errstr) +{ + char errstr[2048] = ""; + glusterd_conf_t *priv = NULL; + int ret = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + ret = glusterd_volinfo_get_boolean(volinfo, VKEY_FEATURES_QUOTA); + if (ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_QUOTA_GET_STAT_FAIL, + "failed to get the quota status"); + goto out; + } + + if (ret == _gf_false) { + snprintf(errstr, sizeof(errstr), "Cannot set %s. Enable quota first.", + key); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_QUOTA_DISABLED, "%s", + errstr); + *op_errstr = gf_strdup(errstr); + ret = -1; + goto out; + } + + ret = 0; +out: + gf_msg_debug(this->name, 0, "Returning %d", ret); + + return ret; +} + +static int +validate_uss(glusterd_volinfo_t *volinfo, dict_t *dict, char *key, char *value, + char **op_errstr) +{ + char errstr[2048] = ""; + int ret = 0; + xlator_t *this = NULL; + gf_boolean_t b = _gf_false; + + this = THIS; + GF_ASSERT(this); + + ret = gf_string2boolean(value, &b); + if (ret) { + snprintf(errstr, sizeof(errstr), + "%s is not a valid boolean " + "value. %s expects a valid boolean value.", + value, key); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY, "%s", errstr); + *op_errstr = gf_strdup(errstr); + goto out; + } +out: + gf_msg_debug(this->name, 0, "Returning %d", ret); + + return ret; +} + +static int +validate_uss_dir(glusterd_volinfo_t *volinfo, dict_t *dict, char *key, + char *value, char **op_errstr) +{ + char errstr[2048] = ""; + int ret = -1; + int i = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + i = strlen(value); + if (i > NAME_MAX) { + snprintf(errstr, sizeof(errstr), + "value of %s exceedes %d " + "characters", + key, NAME_MAX); + goto out; + } else if (i < 2) { + snprintf(errstr, sizeof(errstr), + "value of %s too short, " + "expects at least two characters", + key); + goto out; + } + + if (value[0] != '.') { + snprintf(errstr, sizeof(errstr), + "%s expects value starting " + "with '.' 
", + key); + goto out; + } + + for (i = 1; value[i]; i++) { + if (isalnum(value[i]) || value[i] == '_' || value[i] == '-') + continue; + + snprintf(errstr, sizeof(errstr), + "%s expects value to" + " contain only '0-9a-z-_'", + key); + goto out; + } + + ret = 0; +out: + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, "%s", + errstr); + *op_errstr = gf_strdup(errstr); + } + + gf_msg_debug(this->name, 0, "Returning %d", ret); + + return ret; +} + +static int +validate_server_options(glusterd_volinfo_t *volinfo, dict_t *dict, char *key, + char *value, char **op_errstr) +{ + char errstr[2048] = ""; + xlator_t *this = NULL; + int ret = -1; + int origin_val = 0; + + this = THIS; + GF_ASSERT(this); + + if (volinfo->status == GLUSTERD_STATUS_STARTED) { + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_VOL_SET_VALIDATION_INFO, + "Please note that " + "volume %s is started. This option will only get " + "effected after a brick restart.", + volinfo->volname); + } + + ret = gf_string2int(value, &origin_val); + if (ret) { + snprintf(errstr, sizeof(errstr), + "%s is not a compatible " + "value. %s expects an integer value.", + value, key); + ret = -1; + goto out; + } + + if (origin_val < 0) { + snprintf(errstr, sizeof(errstr), + "%s is not a " + "compatible value. %s expects a positive" + "integer value.", + value, key); + ret = -1; + goto out; + } + + ret = 0; +out: + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INCOMPATIBLE_VALUE, + "%s", errstr); + *op_errstr = gf_strdup(errstr); + } + + return ret; +} + +static int +validate_disperse(glusterd_volinfo_t *volinfo, dict_t *dict, char *key, + char *value, char **op_errstr) +{ + char errstr[2048] = ""; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + + if (volinfo->type != GF_CLUSTER_TYPE_DISPERSE) { + snprintf(errstr, sizeof(errstr), + "Cannot set %s for a non-disperse volume.", key); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_DISPERSE, "%s", + errstr); + *op_errstr = gf_strdup(errstr); + ret = -1; + goto out; + } + ret = 0; + +out: + gf_msg_debug(ret == 0 ? THIS->name : "glusterd", 0, "Returning %d", ret); + + return ret; +} + +static int +validate_replica(glusterd_volinfo_t *volinfo, dict_t *dict, char *key, + char *value, char **op_errstr) +{ + char errstr[2048] = ""; + int ret = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + if (volinfo->replica_count == 1) { + snprintf(errstr, sizeof(errstr), + "Cannot set %s for a non-replicate volume.", key); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_REPLICA, "%s", + errstr); + *op_errstr = gf_strdup(errstr); + ret = -1; + goto out; + } + +out: + gf_msg_debug(this->name, 0, "Returning %d", ret); + + return ret; +} + +static int +validate_quorum_count(glusterd_volinfo_t *volinfo, dict_t *dict, char *key, + char *value, char **op_errstr) +{ + int ret = 0; + xlator_t *this = NULL; + int q_count = 0; + + this = THIS; + GF_ASSERT(this); + + ret = gf_string2int(value, &q_count); + if (ret) { + gf_asprintf(op_errstr, + "%s is not an integer. 
%s expects a " + "valid integer value.", + value, key); + goto out; + } + + if (q_count < 1 || q_count > volinfo->replica_count) { + gf_asprintf(op_errstr, "%d in %s %d is out of range [1 - %d]", q_count, + key, q_count, volinfo->replica_count); + ret = -1; + } + +out: + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY, "%s", + *op_errstr); + } + gf_msg_debug(this->name, 0, "Returning %d", ret); + + return ret; +} + +static int +validate_subvols_per_directory(glusterd_volinfo_t *volinfo, dict_t *dict, + char *key, char *value, char **op_errstr) +{ + char errstr[2048] = ""; + glusterd_conf_t *priv = NULL; + int ret = 0; + int subvols = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + subvols = atoi(value); + + /* Checking if the subvols-per-directory exceed the total + number of subvolumes. */ + if (subvols > volinfo->subvol_count) { + snprintf(errstr, sizeof(errstr), + "subvols-per-directory(%d) is greater " + "than the number of subvolumes(%d).", + subvols, volinfo->subvol_count); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SUBVOLUMES_EXCEED, "%s.", + errstr); + *op_errstr = gf_strdup(errstr); + ret = -1; + goto out; + } + +out: + gf_msg_debug(this->name, 0, "Returning %d", ret); + + return ret; +} + +static int +validate_replica_heal_enable_disable(glusterd_volinfo_t *volinfo, dict_t *dict, + char *key, char *value, char **op_errstr) +{ + int ret = 0; + + if (!glusterd_is_volume_replicate(volinfo)) { + gf_asprintf(op_errstr, "Volume %s is not of replicate type", + volinfo->volname); + ret = -1; + } + + return ret; +} + +static int +validate_mandatory_locking(glusterd_volinfo_t *volinfo, dict_t *dict, char *key, + char *value, char **op_errstr) +{ + char errstr[2048] = ""; + int ret = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + if (strcmp(value, "off") != 0 && strcmp(value, "file") != 0 && + strcmp(value, "forced") != 0 && strcmp(value, "optimal") != 0) { + snprintf(errstr, sizeof(errstr), + "Invalid option value '%s':" + " Available options are 'off', 'file', " + "'forced' or 'optimal'", + value); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY, "%s", errstr); + *op_errstr = gf_strdup(errstr); + ret = -1; + goto out; + } +out: + gf_msg_debug(this->name, 0, "Returning %d", ret); + + return ret; +} + +static int +validate_disperse_heal_enable_disable(glusterd_volinfo_t *volinfo, dict_t *dict, + char *key, char *value, char **op_errstr) +{ + int ret = 0; + + if (volinfo->type != GF_CLUSTER_TYPE_DISPERSE) { + gf_asprintf(op_errstr, "Volume %s is not of disperse type", + volinfo->volname); + ret = -1; + } + + return ret; +} + +static int +validate_lock_migration_option(glusterd_volinfo_t *volinfo, dict_t *dict, + char *key, char *value, char **op_errstr) +{ + char errstr[2048] = ""; + int ret = 0; + xlator_t *this = NULL; + gf_boolean_t b = _gf_false; + + this = THIS; + GF_ASSERT(this); + + if (volinfo->replica_count > 1 || volinfo->disperse_count) { + snprintf(errstr, sizeof(errstr), + "Lock migration is " + "a experimental feature. Currently works with" + " pure distribute volume only"); + ret = -1; + + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, "%s", + errstr); + + *op_errstr = gf_strdup(errstr); + goto out; + } + + ret = gf_string2boolean(value, &b); + if (ret) { + snprintf(errstr, sizeof(errstr), + "Invalid value" + " for volume set command. 
Use on/off only."); + ret = -1; + + gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, "%s", + errstr); + + *op_errstr = gf_strdup(errstr); + + goto out; + } + + gf_msg_debug(this->name, 0, "Returning %d", ret); + +out: + return ret; +} + +static int +validate_mux_limit(glusterd_volinfo_t *volinfo, dict_t *dict, char *key, + char *value, char **op_errstr) +{ + xlator_t *this = NULL; + uint val = 0; + int ret = -1; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + + if (!is_brick_mx_enabled()) { + gf_asprintf(op_errstr, + "Brick-multiplexing is not enabled. " + "Please enable brick multiplexing before trying " + "to set this option."); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_WRONG_OPTS_SETTING, "%s", + *op_errstr); + goto out; + } + + ret = gf_string2uint(value, &val); + if (ret) { + gf_asprintf(op_errstr, + "%s is not a valid count. " + "%s expects an unsigned integer.", + value, key); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY, "%s", + *op_errstr); + } + + if (val == 1) { + gf_asprintf(op_errstr, + "Brick-multiplexing is enabled. " + "Please set this option to a value other than 1 " + "to make use of the brick-multiplexing feature."); + ret = -1; + goto out; + } +out: + gf_msg_debug("glusterd", 0, "Returning %d", ret); + + return ret; +} + +static int +validate_volume_per_thread_limit(glusterd_volinfo_t *volinfo, dict_t *dict, + char *key, char *value, char **op_errstr) +{ + xlator_t *this = NULL; + uint val = 0; + int ret = -1; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + + if (!is_brick_mx_enabled()) { + gf_asprintf(op_errstr, + "Brick-multiplexing is not enabled. " + "Please enable brick multiplexing before trying " + "to set this option."); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_WRONG_OPTS_SETTING, "%s", + *op_errstr); + goto out; + } + + ret = gf_string2uint(value, &val); + if (ret) { + gf_asprintf(op_errstr, + "%s is not a valid count. " + "%s expects an unsigned integer.", + value, key); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY, "%s", + *op_errstr); + } + + if ((val < 5) || (val > 200)) { + gf_asprintf( + op_errstr, + "Please set this option to a value between 5 and 200 to" + "optimize processing large numbers of volumes in parallel."); + ret = -1; + goto out; + } +out: + gf_msg_debug("glusterd", 0, "Returning %d", ret); + + return ret; +} + +static int +validate_boolean(glusterd_volinfo_t *volinfo, dict_t *dict, char *key, + char *value, char **op_errstr) +{ + xlator_t *this = NULL; + gf_boolean_t b = _gf_false; + int ret = -1; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + ret = gf_string2boolean(value, &b); + if (ret) { + gf_asprintf(op_errstr, + "%s is not a valid boolean value. %s " + "expects a valid boolean value.", + value, key); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY, "%s", + *op_errstr); + } +out: + gf_msg_debug("glusterd", 0, "Returning %d", ret); + + return ret; +} + +static int +validate_disperse_quorum_count(glusterd_volinfo_t *volinfo, dict_t *dict, + char *key, char *value, char **op_errstr) +{ + int ret = -1; + int quorum_count = 0; + int data_count = 0; + + ret = gf_string2int(value, &quorum_count); + if (ret) { + gf_asprintf(op_errstr, + "%s is not an integer. 
%s expects a " + "valid integer value.", + value, key); + goto out; + } + + if (volinfo->type != GF_CLUSTER_TYPE_DISPERSE) { + gf_asprintf(op_errstr, "Cannot set %s for a non-disperse volume.", key); + ret = -1; + goto out; + } + + data_count = volinfo->disperse_count - volinfo->redundancy_count; + if (quorum_count < data_count || quorum_count > volinfo->disperse_count) { + gf_asprintf(op_errstr, "%d for %s is out of range [%d - %d]", + quorum_count, key, data_count, volinfo->disperse_count); + ret = -1; + goto out; + } + + ret = 0; +out: + return ret; +} + +static int +validate_parallel_readdir(glusterd_volinfo_t *volinfo, dict_t *dict, char *key, + char *value, char **op_errstr) +{ + int ret = -1; + + ret = validate_boolean(volinfo, dict, key, value, op_errstr); + if (ret) + goto out; + + ret = glusterd_is_defrag_on(volinfo); + if (ret) { + gf_asprintf(op_errstr, + "%s option should be set " + "after rebalance is complete", + key); + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY, "%s", + *op_errstr); + } +out: + gf_msg_debug("glusterd", 0, "Returning %d", ret); + + return ret; +} + +static int +validate_rda_cache_limit(glusterd_volinfo_t *volinfo, dict_t *dict, char *key, + char *value, char **op_errstr) +{ + int ret = 0; + uint64_t rda_cache_size = 0; + + ret = gf_string2bytesize_uint64(value, &rda_cache_size); + if (ret < 0) + goto out; + + if (rda_cache_size <= (1 * GF_UNIT_GB)) + goto out; + + /* With release 3.11 the max value of rda_cache_limit is changed from + * 1GB to INFINITY. If there are clients older than 3.11 and the value + * of rda-cache-limit is set to > 1GB, the older clients will stop + * working. Hence if a user is setting rda-cache-limit to > 1GB + * ensure that all the clients are 3.11 or greater. + */ + ret = glusterd_check_client_op_version_support( + volinfo->volname, GD_OP_VERSION_3_11_0, op_errstr); +out: + gf_msg_debug("glusterd", 0, "Returning %d", ret); + + return ret; +} + +static int +validate_worm_period(glusterd_volinfo_t *volinfo, dict_t *dict, char *key, + char *value, char **op_errstr) +{ + xlator_t *this = NULL; + uint64_t period = -1; + int ret = -1; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + ret = gf_string2uint64(value, &period); + if (ret) { + gf_asprintf(op_errstr, + "%s is not a valid uint64_t value." + " %s expects a valid uint64_t value.", + value, key); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY, "%s", + *op_errstr); + } +out: + gf_msg_debug("glusterd", 0, "Returning %d", ret); + + return ret; +} + +static int +validate_reten_mode(glusterd_volinfo_t *volinfo, dict_t *dict, char *key, + char *value, char **op_errstr) +{ + xlator_t *this = NULL; + int ret = -1; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + if ((strcmp(value, "relax") && strcmp(value, "enterprise"))) { + gf_asprintf(op_errstr, + "The value of retention mode should be " + "either relax or enterprise. 
But the value" + " of %s is %s", + key, value); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY, "%s", + *op_errstr); + ret = -1; + goto out; + } + ret = 0; +out: + gf_msg_debug("glusterd", 0, "Returning %d", ret); + + return ret; +} +static int +is_directory(const char *path) +{ + struct stat statbuf; + if (sys_stat(path, &statbuf) != 0) + return 0; + return S_ISDIR(statbuf.st_mode); +} +static int +validate_statedump_path(glusterd_volinfo_t *volinfo, dict_t *dict, char *key, + char *value, char **op_errstr) +{ + xlator_t *this = NULL; + this = THIS; + GF_ASSERT(this); + + int ret = 0; + if (!is_directory(value)) { + gf_asprintf(op_errstr, "Failed: %s is not a directory", value); + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY, "%s", + *op_errstr); + } + + return ret; +} + +/* dispatch table for VOLUME SET + * ----------------------------- + * + * Format of entries: + * + * First field is the <key>, for the purpose of looking it up + * in volume dictionary. Each <key> is of the format "<domain>.<specifier>". + * + * Second field is <voltype>. + * + * Third field is <option>, if its unset, it's assumed to be + * the same as <specifier>. + * + * Fourth field is <value>. In this context they are used to specify + * a default. That is, even the volume dict doesn't have a value, + * we procced as if the default value were set for it. + * + * Fifth field is <doctype>, which decides if the option is public and available + * in "set help" or not. "NO_DOC" entries are not part of the public interface + * and are subject to change at any time. This also decides if an option is + * global (applies to all volumes) or normal (applies to only specified volume). + * + * Sixth field is <flags>. + * + * Seventh field is <op-version>. + * + * Eight field is description of option: If NULL, tried to fetch from + * translator code's xlator_options table. + * + * Ninth field is validation function: If NULL, xlator's option specific + * validation will be tried, otherwise tried at glusterd code itself. + * + * There are two type of entries: basic and special. + * + * - Basic entries are the ones where the <option> does _not_ start with + * the bang! character ('!'). + * + * In their case, <option> is understood as an option for an xlator of + * type <voltype>. Their effect is to copy over the volinfo->dict[<key>] + * value to all graph nodes of type <voltype> (if such a value is set). + * + * You are free to add entries of this type, they will become functional + * just by being present in the table. + * + * - Special entries where the <option> starts with the bang!. + * + * They are not applied to all graphs during generation, and you cannot + * extend them in a trivial way which could be just picked up. Better + * not touch them unless you know what you do. + * + * + * Another kind of grouping for options, according to visibility: + * + * - Exported: one which is used in the code. These are characterized by + * being used a macro as <key> (of the format VKEY_..., defined in + * glusterd-volgen.h + * + * - Non-exported: the rest; these have string literal <keys>. + * + * Adhering to this policy, option name changes shall be one-liners. 
+ * + */ + +struct volopt_map_entry glusterd_volopt_map[] = { + /* DHT xlator options */ + {.key = "cluster.lookup-unhashed", + .voltype = "cluster/distribute", + .op_version = 1, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.lookup-optimize", + .voltype = "cluster/distribute", + .op_version = GD_OP_VERSION_3_7_2, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.min-free-disk", + .voltype = "cluster/distribute", + .op_version = 1, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.min-free-inodes", + .voltype = "cluster/distribute", + .op_version = 1, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.rebalance-stats", + .voltype = "cluster/distribute", + .op_version = 2, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.subvols-per-directory", + .voltype = "cluster/distribute", + .option = "directory-layout-spread", + .op_version = 2, + .validate_fn = validate_subvols_per_directory, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.readdir-optimize", + .voltype = "cluster/distribute", + .op_version = 1, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.rsync-hash-regex", + .voltype = "cluster/distribute", + .type = NO_DOC, + .op_version = 3, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.extra-hash-regex", + .voltype = "cluster/distribute", + .type = NO_DOC, + .op_version = 3, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.dht-xattr-name", + .voltype = "cluster/distribute", + .option = "xattr-name", + .type = NO_DOC, + .op_version = 3, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + { + .key = "cluster.randomize-hash-range-by-gfid", + .voltype = "cluster/distribute", + .option = "randomize-hash-range-by-gfid", + .type = NO_DOC, + .op_version = GD_OP_VERSION_3_6_0, + .flags = VOLOPT_FLAG_CLIENT_OPT, + }, + { + .key = "cluster.rebal-throttle", + .voltype = "cluster/distribute", + .option = "rebal-throttle", + .op_version = GD_OP_VERSION_3_7_0, + .validate_fn = validate_defrag_throttle_option, + .flags = VOLOPT_FLAG_CLIENT_OPT, + }, + + { + .key = "cluster.lock-migration", + .voltype = "cluster/distribute", + .option = "lock-migration", + .value = "off", + .op_version = GD_OP_VERSION_3_8_0, + .validate_fn = validate_lock_migration_option, + .flags = VOLOPT_FLAG_CLIENT_OPT, + }, + + { + .key = "cluster.force-migration", + .voltype = "cluster/distribute", + .option = "force-migration", + .value = "off", + .op_version = GD_OP_VERSION_4_0_0, + .flags = VOLOPT_FLAG_CLIENT_OPT, + }, + + /* NUFA xlator options (Distribute special case) */ + {.key = "cluster.nufa", + .voltype = "cluster/distribute", + .option = "!nufa", + .type = NO_DOC, + .op_version = 2, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.local-volume-name", + .voltype = "cluster/nufa", + .option = "local-volume-name", + .type = NO_DOC, + .op_version = 3, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + { + .key = "cluster.weighted-rebalance", + .voltype = "cluster/distribute", + .op_version = GD_OP_VERSION_3_6_0, + }, + + /* Switch xlator options (Distribute special case) */ + {.key = "cluster.switch", + .voltype = "cluster/distribute", + .option = "!switch", + .type = NO_DOC, + .op_version = 3, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.switch-pattern", + .voltype = "cluster/switch", + .option = "pattern.switch.case", + .type = NO_DOC, + .op_version = 3, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + + /* AFR xlator options */ + {.key = "cluster.entry-change-log", + .voltype = "cluster/replicate", + .op_version = 1, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.read-subvolume", + 
.voltype = "cluster/replicate", + .op_version = 1, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.read-subvolume-index", + .voltype = "cluster/replicate", + .op_version = 2, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.read-hash-mode", + .voltype = "cluster/replicate", + .op_version = 2, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.background-self-heal-count", + .voltype = "cluster/replicate", + .op_version = 1, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.metadata-self-heal", + .voltype = "cluster/replicate", + .op_version = 1, + .validate_fn = validate_replica, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.data-self-heal", + .voltype = "cluster/replicate", + .op_version = 1, + .validate_fn = validate_replica, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.entry-self-heal", + .voltype = "cluster/replicate", + .op_version = 1, + .validate_fn = validate_replica, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.self-heal-daemon", + .voltype = "cluster/replicate", + .option = "!self-heal-daemon", + .op_version = 1, + .validate_fn = validate_replica_heal_enable_disable}, + {.key = "cluster.heal-timeout", + .voltype = "cluster/replicate", + .option = "!heal-timeout", + .op_version = 2, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.strict-readdir", + .voltype = "cluster/replicate", + .type = NO_DOC, + .op_version = 1, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.self-heal-window-size", + .voltype = "cluster/replicate", + .option = "data-self-heal-window-size", + .op_version = 1, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.data-change-log", + .voltype = "cluster/replicate", + .op_version = 1, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.metadata-change-log", + .voltype = "cluster/replicate", + .op_version = 1, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.data-self-heal-algorithm", + .voltype = "cluster/replicate", + .option = "data-self-heal-algorithm", + .op_version = 1, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.eager-lock", + .voltype = "cluster/replicate", + .op_version = 1, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "disperse.eager-lock", + .voltype = "cluster/disperse", + .op_version = GD_OP_VERSION_3_7_10, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "disperse.other-eager-lock", + .voltype = "cluster/disperse", + .op_version = GD_OP_VERSION_3_13_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "disperse.eager-lock-timeout", + .voltype = "cluster/disperse", + .op_version = GD_OP_VERSION_4_0_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "disperse.other-eager-lock-timeout", + .voltype = "cluster/disperse", + .op_version = GD_OP_VERSION_4_0_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.quorum-type", + .voltype = "cluster/replicate", + .option = "quorum-type", + .op_version = 1, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.quorum-count", + .voltype = "cluster/replicate", + .option = "quorum-count", + .op_version = 1, + .validate_fn = validate_quorum_count, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.choose-local", + .voltype = "cluster/replicate", + .op_version = 2, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.self-heal-readdir-size", + .voltype = "cluster/replicate", + .op_version = 2, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.post-op-delay-secs", + .voltype = "cluster/replicate", + .type = NO_DOC, + .op_version = 2, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.readdir-failover", + .voltype = 
"cluster/replicate", + .op_version = 2, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.ensure-durability", + .voltype = "cluster/replicate", + .op_version = 3, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.consistent-metadata", + .voltype = "cluster/replicate", + .type = DOC, + .op_version = GD_OP_VERSION_3_7_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.heal-wait-queue-length", + .voltype = "cluster/replicate", + .type = DOC, + .op_version = GD_OP_VERSION_3_7_10, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.favorite-child-policy", + .voltype = "cluster/replicate", + .type = DOC, + .op_version = GD_OP_VERSION_3_7_12, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.full-lock", + .voltype = "cluster/replicate", + .type = NO_DOC, + .op_version = GD_OP_VERSION_3_13_2, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.optimistic-change-log", + .voltype = "cluster/replicate", + .type = NO_DOC, + .op_version = GD_OP_VERSION_7_2, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + + /* IO-stats xlator options */ + {.key = VKEY_DIAG_LAT_MEASUREMENT, + .voltype = "debug/io-stats", + .option = "latency-measurement", + .value = "off", + .op_version = 1}, + {.key = "diagnostics.dump-fd-stats", + .voltype = "debug/io-stats", + .op_version = 1}, + {.key = VKEY_DIAG_CNT_FOP_HITS, + .voltype = "debug/io-stats", + .option = "count-fop-hits", + .value = "off", + .type = NO_DOC, + .op_version = 1}, + {.key = "diagnostics.brick-log-level", + .voltype = "debug/io-stats", + .value = "INFO", + .option = "!brick-log-level", + .op_version = 1}, + {.key = "diagnostics.client-log-level", + .voltype = "debug/io-stats", + .value = "INFO", + .option = "!client-log-level", + .op_version = 1, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "diagnostics.brick-sys-log-level", + .voltype = "debug/io-stats", + .option = "!sys-log-level", + .op_version = 1}, + {.key = "diagnostics.client-sys-log-level", + .voltype = "debug/io-stats", + .option = "!sys-log-level", + .op_version = 1, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + { + .key = "diagnostics.brick-logger", + .voltype = "debug/io-stats", + .option = "!logger", + .op_version = GD_OP_VERSION_3_6_0, + }, + {.key = "diagnostics.client-logger", + .voltype = "debug/io-stats", + .option = "!logger", + .op_version = GD_OP_VERSION_3_6_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + { + .key = "diagnostics.brick-log-format", + .voltype = "debug/io-stats", + .option = "!log-format", + .op_version = GD_OP_VERSION_3_6_0, + }, + {.key = "diagnostics.client-log-format", + .voltype = "debug/io-stats", + .option = "!log-format", + .op_version = GD_OP_VERSION_3_6_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + { + .key = "diagnostics.brick-log-buf-size", + .voltype = "debug/io-stats", + .option = "!log-buf-size", + .op_version = GD_OP_VERSION_3_6_0, + }, + {.key = "diagnostics.client-log-buf-size", + .voltype = "debug/io-stats", + .option = "!log-buf-size", + .op_version = GD_OP_VERSION_3_6_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + { + .key = "diagnostics.brick-log-flush-timeout", + .voltype = "debug/io-stats", + .option = "!log-flush-timeout", + .op_version = GD_OP_VERSION_3_6_0, + }, + {.key = "diagnostics.client-log-flush-timeout", + .voltype = "debug/io-stats", + .option = "!log-flush-timeout", + .op_version = GD_OP_VERSION_3_6_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "diagnostics.stats-dump-interval", + .voltype = "debug/io-stats", + .option = "ios-dump-interval", + .op_version = 1}, + {.key = "diagnostics.fop-sample-interval", + .voltype = "debug/io-stats", + 
.option = "ios-sample-interval", + .op_version = 1}, + { + .key = "diagnostics.stats-dump-format", + .voltype = "debug/io-stats", + .option = "ios-dump-format", + .op_version = GD_OP_VERSION_3_12_0, + }, + {.key = "diagnostics.fop-sample-buf-size", + .voltype = "debug/io-stats", + .option = "ios-sample-buf-size", + .op_version = 1}, + {.key = "diagnostics.stats-dnscache-ttl-sec", + .voltype = "debug/io-stats", + .option = "ios-dnscache-ttl-sec", + .op_version = 1}, + + /* IO-cache xlator options */ + {.key = "performance.cache-max-file-size", + .voltype = "performance/io-cache", + .option = "max-file-size", + .op_version = 1, + .validate_fn = validate_cache_max_min_size, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "performance.cache-min-file-size", + .voltype = "performance/io-cache", + .option = "min-file-size", + .op_version = 1, + .validate_fn = validate_cache_max_min_size, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "performance.cache-refresh-timeout", + .voltype = "performance/io-cache", + .option = "cache-timeout", + .op_version = 1, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "performance.cache-priority", + .voltype = "performance/io-cache", + .option = "priority", + .op_version = 1, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "performance.io-cache-size", + .voltype = "performance/io-cache", + .option = "cache-size", + .op_version = GD_OP_VERSION_8_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + { + .key = "performance.cache-size", + .voltype = "performance/io-cache", + .op_version = 1, + .flags = VOLOPT_FLAG_CLIENT_OPT, + .description = "Deprecated option. Use performance.io-cache-size " + "to adjust the cache size of the io-cache translator, " + "and use performance.quick-read-cache-size to adjust " + "the cache size of the quick-read translator.", + }, + + /* IO-threads xlator options */ + {.key = "performance.io-thread-count", + .voltype = "performance/io-threads", + .option = "thread-count", + .op_version = 1}, + {.key = "performance.high-prio-threads", + .voltype = "performance/io-threads", + .op_version = 1}, + {.key = "performance.normal-prio-threads", + .voltype = "performance/io-threads", + .op_version = 1}, + {.key = "performance.low-prio-threads", + .voltype = "performance/io-threads", + .op_version = 1}, + {.key = "performance.least-prio-threads", + .voltype = "performance/io-threads", + .op_version = 1}, + {.key = "performance.enable-least-priority", + .voltype = "performance/io-threads", + .op_version = 1}, + {.key = "performance.iot-watchdog-secs", + .voltype = "performance/io-threads", + .option = "watchdog-secs", + .op_version = GD_OP_VERSION_4_1_0}, + {.key = "performance.iot-cleanup-disconnected-reqs", + .voltype = "performance/io-threads", + .option = "cleanup-disconnected-reqs", + .op_version = GD_OP_VERSION_4_1_0}, + {.key = "performance.iot-pass-through", + .voltype = "performance/io-threads", + .option = "pass-through", + .op_version = GD_OP_VERSION_4_1_0}, + + /* Other perf xlators' options */ + {.key = "performance.io-cache-pass-through", + .voltype = "performance/io-cache", + .option = "pass-through", + .op_version = GD_OP_VERSION_4_1_0}, + {.key = "performance.quick-read-cache-size", + .voltype = "performance/quick-read", + .option = "cache-size", + .op_version = GD_OP_VERSION_8_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "performance.cache-size", + .voltype = "performance/quick-read", + .type = NO_DOC, + .op_version = 1, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "performance.quick-read-cache-timeout", + .voltype = "performance/quick-read", + 
.option = "cache-timeout", + .op_version = GD_OP_VERSION_8_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "performance.qr-cache-timeout", + .voltype = "performance/quick-read", + .option = "cache-timeout", + .op_version = 1, + .flags = VOLOPT_FLAG_CLIENT_OPT, + .description = + "Deprecated option. Use performance.quick-read-cache-timeout " + "instead."}, + {.key = "performance.quick-read-cache-invalidation", + .voltype = "performance/quick-read", + .option = "quick-read-cache-invalidation", + .op_version = GD_OP_VERSION_4_0_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "performance.ctime-invalidation", + .voltype = "performance/quick-read", + .option = "ctime-invalidation", + .op_version = GD_OP_VERSION_5_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "performance.flush-behind", + .voltype = "performance/write-behind", + .option = "flush-behind", + .op_version = 1, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "performance.nfs.flush-behind", + .voltype = "performance/write-behind", + .option = "flush-behind", + .op_version = 1, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "performance.write-behind-window-size", + .voltype = "performance/write-behind", + .option = "cache-size", + .op_version = 1, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + { + .key = "performance.resync-failed-syncs-after-fsync", + .voltype = "performance/write-behind", + .option = "resync-failed-syncs-after-fsync", + .op_version = GD_OP_VERSION_3_7_7, + .flags = VOLOPT_FLAG_CLIENT_OPT, + .description = "If sync of \"cached-writes issued before fsync\" " + "(to backend) fails, this option configures whether " + "to retry syncing them after fsync or forget them. " + "If set to on, cached-writes are retried " + "till a \"flush\" fop (or a successful sync) on sync " + "failures. " + "fsync itself is failed irrespective of the value of " + "this option. 
", + }, + {.key = "performance.nfs.write-behind-window-size", + .voltype = "performance/write-behind", + .option = "cache-size", + .op_version = 1, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "performance.strict-o-direct", + .voltype = "performance/write-behind", + .option = "strict-O_DIRECT", + .op_version = 2, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "performance.nfs.strict-o-direct", + .voltype = "performance/write-behind", + .option = "strict-O_DIRECT", + .op_version = 2, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "performance.strict-write-ordering", + .voltype = "performance/write-behind", + .option = "strict-write-ordering", + .op_version = 2, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "performance.nfs.strict-write-ordering", + .voltype = "performance/write-behind", + .option = "strict-write-ordering", + .op_version = 2, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "performance.write-behind-trickling-writes", + .voltype = "performance/write-behind", + .option = "trickling-writes", + .op_version = GD_OP_VERSION_3_13_1, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "performance.aggregate-size", + .voltype = "performance/write-behind", + .option = "aggregate-size", + .op_version = GD_OP_VERSION_4_1_0, + .flags = OPT_FLAG_CLIENT_OPT}, + {.key = "performance.nfs.write-behind-trickling-writes", + .voltype = "performance/write-behind", + .option = "trickling-writes", + .op_version = GD_OP_VERSION_3_13_1, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "performance.lazy-open", + .voltype = "performance/open-behind", + .option = "lazy-open", + .op_version = 3, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "performance.read-after-open", + .voltype = "performance/open-behind", + .option = "read-after-open", + .op_version = 3, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + { + .key = "performance.open-behind-pass-through", + .voltype = "performance/open-behind", + .option = "pass-through", + .op_version = GD_OP_VERSION_4_1_0, + }, + {.key = "performance.read-ahead-page-count", + .voltype = "performance/read-ahead", + .option = "page-count", + .op_version = 1, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + { + .key = "performance.read-ahead-pass-through", + .voltype = "performance/read-ahead", + .option = "pass-through", + .op_version = GD_OP_VERSION_4_1_0, + }, + { + .key = "performance.readdir-ahead-pass-through", + .voltype = "performance/readdir-ahead", + .option = "pass-through", + .op_version = GD_OP_VERSION_4_1_0, + }, + {.key = "performance.md-cache-pass-through", + .voltype = "performance/md-cache", + .option = "pass-through", + .op_version = GD_OP_VERSION_4_1_0}, + {.key = "performance.md-cache-timeout", + .voltype = "performance/md-cache", + .option = "md-cache-timeout", + .op_version = 2, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "performance.cache-swift-metadata", + .voltype = "performance/md-cache", + .option = "cache-swift-metadata", + .op_version = GD_OP_VERSION_3_7_10, + .description = "Cache swift metadata (user.swift.metadata xattr)", + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "performance.cache-samba-metadata", + .voltype = "performance/md-cache", + .option = "cache-samba-metadata", + .op_version = GD_OP_VERSION_3_9_0, + .description = "Cache samba metadata (user.DOSATTRIB, security.NTACL" + " xattr)", + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "performance.cache-capability-xattrs", + .voltype = "performance/md-cache", + .option = "cache-capability-xattrs", + .op_version = GD_OP_VERSION_3_10_0, + .description = "Cache xattrs required for capability based security", + 
.flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "performance.cache-ima-xattrs", + .voltype = "performance/md-cache", + .option = "cache-ima-xattrs", + .op_version = GD_OP_VERSION_3_10_0, + .description = "Cache xattrs required for IMA " + "(Integrity Measurement Architecture)", + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "performance.md-cache-statfs", + .voltype = "performance/md-cache", + .option = "md-cache-statfs", + .op_version = GD_OP_VERSION_4_0_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "performance.xattr-cache-list", + .voltype = "performance/md-cache", + .option = "xattr-cache-list", + .op_version = GD_OP_VERSION_4_0_0, + .flags = VOLOPT_FLAG_CLIENT_OPT, + .description = "A comma separated list of xattrs that shall be " + "cached by md-cache. The only wildcard allowed is '*'"}, + {.key = "performance.nl-cache-pass-through", + .voltype = "performance/nl-cache", + .option = "pass-through", + .op_version = GD_OP_VERSION_4_1_0}, + + /* Client xlator options */ + {.key = "network.frame-timeout", + .voltype = "protocol/client", + .op_version = 1, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "network.ping-timeout", + .voltype = "protocol/client", + .op_version = 1, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "network.tcp-window-size", + .voltype = "protocol/client", + .op_version = 1, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "client.ssl", + .voltype = "protocol/client", + .option = "transport.socket.ssl-enabled", + .value = "off", + .op_version = 2, + .description = "enable/disable client.ssl flag in the " + "volume.", + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "network.remote-dio", + .voltype = "protocol/client", + .option = "filter-O_DIRECT", + .op_version = 2, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + { + .key = "client.own-thread", + .voltype = "protocol/client", + .option = "transport.socket.own-thread", + .type = NO_DOC, + .op_version = GD_OP_VERSION_3_7_0, + }, + { + .key = "client.event-threads", + .voltype = "protocol/client", + .op_version = GD_OP_VERSION_3_7_0, + }, + {.key = "client.tcp-user-timeout", + .voltype = "protocol/client", + .option = "transport.tcp-user-timeout", + .op_version = GD_OP_VERSION_3_10_2, + .value = "0", /* 0 - implies "use system default" */ + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "client.keepalive-time", + .voltype = "protocol/client", + .option = "transport.socket.keepalive-time", + .op_version = GD_OP_VERSION_3_10_2, + .value = "20", + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "client.keepalive-interval", + .voltype = "protocol/client", + .option = "transport.socket.keepalive-interval", + .op_version = GD_OP_VERSION_3_10_2, + .value = "2", + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "client.keepalive-count", + .voltype = "protocol/client", + .option = "transport.socket.keepalive-count", + .op_version = GD_OP_VERSION_3_10_2, + .value = "9", + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "client.strict-locks", + .voltype = "protocol/client", + .option = "strict-locks", + .value = "off", + .op_version = GD_OP_VERSION_8_0, + .validate_fn = validate_boolean, + .type = GLOBAL_DOC, + .description = "When set, doesn't reopen saved fds after reconnect " + "if POSIX locks are held on them. Hence subsequent " + "operations on these fds will fail. This is " + "necessary for stricter lock complaince as bricks " + "cleanup any granted locks when a client " + "disconnects."}, + + /* Although the following option is named ta-remote-port but it will be + * added as remote-port in client volfile for ta-bricks only. 
+ */ + {.key = "client.ta-brick-port", + .voltype = "protocol/client", + .option = "ta-remote-port", + .op_version = GD_OP_VERSION_7_0}, + + /* Server xlator options */ + {.key = "network.tcp-window-size", + .voltype = "protocol/server", + .type = NO_DOC, + .op_version = 1}, + {.key = "network.inode-lru-limit", + .voltype = "protocol/server", + .op_version = 1}, + {.key = AUTH_ALLOW_MAP_KEY, + .voltype = "protocol/server", + .option = "!server-auth", + .value = "*", + .op_version = 1}, + {.key = AUTH_REJECT_MAP_KEY, + .voltype = "protocol/server", + .option = "!server-auth", + .op_version = 1}, + {.key = "transport.keepalive", + .voltype = "protocol/server", + .option = "transport.socket.keepalive", + .type = NO_DOC, + .value = "1", + .op_version = 1}, + {.key = "server.allow-insecure", + .voltype = "protocol/server", + .option = "rpc-auth-allow-insecure", + .type = DOC, + .op_version = 1}, + {.key = "server.root-squash", + .voltype = "protocol/server", + .option = "root-squash", + .op_version = 2}, + {.key = "server.all-squash", + .voltype = "protocol/server", + .option = "all-squash", + .op_version = GD_OP_VERSION_6_0}, + {.key = "server.anonuid", + .voltype = "protocol/server", + .option = "anonuid", + .op_version = 3}, + {.key = "server.anongid", + .voltype = "protocol/server", + .option = "anongid", + .op_version = 3}, + {.key = "server.statedump-path", + .voltype = "protocol/server", + .option = "statedump-path", + .op_version = 1, + .validate_fn = validate_statedump_path}, + {.key = "server.outstanding-rpc-limit", + .voltype = "protocol/server", + .option = "rpc.outstanding-rpc-limit", + .type = GLOBAL_DOC, + .op_version = 3}, + {.key = "server.ssl", + .voltype = "protocol/server", + .value = "off", + .option = "transport.socket.ssl-enabled", + .description = "enable/disable server.ssl flag in the " + "volume.", + .op_version = 2}, + { + .key = "auth.ssl-allow", + .voltype = "protocol/server", + .option = "!ssl-allow", + .value = "*", + .type = DOC, + .description = "Allow a comma separated list of common names (CN) of " + "the clients that are allowed to access the server." 
+ "By default, all TLS authenticated clients are " + "allowed to access the server.", + .op_version = GD_OP_VERSION_3_6_0, + }, + { + .key = "server.manage-gids", + .voltype = "protocol/server", + .op_version = GD_OP_VERSION_3_6_0, + }, + { + .key = "server.dynamic-auth", + .voltype = "protocol/server", + .op_version = GD_OP_VERSION_3_7_5, + }, + { + .key = "client.send-gids", + .voltype = "protocol/client", + .type = NO_DOC, + .op_version = GD_OP_VERSION_3_6_0, + }, + { + .key = "server.gid-timeout", + .voltype = "protocol/server", + .op_version = GD_OP_VERSION_3_6_0, + }, + { + .key = "server.own-thread", + .voltype = "protocol/server", + .option = "transport.socket.own-thread", + .type = NO_DOC, + .op_version = GD_OP_VERSION_3_7_0, + }, + { + .key = "server.event-threads", + .voltype = "protocol/server", + .op_version = GD_OP_VERSION_3_7_0, + }, + { + .key = "server.tcp-user-timeout", + .voltype = "protocol/server", + .option = "transport.tcp-user-timeout", + .op_version = GD_OP_VERSION_3_10_2, + }, + { + .key = "server.keepalive-time", + .voltype = "protocol/server", + .option = "transport.socket.keepalive-time", + .op_version = GD_OP_VERSION_3_10_2, + .value = "20", + }, + { + .key = "server.keepalive-interval", + .voltype = "protocol/server", + .option = "transport.socket.keepalive-interval", + .op_version = GD_OP_VERSION_3_10_2, + .value = "2", + }, + { + .key = "server.keepalive-count", + .voltype = "protocol/server", + .option = "transport.socket.keepalive-count", + .op_version = GD_OP_VERSION_3_10_2, + .value = "9", + }, + { + .key = "transport.listen-backlog", + .voltype = "protocol/server", + .option = "transport.listen-backlog", + .op_version = GD_OP_VERSION_3_11_1, + .validate_fn = validate_server_options, + .description = "This option uses the value of backlog argument that " + "defines the maximum length to which the queue of " + "pending connections for socket fd may grow.", + .value = "1024", + }, + + /* Generic transport options */ + { + .key = SSL_OWN_CERT_OPT, + .voltype = "rpc-transport/socket", + .option = "!ssl-own-cert", + .op_version = GD_OP_VERSION_3_7_4, + }, + { + .key = SSL_PRIVATE_KEY_OPT, + .voltype = "rpc-transport/socket", + .option = "!ssl-private-key", + .op_version = GD_OP_VERSION_3_7_4, + }, + { + .key = SSL_CA_LIST_OPT, + .voltype = "rpc-transport/socket", + .option = "!ssl-ca-list", + .op_version = GD_OP_VERSION_3_7_4, + }, + { + .key = SSL_CRL_PATH_OPT, + .voltype = "rpc-transport/socket", + .option = "!ssl-crl-path", + .op_version = GD_OP_VERSION_3_7_4, + }, + { + .key = SSL_CERT_DEPTH_OPT, + .voltype = "rpc-transport/socket", + .option = "!ssl-cert-depth", + .op_version = GD_OP_VERSION_3_6_0, + }, + { + .key = SSL_CIPHER_LIST_OPT, + .voltype = "rpc-transport/socket", + .option = "!ssl-cipher-list", + .op_version = GD_OP_VERSION_3_6_0, + }, + { + .key = SSL_DH_PARAM_OPT, + .voltype = "rpc-transport/socket", + .option = "!ssl-dh-param", + .op_version = GD_OP_VERSION_3_7_4, + }, + { + .key = SSL_EC_CURVE_OPT, + .voltype = "rpc-transport/socket", + .option = "!ssl-ec-curve", + .op_version = GD_OP_VERSION_3_7_4, + }, + { + .key = "transport.address-family", + .voltype = "protocol/server", + .option = "!address-family", + .op_version = GD_OP_VERSION_3_7_4, + .type = NO_DOC, + }, + + /* Performance xlators enable/disbable options */ + {.key = "performance.write-behind", + .voltype = "performance/write-behind", + .option = "!perf", + .value = "on", + .op_version = 1, + .description = "enable/disable write-behind translator in the " + "volume.", + .flags = 
VOLOPT_FLAG_CLIENT_OPT | VOLOPT_FLAG_XLATOR_OPT}, + {.key = "performance.read-ahead", + .voltype = "performance/read-ahead", + .option = "!perf", + .value = "off", + .op_version = 1, + .description = "enable/disable read-ahead translator in the volume.", + .flags = VOLOPT_FLAG_CLIENT_OPT | VOLOPT_FLAG_XLATOR_OPT}, + {.key = "performance.readdir-ahead", + .voltype = "performance/readdir-ahead", + .option = "!perf", + .value = "off", + .op_version = 3, + .description = "enable/disable readdir-ahead translator in the volume.", + .flags = VOLOPT_FLAG_CLIENT_OPT | VOLOPT_FLAG_XLATOR_OPT}, + {.key = "performance.io-cache", + .voltype = "performance/io-cache", + .option = "!perf", + .value = "off", + .op_version = 1, + .description = "enable/disable io-cache translator in the volume.", + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "performance.open-behind", + .voltype = "performance/open-behind", + .option = "!perf", + .value = "on", + .op_version = 2, + .description = "enable/disable open-behind translator in the volume.", + .flags = VOLOPT_FLAG_CLIENT_OPT | VOLOPT_FLAG_XLATOR_OPT + + }, + {.key = "performance.quick-read", + .voltype = "performance/quick-read", + .option = "!perf", + .value = "on", + .op_version = 1, + .description = "enable/disable quick-read translator in the volume.", + .flags = VOLOPT_FLAG_CLIENT_OPT | VOLOPT_FLAG_XLATOR_OPT}, + {.key = "performance.nl-cache", + .voltype = "performance/nl-cache", + .option = "!perf", + .value = "off", + .op_version = GD_OP_VERSION_3_11_0, + .description = "enable/disable negative entry caching translator in " + "the volume. Enabling this option improves performance" + " of 'create file/directory' workload", + .flags = VOLOPT_FLAG_CLIENT_OPT | VOLOPT_FLAG_XLATOR_OPT}, + {.key = "performance.stat-prefetch", + .voltype = "performance/md-cache", + .option = "!perf", + .value = "on", + .op_version = 1, + .description = "enable/disable meta-data caching translator in the " + "volume.", + .flags = VOLOPT_FLAG_CLIENT_OPT | VOLOPT_FLAG_XLATOR_OPT}, + {.key = "performance.client-io-threads", + .voltype = "performance/io-threads", + .option = "!perf", + .value = "on", + .op_version = 1, + .description = "enable/disable io-threads translator in the client " + "graph of volume.", + .flags = VOLOPT_FLAG_CLIENT_OPT | VOLOPT_FLAG_XLATOR_OPT}, + {.key = "performance.nfs.write-behind", + .voltype = "performance/write-behind", + .option = "!nfsperf", + .value = "on", + .op_version = 1, + .description = "enable/disable write-behind translator in the volume", + .flags = VOLOPT_FLAG_XLATOR_OPT}, + {.key = "performance.nfs.read-ahead", + .voltype = "performance/read-ahead", + .option = "!nfsperf", + .value = "off", + .type = NO_DOC, + .op_version = 1, + .flags = VOLOPT_FLAG_XLATOR_OPT}, + {.key = "performance.nfs.io-cache", + .voltype = "performance/io-cache", + .option = "!nfsperf", + .value = "off", + .type = NO_DOC, + .op_version = 1, + .flags = VOLOPT_FLAG_XLATOR_OPT}, + {.key = "performance.nfs.quick-read", + .voltype = "performance/quick-read", + .option = "!nfsperf", + .value = "off", + .type = NO_DOC, + .op_version = 1, + .flags = VOLOPT_FLAG_XLATOR_OPT}, + {.key = "performance.nfs.stat-prefetch", + .voltype = "performance/md-cache", + .option = "!nfsperf", + .value = "off", + .type = NO_DOC, + .op_version = 1, + .flags = VOLOPT_FLAG_XLATOR_OPT}, + {.key = "performance.nfs.io-threads", + .voltype = "performance/io-threads", + .option = "!nfsperf", + .value = "off", + .type = NO_DOC, + .op_version = 1, + .flags = VOLOPT_FLAG_XLATOR_OPT}, + {.key = 
"performance.force-readdirp", + .voltype = "performance/md-cache", + .option = "force-readdirp", + .op_version = 2, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "performance.cache-invalidation", + .voltype = "performance/md-cache", + .option = "cache-invalidation", + .op_version = GD_OP_VERSION_3_9_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + + {.key = "performance.global-cache-invalidation", + .voltype = "performance/md-cache", + .option = "global-cache-invalidation", + .op_version = GD_OP_VERSION_6_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + + /* Feature translators */ + {.key = "features.uss", + .voltype = "features/snapview-server", + .op_version = GD_OP_VERSION_3_6_0, + .value = "off", + .flags = VOLOPT_FLAG_CLIENT_OPT | VOLOPT_FLAG_XLATOR_OPT, + .validate_fn = validate_uss, + .description = "enable/disable User Serviceable Snapshots on the " + "volume."}, + + {.key = "features.snapshot-directory", + .voltype = "features/snapview-client", + .op_version = GD_OP_VERSION_3_6_0, + .value = ".snaps", + .flags = VOLOPT_FLAG_CLIENT_OPT | VOLOPT_FLAG_XLATOR_OPT, + .validate_fn = validate_uss_dir, + .description = "Entry point directory for entering snapshot world. " + "Value can have only [0-9a-z-_] and starts with " + "dot (.) and cannot exceed 255 character"}, + + {.key = "features.show-snapshot-directory", + .voltype = "features/snapview-client", + .op_version = GD_OP_VERSION_3_6_0, + .value = "off", + .flags = VOLOPT_FLAG_CLIENT_OPT | VOLOPT_FLAG_XLATOR_OPT, + .description = "show entry point in readdir output of " + "snapdir-entry-path which is set by samba"}, + + {.key = "features.tag-namespaces", + .voltype = "features/namespace", + .op_version = GD_OP_VERSION_4_1_0, + .option = "tag-namespaces", + .value = "off", + .flags = OPT_FLAG_CLIENT_OPT, + .description = "This option enables this translator's functionality " + "that tags every fop with a namespace hash for later " + "throttling, stats collection, logging, etc."}, + +#ifdef HAVE_LIB_Z + /* Compressor-decompressor xlator options + * defaults used from xlator/features/compress/src/cdc.h + */ + {.key = "network.compression", + .voltype = "features/cdc", + .option = "!feat", + .value = "off", + .op_version = 3, + .description = "enable/disable network compression translator", + .flags = VOLOPT_FLAG_XLATOR_OPT}, + {.key = "network.compression.window-size", + .voltype = "features/cdc", + .option = "window-size", + .op_version = 3}, + {.key = "network.compression.mem-level", + .voltype = "features/cdc", + .option = "mem-level", + .op_version = 3}, + {.key = "network.compression.min-size", + .voltype = "features/cdc", + .option = "min-size", + .op_version = 3}, + {.key = "network.compression.compression-level", + .voltype = "features/cdc", + .option = "compression-level", + .op_version = 3}, + {.key = "network.compression.debug", + .voltype = "features/cdc", + .option = "debug", + .type = NO_DOC, + .op_version = 3}, +#endif + + /* Quota xlator options */ + { + .key = VKEY_FEATURES_LIMIT_USAGE, + .voltype = "features/quota", + .option = "limit-set", + .type = NO_DOC, + .op_version = 1, + }, + { + .key = "features.default-soft-limit", + .voltype = "features/quota", + .option = "default-soft-limit", + .type = NO_DOC, + .op_version = 3, + }, + { + .key = "features.soft-timeout", + .voltype = "features/quota", + .option = "soft-timeout", + .type = NO_DOC, + .op_version = 3, + }, + { + .key = "features.hard-timeout", + .voltype = "features/quota", + .option = "hard-timeout", + .type = NO_DOC, + .op_version = 3, + }, + { + .key = 
"features.alert-time", + .voltype = "features/quota", + .option = "alert-time", + .type = NO_DOC, + .op_version = 3, + }, + { + .key = "features.quota-deem-statfs", + .voltype = "features/quota", + .option = "deem-statfs", + .value = "off", + .type = DOC, + .op_version = 2, + .validate_fn = validate_quota, + }, + + /* Marker xlator options */ + {.key = VKEY_MARKER_XTIME, + .voltype = "features/marker", + .option = "xtime", + .value = "off", + .type = NO_DOC, + .flags = VOLOPT_FLAG_FORCE, + .op_version = 1}, + {.key = VKEY_MARKER_XTIME, + .voltype = "features/marker", + .option = "!xtime", + .value = "off", + .type = NO_DOC, + .flags = VOLOPT_FLAG_FORCE, + .op_version = 1}, + {.key = VKEY_MARKER_XTIME_FORCE, + .voltype = "features/marker", + .option = "gsync-force-xtime", + .value = "off", + .type = NO_DOC, + .flags = VOLOPT_FLAG_FORCE, + .op_version = 2}, + {.key = VKEY_MARKER_XTIME_FORCE, + .voltype = "features/marker", + .option = "!gsync-force-xtime", + .value = "off", + .type = NO_DOC, + .flags = VOLOPT_FLAG_FORCE, + .op_version = 2}, + {.key = VKEY_FEATURES_QUOTA, + .voltype = "features/marker", + .option = "quota", + .value = "off", + .type = NO_DOC, + .flags = VOLOPT_FLAG_NEVER_RESET, + .op_version = 1}, + {.key = VKEY_FEATURES_INODE_QUOTA, + .voltype = "features/marker", + .option = "inode-quota", + .value = "off", + .type = NO_DOC, + .flags = VOLOPT_FLAG_NEVER_RESET, + .op_version = 1}, + {.key = VKEY_FEATURES_BITROT, + .voltype = "features/bit-rot", + .option = "bitrot", + .value = "disable", + .type = NO_DOC, + .flags = VOLOPT_FLAG_FORCE, + .op_version = GD_OP_VERSION_3_7_0}, + + /* Debug xlators options */ + {.key = "debug.trace", + .voltype = "debug/trace", + .option = "!debug", + .value = "off", + .type = NO_DOC, + .op_version = 1, + .flags = VOLOPT_FLAG_XLATOR_OPT}, + {.key = "debug.log-history", + .voltype = "debug/trace", + .option = "log-history", + .type = NO_DOC, + .op_version = 2}, + {.key = "debug.log-file", + .voltype = "debug/trace", + .option = "log-file", + .type = NO_DOC, + .op_version = 2}, + {.key = "debug.exclude-ops", + .voltype = "debug/trace", + .option = "exclude-ops", + .type = NO_DOC, + .op_version = 2}, + {.key = "debug.include-ops", + .voltype = "debug/trace", + .option = "include-ops", + .type = NO_DOC, + .op_version = 2}, + {.key = "debug.error-gen", + .voltype = "debug/error-gen", + .option = "!debug", + .value = "off", + .type = NO_DOC, + .op_version = 1, + .flags = VOLOPT_FLAG_XLATOR_OPT}, + {.key = "debug.error-failure", + .voltype = "debug/error-gen", + .option = "failure", + .type = NO_DOC, + .op_version = 3}, + {.key = "debug.error-number", + .voltype = "debug/error-gen", + .option = "error-no", + .type = NO_DOC, + .op_version = 3}, + {.key = "debug.random-failure", + .voltype = "debug/error-gen", + .option = "random-failure", + .type = NO_DOC, + .op_version = 3}, + {.key = "debug.error-fops", + .voltype = "debug/error-gen", + .option = "enable", + .type = NO_DOC, + .op_version = 3}, + + /* NFS xlator options */ + {.key = "nfs.enable-ino32", + .voltype = "nfs/server", + .option = "nfs.enable-ino32", + .type = GLOBAL_DOC, + .op_version = 1}, + {.key = "nfs.mem-factor", + .voltype = "nfs/server", + .option = "nfs.mem-factor", + .type = GLOBAL_DOC, + .op_version = 1}, + {.key = "nfs.export-dirs", + .voltype = "nfs/server", + .option = "nfs3.export-dirs", + .type = GLOBAL_DOC, + .op_version = 1}, + {.key = "nfs.export-volumes", + .voltype = "nfs/server", + .option = "nfs3.export-volumes", + .type = GLOBAL_DOC, + .op_version = 1}, + {.key = 
"nfs.addr-namelookup", + .voltype = "nfs/server", + .option = "rpc-auth.addr.namelookup", + .type = GLOBAL_DOC, + .op_version = 1}, + {.key = "nfs.dynamic-volumes", + .voltype = "nfs/server", + .option = "nfs.dynamic-volumes", + .type = GLOBAL_DOC, + .op_version = 1}, + {.key = "nfs.register-with-portmap", + .voltype = "nfs/server", + .option = "rpc.register-with-portmap", + .type = GLOBAL_DOC, + .op_version = 1}, + {.key = "nfs.outstanding-rpc-limit", + .voltype = "nfs/server", + .option = "rpc.outstanding-rpc-limit", + .type = GLOBAL_DOC, + .op_version = 3}, + {.key = "nfs.port", + .voltype = "nfs/server", + .option = "nfs.port", + .type = GLOBAL_DOC, + .op_version = 1}, + {.key = "nfs.rpc-auth-unix", + .voltype = "nfs/server", + .option = "!rpc-auth.auth-unix.*", + .op_version = 1}, + {.key = "nfs.rpc-auth-null", + .voltype = "nfs/server", + .option = "!rpc-auth.auth-null.*", + .op_version = 1}, + {.key = "nfs.rpc-auth-allow", + .voltype = "nfs/server", + .option = "!rpc-auth.addr.*.allow", + .op_version = 1}, + {.key = "nfs.rpc-auth-reject", + .voltype = "nfs/server", + .option = "!rpc-auth.addr.*.reject", + .op_version = 1}, + {.key = "nfs.ports-insecure", + .voltype = "nfs/server", + .option = "!rpc-auth.ports.*.insecure", + .op_version = 1}, + {.key = "nfs.transport-type", + .voltype = "nfs/server", + .option = "!nfs.transport-type", + .op_version = 1, + .description = "Specifies the nfs transport type. Valid " + "transport types are 'tcp' and 'rdma'."}, + {.key = "nfs.trusted-sync", + .voltype = "nfs/server", + .option = "!nfs3.*.trusted-sync", + .op_version = 1}, + {.key = "nfs.trusted-write", + .voltype = "nfs/server", + .option = "!nfs3.*.trusted-write", + .op_version = 1}, + {.key = "nfs.volume-access", + .voltype = "nfs/server", + .option = "!nfs3.*.volume-access", + .op_version = 1}, + {.key = "nfs.export-dir", + .voltype = "nfs/server", + .option = "!nfs3.*.export-dir", + .op_version = 1}, + {.key = NFS_DISABLE_MAP_KEY, + .voltype = "nfs/server", + .option = "!nfs-disable", + .value = SITE_H_NFS_DISABLE, + .op_version = 1}, + {.key = "nfs.nlm", + .voltype = "nfs/server", + .option = "nfs.nlm", + .type = GLOBAL_DOC, + .op_version = 1}, + {.key = "nfs.acl", + .voltype = "nfs/server", + .option = "nfs.acl", + .type = GLOBAL_DOC, + .op_version = 3}, + {.key = "nfs.mount-udp", + .voltype = "nfs/server", + .option = "nfs.mount-udp", + .type = GLOBAL_DOC, + .op_version = 1}, + {.key = "nfs.mount-rmtab", + .voltype = "nfs/server", + .option = "nfs.mount-rmtab", + .type = GLOBAL_DOC, + .op_version = 1}, + { + .key = "nfs.rpc-statd", + .voltype = "nfs/server", + .option = "nfs.rpc-statd", + .type = NO_DOC, + .op_version = GD_OP_VERSION_3_6_0, + }, + { + .key = "nfs.log-level", + .voltype = "nfs/server", + .option = "nfs.log-level", + .type = NO_DOC, + .op_version = GD_OP_VERSION_3_6_0, + }, + {.key = "nfs.server-aux-gids", + .voltype = "nfs/server", + .option = "nfs.server-aux-gids", + .type = NO_DOC, + .op_version = 2}, + {.key = "nfs.drc", + .voltype = "nfs/server", + .option = "nfs.drc", + .type = GLOBAL_DOC, + .op_version = 3}, + {.key = "nfs.drc-size", + .voltype = "nfs/server", + .option = "nfs.drc-size", + .type = GLOBAL_DOC, + .op_version = 3}, + {.key = "nfs.read-size", + .voltype = "nfs/server", + .option = "nfs3.read-size", + .type = GLOBAL_DOC, + .op_version = 3}, + {.key = "nfs.write-size", + .voltype = "nfs/server", + .option = "nfs3.write-size", + .type = GLOBAL_DOC, + .op_version = 3}, + {.key = "nfs.readdir-size", + .voltype = "nfs/server", + .option = 
"nfs3.readdir-size", + .type = GLOBAL_DOC, + .op_version = 3}, + {.key = "nfs.rdirplus", + .voltype = "nfs/server", + .option = "nfs.rdirplus", + .type = GLOBAL_DOC, + .op_version = GD_OP_VERSION_3_7_12, + .description = "When this option is set to off NFS falls back to " + "standard readdir instead of readdirp"}, + { + .key = "nfs.event-threads", + .voltype = "nfs/server", + .option = "nfs.event-threads", + .type = NO_DOC, + .op_version = GD_OP_VERSION_4_0_0, + }, + + /* Cli options for Export authentication on nfs mount */ + {.key = "nfs.exports-auth-enable", + .voltype = "nfs/server", + .option = "nfs.exports-auth-enable", + .type = GLOBAL_DOC, + .op_version = GD_OP_VERSION_3_7_0}, + {.key = "nfs.auth-refresh-interval-sec", + .voltype = "nfs/server", + .option = "nfs.auth-refresh-interval-sec", + .type = GLOBAL_DOC, + .op_version = GD_OP_VERSION_3_7_0}, + {.key = "nfs.auth-cache-ttl-sec", + .voltype = "nfs/server", + .option = "nfs.auth-cache-ttl-sec", + .type = GLOBAL_DOC, + .op_version = GD_OP_VERSION_3_7_0}, + + /* Other options which don't fit any place above */ + {.key = "features.read-only", + .voltype = "features/read-only", + .option = "read-only", + .op_version = 1, + .flags = VOLOPT_FLAG_CLIENT_OPT | VOLOPT_FLAG_XLATOR_OPT}, + {.key = "features.worm", + .voltype = "features/worm", + .option = "worm", + .value = "off", + .validate_fn = validate_boolean, + .op_version = 2, + .flags = VOLOPT_FLAG_CLIENT_OPT | VOLOPT_FLAG_XLATOR_OPT}, + {.key = "features.worm-file-level", + .voltype = "features/worm", + .option = "worm-file-level", + .value = "off", + .validate_fn = validate_boolean, + .op_version = GD_OP_VERSION_3_8_0, + .flags = VOLOPT_FLAG_CLIENT_OPT | VOLOPT_FLAG_XLATOR_OPT}, + {.key = "features.worm-files-deletable", + .voltype = "features/worm", + .option = "worm-files-deletable", + .value = "on", + .validate_fn = validate_boolean, + .op_version = GD_OP_VERSION_3_13_0, + .flags = VOLOPT_FLAG_CLIENT_OPT | VOLOPT_FLAG_XLATOR_OPT}, + { + .key = "features.default-retention-period", + .voltype = "features/worm", + .option = "default-retention-period", + .validate_fn = validate_worm_period, + .op_version = GD_OP_VERSION_3_8_0, + }, + { + .key = "features.retention-mode", + .voltype = "features/worm", + .option = "retention-mode", + .validate_fn = validate_reten_mode, + .op_version = GD_OP_VERSION_3_8_0, + }, + { + .key = "features.auto-commit-period", + .voltype = "features/worm", + .option = "auto-commit-period", + .validate_fn = validate_worm_period, + .op_version = GD_OP_VERSION_3_8_0, + }, + {.key = "storage.linux-aio", .voltype = "storage/posix", .op_version = 1}, + {.key = "storage.batch-fsync-mode", + .voltype = "storage/posix", + .op_version = 3}, + {.key = "storage.batch-fsync-delay-usec", + .voltype = "storage/posix", + .op_version = 3}, + { + .key = "storage.xattr-user-namespace-mode", + .voltype = "storage/posix", + .op_version = GD_OP_VERSION_3_6_0, + }, + {.key = "storage.owner-uid", + .voltype = "storage/posix", + .option = "brick-uid", + .op_version = 1}, + {.key = "storage.owner-gid", + .voltype = "storage/posix", + .option = "brick-gid", + .op_version = 1}, + {.key = "storage.node-uuid-pathinfo", + .voltype = "storage/posix", + .op_version = 3}, + {.key = "storage.health-check-interval", + .voltype = "storage/posix", + .op_version = 3}, + { + .option = "update-link-count-parent", + .key = "storage.build-pgfid", + .voltype = "storage/posix", + .op_version = GD_OP_VERSION_3_6_0, + }, + { + .option = "gfid2path", + .key = "storage.gfid2path", + .type = NO_DOC, + 
.voltype = "storage/posix", + .op_version = GD_OP_VERSION_3_12_0, + }, + { + .option = "gfid2path-separator", + .key = "storage.gfid2path-separator", + .voltype = "storage/posix", + .op_version = GD_OP_VERSION_3_12_0, + }, + { + .key = "storage.reserve", + .voltype = "storage/posix", + .op_version = GD_OP_VERSION_3_13_0, + }, + { + .option = "health-check-timeout", + .key = "storage.health-check-timeout", + .type = NO_DOC, + .voltype = "storage/posix", + .op_version = GD_OP_VERSION_4_0_0, + }, + { + .option = "fips-mode-rchecksum", + .key = "storage.fips-mode-rchecksum", + .type = NO_DOC, + .voltype = "storage/posix", + .op_version = GD_OP_VERSION_4_0_0, + }, + { + .option = "force-create-mode", + .key = "storage.force-create-mode", + .voltype = "storage/posix", + .op_version = GD_OP_VERSION_4_0_0, + }, + { + .option = "force-directory-mode", + .key = "storage.force-directory-mode", + .voltype = "storage/posix", + .op_version = GD_OP_VERSION_4_0_0, + }, + { + .option = "create-mask", + .key = "storage.create-mask", + .voltype = "storage/posix", + .op_version = GD_OP_VERSION_4_0_0, + }, + { + .option = "create-directory-mask", + .key = "storage.create-directory-mask", + .voltype = "storage/posix", + .op_version = GD_OP_VERSION_4_0_0, + }, + { + .option = "max-hardlinks", + .key = "storage.max-hardlinks", + .voltype = "storage/posix", + .op_version = GD_OP_VERSION_4_0_0, + }, + { + .option = "ctime", + .key = "features.ctime", + .voltype = "storage/posix", + .op_version = GD_OP_VERSION_4_1_0, + }, + {.key = "config.memory-accounting", + .voltype = "mgmt/glusterd", + .option = "!config", + .op_version = 2, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "config.transport", + .voltype = "mgmt/glusterd", + .option = "!config", + .op_version = 2}, + {.key = VKEY_CONFIG_GFPROXY, + .voltype = "configuration", + .option = "gfproxyd", + .value = "off", + .type = DOC, + .op_version = GD_OP_VERSION_3_13_0, + .description = "If this option is enabled, the proxy client daemon " + "called gfproxyd will be started on all the trusted " + "storage pool nodes"}, + {.key = GLUSTERD_QUORUM_TYPE_KEY, + .voltype = "mgmt/glusterd", + .value = "off", + .op_version = 2}, + {.key = GLUSTERD_QUORUM_RATIO_KEY, + .voltype = "mgmt/glusterd", + .value = "51", + .op_version = 2}, + /* changelog translator - global tunables */ + {.key = "changelog.changelog", + .voltype = "features/changelog", + .type = NO_DOC, + .op_version = 3}, + {.key = "changelog.changelog-dir", + .voltype = "features/changelog", + .type = NO_DOC, + .op_version = 3}, + {.key = "changelog.encoding", + .voltype = "features/changelog", + .type = NO_DOC, + .op_version = 3}, + {.key = "changelog.rollover-time", + .voltype = "features/changelog", + .type = NO_DOC, + .op_version = 3}, + {.key = "changelog.fsync-interval", + .voltype = "features/changelog", + .type = NO_DOC, + .op_version = 3}, + { + .key = "changelog.changelog-barrier-timeout", + .voltype = "features/changelog", + .value = BARRIER_TIMEOUT, + .op_version = GD_OP_VERSION_3_6_0, + }, + {.key = "changelog.capture-del-path", + .voltype = "features/changelog", + .type = NO_DOC, + .op_version = 3}, + { + .key = "features.barrier", + .voltype = "features/barrier", + .value = "disable", + .type = NO_DOC, + .op_version = GD_OP_VERSION_3_7_0, + }, + { + .key = "features.barrier-timeout", + .voltype = "features/barrier", + .value = BARRIER_TIMEOUT, + .op_version = GD_OP_VERSION_3_6_0, + }, + { + .key = GLUSTERD_GLOBAL_OP_VERSION_KEY, + .voltype = "mgmt/glusterd", + .op_version = GD_OP_VERSION_3_6_0, + }, 
+ { + .key = GLUSTERD_MAX_OP_VERSION_KEY, + .voltype = "mgmt/glusterd", + .op_version = GD_OP_VERSION_3_10_0, + }, + /*Trash translator options */ + { + .key = "features.trash", + .voltype = "features/trash", + .op_version = GD_OP_VERSION_3_7_0, + }, + { + .key = "features.trash-dir", + .voltype = "features/trash", + .op_version = GD_OP_VERSION_3_7_0, + }, + { + .key = "features.trash-eliminate-path", + .voltype = "features/trash", + .op_version = GD_OP_VERSION_3_7_0, + }, + { + .key = "features.trash-max-filesize", + .voltype = "features/trash", + .op_version = GD_OP_VERSION_3_7_0, + }, + { + .key = "features.trash-internal-op", + .voltype = "features/trash", + .op_version = GD_OP_VERSION_3_7_0, + }, + {.key = GLUSTERD_SHARED_STORAGE_KEY, + .voltype = "mgmt/glusterd", + .value = "disable", + .type = GLOBAL_DOC, + .op_version = GD_OP_VERSION_3_7_1, + .description = "Create and mount the shared storage volume" + "(gluster_shared_storage) at " + "/var/run/gluster/shared_storage on enabling this " + "option. Unmount and delete the shared storage volume " + " on disabling this option."}, + { + .key = "locks.trace", + .voltype = "features/locks", + .op_version = GD_OP_VERSION_3_7_0, + }, + { + .key = "locks.mandatory-locking", + .voltype = "features/locks", + .op_version = GD_OP_VERSION_3_8_0, + .validate_fn = validate_mandatory_locking, + }, + {.key = "cluster.disperse-self-heal-daemon", + .voltype = "cluster/disperse", + .type = NO_DOC, + .option = "self-heal-daemon", + .op_version = GD_OP_VERSION_3_7_0, + .validate_fn = validate_disperse_heal_enable_disable}, + {.key = "cluster.quorum-reads", + .voltype = "cluster/replicate", + .op_version = GD_OP_VERSION_3_7_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "client.bind-insecure", + .voltype = "protocol/client", + .option = "client-bind-insecure", + .type = NO_DOC, + .op_version = GD_OP_VERSION_3_7_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "features.timeout", + .voltype = "features/quiesce", + .option = "timeout", + .op_version = GD_OP_VERSION_4_0_0, + .flags = VOLOPT_FLAG_CLIENT_OPT, + .description = "Specifies the number of seconds the " + "quiesce translator will wait " + "for a CHILD_UP event before " + "force-unwinding the frames it has " + "currently stored for retry."}, + {.key = "features.failover-hosts", + .voltype = "features/quiesce", + .option = "failover-hosts", + .op_version = GD_OP_VERSION_4_0_0, + .flags = VOLOPT_FLAG_CLIENT_OPT, + .description = "It is a comma separated list of hostname/IP " + "addresses. 
It Specifies the list of hosts where " + "the gfproxy daemons are running, to which the " + "the thin clients can failover to."}, + {.key = "features.shard", + .voltype = "features/shard", + .value = "off", + .option = "!shard", + .op_version = GD_OP_VERSION_3_7_0, + .description = "enable/disable sharding translator on the volume.", + .flags = VOLOPT_FLAG_CLIENT_OPT | VOLOPT_FLAG_XLATOR_OPT}, + {.key = "features.shard-block-size", + .voltype = "features/shard", + .op_version = GD_OP_VERSION_3_7_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + { + .key = "features.shard-lru-limit", + .voltype = "features/shard", + .op_version = GD_OP_VERSION_5_0, + .flags = VOLOPT_FLAG_CLIENT_OPT, + .type = NO_DOC, + }, + {.key = "features.shard-deletion-rate", + .voltype = "features/shard", + .op_version = GD_OP_VERSION_5_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + { + .key = "features.scrub-throttle", + .voltype = "features/bit-rot", + .value = "lazy", + .option = "scrub-throttle", + .op_version = GD_OP_VERSION_3_7_0, + .type = NO_DOC, + }, + { + .key = "features.scrub-freq", + .voltype = "features/bit-rot", + .value = "biweekly", + .option = "scrub-frequency", + .op_version = GD_OP_VERSION_3_7_0, + .type = NO_DOC, + }, + { + .key = "features.scrub", + .voltype = "features/bit-rot", + .option = "scrubber", + .op_version = GD_OP_VERSION_3_7_0, + .flags = VOLOPT_FLAG_FORCE, + .type = NO_DOC, + }, + { + .key = "features.expiry-time", + .voltype = "features/bit-rot", + .value = SIGNING_TIMEOUT, + .option = "expiry-time", + .op_version = GD_OP_VERSION_3_7_0, + .type = NO_DOC, + }, + { + .key = "features.signer-threads", + .voltype = "features/bit-rot", + .value = BR_WORKERS, + .option = "signer-threads", + .op_version = GD_OP_VERSION_8_0, + .type = NO_DOC, + }, + /* Upcall translator options */ + /* Upcall translator options */ + { + .key = "features.cache-invalidation", + .voltype = "features/upcall", + .value = "off", + .op_version = GD_OP_VERSION_3_7_0, + }, + { + .key = "features.cache-invalidation-timeout", + .voltype = "features/upcall", + .op_version = GD_OP_VERSION_3_7_0, + }, + { + .key = "ganesha.enable", + .voltype = "mgmt/ganesha", + .value = "off", + .option = "ganesha.enable", + .op_version = GD_OP_VERSION_7_0, + }, + /* Lease translator options */ + { + .key = "features.leases", + .voltype = "features/leases", + .value = "off", + .op_version = GD_OP_VERSION_3_8_0, + }, + { + .key = "features.lease-lock-recall-timeout", + .voltype = "features/leases", + .op_version = GD_OP_VERSION_3_8_0, + }, + {.key = "disperse.background-heals", + .voltype = "cluster/disperse", + .op_version = GD_OP_VERSION_3_7_3, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "disperse.heal-wait-qlength", + .voltype = "cluster/disperse", + .op_version = GD_OP_VERSION_3_7_3, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + { + .key = "cluster.heal-timeout", + .voltype = "cluster/disperse", + .option = "!heal-timeout", + .op_version = GD_OP_VERSION_3_7_3, + .type = NO_DOC, + }, + {.key = "dht.force-readdirp", + .voltype = "cluster/distribute", + .option = "use-readdirp", + .op_version = GD_OP_VERSION_3_7_5, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "disperse.read-policy", + .voltype = "cluster/disperse", + .op_version = GD_OP_VERSION_3_7_6, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.shd-max-threads", + .voltype = "cluster/replicate", + .op_version = GD_OP_VERSION_3_7_12, + .flags = VOLOPT_FLAG_CLIENT_OPT, + .validate_fn = validate_replica}, + {.key = "cluster.shd-wait-qlength", + .voltype = "cluster/replicate", + .op_version = 
GD_OP_VERSION_3_7_12, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.locking-scheme", + .voltype = "cluster/replicate", + .type = DOC, + .op_version = GD_OP_VERSION_3_7_12, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.granular-entry-heal", + .voltype = "cluster/replicate", + .type = DOC, + .op_version = GD_OP_VERSION_3_8_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + { + .option = "revocation-secs", + .key = "features.locks-revocation-secs", + .voltype = "features/locks", + .op_version = GD_OP_VERSION_3_9_0, + }, + { + .option = "revocation-clear-all", + .key = "features.locks-revocation-clear-all", + .voltype = "features/locks", + .op_version = GD_OP_VERSION_3_9_0, + }, + { + .option = "revocation-max-blocked", + .key = "features.locks-revocation-max-blocked", + .voltype = "features/locks", + .op_version = GD_OP_VERSION_3_9_0, + }, + { + .option = "monkey-unlocking", + .key = "features.locks-monkey-unlocking", + .voltype = "features/locks", + .op_version = GD_OP_VERSION_3_9_0, + .type = NO_DOC, + }, + { + .option = "notify-contention", + .key = "features.locks-notify-contention", + .voltype = "features/locks", + .op_version = GD_OP_VERSION_4_0_0, + }, + { + .option = "notify-contention-delay", + .key = "features.locks-notify-contention-delay", + .voltype = "features/locks", + .op_version = GD_OP_VERSION_4_0_0, + }, + {.key = "disperse.shd-max-threads", + .voltype = "cluster/disperse", + .op_version = GD_OP_VERSION_3_9_0, + .flags = VOLOPT_FLAG_CLIENT_OPT, + .validate_fn = validate_disperse}, + {.key = "disperse.shd-wait-qlength", + .voltype = "cluster/disperse", + .op_version = GD_OP_VERSION_3_9_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "disperse.cpu-extensions", + .voltype = "cluster/disperse", + .op_version = GD_OP_VERSION_3_9_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "disperse.self-heal-window-size", + .voltype = "cluster/disperse", + .op_version = GD_OP_VERSION_3_11_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.use-compound-fops", + .voltype = "cluster/replicate", + .value = "off", + .type = DOC, + .op_version = GD_OP_VERSION_3_8_4, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "performance.parallel-readdir", + .voltype = "performance/readdir-ahead", + .option = "parallel-readdir", + .value = "off", + .type = DOC, + .op_version = GD_OP_VERSION_3_10_0, + .validate_fn = validate_parallel_readdir, + .description = "If this option is enabled, the readdir operation " + "is performed in parallel on all the bricks, thus " + "improving the performance of readdir. 
Note that " + "the performance improvement is higher in large " + "clusters"}, + { + .key = "performance.rda-request-size", + .voltype = "performance/readdir-ahead", + .option = "rda-request-size", + .value = "131072", + .flags = VOLOPT_FLAG_CLIENT_OPT, + .type = DOC, + .op_version = GD_OP_VERSION_3_9_1, + }, + { + .key = "performance.rda-low-wmark", + .voltype = "performance/readdir-ahead", + .option = "rda-low-wmark", + .type = NO_DOC, + .flags = VOLOPT_FLAG_CLIENT_OPT, + .op_version = GD_OP_VERSION_3_9_1, + }, + { + .key = "performance.rda-high-wmark", + .voltype = "performance/readdir-ahead", + .type = NO_DOC, + .flags = VOLOPT_FLAG_CLIENT_OPT, + .op_version = GD_OP_VERSION_3_9_1, + }, + {.key = "performance.rda-cache-limit", + .voltype = "performance/readdir-ahead", + .value = "10MB", + .type = DOC, + .flags = VOLOPT_FLAG_CLIENT_OPT, + .op_version = GD_OP_VERSION_3_9_1, + .validate_fn = validate_rda_cache_limit}, + { + .key = "performance.nl-cache-positive-entry", + .voltype = "performance/nl-cache", + .type = DOC, + .flags = VOLOPT_FLAG_CLIENT_OPT, + .op_version = GD_OP_VERSION_3_11_0, + .description = "enable/disable storing of entries that were lookedup" + " and found to be present in the volume, thus lookup" + " on non existent file is served from the cache", + }, + { + .key = "performance.nl-cache-limit", + .voltype = "performance/nl-cache", + .value = "10MB", + .flags = VOLOPT_FLAG_CLIENT_OPT, + .op_version = GD_OP_VERSION_3_11_0, + }, + { + .key = "performance.nl-cache-timeout", + .voltype = "performance/nl-cache", + .flags = VOLOPT_FLAG_CLIENT_OPT, + .op_version = GD_OP_VERSION_3_11_0, + }, + + /* Brick multiplexing options */ + {.key = GLUSTERD_BRICK_MULTIPLEX_KEY, + .voltype = "mgmt/glusterd", + .value = "disable", + .op_version = GD_OP_VERSION_3_10_0, + .validate_fn = validate_boolean, + .type = GLOBAL_DOC, + .description = "This global option can be used to enable/disable " + "brick multiplexing. Brick multiplexing ensures that " + "compatible brick instances can share one single " + "brick process."}, + {.key = GLUSTERD_VOL_CNT_PER_THRD, + .voltype = "mgmt/glusterd", + .value = GLUSTERD_VOL_CNT_PER_THRD_DEFAULT_VALUE, + .op_version = GD_OP_VERSION_7_0, + .validate_fn = validate_volume_per_thread_limit, + .type = GLOBAL_NO_DOC, + .description = + "This option can be used to limit the number of volumes " + "handled per thread to populate peer data.The option accepts " + "values in the range of 5 to 200"}, + {.key = GLUSTERD_BRICKMUX_LIMIT_KEY, + .voltype = "mgmt/glusterd", + .value = GLUSTERD_BRICKMUX_LIMIT_DFLT_VALUE, + .op_version = GD_OP_VERSION_3_12_0, + .validate_fn = validate_mux_limit, + .type = GLOBAL_DOC, + .description = "This option can be used to limit the number of brick " + "instances per brick process when brick-multiplexing " + "is enabled. If not explicitly set, this tunable is " + "set to 0 which denotes that brick-multiplexing can " + "happen without any limit on the number of bricks per " + "process. 
Also this option can't be set when the " + "brick-multiplexing feature is disabled."}, + {.key = "disperse.optimistic-change-log", + .voltype = "cluster/disperse", + .type = NO_DOC, + .op_version = GD_OP_VERSION_3_10_1, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "disperse.stripe-cache", + .voltype = "cluster/disperse", + .type = NO_DOC, + .op_version = GD_OP_VERSION_4_0_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + + /* Halo replication options */ + {.key = "cluster.halo-enabled", + .voltype = "cluster/replicate", + .op_version = GD_OP_VERSION_3_11_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.halo-shd-max-latency", + .voltype = "cluster/replicate", + .op_version = GD_OP_VERSION_3_11_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.halo-nfsd-max-latency", + .voltype = "cluster/replicate", + .op_version = GD_OP_VERSION_3_11_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.halo-max-latency", + .voltype = "cluster/replicate", + .op_version = GD_OP_VERSION_3_11_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.halo-max-replicas", + .voltype = "cluster/replicate", + .op_version = GD_OP_VERSION_3_11_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.halo-min-replicas", + .voltype = "cluster/replicate", + .op_version = GD_OP_VERSION_3_11_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = VKEY_FEATURES_SELINUX, + .voltype = "features/selinux", + .type = NO_DOC, + .value = "on", + .op_version = GD_OP_VERSION_3_11_0, + .description = "Convert security.selinux xattrs to " + "trusted.gluster.selinux on the bricks. Recommended " + "to have enabled when clients and/or bricks support " + "SELinux."}, + {.key = GLUSTERD_LOCALTIME_LOGGING_KEY, + .voltype = "mgmt/glusterd", + .type = GLOBAL_DOC, + .op_version = GD_OP_VERSION_3_12_0, + .validate_fn = validate_boolean}, + {.key = GLUSTERD_DAEMON_LOG_LEVEL_KEY, + .voltype = "mgmt/glusterd", + .type = GLOBAL_NO_DOC, + .value = "INFO", + .op_version = GD_OP_VERSION_5_0}, + {.key = "debug.delay-gen", + .voltype = "debug/delay-gen", + .option = "!debug", + .value = "off", + .type = NO_DOC, + .op_version = GD_OP_VERSION_3_13_0, + .flags = VOLOPT_FLAG_XLATOR_OPT}, + { + .key = "delay-gen.delay-percentage", + .voltype = "debug/delay-gen", + .type = NO_DOC, + .op_version = GD_OP_VERSION_3_13_0, + }, + { + .key = "delay-gen.delay-duration", + .voltype = "debug/delay-gen", + .type = NO_DOC, + .op_version = GD_OP_VERSION_3_13_0, + }, + { + .key = "delay-gen.enable", + .voltype = "debug/delay-gen", + .type = NO_DOC, + .op_version = GD_OP_VERSION_3_13_0, + }, + {.key = "disperse.parallel-writes", + .voltype = "cluster/disperse", + .type = NO_DOC, + .op_version = GD_OP_VERSION_3_13_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "disperse.quorum-count", + .voltype = "cluster/disperse", + .type = NO_DOC, + .op_version = GD_OP_VERSION_8_0, + .validate_fn = validate_disperse_quorum_count, + .description = "This option can be used to define how many successes on" + "the bricks constitute a success to the application. 
This" + " count should be in the range" + "[disperse-data-count, disperse-count] (inclusive)", + .flags = VOLOPT_FLAG_CLIENT_OPT}, + { + .key = "features.sdfs", + .voltype = "features/sdfs", + .value = "off", + .option = "!features", + .op_version = GD_OP_VERSION_4_0_0, + .description = "enable/disable dentry serialization xlator in volume", + .type = NO_DOC, + }, + {.key = "features.cloudsync", + .voltype = "features/cloudsync", + .value = "off", + .op_version = GD_OP_VERSION_4_1_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "features.ctime", + .voltype = "features/utime", + .validate_fn = validate_boolean, + .value = "on", + .option = "!utime", + .op_version = GD_OP_VERSION_4_1_0, + .description = "enable/disable utime translator on the volume.", + .flags = VOLOPT_FLAG_CLIENT_OPT | VOLOPT_FLAG_XLATOR_OPT}, + {.key = "ctime.noatime", + .voltype = "features/utime", + .validate_fn = validate_boolean, + .value = "on", + .option = "noatime", + .op_version = GD_OP_VERSION_5_0, + .description = "enable/disable noatime option with ctime enabled.", + .flags = VOLOPT_FLAG_CLIENT_OPT | VOLOPT_FLAG_XLATOR_OPT}, + {.key = "features.cloudsync-storetype", + .voltype = "features/cloudsync", + .op_version = GD_OP_VERSION_5_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "features.s3plugin-seckey", + .voltype = "features/cloudsync", + .op_version = GD_OP_VERSION_5_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "features.s3plugin-keyid", + .voltype = "features/cloudsync", + .op_version = GD_OP_VERSION_5_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "features.s3plugin-bucketid", + .voltype = "features/cloudsync", + .op_version = GD_OP_VERSION_5_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "features.s3plugin-hostname", + .voltype = "features/cloudsync", + .op_version = GD_OP_VERSION_5_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "features.enforce-mandatory-lock", + .voltype = "features/locks", + .value = "off", + .type = NO_DOC, + .op_version = GD_OP_VERSION_6_0, + .validate_fn = validate_boolean, + .description = "option to enforce mandatory lock on a file", + .flags = VOLOPT_FLAG_XLATOR_OPT}, + {.key = VKEY_CONFIG_GLOBAL_THREADING, + .voltype = "debug/io-stats", + .option = "global-threading", + .value = "off", + .op_version = GD_OP_VERSION_6_0}, + {.key = VKEY_CONFIG_CLIENT_THREADS, + .voltype = "debug/io-stats", + .option = "!client-threads", + .value = "16", + .op_version = GD_OP_VERSION_6_0}, + {.key = VKEY_CONFIG_BRICK_THREADS, + .voltype = "debug/io-stats", + .option = "!brick-threads", + .value = "16", + .op_version = GD_OP_VERSION_6_0}, + {.key = "features.cloudsync-remote-read", + .voltype = "features/cloudsync", + .value = "off", + .op_version = GD_OP_VERSION_7_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "features.cloudsync-store-id", + .voltype = "features/cloudsync", + .op_version = GD_OP_VERSION_7_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "features.cloudsync-product-id", + .voltype = "features/cloudsync", + .op_version = GD_OP_VERSION_7_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + { + .key = "features.acl", + .voltype = "features/access-control", + .value = "enable", + .option = "!features", + .op_version = GD_OP_VERSION_8_0, + .description = "(WARNING: for debug purpose only) enable/disable " + "access-control xlator in volume", + .type = NO_DOC, + }, + + {.key = "cluster.use-anonymous-inode", + .voltype = "cluster/replicate", + .op_version = GD_OP_VERSION_9_0, + .value = "yes", + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = NULL}}; diff --git 
a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c index e1c338bc38b..7a86c2997b1 100644 --- a/xlators/mgmt/glusterd/src/glusterd.c +++ b/xlators/mgmt/glusterd/src/glusterd.c @@ -1,520 +1,2105 @@ /* - Copyright (c) 2006-2010 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is GF_FREE software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ + Copyright (c) 2006-2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif #include <time.h> +#include <grp.h> #include <sys/uio.h> #include <sys/resource.h> #include <libgen.h> -#include "uuid.h" +#include <glusterfs/compat-uuid.h> #include "glusterd.h" #include "rpcsvc.h" #include "fnmatch.h" -#include "xlator.h" -#include "call-stub.h" -#include "defaults.h" -#include "list.h" -#include "dict.h" -#include "compat.h" -#include "compat-errno.h" -#include "statedump.h" +#include <glusterfs/xlator.h> +#include <glusterfs/call-stub.h> +#include <glusterfs/defaults.h> +#include <glusterfs/list.h> +#include <glusterfs/dict.h> +#include <glusterfs/options.h> +#include <glusterfs/compat.h> +#include <glusterfs/compat-errno.h> +#include <glusterfs/syscall.h> +#include "glusterd-statedump.h" #include "glusterd-sm.h" #include "glusterd-op-sm.h" #include "glusterd-store.h" +#include "glusterd-hooks.h" #include "glusterd-utils.h" -#include "common-utils.h" +#include "glusterd-locks.h" +#include "glusterd-svc-mgmt.h" +#include "glusterd-shd-svc.h" +#ifdef BUILD_GNFS +#include "glusterd-nfs-svc.h" +#endif +#include "glusterd-bitd-svc.h" +#include "glusterd-scrub-svc.h" +#include "glusterd-quotad-svc.h" +#include "glusterd-snapd-svc.h" +#include "glusterd-messages.h" +#include <glusterfs/common-utils.h> +#include "glusterd-geo-rep.h" +#include <glusterfs/run.h> +#include "rpc-clnt-ping.h" +#include "rpc-common-xdr.h" + +#include <glusterfs/syncop.h> + +#include "glusterd-mountbroker.h" -static uuid_t glusterd_uuid; -extern struct rpcsvc_program glusterd1_mop_prog; -extern struct rpcsvc_program gd_svc_mgmt_prog; -extern struct rpcsvc_program gd_svc_cli_prog; extern struct rpcsvc_program gluster_handshake_prog; +extern struct rpcsvc_program gluster_cli_getspec_prog; extern struct rpcsvc_program gluster_pmap_prog; extern glusterd_op_info_t opinfo; -extern struct rpc_clnt_program glusterd_glusterfs_3_1_mgmt_prog; +extern struct rpcsvc_program gd_svc_mgmt_prog; +extern struct rpcsvc_program gd_svc_mgmt_v3_prog; +extern struct rpcsvc_program gd_svc_peer_prog; +extern struct rpcsvc_program gd_svc_cli_prog; +extern struct rpcsvc_program gd_svc_cli_trusted_progs; +extern struct rpc_clnt_program 
gd_brick_prog; +extern struct rpcsvc_program glusterd_mgmt_hndsk_prog; + +extern char snap_mount_dir[VALID_GLUSTERD_PATHMAX]; rpcsvc_cbk_program_t glusterd_cbk_prog = { - .progname = "Gluster Callback", - .prognum = GLUSTER_CBK_PROGRAM, - .progver = GLUSTER_CBK_VERSION, + .progname = "Gluster Callback", + .prognum = GLUSTER_CBK_PROGRAM, + .progver = GLUSTER_CBK_VERSION, }; +struct rpcsvc_program *gd_inet_programs[] = { + &gd_svc_peer_prog, &gd_svc_cli_trusted_progs, /* Must be index 1 for + secure_mgmt! */ + &gd_svc_mgmt_prog, &gd_svc_mgmt_v3_prog, &gluster_pmap_prog, + &gluster_handshake_prog, &glusterd_mgmt_hndsk_prog, +}; +int gd_inet_programs_count = (sizeof(gd_inet_programs) / + sizeof(gd_inet_programs[0])); + +struct rpcsvc_program *gd_uds_programs[] = { + &gd_svc_cli_prog, + &gluster_cli_getspec_prog, +}; +int gd_uds_programs_count = (sizeof(gd_uds_programs) / + sizeof(gd_uds_programs[0])); + +const char *gd_op_list[GD_OP_MAX + 1] = { + [GD_OP_NONE] = "Invalid op", + [GD_OP_CREATE_VOLUME] = "Create", + [GD_OP_START_BRICK] = "Start Brick", + [GD_OP_STOP_BRICK] = "Stop Brick", + [GD_OP_DELETE_VOLUME] = "Delete", + [GD_OP_START_VOLUME] = "Start", + [GD_OP_STOP_VOLUME] = "Stop", + [GD_OP_DEFRAG_VOLUME] = "Rebalance", + [GD_OP_ADD_BRICK] = "Add brick", + [GD_OP_DETACH_TIER] = "Detach tier", + [GD_OP_TIER_MIGRATE] = "Tier migration", + [GD_OP_REMOVE_BRICK] = "Remove brick", + [GD_OP_REPLACE_BRICK] = "Replace brick", + [GD_OP_SET_VOLUME] = "Set", + [GD_OP_RESET_VOLUME] = "Reset", + [GD_OP_SYNC_VOLUME] = "Sync", + [GD_OP_LOG_ROTATE] = "Log rotate", + [GD_OP_GSYNC_SET] = "Geo-replication", + [GD_OP_PROFILE_VOLUME] = "Profile", + [GD_OP_QUOTA] = "Quota", + [GD_OP_STATUS_VOLUME] = "Status", + [GD_OP_REBALANCE] = "Rebalance", + [GD_OP_HEAL_VOLUME] = "Heal", + [GD_OP_STATEDUMP_VOLUME] = "Statedump", + [GD_OP_LIST_VOLUME] = "Lists", + [GD_OP_CLEARLOCKS_VOLUME] = "Clear locks", + [GD_OP_DEFRAG_BRICK_VOLUME] = "Rebalance", + [GD_OP_COPY_FILE] = "Copy File", + [GD_OP_SYS_EXEC] = "Execute system commands", + [GD_OP_GSYNC_CREATE] = "Geo-replication Create", + [GD_OP_SNAP] = "Snapshot", + [GD_OP_RESET_BRICK] = "Reset Brick", + [GD_OP_MAX_OPVERSION] = "Maximum supported op-version", + [GD_OP_MAX] = "Invalid op"}; static int -glusterd_opinfo_init () +glusterd_opinfo_init() { - int32_t ret = -1; + int32_t ret = -1; - ret = pthread_mutex_init (&opinfo.lock, NULL); + opinfo.op = GD_OP_NONE; - return ret; + return ret; } -static int -glusterd_uuid_init (int flag) +int +glusterd_uuid_init() { - int ret = -1; - glusterd_conf_t *priv = NULL; - - priv = THIS->private; - - if (!flag) { - ret = glusterd_retrieve_uuid (); - if (!ret) { - uuid_copy (glusterd_uuid, priv->uuid); - gf_log ("glusterd", GF_LOG_INFO, - "retrieved UUID: %s", uuid_utoa (priv->uuid)); - return 0; - } - } - - uuid_generate (glusterd_uuid); - - gf_log ("glusterd", GF_LOG_INFO, - "generated UUID: %s", uuid_utoa (glusterd_uuid)); - uuid_copy (priv->uuid, glusterd_uuid); + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + + ret = glusterd_retrieve_uuid(); + if (ret == 0) { + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_RETRIEVED_UUID, + "retrieved UUID: %s", uuid_utoa(priv->uuid)); + return 0; + } - ret = glusterd_store_uuid (); + ret = glusterd_uuid_generate_save(); - if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, - "Unable to store generated UUID"); - return ret; - } + if (ret) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_UUID_GEN_STORE_FAIL, + "Unable to generate 
and save new UUID"); + return ret; + } - return 0; + return 0; } int -glusterd_fetchspec_notify (xlator_t *this) +glusterd_uuid_generate_save() { - int ret = -1; - glusterd_conf_t *priv = NULL; - rpc_transport_t *trans = NULL; + int ret = -1; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; - priv = this->private; + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); - list_for_each_entry (trans, &priv->xprt_list, list) { - rpcsvc_callback_submit (priv->rpc, trans, &glusterd_cbk_prog, - GF_CBK_FETCHSPEC, NULL, 0); - } + gf_uuid_generate(priv->uuid); - ret = 0; + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_GENERATED_UUID, + "generated UUID: %s", uuid_utoa(priv->uuid)); - return ret; + ret = glusterd_store_global_info(this); + + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UUID_STORE_FAIL, + "Unable to store the generated uuid %s", uuid_utoa(priv->uuid)); + + return ret; } int -glusterd_priv (xlator_t *this) +glusterd_options_init(xlator_t *this) { - return 0; + int ret = -1; + glusterd_conf_t *priv = NULL; + char *initial_version = "0"; + + priv = this->private; + + priv->opts = dict_new(); + if (!priv->opts) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); + goto out; + } + + ret = glusterd_store_retrieve_options(this); + if (ret == 0) { + goto out; + } + + ret = dict_set_str(priv->opts, GLUSTERD_GLOBAL_OPT_VERSION, + initial_version); + if (ret) + goto out; + + ret = glusterd_store_options(this, priv->opts); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VERS_STORE_FAIL, + "Unable to store version"); + return ret; + } +out: + return 0; } - - -int32_t -mem_acct_init (xlator_t *this) +int +glusterd_client_statedump_submit_req(char *volname, char *target_ip, char *pid) { - int ret = -1; + gf_statedump statedump_req = { + 0, + }; + glusterd_conf_t *conf = NULL; + int ret = 0; + char *end_ptr = NULL; + rpc_transport_t *trans = NULL; + char *ip_addr = NULL; + xlator_t *this = NULL; + char tmp[UNIX_PATH_MAX] = { + 0, + }; + + this = THIS; + GF_ASSERT(this); + conf = this->private; + GF_ASSERT(conf); + + if (target_ip == NULL || pid == NULL) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL); + ret = -1; + goto out; + } + + statedump_req.pid = strtol(pid, &end_ptr, 10); + + gf_msg_debug(this->name, 0, + "Performing statedump on volume %s " + "client with pid:%d host:%s", + volname, statedump_req.pid, target_ip); + + pthread_mutex_lock(&conf->xprt_lock); + { + list_for_each_entry(trans, &conf->xprt_list, list) + { + /* check if this connection matches "all" or the + * volname */ + if (strncmp(volname, "all", NAME_MAX) && + strncmp(trans->peerinfo.volname, volname, NAME_MAX)) { + /* no match, try next trans */ + continue; + } + + strcpy(tmp, trans->peerinfo.identifier); + ip_addr = strtok(tmp, ":"); + if (gf_is_same_address(ip_addr, target_ip)) { + /* Every gluster client would have + * connected to glusterd(volfile server). This + * connection is used to send the statedump + * request rpc to the application. 
+ */ + gf_msg_trace(this->name, 0, + "Submitting " + "statedump rpc request for %s", + trans->peerinfo.identifier); + rpcsvc_request_submit(conf->rpc, trans, &glusterd_cbk_prog, + GF_CBK_STATEDUMP, &statedump_req, + this->ctx, (xdrproc_t)xdr_gf_statedump); + } + } + } + pthread_mutex_unlock(&conf->xprt_lock); +out: + return ret; +} - if (!this) - return ret; +int +glusterd_fetchspec_notify(xlator_t *this) +{ + int ret = -1; + glusterd_conf_t *priv = NULL; + rpc_transport_t *trans = NULL; - ret = xlator_mem_acct_init (this, gf_gld_mt_end + 1); + priv = this->private; - if (ret != 0) { - gf_log (this->name, GF_LOG_ERROR, "Memory accounting init" - " failed"); - return ret; + pthread_mutex_lock(&priv->xprt_lock); + { + list_for_each_entry(trans, &priv->xprt_list, list) + { + rpcsvc_callback_submit(priv->rpc, trans, &glusterd_cbk_prog, + GF_CBK_FETCHSPEC, NULL, 0, NULL); } + } + pthread_mutex_unlock(&priv->xprt_lock); - return ret; + ret = 0; + + return ret; } int -glusterd_rpcsvc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event, - void *data) +glusterd_fetchsnap_notify(xlator_t *this) { - xlator_t *this = NULL; - rpc_transport_t *xprt = NULL; - glusterd_conf_t *priv = NULL; - - if (!xl || !data) { - gf_log ("glusterd", GF_LOG_WARNING, - "Calling rpc_notify without initializing"); - goto out; + int ret = -1; + glusterd_conf_t *priv = NULL; + rpc_transport_t *trans = NULL; + + priv = this->private; + + /* + * TODO: As of now, the identification of the rpc clients in the + * handshake protocol is not there. So among so many glusterfs processes + * registered with glusterd, it is hard to identify one particular + * process (in this particular case, the snap daemon). So the callback + * notification is sent to all the transports from the transport list. + * Only those processes which have a rpc client registered for this + * callback will respond to the notification. Once the identification + * of the rpc clients becomes possible, the below section can be changed + * to send callback notification to only those rpc clients, which have + * registered. 
+ */ + pthread_mutex_lock(&priv->xprt_lock); + { + list_for_each_entry(trans, &priv->xprt_list, list) + { + rpcsvc_callback_submit(priv->rpc, trans, &glusterd_cbk_prog, + GF_CBK_GET_SNAPS, NULL, 0, NULL); } + } + pthread_mutex_unlock(&priv->xprt_lock); - this = xl; - xprt = data; + ret = 0; - priv = this->private; + return ret; +} - switch (event) { - case RPCSVC_EVENT_ACCEPT: - { - INIT_LIST_HEAD (&xprt->list); +int32_t +mem_acct_init(xlator_t *this) +{ + int ret = -1; - list_add_tail (&xprt->list, &priv->xprt_list); - break; + if (!this) + return ret; + + ret = xlator_mem_acct_init(this, gf_gld_mt_end + 1); + + if (ret != 0) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, + "Memory accounting init" + " failed"); + return ret; + } + + return ret; +} + +int +glusterd_rpcsvc_notify(rpcsvc_t *rpc, void *xl, rpcsvc_event_t event, + void *data) +{ + xlator_t *this = NULL; + rpc_transport_t *xprt = NULL; + glusterd_conf_t *priv = NULL; + + if (!xl || !data) { + gf_msg("glusterd", GF_LOG_WARNING, 0, GD_MSG_NO_INIT, + "Calling rpc_notify without initializing"); + goto out; + } + + this = xl; + xprt = data; + + priv = this->private; + + switch (event) { + case RPCSVC_EVENT_ACCEPT: { + pthread_mutex_lock(&priv->xprt_lock); + list_add_tail(&xprt->list, &priv->xprt_list); + pthread_mutex_unlock(&priv->xprt_lock); + break; } - case RPCSVC_EVENT_DISCONNECT: - { - list_del (&xprt->list); - pmap_registry_remove (this, 0, NULL, GF_PMAP_PORT_NONE, xprt); + case RPCSVC_EVENT_DISCONNECT: { + /* A DISCONNECT event could come without an ACCEPT event + * happening for this transport. This happens when the server is + * expecting encrypted connections by the client tries to + * connect unecnrypted + */ + if (list_empty(&xprt->list)) break; + + pthread_mutex_lock(&priv->xprt_lock); + list_del(&xprt->list); + pthread_mutex_unlock(&priv->xprt_lock); + pmap_registry_remove(this, 0, NULL, GF_PMAP_PORT_ANY, xprt, + _gf_false); + break; } default: - break; - } + break; + } out: - return 0; + return 0; } - -inline int32_t -glusterd_program_register (xlator_t *this, rpcsvc_t *svc, - rpcsvc_program_t *prog) +static int32_t +glusterd_program_register(xlator_t *this, rpcsvc_t *svc, rpcsvc_program_t *prog) { - int32_t ret = -1; + int32_t ret = -1; - ret = rpcsvc_program_register (svc, prog); - if (ret) { - gf_log (this->name, GF_LOG_DEBUG, - "cannot register program (name: %s, prognum:%d, " - "progver:%d)", prog->progname, prog->prognum, - prog->progver); - goto out; - } + ret = rpcsvc_program_register(svc, prog, _gf_false); + if (ret) { + gf_msg_debug(this->name, 0, + "cannot register program (name: %s, prognum:%d, " + "progver:%d)", + prog->progname, prog->prognum, prog->progver); + goto out; + } out: - return ret; + return ret; } int -glusterd_rpcsvc_options_build (dict_t *options) +glusterd_rpcsvc_options_build(dict_t *options) { - int ret = 0; + int ret = 0; + uint32_t backlog = 0; + xlator_t *this = THIS; + GF_ASSERT(this); - if (!dict_get (options, "rpc-auth-allow-insecure")) { - ret = dict_set_str (options, "rpc-auth-allow-insecure", "on"); - if (ret) - goto out; + ret = dict_get_uint32(options, "transport.listen-backlog", &backlog); + + if (ret) { + backlog = GLUSTERFS_SOCKET_LISTEN_BACKLOG; + ret = dict_set_uint32(options, "transport.listen-backlog", backlog); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, + "Key=transport.listen-backlog", NULL); + goto out; } + } + + gf_msg_debug("glusterd", 0, "listen-backlog value: %d", backlog); + out: - return ret; + return ret; 
} +#if SYNCDAEMON_COMPILE static int -configure_syncaemon (xlator_t *this, const char *workdir) +glusterd_check_gsync_present(int *valid_state) { - int ret = 0; -#if SYNCDAEMON_COMPILE - char voldir[PATH_MAX] = {0,}; - char cmd[4096] = {0,}; - int blen = 0; - - snprintf (voldir, PATH_MAX, "%s/gsync", workdir); - ret = mkdir (voldir, 0777); - if ((-1 == ret) && (errno != EEXIST)) { - gf_log (this->name, GF_LOG_CRITICAL, - "Unable to create gsync directory %s (%s)", - voldir, strerror (errno)); - return -1; + char buff[PATH_MAX] = { + 0, + }; + runner_t runner = { + 0, + }; + char *ptr = NULL; + int ret = 0; + + runinit(&runner); + runner_add_args(&runner, GSYNCD_PREFIX "/gsyncd", "--version", NULL); + runner_redir(&runner, STDOUT_FILENO, RUN_PIPE); + ret = runner_start(&runner); + if (ret == -1) { + if (errno == ENOENT) { + gf_msg("glusterd", GF_LOG_INFO, errno, GD_MSG_MODULE_NOT_INSTALLED, + GEOREP " module not installed in the system"); + *valid_state = 0; + } else { + gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_MODULE_NOT_WORKING, + GEOREP " module not working as desired"); + *valid_state = -1; + } + goto out; + } + + ptr = fgets(buff, sizeof(buff), runner_chio(&runner, STDOUT_FILENO)); + if (ptr) { + if (!strstr(buff, "gsyncd")) { + ret = -1; + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_MODULE_NOT_WORKING, + GEOREP + " module not " + "working as desired"); + *valid_state = -1; + goto out; } + } else { + ret = -1; + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_MODULE_NOT_WORKING, + GEOREP + " module not " + "working as desired"); + *valid_state = -1; + goto out; + } + + ret = 0; +out: - blen = snprintf (cmd, PATH_MAX, GSYNCD_PREFIX"/gsyncd -c %s/"GSYNC_CONF - " --config-set-rx ", workdir); + runner_end(&runner); - /* remote-gsyncd */ - strcpy (cmd + blen, - "remote-gsyncd " - "'"GSYNCD_PREFIX"/gsyncd --gluster-command "GFS_PREFIX"/sbin/glusterfs' " - ". ."); - ret = system (cmd); - if (ret) - goto out; + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; +} - strcpy (cmd + blen, - "remote-gsyncd /usr/local/libexec/glusterfs/gsyncd . ^ssh:"); - ret = system (cmd); - if (ret) - goto out; +static int +group_write_allow(char *path, gid_t gid) +{ + struct stat st = { + 0, + }; + int ret = 0; - /* gluster-command */ - /* XXX $sbindir should be used (throughout the codebase) */ - strcpy (cmd + blen, - "gluster-command "GFS_PREFIX"/sbin/glusterfs . ."); - ret = system (cmd); - if (ret) - goto out; + ret = sys_stat(path, &st); + if (ret == -1) + goto out; + GF_ASSERT(S_ISDIR(st.st_mode)); - /* ssh-command */ - strcpy (cmd + blen, - "ssh-command 'ssh -oPasswordAuthentication=no' . ."); - ret = system (cmd); - if (ret) - goto out; + ret = sys_chown(path, -1, gid); + if (ret == -1) + goto out; - out: -#else - (void)this; - (void)workdir; -#endif - return ret ? 
-1 : 0; -} + ret = sys_chmod(path, (st.st_mode & ~S_IFMT) | S_IWGRP | S_IXGRP | S_ISVTX); +out: + if (ret == -1) + gf_msg("glusterd", GF_LOG_CRITICAL, errno, + GD_MSG_WRITE_ACCESS_GRANT_FAIL, + "failed to set up write access to %s for group %d (%s)", path, + gid, strerror(errno)); + return ret; +} -/* - * init - called during glusterd initialization - * - * @this: - * - */ -int -init (xlator_t *this) +static int +glusterd_crt_georep_folders(char *georepdir, glusterd_conf_t *conf) { - int32_t ret = -1; - rpcsvc_t *rpc = NULL; - glusterd_conf_t *conf = NULL; - data_t *dir_data = NULL; - struct stat buf = {0,}; - char voldir [PATH_MAX] = {0,}; - char dirname [PATH_MAX]; - char cmd_log_filename [PATH_MAX] = {0,}; - int first_time = 0; - - dir_data = dict_get (this->options, "working-directory"); - - if (!dir_data) { - //Use default working dir - strncpy (dirname, GLUSTERD_DEFAULT_WORKDIR, PATH_MAX); - } else { - strncpy (dirname, dir_data->data, PATH_MAX); + char *greplg_s = NULL; + struct group *gr = NULL; + int ret = 0; + int gr_ret = 0; + int32_t len = 0; + char logdir[PATH_MAX] = {0}; + + GF_ASSERT(georepdir); + GF_ASSERT(conf); + + if (strlen(conf->workdir) + 2 > PATH_MAX - SLEN(GEOREP)) { + ret = -1; + gf_msg("glusterd", GF_LOG_CRITICAL, 0, GD_MSG_DIRPATH_TOO_LONG, + "directory path %s/" GEOREP " is longer than PATH_MAX", + conf->workdir); + goto out; + } + + len = snprintf(georepdir, PATH_MAX, "%s/" GEOREP, conf->workdir); + if ((len < 0) || (len >= PATH_MAX)) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); + ret = -1; + goto out; + } + ret = mkdir_p(georepdir, 0755, _gf_true); + if (-1 == ret) { + gf_msg("glusterd", GF_LOG_CRITICAL, errno, GD_MSG_CREATE_DIR_FAILED, + "Unable to create " GEOREP " directory %s", georepdir); + goto out; + } + + ret = dict_get_str(THIS->options, GEOREP "-log-group", &greplg_s); + if (ret) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "Key=log-group", NULL); + ret = 0; + } else { + gr = getgrnam(greplg_s); + if (!gr) { + gf_msg("glusterd", GF_LOG_CRITICAL, 0, GD_MSG_LOGGROUP_INVALID, + "group " GEOREP "-log-group %s does not exist", greplg_s); + gr_ret = -1; } + } + if ((strlen(conf->logdir) + 2 + SLEN(GEOREP)) >= PATH_MAX) { + ret = -1; + gf_msg("glusterd", GF_LOG_CRITICAL, 0, GD_MSG_DIRPATH_TOO_LONG, + "directory path %s/" GEOREP " is longer than PATH_MAX", + conf->logdir); + goto out; + } + len = snprintf(logdir, PATH_MAX, "%s/" GEOREP, conf->logdir); + if ((len < 0) || (len >= PATH_MAX)) { + ret = -1; + goto out; + } + ret = mkdir_p(logdir, 0755, _gf_true); + if (-1 == ret) { + gf_msg("glusterd", GF_LOG_CRITICAL, errno, GD_MSG_CREATE_DIR_FAILED, + "Unable to create " GEOREP " log directory"); + goto out; + } + if (gr) { + gr_ret = group_write_allow(logdir, gr->gr_gid); + } + + if ((strlen(conf->logdir) + 2 + SLEN(GEOREP "-slaves")) >= PATH_MAX) { + ret = -1; + gf_msg("glusterd", GF_LOG_CRITICAL, 0, GD_MSG_DIRPATH_TOO_LONG, + "directory path %s/" GEOREP + "-slaves" + " is longer than PATH_MAX", + conf->logdir); + goto out; + } + len = snprintf(logdir, PATH_MAX, "%s/" GEOREP "-slaves", conf->logdir); + if ((len < 0) || (len >= PATH_MAX)) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); + ret = -1; + goto out; + } + ret = mkdir_p(logdir, 0755, _gf_true); + if (-1 == ret) { + gf_msg("glusterd", GF_LOG_CRITICAL, errno, GD_MSG_CREATE_DIR_FAILED, + "Unable to create " GEOREP " slave log directory"); + goto out; + } + if (gr && !gr_ret) { + gr_ret = group_write_allow(logdir, gr->gr_gid); + } + 
+ /* MountBroker log file directory */ + if ((strlen(conf->logdir) + 2 + SLEN(GEOREP "-slaves/mbr")) >= PATH_MAX) { + ret = -1; + gf_msg("glusterd", GF_LOG_CRITICAL, 0, GD_MSG_DIRPATH_TOO_LONG, + "directory path %s/" GEOREP + "-slaves/mbr" + " is longer than PATH_MAX", + conf->logdir); + goto out; + } + + len = snprintf(logdir, PATH_MAX, "%s/" GEOREP "-slaves/mbr", conf->logdir); + if ((len < 0) || (len >= PATH_MAX)) { + gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL); + ret = -1; + goto out; + } + + ret = mkdir_p(logdir, 0755, _gf_true); + if (-1 == ret) { + gf_msg("glusterd", GF_LOG_CRITICAL, errno, GD_MSG_CREATE_DIR_FAILED, + "Unable to create " GEOREP " mountbroker slave log directory"); + goto out; + } + if (gr && !gr_ret) { + gr_ret = group_write_allow(logdir, gr->gr_gid); + } + if (gr_ret) + ret = gr_ret; +out: + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; +} + +static void +runinit_gsyncd_setrx(runner_t *runner, glusterd_conf_t *conf) +{ + runinit(runner); + runner_add_args(runner, GSYNCD_PREFIX "/gsyncd", "-c", NULL); + runner_argprintf(runner, "%s/" GSYNC_CONF_TEMPLATE, conf->workdir); + runner_add_arg(runner, "--config-set-rx"); +} + +static int +configure_syncdaemon(glusterd_conf_t *conf) +#define RUN_GSYNCD_CMD \ + do { \ + ret = runner_run_reuse(&runner); \ + if (ret == -1) { \ + runner_log(&runner, "glusterd", GF_LOG_ERROR, "command failed"); \ + runner_end(&runner); \ + goto out; \ + } \ + runner_end(&runner); \ + } while (0) +{ + int ret = 0; + runner_t runner = { + 0, + }; + char georepdir[PATH_MAX] = { + 0, + }; + int valid_state = 0; + + ret = setenv("_GLUSTERD_CALLED_", "1", 1); + if (ret < 0) { + ret = 0; + goto out; + } + valid_state = -1; + ret = glusterd_check_gsync_present(&valid_state); + if (-1 == ret) { + ret = valid_state; + goto out; + } + + glusterd_crt_georep_folders(georepdir, conf); + if (ret) { + ret = 0; + goto out; + } + + /************ + * master pre-configuration + ************/ + + /* remote-gsyncd */ + runinit_gsyncd_setrx(&runner, conf); + runner_add_args(&runner, "remote-gsyncd", GSYNCD_PREFIX "/gsyncd", ".", ".", + NULL); + RUN_GSYNCD_CMD; + + runinit_gsyncd_setrx(&runner, conf); + runner_add_args(&runner, "remote-gsyncd", "/nonexistent/gsyncd", ".", + "^ssh:", NULL); + RUN_GSYNCD_CMD; + + /* gluster-command-dir */ + runinit_gsyncd_setrx(&runner, conf); + runner_add_args(&runner, "gluster-command-dir", SBIN_DIR "/", ".", ".", + NULL); + RUN_GSYNCD_CMD; + + /* gluster-params */ + runinit_gsyncd_setrx(&runner, conf); + runner_add_args(&runner, "gluster-params", "aux-gfid-mount acl", ".", ".", + NULL); + RUN_GSYNCD_CMD; + + /* ssh-command */ + runinit_gsyncd_setrx(&runner, conf); + runner_add_arg(&runner, "ssh-command"); + runner_argprintf(&runner, + "ssh -oPasswordAuthentication=no " + "-oStrictHostKeyChecking=no " + "-i %s/secret.pem", + georepdir); + runner_add_args(&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* ssh-command tar */ + runinit_gsyncd_setrx(&runner, conf); + runner_add_arg(&runner, "ssh-command-tar"); + runner_argprintf(&runner, + "ssh -oPasswordAuthentication=no " + "-oStrictHostKeyChecking=no " + "-i %s/tar_ssh.pem", + georepdir); + runner_add_args(&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* pid-file */ + runinit_gsyncd_setrx(&runner, conf); + runner_add_arg(&runner, "pid-file"); + runner_argprintf(&runner, + "%s/${mastervol}_${remotehost}_${slavevol}/monitor.pid", + georepdir); + runner_add_args(&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* geo-rep working dir */ + 
runinit_gsyncd_setrx(&runner, conf); + runner_add_arg(&runner, "georep-session-working-dir"); + runner_argprintf(&runner, "%s/${mastervol}_${remotehost}_${slavevol}/", + georepdir); + runner_add_args(&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* state-file */ + runinit_gsyncd_setrx(&runner, conf); + runner_add_arg(&runner, "state-file"); + runner_argprintf(&runner, + "%s/${mastervol}_${remotehost}_${slavevol}/monitor.status", + georepdir); + runner_add_args(&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* state-detail-file */ + runinit_gsyncd_setrx(&runner, conf); + runner_add_arg(&runner, "state-detail-file"); + runner_argprintf( + &runner, + "%s/${mastervol}_${remotehost}_${slavevol}/${eSlave}-detail.status", + georepdir); + runner_add_args(&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* state-detail-file */ + runinit_gsyncd_setrx(&runner, conf); + runner_add_arg(&runner, "state-detail-file"); + runner_argprintf( + &runner, + "%s/${mastervol}_${remotehost}_${slavevol}/${eSlave}-detail.status", + georepdir); + runner_add_args(&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* state-socket */ + runinit_gsyncd_setrx(&runner, conf); + runner_add_arg(&runner, "state-socket-unencoded"); + runner_argprintf(&runner, "%s/${mastervol}/${eSlave}.socket", georepdir); + runner_add_args(&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* socketdir */ + runinit_gsyncd_setrx(&runner, conf); + runner_add_args(&runner, "socketdir", GLUSTERD_SOCK_DIR, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* log-file */ + runinit_gsyncd_setrx(&runner, conf); + runner_add_arg(&runner, "log-file"); + runner_argprintf(&runner, "%s/" GEOREP "/${mastervol}/${eSlave}.log", + conf->logdir); + runner_add_args(&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* gluster-log-file */ + runinit_gsyncd_setrx(&runner, conf); + runner_add_arg(&runner, "gluster-log-file"); + runner_argprintf( + &runner, "%s/" GEOREP "/${mastervol}/${eSlave}${local_id}.gluster.log", + conf->logdir); + runner_add_args(&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* ignore-deletes */ + runinit_gsyncd_setrx(&runner, conf); + runner_add_args(&runner, "ignore-deletes", "true", ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* special-sync-mode */ + runinit_gsyncd_setrx(&runner, conf); + runner_add_args(&runner, "special-sync-mode", "partial", ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* change-detector == changelog */ + runinit_gsyncd_setrx(&runner, conf); + runner_add_args(&runner, "change-detector", "changelog", ".", ".", NULL); + RUN_GSYNCD_CMD; + + runinit_gsyncd_setrx(&runner, conf); + runner_add_arg(&runner, "working-dir"); + runner_argprintf(&runner, "%s/${mastervol}/${eSlave}", + DEFAULT_VAR_RUN_DIRECTORY); + runner_add_args(&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /************ + * slave pre-configuration + ************/ + + /* slave-gluster-command-dir */ + runinit_gsyncd_setrx(&runner, conf); + runner_add_args(&runner, "slave-gluster-command-dir", SBIN_DIR "/", ".", + NULL); + RUN_GSYNCD_CMD; + + /* gluster-params */ + runinit_gsyncd_setrx(&runner, conf); + runner_add_args(&runner, "gluster-params", "aux-gfid-mount acl", ".", NULL); + RUN_GSYNCD_CMD; + + /* log-file */ + runinit_gsyncd_setrx(&runner, conf); + runner_add_arg(&runner, "log-file"); + runner_argprintf( + &runner, + "%s/" GEOREP + "-slaves/${session_owner}:${local_node}${local_id}.${slavevol}.log", + conf->logdir); + runner_add_args(&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* MountBroker log-file */ + runinit_gsyncd_setrx(&runner, conf); + runner_add_arg(&runner, 
"log-file-mbr"); + runner_argprintf( + &runner, + "%s/" GEOREP + "-slaves/mbr/${session_owner}:${local_node}${local_id}.${slavevol}.log", + conf->logdir); + runner_add_args(&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* gluster-log-file */ + runinit_gsyncd_setrx(&runner, conf); + runner_add_arg(&runner, "gluster-log-file"); + runner_argprintf( + &runner, + "%s/" GEOREP + "-slaves/" + "${session_owner}:${local_node}${local_id}.${slavevol}.gluster.log", + conf->logdir); + runner_add_args(&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; - ret = stat (dirname, &buf); - if ((ret != 0) && (ENOENT != errno)) { - gf_log (this->name, GF_LOG_ERROR, - "stat fails on %s, exiting. (errno = %d)", - dirname, errno); - exit (1); +out: + return ret ? -1 : 0; +} +#undef RUN_GSYNCD_CMD +#else /* SYNCDAEMON_COMPILE */ +static int +configure_syncdaemon(glusterd_conf_t *conf) +{ + return 0; +} +#endif /* !SYNCDAEMON_COMPILE */ + +static int +check_prepare_mountbroker_root(char *mountbroker_root) +{ + int dfd0 = -1; + int dfd = -1; + int dfd2 = -1; + struct stat st = { + 0, + }; + struct stat st2 = { + 0, + }; + int ret = 0; + + ret = open(mountbroker_root, O_RDONLY); + if (ret != -1) { + dfd = ret; + ret = sys_fstat(dfd, &st); + } + if (ret == -1 || !S_ISDIR(st.st_mode)) { + gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED, + "cannot access mountbroker-root directory %s", mountbroker_root); + ret = -1; + goto out; + } + if (st.st_uid != 0 || (st.st_mode & (S_IWGRP | S_IWOTH))) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DIR_PERM_LIBERAL, + "permissions on mountbroker-root directory %s are " + "too liberal", + mountbroker_root); + ret = -1; + goto out; + } + if (!(st.st_mode & (S_IXGRP | S_IXOTH))) { + gf_msg("glusterd", GF_LOG_WARNING, 0, GD_MSG_DIR_PERM_STRICT, + "permissions on mountbroker-root directory %s are " + "probably too strict", + mountbroker_root); + } + + dfd0 = dup(dfd); + + for (;;) { + ret = sys_openat(dfd, "..", O_RDONLY, 0); + if (ret != -1) { + dfd2 = ret; + ret = sys_fstat(dfd2, &st2); + } + if (ret == -1) { + gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED, + "error while checking mountbroker-root ancestors " + "%d (%s)", + errno, strerror(errno)); + goto out; } - if ((!ret) && (!S_ISDIR(buf.st_mode))) { - gf_log (this->name, GF_LOG_CRITICAL, - "Provided working area %s is not a directory," - "exiting", dirname); - exit (1); + if (st2.st_ino == st.st_ino) + break; /* arrived to root */ + + if (st2.st_uid != 0 || + ((st2.st_mode & (S_IWGRP | S_IWOTH)) && !(st2.st_mode & S_ISVTX))) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DIR_PERM_LIBERAL, + "permissions on ancestors of mountbroker-root " + "directory are too liberal"); + ret = -1; + goto out; + } + if (!(st.st_mode & (S_IXGRP | S_IXOTH))) { + gf_msg("glusterd", GF_LOG_WARNING, 0, GD_MSG_DIR_PERM_STRICT, + "permissions on ancestors of mountbroker-root " + "directory are probably too strict"); } + sys_close(dfd); + dfd = dfd2; + st = st2; + } - if ((-1 == ret) && (ENOENT == errno)) { - ret = mkdir (dirname, 0777); + ret = sys_mkdirat(dfd0, MB_HIVE, 0711); + if (ret == -1 && errno == EEXIST) + ret = 0; + if (ret != -1) + ret = sys_fstatat(dfd0, MB_HIVE, &st, AT_SYMLINK_NOFOLLOW); + if (ret == -1 || st.st_mode != (S_IFDIR | 0711)) { + gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_CREATE_DIR_FAILED, + "failed to set up mountbroker-root directory %s", + mountbroker_root); + ret = -1; + goto out; + } + + ret = 0; + +out: + if (dfd0 != -1) + sys_close(dfd0); + if (dfd != -1) + sys_close(dfd); + if (dfd2 != -1 && dfd 
!= dfd2) + sys_close(dfd2); + + return ret; +} - if (-1 == ret) { - gf_log (this->name, GF_LOG_CRITICAL, - "Unable to create directory %s" - " ,errno = %d", dirname, errno); - exit (1); - } - first_time = 1; +static int +_install_mount_spec(dict_t *opts, char *key, data_t *value, void *data) +{ + glusterd_conf_t *priv = THIS->private; + char *label = NULL; + gf_boolean_t georep = _gf_false; + char *pdesc = value->data; + char *volname = NULL; + int rv = 0; + gf_mount_spec_t *mspec = NULL; + char *user = NULL; + xlator_t *this = THIS; + GF_ASSERT(this); + + label = strtail(key, "mountbroker."); + + /* check for presence of geo-rep label */ + if (!label) { + label = strtail(key, "mountbroker-" GEOREP "."); + if (label) + georep = _gf_true; + } + + if (!label) + return 0; + + mspec = GF_CALLOC(1, sizeof(*mspec), gf_gld_mt_mount_spec); + if (!mspec) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL); + goto err; + } + mspec->label = label; + + if (georep) { + volname = gf_strdup(pdesc); + if (!volname) + goto err; + user = strchr(volname, ':'); + if (user) { + *user = '\0'; + user++; + } else + user = label; + + rv = make_georep_mountspec(mspec, volname, user, priv->logdir); + + GF_FREE(volname); + if (rv != 0) + goto err; + } else if (parse_mount_pattern_desc(mspec, pdesc) != 0) + goto err; + + cds_list_add_tail(&mspec->speclist, &priv->mount_specs); + + return 0; +err: + + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_MOUNT_SPEC_INSTALL_FAIL, + "adding %smount spec failed: label: %s desc: %s", + georep ? GEOREP " " : "", label, pdesc ? pdesc : ""); + + if (mspec) { + if (mspec->patterns) { + GF_FREE(mspec->patterns->components); + GF_FREE(mspec->patterns); } + GF_FREE(mspec); + } - gf_log (this->name, GF_LOG_INFO, "Using %s as working directory", - dirname); + return -1; +} - snprintf (cmd_log_filename, PATH_MAX,"%s/.cmd_log_history", - DEFAULT_LOG_FILE_DIRECTORY); - ret = gf_cmd_log_init (cmd_log_filename); +/* The glusterd unix domain socket listener only listens for cli */ +rpcsvc_t * +glusterd_init_uds_listener(xlator_t *this) +{ + int ret = -1; + dict_t *options = NULL; + rpcsvc_t *rpc = NULL; + data_t *sock_data = NULL; + char sockfile[UNIX_PATH_MAX] = {0}; + int i = 0; + + GF_ASSERT(this); + + options = dict_new(); + if (!options) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL); + goto out; + } + + sock_data = dict_get(this->options, "glusterd-sockfile"); + (void)snprintf(sockfile, sizeof(sockfile), "%s", + sock_data ? 
sock_data->data : DEFAULT_GLUSTERD_SOCKFILE); + + ret = rpcsvc_transport_unix_options_build(options, sockfile); + if (ret) + goto out; + + rpc = rpcsvc_init(this, this->ctx, options, 8); + if (rpc == NULL) { + ret = -1; + goto out; + } + + ret = rpcsvc_register_notify(rpc, glusterd_rpcsvc_notify, this); + if (ret) { + gf_msg_debug(this->name, 0, "Failed to register notify function"); + goto out; + } + + ret = rpcsvc_create_listeners(rpc, options, this->name); + if (ret != 1) { + gf_msg_debug(this->name, 0, "Failed to create listener"); + goto out; + } + ret = 0; + + for (i = 0; i < gd_uds_programs_count; i++) { + ret = glusterd_program_register(this, rpc, gd_uds_programs[i]); + if (ret) { + i--; + for (; i >= 0; i--) + rpcsvc_program_unregister(rpc, gd_uds_programs[i]); - if (ret == -1) { - gf_log ("this->name", GF_LOG_CRITICAL, - "Unable to create cmd log file %s", cmd_log_filename); - exit (1); + goto out; } + } - snprintf (voldir, PATH_MAX, "%s/vols", dirname); +out: + if (options) + dict_unref(options); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, + GD_MSG_GLUSTERD_SOCK_LISTENER_START_FAIL, + "Failed to start glusterd " + "unix domain socket listener."); + if (rpc) { + GF_FREE(rpc); + rpc = NULL; + } + } + return rpc; +} - ret = mkdir (voldir, 0777); +void +glusterd_stop_uds_listener(xlator_t *this) +{ + glusterd_conf_t *conf = NULL; + rpcsvc_listener_t *listener = NULL; + rpcsvc_listener_t *next = NULL; + data_t *sock_data = NULL; + char sockfile[UNIX_PATH_MAX] = {0}; - if ((-1 == ret) && (errno != EEXIST)) { - gf_log (this->name, GF_LOG_CRITICAL, - "Unable to create volume directory %s" - " ,errno = %d", voldir, errno); - exit (1); - } + GF_ASSERT(this); + conf = this->private; - snprintf (voldir, PATH_MAX, "%s/peers", dirname); + (void)rpcsvc_program_unregister(conf->uds_rpc, &gd_svc_cli_prog); + (void)rpcsvc_program_unregister(conf->uds_rpc, &gluster_handshake_prog); - ret = mkdir (voldir, 0777); + list_for_each_entry_safe(listener, next, &conf->uds_rpc->listeners, list) + { + rpcsvc_listener_destroy(listener); + } - if ((-1 == ret) && (errno != EEXIST)) { - gf_log (this->name, GF_LOG_CRITICAL, - "Unable to create peers directory %s" - " ,errno = %d", voldir, errno); - exit (1); - } + (void)rpcsvc_unregister_notify(conf->uds_rpc, glusterd_rpcsvc_notify, this); - snprintf (voldir, PATH_MAX, "%s/bricks", DEFAULT_LOG_FILE_DIRECTORY); - ret = mkdir (voldir, 0777); - if ((-1 == ret) && (errno != EEXIST)) { - gf_log (this->name, GF_LOG_CRITICAL, - "Unable to create logs directory %s" - " ,errno = %d", voldir, errno); - exit (1); - } + sock_data = dict_get(this->options, "glusterd-sockfile"); + (void)snprintf(sockfile, sizeof(sockfile), "%s", + sock_data ? 
sock_data->data : DEFAULT_GLUSTERD_SOCKFILE); + sys_unlink(sockfile); - snprintf (voldir, PATH_MAX, "%s/nfs", dirname); - ret = mkdir (voldir, 0777); - if ((-1 == ret) && (errno != EEXIST)) { - gf_log (this->name, GF_LOG_CRITICAL, - "Unable to create nfs directory %s" - " ,errno = %d", voldir, errno); - exit (1); - } + return; +} - ret = configure_syncaemon (this, dirname); - if (ret) - goto out; +void +glusterd_stop_listener(xlator_t *this) +{ + glusterd_conf_t *conf = NULL; + rpcsvc_listener_t *listener = NULL; + rpcsvc_listener_t *next = NULL; + int i = 0; - ret = glusterd_rpcsvc_options_build (this->options); - if (ret) - goto out; - rpc = rpcsvc_init (this->ctx, this->options); - if (rpc == NULL) { - gf_log (this->name, GF_LOG_ERROR, - "failed to init rpc"); - goto out; - } + GF_VALIDATE_OR_GOTO("glusterd", this, out); + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, out); - ret = rpcsvc_register_notify (rpc, glusterd_rpcsvc_notify, this); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "rpcsvc_register_notify returned %d", ret); - goto out; - } + gf_msg_debug(this->name, 0, "%s function called ", __func__); - /* - * only one (atmost a pair - rdma and socket) listener for - * glusterd1_mop_prog, gluster_pmap_prog and gluster_handshake_prog. - */ - ret = rpcsvc_create_listeners (rpc, this->options, this->name); - if (ret < 1) { - gf_log (this->name, GF_LOG_ERROR, - "creation of listener failed"); - ret = -1; - goto out; - } + for (i = 0; i < gd_inet_programs_count; i++) { + rpcsvc_program_unregister(conf->rpc, gd_inet_programs[i]); + } - ret = glusterd_program_register (this, rpc, &glusterd1_mop_prog); - if (ret) { - goto out; + list_for_each_entry_safe(listener, next, &conf->rpc->listeners, list) + { + rpcsvc_listener_destroy(listener); + } + + (void)rpcsvc_unregister_notify(conf->rpc, glusterd_rpcsvc_notify, this); + +out: + + return; +} + +static int +glusterd_find_correct_var_run_dir(xlator_t *this, char *var_run_dir) +{ + int ret = -1; + struct stat buf = { + 0, + }; + + GF_VALIDATE_OR_GOTO("glusterd", this, out); + GF_VALIDATE_OR_GOTO(this->name, var_run_dir, out); + + /* /var/run is normally a symbolic link to /run dir, which + * creates problems as the entry point in the mtab for the mount point + * and glusterd maintained entry point will be different. Therefore + * identify the correct run dir and use it + */ + ret = sys_lstat(GLUSTERD_VAR_RUN_DIR, &buf); + if (ret != 0) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "stat fails on %s, exiting. (errno = %d)", GLUSTERD_VAR_RUN_DIR, + errno); + goto out; + } + + /* If /var/run is symlink then use /run dir */ + if (S_ISLNK(buf.st_mode)) { + strcpy(var_run_dir, GLUSTERD_RUN_DIR); + } else { + strcpy(var_run_dir, GLUSTERD_VAR_RUN_DIR); + } + + ret = 0; +out: + return ret; +} + +static int +glusterd_init_var_run_dirs(xlator_t *this, char *var_run_dir, + char *dir_to_be_created) +{ + int ret = -1; + struct stat buf = { + 0, + }; + char abs_path[PATH_MAX] = { + 0, + }; + + GF_VALIDATE_OR_GOTO("glusterd", this, out); + GF_VALIDATE_OR_GOTO(this->name, var_run_dir, out); + GF_VALIDATE_OR_GOTO(this->name, dir_to_be_created, out); + + snprintf(abs_path, sizeof(abs_path), "%s%s", var_run_dir, + dir_to_be_created); + + ret = sys_stat(abs_path, &buf); + if ((ret != 0) && (ENOENT != errno)) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, + "stat fails on %s, exiting. 
(errno = %d)", abs_path, errno); + ret = -1; + goto out; + } + + if ((!ret) && (!S_ISDIR(buf.st_mode))) { + gf_msg(this->name, GF_LOG_CRITICAL, ENOENT, GD_MSG_DIR_NOT_FOUND, + "Provided snap path %s is not a directory," + "exiting", + abs_path); + ret = -1; + goto out; + } + + if ((-1 == ret) && (ENOENT == errno)) { + /* Create missing dirs */ + ret = mkdir_p(abs_path, 0755, _gf_true); + + if (-1 == ret) { + gf_msg(this->name, GF_LOG_CRITICAL, errno, GD_MSG_CREATE_DIR_FAILED, + "Unable to create directory %s" + " ,errno = %d", + abs_path, errno); + goto out; } + } + +out: + return ret; +} + +static int +is_upgrade(dict_t *options, gf_boolean_t *upgrade) +{ + int ret = 0; + char *type = NULL; - ret = glusterd_program_register (this, rpc, &gd_svc_cli_prog); + ret = dict_get_str(options, "upgrade", &type); + if (!ret) { + ret = gf_string2boolean(type, upgrade); if (ret) { - rpcsvc_program_unregister (rpc, &glusterd1_mop_prog); - goto out; + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_STR_TO_BOOL_FAIL, + "upgrade option " + "%s is not a valid boolean type", + type); + ret = -1; + goto out; } + } + ret = 0; +out: + return ret; +} - ret = glusterd_program_register (this, rpc, &gd_svc_mgmt_prog); +static int +is_downgrade(dict_t *options, gf_boolean_t *downgrade) +{ + int ret = 0; + char *type = NULL; + + ret = dict_get_str(options, "downgrade", &type); + if (!ret) { + ret = gf_string2boolean(type, downgrade); if (ret) { - rpcsvc_program_unregister (rpc, &glusterd1_mop_prog); - rpcsvc_program_unregister (rpc, &gd_svc_cli_prog); - goto out; + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_STR_TO_BOOL_FAIL, + "downgrade option " + "%s is not a valid boolean type", + type); + ret = -1; + goto out; } + } + ret = 0; +out: + return ret; +} - ret = glusterd_program_register (this, rpc, &gluster_pmap_prog); - if (ret) { - rpcsvc_program_unregister (rpc, &glusterd1_mop_prog); - rpcsvc_program_unregister (rpc, &gd_svc_cli_prog); - rpcsvc_program_unregister (rpc, &gd_svc_mgmt_prog); - goto out; +/* + * init - called during glusterd initialization + * + * @this: + * + */ +int +init(xlator_t *this) +{ + int32_t ret = -1; + rpcsvc_t *rpc = NULL; + rpcsvc_t *uds_rpc = NULL; + glusterd_conf_t *conf = NULL; + data_t *dir_data = NULL; + struct stat buf = { + 0, + }; + char storedir[PATH_MAX] = { + 0, + }; + char workdir[PATH_MAX] = { + 0, + }; + char rundir[PATH_MAX] = { + 0, + }; + char logdir[VALID_GLUSTERD_PATHMAX] = { + 0, + }; + char cmd_log_filename[PATH_MAX] = { + 0, + }; + char *mountbroker_root = NULL; + int i = 0; + int total_transport = 0; + gf_valgrind_tool vgtool; + char *valgrind_str = NULL; + char *transport_type = NULL; + char var_run_dir[PATH_MAX] = { + 0, + }; + int32_t workers = 0; + gf_boolean_t upgrade = _gf_false; + gf_boolean_t downgrade = _gf_false; + char *localtime_logging = NULL; + int32_t len = 0; + int op_version = 0; + +#if defined(RUN_WITH_MEMCHECK) + vgtool = _gf_memcheck; +#elif defined(RUN_WITH_DRD) + vgtool = _gf_drd; +#else + vgtool = _gf_none; +#endif + +#ifndef GF_DARWIN_HOST_OS + { + struct rlimit lim; + lim.rlim_cur = 65536; + lim.rlim_max = 65536; + + if (setrlimit(RLIMIT_NOFILE, &lim) == -1) { + gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_SET_XATTR_FAIL, + "Failed to set 'ulimit -n 65536'", NULL); + } else { + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_FILE_DESC_LIMIT_SET, + "Maximum allowed open file descriptors " + "set to 65536"); } + } +#endif - ret = glusterd_program_register (this, rpc, &gluster_handshake_prog); - if (ret) { - rpcsvc_program_unregister (rpc, 
&glusterd1_mop_prog); - rpcsvc_program_unregister (rpc, &gluster_pmap_prog); - rpcsvc_program_unregister (rpc, &gd_svc_cli_prog); - rpcsvc_program_unregister (rpc, &gd_svc_mgmt_prog); - goto out; + dir_data = dict_get(this->options, "run-directory"); + + if (!dir_data) { + /* Use default working dir */ + len = snprintf(rundir, PATH_MAX, "%s", DEFAULT_VAR_RUN_DIRECTORY); + } else { + len = snprintf(rundir, PATH_MAX, "%s", dir_data->data); + } + if (len < 0 || len >= PATH_MAX) + exit(2); + + dir_data = dict_get(this->options, "cluster-test-mode"); + if (!dir_data) { + /* Use default working dir */ + len = snprintf(logdir, VALID_GLUSTERD_PATHMAX, "%s", + DEFAULT_LOG_FILE_DIRECTORY); + } else { + len = snprintf(logdir, VALID_GLUSTERD_PATHMAX, "%s", dir_data->data); + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_CLUSTER_RC_ENABLE, + "cluster-test-mode is enabled logdir is %s", dir_data->data); + } + if (len < 0 || len >= PATH_MAX) + exit(2); + + ret = mkdir_p(logdir, 0777, _gf_true); + if ((ret == -1) && (EEXIST != errno)) { + gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_CREATE_DIR_FAILED, + "Unable to create log dir %s", logdir); + exit(1); + } + + dir_data = dict_get(this->options, "working-directory"); + + if (!dir_data) { + // Use default working dir + len = snprintf(workdir, PATH_MAX, "%s", GLUSTERD_DEFAULT_WORKDIR); + } else { + len = snprintf(workdir, PATH_MAX, "%s", dir_data->data); + } + if (len < 0 || len >= PATH_MAX) + exit(2); + + ret = sys_stat(workdir, &buf); + if ((ret != 0) && (ENOENT != errno)) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED, + "stat fails on %s, exiting. (errno = %d)", workdir, errno); + exit(1); + } + + if ((!ret) && (!S_ISDIR(buf.st_mode))) { + gf_msg(this->name, GF_LOG_CRITICAL, ENOENT, GD_MSG_DIR_NOT_FOUND, + "Provided working area %s is not a directory," + "exiting", + workdir); + exit(1); + } + + if ((-1 == ret) && (ENOENT == errno)) { + ret = mkdir_p(workdir, 0755, _gf_true); + + if (-1 == ret) { + gf_msg(this->name, GF_LOG_CRITICAL, errno, GD_MSG_CREATE_DIR_FAILED, + "Unable to create directory %s" + " ,errno = %d", + workdir, errno); + exit(1); } + } + + setenv("GLUSTERD_WORKDIR", workdir, 1); + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_CURR_WORK_DIR_INFO, + "Using %s as working directory", workdir); + + setenv("DEFAULT_VAR_RUN_DIRECTORY", rundir, 1); + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_CURR_WORK_DIR_INFO, + "Using %s as pid file working " + "directory", + rundir); + + ret = glusterd_find_correct_var_run_dir(this, var_run_dir); + if (ret) { + gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_VAR_RUN_DIR_FIND_FAIL, + "Unable to find " + "the correct var run dir"); + exit(1); + } + + ret = glusterd_init_var_run_dirs(this, var_run_dir, + GLUSTERD_DEFAULT_SNAPS_BRICK_DIR); + + if (ret) { + gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_CREATE_DIR_FAILED, + "Unable to create " + "snap backend folder"); + exit(1); + } + + len = snprintf(snap_mount_dir, sizeof(snap_mount_dir), "%s%s", var_run_dir, + GLUSTERD_DEFAULT_SNAPS_BRICK_DIR); + if ((len < 0) || (len >= sizeof(snap_mount_dir))) { + gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_DIR_OP_FAILED, + "Snap mount dir too long"); + exit(1); + } + + ret = mkdir_p(GLUSTER_SHARED_STORAGE_BRICK_DIR, 0755, _gf_true); + if (ret) { + gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_DIR_OP_FAILED, + "Unable to create " + "shared storage brick"); + exit(1); + } + + ret = glusterd_init_var_run_dirs(this, rundir, GLUSTERD_BITD_RUN_DIR); + if (ret) { + gf_msg(this->name, GF_LOG_CRITICAL, 0, 
GD_MSG_CREATE_DIR_FAILED, + "Unable to create " + "bitd running directory"); + exit(1); + } + + ret = glusterd_init_var_run_dirs(this, rundir, GLUSTERD_SCRUB_RUN_DIR); + if (ret) { + gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_CREATE_DIR_FAILED, + "Unable to create " + "scrub running directory"); + exit(1); + } + +#ifdef BUILD_GNFS + ret = glusterd_init_var_run_dirs(this, rundir, GLUSTERD_NFS_RUN_DIR); + if (ret) { + gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_CREATE_DIR_FAILED, + "Unable to create " + "nfs running directory"); + exit(1); + } +#endif - conf = GF_CALLOC (1, sizeof (glusterd_conf_t), - gf_gld_mt_glusterd_conf_t); - GF_VALIDATE_OR_GOTO(this->name, conf, out); - INIT_LIST_HEAD (&conf->peers); - INIT_LIST_HEAD (&conf->volumes); - pthread_mutex_init (&conf->mutex, NULL); - conf->rpc = rpc; - conf->gfs_mgmt = &glusterd_glusterfs_3_1_mgmt_prog; - strncpy (conf->workdir, dirname, PATH_MAX); - - INIT_LIST_HEAD (&conf->xprt_list); - ret = glusterd_sm_tr_log_init (&conf->op_sm_log, - glusterd_op_sm_state_name_get, - glusterd_op_sm_event_name_get, - GLUSTERD_TR_LOG_SIZE); - if (ret) - goto out; + ret = glusterd_init_var_run_dirs(this, rundir, GLUSTERD_QUOTAD_RUN_DIR); + if (ret) { + gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_CREATE_DIR_FAILED, + "Unable to create " + "quota running directory"); + exit(1); + } + + snprintf(cmd_log_filename, PATH_MAX, "%s/cmd_history.log", logdir); + ret = gf_cmd_log_init(cmd_log_filename); + + if (ret == -1) { + gf_msg("this->name", GF_LOG_CRITICAL, errno, GD_MSG_FILE_OP_FAILED, + "Unable to create cmd log file %s", cmd_log_filename); + exit(1); + } + + len = snprintf(storedir, sizeof(storedir), "%s/vols", workdir); + if ((len < 0) || (len >= sizeof(storedir))) { + exit(1); + } + + ret = sys_mkdir(storedir, 0755); + + if ((-1 == ret) && (errno != EEXIST)) { + gf_msg(this->name, GF_LOG_CRITICAL, errno, GD_MSG_CREATE_DIR_FAILED, + "Unable to create volume directory %s" + " ,errno = %d", + storedir, errno); + exit(1); + } + + /*keeping individual volume pid file information in /var/run/gluster* */ + len = snprintf(storedir, sizeof(storedir), "%s/vols", rundir); + if ((len < 0) || (len >= sizeof(storedir))) { + exit(1); + } + + ret = sys_mkdir(storedir, 0755); + + if ((-1 == ret) && (errno != EEXIST)) { + gf_msg(this->name, GF_LOG_CRITICAL, errno, GD_MSG_CREATE_DIR_FAILED, + "Unable to create volume directory %s" + " ,errno = %d", + storedir, errno); + exit(1); + } + + len = snprintf(storedir, sizeof(storedir), "%s/snaps", workdir); + if ((len < 0) || (len >= sizeof(storedir))) { + exit(1); + } + + ret = sys_mkdir(storedir, 0755); + + if ((-1 == ret) && (errno != EEXIST)) { + gf_msg(this->name, GF_LOG_CRITICAL, errno, GD_MSG_CREATE_DIR_FAILED, + "Unable to create snaps directory %s" + " ,errno = %d", + storedir, errno); + exit(1); + } + + len = snprintf(storedir, sizeof(storedir), "%s/peers", workdir); + if ((len < 0) || (len >= sizeof(storedir))) { + exit(1); + } + + ret = sys_mkdir(storedir, 0755); + + if ((-1 == ret) && (errno != EEXIST)) { + gf_msg(this->name, GF_LOG_CRITICAL, errno, GD_MSG_CREATE_DIR_FAILED, + "Unable to create peers directory %s" + " ,errno = %d", + storedir, errno); + exit(1); + } + + len = snprintf(storedir, sizeof(storedir), "%s/bricks", logdir); + if ((len < 0) || (len >= sizeof(storedir))) { + exit(1); + } + + ret = sys_mkdir(storedir, 0755); + if ((-1 == ret) && (errno != EEXIST)) { + gf_msg(this->name, GF_LOG_CRITICAL, errno, GD_MSG_CREATE_DIR_FAILED, + "Unable to create logs directory %s" + " ,errno = %d", + storedir, 
errno); + exit(1); + } + +#ifdef BUILD_GNFS + len = snprintf(storedir, sizeof(storedir), "%s/nfs", workdir); + if ((len < 0) || (len >= sizeof(storedir))) { + exit(1); + } + ret = sys_mkdir(storedir, 0755); + if ((-1 == ret) && (errno != EEXIST)) { + gf_msg(this->name, GF_LOG_CRITICAL, errno, GD_MSG_CREATE_DIR_FAILED, + "Unable to create nfs directory %s" + " ,errno = %d", + storedir, errno); + exit(1); + } +#endif + len = snprintf(storedir, sizeof(storedir), "%s/bitd", workdir); + if ((len < 0) || (len >= sizeof(storedir))) { + exit(1); + } + ret = sys_mkdir(storedir, 0755); + if ((-1 == ret) && (errno != EEXIST)) { + gf_msg(this->name, GF_LOG_CRITICAL, errno, GD_MSG_CREATE_DIR_FAILED, + "Unable to create bitrot directory %s", storedir); + exit(1); + } + + len = snprintf(storedir, sizeof(storedir), "%s/scrub", workdir); + if ((len < 0) || (len >= sizeof(storedir))) { + exit(1); + } + ret = sys_mkdir(storedir, 0755); + if ((-1 == ret) && (errno != EEXIST)) { + gf_msg(this->name, GF_LOG_CRITICAL, errno, GD_MSG_CREATE_DIR_FAILED, + "Unable to create scrub directory %s", storedir); + exit(1); + } + + len = snprintf(storedir, sizeof(storedir), "%s/glustershd", workdir); + if ((len < 0) || (len >= sizeof(storedir))) { + exit(1); + } + ret = sys_mkdir(storedir, 0755); + if ((-1 == ret) && (errno != EEXIST)) { + gf_msg(this->name, GF_LOG_CRITICAL, errno, GD_MSG_CREATE_DIR_FAILED, + "Unable to create glustershd directory %s" + " ,errno = %d", + storedir, errno); + exit(1); + } + + len = snprintf(storedir, sizeof(storedir), "%s/quotad", workdir); + if ((len < 0) || (len >= sizeof(storedir))) { + exit(1); + } + ret = sys_mkdir(storedir, 0755); + if ((-1 == ret) && (errno != EEXIST)) { + gf_msg(this->name, GF_LOG_CRITICAL, errno, GD_MSG_CREATE_DIR_FAILED, + "Unable to create quotad directory %s" + " ,errno = %d", + storedir, errno); + exit(1); + } + + len = snprintf(storedir, sizeof(storedir), "%s/groups", workdir); + if ((len < 0) || (len >= sizeof(storedir))) { + exit(1); + } + ret = sys_mkdir(storedir, 0755); + if ((-1 == ret) && (errno != EEXIST)) { + gf_msg(this->name, GF_LOG_CRITICAL, errno, GD_MSG_CREATE_DIR_FAILED, + "Unable to create glustershd directory %s" + " ,errno = %d", + storedir, errno); + exit(1); + } + + ret = glusterd_rpcsvc_options_build(this->options); + if (ret) + goto out; + rpc = rpcsvc_init(this, this->ctx, this->options, 64); + if (rpc == NULL) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RPC_INIT_FAIL, + "failed to init rpc"); + goto out; + } + + ret = rpcsvc_register_notify(rpc, glusterd_rpcsvc_notify, this); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RPCSVC_REG_NOTIFY_RETURNED, + "rpcsvc_register_notify returned %d", ret); + goto out; + } + + /* Enable encryption for the TCP listener is management encryption is + * enabled + */ + if (this->ctx->secure_mgmt) { + ret = dict_set_str(this->options, "transport.socket.ssl-enabled", "on"); + if (ret != 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, + "failed to set ssl-enabled in dict"); + goto out; + } + /* + * This is the only place where we want secure_srvr to reflect + * the management-plane setting. + */ + this->ctx->secure_srvr = MGMT_SSL_ALWAYS; + } + + /* + * only one (at most a pair - rdma and socket) listener for + * glusterd1_mop_prog, gluster_pmap_prog and gluster_handshake_prog. 
+ */ + + ret = dict_get_str(this->options, "transport-type", &transport_type); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Failed to get transport type"); + ret = -1; + goto out; + } + + total_transport = rpc_transport_count(transport_type); + if (total_transport <= 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RPC_TRANSPORT_COUNT_GET_FAIL, + "failed to get total number of available tranpsorts"); + ret = -1; + goto out; + } + + ret = rpcsvc_create_listeners(rpc, this->options, this->name); + if (ret < 1) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RPC_LISTENER_CREATE_FAIL, + "creation of listener failed"); + ret = -1; + goto out; + } else if (ret < total_transport) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RPC_LISTENER_CREATE_FAIL, + "creation of %d listeners failed, continuing with " + "succeeded transport", + (total_transport - ret)); + } + + for (i = 0; i < gd_inet_programs_count; i++) { + ret = glusterd_program_register(this, rpc, gd_inet_programs[i]); + if (ret) { + i--; + for (; i >= 0; i--) + rpcsvc_program_unregister(rpc, gd_inet_programs[i]); - this->private = conf; - //this->ctx->top = this; + goto out; + } + } + + /* + * Start a unix domain socket listener just for cli commands This + * should prevent ports from being wasted by being in TIMED_WAIT when + * cli commands are done continuously + */ + uds_rpc = glusterd_init_uds_listener(this); + if (uds_rpc == NULL) { + ret = -1; + goto out; + } + + conf = GF_CALLOC(1, sizeof(glusterd_conf_t), gf_gld_mt_glusterd_conf_t); + GF_VALIDATE_OR_GOTO(this->name, conf, out); + + CDS_INIT_LIST_HEAD(&conf->peers); + CDS_INIT_LIST_HEAD(&conf->volumes); + CDS_INIT_LIST_HEAD(&conf->snapshots); + CDS_INIT_LIST_HEAD(&conf->missed_snaps_list); + CDS_INIT_LIST_HEAD(&conf->brick_procs); + CDS_INIT_LIST_HEAD(&conf->shd_procs); + pthread_mutex_init(&conf->attach_lock, NULL); + pthread_mutex_init(&conf->volume_lock, NULL); + + pthread_mutex_init(&conf->mutex, NULL); + conf->rpc = rpc; + conf->uds_rpc = uds_rpc; + conf->gfs_mgmt = &gd_brick_prog; + conf->restart_shd = _gf_false; + this->private = conf; + /* conf->workdir and conf->rundir are smaller than PATH_MAX; gcc's + * snprintf checking will throw an error here if sprintf is used. + * Dueling gcc-8 and coverity, now coverity isn't smart enough to + * detect that these strncpy calls are safe. And for extra fun, + * the annotations don't do anything. 
*/ + if (strlen(workdir) >= sizeof(conf->workdir)) { + ret = -1; + goto out; + } + /* coverity[BUFFER_SIZE_WARNING] */ + (void)strncpy(conf->workdir, workdir, sizeof(conf->workdir)); + /* separate tests because combined tests confuses gcc */ + if (strlen(rundir) >= sizeof(conf->rundir)) { + ret = -1; + goto out; + } + /* coverity[BUFFER_SIZE_WARNING] */ + (void)strncpy(conf->rundir, rundir, sizeof(conf->rundir)); + + /* coverity[BUFFER_SIZE_WARNING] */ + (void)strncpy(conf->logdir, logdir, sizeof(conf->logdir)); + + synclock_init(&conf->big_lock, SYNC_LOCK_RECURSIVE); + synccond_init(&conf->cond_restart_bricks); + synccond_init(&conf->cond_restart_shd); + synccond_init(&conf->cond_blockers); + pthread_mutex_init(&conf->xprt_lock, NULL); + INIT_LIST_HEAD(&conf->xprt_list); + pthread_mutex_init(&conf->import_volumes, NULL); + + glusterd_friend_sm_init(); + glusterd_op_sm_init(); + glusterd_opinfo_init(); + ret = glusterd_sm_tr_log_init( + &conf->op_sm_log, glusterd_op_sm_state_name_get, + glusterd_op_sm_event_name_get, GLUSTERD_TR_LOG_SIZE); + if (ret) + goto out; + + conf->base_port = GF_IANA_PRIV_PORTS_START; + if (dict_get_uint32(this->options, "base-port", &conf->base_port) == 0) { + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_DICT_SET_FAILED, + "base-port override: %d", conf->base_port); + } + conf->max_port = GF_PORT_MAX; + if (dict_get_uint32(this->options, "max-port", &conf->max_port) == 0) { + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_DICT_SET_FAILED, + "max-port override: %d", conf->max_port); + } + + conf->mgmt_v3_lock_timeout = GF_LOCK_TIMER; + if (dict_get_uint32(this->options, "lock-timer", + &conf->mgmt_v3_lock_timeout) == 0) { + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_DICT_SET_FAILED, + "lock-timer override: %d", conf->mgmt_v3_lock_timeout); + } + + /* Set option to run bricks on valgrind if enabled in glusterd.vol */ + this->ctx->cmd_args.vgtool = vgtool; + ret = dict_get_str(this->options, "run-with-valgrind", &valgrind_str); + if (ret < 0) { + gf_msg_debug(this->name, 0, "cannot get run-with-valgrind value"); + } + if (valgrind_str) { + gf_boolean_t vg = _gf_false; + + if (!strcmp(valgrind_str, "memcheck")) + this->ctx->cmd_args.vgtool = _gf_memcheck; + else if (!strcmp(valgrind_str, "drd")) + this->ctx->cmd_args.vgtool = _gf_drd; + else if (!gf_string2boolean(valgrind_str, &vg)) + this->ctx->cmd_args.vgtool = (vg ? _gf_memcheck : _gf_none); + else + gf_msg(this->name, GF_LOG_WARNING, EINVAL, GD_MSG_INVALID_ENTRY, + "run-with-valgrind is neither boolean" + " nor one of 'memcheck' or 'drd'"); + } + + /* Store ping-timeout in conf */ + ret = dict_get_int32(this->options, "ping-timeout", &conf->ping_timeout); + /* Not failing here since ping-timeout can be optional as well */ + + glusterd_mgmt_v3_lock_init(); + glusterd_mgmt_v3_lock_timer_init(); + glusterd_txn_opinfo_dict_init(); + +#ifdef BUILD_GNFS + glusterd_nfssvc_build(&conf->nfs_svc); +#endif + glusterd_quotadsvc_build(&conf->quotad_svc); + glusterd_bitdsvc_build(&conf->bitd_svc); + glusterd_scrubsvc_build(&conf->scrub_svc); + + /* Make install copies few of the hook-scripts by creating hooks + * directory. Hence purposefully not doing the check for the presence of + * hooks directory. Doing so avoids creation of complete hooks directory + * tree. 
+ */ + ret = glusterd_hooks_create_hooks_directory(conf->workdir); + if (-1 == ret) { + gf_msg(this->name, GF_LOG_CRITICAL, errno, GD_MSG_DIR_OP_FAILED, + "Unable to create hooks directory "); + exit(1); + } + + CDS_INIT_LIST_HEAD(&conf->mount_specs); + + ret = dict_foreach(this->options, _install_mount_spec, NULL); + if (ret) + goto out; + ret = dict_get_str(this->options, "mountbroker-root", &mountbroker_root); + if (ret) + ret = 0; + else + ret = check_prepare_mountbroker_root(mountbroker_root); + if (ret) + goto out; - ret = glusterd_uuid_init (first_time); - if (ret < 0) - goto out; + ret = is_upgrade(this->options, &upgrade); + if (ret) + goto out; - ret = glusterd_restore (); - if (ret < 0) - goto out; + ret = is_downgrade(this->options, &downgrade); + if (ret) + goto out; - glusterd_friend_sm_init (); - glusterd_op_sm_init (); - glusterd_opinfo_init (); + if (!upgrade && !downgrade) { + ret = configure_syncdaemon(conf); + if (ret) + goto out; + } + + /* Restoring op-version needs to be done before initializing the + * services as glusterd_svc_init_common () invokes + * glusterd_conn_build_socket_filepath () which uses MY_UUID macro. + * MY_UUID generates a new uuid if its not been generated and writes it + * in the info file, Since the op-version is not read yet + * the default value i.e. 0 will be written for op-version and restore + * will fail. This is why restoring op-version needs to happen before + * service initialization + * */ + ret = glusterd_restore_op_version(this); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OP_VERS_RESTORE_FAIL, + "Failed to restore op_version"); + goto out; + } + + ret = glusterd_restore(); + if (ret < 0) + goto out; + + if (dict_get_str(conf->opts, GLUSTERD_LOCALTIME_LOGGING_KEY, + &localtime_logging) == 0) { + int already_enabled = gf_log_get_localtime(); + + if (strcmp(localtime_logging, "enable") == 0) { + gf_log_set_localtime(1); + if (!already_enabled) + gf_msg(this->name, GF_LOG_INFO, 0, + GD_MSG_LOCALTIME_LOGGING_ENABLE, + "localtime logging enable"); + } else if (strcmp(localtime_logging, "disable") == 0) { + gf_log_set_localtime(0); + if (already_enabled) + gf_msg(this->name, GF_LOG_INFO, 0, + GD_MSG_LOCALTIME_LOGGING_DISABLE, + "localtime logging disable"); + } + } + + GF_ATOMIC_INIT(conf->blockers, 0); + ret = glusterd_handle_upgrade_downgrade(this->options, conf, upgrade, + downgrade); + if (ret) + goto out; + + ret = glusterd_retrieve_max_op_version(this, &op_version); + /* first condition indicates file isn't present which means this code + * change is hitting for the first time or someone has deleted it from the + * backend.second condition is when max op_version differs, in both cases + * volfiles should be regenerated + */ + if (op_version == 0 || op_version != GD_OP_VERSION_MAX) { + gf_log(this->name, GF_LOG_INFO, + "Regenerating volfiles due to a max op-version mismatch or " + "glusterd.upgrade file not being present, op_version retrieved:" + "%d, max op_version: %d", + op_version, GD_OP_VERSION_MAX); + glusterd_recreate_volfiles(conf); + ret = glusterd_store_max_op_version(this); + if (ret) + gf_log(this->name, GF_LOG_ERROR, "Failed to store max op-version"); + } + + /* If the peer count is less than 2 then this would be the best time to + * spawn process/bricks that may need (re)starting since last time + * (this) glusterd was up. 
*/ + if (glusterd_get_peers_count() < 2) + glusterd_launch_synctask(glusterd_spawn_daemons, NULL); + + ret = glusterd_hooks_spawn_worker(this); + if (ret) + goto out; + + GF_OPTION_INIT("event-threads", workers, int32, out); + if (workers > 0 && workers != conf->workers) { + conf->workers = workers; + ret = gf_event_reconfigure_threads(this->ctx->event_pool, workers); + if (ret) + goto out; + } - glusterd_restart_bricks (conf); - ret = 0; + ret = 0; out: - if (ret < 0) { - if (this->private != NULL) { - GF_FREE (this->private); - this->private = NULL; - } + if (ret < 0) { + if (this->private != NULL) { + GF_FREE(this->private); + this->private = NULL; } + } - return ret; + return ret; } - - - /* * fini - finish function for glusterd, called before * unloading gluster. @@ -523,22 +2108,37 @@ out: * */ void -fini (xlator_t *this) +fini(xlator_t *this) { - glusterd_conf_t *conf = NULL; - if (!this || !this->private) - goto out; - - conf = this->private; - if (conf->pmap) - FREE (conf->pmap); + if (!this || !this->private) + goto out; + + glusterd_stop_uds_listener(this); /*stop unix socket rpc*/ + glusterd_stop_listener(this); /*stop tcp/ip socket rpc*/ + +#if 0 + /* Running threads might be using these resourses, we have to cancel/stop + * running threads before deallocating the memory, but we don't have + * control over the running threads to do pthread_cancel(). + * So memory freeing handover to kernel. + */ + /*TODO: cancel/stop the running threads*/ + + GF_FREE (conf->uds_rpc); + GF_FREE (conf->rpc); + FREE (conf->pmap); if (conf->handle) - glusterd_store_handle_destroy (conf->handle); + gf_store_handle_destroy (conf->handle); glusterd_sm_tr_log_delete (&conf->op_sm_log); + glusterd_mgmt_v3_lock_fini (); + glusterd_mgmt_v3_lock_timer_fini (); + glusterd_txn_opinfo_dict_fini (); GF_FREE (conf); + this->private = NULL; +#endif out: - return; + return; } /* @@ -549,57 +2149,157 @@ out: * */ int -notify (xlator_t *this, int32_t event, void *data, ...) +notify(xlator_t *this, int32_t event, void *data, ...) 
{ - int ret = 0; + int ret = 0; - switch (event) { - case GF_EVENT_POLLIN: - break; + switch (event) { + case GF_EVENT_POLLIN: + break; - case GF_EVENT_POLLERR: - break; + case GF_EVENT_POLLERR: + break; - case GF_EVENT_TRANSPORT_CLEANUP: - break; + case GF_EVENT_CLEANUP: + break; - default: - default_notify (this, event, data); - break; - - } + default: + default_notify(this, event, data); + break; + } - return ret; + return ret; } +struct xlator_fops fops; -struct xlator_fops fops = { -}; - -struct xlator_cbks cbks = { -}; +struct xlator_cbks cbks; struct xlator_dumpops dumpops = { - .priv = glusterd_priv, + .priv = glusterd_dump_priv, }; - struct volume_options options[] = { - { .key = {"working-directory"}, - .type = GF_OPTION_TYPE_PATH, - }, - { .key = {"transport-type"}, - .type = GF_OPTION_TYPE_ANY, - }, - { .key = {"transport.*"}, - .type = GF_OPTION_TYPE_ANY, - }, - { .key = {"rpc-auth.*"}, - .type = GF_OPTION_TYPE_ANY, - }, - { .key = {"rpc-auth-allow-insecure"}, - .type = GF_OPTION_TYPE_BOOL, - }, - - { .key = {NULL} }, + { + .key = {"working-directory"}, + .type = GF_OPTION_TYPE_PATH, + }, + { + .key = {"transport-type"}, + .type = GF_OPTION_TYPE_ANY, + }, + { + .key = {"transport.*"}, + .type = GF_OPTION_TYPE_ANY, + }, + { + .key = {"rpc-auth.*"}, + .type = GF_OPTION_TYPE_ANY, + }, + { + .key = {"rpc-auth-allow-insecure"}, + .type = GF_OPTION_TYPE_BOOL, + }, + { + .key = {"upgrade"}, + .type = GF_OPTION_TYPE_BOOL, + }, + { + .key = {"downgrade"}, + .type = GF_OPTION_TYPE_BOOL, + }, + { + .key = {"bind-insecure"}, + .type = GF_OPTION_TYPE_BOOL, + }, + { + .key = {"mountbroker-root"}, + .type = GF_OPTION_TYPE_PATH, + }, + { + .key = {"mountbroker.*"}, + .type = GF_OPTION_TYPE_ANY, + }, + { + .key = {"mountbroker-" GEOREP ".*"}, + .type = GF_OPTION_TYPE_ANY, + }, + { + .key = {GEOREP "-log-group"}, + .type = GF_OPTION_TYPE_ANY, + }, + { + .key = {"run-with-valgrind"}, + .type = GF_OPTION_TYPE_BOOL, + }, + {.key = {"server-quorum-type"}, + .type = GF_OPTION_TYPE_STR, + .value = {"none", "server"}, + .default_value = "none", + .description = "It can be set to none or server. When set to server, " + "this option enables the specified volume to " + "participate in the server-side quorum. " + "This feature is on the server-side i.e. in glusterd. " + "Whenever the glusterd on a machine observes that " + "the quorum is not met, it brings down the bricks to " + "prevent data split-brains. When the network " + "connections are brought back up and the quorum is " + "restored the bricks in " + "the volume are brought back up."}, + {.key = {"server-quorum-ratio"}, + .type = GF_OPTION_TYPE_PERCENT, + .description = "Sets the quorum percentage for the trusted " + "storage pool."}, + {.key = {"glusterd-sockfile"}, + .type = GF_OPTION_TYPE_PATH, + .description = "The socket file on which glusterd should listen for " + "cli requests. Default is " DEFAULT_GLUSTERD_SOCKFILE "."}, + {.key = {"base-port"}, + .type = GF_OPTION_TYPE_INT, + .description = "Sets the base port for portmap query"}, + {.key = {"max-port"}, + .type = GF_OPTION_TYPE_INT, + .max = GF_PORT_MAX, + .description = "Sets the max port for portmap query"}, + {.key = {"mgmt-v3-lock-timeout"}, + .type = GF_OPTION_TYPE_INT, + .max = 600, + .description = "Sets the mgmt-v3-lock-timeout for transactions." 
+ "Specifes the default timeout value after which " + "lock acquired while performing transaction will " + "be released."}, + {.key = {"snap-brick-path"}, + .type = GF_OPTION_TYPE_STR, + .description = + "directory where the bricks for the snapshots will be created"}, + { + .key = {"ping-timeout"}, + .type = GF_OPTION_TYPE_TIME, + .min = 0, + .max = 300, + .default_value = TOSTRING(RPC_DEFAULT_PING_TIMEOUT), + }, + {.key = {"event-threads"}, + .type = GF_OPTION_TYPE_INT, + .min = 1, + .max = 32, + .default_value = "2", + .description = "Specifies the number of event threads to execute " + "in parallel. Larger values would help process" + " responses faster, depending on available processing" + " power. Range 1-32 threads."}, + {.key = {NULL}}, +}; + +xlator_api_t xlator_api = { + .init = init, + .fini = fini, + .mem_acct_init = mem_acct_init, + .op_version = {1}, /* Present from the initial version */ + .dumpops = &dumpops, + .fops = &fops, + .cbks = &cbks, + .options = options, + .identifier = "glusterd", + .category = GF_MAINTAINED, }; diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index 821a2ca46f2..cc4f98ecf47 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -1,510 +1,1375 @@ /* - Copyright (c) 2006-2010 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ + Copyright (c) 2006-2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ #ifndef _GLUSTERD_H_ #define _GLUSTERD_H_ -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - #include <sys/types.h> #include <dirent.h> #include <pthread.h> #include <libgen.h> -#include "uuid.h" +#include <glusterfs/compat-uuid.h> #include "rpc-clnt.h" -#include "glusterfs.h" -#include "xlator.h" -#include "logging.h" -#include "call-stub.h" -#include "fd.h" -#include "byte-order.h" +#include <glusterfs/glusterfs.h> +#include <glusterfs/xlator.h> +#include <glusterfs/logging.h> +#include <glusterfs/call-stub.h> +#include <glusterfs/byte-order.h> #include "glusterd-mem-types.h" #include "rpcsvc.h" #include "glusterd-sm.h" +#include "glusterd-snapd-svc.h" +#include "glusterd-shd-svc.h" +#include "glusterd-bitd-svc.h" #include "glusterd1-xdr.h" #include "protocol-common.h" #include "glusterd-pmap.h" +#include "cli1-xdr.h" +#include <glusterfs/syncop.h> +#include <glusterfs/store.h> +#include "glusterd-rcu.h" +#include <glusterfs/events.h> +#include "glusterd-gfproxyd-svc.h" + +#include "gd-common-utils.h" + +#define GLUSTERD_TR_LOG_SIZE 50 +#define GLUSTERD_QUORUM_TYPE_KEY "cluster.server-quorum-type" +#define GLUSTERD_QUORUM_RATIO_KEY "cluster.server-quorum-ratio" +#define GLUSTERD_GLOBAL_OPT_VERSION "global-option-version" +#define GLUSTERD_GLOBAL_OP_VERSION_KEY "cluster.op-version" +#define GLUSTERD_MAX_OP_VERSION_KEY "cluster.max-op-version" +#define GLUSTERD_COMMON_PEM_PUB_FILE "/geo-replication/common_secret.pem.pub" +#define GEO_CONF_MAX_OPT_VALS 6 +#define GLUSTERD_CREATE_HOOK_SCRIPT \ + "/hooks/1/gsync-create/post/" \ + "S56glusterd-geo-rep-create-post.sh" +#define GLUSTERD_SHRD_STRG_HOOK_SCRIPT \ + "/hooks/1/set/post/" \ + "S32gluster_enable_shared_storage.sh" +#define GLUSTER_SHARED_STORAGE "gluster_shared_storage" +#define GLUSTERD_SHARED_STORAGE_KEY "cluster.enable-shared-storage" +#define GLUSTERD_BRICK_MULTIPLEX_KEY "cluster.brick-multiplex" +#define GLUSTERD_VOL_CNT_PER_THRD "glusterd.vol_count_per_thread" +#define GLUSTERD_BRICKMUX_LIMIT_KEY "cluster.max-bricks-per-process" +#define GLUSTERD_BRICKMUX_LIMIT_DFLT_VALUE "250" +#define GLUSTERD_VOL_CNT_PER_THRD_DEFAULT_VALUE "100" +#define GLUSTERD_LOCALTIME_LOGGING_KEY "cluster.localtime-logging" +#define GLUSTERD_DAEMON_LOG_LEVEL_KEY "cluster.daemon-log-level" + +#define GANESHA_HA_CONF CONFDIR "/ganesha-ha.conf" +#define GANESHA_EXPORT_DIRECTORY CONFDIR "/exports" + +#define GLUSTERD_SNAPS_MAX_HARD_LIMIT 256 +#define GLUSTERD_SNAPS_DEF_SOFT_LIMIT_PERCENT 90 +#define GLUSTERD_SNAPS_MAX_SOFT_LIMIT_PERCENT 100 +#define GLUSTERD_SERVER_QUORUM "server" +#define STATUS_STRLEN 128 + +#define FMTSTR_CHECK_VOL_EXISTS "Volume %s does not exist" +#define FMTSTR_RESOLVE_BRICK "Could not find peer on which brick %s:%s resides" + +#define LOGSTR_FOUND_BRICK "Found brick %s:%s in volume %s" +#define LOGSTR_BUILD_PAYLOAD "Failed to build payload for operation 'Volume %s'" +#define LOGSTR_STAGE_FAIL "Staging of operation 'Volume %s' failed on %s %s %s" +#define LOGSTR_COMMIT_FAIL "Commit of operation 'Volume %s' failed on %s %s %s" + +#define OPERRSTR_BUILD_PAYLOAD \ + "Failed to build payload. Please check the log " \ + "file for more details." +#define OPERRSTR_STAGE_FAIL \ + "Staging failed on %s. Please check the log file " \ + "for more details." +#define OPERRSTR_COMMIT_FAIL \ + "Commit failed on %s. Please check the log file " \ + "for more details." 
+struct glusterd_volinfo_; +typedef struct glusterd_volinfo_ glusterd_volinfo_t; +struct glusterd_snap_; +typedef struct glusterd_snap_ glusterd_snap_t; -#define GLUSTERD_MAX_VOLUME_NAME 1000 -#define DEFAULT_LOG_FILE_DIRECTORY DATADIR "/log/glusterfs" -#define GLUSTERD_TR_LOG_SIZE 50 -#define GLUSTERD_NAME "glusterd" - - +/* For every new feature please add respective enum of new feature + * at the end of latest enum (just before the GD_OP_MAX enum) + */ typedef enum glusterd_op_ { - GD_OP_NONE = 0, - GD_OP_CREATE_VOLUME, - GD_OP_START_BRICK, - GD_OP_STOP_BRICK, - GD_OP_DELETE_VOLUME, - GD_OP_START_VOLUME, - GD_OP_STOP_VOLUME, - GD_OP_RENAME_VOLUME, - GD_OP_DEFRAG_VOLUME, - GD_OP_ADD_BRICK, - GD_OP_REMOVE_BRICK, - GD_OP_REPLACE_BRICK, - GD_OP_SET_VOLUME, - GD_OP_RESET_VOLUME, - GD_OP_SYNC_VOLUME, - GD_OP_LOG_FILENAME, - GD_OP_LOG_LOCATE, - GD_OP_LOG_ROTATE, - GD_OP_GSYNC_SET, - GD_OP_PROFILE_VOLUME, - GD_OP_QUOTA, - GD_OP_MAX, + GD_OP_NONE = 0, + GD_OP_CREATE_VOLUME, + GD_OP_START_BRICK, + GD_OP_STOP_BRICK, + GD_OP_DELETE_VOLUME, + GD_OP_START_VOLUME, + GD_OP_STOP_VOLUME, + GD_OP_DEFRAG_VOLUME, + GD_OP_ADD_BRICK, + GD_OP_REMOVE_BRICK, + GD_OP_REPLACE_BRICK, + GD_OP_SET_VOLUME, + GD_OP_RESET_VOLUME, + GD_OP_SYNC_VOLUME, + GD_OP_LOG_ROTATE, + GD_OP_GSYNC_SET, + GD_OP_PROFILE_VOLUME, + GD_OP_QUOTA, + GD_OP_STATUS_VOLUME, + GD_OP_REBALANCE, + GD_OP_HEAL_VOLUME, + GD_OP_STATEDUMP_VOLUME, + GD_OP_LIST_VOLUME, + GD_OP_CLEARLOCKS_VOLUME, + GD_OP_DEFRAG_BRICK_VOLUME, + GD_OP_COPY_FILE, + GD_OP_SYS_EXEC, + GD_OP_GSYNC_CREATE, + GD_OP_SNAP, + GD_OP_BARRIER, + GD_OP_GANESHA, /* obsolete */ + GD_OP_BITROT, + GD_OP_DETACH_TIER, + GD_OP_TIER_MIGRATE, + GD_OP_SCRUB_STATUS, + GD_OP_SCRUB_ONDEMAND, + GD_OP_RESET_BRICK, + GD_OP_MAX_OPVERSION, + GD_OP_TIER_START_STOP, + GD_OP_TIER_STATUS, + GD_OP_DETACH_TIER_STATUS, + GD_OP_DETACH_NOT_STARTED, + GD_OP_REMOVE_TIER_BRICK, + GD_OP_ADD_TIER_BRICK, + GD_OP_MAX, } glusterd_op_t; +extern const char *gd_op_list[]; -struct glusterd_store_iter_ { - int fd; - FILE *file; - char filepath[PATH_MAX]; +struct glusterd_volgen { + dict_t *dict; }; -typedef struct glusterd_store_iter_ glusterd_store_iter_t; +/* Keeping all the paths required in glusterd would + cause many buffer overflow errors, as we append + more defined paths to the brick path, workdir etc etc. + It is better to keep limit on this as lesser value, + so we get an option to continue with all functionalities. + For example, snapname max would be appended on brick-path and + would be stored on workdir... all of these being PATH_MAX, is + not an ideal situation for success. 
*/ +#define VALID_GLUSTERD_PATHMAX (PATH_MAX - (256 + 64)) -struct glusterd_volgen { - dict_t *dict; -}; typedef struct { - struct _volfile_ctx *volfile; - pthread_mutex_t mutex; - struct list_head peers; -// struct list_head pending_peers; - gf_boolean_t verify_volfile_checksum; - gf_boolean_t trace; - uuid_t uuid; - char workdir[PATH_MAX]; - rpcsvc_t *rpc; - struct pmap_registry *pmap; - struct list_head volumes; - struct list_head xprt_list; - glusterd_store_handle_t *handle; - gf_timer_t *timer; - glusterd_sm_tr_log_t op_sm_log; - struct rpc_clnt_program *gfs_mgmt; + struct _volfile_ctx *volfile; + pthread_mutex_t mutex; + struct cds_list_head peers; + uuid_t uuid; + rpcsvc_t *rpc; + glusterd_svc_t nfs_svc; + glusterd_svc_t bitd_svc; + glusterd_svc_t scrub_svc; + glusterd_svc_t quotad_svc; + struct pmap_registry *pmap; + struct cds_list_head volumes; + struct cds_list_head snapshots; /*List of snap volumes */ + struct cds_list_head brick_procs; /* List of brick processes */ + struct cds_list_head shd_procs; /* List of shd processes */ + pthread_mutex_t xprt_lock; + struct list_head xprt_list; + pthread_mutex_t import_volumes; + gf_store_handle_t *handle; + gf_timer_t *timer; + glusterd_sm_tr_log_t op_sm_log; + struct rpc_clnt_program *gfs_mgmt; + dict_t *mgmt_v3_lock; /* Dict for saving + * mgmt_v3 locks */ + dict_t *glusterd_txn_opinfo; /* Dict for saving + * transaction opinfos */ + uuid_t global_txn_id; /* To be used in + * heterogeneous + * cluster with no + * transaction ids */ + + dict_t *mgmt_v3_lock_timer; + struct cds_list_head mount_specs; + pthread_t brick_thread; + void *hooks_priv; + + xlator_t *xl; /* Should be set to 'THIS' before creating thread */ + /* need for proper handshake_t */ + int op_version; /* Starts with 1 for 3.3.0 */ + gf_boolean_t pending_quorum_action; + gf_boolean_t verify_volfile_checksum; + gf_boolean_t trace; + gf_boolean_t restart_done; + dict_t *opts; + synclock_t big_lock; + synccond_t cond_restart_bricks; + synccond_t cond_restart_shd; + synccond_t cond_blockers; + rpcsvc_t *uds_rpc; /* RPCSVC for the unix domain socket */ + uint32_t base_port; + uint32_t max_port; + char *snap_bricks_directory; + gf_store_handle_t *missed_snaps_list_shandle; + struct cds_list_head missed_snaps_list; + int ping_timeout; + uint32_t generation; + int32_t workers; + uint32_t mgmt_v3_lock_timeout; + gf_atomic_t blockers; + pthread_mutex_t attach_lock; /* Lock can be per process or a common one */ + pthread_mutex_t volume_lock; /* We release the big_lock from lot of places + which might lead the modification of volinfo + list. 
+ */ + gf_atomic_t thread_count; + gf_boolean_t restart_bricks; + gf_boolean_t restart_shd; /* This flag prevents running two shd manager + simultaneously + */ + char workdir[VALID_GLUSTERD_PATHMAX]; + char rundir[VALID_GLUSTERD_PATHMAX]; + char logdir[VALID_GLUSTERD_PATHMAX]; } glusterd_conf_t; +typedef struct glusterd_add_dict_args { + xlator_t *this; + dict_t *voldict; + int start; + int end; +} glusterd_add_dict_args_t; + +typedef struct glusterd_friend_synctask_args { + char *dict_buf; + u_int dictlen; +} glusterd_friend_synctask_args_t; + typedef enum gf_brick_status { - GF_BRICK_STOPPED, - GF_BRICK_STARTED, + GF_BRICK_STOPPED, + GF_BRICK_STARTED, + GF_BRICK_STOPPING, + GF_BRICK_STARTING } gf_brick_status_t; +typedef struct glusterd_brickinfo glusterd_brickinfo_t; + +struct glusterd_brick_proc { + int port; + uint32_t brick_count; + struct cds_list_head brick_proc_list; + struct cds_list_head bricks; +}; + +typedef struct glusterd_brick_proc glusterd_brick_proc_t; + struct glusterd_brickinfo { - char hostname[1024]; - char path[PATH_MAX]; - struct list_head brick_list; - uuid_t uuid; - int port; - char *logfile; - gf_boolean_t signed_in; - glusterd_store_handle_t *shandle; - gf_brick_status_t status; - struct rpc_clnt *rpc; - gf_timer_t *timer; + struct cds_list_head brick_list; + uuid_t uuid; + int port; + int rdma_port; + char *logfile; + gf_store_handle_t *shandle; + struct rpc_clnt *rpc; + int decommissioned; + gf_brick_status_t status; + int32_t snap_status; + /* + * The group is used to identify which bricks are part of the same + * replica set during brick-volfile generation, so that JBR volfiles + * can "cross-connect" the bricks to one another. It is also used by + * AFR to load the arbiter xlator in the appropriate brick in case of + * a replica 3 volume with arbiter enabled. 
+ */ + uint16_t group; + gf_boolean_t port_registered; + gf_boolean_t start_triggered; + + /* Below are used for handling the case of multiple bricks sharing + the backend filesystem */ + uint64_t statfs_fsid; + pthread_mutex_t restart_mutex; + glusterd_brick_proc_t *brick_proc; /* Information regarding mux bricks */ + struct cds_list_head mux_bricks; /* List to store the bricks in brick_proc*/ + uint32_t fs_share_count; + char hostname[NAME_MAX]; + char path[VALID_GLUSTERD_PATHMAX]; + char real_path[VALID_GLUSTERD_PATHMAX]; + char device_path[VALID_GLUSTERD_PATHMAX]; + char mount_dir[VALID_GLUSTERD_PATHMAX]; + char brick_id[1024]; /*Client xlator name, AFR changelog name*/ + char fstype[NAME_MAX]; /* Brick file-system type */ + char mnt_opts[1024]; /* Brick mount options */ + char vg[PATH_MAX]; /* FIXME: Use max size for length of vg */ }; -typedef struct glusterd_brickinfo glusterd_brickinfo_t; +struct glusterd_gfproxyd_info { + char *logfile; + short port; +}; struct gf_defrag_brickinfo_ { - char *name; - int files; - int size; + char *name; + int files; + int size; }; -typedef enum gf_defrag_status_ { - GF_DEFRAG_STATUS_NOT_STARTED, - GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED, - GF_DEFRAG_STATUS_MIGRATE_DATA_STARTED, - GF_DEFRAG_STATUS_STOPED, - GF_DEFRAG_STATUS_COMPLETE, - GF_DEFRAG_STATUS_FAILED, - GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE, - GF_DEFRAG_STATUS_MIGRATE_DATA_COMPLETE, -} gf_defrag_status_t; +typedef int (*defrag_cbk_fn_t)(glusterd_volinfo_t *volinfo, + gf_defrag_status_t status); struct glusterd_defrag_info_ { - uint64_t total_files; - uint64_t total_data; - uint64_t num_files_lookedup; - gf_lock_t lock; - int cmd; - pthread_t th; - char mount[1024]; - char databuf[131072]; - struct gf_defrag_brickinfo_ *bricks; /* volinfo->brick_count */ + uint64_t total_files; + uint64_t total_data; + uint64_t num_files_lookedup; + uint64_t total_failures; + gf_lock_t lock; + int cmd; + uint32_t connected; + pthread_t th; + struct rpc_clnt *rpc; + struct gf_defrag_brickinfo_ *bricks; /* volinfo->brick_count */ + defrag_cbk_fn_t cbk_fn; + gf_defrag_status_t defrag_status; + char mount[1024]; }; - typedef struct glusterd_defrag_info_ glusterd_defrag_info_t; typedef enum gf_transport_type_ { - GF_TRANSPORT_TCP, //DEFAULT - GF_TRANSPORT_RDMA, - GF_TRANSPORT_BOTH_TCP_RDMA, + GF_TRANSPORT_TCP, // DEFAULT + GF_TRANSPORT_RDMA, + GF_TRANSPORT_BOTH_TCP_RDMA, } gf_transport_type; - typedef enum gf_rb_status_ { - GF_RB_STATUS_NONE, - GF_RB_STATUS_STARTED, - GF_RB_STATUS_PAUSED, + GF_RB_STATUS_NONE, + GF_RB_STATUS_STARTED, + GF_RB_STATUS_PAUSED, } gf_rb_status_t; +struct _auth { + char *username; + char *password; +}; + +typedef struct _auth auth_t; + +/* Capabilities of xlator */ +#define CAPS_BD 0x00000001 +#define CAPS_THIN 0x00000002 +#define CAPS_OFFLOAD_COPY 0x00000004 +#define CAPS_OFFLOAD_SNAPSHOT 0x00000008 +#define CAPS_OFFLOAD_ZERO 0x00000020 + +struct glusterd_bitrot_scrub_ { + char *scrub_state; + char *scrub_impact; + char *scrub_freq; + uint64_t scrubbed_files; + uint64_t unsigned_files; + uint64_t last_scrub_time; + uint64_t scrub_duration; + uint64_t error_count; +}; + +typedef struct glusterd_bitrot_scrub_ glusterd_bitrot_scrub_t; + +struct glusterd_rebalance_ { + uint64_t rebalance_files; + uint64_t rebalance_data; + uint64_t lookedup_files; + uint64_t skipped_files; + uint64_t rebalance_failures; + glusterd_defrag_info_t *defrag; + gf_cli_defrag_type defrag_cmd; + gf_defrag_status_t defrag_status; + uuid_t rebalance_id; + double rebalance_time; + uint64_t time_left; + dict_t *dict; /* 
Dict to store misc information + * like list of bricks being removed */ + glusterd_op_t op; + uint32_t commit_hash; +}; + +typedef struct glusterd_rebalance_ glusterd_rebalance_t; + +struct glusterd_replace_brick_ { + glusterd_brickinfo_t *src_brick; + glusterd_brickinfo_t *dst_brick; +}; + +typedef struct glusterd_replace_brick_ glusterd_replace_brick_t; + +typedef enum gd_quorum_status_ { + NOT_APPLICABLE_QUORUM, // Does not follow quorum + MEETS_QUORUM, // Follows quorum and meets. + DOESNT_MEET_QUORUM, // Follows quorum and does not meet. +} gd_quorum_status_t; + struct glusterd_volinfo_ { - char volname[GLUSTERD_MAX_VOLUME_NAME]; - int type; - int brick_count; - struct list_head vol_list; - struct list_head bricks; - glusterd_volume_status status; - int sub_count; - int port; - glusterd_store_handle_t *shandle; - - /* Defrag/rebalance related */ - gf_defrag_status_t defrag_status; - uint64_t rebalance_files; - uint64_t rebalance_data; - uint64_t lookedup_files; - glusterd_defrag_info_t *defrag; - - /* Replace brick status */ - gf_rb_status_t rb_status; - glusterd_brickinfo_t *src_brick; - glusterd_brickinfo_t *dst_brick; - - int version; - uint32_t cksum; - gf_transport_type transport_type; - - dict_t *dict; - - uuid_t volume_id; - char *logdir; + gf_lock_t lock; + glusterd_snap_t *snapshot; + uuid_t restored_from_snap; + int type; + int brick_count; + uint64_t snap_count; + uint64_t snap_max_hard_limit; + struct cds_list_head vol_list; + /* In case of a snap volume + i.e (is_snap_volume == TRUE) this + is linked to glusterd_snap_t->volumes. + In case of a non-snap volume, this is + linked to glusterd_conf_t->volumes */ + struct cds_list_head snapvol_list; + /* This is a current pointer for + glusterd_volinfo_t->snap_volumes */ + struct cds_list_head bricks; + struct cds_list_head ta_bricks; + struct cds_list_head snap_volumes; + /* TODO : Need to remove this, as this + * is already part of snapshot object. 
+ */ + glusterd_volume_status status; + int sub_count; /* backward compatibility */ + int stripe_count; + int replica_count; + int arbiter_count; + int thin_arbiter_count; + int disperse_count; + int redundancy_count; + int subvol_count; /* Number of subvolumes in a + distribute volume */ + int dist_leaf_count; /* Number of bricks in one + distribute subvolume */ + int port; + gf_store_handle_t *shandle; + gf_store_handle_t *node_state_shandle; + gf_store_handle_t *quota_conf_shandle; + + /* Defrag/rebalance related */ + glusterd_rebalance_t rebal; + + /* Replace brick status */ + glusterd_replace_brick_t rep_brick; + + /* Bitrot scrub status*/ + glusterd_bitrot_scrub_t bitrot_scrub; + + int version; + uint32_t quota_conf_version; + uint32_t cksum; + uint32_t quota_conf_cksum; + + dict_t *dict; + + uuid_t volume_id; + auth_t auth; + char *logdir; + + dict_t *gsync_slaves; + dict_t *gsync_active_slaves; + + xlator_t *xl; + int decommission_in_progress; + + int op_version; + int client_op_version; + int32_t quota_xattr_version; + pthread_mutex_t reflock; + int refcnt; + gd_quorum_status_t quorum_status; + + glusterd_snapdsvc_t snapd; + glusterd_shdsvc_t shd; + glusterd_gfproxydsvc_t gfproxyd; + pthread_mutex_t store_volinfo_lock; /* acquire lock for + * updating the volinfo + */ + gf_transport_type transport_type; + gf_boolean_t is_snap_volume; + gf_boolean_t memory_accounting; + gf_boolean_t stage_deleted; /* volume has passed staging + * for delete operation + */ + char parent_volname[GD_VOLUME_NAME_MAX]; + /* In case of a snap volume + i.e (is_snap_volume == TRUE) this + field will contain the name of + the volume which is snapped. In + case of a non-snap volume, this + field will be initialized as N/A */ + char volname[NAME_MAX + 1]; + /* NAME_MAX + 1 will be equal to + * GD_VOLUME_NAME_MAX + 5.(also to + * GD_VOLUME_NAME_MAX_TIER). 
An extra 5 + * bytes are added to GD_VOLUME_NAME_MAX + * because, as part of the tiering + * volfile generation code, we are + * temporarily appending either "-hot" + * or "-cold" */ + gf_atomic_t volpeerupdate; + /* Flag to check about volume has received updates + from peer + */ }; -typedef struct glusterd_volinfo_ glusterd_volinfo_t; +typedef enum gd_snap_status_ { + GD_SNAP_STATUS_NONE, + GD_SNAP_STATUS_INIT, + GD_SNAP_STATUS_IN_USE, + GD_SNAP_STATUS_DECOMMISSION, + GD_SNAP_STATUS_UNDER_RESTORE, + GD_SNAP_STATUS_RESTORED, +} gd_snap_status_t; + +struct glusterd_snap_ { + gf_lock_t lock; + struct cds_list_head volumes; + struct cds_list_head snap_list; + char *description; + uuid_t snap_id; + time_t time_stamp; + gf_store_handle_t *shandle; + gd_snap_status_t snap_status; + gf_boolean_t snap_restored; + char snapname[GLUSTERD_MAX_SNAP_NAME]; +}; + +typedef struct glusterd_snap_op_ { + char *snap_vol_id; + char *brick_path; + struct cds_list_head snap_ops_list; + int32_t brick_num; + int32_t op; + int32_t status; +} glusterd_snap_op_t; + +typedef struct glusterd_missed_snap_ { + char *node_uuid; + char *snap_uuid; + struct cds_list_head missed_snaps; + struct cds_list_head snap_ops; +} glusterd_missed_snap_info; + +typedef enum gd_node_type_ { + GD_NODE_NONE, + GD_NODE_BRICK, + GD_NODE_SHD, + GD_NODE_REBALANCE, + GD_NODE_NFS, + GD_NODE_QUOTAD, + GD_NODE_SNAPD, + GD_NODE_BITD, + GD_NODE_SCRUB, + GD_NODE_TIERD +} gd_node_type; + +typedef enum missed_snap_stat { + GD_MISSED_SNAP_NONE, + GD_MISSED_SNAP_PENDING, + GD_MISSED_SNAP_DONE, +} missed_snap_stat; typedef struct glusterd_pending_node_ { - void *node; - struct list_head list; + struct cds_list_head list; + void *node; + gd_node_type type; + int32_t index; } glusterd_pending_node_t; +struct gsync_config_opt_vals_ { + char *op_name; + char *values[GEO_CONF_MAX_OPT_VALS]; + int no_of_pos_vals; + gf_boolean_t case_sensitive; +}; + enum glusterd_op_ret { - GLUSTERD_CONNECTION_AWAITED = 100, + GLUSTERD_CONNECTION_AWAITED = 100, }; enum glusterd_vol_comp_status_ { - GLUSTERD_VOL_COMP_NONE = 0, - GLUSTERD_VOL_COMP_SCS = 1, - GLUSTERD_VOL_COMP_UPDATE_REQ, - GLUSTERD_VOL_COMP_RJT, + GLUSTERD_VOL_COMP_NONE = 0, + GLUSTERD_VOL_COMP_SCS = 1, + GLUSTERD_VOL_COMP_UPDATE_REQ, + GLUSTERD_VOL_COMP_RJT, }; -#define GLUSTERD_DEFAULT_WORKDIR "/etc/glusterd" -#define GLUSTERD_DEFAULT_PORT GF_DEFAULT_BASE_PORT -#define GLUSTERD_INFO_FILE "glusterd.info" +typedef struct addrinfo_list { + struct cds_list_head list; + struct addrinfo *info; +} addrinfo_list_t; + +typedef enum { + GF_AI_COMPARE_NO_MATCH = 0, + GF_AI_COMPARE_MATCH = 1, + GF_AI_COMPARE_ERROR = 2 +} gf_ai_compare_t; + +#define GLUSTERD_DEFAULT_PORT GF_DEFAULT_BASE_PORT +#define GLUSTERD_INFO_FILE "glusterd.info" +#define GLUSTERD_UPGRADE_FILE \ + "glusterd.upgrade" /* zero byte file to detect a need for regenerating \ + volfiles in container mode */ +#define GLUSTERD_VOLUME_QUOTA_CONFIG "quota.conf" #define GLUSTERD_VOLUME_DIR_PREFIX "vols" #define GLUSTERD_PEER_DIR_PREFIX "peers" #define GLUSTERD_VOLUME_INFO_FILE "info" +#define GLUSTERD_VOLUME_SNAPD_INFO_FILE "snapd.info" +#define GLUSTERD_SNAP_INFO_FILE "info" +#define GLUSTERD_VOLUME_RBSTATE_FILE "rbstate" #define GLUSTERD_BRICK_INFO_DIR "bricks" #define GLUSTERD_CKSUM_FILE "cksum" - -/*All definitions related to replace brick */ -#define RB_PUMP_START_CMD "trusted.glusterfs.pump.start" -#define RB_PUMP_PAUSE_CMD "trusted.glusterfs.pump.pause" -#define RB_PUMP_ABORT_CMD "trusted.glusterfs.pump.abort" -#define RB_PUMP_STATUS_CMD 
"trusted.glusterfs.pump.status" -#define RB_CLIENT_MOUNTPOINT "rb_mount" -#define RB_CLIENTVOL_FILENAME "rb_client.vol" -#define RB_DSTBRICK_PIDFILE "rb_dst_brick.pid" +#define GLUSTERD_VOL_QUOTA_CKSUM_FILE "quota.cksum" +#define GLUSTERD_TRASH "trash" +#define GLUSTERD_NODE_STATE_FILE "node_state.info" +#define GLUSTERD_MISSED_SNAPS_LIST_FILE "missed_snaps_list" +#define GLUSTERD_VOL_SNAP_DIR_PREFIX "snaps" + +#define GLUSTERD_DEFAULT_SNAPS_BRICK_DIR "/gluster/snaps" +#define GLUSTERD_BITD_RUN_DIR "/bitd" +#define GLUSTERD_SCRUB_RUN_DIR "/scrub" +#define GLUSTERD_NFS_RUN_DIR "/nfs" +#define GLUSTERD_QUOTAD_RUN_DIR "/quotad" +#define GLUSTER_SHARED_STORAGE_BRICK_DIR GLUSTERD_DEFAULT_WORKDIR "/ss_brick" +#define GLUSTERD_VAR_RUN_DIR "/var/run" +#define GLUSTERD_RUN_DIR "/run" + +/* definitions related to replace brick */ +#define RB_CLIENT_MOUNTPOINT "rb_mount" +#define RB_CLIENTVOL_FILENAME "rb_client.vol" +#define RB_DSTBRICK_PIDFILE "rb_dst_brick.pid" #define RB_DSTBRICKVOL_FILENAME "rb_dst_brick.vol" +#define RB_PUMP_DEF_ARG "default" #define GLUSTERD_UUID_LEN 50 -typedef ssize_t (*gd_serialize_t) (struct iovec outmsg, void *args); - -#define GLUSTERD_GET_NFS_DIR(path, priv) \ - do { \ - snprintf (path, PATH_MAX, "%s/nfs", priv->workdir);\ - } while (0); \ - -#define GLUSTERD_GET_VOLUME_DIR(path, volinfo, priv) \ - snprintf (path, PATH_MAX, "%s/vols/%s", priv->workdir,\ - volinfo->volname); - -#define GLUSTERD_GET_BRICK_DIR(path, volinfo, priv) \ - snprintf (path, PATH_MAX, "%s/%s/%s/%s", priv->workdir,\ - GLUSTERD_VOLUME_DIR_PREFIX, volinfo->volname, \ - GLUSTERD_BRICK_INFO_DIR); - -#define GLUSTERD_GET_NFS_PIDFILE(pidfile) \ - snprintf (pidfile, PATH_MAX, "%s/nfs/run/nfs.pid", \ - priv->workdir); \ - -#define GLUSTERD_REMOVE_SLASH_FROM_PATH(path,string) do { \ - int i = 0; \ - for (i = 1; i < strlen (path); i++) { \ - string[i-1] = path[i]; \ - if (string[i-1] == '/') \ - string[i-1] = '-'; \ - } \ - } while (0) - -#define GLUSTERD_GET_BRICK_PIDFILE(pidfile,volpath,hostname,brickpath) { \ - char exp_path[PATH_MAX] = {0,}; \ - GLUSTERD_REMOVE_SLASH_FROM_PATH (brickpath, exp_path); \ - snprintf (pidfile, PATH_MAX, "%s/run/%s-%s.pid", \ - volpath, hostname, exp_path); \ - } - -#define GLUSTERD_STACK_DESTROY(frame) do {\ - void *__local = NULL; \ - xlator_t *__xl = NULL; \ - __xl = frame->this; \ - __local = frame->local; \ - frame->local = NULL; \ - STACK_DESTROY (frame->root);\ - } while (0) +typedef ssize_t (*gd_serialize_t)(struct iovec outmsg, void *args); + +#define GLUSTERD_GET_VOLUME_DIR(path, volinfo, priv) \ + do { \ + int32_t _vol_dir_len; \ + if (volinfo->is_snap_volume) { \ + _vol_dir_len = snprintf( \ + path, PATH_MAX, "%s/snaps/%s/%s", priv->workdir, \ + volinfo->snapshot->snapname, volinfo->volname); \ + } else { \ + _vol_dir_len = snprintf(path, PATH_MAX, "%s/vols/%s", \ + priv->workdir, volinfo->volname); \ + } \ + if ((_vol_dir_len < 0) || (_vol_dir_len >= PATH_MAX)) { \ + path[0] = 0; \ + } \ + } while (0) + +#define GLUSTERD_GET_DEFRAG_DIR(path, volinfo, priv) \ + do { \ + char vol_path[PATH_MAX]; \ + int32_t _defrag_dir_len; \ + GLUSTERD_GET_VOLUME_DIR(vol_path, volinfo, priv); \ + _defrag_dir_len = snprintf(path, PATH_MAX, "%s/%s", vol_path, \ + "rebalance"); \ + if ((_defrag_dir_len < 0) || (_defrag_dir_len >= PATH_MAX)) { \ + path[0] = 0; \ + } \ + } while (0) + +#define GLUSTERD_GET_DEFRAG_PID_FILE(path, volinfo, priv) \ + do { \ + char defrag_path[PATH_MAX]; \ + int32_t _defrag_pidfile_len; \ + GLUSTERD_GET_DEFRAG_DIR(defrag_path, volinfo, priv); \ + 
_defrag_pidfile_len = snprintf(path, PATH_MAX, "%s/%s.pid", \ + defrag_path, uuid_utoa(MY_UUID)); \ + if ((_defrag_pidfile_len < 0) || (_defrag_pidfile_len >= PATH_MAX)) { \ + path[0] = 0; \ + } \ + } while (0) + +#define GLUSTERD_GET_SHD_RUNDIR(path, volinfo, priv) \ + do { \ + int32_t _shd_dir_len; \ + _shd_dir_len = snprintf(path, PATH_MAX, "%s/shd/%s", priv->rundir, \ + volinfo->volname); \ + if ((_shd_dir_len < 0) || (_shd_dir_len >= PATH_MAX)) { \ + path[0] = 0; \ + } \ + } while (0) + +#define GLUSTERD_GET_VOLUME_PID_DIR(path, volinfo, priv) \ + do { \ + int32_t _vol_pid_len; \ + if (volinfo->is_snap_volume) { \ + _vol_pid_len = snprintf(path, PATH_MAX, "%s/snaps/%s/%s", \ + priv->rundir, volinfo->snapshot->snapname, \ + volinfo->volname); \ + } else { \ + _vol_pid_len = snprintf(path, PATH_MAX, "%s/vols/%s", \ + priv->rundir, volinfo->volname); \ + } \ + if ((_vol_pid_len < 0) || (_vol_pid_len >= PATH_MAX)) { \ + path[0] = 0; \ + } \ + } while (0) + +#define GLUSTERD_GET_SNAP_GEO_REP_DIR(path, snap, priv) \ + do { \ + int32_t _snap_geo_len; \ + _snap_geo_len = snprintf(path, PATH_MAX, "%s/snaps/%s/%s", \ + priv->workdir, snap->snapname, GEOREP); \ + if ((_snap_geo_len < 0) || (_snap_geo_len >= PATH_MAX)) { \ + path[0] = 0; \ + } \ + } while (0) + +#define GLUSTERD_GET_QUOTA_LIMIT_MOUNT_PATH(abspath, volname, path) \ + do { \ + snprintf(abspath, sizeof(abspath) - 1, \ + DEFAULT_VAR_RUN_DIRECTORY "/%s_quota_limit%s", volname, \ + path); \ + } while (0) + +#define GLUSTERD_REMOVE_SLASH_FROM_PATH(path, string) \ + do { \ + int i = 0; \ + for (i = 1; i < strlen(path); i++) { \ + string[i - 1] = path[i]; \ + if (string[i - 1] == '/' && (i != strlen(path) - 1)) \ + string[i - 1] = '-'; \ + } \ + } while (0) + +#define GLUSTERD_GET_BRICK_PIDFILE(pidfile, volinfo, brickinfo, priv) \ + do { \ + char exp_path[PATH_MAX] = { \ + 0, \ + }; \ + char volpath[PATH_MAX] = { \ + 0, \ + }; \ + int32_t _brick_pid_len = 0; \ + GLUSTERD_GET_VOLUME_PID_DIR(volpath, volinfo, priv); \ + GLUSTERD_REMOVE_SLASH_FROM_PATH(brickinfo->path, exp_path); \ + _brick_pid_len = snprintf(pidfile, PATH_MAX, "%s/%s-%s.pid", volpath, \ + brickinfo->hostname, exp_path); \ + if ((_brick_pid_len < 0) || (_brick_pid_len >= PATH_MAX)) { \ + pidfile[0] = 0; \ + } \ + } while (0) + +#define RCU_READ_LOCK \ + pthread_mutex_lock(&(THIS->ctx)->cleanup_lock); \ + { \ + rcu_read_lock(); \ + } \ + pthread_mutex_unlock(&(THIS->ctx)->cleanup_lock); + +#define RCU_READ_UNLOCK \ + pthread_mutex_lock(&(THIS->ctx)->cleanup_lock); \ + { \ + rcu_read_unlock(); \ + } \ + pthread_mutex_unlock(&(THIS->ctx)->cleanup_lock); + +#define GLUSTERD_DUMP_PEERS(head, member, xpeers) \ + do { \ + glusterd_peerinfo_t *_peerinfo = NULL; \ + int index = 1; \ + char *key = NULL; \ + \ + key = xpeers ? 
"glusterd.xaction_peer" : "glusterd.peer"; \ + \ + RCU_READ_LOCK; \ + cds_list_for_each_entry_rcu(_peerinfo, head, member) \ + { \ + glusterd_dump_peer(_peerinfo, key, index, xpeers); \ + if (!xpeers) \ + glusterd_dump_peer_rpcstat(_peerinfo, key, index); \ + index++; \ + } \ + RCU_READ_UNLOCK; \ + \ + } while (0) + +int +glusterd_uuid_init(); + +int +glusterd_uuid_generate_save(); + +#define MY_UUID (__glusterd_uuid()) + +static inline unsigned char * +__glusterd_uuid() +{ + glusterd_conf_t *priv = THIS->private; + + if (gf_uuid_is_null(priv->uuid)) + glusterd_uuid_init(); + return &priv->uuid[0]; +} + +int +glusterd_big_locked_notify(struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data, + rpc_clnt_notify_t notify_fn); + +int +glusterd_big_locked_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe, fop_cbk_fn_t fn); + +int +glusterd_big_locked_handler(rpcsvc_request_t *req, rpcsvc_actor actor_fn); int32_t -glusterd_brick_from_brickinfo (glusterd_brickinfo_t *brickinfo, - char **new_brick); +glusterd_brick_from_brickinfo(glusterd_brickinfo_t *brickinfo, + char **new_brick); int -glusterd_probe_begin (rpcsvc_request_t *req, const char *hoststr, int port); +glusterd_probe_begin(rpcsvc_request_t *req, const char *hoststr, int port, + dict_t *dict, int *op_errno); int -glusterd_xfer_friend_add_resp (rpcsvc_request_t *req, char *hostname, - int port, int32_t op_ret, int32_t op_errno); +glusterd_xfer_friend_add_resp(rpcsvc_request_t *req, char *myhostname, + char *remote_hostname, int port, int32_t op_ret, + int32_t op_errno); int -glusterd_friend_find (uuid_t uuid, char *hostname, - glusterd_peerinfo_t **peerinfo); +glusterd_friend_add(const char *hoststr, int port, + glusterd_friend_sm_state_t state, uuid_t *uuid, + glusterd_peerinfo_t **friend, gf_boolean_t restore, + glusterd_peerctx_args_t *args); int -glusterd_friend_add (const char *hoststr, int port, - glusterd_friend_sm_state_t state, - uuid_t *uuid, struct rpc_clnt *rpc, - glusterd_peerinfo_t **friend, - gf_boolean_t restore, - glusterd_peerctx_args_t *args); +glusterd_friend_add_from_peerinfo(glusterd_peerinfo_t *friend, + gf_boolean_t restore, + glusterd_peerctx_args_t *args); +int +glusterd_friend_rpc_create(xlator_t *this, glusterd_peerinfo_t *peerinfo, + glusterd_peerctx_args_t *args); +int +glusterd_friend_remove(uuid_t uuid, char *hostname); int -glusterd_friend_remove (uuid_t uuid, char *hostname); +glusterd_op_lock_send_resp(rpcsvc_request_t *req, int32_t status); int -glusterd_op_lock_send_resp (rpcsvc_request_t *req, int32_t status); +glusterd_op_unlock_send_resp(rpcsvc_request_t *req, int32_t status); int -glusterd_op_unlock_send_resp (rpcsvc_request_t *req, int32_t status); +glusterd_op_mgmt_v3_lock_send_resp(rpcsvc_request_t *req, uuid_t *txn_id, + int32_t status); int -glusterd_op_stage_send_resp (rpcsvc_request_t *req, - int32_t op, int32_t status, - char *op_errstr, dict_t *rsp_dict); +glusterd_op_mgmt_v3_unlock_send_resp(rpcsvc_request_t *req, uuid_t *txn_id, + int32_t status); int -glusterd_op_commmit_send_resp (rpcsvc_request_t *req, - int32_t op, int32_t status); +glusterd_op_stage_send_resp(rpcsvc_request_t *req, int32_t op, int32_t status, + char *op_errstr, dict_t *rsp_dict); + +int +glusterd_op_commmit_send_resp(rpcsvc_request_t *req, int32_t op, + int32_t status); int32_t -glusterd_create_volume (rpcsvc_request_t *req, dict_t *dict); +glusterd_create_volume(rpcsvc_request_t *req, dict_t *dict); int -glusterd_handle_incoming_friend_req (rpcsvc_request_t *req); 
+glusterd_handle_incoming_friend_req(rpcsvc_request_t *req); int -glusterd_handle_probe_query (rpcsvc_request_t *req); +glusterd_handle_probe_query(rpcsvc_request_t *req); int -glusterd_handle_cluster_lock (rpcsvc_request_t *req); +glusterd_handle_cluster_lock(rpcsvc_request_t *req); int -glusterd_handle_cluster_unlock (rpcsvc_request_t *req); +glusterd_handle_cluster_unlock(rpcsvc_request_t *req); int -glusterd_handle_stage_op (rpcsvc_request_t *req); +glusterd_handle_stage_op(rpcsvc_request_t *req); int -glusterd_handle_commit_op (rpcsvc_request_t *req); +glusterd_handle_commit_op(rpcsvc_request_t *req); int -glusterd_handle_cli_probe (rpcsvc_request_t *req); +glusterd_handle_cli_probe(rpcsvc_request_t *req); int -glusterd_handle_create_volume (rpcsvc_request_t *req); +glusterd_handle_create_volume(rpcsvc_request_t *req); int -glusterd_handle_defrag_volume (rpcsvc_request_t *req); +glusterd_handle_defrag_volume(rpcsvc_request_t *req); int -glusterd_handle_defrag_volume_v2 (rpcsvc_request_t *req); +glusterd_handle_defrag_volume_v2(rpcsvc_request_t *req); int -glusterd_xfer_cli_probe_resp (rpcsvc_request_t *req, int32_t op_ret, - int32_t op_errno, char *hostname, int port); +glusterd_xfer_cli_probe_resp(rpcsvc_request_t *req, int32_t op_ret, + int32_t op_errno, char *op_errstr, char *hostname, + int port, dict_t *dict); int -glusterd_op_commit_send_resp (rpcsvc_request_t *req, - int32_t op, int32_t status, char *op_errstr, - dict_t *rsp_dict); +glusterd_op_commit_send_resp(rpcsvc_request_t *req, int32_t op, int32_t status, + char *op_errstr, dict_t *rsp_dict); int -glusterd_xfer_friend_remove_resp (rpcsvc_request_t *req, char *hostname, int port); +glusterd_xfer_friend_remove_resp(rpcsvc_request_t *req, char *hostname, + int port); int -glusterd_deprobe_begin (rpcsvc_request_t *req, const char *hoststr, int port, - uuid_t uuid); +glusterd_deprobe_begin(rpcsvc_request_t *req, const char *hoststr, int port, + uuid_t uuid, dict_t *dict, int *op_errno); int -glusterd_handle_cli_deprobe (rpcsvc_request_t *req); +glusterd_handle_cli_deprobe(rpcsvc_request_t *req); int -glusterd_handle_incoming_unfriend_req (rpcsvc_request_t *req); +glusterd_handle_incoming_unfriend_req(rpcsvc_request_t *req); int32_t -glusterd_list_friends (rpcsvc_request_t *req, dict_t *dict, int32_t flags); +glusterd_list_friends(rpcsvc_request_t *req, dict_t *dict, int32_t flags); int -glusterd_handle_cli_list_friends (rpcsvc_request_t *req); +glusterd_handle_cli_list_friends(rpcsvc_request_t *req); int -glusterd_handle_cli_start_volume (rpcsvc_request_t *req); +glusterd_handle_cli_start_volume(rpcsvc_request_t *req); int -glusterd_handle_friend_update (rpcsvc_request_t *req); +glusterd_handle_friend_update(rpcsvc_request_t *req); int -glusterd_handle_cli_stop_volume (rpcsvc_request_t *req); +glusterd_handle_cli_stop_volume(rpcsvc_request_t *req); int -glusterd_handle_cli_delete_volume (rpcsvc_request_t *req); +glusterd_handle_cli_delete_volume(rpcsvc_request_t *req); int -glusterd_handle_cli_get_volume (rpcsvc_request_t *req); +glusterd_handle_cli_get_volume(rpcsvc_request_t *req); int32_t -glusterd_get_volumes (rpcsvc_request_t *req, dict_t *dict, int32_t flags); +glusterd_get_volumes(rpcsvc_request_t *req, dict_t *dict, int32_t flags); + +int +glusterd_handle_add_brick(rpcsvc_request_t *req); + +int +glusterd_handle_tier(rpcsvc_request_t *req); + +int +glusterd_handle_attach_tier(rpcsvc_request_t *req); int -glusterd_handle_add_brick (rpcsvc_request_t *req); +glusterd_handle_detach_tier(rpcsvc_request_t *req); int 
-glusterd_handle_replace_brick (rpcsvc_request_t *req); +glusterd_handle_add_tier_brick(rpcsvc_request_t *req); int -glusterd_handle_remove_brick (rpcsvc_request_t *req); +glusterd_handle_replace_brick(rpcsvc_request_t *req); int -glusterd_handle_log_filename (rpcsvc_request_t *req); +glusterd_handle_remove_brick(rpcsvc_request_t *req); + int -glusterd_handle_log_locate (rpcsvc_request_t *req); +glusterd_handle_log_rotate(rpcsvc_request_t *req); + int -glusterd_handle_log_rotate (rpcsvc_request_t *req); +glusterd_handle_sync_volume(rpcsvc_request_t *req); int -glusterd_handle_sync_volume (rpcsvc_request_t *req); +glusterd_defrag_start_validate(glusterd_volinfo_t *volinfo, char *op_errstr, + size_t len, glusterd_op_t op); + +int +glusterd_rebalance_cmd_validate(int cmd, char *volname, + glusterd_volinfo_t **volinfo, char *op_errstr, + size_t len); int32_t -glusterd_log_filename (rpcsvc_request_t *req, dict_t *dict); +glusterd_log_filename(rpcsvc_request_t *req, dict_t *dict); int32_t -glusterd_log_rotate (rpcsvc_request_t *req, dict_t *dict); +glusterd_log_rotate(rpcsvc_request_t *req, dict_t *dict); int32_t -glusterd_remove_brick (rpcsvc_request_t *req, dict_t *dict); +glusterd_remove_brick(rpcsvc_request_t *req, dict_t *dict); int32_t -glusterd_set_volume (rpcsvc_request_t *req, dict_t *dict); +glusterd_set_volume(rpcsvc_request_t *req, dict_t *dict); int32_t -glusterd_reset_volume (rpcsvc_request_t *req, dict_t *dict); +glusterd_reset_volume(rpcsvc_request_t *req, dict_t *dict); int32_t -glusterd_gsync_set (rpcsvc_request_t *req, dict_t *dict); +glusterd_gsync_set(rpcsvc_request_t *req, dict_t *dict); int32_t -glusterd_quota (rpcsvc_request_t *req, dict_t *dict); +glusterd_quota(rpcsvc_request_t *req, dict_t *dict); + +int +glusterd_handle_set_volume(rpcsvc_request_t *req); int -glusterd_handle_set_volume (rpcsvc_request_t *req); +glusterd_handle_reset_volume(rpcsvc_request_t *req); int -glusterd_handle_reset_volume (rpcsvc_request_t *req); +glusterd_handle_copy_file(rpcsvc_request_t *req); int -glusterd_handle_gsync_set (rpcsvc_request_t *req); +glusterd_handle_sys_exec(rpcsvc_request_t *req); int -glusterd_handle_quota (rpcsvc_request_t *req); +glusterd_handle_gsync_set(rpcsvc_request_t *req); int -glusterd_handle_fsm_log (rpcsvc_request_t *req); +glusterd_handle_quota(rpcsvc_request_t *req); int -glusterd_xfer_cli_deprobe_resp (rpcsvc_request_t *req, int32_t op_ret, - int32_t op_errno, char *hostname); +glusterd_handle_bitrot(rpcsvc_request_t *req); int -glusterd_fetchspec_notify (xlator_t *this); +glusterd_handle_fsm_log(rpcsvc_request_t *req); int -glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo, - dict_t *volumes, int count); +glusterd_handle_reset_brick(rpcsvc_request_t *req); int -glusterd_restart_bricks (glusterd_conf_t *conf); +glusterd_xfer_cli_deprobe_resp(rpcsvc_request_t *req, int32_t op_ret, + int32_t op_errno, char *op_errstr, + char *hostname, dict_t *dict); + +int +glusterd_client_statedump_submit_req(char *volname, char *target_ip, char *pid); + +int +glusterd_fetchspec_notify(xlator_t *this); + +int +glusterd_fetchsnap_notify(xlator_t *this); + +int +glusterd_add_volume_detail_to_dict(glusterd_volinfo_t *volinfo, dict_t *volumes, + int count); + +int +glusterd_restart_bricks(void *opaque); int32_t -glusterd_volume_txn (rpcsvc_request_t *req, char *volname, int flags, - glusterd_op_t op); +glusterd_volume_txn(rpcsvc_request_t *req, char *volname, int flags, + glusterd_op_t op); int -glusterd_peer_handshake (xlator_t *this, struct rpc_clnt *rpc, - 
glusterd_peerctx_t *peerctx); +glusterd_peer_dump_version(xlator_t *this, struct rpc_clnt *rpc, + glusterd_peerctx_t *peerctx); int -glusterd_validate_reconfopts (glusterd_volinfo_t *volinfo, dict_t *val_dict, char **op_errstr); +glusterd_validate_reconfopts(glusterd_volinfo_t *volinfo, dict_t *val_dict, + char **op_errstr); int -glusterd_handle_cli_profile_volume (rpcsvc_request_t *req); +glusterd_handle_cli_profile_volume(rpcsvc_request_t *req); + +int +glusterd_handle_getwd(rpcsvc_request_t *req); int32_t -glusterd_set_volume (rpcsvc_request_t *req, dict_t *dict); +glusterd_set_volume(rpcsvc_request_t *req, dict_t *dict); int -glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata, - rpc_clnt_event_t event, - void *data); +glusterd_peer_rpc_notify(struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data); int -glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata, +glusterd_brick_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event, void *data); int -glusterd_rpc_create (struct rpc_clnt **rpc, dict_t *options, - rpc_clnt_notify_t notify_fn, void *notify_data); +glusterd_rpc_create(struct rpc_clnt **rpc, dict_t *options, + rpc_clnt_notify_t notify_fn, void *notify_data, + gf_boolean_t force); + +/* handler functions */ +int32_t +glusterd_op_begin(rpcsvc_request_t *req, glusterd_op_t op, void *ctx, + char *err_str, size_t size); + +/* removed other definitions as they have been defined elsewhere in this file*/ + +int +glusterd_handle_cli_statedump_volume(rpcsvc_request_t *req); +int +glusterd_handle_cli_clearlocks_volume(rpcsvc_request_t *req); + +int +glusterd_handle_defrag_start(glusterd_volinfo_t *volinfo, char *op_errstr, + size_t len, int cmd, defrag_cbk_fn_t cbk, + glusterd_op_t op); +int +glusterd_rebalance_rpc_create(glusterd_volinfo_t *volinfo); + +int +glusterd_rebalance_defrag_init(glusterd_volinfo_t *volinfo, + defrag_cbk_fn_t cbk); + +int +glusterd_handle_cli_heal_volume(rpcsvc_request_t *req); + +int +glusterd_handle_cli_list_volume(rpcsvc_request_t *req); + +int +glusterd_handle_snapshot(rpcsvc_request_t *req); + +/* op-sm functions */ +int +glusterd_op_stage_heal_volume(dict_t *dict, char **op_errstr); +int +glusterd_op_heal_volume(dict_t *dict, char **op_errstr); +int +glusterd_op_stage_gsync_set(dict_t *dict, char **op_errstr); +int +glusterd_op_gsync_set(dict_t *dict, char **op_errstr, dict_t *rsp_dict); +int +glusterd_op_stage_copy_file(dict_t *dict, char **op_errstr); +int +glusterd_op_copy_file(dict_t *dict, char **op_errstr); +int +glusterd_op_stage_sys_exec(dict_t *dict, char **op_errstr); +int +glusterd_op_sys_exec(dict_t *dict, char **op_errstr, dict_t *rsp_dict); +int +glusterd_op_stage_gsync_create(dict_t *dict, char **op_errstr); +int +glusterd_op_gsync_create(dict_t *dict, char **op_errstr, dict_t *rsp_dict); +int +glusterd_op_quota(dict_t *dict, char **op_errstr, dict_t *rsp_dict); + +int +glusterd_op_bitrot(dict_t *dict, char **op_errstr, dict_t *rsp_dict); + +int +glusterd_op_stage_quota(dict_t *dict, char **op_errstr, dict_t *rsp_dict); + +int +glusterd_op_stage_bitrot(dict_t *dict, char **op_errstr, dict_t *rsp_dict); + +int +glusterd_op_stage_replace_brick(dict_t *dict, char **op_errstr, + dict_t *rsp_dict); +int +glusterd_op_replace_brick(dict_t *dict, dict_t *rsp_dict); +int +glusterd_op_log_rotate(dict_t *dict); +int +glusterd_op_stage_log_rotate(dict_t *dict, char **op_errstr); +int +glusterd_op_stage_create_volume(dict_t *dict, char **op_errstr, + dict_t *rsp_dict); +int 
+glusterd_op_stage_start_volume(dict_t *dict, char **op_errstr,
+                               dict_t *rsp_dict);
+int
+glusterd_op_stage_stop_volume(dict_t *dict, char **op_errstr);
+int
+glusterd_op_stage_delete_volume(dict_t *dict, char **op_errstr);
+int
+glusterd_op_create_volume(dict_t *dict, char **op_errstr);
+int
+glusterd_op_start_volume(dict_t *dict, char **op_errstr);
+int
+glusterd_op_stop_volume(dict_t *dict);
+int
+glusterd_op_delete_volume(dict_t *dict);
+int
+glusterd_handle_ganesha_op(dict_t *dict, char **op_errstr, char *key,
+                           char *value);
+int
+glusterd_check_ganesha_cmd(char *key, char *value, char **errstr, dict_t *dict);
+int
+glusterd_op_stage_set_ganesha(dict_t *dict, char **op_errstr);
+int
+glusterd_op_set_ganesha(dict_t *dict, char **errstr);
+int
+ganesha_manage_export(dict_t *dict, char *value,
+                      gf_boolean_t update_cache_invalidation, char **op_errstr);
+int
+gd_ganesha_send_dbus(char *volname, char *value);
+gf_boolean_t
+glusterd_is_ganesha_cluster();
+gf_boolean_t
+glusterd_check_ganesha_export(glusterd_volinfo_t *volinfo);
+int
+stop_ganesha(char **op_errstr);
+int
+tear_down_cluster(gf_boolean_t run_teardown);
+int
+manage_export_config(char *volname, char *value, char **op_errstr);
+
+int
+glusterd_op_add_brick(dict_t *dict, char **op_errstr);
+int
+glusterd_op_remove_brick(dict_t *dict, char **op_errstr);
+int
+glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict);
+int
+glusterd_op_stage_remove_brick(dict_t *dict, char **op_errstr);
+
+int
+glusterd_set_rebalance_id_for_remove_brick(dict_t *req_dict, dict_t *rsp_dict);
+
+int
+glusterd_set_rebalance_id_in_rsp_dict(dict_t *req_dict, dict_t *rsp_dict);
+
+int
+glusterd_mgmt_v3_op_stage_rebalance(dict_t *dict, char **op_errstr);
+
+int
+glusterd_mgmt_v3_op_rebalance(dict_t *dict, char **op_errstr, dict_t *rsp_dict);
+
+int
+glusterd_op_stage_rebalance(dict_t *dict, char **op_errstr);
+int
+glusterd_op_rebalance(dict_t *dict, char **op_errstr, dict_t *rsp_dict);
+
+int
+glusterd_op_stage_statedump_volume(dict_t *dict, char **op_errstr);
+int
+glusterd_op_statedump_volume(dict_t *dict, char **op_errstr);
+
+int
+glusterd_op_stage_clearlocks_volume(dict_t *dict, char **op_errstr);
+int
+glusterd_op_clearlocks_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict);
+
+int
+glusterd_op_stage_barrier(dict_t *dict, char **op_errstr);
+int
+glusterd_op_barrier(dict_t *dict, char **op_errstr);
+
+/* misc */
+int
+glusterd_op_perform_remove_brick(glusterd_volinfo_t *volinfo, char *brick,
+                                 int force, int *need_migrate);
+int
+glusterd_op_stop_volume_args_get(dict_t *dict, char **volname, int *flags);
+int
+glusterd_op_statedump_volume_args_get(dict_t *dict, char **volname,
+                                      char **options, int *option_cnt);
+
+int
+glusterd_op_gsync_args_get(dict_t *dict, char **op_errstr, char **master,
+                           char **slave, char **host_uuid);
+
+int
+glusterd_op_get_max_opversion(char **op_errstr, dict_t *rsp_dict);
+
+int
+glusterd_start_volume(glusterd_volinfo_t *volinfo, int flags,
+                      gf_boolean_t wait);
+
+int
+glusterd_stop_volume(glusterd_volinfo_t *volinfo);
+
+/* Synctask part */
+int32_t
+glusterd_op_begin_synctask(rpcsvc_request_t *req, glusterd_op_t op, void *dict);
+int32_t
+glusterd_defrag_event_notify_handle(dict_t *dict);
+
+int32_t
+glusterd_txn_opinfo_dict_init();
+
+void
+glusterd_txn_opinfo_dict_fini();
+
+void
+glusterd_txn_opinfo_init();
+
+/* snapshot */
+glusterd_snap_t *
+glusterd_new_snap_object();
+
+int32_t
+glusterd_list_add_snapvol(glusterd_volinfo_t *origin_vol,
+                          glusterd_volinfo_t *snap_vol);
+
+glusterd_snap_t *
+glusterd_remove_snap_by_id(uuid_t snap_id);
+
+glusterd_snap_t *
+glusterd_remove_snap_by_name(char *snap_name);
+
+glusterd_snap_t *
+glusterd_find_snap_by_name(char *snap_name);
+
+glusterd_snap_t *
+glusterd_find_snap_by_id(uuid_t snap_id);
+
+int
+glusterd_snapshot_prevalidate(dict_t *dict, char **op_errstr, dict_t *rsp_dict,
+                              uint32_t *op_errno);
+int
+glusterd_snapshot_brickop(dict_t *dict, char **op_errstr, dict_t *rsp_dict);
+int
+glusterd_snapshot(dict_t *dict, char **op_errstr, uint32_t *op_errno,
+                  dict_t *rsp_dict);
+int
+glusterd_snapshot_postvalidate(dict_t *dict, int32_t op_ret, char **op_errstr,
+                               dict_t *rsp_dict);
+char *
+glusterd_build_snap_device_path(char *device, char *snapname,
+                                int32_t brick_count);
+
+int32_t
+glusterd_snap_remove(dict_t *rsp_dict, glusterd_snap_t *snap,
+                     gf_boolean_t remove_lvm, gf_boolean_t force,
+                     gf_boolean_t is_clone);
+int32_t
+glusterd_snapshot_cleanup(dict_t *dict, char **op_errstr, dict_t *rsp_dict);
+
+int32_t
+glusterd_add_missed_snaps_to_list(dict_t *dict, int32_t missed_snap_count);
+
+int32_t
+glusterd_add_new_entry_to_list(char *missed_info, char *snap_vol_id,
+                               int32_t brick_num, char *brick_path,
+                               int32_t snap_op, int32_t snap_status);
+
+int
+glusterd_snapshot_revert_restore_from_snap(glusterd_snap_t *snap);
+
+int
+glusterd_add_brick_status_to_dict(dict_t *dict, glusterd_volinfo_t *volinfo,
+                                  glusterd_brickinfo_t *brickinfo,
+                                  char *key_prefix);
+
+int32_t
+glusterd_handle_snap_limit(dict_t *dict, dict_t *rsp_dict);
+
+gf_boolean_t
+glusterd_should_i_stop_bitd();
+
+int
+glusterd_remove_brick_migrate_cbk(glusterd_volinfo_t *volinfo,
+                                  gf_defrag_status_t status);
+int
+__glusterd_handle_reset_brick(rpcsvc_request_t *req);
+
+int
+glusterd_options_init(xlator_t *this);
+
+int32_t
+glusterd_recreate_volfiles(glusterd_conf_t *conf);
+
+void
+glusterd_add_peers_to_auth_list(char *volname);
+
+int
+glusterd_replace_old_auth_allow_list(char *volname);
 #endif
