Diffstat (limited to 'geo-replication')
43 files changed, 8488 insertions, 3255 deletions
diff --git a/geo-replication/Makefile.am b/geo-replication/Makefile.am index 556951d9fb7..591b23d0eaf 100644 --- a/geo-replication/Makefile.am +++ b/geo-replication/Makefile.am @@ -1,3 +1,8 @@ SUBDIRS = syncdaemon src CLEANFILES = + +EXTRA_DIST = gsyncd.conf.in + +gsyncdconfdir = $(sysconfdir)/glusterfs/ +gsyncdconf_DATA = gsyncd.conf diff --git a/geo-replication/gsyncd.conf.in b/geo-replication/gsyncd.conf.in new file mode 100644 index 00000000000..9688c79fab7 --- /dev/null +++ b/geo-replication/gsyncd.conf.in @@ -0,0 +1,349 @@ +[__meta__] +version = 4.0 + +[master-bricks] +configurable=false + +[slave-bricks] +configurable=false + +[master-volume-id] +configurable=false + +[slave-volume-id] +configurable=false + +[master-replica-count] +configurable=false +type=int +value=1 + +[master-disperse-count] +configurable=false +type=int +value=1 + +[master-distribution-count] +configurable=false +type=int +value=1 + +[glusterd-workdir] +value = @GLUSTERD_WORKDIR@ + +[gluster-logdir] +value = /var/log/glusterfs + +[gluster-rundir] +value = /var/run/gluster + +[gsyncd-miscdir] +value = /var/lib/misc/gluster/gsyncd + +[stime-xattr-prefix] +value= + +[checkpoint] +value=0 +help=Set Checkpoint +validation=unixtime +type=int + +[gluster-cli-options] +value= +help=Gluster CLI Options + +[pid-file] +value=${gluster_rundir}/gsyncd-${master}-${primary_slave_host}-${slavevol}.pid +configurable=false +template = true +help=PID file path + +[state-file] +value=${glusterd_workdir}/geo-replication/${master}_${primary_slave_host}_${slavevol}/monitor.status +configurable=false +template=true +help=Status File path + +[georep-session-working-dir] +value=${glusterd_workdir}/geo-replication/${master}_${primary_slave_host}_${slavevol}/ +template=true +help=Session Working directory +configurable=false + +[access-mount] +value=false +type=bool +validation=bool +help=Do not lazy unmount the master volume. This allows admin to access the mount for debugging. + +[slave-access-mount] +value=false +type=bool +validation=bool +help=Do not lazy unmount the slave volume. This allows admin to access the mount for debugging. + +[isolated-slaves] +value= +help=List of Slave nodes which are isolated + +[changelog-batch-size] +# Max size of Changelogs to process per batch, Changelogs Processing is +# not limited by the number of changelogs but instead based on +# size of the changelog file, One sample changelog file size was 145408 +# with ~1000 CREATE and ~1000 DATA. 5 such files in one batch is 727040 +# If geo-rep worker crashes while processing a batch, it has to retry only +# that batch since stime will get updated after each batch. +value=727040 +help=Max size of Changelogs to process per batch. +type=int + +[slave-timeout] +value=120 +type=int +help=Timeout in seconds for Slave Gsyncd. If no activity from master for this timeout, Slave gsyncd will be disconnected. Set Timeout to zero to skip this check. + +[connection-timeout] +value=60 +type=int +help=Timeout for mounts + +[replica-failover-interval] +value=1 +type=int +help=Minimum time interval in seconds for passive worker to become Active + +[changelog-archive-format] +value=%Y%m +help=Processed changelogs will be archived in working directory. Pattern for archive file + +[use-meta-volume] +value=false +type=bool +help=Use this to set Active Passive mode to meta-volume. 
+ +[meta-volume-mnt] +value=/run/gluster/shared_storage +help=Meta Volume or Shared Volume mount path + +[allow-network] +value= + +[change-interval] +value=5 +type=int + +[sync-method] +value=rsync +help=Sync method for data sync. Available methods are tar over ssh and rsync. Default is rsync. +validation=choice +allowed_values=tarssh,rsync + +[remote-gsyncd] +value = +help=If SSH keys are not secured with gsyncd prefix then use this configuration to set the actual path of gsyncd(Usually /usr/libexec/glusterfs/gsyncd) + +[gluster-command-dir] +value=@SBIN_DIR@ +help=Directory where Gluster binaries exist on master + +[slave-gluster-command-dir] +value=@SBIN_DIR@ +help=Directory where Gluster binaries exist on slave + +[gluster-params] +value = aux-gfid-mount acl +help=Parameters for Gluster Geo-rep mount in Master + +[slave-gluster-params] +value = aux-gfid-mount acl +help=Parameters for Gluster Geo-rep mount in Slave + +[ignore-deletes] +value = false +type=bool +help=Do not sync deletes in Slave + +[special-sync-mode] +# tunables for failover/failback mechanism: +# None - gsyncd behaves as normal +# blind - gsyncd works with xtime pairs to identify +# candidates for synchronization +# wrapup - same as normal mode but does not assign +# xtimes to orphaned files +# see crawl() for usage of the above tunables +value = +help= + +[gfid-conflict-resolution] +value = true +validation=bool +type=bool +help=Disables automatic gfid conflict resolution while syncing + +[working-dir] +value = ${gsyncd_miscdir}/${master}_${primary_slave_host}_${slavevol}/ +template=true +configurable=false +help=Working directory for storing Changelogs + +[change-detector] +value=changelog +help=Change detector +validation=choice +allowed_values=changelog,xsync + +[cli-log-file] +value=${gluster_logdir}/geo-replication/cli.log +template=true +configurable=false + +[cli-log-level] +value=INFO +help=Set CLI Log Level +validation=choice +allowed_values=ERROR,INFO,WARNING,DEBUG + +[log-file] +value=${gluster_logdir}/geo-replication/${master}_${primary_slave_host}_${slavevol}/gsyncd.log +configurable=false +template=true + +[changelog-log-file] +value=${gluster_logdir}/geo-replication/${master}_${primary_slave_host}_${slavevol}/changes-${local_id}.log +configurable=false +template=true + +[gluster-log-file] +value=${gluster_logdir}/geo-replication/${master}_${primary_slave_host}_${slavevol}/mnt-${local_id}.log +template=true +configurable=false + +[slave-log-file] +value=${gluster_logdir}/geo-replication-slaves/${master}_${primary_slave_host}_${slavevol}/gsyncd.log +template=true +configurable=false + +[slave-gluster-log-file] +value=${gluster_logdir}/geo-replication-slaves/${master}_${primary_slave_host}_${slavevol}/mnt-${master_node}-${master_brick_id}.log +template=true +configurable=false + +[slave-gluster-log-file-mbr] +value=${gluster_logdir}/geo-replication-slaves/${master}_${primary_slave_host}_${slavevol}/mnt-mbr-${master_node}-${master_brick_id}.log +template=true +configurable=false + +[log-level] +value=INFO +help=Set Log Level +validation=choice +allowed_values=ERROR,INFO,WARNING,DEBUG + +[gluster-log-level] +value=INFO +help=Set Gluster mount Log Level +validation=choice +allowed_values=ERROR,INFO,WARNING,DEBUG + +[changelog-log-level] +value=INFO +help=Set Changelog Log Level +validation=choice +allowed_values=ERROR,INFO,WARNING,DEBUG + +[slave-log-level] +value=INFO +help=Set Slave Gsyncd Log Level +validation=choice +allowed_values=ERROR,INFO,WARNING,DEBUG + +[slave-gluster-log-level] +value=INFO 
+help=Set Slave Gluster mount Log Level +validation=choice +allowed_values=ERROR,INFO,WARNING,DEBUG + +[ssh-port] +value=22 +validation=minmax +min=1 +max=65535 +help=Set SSH port +type=int + +[ssh-command] +value=ssh +help=Set ssh binary path +validation=execpath + +[tar-command] +value=tar +help=Set tar command path +validation=execpath + +[ssh-options] +value = -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i ${glusterd_workdir}/geo-replication/secret.pem +template=true + +[ssh-options-tar] +value = -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i ${glusterd_workdir}/geo-replication/tar_ssh.pem +template=true + +[gluster-command] +value=gluster +help=Set gluster binary path +validation=execpath + +[sync-jobs] +value=3 +help=Number of Syncer jobs +validation=minmax +min=1 +max=100 +type=int + +[rsync-command] +value=rsync +help=Set rsync command path +validation=execpath + +[rsync-options] +value= + +[rsync-ssh-options] +value= + +[rsync-opt-ignore-missing-args] +value=true +type=bool + +[rsync-opt-existing] +value=true +type=bool + +[log-rsync-performance] +value=false +help=Log Rsync performance +validation=bool +type=bool + +[use-rsync-xattrs] +value=false +type=bool + +[sync-xattrs] +value=true +type=bool + +[sync-acls] +value=true +type=bool + +[max-rsync-retries] +value=10 +type=int + +[state_socket_unencoded] +# Unused, For backward compatibility +value= diff --git a/geo-replication/setup.py b/geo-replication/setup.py new file mode 100644 index 00000000000..0eae469d2d6 --- /dev/null +++ b/geo-replication/setup.py @@ -0,0 +1,32 @@ +# +# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com> +# This file is part of GlusterFS. +# +# This file is licensed to you under your choice of the GNU Lesser +# General Public License, version 3 or any later version (LGPLv3 or +# later), or the GNU General Public License, version 2 (GPLv2), in all +# cases as published by the Free Software Foundation. 
+# + +""" +This setup.py only used to run tests, since geo-replication will +be installed in /usr/local/libexec/glusterfs or /usr/libexec/glusterfs +""" +from setuptools import setup + +name = 'syncdaemon' + +setup( + name=name, + version="", + description='GlusterFS Geo Replication', + license='GPLV2 and LGPLV3+', + author='Red Hat, Inc.', + author_email='gluster-devel@gluster.org', + url='http://www.gluster.org', + packages=[name, ], + test_suite='nose.collector', + install_requires=[], + scripts=[], + entry_points={}, +) diff --git a/geo-replication/src/Makefile.am b/geo-replication/src/Makefile.am index 324d8869f8b..9937a0bd026 100644 --- a/geo-replication/src/Makefile.am +++ b/geo-replication/src/Makefile.am @@ -1,33 +1,48 @@ +gsyncddir = $(GLUSTERFS_LIBEXECDIR) -gsyncddir = $(libexecdir)/glusterfs - -gsyncd_SCRIPTS = gverify.sh peer_add_secret_pub peer_gsec_create +gsyncd_SCRIPTS = gverify.sh peer_gsec_create \ + set_geo_rep_pem_keys.sh peer_mountbroker peer_mountbroker.py \ + peer_georep-sshkey.py # peer_gsec_create and peer_add_secret_pub are not added to # EXTRA_DIST as it's derived from a .in file -EXTRA_DIST = gverify.sh +EXTRA_DIST = gverify.sh set_geo_rep_pem_keys.sh peer_mountbroker.py.in \ + peer_georep-sshkey.py.in gsyncd_PROGRAMS = gsyncd gsyncd_SOURCES = gsyncd.c procdiggy.c -gsyncd_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ - $(GF_GLUSTERFS_LIBS) +gsyncd_LDADD = $(GF_LDADD) $(top_builddir)/libglusterfs/src/libglusterfs.la gsyncd_LDFLAGS = $(GF_LDFLAGS) noinst_HEADERS = procdiggy.h -AM_CPPFLAGS = $(GF_CPPFLAGS) \ - -I$(top_srcdir)/libglusterfs/src\ - -DGSYNCD_PREFIX=\"$(libexecdir)/glusterfs\"\ - -DUSE_LIBGLUSTERFS\ +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ + -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \ + -DGSYNCD_PREFIX=\"$(GLUSTERFS_LIBEXECDIR)\" -DUSE_LIBGLUSTERFS \ -DSBIN_DIR=\"$(sbindir)\" -DPYTHON=\"$(PYTHON)\" AM_CFLAGS = -Wall $(GF_CFLAGS) - CLEANFILES = $(top_builddir)/libglusterfs/src/libglusterfs.la: $(MAKE) -C $(top_builddir)/libglusterfs/src/ all + + +install-exec-hook: + $(mkdir_p) $(DESTDIR)$(sbindir) + rm -f $(DESTDIR)$(sbindir)/gluster-mountbroker + ln -s $(GLUSTERFS_LIBEXECDIR)/peer_mountbroker.py \ + $(DESTDIR)$(sbindir)/gluster-mountbroker + + rm -f $(DESTDIR)$(sbindir)/gluster-georep-sshkey + ln -s $(GLUSTERFS_LIBEXECDIR)/peer_georep-sshkey.py \ + $(DESTDIR)$(sbindir)/gluster-georep-sshkey + + +uninstall-hook: + rm -f $(DESTDIR)$(sbindir)/gluster-mountbroker + rm -f $(DESTDIR)$(sbindir)/gluster-georep-sshkey diff --git a/geo-replication/src/gsyncd.c b/geo-replication/src/gsyncd.c index 0830e7f9bcc..b5aeec5bf33 100644 --- a/geo-replication/src/gsyncd.c +++ b/geo-replication/src/gsyncd.c @@ -7,11 +7,8 @@ later), or the GNU General Public License, version 2 (GPLv2), in all cases as published by the Free Software Foundation. */ - -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif +#include <glusterfs/compat.h> +#include <glusterfs/syscall.h> #include <stdlib.h> #include <stdio.h> @@ -27,12 +24,13 @@ * We unconditionally pass then while building gsyncd binary. 
*/ #ifdef USE_LIBGLUSTERFS -#include "glusterfs.h" -#include "globals.h" +#include <glusterfs/glusterfs.h> +#include <glusterfs/globals.h> +#include <glusterfs/defaults.h> #endif -#include "common-utils.h" -#include "run.h" +#include <glusterfs/common-utils.h> +#include <glusterfs/run.h> #include "procdiggy.h" #define _GLUSTERD_CALLED_ "_GLUSTERD_CALLED_" @@ -44,365 +42,361 @@ int restricted = 0; static int -duplexpand (void **buf, size_t tsiz, size_t *len) +duplexpand(void **buf, size_t tsiz, size_t *len) { - size_t osiz = tsiz * *len; - char *p = realloc (*buf, osiz << 1); - if (!p) { - free(*buf); - return -1; - } + size_t osiz = tsiz * *len; + char *p = realloc(*buf, osiz << 1); + if (!p) { + return -1; + } - memset (p + osiz, 0, osiz); - *buf = p; - *len <<= 1; + memset(p + osiz, 0, osiz); + *buf = p; + *len <<= 1; - return 0; + return 0; } static int -str2argv (char *str, char ***argv) +str2argv(char *str, char ***argv) { - char *p = NULL; - char *savetok = NULL; - int argc = 0; - size_t argv_len = 32; - int ret = 0; - - assert (str); - str = strdup (str); - if (!str) - return -1; - - *argv = calloc (argv_len, sizeof (**argv)); - if (!*argv) + char *p = NULL; + char *savetok = NULL; + char *temp = NULL; + char *temp1 = NULL; + int argc = 0; + size_t argv_len = 32; + int ret = 0; + int i = 0; + + assert(str); + temp = str = strdup(str); + if (!str) + goto error; + + *argv = calloc(argv_len, sizeof(**argv)); + if (!*argv) + goto error; + + while ((p = strtok_r(str, " ", &savetok))) { + str = NULL; + + argc++; + if (argc == argv_len) { + ret = duplexpand((void *)argv, sizeof(**argv), &argv_len); + if (ret == -1) goto error; - - while ((p = strtok_r (str, " ", &savetok))) { - str = NULL; - - argc++; - if (argc == argv_len) { - ret = duplexpand ((void *)argv, - sizeof (**argv), - &argv_len); - if (ret == -1) - goto error; - } - (*argv)[argc - 1] = p; } - - return argc; - - error: - fprintf (stderr, "out of memory\n"); - return -1; + temp1 = strdup(p); + if (!temp1) + goto error; + (*argv)[argc - 1] = temp1; + } + + free(temp); + return argc; + +error: + fprintf(stderr, "out of memory\n"); + free(temp); + for (i = 0; i < argc - 1; i++) + free((*argv)[i]); + free(*argv); + return -1; } static int -invoke_gsyncd (int argc, char **argv) +invoke_gsyncd(int argc, char **argv) { - char config_file[PATH_MAX] = {0,}; - size_t gluster_workdir_len = 0; - runner_t runner = {0,}; - int i = 0; - int j = 0; - char *nargv[argc + 4]; - char *python = NULL; - - if (restricted) { - size_t len; - /* in restricted mode we forcibly use the system-wide config */ - runinit (&runner); - runner_add_args (&runner, SBIN_DIR"/gluster", - "--log-file=-", "system::", "getwd", - NULL); - runner_redir (&runner, STDOUT_FILENO, RUN_PIPE); - if (runner_start (&runner) == 0 && - fgets (config_file, PATH_MAX, - runner_chio (&runner, STDOUT_FILENO)) != NULL && - (len = strlen (config_file)) && - config_file[len - 1] == '\n' && - runner_end (&runner) == 0) - gluster_workdir_len = len - 1; - - if (gluster_workdir_len) { - if (gluster_workdir_len + 1 + strlen (GSYNCD_CONF_TEMPLATE) + 1 > - PATH_MAX) - goto error; - config_file[gluster_workdir_len] = '/'; - strcat (config_file, GSYNCD_CONF_TEMPLATE); - } else - goto error; - - if (setenv ("_GSYNCD_RESTRICTED_", "1", 1) == -1) - goto error; - } + int i = 0; + int j = 0; + char *nargv[argc + 4]; + char *python = NULL; - if (chdir ("/") == -1) - goto error; + if (chdir("/") == -1) + goto error; - j = 0; - python = getenv("PYTHON"); - if(!python) - python = PYTHON; - nargv[j++] = 
python; - nargv[j++] = GSYNCD_PREFIX"/python/syncdaemon/"GSYNCD_PY; - for (i = 1; i < argc; i++) - nargv[j++] = argv[i]; - if (config_file[0]) { - nargv[j++] = "-c"; - nargv[j++] = config_file; - } - nargv[j++] = NULL; + j = 0; + python = getenv("PYTHON"); + if (!python) + python = PYTHON; + nargv[j++] = python; + nargv[j++] = GSYNCD_PREFIX "/python/syncdaemon/" GSYNCD_PY; + for (i = 1; i < argc; i++) + nargv[j++] = argv[i]; - execvp (python, nargv); + nargv[j++] = NULL; - fprintf (stderr, "exec of '%s' failed\n", python); - return 127; + execvp(python, nargv); - error: - fprintf (stderr, "gsyncd initializaion failed\n"); - return 1; -} + fprintf(stderr, "exec of '%s' failed\n", python); + return 127; +error: + fprintf(stderr, "gsyncd initializaion failed\n"); + return 1; +} static int -find_gsyncd (pid_t pid, pid_t ppid, char *name, void *data) +find_gsyncd(pid_t pid, pid_t ppid, char *name, void *data) { - char buf[NAME_MAX * 2] = {0,}; - char path[PATH_MAX] = {0,}; - char *p = NULL; - int zeros = 0; - int ret = 0; - int fd = -1; - pid_t *pida = (pid_t *)data; - - if (ppid != pida[0]) - return 0; - - sprintf (path, PROC"/%d/cmdline", pid); - fd = open (path, O_RDONLY); - if (fd == -1) - return 0; - ret = read (fd, buf, sizeof (buf)); - close (fd); - if (ret == -1) - return 0; - for (zeros = 0, p = buf; zeros < 2 && p < buf + ret; p++) - zeros += !*p; - - ret = 0; - switch (zeros) { + char buf[NAME_MAX * 2] = { + 0, + }; + char path[PATH_MAX] = { + 0, + }; + char *p = NULL; + int zeros = 0; + int ret = 0; + int fd = -1; + pid_t *pida = (pid_t *)data; + + if (ppid != pida[0]) + return 0; + + snprintf(path, sizeof path, PROC "/%d/cmdline", pid); + fd = open(path, O_RDONLY); + if (fd == -1) + return 0; + ret = sys_read(fd, buf, sizeof(buf)); + sys_close(fd); + if (ret == -1) + return 0; + for (zeros = 0, p = buf; zeros < 2 && p < buf + ret; p++) + zeros += !*p; + + ret = 0; + switch (zeros) { case 2: - if ((strcmp (basename (buf), basename (PYTHON)) || - strcmp (basename (buf + strlen (buf) + 1), GSYNCD_PY)) == 0) { - ret = 1; - break; - } - /* fallthrough */ + if ((strcmp(basename(buf), basename(PYTHON)) || + strcmp(basename(buf + strlen(buf) + 1), GSYNCD_PY)) == 0) { + ret = 1; + break; + } + /* fallthrough */ case 1: - if (strcmp (basename (buf), GSYNCD_PY) == 0) - ret = 1; - } - - if (ret == 1) { - if (pida[1] != -1) { - fprintf (stderr, GSYNCD_PY" sibling is not unique"); - return -1; - } - pida[1] = pid; + if (strcmp(basename(buf), GSYNCD_PY) == 0) + ret = 1; + } + + if (ret == 1) { + if (pida[1] != -1) { + fprintf(stderr, GSYNCD_PY " sibling is not unique"); + return -1; } + pida[1] = pid; + } - return 0; + return 0; } static int -invoke_rsync (int argc, char **argv) +invoke_rsync(int argc, char **argv) { - int i = 0; - char path[PATH_MAX] = {0,}; - pid_t pid = -1; - pid_t ppid = -1; - pid_t pida[] = {-1, -1}; - char *name = NULL; - char buf[PATH_MAX + 1] = {0,}; - int ret = 0; - - assert (argv[argc] == NULL); - - if (argc < 2 || strcmp (argv[1], "--server") != 0) - goto error; - - for (i = 2; i < argc && argv[i][0] == '-'; i++); - - if (!(i == argc - 2 && strcmp (argv[i], ".") == 0 && argv[i + 1][0] == '/')) { - fprintf (stderr, "need an rsync invocation without protected args\n"); - goto error; - } - - /* look up sshd we are spawned from */ - for (pid = getpid () ;; pid = ppid) { - ppid = pidinfo (pid, &name); - if (ppid < 0) { - fprintf (stderr, "sshd ancestor not found\n"); - goto error; - } - if (strcmp (name, "sshd") == 0) { - GF_FREE (name); - break; - } - GF_FREE (name); - } - 
/* look up "ssh-sibling" gsyncd */ - pida[0] = pid; - ret = prociter (find_gsyncd, pida); - if (ret == -1 || pida[1] == -1) { - fprintf (stderr, "gsyncd sibling not found\n"); - goto error; + int i = 0; + char path[PATH_MAX] = { + 0, + }; + pid_t pid = -1; + pid_t ppid = -1; + pid_t pida[] = {-1, -1}; + char *name = NULL; + char buf[PATH_MAX + 1] = { + 0, + }; + int ret = 0; + + assert(argv[argc] == NULL); + + if (argc < 2 || strcmp(argv[1], "--server") != 0) + goto error; + + for (i = 2; i < argc && argv[i][0] == '-'; i++) + ; + + if (!(i == argc - 2 && strcmp(argv[i], ".") == 0 && + argv[i + 1][0] == '/')) { + fprintf(stderr, "need an rsync invocation without protected args\n"); + goto error; + } + + /* look up sshd we are spawned from */ + for (pid = getpid();; pid = ppid) { + ppid = pidinfo(pid, &name); + if (ppid < 0) { + fprintf(stderr, "sshd ancestor not found\n"); + goto error; } - /* check if rsync target matches gsyncd target */ - sprintf (path, PROC"/%d/cwd", pida[1]); - ret = readlink (path, buf, sizeof (buf)); - if (ret == -1 || ret == sizeof (buf)) - goto error; - if (strcmp (argv[argc - 1], "/") == 0 /* root dir cannot be a target */ || - (strcmp (argv[argc - 1], path) /* match against gluster target */ && - strcmp (argv[argc - 1], buf) /* match against file target */) != 0) { - fprintf (stderr, "rsync target does not match "GEOREP" session\n"); - goto error; + if (strcmp(name, "sshd") == 0) { + GF_FREE(name); + break; } - - argv[0] = RSYNC; - - execvp (RSYNC, argv); - - fprintf (stderr, "exec of "RSYNC" failed\n"); - return 127; - - error: - fprintf (stderr, "disallowed "RSYNC" invocation\n"); - return 1; + GF_FREE(name); + } + /* look up "ssh-sibling" gsyncd */ + pida[0] = pid; + ret = prociter(find_gsyncd, pida); + if (ret == -1 || pida[1] == -1) { + fprintf(stderr, "gsyncd sibling not found\n"); + goto error; + } + /* check if rsync target matches gsyncd target */ + snprintf(path, sizeof path, PROC "/%d/cwd", pida[1]); + ret = sys_readlink(path, buf, sizeof(buf)); + if (ret == -1 || ret == sizeof(buf)) + goto error; + if (strcmp(argv[argc - 1], "/") == 0 /* root dir cannot be a target */ || + (strcmp(argv[argc - 1], path) /* match against gluster target */ && + strcmp(argv[argc - 1], buf) /* match against file target */) != 0) { + fprintf(stderr, "rsync target does not match " GEOREP " session\n"); + goto error; + } + + argv[0] = RSYNC; + + execvp(RSYNC, argv); + + fprintf(stderr, "exec of " RSYNC " failed\n"); + return 127; + +error: + fprintf(stderr, "disallowed " RSYNC " invocation\n"); + return 1; } static int -invoke_gluster (int argc, char **argv) +invoke_gluster(int argc, char **argv) { - int i = 0; - int j = 0; - int optsover = 0; - char *ov = NULL; - - for (i = 1; i < argc; i++) { - ov = strtail (argv[i], "--"); - if (ov && !optsover) { - if (*ov == '\0') - optsover = 1; - continue; - } - switch (++j) { - case 1: - if (strcmp (argv[i], "volume") != 0) - goto error; - break; - case 2: - if (strcmp (argv[i], "info") != 0) - goto error; - break; - case 3: - break; - default: - goto error; - } + int i = 0; + int j = 0; + int optsover = 0; + char *ov = NULL; + + for (i = 1; i < argc; i++) { + ov = strtail(argv[i], "--"); + if (ov && !optsover) { + if (*ov == '\0') + optsover = 1; + continue; + } + switch (++j) { + case 1: + if (strcmp(argv[i], "volume") != 0) + goto error; + break; + case 2: + if (strcmp(argv[i], "info") != 0) + goto error; + break; + case 3: + break; + default: + goto error; } + } - argv[0] = "gluster"; - execvp (SBIN_DIR"/gluster", argv); - fprintf 
(stderr, "exec of gluster failed\n"); - return 127; + argv[0] = "gluster"; + execvp(SBIN_DIR "/gluster", argv); + fprintf(stderr, "exec of gluster failed\n"); + return 127; - error: - fprintf (stderr, "disallowed gluster invocation\n"); - return 1; +error: + fprintf(stderr, "disallowed gluster invocation\n"); + return 1; } struct invocable { - char *name; - int (*invoker) (int argc, char **argv); + char *name; + int (*invoker)(int argc, char **argv); }; -struct invocable invocables[] = { - { "rsync", invoke_rsync }, - { "gsyncd", invoke_gsyncd }, - { "gluster", invoke_gluster }, - { NULL, NULL} -}; +struct invocable invocables[] = {{"rsync", invoke_rsync}, + {"gsyncd", invoke_gsyncd}, + {"gluster", invoke_gluster}, + {NULL, NULL}}; int -main (int argc, char **argv) +main(int argc, char **argv) { - char *evas = NULL; - struct invocable *i = NULL; - char *b = NULL; - char *sargv = NULL; + int ret = -1; + char *evas = NULL; + struct invocable *i = NULL; + char *b = NULL; + char *sargv = NULL; + int j = 0; #ifdef USE_LIBGLUSTERFS - glusterfs_ctx_t *ctx = NULL; + glusterfs_ctx_t *ctx = NULL; - ctx = glusterfs_ctx_new (); - if (!ctx) - return ENOMEM; + ctx = glusterfs_ctx_new(); + if (!ctx) + return ENOMEM; - if (glusterfs_globals_init (ctx)) - return 1; + if (glusterfs_globals_init(ctx)) + return 1; - THIS->ctx = ctx; + THIS->ctx = ctx; + ret = default_mem_acct_init(THIS); + if (ret) { + fprintf(stderr, "internal error: mem accounting failed\n"); + return 1; + } #endif - evas = getenv (_GLUSTERD_CALLED_); - if (evas && strcmp (evas, "1") == 0) - /* OK, we know glusterd called us, no need to look for further config - * ... altough this conclusion should not inherit to our children - */ - unsetenv (_GLUSTERD_CALLED_); - else { - /* we regard all gsyncd invocations unsafe - * that do not come from glusterd and - * therefore restrict it - */ - restricted = 1; - - if (!getenv (_GSYNCD_DISPATCHED_)) { - evas = getenv ("SSH_ORIGINAL_COMMAND"); - if (evas) - sargv = evas; - else { - evas = getenv ("SHELL"); - if (evas && strcmp (basename (evas), "gsyncd") == 0 && - argc == 3 && strcmp (argv[1], "-c") == 0) - sargv = argv[2]; - } - } - - } - - if (!(sargv && restricted)) - return invoke_gsyncd (argc, argv); - - argc = str2argv (sargv, &argv); - if (argc == -1 || setenv (_GSYNCD_DISPATCHED_, "1", 1) == -1) { - fprintf (stderr, "internal error\n"); - return 1; + evas = getenv(_GLUSTERD_CALLED_); + if (evas && strcmp(evas, "1") == 0) + /* OK, we know glusterd called us, no need to look for further config + *...although this conclusion should not inherit to our children + */ + unsetenv(_GLUSTERD_CALLED_); + else { + /* we regard all gsyncd invocations unsafe + * that do not come from glusterd and + * therefore restrict it + */ + restricted = 1; + + if (!getenv(_GSYNCD_DISPATCHED_)) { + evas = getenv("SSH_ORIGINAL_COMMAND"); + if (evas) + sargv = evas; + else { + evas = getenv("SHELL"); + if (evas && strcmp(basename(evas), "gsyncd") == 0 && + argc == 3 && strcmp(argv[1], "-c") == 0) + sargv = argv[2]; + } } + } - b = basename (argv[0]); - for (i = invocables; i->name; i++) { - if (strcmp (b, i->name) == 0) - return i->invoker (argc, argv); - } + if (!(sargv && restricted)) + return invoke_gsyncd(argc, argv); - fprintf (stderr, "invoking %s in restricted SSH session is not allowed\n", - b); + argc = str2argv(sargv, &argv); + if (argc == -1) { + fprintf(stderr, "internal error\n"); return 1; + } + + if (setenv(_GSYNCD_DISPATCHED_, "1", 1) == -1) { + fprintf(stderr, "internal error\n"); + goto out; + } + + b = 
basename(argv[0]); + for (i = invocables; i->name; i++) { + if (strcmp(b, i->name) == 0) + return i->invoker(argc, argv); + } + + fprintf(stderr, "invoking %s in restricted SSH session is not allowed\n", + b); + +out: + for (j = 1; j < argc; j++) + free(argv[j]); + free(argv); + return 1; } diff --git a/geo-replication/src/gverify.sh b/geo-replication/src/gverify.sh index 186af53a407..f5f70d245e0 100755 --- a/geo-replication/src/gverify.sh +++ b/geo-replication/src/gverify.sh @@ -1,73 +1,90 @@ #!/bin/bash # Script to verify the Master and Slave Gluster compatibility. -# To use ./gverify <master volume> <slave host> <slave volume> +# To use ./gverify <master volume> <slave user> <slave host> <slave volume> <ssh port> <log file> # Returns 0 if master and slave compatible. -BUFFER_SIZE=1000; -slave_log_file=`gluster --print-logdir`/geo-replication-slaves/slave.log +# Considering buffer_size 100MB +BUFFER_SIZE=104857600; +SSH_PORT=$5; +master_log_file=`gluster --print-logdir`/geo-replication/gverify-mastermnt.log +slave_log_file=`gluster --print-logdir`/geo-replication/gverify-slavemnt.log function SSHM() { - ssh -q \ - -oPasswordAuthentication=no \ - -oStrictHostKeyChecking=no \ - -oControlMaster=yes \ - "$@"; + if [[ -z "${GR_SSH_IDENTITY_KEY}" ]]; then + ssh -p ${SSH_PORT} -q \ + -oPasswordAuthentication=no \ + -oStrictHostKeyChecking=no \ + -oControlMaster=yes \ + "$@"; + else + ssh -p ${SSH_PORT} -i ${GR_SSH_IDENTITY_KEY} -q \ + -oPasswordAuthentication=no \ + -oStrictHostKeyChecking=no \ + -oControlMaster=yes \ + "$@"; + fi } -function cmd_master() +function get_inode_num() { - VOL=$1; - local cmd_line; - cmd_line=$(cat <<EOF -function do_verify() { -v=\$1; -d=\$(mktemp -d 2>/dev/null); -glusterfs -s localhost --xlator-option="*dht.lookup-unhashed=off" --volfile-id \$v -l $slave_log_file \$d; -i=\$(stat -c "%i" \$d); -if [[ "\$i" -ne "1" ]]; then -echo 0:0; -exit 1; -fi; -cd \$d; -available_size=\$(df \$d | tail -1 | awk "{print \\\$2}"); -umount -l \$d; -rmdir \$d; -ver=\$(gluster --version | head -1 | cut -f2 -d " "); -echo \$available_size:\$ver; -}; -cd /tmp; -[ x$VOL != x ] && do_verify $VOL; -EOF -); + local os + case `uname -s` in + NetBSD) os="NetBSD";; + Linux) os="Linux";; + *) os="Default";; + esac + + if [[ "X$os" = "XNetBSD" ]]; then + echo $(stat -f "%i" "$1") + else + echo $(stat -c "%i" "$1") + fi +} + +function umount_lazy() +{ + local os + case `uname -s` in + NetBSD) os="NetBSD";; + Linux) os="Linux";; + *) os="Default";; + esac + + if [[ "X$os" = "XNetBSD" ]]; then + umount -f -R "$1" + else + umount -l "$1" + fi; +} + +function disk_usage() +{ + local os + case `uname -s` in + NetBSD) os="NetBSD";; + Linux) os="Linux";; + *) os="Default";; + esac + + if [[ "X$os" = "XNetBSD" ]]; then + echo $(df -P "$1" | tail -1) + else + echo $(df -P -B1 "$1" | tail -1) + fi; -echo $cmd_line; } function cmd_slave() { - VOL=$1; local cmd_line; cmd_line=$(cat <<EOF function do_verify() { -v=\$1; -d=\$(mktemp -d 2>/dev/null); -glusterfs -s localhost --xlator-option="*dht.lookup-unhashed=off" --volfile-id \$v -l $slave_log_file \$d; -i=\$(stat -c "%i" \$d); -if [[ "\$i" -ne "1" ]]; then -echo 0:0; -exit 1; -fi; -cd \$d; -available_size=\$(df \$d | tail -1 | awk "{print \\\$4}"); -umount -l \$d; -rmdir \$d; ver=\$(gluster --version | head -1 | cut -f2 -d " "); -echo \$available_size:\$ver; +echo \$ver; }; -cd /tmp; -[ x$VOL != x ] && do_verify $VOL; +source /etc/profile && do_verify; EOF ); @@ -77,62 +94,179 @@ echo $cmd_line; function master_stats() { MASTERVOL=$1; - local cmd_line; - 
cmd_line=$(cmd_master $MASTERVOL); - bash -c "$cmd_line"; + local inet6=$2; + local d; + local i; + local disk_size; + local used_size; + local ver; + local m_status; + + d=$(mktemp -d -t ${0##*/}.XXXXXX 2>/dev/null); + if [ "$inet6" = "inet6" ]; then + glusterfs -s localhost --xlator-option="*dht.lookup-unhashed=off" --xlator-option="transport.address-family=inet6" --volfile-id $MASTERVOL -l $master_log_file $d; + else + glusterfs -s localhost --xlator-option="*dht.lookup-unhashed=off" --volfile-id $MASTERVOL -l $master_log_file $d; + fi + + i=$(get_inode_num $d); + if [[ "$i" -ne "1" ]]; then + echo 0:0; + exit 1; + fi; + cd $d; + disk_size=$(disk_usage $d | awk "{print \$2}"); + used_size=$(disk_usage $d | awk "{print \$3}"); + umount_lazy $d; + rmdir $d; + ver=$(gluster --version | head -1 | cut -f2 -d " "); + m_status=$(echo "$disk_size:$used_size:$ver"); + echo $m_status } function slave_stats() { - SLAVEHOST=$1; - SLAVEVOL=$2; + SLAVEUSER=$1; + SLAVEHOST=$2; + SLAVEVOL=$3; + local inet6=$4; local cmd_line; - cmd_line=$(cmd_slave $SLAVEVOL); - SSHM $SLAVEHOST bash -c "'$cmd_line'"; + local ver; + local status; + + d=$(mktemp -d -t ${0##*/}.XXXXXX 2>/dev/null); + if [ "$inet6" = "inet6" ]; then + glusterfs --xlator-option="*dht.lookup-unhashed=off" --xlator-option="transport.address-family=inet6" --volfile-server $SLAVEHOST --volfile-id $SLAVEVOL -l $slave_log_file $d; + else + glusterfs --xlator-option="*dht.lookup-unhashed=off" --volfile-server $SLAVEHOST --volfile-id $SLAVEVOL -l $slave_log_file $d; + fi + + i=$(get_inode_num $d); + if [[ "$i" -ne "1" ]]; then + echo 0:0; + exit 1; + fi; + cd $d; + disk_size=$(disk_usage $d | awk "{print \$2}"); + used_size=$(disk_usage $d | awk "{print \$3}"); + no_of_files=$(find $d -maxdepth 1 -path "$d/.trashcan" -prune -o -path "$d" -o -print0 -quit); + umount_lazy $d; + rmdir $d; + + cmd_line=$(cmd_slave); + ver=`SSHM $SLAVEUSER@$SLAVEHOST bash -c "'$cmd_line'"`; + status=$disk_size:$used_size:$ver:$no_of_files; + echo $status } +function ping_host () +{ + ### Use bash internal socket support + { + exec 100<>/dev/tcp/$1/$2 + if [ $? -ne '0' ]; then + return 1; + else + exec 100>&- + return 0; + fi + } 1>&2 2>/dev/null +} function main() { + log_file=$6 + > $log_file + + inet6=$7 + local cmd_line + local ver + + # Use FORCE_BLOCKER flag in the error message to differentiate + # between the errors which the force command should bypass + + # Test tcp connection to port 22, this is necessary since `ping` + # does not work on all environments where 'ssh' is allowed but + # ICMP is filterd + + ping_host $3 ${SSH_PORT} + + if [ $? -ne 0 ]; then + echo "FORCE_BLOCKER|$3 not reachable." > $log_file + exit 1; + fi; + + if [[ -z "${GR_SSH_IDENTITY_KEY}" ]]; then + ssh -p ${SSH_PORT} -oNumberOfPasswordPrompts=0 -oStrictHostKeyChecking=no $2@$3 "echo Testing_Passwordless_SSH"; + else + ssh -p ${SSH_PORT} -i ${GR_SSH_IDENTITY_KEY} -oNumberOfPasswordPrompts=0 -oStrictHostKeyChecking=no $2@$3 "echo Testing_Passwordless_SSH"; + fi + + if [ $? -ne 0 ]; then + echo "FORCE_BLOCKER|Passwordless ssh login has not been setup with $3 for user $2." 
> $log_file + exit 1; + fi; + + cmd_line=$(cmd_slave); + if [[ -z "${GR_SSH_IDENTITY_KEY}" ]]; then + ver=$(ssh -p ${SSH_PORT} -oNumberOfPasswordPrompts=0 -oStrictHostKeyChecking=no $2@$3 bash -c "'$cmd_line'") + else + ver=$(ssh -p ${SSH_PORT} -i ${GR_SSH_IDENTITY_KEY} -oNumberOfPasswordPrompts=0 -oStrictHostKeyChecking=no $2@$3 bash -c "'$cmd_line'") + fi + + if [ -z "$ver" ]; then + echo "FORCE_BLOCKER|gluster command not found on $3 for user $2." > $log_file + exit 1; + fi; + ERRORS=0; - master_data=$(master_stats $1); - slave_data=$(slave_stats $2 $3); - master_size=$(echo $master_data | cut -f1 -d':'); - slave_size=$(echo $slave_data | cut -f1 -d':'); - master_version=$(echo $master_data | cut -f2 -d':'); - slave_version=$(echo $slave_data | cut -f2 -d':'); - log_file=$4 - - if [[ "x$master_size" = "x" || "x$master_version" = "x" || "$master_size" -eq "0" ]]; then - echo "Unable to fetch master volume details." > $log_file; + master_data=$(master_stats $1 ${inet6}); + slave_data=$(slave_stats $2 $3 $4 ${inet6}); + master_disk_size=$(echo $master_data | cut -f1 -d':'); + slave_disk_size=$(echo $slave_data | cut -f1 -d':'); + master_used_size=$(echo $master_data | cut -f2 -d':'); + slave_used_size=$(echo $slave_data | cut -f2 -d':'); + master_version=$(echo $master_data | cut -f3 -d':'); + slave_version=$(echo $slave_data | cut -f3 -d':'); + slave_no_of_files=$(echo $slave_data | cut -f4 -d':'); + + if [[ "x$master_disk_size" = "x" || "x$master_version" = "x" || "$master_disk_size" -eq "0" ]]; then + echo "FORCE_BLOCKER|Unable to mount and fetch master volume details. Please check the log: $master_log_file" > $log_file; exit 1; fi; - if [[ "x$slave_size" = "x" || "x$slave_version" = "x" || "$slave_size" -eq "0" ]]; then - ping -w 5 $2; - if [ $? -ne 0 ]; then - echo "$2 not reachable." > $log_file - exit 1; - fi; - echo "Unable to fetch slave volume details." > $log_file; + if [[ "x$slave_disk_size" = "x" || "x$slave_version" = "x" || "$slave_disk_size" -eq "0" ]]; then + echo "FORCE_BLOCKER|Unable to mount and fetch slave volume details. Please check the log: $slave_log_file" > $log_file; exit 1; fi; - if [ $slave_size -ge $(($master_size - $BUFFER_SIZE )) ]; then - echo "Total size of master is lesser than available size of slave." > $log_file; - else - echo "Total size of master is greater than available size of slave." > $log_file; - ERRORS=$(($ERRORS + 1)); - exit $ERRORS; + # The above checks are mandatory and force command should be blocked + # if they fail. The checks below can be bypassed if force option is + # provided hence no FORCE_BLOCKER flag. + + if [ "$slave_disk_size" -lt "$master_disk_size" ]; then + echo "Total disk size of master is greater than disk size of slave." >> $log_file; + ERRORS=$(($ERRORS + 1)); + fi + + effective_master_used_size=$(( $master_used_size + $BUFFER_SIZE )) + slave_available_size=$(( $slave_disk_size - $slave_used_size )) + master_available_size=$(( $master_disk_size - $effective_master_used_size )); + + if [ "$slave_available_size" -lt "$master_available_size" ]; then + echo "Total available size of master is greater than available size of slave" >> $log_file; + ERRORS=$(($ERRORS + 1)); + fi + + if [ ! -z $slave_no_of_files ]; then + echo "$3::$4 is not empty. Please delete existing files in $3::$4 and retry, or use force to continue without deleting the existing files." 
>> $log_file; + ERRORS=$(($ERRORS + 1)); fi; - if [[ $master_version < $slave_version || $master_version == $slave_version ]]; then - echo "Gluster version of master and slave matches." > $log_file; - else - echo "Gluster version mismatch between master and slave." > $log_file; - ERRORS=$(($ERRORS + 1)); - exit $ERRORS; + if [[ $master_version != $slave_version ]]; then + echo "Gluster version mismatch between master and slave. Master version: $master_version Slave version: $slave_version" >> $log_file; + ERRORS=$(($ERRORS + 1)); fi; exit $ERRORS; diff --git a/geo-replication/src/peer_add_secret_pub.in b/geo-replication/src/peer_add_secret_pub.in deleted file mode 100644 index c036cf33416..00000000000 --- a/geo-replication/src/peer_add_secret_pub.in +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash - -if [ ! -d ~/.ssh ]; then - mkdir ~/.ssh; - chmod 700 ~/.ssh - chown root:root ~/.ssh -fi - -cat "$GLUSTERD_WORKING_DIR"/geo-replication/common_secret.pem.pub >> ~/.ssh/authorized_keys diff --git a/geo-replication/src/peer_georep-sshkey.py.in b/geo-replication/src/peer_georep-sshkey.py.in new file mode 100644 index 00000000000..58696e9a616 --- /dev/null +++ b/geo-replication/src/peer_georep-sshkey.py.in @@ -0,0 +1,116 @@ +#!/usr/bin/python3 +# -*- coding: utf-8 -*- +# +# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> +# This file is part of GlusterFS. +# +# This file is licensed to you under your choice of the GNU Lesser +# General Public License, version 3 or any later version (LGPLv3 or +# later), or the GNU General Public License, version 2 (GPLv2), in all +# cases as published by the Free Software Foundation. +# +""" +Usage: + gluster-georep-sshkey generate + or + gluster-georep-sshkey generate --no-prefix + +Generates two SSH keys(one for gsyncd access and other for tar) in all +peer nodes and collects the public keys to the local node where it is +initiated. Adds `command=` prefix to common_secret.pem.pub if `--no-prefix` +argument is not passed. 
+""" +import os +import glob + +from gluster.cliutils import (node_output_ok, execute, execute_in_peers, + Cmd, runcli) +from prettytable import PrettyTable + + +SECRET_PEM = "@GLUSTERD_WORKDIR@/geo-replication/secret.pem" +TAR_SSH_PEM = "@GLUSTERD_WORKDIR@/geo-replication/tar_ssh.pem" +GSYNCD_CMD = 'command="@GLUSTERFS_LIBEXECDIR@/gsyncd" ' +TAR_CMD = 'command="tar ${SSH_ORIGINAL_COMMAND#* }" ' +COMMON_SECRET_FILE = "@GLUSTERD_WORKDIR@/geo-replication/common_secret.pem.pub" + + +class NodeGenCmd(Cmd): + name = "node-generate" + + def args(self, parser): + parser.add_argument("no_prefix") + + def run(self, args): + # Regenerate if secret.pem.pub not exists + if not os.path.exists(SECRET_PEM + ".pub"): + # Cleanup old files + for f in glob.glob(SECRET_PEM + "*"): + os.remove(f) + + execute(["ssh-keygen", "-N", "", "-f", SECRET_PEM]) + + # Regenerate if ssh_tar.pem.pub not exists + if not os.path.exists(TAR_SSH_PEM + ".pub"): + # Cleanup old files + for f in glob.glob(TAR_SSH_PEM + "*"): + os.remove(f) + + execute(["ssh-keygen", "-N", "", "-f", TAR_SSH_PEM]) + + # Add required prefixes if prefix is not "container" + prefix_secret_pem_pub = "" + prefix_tar_ssh_pem_pub = "" + if args.no_prefix != "no-prefix": + prefix_secret_pem_pub = GSYNCD_CMD + prefix_tar_ssh_pem_pub = TAR_CMD + + data = {"default_pub": "", "tar_pub": ""} + with open(SECRET_PEM + ".pub") as f: + data["default_pub"] = prefix_secret_pem_pub + f.read().strip() + + with open(TAR_SSH_PEM + ".pub") as f: + data["tar_pub"] = prefix_tar_ssh_pem_pub + f.read().strip() + + node_output_ok(data) + + +def color_status(value): + if value in ["UP", "OK"]: + return "green" + return "red" + + +class GenCmd(Cmd): + name = "generate" + + def args(self, parser): + parser.add_argument("--no-prefix", help="Do not use prefix in " + "generated pub keys", action="store_true") + + def run(self, args): + prefix = "no-prefix" if args.no_prefix else "." + out = execute_in_peers("node-generate", [prefix]) + + common_secrets = [] + table = PrettyTable(["NODE", "NODE STATUS", "KEYGEN STATUS"]) + table.align["NODE STATUS"] = "r" + table.align["KEYGEN STATUS"] = "r" + for p in out: + if p.ok: + common_secrets.append(p.output["default_pub"]) + common_secrets.append(p.output["tar_pub"]) + + table.add_row([p.hostname, + "UP" if p.node_up else "DOWN", + "OK" if p.ok else "NOT OK: {0}".format( + p.error)]) + + with open(COMMON_SECRET_FILE, "w") as f: + f.write("\n".join(common_secrets) + "\n") + + print (table) + + +if __name__ == "__main__": + runcli() diff --git a/geo-replication/src/peer_gsec_create.in b/geo-replication/src/peer_gsec_create.in index ef630bd4417..6d4a4847013 100755 --- a/geo-replication/src/peer_gsec_create.in +++ b/geo-replication/src/peer_gsec_create.in @@ -2,11 +2,23 @@ prefix=@prefix@ exec_prefix=@exec_prefix@ +libexecdir=@libexecdir@ -if [ ! -f "$GLUSTERD_WORKING_DIR"/geo-replication/secret.pem.pub ]; then - \rm -rf "$GLUSTERD_WORKING_DIR"/geo-replication/secret.pem* - ssh-keygen -N '' -f "$GLUSTERD_WORKING_DIR"/geo-replication/secret.pem > /dev/null +if [ ! -f "$GLUSTERD_WORKDIR"/geo-replication/secret.pem.pub ]; then + \rm -rf "$GLUSTERD_WORKDIR"/geo-replication/secret.pem* + ssh-keygen -N '' -f "$GLUSTERD_WORKDIR"/geo-replication/secret.pem > /dev/null fi -output=`echo command=\"@libexecdir@/glusterfs/gsyncd\" " "``cat "$GLUSTERD_WORKING_DIR"/geo-replication/secret.pem.pub` -echo $output +if [ ! 
-f "$GLUSTERD_WORKDIR"/geo-replication/tar_ssh.pem.pub ]; then + \rm -rf "$GLUSTERD_WORKDIR"/geo-replication/tar_ssh.pem* + ssh-keygen -N '' -f "$GLUSTERD_WORKDIR"/geo-replication/tar_ssh.pem > /dev/null +fi + +if [ "Xcontainer" = "X$1" ]; then + output1=`cat "$GLUSTERD_WORKDIR"/geo-replication/secret.pem.pub` + output2=`cat "$GLUSTERD_WORKDIR"/geo-replication/tar_ssh.pem.pub` +else + output1=`echo command=\"${libexecdir}/glusterfs/gsyncd\" ""``cat "$GLUSTERD_WORKDIR"/geo-replication/secret.pem.pub` + output2=`echo command=\"tar \$\{SSH_ORIGINAL_COMMAND#* \}\" ""``cat "$GLUSTERD_WORKDIR"/geo-replication/tar_ssh.pem.pub` +fi +echo -e "$output1\n$output2" diff --git a/geo-replication/src/peer_mountbroker.in b/geo-replication/src/peer_mountbroker.in new file mode 100644 index 00000000000..8ecf38ded41 --- /dev/null +++ b/geo-replication/src/peer_mountbroker.in @@ -0,0 +1,211 @@ +#!/usr/bin/python3 + +from __future__ import print_function + +import os +from argparse import ArgumentParser, RawDescriptionHelpFormatter +import json +import sys + +PROG_DESCRIPTION = """ +GlusterFS Mountbroker user management +""" + +args = None + + +def ok(message=""): + if (not args and "-j" in sys.argv) or (args and args.json): + print(json.dumps({"ok": True, "message": message})) + else: + if message: + print(message) + + sys.exit(0) + + +def notok(message=""): + if (not args and "-j" in sys.argv) or (args and args.json): + print(json.dumps({"ok": False, "message": message})) + else: + print("error: %s" % message) + + # Always return zero due to limitation while executing + # as `gluster system:: execute` + sys.exit(0) + + +class NoStdErrParser(ArgumentParser): + """ + with gluster system:: execute, stderr gives + "Unable to end. Error : Bad file descriptor" error, + so deriving new class, prints error message and + exits with zero. 
+ """ + def error(self, message): + notok(message) + + +class MountbrokerUserMgmt(object): + def __init__(self, volfile): + self.volfile = volfile + self._options = {} + self.commented_lines = [] + self._parse() + + def _parse(self): + with open(self.volfile, "r") as f: + for line in f: + line = line.strip() + if line.startswith("option "): + key, value = line.split(" ")[1:] + self._options[key] = value + if line.startswith("#"): + self.commented_lines.append(line) + + def _get_write_data(self): + op = "volume management\n" + op += " type mgmt/glusterd\n" + for k, v in self._options.items(): + op += " option %s %s\n" % (k, v) + for line in self.commented_lines: + op += " %s\n" % line + op += "end-volume" + return op + + def save(self): + with open(self.volfile + "_tmp", "w") as f: + f.write(self._get_write_data()) + f.flush() + os.fsync(f.fileno()) + os.rename(self.volfile + "_tmp", self.volfile) + + def set_opt(self, key, value): + self._options[key] = value.strip() + + def remove_opt(self, key): + if key in self._options: + del(self._options[key]) + + def add_user(self, user, volumes): + vols = set() + for k, v in self._options.items(): + if k.startswith("mountbroker-geo-replication.") \ + and user == k.split(".")[-1]: + vols.update(v.split(",")) + + vols.update(volumes) + self.set_opt("mountbroker-geo-replication.%s" % user, + ",".join(vols)) + + def remove_volume(self, user, volumes): + vols = set() + for k, v in self._options.items(): + if k.startswith("mountbroker-geo-replication.") \ + and user == k.split(".")[-1]: + vols.update(v.split(",")) + + for v1 in volumes: + vols.discard(v1) + + if vols: + self.set_opt("mountbroker-geo-replication.%s" % user, + ",".join(vols)) + else: + self.remove_opt("mountbroker-geo-replication.%s" % user) + + def remove_user(self, user): + self.remove_opt("mountbroker-geo-replication.%s" % user) + + def info(self): + data = {"users": []} + + for k, v in self._options.items(): + if k.startswith("mountbroker-geo-replication."): + data["users"].append( + {"name": k.split(".")[-1], "volumes": v.split(",")} + ) + else: + data[k] = v + + return data + + +def format_info(data): + op = "%s %s\n" % ("Option".ljust(50), "Value".ljust(50)) + op += ("-" * 101) + "\n" + for key, value in data.items(): + if key != "users": + op += "%s %s\n" % (key.ljust(50), value) + + op += "\nUsers: %s\n" % ("None" if not data["users"] else "") + for user in data["users"]: + op += "%s: %s\n" % (user["name"], ", ".join(user["volumes"])) + op += "\n\n" + return op + + +def _get_args(): + parser = NoStdErrParser(formatter_class=RawDescriptionHelpFormatter, + description=PROG_DESCRIPTION) + + parser.add_argument('-j', dest="json", help="JSON output", + action="store_true") + subparsers = parser.add_subparsers(title='subcommands', dest='cmd') + parser_useradd = subparsers.add_parser('user') + parser_userdel = subparsers.add_parser('userdel') + parser_volumedel = subparsers.add_parser('volumedel') + subparsers.add_parser('info') + parser_opt = subparsers.add_parser('opt') + parser_optdel = subparsers.add_parser('optdel') + + parser_useradd.add_argument('username', help="Username", type=str) + parser_useradd.add_argument('volumes', type=str, default='', + help="Volumes list. ',' separated") + + parser_volumedel.add_argument('username', help="Username", type=str) + parser_volumedel.add_argument('volumes', type=str, default='', + help="Volumes list. 
',' separated") + + parser_userdel.add_argument('username', help="Username", type=str) + + parser_opt.add_argument('opt_name', help="Name", type=str) + parser_opt.add_argument('opt_value', help="Value", type=str) + + parser_optdel.add_argument('opt_name', help="Name", type=str) + + return parser.parse_args() + + +def main(): + global args + args = _get_args() + + m = MountbrokerUserMgmt("@GLUSTERD_VOLFILE@") + + if args.cmd == "opt": + m.set_opt(args.opt_name, args.opt_value) + elif args.cmd == "optdel": + m.remove_opt(args.opt_name) + elif args.cmd == "userdel": + m.remove_user(args.username) + elif args.cmd == "user": + volumes = [v.strip() for v in args.volumes.split(",") + if v.strip() != ""] + m.add_user(args.username, volumes) + elif args.cmd == "volumedel": + volumes = [v.strip() for v in args.volumes.split(",") + if v.strip() != ""] + m.remove_volume(args.username, volumes) + elif args.cmd == "info": + info = m.info() + if not args.json: + info = format_info(info) + ok(info) + + if args.cmd != "info": + m.save() + ok() + +if __name__ == "__main__": + main() diff --git a/geo-replication/src/peer_mountbroker.py.in b/geo-replication/src/peer_mountbroker.py.in new file mode 100644 index 00000000000..40b90ffc560 --- /dev/null +++ b/geo-replication/src/peer_mountbroker.py.in @@ -0,0 +1,401 @@ +#!/usr/bin/python3 + +from __future__ import print_function + +import os +from errno import EEXIST, ENOENT + +from gluster.cliutils import (execute, Cmd, node_output_ok, + node_output_notok, execute_in_peers, + runcli, oknotok) +from prettytable import PrettyTable + +LOG_DIR = "@localstatedir@/log/glusterfs/geo-replication-slaves" +CLI_LOG = "@localstatedir@/log/glusterfs/cli.log" +GEOREP_DIR = "@GLUSTERD_WORKDIR@/geo-replication" +GLUSTERD_VOLFILE = "@GLUSTERD_VOLFILE@" + + +class MountbrokerUserMgmt(object): + def __init__(self, volfile): + self.volfile = volfile + self._options = {} + self.commented_lines = [] + self.user_volumes = {} + self._parse() + + def _parse(self): + """ Example glusterd.vol + volume management + type mgmt/glusterd + option working-directory /var/lib/glusterd + option transport-type socket,rdma + option transport.socket.keepalive-time 10 + option transport.socket.keepalive-interval 2 + option transport.socket.read-fail-log off + option rpc-auth-allow-insecure on + option ping-timeout 0 + option event-threads 1 + # option base-port 49152 + option mountbroker-root /var/mountbroker-root + option mountbroker-geo-replication.user1 vol1,vol2,vol3 + option geo-replication-log-group geogroup + option rpc-auth-allow-insecure on + end-volume + """ + with open(self.volfile, "r") as f: + for line in f: + line = line.strip() + if line.startswith("option "): + key, value = line.split()[1:] + self._options[key] = value + if line.startswith("#"): + self.commented_lines.append(line) + + for k, v in self._options.items(): + if k.startswith("mountbroker-geo-replication."): + user = k.split(".")[-1] + self.user_volumes[user] = set(v.split(",")) + + def get_group(self): + return self._options.get("geo-replication-log-group", None) + + def _get_write_data(self): + op = "volume management\n" + op += " type mgmt/glusterd\n" + for k, v in self._options.items(): + if k.startswith("mountbroker-geo-replication."): + # Users will be added seperately + continue + + op += " option %s %s\n" % (k, v) + + for k, v in self.user_volumes.items(): + if v: + op += (" option mountbroker-geo-replication." 
+ "%s %s\n" % (k, ",".join(v))) + + for line in self.commented_lines: + op += " %s\n" % line + + op += "end-volume" + return op + + def save(self): + with open(self.volfile + "_tmp", "w") as f: + f.write(self._get_write_data()) + f.flush() + os.fsync(f.fileno()) + os.rename(self.volfile + "_tmp", self.volfile) + + def set_mount_root_and_group(self, mnt_root, group): + self._options["mountbroker-root"] = mnt_root + self._options["geo-replication-log-group"] = group + + def add(self, volume, user): + user_volumes = self.user_volumes.get(user, None) + + if user_volumes is not None and volume in user_volumes: + # User and Volume already exists + return + + if user_volumes is None: + # User not exists + self.user_volumes[user] = set() + + self.user_volumes[user].add(volume) + + def remove(self, volume=None, user=None): + if user is not None: + if volume is None: + self.user_volumes[user] = set() + else: + try: + self.user_volumes.get(user, set()).remove(volume) + except KeyError: + pass + else: + if volume is None: + return + + for k, v in self.user_volumes.items(): + try: + self.user_volumes[k].remove(volume) + except KeyError: + pass + + def info(self): + # Convert Volumes set into Volumes list + users = {} + for k, v in self.user_volumes.items(): + users[k] = list(v) + + data = { + "mountbroker-root": self._options.get("mountbroker-root", "None"), + "geo-replication-log-group": self._options.get( + "geo-replication-log-group", ""), + "users": users + } + + return data + + +class NodeSetup(Cmd): + # Test if group exists using `getent group <grp>` + # and then group add using `groupadd <grp>` + # chgrp -R <grp> /var/log/glusterfs/geo-replication-slaves + # chgrp -R <grp> /var/lib/glusterd/geo-replication + # chmod -R 770 /var/log/glusterfs/geo-replication-slaves + # chmod 770 /var/lib/glusterd/geo-replication + # mkdir -p <mnt_root> + # chmod 0711 <mnt_root> + # If selinux, + # semanage fcontext -a -e /home /var/mountbroker-root + # restorecon -Rv /var/mountbroker-root + name = "node-setup" + + def args(self, parser): + parser.add_argument("mount_root") + parser.add_argument("group") + + def run(self, args): + m = MountbrokerUserMgmt(GLUSTERD_VOLFILE) + + try: + os.makedirs(args.mount_root) + except OSError as e: + if e.errno == EEXIST: + pass + else: + node_output_notok("Unable to Create {0}".format( + args.mount_root)) + + execute(["chmod", "0711", args.mount_root]) + try: + execute(["semanage", "fcontext", "-a", "-e", + "/home", args.mount_root]) + except OSError as e: + if e.errno == ENOENT: + pass + else: + node_output_notok( + "Unable to run semanage: {0}".format(e)) + + try: + execute(["restorecon", "-Rv", args.mount_root]) + except OSError as e: + if e.errno == ENOENT: + pass + else: + node_output_notok( + "Unable to run restorecon: {0}".format(e)) + + rc, out, err = execute(["getent", "group", args.group]) + if rc != 0: + node_output_notok("User Group not exists") + + execute(["chgrp", "-R", args.group, GEOREP_DIR]) + execute(["chgrp", "-R", args.group, LOG_DIR]) + execute(["chgrp", args.group, CLI_LOG]) + execute(["chmod", "770", GEOREP_DIR]) + execute(["find", LOG_DIR, "-type", "d", "-exec", "chmod", "770", "{}", + "+"]) + execute(["find", LOG_DIR, "-type", "f", "-exec", "chmod", "660", "{}", + "+"]) + execute(["chmod", "660", CLI_LOG]) + + m.set_mount_root_and_group(args.mount_root, args.group) + m.save() + + node_output_ok() + + +def color_status(value): + if value.lower() in ("up", "ok", "yes"): + return "green" + else: + return "red" + + +class CliSetup(Cmd): + # 
gluster-mountbroker setup <MOUNT_ROOT> <GROUP> + name = "setup" + + def args(self, parser): + parser.add_argument("mount_root", + help="Path to the mountbroker-root directory.") + parser.add_argument("group", + help="Group to be used for setup.") + + def run(self, args): + out = execute_in_peers("node-setup", [args.mount_root, + args.group]) + table = PrettyTable(["NODE", "NODE STATUS", "SETUP STATUS"]) + table.align["NODE STATUS"] = "r" + table.align["SETUP STATUS"] = "r" + for p in out: + table.add_row([p.hostname, + "UP" if p.node_up else "DOWN", + "OK" if p.ok else "NOT OK: {0}".format( + p.error)]) + + print(table) + + +class NodeStatus(Cmd): + # Check if Group exists + # Check if user exists + # Check directory permission /var/log/glusterfs/geo-replication-slaves + # and /var/lib/glusterd/geo-replication + # Check mount root and its permissions + # Check glusterd.vol file for user, group, dir existance + name = "node-status" + + def run(self, args): + m = MountbrokerUserMgmt(GLUSTERD_VOLFILE) + data = m.info() + data["group_exists"] = False + data["path_exists"] = False + + rc, out, err = execute(["getent", "group", + data["geo-replication-log-group"]]) + + if rc == 0: + data["group_exists"] = True + + if os.path.exists(data["mountbroker-root"]): + data["path_exists"] = True + + node_output_ok(data) + + +class CliStatus(Cmd): + # gluster-mountbroker status + name = "status" + + def run(self, args): + out = execute_in_peers("node-status") + table = PrettyTable(["NODE", "NODE STATUS", "MOUNT ROOT", + "GROUP", "USERS"]) + table.align["NODE STATUS"] = "r" + + for p in out: + node_data = p.output + if node_data == "" or node_data == "N/A": + node_data = {} + + users_row_data = "" + for k, v in node_data.get("users", {}).items(): + users_row_data += "{0}({1}) ".format(k, ", ".join(v)) + + if not users_row_data: + users_row_data = "None" + + mount_root = node_data.get("mountbroker-root", "None") + if mount_root != "None": + mount_root += "({0})".format(oknotok( + node_data.get("path_exists", False))) + + grp = node_data.get("geo-replication-log-group", "None") + if grp != "None": + grp += "({0})".format(oknotok( + node_data.get("group_exists", False))) + + table.add_row([p.hostname, + "UP" if p.node_up else "DOWN", + mount_root, + grp, + users_row_data]) + + print(table) + + +class NodeAdd(Cmd): + # useradd -m -g <grp> <usr> + # useradd to glusterd.vol + name = "node-add" + + def args(self, parser): + parser.add_argument("volume") + parser.add_argument("user") + + def run(self, args): + m = MountbrokerUserMgmt(GLUSTERD_VOLFILE) + grp = m.get_group() + if grp is None: + node_output_notok("Group is not available") + + m.add(args.volume, args.user) + m.save() + node_output_ok() + + +class CliAdd(Cmd): + # gluster-mountbroker add <VOLUME> <USER> + name = "add" + + def args(self, parser): + parser.add_argument("volume", + help="Volume to be added.") + parser.add_argument("user", + help="User for which volume is to be added.") + + def run(self, args): + out = execute_in_peers("node-add", [args.volume, + args.user]) + table = PrettyTable(["NODE", "NODE STATUS", "ADD STATUS"]) + table.align["NODE STATUS"] = "r" + table.align["ADD STATUS"] = "r" + + for p in out: + table.add_row([p.hostname, + "UP" if p.node_up else "DOWN", + "OK" if p.ok else "NOT OK: {0}".format( + p.error)]) + + print(table) + + +class NodeRemove(Cmd): + # userremove from glusterd.vol file + name = "node-remove" + + def args(self, parser): + parser.add_argument("volume") + parser.add_argument("user") + + def run(self, args): + m 
= MountbrokerUserMgmt(GLUSTERD_VOLFILE) + volume = None if args.volume == "." else args.volume + user = None if args.user == "." else args.user + m.remove(volume=volume, user=user) + m.save() + node_output_ok() + + +class CliRemove(Cmd): + # gluster-mountbroker remove --volume <VOLUME> --user <USER> + name = "remove" + + def args(self, parser): + parser.add_argument("--volume", default=".", help="Volume to be removed.") + parser.add_argument("--user", default=".", + help="User for which volume has to be removed.") + + def run(self, args): + out = execute_in_peers("node-remove", [args.volume, + args.user]) + table = PrettyTable(["NODE", "NODE STATUS", "REMOVE STATUS"]) + table.align["NODE STATUS"] = "r" + table.align["REMOVE STATUS"] = "r" + + for p in out: + table.add_row([p.hostname, + "UP" if p.node_up else "DOWN", + "OK" if p.ok else "NOT OK: {0}".format( + p.error)]) + + print(table) + +if __name__ == "__main__": + runcli() diff --git a/geo-replication/src/procdiggy.c b/geo-replication/src/procdiggy.c index 1eba414c116..8068ef79a42 100644 --- a/geo-replication/src/procdiggy.c +++ b/geo-replication/src/procdiggy.c @@ -8,11 +8,6 @@ cases as published by the Free Software Foundation. */ -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - #include <stdlib.h> #include <stdio.h> #include <unistd.h> @@ -20,102 +15,122 @@ #include <ctype.h> #include <sys/param.h> /* for PATH_MAX */ -#include "common-utils.h" +#include <glusterfs/common-utils.h> +#include <glusterfs/syscall.h> #include "procdiggy.h" pid_t -pidinfo (pid_t pid, char **name) +pidinfo(pid_t pid, char **name) { - char buf[NAME_MAX * 2] = {0,}; - FILE *f = NULL; - char path[PATH_MAX] = {0,}; - char *p = NULL; - int ret = 0; - - sprintf (path, PROC"/%d/status", pid); - - f = fopen (path, "r"); - if (!f) - return -1; - - if (name) - *name = NULL; - for (;;) { - size_t len; - memset (buf, 0, sizeof (buf)); - if (fgets (buf, sizeof (buf), f) == NULL || - (len = strlen (buf)) == 0 || - buf[len - 1] != '\n') { - pid = -1; - goto out; - } - buf[len - 1] = '\0'; - - if (name && !*name) { - p = strtail (buf, "Name:"); - if (p) { - while (isspace (*++p)); - *name = gf_strdup (p); - if (!*name) { - pid = -2; - goto out; - } - continue; - } - } + char buf[NAME_MAX * 2] = { + 0, + }; + FILE *f = NULL; + char path[PATH_MAX] = { + 0, + }; + char *p = NULL; + int ret = 0; + pid_t lpid = -1; + + if (name) + *name = NULL; + + snprintf(path, sizeof path, PROC "/%d/status", pid); + + f = fopen(path, "r"); + if (!f) + return -1; + + for (;;) { + size_t len; + memset(buf, 0, sizeof(buf)); + if (fgets(buf, sizeof(buf), f) == NULL || (len = strlen(buf)) == 0 || + buf[len - 1] != '\n') { + lpid = -1; + goto out; + } + buf[len - 1] = '\0'; - p = strtail (buf, "PPid:"); - if (p) - break; + if (name && !*name) { + p = strtail(buf, "Name:"); + if (p) { + while (isspace(*++p)) + ; + *name = gf_strdup(p); + if (!*name) { + lpid = -2; + goto out; + } + continue; + } } - while (isspace (*++p)); - ret = gf_string2int (p, &pid); - if (ret == -1) - pid = -1; - - out: - fclose (f); - if (pid == -1 && name && *name) - GF_FREE (name); - if (pid == -2) - fprintf (stderr, "out of memory\n"); - return pid; + p = strtail(buf, "PPid:"); + if (p) + break; + } + + while (isspace(*++p)) + ; + ret = gf_string2int(p, &lpid); + if (ret == -1) + lpid = -1; + +out: + fclose(f); + if (lpid == -1 && name && *name) + GF_FREE(*name); + if (lpid == -2) + fprintf(stderr, "out of memory\n"); + return lpid; } int -prociter (int (*proch) (pid_t pid, pid_t ppid, char *tmpname, 
void *data), - void *data) +prociter(int (*proch)(pid_t pid, pid_t ppid, char *tmpname, void *data), + void *data) { - char *name = NULL; - DIR *d = NULL; - struct dirent *de = NULL; - pid_t pid = -1; - pid_t ppid = -1; - int ret = 0; - - d = opendir (PROC); - if (!d) - return -1; - while (errno = 0, de = readdir (d)) { - if (gf_string2int (de->d_name, &pid) != -1 && pid >= 0) { - ppid = pidinfo (pid, &name); - switch (ppid) { - case -1: continue; - case -2: ret = -1; break; - } - ret = proch (pid, ppid, name, data); - GF_FREE (name); - if (ret) - break; - } - } - closedir (d); - if (!de && errno) { - fprintf (stderr, "failed to traverse "PROC" (%s)\n", - strerror (errno)); - ret = -1; + char *name = NULL; + DIR *d = NULL; + struct dirent *de = NULL; + struct dirent scratch[2] = { + { + 0, + }, + }; + pid_t pid = -1; + pid_t ppid = -1; + int ret = 0; + + d = sys_opendir(PROC); + if (!d) + return -1; + + for (;;) { + errno = 0; + de = sys_readdir(d, scratch); + if (!de || errno != 0) + break; + + if (gf_string2int(de->d_name, &pid) != -1 && pid >= 0) { + ppid = pidinfo(pid, &name); + switch (ppid) { + case -1: + continue; + case -2: + break; + } + ret = proch(pid, ppid, name, data); + GF_FREE(name); + if (ret) + break; } + } + sys_closedir(d); + if (!de && errno) { + fprintf(stderr, "failed to traverse " PROC " (%s)\n", strerror(errno)); + ret = -1; + } - return ret; + return ret; } diff --git a/geo-replication/src/procdiggy.h b/geo-replication/src/procdiggy.h index 56dfc4eb213..e17ccd31c89 100644 --- a/geo-replication/src/procdiggy.h +++ b/geo-replication/src/procdiggy.h @@ -13,8 +13,9 @@ #define PROC "/proc" -pid_t pidinfo (pid_t pid, char **name); - -int prociter (int (*proch) (pid_t pid, pid_t ppid, char *name, void *data), - void *data); +pid_t +pidinfo(pid_t pid, char **name); +int +prociter(int (*proch)(pid_t pid, pid_t ppid, char *name, void *data), + void *data); diff --git a/geo-replication/src/set_geo_rep_pem_keys.sh b/geo-replication/src/set_geo_rep_pem_keys.sh new file mode 100755 index 00000000000..8a43fa39d1f --- /dev/null +++ b/geo-replication/src/set_geo_rep_pem_keys.sh @@ -0,0 +1,58 @@ +#!/bin/bash + +# Script to copy the pem keys from the user's home directory +# to $GLUSTERD_WORKDIR/geo-replication and then copy +# the keys to other nodes in the cluster and add them to the +# respective authorized keys. The script takes as argument the +# user name and assumes that the user will be present in all +# the nodes in the cluster. 
Not to be used for root user + +function main() +{ + user=$1 + master_vol=$2 + slave_vol=$3 + GLUSTERD_WORKDIR=$(gluster system:: getwd) + + if [ "$user" == "" ]; then + echo "Please enter the user's name" + exit 1; + fi + + if [ "$master_vol" == "" ]; then + echo "Invalid master volume name" + exit 1; + fi + + if [ "$slave_vol" == "" ]; then + echo "Invalid slave volume name" + exit 1; + fi + + COMMON_SECRET_PEM_PUB=${master_vol}_${slave_vol}_common_secret.pem.pub + + if [ "$user" == "root" ]; then + echo "This script is not needed for root" + exit 1; + fi + + home_dir=`getent passwd $user | cut -d ':' -f 6`; + + if [ "$home_dir" == "" ]; then + echo "No user $user found" + exit 1; + fi + + if [ -f $home_dir/${COMMON_SECRET_PEM_PUB} ]; then + cp $home_dir/${COMMON_SECRET_PEM_PUB} ${GLUSTERD_WORKDIR}/geo-replication/ + gluster system:: copy file /geo-replication/${COMMON_SECRET_PEM_PUB} + gluster system:: execute add_secret_pub $user geo-replication/${master_vol}_${slave_vol}_common_secret.pem.pub + gluster vol set ${slave_vol} features.read-only on + else + echo "$home_dir/${COMMON_SECRET_PEM_PUB} not present. Please run geo-replication command on master with push-pem option to generate the file" + exit 1; + fi + exit 0; +} + +main "$@"; diff --git a/geo-replication/syncdaemon/Makefile.am b/geo-replication/syncdaemon/Makefile.am index 83f969639cc..d70e3368faf 100644 --- a/geo-replication/syncdaemon/Makefile.am +++ b/geo-replication/syncdaemon/Makefile.am @@ -1,7 +1,8 @@ -syncdaemondir = $(libexecdir)/glusterfs/python/syncdaemon +syncdaemondir = $(GLUSTERFS_LIBEXECDIR)/python/syncdaemon -syncdaemon_PYTHON = gconf.py gsyncd.py __init__.py master.py README.md repce.py \ - resource.py configinterface.py syncdutils.py monitor.py libcxattr.py \ - $(top_builddir)/contrib/ipaddr-py/ipaddr.py libgfchangelog.py +syncdaemon_PYTHON = rconf.py gsyncd.py __init__.py master.py README.md repce.py \ + resource.py syncdutils.py monitor.py libcxattr.py gsyncdconfig.py \ + libgfchangelog.py gsyncdstatus.py conf.py logutils.py \ + subcmds.py argsupgrade.py py2py3.py CLEANFILES = diff --git a/geo-replication/syncdaemon/README.md b/geo-replication/syncdaemon/README.md index 67f346ace5a..5ab785ae669 100644 --- a/geo-replication/syncdaemon/README.md +++ b/geo-replication/syncdaemon/README.md @@ -12,14 +12,13 @@ Requirements are categorized according to this. * OpenSSH >= 4.0 (master) / SSH2 compliant sshd (eg. openssh) (slave) * rsync (both) * glusterfs: with marker and changelog support (master & slave); -* FUSE: glusterfs fuse module with auxilary gfid based access support +* FUSE: glusterfs fuse module with auxiliary gfid based access support INSTALLATION ------------ As of now, the supported way of operation is running from the source directory or using the RPMs given. -If you use Python 2.4.x, you need to install the [Ctypes module](http://python.net/crew/theller/ctypes/). CONFIGURATION ------------- @@ -45,7 +44,7 @@ in the source tree. USAGE ----- -gsyncd is a utilitly for continous mirroring, ie. it mirrors master to slave incrementally. +gsyncd is a utility for continuous mirroring, i.e. it mirrors master to slave incrementally. Assume we have a gluster volume _pop_ at localhost. We try to set up the mirroring for volume _pop_ using gsyncd for gluster volume _moz_ on remote machine/cluster @ example.com.
The respective gsyncd invocations are (demoing some syntax sugaring): diff --git a/geo-replication/syncdaemon/__codecheck.py b/geo-replication/syncdaemon/__codecheck.py index e3386afba8b..9437147f7d9 100644 --- a/geo-replication/syncdaemon/__codecheck.py +++ b/geo-replication/syncdaemon/__codecheck.py @@ -1,10 +1,21 @@ +# +# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com> +# This file is part of GlusterFS. + +# This file is licensed to you under your choice of the GNU Lesser +# General Public License, version 3 or any later version (LGPLv3 or +# later), or the GNU General Public License, version 2 (GPLv2), in all +# cases as published by the Free Software Foundation. +# + +from __future__ import print_function import os import os.path import sys import tempfile import shutil -ipd = tempfile.mkdtemp(prefix = 'codecheck-aux') +ipd = tempfile.mkdtemp(prefix='codecheck-aux') try: # add a fake ipaddr module, we don't want to @@ -25,7 +36,7 @@ class IPNetwork(list): if f[-3:] != '.py' or f[0] == '_': continue m = f[:-3] - sys.stdout.write('importing %s ...' % m) + sys.stdout.write('importing %s ...' % m) __import__(m) print(' OK.') @@ -33,8 +44,9 @@ class IPNetwork(list): sys.argv = sys.argv[:1] + a gsyncd = sys.modules['gsyncd'] - for a in [['--help'], ['--version'], ['--canonicalize-escape-url', '/foo']]: - print('>>> invoking program with args: %s' % ' '.join(a)) + for a in [['--help'], ['--version'], + ['--canonicalize-escape-url', '/foo']]: + print(('>>> invoking program with args: %s' % ' '.join(a))) pid = os.fork() if not pid: sys_argv_set(a) diff --git a/geo-replication/syncdaemon/__init__.py b/geo-replication/syncdaemon/__init__.py index e69de29bb2d..b4648b69645 100644 --- a/geo-replication/syncdaemon/__init__.py +++ b/geo-replication/syncdaemon/__init__.py @@ -0,0 +1,9 @@ +# +# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com> +# This file is part of GlusterFS. + +# This file is licensed to you under your choice of the GNU Lesser +# General Public License, version 3 or any later version (LGPLv3 or +# later), or the GNU General Public License, version 2 (GPLv2), in all +# cases as published by the Free Software Foundation. +# diff --git a/geo-replication/syncdaemon/argsupgrade.py b/geo-replication/syncdaemon/argsupgrade.py new file mode 100644 index 00000000000..7af40633ef8 --- /dev/null +++ b/geo-replication/syncdaemon/argsupgrade.py @@ -0,0 +1,359 @@ +# -*- coding: utf-8 -*- +# +# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> +# This file is part of GlusterFS. +# +# This file is licensed to you under your choice of the GNU Lesser +# General Public License, version 3 or any later version (LGPLv3 or +# later), or the GNU General Public License, version 2 (GPLv2), in all +# cases as published by the Free Software Foundation. 
+# +# Converts old style args into new style args + +from __future__ import print_function +import sys +from argparse import ArgumentParser +import socket +import os + +from syncdutils import GsyncdError +from conf import GLUSTERD_WORKDIR + + +def gethostbyname(hnam): + """gethostbyname wrapper""" + try: + return socket.gethostbyname(hnam) + except socket.gaierror: + ex = sys.exc_info()[1] + raise GsyncdError("failed to resolve %s: %s" % + (hnam, ex.strerror)) + + +def slave_url(urldata): + urldata = urldata.replace("ssh://", "") + host, vol = urldata.split("::") + vol = vol.split(":")[0] + return "%s::%s" % (host, vol) + + +def init_gsyncd_template_conf(): + path = GLUSTERD_WORKDIR + "/geo-replication/gsyncd_template.conf" + dname = os.path.dirname(path) + if not os.path.exists(dname): + try: + os.mkdir(dname) + except OSError: + pass + + if not os.path.exists(path): + fd = os.open(path, os.O_CREAT | os.O_RDWR) + os.close(fd) + + +def init_gsyncd_session_conf(master, slave): + slave = slave_url(slave) + master = master.strip(":") + slavehost, slavevol = slave.split("::") + slavehost = slavehost.split("@")[-1] + + # Session Config File + path = "%s/geo-replication/%s_%s_%s/gsyncd.conf" % ( + GLUSTERD_WORKDIR, master, slavehost, slavevol) + + if os.path.exists(os.path.dirname(path)) and not os.path.exists(path): + fd = os.open(path, os.O_CREAT | os.O_RDWR) + os.close(fd) + + +def init_gsyncd_conf(path): + dname = os.path.dirname(path) + if not os.path.exists(dname): + try: + os.mkdir(dname) + except OSError: + pass + + if os.path.exists(dname) and not os.path.exists(path): + fd = os.open(path, os.O_CREAT | os.O_RDWR) + os.close(fd) + + +def upgrade(): + # Create dummy template conf(empty), hack to avoid glusterd + # fail when it does stat to check the existence. 
+ init_gsyncd_template_conf() + + inet6 = False + if "--inet6" in sys.argv: + inet6 = True + + if "--monitor" in sys.argv: + # python gsyncd.py --path=/bricks/b1 + # --monitor -c gsyncd.conf + # --iprefix=/var :gv1 + # --glusterd-uuid=f26ac7a8-eb1b-4ea7-959c-80b27d3e43d0 + # f241::gv2 + p = ArgumentParser() + p.add_argument("master") + p.add_argument("slave") + p.add_argument("--glusterd-uuid") + p.add_argument("-c") + p.add_argument("--iprefix") + p.add_argument("--path", action="append") + pargs = p.parse_known_args(sys.argv[1:])[0] + + # Overwrite the sys.argv after rearrange + init_gsyncd_session_conf(pargs.master, pargs.slave) + sys.argv = [ + sys.argv[0], + "monitor", + pargs.master.strip(":"), + slave_url(pargs.slave), + "--local-node-id", + pargs.glusterd_uuid + ] + elif "--status-get" in sys.argv: + # -c gsyncd.conf --iprefix=/var :gv1 f241::gv2 + # --status-get --path /bricks/b1 + p = ArgumentParser() + p.add_argument("master") + p.add_argument("slave") + p.add_argument("-c") + p.add_argument("--path") + p.add_argument("--iprefix") + pargs = p.parse_known_args(sys.argv[1:])[0] + + init_gsyncd_session_conf(pargs.master, pargs.slave) + + sys.argv = [ + sys.argv[0], + "status", + pargs.master.strip(":"), + slave_url(pargs.slave), + "--local-path", + pargs.path + ] + elif "--canonicalize-url" in sys.argv: + # This can accept multiple URLs and converts each URL to the + # format ssh://USER@IP:gluster://127.0.0.1:VOLUME + # This format not used in gsyncd, but added for glusterd compatibility + p = ArgumentParser() + p.add_argument("--canonicalize-url", nargs="+") + pargs = p.parse_known_args(sys.argv[1:])[0] + + for url in pargs.canonicalize_url: + host, vol = url.split("::") + host = host.replace("ssh://", "") + remote_addr = host + if "@" not in remote_addr: + remote_addr = "root@" + remote_addr + + user, hname = remote_addr.split("@") + + if not inet6: + hname = gethostbyname(hname) + + print(("ssh://%s@%s:gluster://127.0.0.1:%s" % ( + user, hname, vol))) + + sys.exit(0) + elif "--normalize-url" in sys.argv: + # Adds schema prefix as ssh:// + # This format not used in gsyncd, but added for glusterd compatibility + p = ArgumentParser() + p.add_argument("--normalize-url") + pargs = p.parse_known_args(sys.argv[1:])[0] + print(("ssh://%s" % slave_url(pargs.normalize_url))) + sys.exit(0) + elif "--config-get-all" in sys.argv: + # -c gsyncd.conf --iprefix=/var :gv1 f241::gv2 --config-get-all + p = ArgumentParser() + p.add_argument("master") + p.add_argument("slave") + p.add_argument("-c") + p.add_argument("--iprefix") + pargs = p.parse_known_args(sys.argv[1:])[0] + + init_gsyncd_session_conf(pargs.master, pargs.slave) + + sys.argv = [ + sys.argv[0], + "config-get", + pargs.master.strip(":"), + slave_url(pargs.slave), + "--show-defaults", + "--use-underscore" + ] + elif "--verify" in sys.argv and "spawning" in sys.argv: + # Just checks that able to spawn gsyncd or not + sys.exit(0) + elif "--slavevoluuid-get" in sys.argv: + # --slavevoluuid-get f241::gv2 + p = ArgumentParser() + p.add_argument("--slavevoluuid-get") + p.add_argument("-c") + p.add_argument("--iprefix") + pargs = p.parse_known_args(sys.argv[1:])[0] + host, vol = pargs.slavevoluuid_get.split("::") + + # Modified sys.argv + sys.argv = [ + sys.argv[0], + "voluuidget", + host, + vol + ] + elif "--config-set-rx" in sys.argv: + # Not required since default conf is not generated + # and custom conf generated only when required + # -c gsyncd.conf --config-set-rx remote-gsyncd + # /usr/local/libexec/glusterfs/gsyncd . . 
+ # Touch the gsyncd.conf file and create session + # directory if required + p = ArgumentParser() + p.add_argument("-c", dest="config_file") + pargs = p.parse_known_args(sys.argv[1:])[0] + + # If not template conf then it is trying to create + # session config, create a empty file instead + if pargs.config_file.endswith("gsyncd.conf"): + init_gsyncd_conf(pargs.config_file) + sys.exit(0) + elif "--create" in sys.argv: + # To update monitor status file + # --create Created -c gsyncd.conf + # --iprefix=/var :gv1 f241::gv2 + p = ArgumentParser() + p.add_argument("--create") + p.add_argument("master") + p.add_argument("slave") + p.add_argument("-c") + p.add_argument("--iprefix") + pargs = p.parse_known_args(sys.argv[1:])[0] + + init_gsyncd_session_conf(pargs.master, pargs.slave) + + # Modified sys.argv + sys.argv = [ + sys.argv[0], + "monitor-status", + pargs.master.strip(":"), + slave_url(pargs.slave), + pargs.create + ] + elif "--config-get" in sys.argv: + # -c gsyncd.conf --iprefix=/var :gv1 f241::gv2 --config-get pid-file + p = ArgumentParser() + p.add_argument("--config-get") + p.add_argument("master") + p.add_argument("slave") + p.add_argument("-c") + p.add_argument("--iprefix") + pargs = p.parse_known_args(sys.argv[1:])[0] + + init_gsyncd_session_conf(pargs.master, pargs.slave) + + # Modified sys.argv + sys.argv = [ + sys.argv[0], + "config-get", + pargs.master.strip(":"), + slave_url(pargs.slave), + "--only-value", + "--show-defaults", + "--name", + pargs.config_get.replace("_", "-") + ] + elif "--config-set" in sys.argv: + # ignore session-owner + if "session-owner" in sys.argv: + sys.exit(0) + + # --path=/bricks/b1 -c gsyncd.conf :gv1 f241::gv2 + # --config-set log_level DEBUG + p = ArgumentParser() + p.add_argument("master") + p.add_argument("slave") + p.add_argument("--config-set", action='store_true') + p.add_argument("name") + p.add_argument("--value") + p.add_argument("-c") + pargs = p.parse_known_args(sys.argv[1:])[0] + + init_gsyncd_session_conf(pargs.master, pargs.slave) + + # Modified sys.argv + sys.argv = [ + sys.argv[0], + "config-set", + pargs.master.strip(":"), + slave_url(pargs.slave), + "--name=%s" % pargs.name, + "--value=%s" % pargs.value + ] + elif "--config-check" in sys.argv: + # --config-check georep_session_working_dir + p = ArgumentParser() + p.add_argument("--config-check") + p.add_argument("-c") + pargs = p.parse_known_args(sys.argv[1:])[0] + + # Modified sys.argv + sys.argv = [ + sys.argv[0], + "config-check", + pargs.config_check.replace("_", "-") + ] + elif "--config-del" in sys.argv: + # -c gsyncd.conf --iprefix=/var :gv1 f241::gv2 --config-del log_level + p = ArgumentParser() + p.add_argument("--config-del") + p.add_argument("master") + p.add_argument("slave") + p.add_argument("-c") + p.add_argument("--iprefix") + pargs = p.parse_known_args(sys.argv[1:])[0] + + init_gsyncd_session_conf(pargs.master, pargs.slave) + + # Modified sys.argv + sys.argv = [ + sys.argv[0], + "config-reset", + pargs.master.strip(":"), + slave_url(pargs.slave), + pargs.config_del.replace("_", "-") + ] + elif "--delete" in sys.argv: + # --delete -c gsyncd.conf --iprefix=/var + # --path-list=--path=/bricks/b1 :gv1 f241::gv2 + p = ArgumentParser() + p.add_argument("--reset-sync-time", action="store_true") + p.add_argument("--path-list") + p.add_argument("master") + p.add_argument("slave") + p.add_argument("--iprefix") + p.add_argument("-c") + pargs = p.parse_known_args(sys.argv[1:])[0] + + init_gsyncd_session_conf(pargs.master, pargs.slave) + + paths = 
pargs.path_list.split("--path=") + paths = ["--path=%s" % x.strip() for x in paths if x.strip() != ""] + + # Modified sys.argv + sys.argv = [ + sys.argv[0], + "delete", + pargs.master.strip(":"), + slave_url(pargs.slave) + ] + sys.argv += paths + + if pargs.reset_sync_time: + sys.argv.append("--reset-sync-time") + + if inet6: + # Add `--inet6` as first argument + sys.argv = [sys.argv[0], "--inet6"] + sys.argv[1:] diff --git a/geo-replication/syncdaemon/conf.py.in b/geo-replication/syncdaemon/conf.py.in new file mode 100644 index 00000000000..2042fa9cdfb --- /dev/null +++ b/geo-replication/syncdaemon/conf.py.in @@ -0,0 +1,17 @@ +# +# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> +# This file is part of GlusterFS. + +# This file is licensed to you under your choice of the GNU Lesser +# General Public License, version 3 or any later version (LGPLv3 or +# later), or the GNU General Public License, version 2 (GPLv2), in all +# cases as published by the Free Software Foundation. +# + +GLUSTERFS_LIBEXECDIR = '@GLUSTERFS_LIBEXECDIR@' +GLUSTERD_WORKDIR = "@GLUSTERD_WORKDIR@" + +LOCALSTATEDIR = "@localstatedir@" +UUID_FILE = "@GLUSTERD_WORKDIR@/glusterd.info" +GLUSTERFS_CONFDIR = "@SYSCONF_DIR@/glusterfs" +GCONF_VERSION = 4.0 diff --git a/geo-replication/syncdaemon/configinterface.py b/geo-replication/syncdaemon/configinterface.py deleted file mode 100644 index a326e824681..00000000000 --- a/geo-replication/syncdaemon/configinterface.py +++ /dev/null @@ -1,224 +0,0 @@ -try: - import ConfigParser -except ImportError: - # py 3 - import configparser as ConfigParser -import re -from string import Template - -from syncdutils import escape, unescape, norm, update_file, GsyncdError - -SECT_ORD = '__section_order__' -SECT_META = '__meta__' -config_version = 2.0 - -re_type = type(re.compile('')) - - -class MultiDict(object): - """a virtual dict-like class which functions as the union of underlying dicts""" - - def __init__(self, *dd): - self.dicts = dd - - def __getitem__(self, key): - val = None - for d in self.dicts: - if d.get(key) != None: - val = d[key] - if val == None: - raise KeyError(key) - return val - - -class GConffile(object): - """A high-level interface to ConfigParser which flattens the two-tiered - config layout by implenting automatic section dispatch based on initial - parameters. - - Also ensure section ordering in terms of their time of addition -- a compat - hack for Python < 2.7. 
- """ - - def _normconfig(self): - """normalize config keys by s/-/_/g""" - for n, s in self.config._sections.items(): - if n.find('__') == 0: - continue - s2 = type(s)() - for k, v in s.items(): - if k.find('__') != 0: - k = norm(k) - s2[k] = v - self.config._sections[n] = s2 - - def __init__(self, path, peers, *dd): - """ - - .path: location of config file - - .config: underlying ConfigParser instance - - .peers: on behalf of whom we flatten .config - (master, or master-slave url pair) - - .auxdicts: template subtituents - """ - self.peers = peers - self.path = path - self.auxdicts = dd - self.config = ConfigParser.RawConfigParser() - self.config.read(path) - self._normconfig() - - def section(self, rx=False): - """get the section name of the section representing .peers in .config""" - peers = self.peers - if not peers: - peers = ['.', '.'] - rx = True - if rx: - st = 'peersrx' - else: - st = 'peers' - return ' '.join([st] + [escape(u) for u in peers]) - - @staticmethod - def parse_section(section): - """retrieve peers sequence encoded by section name - (as urls or regexen, depending on section type) - """ - sl = section.split() - st = sl.pop(0) - sl = [unescape(u) for u in sl] - if st == 'peersrx': - sl = [re.compile(u) for u in sl] - return sl - - def ord_sections(self): - """Return an ordered list of sections. - - Ordering happens based on the auxiliary - SECT_ORD section storing indices for each - section added through the config API. - - To not to go corrupt in case of manually - written config files, we take care to append - also those sections which are not registered - in SECT_ORD. - - Needed for python 2.{4,5,6} where ConfigParser - cannot yet order sections/options internally. - """ - so = {} - if self.config.has_section(SECT_ORD): - so = self.config._sections[SECT_ORD] - so2 = {} - for k, v in so.items(): - if k != '__name__': - so2[k] = int(v) - tv = 0 - if so2: - tv = max(so2.values()) + 1 - ss = [s for s in self.config.sections() if s.find('__') != 0] - for s in ss: - if s in so.keys(): - continue - so2[s] = tv - tv += 1 - def scmp(x, y): - return cmp(*(so2[s] for s in (x, y))) - ss.sort(scmp) - return ss - - def update_to(self, dct, allow_unresolved=False): - """update @dct from key/values of ours. - - key/values are collected from .config by filtering the regexp sections - according to match, and from .section. The values are treated as templates, - which are substituted from .auxdicts and (in case of regexp sections) - match groups. 
- """ - if not self.peers: - raise GsyncdError('no peers given, cannot select matching options') - def update_from_sect(sect, mud): - for k, v in self.config._sections[sect].items(): - if k == '__name__': - continue - if allow_unresolved: - dct[k] = Template(v).safe_substitute(mud) - else: - dct[k] = Template(v).substitute(mud) - for sect in self.ord_sections(): - sp = self.parse_section(sect) - if isinstance(sp[0], re_type) and len(sp) == len(self.peers): - match = True - mad = {} - for i in range(len(sp)): - m = sp[i].search(self.peers[i]) - if not m: - match = False - break - for j in range(len(m.groups())): - mad['match%d_%d' % (i+1, j+1)] = m.groups()[j] - if match: - update_from_sect(sect, MultiDict(dct, mad, *self.auxdicts)) - if self.config.has_section(self.section()): - update_from_sect(self.section(), MultiDict(dct, *self.auxdicts)) - - def get(self, opt=None): - """print the matching key/value pairs from .config, - or if @opt given, the value for @opt (according to the - logic described in .update_to) - """ - d = {} - self.update_to(d, allow_unresolved = True) - if opt: - opt = norm(opt) - v = d.get(opt) - if v: - print(v) - else: - for k, v in d.iteritems(): - if k == '__name__': - continue - print("%s: %s" % (k, v)) - - def write(self, trfn, opt, *a, **kw): - """update on-disk config transactionally - - @trfn is the transaction function - """ - def mergeconf(f): - self.config = ConfigParser.RawConfigParser() - self.config.readfp(f) - self._normconfig() - if not self.config.has_section(SECT_META): - self.config.add_section(SECT_META) - self.config.set(SECT_META, 'version', config_version) - return trfn(norm(opt), *a, **kw) - def updateconf(f): - self.config.write(f) - update_file(self.path, updateconf, mergeconf) - - def _set(self, opt, val, rx=False): - """set @opt to @val in .section""" - sect = self.section(rx) - if not self.config.has_section(sect): - self.config.add_section(sect) - # regarding SECT_ORD, cf. 
ord_sections - if not self.config.has_section(SECT_ORD): - self.config.add_section(SECT_ORD) - self.config.set(SECT_ORD, sect, len(self.config._sections[SECT_ORD])) - self.config.set(sect, opt, val) - return True - - def set(self, opt, *a, **kw): - """perform ._set transactionally""" - self.write(self._set, opt, *a, **kw) - - def _delete(self, opt, rx=False): - """delete @opt from .section""" - sect = self.section(rx) - if self.config.has_section(sect): - return self.config.remove_option(sect, opt) - - def delete(self, opt, *a, **kw): - """perform ._delete transactionally""" - self.write(self._delete, opt, *a, **kw) diff --git a/geo-replication/syncdaemon/gconf.py b/geo-replication/syncdaemon/gconf.py deleted file mode 100644 index 146c72a1825..00000000000 --- a/geo-replication/syncdaemon/gconf.py +++ /dev/null @@ -1,20 +0,0 @@ -import os - -class GConf(object): - """singleton class to store globals - shared between gsyncd modules""" - - ssh_ctl_dir = None - ssh_ctl_args = None - cpid = None - pid_file_owned = False - log_exit = False - permanent_handles = [] - log_metadata = {} - - @classmethod - def setup_ssh_ctl(cls, ctld): - cls.ssh_ctl_dir = ctld - cls.ssh_ctl_args = ["-oControlMaster=auto", "-S", os.path.join(ctld, "gsycnd-ssh-%r@%h:%p")] - -gconf = GConf() diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py index 67ba0737087..257ed72c6ae 100644 --- a/geo-replication/syncdaemon/gsyncd.py +++ b/geo-replication/syncdaemon/gsyncd.py @@ -1,514 +1,324 @@ -#!/usr/bin/env python - +#!/usr/bin/python3 +# -*- coding: utf-8 -*- +# +# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> +# This file is part of GlusterFS. +# +# This file is licensed to you under your choice of the GNU Lesser +# General Public License, version 3 or any later version (LGPLv3 or +# later), or the GNU General Public License, version 2 (GPLv2), in all +# cases as published by the Free Software Foundation. +# + +from argparse import ArgumentParser +import time import os -import os.path -import glob +from errno import EEXIST import sys -import time import logging -import signal -import shutil -import optparse -import fcntl -import fnmatch -from optparse import OptionParser, SUPPRESS_HELP -from logging import Logger -from errno import EEXIST, ENOENT - -from ipaddr import IPAddress, IPNetwork - -from gconf import gconf -from syncdutils import FreeObject, norm, grabpidfile, finalize, log_raise_exception -from syncdutils import GsyncdError, select, set_term_handler, privileged, update_file -from configinterface import GConffile -import resource -from monitor import monitor - -class GLogger(Logger): - """Logger customizations for gsyncd. - - It implements a log format similar to that of glusterfs. 
- """ - - def makeRecord(self, name, level, *a): - rv = Logger.makeRecord(self, name, level, *a) - rv.nsecs = (rv.created - int(rv.created)) * 1000000 - fr = sys._getframe(4) - callee = fr.f_locals.get('self') - if callee: - ctx = str(type(callee)).split("'")[1].split('.')[-1] - else: - ctx = '<top>' - if not hasattr(rv, 'funcName'): - rv.funcName = fr.f_code.co_name - rv.lvlnam = logging.getLevelName(level)[0] - rv.ctx = ctx - return rv - - @classmethod - def setup(cls, **kw): - lbl = kw.get('label', "") - if lbl: - lbl = '(' + lbl + ')' - lprm = {'datefmt': "%Y-%m-%d %H:%M:%S", - 'format': "[%(asctime)s.%(nsecs)d] %(lvlnam)s [%(module)s" + lbl + ":%(lineno)s:%(funcName)s] %(ctx)s: %(message)s"} - lprm.update(kw) - lvl = kw.get('level', logging.INFO) - lprm['level'] = lvl - logging.root = cls("root", lvl) - logging.setLoggerClass(cls) - logging.getLogger().handlers = [] - logging.basicConfig(**lprm) - - @classmethod - def _gsyncd_loginit(cls, **kw): - lkw = {} - if gconf.log_level: - lkw['level'] = gconf.log_level - if kw.get('log_file'): - if kw['log_file'] in ('-', '/dev/stderr'): - lkw['stream'] = sys.stderr - elif kw['log_file'] == '/dev/stdout': - lkw['stream'] = sys.stdout - else: - lkw['filename'] = kw['log_file'] - - cls.setup(label=kw.get('label'), **lkw) - - lkw.update({'saved_label': kw.get('label')}) - gconf.log_metadata = lkw - gconf.log_exit = True - -def startup(**kw): - """set up logging, pidfile grabbing, daemonization""" - if getattr(gconf, 'pid_file', None) and kw.get('go_daemon') != 'postconn': - if not grabpidfile(): - sys.stderr.write("pidfile is taken, exiting.\n") - sys.exit(2) - gconf.pid_file_owned = True - - if kw.get('go_daemon') == 'should': - x, y = os.pipe() - gconf.cpid = os.fork() - if gconf.cpid: - os.close(x) - sys.exit() - os.close(y) - os.setsid() - dn = os.open(os.devnull, os.O_RDWR) - for f in (sys.stdin, sys.stdout, sys.stderr): - os.dup2(dn, f.fileno()) - if getattr(gconf, 'pid_file', None): - if not grabpidfile(gconf.pid_file + '.tmp'): - raise GsyncdError("cannot grab temporary pidfile") - os.rename(gconf.pid_file + '.tmp', gconf.pid_file) - # wait for parent to terminate - # so we can start up with - # no messing from the dirty - # ol' bustard - select((x,), (), ()) - os.close(x) - - GLogger._gsyncd_loginit(**kw) - - -def _unlink(path): - try: - os.unlink(path) - except (OSError, IOError): - if sys.exc_info()[1].errno == ENOENT: - pass - else: - raise GsyncdError('Unlink error: %s' % path) + +from logutils import setup_logging +import gsyncdconfig as gconf +from rconf import rconf +import subcmds +from conf import GLUSTERD_WORKDIR, GLUSTERFS_CONFDIR, GCONF_VERSION +from syncdutils import (set_term_handler, finalize, lf, + log_raise_exception, FreeObject, escape) +import argsupgrade + + +GSYNCD_VERSION = "gsyncd.py %s.0" % GCONF_VERSION def main(): - """main routine, signal/exception handling boilerplates""" - gconf.starttime = time.time() + rconf.starttime = time.time() + + # If old Glusterd sends commands in old format, below function + # converts the sys.argv to new format. This conversion is added + # temporarily for backward compatibility. 
This can be removed + # once integrated with Glusterd2 + # This modifies sys.argv globally, so rest of the code works as usual + argsupgrade.upgrade() + + # Default argparse version handler prints to stderr, which is fixed in + # 3.x series but not in 2.x, using custom parser to fix this issue + if "--version" in sys.argv: + print(GSYNCD_VERSION) + sys.exit(0) + + parser = ArgumentParser() + parser.add_argument("--inet6", action="store_true") + sp = parser.add_subparsers(dest="subcmd") + + # Monitor Status File update + p = sp.add_parser("monitor-status") + p.add_argument("master", help="Master Volume Name") + p.add_argument("slave", help="Slave details user@host::vol format") + p.add_argument("status", help="Update Monitor Status") + p.add_argument("-c", "--config-file", help="Config File") + p.add_argument("--debug", action="store_true") + + # Monitor + p = sp.add_parser("monitor") + p.add_argument("master", help="Master Volume Name") + p.add_argument("slave", help="Slave details user@host::vol format") + p.add_argument("-c", "--config-file", help="Config File") + p.add_argument("--pause-on-start", + action="store_true", + help="Start with Paused state") + p.add_argument("--local-node-id", help="Local Node ID") + p.add_argument("--debug", action="store_true") + p.add_argument("--use-gconf-volinfo", action="store_true") + + # Worker + p = sp.add_parser("worker") + p.add_argument("master", help="Master Volume Name") + p.add_argument("slave", help="Slave details user@host::vol format") + p.add_argument("--local-path", help="Local Brick Path") + p.add_argument("--feedback-fd", type=int, + help="feedback fd between monitor and worker") + p.add_argument("--local-node", help="Local master node") + p.add_argument("--local-node-id", help="Local Node ID") + p.add_argument("--subvol-num", type=int, help="Subvolume number") + p.add_argument("--is-hottier", action="store_true", + help="Is this brick part of hot tier") + p.add_argument("--resource-remote", + help="Remote node to connect to Slave Volume") + p.add_argument("--resource-remote-id", + help="Remote node ID to connect to Slave Volume") + p.add_argument("--slave-id", help="Slave Volume ID") + p.add_argument("-c", "--config-file", help="Config File") + p.add_argument("--debug", action="store_true") + + # Slave + p = sp.add_parser("slave") + p.add_argument("master", help="Master Volume Name") + p.add_argument("slave", help="Slave details user@host::vol format") + p.add_argument("--session-owner") + p.add_argument("--master-brick", + help="Master brick which is connected to the Slave") + p.add_argument("--master-node", + help="Master node which is connected to the Slave") + p.add_argument("--master-node-id", + help="Master node ID which is connected to the Slave") + p.add_argument("--local-node", help="Local Slave node") + p.add_argument("--local-node-id", help="Local Slave ID") + p.add_argument("-c", "--config-file", help="Config File") + p.add_argument("--debug", action="store_true") + + # All configurations which are configured via "slave-" options + # DO NOT add default values for these configurations, default values + # will be picked from template config file + p.add_argument("--slave-timeout", type=int, + help="Timeout to end gsyncd at Slave side") + p.add_argument("--use-rsync-xattrs", action="store_true") + p.add_argument("--slave-log-level", help="Slave Gsyncd Log level") + p.add_argument("--slave-gluster-log-level", + help="Slave Gluster mount Log level") + p.add_argument("--slave-gluster-command-dir", + help="Directory where Gluster 
binaries exist on slave") + p.add_argument("--slave-access-mount", action="store_true", + help="Do not lazy umount the slave volume") + p.add_argument("--master-dist-count", type=int, + help="Master Distribution count") + + # Status + p = sp.add_parser("status") + p.add_argument("master", help="Master Volume Name") + p.add_argument("slave", help="Slave") + p.add_argument("-c", "--config-file", help="Config File") + p.add_argument("--local-path", help="Local Brick Path") + p.add_argument("--debug", action="store_true") + p.add_argument("--json", action="store_true") + + # Config-check + p = sp.add_parser("config-check") + p.add_argument("name", help="Config Name") + p.add_argument("--value", help="Config Value") + p.add_argument("--debug", action="store_true") + + # Config-get + p = sp.add_parser("config-get") + p.add_argument("master", help="Master Volume Name") + p.add_argument("slave", help="Slave") + p.add_argument("--name", help="Config Name") + p.add_argument("-c", "--config-file", help="Config File") + p.add_argument("--debug", action="store_true") + p.add_argument("--show-defaults", action="store_true") + p.add_argument("--only-value", action="store_true") + p.add_argument("--use-underscore", action="store_true") + p.add_argument("--json", action="store_true") + + # Config-set + p = sp.add_parser("config-set") + p.add_argument("master", help="Master Volume Name") + p.add_argument("slave", help="Slave") + p.add_argument("-n", "--name", help="Config Name") + p.add_argument("-v", "--value", help="Config Value") + p.add_argument("-c", "--config-file", help="Config File") + p.add_argument("--debug", action="store_true") + + # Config-reset + p = sp.add_parser("config-reset") + p.add_argument("master", help="Master Volume Name") + p.add_argument("slave", help="Slave") + p.add_argument("name", help="Config Name") + p.add_argument("-c", "--config-file", help="Config File") + p.add_argument("--debug", action="store_true") + + # voluuidget + p = sp.add_parser("voluuidget") + p.add_argument("host", help="Hostname") + p.add_argument("volname", help="Volume Name") + p.add_argument("--debug", action="store_true") + + # Delete + p = sp.add_parser("delete") + p.add_argument("master", help="Master Volume Name") + p.add_argument("slave", help="Slave") + p.add_argument("-c", "--config-file", help="Config File") + p.add_argument('--path', dest='paths', action="append") + p.add_argument("--reset-sync-time", action="store_true", + help="Reset Sync Time") + p.add_argument("--debug", action="store_true") + + # Parse arguments + args = parser.parse_args() + + # Extra template values, All arguments are already part of template + # variables, use this for adding extra variables + extra_tmpl_args = {} + + # Add First/Primary Slave host, user and volume + if getattr(args, "slave", None) is not None: + hostdata, slavevol = args.slave.split("::") + hostdata = hostdata.split("@") + slavehost = hostdata[-1] + slaveuser = "root" + if len(hostdata) == 2: + slaveuser = hostdata[0] + extra_tmpl_args["primary_slave_host"] = slavehost + extra_tmpl_args["slaveuser"] = slaveuser + extra_tmpl_args["slavevol"] = slavevol + + # Add Bricks encoded path + if getattr(args, "local_path", None) is not None: + extra_tmpl_args["local_id"] = escape(args.local_path) + + # Add Master Bricks encoded path(For Slave) + if getattr(args, "master_brick", None) is not None: + extra_tmpl_args["master_brick_id"] = escape(args.master_brick) + + # Load configurations + config_file = getattr(args, "config_file", None) + + # Subcmd accepts config 
file argument but not passed + # Set default path for config file in that case + # If an subcmd accepts config file then it also accepts + # master and Slave arguments. + if config_file is None and hasattr(args, "config_file") \ + and args.subcmd != "slave": + config_file = "%s/geo-replication/%s_%s_%s/gsyncd.conf" % ( + GLUSTERD_WORKDIR, + args.master, + extra_tmpl_args["primary_slave_host"], + extra_tmpl_args["slavevol"]) + + # If Config file path not exists, log error and continue using default conf + config_file_error_msg = None + if config_file is not None and not os.path.exists(config_file): + # Logging not yet initialized, create the error message to + # log later and reset the config_file to None + config_file_error_msg = lf( + "Session config file not exists, using the default config", + path=config_file) + config_file = None + + rconf.config_file = config_file + + # Override gconf values from argument values only if it is slave gsyncd + override_from_args = False + if args.subcmd == "slave": + override_from_args = True + + if config_file is not None and \ + args.subcmd in ["monitor", "config-get", "config-set", "config-reset"]: + ret = gconf.is_config_file_old(config_file, args.master, extra_tmpl_args["slavevol"]) + if ret is not None: + gconf.config_upgrade(config_file, ret) + + # Load Config file + gconf.load(GLUSTERFS_CONFDIR + "/gsyncd.conf", + config_file, + vars(args), + extra_tmpl_args, + override_from_args) + + # Default label to print in log file + label = args.subcmd + if args.subcmd in ("worker"): + # If Worker, then add brick path also to label + label = "%s %s" % (args.subcmd, args.local_path) + elif args.subcmd == "slave": + # If Slave add Master node and Brick details + label = "%s %s%s" % (args.subcmd, args.master_node, args.master_brick) + + # Setup Logger + # Default log file + log_file = gconf.get("cli-log-file") + log_level = gconf.get("cli-log-level") + if getattr(args, "master", None) is not None and \ + getattr(args, "slave", None) is not None: + log_file = gconf.get("log-file") + log_level = gconf.get("log-level") + + # Use different log file location for Slave log file + if args.subcmd == "slave": + log_file = gconf.get("slave-log-file") + log_level = gconf.get("slave-log-level") + + if args.debug: + log_file = "-" + log_level = "DEBUG" + + # Create Logdir if not exists + try: + if log_file != "-": + os.mkdir(os.path.dirname(log_file)) + except OSError as e: + if e.errno != EEXIST: + raise + + setup_logging( + log_file=log_file, + level=log_level, + label=label + ) + + if config_file_error_msg is not None: + logging.warn(config_file_error_msg) + + # Log message for loaded config file + if config_file is not None: + logging.debug(lf("Using session config file", path=config_file)) + set_term_handler() - GLogger.setup() - excont = FreeObject(exval = 0) + excont = FreeObject(exval=0) + + # Gets the function name based on the input argument. 
For example + # if subcommand passed as argument is monitor then it looks for + # function with name "subcmd_monitor" in subcmds file + func = getattr(subcmds, "subcmd_" + args.subcmd.replace("-", "_"), None) + try: try: - main_i() + if func is not None: + rconf.args = args + func(args) except: log_raise_exception(excont) finally: - finalize(exval = excont.exval) - -def main_i(): - """internal main routine - - parse command line, decide what action will be taken; - we can either: - - query/manipulate configuration - - format gsyncd urls using gsyncd's url parsing engine - - start service in following modes, in given stages: - - monitor: startup(), monitor() - - master: startup(), connect_remote(), connect(), service_loop() - - slave: startup(), connect(), service_loop() - """ - rconf = {'go_daemon': 'should'} - - def store_abs(opt, optstr, val, parser): - if val and val != '-': - val = os.path.abspath(val) - setattr(parser.values, opt.dest, val) - def store_local(opt, optstr, val, parser): - rconf[opt.dest] = val - def store_local_curry(val): - return lambda o, oo, vx, p: store_local(o, oo, val, p) - def store_local_obj(op, dmake): - return lambda o, oo, vx, p: store_local(o, oo, FreeObject(op=op, **dmake(vx)), p) - - op = OptionParser(usage="%prog [options...] <master> <slave>", version="%prog 0.0.1") - op.add_option('--gluster-command-dir', metavar='DIR', default='') - op.add_option('--gluster-log-file', metavar='LOGF', default=os.devnull, type=str, action='callback', callback=store_abs) - op.add_option('--gluster-log-level', metavar='LVL') - op.add_option('--gluster-params', metavar='PRMS', default='') - op.add_option('--glusterd-uuid', metavar='UUID', type=str, default='', help=SUPPRESS_HELP) - op.add_option('--gluster-cli-options', metavar='OPTS', default='--log-file=-') - op.add_option('--mountbroker', metavar='LABEL') - op.add_option('-p', '--pid-file', metavar='PIDF', type=str, action='callback', callback=store_abs) - op.add_option('-l', '--log-file', metavar='LOGF', type=str, action='callback', callback=store_abs) - op.add_option('--log-file-mbr', metavar='LOGF', type=str, action='callback', callback=store_abs) - op.add_option('--state-file', metavar='STATF', type=str, action='callback', callback=store_abs) - op.add_option('--state-detail-file', metavar='STATF', type=str, action='callback', callback=store_abs) - op.add_option('--ignore-deletes', default=False, action='store_true') - op.add_option('--isolated-slave', default=False, action='store_true') - op.add_option('--use-rsync-xattrs', default=False, action='store_true') - op.add_option('-L', '--log-level', metavar='LVL') - op.add_option('-r', '--remote-gsyncd', metavar='CMD', default=os.path.abspath(sys.argv[0])) - op.add_option('--volume-id', metavar='UUID') - op.add_option('--slave-id', metavar='ID') - op.add_option('--session-owner', metavar='ID') - op.add_option('--local-id', metavar='ID', help=SUPPRESS_HELP, default='') - op.add_option('--local-path', metavar='PATH', help=SUPPRESS_HELP, default='') - op.add_option('-s', '--ssh-command', metavar='CMD', default='ssh') - op.add_option('--rsync-command', metavar='CMD', default='rsync') - op.add_option('--rsync-options', metavar='OPTS', default='') - op.add_option('--rsync-ssh-options', metavar='OPTS', default='--compress') - op.add_option('--timeout', metavar='SEC', type=int, default=120) - op.add_option('--connection-timeout', metavar='SEC', type=int, default=60, help=SUPPRESS_HELP) - op.add_option('--sync-jobs', metavar='N', type=int, default=3) - op.add_option('--turns', 
metavar='N', type=int, default=0, help=SUPPRESS_HELP) - op.add_option('--allow-network', metavar='IPS', default='') - op.add_option('--socketdir', metavar='DIR') - op.add_option('--state-socket-unencoded', metavar='SOCKF', type=str, action='callback', callback=store_abs) - op.add_option('--checkpoint', metavar='LABEL', default='') - # tunables for failover/failback mechanism: - # None - gsyncd behaves as normal - # blind - gsyncd works with xtime pairs to identify - # candidates for synchronization - # wrapup - same as normal mode but does not assign - # xtimes to orphaned files - # see crawl() for usage of the above tunables - op.add_option('--special-sync-mode', type=str, help=SUPPRESS_HELP) - - # changelog or xtime? (TODO: Change the default) - op.add_option('--change-detector', metavar='MODE', type=str, default='xtime') - # sleep interval for change detection (xtime crawl uses a hardcoded 1 second sleep time) - op.add_option('--change-interval', metavar='SEC', type=int, default=3) - # working directory for changelog based mechanism - op.add_option('--working-dir', metavar='DIR', type=str, action='callback', callback=store_abs) - - op.add_option('-c', '--config-file', metavar='CONF', type=str, action='callback', callback=store_local) - # duh. need to specify dest or value will be mapped to None :S - op.add_option('--monitor', dest='monitor', action='callback', callback=store_local_curry(True)) - op.add_option('--resource-local', dest='resource_local', type=str, action='callback', callback=store_local) - op.add_option('--resource-remote', dest='resource_remote', type=str, action='callback', callback=store_local) - op.add_option('--feedback-fd', dest='feedback_fd', type=int, help=SUPPRESS_HELP, action='callback', callback=store_local) - op.add_option('--listen', dest='listen', help=SUPPRESS_HELP, action='callback', callback=store_local_curry(True)) - op.add_option('-N', '--no-daemon', dest="go_daemon", action='callback', callback=store_local_curry('dont')) - op.add_option('--verify', type=str, dest="verify", action='callback', callback=store_local) - op.add_option('--create', type=str, dest="create", action='callback', callback=store_local) - op.add_option('--delete', dest='delete', action='callback', callback=store_local_curry(True)) - op.add_option('--debug', dest="go_daemon", action='callback', callback=lambda *a: (store_local_curry('dont')(*a), - setattr(a[-1].values, 'log_file', '-'), - setattr(a[-1].values, 'log_level', 'DEBUG'))), - op.add_option('--path', type=str, action='append') - - for a in ('check', 'get'): - op.add_option('--config-' + a, metavar='OPT', type=str, dest='config', action='callback', - callback=store_local_obj(a, lambda vx: {'opt': vx})) - op.add_option('--config-get-all', dest='config', action='callback', callback=store_local_obj('get', lambda vx: {'opt': None})) - for m in ('', '-rx', '-glob'): - # call this code 'Pythonic' eh? 
- # have to define a one-shot local function to be able to inject (a value depending on the) - # iteration variable into the inner lambda - def conf_mod_opt_regex_variant(rx): - op.add_option('--config-set' + m, metavar='OPT VAL', type=str, nargs=2, dest='config', action='callback', - callback=store_local_obj('set', lambda vx: {'opt': vx[0], 'val': vx[1], 'rx': rx})) - op.add_option('--config-del' + m, metavar='OPT', type=str, dest='config', action='callback', - callback=store_local_obj('del', lambda vx: {'opt': vx, 'rx': rx})) - conf_mod_opt_regex_variant(m and m[1:] or False) - - op.add_option('--normalize-url', dest='url_print', action='callback', callback=store_local_curry('normal')) - op.add_option('--canonicalize-url', dest='url_print', action='callback', callback=store_local_curry('canon')) - op.add_option('--canonicalize-escape-url', dest='url_print', action='callback', callback=store_local_curry('canon_esc')) - - tunables = [ norm(o.get_opt_string()[2:]) for o in op.option_list if o.callback in (store_abs, 'store_true', None) and o.get_opt_string() not in ('--version', '--help') ] - remote_tunables = [ 'listen', 'go_daemon', 'timeout', 'session_owner', 'config_file', 'use_rsync_xattrs' ] - rq_remote_tunables = { 'listen': True } - - # precedence for sources of values: 1) commandline, 2) cfg file, 3) defaults - # -- for this to work out we need to tell apart defaults from explicitly set - # options... so churn out the defaults here and call the parser with virgin - # values container. - defaults = op.get_default_values() - opts, args = op.parse_args(values=optparse.Values()) - args_orig = args[:] - r = rconf.get('resource_local') - if r: - if len(args) == 0: - args.append(None) - args[0] = r - r = rconf.get('resource_remote') - if r: - if len(args) == 0: - raise GsyncdError('local resource unspecfied') - elif len(args) == 1: - args.append(None) - args[1] = r - confdata = rconf.get('config') - if not (len(args) == 2 or \ - (len(args) == 1 and rconf.get('listen')) or \ - (len(args) <= 2 and confdata) or \ - rconf.get('url_print')): - sys.stderr.write("error: incorrect number of arguments\n\n") - sys.stderr.write(op.get_usage() + "\n") - sys.exit(1) - - verify = rconf.get('verify') - if verify: - logging.info (verify) - logging.info ("Able to spawn gsyncd.py") - return - - restricted = os.getenv('_GSYNCD_RESTRICTED_') - - if restricted: - allopts = {} - allopts.update(opts.__dict__) - allopts.update(rconf) - bannedtuns = set(allopts.keys()) - set(remote_tunables) - if bannedtuns: - raise GsyncdError('following tunables cannot be set with restricted SSH invocaton: ' + \ - ', '.join(bannedtuns)) - for k, v in rq_remote_tunables.items(): - if not k in allopts or allopts[k] != v: - raise GsyncdError('tunable %s is not set to value %s required for restricted SSH invocaton' % \ - (k, v)) - - confrx = getattr(confdata, 'rx', None) - def makersc(aa, check=True): - if not aa: - return ([], None, None) - ra = [resource.parse_url(u) for u in aa] - local = ra[0] - remote = None - if len(ra) > 1: - remote = ra[1] - if check and not local.can_connect_to(remote): - raise GsyncdError("%s cannot work with %s" % (local.path, remote and remote.path)) - return (ra, local, remote) - if confrx: - # peers are regexen, don't try to parse them - if confrx == 'glob': - args = [ '\A' + fnmatch.translate(a) for a in args ] - canon_peers = args - namedict = {} - else: - dc = rconf.get('url_print') - rscs, local, remote = makersc(args_orig, not dc) - if dc: - for r in rscs: - print(r.get_url(**{'normal': {}, - 
'canon': {'canonical': True}, - 'canon_esc': {'canonical': True, 'escaped': True}}[dc])) - return - pa = ([], [], []) - urlprms = ({}, {'canonical': True}, {'canonical': True, 'escaped': True}) - for x in rscs: - for i in range(len(pa)): - pa[i].append(x.get_url(**urlprms[i])) - _, canon_peers, canon_esc_peers = pa - # creating the namedict, a dict representing various ways of referring to / repreenting - # peers to be fillable in config templates - mods = (lambda x: x, lambda x: x[0].upper() + x[1:], lambda x: 'e' + x[0].upper() + x[1:]) - if remote: - rmap = { local: ('local', 'master'), remote: ('remote', 'slave') } - else: - rmap = { local: ('local', 'slave') } - namedict = {} - for i in range(len(rscs)): - x = rscs[i] - for name in rmap[x]: - for j in range(3): - namedict[mods[j](name)] = pa[j][i] - namedict[name + 'vol'] = x.volume - if not 'config_file' in rconf: - rconf['config_file'] = os.path.join(os.path.dirname(sys.argv[0]), "conf/gsyncd_template.conf") - gcnf = GConffile(rconf['config_file'], canon_peers, defaults.__dict__, opts.__dict__, namedict) - - checkpoint_change = False - if confdata: - opt_ok = norm(confdata.opt) in tunables + [None] - if confdata.op == 'check': - if opt_ok: - sys.exit(0) - else: - sys.exit(1) - elif not opt_ok: - raise GsyncdError("not a valid option: " + confdata.opt) - if confdata.op == 'get': - gcnf.get(confdata.opt) - elif confdata.op == 'set': - gcnf.set(confdata.opt, confdata.val, confdata.rx) - elif confdata.op == 'del': - gcnf.delete(confdata.opt, confdata.rx) - # when modifying checkpoint, it's important to make a log - # of that, so in that case we go on to set up logging even - # if its just config invocation - if confdata.opt == 'checkpoint' and confdata.op in ('set', 'del') and \ - not confdata.rx: - checkpoint_change = True - if not checkpoint_change: - return - - gconf.__dict__.update(defaults.__dict__) - gcnf.update_to(gconf.__dict__) - gconf.__dict__.update(opts.__dict__) - gconf.configinterface = gcnf - - delete = rconf.get('delete') - if delete: - logging.info ('geo-replication delete') - # Delete pid file, status file, socket file - cleanup_paths = [] - if getattr(gconf, 'pid_file', None): - cleanup_paths.append(gconf.pid_file) - - if getattr(gconf, 'state_file', None): - cleanup_paths.append(gconf.state_file) - - if getattr(gconf, 'state_detail_file', None): - cleanup_paths.append(gconf.state_detail_file) - - if getattr(gconf, 'state_socket_unencoded', None): - cleanup_paths.append(gconf.state_socket_unencoded) - - cleanup_paths.append(rconf['config_file'][:-11] + "*"); - - # Cleanup changelog working dirs - if getattr(gconf, 'working_dir', None): - try: - shutil.rmtree(gconf.working_dir) - except (IOError, OSError): - if sys.exc_info()[1].errno == ENOENT: - pass - else: - raise GsyncdError('Error while removing working dir: %s' % gconf.working_dir) - - for path in cleanup_paths: - # To delete temp files - for f in glob.glob(path + "*"): - _unlink(f) - return - - if restricted and gconf.allow_network: - ssh_conn = os.getenv('SSH_CONNECTION') - if not ssh_conn: - #legacy env var - ssh_conn = os.getenv('SSH_CLIENT') - if ssh_conn: - allowed_networks = [ IPNetwork(a) for a in gconf.allow_network.split(',') ] - client_ip = IPAddress(ssh_conn.split()[0]) - allowed = False - for nw in allowed_networks: - if client_ip in nw: - allowed = True - break - if not allowed: - raise GsyncdError("client IP address is not allowed") - - ffd = rconf.get('feedback_fd') - if ffd: - fcntl.fcntl(ffd, fcntl.F_SETFD, fcntl.FD_CLOEXEC) - - #normalize 
loglevel - lvl0 = gconf.log_level - if isinstance(lvl0, str): - lvl1 = lvl0.upper() - lvl2 = logging.getLevelName(lvl1) - # I have _never_ _ever_ seen such an utterly braindead - # error condition - if lvl2 == "Level " + lvl1: - raise GsyncdError('cannot recognize log level "%s"' % lvl0) - gconf.log_level = lvl2 - - if not privileged() and gconf.log_file_mbr: - gconf.log_file = gconf.log_file_mbr - - if checkpoint_change: - try: - GLogger._gsyncd_loginit(log_file=gconf.log_file, label='conf') - if confdata.op == 'set': - logging.info('checkpoint %s set' % confdata.val) - elif confdata.op == 'del': - logging.info('checkpoint info was reset') - except IOError: - if sys.exc_info()[1].errno == ENOENT: - # directory of log path is not present, - # which happens if we get here from - # a peer-multiplexed "config-set checkpoint" - # (as that directory is created only on the - # original node) - pass - else: - raise - return - - create = rconf.get('create') - if create: - if getattr(gconf, 'state_file', None): - update_file(gconf.state_file, lambda f: f.write(create + '\n')) - return - - go_daemon = rconf['go_daemon'] - be_monitor = rconf.get('monitor') - - rscs, local, remote = makersc(args) - if not be_monitor and isinstance(remote, resource.SSH) and \ - go_daemon == 'should': - go_daemon = 'postconn' - log_file = None - else: - log_file = gconf.log_file - if be_monitor: - label = 'monitor' - elif remote: - #master - label = gconf.local_path - else: - label = 'slave' - startup(go_daemon=go_daemon, log_file=log_file, label=label) - resource.Popen.init_errhandler() - - if be_monitor: - return monitor(*rscs) - - logging.info("syncing: %s" % " -> ".join(r.url for r in rscs)) - if remote: - go_daemon = remote.connect_remote(go_daemon=go_daemon) - if go_daemon: - startup(go_daemon=go_daemon, log_file=gconf.log_file) - # complete remote connection in child - remote.connect_remote(go_daemon='done') - local.connect() - if ffd: - os.close(ffd) - local.service_loop(*[r for r in [remote] if r]) + finalize(exval=excont.exval) if __name__ == "__main__": diff --git a/geo-replication/syncdaemon/gsyncdconfig.py b/geo-replication/syncdaemon/gsyncdconfig.py new file mode 100644 index 00000000000..8848071997a --- /dev/null +++ b/geo-replication/syncdaemon/gsyncdconfig.py @@ -0,0 +1,485 @@ +# -*- coding: utf-8 -*- +# +# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> +# This file is part of GlusterFS. +# +# This file is licensed to you under your choice of the GNU Lesser +# General Public License, version 3 or any later version (LGPLv3 or +# later), or the GNU General Public License, version 2 (GPLv2), in all +# cases as published by the Free Software Foundation. 
+# + +try: + from ConfigParser import RawConfigParser, NoSectionError +except ImportError: + from configparser import RawConfigParser, NoSectionError +import os +import shutil +from string import Template +from datetime import datetime +from threading import Lock + + +# Global object which can be used in other modules +# once load_config is called +_gconf = {} + + +class GconfNotConfigurable(Exception): + pass + + +class GconfInvalidValue(Exception): + pass + + +class Gconf(object): + def __init__(self, default_conf_file, custom_conf_file=None, + args={}, extra_tmpl_args={}, override_from_args=False): + self.lock = Lock() + self.default_conf_file = default_conf_file + self.custom_conf_file = custom_conf_file + self.tmp_conf_file = None + self.gconf = {} + self.gconfdata = {} + self.gconf_typecast = {} + self.template_conf = [] + self.non_configurable_configs = [] + self.prev_mtime = 0 + if custom_conf_file is not None: + self.tmp_conf_file = custom_conf_file + ".tmp" + + self.session_conf_items = [] + self.args = args + self.extra_tmpl_args = extra_tmpl_args + self.override_from_args = override_from_args + # Store default values only if overwritten, Only for JSON/CLI output + self.default_values = {} + self._load() + + def _tmpl_substitute(self): + tmpl_values = {} + for k, v in self.gconf.items(): + tmpl_values[k.replace("-", "_")] = v + + # override the config file values with the one user passed + for k, v in self.args.items(): + # override the existing value only if set by user + if v is not None: + tmpl_values[k] = v + + for k, v in self.extra_tmpl_args.items(): + tmpl_values[k] = v + + for k, v in self.gconf.items(): + if k in self.template_conf and \ + (isinstance(v, str) or isinstance(v, unicode)): + self.gconf[k] = Template(v).safe_substitute(tmpl_values) + + def _do_typecast(self): + for k, v in self.gconf.items(): + cast_func = globals().get( + "to_" + self.gconf_typecast.get(k, "string"), None) + if cast_func is not None: + self.gconf[k] = cast_func(v) + if self.default_values.get(k, None) is not None: + self.default_values[k] = cast_func(v) + + def reset(self, name): + # If custom conf file is not set then it is only read only configs + if self.custom_conf_file is None: + raise GconfNotConfigurable() + + # If a config can not be modified + if name != "all" and not self._is_configurable(name): + raise GconfNotConfigurable() + + cnf = RawConfigParser() + with open(self.custom_conf_file) as f: + cnf.readfp(f) + + # Nothing to Reset, Not configured + if name != "all": + if not cnf.has_option("vars", name): + return True + + # Remove option from custom conf file + cnf.remove_option("vars", name) + else: + # Remove and add empty section, do not disturb if config file + # already has any other section + try: + cnf.remove_section("vars") + except NoSectionError: + pass + + cnf.add_section("vars") + + with open(self.tmp_conf_file, "w") as fw: + cnf.write(fw) + + os.rename(self.tmp_conf_file, self.custom_conf_file) + + self.reload() + + return True + + def set(self, name, value): + if self.custom_conf_file is None: + raise GconfNotConfigurable() + + if not self._is_configurable(name): + raise GconfNotConfigurable() + + if not self._is_valid_value(name, value): + raise GconfInvalidValue() + + curr_val = self.gconf.get(name, None) + if curr_val == value: + return True + + cnf = RawConfigParser() + with open(self.custom_conf_file) as f: + cnf.readfp(f) + + if not cnf.has_section("vars"): + cnf.add_section("vars") + + cnf.set("vars", name, value) + with open(self.tmp_conf_file, "w") as fw: 
+ cnf.write(fw) + + os.rename(self.tmp_conf_file, self.custom_conf_file) + + self.reload() + + return True + + def check(self, name, value=None, with_conffile=True): + if with_conffile and self.custom_conf_file is None: + raise GconfNotConfigurable() + + if not self._is_configurable(name): + raise GconfNotConfigurable() + + if value is not None and not self._is_valid_value(name, value): + raise GconfInvalidValue() + + + def _load_with_lock(self): + with self.lock: + self._load() + + def _load(self): + self.gconf = {} + self.template_conf = [] + self.gconf_typecast = {} + self.non_configurable_configs = [] + self.session_conf_items = [] + self.default_values = {} + + conf = RawConfigParser() + # Default Template config file + with open(self.default_conf_file) as f: + conf.readfp(f) + + # Custom Config file + if self.custom_conf_file is not None: + with open(self.custom_conf_file) as f: + conf.readfp(f) + + # Get version from default conf file + self.version = conf.get("__meta__", "version") + + # Populate default values + for sect in conf.sections(): + if sect in ["__meta__", "vars"]: + continue + + # Collect list of available options with help details + self.gconfdata[sect] = {} + for k, v in conf.items(sect): + self.gconfdata[sect][k] = v.strip() + + # Collect the Type cast information + if conf.has_option(sect, "type"): + self.gconf_typecast[sect] = conf.get(sect, "type") + + # Prepare list of configurable conf + if conf.has_option(sect, "configurable"): + if conf.get(sect, "configurable").lower() == "false": + self.non_configurable_configs.append(sect) + + # if it is a template conf value which needs to be substituted + if conf.has_option(sect, "template"): + if conf.get(sect, "template").lower().strip() == "true": + self.template_conf.append(sect) + + # Set default values + if conf.has_option(sect, "value"): + self.gconf[sect] = conf.get(sect, "value").strip() + + # Load the custom conf elements and overwrite + if conf.has_section("vars"): + for k, v in conf.items("vars"): + self.session_conf_items.append(k) + self.default_values[k] = self.gconf.get(k, "") + self.gconf[k] = v.strip() + + # Overwrite the Slave configurations which are sent as + # arguments to gsyncd slave + if self.override_from_args: + for k, v in self.args.items(): + k = k.replace("_", "-") + if k.startswith("slave-") and k in self.gconf: + self.gconf[k] = v + + self._tmpl_substitute() + self._do_typecast() + + def reload(self, with_lock=True): + if self._is_config_changed(): + if with_lock: + self._load_with_lock() + else: + self._load() + + def get(self, name, default_value=None, with_lock=True): + if with_lock: + with self.lock: + return self.gconf.get(name, default_value) + else: + return self.gconf.get(name, default_value) + + def getall(self, show_defaults=False, show_non_configurable=False): + cnf = {} + if not show_defaults: + for k in self.session_conf_items: + if k not in self.non_configurable_configs: + dv = self.default_values.get(k, "") + cnf[k] = { + "value": self.get(k), + "default": dv, + "configurable": True, + "modified": False if dv == "" else True + } + return cnf + + # Show all configs including defaults + for k, v in self.gconf.items(): + configurable = False if k in self.non_configurable_configs \ + else True + dv = self.default_values.get(k, "") + modified = False if dv == "" else True + if show_non_configurable: + cnf[k] = { + "value": v, + "default": dv, + "configurable": configurable, + "modified": modified + } + else: + if k not in self.non_configurable_configs: + cnf[k] = { + "value": v, + 
"default": dv, + "configurable": configurable, + "modified": modified + } + + return cnf + + def getr(self, name, default_value=None): + with self.lock: + self.reload(with_lock=False) + return self.get(name, default_value, with_lock=False) + + def get_help(self, name=None): + pass + + def _is_configurable(self, name): + item = self.gconfdata.get(name, None) + if item is None: + return False + + return item.get("configurable", True) + + def _is_valid_value(self, name, value): + item = self.gconfdata.get(name, None) + if item is None: + return False + + # If validation func not defined + if item.get("validation", None) is None: + return True + + # minmax validation + if item["validation"] == "minmax": + return validate_minmax(value, item["min"], item["max"]) + + if item["validation"] == "choice": + return validate_choice(value, item["allowed_values"]) + + if item["validation"] == "bool": + return validate_bool(value) + + if item["validation"] == "execpath": + return validate_execpath(value) + + if item["validation"] == "unixtime": + return validate_unixtime(value) + + if item["validation"] == "int": + return validate_int(value) + + return False + + def _is_config_changed(self): + if self.custom_conf_file is not None and \ + os.path.exists(self.custom_conf_file): + st = os.lstat(self.custom_conf_file) + if st.st_mtime > self.prev_mtime: + self.prev_mtime = st.st_mtime + return True + + return False + +def is_config_file_old(config_file, mastervol, slavevol): + cnf = RawConfigParser() + cnf.read(config_file) + session_section = "peers %s %s" % (mastervol, slavevol) + try: + return dict(cnf.items(session_section)) + except NoSectionError: + return None + +def config_upgrade(config_file, ret): + config_file_backup = os.path.join(os.path.dirname(config_file), "gsyncd.conf.bkp") + + #copy old config file in a backup file + shutil.copyfile(config_file, config_file_backup) + + #write a new config file + config = RawConfigParser() + config.add_section('vars') + + for key, value in ret.items(): + #handle option name changes + if key == "use_tarssh": + new_key = "sync-method" + if value == "true": + new_value = "tarssh" + else: + new_value = "rsync" + config.set('vars', new_key, new_value) + elif key == "timeout": + new_key = "slave-timeout" + config.set('vars', new_key, value) + #for changes like: ignore_deletes to ignore-deletes + else: + new_key = key.replace("_", "-") + config.set('vars', new_key, value) + + with open(config_file, 'w') as configfile: + config.write(configfile) + + +def validate_int(value): + try: + _ = int(value) + return True + except ValueError: + return False + + +def validate_unixtime(value): + try: + y = datetime.fromtimestamp(int(value)).strftime("%Y") + if y == "1970": + return False + + return True + except ValueError: + return False + + +def validate_minmax(value, minval, maxval): + try: + value = int(value) + minval = int(minval) + maxval = int(maxval) + return value >= minval and value <= maxval + except ValueError: + return False + + +def validate_choice(value, allowed_values): + allowed_values = allowed_values.split(",") + allowed_values = [v.strip() for v in allowed_values] + + return value in allowed_values + + +def validate_bool(value): + return value in ["true", "false"] + + +def validate_execpath(value): + return os.path.isfile(value) and os.access(value, os.X_OK) + + +def validate_filepath(value): + return os.path.isfile(value) + + +def validate_path(value): + return os.path.exists(value) + + +def to_int(value): + return int(value) + + +def to_float(value): + 
return float(value) + + +def to_bool(value): + if isinstance(value, bool): + return value + return True if value in ["true", "True"] else False + + +def get(name, default_value=None): + return _gconf.get(name, default_value) + + +def getall(show_defaults=False, show_non_configurable=False): + return _gconf.getall(show_defaults=show_defaults, + show_non_configurable=show_non_configurable) + + +def getr(name, default_value=None): + return _gconf.getr(name, default_value) + + +def load(default_conf, custom_conf=None, args={}, extra_tmpl_args={}, + override_from_args=False): + global _gconf + _gconf = Gconf(default_conf, custom_conf, args, extra_tmpl_args, + override_from_args) + + +def setconfig(name, value): + global _gconf + _gconf.set(name, value) + + +def resetconfig(name): + global _gconf + _gconf.reset(name) + + +def check(name, value=None, with_conffile=True): + global _gconf + _gconf.check(name, value=value, with_conffile=with_conffile) diff --git a/geo-replication/syncdaemon/gsyncdstatus.py b/geo-replication/syncdaemon/gsyncdstatus.py new file mode 100644 index 00000000000..1a655ff8887 --- /dev/null +++ b/geo-replication/syncdaemon/gsyncdstatus.py @@ -0,0 +1,419 @@ +#!/usr/bin/python3 +# +# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com> +# This file is part of GlusterFS. + +# This file is licensed to you under your choice of the GNU Lesser +# General Public License, version 3 or any later version (LGPLv3 or +# later), or the GNU General Public License, version 2 (GPLv2), in all +# cases as published by the Free Software Foundation. +# + +from __future__ import print_function +import fcntl +import os +import tempfile +try: + import urllib.parse as urllib +except ImportError: + import urllib +import json +import time +from datetime import datetime +from errno import EACCES, EAGAIN, ENOENT +import logging + +from syncdutils import (EVENT_GEOREP_ACTIVE, EVENT_GEOREP_PASSIVE, gf_event, + EVENT_GEOREP_CHECKPOINT_COMPLETED, lf) + +DEFAULT_STATUS = "N/A" +MONITOR_STATUS = ("Created", "Started", "Paused", "Stopped") +STATUS_VALUES = (DEFAULT_STATUS, + "Initializing...", + "Active", + "Passive", + "Faulty") + +CRAWL_STATUS_VALUES = (DEFAULT_STATUS, + "Hybrid Crawl", + "History Crawl", + "Changelog Crawl") + + +def human_time(ts): + try: + return datetime.fromtimestamp(float(ts)).strftime("%Y-%m-%d %H:%M:%S") + except ValueError: + return DEFAULT_STATUS + + +def human_time_utc(ts): + try: + return datetime.utcfromtimestamp( + float(ts)).strftime("%Y-%m-%d %H:%M:%S") + except ValueError: + return DEFAULT_STATUS + + +def get_default_values(): + return { + "slave_node": DEFAULT_STATUS, + "worker_status": DEFAULT_STATUS, + "last_synced": 0, + "last_synced_entry": 0, + "crawl_status": DEFAULT_STATUS, + "entry": 0, + "data": 0, + "meta": 0, + "failures": 0, + "checkpoint_completed": DEFAULT_STATUS, + "checkpoint_time": 0, + "checkpoint_completion_time": 0} + + +class LockedOpen(object): + + def __init__(self, filename, *args, **kwargs): + self.filename = filename + self.open_args = args + self.open_kwargs = kwargs + self.fileobj = None + + def __enter__(self): + """ + If two processes compete to update a file, The first process + gets the lock and the second process is blocked in the fcntl.flock() + call. When first process replaces the file and releases the lock, + the already open file descriptor in the second process now points + to a "ghost" file(not reachable by any path name) with old contents. + To avoid that conflict, check the fd already opened is same or + not. 
Open new one if not same + """ + f = open(self.filename, *self.open_args, **self.open_kwargs) + while True: + fcntl.flock(f, fcntl.LOCK_EX) + fnew = open(self.filename, *self.open_args, **self.open_kwargs) + if os.path.sameopenfile(f.fileno(), fnew.fileno()): + fnew.close() + break + else: + f.close() + f = fnew + self.fileobj = f + return f + + def __exit__(self, _exc_type, _exc_value, _traceback): + fcntl.flock(self.fileobj, fcntl.LOCK_UN) + self.fileobj.close() + + +def set_monitor_status(status_file, status): + fd = os.open(status_file, os.O_CREAT | os.O_RDWR) + os.close(fd) + with LockedOpen(status_file, 'r+'): + with tempfile.NamedTemporaryFile('w', dir=os.path.dirname(status_file), + delete=False) as tf: + tf.write(status) + tempname = tf.name + + os.rename(tempname, status_file) + dirfd = os.open(os.path.dirname(os.path.abspath(status_file)), + os.O_DIRECTORY) + os.fsync(dirfd) + os.close(dirfd) + + +class GeorepStatus(object): + def __init__(self, monitor_status_file, master_node, brick, master_node_id, + master, slave, monitor_pid_file=None): + self.master = master + slv_data = slave.split("::") + self.slave_host = slv_data[0] + self.slave_volume = slv_data[1].split(":")[0] # Remove Slave UUID + self.work_dir = os.path.dirname(monitor_status_file) + self.monitor_status_file = monitor_status_file + self.filename = os.path.join(self.work_dir, + "brick_%s.status" + % urllib.quote_plus(brick)) + + fd = os.open(self.filename, os.O_CREAT | os.O_RDWR) + os.close(fd) + fd = os.open(self.monitor_status_file, os.O_CREAT | os.O_RDWR) + os.close(fd) + self.master_node = master_node + self.master_node_id = master_node_id + self.brick = brick + self.default_values = get_default_values() + self.monitor_pid_file = monitor_pid_file + + def send_event(self, event_type, **kwargs): + gf_event(event_type, + master_volume=self.master, + master_node=self.master_node, + master_node_id=self.master_node_id, + slave_host=self.slave_host, + slave_volume=self.slave_volume, + brick_path=self.brick, + **kwargs) + + def _update(self, mergerfunc): + data = self.default_values + with LockedOpen(self.filename, 'r+') as f: + try: + data.update(json.load(f)) + except ValueError: + pass + + data = mergerfunc(data) + # If Data is not changed by merger func + if not data: + return False + + with tempfile.NamedTemporaryFile( + 'w', + dir=os.path.dirname(self.filename), + delete=False) as tf: + tf.write(data) + tempname = tf.name + + os.rename(tempname, self.filename) + dirfd = os.open(os.path.dirname(os.path.abspath(self.filename)), + os.O_DIRECTORY) + os.fsync(dirfd) + os.close(dirfd) + return True + + def reset_on_worker_start(self): + def merger(data): + data["slave_node"] = DEFAULT_STATUS + data["crawl_status"] = DEFAULT_STATUS + data["entry"] = 0 + data["data"] = 0 + data["meta"] = 0 + return json.dumps(data) + + self._update(merger) + + def set_field(self, key, value): + def merger(data): + # Current data and prev data is same + if data[key] == value: + return {} + + data[key] = value + return json.dumps(data) + + return self._update(merger) + + def trigger_gf_event_checkpoint_completion(self, checkpoint_time, + checkpoint_completion_time): + self.send_event(EVENT_GEOREP_CHECKPOINT_COMPLETED, + checkpoint_time=checkpoint_time, + checkpoint_completion_time=checkpoint_completion_time) + + def set_last_synced(self, value, checkpoint_time): + def merger(data): + data["last_synced"] = value[0] + + # If checkpoint is not set or reset + # or if last set checkpoint is changed + if checkpoint_time == 0 or \ + 
checkpoint_time != data["checkpoint_time"]: + data["checkpoint_time"] = 0 + data["checkpoint_completion_time"] = 0 + data["checkpoint_completed"] = "No" + + # If checkpoint is completed and not marked as completed + # previously then update the checkpoint completed time + if checkpoint_time > 0 and checkpoint_time <= value[0]: + if data["checkpoint_completed"] == "No": + curr_time = int(time.time()) + data["checkpoint_time"] = checkpoint_time + data["checkpoint_completion_time"] = curr_time + data["checkpoint_completed"] = "Yes" + logging.info(lf("Checkpoint completed", + checkpoint_time=human_time_utc( + checkpoint_time), + completion_time=human_time_utc(curr_time))) + self.trigger_gf_event_checkpoint_completion( + checkpoint_time, curr_time) + + return json.dumps(data) + + self._update(merger) + + def set_worker_status(self, status): + if self.set_field("worker_status", status): + logging.info(lf("Worker Status Change", + status=status)) + + def set_worker_crawl_status(self, status): + if self.set_field("crawl_status", status): + logging.info(lf("Crawl Status Change", + status=status)) + + def set_slave_node(self, slave_node): + def merger(data): + data["slave_node"] = slave_node + return json.dumps(data) + + self._update(merger) + + def inc_value(self, key, value): + def merger(data): + data[key] = data.get(key, 0) + value + return json.dumps(data) + + self._update(merger) + + def dec_value(self, key, value): + def merger(data): + data[key] = data.get(key, 0) - value + if data[key] < 0: + data[key] = 0 + return json.dumps(data) + + self._update(merger) + + def set_active(self): + if self.set_field("worker_status", "Active"): + logging.info(lf("Worker Status Change", + status="Active")) + self.send_event(EVENT_GEOREP_ACTIVE) + + def set_passive(self): + if self.set_field("worker_status", "Passive"): + logging.info(lf("Worker Status Change", + status="Passive")) + self.send_event(EVENT_GEOREP_PASSIVE) + + def get_monitor_status(self): + data = "" + with open(self.monitor_status_file, "r") as f: + data = f.read().strip() + return data + + def get_status(self, checkpoint_time=0): + """ + Monitor Status ---> Created Started Paused Stopped + ---------------------------------------------------------------------- + slave_node N/A VALUE VALUE N/A + status Created VALUE Paused Stopped + last_synced N/A VALUE VALUE VALUE + last_synced_entry N/A VALUE VALUE VALUE + crawl_status N/A VALUE N/A N/A + entry N/A VALUE N/A N/A + data N/A VALUE N/A N/A + meta N/A VALUE N/A N/A + failures N/A VALUE VALUE VALUE + checkpoint_completed N/A VALUE VALUE VALUE + checkpoint_time N/A VALUE VALUE VALUE + checkpoint_completed_time N/A VALUE VALUE VALUE + """ + data = self.default_values + with open(self.filename) as f: + try: + data.update(json.load(f)) + except ValueError: + pass + monitor_status = self.get_monitor_status() + + # Verifying whether monitor process running and adjusting status + if monitor_status in ["Started", "Paused"]: + try: + with open(self.monitor_pid_file, "r+") as f: + fcntl.lockf(f, fcntl.LOCK_EX | fcntl.LOCK_NB) + monitor_status = "Stopped" + except (IOError, OSError) as e: + # If pid file not exists, either monitor died or Geo-rep + # not even started once + if e.errno == ENOENT: + monitor_status = "Stopped" + elif e.errno in (EACCES, EAGAIN): + # cannot grab. 
so, monitor process still running..move on + pass + else: + raise + + if monitor_status in ["Created", "Paused", "Stopped"]: + data["worker_status"] = monitor_status + + if monitor_status == "": + data["worker_status"] = "Stopped" + + # Checkpoint adjustments + if checkpoint_time == 0: + data["checkpoint_completed"] = DEFAULT_STATUS + data["checkpoint_time"] = DEFAULT_STATUS + data["checkpoint_completion_time"] = DEFAULT_STATUS + else: + if checkpoint_time != data["checkpoint_time"]: + if checkpoint_time <= data["last_synced"]: + data["checkpoint_completed"] = "Yes" + data["checkpoint_time"] = checkpoint_time + data["checkpoint_completion_time"] = data["last_synced"] + else: + data["checkpoint_completed"] = "No" + data["checkpoint_time"] = checkpoint_time + data["checkpoint_completion_time"] = DEFAULT_STATUS + + if data["checkpoint_time"] not in [0, DEFAULT_STATUS]: + chkpt_time = data["checkpoint_time"] + data["checkpoint_time"] = human_time(chkpt_time) + data["checkpoint_time_utc"] = human_time_utc(chkpt_time) + + if data["checkpoint_completion_time"] not in [0, DEFAULT_STATUS]: + chkpt_completion_time = data["checkpoint_completion_time"] + data["checkpoint_completion_time"] = human_time( + chkpt_completion_time) + data["checkpoint_completion_time_utc"] = human_time_utc( + chkpt_completion_time) + + if data["last_synced"] == 0: + data["last_synced"] = DEFAULT_STATUS + data["last_synced_utc"] = DEFAULT_STATUS + else: + last_synced = data["last_synced"] + data["last_synced"] = human_time(last_synced) + data["last_synced_utc"] = human_time_utc(last_synced) + + if data["worker_status"] != "Active": + data["last_synced"] = DEFAULT_STATUS + data["last_synced_utc"] = DEFAULT_STATUS + data["crawl_status"] = DEFAULT_STATUS + data["entry"] = DEFAULT_STATUS + data["data"] = DEFAULT_STATUS + data["meta"] = DEFAULT_STATUS + data["failures"] = DEFAULT_STATUS + data["checkpoint_completed"] = DEFAULT_STATUS + data["checkpoint_time"] = DEFAULT_STATUS + data["checkpoint_completed_time"] = DEFAULT_STATUS + data["checkpoint_time_utc"] = DEFAULT_STATUS + data["checkpoint_completion_time_utc"] = DEFAULT_STATUS + + if data["worker_status"] not in ["Active", "Passive"]: + data["slave_node"] = DEFAULT_STATUS + + if data.get("last_synced_utc", 0) == 0: + data["last_synced_utc"] = DEFAULT_STATUS + + if data.get("checkpoint_completion_time_utc", 0) == 0: + data["checkpoint_completion_time_utc"] = DEFAULT_STATUS + + if data.get("checkpoint_time_utc", 0) == 0: + data["checkpoint_time_utc"] = DEFAULT_STATUS + + return data + + def print_status(self, checkpoint_time=0, json_output=False): + status_out = self.get_status(checkpoint_time) + if json_output: + out = {} + # Convert all values as string + for k, v in status_out.items(): + out[k] = str(v) + print(json.dumps(out)) + return + + for key, value in status_out.items(): + print(("%s: %s" % (key, value))) diff --git a/geo-replication/syncdaemon/libcxattr.py b/geo-replication/syncdaemon/libcxattr.py index b5b6956aea6..e6406c36bd7 100644 --- a/geo-replication/syncdaemon/libcxattr.py +++ b/geo-replication/syncdaemon/libcxattr.py @@ -1,9 +1,22 @@ +# +# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com> +# This file is part of GlusterFS. + +# This file is licensed to you under your choice of the GNU Lesser +# General Public License, version 3 or any later version (LGPLv3 or +# later), or the GNU General Public License, version 2 (GPLv2), in all +# cases as published by the Free Software Foundation. 
+# + import os -from ctypes import * -from ctypes.util import find_library +from ctypes import CDLL, get_errno +from py2py3 import (bytearray_to_str, gr_create_string_buffer, + gr_query_xattr, gr_lsetxattr, gr_lremovexattr) + class Xattr(object): - """singleton that wraps the extended attribues system + + """singleton that wraps the extended attributes system interface for python using ctypes Just implement it to the degree we need it, in particular @@ -13,11 +26,11 @@ class Xattr(object): sizes we expect """ - libc = CDLL(find_library("libc")) + libc = CDLL("libc.so.6", use_errno=True) @classmethod def geterrno(cls): - return c_int.in_dll(cls.libc, 'errno').value + return get_errno() @classmethod def raise_oserr(cls): @@ -27,20 +40,23 @@ class Xattr(object): @classmethod def _query_xattr(cls, path, siz, syscall, *a): if siz: - buf = create_string_buffer('\0' * siz) + buf = gr_create_string_buffer(siz) else: buf = None ret = getattr(cls.libc, syscall)(*((path,) + a + (buf, siz))) if ret == -1: cls.raise_oserr() if siz: - return buf.raw[:ret] + # py2 and py3 compatibility. Convert bytes array + # to string + result = bytearray_to_str(buf.raw) + return result[:ret] else: return ret @classmethod def lgetxattr(cls, path, attr, siz=0): - return cls._query_xattr( path, siz, 'lgetxattr', attr) + return gr_query_xattr(cls, path, siz, 'lgetxattr', attr) @classmethod def lgetxattr_buf(cls, path, attr): @@ -54,34 +70,43 @@ class Xattr(object): @classmethod def llistxattr(cls, path, siz=0): - ret = cls._query_xattr(path, siz, 'llistxattr') + ret = gr_query_xattr(cls, path, siz, 'llistxattr') if isinstance(ret, str): - ret = ret.split('\0') + ret = ret.strip('\0') + ret = ret.split('\0') if ret else [] return ret @classmethod def lsetxattr(cls, path, attr, val): - ret = cls.libc.lsetxattr(path, attr, val, len(val), 0) + ret = gr_lsetxattr(cls, path, attr, val) if ret == -1: cls.raise_oserr() @classmethod - def lsetxattr_l(cls, path, attr, val): - """ lazy lsetxattr(): caller handles errno """ - cls.libc.lsetxattr(path, attr, val, len(val), 0) - - @classmethod def lremovexattr(cls, path, attr): - ret = cls.libc.lremovexattr(path, attr) + ret = gr_lremovexattr(cls, path, attr) if ret == -1: cls.raise_oserr() @classmethod def llistxattr_buf(cls, path): """listxattr variant with size discovery""" - size = cls.llistxattr(path) - if size == -1: - cls.raise_oserr() - if size == 0: - return [] - return cls.llistxattr(path, size) + try: + # Assuming no more than 100 xattrs in a file/directory and + # each xattr key length will be less than 256 bytes + # llistxattr will be called with bigger size so that + # listxattr will not fail with ERANGE. OSError will be + # raised if fails even with the large size specified. + size = 256 * 100 + return cls.llistxattr(path, size) + except OSError: + # If fixed length failed for getting list of xattrs then + # use the llistxattr call to get the size and use that + # size to get the list of xattrs. + size = cls.llistxattr(path) + if size == -1: + cls.raise_oserr() + if size == 0: + return [] + + return cls.llistxattr(path, size) diff --git a/geo-replication/syncdaemon/libgfchangelog.py b/geo-replication/syncdaemon/libgfchangelog.py index 68ec3baf144..a3bda7282c0 100644 --- a/geo-replication/syncdaemon/libgfchangelog.py +++ b/geo-replication/syncdaemon/libgfchangelog.py @@ -1,64 +1,143 @@ +# +# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com> +# This file is part of GlusterFS. 
+ +# This file is licensed to you under your choice of the GNU Lesser +# General Public License, version 3 or any later version (LGPLv3 or +# later), or the GNU General Public License, version 2 (GPLv2), in all +# cases as published by the Free Software Foundation. +# + import os -from ctypes import * +from ctypes import CDLL, RTLD_GLOBAL, get_errno, byref, c_ulong from ctypes.util import find_library +from syncdutils import ChangelogException, ChangelogHistoryNotAvailable +from py2py3 import (gr_cl_history_changelog, gr_cl_done, + gr_create_string_buffer, gr_cl_register, + gr_cl_history_done, bytearray_to_str) + + +libgfc = CDLL( + find_library("gfchangelog"), + mode=RTLD_GLOBAL, + use_errno=True +) + + +def _raise_changelog_err(): + errn = get_errno() + raise ChangelogException(errn, os.strerror(errn)) + + +def _init(): + if libgfc.gf_changelog_init(None) == -1: + _raise_changelog_err() + + +def register(brick, path, log_file, log_level, retries=0): + _init() + + ret = gr_cl_register(libgfc, brick, path, log_file, log_level, retries) + + if ret == -1: + _raise_changelog_err() + + +def scan(): + ret = libgfc.gf_changelog_scan() + if ret == -1: + _raise_changelog_err() + + +def startfresh(): + ret = libgfc.gf_changelog_start_fresh() + if ret == -1: + _raise_changelog_err() + + +def getchanges(): + def clsort(cfile): + return cfile.split('.')[-1] + + changes = [] + buf = gr_create_string_buffer(4096) + call = libgfc.gf_changelog_next_change + + while True: + ret = call(buf, 4096) + if ret in (0, -1): + break + + # py2 and py3 compatibility + result = bytearray_to_str(buf.raw[:ret - 1]) + changes.append(result) + + if ret == -1: + _raise_changelog_err() + + # cleanup tracker + startfresh() + + return sorted(changes, key=clsort) + + +def done(clfile): + ret = gr_cl_done(libgfc, clfile) + if ret == -1: + _raise_changelog_err() + + +def history_scan(): + ret = libgfc.gf_history_changelog_scan() + if ret == -1: + _raise_changelog_err() + + return ret + + +def history_changelog(changelog_path, start, end, num_parallel): + actual_end = c_ulong() + ret = gr_cl_history_changelog(libgfc, changelog_path, start, end, + num_parallel, byref(actual_end)) + if ret == -1: + _raise_changelog_err() + + if ret == -2: + raise ChangelogHistoryNotAvailable() + + return (ret, actual_end.value) + + +def history_startfresh(): + ret = libgfc.gf_history_changelog_start_fresh() + if ret == -1: + _raise_changelog_err() + + +def history_getchanges(): + def clsort(cfile): + return cfile.split('.')[-1] + + changes = [] + buf = gr_create_string_buffer(4096) + call = libgfc.gf_history_changelog_next_change + + while True: + ret = call(buf, 4096) + if ret in (0, -1): + break + + # py2 and py3 compatibility + result = bytearray_to_str(buf.raw[:ret - 1]) + changes.append(result) + + if ret == -1: + _raise_changelog_err() + + return sorted(changes, key=clsort) + -class Changes(object): - libgfc = CDLL(find_library("gfchangelog"), use_errno=True) - - @classmethod - def geterrno(cls): - return get_errno() - - @classmethod - def raise_oserr(cls): - errn = cls.geterrno() - raise OSError(errn, os.strerror(errn)) - - @classmethod - def _get_api(cls, call): - return getattr(cls.libgfc, call) - - @classmethod - def cl_register(cls, brick, path, log_file, log_level, retries = 0): - ret = cls._get_api('gf_changelog_register')(brick, path, - log_file, log_level, retries) - if ret == -1: - cls.raise_oserr() - - @classmethod - def cl_scan(cls): - ret = cls._get_api('gf_changelog_scan')() - if ret == -1: - cls.raise_oserr() - - @classmethod - 
def cl_startfresh(cls): - ret = cls._get_api('gf_changelog_start_fresh')() - if ret == -1: - cls.raise_oserr() - - @classmethod - def cl_getchanges(cls): - """ remove hardcoding for path name length """ - def clsort(f): - return f.split('.')[-1] - changes = [] - buf = create_string_buffer('\0', 4096) - call = cls._get_api('gf_changelog_next_change') - - while True: - ret = call(buf, 4096) - if ret in (0, -1): - break; - changes.append(buf.raw[:ret-1]) - if ret == -1: - cls.raise_oserr() - # cleanup tracker - cls.cl_startfresh() - return sorted(changes, key=clsort) - - @classmethod - def cl_done(cls, clfile): - ret = cls._get_api('gf_changelog_done')(clfile) - if ret == -1: - cls.raise_oserr() +def history_done(clfile): + ret = gr_cl_history_done(libgfc, clfile) + if ret == -1: + _raise_changelog_err() diff --git a/geo-replication/syncdaemon/logutils.py b/geo-replication/syncdaemon/logutils.py new file mode 100644 index 00000000000..01ae7852f23 --- /dev/null +++ b/geo-replication/syncdaemon/logutils.py @@ -0,0 +1,77 @@ +# -*- coding: utf-8 -*- +# +# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> +# This file is part of GlusterFS. +# +# This file is licensed to you under your choice of the GNU Lesser +# General Public License, version 3 or any later version (LGPLv3 or +# later), or the GNU General Public License, version 2 (GPLv2), in all +# cases as published by the Free Software Foundation. +# + +import logging +from logging import Logger, handlers +import sys +import time + + +class GLogger(Logger): + + """Logger customizations for gsyncd. + + It implements a log format similar to that of glusterfs. + """ + + def makeRecord(self, name, level, *a): + rv = Logger.makeRecord(self, name, level, *a) + rv.nsecs = (rv.created - int(rv.created)) * 1000000 + fr = sys._getframe(4) + callee = fr.f_locals.get('self') + if callee: + ctx = str(type(callee)).split("'")[1].split('.')[-1] + else: + ctx = '<top>' + if not hasattr(rv, 'funcName'): + rv.funcName = fr.f_code.co_name + rv.lvlnam = logging.getLevelName(level)[0] + rv.ctx = ctx + return rv + + +LOGFMT = ("[%(asctime)s.%(nsecs)d] %(lvlnam)s [%(module)s{0}" + ":%(lineno)s:%(funcName)s] %(ctx)s: %(message)s") + + +def setup_logging(level="INFO", label="", log_file=""): + if label: + label = "(" + label + ")" + + filename = None + stream = None + if log_file: + if log_file in ('-', '/dev/stderr'): + stream = sys.stderr + elif log_file == '/dev/stdout': + stream = sys.stdout + else: + filename = log_file + + datefmt = "%Y-%m-%d %H:%M:%S" + fmt = LOGFMT.format(label) + logging.root = GLogger("root", level) + logging.setLoggerClass(GLogger) + logging.Formatter.converter = time.gmtime # Log in GMT/UTC time + logging.getLogger().handlers = [] + logging.getLogger().setLevel(level) + + if filename is not None: + logging_handler = handlers.WatchedFileHandler(filename) + formatter = logging.Formatter(fmt=fmt, + datefmt=datefmt) + logging_handler.setFormatter(formatter) + logging.getLogger().addHandler(logging_handler) + else: + logging.basicConfig(stream=stream, + format=fmt, + datefmt=datefmt, + level=level) diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py index 58df14954bb..9501aeae6b5 100644 --- a/geo-replication/syncdaemon/master.py +++ b/geo-replication/syncdaemon/master.py @@ -1,50 +1,88 @@ +# +# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com> +# This file is part of GlusterFS. 
+ +# This file is licensed to you under your choice of the GNU Lesser +# General Public License, version 3 or any later version (LGPLv3 or +# later), or the GNU General Public License, version 2 (GPLv2), in all +# cases as published by the Free Software Foundation. +# + import os import sys import time import stat -import random -import signal -import json import logging -import socket +import fcntl import string import errno -from shutil import copyfileobj -from errno import ENOENT, ENODATA, EPIPE, EEXIST -from threading import currentThread, Condition, Lock +import tarfile +from errno import ENOENT, ENODATA, EEXIST, EACCES, EAGAIN, ESTALE, EINTR +from threading import Condition, Lock from datetime import datetime -from gconf import gconf -from tempfile import mkdtemp, NamedTemporaryFile -from syncdutils import FreeObject, Thread, GsyncdError, boolify, escape, \ - unescape, select, gauxpfx, md5hex, selfkill, entry2pb +import gsyncdconfig as gconf +import libgfchangelog +from rconf import rconf +from syncdutils import (Thread, GsyncdError, escape_space_newline, + unescape_space_newline, gauxpfx, escape, + lstat, errno_wrap, FreeObject, lf, matching_disk_gfid, + NoStimeAvailable, PartialHistoryAvailable, + host_brick_split) URXTIME = (-1, 0) +# Default rollover time set in changelog translator +# changelog rollover time is hardcoded here to avoid the +# xsync usage when crawling switch happens from history +# to changelog. If rollover time increased in translator +# then geo-rep can enter into xsync crawl after history +# crawl before starting live changelog crawl. +CHANGELOG_ROLLOVER_TIME = 15 + # Utility functions to help us to get to closer proximity # of the DRY principle (no, don't look for elevated or # perspectivistic things here) + def _xtime_now(): t = time.time() sec = int(t) nsec = int((t - sec) * 1000000) return (sec, nsec) + def _volinfo_hook_relax_foreign(self): volinfo_sys = self.get_sys_volinfo() fgn_vi = volinfo_sys[self.KFGN] if fgn_vi: expiry = fgn_vi['timeout'] - int(time.time()) + 1 - logging.info('foreign volume info found, waiting %d sec for expiry' % \ - expiry) + logging.info(lf('foreign volume info found, waiting for expiry', + expiry=expiry)) time.sleep(expiry) volinfo_sys = self.get_sys_volinfo() - self.volinfo_state, state_change = self.volinfo_state_machine(self.volinfo_state, - volinfo_sys) - if self.inter_master: - raise GsyncdError("cannot be intermediate master in special mode") - return (volinfo_sys, state_change) + return volinfo_sys + + +def edct(op, **ed): + dct = {} + dct['op'] = op + # This is used in automatic gfid conflict resolution. + # When marked True, it's skipped during re-processing. + dct['skip_entry'] = False + for k in ed: + if k == 'stat': + st = ed[k] + dst = dct['stat'] = {} + if st: + dst['uid'] = st.st_uid + dst['gid'] = st.st_gid + dst['mode'] = st.st_mode + dst['atime'] = st.st_atime + dst['mtime'] = st.st_mtime + else: + dct[k] = ed[k] + return dct # The API! 
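The edct() helper introduced above normalizes an entry operation into a plain dict: the op name, a skip_entry flag consulted by the automatic gfid-conflict-resolution code, and a flattened copy of the relevant os.stat() fields. The following minimal sketch shows how such a descriptor might be built; it is illustrative only and not part of the patch, and the path, gfid and entry values are invented for demonstration.

import os

def edct(op, **ed):
    # Trimmed copy of the edct() helper added in this patch.
    dct = {'op': op, 'skip_entry': False}
    for k, v in ed.items():
        if k == 'stat':
            st = v
            dst = dct['stat'] = {}
            if st:
                dst['uid'] = st.st_uid
                dst['gid'] = st.st_gid
                dst['mode'] = st.st_mode
                dst['atime'] = st.st_atime
                dst['mtime'] = st.st_mtime
        else:
            dct[k] = v
    return dct

# Hypothetical values: the gfid, parent-gfid path and basename below are
# made up; on a real brick they come from the processed changelog entries.
st = os.lstat('/etc/hosts')
entry = edct('CREATE',
             stat=st,
             entry='.gfid/9a6c1f2e-0000-0000-0000-000000000000/hosts',
             gfid='c4f2b1aa-0000-0000-0000-000000000000')
print(entry['op'], entry['skip_entry'], entry['stat']['mode'])

Keeping the descriptors as plain dicts (rather than stat_result objects) keeps them straightforward to serialize when the master ships batches of entry operations to the slave.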
@@ -53,17 +91,41 @@ def gmaster_builder(excrawl=None): """produce the GMaster class variant corresponding to sync mode""" this = sys.modules[__name__] - modemixin = gconf.special_sync_mode + modemixin = gconf.get("special-sync-mode") if not modemixin: modemixin = 'normal' - changemixin = isinstance(excrawl, str) and excrawl or gconf.change_detector - logging.info('setting up %s change detection mode' % changemixin) + + if gconf.get("change-detector") == 'xsync': + changemixin = 'xsync' + elif excrawl: + changemixin = excrawl + else: + changemixin = gconf.get("change-detector") + + logging.debug(lf('setting up change detection mode', + mode=changemixin)) modemixin = getattr(this, modemixin.capitalize() + 'Mixin') crawlmixin = getattr(this, 'GMaster' + changemixin.capitalize() + 'Mixin') - sendmarkmixin = boolify(gconf.use_rsync_xattrs) and SendmarkRsyncMixin or SendmarkNormalMixin - purgemixin = boolify(gconf.ignore_deletes) and PurgeNoopMixin or PurgeNormalMixin - class _GMaster(crawlmixin, modemixin, sendmarkmixin, purgemixin): + + if gconf.get("use-rsync-xattrs"): + sendmarkmixin = SendmarkRsyncMixin + else: + sendmarkmixin = SendmarkNormalMixin + + if gconf.get("ignore-deletes"): + purgemixin = PurgeNoopMixin + else: + purgemixin = PurgeNormalMixin + + if gconf.get("sync-method") == "tarssh": + syncengine = TarSSHEngine + else: + syncengine = RsyncEngine + + class _GMaster(crawlmixin, modemixin, sendmarkmixin, + purgemixin, syncengine): pass + return _GMaster @@ -72,6 +134,7 @@ def gmaster_builder(excrawl=None): # sync modes class NormalMixin(object): + """normal geo-rep behavior""" minus_infinity = URXTIME @@ -99,21 +162,30 @@ class NormalMixin(object): return xt0 >= xt1 def make_xtime_opts(self, is_master, opts): - if not 'create' in opts: + if 'create' not in opts: opts['create'] = is_master - if not 'default_xtime' in opts: + if 'default_xtime' not in opts: opts['default_xtime'] = URXTIME - def xtime_low(self, server, path, **opts): - xt = server.xtime(path, self.uuid) + def xtime_low(self, rsc, path, **opts): + if rsc == self.master: + xt = rsc.server.xtime(path, self.uuid) + else: + xt = rsc.server.stime(path, self.uuid) + if isinstance(xt, int) and xt == ENODATA: + xt = rsc.server.xtime(path, self.uuid) + if not isinstance(xt, int): + self.slave.server.set_stime(path, self.uuid, xt) if isinstance(xt, int) and xt != ENODATA: return xt if xt == ENODATA or xt < self.volmark: if opts['create']: xt = _xtime_now() - server.aggregated.set_xtime(path, self.uuid, xt) + rsc.server.aggregated.set_xtime(path, self.uuid, xt) else: - xt = opts['default_xtime'] + zero_zero = (0, 0) + if xt != zero_zero: + xt = opts['default_xtime'] return xt def keepalive_payload_hook(self, timo, gap): @@ -125,7 +197,7 @@ class NormalMixin(object): vi = vi.copy() vi['timeout'] = int(time.time()) + timo else: - # send keep-alives more frequently to + # send keep-alive more frequently to # avoid a delay in announcing our volume info # to slave if it becomes established in the # meantime @@ -133,10 +205,7 @@ class NormalMixin(object): return (vi, gap) def volinfo_hook(self): - volinfo_sys = self.get_sys_volinfo() - self.volinfo_state, state_change = self.volinfo_state_machine(self.volinfo_state, - volinfo_sys) - return (volinfo_sys, state_change) + return self.get_sys_volinfo() def xtime_reversion_hook(self, path, xtl, xtr): if xtr > xtl: @@ -146,186 +215,137 @@ class NormalMixin(object): return xte > xtrd def set_slave_xtime(self, path, mark): - self.slave.server.set_xtime(path, self.uuid, mark) + 
self.slave.server.set_stime(path, self.uuid, mark) + # self.slave.server.set_xtime_remote(path, self.uuid, mark) + class PartialMixin(NormalMixin): + """a variant tuned towards operation with a master that has partial info of the slave (brick typically)""" def xtime_reversion_hook(self, path, xtl, xtr): pass -class WrapupMixin(NormalMixin): + +class RecoverMixin(NormalMixin): + """a variant that differs from normal in terms of ignoring non-indexed files""" @staticmethod def make_xtime_opts(is_master, opts): - if not 'create' in opts: + if 'create' not in opts: opts['create'] = False - if not 'default_xtime' in opts: + if 'default_xtime' not in opts: opts['default_xtime'] = URXTIME - @staticmethod def keepalive_payload_hook(self, timo, gap): return (None, gap) def volinfo_hook(self): return _volinfo_hook_relax_foreign(self) -class BlindMixin(object): - """Geo-rep flavor using vectored xtime. +# Further mixins for certain tunable behaviors - Coordinates are the master, slave uuid pair; - in master coordinate behavior is normal, - in slave coordinate we force synchronization - on any value difference (these are in disjunctive - relation, ie. if either orders the entry to be - synced, it shall be synced. - """ - minus_infinity = (URXTIME, None) +class SendmarkNormalMixin(object): - @staticmethod - def serialize_xtime(xt): - a = [] - for x in xt: - if not x: - x = ('None', '') - a.extend(x) - return '.'.join(str(n) for n in a) + def sendmark_regular(self, *a, **kw): + return self.sendmark(*a, **kw) - @staticmethod - def deserialize_xtime(xt): - a = xt.split(".") - a = (tuple(a[0:2]), tuple(a[3:4])) - b = [] - for p in a: - if p[0] == 'None': - p = None - else: - p = tuple(int(x) for x in p) - b.append(p) - return tuple(b) - @staticmethod - def native_xtime(xt): - return xt[0] +class SendmarkRsyncMixin(object): - @staticmethod - def xtime_geq(xt0, xt1): - return (not xt1[0] or xt0[0] >= xt1[0]) and \ - (not xt1[1] or xt0[1] >= xt1[1]) + def sendmark_regular(self, *a, **kw): + pass - @property - def ruuid(self): - if self.volinfo_r: - return self.volinfo_r['uuid'] - @staticmethod - def make_xtime_opts(is_master, opts): - if not 'create' in opts: - opts['create'] = is_master - if not 'default_xtime' in opts: - opts['default_xtime'] = URXTIME +class PurgeNormalMixin(object): - def xtime_low(self, server, path, **opts): - xtd = server.xtime_vec(path, self.uuid, self.ruuid) - if isinstance(xtd, int): - return xtd - xt = (xtd[self.uuid], xtd[self.ruuid]) - if not xt[1] and (not xt[0] or xt[0] < self.volmark): - if opts['create']: - # not expected, but can happen if file originates - # from interrupted gsyncd transfer - logging.warn('have to fix up missing xtime on ' + path) - xt0 = _xtime_now() - server.aggregated.set_xtime(path, self.uuid, xt0) - else: - xt0 = opts['default_xtime'] - xt = (xt0, xt[1]) - return xt + def purge_missing(self, path, names): + self.slave.server.purge(path, names) - @staticmethod - def keepalive_payload_hook(self, timo, gap): - return (None, gap) - def volinfo_hook(self): - res = _volinfo_hook_relax_foreign(self) - volinfo_r_new = self.slave.server.aggregated.native_volume_info() - if volinfo_r_new['retval']: - raise GsyncdError("slave is corrupt") - if getattr(self, 'volinfo_r', None): - if self.volinfo_r['uuid'] != volinfo_r_new['uuid']: - raise GsyncdError("uuid mismatch on slave") - self.volinfo_r = volinfo_r_new - return res +class PurgeNoopMixin(object): - def xtime_reversion_hook(self, path, xtl, xtr): - if not isinstance(xtr[0], int) and \ - (isinstance(xtl[0], int) 
or xtr[0] > xtl[0]): - raise GsyncdError("timestamp corruption for " + path) + def purge_missing(self, path, names): + pass - def need_sync(self, e, xte, xtrd): - if xte[0]: - if not xtrd[0] or xte[0] > xtrd[0]: - # there is outstanding diff at 0th pos, - # we can short-cut to true - return True - # we arrived to this point by either of these - # two possiblilites: - # - no outstanding difference at 0th pos, - # wanna see 1st pos if he raises veto - # against "no need to sync" proposal - # - no data at 0th pos, 1st pos will have - # to decide (due to xtime assignment, - # in this case 1st pos does carry data - # -- iow, if 1st pos did not have data, - # and 0th neither, 0th would have been - # force-feeded) - if not xte[1]: - # no data, no veto - return False - # the hard work: for 1st pos, - # the conduct is fetch corresponding - # slave data and do a "blind" comparison - # (ie. do not care who is newer, we trigger - # sync on non-identical xitmes) - xtr = self.xtime(e, self.slave) - return isinstance(xtr, int) or xte[1] != xtr[1] - def set_slave_xtime(self, path, mark): - xtd = {} - for (u, t) in zip((self.uuid, self.ruuid), mark): - if t: - xtd[u] = t - self.slave.server.set_xtime_vec(path, xtd) +class TarSSHEngine(object): + """Sync engine that uses tar(1) piped over ssh(1) + for data transfers. Good for lots of small files. + """ -# Further mixins for certain tunable behaviors + def a_syncdata(self, files): + logging.debug(lf("Files", files=files)) -class SendmarkNormalMixin(object): + for f in files: + pb = self.syncer.add(f) - def sendmark_regular(self, *a, **kw): - return self.sendmark(*a, **kw) + def regjob(se, xte, pb): + rv = pb.wait() + if rv[0]: + logging.debug(lf('synced', file=se)) + return True + else: + # stat check for file presence + st = lstat(se) + if isinstance(st, int): + # file got unlinked in the interim + self.unlinked_gfids.add(se) + return True -class SendmarkRsyncMixin(object): + self.add_job(self.FLAT_DIR_HIERARCHY, 'reg', regjob, f, None, pb) - def sendmark_regular(self, *a, **kw): - pass + def syncdata_wait(self): + if self.wait(self.FLAT_DIR_HIERARCHY, None): + return True + def syncdata(self, files): + self.a_syncdata(files) + self.syncdata_wait() -class PurgeNormalMixin(object): - def purge_missing(self, path, names): - self.slave.server.purge(path, names) +class RsyncEngine(object): -class PurgeNoopMixin(object): + """Sync engine that uses rsync(1) for data transfers""" + + def a_syncdata(self, files): + logging.debug(lf("files", files=files)) + + for f in files: + logging.debug(lf('candidate for syncing', file=f)) + pb = self.syncer.add(f) + + def regjob(se, xte, pb): + rv = pb.wait() + if rv[0]: + logging.debug(lf('synced', file=se)) + return True + else: + # stat to check if the file exist + st = lstat(se) + if isinstance(st, int): + # file got unlinked in the interim + self.unlinked_gfids.add(se) + return True + + self.add_job(self.FLAT_DIR_HIERARCHY, 'reg', regjob, f, None, pb) + + def syncdata_wait(self): + if self.wait(self.FLAT_DIR_HIERARCHY, None): + return True + + def syncdata(self, files): + self.a_syncdata(files) + self.syncdata_wait() - def purge_missing(self, path, names): - pass class GMasterCommon(object): + """abstract class impementling master role""" KFGN = 0 @@ -336,8 +356,9 @@ class GMasterCommon(object): err out on multiple foreign masters """ - fgn_vis, nat_vi = self.master.server.aggregated.foreign_volume_infos(), \ - self.master.server.aggregated.native_volume_info() + fgn_vis, nat_vi = ( + 
self.master.server.aggregated.foreign_volume_infos(), + self.master.server.aggregated.native_volume_info()) fgn_vi = None if fgn_vis: if len(fgn_vis) > 1: @@ -355,12 +376,17 @@ class GMasterCommon(object): if self.volinfo: return self.volinfo['volume_mark'] - @property - def inter_master(self): - """decide if we are an intermediate master - in a cascading setup - """ - return self.volinfo_state[self.KFGN] and True or False + def get_entry_stime(self): + data = self.slave.server.entry_stime(".", self.uuid) + if isinstance(data, int): + data = None + return data + + def get_data_stime(self): + data = self.slave.server.stime(".", self.uuid) + if isinstance(data, int): + data = None + return data def xtime(self, path, *a, **opts): """get amended xtime @@ -376,40 +402,17 @@ class GMasterCommon(object): else: rsc = self.master self.make_xtime_opts(rsc == self.master, opts) - return self.xtime_low(rsc.server, path, **opts) - - def get_initial_crawl_data(self): - default_data = {'sync_time': 0, 'files_synced': 0, 'bytes_synced': 0} - if getattr(gconf, 'state_detail_file', None): - try: - return json.load(open(gconf.state_detail_file)) - except (IOError, OSError): - ex = sys.exc_info()[1] - if ex.errno == ENOENT: - # Create file with initial data - with open(gconf.state_detail_file, 'wb') as f: - json.dump(default_data, f) - return default_data - else: - raise - - return default_data - - def update_crawl_data(self): - if getattr(gconf, 'state_detail_file', None): - try: - same_dir = os.path.dirname(gconf.state_detail_file) - with NamedTemporaryFile(dir=same_dir, delete=False) as tmp: - json.dump(self.total_crawl_stats, tmp) - os.rename(tmp.name, gconf.state_detail_file) - except (IOError, OSError): - raise + return self.xtime_low(rsc, path, **opts) def __init__(self, master, slave): self.master = master self.slave = slave self.jobtab = {} - self.syncer = Syncer(slave) + if gconf.get("sync-method") == "tarssh": + self.syncer = Syncer(slave, self.slave.tarssh, [2]) + else: + # partial transfer (cf. rsync(1)), that's normal + self.syncer = Syncer(slave, self.slave.rsync, [23, 24]) # crawls vs. turns: # - self.crawls is simply the number of crawl() invocations on root # - one turn is a maximal consecutive sequence of crawls so that each @@ -417,30 +420,24 @@ class GMasterCommon(object): # - self.turns is the number of turns since start # - self.total_turns is a limit so that if self.turns reaches it, then # we exit (for diagnostic purposes) - # so, eg., if the master fs changes unceasingly, self.turns will remain 0. + # so, eg., if the master fs changes unceasingly, self.turns will remain + # 0. 
self.crawls = 0 self.turns = 0 - self.total_turns = int(gconf.turns) + self.total_turns = rconf.turns + self.crawl_start = datetime.now() self.lastreport = {'crawls': 0, 'turns': 0, 'time': 0} - self.crawl_stats = {'sync_time': 0, 'last_synctime': 0, 'crawl_starttime': 0, - 'crawl_time': 0, 'files_synced': 0, 'bytes_synced' :0} - self.total_crawl_stats = self.get_initial_crawl_data() self.start = None self.change_seen = None - # the authoritative (foreign, native) volinfo pair - # which lets us deduce what to do when we refetch - # the volinfos from system - uuid_preset = getattr(gconf, 'volume_id', None) - self.volinfo_state = (uuid_preset and {'uuid': uuid_preset}, None) # the actual volinfo we make use of self.volinfo = None self.terminate = False self.sleep_interval = 1 - self.checkpoint_thread = None + self.unlinked_gfids = set() def init_keep_alive(cls): """start the keep-alive thread """ - timo = int(gconf.timeout or 0) + timo = gconf.get("slave-timeout", 0) if timo > 0: def keep_alive(): while True: @@ -450,132 +447,182 @@ class GMasterCommon(object): t = Thread(target=keep_alive) t.start() - def volinfo_query(self): - """volume info state machine""" - volinfo_sys, state_change = self.volinfo_hook() - if self.inter_master: - self.volinfo = volinfo_sys[self.KFGN] - else: - self.volinfo = volinfo_sys[self.KNAT] - if state_change == self.KFGN or (state_change == self.KNAT and not self.inter_master): - logging.info('new master is %s', self.uuid) - if self.volinfo: - logging.info("%s master with volume id %s ..." % \ - (self.inter_master and "intermediate" or "primary", - self.uuid)) - if state_change == self.KFGN: - gconf.configinterface.set('volume_id', self.uuid) - if self.volinfo: - if self.volinfo['retval']: - raise GsyncdError ("master is corrupt") - self.start_checkpoint_thread() - else: - if should_display_info or self.crawls == 0: - if self.inter_master: - logging.info("waiting for being synced from %s ..." 
% \ - self.volinfo_state[self.KFGN]['uuid']) - else: - logging.info("waiting for volume info ...") - return True + def mgmt_lock(self): + + """Take management volume lock """ + if rconf.mgmt_lock_fd: + try: + fcntl.lockf(rconf.mgmt_lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB) + return True + except: + ex = sys.exc_info()[1] + if isinstance(ex, IOError) and ex.errno in (EACCES, EAGAIN): + return False + raise + + fd = None + bname = str(self.uuid) + "_" + rconf.args.slave_id + "_subvol_" \ + + str(rconf.args.subvol_num) + ".lock" + mgmt_lock_dir = os.path.join(gconf.get("meta-volume-mnt"), "geo-rep") + path = os.path.join(mgmt_lock_dir, bname) + logging.debug(lf("lock file path", path=path)) + try: + fd = os.open(path, os.O_CREAT | os.O_RDWR) + except OSError: + ex = sys.exc_info()[1] + if ex.errno == ENOENT: + logging.info("Creating geo-rep directory in meta volume...") + try: + os.makedirs(mgmt_lock_dir) + except OSError: + ex = sys.exc_info()[1] + if ex.errno == EEXIST: + pass + else: + raise + fd = os.open(path, os.O_CREAT | os.O_RDWR) + else: + raise + try: + fcntl.lockf(fd, fcntl.LOCK_EX | fcntl.LOCK_NB) + # Save latest FD for future use + rconf.mgmt_lock_fd = fd + except: + ex = sys.exc_info()[1] + if isinstance(ex, IOError) and ex.errno in (EACCES, EAGAIN): + # cannot grab, it's taken + rconf.mgmt_lock_fd = fd + return False + raise - def should_crawl(cls): - return (gconf.glusterd_uuid in cls.master.server.node_uuid()) + return True + + def should_crawl(self): + if not gconf.get("use-meta-volume"): + return rconf.args.local_node_id in self.master.server.node_uuid() + + if not os.path.ismount(gconf.get("meta-volume-mnt")): + logging.error("Meta-volume is not mounted. Worker Exiting...") + sys.exit(1) + return self.mgmt_lock() def register(self): self.register() - def crawlwrap(self, oneshot=False): + def crawlwrap(self, oneshot=False, register_time=None): if oneshot: # it's important to do this during the oneshot crawl as # for a passive gsyncd (ie. in a replicate scenario) # the keepalive thread would keep the connection alive. self.init_keep_alive() + + # If crawlwrap is called when partial history available, + # then it sets register_time which is the time when geo-rep + # worker registered to changelog consumption. Since nsec is + # not considered in register time, there are chances of skipping + # changes detection in xsync crawl. This limit will be reset when + # crawlwrap is called again. + self.live_changelog_start_time = None + if register_time: + self.live_changelog_start_time = (register_time, 0) + + # no need to maintain volinfo state machine. + # in a cascading setup, each geo-replication session is + # independent (ie. 'volume-mark' and 'xtime' are not + # propagated). This is because the slave's xtime is now + # stored on the master itself. 'volume-mark' just identifies + # that we are in a cascading setup and need to enable + # 'geo-replication.ignore-pid-check' option. + volinfo_sys = self.volinfo_hook() + self.volinfo = volinfo_sys[self.KNAT] + inter_master = volinfo_sys[self.KFGN] + logging.debug("%s master with volume id %s ..." 
% + (inter_master and "intermediate" or "primary", + self.uuid)) + rconf.volume_id = self.uuid + if self.volinfo: + if self.volinfo['retval']: + logging.warn(lf("master cluster's info may not be valid", + error=self.volinfo['retval'])) + else: + raise GsyncdError("master volinfo unavailable") self.lastreport['time'] = time.time() - self.crawl_stats['crawl_starttime'] = datetime.now() - logging.info('crawl interval: %d seconds' % self.sleep_interval) t0 = time.time() crawl = self.should_crawl() while not self.terminate: - if self.volinfo_query(): - continue + if self.start: + logging.debug("... crawl #%d done, took %.6f seconds" % + (self.crawls, time.time() - self.start)) + self.start = time.time() + should_display_info = self.start - self.lastreport['time'] >= 60 + if should_display_info: + logging.debug("%d crawls, %d turns", + self.crawls - self.lastreport['crawls'], + self.turns - self.lastreport['turns']) + self.lastreport.update(crawls=self.crawls, + turns=self.turns, + time=self.start) t1 = time.time() - if int(t1 - t0) >= 60: #lets hardcode this check to 60 seconds + if int(t1 - t0) >= gconf.get("replica-failover-interval"): crawl = self.should_crawl() t0 = t1 + self.update_worker_remote_node() if not crawl: + self.status.set_passive() + # bring up _this_ brick to the cluster stime + # which is min of cluster (but max of the replicas) + brick_stime = self.xtime('.', self.slave) + cluster_stime = self.master.server.aggregated.stime_mnt( + '.', '.'.join([str(self.uuid), rconf.args.slave_id])) + logging.debug(lf("Crawl info", + cluster_stime=cluster_stime, + brick_stime=brick_stime)) + + if not isinstance(cluster_stime, int): + if brick_stime < cluster_stime: + self.slave.server.set_stime( + self.FLAT_DIR_HIERARCHY, self.uuid, cluster_stime) + self.upd_stime(cluster_stime) + # Purge all changelogs available in processing dir + # less than cluster_stime + proc_dir = os.path.join(self.tempdir, + ".processing") + + if os.path.exists(proc_dir): + to_purge = [f for f in os.listdir(proc_dir) + if (f.startswith("CHANGELOG.") and + int(f.split('.')[-1]) < + cluster_stime[0])] + for f in to_purge: + os.remove(os.path.join(proc_dir, f)) + time.sleep(5) continue - if self.start: - logging.debug("... 
crawl #%d done, took %.6f seconds" % \ - (self.crawls, time.time() - self.start)) - self.start = t1 - should_display_info = self.start - self.lastreport['time'] >= 60 - if should_display_info: - logging.info("%d crawls, %d turns", - self.crawls - self.lastreport['crawls'], - self.turns - self.lastreport['turns']) - self.lastreport.update(crawls = self.crawls, - turns = self.turns, - time = self.start) + + self.status.set_active() self.crawl() + if oneshot: return time.sleep(self.sleep_interval) - @classmethod - def _checkpt_param(cls, chkpt, prm, xtimish=True): - """use config backend to lookup a parameter belonging to - checkpoint @chkpt""" - cprm = getattr(gconf, 'checkpoint_' + prm, None) - if not cprm: - return - chkpt_mapped, val = cprm.split(':', 1) - if unescape(chkpt_mapped) != chkpt: - return - if xtimish: - val = cls.deserialize_xtime(val) - return val - - @classmethod - def _set_checkpt_param(cls, chkpt, prm, val, xtimish=True): - """use config backend to store a parameter associated - with checkpoint @chkpt""" - if xtimish: - val = cls.serialize_xtime(val) - gconf.configinterface.set('checkpoint_' + prm, "%s:%s" % (escape(chkpt), val)) - @staticmethod def humantime(*tpair): """format xtime-like (sec, nsec) pair to human readable format""" ts = datetime.fromtimestamp(float('.'.join(str(n) for n in tpair))).\ - strftime("%Y-%m-%d %H:%M:%S") + strftime("%Y-%m-%d %H:%M:%S") if len(tpair) > 1: ts += '.' + str(tpair[1]) return ts - def get_extra_info(self): - str_info = "\nFilesSynced=%d;" % (self.crawl_stats['files_synced']) - str_info += "BytesSynced=%s;" % (self.crawl_stats['bytes_synced']) - - self.crawl_stats['crawl_time'] = datetime.now() - self.crawl_stats['crawl_starttime'] - - str_info += "Uptime=%s;" % (self._crawl_time_format(self.crawl_stats['crawl_time'])) - str_info += "SyncTime=%s;" % (self.crawl_stats['sync_time']) - str_info += "TotalSyncTime=%s;" % (self.total_crawl_stats['sync_time']) - str_info += "TotalFilesSynced=%d;" % (self.total_crawl_stats['files_synced']) - str_info += "TotalBytesSynced=%s;" % (self.total_crawl_stats['bytes_synced']) - str_info += "\0" - logging.debug(str_info) - return str_info - def _crawl_time_format(self, crawl_time): # Ex: 5 years, 4 days, 20:23:10 years, days = divmod(crawl_time.days, 365.25) years = int(years) days = int(days) - date="" + date = "" m, s = divmod(crawl_time.seconds, 60) h, m = divmod(m, 60) @@ -584,103 +631,15 @@ class GMasterCommon(object): if days != 0: date += "%s %s " % (days, "day" if days == 1 else "days") - date += "%s:%s:%s" % (string.zfill(h, 2), string.zfill(m, 2), string.zfill(s, 2)) + date += "%s:%s:%s" % (string.zfill(h, 2), + string.zfill(m, 2), string.zfill(s, 2)) return date - def checkpt_service(self, chan, chkpt, tgt): - """checkpoint service loop - - monitor and verify checkpoint status for @chkpt, and listen - for incoming requests for whom we serve a pretty-formatted - status report""" - if not chkpt: - # dummy loop for the case when there is no checkpt set - while True: - select([chan], [], []) - conn, _ = chan.accept() - conn.send(self.get_extra_info()) - conn.close() - completed = self._checkpt_param(chkpt, 'completed', xtimish=False) - if completed: - completed = tuple(int(x) for x in completed.split('.')) - while True: - s,_,_ = select([chan], [], [], (not completed) and 5 or None) - # either request made and we re-check to not - # give back stale data, or we still hunting for completion - if self.native_xtime(tgt) and self.native_xtime(tgt) < self.volmark: - # indexing has been reset since 
setting the checkpoint - status = "is invalid" - else: - xtr = self.xtime('.', self.slave) - if isinstance(xtr, int): - raise GsyncdError("slave root directory is unaccessible (%s)", - os.strerror(xtr)) - ncompleted = self.xtime_geq(xtr, tgt) - if completed and not ncompleted: # stale data - logging.warn("completion time %s for checkpoint %s became stale" % \ - (self.humantime(*completed), chkpt)) - completed = None - gconf.confdata.delete('checkpoint-completed') - if ncompleted and not completed: # just reaching completion - completed = "%.6f" % time.time() - self._set_checkpt_param(chkpt, 'completed', completed, xtimish=False) - completed = tuple(int(x) for x in completed.split('.')) - logging.info("checkpoint %s completed" % chkpt) - status = completed and \ - "completed at " + self.humantime(completed[0]) or \ - "not reached yet" - if s: - conn = None - try: - conn, _ = chan.accept() - try: - conn.send(" | checkpoint %s %s %s" % (chkpt, status, self.get_extra_info())) - except: - exc = sys.exc_info()[1] - if (isinstance(exc, OSError) or isinstance(exc, IOError)) and \ - exc.errno == EPIPE: - logging.debug('checkpoint client disconnected') - else: - raise - finally: - if conn: - conn.close() - - def start_checkpoint_thread(self): - """prepare and start checkpoint service""" - if self.checkpoint_thread or not ( - getattr(gconf, 'state_socket_unencoded', None) and getattr(gconf, 'socketdir', None) - ): - return - chan = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) - state_socket = os.path.join(gconf.socketdir, md5hex(gconf.state_socket_unencoded) + ".socket") - try: - os.unlink(state_socket) - except: - if sys.exc_info()[0] == OSError: - pass - chan.bind(state_socket) - chan.listen(1) - checkpt_tgt = None - if gconf.checkpoint: - checkpt_tgt = self._checkpt_param(gconf.checkpoint, 'target') - if not checkpt_tgt: - checkpt_tgt = self.xtime('.') - if isinstance(checkpt_tgt, int): - raise GsyncdError("master root directory is unaccessible (%s)", - os.strerror(checkpt_tgt)) - self._set_checkpt_param(gconf.checkpoint, 'target', checkpt_tgt) - logging.debug("checkpoint target %s has been determined for checkpoint %s" % \ - (repr(checkpt_tgt), gconf.checkpoint)) - t = Thread(target=self.checkpt_service, args=(chan, gconf.checkpoint, checkpt_tgt)) - t.start() - self.checkpoint_thread = t - def add_job(self, path, label, job, *a, **kw): """insert @job function to job table at @path with @label""" - if self.jobtab.get(path) == None: + if self.jobtab.get(path) is None: self.jobtab[path] = [] - self.jobtab[path].append((label, a, lambda : job(*a, **kw))) + self.jobtab[path].append((label, a, lambda: job(*a, **kw))) def add_failjob(self, path, label): """invoke .add_job with a job that does nothing just fails""" @@ -691,7 +650,7 @@ class GMasterCommon(object): """perform jobs registered for @path Reset jobtab entry for @path, - determine success as the conjuction of + determine success as the conjunction of success of all the jobs. 
In case of success, call .sendmark on @path """ @@ -701,7 +660,7 @@ class GMasterCommon(object): ret = j[-1]() if not ret: succeed = False - if succeed and not args[0] == None: + if succeed and not args[0] is None: self.sendmark(path, *args) return succeed @@ -714,132 +673,348 @@ class GMasterCommon(object): self.slave.server.setattr(path, adct) self.set_slave_xtime(path, mark) - @staticmethod - def volinfo_state_machine(volinfo_state, volinfo_sys): - """compute new volinfo_state from old one and incoming - as of current system state, also indicating if there was a - change regarding which volume mark is the authoritative one - - @volinfo_state, @volinfo_sys are pairs of volume mark dicts - (foreign, native). - Note this method is marked as static, ie. the computation is - pure, without reliance on any excess implicit state. State - transitions which are deemed as ambiguous or banned will raise - an exception. +class XCrawlMetadata(object): + def __init__(self, st_uid, st_gid, st_mode, st_atime, st_mtime): + self.st_uid = int(st_uid) + self.st_gid = int(st_gid) + self.st_mode = int(st_mode) + self.st_atime = float(st_atime) + self.st_mtime = float(st_mtime) - """ - # store the value below "boxed" to emulate proper closures - # (variables of the enclosing scope are available inner functions - # provided they are no reassigned; mutation is OK). - param = FreeObject(relax_mismatch = False, state_change = None, index=-1) - def select_vi(vi0, vi): - param.index += 1 - if vi and (not vi0 or vi0['uuid'] == vi['uuid']): - if not vi0 and not param.relax_mismatch: - param.state_change = param.index - # valid new value found; for the rest, we are graceful about - # uuid mismatch - param.relax_mismatch = True - return vi - if vi0 and vi and vi0['uuid'] != vi['uuid'] and not param.relax_mismatch: - # uuid mismatch for master candidate, bail out - raise GsyncdError("aborting on uuid change from %s to %s" % \ - (vi0['uuid'], vi['uuid'])) - # fall back to old - return vi0 - newstate = tuple(select_vi(*vip) for vip in zip(volinfo_state, volinfo_sys)) - srep = lambda vi: vi and vi['uuid'][0:8] - logging.debug('(%s, %s) << (%s, %s) -> (%s, %s)' % \ - tuple(srep(vi) for vi in volinfo_state + volinfo_sys + newstate)) - return newstate, param.state_change class GMasterChangelogMixin(GMasterCommon): + """ changelog based change detection and syncing """ # index for change type and entry IDX_START = 0 - IDX_END = 2 + IDX_END = 2 + UNLINK_ENTRY = 2 - POS_GFID = 0 - POS_TYPE = 1 - POS_ENTRY1 = 2 - POS_ENTRY2 = 3 # renames + POS_GFID = 0 + POS_TYPE = 1 + POS_ENTRY1 = -1 - _CL_TYPE_DATA_PFX = "D " - _CL_TYPE_METADATA_PFX = "M " - _CL_TYPE_ENTRY_PFX = "E " + TYPE_META = "M " + TYPE_GFID = "D " + TYPE_ENTRY = "E " - TYPE_GFID = [_CL_TYPE_DATA_PFX] # ignoring metadata ops - TYPE_ENTRY = [_CL_TYPE_ENTRY_PFX] + MAX_EF_RETRIES = 10 + MAX_OE_RETRIES = 10 - # flat directory heirarchy for gfid based access + # flat directory hierarchy for gfid based access FLAT_DIR_HIERARCHY = '.' 
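
# A minimal standalone sketch (not part of this patch) of the record
# layout the constants above encode: every changelog record carries a
# two-character type prefix ("E ", "D " or "M "), the gfid is field 0 of
# the remainder, and the rest is space-separated, op-specific data.  The
# sample records and the gfid below are hypothetical.
IDX_START, IDX_END = 0, 2

def classify(record):
    rtype = record[IDX_START:IDX_END]      # "E ", "D " or "M "
    fields = record[IDX_END:].split(' ')   # gfid first, then op-specific bits
    return rtype, fields

samples = (
    "E 11111111-2222-3333-4444-555555555555 CREATE 33188 0 0 <pargfid>/f1",
    "D 11111111-2222-3333-4444-555555555555",
    "M 11111111-2222-3333-4444-555555555555 SETATTR 0 0 33188 0.0 0.0",
)
for rec in samples:
    print(classify(rec))
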
- def fallback_xsync(self): - logging.info('falling back to xsync mode') - gconf.configinterface.set('change-detector', 'xsync') - selfkill() - - def setup_working_dir(self): - workdir = os.path.join(gconf.working_dir, md5hex(gconf.local_path)) - logfile = os.path.join(workdir, 'changes.log') - logging.debug('changelog working dir %s (log: %s)' % (workdir, logfile)) - return (workdir, logfile) + CHANGELOG_CONN_RETRIES = 5 + + def init_fop_batch_stats(self): + self.batch_stats = { + "CREATE": 0, + "MKNOD": 0, + "UNLINK": 0, + "MKDIR": 0, + "RMDIR": 0, + "LINK": 0, + "SYMLINK": 0, + "RENAME": 0, + "SETATTR": 0, + "SETXATTR": 0, + "XATTROP": 0, + "DATA": 0, + "ENTRY_SYNC_TIME": 0, + "META_SYNC_TIME": 0, + "DATA_START_TIME": 0 + } + + def update_fop_batch_stats(self, ty): + if ty in ['FSETXATTR']: + ty = 'SETXATTR' + self.batch_stats[ty] = self.batch_stats.get(ty, 0) + 1 + + def archive_and_purge_changelogs(self, changelogs): + # Creates tar file instead of tar.gz, since changelogs will + # be appended to existing tar. archive name is + # archive_<YEAR><MONTH>.tar + archive_name = "archive_%s.tar" % datetime.today().strftime( + gconf.get("changelog-archive-format")) - def lstat(self, e): try: - return os.lstat(e) - except (IOError, OSError): - ex = sys.exc_info()[1] - if ex.errno == ENOENT: - return ex.errno + tar = tarfile.open(os.path.join(self.processed_changelogs_dir, + archive_name), + "a") + except tarfile.ReadError: + tar = tarfile.open(os.path.join(self.processed_changelogs_dir, + archive_name), + "w") + + for f in changelogs: + try: + f = os.path.basename(f) + tar.add(os.path.join(self.processed_changelogs_dir, f), + arcname=os.path.basename(f)) + except: + exc = sys.exc_info()[1] + if ((isinstance(exc, OSError) or + isinstance(exc, IOError)) and exc.errno == ENOENT): + continue + else: + tar.close() + raise + tar.close() + + for f in changelogs: + try: + f = os.path.basename(f) + os.remove(os.path.join(self.processed_changelogs_dir, f)) + except OSError as e: + if e.errno == errno.ENOENT: + continue + else: + raise + + def setup_working_dir(self): + workdir = os.path.join(gconf.get("working-dir"), + escape(rconf.args.local_path)) + logging.debug('changelog working dir %s' % workdir) + return workdir + + def log_failures(self, failures, entry_key, gfid_prefix, log_prefix): + num_failures = 0 + for failure in failures: + st = lstat(os.path.join(gfid_prefix, failure[0][entry_key])) + if not isinstance(st, int): + num_failures += 1 + logging.error(lf('%s FAILED' % log_prefix, + data=failure)) + if failure[0]['op'] == 'MKDIR': + raise GsyncdError("The above directory failed to sync." + " Please fix it to proceed further.") + + self.status.inc_value("failures", num_failures) + + def fix_possible_entry_failures(self, failures, retry_count, entries): + pfx = gauxpfx() + fix_entry_ops = [] + failures1 = [] + remove_gfids = set() + for failure in failures: + if failure[2]['name_mismatch']: + pbname = failure[2]['slave_entry'] + elif failure[2]['dst']: + pbname = failure[0]['entry1'] else: - raise + pbname = failure[0]['entry'] + + op = failure[0]['op'] + # name exists but gfid is different + if failure[2]['gfid_mismatch'] or failure[2]['name_mismatch']: + slave_gfid = failure[2]['slave_gfid'] + st = lstat(os.path.join(pfx, slave_gfid)) + # Takes care of scenarios with no hardlinks + if isinstance(st, int) and st == ENOENT: + logging.debug(lf('Entry not present on master. Fixing gfid ' + 'mismatch in slave. 
Deleting the entry', + retry_count=retry_count, + entry=repr(failure))) + # Add deletion to fix_entry_ops list + if failure[2]['slave_isdir']: + fix_entry_ops.append( + edct('RMDIR', + gfid=failure[2]['slave_gfid'], + entry=pbname)) + else: + fix_entry_ops.append( + edct('UNLINK', + gfid=failure[2]['slave_gfid'], + entry=pbname)) + remove_gfids.add(slave_gfid) + if op in ['RENAME']: + # If renamed gfid doesn't exists on master, remove + # rename entry and unlink src on slave + st = lstat(os.path.join(pfx, failure[0]['gfid'])) + if isinstance(st, int) and st == ENOENT: + logging.debug("Unlink source %s" % repr(failure)) + remove_gfids.add(failure[0]['gfid']) + fix_entry_ops.append( + edct('UNLINK', + gfid=failure[0]['gfid'], + entry=failure[0]['entry'])) + # Takes care of scenarios of hardlinks/renames on master + elif not isinstance(st, int): + if matching_disk_gfid(slave_gfid, pbname): + # Safe to ignore the failure as master contains same + # file with same gfid. Remove entry from entries list + logging.debug(lf('Fixing gfid mismatch in slave. ' + ' Safe to ignore, take out entry', + retry_count=retry_count, + entry=repr(failure))) + remove_gfids.add(failure[0]['gfid']) + if op == 'RENAME': + fix_entry_ops.append( + edct('UNLINK', + gfid=failure[0]['gfid'], + entry=failure[0]['entry'])) + # The file exists on master but with different name. + # Probably renamed and got missed during xsync crawl. + elif failure[2]['slave_isdir']: + realpath = os.readlink(os.path.join( + rconf.args.local_path, + ".glusterfs", + slave_gfid[0:2], + slave_gfid[2:4], + slave_gfid)) + dst_entry = os.path.join(pfx, realpath.split('/')[-2], + realpath.split('/')[-1]) + src_entry = pbname + logging.debug(lf('Fixing dir name/gfid mismatch in ' + 'slave', retry_count=retry_count, + entry=repr(failure))) + if src_entry == dst_entry: + # Safe to ignore the failure as master contains + # same directory as in slave with same gfid. + # Remove the failure entry from entries list + logging.debug(lf('Fixing dir name/gfid mismatch' + ' in slave. Safe to ignore, ' + 'take out entry', + retry_count=retry_count, + entry=repr(failure))) + try: + entries.remove(failure[0]) + except ValueError: + pass + else: + rename_dict = edct('RENAME', gfid=slave_gfid, + entry=src_entry, + entry1=dst_entry, stat=st, + link=None) + logging.debug(lf('Fixing dir name/gfid mismatch' + ' in slave. Renaming', + retry_count=retry_count, + entry=repr(rename_dict))) + fix_entry_ops.append(rename_dict) + else: + # A hardlink file exists with different name or + # renamed file exists and we are sure from + # matching_disk_gfid check that the entry doesn't + # exist with same gfid so we can safely delete on slave + logging.debug(lf('Fixing file gfid mismatch in slave. ' + 'Hardlink/Rename Case. 
Deleting entry', + retry_count=retry_count, + entry=repr(failure))) + fix_entry_ops.append( + edct('UNLINK', + gfid=failure[2]['slave_gfid'], + entry=pbname)) + elif failure[1] == ENOENT: + if op in ['RENAME']: + pbname = failure[0]['entry1'] + else: + pbname = failure[0]['entry'] - # sync data - def syncdata(self, datas): - logging.debug('datas: %s' % (datas)) - for data in datas: - logging.debug('candidate for syncing %s' % data) - pb = self.syncer.add(data) - timeA = datetime.now() - def regjob(se, xte, pb): - rv = pb.wait() - if rv[0]: - logging.debug('synced ' + se) - # update stats - timeB = datetime.now() - self.crawl_stats['last_synctime'] = timeB - timeA - self.crawl_stats['sync_time'] += ((self.crawl_stats['last_synctime'].microseconds) / (10.0 ** 6)) - self.crawl_stats['files_synced'] += 1 - self.crawl_stats['bytes_synced'] += self.syncer.bytes_synced - - # cumulative statistics - self.total_crawl_stats['bytes_synced'] += self.syncer.bytes_synced - self.total_crawl_stats['sync_time'] += ((self.crawl_stats['last_synctime'].microseconds) / (10.0 ** 6)) - self.total_crawl_stats['files_synced'] += 1 - return True + pargfid = pbname.split('/')[1] + st = lstat(os.path.join(pfx, pargfid)) + # Safe to ignore the failure as master doesn't contain + # parent directory. + if isinstance(st, int): + logging.debug(lf('Fixing ENOENT error in slave. Parent ' + 'does not exist on master. Safe to ' + 'ignore, take out entry', + retry_count=retry_count, + entry=repr(failure))) + try: + entries.remove(failure[0]) + except ValueError: + pass else: - if rv[1] in [23, 24]: - # stat to check if the file exist - st = self.lstat(se) - if isinstance(st, int): - # file got unlinked in the interim - return True - logging.warn('Rsync: %s [errcode: %d]' % (se, rv[1])) - self.add_job(self.FLAT_DIR_HIERARCHY, 'reg', regjob, data, None, pb) - if self.wait(self.FLAT_DIR_HIERARCHY, None): - self.update_crawl_data() - return True + logging.debug(lf('Fixing ENOENT error in slave. 
Create ' + 'parent directory on slave.', + retry_count=retry_count, + entry=repr(failure))) + realpath = os.readlink(os.path.join(rconf.args.local_path, + ".glusterfs", + pargfid[0:2], + pargfid[2:4], + pargfid)) + dir_entry = os.path.join(pfx, realpath.split('/')[-2], + realpath.split('/')[-1]) + fix_entry_ops.append( + edct('MKDIR', gfid=pargfid, entry=dir_entry, + mode=st.st_mode, uid=st.st_uid, gid=st.st_gid)) + + logging.debug("remove_gfids: %s" % repr(remove_gfids)) + if remove_gfids: + for e in entries: + if e['op'] in ['MKDIR', 'MKNOD', 'CREATE', 'RENAME'] \ + and e['gfid'] in remove_gfids: + logging.debug("Removed entry op from retrial list: entry: %s" % repr(e)) + e['skip_entry'] = True + + if fix_entry_ops: + # Process deletions of entries whose gfids are mismatched + failures1 = self.slave.server.entry_ops(fix_entry_ops) + + return (failures1, fix_entry_ops) + + def handle_entry_failures(self, failures, entries): + retries = 0 + pending_failures = False + failures1 = [] + failures2 = [] + entry_ops1 = [] + entry_ops2 = [] + + if failures: + pending_failures = True + failures1 = failures + entry_ops1 = entries + + while pending_failures and retries < self.MAX_EF_RETRIES: + retries += 1 + (failures2, entry_ops2) = self.fix_possible_entry_failures( + failures1, retries, entry_ops1) + if not failures2: + pending_failures = False + logging.info(lf('Successfully fixed entry ops with gfid ' + 'mismatch', retry_count=retries)) + else: + pending_failures = True + failures1 = failures2 + entry_ops1 = entry_ops2 - def process_change(self, change, done): - clist = [] + if pending_failures: + for failure in failures1: + logging.error("Failed to fix entry ops %s", repr(failure)) + + def process_change(self, change, done, retry): + pfx = gauxpfx() + clist = [] entries = [] - purges = set() - links = set() + meta_gfid = set() datas = set() - pfx = gauxpfx() + + change_ts = change.split(".")[-1] + + # Ignore entry ops which are already processed in Changelog modes + ignore_entry_ops = False + entry_stime = None + data_stime = None + if self.name in ["live_changelog", "history_changelog"]: + entry_stime = self.get_entry_stime() + data_stime = self.get_data_stime() + + if entry_stime is not None and data_stime is not None: + # if entry_stime is not None but data_stime > entry_stime + # This situation is caused by the stime update of Passive worker + # Consider data_stime in this case. 
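
# A standalone illustration (not part of this patch) of the entry-stime
# check described in the comments above, using hypothetical values: the
# changelog's time suffix is compared against the entry stime (bumped to
# the data stime if a passive worker advanced it), and entry ops are
# skipped when the changelog is not newer.
change = "CHANGELOG.1517961195"         # hypothetical changelog filename
change_ts = change.split(".")[-1]       # "1517961195"
entry_stime = (1517961200, 0)           # (sec, nsec), hypothetical
data_stime = (1517961300, 0)            # (sec, nsec), hypothetical
if data_stime[0] > entry_stime[0]:
    entry_stime = data_stime
ignore_entry_ops = int(change_ts) <= entry_stime[0]
print(ignore_entry_ops)                 # True -> entry ops already synced
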
+ if data_stime[0] > entry_stime[0]: + entry_stime = data_stime + + # Compare the entry_stime with changelog file suffix + # if changelog time is less than entry_stime then ignore + if int(change_ts) <= entry_stime[0]: + ignore_entry_ops = True + try: f = open(change, "r") clist = f.readlines() @@ -847,137 +1022,631 @@ class GMasterChangelogMixin(GMasterCommon): except IOError: raise - def edct(op, **ed): - dct = {} - dct['op'] = op - for k in ed: - if k == 'stat': - st = ed[k] - dst = dct['stat'] = {} - dst['uid'] = st.st_uid - dst['gid'] = st.st_gid - dst['mode'] = st.st_mode - else: - dct[k] = ed[k] - return dct for e in clist: e = e.strip() - et = e[self.IDX_START:self.IDX_END] - ec = e[self.IDX_END:].split(' ') - if et in self.TYPE_ENTRY: + et = e[self.IDX_START:self.IDX_END] # entry type + ec = e[self.IDX_END:].split(' ') # rest of the bits + + # skip ENTRY operation if hot tier brick + if self.name == 'live_changelog' or \ + self.name == 'history_changelog': + if rconf.args.is_hottier and et == self.TYPE_ENTRY: + logging.debug(lf('skip ENTRY op if hot tier brick', + op=ec[self.POS_TYPE])) + continue + + # Data and Meta operations are decided while parsing + # UNLINK/RMDIR/MKNOD except that case ignore all the other + # entry ops if ignore_entry_ops is True. + # UNLINK/RMDIR/MKNOD entry_ops are ignored in the end + if ignore_entry_ops and et == self.TYPE_ENTRY and \ + ec[self.POS_TYPE] not in ["UNLINK", "RMDIR", "MKNOD"]: + continue + + if et == self.TYPE_ENTRY: + # extract information according to the type of + # the entry operation. create(), mkdir() and mknod() + # have mode, uid, gid information in the changelog + # itself, so no need to stat()... ty = ec[self.POS_TYPE] - en = unescape(os.path.join(pfx, ec[self.POS_ENTRY1])) + + self.update_fop_batch_stats(ec[self.POS_TYPE]) + + # PARGFID/BNAME + en = unescape_space_newline( + os.path.join(pfx, ec[self.POS_ENTRY1])) + # GFID of the entry gfid = ec[self.POS_GFID] - # definitely need a better way bucketize entry ops + if ty in ['UNLINK', 'RMDIR']: - entries.append(edct(ty, gfid=gfid, entry=en)) - purges.update([os.path.join(pfx, gfid)]) - continue - if not ty == 'RENAME': + # The index of PARGFID/BNAME for UNLINK, RMDIR + # is no more the last index. It varies based on + # changelog.capture-del-path is enabled or not. + en = unescape_space_newline( + os.path.join(pfx, ec[self.UNLINK_ENTRY])) + + # Remove from DATA list, so that rsync will + # not fail + pt = os.path.join(pfx, ec[0]) + st = lstat(pt) + if pt in datas and isinstance(st, int): + # file got unlinked, May be historical Changelog + datas.remove(pt) + + if ty in ['RMDIR'] and not isinstance(st, int): + logging.info(lf('Ignoring rmdir. Directory present in ' + 'master', gfid=gfid, pgfid_bname=en)) + continue + + if not gconf.get("ignore-deletes"): + if not ignore_entry_ops: + entries.append(edct(ty, gfid=gfid, entry=en)) + elif ty in ['CREATE', 'MKDIR', 'MKNOD']: + # Special case: record mknod as link + if ty in ['MKNOD']: + mode = int(ec[2]) + if mode & 0o1000: + # Avoid stat'ing the file as it + # may be deleted in the interim + st = FreeObject(st_mode=int(ec[2]), + st_uid=int(ec[3]), + st_gid=int(ec[4]), + st_atime=0, + st_mtime=0) + + # So, it may be deleted, but still we are + # append LINK? Because, the file will be + # CREATED if source not exists. + entries.append(edct('LINK', stat=st, entry=en, + gfid=gfid)) + + # Here, we have the assumption that only + # tier-gfid.linkto causes this mknod. 
Add data + datas.add(os.path.join(pfx, ec[0])) + continue + + # stat info. present in the changelog itself + entries.append(edct(ty, gfid=gfid, entry=en, + mode=int(ec[2]), + uid=int(ec[3]), gid=int(ec[4]))) + elif ty == "RENAME": + go = os.path.join(pfx, gfid) + st = lstat(go) + if isinstance(st, int): + st = {} + + rl = None + if st and stat.S_ISLNK(st.st_mode): + rl = errno_wrap(os.readlink, [en], [ENOENT], + [ESTALE, EINTR]) + if isinstance(rl, int): + rl = None + + e1 = unescape_space_newline( + os.path.join(pfx, ec[self.POS_ENTRY1 - 1])) + entries.append(edct(ty, gfid=gfid, entry=e1, entry1=en, + stat=st, link=rl)) + # If src doesn't exist while doing rename, destination + # is created. If data is not followed by rename, this + # remains zero byte file on slave. Hence add data entry + # for renames + datas.add(os.path.join(pfx, gfid)) + else: + # stat() to get mode and other information + if not matching_disk_gfid(gfid, en): + logging.debug(lf('Ignoring entry, purged in the ' + 'interim', file=en, gfid=gfid)) + continue + go = os.path.join(pfx, gfid) - st = self.lstat(go) + st = lstat(go) if isinstance(st, int): - logging.debug('file %s got purged in the interim' % go) + logging.debug(lf('Ignoring entry, purged in the ' + 'interim', file=en, gfid=gfid)) continue - if ty in ['CREATE', 'MKDIR', 'MKNOD']: - entries.append(edct(ty, stat=st, entry=en, gfid=gfid)) - elif ty == 'LINK': - entries.append(edct(ty, stat=st, entry=en, gfid=gfid)) - links.update([os.path.join(pfx, gfid)]) - elif ty == 'SYMLINK': - entries.append(edct(ty, stat=st, entry=en, gfid=gfid, link=os.readlink(en))) - elif ty == 'RENAME': - e2 = unescape(os.path.join(pfx, ec[self.POS_ENTRY2])) - entries.append(edct(ty, gfid=gfid, entry=en, entry1=e2)) + + if ty == 'LINK': + rl = None + if st and stat.S_ISLNK(st.st_mode): + rl = errno_wrap(os.readlink, [en], [ENOENT], + [ESTALE, EINTR]) + if isinstance(rl, int): + rl = None + entries.append(edct(ty, stat=st, entry=en, gfid=gfid, + link=rl)) + # If src doesn't exist while doing link, destination + # is created based on file type. If data is not + # followed by link, this remains zero byte file on + # slave. Hence add data entry for links + if rl is None: + datas.add(os.path.join(pfx, gfid)) + elif ty == 'SYMLINK': + rl = errno_wrap(os.readlink, [en], [ENOENT], + [ESTALE, EINTR]) + if isinstance(rl, int): + continue + + entries.append( + edct(ty, stat=st, entry=en, gfid=gfid, link=rl)) + else: + logging.warn(lf('ignoring op', + gfid=gfid, + type=ty)) + elif et == self.TYPE_GFID: + # If self.unlinked_gfids is available, then that means it is + # retrying the changelog second time. Do not add the GFID's + # to rsync job if failed previously but unlinked in master + if self.unlinked_gfids and \ + os.path.join(pfx, ec[0]) in self.unlinked_gfids: + logging.debug("ignoring data, since file purged interim") else: - pass - elif et in self.TYPE_GFID: - da = os.path.join(pfx, ec[0]) - st = self.lstat(da) - if isinstance(st, int): - logging.debug('file %s got purged in the interim' % da) - continue - datas.update([da]) + datas.add(os.path.join(pfx, ec[0])) + elif et == self.TYPE_META: + self.update_fop_batch_stats(ec[self.POS_TYPE]) + if ec[1] == 'SETATTR': # only setattr's for now... 
+ if len(ec) == 5: + # In xsync crawl, we already have stat data + # avoid doing stat again + meta_gfid.add((os.path.join(pfx, ec[0]), + XCrawlMetadata(st_uid=ec[2], + st_gid=ec[3], + st_mode=ec[4], + st_atime=ec[5], + st_mtime=ec[6]))) + else: + meta_gfid.add((os.path.join(pfx, ec[0]), )) + elif ec[1] in ['SETXATTR', 'XATTROP', 'FXATTROP']: + # To sync xattr/acls use rsync/tar, --xattrs and --acls + # switch to rsync and tar + if not gconf.get("sync-method") == "tarssh" and \ + (gconf.get("sync-xattrs") or gconf.get("sync-acls")): + datas.add(os.path.join(pfx, ec[0])) + else: + logging.warn(lf('got invalid fop type', + type=et)) logging.debug('entries: %s' % repr(entries)) + + # Increment counters for Status + self.files_in_batch += len(datas) + self.status.inc_value("data", len(datas)) + + self.batch_stats["DATA"] += self.files_in_batch - \ + self.batch_stats["SETXATTR"] - \ + self.batch_stats["XATTROP"] + + entry_start_time = time.time() # sync namespace - if (entries): - self.slave.server.entry_ops(entries) + if entries and not ignore_entry_ops: + # Increment counters for Status + self.status.inc_value("entry", len(entries)) + + failures = self.slave.server.entry_ops(entries) + + if gconf.get("gfid-conflict-resolution"): + count = 0 + if failures: + logging.info(lf('Entry ops failed with gfid mismatch', + count=len(failures))) + while failures and count < self.MAX_OE_RETRIES: + count += 1 + self.handle_entry_failures(failures, entries) + logging.info(lf('Retry original entries', count=count)) + failures = self.slave.server.entry_ops(entries) + if not failures: + logging.info("Successfully fixed all entry ops with " + "gfid mismatch") + break + + self.log_failures(failures, 'gfid', gauxpfx(), 'ENTRY') + self.status.dec_value("entry", len(entries)) + + # Update Entry stime in Brick Root only in case of Changelog mode + if self.name in ["live_changelog", "history_changelog"]: + entry_stime_to_update = (int(change_ts) - 1, 0) + self.upd_entry_stime(entry_stime_to_update) + self.status.set_field("last_synced_entry", + entry_stime_to_update[0]) + + self.batch_stats["ENTRY_SYNC_TIME"] += time.time() - entry_start_time + + if ignore_entry_ops: + # Book keeping, to show in logs the range of Changelogs skipped + self.num_skipped_entry_changelogs += 1 + if self.skipped_entry_changelogs_first is None: + self.skipped_entry_changelogs_first = change_ts + + self.skipped_entry_changelogs_last = change_ts + + meta_start_time = time.time() + # sync metadata + if meta_gfid: + meta_entries = [] + for go in meta_gfid: + if len(go) > 1: + st = go[1] + else: + st = lstat(go[0]) + if isinstance(st, int): + logging.debug(lf('file got purged in the interim', + file=go[0])) + continue + meta_entries.append(edct('META', go=go[0], stat=st)) + if meta_entries: + self.status.inc_value("meta", len(meta_entries)) + failures = self.slave.server.meta_ops(meta_entries) + self.log_failures(failures, 'go', '', 'META') + self.status.dec_value("meta", len(meta_entries)) + + self.batch_stats["META_SYNC_TIME"] += time.time() - meta_start_time + + if self.batch_stats["DATA_START_TIME"] == 0: + self.batch_stats["DATA_START_TIME"] = time.time() + # sync data - if self.syncdata(datas - (purges - links)): - if done: - self.master.server.changelog_done(change) - return True + if datas: + self.a_syncdata(datas) + self.datas_in_batch.update(datas) def process(self, changes, done=1): - for change in changes: - times = 0 - while True: - times += 1 - logging.debug('processing change %s [%d time(s)]' % (change, times)) - if 
self.process_change(change, done): - break - # it's either entry_ops() or Rsync that failed to do it's - # job. Mostly it's entry_ops() [which currently has a problem - # of failing to create an entry but failing to return an errno] - # Therefore we do not know if it's either Rsync or the freaking - # entry_ops() that failed... so we retry the _whole_ changelog - # again. - # TODO: remove entry retries when it's gets fixed. - logging.warn('incomplete sync, retrying changelog: %s' % change) - time.sleep(0.5) - self.turns += 1 + tries = 0 + retry = False + self.unlinked_gfids = set() + self.files_in_batch = 0 + self.datas_in_batch = set() + # Error log disabled till the last round + self.syncer.disable_errorlog() + self.skipped_entry_changelogs_first = None + self.skipped_entry_changelogs_last = None + self.num_skipped_entry_changelogs = 0 + self.batch_start_time = time.time() + self.init_fop_batch_stats() - def upd_stime(self, stime): - if stime: - self.sendmark(self.FLAT_DIR_HIERARCHY, stime) + while True: + # first, fire all changelog transfers in parallel. entry and + # metadata are performed synchronously, therefore in serial. + # However at the end of each changelog, data is synchronized + # with syncdata_async() - which means it is serial w.r.t + # entries/metadata of that changelog but happens in parallel + # with data of other changelogs. + + if retry: + if tries == (gconf.get("max-rsync-retries") - 1): + # Enable Error logging if it is last retry + self.syncer.enable_errorlog() + + # Remove Unlinked GFIDs from Queue + for unlinked_gfid in self.unlinked_gfids: + if unlinked_gfid in self.datas_in_batch: + self.datas_in_batch.remove(unlinked_gfid) + + # Retry only Sync. Do not retry entry ops + if self.datas_in_batch: + self.a_syncdata(self.datas_in_batch) + else: + for change in changes: + logging.debug(lf('processing change', + changelog=change)) + self.process_change(change, done, retry) + if not retry: + # number of changelogs processed in the batch + self.turns += 1 + + # Now we wait for all the data transfers fired off in the above + # step to complete. Note that this is not ideal either. Ideally + # we want to trigger the entry/meta-data transfer of the next + # batch while waiting for the data transfer of the current batch + # to finish. + + # Note that the reason to wait for the data transfer (vs doing it + # completely in the background and call the changelog_done() + # asynchronously) is because this waiting acts as a "backpressure" + # and prevents a spiraling increase of wait stubs from consuming + # unbounded memory and resources. + + # update the slave's time with the timestamp of the _last_ + # changelog file time suffix. Since, the changelog prefix time + # is the time when the changelog was rolled over, introduce a + # tolerance of 1 second to counter the small delta b/w the + # marker update and gettimeofday(). + # NOTE: this is only for changelog mode, not xsync. + + # @change is the last changelog (therefore max time for this batch) + if self.syncdata_wait(): + self.unlinked_gfids = set() + if done: + xtl = (int(change.split('.')[-1]) - 1, 0) + self.upd_stime(xtl) + list(map(self.changelog_done_func, changes)) + self.archive_and_purge_changelogs(changes) + + # Reset Data counter after sync + self.status.dec_value("data", self.files_in_batch) + self.files_in_batch = 0 + self.datas_in_batch = set() + break + + # We do not know which changelog transfer failed, retry everything. 
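
# A standalone sketch (not part of this patch) of the stime bookkeeping
# described above: once a batch of changelogs is fully synced, the slave's
# stime is taken from the last changelog's time suffix minus one second,
# to absorb the small delta between the rollover marker and
# gettimeofday().  Filenames are hypothetical.
changes = ["CHANGELOG.1517961180", "CHANGELOG.1517961195"]
change = changes[-1]                        # last changelog of the batch
xtl = (int(change.split(".")[-1]) - 1, 0)   # (sec, nsec) stime to record
print(xtl)                                  # (1517961194, 0)
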
+ retry = True + tries += 1 + if tries == gconf.get("max-rsync-retries"): + logging.error(lf('changelogs could not be processed ' + 'completely - moving on...', + files=list(map(os.path.basename, changes)))) + + # Reset data counter on failure + self.status.dec_value("data", self.files_in_batch) + self.files_in_batch = 0 + self.datas_in_batch = set() + + if done: + xtl = (int(change.split('.')[-1]) - 1, 0) + self.upd_stime(xtl) + list(map(self.changelog_done_func, changes)) + self.archive_and_purge_changelogs(changes) + break + # it's either entry_ops() or Rsync that failed to do it's + # job. Mostly it's entry_ops() [which currently has a problem + # of failing to create an entry but failing to return an errno] + # Therefore we do not know if it's either Rsync or the freaking + # entry_ops() that failed... so we retry the _whole_ changelog + # again. + # TODO: remove entry retries when it's gets fixed. + logging.warn(lf('incomplete sync, retrying changelogs', + files=list(map(os.path.basename, changes)))) + + # Reset the Data counter before Retry + self.status.dec_value("data", self.files_in_batch) + self.files_in_batch = 0 + self.init_fop_batch_stats() + time.sleep(0.5) + + # Log the Skipped Entry ops range if any + if self.skipped_entry_changelogs_first is not None and \ + self.skipped_entry_changelogs_last is not None: + logging.info(lf("Skipping already processed entry ops", + from_changelog=self.skipped_entry_changelogs_first, + to_changelog=self.skipped_entry_changelogs_last, + num_changelogs=self.num_skipped_entry_changelogs)) + + # Log Current batch details + if changes: + logging.info( + lf("Entry Time Taken", + UNL=self.batch_stats["UNLINK"], + RMD=self.batch_stats["RMDIR"], + CRE=self.batch_stats["CREATE"], + MKN=self.batch_stats["MKNOD"], + MKD=self.batch_stats["MKDIR"], + REN=self.batch_stats["RENAME"], + LIN=self.batch_stats["LINK"], + SYM=self.batch_stats["SYMLINK"], + duration="%.4f" % self.batch_stats["ENTRY_SYNC_TIME"])) + + logging.info( + lf("Data/Metadata Time Taken", + SETA=self.batch_stats["SETATTR"], + meta_duration="%.4f" % self.batch_stats["META_SYNC_TIME"], + SETX=self.batch_stats["SETXATTR"], + XATT=self.batch_stats["XATTROP"], + DATA=self.batch_stats["DATA"], + data_duration="%.4f" % ( + time.time() - self.batch_stats["DATA_START_TIME"]))) + + logging.info( + lf("Batch Completed", + mode=self.name, + duration="%.4f" % (time.time() - self.batch_start_time), + changelog_start=changes[0].split(".")[-1], + changelog_end=changes[-1].split(".")[-1], + num_changelogs=len(changes), + stime=self.get_data_stime(), + entry_stime=self.get_entry_stime())) + + def upd_entry_stime(self, stime): + self.slave.server.set_entry_stime(self.FLAT_DIR_HIERARCHY, + self.uuid, + stime) + + def upd_stime(self, stime, path=None): + if not path: + path = self.FLAT_DIR_HIERARCHY + if not stime == URXTIME: + self.sendmark(path, stime) + + # Update last_synced_time in status file based on stime + # only update stime if stime xattr set to Brick root + if path == self.FLAT_DIR_HIERARCHY: + chkpt_time = gconf.getr("checkpoint") + checkpoint_time = 0 + if chkpt_time is not None: + checkpoint_time = int(chkpt_time) + + self.status.set_last_synced(stime, checkpoint_time) + + def update_worker_remote_node(self): + node = rconf.args.resource_remote + node_data = node.split("@") + node = node_data[-1] + remote_node_ip, _ = host_brick_split(node) + self.status.set_slave_node(remote_node_ip) + + def changelogs_batch_process(self, changes): + changelogs_batches = [] + current_size = 0 + for c in 
changes: + si = os.lstat(c).st_size + if (si + current_size) > gconf.get("changelog-batch-size"): + # Create new batch if single Changelog file greater than + # Max Size! or current batch size exceeds Max size + changelogs_batches.append([c]) + current_size = si + else: + # Append to last batch, if No batches available Create one + current_size += si + if not changelogs_batches: + changelogs_batches.append([c]) + else: + changelogs_batches[-1].append(c) + + for batch in changelogs_batches: + logging.debug(lf('processing changes', + batch=batch)) + self.process(batch) def crawl(self): + self.status.set_worker_crawl_status("Changelog Crawl") changes = [] - try: - self.master.server.changelog_scan() - self.crawls += 1 - except OSError: - self.fallback_xsync() - changes = self.master.server.changelog_getchanges() + # get stime (from the brick) and purge changelogs + # that are _historical_ to that time. + data_stime = self.get_data_stime() + + libgfchangelog.scan() + self.crawls += 1 + changes = libgfchangelog.getchanges() if changes: - xtl = self.xtime(self.FLAT_DIR_HIERARCHY) - if isinstance(xtl, int): - raise GsyncdError('master is corrupt') - logging.debug('processing changes %s' % repr(changes)) - self.process(changes) - self.upd_stime(xtl) + if data_stime: + logging.info(lf("slave's time", + stime=data_stime)) + processed = [x for x in changes + if int(x.split('.')[-1]) < data_stime[0]] + for pr in processed: + logging.debug( + lf('skipping already processed change', + changelog=os.path.basename(pr))) + self.changelog_done_func(pr) + changes.remove(pr) + self.archive_and_purge_changelogs(processed) + + self.changelogs_batch_process(changes) + + def register(self, register_time, status): + self.sleep_interval = gconf.get("change-interval") + self.changelog_done_func = libgfchangelog.done + self.tempdir = self.setup_working_dir() + self.processed_changelogs_dir = os.path.join(self.tempdir, + ".processed") + self.name = "live_changelog" + self.status = status + + +class GMasterChangeloghistoryMixin(GMasterChangelogMixin): + def register(self, register_time, status): + self.changelog_register_time = register_time + self.history_crawl_start_time = register_time + self.changelog_done_func = libgfchangelog.history_done + self.history_turns = 0 + self.tempdir = self.setup_working_dir() + self.processed_changelogs_dir = os.path.join(self.tempdir, + ".history/.processed") + self.name = "history_changelog" + self.status = status + + def crawl(self): + self.history_turns += 1 + self.status.set_worker_crawl_status("History Crawl") + data_stime = self.get_data_stime() + + end_time = int(time.time()) + + #as start of historical crawl marks Geo-rep worker restart + if gconf.get("ignore-deletes"): + logging.info(lf('ignore-deletes config option is set', + stime=data_stime)) + + logging.info(lf('starting history crawl', + turns=self.history_turns, + stime=data_stime, + etime=end_time, + entry_stime=self.get_entry_stime())) + + if not data_stime or data_stime == URXTIME: + raise NoStimeAvailable() + + # Changelogs backend path is hardcoded as + # <BRICK_PATH>/.glusterfs/changelogs, if user configured to different + # location then consuming history will not work(Known issue as of now) + changelog_path = os.path.join(rconf.args.local_path, + ".glusterfs/changelogs") + ret, actual_end = libgfchangelog.history_changelog( + changelog_path, + data_stime[0], + end_time, + gconf.get("sync-jobs")) + + # scan followed by getchanges till scan returns zero. 
+ # history_scan() is blocking call, till it gets the number + # of changelogs to process. Returns zero when no changelogs + # to be processed. returns positive value as number of changelogs + # to be processed, which will be fetched using + # history_getchanges() + while libgfchangelog.history_scan() > 0: + self.crawls += 1 + + changes = libgfchangelog.history_getchanges() + if changes: + if data_stime: + logging.info(lf("slave's time", + stime=data_stime)) + processed = [x for x in changes + if int(x.split('.')[-1]) < data_stime[0]] + for pr in processed: + logging.debug(lf('skipping already processed change', + changelog=os.path.basename(pr))) + self.changelog_done_func(pr) + changes.remove(pr) + + self.changelogs_batch_process(changes) + + history_turn_time = int(time.time()) - self.history_crawl_start_time + + logging.info(lf('finished history crawl', + endtime=actual_end, + stime=self.get_data_stime(), + entry_stime=self.get_entry_stime())) + + # If TS returned from history_changelog is < register_time + # then FS crawl may be required, since history is only available + # till TS returned from history_changelog + if actual_end < self.changelog_register_time: + if self.history_turns < 2: + sleep_time = 1 + if history_turn_time < CHANGELOG_ROLLOVER_TIME: + sleep_time = CHANGELOG_ROLLOVER_TIME - history_turn_time + time.sleep(sleep_time) + self.history_crawl_start_time = int(time.time()) + self.crawl() + else: + # This exception will be caught in resource.py and + # fallback to xsync for the small gap. + raise PartialHistoryAvailable(str(actual_end)) - def register(self): - (workdir, logfile) = self.setup_working_dir() - self.sleep_interval = int(gconf.change_interval) - # register with the changelog library - try: - # 9 == log level (DEBUG) - # 5 == connection retries - self.master.server.changelog_register(gconf.local_path, - workdir, logfile, 9, 5) - except OSError: - self.fallback_xsync() - # control should not reach here - raise class GMasterXsyncMixin(GMasterChangelogMixin): - """ + """ This crawl needs to be xtime based (as of now - it's not. this is beacuse we generate CHANGELOG + it's not. this is because we generate CHANGELOG file during each crawl which is then processed by process_change()). For now it's used as a one-shot initial sync mechanism and only syncs directories, regular - files and symlinks. + files, hardlinks and symlinks. """ - def register(self): + XSYNC_MAX_ENTRIES = 1 << 13 + + def register(self, register_time=None, status=None): + self.status = status + self.counter = 0 + self.comlist = [] + self.stimes = [] self.sleep_interval = 60 - self.tempdir = self.setup_working_dir()[0] + self.tempdir = self.setup_working_dir() + logging.info(lf('Working dir', + path=self.tempdir)) self.tempdir = os.path.join(self.tempdir, 'xsync') - logging.info('xsync temp directory: %s' % self.tempdir) + self.processed_changelogs_dir = self.tempdir + self.name = "xsync" try: os.makedirs(self.tempdir) except OSError: @@ -986,250 +1655,248 @@ class GMasterXsyncMixin(GMasterChangelogMixin): pass else: raise + # Purge stale unprocessed xsync changelogs + for f in os.listdir(self.tempdir): + if f.startswith("XSYNC-CHANGELOG"): + os.remove(os.path.join(self.tempdir, f)) + + + def crawl(self): + """ + event dispatcher thread + + this thread dispatches either changelog or synchronizes stime. 
+ additionally terminates itself on receiving a 'finale' event + """ + def Xsyncer(): + self.Xcrawl() + t = Thread(target=Xsyncer) + t.start() + logging.info(lf('starting hybrid crawl', + stime=self.get_data_stime())) + self.status.set_worker_crawl_status("Hybrid Crawl") + while True: + try: + item = self.comlist.pop(0) + if item[0] == 'finale': + logging.info(lf('finished hybrid crawl', + stime=self.get_data_stime())) + break + elif item[0] == 'xsync': + logging.info(lf('processing xsync changelog', + path=item[1])) + self.process([item[1]], 0) + self.archive_and_purge_changelogs([item[1]]) + elif item[0] == 'stime': + logging.debug(lf('setting slave time', + time=item[1])) + self.upd_stime(item[1][1], item[1][0]) + else: + logging.warn(lf('unknown tuple in comlist', + entry=item)) + except IndexError: + time.sleep(1) def write_entry_change(self, prefix, data=[]): + if not getattr(self, "fh", None): + self.open() + self.fh.write("%s %s\n" % (prefix, ' '.join(data))) def open(self): try: - self.xsync_change = os.path.join(self.tempdir, 'XSYNC-CHANGELOG.' + str(int(time.time()))) + self.xsync_change = os.path.join( + self.tempdir, 'XSYNC-CHANGELOG.' + str(int(time.time()))) self.fh = open(self.xsync_change, 'w') except IOError: raise def close(self): - self.fh.close() + if getattr(self, "fh", None): + self.fh.flush() + os.fsync(self.fh.fileno()) + self.fh.close() + self.fh = None def fname(self): return self.xsync_change - def crawl(self, path='.', xtr=None, done=0): - """ generate a CHANGELOG file consumable by process_change """ + def put(self, mark, item): + self.comlist.append((mark, item)) + + def sync_xsync(self, last): + """schedule a processing of changelog""" + self.close() + if self.counter > 0: + self.put('xsync', self.fname()) + self.counter = 0 + if not last: + time.sleep(1) # make sure changelogs are 1 second apart + + def sync_stime(self, stime=None, last=False): + """schedule a stime synchronization""" + if stime: + self.put('stime', stime) + if last: + self.put('finale', None) + + def sync_done(self, stime=[], last=False): + self.sync_xsync(last) + if stime: + # Send last as True only for last stime entry + for st in stime[:-1]: + self.sync_stime(st, False) + + if stime and stime[-1]: + self.sync_stime(stime[-1], last) + + def is_sticky(self, path, mo): + """check for DHTs linkto sticky bit file""" + sticky = False + if mo & 0o1000: + sticky = self.master.server.linkto_check(path) + return sticky + + def Xcrawl(self, path='.', xtr_root=None): + """ + generate a CHANGELOG file consumable by process_change. + + slave's xtime (stime) is _cached_ for comparisons across + the filesystem tree, but set after directory synchronization. 
+ """ if path == '.': - self.open() self.crawls += 1 - if not xtr: + if not xtr_root: # get the root stime and use it for all comparisons - xtr = self.xtime('.', self.slave) - if isinstance(xtr, int): - if xtr != ENOENT: - raise GsyncdError('slave is corrupt') - xtr = self.minus_infinity + xtr_root = self.xtime('.', self.slave) + if isinstance(xtr_root, int): + if xtr_root != ENOENT: + logging.warn(lf("slave cluster not returning the " + "xtime for root", + error=xtr_root)) + xtr_root = self.minus_infinity xtl = self.xtime(path) if isinstance(xtl, int): - raise GsyncdError('master is corrupt') - if xtr == xtl: + logging.warn("master cluster's xtime not found") + xtr = self.xtime(path, self.slave) + if isinstance(xtr, int): + if xtr != ENOENT: + logging.warn(lf("slave cluster not returning the " + "xtime for dir", + path=path, + error=xtr)) + xtr = self.minus_infinity + xtr = max(xtr, xtr_root) + zero_zero = (0, 0) + if xtr_root == zero_zero: + xtr = self.minus_infinity + if not self.need_sync(path, xtl, xtr): if path == '.': - self.close() + self.sync_done([(path, xtl)], True) return self.xtime_reversion_hook(path, xtl, xtr) logging.debug("entering " + path) dem = self.master.server.entries(path) pargfid = self.master.server.gfid(path) if isinstance(pargfid, int): - logging.warn('skipping directory %s' % (path)) + logging.warn(lf('skipping directory', + path=path)) for e in dem: bname = e e = os.path.join(path, e) - st = self.lstat(e) + xte = self.xtime(e) + if isinstance(xte, int): + logging.warn(lf("irregular xtime", + path=e, + error=errno.errorcode[xte])) + continue + if not self.need_sync(e, xte, xtr): + continue + st = self.master.server.lstat(e) if isinstance(st, int): - logging.warn('%s got purged in the interim..' % e) + logging.warn(lf('got purged in the interim', + path=e)) + continue + if self.is_sticky(e, st.st_mode): + logging.debug(lf('ignoring sticky bit file', + path=e)) continue gfid = self.master.server.gfid(e) if isinstance(gfid, int): - logging.warn('skipping entry %s..' % (e)) - continue - xte = self.xtime(e) - if isinstance(xte, int): - raise GsyncdError('master is corrupt') - if not self.need_sync(e, xte, xtr): + logging.warn(lf('skipping entry', + path=e)) continue mo = st.st_mode + self.counter += 1 if ((stat.S_ISDIR(mo) or + stat.S_ISLNK(mo) or + stat.S_ISREG(mo))) else 0 + if self.counter == self.XSYNC_MAX_ENTRIES: + self.sync_done(self.stimes, False) + self.stimes = [] if stat.S_ISDIR(mo): - self.write_entry_change("E", [gfid, 'MKDIR', escape(os.path.join(pargfid, bname))]) - self.crawl(e, xtr) - elif stat.S_ISREG(mo): - self.write_entry_change("E", [gfid, 'CREATE', escape(os.path.join(pargfid, bname))]) - self.write_entry_change("D", [gfid]) + self.write_entry_change("E", + [gfid, 'MKDIR', str(mo), + str(0), str(0), escape_space_newline( + os.path.join(pargfid, bname))]) + self.write_entry_change("M", [gfid, "SETATTR", str(st.st_uid), + str(st.st_gid), str(st.st_mode), + str(st.st_atime), + str(st.st_mtime)]) + self.Xcrawl(e, xtr_root) + stime_to_update = xte + # Live Changelog Start time indicates that from that time + # onwards Live changelogs are available. If we update stime + # greater than live_changelog_start time then Geo-rep will + # skip those changelogs as already processed. But Xsync + # actually failed to sync the deletes and Renames. Update + # stime as min(Live_changelogs_time, Actual_stime) When it + # switches to Changelog mode, it syncs Deletes and Renames. 
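
# A standalone sketch (not part of this patch) of the stime capping that
# the comment above describes and the following lines implement, with
# hypothetical (sec, nsec) values: the hybrid crawl never records an
# stime beyond the point where live changelogs begin, so the changelog
# crawl still replays the deletes and renames xsync cannot see.
live_changelog_start_time = (1517961000, 0)
xte = (1517961900, 123)                     # xtime of the entry just crawled
stime_to_update = min(live_changelog_start_time, xte)
print(stime_to_update)                      # capped at (1517961000, 0)
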
+ if self.live_changelog_start_time: + stime_to_update = min(self.live_changelog_start_time, xte) + self.stimes.append((e, stime_to_update)) elif stat.S_ISLNK(mo): - self.write_entry_change("E", [gfid, 'SYMLINK', escape(os.path.join(pargfid, bname))]) - else: - logging.info('ignoring %s' % e) - if path == '.': - logging.info('processing xsync changelog %s' % self.fname()) - self.close() - self.process([self.fname()], done) - self.upd_stime(xtl) - -class GMasterXtimeMixin(GMasterCommon): - """ xtime based change detection and syncing """ - - def register(self): - pass - - def crawl(self, path='.', xtl=None): - """crawling... - - Standing around - All the right people - Crawling - Tennis on Tuesday - The ladder is long - It is your nature - You've gotta suntan - Football on Sunday - Society boy - - Recursively walk the master side tree and check if updates are - needed due to xtime differences. One invocation of crawl checks - children of @path and do a recursive enter only on - those directory children where there is an update needed. - - Way of updates depend on file type: - - for symlinks, sync them directy and synchronously - - for regular children, register jobs for @path (cf. .add_job) to start - and wait on their rsync - - for directory children, register a job for @path which waits (.wait) - on jobs for the given child - (other kind of filesystem nodes are not considered) - - Those slave side children which do not exist on master are simply - purged (see Server.purge). - - Behavior is fault tolerant, synchronization is adaptive: if some action fails, - just go on relentlessly, adding a fail job (see .add_failjob) which will prevent - the .sendmark on @path, so when the next crawl will arrive to @path it will not - see it as up-to-date and will try to sync it again. While this semantics can be - supported by funky design principles (http://c2.com/cgi/wiki?LazinessImpatienceHubris), - the ultimate reason which excludes other possibilities is simply transience: we cannot - assert that the file systems (master / slave) underneath do not change and actions - taken upon some condition will not lose their context by the time they are performed. - """ - logging.debug("entering " + path) - if not xtl: - xtl = self.xtime(path) - if isinstance(xtl, int): - self.add_failjob(path, 'no-local-node') - return - xtr = self.xtime(path, self.slave) - if isinstance(xtr, int): - if xtr != ENOENT: - self.slave.server.purge(path) - try: - self.slave.server.mkdir(path) - except OSError: - self.add_failjob(path, 'no-remote-node') - return - xtr = self.minus_infinity - else: - self.xtime_reversion_hook(path, xtl, xtr) - if xtl == xtr: - if path == '.' 
and self.change_seen: - self.turns += 1 - self.change_seen = False - if self.total_turns: - logging.info("finished turn #%s/%s" % \ - (self.turns, self.total_turns)) - if self.turns == self.total_turns: - logging.info("reached turn limit") - self.terminate = True - return - if path == '.': - self.change_seen = True - try: - dem = self.master.server.entries(path) - except OSError: - self.add_failjob(path, 'local-entries-fail') - return - random.shuffle(dem) - try: - des = self.slave.server.entries(path) - except OSError: - self.slave.server.purge(path) - try: - self.slave.server.mkdir(path) - des = self.slave.server.entries(path) - except OSError: - self.add_failjob(path, 'remote-entries-fail') - return - dd = set(des) - set(dem) - if dd: - self.purge_missing(path, dd) - chld = [] - for e in dem: - e = os.path.join(path, e) - xte = self.xtime(e) - if isinstance(xte, int): - logging.warn("irregular xtime for %s: %s" % (e, errno.errorcode[xte])) - elif self.need_sync(e, xte, xtr): - chld.append((e, xte)) - def indulgently(e, fnc, blame=None): - if not blame: - blame = path - try: - return fnc(e) - except (IOError, OSError): - ex = sys.exc_info()[1] - if ex.errno == ENOENT: - logging.warn("salvaged ENOENT for " + e) - self.add_failjob(blame, 'by-indulgently') - return False - else: - raise - for e, xte in chld: - st = indulgently(e, lambda e: os.lstat(e)) - if st == False: - continue - - mo = st.st_mode - adct = {'own': (st.st_uid, st.st_gid)} - if stat.S_ISLNK(mo): - if indulgently(e, lambda e: self.slave.server.symlink(os.readlink(e), e)) == False: - continue - self.sendmark(e, xte, adct) + self.write_entry_change( + "E", [gfid, 'SYMLINK', escape_space_newline( + os.path.join(pargfid, bname))]) elif stat.S_ISREG(mo): - logging.debug("syncing %s ..." % e) - pb = self.syncer.add(e) - timeA = datetime.now() - def regjob(e, xte, pb): - if pb.wait()[0]: - logging.debug("synced " + e) - self.sendmark_regular(e, xte) - # update stats - timeB = datetime.now() - self.crawl_stats['last_synctime'] = timeB - timeA - self.crawl_stats['sync_time'] += ((self.crawl_stats['last_synctime'].microseconds) / (10.0 ** 6)) - self.crawl_stats['files_synced'] += 1 - self.total_crawl_stats['sync_time'] += ((self.crawl_stats['last_synctime'].microseconds) / (10.0 ** 6)) - self.total_crawl_stats['files_synced'] += 1 - self.update_crawl_data() - return True - else: - logging.warn("failed to sync " + e) - self.add_job(path, 'reg', regjob, e, xte, pb) - elif stat.S_ISDIR(mo): - adct['mode'] = mo - if indulgently(e, lambda e: (self.add_job(path, 'cwait', self.wait, e, xte, adct), - self.crawl(e, xte), - True)[-1], blame=e) == False: - continue - else: - # ignore fifos, sockets and special files - pass + nlink = st.st_nlink + nlink -= 1 # fixup backend stat link count + # if a file has a hardlink, create a Changelog entry as + # 'LINK' so the slave side will decide if to create the + # new entry, or to create link. 
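
# A standalone worked example (not part of this patch) of the hardlink
# check the next lines perform, with hypothetical values: the link count
# reported by the brick backend is decremented by one (the backend keeps
# an extra gfid hardlink per file), and anything still above one is
# recorded as LINK rather than MKNOD.
st_nlink = 3              # hypothetical lstat() link count on the brick
nlink = st_nlink - 1      # discount the backend's extra gfid link
record_as = "MKNOD" if nlink == 1 else "LINK"
print(record_as)          # "LINK" -> slave may link to an existing gfid
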
+ if nlink == 1: + self.write_entry_change("E", + [gfid, 'MKNOD', str(mo), + str(0), str(0), + escape_space_newline( + os.path.join( + pargfid, bname))]) + else: + self.write_entry_change( + "E", [gfid, 'LINK', escape_space_newline( + os.path.join(pargfid, bname))]) + self.write_entry_change("D", [gfid]) if path == '.': - self.wait(path, xtl) + stime_to_update = xtl + if self.live_changelog_start_time: + stime_to_update = min(self.live_changelog_start_time, xtl) + self.stimes.append((path, stime_to_update)) + self.sync_done(self.stimes, True) class BoxClosedErr(Exception): pass + class PostBox(list): + """synchronized collection for storing things thought of as "requests" """ def __init__(self, *a): list.__init__(self, *a) # too bad Python stdlib does not have read/write locks... - # it would suffivce to grab the lock in .append as reader, in .close as writer + # it would suffivce to grab the lock in .append as reader, in .close as + # writer self.lever = Condition() self.open = True self.done = False @@ -1264,7 +1931,9 @@ class PostBox(list): self.open = False self.lever.release() + class Syncer(object): + """a staged queue to relay rsync requests to rsync workers By "staged queue" its meant that when a consumer comes to the @@ -1294,17 +1963,19 @@ class Syncer(object): each completed syncjob. """ - def __init__(self, slave): + def __init__(self, slave, sync_engine, resilient_errnos=[]): """spawn worker threads""" + self.log_err = False self.slave = slave self.lock = Lock() self.pb = PostBox() - self.bytes_synced = 0 - for i in range(int(gconf.sync_jobs)): - t = Thread(target=self.syncjob) + self.sync_engine = sync_engine + self.errnos_ok = resilient_errnos + for i in range(gconf.get("sync-jobs")): + t = Thread(target=self.syncjob, args=(i + 1, )) t.start() - def syncjob(self): + def syncjob(self, job_id): """the life of a worker""" while True: pb = None @@ -1317,11 +1988,17 @@ class Syncer(object): break time.sleep(0.5) pb.close() - po = self.slave.rsync(pb) + start = time.time() + po = self.sync_engine(pb, self.log_err) + logging.info(lf("Sync Time Taken", + job=job_id, + num_files=len(pb), + return_code=po.returncode, + duration="%.4f" % (time.time() - start))) + if po.returncode == 0: ret = (True, 0) - elif po.returncode in (23, 24): - # partial transfer (cf. rsync(1)), that's normal + elif po.returncode in self.errnos_ok: ret = (False, po.returncode) else: po.errfail() @@ -1335,3 +2012,9 @@ class Syncer(object): return pb except BoxClosedErr: pass + + def enable_errorlog(self): + self.log_err = True + + def disable_errorlog(self): + self.log_err = False diff --git a/geo-replication/syncdaemon/monitor.py b/geo-replication/syncdaemon/monitor.py index badd0d9c5f8..6aa7b9dfc99 100644 --- a/geo-replication/syncdaemon/monitor.py +++ b/geo-replication/syncdaemon/monitor.py @@ -1,86 +1,78 @@ +# +# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com> +# This file is part of GlusterFS. + +# This file is licensed to you under your choice of the GNU Lesser +# General Public License, version 3 or any later version (LGPLv3 or +# later), or the GNU General Public License, version 2 (GPLv2), in all +# cases as published by the Free Software Foundation. 
+# + import os import sys import time import signal import logging -import uuid import xml.etree.ElementTree as XET -from subprocess import PIPE -from resource import Popen, FILE, GLUSTER, SSH from threading import Lock -from gconf import gconf -from syncdutils import update_file, select, waitpid, set_term_handler, is_host_local, GsyncdError -from syncdutils import escape, Thread, finalize, memoize - -class Volinfo(object): - def __init__(self, vol, host='localhost', prelude=[]): - po = Popen(prelude + ['gluster', '--xml', '--remote-host=' + host, 'volume', 'info', vol], - stdout=PIPE, stderr=PIPE) - vix = po.stdout.read() - po.wait() - po.terminate_geterr() - vi = XET.fromstring(vix) - if vi.find('opRet').text != '0': - if prelude: - via = '(via %s) ' % prelude.join(' ') - else: - via = ' ' - raise GsyncdError('getting volume info of %s%s failed with errorcode %s', - (vol, via, vi.find('opErrno').text)) - self.tree = vi - self.volume = vol - self.host = host - - def get(self, elem): - return self.tree.findall('.//' + elem) - - @property - @memoize - def bricks(self): - def bparse(b): - host, dirp = b.text.split(':', 2) - return {'host': host, 'dir': dirp} - return [ bparse(b) for b in self.get('brick') ] - - @property - @memoize - def uuid(self): - ids = self.get('id') - if len(ids) != 1: - raise GsyncdError("volume info of %s obtained from %s: ambiguous uuid", - self.volume, self.host) - return ids[0].text +from errno import ECHILD, ESRCH +import random + +from resource import SSH +import gsyncdconfig as gconf +import libgfchangelog +from rconf import rconf +from syncdutils import (select, waitpid, errno_wrap, lf, grabpidfile, + set_term_handler, GsyncdError, + Thread, finalize, Volinfo, VolinfoFromGconf, + gf_event, EVENT_GEOREP_FAULTY, get_up_nodes, + unshare_propagation_supported) +from gsyncdstatus import GeorepStatus, set_monitor_status +import py2py3 +from py2py3 import pipe + +ParseError = XET.ParseError if hasattr(XET, 'ParseError') else SyntaxError + + +def get_subvol_num(brick_idx, vol, hot): + tier = vol.is_tier() + disperse_count = vol.disperse_count(tier, hot) + replica_count = vol.replica_count(tier, hot) + distribute_count = vol.distribution_count(tier, hot) + gconf.setconfig("master-distribution-count", distribute_count) + + if (tier and not hot): + brick_idx = brick_idx - vol.get_hot_bricks_count(tier) + + subvol_size = disperse_count if disperse_count > 0 else replica_count + cnt = int((brick_idx + 1) / subvol_size) + rem = (brick_idx + 1) % subvol_size + if rem > 0: + cnt = cnt + 1 + + if (tier and hot): + return "hot_" + str(cnt) + elif (tier and not hot): + return "cold_" + str(cnt) + else: + return str(cnt) class Monitor(object): + """class which spawns and manages gsyncd workers""" - ST_INIT = 'Initializing...' - ST_STABLE = 'Stable' - ST_FAULTY = 'faulty' - ST_INCON = 'inconsistent' + ST_INIT = 'Initializing...' 
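Setting the tier handling aside, get_subvol_num() above is a 1-based ceiling division of the brick index by the subvolume size (disperse count if the volume is dispersed, otherwise the replica count); a sketch with made-up layouts:

def subvol_num(brick_idx, replica_count=1, disperse_count=0):
    subvol_size = disperse_count if disperse_count > 0 else replica_count
    return (brick_idx + subvol_size) // subvol_size    # ceil((idx + 1) / size)

assert subvol_num(0, replica_count=3) == 1     # bricks 0-2  -> replica set 1
assert subvol_num(3, replica_count=3) == 2     # bricks 3-5  -> replica set 2
assert subvol_num(7, disperse_count=6) == 2    # bricks 6-11 -> disperse set 2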
+ ST_STARTED = 'Started' + ST_STABLE = 'Active' + ST_FAULTY = 'Faulty' + ST_INCON = 'inconsistent' _ST_ORD = [ST_STABLE, ST_INIT, ST_FAULTY, ST_INCON] def __init__(self): self.lock = Lock() self.state = {} - - def set_state(self, state, w=None): - """set the state that can be used by external agents - like glusterd for status reporting""" - computestate = lambda: self.state and self._ST_ORD[max(self._ST_ORD.index(s) for s in self.state.values())] - if w: - self.lock.acquire() - old_state = computestate() - self.state[w] = state - state = computestate() - self.lock.release() - if state != old_state: - self.set_state(state) - else: - logging.info('new state: %s' % state) - if getattr(gconf, 'state_file', None): - update_file(gconf.state_file, lambda f: f.write(state + '\n')) + self.status = {} @staticmethod def terminate(): @@ -88,9 +80,10 @@ class Monitor(object): # standard handler set_term_handler(lambda *a: set_term_handler()) # give a chance to graceful exit - os.kill(-os.getpid(), signal.SIGTERM) + errno_wrap(os.kill, [-os.getpid(), signal.SIGTERM], [ESRCH]) - def monitor(self, w, argv, cpids): + def monitor(self, w, argv, cpids, slave_vol, slave_host, master, + suuid, slavenodes): """the monitor loop Basic logic is a blantantly simple blunt heuristics: @@ -109,153 +102,294 @@ class Monitor(object): blown worker blows up on EPIPE if the net goes down, due to the keep-alive thread) """ - - self.set_state(self.ST_INIT, w) + if not self.status.get(w[0]['dir'], None): + self.status[w[0]['dir']] = GeorepStatus(gconf.get("state-file"), + w[0]['host'], + w[0]['dir'], + w[0]['uuid'], + master, + "%s::%s" % (slave_host, + slave_vol)) ret = 0 + def nwait(p, o=0): - p2, r = waitpid(p, o) - if not p2: - return - return r + try: + p2, r = waitpid(p, o) + if not p2: + return + return r + except OSError as e: + # no child process, this happens if the child process + # already died and has been cleaned up + if e.errno == ECHILD: + return -1 + else: + raise + def exit_signalled(s): - """ child teminated due to receipt of SIGUSR1 """ + """ child terminated due to receipt of SIGUSR1 """ return (os.WIFSIGNALED(s) and (os.WTERMSIG(s) == signal.SIGUSR1)) + def exit_status(s): if os.WIFEXITED(s): return os.WEXITSTATUS(s) return 1 - conn_timeout = int(gconf.connection_timeout) + + conn_timeout = gconf.get("connection-timeout") while ret in (0, 1): - logging.info('-' * conn_timeout) - logging.info('starting gsyncd worker') - pr, pw = os.pipe() + remote_user, remote_host = w[1][0].split("@") + remote_id = w[1][1] + # Check the status of the connected slave node + # If the connected slave node is down then try to connect to + # different up node. + current_slave_host = remote_host + slave_up_hosts = get_up_nodes(slavenodes, gconf.get("ssh-port")) + + if (current_slave_host, remote_id) not in slave_up_hosts: + if len(slave_up_hosts) > 0: + remote_new = random.choice(slave_up_hosts) + remote_host = "%s@%s" % (remote_user, remote_new[0]) + remote_id = remote_new[1] + + # Spawn the worker in lock to avoid fd leak + self.lock.acquire() + + self.status[w[0]['dir']].set_worker_status(self.ST_INIT) + logging.info(lf('starting gsyncd worker', + brick=w[0]['dir'], + slave_node=remote_host)) + + pr, pw = pipe() cpid = os.fork() if cpid == 0: os.close(pr) - os.execv(sys.executable, argv + ['--feedback-fd', str(pw), - '--local-path', w[0], - '--local-id', '.' 
+ escape(w[0]), - '--resource-remote', w[1]]) - self.lock.acquire() + + args_to_worker = argv + [ + 'worker', + rconf.args.master, + rconf.args.slave, + '--feedback-fd', str(pw), + '--local-path', w[0]['dir'], + '--local-node', w[0]['host'], + '--local-node-id', w[0]['uuid'], + '--slave-id', suuid, + '--subvol-num', str(w[2]), + '--resource-remote', remote_host, + '--resource-remote-id', remote_id + ] + + if rconf.args.config_file is not None: + args_to_worker += ['-c', rconf.args.config_file] + + if w[3]: + args_to_worker.append("--is-hottier") + + if rconf.args.debug: + args_to_worker.append("--debug") + + access_mount = gconf.get("access-mount") + if access_mount: + os.execv(sys.executable, args_to_worker) + else: + if unshare_propagation_supported(): + logging.debug("Worker would mount volume privately") + unshare_cmd = ['unshare', '-m', '--propagation', + 'private'] + cmd = unshare_cmd + args_to_worker + os.execvp("unshare", cmd) + else: + logging.debug("Mount is not private. It would be lazy" + " umounted") + os.execv(sys.executable, args_to_worker) + cpids.add(cpid) - self.lock.release() os.close(pw) + + self.lock.release() + t0 = time.time() so = select((pr,), (), (), conn_timeout)[0] os.close(pr) + if so: ret = nwait(cpid, os.WNOHANG) - if ret != None: - logging.debug("worker died before establishing connection") + + if ret is not None: + logging.info(lf("worker died before establishing " + "connection", + brick=w[0]['dir'])) else: - logging.debug("worker seems to be connected (?? racy check)") + logging.debug("worker(%s) connected" % w[0]['dir']) while time.time() < t0 + conn_timeout: ret = nwait(cpid, os.WNOHANG) - if ret != None: - logging.debug("worker died in startup phase") + + if ret is not None: + logging.info(lf("worker died in startup phase", + brick=w[0]['dir'])) break + time.sleep(1) else: - logging.debug("worker not confirmed in %d sec, aborting it" % \ - conn_timeout) - self.terminate() - time.sleep(1) - os.kill(cpid, signal.SIGKILL) + logging.info( + lf("Worker not confirmed after wait, aborting it. " + "Gsyncd invocation on remote slave via SSH or " + "gluster master mount might have hung. Please " + "check the above logs for exact issue and check " + "master or slave volume for errors. 
Restarting " + "master/slave volume accordingly might help.", + brick=w[0]['dir'], + timeout=conn_timeout)) + errno_wrap(os.kill, [cpid, signal.SIGKILL], [ESRCH]) ret = nwait(cpid) - if ret == None: - self.set_state(self.ST_STABLE, w) + if ret is None: ret = nwait(cpid) if exit_signalled(ret): ret = 0 else: ret = exit_status(ret) - if ret in (0,1): - self.set_state(self.ST_FAULTY, w) + if ret in (0, 1): + self.status[w[0]['dir']].set_worker_status(self.ST_FAULTY) + gf_event(EVENT_GEOREP_FAULTY, + master_volume=master.volume, + master_node=w[0]['host'], + master_node_id=w[0]['uuid'], + slave_host=slave_host, + slave_volume=slave_vol, + current_slave_host=current_slave_host, + brick_path=w[0]['dir']) time.sleep(10) - self.set_state(self.ST_INCON, w) + self.status[w[0]['dir']].set_worker_status(self.ST_INCON) return ret - def multiplex(self, wspx, suuid): - def sigcont_handler(*a): - """ - Re-init logging and send group kill signal - """ - md = gconf.log_metadata - logging.shutdown() - lcls = logging.getLoggerClass() - lcls.setup(label=md.get('saved_label'), **md) - pid = os.getpid() - os.kill(-pid, signal.SIGUSR1) - signal.signal(signal.SIGUSR1, lambda *a: ()) - signal.signal(signal.SIGCONT, sigcont_handler) - - argv = sys.argv[:] - for o in ('-N', '--no-daemon', '--monitor'): - while o in argv: - argv.remove(o) - argv.extend(('-N', '-p', '', '--slave-id', suuid)) - argv.insert(0, os.path.basename(sys.executable)) + def multiplex(self, wspx, suuid, slave_vol, slave_host, master, slavenodes): + argv = [os.path.basename(sys.executable), sys.argv[0]] cpids = set() ta = [] for wx in wspx: def wmon(w): - cpid, _ = self.monitor(w, argv, cpids) - terminate() + cpid, _ = self.monitor(w, argv, cpids, slave_vol, + slave_host, master, suuid, slavenodes) time.sleep(1) self.lock.acquire() for cpid in cpids: - os.kill(cpid, signal.SIGKILL) + errno_wrap(os.kill, [cpid, signal.SIGKILL], [ESRCH]) self.lock.release() finalize(exval=1) - t = Thread(target = wmon, args=[wx]) + t = Thread(target=wmon, args=[wx]) t.start() ta.append(t) + + # monitor status was being updated in each monitor thread. It + # should not be done as it can cause deadlock for a worker start. + # set_monitor_status uses flock to synchronize multple instances + # updating the file. Since each monitor thread forks worker, + # these processes can hold the reference to fd of status + # file causing deadlock to workers which starts later as flock + # will not be release until all references to same fd is closed. + # It will also cause fd leaks. 
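A minimal sketch of the flock-serialized update the comment above refers to; the single-line file format and helper name are assumptions, not the real GeorepStatus layout:

import fcntl
import os

def update_monitor_status(path, value):
    with open(path, 'a+') as f:
        fcntl.flock(f.fileno(), fcntl.LOCK_EX)     # serialize concurrent writers
        try:
            f.seek(0)
            f.truncate()
            f.write(value + '\n')
            f.flush()
            os.fsync(f.fileno())
        finally:
            fcntl.flock(f.fileno(), fcntl.LOCK_UN)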
+ + self.lock.acquire() + set_monitor_status(gconf.get("state-file"), self.ST_STARTED) + self.lock.release() for t in ta: t.join() -def distribute(*resources): - master, slave = resources - mvol = Volinfo(master.volume, master.host) - logging.debug('master bricks: ' + repr(mvol.bricks)) - locmbricks = [ b['dir'] for b in mvol.bricks if is_host_local(b['host']) ] - prelude = [] - si = slave - if isinstance(slave, SSH): - prelude = gconf.ssh_command.split() + [slave.remote_addr] - si = slave.inner_rsc - logging.debug('slave SSH gateway: ' + slave.remote_addr) - if isinstance(si, FILE): - sbricks = {'host': 'localhost', 'dir': si.path} - suuid = uuid.uuid5(uuid.NAMESPACE_URL, slave.get_url(canonical=True)) - elif isinstance(si, GLUSTER): - svol = Volinfo(si.volume, si.host, prelude) - sbricks = svol.bricks - suuid = svol.uuid + +def distribute(master, slave): + if rconf.args.use_gconf_volinfo: + mvol = VolinfoFromGconf(master.volume, master=True) else: - raise GsyncdError("unkown slave type " + slave.url) - logging.info('slave bricks: ' + repr(sbricks)) - if isinstance(si, FILE): - slaves = [ slave.url ] + mvol = Volinfo(master.volume, master.host, master=True) + logging.debug('master bricks: ' + repr(mvol.bricks)) + prelude = [] + slave_host = None + slave_vol = None + + prelude = [gconf.get("ssh-command")] + \ + gconf.get("ssh-options").split() + \ + ["-p", str(gconf.get("ssh-port"))] + \ + [slave.remote_addr] + + logging.debug('slave SSH gateway: ' + slave.remote_addr) + + if rconf.args.use_gconf_volinfo: + svol = VolinfoFromGconf(slave.volume, master=False) else: - slavenodes = set(b['host'] for b in sbricks) - if isinstance(slave, SSH) and not gconf.isolated_slave: - rap = SSH.parse_ssh_address(slave.remote_addr) - slaves = [ 'ssh://' + rap['user'] + '@' + h + ':' + si.url for h in slavenodes ] - else: - slavevols = [ h + ':' + si.volume for h in slavenodes ] - if isinstance(slave, SSH): - slaves = [ 'ssh://' + rap.remote_addr + ':' + v for v in slavevols ] - else: - slaves = slavevols - locmbricks.sort() - slaves.sort() + svol = Volinfo(slave.volume, "localhost", prelude, master=False) + + sbricks = svol.bricks + suuid = svol.uuid + slave_host = slave.remote_addr.split('@')[-1] + slave_vol = slave.volume + + # save this xattr for the session delete command + old_stime_xattr_prefix = gconf.get("stime-xattr-prefix", None) + new_stime_xattr_prefix = "trusted.glusterfs." + mvol.uuid + "." + \ + svol.uuid + if not old_stime_xattr_prefix or \ + old_stime_xattr_prefix != new_stime_xattr_prefix: + gconf.setconfig("stime-xattr-prefix", new_stime_xattr_prefix) + + logging.debug('slave bricks: ' + repr(sbricks)) + + slavenodes = set((b['host'], b["uuid"]) for b in sbricks) + rap = SSH.parse_ssh_address(slave) + slaves = [(rap['user'] + '@' + h[0], h[1]) for h in slavenodes] + workerspex = [] - for i in range(len(locmbricks)): - workerspex.append((locmbricks[i], slaves[i % len(slaves)])) - logging.info('worker specs: ' + repr(workerspex)) - return workerspex, suuid + for idx, brick in enumerate(mvol.bricks): + if rconf.args.local_node_id == brick['uuid']: + is_hot = mvol.is_hot(":".join([brick['host'], brick['dir']])) + workerspex.append((brick, + slaves[idx % len(slaves)], + get_subvol_num(idx, mvol, is_hot), + is_hot)) + logging.debug('worker specs: ' + repr(workerspex)) + return workerspex, suuid, slave_vol, slave_host, master, slavenodes + + +def monitor(local, remote): + # Check if gsyncd restarted in pause state. 
If + # yes, send SIGSTOP to negative of monitor pid + # to go back to pause state. + if rconf.args.pause_on_start: + errno_wrap(os.kill, [-os.getpid(), signal.SIGSTOP], [ESRCH]) -def monitor(*resources): """oh yeah, actually Monitor is used as singleton, too""" - return Monitor().multiplex(*distribute(*resources)) + return Monitor().multiplex(*distribute(local, remote)) + + +def startup(go_daemon=True): + """set up logging, pidfile grabbing, daemonization""" + pid_file = gconf.get("pid-file") + if not grabpidfile(): + sys.stderr.write("pidfile is taken, exiting.\n") + sys.exit(2) + rconf.pid_file_owned = True + + if not go_daemon: + return + + x, y = pipe() + cpid = os.fork() + if cpid: + os.close(x) + sys.exit() + os.close(y) + os.setsid() + dn = os.open(os.devnull, os.O_RDWR) + for f in (sys.stdin, sys.stdout, sys.stderr): + os.dup2(dn, f.fileno()) + + if not grabpidfile(pid_file + '.tmp'): + raise GsyncdError("cannot grab temporary pidfile") + + os.rename(pid_file + '.tmp', pid_file) + + # wait for parent to terminate + # so we can start up with + # no messing from the dirty + # ol' bustard + select((x,), (), ()) + os.close(x) diff --git a/geo-replication/syncdaemon/py2py3.py b/geo-replication/syncdaemon/py2py3.py new file mode 100644 index 00000000000..f9c76e1b50a --- /dev/null +++ b/geo-replication/syncdaemon/py2py3.py @@ -0,0 +1,184 @@ +# +# Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com> +# This file is part of GlusterFS. + +# This file is licensed to you under your choice of the GNU Lesser +# General Public License, version 3 or any later version (LGPLv3 or +# later), or the GNU General Public License, version 2 (GPLv2), in all +# cases as published by the Free Software Foundation. +# + +# All python2/python3 compatibility routines + +import sys +import os +import stat +import struct +from syncdutils import umask +from ctypes import create_string_buffer + +if sys.version_info >= (3,): + def pipe(): + (r, w) = os.pipe() + os.set_inheritable(r, True) + os.set_inheritable(w, True) + return (r, w) + + # Raw conversion of bytearray to string. Used in the cases where + # buffer is created by create_string_buffer which is a 8-bit char + # array and passed to syscalls to fetch results. Using encode/decode + # doesn't work as it converts to string altering the size. + def bytearray_to_str(byte_arr): + return ''.join([chr(b) for b in byte_arr]) + + # Raw conversion of string to bytes. This is required to convert + # back the string into bytearray(c char array) to use in struc + # pack/unpacking. Again encode/decode can't be used as it + # converts it alters size. 
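The raw conversions described above avoid encode()/decode() because a ctypes string buffer is a fixed-size array of 8-bit chars and a UTF-8 round trip could change its length; a small self-check written under that assumption (Python 3 semantics):

def _conversion_roundtrip():
    raw = bytes(range(256))                    # arbitrary raw byte values
    as_str = ''.join(chr(b) for b in raw)      # what bytearray_to_str() does
    back = bytes(ord(c) for c in as_str)       # what str_to_bytearray() does
    assert back == raw and len(as_str) == len(raw)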
+ def str_to_bytearray(string): + return bytes([ord(c) for c in string]) + + def gr_create_string_buffer(size): + return create_string_buffer(b'\0', size) + + def gr_query_xattr(cls, path, size, syscall, attr=None): + if attr: + return cls._query_xattr(path.encode(), size, syscall, + attr.encode()) + else: + return cls._query_xattr(path.encode(), size, syscall) + + def gr_lsetxattr(cls, path, attr, val): + return cls.libc.lsetxattr(path.encode(), attr.encode(), val, + len(val), 0) + + def gr_lremovexattr(cls, path, attr): + return cls.libc.lremovexattr(path.encode(), attr.encode()) + + def gr_cl_register(libgfapi, brick, path, log_file, log_level, retries): + return libgfapi.gf_changelog_register(brick.encode(), + path.encode(), + log_file.encode(), + log_level, retries) + + def gr_cl_done(libgfapi, clfile): + return libgfapi.gf_changelog_done(clfile.encode()) + + def gr_cl_history_changelog(libgfapi, changelog_path, start, end, num_parallel, + actual_end): + return libgfapi.gf_history_changelog(changelog_path.encode(), + start, end, num_parallel, + actual_end) + + def gr_cl_history_done(libgfapi, clfile): + return libgfapi.gf_history_changelog_done(clfile.encode()) + + # regular file + + def entry_pack_reg(cls, gf, bn, mo, uid, gid): + bn_encoded = bn.encode() + blen = len(bn_encoded) + return struct.pack(cls._fmt_mknod(blen), + uid, gid, gf.encode(), mo, bn_encoded, + stat.S_IMODE(mo), 0, umask()) + + def entry_pack_reg_stat(cls, gf, bn, st): + bn_encoded = bn.encode() + blen = len(bn_encoded) + mo = st['mode'] + return struct.pack(cls._fmt_mknod(blen), + st['uid'], st['gid'], + gf.encode(), mo, bn_encoded, + stat.S_IMODE(mo), 0, umask()) + # mkdir + + def entry_pack_mkdir(cls, gf, bn, mo, uid, gid): + bn_encoded = bn.encode() + blen = len(bn_encoded) + return struct.pack(cls._fmt_mkdir(blen), + uid, gid, gf.encode(), mo, bn_encoded, + stat.S_IMODE(mo), umask()) + # symlink + + def entry_pack_symlink(cls, gf, bn, lnk, st): + bn_encoded = bn.encode() + blen = len(bn_encoded) + lnk_encoded = lnk.encode() + llen = len(lnk_encoded) + return struct.pack(cls._fmt_symlink(blen, llen), + st['uid'], st['gid'], + gf.encode(), st['mode'], bn_encoded, + lnk_encoded) +else: + def pipe(): + (r, w) = os.pipe() + return (r, w) + + # Raw conversion of bytearray to string + def bytearray_to_str(byte_arr): + return byte_arr + + # Raw conversion of string to bytearray + def str_to_bytearray(string): + return string + + def gr_create_string_buffer(size): + return create_string_buffer('\0', size) + + def gr_query_xattr(cls, path, size, syscall, attr=None): + if attr: + return cls._query_xattr(path, size, syscall, attr) + else: + return cls._query_xattr(path, size, syscall) + + def gr_lsetxattr(cls, path, attr, val): + return cls.libc.lsetxattr(path, attr, val, len(val), 0) + + def gr_lremovexattr(cls, path, attr): + return cls.libc.lremovexattr(path, attr) + + def gr_cl_register(libgfapi, brick, path, log_file, log_level, retries): + return libgfapi.gf_changelog_register(brick, path, log_file, + log_level, retries) + + def gr_cl_done(libgfapi, clfile): + return libgfapi.gf_changelog_done(clfile) + + def gr_cl_history_changelog(libgfapi, changelog_path, start, end, num_parallel, + actual_end): + return libgfapi.gf_history_changelog(changelog_path, start, end, + num_parallel, actual_end) + + def gr_cl_history_done(libgfapi, clfile): + return libgfapi.gf_history_changelog_done(clfile) + + # regular file + + def entry_pack_reg(cls, gf, bn, mo, uid, gid): + blen = len(bn) + return struct.pack(cls._fmt_mknod(blen), 
+ uid, gid, gf, mo, bn, + stat.S_IMODE(mo), 0, umask()) + + def entry_pack_reg_stat(cls, gf, bn, st): + blen = len(bn) + mo = st['mode'] + return struct.pack(cls._fmt_mknod(blen), + st['uid'], st['gid'], + gf, mo, bn, + stat.S_IMODE(mo), 0, umask()) + # mkdir + + def entry_pack_mkdir(cls, gf, bn, mo, uid, gid): + blen = len(bn) + return struct.pack(cls._fmt_mkdir(blen), + uid, gid, gf, mo, bn, + stat.S_IMODE(mo), umask()) + # symlink + + def entry_pack_symlink(cls, gf, bn, lnk, st): + blen = len(bn) + llen = len(lnk) + return struct.pack(cls._fmt_symlink(blen, llen), + st['uid'], st['gid'], + gf, st['mode'], bn, lnk) diff --git a/geo-replication/syncdaemon/rconf.py b/geo-replication/syncdaemon/rconf.py new file mode 100644 index 00000000000..ff716ee4d6d --- /dev/null +++ b/geo-replication/syncdaemon/rconf.py @@ -0,0 +1,31 @@ +# +# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com> +# This file is part of GlusterFS. + +# This file is licensed to you under your choice of the GNU Lesser +# General Public License, version 3 or any later version (LGPLv3 or +# later), or the GNU General Public License, version 2 (GPLv2), in all +# cases as published by the Free Software Foundation. +# + + +class RConf(object): + + """singleton class to store runtime globals + shared between gsyncd modules""" + + ssh_ctl_dir = None + ssh_ctl_args = None + cpid = None + pid_file_owned = False + log_exit = False + permanent_handles = [] + log_metadata = {} + mgmt_lock_fd = None + args = None + turns = 0 + mountbroker = False + mount_point = None + mbr_umount_cmd = [] + +rconf = RConf() diff --git a/geo-replication/syncdaemon/repce.py b/geo-replication/syncdaemon/repce.py index 755fb61df48..c622afa6373 100644 --- a/geo-replication/syncdaemon/repce.py +++ b/geo-replication/syncdaemon/repce.py @@ -1,32 +1,40 @@ +# +# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com> +# This file is part of GlusterFS. + +# This file is licensed to you under your choice of the GNU Lesser +# General Public License, version 3 or any later version (LGPLv3 or +# later), or the GNU General Public License, version 2 (GPLv2), in all +# cases as published by the Free Software Foundation. +# + import os import sys import time import logging from threading import Condition try: - import thread -except ImportError: - # py 3 import _thread as thread -try: - from Queue import Queue except ImportError: - # py 3 + import thread +try: from queue import Queue +except ImportError: + from Queue import Queue try: import cPickle as pickle except ImportError: - # py 3 import pickle -from syncdutils import Thread, select +from syncdutils import Thread, select, lf -pickle_proto = -1 +pickle_proto = 2 repce_version = 1.0 + def ioparse(i, o): if isinstance(i, int): - i = os.fdopen(i) + i = os.fdopen(i, 'rb') # rely on duck typing for recognizing # streams as that works uniformly # in py2 and py3 @@ -34,6 +42,7 @@ def ioparse(i, o): o = o.fileno() return (i, o) + def send(out, *args): """pickle args and write out wholly in one syscall @@ -43,12 +52,21 @@ def send(out, *args): """ os.write(out, pickle.dumps(args, pickle_proto)) + def recv(inf): - """load an object from input stream""" - return pickle.load(inf) + """load an object from input stream + python2 and python3 compatibility, inf is sys.stdin + and is opened as text stream by default. 
Hence using the + buffer attribute in python3 + """ + if hasattr(inf, "buffer"): + return pickle.load(inf.buffer) + else: + return pickle.load(inf) class RepceServer(object): + """RePCe is Hungarian for canola, http://hu.wikipedia.org/wiki/Repce ... also our homebrewed RPC backend where the transport layer is @@ -95,16 +113,17 @@ class RepceServer(object): if rmeth == '__repce_version__': res = repce_version else: - try: - res = getattr(self.obj, rmeth)(*in_data[2:]) - except: - res = sys.exc_info()[1] - exc = True - logging.exception("call failed: ") + try: + res = getattr(self.obj, rmeth)(*in_data[2:]) + except: + res = sys.exc_info()[1] + exc = True + logging.exception("call failed: ") send(self.out, rid, exc, res) class RepceJob(object): + """class representing message status we can use for waiting on reply""" @@ -137,6 +156,7 @@ class RepceJob(object): class RepceClient(object): + """RePCe is Hungarian for canola, http://hu.wikipedia.org/wiki/Repce ... also our homebrewed RPC backend where the transport layer is @@ -148,7 +168,7 @@ class RepceClient(object): def __init__(self, i, o): self.inf, self.out = ioparse(i, o) self.jtab = {} - t = Thread(target = self.listen) + t = Thread(target=self.listen) t.start() def listen(self): @@ -177,25 +197,33 @@ class RepceClient(object): return rjob def __call__(self, meth, *args): - """RePCe client is callabe, calling it implements a synchronous remote call + """RePCe client is callabe, calling it implements a synchronous + remote call. - We do a .push with a cbk which does a wakeup upon receiving anwser, then wait - on the RepceJob. + We do a .push with a cbk which does a wakeup upon receiving answer, + then wait on the RepceJob. """ - rjob = self.push(meth, *args, **{'cbk': lambda rj, res: rj.wakeup(res)}) + rjob = self.push( + meth, *args, **{'cbk': lambda rj, res: rj.wakeup(res)}) exc, res = rjob.wait() if exc: - logging.error('call %s (%s) failed on peer with %s' % (repr(rjob), meth, str(type(res).__name__))) + logging.error(lf('call failed', + call=repr(rjob), + method=meth, + error=str(type(res).__name__))) raise res logging.debug("call %s %s -> %s" % (repr(rjob), meth, repr(res))) return res class mprx(object): - """method proxy, standard trick to implement rubyesque method_missing - in Python - A class is a closure factory, you know what I mean, or go read some SICP. + """method proxy, standard trick to implement rubyesque + method_missing in Python + + A class is a closure factory, you know what I mean, or go read + some SICP. """ + def __init__(self, ins, meth): self.ins = ins self.meth = meth diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py index 4b0183b981d..f12c7ceaa36 100644 --- a/geo-replication/syncdaemon/resource.py +++ b/geo-replication/syncdaemon/resource.py @@ -1,256 +1,59 @@ +# +# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com> +# This file is part of GlusterFS. + +# This file is licensed to you under your choice of the GNU Lesser +# General Public License, version 3 or any later version (LGPLv3 or +# later), or the GNU General Public License, version 2 (GPLv2), in all +# cases as published by the Free Software Foundation. 
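A rough sketch of the framing repce.py's send()/recv() implement above: each message is one pickled tuple pushed through a single os.write(). The bounded read below is a simplification of the streaming pickle.load() the real code uses:

import os
import pickle

def send(out_fd, *args):
    os.write(out_fd, pickle.dumps(args, 2))        # one message per write

def recv(in_fd):
    return pickle.loads(os.read(in_fd, 1 << 20))   # sketch only; real code streams

r, w = os.pipe()
send(w, 'rid-1', 'keep_alive', {'uuid': '...'})
print(recv(r))            # ('rid-1', 'keep_alive', {'uuid': '...'})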
+# + import re import os import sys import stat import time import fcntl -import errno import types import struct -import socket import logging import tempfile -import threading import subprocess -from errno import EEXIST, ENOENT, ENODATA, ENOTDIR, ELOOP, EISDIR, ENOTEMPTY -from select import error as SelectError +from errno import (EEXIST, ENOENT, ENODATA, ENOTDIR, ELOOP, EACCES, + EISDIR, ENOTEMPTY, ESTALE, EINVAL, EBUSY, EPERM) +import errno + +from rconf import rconf +import gsyncdconfig as gconf +import libgfchangelog -from gconf import gconf import repce from repce import RepceServer, RepceClient -from master import gmaster_builder +from master import gmaster_builder import syncdutils -from syncdutils import GsyncdError, select, privileged, boolify, funcode -from syncdutils import umask, entry2pb, gauxpfx, errno_wrap - -UrlRX = re.compile('\A(\w+)://([^ *?[]*)\Z') -HostRX = re.compile('[a-z\d](?:[a-z\d.-]*[a-z\d])?', re.I) -UserRX = re.compile("[\w!\#$%&'*+-\/=?^_`{|}~]+") - -def sup(x, *a, **kw): - """a rubyesque "super" for python ;) - - invoke caller method in parent class with given args. - """ - return getattr(super(type(x), x), sys._getframe(1).f_code.co_name)(*a, **kw) - -def desugar(ustr): - """transform sugared url strings to standard <scheme>://<urlbody> form - - parsing logic enforces the constraint that sugared forms should contatin - a ':' or a '/', which ensures that sugared urls do not conflict with - gluster volume names. - """ - m = re.match('([^:]*):(.*)', ustr) - if m: - if not m.groups()[0]: - return "gluster://localhost" + ustr - elif '@' in m.groups()[0] or re.search('[:/]', m.groups()[1]): - return "ssh://" + ustr - else: - return "gluster://" + ustr - else: - if ustr[0] != '/': - raise GsyncdError("cannot resolve sugared url '%s'" % ustr) - ap = os.path.normpath(ustr) - if ap.startswith('//'): - ap = ap[1:] - return "file://" + ap - -def gethostbyname(hnam): - """gethostbyname wrapper""" - try: - return socket.gethostbyname(hnam) - except socket.gaierror: - ex = sys.exc_info()[1] - raise GsyncdError("failed to resolve %s: %s" % \ - (hnam, ex.strerror)) - -def parse_url(ustr): - """instantiate an url object by scheme-to-class dispatch - - The url classes taken into consideration are the ones in - this module whose names are full-caps. - """ - m = UrlRX.match(ustr) - if not m: - ustr = desugar(ustr) - m = UrlRX.match(ustr) - if not m: - raise GsyncdError("malformed url") - sch, path = m.groups() - this = sys.modules[__name__] - if not hasattr(this, sch.upper()): - raise GsyncdError("unknown url scheme " + sch) - return getattr(this, sch.upper())(path) - - -class _MetaXattr(object): - """singleton class, a lazy wrapper around the - libcxattr module - - libcxattr (a heavy import due to ctypes) is - loaded only when when the single - instance is tried to be used. - - This reduces runtime for those invocations - which do not need filesystem manipulation - (eg. 
for config, url parsing) - """ - - def __getattr__(self, meth): - from libcxattr import Xattr as LXattr - xmeth = [ m for m in dir(LXattr) if m[0] != '_' ] - if not meth in xmeth: - return - for m in xmeth: - setattr(self, m, getattr(LXattr, m)) - return getattr(self, meth) - -class _MetaChangelog(object): - def __getattr__(self, meth): - from libgfchangelog import Changes as LChanges - xmeth = [ m for m in dir(LChanges) if m[0] != '_' ] - if not meth in xmeth: - return - for m in xmeth: - setattr(self, m, getattr(LChanges, m)) - return getattr(self, meth) - -Xattr = _MetaXattr() -Changes = _MetaChangelog() +from syncdutils import (GsyncdError, select, privileged, funcode, + entry2pb, gauxpfx, errno_wrap, lstat, + NoStimeAvailable, PartialHistoryAvailable, + ChangelogException, ChangelogHistoryNotAvailable, + get_changelog_log_level, get_rsync_version, + GX_GFID_CANONICAL_LEN, + gf_mount_ready, lf, Popen, sup, + Xattr, matching_disk_gfid, get_gfid_from_mnt, + unshare_propagation_supported, get_slv_dir_path) +from gsyncdstatus import GeorepStatus +from py2py3 import (pipe, str_to_bytearray, entry_pack_reg, + entry_pack_reg_stat, entry_pack_mkdir, + entry_pack_symlink) -class Popen(subprocess.Popen): - """customized subclass of subprocess.Popen with a ring - buffer for children error output""" +ENOTSUP = getattr(errno, 'ENOTSUP', 'EOPNOTSUPP') - @classmethod - def init_errhandler(cls): - """start the thread which handles children's error output""" - cls.errstore = {} - def tailer(): - while True: - errstore = cls.errstore.copy() - try: - poe, _ ,_ = select([po.stderr for po in errstore], [], [], 1) - except (ValueError, SelectError): - continue - for po in errstore: - if po.stderr not in poe: - continue - po.lock.acquire() - try: - if po.on_death_row: - continue - la = errstore[po] - try: - fd = po.stderr.fileno() - except ValueError: # file is already closed - continue - l = os.read(fd, 1024) - if not l: - continue - tots = len(l) - for lx in la: - tots += len(lx) - while tots > 1<<20 and la: - tots -= len(la.pop(0)) - la.append(l) - finally: - po.lock.release() - t = syncdutils.Thread(target = tailer) - t.start() - cls.errhandler = t - - @classmethod - def fork(cls): - """fork wrapper that restarts errhandler thread in child""" - pid = os.fork() - if not pid: - cls.init_errhandler() - return pid - - def __init__(self, args, *a, **kw): - """customizations for subprocess.Popen instantiation - - - 'close_fds' is taken to be the default - - if child's stderr is chosen to be managed, - register it with the error handler thread - """ - self.args = args - if 'close_fds' not in kw: - kw['close_fds'] = True - self.lock = threading.Lock() - self.on_death_row = False - try: - sup(self, args, *a, **kw) - except: - ex = sys.exc_info()[1] - if not isinstance(ex, OSError): - raise - raise GsyncdError("""execution of "%s" failed with %s (%s)""" % \ - (args[0], errno.errorcode[ex.errno], os.strerror(ex.errno))) - if kw.get('stderr') == subprocess.PIPE: - assert(getattr(self, 'errhandler', None)) - self.errstore[self] = [] - - def errlog(self): - """make a log about child's failure event""" - filling = "" - if self.elines: - filling = ", saying:" - logging.error("""command "%s" returned with %s%s""" % \ - (" ".join(self.args), repr(self.returncode), filling)) - lp = '' - def logerr(l): - logging.error(self.args[0] + "> " + l) - for l in self.elines: - ls = l.split('\n') - ls[0] = lp + ls[0] - lp = ls.pop() - for ll in ls: - logerr(ll) - if lp: - logerr(lp) - - def errfail(self): - """fail nicely if child 
did not terminate with success""" - self.errlog() - syncdutils.finalize(exval = 1) - - def terminate_geterr(self, fail_on_err = True): - """kill child, finalize stderr harvesting (unregister - from errhandler, set up .elines), fail on error if - asked for - """ - self.lock.acquire() - try: - self.on_death_row = True - finally: - self.lock.release() - elines = self.errstore.pop(self) - if self.poll() == None: - self.terminate() - if self.poll() == None: - time.sleep(0.1) - self.kill() - self.wait() - while True: - if not select([self.stderr],[],[],0.1)[0]: - break - b = os.read(self.stderr.fileno(), 1024) - if b: - elines.append(b) - else: - break - self.stderr.close() - self.elines = elines - if fail_on_err and self.returncode != 0: - self.errfail() +slv_volume = None +slv_host = None class Server(object): + """singleton implemening those filesystem access primitives which are needed for geo-replication functionality @@ -260,22 +63,27 @@ class Server(object): GX_NSPACE_PFX = (privileged() and "trusted" or "system") GX_NSPACE = GX_NSPACE_PFX + ".glusterfs" - NTV_FMTSTR = "!" + "B"*19 + "II" + NTV_FMTSTR = "!" + "B" * 19 + "II" FRGN_XTRA_FMT = "I" FRGN_FMTSTR = NTV_FMTSTR + FRGN_XTRA_FMT - GX_GFID_CANONICAL_LEN = 37 # canonical gfid len + '\0' + + # for backend gfid fetch, do not use GX_NSPACE_PFX + GFID_XATTR = 'trusted.gfid' + GFID_FMTSTR = "!" + "B" * 16 local_path = '' @classmethod def _fmt_mknod(cls, l): - return "!II%dsI%dsIII" % (cls.GX_GFID_CANONICAL_LEN, l+1) + return "!II%dsI%dsIII" % (GX_GFID_CANONICAL_LEN, l + 1) + @classmethod def _fmt_mkdir(cls, l): - return "!II%dsI%dsII" % (cls.GX_GFID_CANONICAL_LEN, l+1) + return "!II%dsI%dsII" % (GX_GFID_CANONICAL_LEN, l + 1) + @classmethod def _fmt_symlink(cls, l1, l2): - return "!II%dsI%ds%ds" % (cls.GX_GFID_CANONICAL_LEN, l1+1, l2+1) + return "!II%dsI%ds%ds" % (GX_GFID_CANONICAL_LEN, l1 + 1, l2 + 1) def _pathguard(f): """decorator method that checks @@ -286,14 +94,15 @@ class Server(object): fc = funcode(f) pi = list(fc.co_varnames).index('path') - def ff(*a): - path = a[pi] + + def ff(*args): + path = args[pi] ps = path.split('/') if path[0] == '/' or '..' 
in ps: raise ValueError('unsafe path') - a = list(a) - a[pi] = os.path.join(a[0].local_path, path) - return f(*a) + args = list(args) + args[pi] = os.path.join(args[0].local_path, path) + return f(*args) return ff @classmethod @@ -307,6 +116,45 @@ class Server(object): @classmethod @_pathguard + def lstat(cls, path): + try: + return os.lstat(path) + except (IOError, OSError): + ex = sys.exc_info()[1] + if ex.errno == ENOENT: + return ex.errno + else: + raise + + @classmethod + @_pathguard + def linkto_check(cls, path): + try: + return not ( + Xattr.lgetxattr_buf(path, + 'trusted.glusterfs.dht.linkto') == '') + except (IOError, OSError): + ex = sys.exc_info()[1] + if ex.errno in (ENOENT, ENODATA): + return False + else: + raise + + @classmethod + @_pathguard + def gfid(cls, path): + buf = errno_wrap(Xattr.lgetxattr, [path, cls.GFID_XATTR, 16], + [ENOENT], [ESTALE, ENODATA]) + if buf == ENOENT: + return buf + else: + buf = str_to_bytearray(buf) + m = re.match('(.{8})(.{4})(.{4})(.{4})(.{12})', "".join( + ['%02x' % x for x in struct.unpack(cls.GFID_FMTSTR, buf)])) + return '-'.join(m.groups()) + + @classmethod + @_pathguard def purge(cls, path, entries=None): """force-delete subtrees @@ -322,7 +170,7 @@ class Server(object): for e in entries: cls.purge(os.path.join(path, e)) """ - me_also = entries == None + me_also = entries is None if not entries: try: # if it's a symlink, prevent @@ -388,7 +236,11 @@ class Server(object): """ try: - return struct.unpack('!II', Xattr.lgetxattr(path, '.'.join([cls.GX_NSPACE, uuid, 'xtime']), 8)) + val = Xattr.lgetxattr(path, + '.'.join([cls.GX_NSPACE, uuid, 'xtime']), + 8) + val = str_to_bytearray(val) + return struct.unpack('!II', val) except OSError: ex = sys.exc_info()[1] if ex.errno in (ENOENT, ENODATA, ENOTDIR): @@ -397,135 +249,519 @@ class Server(object): raise @classmethod - def gfid(cls, gfidpath): - return errno_wrap(Xattr.lgetxattr, [gfidpath, 'glusterfs.gfid', cls.GX_GFID_CANONICAL_LEN], [ENOENT]) + @_pathguard + def stime_mnt(cls, path, uuid): + """query xtime extended attribute + + Return xtime of @path for @uuid as a pair of integers. + "Normal" errors due to non-existent @path or extended attribute + are tolerated and errno is returned in such a case. + """ + + try: + val = Xattr.lgetxattr(path, + '.'.join([cls.GX_NSPACE, uuid, 'stime']), + 8) + val = str_to_bytearray(val) + return struct.unpack('!II', val) + except OSError: + ex = sys.exc_info()[1] + if ex.errno in (ENOENT, ENODATA, ENOTDIR): + return ex.errno + else: + raise + + @classmethod + @_pathguard + def stime(cls, path, uuid): + """query xtime extended attribute + + Return xtime of @path for @uuid as a pair of integers. + "Normal" errors due to non-existent @path or extended attribute + are tolerated and errno is returned in such a case. + """ + + try: + val = Xattr.lgetxattr(path, + '.'.join([cls.GX_NSPACE, uuid, 'stime']), + 8) + val = str_to_bytearray(val) + return struct.unpack('!II', val) + except OSError: + ex = sys.exc_info()[1] + if ex.errno in (ENOENT, ENODATA, ENOTDIR): + return ex.errno + else: + raise + + @classmethod + @_pathguard + def entry_stime(cls, path, uuid): + """ + entry_stime xattr to reduce the number of retry of Entry changes when + Geo-rep worker crashes and restarts. entry_stime is updated after + processing every changelog file. On failure and restart, worker only + have to reprocess the last changelog for Entry ops. 
+ Xattr Key: <PFX>.<MASTERVOL_UUID>.<SLAVEVOL_UUID>.entry_stime + """ + try: + val = Xattr.lgetxattr(path, + '.'.join([cls.GX_NSPACE, uuid, + 'entry_stime']), + 8) + val = str_to_bytearray(val) + return struct.unpack('!II', val) + except OSError: + ex = sys.exc_info()[1] + if ex.errno in (ENOENT, ENODATA, ENOTDIR): + return ex.errno + else: + raise @classmethod def node_uuid(cls, path='.'): try: - uuid_l = Xattr.lgetxattr_buf(path, '.'.join([cls.GX_NSPACE, 'node-uuid'])) + uuid_l = Xattr.lgetxattr_buf( + path, '.'.join([cls.GX_NSPACE, 'node-uuid'])) return uuid_l[:-1].split(' ') except OSError: raise @classmethod - def xtime_vec(cls, path, *uuids): - """vectored version of @xtime + @_pathguard + def set_stime(cls, path, uuid, mark): + """set @mark as stime for @uuid on @path""" + errno_wrap(Xattr.lsetxattr, + [path, + '.'.join([cls.GX_NSPACE, uuid, 'stime']), + struct.pack('!II', *mark)], + [ENOENT], + [ESTALE, EINVAL]) - accepts a list of uuids and returns a dictionary - with uuid as key(s) and xtime as value(s) - """ - xt = {} - for uuid in uuids: - xtu = cls.xtime(path, uuid) - if xtu == ENODATA: - xtu = None - if isinstance(xtu, int): - return xtu - xt[uuid] = xtu - return xt + @classmethod + @_pathguard + def set_entry_stime(cls, path, uuid, mark): + """set @mark as stime for @uuid on @path""" + errno_wrap(Xattr.lsetxattr, + [path, + '.'.join([cls.GX_NSPACE, uuid, 'entry_stime']), + struct.pack('!II', *mark)], + [ENOENT], + [ESTALE, EINVAL]) @classmethod @_pathguard def set_xtime(cls, path, uuid, mark): """set @mark as xtime for @uuid on @path""" - Xattr.lsetxattr(path, '.'.join([cls.GX_NSPACE, uuid, 'xtime']), struct.pack('!II', *mark)) + errno_wrap(Xattr.lsetxattr, + [path, + '.'.join([cls.GX_NSPACE, uuid, 'xtime']), + struct.pack('!II', *mark)], + [ENOENT], + [ESTALE, EINVAL]) @classmethod - def set_xtime_vec(cls, path, mark_dct): - """vectored (or dictered) version of set_xtime - - ignore values that match @ignore + @_pathguard + def set_xtime_remote(cls, path, uuid, mark): + """ + set @mark as xtime for @uuid on @path + the difference b/w this and set_xtime() being + set_xtime() being overloaded to set the xtime + on the brick (this method sets xtime on the + remote slave) """ - for u,t in mark_dct.items(): - cls.set_xtime(path, u, t) + Xattr.lsetxattr( + path, '.'.join([cls.GX_NSPACE, uuid, 'xtime']), + struct.pack('!II', *mark)) @classmethod def entry_ops(cls, entries): pfx = gauxpfx() logging.debug('entries: %s' % repr(entries)) - # regular file - def entry_pack_reg(gf, bn, st): - blen = len(bn) - mo = st['mode'] - return struct.pack(cls._fmt_mknod(blen), - st['uid'], st['gid'], - gf, mo, bn, - stat.S_IMODE(mo), 0, umask()) - # mkdir - def entry_pack_mkdir(gf, bn, st): - blen = len(bn) - mo = st['mode'] - return struct.pack(cls._fmt_mkdir(blen), - st['uid'], st['gid'], - gf, mo, bn, - stat.S_IMODE(mo), umask()) - #symlink - def entry_pack_symlink(gf, bn, lnk, st): - blen = len(bn) - llen = len(lnk) - return struct.pack(cls._fmt_symlink(blen, llen), - st['uid'], st['gid'], - gf, st['mode'], bn, lnk) - def entry_purge(entry, gfid): + dist_count = rconf.args.master_dist_count + + def entry_purge(op, entry, gfid, e, uid, gid): # This is an extremely racy code and needs to be fixed ASAP. # The GFID check here is to be sure that the pargfid/bname # to be purged is the GFID gotten from the changelog. # (a stat(changelog_gfid) would also be valid here) # The race here is between the GFID check and the purge. 
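The stime, entry_stime and xtime helpers above all move an 8-byte value through struct format '!II', i.e. two network-order 32-bit integers (a seconds part and a sub-second part); a tiny round trip with made-up numbers:

import struct

mark = (1700000000, 250000000)              # (seconds, sub-second), made up
raw = struct.pack('!II', *mark)             # what lsetxattr() stores
assert len(raw) == 8                        # matches the 8-byte lgetxattr reads
assert struct.unpack('!II', raw) == mark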
- disk_gfid = cls.gfid(entry) - if isinstance(disk_gfid, int): + + # If the entry or the gfid of the file to be deleted is not present + # on slave, we can ignore the unlink/rmdir + if isinstance(lstat(entry), int) or \ + isinstance(lstat(os.path.join(pfx, gfid)), int): return - if not gfid == disk_gfid: + + if not matching_disk_gfid(gfid, entry): + collect_failure(e, EEXIST, uid, gid) + return + + if op == 'UNLINK': + er = errno_wrap(os.unlink, [entry], [ENOENT, ESTALE], [EBUSY]) + # EISDIR is safe error, ignore. This can only happen when + # unlink is sent from master while fixing gfid conflicts. + if er != EISDIR: + return er + + elif op == 'RMDIR': + er = errno_wrap(os.rmdir, [entry], [ENOENT, ESTALE, + ENOTEMPTY], [EBUSY]) + if er == ENOTEMPTY: + return er + + def collect_failure(e, cmd_ret, uid, gid, dst=False): + slv_entry_info = {} + slv_entry_info['gfid_mismatch'] = False + slv_entry_info['name_mismatch'] = False + slv_entry_info['dst'] = dst + slv_entry_info['slave_isdir'] = False + slv_entry_info['slave_name'] = None + slv_entry_info['slave_gfid'] = None + # We do this for failing fops on Slave + # Master should be logging this + if cmd_ret is None: + return False + + if e.get("stat", {}): + # Copy actual UID/GID value back to entry stat + e['stat']['uid'] = uid + e['stat']['gid'] = gid + + if cmd_ret in [EEXIST, ESTALE]: + if dst: + en = e['entry1'] + else: + en = e['entry'] + disk_gfid = get_gfid_from_mnt(en) + if isinstance(disk_gfid, str) and \ + e['gfid'] != disk_gfid: + slv_entry_info['gfid_mismatch'] = True + st = lstat(en) + if not isinstance(st, int): + if st and stat.S_ISDIR(st.st_mode): + slv_entry_info['slave_isdir'] = True + dir_name = get_slv_dir_path(slv_host, slv_volume, + disk_gfid) + slv_entry_info['slave_name'] = dir_name + else: + slv_entry_info['slave_isdir'] = False + slv_entry_info['slave_gfid'] = disk_gfid + failures.append((e, cmd_ret, slv_entry_info)) + else: + return False + else: + failures.append((e, cmd_ret, slv_entry_info)) + + return True + + failures = [] + + def recursive_rmdir(gfid, entry, path): + """disk_gfid check added for original path for which + recursive_delete is called. This disk gfid check executed + before every Unlink/Rmdir. If disk gfid is not matching + with GFID from Changelog, that means other worker + deleted the directory. Even if the subdir/file present, + it belongs to different parent. Exit without performing + further deletes. 
+ """ + if not matching_disk_gfid(gfid, entry): return - er = errno_wrap(os.unlink, [entry], [ENOENT, EISDIR]) - if isinstance(er, int): + + names = [] + names = errno_wrap(os.listdir, [path], [ENOENT], [ESTALE, ENOTSUP]) + if isinstance(names, int): + return + + for name in names: + fullname = os.path.join(path, name) + if not matching_disk_gfid(gfid, entry): + return + er = errno_wrap(os.remove, [fullname], [ENOENT, ESTALE, + EISDIR], [EBUSY]) + if er == EISDIR: - er = errno_wrap(os.rmdir, [entry], [ENOENT, ENOTEMPTY]) - if er == ENOTEMPTY: - return er + recursive_rmdir(gfid, entry, fullname) + + if not matching_disk_gfid(gfid, entry): + return + + errno_wrap(os.rmdir, [path], [ENOENT, ESTALE], [EBUSY]) + + def rename_with_disk_gfid_confirmation(gfid, entry, en, uid, gid): + if not matching_disk_gfid(gfid, entry): + logging.error(lf("RENAME ignored: source entry does not match " + "with on-disk gfid", + source=entry, + gfid=gfid, + disk_gfid=get_gfid_from_mnt(entry), + target=en)) + collect_failure(e, EEXIST, uid, gid) + return + + cmd_ret = errno_wrap(os.rename, + [entry, en], + [ENOENT, EEXIST], [ESTALE, EBUSY]) + collect_failure(e, cmd_ret, uid, gid) + for e in entries: blob = None op = e['op'] gfid = e['gfid'] entry = e['entry'] + uid = 0 + gid = 0 + + # Skip entry processing if it's marked true during gfid + # conflict resolution + if e['skip_entry']: + continue + + if e.get("stat", {}): + # Copy UID/GID value and then reset to zero. Copied UID/GID + # will be used to run chown once entry is created. + uid = e['stat']['uid'] + gid = e['stat']['gid'] + e['stat']['uid'] = 0 + e['stat']['gid'] = 0 + (pg, bname) = entry2pb(entry) if op in ['RMDIR', 'UNLINK']: - while True: - er = entry_purge(entry, gfid) - if isinstance(er, int): - time.sleep(1) + # Try once, if rmdir failed with ENOTEMPTY + # then delete recursively. + er = entry_purge(op, entry, gfid, e, uid, gid) + if isinstance(er, int): + if er == ENOTEMPTY and op == 'RMDIR': + # Retry if ENOTEMPTY, ESTALE + er1 = errno_wrap(recursive_rmdir, + [gfid, entry, + os.path.join(pg, bname)], + [], [ENOTEMPTY, ESTALE, ENODATA]) + if not isinstance(er1, int): + logging.debug("Removed %s => %s/%s recursively" % + (gfid, pg, bname)) + else: + logging.warn(lf("Recursive remove failed", + gfid=gfid, + pgfid=pg, + bname=bname, + error=os.strerror(er1))) else: - break - elif op == 'CREATE': - blob = entry_pack_reg(gfid, bname, e['stat']) + logging.warn(lf("Failed to remove", + gfid=gfid, + pgfid=pg, + bname=bname, + error=os.strerror(er))) + elif op in ['CREATE', 'MKNOD']: + slink = os.path.join(pfx, gfid) + st = lstat(slink) + # don't create multiple entries with same gfid + if isinstance(st, int): + blob = entry_pack_reg(cls, gfid, bname, + e['mode'], e['uid'], e['gid']) + # Self healed hardlinks are recorded as MKNOD. + # So if the gfid already exists, it should be + # processed as hard link not mknod. + elif op in ['MKNOD']: + cmd_ret = errno_wrap(os.link, + [slink, entry], + [ENOENT, EEXIST], [ESTALE]) + collect_failure(e, cmd_ret, uid, gid) elif op == 'MKDIR': - blob = entry_pack_mkdir(gfid, bname, e['stat']) + en = e['entry'] + slink = os.path.join(pfx, gfid) + st = lstat(slink) + # don't create multiple entries with same gfid + if isinstance(st, int): + blob = entry_pack_mkdir(cls, gfid, bname, + e['mode'], e['uid'], e['gid']) + elif (isinstance(lstat(en), int) or + not matching_disk_gfid(gfid, en)): + # If gfid of a directory exists on slave but path based + # create is getting EEXIST. 
This means the directory is + # renamed in master but recorded as MKDIR during hybrid + # crawl. Get the directory path by reading the backend + # symlink and trying to rename to new name as said by + # master. + logging.info(lf("Special case: rename on mkdir", + gfid=gfid, entry=repr(entry))) + src_entry = get_slv_dir_path(slv_host, slv_volume, gfid) + if src_entry is None: + collect_failure(e, ENOENT, uid, gid) + if src_entry is not None and src_entry != entry: + slv_entry_info = {} + slv_entry_info['gfid_mismatch'] = False + slv_entry_info['name_mismatch'] = True + slv_entry_info['dst'] = False + slv_entry_info['slave_isdir'] = True + slv_entry_info['slave_gfid'] = gfid + slv_entry_info['slave_entry'] = src_entry + + failures.append((e, EEXIST, slv_entry_info)) elif op == 'LINK': - errno_wrap(os.link, [os.path.join(pfx, gfid), entry], [ENOENT, EEXIST]) + slink = os.path.join(pfx, gfid) + st = lstat(slink) + if isinstance(st, int): + (pg, bname) = entry2pb(entry) + if stat.S_ISREG(e['stat']['mode']): + blob = entry_pack_reg_stat(cls, gfid, bname, e['stat']) + elif stat.S_ISLNK(e['stat']['mode']): + blob = entry_pack_symlink(cls, gfid, bname, e['link'], + e['stat']) + else: + cmd_ret = errno_wrap(os.link, + [slink, entry], + [ENOENT, EEXIST], [ESTALE]) + collect_failure(e, cmd_ret, uid, gid) elif op == 'SYMLINK': - blob = entry_pack_symlink(gfid, bname, e['link'], e['stat']) + en = e['entry'] + st = lstat(entry) + if isinstance(st, int): + blob = entry_pack_symlink(cls, gfid, bname, e['link'], + e['stat']) + elif not matching_disk_gfid(gfid, en): + collect_failure(e, EEXIST, uid, gid) elif op == 'RENAME': en = e['entry1'] - errno_wrap(os.rename, [entry, en], [ENOENT, EEXIST]) + # The matching disk gfid check validates two things + # 1. Validates name is present, return false otherwise + # 2. Validates gfid is same, returns false otherwise + # So both validations are necessary to decide src doesn't + # exist. We can't rely on only gfid stat as hardlink could + # be present and we can't rely only on name as name could + # exist with different gfid. + if not matching_disk_gfid(gfid, entry): + if e['stat'] and not stat.S_ISDIR(e['stat']['mode']): + if stat.S_ISLNK(e['stat']['mode']): + # src is not present, so don't sync symlink as + # we don't know target. It's ok to ignore. If + # it's unliked, it's fine. If it's renamed to + # something else, it will be synced then. + if e['link'] is not None: + st1 = lstat(en) + if isinstance(st1, int): + (pg, bname) = entry2pb(en) + blob = entry_pack_symlink(cls, gfid, bname, + e['link'], + e['stat']) + elif not matching_disk_gfid(gfid, en): + collect_failure(e, EEXIST, uid, gid, True) + else: + slink = os.path.join(pfx, gfid) + st = lstat(slink) + # don't create multiple entries with same gfid + if isinstance(st, int): + (pg, bname) = entry2pb(en) + blob = entry_pack_reg_stat(cls, gfid, bname, + e['stat']) + else: + cmd_ret = errno_wrap(os.link, [slink, en], + [ENOENT, EEXIST], [ESTALE]) + collect_failure(e, cmd_ret, uid, gid) + else: + st = lstat(entry) + st1 = lstat(en) + if isinstance(st1, int): + rename_with_disk_gfid_confirmation(gfid, entry, en, + uid, gid) + else: + if st.st_ino == st1.st_ino: + # we have a hard link, we can now unlink source + try: + errno_wrap(os.unlink, [entry], + [ENOENT, ESTALE], [EBUSY]) + except OSError as e: + if e.errno == EISDIR: + try: + errno_wrap(os.rmdir, [entry], + [ENOENT, ESTALE], [EBUSY]) + except OSError as e: + if e.errno == ENOTEMPTY: + logging.error( + lf("Directory Rename failed. 
" + "Both Old and New" + " directories exists", + old=entry, + new=en)) + else: + raise + else: + raise + elif not matching_disk_gfid(gfid, en) and dist_count > 1: + collect_failure(e, EEXIST, uid, gid, True) + else: + # We are here which means matching_disk_gfid for + # both source and destination has returned false + # and distribution count for master vol is greater + # then one. Which basically says both the source and + # destination exist and not hardlinks. + # So we are safe to go ahead with rename here. + rename_with_disk_gfid_confirmation(gfid, entry, en, + uid, gid) if blob: - errno_wrap(Xattr.lsetxattr_l, [pg, 'glusterfs.gfid.newfile', blob], [ENOENT, EEXIST]) + cmd_ret = errno_wrap(Xattr.lsetxattr, + [pg, 'glusterfs.gfid.newfile', blob], + [EEXIST, ENOENT, ESTALE], + [ESTALE, EINVAL, EBUSY]) + collect_failure(e, cmd_ret, uid, gid) + + # If UID/GID is different than zero that means we are trying + # create Entry with different UID/GID. Create Entry with + # UID:0 and GID:0, and then call chown to set UID/GID + if uid != 0 or gid != 0: + path = os.path.join(pfx, gfid) + cmd_ret = errno_wrap(os.lchown, [path, uid, gid], [ENOENT], + [ESTALE, EINVAL]) + collect_failure(e, cmd_ret, uid, gid) + + return failures @classmethod - def changelog_register(cls, cl_brick, cl_dir, cl_log, cl_level, retries = 0): - Changes.cl_register(cl_brick, cl_dir, cl_log, cl_level, retries) - - @classmethod - def changelog_scan(cls): - Changes.cl_scan() - - @classmethod - def changelog_getchanges(cls): - return Changes.cl_getchanges() - - @classmethod - def changelog_done(cls, clfile): - Changes.cl_done(clfile) + def meta_ops(cls, meta_entries): + logging.debug('Meta-entries: %s' % repr(meta_entries)) + failures = [] + for e in meta_entries: + mode = e['stat']['mode'] + uid = e['stat']['uid'] + gid = e['stat']['gid'] + atime = e['stat']['atime'] + mtime = e['stat']['mtime'] + go = e['go'] + # Linux doesn't support chmod on symlink itself. + # It is always applied to the target file. So + # changelog would record target file's gfid + # and we are good. But 'chown' is supported on + # symlink file. So changelog would record symlink + # gfid in such cases. Since we do 'chown' 'chmod' + # 'utime' for each gfid recorded for metadata, and + # we know from changelog the metadata is on symlink's + # gfid or target file's gfid, we should be doing + # 'lchown' 'lchmod' 'utime with no-deference' blindly. + # But since 'lchmod' and 'utime with no de-reference' is + # not supported in python3, we have to rely on 'chmod' + # and 'utime with de-reference'. Hence avoiding 'chmod' + # and 'utime' if it's symlink file. + + is_symlink = False + cmd_ret = errno_wrap(os.lchown, [go, uid, gid], [ENOENT], + [ESTALE, EINVAL]) + if isinstance(cmd_ret, int): + continue + + is_symlink = os.path.islink(go) + + if not is_symlink: + cmd_ret = errno_wrap(os.chmod, [go, mode], + [ENOENT, EACCES, EPERM], [ESTALE, EINVAL]) + if isinstance(cmd_ret, int): + failures.append((e, cmd_ret, "chmod")) + + cmd_ret = errno_wrap(os.utime, [go, (atime, mtime)], + [ENOENT, EACCES, EPERM], [ESTALE, EINVAL]) + if isinstance(cmd_ret, int): + failures.append((e, cmd_ret, "utime")) + return failures @classmethod @_pathguard @@ -551,6 +787,7 @@ class Server(object): return os.getpid() last_keep_alive = 0 + @classmethod def keep_alive(cls, dct): """process keepalive messages. 
@@ -564,9 +801,12 @@ class Server(object): if dct: key = '.'.join([cls.GX_NSPACE, 'volume-mark', dct['uuid']]) val = struct.pack(cls.FRGN_FMTSTR, - *(dct['version'] + - tuple(int(x,16) for x in re.findall('(?:[\da-f]){2}', dct['uuid'])) + - (dct['retval'],) + dct['volume_mark'][0:2] + (dct['timeout'],))) + *(dct['version'] + + tuple(int(x, 16) + for x in re.findall('(?:[\da-f]){2}', + dct['uuid'])) + + (dct['retval'],) + dct['volume_mark'][0:2] + ( + dct['timeout'],))) Xattr.lsetxattr('.', key, val) cls.last_keep_alive += 1 return cls.last_keep_alive @@ -577,168 +817,274 @@ class Server(object): return 1.0 -class SlaveLocal(object): - """mix-in class to implement some factes of a slave server +class Mounter(object): - ("mix-in" is sort of like "abstract class", ie. it's not - instantiated just included in the ancesty DAG. I use "mix-in" - to indicate that it's not used as an abstract base class, - rather just taken in to implement additional functionality - on the basis of the assumed availability of certain interfaces.) - """ + """Abstract base class for mounter backends""" + + def __init__(self, params): + self.params = params + self.mntpt = None + self.umount_cmd = [] - def can_connect_to(self, remote): - """determine our position in the connectibility matrix""" - return not remote + @classmethod + def get_glusterprog(cls): + gluster_cmd_dir = gconf.get("gluster-command-dir") + if rconf.args.subcmd == "slave": + gluster_cmd_dir = gconf.get("slave-gluster-command-dir") + return os.path.join(gluster_cmd_dir, cls.glusterprog) + + def umount_l(self, d): + """perform lazy umount""" + po = Popen(self.make_umount_argv(d), stderr=subprocess.PIPE, + universal_newlines=True) + po.wait() + return po - def service_loop(self): - """start a RePCe server serving self's server + @classmethod + def make_umount_argv(cls, d): + raise NotImplementedError - stop servicing if a timeout is configured and got no - keep-alime in that inteval - """ + def make_mount_argv(self, label=None): + raise NotImplementedError - if boolify(gconf.use_rsync_xattrs) and not privileged(): - raise GsyncdError("using rsync for extended attributes is not supported") + def cleanup_mntpt(self, *a): + pass - repce = RepceServer(self.server, sys.stdin, sys.stdout, int(gconf.sync_jobs)) - t = syncdutils.Thread(target=lambda: (repce.service_loop(), - syncdutils.finalize())) - t.start() - logging.info("slave listening") - if gconf.timeout and int(gconf.timeout) > 0: + def handle_mounter(self, po): + po.wait() + + def inhibit(self, label): + """inhibit a gluster filesystem + + Mount glusterfs over a temporary mountpoint, + change into the mount, and lazy unmount the + filesystem. 
+ """ + mpi, mpo = pipe() + mh = Popen.fork() + if mh: + # Parent + os.close(mpi) + fcntl.fcntl(mpo, fcntl.F_SETFD, fcntl.FD_CLOEXEC) + d = None + margv = self.make_mount_argv(label) + if self.mntpt: + # mntpt is determined pre-mount + d = self.mntpt + mnt_msg = d + '\0' + encoded_msg = mnt_msg.encode() + os.write(mpo, encoded_msg) + po = Popen(margv, **self.mountkw) + self.handle_mounter(po) + po.terminate_geterr() + logging.debug('auxiliary glusterfs mount in place') + if not d: + # mntpt is determined during mount + d = self.mntpt + mnt_msg = d + '\0' + encoded_msg = mnt_msg.encode() + os.write(mpo, encoded_msg) + encoded_msg = 'M'.encode() + os.write(mpo, encoded_msg) + t = syncdutils.Thread(target=lambda: os.chdir(d)) + t.start() + tlim = rconf.starttime + gconf.get("connection-timeout") while True: - lp = self.server.last_keep_alive - time.sleep(int(gconf.timeout)) - if lp == self.server.last_keep_alive: - logging.info("connection inactive for %d seconds, stopping" % int(gconf.timeout)) + if not t.isAlive(): break + + if time.time() >= tlim: + syncdutils.finalize(exval=1) + time.sleep(1) + os.close(mpo) + _, rv = syncdutils.waitpid(mh, 0) + if rv: + rv = (os.WIFEXITED(rv) and os.WEXITSTATUS(rv) or 0) - \ + (os.WIFSIGNALED(rv) and os.WTERMSIG(rv) or 0) + logging.warn(lf('stale mount possibly left behind', + path=d)) + raise GsyncdError("cleaning up temp mountpoint %s " + "failed with status %d" % + (d, rv)) else: - select((), (), ()) + rv = 0 + try: + os.setsid() + os.close(mpo) + mntdata = '' + while True: + c = os.read(mpi, 1) + c = c.decode() + if not c: + break + mntdata += c + if mntdata: + mounted = False + if mntdata[-1] == 'M': + mntdata = mntdata[:-1] + assert(mntdata) + mounted = True + assert(mntdata[-1] == '\0') + mntpt = mntdata[:-1] + assert(mntpt) + + umount_master = False + umount_slave = False + if rconf.args.subcmd == "worker" \ + and not unshare_propagation_supported() \ + and not gconf.get("access-mount"): + umount_master = True + if rconf.args.subcmd == "slave" \ + and not gconf.get("slave-access-mount"): + umount_slave = True + + if mounted and (umount_master or umount_slave): + po = self.umount_l(mntpt) + po.terminate_geterr(fail_on_err=False) + if po.returncode != 0: + po.errlog() + rv = po.returncode + logging.debug("Lazy umount done: %s" % mntpt) + if umount_master or umount_slave: + self.cleanup_mntpt(mntpt) + except: + logging.exception('mount cleanup failure:') + rv = 200 + os._exit(rv) + + #Polling the dht.subvol.status value. + RETRIES = 10 + while not gf_mount_ready(): + if RETRIES < 0: + logging.error('Subvols are not up') + break + RETRIES -= 1 + time.sleep(0.2) -class SlaveRemote(object): - """mix-in class to implement an interface to a remote slave""" + logging.debug('auxiliary glusterfs mount prepared') - def connect_remote(self, rargs=[], **opts): - """connects to a remote slave - Invoke an auxiliary utility (slave gsyncd, possibly wrapped) - which sets up the connection and set up a RePCe client to - communicate throuh its stdio. 
- """ - slave = opts.get('slave', self.url) - extra_opts = [] - so = getattr(gconf, 'session_owner', None) - if so: - extra_opts += ['--session-owner', so] - if boolify(gconf.use_rsync_xattrs): - extra_opts.append('--use-rsync-xattrs') - po = Popen(rargs + gconf.remote_gsyncd.split() + extra_opts + \ - ['-N', '--listen', '--timeout', str(gconf.timeout), slave], - stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - gconf.transport = po - return self.start_fd_client(po.stdout, po.stdin, **opts) +class DirectMounter(Mounter): - def start_fd_client(self, i, o, **opts): - """set up RePCe client, handshake with server + """mounter backend which calls mount(8), umount(8) directly""" - It's cut out as a separate method to let - subclasses hook into client startup - """ - self.server = RepceClient(i, o) - rv = self.server.__version__() - exrv = {'proto': repce.repce_version, 'object': Server.version()} - da0 = (rv, exrv) - da1 = ({}, {}) - for i in range(2): - for k, v in da0[i].iteritems(): - da1[i][k] = int(v) - if da1[0] != da1[1]: - raise GsyncdError("RePCe major version mismatch: local %s, remote %s" % (exrv, rv)) + mountkw = {'stderr': subprocess.PIPE, 'universal_newlines': True} + glusterprog = 'glusterfs' - def rsync(self, files, *args): - """invoke rsync""" - if not files: - raise GsyncdError("no files to sync") - logging.debug("files: " + ", ".join(files)) - argv = gconf.rsync_command.split() + \ - ['-avR0', '--inplace', '--files-from=-', '--super','--stats', '--numeric-ids', '--no-implied-dirs'] + \ - gconf.rsync_options.split() + (boolify(gconf.use_rsync_xattrs) and ['--xattrs'] or []) + \ - ['.'] + list(args) - po = Popen(argv, stdin=subprocess.PIPE,stderr=subprocess.PIPE) - for f in files: - po.stdin.write(f) - po.stdin.write('\0') - - po.stdin.close() - po.wait() - po.terminate_geterr(fail_on_err = False) + @staticmethod + def make_umount_argv(d): + return ['umount', '-l', d] - return po + def make_mount_argv(self, label=None): + self.mntpt = tempfile.mkdtemp(prefix='gsyncd-aux-mount-') + rconf.mount_point = self.mntpt + return [self.get_glusterprog()] + \ + ['--' + p for p in self.params] + [self.mntpt] + def cleanup_mntpt(self, mntpt=None): + if not mntpt: + mntpt = self.mntpt + errno_wrap(os.rmdir, [mntpt], [ENOENT, EBUSY]) -class AbstractUrl(object): - """abstract base class for url scheme classes""" - def __init__(self, path, pattern): - m = re.search(pattern, path) - if not m: - raise GsyncdError("malformed path") - self.path = path - return m.groups() +class MountbrokerMounter(Mounter): - @property - def scheme(self): - return type(self).__name__.lower() + """mounter backend using the mountbroker gluster service""" - def canonical_path(self): - return self.path + mountkw = {'stderr': subprocess.PIPE, 'stdout': subprocess.PIPE, + 'universal_newlines': True} + glusterprog = 'gluster' - def get_url(self, canonical=False, escaped=False): - """format self's url in various styles""" - if canonical: - pa = self.canonical_path() - else: - pa = self.path - u = "://".join((self.scheme, pa)) - if escaped: - u = syncdutils.escape(u) - return u + @classmethod + def make_cli_argv(cls): + return [cls.get_glusterprog()] + ['--remote-host=localhost'] + \ + gconf.get("gluster-cli-options").split() + ['system::'] - @property - def url(self): - return self.get_url() + @classmethod + def make_umount_argv(cls, d): + return cls.make_cli_argv() + ['umount', d, 'lazy'] + def make_mount_argv(self, label): + return self.make_cli_argv() + \ + ['mount', label, 'user-map-root=' + + 
syncdutils.getusername()] + self.params - ### Concrete resource classes ### + def handle_mounter(self, po): + self.mntpt = po.stdout.readline()[:-1] + rconf.mount_point = self.mntpt + rconf.mountbroker = True + self.umount_cmd = self.make_cli_argv() + ['umount'] + rconf.mbr_umount_cmd = self.umount_cmd + po.stdout.close() + sup(self, po) + if po.returncode != 0: + # if cli terminated with error due to being + # refused by glusterd, what it put + # out on stdout is a diagnostic message + logging.error(lf('glusterd answered', mnt=self.mntpt)) -class FILE(AbstractUrl, SlaveLocal, SlaveRemote): - """scheme class for file:// urls +class GLUSTERServer(Server): - can be used to represent a file slave server - on slave side, or interface to a remote file - file server on master side - """ + "server enhancements for a glusterfs backend""" - class FILEServer(Server): - """included server flavor""" - pass - - server = FILEServer + @classmethod + def _attr_unpack_dict(cls, xattr, extra_fields=''): + """generic volume mark fetching/parsing backed""" + fmt_string = cls.NTV_FMTSTR + extra_fields + buf = Xattr.lgetxattr('.', xattr, struct.calcsize(fmt_string)) + buf = str_to_bytearray(buf) + vm = struct.unpack(fmt_string, buf) + m = re.match( + '(.{8})(.{4})(.{4})(.{4})(.{12})', + "".join(['%02x' % x for x in vm[2:18]])) + uuid = '-'.join(m.groups()) + volinfo = {'version': vm[0:2], + 'uuid': uuid, + 'retval': vm[18], + 'volume_mark': vm[19:21], + } + if extra_fields: + return volinfo, vm[-len(extra_fields):] + else: + return volinfo - def __init__(self, path): - sup(self, path, '^/') + @classmethod + def foreign_volume_infos(cls): + """return list of valid (not expired) foreign volume marks""" + dict_list = [] + xattr_list = Xattr.llistxattr_buf('.') + for ele in xattr_list: + if ele.find('.'.join([cls.GX_NSPACE, 'volume-mark', ''])) == 0: + d, x = cls._attr_unpack_dict(ele, cls.FRGN_XTRA_FMT) + now = int(time.time()) + if x[0] > now: + logging.debug("volinfo[%s] expires: %d " + "(%d sec later)" % + (d['uuid'], x[0], x[0] - now)) + d['timeout'] = x[0] + dict_list.append(d) + else: + try: + Xattr.lremovexattr('.', ele) + except OSError: + pass + return dict_list - def connect(self): - """inhibit the resource beyond""" - os.chdir(self.path) + @classmethod + def native_volume_info(cls): + """get the native volume mark of the underlying gluster volume""" + try: + return cls._attr_unpack_dict('.'.join([cls.GX_NSPACE, + 'volume-mark'])) + except OSError: + ex = sys.exc_info()[1] + if ex.errno != ENODATA: + raise - def rsync(self, files): - return sup(self, files, self.path) +class GLUSTER(object): -class GLUSTER(AbstractUrl, SlaveLocal, SlaveRemote): """scheme class for gluster:// urls can be used to represent a gluster slave server @@ -747,226 +1093,17 @@ class GLUSTER(AbstractUrl, SlaveLocal, SlaveRemote): (slave-ish features come from the mixins, master functionality is outsourced to GMaster from master) """ - - class GLUSTERServer(Server): - "server enhancements for a glusterfs backend""" - - @classmethod - def _attr_unpack_dict(cls, xattr, extra_fields = ''): - """generic volume mark fetching/parsing backed""" - fmt_string = cls.NTV_FMTSTR + extra_fields - buf = Xattr.lgetxattr('.', xattr, struct.calcsize(fmt_string)) - vm = struct.unpack(fmt_string, buf) - m = re.match('(.{8})(.{4})(.{4})(.{4})(.{12})', "".join(['%02x' % x for x in vm[2:18]])) - uuid = '-'.join(m.groups()) - volinfo = { 'version': vm[0:2], - 'uuid' : uuid, - 'retval' : vm[18], - 'volume_mark': vm[19:21], - } - if extra_fields: - return 
volinfo, vm[-len(extra_fields):] - else: - return volinfo - - @classmethod - def foreign_volume_infos(cls): - """return list of valid (not expired) foreign volume marks""" - dict_list = [] - xattr_list = Xattr.llistxattr_buf('.') - for ele in xattr_list: - if ele.find('.'.join([cls.GX_NSPACE, 'volume-mark', ''])) == 0: - d, x = cls._attr_unpack_dict(ele, cls.FRGN_XTRA_FMT) - now = int(time.time()) - if x[0] > now: - logging.debug("volinfo[%s] expires: %d (%d sec later)" % \ - (d['uuid'], x[0], x[0] - now)) - d['timeout'] = x[0] - dict_list.append(d) - else: - try: - Xattr.lremovexattr('.', ele) - except OSError: - pass - return dict_list - - @classmethod - def native_volume_info(cls): - """get the native volume mark of the underlying gluster volume""" - try: - return cls._attr_unpack_dict('.'.join([cls.GX_NSPACE, 'volume-mark'])) - except OSError: - ex = sys.exc_info()[1] - if ex.errno != ENODATA: - raise - server = GLUSTERServer - def __init__(self, path): - self.host, self.volume = sup(self, path, '^(%s):(.+)' % HostRX.pattern) - - def canonical_path(self): - return ':'.join([gethostbyname(self.host), self.volume]) - - def can_connect_to(self, remote): - """determine our position in the connectibility matrix""" - return not remote or \ - (isinstance(remote, SSH) and isinstance(remote.inner_rsc, GLUSTER)) - - class Mounter(object): - """Abstract base class for mounter backends""" - - def __init__(self, params): - self.params = params - self.mntpt = None - - @classmethod - def get_glusterprog(cls): - return os.path.join(gconf.gluster_command_dir, cls.glusterprog) + def __init__(self, host, volume): + self.path = "%s:%s" % (host, volume) + self.host = host + self.volume = volume - def umount_l(self, d): - """perform lazy umount""" - po = Popen(self.make_umount_argv(d), stderr=subprocess.PIPE) - po.wait() - return po - - @classmethod - def make_umount_argv(cls, d): - raise NotImplementedError - - def make_mount_argv(self, *a): - raise NotImplementedError - - def cleanup_mntpt(self, *a): - pass - - def handle_mounter(self, po): - po.wait() - - def inhibit(self, *a): - """inhibit a gluster filesystem - - Mount glusterfs over a temporary mountpoint, - change into the mount, and lazy unmount the - filesystem. 
- """ - - mpi, mpo = os.pipe() - mh = Popen.fork() - if mh: - os.close(mpi) - fcntl.fcntl(mpo, fcntl.F_SETFD, fcntl.FD_CLOEXEC) - d = None - margv = self.make_mount_argv(*a) - if self.mntpt: - # mntpt is determined pre-mount - d = self.mntpt - os.write(mpo, d + '\0') - po = Popen(margv, **self.mountkw) - self.handle_mounter(po) - po.terminate_geterr() - logging.debug('auxiliary glusterfs mount in place') - if not d: - # mntpt is determined during mount - d = self.mntpt - os.write(mpo, d + '\0') - os.write(mpo, 'M') - t = syncdutils.Thread(target=lambda: os.chdir(d)) - t.start() - tlim = gconf.starttime + int(gconf.connection_timeout) - while True: - if not t.isAlive(): - break - if time.time() >= tlim: - syncdutils.finalize(exval = 1) - time.sleep(1) - os.close(mpo) - _, rv = syncdutils.waitpid(mh, 0) - if rv: - rv = (os.WIFEXITED(rv) and os.WEXITSTATUS(rv) or 0) - \ - (os.WIFSIGNALED(rv) and os.WTERMSIG(rv) or 0) - logging.warn('stale mount possibly left behind on ' + d) - raise GsyncdError("cleaning up temp mountpoint %s failed with status %d" % \ - (d, rv)) - else: - rv = 0 - try: - os.setsid() - os.close(mpo) - mntdata = '' - while True: - c = os.read(mpi, 1) - if not c: - break - mntdata += c - if mntdata: - mounted = False - if mntdata[-1] == 'M': - mntdata = mntdata[:-1] - assert(mntdata) - mounted = True - assert(mntdata[-1] == '\0') - mntpt = mntdata[:-1] - assert(mntpt) - if mounted: - po = self.umount_l(mntpt) - po.terminate_geterr(fail_on_err = False) - if po.returncode != 0: - po.errlog() - rv = po.returncode - self.cleanup_mntpt(mntpt) - except: - logging.exception('mount cleanup failure:') - rv = 200 - os._exit(rv) - logging.debug('auxiliary glusterfs mount prepared') - - class DirectMounter(Mounter): - """mounter backend which calls mount(8), umount(8) directly""" - - mountkw = {'stderr': subprocess.PIPE} - glusterprog = 'glusterfs' - - @staticmethod - def make_umount_argv(d): - return ['umount', '-l', d] - - def make_mount_argv(self): - self.mntpt = tempfile.mkdtemp(prefix = 'gsyncd-aux-mount-') - return [self.get_glusterprog()] + ['--' + p for p in self.params] + [self.mntpt] - - def cleanup_mntpt(self, mntpt = None): - if not mntpt: - mntpt = self.mntpt - os.rmdir(mntpt) - - class MountbrokerMounter(Mounter): - """mounter backend using the mountbroker gluster service""" - - mountkw = {'stderr': subprocess.PIPE, 'stdout': subprocess.PIPE} - glusterprog = 'gluster' - - @classmethod - def make_cli_argv(cls): - return [cls.get_glusterprog()] + gconf.gluster_cli_options.split() + ['system::'] - - @classmethod - def make_umount_argv(cls, d): - return cls.make_cli_argv() + ['umount', d, 'lazy'] - - def make_mount_argv(self, label): - return self.make_cli_argv() + \ - ['mount', label, 'user-map-root=' + syncdutils.getusername()] + self.params - - def handle_mounter(self, po): - self.mntpt = po.stdout.readline()[:-1] - po.stdout.close() - sup(self, po) - if po.returncode != 0: - # if cli terminated with error due to being - # refused by glusterd, what it put - # out on stdout is a diagnostic message - logging.error('glusterd answered: %s' % self.mntpt) + global slv_volume + global slv_host + slv_volume = self.volume + slv_host = self.host def connect(self): """inhibit the resource beyond @@ -976,25 +1113,39 @@ class GLUSTER(AbstractUrl, SlaveLocal, SlaveRemote): with given backend """ - label = getattr(gconf, 'mountbroker', None) + logging.info("Mounting gluster volume locally...") + t0 = time.time() + label = gconf.get('mountbroker', None) if not label and not privileged(): 
label = syncdutils.getusername() - mounter = label and self.MountbrokerMounter or self.DirectMounter - params = gconf.gluster_params.split() + \ - (gconf.gluster_log_level and ['log-level=' + gconf.gluster_log_level] or []) + \ - ['log-file=' + gconf.gluster_log_file, 'volfile-server=' + self.host, - 'volfile-id=' + self.volume, 'client-pid=-1'] - mounter(params).inhibit(*[l for l in [label] if l]) + mounter = label and MountbrokerMounter or DirectMounter + + log_file = gconf.get("gluster-log-file") + if rconf.args.subcmd == "slave": + log_file = gconf.get("slave-gluster-log-file") - def connect_remote(self, *a, **kw): - sup(self, *a, **kw) - self.slavedir = "/proc/%d/cwd" % self.server.pid() + log_level = gconf.get("gluster-log-level") + if rconf.args.subcmd == "slave": + log_level = gconf.get("slave-gluster-log-level") + + params = gconf.get("gluster-params").split() + \ + ['log-level=' + log_level] + \ + ['log-file=' + log_file, 'volfile-server=' + self.host] + \ + ['volfile-id=' + self.volume, 'client-pid=-1'] + + self.mounter = mounter(params) + self.mounter.inhibit(label) + logging.info(lf("Mounted gluster volume", + duration="%.4f" % (time.time() - t0))) def gmaster_instantiate_tuple(self, slave): - """return a tuple of the 'one shot' and the 'main crawl' class instance""" - return (gmaster_builder('xsync')(self, slave), gmaster_builder()(self, slave)) + """return a tuple of the 'one shot' and the 'main crawl' + class instance""" + return (gmaster_builder('xsync')(self, slave), + gmaster_builder()(self, slave), + gmaster_builder('changeloghistory')(self, slave)) - def service_loop(self, *args): + def service_loop(self, slave=None): """enter service loop - if slave given, instantiate GMaster and @@ -1002,100 +1153,216 @@ class GLUSTER(AbstractUrl, SlaveLocal, SlaveRemote): master behavior - else do that's what's inherited """ - if args: - slave = args[0] - if gconf.local_path: - class brickserver(FILE.FILEServer): - local_path = gconf.local_path - aggregated = self.server - @classmethod - def entries(cls, path): - e = super(brickserver, cls).entries(path) - # on the brick don't mess with /.glusterfs - if path == '.': - try: - e.remove('.glusterfs') - except ValueError: - pass - return e - if gconf.slave_id: - # define {,set_}xtime in slave, thus preempting - # the call to remote, so that it takes data from - # the local brick - slave.server.xtime = types.MethodType(lambda _self, path, uuid: brickserver.xtime(path, uuid + '.' + gconf.slave_id), slave.server) - slave.server.set_xtime = types.MethodType(lambda _self, path, uuid, mark: brickserver.set_xtime(path, uuid + '.' 
+ gconf.slave_id, mark), slave.server) - (g1, g2) = self.gmaster_instantiate_tuple(slave) - g1.master.server = brickserver - g2.master.server = brickserver + if rconf.args.subcmd == "slave": + if gconf.get("use-rsync-xattrs") and not privileged(): + raise GsyncdError( + "using rsync for extended attributes is not supported") + + repce = RepceServer( + self.server, sys.stdin, sys.stdout, gconf.get("sync-jobs")) + t = syncdutils.Thread(target=lambda: (repce.service_loop(), + syncdutils.finalize())) + t.start() + logging.info("slave listening") + if gconf.get("slave-timeout") and gconf.get("slave-timeout") > 0: + while True: + lp = self.server.last_keep_alive + time.sleep(gconf.get("slave-timeout")) + if lp == self.server.last_keep_alive: + logging.info( + lf("connection inactive, stopping", + timeout=gconf.get("slave-timeout"))) + break else: - (g1, g2) = self.gmaster_instantiate_tuple(slave) - g1.master.server.aggregated = gmaster.master.server - g2.master.server.aggregated = gmaster.master.server - # bad bad bad: bad way to do things like this - # need to make this elegant - # register the crawlers and start crawling - g1.register() - g2.register() - g1.crawlwrap(oneshot=True) + select((), (), ()) + + return + + class brickserver(Server): + local_path = rconf.args.local_path + aggregated = self.server + + @classmethod + def entries(cls, path): + e = super(brickserver, cls).entries(path) + # on the brick don't mess with /.glusterfs + if path == '.': + try: + e.remove('.glusterfs') + e.remove('.trashcan') + except ValueError: + pass + return e + + @classmethod + def lstat(cls, e): + """ path based backend stat """ + return super(brickserver, cls).lstat(e) + + @classmethod + def gfid(cls, e): + """ path based backend gfid fetch """ + return super(brickserver, cls).gfid(e) + + @classmethod + def linkto_check(cls, e): + return super(brickserver, cls).linkto_check(e) + + # define {,set_}xtime in slave, thus preempting + # the call to remote, so that it takes data from + # the local brick + slave.server.xtime = types.MethodType( + lambda _self, path, uuid: ( + brickserver.xtime(path, + uuid + '.' + rconf.args.slave_id) + ), + slave.server) + slave.server.stime = types.MethodType( + lambda _self, path, uuid: ( + brickserver.stime(path, + uuid + '.' + rconf.args.slave_id) + ), + slave.server) + slave.server.entry_stime = types.MethodType( + lambda _self, path, uuid: ( + brickserver.entry_stime( + path, + uuid + '.' + rconf.args.slave_id) + ), + slave.server) + slave.server.set_stime = types.MethodType( + lambda _self, path, uuid, mark: ( + brickserver.set_stime(path, + uuid + '.' + rconf.args.slave_id, + mark) + ), + slave.server) + slave.server.set_entry_stime = types.MethodType( + lambda _self, path, uuid, mark: ( + brickserver.set_entry_stime( + path, + uuid + '.' 
+ rconf.args.slave_id, + mark) + ), + slave.server) + + (g1, g2, g3) = self.gmaster_instantiate_tuple(slave) + g1.master.server = brickserver + g2.master.server = brickserver + g3.master.server = brickserver + + # bad bad bad: bad way to do things like this + # need to make this elegant + # register the crawlers and start crawling + # g1 ==> Xsync, g2 ==> config.change_detector(changelog by default) + # g3 ==> changelog History + status = GeorepStatus(gconf.get("state-file"), + rconf.args.local_node, + rconf.args.local_path, + rconf.args.local_node_id, + rconf.args.master, + rconf.args.slave) + status.reset_on_worker_start() + + try: + workdir = g2.setup_working_dir() + # Register only when change_detector is not set to + # xsync, else agent will generate changelog files + # in .processing directory of working dir + if gconf.get("change-detector") != 'xsync': + # register with the changelog library + # 9 == log level (DEBUG) + # 5 == connection retries + libgfchangelog.register(rconf.args.local_path, + workdir, + gconf.get("changelog-log-file"), + get_changelog_log_level( + gconf.get("changelog-log-level")), + g2.CHANGELOG_CONN_RETRIES) + + register_time = int(time.time()) + g2.register(register_time, status) + g3.register(register_time, status) + except ChangelogException as e: + logging.error(lf("Changelog register failed", error=e)) + sys.exit(1) + + g1.register(status=status) + logging.info(lf("Register time", + time=register_time)) + # oneshot: Try to use changelog history api, if not + # available switch to FS crawl + # Note: if config.change_detector is xsync then + # it will not use changelog history api + try: + g3.crawlwrap(oneshot=True) + except PartialHistoryAvailable as e: + logging.info(lf('Partial history available, using xsync crawl' + ' after consuming history', + till=e)) + g1.crawlwrap(oneshot=True, register_time=register_time) + except ChangelogHistoryNotAvailable: + logging.info('Changelog history not available, using xsync') + g1.crawlwrap(oneshot=True, register_time=register_time) + except NoStimeAvailable: + logging.info('No stime available, using xsync crawl') + g1.crawlwrap(oneshot=True, register_time=register_time) + except ChangelogException as e: + logging.error(lf("Changelog History Crawl failed", + error=e)) + sys.exit(1) + + try: g2.crawlwrap() - else: - sup(self, *args) + except ChangelogException as e: + logging.error(lf("Changelog crawl failed", error=e)) + sys.exit(1) - def rsync(self, files): - return sup(self, files, self.slavedir) +class SSH(object): -class SSH(AbstractUrl, SlaveRemote): """scheme class for ssh:// urls interface to remote slave on master side implementing an ssh based proxy """ - def __init__(self, path): - self.remote_addr, inner_url = sup(self, path, - '^((?:%s@)?%s):(.+)' % tuple([ r.pattern for r in (UserRX, HostRX) ])) - self.inner_rsc = parse_url(inner_url) - self.volume = inner_url[1:] + def __init__(self, host, volume): + self.remote_addr = host + self.volume = volume @staticmethod - def parse_ssh_address(addr): - m = re.match('([^@]+)@(.+)', addr) + def parse_ssh_address(self): + m = re.match('([^@]+)@(.+)', self.remote_addr) if m: u, h = m.groups() else: - u, h = syncdutils.getusername(), addr + u, h = syncdutils.getusername(), self.remote_addr + self.remotehost = h return {'user': u, 'host': h} - def canonical_path(self): - rap = self.parse_ssh_address(self.remote_addr) - remote_addr = '@'.join([rap['user'], gethostbyname(rap['host'])]) - return ':'.join([remote_addr, self.inner_rsc.get_url(canonical=True)]) - - def 
can_connect_to(self, remote): - """determine our position in the connectibility matrix""" - return False - - def start_fd_client(self, *a, **opts): - """customizations for client startup + def start_fd_client(self, i, o): + """set up RePCe client, handshake with server - - be a no-op if we are to daemonize (client startup is deferred - to post-daemon stage) - - determine target url for rsync after consulting server + It's cut out as a separate method to let + subclasses hook into client startup """ - if opts.get('deferred'): - return a - sup(self, *a) - ityp = type(self.inner_rsc) - if ityp == FILE: - slavepath = self.inner_rsc.path - elif ityp == GLUSTER: - slavepath = "/proc/%d/cwd" % self.server.pid() - else: - raise NotImplementedError + self.server = RepceClient(i, o) + rv = self.server.__version__() + exrv = {'proto': repce.repce_version, 'object': Server.version()} + da0 = (rv, exrv) + da1 = ({}, {}) + for i in range(2): + for k, v in da0[i].items(): + da1[i][k] = int(v) + if da1[0] != da1[1]: + raise GsyncdError( + "RePCe major version mismatch: local %s, remote %s" % + (exrv, rv)) + slavepath = "/proc/%d/cwd" % self.server.pid() self.slaveurl = ':'.join([self.remote_addr, slavepath]) - def connect_remote(self, go_daemon=None): + def connect_remote(self): """connect to inner slave url through outer ssh url Wrap the connecting utility in ssh. @@ -1113,30 +1380,204 @@ class SSH(AbstractUrl, SlaveRemote): [NB. ATM gluster product does not makes use of interactive authentication.] """ - if go_daemon == 'done': - return self.start_fd_client(*self.fd_pair) - gconf.setup_ssh_ctl(tempfile.mkdtemp(prefix='gsyncd-aux-ssh-')) - deferred = go_daemon == 'postconn' - ret = sup(self, gconf.ssh_command.split() + gconf.ssh_ctl_args + [self.remote_addr], slave=self.inner_rsc.url, deferred=deferred) - if deferred: - # send a message to peer so that we can wait for - # the answer from which we know connection is - # established and we can proceed with daemonization - # (doing that too early robs the ssh passwd prompt...) - # However, we'd better not start the RepceClient - # before daemonization (that's not preserved properly - # in daemon), we just do a an ad-hoc linear put/get. 
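Two small illustrations (not part of the patch; the values are made up) of the SSH helpers above:

    # parse_ssh_address()
    #   "geoaccount@slave1.example.com" -> {'user': 'geoaccount', 'host': 'slave1.example.com'}
    #   "slave1.example.com"            -> {'user': getusername(), 'host': 'slave1.example.com'}

    # start_fd_client(): both ends report {'proto': ..., 'object': ...};
    # each value is reduced with int() before comparison, so versions that
    # differ only after the decimal point (e.g. 1.0 vs 1.1) still match,
    # while a differing major version raises GsyncdError.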
- i, o = ret - inf = os.fdopen(i) - repce.send(o, None, '__repce_version__') - select((inf,), (), ()) - repce.recv(inf) - # hack hack hack: store a global reference to the file - # to save it from getting GC'd which implies closing it - gconf.permanent_handles.append(inf) - self.fd_pair = (i, o) - return 'should' - - def rsync(self, files): - return sup(self, files, '-e', " ".join(gconf.ssh_command.split() + gconf.ssh_ctl_args), - *(gconf.rsync_ssh_options.split() + [self.slaveurl])) + syncdutils.setup_ssh_ctl(tempfile.mkdtemp(prefix='gsyncd-aux-ssh-'), + self.remote_addr, + self.volume) + + logging.info("Initializing SSH connection between master and slave...") + t0 = time.time() + + extra_opts = [] + remote_gsyncd = gconf.get("remote-gsyncd") + if remote_gsyncd == "": + remote_gsyncd = "/nonexistent/gsyncd" + + if gconf.get("use-rsync-xattrs"): + extra_opts.append('--use-rsync-xattrs') + + args_to_slave = [gconf.get("ssh-command")] + \ + gconf.get("ssh-options").split() + \ + ["-p", str(gconf.get("ssh-port"))] + \ + rconf.ssh_ctl_args + [self.remote_addr] + \ + [remote_gsyncd, "slave"] + \ + extra_opts + \ + [rconf.args.master, rconf.args.slave] + \ + [ + '--master-node', rconf.args.local_node, + '--master-node-id', rconf.args.local_node_id, + '--master-brick', rconf.args.local_path, + '--local-node', rconf.args.resource_remote, + '--local-node-id', rconf.args.resource_remote_id] + \ + [ + # Add all config arguments here, slave gsyncd will not use + # config file in slave side, so all overriding options should + # be sent as arguments + '--slave-timeout', str(gconf.get("slave-timeout")), + '--slave-log-level', gconf.get("slave-log-level"), + '--slave-gluster-log-level', + gconf.get("slave-gluster-log-level"), + '--slave-gluster-command-dir', + gconf.get("slave-gluster-command-dir"), + '--master-dist-count', + str(gconf.get("master-distribution-count"))] + + if gconf.get("slave-access-mount"): + args_to_slave.append('--slave-access-mount') + + if rconf.args.debug: + args_to_slave.append('--debug') + + po = Popen(args_to_slave, + stdin=subprocess.PIPE, stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + rconf.transport = po + self.start_fd_client(po.stdout, po.stdin) + logging.info(lf("SSH connection between master and slave established.", + duration="%.4f" % (time.time() - t0))) + + def rsync(self, files, *args, **kw): + """invoke rsync""" + if not files: + raise GsyncdError("no files to sync") + logging.debug("files: " + ", ".join(files)) + + extra_rsync_flags = [] + # Performance flag, --ignore-missing-args, if rsync version is + # greater than 3.1.0 then include this flag. 
+ if gconf.get("rsync-opt-ignore-missing-args") and \ + get_rsync_version(gconf.get("rsync-command")) >= "3.1.0": + extra_rsync_flags = ["--ignore-missing-args"] + + rsync_ssh_opts = [gconf.get("ssh-command")] + \ + gconf.get("ssh-options").split() + \ + ["-p", str(gconf.get("ssh-port"))] + \ + rconf.ssh_ctl_args + \ + gconf.get("rsync-ssh-options").split() + + argv = [ + gconf.get("rsync-command"), + '-aR0', + '--inplace', + '--files-from=-', + '--super', + '--stats', + '--numeric-ids', + '--no-implied-dirs' + ] + + if gconf.get("rsync-opt-existing"): + argv += ["--existing"] + + if gconf.get("sync-xattrs"): + argv += ['--xattrs'] + + if gconf.get("sync-acls"): + argv += ['--acls'] + + argv = argv + \ + gconf.get("rsync-options").split() + \ + extra_rsync_flags + ['.'] + \ + ["-e", " ".join(rsync_ssh_opts)] + \ + [self.slaveurl] + + log_rsync_performance = gconf.getr("log-rsync-performance", False) + + if log_rsync_performance: + # use stdout=PIPE only when log_rsync_performance enabled + # Else rsync will write to stdout and nobody is there + # to consume. If PIPE is full rsync hangs. + po = Popen(argv, stdin=subprocess.PIPE, stdout=subprocess.PIPE, + stderr=subprocess.PIPE, universal_newlines=True) + else: + po = Popen(argv, stdin=subprocess.PIPE, stderr=subprocess.PIPE, + universal_newlines=True) + + for f in files: + po.stdin.write(f) + po.stdin.write('\0') + + stdout, stderr = po.communicate() + + if kw.get("log_err", False): + for errline in stderr.strip().split("\n")[:-1]: + logging.error(lf("SYNC Error", + sync_engine="Rsync", + error=errline)) + + if log_rsync_performance: + rsync_msg = [] + for line in stdout.split("\n"): + if line.startswith("Number of files:") or \ + line.startswith("Number of regular files transferred:") or \ + line.startswith("Total file size:") or \ + line.startswith("Total transferred file size:") or \ + line.startswith("Literal data:") or \ + line.startswith("Matched data:") or \ + line.startswith("Total bytes sent:") or \ + line.startswith("Total bytes received:") or \ + line.startswith("sent "): + rsync_msg.append(line) + logging.info(lf("rsync performance", + data=", ".join(rsync_msg))) + + return po + + def tarssh(self, files, log_err=False): + """invoke tar+ssh + -z (compress) can be use if needed, but omitting it now + as it results in weird error (tar+ssh errors out (errcode: 2) + """ + if not files: + raise GsyncdError("no files to sync") + logging.debug("files: " + ", ".join(files)) + (host, rdir) = self.slaveurl.split(':') + + tar_cmd = ["tar"] + \ + ["--sparse", "-cf", "-", "--files-from", "-"] + ssh_cmd = gconf.get("ssh-command").split() + \ + gconf.get("ssh-options-tar").split() + \ + ["-p", str(gconf.get("ssh-port"))] + \ + [host, "tar"] + \ + ["--overwrite", "-xf", "-", "-C", rdir] + p0 = Popen(tar_cmd, stdout=subprocess.PIPE, + stdin=subprocess.PIPE, stderr=subprocess.PIPE, + universal_newlines=True) + p1 = Popen(ssh_cmd, stdin=p0.stdout, stderr=subprocess.PIPE, + universal_newlines=True) + for f in files: + p0.stdin.write(f) + p0.stdin.write('\n') + + p0.stdin.close() + p0.stdout.close() # Allow p0 to receive a SIGPIPE if p1 exits. 
+ + # stdin and stdout of p0 is already closed, Reset to None and + # wait for child process to complete + p0.stdin = None + p0.stdout = None + + def wait_for_tar(p0): + _, stderr = p0.communicate() + if log_err: + for errline in stderr.strip().split("\n")[:-1]: + if "No such file or directory" not in errline: + logging.error(lf("SYNC Error", + sync_engine="Tarssh", + error=errline)) + + t = syncdutils.Thread(target=wait_for_tar, args=(p0, )) + # wait for tar to terminate, collecting any errors, further + # waiting for transfer to complete + t.start() + + # wait for ssh process + _, stderr1 = p1.communicate() + t.join() + + if log_err: + for errline in stderr1.strip().split("\n")[:-1]: + logging.error(lf("SYNC Error", + sync_engine="Tarssh", + error=errline)) + + return p1 diff --git a/geo-replication/syncdaemon/subcmds.py b/geo-replication/syncdaemon/subcmds.py new file mode 100644 index 00000000000..b8508532e30 --- /dev/null +++ b/geo-replication/syncdaemon/subcmds.py @@ -0,0 +1,335 @@ +# -*- coding: utf-8 -*- +# +# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> +# This file is part of GlusterFS. +# +# This file is licensed to you under your choice of the GNU Lesser +# General Public License, version 3 or any later version (LGPLv3 or +# later), or the GNU General Public License, version 2 (GPLv2), in all +# cases as published by the Free Software Foundation. +# + +from __future__ import print_function +from syncdutils import lf +import logging +import gsyncdconfig as gconf + + +ERROR_CONFIG_INVALID = 2 +ERROR_CONFIG_INVALID_VALUE = 3 +ERROR_CONFIG_NOT_CONFIGURABLE = 4 + + +def subcmd_monitor_status(args): + from gsyncdstatus import set_monitor_status + from rconf import rconf + + set_monitor_status(gconf.get("state-file"), args.status) + rconf.log_exit = False + logging.info(lf("Monitor Status Change", status=args.status)) + + +def subcmd_status(args): + from gsyncdstatus import GeorepStatus + + master_name = args.master.replace(":", "") + slave_data = args.slave.replace("ssh://", "") + + brick_status = GeorepStatus(gconf.get("state-file"), + "", + args.local_path, + "", + master_name, + slave_data, + gconf.get("pid-file")) + checkpoint_time = gconf.get("checkpoint", 0) + brick_status.print_status(checkpoint_time=checkpoint_time, + json_output=args.json) + + +def subcmd_monitor(args): + import monitor + from resource import GLUSTER, SSH, Popen + go_daemon = False if args.debug else True + + monitor.startup(go_daemon) + Popen.init_errhandler() + local = GLUSTER("localhost", args.master) + slavehost, slavevol = args.slave.split("::") + remote = SSH(slavehost, slavevol) + return monitor.monitor(local, remote) + + +def subcmd_verify_spawning(args): + logging.info("Able to spawn gsyncd.py") + + +def subcmd_worker(args): + import os + import fcntl + + from resource import GLUSTER, SSH, Popen + + Popen.init_errhandler() + fcntl.fcntl(args.feedback_fd, fcntl.F_SETFD, fcntl.FD_CLOEXEC) + local = GLUSTER("localhost", args.master) + slave_url, slavevol = args.slave.split("::") + if "@" not in slave_url: + slavehost = args.resource_remote + else: + slavehost = "%s@%s" % (slave_url.split("@")[0], args.resource_remote) + remote = SSH(slavehost, slavevol) + remote.connect_remote() + local.connect() + logging.info("Worker spawn successful. 
Acknowledging back to monitor") + os.close(args.feedback_fd) + local.service_loop(remote) + + +def subcmd_slave(args): + from resource import GLUSTER, Popen + + Popen.init_errhandler() + slavevol = args.slave.split("::")[-1] + local = GLUSTER("localhost", slavevol) + + local.connect() + local.service_loop() + + +def subcmd_voluuidget(args): + from subprocess import Popen, PIPE + import xml.etree.ElementTree as XET + + ParseError = XET.ParseError if hasattr(XET, 'ParseError') else SyntaxError + + cmd = ['gluster', '--xml', '--remote-host=' + args.host, + 'volume', 'info', args.volname] + + if args.inet6: + cmd.append("--inet6") + + po = Popen(cmd, bufsize=0, + stdin=None, stdout=PIPE, stderr=PIPE, + universal_newlines=True) + + vix, err = po.communicate() + if po.returncode != 0: + logging.info(lf("Volume info failed, unable to get " + "volume uuid of slavevol, " + "returning empty string", + slavevol=args.volname, + slavehost=args.host, + error=po.returncode)) + return "" + vi = XET.fromstring(vix) + if vi.find('opRet').text != '0': + logging.info(lf("Unable to get volume uuid of slavevol, " + "returning empty string", + slavevol=args.volname, + slavehost=args.host, + error=vi.find('opErrstr').text)) + return "" + + try: + voluuid = vi.find("volInfo/volumes/volume/id").text + except (ParseError, AttributeError, ValueError) as e: + logging.info(lf("Parsing failed to volume uuid of slavevol, " + "returning empty string", + slavevol=args.volname, + slavehost=args.host, + error=e)) + voluuid = "" + + print(voluuid) + + +def _unlink(path): + import os + from errno import ENOENT + from syncdutils import GsyncdError + import sys + + try: + os.unlink(path) + except (OSError, IOError): + if sys.exc_info()[1].errno == ENOENT: + pass + else: + raise GsyncdError('Unlink error: %s' % path) + + +def subcmd_delete(args): + import logging + import shutil + import glob + import sys + from errno import ENOENT, ENODATA + import struct + + from syncdutils import GsyncdError, Xattr, errno_wrap + import gsyncdconfig as gconf + + logging.info('geo-replication delete') + # remove the stime xattr from all the brick paths so that + # a re-create of a session will start sync all over again + stime_xattr_prefix = gconf.get('stime-xattr-prefix', None) + + # Delete pid file, status file, socket file + cleanup_paths = [] + cleanup_paths.append(gconf.get("pid-file")) + + # Cleanup Session dir + try: + shutil.rmtree(gconf.get("georep-session-working-dir")) + except (IOError, OSError): + if sys.exc_info()[1].errno == ENOENT: + pass + else: + raise GsyncdError( + 'Error while removing working dir: %s' % + gconf.get("georep-session-working-dir")) + + # Cleanup changelog working dirs + try: + shutil.rmtree(gconf.get("working-dir")) + except (IOError, OSError): + if sys.exc_info()[1].errno == ENOENT: + pass + else: + raise GsyncdError( + 'Error while removing working dir: %s' % + gconf.get("working-dir")) + + for path in cleanup_paths: + # To delete temp files + for f in glob.glob(path + "*"): + _unlink(f) + + if args.reset_sync_time and stime_xattr_prefix: + for p in args.paths: + if p != "": + # set stime to (0,0) to trigger full volume content resync + # to slave on session recreation + # look at master.py::Xcrawl hint: zero_zero + errno_wrap(Xattr.lsetxattr, + (p, stime_xattr_prefix + ".stime", + struct.pack("!II", 0, 0)), + [ENOENT, ENODATA]) + errno_wrap(Xattr.lremovexattr, + (p, stime_xattr_prefix + ".entry_stime"), + [ENOENT, ENODATA]) + + return + + +def print_config(name, value, only_value=False, 
use_underscore=False): + val = value + if isinstance(value, bool): + val = str(value).lower() + + if only_value: + print(val) + else: + if use_underscore: + name = name.replace("-", "_") + + print(("%s:%s" % (name, val))) + + +def config_name_format(val): + return val.replace("_", "-") + + +def subcmd_config_get(args): + import sys + import json + + all_config = gconf.getall(show_defaults=args.show_defaults, + show_non_configurable=True) + if args.name is not None: + val = all_config.get(config_name_format(args.name), None) + if val is None: + sys.stderr.write("Invalid config name \"%s\"\n" % args.name) + sys.exit(ERROR_CONFIG_INVALID) + + print_config(args.name, val["value"], only_value=args.only_value, + use_underscore=args.use_underscore) + return + + if args.json: + out = [] + # Convert all values as string + for k in sorted(all_config): + v = all_config[k] + out.append({ + "name": k, + "value": str(v["value"]), + "default": str(v["default"]), + "configurable": v["configurable"], + "modified": v["modified"] + }) + + print((json.dumps(out))) + return + + for k in sorted(all_config): + print_config(k, all_config[k]["value"], + use_underscore=args.use_underscore) + + +def subcmd_config_check(args): + import sys + + try: + gconf.check(config_name_format(args.name), value=args.value, + with_conffile=False) + except gconf.GconfNotConfigurable: + cnf_val = gconf.get(config_name_format(args.name), None) + if cnf_val is None: + sys.stderr.write("Invalid config name \"%s\"\n" % args.name) + sys.exit(ERROR_CONFIG_INVALID) + + # Not configurable + sys.stderr.write("Not configurable \"%s\"\n" % args.name) + sys.exit(ERROR_CONFIG_NOT_CONFIGURABLE) + except gconf.GconfInvalidValue: + sys.stderr.write("Invalid config value \"%s=%s\"\n" % (args.name, + args.value)) + sys.exit(ERROR_CONFIG_INVALID_VALUE) + + +def subcmd_config_set(args): + import sys + + try: + gconf.setconfig(config_name_format(args.name), args.value) + except gconf.GconfNotConfigurable: + cnf_val = gconf.get(config_name_format(args.name), None) + if cnf_val is None: + sys.stderr.write("Invalid config name \"%s\"\n" % args.name) + sys.exit(ERROR_CONFIG_INVALID) + + # Not configurable + sys.stderr.write("Not configurable \"%s\"\n" % args.name) + sys.exit(ERROR_CONFIG_NOT_CONFIGURABLE) + except gconf.GconfInvalidValue: + sys.stderr.write("Invalid config value \"%s=%s\"\n" % (args.name, + args.value)) + sys.exit(ERROR_CONFIG_INVALID_VALUE) + + +def subcmd_config_reset(args): + import sys + + try: + gconf.resetconfig(config_name_format(args.name)) + except gconf.GconfNotConfigurable: + cnf_val = gconf.get(config_name_format(args.name), None) + if cnf_val is None: + sys.stderr.write("Invalid config name \"%s\"\n" % args.name) + sys.exit(ERROR_CONFIG_INVALID) + + # Not configurable + sys.stderr.write("Not configurable \"%s\"\n" % args.name) + sys.exit(ERROR_CONFIG_NOT_CONFIGURABLE) diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py index 720200018e5..a3df103e76c 100644 --- a/geo-replication/syncdaemon/syncdutils.py +++ b/geo-replication/syncdaemon/syncdutils.py @@ -1,3 +1,13 @@ +# +# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com> +# This file is part of GlusterFS. + +# This file is licensed to you under your choice of the GNU Lesser +# General Public License, version 3 or any later version (LGPLv3 or +# later), or the GNU General Public License, version 2 (GPLv2), in all +# cases as published by the Free Software Foundation. 
+# + import os import sys import pwd @@ -5,55 +15,116 @@ import time import fcntl import shutil import logging +import errno +import threading +import subprocess import socket +from subprocess import PIPE from threading import Lock, Thread as baseThread -from errno import EACCES, EAGAIN, EPIPE, ENOTCONN, ECONNABORTED, EINTR, ENOENT, EPERM, ESTALE, errorcode -from signal import signal, SIGTERM, SIGKILL -from time import sleep +from errno import (EACCES, EAGAIN, EPIPE, ENOTCONN, ENOMEM, ECONNABORTED, + EINTR, ENOENT, ESTALE, EBUSY, ENODATA, errorcode, EIO) +from signal import signal, SIGTERM import select as oselect from os import waitpid as owaitpid +import xml.etree.ElementTree as XET +from select import error as SelectError try: from cPickle import PickleError except ImportError: - # py 3 from pickle import PickleError -from gconf import gconf - +from conf import GLUSTERFS_LIBEXECDIR, UUID_FILE +sys.path.insert(1, GLUSTERFS_LIBEXECDIR) +EVENTS_ENABLED = True try: - # py 3 - from urllib import parse as urllib + from gfevents.eventtypes import GEOREP_FAULTY as EVENT_GEOREP_FAULTY + from gfevents.eventtypes import GEOREP_ACTIVE as EVENT_GEOREP_ACTIVE + from gfevents.eventtypes import GEOREP_PASSIVE as EVENT_GEOREP_PASSIVE + from gfevents.eventtypes import GEOREP_CHECKPOINT_COMPLETED \ + as EVENT_GEOREP_CHECKPOINT_COMPLETED except ImportError: - import urllib + # Events APIs not installed, dummy eventtypes with None + EVENTS_ENABLED = False + EVENT_GEOREP_FAULTY = None + EVENT_GEOREP_ACTIVE = None + EVENT_GEOREP_PASSIVE = None + EVENT_GEOREP_CHECKPOINT_COMPLETED = None -try: - from hashlib import md5 as md5 -except ImportError: - # py 2.4 - from md5 import new as md5 +import gsyncdconfig as gconf +from rconf import rconf + +from hashlib import sha256 as sha256 + +ENOTSUP = getattr(errno, 'ENOTSUP', 'EOPNOTSUPP') -# auxillary gfid based access prefix +# auxiliary gfid based access prefix _CL_AUX_GFID_PFX = ".gfid/" +ROOT_GFID = "00000000-0000-0000-0000-000000000001" +GF_OP_RETRIES = 10 + +GX_GFID_CANONICAL_LEN = 37 # canonical gfid len + '\0' + +NodeID = None +rsync_version = None +unshare_mnt_propagation = None +slv_bricks = None +SPACE_ESCAPE_CHAR = "%20" +NEWLINE_ESCAPE_CHAR = "%0A" +PERCENTAGE_ESCAPE_CHAR = "%25" + +final_lock = Lock() + +def sup(x, *a, **kw): + """a rubyesque "super" for python ;) + + invoke caller method in parent class with given args. 
+ """ + return getattr(super(type(x), x), + sys._getframe(1).f_code.co_name)(*a, **kw) + def escape(s): """the chosen flavor of string escaping, used all over to turn whatever data to creatable representation""" - return urllib.quote_plus(s) + return s.replace("/", "-").strip("-") + + +def escape_space_newline(s): + return s.replace("%", PERCENTAGE_ESCAPE_CHAR)\ + .replace(" ", SPACE_ESCAPE_CHAR)\ + .replace("\n", NEWLINE_ESCAPE_CHAR) + -def unescape(s): - """inverse of .escape""" - return urllib.unquote_plus(s) +def unescape_space_newline(s): + return s.replace(SPACE_ESCAPE_CHAR, " ")\ + .replace(NEWLINE_ESCAPE_CHAR, "\n")\ + .replace(PERCENTAGE_ESCAPE_CHAR, "%") + +# gf_mount_ready() returns 1 if all subvols are up, else 0 +def gf_mount_ready(): + ret = errno_wrap(Xattr.lgetxattr, + ['.', 'dht.subvol.status', 16], + [ENOENT, ENOTSUP, ENODATA], [ENOMEM]) + + if isinstance(ret, int): + logging.error("failed to get the xattr value") + return 1 + ret = ret.rstrip('\x00') + if ret == "1": + return 1 + return 0 def norm(s): if s: return s.replace('-', '_') -def update_file(path, updater, merger = lambda f: True): + +def update_file(path, updater, merger=lambda f: True): """update a file in a transaction-like manner""" fr = fw = None try: - fd = os.open(path, os.O_CREAT|os.O_RDWR) + fd = os.open(path, os.O_CREAT | os.O_RDWR) try: fr = os.fdopen(fd, 'r+b') except: @@ -64,7 +135,7 @@ def update_file(path, updater, merger = lambda f: True): return tmpp = path + '.tmp.' + str(os.getpid()) - fd = os.open(tmpp, os.O_CREAT|os.O_EXCL|os.O_WRONLY) + fd = os.open(tmpp, os.O_CREAT | os.O_EXCL | os.O_WRONLY) try: fw = os.fdopen(fd, 'wb', 0) except: @@ -78,16 +149,60 @@ def update_file(path, updater, merger = lambda f: True): if fx: fx.close() + +def create_manifest(fname, content): + """ + Create manifest file for SSH Control Path + """ + fd = None + try: + fd = os.open(fname, os.O_CREAT | os.O_RDWR) + try: + os.write(fd, content) + except: + os.close(fd) + raise + finally: + if fd is not None: + os.close(fd) + + +def setup_ssh_ctl(ctld, remote_addr, resource_url): + """ + Setup GConf ssh control path parameters + """ + rconf.ssh_ctl_dir = ctld + content = "SLAVE_HOST=%s\nSLAVE_RESOURCE_URL=%s" % (remote_addr, + resource_url) + encoded_content = content.encode() + content_sha256 = sha256hex(encoded_content) + """ + The length of ctl_path for ssh connection should not be > 108. + ssh fails with ctl_path too long if it is so. But when rsync + is piped to ssh, it is not taking > 90. Hence using first 32 + bytes of hash. Hash collision doesn't matter as only one sock + file is created per directory. 
+ """ + content_sha256 = content_sha256[:32] + fname = os.path.join(rconf.ssh_ctl_dir, + "%s.mft" % content_sha256) + + create_manifest(fname, encoded_content) + ssh_ctl_path = os.path.join(rconf.ssh_ctl_dir, + "%s.sock" % content_sha256) + rconf.ssh_ctl_args = ["-oControlMaster=auto", "-S", ssh_ctl_path] + + def grabfile(fname, content=None): """open @fname + contest for its fcntl lock @content: if given, set the file content to it """ # damn those messy open() mode codes - fd = os.open(fname, os.O_CREAT|os.O_RDWR) - f = os.fdopen(fd, 'r+b', 0) + fd = os.open(fname, os.O_CREAT | os.O_RDWR) + f = os.fdopen(fd, 'r+') try: - fcntl.lockf(f, fcntl.LOCK_EX|fcntl.LOCK_NB) + fcntl.lockf(f, fcntl.LOCK_EX | fcntl.LOCK_NB) except: ex = sys.exc_info()[1] f.close() @@ -99,32 +214,34 @@ def grabfile(fname, content=None): try: f.truncate() f.write(content) + f.flush() except: f.close() raise - gconf.permanent_handles.append(f) + rconf.permanent_handles.append(f) return f + def grabpidfile(fname=None, setpid=True): """.grabfile customization for pid files""" if not fname: - fname = gconf.pid_file + fname = gconf.get("pid-file") content = None if setpid: content = str(os.getpid()) + '\n' return grabfile(fname, content=content) -final_lock = Lock() -def finalize(*a, **kw): +def finalize(*args, **kwargs): """all those messy final steps we go trough upon termination Do away with pidfile, ssh control dir and logging. """ + final_lock.acquire() - if getattr(gconf, 'pid_file', None): - rm_pidf = gconf.pid_file_owned - if gconf.cpid: + if gconf.get('pid_file'): + rm_pidf = rconf.pid_file_owned + if rconf.cpid: # exit path from parent branch of daemonization rm_pidf = False while True: @@ -132,33 +249,51 @@ def finalize(*a, **kw): if not f: # child has already taken over pidfile break - if os.waitpid(gconf.cpid, os.WNOHANG)[0] == gconf.cpid: + if os.waitpid(rconf.cpid, os.WNOHANG)[0] == rconf.cpid: # child has terminated rm_pidf = True - break; + break time.sleep(0.1) if rm_pidf: try: - os.unlink(gconf.pid_file) + os.unlink(rconf.pid_file) except: ex = sys.exc_info()[1] if ex.errno == ENOENT: pass else: raise - if gconf.ssh_ctl_dir and not gconf.cpid: - shutil.rmtree(gconf.ssh_ctl_dir) - if getattr(gconf, 'state_socket', None): - try: - os.unlink(gconf.state_socket) - except: - if sys.exc_info()[0] == OSError: + if rconf.ssh_ctl_dir and not rconf.cpid: + def handle_rm_error(func, path, exc_info): + if exc_info[1].errno == ENOENT: + return + raise exc_info[1] + + shutil.rmtree(rconf.ssh_ctl_dir, onerror=handle_rm_error) + + """ Unmount if not done """ + if rconf.mount_point: + if rconf.mountbroker: + umount_cmd = rconf.mbr_umount_cmd + [rconf.mount_point, 'lazy'] + else: + umount_cmd = ['umount', '-l', rconf.mount_point] + p0 = subprocess.Popen(umount_cmd, stderr=subprocess.PIPE, + universal_newlines=True) + _, errdata = p0.communicate() + if p0.returncode == 0: + try: + os.rmdir(rconf.mount_point) + except OSError: pass - if gconf.log_exit: + else: + pass + + if rconf.log_exit: logging.info("exiting.") sys.stdout.flush() sys.stderr.flush() - os._exit(kw.get('exval', 0)) + os._exit(kwargs.get('exval', 0)) + def log_raise_exception(excont): """top-level exception handler @@ -167,6 +302,7 @@ def log_raise_exception(excont): Translate some weird sounding but well understood exceptions into human-friendly lingo """ + is_filelog = False for h in logging.getLogger().handlers: fno = getattr(getattr(h, 'stream', None), 'fileno', None) @@ -184,72 +320,128 @@ def log_raise_exception(excont): logging.error(exc.args[0]) 
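Stepping back to setup_ssh_ctl() above (this aside is not part of the patch; the path is illustrative): truncating the digest keeps the ControlPath short, e.g.

    /tmp/gsyncd-aux-ssh-XXXXXXXX/<32 hex chars>.sock

is roughly 28 + 1 + 32 + 5 = 66 characters, well under the ~108 byte limit on UNIX socket paths that the docstring mentions, whereas a full 64-character sha256 digest would leave far less headroom.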
sys.stderr.write('failure: ' + exc.args[0] + '\n') elif isinstance(exc, PickleError) or isinstance(exc, EOFError) or \ - ((isinstance(exc, OSError) or isinstance(exc, IOError)) and \ - exc.errno == EPIPE): + ((isinstance(exc, OSError) or isinstance(exc, IOError)) and + exc.errno == EPIPE): logging.error('connection to peer is broken') - if hasattr(gconf, 'transport'): - gconf.transport.wait() - if gconf.transport.returncode == 127: - logging.warn("!!!!!!!!!!!!!") - logging.warn('!!! getting "No such file or directory" errors ' - "is most likely due to MISCONFIGURATION, please consult " - "http://access.redhat.com/knowledge/docs/en-US/Red_Hat_Storage/2.0/html/Administration_Guide/chap-User_Guide-Geo_Rep-Preparation-Settingup_Environment.html") - logging.warn("!!!!!!!!!!!!!") - gconf.transport.terminate_geterr() - elif isinstance(exc, OSError) and exc.errno in (ENOTCONN, ECONNABORTED): - logging.error('glusterfs session went down [%s]', errorcode[exc.errno]) + if hasattr(rconf, 'transport'): + rconf.transport.wait() + if rconf.transport.returncode == 127: + logging.error("getting \"No such file or directory\"" + "errors is most likely due to " + "MISCONFIGURATION, please remove all " + "the public keys added by geo-replication " + "from authorized_keys file in slave nodes " + "and run Geo-replication create " + "command again.") + logging.error("If `gsec_create container` was used, then " + "run `gluster volume geo-replication " + "<MASTERVOL> [<SLAVEUSER>@]<SLAVEHOST>::" + "<SLAVEVOL> config remote-gsyncd " + "<GSYNCD_PATH> (Example GSYNCD_PATH: " + "`/usr/libexec/glusterfs/gsyncd`)") + rconf.transport.terminate_geterr() + elif isinstance(exc, OSError) and exc.errno in (ENOTCONN, + ECONNABORTED): + logging.error(lf('Gluster Mount process exited', + error=errorcode[exc.errno])) + elif isinstance(exc, OSError) and exc.errno == EIO: + logging.error("Getting \"Input/Output error\" " + "is most likely due to " + "a. Brick is down or " + "b. Split brain issue.") + logging.error("This is expected as per design to " + "keep the consistency of the file system. 
" + "Once the above issue is resolved " + "geo-replication would automatically " + "proceed further.") + logtag = "FAIL" else: logtag = "FAIL" if not logtag and logging.getLogger().isEnabledFor(logging.DEBUG): logtag = "FULL EXCEPTION TRACE" if logtag: logging.exception(logtag + ": ") - sys.stderr.write("failed with %s.\n" % type(exc).__name__) + sys.stderr.write("failed with %s: %s.\n" % (type(exc).__name__, exc)) excont.exval = 1 sys.exit(excont.exval) class FreeObject(object): + """wildcard class for which any attribute can be set""" def __init__(self, **kw): - for k,v in kw.items(): + for k, v in kw.items(): setattr(self, k, v) + class Thread(baseThread): + """thread class flavor for gsyncd - always a daemon thread - force exit for whole program if thread function coughs up an exception """ - def __init__(self, *a, **kw): - tf = kw.get('target') + + def __init__(self, *args, **kwargs): + tf = kwargs.get('target') if tf: - def twrap(*aa): - excont = FreeObject(exval = 0) + def twrap(*aargs): + excont = FreeObject(exval=0) try: - tf(*aa) + tf(*aargs) except: try: log_raise_exception(excont) finally: - finalize(exval = excont.exval) - kw['target'] = twrap - baseThread.__init__(self, *a, **kw) + finalize(exval=excont.exval) + kwargs['target'] = twrap + baseThread.__init__(self, *args, **kwargs) self.setDaemon(True) + class GsyncdError(Exception): pass -def getusername(uid = None): - if uid == None: + +class _MetaXattr(object): + + """singleton class, a lazy wrapper around the + libcxattr module + + libcxattr (a heavy import due to ctypes) is + loaded only when when the single + instance is tried to be used. + + This reduces runtime for those invocations + which do not need filesystem manipulation + (eg. for config, url parsing) + """ + + def __getattr__(self, meth): + from libcxattr import Xattr as LXattr + xmeth = [m for m in dir(LXattr) if m[0] != '_'] + if meth not in xmeth: + return + for m in xmeth: + setattr(self, m, getattr(LXattr, m)) + return getattr(self, meth) + + +Xattr = _MetaXattr() + + +def getusername(uid=None): + if uid is None: uid = os.geteuid() return pwd.getpwuid(uid).pw_name + def privileged(): return os.geteuid() == 0 + def boolify(s): """ Generic string to boolean converter @@ -260,7 +452,7 @@ def boolify(s): - False if it's in false_list - Warn if it's not present in either and return False """ - true_list = ['true', 'yes', '1', 'on'] + true_list = ['true', 'yes', '1', 'on'] false_list = ['false', 'no', '0', 'off'] if isinstance(s, bool): @@ -270,74 +462,64 @@ def boolify(s): lstr = s.lower() if lstr in true_list: rv = True - elif not lstr in false_list: - logging.warn("Unknown string (%s) in string to boolean conversion defaulting to False\n" % (s)) + elif lstr not in false_list: + logging.warn(lf("Unknown string in \"string to boolean\" conversion, " + "defaulting to False", + str=s)) return rv -def eintr_wrap(func, exc, *a): + +def eintr_wrap(func, exc, *args): """ wrapper around syscalls resilient to interrupt caused by signals """ while True: try: - return func(*a) + return func(*args) except exc: ex = sys.exc_info()[1] if not ex.args[0] == EINTR: raise -def select(*a): - return eintr_wrap(oselect.select, oselect.error, *a) -def waitpid (*a): - return eintr_wrap(owaitpid, OSError, *a) +def select(*args): + return eintr_wrap(oselect.select, oselect.error, *args) + + +def waitpid(*args): + return eintr_wrap(owaitpid, OSError, *args) -def set_term_handler(hook=lambda *a: finalize(*a, **{'exval': 1})): + +def term_handler_default_hook(signum, frame): + 
finalize(signum, frame, exval=1) + + +def set_term_handler(hook=term_handler_default_hook): signal(SIGTERM, hook) -def is_host_local(host): - locaddr = False - for ai in socket.getaddrinfo(host, None): - # cf. http://github.com/gluster/glusterfs/blob/ce111f47/xlators/mgmt/glusterd/src/glusterd-utils.c#L125 - if ai[0] == socket.AF_INET: - if ai[-1][0].split(".")[0] == "127": - locaddr = True - break - elif ai[0] == socket.AF_INET6: - if ai[-1][0] == "::1": - locaddr = True + +def get_node_uuid(): + global NodeID + if NodeID is not None: + return NodeID + + NodeID = "" + with open(UUID_FILE) as f: + for line in f: + if line.startswith("UUID="): + NodeID = line.strip().split("=")[-1] break - else: - continue - try: - # use ICMP socket to avoid net.ipv4.ip_nonlocal_bind issue, - # cf. https://bugzilla.redhat.com/show_bug.cgi?id=890587 - s = socket.socket(ai[0], socket.SOCK_RAW, socket.IPPROTO_ICMP) - except socket.error: - ex = sys.exc_info()[1] - if ex.errno != EPERM: - raise - f = None - try: - f = open("/proc/sys/net/ipv4/ip_nonlocal_bind") - if int(f.read()) != 0: - raise GsyncdError( - "non-local bind is set and not allowed to create raw sockets, " - "cannot determine if %s is local" % host) - s = socket.socket(ai[0], socket.SOCK_DGRAM) - finally: - if f: - f.close() - try: - s.bind(ai[-1]) - locaddr = True - break - except: - pass - s.close() - return locaddr + + if NodeID == "": + raise GsyncdError("Failed to get Host UUID from %s" % UUID_FILE) + return NodeID + + +def is_host_local(host_id): + return host_id == get_node_uuid() + def funcode(f): fc = getattr(f, 'func_code', None) @@ -346,36 +528,44 @@ def funcode(f): fc = f.__code__ return fc + def memoize(f): fc = funcode(f) fn = fc.co_name + def ff(self, *a, **kw): rv = getattr(self, '_' + fn, None) - if rv == None: + if rv is None: rv = f(self, *a, **kw) setattr(self, '_' + fn, rv) return rv return ff + def umask(): return os.umask(0) + def entry2pb(e): return e.rsplit('/', 1) + def gauxpfx(): return _CL_AUX_GFID_PFX -def md5hex(s): - return md5(s).hexdigest() + +def sha256hex(s): + return sha256(s).hexdigest() + def selfkill(sig=SIGTERM): os.kill(os.getpid(), sig) -def errno_wrap(call, arg=[], errnos=[]): + +def errno_wrap(call, arg=[], errnos=[], retry_errnos=[]): """ wrapper around calls resilient to errnos. - retry in case of ESTALE """ + nr_tries = 0 while True: try: return call(*arg) @@ -383,6 +573,543 @@ def errno_wrap(call, arg=[], errnos=[]): ex = sys.exc_info()[1] if ex.errno in errnos: return ex.errno - if not ex.errno == ESTALE: + if ex.errno not in retry_errnos: + raise + nr_tries += 1 + if nr_tries == GF_OP_RETRIES: + # probably a screwed state, cannot do much... 
+ logging.warn(lf('reached maximum retries', + args=repr(arg), + error=ex)) + raise + time.sleep(0.250) # retry the call + + +def lstat(e): + return errno_wrap(os.lstat, [e], [ENOENT], [ESTALE, EBUSY]) + +def get_gfid_from_mnt(gfidpath): + return errno_wrap(Xattr.lgetxattr, + [gfidpath, 'glusterfs.gfid.string', + GX_GFID_CANONICAL_LEN], [ENOENT], [ESTALE]) + + +def matching_disk_gfid(gfid, entry): + disk_gfid = get_gfid_from_mnt(entry) + if isinstance(disk_gfid, int): + return False + + if not gfid == disk_gfid: + return False + + return True + + +class NoStimeAvailable(Exception): + pass + + +class PartialHistoryAvailable(Exception): + pass + + +class ChangelogHistoryNotAvailable(Exception): + pass + + +class ChangelogException(OSError): + pass + + +def gf_event(event_type, **kwargs): + if EVENTS_ENABLED: + from gfevents.gf_event import gf_event as gfevent + gfevent(event_type, **kwargs) + + +class GlusterLogLevel(object): + NONE = 0 + EMERG = 1 + ALERT = 2 + CRITICAL = 3 + ERROR = 4 + WARNING = 5 + NOTICE = 6 + INFO = 7 + DEBUG = 8 + TRACE = 9 + + +def get_changelog_log_level(lvl): + return getattr(GlusterLogLevel, lvl, GlusterLogLevel.INFO) + + +def get_master_and_slave_data_from_args(args): + master_name = None + slave_data = None + for arg in args: + if arg.startswith(":"): + master_name = arg.replace(":", "") + if "::" in arg: + slave_data = arg.replace("ssh://", "") + + return (master_name, slave_data) + +def unshare_propagation_supported(): + global unshare_mnt_propagation + if unshare_mnt_propagation is not None: + return unshare_mnt_propagation + + unshare_mnt_propagation = False + p = subprocess.Popen(["unshare", "--help"], + stderr=subprocess.PIPE, + stdout=subprocess.PIPE, + universal_newlines=True) + out, err = p.communicate() + if p.returncode == 0: + if "propagation" in out: + unshare_mnt_propagation = True + + return unshare_mnt_propagation + + +def get_rsync_version(rsync_cmd): + global rsync_version + if rsync_version is not None: + return rsync_version + + rsync_version = "0" + p = subprocess.Popen([rsync_cmd, "--version"], + stderr=subprocess.PIPE, + stdout=subprocess.PIPE, + universal_newlines=True) + out, err = p.communicate() + if p.returncode == 0: + rsync_version = out.split(" ", 4)[3] + + return rsync_version + + +def get_slv_dir_path(slv_host, slv_volume, gfid): + global slv_bricks + + dir_path = ENOENT + pfx = gauxpfx() + + if not slv_bricks: + slv_info = Volinfo(slv_volume, slv_host, master=False) + slv_bricks = slv_info.bricks + # Result of readlink would be of format as below. 
+ # readlink = "../../pgfid[0:2]/pgfid[2:4]/pgfid/basename" + for brick in slv_bricks: + dir_path = errno_wrap(os.path.join, + [brick['dir'], + ".glusterfs", gfid[0:2], + gfid[2:4], + gfid], [ENOENT], [ESTALE]) + if dir_path != ENOENT: + try: + realpath = errno_wrap(os.readlink, [dir_path], + [ENOENT], [ESTALE]) + if not isinstance(realpath, int): + realpath_parts = realpath.split('/') + pargfid = realpath_parts[-2] + basename = realpath_parts[-1] + dir_entry = os.path.join(pfx, pargfid, basename) + return dir_entry + except OSError: + # .gfid/GFID + gfidpath = unescape_space_newline(os.path.join(pfx, gfid)) + realpath = errno_wrap(Xattr.lgetxattr_buf, + [gfidpath, 'glusterfs.gfid2path'], [ENOENT], [ESTALE]) + if not isinstance(realpath, int): + basename = os.path.basename(realpath).rstrip('\x00') + dirpath = os.path.dirname(realpath) + if dirpath == "/": + pargfid = ROOT_GFID + else: + dirpath = dirpath.strip("/") + pargfid = get_gfid_from_mnt(dirpath) + if isinstance(pargfid, int): + return None + dir_entry = os.path.join(pfx, pargfid, basename) + return dir_entry + + return None + + +def lf(event, **kwargs): + """ + Log Format helper function, log messages can be + easily modified to structured log format. + lf("Config Change", sync_jobs=4, brick=/bricks/b1) will be + converted as "Config Change [{brick=/bricks/b1}, {sync_jobs=4}]" + """ + msgparts = [] + for k, v in kwargs.items(): + msgparts.append("{%s=%s}" % (k, v)) + return "%s [%s]" % (event, ", ".join(msgparts)) + + +class Popen(subprocess.Popen): + + """customized subclass of subprocess.Popen with a ring + buffer for children error output""" + + @classmethod + def init_errhandler(cls): + """start the thread which handles children's error output""" + cls.errstore = {} + + def tailer(): + while True: + errstore = cls.errstore.copy() + try: + poe, _, _ = select( + [po.stderr for po in errstore], [], [], 1) + except (ValueError, SelectError): + # stderr is already closed wait for some time before + # checking next error + time.sleep(0.5) + continue + for po in errstore: + if po.stderr not in poe: + continue + po.lock.acquire() + try: + if po.on_death_row: + continue + la = errstore[po] + try: + fd = po.stderr.fileno() + except ValueError: # file is already closed + time.sleep(0.5) + continue + + try: + l = os.read(fd, 1024) + except OSError: + time.sleep(0.5) + continue + + if not l: + continue + tots = len(l) + for lx in la: + tots += len(lx) + while tots > 1 << 20 and la: + tots -= len(la.pop(0)) + la.append(l) + finally: + po.lock.release() + t = Thread(target=tailer) + t.start() + cls.errhandler = t + + @classmethod + def fork(cls): + """fork wrapper that restarts errhandler thread in child""" + pid = os.fork() + if not pid: + cls.init_errhandler() + return pid + + def __init__(self, args, *a, **kw): + """customizations for subprocess.Popen instantiation + + - 'close_fds' is taken to be the default + - if child's stderr is chosen to be managed, + register it with the error handler thread + """ + self.args = args + if 'close_fds' not in kw: + kw['close_fds'] = True + self.lock = threading.Lock() + self.on_death_row = False + self.elines = [] + try: + sup(self, args, *a, **kw) + except: + ex = sys.exc_info()[1] + if not isinstance(ex, OSError): raise - time.sleep(0.5) # retry the call + raise GsyncdError("""execution of "%s" failed with %s (%s)""" % + (args[0], errno.errorcode[ex.errno], + os.strerror(ex.errno))) + if kw.get('stderr') == subprocess.PIPE: + assert(getattr(self, 'errhandler', None)) + self.errstore[self] = [] + + 
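# Note: each child registered in cls.errstore above has its stderr tailed by the errhandler thread into a buffer capped at 1 MiB; errlog() below replays those captured lines when the command fails.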
def errlog(self): + """make a log about child's failure event""" + logging.error(lf("command returned error", + cmd=" ".join(self.args), + error=self.returncode)) + lp = '' + + def logerr(l): + logging.error(self.args[0] + "> " + l) + for l in self.elines: + ls = l.split('\n') + ls[0] = lp + ls[0] + lp = ls.pop() + for ll in ls: + logerr(ll) + if lp: + logerr(lp) + + def errfail(self): + """fail nicely if child did not terminate with success""" + self.errlog() + finalize(exval=1) + + def terminate_geterr(self, fail_on_err=True): + """kill child, finalize stderr harvesting (unregister + from errhandler, set up .elines), fail on error if + asked for + """ + self.lock.acquire() + try: + self.on_death_row = True + finally: + self.lock.release() + elines = self.errstore.pop(self) + if self.poll() is None: + self.terminate() + if self.poll() is None: + time.sleep(0.1) + self.kill() + self.wait() + while True: + if not select([self.stderr], [], [], 0.1)[0]: + break + b = os.read(self.stderr.fileno(), 1024) + if b: + elines.append(b.decode()) + else: + break + self.stderr.close() + self.elines = elines + if fail_on_err and self.returncode != 0: + self.errfail() + + +def host_brick_split(value): + """ + IPv6 compatible way to split and get the host + and brick information. Example inputs: + node1.example.com:/exports/bricks/brick1/brick + fe80::af0f:df82:844f:ef66%utun0:/exports/bricks/brick1/brick + """ + parts = value.split(":") + brick = parts[-1] + hostparts = parts[0:-1] + return (":".join(hostparts), brick) + + +class Volinfo(object): + + def __init__(self, vol, host='localhost', prelude=[], master=True): + if master: + gluster_cmd_dir = gconf.get("gluster-command-dir") + else: + gluster_cmd_dir = gconf.get("slave-gluster-command-dir") + + gluster_cmd = os.path.join(gluster_cmd_dir, 'gluster') + po = Popen(prelude + [gluster_cmd, '--xml', '--remote-host=' + host, + 'volume', 'info', vol], + stdout=PIPE, stderr=PIPE, universal_newlines=True) + vix = po.stdout.read() + po.wait() + po.terminate_geterr() + vi = XET.fromstring(vix) + if vi.find('opRet').text != '0': + if prelude: + via = '(via %s) ' % ' '.join(prelude) + else: + via = ' ' + raise GsyncdError('getting volume info of %s%s ' + 'failed with errorcode %s' % + (vol, via, vi.find('opErrno').text)) + self.tree = vi + self.volume = vol + self.host = host + + def get(self, elem): + return self.tree.findall('.//' + elem) + + def is_tier(self): + return (self.get('typeStr')[0].text == 'Tier') + + def is_hot(self, brickpath): + logging.debug('brickpath: ' + repr(brickpath)) + return brickpath in self.hot_bricks + + @property + @memoize + def bricks(self): + def bparse(b): + host, dirp = host_brick_split(b.find("name").text) + return {'host': host, 'dir': dirp, 'uuid': b.find("hostUuid").text} + return [bparse(b) for b in self.get('brick')] + + @property + @memoize + def uuid(self): + ids = self.get('id') + if len(ids) != 1: + raise GsyncdError("volume info of %s obtained from %s: " + "ambiguous uuid" % (self.volume, self.host)) + return ids[0].text + + def replica_count(self, tier, hot): + if (tier and hot): + return int(self.get('hotBricks/hotreplicaCount')[0].text) + elif (tier and not hot): + return int(self.get('coldBricks/coldreplicaCount')[0].text) + else: + return int(self.get('replicaCount')[0].text) + + def disperse_count(self, tier, hot): + if (tier and hot): + # Tiering doesn't support disperse volume as hot brick, + # hence no xml output, so returning 0. If it is + # supported later, this should be changed. 
+ return 0 + elif (tier and not hot): + return int(self.get('coldBricks/colddisperseCount')[0].text) + else: + return int(self.get('disperseCount')[0].text) + + def distribution_count(self, tier, hot): + if (tier and hot): + return int(self.get('hotBricks/hotdistCount')[0].text) + elif (tier and not hot): + return int(self.get('coldBricks/colddistCount')[0].text) + else: + return int(self.get('distCount')[0].text) + + @property + @memoize + def hot_bricks(self): + return [b.text for b in self.get('hotBricks/brick')] + + def get_hot_bricks_count(self, tier): + if (tier): + return int(self.get('hotBricks/hotbrickCount')[0].text) + else: + return 0 + + +class VolinfoFromGconf(object): + # Glusterd will generate following config items before Geo-rep start + # So that Geo-rep need not run gluster commands from inside + # Volinfo object API/interface kept as is so that caller need not + # change anything except calling this instead of Volinfo() + # + # master-bricks= + # master-bricks=NODEID:HOSTNAME:PATH,.. + # slave-bricks=NODEID:HOSTNAME,.. + # master-volume-id= + # slave-volume-id= + # master-replica-count= + # master-disperse_count= + def __init__(self, vol, host='localhost', master=True): + self.volume = vol + self.host = host + self.master = master + + def is_tier(self): + return False + + def is_hot(self, brickpath): + return False + + def is_uuid(self, value): + try: + uuid.UUID(value) + return True + except ValueError: + return False + + def possible_path(self, value): + return "/" in value + + @property + @memoize + def bricks(self): + pfx = "master-" if self.master else "slave-" + bricks_data = gconf.get(pfx + "bricks") + if bricks_data is None: + return [] + + bricks_data = bricks_data.split(",") + bricks_data = [b.strip() for b in bricks_data] + out = [] + for b in bricks_data: + parts = b.split(":") + b_uuid = None + if self.is_uuid(parts[0]): + b_uuid = parts[0] + # Set all parts except first + parts = parts[1:] + + if self.possible_path(parts[-1]): + bpath = parts[-1] + # Set all parts except last + parts = parts[0:-1] + + out.append({ + "host": ":".join(parts), # if remaining parts are IPv6 name + "dir": bpath, + "uuid": b_uuid + }) + + return out + + @property + @memoize + def uuid(self): + if self.master: + return gconf.get("master-volume-id") + else: + return gconf.get("slave-volume-id") + + def replica_count(self, tier, hot): + return gconf.get("master-replica-count") + + def disperse_count(self, tier, hot): + return gconf.get("master-disperse-count") + + def distribution_count(self, tier, hot): + return gconf.get("master-distribution-count") + + @property + @memoize + def hot_bricks(self): + return [] + + def get_hot_bricks_count(self, tier): + return 0 + + +def can_ssh(host, port=22): + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + try: + s.connect((host, port)) + flag = True + except socket.error: + flag = False + + s.close() + return flag + + +def get_up_nodes(hosts, port): + # List of hosts with Hostname/IP and UUID + up_nodes = [] + for h in hosts: + if can_ssh(h[0], port): + up_nodes.append(h) + + return up_nodes diff --git a/geo-replication/test-requirements.txt b/geo-replication/test-requirements.txt new file mode 100644 index 00000000000..d6165640d3f --- /dev/null +++ b/geo-replication/test-requirements.txt @@ -0,0 +1,7 @@ +# Hacking already pins down pep8, pyflakes and flake8 +flake8 +coverage +nose +nosexcover +nosehtmloutput +mock>=0.8.0 diff --git a/geo-replication/tests/__init__.py b/geo-replication/tests/__init__.py new file mode 100644 index 
00000000000..b4648b69645 --- /dev/null +++ b/geo-replication/tests/__init__.py @@ -0,0 +1,9 @@ +# +# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com> +# This file is part of GlusterFS. + +# This file is licensed to you under your choice of the GNU Lesser +# General Public License, version 3 or any later version (LGPLv3 or +# later), or the GNU General Public License, version 2 (GPLv2), in all +# cases as published by the Free Software Foundation. +# diff --git a/geo-replication/tests/unit/__init__.py b/geo-replication/tests/unit/__init__.py new file mode 100644 index 00000000000..b4648b69645 --- /dev/null +++ b/geo-replication/tests/unit/__init__.py @@ -0,0 +1,9 @@ +# +# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com> +# This file is part of GlusterFS. + +# This file is licensed to you under your choice of the GNU Lesser +# General Public License, version 3 or any later version (LGPLv3 or +# later), or the GNU General Public License, version 2 (GPLv2), in all +# cases as published by the Free Software Foundation. +# diff --git a/geo-replication/tests/unit/test_gsyncdstatus.py b/geo-replication/tests/unit/test_gsyncdstatus.py new file mode 100755 index 00000000000..9c1aa2ad4ad --- /dev/null +++ b/geo-replication/tests/unit/test_gsyncdstatus.py @@ -0,0 +1,193 @@ +#!/usr/bin/python3 +# +# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com> +# This file is part of GlusterFS. + +# This file is licensed to you under your choice of the GNU Lesser +# General Public License, version 3 or any later version (LGPLv3 or +# later), or the GNU General Public License, version 2 (GPLv2), in all +# cases as published by the Free Software Foundation. +# + +import unittest +import os +import urllib + +from syncdaemon.gstatus import (GeorepStatus, set_monitor_status, + get_default_values, + MONITOR_STATUS, DEFAULT_STATUS, + STATUS_VALUES, CRAWL_STATUS_VALUES, + human_time, human_time_utc) + + +class GeorepStatusTestCase(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.work_dir = os.path.dirname(os.path.abspath(__file__)) + cls.monitor_status_file = os.path.join(cls.work_dir, "monitor.status") + cls.brick = "/exports/bricks/b1" + cls.status = GeorepStatus(cls.monitor_status_file, cls.brick) + cls.statusfile = os.path.join(cls.work_dir, + "brick_%s.status" + % urllib.quote_plus(cls.brick)) + + @classmethod + def tearDownClass(cls): + os.remove(cls.statusfile) + os.remove(cls.monitor_status_file) + + def _filter_dict(self, inp, keys): + op = {} + for k in keys: + op[k] = inp.get(k, None) + return op + + def test_monitor_status_file_created(self): + self.assertTrue(os.path.exists(self.monitor_status_file)) + + def test_status_file_created(self): + self.assertTrue(os.path.exists(self.statusfile)) + + def test_set_monitor_status(self): + for st in MONITOR_STATUS: + set_monitor_status(self.monitor_status_file, st) + self.assertTrue(self.status.get_monitor_status(), st) + + def test_default_values_test(self): + self.assertTrue(get_default_values(), { + "slave_node": DEFAULT_STATUS, + "worker_status": DEFAULT_STATUS, + "last_synced": 0, + "last_synced_utc": 0, + "crawl_status": DEFAULT_STATUS, + "entry": 0, + "data": 0, + "metadata": 0, + "failures": 0, + "checkpoint_completed": False, + "checkpoint_time": 0, + "checkpoint_time_utc": 0, + "checkpoint_completion_time": 0, + "checkpoint_completion_time_utc": 0 + }) + + def test_human_time(self): + self.assertTrue(human_time(1429174398), "2015-04-16 14:23:18") + + def test_human_time_utc(self): + 
self.assertTrue(human_time_utc(1429174398), "2015-04-16 08:53:18") + + def test_invalid_human_time(self): + self.assertTrue(human_time(142917439), DEFAULT_STATUS) + self.assertTrue(human_time("abcdef"), DEFAULT_STATUS) + + def test_invalid_human_time_utc(self): + self.assertTrue(human_time_utc(142917439), DEFAULT_STATUS) + self.assertTrue(human_time_utc("abcdef"), DEFAULT_STATUS) + + def test_worker_status(self): + set_monitor_status(self.monitor_status_file, "Started") + for st in STATUS_VALUES: + self.status.set_worker_status(st) + self.assertTrue(self.status.get_status()["worker_status"], st) + + def test_crawl_status(self): + set_monitor_status(self.monitor_status_file, "Started") + self.status.set_active() + for st in CRAWL_STATUS_VALUES: + self.status.set_worker_crawl_status(st) + self.assertTrue(self.status.get_status()["crawl_status"], st) + + def test_slave_node(self): + set_monitor_status(self.monitor_status_file, "Started") + self.status.set_active() + self.status.set_slave_node("fvm2") + self.assertTrue(self.status.get_status()["slave_node"], "fvm2") + + self.status.set_worker_status("Passive") + self.status.set_slave_node("fvm2") + self.assertTrue(self.status.get_status()["slave_node"], "fvm2") + + def test_active_worker_status(self): + set_monitor_status(self.monitor_status_file, "Started") + self.status.set_active() + self.assertTrue(self.status.get_status()["worker_status"], "Active") + + def test_passive_worker_status(self): + set_monitor_status(self.monitor_status_file, "Started") + self.status.set_passive() + self.assertTrue(self.status.get_status()["worker_status"], "Passive") + + def test_set_field(self): + set_monitor_status(self.monitor_status_file, "Started") + self.status.set_active() + self.status.set_field("entry", 42) + self.assertTrue(self.status.get_status()["entry"], 42) + + def test_inc_value(self): + set_monitor_status(self.monitor_status_file, "Started") + self.status.set_active() + self.status.set_field("entry", 0) + self.status.inc_value("entry", 2) + self.assertTrue(self.status.get_status()["entry"], 2) + + self.status.set_field("data", 0) + self.status.inc_value("data", 2) + self.assertTrue(self.status.get_status()["data"], 2) + + self.status.set_field("meta", 0) + self.status.inc_value("meta", 2) + self.assertTrue(self.status.get_status()["meta"], 2) + + self.status.set_field("failures", 0) + self.status.inc_value("failures", 2) + self.assertTrue(self.status.get_status()["failures"], 2) + + def test_dec_value(self): + set_monitor_status(self.monitor_status_file, "Started") + self.status.set_active() + + self.status.set_field("entry", 4) + self.status.inc_value("entry", 2) + self.assertTrue(self.status.get_status()["entry"], 2) + + self.status.set_field("data", 4) + self.status.inc_value("data", 2) + self.assertTrue(self.status.get_status()["data"], 2) + + self.status.set_field("meta", 4) + self.status.inc_value("meta", 2) + self.assertTrue(self.status.get_status()["meta"], 2) + + self.status.set_field("failures", 4) + self.status.inc_value("failures", 2) + self.assertTrue(self.status.get_status()["failures"], 2) + + def test_worker_status_when_monitor_status_created(self): + set_monitor_status(self.monitor_status_file, "Created") + for st in STATUS_VALUES: + self.status.set_worker_status(st) + self.assertTrue(self.status.get_status()["worker_status"], + "Created") + + def test_worker_status_when_monitor_status_paused(self): + set_monitor_status(self.monitor_status_file, "Paused") + for st in STATUS_VALUES: + self.status.set_worker_status(st) + 
self.assertTrue(self.status.get_status()["worker_status"], + "Paused") + + def test_worker_status_when_monitor_status_stopped(self): + set_monitor_status(self.monitor_status_file, "Stopped") + for st in STATUS_VALUES: + self.status.set_worker_status(st) + self.assertTrue(self.status.get_status()["worker_status"], + "Stopped") + + def test_status_when_worker_status_active(self): + set_monitor_status(self.monitor_status_file, "Started") + self.status.set_active() + + +if __name__ == "__main__": + unittest.main() diff --git a/geo-replication/tests/unit/test_syncdutils.py b/geo-replication/tests/unit/test_syncdutils.py new file mode 100644 index 00000000000..ff537ab2660 --- /dev/null +++ b/geo-replication/tests/unit/test_syncdutils.py @@ -0,0 +1,29 @@ +# +# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com> +# This file is part of GlusterFS. + +# This file is licensed to you under your choice of the GNU Lesser +# General Public License, version 3 or any later version (LGPLv3 or +# later), or the GNU General Public License, version 2 (GPLv2), in all +# cases as published by the Free Software Foundation. +# + +import unittest + +from syncdaemon import syncdutils + + +class SyncdutilsTestCase(unittest.TestCase): + def setUp(self): + pass + + def tearDown(self): + pass + + def test_escape(self): + self.assertEqual(syncdutils.escape("http://gluster.org"), + "http%3A%2F%2Fgluster.org") + + def test_unescape(self): + self.assertEqual(syncdutils.unescape("http%3A%2F%2Fgluster.org"), + "http://gluster.org") diff --git a/geo-replication/tox.ini b/geo-replication/tox.ini new file mode 100644 index 00000000000..57ff086b947 --- /dev/null +++ b/geo-replication/tox.ini @@ -0,0 +1,32 @@ +[tox] +envlist = py26,py27,pep8 + +[testenv] +whitelist_externals=bash +setenv = VIRTUAL_ENV={envdir} +deps = + --download-cache={homedir}/.pipcache + -r{toxinidir}/test-requirements.txt +changedir = {toxinidir}/tests/unit +commands = nosetests -v --exe --with-xunit --with-coverage --cover-package syncdaemon --cover-erase --cover-xml --cover-html --cover-branches --with-html-output {posargs} + +[tox:jenkins] +downloadcache = ~/cache/pip + +[testenv:pep8] +changedir = {toxinidir} +commands = + flake8 + flake8 syncdaemon tests + +[testenv:cover] +setenv = NOSE_WITH_COVERAGE=1 + +[testenv:venv] +commands = {posargs} + +[flake8] +ignore = H +builtins = _ +exclude = .venv,.tox,dist,doc,tests,*egg +show-source = True
\ No newline at end of file diff --git a/geo-replication/unittests.sh b/geo-replication/unittests.sh new file mode 100644 index 00000000000..d5dbd00bd4c --- /dev/null +++ b/geo-replication/unittests.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +cd $(dirname $0)/tests/unit +nosetests -v --exe --with-coverage --cover-package \ + syncdaemon --cover-erase --cover-html --cover-branches $@ + +saved_status=$? +rm -f .coverage +exit $saved_status |
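Usage note for the two new test entry points above: from the geo-replication directory, tox -e pep8 runs the flake8 checks and tox -e py27 (or py26) runs the unit tests under tests/unit with coverage enabled, while bash unittests.sh invokes nosetests directly with a similar coverage setup, without a tox-managed virtualenv.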

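To make the glusterd-generated brick list handled by VolinfoFromGconf above more concrete, here is a small standalone sketch (not part of the patch) of how a master-bricks=NODEID:HOSTNAME:PATH,.. value maps to the brick dictionaries. The splitting rules shown (optional leading UUID, brick path as the last field, the ":"-joined remainder as the host) are a slightly simplified version of what the patch does; the sample UUIDs, host names and brick paths are made up for illustration.

# Standalone sketch, not part of the patch: mirrors (in simplified form) the
# splitting rules of VolinfoFromGconf.bricks. Sample values are hypothetical.
import uuid


def is_uuid(value):
    try:
        uuid.UUID(value)
        return True
    except ValueError:
        return False


def parse_bricks(bricks_data):
    out = []
    for item in (b.strip() for b in bricks_data.split(",")):
        parts = item.split(":")
        b_uuid = None
        if is_uuid(parts[0]):
            # optional leading node UUID
            b_uuid = parts[0]
            parts = parts[1:]
        # the brick path is the last field; the rest, rejoined with ":",
        # is the host, so IPv6 addresses survive the split
        out.append({"host": ":".join(parts[:-1]),
                    "dir": parts[-1],
                    "uuid": b_uuid})
    return out


print(parse_bricks(
    "8e5d0a44-7f5c-4c1e-9d54-1b2c3d4e5f60:node1.example.com:/bricks/b1,"
    "9f6e1b55-8a6d-4d2f-ae65-2c3d4e5f6071:fe80::1:/bricks/b1"))
# [{'host': 'node1.example.com', 'dir': '/bricks/b1', 'uuid': '8e5d0a44-...'},
#  {'host': 'fe80::1', 'dir': '/bricks/b1', 'uuid': '9f6e1b55-...'}]

Rejoining the middle fields with ":" is the same trick host_brick_split() uses, which is what keeps IPv6 host addresses intact.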