From c458433041aafb48ae6d6e5fcf3e1e737dc3fda3 Mon Sep 17 00:00:00 2001 From: Jeff Darcy Date: Mon, 8 Feb 2016 13:30:49 -0500 Subject: experimental: add fdl (Full Data Logging) translator NSR needs logging that is different than our existing changelog in several ways: * Full data, not just metadata * Pre-op, not post-op * High performance * Supports the concept of time-bounded "terms" Others (for example EC) might need the same thing. This patch adds such a translator. It also adds code to dump the resulting journals, and to replay them using syncops, plus (very rudimentary) tests for all of the above. Change-Id: I29680a1b4e0a9e7d5a8497fef302c46434b86636 Signed-off-by: Jeff Darcy Reviewed-on: http://review.gluster.org/12450 Smoke: Gluster Build System CentOS-regression: Gluster Build System NetBSD-regression: NetBSD Build System --- xlators/experimental/fdl/Makefile.am | 3 + xlators/experimental/fdl/src/Makefile.am | 42 +++ xlators/experimental/fdl/src/dump-tmpl.c | 156 +++++++++ xlators/experimental/fdl/src/fdl-tmpl.c | 506 +++++++++++++++++++++++++++++ xlators/experimental/fdl/src/gen_dumper.py | 116 +++++++ xlators/experimental/fdl/src/gen_fdl.py | 328 +++++++++++++++++++ xlators/experimental/fdl/src/gen_recon.py | 191 +++++++++++ xlators/experimental/fdl/src/jnl-types.h | 14 + xlators/experimental/fdl/src/logdump.c | 50 +++ xlators/experimental/fdl/src/recon-tmpl.c | 305 +++++++++++++++++ xlators/experimental/fdl/src/recon.c | 89 +++++ 11 files changed, 1800 insertions(+) create mode 100644 xlators/experimental/fdl/Makefile.am create mode 100644 xlators/experimental/fdl/src/Makefile.am create mode 100644 xlators/experimental/fdl/src/dump-tmpl.c create mode 100644 xlators/experimental/fdl/src/fdl-tmpl.c create mode 100755 xlators/experimental/fdl/src/gen_dumper.py create mode 100755 xlators/experimental/fdl/src/gen_fdl.py create mode 100755 xlators/experimental/fdl/src/gen_recon.py create mode 100644 xlators/experimental/fdl/src/jnl-types.h create mode 100644 
xlators/experimental/fdl/src/logdump.c create mode 100644 xlators/experimental/fdl/src/recon-tmpl.c create mode 100644 xlators/experimental/fdl/src/recon.c (limited to 'xlators/experimental/fdl') diff --git a/xlators/experimental/fdl/Makefile.am b/xlators/experimental/fdl/Makefile.am new file mode 100644 index 00000000000..a985f42a877 --- /dev/null +++ b/xlators/experimental/fdl/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/experimental/fdl/src/Makefile.am b/xlators/experimental/fdl/src/Makefile.am new file mode 100644 index 00000000000..a05fc797b0a --- /dev/null +++ b/xlators/experimental/fdl/src/Makefile.am @@ -0,0 +1,42 @@ +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/experimental +xlator_LTLIBRARIES = fdl.la + +noinst_HEADERS = jnl-types.h + +nodist_fdl_la_SOURCES = fdl.c +fdl_la_LDFLAGS = -module -avoid-version +fdl_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +sbin_PROGRAMS = gf_logdump gf_recon +gf_logdump_SOURCES = logdump.c +nodist_gf_logdump_SOURCES = libfdl.c +gf_logdump_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la\ + $(top_builddir)/api/src/libgfapi.la + +# Eventually recon(ciliation) code will move elsewhere, but for now it's +# easier to have it next to the similar logdump code. 
+gf_recon_SOURCES = recon.c +nodist_gf_recon_SOURCES = librecon.c +gf_recon_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la\ + $(top_builddir)/api/src/libgfapi.la + +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ + -I$(top_srcdir)/api/src -fPIC \ + -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS) \ + -DDATADIR=\"$(localstatedir)\" + +AM_CFLAGS = -Wall $(GF_CFLAGS) + +noinst_PYTHON = gen_fdl.py gen_dumper.py gen_recon.py +EXTRA_DIST = fdl-tmpl.c dump-tmpl.c recon-tmpl.c + +CLEANFILES = $(nodist_fdl_la_SOURCES) $(nodist_gf_logdump_SOURCES) + +fdl.c: fdl-tmpl.c gen_fdl.py + $(PYTHON) $(srcdir)/gen_fdl.py $(srcdir)/fdl-tmpl.c > $@ + +libfdl.c: dump-tmpl.c gen_dumper.py + $(PYTHON) $(srcdir)/gen_dumper.py $(srcdir)/dump-tmpl.c > $@ + +librecon.c: recon-tmpl.c gen_recon.py + $(PYTHON) $(srcdir)/gen_recon.py $(srcdir)/recon-tmpl.c > $@ diff --git a/xlators/experimental/fdl/src/dump-tmpl.c b/xlators/experimental/fdl/src/dump-tmpl.c new file mode 100644 index 00000000000..cac1071a9c1 --- /dev/null +++ b/xlators/experimental/fdl/src/dump-tmpl.c @@ -0,0 +1,156 @@ +#pragma fragment PROLOG +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glfs.h" +#include "iatt.h" +#include "xlator.h" +#include "jnl-types.h" + +#pragma fragment DICT + { + int key_len, data_len; + char *key_ptr; + printf ("@ARGNAME@ = dict {\n"); + for (;;) { + key_len = *((int *)new_meta); + new_meta += sizeof(int); + if (!key_len) { + break; + } + key_ptr = new_meta; + new_meta += key_len; + data_len = *((int *)new_meta); + new_meta += sizeof(int) + data_len; + printf (" %s = <%d bytes>\n", key_ptr, data_len); + } + printf ("}\n"); + } + +#pragma fragment DOUBLE + printf ("@ARGNAME@ = @FORMAT@\n", *((uint64_t *)new_meta), + *((uint64_t *)new_meta)); + new_meta += sizeof(uint64_t); + +#pragma fragment GFID + printf ("@ARGNAME@ = \n", uuid_utoa(*((uuid_t *)new_meta))); + new_meta += 16; + +#pragma fragment INTEGER + printf ("@ARGNAME@ = @FORMAT@\n", 
*((uint32_t *)new_meta), + *((uint32_t *)new_meta)); + new_meta += sizeof(uint32_t); + +#pragma fragment LOC + printf ("@ARGNAME@ = loc {\n"); + printf (" gfid = %s\n", uuid_utoa(*((uuid_t *)new_meta))); + new_meta += 16; + printf (" pargfid = %s\n", uuid_utoa(*((uuid_t *)new_meta))); + new_meta += 16; + if (*(new_meta++)) { + printf (" name = %s\n", new_meta); + new_meta += (strlen(new_meta) + 1); + } + printf ("}\n"); + +#pragma fragment STRING + if (*(new_meta++)) { + printf ("@ARGNAME@ = %s\n", new_meta); + new_meta += (strlen(new_meta) + 1); + } + +#pragma fragment VECTOR + { + size_t len = *((size_t *)new_meta); + new_meta += sizeof(len); + printf ("@ARGNAME@ = <%zu bytes>\n", len); + new_data += len; + } + +#pragma fragment IATT + { + ia_prot_t *myprot = ((ia_prot_t *)new_meta); + printf ("@ARGNAME@ = iatt {\n"); + printf (" ia_prot = %c%c%c", + myprot->suid ? 'S' : '-', + myprot->sgid ? 'S' : '-', + myprot->sticky ? 'T' : '-'); + printf ("%c%c%c", + myprot->owner.read ? 'r' : '-', + myprot->owner.write ? 'w' : '-', + myprot->owner.exec ? 'x' : '-'); + printf ("%c%c%c", + myprot->group.read ? 'r' : '-', + myprot->group.write ? 'w' : '-', + myprot->group.exec ? 'x' : '-'); + printf ("%c%c%c\n", + myprot->other.read ? 'r' : '-', + myprot->other.write ? 'w' : '-', + myprot->other.exec ? 
'x' : '-'); + new_meta += sizeof(ia_prot_t); + uint32_t *myints = (uint32_t *)new_meta; + printf (" ia_uid = %u\n", myints[0]); + printf (" ia_gid = %u\n", myints[1]); + printf (" ia_atime = %u.%09u\n", myints[2], myints[3]); + printf (" ia_mtime = %u.%09u\n", myints[4], myints[5]); + new_meta += sizeof(*myints) * 6; + } + +#pragma fragment FOP +void +fdl_dump_@NAME@ (char **old_meta, char **old_data) +{ + char *new_meta = *old_meta; + char *new_data = *old_data; + + /* TBD: word size/endianness */ +@FUNCTION_BODY@ + + *old_meta = new_meta; + *old_data = new_data; +} + +#pragma fragment CASE + case GF_FOP_@UPNAME@: + printf ("=== GF_FOP_@UPNAME@\n"); + fdl_dump_@NAME@ (&new_meta, &new_data); + break; + +#pragma fragment EPILOG +int +fdl_dump (char **old_meta, char **old_data) +{ + char *new_meta = *old_meta; + char *new_data = *old_data; + static glfs_t *fs = NULL; + int recognized = 1; + event_header_t *eh; + + /* + * We don't really call anything else in GFAPI, but this is the most + * convenient way to satisfy all of the spurious dependencies on how it + * or glusterfsd initialize (e.g. setting up THIS). + */ + if (!fs) { + fs = glfs_new ("dummy"); + } + + eh = (event_header_t *)new_meta; + new_meta += sizeof (*eh); + + /* TBD: check event_type instead of assuming NEW_REQUEST */ + + switch (eh->fop_type) { +@SWITCH_BODY@ + + default: + printf ("unknown fop %u\n", eh->fop_type); + recognized = 0; + } + + *old_meta = new_meta; + *old_data = new_data; + return recognized; +} diff --git a/xlators/experimental/fdl/src/fdl-tmpl.c b/xlators/experimental/fdl/src/fdl-tmpl.c new file mode 100644 index 00000000000..8fcc6a8d6ff --- /dev/null +++ b/xlators/experimental/fdl/src/fdl-tmpl.c @@ -0,0 +1,506 @@ +/* + Copyright (c) 2015 Red Hat, Inc. + This file is part of GlusterFS. 
+ + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#include "call-stub.h" +#include "iatt.h" +#include "defaults.h" +#include "syscall.h" +#include "xlator.h" +#include "jnl-types.h" + +/* TBD: make tunable */ +#define META_FILE_SIZE (1 << 20) +#define DATA_FILE_SIZE (1 << 24) + +enum gf_fdl { + gf_fdl_mt_fdl_private_t = gf_common_mt_end + 1, + gf_fdl_mt_end +}; + +typedef struct { + char *type; + off_t size; + char *path; + int fd; + void * ptr; + off_t max_offset; +} log_obj_t; + +typedef struct { + struct list_head reqs; + pthread_mutex_t req_lock; + pthread_cond_t req_cond; + char *log_dir; + pthread_t worker; + gf_boolean_t should_stop; + gf_boolean_t change_term; + log_obj_t meta_log; + log_obj_t data_log; + int term; + int first_term; +} fdl_private_t; + +void +fdl_enqueue (xlator_t *this, call_stub_t *stub) +{ + fdl_private_t *priv = this->private; + + pthread_mutex_lock (&priv->req_lock); + list_add_tail (&stub->list, &priv->reqs); + pthread_mutex_unlock (&priv->req_lock); + + pthread_cond_signal (&priv->req_cond); +} + +#pragma generate + +char * +fdl_open_term_log (xlator_t *this, log_obj_t *obj, int term) +{ + fdl_private_t *priv = this->private; + int ret; + char * ptr = NULL; + + /* + * Use .jnl instead of .log so that we don't get test info (mistakenly) + * appended to our journal files. 
+ */ + if (this->ctx->cmd_args.log_ident) { + ret = gf_asprintf (&obj->path, "%s/%s-%s-%d.jnl", + priv->log_dir, this->ctx->cmd_args.log_ident, + obj->type, term); + } + else { + ret = gf_asprintf (&obj->path, "%s/fubar-%s-%d.jnl", + priv->log_dir, obj->type, term); + } + if ((ret <= 0) || !obj->path) { + gf_log (this->name, GF_LOG_ERROR, + "failed to construct log-file path"); + goto err; + } + + gf_log (this->name, GF_LOG_INFO, "opening %s (size %ld)", + obj->path, obj->size); + + obj->fd = open (obj->path, O_RDWR|O_CREAT|O_TRUNC, 0666); + if (obj->fd < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to open log file (%s)", strerror(errno)); + goto err; + } + +#if !defined(GF_BSD_HOST_OS) + /* + * NetBSD can just go die in a fire. Even though it claims to support + * fallocate/posix_fallocate they don't actually *do* anything so the + * file size remains zero. Then mmap succeeds anyway, but any access + * to the mmap'ed region will segfault. It would be acceptable for + * fallocate to do what it says, for mmap to fail, or for access to + * extend the file. NetBSD managed to hit the trifecta of Getting + * Everything Wrong, and debugging in that environment to get this far + * has already been painful enough (systems I worked on in 1990 were + * better that way). We'll fall through to the lseek/write method, and + * performance will be worse, and TOO BAD. + */ + if (sys_fallocate(obj->fd,0,0,obj->size) < 0) +#endif + { + gf_log (this->name, GF_LOG_WARNING, + "failed to fallocate space for log file"); + /* Have to do this the ugly page-faulty way. 
*/ + (void) sys_lseek (obj->fd, obj->size-1, SEEK_SET); + (void) sys_write (obj->fd, "", 1); + } + + ptr = mmap (NULL, obj->size, PROT_WRITE, MAP_SHARED, obj->fd, 0); + if (ptr == MAP_FAILED) { + gf_log (this->name, GF_LOG_ERROR, "failed to mmap log (%s)", + strerror(errno)); + /* + * mmap reports failure as MAP_FAILED, which is not NULL, but + * our callers detect failure by testing for NULL. + */ + ptr = NULL; + goto err; + } + + obj->ptr = ptr; + obj->max_offset = 0; + return ptr; + +err: + if (obj->fd >= 0) { + sys_close (obj->fd); + obj->fd = (-1); + } + if (obj->path) { + GF_FREE (obj->path); + obj->path = NULL; + } + return ptr; +} + +void +fdl_close_term_log (xlator_t *this, log_obj_t *obj) +{ + fdl_private_t *priv = this->private; + + if (obj->ptr) { + (void) munmap (obj->ptr, obj->size); + obj->ptr = NULL; + } + + if (obj->fd >= 0) { + gf_log (this->name, GF_LOG_INFO, + "truncating term %d %s journal to %ld", + priv->term, obj->type, obj->max_offset); + if (sys_ftruncate(obj->fd,obj->max_offset) < 0) { + gf_log (this->name, GF_LOG_WARNING, + "failed to truncate journal (%s)", + strerror(errno)); + } + sys_close (obj->fd); + obj->fd = (-1); + } + + if (obj->path) { + GF_FREE (obj->path); + obj->path = NULL; + } +} + +gf_boolean_t +fdl_change_term (xlator_t *this, char **meta_ptr, char **data_ptr) +{ + fdl_private_t *priv = this->private; + + fdl_close_term_log (this, &priv->meta_log); + fdl_close_term_log (this, &priv->data_log); + + ++(priv->term); + + *meta_ptr = fdl_open_term_log (this, &priv->meta_log, priv->term); + if (!*meta_ptr) { + return _gf_false; + } + + *data_ptr = fdl_open_term_log (this, &priv->data_log, priv->term); + if (!*data_ptr) { + return _gf_false; + } + + return _gf_true; +} + +void * +fdl_worker (void *arg) +{ + xlator_t *this = arg; + fdl_private_t *priv = this->private; + call_stub_t *stub; + char * meta_ptr = NULL; + off_t *meta_offset = &priv->meta_log.max_offset; + char * data_ptr = NULL; + off_t *data_offset = &priv->data_log.max_offset; + unsigned long base_as_ul; + void * msync_ptr; + size_t msync_len; + gf_boolean_t recycle; + void *err_label = 
&&err_unlocked; + + priv->meta_log.type = "meta"; + priv->meta_log.size = META_FILE_SIZE; + priv->meta_log.path = NULL; + priv->meta_log.fd = (-1); + priv->meta_log.ptr = NULL; + + priv->data_log.type = "data"; + priv->data_log.size = DATA_FILE_SIZE; + priv->data_log.path = NULL; + priv->data_log.fd = (-1); + priv->data_log.ptr = NULL; + + /* TBD: initial term should come from persistent storage (e.g. etcd) */ + priv->first_term = ++(priv->term); + meta_ptr = fdl_open_term_log (this, &priv->meta_log, priv->term); + if (!meta_ptr) { + goto *err_label; + } + data_ptr = fdl_open_term_log (this, &priv->data_log, priv->term); + if (!data_ptr) { + fdl_close_term_log (this, &priv->meta_log); + goto *err_label; + } + + for (;;) { + pthread_mutex_lock (&priv->req_lock); + err_label = &&err_locked; + /* + * Test the control flags before every wait, not only after a + * wakeup: a signal sent while we were busy with a request is + * not remembered by the condition variable, so waiting first + * could sleep through a stop or term-change request. + */ + while (list_empty(&priv->reqs)) { + if (priv->should_stop) { + goto *err_label; + } + if (priv->change_term) { + if (!fdl_change_term(this, &meta_ptr, + &data_ptr)) { + goto *err_label; + } + priv->change_term = _gf_false; + continue; + } + pthread_cond_wait (&priv->req_cond, &priv->req_lock); + } + stub = list_entry (priv->reqs.next, call_stub_t, list); + list_del_init (&stub->list); + pthread_mutex_unlock (&priv->req_lock); + err_label = &&err_unlocked; + /* + * TBD: batch requests + * + * What we should do here is gather up *all* of the requests + * that have accumulated since we were last at this point, + * blast them all out in one big writev, and then dispatch them + * all before coming back for more. That maximizes throughput, + * at some cost to latency (due to queuing effects at the log + * stage). Note that we're likely to be above io-threads, so + * the dispatch itself will be parallelized (at further cost to + * latency). For now, we just do the simplest thing and handle + * one request all the way through before fetching the next. + * + * So, why mmap/msync instead of writev/fdatasync? Because it's + * faster. Much faster. 
So much faster that I half-suspect + * cheating, but it's more convenient for now than having to + * ensure that everything's page-aligned for O_DIRECT (the only + * alternative that still might avoid ridiculous levels of + * local-FS overhead). + * + * TBD: check that msync really does get our data to disk. + */ + gf_log (this->name, GF_LOG_DEBUG, + "logging %u+%u bytes for op %d", + stub->jnl_meta_len, stub->jnl_data_len, stub->fop); + recycle = _gf_false; + if ((*meta_offset + stub->jnl_meta_len) > priv->meta_log.size) { + recycle = _gf_true; + } + if ((*data_offset + stub->jnl_data_len) > priv->data_log.size) { + recycle = _gf_true; + } + if (recycle && !fdl_change_term(this,&meta_ptr,&data_ptr)) { + goto *err_label; + } + meta_ptr = priv->meta_log.ptr; + data_ptr = priv->data_log.ptr; + gf_log (this->name, GF_LOG_DEBUG, "serializing to %p/%p", + meta_ptr + *meta_offset, data_ptr + *data_offset); + stub->serialize (stub, meta_ptr + *meta_offset, + data_ptr + *data_offset); + if (stub->jnl_meta_len > 0) { + base_as_ul = (unsigned long) (meta_ptr + *meta_offset); + msync_ptr = (void *) (base_as_ul & ~0x0fff); + msync_len = (size_t) (base_as_ul & 0x0fff); + if (msync (msync_ptr, msync_len+stub->jnl_meta_len, + MS_SYNC) < 0) { + gf_log (this->name, GF_LOG_WARNING, + "failed to log request meta (%s)", + strerror(errno)); + } + *meta_offset += stub->jnl_meta_len; + } + if (stub->jnl_data_len > 0) { + base_as_ul = (unsigned long) (data_ptr + *data_offset); + msync_ptr = (void *) (base_as_ul & ~0x0fff); + msync_len = (size_t) (base_as_ul & 0x0fff); + if (msync (msync_ptr, msync_len+stub->jnl_data_len, + MS_SYNC) < 0) { + gf_log (this->name, GF_LOG_WARNING, + "failed to log request data (%s)", + strerror(errno)); + } + *data_offset += stub->jnl_data_len; + } + call_resume (stub); + } + +err_locked: + pthread_mutex_unlock (&priv->req_lock); +err_unlocked: + fdl_close_term_log (this, &priv->meta_log); + fdl_close_term_log (this, &priv->data_log); + return NULL; +} + 
+int32_t +fdl_ipc (call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata) +{ + fdl_private_t *priv = this->private; + dict_t *tdict; + int32_t gt_err = EIO; + + switch (op) { + + case FDL_IPC_CHANGE_TERM: + gf_log (this->name, GF_LOG_INFO, "got CHANGE_TERM op"); + priv->change_term = _gf_true; + pthread_cond_signal (&priv->req_cond); + STACK_UNWIND_STRICT (ipc, frame, 0, 0, NULL); + break; + + case FDL_IPC_GET_TERMS: + gf_log (this->name, GF_LOG_INFO, "got GET_TERMS op"); + tdict = dict_new (); + if (!tdict) { + gt_err = ENOMEM; + goto gt_done; + } + if (dict_set_int32(tdict,"first",priv->first_term) != 0) { + goto gt_done; + } + if (dict_set_int32(tdict,"last",priv->term) != 0) { + goto gt_done; + } + gt_err = 0; + gt_done: + if (gt_err) { + STACK_UNWIND_STRICT (ipc, frame, -1, gt_err, NULL); + } else { + STACK_UNWIND_STRICT (ipc, frame, 0, 0, tdict); + } + if (tdict) { + dict_unref (tdict); + } + break; + + default: + STACK_WIND_TAIL (frame, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ipc, + op, xdata); + } + + return 0; +} + +int +fdl_init (xlator_t *this) +{ + fdl_private_t *priv = NULL; + + priv = GF_CALLOC (1, sizeof (*priv), gf_fdl_mt_fdl_private_t); + if (!priv) { + gf_log (this->name, GF_LOG_ERROR, + "failed to allocate fdl_private"); + goto err; + } + + INIT_LIST_HEAD (&priv->reqs); + if (pthread_mutex_init (&priv->req_lock, NULL) != 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to initialize req_lock"); + goto err; + } + if (pthread_cond_init (&priv->req_cond, NULL) != 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to initialize req_cond"); + goto err; + } + + GF_OPTION_INIT ("log-path", priv->log_dir, path, err); + + /* + * Publish priv before starting the worker: fdl_worker reads + * this->private as its very first step, so it must already be set + * when the thread begins to run. + */ + this->private = priv; + + if (pthread_create(&priv->worker,NULL,fdl_worker,this) != 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to start fdl_worker"); + goto err; + } + + /* + * The rest of the fop table is automatically generated, so this is a + * bit cleaner than messing with the generation to add a hand-written + * exception. 
+ */ + this->fops->ipc = fdl_ipc; + + return 0; + +err: + if (priv) { + /* Don't leave this->private pointing at freed memory. */ + this->private = NULL; + GF_FREE(priv); + } + return -1; +} + +void +fdl_fini (xlator_t *this) +{ + fdl_private_t *priv = this->private; + + if (priv) { + priv->should_stop = _gf_true; + pthread_cond_signal (&priv->req_cond); + pthread_join (priv->worker, NULL); + GF_FREE(priv); + } +} + +int +fdl_reconfigure (xlator_t *this, dict_t *options) +{ + fdl_private_t *priv = this->private; + + /* The key must match the one declared in options[] ("log-path"). */ + GF_OPTION_RECONF ("log-path", priv->log_dir, options, path, out); + /* TBD: react if it changed */ + +out: + return 0; +} + +int32_t +mem_acct_init (xlator_t *this) +{ + int ret = -1; + + GF_VALIDATE_OR_GOTO ("fdl", this, out); + + ret = xlator_mem_acct_init (this, gf_fdl_mt_end + 1); + + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, "Memory accounting init" + " failed"); + return ret; + } +out: + return ret; +} + +class_methods_t class_methods = { + .init = fdl_init, + .fini = fdl_fini, + .reconfigure = fdl_reconfigure, + .notify = default_notify, +}; + +struct volume_options options[] = { + { .key = {"log-path"}, + .type = GF_OPTION_TYPE_PATH, + .default_value = DEFAULT_LOG_FILE_DIRECTORY, + .description = "Directory for FDL files." + }, + { .key = {NULL} }, +}; + +struct xlator_cbks cbks = { + .release = default_release, + .releasedir = default_releasedir, + .forget = default_forget, +}; diff --git a/xlators/experimental/fdl/src/gen_dumper.py b/xlators/experimental/fdl/src/gen_dumper.py new file mode 100755 index 00000000000..42db55d2cb3 --- /dev/null +++ b/xlators/experimental/fdl/src/gen_dumper.py @@ -0,0 +1,116 @@ +#!/usr/bin/python + +import os +import re +import sys + +curdir = os.path.dirname (sys.argv[0]) +gendir = os.path.join (curdir, '../../../../libglusterfs/src') +sys.path.append (gendir) +from generator import ops, fop_subs, cbk_subs, generate + +# See the big header comment at the start of gen_fdl.py to see how the stages +# fit together. 
The big difference here is that *all* of the C code is in the +# template file as labelled fragments, instead of as Python strings. That +# makes it much easier to edit in one place, with proper syntax highlighting +# and indentation. +# +# Stage 1 uses type-specific fragments to generate FUNCTION_BODY, instead of +# LEN_*_TEMPLATE and SERLZ_*_TEMPLATE to generate LEN_CODE and SER_CODE. +# +# Stage 2 uses the FOP and CASE fragments instead of RECON_TEMPLATE and +# FOP_TEMPLATE. The expanded FOP code (including FUNCTION_BODY substitution +# in the middle of each function) is emitted immediately; the expanded CASE +# code is saved for the next stage. +# +# Stage 3 uses the PROLOG and EPILOG fragments, with the expanded CASE code +# in the middle of EPILOG, to generate the whole output file. +# +# Another way of looking at it is to consider how the fragments appear in +# the final output: +# +# PROLOG +# FOP (expanded for CREATE) +# FOP before FUNCTION_BODY +# LOC, INTEGER, GFID, etc. (one per arg, by type) +# FOP after FUNCTION_BODY +# FOP (expanded for WRITEV) +# FOP before FUNCTION_BODY +# GFID, VECTOR, etc. 
(on per arg, by type) +# FOP after FUNCTION_BODY +# (more FOPs) +# EPILOG +# EPILOG before CASE +# CASE statements (one per fop) +# EPILOG after CASE + +typemap = { + 'dict_t *': ( "DICT", ""), + 'fd_t *': ( "GFID", ""), + 'dev_t': ( "DOUBLE", "%ld (0x%lx)"), + 'gf_xattrop_flags_t': ( "INTEGER", "%d (0x%x)"), + 'int32_t': ( "INTEGER", "%d (0x%x)"), + 'mode_t': ( "INTEGER", "%d (0x%x)"), + 'off_t': ( "DOUBLE", "%ld (0x%lx)"), + 'size_t': ( "DOUBLE", "%ld (0x%lx)"), + 'uint32_t': ( "INTEGER", "%d (0x%x)"), + 'loc_t *': ( "LOC", ""), + 'const char *': ( "STRING", ""), + 'struct iovec *': ( "VECTOR", ""), + 'struct iatt *': ( "IATT", ""), +} + +def get_special_subs (args): + code = "" + for arg in args: + if (arg[0] != 'fop-arg') or (len(arg) < 4): + continue + recon_type, recon_fmt = typemap[arg[2]] + code += fragments[recon_type].replace("@ARGNAME@",arg[3]) \ + .replace("@FORMAT@",recon_fmt) + return code + +def gen_functions (): + code = "" + for name, value in ops.iteritems(): + if "journal" not in [ x[0] for x in value ]: + continue + fop_subs[name]["@FUNCTION_BODY@"] = get_special_subs(value) + # Print the FOP fragment with @FUNCTION_BODY@ in the middle. + code += generate(fragments["FOP"],name,fop_subs) + return code + +def gen_cases (): + code = "" + for name, value in ops.iteritems(): + if "journal" not in [ x[0] for x in value ]: + continue + # Add the CASE fragment for this fop. 
+ code += generate(fragments["CASE"],name,fop_subs) + return code + +def load_fragments (path="recon-tmpl.c"): + pragma_re = re.compile('pragma fragment (.*)') + cur_symbol = None + cur_value = "" + result = {} + for line in open(path,"r").readlines(): + m = pragma_re.search(line) + if m: + if cur_symbol: + result[cur_symbol] = cur_value + cur_symbol = m.group(1) + cur_value = "" + else: + cur_value += line + if cur_symbol: + result[cur_symbol] = cur_value + return result + +if __name__ == "__main__": + fragments = load_fragments(sys.argv[1]) + print "/* BEGIN GENERATED CODE - DO NOT MODIFY */" + print fragments["PROLOG"] + print gen_functions() + print fragments["EPILOG"].replace("@SWITCH_BODY@",gen_cases()) + print "/* END GENERATED CODE */" diff --git a/xlators/experimental/fdl/src/gen_fdl.py b/xlators/experimental/fdl/src/gen_fdl.py new file mode 100755 index 00000000000..7f6b1aaaeaa --- /dev/null +++ b/xlators/experimental/fdl/src/gen_fdl.py @@ -0,0 +1,328 @@ +#!/usr/bin/python + +import os +import sys + +curdir = os.path.dirname (sys.argv[0]) +gendir = os.path.join (curdir, '../../../../libglusterfs/src') +sys.path.append (gendir) +from generator import ops, fop_subs, cbk_subs, generate + +# Generation occurs in three stages. In this case, it actually makes more +# sense to discuss them in the *opposite* order of that in which they +# actually happen. +# +# Stage 3 is to insert all of the generated code into a file, replacing the +# "#pragma generate" that's already there. The file can thus contain all +# sorts of stuff that's not specific to one fop, either before or after the +# generated code as appropriate. +# +# Stage 2 is to generate all of the code *for a particular fop*, using a +# string-valued template plus a table of substitution values. Most of these +# are built in to the generator itself. However, we also add a couple that +# are specific to this particular translator - LEN_CODE and SER_CODE. 
These +# are per-fop functions to get the length or the contents (respectively) of +# what we'll put in the log. As with stage 3 allowing per-file boilerplate +# before and after generated code, this allows per-fop boilerplate before and +# after generated code. +# +# Stage 1, therefore, is to create the LEN_CODE and SER_CODE substitutions for +# each fop, and put them in the same table where e.g. NAME and SHORT_ARGS +# already are. We do this by looking at the fop-description table in the +# generator module, then doing our own template substitution to plug each +# specific argument name into another string-valued template. +# +# So, what does this leave us with in terms of variables and files? +# +# For stage 1, we have a series of LEN_*_TEMPLATE and SERLZ_*_TEMPLATE +# strings, which are used to generate the length and serialization code for +# each argument type. +# +# For stage 2, we have a bunch of *_TEMPLATE strings (no LEN_ or SERLZ_ +# prefix), which are used (along with the output from stage 1) to generate +# whole functions. +# +# For stage 3, we have a whole separate file (fdl-tmpl.c) into which we insert +# the collection of all functions defined in stage 2. + + +LEN_TEMPLATE = """ +void +fdl_len_@NAME@ (call_stub_t *stub) +{ + uint32_t meta_len = sizeof (event_header_t); + uint32_t data_len = 0; + + /* TBD: global stuff, e.g. 
uid/gid */ +@LEN_CODE@ + + /* TBD: pad extension length */ + stub->jnl_meta_len = meta_len; + stub->jnl_data_len = data_len; +} +""" + +SER_TEMPLATE = """ +void +fdl_serialize_@NAME@ (call_stub_t *stub, char *meta_buf, char *data_buf) +{ + event_header_t *eh; + unsigned long offset = 0; + + /* TBD: word size/endianness */ + eh = (event_header_t *)meta_buf; + eh->event_type = NEW_REQUEST; + eh->fop_type = GF_FOP_@UPNAME@; + eh->request_id = 0; // TBD + meta_buf += sizeof (*eh); +@SER_CODE@ + /* TBD: pad extension length */ + eh->ext_length = offset; +} +""" + +CBK_TEMPLATE = """ +int32_t +fdl_@NAME@_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + @LONG_ARGS@) +{ + STACK_UNWIND_STRICT (@NAME@, frame, op_ret, op_errno, + @SHORT_ARGS@); + return 0; +} +""" + +CONTINUE_TEMPLATE = """ +int32_t +fdl_@NAME@_continue (call_frame_t *frame, xlator_t *this, + @LONG_ARGS@) +{ + STACK_WIND (frame, fdl_@NAME@_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->@NAME@, + @SHORT_ARGS@); + return 0; +} + +""" + +FOP_TEMPLATE = """ +int32_t +fdl_@NAME@ (call_frame_t *frame, xlator_t *this, + @LONG_ARGS@) +{ + call_stub_t *stub; + + stub = fop_@NAME@_stub (frame, default_@NAME@, + @SHORT_ARGS@); + fdl_len_@NAME@ (stub); + stub->serialize = fdl_serialize_@NAME@; + fdl_enqueue (this, stub); + + return 0; +} +""" + +LEN_DICT_TEMPLATE = """ + if (@SRC@) { + data_pair_t *memb; + for (memb = @SRC@->members_list; memb; memb = memb->next) { + meta_len += sizeof(int); + meta_len += strlen(memb->key) + 1; + meta_len += sizeof(int); + meta_len += memb->value->len; + } + } + meta_len += sizeof(int); +""" + +LEN_GFID_TEMPLATE = """ + meta_len += 16; +""" + +LEN_INTEGER_TEMPLATE = """ + meta_len += sizeof (@SRC@); +""" + +# 16 for gfid, 16 for pargfid, 1 for flag, 0/1 for terminating NUL +LEN_LOC_TEMPLATE = """ + if (@SRC@.name) { + meta_len += (strlen (@SRC@.name) + 34); + } else { + meta_len += 33; + } +""" + +LEN_STRING_TEMPLATE = """ + if (@SRC@) { + 
meta_len += (strlen (@SRC@) + 2); /* flag byte + string + NUL */ + } else { + meta_len += 1; + } +""" + +LEN_VECTOR_TEMPLATE = """ + meta_len += sizeof(size_t); + data_len += iov_length (@VEC@, @CNT@); +""" + +LEN_IATT_TEMPLATE = """ + meta_len += sizeof(@SRC@.ia_prot); + meta_len += sizeof(@SRC@.ia_uid); + meta_len += sizeof(@SRC@.ia_gid); + meta_len += sizeof(@SRC@.ia_atime); + meta_len += sizeof(@SRC@.ia_atime_nsec); + meta_len += sizeof(@SRC@.ia_mtime); + meta_len += sizeof(@SRC@.ia_mtime_nsec); +""" + +SERLZ_DICT_TEMPLATE = """ + if (@SRC@) { + data_pair_t *memb; + for (memb = @SRC@->members_list; memb; memb = memb->next) { + *((int *)(meta_buf+offset)) = strlen(memb->key) + 1; + offset += sizeof(int); + strcpy (meta_buf+offset, memb->key); + offset += strlen(memb->key) + 1; + *((int *)(meta_buf+offset)) = memb->value->len; + offset += sizeof(int); + memcpy (meta_buf+offset, memb->value->data, memb->value->len); + offset += memb->value->len; + } + } + *((int *)(meta_buf+offset)) = 0; + offset += sizeof(int); +""" + +SERLZ_GFID_TEMPLATE = """ + memcpy (meta_buf+offset, @SRC@->inode->gfid, 16); + offset += 16; +""" + +SERLZ_INTEGER_TEMPLATE = """ + memcpy (meta_buf+offset, &@SRC@, sizeof(@SRC@)); + offset += sizeof(@SRC@); +""" + +SERLZ_LOC_TEMPLATE = """ + memcpy (meta_buf+offset, @SRC@.gfid, 16); + offset += 16; + memcpy (meta_buf+offset, @SRC@.pargfid, 16); + offset += 16; + if (@SRC@.name) { + *(meta_buf+offset) = 1; + ++offset; + strcpy (meta_buf+offset, @SRC@.name); + offset += (strlen (@SRC@.name) + 1); + } else { + *(meta_buf+offset) = 0; + ++offset; + } +""" + +# A one-byte presence flag, then (if present) the string and its terminating +# NUL. The offset must step over the NUL as well, as the LOC name does above, +# or the next field would overwrite the terminator. +SERLZ_STRING_TEMPLATE = """ + if (@SRC@) { + *(meta_buf+offset) = 1; + ++offset; + strcpy (meta_buf+offset, @SRC@); + offset += (strlen (@SRC@) + 1); + } else { + *(meta_buf+offset) = 0; + ++offset; + } +""" + +SERLZ_VECTOR_TEMPLATE = """ + *((size_t *)(meta_buf+offset)) = iov_length (@VEC@, @CNT@); + offset += sizeof(size_t); + int32_t i; + for (i = 0; i < @CNT@; ++i) { + memcpy (data_buf, @VEC@[i].iov_base, @VEC@[i].iov_len); + 
data_buf += @VEC@[i].iov_len; + } +""" + +# We don't need to save all of the fields - only those affected by chown, +# chgrp, chmod, and utime. +SERLZ_IATT_TEMPLATE = """ + *((ia_prot_t *)(meta_buf+offset)) = @SRC@.ia_prot; + offset += sizeof(@SRC@.ia_prot); + *((uint32_t *)(meta_buf+offset)) = @SRC@.ia_uid; + offset += sizeof(@SRC@.ia_uid); + *((uint32_t *)(meta_buf+offset)) = @SRC@.ia_gid; + offset += sizeof(@SRC@.ia_gid); + *((uint32_t *)(meta_buf+offset)) = @SRC@.ia_atime; + offset += sizeof(@SRC@.ia_atime); + *((uint32_t *)(meta_buf+offset)) = @SRC@.ia_atime_nsec; + offset += sizeof(@SRC@.ia_atime_nsec); + *((uint32_t *)(meta_buf+offset)) = @SRC@.ia_mtime; + offset += sizeof(@SRC@.ia_mtime); + *((uint32_t *)(meta_buf+offset)) = @SRC@.ia_mtime_nsec; + offset += sizeof(@SRC@.ia_mtime_nsec); +""" + +typemap = { + 'dict_t *': ( LEN_DICT_TEMPLATE, SERLZ_DICT_TEMPLATE), + 'fd_t *': ( LEN_GFID_TEMPLATE, SERLZ_GFID_TEMPLATE), + 'dev_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE), + 'gf_xattrop_flags_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE), + 'int32_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE), + 'mode_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE), + 'off_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE), + 'size_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE), + 'uint32_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE), + 'loc_t *': ( LEN_LOC_TEMPLATE, SERLZ_LOC_TEMPLATE), + 'const char *': ( LEN_STRING_TEMPLATE, SERLZ_STRING_TEMPLATE), + 'struct iatt *': ( LEN_IATT_TEMPLATE, SERLZ_IATT_TEMPLATE), +} + +def get_special_subs (args): + len_code = "" + ser_code = "" + for arg in args: + if (arg[0] != 'fop-arg') or (len(arg) < 4): + continue + # Let this throw an exception if we get an unknown field name. The + # broken build will remind whoever messed with the stub code that a + # corresponding update is needed here. + if arg[3] == "vector": + # Make it as obvious as possible that this is a special case. 
# Copy the template named by sys.argv[1] to stdout, replacing the line that
# carries the '#pragma generate' marker with the generated fop code.
with open(sys.argv[1], 'r') as tmpl_file:
    for tmpl_line in tmpl_file:
        if tmpl_line.find('#pragma generate') != -1:
            print ("/* BEGIN GENERATED CODE - DO NOT MODIFY */")
            gen_fdl()
            print ("/* END GENERATED CODE */")
        else:
            # Strip only the line terminator.  Slicing off the last
            # character unconditionally would eat real text when the final
            # line of the template has no trailing newline.
            print (tmpl_line.rstrip('\n'))
The big difference here is that *all* of the C code is in the +# template file as labelled fragments, instead of as Python strings. That +# makes it much easier to edit in one place, with proper syntax highlighting +# and indentation. +# +# Stage 1 uses type-specific fragments to generate FUNCTION_BODY, instead of +# LEN_*_TEMPLATE and SERLZ_*_TEMPLATE to generate LEN_CODE and SER_CODE. +# +# Stage 2 uses the FOP and CASE fragments instead of RECON_TEMPLATE and +# FOP_TEMPLATE. The expanded FOP code (including FUNCTION_BODY substitution +# in the middle of each function) is emitted immediately; the expanded CASE +# code is saved for the next stage. +# +# Stage 3 uses the PROLOG and EPILOG fragments, with the expanded CASE code +# in the middle of EPILOG, to generate the whole output file. +# +# Another way of looking at it is to consider how the fragments appear in +# the final output: +# +# PROLOG +# FOP (expanded for CREATE) +# FOP before FUNCTION_BODY +# LOC, INTEGER, GFID, etc. (one per arg, by type) +# FOP after FUNCTION_BODY +# FOP (expanded for WRITEV) +# FOP before FUNCTION_BODY +# GFID, VECTOR, etc. 
def get_special_subs (name, args, fop_type):
    """Build the per-fop pieces that get substituted into the FOP fragment.

    name     -- fop name (e.g. 'create', 'writev').
    args     -- the entries registered for that fop; each is a tuple whose
                first element is a tag ('fop-arg', 'extra', 'link', ...).
    fop_type -- the value attached to the fop's 'journal' entry; only
                "entry-op" is treated specially here.

    Returns (code, links, s_args_str, cleanups):
      code       -- argument-decoding C code, one fragment per argument
      links      -- LINK fragments for 'link' entries
      s_args_str -- comma-separated argument list for the syncop call
      cleanups   -- *_CLEANUP fragments, last-decoded argument first

    An argument type missing from typemap raises KeyError.
    """
    code = ""
    cleanups = ""
    links = ""
    s_args = []
    for arg in args:
        if arg[0] == 'extra':
            code += "\t%s %s;\n\n" % (arg[2], arg[1])
            s_args.append(arg[3])
            continue
        if arg[0] == 'link':
            links += fragments["LINK"].replace("@INODE_ARG@",arg[1]) \
                                      .replace("@IATT_ARG@",arg[2])
            continue
        if arg[0] != 'fop-arg':
            continue
        if (name, arg[1]) == ('writev', 'count'):
            # Special case: just skip this.  We can't mark it as 'nosync'
            # because of the way the translator and dumper generators look for
            # that after 'stub-name' which we don't define.  Instead of adding a
            # bunch of generic infrastructure for this one case, just pound it
            # here.
            continue
        recon_type = typemap[arg[2]]
        if (name == "create") and (arg[1] == "fd"):
            # Special case: fd for create is new, not looked up.
            recon_type = "NEW_FD"
        elif (recon_type == "LOC") and (fop_type == "entry-op"):
            # Need to treat this differently for inode vs. entry ops.
            # Special case: link source is treated like inode-op.
            if (name != "link") or (arg[1] != "oldloc"):
                recon_type = "PARENT_LOC"
        code += fragments[recon_type].replace("@ARGNAME@",arg[1]) \
                                     .replace("@ARGTYPE@",arg[2])
        cleanup_key = recon_type + "_CLEANUP"
        if cleanup_key in fragments:
            # Cleanups must come out in *reverse* argument order.  Each
            # argument fragment points err_label at its own cleanup label,
            # and the labels fall through to whatever follows them.  With
            # forward order, a failure while decoding a later argument would
            # jump to an earlier argument's label and then fall through into
            # cleanup code for arguments that were never set up.
            cleanups = fragments[cleanup_key].replace("@ARGNAME@",arg[1]) \
                       + cleanups
        if 'nosync' in arg[4:]:
            # Decoded (to keep the journal offsets right) but not passed on.
            code += "\t(void)%s;\n" % arg[1]
            continue
        if arg[2] in ("loc_t *", "struct iatt *"):
            # These are passed as pointers to the syncop, but they're actual
            # structures in the generated code.
            s_args.append("&"+arg[1])
        else:
            s_args.append(arg[1])
    # We have to handle a couple of special cases here, because the syncops
    # were defined with a different argument order than the fops they're
    # based on.
    if name == 'writev':
        # Swap 'flags' and 'iobref'.  Also, we need to add the iov count, which
        # is not stored in or read from the journal.  There are other ways to
        # do that, but this is the only place we need anything similar and we
        # already have to treat it as a special case so this is simplest.
        s_args_str = 'fd, &vector, 1, off, iobref, flags, xdata'
    elif name == 'symlink':
        # Swap 'linkpath' and 'loc'.
        s_args_str = '&loc, linkpath, &iatt, xdata'
    else:
        s_args_str = ", ".join(s_args)
    return code, links, s_args_str, cleanups
def load_fragments (path="recon-tmpl.c"):
    """Split a template file into its named fragments.

    Every line matching 'pragma fragment <NAME>' starts a new fragment; the
    lines up to the next such marker (or end of file) become that fragment's
    text, newlines included.  The marker lines themselves are dropped, as is
    anything that precedes the first marker.

    Returns a dict mapping fragment name to fragment text.  If a name occurs
    more than once, the last occurrence wins.
    """
    pragma_re = re.compile('pragma fragment (.*)')
    cur_symbol = None
    cur_value = ""
    result = {}
    # 'with' closes the template even if a read fails part-way through.
    with open(path,"r") as tmpl_file:
        for line in tmpl_file:
            m = pragma_re.search(line)
            if m:
                if cur_symbol:
                    result[cur_symbol] = cur_value
                # Drop trailing blanks / a stray '\r' so that lookups such as
                # fragments["PROLOG"] still work for templates saved with
                # CRLF line endings or trailing whitespace.
                cur_symbol = m.group(1).strip()
                cur_value = ""
            else:
                cur_value += line
    if cur_symbol:
        result[cur_symbol] = cur_value
    return result
+typedef struct { + uint8_t event_type; /* e.g. NEW_REQUEST */ + uint8_t fop_type; /* e.g. GF_FOP_SETATTR */ + uint16_t request_id; + uint32_t ext_length; +} event_header_t; + +enum { + FDL_IPC_BASE = 0xfeedbee5, /* ... and they make honey */ + FDL_IPC_CHANGE_TERM, + FDL_IPC_GET_TERMS, +}; diff --git a/xlators/experimental/fdl/src/logdump.c b/xlators/experimental/fdl/src/logdump.c new file mode 100644 index 00000000000..7c979c32a04 --- /dev/null +++ b/xlators/experimental/fdl/src/logdump.c @@ -0,0 +1,50 @@ +#include +#include +#include +#include +#include + +extern int fdl_dump (char **, char **); + +int +main (int argc, char **argv) +{ + int meta_fd = (-1); + char *meta_buf = NULL; + int data_fd = (-1); + char *data_buf = NULL; + + meta_fd = open (argv[1], O_RDONLY); + if (meta_fd < 0) { + perror ("open"); + return EXIT_FAILURE; + } + + /* TBD: get proper length */ + meta_buf = mmap (NULL, 1048576, PROT_READ, MAP_PRIVATE, meta_fd, 0); + if (meta_buf == MAP_FAILED) { + perror ("mmap"); + return EXIT_FAILURE; + } + + data_fd = open (argv[2], O_RDONLY); + if (data_fd < 0) { + perror ("open"); + return EXIT_FAILURE; + } + + /* TBD: get proper length */ + data_buf = mmap (NULL, 1048576, PROT_READ, MAP_PRIVATE, data_fd, 0); + if (data_buf == MAP_FAILED) { + perror ("mmap"); + return EXIT_FAILURE; + } + + for (;;) { + if (!fdl_dump(&meta_buf,&data_buf)) { + break; + } + } + + return EXIT_SUCCESS; +} diff --git a/xlators/experimental/fdl/src/recon-tmpl.c b/xlators/experimental/fdl/src/recon-tmpl.c new file mode 100644 index 00000000000..523bda39418 --- /dev/null +++ b/xlators/experimental/fdl/src/recon-tmpl.c @@ -0,0 +1,305 @@ +#pragma fragment PROLOG +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "fd.h" +#include "iatt.h" +#include "syncop.h" +#include "xlator.h" +#include "glfs-internal.h" + +#include "jnl-types.h" + +#define GFAPI_SUCCESS 0 + +inode_t * +recon_get_inode (glfs_t *fs, uuid_t gfid) +{ + inode_t 
*inode; + loc_t loc = {NULL,}; + struct iatt iatt; + int ret; + inode_t *newinode; + + inode = inode_find (fs->active_subvol->itable, gfid); + if (inode) { + printf ("=== FOUND %s IN TABLE\n", uuid_utoa(gfid)); + return inode; + } + + loc.inode = inode_new (fs->active_subvol->itable); + if (!loc.inode) { + return NULL; + } + gf_uuid_copy (loc.inode->gfid, gfid); + gf_uuid_copy (loc.gfid, gfid); + + printf ("=== DOING LOOKUP FOR %s\n", uuid_utoa(gfid)); + + ret = syncop_lookup (fs->active_subvol, &loc, &iatt, + NULL, NULL, NULL); + if (ret != GFAPI_SUCCESS) { + fprintf (stderr, "syncop_lookup failed (%d)\n", ret); + return NULL; + } + + newinode = inode_link (loc.inode, NULL, NULL, &iatt); + if (newinode) { + inode_lookup (newinode); + } + + return newinode; +} + +#pragma fragment DICT + dict_t *@ARGNAME@; + + @ARGNAME@ = dict_new(); + if (!@ARGNAME@) { + goto *err_label; + } + err_label = &&cleanup_@ARGNAME@; + + { + int key_len, data_len; + char *key_ptr; + int garbage; + for (;;) { + key_len = *((int *)new_meta); + new_meta += sizeof(int); + if (!key_len) { + break; + } + key_ptr = new_meta; + new_meta += key_len; + data_len = *((int *)new_meta); + new_meta += sizeof(int); + garbage = dict_set_static_bin (@ARGNAME@, key_ptr, + new_meta, data_len); + /* TBD: check error from dict_set_static_bin */ + (void)garbage; + new_meta += data_len; + } + } + +#pragma fragment DICT_CLEANUP +cleanup_@ARGNAME@: + dict_unref (@ARGNAME@); + +#pragma fragment DOUBLE + @ARGTYPE@ @ARGNAME@ = *((@ARGTYPE@ *)new_meta); + new_meta += sizeof(uint64_t); + +#pragma fragment FD + inode_t *@ARGNAME@_ino; + fd_t *@ARGNAME@; + + @ARGNAME@_ino = recon_get_inode (fs, *((uuid_t *)new_meta)); + new_meta += 16; + if (!@ARGNAME@_ino) { + goto *err_label; + } + err_label = &&cleanup_@ARGNAME@_ino; + + @ARGNAME@ = fd_anonymous (@ARGNAME@_ino); + if (!@ARGNAME@) { + goto *err_label; + } + err_label = &&cleanup_@ARGNAME@; + +#pragma fragment FD_CLEANUP +cleanup_@ARGNAME@: + fd_unref (@ARGNAME@); 
+cleanup_@ARGNAME@_ino: + inode_unref (@ARGNAME@_ino); + +#pragma fragment NEW_FD + /* + * This pseudo-type is only used for create, and in that case we know + * we'll be using loc.inode, so it's not worth generalizing to take an + * extra argument. + */ + fd_t *@ARGNAME@ = fd_anonymous (loc.inode); + + if (!fd) { + goto *err_label; + } + err_label = &&cleanup_@ARGNAME@; + new_meta += 16; + +#pragma fragment NEW_FD_CLEANUP +cleanup_@ARGNAME@: + fd_unref (@ARGNAME@); + +#pragma fragment INTEGER + @ARGTYPE@ @ARGNAME@ = *((@ARGTYPE@ *)new_meta); + + new_meta += sizeof(@ARGTYPE@); + +#pragma fragment LOC + loc_t @ARGNAME@ = { NULL, }; + + @ARGNAME@.inode = recon_get_inode (fs, *((uuid_t *)new_meta)); + if (!@ARGNAME@.inode) { + goto *err_label; + } + err_label = &&cleanup_@ARGNAME@; + gf_uuid_copy (@ARGNAME@.gfid, @ARGNAME@.inode->gfid); + new_meta += 16; + new_meta += 16; /* skip over pargfid */ + if (*(new_meta++)) { + @ARGNAME@.name = new_meta; + new_meta += strlen(new_meta) + 1; + } + +#pragma fragment LOC_CLEANUP +cleanup_@ARGNAME@: + loc_wipe (&@ARGNAME@); + +#pragma fragment PARENT_LOC + loc_t @ARGNAME@ = { NULL, }; + + new_meta += 16; /* skip over gfid */ + @ARGNAME@.parent = recon_get_inode (fs, *((uuid_t *)new_meta)); + if (!@ARGNAME@.parent) { + goto *err_label; + } + err_label = &&cleanup_@ARGNAME@; + gf_uuid_copy (@ARGNAME@.pargfid, @ARGNAME@.parent->gfid); + new_meta += 16; + if (!*(new_meta++)) { + goto *err_label; + } + @ARGNAME@.name = new_meta; + new_meta += strlen(new_meta) + 1; + + @ARGNAME@.inode = inode_new (fs->active_subvol->itable); + if (!@ARGNAME@.inode) { + goto *err_label; + } + +#pragma fragment PARENT_LOC_CLEANUP +cleanup_@ARGNAME@: + loc_wipe (&@ARGNAME@); + +#pragma fragment STRING + char *@ARGNAME@; + if (*(new_meta++)) { + @ARGNAME@ = new_meta; + new_meta += (strlen(new_meta) + 1); + } + else { + goto *err_label; + } + +#pragma fragment VECTOR + struct iovec @ARGNAME@; + + @ARGNAME@.iov_len = *((size_t *)new_meta); + new_meta += 
sizeof(@ARGNAME@.iov_len); + @ARGNAME@.iov_base = new_data; + new_data += @ARGNAME@.iov_len; + +#pragma fragment IATT + struct iatt @ARGNAME@; + { + @ARGNAME@.ia_prot = *((ia_prot_t *)new_meta); + new_meta += sizeof(ia_prot_t); + uint32_t *myints = (uint32_t *)new_meta; + @ARGNAME@.ia_uid = myints[0]; + @ARGNAME@.ia_gid = myints[1]; + @ARGNAME@.ia_atime = myints[2]; + @ARGNAME@.ia_atime_nsec = myints[3]; + @ARGNAME@.ia_mtime = myints[4]; + @ARGNAME@.ia_mtime_nsec = myints[5]; + new_meta += sizeof(*myints) * 6; + } + +#pragma fragment IOBREF + struct iobref *@ARGNAME@; + + @ARGNAME@ = iobref_new(); + if (!@ARGNAME@) { + goto *err_label; + } + err_label = &&cleanup_@ARGNAME@; + +#pragma fragment IOBREF_CLEANUP +cleanup_@ARGNAME@: + iobref_unref (@ARGNAME@); + +#pragma fragment LINK + /* TBD: check error */ + inode_t *new_inode = inode_link (@INODE_ARG@, NULL, NULL, @IATT_ARG@); + if (new_inode) { + inode_lookup (new_inode); + } + +#pragma fragment FOP +int +fdl_replay_@NAME@ (glfs_t *fs, char **old_meta, char **old_data) +{ + char *new_meta = *old_meta; + char *new_data = *old_data; + int ret; + int status = 0xbad; + void *err_label = &&done; + +@FUNCTION_BODY@ + + ret = syncop_@NAME@ (fs->active_subvol, @SYNCOP_ARGS@, NULL); + if (ret != @SUCCESS_VALUE@) { + fprintf (stderr, "syncop_@NAME@ returned %d", ret); + goto *err_label; + } + +@LINKS@ + + status = 0; + +@CLEANUPS@ + +done: + *old_meta = new_meta; + *old_data = new_data; + return status; +} + +#pragma fragment CASE + case GF_FOP_@UPNAME@: + printf ("=== GF_FOP_@UPNAME@\n"); + if (fdl_replay_@NAME@ (fs, &new_meta, &new_data) != 0) { + goto done; + } + recognized = 1; + break; + +#pragma fragment EPILOG +int +recon_execute (glfs_t *fs, char **old_meta, char **old_data) +{ + char *new_meta = *old_meta; + char *new_data = *old_data; + int recognized = 0; + event_header_t *eh; + + eh = (event_header_t *)new_meta; + new_meta += sizeof (*eh); + + /* TBD: check event_type instead of assuming NEW_REQUEST */ + + switch 
(eh->fop_type) { +@SWITCH_BODY@ + + default: + printf ("unknown fop %u\n", eh->fop_type); + } + +done: + *old_meta = new_meta; + *old_data = new_data; + return recognized; +} diff --git a/xlators/experimental/fdl/src/recon.c b/xlators/experimental/fdl/src/recon.c new file mode 100644 index 00000000000..14168a011e0 --- /dev/null +++ b/xlators/experimental/fdl/src/recon.c @@ -0,0 +1,89 @@ +#include +#include +#include +#include +#include + +#include "glusterfs.h" +#include "fd.h" +#include "syncop.h" +#include "glfs-internal.h" + +#define GFAPI_SUCCESS 0 + +extern int recon_execute (glfs_t *, char **, char **); + +int +main (int argc, char **argv) +{ + glfs_t *fs; + int ret; + int meta_fd = (-1); + char *meta_buf = NULL; + int data_fd = (-1); + char *data_buf = NULL; + + fs = glfs_new ("whocares"); + if (!fs) { + fprintf (stderr, "glfs_new failed\n"); + return EXIT_FAILURE; + } + + if (getenv("RECON_DEBUG")) { + ret = glfs_set_logging (fs, "/dev/stderr", 7); + } + else { + ret = glfs_set_logging (fs, "/dev/null", 0); + } + + if (ret != GFAPI_SUCCESS) { + fprintf (stderr, "glfs_set_logging failed (%d)\n", errno); + return EXIT_FAILURE; + } + + ret = glfs_set_volfile (fs, argv[1]); + if (ret != GFAPI_SUCCESS) { + fprintf (stderr, "glfs_set_volfile failed (%d)\n", errno); + return EXIT_FAILURE; + } + + ret = glfs_init (fs); + if (ret != GFAPI_SUCCESS) { + fprintf (stderr, "glfs_init failed (%d)\n", errno); + return EXIT_FAILURE; + } + + meta_fd = open (argv[2], O_RDONLY); + if (meta_fd < 0) { + perror ("open"); + return EXIT_FAILURE; + } + + /* TBD: get proper length */ + meta_buf = mmap (NULL, 1048576, PROT_READ, MAP_PRIVATE, meta_fd, 0); + if (meta_buf == MAP_FAILED) { + perror ("mmap"); + return EXIT_FAILURE; + } + + data_fd = open (argv[3], O_RDONLY); + if (data_fd < 0) { + perror ("open"); + return EXIT_FAILURE; + } + + /* TBD: get proper length */ + data_buf = mmap (NULL, 1048576, PROT_READ, MAP_PRIVATE, data_fd, 0); + if (data_buf == MAP_FAILED) { + perror 
("mmap"); + return EXIT_FAILURE; + } + + for (;;) { + if (!recon_execute(fs,&meta_buf,&data_buf)) { + break; + } + } + + return EXIT_SUCCESS; +} -- cgit