diff options
| -rw-r--r-- | xlators/experimental/fdl/src/Makefile.am | 14 | ||||
| -rw-r--r-- | xlators/experimental/fdl/src/dump-tmpl.c | 187 | ||||
| -rw-r--r-- | xlators/experimental/fdl/src/dump-tmpl.c.in | 177 | ||||
| -rw-r--r-- | xlators/experimental/fdl/src/fdl-tmpl.c | 536 | ||||
| -rw-r--r-- | xlators/experimental/fdl/src/fdl-tmpl.c.in | 512 | ||||
| -rw-r--r-- | xlators/experimental/fdl/src/recon-tmpl.c | 304 | ||||
| -rw-r--r-- | xlators/experimental/fdl/src/recon-tmpl.c.in | 297 | ||||
| -rw-r--r-- | xlators/experimental/jbr-client/src/Makefile.am | 2 | ||||
| -rw-r--r-- | xlators/experimental/jbr-client/src/fop-template.c | 113 | ||||
| -rw-r--r-- | xlators/experimental/jbr-client/src/fop-template.c.in | 102 | ||||
| -rw-r--r-- | xlators/experimental/jbr-server/src/Makefile.am | 2 | ||||
| -rw-r--r-- | xlators/experimental/jbr-server/src/all-templates.c | 542 | ||||
| -rw-r--r-- | xlators/experimental/jbr-server/src/all-templates.c.in | 501 | 
13 files changed, 1598 insertions, 1691 deletions
diff --git a/xlators/experimental/fdl/src/Makefile.am b/xlators/experimental/fdl/src/Makefile.am index f39978c3930..bdcaaf6c38d 100644 --- a/xlators/experimental/fdl/src/Makefile.am +++ b/xlators/experimental/fdl/src/Makefile.am @@ -33,16 +33,16 @@ AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \  AM_CFLAGS = -Wall $(GF_CFLAGS)  noinst_PYTHON = gen_fdl.py gen_dumper.py gen_recon.py -EXTRA_DIST = fdl-tmpl.c dump-tmpl.c recon-tmpl.c +EXTRA_DIST = fdl-tmpl.c.in dump-tmpl.c.in recon-tmpl.c.in  CLEANFILES = $(nodist_fdl_la_SOURCES) $(nodist_gf_logdump_SOURCES) \  	$(nodist_gf_recon_SOURCES) -fdl.c: fdl-tmpl.c gen_fdl.py -	$(PYTHON) $(srcdir)/gen_fdl.py $(srcdir)/fdl-tmpl.c > $@ +fdl.c: fdl-tmpl.c.in gen_fdl.py +	$(PYTHON) $(srcdir)/gen_fdl.py $(srcdir)/fdl-tmpl.c.in > $@ -libfdl.c: dump-tmpl.c gen_dumper.py -	$(PYTHON) $(srcdir)/gen_dumper.py $(srcdir)/dump-tmpl.c > $@ +libfdl.c: dump-tmpl.c.in gen_dumper.py +	$(PYTHON) $(srcdir)/gen_dumper.py $(srcdir)/dump-tmpl.c.in > $@ -librecon.c: recon-tmpl.c gen_recon.py -	$(PYTHON) $(srcdir)/gen_recon.py $(srcdir)/recon-tmpl.c > $@ +librecon.c: recon-tmpl.c.in gen_recon.py +	$(PYTHON) $(srcdir)/gen_recon.py $(srcdir)/recon-tmpl.c.in > $@ diff --git a/xlators/experimental/fdl/src/dump-tmpl.c b/xlators/experimental/fdl/src/dump-tmpl.c deleted file mode 100644 index 32b0fef6af3..00000000000 --- a/xlators/experimental/fdl/src/dump-tmpl.c +++ /dev/null @@ -1,187 +0,0 @@ -#pragma fragment PROLOG -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#include <ctype.h> -#endif - -#include "glfs.h" -#include "iatt.h" -#include "xlator.h" -#include "fdl.h" - -/* - * Returns 0 if the string is ASCII printable * - * and -1 if it's not ASCII printable         * - */ -int str_isprint (char *s) -{ -        int ret = -1; - -        if (!s) -                goto out; - -        while (s[0] != '\0') { -                if (!isprint(s[0])) -                        goto out; -                else -                        s++; -        } - -        ret = 0; -out: -        return ret; -} - -#pragma fragment DICT -        { -                int key_len, data_len; -                char *key_ptr; -                char *key_val; -                printf ("@ARGNAME@ = dict {\n"); -                for (;;) { -                        key_len = *((int *)new_meta); -                        new_meta += sizeof(int); -                        if (!key_len) { -                                break; -                        } -                        key_ptr = new_meta; -                        new_meta += key_len; -                        data_len = *((int *)new_meta); -                        key_val = new_meta + sizeof(int); -                        new_meta += sizeof(int) + data_len; -                        if (str_isprint(key_val)) -                                printf (" %s = <%d bytes>\n", -                                        key_ptr, data_len); -                        else -                                printf (" %s = %s <%d bytes>\n", -                                        key_ptr, key_val, data_len); -                } -                printf ("}\n"); -        } - -#pragma fragment DOUBLE -        printf ("@ARGNAME@ = @FORMAT@\n", *((uint64_t *)new_meta), -                *((uint64_t *)new_meta)); -        new_meta += sizeof(uint64_t); - -#pragma fragment GFID -        printf ("@ARGNAME@ = <gfid %s>\n", uuid_utoa(*((uuid_t *)new_meta))); -        new_meta += 16; - -#pragma fragment INTEGER -        printf ("@ARGNAME@ = @FORMAT@\n", *((uint32_t *)new_meta), -                *((uint32_t *)new_meta)); -        new_meta += sizeof(uint32_t); - -#pragma fragment LOC -        printf ("@ARGNAME@ = loc {\n"); -        printf ("  gfid = %s\n", uuid_utoa(*((uuid_t *)new_meta))); -        new_meta += 16; -        printf ("  pargfid = %s\n", uuid_utoa(*((uuid_t *)new_meta))); -        new_meta += 16; -        if (*(new_meta++)) { -                printf ("  name = %s\n", new_meta); -                new_meta += (strlen(new_meta) + 1); -        } -        printf ("}\n"); - -#pragma fragment STRING -        if (*(new_meta++)) { -                printf ("@ARGNAME@ = %s\n", new_meta); -                new_meta += (strlen(new_meta) + 1); -        } - -#pragma fragment VECTOR -        { -                size_t len = *((size_t *)new_meta); -                new_meta += sizeof(len); -                printf ("@ARGNAME@ = <%zu bytes>\n", len); -                new_data += len; -        } - -#pragma fragment IATT -        { -                ia_prot_t *myprot = ((ia_prot_t *)new_meta); -                printf ("@ARGNAME@ = iatt {\n"); -                printf ("  ia_prot = %c%c%c", -                        myprot->suid ? 'S' : '-', -                        myprot->sgid ? 'S' : '-', -                        myprot->sticky ? 'T' : '-'); -                printf ("%c%c%c", -                        myprot->owner.read ? 'r' : '-', -                        myprot->owner.write ? 'w' : '-', -                        myprot->owner.exec ? 'x' : '-'); -                printf ("%c%c%c", -                        myprot->group.read ? 'r' : '-', -                        myprot->group.write ? 'w' : '-', -                        myprot->group.exec ? 'x' : '-'); -                printf ("%c%c%c\n", -                        myprot->other.read ? 'r' : '-', -                        myprot->other.write ? 'w' : '-', -                        myprot->other.exec ? 'x' : '-'); -                new_meta += sizeof(ia_prot_t); -                uint32_t *myints = (uint32_t *)new_meta; -                printf ("  ia_uid = %u\n", myints[0]); -                printf ("  ia_gid = %u\n", myints[1]); -                printf ("  ia_atime = %u.%09u\n", myints[2], myints[3]); -                printf ("  ia_mtime = %u.%09u\n", myints[4], myints[5]); -                new_meta += sizeof(*myints) * 6; -        } - -#pragma fragment FOP -void -fdl_dump_@NAME@ (char **old_meta, char **old_data) -{ -        char    *new_meta	= *old_meta; -        char	*new_data	= *old_data; - -        /* TBD: word size/endianness */ -@FUNCTION_BODY@ - -        *old_meta = new_meta; -        *old_data = new_data; -} - -#pragma fragment CASE -        case GF_FOP_@UPNAME@: -                printf ("=== GF_FOP_@UPNAME@\n"); -                fdl_dump_@NAME@ (&new_meta, &new_data); -                break; - -#pragma fragment EPILOG -int -fdl_dump (char **old_meta, char **old_data) -{ -        char            *new_meta       = *old_meta; -        char            *new_data       = *old_data; -        static glfs_t   *fs             = NULL; -        int             recognized      = 1; -        event_header_t  *eh; - -        /* -         * We don't really call anything else in GFAPI, but this is the most -         * convenient way to satisfy all of the spurious dependencies on how it -         * or glusterfsd initialize (e.g. setting up THIS). -         */ -        if (!fs) { -                fs = glfs_new ("dummy"); -        } - -        eh = (event_header_t *)new_meta; -        new_meta += sizeof (*eh); - -        /* TBD: check event_type instead of assuming NEW_REQUEST */ - -        switch (eh->fop_type) { -@SWITCH_BODY@ - -        default: -                printf ("unknown fop %u\n", eh->fop_type); -                recognized = 0; -        } - -        *old_meta = new_meta; -        *old_data = new_data; -        return recognized; -} diff --git a/xlators/experimental/fdl/src/dump-tmpl.c.in b/xlators/experimental/fdl/src/dump-tmpl.c.in new file mode 100644 index 00000000000..97249ac3e71 --- /dev/null +++ b/xlators/experimental/fdl/src/dump-tmpl.c.in @@ -0,0 +1,177 @@ +#pragma fragment PROLOG +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#include <ctype.h> +#endif + +#include "glfs.h" +#include "iatt.h" +#include "xlator.h" +#include "fdl.h" + +/* + * Returns 0 if the string is ASCII printable * + * and -1 if it's not ASCII printable         * + */ +int +str_isprint(char *s) +{ +    int ret = -1; + +    if (!s) +        goto out; + +    while (s[0] != '\0') { +        if (!isprint(s[0])) +            goto out; +        else +            s++; +    } + +    ret = 0; +out: +    return ret; +} + +#pragma fragment DICT +{ +    int key_len, data_len; +    char *key_ptr; +    char *key_val; +    printf("@ARGNAME@ = dict {\n"); +    for (;;) { +        key_len = *((int *)new_meta); +        new_meta += sizeof(int); +        if (!key_len) { +            break; +        } +        key_ptr = new_meta; +        new_meta += key_len; +        data_len = *((int *)new_meta); +        key_val = new_meta + sizeof(int); +        new_meta += sizeof(int) + data_len; +        if (str_isprint(key_val)) +            printf(" %s = <%d bytes>\n", key_ptr, data_len); +        else +            printf(" %s = %s <%d bytes>\n", key_ptr, key_val, data_len); +    } +    printf("}\n"); +} + +#pragma fragment DOUBLE +printf("@ARGNAME@ = @FORMAT@\n", *((uint64_t *)new_meta), +       *((uint64_t *)new_meta)); +new_meta += sizeof(uint64_t); + +#pragma fragment GFID +printf("@ARGNAME@ = <gfid %s>\n", uuid_utoa(*((uuid_t *)new_meta))); +new_meta += 16; + +#pragma fragment INTEGER +printf("@ARGNAME@ = @FORMAT@\n", *((uint32_t *)new_meta), +       *((uint32_t *)new_meta)); +new_meta += sizeof(uint32_t); + +#pragma fragment LOC +printf("@ARGNAME@ = loc {\n"); +printf("  gfid = %s\n", uuid_utoa(*((uuid_t *)new_meta))); +new_meta += 16; +printf("  pargfid = %s\n", uuid_utoa(*((uuid_t *)new_meta))); +new_meta += 16; +if (*(new_meta++)) { +    printf("  name = %s\n", new_meta); +    new_meta += (strlen(new_meta) + 1); +} +printf("}\n"); + +#pragma fragment STRING +if (*(new_meta++)) { +    printf("@ARGNAME@ = %s\n", new_meta); +    new_meta += (strlen(new_meta) + 1); +} + +#pragma fragment VECTOR +{ +    size_t len = *((size_t *)new_meta); +    new_meta += sizeof(len); +    printf("@ARGNAME@ = <%zu bytes>\n", len); +    new_data += len; +} + +#pragma fragment IATT +{ +    ia_prot_t *myprot = ((ia_prot_t *)new_meta); +    printf("@ARGNAME@ = iatt {\n"); +    printf("  ia_prot = %c%c%c", myprot->suid ? 'S' : '-', +           myprot->sgid ? 'S' : '-', myprot->sticky ? 'T' : '-'); +    printf("%c%c%c", myprot->owner.read ? 'r' : '-', +           myprot->owner.write ? 'w' : '-', myprot->owner.exec ? 'x' : '-'); +    printf("%c%c%c", myprot->group.read ? 'r' : '-', +           myprot->group.write ? 'w' : '-', myprot->group.exec ? 'x' : '-'); +    printf("%c%c%c\n", myprot->other.read ? 'r' : '-', +           myprot->other.write ? 'w' : '-', myprot->other.exec ? 'x' : '-'); +    new_meta += sizeof(ia_prot_t); +    uint32_t *myints = (uint32_t *)new_meta; +    printf("  ia_uid = %u\n", myints[0]); +    printf("  ia_gid = %u\n", myints[1]); +    printf("  ia_atime = %u.%09u\n", myints[2], myints[3]); +    printf("  ia_mtime = %u.%09u\n", myints[4], myints[5]); +    new_meta += sizeof(*myints) * 6; +} + +#pragma fragment FOP +void fdl_dump_@NAME@(char **old_meta, char **old_data) +{ +    char *new_meta = *old_meta; +    char *new_data = *old_data; + +    /* TBD: word size/endianness */ +    @FUNCTION_BODY@ + +    *old_meta = new_meta; +    *old_data = new_data; +} + +#pragma fragment CASE +case GF_FOP_@UPNAME@: +    printf("=== GF_FOP_@UPNAME@\n"); +    fdl_dump_@NAME@(&new_meta, &new_data); +    break; + +#pragma fragment EPILOG +    int +    fdl_dump(char **old_meta, char **old_data) +    { +        char *new_meta = *old_meta; +        char *new_data = *old_data; +        static glfs_t *fs = NULL; +        int recognized = 1; +        event_header_t *eh; + +        /* +         * We don't really call anything else in GFAPI, but this is the most +         * convenient way to satisfy all of the spurious dependencies on how it +         * or glusterfsd initialize (e.g. setting up THIS). +         */ +        if (!fs) { +            fs = glfs_new("dummy"); +        } + +        eh = (event_header_t *)new_meta; +        new_meta += sizeof(*eh); + +        /* TBD: check event_type instead of assuming NEW_REQUEST */ + +        switch (eh->fop_type) { +            @SWITCH_BODY@ + +            default : +                printf("unknown fop %u\n", eh->fop_type); +                recognized = 0; +        } + +        *old_meta = new_meta; +        *old_data = new_data; +        return recognized; +    } diff --git a/xlators/experimental/fdl/src/fdl-tmpl.c b/xlators/experimental/fdl/src/fdl-tmpl.c deleted file mode 100644 index 145dad7964a..00000000000 --- a/xlators/experimental/fdl/src/fdl-tmpl.c +++ /dev/null @@ -1,536 +0,0 @@ -/* -  Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> -  This file is part of GlusterFS. - -  This file is licensed to you under your choice of the GNU Lesser -  General Public License, version 3 or any later version (LGPLv3 or -  later), or the GNU General Public License, version 2 (GPLv2), in all -  cases as published by the Free Software Foundation. -*/ - -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - -#include <fcntl.h> -#include <unistd.h> -#include <sys/mman.h> -#include "call-stub.h" -#include "iatt.h" -#include "defaults.h" -#include "syscall.h" -#include "xlator.h" -#include "fdl.h" - -/* TBD: make tunable */ -#define META_FILE_SIZE  (1 << 20) -#define DATA_FILE_SIZE  (1 << 24) - -enum gf_fdl { -        gf_fdl_mt_fdl_private_t = gf_common_mt_end + 1, -        gf_fdl_mt_end -}; - -typedef struct { -        char            *type; -        off_t           size; -        char            *path; -        int             fd; -        void *          ptr; -        off_t           max_offset; -} log_obj_t; - -typedef struct { -        struct list_head        reqs; -        pthread_mutex_t         req_lock; -        pthread_cond_t          req_cond; -        char                    *log_dir; -        pthread_t               worker; -        gf_boolean_t            should_stop; -        gf_boolean_t            change_term; -        log_obj_t               meta_log; -        log_obj_t               data_log; -        int                     term; -        int                     first_term; -} fdl_private_t; - -int32_t -fdl_ipc (call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata); - -void -fdl_enqueue (xlator_t *this, call_stub_t *stub) -{ -        fdl_private_t   *priv   = this->private; - -        pthread_mutex_lock (&priv->req_lock); -        list_add_tail (&stub->list, &priv->reqs); -        pthread_mutex_unlock (&priv->req_lock); - -        pthread_cond_signal (&priv->req_cond); -} - -#pragma generate - -char * -fdl_open_term_log (xlator_t *this, log_obj_t *obj, int term) -{ -        fdl_private_t   *priv   = this->private; -        int             ret; -        char *          ptr     = NULL; - -        /* -         * Use .jnl instead of .log so that we don't get test info (mistakenly) -         * appended to our journal files. -         */ -        if (this->ctx->cmd_args.log_ident) { -                ret = gf_asprintf (&obj->path, "%s/%s-%s-%d.jnl", -                                   priv->log_dir, this->ctx->cmd_args.log_ident, -                                   obj->type, term); -        } -        else { -                ret = gf_asprintf (&obj->path, "%s/fubar-%s-%d.jnl", -                                   priv->log_dir, obj->type, term); -        } -        if ((ret <= 0) || !obj->path) { -                gf_log (this->name, GF_LOG_ERROR, -                        "failed to construct log-file path"); -                goto err; -        } - -        gf_log (this->name, GF_LOG_INFO, "opening %s (size %ld)", -                obj->path, obj->size); - -        obj->fd = open (obj->path, O_RDWR|O_CREAT|O_TRUNC, 0666); -        if (obj->fd < 0) { -                gf_log (this->name, GF_LOG_ERROR, -                        "failed to open log file (%s)", strerror(errno)); -                goto err; -        } - -#if !defined(GF_BSD_HOST_OS) -        /* -         * NetBSD can just go die in a fire.  Even though it claims to support -         * fallocate/posix_fallocate they don't actually *do* anything so the -         * file size remains zero.  Then mmap succeeds anyway, but any access -         * to the mmap'ed region will segfault.  It would be acceptable for -         * fallocate to do what it says, for mmap to fail, or for access to -         * extend the file.  NetBSD managed to hit the trifecta of Getting -         * Everything Wrong, and debugging in that environment to get this far -         * has already been painful enough (systems I worked on in 1990 were -         * better that way).  We'll fall through to the lseek/write method, and -         * performance will be worse, and TOO BAD. -         */ -        if (sys_fallocate(obj->fd,0,0,obj->size) < 0) -#endif -        { -                gf_log (this->name, GF_LOG_WARNING, -                        "failed to fallocate space for log file"); -                /* Have to do this the ugly page-faulty way. */ -                (void) sys_lseek (obj->fd, obj->size-1, SEEK_SET); -                (void) sys_write (obj->fd, "", 1); -        } - -        ptr = mmap (NULL, obj->size, PROT_WRITE, MAP_SHARED, obj->fd, 0); -        if (ptr == MAP_FAILED) { -                gf_log (this->name, GF_LOG_ERROR, "failed to mmap log (%s)", -                        strerror(errno)); -                goto err; -        } - -        obj->ptr = ptr; -        obj->max_offset = 0; -        return ptr; - -err: -        if (obj->fd >= 0) { -                sys_close (obj->fd); -                obj->fd = (-1); -        } -        if (obj->path) { -                GF_FREE (obj->path); -                obj->path = NULL; -        } -        return ptr; -} - -void -fdl_close_term_log (xlator_t *this, log_obj_t *obj) -{ -        fdl_private_t   *priv           = this->private; - -        if (obj->ptr) { -                (void) munmap (obj->ptr, obj->size); -                obj->ptr = NULL; -        } - -        if (obj->fd >= 0) { -                gf_log (this->name, GF_LOG_INFO, -                        "truncating term %d %s journal to %ld", -                        priv->term, obj->type, obj->max_offset); -                if (sys_ftruncate(obj->fd,obj->max_offset) < 0) { -                        gf_log (this->name, GF_LOG_WARNING, -                                "failed to truncate journal (%s)", -                                strerror(errno)); -                } -                sys_close (obj->fd); -                obj->fd = (-1); -        } - -        if (obj->path) { -                GF_FREE (obj->path); -                obj->path = NULL; -        } -} - -gf_boolean_t -fdl_change_term (xlator_t *this, char **meta_ptr, char **data_ptr) -{ -        fdl_private_t   *priv           = this->private; - -        fdl_close_term_log (this, &priv->meta_log); -        fdl_close_term_log (this, &priv->data_log); - -        ++(priv->term); - -        *meta_ptr = fdl_open_term_log (this, &priv->meta_log, priv->term); -        if (!*meta_ptr) { -                return _gf_false; -        } - -        *data_ptr = fdl_open_term_log (this, &priv->data_log, priv->term); -        if (!*data_ptr) { -                return _gf_false; -        } - -        return _gf_true; -} - -void * -fdl_worker (void *arg) -{ -        xlator_t        *this           = arg; -        fdl_private_t   *priv           = this->private; -        call_stub_t     *stub; -        char *          meta_ptr        = NULL; -        off_t           *meta_offset    = &priv->meta_log.max_offset; -        char *          data_ptr        = NULL; -        off_t           *data_offset    = &priv->data_log.max_offset; -        unsigned long   base_as_ul; -        void *          msync_ptr; -        size_t          msync_len; -        gf_boolean_t    recycle; -        void            *err_label      = &&err_unlocked; - -        priv->meta_log.type = "meta"; -        priv->meta_log.size = META_FILE_SIZE; -        priv->meta_log.path = NULL; -        priv->meta_log.fd = (-1); -        priv->meta_log.ptr = NULL; - -        priv->data_log.type = "data"; -        priv->data_log.size = DATA_FILE_SIZE; -        priv->data_log.path = NULL; -        priv->data_log.fd = (-1); -        priv->data_log.ptr = NULL; - -        /* TBD: initial term should come from persistent storage (e.g. etcd) */ -        priv->first_term = ++(priv->term); -        meta_ptr = fdl_open_term_log (this, &priv->meta_log, priv->term); -        if (!meta_ptr) { -                goto *err_label; -        } -        data_ptr = fdl_open_term_log (this, &priv->data_log, priv->term); -        if (!data_ptr) { -                fdl_close_term_log (this, &priv->meta_log); -                goto *err_label; -        } - -        for (;;) { -                pthread_mutex_lock (&priv->req_lock); -                err_label = &&err_locked; -                while (list_empty(&priv->reqs)) { -                        pthread_cond_wait (&priv->req_cond, &priv->req_lock); -                        if (priv->should_stop) { -                                goto *err_label; -                        } -                        if (priv->change_term) { -                                if (!fdl_change_term(this, &meta_ptr, -                                                           &data_ptr)) { -                                        goto *err_label; -                                } -                                priv->change_term = _gf_false; -                                continue; -                        } -                } -                stub = list_entry (priv->reqs.next, call_stub_t, list); -                list_del_init (&stub->list); -                pthread_mutex_unlock (&priv->req_lock); -                err_label = &&err_unlocked; -                /* -                 * TBD: batch requests -                 * -                 * What we should do here is gather up *all* of the requests -                 * that have accumulated since we were last at this point, -                 * blast them all out in one big writev, and then dispatch them -                 * all before coming back for more.  That maximizes throughput, -                 * at some cost to latency (due to queuing effects at the log -                 * stage).  Note that we're likely to be above io-threads, so -                 * the dispatch itself will be parallelized (at further cost to -                 * latency).  For now, we just do the simplest thing and handle -                 * one request all the way through before fetching the next. -                 * -                 * So, why mmap/msync instead of writev/fdatasync?  Because it's -                 * faster.  Much faster.  So much faster that I half-suspect -                 * cheating, but it's more convenient for now than having to -                 * ensure that everything's page-aligned for O_DIRECT (the only -                 * alternative that still might avoid ridiculous levels of -                 * local-FS overhead). -                 * -                 * TBD: check that msync really does get our data to disk. -                 */ -                gf_log (this->name, GF_LOG_DEBUG, -                        "logging %u+%u bytes for op %d", -                        stub->jnl_meta_len, stub->jnl_data_len, stub->fop); -                recycle = _gf_false; -                if ((*meta_offset + stub->jnl_meta_len) > priv->meta_log.size) { -                        recycle = _gf_true; -                } -                if ((*data_offset + stub->jnl_data_len) > priv->data_log.size) { -                        recycle = _gf_true; -                } -                if (recycle && !fdl_change_term(this,&meta_ptr,&data_ptr)) { -                        goto *err_label; -                } -                meta_ptr = priv->meta_log.ptr; -                data_ptr = priv->data_log.ptr; -                gf_log (this->name, GF_LOG_DEBUG, "serializing to %p/%p", -                        meta_ptr + *meta_offset, data_ptr + *data_offset); -                stub->serialize (stub, meta_ptr + *meta_offset, -                                       data_ptr + *data_offset); -                if (stub->jnl_meta_len > 0) { -                        base_as_ul = (unsigned long) (meta_ptr + *meta_offset); -                        msync_ptr = (void *) (base_as_ul & ~0x0fff); -                        msync_len = (size_t) (base_as_ul &  0x0fff); -                        if (msync (msync_ptr, msync_len+stub->jnl_meta_len, -                                              MS_SYNC) < 0) { -                                gf_log (this->name, GF_LOG_WARNING, -                                        "failed to log request meta (%s)", -                                        strerror(errno)); -                        } -                        *meta_offset += stub->jnl_meta_len; -                } -                if (stub->jnl_data_len > 0) { -                        base_as_ul = (unsigned long) (data_ptr + *data_offset); -                        msync_ptr = (void *) (base_as_ul & ~0x0fff); -                        msync_len = (size_t) (base_as_ul &  0x0fff); -                        if (msync (msync_ptr, msync_len+stub->jnl_data_len, -                                              MS_SYNC) < 0) { -                                gf_log (this->name, GF_LOG_WARNING, -                                        "failed to log request data (%s)", -                                        strerror(errno)); -                        } -                        *data_offset += stub->jnl_data_len; -                } -                call_resume (stub); -        } - -err_locked: -        pthread_mutex_unlock (&priv->req_lock); -err_unlocked: -        fdl_close_term_log (this, &priv->meta_log); -        fdl_close_term_log (this, &priv->data_log); -        return NULL; -} - -int32_t -fdl_ipc_continue (call_frame_t *frame, xlator_t *this, -                  int32_t op, dict_t *xdata) -{ -        /* -         * Nothing to be done here. Just Unwind. * -         */ -        STACK_UNWIND_STRICT (ipc, frame, 0, 0, xdata); - -        return 0; -} - -int32_t -fdl_ipc (call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata) -{ -        call_stub_t     *stub; -        fdl_private_t   *priv   = this->private; -        dict_t          *tdict; -        int32_t         gt_err  = EIO; - -        switch (op) { - -        case FDL_IPC_CHANGE_TERM: -                gf_log (this->name, GF_LOG_INFO, "got CHANGE_TERM op"); -                priv->change_term = _gf_true; -                pthread_cond_signal (&priv->req_cond); -                STACK_UNWIND_STRICT (ipc, frame, 0, 0, NULL); -                break; - -        case FDL_IPC_GET_TERMS: -                gf_log (this->name, GF_LOG_INFO, "got GET_TERMS op"); -                tdict = dict_new (); -                if (!tdict) { -                        gt_err = ENOMEM; -                        goto gt_done; -                } -                if (dict_set_int32(tdict,"first",priv->first_term) != 0) { -                        goto gt_done; -                } -                if (dict_set_int32(tdict,"last",priv->term) != 0) { -                        goto gt_done; -                } -                gt_err = 0; -        gt_done: -                if (gt_err) { -                        STACK_UNWIND_STRICT (ipc, frame, -1, gt_err, NULL); -                } else { -                        STACK_UNWIND_STRICT (ipc, frame, 0, 0, tdict); -                } -                if (tdict) { -                        dict_unref (tdict); -                } -                break; - -        case FDL_IPC_JBR_SERVER_ROLLBACK: -                /* -                 * In case of a rollback from jbr-server, dump  * -                 * the term and index number in the journal,    * -                 * which will later be used to rollback the fop * -                 */ -                stub = fop_ipc_stub (frame, fdl_ipc_continue, -                                     op, xdata); -                fdl_len_ipc (stub); -                stub->serialize = fdl_serialize_ipc; -                fdl_enqueue (this, stub); - -                break; - -        default: -                STACK_WIND_TAIL (frame, -                                 FIRST_CHILD(this), -                                 FIRST_CHILD(this)->fops->ipc, -                                 op, xdata); -        } - -        return 0; -} - -int -fdl_init (xlator_t *this) -{ -        fdl_private_t   *priv   = NULL; - -        priv = GF_CALLOC (1, sizeof (*priv), gf_fdl_mt_fdl_private_t); -        if (!priv) { -                gf_log (this->name, GF_LOG_ERROR, -                        "failed to allocate fdl_private"); -                goto err; -        } - -        INIT_LIST_HEAD (&priv->reqs); -        if (pthread_mutex_init (&priv->req_lock, NULL) != 0) { -                gf_log (this->name, GF_LOG_ERROR, -                        "failed to initialize req_lock"); -                goto err; -        } -        if (pthread_cond_init (&priv->req_cond, NULL) != 0) { -                gf_log (this->name, GF_LOG_ERROR, -                        "failed to initialize req_cond"); -                goto err; -        } - -        GF_OPTION_INIT ("log-path", priv->log_dir, path, err); - -        this->private = priv; -        /* -         * The rest of the fop table is automatically generated, so this is a -         * bit cleaner than messing with the generation to add a hand-written -         * exception. -         */ - -        if (gf_thread_create (&priv->worker, NULL, fdl_worker, this, -                              "fdlwrker") != 0) { -                gf_log (this->name, GF_LOG_ERROR, -                        "failed to start fdl_worker"); -                goto err; -        } - -        return 0; - -err: -        if (priv) { -                GF_FREE(priv); -        } -        return -1; -} - -void -fdl_fini (xlator_t *this) -{ -        fdl_private_t   *priv   = this->private; - -        if (priv) { -                priv->should_stop = _gf_true; -                pthread_cond_signal (&priv->req_cond); -                pthread_join (priv->worker, NULL); -                GF_FREE(priv); -        } -} - -int -fdl_reconfigure (xlator_t *this, dict_t *options) -{ -        fdl_private_t   *priv   = this->private; - -	GF_OPTION_RECONF ("log_dir", priv->log_dir, options, path, out); -        /* TBD: react if it changed */ - -out: -        return 0; -} - -int32_t -mem_acct_init (xlator_t *this) -{ -        int     ret = -1; - -        GF_VALIDATE_OR_GOTO ("fdl", this, out); - -        ret = xlator_mem_acct_init (this, gf_fdl_mt_end + 1); - -        if (ret != 0) { -                gf_log (this->name, GF_LOG_ERROR, "Memory accounting init" -                        "failed"); -                return ret; -        } -out: -        return ret; -} - -class_methods_t class_methods = { -        .init           = fdl_init, -        .fini           = fdl_fini, -        .reconfigure    = fdl_reconfigure, -        .notify         = default_notify, -}; - -struct volume_options options[] = { -        { .key = {"log-path"}, -          .type = GF_OPTION_TYPE_PATH, -          .default_value = DEFAULT_LOG_FILE_DIRECTORY, -          .description = "Directory for FDL files." -        }, -        { .key  = {NULL} }, -}; - -struct xlator_cbks cbks = { -        .release        = default_release, -        .releasedir     = default_releasedir, -        .forget         = default_forget, -}; diff --git a/xlators/experimental/fdl/src/fdl-tmpl.c.in b/xlators/experimental/fdl/src/fdl-tmpl.c.in new file mode 100644 index 00000000000..7388b83e0bc --- /dev/null +++ b/xlators/experimental/fdl/src/fdl-tmpl.c.in @@ -0,0 +1,512 @@ +/* +  Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> +  This file is part of GlusterFS. + +  This file is licensed to you under your choice of the GNU Lesser +  General Public License, version 3 or any later version (LGPLv3 or +  later), or the GNU General Public License, version 2 (GPLv2), in all +  cases as published by the Free Software Foundation. +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include <fcntl.h> +#include <unistd.h> +#include <sys/mman.h> +#include "call-stub.h" +#include "iatt.h" +#include "defaults.h" +#include "syscall.h" +#include "xlator.h" +#include "fdl.h" + +/* TBD: make tunable */ +#define META_FILE_SIZE (1 << 20) +#define DATA_FILE_SIZE (1 << 24) + +enum gf_fdl { gf_fdl_mt_fdl_private_t = gf_common_mt_end + 1, gf_fdl_mt_end }; + +typedef struct { +    char *type; +    off_t size; +    char *path; +    int fd; +    void *ptr; +    off_t max_offset; +} log_obj_t; + +typedef struct { +    struct list_head reqs; +    pthread_mutex_t req_lock; +    pthread_cond_t req_cond; +    char *log_dir; +    pthread_t worker; +    gf_boolean_t should_stop; +    gf_boolean_t change_term; +    log_obj_t meta_log; +    log_obj_t data_log; +    int term; +    int first_term; +} fdl_private_t; + +int32_t +fdl_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata); + +void +fdl_enqueue(xlator_t *this, call_stub_t *stub) +{ +    fdl_private_t *priv = this->private; + +    pthread_mutex_lock(&priv->req_lock); +    list_add_tail(&stub->list, &priv->reqs); +    pthread_mutex_unlock(&priv->req_lock); + +    pthread_cond_signal(&priv->req_cond); +} + +#pragma generate + +char * +fdl_open_term_log(xlator_t *this, log_obj_t *obj, int term) +{ +    fdl_private_t *priv = this->private; +    int ret; +    char *ptr = NULL; + +    /* +     * Use .jnl instead of .log so that we don't get test info (mistakenly) +     * appended to our journal files. +     */ +    if (this->ctx->cmd_args.log_ident) { +        ret = gf_asprintf(&obj->path, "%s/%s-%s-%d.jnl", priv->log_dir, +                          this->ctx->cmd_args.log_ident, obj->type, term); +    } else { +        ret = gf_asprintf(&obj->path, "%s/fubar-%s-%d.jnl", priv->log_dir, +                          obj->type, term); +    } +    if ((ret <= 0) || !obj->path) { +        gf_log(this->name, GF_LOG_ERROR, "failed to construct log-file path"); +        goto err; +    } + +    gf_log(this->name, GF_LOG_INFO, "opening %s (size %ld)", obj->path, +           obj->size); + +    obj->fd = open(obj->path, O_RDWR | O_CREAT | O_TRUNC, 0666); +    if (obj->fd < 0) { +        gf_log(this->name, GF_LOG_ERROR, "failed to open log file (%s)", +               strerror(errno)); +        goto err; +    } + +#if !defined(GF_BSD_HOST_OS) +    /* +     * NetBSD can just go die in a fire.  Even though it claims to support +     * fallocate/posix_fallocate they don't actually *do* anything so the +     * file size remains zero.  Then mmap succeeds anyway, but any access +     * to the mmap'ed region will segfault.  It would be acceptable for +     * fallocate to do what it says, for mmap to fail, or for access to +     * extend the file.  NetBSD managed to hit the trifecta of Getting +     * Everything Wrong, and debugging in that environment to get this far +     * has already been painful enough (systems I worked on in 1990 were +     * better that way).  We'll fall through to the lseek/write method, and +     * performance will be worse, and TOO BAD. +     */ +    if (sys_fallocate(obj->fd, 0, 0, obj->size) < 0) +#endif +    { +        gf_log(this->name, GF_LOG_WARNING, +               "failed to fallocate space for log file"); +        /* Have to do this the ugly page-faulty way. */ +        (void)sys_lseek(obj->fd, obj->size - 1, SEEK_SET); +        (void)sys_write(obj->fd, "", 1); +    } + +    ptr = mmap(NULL, obj->size, PROT_WRITE, MAP_SHARED, obj->fd, 0); +    if (ptr == MAP_FAILED) { +        gf_log(this->name, GF_LOG_ERROR, "failed to mmap log (%s)", +               strerror(errno)); +        goto err; +    } + +    obj->ptr = ptr; +    obj->max_offset = 0; +    return ptr; + +err: +    if (obj->fd >= 0) { +        sys_close(obj->fd); +        obj->fd = (-1); +    } +    if (obj->path) { +        GF_FREE(obj->path); +        obj->path = NULL; +    } +    return ptr; +} + +void +fdl_close_term_log(xlator_t *this, log_obj_t *obj) +{ +    fdl_private_t *priv = this->private; + +    if (obj->ptr) { +        (void)munmap(obj->ptr, obj->size); +        obj->ptr = NULL; +    } + +    if (obj->fd >= 0) { +        gf_log(this->name, GF_LOG_INFO, "truncating term %d %s journal to %ld", +               priv->term, obj->type, obj->max_offset); +        if (sys_ftruncate(obj->fd, obj->max_offset) < 0) { +            gf_log(this->name, GF_LOG_WARNING, +                   "failed to truncate journal (%s)", strerror(errno)); +        } +        sys_close(obj->fd); +        obj->fd = (-1); +    } + +    if (obj->path) { +        GF_FREE(obj->path); +        obj->path = NULL; +    } +} + +gf_boolean_t +fdl_change_term(xlator_t *this, char **meta_ptr, char **data_ptr) +{ +    fdl_private_t *priv = this->private; + +    fdl_close_term_log(this, &priv->meta_log); +    fdl_close_term_log(this, &priv->data_log); + +    ++(priv->term); + +    *meta_ptr = fdl_open_term_log(this, &priv->meta_log, priv->term); +    if (!*meta_ptr) { +        return _gf_false; +    } + +    *data_ptr = fdl_open_term_log(this, &priv->data_log, priv->term); +    if (!*data_ptr) { +        return _gf_false; +    } + +    return _gf_true; +} + +void * +fdl_worker(void *arg) +{ +    xlator_t *this = arg; +    fdl_private_t *priv = this->private; +    call_stub_t *stub; +    char *meta_ptr = NULL; +    off_t *meta_offset = &priv->meta_log.max_offset; +    char *data_ptr = NULL; +    off_t *data_offset = &priv->data_log.max_offset; +    unsigned long base_as_ul; +    void *msync_ptr; +    size_t msync_len; +    gf_boolean_t recycle; +    void *err_label = &&err_unlocked; + +    priv->meta_log.type = "meta"; +    priv->meta_log.size = META_FILE_SIZE; +    priv->meta_log.path = NULL; +    priv->meta_log.fd = (-1); +    priv->meta_log.ptr = NULL; + +    priv->data_log.type = "data"; +    priv->data_log.size = DATA_FILE_SIZE; +    priv->data_log.path = NULL; +    priv->data_log.fd = (-1); +    priv->data_log.ptr = NULL; + +    /* TBD: initial term should come from persistent storage (e.g. etcd) */ +    priv->first_term = ++(priv->term); +    meta_ptr = fdl_open_term_log(this, &priv->meta_log, priv->term); +    if (!meta_ptr) { +        goto *err_label; +    } +    data_ptr = fdl_open_term_log(this, &priv->data_log, priv->term); +    if (!data_ptr) { +        fdl_close_term_log(this, &priv->meta_log); +        goto *err_label; +    } + +    for (;;) { +        pthread_mutex_lock(&priv->req_lock); +        err_label = &&err_locked; +        while (list_empty(&priv->reqs)) { +            pthread_cond_wait(&priv->req_cond, &priv->req_lock); +            if (priv->should_stop) { +                goto *err_label; +            } +            if (priv->change_term) { +                if (!fdl_change_term(this, &meta_ptr, &data_ptr)) { +                    goto *err_label; +                } +                priv->change_term = _gf_false; +                continue; +            } +        } +        stub = list_entry(priv->reqs.next, call_stub_t, list); +        list_del_init(&stub->list); +        pthread_mutex_unlock(&priv->req_lock); +        err_label = &&err_unlocked; +        /* +         * TBD: batch requests +         * +         * What we should do here is gather up *all* of the requests +         * that have accumulated since we were last at this point, +         * blast them all out in one big writev, and then dispatch them +         * all before coming back for more.  That maximizes throughput, +         * at some cost to latency (due to queuing effects at the log +         * stage).  Note that we're likely to be above io-threads, so +         * the dispatch itself will be parallelized (at further cost to +         * latency).  For now, we just do the simplest thing and handle +         * one request all the way through before fetching the next. +         * +         * So, why mmap/msync instead of writev/fdatasync?  Because it's +         * faster.  Much faster.  So much faster that I half-suspect +         * cheating, but it's more convenient for now than having to +         * ensure that everything's page-aligned for O_DIRECT (the only +         * alternative that still might avoid ridiculous levels of +         * local-FS overhead). +         * +         * TBD: check that msync really does get our data to disk. +         */ +        gf_log(this->name, GF_LOG_DEBUG, "logging %u+%u bytes for op %d", +               stub->jnl_meta_len, stub->jnl_data_len, stub->fop); +        recycle = _gf_false; +        if ((*meta_offset + stub->jnl_meta_len) > priv->meta_log.size) { +            recycle = _gf_true; +        } +        if ((*data_offset + stub->jnl_data_len) > priv->data_log.size) { +            recycle = _gf_true; +        } +        if (recycle && !fdl_change_term(this, &meta_ptr, &data_ptr)) { +            goto *err_label; +        } +        meta_ptr = priv->meta_log.ptr; +        data_ptr = priv->data_log.ptr; +        gf_log(this->name, GF_LOG_DEBUG, "serializing to %p/%p", +               meta_ptr + *meta_offset, data_ptr + *data_offset); +        stub->serialize(stub, meta_ptr + *meta_offset, data_ptr + *data_offset); +        if (stub->jnl_meta_len > 0) { +            base_as_ul = (unsigned long)(meta_ptr + *meta_offset); +            msync_ptr = (void *)(base_as_ul & ~0x0fff); +            msync_len = (size_t)(base_as_ul & 0x0fff); +            if (msync(msync_ptr, msync_len + stub->jnl_meta_len, MS_SYNC) < 0) { +                gf_log(this->name, GF_LOG_WARNING, +                       "failed to log request meta (%s)", strerror(errno)); +            } +            *meta_offset += stub->jnl_meta_len; +        } +        if (stub->jnl_data_len > 0) { +            base_as_ul = (unsigned long)(data_ptr + *data_offset); +            msync_ptr = (void *)(base_as_ul & ~0x0fff); +            msync_len = (size_t)(base_as_ul & 0x0fff); +            if (msync(msync_ptr, msync_len + stub->jnl_data_len, MS_SYNC) < 0) { +                gf_log(this->name, GF_LOG_WARNING, +                       "failed to log request data (%s)", strerror(errno)); +            } +            *data_offset += stub->jnl_data_len; +        } +        call_resume(stub); +    } + +err_locked: +    pthread_mutex_unlock(&priv->req_lock); +err_unlocked: +    fdl_close_term_log(this, &priv->meta_log); +    fdl_close_term_log(this, &priv->data_log); +    return NULL; +} + +int32_t +fdl_ipc_continue(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata) +{ +    /* +     * Nothing to be done here. Just Unwind. * +     */ +    STACK_UNWIND_STRICT(ipc, frame, 0, 0, xdata); + +    return 0; +} + +int32_t +fdl_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata) +{ +    call_stub_t *stub; +    fdl_private_t *priv = this->private; +    dict_t *tdict; +    int32_t gt_err = EIO; + +    switch (op) { +        case FDL_IPC_CHANGE_TERM: +            gf_log(this->name, GF_LOG_INFO, "got CHANGE_TERM op"); +            priv->change_term = _gf_true; +            pthread_cond_signal(&priv->req_cond); +            STACK_UNWIND_STRICT(ipc, frame, 0, 0, NULL); +            break; + +        case FDL_IPC_GET_TERMS: +            gf_log(this->name, GF_LOG_INFO, "got GET_TERMS op"); +            tdict = dict_new(); +            if (!tdict) { +                gt_err = ENOMEM; +                goto gt_done; +            } +            if (dict_set_int32(tdict, "first", priv->first_term) != 0) { +                goto gt_done; +            } +            if (dict_set_int32(tdict, "last", priv->term) != 0) { +                goto gt_done; +            } +            gt_err = 0; +        gt_done: +            if (gt_err) { +                STACK_UNWIND_STRICT(ipc, frame, -1, gt_err, NULL); +            } else { +                STACK_UNWIND_STRICT(ipc, frame, 0, 0, tdict); +            } +            if (tdict) { +                dict_unref(tdict); +            } +            break; + +        case FDL_IPC_JBR_SERVER_ROLLBACK: +            /* +             * In case of a rollback from jbr-server, dump  * +             * the term and index number in the journal,    * +             * which will later be used to rollback the fop * +             */ +            stub = fop_ipc_stub(frame, fdl_ipc_continue, op, xdata); +            fdl_len_ipc(stub); +            stub->serialize = fdl_serialize_ipc; +            fdl_enqueue(this, stub); + +            break; + +        default: +            STACK_WIND_TAIL(frame, FIRST_CHILD(this), +                            FIRST_CHILD(this)->fops->ipc, op, xdata); +    } + +    return 0; +} + +int +fdl_init(xlator_t *this) +{ +    fdl_private_t *priv = NULL; + +    priv = GF_CALLOC(1, sizeof(*priv), gf_fdl_mt_fdl_private_t); +    if (!priv) { +        gf_log(this->name, GF_LOG_ERROR, "failed to allocate fdl_private"); +        goto err; +    } + +    INIT_LIST_HEAD(&priv->reqs); +    if (pthread_mutex_init(&priv->req_lock, NULL) != 0) { +        gf_log(this->name, GF_LOG_ERROR, "failed to initialize req_lock"); +        goto err; +    } +    if (pthread_cond_init(&priv->req_cond, NULL) != 0) { +        gf_log(this->name, GF_LOG_ERROR, "failed to initialize req_cond"); +        goto err; +    } + +    GF_OPTION_INIT("log-path", priv->log_dir, path, err); + +    this->private = priv; +    /* +     * The rest of the fop table is automatically generated, so this is a +     * bit cleaner than messing with the generation to add a hand-written +     * exception. +     */ + +    if (gf_thread_create(&priv->worker, NULL, fdl_worker, this, "fdlwrker") != +        0) { +        gf_log(this->name, GF_LOG_ERROR, "failed to start fdl_worker"); +        goto err; +    } + +    return 0; + +err: +    if (priv) { +        GF_FREE(priv); +    } +    return -1; +} + +void +fdl_fini(xlator_t *this) +{ +    fdl_private_t *priv = this->private; + +    if (priv) { +        priv->should_stop = _gf_true; +        pthread_cond_signal(&priv->req_cond); +        pthread_join(priv->worker, NULL); +        GF_FREE(priv); +    } +} + +int +fdl_reconfigure(xlator_t *this, dict_t *options) +{ +    fdl_private_t *priv = this->private; + +    GF_OPTION_RECONF("log_dir", priv->log_dir, options, path, out); +    /* TBD: react if it changed */ + +out: +    return 0; +} + +int32_t +mem_acct_init(xlator_t *this) +{ +    int ret = -1; + +    GF_VALIDATE_OR_GOTO("fdl", this, out); + +    ret = xlator_mem_acct_init(this, gf_fdl_mt_end + 1); + +    if (ret != 0) { +        gf_log(this->name, GF_LOG_ERROR, +               "Memory accounting init" +               "failed"); +        return ret; +    } +out: +    return ret; +} + +class_methods_t class_methods = { +    .init = fdl_init, +    .fini = fdl_fini, +    .reconfigure = fdl_reconfigure, +    .notify = default_notify, +}; + +struct volume_options options[] = { +    {.key = {"log-path"}, +     .type = GF_OPTION_TYPE_PATH, +     .default_value = DEFAULT_LOG_FILE_DIRECTORY, +     .description = "Directory for FDL files."}, +    {.key = {NULL}}, +}; + +struct xlator_cbks cbks = { +    .release = default_release, +    .releasedir = default_releasedir, +    .forget = default_forget, +}; diff --git a/xlators/experimental/fdl/src/recon-tmpl.c b/xlators/experimental/fdl/src/recon-tmpl.c deleted file mode 100644 index 228860401ae..00000000000 --- a/xlators/experimental/fdl/src/recon-tmpl.c +++ /dev/null @@ -1,304 +0,0 @@ -#pragma fragment PROLOG -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - -#include "glusterfs.h" -#include "iatt.h" -#include "syncop.h" -#include "xlator.h" -#include "glfs-internal.h" - -#include "fdl.h" - -#define GFAPI_SUCCESS 0 - -inode_t * -recon_get_inode (glfs_t *fs, uuid_t gfid) -{ -        inode_t         *inode; -        loc_t           loc     = {NULL,}; -        struct iatt     iatt; -        int             ret; -        inode_t         *newinode; - -        inode = inode_find (fs->active_subvol->itable, gfid); -        if (inode) { -                printf ("=== FOUND %s IN TABLE\n", uuid_utoa(gfid)); -                return inode; -        } - -        loc.inode = inode_new (fs->active_subvol->itable); -        if (!loc.inode) { -                return NULL; -        } -        gf_uuid_copy (loc.inode->gfid, gfid); -        gf_uuid_copy (loc.gfid, gfid); - -        printf ("=== DOING LOOKUP FOR %s\n", uuid_utoa(gfid)); - -        ret = syncop_lookup (fs->active_subvol, &loc, &iatt, -                             NULL, NULL, NULL); -        if (ret != GFAPI_SUCCESS) { -                fprintf (stderr, "syncop_lookup failed (%d)\n", ret); -                return NULL; -        } - -        newinode = inode_link (loc.inode, NULL, NULL, &iatt); -        if (newinode) { -                inode_lookup (newinode); -        } - -        return newinode; -} - -#pragma fragment DICT -        dict_t  *@ARGNAME@; - -        @ARGNAME@ = dict_new(); -        if (!@ARGNAME@) { -                goto *err_label; -        } -        err_label = &&cleanup_@ARGNAME@; - -        { -                int     key_len, data_len; -                char    *key_ptr; -                int     garbage; -                for (;;) { -                        key_len = *((int *)new_meta); -                        new_meta += sizeof(int); -                        if (!key_len) { -                                break; -                        } -                        key_ptr = new_meta; -                        new_meta += key_len; -                        data_len = *((int *)new_meta); -                        new_meta += sizeof(int); -                        garbage = dict_set_static_bin (@ARGNAME@, key_ptr, -                                                       new_meta, data_len); -                        /* TBD: check error from dict_set_static_bin */ -                        (void)garbage; -                        new_meta += data_len; -                } -        } - -#pragma fragment DICT_CLEANUP -cleanup_@ARGNAME@: -        dict_unref (@ARGNAME@); - -#pragma fragment DOUBLE -        @ARGTYPE@       @ARGNAME@       = *((@ARGTYPE@ *)new_meta); -        new_meta += sizeof(uint64_t); - -#pragma fragment FD -        inode_t *@ARGNAME@_ino; -        fd_t    *@ARGNAME@; - -        @ARGNAME@_ino = recon_get_inode (fs, *((uuid_t *)new_meta)); -        new_meta += 16; -        if (!@ARGNAME@_ino) { -                goto *err_label; -        } -        err_label = &&cleanup_@ARGNAME@_ino; - -        @ARGNAME@ = fd_anonymous (@ARGNAME@_ino); -        if (!@ARGNAME@) { -                goto *err_label; -        } -        err_label = &&cleanup_@ARGNAME@; - -#pragma fragment FD_CLEANUP -cleanup_@ARGNAME@: -        fd_unref (@ARGNAME@); -cleanup_@ARGNAME@_ino: -        inode_unref (@ARGNAME@_ino); - -#pragma fragment NEW_FD -        /* -         * This pseudo-type is only used for create, and in that case we know -         * we'll be using loc.inode, so it's not worth generalizing to take an -         * extra argument. -         */ -        fd_t    *@ARGNAME@      = fd_anonymous (loc.inode); - -        if (!fd) { -                goto *err_label; -        } -        err_label = &&cleanup_@ARGNAME@; -        new_meta += 16; - -#pragma fragment NEW_FD_CLEANUP -cleanup_@ARGNAME@: -        fd_unref (@ARGNAME@); - -#pragma fragment INTEGER -        @ARGTYPE@       @ARGNAME@       = *((@ARGTYPE@ *)new_meta); - -        new_meta += sizeof(@ARGTYPE@); - -#pragma fragment LOC -        loc_t           @ARGNAME@       = { NULL, }; - -        @ARGNAME@.inode = recon_get_inode (fs, *((uuid_t *)new_meta)); -        if (!@ARGNAME@.inode) { -                goto *err_label; -        } -        err_label = &&cleanup_@ARGNAME@; -        gf_uuid_copy (@ARGNAME@.gfid, @ARGNAME@.inode->gfid); -        new_meta += 16; -        new_meta += 16; /* skip over pargfid */ -        if (*(new_meta++)) { -                @ARGNAME@.name = new_meta; -                new_meta += strlen(new_meta) + 1; -        } - -#pragma fragment LOC_CLEANUP -cleanup_@ARGNAME@: -        loc_wipe (&@ARGNAME@); - -#pragma fragment PARENT_LOC -        loc_t           @ARGNAME@       = { NULL, }; - -        new_meta += 16; /* skip over gfid */ -        @ARGNAME@.parent = recon_get_inode (fs, *((uuid_t *)new_meta)); -        if (!@ARGNAME@.parent) { -                goto *err_label; -        } -        err_label = &&cleanup_@ARGNAME@; -        gf_uuid_copy (@ARGNAME@.pargfid, @ARGNAME@.parent->gfid); -        new_meta += 16; -        if (!*(new_meta++)) { -                goto *err_label; -        } -        @ARGNAME@.name = new_meta; -        new_meta += strlen(new_meta) + 1; - -        @ARGNAME@.inode = inode_new (fs->active_subvol->itable); -        if (!@ARGNAME@.inode) { -                goto *err_label; -        } - -#pragma fragment PARENT_LOC_CLEANUP -cleanup_@ARGNAME@: -        loc_wipe (&@ARGNAME@); - -#pragma fragment STRING -        char    *@ARGNAME@; -        if (*(new_meta++)) { -                @ARGNAME@ = new_meta; -                new_meta += (strlen(new_meta) + 1); -        } -        else { -                goto *err_label; -        } - -#pragma fragment VECTOR -        struct iovec    @ARGNAME@; - -        @ARGNAME@.iov_len = *((size_t *)new_meta); -        new_meta += sizeof(@ARGNAME@.iov_len); -        @ARGNAME@.iov_base = new_data; -        new_data += @ARGNAME@.iov_len; - -#pragma fragment IATT -        struct iatt     @ARGNAME@; -        { -                @ARGNAME@.ia_prot = *((ia_prot_t *)new_meta); -                new_meta += sizeof(ia_prot_t); -                uint32_t *myints = (uint32_t *)new_meta; -                @ARGNAME@.ia_uid = myints[0]; -                @ARGNAME@.ia_gid = myints[1]; -                @ARGNAME@.ia_atime = myints[2]; -                @ARGNAME@.ia_atime_nsec = myints[3]; -                @ARGNAME@.ia_mtime = myints[4]; -                @ARGNAME@.ia_mtime_nsec = myints[5]; -                new_meta += sizeof(*myints) * 6; -        } - -#pragma fragment IOBREF -        struct iobref   *@ARGNAME@; - -        @ARGNAME@ = iobref_new(); -        if (!@ARGNAME@) { -                goto *err_label; -        } -        err_label = &&cleanup_@ARGNAME@; - -#pragma fragment IOBREF_CLEANUP -cleanup_@ARGNAME@: -        iobref_unref (@ARGNAME@); - -#pragma fragment LINK -        /* TBD: check error */ -        inode_t *new_inode = inode_link (@INODE_ARG@, NULL, NULL, @IATT_ARG@); -        if (new_inode) { -                inode_lookup (new_inode); -        } - -#pragma fragment FOP -int -fdl_replay_@NAME@ (glfs_t *fs, char **old_meta, char **old_data) -{ -        char    *new_meta	= *old_meta; -        char	*new_data	= *old_data; -        int     ret; -        int     status          = 0xbad; -        void    *err_label      = &&done; - -@FUNCTION_BODY@ - -        ret = syncop_@NAME@ (fs->active_subvol, @SYNCOP_ARGS@, NULL); -        if (ret != @SUCCESS_VALUE@) { -                fprintf (stderr, "syncop_@NAME@ returned %d", ret); -                goto *err_label; -        } - -@LINKS@ - -        status = 0; - -@CLEANUPS@ - -done: -        *old_meta = new_meta; -        *old_data = new_data; -        return status; -} - -#pragma fragment CASE -        case GF_FOP_@UPNAME@: -                printf ("=== GF_FOP_@UPNAME@\n"); -                if (fdl_replay_@NAME@ (fs, &new_meta, &new_data) != 0) { -                        goto done; -                } -                recognized = 1; -                break; - -#pragma fragment EPILOG -int -recon_execute (glfs_t *fs, char **old_meta, char **old_data) -{ -        char            *new_meta       = *old_meta; -        char            *new_data       = *old_data; -        int             recognized      = 0; -        event_header_t  *eh; - -        eh = (event_header_t *)new_meta; -        new_meta += sizeof (*eh); - -        /* TBD: check event_type instead of assuming NEW_REQUEST */ - -        switch (eh->fop_type) { -@SWITCH_BODY@ - -        default: -                printf ("unknown fop %u\n", eh->fop_type); -        } - -done: -        *old_meta = new_meta; -        *old_data = new_data; -        return recognized; -} diff --git a/xlators/experimental/fdl/src/recon-tmpl.c.in b/xlators/experimental/fdl/src/recon-tmpl.c.in new file mode 100644 index 00000000000..5115dfd5c75 --- /dev/null +++ b/xlators/experimental/fdl/src/recon-tmpl.c.in @@ -0,0 +1,297 @@ +#pragma fragment PROLOG +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "iatt.h" +#include "syncop.h" +#include "xlator.h" +#include "glfs-internal.h" + +#include "fdl.h" + +#define GFAPI_SUCCESS 0 + +inode_t * +recon_get_inode(glfs_t *fs, uuid_t gfid) +{ +    inode_t *inode; +    loc_t loc = { +        NULL, +    }; +    struct iatt iatt; +    int ret; +    inode_t *newinode; + +    inode = inode_find(fs->active_subvol->itable, gfid); +    if (inode) { +        printf("=== FOUND %s IN TABLE\n", uuid_utoa(gfid)); +        return inode; +    } + +    loc.inode = inode_new(fs->active_subvol->itable); +    if (!loc.inode) { +        return NULL; +    } +    gf_uuid_copy(loc.inode->gfid, gfid); +    gf_uuid_copy(loc.gfid, gfid); + +    printf("=== DOING LOOKUP FOR %s\n", uuid_utoa(gfid)); + +    ret = syncop_lookup(fs->active_subvol, &loc, &iatt, NULL, NULL, NULL); +    if (ret != GFAPI_SUCCESS) { +        fprintf(stderr, "syncop_lookup failed (%d)\n", ret); +        return NULL; +    } + +    newinode = inode_link(loc.inode, NULL, NULL, &iatt); +    if (newinode) { +        inode_lookup(newinode); +    } + +    return newinode; +} + +#pragma fragment DICT +dict_t *@ARGNAME@; + +@ARGNAME@ = dict_new(); +if (!@ARGNAME@) { +    goto *err_label; +} +err_label = &&cleanup_@ARGNAME@; + +{ +    int key_len, data_len; +    char *key_ptr; +    int garbage; +    for (;;) { +        key_len = *((int *)new_meta); +        new_meta += sizeof(int); +        if (!key_len) { +            break; +        } +        key_ptr = new_meta; +        new_meta += key_len; +        data_len = *((int *)new_meta); +        new_meta += sizeof(int); +        garbage = dict_set_static_bin(@ARGNAME@, key_ptr, new_meta, data_len); +        /* TBD: check error from dict_set_static_bin */ +        (void)garbage; +        new_meta += data_len; +    } +} + +#pragma fragment DICT_CLEANUP +cleanup_@ARGNAME@ : dict_unref(@ARGNAME@); + +#pragma fragment DOUBLE +@ARGTYPE@ @ARGNAME@ = *((@ARGTYPE@ *)new_meta); +new_meta += sizeof(uint64_t); + +#pragma fragment FD +inode_t *@ARGNAME@_ino; +fd_t *@ARGNAME@; + +@ARGNAME@_ino = recon_get_inode(fs, *((uuid_t *)new_meta)); +new_meta += 16; +if (!@ARGNAME@_ino) { +    goto *err_label; +} +err_label = &&cleanup_@ARGNAME@_ino; + +@ARGNAME@ = fd_anonymous(@ARGNAME@_ino); +if (!@ARGNAME@) { +    goto *err_label; +} +err_label = &&cleanup_@ARGNAME@; + +#pragma fragment FD_CLEANUP +cleanup_@ARGNAME@ : fd_unref(@ARGNAME@); +cleanup_@ARGNAME@_ino : inode_unref(@ARGNAME@_ino); + +#pragma fragment NEW_FD +/* + * This pseudo-type is only used for create, and in that case we know + * we'll be using loc.inode, so it's not worth generalizing to take an + * extra argument. + */ +fd_t *@ARGNAME@ = fd_anonymous(loc.inode); + +if (!fd) { +    goto *err_label; +} +err_label = &&cleanup_@ARGNAME@; +new_meta += 16; + +#pragma fragment NEW_FD_CLEANUP +cleanup_@ARGNAME@ : fd_unref(@ARGNAME@); + +#pragma fragment INTEGER +@ARGTYPE@ @ARGNAME@ = *((@ARGTYPE@ *)new_meta); + +new_meta += sizeof(@ARGTYPE@); + +#pragma fragment LOC +loc_t @ARGNAME@ = { +    NULL, +}; + +@ARGNAME@.inode = recon_get_inode(fs, *((uuid_t *)new_meta)); +if (!@ARGNAME@.inode) { +    goto *err_label; +} +err_label = &&cleanup_@ARGNAME@; +gf_uuid_copy(@ARGNAME@.gfid, @ARGNAME@.inode->gfid); +new_meta += 16; +new_meta += 16; /* skip over pargfid */ +if (*(new_meta++)) { +    @ARGNAME@.name = new_meta; +    new_meta += strlen(new_meta) + 1; +} + +#pragma fragment LOC_CLEANUP +cleanup_@ARGNAME@ : loc_wipe(&@ARGNAME@); + +#pragma fragment PARENT_LOC +loc_t @ARGNAME@ = { +    NULL, +}; + +new_meta += 16; /* skip over gfid */ +@ARGNAME@.parent = recon_get_inode(fs, *((uuid_t *)new_meta)); +if (!@ARGNAME@.parent) { +    goto *err_label; +} +err_label = &&cleanup_@ARGNAME@; +gf_uuid_copy(@ARGNAME@.pargfid, @ARGNAME@.parent->gfid); +new_meta += 16; +if (!*(new_meta++)) { +    goto *err_label; +} +@ARGNAME@.name = new_meta; +new_meta += strlen(new_meta) + 1; + +@ARGNAME@.inode = inode_new(fs->active_subvol->itable); +if (!@ARGNAME@.inode) { +    goto *err_label; +} + +#pragma fragment PARENT_LOC_CLEANUP +cleanup_@ARGNAME@ : loc_wipe(&@ARGNAME@); + +#pragma fragment STRING +char *@ARGNAME@; +if (*(new_meta++)) { +    @ARGNAME@ = new_meta; +    new_meta += (strlen(new_meta) + 1); +} else { +    goto *err_label; +} + +#pragma fragment VECTOR +struct iovec @ARGNAME@; + +@ARGNAME@.iov_len = *((size_t *)new_meta); +new_meta += sizeof(@ARGNAME@.iov_len); +@ARGNAME@.iov_base = new_data; +new_data += @ARGNAME@.iov_len; + +#pragma fragment IATT +struct iatt @ARGNAME@; +{ +    @ARGNAME@.ia_prot = *((ia_prot_t *)new_meta); +    new_meta += sizeof(ia_prot_t); +    uint32_t *myints = (uint32_t *)new_meta; +    @ARGNAME@.ia_uid = myints[0]; +    @ARGNAME@.ia_gid = myints[1]; +    @ARGNAME@.ia_atime = myints[2]; +    @ARGNAME@.ia_atime_nsec = myints[3]; +    @ARGNAME@.ia_mtime = myints[4]; +    @ARGNAME@.ia_mtime_nsec = myints[5]; +    new_meta += sizeof(*myints) * 6; +} + +#pragma fragment IOBREF +struct iobref *@ARGNAME@; + +@ARGNAME@ = iobref_new(); +if (!@ARGNAME@) { +    goto *err_label; +} +err_label = &&cleanup_@ARGNAME@; + +#pragma fragment IOBREF_CLEANUP +cleanup_@ARGNAME@ : iobref_unref(@ARGNAME@); + +#pragma fragment LINK +/* TBD: check error */ +inode_t *new_inode = inode_link(@INODE_ARG@, NULL, NULL, @IATT_ARG@); +if (new_inode) { +    inode_lookup(new_inode); +} + +#pragma fragment FOP +int fdl_replay_@NAME@(glfs_t *fs, char **old_meta, char **old_data) +{ +    char *new_meta = *old_meta; +    char *new_data = *old_data; +    int ret; +    int status = 0xbad; +    void *err_label = &&done; + +    @FUNCTION_BODY@ + +    ret = syncop_@NAME@(fs->active_subvol, @SYNCOP_ARGS@, NULL); +    if (ret !=@SUCCESS_VALUE@) { +        fprintf(stderr, "syncop_@NAME@ returned %d", ret); +        goto *err_label; +    } + +    @LINKS@ + +    status = 0; + +    @CLEANUPS@ + +     done : *old_meta = new_meta; +    *old_data = new_data; +    return status; +} + +#pragma fragment CASE +case GF_FOP_@UPNAME@: +    printf("=== GF_FOP_@UPNAME@\n"); +    if (fdl_replay_@NAME@(fs, &new_meta, &new_data) != 0) { +        goto done; +    } +    recognized = 1; +    break; + +#pragma fragment EPILOG +    int +    recon_execute(glfs_t *fs, char **old_meta, char **old_data) +    { +        char *new_meta = *old_meta; +        char *new_data = *old_data; +        int recognized = 0; +        event_header_t *eh; + +        eh = (event_header_t *)new_meta; +        new_meta += sizeof(*eh); + +        /* TBD: check event_type instead of assuming NEW_REQUEST */ + +        switch (eh->fop_type) { +            @SWITCH_BODY@ + +            default : printf("unknown fop %u\n", eh->fop_type); +        } + +    done: +        *old_meta = new_meta; +        *old_data = new_data; +        return recognized; +    } diff --git a/xlators/experimental/jbr-client/src/Makefile.am b/xlators/experimental/jbr-client/src/Makefile.am index 15616d13798..c71f5ff1e83 100644 --- a/xlators/experimental/jbr-client/src/Makefile.am +++ b/xlators/experimental/jbr-client/src/Makefile.am @@ -22,7 +22,7 @@ AM_CFLAGS = -Wall $(GF_CFLAGS)  JBRC_PREFIX	= $(top_srcdir)/xlators/experimental/jbr-client/src  JBRC_GEN_FOPS	= $(JBRC_PREFIX)/gen-fops.py -JBRC_TEMPLATES	= $(JBRC_PREFIX)/fop-template.c +JBRC_TEMPLATES	= $(JBRC_PREFIX)/fop-template.c.in  JBRC_WRAPPER	= $(JBRC_PREFIX)/jbrc.c  noinst_PYTHON	= $(JBRC_GEN_FOPS)  EXTRA_DIST	= $(JBRC_TEMPLATES) $(JBRC_WRAPPER) diff --git a/xlators/experimental/jbr-client/src/fop-template.c b/xlators/experimental/jbr-client/src/fop-template.c deleted file mode 100644 index 7719f511f01..00000000000 --- a/xlators/experimental/jbr-client/src/fop-template.c +++ /dev/null @@ -1,113 +0,0 @@ -/* template-name fop */ -int32_t -jbrc_@NAME@ (call_frame_t *frame, xlator_t *this, -             @LONG_ARGS@) -{ -        jbrc_local_t    *local          = NULL; -        xlator_t        *target_xl      = ACTIVE_CHILD(this); - -        local = mem_get(this->local_pool); -        if (!local) { -                goto err; -        } - -        local->stub = fop_@NAME@_stub (frame, jbrc_@NAME@_continue, -                                       @SHORT_ARGS@); -        if (!local->stub) { -                goto err; -        } -        local->curr_xl = target_xl; -        local->scars = 0; - -        frame->local = local; -        STACK_WIND_COOKIE (frame, jbrc_@NAME@_cbk, target_xl, -                    target_xl, target_xl->fops->@NAME@, -                    @SHORT_ARGS@); -        return 0; - -err: -        if (local) { -                mem_put(local); -        } -        STACK_UNWIND_STRICT (@NAME@, frame, -1, ENOMEM, -                             @ERROR_ARGS@); -        return 0; -} - -/* template-name cbk */ -int32_t -jbrc_@NAME@_cbk (call_frame_t *frame, void *cookie, xlator_t *this, -                 int32_t op_ret, int32_t op_errno, -                 @LONG_ARGS@) -{ -        jbrc_local_t    *local          = frame->local; -        xlator_t        *last_xl        = cookie; -        xlator_t        *next_xl; -        jbrc_private_t  *priv           = this->private; -        struct timespec spec; - -        if (op_ret != (-1)) { -                if (local->scars) { -                        gf_msg (this->name, GF_LOG_INFO, 0, J_MSG_RETRY_MSG, -                                HILITE("retried %p OK"), frame->local); -                } -                priv->active = last_xl; -                goto unwind; -        } -        if ((op_errno != EREMOTE) && (op_errno != ENOTCONN)) { -                goto unwind; -        } - -        /* TBD: get leader ID from xdata? */ -        next_xl = next_xlator(this, last_xl); -        /* -         * We can't just give up after we've tried all bricks, because it's -         * quite likely that a new leader election just hasn't finished yet. -         * We also shouldn't retry endlessly, and especially not at a high -         * rate, but that's good enough while we work on other things. -         * -         * TBD: implement slow/finite retry via a worker thread -         */ -        if (!next_xl || (local->scars >= SCAR_LIMIT)) { -                gf_msg (this->name, GF_LOG_DEBUG, 0, J_MSG_RETRY_MSG, -                        HILITE("ran out of retries for %p"), frame->local); -                goto unwind; -        } - -        local->curr_xl = next_xl; -        local->scars += 1; -        spec.tv_sec = 1; -        spec.tv_nsec = 0; -        /* -         * WARNING -         * -         * Just calling gf_timer_call_after like this leaves open the -         * possibility that writes will get reordered, if a first write is -         * rescheduled and then a second comes along to find an updated -         * priv->active before the first actually executes.  We might need to -         * implement a stricter (and more complicated) queuing mechanism to -         * ensure absolute consistency in this case. -         */ -        if (gf_timer_call_after(this->ctx, spec, jbrc_retry_cb, local)) { -                return 0; -        } - -unwind: -        call_stub_destroy(local->stub); -        STACK_UNWIND_STRICT (@NAME@, frame, op_ret, op_errno, -                                     @SHORT_ARGS@); -        return 0; -} - -/* template-name cont-func */ -int32_t -jbrc_@NAME@_continue (call_frame_t *frame, xlator_t *this, -                      @LONG_ARGS@) -{ -        jbrc_local_t    *local  = frame->local; - -        STACK_WIND_COOKIE (frame, jbrc_@NAME@_cbk, local->curr_xl, -                           local->curr_xl, local->curr_xl->fops->@NAME@, -                           @SHORT_ARGS@); -        return 0; -} diff --git a/xlators/experimental/jbr-client/src/fop-template.c.in b/xlators/experimental/jbr-client/src/fop-template.c.in new file mode 100644 index 00000000000..9732badc794 --- /dev/null +++ b/xlators/experimental/jbr-client/src/fop-template.c.in @@ -0,0 +1,102 @@ +/* template-name fop */ +int32_t jbrc_@NAME@(call_frame_t *frame, xlator_t *this, @LONG_ARGS@) +{ +    jbrc_local_t *local = NULL; +    xlator_t *target_xl = ACTIVE_CHILD(this); + +    local = mem_get(this->local_pool); +    if (!local) { +        goto err; +    } + +    local->stub = fop_@NAME@_stub(frame, jbrc_@NAME@_continue, @SHORT_ARGS@); +    if (!local->stub) { +        goto err; +    } +    local->curr_xl = target_xl; +    local->scars = 0; + +    frame->local = local; +    STACK_WIND_COOKIE(frame, jbrc_@NAME@_cbk, target_xl, target_xl, +                      target_xl->fops->@NAME@, @SHORT_ARGS@); +    return 0; + +err: +    if (local) { +        mem_put(local); +    } +    STACK_UNWIND_STRICT(@NAME@, frame, -1, ENOMEM, @ERROR_ARGS@); +    return 0; +} + +/* template-name cbk */ +int32_t jbrc_@NAME@_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +                          int32_t op_ret, int32_t op_errno, @LONG_ARGS@) +{ +    jbrc_local_t *local = frame->local; +    xlator_t *last_xl = cookie; +    xlator_t *next_xl; +    jbrc_private_t *priv = this->private; +    struct timespec spec; + +    if (op_ret != (-1)) { +        if (local->scars) { +            gf_msg(this->name, GF_LOG_INFO, 0, J_MSG_RETRY_MSG, +                   HILITE("retried %p OK"), frame->local); +        } +        priv->active = last_xl; +        goto unwind; +    } +    if ((op_errno != EREMOTE) && (op_errno != ENOTCONN)) { +        goto unwind; +    } + +    /* TBD: get leader ID from xdata? */ +    next_xl = next_xlator(this, last_xl); +    /* +     * We can't just give up after we've tried all bricks, because it's +     * quite likely that a new leader election just hasn't finished yet. +     * We also shouldn't retry endlessly, and especially not at a high +     * rate, but that's good enough while we work on other things. +     * +     * TBD: implement slow/finite retry via a worker thread +     */ +    if (!next_xl || (local->scars >= SCAR_LIMIT)) { +        gf_msg(this->name, GF_LOG_DEBUG, 0, J_MSG_RETRY_MSG, +               HILITE("ran out of retries for %p"), frame->local); +        goto unwind; +    } + +    local->curr_xl = next_xl; +    local->scars += 1; +    spec.tv_sec = 1; +    spec.tv_nsec = 0; +    /* +     * WARNING +     * +     * Just calling gf_timer_call_after like this leaves open the +     * possibility that writes will get reordered, if a first write is +     * rescheduled and then a second comes along to find an updated +     * priv->active before the first actually executes.  We might need to +     * implement a stricter (and more complicated) queuing mechanism to +     * ensure absolute consistency in this case. +     */ +    if (gf_timer_call_after(this->ctx, spec, jbrc_retry_cb, local)) { +        return 0; +    } + +unwind: +    call_stub_destroy(local->stub); +    STACK_UNWIND_STRICT(@NAME@, frame, op_ret, op_errno, @SHORT_ARGS@); +    return 0; +} + +/* template-name cont-func */ +int32_t jbrc_@NAME@_continue(call_frame_t *frame, xlator_t *this, @LONG_ARGS@) +{ +    jbrc_local_t *local = frame->local; + +    STACK_WIND_COOKIE(frame, jbrc_@NAME@_cbk, local->curr_xl, local->curr_xl, +                      local->curr_xl->fops->@NAME@, @SHORT_ARGS@); +    return 0; +} diff --git a/xlators/experimental/jbr-server/src/Makefile.am b/xlators/experimental/jbr-server/src/Makefile.am index b3ceb2d9eda..42d3c8a6c36 100644 --- a/xlators/experimental/jbr-server/src/Makefile.am +++ b/xlators/experimental/jbr-server/src/Makefile.am @@ -27,7 +27,7 @@ AM_CFLAGS = -Wall $(GF_CFLAGS)  JBR_PREFIX	= $(top_srcdir)/xlators/experimental/jbr-server/src  JBR_GEN_FOPS	= $(JBR_PREFIX)/gen-fops.py -JBR_TEMPLATES	= $(JBR_PREFIX)/all-templates.c +JBR_TEMPLATES	= $(JBR_PREFIX)/all-templates.c.in  JBR_WRAPPER	= $(JBR_PREFIX)/jbr.c  noinst_PYTHON	= $(JBR_GEN_FOPS)  EXTRA_DIST	= $(JBR_TEMPLATES) $(JBR_WRAPPER) diff --git a/xlators/experimental/jbr-server/src/all-templates.c b/xlators/experimental/jbr-server/src/all-templates.c deleted file mode 100644 index 530c4187571..00000000000 --- a/xlators/experimental/jbr-server/src/all-templates.c +++ /dev/null @@ -1,542 +0,0 @@ -/* - * You can put anything here - it doesn't even have to be a comment - and it - * will be ignored until we reach the first template-name comment. - */ - - -/* template-name read-fop */ -int32_t -jbr_@NAME@ (call_frame_t *frame, xlator_t *this, -            @LONG_ARGS@) -{ -        jbr_private_t   *priv     = NULL; -        gf_boolean_t     in_recon = _gf_false; -        int32_t          op_errno = 0; -        int32_t          recon_term, recon_index; - -        GF_VALIDATE_OR_GOTO ("jbr", this, err); -        priv = this->private; -        GF_VALIDATE_OR_GOTO (this->name, priv, err); -        GF_VALIDATE_OR_GOTO (this->name, frame, err); - -        op_errno = EREMOTE; - -        /* allow reads during reconciliation       * -         * TBD: allow "dirty" reads on non-leaders * -         */ -        if (xdata && -            (dict_get_int32(xdata, RECON_TERM_XATTR, &recon_term) == 0) && -            (dict_get_int32(xdata, RECON_INDEX_XATTR, &recon_index) == 0)) { -                in_recon = _gf_true; -        } - -        if ((!priv->leader) && (in_recon == _gf_false)) { -                goto err; -        } - -        STACK_WIND (frame, default_@NAME@_cbk, -                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->@NAME@, -                    @SHORT_ARGS@); -        return 0; - -err: -        STACK_UNWIND_STRICT (@NAME@, frame, -1, op_errno, -                             @ERROR_ARGS@); -        return 0; -} - -/* template-name read-perform_local_op */ -/* No "perform_local_op" function needed for @NAME@ */ - -/* template-name read-dispatch */ -/* No "dispatch" function needed for @NAME@ */ - -/* template-name read-call_dispatch */ -/* No "call_dispatch" function needed for @NAME@ */ - -/* template-name read-fan-in */ -/* No "fan-in" function needed for @NAME@ */ - -/* template-name read-continue */ -/* No "continue" function needed for @NAME@ */ - -/* template-name read-complete */ -/* No "complete" function needed for @NAME@ */ - -/* template-name write-fop */ -int32_t -jbr_@NAME@ (call_frame_t *frame, xlator_t *this, -            @LONG_ARGS@) -{ -        jbr_local_t     *local         = NULL; -        jbr_private_t   *priv          = NULL; -        int32_t          ret           = -1; -        int              op_errno      = ENOMEM; - -        GF_VALIDATE_OR_GOTO ("jbr", this, err); -        priv = this->private; -        GF_VALIDATE_OR_GOTO (this->name, priv, err); -        GF_VALIDATE_OR_GOTO (this->name, frame, err); - -#if defined(JBR_CG_NEED_FD) -        ret = jbr_leader_checks_and_init (frame, this, &op_errno, xdata, fd); -#else -        ret = jbr_leader_checks_and_init (frame, this, &op_errno, xdata, NULL); -#endif -        if (ret) -                goto err; - -        local = frame->local; - -        /* -         * If we let it through despite not being the leader, then we just want -         * to pass it on down without all of the additional xattrs, queuing, and -         * so on.  However, jbr_*_complete does depend on the initialization -         * immediately above this. -         */ -        if (!priv->leader) { -                STACK_WIND (frame, jbr_@NAME@_complete, -                            FIRST_CHILD(this), FIRST_CHILD(this)->fops->@NAME@, -                            @SHORT_ARGS@); -                return 0; -        } - -        ret = jbr_initialize_xdata_set_attrs (this, &xdata); -        if (ret) -                goto err; - -        local->xdata = dict_ref(xdata); -        local->stub = fop_@NAME@_stub (frame, jbr_@NAME@_continue, -                                       @SHORT_ARGS@); -        if (!local->stub) { -                goto err; -        } - -        /* -         * Can be used to just call_dispatch or be customised per fop to * -         * perform ops specific to that particular fop.                  * -         */ -        ret = jbr_@NAME@_perform_local_op (frame, this, &op_errno, -                                           @SHORT_ARGS@); -        if (ret) -                goto err; - -        return ret; -err: -        if (local) { -                if (local->stub) { -                        call_stub_destroy(local->stub); -                } -                if (local->qstub) { -                        call_stub_destroy(local->qstub); -                } -                if (local->fd) { -                        fd_unref(local->fd); -                } -                mem_put(local); -        } -        STACK_UNWIND_STRICT (@NAME@, frame, -1, op_errno, -                             @ERROR_ARGS@); -        return 0; -} - -/* template-name write-perform_local_op */ -int32_t -jbr_@NAME@_perform_local_op (call_frame_t *frame, xlator_t *this, int *op_errno, -                             @LONG_ARGS@) -{ -        int32_t          ret    = -1; - -        GF_VALIDATE_OR_GOTO ("jbr", this, out); -        GF_VALIDATE_OR_GOTO (this->name, frame, out); -        GF_VALIDATE_OR_GOTO (this->name, op_errno, out); - -        ret = jbr_@NAME@_call_dispatch (frame, this, op_errno, -                                        @SHORT_ARGS@); - -out: -        return ret; -} - -/* template-name write-call_dispatch */ -int32_t -jbr_@NAME@_call_dispatch (call_frame_t *frame, xlator_t *this, int *op_errno, -                          @LONG_ARGS@) -{ -        jbr_local_t     *local  = NULL; -        jbr_private_t   *priv   = NULL; -        int32_t          ret    = -1; - -        GF_VALIDATE_OR_GOTO ("jbr", this, out); -        priv = this->private; -        GF_VALIDATE_OR_GOTO (this->name, priv, out); -        GF_VALIDATE_OR_GOTO (this->name, frame, out); -        local = frame->local; -        GF_VALIDATE_OR_GOTO (this->name, local, out); -        GF_VALIDATE_OR_GOTO (this->name, op_errno, out); - -#if defined(JBR_CG_QUEUE) -        jbr_inode_ctx_t  *ictx  = jbr_get_inode_ctx(this, fd->inode); -        if (!ictx) { -                *op_errno = EIO; -                goto out; -        } - -        LOCK(&ictx->lock); -                if (ictx->active) { -                        gf_msg_debug (this->name, 0, -                                      "queuing request due to conflict"); -                        /* -                         * TBD: enqueue only for real conflict -                         * -                         * Currently we just act like all writes are in -                         * conflict with one another.  What we should really do -                         * is check the active/pending queues and defer only if -                         * there's a conflict there. -                         * -                         * It's important to check the pending queue because we -                         * might have an active request X which conflicts with -                         * a pending request Y, and this request Z might -                         * conflict with Y but not X.  If we checked only the -                         * active queue then Z could jump ahead of Y, which -                         * would be incorrect. -                         */ -                        local->qstub = fop_@NAME@_stub (frame, -                                                        jbr_@NAME@_dispatch, -                                                        @SHORT_ARGS@); -                        if (!local->qstub) { -                                UNLOCK(&ictx->lock); -                                goto out; -                        } -                        list_add_tail(&local->qlinks, &ictx->pqueue); -                        ++(ictx->pending); -                        UNLOCK(&ictx->lock); -                        ret = 0; -                        goto out; -                } else { -                        list_add_tail(&local->qlinks, &ictx->aqueue); -                        ++(ictx->active); -                } -        UNLOCK(&ictx->lock); -#endif -        ret = jbr_@NAME@_dispatch (frame, this, @SHORT_ARGS@); - -out: -        return ret; -} - -/* template-name write-dispatch */ -int32_t -jbr_@NAME@_dispatch (call_frame_t *frame, xlator_t *this, -                     @LONG_ARGS@) -{ -        jbr_local_t     *local  = NULL; -        jbr_private_t   *priv   = NULL; -        int32_t          ret    = -1; -        xlator_list_t   *trav; - -        GF_VALIDATE_OR_GOTO ("jbr", this, out); -        priv = this->private; -        GF_VALIDATE_OR_GOTO (this->name, priv, out); -        GF_VALIDATE_OR_GOTO (this->name, frame, out); -        local = frame->local; -        GF_VALIDATE_OR_GOTO (this->name, local, out); - -        /* -         * TBD: unblock pending request(s) if we fail after this point but -         * before we get to jbr_@NAME@_complete (where that code currently -         * resides). -         */ - -        local->call_count = priv->n_children - 1; -        for (trav = this->children->next; trav; trav = trav->next) { -                STACK_WIND (frame, jbr_@NAME@_fan_in, -                            trav->xlator, trav->xlator->fops->@NAME@, -                            @SHORT_ARGS@); -        } - -        /* TBD: variable Issue count */ -        ret = 0; -out: -        return ret; -} - -/* template-name write-fan-in */ -int32_t -jbr_@NAME@_fan_in (call_frame_t *frame, void *cookie, xlator_t *this, -                   int32_t op_ret, int32_t op_errno, -                   @LONG_ARGS@) -{ -        jbr_local_t   *local  = NULL; -        int32_t        ret    = -1; -        uint8_t        call_count; - -        GF_VALIDATE_OR_GOTO ("jbr", this, out); -        GF_VALIDATE_OR_GOTO (this->name, frame, out); -        local = frame->local; -        GF_VALIDATE_OR_GOTO (this->name, local, out); - -        gf_msg_trace (this->name, 0, "op_ret = %d, op_errno = %d\n", -                      op_ret, op_errno); - -        LOCK(&frame->lock); -        call_count = --(local->call_count); -        if (op_ret != -1) { -                /* Increment the number of successful acks * -                 * received for the operation.             * -                 */ -                (local->successful_acks)++; -                local->successful_op_ret = op_ret; -        } -        gf_msg_debug (this->name, 0, "succ_acks = %d, op_ret = %d, op_errno = %d\n", -                      op_ret, op_errno, local->successful_acks); -        UNLOCK(&frame->lock); - -        /* TBD: variable Completion count */ -        if (call_count == 0) { -                call_resume(local->stub); -        } - -        ret = 0; -out: -        return ret; -} - -/* template-name write-continue */ -int32_t -jbr_@NAME@_continue (call_frame_t *frame, xlator_t *this, -                     @LONG_ARGS@) -{ -        int32_t          ret       = -1; -        gf_boolean_t     result    = _gf_false; -        jbr_local_t     *local     = NULL; -        jbr_local_t     *new_local = NULL; -        jbr_private_t   *priv      = NULL; -        int32_t          op_errno  = 0; - -        GF_VALIDATE_OR_GOTO ("jbr", this, out); -        GF_VALIDATE_OR_GOTO (this->name, frame, out); -        priv = this->private; -        local = frame->local; -        GF_VALIDATE_OR_GOTO (this->name, priv, out); -        GF_VALIDATE_OR_GOTO (this->name, local, out); - -        /* Perform quorum check to see if the leader needs     * -         * to perform the operation. If the operation will not * -         * meet quorum irrespective of the leader's result     * -         * there is no point in the leader performing the fop  * -         */ -        result = fop_quorum_check (this, (double)priv->n_children, -                                   (double)local->successful_acks + 1); -        if (result == _gf_false) { -                gf_msg (this->name, GF_LOG_ERROR, EROFS, -                        J_MSG_QUORUM_NOT_MET, "Didn't receive enough acks " -                        "to meet quorum. Failing the operation without trying " -                        "it on the leader."); - -#if defined(JBR_CG_QUEUE) -                /* -                 * In case of a fop failure, before unwinding need to * -                 * remove it from queue                               * -                 */ -                ret = jbr_remove_from_queue (frame, this); -                if (ret) { -                        gf_msg (this->name, GF_LOG_ERROR, 0, -                                J_MSG_GENERIC, "Failed to remove from queue."); -                } -#endif - -                /* -                 * In this case, the quorum is not met on the followers  * -                 * So the operation will not be performed on the leader  * -                 * and a rollback will be sent via GF_FOP_IPC to all the * -                 * followers, where this particular fop's term and index * -                 * numbers will be journaled, and later used to rollback * -                 */ -                call_frame_t    *new_frame; - -                new_frame = copy_frame (frame); - -                if (new_frame) { -                        new_local = mem_get0(this->local_pool); -                        if (new_local) { -                                INIT_LIST_HEAD(&new_local->qlinks); -                                ret = dict_set_int32 (local->xdata, -                                                      "rollback-fop", -                                                      GF_FOP_@UPNAME@); -                                if (ret) { -                                        gf_msg (this->name, GF_LOG_ERROR, 0, -                                                J_MSG_DICT_FLR, -                                                "failed to set rollback-fop"); -                                } else { -                                        new_local->xdata = dict_ref(local->xdata); -                                        new_frame->local = new_local; -                                        jbr_ipc_call_dispatch (new_frame, -                                                               this, &op_errno, -                                                               FDL_IPC_JBR_SERVER_ROLLBACK, -                                                               new_local->xdata); -                                } -                        } else { -                                gf_log (this->name, GF_LOG_WARNING, -                                        "Could not create local for new_frame"); -                        } -                } else { -                        gf_log (this->name, GF_LOG_WARNING, -                                "Could not send rollback ipc"); -                } - -                STACK_UNWIND_STRICT (@NAME@, frame, -1, EROFS, -                                     @ERROR_ARGS@); -        } else { -                STACK_WIND (frame, jbr_@NAME@_complete, -                            FIRST_CHILD(this), FIRST_CHILD(this)->fops->@NAME@, -                            @SHORT_ARGS@); -        } - -out: -        return 0; -} - -/* template-name write-complete */ -int32_t -jbr_@NAME@_complete (call_frame_t *frame, void *cookie, xlator_t *this, -                     int32_t op_ret, int32_t op_errno, -                     @LONG_ARGS@) -{ -        int32_t          ret       = -1; -        gf_boolean_t     result    = _gf_false; -        jbr_private_t   *priv      = NULL; -        jbr_local_t     *local     = NULL; -        jbr_local_t     *new_local = NULL; - -        GF_VALIDATE_OR_GOTO ("jbr", this, err); -        GF_VALIDATE_OR_GOTO (this->name, frame, err); -        priv = this->private; -        local = frame->local; -        GF_VALIDATE_OR_GOTO (this->name, priv, err); -        GF_VALIDATE_OR_GOTO (this->name, local, err); - -        /* If the fop failed on the leader, then reduce one successful ack -         * before calculating the fop quorum -         */ -        LOCK(&frame->lock); -        if (op_ret == -1) -                (local->successful_acks)--; -        UNLOCK(&frame->lock); - -#if defined(JBR_CG_QUEUE) -        ret = jbr_remove_from_queue (frame, this); -        if (ret) -                goto err; -#endif - -#if defined(JBR_CG_FSYNC) -        jbr_mark_fd_dirty(this, local); -#endif - -#if defined(JBR_CG_NEED_FD) -        fd_unref(local->fd); -#endif - -        /* After the leader completes the fop, a quorum check is      * -         * performed, taking into account the outcome of the fop      * -         * on the leader. Irrespective of the fop being successful    * -         * or failing on the leader, the result of the quorum will    * -         * determine if the overall fop is successful or not. For     * -         * example, a fop might have succeeded on every node except   * -         * the leader, in which case as quorum is being met, the fop  * -         * will be treated as a successful fop, even though it failed * -         * on the leader. On follower nodes, no quorum check should   * -         * be done, and the result is returned to the leader as is.   * -         */ -        if (priv->leader) { -                result = fop_quorum_check (this, (double)priv->n_children, -                                           (double)local->successful_acks + 1); -                if (result == _gf_false) { -                        op_ret = -1; -                        op_errno = EROFS; -                        gf_msg (this->name, GF_LOG_ERROR, EROFS, -                                J_MSG_QUORUM_NOT_MET, "Quorum is not met. " -                                "The operation has failed."); -                        /* -                         * In this case, the quorum is not met after the      * -                         * operation is performed on the leader. Hence a      * -                         * rollback will be sent via GF_FOP_IPC to the leader * -                         * where this particular fop's term and index numbers * -                         * will be journaled, and later used to rollback.     * -                         * The same will be done on all the followers         * -                         */ -                        call_frame_t    *new_frame; - -                        new_frame = copy_frame (frame); -                        if (new_frame) { -                                new_local = mem_get0(this->local_pool); -                                if (new_local) { -                                        INIT_LIST_HEAD(&new_local->qlinks); -                                        gf_msg (this->name, GF_LOG_ERROR, 0, -                                                J_MSG_DICT_FLR, "op = %d", -                                                new_frame->op); -                                        ret = dict_set_int32 (local->xdata, -                                                              "rollback-fop", -                                                              GF_FOP_@UPNAME@); -                                        if (ret) { -                                                gf_msg (this->name, -                                                        GF_LOG_ERROR, 0, -                                                        J_MSG_DICT_FLR, -                                                        "failed to set " -                                                        "rollback-fop"); -                                        } else { -                                                new_local->xdata = dict_ref (local->xdata); -                                                new_frame->local = new_local; -                                                /* -                                                 * Calling STACK_WIND instead * -                                                 * of jbr_ipc as it will not  * -                                                 * unwind to the previous     * -                                                 * translators like it will   * -                                                 * in case of jbr_ipc.        * -                                                 */ -                                                STACK_WIND (new_frame, -                                                            jbr_ipc_complete, -                                                            FIRST_CHILD(this), -                                                            FIRST_CHILD(this)->fops->ipc, -                                                            FDL_IPC_JBR_SERVER_ROLLBACK, -                                                            new_local->xdata); -                                        } -                                } else { -                                        gf_log (this->name, GF_LOG_WARNING, -                                                "Could not create local " -                                                "for new_frame"); -                                } -                        } else { -                                gf_log (this->name, GF_LOG_WARNING, -                                        "Could not send rollback ipc"); -                        } -                } else { -#if defined(JBR_CG_NEED_FD) -                        op_ret = local->successful_op_ret; -#else -                        op_ret = 0; -#endif -                        op_errno = 0; -                        gf_msg_debug (this->name, 0, -                                      "Quorum has met. The operation has succeeded."); -                } -        } - -        /* -         * Unrefing the reference taken in jbr_@NAME@ () * -         */ -        dict_unref (local->xdata); - -        STACK_UNWIND_STRICT (@NAME@, frame, op_ret, op_errno, -                             @SHORT_ARGS@); - - -        return 0; - -err: -        STACK_UNWIND_STRICT (@NAME@, frame, -1, 0, -                             @SHORT_ARGS@); - -        return 0; -} diff --git a/xlators/experimental/jbr-server/src/all-templates.c.in b/xlators/experimental/jbr-server/src/all-templates.c.in new file mode 100644 index 00000000000..a9d57fc646f --- /dev/null +++ b/xlators/experimental/jbr-server/src/all-templates.c.in @@ -0,0 +1,501 @@ +/* + * You can put anything here - it doesn't even have to be a comment - and it + * will be ignored until we reach the first template-name comment. + */ + +/* template-name read-fop */ +int32_t jbr_@NAME@(call_frame_t *frame, xlator_t *this, @LONG_ARGS@) +{ +    jbr_private_t *priv = NULL; +    gf_boolean_t in_recon = _gf_false; +    int32_t op_errno = 0; +    int32_t recon_term, recon_index; + +    GF_VALIDATE_OR_GOTO("jbr", this, err); +    priv = this->private; +    GF_VALIDATE_OR_GOTO(this->name, priv, err); +    GF_VALIDATE_OR_GOTO(this->name, frame, err); + +    op_errno = EREMOTE; + +    /* allow reads during reconciliation       * +     * TBD: allow "dirty" reads on non-leaders * +     */ +    if (xdata && (dict_get_int32(xdata, RECON_TERM_XATTR, &recon_term) == 0) && +        (dict_get_int32(xdata, RECON_INDEX_XATTR, &recon_index) == 0)) { +        in_recon = _gf_true; +    } + +    if ((!priv->leader) && (in_recon == _gf_false)) { +        goto err; +    } + +    STACK_WIND(frame, default_@NAME@_cbk, FIRST_CHILD(this), +               FIRST_CHILD(this)->fops->@NAME@, @SHORT_ARGS@); +    return 0; + +err: +    STACK_UNWIND_STRICT(@NAME@, frame, -1, op_errno, @ERROR_ARGS@); +    return 0; +} + +/* template-name read-perform_local_op */ +/* No "perform_local_op" function needed for @NAME@ */ + +/* template-name read-dispatch */ +/* No "dispatch" function needed for @NAME@ */ + +/* template-name read-call_dispatch */ +/* No "call_dispatch" function needed for @NAME@ */ + +/* template-name read-fan-in */ +/* No "fan-in" function needed for @NAME@ */ + +/* template-name read-continue */ +/* No "continue" function needed for @NAME@ */ + +/* template-name read-complete */ +/* No "complete" function needed for @NAME@ */ + +/* template-name write-fop */ +int32_t jbr_@NAME@(call_frame_t *frame, xlator_t *this, @LONG_ARGS@) +{ +    jbr_local_t *local = NULL; +    jbr_private_t *priv = NULL; +    int32_t ret = -1; +    int op_errno = ENOMEM; + +    GF_VALIDATE_OR_GOTO("jbr", this, err); +    priv = this->private; +    GF_VALIDATE_OR_GOTO(this->name, priv, err); +    GF_VALIDATE_OR_GOTO(this->name, frame, err); + +#if defined(JBR_CG_NEED_FD) +    ret = jbr_leader_checks_and_init(frame, this, &op_errno, xdata, fd); +#else +    ret = jbr_leader_checks_and_init(frame, this, &op_errno, xdata, NULL); +#endif +    if (ret) +        goto err; + +    local = frame->local; + +    /* +     * If we let it through despite not being the leader, then we just want +     * to pass it on down without all of the additional xattrs, queuing, and +     * so on.  However, jbr_*_complete does depend on the initialization +     * immediately above this. +     */ +    if (!priv->leader) { +        STACK_WIND(frame, jbr_@NAME@_complete, FIRST_CHILD(this), +                   FIRST_CHILD(this)->fops->@NAME@, @SHORT_ARGS@); +        return 0; +    } + +    ret = jbr_initialize_xdata_set_attrs(this, &xdata); +    if (ret) +        goto err; + +    local->xdata = dict_ref(xdata); +    local->stub = fop_@NAME@_stub(frame, jbr_@NAME@_continue, @SHORT_ARGS@); +    if (!local->stub) { +        goto err; +    } + +    /* +     * Can be used to just call_dispatch or be customised per fop to * +     * perform ops specific to that particular fop.                  * +     */ +    ret = jbr_@NAME@_perform_local_op(frame, this, &op_errno, @SHORT_ARGS@); +    if (ret) +        goto err; + +    return ret; +err: +    if (local) { +        if (local->stub) { +            call_stub_destroy(local->stub); +        } +        if (local->qstub) { +            call_stub_destroy(local->qstub); +        } +        if (local->fd) { +            fd_unref(local->fd); +        } +        mem_put(local); +    } +    STACK_UNWIND_STRICT(@NAME@, frame, -1, op_errno, @ERROR_ARGS@); +    return 0; +} + +/* template-name write-perform_local_op */ +int32_t jbr_@NAME@_perform_local_op(call_frame_t *frame, xlator_t *this, +                                    int *op_errno, @LONG_ARGS@) +{ +    int32_t ret = -1; + +    GF_VALIDATE_OR_GOTO("jbr", this, out); +    GF_VALIDATE_OR_GOTO(this->name, frame, out); +    GF_VALIDATE_OR_GOTO(this->name, op_errno, out); + +    ret = jbr_@NAME@_call_dispatch(frame, this, op_errno, @SHORT_ARGS@); + +out: +    return ret; +} + +/* template-name write-call_dispatch */ +int32_t jbr_@NAME@_call_dispatch(call_frame_t *frame, xlator_t *this, +                                 int *op_errno, @LONG_ARGS@) +{ +    jbr_local_t *local = NULL; +    jbr_private_t *priv = NULL; +    int32_t ret = -1; + +    GF_VALIDATE_OR_GOTO("jbr", this, out); +    priv = this->private; +    GF_VALIDATE_OR_GOTO(this->name, priv, out); +    GF_VALIDATE_OR_GOTO(this->name, frame, out); +    local = frame->local; +    GF_VALIDATE_OR_GOTO(this->name, local, out); +    GF_VALIDATE_OR_GOTO(this->name, op_errno, out); + +#if defined(JBR_CG_QUEUE) +    jbr_inode_ctx_t *ictx = jbr_get_inode_ctx(this, fd->inode); +    if (!ictx) { +        *op_errno = EIO; +        goto out; +    } + +    LOCK(&ictx->lock); +    if (ictx->active) { +        gf_msg_debug(this->name, 0, "queuing request due to conflict"); +        /* +         * TBD: enqueue only for real conflict +         * +         * Currently we just act like all writes are in +         * conflict with one another.  What we should really do +         * is check the active/pending queues and defer only if +         * there's a conflict there. +         * +         * It's important to check the pending queue because we +         * might have an active request X which conflicts with +         * a pending request Y, and this request Z might +         * conflict with Y but not X.  If we checked only the +         * active queue then Z could jump ahead of Y, which +         * would be incorrect. +         */ +        local->qstub = fop_@NAME@_stub(frame, jbr_@NAME@_dispatch, +                                       @SHORT_ARGS@); +        if (!local->qstub) { +            UNLOCK(&ictx->lock); +            goto out; +        } +        list_add_tail(&local->qlinks, &ictx->pqueue); +        ++(ictx->pending); +        UNLOCK(&ictx->lock); +        ret = 0; +        goto out; +    } else { +        list_add_tail(&local->qlinks, &ictx->aqueue); +        ++(ictx->active); +    } +    UNLOCK(&ictx->lock); +#endif +    ret = jbr_@NAME@_dispatch(frame, this, @SHORT_ARGS@); + +out: +    return ret; +} + +/* template-name write-dispatch */ +int32_t jbr_@NAME@_dispatch(call_frame_t *frame, xlator_t *this, @LONG_ARGS@) +{ +    jbr_local_t *local = NULL; +    jbr_private_t *priv = NULL; +    int32_t ret = -1; +    xlator_list_t *trav; + +    GF_VALIDATE_OR_GOTO("jbr", this, out); +    priv = this->private; +    GF_VALIDATE_OR_GOTO(this->name, priv, out); +    GF_VALIDATE_OR_GOTO(this->name, frame, out); +    local = frame->local; +    GF_VALIDATE_OR_GOTO(this->name, local, out); + +    /* +     * TBD: unblock pending request(s) if we fail after this point but +     * before we get to jbr_@NAME@_complete (where that code currently +     * resides). +     */ + +    local->call_count = priv->n_children - 1; +    for (trav = this->children->next; trav; trav = trav->next) { +        STACK_WIND(frame, jbr_@NAME@_fan_in, trav->xlator, +                   trav->xlator->fops->@NAME@, @SHORT_ARGS@); +    } + +    /* TBD: variable Issue count */ +    ret = 0; +out: +    return ret; +} + +/* template-name write-fan-in */ +int32_t jbr_@NAME@_fan_in(call_frame_t *frame, void *cookie, xlator_t *this, +                            int32_t op_ret, int32_t op_errno, @LONG_ARGS@) +{ +    jbr_local_t *local = NULL; +    int32_t ret = -1; +    uint8_t call_count; + +    GF_VALIDATE_OR_GOTO("jbr", this, out); +    GF_VALIDATE_OR_GOTO(this->name, frame, out); +    local = frame->local; +    GF_VALIDATE_OR_GOTO(this->name, local, out); + +    gf_msg_trace(this->name, 0, "op_ret = %d, op_errno = %d\n", op_ret, +                 op_errno); + +    LOCK(&frame->lock); +    call_count = --(local->call_count); +    if (op_ret != -1) { +        /* Increment the number of successful acks * +         * received for the operation.             * +         */ +        (local->successful_acks)++; +        local->successful_op_ret = op_ret; +    } +    gf_msg_debug(this->name, 0, "succ_acks = %d, op_ret = %d, op_errno = %d\n", +                 op_ret, op_errno, local->successful_acks); +    UNLOCK(&frame->lock); + +    /* TBD: variable Completion count */ +    if (call_count == 0) { +        call_resume(local->stub); +    } + +    ret = 0; +out: +    return ret; +} + +/* template-name write-continue */ +int32_t jbr_@NAME@_continue(call_frame_t *frame, xlator_t *this, @LONG_ARGS@) +{ +    int32_t ret = -1; +    gf_boolean_t result = _gf_false; +    jbr_local_t *local = NULL; +    jbr_local_t *new_local = NULL; +    jbr_private_t *priv = NULL; +    int32_t op_errno = 0; + +    GF_VALIDATE_OR_GOTO("jbr", this, out); +    GF_VALIDATE_OR_GOTO(this->name, frame, out); +    priv = this->private; +    local = frame->local; +    GF_VALIDATE_OR_GOTO(this->name, priv, out); +    GF_VALIDATE_OR_GOTO(this->name, local, out); + +    /* Perform quorum check to see if the leader needs     * +     * to perform the operation. If the operation will not * +     * meet quorum irrespective of the leader's result     * +     * there is no point in the leader performing the fop  * +     */ +    result = fop_quorum_check(this, (double)priv->n_children, +                              (double)local->successful_acks + 1); +    if (result == _gf_false) { +        gf_msg(this->name, GF_LOG_ERROR, EROFS, J_MSG_QUORUM_NOT_MET, +               "Didn't receive enough acks " +               "to meet quorum. Failing the operation without trying " +               "it on the leader."); + +#if defined(JBR_CG_QUEUE) +        /* +         * In case of a fop failure, before unwinding need to * +         * remove it from queue                               * +         */ +        ret = jbr_remove_from_queue(frame, this); +        if (ret) { +            gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_GENERIC, +                   "Failed to remove from queue."); +        } +#endif + +        /* +         * In this case, the quorum is not met on the followers  * +         * So the operation will not be performed on the leader  * +         * and a rollback will be sent via GF_FOP_IPC to all the * +         * followers, where this particular fop's term and index * +         * numbers will be journaled, and later used to rollback * +         */ +        call_frame_t *new_frame; + +        new_frame = copy_frame(frame); + +        if (new_frame) { +            new_local = mem_get0(this->local_pool); +            if (new_local) { +                INIT_LIST_HEAD(&new_local->qlinks); +                ret = dict_set_int32(local->xdata, "rollback-fop", +                                     GF_FOP_@UPNAME@); +                if (ret) { +                    gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_DICT_FLR, +                           "failed to set rollback-fop"); +                } else { +                    new_local->xdata = dict_ref(local->xdata); +                    new_frame->local = new_local; +                    jbr_ipc_call_dispatch(new_frame, this, &op_errno, +                                          FDL_IPC_JBR_SERVER_ROLLBACK, +                                          new_local->xdata); +                } +            } else { +                gf_log(this->name, GF_LOG_WARNING, +                       "Could not create local for new_frame"); +            } +        } else { +            gf_log(this->name, GF_LOG_WARNING, "Could not send rollback ipc"); +        } + +        STACK_UNWIND_STRICT(@NAME@, frame, -1, EROFS, @ERROR_ARGS@); +    } else { +        STACK_WIND(frame, jbr_@NAME@_complete, FIRST_CHILD(this), +                   FIRST_CHILD(this)->fops->@NAME@, @SHORT_ARGS@); +    } + +out: +    return 0; +} + +/* template-name write-complete */ +int32_t jbr_@NAME@_complete(call_frame_t *frame, void *cookie, xlator_t *this, +                              int32_t op_ret, int32_t op_errno, @LONG_ARGS@) +{ +    int32_t ret = -1; +    gf_boolean_t result = _gf_false; +    jbr_private_t *priv = NULL; +    jbr_local_t *local = NULL; +    jbr_local_t *new_local = NULL; + +    GF_VALIDATE_OR_GOTO("jbr", this, err); +    GF_VALIDATE_OR_GOTO(this->name, frame, err); +    priv = this->private; +    local = frame->local; +    GF_VALIDATE_OR_GOTO(this->name, priv, err); +    GF_VALIDATE_OR_GOTO(this->name, local, err); + +    /* If the fop failed on the leader, then reduce one successful ack +     * before calculating the fop quorum +     */ +    LOCK(&frame->lock); +    if (op_ret == -1) +        (local->successful_acks)--; +    UNLOCK(&frame->lock); + +#if defined(JBR_CG_QUEUE) +    ret = jbr_remove_from_queue(frame, this); +    if (ret) +        goto err; +#endif + +#if defined(JBR_CG_FSYNC) +    jbr_mark_fd_dirty(this, local); +#endif + +#if defined(JBR_CG_NEED_FD) +    fd_unref(local->fd); +#endif + +    /* After the leader completes the fop, a quorum check is      * +     * performed, taking into account the outcome of the fop      * +     * on the leader. Irrespective of the fop being successful    * +     * or failing on the leader, the result of the quorum will    * +     * determine if the overall fop is successful or not. For     * +     * example, a fop might have succeeded on every node except   * +     * the leader, in which case as quorum is being met, the fop  * +     * will be treated as a successful fop, even though it failed * +     * on the leader. On follower nodes, no quorum check should   * +     * be done, and the result is returned to the leader as is.   * +     */ +    if (priv->leader) { +        result = fop_quorum_check(this, (double)priv->n_children, +                                  (double)local->successful_acks + 1); +        if (result == _gf_false) { +            op_ret = -1; +            op_errno = EROFS; +            gf_msg(this->name, GF_LOG_ERROR, EROFS, J_MSG_QUORUM_NOT_MET, +                   "Quorum is not met. " +                   "The operation has failed."); +            /* +             * In this case, the quorum is not met after the      * +             * operation is performed on the leader. Hence a      * +             * rollback will be sent via GF_FOP_IPC to the leader * +             * where this particular fop's term and index numbers * +             * will be journaled, and later used to rollback.     * +             * The same will be done on all the followers         * +             */ +            call_frame_t *new_frame; + +            new_frame = copy_frame(frame); +            if (new_frame) { +                new_local = mem_get0(this->local_pool); +                if (new_local) { +                    INIT_LIST_HEAD(&new_local->qlinks); +                    gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_DICT_FLR, +                           "op = %d", new_frame->op); +                    ret = dict_set_int32(local->xdata, "rollback-fop", +                                         GF_FOP_@UPNAME@); +                    if (ret) { +                        gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_DICT_FLR, +                               "failed to set " +                               "rollback-fop"); +                    } else { +                        new_local->xdata = dict_ref(local->xdata); +                        new_frame->local = new_local; +                        /* +                         * Calling STACK_WIND instead * +                         * of jbr_ipc as it will not  * +                         * unwind to the previous     * +                         * translators like it will   * +                         * in case of jbr_ipc.        * +                         */ +                        STACK_WIND( +                            new_frame, jbr_ipc_complete, FIRST_CHILD(this), +                            FIRST_CHILD(this)->fops->ipc, +                            FDL_IPC_JBR_SERVER_ROLLBACK, new_local->xdata); +                    } +                } else { +                    gf_log(this->name, GF_LOG_WARNING, +                           "Could not create local " +                           "for new_frame"); +                } +            } else { +                gf_log(this->name, GF_LOG_WARNING, +                       "Could not send rollback ipc"); +            } +        } else { +#if defined(JBR_CG_NEED_FD) +            op_ret = local->successful_op_ret; +#else +            op_ret = 0; +#endif +            op_errno = 0; +            gf_msg_debug(this->name, 0, +                         "Quorum has met. The operation has succeeded."); +        } +    } + +    /* +     * Unrefing the reference taken in jbr_@NAME@ () * +     */ +    dict_unref(local->xdata); + +    STACK_UNWIND_STRICT(@NAME@, frame, op_ret, op_errno, @SHORT_ARGS@); + +    return 0; + +err: +    STACK_UNWIND_STRICT(@NAME@, frame, -1, 0, @SHORT_ARGS@); + +    return 0; +}  | 
