diff options
| author | Ravishankar N <ravishankar@redhat.com> | 2015-03-19 12:33:51 +0000 | 
|---|---|---|
| committer | Vijay Bellur <vbellur@redhat.com> | 2015-03-19 10:45:49 -0700 | 
| commit | 32ed7aa5ad7049a9d85c795f997336c0366151a8 (patch) | |
| tree | dd5c31dea878b803f3164b4417ac6b72ded3bbbc | |
| parent | 61489c1725f048c2e34a08e73f0ab367bbf673c1 (diff) | |
afr: arbiter xlator
This patch adds the arbiter translator into the tree. This is a server
side xlator used for replica 3 volumes. It sits above posix and will be
loaded on the 3rd (last) brick of every afr subvolume in a replica 3
configuration. It intercepts inode read/write operations: reads are
unwound with ENOTCONN, inode writes are unwound with success without
actually passing them down to posix. Metadata operations are allowed to
pass through.
The CLI for creating a 3-way replica with an arbiter is also added but kept
disabled (a 'normal' 3-way replica is created instead).
This patch is part of the arbiter logic implementation for 3-way AFR,
details of which can be found at http://review.gluster.org/#/c/9656/
Change-Id: I395b81f49d5da52c466daf5c8518f1bbad9c16fa
BUG: 1199985
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-on: http://review.gluster.org/9840
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
| -rw-r--r-- | cli/src/cli-cmd-parser.c | 22 | ||||
| -rw-r--r-- | cli/src/cli-cmd-volume.c | 3 | ||||
| -rw-r--r-- | configure.ac | 2 | ||||
| -rw-r--r-- | xlators/features/Makefile.am | 2 | ||||
| -rw-r--r-- | xlators/features/arbiter/Makefile.am | 3 | ||||
| -rw-r--r-- | xlators/features/arbiter/src/Makefile.am | 16 | ||||
| -rw-r--r-- | xlators/features/arbiter/src/arbiter-mem-types.h | 19 | ||||
| -rw-r--r-- | xlators/features/arbiter/src/arbiter.c | 324 | ||||
| -rw-r--r-- | xlators/features/arbiter/src/arbiter.h | 26 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volgen.c | 20 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.h | 1 | 
11 files changed, 435 insertions, 3 deletions
diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c index 54a57008457..0584b1edbbd 100644 --- a/cli/src/cli-cmd-parser.c +++ b/cli/src/cli-cmd-parser.c @@ -437,12 +437,13 @@ cli_cmd_volume_create_parse (struct cli_state *state, const char **words,          char    *bricks = NULL;          int32_t brick_count = 0;          char    *opwords[] = { "replica", "stripe", "transport", "disperse", -                               "redundancy", "disperse-data", NULL }; +                               "redundancy", "disperse-data", "arbiter", NULL };          char    *w = NULL;          char    *ptr = NULL;          int      op_count = 0;          int32_t  replica_count = 1; +        int32_t  arbiter_count = 0;          int32_t  stripe_count = 1;          int32_t  disperse_count = -1;          int32_t  redundancy_count = -1; @@ -521,6 +522,25 @@ cli_cmd_volume_create_parse (struct cli_state *state, const char **words,                                  goto out;                          index += 2; +                        if (!strcmp (words[index], "arbiter")) { +                                ret = gf_string2int (words[index+1], +                                                     &arbiter_count); +                                if (ret == -1 || arbiter_count != 1 || +                                    replica_count != 3) { +                                        cli_err ("For arbiter configuration, " +                                                 "replica count must be 3 and " +                                                 "arbiter count must be 1. 
" +                                                 "The 3rd brick of the replica " +                                                 "will be the arbiter."); +                                        ret = -1; +                                        goto out; +                                } +                                ret = dict_set_int32 (dict, "arbiter-count", +                                                      arbiter_count); +                                if (ret) +                                        goto out; +                                index += 2; +                        }                  } else if ((strcmp (w, "stripe")) == 0) {                          switch (type) { diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c index 3098d74491c..c6b0673bca7 100644 --- a/cli/src/cli-cmd-volume.c +++ b/cli/src/cli-cmd-volume.c @@ -2544,7 +2544,8 @@ struct cli_cmd volume_cmds[] = {            cli_cmd_volume_info_cbk,            "list information of all volumes"}, -        { "volume create <NEW-VOLNAME> [stripe <COUNT>] [replica <COUNT>] " +        { "volume create <NEW-VOLNAME> [stripe <COUNT>] " +          "[replica <COUNT> [arbiter <COUNT>]] "            "[disperse [<COUNT>]] [disperse-data <COUNT>] [redundancy <COUNT>] "            "[transport <tcp|rdma|tcp,rdma>] <NEW-BRICK>"  #ifdef HAVE_BD_XLATOR diff --git a/configure.ac b/configure.ac index e91baecb520..16d32db7958 100644 --- a/configure.ac +++ b/configure.ac @@ -115,6 +115,8 @@ AC_CONFIG_FILES([Makefile                  xlators/protocol/server/Makefile                  xlators/protocol/server/src/Makefile                  xlators/features/Makefile +                xlators/features/arbiter/Makefile +                xlators/features/arbiter/src/Makefile                  xlators/features/changelog/Makefile                  xlators/features/changelog/src/Makefile                  xlators/features/changelog/lib/Makefile diff --git a/xlators/features/Makefile.am 
b/xlators/features/Makefile.am index 67d1338ed9e..5f35b37bc8e 100644 --- a/xlators/features/Makefile.am +++ b/xlators/features/Makefile.am @@ -1,4 +1,4 @@ -SUBDIRS = locks quota read-only mac-compat quiesce marker index barrier \ +SUBDIRS = locks quota read-only mac-compat quiesce marker index barrier arbiter\            protect compress changelog changetimerecorder ganesha gfid-access $(GLUPY_SUBDIR) qemu-block \            upcall snapview-client snapview-server trash #path-converter # filter diff --git a/xlators/features/arbiter/Makefile.am b/xlators/features/arbiter/Makefile.am new file mode 100644 index 00000000000..a985f42a877 --- /dev/null +++ b/xlators/features/arbiter/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/features/arbiter/src/Makefile.am b/xlators/features/arbiter/src/Makefile.am new file mode 100644 index 00000000000..edec57b892c --- /dev/null +++ b/xlators/features/arbiter/src/Makefile.am @@ -0,0 +1,16 @@ +xlator_LTLIBRARIES = arbiter.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features + +arbiter_la_LDFLAGS = -module -avoid-version + +arbiter_la_SOURCES = arbiter.c +_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la +arbiter_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +noinst_HEADERS = arbiter.h arbiter-mem-types.h + +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src + +AM_CFLAGS = -Wall $(GF_CFLAGS) + +CLEANFILES = diff --git a/xlators/features/arbiter/src/arbiter-mem-types.h b/xlators/features/arbiter/src/arbiter-mem-types.h new file mode 100644 index 00000000000..200b59de695 --- /dev/null +++ b/xlators/features/arbiter/src/arbiter-mem-types.h @@ -0,0 +1,19 @@ +/* +  Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> +  This file is part of GlusterFS. 
+  This file is licensed to you under your choice of the GNU Lesser +  General Public License, version 3 or any later version (LGPLv3 or +  later), or the GNU General Public License, version 2 (GPLv2), in all +  cases as published by the Free Software Foundation. +*/ + +#ifndef __ARBITER_MEM_TYPES_H__ +#define __ARBITER_MEM_TYPES_H__ +#include "mem-types.h" + +typedef enum gf_arbiter_mem_types_ { +        gf_arbiter_mt_inode_ctx_t =  gf_common_mt_end + 1, +        gf_arbiter_mt_iatt, +        gf_arbiter_mt_end +} gf_arbiter_mem_types_t; +#endif diff --git a/xlators/features/arbiter/src/arbiter.c b/xlators/features/arbiter/src/arbiter.c new file mode 100644 index 00000000000..87145da5680 --- /dev/null +++ b/xlators/features/arbiter/src/arbiter.c @@ -0,0 +1,324 @@ +/* +  Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> +  This file is part of GlusterFS. + +  This file is licensed to you under your choice of the GNU Lesser +  General Public License, version 3 or any later version (LGPLv3 or +  later), or the GNU General Public License, version 2 (GPLv2), in all +  cases as published by the Free Software Foundation. 
+*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "arbiter.h" +#include "arbiter-mem-types.h" +#include "glusterfs.h" +#include "xlator.h" +#include "logging.h" + +void +arbiter_inode_ctx_destroy (arbiter_inode_ctx_t *ctx) +{ +        if (!ctx) +                return; +        GF_FREE (ctx->iattbuf); +        GF_FREE (ctx); +} + +static arbiter_inode_ctx_t * +__arbiter_inode_ctx_get (inode_t *inode, xlator_t *this) +{ + +        arbiter_inode_ctx_t *ctx = NULL; +        int ret = 0; +        uint64_t ctx_addr = 0; + +        ret = __inode_ctx_get (inode, this, &ctx_addr); +        if (ret == 0) { +                ctx = (arbiter_inode_ctx_t *) (long) ctx_addr; +                goto out; +        } + +        ctx = GF_CALLOC (1, sizeof (*ctx), gf_arbiter_mt_inode_ctx_t); +        if (!ctx) +                goto fail; +        ctx->iattbuf = GF_CALLOC (1, sizeof (*ctx->iattbuf), +                                  gf_arbiter_mt_iatt); +        if (!ctx->iattbuf) +                goto fail; +        ret = __inode_ctx_put (inode, this, (uint64_t)ctx); +        if (ret) { +                gf_log_callingfn (this->name, GF_LOG_ERROR, "failed to " +                                  "set the inode ctx (%s)", +                                  uuid_utoa (inode->gfid)); +                goto fail; +        } +out: +        return ctx; +fail: +        arbiter_inode_ctx_destroy (ctx); +        return NULL; +} + +static arbiter_inode_ctx_t * +arbiter_inode_ctx_get (inode_t *inode, xlator_t *this) +{ +        arbiter_inode_ctx_t *ctx = NULL; + +        LOCK(&inode->lock); +        { +                ctx = __arbiter_inode_ctx_get (inode, this); +        } +        UNLOCK(&inode->lock); +        return ctx; +} + +int32_t +arbiter_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +                    int32_t op_ret, int32_t op_errno, inode_t *inode, +                    struct iatt *buf, dict_t *xdata, struct iatt *postparent) +{ +      
  arbiter_inode_ctx_t *ctx = NULL; + +        if (op_ret != 0) +                goto unwind; +        ctx = arbiter_inode_ctx_get (inode, this); +        if (!ctx) { +                op_ret = -1; +                op_errno = ENOMEM; +                goto unwind; +        } +        memcpy (ctx->iattbuf, buf, sizeof (*ctx->iattbuf)); + +unwind: +        STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf, +                             xdata, postparent); +        return 0; +} + +int32_t +arbiter_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +{ +        STACK_WIND (frame, arbiter_lookup_cbk, FIRST_CHILD(this), +                         FIRST_CHILD(this)->fops->lookup, loc, xdata); +        return 0; +} + +int32_t +arbiter_readv (call_frame_t *frame,  xlator_t *this, fd_t *fd, size_t size, +               off_t offset, uint32_t flags, dict_t *xdata) +{ +        STACK_UNWIND_STRICT (readv, frame, -1, ENOTCONN, NULL, 0, NULL, NULL, +                             NULL); +        return 0; +} + +int32_t +arbiter_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, +                  dict_t *xdata) +{ +        arbiter_inode_ctx_t *ctx      = NULL; +        struct iatt         *buf      = NULL; +        int32_t              op_ret   = 0; +        int32_t              op_errno = 0; + +        ctx = arbiter_inode_ctx_get (loc->inode, this); +        if (!ctx) { +                op_ret = -1; +                op_errno = ENOMEM; +                goto unwind; +        } +        buf = ctx->iattbuf; +unwind: +        STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, buf, buf, +                             xdata); +        return 0; +} + +int32_t +arbiter_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, +                   dict_t *xdata) + +{ +        arbiter_inode_ctx_t *ctx      = NULL; +        struct iatt         *buf      = NULL; +        int32_t              op_ret   = 0; +        int32_t    
          op_errno = 0; + +        ctx = arbiter_inode_ctx_get (fd->inode, this); +        if (!ctx) { +                op_ret = -1; +                op_errno = ENOMEM; +                goto unwind; +        } +        buf = ctx->iattbuf; +unwind: +        STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, buf, buf, +                             xdata); +        return 0; +} + +int32_t +arbiter_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, +                struct iovec *vector, int32_t count, off_t off, uint32_t flags, +                struct iobref *iobref, dict_t *xdata) +{ +        arbiter_inode_ctx_t *ctx      = NULL; +        struct iatt         *buf      = NULL; +        int                  op_ret   = 0; +        int                  op_errno = 0; + +        ctx = arbiter_inode_ctx_get (fd->inode, this); +        if (!ctx) { +                op_ret = -1; +                op_errno = ENOMEM; +                goto unwind; +        } +        buf = ctx->iattbuf; +        op_ret = iov_length (vector, count); +unwind: +        STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, buf, buf, xdata); +        return 0; +} + +int32_t +arbiter_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd, +                  int32_t keep_size, off_t offset, size_t len, dict_t *xdata) +{ +        arbiter_inode_ctx_t *ctx      = NULL; +        struct iatt         *buf      = NULL; +        int                  op_ret   = 0; +        int                  op_errno = 0; + +        ctx = arbiter_inode_ctx_get (fd->inode, this); +        if (!ctx) { +                op_ret = -1; +                op_errno = ENOMEM; +                goto unwind; +        } +        buf = ctx->iattbuf; +unwind: +        STACK_UNWIND_STRICT(fallocate, frame, op_ret, op_errno, buf, buf, +                            xdata); +        return 0; +} + +int32_t +arbiter_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, +                off_t offset, size_t len, dict_t *xdata) +{ +        
arbiter_inode_ctx_t *ctx      = NULL; +        struct iatt         *buf      = NULL; +        int                  op_ret   = 0; +        int                  op_errno = 0; + +        ctx = arbiter_inode_ctx_get (fd->inode, this); +        if (!ctx) { +                op_ret = -1; +                op_errno = ENOMEM; +                goto unwind; +        } +        buf = ctx->iattbuf; +unwind: +        STACK_UNWIND_STRICT(discard, frame, op_ret, op_errno, buf, buf, xdata); +        return 0; +} + +int32_t +arbiter_zerofill (call_frame_t *frame, xlator_t *this, fd_t *fd, +                  off_t offset, off_t len, dict_t *xdata) +{ +        arbiter_inode_ctx_t *ctx      = NULL; +        struct iatt         *buf      = NULL; +        int                  op_ret   = 0; +        int                  op_errno = 0; + +        ctx = arbiter_inode_ctx_get (fd->inode, this); +        if (!ctx) { +                op_ret = -1; +                op_errno = ENOMEM; +                goto unwind; +        } +        buf = ctx->iattbuf; +unwind: +        STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, buf, buf, xdata); +        return 0; +} + +int32_t +mem_acct_init (xlator_t *this) +{ +        int ret = -1; + +        ret = xlator_mem_acct_init (this, gf_arbiter_mt_end + 1); +        if (ret) +                gf_log (this->name, GF_LOG_ERROR, "Memory accounting " +                        "initialization failed."); +        return ret; +} + +int +reconfigure (xlator_t *this, dict_t *options) +{ + +        return 0; +} + +int +arbiter_forget (xlator_t *this, inode_t *inode) +{ +        arbiter_inode_ctx_t *ctx = NULL; +        uint64_t ctx_addr = 0; + +        inode_ctx_del (inode, this, &ctx_addr); +        if (!ctx_addr) +                return 0; +        ctx = (arbiter_inode_ctx_t *) (long) ctx_addr; +        GF_FREE (ctx); +        return 0; +} + +int32_t +init (xlator_t *this) +{ + +        if (!this->children || this->children->next) { +                gf_log 
(this->name, GF_LOG_ERROR, +                        "'arbiter' not configured with exactly one child"); +                return -1; +        } + +        if (!this->parents) +                gf_log (this->name, GF_LOG_ERROR, +                        "dangling volume. check volfile "); + +        return 0; +} + +void +fini (xlator_t *this) +{ +        return; +} + +struct xlator_fops fops = { +        .lookup = arbiter_lookup, +        .readv  = arbiter_readv, +        .truncate = arbiter_truncate, +        .writev = arbiter_writev, +        .ftruncate = arbiter_ftruncate, +        .fallocate = arbiter_fallocate, +        .discard = arbiter_discard, +        .zerofill = arbiter_zerofill, +}; + +struct xlator_cbks cbks = { +        .forget = arbiter_forget, +}; + +struct volume_options options[] = { +        { .key  = {NULL} }, +}; diff --git a/xlators/features/arbiter/src/arbiter.h b/xlators/features/arbiter/src/arbiter.h new file mode 100644 index 00000000000..69ce9cb4fa3 --- /dev/null +++ b/xlators/features/arbiter/src/arbiter.h @@ -0,0 +1,26 @@ +/* +  Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> +  This file is part of GlusterFS. + +  This file is licensed to you under your choice of the GNU Lesser +  General Public License, version 3 or any later version (LGPLv3 or +  later), or the GNU General Public License, version 2 (GPLv2), in all +  cases as published by the Free Software Foundation. 
+*/ + +#ifndef _ARBITER_H +#define _ARBITER_H + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "locking.h" +#include "common-utils.h" + +typedef struct arbiter_inode_ctx_ { +        struct iatt *iattbuf; +} arbiter_inode_ctx_t; + +#endif /* _ARBITER_H */ diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index 79da432bafe..6f6d1095edb 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -1496,6 +1496,25 @@ out:  }  static int +brick_graph_add_arbiter (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, +                         dict_t *set_dict, glusterd_brickinfo_t *brickinfo) +{ +        xlator_t *xl = NULL; +        int ret = -1; + +        if (volinfo->arbiter_count != 1) +                return 0; +        /*TODO: Parse brickinfo and add the arbiter xlator only if brick is the +         * last brick (i.e. 3rd brick) of the replcia pair.*/ +        xl = volgen_graph_add (graph, "features/arbiter", volinfo->volname); +        if (!xl) +                goto out; +        ret = 0; +out: +        return ret; +} + +static int  brick_graph_add_bd (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,                       dict_t *set_dict, glusterd_brickinfo_t *brickinfo)  { @@ -2140,6 +2159,7 @@ static volgen_brick_xlator_t server_graph_table[] = {          {brick_graph_add_changetimerecorder, "changetimerecorder"},          {brick_graph_add_bd, "bd"},          {brick_graph_add_trash, "trash"}, +        {brick_graph_add_arbiter, "arbiter"},          {brick_graph_add_posix, "posix"},  }; diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index bac1598598b..2dd6348ac7f 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -340,6 +340,7 @@ struct glusterd_volinfo_ {          int                       sub_count;  /* backward compatibility */          int  
                     stripe_count;          int                       replica_count; +        int                       arbiter_count;          int                       disperse_count;          int                       redundancy_count;          int                       subvol_count; /* Number of subvolumes in a  | 
