summaryrefslogtreecommitdiffstats
path: root/xlators
diff options
context:
space:
mode:
authorRavishankar N <ravishankar@redhat.com>2015-03-19 12:33:51 +0000
committerVijay Bellur <vbellur@redhat.com>2015-03-19 10:45:49 -0700
commit32ed7aa5ad7049a9d85c795f997336c0366151a8 (patch)
treedd5c31dea878b803f3164b4417ac6b72ded3bbbc /xlators
parent61489c1725f048c2e34a08e73f0ab367bbf673c1 (diff)
afr: arbiter xlator
This patch adds the arbiter translator into the tree. This is a server side xlator used for replica 3 volumes. It sits above posix and will be loaded on the 3rd (last) brick of every afr subvolume in a replica 3 configuration. It intercepts inode read/write operations: reads are unwound with ENOTCONN, inode writes are unwound with success without actually passing them down to posix. Metadata operations are allowed to pass through. The CLI for creating a 3 way replica with arbiter is also added but kept disabled (A 'normal' 3 way replica is created instead). This patch is a part of the arbiter logic implementation for 3 way AFR, details of which can be found at http://review.gluster.org/#/c/9656/ Change-Id: I395b81f49d5da52c466daf5c8518f1bbad9c16fa BUG: 1199985 Signed-off-by: Ravishankar N <ravishankar@redhat.com> Reviewed-on: http://review.gluster.org/9840 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com> Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'xlators')
-rw-r--r--xlators/features/Makefile.am2
-rw-r--r--xlators/features/arbiter/Makefile.am3
-rw-r--r--xlators/features/arbiter/src/Makefile.am16
-rw-r--r--xlators/features/arbiter/src/arbiter-mem-types.h19
-rw-r--r--xlators/features/arbiter/src/arbiter.c324
-rw-r--r--xlators/features/arbiter/src/arbiter.h26
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.c20
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.h1
8 files changed, 410 insertions, 1 deletions
diff --git a/xlators/features/Makefile.am b/xlators/features/Makefile.am
index 67d1338ed9e..5f35b37bc8e 100644
--- a/xlators/features/Makefile.am
+++ b/xlators/features/Makefile.am
@@ -1,4 +1,4 @@
-SUBDIRS = locks quota read-only mac-compat quiesce marker index barrier \
+SUBDIRS = locks quota read-only mac-compat quiesce marker index barrier arbiter\
protect compress changelog changetimerecorder ganesha gfid-access $(GLUPY_SUBDIR) qemu-block \
upcall snapview-client snapview-server trash #path-converter # filter
diff --git a/xlators/features/arbiter/Makefile.am b/xlators/features/arbiter/Makefile.am
new file mode 100644
index 00000000000..a985f42a877
--- /dev/null
+++ b/xlators/features/arbiter/Makefile.am
@@ -0,0 +1,3 @@
+# Recurse into src/, which holds the arbiter xlator sources.
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/features/arbiter/src/Makefile.am b/xlators/features/arbiter/src/Makefile.am
new file mode 100644
index 00000000000..edec57b892c
--- /dev/null
+++ b/xlators/features/arbiter/src/Makefile.am
@@ -0,0 +1,16 @@
+xlator_LTLIBRARIES = arbiter.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+arbiter_la_LDFLAGS = -module -avoid-version
+
+arbiter_la_SOURCES = arbiter.c
+# Link the xlator module against libglusterfs.
+arbiter_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = arbiter.h arbiter-mem-types.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/arbiter/src/arbiter-mem-types.h b/xlators/features/arbiter/src/arbiter-mem-types.h
new file mode 100644
index 00000000000..200b59de695
--- /dev/null
+++ b/xlators/features/arbiter/src/arbiter-mem-types.h
@@ -0,0 +1,19 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __ARBITER_MEM_TYPES_H__
+#define __ARBITER_MEM_TYPES_H__
+#include "mem-types.h"
+
+/* Memory type identifiers used for the arbiter xlator's allocations.
+ * Values start right after gf_common_mt_end. */
+typedef enum gf_arbiter_mem_types_ {
+ gf_arbiter_mt_inode_ctx_t = gf_common_mt_end + 1, /* arbiter_inode_ctx_t allocations */
+ gf_arbiter_mt_iatt, /* the iatt buffer referenced by the inode ctx */
+ gf_arbiter_mt_end /* end marker; mem_acct_init() passes gf_arbiter_mt_end + 1 */
+} gf_arbiter_mem_types_t;
+#endif
diff --git a/xlators/features/arbiter/src/arbiter.c b/xlators/features/arbiter/src/arbiter.c
new file mode 100644
index 00000000000..87145da5680
--- /dev/null
+++ b/xlators/features/arbiter/src/arbiter.c
@@ -0,0 +1,324 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "arbiter.h"
+#include "arbiter-mem-types.h"
+#include "glusterfs.h"
+#include "xlator.h"
+#include "logging.h"
+
+/*
+ * Release an arbiter inode context together with the iatt buffer it
+ * points to.  A NULL @ctx is accepted and ignored.
+ */
+void
+arbiter_inode_ctx_destroy (arbiter_inode_ctx_t *ctx)
+{
+        if (ctx) {
+                GF_FREE (ctx->iattbuf);
+                GF_FREE (ctx);
+        }
+}
+
+/*
+ * Return the arbiter context stored on @inode for @this.  When none is
+ * stored yet, allocate a zero-filled context plus its iatt buffer and
+ * attach it to the inode.
+ *
+ * This function calls the double-underscore inode ctx helpers directly;
+ * the wrapper arbiter_inode_ctx_get() invokes it with inode->lock held.
+ *
+ * Returns NULL when an allocation fails or the context cannot be stored;
+ * a partially built context is released before returning.
+ */
+static arbiter_inode_ctx_t *
+__arbiter_inode_ctx_get (inode_t *inode, xlator_t *this)
+{
+        arbiter_inode_ctx_t *ctx      = NULL;
+        uint64_t             ctx_addr = 0;
+        int                  ret      = 0;
+
+        ret = __inode_ctx_get (inode, this, &ctx_addr);
+        if (ret == 0)
+                return (arbiter_inode_ctx_t *) (long) ctx_addr;
+
+        ctx = GF_CALLOC (1, sizeof (*ctx), gf_arbiter_mt_inode_ctx_t);
+        if (!ctx)
+                goto fail;
+
+        ctx->iattbuf = GF_CALLOC (1, sizeof (*ctx->iattbuf),
+                                  gf_arbiter_mt_iatt);
+        if (!ctx->iattbuf)
+                goto fail;
+
+        /* Convert through 'long' so the pointer-to-integer conversion is
+         * the exact inverse of the cast used when reading the value back. */
+        ret = __inode_ctx_put (inode, this, (uint64_t) (long) ctx);
+        if (ret) {
+                gf_log_callingfn (this->name, GF_LOG_ERROR, "failed to "
+                                  "set the inode ctx (%s)",
+                                  uuid_utoa (inode->gfid));
+                goto fail;
+        }
+
+        return ctx;
+fail:
+        /* Handles ctx == NULL as well as a ctx without iattbuf. */
+        arbiter_inode_ctx_destroy (ctx);
+        return NULL;
+}
+
+/*
+ * Locked front end for __arbiter_inode_ctx_get(): fetches (or creates) the
+ * arbiter context of @inode while holding inode->lock.
+ * Returns the helper's result, which may be NULL.
+ */
+static arbiter_inode_ctx_t *
+arbiter_inode_ctx_get (inode_t *inode, xlator_t *this)
+{
+        arbiter_inode_ctx_t *inode_ctx = NULL;
+
+        LOCK (&inode->lock);
+        inode_ctx = __arbiter_inode_ctx_get (inode, this);
+        UNLOCK (&inode->lock);
+
+        return inode_ctx;
+}
+
+/*
+ * Lookup callback.  When the lookup succeeded (op_ret == 0), copy the
+ * returned iatt into the inode's arbiter context; if the context cannot be
+ * obtained, the reply is turned into a failure with ENOMEM.  A lookup that
+ * already failed is unwound with its original op_ret/op_errno.
+ * Always returns 0.
+ */
+int32_t
+arbiter_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, inode_t *inode,
+                    struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+{
+        arbiter_inode_ctx_t *inode_ctx = NULL;
+
+        if (op_ret == 0) {
+                inode_ctx = arbiter_inode_ctx_get (inode, this);
+                if (inode_ctx) {
+                        memcpy (inode_ctx->iattbuf, buf,
+                                sizeof (*inode_ctx->iattbuf));
+                } else {
+                        op_ret = -1;
+                        op_errno = ENOMEM;
+                }
+        }
+
+        STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf,
+                             xdata, postparent);
+        return 0;
+}
+
+/* lookup fop: wind the request to the first child's lookup and have the
+ * reply delivered to arbiter_lookup_cbk.  Always returns 0. */
+int32_t
+arbiter_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ STACK_WIND (frame, arbiter_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
+ return 0;
+}
+
+/* readv fop: never wound to the child; the request is unwound right away
+ * with op_ret -1 and op_errno ENOTCONN.  Always returns 0. */
+int32_t
+arbiter_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (readv, frame, -1, ENOTCONN, NULL, 0, NULL, NULL,
+ NULL);
+ return 0;
+}
+
+/*
+ * truncate fop: not wound to the child.  Unwinds with op_ret 0, passing the
+ * iatt buffer held in the arbiter context of loc->inode for both iatt
+ * arguments of the unwind, and handing the incoming @xdata back.
+ * If the context cannot be obtained, unwinds with -1/ENOMEM and NULL iatts.
+ * Always returns 0.
+ */
+int32_t
+arbiter_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+                  dict_t *xdata)
+{
+        arbiter_inode_ctx_t *inode_ctx = NULL;
+        struct iatt         *stbuf     = NULL;
+        int32_t              op_ret    = 0;
+        int32_t              op_errno  = 0;
+
+        inode_ctx = arbiter_inode_ctx_get (loc->inode, this);
+        if (inode_ctx) {
+                stbuf = inode_ctx->iattbuf;
+        } else {
+                op_ret = -1;
+                op_errno = ENOMEM;
+        }
+
+        STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, stbuf, stbuf,
+                             xdata);
+        return 0;
+}
+
+/*
+ * ftruncate fop: not wound to the child.  Unwinds with op_ret 0, passing
+ * the iatt buffer held in the arbiter context of fd->inode for both iatt
+ * arguments of the unwind, and handing the incoming @xdata back.
+ * If the context cannot be obtained, unwinds with -1/ENOMEM and NULL iatts.
+ * Always returns 0.
+ */
+int32_t
+arbiter_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+                   dict_t *xdata)
+{
+        arbiter_inode_ctx_t *inode_ctx = NULL;
+        struct iatt         *stbuf     = NULL;
+        int32_t              op_ret    = 0;
+        int32_t              op_errno  = 0;
+
+        inode_ctx = arbiter_inode_ctx_get (fd->inode, this);
+        if (inode_ctx) {
+                stbuf = inode_ctx->iattbuf;
+        } else {
+                op_ret = -1;
+                op_errno = ENOMEM;
+        }
+
+        STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, stbuf, stbuf,
+                             xdata);
+        return 0;
+}
+
+/*
+ * writev fop: not wound to the child.  On success the unwind reports
+ * iov_length (vector, count) as op_ret and passes the iatt buffer held in
+ * the arbiter context of fd->inode for both iatt arguments; the incoming
+ * @xdata is handed back.
+ * If the context cannot be obtained, unwinds with -1/ENOMEM and NULL iatts.
+ * Always returns 0.
+ */
+int32_t
+arbiter_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                struct iovec *vector, int32_t count, off_t off, uint32_t flags,
+                struct iobref *iobref, dict_t *xdata)
+{
+        arbiter_inode_ctx_t *inode_ctx = NULL;
+        struct iatt         *stbuf     = NULL;
+        int                  op_ret    = 0;
+        int                  op_errno  = 0;
+
+        inode_ctx = arbiter_inode_ctx_get (fd->inode, this);
+        if (inode_ctx) {
+                stbuf = inode_ctx->iattbuf;
+                op_ret = iov_length (vector, count);
+        } else {
+                op_ret = -1;
+                op_errno = ENOMEM;
+        }
+
+        STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, stbuf, stbuf,
+                             xdata);
+        return 0;
+}
+
+/*
+ * fallocate fop: not wound to the child.  Unwinds with op_ret 0, passing
+ * the iatt buffer held in the arbiter context of fd->inode for both iatt
+ * arguments of the unwind, and handing the incoming @xdata back.
+ * If the context cannot be obtained, unwinds with -1/ENOMEM and NULL iatts.
+ * Always returns 0.
+ */
+int32_t
+arbiter_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                   int32_t keep_size, off_t offset, size_t len, dict_t *xdata)
+{
+        arbiter_inode_ctx_t *inode_ctx = NULL;
+        struct iatt         *stbuf     = NULL;
+        int                  op_ret    = 0;
+        int                  op_errno  = 0;
+
+        inode_ctx = arbiter_inode_ctx_get (fd->inode, this);
+        if (inode_ctx) {
+                stbuf = inode_ctx->iattbuf;
+        } else {
+                op_ret = -1;
+                op_errno = ENOMEM;
+        }
+
+        STACK_UNWIND_STRICT (fallocate, frame, op_ret, op_errno, stbuf, stbuf,
+                             xdata);
+        return 0;
+}
+
+/*
+ * discard fop: not wound to the child.  Unwinds with op_ret 0, passing the
+ * iatt buffer held in the arbiter context of fd->inode for both iatt
+ * arguments of the unwind, and handing the incoming @xdata back.
+ * If the context cannot be obtained, unwinds with -1/ENOMEM and NULL iatts.
+ * Always returns 0.
+ */
+int32_t
+arbiter_discard (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                 off_t offset, size_t len, dict_t *xdata)
+{
+        arbiter_inode_ctx_t *inode_ctx = NULL;
+        struct iatt         *stbuf     = NULL;
+        int                  op_ret    = 0;
+        int                  op_errno  = 0;
+
+        inode_ctx = arbiter_inode_ctx_get (fd->inode, this);
+        if (inode_ctx) {
+                stbuf = inode_ctx->iattbuf;
+        } else {
+                op_ret = -1;
+                op_errno = ENOMEM;
+        }
+
+        STACK_UNWIND_STRICT (discard, frame, op_ret, op_errno, stbuf, stbuf,
+                             xdata);
+        return 0;
+}
+
+/*
+ * zerofill fop: not wound to the child.  Unwinds with op_ret 0, passing the
+ * iatt buffer held in the arbiter context of fd->inode for both iatt
+ * arguments of the unwind, and handing the incoming @xdata back.
+ * If the context cannot be obtained, unwinds with -1/ENOMEM and NULL iatts.
+ * Always returns 0.
+ */
+int32_t
+arbiter_zerofill (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                  off_t offset, off_t len, dict_t *xdata)
+{
+        arbiter_inode_ctx_t *inode_ctx = NULL;
+        struct iatt         *stbuf     = NULL;
+        int                  op_ret    = 0;
+        int                  op_errno  = 0;
+
+        inode_ctx = arbiter_inode_ctx_get (fd->inode, this);
+        if (inode_ctx) {
+                stbuf = inode_ctx->iattbuf;
+        } else {
+                op_ret = -1;
+                op_errno = ENOMEM;
+        }
+
+        STACK_UNWIND_STRICT (zerofill, frame, op_ret, op_errno, stbuf, stbuf,
+                             xdata);
+        return 0;
+}
+
+/*
+ * Initialise memory accounting for this xlator, sized with
+ * gf_arbiter_mt_end + 1.  Logs an error when that fails.
+ * Returns the result of xlator_mem_acct_init().
+ */
+int32_t
+mem_acct_init (xlator_t *this)
+{
+        int rc = xlator_mem_acct_init (this, gf_arbiter_mt_end + 1);
+
+        if (rc != 0) {
+                gf_log (this->name, GF_LOG_ERROR, "Memory accounting "
+                        "initialization failed.");
+        }
+
+        return rc;
+}
+
+/* reconfigure entry point: performs no work and always returns 0. */
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+
+ return 0;
+}
+
+/*
+ * forget callback: remove the arbiter context from @inode and free it
+ * together with its iatt buffer.  Does nothing when no context was stored.
+ * Always returns 0.
+ */
+int
+arbiter_forget (xlator_t *this, inode_t *inode)
+{
+        uint64_t ctx_addr = 0;
+
+        inode_ctx_del (inode, this, &ctx_addr);
+        if (!ctx_addr)
+                return 0;
+
+        /* The context owns a separately allocated iattbuf, so it must be
+         * released via the destroy helper; freeing only the context would
+         * leak the buffer. */
+        arbiter_inode_ctx_destroy ((arbiter_inode_ctx_t *) (long) ctx_addr);
+        return 0;
+}
+
+/*
+ * Xlator init: requires exactly one child, otherwise logs an error and
+ * returns -1.  A missing parent is only logged; init still returns 0.
+ */
+int32_t
+init (xlator_t *this)
+{
+        int single_child = (this->children && !this->children->next);
+
+        if (!single_child) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "'arbiter' not configured with exactly one child");
+                return -1;
+        }
+
+        if (this->parents == NULL) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "dangling volume. check volfile ");
+        }
+
+        return 0;
+}
+
+/* Xlator fini: nothing to tear down; returns immediately. */
+void
+fini (xlator_t *this)
+{
+ return;
+}
+
+/* File operations implemented by the arbiter xlator. */
+struct xlator_fops fops = {
+ .lookup = arbiter_lookup, /* wound to the child; reply iatt is saved in the inode ctx */
+ .readv = arbiter_readv, /* unwound with ENOTCONN */
+ .truncate = arbiter_truncate, /* the remaining fops are unwound without winding to the child */
+ .writev = arbiter_writev,
+ .ftruncate = arbiter_ftruncate,
+ .fallocate = arbiter_fallocate,
+ .discard = arbiter_discard,
+ .zerofill = arbiter_zerofill,
+};
+
+/* Callbacks: only forget is provided; it releases the per-inode context. */
+struct xlator_cbks cbks = {
+ .forget = arbiter_forget,
+};
+
+/* Option table: holds only the terminating entry with a NULL key. */
+struct volume_options options[] = {
+ { .key = {NULL} },
+};
diff --git a/xlators/features/arbiter/src/arbiter.h b/xlators/features/arbiter/src/arbiter.h
new file mode 100644
index 00000000000..69ce9cb4fa3
--- /dev/null
+++ b/xlators/features/arbiter/src/arbiter.h
@@ -0,0 +1,26 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _ARBITER_H
+#define _ARBITER_H
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "locking.h"
+#include "common-utils.h"
+
+/* Per-inode context stored by the arbiter xlator. */
+typedef struct arbiter_inode_ctx_ {
+ struct iatt *iattbuf; /* separately allocated iatt; arbiter_lookup_cbk() copies the lookup reply into it */
+} arbiter_inode_ctx_t;
+
+#endif /* _ARBITER_H */
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
index 79da432bafe..6f6d1095edb 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
@@ -1496,6 +1496,25 @@ out:
}
+/*
+ * Add the "features/arbiter" xlator to the brick graph when the volume's
+ * arbiter_count is 1; for any other value the graph is left untouched.
+ * Returns 0 on success (or when nothing is added) and -1 when
+ * volgen_graph_add() fails.
+ */
static int
+brick_graph_add_arbiter (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+                         dict_t *set_dict, glusterd_brickinfo_t *brickinfo)
+{
+        xlator_t *arbiter_xl = NULL;
+
+        if (volinfo->arbiter_count != 1)
+                return 0;
+
+        /* TODO: Parse brickinfo and add the arbiter xlator only if brick is
+         * the last brick (i.e. 3rd brick) of the replica set. */
+        arbiter_xl = volgen_graph_add (graph, "features/arbiter",
+                                       volinfo->volname);
+
+        return arbiter_xl ? 0 : -1;
+}
+
+static int
brick_graph_add_bd (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
dict_t *set_dict, glusterd_brickinfo_t *brickinfo)
{
@@ -2140,6 +2159,7 @@ static volgen_brick_xlator_t server_graph_table[] = {
{brick_graph_add_changetimerecorder, "changetimerecorder"},
{brick_graph_add_bd, "bd"},
{brick_graph_add_trash, "trash"},
+ {brick_graph_add_arbiter, "arbiter"},
{brick_graph_add_posix, "posix"},
};
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
index bac1598598b..2dd6348ac7f 100644
--- a/xlators/mgmt/glusterd/src/glusterd.h
+++ b/xlators/mgmt/glusterd/src/glusterd.h
@@ -340,6 +340,7 @@ struct glusterd_volinfo_ {
int sub_count; /* backward compatibility */
int stripe_count;
int replica_count;
+ int arbiter_count;
int disperse_count;
int redundancy_count;
int subvol_count; /* Number of subvolumes in a