From 32ed7aa5ad7049a9d85c795f997336c0366151a8 Mon Sep 17 00:00:00 2001 From: Ravishankar N Date: Thu, 19 Mar 2015 12:33:51 +0000 Subject: afr: arbiter xlator This patch adds the arbiter translator into the tree. This is a server side xlator used for replica 3 volumes. It sits above posix and will be loaded on the 3rd (last) brick of every afr subvolume in a replica 3 configuration. It intercepts inode read/write operations: reads are unwound with ENOTCONN, inode writes are unwound with success without actually passing them down to posix. Metadata operations are allowed to pass through. The CLI for creating a 3 way replica with arbiter is also added but kept disabled (A 'normal' 3 way replica is created instead). This patch is a part of the arbiter logic implementation for 3 way AFR, details of which can be found at http://review.gluster.org/#/c/9656/ Change-Id: I395b81f49d5da52c466daf5c8518f1bbad9c16fa BUG: 1199985 Signed-off-by: Ravishankar N Reviewed-on: http://review.gluster.org/9840 Tested-by: Gluster Build System Reviewed-by: Pranith Kumar Karampuri Reviewed-by: Vijay Bellur --- xlators/features/arbiter/Makefile.am | 3 + xlators/features/arbiter/src/Makefile.am | 16 ++ xlators/features/arbiter/src/arbiter-mem-types.h | 19 ++ xlators/features/arbiter/src/arbiter.c | 324 +++++++++++++++++++++++ xlators/features/arbiter/src/arbiter.h | 26 ++ 5 files changed, 388 insertions(+) create mode 100644 xlators/features/arbiter/Makefile.am create mode 100644 xlators/features/arbiter/src/Makefile.am create mode 100644 xlators/features/arbiter/src/arbiter-mem-types.h create mode 100644 xlators/features/arbiter/src/arbiter.c create mode 100644 xlators/features/arbiter/src/arbiter.h (limited to 'xlators/features/arbiter') diff --git a/xlators/features/arbiter/Makefile.am b/xlators/features/arbiter/Makefile.am new file mode 100644 index 00000000000..a985f42a877 --- /dev/null +++ b/xlators/features/arbiter/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git 
a/xlators/features/arbiter/src/Makefile.am b/xlators/features/arbiter/src/Makefile.am
new file mode 100644
index 00000000000..edec57b892c
--- /dev/null
+++ b/xlators/features/arbiter/src/Makefile.am
@@ -0,0 +1,16 @@
+xlator_LTLIBRARIES = arbiter.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+arbiter_la_LDFLAGS = -module -avoid-version
+
+arbiter_la_SOURCES = arbiter.c
+# libglusterfs is the only library arbiter.la is linked against.
+arbiter_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = arbiter.h arbiter-mem-types.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/arbiter/src/arbiter-mem-types.h b/xlators/features/arbiter/src/arbiter-mem-types.h
new file mode 100644
index 00000000000..200b59de695
--- /dev/null
+++ b/xlators/features/arbiter/src/arbiter-mem-types.h
@@ -0,0 +1,19 @@
+/*
+  Copyright (c) 2015 Red Hat, Inc.
+  This file is part of GlusterFS.
+  This file is licensed to you under your choice of the GNU Lesser
+  General Public License, version 3 or any later version (LGPLv3 or
+  later), or the GNU General Public License, version 2 (GPLv2), in all
+  cases as published by the Free Software Foundation.
+*/
+
+#ifndef __ARBITER_MEM_TYPES_H__
+#define __ARBITER_MEM_TYPES_H__
+#include "mem-types.h"
+
+typedef enum gf_arbiter_mem_types_ {
+        gf_arbiter_mt_inode_ctx_t = gf_common_mt_end + 1, /* arbiter_inode_ctx_t */
+        gf_arbiter_mt_iatt,       /* struct iatt buffer owned by an inode ctx */
+        gf_arbiter_mt_end         /* end marker; sizes memory accounting */
+} gf_arbiter_mem_types_t;
+#endif
diff --git a/xlators/features/arbiter/src/arbiter.c b/xlators/features/arbiter/src/arbiter.c
new file mode 100644
index 00000000000..87145da5680
--- /dev/null
+++ b/xlators/features/arbiter/src/arbiter.c
@@ -0,0 +1,324 @@
+/*
+  Copyright (c) 2015 Red Hat, Inc.
+  This file is part of GlusterFS.
+
+  This file is licensed to you under your choice of the GNU Lesser
+  General Public License, version 3 or any later version (LGPLv3 or
+  later), or the GNU General Public License, version 2 (GPLv2), in all
+  cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "arbiter.h"
+#include "arbiter-mem-types.h"
+#include "glusterfs.h"
+#include "xlator.h"
+#include "logging.h"
+
+/*
+ * Free an arbiter inode context together with the iatt buffer it owns.
+ * A NULL ctx is accepted and ignored.
+ */
+void
+arbiter_inode_ctx_destroy (arbiter_inode_ctx_t *ctx)
+{
+        if (!ctx)
+                return;
+        GF_FREE (ctx->iattbuf);
+        GF_FREE (ctx);
+}
+
+/*
+ * Return the arbiter context attached to @inode for this xlator. If none
+ * is attached yet, allocate a zero-filled context plus its iatt buffer and
+ * attach it with __inode_ctx_put().
+ *
+ * Returns NULL when either allocation or the attach fails; anything
+ * allocated so far is released. The in-file caller holds inode->lock
+ * around this call.
+ */
+static arbiter_inode_ctx_t *
+__arbiter_inode_ctx_get (inode_t *inode, xlator_t *this)
+{
+        arbiter_inode_ctx_t *ctx      = NULL;
+        int                  ret      = 0;
+        uint64_t             ctx_addr = 0;
+
+        ret = __inode_ctx_get (inode, this, &ctx_addr);
+        if (ret == 0) {
+                ctx = (arbiter_inode_ctx_t *) (long) ctx_addr;
+                goto out;
+        }
+
+        ctx = GF_CALLOC (1, sizeof (*ctx), gf_arbiter_mt_inode_ctx_t);
+        if (!ctx)
+                goto fail;
+        ctx->iattbuf = GF_CALLOC (1, sizeof (*ctx->iattbuf),
+                                  gf_arbiter_mt_iatt);
+        if (!ctx->iattbuf)
+                goto fail;
+        /* Same (long) intermediate cast as the read side above. */
+        ret = __inode_ctx_put (inode, this, (uint64_t) (long) ctx);
+        if (ret) {
+                gf_log_callingfn (this->name, GF_LOG_ERROR, "failed to "
+                                  "set the inode ctx (%s)",
+                                  uuid_utoa (inode->gfid));
+                goto fail;
+        }
+out:
+        return ctx;
+fail:
+        /* Handles a NULL ctx as well as a ctx without an iatt buffer. */
+        arbiter_inode_ctx_destroy (ctx);
+        return NULL;
+}
+
+/*
+ * Locked wrapper around __arbiter_inode_ctx_get(): takes inode->lock for
+ * the duration of the get-or-create. Returns the context, or NULL on
+ * failure.
+ */
+static arbiter_inode_ctx_t *
+arbiter_inode_ctx_get (inode_t *inode, xlator_t *this)
+{
+        arbiter_inode_ctx_t *ctx = NULL;
+
+        LOCK(&inode->lock);
+        {
+                ctx = __arbiter_inode_ctx_get (inode, this);
+        }
+        UNLOCK(&inode->lock);
+        return ctx;
+}
+
+/*
+ * Lookup callback. A failed lookup is unwound unchanged. On success the
+ * returned iatt is copied into the inode's arbiter context before
+ * unwinding; if the context cannot be obtained the lookup is turned into
+ * a failure with ENOMEM.
+ */
+int32_t
+arbiter_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, inode_t *inode,
+                    struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+{
+        arbiter_inode_ctx_t *ctx = NULL;
+
+        if (op_ret != 0)
+                goto unwind;
+        ctx = arbiter_inode_ctx_get (inode, this);
+        if (!ctx) {
+                op_ret = -1;
+                op_errno = ENOMEM;
+                goto unwind;
+        }
+        memcpy (ctx->iattbuf, buf, sizeof (*ctx->iattbuf));
+
+unwind:
+        STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf,
+                             xdata, postparent);
+        return 0;
+}
+
+/*
+ * Lookup is wound to the single child; arbiter_lookup_cbk() caches the
+ * resulting iatt.
+ */
+int32_t
+arbiter_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+        STACK_WIND (frame, arbiter_lookup_cbk, FIRST_CHILD(this),
+                    FIRST_CHILD(this)->fops->lookup, loc, xdata);
+        return 0;
+}
+
+/*
+ * Reads are never served: unwind immediately with -1/ENOTCONN and without
+ * winding to the child.
+ */
+int32_t
+arbiter_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+               off_t offset, uint32_t flags, dict_t *xdata)
+{
+        STACK_UNWIND_STRICT (readv, frame, -1, ENOTCONN, NULL, 0, NULL, NULL,
+                             NULL);
+        return 0;
+}
+
+/*
+ * Truncate is not wound to the child. It is unwound with op_ret 0 and the
+ * iatt cached in the inode context passed for both iatt arguments, or with
+ * -1/ENOMEM (and NULL iatts) if the context cannot be obtained.
+ */
+int32_t
+arbiter_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+                  dict_t *xdata)
+{
+        arbiter_inode_ctx_t *ctx      = NULL;
+        struct iatt         *buf      = NULL;
+        int32_t              op_ret   = 0;
+        int32_t              op_errno = 0;
+
+        ctx = arbiter_inode_ctx_get (loc->inode, this);
+        if (!ctx) {
+                op_ret = -1;
+                op_errno = ENOMEM;
+                goto unwind;
+        }
+        buf = ctx->iattbuf;
+unwind:
+        STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, buf, buf,
+                             xdata);
+        return 0;
+}
+
+/*
+ * Same as arbiter_truncate(), but the inode is taken from @fd.
+ */
+int32_t
+arbiter_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+                   dict_t *xdata)
+{
+        arbiter_inode_ctx_t *ctx      = NULL;
+        struct iatt         *buf      = NULL;
+        int32_t              op_ret   = 0;
+        int32_t              op_errno = 0;
+
+        ctx = arbiter_inode_ctx_get (fd->inode, this);
+        if (!ctx) {
+                op_ret = -1;
+                op_errno = ENOMEM;
+                goto unwind;
+        }
+        buf = ctx->iattbuf;
+unwind:
+        STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, buf, buf,
+                             xdata);
+        return 0;
+}
+
+/*
+ * Writes are not wound to the child. On success op_ret is the total length
+ * of @vector as computed by iov_length(), and the cached iatt is passed for
+ * both iatt arguments. Unwinds with -1/ENOMEM if the inode context cannot
+ * be obtained.
+ */
+int32_t
+arbiter_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                struct iovec *vector, int32_t count, off_t off, uint32_t flags,
+                struct iobref *iobref, dict_t *xdata)
+{
+        arbiter_inode_ctx_t *ctx      = NULL;
+        struct iatt         *buf      = NULL;
+        int                  op_ret   = 0;
+        int                  op_errno = 0;
+
+        ctx = arbiter_inode_ctx_get (fd->inode, this);
+        if (!ctx) {
+                op_ret = -1;
+                op_errno = ENOMEM;
+                goto unwind;
+        }
+        buf = ctx->iattbuf;
+        op_ret = iov_length (vector, count);
+unwind:
+        STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, buf, buf, xdata);
+        return 0;
+}
+
+/*
+ * Fallocate is not wound to the child; it is unwound with op_ret 0 and the
+ * cached iatt, or with -1/ENOMEM if the inode context cannot be obtained.
+ */
+int32_t
+arbiter_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                   int32_t keep_size, off_t offset, size_t len, dict_t *xdata)
+{
+        arbiter_inode_ctx_t *ctx      = NULL;
+        struct iatt         *buf      = NULL;
+        int                  op_ret   = 0;
+        int                  op_errno = 0;
+
+        ctx = arbiter_inode_ctx_get (fd->inode, this);
+        if (!ctx) {
+                op_ret = -1;
+                op_errno = ENOMEM;
+                goto unwind;
+        }
+        buf = ctx->iattbuf;
+unwind:
+        STACK_UNWIND_STRICT (fallocate, frame, op_ret, op_errno, buf, buf,
+                             xdata);
+        return 0;
+}
+
+/*
+ * Discard is not wound to the child; it is unwound with op_ret 0 and the
+ * cached iatt, or with -1/ENOMEM if the inode context cannot be obtained.
+ */
+int32_t
+arbiter_discard (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                 off_t offset, size_t len, dict_t *xdata)
+{
+        arbiter_inode_ctx_t *ctx      = NULL;
+        struct iatt         *buf      = NULL;
+        int                  op_ret   = 0;
+        int                  op_errno = 0;
+
+        ctx = arbiter_inode_ctx_get (fd->inode, this);
+        if (!ctx) {
+                op_ret = -1;
+                op_errno = ENOMEM;
+                goto unwind;
+        }
+        buf = ctx->iattbuf;
+unwind:
+        STACK_UNWIND_STRICT (discard, frame, op_ret, op_errno, buf, buf, xdata);
+        return 0;
+}
+
+/*
+ * Zerofill is not wound to the child; it is unwound with op_ret 0 and the
+ * cached iatt, or with -1/ENOMEM if the inode context cannot be obtained.
+ */
+int32_t
+arbiter_zerofill (call_frame_t *frame, xlator_t *this, fd_t *fd,
+                  off_t offset, off_t len, dict_t *xdata)
+{
+        arbiter_inode_ctx_t *ctx      = NULL;
+        struct iatt         *buf      = NULL;
+        int                  op_ret   = 0;
+        int                  op_errno = 0;
+
+        ctx = arbiter_inode_ctx_get (fd->inode, this);
+        if (!ctx) {
+                op_ret = -1;
+                op_errno = ENOMEM;
+                goto unwind;
+        }
+        buf = ctx->iattbuf;
+unwind:
+        STACK_UNWIND_STRICT (zerofill, frame, op_ret, op_errno, buf, buf,
+                             xdata);
+        return 0;
+}
+
+/*
+ * Set up memory accounting for this xlator's allocation types. Returns the
+ * result of xlator_mem_acct_init(); a non-zero result is logged.
+ */
+int32_t
+mem_acct_init (xlator_t *this)
+{
+        int ret = -1;
+
+        ret = xlator_mem_acct_init (this, gf_arbiter_mt_end + 1);
+        if (ret)
+                gf_log (this->name, GF_LOG_ERROR, "Memory accounting "
+                        "initialization failed.");
+        return ret;
+}
+
+/*
+ * The options table below is empty, so there is nothing to reconfigure.
+ */
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+        return 0;
+}
+
+/*
+ * forget callback: detach the arbiter context from @inode and free it.
+ * Always returns 0.
+ */
+int
+arbiter_forget (xlator_t *this, inode_t *inode)
+{
+        arbiter_inode_ctx_t *ctx      = NULL;
+        uint64_t             ctx_addr = 0;
+
+        inode_ctx_del (inode, this, &ctx_addr);
+        if (!ctx_addr)
+                return 0;
+        ctx = (arbiter_inode_ctx_t *) (long) ctx_addr;
+        /*
+         * The context owns a separately allocated iatt buffer; freeing only
+         * the context would leak it, so go through the destroy helper.
+         */
+        arbiter_inode_ctx_destroy (ctx);
+        return 0;
+}
+
+/*
+ * Validate the graph position of this xlator. Fails with -1 unless there
+ * is exactly one child. A missing parent is only logged; init still
+ * returns 0 in that case.
+ */
+int32_t
+init (xlator_t *this)
+{
+        if (!this->children || this->children->next) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "'arbiter' not configured with exactly one child");
+                return -1;
+        }
+
+        if (!this->parents)
+                gf_log (this->name, GF_LOG_ERROR,
+                        "dangling volume. check volfile ");
+
+        return 0;
+}
+
+/*
+ * init() allocates nothing, so there is nothing to release here.
+ */
+void
+fini (xlator_t *this)
+{
+        return;
+}
+
+/* File operations implemented by this xlator. */
+struct xlator_fops fops = {
+        .lookup = arbiter_lookup,
+        .readv  = arbiter_readv,
+        .truncate = arbiter_truncate,
+        .writev = arbiter_writev,
+        .ftruncate = arbiter_ftruncate,
+        .fallocate = arbiter_fallocate,
+        .discard = arbiter_discard,
+        .zerofill = arbiter_zerofill,
+};
+
+/* Only forget is hooked, to free the per-inode context. */
+struct xlator_cbks cbks = {
+        .forget = arbiter_forget,
+};
+
+/* No options: the table holds only the terminating entry. */
+struct volume_options options[] = {
+        { .key = {NULL} },
+};
diff --git a/xlators/features/arbiter/src/arbiter.h b/xlators/features/arbiter/src/arbiter.h
new file mode 100644
index 00000000000..69ce9cb4fa3
--- /dev/null
+++ b/xlators/features/arbiter/src/arbiter.h
@@ -0,0 +1,26 @@
+/*
+  Copyright (c) 2015 Red Hat, Inc.
+  This file is part of GlusterFS.
+
+  This file is licensed to you under your choice of the GNU Lesser
+  General Public License, version 3 or any later version (LGPLv3 or
+  later), or the GNU General Public License, version 2 (GPLv2), in all
+  cases as published by the Free Software Foundation.
+*/
+
+#ifndef _ARBITER_H
+#define _ARBITER_H
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "locking.h"
+#include "common-utils.h"
+
+typedef struct arbiter_inode_ctx_ {
+        struct iatt *iattbuf; /* owned; filled from successful lookups */
+} arbiter_inode_ctx_t;
+
+#endif /* _ARBITER_H */
-- cgit