diff options
Diffstat (limited to 'xlators/features')
290 files changed, 75382 insertions, 63980 deletions
diff --git a/xlators/features/Makefile.am b/xlators/features/Makefile.am index 7e5783f4f30..c57897f11ea 100644 --- a/xlators/features/Makefile.am +++ b/xlators/features/Makefile.am @@ -1,5 +1,14 @@ -SUBDIRS = locks quota read-only mac-compat quiesce marker index barrier arbiter\ - protect compress changelog changetimerecorder ganesha gfid-access $(GLUPY_SUBDIR) qemu-block \ - upcall snapview-client snapview-server trash shard bit-rot #path-converter # filter +if BUILD_CLOUDSYNC + CLOUDSYNC_DIR = cloudsync +endif + +if BUILD_METADISP + METADISP_DIR = metadisp +endif + +SUBDIRS = locks quota read-only quiesce marker index barrier arbiter upcall \ + compress changelog gfid-access snapview-client snapview-server trash \ + shard bit-rot leases selinux sdfs namespace $(CLOUDSYNC_DIR) thin-arbiter \ + utime $(METADISP_DIR) CLEANFILES = diff --git a/xlators/features/arbiter/src/Makefile.am b/xlators/features/arbiter/src/Makefile.am index 328e08cda9c..badc42f37be 100644 --- a/xlators/features/arbiter/src/Makefile.am +++ b/xlators/features/arbiter/src/Makefile.am @@ -1,14 +1,18 @@ +if WITH_SERVER xlator_LTLIBRARIES = arbiter.la +endif + xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features -arbiter_la_LDFLAGS = $(GF_XLATOR_DEFAULT_LDFLAGS) +arbiter_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) arbiter_la_SOURCES = arbiter.c arbiter_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la noinst_HEADERS = arbiter.h arbiter-mem-types.h -AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ + -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src AM_CFLAGS = -Wall $(GF_CFLAGS) diff --git a/xlators/features/arbiter/src/arbiter-mem-types.h b/xlators/features/arbiter/src/arbiter-mem-types.h index 200b59de695..05d18374c46 100644 --- a/xlators/features/arbiter/src/arbiter-mem-types.h +++ b/xlators/features/arbiter/src/arbiter-mem-types.h @@ -9,11 +9,10 @@ #ifndef __ARBITER_MEM_TYPES_H__ 
#define __ARBITER_MEM_TYPES_H__ -#include "mem-types.h" +#include <glusterfs/mem-types.h> typedef enum gf_arbiter_mem_types_ { - gf_arbiter_mt_inode_ctx_t = gf_common_mt_end + 1, - gf_arbiter_mt_iatt, - gf_arbiter_mt_end + gf_arbiter_mt_inode_ctx_t = gf_common_mt_end + 1, + gf_arbiter_mt_end } gf_arbiter_mem_types_t; #endif diff --git a/xlators/features/arbiter/src/arbiter.c b/xlators/features/arbiter/src/arbiter.c index b404597be9d..83a97e3354b 100644 --- a/xlators/features/arbiter/src/arbiter.c +++ b/xlators/features/arbiter/src/arbiter.c @@ -10,310 +10,371 @@ #include "arbiter.h" #include "arbiter-mem-types.h" -#include "glusterfs.h" -#include "xlator.h" -#include "logging.h" - -void -arbiter_inode_ctx_destroy (arbiter_inode_ctx_t *ctx) -{ - if (!ctx) - return; - GF_FREE (ctx->iattbuf); - GF_FREE (ctx); -} +#include <glusterfs/glusterfs.h> +#include <glusterfs/xlator.h> +#include <glusterfs/logging.h> static arbiter_inode_ctx_t * -__arbiter_inode_ctx_get (inode_t *inode, xlator_t *this) +__arbiter_inode_ctx_get(inode_t *inode, xlator_t *this) { - - arbiter_inode_ctx_t *ctx = NULL; - int ret = 0; - uint64_t ctx_addr = 0; - - ret = __inode_ctx_get (inode, this, &ctx_addr); - if (ret == 0) { - ctx = (arbiter_inode_ctx_t *) (long) ctx_addr; - goto out; - } - - ctx = GF_CALLOC (1, sizeof (*ctx), gf_arbiter_mt_inode_ctx_t); - if (!ctx) - goto fail; - ctx->iattbuf = GF_CALLOC (1, sizeof (*ctx->iattbuf), - gf_arbiter_mt_iatt); - if (!ctx->iattbuf) - goto fail; - ret = __inode_ctx_put (inode, this, (uint64_t)ctx); - if (ret) { - gf_log_callingfn (this->name, GF_LOG_ERROR, "failed to " - "set the inode ctx (%s)", - uuid_utoa (inode->gfid)); - goto fail; - } + arbiter_inode_ctx_t *ctx = NULL; + int ret = 0; + uint64_t ctx_addr = 0; + + ret = __inode_ctx_get(inode, this, &ctx_addr); + if (ret == 0) { + ctx = (arbiter_inode_ctx_t *)(long)ctx_addr; + goto out; + } + + ctx = GF_CALLOC(1, sizeof(*ctx), gf_arbiter_mt_inode_ctx_t); + if (!ctx) + goto out; + + ret = 
__inode_ctx_put(inode, this, (uint64_t)(uintptr_t)ctx); + if (ret) { + GF_FREE(ctx); + ctx = NULL; + gf_log_callingfn(this->name, GF_LOG_ERROR, + "failed to " + "set the inode ctx (%s)", + uuid_utoa(inode->gfid)); + } out: - return ctx; -fail: - arbiter_inode_ctx_destroy (ctx); - return NULL; + return ctx; } static arbiter_inode_ctx_t * -arbiter_inode_ctx_get (inode_t *inode, xlator_t *this) +arbiter_inode_ctx_get(inode_t *inode, xlator_t *this) { - arbiter_inode_ctx_t *ctx = NULL; - - LOCK(&inode->lock); - { - ctx = __arbiter_inode_ctx_get (inode, this); - } - UNLOCK(&inode->lock); - return ctx; + arbiter_inode_ctx_t *ctx = NULL; + + LOCK(&inode->lock); + { + ctx = __arbiter_inode_ctx_get(inode, this); + } + UNLOCK(&inode->lock); + return ctx; } int32_t -arbiter_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *xdata, struct iatt *postparent) +arbiter_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, struct iatt *postparent) { - arbiter_inode_ctx_t *ctx = NULL; - - if (op_ret != 0) - goto unwind; - ctx = arbiter_inode_ctx_get (inode, this); - if (!ctx) { - op_ret = -1; - op_errno = ENOMEM; - goto unwind; - } - memcpy (ctx->iattbuf, buf, sizeof (*ctx->iattbuf)); + arbiter_inode_ctx_t *ctx = NULL; + + if (op_ret != 0) + goto unwind; + ctx = arbiter_inode_ctx_get(inode, this); + if (!ctx) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + memcpy(&ctx->iattbuf, buf, sizeof(ctx->iattbuf)); unwind: - STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf, - xdata, postparent); - return 0; + STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, xdata, + postparent); + return 0; } int32_t -arbiter_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +arbiter_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { - 
STACK_WIND (frame, arbiter_lookup_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, loc, xdata); - return 0; + STACK_WIND(frame, arbiter_lookup_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, loc, xdata); + return 0; } int32_t -arbiter_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset, uint32_t flags, dict_t *xdata) +arbiter_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) { - STACK_UNWIND_STRICT (readv, frame, -1, ENOTCONN, NULL, 0, NULL, NULL, - NULL); - return 0; + arbiter_inode_ctx_t *ctx = NULL; + struct iatt *buf = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + + ctx = arbiter_inode_ctx_get(loc->inode, this); + if (!ctx) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + buf = &ctx->iattbuf; +unwind: + STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, buf, buf, NULL); + return 0; } int32_t -arbiter_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, +arbiter_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, dict_t *xdata) + { - arbiter_inode_ctx_t *ctx = NULL; - struct iatt *buf = NULL; - int32_t op_ret = 0; - int32_t op_errno = 0; - - ctx = arbiter_inode_ctx_get (loc->inode, this); - if (!ctx) { - op_ret = -1; - op_errno = ENOMEM; - goto unwind; - } - buf = ctx->iattbuf; + arbiter_inode_ctx_t *ctx = NULL; + struct iatt *buf = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + + ctx = arbiter_inode_ctx_get(fd->inode, this); + if (!ctx) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + buf = &ctx->iattbuf; unwind: - STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, buf, buf, - xdata); - return 0; + STACK_UNWIND_STRICT(ftruncate, frame, op_ret, op_errno, buf, buf, NULL); + return 0; } -int32_t -arbiter_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, - dict_t *xdata) - +dict_t * +arbiter_fill_writev_xdata(fd_t *fd, dict_t *xdata, xlator_t *this) { - arbiter_inode_ctx_t 
*ctx = NULL; - struct iatt *buf = NULL; - int32_t op_ret = 0; - int32_t op_errno = 0; - - ctx = arbiter_inode_ctx_get (fd->inode, this); - if (!ctx) { - op_ret = -1; - op_errno = ENOMEM; - goto unwind; + dict_t *rsp_xdata = NULL; + int32_t ret = 0; + int is_append = 1; + + if (!fd || !fd->inode || gf_uuid_is_null(fd->inode->gfid)) { + goto out; + } + + if (!xdata) + goto out; + + rsp_xdata = dict_new(); + if (!rsp_xdata) + goto out; + + if (dict_get(xdata, GLUSTERFS_OPEN_FD_COUNT)) { + ret = dict_set_uint32(rsp_xdata, GLUSTERFS_OPEN_FD_COUNT, + fd->inode->fd_count); + if (ret < 0) { + gf_msg_debug(this->name, 0, + "Failed to set dict value" + " for GLUSTERFS_OPEN_FD_COUNT"); } - buf = ctx->iattbuf; -unwind: - STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, buf, buf, - xdata); - return 0; + } + if (dict_get(xdata, GLUSTERFS_WRITE_IS_APPEND)) { + ret = dict_set_uint32(rsp_xdata, GLUSTERFS_WRITE_IS_APPEND, is_append); + if (ret < 0) { + gf_msg_debug(this->name, 0, + "Failed to set dict value" + " for GLUSTERFS_WRITE_IS_APPEND"); + } + } +out: + return rsp_xdata; } int32_t -arbiter_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iovec *vector, int32_t count, off_t off, uint32_t flags, - struct iobref *iobref, dict_t *xdata) +arbiter_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iovec *vector, int32_t count, off_t off, uint32_t flags, + struct iobref *iobref, dict_t *xdata) { - arbiter_inode_ctx_t *ctx = NULL; - struct iatt *buf = NULL; - int op_ret = 0; - int op_errno = 0; - - ctx = arbiter_inode_ctx_get (fd->inode, this); - if (!ctx) { - op_ret = -1; - op_errno = ENOMEM; - goto unwind; - } - buf = ctx->iattbuf; - op_ret = iov_length (vector, count); + arbiter_inode_ctx_t *ctx = NULL; + struct iatt *buf = NULL; + dict_t *rsp_xdata = NULL; + int op_ret = 0; + int op_errno = 0; + + ctx = arbiter_inode_ctx_get(fd->inode, this); + if (!ctx) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + buf = &ctx->iattbuf; + op_ret = 
iov_length(vector, count); + rsp_xdata = arbiter_fill_writev_xdata(fd, xdata, this); unwind: - STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, buf, buf, xdata); - return 0; + STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, buf, buf, rsp_xdata); + if (rsp_xdata) + dict_unref(rsp_xdata); + return 0; } int32_t -arbiter_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd, +arbiter_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t keep_size, off_t offset, size_t len, dict_t *xdata) { - arbiter_inode_ctx_t *ctx = NULL; - struct iatt *buf = NULL; - int op_ret = 0; - int op_errno = 0; - - ctx = arbiter_inode_ctx_get (fd->inode, this); - if (!ctx) { - op_ret = -1; - op_errno = ENOMEM; - goto unwind; - } - buf = ctx->iattbuf; + arbiter_inode_ctx_t *ctx = NULL; + struct iatt *buf = NULL; + int op_ret = 0; + int op_errno = 0; + + ctx = arbiter_inode_ctx_get(fd->inode, this); + if (!ctx) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + buf = &ctx->iattbuf; unwind: - STACK_UNWIND_STRICT(fallocate, frame, op_ret, op_errno, buf, buf, - xdata); - return 0; + STACK_UNWIND_STRICT(fallocate, frame, op_ret, op_errno, buf, buf, NULL); + return 0; } int32_t -arbiter_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, - off_t offset, size_t len, dict_t *xdata) +arbiter_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) { - arbiter_inode_ctx_t *ctx = NULL; - struct iatt *buf = NULL; - int op_ret = 0; - int op_errno = 0; - - ctx = arbiter_inode_ctx_get (fd->inode, this); - if (!ctx) { - op_ret = -1; - op_errno = ENOMEM; - goto unwind; - } - buf = ctx->iattbuf; + arbiter_inode_ctx_t *ctx = NULL; + struct iatt *buf = NULL; + int op_ret = 0; + int op_errno = 0; + + ctx = arbiter_inode_ctx_get(fd->inode, this); + if (!ctx) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + buf = &ctx->iattbuf; unwind: - STACK_UNWIND_STRICT(discard, frame, op_ret, op_errno, buf, buf, xdata); - return 0; + 
STACK_UNWIND_STRICT(discard, frame, op_ret, op_errno, buf, buf, NULL); + return 0; } int32_t -arbiter_zerofill (call_frame_t *frame, xlator_t *this, fd_t *fd, - off_t offset, off_t len, dict_t *xdata) +arbiter_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + off_t len, dict_t *xdata) { - arbiter_inode_ctx_t *ctx = NULL; - struct iatt *buf = NULL; - int op_ret = 0; - int op_errno = 0; - - ctx = arbiter_inode_ctx_get (fd->inode, this); - if (!ctx) { - op_ret = -1; - op_errno = ENOMEM; - goto unwind; - } - buf = ctx->iattbuf; + arbiter_inode_ctx_t *ctx = NULL; + struct iatt *buf = NULL; + int op_ret = 0; + int op_errno = 0; + + ctx = arbiter_inode_ctx_get(fd->inode, this); + if (!ctx) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + buf = &ctx->iattbuf; unwind: - STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, buf, buf, xdata); - return 0; + STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, buf, buf, NULL); + return 0; } -int32_t -mem_acct_init (xlator_t *this) +static int32_t +arbiter_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) +{ + STACK_UNWIND_STRICT(readv, frame, -1, ENOSYS, NULL, 0, NULL, NULL, NULL); + return 0; +} + +static int32_t +arbiter_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + gf_seek_what_t what, dict_t *xdata) { - int ret = -1; + STACK_UNWIND_STRICT(seek, frame, -1, ENOSYS, 0, xdata); + return 0; +} - ret = xlator_mem_acct_init (this, gf_arbiter_mt_end + 1); - if (ret) - gf_log (this->name, GF_LOG_ERROR, "Memory accounting " - "initialization failed."); - return ret; +int32_t +mem_acct_init(xlator_t *this) +{ + int ret = -1; + + ret = xlator_mem_acct_init(this, gf_arbiter_mt_end + 1); + if (ret) + gf_log(this->name, GF_LOG_ERROR, + "Memory accounting " + "initialization failed."); + return ret; } int -reconfigure (xlator_t *this, dict_t *options) +reconfigure(xlator_t *this, dict_t *options) { - - return 0; + return 
0; } int -arbiter_forget (xlator_t *this, inode_t *inode) +arbiter_forget(xlator_t *this, inode_t *inode) { - arbiter_inode_ctx_t *ctx = NULL; - uint64_t ctx_addr = 0; - - inode_ctx_del (inode, this, &ctx_addr); - if (!ctx_addr) - return 0; - ctx = (arbiter_inode_ctx_t *) (long) ctx_addr; - GF_FREE (ctx); + arbiter_inode_ctx_t *ctx = NULL; + uint64_t ctx_addr = 0; + + inode_ctx_del(inode, this, &ctx_addr); + if (!ctx_addr) return 0; + ctx = (arbiter_inode_ctx_t *)(long)ctx_addr; + GF_FREE(ctx); + return 0; } int32_t -init (xlator_t *this) +init(xlator_t *this) { + if (!this->children || this->children->next) { + gf_log(this->name, GF_LOG_ERROR, + "'arbiter' not configured with exactly one child"); + return -1; + } - if (!this->children || this->children->next) { - gf_log (this->name, GF_LOG_ERROR, - "'arbiter' not configured with exactly one child"); - return -1; - } - - if (!this->parents) - gf_log (this->name, GF_LOG_ERROR, - "dangling volume. check volfile "); + if (!this->parents) + gf_log(this->name, GF_LOG_ERROR, "dangling volume. check volfile "); - return 0; + return 0; } void -fini (xlator_t *this) +fini(xlator_t *this) { - return; + return; } struct xlator_fops fops = { - .lookup = arbiter_lookup, - .readv = arbiter_readv, - .truncate = arbiter_truncate, - .writev = arbiter_writev, - .ftruncate = arbiter_ftruncate, - .fallocate = arbiter_fallocate, - .discard = arbiter_discard, - .zerofill = arbiter_zerofill, + .lookup = arbiter_lookup, + + /* Return success for these inode write FOPS without winding it down to + * posix; this is needed for AFR write transaction logic to work.*/ + .truncate = arbiter_truncate, + .writev = arbiter_writev, + .ftruncate = arbiter_ftruncate, + .fallocate = arbiter_fallocate, + .discard = arbiter_discard, + .zerofill = arbiter_zerofill, + + /* AFR is not expected to wind these inode read FOPS initiated by the + * application to the arbiter brick. But in case a bug causes them + * to be called, we return ENOSYS. 
*/ + .readv = arbiter_readv, + .seek = arbiter_seek, + + /* The following inode read FOPS initiated by the application are not + * wound by AFR either but internal logic like shd, glfsheal and + * client side healing in AFR will send them for selfheal/ inode refresh + * operations etc.,so we need to wind them down to posix: + * + * (f)stat, readdir(p), readlink, (f)getxattr.*/ + + /* All other FOPs not listed here are safe to be wound down to posix.*/ }; struct xlator_cbks cbks = { - .forget = arbiter_forget, + .forget = arbiter_forget, }; struct volume_options options[] = { - { .key = {NULL} }, + {.key = {NULL}}, +}; + +xlator_api_t xlator_api = { + .init = init, + .fini = fini, + .reconfigure = reconfigure, + .mem_acct_init = mem_acct_init, + .op_version = {1}, /* Present from the initial version */ + .fops = &fops, + .cbks = &cbks, + .options = options, + .identifier = "arbiter", + .category = GF_MAINTAINED, }; diff --git a/xlators/features/arbiter/src/arbiter.h b/xlators/features/arbiter/src/arbiter.h index 6ccc3add3b3..546db7b751a 100644 --- a/xlators/features/arbiter/src/arbiter.h +++ b/xlators/features/arbiter/src/arbiter.h @@ -11,11 +11,11 @@ #ifndef _ARBITER_H #define _ARBITER_H -#include "locking.h" -#include "common-utils.h" +#include <glusterfs/locking.h> +#include <glusterfs/common-utils.h> typedef struct arbiter_inode_ctx_ { - struct iatt *iattbuf; + struct iatt iattbuf; } arbiter_inode_ctx_t; #endif /* _ARBITER_H */ diff --git a/xlators/features/barrier/src/Makefile.am b/xlators/features/barrier/src/Makefile.am index 162c6a8865c..25099bc56e5 100644 --- a/xlators/features/barrier/src/Makefile.am +++ b/xlators/features/barrier/src/Makefile.am @@ -1,7 +1,7 @@ xlator_LTLIBRARIES = barrier.la xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features -barrier_la_LDFLAGS = $(GF_XLATOR_DEFAULT_LDFLAGS) +barrier_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) barrier_la_SOURCES = barrier.c @@ -9,7 +9,8 @@ barrier_la_LIBADD = 
$(top_builddir)/libglusterfs/src/libglusterfs.la noinst_HEADERS = barrier.h barrier-mem-types.h -AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ + -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src AM_CFLAGS = -Wall $(GF_CFLAGS) diff --git a/xlators/features/barrier/src/barrier-mem-types.h b/xlators/features/barrier/src/barrier-mem-types.h index 36647a66966..71ed7898d9c 100644 --- a/xlators/features/barrier/src/barrier-mem-types.h +++ b/xlators/features/barrier/src/barrier-mem-types.h @@ -11,10 +11,10 @@ #ifndef __BARRIER_MEM_TYPES_H__ #define __BARRIER_MEM_TYPES_H__ -#include "mem-types.h" +#include <glusterfs/mem-types.h> enum gf_barrier_mem_types_ { - gf_barrier_mt_priv_t = gf_common_mt_end + 1, - gf_barrier_mt_end + gf_barrier_mt_priv_t = gf_common_mt_end + 1, + gf_barrier_mt_end }; #endif diff --git a/xlators/features/barrier/src/barrier.c b/xlators/features/barrier/src/barrier.c index f3f62d6c2d9..852bbacb99d 100644 --- a/xlators/features/barrier/src/barrier.c +++ b/xlators/features/barrier/src/barrier.c @@ -9,791 +9,801 @@ */ #include "barrier.h" -#include "defaults.h" -#include "call-stub.h" +#include <glusterfs/defaults.h> +#include <glusterfs/call-stub.h> -#include "statedump.h" +#include <glusterfs/statedump.h> void -barrier_local_set_gfid (call_frame_t *frame, uuid_t gfid, xlator_t *this) -{ - if (gfid) { - uuid_t *id = GF_MALLOC (sizeof (uuid_t), gf_common_mt_uuid_t); - if (!id) { - gf_log (this->name, GF_LOG_WARNING, "Could not set gfid" - ". gfid will not be dumped in statedump file."); - return; - } - gf_uuid_copy (*id, gfid); - frame->local = id; +barrier_local_set_gfid(call_frame_t *frame, uuid_t gfid, xlator_t *this) +{ + if (gfid) { + uuid_t *id = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); + if (!id) { + gf_log(this->name, GF_LOG_WARNING, + "Could not set gfid" + ". 
gfid will not be dumped in statedump file."); + return; } + gf_uuid_copy(*id, gfid); + frame->local = id; + } } void -barrier_local_free_gfid (call_frame_t *frame) +barrier_local_free_gfid(call_frame_t *frame) { - if (frame->local) { - GF_FREE (frame->local); - frame->local = NULL; - } + if (frame->local) { + GF_FREE(frame->local); + frame->local = NULL; + } } int32_t -barrier_truncate_cbk_resume (call_frame_t *frame, void *cookie, xlator_t *this, +barrier_truncate_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *prebuf, struct iatt *postbuf, + dict_t *xdata) +{ + barrier_local_free_gfid(frame); + STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + return 0; +} + +int32_t +barrier_ftruncate_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) { - barrier_local_free_gfid (frame); - STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf, postbuf, - xdata); - return 0; + barrier_local_free_gfid(frame); + STACK_UNWIND_STRICT(ftruncate, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + return 0; } int32_t -barrier_ftruncate_cbk_resume (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *prebuf, struct iatt *postbuf, - dict_t *xdata) -{ - barrier_local_free_gfid (frame); - STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, prebuf, - postbuf, xdata); - return 0; +barrier_unlink_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) +{ + barrier_local_free_gfid(frame); + STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, preparent, postparent, + xdata); + return 0; } int32_t -barrier_unlink_cbk_resume (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt 
*preparent, struct iatt *postparent, - dict_t *xdata) -{ - barrier_local_free_gfid (frame); - STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno, preparent, - postparent, xdata); - return 0; +barrier_rmdir_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) +{ + barrier_local_free_gfid(frame); + STACK_UNWIND_STRICT(rmdir, frame, op_ret, op_errno, preparent, postparent, + xdata); + return 0; } int32_t -barrier_rmdir_cbk_resume (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *preparent, struct iatt *postparent, +barrier_rename_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + struct iatt *preoldparent, struct iatt *postoldparent, + struct iatt *prenewparent, struct iatt *postnewparent, dict_t *xdata) { - barrier_local_free_gfid (frame); - STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno, preparent, - postparent, xdata); - return 0; + barrier_local_free_gfid(frame); + STACK_UNWIND_STRICT(rename, frame, op_ret, op_errno, buf, preoldparent, + postoldparent, prenewparent, postnewparent, xdata); + return 0; } int32_t -barrier_rename_cbk_resume (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, - struct iatt *preoldparent, struct iatt *postoldparent, - struct iatt *prenewparent, struct iatt *postnewparent, - dict_t *xdata) -{ - barrier_local_free_gfid (frame); - STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, buf, preoldparent, - postoldparent, prenewparent, postnewparent, xdata); - return 0; +barrier_writev_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + barrier_local_free_gfid(frame); + STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, prebuf, postbuf, + 
xdata); + return 0; } int32_t -barrier_writev_cbk_resume (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *prebuf, struct iatt *postbuf, - dict_t *xdata) -{ - barrier_local_free_gfid (frame); - STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf, - xdata); - return 0; +barrier_fsync_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + barrier_local_free_gfid(frame); + STACK_UNWIND_STRICT(fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata); + return 0; } int32_t -barrier_fsync_cbk_resume (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) +barrier_removexattr_cbk_resume(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + dict_t *xdata) { - barrier_local_free_gfid (frame); - STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf, - xdata); - return 0; + barrier_local_free_gfid(frame); + STACK_UNWIND_STRICT(removexattr, frame, op_ret, op_errno, xdata); + return 0; } int32_t -barrier_removexattr_cbk_resume (call_frame_t *frame, void *cookie, +barrier_fremovexattr_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata) { - barrier_local_free_gfid (frame); - STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, xdata); - return 0; + barrier_local_free_gfid(frame); + STACK_UNWIND_STRICT(fremovexattr, frame, op_ret, op_errno, xdata); + return 0; } int32_t -barrier_fremovexattr_cbk_resume (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno, dict_t *xdata) +barrier_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - barrier_local_free_gfid 
(frame); - STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, xdata); - return 0; + BARRIER_FOP_CBK(writev, out, frame, this, op_ret, op_errno, prebuf, postbuf, + xdata); +out: + return 0; } int32_t -barrier_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, - dict_t *xdata) +barrier_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - BARRIER_FOP_CBK (writev, out, frame, this, op_ret, op_errno, - prebuf, postbuf, xdata); + BARRIER_FOP_CBK(fremovexattr, out, frame, this, op_ret, op_errno, xdata); out: - return 0; + return 0; } int32_t -barrier_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +barrier_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - BARRIER_FOP_CBK (fremovexattr, out, frame, this, op_ret, op_errno, - xdata); + BARRIER_FOP_CBK(removexattr, out, frame, this, op_ret, op_errno, xdata); out: - return 0; + return 0; } int32_t -barrier_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +barrier_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - BARRIER_FOP_CBK (removexattr, out, frame, this, op_ret, op_errno, - xdata); + BARRIER_FOP_CBK(truncate, out, frame, this, op_ret, op_errno, prebuf, + postbuf, xdata); out: - return 0; + return 0; } int32_t -barrier_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +barrier_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) { - BARRIER_FOP_CBK (truncate, out, frame, this, op_ret, op_errno, prebuf, - postbuf, 
xdata); + BARRIER_FOP_CBK(ftruncate, out, frame, this, op_ret, op_errno, prebuf, + postbuf, xdata); out: - return 0; + return 0; } int32_t -barrier_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) -{ - BARRIER_FOP_CBK (ftruncate, out, frame, this, op_ret, op_errno, prebuf, - postbuf, xdata); +barrier_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + struct iatt *preoldparent, struct iatt *postoldparent, + struct iatt *prenewparent, struct iatt *postnewparent, + dict_t *xdata) +{ + BARRIER_FOP_CBK(rename, out, frame, this, op_ret, op_errno, buf, + preoldparent, postoldparent, prenewparent, postnewparent, + xdata); out: - return 0; + return 0; } int32_t -barrier_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, - struct iatt *preoldparent, struct iatt *postoldparent, - struct iatt *prenewparent, struct iatt *postnewparent, - dict_t *xdata) -{ - BARRIER_FOP_CBK (rename, out, frame, this, op_ret, op_errno, buf, - preoldparent, postoldparent, prenewparent, - postnewparent, xdata); +barrier_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + BARRIER_FOP_CBK(rmdir, out, frame, this, op_ret, op_errno, preparent, + postparent, xdata); out: - return 0; + return 0; } int32_t -barrier_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +barrier_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *preparent, struct iatt *postparent, dict_t *xdata) { - BARRIER_FOP_CBK (rmdir, out, frame, this, op_ret, op_errno, preparent, - postparent, xdata); + BARRIER_FOP_CBK(unlink, out, frame, this, op_ret, op_errno, preparent, + postparent, xdata); out: - return 0; + 
return 0; } int32_t -barrier_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +barrier_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - BARRIER_FOP_CBK (unlink, out, frame, this, op_ret, op_errno, preparent, - postparent, xdata); + BARRIER_FOP_CBK(fsync, out, frame, this, op_ret, op_errno, prebuf, postbuf, + xdata); out: - return 0; + return 0; } int32_t -barrier_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) +barrier_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iovec *vector, int32_t count, off_t off, uint32_t flags, + struct iobref *iobref, dict_t *xdata) { - BARRIER_FOP_CBK (fsync, out, frame, this, op_ret, op_errno, - prebuf, postbuf, xdata); -out: - return 0; -} - -int32_t -barrier_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iovec *vector, int32_t count, off_t off, uint32_t flags, - struct iobref *iobref, dict_t *xdata) -{ - if (!((flags | fd->flags) & (O_SYNC | O_DSYNC))) { - STACK_WIND_TAIL (frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->writev, - fd, vector, count, off, flags, iobref, xdata); - - return 0; - } + if (!((flags | fd->flags) & (O_SYNC | O_DSYNC))) { + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->writev, fd, vector, count, off, + flags, iobref, xdata); - barrier_local_set_gfid (frame, fd->inode->gfid, this); - STACK_WIND (frame, barrier_writev_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->writev, fd, vector, count, - off, flags, iobref, xdata); return 0; + } + + barrier_local_set_gfid(frame, fd->inode->gfid, this); + STACK_WIND(frame, barrier_writev_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->writev, fd, vector, count, off, flags, + 
iobref, xdata); + return 0; } int32_t -barrier_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd, - const char *name, dict_t *xdata) +barrier_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *name, dict_t *xdata) { - barrier_local_set_gfid (frame, fd->inode->gfid, this); - STACK_WIND (frame, barrier_fremovexattr_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fremovexattr, - fd, name, xdata); - return 0; + barrier_local_set_gfid(frame, fd->inode->gfid, this); + STACK_WIND(frame, barrier_fremovexattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata); + return 0; } int32_t -barrier_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *name, dict_t *xdata) +barrier_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) { - barrier_local_set_gfid (frame, loc->inode->gfid, this); - STACK_WIND (frame, barrier_removexattr_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->removexattr, - loc, name, xdata); - return 0; + barrier_local_set_gfid(frame, loc->inode->gfid, this); + STACK_WIND(frame, barrier_removexattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, loc, name, xdata); + return 0; } int32_t -barrier_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, - off_t offset, dict_t *xdata) +barrier_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) { - barrier_local_set_gfid (frame, loc->inode->gfid, this); - STACK_WIND (frame, barrier_truncate_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->truncate, - loc, offset, xdata); - return 0; + barrier_local_set_gfid(frame, loc->inode->gfid, this); + STACK_WIND(frame, barrier_truncate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); + return 0; } - int32_t -barrier_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, - loc_t *newloc, dict_t *xdata) +barrier_rename(call_frame_t *frame, 
xlator_t *this, loc_t *oldloc, + loc_t *newloc, dict_t *xdata) { - barrier_local_set_gfid (frame, oldloc->inode->gfid, this); - STACK_WIND (frame, barrier_rename_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->rename, - oldloc, newloc, xdata); - return 0; + barrier_local_set_gfid(frame, oldloc->inode->gfid, this); + STACK_WIND(frame, barrier_rename_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); + return 0; } int -barrier_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, - dict_t *xdata) +barrier_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + dict_t *xdata) { - barrier_local_set_gfid (frame, loc->inode->gfid, this); - STACK_WIND (frame, barrier_rmdir_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->rmdir, - loc, flags, xdata); - return 0; + barrier_local_set_gfid(frame, loc->inode->gfid, this); + STACK_WIND(frame, barrier_rmdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rmdir, loc, flags, xdata); + return 0; } int32_t -barrier_unlink (call_frame_t *frame, xlator_t *this, - loc_t *loc, int xflag, dict_t *xdata) +barrier_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, + dict_t *xdata) { - barrier_local_set_gfid (frame, loc->inode->gfid, this); - STACK_WIND (frame, barrier_unlink_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->unlink, - loc, xflag, xdata); - return 0; + barrier_local_set_gfid(frame, loc->inode->gfid, this); + STACK_WIND(frame, barrier_unlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); + return 0; } int32_t -barrier_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, - off_t offset, dict_t *xdata) +barrier_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) { - barrier_local_set_gfid (frame, fd->inode->gfid, this); - STACK_WIND (frame, barrier_ftruncate_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->ftruncate, - fd, offset, xdata); - return 0; + 
barrier_local_set_gfid(frame, fd->inode->gfid, this); + STACK_WIND(frame, barrier_ftruncate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); + return 0; } int32_t -barrier_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, - int32_t flags, dict_t *xdata) +barrier_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, + dict_t *xdata) { - barrier_local_set_gfid (frame, fd->inode->gfid, this); - STACK_WIND (frame, barrier_fsync_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fsync, - fd, flags, xdata); - return 0; + barrier_local_set_gfid(frame, fd->inode->gfid, this); + STACK_WIND(frame, barrier_fsync_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsync, fd, flags, xdata); + return 0; } call_stub_t * -__barrier_dequeue (xlator_t *this, struct list_head *queue) +__barrier_dequeue(xlator_t *this, struct list_head *queue) { - call_stub_t *stub = NULL; - barrier_priv_t *priv = NULL; + call_stub_t *stub = NULL; + barrier_priv_t *priv = NULL; - priv = this->private; - GF_ASSERT (priv); + priv = this->private; + GF_ASSERT(priv); - if (list_empty (queue)) - goto out; + if (list_empty(queue)) + goto out; - stub = list_entry (queue->next, call_stub_t, list); - list_del_init (&stub->list); + stub = list_entry(queue->next, call_stub_t, list); + list_del_init(&stub->list); out: - return stub; + return stub; } void -barrier_dequeue_all (xlator_t *this, struct list_head *queue) +barrier_dequeue_all(xlator_t *this, struct list_head *queue) { - call_stub_t *stub = NULL; + call_stub_t *stub = NULL; - gf_log (this->name, GF_LOG_INFO, "Dequeuing all the barriered fops"); + gf_log(this->name, GF_LOG_INFO, "Dequeuing all the barriered fops"); - /* TODO: Start the below task in a new thread */ - while ((stub = __barrier_dequeue (this, queue))) - call_resume (stub); + /* TODO: Start the below task in a new thread */ + while ((stub = __barrier_dequeue(this, queue))) + call_resume(stub); - gf_log (this->name, GF_LOG_INFO, "Dequeuing 
the barriered fops is " - "finished"); - return; + gf_log(this->name, GF_LOG_INFO, + "Dequeuing the barriered fops is " + "finished"); + return; } void -barrier_timeout (void *data) +barrier_timeout(void *data) { - xlator_t *this = NULL; - barrier_priv_t *priv = NULL; - struct list_head queue = {0,}; + xlator_t *this = NULL; + barrier_priv_t *priv = NULL; + struct list_head queue = { + 0, + }; - this = data; - THIS = this; - priv = this->private; + this = data; + THIS = this; + priv = this->private; - INIT_LIST_HEAD (&queue); + INIT_LIST_HEAD(&queue); - gf_log (this->name, GF_LOG_CRITICAL, "Disabling barrier because of " - "the barrier timeout."); + gf_log(this->name, GF_LOG_CRITICAL, + "Disabling barrier because of " + "the barrier timeout."); - LOCK (&priv->lock); - { - __barrier_disable (this, &queue); - } - UNLOCK (&priv->lock); + LOCK(&priv->lock); + { + __barrier_disable(this, &queue); + } + UNLOCK(&priv->lock); - barrier_dequeue_all (this, &queue); + barrier_dequeue_all(this, &queue); - return; + return; } void -__barrier_enqueue (xlator_t *this, call_stub_t *stub) +__barrier_enqueue(xlator_t *this, call_stub_t *stub) { - barrier_priv_t *priv = NULL; + barrier_priv_t *priv = NULL; - priv = this->private; - GF_ASSERT (priv); + priv = this->private; + GF_ASSERT(priv); - list_add_tail (&stub->list, &priv->queue); - priv->queue_size++; + list_add_tail(&stub->list, &priv->queue); + priv->queue_size++; - return; + return; } void -__barrier_disable (xlator_t *this, struct list_head *queue) +__barrier_disable(xlator_t *this, struct list_head *queue) { - GF_UNUSED int ret = 0; - barrier_priv_t *priv = NULL; + GF_UNUSED int ret = 0; + barrier_priv_t *priv = NULL; - priv = this->private; - GF_ASSERT (priv); + priv = this->private; + GF_ASSERT(priv); - if (priv->timer) { - ret = gf_timer_call_cancel (this->ctx, priv->timer); - priv->timer = NULL; - } + if (priv->timer) { + ret = gf_timer_call_cancel(this->ctx, priv->timer); + priv->timer = NULL; + } - list_splice_init 
(&priv->queue, queue); - priv->queue_size = 0; - priv->barrier_enabled = _gf_false; + list_splice_init(&priv->queue, queue); + priv->queue_size = 0; + priv->barrier_enabled = _gf_false; } int -__barrier_enable (xlator_t *this, barrier_priv_t *priv) +__barrier_enable(xlator_t *this, barrier_priv_t *priv) { - int ret = -1; + int ret = -1; - priv->timer = gf_timer_call_after (this->ctx, priv->timeout, - barrier_timeout, (void *) this); - if (!priv->timer) { - gf_log (this->name, GF_LOG_CRITICAL, "Couldn't add barrier " - "timeout event."); - goto out; - } + priv->timer = gf_timer_call_after(this->ctx, priv->timeout, barrier_timeout, + (void *)this); + if (!priv->timer) { + gf_log(this->name, GF_LOG_CRITICAL, + "Couldn't add barrier " + "timeout event."); + goto out; + } - priv->barrier_enabled = _gf_true; - ret = 0; + priv->barrier_enabled = _gf_true; + ret = 0; out: - return ret; + return ret; } int -notify (xlator_t *this, int event, void *data, ...) -{ - barrier_priv_t *priv = NULL; - dict_t *dict = NULL; - gf_boolean_t past = _gf_false; - int ret = -1; - gf_boolean_t barrier_enabled = _gf_false; - struct list_head queue = {0,}; - - priv = this->private; - GF_ASSERT (priv); - INIT_LIST_HEAD (&queue); - - switch (event) { - case GF_EVENT_TRANSLATOR_OP: - { - dict = data; - barrier_enabled = dict_get_str_boolean (dict, "barrier", -1); - - if (barrier_enabled == -1) { - gf_log (this->name, GF_LOG_ERROR, "Could not fetch " - " barrier key from the dictionary."); - goto out; - } - - LOCK (&priv->lock); - { - past = priv->barrier_enabled; - - switch (past) { - case _gf_false: - if (barrier_enabled) { - ret = __barrier_enable (this,priv); - if (ret) - goto unlock; - } else { - gf_log (this->name, GF_LOG_ERROR, - "Already disabled."); - goto unlock; - } - break; - - case _gf_true: - if (!barrier_enabled) { - __barrier_disable(this, &queue); - } else { - gf_log (this->name, GF_LOG_ERROR, - "Already enabled"); - goto unlock; - } - break; - } +notify(xlator_t *this, int 
event, void *data, ...) +{ + barrier_priv_t *priv = this->private; + dict_t *dict = NULL; + int ret = -1; + int barrier_enabled = _gf_false; + struct list_head queue = { + 0, + }; + + GF_ASSERT(priv); + INIT_LIST_HEAD(&queue); + + switch (event) { + case GF_EVENT_TRANSLATOR_OP: { + dict = data; + barrier_enabled = dict_get_str_boolean(dict, "barrier", -1); + + if (barrier_enabled == -1) { + gf_log(this->name, GF_LOG_ERROR, + "Could not fetch " + " barrier key from the dictionary."); + goto out; + } + + LOCK(&priv->lock); + { + if (!priv->barrier_enabled) { + if (barrier_enabled) { + ret = __barrier_enable(this, priv); + } else { + UNLOCK(&priv->lock); + gf_log(this->name, GF_LOG_ERROR, "Already disabled."); + goto post_unlock; + } + } else { + if (!barrier_enabled) { + __barrier_disable(this, &queue); ret = 0; + } else { + UNLOCK(&priv->lock); + gf_log(this->name, GF_LOG_ERROR, "Already enabled"); + goto post_unlock; + } } -unlock: - UNLOCK (&priv->lock); + } + UNLOCK(&priv->lock); + post_unlock: + if (!list_empty(&queue)) + barrier_dequeue_all(this, &queue); - if (!list_empty (&queue)) - barrier_dequeue_all (this, &queue); - - break; - } - default: - { - default_notify (this, event, data); - ret = 0; - goto out; + break; } + default: { + default_notify(this, event, data); + ret = 0; + goto out; } + } out: - return ret; + return ret; } int -reconfigure (xlator_t *this, dict_t *options) -{ - barrier_priv_t *priv = NULL; - gf_boolean_t past = _gf_false; - int ret = -1; - gf_boolean_t barrier_enabled = _gf_false; - uint32_t timeout = {0,}; - struct list_head queue = {0,}; - - priv = this->private; - GF_ASSERT (priv); - - GF_OPTION_RECONF ("barrier", barrier_enabled, options, bool, out); - GF_OPTION_RECONF ("barrier-timeout", timeout, options, time, out); - - INIT_LIST_HEAD (&queue); - - LOCK (&priv->lock); - { - past = priv->barrier_enabled; - - switch (past) { - case _gf_false: - if (barrier_enabled) { - ret = __barrier_enable (this, priv); - if (ret) { - goto 
unlock; - } - } - break; - - case _gf_true: - if (!barrier_enabled) { - __barrier_disable (this, &queue); - - } - break; +reconfigure(xlator_t *this, dict_t *options) +{ + barrier_priv_t *priv = NULL; + int ret = -1; + gf_boolean_t barrier_enabled = _gf_false; + uint32_t timeout = { + 0, + }; + struct list_head queue = { + 0, + }; + + priv = this->private; + GF_ASSERT(priv); + + GF_OPTION_RECONF("barrier", barrier_enabled, options, bool, out); + GF_OPTION_RECONF("barrier-timeout", timeout, options, time, out); + + INIT_LIST_HEAD(&queue); + + LOCK(&priv->lock); + { + if (!priv->barrier_enabled) { + if (barrier_enabled) { + ret = __barrier_enable(this, priv); + if (ret) { + goto unlock; } - priv->timeout.tv_sec = timeout; - ret = 0; + } + } else { + if (!barrier_enabled) { + __barrier_disable(this, &queue); + } } + priv->timeout.tv_sec = timeout; + ret = 0; + } unlock: - UNLOCK (&priv->lock); + UNLOCK(&priv->lock); - if (!list_empty (&queue)) - barrier_dequeue_all (this, &queue); + if (!list_empty(&queue)) + barrier_dequeue_all(this, &queue); out: - return ret; + return ret; } int32_t -mem_acct_init (xlator_t *this) +mem_acct_init(xlator_t *this) { - int ret = -1; + int ret = -1; - ret = xlator_mem_acct_init (this, gf_barrier_mt_end + 1); - if (ret) - gf_log (this->name, GF_LOG_ERROR, "Memory accounting " - "initialization failed."); + ret = xlator_mem_acct_init(this, gf_barrier_mt_end + 1); + if (ret) + gf_log(this->name, GF_LOG_ERROR, + "Memory accounting " + "initialization failed."); - return ret; + return ret; } int -init (xlator_t *this) +init(xlator_t *this) { - int ret = -1; - barrier_priv_t *priv = NULL; - uint32_t timeout = {0,}; + int ret = -1; + barrier_priv_t *priv = NULL; + uint32_t timeout = { + 0, + }; - if (!this->children || this->children->next) { - gf_log (this->name, GF_LOG_ERROR, - "'barrier' not configured with exactly one child"); - goto out; - } + if (!this->children || this->children->next) { + gf_log(this->name, GF_LOG_ERROR, + "'barrier' 
not configured with exactly one child"); + goto out; + } - if (!this->parents) - gf_log (this->name, GF_LOG_WARNING, - "dangling volume. check volfile "); + if (!this->parents) + gf_log(this->name, GF_LOG_WARNING, "dangling volume. check volfile "); - priv = GF_CALLOC (1, sizeof (*priv), gf_barrier_mt_priv_t); - if (!priv) - goto out; + priv = GF_CALLOC(1, sizeof(*priv), gf_barrier_mt_priv_t); + if (!priv) + goto out; - LOCK_INIT (&priv->lock); + LOCK_INIT(&priv->lock); - GF_OPTION_INIT ("barrier", priv->barrier_enabled, bool, out); - GF_OPTION_INIT ("barrier-timeout", timeout, time, out); - priv->timeout.tv_sec = timeout; + GF_OPTION_INIT("barrier", priv->barrier_enabled, bool, out); + GF_OPTION_INIT("barrier-timeout", timeout, time, out); + priv->timeout.tv_sec = timeout; - INIT_LIST_HEAD (&priv->queue); + INIT_LIST_HEAD(&priv->queue); - if (priv->barrier_enabled) { - ret = __barrier_enable (this, priv); - if (ret == -1) - goto out; - } + if (priv->barrier_enabled) { + ret = __barrier_enable(this, priv); + if (ret == -1) + goto out; + } - this->private = priv; - ret = 0; + this->private = priv; + ret = 0; out: - return ret; + if (ret && priv) + GF_FREE(priv); + + return ret; } void -fini (xlator_t *this) +fini(xlator_t *this) { - barrier_priv_t *priv = NULL; - struct list_head queue = {0,}; + barrier_priv_t *priv = NULL; + struct list_head queue = { + 0, + }; - priv = this->private; - if (!priv) - goto out; + priv = this->private; + if (!priv) + goto out; - INIT_LIST_HEAD (&queue); + INIT_LIST_HEAD(&queue); - gf_log (this->name, GF_LOG_INFO, "Disabling barriering and dequeuing " - "all the queued fops"); - LOCK (&priv->lock); - { - __barrier_disable (this, &queue); - } - UNLOCK (&priv->lock); + gf_log(this->name, GF_LOG_INFO, + "Disabling barriering and dequeuing " + "all the queued fops"); + LOCK(&priv->lock); + { + __barrier_disable(this, &queue); + } + UNLOCK(&priv->lock); - if (!list_empty (&queue)) - barrier_dequeue_all (this, &queue); + if 
(!list_empty(&queue)) + barrier_dequeue_all(this, &queue); - this->private = NULL; + this->private = NULL; - LOCK_DESTROY (&priv->lock); - GF_FREE (priv); + LOCK_DESTROY(&priv->lock); + GF_FREE(priv); out: - return; + return; } static void -barrier_dump_stub (call_stub_t *stub, char *prefix) +barrier_dump_stub(call_stub_t *stub, char *prefix) { - char key[GF_DUMP_MAX_BUF_LEN] = {0,}; + char key[GF_DUMP_MAX_BUF_LEN] = { + 0, + }; - gf_proc_dump_build_key (key, prefix, "fop"); - gf_proc_dump_write (key, "%s", gf_fop_list[stub->fop]); + gf_proc_dump_build_key(key, prefix, "fop"); + gf_proc_dump_write(key, "%s", gf_fop_list[stub->fop]); - if (stub->frame->local) { - gf_proc_dump_build_key (key, prefix, "gfid"); - gf_proc_dump_write (key, "%s", - uuid_utoa (*(uuid_t*)(stub->frame->local))); - } - if (stub->args.loc.path) { - gf_proc_dump_build_key (key, prefix, "path"); - gf_proc_dump_write (key, "%s", stub->args.loc.path); - } - if (stub->args.loc.name) { - gf_proc_dump_build_key (key, prefix, "name"); - gf_proc_dump_write (key, "%s", stub->args.loc.name); - } + if (stub->frame->local) { + gf_proc_dump_build_key(key, prefix, "gfid"); + gf_proc_dump_write(key, "%s", + uuid_utoa(*(uuid_t *)(stub->frame->local))); + } + if (stub->args.loc.path) { + gf_proc_dump_build_key(key, prefix, "path"); + gf_proc_dump_write(key, "%s", stub->args.loc.path); + } + if (stub->args.loc.name) { + gf_proc_dump_build_key(key, prefix, "name"); + gf_proc_dump_write(key, "%s", stub->args.loc.name); + } - return; + return; } static void -__barrier_dump_queue (barrier_priv_t *priv) +__barrier_dump_queue(barrier_priv_t *priv) { - call_stub_t *stub = NULL; - char key[GF_DUMP_MAX_BUF_LEN] = {0,}; - int i = 0; + call_stub_t *stub = NULL; + char key[GF_DUMP_MAX_BUF_LEN] = { + 0, + }; + int i = 0; - GF_VALIDATE_OR_GOTO ("barrier", priv, out); + GF_VALIDATE_OR_GOTO("barrier", priv, out); - list_for_each_entry (stub, &priv->queue, list) { - snprintf (key, sizeof (key), "stub.%d", i++); - 
gf_proc_dump_add_section (key); - barrier_dump_stub(stub, key); - } + list_for_each_entry(stub, &priv->queue, list) + { + snprintf(key, sizeof(key), "stub.%d", i++); + gf_proc_dump_add_section("%s", key); + barrier_dump_stub(stub, key); + } out: - return; + return; } int -barrier_dump_priv (xlator_t *this) -{ - int ret = -1; - char key[GF_DUMP_MAX_BUF_LEN] = {0,}; - barrier_priv_t *priv = NULL; - - GF_VALIDATE_OR_GOTO ("barrier", this, out); - - priv = this->private; - if (!priv) - return 0; - - gf_proc_dump_build_key (key, "xlator.features.barrier", "priv"); - gf_proc_dump_add_section (key); - - LOCK (&priv->lock); - { - gf_proc_dump_build_key (key, "barrier", "enabled"); - gf_proc_dump_write (key, "%d", priv->barrier_enabled); - gf_proc_dump_build_key (key, "barrier", "timeout"); - gf_proc_dump_write (key, "%"PRId64, priv->timeout.tv_sec); - if (priv->barrier_enabled) { - gf_proc_dump_build_key (key, "barrier", "queue_size"); - gf_proc_dump_write (key, "%d", priv->queue_size); - __barrier_dump_queue (priv); - } +barrier_dump_priv(xlator_t *this) +{ + int ret = -1; + char key[GF_DUMP_MAX_BUF_LEN] = { + 0, + }; + barrier_priv_t *priv = NULL; + + GF_VALIDATE_OR_GOTO("barrier", this, out); + + priv = this->private; + if (!priv) + return 0; + + gf_proc_dump_build_key(key, "xlator.features.barrier", "priv"); + gf_proc_dump_add_section("%s", key); + gf_proc_dump_build_key(key, "barrier", "enabled"); + + LOCK(&priv->lock); + { + gf_proc_dump_write(key, "%d", priv->barrier_enabled); + gf_proc_dump_build_key(key, "barrier", "timeout"); + gf_proc_dump_write(key, "%ld", priv->timeout.tv_sec); + if (priv->barrier_enabled) { + gf_proc_dump_build_key(key, "barrier", "queue_size"); + gf_proc_dump_write(key, "%d", priv->queue_size); + __barrier_dump_queue(priv); } - UNLOCK (&priv->lock); + } + UNLOCK(&priv->lock); out: - return ret; + return ret; } struct xlator_fops fops = { - /* Barrier Class fops */ - .rmdir = barrier_rmdir, - .unlink = barrier_unlink, - .rename = 
barrier_rename, - .removexattr = barrier_removexattr, - .fremovexattr = barrier_fremovexattr, - .truncate = barrier_truncate, - .ftruncate = barrier_ftruncate, - .fsync = barrier_fsync, - - /* Writes with only O_SYNC flag */ - .writev = barrier_writev, + /* Barrier Class fops */ + .rmdir = barrier_rmdir, + .unlink = barrier_unlink, + .rename = barrier_rename, + .removexattr = barrier_removexattr, + .fremovexattr = barrier_fremovexattr, + .truncate = barrier_truncate, + .ftruncate = barrier_ftruncate, + .fsync = barrier_fsync, + + /* Writes with only O_SYNC flag */ + .writev = barrier_writev, }; struct xlator_dumpops dumpops = { - .priv = barrier_dump_priv, + .priv = barrier_dump_priv, }; struct xlator_cbks cbks; struct volume_options options[] = { - { .key = {"barrier"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", - .description = "When \"on\", blocks acknowledgements to application " - "for file operations such as rmdir, rename, unlink, " - "removexattr, fremovexattr, truncate, ftruncate, " - "write (with O_SYNC), fsync. It is turned \"off\" by " - "default." - }, - { .key = {"barrier-timeout"}, - .type = GF_OPTION_TYPE_TIME, - .default_value = BARRIER_TIMEOUT, - .description = "After 'timeout' seconds since the time 'barrier' " - "option was set to \"on\", acknowledgements to file " - "operations are no longer blocked and previously " - "blocked acknowledgements are sent to the application" - }, - { .key = {NULL} }, + {.key = {"barrier"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "disable", + .op_version = {GD_OP_VERSION_3_6_0}, + .flags = OPT_FLAG_SETTABLE, + .description = "When \"enabled\", blocks acknowledgements to application " + "for file operations such as rmdir, rename, unlink, " + "removexattr, fremovexattr, truncate, ftruncate, " + "write (with O_SYNC), fsync. 
It is turned \"off\" by " + "default."}, + {.key = {"barrier-timeout"}, + .type = GF_OPTION_TYPE_TIME, + .default_value = BARRIER_TIMEOUT, + .op_version = {GD_OP_VERSION_3_6_0}, + .flags = OPT_FLAG_SETTABLE, + .description = "After 'timeout' seconds since the time 'barrier' " + "option was set to \"on\", acknowledgements to file " + "operations are no longer blocked and previously " + "blocked acknowledgements are sent to the application"}, + {.key = {NULL}}, +}; + +xlator_api_t xlator_api = { + .init = init, + .fini = fini, + .notify = notify, + .reconfigure = reconfigure, + .mem_acct_init = mem_acct_init, + .op_version = {1}, /* Present from the initial version */ + .dumpops = &dumpops, + .fops = &fops, + .cbks = &cbks, + .options = options, + .identifier = "barrier", + .category = GF_MAINTAINED, }; diff --git a/xlators/features/barrier/src/barrier.h b/xlators/features/barrier/src/barrier.h index 0d646f90474..1337f311f7d 100644 --- a/xlators/features/barrier/src/barrier.h +++ b/xlators/features/barrier/src/barrier.h @@ -12,71 +12,78 @@ #define __BARRIER_H__ #include "barrier-mem-types.h" -#include "xlator.h" -#include "timer.h" -#include "call-stub.h" +#include <glusterfs/xlator.h> +#include <glusterfs/timer.h> +#include <glusterfs/call-stub.h> -#define BARRIER_FOP_CBK(fop_name, label, frame, this, params ...) 
\ - do { \ - barrier_priv_t *_priv = NULL; \ - call_stub_t *_stub = NULL; \ - gf_boolean_t _barrier_enabled= _gf_false; \ - struct list_head queue = {0, }; \ - \ - INIT_LIST_HEAD (&queue); \ - \ - _priv = this->private; \ - GF_ASSERT (_priv); \ - \ - LOCK (&_priv->lock); \ - { \ - if (_priv->barrier_enabled) { \ - _barrier_enabled = _priv->barrier_enabled;\ - \ - _stub = fop_##fop_name##_cbk_stub \ - (frame, \ - barrier_##fop_name##_cbk_resume,\ - params); \ - if (!_stub) { \ - __barrier_disable (this, &queue);\ - goto unlock; \ - } \ - \ - __barrier_enqueue (this, _stub); \ - } \ - } \ -unlock: \ - UNLOCK (&_priv->lock); \ - \ - if (_stub) \ - goto label; \ - \ - if (_barrier_enabled && !_stub) { \ - gf_log (this->name, GF_LOG_CRITICAL, \ - "Failed to barrier FOPs, disabling " \ - "barrier. FOP: %s, ERROR: %s", \ - #fop_name, strerror (ENOMEM)); \ - barrier_dequeue_all (this, &queue); \ - } \ - barrier_local_free_gfid (frame); \ - STACK_UNWIND_STRICT (fop_name, frame, params); \ - goto label; \ - } while (0) +#define BARRIER_FOP_CBK(fop_name, label, frame, this, params...) \ + do { \ + barrier_priv_t *_priv = NULL; \ + call_stub_t *_stub = NULL; \ + gf_boolean_t _barrier_enabled = _gf_false; \ + struct list_head queue = { \ + 0, \ + }; \ + \ + INIT_LIST_HEAD(&queue); \ + \ + _priv = this->private; \ + GF_ASSERT(_priv); \ + \ + LOCK(&_priv->lock); \ + { \ + if (_priv->barrier_enabled) { \ + _barrier_enabled = _priv->barrier_enabled; \ + \ + _stub = fop_##fop_name##_cbk_stub( \ + frame, barrier_##fop_name##_cbk_resume, params); \ + if (!_stub) { \ + __barrier_disable(this, &queue); \ + goto unlock; \ + } \ + \ + __barrier_enqueue(this, _stub); \ + } \ + } \ + unlock: \ + UNLOCK(&_priv->lock); \ + \ + if (_stub) \ + goto label; \ + \ + if (_barrier_enabled && !_stub) { \ + gf_log(this->name, GF_LOG_CRITICAL, \ + "Failed to barrier FOPs, disabling " \ + "barrier. 
FOP: %s, ERROR: %s", \ + #fop_name, strerror(ENOMEM)); \ + barrier_dequeue_all(this, &queue); \ + } \ + barrier_local_free_gfid(frame); \ + STACK_UNWIND_STRICT(fop_name, frame, params); \ + goto label; \ + } while (0) typedef struct { - gf_timer_t *timer; - gf_boolean_t barrier_enabled; - gf_lock_t lock; - struct list_head queue; - struct timespec timeout; - uint32_t queue_size; + gf_timer_t *timer; + gf_lock_t lock; + struct list_head queue; + struct timespec timeout; + uint32_t queue_size; + gf_boolean_t barrier_enabled; + char _pad[3]; /* manual padding */ } barrier_priv_t; -int __barrier_enable (xlator_t *this, barrier_priv_t *priv); -void __barrier_enqueue (xlator_t *this, call_stub_t *stub); -void __barrier_disable (xlator_t *this, struct list_head *queue); -void barrier_timeout (void *data); -void barrier_dequeue_all (xlator_t *this, struct list_head *queue); -call_stub_t *__barrier_dequeue (xlator_t *this, struct list_head *queue); +int +__barrier_enable(xlator_t *this, barrier_priv_t *priv); +void +__barrier_enqueue(xlator_t *this, call_stub_t *stub); +void +__barrier_disable(xlator_t *this, struct list_head *queue); +void +barrier_timeout(void *data); +void +barrier_dequeue_all(xlator_t *this, struct list_head *queue); +call_stub_t * +__barrier_dequeue(xlator_t *this, struct list_head *queue); #endif diff --git a/xlators/features/bit-rot/src/bitd/Makefile.am b/xlators/features/bit-rot/src/bitd/Makefile.am index b338d3cdd8f..6db800e6565 100644 --- a/xlators/features/bit-rot/src/bitd/Makefile.am +++ b/xlators/features/bit-rot/src/bitd/Makefile.am @@ -1,19 +1,22 @@ +if WITH_SERVER xlator_LTLIBRARIES = bit-rot.la +endif xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features -bit_rot_la_LDFLAGS = $(GF_XLATOR_DEFAULT_LDFLAGS) +bit_rot_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ - -I$(top_srcdir)/rpc/xdr/src/ \ - -I$(top_srcdir)/rpc/rpc-lib/src \ - -I$(CONTRIBDIR)/timer-wheel \ - 
-I$(top_srcdir)/xlators/features/bit-rot/src/stub + -I$(top_srcdir)/rpc/xdr/src/ -I$(top_builddir)/rpc/xdr/src/ \ + -I$(top_srcdir)/rpc/rpc-lib/src -I$(CONTRIBDIR)/timer-wheel \ + -I$(top_srcdir)/xlators/features/bit-rot/src/stub -bit_rot_la_SOURCES = bit-rot.c bit-rot-scrub.c bit-rot-tbf.c bit-rot-ssm.c +bit_rot_la_SOURCES = bit-rot.c bit-rot-scrub.c bit-rot-ssm.c \ + bit-rot-scrub-status.c bit_rot_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ - $(top_builddir)/xlators/features/changelog/lib/src/libgfchangelog.la + $(top_builddir)/xlators/features/changelog/lib/src/libgfchangelog.la -noinst_HEADERS = bit-rot.h bit-rot-scrub.h bit-rot-tbf.h bit-rot-bitd-messages.h bit-rot-ssm.h +noinst_HEADERS = bit-rot.h bit-rot-scrub.h bit-rot-bitd-messages.h bit-rot-ssm.h \ + bit-rot-scrub-status.h AM_CFLAGS = -Wall -DBR_RATE_LIMIT_SIGNER $(GF_CFLAGS) diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h b/xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h index b4746bb5ecb..5bc5103a27c 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h +++ b/xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h @@ -11,408 +11,91 @@ #ifndef _BITROT_BITD_MESSAGES_H_ #define _BITROT_BITD_MESSAGES_H_ -#include "glfs-message-id.h" +#include <glusterfs/glfs-message-id.h> -/* file bit-rot-bitd-messages.h - * brief BIT-ROT log-message IDs and their descriptions - */ - -/* NOTE: Rules for message additions - * 1) Each instance of a message is _better_ left with a unique message ID, even - * if the message format is the same. Reasoning is that, if the message - * format needs to change in one instance, the other instances are not - * impacted or the new change does not change the ID of the instance being - * modified. 
- * 2) Addition of a message, - * - Should increment the GLFS_NUM_MESSAGES - * - Append to the list of messages defined, towards the end - * - Retain macro naming as glfs_msg_X (for redability across developers) - * NOTE: Rules for message format modifications - * 3) Check acorss the code if the message ID macro in question is reused - * anywhere. If reused then then the modifications should ensure correctness - * everywhere, or needs a new message ID as (1) above was not adhered to. If - * not used anywhere, proceed with the required modification. - * NOTE: Rules for message deletion - * 4) Check (3) and if used anywhere else, then cannot be deleted. If not used - * anywhere, then can be deleted, but will leave a hole by design, as - * addition rules specify modification to the end of the list and not filling - * holes. - */ - -#define GLFS_BITROT_BITD_BASE GLFS_MSGID_COMP_BITROT_BITD -#define GLFS_BITROT_BITD_NUM_MESSAGES 48 -#define GLFS_MSGID_END (GLFS_BITROT_BITD_BASE + \ - GLFS_BITROT_BITD_NUM_MESSAGES + 1) -/* Messaged with message IDs */ -#define glfs_msg_start_x GLFS_BITROT_BITD_BASE, "Invalid: Start of messages" -/*------------*/ - - -#define BRB_MSG_FD_CREATE_FAILED (GLFS_BITROT_BITD_BASE + 1) -/*! - * @messageid - * @diagnosis - * @recommendedaction +/* To add new message IDs, append new identifiers at the end of the list. * - */ - -#define BRB_MSG_READV_FAILED (GLFS_BITROT_BITD_BASE + 2) -/*! - * @messageid - * @diagnosis - * @recommendedaction + * Never remove a message ID. If it's not used anymore, you can rename it or + * leave it as it is, but not delete it. This is to prevent reutilization of + * IDs by other messages. * + * The component name must match one of the entries defined in + * glfs-message-id.h. */ -#define BRB_MSG_BLOCK_READ_FAILED (GLFS_BITROT_BITD_BASE + 3) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRB_MSG_CALC_CHECKSUM_FAILED (GLFS_BITROT_BITD_BASE + 4) -/*! 
- * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRB_MSG_NO_MEMORY (GLFS_BITROT_BITD_BASE + 5) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRB_MSG_GET_SIGN_FAILED (GLFS_BITROT_BITD_BASE + 6) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRB_MSG_SET_SIGN_FAILED (GLFS_BITROT_BITD_BASE + 7) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRB_MSG_OP_FAILED (GLFS_BITROT_BITD_BASE + 8) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRB_MSG_READ_AND_SIGN_FAILED (GLFS_BITROT_BITD_BASE + 9) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRB_MSG_SIGN_FAILED (GLFS_BITROT_BITD_BASE + 10) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRB_MSG_GET_SUBVOL_FAILED (GLFS_BITROT_BITD_BASE + 11) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRB_MSG_SET_TIMER_FAILED (GLFS_BITROT_BITD_BASE + 12) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRB_MSG_GET_INFO_FAILED (GLFS_BITROT_BITD_BASE + 13) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRB_MSG_PATH_FAILED (GLFS_BITROT_BITD_BASE + 14) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRB_MSG_MARK_BAD_FILE (GLFS_BITROT_BITD_BASE + 15) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRB_MSG_TRIGGER_SIGN (GLFS_BITROT_BITD_BASE + 16) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRB_MSG_REGISTER_FAILED (GLFS_BITROT_BITD_BASE + 17) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRB_MSG_CRAWLING_START (GLFS_BITROT_BITD_BASE + 18) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRB_MSG_SPAWN_FAILED (GLFS_BITROT_BITD_BASE + 19) -/*! 
- * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRB_MSG_INVALID_SUBVOL_CHILD (GLFS_BITROT_BITD_BASE + 20) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRB_MSG_SKIP_OBJECT (GLFS_BITROT_BITD_BASE + 21) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRB_MSG_NO_CHILD (GLFS_BITROT_BITD_BASE + 22) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRB_MSG_CHECKSUM_MISMATCH (GLFS_BITROT_BITD_BASE + 23) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRB_MSG_MARK_CORRUPTED (GLFS_BITROT_BITD_BASE + 24) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRB_MSG_CRAWLING_FINISH (GLFS_BITROT_BITD_BASE + 25) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRB_MSG_CALC_ERROR (GLFS_BITROT_BITD_BASE + 26) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRB_MSG_LOOKUP_FAILED (GLFS_BITROT_BITD_BASE + 27) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRB_MSG_PARTIAL_VERSION_PRESENCE (GLFS_BITROT_BITD_BASE + 28) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRB_MSG_MEM_ACNT_FAILED (GLFS_BITROT_BITD_BASE + 29) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRB_MSG_TIMER_WHEEL_UNAVAILABLE (GLFS_BITROT_BITD_BASE + 30) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRB_MSG_BITROT_LOADED (GLFS_BITROT_BITD_BASE + 31) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRB_MSG_SCALE_DOWN_FAILED (GLFS_BITROT_BITD_BASE + 32) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRB_MSG_SCALE_UP_FAILED (GLFS_BITROT_BITD_BASE + 33) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRB_MSG_SCALE_DOWN_SCRUBBER (GLFS_BITROT_BITD_BASE + 34) -/*! 
- * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRB_MSG_SCALING_UP_SCRUBBER (GLFS_BITROT_BITD_BASE + 35) -/*! - * @messageid - * @diagnosis - * @recommendedaction - */ -#define BRB_MSG_UNKNOWN_THROTTLE (GLFS_BITROT_BITD_BASE + 36) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRB_MSG_RATE_LIMIT_INFO (GLFS_BITROT_BITD_BASE + 37) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRB_MSG_SCRUB_INFO (GLFS_BITROT_BITD_BASE + 38) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRB_MSG_CONNECTED_TO_BRICK (GLFS_BITROT_BITD_BASE + 39) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRB_MSG_BRICK_INFO (GLFS_BITROT_BITD_BASE + 40) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRB_MSG_SUBVOL_CONNECT_FAILED (GLFS_BITROT_BITD_BASE + 41) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRB_MSG_INVALID_SUBVOL (GLFS_BITROT_BITD_BASE + 42) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRB_MSG_RESCHEDULE_SCRUBBER_FAILED (GLFS_BITROT_BITD_BASE + 43) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ - -#define BRB_MSG_SCRUB_START (GLFS_BITROT_BITD_BASE + 44) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRB_MSG_SCRUB_FINISH (GLFS_BITROT_BITD_BASE + 45) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRB_MSG_SCRUB_RUNNING (GLFS_BITROT_BITD_BASE + 46) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRB_MSG_SCRUB_RESCHEDULED (GLFS_BITROT_BITD_BASE + 47) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRB_MSG_SCRUB_TUNABLE (GLFS_BITROT_BITD_BASE + 48) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -/*------------*/ -#define BRB_MSG_SCRUB_THREAD_CLEANUP (GLFS_BITROT_BITD_BASE + 49) -/*! 
- * @messageid - * @diagnosis - * @recommendedaction - * - */ -/*------------*/ -#define BRB_MSG_SCRUBBER_CLEANED (GLFS_BITROT_BITD_BASE + 50) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -/*------------*/ -#define BRB_MSG_GENERIC_SSM_INFO (GLFS_BITROT_BITD_BASE + 51) - -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -/*------------*/ -#define BRB_MSG_ZERO_TIMEOUT_BUG (GLFS_BITROT_BITD_BASE + 52) +GLFS_MSGID(BITROT_BITD, BRB_MSG_FD_CREATE_FAILED, BRB_MSG_READV_FAILED, + BRB_MSG_BLOCK_READ_FAILED, BRB_MSG_CALC_CHECKSUM_FAILED, + BRB_MSG_NO_MEMORY, BRB_MSG_GET_SIGN_FAILED, BRB_MSG_SET_SIGN_FAILED, + BRB_MSG_OP_FAILED, BRB_MSG_READ_AND_SIGN_FAILED, BRB_MSG_SIGN_FAILED, + BRB_MSG_GET_SUBVOL_FAILED, BRB_MSG_SET_TIMER_FAILED, + BRB_MSG_GET_INFO_FAILED, BRB_MSG_PATH_FAILED, BRB_MSG_MARK_BAD_FILE, + BRB_MSG_TRIGGER_SIGN, BRB_MSG_REGISTER_FAILED, + BRB_MSG_CRAWLING_START, BRB_MSG_SPAWN_FAILED, + BRB_MSG_INVALID_SUBVOL_CHILD, BRB_MSG_SKIP_OBJECT, BRB_MSG_NO_CHILD, + BRB_MSG_CHECKSUM_MISMATCH, BRB_MSG_MARK_CORRUPTED, + BRB_MSG_CRAWLING_FINISH, BRB_MSG_CALC_ERROR, BRB_MSG_LOOKUP_FAILED, + BRB_MSG_PARTIAL_VERSION_PRESENCE, BRB_MSG_MEM_ACNT_FAILED, + BRB_MSG_TIMER_WHEEL_UNAVAILABLE, BRB_MSG_BITROT_LOADED, + BRB_MSG_SCALE_DOWN_FAILED, BRB_MSG_SCALE_UP_FAILED, + BRB_MSG_SCALE_DOWN_SCRUBBER, BRB_MSG_SCALING_UP_SCRUBBER, + BRB_MSG_UNKNOWN_THROTTLE, BRB_MSG_RATE_LIMIT_INFO, + BRB_MSG_SCRUB_INFO, BRB_MSG_CONNECTED_TO_BRICK, BRB_MSG_BRICK_INFO, + BRB_MSG_SUBVOL_CONNECT_FAILED, BRB_MSG_INVALID_SUBVOL, + BRB_MSG_RESCHEDULE_SCRUBBER_FAILED, BRB_MSG_SCRUB_START, + BRB_MSG_SCRUB_FINISH, BRB_MSG_SCRUB_RUNNING, + BRB_MSG_SCRUB_RESCHEDULED, BRB_MSG_SCRUB_TUNABLE, + BRB_MSG_SCRUB_THREAD_CLEANUP, BRB_MSG_SCRUBBER_CLEANED, + BRB_MSG_GENERIC_SSM_INFO, BRB_MSG_ZERO_TIMEOUT_BUG, + BRB_MSG_BAD_OBJ_READDIR_FAIL, BRB_MSG_SSM_FAILED, + BRB_MSG_SCRUB_WAIT_FAILED, BRB_MSG_TRIGGER_SIGN_FAILED, + BRB_MSG_EVENT_UNHANDLED, BRB_MSG_COULD_NOT_SCHEDULE_SCRUB, + 
BRB_MSG_THREAD_CREATION_FAILED, BRB_MSG_MEM_POOL_ALLOC, + BRB_MSG_SAVING_HASH_FAILED); -#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages" +#define BRB_MSG_FD_CREATE_FAILED_STR "failed to create fd for the inode" +#define BRB_MSG_READV_FAILED_STR "readv failed" +#define BRB_MSG_BLOCK_READ_FAILED_STR "reading block failed" +#define BRB_MSG_NO_MEMORY_STR "failed to allocate memory" +#define BRB_MSG_CALC_CHECKSUM_FAILED_STR "calculating checksum failed" +#define BRB_MSG_GET_SIGN_FAILED_STR "failed to get the signature" +#define BRB_MSG_SET_SIGN_FAILED_STR "signing failed" +#define BRB_MSG_OP_FAILED_STR "failed on object" +#define BRB_MSG_TRIGGER_SIGN_FAILED_STR "Could not trigger signing" +#define BRB_MSG_READ_AND_SIGN_FAILED_STR "reading and signing of object failed" +#define BRB_MSG_SET_TIMER_FAILED_STR "Failed to allocate object expiry timer" +#define BRB_MSG_GET_SUBVOL_FAILED_STR \ + "failed to get the subvolume for the brick" +#define BRB_MSG_PATH_FAILED_STR "path failed" +#define BRB_MSG_SKIP_OBJECT_STR "Entry is marked corrupted. 
skipping" +#define BRB_MSG_PARTIAL_VERSION_PRESENCE_STR \ + "PArtial version xattr presence detected, ignoring" +#define BRB_MSG_TRIGGER_SIGN_STR "Triggering signing" +#define BRB_MSG_CRAWLING_START_STR \ + "Crawling brick, scanning for unsigned objects" +#define BRB_MSG_CRAWLING_FINISH_STR "Completed crawling brick" +#define BRB_MSG_REGISTER_FAILED_STR "Register to changelog failed" +#define BRB_MSG_SPAWN_FAILED_STR "failed to spawn" +#define BRB_MSG_CONNECTED_TO_BRICK_STR "Connected to brick" +#define BRB_MSG_LOOKUP_FAILED_STR "lookup on root failed" +#define BRB_MSG_GET_INFO_FAILED_STR "failed to get stub info" +#define BRB_MSG_SCRUB_THREAD_CLEANUP_STR "Error cleaning up scanner thread" +#define BRB_MSG_SCRUBBER_CLEANED_STR "clened up scrubber for brick" +#define BRB_MSG_SUBVOL_CONNECT_FAILED_STR \ + "callback handler for subvolume failed" +#define BRB_MSG_MEM_ACNT_FAILED_STR "Memory accounting init failed" +#define BRB_MSG_EVENT_UNHANDLED_STR "Event unhandled for child" +#define BRB_MSG_INVALID_SUBVOL_STR "Got event from invalid subvolume" +#define BRB_MSG_RESCHEDULE_SCRUBBER_FAILED_STR \ + "on demand scrub schedule failed. Scrubber is not in pending state." +#define BRB_MSG_COULD_NOT_SCHEDULE_SCRUB_STR \ + "Could not schedule ondemand scrubbing. Scrubbing will continue " \ + "according to old frequency." 
+#define BRB_MSG_THREAD_CREATION_FAILED_STR "thread creation failed" +#define BRB_MSG_RATE_LIMIT_INFO_STR "Rate Limit Info" +#define BRB_MSG_MEM_POOL_ALLOC_STR "failed to allocate mem-pool for timer" +#define BRB_MSG_NO_CHILD_STR "FATAL: no children" +#define BRB_MSG_TIMER_WHEEL_UNAVAILABLE_STR "global timer wheel unavailable" +#define BRB_MSG_BITROT_LOADED_STR "bit-rot xlator loaded" +#define BRB_MSG_SAVING_HASH_FAILED_STR \ + "failed to allocate memory for saving hash of the object" #endif /* !_BITROT_BITD_MESSAGES_H_ */ diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c new file mode 100644 index 00000000000..5cef2ffa5e5 --- /dev/null +++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c @@ -0,0 +1,78 @@ +/* + Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#include <string.h> +#include <stdio.h> + +#include "bit-rot-scrub-status.h" + +void +br_inc_unsigned_file_count(br_scrub_stats_t *scrub_stat) +{ + if (!scrub_stat) + return; + + pthread_mutex_lock(&scrub_stat->lock); + { + scrub_stat->unsigned_files++; + } + pthread_mutex_unlock(&scrub_stat->lock); +} + +void +br_inc_scrubbed_file(br_scrub_stats_t *scrub_stat) +{ + if (!scrub_stat) + return; + + pthread_mutex_lock(&scrub_stat->lock); + { + scrub_stat->scrubbed_files++; + } + pthread_mutex_unlock(&scrub_stat->lock); +} + +void +br_update_scrub_start_time(br_scrub_stats_t *scrub_stat, time_t time) +{ + if (!scrub_stat) + return; + + pthread_mutex_lock(&scrub_stat->lock); + { + scrub_stat->scrub_start_time = time; + } + pthread_mutex_unlock(&scrub_stat->lock); +} + +void +br_update_scrub_finish_time(br_scrub_stats_t *scrub_stat, char *timestr, + time_t time) +{ + int lst_size = 0; + + if (!scrub_stat) + return; + + lst_size = sizeof(scrub_stat->last_scrub_time); + if (strlen(timestr) >= lst_size) + return; + + pthread_mutex_lock(&scrub_stat->lock); + { + scrub_stat->scrub_end_time = time; + + scrub_stat->scrub_duration = scrub_stat->scrub_end_time - + scrub_stat->scrub_start_time; + + snprintf(scrub_stat->last_scrub_time, lst_size, "%s", timestr); + } + pthread_mutex_unlock(&scrub_stat->lock); +} diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h new file mode 100644 index 00000000000..f022aa831eb --- /dev/null +++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h @@ -0,0 +1,50 @@ +/* + Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#ifndef __BIT_ROT_SCRUB_STATUS_H__ +#define __BIT_ROT_SCRUB_STATUS_H__ + +#include <stdint.h> +#include <sys/time.h> +#include <pthread.h> + +#include <glusterfs/common-utils.h> + +struct br_scrub_stats { + uint64_t scrubbed_files; /* Total number of scrubbed files. */ + + uint64_t unsigned_files; /* Total number of unsigned files. */ + + uint64_t scrub_duration; /* Duration of last scrub. */ + + char last_scrub_time[GF_TIMESTR_SIZE]; /* Last scrub completion time. */ + + time_t scrub_start_time; /* Scrubbing starting time. */ + + time_t scrub_end_time; /* Scrubbing finishing time. */ + + int8_t scrub_running; /* Whether scrub running or not. */ + + pthread_mutex_t lock; +}; + +typedef struct br_scrub_stats br_scrub_stats_t; + +void +br_inc_unsigned_file_count(br_scrub_stats_t *scrub_stat); +void +br_inc_scrubbed_file(br_scrub_stats_t *scrub_stat); +void +br_update_scrub_start_time(br_scrub_stats_t *scrub_stat, time_t time); +void +br_update_scrub_finish_time(br_scrub_stats_t *scrub_stat, char *timestr, + time_t time); + +#endif /* __BIT_ROT_SCRUB_STATUS_H__ */ diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c index 4a74bbf7eef..289dd53f610 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c +++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c @@ -12,30 +12,32 @@ #include <ctype.h> #include <sys/uio.h> -#include "glusterfs.h" -#include "logging.h" -#include "common-utils.h" +#include <glusterfs/glusterfs.h> +#include <glusterfs/logging.h> +#include <glusterfs/common-utils.h> #include "bit-rot-scrub.h" #include <pthread.h> #include "bit-rot-bitd-messages.h" +#include "bit-rot-scrub-status.h" +#include <glusterfs/events.h> struct br_scrubbers { - pthread_t scrubthread; + pthread_t scrubthread; - struct list_head list; + struct list_head list; }; struct br_fsscan_entry { - void *data; + void *data; - loc_t parent; + loc_t parent; - gf_dirent_t *entry; + gf_dirent_t *entry; - struct 
br_scanfs *fsscan; /* backpointer to subvolume scanner */ + struct br_scanfs *fsscan; /* backpointer to subvolume scanner */ - struct list_head list; + struct list_head list; }; /** @@ -44,34 +46,32 @@ struct br_fsscan_entry { * to the dictionary value. */ static int32_t -bitd_fetch_signature (xlator_t *this, br_child_t *child, - fd_t *fd, dict_t **xattr, br_isignature_out_t **sign) +bitd_fetch_signature(xlator_t *this, br_child_t *child, fd_t *fd, + dict_t **xattr, br_isignature_out_t **sign) { - int32_t ret = -1; - - ret = syncop_fgetxattr (child->xl, fd, xattr, - GLUSTERFS_GET_OBJECT_SIGNATURE, NULL, NULL); - if (ret < 0) { - br_log_object (this, "fgetxattr", fd->inode->gfid, -ret); - goto out; - } - - ret = dict_get_ptr - (*xattr, GLUSTERFS_GET_OBJECT_SIGNATURE, (void **) sign); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_SIGN_FAILED, - "failed to extract signature info [GFID: %s]", - uuid_utoa (fd->inode->gfid)); - goto unref_dict; - } - - return 0; - - unref_dict: - dict_unref (*xattr); - out: - return -1; - + int32_t ret = -1; + + ret = syncop_fgetxattr(child->xl, fd, xattr, GLUSTERFS_GET_OBJECT_SIGNATURE, + NULL, NULL); + if (ret < 0) { + br_log_object(this, "fgetxattr", fd->inode->gfid, -ret); + goto out; + } + + ret = dict_get_ptr(*xattr, GLUSTERFS_GET_OBJECT_SIGNATURE, (void **)sign); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_SIGN_FAILED, + "failed to extract signature info [GFID: %s]", + uuid_utoa(fd->inode->gfid)); + goto unref_dict; + } + + return 0; + +unref_dict: + dict_unref(*xattr); +out: + return -1; } /** @@ -84,77 +84,87 @@ bitd_fetch_signature (xlator_t *this, br_child_t *child, */ int32_t -bitd_scrub_post_compute_check (xlator_t *this, - br_child_t *child, - fd_t *fd, unsigned long version, - br_isignature_out_t **signature) +bitd_scrub_post_compute_check(xlator_t *this, br_child_t *child, fd_t *fd, + unsigned long version, + br_isignature_out_t **signature, + br_scrub_stats_t *scrub_stat, + 
gf_boolean_t skip_stat) { - int32_t ret = 0; - size_t signlen = 0; - dict_t *xattr = NULL; - br_isignature_out_t *signptr = NULL; - - ret = bitd_fetch_signature (this, child, fd, &xattr, &signptr); - if (ret < 0) - goto out; - - /** - * Either the object got dirtied during the time the signature was - * calculated OR the version we saved during pre-compute check does - * not match now, implying that the object got dirtied and signed in - * between scrubs pre & post compute checks (checksum window). - * - * The log entry looks pretty ugly, but helps in debugging.. - */ - if (signptr->stale || (signptr->version != version)) { - gf_msg_debug (this->name, 0, "<STAGE: POST> Object [GFID: %s] " - "either has a stale signature OR underwent " - "signing during checksumming {Stale: %d | " - "Version: %lu,%lu}", uuid_utoa (fd->inode->gfid), - (signptr->stale) ? 1 : 0, version, - signptr->version); - ret = -1; - goto unref_dict; - } - - signlen = signptr->signaturelen; - *signature = GF_CALLOC (1, sizeof (br_isignature_out_t) + signlen, - gf_common_mt_char); - - (void) memcpy (*signature, signptr, - sizeof (br_isignature_out_t) + signlen); - - unref_dict: - dict_unref (xattr); - out: - return ret; - + int32_t ret = 0; + size_t signlen = 0; + dict_t *xattr = NULL; + br_isignature_out_t *signptr = NULL; + + ret = bitd_fetch_signature(this, child, fd, &xattr, &signptr); + if (ret < 0) { + if (!skip_stat) + br_inc_unsigned_file_count(scrub_stat); + goto out; + } + + /** + * Either the object got dirtied during the time the signature was + * calculated OR the version we saved during pre-compute check does + * not match now, implying that the object got dirtied and signed in + * between scrubs pre & post compute checks (checksum window). + * + * The log entry looks pretty ugly, but helps in debugging.. 
+ */ + if (signptr->stale || (signptr->version != version)) { + if (!skip_stat) + br_inc_unsigned_file_count(scrub_stat); + gf_msg_debug(this->name, 0, + "<STAGE: POST> Object [GFID: %s] " + "either has a stale signature OR underwent " + "signing during checksumming {Stale: %d | " + "Version: %lu,%lu}", + uuid_utoa(fd->inode->gfid), (signptr->stale) ? 1 : 0, + version, signptr->version); + ret = -1; + goto unref_dict; + } + + signlen = signptr->signaturelen; + *signature = GF_MALLOC(sizeof(br_isignature_out_t) + signlen, + gf_common_mt_char); + + (void)memcpy(*signature, signptr, sizeof(br_isignature_out_t) + signlen); + + (*signature)->signaturelen = signlen; + +unref_dict: + dict_unref(xattr); +out: + return ret; } static int32_t -bitd_signature_staleness (xlator_t *this, - br_child_t *child, fd_t *fd, - int *stale, unsigned long *version) +bitd_signature_staleness(xlator_t *this, br_child_t *child, fd_t *fd, + int *stale, unsigned long *version, + br_scrub_stats_t *scrub_stat, gf_boolean_t skip_stat) { - int32_t ret = -1; - dict_t *xattr = NULL; - br_isignature_out_t *signptr = NULL; - - ret = bitd_fetch_signature (this, child, fd, &xattr, &signptr); - if (ret < 0) - goto out; - - /** - * save verison for validation in post compute stage - * c.f. bitd_scrub_post_compute_check() - */ - *stale = signptr->stale ? 1 : 0; - *version = signptr->version; - - dict_unref (xattr); - - out: - return ret; + int32_t ret = -1; + dict_t *xattr = NULL; + br_isignature_out_t *signptr = NULL; + + ret = bitd_fetch_signature(this, child, fd, &xattr, &signptr); + if (ret < 0) { + if (!skip_stat) + br_inc_unsigned_file_count(scrub_stat); + goto out; + } + + /** + * save version for validation in post compute stage + * c.f. bitd_scrub_post_compute_check() + */ + *stale = signptr->stale ? 
1 : 0; + *version = signptr->version; + + dict_unref(xattr); + +out: + return ret; } /** @@ -166,94 +176,102 @@ bitd_signature_staleness (xlator_t *this, * - has stale signature */ int32_t -bitd_scrub_pre_compute_check (xlator_t *this, br_child_t *child, - fd_t *fd, unsigned long *version) +bitd_scrub_pre_compute_check(xlator_t *this, br_child_t *child, fd_t *fd, + unsigned long *version, + br_scrub_stats_t *scrub_stat, + gf_boolean_t skip_stat) { - int stale = 0; - int32_t ret = -1; - - if (bitd_is_bad_file (this, child, NULL, fd)) { - gf_msg (this->name, GF_LOG_WARNING, 0, BRB_MSG_SKIP_OBJECT, - "Object [GFID: %s] is marked corrupted, skipping..", - uuid_utoa (fd->inode->gfid)); - goto out; - } - - ret = bitd_signature_staleness (this, child, fd, &stale, version); - if (!ret && stale) { - gf_msg_debug (this->name, 0, "<STAGE: PRE> Object [GFID: %s] " - "has stale signature", - uuid_utoa (fd->inode->gfid)); - ret = -1; - } - - out: - return ret; + int stale = 0; + int32_t ret = -1; + + if (bitd_is_bad_file(this, child, NULL, fd)) { + gf_msg(this->name, GF_LOG_WARNING, 0, BRB_MSG_SKIP_OBJECT, + "Object [GFID: %s] is marked corrupted, skipping..", + uuid_utoa(fd->inode->gfid)); + goto out; + } + + ret = bitd_signature_staleness(this, child, fd, &stale, version, scrub_stat, + skip_stat); + if (!ret && stale) { + if (!skip_stat) + br_inc_unsigned_file_count(scrub_stat); + gf_msg_debug(this->name, 0, + "<STAGE: PRE> Object [GFID: %s] " + "has stale signature", + uuid_utoa(fd->inode->gfid)); + ret = -1; + } + +out: + return ret; } /* static int */ int -bitd_compare_ckum (xlator_t *this, - br_isignature_out_t *sign, - unsigned char *md, inode_t *linked_inode, - gf_dirent_t *entry, fd_t *fd, br_child_t *child, loc_t *loc) +bitd_compare_ckum(xlator_t *this, br_isignature_out_t *sign, unsigned char *md, + inode_t *linked_inode, gf_dirent_t *entry, fd_t *fd, + br_child_t *child, loc_t *loc) { - int ret = -1; - dict_t *xattr = NULL; - - GF_VALIDATE_OR_GOTO ("bit-rot", this, 
out); - GF_VALIDATE_OR_GOTO (this->name, sign, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - GF_VALIDATE_OR_GOTO (this->name, child, out); - GF_VALIDATE_OR_GOTO (this->name, linked_inode, out); - GF_VALIDATE_OR_GOTO (this->name, md, out); - GF_VALIDATE_OR_GOTO (this->name, entry, out); - - if (strncmp - (sign->signature, (char *) md, strlen (sign->signature)) == 0) { - gf_msg_debug (this->name, 0, "%s [GFID: %s | Brick: %s] " - "matches calculated checksum", loc->path, - uuid_utoa (linked_inode->gfid), - child->brick_path); - return 0; - } - - gf_msg (this->name, GF_LOG_DEBUG, 0, BRB_MSG_CHECKSUM_MISMATCH, - "Object checksum mismatch: %s [GFID: %s | Brick: %s]", - loc->path, uuid_utoa (linked_inode->gfid), child->brick_path); - gf_msg (this->name, GF_LOG_ALERT, 0, BRB_MSG_CHECKSUM_MISMATCH, - "CORRUPTION DETECTED: Object %s {Brick: %s | GFID: %s}", - loc->path, child->brick_path, uuid_utoa (linked_inode->gfid)); - - /* Perform bad-file marking */ - xattr = dict_new (); - if (!xattr) { - ret = -1; - goto out; - } - - ret = dict_set_int32 (xattr, BITROT_OBJECT_BAD_KEY, _gf_true); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, BRB_MSG_MARK_BAD_FILE, - "Error setting bad-file marker for %s [GFID: %s | " - "Brick: %s]", loc->path, uuid_utoa (linked_inode->gfid), - child->brick_path); - goto dictfree; - } - - gf_msg (this->name, GF_LOG_ALERT, 0, BRB_MSG_MARK_CORRUPTED, "Marking" - " %s [GFID: %s | Brick: %s] as corrupted..", loc->path, - uuid_utoa (linked_inode->gfid), child->brick_path); - ret = syncop_fsetxattr (child->xl, fd, xattr, 0, NULL, NULL); - if (ret) - gf_msg (this->name, GF_LOG_ERROR, 0, BRB_MSG_MARK_BAD_FILE, - "Error marking object %s [GFID: %s] as corrupted", - loc->path, uuid_utoa (linked_inode->gfid)); - - dictfree: - dict_unref (xattr); - out: - return ret; + int ret = -1; + dict_t *xattr = NULL; + + GF_VALIDATE_OR_GOTO("bit-rot", this, out); + GF_VALIDATE_OR_GOTO(this->name, sign, out); + GF_VALIDATE_OR_GOTO(this->name, fd, out); + 
GF_VALIDATE_OR_GOTO(this->name, child, out); + GF_VALIDATE_OR_GOTO(this->name, linked_inode, out); + GF_VALIDATE_OR_GOTO(this->name, md, out); + GF_VALIDATE_OR_GOTO(this->name, entry, out); + + if (strncmp(sign->signature, (char *)md, sign->signaturelen) == 0) { + gf_msg_debug(this->name, 0, + "%s [GFID: %s | Brick: %s] " + "matches calculated checksum", + loc->path, uuid_utoa(linked_inode->gfid), + child->brick_path); + return 0; + } + + gf_msg(this->name, GF_LOG_DEBUG, 0, BRB_MSG_CHECKSUM_MISMATCH, + "Object checksum mismatch: %s [GFID: %s | Brick: %s]", loc->path, + uuid_utoa(linked_inode->gfid), child->brick_path); + gf_msg(this->name, GF_LOG_ALERT, 0, BRB_MSG_CHECKSUM_MISMATCH, + "CORRUPTION DETECTED: Object %s {Brick: %s | GFID: %s}", loc->path, + child->brick_path, uuid_utoa(linked_inode->gfid)); + + /* Perform bad-file marking */ + xattr = dict_new(); + if (!xattr) { + ret = -1; + goto out; + } + + ret = dict_set_int32(xattr, BITROT_OBJECT_BAD_KEY, _gf_true); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_MARK_BAD_FILE, + "Error setting bad-file marker for %s [GFID: %s | " + "Brick: %s]", + loc->path, uuid_utoa(linked_inode->gfid), child->brick_path); + goto dictfree; + } + + gf_msg(this->name, GF_LOG_ALERT, 0, BRB_MSG_MARK_CORRUPTED, + "Marking" + " %s [GFID: %s | Brick: %s] as corrupted..", + loc->path, uuid_utoa(linked_inode->gfid), child->brick_path); + gf_event(EVENT_BITROT_BAD_FILE, "gfid=%s;path=%s;brick=%s", + uuid_utoa(linked_inode->gfid), loc->path, child->brick_path); + ret = syncop_fsetxattr(child->xl, fd, xattr, 0, NULL, NULL); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_MARK_BAD_FILE, + "Error marking object %s [GFID: %s] as corrupted", loc->path, + uuid_utoa(linked_inode->gfid)); + +dictfree: + dict_unref(xattr); +out: + return ret; } /** @@ -264,400 +282,550 @@ bitd_compare_ckum (xlator_t *this, * signs with SHA256). 
*/ int -br_scrubber_scrub_begin (xlator_t *this, struct br_fsscan_entry *fsentry) +br_scrubber_scrub_begin(xlator_t *this, struct br_fsscan_entry *fsentry) { - int32_t ret = -1; - fd_t *fd = NULL; - loc_t loc = {0, }; - struct iatt iatt = {0, }; - struct iatt parent_buf = {0, }; - pid_t pid = 0; - br_child_t *child = NULL; - unsigned char *md = NULL; - inode_t *linked_inode = NULL; - br_isignature_out_t *sign = NULL; - unsigned long signedversion = 0; - gf_dirent_t *entry = NULL; - loc_t *parent = NULL; - - GF_VALIDATE_OR_GOTO ("bit-rot", fsentry, out); - - entry = fsentry->entry; - parent = &fsentry->parent; - child = fsentry->data; - - GF_VALIDATE_OR_GOTO ("bit-rot", entry, out); - GF_VALIDATE_OR_GOTO ("bit-rot", parent, out); - GF_VALIDATE_OR_GOTO ("bit-rot", child, out); - - pid = GF_CLIENT_PID_SCRUB; - - ret = br_prepare_loc (this, child, parent, entry, &loc); - if (!ret) - goto out; - - syncopctx_setfspid (&pid); - - ret = syncop_lookup (child->xl, &loc, &iatt, &parent_buf, NULL, NULL); - if (ret) { - br_log_object_path (this, "lookup", loc.path, -ret); - goto out; - } - - linked_inode = inode_link (loc.inode, parent->inode, loc.name, &iatt); - if (linked_inode) - inode_lookup (linked_inode); - - gf_msg_debug (this->name, 0, "Scrubbing object %s [GFID: %s]", - entry->d_name, uuid_utoa (linked_inode->gfid)); - - if (iatt.ia_type != IA_IFREG) { - gf_msg_debug (this->name, 0, "%s is not a regular file", - entry->d_name); - ret = 0; - goto unref_inode; - } - - /** - * open() an fd for subsequent opertaions - */ - fd = fd_create (linked_inode, 0); - if (!fd) { - gf_msg (this->name, GF_LOG_ERROR, 0, BRB_MSG_FD_CREATE_FAILED, - "failed to create fd for inode %s", - uuid_utoa (linked_inode->gfid)); - goto unref_inode; - } - - ret = syncop_open (child->xl, &loc, O_RDWR, fd, NULL, NULL); - if (ret) { - br_log_object (this, "open", linked_inode->gfid, -ret); - ret = -1; - goto unrefd; - } - - fd_bind (fd); - - /** - * perform pre compute checks before initiating 
checksum - * computation - * - presence of bad object - * - signature staleness - */ - ret = bitd_scrub_pre_compute_check (this, child, fd, &signedversion); - if (ret) - goto unrefd; /* skip this object */ - - /* if all's good, proceed to calculate the hash */ - md = GF_CALLOC (SHA256_DIGEST_LENGTH, sizeof (*md), - gf_common_mt_char); - if (!md) - goto unrefd; - - ret = br_calculate_obj_checksum (md, child, fd, &iatt); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, BRB_MSG_CALC_ERROR, - "error calculating hash for object [GFID: %s]", - uuid_utoa (fd->inode->gfid)); - ret = -1; - goto free_md; - } - - /** - * perform post compute checks as an object's signature may have - * become stale while scrubber calculated checksum. - */ - ret = bitd_scrub_post_compute_check (this, child, - fd, signedversion, &sign); - if (ret) - goto free_md; - - ret = bitd_compare_ckum (this, sign, md, - linked_inode, entry, fd, child, &loc); - - GF_FREE (sign); /* alloced on post-compute */ - - /** fd_unref() takes care of closing fd.. 
like syncop_close() */ - - free_md: - GF_FREE (md); - unrefd: - fd_unref (fd); - unref_inode: - inode_unref (linked_inode); - out: - loc_wipe (&loc); - return ret; + int32_t ret = -1; + fd_t *fd = NULL; + loc_t loc = { + 0, + }; + struct iatt iatt = { + 0, + }; + struct iatt parent_buf = { + 0, + }; + pid_t pid = 0; + br_child_t *child = NULL; + unsigned char *md = NULL; + inode_t *linked_inode = NULL; + br_isignature_out_t *sign = NULL; + unsigned long signedversion = 0; + gf_dirent_t *entry = NULL; + br_private_t *priv = NULL; + loc_t *parent = NULL; + gf_boolean_t skip_stat = _gf_false; + uuid_t shard_root_gfid = { + 0, + }; + + GF_VALIDATE_OR_GOTO("bit-rot", fsentry, out); + + entry = fsentry->entry; + parent = &fsentry->parent; + child = fsentry->data; + + priv = this->private; + + GF_VALIDATE_OR_GOTO("bit-rot", entry, out); + GF_VALIDATE_OR_GOTO("bit-rot", parent, out); + GF_VALIDATE_OR_GOTO("bit-rot", child, out); + GF_VALIDATE_OR_GOTO("bit-rot", priv, out); + + pid = GF_CLIENT_PID_SCRUB; + + ret = br_prepare_loc(this, child, parent, entry, &loc); + if (!ret) + goto out; + + syncopctx_setfspid(&pid); + + ret = syncop_lookup(child->xl, &loc, &iatt, &parent_buf, NULL, NULL); + if (ret) { + br_log_object_path(this, "lookup", loc.path, -ret); + goto out; + } + + linked_inode = inode_link(loc.inode, parent->inode, loc.name, &iatt); + if (linked_inode) + inode_lookup(linked_inode); + + gf_msg_debug(this->name, 0, "Scrubbing object %s [GFID: %s]", entry->d_name, + uuid_utoa(linked_inode->gfid)); + + if (iatt.ia_type != IA_IFREG) { + gf_msg_debug(this->name, 0, "%s is not a regular file", entry->d_name); + ret = 0; + goto unref_inode; + } + + if (IS_DHT_LINKFILE_MODE((&iatt))) { + gf_msg_debug(this->name, 0, "%s is a dht sticky bit file", + entry->d_name); + ret = 0; + goto unref_inode; + } + + /* skip updating scrub statistics for shard entries */ + gf_uuid_parse(SHARD_ROOT_GFID, shard_root_gfid); + if (gf_uuid_compare(loc.pargfid, shard_root_gfid) == 0) + 
skip_stat = _gf_true; + + /** + * open() an fd for subsequent operations + */ + fd = fd_create(linked_inode, 0); + if (!fd) { + gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_FD_CREATE_FAILED, + "failed to create fd for inode %s", + uuid_utoa(linked_inode->gfid)); + goto unref_inode; + } + + ret = syncop_open(child->xl, &loc, O_RDWR, fd, NULL, NULL); + if (ret) { + br_log_object(this, "open", linked_inode->gfid, -ret); + ret = -1; + goto unrefd; + } + + fd_bind(fd); + + /** + * perform pre compute checks before initiating checksum + * computation + * - presence of bad object + * - signature staleness + */ + ret = bitd_scrub_pre_compute_check(this, child, fd, &signedversion, + &priv->scrub_stat, skip_stat); + if (ret) + goto unrefd; /* skip this object */ + + /* if all's good, proceed to calculate the hash */ + md = GF_MALLOC(SHA256_DIGEST_LENGTH, gf_common_mt_char); + if (!md) + goto unrefd; + + ret = br_calculate_obj_checksum(md, child, fd, &iatt); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_CALC_ERROR, + "error calculating hash for object [GFID: %s]", + uuid_utoa(fd->inode->gfid)); + ret = -1; + goto free_md; + } + + /** + * perform post compute checks as an object's signature may have + * become stale while scrubber calculated checksum. + */ + ret = bitd_scrub_post_compute_check(this, child, fd, signedversion, &sign, + &priv->scrub_stat, skip_stat); + if (ret) + goto free_md; + + ret = bitd_compare_ckum(this, sign, md, linked_inode, entry, fd, child, + &loc); + + if (!skip_stat) + br_inc_scrubbed_file(&priv->scrub_stat); + + GF_FREE(sign); /* allocated on post-compute */ + + /** fd_unref() takes care of closing fd.. 
like syncop_close() */ + +free_md: + GF_FREE(md); +unrefd: + fd_unref(fd); +unref_inode: + inode_unref(linked_inode); +out: + loc_wipe(&loc); + return ret; } static void -_br_lock_cleaner (void *arg) +_br_lock_cleaner(void *arg) { - pthread_mutex_t *mutex = arg; + pthread_mutex_t *mutex = arg; - pthread_mutex_unlock (mutex); + pthread_mutex_unlock(mutex); } static void -wait_for_scrubbing (xlator_t *this, struct br_scanfs *fsscan) +wait_for_scrubbing(xlator_t *this, struct br_scanfs *fsscan) { - br_private_t *priv = NULL; - struct br_scrubber *fsscrub = NULL; + br_private_t *priv = NULL; + struct br_scrubber *fsscrub = NULL; - priv = this->private; - fsscrub = &priv->fsscrub; + priv = this->private; + fsscrub = &priv->fsscrub; - pthread_cleanup_push (_br_lock_cleaner, &fsscan->waitlock); - pthread_mutex_lock (&fsscan->waitlock); + pthread_cleanup_push(_br_lock_cleaner, &fsscan->waitlock); + pthread_mutex_lock(&fsscan->waitlock); + { + pthread_cleanup_push(_br_lock_cleaner, &fsscrub->mutex); + pthread_mutex_lock(&fsscrub->mutex); { - pthread_cleanup_push (_br_lock_cleaner, &fsscrub->mutex); - pthread_mutex_lock (&fsscrub->mutex); - { - list_replace_init (&fsscan->queued, &fsscan->ready); - - /* wake up scrubbers */ - pthread_cond_broadcast (&fsscrub->cond); - } - pthread_mutex_unlock (&fsscrub->mutex); - pthread_cleanup_pop (0); - - while (fsscan->entries != 0) - pthread_cond_wait - (&fsscan->waitcond, &fsscan->waitlock); + list_replace_init(&fsscan->queued, &fsscan->ready); + + /* wake up scrubbers */ + pthread_cond_broadcast(&fsscrub->cond); } - pthread_mutex_unlock (&fsscan->waitlock); - pthread_cleanup_pop (0); + pthread_mutex_unlock(&fsscrub->mutex); + pthread_cleanup_pop(0); + + while (fsscan->entries != 0) + pthread_cond_wait(&fsscan->waitcond, &fsscan->waitlock); + } + pthread_mutex_unlock(&fsscan->waitlock); + pthread_cleanup_pop(0); } static void -_br_fsscan_inc_entry_count (struct br_scanfs *fsscan) +_br_fsscan_inc_entry_count(struct br_scanfs *fsscan) { 
- fsscan->entries++; + fsscan->entries++; } static void -_br_fsscan_dec_entry_count (struct br_scanfs *fsscan) +_br_fsscan_dec_entry_count(struct br_scanfs *fsscan) { - if (--fsscan->entries == 0) { - pthread_mutex_lock (&fsscan->waitlock); - { - pthread_cond_signal (&fsscan->waitcond); - } - pthread_mutex_unlock (&fsscan->waitlock); + if (--fsscan->entries == 0) { + pthread_mutex_lock(&fsscan->waitlock); + { + pthread_cond_signal(&fsscan->waitcond); } + pthread_mutex_unlock(&fsscan->waitlock); + } } static void -_br_fsscan_collect_entry (struct br_scanfs *fsscan, - struct br_fsscan_entry *fsentry) +_br_fsscan_collect_entry(struct br_scanfs *fsscan, + struct br_fsscan_entry *fsentry) { - list_add_tail (&fsentry->list, &fsscan->queued); - _br_fsscan_inc_entry_count (fsscan); + list_add_tail(&fsentry->list, &fsscan->queued); + _br_fsscan_inc_entry_count(fsscan); } -#define NR_ENTRIES (1<<7) /* ..bulk scrubbing */ +#define NR_ENTRIES (1 << 7) /* ..bulk scrubbing */ int -br_fsscanner_handle_entry (xlator_t *subvol, - gf_dirent_t *entry, loc_t *parent, void *data) +br_fsscanner_handle_entry(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, + void *data) { - int32_t ret = -1; - int scrub = 0; - br_child_t *child = NULL; - xlator_t *this = NULL; - struct br_scanfs *fsscan = NULL; - struct br_fsscan_entry *fsentry = NULL; + int32_t ret = -1; + int scrub = 0; + br_child_t *child = NULL; + xlator_t *this = NULL; + struct br_scanfs *fsscan = NULL; + struct br_fsscan_entry *fsentry = NULL; - GF_VALIDATE_OR_GOTO ("bit-rot", subvol, error_return); - GF_VALIDATE_OR_GOTO ("bit-rot", data, error_return); + GF_VALIDATE_OR_GOTO("bit-rot", subvol, error_return); + GF_VALIDATE_OR_GOTO("bit-rot", data, error_return); - child = data; - this = child->this; - fsscan = &child->fsscan; + child = data; + this = child->this; + fsscan = &child->fsscan; - _mask_cancellation (); + _mask_cancellation(); - fsentry = GF_CALLOC (1, sizeof (*fsentry), gf_br_mt_br_fsscan_entry_t); - if (!fsentry) - 
goto error_return; + fsentry = GF_CALLOC(1, sizeof(*fsentry), gf_br_mt_br_fsscan_entry_t); + if (!fsentry) + goto error_return; - { - fsentry->data = data; - fsentry->fsscan = &child->fsscan; + { + fsentry->data = data; + fsentry->fsscan = &child->fsscan; - /* copy parent loc */ - ret = loc_copy (&fsentry->parent, parent); - if (ret) - goto dealloc; + /* copy parent loc */ + ret = loc_copy(&fsentry->parent, parent); + if (ret) + goto dealloc; - /* copy child entry */ - fsentry->entry = entry_copy (entry); - if (!fsentry->entry) - goto locwipe; + /* copy child entry */ + fsentry->entry = entry_copy(entry); + if (!fsentry->entry) + goto locwipe; - INIT_LIST_HEAD (&fsentry->list); - } + INIT_LIST_HEAD(&fsentry->list); + } - LOCK (&fsscan->entrylock); - { - _br_fsscan_collect_entry (fsscan, fsentry); - - /** - * need not be a equality check as entries may be pushed - * back onto the scanned queue when thread(s) are cleaned. - */ - if (fsscan->entries >= NR_ENTRIES) - scrub = 1; - } - UNLOCK (&fsscan->entrylock); + LOCK(&fsscan->entrylock); + { + _br_fsscan_collect_entry(fsscan, fsentry); - _unmask_cancellation (); + /** + * need not be a equality check as entries may be pushed + * back onto the scanned queue when thread(s) are cleaned. 
+ */ + if (fsscan->entries >= NR_ENTRIES) + scrub = 1; + } + UNLOCK(&fsscan->entrylock); - if (scrub) - wait_for_scrubbing (this, fsscan); + _unmask_cancellation(); - return 0; + if (scrub) + wait_for_scrubbing(this, fsscan); - locwipe: - loc_wipe (&fsentry->parent); - dealloc: - GF_FREE (fsentry); - error_return: - return -1; + return 0; + +locwipe: + loc_wipe(&fsentry->parent); +dealloc: + GF_FREE(fsentry); +error_return: + return -1; } int32_t -br_fsscan_deactivate (xlator_t *this, br_child_t *child) +br_fsscan_deactivate(xlator_t *this) { - int ret = 0; - br_private_t *priv = NULL; - br_scrub_state_t nstate = 0; - struct br_scanfs *fsscan = NULL; - - priv = this->private; - fsscan = &child->fsscan; - - ret = gf_tw_del_timer (priv->timer_wheel, fsscan->timer); - if (ret == 0) { - nstate = BR_SCRUB_STATE_STALLED; - gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO, - "Brick [%s] is under active scrubbing. Pausing scrub..", - child->brick_path); - } else { - nstate = BR_SCRUB_STATE_PAUSED; - gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO, - "Scrubber paused [Brick: %s]", child->brick_path); - } - - _br_child_set_scrub_state (child, nstate); - - return 0; + int ret = 0; + br_private_t *priv = NULL; + br_scrub_state_t nstate = 0; + struct br_monitor *scrub_monitor = NULL; + + priv = this->private; + scrub_monitor = &priv->scrub_monitor; + + ret = gf_tw_del_timer(priv->timer_wheel, scrub_monitor->timer); + if (ret == 0) { + nstate = BR_SCRUB_STATE_STALLED; + gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO, + "Volume is under active scrubbing. 
Pausing scrub.."); + } else { + nstate = BR_SCRUB_STATE_PAUSED; + gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO, + "Scrubber paused"); + } + + _br_monitor_set_scrub_state(scrub_monitor, nstate); + + return 0; } static void -br_fsscanner_log_time (xlator_t *this, br_child_t *child, const char *sfx) +br_scrubber_log_time(xlator_t *this, const char *sfx) { - struct timeval tv = {0,}; - char timestr[1024] = {0,}; + char timestr[GF_TIMESTR_SIZE] = { + 0, + }; + br_private_t *priv = NULL; + time_t now = 0; + + now = gf_time(); + priv = this->private; + + gf_time_fmt(timestr, sizeof(timestr), now, gf_timefmt_FT); + + if (strcasecmp(sfx, "started") == 0) { + br_update_scrub_start_time(&priv->scrub_stat, now); + gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_START, + "Scrubbing %s at %s", sfx, timestr); + } else { + br_update_scrub_finish_time(&priv->scrub_stat, timestr, now); + gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_FINISH, + "Scrubbing %s at %s", sfx, timestr); + } +} - gettimeofday (&tv, NULL); - gf_time_fmt (timestr, sizeof (timestr), tv.tv_sec, gf_timefmt_FT); +static void +br_fsscanner_log_time(xlator_t *this, br_child_t *child, const char *sfx) +{ + char timestr[GF_TIMESTR_SIZE] = { + 0, + }; + time_t now = 0; + + now = gf_time(); + gf_time_fmt(timestr, sizeof(timestr), now, gf_timefmt_FT); + + if (strcasecmp(sfx, "started") == 0) { + gf_msg_debug(this->name, 0, "Scrubbing \"%s\" %s at %s", + child->brick_path, sfx, timestr); + } else { + gf_msg_debug(this->name, 0, "Scrubbing \"%s\" %s at %s", + child->brick_path, sfx, timestr); + } +} - if (strcasecmp (sfx, "started") == 0) { - gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_START, - "Scrubbing \"%s\" %s at %s", child->brick_path, sfx, - timestr); - } else { - gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_FINISH, - "Scrubbing \"%s\" %s at %s", child->brick_path, sfx, - timestr); - } +void +br_child_set_scrub_state(br_child_t *child, gf_boolean_t state) +{ + child->active_scrubbing = state; 
} static void -br_fsscanner_wait_until_kicked (struct br_scanfs *fsscan) +br_fsscanner_wait_until_kicked(xlator_t *this, br_child_t *child) { - pthread_cleanup_push (_br_lock_cleaner, &fsscan->wakelock); - pthread_mutex_lock (&fsscan->wakelock); + br_private_t *priv = NULL; + struct br_monitor *scrub_monitor = NULL; + + priv = this->private; + scrub_monitor = &priv->scrub_monitor; + + pthread_cleanup_push(_br_lock_cleaner, &scrub_monitor->wakelock); + pthread_mutex_lock(&scrub_monitor->wakelock); + { + while (!scrub_monitor->kick) + pthread_cond_wait(&scrub_monitor->wakecond, + &scrub_monitor->wakelock); + + /* Child lock is to synchronize with disconnect events */ + pthread_cleanup_push(_br_lock_cleaner, &child->lock); + pthread_mutex_lock(&child->lock); { - while (!fsscan->kick) - pthread_cond_wait (&fsscan->wakecond, - &fsscan->wakelock); - fsscan->kick = _gf_false; + scrub_monitor->active_child_count++; + br_child_set_scrub_state(child, _gf_true); } - pthread_mutex_unlock (&fsscan->wakelock); - pthread_cleanup_pop (0); + pthread_mutex_unlock(&child->lock); + pthread_cleanup_pop(0); + } + pthread_mutex_unlock(&scrub_monitor->wakelock); + pthread_cleanup_pop(0); } static void -br_fsscanner_entry_control (xlator_t *this, br_child_t *child) +br_scrubber_entry_control(xlator_t *this) { - struct br_scanfs *fsscan = &child->fsscan; + br_private_t *priv = NULL; + struct br_monitor *scrub_monitor = NULL; + + priv = this->private; + scrub_monitor = &priv->scrub_monitor; + + LOCK(&scrub_monitor->lock); + { + /* Move the state to BR_SCRUB_STATE_ACTIVE */ + if (scrub_monitor->state == BR_SCRUB_STATE_PENDING) + scrub_monitor->state = BR_SCRUB_STATE_ACTIVE; + br_scrubber_log_time(this, "started"); + priv->scrub_stat.scrub_running = 1; + } + UNLOCK(&scrub_monitor->lock); +} - LOCK (&child->lock); - { - if (fsscan->state == BR_SCRUB_STATE_PENDING) - fsscan->state = BR_SCRUB_STATE_ACTIVE; - br_fsscanner_log_time (this, child, "started"); +static void 
+br_scrubber_exit_control(xlator_t *this) +{ + br_private_t *priv = NULL; + struct br_monitor *scrub_monitor = NULL; + + priv = this->private; + scrub_monitor = &priv->scrub_monitor; + + LOCK(&scrub_monitor->lock); + { + br_scrubber_log_time(this, "finished"); + priv->scrub_stat.scrub_running = 0; + + if (scrub_monitor->state == BR_SCRUB_STATE_ACTIVE) { + (void)br_fsscan_activate(this); + } else { + UNLOCK(&scrub_monitor->lock); + gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO, + "Volume waiting to get rescheduled.."); + return; } - UNLOCK (&child->lock); + } + UNLOCK(&scrub_monitor->lock); } static void -br_fsscanner_exit_control (xlator_t *this, br_child_t *child) +br_fsscanner_entry_control(xlator_t *this, br_child_t *child) { - struct br_scanfs *fsscan = &child->fsscan; + br_fsscanner_log_time(this, child, "started"); +} - LOCK (&child->lock); +static void +br_fsscanner_exit_control(xlator_t *this, br_child_t *child) +{ + br_private_t *priv = NULL; + struct br_monitor *scrub_monitor = NULL; + + priv = this->private; + scrub_monitor = &priv->scrub_monitor; + + if (!_br_is_child_connected(child)) { + gf_msg(this->name, GF_LOG_WARNING, 0, BRB_MSG_SCRUB_INFO, + "Brick [%s] disconnected while scrubbing. 
Scrubbing " + "might be incomplete", + child->brick_path); + } + + br_fsscanner_log_time(this, child, "finished"); + + pthread_cleanup_push(_br_lock_cleaner, &scrub_monitor->wakelock); + pthread_mutex_lock(&scrub_monitor->wakelock); + { + scrub_monitor->active_child_count--; + pthread_cleanup_push(_br_lock_cleaner, &child->lock); + pthread_mutex_lock(&child->lock); { - fsscan->over = _gf_true; - br_fsscanner_log_time (this, child, "finished"); - - if (fsscan->state == BR_SCRUB_STATE_ACTIVE) { - (void) br_fsscan_activate (this, child); - } else { - gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO, - "Brick [%s] waiting to get rescheduled..", - child->brick_path); - } + br_child_set_scrub_state(child, _gf_false); + } + pthread_mutex_unlock(&child->lock); + pthread_cleanup_pop(0); + + if (scrub_monitor->active_child_count == 0) { + /* The last child has finished scrubbing. + * Set the kick to false and wake up other + * children who are waiting for the last + * child to complete scrubbing. + */ + scrub_monitor->kick = _gf_false; + pthread_cond_broadcast(&scrub_monitor->wakecond); + + /* Signal monitor thread waiting for the all + * the children to finish scrubbing. 
+ */ + pthread_cleanup_push(_br_lock_cleaner, &scrub_monitor->donelock); + pthread_mutex_lock(&scrub_monitor->donelock); + { + scrub_monitor->done = _gf_true; + pthread_cond_signal(&scrub_monitor->donecond); + } + pthread_mutex_unlock(&scrub_monitor->donelock); + pthread_cleanup_pop(0); + } else { + while (scrub_monitor->active_child_count) + pthread_cond_wait(&scrub_monitor->wakecond, + &scrub_monitor->wakelock); } - UNLOCK (&child->lock); + } + pthread_mutex_unlock(&scrub_monitor->wakelock); + pthread_cleanup_pop(0); } void * -br_fsscanner (void *arg) +br_fsscanner(void *arg) { - loc_t loc = {0,}; - br_child_t *child = NULL; - xlator_t *this = NULL; - struct br_scanfs *fsscan = NULL; - - child = arg; - this = child->this; - fsscan = &child->fsscan; - - THIS = this; - loc.inode = child->table->root; - - while (1) { - br_fsscanner_wait_until_kicked (fsscan); - { - /* precursor for scrub */ - br_fsscanner_entry_control (this, child); - - /* scrub */ - (void) syncop_ftw (child->xl, - &loc, GF_CLIENT_PID_SCRUB, - child, br_fsscanner_handle_entry); - if (!list_empty (&fsscan->queued)) - wait_for_scrubbing (this, fsscan); - - /* scrub exit criteria */ - br_fsscanner_exit_control (this, child); - } + loc_t loc = { + 0, + }; + br_child_t *child = NULL; + xlator_t *this = NULL; + struct br_scanfs *fsscan = NULL; + + child = arg; + this = child->this; + fsscan = &child->fsscan; + + THIS = this; + loc.inode = child->table->root; + + while (1) { + br_fsscanner_wait_until_kicked(this, child); + { + /* precursor for scrub */ + br_fsscanner_entry_control(this, child); + + /* scrub */ + (void)syncop_ftw(child->xl, &loc, GF_CLIENT_PID_SCRUB, child, + br_fsscanner_handle_entry); + if (!list_empty(&fsscan->queued)) + wait_for_scrubbing(this, fsscan); + + /* scrub exit criteria */ + br_fsscanner_exit_control(this, child); } + } - return NULL; + return NULL; } /** @@ -668,200 +836,268 @@ br_fsscanner (void *arg) * non-pending timer. 
*/ void -br_kickstart_scanner (struct gf_tw_timer_list *timer, - void *data, unsigned long calltime) +br_kickstart_scanner(struct gf_tw_timer_list *timer, void *data, + unsigned long calltime) { - xlator_t *this = NULL; - br_child_t *child = data; - struct br_scanfs *fsscan = NULL; - - THIS = this = child->this; - fsscan = &child->fsscan; - - /* kickstart scanning.. */ - pthread_mutex_lock (&fsscan->wakelock); - { - fsscan->kick = _gf_true; - pthread_cond_signal (&fsscan->wakecond); - } - pthread_mutex_unlock (&fsscan->wakelock); - - return; - + xlator_t *this = NULL; + struct br_monitor *scrub_monitor = data; + br_private_t *priv = NULL; + + THIS = this = scrub_monitor->this; + priv = this->private; + + /* Reset scrub statistics */ + priv->scrub_stat.scrubbed_files = 0; + priv->scrub_stat.unsigned_files = 0; + + /* Moves state from PENDING to ACTIVE */ + (void)br_scrubber_entry_control(this); + + /* kickstart scanning.. */ + pthread_mutex_lock(&scrub_monitor->wakelock); + { + scrub_monitor->kick = _gf_true; + GF_ASSERT(scrub_monitor->active_child_count == 0); + pthread_cond_broadcast(&scrub_monitor->wakecond); + } + pthread_mutex_unlock(&scrub_monitor->wakelock); + + return; } static uint32_t -br_fsscan_calculate_delta (uint32_t times) +br_fsscan_calculate_delta(uint32_t times) { - return times; + return times; } -#define BR_SCRUB_HOURLY (60 * 60) -#define BR_SCRUB_DAILY (1 * 24 * 60 * 60) -#define BR_SCRUB_WEEKLY (7 * 24 * 60 * 60) -#define BR_SCRUB_BIWEEKLY (14 * 24 * 60 * 60) -#define BR_SCRUB_MONTHLY (30 * 24 * 60 * 60) +#define BR_SCRUB_ONDEMAND (1) +#define BR_SCRUB_MINUTE (60) +#define BR_SCRUB_HOURLY (60 * 60) +#define BR_SCRUB_DAILY (1 * 24 * 60 * 60) +#define BR_SCRUB_WEEKLY (7 * 24 * 60 * 60) +#define BR_SCRUB_BIWEEKLY (14 * 24 * 60 * 60) +#define BR_SCRUB_MONTHLY (30 * 24 * 60 * 60) static unsigned int -br_fsscan_calculate_timeout (scrub_freq_t freq) +br_fsscan_calculate_timeout(scrub_freq_t freq) { - uint32_t timo = 0; + uint32_t timo = 0; - switch 
(freq) { + switch (freq) { + case BR_FSSCRUB_FREQ_MINUTE: + timo = br_fsscan_calculate_delta(BR_SCRUB_MINUTE); + break; case BR_FSSCRUB_FREQ_HOURLY: - timo = br_fsscan_calculate_delta (BR_SCRUB_HOURLY); - break; + timo = br_fsscan_calculate_delta(BR_SCRUB_HOURLY); + break; case BR_FSSCRUB_FREQ_DAILY: - timo = br_fsscan_calculate_delta (BR_SCRUB_DAILY); - break; + timo = br_fsscan_calculate_delta(BR_SCRUB_DAILY); + break; case BR_FSSCRUB_FREQ_WEEKLY: - timo = br_fsscan_calculate_delta (BR_SCRUB_WEEKLY); - break; + timo = br_fsscan_calculate_delta(BR_SCRUB_WEEKLY); + break; case BR_FSSCRUB_FREQ_BIWEEKLY: - timo = br_fsscan_calculate_delta (BR_SCRUB_BIWEEKLY); - break; + timo = br_fsscan_calculate_delta(BR_SCRUB_BIWEEKLY); + break; case BR_FSSCRUB_FREQ_MONTHLY: - timo = br_fsscan_calculate_delta (BR_SCRUB_MONTHLY); - break; + timo = br_fsscan_calculate_delta(BR_SCRUB_MONTHLY); + break; default: - timo = 0; - } + timo = 0; + } - return timo; + return timo; } int32_t -br_fsscan_schedule (xlator_t *this, br_child_t *child) +br_fsscan_schedule(xlator_t *this) { - uint32_t timo = 0; - br_private_t *priv = NULL; - struct timeval tv = {0,}; - char timestr[1024] = {0,}; - struct br_scanfs *fsscan = NULL; - struct br_scrubber *fsscrub = NULL; - struct gf_tw_timer_list *timer = NULL; - - priv = this->private; - fsscan = &child->fsscan; - fsscrub = &priv->fsscrub; - - (void) gettimeofday (&tv, NULL); - fsscan->boot = tv.tv_sec; - - timo = br_fsscan_calculate_timeout (fsscrub->frequency); - if (timo == 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, BRB_MSG_ZERO_TIMEOUT_BUG, - "BUG: Zero schedule timeout"); - goto error_return; - } - - fsscan->timer = GF_CALLOC (1, sizeof (*fsscan->timer), - gf_br_stub_mt_br_scanner_freq_t); - if (!fsscan->timer) - goto error_return; - - timer = fsscan->timer; - INIT_LIST_HEAD (&timer->entry); + uint32_t timo = 0; + br_private_t *priv = NULL; + char timestr[GF_TIMESTR_SIZE] = { + 0, + }; + struct br_scrubber *fsscrub = NULL; + struct gf_tw_timer_list 
*timer = NULL; + struct br_monitor *scrub_monitor = NULL; + + priv = this->private; + fsscrub = &priv->fsscrub; + scrub_monitor = &priv->scrub_monitor; + + scrub_monitor->boot = gf_time(); + + timo = br_fsscan_calculate_timeout(fsscrub->frequency); + if (timo == 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_ZERO_TIMEOUT_BUG, + "BUG: Zero schedule timeout"); + goto error_return; + } + + scrub_monitor->timer = GF_CALLOC(1, sizeof(*scrub_monitor->timer), + gf_br_stub_mt_br_scanner_freq_t); + if (!scrub_monitor->timer) + goto error_return; + + timer = scrub_monitor->timer; + INIT_LIST_HEAD(&timer->entry); + + timer->data = scrub_monitor; + timer->expires = timo; + timer->function = br_kickstart_scanner; + + gf_tw_add_timer(priv->timer_wheel, timer); + _br_monitor_set_scrub_state(scrub_monitor, BR_SCRUB_STATE_PENDING); + + gf_time_fmt(timestr, sizeof(timestr), (scrub_monitor->boot + timo), + gf_timefmt_FT); + gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO, + "Scrubbing is " + "scheduled to run at %s", + timestr); + + return 0; + +error_return: + return -1; +} - timer->data = child; - timer->expires = timo; - timer->function = br_kickstart_scanner; +int32_t +br_fsscan_activate(xlator_t *this) +{ + uint32_t timo = 0; + char timestr[GF_TIMESTR_SIZE] = { + 0, + }; + time_t now = 0; + br_private_t *priv = NULL; + struct br_scrubber *fsscrub = NULL; + struct br_monitor *scrub_monitor = NULL; + + priv = this->private; + fsscrub = &priv->fsscrub; + scrub_monitor = &priv->scrub_monitor; + + now = gf_time(); + timo = br_fsscan_calculate_timeout(fsscrub->frequency); + if (timo == 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_ZERO_TIMEOUT_BUG, + "BUG: Zero schedule timeout"); + return -1; + } - gf_tw_add_timer (priv->timer_wheel, timer); - _br_child_set_scrub_state (child, BR_SCRUB_STATE_PENDING); + pthread_mutex_lock(&scrub_monitor->donelock); + { + scrub_monitor->done = _gf_false; + } + pthread_mutex_unlock(&scrub_monitor->donelock); - gf_time_fmt (timestr, 
sizeof (timestr), - (fsscan->boot + timo), gf_timefmt_FT); - gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO, "Scrubbing for " - "%s scheduled to run at %s", child->brick_path, timestr); + gf_time_fmt(timestr, sizeof(timestr), now + timo, gf_timefmt_FT); + (void)gf_tw_mod_timer(priv->timer_wheel, scrub_monitor->timer, timo); - return 0; + _br_monitor_set_scrub_state(scrub_monitor, BR_SCRUB_STATE_PENDING); + gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO, + "Scrubbing is " + "rescheduled to run at %s", + timestr); - error_return: - return -1; + return 0; } int32_t -br_fsscan_activate (xlator_t *this, br_child_t *child) +br_fsscan_reschedule(xlator_t *this) { - uint32_t timo = 0; - char timestr[1024] = {0,}; - struct timeval now = {0,}; - br_private_t *priv = NULL; - struct br_scanfs *fsscan = NULL; - struct br_scrubber *fsscrub = NULL; - - priv = this->private; - fsscan = &child->fsscan; - fsscrub = &priv->fsscrub; - - (void) gettimeofday (&now, NULL); - timo = br_fsscan_calculate_timeout (fsscrub->frequency); - if (timo == 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, BRB_MSG_ZERO_TIMEOUT_BUG, - "BUG: Zero schedule timeout"); - return -1; - } - - fsscan->over = _gf_false; - gf_time_fmt (timestr, sizeof (timestr), - (now.tv_sec + timo), gf_timefmt_FT); - (void) gf_tw_mod_timer (priv->timer_wheel, fsscan->timer, timo); - - _br_child_set_scrub_state (child, BR_SCRUB_STATE_PENDING); - gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO, "Scrubbing for " - "%s rescheduled to run at %s", child->brick_path, timestr); - + int32_t ret = 0; + uint32_t timo = 0; + char timestr[GF_TIMESTR_SIZE] = { + 0, + }; + time_t now = 0; + br_private_t *priv = NULL; + struct br_scrubber *fsscrub = NULL; + struct br_monitor *scrub_monitor = NULL; + + priv = this->private; + fsscrub = &priv->fsscrub; + scrub_monitor = &priv->scrub_monitor; + + if (!fsscrub->frequency_reconf) return 0; + + now = gf_time(); + timo = br_fsscan_calculate_timeout(fsscrub->frequency); + if (timo == 
0) { + gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_ZERO_TIMEOUT_BUG, + "BUG: Zero schedule timeout"); + return -1; + } + + gf_time_fmt(timestr, sizeof(timestr), now + timo, gf_timefmt_FT); + + pthread_mutex_lock(&scrub_monitor->donelock); + { + scrub_monitor->done = _gf_false; + } + pthread_mutex_unlock(&scrub_monitor->donelock); + + ret = gf_tw_mod_timer_pending(priv->timer_wheel, scrub_monitor->timer, + timo); + if (ret == 0) + gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO, + "Scrubber is currently running and would be " + "rescheduled after completion"); + else { + _br_monitor_set_scrub_state(scrub_monitor, BR_SCRUB_STATE_PENDING); + gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO, + "Scrubbing rescheduled to run at %s", timestr); + } + + return 0; } int32_t -br_fsscan_reschedule (xlator_t *this, br_child_t *child) +br_fsscan_ondemand(xlator_t *this) { - int32_t ret = 0; - uint32_t timo = 0; - char timestr[1024] = {0,}; - struct timeval now = {0,}; - br_private_t *priv = NULL; - struct br_scanfs *fsscan = NULL; - struct br_scrubber *fsscrub = NULL; - - priv = this->private; - fsscan = &child->fsscan; - fsscrub = &priv->fsscrub; - - if (!fsscrub->frequency_reconf) - return 0; - - (void) gettimeofday (&now, NULL); - timo = br_fsscan_calculate_timeout (fsscrub->frequency); - if (timo == 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, BRB_MSG_ZERO_TIMEOUT_BUG, - "BUG: Zero schedule timeout"); - return -1; - } - - gf_time_fmt (timestr, sizeof (timestr), - (now.tv_sec + timo), gf_timefmt_FT); - - fsscan->over = _gf_false; - ret = gf_tw_mod_timer_pending (priv->timer_wheel, fsscan->timer, timo); - if (ret == 0) - gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO, - "Scrubber for %s is currently running and would be " - "rescheduled after completion", child->brick_path); - else { - _br_child_set_scrub_state (child, BR_SCRUB_STATE_PENDING); - gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO, - "Scrubbing for %s rescheduled to run at %s", - 
child->brick_path, timestr); - } - - return 0; + int32_t ret = 0; + uint32_t timo = 0; + char timestr[GF_TIMESTR_SIZE] = { + 0, + }; + time_t now = 0; + br_private_t *priv = NULL; + struct br_monitor *scrub_monitor = NULL; + + priv = this->private; + scrub_monitor = &priv->scrub_monitor; + + now = gf_time(); + timo = BR_SCRUB_ONDEMAND; + gf_time_fmt(timestr, sizeof(timestr), now + timo, gf_timefmt_FT); + + pthread_mutex_lock(&scrub_monitor->donelock); + { + scrub_monitor->done = _gf_false; + } + pthread_mutex_unlock(&scrub_monitor->donelock); + + ret = gf_tw_mod_timer_pending(priv->timer_wheel, scrub_monitor->timer, + timo); + if (ret == 0) + gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO, + "Scrubber is currently running and would be " + "rescheduled after completion"); + else { + _br_monitor_set_scrub_state(scrub_monitor, BR_SCRUB_STATE_PENDING); + gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO, + "Ondemand Scrubbing scheduled to run at %s", timestr); + } + + return 0; } -#define BR_SCRUB_THREAD_SCALE_LAZY 0 -#define BR_SCRUB_THREAD_SCALE_NORMAL 0.4 +#define BR_SCRUB_THREAD_SCALE_LAZY 0 +#define BR_SCRUB_THREAD_SCALE_NORMAL 0.4 #define BR_SCRUB_THREAD_SCALE_AGGRESSIVE 1.0 #ifndef M_E @@ -874,111 +1110,105 @@ br_fsscan_reschedule (xlator_t *this, br_child_t *child) * the scale based on the number of processor cores too. 
*/ static unsigned int -br_scrubber_calc_scale (xlator_t *this, - br_private_t *priv, scrub_throttle_t throttle) +br_scrubber_calc_scale(xlator_t *this, br_private_t *priv, + scrub_throttle_t throttle) { - unsigned int scale = 0; + unsigned int scale = 0; - switch (throttle) { + switch (throttle) { case BR_SCRUB_THROTTLE_VOID: case BR_SCRUB_THROTTLE_STALLED: - scale = 0; - break; + scale = 0; + break; case BR_SCRUB_THROTTLE_LAZY: - scale = priv->child_count * - pow (M_E, BR_SCRUB_THREAD_SCALE_LAZY); - break; + scale = priv->child_count * pow(M_E, BR_SCRUB_THREAD_SCALE_LAZY); + break; case BR_SCRUB_THROTTLE_NORMAL: - scale = priv->child_count * - pow (M_E, BR_SCRUB_THREAD_SCALE_NORMAL); - break; + scale = priv->child_count * pow(M_E, BR_SCRUB_THREAD_SCALE_NORMAL); + break; case BR_SCRUB_THROTTLE_AGGRESSIVE: - scale = priv->child_count * - pow (M_E, BR_SCRUB_THREAD_SCALE_AGGRESSIVE); - break; + scale = priv->child_count * + pow(M_E, BR_SCRUB_THREAD_SCALE_AGGRESSIVE); + break; default: - gf_msg (this->name, GF_LOG_ERROR, 0, BRB_MSG_UNKNOWN_THROTTLE, - "Unknown throttle %d", throttle); - } - - return scale; + gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_UNKNOWN_THROTTLE, + "Unknown throttle %d", throttle); + } + return scale; } static br_child_t * -_br_scrubber_get_next_child (struct br_scrubber *fsscrub) +_br_scrubber_get_next_child(struct br_scrubber *fsscrub) { - br_child_t *child = NULL; + br_child_t *child = NULL; - child = list_first_entry (&fsscrub->scrublist, br_child_t, list); - list_rotate_left (&fsscrub->scrublist); + child = list_first_entry(&fsscrub->scrublist, br_child_t, list); + list_rotate_left(&fsscrub->scrublist); - return child; + return child; } static void -_br_scrubber_get_entry (br_child_t *child, struct br_fsscan_entry **fsentry) +_br_scrubber_get_entry(br_child_t *child, struct br_fsscan_entry **fsentry) { - struct br_scanfs *fsscan = &child->fsscan; + struct br_scanfs *fsscan = &child->fsscan; - if (list_empty (&fsscan->ready)) - return; - 
*fsentry = list_first_entry - (&fsscan->ready, struct br_fsscan_entry, list); - list_del_init (&(*fsentry)->list); + if (list_empty(&fsscan->ready)) + return; + *fsentry = list_first_entry(&fsscan->ready, struct br_fsscan_entry, list); + list_del_init(&(*fsentry)->list); } static void -_br_scrubber_find_scrubbable_entry (struct br_scrubber *fsscrub, - struct br_fsscan_entry **fsentry) +_br_scrubber_find_scrubbable_entry(struct br_scrubber *fsscrub, + struct br_fsscan_entry **fsentry) { - br_child_t *child = NULL; - br_child_t *firstchild = NULL; - - while (1) { - if (list_empty (&fsscrub->scrublist)) - pthread_cond_wait (&fsscrub->cond, &fsscrub->mutex); + br_child_t *child = NULL; + br_child_t *firstchild = NULL; - firstchild = NULL; - for (child = _br_scrubber_get_next_child (fsscrub); - child != firstchild; - child = _br_scrubber_get_next_child (fsscrub)) { + while (1) { + while (list_empty(&fsscrub->scrublist)) + pthread_cond_wait(&fsscrub->cond, &fsscrub->mutex); - if (!firstchild) - firstchild = child; + firstchild = NULL; + for (child = _br_scrubber_get_next_child(fsscrub); child != firstchild; + child = _br_scrubber_get_next_child(fsscrub)) { + if (!firstchild) + firstchild = child; - _br_scrubber_get_entry (child, fsentry); - if (*fsentry) - break; - } + _br_scrubber_get_entry(child, fsentry); + if (*fsentry) + break; + } - if (*fsentry) - break; + if (*fsentry) + break; - /* nothing to work on.. wait till available */ - pthread_cond_wait (&fsscrub->cond, &fsscrub->mutex); - } + /* nothing to work on.. 
wait till available */ + pthread_cond_wait(&fsscrub->cond, &fsscrub->mutex); + } } static void -br_scrubber_pick_entry (struct br_scrubber *fsscrub, - struct br_fsscan_entry **fsentry) +br_scrubber_pick_entry(struct br_scrubber *fsscrub, + struct br_fsscan_entry **fsentry) { - pthread_cleanup_push (_br_lock_cleaner, &fsscrub->mutex); + pthread_cleanup_push(_br_lock_cleaner, &fsscrub->mutex); - pthread_mutex_lock (&fsscrub->mutex); - { - *fsentry = NULL; - _br_scrubber_find_scrubbable_entry (fsscrub, fsentry); - } - pthread_mutex_unlock (&fsscrub->mutex); + pthread_mutex_lock(&fsscrub->mutex); + { + *fsentry = NULL; + _br_scrubber_find_scrubbable_entry(fsscrub, fsentry); + } + pthread_mutex_unlock(&fsscrub->mutex); - pthread_cleanup_pop (0); + pthread_cleanup_pop(0); } struct br_scrub_entry { - gf_boolean_t scrubbed; - struct br_fsscan_entry *fsentry; + gf_boolean_t scrubbed; + struct br_fsscan_entry *fsentry; }; /** @@ -988,381 +1218,853 @@ struct br_scrub_entry { * in the ->pending queue or when an object is undergoing scrubbing. 
*/ static void -br_scrubber_entry_handle (void *arg) +br_scrubber_entry_handle(void *arg) { - struct br_scanfs *fsscan = NULL; - struct br_scrub_entry *sentry = NULL; - struct br_fsscan_entry *fsentry = NULL; + struct br_scanfs *fsscan = NULL; + struct br_scrub_entry *sentry = NULL; + struct br_fsscan_entry *fsentry = NULL; - sentry = arg; + sentry = arg; - fsentry = sentry->fsentry; - fsscan = fsentry->fsscan; + fsentry = sentry->fsentry; + fsscan = fsentry->fsscan; - LOCK (&fsscan->entrylock); - { - if (sentry->scrubbed) { - _br_fsscan_dec_entry_count (fsscan); - - /* cleanup ->entry */ - fsentry->data = NULL; - fsentry->fsscan = NULL; - loc_wipe (&fsentry->parent); - gf_dirent_entry_free (fsentry->entry); - - GF_FREE (sentry->fsentry); - } else { - /* (re)queue the entry again for scrub */ - _br_fsscan_collect_entry (fsscan, sentry->fsentry); - } + LOCK(&fsscan->entrylock); + { + if (sentry->scrubbed) { + _br_fsscan_dec_entry_count(fsscan); + + /* cleanup ->entry */ + fsentry->data = NULL; + fsentry->fsscan = NULL; + loc_wipe(&fsentry->parent); + gf_dirent_entry_free(fsentry->entry); + + GF_FREE(sentry->fsentry); + } else { + /* (re)queue the entry again for scrub */ + _br_fsscan_collect_entry(fsscan, sentry->fsentry); } - UNLOCK (&fsscan->entrylock); + } + UNLOCK(&fsscan->entrylock); } static void -br_scrubber_scrub_entry (xlator_t *this, struct br_fsscan_entry *fsentry) +br_scrubber_scrub_entry(xlator_t *this, struct br_fsscan_entry *fsentry) { - struct br_scrub_entry sentry = {0, }; - - sentry.scrubbed = 0; - sentry.fsentry = fsentry; - - pthread_cleanup_push (br_scrubber_entry_handle, &sentry); - { - (void) br_scrubber_scrub_begin (this, fsentry); - sentry.scrubbed = 1; - } - pthread_cleanup_pop (1); + struct br_scrub_entry sentry = { + 0, + }; + + sentry.scrubbed = 0; + sentry.fsentry = fsentry; + + pthread_cleanup_push(br_scrubber_entry_handle, &sentry); + { + (void)br_scrubber_scrub_begin(this, fsentry); + sentry.scrubbed = 1; + } + 
pthread_cleanup_pop(1); } -void *br_scrubber_proc (void *arg) +void * +br_scrubber_proc(void *arg) { - xlator_t *this = NULL; - struct br_scrubber *fsscrub = NULL; - struct br_fsscan_entry *fsentry = NULL; + xlator_t *this = NULL; + struct br_scrubber *fsscrub = NULL; + struct br_fsscan_entry *fsentry = NULL; - fsscrub = arg; - THIS = this = fsscrub->this; + fsscrub = arg; + THIS = this = fsscrub->this; - while (1) { - br_scrubber_pick_entry (fsscrub, &fsentry); - br_scrubber_scrub_entry (this, fsentry); - sleep (1); - } + while (1) { + br_scrubber_pick_entry(fsscrub, &fsentry); + br_scrubber_scrub_entry(this, fsentry); + sleep(1); + } - return NULL; + return NULL; } static int32_t -br_scrubber_scale_up (xlator_t *this, - struct br_scrubber *fsscrub, - unsigned int v1, unsigned int v2) +br_scrubber_scale_up(xlator_t *this, struct br_scrubber *fsscrub, + unsigned int v1, unsigned int v2) { - int i = 0; - int32_t ret = -1; - int diff = 0; - struct br_scrubbers *scrub = NULL; - - diff = (int)(v2 - v1); - - gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCALING_UP_SCRUBBER, - "Scaling up scrubbers [%d => %d]", v1, v2); - - for (i = 0; i < diff; i++) { - scrub = GF_CALLOC (diff, sizeof (*scrub), - gf_br_mt_br_scrubber_t); - if (!scrub) - break; - - INIT_LIST_HEAD (&scrub->list); - ret = gf_thread_create (&scrub->scrubthread, - NULL, br_scrubber_proc, fsscrub); - if (ret) - break; - - fsscrub->nr_scrubbers++; - list_add_tail (&scrub->list, &fsscrub->scrubbers); - } + int i = 0; + int32_t ret = -1; + int diff = 0; + struct br_scrubbers *scrub = NULL; - if ((i != diff) && !scrub) - goto error_return; + diff = (int)(v2 - v1); - if (i != diff) /* degraded scaling.. */ - gf_msg (this->name, GF_LOG_WARNING, 0, BRB_MSG_SCALE_UP_FAILED, - "Could not fully scale up to %d scrubber(s). 
Spawned " - "%d/%d [total scrubber(s): %d]", v2, i, diff, (v1 + i)); + gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCALING_UP_SCRUBBER, + "Scaling up scrubbers [%d => %d]", v1, v2); - return 0; + for (i = 0; i < diff; i++) { + scrub = GF_CALLOC(diff, sizeof(*scrub), gf_br_mt_br_scrubber_t); + if (!scrub) + break; - error_return: - return -1; -} + INIT_LIST_HEAD(&scrub->list); + ret = gf_thread_create(&scrub->scrubthread, NULL, br_scrubber_proc, + fsscrub, "brsproc"); + if (ret) + break; -static int32_t -br_scrubber_scale_down (xlator_t *this, - struct br_scrubber *fsscrub, - unsigned int v1, unsigned int v2) -{ - int i = 0; - int diff = 0; - int32_t ret = -1; - struct br_scrubbers *scrub = NULL; + fsscrub->nr_scrubbers++; + list_add_tail(&scrub->list, &fsscrub->scrubbers); + } - diff = (int)(v1 - v2); + if ((i != diff) && !scrub) + goto error_return; - gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCALE_DOWN_SCRUBBER, - "Scaling down scrubbers [%d => %d]", v1, v2); + if (i != diff) /* degraded scaling.. */ + gf_msg(this->name, GF_LOG_WARNING, 0, BRB_MSG_SCALE_UP_FAILED, + "Could not fully scale up to %d scrubber(s). Spawned " + "%d/%d [total scrubber(s): %d]", + v2, i, diff, (v1 + i)); - for (i = 0 ; i < diff; i++) { - scrub = list_first_entry - (&fsscrub->scrubbers, struct br_scrubbers, list); + return 0; - list_del_init (&scrub->list); - ret = gf_thread_cleanup_xint (scrub->scrubthread); - if (ret) - break; - GF_FREE (scrub); +error_return: + return -1; +} - fsscrub->nr_scrubbers--; - } +static int32_t +br_scrubber_scale_down(xlator_t *this, struct br_scrubber *fsscrub, + unsigned int v1, unsigned int v2) +{ + int i = 0; + int diff = 0; + int32_t ret = -1; + struct br_scrubbers *scrub = NULL; - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, 0, - BRB_MSG_SCALE_DOWN_FAILED, "Could not fully scale down " - "to %d scrubber(s). 
Terminated %d/%d [total " - "scrubber(s): %d]", v1, i, diff, (v2 - i)); - ret = 0; - } + diff = (int)(v1 - v2); + + gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCALE_DOWN_SCRUBBER, + "Scaling down scrubbers [%d => %d]", v1, v2); - return ret; + for (i = 0; i < diff; i++) { + scrub = list_first_entry(&fsscrub->scrubbers, struct br_scrubbers, + list); + + list_del_init(&scrub->list); + ret = gf_thread_cleanup_xint(scrub->scrubthread); + if (ret) + break; + GF_FREE(scrub); + + fsscrub->nr_scrubbers--; + } + + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, BRB_MSG_SCALE_DOWN_FAILED, + "Could not fully scale down " + "to %d scrubber(s). Terminated %d/%d [total " + "scrubber(s): %d]", + v1, i, diff, (v2 - i)); + ret = 0; + } + + return ret; } static int32_t -br_scrubber_configure (xlator_t *this, br_private_t *priv, - struct br_scrubber *fsscrub, scrub_throttle_t nthrottle) +br_scrubber_configure(xlator_t *this, br_private_t *priv, + struct br_scrubber *fsscrub, scrub_throttle_t nthrottle) { - int32_t ret = 0; - unsigned int v1 = 0; - unsigned int v2 = 0; + int32_t ret = 0; + unsigned int v1 = 0; + unsigned int v2 = 0; - v1 = fsscrub->nr_scrubbers; - v2 = br_scrubber_calc_scale (this, priv, nthrottle); + v1 = fsscrub->nr_scrubbers; + v2 = br_scrubber_calc_scale(this, priv, nthrottle); - if (v1 == v2) - return 0; + if (v1 == v2) + return 0; - if (v1 > v2) - ret = br_scrubber_scale_down (this, fsscrub, v1, v2); - else - ret = br_scrubber_scale_up (this, fsscrub, v1, v2); + if (v1 > v2) + ret = br_scrubber_scale_down(this, fsscrub, v1, v2); + else + ret = br_scrubber_scale_up(this, fsscrub, v1, v2); - return ret; + return ret; } static int32_t -br_scrubber_fetch_option (xlator_t *this, - char *opt, dict_t *options, char **value) +br_scrubber_fetch_option(xlator_t *this, char *opt, dict_t *options, + char **value) { - if (options) - GF_OPTION_RECONF (opt, *value, options, str, error_return); - else - GF_OPTION_INIT (opt, *value, str, error_return); + if (options) + 
GF_OPTION_RECONF(opt, *value, options, str, error_return); + else + GF_OPTION_INIT(opt, *value, str, error_return); - return 0; + return 0; - error_return: - return -1; +error_return: + return -1; } /* internal "throttle" override */ -#define BR_SCRUB_STALLED "STALLED" +#define BR_SCRUB_STALLED "STALLED" /* TODO: token buket spec */ static int32_t -br_scrubber_handle_throttle (xlator_t *this, br_private_t *priv, - dict_t *options, gf_boolean_t scrubstall) +br_scrubber_handle_throttle(xlator_t *this, br_private_t *priv, dict_t *options, + gf_boolean_t scrubstall) { - int32_t ret = 0; - char *tmp = NULL; - struct br_scrubber *fsscrub = NULL; - scrub_throttle_t nthrottle = BR_SCRUB_THROTTLE_VOID; + int32_t ret = 0; + char *tmp = NULL; + struct br_scrubber *fsscrub = NULL; + scrub_throttle_t nthrottle = BR_SCRUB_THROTTLE_VOID; + + fsscrub = &priv->fsscrub; + fsscrub->throttle_reconf = _gf_false; + + ret = br_scrubber_fetch_option(this, "scrub-throttle", options, &tmp); + if (ret) + goto error_return; + + if (scrubstall) + tmp = BR_SCRUB_STALLED; + + if (strcasecmp(tmp, "lazy") == 0) + nthrottle = BR_SCRUB_THROTTLE_LAZY; + else if (strcasecmp(tmp, "normal") == 0) + nthrottle = BR_SCRUB_THROTTLE_NORMAL; + else if (strcasecmp(tmp, "aggressive") == 0) + nthrottle = BR_SCRUB_THROTTLE_AGGRESSIVE; + else if (strcasecmp(tmp, BR_SCRUB_STALLED) == 0) + nthrottle = BR_SCRUB_THROTTLE_STALLED; + else + goto error_return; + + /* on failure old throttling value is preserved */ + ret = br_scrubber_configure(this, priv, fsscrub, nthrottle); + if (ret) + goto error_return; + + if (fsscrub->throttle != nthrottle) + fsscrub->throttle_reconf = _gf_true; + + fsscrub->throttle = nthrottle; + return 0; + +error_return: + return -1; +} - fsscrub = &priv->fsscrub; - fsscrub->throttle_reconf = _gf_false; +static int32_t +br_scrubber_handle_stall(xlator_t *this, br_private_t *priv, dict_t *options, + gf_boolean_t *scrubstall) +{ + int32_t ret = 0; + char *tmp = NULL; - ret = 
br_scrubber_fetch_option (this, "scrub-throttle", options, &tmp); - if (ret) - goto error_return; - - if (scrubstall) - tmp = BR_SCRUB_STALLED; - - if (strcasecmp (tmp, "lazy") == 0) - nthrottle = BR_SCRUB_THROTTLE_LAZY; - else if (strcasecmp (tmp, "normal") == 0) - nthrottle = BR_SCRUB_THROTTLE_NORMAL; - else if (strcasecmp (tmp, "aggressive") == 0) - nthrottle = BR_SCRUB_THROTTLE_AGGRESSIVE; - else if (strcasecmp (tmp, BR_SCRUB_STALLED) == 0) - nthrottle = BR_SCRUB_THROTTLE_STALLED; - else - goto error_return; - - /* on failure old throttling value is preserved */ - ret = br_scrubber_configure (this, priv, fsscrub, nthrottle); - if (ret) - goto error_return; + ret = br_scrubber_fetch_option(this, "scrub-state", options, &tmp); + if (ret) + goto error_return; - if (fsscrub->throttle != nthrottle) - fsscrub->throttle_reconf = _gf_true; + if (strcasecmp(tmp, "pause") == 0) /* anything else is active */ + *scrubstall = _gf_true; - fsscrub->throttle = nthrottle; - return 0; + return 0; - error_return: - return -1; +error_return: + return -1; } static int32_t -br_scrubber_handle_stall (xlator_t *this, br_private_t *priv, - dict_t *options, gf_boolean_t *scrubstall) +br_scrubber_handle_freq(xlator_t *this, br_private_t *priv, dict_t *options, + gf_boolean_t scrubstall) { - int32_t ret = 0; - char *tmp = NULL; + int32_t ret = -1; + char *tmp = NULL; + scrub_freq_t frequency = BR_FSSCRUB_FREQ_HOURLY; + struct br_scrubber *fsscrub = NULL; + + fsscrub = &priv->fsscrub; + fsscrub->frequency_reconf = _gf_true; + + ret = br_scrubber_fetch_option(this, "scrub-freq", options, &tmp); + if (ret) + goto error_return; + + if (scrubstall) + tmp = BR_SCRUB_STALLED; + + if (strcasecmp(tmp, "hourly") == 0) { + frequency = BR_FSSCRUB_FREQ_HOURLY; + } else if (strcasecmp(tmp, "daily") == 0) { + frequency = BR_FSSCRUB_FREQ_DAILY; + } else if (strcasecmp(tmp, "weekly") == 0) { + frequency = BR_FSSCRUB_FREQ_WEEKLY; + } else if (strcasecmp(tmp, "biweekly") == 0) { + frequency = 
BR_FSSCRUB_FREQ_BIWEEKLY; + } else if (strcasecmp(tmp, "monthly") == 0) { + frequency = BR_FSSCRUB_FREQ_MONTHLY; + } else if (strcasecmp(tmp, "minute") == 0) { + frequency = BR_FSSCRUB_FREQ_MINUTE; + } else if (strcasecmp(tmp, BR_SCRUB_STALLED) == 0) { + frequency = BR_FSSCRUB_FREQ_STALLED; + } else + goto error_return; + + if (fsscrub->frequency == frequency) + fsscrub->frequency_reconf = _gf_false; + else + fsscrub->frequency = frequency; + + return 0; + +error_return: + return -1; +} - ret = br_scrubber_fetch_option (this, "scrub-state", options, &tmp); - if (ret) - goto error_return; +static void +br_scrubber_log_option(xlator_t *this, br_private_t *priv, + gf_boolean_t scrubstall) +{ + struct br_scrubber *fsscrub = &priv->fsscrub; + char *scrub_throttle_str[] = { + [BR_SCRUB_THROTTLE_LAZY] = "lazy", + [BR_SCRUB_THROTTLE_NORMAL] = "normal", + [BR_SCRUB_THROTTLE_AGGRESSIVE] = "aggressive", + [BR_SCRUB_THROTTLE_STALLED] = "stalled", + }; + + char *scrub_freq_str[] = { + [0] = "", + [BR_FSSCRUB_FREQ_HOURLY] = "hourly", + [BR_FSSCRUB_FREQ_DAILY] = "daily", + [BR_FSSCRUB_FREQ_WEEKLY] = "weekly", + [BR_FSSCRUB_FREQ_BIWEEKLY] = "biweekly", + [BR_FSSCRUB_FREQ_MONTHLY] = "monthly (30 days)", + [BR_FSSCRUB_FREQ_MINUTE] = "every minute", + }; + + if (scrubstall) + return; /* logged as pause */ + + if (fsscrub->frequency_reconf || fsscrub->throttle_reconf) { + if (fsscrub->throttle == BR_SCRUB_THROTTLE_VOID) + return; + gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_TUNABLE, + "SCRUB TUNABLES:: [Frequency: %s, Throttle: %s]", + scrub_freq_str[fsscrub->frequency], + scrub_throttle_str[fsscrub->throttle]); + } +} - if (strcasecmp (tmp, "pause") == 0) /* anything else is active */ - *scrubstall = _gf_true; +int32_t +br_scrubber_handle_options(xlator_t *this, br_private_t *priv, dict_t *options) +{ + int32_t ret = 0; + gf_boolean_t scrubstall = _gf_false; /* not as dangerous as it sounds */ - return 0; + ret = br_scrubber_handle_stall(this, priv, options, &scrubstall); + if 
(ret) + goto error_return; - error_return: - return -1; + ret = br_scrubber_handle_throttle(this, priv, options, scrubstall); + if (ret) + goto error_return; + + ret = br_scrubber_handle_freq(this, priv, options, scrubstall); + if (ret) + goto error_return; + + br_scrubber_log_option(this, priv, scrubstall); + + return 0; + +error_return: + return -1; } -static int32_t -br_scrubber_handle_freq (xlator_t *this, br_private_t *priv, - dict_t *options, gf_boolean_t scrubstall) +inode_t * +br_lookup_bad_obj_dir(xlator_t *this, br_child_t *child, uuid_t gfid) { - int32_t ret = -1; - char *tmp = NULL; - scrub_freq_t frequency = BR_FSSCRUB_FREQ_HOURLY; - struct br_scrubber *fsscrub = NULL; + struct iatt statbuf = { + 0, + }; + inode_table_t *table = NULL; + int32_t ret = -1; + loc_t loc = { + 0, + }; + inode_t *linked_inode = NULL; + int32_t op_errno = 0; + + GF_VALIDATE_OR_GOTO("bit-rot-scrubber", this, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + GF_VALIDATE_OR_GOTO(this->name, child, out); + + table = child->table; + + loc.inode = inode_new(table); + if (!loc.inode) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY, + "failed to allocate a new inode for" + "bad object directory"); + goto out; + } + + gf_uuid_copy(loc.gfid, gfid); + + ret = syncop_lookup(child->xl, &loc, &statbuf, NULL, NULL, NULL); + if (ret < 0) { + op_errno = -ret; + gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_LOOKUP_FAILED, + "failed to lookup the bad " + "objects directory (gfid: %s (%s))", + uuid_utoa(gfid), strerror(op_errno)); + goto out; + } + + linked_inode = inode_link(loc.inode, NULL, NULL, &statbuf); + if (linked_inode) + inode_lookup(linked_inode); + +out: + loc_wipe(&loc); + return linked_inode; +} - fsscrub = &priv->fsscrub; - fsscrub->frequency_reconf = _gf_true; +int32_t +br_read_bad_object_dir(xlator_t *this, br_child_t *child, fd_t *fd, + dict_t *dict) +{ + gf_dirent_t entries; + gf_dirent_t *entry = NULL; + int32_t ret = -1; + off_t offset = 0; + 
int32_t count = 0; + char key[32] = { + 0, + }; + dict_t *out_dict = NULL; + + INIT_LIST_HEAD(&entries.list); + + while ((ret = syncop_readdir(child->xl, fd, 131072, offset, &entries, NULL, + &out_dict))) { + if (ret < 0) + goto out; + + list_for_each_entry(entry, &entries.list, list) + { + offset = entry->d_off; + + snprintf(key, sizeof(key), "quarantine-%d", count); + + /* + * ignore the dict_set errors for now. The intention is + * to get as many bad objects as possible instead of + * erroring out at the first failure. + */ + ret = dict_set_dynstr_with_alloc(dict, key, entry->d_name); + if (!ret) + count++; + + if (out_dict) { + dict_copy(out_dict, dict); + dict_unref(out_dict); + out_dict = NULL; + } + } + + gf_dirent_free(&entries); + } + + ret = count; + ret = dict_set_int32_sizen(dict, "count", count); + +out: + return ret; +} - ret = br_scrubber_fetch_option (this, "scrub-freq", options, &tmp); +int32_t +br_get_bad_objects_from_child(xlator_t *this, dict_t *dict, br_child_t *child) +{ + inode_t *inode = NULL; + inode_table_t *table = NULL; + fd_t *fd = NULL; + int32_t ret = -1; + loc_t loc = { + 0, + }; + int32_t op_errno = 0; + + GF_VALIDATE_OR_GOTO("bit-rot-scrubber", this, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + GF_VALIDATE_OR_GOTO(this->name, child, out); + GF_VALIDATE_OR_GOTO(this->name, dict, out); + + table = child->table; + + inode = inode_find(table, BR_BAD_OBJ_CONTAINER); + if (!inode) { + inode = br_lookup_bad_obj_dir(this, child, BR_BAD_OBJ_CONTAINER); + if (!inode) + goto out; + } + + fd = fd_create(inode, 0); + if (!fd) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_FD_CREATE_FAILED, + "fd creation for the bad " + "objects directory failed (gfid: %s)", + uuid_utoa(BR_BAD_OBJ_CONTAINER)); + goto out; + } + + loc.inode = inode; + gf_uuid_copy(loc.gfid, inode->gfid); + + ret = syncop_opendir(child->xl, &loc, fd, NULL, NULL); + if (ret < 0) { + op_errno = -ret; + fd_unref(fd); + fd = NULL; + gf_msg(this->name, 
GF_LOG_ERROR, op_errno, BRB_MSG_FD_CREATE_FAILED, + "failed to open the bad " + "objects directory %s", + uuid_utoa(BR_BAD_OBJ_CONTAINER)); + goto out; + } + + fd_bind(fd); + + ret = br_read_bad_object_dir(this, child, fd, dict); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_BAD_OBJ_READDIR_FAIL, + "readdir of the bad " + "objects directory (%s) failed ", + uuid_utoa(BR_BAD_OBJ_CONTAINER)); + goto out; + } + + ret = 0; + +out: + loc_wipe(&loc); + if (fd) + fd_unref(fd); + return ret; +} + +int32_t +br_collect_bad_objects_of_child(xlator_t *this, br_child_t *child, dict_t *dict, + dict_t *child_dict, int32_t total_count) +{ + int32_t ret = -1; + int32_t count = 0; + char key[32] = { + 0, + }; + char main_key[32] = { + 0, + }; + int32_t j = 0; + int32_t tmp_count = 0; + char *entry = NULL; + char tmp[PATH_MAX] = { + 0, + }; + char *path = NULL; + int32_t len = 0; + + ret = dict_get_int32_sizen(child_dict, "count", &count); + if (ret) + goto out; + + tmp_count = total_count; + + for (j = 0; j < count; j++) { + len = snprintf(key, sizeof(key), "quarantine-%d", j); + ret = dict_get_strn(child_dict, key, len, &entry); if (ret) - goto error_return; - - if (scrubstall) - tmp = BR_SCRUB_STALLED; - - if (strcasecmp (tmp, "hourly") == 0) { - frequency = BR_FSSCRUB_FREQ_HOURLY; - } else if (strcasecmp (tmp, "daily") == 0) { - frequency = BR_FSSCRUB_FREQ_DAILY; - } else if (strcasecmp (tmp, "weekly") == 0) { - frequency = BR_FSSCRUB_FREQ_WEEKLY; - } else if (strcasecmp (tmp, "biweekly") == 0) { - frequency = BR_FSSCRUB_FREQ_BIWEEKLY; - } else if (strcasecmp (tmp, "monthly") == 0) { - frequency = BR_FSSCRUB_FREQ_MONTHLY; - } else if (strcasecmp (tmp, BR_SCRUB_STALLED) == 0) { - frequency = BR_FSSCRUB_FREQ_STALLED; - } else - goto error_return; - - if (fsscrub->frequency == frequency) - fsscrub->frequency_reconf = _gf_false; - else - fsscrub->frequency = frequency; + continue; - return 0; + ret = dict_get_str(child_dict, entry, &path); + len = snprintf(tmp, 
PATH_MAX, "%s ==> BRICK: %s\n path: %s", entry, + child->brick_path, path); + if ((len < 0) || (len >= PATH_MAX)) { + continue; + } + snprintf(main_key, sizeof(main_key), "quarantine-%d", tmp_count); - error_return: - return -1; + ret = dict_set_dynstr_with_alloc(dict, main_key, tmp); + if (!ret) + tmp_count++; + path = NULL; + } + + ret = tmp_count; + +out: + return ret; } -static void br_scrubber_log_option (xlator_t *this, - br_private_t *priv, gf_boolean_t scrubstall) +int32_t +br_collect_bad_objects_from_children(xlator_t *this, dict_t *dict) { - struct br_scrubber *fsscrub = &priv->fsscrub; - char *scrub_throttle_str[] = { - [BR_SCRUB_THROTTLE_LAZY] = "lazy", - [BR_SCRUB_THROTTLE_NORMAL] = "normal", - [BR_SCRUB_THROTTLE_AGGRESSIVE] = "aggressive", - }; - - char *scrub_freq_str[] = { - [BR_FSSCRUB_FREQ_HOURLY] = "hourly", - [BR_FSSCRUB_FREQ_DAILY] = "daily", - [BR_FSSCRUB_FREQ_WEEKLY] = "weekly", - [BR_FSSCRUB_FREQ_BIWEEKLY] = "biweekly", - [BR_FSSCRUB_FREQ_MONTHLY] = "monthly (30 days)", - }; - - if (scrubstall) - return; /* logged as pause */ - - if (fsscrub->frequency_reconf || fsscrub->throttle_reconf) { - gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_TUNABLE, - "SCRUB TUNABLES:: [Frequency: %s, Throttle: %s]", - scrub_freq_str[fsscrub->frequency], - scrub_throttle_str[fsscrub->throttle]); + int32_t ret = -1; + dict_t *child_dict = NULL; + int32_t i = 0; + int32_t total_count = 0; + br_child_t *child = NULL; + br_private_t *priv = NULL; + dict_t *tmp_dict = NULL; + + priv = this->private; + tmp_dict = dict; + + for (i = 0; i < priv->child_count; i++) { + child = &priv->children[i]; + GF_ASSERT(child); + if (!_br_is_child_connected(child)) + continue; + + child_dict = dict_new(); + if (!child_dict) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY, + "failed to allocate dict"); + continue; } + ret = br_get_bad_objects_from_child(this, child_dict, child); + /* + * Continue asking the remaining children for the list of + * bad objects even 
though getting the list from one of them + * fails. + */ + if (ret) { + dict_unref(child_dict); + continue; + } + + ret = br_collect_bad_objects_of_child(this, child, tmp_dict, child_dict, + total_count); + if (ret < 0) { + dict_unref(child_dict); + continue; + } + + total_count = ret; + dict_unref(child_dict); + child_dict = NULL; + } + + ret = dict_set_int32(tmp_dict, "total-count", total_count); + + return ret; } int32_t -br_scrubber_handle_options (xlator_t *this, br_private_t *priv, dict_t *options) +br_get_bad_objects_list(xlator_t *this, dict_t **dict) { - int32_t ret = 0; - gf_boolean_t scrubstall = _gf_false; /* not as dangerous as it sounds */ + int32_t ret = -1; + dict_t *tmp_dict = NULL; + + GF_VALIDATE_OR_GOTO("bir-rot-scrubber", this, out); + GF_VALIDATE_OR_GOTO(this->name, dict, out); + + tmp_dict = *dict; + if (!tmp_dict) { + tmp_dict = dict_new(); + if (!tmp_dict) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY, + "failed to allocate dict"); + goto out; + } + *dict = tmp_dict; + } - ret = br_scrubber_handle_stall (this, priv, options, &scrubstall); - if (ret) - goto error_return; + ret = br_collect_bad_objects_from_children(this, tmp_dict); - ret = br_scrubber_handle_throttle (this, priv, options, scrubstall); - if (ret) - goto error_return; +out: + return ret; +} - ret = br_scrubber_handle_freq (this, priv, options, scrubstall); - if (ret) - goto error_return; +static int +wait_for_scrub_to_finish(xlator_t *this) +{ + int ret = -1; + br_private_t *priv = NULL; + struct br_monitor *scrub_monitor = NULL; + + priv = this->private; + scrub_monitor = &priv->scrub_monitor; + + GF_VALIDATE_OR_GOTO("bit-rot", scrub_monitor, out); + GF_VALIDATE_OR_GOTO("bit-rot", this, out); + + gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO, + "Waiting for all children to start and finish scrub"); + + pthread_mutex_lock(&scrub_monitor->donelock); + { + while (!scrub_monitor->done) + pthread_cond_wait(&scrub_monitor->donecond, + 
&scrub_monitor->donelock); + } + pthread_mutex_unlock(&scrub_monitor->donelock); + ret = 0; +out: + return ret; +} + +/** + * This function is executed in a separate thread. This is scrubber monitor + * thread that takes care of state machine. + */ +void * +br_monitor_thread(void *arg) +{ + int32_t ret = 0; + xlator_t *this = NULL; + br_private_t *priv = NULL; + struct br_monitor *scrub_monitor = NULL; + + this = arg; + priv = this->private; + + /* + * Since, this is the topmost xlator, THIS has to be set by bit-rot + * xlator itself (STACK_WIND won't help in this case). Also it has + * to be done for each thread that gets spawned. Otherwise, a new + * thread will get global_xlator's pointer when it does "THIS". + */ + THIS = this; + + scrub_monitor = &priv->scrub_monitor; + + pthread_mutex_lock(&scrub_monitor->mutex); + { + while (!scrub_monitor->inited) + pthread_cond_wait(&scrub_monitor->cond, &scrub_monitor->mutex); + } + pthread_mutex_unlock(&scrub_monitor->mutex); + + /* this needs to be serialized with reconfigure() */ + pthread_mutex_lock(&priv->lock); + { + ret = br_scrub_state_machine(this, _gf_false); + } + pthread_mutex_unlock(&priv->lock); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, BRB_MSG_SSM_FAILED, + "Scrub state machine failed"); + goto out; + } + + while (1) { + /* Wait for all children to finish scrubbing */ + ret = wait_for_scrub_to_finish(this); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, BRB_MSG_SCRUB_WAIT_FAILED, + "Scrub wait failed"); + goto out; + } - br_scrubber_log_option (this, priv, scrubstall); + /* scrub exit criteria: Move the state to PENDING */ + br_scrubber_exit_control(this); + } - return 0; +out: + return NULL; +} - error_return: - return -1; +static void +br_set_scrub_state(struct br_monitor *scrub_monitor, br_scrub_state_t state) +{ + LOCK(&scrub_monitor->lock); + { + _br_monitor_set_scrub_state(scrub_monitor, state); + } + UNLOCK(&scrub_monitor->lock); } int32_t -br_scrubber_init (xlator_t *this, 
br_private_t *priv) +br_scrubber_monitor_init(xlator_t *this, br_private_t *priv) { - struct br_scrubber *fsscrub = NULL; + struct br_monitor *scrub_monitor = NULL; + int ret = 0; - priv->tbf = br_tbf_init (NULL, 0); - if (!priv->tbf) - return -1; + scrub_monitor = &priv->scrub_monitor; - fsscrub = &priv->fsscrub; + LOCK_INIT(&scrub_monitor->lock); + scrub_monitor->this = this; - fsscrub->this = this; - fsscrub->throttle = BR_SCRUB_THROTTLE_VOID; + scrub_monitor->inited = _gf_false; + pthread_mutex_init(&scrub_monitor->mutex, NULL); + pthread_cond_init(&scrub_monitor->cond, NULL); - pthread_mutex_init (&fsscrub->mutex, NULL); - pthread_cond_init (&fsscrub->cond, NULL); + scrub_monitor->kick = _gf_false; + scrub_monitor->active_child_count = 0; + pthread_mutex_init(&scrub_monitor->wakelock, NULL); + pthread_cond_init(&scrub_monitor->wakecond, NULL); - fsscrub->nr_scrubbers = 0; - INIT_LIST_HEAD (&fsscrub->scrubbers); - INIT_LIST_HEAD (&fsscrub->scrublist); + scrub_monitor->done = _gf_false; + pthread_mutex_init(&scrub_monitor->donelock, NULL); + pthread_cond_init(&scrub_monitor->donecond, NULL); - return 0; + /* Set the state to INACTIVE */ + br_set_scrub_state(&priv->scrub_monitor, BR_SCRUB_STATE_INACTIVE); + + /* Start the monitor thread */ + ret = gf_thread_create(&scrub_monitor->thread, NULL, br_monitor_thread, + this, "brmon"); + if (ret != 0) { + gf_msg(this->name, GF_LOG_ERROR, -ret, BRB_MSG_SPAWN_FAILED, + "monitor thread creation failed"); + ret = -1; + goto err; + } + + return 0; +err: + pthread_mutex_destroy(&scrub_monitor->mutex); + pthread_cond_destroy(&scrub_monitor->cond); + + pthread_mutex_destroy(&scrub_monitor->wakelock); + pthread_cond_destroy(&scrub_monitor->wakecond); + + pthread_mutex_destroy(&scrub_monitor->donelock); + pthread_cond_destroy(&scrub_monitor->donecond); + + LOCK_DESTROY(&scrub_monitor->lock); + + return ret; +} + +int32_t +br_scrubber_init(xlator_t *this, br_private_t *priv) +{ + struct br_scrubber *fsscrub = NULL; + int ret = 0; 
+ + priv->tbf = tbf_init(NULL, 0); + if (!priv->tbf) + return -1; + + ret = br_scrubber_monitor_init(this, priv); + if (ret) + return -1; + + fsscrub = &priv->fsscrub; + + fsscrub->this = this; + fsscrub->throttle = BR_SCRUB_THROTTLE_VOID; + + pthread_mutex_init(&fsscrub->mutex, NULL); + pthread_cond_init(&fsscrub->cond, NULL); + + fsscrub->nr_scrubbers = 0; + INIT_LIST_HEAD(&fsscrub->scrubbers); + INIT_LIST_HEAD(&fsscrub->scrublist); + + return 0; } diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h index 427153c4bd7..4e5f67bc021 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h +++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h @@ -8,21 +8,39 @@ cases as published by the Free Software Foundation. */ -#ifndef __BIT_ROT__SCRUB_H__ +#ifndef __BIT_ROT_SCRUB_H__ #define __BIT_ROT_SCRUB_H__ -#include "xlator.h" +#include <glusterfs/xlator.h> #include "bit-rot.h" -void *br_fsscanner (void *); +void * +br_fsscanner(void *); -int32_t br_fsscan_schedule (xlator_t *, br_child_t *); -int32_t br_fsscan_reschedule (xlator_t *, br_child_t *); -int32_t br_fsscan_activate (xlator_t *, br_child_t *); -int32_t br_fsscan_deactivate (xlator_t *, br_child_t *); +int32_t +br_fsscan_schedule(xlator_t *); +int32_t +br_fsscan_reschedule(xlator_t *); +int32_t +br_fsscan_activate(xlator_t *); +int32_t +br_fsscan_deactivate(xlator_t *); +int32_t +br_fsscan_ondemand(xlator_t *); -int32_t br_scrubber_handle_options (xlator_t *, br_private_t *, dict_t *); +int32_t +br_scrubber_handle_options(xlator_t *, br_private_t *, dict_t *); -int32_t br_scrubber_init (xlator_t *, br_private_t *); +int32_t +br_scrubber_monitor_init(xlator_t *, br_private_t *); + +int32_t +br_scrubber_init(xlator_t *, br_private_t *); + +int32_t +br_collect_bad_objects_from_children(xlator_t *this, dict_t *dict); + +void +br_child_set_scrub_state(br_child_t *, gf_boolean_t); #endif /* __BIT_ROT_SCRUB_H__ */ diff --git 
a/xlators/features/bit-rot/src/bitd/bit-rot-ssm.c b/xlators/features/bit-rot/src/bitd/bit-rot-ssm.c index fcffc04feda..753e31a3b23 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot-ssm.c +++ b/xlators/features/bit-rot/src/bitd/bit-rot-ssm.c @@ -12,82 +12,113 @@ #include "bit-rot-scrub.h" #include "bit-rot-bitd-messages.h" -int br_scrub_ssm_noop (xlator_t *this, br_child_t *child) +int +br_scrub_ssm_noop(xlator_t *this) { - return 0; + return 0; } int -br_scrub_ssm_state_pause (xlator_t *this, br_child_t *child) +br_scrub_ssm_state_pause(xlator_t *this) { - gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_GENERIC_SSM_INFO, - "Scrubber paused [Brick: %s]", child->brick_path); - _br_child_set_scrub_state (child, BR_SCRUB_STATE_PAUSED); - return 0; + br_private_t *priv = NULL; + struct br_monitor *scrub_monitor = NULL; + + priv = this->private; + scrub_monitor = &priv->scrub_monitor; + + gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_GENERIC_SSM_INFO, + "Scrubber paused"); + _br_monitor_set_scrub_state(scrub_monitor, BR_SCRUB_STATE_PAUSED); + return 0; } int -br_scrub_ssm_state_ipause (xlator_t *this, br_child_t *child) +br_scrub_ssm_state_ipause(xlator_t *this) { - gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_GENERIC_SSM_INFO, - "Scrubber paused [Brick: %s]", child->brick_path); - _br_child_set_scrub_state (child, BR_SCRUB_STATE_IPAUSED); - return 0; + br_private_t *priv = NULL; + struct br_monitor *scrub_monitor = NULL; + + priv = this->private; + scrub_monitor = &priv->scrub_monitor; + + gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_GENERIC_SSM_INFO, + "Scrubber paused"); + _br_monitor_set_scrub_state(scrub_monitor, BR_SCRUB_STATE_IPAUSED); + return 0; } int -br_scrub_ssm_state_active (xlator_t *this, br_child_t *child) +br_scrub_ssm_state_active(xlator_t *this) { - struct br_scanfs *fsscan = &child->fsscan; + br_private_t *priv = NULL; + struct br_monitor *scrub_monitor = NULL; + + priv = this->private; + scrub_monitor = &priv->scrub_monitor; - if (fsscan->over) { - (void) 
br_fsscan_activate (this, child); - } else { - gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_GENERIC_SSM_INFO, - "Scrubbing resumed [Brick %s]", child->brick_path); - _br_child_set_scrub_state (child, BR_SCRUB_STATE_ACTIVE); - } + if (scrub_monitor->done) { + (void)br_fsscan_activate(this); + } else { + gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_GENERIC_SSM_INFO, + "Scrubbing resumed"); + _br_monitor_set_scrub_state(scrub_monitor, BR_SCRUB_STATE_ACTIVE); + } - return 0; + return 0; } int -br_scrub_ssm_state_stall (xlator_t *this, br_child_t *child) +br_scrub_ssm_state_stall(xlator_t *this) { - gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_GENERIC_SSM_INFO, - "Brick [%s] is under active scrubbing. Pausing scrub..", - child->brick_path); - _br_child_set_scrub_state (child, BR_SCRUB_STATE_STALLED); - return 0; + br_private_t *priv = NULL; + struct br_monitor *scrub_monitor = NULL; + + priv = this->private; + scrub_monitor = &priv->scrub_monitor; + + gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_GENERIC_SSM_INFO, + "Volume is under active scrubbing. 
Pausing scrub.."); + _br_monitor_set_scrub_state(scrub_monitor, BR_SCRUB_STATE_STALLED); + return 0; } -static br_scrub_ssm_call * -br_scrub_ssm[BR_SCRUB_MAXSTATES][BR_SCRUB_MAXEVENTS] = { - {br_fsscan_schedule, br_scrub_ssm_state_ipause}, /* INACTIVE */ - {br_fsscan_reschedule, br_fsscan_deactivate}, /* PENDING */ - {br_scrub_ssm_noop, br_scrub_ssm_state_stall}, /* ACTIVE */ - {br_fsscan_activate, br_scrub_ssm_noop}, /* PAUSED */ - {br_fsscan_schedule, br_scrub_ssm_noop}, /* IPAUSED */ - {br_scrub_ssm_state_active, br_scrub_ssm_noop}, /* STALLED */ +static br_scrub_ssm_call *br_scrub_ssm[BR_SCRUB_MAXSTATES][BR_SCRUB_MAXEVENTS] = + { + /* INACTIVE */ + {br_fsscan_schedule, br_scrub_ssm_state_ipause, br_scrub_ssm_noop}, + /* PENDING */ + {br_fsscan_reschedule, br_fsscan_deactivate, br_fsscan_ondemand}, + /* ACTIVE */ + {br_scrub_ssm_noop, br_scrub_ssm_state_stall, br_scrub_ssm_noop}, + /* PAUSED */ + {br_fsscan_activate, br_scrub_ssm_noop, br_scrub_ssm_noop}, + /* IPAUSED */ + {br_fsscan_schedule, br_scrub_ssm_noop, br_scrub_ssm_noop}, + /* STALLED */ + {br_scrub_ssm_state_active, br_scrub_ssm_noop, br_scrub_ssm_noop}, }; int32_t -br_scrub_state_machine (xlator_t *this, br_child_t *child) +br_scrub_state_machine(xlator_t *this, gf_boolean_t scrub_ondemand) { - br_private_t *priv = NULL; - br_scrub_ssm_call *call = NULL; - struct br_scanfs *fsscan = NULL; - struct br_scrubber *fsscrub = NULL; - br_scrub_state_t currstate = 0; - br_scrub_event_t event = 0; - - priv = this->private; - fsscan = &child->fsscan; - fsscrub = &priv->fsscrub; - - currstate = fsscan->state; - event = _br_child_get_scrub_event (fsscrub); - - call = br_scrub_ssm[currstate][event]; - return call (this, child); + br_private_t *priv = NULL; + br_scrub_ssm_call *call = NULL; + struct br_scrubber *fsscrub = NULL; + br_scrub_state_t currstate = 0; + br_scrub_event_t event = 0; + struct br_monitor *scrub_monitor = NULL; + + priv = this->private; + fsscrub = &priv->fsscrub; + scrub_monitor = 
&priv->scrub_monitor; + + currstate = scrub_monitor->state; + if (scrub_ondemand) + event = BR_SCRUB_EVENT_ONDEMAND; + else + event = _br_child_get_scrub_event(fsscrub); + + call = br_scrub_ssm[currstate][event]; + return call(this); } diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-ssm.h b/xlators/features/bit-rot/src/bitd/bit-rot-ssm.h index 72fd62b3630..37b45a42eac 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot-ssm.h +++ b/xlators/features/bit-rot/src/bitd/bit-rot-ssm.h @@ -11,26 +11,28 @@ #ifndef __BIT_ROT_SSM_H__ #define __BIT_ROT_SSM_H__ -#include "xlator.h" +#include <glusterfs/xlator.h> typedef enum br_scrub_state { - BR_SCRUB_STATE_INACTIVE = 0, - BR_SCRUB_STATE_PENDING, - BR_SCRUB_STATE_ACTIVE, - BR_SCRUB_STATE_PAUSED, - BR_SCRUB_STATE_IPAUSED, - BR_SCRUB_STATE_STALLED, - BR_SCRUB_MAXSTATES, + BR_SCRUB_STATE_INACTIVE = 0, + BR_SCRUB_STATE_PENDING, + BR_SCRUB_STATE_ACTIVE, + BR_SCRUB_STATE_PAUSED, + BR_SCRUB_STATE_IPAUSED, + BR_SCRUB_STATE_STALLED, + BR_SCRUB_MAXSTATES, } br_scrub_state_t; typedef enum br_scrub_event { - BR_SCRUB_EVENT_SCHEDULE = 0, - BR_SCRUB_EVENT_PAUSE, - BR_SCRUB_MAXEVENTS, + BR_SCRUB_EVENT_SCHEDULE = 0, + BR_SCRUB_EVENT_PAUSE, + BR_SCRUB_EVENT_ONDEMAND, + BR_SCRUB_MAXEVENTS, } br_scrub_event_t; -struct br_child; +struct br_monitor; -int32_t br_scrub_state_machine (xlator_t *, struct br_child *); +int32_t +br_scrub_state_machine(xlator_t *, gf_boolean_t); #endif /* __BIT_ROT_SSM_H__ */ diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-tbf.c b/xlators/features/bit-rot/src/bitd/bit-rot-tbf.c deleted file mode 100644 index f8b9b75d575..00000000000 --- a/xlators/features/bit-rot/src/bitd/bit-rot-tbf.c +++ /dev/null @@ -1,306 +0,0 @@ -/* - Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. 
- - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -/** - * - * Basic token bucket implementation for rate limiting. As of now interfaces - * to throttle disk read request, directory entry scan and hash calculation - * are available. To throttle a particular request (operation), the call needs - * to be wrapped in-between throttling APIs, for e.g. - * - * TBF_THROTTLE_BEGIN (...); <-- induces "delays" if required - * { - * call (...); - * } - * TBF_THROTTLE_END (...); <-- not used atm, maybe needed later - * - */ - -#include "mem-pool.h" -#include "bit-rot-tbf.h" -#include "bit-rot-stub-mem-types.h" - -typedef struct br_tbf_throttle { - char done; - - pthread_mutex_t mutex; - pthread_cond_t cond; - - unsigned long tokens; - - struct list_head list; -} br_tbf_throttle_t; - -/** - * OK. Most implementations of TBF I've come across generate tokens - * every second (UML, etc..) and some chose sub-second granularity - * (blk-iothrottle cgroups). TBF algorithm itself does not enforce - * any logic for choosing generation interval and it seems pretty - * logical as one could jack up token count per interval w.r.t. - * generation rate. - * - * Value used here is chosen based on a series of test(s) performed - * to balance object signing time and not maxing out on all available - * CPU cores. It's obvious to have seconds granularity and jack up - * token count per interval, thereby achieving close to similar - * results. Let's stick to this as it seems to be working fine for - * the set of ops that are throttled. 
- */ -#define BR_TBF_TOKENGEN_INTERVAL_USEC 600000 - -static br_tbf_throttle_t * -br_tbf_init_throttle (unsigned long tokens_required) -{ - br_tbf_throttle_t *throttle = NULL; - - throttle = GF_CALLOC (1, sizeof (*throttle), - gf_br_mt_br_tbf_throttle_t); - if (!throttle) - return NULL; - - throttle->done = 0; - throttle->tokens = tokens_required; - INIT_LIST_HEAD (&throttle->list); - - (void) pthread_mutex_init (&throttle->mutex, NULL); - (void) pthread_cond_init (&throttle->cond, NULL); - - return throttle; -} - -void -_br_tbf_dispatch_queued (br_tbf_bucket_t *bucket) -{ - gf_boolean_t xcont = _gf_false; - br_tbf_throttle_t *tmp = NULL; - br_tbf_throttle_t *throttle = NULL; - - list_for_each_entry_safe (throttle, tmp, &bucket->queued, list) { - - pthread_mutex_lock (&throttle->mutex); - { - if (bucket->tokens < throttle->tokens) { - xcont = _gf_true; - goto unblock; - } - - /* this request can now be serviced */ - throttle->done = 1; - list_del_init (&throttle->list); - - bucket->tokens -= throttle->tokens; - pthread_cond_signal (&throttle->cond); - } - unblock: - pthread_mutex_unlock (&throttle->mutex); - if (xcont) - break; - } -} - -void *br_tbf_tokengenerator (void *arg) -{ - unsigned long tokenrate = 0; - unsigned long maxtokens = 0; - br_tbf_bucket_t *bucket = arg; - - tokenrate = bucket->tokenrate; - maxtokens = bucket->maxtokens; - - while (1) { - usleep (BR_TBF_TOKENGEN_INTERVAL_USEC); - - LOCK (&bucket->lock); - { - bucket->tokens += tokenrate; - if (bucket->tokens > maxtokens) - bucket->tokens = maxtokens; - - if (!list_empty (&bucket->queued)) - _br_tbf_dispatch_queued (bucket); - } - UNLOCK (&bucket->lock); - } - - return NULL; -} - -/** - * There is lazy synchronization between this routine (when invoked - * under br_tbf_mod() context) and br_tbf_throttle(). *bucket is - * updated _after_ all the required variables are initialized. 
- */ -static int32_t -br_tbf_init_bucket (br_tbf_t *tbf, br_tbf_opspec_t *spec) -{ - int ret = 0; - br_tbf_bucket_t *curr = NULL; - br_tbf_bucket_t **bucket = NULL; - - GF_ASSERT (spec->op >= BR_TBF_OP_MIN); - GF_ASSERT (spec->op <= BR_TBF_OP_MAX); - - /* no rate? no throttling. */ - if (!spec->rate) - return 0; - - bucket = tbf->bucket + spec->op; - - curr = GF_CALLOC (1, sizeof (*curr), gf_br_mt_br_tbf_bucket_t); - if (!curr) - goto error_return; - - LOCK_INIT (&curr->lock); - INIT_LIST_HEAD (&curr->queued); - - curr->tokens = 0; - curr->tokenrate = spec->rate; - curr->maxtokens = spec->maxlimit; - - ret = gf_thread_create (&curr->tokener, - NULL, br_tbf_tokengenerator, curr); - if (ret != 0) - goto freemem; - - *bucket = curr; - return 0; - - freemem: - LOCK_DESTROY (&curr->lock); - GF_FREE (curr); - error_return: - return -1; -} - -#define BR_TBF_ALLOC_SIZE \ - (sizeof (br_tbf_t) + (BR_TBF_OP_MAX * sizeof (br_tbf_bucket_t))) - -br_tbf_t * -br_tbf_init (br_tbf_opspec_t *tbfspec, unsigned int count) -{ - int32_t i = 0; - int32_t ret = 0; - br_tbf_t *tbf = NULL; - br_tbf_opspec_t *opspec = NULL; - - tbf = GF_CALLOC (1, BR_TBF_ALLOC_SIZE, gf_br_mt_br_tbf_t); - if (!tbf) - goto error_return; - - tbf->bucket = (br_tbf_bucket_t **) ((char *)tbf + sizeof (*tbf)); - for (i = 0; i < BR_TBF_OP_MAX; i++) { - *(tbf->bucket + i) = NULL; - } - - for (i = 0; i < count; i++) { - opspec = tbfspec + i; - - ret = br_tbf_init_bucket (tbf, opspec); - if (ret) - break; - } - - if (ret) - goto error_return; - - return tbf; - - error_return: - return NULL; -} - -static void -br_tbf_mod_bucket (br_tbf_bucket_t *bucket, br_tbf_opspec_t *spec) -{ - LOCK (&bucket->lock); - { - bucket->tokens = 0; - bucket->tokenrate = spec->rate; - bucket->maxtokens = spec->maxlimit; - } - UNLOCK (&bucket->lock); - - /* next token tick would unqueue pending operations */ -} - -int -br_tbf_mod (br_tbf_t *tbf, br_tbf_opspec_t *tbfspec) -{ - int ret = 0; - br_tbf_bucket_t *bucket = NULL; - br_tbf_ops_t op = 
BR_TBF_OP_MIN; - - if (!tbf || !tbfspec) - return -1; - - op = tbfspec->op; - - GF_ASSERT (op >= BR_TBF_OP_MIN); - GF_ASSERT (op <= BR_TBF_OP_MAX); - - bucket = *(tbf->bucket + op); - if (bucket) { - br_tbf_mod_bucket (bucket, tbfspec); - } else { - ret = br_tbf_init_bucket (tbf, tbfspec); - } - - return ret; -} - -void -br_tbf_throttle (br_tbf_t *tbf, br_tbf_ops_t op, unsigned long tokens_requested) -{ - char waitq = 0; - br_tbf_bucket_t *bucket = NULL; - br_tbf_throttle_t *throttle = NULL; - - GF_ASSERT (op >= BR_TBF_OP_MIN); - GF_ASSERT (op <= BR_TBF_OP_MAX); - - bucket = *(tbf->bucket + op); - if (!bucket) - return; - - LOCK (&bucket->lock); - { - /** - * if there are enough tokens in the bucket there is no need - * to throttle the request: therefore, consume the required - * number of tokens and continue. - */ - if (tokens_requested <= bucket->tokens) { - bucket->tokens -= tokens_requested; - } else { - throttle = br_tbf_init_throttle (tokens_requested); - if (!throttle) /* let it slip through for now.. */ - goto unblock; - - waitq = 1; - pthread_mutex_lock (&throttle->mutex); - list_add_tail (&throttle->list, &bucket->queued); - } - } - unblock: - UNLOCK (&bucket->lock); - - if (waitq) { - while (!throttle->done) { - pthread_cond_wait (&throttle->cond, &throttle->mutex); - } - - pthread_mutex_unlock (&throttle->mutex); - - pthread_mutex_destroy (&throttle->mutex); - pthread_cond_destroy (&throttle->cond); - - GF_FREE (throttle); - } -} diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-tbf.h b/xlators/features/bit-rot/src/bitd/bit-rot-tbf.h deleted file mode 100644 index 5a41be4fd95..00000000000 --- a/xlators/features/bit-rot/src/bitd/bit-rot-tbf.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. 
- - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -#include "list.h" -#include "xlator.h" -#include "locking.h" - -#ifndef __BIT_ROT_TBF_H__ -#define __BIT_ROT_TBF_H__ - -typedef enum br_tbf_ops { - BR_TBF_OP_MIN = -1, - BR_TBF_OP_HASH = 0, /* checksum calculation */ - BR_TBF_OP_READ = 1, /* inode read(s) */ - BR_TBF_OP_READDIR = 2, /* dentry read(s) */ - BR_TBF_OP_MAX = 3, -} br_tbf_ops_t; - -/** - * Operation rate specification - */ -typedef struct br_tbf_opspec { - br_tbf_ops_t op; - - unsigned long rate; - - unsigned long maxlimit; -} br_tbf_opspec_t; - -/** - * Token bucket for each operation type - */ -typedef struct br_tbf_bucket { - gf_lock_t lock; - - pthread_t tokener; /* token generator thread */ - - unsigned long tokenrate; /* token generation rate */ - - unsigned long tokens; /* number of current tokens */ - - unsigned long maxtokens; /* maximum token in the bucket */ - - struct list_head queued; /* list of non-conformant requests */ -} br_tbf_bucket_t; - -typedef struct br_tbf { - br_tbf_bucket_t **bucket; -} br_tbf_t; - -br_tbf_t * -br_tbf_init (br_tbf_opspec_t *, unsigned int); - -int -br_tbf_mod (br_tbf_t *, br_tbf_opspec_t *); - -void -br_tbf_throttle (br_tbf_t *, br_tbf_ops_t, unsigned long); - -#define TBF_THROTTLE_BEGIN(tbf, op, tokens) (br_tbf_throttle (tbf, op, tokens)) -#define TBF_THROTTLE_END(tbf, op, tokens) (void) - -#endif /** __BIT_ROT_TBF_H__ */ diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c index d6ae5e2fdd2..a2f1c343a1d 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot.c +++ b/xlators/features/bit-rot/src/bitd/bit-rot.c @@ -9,86 +9,81 @@ */ #include <ctype.h> -#include <sys/uio.h> -#include "glusterfs.h" -#include "xlator.h" -#include "logging.h" -#include 
"compat-errno.h" +#include <glusterfs/logging.h> +#include <glusterfs/compat-errno.h> #include "bit-rot.h" #include "bit-rot-scrub.h" #include <pthread.h> #include "bit-rot-bitd-messages.h" -#include "tw.h" +#define BR_HASH_CALC_READ_SIZE (128 * 1024) -#define BR_HASH_CALC_READ_SIZE (128 * 1024) - -typedef int32_t (br_child_handler)(xlator_t *, br_child_t *); +typedef int32_t(br_child_handler)(xlator_t *, br_child_t *); struct br_child_event { - xlator_t *this; + xlator_t *this; - br_child_t *child; + br_child_t *child; - br_child_handler *call; + br_child_handler *call; - struct list_head list; + struct list_head list; }; static int -br_find_child_index (xlator_t *this, xlator_t *child) +br_find_child_index(xlator_t *this, xlator_t *child) { - br_private_t *priv = NULL; - int i = -1; - int index = -1; + br_private_t *priv = NULL; + int i = -1; + int index = -1; - GF_VALIDATE_OR_GOTO ("bit-rot", this, out); - GF_VALIDATE_OR_GOTO (this->name, this->private, out); - GF_VALIDATE_OR_GOTO (this->name, child, out); + GF_VALIDATE_OR_GOTO("bit-rot", this, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + GF_VALIDATE_OR_GOTO(this->name, child, out); - priv = this->private; + priv = this->private; - for (i = 0; i < priv->child_count; i++) { - if (child == priv->children[i].xl) { - index = i; - break; - } + for (i = 0; i < priv->child_count; i++) { + if (child == priv->children[i].xl) { + index = i; + break; } + } out: - return index; + return index; } br_child_t * -br_get_child_from_brick_path (xlator_t *this, char *brick_path) +br_get_child_from_brick_path(xlator_t *this, char *brick_path) { - br_private_t *priv = NULL; - br_child_t *child = NULL; - br_child_t *tmp = NULL; - int i = 0; + br_private_t *priv = NULL; + br_child_t *child = NULL; + br_child_t *tmp = NULL; + int i = 0; - GF_VALIDATE_OR_GOTO ("bit-rot", this, out); - GF_VALIDATE_OR_GOTO (this->name, this->private, out); - GF_VALIDATE_OR_GOTO (this->name, brick_path, out); + 
GF_VALIDATE_OR_GOTO("bit-rot", this, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + GF_VALIDATE_OR_GOTO(this->name, brick_path, out); - priv = this->private; + priv = this->private; - pthread_mutex_lock (&priv->lock); - { - for (i = 0; i < priv->child_count; i++) { - tmp = &priv->children[i]; - if (!strcmp (tmp->brick_path, brick_path)) { - child = tmp; - break; - } - } + pthread_mutex_lock(&priv->lock); + { + for (i = 0; i < priv->child_count; i++) { + tmp = &priv->children[i]; + if (!strcmp(tmp->brick_path, brick_path)) { + child = tmp; + break; + } } - pthread_mutex_unlock (&priv->lock); + } + pthread_mutex_unlock(&priv->lock); out: - return child; + return child; } /** @@ -96,18 +91,18 @@ out: * needed -- later. */ void * -br_brick_init (void *xl, struct gf_brick_spec *brick) +br_brick_init(void *xl, struct gf_brick_spec *brick) { - return brick; + return brick; } /** * and cleanup things here when allocated br_brick_init(). */ void -br_brick_fini (void *xl, char *brick, void *data) +br_brick_fini(void *xl, char *brick, void *data) { - return; + return; } /** @@ -119,109 +114,109 @@ br_brick_fini (void *xl, char *brick, void *data) * change stub to handle this change. 
*/ static br_isignature_t * -br_prepare_signature (const unsigned char *sign, - unsigned long hashlen, - int8_t hashtype, br_object_t *object) +br_prepare_signature(const unsigned char *sign, unsigned long hashlen, + int8_t hashtype, br_object_t *object) { - br_isignature_t *signature = NULL; + br_isignature_t *signature = NULL; - /* TODO: use mem-pool */ - signature = GF_CALLOC (1, signature_size (hashlen + 1), - gf_br_stub_mt_signature_t); - if (!signature) - return NULL; + /* TODO: use mem-pool */ + signature = GF_CALLOC(1, signature_size(hashlen + 1), + gf_br_stub_mt_signature_t); + if (!signature) + return NULL; - /* object version */ - signature->signedversion = object->signedversion; + /* object version */ + signature->signedversion = object->signedversion; - /* signature length & type */ - signature->signaturelen = hashlen; - signature->signaturetype = hashtype; + /* signature length & type */ + signature->signaturelen = hashlen; + signature->signaturetype = hashtype; - /* signature itself */ - memcpy (signature->signature, (char *)sign, hashlen); - signature->signature[hashlen+1] = '\0'; + /* signature itself */ + memcpy(signature->signature, (char *)sign, hashlen); + signature->signature[hashlen + 1] = '\0'; - return signature; + return signature; } gf_boolean_t -bitd_is_bad_file (xlator_t *this, br_child_t *child, loc_t *loc, fd_t *fd) +bitd_is_bad_file(xlator_t *this, br_child_t *child, loc_t *loc, fd_t *fd) { - int32_t ret = -1; - dict_t *xattr = NULL; - inode_t *inode = NULL; - gf_boolean_t bad_file = _gf_false; + int32_t ret = -1; + dict_t *xattr = NULL; + inode_t *inode = NULL; + gf_boolean_t bad_file = _gf_false; - GF_VALIDATE_OR_GOTO ("bit-rot", this, out); + GF_VALIDATE_OR_GOTO("bit-rot", this, out); - inode = (loc) ? loc->inode : fd->inode; + inode = (loc) ? 
loc->inode : fd->inode; - if (fd) - ret = syncop_fgetxattr (child->xl, fd, &xattr, - BITROT_OBJECT_BAD_KEY, NULL, NULL); - else if (loc) - ret = syncop_getxattr (child->xl, loc, - &xattr, BITROT_OBJECT_BAD_KEY, NULL, - NULL); + if (fd) + ret = syncop_fgetxattr(child->xl, fd, &xattr, BITROT_OBJECT_BAD_KEY, + NULL, NULL); + else if (loc) + ret = syncop_getxattr(child->xl, loc, &xattr, BITROT_OBJECT_BAD_KEY, + NULL, NULL); - if (!ret) { - gf_msg_debug (this->name, 0, "[GFID: %s] is marked corrupted", - uuid_utoa (inode->gfid)); - bad_file = _gf_true; - } + if (!ret) { + gf_msg_debug(this->name, 0, "[GFID: %s] is marked corrupted", + uuid_utoa(inode->gfid)); + bad_file = _gf_true; + } - if (xattr) - dict_unref (xattr); + if (xattr) + dict_unref(xattr); out: - return bad_file; + return bad_file; } /** * Do a lookup on the gfid present within the object. */ static int32_t -br_object_lookup (xlator_t *this, br_object_t *object, - struct iatt *iatt, inode_t **linked_inode) +br_object_lookup(xlator_t *this, br_object_t *object, struct iatt *iatt, + inode_t **linked_inode) { - int ret = -EINVAL; - loc_t loc = {0, }; - inode_t *inode = NULL; - - GF_VALIDATE_OR_GOTO ("bit-rot", this, out); - GF_VALIDATE_OR_GOTO (this->name, object, out); - - inode = inode_find (object->child->table, object->gfid); - - if (inode) - loc.inode = inode; - else - loc.inode = inode_new (object->child->table); - - if (!loc.inode) { - ret = -ENOMEM; - goto out; - } - - gf_uuid_copy (loc.gfid, object->gfid); - - ret = syncop_lookup (object->child->xl, &loc, iatt, NULL, NULL, NULL); - if (ret < 0) - goto out; - - /* - * The file might have been deleted by the application - * after getting the event, but before doing a lookup. - * So use linked_inode after inode_link is done. 
- */ - *linked_inode = inode_link (loc.inode, NULL, NULL, iatt); - if (*linked_inode) - inode_lookup (*linked_inode); + int ret = -EINVAL; + loc_t loc = { + 0, + }; + inode_t *inode = NULL; + + GF_VALIDATE_OR_GOTO("bit-rot", this, out); + GF_VALIDATE_OR_GOTO(this->name, object, out); + + inode = inode_find(object->child->table, object->gfid); + + if (inode) + loc.inode = inode; + else + loc.inode = inode_new(object->child->table); + + if (!loc.inode) { + ret = -ENOMEM; + goto out; + } + + gf_uuid_copy(loc.gfid, object->gfid); + + ret = syncop_lookup(object->child->xl, &loc, iatt, NULL, NULL, NULL); + if (ret < 0) + goto out; + + /* + * The file might have been deleted by the application + * after getting the event, but before doing a lookup. + * So use linked_inode after inode_link is done. + */ + *linked_inode = inode_link(loc.inode, NULL, NULL, iatt); + if (*linked_inode) + inode_lookup(*linked_inode); out: - loc_wipe (&loc); - return ret; + loc_wipe(&loc); + return ret; } /** @@ -230,43 +225,44 @@ out: * passing xdata -- may be use frame->root->pid itself. 
*/ static int32_t -br_object_open (xlator_t *this, - br_object_t *object, inode_t *inode, fd_t **openfd) +br_object_open(xlator_t *this, br_object_t *object, inode_t *inode, + fd_t **openfd) { - int32_t ret = -1; - fd_t *fd = NULL; - loc_t loc = {0, }; - - GF_VALIDATE_OR_GOTO ("bit-rot", this, out); - GF_VALIDATE_OR_GOTO (this->name, object, out); - GF_VALIDATE_OR_GOTO (this->name, inode, out); - - ret = -EINVAL; - fd = fd_create (inode, 0); - if (!fd) { - gf_msg (this->name, GF_LOG_ERROR, 0, BRB_MSG_FD_CREATE_FAILED, - "failed to create fd for the inode %s", - uuid_utoa (inode->gfid)); - goto out; - } - - loc.inode = inode_ref (inode); - gf_uuid_copy (loc.gfid, inode->gfid); - - ret = syncop_open (object->child->xl, &loc, O_RDONLY, fd, NULL, NULL); - if (ret) { - br_log_object (this, "open", inode->gfid, -ret); - fd_unref (fd); - fd = NULL; - } else { - fd_bind (fd); - *openfd = fd; - } - - loc_wipe (&loc); + int32_t ret = -1; + fd_t *fd = NULL; + loc_t loc = { + 0, + }; + + GF_VALIDATE_OR_GOTO("bit-rot", this, out); + GF_VALIDATE_OR_GOTO(this->name, object, out); + GF_VALIDATE_OR_GOTO(this->name, inode, out); + + ret = -EINVAL; + fd = fd_create(inode, 0); + if (!fd) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_FD_CREATE_FAILED, + "gfid=%s", uuid_utoa(inode->gfid), NULL); + goto out; + } + + loc.inode = inode_ref(inode); + gf_uuid_copy(loc.gfid, inode->gfid); + + ret = syncop_open(object->child->xl, &loc, O_RDONLY, fd, NULL, NULL); + if (ret) { + br_log_object(this, "open", inode->gfid, -ret); + fd_unref(fd); + fd = NULL; + } else { + fd_bind(fd); + *openfd = fd; + } + + loc_wipe(&loc); out: - return ret; + return ret; } /** @@ -274,287 +270,282 @@ out: * and return the buffer. 
*/ static int32_t -br_object_read_block_and_sign (xlator_t *this, fd_t *fd, br_child_t *child, - off_t offset, size_t size, SHA256_CTX *sha256) +br_object_read_block_and_sign(xlator_t *this, fd_t *fd, br_child_t *child, + off_t offset, size_t size, SHA256_CTX *sha256) { - int32_t ret = -1; - br_tbf_t *tbf = NULL; - struct iovec *iovec = NULL; - struct iobref *iobref = NULL; - br_private_t *priv = NULL; - int count = 0; - int i = 0; - - GF_VALIDATE_OR_GOTO ("bit-rot", this, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - GF_VALIDATE_OR_GOTO (this->name, fd->inode, out); - GF_VALIDATE_OR_GOTO (this->name, child, out); - GF_VALIDATE_OR_GOTO (this->name, this->private, out); - - priv = this->private; - - GF_VALIDATE_OR_GOTO (this->name, priv->tbf, out); - tbf = priv->tbf; - - ret = syncop_readv (child->xl, fd, - size, offset, 0, &iovec, &count, &iobref, NULL, - NULL); - - if (ret < 0) { - gf_msg (this->name, GF_LOG_ERROR, errno, BRB_MSG_READV_FAILED, - "readv on %s failed", uuid_utoa (fd->inode->gfid)); - ret = -1; - goto out; - } + int32_t ret = -1; + tbf_t *tbf = NULL; + struct iovec *iovec = NULL; + struct iobref *iobref = NULL; + br_private_t *priv = NULL; + int count = 0; + int i = 0; + + GF_VALIDATE_OR_GOTO("bit-rot", this, out); + GF_VALIDATE_OR_GOTO(this->name, fd, out); + GF_VALIDATE_OR_GOTO(this->name, fd->inode, out); + GF_VALIDATE_OR_GOTO(this->name, child, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + + priv = this->private; + + GF_VALIDATE_OR_GOTO(this->name, priv->tbf, out); + tbf = priv->tbf; + + ret = syncop_readv(child->xl, fd, size, offset, 0, &iovec, &count, &iobref, + NULL, NULL, NULL); + + if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, BRB_MSG_READV_FAILED, + "gfid=%s", uuid_utoa(fd->inode->gfid), NULL); + ret = -1; + goto out; + } - if (ret == 0) - goto out; + if (ret == 0) + goto out; - for (i = 0; i < count; i++) { - TBF_THROTTLE_BEGIN (tbf, BR_TBF_OP_HASH, iovec[i].iov_len); - { - SHA256_Update (sha256, (const 
unsigned char *) - (iovec[i].iov_base), iovec[i].iov_len); - } - TBF_THROTTLE_BEGIN (tbf, BR_TBF_OP_HASH, iovec[i].iov_len); + for (i = 0; i < count; i++) { + TBF_THROTTLE_BEGIN(tbf, TBF_OP_HASH, iovec[i].iov_len); + { + SHA256_Update(sha256, (const unsigned char *)(iovec[i].iov_base), + iovec[i].iov_len); } + TBF_THROTTLE_BEGIN(tbf, TBF_OP_HASH, iovec[i].iov_len); + } - out: - if (iovec) - GF_FREE (iovec); +out: + if (iovec) + GF_FREE(iovec); - if (iobref) - iobref_unref (iobref); + if (iobref) + iobref_unref(iobref); - return ret; + return ret; } int32_t -br_calculate_obj_checksum (unsigned char *md, - br_child_t *child, fd_t *fd, struct iatt *iatt) +br_calculate_obj_checksum(unsigned char *md, br_child_t *child, fd_t *fd, + struct iatt *iatt) { - int32_t ret = -1; - off_t offset = 0; - size_t block = BR_HASH_CALC_READ_SIZE; - xlator_t *this = NULL; + int32_t ret = -1; + off_t offset = 0; + size_t block = BR_HASH_CALC_READ_SIZE; + xlator_t *this = NULL; - SHA256_CTX sha256; + SHA256_CTX sha256; - GF_VALIDATE_OR_GOTO ("bit-rot", child, out); - GF_VALIDATE_OR_GOTO ("bit-rot", iatt, out); - GF_VALIDATE_OR_GOTO ("bit-rot", fd, out); + GF_VALIDATE_OR_GOTO("bit-rot", child, out); + GF_VALIDATE_OR_GOTO("bit-rot", iatt, out); + GF_VALIDATE_OR_GOTO("bit-rot", fd, out); - this = child->this; + this = child->this; - SHA256_Init (&sha256); + SHA256_Init(&sha256); - while (1) { - ret = br_object_read_block_and_sign (this, fd, child, - offset, block, &sha256); - if (ret < 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - BRB_MSG_BLOCK_READ_FAILED, "reading block with " - "offset %lu of object %s failed", offset, - uuid_utoa (fd->inode->gfid)); - break; - } - - if (ret == 0) - break; - - offset += ret; + while (1) { + ret = br_object_read_block_and_sign(this, fd, child, offset, block, + &sha256); + if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_BLOCK_READ_FAILED, + "offset=%" PRIu64, offset, "object-gfid=%s", + uuid_utoa(fd->inode->gfid), NULL); + break; } if (ret == 
0) - SHA256_Final (md, &sha256); + break; - out: - return ret; + offset += ret; + } + + if (ret == 0) + SHA256_Final(md, &sha256); + +out: + return ret; } static int32_t -br_object_checksum (unsigned char *md, - br_object_t *object, fd_t *fd, struct iatt *iatt) +br_object_checksum(unsigned char *md, br_object_t *object, fd_t *fd, + struct iatt *iatt) { - return br_calculate_obj_checksum (md, object->child, fd, iatt); + return br_calculate_obj_checksum(md, object->child, fd, iatt); } static int32_t -br_object_read_sign (inode_t *linked_inode, fd_t *fd, br_object_t *object, - struct iatt *iatt) +br_object_read_sign(inode_t *linked_inode, fd_t *fd, br_object_t *object, + struct iatt *iatt) { - int32_t ret = -1; - xlator_t *this = NULL; - dict_t *xattr = NULL; - unsigned char *md = NULL; - br_isignature_t *sign = NULL; - - GF_VALIDATE_OR_GOTO ("bit-rot", object, out); - GF_VALIDATE_OR_GOTO ("bit-rot", linked_inode, out); - GF_VALIDATE_OR_GOTO ("bit-rot", fd, out); - - this = object->this; - - md = GF_CALLOC (SHA256_DIGEST_LENGTH, sizeof (*md), gf_common_mt_char); - if (!md) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY, - "failed to allocate memory for saving hash of the " - "object %s", uuid_utoa (fd->inode->gfid)); - goto out; - } - - ret = br_object_checksum (md, object, fd, iatt); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - BRB_MSG_CALC_CHECKSUM_FAILED, "calculating checksum " - "for the object %s failed", - uuid_utoa (linked_inode->gfid)); - goto free_signature; - } - - sign = br_prepare_signature (md, SHA256_DIGEST_LENGTH, - BR_SIGNATURE_TYPE_SHA256, object); - if (!sign) { - gf_msg (this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_SIGN_FAILED, - "failed to get the signature for the object %s", - uuid_utoa (fd->inode->gfid)); - goto free_signature; - } - - xattr = dict_for_key_value - (GLUSTERFS_SET_OBJECT_SIGNATURE, - (void *)sign, signature_size (SHA256_DIGEST_LENGTH)); - - if (!xattr) { - gf_msg (this->name, GF_LOG_ERROR, 0, 
BRB_MSG_SET_SIGN_FAILED, - "dict allocation for signing failed for the object %s", - uuid_utoa (fd->inode->gfid)); - goto free_isign; - } - - ret = syncop_fsetxattr (object->child->xl, fd, xattr, 0, NULL, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, BRB_MSG_SET_SIGN_FAILED, - "fsetxattr of signature to the object %s failed", - uuid_utoa (fd->inode->gfid)); - goto unref_dict; - } - - ret = 0; - - unref_dict: - dict_unref (xattr); - free_isign: - GF_FREE (sign); - free_signature: - GF_FREE (md); - out: - return ret; + int32_t ret = -1; + xlator_t *this = NULL; + dict_t *xattr = NULL; + unsigned char *md = NULL; + br_isignature_t *sign = NULL; + + GF_VALIDATE_OR_GOTO("bit-rot", object, out); + GF_VALIDATE_OR_GOTO("bit-rot", linked_inode, out); + GF_VALIDATE_OR_GOTO("bit-rot", fd, out); + + this = object->this; + + md = GF_MALLOC(SHA256_DIGEST_LENGTH, gf_common_mt_char); + if (!md) { + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_SAVING_HASH_FAILED, + "object-gfid=%s", uuid_utoa(fd->inode->gfid), NULL); + goto out; + } + + ret = br_object_checksum(md, object, fd, iatt); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_CALC_CHECKSUM_FAILED, + "object-gfid=%s", uuid_utoa(linked_inode->gfid), NULL); + goto free_signature; + } + + sign = br_prepare_signature(md, SHA256_DIGEST_LENGTH, + BR_SIGNATURE_TYPE_SHA256, object); + if (!sign) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_SIGN_FAILED, + "object-gfid=%s", uuid_utoa(fd->inode->gfid), NULL); + goto free_signature; + } + + xattr = dict_for_key_value(GLUSTERFS_SET_OBJECT_SIGNATURE, (void *)sign, + signature_size(SHA256_DIGEST_LENGTH), _gf_true); + + if (!xattr) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SET_SIGN_FAILED, + "dict-allocation object-gfid=%s", uuid_utoa(fd->inode->gfid), + NULL); + goto free_isign; + } + + ret = syncop_fsetxattr(object->child->xl, fd, xattr, 0, NULL, NULL); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SET_SIGN_FAILED, + "fsetxattr 
object-gfid=%s", uuid_utoa(fd->inode->gfid), NULL); + goto unref_dict; + } + + ret = 0; + +unref_dict: + dict_unref(xattr); +free_isign: + GF_FREE(sign); +free_signature: + GF_FREE(md); +out: + return ret; } -static int br_object_sign_softerror (int32_t op_errno) +static int +br_object_sign_softerror(int32_t op_errno) { - return ((op_errno == ENOENT) || (op_errno == ESTALE) - || (op_errno == ENODATA)); + return ((op_errno == ENOENT) || (op_errno == ESTALE) || + (op_errno == ENODATA)); } void -br_log_object (xlator_t *this, char *op, uuid_t gfid, int32_t op_errno) +br_log_object(xlator_t *this, char *op, uuid_t gfid, int32_t op_errno) { - int softerror = br_object_sign_softerror (op_errno); - if (softerror) { - gf_msg_debug (this->name, 0, "%s() failed on object %s " - "[reason: %s]", op, uuid_utoa (gfid), - strerror (op_errno)); - } else { - gf_msg (this->name, GF_LOG_ERROR, op_errno, BRB_MSG_OP_FAILED, - "%s() failed on object %s", op, uuid_utoa (gfid)); - } + int softerror = br_object_sign_softerror(op_errno); + if (softerror) { + gf_msg_debug(this->name, 0, + "%s() failed on object %s " + "[reason: %s]", + op, uuid_utoa(gfid), strerror(op_errno)); + } else { + gf_smsg(this->name, GF_LOG_ERROR, op_errno, BRB_MSG_OP_FAILED, "op=%s", + op, "gfid=%s", uuid_utoa(gfid), NULL); + } } void -br_log_object_path (xlator_t *this, char *op, - const char *path, int32_t op_errno) +br_log_object_path(xlator_t *this, char *op, const char *path, int32_t op_errno) { - int softerror = br_object_sign_softerror (op_errno); - if (softerror) { - gf_msg_debug (this->name, 0, "%s() failed on object %s " - "[reason: %s]", op, path, strerror (op_errno)); - } else { - gf_msg (this->name, GF_LOG_ERROR, op_errno, BRB_MSG_OP_FAILED, - "%s() failed on object %s", op, path); - } + int softerror = br_object_sign_softerror(op_errno); + if (softerror) { + gf_msg_debug(this->name, 0, + "%s() failed on object %s " + "[reason: %s]", + op, path, strerror(op_errno)); + } else { + gf_smsg(this->name, 
GF_LOG_ERROR, op_errno, BRB_MSG_OP_FAILED, "op=%s", + op, "path=%s", path, NULL); + } } static void -br_trigger_sign (xlator_t *this, br_child_t *child, - inode_t *linked_inode, loc_t *loc, gf_boolean_t need_reopen) +br_trigger_sign(xlator_t *this, br_child_t *child, inode_t *linked_inode, + loc_t *loc, gf_boolean_t need_reopen) { - fd_t *fd = NULL; - int32_t ret = -1; - uint32_t val = 0; - dict_t *dict = NULL; - pid_t pid = GF_CLIENT_PID_BITD; - - syncopctx_setfspid (&pid); - - val = (need_reopen == _gf_true) ? BR_OBJECT_REOPEN : BR_OBJECT_RESIGN; - - dict = dict_new (); - if (!dict) - goto out; - - ret = dict_set_uint32 (dict, BR_REOPEN_SIGN_HINT_KEY, val); - if (ret) - goto cleanup_dict; - - ret = -1; - fd = fd_create (linked_inode, 0); - if (!fd) { - gf_msg (this->name, GF_LOG_ERROR, 0, BRB_MSG_FD_CREATE_FAILED, - "Failed to create fd [GFID %s]", - uuid_utoa (linked_inode->gfid)); - goto cleanup_dict; - } - - ret = syncop_open (child->xl, loc, O_RDWR, fd, NULL, NULL); - if (ret) { - br_log_object (this, "open", linked_inode->gfid, -ret); - goto unref_fd; - } - - fd_bind (fd); - - ret = syncop_fsetxattr (child->xl, fd, dict, 0, NULL, NULL); - if (ret) - br_log_object (this, "fsetxattr", linked_inode->gfid, -ret); - - /* passthough: fd_unref() */ - - unref_fd: - fd_unref (fd); - cleanup_dict: - dict_unref (dict); - out: - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, 0, BRB_MSG_TRIGGER_SIGN, - "Could not trigger signingd for %s (reopen hint: %d)", - uuid_utoa (linked_inode->gfid), val); - } + fd_t *fd = NULL; + int32_t ret = -1; + uint32_t val = 0; + dict_t *dict = NULL; + pid_t pid = GF_CLIENT_PID_BITD; + + syncopctx_setfspid(&pid); + + val = (need_reopen == _gf_true) ? 
BR_OBJECT_REOPEN : BR_OBJECT_RESIGN; + + dict = dict_new(); + if (!dict) + goto out; + + ret = dict_set_uint32(dict, BR_REOPEN_SIGN_HINT_KEY, val); + if (ret) + goto cleanup_dict; + + ret = -1; + fd = fd_create(linked_inode, 0); + if (!fd) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_FD_CREATE_FAILED, + "gfid=%s", uuid_utoa(linked_inode->gfid), NULL); + goto cleanup_dict; + } + + ret = syncop_open(child->xl, loc, O_RDWR, fd, NULL, NULL); + if (ret) { + br_log_object(this, "open", linked_inode->gfid, -ret); + goto unref_fd; + } + + fd_bind(fd); + + ret = syncop_fsetxattr(child->xl, fd, dict, 0, NULL, NULL); + if (ret) + br_log_object(this, "fsetxattr", linked_inode->gfid, -ret); + + /* passthough: fd_unref() */ + +unref_fd: + fd_unref(fd); +cleanup_dict: + dict_unref(dict); +out: + if (ret) { + gf_smsg(this->name, GF_LOG_WARNING, 0, BRB_MSG_TRIGGER_SIGN_FAILED, + "gfid=%s", uuid_utoa(linked_inode->gfid), "reopen-hint-val=%d", + val, NULL); + } } static void -br_object_resign (xlator_t *this, - br_object_t *object, inode_t *linked_inode) +br_object_resign(xlator_t *this, br_object_t *object, inode_t *linked_inode) { - loc_t loc = {0, }; + loc_t loc = { + 0, + }; - loc.inode = inode_ref (linked_inode); - gf_uuid_copy (loc.gfid, linked_inode->gfid); + loc.inode = inode_ref(linked_inode); + gf_uuid_copy(loc.gfid, linked_inode->gfid); - br_trigger_sign (this, object->child, linked_inode, &loc, _gf_false); + br_trigger_sign(this, object->child, linked_inode, &loc, _gf_false); - loc_wipe (&loc); + loc_wipe(&loc); } /** @@ -562,122 +553,123 @@ br_object_resign (xlator_t *this, * some form of priority scheduling and/or read burstness to avoid starving * (or kicking) client I/O's. 
*/ -static int32_t br_sign_object (br_object_t *object) +static int32_t +br_sign_object(br_object_t *object) { - int32_t ret = -1; - inode_t *linked_inode = NULL; - xlator_t *this = NULL; - fd_t *fd = NULL; - struct iatt iatt = {0, }; - pid_t pid = GF_CLIENT_PID_BITD; - br_sign_state_t sign_info = BR_SIGN_NORMAL; - - GF_VALIDATE_OR_GOTO ("bit-rot", object, out); - - this = object->this; - - /** - * FIXME: This is required as signing an object is restricted to - * clients with special frame->root->pid. Change the way client - * pid is set. - */ - syncopctx_setfspid (&pid); - - ret = br_object_lookup (this, object, &iatt, &linked_inode); - if (ret) { - br_log_object (this, "lookup", object->gfid, -ret); - goto out; - } - - /** - * For fd's that have notified for reopening, we send an explicit - * open() followed by a dummy write() call. This triggers the - * actual signing of the object. - */ - sign_info = ntohl (object->sign_info); - if (sign_info == BR_SIGN_REOPEN_WAIT) { - br_object_resign (this, object, linked_inode); - goto unref_inode; - } - - ret = br_object_open (this, object, linked_inode, &fd); - if (!fd) { - br_log_object (this, "open", object->gfid, -ret); - goto unref_inode; - } - - /** - * we have an open file descriptor on the object. from here on, - * do not be generous to file operation errors. 
- */ - gf_msg_debug (this->name, 0, "Signing object [%s]", - uuid_utoa (linked_inode->gfid)); - - ret = br_object_read_sign (linked_inode, fd, object, &iatt); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - BRB_MSG_READ_AND_SIGN_FAILED, "reading and signing of " - "the object %s failed", uuid_utoa (linked_inode->gfid)); - goto unref_fd; - } - - ret = 0; - - unref_fd: - fd_unref (fd); - unref_inode: - inode_unref (linked_inode); - out: - return ret; + int32_t ret = -1; + inode_t *linked_inode = NULL; + xlator_t *this = NULL; + fd_t *fd = NULL; + struct iatt iatt = { + 0, + }; + pid_t pid = GF_CLIENT_PID_BITD; + br_sign_state_t sign_info = BR_SIGN_NORMAL; + + GF_VALIDATE_OR_GOTO("bit-rot", object, out); + + this = object->this; + + /** + * FIXME: This is required as signing an object is restricted to + * clients with special frame->root->pid. Change the way client + * pid is set. + */ + syncopctx_setfspid(&pid); + + ret = br_object_lookup(this, object, &iatt, &linked_inode); + if (ret) { + br_log_object(this, "lookup", object->gfid, -ret); + goto out; + } + + /** + * For fd's that have notified for reopening, we send an explicit + * open() followed by a dummy write() call. This triggers the + * actual signing of the object. + */ + sign_info = ntohl(object->sign_info); + if (sign_info == BR_SIGN_REOPEN_WAIT) { + br_object_resign(this, object, linked_inode); + goto unref_inode; + } + + ret = br_object_open(this, object, linked_inode, &fd); + if (!fd) { + br_log_object(this, "open", object->gfid, -ret); + goto unref_inode; + } + + /** + * we have an open file descriptor on the object. from here on, + * do not be generous to file operation errors. 
+ */ + gf_msg_debug(this->name, 0, "Signing object [%s]", + uuid_utoa(linked_inode->gfid)); + + ret = br_object_read_sign(linked_inode, fd, object, &iatt); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_READ_AND_SIGN_FAILED, + "gfid=%s", uuid_utoa(linked_inode->gfid), NULL); + goto unref_fd; + } + + ret = 0; + +unref_fd: + fd_unref(fd); +unref_inode: + inode_unref(linked_inode); +out: + return ret; } -static br_object_t *__br_pick_object (br_private_t *priv) +static br_object_t * +__br_pick_object(br_private_t *priv) { - br_object_t *object = NULL; + br_object_t *object = NULL; - while (list_empty (&priv->obj_queue->objects)) { - pthread_cond_wait (&priv->object_cond, &priv->lock); - } + while (list_empty(&priv->obj_queue->objects)) { + pthread_cond_wait(&priv->object_cond, &priv->lock); + } - object = list_first_entry - (&priv->obj_queue->objects, br_object_t, list); - list_del_init (&object->list); + object = list_first_entry(&priv->obj_queue->objects, br_object_t, list); + list_del_init(&object->list); - return object; + return object; } /** * This is the place where the signing of the objects is triggered. 
*/ void * -br_process_object (void *arg) +br_process_object(void *arg) { - xlator_t *this = NULL; - br_object_t *object = NULL; - br_private_t *priv = NULL; - int32_t ret = -1; - - this = arg; - priv = this->private; - - THIS = this; - - for (;;) { - pthread_mutex_lock (&priv->lock); - { - object = __br_pick_object (priv); - } - pthread_mutex_unlock (&priv->lock); - - ret = br_sign_object (object); - if (ret && !br_object_sign_softerror (-ret)) - gf_msg (this->name, GF_LOG_ERROR, 0, - BRB_MSG_SIGN_FAILED, "SIGNING FAILURE [%s]", - uuid_utoa (object->gfid)); - GF_FREE (object); + xlator_t *this = NULL; + br_object_t *object = NULL; + br_private_t *priv = NULL; + int32_t ret = -1; + + this = arg; + priv = this->private; + + THIS = this; + + for (;;) { + pthread_mutex_lock(&priv->lock); + { + object = __br_pick_object(priv); } + pthread_mutex_unlock(&priv->lock); - return NULL; + ret = br_sign_object(object); + if (ret && !br_object_sign_softerror(-ret)) + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SET_SIGN_FAILED, + "gfid=%s", uuid_utoa(object->gfid), NULL); + GF_FREE(object); + } + + return NULL; } /** @@ -694,98 +686,97 @@ br_process_object (void *arg) * NOTE: use call_time to instrument signing time in br_sign_object(). 
*/ void -br_add_object_to_queue (struct gf_tw_timer_list *timer, - void *data, unsigned long call_time) +br_add_object_to_queue(struct gf_tw_timer_list *timer, void *data, + unsigned long call_time) { - br_object_t *object = NULL; - xlator_t *this = NULL; - br_private_t *priv = NULL; - - object = data; - this = object->this; - priv = this->private; - - THIS = this; - - pthread_mutex_lock (&priv->lock); - { - list_add_tail (&object->list, &priv->obj_queue->objects); - pthread_cond_broadcast (&priv->object_cond); - } - pthread_mutex_unlock (&priv->lock); - - if (timer) - mem_put (timer); - return; + br_object_t *object = NULL; + xlator_t *this = NULL; + br_private_t *priv = NULL; + + object = data; + this = object->this; + priv = this->private; + + THIS = this; + + pthread_mutex_lock(&priv->lock); + { + list_add_tail(&object->list, &priv->obj_queue->objects); + pthread_cond_broadcast(&priv->object_cond); + } + pthread_mutex_unlock(&priv->lock); + + if (timer) + mem_put(timer); + return; } static br_object_t * -br_initialize_object (xlator_t *this, br_child_t *child, changelog_event_t *ev) +br_initialize_object(xlator_t *this, br_child_t *child, changelog_event_t *ev) { - br_object_t *object = NULL; + br_object_t *object = NULL; - object = GF_CALLOC (1, sizeof (*object), gf_br_mt_br_object_t); - if (!object) - goto out; - INIT_LIST_HEAD (&object->list); + object = GF_CALLOC(1, sizeof(*object), gf_br_mt_br_object_t); + if (!object) + goto out; + INIT_LIST_HEAD(&object->list); - object->this = this; - object->child = child; - gf_uuid_copy (object->gfid, ev->u.releasebr.gfid); + object->this = this; + object->child = child; + gf_uuid_copy(object->gfid, ev->u.releasebr.gfid); - /* NOTE: it's BE, but no worry */ - object->signedversion = ev->u.releasebr.version; - object->sign_info = ev->u.releasebr.sign_info; + /* NOTE: it's BE, but no worry */ + object->signedversion = ev->u.releasebr.version; + object->sign_info = ev->u.releasebr.sign_info; out: - return object; + 
return object; } static struct gf_tw_timer_list * -br_initialize_timer (xlator_t *this, br_object_t *object, br_child_t *child, - changelog_event_t *ev) +br_initialize_timer(xlator_t *this, br_object_t *object, br_child_t *child, + changelog_event_t *ev) { - br_private_t *priv = NULL; - struct gf_tw_timer_list *timer = NULL; + br_private_t *priv = NULL; + struct gf_tw_timer_list *timer = NULL; - priv = this->private; + priv = this->private; - timer = mem_get0 (child->timer_pool); - if (!timer) - goto out; - INIT_LIST_HEAD (&timer->entry); + timer = mem_get0(child->timer_pool); + if (!timer) + goto out; + INIT_LIST_HEAD(&timer->entry); - timer->expires = priv->expiry_time; - if (!timer->expires) - timer->expires = 1; + timer->expires = priv->expiry_time; + if (!timer->expires) + timer->expires = 1; - timer->data = object; - timer->function = br_add_object_to_queue; - gf_tw_add_timer (priv->timer_wheel, timer); + timer->data = object; + timer->function = br_add_object_to_queue; + gf_tw_add_timer(priv->timer_wheel, timer); out: - return timer; + return timer; } static int32_t -br_schedule_object_reopen (xlator_t *this, br_object_t *object, - br_child_t *child, changelog_event_t *ev) +br_schedule_object_reopen(xlator_t *this, br_object_t *object, + br_child_t *child, changelog_event_t *ev) { - struct gf_tw_timer_list *timer = NULL; - - timer = br_initialize_timer (this, object, child, ev); - if (!timer) - gf_msg (this->name, GF_LOG_ERROR, 0, BRB_MSG_SET_TIMER_FAILED, - "Failed to allocate object expiry timer [GFID: %s]", - uuid_utoa (object->gfid)); - return timer ? 0 : -1; + struct gf_tw_timer_list *timer = NULL; + + timer = br_initialize_timer(this, object, child, ev); + if (!timer) + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SET_TIMER_FAILED, + "gfid=%s", uuid_utoa(object->gfid), NULL); + return timer ? 
0 : -1; } static int32_t -br_object_quicksign (xlator_t *this, br_object_t *object) +br_object_quicksign(xlator_t *this, br_object_t *object) { - br_add_object_to_queue (NULL, object, 0ULL); - return 0; + br_add_object_to_queue(NULL, object, 0ULL); + return 0; } /** @@ -798,148 +789,146 @@ br_object_quicksign (xlator_t *this, br_object_t *object) * object as a single alloc and bifurcate their respective pointers. */ void -br_brick_callback (void *xl, char *brick, - void *data, changelog_event_t *ev) +br_brick_callback(void *xl, char *brick, void *data, changelog_event_t *ev) { - int32_t ret = 0; - uuid_t gfid = {0,}; - xlator_t *this = NULL; - br_object_t *object = NULL; - br_child_t *child = NULL; - br_sign_state_t sign_info = BR_SIGN_INVALID; - - this = xl; - - GF_VALIDATE_OR_GOTO (this->name, ev, out); - GF_VALIDATE_OR_GOTO ("bit-rot", this, out); - GF_VALIDATE_OR_GOTO (this->name, this->private, out); - - GF_ASSERT (ev->ev_type == CHANGELOG_OP_TYPE_BR_RELEASE); - GF_ASSERT (!gf_uuid_is_null (ev->u.releasebr.gfid)); - - gf_uuid_copy (gfid, ev->u.releasebr.gfid); - - gf_msg_debug (this->name, 0, "RELEASE EVENT [GFID %s]", - uuid_utoa (gfid)); - - child = br_get_child_from_brick_path (this, brick); - if (!child) { - gf_msg (this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_SUBVOL_FAILED, - "failed to get the subvolume for the brick %s", brick); - goto out; - } - - object = br_initialize_object (this, child, ev); - if (!object) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY, - "failed to allocate object memory [GFID: %s]", - uuid_utoa (gfid)); - goto out; - } - - /* sanity check */ - sign_info = ntohl (object->sign_info); - GF_ASSERT (sign_info != BR_SIGN_NORMAL); - - if (sign_info == BR_SIGN_REOPEN_WAIT) - ret = br_schedule_object_reopen (this, object, child, ev); - else - ret = br_object_quicksign (this, object); - - if (ret) - goto free_object; - - gf_msg_debug (this->name, 0, "->callback: brick [%s], type [%d]\n", - brick, ev->ev_type); - return; - - 
free_object: - GF_FREE (object); - out: - return; + int32_t ret = 0; + uuid_t gfid = { + 0, + }; + xlator_t *this = NULL; + br_object_t *object = NULL; + br_child_t *child = NULL; + br_sign_state_t sign_info = BR_SIGN_INVALID; + + this = xl; + + GF_VALIDATE_OR_GOTO(this->name, ev, out); + GF_VALIDATE_OR_GOTO("bit-rot", this, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + + GF_ASSERT(ev->ev_type == CHANGELOG_OP_TYPE_BR_RELEASE); + GF_ASSERT(!gf_uuid_is_null(ev->u.releasebr.gfid)); + + gf_uuid_copy(gfid, ev->u.releasebr.gfid); + + gf_msg_debug(this->name, 0, "RELEASE EVENT [GFID %s]", uuid_utoa(gfid)); + + child = br_get_child_from_brick_path(this, brick); + if (!child) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_SUBVOL_FAILED, + "brick=%s", brick, NULL); + goto out; + } + + object = br_initialize_object(this, child, ev); + if (!object) { + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY, + "object-gfid=%s", uuid_utoa(gfid), NULL); + goto out; + } + + /* sanity check */ + sign_info = ntohl(object->sign_info); + GF_ASSERT(sign_info != BR_SIGN_NORMAL); + + if (sign_info == BR_SIGN_REOPEN_WAIT) + ret = br_schedule_object_reopen(this, object, child, ev); + else + ret = br_object_quicksign(this, object); + + if (ret) + goto free_object; + + gf_msg_debug(this->name, 0, "->callback: brick [%s], type [%d]\n", brick, + ev->ev_type); + return; + +free_object: + GF_FREE(object); +out: + return; } void -br_fill_brick_spec (struct gf_brick_spec *brick, char *path) +br_fill_brick_spec(struct gf_brick_spec *brick, char *path) { - brick->brick_path = gf_strdup (path); - brick->filter = CHANGELOG_OP_TYPE_BR_RELEASE; - - brick->init = br_brick_init; - brick->fini = br_brick_fini; - brick->callback = br_brick_callback; - brick->connected = NULL; - brick->disconnected = NULL; + brick->brick_path = gf_strdup(path); + brick->filter = CHANGELOG_OP_TYPE_BR_RELEASE; + + brick->init = br_brick_init; + brick->fini = br_brick_fini; + brick->callback = 
br_brick_callback; + brick->connected = NULL; + brick->disconnected = NULL; } static gf_boolean_t -br_check_object_need_sign (xlator_t *this, dict_t *xattr, br_child_t *child) +br_check_object_need_sign(xlator_t *this, dict_t *xattr, br_child_t *child) { - int32_t ret = -1; - gf_boolean_t need_sign = _gf_false; - br_isignature_out_t *sign = NULL; - - GF_VALIDATE_OR_GOTO ("bit-rot", this, out); - GF_VALIDATE_OR_GOTO (this->name, xattr, out); - GF_VALIDATE_OR_GOTO (this->name, child, out); - - ret = dict_get_ptr (xattr, GLUSTERFS_GET_OBJECT_SIGNATURE, - (void **)&sign); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_SIGN_FAILED, - "failed to get object signature info"); - goto out; - } + int32_t ret = -1; + gf_boolean_t need_sign = _gf_false; + br_isignature_out_t *sign = NULL; - /* Object has been opened and hence dirty. Do not sign it */ - if (sign->stale) - need_sign = _gf_true; + GF_VALIDATE_OR_GOTO("bit-rot", this, out); + GF_VALIDATE_OR_GOTO(this->name, xattr, out); + GF_VALIDATE_OR_GOTO(this->name, child, out); -out: - return need_sign; -} + ret = dict_get_ptr(xattr, GLUSTERFS_GET_OBJECT_SIGNATURE, (void **)&sign); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_SIGN_FAILED, + "object-info", NULL); + goto out; + } + /* Object has been opened and hence dirty. 
Do not sign it */ + if (sign->stale) + need_sign = _gf_true; +out: + return need_sign; +} int32_t -br_prepare_loc (xlator_t *this, br_child_t *child, loc_t *parent, - gf_dirent_t *entry, loc_t *loc) +br_prepare_loc(xlator_t *this, br_child_t *child, loc_t *parent, + gf_dirent_t *entry, loc_t *loc) { - int32_t ret = -1; - inode_t *inode = NULL; - - inode = inode_grep (child->table, parent->inode, entry->d_name); - if (!inode) - loc->inode = inode_new (child->table); - else { - loc->inode = inode; - if (loc->inode->ia_type != IA_IFREG) { - gf_msg_debug (this->name, 0, "%s is not a regular " - "file", entry->d_name); - ret = 0; - goto out; - } + int32_t ret = -1; + inode_t *inode = NULL; + + inode = inode_grep(child->table, parent->inode, entry->d_name); + if (!inode) + loc->inode = inode_new(child->table); + else { + loc->inode = inode; + if (loc->inode->ia_type != IA_IFREG) { + gf_msg_debug(this->name, 0, + "%s is not a regular " + "file", + entry->d_name); + ret = 0; + goto out; } + } - loc->parent = inode_ref (parent->inode); - gf_uuid_copy (loc->pargfid, parent->inode->gfid); + loc->parent = inode_ref(parent->inode); + gf_uuid_copy(loc->pargfid, parent->inode->gfid); - ret = inode_path (parent->inode, entry->d_name, (char **)&loc->path); - if (ret < 0 || !loc->path) { - gf_msg (this->name, GF_LOG_ERROR, 0, BRB_MSG_PATH_FAILED, - "inode_path on %s (parent: %s) failed", entry->d_name, - uuid_utoa (parent->inode->gfid)); - goto out; - } + ret = inode_path(parent->inode, entry->d_name, (char **)&loc->path); + if (ret < 0 || !loc->path) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_PATH_FAILED, + "inode_path=%s", entry->d_name, "parent-gfid=%s", + uuid_utoa(parent->inode->gfid), NULL); + goto out; + } - loc->name = strrchr (loc->path, '/'); - if (loc->name) - loc->name++; + loc->name = strrchr(loc->path, '/'); + if (loc->name) + loc->name++; - ret = 1; + ret = 1; out: - return ret; + return ret; } /** @@ -949,164 +938,174 @@ out: * last run for whatever reason 
(node crashes, reboots, etc..) become * candidates for signing. This allows the signature to "catch up" with * the current state of the object. Triggering signing is easy: perform - * an open() followed by a close() therby resulting in call boomerang. + * an open() followed by a close() thereby resulting in call boomerang. * (though not back to itself :)) */ int -bitd_oneshot_crawl (xlator_t *subvol, - gf_dirent_t *entry, loc_t *parent, void *data) +bitd_oneshot_crawl(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, + void *data) { - int op_errno = 0; - br_child_t *child = NULL; - xlator_t *this = NULL; - loc_t loc = {0, }; - struct iatt iatt = {0, }; - struct iatt parent_buf = {0, }; - dict_t *xattr = NULL; - int32_t ret = -1; - inode_t *linked_inode = NULL; - gf_boolean_t need_signing = _gf_false; - - GF_VALIDATE_OR_GOTO ("bit-rot", subvol, out); - GF_VALIDATE_OR_GOTO ("bit-rot", data, out); - - child = data; - this = child->this; - - ret = br_prepare_loc (this, child, parent, entry, &loc); - if (!ret) - goto out; - - ret = syncop_lookup (child->xl, &loc, &iatt, &parent_buf, NULL, NULL); - if (ret) { - br_log_object_path (this, "lookup", loc.path, -ret); - goto out; - } - - linked_inode = inode_link (loc.inode, parent->inode, loc.name, &iatt); - if (linked_inode) - inode_lookup (linked_inode); - - if (iatt.ia_type != IA_IFREG) { - gf_msg_debug (this->name, 0, "%s is not a regular file, " - "skipping..", entry->d_name); - ret = 0; - goto unref_inode; - } + int op_errno = 0; + br_child_t *child = NULL; + xlator_t *this = NULL; + loc_t loc = { + 0, + }; + struct iatt iatt = { + 0, + }; + struct iatt parent_buf = { + 0, + }; + dict_t *xattr = NULL; + int32_t ret = -1; + inode_t *linked_inode = NULL; + gf_boolean_t need_signing = _gf_false; + gf_boolean_t need_reopen = _gf_true; + + GF_VALIDATE_OR_GOTO("bit-rot", subvol, out); + GF_VALIDATE_OR_GOTO("bit-rot", data, out); + + child = data; + this = child->this; + + ret = br_prepare_loc(this, child, parent, entry, 
&loc); + if (!ret) + goto out; + + ret = syncop_lookup(child->xl, &loc, &iatt, &parent_buf, NULL, NULL); + if (ret) { + br_log_object_path(this, "lookup", loc.path, -ret); + goto out; + } + + linked_inode = inode_link(loc.inode, parent->inode, loc.name, &iatt); + if (linked_inode) + inode_lookup(linked_inode); + + if (iatt.ia_type != IA_IFREG) { + gf_msg_debug(this->name, 0, + "%s is not a regular file, " + "skipping..", + entry->d_name); + ret = 0; + goto unref_inode; + } + + /** + * As of now, 2 cases are possible and handled. + * 1) GlusterFS is upgraded from a previous version which does not + * have any idea about bit-rot and have data in the filesystem. + * In this case syncop_getxattr fails with ENODATA and the object + * is signed. (In real, when crawler sends lookup, bit-rot-stub + * creates the xattrs before returning lookup reply) + * 2) Bit-rot was not enabled or BitD was does for some reasons, during + * which some files were created, but since BitD was down, were not + * signed. + * If the file was just created and was being written some data when + * the down BitD came up, then bit-rot stub should be intelligent to + * identify this case (by comparing the ongoing version or by checking + * if there are any fds present for that inode) and handle properly. + */ + + if (bitd_is_bad_file(this, child, &loc, NULL)) { + gf_smsg(this->name, GF_LOG_WARNING, 0, BRB_MSG_SKIP_OBJECT, "path=%s", + loc.path, NULL); + goto unref_inode; + } + + ret = syncop_getxattr(child->xl, &loc, &xattr, + GLUSTERFS_GET_OBJECT_SIGNATURE, NULL, NULL); + if (ret < 0) { + op_errno = -ret; + br_log_object(this, "getxattr", linked_inode->gfid, op_errno); /** - * As of now, 2 cases are possible and handled. - * 1) GlusterFS is upgraded from a previous version which does not - * have any idea about bit-rot and have data in the filesystem. - * In this case syncop_getxattr fails with ENODATA and the object - * is signed. 
(In real, when crawler sends lookup, bit-rot-stub - * creates the xattrs before returning lookup reply) - * 2) Bit-rot was not enabled or BitD was dows for some reasons, during - * which some files were created, but since BitD was down, were not - * signed. - * If the file was just created and was being written some data when - * the down BitD came up, then bit-rot stub should be intelligent to - * identify this case (by comparing the ongoing version or by checking - * if there are any fds present for that inode) and handle properly. + * No need to sign the zero byte objects as the signing + * happens upon first modification of the object. */ + if (op_errno == ENODATA && (iatt.ia_size != 0)) + need_signing = _gf_true; + if (op_errno == EINVAL) + gf_smsg(this->name, GF_LOG_WARNING, 0, + BRB_MSG_PARTIAL_VERSION_PRESENCE, "gfid=%s", + uuid_utoa(linked_inode->gfid), NULL); + } else { + need_signing = br_check_object_need_sign(this, xattr, child); - if (bitd_is_bad_file (this, child, &loc, NULL)) { - gf_msg (this->name, GF_LOG_WARNING, 0, BRB_MSG_SKIP_OBJECT, - "Entry [%s] is marked corrupted.. skipping.", loc.path); - goto unref_inode; - } - - ret = syncop_getxattr (child->xl, &loc, &xattr, - GLUSTERFS_GET_OBJECT_SIGNATURE, NULL, NULL); - if (ret < 0) { - op_errno = -ret; - br_log_object (this, "getxattr", linked_inode->gfid, op_errno); - - /** - * No need to sign the zero byte objects as the signing - * happens upon first modification of the object. - */ - if (op_errno == ENODATA && (iatt.ia_size != 0)) - need_signing = _gf_true; - if (op_errno == EINVAL) - gf_msg (this->name, GF_LOG_WARNING, 0, - BRB_MSG_PARTIAL_VERSION_PRESENCE, "Partial " - "version xattr presence detected, ignoring " - "[GFID: %s]", uuid_utoa (linked_inode->gfid)); - } else { - need_signing = br_check_object_need_sign (this, xattr, child); + /* + * If we are here means, bitrot daemon has started. 
Is it just + * a simple restart of the daemon or is it started because the + * feature is enabled is something hard to determine. Hence, + * if need_signing is false (because bit-rot version and signature + * are present), then still go ahead and sign it. + */ + if (!need_signing) { + need_signing = _gf_true; + need_reopen = _gf_true; } + } - if (!need_signing) - goto unref_dict; + if (!need_signing) + goto unref_dict; - gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_TRIGGER_SIGN, - "Triggering signing for %s [GFID: %s | Brick: %s]", - loc.path, uuid_utoa (linked_inode->gfid), child->brick_path); - br_trigger_sign (this, child, linked_inode, &loc, _gf_true); + gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_TRIGGER_SIGN, "path=%s", + loc.path, "gfid=%s", uuid_utoa(linked_inode->gfid), "Brick-path=%s", + child->brick_path, NULL); + br_trigger_sign(this, child, linked_inode, &loc, need_reopen); - ret = 0; + ret = 0; - unref_dict: - if (xattr) - dict_unref (xattr); - unref_inode: - inode_unref (linked_inode); - out: - loc_wipe (&loc); +unref_dict: + if (xattr) + dict_unref(xattr); +unref_inode: + inode_unref(linked_inode); +out: + loc_wipe(&loc); - return ret; + return ret; } #define BR_CRAWL_THROTTLE_COUNT 50 -#define BR_CRAWL_THROTTLE_ZZZ 5 +#define BR_CRAWL_THROTTLE_ZZZ 5 void * -br_oneshot_signer (void *arg) +br_oneshot_signer(void *arg) { - loc_t loc = {0,}; - xlator_t *this = NULL; - br_child_t *child = NULL; + loc_t loc = { + 0, + }; + xlator_t *this = NULL; + br_child_t *child = NULL; - child = arg; - this = child->this; + child = arg; + this = child->this; - THIS = this; + THIS = this; - gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_CRAWLING_START, - "Crawling brick [%s], scanning for unsigned objects", - child->brick_path); + gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_CRAWLING_START, "brick-path=%s", + child->brick_path, NULL); - loc.inode = child->table->root; - (void) syncop_ftw_throttle - (child->xl, &loc, - GF_CLIENT_PID_BITD, child, bitd_oneshot_crawl, - 
BR_CRAWL_THROTTLE_COUNT, BR_CRAWL_THROTTLE_ZZZ); + loc.inode = child->table->root; + (void)syncop_ftw_throttle(child->xl, &loc, GF_CLIENT_PID_BITD, child, + bitd_oneshot_crawl, BR_CRAWL_THROTTLE_COUNT, + BR_CRAWL_THROTTLE_ZZZ); - gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_CRAWLING_FINISH, - "Completed crawling brick [%s]", child->brick_path); + gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_CRAWLING_FINISH, + "brick-path=%s", child->brick_path, NULL); - return NULL; + return NULL; } static void -br_set_child_state (br_child_t *child, br_child_state_t state) +br_set_child_state(br_child_t *child, br_child_state_t state) { - LOCK (&child->lock); - { - _br_set_child_state (child, state); - } - UNLOCK (&child->lock); -} - -static void -br_set_scrub_state (br_child_t *child, br_scrub_state_t state) -{ - LOCK (&child->lock); - { - _br_child_set_scrub_state (child, state); - } - UNLOCK (&child->lock); + pthread_mutex_lock(&child->lock); + { + _br_set_child_state(child, state); + } + pthread_mutex_unlock(&child->lock); } /** @@ -1121,164 +1120,157 @@ br_set_scrub_state (br_child_t *child, br_scrub_state_t state) * notifications. 
*/ static int32_t -br_enact_signer (xlator_t *this, br_child_t *child, br_stub_init_t *stub) +br_enact_signer(xlator_t *this, br_child_t *child, br_stub_init_t *stub) { - int32_t ret = 0; - br_private_t *priv = NULL; - struct gf_brick_spec *brick = NULL; - - priv = this->private; - - brick = GF_CALLOC (1, sizeof (struct gf_brick_spec), - gf_common_mt_gf_brick_spec_t); - if (!brick) - goto error_return; - - br_fill_brick_spec (brick, stub->export); - ret = gf_changelog_register_generic - (brick, 1, 1, this->ctx->cmd_args.log_file, -1, this); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, errno, - BRB_MSG_REGISTER_FAILED, "Register to changelog " - "failed"); - goto dealloc; - } - - child->threadrunning = 0; - ret = gf_thread_create (&child->thread, NULL, br_oneshot_signer, child); - if (ret) - gf_msg (this->name, GF_LOG_WARNING, 0, BRB_MSG_SPAWN_FAILED, - "failed to spawn FS crawler thread"); - else - child->threadrunning = 1; - - /* it's OK to continue, "old" objects would be signed when modified */ - list_add_tail (&child->list, &priv->signing); - return 0; - - dealloc: - GF_FREE (brick); - error_return: - return -1; + int32_t ret = 0; + br_private_t *priv = NULL; + struct gf_brick_spec *brick = NULL; + + priv = this->private; + + brick = GF_CALLOC(1, sizeof(struct gf_brick_spec), + gf_common_mt_gf_brick_spec_t); + if (!brick) + goto error_return; + + br_fill_brick_spec(brick, stub->export); + ret = gf_changelog_register_generic(brick, 1, 1, + this->ctx->cmd_args.log_file, -1, this); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, BRB_MSG_REGISTER_FAILED, NULL); + goto dealloc; + } + + child->threadrunning = 0; + ret = gf_thread_create(&child->thread, NULL, br_oneshot_signer, child, + "brosign"); + if (ret) + gf_smsg(this->name, GF_LOG_WARNING, 0, BRB_MSG_SPAWN_FAILED, + "FS-crawler-thread", NULL); + else + child->threadrunning = 1; + + /* it's OK to continue, "old" objects would be signed when modified */ + list_add_tail(&child->list, &priv->signing); + 
return 0; + +dealloc: + GF_FREE(brick); +error_return: + return -1; } static int32_t -br_launch_scrubber (xlator_t *this, br_child_t *child, - struct br_scanfs *fsscan, struct br_scrubber *fsscrub) +br_launch_scrubber(xlator_t *this, br_child_t *child, struct br_scanfs *fsscan, + struct br_scrubber *fsscrub) { - int32_t ret = -1; - br_private_t *priv = NULL; + int32_t ret = -1; + br_private_t *priv = NULL; + struct br_monitor *scrub_monitor = NULL; + + priv = this->private; + + scrub_monitor = &priv->scrub_monitor; + ret = gf_thread_create(&child->thread, NULL, br_fsscanner, child, + "brfsscan"); + if (ret != 0) { + gf_smsg(this->name, GF_LOG_ALERT, 0, BRB_MSG_SPAWN_FAILED, + "bitrot-scrubber-daemon Brick-path=%s", child->brick_path, + NULL); + goto error_return; + } + + /* Signal monitor to kick off state machine*/ + pthread_mutex_lock(&scrub_monitor->mutex); + { + if (!scrub_monitor->inited) + pthread_cond_signal(&scrub_monitor->cond); + scrub_monitor->inited = _gf_true; + } + pthread_mutex_unlock(&scrub_monitor->mutex); + + /** + * Everything has been setup.. add this subvolume to scrubbers + * list. + */ + pthread_mutex_lock(&fsscrub->mutex); + { + list_add_tail(&child->list, &fsscrub->scrublist); + pthread_cond_broadcast(&fsscrub->cond); + } + pthread_mutex_unlock(&fsscrub->mutex); + + return 0; - priv = this->private; - - fsscan->kick = _gf_false; - fsscan->over = _gf_false; - ret = gf_thread_create (&child->thread, NULL, br_fsscanner, child); - if (ret != 0) { - gf_msg (this->name, GF_LOG_ALERT, 0, BRB_MSG_SPAWN_FAILED, - "failed to spawn bitrot scrubber daemon [Brick: %s]", - child->brick_path); - goto error_return; - } - - /* this needs to be serialized with reconfigure() */ - pthread_mutex_lock (&priv->lock); - { - ret = br_scrub_state_machine (this, child); - } - pthread_mutex_unlock (&priv->lock); - if (ret) - goto cleanup_thread; - - /** - * Everything has been setup.. add this subvolume to scrubbers - * list. 
- */ - pthread_mutex_lock (&fsscrub->mutex); - { - list_add_tail (&child->list, &fsscrub->scrublist); - pthread_cond_broadcast (&fsscrub->cond); - } - pthread_mutex_unlock (&fsscrub->mutex); - - return 0; - - cleanup_thread: - (void) gf_thread_cleanup_xint (child->thread); - error_return: - return -1; +error_return: + return -1; } static int32_t -br_enact_scrubber (xlator_t *this, br_child_t *child) +br_enact_scrubber(xlator_t *this, br_child_t *child) { - int32_t ret = 0; - br_private_t *priv = NULL; - struct br_scanfs *fsscan = NULL; - struct br_scrubber *fsscrub = NULL; + int32_t ret = 0; + br_private_t *priv = NULL; + struct br_scanfs *fsscan = NULL; + struct br_scrubber *fsscrub = NULL; - priv = this->private; + priv = this->private; - fsscan = &child->fsscan; - fsscrub = &priv->fsscrub; + fsscan = &child->fsscan; + fsscrub = &priv->fsscrub; - /** - * if this child already witnesses a successfull connection earlier - * there's no need to initialize mutexes, condvars, etc.. - */ - if (_br_child_witnessed_connection (child)) - return br_launch_scrubber (this, child, fsscan, fsscrub); + /** + * if this child already witnesses a successful connection earlier + * there's no need to initialize mutexes, condvars, etc.. 
+ */ + if (_br_child_witnessed_connection(child)) + return br_launch_scrubber(this, child, fsscan, fsscrub); - LOCK_INIT (&fsscan->entrylock); - pthread_mutex_init (&fsscan->waitlock, NULL); - pthread_cond_init (&fsscan->waitcond, NULL); + LOCK_INIT(&fsscan->entrylock); + pthread_mutex_init(&fsscan->waitlock, NULL); + pthread_cond_init(&fsscan->waitcond, NULL); - fsscan->entries = 0; - INIT_LIST_HEAD (&fsscan->queued); - INIT_LIST_HEAD (&fsscan->ready); + fsscan->entries = 0; + INIT_LIST_HEAD(&fsscan->queued); + INIT_LIST_HEAD(&fsscan->ready); - /* init scheduler related variables */ - pthread_mutex_init (&fsscan->wakelock, NULL); - pthread_cond_init (&fsscan->wakecond, NULL); + ret = br_launch_scrubber(this, child, fsscan, fsscrub); + if (ret) + goto error_return; - ret = br_launch_scrubber (this, child, fsscan, fsscrub); - if (ret) - goto error_return; + return 0; - return 0; - - error_return: - LOCK_DESTROY (&fsscan->entrylock); - pthread_mutex_destroy (&fsscan->waitlock); - pthread_cond_destroy (&fsscan->waitcond); +error_return: + LOCK_DESTROY(&fsscan->entrylock); + pthread_mutex_destroy(&fsscan->waitlock); + pthread_cond_destroy(&fsscan->waitcond); - return -1; + return -1; } static int32_t -br_child_enaction (xlator_t *this, br_child_t *child, br_stub_init_t *stub) +br_child_enaction(xlator_t *this, br_child_t *child, br_stub_init_t *stub) { - int32_t ret = -1; - br_private_t *priv = this->private; + int32_t ret = -1; + br_private_t *priv = this->private; - LOCK (&child->lock); - { - if (priv->iamscrubber) - ret = br_enact_scrubber (this, child); - else - ret = br_enact_signer (this, child, stub); - - if (!ret) { - child->witnessed = 1; - _br_set_child_state (child, BR_CHILD_STATE_CONNECTED); - gf_msg (this->name, GF_LOG_INFO, - 0, BRB_MSG_CONNECTED_TO_BRICK, - "Connected to brick %s..", child->brick_path); - } + pthread_mutex_lock(&child->lock); + { + if (priv->iamscrubber) + ret = br_enact_scrubber(this, child); + else + ret = br_enact_signer(this, child, 
stub); + + if (!ret) { + child->witnessed = 1; + _br_set_child_state(child, BR_CHILD_STATE_CONNECTED); + gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_CONNECTED_TO_BRICK, + "brick-path=%s", child->brick_path, NULL); } - UNLOCK (&child->lock); + } + pthread_mutex_unlock(&child->lock); - return ret; + return ret; } /** @@ -1289,138 +1281,130 @@ br_child_enaction (xlator_t *this, br_child_t *child, br_stub_init_t *stub) * process either acts as a signer or a scrubber. */ int32_t -br_brick_connect (xlator_t *this, br_child_t *child) +br_brick_connect(xlator_t *this, br_child_t *child) { - int32_t ret = -1; - loc_t loc = {0, }; - struct iatt buf = {0, }; - struct iatt parent = {0, }; - br_stub_init_t *stub = NULL; - dict_t *xattr = NULL; - int op_errno = 0; - - GF_VALIDATE_OR_GOTO ("bit-rot", this, out); - GF_VALIDATE_OR_GOTO (this->name, child, out); - GF_VALIDATE_OR_GOTO (this->name, this->private, out); - - br_set_child_state (child, BR_CHILD_STATE_INITIALIZING); - - loc.inode = inode_ref (child->table->root); - gf_uuid_copy (loc.gfid, loc.inode->gfid); - loc.path = gf_strdup ("/"); - - ret = syncop_lookup (child->xl, &loc, &buf, &parent, NULL, NULL); - if (ret) { - op_errno = -ret; - ret = -1; - gf_msg (this->name, GF_LOG_ERROR, op_errno, - BRB_MSG_LOOKUP_FAILED, "lookup on root failed"); - goto wipeloc; - } - - ret = syncop_getxattr (child->xl, &loc, &xattr, - GLUSTERFS_GET_BR_STUB_INIT_TIME, NULL, NULL); - if (ret) { - op_errno = -ret; - ret = -1; - gf_msg (this->name, GF_LOG_ERROR, op_errno, - BRB_MSG_GET_INFO_FAILED, "failed to get stub info"); - goto wipeloc; - } - - ret = dict_get_ptr (xattr, GLUSTERFS_GET_BR_STUB_INIT_TIME, - (void **)&stub); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_INFO_FAILED, - "failed to extract stub information"); - goto free_dict; - } - - memcpy (child->brick_path, stub->export, strlen (stub->export) + 1); - child->tv.tv_sec = ntohl (stub->timebuf[0]); - child->tv.tv_usec = ntohl (stub->timebuf[1]); - - ret = 
br_child_enaction (this, child, stub); - - free_dict: - dict_unref (xattr); - wipeloc: - loc_wipe (&loc); - out: - if (ret) - br_set_child_state (child, BR_CHILD_STATE_CONNFAILED); - return ret; + int32_t ret = -1; + loc_t loc = { + 0, + }; + struct iatt buf = { + 0, + }; + struct iatt parent = { + 0, + }; + br_stub_init_t *stub = NULL; + dict_t *xattr = NULL; + int op_errno = 0; + + GF_VALIDATE_OR_GOTO("bit-rot", this, out); + GF_VALIDATE_OR_GOTO(this->name, child, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + + br_child_set_scrub_state(child, _gf_false); + br_set_child_state(child, BR_CHILD_STATE_INITIALIZING); + + loc.inode = inode_ref(child->table->root); + gf_uuid_copy(loc.gfid, loc.inode->gfid); + loc.path = gf_strdup("/"); + + ret = syncop_lookup(child->xl, &loc, &buf, &parent, NULL, NULL); + if (ret) { + op_errno = -ret; + ret = -1; + gf_smsg(this->name, GF_LOG_ERROR, op_errno, BRB_MSG_LOOKUP_FAILED, + NULL); + goto wipeloc; + } + + ret = syncop_getxattr(child->xl, &loc, &xattr, + GLUSTERFS_GET_BR_STUB_INIT_TIME, NULL, NULL); + if (ret) { + op_errno = -ret; + ret = -1; + gf_smsg(this->name, GF_LOG_ERROR, op_errno, BRB_MSG_GET_INFO_FAILED, + NULL); + goto wipeloc; + } + + ret = dict_get_ptr(xattr, GLUSTERFS_GET_BR_STUB_INIT_TIME, (void **)&stub); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_INFO_FAILED, NULL); + goto free_dict; + } + + memcpy(child->brick_path, stub->export, strlen(stub->export) + 1); + child->tv.tv_sec = ntohl(stub->timebuf[0]); + child->tv.tv_usec = ntohl(stub->timebuf[1]); + + ret = br_child_enaction(this, child, stub); + +free_dict: + dict_unref(xattr); +wipeloc: + loc_wipe(&loc); +out: + if (ret) + br_set_child_state(child, BR_CHILD_STATE_CONNFAILED); + return ret; } /* TODO: cleanup signer */ static int32_t -br_cleanup_signer (xlator_t *this, br_child_t *child) +br_cleanup_signer(xlator_t *this, br_child_t *child) { - return 0; + return 0; } static int32_t -br_cleanup_scrubber (xlator_t *this, 
br_child_t *child) +br_cleanup_scrubber(xlator_t *this, br_child_t *child) { - int32_t ret = 0; - br_private_t *priv = NULL; - struct br_scanfs *fsscan = NULL; - struct br_scrubber *fsscrub = NULL; - - priv = this->private; - fsscan = &child->fsscan; - fsscrub = &priv->fsscrub; - - /** - * 0x0: child (brick) goes out of rotation - * - * This is fully safe w.r.t. entries for this child being actively - * scrubbed. Each of the scrubber thread(s) would finish scrubbing - * the entry (probably failing due to disconnection) and either - * putting the entry back into the queue or continuing further. - * Either way, pending entries for this child's queue need not be - * drained; entries just sit there in the queued/ready list to be - * consumed later upon re-connection. - */ - pthread_mutex_lock (&fsscrub->mutex); - { - list_del_init (&child->list); - } - pthread_mutex_unlock (&fsscrub->mutex); - - /** - * 0x1: cleanup scanner thread - * - * The pending timer needs to be removed _after_ cleaning up the - * filesystem scanner (scheduling the next scrub time is not a - * cancellation point). 
- */ - ret = gf_thread_cleanup_xint (child->thread); - if (ret) - gf_msg (this->name, GF_LOG_INFO, - 0, BRB_MSG_SCRUB_THREAD_CLEANUP, - "Error cleaning up scanner thread"); - - /** - * 0x2: free()up resources - */ - if (fsscan->timer) { - (void) gf_tw_del_timer (priv->timer_wheel, fsscan->timer); - - GF_FREE (fsscan->timer); - fsscan->timer = NULL; - } - - /** - * 0x3: reset scrubber state - */ - _br_child_set_scrub_state (child, BR_SCRUB_STATE_INACTIVE); - - gf_msg (this->name, GF_LOG_INFO, - 0, BRB_MSG_SCRUBBER_CLEANED, - "Cleaned up scrubber for brick [%s]", child->brick_path); - - return 0; + int32_t ret = 0; + br_private_t *priv = NULL; + struct br_scrubber *fsscrub = NULL; + struct br_monitor *scrub_monitor = NULL; + + priv = this->private; + fsscrub = &priv->fsscrub; + scrub_monitor = &priv->scrub_monitor; + + if (_br_is_child_scrub_active(child)) { + scrub_monitor->active_child_count--; + br_child_set_scrub_state(child, _gf_false); + } + + /** + * 0x0: child (brick) goes out of rotation + * + * This is fully safe w.r.t. entries for this child being actively + * scrubbed. Each of the scrubber thread(s) would finish scrubbing + * the entry (probably failing due to disconnection) and either + * putting the entry back into the queue or continuing further. + * Either way, pending entries for this child's queue need not be + * drained; entries just sit there in the queued/ready list to be + * consumed later upon re-connection. + */ + pthread_mutex_lock(&fsscrub->mutex); + { + list_del_init(&child->list); + } + pthread_mutex_unlock(&fsscrub->mutex); + + /** + * 0x1: cleanup scanner thread + * + * The pending timer needs to be removed _after_ cleaning up the + * filesystem scanner (scheduling the next scrub time is not a + * cancellation point). 
+ */ + ret = gf_thread_cleanup_xint(child->thread); + if (ret) + gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_THREAD_CLEANUP, NULL); + + gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUBBER_CLEANED, + "brick-path=%s", child->brick_path, NULL); + + return 0; } /** @@ -1429,28 +1413,38 @@ br_cleanup_scrubber (xlator_t *this, br_child_t *child) * the inode table, it's just reused taking care of stale inodes) */ int32_t -br_brick_disconnect (xlator_t *this, br_child_t *child) +br_brick_disconnect(xlator_t *this, br_child_t *child) { - int32_t ret = 0; - br_private_t *priv = this->private; - - LOCK (&child->lock); + int32_t ret = 0; + struct br_monitor *scrub_monitor = NULL; + br_private_t *priv = this->private; + + scrub_monitor = &priv->scrub_monitor; + + /* Lock order should be wakelock and then child lock to + * avoid deadlocks. + */ + pthread_mutex_lock(&scrub_monitor->wakelock); + { + pthread_mutex_lock(&child->lock); { - if (!_br_is_child_connected (child)) - goto unblock; + if (!_br_is_child_connected(child)) + goto unblock; - /* child is on death row.. */ - _br_set_child_state (child, BR_CHILD_STATE_DISCONNECTED); + /* child is on death row.. */ + _br_set_child_state(child, BR_CHILD_STATE_DISCONNECTED); - if (priv->iamscrubber) - ret = br_cleanup_scrubber (this, child); - else - ret = br_cleanup_signer (this, child); + if (priv->iamscrubber) + ret = br_cleanup_scrubber(this, child); + else + ret = br_cleanup_signer(this, child); } - unblock: - UNLOCK (&child->lock); + unblock: + pthread_mutex_unlock(&child->lock); + } + pthread_mutex_unlock(&scrub_monitor->wakelock); - return ret; + return ret; } /** @@ -1459,235 +1453,341 @@ br_brick_disconnect (xlator_t *this, br_child_t *child) * information regarding that brick (such as brick path). 
*/ void * -br_handle_events (void *arg) +br_handle_events(void *arg) { - int32_t ret = 0; - xlator_t *this = NULL; - br_private_t *priv = NULL; - br_child_t *child = NULL; - struct br_child_event *childev = NULL; - - this = arg; - priv = this->private; + int32_t ret = 0; + xlator_t *this = NULL; + br_private_t *priv = NULL; + br_child_t *child = NULL; + struct br_child_event *childev = NULL; + + this = arg; + priv = this->private; + + /* + * Since, this is the topmost xlator, THIS has to be set by bit-rot + * xlator itself (STACK_WIND won't help in this case). Also it has + * to be done for each thread that gets spawned. Otherwise, a new + * thread will get global_xlator's pointer when it does "THIS". + */ + THIS = this; + + while (1) { + pthread_mutex_lock(&priv->lock); + { + while (list_empty(&priv->bricks)) + pthread_cond_wait(&priv->cond, &priv->lock); - /* - * Since, this is the topmost xlator, THIS has to be set by bit-rot - * xlator itself (STACK_WIND wont help in this case). Also it has - * to be done for each thread that gets spawned. Otherwise, a new - * thread will get global_xlator's pointer when it does "THIS". 
- */ - THIS = this; - - while (1) { - pthread_mutex_lock (&priv->lock); - { - while (list_empty (&priv->bricks)) - pthread_cond_wait (&priv->cond, &priv->lock); - - childev = list_first_entry - (&priv->bricks, struct br_child_event, list); - list_del_init (&childev->list); - } - pthread_mutex_unlock (&priv->lock); - - child = childev->child; - ret = childev->call (this, child); - if (ret) - gf_msg (this->name, GF_LOG_ERROR, 0, - BRB_MSG_SUBVOL_CONNECT_FAILED, - "callback handler for subvolume [%s] failed", - child->xl->name); - GF_FREE (childev); + childev = list_first_entry(&priv->bricks, struct br_child_event, + list); + list_del_init(&childev->list); } + pthread_mutex_unlock(&priv->lock); - return NULL; + child = childev->child; + ret = childev->call(this, child); + if (ret) + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SUBVOL_CONNECT_FAILED, + "name=%s", child->xl->name, NULL); + GF_FREE(childev); + } + + return NULL; } int32_t -mem_acct_init (xlator_t *this) +mem_acct_init(xlator_t *this) { - int32_t ret = -1; - - if (!this) - return ret; + int32_t ret = -1; - ret = xlator_mem_acct_init (this, gf_br_stub_mt_end + 1); + if (!this) + return ret; - if (ret != 0) { - gf_msg (this->name, GF_LOG_WARNING, 0, BRB_MSG_MEM_ACNT_FAILED, - "Memory accounting init failed"); - return ret; - } + ret = xlator_mem_acct_init(this, gf_br_stub_mt_end + 1); + if (ret != 0) { + gf_smsg(this->name, GF_LOG_WARNING, 0, BRB_MSG_MEM_ACNT_FAILED, NULL); return ret; + } + + return ret; } static void -_br_qchild_event (xlator_t *this, br_child_t *child, br_child_handler *call) +_br_qchild_event(xlator_t *this, br_child_t *child, br_child_handler *call) { - br_private_t *priv = NULL; - struct br_child_event *childev = NULL; + br_private_t *priv = NULL; + struct br_child_event *childev = NULL; - priv = this->private; + priv = this->private; - childev = GF_CALLOC (1, sizeof (*childev), gf_br_mt_br_child_event_t); - if (!childev) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, 
BRB_MSG_NO_MEMORY, - "Event unhandled for child.. [Brick: %s]", - child->xl->name); - return; - } + childev = GF_CALLOC(1, sizeof(*childev), gf_br_mt_br_child_event_t); + if (!childev) { + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_EVENT_UNHANDLED, + "Brick-name=%s", child->xl->name, NULL); + return; + } - INIT_LIST_HEAD (&childev->list); - childev->this = this; - childev->child = child; - childev->call = call; + INIT_LIST_HEAD(&childev->list); + childev->this = this; + childev->child = child; + childev->call = call; - list_add_tail (&childev->list, &priv->bricks); + list_add_tail(&childev->list, &priv->bricks); } int -notify (xlator_t *this, int32_t event, void *data, ...) +br_scrubber_status_get(xlator_t *this, dict_t **dict) { - int idx = -1; - xlator_t *subvol = NULL; - br_child_t *child = NULL; - br_private_t *priv = NULL; + int ret = -1; + br_private_t *priv = NULL; + struct br_scrub_stats *scrub_stats = NULL; + + priv = this->private; + + GF_VALIDATE_OR_GOTO("bit-rot", priv, out); + + scrub_stats = &priv->scrub_stat; + + ret = br_get_bad_objects_list(this, dict); + if (ret) { + gf_msg_debug(this->name, 0, + "Failed to collect corrupt " + "files"); + } + + ret = dict_set_int8(*dict, "scrub-running", scrub_stats->scrub_running); + if (ret) { + gf_msg_debug(this->name, 0, + "Failed setting scrub_running " + "entry to the dictionary"); + } + + ret = dict_set_uint64(*dict, "scrubbed-files", scrub_stats->scrubbed_files); + if (ret) { + gf_msg_debug(this->name, 0, + "Failed to setting scrubbed file " + "entry to the dictionary"); + } + + ret = dict_set_uint64(*dict, "unsigned-files", scrub_stats->unsigned_files); + if (ret) { + gf_msg_debug(this->name, 0, + "Failed to set unsigned file count" + " entry to the dictionary"); + } + + ret = dict_set_uint64(*dict, "scrub-duration", scrub_stats->scrub_duration); + if (ret) { + gf_msg_debug(this->name, 0, + "Failed to set scrub duration" + " entry to the dictionary"); + } + + ret = dict_set_dynstr_with_alloc(*dict, 
"last-scrub-time", + scrub_stats->last_scrub_time); + if (ret) { + gf_msg_debug(this->name, 0, + "Failed to set " + "last scrub time value"); + } - subvol = (xlator_t *)data; - priv = this->private; +out: + return ret; +} - gf_msg_trace (this->name, 0, "Notification received: %d", event); +int +notify(xlator_t *this, int32_t event, void *data, ...) +{ + int idx = -1; + int ret = -1; + xlator_t *subvol = NULL; + br_child_t *child = NULL; + br_private_t *priv = NULL; + dict_t *output = NULL; + va_list ap; + struct br_monitor *scrub_monitor = NULL; + + subvol = (xlator_t *)data; + priv = this->private; + scrub_monitor = &priv->scrub_monitor; + + gf_msg_trace(this->name, 0, "Notification received: %d", event); - idx = br_find_child_index (this, subvol); + idx = br_find_child_index(this, subvol); - switch (event) { + switch (event) { case GF_EVENT_CHILD_UP: - if (idx < 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - BRB_MSG_INVALID_SUBVOL, "Got event %d from " - "invalid subvolume", event); - goto out; - } - - pthread_mutex_lock (&priv->lock); - { - child = &priv->children[idx]; - if (child->child_up == 1) - goto unblock_0; - priv->up_children++; - - child->child_up = 1; - child->xl = subvol; - if (!child->table) - child->table = inode_table_new (4096, subvol); - - _br_qchild_event (this, child, br_brick_connect); - pthread_cond_signal (&priv->cond); - } + if (idx < 0) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_INVALID_SUBVOL, + "event=%d", event, NULL); + goto out; + } + + pthread_mutex_lock(&priv->lock); + { + child = &priv->children[idx]; + if (child->child_up == 1) + goto unblock_0; + priv->up_children++; + + child->child_up = 1; + child->xl = subvol; + if (!child->table) + child->table = inode_table_new(4096, subvol); + + _br_qchild_event(this, child, br_brick_connect); + pthread_cond_signal(&priv->cond); + } unblock_0: - pthread_mutex_unlock (&priv->lock); + pthread_mutex_unlock(&priv->lock); - if (priv->up_children == priv->child_count) - default_notify 
(this, event, data); - break; + if (priv->up_children == priv->child_count) + default_notify(this, event, data); + break; case GF_EVENT_CHILD_DOWN: - if (idx < 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - BRB_MSG_INVALID_SUBVOL_CHILD, - "Got event %d from invalid subvolume", event); - goto out; - } - - pthread_mutex_lock (&priv->lock); - { - child = &priv->children[idx]; - if (child->child_up == 0) - goto unblock_1; - - child->child_up = 0; - priv->up_children--; - - _br_qchild_event (this, child, br_brick_disconnect); - pthread_cond_signal (&priv->cond); - } - unblock_1: - pthread_mutex_unlock (&priv->lock); + if (idx < 0) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_INVALID_SUBVOL, + "event=%d", event, NULL); + goto out; + } - if (priv->up_children == 0) - default_notify (this, event, data); - break; + pthread_mutex_lock(&priv->lock); + { + child = &priv->children[idx]; + if (child->child_up == 0) + goto unblock_1; + + child->child_up = 0; + priv->up_children--; + _br_qchild_event(this, child, br_brick_disconnect); + pthread_cond_signal(&priv->cond); + } + unblock_1: + pthread_mutex_unlock(&priv->lock); + + if (priv->up_children == 0) + default_notify(this, event, data); + break; + + case GF_EVENT_SCRUB_STATUS: + gf_msg_debug(this->name, GF_LOG_INFO, + "BitRot scrub status " + "called"); + va_start(ap, data); + output = va_arg(ap, dict_t *); + va_end(ap); + + ret = br_scrubber_status_get(this, &output); + gf_msg_debug(this->name, 0, "returning %d", ret); + break; + + case GF_EVENT_SCRUB_ONDEMAND: + gf_log(this->name, GF_LOG_INFO, + "BitRot scrub ondemand " + "called"); + + if (scrub_monitor->state != BR_SCRUB_STATE_PENDING) { + gf_smsg(this->name, GF_LOG_ERROR, 0, + BRB_MSG_RESCHEDULE_SCRUBBER_FAILED, "Current-state=%d", + scrub_monitor->state, NULL); + return -2; + } + + /* Needs synchronization with reconfigure thread */ + pthread_mutex_lock(&priv->lock); + { + ret = br_scrub_state_machine(this, _gf_true); + } + pthread_mutex_unlock(&priv->lock); + + if 
(ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, + BRB_MSG_COULD_NOT_SCHEDULE_SCRUB, NULL); + } + gf_msg_debug(this->name, 0, "returning %d", ret); + break; default: - default_notify (this, event, data); - } + default_notify(this, event, data); + } - out: - return 0; +out: + return 0; } -/** - * Initialize signer specific structures, spawn worker threads. - */ - static void -br_fini_signer (xlator_t *this, br_private_t *priv) +br_fini_signer(xlator_t *this, br_private_t *priv) { - int i = 0; + int i = 0; - for (; i < BR_WORKERS; i++) { - (void) gf_thread_cleanup_xint (priv->obj_queue->workers[i]); - } + if (priv == NULL) + return; + + for (; i < priv->signer_th_count; i++) { + (void)gf_thread_cleanup_xint(priv->obj_queue->workers[i]); + } + GF_FREE(priv->obj_queue->workers); - pthread_cond_destroy (&priv->object_cond); + pthread_cond_destroy(&priv->object_cond); } +/** + * Initialize signer specific structures, spawn worker threads. + */ + static int32_t -br_init_signer (xlator_t *this, br_private_t *priv) +br_init_signer(xlator_t *this, br_private_t *priv) { - int i = 0; - int32_t ret = -1; - - /* initialize gfchangelog xlator context */ - ret = gf_changelog_init (this); - if (ret) - goto out; - - pthread_cond_init (&priv->object_cond, NULL); - - priv->obj_queue = GF_CALLOC (1, sizeof (*priv->obj_queue), - gf_br_mt_br_ob_n_wk_t); - if (!priv->obj_queue) - goto cleanup_cond; - INIT_LIST_HEAD (&priv->obj_queue->objects); - - for (i = 0; i < BR_WORKERS; i++) { - ret = gf_thread_create (&priv->obj_queue->workers[i], NULL, - br_process_object, this); - if (ret != 0) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - BRB_MSG_SPAWN_FAILED, "thread creation" - " failed"); - ret = -1; - goto cleanup_threads; - } + int i = 0; + int32_t ret = -1; + + /* initialize gfchangelog xlator context */ + ret = gf_changelog_init(this); + if (ret) + goto out; + + pthread_cond_init(&priv->object_cond, NULL); + + priv->obj_queue = GF_CALLOC(1, sizeof(*priv->obj_queue), + gf_br_mt_br_ob_n_wk_t); 
+ if (!priv->obj_queue) + goto cleanup_cond; + INIT_LIST_HEAD(&priv->obj_queue->objects); + + priv->obj_queue->workers = GF_CALLOC( + priv->signer_th_count, sizeof(pthread_t), gf_br_mt_br_worker_t); + if (!priv->obj_queue->workers) + goto cleanup_obj_queue; + + for (i = 0; i < priv->signer_th_count; i++) { + ret = gf_thread_create(&priv->obj_queue->workers[i], NULL, + br_process_object, this, "brpobj"); + if (ret != 0) { + gf_smsg(this->name, GF_LOG_ERROR, -ret, + BRB_MSG_THREAD_CREATION_FAILED, NULL); + ret = -1; + goto cleanup_threads; } + } - return 0; + return 0; - cleanup_threads: - for (i--; i >= 0; i--) { - (void) gf_thread_cleanup_xint (priv->obj_queue->workers[i]); - } +cleanup_threads: + for (i--; i >= 0; i--) { + (void)gf_thread_cleanup_xint(priv->obj_queue->workers[i]); + } + GF_FREE(priv->obj_queue->workers); - GF_FREE (priv->obj_queue); +cleanup_obj_queue: + GF_FREE(priv->obj_queue); - cleanup_cond: - /* that's explicit */ - pthread_cond_destroy (&priv->object_cond); - out: - return -1; +cleanup_cond: + /* that's explicit */ + pthread_cond_destroy(&priv->object_cond); +out: + return -1; } /** @@ -1696,334 +1796,358 @@ br_init_signer (xlator_t *this, br_private_t *priv) * throttle. */ static int32_t -br_rate_limit_signer (xlator_t *this, int child_count, int numbricks) +br_rate_limit_signer(xlator_t *this, int child_count, int numbricks) { - br_private_t *priv = NULL; - br_tbf_opspec_t spec = {0,}; - - priv = this->private; - - spec.op = BR_TBF_OP_HASH; - spec.rate = 0; - spec.maxlimit = 0; + br_private_t *priv = NULL; + tbf_opspec_t spec = { + 0, + }; + + priv = this->private; + + spec.op = TBF_OP_HASH; + spec.rate = 0; + spec.maxlimit = 0; + + /** + * OK. Most implementations of TBF I've come across generate tokens + * every second (UML, etc..) and some chose sub-second granularity + * (blk-iothrottle cgroups). 
TBF algorithm itself does not enforce + * any logic for choosing generation interval and it seems pretty + * logical as one could jack up token count per interval w.r.t. + * generation rate. + * + * Value used here is chosen based on a series of test(s) performed + * to balance object signing time and not maxing out on all available + * CPU cores. It's obvious to have seconds granularity and jack up + * token count per interval, thereby achieving close to similar + * results. Let's stick to this as it seems to be working fine for + * the set of ops that are throttled. + **/ + spec.token_gen_interval = 600000; /* In usec */ #ifdef BR_RATE_LIMIT_SIGNER - double contribution = 0; - contribution = ((double)1 - ((double)child_count / (double)numbricks)); - if (contribution == 0) - contribution = 1; - spec.rate = BR_HASH_CALC_READ_SIZE * contribution; - spec.maxlimit = BR_WORKERS * BR_HASH_CALC_READ_SIZE; + double contribution = 0; + contribution = ((double)1 - ((double)child_count / (double)numbricks)); + if (contribution == 0) + contribution = 1; + spec.rate = BR_HASH_CALC_READ_SIZE * contribution; + spec.maxlimit = priv->signer_th_count * BR_HASH_CALC_READ_SIZE; #endif - if (!spec.rate) - gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_RATE_LIMIT_INFO, - "[Rate Limit Info] \"FULL THROTTLE\""); - else - gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_RATE_LIMIT_INFO, - "[Rate Limit Info] \"tokens/sec (rate): %lu, " - "maxlimit: %lu\"", spec.rate, spec.maxlimit); + if (!spec.rate) + gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_RATE_LIMIT_INFO, + "FULL THROTTLE", NULL); + else + gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_RATE_LIMIT_INFO, + "tokens/sec-rate=%lu", spec.rate, "maxlimit=%lu", spec.maxlimit, + NULL); - priv->tbf = br_tbf_init (&spec, 1); - return priv->tbf ? 0 : -1; + priv->tbf = tbf_init(&spec, 1); + return priv->tbf ? 
0 : -1; } static int32_t -br_signer_handle_options (xlator_t *this, br_private_t *priv, dict_t *options) +br_signer_handle_options(xlator_t *this, br_private_t *priv, dict_t *options) { - if (options) - GF_OPTION_RECONF ("expiry-time", priv->expiry_time, - options, uint32, error_return); - else - GF_OPTION_INIT ("expiry-time", priv->expiry_time, - uint32, error_return); - - return 0; + if (options) { + GF_OPTION_RECONF("expiry-time", priv->expiry_time, options, uint32, + error_return); + GF_OPTION_RECONF("signer-threads", priv->signer_th_count, options, + uint32, error_return); + } else { + GF_OPTION_INIT("expiry-time", priv->expiry_time, uint32, error_return); + GF_OPTION_INIT("signer-threads", priv->signer_th_count, uint32, + error_return); + } + + return 0; error_return: - return -1; + return -1; } static int32_t -br_signer_init (xlator_t *this, br_private_t *priv) +br_signer_init(xlator_t *this, br_private_t *priv) { - int32_t ret = 0; - int numbricks = 0; + int32_t ret = 0; + int numbricks = 0; - GF_OPTION_INIT ("expiry-time", priv->expiry_time, uint32, error_return); - GF_OPTION_INIT ("brick-count", numbricks, int32, error_return); + GF_OPTION_INIT("expiry-time", priv->expiry_time, uint32, error_return); + GF_OPTION_INIT("brick-count", numbricks, int32, error_return); + GF_OPTION_INIT("signer-threads", priv->signer_th_count, uint32, + error_return); - ret = br_rate_limit_signer (this, priv->child_count, numbricks); - if (ret) - goto error_return; + ret = br_rate_limit_signer(this, priv->child_count, numbricks); + if (ret) + goto error_return; - ret = br_init_signer (this, priv); - if (ret) - goto cleanup_tbf; + ret = br_init_signer(this, priv); + if (ret) + goto cleanup_tbf; - return 0; + return 0; - cleanup_tbf: - /* cleanup TBF */ - error_return: - return -1; +cleanup_tbf: + /* cleanup TBF */ +error_return: + return -1; +} +static void +br_free_scrubber_monitor(xlator_t *this, br_private_t *priv) +{ + struct br_monitor *scrub_monitor = &priv->scrub_monitor; 
+ + if (scrub_monitor->timer) { + (void)gf_tw_del_timer(priv->timer_wheel, scrub_monitor->timer); + + GF_FREE(scrub_monitor->timer); + scrub_monitor->timer = NULL; + } + + (void)gf_thread_cleanup_xint(scrub_monitor->thread); + + /* Clean up cond and mutex variables */ + pthread_mutex_destroy(&scrub_monitor->mutex); + pthread_cond_destroy(&scrub_monitor->cond); + + pthread_mutex_destroy(&scrub_monitor->wakelock); + pthread_cond_destroy(&scrub_monitor->wakecond); + + pthread_mutex_destroy(&scrub_monitor->donelock); + pthread_cond_destroy(&scrub_monitor->donecond); + + LOCK_DESTROY(&scrub_monitor->lock); } static void -br_free_children (xlator_t *this, br_private_t *priv, int count) +br_free_children(xlator_t *this, br_private_t *priv, int count) { - br_child_t *child = NULL; + br_child_t *child = NULL; - for (--count; count >= 0; count--) { - child = &priv->children[count]; - mem_pool_destroy (child->timer_pool); - LOCK_DESTROY (&child->lock); - } + for (--count; count >= 0; count--) { + child = &priv->children[count]; + mem_pool_destroy(child->timer_pool); + pthread_mutex_destroy(&child->lock); + } - GF_FREE (priv->children); - priv->children = NULL; + GF_FREE(priv->children); + priv->children = NULL; } static int -br_init_children (xlator_t *this, br_private_t *priv) +br_init_children(xlator_t *this, br_private_t *priv) { - int i = 0; - br_child_t *child = NULL; - xlator_list_t *trav = NULL; - - priv->child_count = xlator_subvolume_count (this); - priv->children = GF_CALLOC (priv->child_count, sizeof (*priv->children), - gf_br_mt_br_child_t); - if (!priv->children) - goto err; - - trav = this->children; - while (trav) { - child = &priv->children[i]; - - LOCK_INIT (&child->lock); - child->witnessed = 0; - - br_set_scrub_state (child, BR_SCRUB_STATE_INACTIVE); - br_set_child_state (child, BR_CHILD_STATE_DISCONNECTED); - - child->this = this; - child->xl = trav->xlator; - - child->timer_pool = mem_pool_new - (struct gf_tw_timer_list, 4096); - if (!child->timer_pool) { 
- gf_msg (this->name, GF_LOG_ERROR, - ENOMEM, BRB_MSG_NO_MEMORY, - "failed to allocate mem-pool for timer"); - errno = ENOMEM; - goto freechild; - } - - INIT_LIST_HEAD (&child->list); - - i++; - trav = trav->next; + int i = 0; + br_child_t *child = NULL; + xlator_list_t *trav = NULL; + + priv->child_count = xlator_subvolume_count(this); + priv->children = GF_CALLOC(priv->child_count, sizeof(*priv->children), + gf_br_mt_br_child_t); + if (!priv->children) + goto err; + + trav = this->children; + while (trav) { + child = &priv->children[i]; + + pthread_mutex_init(&child->lock, NULL); + child->witnessed = 0; + + br_set_child_state(child, BR_CHILD_STATE_DISCONNECTED); + + child->this = this; + child->xl = trav->xlator; + + child->timer_pool = mem_pool_new(struct gf_tw_timer_list, 4096); + if (!child->timer_pool) { + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_MEM_POOL_ALLOC, + NULL); + errno = ENOMEM; + goto freechild; } - return 0; + INIT_LIST_HEAD(&child->list); + + i++; + trav = trav->next; + } + + return 0; - freechild: - br_free_children (this, priv, i); - err: - return -1; +freechild: + br_free_children(this, priv, i); +err: + return -1; } int32_t -init (xlator_t *this) +init(xlator_t *this) { - int32_t ret = -1; - br_private_t *priv = NULL; - - if (!this->children) { - gf_msg (this->name, GF_LOG_ERROR, 0, BRB_MSG_NO_CHILD, - "FATAL: no children"); - goto out; - } - - priv = GF_CALLOC (1, sizeof (*priv), gf_br_mt_br_private_t); - if (!priv) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY, - "failed to allocate memory (->priv)"); - goto out; - } + int32_t ret = -1; + br_private_t *priv = NULL; - GF_OPTION_INIT ("scrubber", priv->iamscrubber, bool, out); + if (!this->children) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_NO_CHILD, NULL); + goto out; + } - ret = br_init_children (this, priv); - if (ret) - goto free_priv; + priv = GF_CALLOC(1, sizeof(*priv), gf_br_mt_br_private_t); + if (!priv) { + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, 
BRB_MSG_NO_MEMORY, NULL); + goto out; + } - pthread_mutex_init (&priv->lock, NULL); - pthread_cond_init (&priv->cond, NULL); + GF_OPTION_INIT("scrubber", priv->iamscrubber, bool, free_priv); - INIT_LIST_HEAD (&priv->bricks); - INIT_LIST_HEAD (&priv->signing); + ret = br_init_children(this, priv); + if (ret) + goto free_priv; - priv->timer_wheel = glusterfs_global_timer_wheel (this); - if (!priv->timer_wheel) { - gf_msg (this->name, GF_LOG_ERROR, 0, - BRB_MSG_TIMER_WHEEL_UNAVAILABLE, - "global timer wheel unavailable"); - goto cleanup; - } + pthread_mutex_init(&priv->lock, NULL); + pthread_cond_init(&priv->cond, NULL); - this->private = priv; + INIT_LIST_HEAD(&priv->bricks); + INIT_LIST_HEAD(&priv->signing); - if (!priv->iamscrubber) { - ret = br_signer_init (this, priv); - if (!ret) - ret = br_signer_handle_options (this, priv, NULL); - } else { - ret = br_scrubber_init (this, priv); - if (!ret) - ret = br_scrubber_handle_options (this, priv, NULL); - } + priv->timer_wheel = glusterfs_ctx_tw_get(this->ctx); + if (!priv->timer_wheel) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_TIMER_WHEEL_UNAVAILABLE, + NULL); + goto cleanup; + } - if (ret) - goto cleanup; + this->private = priv; - ret = gf_thread_create (&priv->thread, NULL, br_handle_events, this); - if (ret != 0) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - BRB_MSG_SPAWN_FAILED, "thread creation failed"); - ret = -1; - } + if (!priv->iamscrubber) { + ret = br_signer_init(this, priv); + if (!ret) + ret = br_signer_handle_options(this, priv, NULL); + } else { + ret = br_scrubber_init(this, priv); + if (!ret) + ret = br_scrubber_handle_options(this, priv, NULL); + } - if (!ret) { - gf_msg (this->name, GF_LOG_INFO, 0, BRB_MSG_BITROT_LOADED, - "bit-rot xlator loaded in \"%s\" mode", - (priv->iamscrubber) ? 
"SCRUBBER" : "SIGNER"); - return 0; - } + if (ret) + goto cleanup; + + ret = gf_thread_create(&priv->thread, NULL, br_handle_events, this, + "brhevent"); + if (ret != 0) { + gf_smsg(this->name, GF_LOG_ERROR, -ret, BRB_MSG_THREAD_CREATION_FAILED, + NULL); + ret = -1; + } - cleanup: - (void) pthread_cond_destroy (&priv->cond); - (void) pthread_mutex_destroy (&priv->lock); + if (!ret) { + gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_BITROT_LOADED, "mode=%s", + (priv->iamscrubber) ? "SCRUBBER" : "SIGNER", NULL); + return 0; + } + +cleanup: + (void)pthread_cond_destroy(&priv->cond); + (void)pthread_mutex_destroy(&priv->lock); - br_free_children (this, priv, priv->child_count); + br_free_children(this, priv, priv->child_count); - free_priv: - GF_FREE (priv); - out: - this->private = NULL; - return -1; +free_priv: + GF_FREE(priv); +out: + this->private = NULL; + return -1; } void -fini (xlator_t *this) +fini(xlator_t *this) { - br_private_t *priv = this->private; + br_private_t *priv = this->private; - if (!priv) - return; + if (!priv) + return; + + if (!priv->iamscrubber) + br_fini_signer(this, priv); + else + (void)br_free_scrubber_monitor(this, priv); + + br_free_children(this, priv, priv->child_count); - if (!priv->iamscrubber) - br_fini_signer (this, priv); - br_free_children (this, priv, priv->child_count); + this->private = NULL; + GF_FREE(priv); - this->private = NULL; - GF_FREE (priv); + glusterfs_ctx_tw_put(this->ctx); - return; + return; } static void -br_reconfigure_child (xlator_t *this, br_child_t *child) +br_reconfigure_monitor(xlator_t *this) { - int32_t ret = 0; - - ret = br_scrub_state_machine (this, child); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - BRB_MSG_RESCHEDULE_SCRUBBER_FAILED, - "Could not reschedule scrubber for brick: %s. 
Scubbing " - "will continue according to old frequency.", - child->brick_path); - } + int32_t ret = 0; + + ret = br_scrub_state_machine(this, _gf_false); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_COULD_NOT_SCHEDULE_SCRUB, + NULL); + } } static int -br_reconfigure_scrubber (xlator_t *this, dict_t *options) +br_reconfigure_scrubber(xlator_t *this, dict_t *options) { - int i = 0; - int32_t ret = -1; - br_child_t *child = NULL; - br_private_t *priv = NULL; + int32_t ret = -1; + br_private_t *priv = NULL; - priv = this->private; + priv = this->private; - pthread_mutex_lock (&priv->lock); - { - ret = br_scrubber_handle_options (this, priv, options); - } - pthread_mutex_unlock (&priv->lock); + pthread_mutex_lock(&priv->lock); + { + ret = br_scrubber_handle_options(this, priv, options); + } + pthread_mutex_unlock(&priv->lock); - if (ret) - goto err; - - /* change state for all _up_ subvolume(s) */ - for (; i < priv->child_count; i++) { - child = &priv->children[i]; - - LOCK (&child->lock); - { - if (_br_child_failed_conn (child)) { - gf_msg (this->name, GF_LOG_INFO, - 0, BRB_MSG_BRICK_INFO, - "Scrubber for brick [%s] failed " - "initialization, rescheduling is " - "skipped", child->brick_path); - goto unblock; - } - - if (_br_is_child_connected (child)) - br_reconfigure_child (this, child); - - /** - * for the rest.. either the child is in initialization - * phase or is disconnected. either way, updated values - * would be reflected on successful connection. 
- */ - } - unblock: - UNLOCK (&child->lock); - } + if (ret) + goto err; - err: - return ret; + /* change state for all _up_ subvolume(s) */ + pthread_mutex_lock(&priv->lock); + { + br_reconfigure_monitor(this); + } + pthread_mutex_unlock(&priv->lock); + +err: + return ret; } static int -br_reconfigure_signer (xlator_t *this, dict_t *options) +br_reconfigure_signer(xlator_t *this, dict_t *options) { - br_private_t *priv = this->private; + br_private_t *priv = this->private; - return br_signer_handle_options (this, priv, options); + return br_signer_handle_options(this, priv, options); } int -reconfigure (xlator_t *this, dict_t *options) +reconfigure(xlator_t *this, dict_t *options) { - int ret = 0; - br_private_t *priv = NULL; + int ret = 0; + br_private_t *priv = NULL; - priv = this->private; + priv = this->private; - if (priv->iamscrubber) - ret = br_reconfigure_scrubber (this, options); - else - ret = br_reconfigure_signer (this, options); + if (priv->iamscrubber) + ret = br_reconfigure_scrubber(this, options); + else + ret = br_reconfigure_signer(this, options); - return ret; + return ret; } struct xlator_fops fops; @@ -2031,38 +2155,78 @@ struct xlator_fops fops; struct xlator_cbks cbks; struct volume_options options[] = { - { .key = {"expiry-time"}, - .type = GF_OPTION_TYPE_INT, - .default_value = SIGNING_TIMEOUT, - .description = "Waiting time for an object on which it waits " - "before it is signed", - }, - { .key = {"brick-count"}, - .type = GF_OPTION_TYPE_STR, - .description = "Total number of bricks for the current node for " - "all volumes in the trusted storage pool.", - }, - { .key = {"scrubber"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "false", - .description = "option to run as a scrubber", - }, - { .key = {"scrub-throttle"}, - .type = GF_OPTION_TYPE_STR, - .description = "Scrub-throttle value is a measure of how fast " - "or slow the scrubber scrubs the filesystem for " - "volume <VOLNAME>", - }, - { .key = {"scrub-freq"}, - .type = 
GF_OPTION_TYPE_STR, - .default_value = "biweekly", - .description = "Scrub frequency for volume <VOLNAME>", - }, - { .key = {"scrub-state"}, - .type = GF_OPTION_TYPE_STR, - .default_value = "active", - .description = "Pause/Resume scrub. Upon resume, scrubber " - "continues from where it left off.", - }, - { .key = {NULL} }, + { + .key = {"expiry-time"}, + .type = GF_OPTION_TYPE_INT, + .default_value = SIGNING_TIMEOUT, + .op_version = {GD_OP_VERSION_3_7_0}, + .flags = OPT_FLAG_SETTABLE, + .description = "Waiting time for an object on which it waits " + "before it is signed", + }, + { + .key = {"brick-count"}, + .type = GF_OPTION_TYPE_STR, + .description = "Total number of bricks for the current node for " + "all volumes in the trusted storage pool.", + }, + { + .key = {"scrubber", "scrub"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "false", + .op_version = {GD_OP_VERSION_3_7_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_FORCE, + .description = "option to run as a scrubber", + }, + { + .key = {"scrub-throttle"}, + .type = GF_OPTION_TYPE_STR, + .default_value = "lazy", + .op_version = {GD_OP_VERSION_3_7_0}, + .flags = OPT_FLAG_SETTABLE, + .description = "Scrub-throttle value is a measure of how fast " + "or slow the scrubber scrubs the filesystem for " + "volume <VOLNAME>", + }, + { + .key = {"scrub-freq"}, + .type = GF_OPTION_TYPE_STR, + .default_value = "biweekly", + .op_version = {GD_OP_VERSION_3_7_0}, + .flags = OPT_FLAG_SETTABLE, + .description = "Scrub frequency for volume <VOLNAME>", + }, + { + .key = {"scrub-state"}, + .type = GF_OPTION_TYPE_STR, + .default_value = "active", + .op_version = {GD_OP_VERSION_4_0_0}, + .flags = OPT_FLAG_SETTABLE, + .description = "Pause/Resume scrub. 
Upon resume, scrubber " + "continues from where it left off.", + }, + { + .key = {"signer-threads"}, + .type = GF_OPTION_TYPE_INT, + .default_value = BR_WORKERS, + .op_version = {GD_OP_VERSION_8_0}, + .flags = OPT_FLAG_SETTABLE, + .description = "Number of signing process threads. As a best " + "practice, set this to the number of processor cores", + }, + {.key = {NULL}}, +}; + +xlator_api_t xlator_api = { + .init = init, + .fini = fini, + .notify = notify, + .reconfigure = reconfigure, + .mem_acct_init = mem_acct_init, + .op_version = {1}, /* Present from the initial version */ + .fops = &fops, + .cbks = &cbks, + .options = options, + .identifier = "bit-rot", + .category = GF_MAINTAINED, }; diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.h b/xlators/features/bit-rot/src/bitd/bit-rot.h index 7def0606726..8ac7dcdac3d 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot.h +++ b/xlators/features/bit-rot/src/bitd/bit-rot.h @@ -11,257 +11,292 @@ #ifndef __BIT_ROT_H__ #define __BIT_ROT_H__ -#include "glusterfs.h" -#include "logging.h" -#include "dict.h" -#include "xlator.h" -#include "defaults.h" -#include "syncop.h" -#include "syncop-utils.h" +#include <glusterfs/glusterfs.h> +#include <glusterfs/logging.h> +#include <glusterfs/dict.h> +#include <glusterfs/xlator.h> +#include <glusterfs/defaults.h> +#include <glusterfs/syncop.h> +#include <glusterfs/syncop-utils.h> #include "changelog.h" #include "timer-wheel.h" -#include "bit-rot-tbf.h" +#include <glusterfs/throttle-tbf.h> #include "bit-rot-ssm.h" #include "bit-rot-common.h" #include "bit-rot-stub-mem-types.h" +#include "bit-rot-scrub-status.h" #include <openssl/sha.h> -/** - * TODO: make this configurable. As a best practice, set this to the - * number of processor cores. 
- */ -#define BR_WORKERS 4 - typedef enum scrub_throttle { - BR_SCRUB_THROTTLE_VOID = -1, - BR_SCRUB_THROTTLE_LAZY = 0, - BR_SCRUB_THROTTLE_NORMAL = 1, - BR_SCRUB_THROTTLE_AGGRESSIVE = 2, - BR_SCRUB_THROTTLE_STALLED = 3, + BR_SCRUB_THROTTLE_VOID = -1, + BR_SCRUB_THROTTLE_LAZY = 0, + BR_SCRUB_THROTTLE_NORMAL = 1, + BR_SCRUB_THROTTLE_AGGRESSIVE = 2, + BR_SCRUB_THROTTLE_STALLED = 3, } scrub_throttle_t; typedef enum scrub_freq { - BR_FSSCRUB_FREQ_HOURLY = 1, - BR_FSSCRUB_FREQ_DAILY, - BR_FSSCRUB_FREQ_WEEKLY, - BR_FSSCRUB_FREQ_BIWEEKLY, - BR_FSSCRUB_FREQ_MONTHLY, - BR_FSSCRUB_FREQ_STALLED, + BR_FSSCRUB_FREQ_HOURLY = 1, + BR_FSSCRUB_FREQ_DAILY, + BR_FSSCRUB_FREQ_WEEKLY, + BR_FSSCRUB_FREQ_BIWEEKLY, + BR_FSSCRUB_FREQ_MONTHLY, + BR_FSSCRUB_FREQ_MINUTE, + BR_FSSCRUB_FREQ_STALLED, } scrub_freq_t; -#define signature_size(hl) (sizeof (br_isignature_t) + hl + 1) +#define signature_size(hl) (sizeof(br_isignature_t) + hl + 1) struct br_scanfs { - gf_lock_t entrylock; - - pthread_mutex_t waitlock; - pthread_cond_t waitcond; - - unsigned int entries; - struct list_head queued; - struct list_head ready; - - /* scheduler */ - uint32_t boot; - gf_boolean_t kick; - gf_boolean_t over; - - br_scrub_state_t state; /* current scrub state */ + gf_lock_t entrylock; - pthread_mutex_t wakelock; - pthread_cond_t wakecond; + pthread_mutex_t waitlock; + pthread_cond_t waitcond; - struct gf_tw_timer_list *timer; + unsigned int entries; + struct list_head queued; + struct list_head ready; }; /* just need three states to track child status */ typedef enum br_child_state { - BR_CHILD_STATE_CONNECTED = 1, - BR_CHILD_STATE_INITIALIZING, - BR_CHILD_STATE_CONNFAILED, - BR_CHILD_STATE_DISCONNECTED, + BR_CHILD_STATE_CONNECTED = 1, + BR_CHILD_STATE_INITIALIZING, + BR_CHILD_STATE_CONNFAILED, + BR_CHILD_STATE_DISCONNECTED, } br_child_state_t; struct br_child { - gf_lock_t lock; /* protects child state */ - char witnessed; /* witnessed at least one succesfull - connection */ - br_child_state_t c_state; /* 
current state of this child */ - - char child_up; /* Indicates whether this child is - up or not */ - xlator_t *xl; /* client xlator corresponding to - this child */ - inode_table_t *table; /* inode table for this child */ - char brick_path[PATH_MAX]; /* brick export directory of this - child */ - struct list_head list; /* hook to attach to the list of - UP children */ - xlator_t *this; /* Bit rot xlator */ - - pthread_t thread; /* initial crawler for unsigned - object(s) or scrub crawler */ - int threadrunning; /* active thread */ - - struct mem_pool *timer_pool; /* timer-wheel's timer mem-pool */ - - struct timeval tv; - - struct br_scanfs fsscan; /* per subvolume FS scanner */ + pthread_mutex_t lock; /* protects child state */ + char witnessed; /* witnessed at least one successful + connection */ + br_child_state_t c_state; /* current state of this child */ + + char child_up; /* Indicates whether this child is + up or not */ + xlator_t *xl; /* client xlator corresponding to + this child */ + inode_table_t *table; /* inode table for this child */ + char brick_path[PATH_MAX]; /* brick export directory of this + child */ + struct list_head list; /* hook to attach to the list of + UP children */ + xlator_t *this; /* Bit rot xlator */ + + pthread_t thread; /* initial crawler for unsigned + object(s) or scrub crawler */ + int threadrunning; /* active thread */ + + struct mem_pool *timer_pool; /* timer-wheel's timer mem-pool */ + + struct timeval tv; + + struct br_scanfs fsscan; /* per subvolume FS scanner */ + + gf_boolean_t active_scrubbing; /* Actively scrubbing or not */ }; typedef struct br_child br_child_t; struct br_obj_n_workers { - struct list_head objects; /* queue of objects expired from the - timer wheel and ready to be picked - up for signing */ - pthread_t workers[BR_WORKERS]; /* Threads which pick up the objects - from the above queue and start - signing each object */ + struct list_head objects; /* queue of objects expired from the + timer wheel and 
ready to be picked + up for signing */ + pthread_t *workers; /* Threads which pick up the objects + from the above queue and start + signing each object */ }; struct br_scrubber { - xlator_t *this; + xlator_t *this; + + scrub_throttle_t throttle; + + /** + * frequency of scanning for this subvolume. this should + * normally be per-child, but since all children follow the + * same frequency for a volume, this option ends up here + * instead of br_child_t. + */ + scrub_freq_t frequency; + + gf_boolean_t frequency_reconf; + gf_boolean_t throttle_reconf; + + pthread_mutex_t mutex; + pthread_cond_t cond; + + unsigned int nr_scrubbers; + struct list_head scrubbers; + + /** + * list of "rotatable" subvolume(s) undergoing scrubbing + */ + struct list_head scrublist; +}; + +struct br_monitor { + gf_lock_t lock; + pthread_t thread; /* Monitor thread */ - scrub_throttle_t throttle; + gf_boolean_t inited; + pthread_mutex_t mutex; + pthread_cond_t cond; /* Thread starts and will be waiting on cond. + First child which is up wakes this up */ - /** - * frequency of scanning for this subvolume. this should - * normally be per-child, but since all childs follow the - * same frequency for a volume, this option ends up here - * instead of br_child_t. - */ - scrub_freq_t frequency; + xlator_t *this; + /* scheduler */ + uint32_t boot; - gf_boolean_t frequency_reconf; - gf_boolean_t throttle_reconf; + int32_t active_child_count; /* Number of children currently scrubbing */ + gf_boolean_t kick; /* This variable tracks the scrubber is + * kicked or not. Both 'kick' and + * 'active_child_count' uses the same pair + * of mutex-cond variable, i.e, wakelock and + * wakecond. 
*/ - pthread_mutex_t mutex; - pthread_cond_t cond; + pthread_mutex_t wakelock; + pthread_cond_t wakecond; - unsigned int nr_scrubbers; - struct list_head scrubbers; + gf_boolean_t done; + pthread_mutex_t donelock; + pthread_cond_t donecond; - /** - * list of "rotatable" subvolume(s) undergoing scrubbing - */ - struct list_head scrublist; + struct gf_tw_timer_list *timer; + br_scrub_state_t state; /* current scrub state */ }; typedef struct br_obj_n_workers br_obj_n_workers_t; +typedef struct br_private br_private_t; + +typedef void (*br_scrubbed_file_update)(br_private_t *priv); + struct br_private { - pthread_mutex_t lock; + pthread_mutex_t lock; - struct list_head bricks; /* list of bricks from which enents - have been received */ + struct list_head bricks; /* list of bricks from which enents + have been received */ - struct list_head signing; + struct list_head signing; - pthread_cond_t object_cond; /* handling signing of objects */ - int child_count; - br_child_t *children; /* list of subvolumes */ - int up_children; + pthread_cond_t object_cond; /* handling signing of objects */ + int child_count; + br_child_t *children; /* list of subvolumes */ + int up_children; - pthread_cond_t cond; /* handling CHILD_UP notifications */ - pthread_t thread; /* thread for connecting each UP - child with changelog */ + pthread_cond_t cond; /* handling CHILD_UP notifications */ + pthread_t thread; /* thread for connecting each UP + child with changelog */ - struct tvec_base *timer_wheel; /* timer wheel where the objects which - changelog has sent sits and waits - for expiry */ - br_obj_n_workers_t *obj_queue; /* place holder for all the objects - that are expired from timer wheel - and ready to be picked up for - signing and the workers which sign - the objects */ - uint32_t expiry_time; /* objects "wait" time */ + struct tvec_base *timer_wheel; /* timer wheel where the objects which + changelog has sent sits and waits + for expiry */ + br_obj_n_workers_t *obj_queue; /* place 
holder for all the objects + that are expired from timer wheel + and ready to be picked up for + signing and the workers which sign + the objects */ - br_tbf_t *tbf; /* token bucket filter */ + uint32_t expiry_time; /* objects "wait" time */ - gf_boolean_t iamscrubber; /* function as a fs scrubber */ + uint32_t signer_th_count; /* Number of signing process threads */ - struct br_scrubber fsscrub; /* scrubbers for this subvolume */ -}; + tbf_t *tbf; /* token bucket filter */ -typedef struct br_private br_private_t; + gf_boolean_t iamscrubber; /* function as a fs scrubber */ + + struct br_scrub_stats scrub_stat; /* statistics of scrub*/ + + struct br_scrubber fsscrub; /* scrubbers for this subvolume */ + + struct br_monitor scrub_monitor; /* scrubber monitor */ +}; struct br_object { - xlator_t *this; + xlator_t *this; - uuid_t gfid; + uuid_t gfid; - unsigned long signedversion; /* version aginst which this object will - be signed */ - br_child_t *child; /* object's subvolume */ + unsigned long signedversion; /* version against which this object will + be signed */ + br_child_t *child; /* object's subvolume */ - int sign_info; + int sign_info; - struct list_head list; /* hook to add to the queue once the - object is expired from timer wheel */ - void *data; + struct list_head list; /* hook to add to the queue once the + object is expired from timer wheel */ + void *data; }; typedef struct br_object br_object_t; -typedef int32_t (br_scrub_ssm_call) (xlator_t *, br_child_t *); +typedef int32_t(br_scrub_ssm_call)(xlator_t *); void -br_log_object (xlator_t *, char *, uuid_t, int32_t); +br_log_object(xlator_t *, char *, uuid_t, int32_t); void -br_log_object_path (xlator_t *, char *, const char *, int32_t); +br_log_object_path(xlator_t *, char *, const char *, int32_t); int32_t -br_calculate_obj_checksum (unsigned char *, - br_child_t *, fd_t *, struct iatt *); +br_calculate_obj_checksum(unsigned char *, br_child_t *, fd_t *, struct iatt *); int32_t -br_prepare_loc 
(xlator_t *, br_child_t *, loc_t *, gf_dirent_t *, loc_t *); +br_prepare_loc(xlator_t *, br_child_t *, loc_t *, gf_dirent_t *, loc_t *); gf_boolean_t -bitd_is_bad_file (xlator_t *, br_child_t *, loc_t *, fd_t *); +bitd_is_bad_file(xlator_t *, br_child_t *, loc_t *, fd_t *); static inline void -_br_set_child_state (br_child_t *child, br_child_state_t state) +_br_set_child_state(br_child_t *child, br_child_state_t state) +{ + child->c_state = state; +} + +static inline int +_br_is_child_connected(br_child_t *child) { - child->c_state = state; + return (child->c_state == BR_CHILD_STATE_CONNECTED); } static inline int -_br_is_child_connected (br_child_t *child) +_br_is_child_scrub_active(br_child_t *child) { - return (child->c_state == BR_CHILD_STATE_CONNECTED); + return child->active_scrubbing; } static inline int -_br_child_failed_conn (br_child_t *child) +_br_child_failed_conn(br_child_t *child) { - return (child->c_state == BR_CHILD_STATE_CONNFAILED); + return (child->c_state == BR_CHILD_STATE_CONNFAILED); } static inline int -_br_child_witnessed_connection (br_child_t *child) +_br_child_witnessed_connection(br_child_t *child) { - return (child->witnessed == 1); + return (child->witnessed == 1); } /* scrub state */ static inline void -_br_child_set_scrub_state (br_child_t *child, br_scrub_state_t state) +_br_monitor_set_scrub_state(struct br_monitor *scrub_monitor, + br_scrub_state_t state) { - struct br_scanfs *fsscan = &child->fsscan; - fsscan->state = state; + scrub_monitor->state = state; } static inline br_scrub_event_t -_br_child_get_scrub_event (struct br_scrubber *fsscrub) +_br_child_get_scrub_event(struct br_scrubber *fsscrub) { - return (fsscrub->frequency == BR_FSSCRUB_FREQ_STALLED) - ? BR_SCRUB_EVENT_PAUSE : BR_SCRUB_EVENT_SCHEDULE; + return (fsscrub->frequency == BR_FSSCRUB_FREQ_STALLED) + ? 
BR_SCRUB_EVENT_PAUSE + : BR_SCRUB_EVENT_SCHEDULE; } +int32_t +br_get_bad_objects_list(xlator_t *this, dict_t **dict); + #endif /* __BIT_ROT_H__ */ diff --git a/xlators/features/bit-rot/src/stub/Makefile.am b/xlators/features/bit-rot/src/stub/Makefile.am index 5b5253c4ad5..f13de7145fc 100644 --- a/xlators/features/bit-rot/src/stub/Makefile.am +++ b/xlators/features/bit-rot/src/stub/Makefile.am @@ -1,15 +1,19 @@ +if WITH_SERVER xlator_LTLIBRARIES = bitrot-stub.la +endif xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features -bitrot_stub_la_LDFLAGS = $(GF_XLATOR_DEFAULT_LDFLAGS) +bitrot_stub_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) -bitrot_stub_la_SOURCES = bit-rot-stub.c +bitrot_stub_la_SOURCES = bit-rot-stub-helpers.c bit-rot-stub.c bitrot_stub_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la noinst_HEADERS = bit-rot-stub.h bit-rot-common.h bit-rot-stub-mem-types.h \ - bit-rot-object-version.h bit-rot-stub-messages.h + bit-rot-object-version.h bit-rot-stub-messages.h -AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ + -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \ + -I$(top_srcdir)/rpc/rpc-lib/src AM_CFLAGS = -Wall $(GF_CFLAGS) diff --git a/xlators/features/bit-rot/src/stub/bit-rot-common.h b/xlators/features/bit-rot/src/stub/bit-rot-common.h index bcf931a2b0b..20561aa7764 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-common.h +++ b/xlators/features/bit-rot/src/stub/bit-rot-common.h @@ -11,72 +11,74 @@ #ifndef __BIT_ROT_COMMON_H__ #define __BIT_ROT_COMMON_H__ -#include "glusterfs.h" +#include <glusterfs/glusterfs.h> #include "bit-rot-object-version.h" -#define BR_VXATTR_VERSION (1 << 0) +#define BR_VXATTR_VERSION (1 << 0) #define BR_VXATTR_SIGNATURE (1 << 1) #define BR_VXATTR_SIGN_MISSING (BR_VXATTR_SIGNATURE) -#define BR_VXATTR_ALL_MISSING \ - (BR_VXATTR_VERSION | BR_VXATTR_SIGNATURE) +#define BR_VXATTR_ALL_MISSING (BR_VXATTR_VERSION 
| BR_VXATTR_SIGNATURE) + +#define BR_BAD_OBJ_CONTAINER \ + (uuid_t) { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8 } typedef enum br_vxattr_state { - BR_VXATTR_STATUS_FULL = 0, - BR_VXATTR_STATUS_MISSING = 1, - BR_VXATTR_STATUS_UNSIGNED = 2, - BR_VXATTR_STATUS_INVALID = 3, + BR_VXATTR_STATUS_FULL = 0, + BR_VXATTR_STATUS_MISSING = 1, + BR_VXATTR_STATUS_UNSIGNED = 2, + BR_VXATTR_STATUS_INVALID = 3, } br_vxattr_status_t; typedef enum br_sign_state { - BR_SIGN_INVALID = -1, - BR_SIGN_NORMAL = 0, - BR_SIGN_REOPEN_WAIT = 1, - BR_SIGN_QUICK = 2, + BR_SIGN_INVALID = -1, + BR_SIGN_NORMAL = 0, + BR_SIGN_REOPEN_WAIT = 1, + BR_SIGN_QUICK = 2, } br_sign_state_t; static inline br_vxattr_status_t -br_version_xattr_state (dict_t *xattr, br_version_t **obuf, - br_signature_t **sbuf, gf_boolean_t *objbad) +br_version_xattr_state(dict_t *xattr, br_version_t **obuf, + br_signature_t **sbuf, gf_boolean_t *objbad) { - int32_t ret = 0; - int32_t vxattr = 0; - br_vxattr_status_t status; - void *data = NULL; - - /** - * The key being present in the dict indicates the xattr was set on - * disk. The presence of xattr itself as of now is suffecient to say - * the the object is bad. - */ - *objbad = _gf_false; - ret = dict_get_bin (xattr, BITROT_OBJECT_BAD_KEY, (void **)&data); - if (!ret) - *objbad = _gf_true; - - ret = dict_get_bin (xattr, BITROT_CURRENT_VERSION_KEY, (void **)obuf); - if (ret) - vxattr |= BR_VXATTR_VERSION; - - ret = dict_get_bin (xattr, BITROT_SIGNING_VERSION_KEY, (void **)sbuf); - if (ret) - vxattr |= BR_VXATTR_SIGNATURE; - - switch (vxattr) { + int32_t ret = 0; + int32_t vxattr = 0; + br_vxattr_status_t status; + void *data = NULL; + + /** + * The key being present in the dict indicates the xattr was set on + * disk. The presence of xattr itself as of now is suffecient to say + * the the object is bad. 
+ */ + *objbad = _gf_false; + ret = dict_get_bin(xattr, BITROT_OBJECT_BAD_KEY, (void **)&data); + if (!ret) + *objbad = _gf_true; + + ret = dict_get_bin(xattr, BITROT_CURRENT_VERSION_KEY, (void **)obuf); + if (ret) + vxattr |= BR_VXATTR_VERSION; + + ret = dict_get_bin(xattr, BITROT_SIGNING_VERSION_KEY, (void **)sbuf); + if (ret) + vxattr |= BR_VXATTR_SIGNATURE; + + switch (vxattr) { case 0: - status = BR_VXATTR_STATUS_FULL; - break; + status = BR_VXATTR_STATUS_FULL; + break; case BR_VXATTR_SIGN_MISSING: - status = BR_VXATTR_STATUS_UNSIGNED; - break; + status = BR_VXATTR_STATUS_UNSIGNED; + break; case BR_VXATTR_ALL_MISSING: - status = BR_VXATTR_STATUS_MISSING; - break; + status = BR_VXATTR_STATUS_MISSING; + break; default: - status = BR_VXATTR_STATUS_INVALID; - } + status = BR_VXATTR_STATUS_INVALID; + } - return status; + return status; } /** @@ -84,13 +86,13 @@ br_version_xattr_state (dict_t *xattr, br_version_t **obuf, * signing. */ typedef struct br_isignature_in { - int8_t signaturetype; /* signature type */ + int8_t signaturetype; /* signature type */ - unsigned long signedversion; /* version against which the - object was signed */ + unsigned long signedversion; /* version against which the + object was signed */ - size_t signaturelen; /* signature length */ - char signature[0]; /* object signature */ + size_t signaturelen; /* signature length */ + char signature[0]; /* object signature */ } br_isignature_t; /** @@ -98,80 +100,79 @@ typedef struct br_isignature_in { * verification. */ typedef struct br_isignature_out { - char stale; /* stale signature? */ + char stale; /* stale signature? 
*/ - unsigned long version; /* current signed version */ + unsigned long version; /* current signed version */ - uint32_t time[2]; /* time when the object - got dirtied */ + uint32_t time[2]; /* time when the object + got dirtied */ - int8_t signaturetype; /* hash type */ - size_t signaturelen; /* signature length */ - char signature[0]; /* signature (hash) */ + int8_t signaturetype; /* hash type */ + size_t signaturelen; /* signature length */ + char signature[0]; /* signature (hash) */ } br_isignature_out_t; typedef struct br_stub_init { - uint32_t timebuf[2]; - char export[PATH_MAX]; + uint32_t timebuf[2]; + char export[PATH_MAX]; } br_stub_init_t; typedef enum { - BR_SIGNATURE_TYPE_VOID = -1, /* object is not signed */ - BR_SIGNATURE_TYPE_ZERO = 0, /* min boundary */ - BR_SIGNATURE_TYPE_SHA256 = 1, /* signed with SHA256 */ - BR_SIGNATURE_TYPE_MAX = 2, /* max boundary */ + BR_SIGNATURE_TYPE_VOID = -1, /* object is not signed */ + BR_SIGNATURE_TYPE_ZERO = 0, /* min boundary */ + BR_SIGNATURE_TYPE_SHA256 = 1, /* signed with SHA256 */ + BR_SIGNATURE_TYPE_MAX = 2, /* max boundary */ } br_signature_type; /* BitRot stub start time (virtual xattr) */ -#define GLUSTERFS_GET_BR_STUB_INIT_TIME "trusted.glusterfs.bit-rot.stub-init" +#define GLUSTERFS_GET_BR_STUB_INIT_TIME "trusted.glusterfs.bit-rot.stub-init" /* signing/reopen hint */ #define BR_OBJECT_RESIGN 0 -#define BR_OBJECT_REOPEN 1 -#define BR_REOPEN_SIGN_HINT_KEY "trusted.glusterfs.bit-rot.reopen-hint" +#define BR_OBJECT_REOPEN 1 +#define BR_REOPEN_SIGN_HINT_KEY "trusted.glusterfs.bit-rot.reopen-hint" static inline int -br_is_signature_type_valid (int8_t signaturetype) +br_is_signature_type_valid(int8_t signaturetype) { - return ((signaturetype > BR_SIGNATURE_TYPE_ZERO) - && (signaturetype < BR_SIGNATURE_TYPE_MAX)); + return ((signaturetype > BR_SIGNATURE_TYPE_ZERO) && + (signaturetype < BR_SIGNATURE_TYPE_MAX)); } static inline void -br_set_default_ongoingversion (br_version_t *buf, uint32_t *tv) 
+br_set_default_ongoingversion(br_version_t *buf, uint32_t *tv) { - buf->ongoingversion = BITROT_DEFAULT_CURRENT_VERSION; - buf->timebuf[0] = tv[0]; - buf->timebuf[1] = tv[1]; + buf->ongoingversion = BITROT_DEFAULT_CURRENT_VERSION; + buf->timebuf[0] = tv[0]; + buf->timebuf[1] = tv[1]; } static inline void -br_set_default_signature (br_signature_t *buf, size_t *size) +br_set_default_signature(br_signature_t *buf, size_t *size) { - buf->signaturetype = (int8_t) BR_SIGNATURE_TYPE_VOID; - buf->signedversion = BITROT_DEFAULT_SIGNING_VERSION; + buf->signaturetype = (int8_t)BR_SIGNATURE_TYPE_VOID; + buf->signedversion = BITROT_DEFAULT_SIGNING_VERSION; - *size = sizeof (br_signature_t); /* no signature */ + *size = sizeof(br_signature_t); /* no signature */ } static inline void -br_set_ongoingversion (br_version_t *buf, - unsigned long version, uint32_t *tv) +br_set_ongoingversion(br_version_t *buf, unsigned long version, uint32_t *tv) { - buf->ongoingversion = version; - buf->timebuf[0] = tv[0]; - buf->timebuf[1] = tv[1]; + buf->ongoingversion = version; + buf->timebuf[0] = tv[0]; + buf->timebuf[1] = tv[1]; } static inline void -br_set_signature (br_signature_t *buf, - br_isignature_t *sign, size_t signaturelen, size_t *size) +br_set_signature(br_signature_t *buf, br_isignature_t *sign, + size_t signaturelen, size_t *size) { - buf->signaturetype = sign->signaturetype; - buf->signedversion = ntohl (sign->signedversion); + buf->signaturetype = sign->signaturetype; + buf->signedversion = ntohl(sign->signedversion); - memcpy (buf->signature, sign->signature, signaturelen); - *size = sizeof (br_signature_t) + signaturelen; + memcpy(buf->signature, sign->signature, signaturelen); + *size = sizeof(br_signature_t) + signaturelen; } #endif /* __BIT_ROT_COMMON_H__ */ diff --git a/xlators/features/bit-rot/src/stub/bit-rot-object-version.h b/xlators/features/bit-rot/src/stub/bit-rot-object-version.h index 1f2497aebe9..7ae6a5200df 100644 --- 
a/xlators/features/bit-rot/src/stub/bit-rot-object-version.h +++ b/xlators/features/bit-rot/src/stub/bit-rot-object-version.h @@ -15,16 +15,16 @@ * on-disk formats for ongoing version and object signature. */ typedef struct br_version { - unsigned long ongoingversion; - uint32_t timebuf[2]; + unsigned long ongoingversion; + uint32_t timebuf[2]; } br_version_t; -typedef struct __attribute__ ((__packed__)) br_signature { - int8_t signaturetype; +typedef struct __attribute__((__packed__)) br_signature { + int8_t signaturetype; - unsigned long signedversion; + unsigned long signedversion; - char signature[0]; + char signature[0]; } br_signature_t; #endif diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-helpers.c b/xlators/features/bit-rot/src/stub/bit-rot-stub-helpers.c new file mode 100644 index 00000000000..8ac13a09941 --- /dev/null +++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-helpers.c @@ -0,0 +1,796 @@ +/* + Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#include "bit-rot-stub.h" + +br_stub_fd_t * +br_stub_fd_new(void) +{ + br_stub_fd_t *br_stub_fd = NULL; + + br_stub_fd = GF_CALLOC(1, sizeof(*br_stub_fd), gf_br_stub_mt_br_stub_fd_t); + + return br_stub_fd; +} + +int +__br_stub_fd_ctx_set(xlator_t *this, fd_t *fd, br_stub_fd_t *br_stub_fd) +{ + uint64_t value = 0; + int ret = -1; + + GF_VALIDATE_OR_GOTO("bit-rot-stub", this, out); + GF_VALIDATE_OR_GOTO(this->name, fd, out); + GF_VALIDATE_OR_GOTO(this->name, br_stub_fd, out); + + value = (uint64_t)(long)br_stub_fd; + + ret = __fd_ctx_set(fd, this, value); + +out: + return ret; +} + +br_stub_fd_t * +__br_stub_fd_ctx_get(xlator_t *this, fd_t *fd) +{ + br_stub_fd_t *br_stub_fd = NULL; + uint64_t value = 0; + int ret = -1; + + GF_VALIDATE_OR_GOTO("bit-rot-stub", this, out); + GF_VALIDATE_OR_GOTO(this->name, fd, out); + + ret = __fd_ctx_get(fd, this, &value); + if (ret) + return NULL; + + br_stub_fd = (br_stub_fd_t *)((long)value); + +out: + return br_stub_fd; +} + +br_stub_fd_t * +br_stub_fd_ctx_get(xlator_t *this, fd_t *fd) +{ + br_stub_fd_t *br_stub_fd = NULL; + + GF_VALIDATE_OR_GOTO("bit-rot-stub", this, out); + GF_VALIDATE_OR_GOTO(this->name, fd, out); + + LOCK(&fd->lock); + { + br_stub_fd = __br_stub_fd_ctx_get(this, fd); + } + UNLOCK(&fd->lock); + +out: + return br_stub_fd; +} + +int32_t +br_stub_fd_ctx_set(xlator_t *this, fd_t *fd, br_stub_fd_t *br_stub_fd) +{ + int32_t ret = -1; + + GF_VALIDATE_OR_GOTO("bit-rot-stub", this, out); + GF_VALIDATE_OR_GOTO(this->name, fd, out); + GF_VALIDATE_OR_GOTO(this->name, br_stub_fd, out); + + LOCK(&fd->lock); + { + ret = __br_stub_fd_ctx_set(this, fd, br_stub_fd); + } + UNLOCK(&fd->lock); + +out: + return ret; +} + +/** + * Adds an entry to the bad objects directory. 
+ * @gfid: gfid of the bad object being added to the bad objects directory + */ +int +br_stub_add(xlator_t *this, uuid_t gfid) +{ + char gfid_path[BR_PATH_MAX_PLUS] = {0}; + char bad_gfid_path[BR_PATH_MAX_PLUS] = {0}; + int ret = 0; + br_stub_private_t *priv = NULL; + struct stat st = {0}; + + priv = this->private; + GF_ASSERT_AND_GOTO_WITH_ERROR(this->name, !gf_uuid_is_null(gfid), out, + errno, EINVAL); + + snprintf(gfid_path, sizeof(gfid_path), "%s/%s", priv->stub_basepath, + uuid_utoa(gfid)); + + ret = sys_stat(gfid_path, &st); + if (!ret) + goto out; + snprintf(bad_gfid_path, sizeof(bad_gfid_path), "%s/stub-%s", + priv->stub_basepath, uuid_utoa(priv->bad_object_dir_gfid)); + + ret = sys_link(bad_gfid_path, gfid_path); + if (ret) { + if ((errno != ENOENT) && (errno != EMLINK) && (errno != EEXIST)) + goto out; + + /* + * Continue with success. At least we'll have half of the + * functionality, in the sense, object is marked bad and + * would be inaccessible. It's only scrub status that would + * show up less number of objects. That's fine as we'll have + * the log files that will have the missing information. 
+ */ + gf_smsg(this->name, GF_LOG_WARNING, errno, BRS_MSG_LINK_FAIL, "gfid=%s", + uuid_utoa(gfid), NULL); + } + + return 0; +out: + return -1; +} + +int +br_stub_del(xlator_t *this, uuid_t gfid) +{ + int32_t op_errno __attribute__((unused)) = 0; + br_stub_private_t *priv = NULL; + int ret = 0; + char gfid_path[BR_PATH_MAX_PLUS] = {0}; + + priv = this->private; + GF_ASSERT_AND_GOTO_WITH_ERROR(this->name, !gf_uuid_is_null(gfid), out, + op_errno, EINVAL); + snprintf(gfid_path, sizeof(gfid_path), "%s/%s", priv->stub_basepath, + uuid_utoa(gfid)); + ret = sys_unlink(gfid_path); + if (ret && (errno != ENOENT)) { + gf_smsg(this->name, GF_LOG_ERROR, errno, BRS_MSG_BAD_OBJ_UNLINK_FAIL, + "path=%s", gfid_path, NULL); + ret = -errno; + goto out; + } + + ret = 0; + +out: + return ret; +} + +static int +br_stub_check_stub_directory(xlator_t *this, char *fullpath) +{ + int ret = 0; + struct stat st = { + 0, + }; + char oldpath[BR_PATH_MAX_PLUS] = {0}; + br_stub_private_t *priv = NULL; + + priv = this->private; + + snprintf(oldpath, sizeof(oldpath), "%s/%s", priv->export, + OLD_BR_STUB_QUARANTINE_DIR); + + ret = sys_stat(fullpath, &st); + if (!ret && !S_ISDIR(st.st_mode)) + goto error_return; + if (ret) { + if (errno != ENOENT) + goto error_return; + ret = sys_stat(oldpath, &st); + if (ret) + ret = mkdir_p(fullpath, 0600, _gf_true); + else + ret = sys_rename(oldpath, fullpath); + } + + if (ret) + gf_smsg(this->name, GF_LOG_ERROR, errno, BRS_MSG_BAD_OBJECT_DIR_FAIL, + "create-path=%s", fullpath, NULL); + return ret; + +error_return: + gf_smsg(this->name, GF_LOG_ERROR, errno, BRS_MSG_BAD_OBJECT_DIR_FAIL, + "verify-path=%s", fullpath, NULL); + return -1; +} + +/** + * Function to create the container for the bad objects within the bad objects + * directory. 
+ */ +static int +br_stub_check_stub_file(xlator_t *this, char *path) +{ + int ret = 0; + int fd = -1; + struct stat st = { + 0, + }; + + ret = sys_stat(path, &st); + if (!ret && !S_ISREG(st.st_mode)) + goto error_return; + if (ret) { + if (errno != ENOENT) + goto error_return; + fd = sys_creat(path, 0); + if (fd < 0) + gf_smsg(this->name, GF_LOG_ERROR, errno, + BRS_MSG_BAD_OBJECT_DIR_FAIL, "create-path=%s", path, NULL); + } + + if (fd >= 0) { + sys_close(fd); + ret = 0; + } + + return ret; + +error_return: + gf_smsg(this->name, GF_LOG_ERROR, errno, BRS_MSG_BAD_OBJECT_DIR_FAIL, + "verify-path=%s", path, NULL); + return -1; +} + +int +br_stub_dir_create(xlator_t *this, br_stub_private_t *priv) +{ + int ret = -1; + char fullpath[BR_PATH_MAX_PLUS] = { + 0, + }; + char stub_gfid_path[BR_PATH_MAX_PLUS] = { + 0, + }; + + gf_uuid_copy(priv->bad_object_dir_gfid, BR_BAD_OBJ_CONTAINER); + + if (snprintf(fullpath, sizeof(fullpath), "%s", priv->stub_basepath) >= + sizeof(fullpath)) + goto out; + + if (snprintf(stub_gfid_path, sizeof(stub_gfid_path), "%s/stub-%s", + priv->stub_basepath, uuid_utoa(priv->bad_object_dir_gfid)) >= + sizeof(stub_gfid_path)) + goto out; + + ret = br_stub_check_stub_directory(this, fullpath); + if (ret) + goto out; + ret = br_stub_check_stub_file(this, stub_gfid_path); + if (ret) + goto out; + + return 0; + +out: + return -1; +} + +call_stub_t * +__br_stub_dequeue(struct list_head *callstubs) +{ + call_stub_t *stub = NULL; + + if (!list_empty(callstubs)) { + stub = list_entry(callstubs->next, call_stub_t, list); + list_del_init(&stub->list); + } + + return stub; +} + +void +__br_stub_enqueue(struct list_head *callstubs, call_stub_t *stub) +{ + list_add_tail(&stub->list, callstubs); +} + +void +br_stub_worker_enqueue(xlator_t *this, call_stub_t *stub) +{ + br_stub_private_t *priv = NULL; + + priv = this->private; + pthread_mutex_lock(&priv->container.bad_lock); + { + __br_stub_enqueue(&priv->container.bad_queue, stub); + 
pthread_cond_signal(&priv->container.bad_cond); + } + pthread_mutex_unlock(&priv->container.bad_lock); +} + +void * +br_stub_worker(void *data) +{ + br_stub_private_t *priv = NULL; + xlator_t *this = NULL; + call_stub_t *stub = NULL; + + THIS = data; + this = data; + priv = this->private; + + for (;;) { + pthread_mutex_lock(&priv->container.bad_lock); + { + while (list_empty(&priv->container.bad_queue)) { + (void)pthread_cond_wait(&priv->container.bad_cond, + &priv->container.bad_lock); + } + + stub = __br_stub_dequeue(&priv->container.bad_queue); + } + pthread_mutex_unlock(&priv->container.bad_lock); + + if (stub) /* guard against spurious wakeups */ + call_resume(stub); + } + + return NULL; +} + +int32_t +br_stub_lookup_wrapper(call_frame_t *frame, xlator_t *this, loc_t *loc, + dict_t *xattr_req) +{ + br_stub_private_t *priv = NULL; + struct stat lstatbuf = {0}; + int ret = 0; + int32_t op_errno = EINVAL; + int32_t op_ret = -1; + struct iatt stbuf = { + 0, + }; + struct iatt postparent = { + 0, + }; + dict_t *xattr = NULL; + gf_boolean_t ver_enabled = _gf_false; + + BR_STUB_VER_ENABLED_IN_CALLPATH(frame, ver_enabled); + priv = this->private; + BR_STUB_VER_COND_GOTO(priv, (!ver_enabled), done); + + VALIDATE_OR_GOTO(loc, done); + if (gf_uuid_compare(loc->gfid, priv->bad_object_dir_gfid)) + goto done; + + ret = sys_lstat(priv->stub_basepath, &lstatbuf); + if (ret) { + gf_msg_debug(this->name, errno, + "Stat failed on stub bad " + "object dir"); + op_errno = errno; + goto done; + } else if (!S_ISDIR(lstatbuf.st_mode)) { + gf_msg_debug(this->name, errno, + "bad object container is not " + "a directory"); + op_errno = ENOTDIR; + goto done; + } + + iatt_from_stat(&stbuf, &lstatbuf); + gf_uuid_copy(stbuf.ia_gfid, priv->bad_object_dir_gfid); + + op_ret = op_errno = 0; + xattr = dict_new(); + if (!xattr) { + op_ret = -1; + op_errno = ENOMEM; + } + +done: + STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, loc->inode, &stbuf, + xattr, &postparent); + if (xattr) + 
dict_unref(xattr); + return 0; +} + +static int +is_bad_gfid_file_current(char *filename, uuid_t gfid) +{ + char current_stub_gfid[GF_UUID_BUF_SIZE + 16] = { + 0, + }; + + snprintf(current_stub_gfid, sizeof current_stub_gfid, "stub-%s", + uuid_utoa(gfid)); + return (!strcmp(filename, current_stub_gfid)); +} + +static void +check_delete_stale_bad_file(xlator_t *this, char *filename) +{ + int ret = 0; + struct stat st = {0}; + char filepath[BR_PATH_MAX_PLUS] = {0}; + br_stub_private_t *priv = NULL; + + priv = this->private; + + if (is_bad_gfid_file_current(filename, priv->bad_object_dir_gfid)) + return; + + snprintf(filepath, sizeof(filepath), "%s/%s", priv->stub_basepath, + filename); + + ret = sys_stat(filepath, &st); + if (!ret && st.st_nlink == 1) + sys_unlink(filepath); +} + +static int +br_stub_fill_readdir(fd_t *fd, br_stub_fd_t *fctx, DIR *dir, off_t off, + size_t size, gf_dirent_t *entries) +{ + off_t in_case = -1; + off_t last_off = 0; + size_t filled = 0; + int count = 0; + int32_t this_size = -1; + gf_dirent_t *this_entry = NULL; + xlator_t *this = NULL; + struct dirent *entry = NULL; + struct dirent scratch[2] = { + { + 0, + }, + }; + + this = THIS; + if (!off) { + rewinddir(dir); + } else { + seekdir(dir, off); +#ifndef GF_LINUX_HOST_OS + if ((u_long)telldir(dir) != off && off != fctx->bad_object.dir_eof) { + gf_smsg(THIS->name, GF_LOG_ERROR, 0, + BRS_MSG_BAD_OBJECT_DIR_SEEK_FAIL, "off=(0x%llx)", off, + "dir=%p", dir, NULL); + errno = EINVAL; + count = -1; + goto out; + } +#endif /* GF_LINUX_HOST_OS */ + } + + while (filled <= size) { + in_case = (u_long)telldir(dir); + + if (in_case == -1) { + gf_smsg(THIS->name, GF_LOG_ERROR, 0, + BRS_MSG_BAD_OBJECT_DIR_TELL_FAIL, "dir=%p", dir, "err=%s", + strerror(errno), NULL); + goto out; + } + + errno = 0; + entry = sys_readdir(dir, scratch); + if (!entry || errno != 0) { + if (errno == EBADF) { + gf_smsg(THIS->name, GF_LOG_WARNING, 0, + BRS_MSG_BAD_OBJECT_DIR_READ_FAIL, "dir=%p", dir, + "err=%s", 
strerror(errno), NULL); + goto out; + } + break; + } + + if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) + continue; + + if (!strncmp(entry->d_name, "stub-", strlen("stub-"))) { + check_delete_stale_bad_file(this, entry->d_name); + continue; + } + + this_size = max(sizeof(gf_dirent_t), sizeof(gfs3_dirplist)) + + strlen(entry->d_name) + 1; + + if (this_size + filled > size) { + seekdir(dir, in_case); +#ifndef GF_LINUX_HOST_OS + if ((u_long)telldir(dir) != in_case && + in_case != fctx->bad_object.dir_eof) { + gf_smsg(THIS->name, GF_LOG_ERROR, 0, + BRS_MSG_BAD_OBJECT_DIR_SEEK_FAIL, "in_case=(0x%llx)", + in_case, "dir=%p", dir, NULL); + errno = EINVAL; + count = -1; + goto out; + } +#endif /* GF_LINUX_HOST_OS */ + break; + } + + this_entry = gf_dirent_for_name(entry->d_name); + + if (!this_entry) { + gf_smsg(THIS->name, GF_LOG_ERROR, 0, + BRS_MSG_CREATE_GF_DIRENT_FAILED, "entry-name=%s", + entry->d_name, "err=%s", strerror(errno), NULL); + goto out; + } + /* + * we store the offset of next entry here, which is + * probably not intended, but code using syncop_readdir() + * (glfs-heal.c, afr-self-heald.c, pump.c) rely on it + * for directory read resumption. 
+ */ + last_off = (u_long)telldir(dir); + this_entry->d_off = last_off; + this_entry->d_ino = entry->d_ino; + + list_add_tail(&this_entry->list, &entries->list); + + filled += this_size; + count++; + } + + if ((!sys_readdir(dir, scratch) && (errno == 0))) { + /* Indicate EOF */ + errno = ENOENT; + /* Remember EOF offset for later detection */ + fctx->bad_object.dir_eof = last_off; + } +out: + return count; +} + +int32_t +br_stub_readdir_wrapper(call_frame_t *frame, xlator_t *this, fd_t *fd, + size_t size, off_t off, dict_t *xdata) +{ + br_stub_fd_t *fctx = NULL; + DIR *dir = NULL; + int ret = -1; + int32_t op_ret = -1; + int32_t op_errno = 0; + int count = 0; + gf_dirent_t entries; + gf_boolean_t xdata_unref = _gf_false; + dict_t *dict = NULL; + + INIT_LIST_HEAD(&entries.list); + + fctx = br_stub_fd_ctx_get(this, fd); + if (!fctx) { + gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_GET_FD_CONTEXT_FAILED, + "fd=%p", fd, NULL); + op_errno = -ret; + goto done; + } + + dir = fctx->bad_object.dir; + + if (!dir) { + gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_BAD_HANDLE_DIR_NULL, + "fd=%p", fd, NULL); + op_errno = EINVAL; + goto done; + } + + count = br_stub_fill_readdir(fd, fctx, dir, off, size, &entries); + + /* pick ENOENT to indicate EOF */ + op_errno = errno; + op_ret = count; + + dict = xdata; + (void)br_stub_bad_objects_path(this, fd, &entries, &dict); + if (!xdata && dict) { + xdata = dict; + xdata_unref = _gf_true; + } + +done: + STACK_UNWIND_STRICT(readdir, frame, op_ret, op_errno, &entries, xdata); + gf_dirent_free(&entries); + if (xdata_unref) + dict_unref(xdata); + return 0; +} + +/** + * This function is called to mainly obtain the paths of the corrupt + * objects (files as of now). Currently scrub status prints only the + * gfid of the corrupted files. 
Reason is, bitrot-stub maintains the + * list of the corrupted objects as entries inside the quarantine + * directory (<brick export>/.glusterfs/quarantine) + * + * And the name of each entry in the quarantine directory is the gfid + * of the corrupted object. So scrub status will just show that info. + * But it helps the users a lot if the actual path to the object is + * also reported. Hence the below function to get that information. + * The function allocates a new dict to be returned (if it does not + * get one from the caller of readdir i.e. scrubber as of now), and + * stores the paths of each corrupted gfid there. The gfid is used as + * the key and path is used as the value. + * + * NOTE: The path will be there in the following situations + * 1) gfid2path option has been enabled (posix xlator option) + * and the corrupted file contains the path as an extended + * attribute. + * 2) If the gfid2path option is not enabled, OR if the xattr + * is absent, then the inode table should have it. + * The path will be there if a name based lookup has happened + * on the file which has been corrupted. With lookup an inode and + * dentry would be created in the inode table. And the path is + * constructed using the in memory inode and dentry. If a lookup + * has not happened OR the inode corresponding to the corrupted + * file does not exist in the inode table (because it got purged + * as lru limit of the inodes exceeded) OR a nameless lookup had + * happened to populate the inode in the inode table, then the + * path will not be printed in scrub and only the gfid will be there.
+ **/ +int +br_stub_bad_objects_path(xlator_t *this, fd_t *fd, gf_dirent_t *entries, + dict_t **dict) +{ + gf_dirent_t *entry = NULL; + inode_t *inode = NULL; + char *hpath = NULL; + uuid_t gfid = {0}; + int ret = -1; + dict_t *tmp_dict = NULL; + char str_gfid[64] = {0}; + + if (list_empty(&entries->list)) + return 0; + + tmp_dict = *dict; + + if (!tmp_dict) { + tmp_dict = dict_new(); + /* + * If the allocation of dict fails then no need treat it + * it as a error. This path (or function) is executed when + * "gluster volume bitrot <volume name> scrub status" is + * executed, to get the list of the corrupted objects. + * And the motive of this function is to get the paths of + * the corrupted objects. If the dict allocation fails, then + * the scrub status will only show the gfids of those corrupted + * objects (which is the behavior as of the time of this patch + * being worked upon). So just return and only the gfids will + * be shown. + */ + if (!tmp_dict) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ALLOC_FAILED, NULL); + goto out; + } + } + + list_for_each_entry(entry, &entries->list, list) + { + gf_uuid_clear(gfid); + gf_uuid_parse(entry->d_name, gfid); + + inode = inode_find(fd->inode->table, gfid); + + /* No need to check the return value here. + * Because @hpath is examined. 
+ */ + (void)br_stub_get_path_of_gfid(this, fd->inode, inode, gfid, &hpath); + + if (hpath) { + gf_msg_debug(this->name, 0, + "path of the corrupted " + "object (gfid: %s) is %s", + uuid_utoa(gfid), hpath); + br_stub_entry_xattr_fill(this, hpath, entry, tmp_dict); + } else + gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_PATH_GET_FAILED, + "gfid=%s", uuid_utoa_r(gfid, str_gfid), NULL); + + inode = NULL; + hpath = NULL; + } + + ret = 0; + *dict = tmp_dict; + +out: + return ret; +} + +int +br_stub_get_path_of_gfid(xlator_t *this, inode_t *parent, inode_t *inode, + uuid_t gfid, char **path) +{ + int32_t ret = -1; + char gfid_str[64] = {0}; + + GF_VALIDATE_OR_GOTO("bitrot-stub", this, out); + GF_VALIDATE_OR_GOTO(this->name, parent, out); + GF_VALIDATE_OR_GOTO(this->name, path, out); + + /* Above, No need to validate the @inode for hard resolution. Because + * inode can be NULL and if it is NULL, then syncop_gfid_to_path_hard + * will allocate a new inode and proceed. So no need to bother about + * @inode. Because we need it only to send a syncop_getxattr call + * from inside syncop_gfid_to_path_hard. And getxattr fetches the + * path from the backend. + */ + + ret = syncop_gfid_to_path_hard(parent->table, FIRST_CHILD(this), gfid, + inode, path, _gf_true); + if (ret < 0) + gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_PATH_GET_FAILED, + "gfid=%s", uuid_utoa_r(gfid, gfid_str), NULL); + + /* + * Try with soft resolution of path if hard resolve fails. Because + * checking the xattr on disk to get the path of a inode (or gfid) + * is dependent on whether that option is enabled in the posix + * xlator or not. If it is not enabled, then hard resolution by + * checking the on disk xattr fails. + * + * Thus in such situations fall back to the soft resolution which + * mainly depends on the inode_path() function. And for using + * inode_path, @inode has to be linked i.e. 
a successful lookup should + * have happened on the gfid (or the path) to link the inode to the + * inode table. And if @inode is NULL, means, the inode has not been + * found in the inode table and better not to do inode_path() on the + * inode which has not been linked. + */ + if (ret < 0 && inode) { + ret = syncop_gfid_to_path_hard(parent->table, FIRST_CHILD(this), gfid, + inode, path, _gf_false); + if (ret < 0) + gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_PATH_GET_FAILED, + "from-memory gfid=%s", uuid_utoa_r(gfid, gfid_str), NULL); + } + +out: + return ret; +} + +/** + * NOTE: If the file has multiple hardlinks (in gluster volume + * namespace), the path would be one of the hardlinks. Its up to + * the user to find the remaining hardlinks (using find -samefile) + * and remove them. + **/ +void +br_stub_entry_xattr_fill(xlator_t *this, char *hpath, gf_dirent_t *entry, + dict_t *dict) +{ + int32_t ret = -1; + + GF_VALIDATE_OR_GOTO("bit-rot-stub", this, out); + GF_VALIDATE_OR_GOTO(this->name, hpath, out); + + /* + * Use the entry->d_name (which is nothing but the gfid of the + * corrupted object) as the key. And the value will be the actual + * path of that object (or file). + * + * ALso ignore the dict_set errors. scrubber will get the gfid of + * the corrupted object for sure. So, for now lets just log the + * dict_set_dynstr failure and move on. 
+ */ + + ret = dict_set_dynstr(dict, entry->d_name, hpath); + if (ret) + gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_DICT_SET_FAILED, + "path=%s", hpath, "object-name=%s", entry->d_name, NULL); +out: + return; +} diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h index f70fafbca49..9d93caf069f 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h +++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h @@ -11,28 +11,26 @@ #ifndef _BR_MEM_TYPES_H #define _BR_MEM_TYPES_H -#include "mem-types.h" +#include <glusterfs/mem-types.h> enum br_mem_types { - gf_br_stub_mt_private_t = gf_common_mt_end + 1, - gf_br_stub_mt_version_t, - gf_br_stub_mt_inode_ctx_t, - gf_br_stub_mt_signature_t, - gf_br_mt_br_private_t, - gf_br_mt_br_child_t, - gf_br_mt_br_object_t, - gf_br_mt_br_ob_n_wk_t, - gf_br_mt_br_tbf_t, - gf_br_mt_br_tbf_bucket_t, - gf_br_mt_br_tbf_throttle_t, - gf_br_mt_br_tbf_opspec_t, - gf_br_mt_br_scrubber_t, - gf_br_mt_br_fsscan_entry_t, - gf_br_stub_mt_br_stub_fd_t, - gf_br_stub_mt_br_scanner_freq_t, - gf_br_stub_mt_sigstub_t, - gf_br_mt_br_child_event_t, - gf_br_stub_mt_end, + gf_br_stub_mt_private_t = gf_common_mt_end + 1, + gf_br_stub_mt_version_t, + gf_br_stub_mt_inode_ctx_t, + gf_br_stub_mt_signature_t, + gf_br_mt_br_private_t, + gf_br_mt_br_child_t, + gf_br_mt_br_object_t, + gf_br_mt_br_ob_n_wk_t, + gf_br_mt_br_scrubber_t, + gf_br_mt_br_fsscan_entry_t, + gf_br_stub_mt_br_stub_fd_t, + gf_br_stub_mt_br_scanner_freq_t, + gf_br_stub_mt_sigstub_t, + gf_br_mt_br_child_event_t, + gf_br_stub_mt_misc, + gf_br_mt_br_worker_t, + gf_br_stub_mt_end, }; #endif diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h index 532c2beb5c1..6c15a166f18 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h +++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h @@ -11,184 
+11,107 @@ #ifndef _BITROT_STUB_MESSAGES_H_ #define _BITROT_STUB_MESSAGES_H_ -#include "glfs-message-id.h" +#include <glusterfs/glfs-message-id.h> -/* file bit-rot-stub-messages.h - * brief BIT-ROT log-message IDs and their descriptions - */ - -/* NOTE: Rules for message additions - * 1) Each instance of a message is _better_ left with a unique message ID, even - * if the message format is the same. Reasoning is that, if the message - * format needs to change in one instance, the other instances are not - * impacted or the new change does not change the ID of the instance being - * modified. - * 2) Addition of a message, - * - Should increment the GLFS_NUM_MESSAGES - * - Append to the list of messages defined, towards the end - * - Retain macro naming as glfs_msg_X (for redability across developers) - * NOTE: Rules for message format modifications - * 3) Check acorss the code if the message ID macro in question is reused - * anywhere. If reused then then the modifications should ensure correctness - * everywhere, or needs a new message ID as (1) above was not adhered to. If - * not used anywhere, proceed with the required modification. - * NOTE: Rules for message deletion - * 4) Check (3) and if used anywhere else, then cannot be deleted. If not used - * anywhere, then can be deleted, but will leave a hole by design, as - * addition rules specify modification to the end of the list and not filling - * holes. - */ - -#define GLFS_BITROT_STUB_BASE GLFS_MSGID_COMP_BITROT_STUB -#define GLFS_BITROT_STUB_NUM_MESSAGES 15 -#define GLFS_MSGID_END (GLFS_BITROT_STUB_BASE + \ - GLFS_BITROT_STUB_NUM_MESSAGES + 1) -/* Messaged with message IDs */ -#define glfs_msg_start_x GLFS_BITROT_STUB_BASE, "Invalid: Start of messages" -/*------------*/ - - -#define BRS_MSG_NO_MEMORY (GLFS_BITROT_STUB_BASE + 1) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRS_MSG_SET_EVENT_FAILED (GLFS_BITROT_STUB_BASE + 2) -/*! 
- * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRS_MSG_MEM_ACNT_FAILED (GLFS_BITROT_STUB_BASE + 3) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRS_MSG_CREATE_FRAME_FAILED (GLFS_BITROT_STUB_BASE + 4) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRS_MSG_SET_CONTEXT_FAILED (GLFS_BITROT_STUB_BASE + 5) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRS_MSG_CHANGE_VERSION_FAILED (GLFS_BITROT_STUB_BASE + 6) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRS_MSG_ADD_FD_TO_LIST_FAILED (GLFS_BITROT_STUB_BASE + 7) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRS_MSG_SET_FD_CONTEXT_FAILED (GLFS_BITROT_STUB_BASE + 8) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRS_MSG_CREATE_ANONYMOUS_FD_FAILED (GLFS_BITROT_STUB_BASE + 9) -/*! - * @messageid - * @diagnosis - * @recommendedaction +/* To add new message IDs, append new identifiers at the end of the list. * - */ -#define BRS_MSG_NO_CHILD (GLFS_BITROT_STUB_BASE + 10) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRS_MSG_STUB_ALLOC_FAILED (GLFS_BITROT_STUB_BASE + 11) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRS_MSG_GET_INODE_CONTEXT_FAILED (GLFS_BITROT_STUB_BASE + 12) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRS_MSG_CANCEL_SIGN_THREAD_FAILED (GLFS_BITROT_STUB_BASE + 13) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRS_MSG_ADD_FD_TO_INODE (GLFS_BITROT_STUB_BASE + 14) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRS_MSG_SIGN_VERSION_ERROR (GLFS_BITROT_STUB_BASE + 15) -/*! - * @messageid - * @diagnosis - * @recommendedaction + * Never remove a message ID. If it's not used anymore, you can rename it or + * leave it as it is, but not delete it. 
This is to prevent reutilization of + * IDs by other messages. * + * The component name must match one of the entries defined in + * glfs-message-id.h. */ -#define BRS_MSG_BAD_OBJ_MARK_FAIL (GLFS_BITROT_STUB_BASE + 16) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRS_MSG_NON_SCRUB_BAD_OBJ_MARK (GLFS_BITROT_STUB_BASE + 17) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRS_MSG_REMOVE_INTERNAL_XATTR (GLFS_BITROT_STUB_BASE + 18) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRS_MSG_SET_INTERNAL_XATTR (GLFS_BITROT_STUB_BASE + 19) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -#define BRS_MSG_BAD_OBJECT_ACCESS (GLFS_BITROT_STUB_BASE + 20) -/*! - * @messageid - * @diagnosis - * @recommendedaction - * - */ -/*------------*/ -#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages" +GLFS_MSGID(BITROT_STUB, BRS_MSG_NO_MEMORY, BRS_MSG_SET_EVENT_FAILED, + BRS_MSG_MEM_ACNT_FAILED, BRS_MSG_CREATE_FRAME_FAILED, + BRS_MSG_SET_CONTEXT_FAILED, BRS_MSG_CHANGE_VERSION_FAILED, + BRS_MSG_ADD_FD_TO_LIST_FAILED, BRS_MSG_SET_FD_CONTEXT_FAILED, + BRS_MSG_CREATE_ANONYMOUS_FD_FAILED, BRS_MSG_NO_CHILD, + BRS_MSG_STUB_ALLOC_FAILED, BRS_MSG_GET_INODE_CONTEXT_FAILED, + BRS_MSG_CANCEL_SIGN_THREAD_FAILED, BRS_MSG_ADD_FD_TO_INODE, + BRS_MSG_SIGN_VERSION_ERROR, BRS_MSG_BAD_OBJ_MARK_FAIL, + BRS_MSG_NON_SCRUB_BAD_OBJ_MARK, BRS_MSG_REMOVE_INTERNAL_XATTR, + BRS_MSG_SET_INTERNAL_XATTR, BRS_MSG_BAD_OBJECT_ACCESS, + BRS_MSG_BAD_CONTAINER_FAIL, BRS_MSG_BAD_OBJECT_DIR_FAIL, + BRS_MSG_BAD_OBJECT_DIR_SEEK_FAIL, BRS_MSG_BAD_OBJECT_DIR_TELL_FAIL, + BRS_MSG_BAD_OBJECT_DIR_READ_FAIL, BRS_MSG_GET_FD_CONTEXT_FAILED, + BRS_MSG_BAD_HANDLE_DIR_NULL, BRS_MSG_BAD_OBJ_THREAD_FAIL, + BRS_MSG_BAD_OBJ_DIR_CLOSE_FAIL, BRS_MSG_LINK_FAIL, + BRS_MSG_BAD_OBJ_UNLINK_FAIL, BRS_MSG_DICT_SET_FAILED, + BRS_MSG_PATH_GET_FAILED, BRS_MSG_NULL_LOCAL, + BRS_MSG_SPAWN_SIGN_THRD_FAILED, BRS_MSG_KILL_SIGN_THREAD, + 
BRS_MSG_NON_BITD_PID, BRS_MSG_SIGN_PREPARE_FAIL, + BRS_MSG_USING_DEFAULT_THREAD_SIZE, BRS_MSG_ALLOC_MEM_FAILED, + BRS_MSG_DICT_ALLOC_FAILED, BRS_MSG_CREATE_GF_DIRENT_FAILED, + BRS_MSG_ALLOC_FAILED, BRS_MSG_PATH_XATTR_GET_FAILED, + BRS_MSG_VERSION_PREPARE_FAIL); + +#define BRS_MSG_MEM_ACNT_FAILED_STR "Memory accounting init failed" +#define BRS_MSG_BAD_OBJ_THREAD_FAIL_STR "pthread_init failed" +#define BRS_MSG_USING_DEFAULT_THREAD_SIZE_STR "Using default thread stack size" +#define BRS_MSG_NO_CHILD_STR "FATAL: no children" +#define BRS_MSG_SPAWN_SIGN_THRD_FAILED_STR \ + "failed to create the new thread for signer" +#define BRS_MSG_BAD_CONTAINER_FAIL_STR \ + "failed to launch the thread for storing bad gfids" +#define BRS_MSG_CANCEL_SIGN_THREAD_FAILED_STR \ + "Could not cancel sign serializer thread" +#define BRS_MSG_KILL_SIGN_THREAD_STR "killed the signer thread" +#define BRS_MSG_GET_INODE_CONTEXT_FAILED_STR \ + "failed to init the inode context for the inode" +#define BRS_MSG_ADD_FD_TO_INODE_STR "failed to add fd to the inode" +#define BRS_MSG_NO_MEMORY_STR "local allocation failed" +#define BRS_MSG_BAD_OBJECT_ACCESS_STR "bad object accessed. Returning" +#define BRS_MSG_SIGN_VERSION_ERROR_STR "Signing version exceeds current version" +#define BRS_MSG_NON_BITD_PID_STR \ + "PID from where signature request came, does not belong to bit-rot " \ + "daemon. Unwinding the fop" +#define BRS_MSG_SIGN_PREPARE_FAIL_STR \ + "failed to prepare the signature. Unwinding the fop" +#define BRS_MSG_VERSION_PREPARE_FAIL_STR \ + "failed to prepare the version. 
Unwinding the fop" +#define BRS_MSG_STUB_ALLOC_FAILED_STR "failed to allocate stub fop, Unwinding" +#define BRS_MSG_BAD_OBJ_MARK_FAIL_STR "failed to mark object as bad" +#define BRS_MSG_NON_SCRUB_BAD_OBJ_MARK_STR \ + "bad object marking is not from the scrubber" +#define BRS_MSG_ALLOC_MEM_FAILED_STR "failed to allocate memory" +#define BRS_MSG_SET_INTERNAL_XATTR_STR "called on the internal xattr" +#define BRS_MSG_REMOVE_INTERNAL_XATTR_STR "removexattr called on internal xattr" +#define BRS_MSG_CREATE_ANONYMOUS_FD_FAILED_STR \ + "failed to create anonymous fd for the inode" +#define BRS_MSG_ADD_FD_TO_LIST_FAILED_STR "failed add fd to the list" +#define BRS_MSG_SET_FD_CONTEXT_FAILED_STR \ + "failed to set the fd context for the file" +#define BRS_MSG_NULL_LOCAL_STR "local is NULL" +#define BRS_MSG_DICT_ALLOC_FAILED_STR \ + "dict allocation failed: cannot send IPC FOP to changelog" +#define BRS_MSG_SET_EVENT_FAILED_STR "cannot set release event in dict" +#define BRS_MSG_CREATE_FRAME_FAILED_STR "create_frame() failure" +#define BRS_MSG_BAD_OBJ_DIR_CLOSE_FAIL_STR "closedir error" +#define BRS_MSG_LINK_FAIL_STR "failed to record gfid" +#define BRS_MSG_BAD_OBJ_UNLINK_FAIL_STR \ + "failed to delete bad object link from quaratine directory" +#define BRS_MSG_BAD_OBJECT_DIR_FAIL_STR "failed stub directory" +#define BRS_MSG_BAD_OBJECT_DIR_SEEK_FAIL_STR \ + "seekdir failed. Invalid argument (offset reused from another DIR * " \ + "structure)" +#define BRS_MSG_BAD_OBJECT_DIR_TELL_FAIL_STR "telldir failed on dir" +#define BRS_MSG_BAD_OBJECT_DIR_READ_FAIL_STR "readdir failed on dir" +#define BRS_MSG_CREATE_GF_DIRENT_FAILED_STR "could not create gf_dirent" +#define BRS_MSG_GET_FD_CONTEXT_FAILED_STR "pfd is NULL" +#define BRS_MSG_BAD_HANDLE_DIR_NULL_STR "dir if NULL" +#define BRS_MSG_ALLOC_FAILED_STR \ + "failed to allocate new dict for saving the paths of the corrupted " \ + "objects. 
Scrub status will only display the gfid" +#define BRS_MSG_PATH_GET_FAILED_STR "failed to get the path" +#define BRS_MSG_PATH_XATTR_GET_FAILED_STR \ + "failed to get the path xattr from disk for the gfid. Trying to get path " \ + "from the memory" +#define BRS_MSG_DICT_SET_FAILED_STR \ + "failed to set the actual path as the value in the dict for the " \ + "corrupted object" +#define BRS_MSG_SET_CONTEXT_FAILED_STR \ + "could not set fd context for release callback" +#define BRS_MSG_CHANGE_VERSION_FAILED_STR "change version failed" #endif /* !_BITROT_STUB_MESSAGES_H_ */ diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c index 85fad6925c1..447dd47ff41 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c +++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c @@ -10,218 +10,433 @@ #include <ctype.h> #include <sys/uio.h> +#include <signal.h> -#include "glusterfs.h" -#include "xlator.h" -#include "logging.h" +#include <glusterfs/glusterfs.h> +#include <glusterfs/logging.h> #include "changelog.h" -#include "compat-errno.h" -#include "call-stub.h" +#include <glusterfs/compat-errno.h> +#include <glusterfs/call-stub.h> #include "bit-rot-stub.h" #include "bit-rot-stub-mem-types.h" #include "bit-rot-stub-messages.h" #include "bit-rot-common.h" -#define BR_STUB_REQUEST_COOKIE 0x1 +#define BR_STUB_REQUEST_COOKIE 0x1 -void *br_stub_signth (void *); +void +br_stub_lock_cleaner(void *arg) +{ + pthread_mutex_t *clean_mutex = arg; + + pthread_mutex_unlock(clean_mutex); + return; +} + +void * +br_stub_signth(void *); struct br_stub_signentry { - unsigned long v; + unsigned long v; - call_stub_t *stub; + call_stub_t *stub; - struct list_head list; + struct list_head list; }; int32_t -mem_acct_init (xlator_t *this) +mem_acct_init(xlator_t *this) { - int32_t ret = -1; - - if (!this) - return ret; + int32_t ret = -1; - ret = xlator_mem_acct_init (this, gf_br_stub_mt_end + 1); + if (!this) + return ret; - if (ret != 0) { 
- gf_msg (this->name, GF_LOG_WARNING, 0, BRS_MSG_MEM_ACNT_FAILED, - "Memory accounting init failed"); - return ret; - } + ret = xlator_mem_acct_init(this, gf_br_stub_mt_end + 1); + if (ret != 0) { + gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_MEM_ACNT_FAILED, NULL); return ret; + } + + return ret; +} + +int +br_stub_bad_object_container_init(xlator_t *this, br_stub_private_t *priv) +{ + pthread_attr_t w_attr; + int ret = -1; + + ret = pthread_cond_init(&priv->container.bad_cond, NULL); + if (ret != 0) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJ_THREAD_FAIL, + "cond_init ret=%d", ret, NULL); + goto out; + } + + ret = pthread_mutex_init(&priv->container.bad_lock, NULL); + if (ret != 0) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJ_THREAD_FAIL, + "mutex_init ret=%d", ret, NULL); + goto cleanup_cond; + } + + ret = pthread_attr_init(&w_attr); + if (ret != 0) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJ_THREAD_FAIL, + "attr_init ret=%d", ret, NULL); + goto cleanup_lock; + } + + ret = pthread_attr_setstacksize(&w_attr, BAD_OBJECT_THREAD_STACK_SIZE); + if (ret == EINVAL) { + gf_smsg(this->name, GF_LOG_WARNING, 0, + BRS_MSG_USING_DEFAULT_THREAD_SIZE, NULL); + } + + INIT_LIST_HEAD(&priv->container.bad_queue); + ret = br_stub_dir_create(this, priv); + if (ret < 0) + goto cleanup_lock; + + ret = gf_thread_create(&priv->container.thread, &w_attr, br_stub_worker, + this, "brswrker"); + if (ret) + goto cleanup_attr; + + return 0; + +cleanup_attr: + pthread_attr_destroy(&w_attr); +cleanup_lock: + pthread_mutex_destroy(&priv->container.bad_lock); +cleanup_cond: + pthread_cond_destroy(&priv->container.bad_cond); +out: + return -1; } int32_t -init (xlator_t *this) +init(xlator_t *this) { - int32_t ret = 0; - char *tmp = NULL; - struct timeval tv = {0,}; - br_stub_private_t *priv = NULL; + int ret = 0; + char *tmp = NULL; + struct timeval tv = { + 0, + }; + br_stub_private_t *priv = NULL; - if (!this->children) { - gf_msg (this->name, 
GF_LOG_ERROR, 0, BRS_MSG_NO_CHILD, - "FATAL: no children"); - goto error_return; - } + if (!this->children) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_NO_CHILD, NULL); + goto error_return; + } - priv = GF_CALLOC (1, sizeof (*priv), gf_br_stub_mt_private_t); - if (!priv) - goto error_return; + priv = GF_CALLOC(1, sizeof(*priv), gf_br_stub_mt_private_t); + if (!priv) + goto error_return; - priv->local_pool = mem_pool_new (br_stub_local_t, 512); - if (!priv->local_pool) - goto free_priv; + priv->local_pool = mem_pool_new(br_stub_local_t, 512); + if (!priv->local_pool) + goto free_priv; - GF_OPTION_INIT ("bitrot", priv->go, bool, free_mempool); + GF_OPTION_INIT("bitrot", priv->do_versioning, bool, free_mempool); - GF_OPTION_INIT ("export", tmp, str, free_mempool); - memcpy (priv->export, tmp, strlen (tmp) + 1); + GF_OPTION_INIT("export", tmp, str, free_mempool); - (void) gettimeofday (&tv, NULL); + if (snprintf(priv->export, PATH_MAX, "%s", tmp) >= PATH_MAX) + goto free_mempool; - /* boot time is in network endian format */ - priv->boot[0] = htonl (tv.tv_sec); - priv->boot[1] = htonl (tv.tv_usec); + if (snprintf(priv->stub_basepath, sizeof(priv->stub_basepath), "%s/%s", + priv->export, + BR_STUB_QUARANTINE_DIR) >= sizeof(priv->stub_basepath)) + goto free_mempool; - pthread_mutex_init (&priv->lock, NULL); - pthread_cond_init (&priv->cond, NULL); - INIT_LIST_HEAD (&priv->squeue); + (void)gettimeofday(&tv, NULL); - ret = gf_thread_create (&priv->signth, NULL, br_stub_signth, priv); - if (ret != 0) - goto cleanup_lock; + /* boot time is in network endian format */ + priv->boot[0] = htonl(tv.tv_sec); + priv->boot[1] = htonl(tv.tv_usec); - gf_msg_debug (this->name, 0, "bit-rot stub loaded"); - this->private = priv; + pthread_mutex_init(&priv->lock, NULL); + pthread_cond_init(&priv->cond, NULL); + INIT_LIST_HEAD(&priv->squeue); + /* Thread creations need 'this' to be passed so that THIS can be + * assigned inside the thread. So setting this->private here. 
+ */ + this->private = priv; + if (!priv->do_versioning) return 0; - cleanup_lock: - pthread_cond_destroy (&priv->cond); - pthread_mutex_destroy (&priv->lock); - free_mempool: - mem_pool_destroy (priv->local_pool); - free_priv: - GF_FREE (priv); - error_return: - return -1; + ret = gf_thread_create(&priv->signth, NULL, br_stub_signth, this, + "brssign"); + if (ret != 0) { + gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SPAWN_SIGN_THRD_FAILED, + NULL); + goto cleanup_lock; + } + + ret = br_stub_bad_object_container_init(this, priv); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_CONTAINER_FAIL, NULL); + goto cleanup_lock; + } + + gf_msg_debug(this->name, 0, "bit-rot stub loaded"); + + return 0; + +cleanup_lock: + pthread_cond_destroy(&priv->cond); + pthread_mutex_destroy(&priv->lock); +free_mempool: + mem_pool_destroy(priv->local_pool); + priv->local_pool = NULL; +free_priv: + GF_FREE(priv); + this->private = NULL; +error_return: + return -1; +} + +/* TODO: + * As of now enabling bitrot option does 2 things. + * 1) Start the Bitrot Daemon which signs the objects (currently files only) + * upon getting notified by the stub. + * 2) Enable versioning of the objects. Object versions (again files only) are + * incremented upon modification. + * So object versioning is tied to bitrot daemon's signing. In future, object + * versioning might be necessary for other things as well apart from bit-rot + * detection (well that's the objective of bringing in object-versioning :)). + * In that case, better to make versioning a new option and letting it to be + * enabled despite bit-rot detection is not needed. + * Ex: ICAP. 
+ */ +int32_t +reconfigure(xlator_t *this, dict_t *options) +{ + int32_t ret = -1; + br_stub_private_t *priv = NULL; + + priv = this->private; + + GF_OPTION_RECONF("bitrot", priv->do_versioning, options, bool, err); + if (priv->do_versioning && !priv->signth) { + ret = gf_thread_create(&priv->signth, NULL, br_stub_signth, this, + "brssign"); + if (ret != 0) { + gf_smsg(this->name, GF_LOG_WARNING, 0, + BRS_MSG_SPAWN_SIGN_THRD_FAILED, NULL); + goto err; + } + + ret = br_stub_bad_object_container_init(this, priv); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_CONTAINER_FAIL, + NULL); + goto err; + } + } else { + if (priv->signth) { + if (gf_thread_cleanup_xint(priv->signth)) { + gf_smsg(this->name, GF_LOG_ERROR, 0, + BRS_MSG_CANCEL_SIGN_THREAD_FAILED, NULL); + } else { + gf_smsg(this->name, GF_LOG_INFO, 0, BRS_MSG_KILL_SIGN_THREAD, + NULL); + priv->signth = 0; + } + } + + if (priv->container.thread) { + if (gf_thread_cleanup_xint(priv->container.thread)) { + gf_smsg(this->name, GF_LOG_ERROR, 0, + BRS_MSG_CANCEL_SIGN_THREAD_FAILED, NULL); + } + priv->container.thread = 0; + } + } + + ret = 0; + return ret; +err: + if (priv->signth) { + if (gf_thread_cleanup_xint(priv->signth)) { + gf_smsg(this->name, GF_LOG_ERROR, 0, + BRS_MSG_CANCEL_SIGN_THREAD_FAILED, NULL); + } + priv->signth = 0; + } + + if (priv->container.thread) { + if (gf_thread_cleanup_xint(priv->container.thread)) { + gf_smsg(this->name, GF_LOG_ERROR, 0, + BRS_MSG_CANCEL_SIGN_THREAD_FAILED, NULL); + } + priv->container.thread = 0; + } + ret = -1; + return ret; +} + +int +notify(xlator_t *this, int event, void *data, ...) 
+{ + br_stub_private_t *priv = NULL; + + if (!this) + return 0; + + priv = this->private; + if (!priv) + return 0; + + default_notify(this, event, data); + return 0; } void -fini (xlator_t *this) +fini(xlator_t *this) { - int32_t ret = 0; - br_stub_private_t *priv = this->private; - struct br_stub_signentry *sigstub = NULL; + int32_t ret = 0; + br_stub_private_t *priv = this->private; + struct br_stub_signentry *sigstub = NULL; + call_stub_t *stub = NULL; - if (!priv) - return; + if (!priv) + return; - ret = gf_thread_cleanup_xint (priv->signth); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - BRS_MSG_CANCEL_SIGN_THREAD_FAILED, - "Could not cancel sign serializer thread"); - goto out; - } + if (!priv->do_versioning) + goto cleanup; - while (!list_empty (&priv->squeue)) { - sigstub = list_first_entry (&priv->squeue, - struct br_stub_signentry, list); - list_del_init (&sigstub->list); + ret = gf_thread_cleanup_xint(priv->signth); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_CANCEL_SIGN_THREAD_FAILED, + NULL); + goto out; + } + priv->signth = 0; - call_stub_destroy (sigstub->stub); - GF_FREE (sigstub); - } + while (!list_empty(&priv->squeue)) { + sigstub = list_first_entry(&priv->squeue, struct br_stub_signentry, + list); + list_del_init(&sigstub->list); + + call_stub_destroy(sigstub->stub); + GF_FREE(sigstub); + } - pthread_mutex_destroy (&priv->lock); - pthread_cond_destroy (&priv->cond); + ret = gf_thread_cleanup_xint(priv->container.thread); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_CANCEL_SIGN_THREAD_FAILED, + NULL); + goto out; + } - this->private = NULL; - GF_FREE (priv); + priv->container.thread = 0; - out: - return; + while (!list_empty(&priv->container.bad_queue)) { + stub = list_first_entry(&priv->container.bad_queue, call_stub_t, list); + list_del_init(&stub->list); + call_stub_destroy(stub); + } + + pthread_mutex_destroy(&priv->container.bad_lock); + pthread_cond_destroy(&priv->container.bad_cond); + +cleanup: + 
pthread_mutex_destroy(&priv->lock); + pthread_cond_destroy(&priv->cond); + + if (priv->local_pool) { + mem_pool_destroy(priv->local_pool); + priv->local_pool = NULL; + } + + this->private = NULL; + GF_FREE(priv); + +out: + return; } static int -br_stub_alloc_versions (br_version_t **obuf, - br_signature_t **sbuf, size_t signaturelen) +br_stub_alloc_versions(br_version_t **obuf, br_signature_t **sbuf, + size_t signaturelen) { - void *mem = NULL; - size_t size = 0; + void *mem = NULL; + size_t size = 0; - if (obuf) - size += sizeof (br_version_t); - if (sbuf) - size += sizeof (br_signature_t) + signaturelen; + if (obuf) + size += sizeof(br_version_t); + if (sbuf) + size += sizeof(br_signature_t) + signaturelen; - mem = GF_CALLOC (1, size, gf_br_stub_mt_version_t); - if (!mem) - goto error_return; + mem = GF_CALLOC(1, size, gf_br_stub_mt_version_t); + if (!mem) + goto error_return; - if (obuf) { - *obuf = (br_version_t *)mem; - mem = ((char *)mem + sizeof (br_version_t)); - } - if (sbuf) { - *sbuf = (br_signature_t *)mem; - } + if (obuf) { + *obuf = (br_version_t *)mem; + mem = ((char *)mem + sizeof(br_version_t)); + } + if (sbuf) { + *sbuf = (br_signature_t *)mem; + } - return 0; + return 0; - error_return: - return -1; +error_return: + return -1; } static void -br_stub_dealloc_versions (void *mem) +br_stub_dealloc_versions(void *mem) { - GF_FREE (mem); + GF_FREE(mem); } static br_stub_local_t * -br_stub_alloc_local (xlator_t *this) +br_stub_alloc_local(xlator_t *this) { - br_stub_private_t *priv = this->private; + br_stub_private_t *priv = this->private; - return mem_get0 (priv->local_pool); + return mem_get0(priv->local_pool); } static void -br_stub_dealloc_local (br_stub_local_t *ptr) +br_stub_dealloc_local(br_stub_local_t *ptr) { - mem_put (ptr); + if (!ptr) + return; + + mem_put(ptr); } static int -br_stub_prepare_version_request (xlator_t *this, dict_t *dict, +br_stub_prepare_version_request(xlator_t *this, dict_t *dict, br_version_t *obuf, unsigned long 
oversion) { - br_stub_private_t *priv = NULL; + br_stub_private_t *priv = NULL; - priv = this->private; - br_set_ongoingversion (obuf, oversion, priv->boot); + priv = this->private; + br_set_ongoingversion(obuf, oversion, priv->boot); - return dict_set_static_bin (dict, BITROT_CURRENT_VERSION_KEY, - (void *)obuf, sizeof (br_version_t)); + return dict_set_bin(dict, BITROT_CURRENT_VERSION_KEY, (void *)obuf, + sizeof(br_version_t)); } static int -br_stub_prepare_signing_request (dict_t *dict, - br_signature_t *sbuf, - br_isignature_t *sign, size_t signaturelen) +br_stub_prepare_signing_request(dict_t *dict, br_signature_t *sbuf, + br_isignature_t *sign, size_t signaturelen) { - size_t size = 0; + size_t size = 0; - br_set_signature (sbuf, sign, signaturelen, &size); + br_set_signature(sbuf, sign, signaturelen, &size); - return dict_set_static_bin (dict, BITROT_SIGNING_VERSION_KEY, - (void *)sbuf, size); + return dict_set_bin(dict, BITROT_SIGNING_VERSION_KEY, (void *)sbuf, size); } /** @@ -231,224 +446,245 @@ br_stub_prepare_signing_request (dict_t *dict, * initializes the transient inode version. */ static int -br_stub_init_inode_versions (xlator_t *this, fd_t *fd, inode_t *inode, - unsigned long version, gf_boolean_t markdirty, - gf_boolean_t bad_object) -{ - int32_t ret = 0; - br_stub_inode_ctx_t *ctx = NULL; - - ctx = GF_CALLOC (1, sizeof (br_stub_inode_ctx_t), - gf_br_stub_mt_inode_ctx_t); - if (!ctx) - goto error_return; - - INIT_LIST_HEAD (&ctx->fd_list); - (markdirty) ? 
__br_stub_mark_inode_dirty (ctx) - : __br_stub_mark_inode_synced (ctx); - __br_stub_set_ongoing_version (ctx, version); - - if (bad_object) - __br_stub_mark_object_bad (ctx); - - if (fd) { - ret = br_stub_add_fd_to_inode (this, fd, ctx); - if (ret) - goto free_ctx; - } +br_stub_init_inode_versions(xlator_t *this, fd_t *fd, inode_t *inode, + unsigned long version, gf_boolean_t markdirty, + gf_boolean_t bad_object, uint64_t *ctx_addr) +{ + int32_t ret = 0; + br_stub_inode_ctx_t *ctx = NULL; + + ctx = GF_CALLOC(1, sizeof(br_stub_inode_ctx_t), gf_br_stub_mt_inode_ctx_t); + if (!ctx) + goto error_return; - ret = br_stub_set_inode_ctx (this, inode, ctx); + INIT_LIST_HEAD(&ctx->fd_list); + (markdirty) ? __br_stub_mark_inode_dirty(ctx) + : __br_stub_mark_inode_synced(ctx); + __br_stub_set_ongoing_version(ctx, version); + + if (bad_object) + __br_stub_mark_object_bad(ctx); + + if (fd) { + ret = br_stub_add_fd_to_inode(this, fd, ctx); if (ret) - goto free_ctx; - return 0; + goto free_ctx; + } + + ret = br_stub_set_inode_ctx(this, inode, ctx); + if (ret) + goto free_ctx; + + if (ctx_addr) + *ctx_addr = (uint64_t)(uintptr_t)ctx; + return 0; free_ctx: - GF_FREE (ctx); - error_return: - return -1; + GF_FREE(ctx); +error_return: + return -1; } /** * modify the ongoing version of an inode. 
*/ static int -br_stub_mod_inode_versions (xlator_t *this, - fd_t *fd, inode_t *inode, unsigned long version) +br_stub_mod_inode_versions(xlator_t *this, fd_t *fd, inode_t *inode, + unsigned long version) { - int32_t ret = -1; - br_stub_inode_ctx_t *ctx = 0; + int32_t ret = -1; + br_stub_inode_ctx_t *ctx = 0; - LOCK (&inode->lock); - { - ctx = __br_stub_get_ongoing_version_ctx (this, inode, NULL); - if (ctx == NULL) - goto unblock; - if (__br_stub_is_inode_dirty (ctx)) { - __br_stub_set_ongoing_version (ctx, version); - __br_stub_mark_inode_synced (ctx); - } - - ret = 0; + LOCK(&inode->lock); + { + ctx = __br_stub_get_ongoing_version_ctx(this, inode, NULL); + if (ctx == NULL) + goto unblock; + if (__br_stub_is_inode_dirty(ctx)) { + __br_stub_set_ongoing_version(ctx, version); + __br_stub_mark_inode_synced(ctx); } + + ret = 0; + } unblock: - UNLOCK (&inode->lock); + UNLOCK(&inode->lock); - return ret; + return ret; } static void -br_stub_fill_local (br_stub_local_t *local, - call_stub_t *stub, fd_t *fd, inode_t *inode, uuid_t gfid, - int versioningtype, unsigned long memversion) +br_stub_fill_local(br_stub_local_t *local, call_stub_t *stub, fd_t *fd, + inode_t *inode, uuid_t gfid, int versioningtype, + unsigned long memversion) { - local->fopstub = stub; - local->versioningtype = versioningtype; - local->u.context.version = memversion; - if (fd) - local->u.context.fd = fd_ref (fd); - if (inode) - local->u.context.inode = inode_ref (inode); - gf_uuid_copy (local->u.context.gfid, gfid); + local->fopstub = stub; + local->versioningtype = versioningtype; + local->u.context.version = memversion; + if (fd) + local->u.context.fd = fd_ref(fd); + if (inode) + local->u.context.inode = inode_ref(inode); + gf_uuid_copy(local->u.context.gfid, gfid); } static void -br_stub_cleanup_local (br_stub_local_t *local) -{ - local->fopstub = NULL; - local->versioningtype = 0; - local->u.context.version = 0; - if (local->u.context.fd) { - fd_unref (local->u.context.fd); - 
local->u.context.fd = NULL; - } - if (local->u.context.inode) { - inode_unref (local->u.context.inode); - local->u.context.inode = NULL; - } - memset (local->u.context.gfid, '\0', sizeof (uuid_t)); +br_stub_cleanup_local(br_stub_local_t *local) +{ + if (!local) + return; + + local->fopstub = NULL; + local->versioningtype = 0; + local->u.context.version = 0; + if (local->u.context.fd) { + fd_unref(local->u.context.fd); + local->u.context.fd = NULL; + } + if (local->u.context.inode) { + inode_unref(local->u.context.inode); + local->u.context.inode = NULL; + } + memset(local->u.context.gfid, '\0', sizeof(uuid_t)); } static int -br_stub_need_versioning (xlator_t *this, - fd_t *fd, gf_boolean_t *versioning, - gf_boolean_t *modified, br_stub_inode_ctx_t **ctx) -{ - int32_t ret = -1; - uint64_t ctx_addr = 0; - br_stub_inode_ctx_t *c = NULL; - - *versioning = _gf_false; - *modified = _gf_false; - - ret = br_stub_get_inode_ctx (this, fd->inode, &ctx_addr); - if (ret < 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - BRS_MSG_GET_INODE_CONTEXT_FAILED, "failed to get the " - "inode context for the inode %s", - uuid_utoa (fd->inode->gfid)); - goto error_return; +br_stub_need_versioning(xlator_t *this, fd_t *fd, gf_boolean_t *versioning, + gf_boolean_t *modified, br_stub_inode_ctx_t **ctx) +{ + int32_t ret = -1; + uint64_t ctx_addr = 0; + br_stub_inode_ctx_t *c = NULL; + unsigned long version = BITROT_DEFAULT_CURRENT_VERSION; + + *versioning = _gf_false; + *modified = _gf_false; + + /* Bitrot stub inode context was initialized only in lookup, create + * and mknod cbk path. Object versioning was enabled by default + * irrespective of bitrot enabled or not. But it's made optional now. + * As a consequence there could be cases where getting inode ctx would + * fail because it's not set yet. + * e.g., If versioning (with bitrot enable) is enabled while I/O is + * happening, it could directly get other fops like writev without + * lookup, where getting inode ctx would fail. 
Hence initialize the + * inode ctx on failure to get ctx. This is done in all places where + * applicable. + */ + ret = br_stub_get_inode_ctx(this, fd->inode, &ctx_addr); + if (ret < 0) { + ret = br_stub_init_inode_versions(this, fd, fd->inode, version, + _gf_true, _gf_false, &ctx_addr); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, + BRS_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s", + uuid_utoa(fd->inode->gfid), NULL); + goto error_return; } + } - c = (br_stub_inode_ctx_t *) (long) ctx_addr; + c = (br_stub_inode_ctx_t *)(long)ctx_addr; - LOCK (&fd->inode->lock); - { - if (__br_stub_is_inode_dirty (c)) - *versioning = _gf_true; - if (__br_stub_is_inode_modified (c)) - *modified = _gf_true; - } - UNLOCK (&fd->inode->lock); + LOCK(&fd->inode->lock); + { + if (__br_stub_is_inode_dirty(c)) + *versioning = _gf_true; + if (__br_stub_is_inode_modified(c)) + *modified = _gf_true; + } + UNLOCK(&fd->inode->lock); - if (ctx) - *ctx = c; - return 0; + if (ctx) + *ctx = c; + return 0; - error_return: - return -1; +error_return: + return -1; } static int32_t -br_stub_anon_fd_ctx (xlator_t *this, fd_t *fd, br_stub_inode_ctx_t *ctx) -{ - int32_t ret = -1; - br_stub_fd_t *br_stub_fd = NULL; - - br_stub_fd = br_stub_fd_ctx_get (this, fd); - if (!br_stub_fd) { - ret = br_stub_add_fd_to_inode (this, fd, ctx); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - BRS_MSG_ADD_FD_TO_INODE, "failed to add fd to " - "the inode (gfid: %s)", - uuid_utoa (fd->inode->gfid)); - goto out; - } +br_stub_anon_fd_ctx(xlator_t *this, fd_t *fd, br_stub_inode_ctx_t *ctx) +{ + int32_t ret = -1; + br_stub_fd_t *br_stub_fd = NULL; + + br_stub_fd = br_stub_fd_ctx_get(this, fd); + if (!br_stub_fd) { + ret = br_stub_add_fd_to_inode(this, fd, ctx); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ADD_FD_TO_INODE, + "gfid=%s", uuid_utoa(fd->inode->gfid), NULL); + goto out; } + } - ret = 0; + ret = 0; out: - return ret; + return ret; } static int -br_stub_versioning_prep (call_frame_t *frame, - 
xlator_t *this, fd_t *fd, br_stub_inode_ctx_t *ctx) +br_stub_versioning_prep(call_frame_t *frame, xlator_t *this, fd_t *fd, + br_stub_inode_ctx_t *ctx) { - int32_t ret = -1; - br_stub_local_t *local = NULL; + int32_t ret = -1; + br_stub_local_t *local = NULL; - local = br_stub_alloc_local (this); - if (!local) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, BRS_MSG_NO_MEMORY, - "local allocation failed (gfid: %s)", - uuid_utoa (fd->inode->gfid)); - goto error_return; - } + local = br_stub_alloc_local(this); + if (!local) { + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, BRS_MSG_NO_MEMORY, "gfid=%s", + uuid_utoa(fd->inode->gfid), NULL); + goto error_return; + } - if (fd_is_anonymous (fd)) { - ret = br_stub_anon_fd_ctx (this, fd, ctx); - if (ret) - goto free_local; - } + if (fd_is_anonymous(fd)) { + ret = br_stub_anon_fd_ctx(this, fd, ctx); + if (ret) + goto free_local; + } - frame->local = local; + frame->local = local; - return 0; + return 0; - free_local: - br_stub_dealloc_local (local); - error_return: - return -1; +free_local: + br_stub_dealloc_local(local); +error_return: + return -1; } static int -br_stub_mark_inode_modified (xlator_t *this, br_stub_local_t *local) +br_stub_mark_inode_modified(xlator_t *this, br_stub_local_t *local) { - fd_t *fd = NULL; - int32_t ret = 0; - uint64_t ctx_addr = 0; - br_stub_inode_ctx_t *ctx = NULL; + fd_t *fd = NULL; + int32_t ret = 0; + uint64_t ctx_addr = 0; + br_stub_inode_ctx_t *ctx = NULL; + unsigned long version = BITROT_DEFAULT_CURRENT_VERSION; - fd = local->u.context.fd; + fd = local->u.context.fd; - ret = br_stub_get_inode_ctx (this, fd->inode, &ctx_addr); - if (ret < 0) - goto error_return; + ret = br_stub_get_inode_ctx(this, fd->inode, &ctx_addr); + if (ret < 0) { + ret = br_stub_init_inode_versions(this, fd, fd->inode, version, + _gf_true, _gf_false, &ctx_addr); + if (ret) + goto error_return; + } - ctx = (br_stub_inode_ctx_t *) (long) ctx_addr; + ctx = (br_stub_inode_ctx_t *)(long)ctx_addr; - LOCK (&fd->inode->lock); - { 
- __br_stub_set_inode_modified (ctx); - } - UNLOCK (&fd->inode->lock); + LOCK(&fd->inode->lock); + { + __br_stub_set_inode_modified(ctx); + } + UNLOCK(&fd->inode->lock); - return 0; + return 0; - error_return: - return -1; +error_return: + return -1; } /** @@ -460,65 +696,68 @@ br_stub_mark_inode_modified (xlator_t *this, br_stub_local_t *local) * and error is returned upwards. */ static int -br_stub_check_bad_object (xlator_t *this, inode_t *inode, int32_t *op_ret, - int32_t *op_errno) -{ - int ret = -1; - - ret = br_stub_is_bad_object (this, inode); - if (ret == -2) { - gf_msg (this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJECT_ACCESS, - "%s is a bad object. Returning", - uuid_utoa (inode->gfid)); - *op_ret = -1; - *op_errno = EIO; - } - - if (ret == -1) { - gf_msg (this->name, GF_LOG_ERROR, 0, - BRS_MSG_GET_INODE_CONTEXT_FAILED, "could not get inode" - " context for %s", uuid_utoa (inode->gfid)); - *op_ret = -1; - *op_errno = EINVAL; +br_stub_check_bad_object(xlator_t *this, inode_t *inode, int32_t *op_ret, + int32_t *op_errno) +{ + int ret = -1; + unsigned long version = BITROT_DEFAULT_CURRENT_VERSION; + + ret = br_stub_is_bad_object(this, inode); + if (ret == -2) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJECT_ACCESS, + "gfid=%s", uuid_utoa(inode->gfid), NULL); + *op_ret = -1; + *op_errno = EIO; + } + + if (ret == -1) { + ret = br_stub_init_inode_versions(this, NULL, inode, version, _gf_true, + _gf_false, NULL); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, + BRS_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s", + uuid_utoa(inode->gfid), NULL); + *op_ret = -1; + *op_errno = EINVAL; } + } - return ret; + return ret; } /** * callback for inode/fd versioning */ int -br_stub_fd_incversioning_cbk (call_frame_t *frame, - void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *xdata) -{ - fd_t *fd = NULL; - inode_t *inode = NULL; - unsigned long version = 0; - br_stub_local_t *local = NULL; - - local = (br_stub_local_t *)frame->local; - if (op_ret < 
0) - goto done; - fd = local->u.context.fd; - inode = local->u.context.inode; - version = local->u.context.version; - - op_ret = br_stub_mod_inode_versions (this, fd, inode, version); - if (op_ret < 0) - op_errno = EINVAL; - - done: - if (op_ret < 0) { - frame->local = NULL; - call_unwind_error (local->fopstub, -1, op_errno); - br_stub_cleanup_local (local); - br_stub_dealloc_local (local); - } else { - call_resume (local->fopstub); - } - return 0; +br_stub_fd_incversioning_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xdata) +{ + fd_t *fd = NULL; + inode_t *inode = NULL; + unsigned long version = 0; + br_stub_local_t *local = NULL; + + local = (br_stub_local_t *)frame->local; + if (op_ret < 0) + goto done; + fd = local->u.context.fd; + inode = local->u.context.inode; + version = local->u.context.version; + + op_ret = br_stub_mod_inode_versions(this, fd, inode, version); + if (op_ret < 0) + op_errno = EINVAL; + +done: + if (op_ret < 0) { + frame->local = NULL; + call_unwind_error(local->fopstub, -1, op_errno); + br_stub_cleanup_local(local); + br_stub_dealloc_local(local); + } else { + call_resume(local->fopstub); + } + return 0; } /** @@ -551,102 +790,101 @@ br_stub_fd_incversioning_cbk (call_frame_t *frame, /** * perform full or incremental versioning on an inode pointd by an * fd. incremental versioning is done when an inode is dirty and a - * writeback is trigerred. + * writeback is triggered. 
*/ int -br_stub_fd_versioning (xlator_t *this, call_frame_t *frame, - call_stub_t *stub, dict_t *dict, fd_t *fd, - br_stub_version_cbk *callback, unsigned long memversion, - int versioningtype, int durable) +br_stub_fd_versioning(xlator_t *this, call_frame_t *frame, call_stub_t *stub, + dict_t *dict, fd_t *fd, br_stub_version_cbk *callback, + unsigned long memversion, int versioningtype, int durable) { - int32_t ret = -1; - int flags = 0; - dict_t *xdata = NULL; - br_stub_local_t *local = NULL; + int32_t ret = -1; + int flags = 0; + dict_t *xdata = NULL; + br_stub_local_t *local = NULL; - xdata = dict_new (); - if (!xdata) - goto done; + xdata = dict_new(); + if (!xdata) + goto done; - ret = dict_set_int32 (xdata, GLUSTERFS_INTERNAL_FOP_KEY, 1); + ret = dict_set_int32(xdata, GLUSTERFS_INTERNAL_FOP_KEY, 1); + if (ret) + goto dealloc_xdata; + + if (durable) { + ret = dict_set_int32(xdata, GLUSTERFS_DURABLE_OP, 0); if (ret) - goto dealloc_xdata; + goto dealloc_xdata; + } - if (durable) { - ret = dict_set_int32 (xdata, GLUSTERFS_DURABLE_OP, 0); - if (ret) - goto dealloc_xdata; - } + local = frame->local; - local = frame->local; + br_stub_fill_local(local, stub, fd, fd->inode, fd->inode->gfid, + versioningtype, memversion); - br_stub_fill_local (local, stub, fd, - fd->inode, fd->inode->gfid, - versioningtype, memversion); + STACK_WIND(frame, callback, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata); - STACK_WIND (frame, callback, - FIRST_CHILD (this), FIRST_CHILD (this)->fops->fsetxattr, - fd, dict, flags, xdata); + ret = 0; - ret = 0; - - dealloc_xdata: - dict_unref (xdata); - done: - return ret; +dealloc_xdata: + dict_unref(xdata); +done: + return ret; } static int -br_stub_perform_incversioning (xlator_t *this, - call_frame_t *frame, call_stub_t *stub, - fd_t *fd, br_stub_inode_ctx_t *ctx) -{ - int32_t ret = -1; - dict_t *dict = NULL; - br_version_t *obuf = NULL; - unsigned long writeback_version = 0; - int op_errno = 0; - 
br_stub_local_t *local = NULL; - - op_errno = EINVAL; - local = frame->local; - - writeback_version = __br_stub_writeback_version (ctx); - - op_errno = ENOMEM; - dict = dict_new (); - if (!dict) - goto done; - ret = br_stub_alloc_versions (&obuf, NULL, 0); - if (ret) - goto dealloc_dict; - ret = br_stub_prepare_version_request (this, dict, - obuf, writeback_version); - if (ret) - goto dealloc_versions; - - ret = br_stub_fd_versioning - (this, frame, stub, dict, - fd, br_stub_fd_incversioning_cbk, writeback_version, - BR_STUB_INCREMENTAL_VERSIONING, !WRITEBACK_DURABLE); - - dealloc_versions: - br_stub_dealloc_versions (obuf); - dealloc_dict: - dict_unref (dict); - done: - if (ret) { - if (local) - frame->local = NULL; - call_unwind_error (stub, -1, op_errno); - if (local) { - br_stub_cleanup_local (local); - br_stub_dealloc_local (local); - } +br_stub_perform_incversioning(xlator_t *this, call_frame_t *frame, + call_stub_t *stub, fd_t *fd, + br_stub_inode_ctx_t *ctx) +{ + int32_t ret = -1; + dict_t *dict = NULL; + br_version_t *obuf = NULL; + unsigned long writeback_version = 0; + int op_errno = 0; + br_stub_local_t *local = NULL; + + op_errno = EINVAL; + local = frame->local; + + writeback_version = __br_stub_writeback_version(ctx); + + op_errno = ENOMEM; + dict = dict_new(); + if (!dict) + goto out; + ret = br_stub_alloc_versions(&obuf, NULL, 0); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ALLOC_MEM_FAILED, + "gfid=%s", uuid_utoa(fd->inode->gfid), NULL); + goto out; + } + ret = br_stub_prepare_version_request(this, dict, obuf, writeback_version); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_VERSION_PREPARE_FAIL, + "gfid=%s", uuid_utoa(fd->inode->gfid), NULL); + br_stub_dealloc_versions(obuf); + goto out; + } + + ret = br_stub_fd_versioning( + this, frame, stub, dict, fd, br_stub_fd_incversioning_cbk, + writeback_version, BR_STUB_INCREMENTAL_VERSIONING, !WRITEBACK_DURABLE); +out: + if (dict) + dict_unref(dict); + if (ret) { + if 
(local) + frame->local = NULL; + call_unwind_error(stub, -1, op_errno); + if (local) { + br_stub_cleanup_local(local); + br_stub_dealloc_local(local); } + } - return ret; + return ret; } /** {{{ */ @@ -654,230 +892,271 @@ br_stub_perform_incversioning (xlator_t *this, /* fsetxattr() */ int32_t -br_stub_perform_objsign (call_frame_t *frame, xlator_t *this, - fd_t *fd, dict_t *dict, int flags, dict_t *xdata) +br_stub_perform_objsign(call_frame_t *frame, xlator_t *this, fd_t *fd, + dict_t *dict, int flags, dict_t *xdata) { - STACK_WIND (frame, default_fsetxattr_cbk, - FIRST_CHILD (this), FIRST_CHILD (this)->fops->fsetxattr, fd, - dict, flags, xdata); + STACK_WIND(frame, default_fsetxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata); - dict_unref (xdata); - return 0; + dict_unref(xdata); + return 0; } void * -br_stub_signth (void *arg) +br_stub_signth(void *arg) { - br_stub_private_t *priv = arg; - struct br_stub_signentry *sigstub = NULL; + xlator_t *this = arg; + br_stub_private_t *priv = this->private; + struct br_stub_signentry *sigstub = NULL; + + THIS = this; + while (1) { + /* + * Disabling bit-rot feature leads to this particular thread + * getting cleaned up by reconfigure via a call to the function + * gf_thread_cleanup_xint (which in turn calls pthread_cancel + * and pthread_join). But, if this thread had held the mutex + * &priv->lock at the time of cancellation, then it leads to + * deadlock in future when bit-rot feature is enabled (which + * again spawns this thread which cant hold the lock as the + * mutex is still held by the previous instance of the thread + * which got killed). Also, the br_stub_handle_object_signature + * function which is called whenever file has to be signed + * also gets blocked as it too attempts to acquire &priv->lock. + * + * So, arrange for the lock to be unlocked as part of the + * cleanup of this thread using pthread_cleanup_push and + * pthread_cleanup_pop. 
+ */ + pthread_cleanup_push(br_stub_lock_cleaner, &priv->lock); + pthread_mutex_lock(&priv->lock); + { + while (list_empty(&priv->squeue)) + pthread_cond_wait(&priv->cond, &priv->lock); - while (1) { - pthread_mutex_lock (&priv->lock); - { - while (list_empty (&priv->squeue)) - pthread_cond_wait (&priv->cond, &priv->lock); + sigstub = list_first_entry(&priv->squeue, struct br_stub_signentry, + list); + list_del_init(&sigstub->list); + } + pthread_mutex_unlock(&priv->lock); + pthread_cleanup_pop(0); - sigstub = list_first_entry - (&priv->squeue, struct br_stub_signentry, list); - list_del_init (&sigstub->list); - } - pthread_mutex_unlock (&priv->lock); + call_resume(sigstub->stub); - call_resume (sigstub->stub); + GF_FREE(sigstub); + } - GF_FREE (sigstub); - } + return NULL; +} + +static gf_boolean_t +br_stub_internal_xattr(dict_t *dict) +{ + if (dict_get(dict, GLUSTERFS_SET_OBJECT_SIGNATURE) || + dict_get(dict, GLUSTERFS_GET_OBJECT_SIGNATURE) || + dict_get(dict, BR_REOPEN_SIGN_HINT_KEY) || + dict_get(dict, BITROT_OBJECT_BAD_KEY) || + dict_get(dict, BITROT_SIGNING_VERSION_KEY) || + dict_get(dict, BITROT_CURRENT_VERSION_KEY)) + return _gf_true; - return NULL; + return _gf_false; } int -orderq (struct list_head *elem1, struct list_head *elem2) +orderq(struct list_head *elem1, struct list_head *elem2) { - struct br_stub_signentry *s1 = NULL; - struct br_stub_signentry *s2 = NULL; + struct br_stub_signentry *s1 = NULL; + struct br_stub_signentry *s2 = NULL; - s1 = list_entry (elem1, struct br_stub_signentry, list); - s2 = list_entry (elem2, struct br_stub_signentry, list); + s1 = list_entry(elem1, struct br_stub_signentry, list); + s2 = list_entry(elem2, struct br_stub_signentry, list); - return (s1->v > s2->v); + return (s1->v > s2->v); } static int -br_stub_compare_sign_version (xlator_t *this, - inode_t *inode, - br_signature_t *sbuf, - dict_t *dict, int *fakesuccess) -{ - int32_t ret = -1; - uint64_t tmp_ctx = 0; - gf_boolean_t invalid = _gf_false; - 
br_stub_inode_ctx_t *ctx = NULL; - - GF_VALIDATE_OR_GOTO ("bit-rot-stub", this, out); - GF_VALIDATE_OR_GOTO (this->name, inode, out); - GF_VALIDATE_OR_GOTO (this->name, sbuf, out); - GF_VALIDATE_OR_GOTO (this->name, dict, out); - - ret = br_stub_get_inode_ctx (this, inode, &tmp_ctx); - if (ret) { - dict_del (dict, BITROT_SIGNING_VERSION_KEY); - goto out; - } - - ctx = (br_stub_inode_ctx_t *)(long)tmp_ctx; - - LOCK (&inode->lock); - { - if (ctx->currentversion < sbuf->signedversion) { - invalid = _gf_true; - } else if (ctx->currentversion > sbuf->signedversion) { - gf_msg_debug (this->name, 0, "\"Signing version\" " - "(%lu) lower than \"Current version \" " - "(%lu)", ctx->currentversion, - sbuf->signedversion); - *fakesuccess = 1; - } - } - UNLOCK (&inode->lock); - - if (invalid) { - ret = -1; - gf_msg (this->name, GF_LOG_WARNING, 0, - BRS_MSG_SIGN_VERSION_ERROR, "Signing version exceeds " - "current version [%lu > %lu]", sbuf->signedversion, - ctx->currentversion); - } +br_stub_compare_sign_version(xlator_t *this, inode_t *inode, + br_signature_t *sbuf, dict_t *dict, + int *fakesuccess) +{ + int32_t ret = -1; + uint64_t tmp_ctx = 0; + gf_boolean_t invalid = _gf_false; + br_stub_inode_ctx_t *ctx = NULL; + + GF_VALIDATE_OR_GOTO("bit-rot-stub", this, out); + GF_VALIDATE_OR_GOTO(this->name, inode, out); + GF_VALIDATE_OR_GOTO(this->name, sbuf, out); + GF_VALIDATE_OR_GOTO(this->name, dict, out); + + ret = br_stub_get_inode_ctx(this, inode, &tmp_ctx); + if (ret) { + dict_del(dict, BITROT_SIGNING_VERSION_KEY); + goto out; + } + + ctx = (br_stub_inode_ctx_t *)(long)tmp_ctx; + + LOCK(&inode->lock); + { + if (ctx->currentversion < sbuf->signedversion) { + invalid = _gf_true; + } else if (ctx->currentversion > sbuf->signedversion) { + gf_msg_debug(this->name, 0, + "\"Signing version\" " + "(%lu) lower than \"Current version \" " + "(%lu)", + ctx->currentversion, sbuf->signedversion); + *fakesuccess = 1; + } + } + UNLOCK(&inode->lock); + + if (invalid) { + ret = -1; + 
gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SIGN_VERSION_ERROR, + "Signing-ver=%lu", sbuf->signedversion, "current-ver=%lu", + ctx->currentversion, NULL); + } - out: - return ret; +out: + return ret; } static int -br_stub_prepare_signature (xlator_t *this, - dict_t *dict, inode_t *inode, - br_isignature_t *sign, int *fakesuccess) +br_stub_prepare_signature(xlator_t *this, dict_t *dict, inode_t *inode, + br_isignature_t *sign, int *fakesuccess) { - int32_t ret = 0; - size_t signaturelen = 0; - br_signature_t *sbuf = NULL; + int32_t ret = -1; + size_t signaturelen = 0; + br_signature_t *sbuf = NULL; - if (!br_is_signature_type_valid (sign->signaturetype)) - goto error_return; - - signaturelen = sign->signaturelen; - ret = br_stub_alloc_versions (NULL, &sbuf, signaturelen); - if (ret) - goto error_return; - ret = br_stub_prepare_signing_request (dict, sbuf, sign, signaturelen); - if (ret) - goto dealloc_versions; - - ret = br_stub_compare_sign_version (this, inode, - sbuf, dict, fakesuccess); - if (ret) - goto dealloc_versions; + if (!br_is_signature_type_valid(sign->signaturetype)) + goto out; - return 0; - - dealloc_versions: - br_stub_dealloc_versions (sbuf); - error_return: - return -1; + signaturelen = sign->signaturelen; + ret = br_stub_alloc_versions(NULL, &sbuf, signaturelen); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ALLOC_MEM_FAILED, + "gfid=%s", uuid_utoa(inode->gfid), NULL); + ret = -1; + goto out; + } + ret = br_stub_prepare_signing_request(dict, sbuf, sign, signaturelen); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_SIGN_PREPARE_FAIL, + "gfid=%s", uuid_utoa(inode->gfid), NULL); + ret = -1; + br_stub_dealloc_versions(sbuf); + goto out; + } + + /* At this point sbuf has been added to dict, so the memory will be freed + * when the data from the dict is destroyed + */ + ret = br_stub_compare_sign_version(this, inode, sbuf, dict, fakesuccess); +out: + return ret; } static void -br_stub_handle_object_signature (call_frame_t 
*frame, - xlator_t *this, fd_t *fd, dict_t *dict, - br_isignature_t *sign, dict_t *xdata) -{ - int32_t ret = -1; - int32_t op_ret = -1; - int32_t op_errno = EINVAL; - int fakesuccess = 0; - br_stub_private_t *priv = NULL; - struct br_stub_signentry *sigstub = NULL; - - priv = this->private; - - if (frame->root->pid != GF_CLIENT_PID_BITD) - goto dofop; - - ret = br_stub_prepare_signature (this, dict, - fd->inode, sign, &fakesuccess); - if (ret) - goto dofop; - if (fakesuccess) { - op_ret = op_errno = 0; - goto dofop; - } - - dict_del (dict, GLUSTERFS_SET_OBJECT_SIGNATURE); - - ret = -1; - if (!xdata) { - xdata = dict_new (); - if (!xdata) - goto dofop; - } else { - dict_ref (xdata); - } +br_stub_handle_object_signature(call_frame_t *frame, xlator_t *this, fd_t *fd, + dict_t *dict, br_isignature_t *sign, + dict_t *xdata) +{ + int32_t ret = -1; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + int fakesuccess = 0; + br_stub_private_t *priv = NULL; + struct br_stub_signentry *sigstub = NULL; + + priv = this->private; + + if (frame->root->pid != GF_CLIENT_PID_BITD) { + gf_smsg(this->name, GF_LOG_WARNING, op_errno, BRS_MSG_NON_BITD_PID, + "PID=%d", frame->root->pid, NULL); + goto dofop; + } + + ret = br_stub_prepare_signature(this, dict, fd->inode, sign, &fakesuccess); + if (ret) { + gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SIGN_PREPARE_FAIL, + "gfid=%s", uuid_utoa(fd->inode->gfid), NULL); + goto dofop; + } + if (fakesuccess) { + op_ret = op_errno = 0; + goto dofop; + } + + dict_del(dict, GLUSTERFS_SET_OBJECT_SIGNATURE); + + ret = -1; + if (!xdata) { + xdata = dict_new(); + if (!xdata) + goto dofop; + } else { + dict_ref(xdata); + } - ret = dict_set_int32 (xdata, GLUSTERFS_DURABLE_OP, 0); - if (ret) - goto unref_dict; + ret = dict_set_int32(xdata, GLUSTERFS_DURABLE_OP, 0); + if (ret) + goto unref_dict; - /* prepare dispatch stub to order object signing */ - sigstub = GF_CALLOC (1, sizeof (*sigstub), gf_br_stub_mt_sigstub_t); - if (!sigstub) - goto unref_dict; + 
/* prepare dispatch stub to order object signing */ + sigstub = GF_CALLOC(1, sizeof(*sigstub), gf_br_stub_mt_sigstub_t); + if (!sigstub) + goto unref_dict; - INIT_LIST_HEAD (&sigstub->list); - sigstub->v = ntohl (sign->signedversion); - sigstub->stub = fop_fsetxattr_stub (frame, br_stub_perform_objsign, - fd, dict, 0, xdata); - if (!sigstub->stub) - goto cleanup_stub; + INIT_LIST_HEAD(&sigstub->list); + sigstub->v = ntohl(sign->signedversion); + sigstub->stub = fop_fsetxattr_stub(frame, br_stub_perform_objsign, fd, dict, + 0, xdata); + if (!sigstub->stub) + goto cleanup_stub; - pthread_mutex_lock (&priv->lock); - { - list_add_order (&sigstub->list, &priv->squeue, orderq); - pthread_cond_signal (&priv->cond); - } - pthread_mutex_unlock (&priv->lock); + pthread_mutex_lock(&priv->lock); + { + list_add_order(&sigstub->list, &priv->squeue, orderq); + pthread_cond_signal(&priv->cond); + } + pthread_mutex_unlock(&priv->lock); - return; + return; - cleanup_stub: - GF_FREE (sigstub); - unref_dict: - dict_unref (xdata); - dofop: - STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, NULL); +cleanup_stub: + GF_FREE(sigstub); +unref_dict: + dict_unref(xdata); +dofop: + STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, NULL); } int32_t -br_stub_fsetxattr_resume (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +br_stub_fsetxattr_resume(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - int32_t ret = -1; - br_stub_local_t *local = NULL; + int32_t ret = -1; + br_stub_local_t *local = NULL; - local = frame->local; - frame->local = NULL; + local = frame->local; + frame->local = NULL; - ret = br_stub_mark_inode_modified (this, local); - if (ret) { - op_ret = -1; - op_errno = EINVAL; - } + ret = br_stub_mark_inode_modified(this, local); + if (ret) { + op_ret = -1; + op_errno = EINVAL; + } - STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata); + 
STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, xdata); - br_stub_cleanup_local (local); - br_stub_dealloc_local (local); + br_stub_cleanup_local(local); + br_stub_dealloc_local(local); - return 0; + return 0; } /** @@ -915,65 +1194,62 @@ br_stub_fsetxattr_resume (call_frame_t *frame, void *cookie, xlator_t *this, * } */ static void -br_stub_handle_object_reopen (call_frame_t *frame, - xlator_t *this, fd_t *fd, uint32_t val) -{ - int32_t ret = -1; - int32_t op_ret = -1; - int32_t op_errno = EINVAL; - call_stub_t *stub = NULL; - gf_boolean_t inc_version = _gf_false; - gf_boolean_t modified = _gf_false; - br_stub_inode_ctx_t *ctx = NULL; - br_stub_local_t *local = NULL; - gf_boolean_t goback = _gf_true; - - ret = br_stub_need_versioning (this, fd, &inc_version, &modified, &ctx); - if (ret) - goto unwind; +br_stub_handle_object_reopen(call_frame_t *frame, xlator_t *this, fd_t *fd, + uint32_t val) +{ + int32_t ret = -1; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + call_stub_t *stub = NULL; + gf_boolean_t inc_version = _gf_false; + gf_boolean_t modified = _gf_false; + br_stub_inode_ctx_t *ctx = NULL; + br_stub_local_t *local = NULL; + gf_boolean_t goback = _gf_true; + + ret = br_stub_need_versioning(this, fd, &inc_version, &modified, &ctx); + if (ret) + goto unwind; + + LOCK(&fd->inode->lock); + { + if ((val == BR_OBJECT_REOPEN) && inc_version) + goback = _gf_false; + if (val == BR_OBJECT_RESIGN && ctx->info_sign == BR_SIGN_NORMAL) { + __br_stub_mark_inode_synced(ctx); + __br_stub_set_inode_modified(ctx); + } + (void)__br_stub_inode_sign_state(ctx, GF_FOP_FSETXATTR, fd); + } + UNLOCK(&fd->inode->lock); + + if (goback) { + op_ret = op_errno = 0; + goto unwind; + } + + ret = br_stub_versioning_prep(frame, this, fd, ctx); + if (ret) + goto unwind; + local = frame->local; + + stub = fop_fsetxattr_cbk_stub(frame, br_stub_fsetxattr_resume, 0, 0, NULL); + if (!stub) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_STUB_ALLOC_FAILED, + "fsetxattr gfid=%s", 
uuid_utoa(fd->inode->gfid), NULL); + goto cleanup_local; + } + + (void)br_stub_perform_incversioning(this, frame, stub, fd, ctx); + return; + +cleanup_local: + br_stub_cleanup_local(local); + br_stub_dealloc_local(local); - LOCK (&fd->inode->lock); - { - if ((val == BR_OBJECT_REOPEN) && inc_version) - goback = _gf_false; - if (val == BR_OBJECT_RESIGN && - ctx->info_sign == BR_SIGN_NORMAL) { - __br_stub_mark_inode_synced (ctx); - __br_stub_set_inode_modified (ctx); - } - (void) __br_stub_inode_sign_state (ctx, GF_FOP_FSETXATTR, fd); - } - UNLOCK (&fd->inode->lock); - - if (goback) { - op_ret = op_errno = 0; - goto unwind; - } - - ret = br_stub_versioning_prep (frame, this, fd, ctx); - if (ret) - goto unwind; - local = frame->local; - - stub = fop_fsetxattr_cbk_stub (frame, br_stub_fsetxattr_resume, - 0, 0, NULL); - if (!stub) { - gf_msg (this->name, GF_LOG_ERROR, 0, BRS_MSG_STUB_ALLOC_FAILED, - "failed to allocate stub for fsetxattr fop (gfid: %s)," - " unwinding", uuid_utoa (fd->inode->gfid)); - goto cleanup_local; - } - - (void) br_stub_perform_incversioning (this, frame, stub, fd, ctx); - return; - - cleanup_local: - br_stub_cleanup_local (local); - br_stub_dealloc_local (local); - - unwind: - frame->local = NULL; - STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, NULL); +unwind: + frame->local = NULL; + STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, NULL); } /** @@ -984,87 +1260,83 @@ br_stub_handle_object_reopen (call_frame_t *frame, * to mark the object as bad. */ int -br_stub_fsetxattr_bad_object_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno, dict_t *xdata) -{ - br_stub_local_t *local = NULL; - int32_t ret = -1; - - local = frame->local; - frame->local = NULL; - - if (op_ret < 0) - goto unwind; - - /* - * What to do if marking the object as bad fails? (i.e. in memory - * marking within the inode context. 
If we are here means fsetxattr - * fop has succeeded on disk and the bad object xattr has been set). - * We can return failure to scruber, but there is nothing the scrubber - * can do with it (it might assume that the on disk setxattr itself has - * failed). The main purpose of this operation is to help identify the - * bad object by checking the inode context itself (thus avoiding the - * necessity of doing a getxattr fop on the disk). - * - * So as of now, success itself is being returned even though inode - * context set operation fails. - * In future if there is any change in the policy which can handle this, - * then appropriate response should be sent (i.e. success or error). - */ - ret = br_stub_mark_object_bad (this, local->u.context.inode); - if (ret) - gf_msg (this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJ_MARK_FAIL, - "failed to mark object %s as bad", - uuid_utoa (local->u.context.inode->gfid)); +br_stub_fsetxattr_bad_object_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *xdata) +{ + br_stub_local_t *local = NULL; + int32_t ret = -1; + + local = frame->local; + frame->local = NULL; + + if (op_ret < 0) + goto unwind; + + /* + * What to do if marking the object as bad fails? (i.e. in memory + * marking within the inode context. If we are here means fsetxattr + * fop has succeeded on disk and the bad object xattr has been set). + * We can return failure to scruber, but there is nothing the scrubber + * can do with it (it might assume that the on disk setxattr itself has + * failed). The main purpose of this operation is to help identify the + * bad object by checking the inode context itself (thus avoiding the + * necessity of doing a getxattr fop on the disk). + * + * So as of now, success itself is being returned even though inode + * context set operation fails. + * In future if there is any change in the policy which can handle this, + * then appropriate response should be sent (i.e. success or error). 
+ */ + ret = br_stub_mark_object_bad(this, local->u.context.inode); + if (ret) + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJ_MARK_FAIL, + "gfid=%s", uuid_utoa(local->u.context.inode->gfid), NULL); + + ret = br_stub_add(this, local->u.context.inode->gfid); unwind: - STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata); - br_stub_cleanup_local (local); - br_stub_dealloc_local (local); - return 0; + STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, xdata); + br_stub_cleanup_local(local); + br_stub_dealloc_local(local); + return 0; } static int32_t -br_stub_handle_bad_object_key (call_frame_t *frame, xlator_t *this, fd_t *fd, - dict_t *dict, int flags, dict_t *xdata) -{ - br_stub_local_t *local = NULL; - int32_t op_ret = -1; - int32_t op_errno = EINVAL; - - if (frame->root->pid != GF_CLIENT_PID_SCRUB) { - gf_msg (this->name, GF_LOG_ERROR, 0, - BRS_MSG_NON_SCRUB_BAD_OBJ_MARK, "bad object marking " - "on %s is not from the scrubber", - uuid_utoa (fd->inode->gfid)); - goto unwind; - } - - local = br_stub_alloc_local (this); - if (!local) { - gf_msg (this->name, GF_LOG_ERROR, 0, BRS_MSG_NO_MEMORY, - "failed to allocate memory for fsetxattr on %s", - uuid_utoa (fd->inode->gfid)); - op_ret = -1; - op_errno = ENOMEM; - goto unwind; - } +br_stub_handle_bad_object_key(call_frame_t *frame, xlator_t *this, fd_t *fd, + dict_t *dict, int flags, dict_t *xdata) +{ + br_stub_local_t *local = NULL; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + + if (frame->root->pid != GF_CLIENT_PID_SCRUB) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_NON_SCRUB_BAD_OBJ_MARK, + "gfid=%s", uuid_utoa(fd->inode->gfid), NULL); + goto unwind; + } + + local = br_stub_alloc_local(this); + if (!local) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ALLOC_MEM_FAILED, + "fsetxattr gfid=%s", uuid_utoa(fd->inode->gfid), NULL); + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } - br_stub_fill_local (local, NULL, fd, fd->inode, - fd->inode->gfid, BR_STUB_NO_VERSIONING, 0); - 
frame->local = local; + br_stub_fill_local(local, NULL, fd, fd->inode, fd->inode->gfid, + BR_STUB_NO_VERSIONING, 0); + frame->local = local; - STACK_WIND (frame, br_stub_fsetxattr_bad_object_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fsetxattr, fd, dict, flags, - xdata); - return 0; + STACK_WIND(frame, br_stub_fsetxattr_bad_object_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata); + return 0; unwind: - STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, NULL); - return 0; + STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, NULL); + return 0; } - /** * As of now, versioning is done by the stub (though as a setxattr * operation) as part of inode modification operations such as writev, @@ -1080,84 +1352,121 @@ unwind: * */ static int32_t -br_stub_handle_internal_xattr (call_frame_t *frame, xlator_t *this, fd_t *fd, - char *key) +br_stub_handle_internal_xattr(call_frame_t *frame, xlator_t *this, fd_t *fd, + char *key) { - int32_t op_ret = -1; - int32_t op_errno = EINVAL; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; - gf_msg (this->name, GF_LOG_ERROR, 0, - BRS_MSG_SET_INTERNAL_XATTR, "setxattr called" - " on the internal xattr %s for inode %s", key, - uuid_utoa (fd->inode->gfid)); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_SET_INTERNAL_XATTR, + "setxattr key=%s", key, "inode-gfid=%s", uuid_utoa(fd->inode->gfid), + NULL); - STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, NULL); - return 0; + STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, NULL); + return 0; } -int -br_stub_fsetxattr (call_frame_t *frame, xlator_t *this, - fd_t *fd, dict_t *dict, int flags, dict_t *xdata) -{ - int32_t ret = 0; - uint32_t val = 0; - br_isignature_t *sign = NULL; - - if (!IA_ISREG (fd->inode->ia_type)) - goto wind; - - /* object signature request */ - ret = dict_get_bin (dict, GLUSTERFS_SET_OBJECT_SIGNATURE, - (void **) &sign); - if (!ret) { - br_stub_handle_object_signature (frame, this, - fd, dict, sign, 
xdata); - goto done; - } - - /* signing xattr */ - if (dict_get(dict, BITROT_SIGNING_VERSION_KEY)) { - br_stub_handle_internal_xattr (frame, this, fd, - BITROT_SIGNING_VERSION_KEY); - goto done; - } - - /* version xattr */ - if (dict_get(dict, BITROT_CURRENT_VERSION_KEY)) { - br_stub_handle_internal_xattr (frame, this, fd, - BITROT_CURRENT_VERSION_KEY); - goto done; - } +static void +br_stub_dump_xattr(xlator_t *this, dict_t *dict, int *op_errno) +{ + char *format = "(%s:%s)"; + char *dump = NULL; + + dump = GF_CALLOC(1, BR_STUB_DUMP_STR_SIZE, gf_br_stub_mt_misc); + if (!dump) { + *op_errno = ENOMEM; + goto out; + } + dict_dump_to_str(dict, dump, BR_STUB_DUMP_STR_SIZE, format); + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_SET_INTERNAL_XATTR, + "fsetxattr dump=%s", dump, NULL); +out: + if (dump) { + GF_FREE(dump); + } + return; +} - if (dict_get (dict, GLUSTERFS_GET_OBJECT_SIGNATURE)) { - br_stub_handle_internal_xattr (frame, this, fd, - GLUSTERFS_GET_OBJECT_SIGNATURE); - goto done; - } +int +br_stub_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, + int flags, dict_t *xdata) +{ + int32_t ret = 0; + uint32_t val = 0; + br_isignature_t *sign = NULL; + br_stub_private_t *priv = NULL; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + + priv = this->private; + + if ((frame->root->pid != GF_CLIENT_PID_BITD && + frame->root->pid != GF_CLIENT_PID_SCRUB) && + br_stub_internal_xattr(dict)) { + br_stub_dump_xattr(this, dict, &op_errno); + goto unwind; + } + + if (!priv->do_versioning) + goto wind; + + if (!IA_ISREG(fd->inode->ia_type)) + goto wind; + + /* object signature request */ + ret = dict_get_bin(dict, GLUSTERFS_SET_OBJECT_SIGNATURE, (void **)&sign); + if (!ret) { + gf_msg_debug(this->name, 0, "got SIGNATURE request on %s", + uuid_utoa(fd->inode->gfid)); + br_stub_handle_object_signature(frame, this, fd, dict, sign, xdata); + goto done; + } + + /* signing xattr */ + if (dict_get(dict, BITROT_SIGNING_VERSION_KEY)) { + 
br_stub_handle_internal_xattr(frame, this, fd, + BITROT_SIGNING_VERSION_KEY); + goto done; + } + + /* version xattr */ + if (dict_get(dict, BITROT_CURRENT_VERSION_KEY)) { + br_stub_handle_internal_xattr(frame, this, fd, + BITROT_CURRENT_VERSION_KEY); + goto done; + } + + if (dict_get(dict, GLUSTERFS_GET_OBJECT_SIGNATURE)) { + br_stub_handle_internal_xattr(frame, this, fd, + GLUSTERFS_GET_OBJECT_SIGNATURE); + goto done; + } + + /* object reopen request */ + ret = dict_get_uint32(dict, BR_REOPEN_SIGN_HINT_KEY, &val); + if (!ret) { + br_stub_handle_object_reopen(frame, this, fd, val); + goto done; + } + + /* handle bad object */ + if (dict_get(dict, BITROT_OBJECT_BAD_KEY)) { + br_stub_handle_bad_object_key(frame, this, fd, dict, flags, xdata); + goto done; + } - /* object reopen request */ - ret = dict_get_uint32 (dict, BR_REOPEN_SIGN_HINT_KEY, &val); - if (!ret) { - br_stub_handle_object_reopen (frame, this, fd, val); - goto done; - } +wind: + STACK_WIND(frame, default_fsetxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata); + return 0; - /* handle bad object */ - if (dict_get (dict, BITROT_OBJECT_BAD_KEY)) { - br_stub_handle_bad_object_key (frame, this, fd, - dict, flags, xdata); - goto done; - } +unwind: + STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, NULL); -wind: - STACK_WIND (frame, default_fsetxattr_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fsetxattr, fd, dict, flags, - xdata); done: - return 0; + return 0; } - /** * Currently BitD and scrubber are doing fsetxattr to either sign the object * or to mark it as bad. Hence setxattr on any of those keys is denied directly @@ -1166,95 +1475,76 @@ done: * check has to be added below. 
*/ int -br_stub_setxattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, dict_t *dict, int flags, dict_t *xdata) -{ - int32_t op_ret = -1; - int32_t op_errno = EINVAL; - char dump[64*1024] = {0,}; - char *format = "(%s:%s)"; - - if (dict_get (dict, GLUSTERFS_SET_OBJECT_SIGNATURE) || - dict_get (dict, GLUSTERFS_GET_OBJECT_SIGNATURE) || - dict_get (dict, BR_REOPEN_SIGN_HINT_KEY) || - dict_get (dict, BITROT_OBJECT_BAD_KEY) || - dict_get (dict, BITROT_SIGNING_VERSION_KEY) || - dict_get (dict, BITROT_CURRENT_VERSION_KEY)) { - dict_dump_to_str (dict, dump, sizeof(dump), format); - gf_msg (this->name, GF_LOG_ERROR, 0, - BRS_MSG_SET_INTERNAL_XATTR, "setxattr called on " - "internal xattr %s", dump); - goto unwind; - } +br_stub_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + int flags, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + if (br_stub_internal_xattr(dict)) { + br_stub_dump_xattr(this, dict, &op_errno); + goto unwind; + } - STACK_WIND_TAIL (frame, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->setxattr, loc, dict, flags, - xdata); - return 0; + STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr, + loc, dict, flags, xdata); + return 0; unwind: - STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, NULL); - return 0; + STACK_UNWIND_STRICT(setxattr, frame, op_ret, op_errno, NULL); + return 0; } /** }}} */ - /** {{{ */ /* {f}removexattr() */ int32_t -br_stub_removexattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, const char *name, dict_t *xdata) -{ - int32_t op_ret = -1; - int32_t op_errno = EINVAL; - - if (!strcmp (BITROT_OBJECT_BAD_KEY, name) || - !strcmp (BITROT_SIGNING_VERSION_KEY, name) || - !strcmp (BITROT_CURRENT_VERSION_KEY, name)) { - gf_msg (this->name, GF_LOG_WARNING, 0, - BRS_MSG_REMOVE_INTERNAL_XATTR, "removexattr called" - " on internal xattr %s for file %s", name, loc->path); - goto unwind; - } - - - STACK_WIND_TAIL (frame, FIRST_CHILD(this), - 
FIRST_CHILD(this)->fops->removexattr, - loc, name, xdata); - return 0; +br_stub_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + + if (!strcmp(BITROT_OBJECT_BAD_KEY, name) || + !strcmp(BITROT_SIGNING_VERSION_KEY, name) || + !strcmp(BITROT_CURRENT_VERSION_KEY, name)) { + gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_REMOVE_INTERNAL_XATTR, + "name=%s", name, "file-path=%s", loc->path, NULL); + goto unwind; + } + + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, loc, name, xdata); + return 0; unwind: - STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, NULL); - return 0; + STACK_UNWIND_STRICT(removexattr, frame, op_ret, op_errno, NULL); + return 0; } int32_t -br_stub_fremovexattr (call_frame_t *frame, xlator_t *this, - fd_t *fd, const char *name, dict_t *xdata) -{ - int32_t op_ret = -1; - int32_t op_errno = EINVAL; - - if (!strcmp (BITROT_OBJECT_BAD_KEY, name) || - !strcmp (BITROT_SIGNING_VERSION_KEY, name) || - !strcmp (BITROT_CURRENT_VERSION_KEY, name)) { - gf_msg (this->name, GF_LOG_WARNING, 0, - BRS_MSG_REMOVE_INTERNAL_XATTR, "removexattr called" - " on internal xattr %s for inode %s", name, - uuid_utoa (fd->inode->gfid)); - goto unwind; - } - - - STACK_WIND_TAIL (frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fremovexattr, - fd, name, xdata); - return 0; +br_stub_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *name, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + + if (!strcmp(BITROT_OBJECT_BAD_KEY, name) || + !strcmp(BITROT_SIGNING_VERSION_KEY, name) || + !strcmp(BITROT_CURRENT_VERSION_KEY, name)) { + gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_REMOVE_INTERNAL_XATTR, + "name=%s", name, "inode-gfid=%s", uuid_utoa(fd->inode->gfid), + NULL); + goto unwind; + } + + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata); 
+ return 0; unwind: - STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, NULL); - return 0; + STACK_UNWIND_STRICT(fremovexattr, frame, op_ret, op_errno, NULL); + return 0; } /** }}} */ @@ -1264,17 +1554,17 @@ unwind: /* {f}getxattr() */ int -br_stub_listxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *xattr, dict_t *xdata) +br_stub_listxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xattr, dict_t *xdata) { - if (op_ret < 0) - goto unwind; + if (op_ret < 0) + goto unwind; - br_stub_remove_vxattrs (xattr); + br_stub_remove_vxattrs(xattr, _gf_true); - unwind: - STACK_UNWIND (frame, op_ret, op_errno, xattr, xdata); - return 0; +unwind: + STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, xattr, xdata); + return 0; } /** @@ -1313,333 +1603,380 @@ br_stub_listxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, * then treat the object as stale. */ char -br_stub_is_object_stale (xlator_t *this, call_frame_t *frame, inode_t *inode, - br_version_t *obuf, br_signature_t *sbuf) -{ - uint64_t ctx_addr = 0; - br_stub_inode_ctx_t *ctx = NULL; - int32_t ret = -1; - char stale = 0; - - if (obuf->ongoingversion == sbuf->signedversion) - goto out; - - if (frame->root->pid == GF_CLIENT_PID_SCRUB) { - stale = 1; - goto out; - } - - ret = br_stub_get_inode_ctx (this, inode, &ctx_addr); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - BRS_MSG_GET_INODE_CONTEXT_FAILED, "failed to get the " - "inode context for %s", uuid_utoa (inode->gfid)); - goto out; - } - - ctx = (br_stub_inode_ctx_t *)(long)ctx_addr; - - LOCK (&inode->lock); - { - if ((!__br_stub_is_inode_dirty (ctx) && - ctx->info_sign != BR_SIGN_NORMAL) || - __br_stub_is_inode_dirty (ctx)) - stale = 1; - } - UNLOCK (&inode->lock); +br_stub_is_object_stale(xlator_t *this, call_frame_t *frame, inode_t *inode, + br_version_t *obuf, br_signature_t *sbuf) +{ + uint64_t ctx_addr = 0; + br_stub_inode_ctx_t *ctx = 
NULL; + int32_t ret = -1; + char stale = 0; + + if (obuf->ongoingversion == sbuf->signedversion) + goto out; + + if (frame->root->pid == GF_CLIENT_PID_SCRUB) { + stale = 1; + goto out; + } + + ret = br_stub_get_inode_ctx(this, inode, &ctx_addr); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_GET_INODE_CONTEXT_FAILED, + "gfid=%s", uuid_utoa(inode->gfid), NULL); + goto out; + } + + ctx = (br_stub_inode_ctx_t *)(long)ctx_addr; + + LOCK(&inode->lock); + { + if ((!__br_stub_is_inode_dirty(ctx) && + ctx->info_sign != BR_SIGN_NORMAL) || + __br_stub_is_inode_dirty(ctx)) + stale = 1; + } + UNLOCK(&inode->lock); out: - return stale; + return stale; } int -br_stub_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *xattr, dict_t *xdata) -{ - int32_t ret = 0; - size_t totallen = 0; - size_t signaturelen = 0; - br_version_t *obuf = NULL; - br_signature_t *sbuf = NULL; - br_isignature_out_t *sign = NULL; - br_vxattr_status_t status; - br_stub_local_t *local = NULL; - inode_t *inode = NULL; - gf_boolean_t bad_object = _gf_false; - - if (op_ret < 0) - goto unwind; - if (cookie != (void *) BR_STUB_REQUEST_COOKIE) - goto unwind; - - local = frame->local; - frame->local = NULL; - inode = local->u.context.inode; - - op_ret = -1; - status = br_version_xattr_state (xattr, &obuf, &sbuf, &bad_object); - - op_errno = EIO; - if (bad_object) - goto delkeys; - - op_errno = EINVAL; - if (status == BR_VXATTR_STATUS_INVALID) - goto delkeys; - - op_errno = ENODATA; - if ((status == BR_VXATTR_STATUS_MISSING) - || (status == BR_VXATTR_STATUS_UNSIGNED)) - goto delkeys; - - /** - * okay.. we have enough information to satisfy the request, - * namely: version and signing extended attribute. what's - * pending is the signature length -- that's figured out - * indirectly via the size of the _whole_ xattr and the - * on-disk signing xattr header size. 
- */ - op_errno = EINVAL; - ret = dict_get_uint32 (xattr, BITROT_SIGNING_XATTR_SIZE_KEY, - (uint32_t *)&signaturelen); - if (ret) - goto delkeys; - - signaturelen -= sizeof (br_signature_t); - totallen = sizeof (br_isignature_out_t) + signaturelen; - - op_errno = ENOMEM; - sign = GF_CALLOC (1, totallen, gf_br_stub_mt_signature_t); - if (!sign) - goto delkeys; - - sign->time[0] = obuf->timebuf[0]; - sign->time[1] = obuf->timebuf[1]; - - /* Object's dirty state & current signed version */ - sign->version = sbuf->signedversion; - sign->stale = br_stub_is_object_stale (this, frame, inode, obuf, sbuf); - - /* Object's signature */ - sign->signaturelen = signaturelen; - sign->signaturetype = sbuf->signaturetype; - (void) memcpy (sign->signature, sbuf->signature, signaturelen); - +br_stub_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xattr, dict_t *xdata) +{ + int32_t ret = 0; + size_t totallen = 0; + size_t signaturelen = 0; + br_stub_private_t *priv = NULL; + br_version_t *obuf = NULL; + br_signature_t *sbuf = NULL; + br_isignature_out_t *sign = NULL; + br_vxattr_status_t status; + br_stub_local_t *local = NULL; + inode_t *inode = NULL; + gf_boolean_t bad_object = _gf_false; + gf_boolean_t ver_enabled = _gf_false; + + BR_STUB_VER_ENABLED_IN_CALLPATH(frame, ver_enabled); + priv = this->private; + + if (op_ret < 0) + goto unwind; + BR_STUB_VER_COND_GOTO(priv, (!ver_enabled), delkeys); + + if (cookie != (void *)BR_STUB_REQUEST_COOKIE) + goto unwind; + + local = frame->local; + frame->local = NULL; + if (!local) { + op_ret = -1; op_errno = EINVAL; - ret = dict_set_bin (xattr, GLUSTERFS_GET_OBJECT_SIGNATURE, - (void *)sign, totallen); - if (ret < 0) { - GF_FREE (sign); - goto delkeys; - } - op_errno = 0; - op_ret = totallen; - - delkeys: - br_stub_remove_vxattrs (xattr); + goto unwind; + } + inode = local->u.context.inode; + + op_ret = -1; + status = br_version_xattr_state(xattr, &obuf, &sbuf, &bad_object); + + op_errno = 
EIO; + if (bad_object) + goto delkeys; + + op_errno = EINVAL; + if (status == BR_VXATTR_STATUS_INVALID) + goto delkeys; + + op_errno = ENODATA; + if ((status == BR_VXATTR_STATUS_MISSING) || + (status == BR_VXATTR_STATUS_UNSIGNED)) + goto delkeys; + + /** + * okay.. we have enough information to satisfy the request, + * namely: version and signing extended attribute. what's + * pending is the signature length -- that's figured out + * indirectly via the size of the _whole_ xattr and the + * on-disk signing xattr header size. + */ + op_errno = EINVAL; + ret = dict_get_uint32(xattr, BITROT_SIGNING_XATTR_SIZE_KEY, + (uint32_t *)&signaturelen); + if (ret) + goto delkeys; + + signaturelen -= sizeof(br_signature_t); + totallen = sizeof(br_isignature_out_t) + signaturelen; + + op_errno = ENOMEM; + sign = GF_CALLOC(1, totallen, gf_br_stub_mt_signature_t); + if (!sign) + goto delkeys; + + sign->time[0] = obuf->timebuf[0]; + sign->time[1] = obuf->timebuf[1]; + + /* Object's dirty state & current signed version */ + sign->version = sbuf->signedversion; + sign->stale = br_stub_is_object_stale(this, frame, inode, obuf, sbuf); + + /* Object's signature */ + sign->signaturelen = signaturelen; + sign->signaturetype = sbuf->signaturetype; + (void)memcpy(sign->signature, sbuf->signature, signaturelen); + + op_errno = EINVAL; + ret = dict_set_bin(xattr, GLUSTERFS_GET_OBJECT_SIGNATURE, (void *)sign, + totallen); + if (ret < 0) { + GF_FREE(sign); + goto delkeys; + } + op_errno = 0; + op_ret = totallen; + +delkeys: + br_stub_remove_vxattrs(xattr, _gf_true); - unwind: - STACK_UNWIND (frame, op_ret, op_errno, xattr, xdata); - if (local) { - br_stub_cleanup_local (local); - br_stub_dealloc_local (local); - } - return 0; +unwind: + STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, xattr, xdata); + br_stub_cleanup_local(local); + br_stub_dealloc_local(local); + return 0; } static void -br_stub_send_stub_init_time (call_frame_t *frame, xlator_t *this) 
+br_stub_send_stub_init_time(call_frame_t *frame, xlator_t *this) { - int op_ret = 0; - int op_errno = 0; - dict_t *xattr = NULL; - br_stub_init_t stub = {{0,},}; - br_stub_private_t *priv = NULL; + int op_ret = 0; + int op_errno = 0; + dict_t *xattr = NULL; + br_stub_init_t stub = { + { + 0, + }, + }; + br_stub_private_t *priv = NULL; - priv = this->private; + priv = this->private; - xattr = dict_new (); - if (!xattr) { - op_ret = -1; - op_errno = ENOMEM; - goto unwind; - } + xattr = dict_new(); + if (!xattr) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } - stub.timebuf[0] = priv->boot[0]; - stub.timebuf[1] = priv->boot[1]; - memcpy (stub.export, priv->export, strlen (priv->export) + 1); + stub.timebuf[0] = priv->boot[0]; + stub.timebuf[1] = priv->boot[1]; + memcpy(stub.export, priv->export, strlen(priv->export) + 1); - op_ret = dict_set_static_bin (xattr, GLUSTERFS_GET_BR_STUB_INIT_TIME, - (void *) &stub, sizeof (br_stub_init_t)); - if (op_ret < 0) { - op_errno = EINVAL; - goto unwind; - } + op_ret = dict_set_static_bin(xattr, GLUSTERFS_GET_BR_STUB_INIT_TIME, + (void *)&stub, sizeof(br_stub_init_t)); + if (op_ret < 0) { + op_errno = EINVAL; + goto unwind; + } - op_ret = sizeof (br_stub_init_t); + op_ret = sizeof(br_stub_init_t); - unwind: - STACK_UNWIND (frame, op_ret, op_errno, xattr, NULL); +unwind: + STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, xattr, NULL); - if (xattr) - dict_unref (xattr); + if (xattr) + dict_unref(xattr); } int -br_stub_getxattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, const char *name, dict_t *xdata) -{ - void *cookie = NULL; - uuid_t rootgfid = {0, }; - fop_getxattr_cbk_t cbk = br_stub_getxattr_cbk; - int32_t op_ret = -1; - int32_t op_errno = EINVAL; - br_stub_local_t *local = NULL; - - GF_VALIDATE_OR_GOTO ("bit-rot-stub", this, unwind); - GF_VALIDATE_OR_GOTO (this->name, loc, unwind); - GF_VALIDATE_OR_GOTO (this->name, loc->inode, unwind); +br_stub_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, 
+ const char *name, dict_t *xdata) +{ + void *cookie = NULL; + static uuid_t rootgfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; + fop_getxattr_cbk_t cbk = br_stub_getxattr_cbk; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + br_stub_local_t *local = NULL; + br_stub_private_t *priv = NULL; + + GF_VALIDATE_OR_GOTO("bit-rot-stub", this, unwind); + GF_VALIDATE_OR_GOTO(this->name, loc, unwind); + GF_VALIDATE_OR_GOTO(this->name, this->private, unwind); + GF_VALIDATE_OR_GOTO(this->name, loc->inode, unwind); + + if (!name) { + cbk = br_stub_listxattr_cbk; + goto wind; + } + + if (br_stub_is_internal_xattr(name)) + goto unwind; + + priv = this->private; + BR_STUB_VER_NOT_ACTIVE_THEN_GOTO(frame, priv, wind); + + /** + * If xattr is node-uuid and the inode is marked bad, return EIO. + * Returning EIO would result in AFR to choose correct node-uuid + * corresponding to the subvolume * where the good copy of the + * file resides. + */ + if (IA_ISREG(loc->inode->ia_type) && XATTR_IS_NODE_UUID(name) && + br_stub_check_bad_object(this, loc->inode, &op_ret, &op_errno)) { + goto unwind; + } + + /** + * this special extended attribute is allowed only on root + */ + if (name && + (strncmp(name, GLUSTERFS_GET_BR_STUB_INIT_TIME, + sizeof(GLUSTERFS_GET_BR_STUB_INIT_TIME) - 1) == 0) && + ((gf_uuid_compare(loc->gfid, rootgfid) == 0) || + (gf_uuid_compare(loc->inode->gfid, rootgfid) == 0))) { + BR_STUB_RESET_LOCAL_NULL(frame); + br_stub_send_stub_init_time(frame, this); + return 0; + } - rootgfid[15] = 1; + if (!IA_ISREG(loc->inode->ia_type)) + goto wind; - if (!name) { - cbk = br_stub_listxattr_cbk; - goto wind; - } + if (name && (strncmp(name, GLUSTERFS_GET_OBJECT_SIGNATURE, + sizeof(GLUSTERFS_GET_OBJECT_SIGNATURE) - 1) == 0)) { + cookie = (void *)BR_STUB_REQUEST_COOKIE; - if (br_stub_is_internal_xattr (name)) - goto unwind; - - /** - * this special extended attribute is allowed only on root - */ - if (name - && (strncmp (name, GLUSTERFS_GET_BR_STUB_INIT_TIME, - strlen 
(GLUSTERFS_GET_BR_STUB_INIT_TIME)) == 0) - && ((gf_uuid_compare (loc->gfid, rootgfid) == 0) - || (gf_uuid_compare (loc->inode->gfid, rootgfid) == 0))) { - br_stub_send_stub_init_time (frame, this); - return 0; + local = br_stub_alloc_local(this); + if (!local) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; } - if (!IA_ISREG (loc->inode->ia_type)) - goto wind; - - if (name && (strncmp (name, GLUSTERFS_GET_OBJECT_SIGNATURE, - strlen (GLUSTERFS_GET_OBJECT_SIGNATURE)) == 0)) { - cookie = (void *) BR_STUB_REQUEST_COOKIE; - - local = br_stub_alloc_local (this); - if (!local) { - op_ret = -1; - op_errno = ENOMEM; - goto unwind; - } - - br_stub_fill_local (local, NULL, NULL, loc->inode, - loc->inode->gfid, - BR_STUB_NO_VERSIONING, 0); - frame->local = local; - } + br_stub_fill_local(local, NULL, NULL, loc->inode, loc->inode->gfid, + BR_STUB_NO_VERSIONING, 0); + frame->local = local; + } - wind: - STACK_WIND_COOKIE - (frame, cbk, cookie, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->getxattr, loc, name, xdata); - return 0; +wind: + STACK_WIND_COOKIE(frame, cbk, cookie, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->getxattr, loc, name, xdata); + return 0; unwind: - STACK_UNWIND (frame, op_ret, op_errno, NULL, NULL); - return 0; + BR_STUB_RESET_LOCAL_NULL(frame); + STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, NULL, NULL); + return 0; } int -br_stub_fgetxattr (call_frame_t *frame, xlator_t *this, - fd_t *fd, const char *name, dict_t *xdata) -{ - void *cookie = NULL; - uuid_t rootgfid = {0, }; - fop_fgetxattr_cbk_t cbk = br_stub_getxattr_cbk; - int32_t op_ret = -1; - int32_t op_errno = EINVAL; - br_stub_local_t *local = NULL; - - rootgfid[15] = 1; +br_stub_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *name, dict_t *xdata) +{ + void *cookie = NULL; + static uuid_t rootgfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; + fop_fgetxattr_cbk_t cbk = br_stub_getxattr_cbk; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + 
br_stub_local_t *local = NULL; + br_stub_private_t *priv = NULL; + + priv = this->private; + + if (!name) { + cbk = br_stub_listxattr_cbk; + goto wind; + } + + if (br_stub_is_internal_xattr(name)) + goto unwind; + + BR_STUB_VER_NOT_ACTIVE_THEN_GOTO(frame, priv, wind); + + /** + * If xattr is node-uuid and the inode is marked bad, return EIO. + * Returning EIO would result in AFR to choose correct node-uuid + * corresponding to the subvolume * where the good copy of the + * file resides. + */ + if (IA_ISREG(fd->inode->ia_type) && XATTR_IS_NODE_UUID(name) && + br_stub_check_bad_object(this, fd->inode, &op_ret, &op_errno)) { + goto unwind; + } + + /** + * this special extended attribute is allowed only on root + */ + if (name && + (strncmp(name, GLUSTERFS_GET_BR_STUB_INIT_TIME, + sizeof(GLUSTERFS_GET_BR_STUB_INIT_TIME) - 1) == 0) && + (gf_uuid_compare(fd->inode->gfid, rootgfid) == 0)) { + BR_STUB_RESET_LOCAL_NULL(frame); + br_stub_send_stub_init_time(frame, this); + return 0; + } - if (!name) { - cbk = br_stub_listxattr_cbk; - goto wind; - } + if (!IA_ISREG(fd->inode->ia_type)) + goto wind; - if (br_stub_is_internal_xattr (name)) - goto unwind; + if (name && (strncmp(name, GLUSTERFS_GET_OBJECT_SIGNATURE, + sizeof(GLUSTERFS_GET_OBJECT_SIGNATURE) - 1) == 0)) { + cookie = (void *)BR_STUB_REQUEST_COOKIE; - /** - * this special extended attribute is allowed only on root - */ - if (name - && (strncmp (name, GLUSTERFS_GET_BR_STUB_INIT_TIME, - strlen (GLUSTERFS_GET_BR_STUB_INIT_TIME)) == 0) - && (gf_uuid_compare (fd->inode->gfid, rootgfid) == 0)) { - br_stub_send_stub_init_time (frame, this); - return 0; + local = br_stub_alloc_local(this); + if (!local) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; } - if (!IA_ISREG (fd->inode->ia_type)) - goto wind; - - if (name && (strncmp (name, GLUSTERFS_GET_OBJECT_SIGNATURE, - strlen (GLUSTERFS_GET_OBJECT_SIGNATURE)) == 0)) { - cookie = (void *) BR_STUB_REQUEST_COOKIE; - - local = br_stub_alloc_local (this); - if (!local) { - 
op_ret = -1; - op_errno = ENOMEM; - goto unwind; - } - - br_stub_fill_local (local, NULL, fd, fd->inode, - fd->inode->gfid, - BR_STUB_NO_VERSIONING, 0); - frame->local = local; - } + br_stub_fill_local(local, NULL, fd, fd->inode, fd->inode->gfid, + BR_STUB_NO_VERSIONING, 0); + frame->local = local; + } - wind: - STACK_WIND_COOKIE - (frame, cbk, cookie, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fgetxattr, fd, name, xdata); - return 0; +wind: + STACK_WIND_COOKIE(frame, cbk, cookie, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata); + return 0; unwind: - STACK_UNWIND (frame, op_ret, op_errno, NULL, NULL); - return 0; + BR_STUB_RESET_LOCAL_NULL(frame); + STACK_UNWIND_STRICT(fgetxattr, frame, op_ret, op_errno, NULL, NULL); + return 0; } int32_t -br_stub_readv (call_frame_t *frame, xlator_t *this, - fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata) +br_stub_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) { - int32_t op_ret = -1; - int32_t op_errno = EINVAL; - int32_t ret = -1; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + int32_t ret = -1; + br_stub_private_t *priv = NULL; - GF_VALIDATE_OR_GOTO ("bit-rot-stub", this, unwind); - GF_VALIDATE_OR_GOTO (this->name, frame, unwind); - GF_VALIDATE_OR_GOTO (this->name, fd, unwind); - GF_VALIDATE_OR_GOTO (this->name, fd->inode, unwind); + GF_VALIDATE_OR_GOTO("bit-rot-stub", this, unwind); + GF_VALIDATE_OR_GOTO(this->name, frame, unwind); + GF_VALIDATE_OR_GOTO(this->name, this->private, unwind); + GF_VALIDATE_OR_GOTO(this->name, fd, unwind); + GF_VALIDATE_OR_GOTO(this->name, fd->inode, unwind); - ret = br_stub_check_bad_object (this, fd->inode, &op_ret, &op_errno); - if (ret) - goto unwind; + priv = this->private; + if (!priv->do_versioning) + goto wind; - STACK_WIND_TAIL (frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readv, fd, size, offset, - flags, xdata); - return 0; + ret = 
br_stub_check_bad_object(this, fd->inode, &op_ret, &op_errno); + if (ret) + goto unwind; + +wind: + STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->readv, + fd, size, offset, flags, xdata); + return 0; unwind: - STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, NULL, 0, NULL, - NULL, NULL); - return 0; + STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, NULL, 0, NULL, NULL, + NULL); + return 0; } /** @@ -1650,287 +1987,294 @@ unwind: * fds. */ int32_t -br_stub_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) +br_stub_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - int32_t ret = 0; - br_stub_local_t *local = NULL; + int32_t ret = 0; + br_stub_local_t *local = NULL; - local = frame->local; - frame->local = NULL; + local = frame->local; + frame->local = NULL; - if (op_ret < 0) - goto unwind; + if (op_ret < 0) + goto unwind; - ret = br_stub_mark_inode_modified (this, local); - if (ret) { - op_ret = -1; - op_errno = EINVAL; - } + ret = br_stub_mark_inode_modified(this, local); + if (ret) { + op_ret = -1; + op_errno = EINVAL; + } unwind: - STACK_UNWIND_STRICT (writev, frame, - op_ret, op_errno, prebuf, postbuf, xdata); + STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, prebuf, postbuf, + xdata); - br_stub_cleanup_local (local); - br_stub_dealloc_local (local); + br_stub_cleanup_local(local); + br_stub_dealloc_local(local); - return 0; + return 0; } int32_t -br_stub_writev_resume (call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iovec *vector, int32_t count, off_t offset, - uint32_t flags, struct iobref *iobref, dict_t *xdata) +br_stub_writev_resume(call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iovec *vector, int32_t count, off_t offset, + uint32_t flags, struct iobref *iobref, dict_t *xdata) { - STACK_WIND 
(frame, br_stub_writev_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->writev, fd, vector, count, - offset, flags, iobref, xdata); - return 0; + STACK_WIND(frame, br_stub_writev_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->writev, fd, vector, count, offset, + flags, iobref, xdata); + return 0; } /** * This is probably the most crucial part about the whole versioning thing. * There's absolutely no differentiation as such between an anonymous fd * and a regular fd except the fd context initialization. Object versioning - * is perfomed when the inode is dirty. Parallel write operations are no + * is performed when the inode is dirty. Parallel write operations are no * special with each write performing object versioning followed by marking * the inode as non-dirty (synced). This is followed by the actual operation * (writev() in this case) which on a success marks the inode as modified. * This prevents signing of objects that have not been modified. */ int32_t -br_stub_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iovec *vector, int32_t count, off_t offset, - uint32_t flags, struct iobref *iobref, dict_t *xdata) -{ - call_stub_t *stub = NULL; - int32_t op_ret = -1; - int32_t op_errno = EINVAL; - gf_boolean_t inc_version = _gf_false; - gf_boolean_t modified = _gf_false; - br_stub_inode_ctx_t *ctx = NULL; - int32_t ret = -1; - fop_writev_cbk_t cbk = default_writev_cbk; - br_stub_local_t *local = NULL; - - GF_VALIDATE_OR_GOTO ("bit-rot-stub", this, unwind); - GF_VALIDATE_OR_GOTO (this->name, frame, unwind); - GF_VALIDATE_OR_GOTO (this->name, fd, unwind); - - ret = br_stub_need_versioning (this, fd, &inc_version, &modified, &ctx); - if (ret) - goto unwind; - - ret = br_stub_check_bad_object (this, fd->inode, &op_ret, &op_errno); - if (ret) - goto unwind; - - /** - * The inode is not dirty and also witnessed atleast one successful - * modification operation. Therefore, subsequent operations need not - * perform any special tracking. 
- */ - if (!inc_version && modified) - goto wind; - - /** - * okay.. so, either the inode needs versioning or the modification - * needs to be tracked. ->cbk is set to the appropriate callback - * routine for this. - * NOTE: ->local needs to be deallocated on failures from here on. - */ - ret = br_stub_versioning_prep (frame, this, fd, ctx); - if (ret) - goto unwind; - - local = frame->local; - if (!inc_version) { - br_stub_fill_local (local, NULL, fd, fd->inode, - fd->inode->gfid, BR_STUB_NO_VERSIONING, 0); - cbk = br_stub_writev_cbk; - goto wind; - } - - stub = fop_writev_stub (frame, br_stub_writev_resume, fd, vector, count, - offset, flags, iobref, xdata); - - if (!stub) { - gf_msg (this->name, GF_LOG_ERROR, 0, BRS_MSG_STUB_ALLOC_FAILED, - "failed to allocate stub for write fop (gfid: %s), " - "unwinding", uuid_utoa (fd->inode->gfid)); - goto cleanup_local; - } - - /* Perform Versioning */ - return br_stub_perform_incversioning (this, frame, stub, fd, ctx); +br_stub_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iovec *vector, int32_t count, off_t offset, + uint32_t flags, struct iobref *iobref, dict_t *xdata) +{ + call_stub_t *stub = NULL; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + gf_boolean_t inc_version = _gf_false; + gf_boolean_t modified = _gf_false; + br_stub_inode_ctx_t *ctx = NULL; + int32_t ret = -1; + fop_writev_cbk_t cbk = default_writev_cbk; + br_stub_local_t *local = NULL; + br_stub_private_t *priv = NULL; + + GF_VALIDATE_OR_GOTO("bit-rot-stub", this, unwind); + GF_VALIDATE_OR_GOTO(this->name, this->private, unwind); + GF_VALIDATE_OR_GOTO(this->name, frame, unwind); + GF_VALIDATE_OR_GOTO(this->name, fd, unwind); + + priv = this->private; + if (!priv->do_versioning) + goto wind; + + ret = br_stub_need_versioning(this, fd, &inc_version, &modified, &ctx); + if (ret) + goto unwind; + + ret = br_stub_check_bad_object(this, fd->inode, &op_ret, &op_errno); + if (ret) + goto unwind; + + /** + * The inode is not dirty and also 
witnessed at least one successful + * modification operation. Therefore, subsequent operations need not + * perform any special tracking. + */ + if (!inc_version && modified) + goto wind; + + /** + * okay.. so, either the inode needs versioning or the modification + * needs to be tracked. ->cbk is set to the appropriate callback + * routine for this. + * NOTE: ->local needs to be deallocated on failures from here on. + */ + ret = br_stub_versioning_prep(frame, this, fd, ctx); + if (ret) + goto unwind; + + local = frame->local; + if (!inc_version) { + br_stub_fill_local(local, NULL, fd, fd->inode, fd->inode->gfid, + BR_STUB_NO_VERSIONING, 0); + cbk = br_stub_writev_cbk; + goto wind; + } + + stub = fop_writev_stub(frame, br_stub_writev_resume, fd, vector, count, + offset, flags, iobref, xdata); + + if (!stub) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_STUB_ALLOC_FAILED, + "write gfid=%s", uuid_utoa(fd->inode->gfid), NULL); + goto cleanup_local; + } + + /* Perform Versioning */ + return br_stub_perform_incversioning(this, frame, stub, fd, ctx); - wind: - STACK_WIND (frame, cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->writev, - fd, vector, count, offset, flags, iobref, xdata); - return 0; +wind: + STACK_WIND(frame, cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev, + fd, vector, count, offset, flags, iobref, xdata); + return 0; - cleanup_local: - br_stub_cleanup_local (local); - br_stub_dealloc_local (local); +cleanup_local: + br_stub_cleanup_local(local); + br_stub_dealloc_local(local); - unwind: - frame->local = NULL; - STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, NULL, NULL, - NULL); +unwind: + frame->local = NULL; + STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, NULL, NULL, NULL); - return 0; + return 0; } int32_t -br_stub_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) +br_stub_ftruncate_cbk(call_frame_t *frame, void 
*cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - int32_t ret = -1; - br_stub_local_t *local = NULL; + int32_t ret = -1; + br_stub_local_t *local = NULL; - local = frame->local; - frame->local = NULL; + local = frame->local; + frame->local = NULL; - if (op_ret < 0) - goto unwind; + if (op_ret < 0) + goto unwind; - ret = br_stub_mark_inode_modified (this, local); - if (ret) { - op_ret = -1; - op_errno = EINVAL; - } + ret = br_stub_mark_inode_modified(this, local); + if (ret) { + op_ret = -1; + op_errno = EINVAL; + } unwind: - STACK_UNWIND_STRICT (ftruncate, frame, - op_ret, op_errno, prebuf, postbuf, xdata); + STACK_UNWIND_STRICT(ftruncate, frame, op_ret, op_errno, prebuf, postbuf, + xdata); - br_stub_cleanup_local (local); - br_stub_dealloc_local (local); + br_stub_cleanup_local(local); + br_stub_dealloc_local(local); - return 0; + return 0; } int32_t -br_stub_ftruncate_resume (call_frame_t *frame, xlator_t *this, fd_t *fd, - off_t offset, dict_t *xdata) +br_stub_ftruncate_resume(call_frame_t *frame, xlator_t *this, fd_t *fd, + off_t offset, dict_t *xdata) { - STACK_WIND (frame, br_stub_ftruncate_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); - return 0; + STACK_WIND(frame, br_stub_ftruncate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); + return 0; } /* c.f. 
br_stub_writev() for explanation */ int32_t -br_stub_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, - off_t offset, dict_t *xdata) -{ - br_stub_local_t *local = NULL; - call_stub_t *stub = NULL; - int32_t op_ret = -1; - int32_t op_errno = EINVAL; - gf_boolean_t inc_version = _gf_false; - gf_boolean_t modified = _gf_false; - br_stub_inode_ctx_t *ctx = NULL; - int32_t ret = -1; - fop_ftruncate_cbk_t cbk = default_ftruncate_cbk; - - GF_VALIDATE_OR_GOTO ("bit-rot-stub", this, unwind); - GF_VALIDATE_OR_GOTO (this->name, frame, unwind); - GF_VALIDATE_OR_GOTO (this->name, fd, unwind); - - ret = br_stub_need_versioning (this, fd, &inc_version, &modified, &ctx); - if (ret) - goto unwind; - - ret = br_stub_check_bad_object (this, fd->inode, &op_ret, &op_errno); - if (ret) - goto unwind; - - if (!inc_version && modified) - goto wind; - - ret = br_stub_versioning_prep (frame, this, fd, ctx); - if (ret) - goto unwind; - - local = frame->local; - if (!inc_version) { - br_stub_fill_local (local, NULL, fd, fd->inode, - fd->inode->gfid, BR_STUB_NO_VERSIONING, 0); - cbk = br_stub_ftruncate_cbk; - goto wind; - } - - stub = fop_ftruncate_stub (frame, br_stub_ftruncate_resume, fd, offset, - xdata); - if (!stub) { - gf_msg (this->name, GF_LOG_ERROR, 0, BRS_MSG_STUB_ALLOC_FAILED, - "failed to allocate stub for ftruncate fop (gfid: %s)," - " unwinding", uuid_utoa (fd->inode->gfid)); - goto cleanup_local; - } +br_stub_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) +{ + br_stub_local_t *local = NULL; + call_stub_t *stub = NULL; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + gf_boolean_t inc_version = _gf_false; + gf_boolean_t modified = _gf_false; + br_stub_inode_ctx_t *ctx = NULL; + int32_t ret = -1; + fop_ftruncate_cbk_t cbk = default_ftruncate_cbk; + br_stub_private_t *priv = NULL; + + GF_VALIDATE_OR_GOTO("bit-rot-stub", this, unwind); + GF_VALIDATE_OR_GOTO(this->name, this->private, unwind); + GF_VALIDATE_OR_GOTO(this->name, 
frame, unwind); + GF_VALIDATE_OR_GOTO(this->name, fd, unwind); + + priv = this->private; + if (!priv->do_versioning) + goto wind; + + ret = br_stub_need_versioning(this, fd, &inc_version, &modified, &ctx); + if (ret) + goto unwind; + + ret = br_stub_check_bad_object(this, fd->inode, &op_ret, &op_errno); + if (ret) + goto unwind; + + if (!inc_version && modified) + goto wind; + + ret = br_stub_versioning_prep(frame, this, fd, ctx); + if (ret) + goto unwind; + + local = frame->local; + if (!inc_version) { + br_stub_fill_local(local, NULL, fd, fd->inode, fd->inode->gfid, + BR_STUB_NO_VERSIONING, 0); + cbk = br_stub_ftruncate_cbk; + goto wind; + } + + stub = fop_ftruncate_stub(frame, br_stub_ftruncate_resume, fd, offset, + xdata); + if (!stub) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_STUB_ALLOC_FAILED, + "ftruncate gfid=%s", uuid_utoa(fd->inode->gfid), NULL); + goto cleanup_local; + } + + return br_stub_perform_incversioning(this, frame, stub, fd, ctx); - return br_stub_perform_incversioning (this, frame, stub, fd, ctx); - - wind: - STACK_WIND (frame, cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); - return 0; +wind: + STACK_WIND(frame, cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); + return 0; - cleanup_local: - br_stub_cleanup_local (local); - br_stub_dealloc_local (local); +cleanup_local: + br_stub_cleanup_local(local); + br_stub_dealloc_local(local); - unwind: - frame->local = NULL; - STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, NULL, NULL, - NULL); +unwind: + frame->local = NULL; + STACK_UNWIND_STRICT(ftruncate, frame, op_ret, op_errno, NULL, NULL, NULL); - return 0; + return 0; } int32_t -br_stub_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) +br_stub_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt 
*prebuf, + struct iatt *postbuf, dict_t *xdata) { - int32_t ret = 0; - br_stub_local_t *local = NULL; + int32_t ret = 0; + br_stub_local_t *local = NULL; - local = frame->local; - frame->local = NULL; + local = frame->local; + frame->local = NULL; - if (op_ret < 0) - goto unwind; + if (op_ret < 0) + goto unwind; - ret = br_stub_mark_inode_modified (this, local); - if (ret) { - op_ret = -1; - op_errno = EINVAL; - } + ret = br_stub_mark_inode_modified(this, local); + if (ret) { + op_ret = -1; + op_errno = EINVAL; + } unwind: - STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf, postbuf, - xdata); - br_stub_cleanup_local (local); - br_stub_dealloc_local (local); - return 0; + STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + br_stub_cleanup_local(local); + br_stub_dealloc_local(local); + return 0; } int32_t -br_stub_truncate_resume (call_frame_t *frame, xlator_t *this, loc_t *loc, - off_t offset, dict_t *xdata) +br_stub_truncate_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, + off_t offset, dict_t *xdata) { - br_stub_local_t *local = frame->local; + br_stub_local_t *local = frame->local; - fd_unref (local->u.context.fd); - STACK_WIND (frame, br_stub_ftruncate_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); - return 0; + fd_unref(local->u.context.fd); + STACK_WIND(frame, br_stub_ftruncate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); + return 0; } /** @@ -1948,90 +2292,92 @@ br_stub_truncate_resume (call_frame_t *frame, xlator_t *this, loc_t *loc, * c.f. 
br_writev_cbk() for explanation */ int32_t -br_stub_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, - off_t offset, dict_t *xdata) -{ - br_stub_local_t *local = NULL; - call_stub_t *stub = NULL; - int32_t op_ret = -1; - int32_t op_errno = EINVAL; - gf_boolean_t inc_version = _gf_false; - gf_boolean_t modified = _gf_false; - br_stub_inode_ctx_t *ctx = NULL; - int32_t ret = -1; - fd_t *fd = NULL; - fop_truncate_cbk_t cbk = default_truncate_cbk; - - GF_VALIDATE_OR_GOTO ("bit-rot-stub", this, unwind); - GF_VALIDATE_OR_GOTO (this->name, frame, unwind); - GF_VALIDATE_OR_GOTO (this->name, loc, unwind); - GF_VALIDATE_OR_GOTO (this->name, loc->inode, unwind); - - fd = fd_anonymous (loc->inode); - if (!fd) { - gf_msg (this->name, GF_LOG_ERROR, 0, - BRS_MSG_CREATE_ANONYMOUS_FD_FAILED, "failed to create " - "anonymous fd for the inode %s", - uuid_utoa (loc->inode->gfid)); - goto unwind; - } - - ret = br_stub_need_versioning (this, fd, &inc_version, &modified, &ctx); - if (ret) - goto cleanup_fd; - - ret = br_stub_check_bad_object (this, fd->inode, &op_ret, &op_errno); - if (ret) - goto unwind; - - if (!inc_version && modified) - goto wind; - - ret = br_stub_versioning_prep (frame, this, fd, ctx); - if (ret) - goto cleanup_fd; - - local = frame->local; - if (!inc_version) { - br_stub_fill_local (local, NULL, fd, fd->inode, - fd->inode->gfid, BR_STUB_NO_VERSIONING, 0); - cbk = br_stub_truncate_cbk; - goto wind; - } - - stub = fop_truncate_stub (frame, br_stub_truncate_resume, loc, offset, - xdata); - if (!stub) { - gf_msg (this->name, GF_LOG_ERROR, 0, BRS_MSG_STUB_ALLOC_FAILED, - "failed to allocate stub for truncate fop (gfid: %s), " - "unwinding", uuid_utoa (fd->inode->gfid)); - goto cleanup_local; - } - - return br_stub_perform_incversioning (this, frame, stub, fd, ctx); +br_stub_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) +{ + br_stub_local_t *local = NULL; + call_stub_t *stub = NULL; + int32_t op_ret = -1; + int32_t 
op_errno = EINVAL; + gf_boolean_t inc_version = _gf_false; + gf_boolean_t modified = _gf_false; + br_stub_inode_ctx_t *ctx = NULL; + int32_t ret = -1; + fd_t *fd = NULL; + fop_truncate_cbk_t cbk = default_truncate_cbk; + br_stub_private_t *priv = NULL; + + GF_VALIDATE_OR_GOTO("bit-rot-stub", this, unwind); + GF_VALIDATE_OR_GOTO(this->name, this->private, unwind); + GF_VALIDATE_OR_GOTO(this->name, frame, unwind); + GF_VALIDATE_OR_GOTO(this->name, loc, unwind); + GF_VALIDATE_OR_GOTO(this->name, loc->inode, unwind); + + priv = this->private; + if (!priv->do_versioning) + goto wind; + + fd = fd_anonymous(loc->inode); + if (!fd) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_CREATE_ANONYMOUS_FD_FAILED, + "inode-gfid=%s", uuid_utoa(loc->inode->gfid), NULL); + goto unwind; + } + + ret = br_stub_need_versioning(this, fd, &inc_version, &modified, &ctx); + if (ret) + goto cleanup_fd; + + ret = br_stub_check_bad_object(this, fd->inode, &op_ret, &op_errno); + if (ret) + goto unwind; + + if (!inc_version && modified) + goto wind; + + ret = br_stub_versioning_prep(frame, this, fd, ctx); + if (ret) + goto cleanup_fd; + + local = frame->local; + if (!inc_version) { + br_stub_fill_local(local, NULL, fd, fd->inode, fd->inode->gfid, + BR_STUB_NO_VERSIONING, 0); + cbk = br_stub_truncate_cbk; + goto wind; + } + + stub = fop_truncate_stub(frame, br_stub_truncate_resume, loc, offset, + xdata); + if (!stub) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_STUB_ALLOC_FAILED, + "truncate gfid=%s", uuid_utoa(fd->inode->gfid), NULL); + goto cleanup_local; + } - wind: - STACK_WIND (frame, cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); - fd_unref (fd); - return 0; + return br_stub_perform_incversioning(this, frame, stub, fd, ctx); - cleanup_local: - br_stub_cleanup_local (local); - br_stub_dealloc_local (local); - cleanup_fd: - fd_unref (fd); - unwind: - frame->local = NULL; - STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, NULL, NULL, - NULL); 
+wind: + STACK_WIND(frame, cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->truncate, + loc, offset, xdata); + if (fd) + fd_unref(fd); + return 0; + +cleanup_local: + br_stub_cleanup_local(local); + br_stub_dealloc_local(local); +cleanup_fd: + fd_unref(fd); +unwind: + frame->local = NULL; + STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, NULL, NULL, NULL); - return 0; + return 0; } /** }}} */ - /** {{{ */ /* open() */ @@ -2051,62 +2397,70 @@ br_stub_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, */ int -br_stub_open (call_frame_t *frame, xlator_t *this, - loc_t *loc, int32_t flags, fd_t *fd, dict_t *xdata) -{ - int32_t ret = -1; - br_stub_inode_ctx_t *ctx = NULL; - uint64_t ctx_addr = 0; - int32_t op_ret = -1; - int32_t op_errno = EINVAL; - - GF_VALIDATE_OR_GOTO ("bit-rot-stub", this, unwind); - GF_VALIDATE_OR_GOTO (this->name, loc, unwind); - GF_VALIDATE_OR_GOTO (this->name, fd, unwind); - GF_VALIDATE_OR_GOTO (this->name, fd->inode, unwind); - - ret = br_stub_get_inode_ctx (this, fd->inode, &ctx_addr); +br_stub_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + fd_t *fd, dict_t *xdata) +{ + int32_t ret = -1; + br_stub_inode_ctx_t *ctx = NULL; + uint64_t ctx_addr = 0; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + br_stub_private_t *priv = NULL; + unsigned long version = BITROT_DEFAULT_CURRENT_VERSION; + + GF_VALIDATE_OR_GOTO("bit-rot-stub", this, unwind); + GF_VALIDATE_OR_GOTO(this->name, this->private, unwind); + GF_VALIDATE_OR_GOTO(this->name, loc, unwind); + GF_VALIDATE_OR_GOTO(this->name, fd, unwind); + GF_VALIDATE_OR_GOTO(this->name, fd->inode, unwind); + + priv = this->private; + + if (!priv->do_versioning) + goto wind; + + ret = br_stub_get_inode_ctx(this, fd->inode, &ctx_addr); + if (ret) { + ret = br_stub_init_inode_versions(this, fd, fd->inode, version, + _gf_true, _gf_false, &ctx_addr); if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - BRS_MSG_GET_INODE_CONTEXT_FAILED, "failed to get the " - "inode context 
for the file %s (gfid: %s)", loc->path, - uuid_utoa (fd->inode->gfid)); - goto unwind; + gf_smsg(this->name, GF_LOG_ERROR, 0, + BRS_MSG_GET_INODE_CONTEXT_FAILED, "path=%s", loc->path, + "gfid=%s", uuid_utoa(fd->inode->gfid), NULL); + goto unwind; } + } - ctx = (br_stub_inode_ctx_t *)(long)ctx_addr; + ctx = (br_stub_inode_ctx_t *)(long)ctx_addr; - ret = br_stub_check_bad_object (this, fd->inode, &op_ret, &op_errno); - if (ret) - goto unwind; + ret = br_stub_check_bad_object(this, fd->inode, &op_ret, &op_errno); + if (ret) + goto unwind; - if (frame->root->pid == GF_CLIENT_PID_SCRUB) - goto wind; + if (frame->root->pid == GF_CLIENT_PID_SCRUB) + goto wind; - if (flags == O_RDONLY) - goto wind; + if (flags == O_RDONLY) + goto wind; - ret = br_stub_add_fd_to_inode (this, fd, ctx); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - BRS_MSG_ADD_FD_TO_LIST_FAILED, - "failed add fd to the list (gfid: %s)", - uuid_utoa (fd->inode->gfid)); - goto unwind; - } + ret = br_stub_add_fd_to_inode(this, fd, ctx); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ADD_FD_TO_LIST_FAILED, + "gfid=%s", uuid_utoa(fd->inode->gfid), NULL); + goto unwind; + } wind: - STACK_WIND (frame, default_open_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->open, loc, flags, fd, xdata); - return 0; + STACK_WIND(frame, default_open_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); + return 0; unwind: - STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, NULL, NULL); - return 0; + STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, NULL, NULL); + return 0; } /** }}} */ - /** {{{ */ /* creat() */ @@ -2116,130 +2470,137 @@ unwind: * fd to the inode context fd tracking list. 
*/ int32_t -br_stub_add_fd_to_inode (xlator_t *this, fd_t *fd, br_stub_inode_ctx_t *ctx) +br_stub_add_fd_to_inode(xlator_t *this, fd_t *fd, br_stub_inode_ctx_t *ctx) { - int32_t ret = -1; - br_stub_fd_t *br_stub_fd = NULL; + int32_t ret = -1; + br_stub_fd_t *br_stub_fd = NULL; - ret = br_stub_require_release_call (this, fd, &br_stub_fd); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - BRS_MSG_SET_FD_CONTEXT_FAILED, "failed to set the fd " - "context for the file (gfid: %s)", - uuid_utoa (fd->inode->gfid)); - goto out; - } + ret = br_stub_require_release_call(this, fd, &br_stub_fd); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_SET_FD_CONTEXT_FAILED, + "gfid=%s", uuid_utoa(fd->inode->gfid), NULL); + goto out; + } - LOCK (&fd->inode->lock); - { - list_add_tail (&ctx->fd_list, &br_stub_fd->list); - } - UNLOCK (&fd->inode->lock); + LOCK(&fd->inode->lock); + { + list_add_tail(&ctx->fd_list, &br_stub_fd->list); + } + UNLOCK(&fd->inode->lock); - ret = 0; + ret = 0; out: - return ret; + return ret; } int -br_stub_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, fd_t *fd, inode_t *inode, - struct iatt *stbuf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - int32_t ret = 0; - uint64_t ctx_addr = 0; - br_stub_inode_ctx_t *ctx = NULL; - unsigned long version = BITROT_DEFAULT_CURRENT_VERSION; - - if (op_ret < 0) - goto unwind; - - ret = br_stub_get_inode_ctx (this, fd->inode, &ctx_addr); - if (ret < 0) { - ret = br_stub_init_inode_versions (this, fd, inode, version, - _gf_true, _gf_false); - if (ret) { - op_ret = -1; - op_errno = EINVAL; - } - } else { - ctx = (br_stub_inode_ctx_t *)(long)ctx_addr; - ret = br_stub_add_fd_to_inode (this, fd, ctx); +br_stub_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, fd_t *fd, inode_t *inode, + struct iatt *stbuf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + int32_t ret = 0; + uint64_t 
ctx_addr = 0; + br_stub_inode_ctx_t *ctx = NULL; + unsigned long version = BITROT_DEFAULT_CURRENT_VERSION; + br_stub_private_t *priv = NULL; + + priv = this->private; + + if (op_ret < 0) + goto unwind; + + if (!priv->do_versioning) + goto unwind; + + ret = br_stub_get_inode_ctx(this, fd->inode, &ctx_addr); + if (ret < 0) { + ret = br_stub_init_inode_versions(this, fd, inode, version, _gf_true, + _gf_false, &ctx_addr); + if (ret) { + op_ret = -1; + op_errno = EINVAL; } + } else { + ctx = (br_stub_inode_ctx_t *)(long)ctx_addr; + ret = br_stub_add_fd_to_inode(this, fd, ctx); + } unwind: - STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, - fd, inode, stbuf, preparent, postparent, xdata); - return 0; + STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, stbuf, + preparent, postparent, xdata); + return 0; } int -br_stub_create (call_frame_t *frame, - xlator_t *this, loc_t *loc, int32_t flags, - mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) -{ - GF_VALIDATE_OR_GOTO ("bit-rot-stub", this, unwind); - GF_VALIDATE_OR_GOTO (this->name, loc, unwind); - GF_VALIDATE_OR_GOTO (this->name, loc->inode, unwind); - GF_VALIDATE_OR_GOTO (this->name, fd, unwind); - GF_VALIDATE_OR_GOTO (this->name, fd->inode, unwind); - - STACK_WIND (frame, br_stub_create_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->create, - loc, flags, mode, umask, fd, xdata); - return 0; +br_stub_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) +{ + GF_VALIDATE_OR_GOTO("bit-rot-stub", this, unwind); + GF_VALIDATE_OR_GOTO(this->name, loc, unwind); + GF_VALIDATE_OR_GOTO(this->name, loc->inode, unwind); + GF_VALIDATE_OR_GOTO(this->name, fd, unwind); + GF_VALIDATE_OR_GOTO(this->name, fd->inode, unwind); + + STACK_WIND(frame, br_stub_create_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd, + xdata); + return 0; unwind: - STACK_UNWIND_STRICT (create, frame, -1, EINVAL, NULL, NULL, 
NULL, NULL, - NULL, NULL); - return 0; + STACK_UNWIND_STRICT(create, frame, -1, EINVAL, NULL, NULL, NULL, NULL, NULL, + NULL); + return 0; } int -br_stub_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, inode_t *inode, - struct iatt *stbuf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - int32_t ret = -1; - unsigned long version = BITROT_DEFAULT_CURRENT_VERSION; - - if (op_ret < 0) - goto unwind; - - ret = br_stub_init_inode_versions (this, NULL, inode, version, - _gf_true, _gf_false); - /** - * Like lookup, if init_inode_versions fail, return EINVAL - */ - if (ret) { - op_ret = -1; - op_errno = EINVAL; - } +br_stub_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, inode_t *inode, struct iatt *stbuf, + struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) +{ + int32_t ret = -1; + unsigned long version = BITROT_DEFAULT_CURRENT_VERSION; + br_stub_private_t *priv = NULL; + + priv = this->private; + + if (op_ret < 0) + goto unwind; + + if (!priv->do_versioning) + goto unwind; + + ret = br_stub_init_inode_versions(this, NULL, inode, version, _gf_true, + _gf_false, NULL); + /** + * Like lookup, if init_inode_versions fail, return EINVAL + */ + if (ret) { + op_ret = -1; + op_errno = EINVAL; + } unwind: - STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, - inode, stbuf, preparent, postparent, xdata); - return 0; + STACK_UNWIND_STRICT(mknod, frame, op_ret, op_errno, inode, stbuf, preparent, + postparent, xdata); + return 0; } int -br_stub_mknod (call_frame_t *frame, xlator_t *this, - loc_t *loc, mode_t mode, dev_t dev, mode_t umask, dict_t *xdata) +br_stub_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + dev_t dev, mode_t umask, dict_t *xdata) { - GF_VALIDATE_OR_GOTO ("bit-rot-stub", this, unwind); - GF_VALIDATE_OR_GOTO (this->name, loc, unwind); - GF_VALIDATE_OR_GOTO (this->name, loc->inode, unwind); + 
GF_VALIDATE_OR_GOTO("bit-rot-stub", this, unwind); + GF_VALIDATE_OR_GOTO(this->name, loc, unwind); + GF_VALIDATE_OR_GOTO(this->name, loc->inode, unwind); - STACK_WIND (frame, br_stub_mknod_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->mknod, - loc, mode, dev, umask, xdata); - return 0; + STACK_WIND(frame, br_stub_mknod_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mknod, loc, mode, dev, umask, xdata); + return 0; unwind: - STACK_UNWIND_STRICT (mknod, frame, -1, EINVAL, NULL, NULL, NULL, - NULL, NULL); - return 0; + STACK_UNWIND_STRICT(mknod, frame, -1, EINVAL, NULL, NULL, NULL, NULL, NULL); + return 0; } /** }}} */ @@ -2269,272 +2630,646 @@ unwind: * creattion failure as the lookup failure. */ static int32_t -br_stub_lookup_version (xlator_t *this, - uuid_t gfid, inode_t *inode, dict_t *xattr) +br_stub_lookup_version(xlator_t *this, uuid_t gfid, inode_t *inode, + dict_t *xattr) +{ + unsigned long version = 0; + br_version_t *obuf = NULL; + br_signature_t *sbuf = NULL; + br_vxattr_status_t status; + gf_boolean_t bad_object = _gf_false; + + /** + * versioning xattrs were requested from POSIX. if available, figure + * out the correct version to use in the inode context (start with + * the default version if unavailable). As of now versions are not + * persisted on-disk. The inode is marked dirty, so that the first + * operation (such as write(), etc..) triggers synchronization to + * disk. + */ + status = br_version_xattr_state(xattr, &obuf, &sbuf, &bad_object); + version = ((status == BR_VXATTR_STATUS_FULL) || + (status == BR_VXATTR_STATUS_UNSIGNED)) + ? obuf->ongoingversion + : BITROT_DEFAULT_CURRENT_VERSION; + + /** + * If signature is there, but version is not there then that status is + * is treated as INVALID. So in that case, we should not initialize the + * inode context with wrong version names etc. 
+ */ + if (status == BR_VXATTR_STATUS_INVALID) + return -1; + + return br_stub_init_inode_versions(this, NULL, inode, version, _gf_true, + bad_object, NULL); +} + +/** {{{ */ + +int32_t +br_stub_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, + dict_t *xdata) { - unsigned long version = 0; - br_version_t *obuf = NULL; - br_signature_t *sbuf = NULL; - br_vxattr_status_t status; - gf_boolean_t bad_object = _gf_false; + br_stub_private_t *priv = NULL; + br_stub_fd_t *fd_ctx = NULL; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; - /** - * versioning xattrs were requested from POSIX. if available, figure - * out the correct version to use in the inode context (start with - * the default version if unavailable). As of now versions are not - * persisted on-disk. The inode is marked dirty, so that the first - * operation (such as write(), etc..) triggers synchronization to - * disk. - */ - status = br_version_xattr_state (xattr, &obuf, &sbuf, &bad_object); - version = ((status == BR_VXATTR_STATUS_FULL) - || (status == BR_VXATTR_STATUS_UNSIGNED)) - ? obuf->ongoingversion : BITROT_DEFAULT_CURRENT_VERSION; + priv = this->private; + if (gf_uuid_compare(fd->inode->gfid, priv->bad_object_dir_gfid)) + goto normal; - /** - * If signature is there, but version is not therem then that status is - * is treated as INVALID. So in that case, we should not initialize the - * inode context with wrong version names etc. 
- */ - if (status == BR_VXATTR_STATUS_INVALID) - return -1; + fd_ctx = br_stub_fd_new(); + if (!fd_ctx) { + op_errno = ENOMEM; + goto unwind; + } - return br_stub_init_inode_versions (this, NULL, inode, version, - _gf_true, bad_object); -} + fd_ctx->bad_object.dir_eof = -1; + fd_ctx->bad_object.dir = sys_opendir(priv->stub_basepath); + if (!fd_ctx->bad_object.dir) { + op_errno = errno; + goto err_freectx; + } + op_ret = br_stub_fd_ctx_set(this, fd, fd_ctx); + if (!op_ret) + goto unwind; -/** {{{ */ + sys_closedir(fd_ctx->bad_object.dir); + +err_freectx: + GF_FREE(fd_ctx); +unwind: + STACK_UNWIND_STRICT(opendir, frame, op_ret, op_errno, fd, NULL); + return 0; + +normal: + STACK_WIND(frame, default_opendir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->opendir, loc, fd, xdata); + return 0; +} + +int32_t +br_stub_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *xdata) +{ + call_stub_t *stub = NULL; + br_stub_private_t *priv = NULL; + + priv = this->private; + if (!priv->do_versioning) + goto out; + + if (gf_uuid_compare(fd->inode->gfid, priv->bad_object_dir_gfid)) + goto out; + stub = fop_readdir_stub(frame, br_stub_readdir_wrapper, fd, size, off, + xdata); + if (!stub) { + STACK_UNWIND_STRICT(readdir, frame, -1, ENOMEM, NULL, NULL); + return 0; + } + br_stub_worker_enqueue(this, stub); + return 0; +out: + STACK_WIND(frame, default_readdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdir, fd, size, off, xdata); + return 0; +} int -br_stub_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, gf_dirent_t *entries, - dict_t *dict) -{ - int32_t ret = 0; - uint64_t ctxaddr = 0; - gf_dirent_t *entry = NULL; - - if (op_ret < 0) - goto unwind; - - list_for_each_entry (entry, &entries->list, list) { - if ((strcmp (entry->d_name, ".") == 0) - || (strcmp (entry->d_name, "..") == 0)) - continue; - - if (!IA_ISREG (entry->d_stat.ia_type)) - continue; - - ret = br_stub_get_inode_ctx (this, 
entry->inode, &ctxaddr); - if (ret < 0) - ctxaddr = 0; - if (ctxaddr) { /* already has the context */ - br_stub_remove_vxattrs (entry->dict); - continue; - } - - ret = br_stub_lookup_version - (this, entry->inode->gfid, entry->inode, entry->dict); - br_stub_remove_vxattrs (entry->dict); - if (ret) { - /** - * there's no per-file granularity support in case of - * failure. let's fail the entire request for now.. - */ - break; - } +br_stub_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, gf_dirent_t *entries, + dict_t *dict) +{ + int32_t ret = 0; + uint64_t ctxaddr = 0; + gf_dirent_t *entry = NULL; + br_stub_private_t *priv = NULL; + gf_boolean_t ver_enabled = _gf_false; + + BR_STUB_VER_ENABLED_IN_CALLPATH(frame, ver_enabled); + priv = this->private; + BR_STUB_VER_COND_GOTO(priv, (!ver_enabled), unwind); + + if (op_ret < 0) + goto unwind; + + list_for_each_entry(entry, &entries->list, list) + { + if ((strcmp(entry->d_name, ".") == 0) || + (strcmp(entry->d_name, "..") == 0)) + continue; + + if (!IA_ISREG(entry->d_stat.ia_type)) + continue; + + /* + * Readdirp for most part is a bulk lookup for all the entries + * present in the directory being read. Ideally, for each + * entry, the handling should be similar to that of a lookup + * callback. But for now, just keeping this as it has been + * until now (which means, this comment has been added much + * later as part of a change that wanted to send the flag + * of true/false to br_stub_remove_vxattrs to indicate whether + * the bad-object xattr should be removed from the entry->dict + * or not). Until this change, the function br_stub_remove_vxattrs + * was just removing all the xattrs associated with bit-rot-stub + * (like version, bad-object, signature etc). But, there are + * scenarios where we only want to send bad-object xattr and not + * others. So this comment is part of that change which also + * mentions about another possible change that might be needed + * in future. 
+ * But for now, adding _gf_true means functionally its same as + * what this function was doing before. Just remove all the stub + * related xattrs. + */ + ret = br_stub_get_inode_ctx(this, entry->inode, &ctxaddr); + if (ret < 0) + ctxaddr = 0; + if (ctxaddr) { /* already has the context */ + br_stub_remove_vxattrs(entry->dict, _gf_true); + continue; } + ret = br_stub_lookup_version(this, entry->inode->gfid, entry->inode, + entry->dict); + br_stub_remove_vxattrs(entry->dict, _gf_true); if (ret) { - op_ret = -1; - op_errno = EINVAL; + /** + * there's no per-file granularity support in case of + * failure. let's fail the entire request for now.. + */ + break; } + } - unwind: - STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, dict); + if (ret) { + op_ret = -1; + op_errno = EINVAL; + } - return 0; +unwind: + STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, entries, dict); + + return 0; } int -br_stub_readdirp (call_frame_t *frame, xlator_t *this, - fd_t *fd, size_t size, off_t offset, dict_t *dict) +br_stub_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, dict_t *dict) { - int32_t ret = -1; - int op_errno = 0; - gf_boolean_t xref = _gf_false; + int32_t ret = -1; + int op_errno = 0; + gf_boolean_t xref = _gf_false; + br_stub_private_t *priv = NULL; - op_errno = ENOMEM; - if (!dict) { - dict = dict_new (); - if (!dict) - goto unwind; - } else { - dict = dict_ref (dict); - } + priv = this->private; + BR_STUB_VER_NOT_ACTIVE_THEN_GOTO(frame, priv, wind); - xref = _gf_true; - - op_errno = EINVAL; - ret = dict_set_uint32 (dict, BITROT_CURRENT_VERSION_KEY, 0); - if (ret) - goto unwind; - ret = dict_set_uint32 (dict, BITROT_SIGNING_VERSION_KEY, 0); - if (ret) - goto unwind; - ret = dict_set_uint32 (dict, BITROT_OBJECT_BAD_KEY, 0); - if (ret) - goto unwind; + op_errno = ENOMEM; + if (!dict) { + dict = dict_new(); + if (!dict) + goto unwind; + } else { + dict = dict_ref(dict); + } + + xref = _gf_true; + + op_errno = 
EINVAL; + ret = dict_set_uint32(dict, BITROT_CURRENT_VERSION_KEY, 0); + if (ret) + goto unwind; + ret = dict_set_uint32(dict, BITROT_SIGNING_VERSION_KEY, 0); + if (ret) + goto unwind; + ret = dict_set_uint32(dict, BITROT_OBJECT_BAD_KEY, 0); + if (ret) + goto unwind; - STACK_WIND (frame, br_stub_readdirp_cbk, FIRST_CHILD (this), - FIRST_CHILD(this)->fops->readdirp, fd, size, - offset, dict); - goto unref_dict; +wind: + STACK_WIND(frame, br_stub_readdirp_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdirp, fd, size, offset, dict); + goto unref_dict; - unwind: - STACK_UNWIND_STRICT (readdirp, frame, -1, op_errno, NULL, NULL); - return 0; +unwind: + if (frame->local == (void *)0x1) + frame->local = NULL; + STACK_UNWIND_STRICT(readdirp, frame, -1, op_errno, NULL, NULL); + return 0; - unref_dict: - if (xref) - dict_unref (dict); - return 0; +unref_dict: + if (xref) + dict_unref(dict); + return 0; } /** }}} */ - /** {{{ */ /* lookup() */ -int -br_stub_lookup_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int op_ret, int op_errno, inode_t *inode, - struct iatt *stbuf, dict_t *xattr, struct iatt *postparent) +/** + * This function mainly handles the ENOENT error for the bad objects. Though + * br_stub_forget () handles removal of the link for the bad object from the + * quarantine directory, its better to handle it in lookup as well, where + * a failed lookup on a bad object with ENOENT, will trigger deletion of the + * link for the bad object from quarantine directory. So whoever comes first + * either forget () or lookup () will take care of removing the link. 
+ */ +void +br_stub_handle_lookup_error(xlator_t *this, inode_t *inode, int32_t op_errno) { - int32_t ret = 0; + int32_t ret = -1; + uint64_t ctx_addr = 0; + br_stub_inode_ctx_t *ctx = NULL; - if (op_ret < 0) - goto unwind; - if (!IA_ISREG (stbuf->ia_type)) - goto unwind; + if (op_errno != ENOENT) + goto out; - /** - * If the object is bad, then "bad inode" marker has to be sent back - * in resoinse, for revalidated lookups as well. Some xlators such as - * quick-read might cache the data in revalidated lookup as fresh - * lookup would anyway have sent "bad inode" marker. - * In general send bad inode marker for every lookup operation on the - * bad object. + if (!inode_is_linked(inode)) + goto out; + + ret = br_stub_get_inode_ctx(this, inode, &ctx_addr); + if (ret) + goto out; + + ctx = (br_stub_inode_ctx_t *)(long)ctx_addr; + + LOCK(&inode->lock); + { + if (__br_stub_is_bad_object(ctx)) + (void)br_stub_del(this, inode->gfid); + } + UNLOCK(&inode->lock); + + if (__br_stub_is_bad_object(ctx)) { + /* File is not present, might be deleted for recovery, + * del the bitrot inode context */ - if (cookie != (void *) BR_STUB_REQUEST_COOKIE) { - ret = br_stub_mark_xdata_bad_object (this, inode, xattr); - if (ret) { - op_ret = -1; - op_errno = EIO; - goto unwind; - } - - goto delkey; + ctx_addr = 0; + inode_ctx_del(inode, this, &ctx_addr); + if (ctx_addr) { + ctx = (br_stub_inode_ctx_t *)(long)ctx_addr; + GF_FREE(ctx); } + } - ret = br_stub_lookup_version (this, stbuf->ia_gfid, inode, xattr); - if (ret < 0) { - op_ret = -1; - op_errno = EINVAL; - goto delkey; - } +out: + return; +} - /** - * If the object is bad, send "bad inode" marker back in response - * for xlator(s) to act accordingly (such as quick-read, etc..) 
+int +br_stub_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf, + dict_t *xattr, struct iatt *postparent) +{ + int32_t ret = 0; + br_stub_private_t *priv = NULL; + gf_boolean_t ver_enabled = _gf_false; + gf_boolean_t remove_bad_file_marker = _gf_true; + + BR_STUB_VER_ENABLED_IN_CALLPATH(frame, ver_enabled); + priv = this->private; + + if (op_ret < 0) { + (void)br_stub_handle_lookup_error(this, inode, op_errno); + + /* + * If the lookup error is not ENOENT, then it is better + * to send the bad file marker to the higher layer (if + * it has been set) */ - ret = br_stub_mark_xdata_bad_object (this, inode, xattr); + if (op_errno != ENOENT) + remove_bad_file_marker = _gf_false; + goto delkey; + } + + BR_STUB_VER_COND_GOTO(priv, (!ver_enabled), delkey); + + if (!IA_ISREG(stbuf->ia_type)) + goto unwind; + + /** + * If the object is bad, then "bad inode" marker has to be sent back + * in resoinse, for revalidated lookups as well. Some xlators such as + * quick-read might cache the data in revalidated lookup as fresh + * lookup would anyway have sent "bad inode" marker. + * In general send bad inode marker for every lookup operation on the + * bad object. + */ + if (cookie != (void *)BR_STUB_REQUEST_COOKIE) { + ret = br_stub_mark_xdata_bad_object(this, inode, xattr); if (ret) { - /** - * aaha! bad object, but sorry we would not - * satisfy the request on allocation failures. - */ - op_ret = -1; - op_errno = EIO; - goto unwind; - } + op_ret = -1; + op_errno = EIO; + /* + * This flag ensures that in the label @delkey below, + * bad file marker is not removed from the dictinary, + * but other virtual xattrs (such as version, signature) + * are removed. 
+ */ + remove_bad_file_marker = _gf_false; + } + goto delkey; + } + + ret = br_stub_lookup_version(this, stbuf->ia_gfid, inode, xattr); + if (ret < 0) { + op_ret = -1; + op_errno = EINVAL; + goto delkey; + } + + /** + * If the object is bad, send "bad inode" marker back in response + * for xlator(s) to act accordingly (such as quick-read, etc..) + */ + ret = br_stub_mark_xdata_bad_object(this, inode, xattr); + if (ret) { + /** + * aaha! bad object, but sorry we would not + * satisfy the request on allocation failures. + */ + op_ret = -1; + op_errno = EIO; + goto delkey; + } delkey: - br_stub_remove_vxattrs (xattr); + br_stub_remove_vxattrs(xattr, remove_bad_file_marker); unwind: - STACK_UNWIND_STRICT (lookup, frame, - op_ret, op_errno, inode, stbuf, xattr, postparent); + STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, stbuf, xattr, + postparent); + return 0; +} + +int +br_stub_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +{ + int32_t ret = 0; + int op_errno = 0; + void *cookie = NULL; + uint64_t ctx_addr = 0; + gf_boolean_t xref = _gf_false; + br_stub_private_t *priv = NULL; + call_stub_t *stub = NULL; + + GF_VALIDATE_OR_GOTO("bit-rot-stub", this, unwind); + GF_VALIDATE_OR_GOTO(this->name, loc, unwind); + GF_VALIDATE_OR_GOTO(this->name, loc->inode, unwind); + + priv = this->private; + + BR_STUB_VER_NOT_ACTIVE_THEN_GOTO(frame, priv, wind); + + if (!gf_uuid_compare(loc->gfid, priv->bad_object_dir_gfid) || + !gf_uuid_compare(loc->pargfid, priv->bad_object_dir_gfid)) { + stub = fop_lookup_stub(frame, br_stub_lookup_wrapper, loc, xdata); + if (!stub) { + op_errno = ENOMEM; + goto unwind; + } + br_stub_worker_enqueue(this, stub); return 0; + } + + ret = br_stub_get_inode_ctx(this, loc->inode, &ctx_addr); + if (ret < 0) + ctx_addr = 0; + if (ctx_addr != 0) + goto wind; + + /** + * fresh lookup: request version keys from POSIX + */ + op_errno = ENOMEM; + if (!xdata) { + xdata = dict_new(); + if (!xdata) + goto unwind; + } else { + 
xdata = dict_ref(xdata); + } + + xref = _gf_true; + + /** + * Requesting both xattrs provides a way of sanity checking the + * object. Anomaly checking is done in cbk by examining absence + * of either or both xattrs. + */ + op_errno = EINVAL; + ret = dict_set_uint32(xdata, BITROT_CURRENT_VERSION_KEY, 0); + if (ret) + goto unwind; + ret = dict_set_uint32(xdata, BITROT_SIGNING_VERSION_KEY, 0); + if (ret) + goto unwind; + ret = dict_set_uint32(xdata, BITROT_OBJECT_BAD_KEY, 0); + if (ret) + goto unwind; + cookie = (void *)BR_STUB_REQUEST_COOKIE; + +wind: + STACK_WIND_COOKIE(frame, br_stub_lookup_cbk, cookie, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, loc, xdata); + goto dealloc_dict; + +unwind: + if (frame->local == (void *)0x1) + frame->local = NULL; + STACK_UNWIND_STRICT(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL); +dealloc_dict: + if (xref) + dict_unref(xdata); + return 0; } +/** }}} */ + +/** {{{ */ + +/* stat */ int -br_stub_lookup (call_frame_t *frame, - xlator_t *this, loc_t *loc, dict_t *xdata) +br_stub_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { - int32_t ret = 0; - int op_errno = 0; - void *cookie = NULL; - uint64_t ctx_addr = 0; - gf_boolean_t xref = _gf_false; + int32_t ret = 0; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + br_stub_private_t *priv = NULL; - GF_VALIDATE_OR_GOTO ("bit-rot-stub", this, unwind); - GF_VALIDATE_OR_GOTO (this->name, loc, unwind); - GF_VALIDATE_OR_GOTO (this->name, loc->inode, unwind); + priv = this->private; - ret = br_stub_get_inode_ctx (this, loc->inode, &ctx_addr); - if (ret < 0) - ctx_addr = 0; - if (ctx_addr != 0) - goto wind; + if (!priv->do_versioning) + goto wind; + + if (!IA_ISREG(loc->inode->ia_type)) + goto wind; + + ret = br_stub_check_bad_object(this, loc->inode, &op_ret, &op_errno); + if (ret) + goto unwind; + +wind: + STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->stat, + loc, xdata); + return 0; +unwind: + STACK_UNWIND_STRICT(stat, frame, 
op_ret, op_errno, NULL, NULL); + return 0; +} + +/* fstat */ +int +br_stub_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) +{ + int32_t ret = 0; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + br_stub_private_t *priv = NULL; + + priv = this->private; + + if (!priv->do_versioning) + goto wind; + + if (!IA_ISREG(fd->inode->ia_type)) + goto wind; + + ret = br_stub_check_bad_object(this, fd->inode, &op_ret, &op_errno); + if (ret) + goto unwind; + +wind: + STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fstat, + fd, xdata); + return 0; + +unwind: + STACK_UNWIND_STRICT(fstat, frame, op_ret, op_errno, NULL, NULL); + return 0; +} + +/** }}} */ + +/** {{{ */ + +/* unlink() */ + +int +br_stub_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + br_stub_local_t *local = NULL; + inode_t *inode = NULL; + uint64_t ctx_addr = 0; + br_stub_inode_ctx_t *ctx = NULL; + int32_t ret = -1; + br_stub_private_t *priv = NULL; + gf_boolean_t ver_enabled = _gf_false; + + BR_STUB_VER_ENABLED_IN_CALLPATH(frame, ver_enabled); + priv = this->private; + BR_STUB_VER_COND_GOTO(priv, (!ver_enabled), unwind); + + local = frame->local; + frame->local = NULL; + + if (op_ret < 0) + goto unwind; + + if (!local) { + gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_NULL_LOCAL, NULL); + goto unwind; + } + inode = local->u.context.inode; + if (!IA_ISREG(inode->ia_type)) + goto unwind; + + ret = br_stub_get_inode_ctx(this, inode, &ctx_addr); + if (ret) { /** - * fresh lookup: request version keys from POSIX + * If the inode is bad AND context is not there, then there + * is a possibility of the gfid of the object being listed + * in the quarantine directory and will be shown in the + * bad objects list. So continuing with the fop with a + * warning log. The entry from the quarantine directory + * has to be removed manually. 
Its not a good idea to fail + * the fop, as the object has already been deleted. */ - op_errno = ENOMEM; - if (!xdata) { - xdata = dict_new (); - if (!xdata) - goto unwind; - } else { - xdata = dict_ref (xdata); - } + gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_GET_INODE_CONTEXT_FAILED, + "inode-gfid=%s", uuid_utoa(inode->gfid), NULL); + goto unwind; + } - xref = _gf_true; + ctx = (br_stub_inode_ctx_t *)(long)ctx_addr; + LOCK(&inode->lock); + { /** - * Requesting both xattrs provides a way of sanity checking the - * object. Anomaly checking is done in cbk by examining absence - * of either or both xattrs. + * Ignoring the return value of br_stub_del (). + * There is not much that can be done if unlinking + * of the entry in the quarantine directory fails. + * The failure is logged. */ - op_errno = EINVAL; - ret = dict_set_uint32 (xdata, BITROT_CURRENT_VERSION_KEY, 0); - if (ret) - goto unwind; - ret = dict_set_uint32 (xdata, BITROT_SIGNING_VERSION_KEY, 0); - if (ret) - goto unwind; - ret = dict_set_uint32 (xdata, BITROT_OBJECT_BAD_KEY, 0); - if (ret) - goto unwind; - cookie = (void *) BR_STUB_REQUEST_COOKIE; + if (__br_stub_is_bad_object(ctx)) + (void)br_stub_del(this, inode->gfid); + } + UNLOCK(&inode->lock); - wind: - STACK_WIND_COOKIE (frame, br_stub_lookup_cbk, cookie, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, - loc, xdata); - goto dealloc_dict; +unwind: + STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, preparent, postparent, + xdata); + br_stub_cleanup_local(local); + br_stub_dealloc_local(local); + return 0; +} - unwind: - STACK_UNWIND_STRICT (lookup, frame, - -1, op_errno, NULL, NULL, NULL, NULL); - dealloc_dict: - if (xref) - dict_unref (xdata); - return 0; +int +br_stub_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int flag, + dict_t *xdata) +{ + br_stub_local_t *local = NULL; + int32_t op_ret = -1; + int32_t op_errno = 0; + br_stub_private_t *priv = NULL; + + priv = this->private; + BR_STUB_VER_NOT_ACTIVE_THEN_GOTO(frame, 
priv, wind); + + local = br_stub_alloc_local(this); + if (!local) { + op_ret = -1; + op_errno = ENOMEM; + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, BRS_MSG_ALLOC_MEM_FAILED, + "local path=%s", loc->path, "gfid=%s", + uuid_utoa(loc->inode->gfid), NULL); + goto unwind; + } + + br_stub_fill_local(local, NULL, NULL, loc->inode, loc->inode->gfid, + BR_STUB_NO_VERSIONING, 0); + + frame->local = local; + +wind: + STACK_WIND(frame, br_stub_unlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, loc, flag, xdata); + return 0; + +unwind: + if (frame->local == (void *)0x1) + frame->local = NULL; + STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, NULL, NULL, NULL); + return 0; } /** }}} */ @@ -2544,19 +3279,20 @@ br_stub_lookup (call_frame_t *frame, /* forget() */ int -br_stub_forget (xlator_t *this, inode_t *inode) +br_stub_forget(xlator_t *this, inode_t *inode) { - uint64_t ctx_addr = 0; - br_stub_inode_ctx_t *ctx = NULL; + uint64_t ctx_addr = 0; + br_stub_inode_ctx_t *ctx = NULL; - inode_ctx_del (inode, this, &ctx_addr); - if (!ctx_addr) - return 0; + inode_ctx_del(inode, this, &ctx_addr); + if (!ctx_addr) + return 0; - ctx = (br_stub_inode_ctx_t *) (long) ctx_addr; - GF_FREE (ctx); + ctx = (br_stub_inode_ctx_t *)(long)ctx_addr; - return 0; + GF_FREE(ctx); + + return 0; } /** }}} */ @@ -2564,60 +3300,58 @@ br_stub_forget (xlator_t *this, inode_t *inode) /** {{{ */ int32_t -br_stub_noop (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +br_stub_noop(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *xdata) { - STACK_DESTROY (frame->root); - return 0; + STACK_DESTROY(frame->root); + return 0; } static void -br_stub_send_ipc_fop (xlator_t *this, fd_t *fd, unsigned long releaseversion, - int sign_info) -{ - int32_t op = 0; - int32_t ret = 0; - dict_t *xdata = NULL; - call_frame_t *frame = NULL; - changelog_event_t ev = {0,}; - - ev.ev_type = 
CHANGELOG_OP_TYPE_BR_RELEASE; - ev.u.releasebr.version = releaseversion; - ev.u.releasebr.sign_info = sign_info; - gf_uuid_copy (ev.u.releasebr.gfid, fd->inode->gfid); - - xdata = dict_new (); - if (!xdata) { - gf_msg (this->name, GF_LOG_WARNING, ENOMEM, BRS_MSG_NO_MEMORY, - "dict allocation failed: cannot send IPC FOP " - "to changelog"); - goto out; - } - - ret = dict_set_static_bin (xdata, - "RELEASE-EVENT", &ev, CHANGELOG_EV_SIZE); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, 0, BRS_MSG_SET_EVENT_FAILED, - "cannot set release event in dict"); - goto dealloc_dict; - } +br_stub_send_ipc_fop(xlator_t *this, fd_t *fd, unsigned long releaseversion, + int sign_info) +{ + int32_t op = 0; + int32_t ret = 0; + dict_t *xdata = NULL; + call_frame_t *frame = NULL; + changelog_event_t ev = { + 0, + }; + + ev.ev_type = CHANGELOG_OP_TYPE_BR_RELEASE; + ev.u.releasebr.version = releaseversion; + ev.u.releasebr.sign_info = sign_info; + gf_uuid_copy(ev.u.releasebr.gfid, fd->inode->gfid); + + xdata = dict_new(); + if (!xdata) { + gf_smsg(this->name, GF_LOG_WARNING, ENOMEM, BRS_MSG_DICT_ALLOC_FAILED, + NULL); + goto out; + } + + ret = dict_set_static_bin(xdata, "RELEASE-EVENT", &ev, CHANGELOG_EV_SIZE); + if (ret) { + gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SET_EVENT_FAILED, NULL); + goto dealloc_dict; + } - frame = create_frame (this, this->ctx->pool); - if (!frame) { - gf_msg (this->name, GF_LOG_WARNING, 0, - BRS_MSG_CREATE_FRAME_FAILED, - "create_frame() failure"); - goto dealloc_dict; - } + frame = create_frame(this, this->ctx->pool); + if (!frame) { + gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_CREATE_FRAME_FAILED, + NULL); + goto dealloc_dict; + } - op = GF_IPC_TARGET_CHANGELOG; - STACK_WIND (frame, br_stub_noop, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->ipc, op, xdata); + op = GF_IPC_TARGET_CHANGELOG; + STACK_WIND(frame, br_stub_noop, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ipc, op, xdata); - dealloc_dict: - dict_unref (xdata); - out: - return; 
+dealloc_dict: + dict_unref(xdata); +out: + return; } /** @@ -2628,7 +3362,7 @@ br_stub_send_ipc_fop (xlator_t *this, fd_t *fd, unsigned long releaseversion, * 3) BR_SIGN_QUICK => reopen has happened and this release should trigger sign * 2 events: * 1) GF_FOP_RELEASE - * 2) GF_FOP_WRITE (actually a dummy write fro BitD) + * 2) GF_FOP_WRITE (actually a dummy write for BitD) * * This is how states are changed based on events: * EVENT: GF_FOP_RELEASE: @@ -2641,90 +3375,113 @@ br_stub_send_ipc_fop (xlator_t *this, fd_t *fd, unsigned long releaseversion, * set state = BR_SIGN_QUICK; */ br_sign_state_t -__br_stub_inode_sign_state (br_stub_inode_ctx_t *ctx, - glusterfs_fop_t fop, fd_t *fd) +__br_stub_inode_sign_state(br_stub_inode_ctx_t *ctx, glusterfs_fop_t fop, + fd_t *fd) { - br_sign_state_t sign_info = BR_SIGN_INVALID; - - switch (fop) { + br_sign_state_t sign_info = BR_SIGN_INVALID; + switch (fop) { case GF_FOP_FSETXATTR: - sign_info = ctx->info_sign = BR_SIGN_QUICK; - break; + sign_info = ctx->info_sign = BR_SIGN_QUICK; + break; case GF_FOP_RELEASE: - GF_ASSERT (ctx->info_sign != BR_SIGN_REOPEN_WAIT); + GF_ASSERT(ctx->info_sign != BR_SIGN_REOPEN_WAIT); - if (ctx->info_sign == BR_SIGN_NORMAL) { - sign_info = ctx->info_sign = BR_SIGN_REOPEN_WAIT; - } else { - sign_info = ctx->info_sign; - ctx->info_sign = BR_SIGN_NORMAL; - } + if (ctx->info_sign == BR_SIGN_NORMAL) { + sign_info = ctx->info_sign = BR_SIGN_REOPEN_WAIT; + } else { + sign_info = ctx->info_sign; + ctx->info_sign = BR_SIGN_NORMAL; + } - break; + break; default: - break; - } + break; + } - return sign_info; + return sign_info; } int32_t -br_stub_release (xlator_t *this, fd_t *fd) -{ - int32_t ret = 0; - int32_t flags = 0; - inode_t *inode = NULL; - unsigned long releaseversion = 0; - br_stub_inode_ctx_t *ctx = NULL; - uint64_t tmp = 0; - br_stub_fd_t *br_stub_fd = NULL; - int32_t signinfo = 0; +br_stub_release(xlator_t *this, fd_t *fd) +{ + int32_t ret = 0; + int32_t flags = 0; + inode_t *inode = NULL; + 
unsigned long releaseversion = 0; + br_stub_inode_ctx_t *ctx = NULL; + uint64_t tmp = 0; + br_stub_fd_t *br_stub_fd = NULL; + int32_t signinfo = 0; + + inode = fd->inode; + + LOCK(&inode->lock); + { + ctx = __br_stub_get_ongoing_version_ctx(this, inode, NULL); + if (ctx == NULL) + goto unblock; + br_stub_fd = br_stub_fd_ctx_get(this, fd); + if (br_stub_fd) { + list_del_init(&br_stub_fd->list); + } + + ret = __br_stub_can_trigger_release(inode, ctx, &releaseversion); + if (!ret) + goto unblock; + + signinfo = __br_stub_inode_sign_state(ctx, GF_FOP_RELEASE, fd); + signinfo = htonl(signinfo); + + /* inode back to initital state: mark dirty */ + if (ctx->info_sign == BR_SIGN_NORMAL) { + __br_stub_mark_inode_dirty(ctx); + __br_stub_unset_inode_modified(ctx); + } + } +unblock: + UNLOCK(&inode->lock); - inode = fd->inode; + if (ret) { + gf_msg_debug(this->name, 0, + "releaseversion: %lu | flags: %d " + "| signinfo: %d", + (unsigned long)ntohl(releaseversion), flags, + ntohl(signinfo)); + br_stub_send_ipc_fop(this, fd, releaseversion, signinfo); + } - LOCK (&inode->lock); - { - ctx = __br_stub_get_ongoing_version_ctx (this, inode, NULL); - if (ctx == NULL) - goto unblock; - br_stub_fd = br_stub_fd_ctx_get (this, fd); - if (br_stub_fd) { - list_del_init (&br_stub_fd->list); - } - - ret = __br_stub_can_trigger_release - (inode, ctx, &releaseversion); - if (!ret) - goto unblock; - - signinfo = __br_stub_inode_sign_state (ctx, GF_FOP_RELEASE, fd); - signinfo = htonl (signinfo); - - /* inode back to initital state: mark dirty */ - if (ctx->info_sign == BR_SIGN_NORMAL) { - __br_stub_mark_inode_dirty (ctx); - __br_stub_unset_inode_modified (ctx); - } - } - unblock: - UNLOCK (&inode->lock); + ret = fd_ctx_del(fd, this, &tmp); + br_stub_fd = (br_stub_fd_t *)(long)tmp; - if (ret) { - gf_msg_debug (this->name, 0, "releaseversion: %lu | flags: %d " - "| signinfo: %d", - (unsigned long) ntohl (releaseversion), flags, - ntohl(signinfo)); - br_stub_send_ipc_fop (this, fd, releaseversion, 
signinfo); - } + GF_FREE(br_stub_fd); - ret = fd_ctx_del (fd, this, &tmp); - br_stub_fd = (br_stub_fd_t *)(long)tmp; + return 0; +} - GF_FREE (br_stub_fd); +int32_t +br_stub_releasedir(xlator_t *this, fd_t *fd) +{ + br_stub_fd_t *fctx = NULL; + uint64_t ctx = 0; + int ret = 0; - return 0; + ret = fd_ctx_del(fd, this, &ctx); + if (ret < 0) + goto out; + + fctx = (br_stub_fd_t *)(long)ctx; + if (fctx->bad_object.dir) { + ret = sys_closedir(fctx->bad_object.dir); + if (ret) + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJ_DIR_CLOSE_FAIL, + "error=%s", strerror(errno), NULL); + } + + GF_FREE(fctx); +out: + return 0; } /** }}} */ @@ -2734,79 +3491,100 @@ br_stub_release (xlator_t *this, fd_t *fd) /* ictxmerge */ void -br_stub_ictxmerge (xlator_t *this, fd_t *fd, - inode_t *inode, inode_t *linked_inode) -{ - int32_t ret = 0; - uint64_t ctxaddr = 0; - uint64_t lctxaddr = 0; - br_stub_inode_ctx_t *ctx = NULL; - br_stub_inode_ctx_t *lctx = NULL; - br_stub_fd_t *br_stub_fd = NULL; - - ret = br_stub_get_inode_ctx (this, inode, &ctxaddr); +br_stub_ictxmerge(xlator_t *this, fd_t *fd, inode_t *inode, + inode_t *linked_inode) +{ + int32_t ret = 0; + uint64_t ctxaddr = 0; + uint64_t lctxaddr = 0; + br_stub_inode_ctx_t *ctx = NULL; + br_stub_inode_ctx_t *lctx = NULL; + br_stub_fd_t *br_stub_fd = NULL; + + ret = br_stub_get_inode_ctx(this, inode, &ctxaddr); + if (ret < 0) + goto done; + ctx = (br_stub_inode_ctx_t *)(uintptr_t)ctxaddr; + + LOCK(&linked_inode->lock); + { + ret = __br_stub_get_inode_ctx(this, linked_inode, &lctxaddr); if (ret < 0) - goto done; - ctx = (br_stub_inode_ctx_t *) ctxaddr; + goto unblock; + lctx = (br_stub_inode_ctx_t *)(uintptr_t)lctxaddr; - LOCK (&linked_inode->lock); - { - ret = __br_stub_get_inode_ctx (this, linked_inode, &lctxaddr); - if (ret < 0) - goto unblock; - lctx = (br_stub_inode_ctx_t *) lctxaddr; - - GF_ASSERT (list_is_singular (&ctx->fd_list)); - br_stub_fd = list_first_entry (&ctx->fd_list, br_stub_fd_t, - list); - if (br_stub_fd) { - 
GF_ASSERT (br_stub_fd->fd == fd); - list_move_tail (&br_stub_fd->list, &lctx->fd_list); - } + GF_ASSERT(list_is_singular(&ctx->fd_list)); + br_stub_fd = list_first_entry(&ctx->fd_list, br_stub_fd_t, list); + if (br_stub_fd) { + GF_ASSERT(br_stub_fd->fd == fd); + list_move_tail(&br_stub_fd->list, &lctx->fd_list); } + } unblock: - UNLOCK (&linked_inode->lock); + UNLOCK(&linked_inode->lock); - done: - return; +done: + return; } /** }}} */ - struct xlator_fops fops = { - .lookup = br_stub_lookup, - .open = br_stub_open, - .create = br_stub_create, - .readdirp = br_stub_readdirp, - .getxattr = br_stub_getxattr, - .fgetxattr = br_stub_fgetxattr, - .fsetxattr = br_stub_fsetxattr, - .writev = br_stub_writev, - .truncate = br_stub_truncate, - .ftruncate = br_stub_ftruncate, - .mknod = br_stub_mknod, - .readv = br_stub_readv, - .removexattr = br_stub_removexattr, - .fremovexattr = br_stub_fremovexattr, - .setxattr = br_stub_setxattr, + .lookup = br_stub_lookup, + .stat = br_stub_stat, + .fstat = br_stub_fstat, + .open = br_stub_open, + .create = br_stub_create, + .readdirp = br_stub_readdirp, + .getxattr = br_stub_getxattr, + .fgetxattr = br_stub_fgetxattr, + .fsetxattr = br_stub_fsetxattr, + .writev = br_stub_writev, + .truncate = br_stub_truncate, + .ftruncate = br_stub_ftruncate, + .mknod = br_stub_mknod, + .readv = br_stub_readv, + .removexattr = br_stub_removexattr, + .fremovexattr = br_stub_fremovexattr, + .setxattr = br_stub_setxattr, + .opendir = br_stub_opendir, + .readdir = br_stub_readdir, + .unlink = br_stub_unlink, }; struct xlator_cbks cbks = { - .forget = br_stub_forget, - .release = br_stub_release, - .ictxmerge = br_stub_ictxmerge, + .forget = br_stub_forget, + .release = br_stub_release, + .ictxmerge = br_stub_ictxmerge, }; struct volume_options options[] = { - { .key = {"bitrot"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "on", - .description = "enable/disable bitrot stub" - }, - { .key = {"export"}, - .type = GF_OPTION_TYPE_PATH, - .description = 
"brick path for versioning" - }, - { .key = {NULL} }, + {.key = {"bitrot"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .op_version = {GD_OP_VERSION_3_7_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_FORCE, + .tags = {"bitrot"}, + .description = "enable/disable bitrot stub"}, + {.key = {"export"}, + .type = GF_OPTION_TYPE_PATH, + .op_version = {GD_OP_VERSION_3_7_0}, + .tags = {"bitrot"}, + .description = "brick path for versioning", + .default_value = "{{ brick.path }}"}, + {.key = {NULL}}, +}; + +xlator_api_t xlator_api = { + .init = init, + .fini = fini, + .notify = notify, + .reconfigure = reconfigure, + .mem_acct_init = mem_acct_init, + .op_version = {1}, /* Present from the initial version */ + .fops = &fops, + .cbks = &cbks, + .options = options, + .identifier = "bitrot-stub", + .category = GF_MAINTAINED, }; diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.h b/xlators/features/bit-rot/src/stub/bit-rot-stub.h index 9362c129303..edd79a77e4f 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-stub.h +++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.h @@ -1,341 +1,325 @@ - /* - Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. +/* + Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
*/ #ifndef __BIT_ROT_STUB_H__ #define __BIT_ROT_STUB_H__ -#include "glusterfs.h" -#include "logging.h" -#include "dict.h" -#include "xlator.h" -#include "defaults.h" -#include "call-stub.h" +#include <glusterfs/glusterfs.h> +#include <glusterfs/logging.h> +#include <glusterfs/dict.h> +#include <glusterfs/xlator.h> +#include <glusterfs/defaults.h> +#include <glusterfs/call-stub.h> #include "bit-rot-stub-mem-types.h" - +#include <glusterfs/syscall.h> +#include <glusterfs/common-utils.h> #include "bit-rot-common.h" #include "bit-rot-stub-messages.h" +#include "glusterfs3-xdr.h" +#include <glusterfs/syncop.h> +#include <glusterfs/syncop-utils.h> + +#define BAD_OBJECT_THREAD_STACK_SIZE ((size_t)(1024 * 1024)) +#define BR_STUB_DUMP_STR_SIZE 65536 -typedef int (br_stub_version_cbk) (call_frame_t *, void *, - xlator_t *, int32_t, int32_t, dict_t *); +#define BR_PATH_MAX_EXTRA (PATH_MAX + 1024) +#define BR_PATH_MAX_PLUS (PATH_MAX + 2048) + +/* + * Oops. Spelling mistake. Correcting it + */ +#define OLD_BR_STUB_QUARANTINE_DIR GF_HIDDEN_PATH "/quanrantine" +#define BR_STUB_QUARANTINE_DIR GF_HIDDEN_PATH "/quarantine" + +/* do not reference frame->local in cbk unless initialized. + * Assigned 0x1 marks verisoning flag between call path and + * cbk path. 
+ */ +#define BR_STUB_VER_NOT_ACTIVE_THEN_GOTO(frame, priv, label) \ + do { \ + if (priv->do_versioning) \ + frame->local = (void *)0x1; \ + else \ + goto label; \ + } while (0) + +#define BR_STUB_VER_COND_GOTO(priv, cond, label) \ + do { \ + if (!priv->do_versioning || cond) \ + goto label; \ + } while (0) + +#define BR_STUB_VER_ENABLED_IN_CALLPATH(frame, flag) \ + do { \ + if (frame->local) \ + flag = _gf_true; \ + if (frame->local == (void *)0x1) \ + frame->local = NULL; \ + } while (0) + +#define BR_STUB_RESET_LOCAL_NULL(frame) \ + do { \ + if (frame->local == (void *)0x1) \ + frame->local = NULL; \ + } while (0) + +typedef int(br_stub_version_cbk)(call_frame_t *, void *, xlator_t *, int32_t, + int32_t, dict_t *); typedef struct br_stub_inode_ctx { - int need_writeback; /* does the inode need - a writeback to disk? */ - unsigned long currentversion; /* ongoing version */ - - int info_sign; - struct list_head fd_list; /* list of open fds or fds participating in - write operations */ - gf_boolean_t bad_object; + int need_writeback; /* does the inode need + a writeback to disk? */ + unsigned long currentversion; /* ongoing version */ + + int info_sign; + struct list_head fd_list; /* list of open fds or fds participating in + write operations */ + gf_boolean_t bad_object; } br_stub_inode_ctx_t; typedef struct br_stub_fd { - fd_t *fd; - struct list_head list; + fd_t *fd; + struct list_head list; + struct bad_object_dir { + DIR *dir; + off_t dir_eof; + } bad_object; } br_stub_fd_t; -#define I_DIRTY (1<<0) /* inode needs writeback */ -#define I_MODIFIED (1<<1) -#define WRITEBACK_DURABLE 1 /* writeback is durable */ +#define I_DIRTY (1 << 0) /* inode needs writeback */ +#define I_MODIFIED (1 << 1) +#define WRITEBACK_DURABLE 1 /* writeback is durable */ /** * This could just have been a plain struct without unions and all, * but we may need additional things in the future. 
*/ typedef struct br_stub_local { - call_stub_t *fopstub; /* stub for original fop */ - - int versioningtype; /* not much used atm */ - - union { - struct br_stub_ctx { - fd_t *fd; - uuid_t gfid; - inode_t *inode; - unsigned long version; - } context; - } u; + call_stub_t *fopstub; /* stub for original fop */ + + int versioningtype; /* not much used atm */ + + union { + struct br_stub_ctx { + fd_t *fd; + uuid_t gfid; + inode_t *inode; + unsigned long version; + } context; + } u; } br_stub_local_t; #define BR_STUB_NO_VERSIONING (1 << 0) #define BR_STUB_INCREMENTAL_VERSIONING (1 << 1) typedef struct br_stub_private { - gf_boolean_t go; + gf_boolean_t do_versioning; + + uint32_t boot[2]; + char export[PATH_MAX]; - uint32_t boot[2]; - char export[PATH_MAX]; + pthread_mutex_t lock; + pthread_cond_t cond; - pthread_mutex_t lock; - pthread_cond_t cond; + struct list_head squeue; /* ordered signing queue */ + pthread_t signth; + struct bad_objects_container { + pthread_t thread; + pthread_mutex_t bad_lock; + pthread_cond_t bad_cond; + struct list_head bad_queue; + } container; + struct mem_pool *local_pool; - struct list_head squeue; /* ordered signing queue */ - pthread_t signth; + char stub_basepath[BR_PATH_MAX_EXTRA]; - struct mem_pool *local_pool; + uuid_t bad_object_dir_gfid; } br_stub_private_t; +br_stub_fd_t * +br_stub_fd_new(void); + +int +__br_stub_fd_ctx_set(xlator_t *this, fd_t *fd, br_stub_fd_t *br_stub_fd); + +br_stub_fd_t * +__br_stub_fd_ctx_get(xlator_t *this, fd_t *fd); + +br_stub_fd_t * +br_stub_fd_ctx_get(xlator_t *this, fd_t *fd); + +int32_t +br_stub_fd_ctx_set(xlator_t *this, fd_t *fd, br_stub_fd_t *br_stub_fd); + static inline gf_boolean_t -__br_stub_is_bad_object (br_stub_inode_ctx_t *ctx) +__br_stub_is_bad_object(br_stub_inode_ctx_t *ctx) { - return ctx->bad_object; + return ctx->bad_object; } static inline void -__br_stub_mark_object_bad (br_stub_inode_ctx_t *ctx) +__br_stub_mark_object_bad(br_stub_inode_ctx_t *ctx) { - ctx->bad_object = _gf_true; + 
ctx->bad_object = _gf_true; } /* inode writeback helpers */ static inline void -__br_stub_mark_inode_dirty (br_stub_inode_ctx_t *ctx) +__br_stub_mark_inode_dirty(br_stub_inode_ctx_t *ctx) { - ctx->need_writeback |= I_DIRTY; + ctx->need_writeback |= I_DIRTY; } static inline void -__br_stub_mark_inode_synced (br_stub_inode_ctx_t *ctx) +__br_stub_mark_inode_synced(br_stub_inode_ctx_t *ctx) { - ctx->need_writeback &= ~I_DIRTY; + ctx->need_writeback &= ~I_DIRTY; } static inline int -__br_stub_is_inode_dirty (br_stub_inode_ctx_t *ctx) +__br_stub_is_inode_dirty(br_stub_inode_ctx_t *ctx) { - return (ctx->need_writeback & I_DIRTY); + return (ctx->need_writeback & I_DIRTY); } /* inode mofification markers */ static inline void -__br_stub_set_inode_modified (br_stub_inode_ctx_t *ctx) +__br_stub_set_inode_modified(br_stub_inode_ctx_t *ctx) { - ctx->need_writeback |= I_MODIFIED; + ctx->need_writeback |= I_MODIFIED; } static inline void -__br_stub_unset_inode_modified (br_stub_inode_ctx_t *ctx) +__br_stub_unset_inode_modified(br_stub_inode_ctx_t *ctx) { - ctx->need_writeback &= ~I_MODIFIED; + ctx->need_writeback &= ~I_MODIFIED; } static inline int -__br_stub_is_inode_modified (br_stub_inode_ctx_t *ctx) -{ - return (ctx->need_writeback & I_MODIFIED); -} - -br_stub_fd_t * -br_stub_fd_new (void) -{ - br_stub_fd_t *br_stub_fd = NULL; - - br_stub_fd = GF_CALLOC (1, sizeof (*br_stub_fd), - gf_br_stub_mt_br_stub_fd_t); - - return br_stub_fd; -} - -int -__br_stub_fd_ctx_set (xlator_t *this, fd_t *fd, br_stub_fd_t *br_stub_fd) -{ - uint64_t value = 0; - int ret = -1; - - GF_VALIDATE_OR_GOTO ("bit-rot-stub", this, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - GF_VALIDATE_OR_GOTO (this->name, br_stub_fd, out); - - value = (uint64_t)(long) br_stub_fd; - - ret = __fd_ctx_set (fd, this, value); - -out: - return ret; -} - -br_stub_fd_t * -__br_stub_fd_ctx_get (xlator_t *this, fd_t *fd) -{ - br_stub_fd_t *br_stub_fd = NULL; - uint64_t value = 0; - int ret = -1; - - GF_VALIDATE_OR_GOTO 
("bit-rot-stub", this, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - - ret = __fd_ctx_get (fd, this, &value); - if (ret) - return NULL; - - br_stub_fd = (br_stub_fd_t *) ((long) value); - -out: - return br_stub_fd; -} - -br_stub_fd_t * -br_stub_fd_ctx_get (xlator_t *this, fd_t *fd) -{ - br_stub_fd_t *br_stub_fd = NULL; - - GF_VALIDATE_OR_GOTO ("bit-rot-stub", this, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - - LOCK (&fd->lock); - { - br_stub_fd = __br_stub_fd_ctx_get (this, fd); - } - UNLOCK (&fd->lock); - -out: - return br_stub_fd; -} - -int32_t -br_stub_fd_ctx_set (xlator_t *this, fd_t *fd, br_stub_fd_t *br_stub_fd) +__br_stub_is_inode_modified(br_stub_inode_ctx_t *ctx) { - int32_t ret = -1; - - GF_VALIDATE_OR_GOTO ("bit-rot-stub", this, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - GF_VALIDATE_OR_GOTO (this->name, br_stub_fd, out); - - LOCK (&fd->lock); - { - ret = __br_stub_fd_ctx_set (this, fd, br_stub_fd); - } - UNLOCK (&fd->lock); - -out: - return ret; + return (ctx->need_writeback & I_MODIFIED); } static inline int -br_stub_require_release_call (xlator_t *this, fd_t *fd, br_stub_fd_t **fd_ctx) +br_stub_require_release_call(xlator_t *this, fd_t *fd, br_stub_fd_t **fd_ctx) { - int32_t ret = 0; - br_stub_fd_t *br_stub_fd = NULL; + int32_t ret = 0; + br_stub_fd_t *br_stub_fd = NULL; - br_stub_fd = br_stub_fd_new (); - if (!br_stub_fd) - return -1; + br_stub_fd = br_stub_fd_new(); + if (!br_stub_fd) + return -1; - br_stub_fd->fd = fd; - INIT_LIST_HEAD (&br_stub_fd->list); + br_stub_fd->fd = fd; + INIT_LIST_HEAD(&br_stub_fd->list); - ret = br_stub_fd_ctx_set (this, fd, br_stub_fd); - if (ret) - gf_msg (this->name, GF_LOG_WARNING, 0, - BRS_MSG_SET_CONTEXT_FAILED, - "could not set fd context (for release callback"); - else - *fd_ctx = br_stub_fd; + ret = br_stub_fd_ctx_set(this, fd, br_stub_fd); + if (ret) + gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SET_CONTEXT_FAILED, + NULL); + else + *fd_ctx = br_stub_fd; - return ret; + return ret; } 
/* get/set inode context helpers */ static inline int -__br_stub_get_inode_ctx (xlator_t *this, - inode_t *inode, uint64_t *ctx) +__br_stub_get_inode_ctx(xlator_t *this, inode_t *inode, uint64_t *ctx) { - return __inode_ctx_get (inode, this, ctx); + return __inode_ctx_get(inode, this, ctx); } static inline int -br_stub_get_inode_ctx (xlator_t *this, - inode_t *inode, uint64_t *ctx) +br_stub_get_inode_ctx(xlator_t *this, inode_t *inode, uint64_t *ctx) { - int ret = -1; + int ret = -1; - LOCK (&inode->lock); - { - ret = __br_stub_get_inode_ctx (this, inode, ctx); - } - UNLOCK (&inode->lock); + LOCK(&inode->lock); + { + ret = __br_stub_get_inode_ctx(this, inode, ctx); + } + UNLOCK(&inode->lock); - return ret; + return ret; } static inline int -br_stub_set_inode_ctx (xlator_t *this, - inode_t *inode, br_stub_inode_ctx_t *ctx) +br_stub_set_inode_ctx(xlator_t *this, inode_t *inode, br_stub_inode_ctx_t *ctx) { - uint64_t ctx_addr = (uint64_t) ctx; - return inode_ctx_set (inode, this, &ctx_addr); + uint64_t ctx_addr = (uint64_t)(uintptr_t)ctx; + return inode_ctx_set(inode, this, &ctx_addr); } /* version get/set helpers */ static inline unsigned long -__br_stub_writeback_version (br_stub_inode_ctx_t *ctx) +__br_stub_writeback_version(br_stub_inode_ctx_t *ctx) { - return (ctx->currentversion + 1); + return (ctx->currentversion + 1); } static inline void -__br_stub_set_ongoing_version (br_stub_inode_ctx_t *ctx, unsigned long version) +__br_stub_set_ongoing_version(br_stub_inode_ctx_t *ctx, unsigned long version) { - if (ctx->currentversion < version) - ctx->currentversion = version; - else - gf_msg ("bit-rot-stub", GF_LOG_WARNING, 0, - BRS_MSG_CHANGE_VERSION_FAILED, "current version: %lu" - "new version: %lu", ctx->currentversion, version); + if (ctx->currentversion < version) + ctx->currentversion = version; + else + gf_smsg("bit-rot-stub", GF_LOG_WARNING, 0, + BRS_MSG_CHANGE_VERSION_FAILED, "current version=%lu", + ctx->currentversion, "new version=%lu", version, NULL); } 
static inline int -__br_stub_can_trigger_release (inode_t *inode, - br_stub_inode_ctx_t *ctx, unsigned long *version) +__br_stub_can_trigger_release(inode_t *inode, br_stub_inode_ctx_t *ctx, + unsigned long *version) { - /** - * If the inode is modified, then it has to be dirty. An inode is - * marked dirty once version is increased. Its marked as modified - * when the modification call (write/truncate) which triggered - * the versioning is successful. - */ - if (__br_stub_is_inode_modified (ctx) - && list_empty (&ctx->fd_list) - && (ctx->info_sign != BR_SIGN_REOPEN_WAIT)) { - - GF_ASSERT (__br_stub_is_inode_dirty (ctx) == 0); + /** + * If the inode is modified, then it has to be dirty. An inode is + * marked dirty once version is increased. Its marked as modified + * when the modification call (write/truncate) which triggered + * the versioning is successful. + */ + if (__br_stub_is_inode_modified(ctx) && list_empty(&ctx->fd_list) && + (ctx->info_sign != BR_SIGN_REOPEN_WAIT)) { + GF_ASSERT(__br_stub_is_inode_dirty(ctx) == 0); - if (version) - *version = htonl (ctx->currentversion); - return 1; - } + if (version) + *version = htonl(ctx->currentversion); + return 1; + } - return 0; + return 0; } static inline int32_t -br_stub_get_ongoing_version (xlator_t *this, - inode_t *inode, unsigned long *version) +br_stub_get_ongoing_version(xlator_t *this, inode_t *inode, + unsigned long *version) { - int32_t ret = 0; - uint64_t ctx_addr = 0; - br_stub_inode_ctx_t *ctx = NULL; - - LOCK (&inode->lock); - { - ret = __inode_ctx_get (inode, this, &ctx_addr); - if (ret < 0) - goto unblock; - ctx = (br_stub_inode_ctx_t *) (long) ctx_addr; - *version = ctx->currentversion; - } - unblock: - UNLOCK (&inode->lock); - - return ret; + int32_t ret = 0; + uint64_t ctx_addr = 0; + br_stub_inode_ctx_t *ctx = NULL; + + LOCK(&inode->lock); + { + ret = __inode_ctx_get(inode, this, &ctx_addr); + if (ret < 0) + goto unblock; + ctx = (br_stub_inode_ctx_t *)(long)ctx_addr; + *version = 
ctx->currentversion; + } +unblock: + UNLOCK(&inode->lock); + + return ret; } /** @@ -344,45 +328,52 @@ br_stub_get_ongoing_version (xlator_t *this, * *needs* to be valid in the caller. */ static inline br_stub_inode_ctx_t * -__br_stub_get_ongoing_version_ctx (xlator_t *this, - inode_t *inode, unsigned long *version) +__br_stub_get_ongoing_version_ctx(xlator_t *this, inode_t *inode, + unsigned long *version) { - int32_t ret = 0; - uint64_t ctx_addr = 0; - br_stub_inode_ctx_t *ctx = NULL; - - ret = __inode_ctx_get (inode, this, &ctx_addr); - if (ret < 0) - return NULL; - ctx = (br_stub_inode_ctx_t *) (long) ctx_addr; - if (version) - *version = ctx->currentversion; - - return ctx; + int32_t ret = 0; + uint64_t ctx_addr = 0; + br_stub_inode_ctx_t *ctx = NULL; + + ret = __inode_ctx_get(inode, this, &ctx_addr); + if (ret < 0) + return NULL; + ctx = (br_stub_inode_ctx_t *)(long)ctx_addr; + if (version) + *version = ctx->currentversion; + + return ctx; } /* filter for xattr fetch */ static inline int -br_stub_is_internal_xattr (const char *name) +br_stub_is_internal_xattr(const char *name) { - if (name - && ((strncmp (name, BITROT_CURRENT_VERSION_KEY, - strlen (BITROT_CURRENT_VERSION_KEY)) == 0) - || (strncmp (name, BITROT_SIGNING_VERSION_KEY, - strlen (BITROT_SIGNING_VERSION_KEY)) == 0))) - return 1; - return 0; + if (name && ((strncmp(name, BITROT_CURRENT_VERSION_KEY, + SLEN(BITROT_CURRENT_VERSION_KEY)) == 0) || + (strncmp(name, BITROT_SIGNING_VERSION_KEY, + SLEN(BITROT_SIGNING_VERSION_KEY)) == 0))) + return 1; + return 0; } static inline void -br_stub_remove_vxattrs (dict_t *xattr) +br_stub_remove_vxattrs(dict_t *xattr, gf_boolean_t remove_bad_marker) { - if (xattr) { - dict_del (xattr, BITROT_OBJECT_BAD_KEY); - dict_del (xattr, BITROT_CURRENT_VERSION_KEY); - dict_del (xattr, BITROT_SIGNING_VERSION_KEY); - dict_del (xattr, BITROT_SIGNING_XATTR_SIZE_KEY); - } + if (xattr) { + /* + * When a file is corrupted, bad-object should be + * set in the dict. 
But, other info such as version, + * signature etc should not be set. Hence the flag + * remove_bad_marker. The consumer should know whether + * to send the bad-object info in the dict or not. + */ + if (remove_bad_marker) + dict_del(xattr, BITROT_OBJECT_BAD_KEY); + dict_del(xattr, BITROT_CURRENT_VERSION_KEY); + dict_del(xattr, BITROT_SIGNING_VERSION_KEY); + dict_del(xattr, BITROT_SIGNING_XATTR_SIZE_KEY); + } } /** @@ -396,64 +387,60 @@ br_stub_remove_vxattrs (dict_t *xattr) * errors can be made into enums. */ static inline int -br_stub_is_bad_object (xlator_t *this, inode_t *inode) +br_stub_is_bad_object(xlator_t *this, inode_t *inode) { - int bad_object = 0; - gf_boolean_t tmp = _gf_false; - uint64_t ctx_addr = 0; - br_stub_inode_ctx_t *ctx = NULL; - int32_t ret = -1; - - ret = br_stub_get_inode_ctx (this, inode, &ctx_addr); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - BRS_MSG_GET_INODE_CONTEXT_FAILED, - "failed to get the inode context for the inode %s", - uuid_utoa (inode->gfid)); - bad_object = -1; - goto out; - } - - ctx = (br_stub_inode_ctx_t *)(long)ctx_addr; - - LOCK (&inode->lock); - { - tmp = __br_stub_is_bad_object (ctx); - if (tmp) - bad_object = -2; - } - UNLOCK (&inode->lock); + int bad_object = 0; + gf_boolean_t tmp = _gf_false; + uint64_t ctx_addr = 0; + br_stub_inode_ctx_t *ctx = NULL; + int32_t ret = -1; + + ret = br_stub_get_inode_ctx(this, inode, &ctx_addr); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_GET_INODE_CONTEXT_FAILED, + "inode-gfid=%s", uuid_utoa(inode->gfid), NULL); + bad_object = -1; + goto out; + } + + ctx = (br_stub_inode_ctx_t *)(long)ctx_addr; + + LOCK(&inode->lock); + { + tmp = __br_stub_is_bad_object(ctx); + if (tmp) + bad_object = -2; + } + UNLOCK(&inode->lock); out: - return bad_object; + return bad_object; } static inline int32_t -br_stub_mark_object_bad (xlator_t *this, inode_t *inode) +br_stub_mark_object_bad(xlator_t *this, inode_t *inode) { - int32_t ret = -1; - uint64_t ctx_addr = 0; - 
br_stub_inode_ctx_t *ctx = NULL; - - ret = br_stub_get_inode_ctx (this, inode, &ctx_addr); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - BRS_MSG_GET_INODE_CONTEXT_FAILED, "failed to get the " - "inode context for the inode %s", - uuid_utoa (inode->gfid)); - goto out; - } + int32_t ret = -1; + uint64_t ctx_addr = 0; + br_stub_inode_ctx_t *ctx = NULL; - ctx = (br_stub_inode_ctx_t *)(long)ctx_addr; + ret = br_stub_get_inode_ctx(this, inode, &ctx_addr); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_GET_INODE_CONTEXT_FAILED, + "inode-gfid=%s", uuid_utoa(inode->gfid), NULL); + goto out; + } + + ctx = (br_stub_inode_ctx_t *)(long)ctx_addr; - LOCK (&inode->lock); - { - __br_stub_mark_object_bad (ctx); - } - UNLOCK (&inode->lock); + LOCK(&inode->lock); + { + __br_stub_mark_object_bad(ctx); + } + UNLOCK(&inode->lock); out: - return ret; + return ret; } /** @@ -461,20 +448,68 @@ out: * given to the caller and the caller has to decide what to do. */ static inline int32_t -br_stub_mark_xdata_bad_object (xlator_t *this, inode_t *inode, dict_t *xdata) +br_stub_mark_xdata_bad_object(xlator_t *this, inode_t *inode, dict_t *xdata) { - int32_t ret = 0; + int32_t ret = 0; - if (br_stub_is_bad_object (this, inode) == -2) - ret = dict_set_int32 (xdata, GLUSTERFS_BAD_INODE, 1); + if (br_stub_is_bad_object(this, inode) == -2) + ret = dict_set_int32(xdata, GLUSTERFS_BAD_INODE, 1); - return ret; + return ret; } int32_t -br_stub_add_fd_to_inode (xlator_t *this, fd_t *fd, br_stub_inode_ctx_t *ctx); +br_stub_add_fd_to_inode(xlator_t *this, fd_t *fd, br_stub_inode_ctx_t *ctx); br_sign_state_t -__br_stub_inode_sign_state (br_stub_inode_ctx_t *ctx, glusterfs_fop_t fop, - fd_t *fd); +__br_stub_inode_sign_state(br_stub_inode_ctx_t *ctx, glusterfs_fop_t fop, + fd_t *fd); + +int +br_stub_dir_create(xlator_t *this, br_stub_private_t *priv); + +int +br_stub_add(xlator_t *this, uuid_t gfid); + +int32_t +br_stub_create_stub_gfid(xlator_t *this, char *stub_gfid_path, uuid_t gfid); + 
+int +br_stub_dir_create(xlator_t *this, br_stub_private_t *priv); + +call_stub_t * +__br_stub_dequeue(struct list_head *callstubs); + +void +__br_stub_enqueue(struct list_head *callstubs, call_stub_t *stub); + +void +br_stub_worker_enqueue(xlator_t *this, call_stub_t *stub); + +void * +br_stub_worker(void *data); + +int32_t +br_stub_lookup_wrapper(call_frame_t *frame, xlator_t *this, loc_t *loc, + dict_t *xattr_req); + +int32_t +br_stub_readdir_wrapper(call_frame_t *frame, xlator_t *this, fd_t *fd, + size_t size, off_t off, dict_t *xdata); + +int +br_stub_del(xlator_t *this, uuid_t gfid); + +int +br_stub_bad_objects_path(xlator_t *this, fd_t *fd, gf_dirent_t *entries, + dict_t **dict); + +void +br_stub_entry_xattr_fill(xlator_t *this, char *hpath, gf_dirent_t *entry, + dict_t *dict); + +int +br_stub_get_path_of_gfid(xlator_t *this, inode_t *parent, inode_t *inode, + uuid_t gfid, char **path); + #endif /* __BIT_ROT_STUB_H__ */ diff --git a/xlators/features/changelog/lib/examples/c/get-changes-multi.c b/xlators/features/changelog/lib/examples/c/get-changes-multi.c index 3741bdf6edc..5ea5bbb6630 100644 --- a/xlators/features/changelog/lib/examples/c/get-changes-multi.c +++ b/xlators/features/changelog/lib/examples/c/get-changes-multi.c @@ -25,64 +25,66 @@ #include "changelog.h" -void *brick_init (void *xl, struct gf_brick_spec *brick) +void * +brick_init(void *xl, struct gf_brick_spec *brick) { - return brick; + return brick; } -void brick_fini (void *xl, char *brick, void *data) +void +brick_fini(void *xl, char *brick, void *data) { - return; + return; } -void brick_callback (void *xl, char *brick, - void *data, changelog_event_t *ev) +void +brick_callback(void *xl, char *brick, void *data, changelog_event_t *ev) { - printf ("->callback: (brick,type) [%s:%d]\n", brick, ev->ev_type); + printf("->callback: (brick,type) [%s:%d]\n", brick, ev->ev_type); } -void fill_brick_spec (struct gf_brick_spec *brick, char *path) +void +fill_brick_spec(struct gf_brick_spec *brick, 
char *path) { - brick->brick_path = strdup (path); - brick->filter = CHANGELOG_OP_TYPE_BR_RELEASE; - - brick->init = brick_init; - brick->fini = brick_fini; - brick->callback = brick_callback; - brick->connected = NULL; - brick->disconnected = NULL; + brick->brick_path = strdup(path); + brick->filter = CHANGELOG_OP_TYPE_BR_RELEASE; + + brick->init = brick_init; + brick->fini = brick_fini; + brick->callback = brick_callback; + brick->connected = NULL; + brick->disconnected = NULL; } int -main (int argc, char **argv) +main(int argc, char **argv) { - int ret = 0; - void *bricks = NULL; - struct gf_brick_spec *brick = NULL; + int ret = 0; + void *bricks = NULL; + struct gf_brick_spec *brick = NULL; - bricks = calloc (2, sizeof (struct gf_brick_spec)); - if (!bricks) - goto error_return; + bricks = calloc(2, sizeof(struct gf_brick_spec)); + if (!bricks) + goto error_return; - brick = (struct gf_brick_spec *)bricks; - fill_brick_spec (brick, "/export/z1/zwoop"); + brick = (struct gf_brick_spec *)bricks; + fill_brick_spec(brick, "/export/z1/zwoop"); - brick++; - fill_brick_spec (brick, "/export/z2/zwoop"); + brick++; + fill_brick_spec(brick, "/export/z2/zwoop"); - ret = gf_changelog_init (NULL); - if (ret) - goto error_return; + ret = gf_changelog_init(NULL); + if (ret) + goto error_return; - ret = gf_changelog_register_generic ((struct gf_brick_spec *)bricks, 2, - 0, "/tmp/multi-changes.log", 9, - NULL); - if (ret) - goto error_return; + ret = gf_changelog_register_generic((struct gf_brick_spec *)bricks, 2, 0, + "/tmp/multi-changes.log", 9, NULL); + if (ret) + goto error_return; - /* let callbacks do the job */ - select (0, NULL, NULL, NULL, NULL); + /* let callbacks do the job */ + select(0, NULL, NULL, NULL, NULL); - error_return: - return -1; +error_return: + return -1; } diff --git a/xlators/features/changelog/lib/examples/c/get-changes.c b/xlators/features/changelog/lib/examples/c/get-changes.c index ef766c566b6..8bc651c24a4 100644 --- 
a/xlators/features/changelog/lib/examples/c/get-changes.c +++ b/xlators/features/changelog/lib/examples/c/get-changes.c @@ -27,67 +27,67 @@ #include "changelog.h" -#define handle_error(fn) \ - printf ("%s (reason: %s)\n", fn, strerror (errno)) +#define handle_error(fn) printf("%s (reason: %s)\n", fn, strerror(errno)) int -main (int argc, char ** argv) +main(int argc, char **argv) { - int i = 0; - int ret = 0; - ssize_t nr_changes = 0; - ssize_t changes = 0; - char fbuf[PATH_MAX] = {0,}; - - ret = gf_changelog_init (NULL); - if (ret) { - handle_error ("Init failed"); - goto out; + int i = 0; + int ret = 0; + ssize_t nr_changes = 0; + ssize_t changes = 0; + char fbuf[PATH_MAX] = { + 0, + }; + + ret = gf_changelog_init(NULL); + if (ret) { + handle_error("Init failed"); + goto out; + } + + /* get changes for brick "/home/vshankar/export/yow/yow-1" */ + ret = gf_changelog_register("/export/z1/zwoop", "/tmp/scratch", + "/tmp/change.log", 9, 5); + if (ret) { + handle_error("register failed"); + goto out; + } + + while (1) { + i = 0; + nr_changes = gf_changelog_scan(); + if (nr_changes < 0) { + handle_error("scan(): "); + break; } - /* get changes for brick "/home/vshankar/export/yow/yow-1" */ - ret = gf_changelog_register ("/export/z1/zwoop", - "/tmp/scratch", "/tmp/change.log", 9, 5); - if (ret) { - handle_error ("register failed"); - goto out; - } - - while (1) { - i = 0; - nr_changes = gf_changelog_scan (); - if (nr_changes < 0) { - handle_error ("scan(): "); - break; - } - - if (nr_changes == 0) - goto next; + if (nr_changes == 0) + goto next; - printf ("Got %ld changelog files\n", nr_changes); + printf("Got %ld changelog files\n", nr_changes); - while ( (changes = - gf_changelog_next_change (fbuf, PATH_MAX)) > 0) { - printf ("changelog file [%d]: %s\n", ++i, fbuf); + while ((changes = gf_changelog_next_change(fbuf, PATH_MAX)) > 0) { + printf("changelog file [%d]: %s\n", ++i, fbuf); - /* process changelog */ - /* ... */ - /* ... */ - /* ... 
*/ - /* done processing */ + /* process changelog */ + /* ... */ + /* ... */ + /* ... */ + /* done processing */ - ret = gf_changelog_done (fbuf); - if (ret) - handle_error ("gf_changelog_done"); - } + ret = gf_changelog_done(fbuf); + if (ret) + handle_error("gf_changelog_done"); + } - if (changes == -1) - handle_error ("gf_changelog_next_change"); + if (changes == -1) + handle_error("gf_changelog_next_change"); - next: - sleep (10); - } + next: + sleep(10); + } - out: - return ret; +out: + return ret; } diff --git a/xlators/features/changelog/lib/examples/c/get-history.c b/xlators/features/changelog/lib/examples/c/get-history.c index ee3ec0ad100..3e888d75ca6 100644 --- a/xlators/features/changelog/lib/examples/c/get-history.c +++ b/xlators/features/changelog/lib/examples/c/get-history.c @@ -27,90 +27,90 @@ #include "changelog.h" -#define handle_error(fn) \ - printf ("%s (reason: %s)\n", fn, strerror (errno)) +#define handle_error(fn) printf("%s (reason: %s)\n", fn, strerror(errno)) int -main (int argc, char ** argv) +main(int argc, char **argv) { - int i = 0; - int ret = 0; - ssize_t nr_changes = 0; - ssize_t changes = 0; - char fbuf[PATH_MAX] = {0,}; - unsigned long end_ts = 0; - - ret = gf_changelog_init (NULL); - if (ret) { - handle_error ("init failed"); - goto out; + int i = 0; + int ret = 0; + ssize_t nr_changes = 0; + ssize_t changes = 0; + char fbuf[PATH_MAX] = { + 0, + }; + unsigned long end_ts = 0; + + ret = gf_changelog_init(NULL); + if (ret) { + handle_error("init failed"); + goto out; + } + + ret = gf_changelog_register("/export/z1/zwoop", "/tmp/scratch_v1", + "/tmp/changes.log", 9, 5); + if (ret) { + handle_error("register failed"); + goto out; + } + + int a, b; + printf("give the two numbers start and end\t"); + scanf("%d%d", &a, &b); + ret = gf_history_changelog("/export/z1/zwoop/.glusterfs/changelogs", a, b, + 3, &end_ts); + if (ret == -1) { + printf("history failed"); + goto out; + } + + printf("end time till when changelog available : %d , 
ret(%d) \t", end_ts, + ret); + fflush(stdout); + + while (1) { + nr_changes = gf_history_changelog_scan(); + printf("scanned, nr_changes : %d\n", nr_changes); + if (nr_changes < 0) { + handle_error("scan(): "); + break; } - ret = gf_changelog_register ("/export/z1/zwoop", - "/tmp/scratch_v1", "/tmp/changes.log", - 9, 5); - if (ret) { - handle_error ("register failed"); - goto out; + if (nr_changes == 0) { + printf("done scanning \n"); + goto out; } - int a, b; - printf ("give the two numbers start and end\t"); - scanf ("%d%d", &a, &b); - ret = gf_history_changelog ("/export/z1/zwoop/.glusterfs/changelogs", - a, b, 3, &end_ts); - if (ret == -1) { - printf ("history failed"); - goto out; - } + printf("Got %ld changelog files\n", nr_changes); + + while ((changes = gf_history_changelog_next_change(fbuf, PATH_MAX)) > + 0) { + printf("changelog file [%d]: %s\n", ++i, fbuf); - printf ("end time till when changelog available : %d , ret(%d) \t", end_ts, ret); - fflush(stdout); - - while (1) { - nr_changes = gf_history_changelog_scan (); - printf ("scanned, nr_changes : %d\n",nr_changes); - if (nr_changes < 0) { - handle_error ("scan(): "); - break; - } - - if (nr_changes == 0) { - printf ("done scanning \n"); - goto out; - } - - printf ("Got %ld changelog files\n", nr_changes); - - while ( (changes = - gf_history_changelog_next_change (fbuf, PATH_MAX)) > 0) { - printf ("changelog file [%d]: %s\n", ++i, fbuf); - - /* process changelog */ - /* ... */ - /* ... */ - /* ... */ - /* done processing */ - - ret = gf_history_changelog_done (fbuf); - if (ret) - handle_error ("gf_changelog_done"); - } - /* - if (changes == -1) - handle_error ("gf_changelog_next_change"); - if (nr_changes ==1){ - printf("continue scanning\n"); - } - - if(nr_changes == 0){ - printf("done scanning \n"); - goto out; - } - */ + /* process changelog */ + /* ... */ + /* ... */ + /* ... 
*/ + /* done processing */ + + ret = gf_history_changelog_done(fbuf); + if (ret) + handle_error("gf_changelog_done"); + } + /* + if (changes == -1) + handle_error ("gf_changelog_next_change"); + if (nr_changes ==1){ + printf("continue scanning\n"); } + if(nr_changes == 0){ + printf("done scanning \n"); + goto out; + } + */ + } out: - return ret; + return ret; } diff --git a/xlators/features/changelog/lib/examples/python/changes.py b/xlators/features/changelog/lib/examples/python/changes.py index 221df642a36..c410d3b000d 100644..100755 --- a/xlators/features/changelog/lib/examples/python/changes.py +++ b/xlators/features/changelog/lib/examples/python/changes.py @@ -1,5 +1,6 @@ -#!/usr/bin/python +#!/usr/bin/python3 +from __future__ import print_function import os import sys import time @@ -16,18 +17,18 @@ def get_changes(brick, scratch_dir, log_file, log_level, interval): cl.cl_scan() change_list = cl.cl_getchanges() if change_list: - print change_list + print(change_list) for change in change_list: - print('done with %s' % (change)) + print(('done with %s' % (change))) cl.cl_done(change) time.sleep(interval) except OSError: ex = sys.exc_info()[1] - print ex + print(ex) if __name__ == '__main__': if len(sys.argv) != 6: - print("usage: %s <brick> <scratch-dir> <log-file> <fetch-interval>" - % (sys.argv[0])) + print(("usage: %s <brick> <scratch-dir> <log-file> <fetch-interval>" + % (sys.argv[0]))) sys.exit(1) get_changes(sys.argv[1], sys.argv[2], sys.argv[3], 9, int(sys.argv[4])) diff --git a/xlators/features/changelog/lib/examples/python/libgfchangelog.py b/xlators/features/changelog/lib/examples/python/libgfchangelog.py index 10e73c02b34..2da9f2d2a8c 100644 --- a/xlators/features/changelog/lib/examples/python/libgfchangelog.py +++ b/xlators/features/changelog/lib/examples/python/libgfchangelog.py @@ -3,7 +3,8 @@ from ctypes import * from ctypes.util import find_library class Changes(object): - libgfc = CDLL(find_library("gfchangelog"), mode=RTLD_GLOBAL, 
use_errno=True) + libgfc = CDLL(find_library("gfchangelog"), mode=RTLD_GLOBAL, + use_errno=True) @classmethod def geterrno(cls): diff --git a/xlators/features/changelog/lib/src/Makefile.am b/xlators/features/changelog/lib/src/Makefile.am index 8d3edb4d63f..c933ec53ed2 100644 --- a/xlators/features/changelog/lib/src/Makefile.am +++ b/xlators/features/changelog/lib/src/Makefile.am @@ -1,29 +1,33 @@ libgfchangelog_la_CFLAGS = -Wall $(GF_CFLAGS) $(GF_DARWIN_LIBGLUSTERFS_CFLAGS) \ - -DDATADIR=\"$(localstatedir)\" + -DDATADIR=\"$(localstatedir)\" -libgfchangelog_la_CPPFLAGS = $(GF_CPPFLAGS) -D__USE_FILE_OFFSET64 -fpic \ - -I../../../src/ -I$(top_srcdir)/libglusterfs/src \ - -I$(top_srcdir)/xlators/features/changelog/src \ - -I$(top_srcdir)/rpc/xdr/src -I$(top_srcdir)/rpc/rpc-lib/src \ - -I$(top_srcdir)/rpc/rpc-transport/socket/src \ - -DDATADIR=\"$(localstatedir)\" +libgfchangelog_la_CPPFLAGS = $(GF_CPPFLAGS) -D__USE_FILE_OFFSET64 -D__USE_LARGEFILE64 -fpic \ + -I../../../src/ -I$(top_srcdir)/libglusterfs/src \ + -I$(top_srcdir)/xlators/features/changelog/src \ + -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \ + -I$(top_srcdir)/rpc/rpc-lib/src \ + -I$(top_srcdir)/rpc/rpc-transport/socket/src \ + -DDATADIR=\"$(localstatedir)\" libgfchangelog_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ - $(top_builddir)/rpc/xdr/src/libgfxdr.la \ - $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la + $(top_builddir)/rpc/xdr/src/libgfxdr.la \ + $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la -libgfchangelog_la_LDFLAGS = $(GF_LDFLAGS) -version-info $(LIBGFCHANGELOG_LT_VERSION) +libgfchangelog_la_LDFLAGS = $(GF_LDFLAGS) \ + -version-info $(LIBGFCHANGELOG_LT_VERSION) \ + $(GF_NO_UNDEFINED) -libgfchangelogdir = $(includedir)/glusterfs/gfchangelog lib_LTLIBRARIES = libgfchangelog.la CONTRIB_BUILDDIR = $(top_builddir)/contrib -libgfchangelog_la_SOURCES = gf-changelog.c gf-changelog-journal-handler.c gf-changelog-helpers.c \ - gf-changelog-api.c gf-history-changelog.c 
gf-changelog-rpc.c gf-changelog-reborp.c \ - $(top_srcdir)/xlators/features/changelog/src/changelog-rpc-common.c +libgfchangelog_la_SOURCES = gf-changelog.c gf-changelog-journal-handler.c \ + gf-changelog-helpers.c gf-changelog-api.c gf-history-changelog.c \ + gf-changelog-rpc.c gf-changelog-reborp.c \ + $(top_srcdir)/xlators/features/changelog/src/changelog-rpc-common.c -noinst_HEADERS = gf-changelog-helpers.h gf-changelog-rpc.h gf-changelog-journal.h changelog-lib-messages.h +noinst_HEADERS = gf-changelog-helpers.h gf-changelog-rpc.h \ + gf-changelog-journal.h changelog-lib-messages.h CLEANFILES = diff --git a/xlators/features/changelog/lib/src/changelog-lib-messages.h b/xlators/features/changelog/lib/src/changelog-lib-messages.h index 976c67f61a9..d7fe7274353 100644 --- a/xlators/features/changelog/lib/src/changelog-lib-messages.h +++ b/xlators/features/changelog/lib/src/changelog-lib-messages.h @@ -11,277 +11,64 @@ #ifndef _CHANGELOG_LIB_MESSAGES_H_ #define _CHANGELOG_LIB_MESSAGES_H_ -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - -#include "glfs-message-id.h" - -/*! \file changelog-lib-messages.h - * \brief CHANGELOG_LIB log-message IDs and their descriptions. - */ - -/* NOTE: Rules for message additions - * 1) Each instance of a message is _better_ left with a unique message ID, even - * if the message format is the same. Reasoning is that, if the message - * format needs to change in one instance, the other instances are not - * impacted or the new change does not change the ID of the instance being - * modified. - * 2) Addition of a message, - * - Should increment the GLFS_NUM_MESSAGES - * - Append to the list of messages defined, towards the end - * - Retain macro naming as glfs_msg_X (for readability across developers) - * NOTE: Rules for message format modifications - * 3) Check acorss the code if the message ID macro in question is reused - * anywhere. 
If reused then then the modifications should ensure correctness - * everywhere, or needs a new message ID as (1) above was not adhered to. If - * not used anywhere, proceed with the required modification. - * NOTE: Rules for message deletion - * 4) Check (3) and if used anywhere else, then cannot be deleted. If not used - * anywhere, then can be deleted, but will leave a hole by design, as - * addition rules specify modification to the end of the list and not filling - * holes. - */ - -#define GLFS_COMP_BASE_CHANGELOG_LIB GLFS_MSGID_COMP_CHANGELOG_LIB -#define GLFS_NUM_MESSAGES 28 -#define GLFS_MSGID_END (GLFS_COMP_BASE_CHANGELOG_LIB + GLFS_NUM_MESSAGES + 1) - -#define glfs_msg_start_x GLFS_COMP_BASE_CHANGELOG_LIB,\ - "Invalid: Start of messages" - -/*! - * @messageid - * @diagnosis open/opendir failed on a brick. - * @recommended action Error number in the log should give the reason why it - * failed. Also observe brick logs for more information. +#include <glusterfs/glfs-message-id.h> + +/* To add new message IDs, append new identifiers at the end of the list. + * + * Never remove a message ID. If it's not used anymore, you can rename it or + * leave it as it is, but not delete it. This is to prevent reutilization of + * IDs by other messages. + * + * The component name must match one of the entries defined in + * glfs-message-id.h. */ -#define CHANGELOG_LIB_MSG_OPEN_FAILED (GLFS_COMP_BASE_CHANGELOG_LIB + 1) - -/*! - * @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_LIB_MSG_FAILED_TO_RMDIR (GLFS_COMP_BASE_CHANGELOG_LIB + 2) - -/*! - * @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_LIB_MSG_SCRATCH_DIR_ENTRIES_CREATION_ERROR \ -(GLFS_COMP_BASE_CHANGELOG_LIB + 3) - -/*! - * @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_LIB_MSG_THREAD_CREATION_FAILED \ - (GLFS_COMP_BASE_CHANGELOG_LIB + 4) - -/*! 
- * @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_LIB_MSG_OPENDIR_ERROR (GLFS_COMP_BASE_CHANGELOG_LIB + 5) - -/*! - * @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_LIB_MSG_RENAME_FAILED (GLFS_COMP_BASE_CHANGELOG_LIB + 6) - -/*! - * @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_LIB_MSG_READ_ERROR (GLFS_COMP_BASE_CHANGELOG_LIB + 7) - -/*! - * @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_LIB_MSG_HTIME_ERROR (GLFS_COMP_BASE_CHANGELOG_LIB + 8) - -/*! - * @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_LIB_MSG_GET_TIME_ERROR (GLFS_COMP_BASE_CHANGELOG_LIB + 9) - -/*! - * @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_LIB_MSG_WRITE_FAILED (GLFS_COMP_BASE_CHANGELOG_LIB + 10) - -/*! - * @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_LIB_MSG_PTHREAD_ERROR (GLFS_COMP_BASE_CHANGELOG_LIB + 11) - -/*! - * @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_LIB_MSG_MMAP_FAILED (GLFS_COMP_BASE_CHANGELOG_LIB + 12) - -/*! - * @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_LIB_MSG_MUNMAP_FAILED (GLFS_COMP_BASE_CHANGELOG_LIB + 13) - -/*! - * @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_LIB_MSG_ASCII_ERROR (GLFS_COMP_BASE_CHANGELOG_LIB + 14) - -/*! - * @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_LIB_MSG_STAT_FAILED (GLFS_COMP_BASE_CHANGELOG_LIB + 15) - -/*! - * @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_LIB_MSG_GET_XATTR_FAILED \ - (GLFS_COMP_BASE_CHANGELOG_LIB + 16) - -/*! - * @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_LIB_MSG_PUBLISH_ERROR (GLFS_COMP_BASE_CHANGELOG_LIB + 17) - -/*! - * @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_LIB_MSG_PARSE_ERROR (GLFS_COMP_BASE_CHANGELOG_LIB + 18) - -/*! 
- * @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_LIB_MSG_TOTAL_LOG_INFO (GLFS_COMP_BASE_CHANGELOG_LIB + 19) - -/*! - * @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_LIB_MSG_CLEANUP_ERROR (GLFS_COMP_BASE_CHANGELOG_LIB + 20) - -/*! - * @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_LIB_MSG_UNLINK_FAILED (GLFS_COMP_BASE_CHANGELOG_LIB + 21) - -/*! - @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_LIB_MSG_NOTIFY_REGISTER_FAILED\ - (GLFS_COMP_BASE_CHANGELOG_LIB + 22) - -/*! - @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_LIB_MSG_INVOKE_RPC_FAILED\ - (GLFS_COMP_BASE_CHANGELOG_LIB + 23) - -/*! - @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_LIB_MSG_DRAINING_EVENT_INFO\ - (GLFS_COMP_BASE_CHANGELOG_LIB + 24) - -/*! - @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_LIB_MSG_CLEANING_BRICK_ENTRY_INFO \ - (GLFS_COMP_BASE_CHANGELOG_LIB + 25) - -/*! - @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_LIB_MSG_FREEING_ENTRY_INFO \ - (GLFS_COMP_BASE_CHANGELOG_LIB + 26) - -/*! - @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_LIB_MSG_XDR_DECODING_FAILED \ - (GLFS_COMP_BASE_CHANGELOG_LIB + 27) - -/*! - @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_LIB_MSG_NOTIFY_REGISTER_INFO \ - (GLFS_COMP_BASE_CHANGELOG_LIB + 28) - -/*! - @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_LIB_MSG_THREAD_CLEANUP_WARNING \ - (GLFS_COMP_BASE_CHANGELOG_LIB + 29) - -/*! - @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_LIB_MSG_COPY_FROM_BUFFER_FAILED \ - (GLFS_COMP_BASE_CHANGELOG_LIB + 30) - -/*! 
- * @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_LIB_MSG_PTHREAD_JOIN_FAILED \ - (GLFS_COMP_BASE_CHANGELOG_LIB + 20) +GLFS_MSGID( + CHANGELOG_LIB, CHANGELOG_LIB_MSG_OPEN_FAILED, + CHANGELOG_LIB_MSG_FAILED_TO_RMDIR, + CHANGELOG_LIB_MSG_SCRATCH_DIR_ENTRIES_CREATION_ERROR, + CHANGELOG_LIB_MSG_THREAD_CREATION_FAILED, CHANGELOG_LIB_MSG_OPENDIR_ERROR, + CHANGELOG_LIB_MSG_RENAME_FAILED, CHANGELOG_LIB_MSG_READ_ERROR, + CHANGELOG_LIB_MSG_HTIME_ERROR, CHANGELOG_LIB_MSG_GET_TIME_ERROR, + CHANGELOG_LIB_MSG_WRITE_FAILED, CHANGELOG_LIB_MSG_PTHREAD_ERROR, + CHANGELOG_LIB_MSG_MMAP_FAILED, CHANGELOG_LIB_MSG_MUNMAP_FAILED, + CHANGELOG_LIB_MSG_ASCII_ERROR, CHANGELOG_LIB_MSG_STAT_FAILED, + CHANGELOG_LIB_MSG_GET_XATTR_FAILED, CHANGELOG_LIB_MSG_PUBLISH_ERROR, + CHANGELOG_LIB_MSG_PARSE_ERROR, CHANGELOG_LIB_MSG_MIN_MAX_INFO, + CHANGELOG_LIB_MSG_CLEANUP_ERROR, CHANGELOG_LIB_MSG_UNLINK_FAILED, + CHANGELOG_LIB_MSG_NOTIFY_REGISTER_FAILED, + CHANGELOG_LIB_MSG_INVOKE_RPC_FAILED, CHANGELOG_LIB_MSG_DRAINING_EVENT_INFO, + CHANGELOG_LIB_MSG_CLEANING_BRICK_ENTRY_INFO, + CHANGELOG_LIB_MSG_FREEING_ENTRY_INFO, CHANGELOG_LIB_MSG_XDR_DECODING_FAILED, + CHANGELOG_LIB_MSG_NOTIFY_REGISTER_INFO, + CHANGELOG_LIB_MSG_THREAD_CLEANUP_WARNING, + CHANGELOG_LIB_MSG_COPY_FROM_BUFFER_FAILED, + CHANGELOG_LIB_MSG_PTHREAD_JOIN_FAILED, CHANGELOG_LIB_MSG_HIST_FAILED, + CHANGELOG_LIB_MSG_DRAINED_EVENT_INFO, CHANGELOG_LIB_MSG_PARSE_ERROR_CEASED, + CHANGELOG_LIB_MSG_REQUESTING_INFO, CHANGELOG_LIB_MSG_FINAL_INFO); + +#define CHANGELOG_LIB_MSG_NOTIFY_REGISTER_INFO_STR "Registering brick" +#define CHANGELOG_LIB_MSG_RENAME_FAILED_STR "error moving changelog file" +#define CHANGELOG_LIB_MSG_OPEN_FAILED_STR "cannot open changelog file" +#define CHANGELOG_LIB_MSG_UNLINK_FAILED_STR "failed to unlink" +#define CHANGELOG_LIB_MSG_FAILED_TO_RMDIR_STR "failed to rmdir" +#define CHANGELOG_LIB_MSG_STAT_FAILED_STR "stat failed on changelog file" +#define CHANGELOG_LIB_MSG_PARSE_ERROR_STR "could not parse 
changelog" +#define CHANGELOG_LIB_MSG_PARSE_ERROR_CEASED_STR \ + "parsing error, ceased publishing..." +#define CHANGELOG_LIB_MSG_HTIME_ERROR_STR "fop failed on htime file" +#define CHANGELOG_LIB_MSG_GET_XATTR_FAILED_STR \ + "error extracting max timstamp from htime file" +#define CHANGELOG_LIB_MSG_MIN_MAX_INFO_STR "changelogs min max" +#define CHANGELOG_LIB_MSG_REQUESTING_INFO_STR "Requesting historical changelogs" +#define CHANGELOG_LIB_MSG_FINAL_INFO_STR "FINAL" +#define CHANGELOG_LIB_MSG_HIST_FAILED_STR \ + "Requested changelog range is not available" +#define CHANGELOG_LIB_MSG_GET_TIME_ERROR_STR "wrong result" +#define CHANGELOG_LIB_MSG_CLEANING_BRICK_ENTRY_INFO_STR \ + "Cleaning brick entry for brick" +#define CHANGELOG_LIB_MSG_DRAINING_EVENT_INFO_STR "Draining event" +#define CHANGELOG_LIB_MSG_DRAINED_EVENT_INFO_STR "Drained event" +#define CHANGELOG_LIB_MSG_FREEING_ENTRY_INFO_STR "freeing entry" -#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages" #endif /* !_CHANGELOG_MESSAGES_H_ */ diff --git a/xlators/features/changelog/lib/src/gf-changelog-api.c b/xlators/features/changelog/lib/src/gf-changelog-api.c index f41b505a749..81a5cbfec10 100644 --- a/xlators/features/changelog/lib/src/gf-changelog-api.c +++ b/xlators/features/changelog/lib/src/gf-changelog-api.c @@ -8,10 +8,10 @@ cases as published by the Free Software Foundation. 
*/ -#include "compat-uuid.h" -#include "globals.h" -#include "glusterfs.h" -#include "syscall.h" +#include <glusterfs/compat-uuid.h> +#include <glusterfs/globals.h> +#include <glusterfs/glusterfs.h> +#include <glusterfs/syscall.h> #include "gf-changelog-helpers.h" #include "gf-changelog-journal.h" @@ -19,55 +19,54 @@ #include "changelog-lib-messages.h" int -gf_changelog_done (char *file) +gf_changelog_done(char *file) { - int ret = -1; - char *buffer = NULL; - xlator_t *this = NULL; - gf_changelog_journal_t *jnl = NULL; - char to_path[PATH_MAX] = {0,}; - - errno = EINVAL; - - this = THIS; - if (!this) - goto out; - - jnl = (gf_changelog_journal_t *) GF_CHANGELOG_GET_API_PTR (this); - if (!jnl) - goto out; - - if (!file || !strlen (file)) - goto out; - - /* make sure 'file' is inside ->jnl_working_dir */ - buffer = realpath (file, NULL); - if (!buffer) - goto out; - - if (strncmp (jnl->jnl_working_dir, - buffer, strlen (jnl->jnl_working_dir))) - goto out; - - (void) snprintf (to_path, PATH_MAX, "%s%s", - jnl->jnl_processed_dir, basename (buffer)); - gf_msg_debug (this->name, 0, - "moving %s to processed directory", file); - ret = sys_rename (buffer, to_path); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_LIB_MSG_RENAME_FAILED, - "cannot move %s to %s", - file, to_path); - goto out; - } - - ret = 0; + int ret = -1; + char *buffer = NULL; + xlator_t *this = NULL; + gf_changelog_journal_t *jnl = NULL; + char to_path[PATH_MAX] = { + 0, + }; + + errno = EINVAL; + + this = THIS; + if (!this) + goto out; + + jnl = (gf_changelog_journal_t *)GF_CHANGELOG_GET_API_PTR(this); + if (!jnl) + goto out; + + if (!file || !strlen(file)) + goto out; + + /* make sure 'file' is inside ->jnl_working_dir */ + buffer = realpath(file, NULL); + if (!buffer) + goto out; + + if (strncmp(jnl->jnl_working_dir, buffer, strlen(jnl->jnl_working_dir))) + goto out; + + (void)snprintf(to_path, PATH_MAX, "%s%s", jnl->jnl_processed_dir, + basename(buffer)); + 
gf_msg_debug(this->name, 0, "moving %s to processed directory", file); + ret = sys_rename(buffer, to_path); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_LIB_MSG_RENAME_FAILED, "from=%s", file, "to=%s", + to_path, NULL); + goto out; + } + + ret = 0; - out: - if (buffer) - free (buffer); /* allocated by realpath() */ - return ret; +out: + if (buffer) + free(buffer); /* allocated by realpath() */ + return ret; } /** @@ -75,28 +74,28 @@ gf_changelog_done (char *file) * for a set of changelogs, start from the beginning */ int -gf_changelog_start_fresh () +gf_changelog_start_fresh() { - xlator_t *this = NULL; - gf_changelog_journal_t *jnl = NULL; + xlator_t *this = NULL; + gf_changelog_journal_t *jnl = NULL; - this = THIS; - if (!this) - goto out; + this = THIS; + if (!this) + goto out; - errno = EINVAL; + errno = EINVAL; - jnl = (gf_changelog_journal_t *) GF_CHANGELOG_GET_API_PTR (this); - if (!jnl) - goto out; + jnl = (gf_changelog_journal_t *)GF_CHANGELOG_GET_API_PTR(this); + if (!jnl) + goto out; - if (gf_ftruncate (jnl->jnl_fd, 0)) - goto out; + if (gf_ftruncate(jnl->jnl_fd, 0)) + goto out; - return 0; + return 0; - out: - return -1; +out: + return -1; } /** @@ -105,40 +104,42 @@ gf_changelog_start_fresh () * consumed. 
*/ ssize_t -gf_changelog_next_change (char *bufptr, size_t maxlen) +gf_changelog_next_change(char *bufptr, size_t maxlen) { - ssize_t size = -1; - int tracker_fd = 0; - xlator_t *this = NULL; - gf_changelog_journal_t *jnl = NULL; - char buffer[PATH_MAX] = {0,}; + ssize_t size = -1; + int tracker_fd = 0; + xlator_t *this = NULL; + gf_changelog_journal_t *jnl = NULL; + char buffer[PATH_MAX] = { + 0, + }; - errno = EINVAL; + errno = EINVAL; - this = THIS; - if (!this) - goto out; + this = THIS; + if (!this) + goto out; - jnl = (gf_changelog_journal_t *) GF_CHANGELOG_GET_API_PTR (this); - if (!jnl) - goto out; + jnl = (gf_changelog_journal_t *)GF_CHANGELOG_GET_API_PTR(this); + if (!jnl) + goto out; - tracker_fd = jnl->jnl_fd; + tracker_fd = jnl->jnl_fd; - size = gf_readline (tracker_fd, buffer, maxlen); - if (size < 0) { - size = -1; - goto out; - } + size = gf_readline(tracker_fd, buffer, maxlen); + if (size < 0) { + size = -1; + goto out; + } - if (size == 0) - goto out; + if (size == 0) + goto out; - memcpy (bufptr, buffer, size - 1); - bufptr[size - 1] = '\0'; + memcpy(bufptr, buffer, size - 1); + bufptr[size - 1] = '\0'; out: - return size; + return size; } /** @@ -150,79 +151,74 @@ out: * This call also acts as a cancellation point for the consumer. 
*/ ssize_t -gf_changelog_scan () +gf_changelog_scan() { - int ret = 0; - int tracker_fd = 0; - size_t len = 0; - size_t off = 0; - xlator_t *this = NULL; - size_t nr_entries = 0; - gf_changelog_journal_t *jnl = NULL; - struct dirent *entryp = NULL; - struct dirent *result = NULL; - char buffer[PATH_MAX] = {0,}; - - this = THIS; - if (!this) - goto out; - - jnl = (gf_changelog_journal_t *) GF_CHANGELOG_GET_API_PTR (this); - if (!jnl) - goto out; - if (JNL_IS_API_DISCONNECTED (jnl)) { - errno = ENOTCONN; - goto out; + int tracker_fd = 0; + size_t off = 0; + xlator_t *this = NULL; + size_t nr_entries = 0; + gf_changelog_journal_t *jnl = NULL; + struct dirent *entry = NULL; + struct dirent scratch[2] = { + { + 0, + }, + }; + char buffer[PATH_MAX] = { + 0, + }; + + this = THIS; + if (!this) + goto out; + + jnl = (gf_changelog_journal_t *)GF_CHANGELOG_GET_API_PTR(this); + if (!jnl) + goto out; + if (JNL_IS_API_DISCONNECTED(jnl)) { + errno = ENOTCONN; + goto out; + } + + errno = EINVAL; + + tracker_fd = jnl->jnl_fd; + if (gf_ftruncate(tracker_fd, 0)) + goto out; + + rewinddir(jnl->jnl_dir); + + for (;;) { + errno = 0; + entry = sys_readdir(jnl->jnl_dir, scratch); + if (!entry || errno != 0) + break; + + if (!strcmp(basename(entry->d_name), ".") || + !strcmp(basename(entry->d_name), "..")) + continue; + + nr_entries++; + + GF_CHANGELOG_FILL_BUFFER(jnl->jnl_processing_dir, buffer, off, + strlen(jnl->jnl_processing_dir)); + GF_CHANGELOG_FILL_BUFFER(entry->d_name, buffer, off, + strlen(entry->d_name)); + GF_CHANGELOG_FILL_BUFFER("\n", buffer, off, 1); + + if (gf_changelog_write(tracker_fd, buffer, off) != off) { + gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_LIB_MSG_WRITE_FAILED, + "error writing changelog filename" + " to tracker file"); + break; } + off = 0; + } - errno = EINVAL; - - tracker_fd = jnl->jnl_fd; - if (gf_ftruncate (tracker_fd, 0)) - goto out; - - len = offsetof(struct dirent, d_name) - + pathconf(jnl->jnl_processing_dir, _PC_NAME_MAX) + 1; - entryp = 
GF_CALLOC (1, len, - gf_changelog_mt_libgfchangelog_dirent_t); - if (!entryp) - goto out; - - rewinddir (jnl->jnl_dir); - while (1) { - ret = readdir_r (jnl->jnl_dir, entryp, &result); - if (ret || !result) - break; - - if (!strcmp (basename (entryp->d_name), ".") - || !strcmp (basename (entryp->d_name), "..")) - continue; - - nr_entries++; - - GF_CHANGELOG_FILL_BUFFER (jnl->jnl_processing_dir, - buffer, off, - strlen (jnl->jnl_processing_dir)); - GF_CHANGELOG_FILL_BUFFER (entryp->d_name, buffer, - off, strlen (entryp->d_name)); - GF_CHANGELOG_FILL_BUFFER ("\n", buffer, off, 1); - - if (gf_changelog_write (tracker_fd, buffer, off) != off) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CHANGELOG_LIB_MSG_WRITE_FAILED, - "error writing changelog filename" - " to tracker file"); - break; - } - off = 0; - } - - GF_FREE (entryp); - - if (!result) { - if (gf_lseek (tracker_fd, 0, SEEK_SET) != -1) - return nr_entries; - } - out: - return -1; + if (!entry) { + if (gf_lseek(tracker_fd, 0, SEEK_SET) != -1) + return nr_entries; + } +out: + return -1; } diff --git a/xlators/features/changelog/lib/src/gf-changelog-helpers.c b/xlators/features/changelog/lib/src/gf-changelog-helpers.c index 8b35f4e9416..75f8a6dfc08 100644 --- a/xlators/features/changelog/lib/src/gf-changelog-helpers.c +++ b/xlators/features/changelog/lib/src/gf-changelog-helpers.c @@ -11,40 +11,36 @@ #include "changelog-mem-types.h" #include "gf-changelog-helpers.h" #include "changelog-lib-messages.h" -#include "syscall.h" - -ssize_t gf_changelog_read_path (int fd, char *buffer, size_t bufsize) -{ - return sys_read (fd, buffer, bufsize); -} +#include <glusterfs/syscall.h> size_t -gf_changelog_write (int fd, char *buffer, size_t len) +gf_changelog_write(int fd, char *buffer, size_t len) { - ssize_t size = 0; - size_t written = 0; + ssize_t size = 0; + size_t written = 0; - while (written < len) { - size = sys_write (fd, buffer + written, len - written); - if (size <= 0) - break; + while (written < len) { + size = 
sys_write(fd, buffer + written, len - written); + if (size <= 0) + break; - written += size; - } + written += size; + } - return written; + return written; } void -gf_rfc3986_encode (unsigned char *s, char *enc, char *estr) +gf_rfc3986_encode_space_newline(unsigned char *s, char *enc, char *estr) { - for (; *s; s++) { - if (estr[*s]) - sprintf(enc, "%c", estr[*s]); - else - sprintf(enc, "%%%02X", *s); - while (*++enc); - } + for (; *s; s++) { + if (estr[*s]) + sprintf(enc, "%c", estr[*s]); + else + sprintf(enc, "%%%02X", *s); + while (*++enc) + ; + } } /** @@ -57,163 +53,118 @@ gf_rfc3986_encode (unsigned char *s, char *enc, char *estr) * that can be done via @fflush(fp), @ftruncate(fd) and @fseek(fp), * but this involves mixing POSIX file descriptors and stream FILE *). * - * NOTE: This implmentation still does work with more than one fd's + * NOTE: This implementation still does work with more than one fd's * used to perform gf_readline(). For this very reason it's not * made a part of libglusterfs. 
*/ -static pthread_key_t rl_key; -static pthread_once_t rl_once = PTHREAD_ONCE_INIT; - -static void -readline_destructor (void *ptr) -{ - GF_FREE (ptr); -} - -static void -readline_once (void) -{ - pthread_key_create (&rl_key, readline_destructor); -} +static __thread read_line_t thread_tsd = {}; static ssize_t -my_read (read_line_t *tsd, int fd, char *ptr) -{ - if (tsd->rl_cnt <= 0) { - tsd->rl_cnt = sys_read (fd, tsd->rl_buf, MAXLINE); - - if (tsd->rl_cnt < 0) - return -1; - else if (tsd->rl_cnt == 0) - return 0; - tsd->rl_bufptr = tsd->rl_buf; - } - - tsd->rl_cnt--; - *ptr = *tsd->rl_bufptr++; - return 1; -} - -static int -gf_readline_init_once (read_line_t **tsd) +my_read(read_line_t *tsd, int fd, char *ptr) { - if (pthread_once (&rl_once, readline_once) != 0) - return -1; + if (tsd->rl_cnt <= 0) { + tsd->rl_cnt = sys_read(fd, tsd->rl_buf, MAXLINE); - *tsd = pthread_getspecific (rl_key); - if (*tsd) - goto out; - - *tsd = GF_CALLOC (1, sizeof (**tsd), - gf_changelog_mt_libgfchangelog_rl_t); - if (!*tsd) - return -1; - - if (pthread_setspecific (rl_key, *tsd) != 0) - return -1; + if (tsd->rl_cnt < 0) + return -1; + else if (tsd->rl_cnt == 0) + return 0; + tsd->rl_bufptr = tsd->rl_buf; + } - out: - return 0; + tsd->rl_cnt--; + *ptr = *tsd->rl_bufptr++; + return 1; } ssize_t -gf_readline (int fd, void *vptr, size_t maxlen) +gf_readline(int fd, void *vptr, size_t maxlen) { - size_t n = 0; - size_t rc = 0; - char c = ' '; - char *ptr = NULL; - read_line_t *tsd = NULL; - - if (gf_readline_init_once (&tsd)) - return -1; - - ptr = vptr; - for (n = 1; n < maxlen; n++) { - if ( (rc = my_read (tsd, fd, &c)) == 1 ) { - *ptr++ = c; - if (c == '\n') - break; - } else if (rc == 0) { - *ptr = '\0'; - return (n - 1); - } else - return -1; - } - - *ptr = '\0'; - return n; - + size_t n = 0; + size_t rc = 0; + char c = ' '; + char *ptr = NULL; + read_line_t *tsd = &thread_tsd; + + ptr = vptr; + for (n = 1; n < maxlen; n++) { + if ((rc = my_read(tsd, fd, &c)) == 1) { + *ptr++ = c; 
+ if (c == '\n') + break; + } else if (rc == 0) { + *ptr = '\0'; + return (n - 1); + } else + return -1; + } + + *ptr = '\0'; + return n; } off_t -gf_lseek (int fd, off_t offset, int whence) +gf_lseek(int fd, off_t offset, int whence) { - off_t off = 0; - read_line_t *tsd = NULL; + off_t off = 0; + read_line_t *tsd = &thread_tsd; - if (gf_readline_init_once (&tsd)) - return -1; + off = sys_lseek(fd, offset, whence); + if (off == -1) + return -1; - off = sys_lseek (fd, offset, whence); - if (off == -1) - return -1; + tsd->rl_cnt = 0; + tsd->rl_bufptr = tsd->rl_buf; - tsd->rl_cnt = 0; - tsd->rl_bufptr = tsd->rl_buf; - - return off; + return off; } int -gf_ftruncate (int fd, off_t length) +gf_ftruncate(int fd, off_t length) { - read_line_t *tsd = NULL; + read_line_t *tsd = &thread_tsd; - if (gf_readline_init_once (&tsd)) - return -1; + if (sys_ftruncate(fd, 0)) + return -1; - if (sys_ftruncate (fd, 0)) - return -1; + tsd->rl_cnt = 0; + tsd->rl_bufptr = tsd->rl_buf; - tsd->rl_cnt = 0; - tsd->rl_bufptr = tsd->rl_buf; - - return 0; + return 0; } int -gf_thread_cleanup (xlator_t *this, pthread_t thread) +gf_thread_cleanup(xlator_t *this, pthread_t thread) { - int ret = 0; - void *res = NULL; - - ret = pthread_cancel (thread); - if (ret != 0) { - gf_msg (this->name, GF_LOG_WARNING, 0, - CHANGELOG_LIB_MSG_THREAD_CLEANUP_WARNING, - "Failed to send cancellation to thread"); - goto error_return; - } - - ret = pthread_join (thread, &res); - if (ret != 0) { - gf_msg (this->name, GF_LOG_WARNING, 0, - CHANGELOG_LIB_MSG_THREAD_CLEANUP_WARNING, - "failed to join thread"); - goto error_return; - } - - if (res != PTHREAD_CANCELED) { - gf_msg (this->name, GF_LOG_WARNING, 0, - CHANGELOG_LIB_MSG_THREAD_CLEANUP_WARNING, - "Thread could not be cleaned up"); - goto error_return; - } - - return 0; - - error_return: - return -1; + int ret = 0; + void *res = NULL; + + ret = pthread_cancel(thread); + if (ret != 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, + 
CHANGELOG_LIB_MSG_THREAD_CLEANUP_WARNING, + "Failed to send cancellation to thread"); + goto error_return; + } + + ret = pthread_join(thread, &res); + if (ret != 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, + CHANGELOG_LIB_MSG_THREAD_CLEANUP_WARNING, + "failed to join thread"); + goto error_return; + } + + if (res != PTHREAD_CANCELED) { + gf_msg(this->name, GF_LOG_WARNING, 0, + CHANGELOG_LIB_MSG_THREAD_CLEANUP_WARNING, + "Thread could not be cleaned up"); + goto error_return; + } + + return 0; + +error_return: + return -1; } diff --git a/xlators/features/changelog/lib/src/gf-changelog-helpers.h b/xlators/features/changelog/lib/src/gf-changelog-helpers.h index bd21e4df035..9c609d33172 100644 --- a/xlators/features/changelog/lib/src/gf-changelog-helpers.h +++ b/xlators/features/changelog/lib/src/gf-changelog-helpers.h @@ -14,36 +14,37 @@ #include <unistd.h> #include <dirent.h> #include <limits.h> -#include "locking.h" +#include <glusterfs/locking.h> -#include <xlator.h> +#include <glusterfs/xlator.h> #include "changelog.h" #include "changelog-rpc-common.h" #include "gf-changelog-journal.h" -#define GF_CHANGELOG_TRACKER "tracker" +#define GF_CHANGELOG_TRACKER "tracker" -#define GF_CHANGELOG_CURRENT_DIR ".current" -#define GF_CHANGELOG_PROCESSED_DIR ".processed" +#define GF_CHANGELOG_CURRENT_DIR ".current" +#define GF_CHANGELOG_PROCESSED_DIR ".processed" #define GF_CHANGELOG_PROCESSING_DIR ".processing" -#define GF_CHANGELOG_HISTORY_DIR ".history" +#define GF_CHANGELOG_HISTORY_DIR ".history" #define TIMESTAMP_LENGTH 10 #ifndef MAXLINE #define MAXLINE 4096 #endif -#define GF_CHANGELOG_FILL_BUFFER(ptr, ascii, off, len) do { \ - memcpy (ascii + off, ptr, len); \ - off += len; \ - } while (0) +#define GF_CHANGELOG_FILL_BUFFER(ptr, ascii, off, len) \ + do { \ + memcpy(ascii + off, ptr, len); \ + off += len; \ + } while (0) typedef struct read_line { - int rl_cnt; - char *rl_bufptr; - char rl_buf[MAXLINE]; + int rl_cnt; + char *rl_bufptr; + char rl_buf[MAXLINE]; } 
read_line_t; struct gf_changelog; @@ -55,51 +56,50 @@ struct gf_event; * ->next_seq holds the next _expected_ sequence number. */ struct gf_event_list { - pthread_mutex_t lock; /* protects this structure */ - pthread_cond_t cond; + pthread_mutex_t lock; /* protects this structure */ + pthread_cond_t cond; - pthread_t invoker; + pthread_t invoker; - unsigned long next_seq; /* next sequence number expected: - zero during bootstrap */ + unsigned long next_seq; /* next sequence number expected: + zero during bootstrap */ - struct gf_changelog *entry; /* backpointer to it's brick - encapsulator (entry) */ - struct list_head events; /* list of events */ + struct gf_changelog *entry; /* backpointer to it's brick + encapsulator (entry) */ + struct list_head events; /* list of events */ }; /** * include a refcount if it's of use by additional layers */ struct gf_event { - int count; + int count; - unsigned long seq; + unsigned long seq; - struct list_head list; + struct list_head list; - struct iovec iov[0]; + struct iovec iov[0]; }; -#define GF_EVENT_CALLOC_SIZE(cnt, len) \ - (sizeof (struct gf_event) + (cnt * sizeof (struct iovec)) + len) +#define GF_EVENT_CALLOC_SIZE(cnt, len) \ + (sizeof(struct gf_event) + (cnt * sizeof(struct iovec)) + len) /** * assign the base address of the IO vector to the correct memory o * area and set it's addressable length. 
*/ -#define GF_EVENT_ASSIGN_IOVEC(vec, event, len, pos) \ - do { \ - vec->iov_base = ((char *)event) + \ - sizeof (struct gf_event) + \ - (event->count * sizeof (struct iovec)) + pos; \ - vec->iov_len = len; \ - pos += len; \ - } while (0) +#define GF_EVENT_ASSIGN_IOVEC(vec, event, len, pos) \ + do { \ + vec->iov_base = ((char *)event) + sizeof(struct gf_event) + \ + (event->count * sizeof(struct iovec)) + pos; \ + vec->iov_len = len; \ + pos += len; \ + } while (0) typedef enum gf_changelog_conn_state { - GF_CHANGELOG_CONN_STATE_PENDING = 0, - GF_CHANGELOG_CONN_STATE_ACCEPTED, - GF_CHANGELOG_CONN_STATE_DISCONNECTED, + GF_CHANGELOG_CONN_STATE_PENDING = 0, + GF_CHANGELOG_CONN_STATE_ACCEPTED, + GF_CHANGELOG_CONN_STATE_DISCONNECTED, } gf_changelog_conn_state_t; /** @@ -107,153 +107,149 @@ typedef enum gf_changelog_conn_state { * notifications are streamed. */ typedef struct gf_changelog { - gf_lock_t statelock; - gf_changelog_conn_state_t connstate; + gf_lock_t statelock; + gf_changelog_conn_state_t connstate; - xlator_t *this; + xlator_t *this; - struct list_head list; /* list of instances */ + struct list_head list; /* list of instances */ - char brick[PATH_MAX]; /* brick path for this end-point */ + char brick[PATH_MAX]; /* brick path for this end-point */ - changelog_rpc_t grpc; /* rpc{-clnt,svc} for this brick */ -#define RPC_PROBER(ent) ent->grpc.rpc -#define RPC_REBORP(ent) ent->grpc.svc -#define RPC_SOCK(ent) ent->grpc.sock + changelog_rpc_t grpc; /* rpc{-clnt,svc} for this brick */ +#define RPC_PROBER(ent) ent->grpc.rpc +#define RPC_REBORP(ent) ent->grpc.svc +#define RPC_SOCK(ent) ent->grpc.sock - unsigned int notify; /* notification flag(s) */ + unsigned int notify; /* notification flag(s) */ - FINI *fini; /* destructor callback */ - CALLBACK *callback; /* event callback dispatcher */ - CONNECT *connected; /* connect callback */ - DISCONNECT *disconnected; /* disconnection callback */ + FINI *fini; /* destructor callback */ + CALLBACK *callback; /* event 
callback dispatcher */ + CONNECT *connected; /* connect callback */ + DISCONNECT *disconnected; /* disconnection callback */ - void *ptr; /* owner specific private data */ - xlator_t *invokerxl; /* consumers _this_, if valid, - assigned to THIS before cbk is - invoked */ + void *ptr; /* owner specific private data */ + xlator_t *invokerxl; /* consumers _this_, if valid, + assigned to THIS before cbk is + invoked */ - gf_boolean_t ordered; + gf_boolean_t ordered; - void (*queueevent) (struct gf_event_list *, struct gf_event *); - void (*pickevent) (struct gf_event_list *, struct gf_event **); + void (*queueevent)(struct gf_event_list *, struct gf_event *); + void (*pickevent)(struct gf_event_list *, struct gf_event **); - struct gf_event_list event; + struct gf_event_list event; } gf_changelog_t; static inline int -gf_changelog_filter_check (gf_changelog_t *entry, changelog_event_t *event) +gf_changelog_filter_check(gf_changelog_t *entry, changelog_event_t *event) { - if (event->ev_type & entry->notify) - return 1; - return 0; + if (event->ev_type & entry->notify) + return 1; + return 0; } -#define GF_NEED_ORDERED_EVENTS(ent) (ent->ordered == _gf_true) +#define GF_NEED_ORDERED_EVENTS(ent) (ent->ordered == _gf_true) /** private structure */ typedef struct gf_private { - pthread_mutex_t lock; /* protects ->connections, cleanups */ - pthread_cond_t cond; + pthread_mutex_t lock; /* protects ->connections, cleanups */ + pthread_cond_t cond; - void *api; /* pointer for API access */ + void *api; /* pointer for API access */ - pthread_t poller; /* event poller thread */ - pthread_t connectionjanitor; /* connection cleaner */ + pthread_t poller; /* event poller thread */ + pthread_t connectionjanitor; /* connection cleaner */ - struct list_head connections; /* list of connections */ - struct list_head cleanups; /* list of connection to be - cleaned up */ + struct list_head connections; /* list of connections */ + struct list_head cleanups; /* list of connection to be + 
cleaned up */ } gf_private_t; -#define GF_CHANGELOG_GET_API_PTR(this) (((gf_private_t *) this->private)->api) +#define GF_CHANGELOG_GET_API_PTR(this) (((gf_private_t *)this->private)->api) /** * upcall: invoke callback with _correct_ THIS */ -#define GF_CHANGELOG_INVOKE_CBK(this, cbk, brick, args ...) \ - do { \ - xlator_t *old_this = NULL; \ - xlator_t *invokerxl = NULL; \ - \ - invokerxl = entry->invokerxl; \ - old_this = this; \ - \ - if (invokerxl) { \ - THIS = invokerxl; \ - } \ - \ - cbk (invokerxl, brick, args); \ - THIS = old_this; \ - \ - } while (0) - -#define SAVE_THIS(xl) \ - do { \ - old_this = xl; \ - THIS = master; \ - } while (0) - -#define RESTORE_THIS() \ - do { \ - if (old_this) \ - THIS = old_this; \ - } while (0) +#define GF_CHANGELOG_INVOKE_CBK(this, cbk, brick, args...) \ + do { \ + xlator_t *old_this = NULL; \ + xlator_t *invokerxl = NULL; \ + \ + invokerxl = entry->invokerxl; \ + old_this = this; \ + \ + if (invokerxl) { \ + THIS = invokerxl; \ + } \ + \ + cbk(invokerxl, brick, args); \ + THIS = old_this; \ + \ + } while (0) + +#define SAVE_THIS(xl) \ + do { \ + old_this = xl; \ + THIS = master; \ + } while (0) + +#define RESTORE_THIS() \ + do { \ + if (old_this) \ + THIS = old_this; \ + } while (0) /** APIs and the rest */ void * -gf_changelog_process (void *data); - -ssize_t -gf_changelog_read_path (int fd, char *buffer, size_t bufsize); +gf_changelog_process(void *data); void -gf_rfc3986_encode (unsigned char *s, char *enc, char *estr); +gf_rfc3986_encode_space_newline(unsigned char *s, char *enc, char *estr); size_t -gf_changelog_write (int fd, char *buffer, size_t len); +gf_changelog_write(int fd, char *buffer, size_t len); ssize_t -gf_readline (int fd, void *vptr, size_t maxlen); +gf_readline(int fd, void *vptr, size_t maxlen); int -gf_ftruncate (int fd, off_t length); +gf_ftruncate(int fd, off_t length); off_t -gf_lseek (int fd, off_t offset, int whence); +gf_lseek(int fd, off_t offset, int whence); int -gf_changelog_consume 
(xlator_t *this, - gf_changelog_journal_t *jnl, - char *from_path, gf_boolean_t no_publish); +gf_changelog_consume(xlator_t *this, gf_changelog_journal_t *jnl, + char *from_path, gf_boolean_t no_publish); int -gf_changelog_publish (xlator_t *this, - gf_changelog_journal_t *jnl, char *from_path); +gf_changelog_publish(xlator_t *this, gf_changelog_journal_t *jnl, + char *from_path); int -gf_thread_cleanup (xlator_t *this, pthread_t thread); +gf_thread_cleanup(xlator_t *this, pthread_t thread); void * -gf_changelog_callback_invoker (void *arg); +gf_changelog_callback_invoker(void *arg); int -gf_cleanup_event (xlator_t *, struct gf_event_list *); +gf_cleanup_event(xlator_t *, struct gf_event_list *); /* (un)ordered event queueing */ void -queue_ordered_event (struct gf_event_list *, struct gf_event *); +queue_ordered_event(struct gf_event_list *, struct gf_event *); void -queue_unordered_event (struct gf_event_list *, struct gf_event *); +queue_unordered_event(struct gf_event_list *, struct gf_event *); /* (un)ordered event picking */ void -pick_event_ordered (struct gf_event_list *, struct gf_event **); +pick_event_ordered(struct gf_event_list *, struct gf_event **); void -pick_event_unordered (struct gf_event_list *, struct gf_event **); +pick_event_unordered(struct gf_event_list *, struct gf_event **); /* connection janitor thread */ void * -gf_changelog_connection_janitor (void *); +gf_changelog_connection_janitor(void *); #endif diff --git a/xlators/features/changelog/lib/src/gf-changelog-journal-handler.c b/xlators/features/changelog/lib/src/gf-changelog-journal-handler.c index 6ea7cac88da..7f6e2329e71 100644 --- a/xlators/features/changelog/lib/src/gf-changelog-journal-handler.c +++ b/xlators/features/changelog/lib/src/gf-changelog-journal-handler.c @@ -8,11 +8,11 @@ cases as published by the Free Software Foundation. 
*/ -#include "compat-uuid.h" -#include "globals.h" -#include "glusterfs.h" -#include "syscall.h" -#include "compat-errno.h" +#include <glusterfs/compat-uuid.h> +#include <glusterfs/globals.h> +#include <glusterfs/glusterfs.h> +#include <glusterfs/syscall.h> +#include <glusterfs/compat-errno.h> #include "gf-changelog-helpers.h" @@ -25,112 +25,107 @@ extern int byebye; -enum changelog_versions { - VERSION_1_1 = 0, - VERSION_1_2 = 1 -}; +enum changelog_versions { VERSION_1_1 = 0, VERSION_1_2 = 1 }; /** * number of gfid records after fop number */ -int nr_gfids[2][GF_FOP_MAXVALUE] = { - { - [GF_FOP_MKNOD] = 1, - [GF_FOP_MKDIR] = 1, - [GF_FOP_UNLINK] = 1, - [GF_FOP_RMDIR] = 1, - [GF_FOP_SYMLINK] = 1, - [GF_FOP_RENAME] = 2, - [GF_FOP_LINK] = 1, - [GF_FOP_CREATE] = 1, - }, - { - [GF_FOP_MKNOD] = 1, - [GF_FOP_MKDIR] = 1, - [GF_FOP_UNLINK] = 2, - [GF_FOP_RMDIR] = 2, - [GF_FOP_SYMLINK] = 1, - [GF_FOP_RENAME] = 2, - [GF_FOP_LINK] = 1, - [GF_FOP_CREATE] = 1, - } -}; - -int nr_extra_recs[2][GF_FOP_MAXVALUE] = { - { - [GF_FOP_MKNOD] = 3, - [GF_FOP_MKDIR] = 3, - [GF_FOP_UNLINK] = 0, - [GF_FOP_RMDIR] = 0, - [GF_FOP_SYMLINK] = 0, - [GF_FOP_RENAME] = 0, - [GF_FOP_LINK] = 0, - [GF_FOP_CREATE] = 3, - }, - { - [GF_FOP_MKNOD] = 3, - [GF_FOP_MKDIR] = 3, - [GF_FOP_UNLINK] = 0, - [GF_FOP_RMDIR] = 0, - [GF_FOP_SYMLINK] = 0, - [GF_FOP_RENAME] = 0, - [GF_FOP_LINK] = 0, - [GF_FOP_CREATE] = 3, - } -}; +int nr_gfids[2][GF_FOP_MAXVALUE] = {{ + [GF_FOP_MKNOD] = 1, + [GF_FOP_MKDIR] = 1, + [GF_FOP_UNLINK] = 1, + [GF_FOP_RMDIR] = 1, + [GF_FOP_SYMLINK] = 1, + [GF_FOP_RENAME] = 2, + [GF_FOP_LINK] = 1, + [GF_FOP_CREATE] = 1, + }, + { + [GF_FOP_MKNOD] = 1, + [GF_FOP_MKDIR] = 1, + [GF_FOP_UNLINK] = 2, + [GF_FOP_RMDIR] = 2, + [GF_FOP_SYMLINK] = 1, + [GF_FOP_RENAME] = 2, + [GF_FOP_LINK] = 1, + [GF_FOP_CREATE] = 1, + }}; + +int nr_extra_recs[2][GF_FOP_MAXVALUE] = {{ + [GF_FOP_MKNOD] = 3, + [GF_FOP_MKDIR] = 3, + [GF_FOP_UNLINK] = 0, + [GF_FOP_RMDIR] = 0, + [GF_FOP_SYMLINK] = 0, + [GF_FOP_RENAME] = 0, + 
[GF_FOP_LINK] = 0, + [GF_FOP_CREATE] = 3, + }, + { + [GF_FOP_MKNOD] = 3, + [GF_FOP_MKDIR] = 3, + [GF_FOP_UNLINK] = 0, + [GF_FOP_RMDIR] = 0, + [GF_FOP_SYMLINK] = 0, + [GF_FOP_RENAME] = 0, + [GF_FOP_LINK] = 0, + [GF_FOP_CREATE] = 3, + }}; static char * -binary_to_ascii (uuid_t uuid) +binary_to_ascii(uuid_t uuid) { - return uuid_utoa (uuid); + return uuid_utoa(uuid); } static char * -conv_noop (char *ptr) { return ptr; } - -#define VERIFY_SEPARATOR(ptr, plen, perr) \ - { \ - if (*(ptr + plen) != '\0') { \ - perr = 1; \ - break; \ - } \ - } +conv_noop(char *ptr) +{ + return ptr; +} -#define MOVER_MOVE(mover, nleft, bytes) \ - { \ - mover += bytes; \ - nleft -= bytes; \ - } \ - -#define PARSE_GFID(mov, ptr, le, fn, perr) \ - { \ - VERIFY_SEPARATOR (mov, le, perr); \ - ptr = fn (mov); \ - if (!ptr) { \ - perr = 1; \ - break; \ - } \ - } +#define VERIFY_SEPARATOR(ptr, plen, perr) \ + { \ + if (*(ptr + plen) != '\0') { \ + perr = 1; \ + break; \ + } \ + } -#define FILL_AND_MOVE(pt, buf, of, mo, nl, le) \ - { \ - GF_CHANGELOG_FILL_BUFFER (pt, buf, of, strlen (pt)); \ - MOVER_MOVE (mo, nl, le); \ - } +#define MOVER_MOVE(mover, nleft, bytes) \ + { \ + mover += bytes; \ + nleft -= bytes; \ + } + +#define PARSE_GFID(mov, ptr, le, fn, perr) \ + { \ + VERIFY_SEPARATOR(mov, le, perr); \ + ptr = fn(mov); \ + if (!ptr) { \ + perr = 1; \ + break; \ + } \ + } +#define FILL_AND_MOVE(pt, buf, of, mo, nl, le) \ + { \ + GF_CHANGELOG_FILL_BUFFER(pt, buf, of, strlen(pt)); \ + MOVER_MOVE(mo, nl, le); \ + } -#define PARSE_GFID_MOVE(ptr, uuid, mover, nleft, perr) \ - { \ - memcpy (uuid, mover, sizeof (uuid_t)); \ - ptr = binary_to_ascii (uuid); \ - if (!ptr) { \ - perr = 1; \ - break; \ - } \ - MOVER_MOVE (mover, nleft, sizeof (uuid_t)); \ - } \ +#define PARSE_GFID_MOVE(ptr, uuid, mover, nleft, perr) \ + { \ + memcpy(uuid, mover, sizeof(uuid_t)); \ + ptr = binary_to_ascii(uuid); \ + if (!ptr) { \ + perr = 1; \ + break; \ + } \ + MOVER_MOVE(mover, nleft, sizeof(uuid_t)); \ + } -#define 
LINE_BUFSIZE (3*PATH_MAX) /* enough buffer for extra chars too */ +#define LINE_BUFSIZE (3 * PATH_MAX) /* enough buffer for extra chars too */ /** * using mmap() makes parsing easy. fgets() cannot be used here as @@ -145,107 +140,107 @@ conv_noop (char *ptr) { return ptr; } */ static int -gf_changelog_parse_binary (xlator_t *this, - gf_changelog_journal_t *jnl, - int from_fd, int to_fd, - size_t start_offset, struct stat *stbuf, - int version_idx) +gf_changelog_parse_binary(xlator_t *this, gf_changelog_journal_t *jnl, + int from_fd, int to_fd, size_t start_offset, + struct stat *stbuf, int version_idx) { - int ret = -1; - off_t off = 0; - off_t nleft = 0; - uuid_t uuid = {0,}; - char *ptr = NULL; - char *bname_start = NULL; - char *bname_end = NULL; - char *mover = NULL; - void *start = NULL; - char current_mover = ' '; - size_t blen = 0; - int parse_err = 0; - char ascii[LINE_BUFSIZE] = {0,}; - - nleft = stbuf->st_size; - - start = mmap (NULL, nleft, PROT_READ, MAP_PRIVATE, from_fd, 0); - if (start == MAP_FAILED) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_LIB_MSG_MMAP_FAILED, - "mmap() error"); - goto out; - } - - mover = start; - - MOVER_MOVE (mover, nleft, start_offset); - - while (nleft > 0) { + int ret = -1; + off_t off = 0; + off_t nleft = 0; + uuid_t uuid = { + 0, + }; + char *ptr = NULL; + char *bname_start = NULL; + char *bname_end = NULL; + char *mover = NULL; + void *start = NULL; + char current_mover = ' '; + size_t blen = 0; + int parse_err = 0; + char *ascii = NULL; + + ascii = GF_CALLOC(LINE_BUFSIZE, sizeof(char), gf_common_mt_char); + + nleft = stbuf->st_size; + + start = mmap(NULL, nleft, PROT_READ, MAP_PRIVATE, from_fd, 0); + if (start == MAP_FAILED) { + gf_msg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_MMAP_FAILED, + "mmap() error"); + goto out; + } - off = blen = 0; - ptr = bname_start = bname_end = NULL; + mover = start; - current_mover = *mover; + MOVER_MOVE(mover, nleft, start_offset); - switch (current_mover) { - case 
'D': - case 'M': - MOVER_MOVE (mover, nleft, 1); - PARSE_GFID_MOVE (ptr, uuid, mover, nleft, parse_err); + while (nleft > 0) { + off = blen = 0; + ptr = bname_start = bname_end = NULL; - break; + current_mover = *mover; - case 'E': - MOVER_MOVE (mover, nleft, 1); - PARSE_GFID_MOVE (ptr, uuid, mover, nleft, parse_err); + switch (current_mover) { + case 'D': + case 'M': + MOVER_MOVE(mover, nleft, 1); + PARSE_GFID_MOVE(ptr, uuid, mover, nleft, parse_err); - bname_start = mover; - bname_end = strchr (mover, '\n'); - if (bname_end == NULL) { - parse_err = 1; - break; - } - - blen = bname_end - bname_start; - MOVER_MOVE (mover, nleft, blen); + break; - break; + case 'E': + MOVER_MOVE(mover, nleft, 1); + PARSE_GFID_MOVE(ptr, uuid, mover, nleft, parse_err); - default: - parse_err = 1; + bname_start = mover; + bname_end = strchr(mover, '\n'); + if (bname_end == NULL) { + parse_err = 1; + break; } - if (parse_err) - break; + blen = bname_end - bname_start; + MOVER_MOVE(mover, nleft, blen); - GF_CHANGELOG_FILL_BUFFER (&current_mover, ascii, off, 1); - GF_CHANGELOG_FILL_BUFFER (" ", ascii, off, 1); - GF_CHANGELOG_FILL_BUFFER (ptr, ascii, off, strlen (ptr)); - if (blen) - GF_CHANGELOG_FILL_BUFFER (bname_start, - ascii, off, blen); - GF_CHANGELOG_FILL_BUFFER ("\n", ascii, off, 1); - - if (gf_changelog_write (to_fd, ascii, off) != off) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_LIB_MSG_ASCII_ERROR, - "processing binary changelog failed due to " - " error in writing ascii change"); - break; - } + break; + + default: + parse_err = 1; + } - MOVER_MOVE (mover, nleft, 1); + if (parse_err) + break; + + GF_CHANGELOG_FILL_BUFFER(&current_mover, ascii, off, 1); + GF_CHANGELOG_FILL_BUFFER(" ", ascii, off, 1); + GF_CHANGELOG_FILL_BUFFER(ptr, ascii, off, strlen(ptr)); + if (blen) + GF_CHANGELOG_FILL_BUFFER(bname_start, ascii, off, blen); + GF_CHANGELOG_FILL_BUFFER("\n", ascii, off, 1); + + if (gf_changelog_write(to_fd, ascii, off) != off) { + gf_msg(this->name, GF_LOG_ERROR, errno, + 
CHANGELOG_LIB_MSG_ASCII_ERROR, + "processing binary changelog failed due to " + " error in writing ascii change"); + break; } - if ((nleft == 0) && (!parse_err)) - ret = 0; + MOVER_MOVE(mover, nleft, 1); + } + + if ((nleft == 0) && (!parse_err)) + ret = 0; - if (munmap (start, stbuf->st_size)) - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_LIB_MSG_MUNMAP_FAILED, - "munmap() error"); - out: - return ret; + if (munmap(start, stbuf->st_size)) + gf_msg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_MUNMAP_FAILED, + "munmap() error"); +out: + if (ascii) + GF_FREE(ascii); + return ret; } /** @@ -254,812 +249,781 @@ gf_changelog_parse_binary (xlator_t *this, * - use fop name rather than fop number */ static int -gf_changelog_parse_ascii (xlator_t *this, - gf_changelog_journal_t *jnl, - int from_fd, int to_fd, - size_t start_offset, struct stat *stbuf, - int version_idx) +gf_changelog_parse_ascii(xlator_t *this, gf_changelog_journal_t *jnl, + int from_fd, int to_fd, size_t start_offset, + struct stat *stbuf, int version_idx) { - int ng = 0; - int ret = -1; - int fop = 0; - int len = 0; - off_t off = 0; - off_t nleft = 0; - char *ptr = NULL; - char *eptr = NULL; - void *start = NULL; - char *mover = NULL; - int parse_err = 0; - char current_mover = ' '; - char ascii[LINE_BUFSIZE] = {0,}; - const char *fopname = NULL; - - nleft = stbuf->st_size; - - start = mmap (NULL, nleft, PROT_READ, MAP_PRIVATE, from_fd, 0); - if (start == MAP_FAILED) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_LIB_MSG_MMAP_FAILED, - "mmap() error"); - goto out; - } + int ng = 0; + int ret = -1; + int fop = 0; + int len = 0; + off_t off = 0; + off_t nleft = 0; + char *ptr = NULL; + char *eptr = NULL; + void *start = NULL; + char *mover = NULL; + int parse_err = 0; + char current_mover = ' '; + char *ascii = NULL; + const char *fopname = NULL; + + ascii = GF_CALLOC(LINE_BUFSIZE, sizeof(char), gf_common_mt_char); + + nleft = stbuf->st_size; + + start = mmap(NULL, nleft, PROT_READ, 
MAP_PRIVATE, from_fd, 0); + if (start == MAP_FAILED) { + gf_msg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_MMAP_FAILED, + "mmap() error"); + goto out; + } - mover = start; + mover = start; - MOVER_MOVE (mover, nleft, start_offset); + MOVER_MOVE(mover, nleft, start_offset); - while (nleft > 0) { - off = 0; - current_mover = *mover; + while (nleft > 0) { + off = 0; + current_mover = *mover; - GF_CHANGELOG_FILL_BUFFER (&current_mover, ascii, off, 1); - GF_CHANGELOG_FILL_BUFFER (" ", ascii, off, 1); + GF_CHANGELOG_FILL_BUFFER(&current_mover, ascii, off, 1); + GF_CHANGELOG_FILL_BUFFER(" ", ascii, off, 1); - switch (current_mover) { - case 'D': - MOVER_MOVE (mover, nleft, 1); + switch (current_mover) { + case 'D': + MOVER_MOVE(mover, nleft, 1); - /* target gfid */ - PARSE_GFID (mover, ptr, UUID_CANONICAL_FORM_LEN, - conv_noop, parse_err); - FILL_AND_MOVE(ptr, ascii, off, - mover, nleft, UUID_CANONICAL_FORM_LEN); - break; - case 'M': - MOVER_MOVE (mover, nleft, 1); + /* target gfid */ + PARSE_GFID(mover, ptr, UUID_CANONICAL_FORM_LEN, conv_noop, + parse_err); + FILL_AND_MOVE(ptr, ascii, off, mover, nleft, + UUID_CANONICAL_FORM_LEN); + break; + case 'M': + MOVER_MOVE(mover, nleft, 1); + + /* target gfid */ + PARSE_GFID(mover, ptr, UUID_CANONICAL_FORM_LEN, conv_noop, + parse_err); + FILL_AND_MOVE(ptr, ascii, off, mover, nleft, + UUID_CANONICAL_FORM_LEN); + FILL_AND_MOVE(" ", ascii, off, mover, nleft, 1); + + /* fop */ + len = strlen(mover); + VERIFY_SEPARATOR(mover, len, parse_err); + + fop = atoi(mover); + fopname = gf_fop_list[fop]; + if (fopname == NULL) { + parse_err = 1; + break; + } - /* target gfid */ - PARSE_GFID (mover, ptr, UUID_CANONICAL_FORM_LEN, - conv_noop, parse_err); - FILL_AND_MOVE (ptr, ascii, off, - mover, nleft, UUID_CANONICAL_FORM_LEN); - FILL_AND_MOVE (" ", ascii, off, mover, nleft, 1); + MOVER_MOVE(mover, nleft, len); - /* fop */ - len = strlen (mover); - VERIFY_SEPARATOR (mover, len, parse_err); + len = strlen(fopname); + GF_CHANGELOG_FILL_BUFFER(fopname, 
ascii, off, len); - fop = atoi (mover); - fopname = gf_fop_list[fop]; - if (fopname == NULL) { - parse_err = 1; - break; - } + break; - MOVER_MOVE (mover, nleft, len); + case 'E': + MOVER_MOVE(mover, nleft, 1); + + /* target gfid */ + PARSE_GFID(mover, ptr, UUID_CANONICAL_FORM_LEN, conv_noop, + parse_err); + FILL_AND_MOVE(ptr, ascii, off, mover, nleft, + UUID_CANONICAL_FORM_LEN); + FILL_AND_MOVE(" ", ascii, off, mover, nleft, 1); + + /* fop */ + len = strlen(mover); + VERIFY_SEPARATOR(mover, len, parse_err); + + fop = atoi(mover); + fopname = gf_fop_list[fop]; + if (fopname == NULL) { + parse_err = 1; + break; + } - len = strlen (fopname); - GF_CHANGELOG_FILL_BUFFER (fopname, ascii, off, len); + MOVER_MOVE(mover, nleft, len); - break; + len = strlen(fopname); + GF_CHANGELOG_FILL_BUFFER(fopname, ascii, off, len); - case 'E': - MOVER_MOVE (mover, nleft, 1); - - /* target gfid */ - PARSE_GFID (mover, ptr, UUID_CANONICAL_FORM_LEN, - conv_noop, parse_err); - FILL_AND_MOVE (ptr, ascii, off, - mover, nleft, UUID_CANONICAL_FORM_LEN); - FILL_AND_MOVE (" ", ascii, off, - mover, nleft, 1); - - /* fop */ - len = strlen (mover); - VERIFY_SEPARATOR (mover, len, parse_err); - - fop = atoi (mover); - fopname = gf_fop_list[fop]; - if (fopname == NULL) { - parse_err = 1; - break; - } - - MOVER_MOVE (mover, nleft, len); - - len = strlen (fopname); - GF_CHANGELOG_FILL_BUFFER (fopname, ascii, off, len); - - ng = nr_extra_recs[version_idx][fop]; - for (; ng > 0; ng--) { - MOVER_MOVE (mover, nleft, 1); - len = strlen (mover); - VERIFY_SEPARATOR (mover, len, parse_err); - - GF_CHANGELOG_FILL_BUFFER (" ", ascii, off, 1); - FILL_AND_MOVE (mover, ascii, - off, mover, nleft, len); - } - - /* pargfid + bname */ - ng = nr_gfids[version_idx][fop]; - while (ng-- > 0) { - MOVER_MOVE (mover, nleft, 1); - len = strlen (mover); - if (!len) { - MOVER_MOVE (mover, nleft, 1); - continue; - } - - GF_CHANGELOG_FILL_BUFFER (" ", ascii, off, 1); - - PARSE_GFID (mover, ptr, len, - conv_noop, parse_err); - 
eptr = calloc (3, strlen (ptr)); - if (!eptr) { - parse_err = 1; - break; - } - - gf_rfc3986_encode ((unsigned char *) ptr, - eptr, jnl->rfc3986); - FILL_AND_MOVE (eptr, ascii, off, - mover, nleft, len); - free (eptr); - } + ng = nr_extra_recs[version_idx][fop]; + for (; ng > 0; ng--) { + MOVER_MOVE(mover, nleft, 1); + len = strlen(mover); + VERIFY_SEPARATOR(mover, len, parse_err); - break; - default: - parse_err = 1; + GF_CHANGELOG_FILL_BUFFER(" ", ascii, off, 1); + FILL_AND_MOVE(mover, ascii, off, mover, nleft, len); } - if (parse_err) + /* pargfid + bname */ + ng = nr_gfids[version_idx][fop]; + while (ng-- > 0) { + MOVER_MOVE(mover, nleft, 1); + len = strlen(mover); + if (!len) { + MOVER_MOVE(mover, nleft, 1); + continue; + } + + GF_CHANGELOG_FILL_BUFFER(" ", ascii, off, 1); + + PARSE_GFID(mover, ptr, len, conv_noop, parse_err); + eptr = calloc(3, strlen(ptr)); + if (!eptr) { + parse_err = 1; break; + } - GF_CHANGELOG_FILL_BUFFER ("\n", ascii, off, 1); - - if (gf_changelog_write (to_fd, ascii, off) != off) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_LIB_MSG_ASCII_ERROR, - "processing ascii changelog failed due to " - " error in writing change"); - break; + gf_rfc3986_encode_space_newline((unsigned char *)ptr, eptr, + jnl->rfc3986_space_newline); + FILL_AND_MOVE(eptr, ascii, off, mover, nleft, len); + free(eptr); } - MOVER_MOVE (mover, nleft, 1); - + break; + default: + parse_err = 1; } - if ((nleft == 0) && (!parse_err)) - ret = 0; + if (parse_err) + break; - if (munmap (start, stbuf->st_size)) - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_LIB_MSG_MUNMAP_FAILED, - "munmap() error"); + GF_CHANGELOG_FILL_BUFFER("\n", ascii, off, 1); - out: - return ret; -} + if (gf_changelog_write(to_fd, ascii, off) != off) { + gf_msg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_LIB_MSG_ASCII_ERROR, + "processing ascii changelog failed due to " + " error in writing change"); + break; + } -#define COPY_BUFSIZE 8192 -static int -gf_changelog_copy (xlator_t 
*this, int from_fd, int to_fd) -{ - ssize_t size = 0; - char buffer[COPY_BUFSIZE+1] = {0,}; + MOVER_MOVE(mover, nleft, 1); + } - while (1) { - size = sys_read (from_fd, buffer, COPY_BUFSIZE); - if (size <= 0) - break; + if ((nleft == 0) && (!parse_err)) + ret = 0; - if (gf_changelog_write (to_fd, - buffer, size) != size) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CHANGELOG_LIB_MSG_COPY_FROM_BUFFER_FAILED, - "error processing ascii changlog"); - size = -1; - break; - } - } + if (munmap(start, stbuf->st_size)) + gf_msg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_MUNMAP_FAILED, + "munmap() error"); - return (size < 0 ? -1 : 0); +out: + if (ascii) + GF_FREE(ascii); + + return ret; } static int -gf_changelog_decode (xlator_t *this, gf_changelog_journal_t *jnl, - int from_fd, int to_fd, struct stat *stbuf, int *zerob) +gf_changelog_decode(xlator_t *this, gf_changelog_journal_t *jnl, int from_fd, + int to_fd, struct stat *stbuf, int *zerob) { - int ret = -1; - int encoding = -1; - int major_version = -1; - int minor_version = -1; - int version_idx = -1; - size_t elen = 0; - char buffer[1024] = {0,}; - - CHANGELOG_GET_HEADER_INFO (from_fd, buffer, 1024, encoding, - major_version, minor_version, elen); - if (encoding == -1) /* unknown encoding */ - goto out; - - if (major_version == -1) /* unknown major version */ - goto out; - - if (minor_version == -1) /* unknown minor version */ - goto out; - - if (!CHANGELOG_VALID_ENCODING (encoding)) - goto out; - - if (elen == stbuf->st_size) { - *zerob = 1; - goto out; - } - - if (major_version == 1 && minor_version == 1) { - version_idx = VERSION_1_1; - } else if (major_version == 1 && minor_version == 2) { - version_idx = VERSION_1_2; - } + int ret = -1; + int encoding = -1; + int major_version = -1; + int minor_version = -1; + int version_idx = -1; + size_t elen = 0; + char buffer[1024] = { + 0, + }; + + CHANGELOG_GET_HEADER_INFO(from_fd, buffer, sizeof(buffer), encoding, + major_version, minor_version, elen); + if 
(encoding == -1) /* unknown encoding */ + goto out; + + if (major_version == -1) /* unknown major version */ + goto out; + + if (minor_version == -1) /* unknown minor version */ + goto out; + + if (!CHANGELOG_VALID_ENCODING(encoding)) + goto out; + + if (elen == stbuf->st_size) { + *zerob = 1; + goto out; + } - if (version_idx == -1) /* unknown version number */ - goto out; + if (major_version == 1 && minor_version == 1) { + version_idx = VERSION_1_1; + } else if (major_version == 1 && minor_version == 2) { + version_idx = VERSION_1_2; + } - /** - * start processing after the header - */ - sys_lseek (from_fd, elen, SEEK_SET); + if (version_idx == -1) /* unknown version number */ + goto out; - switch (encoding) { + /** + * start processing after the header + */ + if (sys_lseek(from_fd, elen, SEEK_SET) < 0) { + goto out; + } + switch (encoding) { case CHANGELOG_ENCODE_BINARY: - /** - * this ideally should have been a part of changelog-encoders.c - * (ie. part of the changelog translator). - */ - ret = gf_changelog_parse_binary (this, jnl, from_fd, - to_fd, elen, stbuf, - version_idx); - break; + /** + * this ideally should have been a part of changelog-encoders.c + * (ie. part of the changelog translator). 
+ */ + ret = gf_changelog_parse_binary(this, jnl, from_fd, to_fd, elen, + stbuf, version_idx); + break; case CHANGELOG_ENCODE_ASCII: - ret = gf_changelog_parse_ascii (this, jnl, from_fd, - to_fd, elen, stbuf, - version_idx); - break; - default: - ret = gf_changelog_copy (this, from_fd, to_fd); - } + ret = gf_changelog_parse_ascii(this, jnl, from_fd, to_fd, elen, + stbuf, version_idx); + break; + } - out: - return ret; +out: + return ret; } int -gf_changelog_publish (xlator_t *this, - gf_changelog_journal_t *jnl, char *from_path) +gf_changelog_publish(xlator_t *this, gf_changelog_journal_t *jnl, + char *from_path) { - int ret = 0; - char dest[PATH_MAX] = {0,}; - char to_path[PATH_MAX] = {0,}; - struct stat stbuf = {0,}; - - (void) snprintf (to_path, PATH_MAX, "%s%s", - jnl->jnl_current_dir, basename (from_path)); - - /* handle zerob file that wont exist in current */ - ret = sys_stat (to_path, &stbuf); - if (ret) { - if (errno == ENOENT) - ret = 0; - goto out; - } + int ret = 0; + char dest[PATH_MAX] = { + 0, + }; + char to_path[PATH_MAX] = { + 0, + }; + struct stat stbuf = { + 0, + }; + + if (snprintf(to_path, PATH_MAX, "%s%s", jnl->jnl_current_dir, + basename(from_path)) >= PATH_MAX) + return -1; - (void) snprintf (dest, PATH_MAX, "%s%s", - jnl->jnl_processing_dir, basename (from_path)); + /* handle zerob file that won't exist in current */ + ret = sys_stat(to_path, &stbuf); + if (ret) { + if (errno == ENOENT) + ret = 0; + goto out; + } - ret = sys_rename (to_path, dest); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_LIB_MSG_RENAME_FAILED, - "error moving %s to processing dir", - to_path); - } + if (snprintf(dest, PATH_MAX, "%s%s", jnl->jnl_processing_dir, + basename(from_path)) >= PATH_MAX) + return -1; + + ret = sys_rename(to_path, dest); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_LIB_MSG_RENAME_FAILED, "from=%s", to_path, "to=%s", + dest, NULL); + } out: - return ret; + return ret; } int -gf_changelog_consume 
(xlator_t *this, - gf_changelog_journal_t *jnl, - char *from_path, gf_boolean_t no_publish) +gf_changelog_consume(xlator_t *this, gf_changelog_journal_t *jnl, + char *from_path, gf_boolean_t no_publish) { - int ret = -1; - int fd1 = 0; - int fd2 = 0; - int zerob = 0; - struct stat stbuf = {0,}; - char dest[PATH_MAX] = {0,}; - char to_path[PATH_MAX] = {0,}; - - ret = sys_stat (from_path, &stbuf); - if (ret || !S_ISREG(stbuf.st_mode)) { - ret = -1; - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_LIB_MSG_STAT_FAILED, - "stat failed on changelog file: %s", from_path); - goto out; - } + int ret = -1; + int fd1 = 0; + int fd2 = 0; + int zerob = 0; + struct stat stbuf = { + 0, + }; + char dest[PATH_MAX] = { + 0, + }; + char to_path[PATH_MAX] = { + 0, + }; + + if (snprintf(to_path, PATH_MAX, "%s%s", jnl->jnl_current_dir, + basename(from_path)) >= PATH_MAX) + goto out; + if (snprintf(dest, PATH_MAX, "%s%s", jnl->jnl_processing_dir, + basename(from_path)) >= PATH_MAX) + goto out; + + ret = sys_stat(from_path, &stbuf); + if (ret || !S_ISREG(stbuf.st_mode)) { + ret = -1; + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_STAT_FAILED, + "path=%s", from_path, NULL); + goto out; + } - fd1 = open (from_path, O_RDONLY); - if (fd1 < 0) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_LIB_MSG_OPEN_FAILED, - "cannot open changelog file: %s", - from_path); - goto out; - } + fd1 = open(from_path, O_RDONLY); + if (fd1 < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_OPEN_FAILED, + "path=%s", from_path, NULL); + goto out; + } - (void) snprintf (to_path, PATH_MAX, "%s%s", - jnl->jnl_current_dir, basename (from_path)); - (void) snprintf (dest, PATH_MAX, "%s%s", - jnl->jnl_processing_dir, basename (from_path)); - - fd2 = open (to_path, O_CREAT | O_TRUNC | O_RDWR, - S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); - if (fd2 < 0) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_LIB_MSG_OPEN_FAILED, - "cannot create ascii changelog file %s", - 
to_path); + fd2 = open(to_path, O_CREAT | O_TRUNC | O_RDWR, + S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); + if (fd2 < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_OPEN_FAILED, + "path=%s", to_path, NULL); + goto close_fd; + } else { + ret = gf_changelog_decode(this, jnl, fd1, fd2, &stbuf, &zerob); + + sys_close(fd2); + + if (!ret) { + /* move it to processing on a successful + decode */ + if (no_publish == _gf_true) goto close_fd; - } else { - ret = gf_changelog_decode (this, jnl, fd1, - fd2, &stbuf, &zerob); - - sys_close (fd2); - - if (!ret) { - /* move it to processing on a successful - decode */ - if (no_publish == _gf_true) - goto close_fd; - ret = sys_rename (to_path, dest); - if (ret) - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_LIB_MSG_RENAME_FAILED, - "error moving %s to processing dir", - to_path); - } + ret = sys_rename(to_path, dest); + if (ret) + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_LIB_MSG_RENAME_FAILED, "from=%s", to_path, + "to=%s", dest, NULL); + } - /* remove it from .current if it's an empty file */ - if (zerob) { - /* zerob changelogs must be unlinked */ - ret = sys_unlink (to_path); - if (ret) - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_LIB_MSG_UNLINK_FAILED, - "could not unlink %s", - to_path); - } + /* remove it from .current if it's an empty file */ + if (zerob) { + /* zerob changelogs must be unlinked */ + ret = sys_unlink(to_path); + if (ret) + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_LIB_MSG_UNLINK_FAILED, "name=empty changelog", + "path=%s", to_path, NULL); } + } - close_fd: - sys_close (fd1); +close_fd: + sys_close(fd1); - out: - return ret; +out: + return ret; } void * -gf_changelog_process (void *data) +gf_changelog_process(void *data) { - int ret = 0; - xlator_t *this = NULL; - gf_changelog_journal_t *jnl = NULL; - gf_changelog_entry_t *entry = NULL; - gf_changelog_processor_t *jnl_proc = NULL; - - jnl = data; - jnl_proc = jnl->jnl_proc; - THIS = jnl->this; - this = 
jnl->this; - - while (1) { - pthread_mutex_lock (&jnl_proc->lock); - { - while (list_empty (&jnl_proc->entries)) { - jnl_proc->waiting = _gf_true; - pthread_cond_wait - (&jnl_proc->cond, &jnl_proc->lock); - } - - entry = list_first_entry (&jnl_proc->entries, - gf_changelog_entry_t, list); - list_del (&entry->list); - jnl_proc->waiting = _gf_false; - } - pthread_mutex_unlock (&jnl_proc->lock); + xlator_t *this = NULL; + gf_changelog_journal_t *jnl = NULL; + gf_changelog_entry_t *entry = NULL; + gf_changelog_processor_t *jnl_proc = NULL; + + jnl = data; + jnl_proc = jnl->jnl_proc; + THIS = jnl->this; + this = jnl->this; + + while (1) { + pthread_mutex_lock(&jnl_proc->lock); + { + while (list_empty(&jnl_proc->entries)) { + jnl_proc->waiting = _gf_true; + pthread_cond_wait(&jnl_proc->cond, &jnl_proc->lock); + } - if (entry) { - ret = gf_changelog_consume (this, jnl, - entry->path, _gf_false); - GF_FREE (entry); - } + entry = list_first_entry(&jnl_proc->entries, gf_changelog_entry_t, + list); + if (entry) + list_del(&entry->list); + + jnl_proc->waiting = _gf_false; } + pthread_mutex_unlock(&jnl_proc->lock); - return NULL; + if (entry) { + (void)gf_changelog_consume(this, jnl, entry->path, _gf_false); + GF_FREE(entry); + } + } + + return NULL; } void -gf_changelog_queue_journal (gf_changelog_processor_t *jnl_proc, - changelog_event_t *event) +gf_changelog_queue_journal(gf_changelog_processor_t *jnl_proc, + changelog_event_t *event) { - size_t len = 0; - gf_changelog_entry_t *entry = NULL; + size_t len = 0; + gf_changelog_entry_t *entry = NULL; - entry = GF_CALLOC (1, sizeof (gf_changelog_entry_t), - gf_changelog_mt_libgfchangelog_entry_t); - if (!entry) - return; - INIT_LIST_HEAD (&entry->list); + entry = GF_CALLOC(1, sizeof(gf_changelog_entry_t), + gf_changelog_mt_libgfchangelog_entry_t); + if (!entry) + return; + INIT_LIST_HEAD(&entry->list); - len = strlen (event->u.journal.path); - (void)memcpy (entry->path, event->u.journal.path, len+1); + len = 
strlen(event->u.journal.path); + (void)memcpy(entry->path, event->u.journal.path, len + 1); + entry->path[len] = '\0'; - pthread_mutex_lock (&jnl_proc->lock); - { - list_add_tail (&entry->list, &jnl_proc->entries); - if (jnl_proc->waiting) - pthread_cond_signal (&jnl_proc->cond); - } - pthread_mutex_unlock (&jnl_proc->lock); + pthread_mutex_lock(&jnl_proc->lock); + { + list_add_tail(&entry->list, &jnl_proc->entries); + if (jnl_proc->waiting) + pthread_cond_signal(&jnl_proc->cond); + } + pthread_mutex_unlock(&jnl_proc->lock); - return; + return; } void -gf_changelog_handle_journal (void *xl, char *brick, - void *cbkdata, changelog_event_t *event) +gf_changelog_handle_journal(void *xl, char *brick, void *cbkdata, + changelog_event_t *event) { - int ret = 0; - gf_changelog_journal_t *jnl = NULL; - gf_changelog_processor_t *jnl_proc = NULL; + gf_changelog_journal_t *jnl = NULL; + gf_changelog_processor_t *jnl_proc = NULL; - jnl = cbkdata; - jnl_proc = jnl->jnl_proc; + jnl = cbkdata; + jnl_proc = jnl->jnl_proc; - gf_changelog_queue_journal (jnl_proc, event); + gf_changelog_queue_journal(jnl_proc, event); } void -gf_changelog_journal_disconnect (void *xl, char *brick, void *data) +gf_changelog_journal_disconnect(void *xl, char *brick, void *data) { - gf_changelog_journal_t *jnl = NULL; + gf_changelog_journal_t *jnl = NULL; - jnl = data; + jnl = data; - pthread_spin_lock (&jnl->lock); - { - JNL_SET_API_STATE (jnl, JNL_API_DISCONNECTED); - }; - pthread_spin_unlock (&jnl->lock); + pthread_spin_lock(&jnl->lock); + { + JNL_SET_API_STATE(jnl, JNL_API_DISCONNECTED); + }; + pthread_spin_unlock(&jnl->lock); } void -gf_changelog_journal_connect (void *xl, char *brick, void *data) +gf_changelog_journal_connect(void *xl, char *brick, void *data) { - gf_changelog_journal_t *jnl = NULL; + gf_changelog_journal_t *jnl = NULL; - jnl = data; + jnl = data; - pthread_spin_lock (&jnl->lock); - { - JNL_SET_API_STATE (jnl, JNL_API_CONNECTED); - }; - pthread_spin_unlock (&jnl->lock); + 
pthread_spin_lock(&jnl->lock); + { + JNL_SET_API_STATE(jnl, JNL_API_CONNECTED); + }; + pthread_spin_unlock(&jnl->lock); - return; + return; } void -gf_changelog_cleanup_processor (gf_changelog_journal_t *jnl) +gf_changelog_cleanup_processor(gf_changelog_journal_t *jnl) { - int ret = 0; - xlator_t *this = NULL; - gf_changelog_processor_t *jnl_proc = NULL; - - this = THIS; - if (!this || !jnl || !jnl->jnl_proc) - goto error_return; - - jnl_proc = jnl->jnl_proc; - - ret = gf_thread_cleanup (this, jnl_proc->processor); - if (ret != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CHANGELOG_LIB_MSG_CLEANUP_ERROR, - "failed to cleanup processor thread"); - goto error_return; - } + int ret = 0; + xlator_t *this = NULL; + gf_changelog_processor_t *jnl_proc = NULL; - (void)pthread_mutex_destroy (&jnl_proc->lock); - (void)pthread_cond_destroy (&jnl_proc->cond); + this = THIS; + if (!this || !jnl || !jnl->jnl_proc) + goto error_return; - GF_FREE (jnl_proc); + jnl_proc = jnl->jnl_proc; - error_return: - return; + ret = gf_thread_cleanup(this, jnl_proc->processor); + if (ret != 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_LIB_MSG_CLEANUP_ERROR, + "failed to cleanup processor thread"); + goto error_return; + } + + (void)pthread_mutex_destroy(&jnl_proc->lock); + (void)pthread_cond_destroy(&jnl_proc->cond); + + GF_FREE(jnl_proc); + +error_return: + return; } int -gf_changelog_init_processor (gf_changelog_journal_t *jnl) +gf_changelog_init_processor(gf_changelog_journal_t *jnl) { - int ret = -1; - gf_changelog_processor_t *jnl_proc = NULL; + int ret = -1; + gf_changelog_processor_t *jnl_proc = NULL; - jnl_proc = GF_CALLOC (1, sizeof (gf_changelog_processor_t), - gf_changelog_mt_libgfchangelog_t); - if (!jnl_proc) - goto error_return; - - ret = pthread_mutex_init (&jnl_proc->lock, NULL); - if (ret != 0) - goto free_jnl_proc; - ret = pthread_cond_init (&jnl_proc->cond, NULL); - if (ret != 0) - goto cleanup_mutex; - - INIT_LIST_HEAD (&jnl_proc->entries); - jnl_proc->waiting = 
_gf_false; - jnl->jnl_proc = jnl_proc; - - ret = pthread_create (&jnl_proc->processor, - NULL, gf_changelog_process, jnl); - if (ret != 0) { - jnl->jnl_proc = NULL; - goto cleanup_cond; - } + jnl_proc = GF_CALLOC(1, sizeof(gf_changelog_processor_t), + gf_changelog_mt_libgfchangelog_t); + if (!jnl_proc) + goto error_return; + + ret = pthread_mutex_init(&jnl_proc->lock, NULL); + if (ret != 0) + goto free_jnl_proc; + ret = pthread_cond_init(&jnl_proc->cond, NULL); + if (ret != 0) + goto cleanup_mutex; + + INIT_LIST_HEAD(&jnl_proc->entries); + jnl_proc->waiting = _gf_false; + jnl->jnl_proc = jnl_proc; + + ret = gf_thread_create(&jnl_proc->processor, NULL, gf_changelog_process, + jnl, "clogproc"); + if (ret != 0) { + jnl->jnl_proc = NULL; + goto cleanup_cond; + } - return 0; + return 0; - cleanup_cond: - (void) pthread_cond_destroy (&jnl_proc->cond); - cleanup_mutex: - (void) pthread_mutex_destroy (&jnl_proc->lock); - free_jnl_proc: - GF_FREE (jnl_proc); - error_return: - return -1; +cleanup_cond: + (void)pthread_cond_destroy(&jnl_proc->cond); +cleanup_mutex: + (void)pthread_mutex_destroy(&jnl_proc->lock); +free_jnl_proc: + GF_FREE(jnl_proc); +error_return: + return -1; } static void -gf_changelog_cleanup_fds (gf_changelog_journal_t *jnl) +gf_changelog_cleanup_fds(gf_changelog_journal_t *jnl) { - /* tracker fd */ - if (jnl->jnl_fd != -1) - sys_close (jnl->jnl_fd); - /* processing dir */ - if (jnl->jnl_dir) - sys_closedir (jnl->jnl_dir); - - if (jnl->jnl_working_dir) - free (jnl->jnl_working_dir); /* allocated by realpath */ + /* tracker fd */ + if (jnl->jnl_fd != -1) + sys_close(jnl->jnl_fd); + /* processing dir */ + if (jnl->jnl_dir) + sys_closedir(jnl->jnl_dir); + + if (jnl->jnl_working_dir) + free(jnl->jnl_working_dir); /* allocated by realpath */ } static int -gf_changelog_open_dirs (xlator_t *this, gf_changelog_journal_t *jnl) +gf_changelog_open_dirs(xlator_t *this, gf_changelog_journal_t *jnl) { - int ret = -1; - DIR *dir = NULL; - int tracker_fd = 0; - char 
tracker_path[PATH_MAX] = {0,}; - - /* .current */ - (void) snprintf (jnl->jnl_current_dir, PATH_MAX, - "%s/"GF_CHANGELOG_CURRENT_DIR"/", - jnl->jnl_working_dir); - ret = recursive_rmdir (jnl->jnl_current_dir); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_LIB_MSG_FAILED_TO_RMDIR, - "Failed to rmdir: %s", - jnl->jnl_current_dir); - goto out; - } - ret = mkdir_p (jnl->jnl_current_dir, 0600, _gf_false); - if (ret) - goto out; + int ret = -1; + DIR *dir = NULL; + int tracker_fd = 0; + char tracker_path[PATH_MAX] = { + 0, + }; + + /* .current */ + (void)snprintf(jnl->jnl_current_dir, PATH_MAX, + "%s/" GF_CHANGELOG_CURRENT_DIR "/", jnl->jnl_working_dir); + ret = recursive_rmdir(jnl->jnl_current_dir); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_LIB_MSG_FAILED_TO_RMDIR, "path=%s", + jnl->jnl_current_dir, NULL); + goto out; + } + ret = mkdir_p(jnl->jnl_current_dir, 0600, _gf_false); + if (ret) + goto out; + + /* .processed */ + (void)snprintf(jnl->jnl_processed_dir, PATH_MAX, + "%s/" GF_CHANGELOG_PROCESSED_DIR "/", jnl->jnl_working_dir); + ret = mkdir_p(jnl->jnl_processed_dir, 0600, _gf_false); + if (ret) + goto out; + + /* .processing */ + (void)snprintf(jnl->jnl_processing_dir, PATH_MAX, + "%s/" GF_CHANGELOG_PROCESSING_DIR "/", jnl->jnl_working_dir); + ret = recursive_rmdir(jnl->jnl_processing_dir); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_LIB_MSG_FAILED_TO_RMDIR, "path=%s", + jnl->jnl_processing_dir, NULL); + goto out; + } - /* .processed */ - (void) snprintf (jnl->jnl_processed_dir, PATH_MAX, - "%s/"GF_CHANGELOG_PROCESSED_DIR"/", - jnl->jnl_working_dir); - ret = mkdir_p (jnl->jnl_processed_dir, 0600, _gf_false); - if (ret) - goto out; - - /* .processing */ - (void) snprintf (jnl->jnl_processing_dir, PATH_MAX, - "%s/"GF_CHANGELOG_PROCESSING_DIR"/", - jnl->jnl_working_dir); - ret = recursive_rmdir (jnl->jnl_processing_dir); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, errno, - 
CHANGELOG_LIB_MSG_FAILED_TO_RMDIR, - "Failed to rmdir: %s", - jnl->jnl_processing_dir); - goto out; - } + ret = mkdir_p(jnl->jnl_processing_dir, 0600, _gf_false); + if (ret) + goto out; - ret = mkdir_p (jnl->jnl_processing_dir, 0600, _gf_false); - if (ret) - goto out; - - dir = sys_opendir (jnl->jnl_processing_dir); - if (!dir) { - gf_msg ("", GF_LOG_ERROR, errno, - CHANGELOG_LIB_MSG_OPENDIR_ERROR, - "opendir() error"); - goto out; - } + dir = sys_opendir(jnl->jnl_processing_dir); + if (!dir) { + gf_msg("", GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_OPENDIR_ERROR, + "opendir() error"); + goto out; + } - jnl->jnl_dir = dir; + jnl->jnl_dir = dir; - (void) snprintf (tracker_path, PATH_MAX, - "%s/"GF_CHANGELOG_TRACKER, jnl->jnl_working_dir); + (void)snprintf(tracker_path, PATH_MAX, "%s/" GF_CHANGELOG_TRACKER, + jnl->jnl_working_dir); - tracker_fd = open (tracker_path, O_CREAT | O_APPEND | O_RDWR, - S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); - if (tracker_fd < 0) { - sys_closedir (jnl->jnl_dir); - ret = -1; - goto out; - } + tracker_fd = open(tracker_path, O_CREAT | O_APPEND | O_RDWR, + S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); + if (tracker_fd < 0) { + sys_closedir(jnl->jnl_dir); + ret = -1; + goto out; + } - jnl->jnl_fd = tracker_fd; - ret = 0; - out: - return ret; + jnl->jnl_fd = tracker_fd; + ret = 0; +out: + return ret; } int -gf_changelog_init_history (xlator_t *this, - gf_changelog_journal_t *jnl, - char *brick_path) +gf_changelog_init_history(xlator_t *this, gf_changelog_journal_t *jnl, + char *brick_path) { - int i = 0; - int ret = 0; - char hist_scratch_dir[PATH_MAX] = {0,}; + int i = 0; + int ret = 0; + char hist_scratch_dir[PATH_MAX] = { + 0, + }; - jnl->hist_jnl = GF_CALLOC (1, sizeof (*jnl), - gf_changelog_mt_libgfchangelog_t); - if (!jnl->hist_jnl) - goto error_return; + jnl->hist_jnl = GF_CALLOC(1, sizeof(*jnl), + gf_changelog_mt_libgfchangelog_t); + if (!jnl->hist_jnl) + goto error_return; - jnl->hist_jnl->jnl_dir = NULL; - jnl->hist_jnl->jnl_fd = -1; + 
jnl->hist_jnl->jnl_dir = NULL; + jnl->hist_jnl->jnl_fd = -1; - (void) snprintf (hist_scratch_dir, PATH_MAX, - "%s/"GF_CHANGELOG_HISTORY_DIR"/", - jnl->jnl_working_dir); + (void)snprintf(hist_scratch_dir, PATH_MAX, + "%s/" GF_CHANGELOG_HISTORY_DIR "/", jnl->jnl_working_dir); - ret = mkdir_p (hist_scratch_dir, 0600, _gf_false); - if (ret) - goto dealloc_hist; - - jnl->hist_jnl->jnl_working_dir = realpath (hist_scratch_dir, NULL); - if (!jnl->hist_jnl->jnl_working_dir) - goto dealloc_hist; - - ret = gf_changelog_open_dirs (this, jnl->hist_jnl); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CHANGELOG_LIB_MSG_OPENDIR_ERROR, - "could not create entries in history scratch dir"); - goto dealloc_hist; - } + ret = mkdir_p(hist_scratch_dir, 0600, _gf_false); + if (ret) + goto dealloc_hist; - (void) strncpy (jnl->hist_jnl->jnl_brickpath, brick_path, PATH_MAX-1); - jnl->hist_jnl->jnl_brickpath[PATH_MAX-1] = 0; + jnl->hist_jnl->jnl_working_dir = realpath(hist_scratch_dir, NULL); + if (!jnl->hist_jnl->jnl_working_dir) + goto dealloc_hist; - for (i = 0; i < 256; i++) { - jnl->hist_jnl->rfc3986[i] = - (isalnum(i) || i == '~' || - i == '-' || i == '.' || i == '_') ? i : 0; - } + ret = gf_changelog_open_dirs(this, jnl->hist_jnl); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_LIB_MSG_OPENDIR_ERROR, + "could not create entries in history scratch dir"); + goto dealloc_hist; + } - return 0; + if (snprintf(jnl->hist_jnl->jnl_brickpath, PATH_MAX, "%s", brick_path) >= + PATH_MAX) + goto dealloc_hist; - dealloc_hist: - GF_FREE (jnl->hist_jnl); - jnl->hist_jnl = NULL; - error_return: - return -1; + for (i = 0; i < 256; i++) { + jnl->hist_jnl->rfc3986_space_newline[i] = (i == ' ' || i == '\n' || + i == '%') + ? 
0 + : i; + } + + return 0; + +dealloc_hist: + GF_FREE(jnl->hist_jnl); + jnl->hist_jnl = NULL; +error_return: + return -1; } void -gf_changelog_journal_fini (void *xl, char *brick, void *data) +gf_changelog_journal_fini(void *xl, char *brick, void *data) { - int ret = 0; - xlator_t *this = NULL; - gf_changelog_journal_t *jnl = NULL; + gf_changelog_journal_t *jnl = NULL; - this = xl; - jnl = data; + jnl = data; - gf_changelog_cleanup_processor (jnl); + gf_changelog_cleanup_processor(jnl); - gf_changelog_cleanup_fds (jnl); - if (jnl->hist_jnl) - gf_changelog_cleanup_fds (jnl->hist_jnl); + gf_changelog_cleanup_fds(jnl); + if (jnl->hist_jnl) + gf_changelog_cleanup_fds(jnl->hist_jnl); - GF_FREE (jnl); + GF_FREE(jnl); } void * -gf_changelog_journal_init (void *xl, struct gf_brick_spec *brick) +gf_changelog_journal_init(void *xl, struct gf_brick_spec *brick) { - int i = 0; - int ret = 0; - xlator_t *this = NULL; - struct stat buf = {0,}; - char *scratch_dir = NULL; - gf_changelog_journal_t *jnl = NULL; - - this = xl; - scratch_dir = (char *) brick->ptr; - - jnl = GF_CALLOC (1, sizeof (gf_changelog_journal_t), - gf_changelog_mt_libgfchangelog_t); - if (!jnl) - goto error_return; - - if (sys_stat (scratch_dir, &buf) && errno == ENOENT) { - ret = mkdir_p (scratch_dir, 0600, _gf_true); - if (ret) - goto dealloc_private; - } - - jnl->jnl_working_dir = realpath (scratch_dir, NULL); - if (!jnl->jnl_working_dir) - goto dealloc_private; - - ret = gf_changelog_open_dirs (this, jnl); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CHANGELOG_LIB_MSG_OPENDIR_ERROR, - "could not create entries in scratch dir"); - goto dealloc_private; - } + int i = 0; + int ret = 0; + xlator_t *this = NULL; + struct stat buf = { + 0, + }; + char *scratch_dir = NULL; + gf_changelog_journal_t *jnl = NULL; + + this = xl; + scratch_dir = (char *)brick->ptr; + + jnl = GF_CALLOC(1, sizeof(gf_changelog_journal_t), + gf_changelog_mt_libgfchangelog_t); + if (!jnl) + goto error_return; + + if 
(snprintf(jnl->jnl_brickpath, PATH_MAX, "%s", brick->brick_path) >= + PATH_MAX) + goto dealloc_private; + + if (sys_stat(scratch_dir, &buf) && errno == ENOENT) { + ret = mkdir_p(scratch_dir, 0600, _gf_true); + if (ret) + goto dealloc_private; + } - (void) strncpy (jnl->jnl_brickpath, brick->brick_path, PATH_MAX-1); - jnl->jnl_brickpath[PATH_MAX-1] = 0; + jnl->jnl_working_dir = realpath(scratch_dir, NULL); + if (!jnl->jnl_working_dir) + goto dealloc_private; - /* RFC 3986 {de,en}coding */ - for (i = 0; i < 256; i++) { - jnl->rfc3986[i] = - (isalnum(i) || i == '~' || - i == '-' || i == '.' || i == '_') ? i : 0; - } + ret = gf_changelog_open_dirs(this, jnl); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_LIB_MSG_OPENDIR_ERROR, + "could not create entries in scratch dir"); + goto dealloc_private; + } - ret = gf_changelog_init_history (this, jnl, brick->brick_path); - if (ret) - goto cleanup_fds; + /* RFC 3986 {de,en}coding */ + for (i = 0; i < 256; i++) { + jnl->rfc3986_space_newline[i] = (i == ' ' || i == '\n' || i == '%') ? 
0 + : i; + } - /* initialize journal processor */ - jnl->this = this; - ret = gf_changelog_init_processor (jnl); - if (ret) - goto cleanup_fds; - - JNL_SET_API_STATE (jnl, JNL_API_CONN_INPROGESS); - ret = pthread_spin_init (&jnl->lock, 0); - if (ret != 0) - goto cleanup_processor; - return jnl; - - cleanup_processor: - gf_changelog_cleanup_processor (jnl); - cleanup_fds: - gf_changelog_cleanup_fds (jnl); - if (jnl->hist_jnl) - gf_changelog_cleanup_fds (jnl->hist_jnl); - dealloc_private: - GF_FREE (jnl); - error_return: - return NULL; + ret = gf_changelog_init_history(this, jnl, brick->brick_path); + if (ret) + goto cleanup_fds; + + /* initialize journal processor */ + jnl->this = this; + ret = gf_changelog_init_processor(jnl); + if (ret) + goto cleanup_fds; + + JNL_SET_API_STATE(jnl, JNL_API_CONN_INPROGESS); + ret = pthread_spin_init(&jnl->lock, 0); + if (ret != 0) + goto cleanup_processor; + return jnl; + +cleanup_processor: + gf_changelog_cleanup_processor(jnl); +cleanup_fds: + gf_changelog_cleanup_fds(jnl); + if (jnl->hist_jnl) + gf_changelog_cleanup_fds(jnl->hist_jnl); +dealloc_private: + GF_FREE(jnl); +error_return: + return NULL; } diff --git a/xlators/features/changelog/lib/src/gf-changelog-journal.h b/xlators/features/changelog/lib/src/gf-changelog-journal.h index e91807c80b6..ba5b9bf827e 100644 --- a/xlators/features/changelog/lib/src/gf-changelog-journal.h +++ b/xlators/features/changelog/lib/src/gf-changelog-journal.h @@ -17,91 +17,91 @@ #include "changelog.h" enum api_conn { - JNL_API_CONNECTED, - JNL_API_CONN_INPROGESS, - JNL_API_DISCONNECTED, + JNL_API_CONNECTED, + JNL_API_CONN_INPROGESS, + JNL_API_DISCONNECTED, }; typedef struct gf_changelog_entry { - char path[PATH_MAX]; + char path[PATH_MAX]; - struct list_head list; + struct list_head list; } gf_changelog_entry_t; typedef struct gf_changelog_processor { - pthread_mutex_t lock; /* protects ->entries */ - pthread_cond_t cond; /* waiter during empty list */ - gf_boolean_t waiting; + pthread_mutex_t 
lock; /* protects ->entries */ + pthread_cond_t cond; /* waiter during empty list */ + gf_boolean_t waiting; - pthread_t processor; /* thread-id of journal processing thread */ + pthread_t processor; /* thread-id of journal processing thread */ - struct list_head entries; + struct list_head entries; } gf_changelog_processor_t; typedef struct gf_changelog_journal { - DIR *jnl_dir; /* 'processing' directory stream */ + DIR *jnl_dir; /* 'processing' directory stream */ - int jnl_fd; /* fd to the tracker file */ + int jnl_fd; /* fd to the tracker file */ - char jnl_brickpath[PATH_MAX]; /* brick path for this end-point */ + char jnl_brickpath[PATH_MAX]; /* brick path for this end-point */ - gf_changelog_processor_t *jnl_proc; + gf_changelog_processor_t *jnl_proc; - char *jnl_working_dir; /* scratch directory */ + char *jnl_working_dir; /* scratch directory */ - char jnl_current_dir[PATH_MAX]; - char jnl_processed_dir[PATH_MAX]; - char jnl_processing_dir[PATH_MAX]; + char jnl_current_dir[PATH_MAX]; + char jnl_processed_dir[PATH_MAX]; + char jnl_processing_dir[PATH_MAX]; - char rfc3986[256]; /* RFC 3986 string encoding */ + char rfc3986_space_newline[256]; /* RFC 3986 string encoding */ - struct gf_changelog_journal *hist_jnl; - int hist_done; /* holds 0 done scanning, - 1 keep scanning and -1 error */ + struct gf_changelog_journal *hist_jnl; + int hist_done; /* holds 0 done scanning, + 1 keep scanning and -1 error */ - pthread_spinlock_t lock; - int connected; - xlator_t *this; + pthread_spinlock_t lock; + int connected; + xlator_t *this; } gf_changelog_journal_t; -#define JNL_SET_API_STATE(jnl, state) (jnl->connected = state) -#define JNL_IS_API_DISCONNECTED(jnl) (jnl->connected == JNL_API_DISCONNECTED) +#define JNL_SET_API_STATE(jnl, state) (jnl->connected = state) +#define JNL_IS_API_DISCONNECTED(jnl) (jnl->connected == JNL_API_DISCONNECTED) /* History API */ typedef struct gf_changelog_history_data { - int len; + int len; - int htime_fd; + int htime_fd; - /* 
parallelism count */ - int n_parallel; + /* parallelism count */ + int n_parallel; - /* history from, to indexes */ - unsigned long from; - unsigned long to; - xlator_t *this; + /* history from, to indexes */ + unsigned long from; + unsigned long to; + xlator_t *this; } gf_changelog_history_data_t; typedef struct gf_changelog_consume_data { - /** set of inputs */ + /** set of inputs */ - /* fd to read from */ - int fd; + /* fd to read from */ + int fd; - /* from @offset */ - off_t offset; + /* from @offset */ + off_t offset; - xlator_t *this; + xlator_t *this; - gf_changelog_journal_t *jnl; + gf_changelog_journal_t *jnl; - /** set of outputs */ + /** set of outputs */ - /* return value */ - int retval; + /* return value */ + int retval; - /* journal processed */ - char changelog[PATH_MAX]; + /* journal processed */ + char changelog[PATH_MAX]; } gf_changelog_consume_data_t; /* event handler */ diff --git a/xlators/features/changelog/lib/src/gf-changelog-reborp.c b/xlators/features/changelog/lib/src/gf-changelog-reborp.c index 4c49e9a533f..56b11cbb705 100644 --- a/xlators/features/changelog/lib/src/gf-changelog-reborp.c +++ b/xlators/features/changelog/lib/src/gf-changelog-reborp.c @@ -15,140 +15,130 @@ #include "changelog-rpc-common.h" #include "changelog-lib-messages.h" -#include "syscall.h" +#include <glusterfs/syscall.h> /** * Reverse socket: actual data transfer handler. Connection * initiator is PROBER, data transfer is REBORP. 
*/ -struct rpcsvc_program *gf_changelog_reborp_programs[]; +static struct rpcsvc_program *gf_changelog_reborp_programs[]; void * -gf_changelog_connection_janitor (void *arg) +gf_changelog_connection_janitor(void *arg) { - int32_t ret = 0; - xlator_t *this = NULL; - gf_private_t *priv = NULL; - gf_changelog_t *entry = NULL; - struct gf_event *event = NULL; - struct gf_event_list *ev = NULL; - unsigned long drained = 0; - - this = arg; - THIS = this; - - priv = this->private; - - while (1) { - pthread_mutex_lock (&priv->lock); - { - while (list_empty (&priv->cleanups)) - pthread_cond_wait (&priv->cond, &priv->lock); - - entry = list_first_entry (&priv->cleanups, - gf_changelog_t, list); - list_del_init (&entry->list); - } - pthread_mutex_unlock (&priv->lock); - - drained = 0; - ev = &entry->event; - - gf_msg (this->name, GF_LOG_INFO, 0, - CHANGELOG_LIB_MSG_CLEANING_BRICK_ENTRY_INFO, - "Cleaning brick entry for brick %s", entry->brick); - - /* 0x0: disbale rpc-clnt */ - rpc_clnt_disable (RPC_PROBER (entry)); - - /* 0x1: cleanup callback invoker thread */ - ret = gf_cleanup_event (this, ev); - if (ret) - continue; - - /* 0x2: drain pending events */ - while (!list_empty (&ev->events)) { - event = list_first_entry (&ev->events, - struct gf_event, list); - gf_msg (this->name, GF_LOG_INFO, 0, - CHANGELOG_LIB_MSG_DRAINING_EVENT_INFO, - "Draining event [Seq: %lu, Payload: %d]", - event->seq, event->count); - - GF_FREE (event); - drained++; - } - - gf_msg (this->name, GF_LOG_INFO, 0, - CHANGELOG_LIB_MSG_DRAINING_EVENT_INFO, - "Drained %lu events", drained); - - /* 0x3: freeup brick entry */ - gf_msg (this->name, GF_LOG_INFO, 0, - CHANGELOG_LIB_MSG_FREEING_ENTRY_INFO, - "freeing entry %p", entry); - LOCK_DESTROY (&entry->statelock); - GF_FREE (entry); + int32_t ret = 0; + xlator_t *this = NULL; + gf_private_t *priv = NULL; + gf_changelog_t *entry = NULL; + struct gf_event *event = NULL; + struct gf_event_list *ev = NULL; + unsigned long drained = 0; + + this = arg; + THIS = 
this; + + priv = this->private; + + while (1) { + pthread_mutex_lock(&priv->lock); + { + while (list_empty(&priv->cleanups)) + pthread_cond_wait(&priv->cond, &priv->lock); + + entry = list_first_entry(&priv->cleanups, gf_changelog_t, list); + list_del_init(&entry->list); + } + pthread_mutex_unlock(&priv->lock); + + drained = 0; + ev = &entry->event; + + gf_smsg(this->name, GF_LOG_INFO, 0, + CHANGELOG_LIB_MSG_CLEANING_BRICK_ENTRY_INFO, "brick=%s", + entry->brick, NULL); + + /* 0x0: disable rpc-clnt */ + rpc_clnt_disable(RPC_PROBER(entry)); + + /* 0x1: cleanup callback invoker thread */ + ret = gf_cleanup_event(this, ev); + if (ret) + continue; + + /* 0x2: drain pending events */ + while (!list_empty(&ev->events)) { + event = list_first_entry(&ev->events, struct gf_event, list); + gf_smsg(this->name, GF_LOG_INFO, 0, + CHANGELOG_LIB_MSG_DRAINING_EVENT_INFO, "seq=%lu", + event->seq, "payload=%d", event->count, NULL); + + GF_FREE(event); + drained++; } - return NULL; + gf_smsg(this->name, GF_LOG_INFO, 0, + CHANGELOG_LIB_MSG_DRAINED_EVENT_INFO, "num=%lu", drained, NULL); + + /* 0x3: freeup brick entry */ + gf_smsg(this->name, GF_LOG_INFO, 0, + CHANGELOG_LIB_MSG_FREEING_ENTRY_INFO, "entry=%p", entry, NULL); + LOCK_DESTROY(&entry->statelock); + GF_FREE(entry); + } + + return NULL; } int -gf_changelog_reborp_rpcsvc_notify (rpcsvc_t *rpc, void *mydata, - rpcsvc_event_t event, void *data) +gf_changelog_reborp_rpcsvc_notify(rpcsvc_t *rpc, void *mydata, + rpcsvc_event_t event, void *data) { - int ret = 0; - xlator_t *this = NULL; - gf_private_t *priv = NULL; - gf_changelog_t *entry = NULL; - char sock[UNIX_PATH_MAX] = {0,}; + int ret = 0; + xlator_t *this = NULL; + gf_changelog_t *entry = NULL; - if (!(event == RPCSVC_EVENT_ACCEPT || - event == RPCSVC_EVENT_DISCONNECT)) - return 0; + if (!(event == RPCSVC_EVENT_ACCEPT || event == RPCSVC_EVENT_DISCONNECT)) + return 0; - entry = mydata; - this = entry->this; - priv = this->private; + entry = mydata; + this = entry->this; - switch 
(event) { + switch (event) { case RPCSVC_EVENT_ACCEPT: - ret = sys_unlink (RPC_SOCK(entry)); - if (ret != 0) - gf_msg (this->name, GF_LOG_WARNING, errno, - CHANGELOG_LIB_MSG_UNLINK_FAILED, - "failed to unlink " - "reverse socket %s", RPC_SOCK (entry)); - if (entry->connected) - GF_CHANGELOG_INVOKE_CBK (this, entry->connected, - entry->brick, entry->ptr); - break; + ret = sys_unlink(RPC_SOCK(entry)); + if (ret != 0) + gf_smsg(this->name, GF_LOG_WARNING, errno, + CHANGELOG_LIB_MSG_UNLINK_FAILED, "name=reverse socket", + "path=%s", RPC_SOCK(entry), NULL); + if (entry->connected) + GF_CHANGELOG_INVOKE_CBK(this, entry->connected, entry->brick, + entry->ptr); + break; case RPCSVC_EVENT_DISCONNECT: - if (entry->disconnected) - GF_CHANGELOG_INVOKE_CBK (this, entry->disconnected, - entry->brick, entry->ptr); - /* passthrough */ + if (entry->disconnected) + GF_CHANGELOG_INVOKE_CBK(this, entry->disconnected, entry->brick, + entry->ptr); + /* passthrough */ default: - break; - } + break; + } - return 0; + return 0; } rpcsvc_t * -gf_changelog_reborp_init_rpc_listner (xlator_t *this, - char *path, char *sock, void *cbkdata) +gf_changelog_reborp_init_rpc_listner(xlator_t *this, char *path, char *sock, + void *cbkdata) { - CHANGELOG_MAKE_TMP_SOCKET_PATH (path, sock, UNIX_PATH_MAX); - return changelog_rpc_server_init (this, sock, cbkdata, - gf_changelog_reborp_rpcsvc_notify, - gf_changelog_reborp_programs); + CHANGELOG_MAKE_TMP_SOCKET_PATH(path, sock, UNIX_PATH_MAX); + return changelog_rpc_server_init(this, sock, cbkdata, + gf_changelog_reborp_rpcsvc_notify, + gf_changelog_reborp_programs); } /** - * This is dirty and painful as of now untill there is event filtering in the + * This is dirty and painful as of now until there is event filtering in the * server. The entire event buffer is scanned and interested events are picked, * whereas we _should_ be notified with the events we were interested in * (selected at the time of probe). 
As of now this is complete BS and needs @@ -157,29 +147,27 @@ gf_changelog_reborp_init_rpc_listner (xlator_t *this, * @FIXME: cleanup this bugger once server filters events. */ void -gf_changelog_invoke_callback (gf_changelog_t *entry, - struct iovec **vec, int payloadcnt) +gf_changelog_invoke_callback(gf_changelog_t *entry, struct iovec **vec, + int payloadcnt) { - int i = 0; - int evsize = 0; - xlator_t *this = NULL; - changelog_event_t *event = NULL; - - this = entry->this; - - for (; i < payloadcnt; i++) { - event = (changelog_event_t *)vec[i]->iov_base; - evsize = vec[i]->iov_len / CHANGELOG_EV_SIZE; - - for (; evsize > 0; evsize--, event++) { - if (gf_changelog_filter_check (entry, event)) { - GF_CHANGELOG_INVOKE_CBK (this, - entry->callback, - entry->brick, - entry->ptr, event); - } - } + int i = 0; + int evsize = 0; + xlator_t *this = NULL; + changelog_event_t *event = NULL; + + this = entry->this; + + for (; i < payloadcnt; i++) { + event = (changelog_event_t *)vec[i]->iov_base; + evsize = vec[i]->iov_len / CHANGELOG_EV_SIZE; + + for (; evsize > 0; evsize--, event++) { + if (gf_changelog_filter_check(entry, event)) { + GF_CHANGELOG_INVOKE_CBK(this, entry->callback, entry->brick, + entry->ptr, event); + } } + } } /** @@ -190,219 +178,218 @@ gf_changelog_invoke_callback (gf_changelog_t *entry, */ int -__is_expected_sequence (struct gf_event_list *ev, struct gf_event *event) +__is_expected_sequence(struct gf_event_list *ev, struct gf_event *event) { - return (ev->next_seq == event->seq); + return (ev->next_seq == event->seq); } int -__can_process_event (struct gf_event_list *ev, struct gf_event **event) +__can_process_event(struct gf_event_list *ev, struct gf_event **event) { - *event = list_first_entry (&ev->events, struct gf_event, list); + *event = list_first_entry(&ev->events, struct gf_event, list); - if (__is_expected_sequence (ev, *event)) { - list_del (&(*event)->list); - ev->next_seq++; - return 1; - } + if (__is_expected_sequence(ev, *event)) { + 
list_del(&(*event)->list); + ev->next_seq++; + return 1; + } - return 0; + return 0; } void -pick_event_ordered (struct gf_event_list *ev, struct gf_event **event) +pick_event_ordered(struct gf_event_list *ev, struct gf_event **event) { - pthread_mutex_lock (&ev->lock); - { - while (list_empty (&ev->events) - || !__can_process_event (ev, event)) - pthread_cond_wait (&ev->cond, &ev->lock); - } - pthread_mutex_unlock (&ev->lock); + pthread_mutex_lock(&ev->lock); + { + while (list_empty(&ev->events) || !__can_process_event(ev, event)) + pthread_cond_wait(&ev->cond, &ev->lock); + } + pthread_mutex_unlock(&ev->lock); } void -pick_event_unordered (struct gf_event_list *ev, struct gf_event **event) +pick_event_unordered(struct gf_event_list *ev, struct gf_event **event) { - pthread_mutex_lock (&ev->lock); - { - while (list_empty (&ev->events)) - pthread_cond_wait (&ev->cond, &ev->lock); - *event = list_first_entry (&ev->events, struct gf_event, list); - list_del (&(*event)->list); - } - pthread_mutex_unlock (&ev->lock); + pthread_mutex_lock(&ev->lock); + { + while (list_empty(&ev->events)) + pthread_cond_wait(&ev->cond, &ev->lock); + *event = list_first_entry(&ev->events, struct gf_event, list); + list_del(&(*event)->list); + } + pthread_mutex_unlock(&ev->lock); } void * -gf_changelog_callback_invoker (void *arg) +gf_changelog_callback_invoker(void *arg) { - int ret = 0; - xlator_t *this = NULL; - gf_changelog_t *entry = NULL; - struct iovec *vec = NULL; - struct gf_event *event = NULL; - struct gf_event_list *ev = NULL; + xlator_t *this = NULL; + gf_changelog_t *entry = NULL; + struct iovec *vec = NULL; + struct gf_event *event = NULL; + struct gf_event_list *ev = NULL; - ev = arg; - entry = ev->entry; - THIS = this = entry->this; + ev = arg; + entry = ev->entry; + THIS = this = entry->this; - while (1) { - entry->pickevent (ev, &event); + while (1) { + entry->pickevent(ev, &event); - vec = (struct iovec *) &event->iov; - gf_changelog_invoke_callback (entry, &vec, 
event->count); + vec = (struct iovec *)&event->iov; + gf_changelog_invoke_callback(entry, &vec, event->count); - GF_FREE (event); - } + GF_FREE(event); + } - return NULL; + return NULL; } static int -orderfn (struct list_head *pos1, struct list_head *pos2) +orderfn(struct list_head *pos1, struct list_head *pos2) { - struct gf_event *event1 = NULL; - struct gf_event *event2 = NULL; + struct gf_event *event1 = NULL; + struct gf_event *event2 = NULL; - event1 = list_entry (pos1, struct gf_event, list); - event2 = list_entry (pos2, struct gf_event, list); + event1 = list_entry(pos1, struct gf_event, list); + event2 = list_entry(pos2, struct gf_event, list); - if (event1->seq > event2->seq) - return 1; - return -1; + if (event1->seq > event2->seq) + return 1; + return -1; } void -queue_ordered_event (struct gf_event_list *ev, struct gf_event *event) +queue_ordered_event(struct gf_event_list *ev, struct gf_event *event) { - /* add event to the ordered event list and wake up listner(s) */ - pthread_mutex_lock (&ev->lock); - { - list_add_order (&event->list, &ev->events, orderfn); - if (!ev->next_seq) - ev->next_seq = event->seq; - if (ev->next_seq == event->seq) - pthread_cond_signal (&ev->cond); - } - pthread_mutex_unlock (&ev->lock); + /* add event to the ordered event list and wake up listener(s) */ + pthread_mutex_lock(&ev->lock); + { + list_add_order(&event->list, &ev->events, orderfn); + if (!ev->next_seq) + ev->next_seq = event->seq; + if (ev->next_seq == event->seq) + pthread_cond_signal(&ev->cond); + } + pthread_mutex_unlock(&ev->lock); } void -queue_unordered_event (struct gf_event_list *ev, struct gf_event *event) +queue_unordered_event(struct gf_event_list *ev, struct gf_event *event) { - /* add event to the tail of the queue and wake up listener(s) */ - pthread_mutex_lock (&ev->lock); - { - list_add_tail (&event->list, &ev->events); - pthread_cond_signal (&ev->cond); - } - pthread_mutex_unlock (&ev->lock); + /* add event to the tail of the queue and wake up 
listener(s) */ + pthread_mutex_lock(&ev->lock); + { + list_add_tail(&event->list, &ev->events); + pthread_cond_signal(&ev->cond); + } + pthread_mutex_unlock(&ev->lock); } int -gf_changelog_event_handler (rpcsvc_request_t *req, - xlator_t *this, gf_changelog_t *entry) +gf_changelog_event_handler(rpcsvc_request_t *req, xlator_t *this, + gf_changelog_t *entry) { - int i = 0; - size_t payloadlen = 0; - ssize_t len = 0; - int payloadcnt = 0; - changelog_event_req rpc_req = {0,}; - changelog_event_rsp rpc_rsp = {0,}; - struct iovec *vec = NULL; - struct gf_event *event = NULL; - struct gf_event_list *ev = NULL; - - ev = &entry->event; - - len = xdr_to_generic (req->msg[0], - &rpc_req, (xdrproc_t)xdr_changelog_event_req); - if (len < 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CHANGELOG_LIB_MSG_XDR_DECODING_FAILED, - "xdr decoding failed"); - req->rpc_err = GARBAGE_ARGS; - goto handle_xdr_error; - } - - if (len < req->msg[0].iov_len) { - payloadcnt = 1; - payloadlen = (req->msg[0].iov_len - len); - } - for (i = 1; i < req->count; i++) { - payloadcnt++; - payloadlen += req->msg[i].iov_len; - } - - event = GF_CALLOC (1, GF_EVENT_CALLOC_SIZE (payloadcnt, payloadlen), - gf_changelog_mt_libgfchangelog_event_t); - if (!event) - goto handle_xdr_error; - INIT_LIST_HEAD (&event->list); - - payloadlen = 0; - event->seq = rpc_req.seq; - event->count = payloadcnt; - - /* deep copy IO vectors */ - vec = &event->iov[0]; - GF_EVENT_ASSIGN_IOVEC (vec, event, - (req->msg[0].iov_len - len), payloadlen); - (void) memcpy (vec->iov_base, - req->msg[0].iov_base + len, vec->iov_len); - - for (i = 1; i < req->count; i++) { - vec = &event->iov[i]; - GF_EVENT_ASSIGN_IOVEC (vec, event, - req->msg[i].iov_len, payloadlen); - (void) memcpy (event->iov[i].iov_base, - req->msg[i].iov_base, req->msg[i].iov_len); - } - - gf_msg_debug (this->name, 0, - "seq: %lu [%s] (time: %lu.%lu), (vec: %d, len: %ld)", - rpc_req.seq, entry->brick, rpc_req.tv_sec, - rpc_req.tv_usec, payloadcnt, payloadlen); - - /* 
dispatch event */ - entry->queueevent (ev, event); - - /* ack sequence number */ - rpc_rsp.op_ret = 0; - rpc_rsp.seq = rpc_req.seq; - - goto submit_rpc; - - handle_xdr_error: - rpc_rsp.op_ret = -1; - rpc_rsp.seq = 0; /* invalid */ - submit_rpc: - return changelog_rpc_sumbit_reply (req, &rpc_rsp, NULL, 0, NULL, - (xdrproc_t)xdr_changelog_event_rsp); + int i = 0; + size_t payloadlen = 0; + ssize_t len = 0; + int payloadcnt = 0; + changelog_event_req rpc_req = { + 0, + }; + changelog_event_rsp rpc_rsp = { + 0, + }; + struct iovec *vec = NULL; + struct gf_event *event = NULL; + struct gf_event_list *ev = NULL; + + ev = &entry->event; + + len = xdr_to_generic(req->msg[0], &rpc_req, + (xdrproc_t)xdr_changelog_event_req); + if (len < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, + CHANGELOG_LIB_MSG_XDR_DECODING_FAILED, "xdr decoding failed"); + req->rpc_err = GARBAGE_ARGS; + goto handle_xdr_error; + } + + if (len < req->msg[0].iov_len) { + payloadcnt = 1; + payloadlen = (req->msg[0].iov_len - len); + } + for (i = 1; i < req->count; i++) { + payloadcnt++; + payloadlen += req->msg[i].iov_len; + } + + event = GF_CALLOC(1, GF_EVENT_CALLOC_SIZE(payloadcnt, payloadlen), + gf_changelog_mt_libgfchangelog_event_t); + if (!event) + goto handle_xdr_error; + INIT_LIST_HEAD(&event->list); + + payloadlen = 0; + event->seq = rpc_req.seq; + event->count = payloadcnt; + + /* deep copy IO vectors */ + vec = &event->iov[0]; + GF_EVENT_ASSIGN_IOVEC(vec, event, (req->msg[0].iov_len - len), payloadlen); + (void)memcpy(vec->iov_base, req->msg[0].iov_base + len, vec->iov_len); + + for (i = 1; i < req->count; i++) { + vec = &event->iov[i]; + GF_EVENT_ASSIGN_IOVEC(vec, event, req->msg[i].iov_len, payloadlen); + (void)memcpy(event->iov[i].iov_base, req->msg[i].iov_base, + req->msg[i].iov_len); + } + + gf_msg_debug(this->name, 0, + "seq: %" PRIu64 " [%s] (time: %" PRIu64 ".%" PRIu64 + "), " + "(vec: %d, len: %zd)", + rpc_req.seq, entry->brick, rpc_req.tv_sec, rpc_req.tv_usec, + payloadcnt, payloadlen); 
+ + /* dispatch event */ + entry->queueevent(ev, event); + + /* ack sequence number */ + rpc_rsp.op_ret = 0; + rpc_rsp.seq = rpc_req.seq; + + goto submit_rpc; + +handle_xdr_error: + rpc_rsp.op_ret = -1; + rpc_rsp.seq = 0; /* invalid */ +submit_rpc: + return changelog_rpc_sumbit_reply(req, &rpc_rsp, NULL, 0, NULL, + (xdrproc_t)xdr_changelog_event_rsp); } int -gf_changelog_reborp_handle_event (rpcsvc_request_t *req) +gf_changelog_reborp_handle_event(rpcsvc_request_t *req) { - xlator_t *this = NULL; - rpcsvc_t *svc = NULL; - gf_changelog_t *entry = NULL; + xlator_t *this = NULL; + rpcsvc_t *svc = NULL; + gf_changelog_t *entry = NULL; - svc = rpcsvc_request_service (req); - entry = svc->mydata; + svc = rpcsvc_request_service(req); + entry = svc->mydata; - this = THIS = entry->this; + this = THIS = entry->this; - return gf_changelog_event_handler (req, this, entry); + return gf_changelog_event_handler(req, this, entry); } -rpcsvc_actor_t gf_changelog_reborp_actors[CHANGELOG_REV_PROC_MAX] = { - [CHANGELOG_REV_PROC_EVENT] = { - "CHANGELOG EVENT HANDLER", CHANGELOG_REV_PROC_EVENT, - gf_changelog_reborp_handle_event, NULL, 0, DRC_NA - }, +static rpcsvc_actor_t gf_changelog_reborp_actors[CHANGELOG_REV_PROC_MAX] = { + [CHANGELOG_REV_PROC_EVENT] = {"CHANGELOG EVENT HANDLER", + gf_changelog_reborp_handle_event, NULL, + CHANGELOG_REV_PROC_EVENT, DRC_NA, 0}, }; /** @@ -411,16 +398,16 @@ rpcsvc_actor_t gf_changelog_reborp_actors[CHANGELOG_REV_PROC_MAX] = { * and that's required to invoke the callback with the appropriate * brick path and it's private data. 
*/ -struct rpcsvc_program gf_changelog_reborp_prog = { - .progname = "LIBGFCHANGELOG REBORP", - .prognum = CHANGELOG_REV_RPC_PROCNUM, - .progver = CHANGELOG_REV_RPC_PROCVER, - .numactors = CHANGELOG_REV_PROC_MAX, - .actors = gf_changelog_reborp_actors, - .synctask = _gf_false, +static struct rpcsvc_program gf_changelog_reborp_prog = { + .progname = "LIBGFCHANGELOG REBORP", + .prognum = CHANGELOG_REV_RPC_PROCNUM, + .progver = CHANGELOG_REV_RPC_PROCVER, + .numactors = CHANGELOG_REV_PROC_MAX, + .actors = gf_changelog_reborp_actors, + .synctask = _gf_false, }; -struct rpcsvc_program *gf_changelog_reborp_programs[] = { - &gf_changelog_reborp_prog, - NULL, +static struct rpcsvc_program *gf_changelog_reborp_programs[] = { + &gf_changelog_reborp_prog, + NULL, }; diff --git a/xlators/features/changelog/lib/src/gf-changelog-rpc.c b/xlators/features/changelog/lib/src/gf-changelog-rpc.c index 270632bc71b..8ec6ffbcebc 100644 --- a/xlators/features/changelog/lib/src/gf-changelog-rpc.c +++ b/xlators/features/changelog/lib/src/gf-changelog-rpc.c @@ -16,31 +16,32 @@ struct rpc_clnt_program gf_changelog_clnt; /* TODO: piggyback reconnect to called (upcall) */ int -gf_changelog_rpc_notify (struct rpc_clnt *rpc, - void *mydata, rpc_clnt_event_t event, void *data) +gf_changelog_rpc_notify(struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data) { - switch (event) { + switch (event) { case RPC_CLNT_CONNECT: - rpc_clnt_set_connected (&rpc->conn); - break; + break; case RPC_CLNT_DISCONNECT: case RPC_CLNT_MSG: case RPC_CLNT_DESTROY: - break; - } + case RPC_CLNT_PING: + break; + } - return 0; + return 0; } struct rpc_clnt * -gf_changelog_rpc_init (xlator_t *this, gf_changelog_t *entry) +gf_changelog_rpc_init(xlator_t *this, gf_changelog_t *entry) { - char sockfile[UNIX_PATH_MAX] = {0,}; + char sockfile[UNIX_PATH_MAX] = { + 0, + }; - CHANGELOG_MAKE_SOCKET_PATH (entry->brick, - sockfile, UNIX_PATH_MAX); - return changelog_rpc_client_init (this, entry, - sockfile, 
gf_changelog_rpc_notify); + CHANGELOG_MAKE_SOCKET_PATH(entry->brick, sockfile, UNIX_PATH_MAX); + return changelog_rpc_client_init(this, entry, sockfile, + gf_changelog_rpc_notify); } /** @@ -48,52 +49,50 @@ gf_changelog_rpc_init (xlator_t *this, gf_changelog_t *entry) */ int -gf_probe_changelog_cbk (struct rpc_req *req, - struct iovec *iovec, int count, void *myframe) +gf_probe_changelog_cbk(struct rpc_req *req, struct iovec *iovec, int count, + void *myframe) { - return 0; + return 0; } int -gf_probe_changelog_filter (call_frame_t *frame, xlator_t *this, void *data) +gf_probe_changelog_filter(call_frame_t *frame, xlator_t *this, void *data) { - int ret = 0; - char *sock = NULL; - gf_changelog_t *entry = NULL; - changelog_probe_req req = {0,}; - - entry = data; - sock = RPC_SOCK (entry); - - (void) memcpy (&req.sock, sock, strlen (sock)); - req.filter = entry->notify; - - /* invoke RPC */ - return changelog_rpc_sumbit_req (RPC_PROBER (entry), (void *) &req, - frame, &gf_changelog_clnt, - CHANGELOG_RPC_PROBE_FILTER, NULL, 0, - NULL, this, gf_probe_changelog_cbk, - (xdrproc_t) xdr_changelog_probe_req); + char *sock = NULL; + gf_changelog_t *entry = NULL; + changelog_probe_req req = { + 0, + }; + + entry = data; + sock = RPC_SOCK(entry); + + (void)memcpy(&req.sock, sock, strlen(sock)); + req.filter = entry->notify; + + /* invoke RPC */ + return changelog_rpc_sumbit_req( + RPC_PROBER(entry), (void *)&req, frame, &gf_changelog_clnt, + CHANGELOG_RPC_PROBE_FILTER, NULL, 0, NULL, this, gf_probe_changelog_cbk, + (xdrproc_t)xdr_changelog_probe_req); } int -gf_changelog_invoke_rpc (xlator_t *this, gf_changelog_t *entry, int procidx) +gf_changelog_invoke_rpc(xlator_t *this, gf_changelog_t *entry, int procidx) { - return changelog_invoke_rpc (this, RPC_PROBER (entry), - &gf_changelog_clnt, procidx, entry); + return changelog_invoke_rpc(this, RPC_PROBER(entry), &gf_changelog_clnt, + procidx, entry); } struct rpc_clnt_procedure gf_changelog_procs[CHANGELOG_RPC_PROC_MAX] = { - 
[CHANGELOG_RPC_PROC_NULL] = {"NULL", NULL}, - [CHANGELOG_RPC_PROBE_FILTER] = { - "PROBE FILTER", gf_probe_changelog_filter - }, + [CHANGELOG_RPC_PROC_NULL] = {"NULL", NULL}, + [CHANGELOG_RPC_PROBE_FILTER] = {"PROBE FILTER", gf_probe_changelog_filter}, }; struct rpc_clnt_program gf_changelog_clnt = { - .progname = "LIBGFCHANGELOG", - .prognum = CHANGELOG_RPC_PROGNUM, - .progver = CHANGELOG_RPC_PROGVER, - .numproc = CHANGELOG_RPC_PROC_MAX, - .proctable = gf_changelog_procs, + .progname = "LIBGFCHANGELOG", + .prognum = CHANGELOG_RPC_PROGNUM, + .progver = CHANGELOG_RPC_PROGVER, + .numproc = CHANGELOG_RPC_PROC_MAX, + .proctable = gf_changelog_procs, }; diff --git a/xlators/features/changelog/lib/src/gf-changelog-rpc.h b/xlators/features/changelog/lib/src/gf-changelog-rpc.h index 1c982eef809..5c82d6f1c08 100644 --- a/xlators/features/changelog/lib/src/gf-changelog-rpc.h +++ b/xlators/features/changelog/lib/src/gf-changelog-rpc.h @@ -11,16 +11,18 @@ #ifndef __GF_CHANGELOG_RPC_H #define __GF_CHANGELOG_RPC_H -#include "xlator.h" +#include <glusterfs/xlator.h> #include "gf-changelog-helpers.h" #include "changelog-rpc-common.h" -struct rpc_clnt *gf_changelog_rpc_init (xlator_t *, gf_changelog_t *); +struct rpc_clnt * +gf_changelog_rpc_init(xlator_t *, gf_changelog_t *); -int gf_changelog_invoke_rpc (xlator_t *, gf_changelog_t *, int); +int +gf_changelog_invoke_rpc(xlator_t *, gf_changelog_t *, int); rpcsvc_t * -gf_changelog_reborp_init_rpc_listner (xlator_t *, char *, char *, void *); +gf_changelog_reborp_init_rpc_listner(xlator_t *, char *, char *, void *); #endif diff --git a/xlators/features/changelog/lib/src/gf-changelog.c b/xlators/features/changelog/lib/src/gf-changelog.c index 5972e7052d8..57c3d39ef76 100644 --- a/xlators/features/changelog/lib/src/gf-changelog.c +++ b/xlators/features/changelog/lib/src/gf-changelog.c @@ -22,11 +22,11 @@ #endif #include <string.h> -#include "globals.h" -#include "glusterfs.h" -#include "logging.h" -#include "defaults.h" -#include 
"syncop.h" +#include <glusterfs/globals.h> +#include <glusterfs/glusterfs.h> +#include <glusterfs/logging.h> +#include <glusterfs/defaults.h> +#include <glusterfs/syncop.h> #include "gf-changelog-rpc.h" #include "gf-changelog-helpers.h" @@ -45,279 +45,315 @@ */ xlator_t *master = NULL; -static inline -gf_private_t *gf_changelog_alloc_priv () +static inline gf_private_t * +gf_changelog_alloc_priv() { - int ret = 0; - gf_private_t *priv = NULL; - - priv = GF_CALLOC (1, sizeof (*priv), gf_changelog_mt_priv_t); - if (!priv) - goto error_return; - INIT_LIST_HEAD (&priv->connections); - INIT_LIST_HEAD (&priv->cleanups); - - ret = pthread_mutex_init (&priv->lock, NULL); - if (ret != 0) - goto free_priv; - ret = pthread_cond_init (&priv->cond, NULL); - if (ret != 0) - goto cleanup_mutex; - - priv->api = NULL; - return priv; - - cleanup_mutex: - (void) pthread_mutex_destroy (&priv->lock); - free_priv: - GF_FREE (priv); - error_return: - return NULL; + int ret = 0; + gf_private_t *priv = NULL; + + priv = GF_CALLOC(1, sizeof(*priv), gf_changelog_mt_priv_t); + if (!priv) + goto error_return; + INIT_LIST_HEAD(&priv->connections); + INIT_LIST_HEAD(&priv->cleanups); + + ret = pthread_mutex_init(&priv->lock, NULL); + if (ret != 0) + goto free_priv; + ret = pthread_cond_init(&priv->cond, NULL); + if (ret != 0) + goto cleanup_mutex; + + priv->api = NULL; + return priv; + +cleanup_mutex: + (void)pthread_mutex_destroy(&priv->lock); +free_priv: + GF_FREE(priv); +error_return: + return NULL; } -#define GF_CHANGELOG_EVENT_POOL_SIZE 16384 +#define GF_CHANGELOG_EVENT_POOL_SIZE 16384 #define GF_CHANGELOG_EVENT_THREAD_COUNT 4 static int -gf_changelog_ctx_defaults_init (glusterfs_ctx_t *ctx) +gf_changelog_ctx_defaults_init(glusterfs_ctx_t *ctx) { - cmd_args_t *cmd_args = NULL; - struct rlimit lim = {0, }; - call_pool_t *pool = NULL; - int ret = -1; + cmd_args_t *cmd_args = NULL; + struct rlimit lim = { + 0, + }; + call_pool_t *pool = NULL; + int ret = -1; + + ret = xlator_mem_acct_init(THIS, 
gf_changelog_mt_end); + if (ret != 0) + return -1; - ret = xlator_mem_acct_init (THIS, gf_changelog_mt_end); - if (ret != 0) - return -1; + ctx->process_uuid = generate_glusterfs_ctx_id(); + if (!ctx->process_uuid) + return -1; - ctx->process_uuid = generate_glusterfs_ctx_id (); - if (!ctx->process_uuid) - return -1; + ctx->page_size = 128 * GF_UNIT_KB; - ctx->page_size = 128 * GF_UNIT_KB; + ctx->iobuf_pool = iobuf_pool_new(); + if (!ctx->iobuf_pool) + goto free_pool; - ctx->iobuf_pool = iobuf_pool_new (); - if (!ctx->iobuf_pool) - return -1; + ctx->event_pool = gf_event_pool_new(GF_CHANGELOG_EVENT_POOL_SIZE, + GF_CHANGELOG_EVENT_THREAD_COUNT); + if (!ctx->event_pool) + goto free_pool; - ctx->event_pool = event_pool_new (GF_CHANGELOG_EVENT_POOL_SIZE, - GF_CHANGELOG_EVENT_THREAD_COUNT); - if (!ctx->event_pool) - return -1; + pool = GF_CALLOC(1, sizeof(call_pool_t), + gf_changelog_mt_libgfchangelog_call_pool_t); + if (!pool) + goto free_pool; - pool = GF_CALLOC (1, sizeof (call_pool_t), - gf_changelog_mt_libgfchangelog_call_pool_t); - if (!pool) - return -1; + /* frame_mem_pool size 112 * 64 */ + pool->frame_mem_pool = mem_pool_new(call_frame_t, 32); + if (!pool->frame_mem_pool) + goto free_pool; - /* frame_mem_pool size 112 * 64 */ - pool->frame_mem_pool = mem_pool_new (call_frame_t, 32); - if (!pool->frame_mem_pool) - return -1; + /* stack_mem_pool size 256 * 128 */ + pool->stack_mem_pool = mem_pool_new(call_stack_t, 16); - /* stack_mem_pool size 256 * 128 */ - pool->stack_mem_pool = mem_pool_new (call_stack_t, 16); + if (!pool->stack_mem_pool) + goto free_pool; - if (!pool->stack_mem_pool) - return -1; + ctx->stub_mem_pool = mem_pool_new(call_stub_t, 16); + if (!ctx->stub_mem_pool) + goto free_pool; - ctx->stub_mem_pool = mem_pool_new (call_stub_t, 16); - if (!ctx->stub_mem_pool) - return -1; + ctx->dict_pool = mem_pool_new(dict_t, 32); + if (!ctx->dict_pool) + goto free_pool; - ctx->dict_pool = mem_pool_new (dict_t, 32); - if (!ctx->dict_pool) - return -1; + 
ctx->dict_pair_pool = mem_pool_new(data_pair_t, 512); + if (!ctx->dict_pair_pool) + goto free_pool; - ctx->dict_pair_pool = mem_pool_new (data_pair_t, 512); - if (!ctx->dict_pair_pool) - return -1; + ctx->dict_data_pool = mem_pool_new(data_t, 512); + if (!ctx->dict_data_pool) + goto free_pool; - ctx->dict_data_pool = mem_pool_new (data_t, 512); - if (!ctx->dict_data_pool) - return -1; + ctx->logbuf_pool = mem_pool_new(log_buf_t, 256); + if (!ctx->logbuf_pool) + goto free_pool; - INIT_LIST_HEAD (&pool->all_frames); - LOCK_INIT (&pool->lock); - ctx->pool = pool; + INIT_LIST_HEAD(&pool->all_frames); + LOCK_INIT(&pool->lock); + ctx->pool = pool; - pthread_mutex_init (&(ctx->lock), NULL); + LOCK_INIT(&ctx->lock); - cmd_args = &ctx->cmd_args; + cmd_args = &ctx->cmd_args; - INIT_LIST_HEAD (&cmd_args->xlator_options); + INIT_LIST_HEAD(&cmd_args->xlator_options); - lim.rlim_cur = RLIM_INFINITY; - lim.rlim_max = RLIM_INFINITY; - setrlimit (RLIMIT_CORE, &lim); + lim.rlim_cur = RLIM_INFINITY; + lim.rlim_max = RLIM_INFINITY; + setrlimit(RLIMIT_CORE, &lim); - return 0; + return 0; + +free_pool: + if (pool) { + GF_FREE(pool->frame_mem_pool); + + GF_FREE(pool->stack_mem_pool); + + GF_FREE(pool); + } + + GF_FREE(ctx->stub_mem_pool); + + GF_FREE(ctx->dict_pool); + + GF_FREE(ctx->dict_pair_pool); + + GF_FREE(ctx->dict_data_pool); + + GF_FREE(ctx->logbuf_pool); + + GF_FREE(ctx->iobuf_pool); + + GF_FREE(ctx->event_pool); + + return -1; } /* TODO: cleanup ctx defaults */ void -gf_changelog_cleanup_this (xlator_t *this) +gf_changelog_cleanup_this(xlator_t *this) { - glusterfs_ctx_t *ctx = NULL; + glusterfs_ctx_t *ctx = NULL; + + if (!this) + return; - if (!this) - return; + ctx = this->ctx; + syncenv_destroy(ctx->env); + free(ctx); - ctx = this->ctx; - syncenv_destroy (ctx->env); - free (ctx); + this->private = NULL; + this->ctx = NULL; - this->private = NULL; - this->ctx = NULL; + mem_pools_fini(); } static int -gf_changelog_init_context () +gf_changelog_init_context() { - 
glusterfs_ctx_t *ctx = NULL; + glusterfs_ctx_t *ctx = NULL; - ctx = glusterfs_ctx_new (); - if (!ctx) - goto error_return; + ctx = glusterfs_ctx_new(); + if (!ctx) + goto error_return; - if (glusterfs_globals_init (ctx)) - goto free_ctx; + if (glusterfs_globals_init(ctx)) + goto free_ctx; - THIS->ctx = ctx; - if (gf_changelog_ctx_defaults_init (ctx)) - goto free_ctx; + THIS->ctx = ctx; + if (gf_changelog_ctx_defaults_init(ctx)) + goto free_ctx; - ctx->env = syncenv_new (0, 0, 0); - if (!ctx->env) - goto free_ctx; - return 0; + ctx->env = syncenv_new(0, 0, 0); + if (!ctx->env) + goto free_ctx; + return 0; - free_ctx: - free (ctx); - THIS->ctx = NULL; - error_return: - return -1; +free_ctx: + free(ctx); + THIS->ctx = NULL; +error_return: + return -1; } static int -gf_changelog_init_master () +gf_changelog_init_master() { - return gf_changelog_init_context (); + int ret = 0; + + ret = gf_changelog_init_context(); + mem_pools_init(); + + return ret; } /* TODO: cleanup clnt/svc on failure */ int -gf_changelog_setup_rpc (xlator_t *this, - gf_changelog_t *entry, int proc) +gf_changelog_setup_rpc(xlator_t *this, gf_changelog_t *entry, int proc) { - int ret = 0; - rpcsvc_t *svc = NULL; - struct rpc_clnt *rpc = NULL; - - /** - * Initialize a connect back socket. A probe() RPC call to the server - * triggers a reverse connect. - */ - svc = gf_changelog_reborp_init_rpc_listner (this, entry->brick, - RPC_SOCK (entry), entry); - if (!svc) - goto error_return; - RPC_REBORP (entry) = svc; - - /* Initialize an RPC client */ - rpc = gf_changelog_rpc_init (this, entry); - if (!rpc) - goto error_return; - RPC_PROBER (entry) = rpc; - - /** - * @FIXME - * till we have connection state machine, let's delay the RPC call - * for now.. - */ - sleep (2); - - /** - * Probe changelog translator for reverse connection. After a successful - * call, there's less use of the client and can be disconnected, but - * let's leave the connection active for any future RPC calls. 
- */ - ret = gf_changelog_invoke_rpc (this, entry, proc); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CHANGELOG_LIB_MSG_INVOKE_RPC_FAILED, - "Could not initiate probe RPC, bailing out!!!"); - goto error_return; - } - - return 0; - - error_return: - return -1; + int ret = 0; + rpcsvc_t *svc = NULL; + struct rpc_clnt *rpc = NULL; + + /** + * Initialize a connect back socket. A probe() RPC call to the server + * triggers a reverse connect. + */ + svc = gf_changelog_reborp_init_rpc_listner(this, entry->brick, + RPC_SOCK(entry), entry); + if (!svc) + goto error_return; + RPC_REBORP(entry) = svc; + + /* Initialize an RPC client */ + rpc = gf_changelog_rpc_init(this, entry); + if (!rpc) + goto error_return; + RPC_PROBER(entry) = rpc; + + /** + * @FIXME + * till we have connection state machine, let's delay the RPC call + * for now.. + */ + sleep(2); + + /** + * Probe changelog translator for reverse connection. After a successful + * call, there's less use of the client and can be disconnected, but + * let's leave the connection active for any future RPC calls. + */ + ret = gf_changelog_invoke_rpc(this, entry, proc); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_LIB_MSG_INVOKE_RPC_FAILED, + "Could not initiate probe RPC, bailing out!!!"); + goto error_return; + } + + return 0; + +error_return: + return -1; } int -gf_cleanup_event (xlator_t *this, struct gf_event_list *ev) +gf_cleanup_event(xlator_t *this, struct gf_event_list *ev) { - int ret = 0; - - ret = gf_thread_cleanup (this, ev->invoker); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, -ret, - CHANGELOG_LIB_MSG_CLEANUP_ERROR, - "cannot cleanup callback invoker thread." - " Not freeing resources"); - return -1; - } + int ret = 0; + + ret = gf_thread_cleanup(this, ev->invoker); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, -ret, + CHANGELOG_LIB_MSG_CLEANUP_ERROR, + "cannot cleanup callback invoker thread." 
+ " Not freeing resources"); + return -1; + } - ev->entry = NULL; + ev->entry = NULL; - return 0; + return 0; } static int -gf_init_event (gf_changelog_t *entry) +gf_init_event(gf_changelog_t *entry) { - int ret = 0; - struct gf_event_list *ev = NULL; - - ev = &entry->event; - ev->entry = entry; - - ret = pthread_mutex_init (&ev->lock, NULL); - if (ret != 0) - goto error_return; - ret = pthread_cond_init (&ev->cond, NULL); - if (ret != 0) - goto cleanup_mutex; - INIT_LIST_HEAD (&ev->events); - - ev->next_seq = 0; /* bootstrap sequencing */ - - if (GF_NEED_ORDERED_EVENTS (entry)) { - entry->pickevent = pick_event_ordered; - entry->queueevent = queue_ordered_event; - } else { - entry->pickevent = pick_event_unordered; - entry->queueevent = queue_unordered_event; - } - - ret = gf_thread_create (&ev->invoker, NULL, - gf_changelog_callback_invoker, ev); - if (ret != 0) { - entry->pickevent = NULL; - entry->queueevent = NULL; - goto cleanup_cond; - } - - return 0; - - cleanup_cond: - (void) pthread_cond_destroy (&ev->cond); - cleanup_mutex: - (void) pthread_mutex_destroy (&ev->lock); - error_return: - return -1; + int ret = 0; + struct gf_event_list *ev = NULL; + + ev = &entry->event; + ev->entry = entry; + + ret = pthread_mutex_init(&ev->lock, NULL); + if (ret != 0) + goto error_return; + ret = pthread_cond_init(&ev->cond, NULL); + if (ret != 0) + goto cleanup_mutex; + INIT_LIST_HEAD(&ev->events); + + ev->next_seq = 0; /* bootstrap sequencing */ + + if (GF_NEED_ORDERED_EVENTS(entry)) { + entry->pickevent = pick_event_ordered; + entry->queueevent = queue_ordered_event; + } else { + entry->pickevent = pick_event_unordered; + entry->queueevent = queue_unordered_event; + } + + ret = gf_thread_create(&ev->invoker, NULL, gf_changelog_callback_invoker, + ev, "clogcbki"); + if (ret != 0) { + entry->pickevent = NULL; + entry->queueevent = NULL; + goto cleanup_cond; + } + + return 0; + +cleanup_cond: + (void)pthread_cond_destroy(&ev->cond); +cleanup_mutex: + 
(void)pthread_mutex_destroy(&ev->lock); +error_return: + return -1; } /** @@ -327,246 +363,241 @@ gf_init_event (gf_changelog_t *entry) * - destroy rpc{-clnt, svc} */ int -gf_cleanup_brick_connection (xlator_t *this, gf_changelog_t *entry) +gf_cleanup_brick_connection(xlator_t *this, gf_changelog_t *entry) { - return 0; + return 0; } int -gf_cleanup_connections (xlator_t *this) +gf_cleanup_connections(xlator_t *this) { - return 0; + return 0; } static int -gf_setup_brick_connection (xlator_t *this, - struct gf_brick_spec *brick, - gf_boolean_t ordered, void *xl) +gf_setup_brick_connection(xlator_t *this, struct gf_brick_spec *brick, + gf_boolean_t ordered, void *xl) { - int ret = 0; - gf_private_t *priv = NULL; - gf_changelog_t *entry = NULL; - - priv = this->private; - - if (!brick->callback || !brick->init || !brick->fini) - goto error_return; - - entry = GF_CALLOC (1, sizeof (*entry), - gf_changelog_mt_libgfchangelog_t); - if (!entry) - goto error_return; - INIT_LIST_HEAD (&entry->list); - - LOCK_INIT (&entry->statelock); - entry->connstate = GF_CHANGELOG_CONN_STATE_PENDING; - - entry->notify = brick->filter; - (void) strncpy (entry->brick, brick->brick_path, PATH_MAX-1); - entry->brick[PATH_MAX-1] = 0; - - entry->this = this; - entry->invokerxl = xl; - - entry->ordered = ordered; - ret = gf_init_event (entry); - if (ret) - goto free_entry; - - entry->fini = brick->fini; - entry->callback = brick->callback; - entry->connected = brick->connected; - entry->disconnected = brick->disconnected; - - entry->ptr = brick->init (this, brick); - if (!entry->ptr) - goto cleanup_event; - priv->api = entry->ptr; /* pointer to API, if required */ - - pthread_mutex_lock (&priv->lock); - { - list_add_tail (&entry->list, &priv->connections); - } - pthread_mutex_unlock (&priv->lock); - - ret = gf_changelog_setup_rpc (this, entry, CHANGELOG_RPC_PROBE_FILTER); - if (ret) - goto cleanup_event; - return 0; - - cleanup_event: - (void) gf_cleanup_event (this, &entry->event); - 
free_entry: - gf_msg_debug (this->name, 0, "freeing entry %p", entry); - list_del (&entry->list); /* FIXME: kludge for now */ - GF_FREE (entry); - error_return: - return -1; + int ret = 0; + gf_private_t *priv = NULL; + gf_changelog_t *entry = NULL; + + priv = this->private; + + if (!brick->callback || !brick->init || !brick->fini) + goto error_return; + + entry = GF_CALLOC(1, sizeof(*entry), gf_changelog_mt_libgfchangelog_t); + if (!entry) + goto error_return; + INIT_LIST_HEAD(&entry->list); + + LOCK_INIT(&entry->statelock); + entry->connstate = GF_CHANGELOG_CONN_STATE_PENDING; + + entry->notify = brick->filter; + if (snprintf(entry->brick, PATH_MAX, "%s", brick->brick_path) >= PATH_MAX) + goto free_entry; + + entry->this = this; + entry->invokerxl = xl; + + entry->ordered = ordered; + ret = gf_init_event(entry); + if (ret) + goto free_entry; + + entry->fini = brick->fini; + entry->callback = brick->callback; + entry->connected = brick->connected; + entry->disconnected = brick->disconnected; + + entry->ptr = brick->init(this, brick); + if (!entry->ptr) + goto cleanup_event; + priv->api = entry->ptr; /* pointer to API, if required */ + + pthread_mutex_lock(&priv->lock); + { + list_add_tail(&entry->list, &priv->connections); + } + pthread_mutex_unlock(&priv->lock); + + ret = gf_changelog_setup_rpc(this, entry, CHANGELOG_RPC_PROBE_FILTER); + if (ret) + goto cleanup_event; + return 0; + +cleanup_event: + (void)gf_cleanup_event(this, &entry->event); +free_entry: + gf_msg_debug(this->name, 0, "freeing entry %p", entry); + list_del(&entry->list); /* FIXME: kludge for now */ + GF_FREE(entry); +error_return: + return -1; } int -gf_changelog_register_brick (xlator_t *this, - struct gf_brick_spec *brick, - gf_boolean_t ordered, void *xl) +gf_changelog_register_brick(xlator_t *this, struct gf_brick_spec *brick, + gf_boolean_t ordered, void *xl) { - return gf_setup_brick_connection (this, brick, ordered, xl); + return gf_setup_brick_connection(this, brick, ordered, xl); } 
static int -gf_changelog_setup_logging (xlator_t *this, char *logfile, int loglevel) +gf_changelog_setup_logging(xlator_t *this, char *logfile, int loglevel) { - /* passing ident as NULL means to use default ident for syslog */ - if (gf_log_init (this->ctx, logfile, NULL)) - return -1; + /* passing ident as NULL means to use default ident for syslog */ + if (gf_log_init(this->ctx, logfile, NULL)) + return -1; - gf_log_set_loglevel ((loglevel == -1) ? GF_LOG_INFO : - loglevel); - return 0; + gf_log_set_loglevel(this->ctx, (loglevel == -1) ? GF_LOG_INFO : loglevel); + return 0; } static int -gf_changelog_set_master (xlator_t *master, void *xl) +gf_changelog_set_master(xlator_t *master, void *xl) { - int32_t ret = 0; - xlator_t *this = NULL; - xlator_t *old_this = NULL; - gf_private_t *priv = NULL; - - this = xl; - if (!this || !this->ctx) { - ret = gf_changelog_init_master (); - if (ret) - return -1; - this = THIS; - } + int32_t ret = 0; + xlator_t *this = NULL; + xlator_t *old_this = NULL; + gf_private_t *priv = NULL; + + this = xl; + if (!this || !this->ctx) { + ret = gf_changelog_init_master(); + if (ret) + return -1; + this = THIS; + } - master->ctx = this->ctx; + master->ctx = this->ctx; - INIT_LIST_HEAD (&master->volume_options); - SAVE_THIS (THIS); + INIT_LIST_HEAD(&master->volume_options); + SAVE_THIS(THIS); - ret = xlator_mem_acct_init (THIS, gf_changelog_mt_end); - if (ret != 0) - goto restore_this; + ret = xlator_mem_acct_init(THIS, gf_changelog_mt_end); + if (ret != 0) + goto restore_this; - priv = gf_changelog_alloc_priv (); - if (!priv) { - ret = -1; - goto restore_this; - } + priv = gf_changelog_alloc_priv(); + if (!priv) { + ret = -1; + goto restore_this; + } - if (!xl) { - /* poller thread */ - ret = gf_thread_create (&priv->poller, - NULL, changelog_rpc_poller, THIS); - if (ret != 0) { - GF_FREE (priv); - gf_msg (master->name, GF_LOG_ERROR, 0, - CHANGELOG_LIB_MSG_THREAD_CREATION_FAILED, - "failed to spawn poller thread"); - goto restore_this; - } + 
if (!xl) { + /* poller thread */ + ret = gf_thread_create(&priv->poller, NULL, changelog_rpc_poller, THIS, + "clogpoll"); + if (ret != 0) { + GF_FREE(priv); + gf_msg(master->name, GF_LOG_ERROR, 0, + CHANGELOG_LIB_MSG_THREAD_CREATION_FAILED, + "failed to spawn poller thread"); + goto restore_this; } + } - master->private = priv; + master->private = priv; - restore_this: - RESTORE_THIS (); +restore_this: + RESTORE_THIS(); - return ret; + return ret; } int -gf_changelog_init (void *xl) +gf_changelog_init(void *xl) { - int ret = 0; - gf_private_t *priv = NULL; - - if (master) - return 0; - - master = calloc (1, sizeof (*master)); - if (!master) - goto error_return; - - master->name = strdup ("gfchangelog"); - if (!master->name) - goto dealloc_master; - - ret = gf_changelog_set_master (master, xl); - if (ret) - goto dealloc_name; - - priv = master->private; - ret = gf_thread_create (&priv->connectionjanitor, NULL, - gf_changelog_connection_janitor, master); - if (ret != 0) { - /* TODO: cleanup priv, mutex (poller thread for !xl) */ - goto dealloc_name; - } + int ret = 0; + gf_private_t *priv = NULL; + if (master) return 0; - dealloc_name: - free (master->name); - dealloc_master: - free (master); - master = NULL; - error_return: - return -1; + master = calloc(1, sizeof(*master)); + if (!master) + goto error_return; + + master->name = strdup("gfchangelog"); + if (!master->name) + goto dealloc_master; + + ret = gf_changelog_set_master(master, xl); + if (ret) + goto dealloc_name; + + priv = master->private; + ret = gf_thread_create(&priv->connectionjanitor, NULL, + gf_changelog_connection_janitor, master, "clogjan"); + if (ret != 0) { + /* TODO: cleanup priv, mutex (poller thread for !xl) */ + goto dealloc_name; + } + + return 0; + +dealloc_name: + free(master->name); +dealloc_master: + free(master); + master = NULL; +error_return: + return -1; } int -gf_changelog_register_generic (struct gf_brick_spec *bricks, int count, - int ordered, char *logfile, int lvl, void *xl) 
+gf_changelog_register_generic(struct gf_brick_spec *bricks, int count, + int ordered, char *logfile, int lvl, void *xl) { - int ret = 0; - xlator_t *this = NULL; - xlator_t *old_this = NULL; - struct gf_brick_spec *brick = NULL; - gf_boolean_t need_order = _gf_false; + int ret = 0; + xlator_t *this = NULL; + xlator_t *old_this = NULL; + struct gf_brick_spec *brick = NULL; + gf_boolean_t need_order = _gf_false; - SAVE_THIS (xl); + SAVE_THIS(xl); - this = THIS; - if (!this) - goto error_return; + this = THIS; + if (!this) + goto error_return; - ret = gf_changelog_setup_logging (this, logfile, lvl); - if (ret) - goto error_return; - - need_order = (ordered) ? _gf_true : _gf_false; - - brick = bricks; - while (count--) { - gf_msg (this->name, GF_LOG_INFO, 0, - CHANGELOG_LIB_MSG_NOTIFY_REGISTER_INFO, - "Registering brick: %s [notify filter: %d]", - brick->brick_path, brick->filter); - - ret = gf_changelog_register_brick (this, brick, need_order, xl); - if (ret != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CHANGELOG_LIB_MSG_NOTIFY_REGISTER_FAILED, - "Error registering with changelog xlator"); - break; - } - - brick++; + ret = gf_changelog_setup_logging(this, logfile, lvl); + if (ret) + goto error_return; + + need_order = (ordered) ? 
_gf_true : _gf_false; + + brick = bricks; + while (count--) { + gf_smsg(this->name, GF_LOG_INFO, 0, + CHANGELOG_LIB_MSG_NOTIFY_REGISTER_INFO, "brick=%s", + brick->brick_path, "notify_filter=%d", brick->filter, NULL); + + ret = gf_changelog_register_brick(this, brick, need_order, xl); + if (ret != 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, + CHANGELOG_LIB_MSG_NOTIFY_REGISTER_FAILED, + "Error registering with changelog xlator"); + break; } - if (ret != 0) - goto cleanup_inited_bricks; + brick++; + } - RESTORE_THIS(); - return 0; + if (ret != 0) + goto cleanup_inited_bricks; - cleanup_inited_bricks: - gf_cleanup_connections (this); - error_return: - RESTORE_THIS(); - return -1; + RESTORE_THIS(); + return 0; + +cleanup_inited_bricks: + gf_cleanup_connections(this); +error_return: + RESTORE_THIS(); + return -1; } /** @@ -593,27 +624,29 @@ gf_changelog_register_generic (struct gf_brick_spec *bricks, int count, * For generic API, refer gf_changelog_register_generic(). */ int -gf_changelog_register (char *brick_path, char *scratch_dir, - char *log_file, int log_level, int max_reconnects) +gf_changelog_register(char *brick_path, char *scratch_dir, char *log_file, + int log_level, int max_reconnects) { - struct gf_brick_spec brick = {0,}; + struct gf_brick_spec brick = { + 0, + }; - if (master) - THIS = master; - else - return -1; + if (master) + THIS = master; + else + return -1; - brick.brick_path = brick_path; - brick.filter = CHANGELOG_OP_TYPE_JOURNAL; + brick.brick_path = brick_path; + brick.filter = CHANGELOG_OP_TYPE_JOURNAL; - brick.init = gf_changelog_journal_init; - brick.fini = gf_changelog_journal_fini; - brick.callback = gf_changelog_handle_journal; - brick.connected = gf_changelog_journal_connect; - brick.disconnected = gf_changelog_journal_disconnect; + brick.init = gf_changelog_journal_init; + brick.fini = gf_changelog_journal_fini; + brick.callback = gf_changelog_handle_journal; + brick.connected = gf_changelog_journal_connect; + brick.disconnected = 
gf_changelog_journal_disconnect; - brick.ptr = scratch_dir; + brick.ptr = scratch_dir; - return gf_changelog_register_generic (&brick, 1, 1, - log_file, log_level, NULL); + return gf_changelog_register_generic(&brick, 1, 1, log_file, log_level, + NULL); } diff --git a/xlators/features/changelog/lib/src/gf-history-changelog.c b/xlators/features/changelog/lib/src/gf-history-changelog.c index f7b58f5a965..a16219f3664 100644 --- a/xlators/features/changelog/lib/src/gf-history-changelog.c +++ b/xlators/features/changelog/lib/src/gf-history-changelog.c @@ -8,10 +8,10 @@ #endif #include <string.h> -#include "globals.h" -#include "glusterfs.h" -#include "logging.h" -#include "syscall.h" +#include <glusterfs/globals.h> +#include <glusterfs/glusterfs.h> +#include <glusterfs/logging.h> +#include <glusterfs/syscall.h> #include "gf-changelog-helpers.h" #include "gf-changelog-journal.h" @@ -36,60 +36,60 @@ * -1: On error. */ int -gf_history_changelog_done (char *file) +gf_history_changelog_done(char *file) { - int ret = -1; - char *buffer = NULL; - xlator_t *this = NULL; - gf_changelog_journal_t *jnl = NULL; - gf_changelog_journal_t *hist_jnl = NULL; - char to_path[PATH_MAX] = {0,}; + int ret = -1; + char *buffer = NULL; + xlator_t *this = NULL; + gf_changelog_journal_t *jnl = NULL; + gf_changelog_journal_t *hist_jnl = NULL; + char to_path[PATH_MAX] = { + 0, + }; + + errno = EINVAL; + + this = THIS; + if (!this) + goto out; + + jnl = (gf_changelog_journal_t *)GF_CHANGELOG_GET_API_PTR(this); + if (!jnl) + goto out; + + hist_jnl = jnl->hist_jnl; + if (!hist_jnl) + goto out; + + if (!file || !strlen(file)) + goto out; + + /* make sure 'file' is inside ->jnl_working_dir */ + buffer = realpath(file, NULL); + if (!buffer) + goto out; + + if (strncmp(hist_jnl->jnl_working_dir, buffer, + strlen(hist_jnl->jnl_working_dir))) + goto out; + + (void)snprintf(to_path, PATH_MAX, "%s%s", hist_jnl->jnl_processed_dir, + basename(buffer)); + gf_msg_debug(this->name, 0, "moving %s to processed 
directory", file); + ret = sys_rename(buffer, to_path); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_LIB_MSG_RENAME_FAILED, "from=%s", file, "to=%s", + to_path, NULL); + goto out; + } + + ret = 0; - errno = EINVAL; - - this = THIS; - if (!this) - goto out; - - jnl = (gf_changelog_journal_t *) GF_CHANGELOG_GET_API_PTR (this); - if (!jnl) - goto out; - - hist_jnl = jnl->hist_jnl; - if (!hist_jnl) - goto out; - - if (!file || !strlen (file)) - goto out; - - /* make sure 'file' is inside ->jnl_working_dir */ - buffer = realpath (file, NULL); - if (!buffer) - goto out; - - if (strncmp (hist_jnl->jnl_working_dir, - buffer, strlen (hist_jnl->jnl_working_dir))) - goto out; - - (void) snprintf (to_path, PATH_MAX, "%s%s", - hist_jnl->jnl_processed_dir, basename (buffer)); - gf_msg_debug (this->name, 0, - "moving %s to processed directory", file); - ret = sys_rename (buffer, to_path); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_LIB_MSG_RENAME_FAILED, - "cannot move %s to %s", - file, to_path); - goto out; - } - - ret = 0; - - out: - if (buffer) - free (buffer); /* allocated by realpath() */ - return ret; +out: + if (buffer) + free(buffer); /* allocated by realpath() */ + return ret; } /** @@ -103,33 +103,33 @@ gf_history_changelog_done (char *file) * -1: On error. 
*/ int -gf_history_changelog_start_fresh () +gf_history_changelog_start_fresh() { - xlator_t *this = NULL; - gf_changelog_journal_t *jnl = NULL; - gf_changelog_journal_t *hist_jnl = NULL; + xlator_t *this = NULL; + gf_changelog_journal_t *jnl = NULL; + gf_changelog_journal_t *hist_jnl = NULL; - this = THIS; - if (!this) - goto out; + this = THIS; + if (!this) + goto out; - errno = EINVAL; + errno = EINVAL; - jnl = (gf_changelog_journal_t *) GF_CHANGELOG_GET_API_PTR (this); - if (!jnl) - goto out; + jnl = (gf_changelog_journal_t *)GF_CHANGELOG_GET_API_PTR(this); + if (!jnl) + goto out; - hist_jnl = jnl->hist_jnl; - if (!hist_jnl) - goto out; + hist_jnl = jnl->hist_jnl; + if (!hist_jnl) + goto out; - if (gf_ftruncate (hist_jnl->jnl_fd, 0)) - goto out; + if (gf_ftruncate(hist_jnl->jnl_fd, 0)) + goto out; - return 0; + return 0; - out: - return -1; +out: + return -1; } /** @@ -148,50 +148,52 @@ gf_history_changelog_start_fresh () * -1 : On error. */ ssize_t -gf_history_changelog_next_change (char *bufptr, size_t maxlen) +gf_history_changelog_next_change(char *bufptr, size_t maxlen) { - ssize_t size = -1; - int tracker_fd = 0; - xlator_t *this = NULL; - gf_changelog_journal_t *jnl = NULL; - gf_changelog_journal_t *hist_jnl = NULL; - char buffer[PATH_MAX] = {0,}; - - if (maxlen > PATH_MAX) { - errno = ENAMETOOLONG; - goto out; - } + ssize_t size = -1; + int tracker_fd = 0; + xlator_t *this = NULL; + gf_changelog_journal_t *jnl = NULL; + gf_changelog_journal_t *hist_jnl = NULL; + char buffer[PATH_MAX] = { + 0, + }; - errno = EINVAL; + if (maxlen > PATH_MAX) { + errno = ENAMETOOLONG; + goto out; + } - this = THIS; - if (!this) - goto out; + errno = EINVAL; - jnl = (gf_changelog_journal_t *) GF_CHANGELOG_GET_API_PTR (this); - if (!jnl) - goto out; + this = THIS; + if (!this) + goto out; - hist_jnl = jnl->hist_jnl; - if (!hist_jnl) - goto out; + jnl = (gf_changelog_journal_t *)GF_CHANGELOG_GET_API_PTR(this); + if (!jnl) + goto out; - tracker_fd = hist_jnl->jnl_fd; + hist_jnl 
= jnl->hist_jnl; + if (!hist_jnl) + goto out; - size = gf_readline (tracker_fd, buffer, maxlen); - if (size < 0) { - size = -1; - goto out; - } + tracker_fd = hist_jnl->jnl_fd; - if (size == 0) - goto out; + size = gf_readline(tracker_fd, buffer, maxlen); + if (size < 0) { + size = -1; + goto out; + } + + if (size == 0) + goto out; - memcpy (bufptr, buffer, size - 1); - bufptr[size - 1] = '\0'; + memcpy(bufptr, buffer, size - 1); + bufptr[size - 1] = '\0'; out: - return size; + return size; } /** @@ -212,106 +214,100 @@ out: * */ ssize_t -gf_history_changelog_scan () +gf_history_changelog_scan() { - int ret = 0; - int tracker_fd = 0; - size_t len = 0; - size_t off = 0; - xlator_t *this = NULL; - size_t nr_entries = 0; - gf_changelog_journal_t *jnl = NULL; - gf_changelog_journal_t *hist_jnl = NULL; - struct dirent *entryp = NULL; - struct dirent *result = NULL; - char buffer[PATH_MAX] = {0,}; - static int is_last_scan; - - this = THIS; - if (!this) - goto out; + int tracker_fd = 0; + size_t off = 0; + xlator_t *this = NULL; + size_t nr_entries = 0; + gf_changelog_journal_t *jnl = NULL; + gf_changelog_journal_t *hist_jnl = NULL; + struct dirent *entry = NULL; + struct dirent scratch[2] = { + { + 0, + }, + }; + char buffer[PATH_MAX] = { + 0, + }; + static int is_last_scan; + + this = THIS; + if (!this) + goto out; + + jnl = (gf_changelog_journal_t *)GF_CHANGELOG_GET_API_PTR(this); + if (!jnl) + goto out; + if (JNL_IS_API_DISCONNECTED(jnl)) { + errno = ENOTCONN; + goto out; + } + + hist_jnl = jnl->hist_jnl; + if (!hist_jnl) + goto out; + +retry: + if (is_last_scan == 1) + return 0; + if (hist_jnl->hist_done == 0) + is_last_scan = 1; - jnl = (gf_changelog_journal_t *) GF_CHANGELOG_GET_API_PTR (this); - if (!jnl) - goto out; - if (JNL_IS_API_DISCONNECTED (jnl)) { - errno = ENOTCONN; - goto out; - } + errno = EINVAL; + if (hist_jnl->hist_done == -1) + goto out; - hist_jnl = jnl->hist_jnl; - if (!hist_jnl) - goto out; + tracker_fd = hist_jnl->jnl_fd; - retry: - if 
(is_last_scan == 1) - return 0; - if (hist_jnl->hist_done == 0) - is_last_scan = 1; + if (gf_ftruncate(tracker_fd, 0)) + goto out; - errno = EINVAL; - if (hist_jnl->hist_done == -1) - goto out; + rewinddir(hist_jnl->jnl_dir); - tracker_fd = hist_jnl->jnl_fd; + for (;;) { + errno = 0; + entry = sys_readdir(hist_jnl->jnl_dir, scratch); + if (!entry || errno != 0) + break; - if (gf_ftruncate (tracker_fd, 0)) - goto out; + if (strcmp(basename(entry->d_name), ".") == 0 || + strcmp(basename(entry->d_name), "..") == 0) + continue; - len = offsetof (struct dirent, d_name) - + pathconf (hist_jnl->jnl_processing_dir, _PC_NAME_MAX) + 1; - entryp = GF_CALLOC (1, len, - gf_changelog_mt_libgfchangelog_dirent_t); - if (!entryp) - goto out; + nr_entries++; + + GF_CHANGELOG_FILL_BUFFER(hist_jnl->jnl_processing_dir, buffer, off, + strlen(hist_jnl->jnl_processing_dir)); + GF_CHANGELOG_FILL_BUFFER(entry->d_name, buffer, off, + strlen(entry->d_name)); + GF_CHANGELOG_FILL_BUFFER("\n", buffer, off, 1); - rewinddir (hist_jnl->jnl_dir); - while (1) { - ret = readdir_r (hist_jnl->jnl_dir, entryp, &result); - if (ret || !result) - break; - - if ( !strcmp (basename (entryp->d_name), ".") - || !strcmp (basename (entryp->d_name), "..") ) - continue; - - nr_entries++; - - GF_CHANGELOG_FILL_BUFFER (hist_jnl->jnl_processing_dir, - buffer, off, - strlen (hist_jnl->jnl_processing_dir)); - GF_CHANGELOG_FILL_BUFFER (entryp->d_name, buffer, - off, strlen (entryp->d_name)); - GF_CHANGELOG_FILL_BUFFER ("\n", buffer, off, 1); - - if (gf_changelog_write (tracker_fd, buffer, off) != off) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CHANGELOG_LIB_MSG_WRITE_FAILED, - "error writing changelog filename" - " to tracker file"); - break; - } - off = 0; + if (gf_changelog_write(tracker_fd, buffer, off) != off) { + gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_LIB_MSG_WRITE_FAILED, + "error writing changelog filename" + " to tracker file"); + break; } + off = 0; + } - GF_FREE (entryp); - - gf_msg_debug (this->name, 0, 
- "hist_done %d, is_last_scan: %d", - hist_jnl->hist_done, is_last_scan); - - if (!result) { - if (gf_lseek (tracker_fd, 0, SEEK_SET) != -1) { - if (nr_entries > 0) - return nr_entries; - else { - sleep(1); - goto retry; - } - } + gf_msg_debug(this->name, 0, "hist_done %d, is_last_scan: %d", + hist_jnl->hist_done, is_last_scan); + + if (!entry) { + if (gf_lseek(tracker_fd, 0, SEEK_SET) != -1) { + if (nr_entries > 0) + return nr_entries; + else { + sleep(1); + goto retry; + } } - out: - return -1; + } +out: + return -1; } /* @@ -319,36 +315,36 @@ gf_history_changelog_scan () * Returns 0 on success(updates given time-stamp), -1 on failure. */ int -gf_history_get_timestamp (int fd, int index, int len, - unsigned long *ts) +gf_history_get_timestamp(int fd, int index, int len, unsigned long *ts) { - xlator_t *this = NULL; - int n_read = -1; - char path_buf[PATH_MAX]= {0,}; - char *iter = path_buf; - size_t offset = index * (len+1); - unsigned long value = 0; - int ret = 0; - - this = THIS; - if (!this) { - return -1; - } - - n_read = pread (fd, path_buf, len, offset); - if (n_read < 0 ) { - ret = -1; - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_LIB_MSG_READ_ERROR, - "could not read from htime file"); - goto out; - } - iter+= len - TIMESTAMP_LENGTH; - sscanf (iter, "%lu",&value); + xlator_t *this = NULL; + int n_read = -1; + char path_buf[PATH_MAX] = { + 0, + }; + char *iter = path_buf; + size_t offset = index * (len + 1); + unsigned long value = 0; + int ret = 0; + + this = THIS; + if (!this) { + return -1; + } + + n_read = sys_pread(fd, path_buf, len, offset); + if (n_read < 0) { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_READ_ERROR, + "could not read from htime file"); + goto out; + } + iter += len - TIMESTAMP_LENGTH; + sscanf(iter, "%lu", &value); out: - if(ret == 0) - *ts = value; - return ret; + if (ret == 0) + *ts = value; + return ret; } /* @@ -356,38 +352,37 @@ out: * Checks whether @value is there next to @target_index or 
not */ int -gf_history_check ( int fd, int target_index, unsigned long value, int len) +gf_history_check(int fd, int target_index, unsigned long value, int len) { - int ret = 0; - unsigned long ts1 = 0; - unsigned long ts2 = 0; - - if (target_index == 0) { - ret = gf_history_get_timestamp (fd, target_index, len, &ts1); - if (ret == -1) - goto out; - if (value <= ts1) - goto out; - else { - ret = -1; - goto out; - } - } + int ret = 0; + unsigned long ts1 = 0; + unsigned long ts2 = 0; - ret = gf_history_get_timestamp (fd, target_index, len, &ts1); - if (ret ==-1) - goto out; - ret = gf_history_get_timestamp (fd, target_index -1, len, &ts2); - if (ret ==-1) - goto out; - - if ( (value <= ts1) && (value > ts2) ) { - goto out; - } - else - ret = -1; + if (target_index == 0) { + ret = gf_history_get_timestamp(fd, target_index, len, &ts1); + if (ret == -1) + goto out; + if (value <= ts1) + goto out; + else { + ret = -1; + goto out; + } + } + + ret = gf_history_get_timestamp(fd, target_index, len, &ts1); + if (ret == -1) + goto out; + ret = gf_history_get_timestamp(fd, target_index - 1, len, &ts2); + if (ret == -1) + goto out; + + if ((value <= ts1) && (value > ts2)) { + goto out; + } else + ret = -1; out: - return ret; + return ret; } /* @@ -407,83 +402,74 @@ out: */ int -gf_history_b_search (int fd, unsigned long value, - unsigned long from, unsigned long to, int len) +gf_history_b_search(int fd, unsigned long value, unsigned long from, + unsigned long to, int len) { - int m_index = -1; - unsigned long cur_value = 0; - unsigned long ts1 = 0; - int ret = 0; - - m_index = (from + to)/2; - - if ( (to - from) <=1 ) { - /* either one or 2 changelogs left */ - if ( to != from ) { - /* check if value is less or greater than to - * return accordingly - */ - ret = gf_history_get_timestamp (fd, from, len, &ts1); - if (ret ==-1) - goto out; - if ( ts1 >= value) { - /* actually compatision should be - * exactly == but considering - * - * case of only 2 changelogs in htime file - */ 
- return from; - } - else - return to; - } - else - return to; - } - - ret = gf_history_get_timestamp (fd, m_index, len, &cur_value); + int m_index = -1; + unsigned long cur_value = 0; + unsigned long ts1 = 0; + int ret = 0; + + m_index = (from + to) / 2; + + if ((to - from) <= 1) { + /* either one or 2 changelogs left */ + if (to != from) { + /* check if value is less or greater than to + * return accordingly + */ + ret = gf_history_get_timestamp(fd, from, len, &ts1); + if (ret == -1) + goto out; + if (ts1 >= value) { + /* actually compatision should be + * exactly == but considering + * + * case of only 2 changelogs in htime file + */ + return from; + } else + return to; + } else + return to; + } + + ret = gf_history_get_timestamp(fd, m_index, len, &cur_value); + if (ret == -1) + goto out; + if (cur_value == value) { + return m_index; + } else if (value > cur_value) { + ret = gf_history_get_timestamp(fd, m_index + 1, len, &cur_value); if (ret == -1) - goto out; - if (cur_value == value) { + goto out; + if (value < cur_value) + return m_index + 1; + else + return gf_history_b_search(fd, value, m_index + 1, to, len); + } else { + if (m_index == 0) { + /* we are sure that values exists + * in this htime file + */ + return 0; + } else { + ret = gf_history_get_timestamp(fd, m_index - 1, len, &cur_value); + if (ret == -1) + goto out; + if (value > cur_value) { return m_index; + } else + return gf_history_b_search(fd, value, from, m_index - 1, len); } - else if (value > cur_value) { - ret = gf_history_get_timestamp (fd, m_index+1, len, &cur_value); - if (ret == -1) - goto out; - if (value < cur_value) - return m_index + 1; - else - return gf_history_b_search (fd, value, - m_index+1, to, len); - } - else { - if (m_index ==0) { - /* we are sure that values exists - * in this htime file - */ - return 0; - } - else { - ret = gf_history_get_timestamp (fd, m_index-1, len, - &cur_value); - if (ret == -1) - goto out; - if (value > cur_value) { - return m_index; - } - else - 
return gf_history_b_search (fd, value, from, - m_index-1, len); - } - } + } out: - return -1; + return -1; } /* * Description: Checks if the changelog path is usable or not, - * which is differenciated by checking for "changelog" + * which is differentiated by checking for "changelog" * in the path and not "CHANGELOG". * * Returns: @@ -491,64 +477,59 @@ out: * 0 : No, Not usable ( contains, "changelog") */ int -gf_is_changelog_usable (char *cl_path) +gf_is_changelog_usable(char *cl_path) { - int ret = -1; - const char low_c[] = "changelog"; - char *str_ret = NULL; - char *bname = NULL; + int ret = -1; + const char low_c[] = "changelog"; + char *str_ret = NULL; + char *bname = NULL; - bname = basename (cl_path); + bname = basename(cl_path); - str_ret = strstr (bname, low_c); + str_ret = strstr(bname, low_c); - if (str_ret != NULL) - ret = 0; - else - ret = 1; - - return ret; + if (str_ret != NULL) + ret = 0; + else + ret = 1; + return ret; } void * -gf_changelog_consume_wrap (void* data) +gf_changelog_consume_wrap(void *data) { - int ret = -1; - ssize_t nread = 0; - xlator_t *this = NULL; - gf_changelog_consume_data_t *ccd = NULL; - - ccd = (gf_changelog_consume_data_t *) data; - this = ccd->this; - - ccd->retval = -1; - - nread = pread (ccd->fd, ccd->changelog, PATH_MAX, ccd->offset); - if (nread < 0) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_LIB_MSG_READ_ERROR, - "cannot read from history metadata file"); - goto out; - } - - /* TODO: handle short reads and EOF. 
*/ - if (gf_is_changelog_usable (ccd->changelog) == 1) { - - ret = gf_changelog_consume (ccd->this, - ccd->jnl, ccd->changelog, _gf_true); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, - 0, CHANGELOG_LIB_MSG_PARSE_ERROR, - "could not parse changelog: %s", - ccd->changelog); - goto out; - } + int ret = -1; + ssize_t nread = 0; + xlator_t *this = NULL; + gf_changelog_consume_data_t *ccd = NULL; + + ccd = (gf_changelog_consume_data_t *)data; + this = ccd->this; + + ccd->retval = -1; + + nread = sys_pread(ccd->fd, ccd->changelog, PATH_MAX - 1, ccd->offset); + if (nread < 0) { + gf_msg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_READ_ERROR, + "cannot read from history metadata file"); + goto out; + } + + /* TODO: handle short reads and EOF. */ + if (gf_is_changelog_usable(ccd->changelog) == 1) { + ret = gf_changelog_consume(ccd->this, ccd->jnl, ccd->changelog, + _gf_true); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_LIB_MSG_PARSE_ERROR, + "name=%s", ccd->changelog, NULL); + goto out; } - ccd->retval = 0; + } + ccd->retval = 0; - out: - return NULL; +out: + return NULL; } /** @@ -557,130 +538,132 @@ gf_changelog_consume_wrap (void* data) * to index "to" in open htime file whose fd is "fd". 
*/ -#define MAX_PARALLELS 10 +#define MAX_PARALLELS 10 void * -gf_history_consume (void * data) +gf_history_consume(void *data) { - xlator_t *this = NULL; - gf_changelog_journal_t *jnl = NULL; - gf_changelog_journal_t *hist_jnl = NULL; - int ret = 0; - int iter = 0; - int fd = -1; - int from = -1; - int to = -1; - int len = -1; - int n_parallel = 0; - int n_envoked = 0; - gf_boolean_t publish = _gf_true; - pthread_t th_id[MAX_PARALLELS] = {0,}; - gf_changelog_history_data_t *hist_data = NULL; - gf_changelog_consume_data_t ccd[MAX_PARALLELS] = {{0},}; - gf_changelog_consume_data_t *curr = NULL; - - hist_data = (gf_changelog_history_data_t *) data; - if (hist_data == NULL) { - ret = -1; - goto out; - } - - fd = hist_data->htime_fd; - from = hist_data->from; - to = hist_data->to; - len = hist_data->len; - n_parallel = hist_data->n_parallel; - - THIS = hist_data->this; - this = hist_data->this; - if (!this) { - ret = -1; - goto out; - } - - jnl = (gf_changelog_journal_t *) GF_CHANGELOG_GET_API_PTR (this); - if (!jnl) { - ret = -1; - goto out; - } - - hist_jnl = jnl->hist_jnl; - if (!hist_jnl) { - ret = -1; - goto out; - } - - while (from <= to) { - n_envoked = 0; - - for (iter = 0 ; (iter < n_parallel) && (from <= to); iter++) { - curr = &ccd[iter]; - - curr->this = this; - curr->jnl = hist_jnl; - curr->fd = fd; - curr->offset = from * (len + 1); - - curr->retval = 0; - memset (curr->changelog, '\0', PATH_MAX); - - ret = pthread_create (&th_id[iter], NULL, - gf_changelog_consume_wrap, curr); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, ret, - CHANGELOG_LIB_MSG_THREAD_CREATION_FAILED - , "could not create consume-thread"); - ret = -1; - goto sync; - } else - n_envoked++; - - from++; - } - - sync: - for (iter = 0; iter < n_envoked; iter++) { - ret = pthread_join (th_id[iter], NULL); - if (ret) { - publish = _gf_false; - gf_msg (this->name, GF_LOG_ERROR, ret, - CHANGELOG_LIB_MSG_PTHREAD_JOIN_FAILED, - "pthread_join() error"); - /* try to join the rest */ - continue; - 
} - - if (publish == _gf_false) - continue; - - curr = &ccd[iter]; - if (ccd->retval) { - publish = _gf_false; - gf_msg (this->name, GF_LOG_ERROR, - 0, CHANGELOG_LIB_MSG_PARSE_ERROR, - "parsing error, ceased publishing..."); - continue; - } - - ret = gf_changelog_publish (curr->this, - curr->jnl, curr->changelog); - if (ret) { - publish = _gf_false; - gf_msg (this->name, GF_LOG_ERROR, 0, - CHANGELOG_LIB_MSG_PUBLISH_ERROR, - "publish error, ceased publishing..."); - } - } - } - - /* informing "parsing done". */ - hist_jnl->hist_done = (publish == _gf_true) ? 0 : -1; + xlator_t *this = NULL; + gf_changelog_journal_t *jnl = NULL; + gf_changelog_journal_t *hist_jnl = NULL; + int ret = 0; + int iter = 0; + int fd = -1; + int from = -1; + int to = -1; + int len = -1; + int n_parallel = 0; + int n_envoked = 0; + gf_boolean_t publish = _gf_true; + pthread_t th_id[MAX_PARALLELS] = { + 0, + }; + gf_changelog_history_data_t *hist_data = NULL; + gf_changelog_consume_data_t ccd[MAX_PARALLELS] = { + {0}, + }; + gf_changelog_consume_data_t *curr = NULL; + + hist_data = (gf_changelog_history_data_t *)data; + if (hist_data == NULL) { + ret = -1; + goto out; + } + + fd = hist_data->htime_fd; + from = hist_data->from; + to = hist_data->to; + len = hist_data->len; + n_parallel = hist_data->n_parallel; + + THIS = hist_data->this; + this = hist_data->this; + if (!this) { + ret = -1; + goto out; + } + + jnl = (gf_changelog_journal_t *)GF_CHANGELOG_GET_API_PTR(this); + if (!jnl) { + ret = -1; + goto out; + } + + hist_jnl = jnl->hist_jnl; + if (!hist_jnl) { + ret = -1; + goto out; + } + + while (from <= to) { + n_envoked = 0; + + for (iter = 0; (iter < n_parallel) && (from <= to); iter++) { + curr = &ccd[iter]; + + curr->this = this; + curr->jnl = hist_jnl; + curr->fd = fd; + curr->offset = from * (len + 1); + + curr->retval = 0; + memset(curr->changelog, '\0', PATH_MAX); + + ret = gf_thread_create(&th_id[iter], NULL, + gf_changelog_consume_wrap, curr, + "clogc%03hx", (iter + 1) & 0x3ff); 
+ if (ret) { + gf_msg(this->name, GF_LOG_ERROR, ret, + CHANGELOG_LIB_MSG_THREAD_CREATION_FAILED, + "could not create consume-thread"); + goto sync; + } else + n_envoked++; + + from++; + } + + sync: + for (iter = 0; iter < n_envoked; iter++) { + ret = pthread_join(th_id[iter], NULL); + if (ret) { + publish = _gf_false; + gf_msg(this->name, GF_LOG_ERROR, ret, + CHANGELOG_LIB_MSG_PTHREAD_JOIN_FAILED, + "pthread_join() error"); + /* try to join the rest */ + continue; + } + + if (publish == _gf_false) + continue; + + curr = &ccd[iter]; + if (ccd->retval) { + publish = _gf_false; + gf_smsg(this->name, GF_LOG_ERROR, 0, + CHANGELOG_LIB_MSG_PARSE_ERROR_CEASED, NULL); + continue; + } + + ret = gf_changelog_publish(curr->this, curr->jnl, curr->changelog); + if (ret) { + publish = _gf_false; + gf_msg(this->name, GF_LOG_ERROR, 0, + CHANGELOG_LIB_MSG_PUBLISH_ERROR, + "publish error, ceased publishing..."); + } + } + } + + /* informing "parsing done". */ + hist_jnl->hist_done = (publish == _gf_true) ? 
0 : -1; out: - if (fd != -1) - sys_close (fd); - GF_FREE (hist_data); - return NULL; + if (fd != -1) + (void)sys_close(fd); + GF_FREE(hist_data); + return NULL; } /** @@ -710,277 +693,328 @@ out: * -2 : Ignore this metadata file and process next */ int -gf_changelog_extract_min_max (const char *dname, const char *htime_dir, - int *fd, unsigned long *total, - unsigned long *min_ts, unsigned long *max_ts) +gf_changelog_extract_min_max(const char *dname, const char *htime_dir, int *fd, + unsigned long *total, unsigned long *min_ts, + unsigned long *max_ts) { - int ret = -1; - xlator_t *this = NULL; - char htime_file[PATH_MAX] = {0,}; - struct stat stbuf = {0,}; - char *iter = NULL; - char x_value[30] = {0,}; - - this = THIS; + int ret = -1; + xlator_t *this = NULL; + char htime_file[PATH_MAX] = { + 0, + }; + struct stat stbuf = { + 0, + }; + char *iter = NULL; + char x_value[30] = { + 0, + }; + + this = THIS; + + snprintf(htime_file, PATH_MAX, "%s/%s", htime_dir, dname); + + iter = (htime_file + strlen(htime_file) - TIMESTAMP_LENGTH); + sscanf(iter, "%lu", min_ts); + + ret = sys_stat(htime_file, &stbuf); + if (ret) { + ret = -1; + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_HTIME_ERROR, + "op=stat", "path=%s", htime_file, NULL); + goto out; + } + + /* ignore everything except regular files */ + if (!S_ISREG(stbuf.st_mode)) { + ret = -2; + goto out; + } + + *fd = open(htime_file, O_RDONLY); + if (*fd < 0) { + ret = -1; + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_HTIME_ERROR, + "op=open", "path=%s", htime_file, NULL); + goto out; + } + + /* Looks good, extract max timestamp */ + ret = sys_fgetxattr(*fd, HTIME_KEY, x_value, sizeof(x_value)); + if (ret < 0) { + ret = -1; + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_LIB_MSG_GET_XATTR_FAILED, "path=%s", htime_file, + NULL); + goto out; + } + + sscanf(x_value, "%lu:%lu", max_ts, total); + gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_LIB_MSG_MIN_MAX_INFO, + "min=%lu", *min_ts, 
"max=%lu", *max_ts, "total_changelogs=%lu", + *total, NULL); + + ret = 0; - snprintf (htime_file, PATH_MAX, "%s/%s", htime_dir, dname); - - iter = (htime_file + strlen (htime_file) - TIMESTAMP_LENGTH); - sscanf (iter ,"%lu",min_ts); +out: + return ret; +} - ret = sys_stat (htime_file, &stbuf); +/* gf_history_changelog returns actual_end and spawns threads to + * parse historical changelogs. The return values are as follows. + * 0 : On success + * 1 : Successful, but partial historical changelogs available, + * end time falls into different htime file or future time + * -2 : Error, requested historical changelog not available, not + * even partial + * -1 : On any error + */ +int +gf_history_changelog(char *changelog_dir, unsigned long start, + unsigned long end, int n_parallel, + unsigned long *actual_end) +{ + int ret = 0; + int len = -1; + int fd = -1; + int n_read = -1; + unsigned long min_ts = 0; + unsigned long max_ts = 0; + unsigned long end2 = 0; + unsigned long ts1 = 0; + unsigned long ts2 = 0; + unsigned long to = 0; + unsigned long from = 0; + unsigned long total_changelog = 0; + xlator_t *this = NULL; + gf_changelog_journal_t *jnl = NULL; + gf_changelog_journal_t *hist_jnl = NULL; + gf_changelog_history_data_t *hist_data = NULL; + DIR *dirp = NULL; + struct dirent *entry = NULL; + struct dirent scratch[2] = { + { + 0, + }, + }; + pthread_t consume_th = 0; + char htime_dir[PATH_MAX] = { + 0, + }; + char buffer[PATH_MAX] = { + 0, + }; + gf_boolean_t partial_history = _gf_false; + + pthread_attr_t attr; + + this = THIS; + if (!this) { + ret = -1; + goto out; + } + + ret = pthread_attr_init(&attr); + if (ret != 0) { + gf_msg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_PTHREAD_ERROR, + "Pthread init failed"); + return -1; + } + + jnl = (gf_changelog_journal_t *)GF_CHANGELOG_GET_API_PTR(this); + if (!jnl) { + ret = -1; + goto out; + } + + hist_jnl = (gf_changelog_journal_t *)jnl->hist_jnl; + if (!hist_jnl) { + ret = -1; + goto out; + } + + 
gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_LIB_MSG_REQUESTING_INFO, + "start=%lu", start, "end=%lu", end, NULL); + + /* basic sanity check */ + if (start > end || n_parallel <= 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_HIST_FAILED, + "start=%lu", start, "end=%lu", end, "thread_count=%d", + n_parallel, NULL); + ret = -1; + goto out; + } + + /* cap parallelism count */ + if (n_parallel > MAX_PARALLELS) + n_parallel = MAX_PARALLELS; + + CHANGELOG_FILL_HTIME_DIR(changelog_dir, htime_dir); + + dirp = sys_opendir(htime_dir); + if (dirp == NULL) { + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_HTIME_ERROR, + "op=opendir", "path=%s", htime_dir, NULL); + ret = -1; + goto out; + } + + for (;;) { + errno = 0; + + entry = sys_readdir(dirp, scratch); + + if (!entry || errno != 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_LIB_MSG_HIST_FAILED, "start=%lu", start, + "end=%lu", end, NULL); + ret = -2; + break; + } + + ret = gf_changelog_extract_min_max(entry->d_name, htime_dir, &fd, + &total_changelog, &min_ts, &max_ts); if (ret) { - ret = -1; - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_LIB_MSG_HTIME_ERROR, - "stat() failed on htime file %s", - htime_file); - goto out; - } - - /* ignore everything except regular files */ - if (!S_ISREG (stbuf.st_mode)) { - ret = -2; - goto out; + if (-2 == ret) + continue; + goto out; } - *fd = open (htime_file, O_RDONLY); - if (*fd < 0) { + if (start >= min_ts && start < max_ts) { + /** + * TODO: handle short reads later... 
+ */ + n_read = sys_read(fd, buffer, PATH_MAX); + if (n_read < 0) { ret = -1; - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_LIB_MSG_HTIME_ERROR, - "open() failed for htime %s", - htime_file); + gf_msg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_LIB_MSG_READ_ERROR, + "unable to read htime file"); goto out; - } + } - /* Looks good, extract max timestamp */ - ret = sys_fgetxattr (*fd, HTIME_KEY, x_value, sizeof (x_value)); - if (ret < 0) { - ret = -1; - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_LIB_MSG_GET_XATTR_FAILED, - "error extracting max timstamp from htime file" - " %s", htime_file); - goto out; - } + len = strlen(buffer); - sscanf (x_value, "%lu:%lu", max_ts, total); - gf_msg (this->name, GF_LOG_INFO, 0, - CHANGELOG_LIB_MSG_TOTAL_LOG_INFO, - "MIN: %lu, MAX: %lu, TOTAL CHANGELOGS: %lu", - *min_ts, *max_ts, *total); - - ret = 0; - - out: - return ret; -} - -int -gf_history_changelog (char* changelog_dir, unsigned long start, - unsigned long end, int n_parallel, - unsigned long *actual_end) -{ - int ret = 0; - int len = -1; - int fd = -1; - int n_read = -1; - unsigned long min_ts = 0; - unsigned long max_ts = 0; - unsigned long end2 = 0; - unsigned long ts1 = 0; - unsigned long ts2 = 0; - unsigned long to = 0; - unsigned long from = 0; - unsigned long total_changelog = 0; - xlator_t *this = NULL; - gf_changelog_journal_t *jnl = NULL; - gf_changelog_journal_t *hist_jnl = NULL; - gf_changelog_history_data_t *hist_data = NULL; - DIR *dirp = NULL; - struct dirent *dp = NULL; - pthread_t consume_th = 0; - char htime_dir[PATH_MAX] = {0,}; - char buffer[PATH_MAX] = {0,}; - - pthread_attr_t attr; - - ret = pthread_attr_init (&attr); - if (ret != 0) { - return -1; - } + /** + * search @start in the htime file returning it's index + * (@from) + */ + from = gf_history_b_search(fd, start, 0, total_changelog - 1, len); - this = THIS; - if (!this) { + /* ensuring correctness of gf_b_search */ + if (gf_history_check(fd, from, start, len) != 0) { ret = -1; - 
goto out; - } - - jnl = (gf_changelog_journal_t *) GF_CHANGELOG_GET_API_PTR (this); - if (!jnl) { + gf_smsg(this->name, GF_LOG_ERROR, 0, + CHANGELOG_LIB_MSG_GET_TIME_ERROR, "for=start", + "start=%lu", start, "idx=%lu", from, NULL); + goto out; + } + + end2 = (end <= max_ts) ? end : max_ts; + + /* Check if end falls out of same HTIME file. The end + * falling to a different htime file or changelog + * disable-enable is detected only after 20 seconds. + * This is required because, applications generally + * asks historical changelogs till current time and + * it is possible changelog is not rolled over yet. + * So, buffer time of default rollover time plus 5 + * seconds is subtracted. If the application requests + * the end time with in half a minute of changelog + * disable, it's not detected as changelog disable and + * it's application's responsibility to retry after + * 20 seconds before confirming it as partial history. + */ + if ((end - 20) > max_ts) { + partial_history = _gf_true; + } + + /** + * search @end2 in htime file returning it's index (@to) + */ + to = gf_history_b_search(fd, end2, 0, total_changelog - 1, len); + + if (gf_history_check(fd, to, end2, len) != 0) { ret = -1; + gf_smsg(this->name, GF_LOG_ERROR, 0, + CHANGELOG_LIB_MSG_GET_TIME_ERROR, "for=end", + "start=%lu", end2, "idx=%lu", to, NULL); goto out; - } + } - hist_jnl = (gf_changelog_journal_t *) jnl->hist_jnl; - if (!hist_jnl) { - ret = -1; + ret = gf_history_get_timestamp(fd, from, len, &ts1); + if (ret == -1) goto out; - } - /* basic sanity check */ - if (start > end || n_parallel <= 0) { - ret = -1; + ret = gf_history_get_timestamp(fd, to, len, &ts2); + if (ret == -1) goto out; - } - /* cap parallelism count */ - if (n_parallel > MAX_PARALLELS) - n_parallel = MAX_PARALLELS; + gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_LIB_MSG_FINAL_INFO, + "from=%lu", ts1, "to=%lu", ts2, "changes=%lu", + (to - from + 1), NULL); - CHANGELOG_FILL_HTIME_DIR (changelog_dir, htime_dir); + hist_data = 
GF_CALLOC(1, sizeof(gf_changelog_history_data_t), + gf_changelog_mt_history_data_t); - dirp = sys_opendir (htime_dir); - if (dirp == NULL) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_LIB_MSG_HTIME_ERROR, - "open dir on htime failed : %s", - htime_dir); + hist_data->htime_fd = fd; + hist_data->from = from; + hist_data->to = to; + hist_data->len = len; + hist_data->n_parallel = n_parallel; + hist_data->this = this; + + ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); + if (ret != 0) { + gf_msg(this->name, GF_LOG_ERROR, ret, + CHANGELOG_LIB_MSG_PTHREAD_ERROR, + "unable to sets the detach" + " state attribute"); ret = -1; goto out; - } + } - while ((dp = sys_readdir (dirp)) != NULL) { - ret = gf_changelog_extract_min_max (dp->d_name, htime_dir, - &fd, &total_changelog, - &min_ts, &max_ts); - if (ret) { - if (-2 == ret) - continue; - goto out; - } - - if (start >= min_ts && start < max_ts) { - /** - * TODO: handle short reads later... - */ - n_read = sys_read (fd, buffer, PATH_MAX); - if (n_read < 0) { - ret = -1; - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_LIB_MSG_READ_ERROR, - "unable to read htime file"); - goto out; - } - - len = strlen (buffer); - - /** - * search @start in the htime file returning it's index - * (@from) - */ - from = gf_history_b_search (fd, start, 0, - total_changelog - 1, len); - - /* ensuring correctness of gf_b_search */ - if (gf_history_check (fd, from, start, len) != 0) { - ret = -1; - gf_msg (this->name, GF_LOG_ERROR, 0, - CHANGELOG_LIB_MSG_GET_TIME_ERROR, - "wrong result for start: %lu idx: %lu", - start, from); - goto out; - } - - end2 = (end <= max_ts) ? 
end : max_ts; - - /** - * search @end2 in htime file returning it's index (@to) - */ - to = gf_history_b_search (fd, end2, - 0, total_changelog - 1, len); - - if (gf_history_check (fd, to, end2, len) != 0) { - ret = -1; - gf_msg (this->name, GF_LOG_ERROR, 0, - CHANGELOG_LIB_MSG_GET_TIME_ERROR, - "wrong result for start: %lu idx: %lu", - end2, to); - goto out; - } - - ret = gf_history_get_timestamp (fd, from, len, &ts1); - if (ret == -1) - goto out; - - ret = gf_history_get_timestamp (fd, to, len, &ts2); - if (ret == -1) - goto out; - - gf_msg (this->name, GF_LOG_INFO, 0, - CHANGELOG_LIB_MSG_TOTAL_LOG_INFO, - "FINAL: from: %lu, to: %lu, changes: %lu", - ts1, ts2, (to - from + 1)); - - hist_data = GF_CALLOC (1, - sizeof (gf_changelog_history_data_t), - gf_changelog_mt_history_data_t); - - hist_data->htime_fd = fd; - hist_data->from = from; - hist_data->to = to; - hist_data->len = len; - hist_data->n_parallel = n_parallel; - hist_data->this = this; - - ret = pthread_attr_setdetachstate - (&attr, PTHREAD_CREATE_DETACHED); - if (ret != 0) { - gf_msg (this->name, GF_LOG_ERROR, ret, - CHANGELOG_LIB_MSG_PTHREAD_ERROR, - "unable to sets the detach" - " state attribute"); - ret = -1; - goto out; - } - - /* spawn a thread for background parsing & publishing */ - ret = pthread_create (&consume_th, &attr, - gf_history_consume, hist_data); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, ret, - CHANGELOG_LIB_MSG_THREAD_CREATION_FAILED - , "creation of consume parent-thread" - " failed."); - ret = -1; - goto out; - } - - goto out; - - } /* end of range check */ - - } /* end of readdir() */ - - if (!from || !to) + /* spawn a thread for background parsing & publishing */ + ret = gf_thread_create(&consume_th, &attr, gf_history_consume, + hist_data, "cloghcon"); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, ret, + CHANGELOG_LIB_MSG_THREAD_CREATION_FAILED, + "creation of consume parent-thread" + " failed."); ret = -1; + goto out; + } -out: - if (dirp != NULL) - sys_closedir (dirp); 
- - if (ret < 0) { - if (fd != -1) - sys_close (fd); - GF_FREE (hist_data); - (void) pthread_attr_destroy (&attr); + goto out; - return ret; + } else { /* end of range check */ + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_LIB_MSG_HIST_FAILED, "start=%lu", start, + "end=%lu", end, "chlog_min=%lu", min_ts, "chlog_max=%lu", + max_ts, NULL); } + } /* end of readdir() */ - hist_jnl->hist_done = 1; - *actual_end = ts2; +out: + if (dirp != NULL) + (void)sys_closedir(dirp); + + if (ret < 0) { + if (fd != -1) + (void)sys_close(fd); + GF_FREE(hist_data); + (void)pthread_attr_destroy(&attr); return ret; + } + + hist_jnl->hist_done = 1; + *actual_end = ts2; + + if (partial_history) { + ret = 1; + } + + return ret; } diff --git a/xlators/features/changelog/src/Makefile.am b/xlators/features/changelog/src/Makefile.am index e851cc7819a..eee7dfa238d 100644 --- a/xlators/features/changelog/src/Makefile.am +++ b/xlators/features/changelog/src/Makefile.am @@ -3,25 +3,26 @@ xlator_LTLIBRARIES = changelog.la xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features noinst_HEADERS = changelog-helpers.h changelog-mem-types.h changelog-rt.h \ - changelog-rpc-common.h changelog-misc.h changelog-encoders.h \ - changelog-rpc-common.h changelog-rpc.h changelog-ev-handle.h \ - changelog-messages.h + changelog-rpc-common.h changelog-misc.h changelog-encoders.h \ + changelog-rpc-common.h changelog-rpc.h changelog-ev-handle.h \ + changelog-messages.h -changelog_la_LDFLAGS = $(GF_XLATOR_DEFAULT_LDFLAGS) +changelog_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) changelog_la_SOURCES = changelog.c changelog-rt.c changelog-helpers.c \ - changelog-encoders.c changelog-rpc.c changelog-barrier.c \ - changelog-rpc-common.c changelog-ev-handle.c + changelog-encoders.c changelog-rpc.c changelog-barrier.c \ + changelog-rpc-common.c changelog-ev-handle.c changelog_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ - $(top_builddir)/rpc/xdr/src/libgfxdr.la \ - 
$(top_builddir)/rpc/rpc-lib/src/libgfrpc.la + $(top_builddir)/rpc/xdr/src/libgfxdr.la \ + $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ - -I$(top_srcdir)/rpc/xdr/src -I$(top_srcdir)/rpc/rpc-lib/src \ - -I$(top_srcdir)/rpc/rpc-transport/socket/src \ - -I$(top_srcdir)/xlators/features/changelog/lib/src/ \ - -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS) \ - -DDATADIR=\"$(localstatedir)\" + -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \ + -I$(top_srcdir)/rpc/rpc-lib/src \ + -I$(top_srcdir)/rpc/rpc-transport/socket/src \ + -I$(top_srcdir)/xlators/features/changelog/lib/src/ \ + -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS) \ + -DDATADIR=\"$(localstatedir)\" AM_CFLAGS = -Wall $(GF_CFLAGS) diff --git a/xlators/features/changelog/src/changelog-barrier.c b/xlators/features/changelog/src/changelog-barrier.c index ac1eb0e4397..0fb89ddb127 100644 --- a/xlators/features/changelog/src/changelog-barrier.c +++ b/xlators/features/changelog/src/changelog-barrier.c @@ -10,125 +10,122 @@ #include "changelog-helpers.h" #include "changelog-messages.h" -#include "call-stub.h" +#include <glusterfs/call-stub.h> /* Enqueue a stub*/ void -__chlog_barrier_enqueue (xlator_t *this, call_stub_t *stub) +__chlog_barrier_enqueue(xlator_t *this, call_stub_t *stub) { - changelog_priv_t *priv = NULL; + changelog_priv_t *priv = NULL; - priv = this->private; - GF_ASSERT (priv); + priv = this->private; + GF_ASSERT(priv); - list_add_tail (&stub->list, &priv->queue); - priv->queue_size++; + list_add_tail(&stub->list, &priv->queue); + priv->queue_size++; - return; + return; } /* Dequeue a stub */ call_stub_t * -__chlog_barrier_dequeue (xlator_t *this, struct list_head *queue) +__chlog_barrier_dequeue(xlator_t *this, struct list_head *queue) { - call_stub_t *stub = NULL; - changelog_priv_t *priv = NULL; + call_stub_t *stub = NULL; + changelog_priv_t *priv = NULL; - priv = this->private; - GF_ASSERT (priv); + 
priv = this->private; + GF_ASSERT(priv); - if (list_empty (queue)) - goto out; + if (list_empty(queue)) + goto out; - stub = list_entry (queue->next, call_stub_t, list); - list_del_init (&stub->list); + stub = list_entry(queue->next, call_stub_t, list); + list_del_init(&stub->list); out: - return stub; + return stub; } /* Dequeue all the stubs and call corresponding resume functions */ void -chlog_barrier_dequeue_all (xlator_t *this, struct list_head *queue) +chlog_barrier_dequeue_all(xlator_t *this, struct list_head *queue) { - call_stub_t *stub = NULL; + call_stub_t *stub = NULL; - gf_msg (this->name, GF_LOG_INFO, 0, - CHANGELOG_MSG_BARRIER_INFO, - "Dequeuing all the changelog barriered fops"); + gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_DEQUEUING_BARRIER_FOPS, + NULL); - while ((stub = __chlog_barrier_dequeue (this, queue))) - call_resume (stub); + while ((stub = __chlog_barrier_dequeue(this, queue))) + call_resume(stub); - gf_msg (this->name, GF_LOG_INFO, 0, - CHANGELOG_MSG_BARRIER_INFO, - "Dequeuing changelog barriered fops is finished"); - return; + gf_smsg(this->name, GF_LOG_INFO, 0, + CHANGELOG_MSG_DEQUEUING_BARRIER_FOPS_FINISHED, NULL); + return; } /* Function called on changelog barrier timeout */ void -chlog_barrier_timeout (void *data) +chlog_barrier_timeout(void *data) { - xlator_t *this = NULL; - changelog_priv_t *priv = NULL; - struct list_head queue = {0,}; + xlator_t *this = NULL; + changelog_priv_t *priv = NULL; + struct list_head queue = { + 0, + }; - this = data; - THIS = this; - priv = this->private; + this = data; + THIS = this; + priv = this->private; - INIT_LIST_HEAD (&queue); + INIT_LIST_HEAD(&queue); - gf_msg (this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_BARRIER_ERROR, - "Disabling changelog barrier because of the timeout."); + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_BARRIER_TIMEOUT, NULL); - LOCK (&priv->lock); - { - __chlog_barrier_disable (this, &queue); - } - UNLOCK (&priv->lock); + LOCK(&priv->lock); + { + 
__chlog_barrier_disable(this, &queue); + } + UNLOCK(&priv->lock); - chlog_barrier_dequeue_all (this, &queue); + chlog_barrier_dequeue_all(this, &queue); - return; + return; } /* Disable changelog barrier enable flag */ void -__chlog_barrier_disable (xlator_t *this, struct list_head *queue) +__chlog_barrier_disable(xlator_t *this, struct list_head *queue) { - changelog_priv_t *priv = this->private; - GF_ASSERT (priv); + changelog_priv_t *priv = this->private; + GF_ASSERT(priv); - if (priv->timer) { - gf_timer_call_cancel (this->ctx, priv->timer); - priv->timer = NULL; - } + if (priv->timer) { + gf_timer_call_cancel(this->ctx, priv->timer); + priv->timer = NULL; + } - list_splice_init (&priv->queue, queue); - priv->queue_size = 0; - priv->barrier_enabled = _gf_false; + list_splice_init(&priv->queue, queue); + priv->queue_size = 0; + priv->barrier_enabled = _gf_false; } /* Enable chagelog barrier enable with timer */ int -__chlog_barrier_enable (xlator_t *this, changelog_priv_t *priv) +__chlog_barrier_enable(xlator_t *this, changelog_priv_t *priv) { - int ret = -1; - - priv->timer = gf_timer_call_after (this->ctx, priv->timeout, - chlog_barrier_timeout, (void *)this); - if (!priv->timer) { - gf_msg (this->name, GF_LOG_CRITICAL, 0, - CHANGELOG_MSG_BARRIER_ERROR, - "Couldn't add changelog barrier timeout event."); - goto out; - } - - priv->barrier_enabled = _gf_true; - ret = 0; + int ret = -1; + + priv->timer = gf_timer_call_after(this->ctx, priv->timeout, + chlog_barrier_timeout, (void *)this); + if (!priv->timer) { + gf_smsg(this->name, GF_LOG_CRITICAL, 0, + CHANGELOG_MSG_TIMEOUT_ADD_FAILED, NULL); + goto out; + } + + priv->barrier_enabled = _gf_true; + ret = 0; out: - return ret; + return ret; } diff --git a/xlators/features/changelog/src/changelog-encoders.c b/xlators/features/changelog/src/changelog-encoders.c index 95030236636..63754516c2e 100644 --- a/xlators/features/changelog/src/changelog-encoders.c +++ b/xlators/features/changelog/src/changelog-encoders.c @@ 
-11,117 +11,117 @@ #include "changelog-encoders.h" size_t -entry_fn (void *data, char *buffer, gf_boolean_t encode) +entry_fn(void *data, char *buffer, gf_boolean_t encode) { - char *tmpbuf = NULL; - size_t bufsz = 0; - struct changelog_entry_fields *ce = NULL; - - ce = (struct changelog_entry_fields *) data; - - if (encode) { - tmpbuf = uuid_utoa (ce->cef_uuid); - CHANGELOG_FILL_BUFFER (buffer, bufsz, tmpbuf, strlen (tmpbuf)); - } else { - CHANGELOG_FILL_BUFFER (buffer, bufsz, - ce->cef_uuid, sizeof (uuid_t)); - } - - CHANGELOG_FILL_BUFFER (buffer, bufsz, "/", 1); - CHANGELOG_FILL_BUFFER (buffer, bufsz, - ce->cef_bname, strlen (ce->cef_bname)); - return bufsz; + char *tmpbuf = NULL; + size_t bufsz = 0; + struct changelog_entry_fields *ce = NULL; + + ce = (struct changelog_entry_fields *)data; + + if (encode) { + tmpbuf = uuid_utoa(ce->cef_uuid); + CHANGELOG_FILL_BUFFER(buffer, bufsz, tmpbuf, strlen(tmpbuf)); + } else { + CHANGELOG_FILL_BUFFER(buffer, bufsz, ce->cef_uuid, sizeof(uuid_t)); + } + + CHANGELOG_FILL_BUFFER(buffer, bufsz, "/", 1); + CHANGELOG_FILL_BUFFER(buffer, bufsz, ce->cef_bname, strlen(ce->cef_bname)); + return bufsz; } size_t -del_entry_fn (void *data, char *buffer, gf_boolean_t encode) +del_entry_fn(void *data, char *buffer, gf_boolean_t encode) { - char *tmpbuf = NULL; - size_t bufsz = 0; - struct changelog_entry_fields *ce = NULL; - - ce = (struct changelog_entry_fields *) data; - - if (encode) { - tmpbuf = uuid_utoa (ce->cef_uuid); - CHANGELOG_FILL_BUFFER (buffer, bufsz, tmpbuf, strlen (tmpbuf)); - } else { - CHANGELOG_FILL_BUFFER (buffer, bufsz, - ce->cef_uuid, sizeof (uuid_t)); - } - - CHANGELOG_FILL_BUFFER (buffer, bufsz, "/", 1); - CHANGELOG_FILL_BUFFER (buffer, bufsz, - ce->cef_bname, strlen (ce->cef_bname)); - CHANGELOG_FILL_BUFFER (buffer, bufsz, "\0", 1); - - if (ce->cef_path[0] == '\0') { - CHANGELOG_FILL_BUFFER (buffer, bufsz, "\0", 1); - } else { - CHANGELOG_FILL_BUFFER (buffer, bufsz, - ce->cef_path, strlen (ce->cef_path)); - } - - 
return bufsz; + char *tmpbuf = NULL; + size_t bufsz = 0; + struct changelog_entry_fields *ce = NULL; + + ce = (struct changelog_entry_fields *)data; + + if (encode) { + tmpbuf = uuid_utoa(ce->cef_uuid); + CHANGELOG_FILL_BUFFER(buffer, bufsz, tmpbuf, strlen(tmpbuf)); + } else { + CHANGELOG_FILL_BUFFER(buffer, bufsz, ce->cef_uuid, sizeof(uuid_t)); + } + + CHANGELOG_FILL_BUFFER(buffer, bufsz, "/", 1); + CHANGELOG_FILL_BUFFER(buffer, bufsz, ce->cef_bname, strlen(ce->cef_bname)); + CHANGELOG_FILL_BUFFER(buffer, bufsz, "\0", 1); + + if (ce->cef_path[0] == '\0') { + CHANGELOG_FILL_BUFFER(buffer, bufsz, "\0", 1); + } else { + CHANGELOG_FILL_BUFFER(buffer, bufsz, ce->cef_path, + strlen(ce->cef_path)); + } + + return bufsz; } size_t -fop_fn (void *data, char *buffer, gf_boolean_t encode) +fop_fn(void *data, char *buffer, gf_boolean_t encode) { - char buf[10] = {0,}; - size_t bufsz = 0; - glusterfs_fop_t fop = 0; + char buf[10] = { + 0, + }; + size_t bufsz = 0; + glusterfs_fop_t fop = 0; - fop = *(glusterfs_fop_t *) data; + fop = *(glusterfs_fop_t *)data; - if (encode) { - (void) snprintf (buf, sizeof (buf), "%d", fop); - CHANGELOG_FILL_BUFFER (buffer, bufsz, buf, strlen (buf)); - } else - CHANGELOG_FILL_BUFFER (buffer, bufsz, &fop, sizeof (fop)); + if (encode) { + (void)snprintf(buf, sizeof(buf), "%d", fop); + CHANGELOG_FILL_BUFFER(buffer, bufsz, buf, strlen(buf)); + } else + CHANGELOG_FILL_BUFFER(buffer, bufsz, &fop, sizeof(fop)); - return bufsz; + return bufsz; } size_t -number_fn (void *data, char *buffer, gf_boolean_t encode) +number_fn(void *data, char *buffer, gf_boolean_t encode) { - size_t bufsz = 0; - unsigned int nr = 0; - char buf[20] = {0,}; + size_t bufsz = 0; + unsigned int nr = 0; + char buf[20] = { + 0, + }; - nr = *(unsigned int *) data; + nr = *(unsigned int *)data; - if (encode) { - (void) snprintf (buf, sizeof (buf), "%u", nr); - CHANGELOG_FILL_BUFFER (buffer, bufsz, buf, strlen (buf)); - } else - CHANGELOG_FILL_BUFFER (buffer, bufsz, &nr, sizeof 
(unsigned int)); + if (encode) { + (void)snprintf(buf, sizeof(buf), "%u", nr); + CHANGELOG_FILL_BUFFER(buffer, bufsz, buf, strlen(buf)); + } else + CHANGELOG_FILL_BUFFER(buffer, bufsz, &nr, sizeof(unsigned int)); - return bufsz; + return bufsz; } void -entry_free_fn (void *data) +entry_free_fn(void *data) { - changelog_opt_t *co = data; + changelog_opt_t *co = data; - if (!co) - return; + if (!co) + return; - GF_FREE (co->co_entry.cef_bname); + GF_FREE(co->co_entry.cef_bname); } void -del_entry_free_fn (void *data) +del_entry_free_fn(void *data) { - changelog_opt_t *co = data; + changelog_opt_t *co = data; - if (!co) - return; + if (!co) + return; - GF_FREE (co->co_entry.cef_bname); - GF_FREE (co->co_entry.cef_path); + GF_FREE(co->co_entry.cef_bname); + GF_FREE(co->co_entry.cef_path); } /** @@ -129,108 +129,104 @@ del_entry_free_fn (void *data) */ static void -changelog_encode_write_xtra (changelog_log_data_t *cld, - char *buffer, size_t *off, gf_boolean_t encode) +changelog_encode_write_xtra(changelog_log_data_t *cld, char *buffer, + size_t *off, gf_boolean_t encode) { - int i = 0; - size_t offset = 0; - void *data = NULL; - changelog_opt_t *co = NULL; - - offset = *off; - - co = (changelog_opt_t *) cld->cld_ptr; - - for (; i < cld->cld_xtra_records; i++, co++) { - CHANGELOG_FILL_BUFFER (buffer, offset, "\0", 1); - - switch (co->co_type) { - case CHANGELOG_OPT_REC_FOP: - data = &co->co_fop; - break; - case CHANGELOG_OPT_REC_ENTRY: - data = &co->co_entry; - break; - case CHANGELOG_OPT_REC_UINT32: - data = &co->co_uint32; - break; - } - - if (co->co_convert) - offset += co->co_convert (data, - buffer + offset, encode); - else /* no coversion: write it out as it is */ - CHANGELOG_FILL_BUFFER (buffer, offset, - data, co->co_len); + int i = 0; + size_t offset = 0; + void *data = NULL; + changelog_opt_t *co = NULL; + + offset = *off; + + co = (changelog_opt_t *)cld->cld_ptr; + + for (; i < cld->cld_xtra_records; i++, co++) { + CHANGELOG_FILL_BUFFER(buffer, offset, "\0", 
1); + + switch (co->co_type) { + case CHANGELOG_OPT_REC_FOP: + data = &co->co_fop; + break; + case CHANGELOG_OPT_REC_ENTRY: + data = &co->co_entry; + break; + case CHANGELOG_OPT_REC_UINT32: + data = &co->co_uint32; + break; } - *off = offset; + if (co->co_convert) + offset += co->co_convert(data, buffer + offset, encode); + else /* no coversion: write it out as it is */ + CHANGELOG_FILL_BUFFER(buffer, offset, data, co->co_len); + } + + *off = offset; } int -changelog_encode_ascii (xlator_t *this, changelog_log_data_t *cld) +changelog_encode_ascii(xlator_t *this, changelog_log_data_t *cld) { - size_t off = 0; - size_t gfid_len = 0; - char *gfid_str = NULL; - char *buffer = NULL; - changelog_priv_t *priv = NULL; + size_t off = 0; + size_t gfid_len = 0; + char *gfid_str = NULL; + char *buffer = NULL; + changelog_priv_t *priv = NULL; - priv = this->private; + priv = this->private; - gfid_str = uuid_utoa (cld->cld_gfid); - gfid_len = strlen (gfid_str); + gfid_str = uuid_utoa(cld->cld_gfid); + gfid_len = strlen(gfid_str); - /* extra bytes for decorations */ - buffer = alloca (gfid_len + cld->cld_ptr_len + 10); - CHANGELOG_STORE_ASCII (priv, buffer, - off, gfid_str, gfid_len, cld); + /* extra bytes for decorations */ + buffer = alloca(gfid_len + cld->cld_ptr_len + 10); + CHANGELOG_STORE_ASCII(priv, buffer, off, gfid_str, gfid_len, cld); - if (cld->cld_xtra_records) - changelog_encode_write_xtra (cld, buffer, &off, _gf_true); + if (cld->cld_xtra_records) + changelog_encode_write_xtra(cld, buffer, &off, _gf_true); - CHANGELOG_FILL_BUFFER (buffer, off, "\0", 1); + CHANGELOG_FILL_BUFFER(buffer, off, "\0", 1); - return changelog_write_change (priv, buffer, off); + return changelog_write_change(priv, buffer, off); } int -changelog_encode_binary (xlator_t *this, changelog_log_data_t *cld) +changelog_encode_binary(xlator_t *this, changelog_log_data_t *cld) { - size_t off = 0; - char *buffer = NULL; - changelog_priv_t *priv = NULL; + size_t off = 0; + char *buffer = NULL; + 
changelog_priv_t *priv = NULL; - priv = this->private; + priv = this->private; - /* extra bytes for decorations */ - buffer = alloca (sizeof (uuid_t) + cld->cld_ptr_len + 10); - CHANGELOG_STORE_BINARY (priv, buffer, off, cld->cld_gfid, cld); + /* extra bytes for decorations */ + buffer = alloca(sizeof(uuid_t) + cld->cld_ptr_len + 10); + CHANGELOG_STORE_BINARY(priv, buffer, off, cld->cld_gfid, cld); - if (cld->cld_xtra_records) - changelog_encode_write_xtra (cld, buffer, &off, _gf_false); + if (cld->cld_xtra_records) + changelog_encode_write_xtra(cld, buffer, &off, _gf_false); - CHANGELOG_FILL_BUFFER (buffer, off, "\0", 1); + CHANGELOG_FILL_BUFFER(buffer, off, "\0", 1); - return changelog_write_change (priv, buffer, off); + return changelog_write_change(priv, buffer, off); } -static struct changelog_encoder -cb_encoder[] = { - [CHANGELOG_ENCODE_BINARY] = +static struct changelog_encoder cb_encoder[] = { + [CHANGELOG_ENCODE_BINARY] = { - .encoder = CHANGELOG_ENCODE_BINARY, - .encode = changelog_encode_binary, + .encoder = CHANGELOG_ENCODE_BINARY, + .encode = changelog_encode_binary, }, - [CHANGELOG_ENCODE_ASCII] = + [CHANGELOG_ENCODE_ASCII] = { - .encoder = CHANGELOG_ENCODE_ASCII, - .encode = changelog_encode_ascii, + .encoder = CHANGELOG_ENCODE_ASCII, + .encode = changelog_encode_ascii, }, }; void changelog_encode_change(changelog_priv_t *priv) { - priv->ce = &cb_encoder[priv->encode_mode]; + priv->ce = &cb_encoder[priv->encode_mode]; } diff --git a/xlators/features/changelog/src/changelog-encoders.h b/xlators/features/changelog/src/changelog-encoders.h index d6a50cc9ef7..26252696d56 100644 --- a/xlators/features/changelog/src/changelog-encoders.h +++ b/xlators/features/changelog/src/changelog-encoders.h @@ -11,41 +11,39 @@ #ifndef _CHANGELOG_ENCODERS_H #define _CHANGELOG_ENCODERS_H -#include "xlator.h" -#include "defaults.h" +#include <glusterfs/xlator.h> +#include <glusterfs/defaults.h> #include "changelog-helpers.h" -#define CHANGELOG_STORE_ASCII(priv, buf, off, 
gfid, gfid_len, cld) do { \ - CHANGELOG_FILL_BUFFER (buffer, off, \ - priv->maps[cld->cld_type], 1); \ - CHANGELOG_FILL_BUFFER (buffer, \ - off, gfid, gfid_len); \ - } while (0) +#define CHANGELOG_STORE_ASCII(priv, buf, off, gfid, gfid_len, cld) \ + do { \ + CHANGELOG_FILL_BUFFER(buffer, off, priv->maps[cld->cld_type], 1); \ + CHANGELOG_FILL_BUFFER(buffer, off, gfid, gfid_len); \ + } while (0) -#define CHANGELOG_STORE_BINARY(priv, buf, off, gfid, cld) do { \ - CHANGELOG_FILL_BUFFER (buffer, off, \ - priv->maps[cld->cld_type], 1); \ - CHANGELOG_FILL_BUFFER (buffer, \ - off, gfid, sizeof (uuid_t)); \ - } while (0) +#define CHANGELOG_STORE_BINARY(priv, buf, off, gfid, cld) \ + do { \ + CHANGELOG_FILL_BUFFER(buffer, off, priv->maps[cld->cld_type], 1); \ + CHANGELOG_FILL_BUFFER(buffer, off, gfid, sizeof(uuid_t)); \ + } while (0) size_t -entry_fn (void *data, char *buffer, gf_boolean_t encode); +entry_fn(void *data, char *buffer, gf_boolean_t encode); size_t -del_entry_fn (void *data, char *buffer, gf_boolean_t encode); +del_entry_fn(void *data, char *buffer, gf_boolean_t encode); size_t -fop_fn (void *data, char *buffer, gf_boolean_t encode); +fop_fn(void *data, char *buffer, gf_boolean_t encode); size_t -number_fn (void *data, char *buffer, gf_boolean_t encode); +number_fn(void *data, char *buffer, gf_boolean_t encode); void -entry_free_fn (void *data); +entry_free_fn(void *data); void -del_entry_free_fn (void *data); +del_entry_free_fn(void *data); int -changelog_encode_binary (xlator_t *, changelog_log_data_t *); +changelog_encode_binary(xlator_t *, changelog_log_data_t *); int -changelog_encode_ascii (xlator_t *, changelog_log_data_t *); +changelog_encode_ascii(xlator_t *, changelog_log_data_t *); void changelog_encode_change(changelog_priv_t *); diff --git a/xlators/features/changelog/src/changelog-ev-handle.c b/xlators/features/changelog/src/changelog-ev-handle.c index 79652a969bd..aa94459de5a 100644 --- a/xlators/features/changelog/src/changelog-ev-handle.c +++ 
b/xlators/features/changelog/src/changelog-ev-handle.c @@ -14,19 +14,19 @@ struct rpc_clnt_program changelog_ev_program; -#define NR_IOVEC (MAX_IOVEC - 3) +#define NR_IOVEC (MAX_IOVEC - 3) struct ev_rpc_vec { - int count; - struct iovec vector[NR_IOVEC]; + int count; + struct iovec vector[NR_IOVEC]; - /* sequence number */ - unsigned long seq; + /* sequence number */ + unsigned long seq; }; struct ev_rpc { - rbuf_list_t *rlist; - struct rpc_clnt *rpc; - struct ev_rpc_vec vec; + rbuf_list_t *rlist; + struct rpc_clnt *rpc; + struct ev_rpc_vec vec; }; /** @@ -35,205 +35,229 @@ struct ev_rpc { * intelligence can be built into the server. */ int -changelog_event_dispatch_cbk (struct rpc_req *req, - struct iovec *iov, int count, void *myframe) +changelog_event_dispatch_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) { - return 0; + return 0; } /* dispatcher RPC */ int -changelog_dispatch_vec (call_frame_t *frame, xlator_t *this, - struct rpc_clnt *rpc, struct ev_rpc_vec *vec) +changelog_dispatch_vec(call_frame_t *frame, xlator_t *this, + struct rpc_clnt *rpc, struct ev_rpc_vec *vec) { - struct timeval tv = {0,}; - changelog_event_req req = {0,}; - - (void) gettimeofday (&tv, NULL); - - /** - * Event dispatch RPC header contains a sequence number for each - * dispatch. This allows the reciever to order the request before - * processing. 
- */ - req.seq = vec->seq; - req.tv_sec = tv.tv_sec; - req.tv_usec = tv.tv_usec; - - return changelog_rpc_sumbit_req (rpc, (void *)&req, - frame, &changelog_ev_program, - CHANGELOG_REV_PROC_EVENT, - vec->vector, vec->count, NULL, - this, changelog_event_dispatch_cbk, - (xdrproc_t) xdr_changelog_event_req); - } - - int - changelog_event_dispatch_rpc (call_frame_t *frame, xlator_t *this, void *data) - { - int idx = 0; - int count = 0; - int ret = 0; - unsigned long range = 0; - unsigned long sequence = 0; - rbuf_iovec_t *rvec = NULL; - struct ev_rpc *erpc = NULL; - struct rlist_iter riter = {{0,},}; - - /* dispatch NR_IOVEC IO vectors at a time. */ - - erpc = data; - RLIST_GET_SEQ (erpc->rlist, sequence, range); - - rlist_iter_init (&riter, erpc->rlist); - - rvec_for_each_entry (rvec, &riter) { - idx = count % NR_IOVEC; - if (++count == NR_IOVEC) { - erpc->vec.vector[idx] = rvec->iov; - erpc->vec.seq = sequence++; - erpc->vec.count = NR_IOVEC; - - ret = changelog_dispatch_vec (frame, this, - erpc->rpc, &erpc->vec); - if (ret) - break; - count = 0; - continue; - } - - erpc->vec.vector[idx] = rvec->iov; - } - - if (ret) - goto error_return; - - idx = count % NR_IOVEC; - if (idx) { - erpc->vec.seq = sequence; - erpc->vec.count = idx; - - ret = changelog_dispatch_vec (frame, this, - erpc->rpc, &erpc->vec); - } - - error_return: - return ret; + struct timeval tv = { + 0, + }; + changelog_event_req req = { + 0, + }; + + (void)gettimeofday(&tv, NULL); + + /** + * Event dispatch RPC header contains a sequence number for each + * dispatch. This allows the receiver to order the request before + * processing. 
+ */ + req.seq = vec->seq; + req.tv_sec = tv.tv_sec; + req.tv_usec = tv.tv_usec; + + return changelog_rpc_sumbit_req( + rpc, (void *)&req, frame, &changelog_ev_program, + CHANGELOG_REV_PROC_EVENT, vec->vector, vec->count, NULL, this, + changelog_event_dispatch_cbk, (xdrproc_t)xdr_changelog_event_req); } int -changelog_rpc_notify (struct rpc_clnt *rpc, - void *mydata, rpc_clnt_event_t event, void *data) +changelog_event_dispatch_rpc(call_frame_t *frame, xlator_t *this, void *data) { - xlator_t *this = NULL; - changelog_rpc_clnt_t *crpc = NULL; - changelog_clnt_t *c_clnt = NULL; - changelog_priv_t *priv = NULL; - changelog_ev_selector_t *selection = NULL; + int idx = 0; + int count = 0; + int ret = 0; + unsigned long sequence = 0; + rbuf_iovec_t *rvec = NULL; + struct ev_rpc *erpc = NULL; + struct rlist_iter riter = { + { + 0, + }, + }; + + /* dispatch NR_IOVEC IO vectors at a time. */ + + erpc = data; + sequence = erpc->rlist->seq[0]; + + rlist_iter_init(&riter, erpc->rlist); + + rvec_for_each_entry(rvec, &riter) + { + idx = count % NR_IOVEC; + if (++count == NR_IOVEC) { + erpc->vec.vector[idx] = rvec->iov; + erpc->vec.seq = sequence++; + erpc->vec.count = NR_IOVEC; - crpc = mydata; - this = crpc->this; - c_clnt = crpc->c_clnt; + ret = changelog_dispatch_vec(frame, this, erpc->rpc, &erpc->vec); + if (ret) + break; + count = 0; + continue; + } + + erpc->vec.vector[idx] = rvec->iov; + } - priv = this->private; + if (ret) + goto error_return; - switch (event) { + idx = count % NR_IOVEC; + if (idx) { + erpc->vec.seq = sequence; + erpc->vec.count = idx; + + ret = changelog_dispatch_vec(frame, this, erpc->rpc, &erpc->vec); + } + +error_return: + return ret; +} + +int +changelog_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event, + void *data) +{ + xlator_t *this = NULL; + changelog_rpc_clnt_t *crpc = NULL; + changelog_clnt_t *c_clnt = NULL; + changelog_priv_t *priv = NULL; + changelog_ev_selector_t *selection = NULL; + uint64_t clntcnt = 0; + uint64_t 
xprtcnt = 0; + + crpc = mydata; + this = crpc->this; + c_clnt = crpc->c_clnt; + + priv = this->private; + + switch (event) { case RPC_CLNT_CONNECT: - rpc_clnt_set_connected (&rpc->conn); - selection = &priv->ev_selection; + selection = &priv->ev_selection; + GF_ATOMIC_INC(priv->clntcnt); - LOCK (&c_clnt->wait_lock); + LOCK(&c_clnt->wait_lock); + { + LOCK(&c_clnt->active_lock); { - LOCK (&c_clnt->active_lock); - { - changelog_select_event (this, selection, - crpc->filter); - list_move_tail (&crpc->list, &c_clnt->active); - } - UNLOCK (&c_clnt->active_lock); + changelog_select_event(this, selection, crpc->filter); + list_move_tail(&crpc->list, &c_clnt->active); } - UNLOCK (&c_clnt->wait_lock); + UNLOCK(&c_clnt->active_lock); + } + UNLOCK(&c_clnt->wait_lock); - break; + break; case RPC_CLNT_DISCONNECT: - rpc_clnt_disable (crpc->rpc); - selection = &priv->ev_selection; + rpc_clnt_disable(crpc->rpc); - LOCK (&crpc->lock); - { - changelog_deselect_event (this, selection, - crpc->filter); - changelog_set_disconnect_flag (crpc, _gf_true); - } - UNLOCK (&crpc->lock); + /* rpc_clnt_disable doesn't unref the rpc. It just marks + * the rpc as disabled and cancels reconnection timer. + * Hence unref the rpc object to free it. 
+ */ + rpc_clnt_unref(crpc->rpc); - break; + if (priv) + selection = &priv->ev_selection; + + LOCK(&crpc->lock); + { + if (selection) + changelog_deselect_event(this, selection, crpc->filter); + changelog_set_disconnect_flag(crpc, _gf_true); + } + UNLOCK(&crpc->lock); + LOCK(&c_clnt->active_lock); + { + list_del_init(&crpc->list); + } + UNLOCK(&c_clnt->active_lock); + + break; case RPC_CLNT_MSG: case RPC_CLNT_DESTROY: - break; - } - - return 0; + /* Free up mydata */ + changelog_rpc_clnt_unref(crpc); + clntcnt = GF_ATOMIC_DEC(priv->clntcnt); + xprtcnt = GF_ATOMIC_GET(priv->xprtcnt); + if (this->cleanup_starting) { + if (!clntcnt && !xprtcnt) + changelog_process_cleanup_event(this); + } + break; + case RPC_CLNT_PING: + break; + } + + return 0; } void * -changelog_ev_connector (void *data) +changelog_ev_connector(void *data) { - xlator_t *this = NULL; - changelog_clnt_t *c_clnt = NULL; - changelog_rpc_clnt_t *crpc = NULL; + xlator_t *this = NULL; + changelog_clnt_t *c_clnt = NULL; + changelog_rpc_clnt_t *crpc = NULL; - c_clnt = data; - this = c_clnt->this; + c_clnt = data; + this = c_clnt->this; - while (1) { - pthread_mutex_lock (&c_clnt->pending_lock); - { - while (list_empty (&c_clnt->pending)) - pthread_cond_wait (&c_clnt->pending_cond, - &c_clnt->pending_lock); - crpc = list_first_entry (&c_clnt->pending, - changelog_rpc_clnt_t, list); - crpc->rpc = - changelog_rpc_client_init (this, crpc, - crpc->sock, - changelog_rpc_notify); - if (!crpc->rpc) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_RPC_CONNECT_ERROR, - "failed to connect back.. 
<%s>", - crpc->sock); - crpc->cleanup (crpc); - goto mutex_unlock; - } - - LOCK (&c_clnt->wait_lock); - { - list_move_tail (&crpc->list, &c_clnt->waitq); - } - UNLOCK (&c_clnt->wait_lock); - } - mutex_unlock: - pthread_mutex_unlock (&c_clnt->pending_lock); + while (1) { + pthread_mutex_lock(&c_clnt->pending_lock); + { + while (list_empty(&c_clnt->pending)) + pthread_cond_wait(&c_clnt->pending_cond, &c_clnt->pending_lock); + crpc = list_first_entry(&c_clnt->pending, changelog_rpc_clnt_t, + list); + crpc->rpc = changelog_rpc_client_init(this, crpc, crpc->sock, + changelog_rpc_notify); + if (!crpc->rpc) { + gf_smsg(this->name, GF_LOG_ERROR, 0, + CHANGELOG_MSG_RPC_CONNECT_ERROR, "path=%s", crpc->sock, + NULL); + crpc->cleanup(crpc); + goto mutex_unlock; + } + + LOCK(&c_clnt->wait_lock); + { + list_move_tail(&crpc->list, &c_clnt->waitq); + } + UNLOCK(&c_clnt->wait_lock); } + mutex_unlock: + pthread_mutex_unlock(&c_clnt->pending_lock); + } - return NULL; + return NULL; } void -changelog_ev_cleanup_connections (xlator_t *this, changelog_clnt_t *c_clnt) +changelog_ev_cleanup_connections(xlator_t *this, changelog_clnt_t *c_clnt) { - int ret = 0; - changelog_rpc_clnt_t *crpc = NULL; + changelog_rpc_clnt_t *crpc = NULL; - /* cleanup active connections */ - LOCK (&c_clnt->active_lock); + /* cleanup active connections */ + LOCK(&c_clnt->active_lock); + { + list_for_each_entry(crpc, &c_clnt->active, list) { - list_for_each_entry (crpc, &c_clnt->active, list) { - rpc_clnt_disable (crpc->rpc); - } + rpc_clnt_disable(crpc->rpc); } - UNLOCK (&c_clnt->active_lock); + } + UNLOCK(&c_clnt->active_lock); } /** @@ -244,143 +268,145 @@ changelog_ev_cleanup_connections (xlator_t *this, changelog_clnt_t *c_clnt) */ static changelog_rpc_clnt_t * -get_client (changelog_clnt_t *c_clnt, struct list_head **next) +get_client(changelog_clnt_t *c_clnt, struct list_head **next) { - changelog_rpc_clnt_t *crpc = NULL; - - LOCK (&c_clnt->active_lock); - { - if (*next == &c_clnt->active) - goto unblock; 
- crpc = list_entry (*next, changelog_rpc_clnt_t, list); - changelog_rpc_clnt_ref (crpc); - *next = (*next)->next; - } - unblock: - UNLOCK (&c_clnt->active_lock); - - return crpc; + changelog_rpc_clnt_t *crpc = NULL; + + LOCK(&c_clnt->active_lock); + { + if (*next == &c_clnt->active) + goto unblock; + crpc = list_entry(*next, changelog_rpc_clnt_t, list); + /* ref rpc as DISCONNECT might unref the rpc asynchronously */ + changelog_rpc_clnt_ref(crpc); + rpc_clnt_ref(crpc->rpc); + *next = (*next)->next; + } +unblock: + UNLOCK(&c_clnt->active_lock); + + return crpc; } static void -put_client (changelog_clnt_t *c_clnt, changelog_rpc_clnt_t *crpc) +put_client(changelog_clnt_t *c_clnt, changelog_rpc_clnt_t *crpc) { - LOCK (&c_clnt->active_lock); - { - changelog_rpc_clnt_unref (crpc); - } - UNLOCK (&c_clnt->active_lock); + LOCK(&c_clnt->active_lock); + { + rpc_clnt_unref(crpc->rpc); + changelog_rpc_clnt_unref(crpc); + } + UNLOCK(&c_clnt->active_lock); } void -_dispatcher (rbuf_list_t *rlist, void *arg) +_dispatcher(rbuf_list_t *rlist, void *arg) { - int ret = 0; - xlator_t *this = NULL; - changelog_clnt_t *c_clnt = NULL; - changelog_rpc_clnt_t *crpc = NULL; - changelog_rpc_clnt_t *tmp = NULL; - struct ev_rpc erpc = {0,}; - struct list_head *next = NULL; - - c_clnt = arg; - this = c_clnt->this; - - erpc.rlist = rlist; - next = c_clnt->active.next; - - while (1) { - crpc = get_client (c_clnt, &next); - if (!crpc) - break; - erpc.rpc = crpc->rpc; - ret = changelog_invoke_rpc (this, crpc->rpc, - &changelog_ev_program, - CHANGELOG_REV_PROC_EVENT, &erpc); - put_client (c_clnt, crpc); - } + xlator_t *this = NULL; + changelog_clnt_t *c_clnt = NULL; + changelog_rpc_clnt_t *crpc = NULL; + struct ev_rpc erpc = { + 0, + }; + struct list_head *next = NULL; + + c_clnt = arg; + this = c_clnt->this; + + erpc.rlist = rlist; + next = c_clnt->active.next; + + while (1) { + crpc = get_client(c_clnt, &next); + if (!crpc) + break; + erpc.rpc = crpc->rpc; + (void)changelog_invoke_rpc(this, 
crpc->rpc, &changelog_ev_program, + CHANGELOG_REV_PROC_EVENT, &erpc); + put_client(c_clnt, crpc); + } } /** this is called under rotbuff's lock */ void -sequencer (rbuf_list_t *rlist, void *mydata) +sequencer(rbuf_list_t *rlist, void *mydata) { - unsigned long range = 0; - changelog_clnt_t *c_clnt = 0; + unsigned long range = 0; + changelog_clnt_t *c_clnt = 0; - c_clnt = mydata; + c_clnt = mydata; - range = (RLIST_ENTRY_COUNT (rlist)) / NR_IOVEC; - if ((RLIST_ENTRY_COUNT (rlist)) % NR_IOVEC) - range++; - RLIST_STORE_SEQ (rlist, c_clnt->sequence, range); + range = (RLIST_ENTRY_COUNT(rlist)) / NR_IOVEC; + if ((RLIST_ENTRY_COUNT(rlist)) % NR_IOVEC) + range++; + RLIST_STORE_SEQ(rlist, c_clnt->sequence, range); - c_clnt->sequence += range; + c_clnt->sequence += range; } void * -changelog_ev_dispatch (void *data) +changelog_ev_dispatch(void *data) { - int ret = 0; - void *opaque = NULL; - xlator_t *this = NULL; - changelog_clnt_t *c_clnt = NULL; - struct timeval tv = {0,}; - - c_clnt = data; - this = c_clnt->this; - - while (1) { - /* TODO: change this to be pthread cond based.. later */ - tv.tv_sec = 1; - tv.tv_usec = 0; - select (0, NULL, NULL, NULL, &tv); - - ret = rbuf_get_buffer (c_clnt->rbuf, - &opaque, sequencer, c_clnt); - if (ret != RBUF_CONSUMABLE) { - if (ret != RBUF_EMPTY) - gf_msg (this->name, GF_LOG_WARNING, 0, - CHANGELOG_MSG_BUFFER_STARVATION_ERROR, - "Failed to get buffer for RPC dispatch " - "[rbuf retval: %d]", ret); - continue; - } - - ret = rbuf_wait_for_completion (c_clnt->rbuf, - opaque, _dispatcher, c_clnt); - if (ret) - gf_msg (this->name, GF_LOG_WARNING, 0, - CHANGELOG_MSG_PUT_BUFFER_FAILED, - "failed to put buffer after consumption"); + int ret = 0; + void *opaque = NULL; + xlator_t *this = NULL; + changelog_clnt_t *c_clnt = NULL; + struct timeval tv = { + 0, + }; + + c_clnt = data; + this = c_clnt->this; + + while (1) { + /* TODO: change this to be pthread cond based.. 
later */ + + tv.tv_sec = 1; + tv.tv_usec = 0; + select(0, NULL, NULL, NULL, &tv); + + ret = rbuf_get_buffer(c_clnt->rbuf, &opaque, sequencer, c_clnt); + if (ret != RBUF_CONSUMABLE) { + if (ret != RBUF_EMPTY) + gf_smsg(this->name, GF_LOG_WARNING, 0, + CHANGELOG_MSG_BUFFER_STARVATION_ERROR, + "Failed to get buffer for RPC dispatch", + "rbuf_retval=%d", ret, NULL); + continue; } - return NULL; + ret = rbuf_wait_for_completion(c_clnt->rbuf, opaque, _dispatcher, + c_clnt); + if (ret) + gf_smsg(this->name, GF_LOG_WARNING, 0, + CHANGELOG_MSG_PUT_BUFFER_FAILED, NULL); + } + + return NULL; } void -changelog_ev_queue_connection (changelog_clnt_t *c_clnt, - changelog_rpc_clnt_t *crpc) +changelog_ev_queue_connection(changelog_clnt_t *c_clnt, + changelog_rpc_clnt_t *crpc) { - pthread_mutex_lock (&c_clnt->pending_lock); - { - list_add_tail (&crpc->list, &c_clnt->pending); - pthread_cond_signal (&c_clnt->pending_cond); - } - pthread_mutex_unlock (&c_clnt->pending_lock); + pthread_mutex_lock(&c_clnt->pending_lock); + { + list_add_tail(&crpc->list, &c_clnt->pending); + pthread_cond_signal(&c_clnt->pending_cond); + } + pthread_mutex_unlock(&c_clnt->pending_lock); } struct rpc_clnt_procedure changelog_ev_procs[CHANGELOG_REV_PROC_MAX] = { - [CHANGELOG_REV_PROC_NULL] = {"NULL", NULL}, - [CHANGELOG_REV_PROC_EVENT] = { - "EVENT DISPATCH", changelog_event_dispatch_rpc - }, + [CHANGELOG_REV_PROC_NULL] = {"NULL", NULL}, + [CHANGELOG_REV_PROC_EVENT] = {"EVENT DISPATCH", + changelog_event_dispatch_rpc}, }; struct rpc_clnt_program changelog_ev_program = { - .progname = "CHANGELOG EVENT DISPATCHER", - .prognum = CHANGELOG_REV_RPC_PROCNUM, - .progver = CHANGELOG_REV_RPC_PROCVER, - .numproc = CHANGELOG_REV_PROC_MAX, - .proctable = changelog_ev_procs, + .progname = "CHANGELOG EVENT DISPATCHER", + .prognum = CHANGELOG_REV_RPC_PROCNUM, + .progver = CHANGELOG_REV_RPC_PROCVER, + .numproc = CHANGELOG_REV_PROC_MAX, + .proctable = changelog_ev_procs, }; diff --git 
a/xlators/features/changelog/src/changelog-ev-handle.h b/xlators/features/changelog/src/changelog-ev-handle.h index eef0492a9ee..cc1af58a276 100644 --- a/xlators/features/changelog/src/changelog-ev-handle.h +++ b/xlators/features/changelog/src/changelog-ev-handle.h @@ -11,74 +11,67 @@ #ifndef __CHANGELOG_EV_HANDLE_H #define __CHANGELOG_EV_HANDLE_H -#include "list.h" -#include "xlator.h" +#include <glusterfs/list.h> +#include <glusterfs/xlator.h> #include "rpc-clnt.h" -#include "rot-buffs.h" +#include <glusterfs/rot-buffs.h> struct changelog_clnt; typedef struct changelog_rpc_clnt { - xlator_t *this; + xlator_t *this; - gf_lock_t lock; + gf_lock_t lock; - unsigned long ref; - gf_boolean_t disconnected; + gf_atomic_t ref; + gf_boolean_t disconnected; - unsigned int filter; - char sock[UNIX_PATH_MAX]; + unsigned int filter; + char sock[UNIX_PATH_MAX]; - struct changelog_clnt *c_clnt; /* back pointer to list holder */ + struct changelog_clnt *c_clnt; /* back pointer to list holder */ - struct rpc_clnt *rpc; /* RPC client endpoint */ + struct rpc_clnt *rpc; /* RPC client endpoint */ - struct list_head list; /* ->pending, ->waitq, ->active */ + struct list_head list; /* ->pending, ->waitq, ->active */ - void (*cleanup) - (struct changelog_rpc_clnt *); /* cleanup handler */ + void (*cleanup)(struct changelog_rpc_clnt *); /* cleanup handler */ } changelog_rpc_clnt_t; static inline void -changelog_rpc_clnt_ref (changelog_rpc_clnt_t *crpc) +changelog_rpc_clnt_ref(changelog_rpc_clnt_t *crpc) { - LOCK (&crpc->lock); - { - ++crpc->ref; - } - UNLOCK (&crpc->lock); + GF_ATOMIC_INC(crpc->ref); } static inline void -changelog_set_disconnect_flag (changelog_rpc_clnt_t *crpc, gf_boolean_t flag) +changelog_set_disconnect_flag(changelog_rpc_clnt_t *crpc, gf_boolean_t flag) { - crpc->disconnected = flag; + crpc->disconnected = flag; } static inline int -changelog_rpc_clnt_is_disconnected (changelog_rpc_clnt_t *crpc) +changelog_rpc_clnt_is_disconnected(changelog_rpc_clnt_t *crpc) { - 
return (crpc->disconnected == _gf_true); + return (crpc->disconnected == _gf_true); } static inline void -changelog_rpc_clnt_unref (changelog_rpc_clnt_t *crpc) +changelog_rpc_clnt_unref(changelog_rpc_clnt_t *crpc) { - gf_boolean_t gone = _gf_false; - - LOCK (&crpc->lock); - { - if (!(--crpc->ref) - && changelog_rpc_clnt_is_disconnected (crpc)) { - list_del (&crpc->list); - gone = _gf_true; - } - } - UNLOCK (&crpc->lock); - - if (gone) - crpc->cleanup (crpc); + gf_boolean_t gone = _gf_false; + uint64_t ref = 0; + + ref = GF_ATOMIC_DEC(crpc->ref); + + if (!ref && changelog_rpc_clnt_is_disconnected(crpc)) { + list_del(&crpc->list); + gone = _gf_true; + } + + if (gone) + crpc->cleanup(crpc); } /** @@ -106,35 +99,38 @@ changelog_rpc_clnt_unref (changelog_rpc_clnt_t *crpc) */ typedef struct changelog_clnt { - xlator_t *this; + xlator_t *this; - /* pending connections */ - pthread_mutex_t pending_lock; - pthread_cond_t pending_cond; - struct list_head pending; + /* pending connections */ + pthread_mutex_t pending_lock; + pthread_cond_t pending_cond; + struct list_head pending; - /* current active connections */ - gf_lock_t active_lock; - struct list_head active; + /* current active connections */ + gf_lock_t active_lock; + struct list_head active; - gf_lock_t wait_lock; - struct list_head waitq; + gf_lock_t wait_lock; + struct list_head waitq; - /* consumer part of rot-buffs */ - rbuf_t *rbuf; - unsigned long sequence; + /* consumer part of rot-buffs */ + rbuf_t *rbuf; + unsigned long sequence; } changelog_clnt_t; -void *changelog_ev_connector (void *); +void * +changelog_ev_connector(void *); -void *changelog_ev_dispatch (void *); +void * +changelog_ev_dispatch(void *); /* APIs */ void -changelog_ev_queue_connection (changelog_clnt_t *, changelog_rpc_clnt_t *); +changelog_ev_queue_connection(changelog_clnt_t *, changelog_rpc_clnt_t *); void -changelog_ev_cleanup_connections (xlator_t *, changelog_clnt_t *); +changelog_ev_cleanup_connections(xlator_t *, changelog_clnt_t 
*); +void +changelog_process_cleanup_event(xlator_t *); #endif - diff --git a/xlators/features/changelog/src/changelog-helpers.c b/xlators/features/changelog/src/changelog-helpers.c index e352f2ec859..e561997d858 100644 --- a/xlators/features/changelog/src/changelog-helpers.c +++ b/xlators/features/changelog/src/changelog-helpers.c @@ -8,11 +8,11 @@ cases as published by the Free Software Foundation. */ -#include "xlator.h" -#include "defaults.h" -#include "logging.h" -#include "iobuf.h" -#include "syscall.h" +#include <glusterfs/xlator.h> +#include <glusterfs/defaults.h> +#include <glusterfs/logging.h> +#include <glusterfs/iobuf.h> +#include <glusterfs/syscall.h> #include "changelog-helpers.h" #include "changelog-encoders.h" @@ -22,312 +22,281 @@ #include "changelog-encoders.h" #include "changelog-rpc-common.h" #include <pthread.h> +#include <time.h> static void -changelog_cleanup_free_mutex (void *arg_mutex) +changelog_cleanup_free_mutex(void *arg_mutex) { - pthread_mutex_t *p_mutex = (pthread_mutex_t*) arg_mutex; + pthread_mutex_t *p_mutex = (pthread_mutex_t *)arg_mutex; if (p_mutex) - pthread_mutex_unlock(p_mutex); + pthread_mutex_unlock(p_mutex); } int -changelog_thread_cleanup (xlator_t *this, pthread_t thr_id) +changelog_thread_cleanup(xlator_t *this, pthread_t thr_id) { - int ret = 0; - void *retval = NULL; - - /* send a cancel request to the thread */ - ret = pthread_cancel (thr_id); - if (ret != 0) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_PTHREAD_CANCEL_FAILED, - "could not cancel thread"); - goto out; - } + int ret = 0; + void *retval = NULL; + + /* send a cancel request to the thread */ + ret = pthread_cancel(thr_id); + if (ret != 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_PTHREAD_CANCEL_FAILED, NULL); + goto out; + } + + ret = pthread_join(thr_id, &retval); + if ((ret != 0) || (retval != PTHREAD_CANCELED)) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_PTHREAD_CANCEL_FAILED, NULL); + } - ret = 
pthread_join (thr_id, &retval); - if ((ret != 0) || (retval != PTHREAD_CANCELED)) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_PTHREAD_CANCEL_FAILED, - "cancel request not adhered as expected"); - } - - out: - return ret; +out: + return ret; } void * -changelog_get_usable_buffer (changelog_local_t *local) +changelog_get_usable_buffer(changelog_local_t *local) { - changelog_log_data_t *cld = NULL; + changelog_log_data_t *cld = NULL; - if (!local) - return NULL; + if (!local) + return NULL; - cld = &local->cld; - if (!cld->cld_iobuf) - return NULL; + cld = &local->cld; + if (!cld->cld_iobuf) + return NULL; - return cld->cld_iobuf->ptr; + return cld->cld_iobuf->ptr; } static int -changelog_selector_index (unsigned int selector) +changelog_selector_index(unsigned int selector) { - return (ffs (selector) - 1); + return (ffs(selector) - 1); } int -changelog_ev_selected (xlator_t *this, - changelog_ev_selector_t *selection, - unsigned int selector) +changelog_ev_selected(xlator_t *this, changelog_ev_selector_t *selection, + unsigned int selector) { - int idx = 0; - - idx = changelog_selector_index (selector); - gf_msg_debug (this->name, 0, - "selector ref count for %d (idx: %d): %d", - selector, idx, selection->ref[idx]); - /* this can be lockless */ - return (idx < CHANGELOG_EV_SELECTION_RANGE - && (selection->ref[idx] > 0)); + int idx = 0; + + idx = changelog_selector_index(selector); + gf_msg_debug(this->name, 0, "selector ref count for %d (idx: %d): %d", + selector, idx, selection->ref[idx]); + /* this can be lockless */ + return (idx < CHANGELOG_EV_SELECTION_RANGE && (selection->ref[idx] > 0)); } void -changelog_select_event (xlator_t *this, - changelog_ev_selector_t *selection, - unsigned int selector) +changelog_select_event(xlator_t *this, changelog_ev_selector_t *selection, + unsigned int selector) { - int idx = 0; - - LOCK (&selection->reflock); - { - while (selector) { - idx = changelog_selector_index (selector); - if (idx < 
CHANGELOG_EV_SELECTION_RANGE) { - selection->ref[idx]++; - gf_msg_debug (this->name, 0, - "selecting event %d", idx); - } - selector &= ~(1 << idx); - } - } - UNLOCK (&selection->reflock); + int idx = 0; + + LOCK(&selection->reflock); + { + while (selector) { + idx = changelog_selector_index(selector); + if (idx < CHANGELOG_EV_SELECTION_RANGE) { + selection->ref[idx]++; + gf_msg_debug(this->name, 0, "selecting event %d", idx); + } + selector &= ~(1 << idx); + } + } + UNLOCK(&selection->reflock); } void -changelog_deselect_event (xlator_t *this, - changelog_ev_selector_t *selection, - unsigned int selector) +changelog_deselect_event(xlator_t *this, changelog_ev_selector_t *selection, + unsigned int selector) { - int idx = 0; - - LOCK (&selection->reflock); - { - while (selector) { - idx = changelog_selector_index (selector); - if (idx < CHANGELOG_EV_SELECTION_RANGE) { - selection->ref[idx]--; - gf_msg_debug (this->name, 0, - "de-selecting event %d", idx); - } - selector &= ~(1 << idx); - } - } - UNLOCK (&selection->reflock); + int idx = 0; + + LOCK(&selection->reflock); + { + while (selector) { + idx = changelog_selector_index(selector); + if (idx < CHANGELOG_EV_SELECTION_RANGE) { + selection->ref[idx]--; + gf_msg_debug(this->name, 0, "de-selecting event %d", idx); + } + selector &= ~(1 << idx); + } + } + UNLOCK(&selection->reflock); } int -changelog_init_event_selection (xlator_t *this, - changelog_ev_selector_t *selection) +changelog_init_event_selection(xlator_t *this, + changelog_ev_selector_t *selection) { - int ret = 0; - int j = CHANGELOG_EV_SELECTION_RANGE; - - ret = LOCK_INIT (&selection->reflock); - if (ret != 0) - return -1; + int ret = 0; + int j = CHANGELOG_EV_SELECTION_RANGE; - LOCK (&selection->reflock); - { - while (j--) { - selection->ref[j] = 0; - } - } - UNLOCK (&selection->reflock); - - return 0; -} - -int -changelog_cleanup_event_selection (xlator_t *this, - changelog_ev_selector_t *selection) -{ - int ret = 0; - int j = 
CHANGELOG_EV_SELECTION_RANGE; + ret = LOCK_INIT(&selection->reflock); + if (ret != 0) + return -1; - LOCK (&selection->reflock); - { - while (j--) { - if (selection->ref[j] > 0) - gf_msg (this->name, GF_LOG_WARNING, 0, - CHANGELOG_MSG_CLEANUP_ON_ACTIVE_REF, - "changelog event selection cleaning up " - " on active references"); - } + LOCK(&selection->reflock); + { + while (j--) { + selection->ref[j] = 0; } - UNLOCK (&selection->reflock); + } + UNLOCK(&selection->reflock); - return LOCK_DESTROY (&selection->reflock); + return 0; } static void -changelog_perform_dispatch (xlator_t *this, - changelog_priv_t *priv, void *mem, size_t size) +changelog_perform_dispatch(xlator_t *this, changelog_priv_t *priv, void *mem, + size_t size) { - char *buf = NULL; - void *opaque = NULL; - - buf = rbuf_reserve_write_area (priv->rbuf, size, &opaque); - if (!buf) { - gf_msg_callingfn (this->name, - GF_LOG_WARNING, 0, - CHANGELOG_MSG_DISPATCH_EVENT_FAILED, - "failed to dispatch event"); - return; - } + char *buf = NULL; + void *opaque = NULL; + + buf = rbuf_reserve_write_area(priv->rbuf, size, &opaque); + if (!buf) { + gf_msg_callingfn(this->name, GF_LOG_WARNING, 0, + CHANGELOG_MSG_DISPATCH_EVENT_FAILED, + "failed to dispatch event"); + return; + } - memcpy (buf, mem, size); - rbuf_write_complete (opaque); + memcpy(buf, mem, size); + rbuf_write_complete(opaque); } void -changelog_dispatch_event (xlator_t *this, - changelog_priv_t *priv, changelog_event_t *ev) +changelog_dispatch_event(xlator_t *this, changelog_priv_t *priv, + changelog_event_t *ev) { - changelog_ev_selector_t *selection = NULL; + changelog_ev_selector_t *selection = NULL; - selection = &priv->ev_selection; - if (changelog_ev_selected (this, selection, ev->ev_type)) { - changelog_perform_dispatch (this, priv, ev, CHANGELOG_EV_SIZE); - } + selection = &priv->ev_selection; + if (changelog_ev_selected(this, selection, ev->ev_type)) { + changelog_perform_dispatch(this, priv, ev, CHANGELOG_EV_SIZE); + } } void 
-changelog_set_usable_record_and_length (changelog_local_t *local, - size_t len, int xr) +changelog_set_usable_record_and_length(changelog_local_t *local, size_t len, + int xr) { - changelog_log_data_t *cld = NULL; + changelog_log_data_t *cld = NULL; - cld = &local->cld; + cld = &local->cld; - cld->cld_ptr_len = len; - cld->cld_xtra_records = xr; + cld->cld_ptr_len = len; + cld->cld_xtra_records = xr; } void -changelog_local_cleanup (xlator_t *xl, changelog_local_t *local) +changelog_local_cleanup(xlator_t *xl, changelog_local_t *local) { - int i = 0; - changelog_opt_t *co = NULL; - changelog_log_data_t *cld = NULL; + int i = 0; + changelog_opt_t *co = NULL; + changelog_log_data_t *cld = NULL; - if (!local) - return; + if (!local) + return; - cld = &local->cld; + cld = &local->cld; - /* cleanup dynamic allocation for extra records */ - if (cld->cld_xtra_records) { - co = (changelog_opt_t *) cld->cld_ptr; - for (; i < cld->cld_xtra_records; i++, co++) - if (co->co_free) - co->co_free (co); - } + /* cleanup dynamic allocation for extra records */ + if (cld->cld_xtra_records) { + co = (changelog_opt_t *)cld->cld_ptr; + for (; i < cld->cld_xtra_records; i++, co++) + if (co->co_free) + co->co_free(co); + } - CHANGELOG_IOBUF_UNREF (cld->cld_iobuf); + CHANGELOG_IOBUF_UNREF(cld->cld_iobuf); - if (local->inode) - inode_unref (local->inode); + if (local->inode) + inode_unref(local->inode); - mem_put (local); + mem_put(local); } int -changelog_write (int fd, char *buffer, size_t len) +changelog_write(int fd, char *buffer, size_t len) { - ssize_t size = 0; - size_t written = 0; + ssize_t size = 0; + size_t written = 0; - while (written < len) { - size = sys_write (fd, - buffer + written, len - written); - if (size <= 0) - break; + while (written < len) { + size = sys_write(fd, buffer + written, len - written); + if (size <= 0) + break; - written += size; - } + written += size; + } - return (written != len); + return (written != len); } int -htime_update (xlator_t *this, - 
changelog_priv_t *priv, unsigned long ts, - char * buffer) +htime_update(xlator_t *this, changelog_priv_t *priv, time_t ts, char *buffer) { - char changelog_path[PATH_MAX+1] = {0,}; - int len = -1; - char x_value[25] = {0,}; - /* time stamp(10) + : (1) + rolltime (12 ) + buffer (2) */ - int ret = 0; - - if (priv->htime_fd ==-1) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_HTIME_ERROR, - "Htime fd not available for updation"); - ret = -1; - goto out; - } - strncpy (changelog_path, buffer, PATH_MAX); - len = strlen (changelog_path); - changelog_path[len] = '\0'; /* redundant */ - - if (changelog_write (priv->htime_fd, (void*) changelog_path, len+1 ) < 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_HTIME_ERROR, - "Htime file content write failed"); - ret =-1; - goto out; - } - - snprintf (x_value, sizeof x_value, "%lu:%d", - ts, priv->rollover_count); - - if (sys_fsetxattr (priv->htime_fd, HTIME_KEY, x_value, - strlen (x_value), XATTR_REPLACE)) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_HTIME_ERROR, - "Htime xattr updation failed with XATTR_REPLACE " - "Changelog: %s", changelog_path); - - if (sys_fsetxattr (priv->htime_fd, HTIME_KEY, x_value, - strlen (x_value), 0)) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_HTIME_ERROR, - "Htime xattr updation failed " - "Changelog: %s", changelog_path); - ret = -1; - goto out; - } - } - - priv->rollover_count +=1; + char changelog_path[PATH_MAX + 1] = { + 0, + }; + int len = -1; + char x_value[25] = { + 0, + }; + /* time stamp(10) + : (1) + rolltime (12 ) + buffer (2) */ + int ret = 0; + + if (priv->htime_fd == -1) { + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_HTIME_ERROR, + "reason=fd not available", NULL); + ret = -1; + goto out; + } + len = snprintf(changelog_path, PATH_MAX, "%s", buffer); + if (len >= PATH_MAX) { + ret = -1; + goto out; + } + if (changelog_write(priv->htime_fd, (void *)changelog_path, len + 1) < 0) { + gf_smsg(this->name, GF_LOG_ERROR, 0, 
CHANGELOG_MSG_HTIME_ERROR, + "reason=write failed", NULL); + ret = -1; + goto out; + } + + len = snprintf(x_value, sizeof(x_value), "%ld:%d", ts, + priv->rollover_count); + if (len >= sizeof(x_value)) { + ret = -1; + goto out; + } + + if (sys_fsetxattr(priv->htime_fd, HTIME_KEY, x_value, len, XATTR_REPLACE)) { + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_HTIME_ERROR, + "reason=xattr updation failed", "XATTR_REPLACE=true", + "changelog=%s", changelog_path, NULL); + + if (sys_fsetxattr(priv->htime_fd, HTIME_KEY, x_value, len, 0)) { + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_HTIME_ERROR, + "reason=xattr updation failed", "changelog=%s", + changelog_path, NULL); + ret = -1; + goto out; + } + } + + priv->rollover_count += 1; out: - return ret; + return ret; } /* @@ -339,43 +308,45 @@ out: * 0 : If NOT empty, proceed usual. */ int -cl_is_empty (xlator_t *this, int fd) +cl_is_empty(xlator_t *this, int fd) { - int ret = -1; - size_t elen = 0; - int encoding = -1; - char buffer[1024] = {0,}; - struct stat stbuf = {0,}; - int major_version = -1; - int minor_version = -1; - - ret = sys_fstat (fd, &stbuf); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_FSTAT_OP_FAILED, - "Could not stat (CHANGELOG)"); - goto out; - } - - ret = sys_lseek (fd, 0, SEEK_SET); - if (ret == -1) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_LSEEK_OP_FAILED, - "Could not lseek (CHANGELOG)"); - goto out; - } - - CHANGELOG_GET_HEADER_INFO (fd, buffer, 1024, encoding, - major_version, minor_version, elen); - - if (elen == stbuf.st_size) { - ret = 1; - } else { - ret = 0; - } + int ret = -1; + size_t elen = 0; + int encoding = -1; + char buffer[1024] = { + 0, + }; + struct stat stbuf = { + 0, + }; + int major_version = -1; + int minor_version = -1; + + ret = sys_fstat(fd, &stbuf); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FSTAT_OP_FAILED, + NULL); + goto out; + } + + ret = sys_lseek(fd, 0, SEEK_SET); + if (ret == 
-1) { + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_LSEEK_OP_FAILED, + NULL); + goto out; + } + + CHANGELOG_GET_HEADER_INFO(fd, buffer, sizeof(buffer), encoding, + major_version, minor_version, elen); + + if (elen == stbuf.st_size) { + ret = 1; + } else { + ret = 0; + } out: - return ret; + return ret; } /* @@ -387,162 +358,172 @@ out: * -1 : Error */ int -update_path (xlator_t *this, char *cl_path) +update_path(xlator_t *this, char *cl_path) { - char low_cl[] = "changelog"; - char up_cl[] = "CHANGELOG"; - char *found = NULL; - int iter = 0; - int ret = -1; - - found = strstr(cl_path, up_cl); - - if (found == NULL) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_LSEEK_OP_FAILED, - "Could not find CHANGELOG in changelog path"); - goto out; - } else { - strncpy(found, low_cl, strlen(low_cl)); - } - - ret = 0; + const char low_cl[] = "changelog"; + const char up_cl[] = "CHANGELOG"; + char *found = NULL; + int ret = -1; + + found = strstr(cl_path, up_cl); + + if (found == NULL) { + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_PATH_NOT_FOUND, + NULL); + goto out; + } else { + memcpy(found, low_cl, sizeof(low_cl) - 1); + } + + ret = 0; out: - return ret; + return ret; } static int -changelog_rollover_changelog (xlator_t *this, - changelog_priv_t *priv, unsigned long ts) +changelog_rollover_changelog(xlator_t *this, changelog_priv_t *priv, time_t ts) { - int ret = -1; - int notify = 0; - int cl_empty_flag = 0; - char ofile[PATH_MAX] = {0,}; - char nfile[PATH_MAX] = {0,}; - changelog_event_t ev = {0,}; - - if (priv->changelog_fd != -1) { - ret = sys_fsync (priv->changelog_fd); - if (ret < 0) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_FSYNC_OP_FAILED, - "fsync failed"); - } - ret = cl_is_empty (this, priv->changelog_fd); - if (ret == 1) { - cl_empty_flag = 1; - } else if (ret == -1) { - /* Log error but proceed as usual */ - gf_msg (this->name, GF_LOG_WARNING, 0, - CHANGELOG_MSG_DETECT_EMPTY_CHANGELOG_FAILED, - "Error 
detecting empty changelog"); - } - sys_close (priv->changelog_fd); - priv->changelog_fd = -1; - } - - (void) snprintf (ofile, PATH_MAX, - "%s/"CHANGELOG_FILE_NAME, priv->changelog_dir); - (void) snprintf (nfile, PATH_MAX, - "%s/"CHANGELOG_FILE_NAME".%lu", - priv->changelog_dir, ts); - - if (cl_empty_flag == 1) { - ret = sys_unlink (ofile); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_UNLINK_OP_FAILED, - "error unlinking(empty cl) %s)", - ofile); - ret = 0; /* Error in unlinking empty changelog should - not break further changelog operation, so - reset return value to 0*/ - } - } else { - ret = sys_rename (ofile, nfile); + int ret = -1; + int notify = 0; + int cl_empty_flag = 0; + struct tm *gmt; + char yyyymmdd[40]; + char ofile[PATH_MAX] = { + 0, + }; + char nfile[PATH_MAX] = { + 0, + }; + char nfile_dir[PATH_MAX] = { + 0, + }; + changelog_event_t ev = { + 0, + }; + + if (priv->changelog_fd != -1) { + ret = sys_fsync(priv->changelog_fd); + if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_FSYNC_OP_FAILED, NULL); + } + ret = cl_is_empty(this, priv->changelog_fd); + if (ret == 1) { + cl_empty_flag = 1; + } else if (ret == -1) { + /* Log error but proceed as usual */ + gf_smsg(this->name, GF_LOG_WARNING, 0, + CHANGELOG_MSG_DETECT_EMPTY_CHANGELOG_FAILED, NULL); + } + sys_close(priv->changelog_fd); + priv->changelog_fd = -1; + } + + /* Get GMT time. 
*/ + gmt = gmtime(&ts); + + strftime(yyyymmdd, sizeof(yyyymmdd), "%Y/%m/%d", gmt); + + (void)snprintf(ofile, PATH_MAX, "%s/" CHANGELOG_FILE_NAME, + priv->changelog_dir); + (void)snprintf(nfile, PATH_MAX, "%s/%s/" CHANGELOG_FILE_NAME ".%ld", + priv->changelog_dir, yyyymmdd, ts); + (void)snprintf(nfile_dir, PATH_MAX, "%s/%s", priv->changelog_dir, yyyymmdd); + + if (cl_empty_flag == 1) { + ret = sys_unlink(ofile); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_UNLINK_OP_FAILED, "path=%s", ofile, NULL); + ret = 0; /* Error in unlinking empty changelog should + not break further changelog operation, so + reset return value to 0*/ + } + } else { + ret = sys_rename(ofile, nfile); + + /* Changelog file rename gets ENOENT when parent dir doesn't exist */ + if (errno == ENOENT) { + ret = mkdir_p(nfile_dir, 0600, _gf_true); + + if ((ret == -1) && (EEXIST != errno)) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_MKDIR_ERROR, "%s", nfile_dir, NULL); + goto out; + } - if (ret && (errno == ENOENT)) { - ret = 0; - goto out; - } - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_RENAME_ERROR, - "error renaming %s -> %s", - ofile, nfile); - } + ret = sys_rename(ofile, nfile); } - if (!ret && (cl_empty_flag == 0)) { - notify = 1; - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_RENAME_ERROR, - "error renaming %s -> %s", - ofile, nfile); + if (ret && (errno == ENOENT)) { + ret = 0; + goto out; } - - if (!ret) { - if (cl_empty_flag) { - update_path (this, nfile); - } - ret = htime_update (this, priv, ts, nfile); - if (ret == -1) { - gf_msg (this->name, GF_LOG_ERROR, - 0, CHANGELOG_MSG_HTIME_ERROR, - "could not update htime file"); - goto out; - } + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_RENAME_ERROR, + "from=%s", ofile, "to=%s", nfile, NULL); } + } - if (notify) { - ev.ev_type = CHANGELOG_OP_TYPE_JOURNAL; - memcpy (ev.u.journal.path, nfile, strlen (nfile) + 1); - changelog_dispatch_event 
(this, priv, &ev); + if (!ret && (cl_empty_flag == 0)) { + notify = 1; + } + + if (!ret) { + if (cl_empty_flag) { + update_path(this, nfile); } - out: - /* If this is explicit rollover initiated by snapshot, - * wakeup reconfigure thread waiting for changelog to - * rollover. This should happen even in failure cases as - * well otherwise snapshot will timeout and fail. Hence - * moved under out. - */ - if (priv->explicit_rollover) { - priv->explicit_rollover = _gf_false; - - pthread_mutex_lock (&priv->bn.bnotify_mutex); - { - if (ret) { - priv->bn.bnotify_error = _gf_true; - gf_msg (this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_EXPLICIT_ROLLOVER_FAILED, - "Fail snapshot because of " - "previous errors"); - } else { - gf_msg (this->name, GF_LOG_INFO, 0, - CHANGELOG_MSG_BNOTIFY_INFO, "Explicit " - "rollover changelog: %s signaling " - "bnotify", nfile); - } - priv->bn.bnotify = _gf_false; - pthread_cond_signal (&priv->bn.bnotify_cond); - } - pthread_mutex_unlock (&priv->bn.bnotify_mutex); + ret = htime_update(this, priv, ts, nfile); + if (ret == -1) { + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_HTIME_ERROR, + NULL); + goto out; } - return ret; + } + + if (notify) { + ev.ev_type = CHANGELOG_OP_TYPE_JOURNAL; + memcpy(ev.u.journal.path, nfile, strlen(nfile) + 1); + changelog_dispatch_event(this, priv, &ev); + } +out: + /* If this is explicit rollover initiated by snapshot, + * wakeup reconfigure thread waiting for changelog to + * rollover. This should happen even in failure cases as + * well otherwise snapshot will timeout and fail. Hence + * moved under out. 
+ */ + if (priv->explicit_rollover) { + priv->explicit_rollover = _gf_false; + + pthread_mutex_lock(&priv->bn.bnotify_mutex); + { + if (ret) { + priv->bn.bnotify_error = _gf_true; + gf_smsg(this->name, GF_LOG_ERROR, 0, + CHANGELOG_MSG_EXPLICIT_ROLLOVER_FAILED, NULL); + } else { + gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_BNOTIFY_INFO, + "changelog=%s", nfile, NULL); + } + priv->bn.bnotify = _gf_false; + pthread_cond_signal(&priv->bn.bnotify_cond); + } + pthread_mutex_unlock(&priv->bn.bnotify_mutex); + } + return ret; } int -filter_cur_par_dirs (const struct dirent *entry) +filter_cur_par_dirs(const struct dirent *entry) { - if (entry == NULL) - return 0; + if (entry == NULL) + return 0; - if ((strcmp(entry->d_name, ".") == 0) || - (strcmp(entry->d_name, "..") == 0)) - return 0; - else - return 1; + if ((strcmp(entry->d_name, ".") == 0) || (strcmp(entry->d_name, "..") == 0)) + return 0; + else + return 1; } /* @@ -555,252 +536,284 @@ filter_cur_par_dirs (const struct dirent *entry) */ int -find_current_htime (int ht_dir_fd, const char *ht_dir_path, char *ht_file_bname) +find_current_htime(int ht_dir_fd, const char *ht_dir_path, char *ht_file_bname) { - struct dirent **namelist = NULL; - int ret = 0; - int cnt = 0; - int i = 0; - xlator_t *this = NULL; - - this = THIS; - GF_ASSERT (this); - GF_ASSERT (ht_dir_path); - - cnt = scandir (ht_dir_path, &namelist, filter_cur_par_dirs, alphasort); - if (cnt < 0) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_SCAN_DIR_FAILED, - "scandir failed"); - } else if (cnt > 0) { - strncpy (ht_file_bname, namelist[cnt - 1]->d_name, NAME_MAX); - ht_file_bname[NAME_MAX - 1] = 0; - - if (sys_fsetxattr (ht_dir_fd, HTIME_CURRENT, ht_file_bname, - strlen (ht_file_bname), 0)) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_FSETXATTR_FAILED, - "fsetxattr failed: HTIME_CURRENT"); - ret = -1; - goto out; - } + struct dirent **namelist = NULL; + int ret = 0; + int cnt = 0; + int i = 0; + xlator_t *this = NULL; + 
+ this = THIS; + GF_ASSERT(this); + GF_ASSERT(ht_dir_path); + + cnt = scandir(ht_dir_path, &namelist, filter_cur_par_dirs, alphasort); + if (cnt < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_SCAN_DIR_FAILED, + NULL); + } else if (cnt > 0) { + if (snprintf(ht_file_bname, NAME_MAX, "%s", + namelist[cnt - 1]->d_name) >= NAME_MAX) { + ret = -1; + goto out; + } + if (sys_fsetxattr(ht_dir_fd, HTIME_CURRENT, ht_file_bname, + strlen(ht_file_bname), 0)) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_FSETXATTR_FAILED, "HTIME_CURRENT", NULL); + ret = -1; + goto out; + } + + if (sys_fsync(ht_dir_fd) < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_FSYNC_OP_FAILED, NULL); + ret = -1; + goto out; + } + } - if (sys_fsync (ht_dir_fd) < 0) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_FSYNC_OP_FAILED, - "fsync failed"); - ret = -1; - goto out; - } - } - - out: - for (i = 0; i < cnt; i++) - free (namelist[i]); - free (namelist); +out: + for (i = 0; i < cnt; i++) + free(namelist[i]); + free(namelist); - if (ret) - cnt = ret; + if (ret) + cnt = ret; - return cnt; + return cnt; } /* Returns 0 on successful open of htime file * returns -1 on failure or error */ int -htime_open (xlator_t *this, - changelog_priv_t *priv, unsigned long ts) +htime_open(xlator_t *this, changelog_priv_t *priv, time_t ts) { - int ht_file_fd = -1; - int ht_dir_fd = -1; - int ret = 0; - int cnt = 0; - char ht_dir_path[PATH_MAX] = {0,}; - char ht_file_path[PATH_MAX] = {0,}; - char ht_file_bname[NAME_MAX] = {0,}; - char x_value[NAME_MAX] = {0,}; - int flags = 0; - unsigned long min_ts = 0; - unsigned long max_ts = 0; - unsigned long total = 0; - ssize_t size = 0; - - CHANGELOG_FILL_HTIME_DIR(priv->changelog_dir, ht_dir_path); - - /* Open htime directory to get HTIME_CURRENT */ - ht_dir_fd = open (ht_dir_path, O_RDONLY); - if (ht_dir_fd == -1) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_OPEN_FAILED, "open failed: %s", - ht_dir_path); - 
ret = -1; - goto out; - } - - size = sys_fgetxattr (ht_dir_fd, HTIME_CURRENT, ht_file_bname, - sizeof (ht_file_bname)); - if (size < 0) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_FGETXATTR_FAILED, "Error extracting" - " HTIME_CURRENT."); - - /* If upgrade scenario, find the latest HTIME.TSTAMP file - * and use the same. If error, create a new HTIME.TSTAMP - * file. - */ - cnt = find_current_htime (ht_dir_fd, ht_dir_path, - ht_file_bname); - if (cnt <= 0) { - gf_msg (this->name, GF_LOG_INFO, errno, - CHANGELOG_MSG_HTIME_INFO, - "HTIME_CURRENT not found. Changelog enabled" - " before init"); - return htime_create (this, priv, ts); - } - - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_HTIME_ERROR, "Error extracting" - " HTIME_CURRENT."); - } - - gf_msg (this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_HTIME_INFO, - "HTIME_CURRENT: %s", ht_file_bname); - (void) snprintf (ht_file_path, PATH_MAX, "%s/%s", - ht_dir_path, ht_file_bname); - - /* Open in append mode as existing htime file is used */ - flags |= (O_RDWR | O_SYNC | O_APPEND); - ht_file_fd = open (ht_file_path, flags, - S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); - if (ht_file_fd < 0) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_OPEN_FAILED, - "unable to open htime file: %s", - ht_file_path); - ret = -1; - goto out; - } - - /* save this htime_fd in priv->htime_fd */ - priv->htime_fd = ht_file_fd; - - /* Initialize rollover-number in priv to current number */ - size = sys_fgetxattr (ht_file_fd, HTIME_KEY, x_value, sizeof (x_value)); - if (size < 0) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_FGETXATTR_FAILED, "error extracting max" - " timstamp from htime file %s", - ht_file_path); - ret = -1; - goto out; - } - - sscanf (x_value, "%lu:%lu", &max_ts, &total); - gf_msg (this->name, GF_LOG_INFO, 0, - CHANGELOG_MSG_TOTAL_LOG_INFO, - "INIT CASE: MIN: %lu, MAX: %lu," - " TOTAL CHANGELOGS: %lu", min_ts, max_ts, total); + int ht_file_fd = -1; + int ht_dir_fd = -1; + int 
ret = 0; + int cnt = 0; + char ht_dir_path[PATH_MAX] = { + 0, + }; + char ht_file_path[PATH_MAX] = { + 0, + }; + char ht_file_bname[NAME_MAX] = { + 0, + }; + char x_value[NAME_MAX] = { + 0, + }; + int flags = 0; + unsigned long min_ts = 0; + unsigned long max_ts = 0; + unsigned long total = 0; + unsigned long total1 = 0; + ssize_t size = 0; + struct stat stat_buf = { + 0, + }; + unsigned long record_len = 0; + int32_t len = 0; + + CHANGELOG_FILL_HTIME_DIR(priv->changelog_dir, ht_dir_path); + + /* Open htime directory to get HTIME_CURRENT */ + ht_dir_fd = open(ht_dir_path, O_RDONLY); + if (ht_dir_fd == -1) { + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_OPEN_FAILED, + "path=%s", ht_dir_path, NULL); + ret = -1; + goto out; + } + + size = sys_fgetxattr(ht_dir_fd, HTIME_CURRENT, ht_file_bname, + sizeof(ht_file_bname)); + if (size < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FGETXATTR_FAILED, + "name=HTIME_CURRENT", NULL); + + /* If upgrade scenario, find the latest HTIME.TSTAMP file + * and use the same. If error, create a new HTIME.TSTAMP + * file. 
+ */ + cnt = find_current_htime(ht_dir_fd, ht_dir_path, ht_file_bname); + if (cnt <= 0) { + gf_smsg(this->name, GF_LOG_INFO, errno, + CHANGELOG_MSG_NO_HTIME_CURRENT, NULL); + sys_close(ht_dir_fd); + return htime_create(this, priv, ts); + } + + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_HTIME_CURRENT_ERROR, NULL); + } + + gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_HTIME_CURRENT, "path=%s", + ht_file_bname, NULL); + len = snprintf(ht_file_path, PATH_MAX, "%s/%s", ht_dir_path, ht_file_bname); + if ((len < 0) || (len >= PATH_MAX)) { + ret = -1; + goto out; + } + + /* Open in append mode as existing htime file is used */ + flags |= (O_RDWR | O_SYNC | O_APPEND); + ht_file_fd = open(ht_file_path, flags, + S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); + if (ht_file_fd < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_OPEN_FAILED, + "path=%s", ht_file_path, NULL); + ret = -1; + goto out; + } + + /* save this htime_fd in priv->htime_fd */ + priv->htime_fd = ht_file_fd; + + ret = sys_fstat(ht_file_fd, &stat_buf); + if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_HTIME_STAT_ERROR, + "path=%s", ht_file_path, NULL); + ret = -1; + goto out; + } + + /* Initialize rollover-number in priv to current number */ + size = sys_fgetxattr(ht_file_fd, HTIME_KEY, x_value, sizeof(x_value)); + if (size < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FGETXATTR_FAILED, + "name=%s", HTIME_KEY, "path=%s", ht_file_path, NULL); + ret = -1; + goto out; + } + + sscanf(x_value, "%lu:%lu", &max_ts, &total); + + /* 22 = 1(/) + 20(CHANGELOG.TIMESTAMP) + 1(\x00) */ + record_len = strlen(priv->changelog_dir) + 22; + total1 = stat_buf.st_size / record_len; + if (total != total1) { + gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_TOTAL_LOG_INFO, + "xattr_total=%lu", total, "size_total=%lu", total1, NULL); + } + + gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_TOTAL_LOG_INFO, "min=%lu", + min_ts, "max=%lu", max_ts, 
"total_changelogs=%lu", total, NULL); + + if (total < total1) + priv->rollover_count = total1 + 1; + else priv->rollover_count = total + 1; out: - if (ht_dir_fd != -1) - sys_close (ht_dir_fd); - return ret; + if (ht_dir_fd != -1) + sys_close(ht_dir_fd); + return ret; } /* Returns 0 on successful creation of htime file * returns -1 on failure or error */ int -htime_create (xlator_t *this, - changelog_priv_t *priv, unsigned long ts) +htime_create(xlator_t *this, changelog_priv_t *priv, time_t ts) { - int ht_file_fd = -1; - int ht_dir_fd = -1; - int ret = 0; - char ht_dir_path[PATH_MAX] = {0,}; - char ht_file_path[PATH_MAX] = {0,}; - char ht_file_bname[NAME_MAX + 1] = {0,}; - int flags = 0; - - gf_msg (this->name, GF_LOG_INFO, 0, - CHANGELOG_MSG_HTIME_INFO, "Changelog enable: Creating new " - "HTIME.%lu file", ts); - - CHANGELOG_FILL_HTIME_DIR(priv->changelog_dir, ht_dir_path); - - /* get the htime file name in ht_file_path */ - (void) snprintf (ht_file_path,PATH_MAX,"%s/%s.%lu",ht_dir_path, - HTIME_FILE_NAME, ts); - - flags |= (O_CREAT | O_RDWR | O_SYNC); - ht_file_fd = open (ht_file_path, flags, - S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); - if (ht_file_fd < 0) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_OPEN_FAILED, - "unable to create htime file: %s", - ht_file_path); - ret = -1; - goto out; - } - - if (sys_fsetxattr (ht_file_fd, HTIME_KEY, HTIME_INITIAL_VALUE, - sizeof (HTIME_INITIAL_VALUE)-1, 0)) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_FSETXATTR_FAILED, - "Htime xattr initialization failed"); - ret = -1; - goto out; - } - - ret = sys_fsync (ht_file_fd); - if (ret < 0) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_FSYNC_OP_FAILED, - "fsync failed"); - goto out; - } - - /* Set xattr HTIME_CURRENT on htime directory to htime filename */ - ht_dir_fd = open (ht_dir_path, O_RDONLY); - if (ht_dir_fd == -1) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_OPEN_FAILED, "open of %s failed", - ht_dir_path); - 
ret = -1; - goto out; - } - - (void) snprintf (ht_file_bname, sizeof (ht_file_bname), "%s.%lu", - HTIME_FILE_NAME, ts); - if (sys_fsetxattr (ht_dir_fd, HTIME_CURRENT, ht_file_bname, - strlen (ht_file_bname), 0)) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_FSETXATTR_FAILED, "fsetxattr failed:" - " HTIME_CURRENT"); - ret = -1; - goto out; - } - - ret = sys_fsync (ht_dir_fd); - if (ret < 0) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_FSYNC_OP_FAILED, - "fsync failed"); - goto out; - } - - /* save this htime_fd in priv->htime_fd */ - priv->htime_fd = ht_file_fd; - /* initialize rollover-number in priv to 1 */ - priv->rollover_count = 1; + int ht_file_fd = -1; + int ht_dir_fd = -1; + int ret = 0; + char ht_dir_path[PATH_MAX] = { + 0, + }; + char ht_file_path[PATH_MAX] = { + 0, + }; + char ht_file_bname[NAME_MAX + 1] = { + 0, + }; + int flags = 0; + int32_t len = 0; + + gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_NEW_HTIME_FILE, + "name=%ld", ts, NULL); + + CHANGELOG_FILL_HTIME_DIR(priv->changelog_dir, ht_dir_path); + + /* get the htime file name in ht_file_path */ + len = snprintf(ht_file_path, PATH_MAX, "%s/%s.%ld", ht_dir_path, + HTIME_FILE_NAME, ts); + if ((len < 0) || (len >= PATH_MAX)) { + ret = -1; + goto out; + } + + flags |= (O_CREAT | O_RDWR | O_SYNC); + ht_file_fd = open(ht_file_path, flags, + S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); + if (ht_file_fd < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_OPEN_FAILED, + "path=%s", ht_file_path, NULL); + ret = -1; + goto out; + } + + if (sys_fsetxattr(ht_file_fd, HTIME_KEY, HTIME_INITIAL_VALUE, + sizeof(HTIME_INITIAL_VALUE) - 1, 0)) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_XATTR_INIT_FAILED, NULL); + ret = -1; + goto out; + } + + ret = sys_fsync(ht_file_fd); + if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FSYNC_OP_FAILED, + NULL); + goto out; + } + + /* save this htime_fd in priv->htime_fd */ + priv->htime_fd = 
ht_file_fd; + + ht_file_fd = -1; + + /* Set xattr HTIME_CURRENT on htime directory to htime filename */ + ht_dir_fd = open(ht_dir_path, O_RDONLY); + if (ht_dir_fd == -1) { + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_OPEN_FAILED, + "path=%s", ht_dir_path, NULL); + ret = -1; + goto out; + } + + (void)snprintf(ht_file_bname, sizeof(ht_file_bname), "%s.%ld", + HTIME_FILE_NAME, ts); + if (sys_fsetxattr(ht_dir_fd, HTIME_CURRENT, ht_file_bname, + strlen(ht_file_bname), 0)) { + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FSETXATTR_FAILED, + " HTIME_CURRENT", NULL); + ret = -1; + goto out; + } + + ret = sys_fsync(ht_dir_fd); + if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FSYNC_OP_FAILED, + NULL); + goto out; + } + + /* initialize rollover-number in priv to 1 */ + priv->rollover_count = 1; out: - if (ht_dir_fd != -1) - sys_close (ht_dir_fd); - return ret; + if (ht_dir_fd != -1) + sys_close(ht_dir_fd); + if (ht_file_fd != -1) + sys_close(ht_file_fd); + return ret; } /* Description: @@ -812,48 +825,53 @@ out: * -1 : On failure. 
*/ int -changelog_snap_open (xlator_t *this, - changelog_priv_t *priv) +changelog_snap_open(xlator_t *this, changelog_priv_t *priv) { - int fd = -1; - int ret = 0; - int flags = 0; - char buffer[1024] = {0,}; - char c_snap_path[PATH_MAX] = {0,}; - char csnap_dir_path[PATH_MAX] = {0,}; - - CHANGELOG_FILL_CSNAP_DIR(priv->changelog_dir, csnap_dir_path); - - (void) snprintf (c_snap_path, PATH_MAX, - "%s/"CSNAP_FILE_NAME, - csnap_dir_path); - - flags |= (O_CREAT | O_RDWR | O_TRUNC); - - fd = open (c_snap_path, flags, - S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); - if (fd < 0) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_OPEN_FAILED, "unable to open %s file ", - c_snap_path); - ret = -1; - goto out; - } - priv->c_snap_fd = fd; - - (void) snprintf (buffer, 1024, CHANGELOG_HEADER, - CHANGELOG_VERSION_MAJOR, - CHANGELOG_VERSION_MINOR, - priv->ce->encoder); - ret = changelog_snap_write_change (priv, buffer, strlen (buffer)); - if (ret < 0) { - sys_close (priv->c_snap_fd); - priv->c_snap_fd = -1; - goto out; - } + int fd = -1; + int ret = 0; + int flags = 0; + char buffer[1024] = { + 0, + }; + char c_snap_path[PATH_MAX] = { + 0, + }; + char csnap_dir_path[PATH_MAX] = { + 0, + }; + int32_t len = 0; + + CHANGELOG_FILL_CSNAP_DIR(priv->changelog_dir, csnap_dir_path); + + len = snprintf(c_snap_path, PATH_MAX, "%s/" CSNAP_FILE_NAME, + csnap_dir_path); + if ((len < 0) || (len >= PATH_MAX)) { + ret = -1; + goto out; + } + + flags |= (O_CREAT | O_RDWR | O_TRUNC); + + fd = open(c_snap_path, flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); + if (fd < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_OPEN_FAILED, + "path=%s", c_snap_path, NULL); + ret = -1; + goto out; + } + priv->c_snap_fd = fd; + + (void)snprintf(buffer, 1024, CHANGELOG_HEADER, CHANGELOG_VERSION_MAJOR, + CHANGELOG_VERSION_MINOR, priv->ce->encoder); + ret = changelog_snap_write_change(priv, buffer, strlen(buffer)); + if (ret < 0) { + sys_close(priv->c_snap_fd); + priv->c_snap_fd = -1; + goto out; + 
} out: - return ret; + return ret; } /* @@ -864,17 +882,15 @@ out: * -1 : On Failure. */ int -changelog_snap_logging_start (xlator_t *this, - changelog_priv_t *priv) +changelog_snap_logging_start(xlator_t *this, changelog_priv_t *priv) { - int ret = 0; + int ret = 0; - ret = changelog_snap_open (this, priv); - gf_msg (this->name, GF_LOG_INFO, 0, - CHANGELOG_MSG_SNAP_INFO, - "Now starting to log in call path"); + ret = changelog_snap_open(this, priv); + gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_SNAP_INFO, "starting", + NULL); - return ret; + return ret; } /* @@ -885,118 +901,104 @@ changelog_snap_logging_start (xlator_t *this, * -1 : On Failure. */ int -changelog_snap_logging_stop (xlator_t *this, - changelog_priv_t *priv) +changelog_snap_logging_stop(xlator_t *this, changelog_priv_t *priv) { - int ret = 0; + int ret = 0; - sys_close (priv->c_snap_fd); - priv->c_snap_fd = -1; + sys_close(priv->c_snap_fd); + priv->c_snap_fd = -1; - gf_msg (this->name, GF_LOG_INFO, 0, - CHANGELOG_MSG_SNAP_INFO, - "Stopped to log in call path"); + gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_SNAP_INFO, "Stopped", + NULL); - return ret; + return ret; } int -changelog_open_journal (xlator_t *this, - changelog_priv_t *priv) +changelog_open_journal(xlator_t *this, changelog_priv_t *priv) { - int fd = 0; - int ret = -1; - int flags = 0; - char buffer[1024] = {0,}; - char changelog_path[PATH_MAX] = {0,}; - - (void) snprintf (changelog_path, PATH_MAX, - "%s/"CHANGELOG_FILE_NAME, - priv->changelog_dir); - - flags |= (O_CREAT | O_RDWR); - if (priv->fsync_interval == 0) - flags |= O_SYNC; - - fd = open (changelog_path, flags, - S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); - if (fd < 0) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_OPEN_FAILED, - "unable to open/create changelog file %s." 
- " change-logging will be" - " inactive", changelog_path); - goto out; - } - - priv->changelog_fd = fd; + int fd = 0; + int ret = -1; + int flags = 0; + char buffer[1024] = { + 0, + }; + char changelog_path[PATH_MAX] = { + 0, + }; + + (void)snprintf(changelog_path, PATH_MAX, "%s/" CHANGELOG_FILE_NAME, + priv->changelog_dir); + + flags |= (O_CREAT | O_RDWR); + if (priv->fsync_interval == 0) + flags |= O_SYNC; + + fd = open(changelog_path, flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); + if (fd < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_OPEN_FAILED, + "path=%s", changelog_path, NULL); + goto out; + } + + priv->changelog_fd = fd; + + (void)snprintf(buffer, 1024, CHANGELOG_HEADER, CHANGELOG_VERSION_MAJOR, + CHANGELOG_VERSION_MINOR, priv->ce->encoder); + ret = changelog_write_change(priv, buffer, strlen(buffer)); + if (ret) { + sys_close(priv->changelog_fd); + priv->changelog_fd = -1; + goto out; + } + + ret = 0; - (void) snprintf (buffer, 1024, CHANGELOG_HEADER, - CHANGELOG_VERSION_MAJOR, - CHANGELOG_VERSION_MINOR, - priv->ce->encoder); - ret = changelog_write_change (priv, buffer, strlen (buffer)); - if (ret) { - sys_close (priv->changelog_fd); - priv->changelog_fd = -1; - goto out; - } - - ret = 0; - - out: - return ret; +out: + return ret; } int -changelog_start_next_change (xlator_t *this, - changelog_priv_t *priv, - unsigned long ts, gf_boolean_t finale) +changelog_start_next_change(xlator_t *this, changelog_priv_t *priv, time_t ts, + gf_boolean_t finale) { - int ret = -1; + int ret = -1; - ret = changelog_rollover_changelog (this, priv, ts); + ret = changelog_rollover_changelog(this, priv, ts); - if (!ret && !finale) - ret = changelog_open_journal (this, priv); + if (!ret && !finale) + ret = changelog_open_journal(this, priv); - return ret; + return ret; } /** * return the length of entry */ size_t -changelog_entry_length () +changelog_entry_length() { - return sizeof (changelog_log_data_t); + return sizeof(changelog_log_data_t); } -int 
-changelog_fill_rollover_data (changelog_log_data_t *cld, gf_boolean_t is_last) +void +changelog_fill_rollover_data(changelog_log_data_t *cld, gf_boolean_t is_last) { - struct timeval tv = {0,}; - - cld->cld_type = CHANGELOG_TYPE_ROLLOVER; - - if (gettimeofday (&tv, NULL)) - return -1; - - cld->cld_roll_time = (unsigned long) tv.tv_sec; - cld->cld_finale = is_last; - return 0; + cld->cld_type = CHANGELOG_TYPE_ROLLOVER; + cld->cld_roll_time = gf_time(); + cld->cld_finale = is_last; } int -changelog_snap_write_change (changelog_priv_t *priv, char *buffer, size_t len) +changelog_snap_write_change(changelog_priv_t *priv, char *buffer, size_t len) { - return changelog_write (priv->c_snap_fd, buffer, len); + return changelog_write(priv->c_snap_fd, buffer, len); } int -changelog_write_change (changelog_priv_t *priv, char *buffer, size_t len) +changelog_write_change(changelog_priv_t *priv, char *buffer, size_t len) { - return changelog_write (priv->changelog_fd, buffer, len); + return changelog_write(priv->changelog_fd, buffer, len); } /* @@ -1009,249 +1011,230 @@ changelog_write_change (changelog_priv_t *priv, char *buffer, size_t len) * -1 : On Failure. 
*/ int -changelog_snap_handle_ascii_change (xlator_t *this, - changelog_log_data_t *cld) +changelog_snap_handle_ascii_change(xlator_t *this, changelog_log_data_t *cld) { - size_t off = 0; - size_t gfid_len = 0; - char *gfid_str = NULL; - char *buffer = NULL; - changelog_priv_t *priv = NULL; - int ret = 0; - - if (this == NULL) { - ret = -1; - goto out; - } - - priv = this->private; - - if (priv == NULL) { - ret = -1; - goto out; - } - - gfid_str = uuid_utoa (cld->cld_gfid); - gfid_len = strlen (gfid_str); - - /* extra bytes for decorations */ - buffer = alloca (gfid_len + cld->cld_ptr_len + 10); - CHANGELOG_STORE_ASCII (priv, buffer, - off, gfid_str, gfid_len, cld); - - CHANGELOG_FILL_BUFFER (buffer, off, "\0", 1); - - ret = changelog_snap_write_change (priv, buffer, off); - - if (ret < 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_WRITE_FAILED, - "error writing csnap to disk"); - } - gf_msg (this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_SNAP_INFO, - "Successfully wrote to csnap"); - ret = 0; + size_t off = 0; + size_t gfid_len = 0; + char *gfid_str = NULL; + char *buffer = NULL; + changelog_priv_t *priv = NULL; + int ret = 0; + + if (this == NULL) { + ret = -1; + goto out; + } + + priv = this->private; + + if (priv == NULL) { + ret = -1; + goto out; + } + + gfid_str = uuid_utoa(cld->cld_gfid); + gfid_len = strlen(gfid_str); + + /* extra bytes for decorations */ + buffer = alloca(gfid_len + cld->cld_ptr_len + 10); + CHANGELOG_STORE_ASCII(priv, buffer, off, gfid_str, gfid_len, cld); + + CHANGELOG_FILL_BUFFER(buffer, off, "\0", 1); + + ret = changelog_snap_write_change(priv, buffer, off); + + if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_WRITE_FAILED, + "csnap", NULL); + } + gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_WROTE_TO_CSNAP, NULL); + ret = 0; out: - return ret; + return ret; } int -changelog_handle_change (xlator_t *this, - changelog_priv_t *priv, changelog_log_data_t *cld) +changelog_handle_change(xlator_t *this, 
changelog_priv_t *priv, + changelog_log_data_t *cld) { - int ret = 0; - - if (CHANGELOG_TYPE_IS_ROLLOVER (cld->cld_type)) { - changelog_encode_change (priv); - ret = changelog_start_next_change (this, priv, - cld->cld_roll_time, - cld->cld_finale); - if (ret) - gf_msg (this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_GET_TIME_OP_FAILED, - "Problem rolling over changelog(s)"); - goto out; - } + int ret = 0; - /** - * case when there is reconfigure done (disabling changelog) and there - * are still fops that have updates in prgress. - */ - if (priv->changelog_fd == -1) - return 0; - - if (CHANGELOG_TYPE_IS_FSYNC (cld->cld_type)) { - ret = sys_fsync (priv->changelog_fd); - if (ret < 0) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_FSYNC_OP_FAILED, - "fsync failed"); - } - goto out; - } + if (CHANGELOG_TYPE_IS_ROLLOVER(cld->cld_type)) { + changelog_encode_change(priv); + ret = changelog_start_next_change(this, priv, cld->cld_roll_time, + cld->cld_finale); + if (ret) + gf_smsg(this->name, GF_LOG_ERROR, 0, + CHANGELOG_MSG_GET_TIME_OP_FAILED, NULL); + goto out; + } + + /** + * case when there is reconfigure done (disabling changelog) and there + * are still fops that have updates in prgress. 
+ */ + if (priv->changelog_fd == -1) + return 0; - ret = priv->ce->encode (this, cld); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_WRITE_FAILED, - "error writing changelog to disk"); + if (CHANGELOG_TYPE_IS_FSYNC(cld->cld_type)) { + ret = sys_fsync(priv->changelog_fd); + if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_FSYNC_OP_FAILED, NULL); } + goto out; + } - out: - return ret; + ret = priv->ce->encode(this, cld); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_WRITE_FAILED, + "changelog", NULL); + } + +out: + return ret; } changelog_local_t * -changelog_local_init (xlator_t *this, inode_t *inode, - uuid_t gfid, int xtra_records, - gf_boolean_t update_flag) +changelog_local_init(xlator_t *this, inode_t *inode, uuid_t gfid, + int xtra_records, gf_boolean_t update_flag) { - changelog_local_t *local = NULL; - struct iobuf *iobuf = NULL; + changelog_local_t *local = NULL; + struct iobuf *iobuf = NULL; - /** - * We relax the presence of inode if @update_flag is true. - * The caller (implmentation of the fop) needs to be careful to - * not blindly use local->inode. - */ - if (!update_flag && !inode) { - gf_msg_callingfn (this->name, GF_LOG_WARNING, 0, - CHANGELOG_MSG_INODE_NOT_FOUND, - "inode needed for version checking !!!"); - goto out; - } + /** + * We relax the presence of inode if @update_flag is true. + * The caller (implementation of the fop) needs to be careful to + * not blindly use local->inode. 
+ */ + if (!update_flag && !inode) { + gf_msg_callingfn(this->name, GF_LOG_WARNING, 0, + CHANGELOG_MSG_INODE_NOT_FOUND, + "inode needed for version checking !!!"); - if (xtra_records) { - iobuf = iobuf_get2 (this->ctx->iobuf_pool, - xtra_records * CHANGELOG_OPT_RECORD_LEN); - if (!iobuf) - goto out; - } + goto out; + } - local = mem_get0 (this->local_pool); - if (!local) { - CHANGELOG_IOBUF_UNREF (iobuf); - goto out; - } + if (xtra_records) { + iobuf = iobuf_get2(this->ctx->iobuf_pool, + xtra_records * CHANGELOG_OPT_RECORD_LEN); + if (!iobuf) + goto out; + } - local->update_no_check = update_flag; + local = mem_get0(this->local_pool); + if (!local) { + CHANGELOG_IOBUF_UNREF(iobuf); + goto out; + } - gf_uuid_copy (local->cld.cld_gfid, gfid); + local->update_no_check = update_flag; - local->cld.cld_iobuf = iobuf; - local->cld.cld_xtra_records = 0; /* set by the caller */ + gf_uuid_copy(local->cld.cld_gfid, gfid); - if (inode) - local->inode = inode_ref (inode); + local->cld.cld_iobuf = iobuf; + local->cld.cld_xtra_records = 0; /* set by the caller */ - out: - return local; + if (inode) + local->inode = inode_ref(inode); + +out: + return local; } int -changelog_forget (xlator_t *this, inode_t *inode) +changelog_forget(xlator_t *this, inode_t *inode) { - uint64_t ctx_addr = 0; - changelog_inode_ctx_t *ctx = NULL; + uint64_t ctx_addr = 0; + changelog_inode_ctx_t *ctx = NULL; - inode_ctx_del (inode, this, &ctx_addr); - if (!ctx_addr) - return 0; + inode_ctx_del(inode, this, &ctx_addr); + if (!ctx_addr) + return 0; - ctx = (changelog_inode_ctx_t *) (long) ctx_addr; - GF_FREE (ctx); + ctx = (changelog_inode_ctx_t *)(long)ctx_addr; + GF_FREE(ctx); - return 0; + return 0; } int -changelog_inject_single_event (xlator_t *this, - changelog_priv_t *priv, - changelog_log_data_t *cld) +changelog_inject_single_event(xlator_t *this, changelog_priv_t *priv, + changelog_log_data_t *cld) { - return priv->cd.dispatchfn (this, priv, priv->cd.cd_data, cld, NULL); + return 
priv->cd.dispatchfn(this, priv, priv->cd.cd_data, cld, NULL); } /* Wait till all the black fops are drained */ void -changelog_drain_black_fops (xlator_t *this, changelog_priv_t *priv) +changelog_drain_black_fops(xlator_t *this, changelog_priv_t *priv) { - int ret = 0; - - /* clean up framework of pthread_mutex is required here as - * 'reconfigure' terminates the changelog_rollover thread - * on graph change. - */ - pthread_cleanup_push (changelog_cleanup_free_mutex, - &priv->dm.drain_black_mutex); - ret = pthread_mutex_lock (&priv->dm.drain_black_mutex); - if (ret) - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_PTHREAD_ERROR, "pthread error:" - " Error:%d", ret); - while (priv->dm.black_fop_cnt > 0) { - gf_msg_debug (this->name, 0, - "Condtional wait on black fops: %ld", - priv->dm.black_fop_cnt); - priv->dm.drain_wait_black = _gf_true; - ret = pthread_cond_wait (&priv->dm.drain_black_cond, - &priv->dm.drain_black_mutex); - if (ret) - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_PTHREAD_COND_WAIT_FAILED, - "pthread cond wait failed: Error:%d", - ret); - } - priv->dm.drain_wait_black = _gf_false; - ret = pthread_mutex_unlock (&priv->dm.drain_black_mutex); + int ret = 0; + + /* clean up framework of pthread_mutex is required here as + * 'reconfigure' terminates the changelog_rollover thread + * on graph change. 
+ */ + pthread_cleanup_push(changelog_cleanup_free_mutex, + &priv->dm.drain_black_mutex); + ret = pthread_mutex_lock(&priv->dm.drain_black_mutex); + if (ret) + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_PTHREAD_ERROR, + "error=%d", ret, NULL); + while (priv->dm.black_fop_cnt > 0) { + gf_msg_debug(this->name, 0, "Conditional wait on black fops: %ld", + priv->dm.black_fop_cnt); + priv->dm.drain_wait_black = _gf_true; + ret = pthread_cond_wait(&priv->dm.drain_black_cond, + &priv->dm.drain_black_mutex); if (ret) - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_PTHREAD_ERROR, "pthread error:" - " Error:%d", ret); - pthread_cleanup_pop (0); - gf_msg_debug (this->name, 0, - "Woke up: Conditional wait on black fops"); + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_PTHREAD_COND_WAIT_FAILED, "error=%d", ret, + NULL); + } + priv->dm.drain_wait_black = _gf_false; + ret = pthread_mutex_unlock(&priv->dm.drain_black_mutex); + if (ret) + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_PTHREAD_ERROR, + "error=%d", ret, NULL); + pthread_cleanup_pop(0); + gf_msg_debug(this->name, 0, "Woke up: Conditional wait on black fops"); } /* Wait till all the white fops are drained */ void -changelog_drain_white_fops (xlator_t *this, changelog_priv_t *priv) +changelog_drain_white_fops(xlator_t *this, changelog_priv_t *priv) { - int ret = 0; - - /* clean up framework of pthread_mutex is required here as - * 'reconfigure' terminates the changelog_rollover thread - * on graph change. - */ - pthread_cleanup_push (changelog_cleanup_free_mutex, - &priv->dm.drain_white_mutex); - ret = pthread_mutex_lock (&priv->dm.drain_white_mutex); + int ret = 0; + + /* clean up framework of pthread_mutex is required here as + * 'reconfigure' terminates the changelog_rollover thread + * on graph change. 
+ */ + pthread_cleanup_push(changelog_cleanup_free_mutex, + &priv->dm.drain_white_mutex); + ret = pthread_mutex_lock(&priv->dm.drain_white_mutex); + if (ret) + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_PTHREAD_ERROR, + "error=%d", ret, NULL); + while (priv->dm.white_fop_cnt > 0) { + gf_msg_debug(this->name, 0, "Conditional wait on white fops : %ld", + priv->dm.white_fop_cnt); + priv->dm.drain_wait_white = _gf_true; + ret = pthread_cond_wait(&priv->dm.drain_white_cond, + &priv->dm.drain_white_mutex); if (ret) - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_PTHREAD_ERROR, "pthread error:" - " Error:%d", ret); - while (priv->dm.white_fop_cnt > 0) { - gf_msg_debug (this->name, 0, - "Condtional wait on white fops : %ld", - priv->dm.white_fop_cnt); - priv->dm.drain_wait_white = _gf_true; - ret = pthread_cond_wait (&priv->dm.drain_white_cond, - &priv->dm.drain_white_mutex); - if (ret) - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_PTHREAD_COND_WAIT_FAILED, - "pthread cond wait failed: Error:%d", - ret); - } - priv->dm.drain_wait_white = _gf_false; - ret = pthread_mutex_unlock (&priv->dm.drain_white_mutex); - if (ret) - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_PTHREAD_ERROR, "pthread error:" - " Error:%d", ret); - pthread_cleanup_pop (0); - gf_msg_debug (this->name, 0, - "Woke up: Conditional wait on white fops"); + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_PTHREAD_COND_WAIT_FAILED, "error=%d", ret, + NULL); + } + priv->dm.drain_wait_white = _gf_false; + ret = pthread_mutex_unlock(&priv->dm.drain_white_mutex); + if (ret) + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_PTHREAD_ERROR, + "error=%d", ret, NULL); + pthread_cleanup_pop(0); + gf_msg_debug(this->name, 0, "Woke up: Conditional wait on white fops"); } /** @@ -1259,204 +1242,194 @@ changelog_drain_white_fops (xlator_t *this, changelog_priv_t *priv) * a certain time etc..). move them into separate routine. 
*/ void * -changelog_rollover (void *data) +changelog_rollover(void *data) { - int ret = 0; - xlator_t *this = NULL; - struct timeval tv = {0,}; - changelog_log_data_t cld = {0,}; - changelog_time_slice_t *slice = NULL; - changelog_priv_t *priv = data; - int max_fd = 0; - char buf[1] = {0}; - int len = 0; - - fd_set rset; - - this = priv->cr.this; - slice = &priv->slice; - - while (1) { - (void) pthread_testcancel(); - - tv.tv_sec = priv->rollover_time; - tv.tv_usec = 0; - FD_ZERO(&rset); - FD_SET(priv->cr.rfd, &rset); - max_fd = priv->cr.rfd; - max_fd = max_fd + 1; - - /* It seems there is a race between actual rollover and explicit - * rollover. But it is handled. If actual rollover is being - * done and the explicit rollover event comes, the event is - * not missed. The next select will immediately wakeup to - * handle explicit wakeup. - */ - - ret = select (max_fd, &rset, NULL, NULL, &tv); - if (ret == -1) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_SELECT_FAILED, - "select failed"); - continue; - } else if (ret && FD_ISSET(priv->cr.rfd, &rset)) { - gf_msg (this->name, GF_LOG_INFO, 0, - CHANGELOG_MSG_BARRIER_INFO, - "Explicit wakeup of select on barrier notify"); - len = sys_read (priv->cr.rfd, buf, 1); - if (len == 0) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_READ_ERROR, "BUG: Got EOF" - " from reconfigure notification pipe"); - continue; - } - if (len < 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_READ_ERROR, - "Failed to read wakeup data"); - continue; - } - /* Lock is not required as same thread is modifying.*/ - priv->explicit_rollover = _gf_true; - } else { - gf_msg_debug (this->name, 0, - "select wokeup on timeout"); - } - - /* Reading curent_color without lock is fine here - * as it is only modified here and is next to reading. 
- */ - if (priv->current_color == FOP_COLOR_BLACK) { - LOCK(&priv->lock); - priv->current_color = FOP_COLOR_WHITE; - UNLOCK(&priv->lock); - gf_msg_debug (this->name, 0, "Black fops" - " to be drained:%ld", - priv->dm.black_fop_cnt); - changelog_drain_black_fops (this, priv); - } else { - LOCK(&priv->lock); - priv->current_color = FOP_COLOR_BLACK; - UNLOCK(&priv->lock); - gf_msg_debug (this->name, 0, "White fops" - " to be drained:%ld", - priv->dm.white_fop_cnt); - changelog_drain_white_fops (this, priv); - } - - /* Adding delay of 1 second only during explicit rollover: - * - * Changelog rollover can happen either due to actual - * or the explict rollover during snapshot. Actual - * rollover is controlled by tuneable called 'rollover-time'. - * The minimum granularity for rollover-time is 1 second. - * Explicit rollover is asynchronous in nature and happens - * during snapshot. - * - * Basically, rollover renames the current CHANGELOG file - * to CHANGELOG.TIMESTAMP. Let's assume, at time 't1', - * actual and explicit rollover raced against each - * other and actual rollover won the race renaming the - * CHANGELOG file to CHANGELOG.t1 and opens a new - * CHANGELOG file. There is high chance that, an immediate - * explicit rollover at time 't1' can happen with in the same - * second to rename CHANGELOG file to CHANGELOG.t1 resulting in - * purging the earlier CHANGELOG.t1 file created by actual - * rollover. So adding a delay of 1 second guarantees unique - * CHANGELOG.TIMESTAMP during explicit rollover. 
- */ - if (priv->explicit_rollover == _gf_true) - sleep (1); - - ret = changelog_fill_rollover_data (&cld, _gf_false); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_GET_TIME_OP_FAILED, - "failed to fill rollover data"); - continue; - } + int ret = 0; + xlator_t *this = NULL; + struct timespec tv = { + 0, + }; + changelog_log_data_t cld = { + 0, + }; + changelog_time_slice_t *slice = NULL; + changelog_priv_t *priv = data; + + this = priv->cr.this; + slice = &priv->slice; + + while (1) { + (void)pthread_testcancel(); + + tv.tv_sec = gf_time() + priv->rollover_time; + tv.tv_nsec = 0; + ret = 0; /* Reset ret to zero */ + + /* The race between actual rollover and explicit rollover is + * handled. If actual rollover is being done and the + * explicit rollover event comes, the event is not missed. + * Since explicit rollover sets 'cr.notify' to true, this + * thread doesn't wait on 'pthread_cond_timedwait'. + */ + pthread_cleanup_push(changelog_cleanup_free_mutex, &priv->cr.lock); + pthread_mutex_lock(&priv->cr.lock); + { + while (ret == 0 && !priv->cr.notify) + ret = pthread_cond_timedwait(&priv->cr.cond, &priv->cr.lock, + &tv); + if (ret == 0) + priv->cr.notify = _gf_false; + } + pthread_mutex_unlock(&priv->cr.lock); + pthread_cleanup_pop(0); + + if (ret == 0) { + gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_BARRIER_INFO, + NULL); + priv->explicit_rollover = _gf_true; + } else if (ret && ret != ETIMEDOUT) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_SELECT_FAILED, NULL); + continue; + } else if (ret && ret == ETIMEDOUT) { + gf_msg_debug(this->name, 0, "Wokeup on timeout"); + } + + /* Reading curent_color without lock is fine here + * as it is only modified here and is next to reading. 
+ */ + if (priv->current_color == FOP_COLOR_BLACK) { + LOCK(&priv->lock); + priv->current_color = FOP_COLOR_WHITE; + UNLOCK(&priv->lock); + gf_msg_debug(this->name, 0, + "Black fops" + " to be drained:%ld", + priv->dm.black_fop_cnt); + changelog_drain_black_fops(this, priv); + } else { + LOCK(&priv->lock); + priv->current_color = FOP_COLOR_BLACK; + UNLOCK(&priv->lock); + gf_msg_debug(this->name, 0, + "White fops" + " to be drained:%ld", + priv->dm.white_fop_cnt); + changelog_drain_white_fops(this, priv); + } + + /* Adding delay of 1 second only during explicit rollover: + * + * Changelog rollover can happen either due to actual + * or the explicit rollover during snapshot. Actual + * rollover is controlled by tuneable called 'rollover-time'. + * The minimum granularity for rollover-time is 1 second. + * Explicit rollover is asynchronous in nature and happens + * during snapshot. + * + * Basically, rollover renames the current CHANGELOG file + * to CHANGELOG.TIMESTAMP. Let's assume, at time 't1', + * actual and explicit rollover raced against each + * other and actual rollover won the race renaming the + * CHANGELOG file to CHANGELOG.t1 and opens a new + * CHANGELOG file. There is high chance that, an immediate + * explicit rollover at time 't1' can happen with in the same + * second to rename CHANGELOG file to CHANGELOG.t1 resulting in + * purging the earlier CHANGELOG.t1 file created by actual + * rollover. So adding a delay of 1 second guarantees unique + * CHANGELOG.TIMESTAMP during explicit rollover. 
+ */ + if (priv->explicit_rollover == _gf_true) + sleep(1); - _mask_cancellation (); + changelog_fill_rollover_data(&cld, _gf_false); - LOCK (&priv->lock); - { - ret = changelog_inject_single_event (this, priv, &cld); - if (!ret) - SLICE_VERSION_UPDATE (slice); - } - UNLOCK (&priv->lock); + _mask_cancellation(); - _unmask_cancellation (); + LOCK(&priv->lock); + { + ret = changelog_inject_single_event(this, priv, &cld); + if (!ret) + SLICE_VERSION_UPDATE(slice); } + UNLOCK(&priv->lock); - return NULL; + _unmask_cancellation(); + } + + return NULL; } void * -changelog_fsync_thread (void *data) +changelog_fsync_thread(void *data) { - int ret = 0; - xlator_t *this = NULL; - struct timeval tv = {0,}; - changelog_log_data_t cld = {0,}; - changelog_priv_t *priv = data; - - this = priv->cf.this; - cld.cld_type = CHANGELOG_TYPE_FSYNC; - - while (1) { - (void) pthread_testcancel(); - - tv.tv_sec = priv->fsync_interval; - tv.tv_usec = 0; + int ret = 0; + xlator_t *this = NULL; + struct timeval tv = { + 0, + }; + changelog_log_data_t cld = { + 0, + }; + changelog_priv_t *priv = data; + + this = priv->cf.this; + cld.cld_type = CHANGELOG_TYPE_FSYNC; + + while (1) { + (void)pthread_testcancel(); + + tv.tv_sec = priv->fsync_interval; + tv.tv_usec = 0; + + ret = select(0, NULL, NULL, NULL, &tv); + if (ret) + continue; - ret = select (0, NULL, NULL, NULL, &tv); - if (ret) - continue; + _mask_cancellation(); - _mask_cancellation (); + ret = changelog_inject_single_event(this, priv, &cld); + if (ret) + gf_smsg(this->name, GF_LOG_ERROR, 0, + CHANGELOG_MSG_INJECT_FSYNC_FAILED, NULL); - ret = changelog_inject_single_event (this, priv, &cld); - if (ret) - gf_msg (this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_INJECT_FSYNC_FAILED, - "failed to inject fsync event"); + _unmask_cancellation(); + } - _unmask_cancellation (); - } - - return NULL; + return NULL; } /* macros for inode/changelog version checks */ -#define INODE_VERSION_UPDATE(priv, inode, iver, slice, type) do { \ - LOCK 
(&inode->lock); \ - { \ - LOCK (&priv->lock); \ - { \ - *iver = slice->changelog_version[type]; \ - } \ - UNLOCK (&priv->lock); \ - } \ - UNLOCK (&inode->lock); \ - } while (0) - -#define INODE_VERSION_EQUALS_SLICE(priv, ver, slice, type, upd) do { \ - LOCK (&priv->lock); \ - { \ - upd = (ver == slice->changelog_version[type]) \ - ? _gf_false : _gf_true; \ - } \ - UNLOCK (&priv->lock); \ - } while (0) +#define INODE_VERSION_UPDATE(priv, inode, iver, slice, type) \ + do { \ + LOCK(&inode->lock); \ + { \ + LOCK(&priv->lock); \ + { \ + *iver = slice->changelog_version[type]; \ + } \ + UNLOCK(&priv->lock); \ + } \ + UNLOCK(&inode->lock); \ + } while (0) + +#define INODE_VERSION_EQUALS_SLICE(priv, ver, slice, type, upd) \ + do { \ + LOCK(&priv->lock); \ + { \ + upd = (ver == slice->changelog_version[type]) ? _gf_false \ + : _gf_true; \ + } \ + UNLOCK(&priv->lock); \ + } while (0) static int -__changelog_inode_ctx_set (xlator_t *this, - inode_t *inode, changelog_inode_ctx_t *ctx) +__changelog_inode_ctx_set(xlator_t *this, inode_t *inode, + changelog_inode_ctx_t *ctx) { - uint64_t ctx_addr = (uint64_t) ctx; - return __inode_ctx_set (inode, this, &ctx_addr); + uint64_t ctx_addr = (uint64_t)(uintptr_t)ctx; + return __inode_ctx_set(inode, this, &ctx_addr); } /** @@ -1464,56 +1437,53 @@ __changelog_inode_ctx_set (xlator_t *this, * for a particular type. 
*/ changelog_inode_ctx_t * -__changelog_inode_ctx_get (xlator_t *this, - inode_t *inode, unsigned long **iver, - unsigned long *version, changelog_log_type type) +__changelog_inode_ctx_get(xlator_t *this, inode_t *inode, unsigned long **iver, + unsigned long *version, changelog_log_type type) { - int ret = 0; - uint64_t ctx_addr = 0; - changelog_inode_ctx_t *ctx = NULL; - - ret = __inode_ctx_get (inode, this, &ctx_addr); - if (ret < 0) - ctx_addr = 0; - if (ctx_addr != 0) { - ctx = (changelog_inode_ctx_t *) (long)ctx_addr; - goto out; - } - - ctx = GF_CALLOC (1, sizeof (*ctx), gf_changelog_mt_inode_ctx_t); - if (!ctx) - goto out; + int ret = 0; + uint64_t ctx_addr = 0; + changelog_inode_ctx_t *ctx = NULL; + + ret = __inode_ctx_get(inode, this, &ctx_addr); + if (ret < 0) + ctx_addr = 0; + if (ctx_addr != 0) { + ctx = (changelog_inode_ctx_t *)(long)ctx_addr; + goto out; + } + + ctx = GF_CALLOC(1, sizeof(*ctx), gf_changelog_mt_inode_ctx_t); + if (!ctx) + goto out; + + ret = __changelog_inode_ctx_set(this, inode, ctx); + if (ret) { + GF_FREE(ctx); + ctx = NULL; + } - ret = __changelog_inode_ctx_set (this, inode, ctx); - if (ret) { - GF_FREE (ctx); - ctx = NULL; - } - - out: - if (ctx && iver && version) { - *iver = CHANGELOG_INODE_VERSION_TYPE (ctx, type); - *version = **iver; - } +out: + if (ctx && iver && version) { + *iver = CHANGELOG_INODE_VERSION_TYPE(ctx, type); + *version = **iver; + } - return ctx; + return ctx; } static changelog_inode_ctx_t * -changelog_inode_ctx_get (xlator_t *this, - inode_t *inode, unsigned long **iver, - unsigned long *version, changelog_log_type type) +changelog_inode_ctx_get(xlator_t *this, inode_t *inode, unsigned long **iver, + unsigned long *version, changelog_log_type type) { - changelog_inode_ctx_t *ctx = NULL; + changelog_inode_ctx_t *ctx = NULL; - LOCK (&inode->lock); - { - ctx = __changelog_inode_ctx_get (this, - inode, iver, version, type); - } - UNLOCK (&inode->lock); + LOCK(&inode->lock); + { + ctx = 
__changelog_inode_ctx_get(this, inode, iver, version, type); + } + UNLOCK(&inode->lock); - return ctx; + return ctx; } /** @@ -1617,59 +1587,57 @@ changelog_inode_ctx_get (xlator_t *this, * signifies an update was recorded in the current time slice). */ void -changelog_update (xlator_t *this, changelog_priv_t *priv, - changelog_local_t *local, changelog_log_type type) +changelog_update(xlator_t *this, changelog_priv_t *priv, + changelog_local_t *local, changelog_log_type type) { - int ret = 0; - unsigned long *iver = NULL; - unsigned long version = 0; - inode_t *inode = NULL; - changelog_time_slice_t *slice = NULL; - changelog_inode_ctx_t *ctx = NULL; - changelog_log_data_t *cld_0 = NULL; - changelog_log_data_t *cld_1 = NULL; - changelog_local_t *next_local = NULL; - gf_boolean_t need_upd = _gf_true; - - slice = &priv->slice; - - /** - * for fops that do not require inode version checking - */ - if (local->update_no_check) - goto update; + int ret = 0; + unsigned long *iver = NULL; + unsigned long version = 0; + inode_t *inode = NULL; + changelog_time_slice_t *slice = NULL; + changelog_inode_ctx_t *ctx = NULL; + changelog_log_data_t *cld_0 = NULL; + changelog_log_data_t *cld_1 = NULL; + changelog_local_t *next_local = NULL; + gf_boolean_t need_upd = _gf_true; - inode = local->inode; + slice = &priv->slice; - ctx = changelog_inode_ctx_get (this, - inode, &iver, &version, type); - if (!ctx) - goto update; + /** + * for fops that do not require inode version checking + */ + if (local->update_no_check) + goto update; - INODE_VERSION_EQUALS_SLICE (priv, version, slice, type, need_upd); + inode = local->inode; - update: - if (need_upd) { - cld_0 = &local->cld; - cld_0->cld_type = type; + ctx = changelog_inode_ctx_get(this, inode, &iver, &version, type); + if (!ctx) + goto update; - if ( (next_local = local->prev_entry) != NULL ) { - cld_1 = &next_local->cld; - cld_1->cld_type = type; - } + INODE_VERSION_EQUALS_SLICE(priv, version, slice, type, need_upd); - ret = 
priv->cd.dispatchfn (this, priv, - priv->cd.cd_data, cld_0, cld_1); +update: + if (need_upd) { + cld_0 = &local->cld; + cld_0->cld_type = type; - /** - * update after the dispatcher has successfully done - * it's job. - */ - if (!local->update_no_check && iver && !ret) - INODE_VERSION_UPDATE (priv, inode, iver, slice, type); + if ((next_local = local->prev_entry) != NULL) { + cld_1 = &next_local->cld; + cld_1->cld_type = type; } - return; + ret = priv->cd.dispatchfn(this, priv, priv->cd.cd_data, cld_0, cld_1); + + /** + * update after the dispatcher has successfully done + * it's job. + */ + if (!local->update_no_check && iver && !ret) + INODE_VERSION_UPDATE(priv, inode, iver, slice, type); + } + + return; } /* Begin: Geo-rep snapshot dependency changes */ @@ -1685,223 +1653,221 @@ changelog_update (xlator_t *this, changelog_priv_t *priv, */ void -changelog_color_fop_and_inc_cnt (xlator_t *this, changelog_priv_t *priv, - changelog_local_t *local) +changelog_color_fop_and_inc_cnt(xlator_t *this, changelog_priv_t *priv, + changelog_local_t *local) { - if (!priv || !local) - return; + if (!priv || !local) + return; - LOCK (&priv->lock); - { - local->color = priv->current_color; - changelog_inc_fop_cnt (this, priv, local); - } - UNLOCK (&priv->lock); + LOCK(&priv->lock); + { + local->color = priv->current_color; + changelog_inc_fop_cnt(this, priv, local); + } + UNLOCK(&priv->lock); } /* Increments the respective fop counter based on the fop color */ void -changelog_inc_fop_cnt (xlator_t *this, changelog_priv_t *priv, - changelog_local_t *local) +changelog_inc_fop_cnt(xlator_t *this, changelog_priv_t *priv, + changelog_local_t *local) { - int ret = 0; - - if (local) { - if (local->color == FOP_COLOR_BLACK) { - ret = pthread_mutex_lock (&priv->dm.drain_black_mutex); - CHANGELOG_PTHREAD_ERROR_HANDLE_0 (ret, out); - { - priv->dm.black_fop_cnt++; - } - ret = pthread_mutex_unlock(&priv->dm.drain_black_mutex); - CHANGELOG_PTHREAD_ERROR_HANDLE_0 (ret, out); - } else { - ret = 
pthread_mutex_lock (&priv->dm.drain_white_mutex); - CHANGELOG_PTHREAD_ERROR_HANDLE_0 (ret, out); - { - priv->dm.white_fop_cnt++; - } - ret = pthread_mutex_unlock(&priv->dm.drain_white_mutex); - CHANGELOG_PTHREAD_ERROR_HANDLE_0 (ret, out); - } - } - out: - return; + int ret = 0; + + if (local) { + if (local->color == FOP_COLOR_BLACK) { + ret = pthread_mutex_lock(&priv->dm.drain_black_mutex); + CHANGELOG_PTHREAD_ERROR_HANDLE_0(ret, out); + { + priv->dm.black_fop_cnt++; + } + ret = pthread_mutex_unlock(&priv->dm.drain_black_mutex); + CHANGELOG_PTHREAD_ERROR_HANDLE_0(ret, out); + } else { + ret = pthread_mutex_lock(&priv->dm.drain_white_mutex); + CHANGELOG_PTHREAD_ERROR_HANDLE_0(ret, out); + { + priv->dm.white_fop_cnt++; + } + ret = pthread_mutex_unlock(&priv->dm.drain_white_mutex); + CHANGELOG_PTHREAD_ERROR_HANDLE_0(ret, out); + } + } +out: + return; } /* Decrements the respective fop counter based on the fop color */ void -changelog_dec_fop_cnt (xlator_t *this, changelog_priv_t *priv, - changelog_local_t *local) +changelog_dec_fop_cnt(xlator_t *this, changelog_priv_t *priv, + changelog_local_t *local) { - int ret = 0; - - if (local) { - if (local->color == FOP_COLOR_BLACK) { - ret = pthread_mutex_lock (&priv->dm.drain_black_mutex); - CHANGELOG_PTHREAD_ERROR_HANDLE_0 (ret, out); - { - priv->dm.black_fop_cnt--; - if (priv->dm.black_fop_cnt == 0 && - priv->dm.drain_wait_black == _gf_true) { - ret = pthread_cond_signal ( - &priv->dm.drain_black_cond); - CHANGELOG_PTHREAD_ERROR_HANDLE_0 (ret, - out); - gf_msg_debug (this->name, 0, - "Signalled " - "draining of black"); - } - } - ret = pthread_mutex_unlock(&priv->dm.drain_black_mutex); - CHANGELOG_PTHREAD_ERROR_HANDLE_0 (ret, out); - } else { - ret = pthread_mutex_lock (&priv->dm.drain_white_mutex); - CHANGELOG_PTHREAD_ERROR_HANDLE_0 (ret, out); - { - priv->dm.white_fop_cnt--; - if (priv->dm.white_fop_cnt == 0 && - priv->dm.drain_wait_white == _gf_true) { - ret = pthread_cond_signal ( - &priv->dm.drain_white_cond); - 
CHANGELOG_PTHREAD_ERROR_HANDLE_0 (ret, - out); - gf_msg_debug (this->name, 0, - "Signalled " - "draining of white"); - } - } - ret = pthread_mutex_unlock(&priv->dm.drain_white_mutex); - CHANGELOG_PTHREAD_ERROR_HANDLE_0 (ret, out); + int ret = 0; + + if (local) { + if (local->color == FOP_COLOR_BLACK) { + ret = pthread_mutex_lock(&priv->dm.drain_black_mutex); + CHANGELOG_PTHREAD_ERROR_HANDLE_0(ret, out); + { + priv->dm.black_fop_cnt--; + if (priv->dm.black_fop_cnt == 0 && + priv->dm.drain_wait_black == _gf_true) { + ret = pthread_cond_signal(&priv->dm.drain_black_cond); + CHANGELOG_PTHREAD_ERROR_HANDLE_2( + ret, out, priv->dm.drain_black_mutex); + gf_msg_debug(this->name, 0, + "Signalled " + "draining of black"); } + } + ret = pthread_mutex_unlock(&priv->dm.drain_black_mutex); + CHANGELOG_PTHREAD_ERROR_HANDLE_0(ret, out); + } else { + ret = pthread_mutex_lock(&priv->dm.drain_white_mutex); + CHANGELOG_PTHREAD_ERROR_HANDLE_0(ret, out); + { + priv->dm.white_fop_cnt--; + if (priv->dm.white_fop_cnt == 0 && + priv->dm.drain_wait_white == _gf_true) { + ret = pthread_cond_signal(&priv->dm.drain_white_cond); + CHANGELOG_PTHREAD_ERROR_HANDLE_2( + ret, out, priv->dm.drain_white_mutex); + gf_msg_debug(this->name, 0, + "Signalled " + "draining of white"); + } + } + ret = pthread_mutex_unlock(&priv->dm.drain_white_mutex); + CHANGELOG_PTHREAD_ERROR_HANDLE_0(ret, out); } - out: - return; + } +out: + return; } /* Write to a pipe setup between changelog main thread and changelog * rollover thread to initiate explicit rollover of changelog journal. 
*/ int -changelog_barrier_notify (changelog_priv_t *priv, char *buf) +changelog_barrier_notify(changelog_priv_t *priv, char *buf) { - int ret = 0; - - LOCK(&priv->lock); - ret = changelog_write (priv->cr_wfd, buf, 1); - UNLOCK(&priv->lock); - return ret; + int ret = 0; + + pthread_mutex_lock(&priv->cr.lock); + { + ret = pthread_cond_signal(&priv->cr.cond); + priv->cr.notify = _gf_true; + } + pthread_mutex_unlock(&priv->cr.lock); + return ret; } /* Clean up flags set on barrier notification */ void -changelog_barrier_cleanup (xlator_t *this, changelog_priv_t *priv, - struct list_head *queue) +changelog_barrier_cleanup(xlator_t *this, changelog_priv_t *priv, + struct list_head *queue) { - int ret = 0; - - LOCK (&priv->bflags.lock); - priv->bflags.barrier_ext = _gf_false; - UNLOCK (&priv->bflags.lock); - - ret = pthread_mutex_lock (&priv->bn.bnotify_mutex); - CHANGELOG_PTHREAD_ERROR_HANDLE_0 (ret, out); - { - priv->bn.bnotify = _gf_false; - } - ret = pthread_mutex_unlock (&priv->bn.bnotify_mutex); - CHANGELOG_PTHREAD_ERROR_HANDLE_0 (ret, out); - - /* Disable changelog barrier and dequeue fops */ - LOCK (&priv->lock); - { - if (priv->barrier_enabled == _gf_true) - __chlog_barrier_disable (this, queue); - else - ret = -1; - } - UNLOCK (&priv->lock); - if (ret == 0) - chlog_barrier_dequeue_all(this, queue); + int ret = 0; + + LOCK(&priv->bflags.lock); + priv->bflags.barrier_ext = _gf_false; + UNLOCK(&priv->bflags.lock); + + ret = pthread_mutex_lock(&priv->bn.bnotify_mutex); + CHANGELOG_PTHREAD_ERROR_HANDLE_0(ret, out); + { + priv->bn.bnotify = _gf_false; + } + ret = pthread_mutex_unlock(&priv->bn.bnotify_mutex); + CHANGELOG_PTHREAD_ERROR_HANDLE_0(ret, out); + + /* Disable changelog barrier and dequeue fops */ + LOCK(&priv->lock); + { + if (priv->barrier_enabled == _gf_true) + __chlog_barrier_disable(this, queue); + else + ret = -1; + } + UNLOCK(&priv->lock); + if (ret == 0) + chlog_barrier_dequeue_all(this, queue); - out: - return; +out: + return; } /* End: Geo-Rep 
snapshot dependency changes */ int32_t -changelog_fill_entry_buf (call_frame_t *frame, xlator_t *this, - loc_t *loc, changelog_local_t **local) +changelog_fill_entry_buf(call_frame_t *frame, xlator_t *this, loc_t *loc, + changelog_local_t **local) { - changelog_opt_t *co = NULL; - size_t xtra_len = 0; - char *dup_path = NULL; - char *bname = NULL; - inode_t *parent = NULL; - - GF_ASSERT (this); - - parent = inode_parent (loc->inode, 0, 0); - if (!parent) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_INODE_NOT_FOUND, "Parent inode not found" - " for gfid: %s", uuid_utoa (loc->inode->gfid)); - goto err; - } - - CHANGELOG_INIT_NOCHECK (this, *local, loc->inode, loc->inode->gfid, 5); - if (!(*local)) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_LOCAL_INIT_FAILED, "changelog local" - " initiatilization failed"); - goto err; - } - - co = changelog_get_usable_buffer (*local); - if (!co) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_NO_MEMORY, - "Failed to get buffer"); - goto err; - } - - if (loc->inode->ia_type == IA_IFDIR) { - CHANGLOG_FILL_FOP_NUMBER (co, GF_FOP_MKDIR, fop_fn, xtra_len); - co++; - CHANGELOG_FILL_UINT32 (co, S_IFDIR|0755, number_fn, xtra_len); - co++; - } else { - CHANGLOG_FILL_FOP_NUMBER (co, GF_FOP_CREATE, fop_fn, xtra_len); - co++; - CHANGELOG_FILL_UINT32 (co, S_IFREG|0644, number_fn, xtra_len); - co++; - } - - CHANGELOG_FILL_UINT32 (co, frame->root->uid, number_fn, xtra_len); + changelog_opt_t *co = NULL; + size_t xtra_len = 0; + char *dup_path = NULL; + char *bname = NULL; + inode_t *parent = NULL; + + GF_ASSERT(this); + + parent = inode_parent(loc->inode, 0, 0); + if (!parent) { + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_INODE_NOT_FOUND, + "type=parent", "gfid=%s", uuid_utoa(loc->inode->gfid), NULL); + goto err; + } + + CHANGELOG_INIT_NOCHECK(this, *local, loc->inode, loc->inode->gfid, 5); + if (!(*local)) { + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_LOCAL_INIT_FAILED, + NULL); + goto 
err; + } + + co = changelog_get_usable_buffer(*local); + if (!co) { + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_GET_BUFFER_FAILED, + NULL); + goto err; + } + + if (loc->inode->ia_type == IA_IFDIR) { + CHANGLOG_FILL_FOP_NUMBER(co, GF_FOP_MKDIR, fop_fn, xtra_len); co++; - - CHANGELOG_FILL_UINT32 (co, frame->root->gid, number_fn, xtra_len); + CHANGELOG_FILL_UINT32(co, S_IFDIR | 0755, number_fn, xtra_len); co++; + } else { + CHANGLOG_FILL_FOP_NUMBER(co, GF_FOP_CREATE, fop_fn, xtra_len); + co++; + CHANGELOG_FILL_UINT32(co, S_IFREG | 0644, number_fn, xtra_len); + co++; + } - dup_path = gf_strdup (loc->path); - bname = basename (dup_path); + CHANGELOG_FILL_UINT32(co, frame->root->uid, number_fn, xtra_len); + co++; - CHANGELOG_FILL_ENTRY (co, parent->gfid, bname, entry_fn, entry_free_fn, - xtra_len, err); - changelog_set_usable_record_and_length (*local, xtra_len, 5); + CHANGELOG_FILL_UINT32(co, frame->root->gid, number_fn, xtra_len); + co++; - if (dup_path) - GF_FREE (dup_path); - if (parent) - inode_unref (parent); - return 0; + dup_path = gf_strdup(loc->path); + bname = basename(dup_path); + + CHANGELOG_FILL_ENTRY(co, parent->gfid, bname, entry_fn, entry_free_fn, + xtra_len, err); + changelog_set_usable_record_and_length(*local, xtra_len, 5); + + if (dup_path) + GF_FREE(dup_path); + if (parent) + inode_unref(parent); + return 0; err: - if (dup_path) - GF_FREE (dup_path); - if (parent) - inode_unref (parent); - return -1; + if (dup_path) + GF_FREE(dup_path); + if (parent) + inode_unref(parent); + return -1; } /* @@ -1914,76 +1880,98 @@ err: */ int -resolve_pargfid_to_path (xlator_t *this, uuid_t pargfid, - char **path, char *bname) +resolve_pargfid_to_path(xlator_t *this, const uuid_t pgfid, char **path, + char *bname) { - char *linkname = NULL; - char *dir_handle = NULL; - char *pgfidstr = NULL; - char *saveptr = NULL; - ssize_t len = 0; - int ret = 0; - uuid_t tmp_gfid = {0, }; - changelog_priv_t *priv = NULL; - char gpath[PATH_MAX] = {0,}; - char 
result[PATH_MAX] = {0,}; - char *dir_name = NULL; - char pre_dir_name[PATH_MAX] = {0,}; - - GF_ASSERT (this); - priv = this->private; - GF_ASSERT (priv); - - if (!path || gf_uuid_is_null (pargfid)) { - ret = -1; - goto out; - } + char *linkname = NULL; + char *dir_handle = NULL; + char *pgfidstr = NULL; + char *saveptr = NULL; + ssize_t len = 0; + int ret = 0; + uuid_t tmp_gfid = { + 0, + }; + uuid_t pargfid = { + 0, + }; + changelog_priv_t *priv = NULL; + char gpath[PATH_MAX] = { + 0, + }; + char result[PATH_MAX] = { + 0, + }; + char *dir_name = NULL; + char pre_dir_name[PATH_MAX] = { + 0, + }; + + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + gf_uuid_copy(pargfid, pgfid); + if (!path || gf_uuid_is_null(pargfid)) { + ret = -1; + goto out; + } + + if (__is_root_gfid(pargfid)) { + if (bname) + *path = gf_strdup(bname); + else + *path = gf_strdup("."); + return ret; + } - if (__is_root_gfid (pargfid)) { - if (bname) - *path = gf_strdup (bname); - else - *path = gf_strdup ("."); - return ret; - } + dir_handle = alloca(PATH_MAX); + linkname = alloca(PATH_MAX); + (void)snprintf(gpath, PATH_MAX, "%s/.glusterfs/", priv->changelog_brick); - dir_handle = alloca (PATH_MAX); - linkname = alloca (PATH_MAX); - (void) snprintf (gpath, PATH_MAX, "%s/.glusterfs/", - priv->changelog_brick); - - while (!(__is_root_gfid (pargfid))) { - snprintf (dir_handle, PATH_MAX, "%s/%02x/%02x/%s", gpath, - pargfid[0], pargfid[1], uuid_utoa (pargfid)); - - len = sys_readlink (dir_handle, linkname, PATH_MAX); - if (len < 0) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_READLINK_OP_FAILED, - "could not read the " - "link from the gfid handle %s", dir_handle); - ret = -1; - goto out; - } + while (!(__is_root_gfid(pargfid))) { + len = snprintf(dir_handle, PATH_MAX, "%s/%02x/%02x/%s", gpath, + pargfid[0], pargfid[1], uuid_utoa(pargfid)); + if ((len < 0) || (len >= PATH_MAX)) { + ret = -1; + goto out; + } - linkname[len] = '\0'; + len = sys_readlink(dir_handle, 
linkname, PATH_MAX); + if (len < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_READLINK_OP_FAILED, + "could not read the " + "link from the gfid handle", + "handle=%s", dir_handle, NULL); + ret = -1; + goto out; + } - pgfidstr = strtok_r (linkname + strlen("../../00/00/"), "/", - &saveptr); - dir_name = strtok_r (NULL, "/", &saveptr); + linkname[len] = '\0'; - snprintf (result, PATH_MAX, "%s/%s", dir_name, pre_dir_name); - strncpy (pre_dir_name, result, sizeof(pre_dir_name)); + pgfidstr = strtok_r(linkname + strlen("../../00/00/"), "/", &saveptr); + dir_name = strtok_r(NULL, "/", &saveptr); - gf_uuid_parse (pgfidstr, tmp_gfid); - gf_uuid_copy (pargfid, tmp_gfid); + len = snprintf(result, PATH_MAX, "%s/%s", dir_name, pre_dir_name); + if ((len < 0) || (len >= PATH_MAX)) { + ret = -1; + goto out; + } + if (snprintf(pre_dir_name, len + 1, "%s", result) >= len + 1) { + ret = -1; + goto out; } - if (bname) - strncat (result, bname, strlen(bname) + 1); + gf_uuid_parse(pgfidstr, tmp_gfid); + gf_uuid_copy(pargfid, tmp_gfid); + } - *path = gf_strdup (result); + if (bname) + strncat(result, bname, strlen(bname) + 1); + + *path = gf_strdup(result); out: - return ret; + return ret; } diff --git a/xlators/features/changelog/src/changelog-helpers.h b/xlators/features/changelog/src/changelog-helpers.h index 7d1a86e7b33..38fa7590c32 100644 --- a/xlators/features/changelog/src/changelog-helpers.h +++ b/xlators/features/changelog/src/changelog-helpers.h @@ -11,14 +11,14 @@ #ifndef _CHANGELOG_HELPERS_H #define _CHANGELOG_HELPERS_H -#include "locking.h" -#include "timer.h" +#include <glusterfs/locking.h> +#include <glusterfs/timer.h> #include "pthread.h" -#include "iobuf.h" -#include "rot-buffs.h" +#include <glusterfs/iobuf.h> +#include <glusterfs/rot-buffs.h> #include "changelog-misc.h" -#include "call-stub.h" +#include <glusterfs/call-stub.h> #include "rpcsvc.h" #include "changelog-ev-handle.h" @@ -30,44 +30,44 @@ * the changelog entry */ typedef struct 
changelog_log_data { - /* rollover related */ - unsigned long cld_roll_time; + /* rollover related */ + time_t cld_roll_time; - /* reopen changelog? */ - gf_boolean_t cld_finale; + /* reopen changelog? */ + gf_boolean_t cld_finale; - changelog_log_type cld_type; + changelog_log_type cld_type; - /** - * sincd gfid is _always_ a necessity, it's not a part - * of the iobuf. by doing this we do not add any overhead - * for data and metadata related fops. - */ - uuid_t cld_gfid; + /** + * sincd gfid is _always_ a necessity, it's not a part + * of the iobuf. by doing this we do not add any overhead + * for data and metadata related fops. + */ + uuid_t cld_gfid; - /** - * iobufs are used for optionals records: pargfid, path, - * write offsets etc.. It's the fop implementers job - * to allocate (iobuf_get() in the fop) and get unref'ed - * in the callback (CHANGELOG_STACK_UNWIND). - */ - struct iobuf *cld_iobuf; + /** + * iobufs are used for optionals records: pargfid, path, + * write offsets etc.. It's the fop implementers job + * to allocate (iobuf_get() in the fop) and get unref'ed + * in the callback (CHANGELOG_STACK_UNWIND). + */ + struct iobuf *cld_iobuf; #define cld_ptr cld_iobuf->ptr - /** - * after allocation you can point this to the length of - * usable data, but make sure it does not exceed the - * the size of the requested iobuf. - */ - size_t cld_iobuf_len; + /** + * after allocation you can point this to the length of + * usable data, but make sure it does not exceed the + * the size of the requested iobuf. 
+ */ + size_t cld_iobuf_len; #define cld_ptr_len cld_iobuf_len - /** - * number of optional records - */ - int cld_xtra_records; + /** + * number of optional records + */ + int cld_xtra_records; } changelog_log_data_t; /** @@ -77,54 +77,48 @@ typedef struct changelog_log_data { typedef struct changelog_priv changelog_priv_t; typedef struct changelog_dispatcher { - void *cd_data; - int (*dispatchfn) (xlator_t *, changelog_priv_t *, void *, - changelog_log_data_t *, changelog_log_data_t *); + void *cd_data; + int (*dispatchfn)(xlator_t *, changelog_priv_t *, void *, + changelog_log_data_t *, changelog_log_data_t *); } changelog_dispatcher_t; struct changelog_bootstrap { - changelog_mode_t mode; - int (*ctor) (xlator_t *, changelog_dispatcher_t *); - int (*dtor) (xlator_t *, changelog_dispatcher_t *); + changelog_mode_t mode; + int (*ctor)(xlator_t *, changelog_dispatcher_t *); + int (*dtor)(xlator_t *, changelog_dispatcher_t *); }; struct changelog_encoder { - changelog_encoder_t encoder; - int (*encode) (xlator_t *, changelog_log_data_t *); + changelog_encoder_t encoder; + int (*encode)(xlator_t *, changelog_log_data_t *); }; - /* xlator private */ typedef struct changelog_time_slice { - /** - * just in case we need nanosecond granularity some day. - * field is unused as of now (maybe we'd need it later). - */ - struct timeval tv_start; - - /** - * version of changelog file, incremented each time changes - * rollover. - */ - unsigned long changelog_version[CHANGELOG_MAX_TYPE]; + /** + * version of changelog file, incremented each time changes + * rollover. 
+ */ + unsigned long changelog_version[CHANGELOG_MAX_TYPE]; } changelog_time_slice_t; typedef struct changelog_rollover { - /* rollover thread */ - pthread_t rollover_th; + /* rollover thread */ + pthread_t rollover_th; - xlator_t *this; + xlator_t *this; - /* read end of pipe used as event from barrier on snapshot */ - int rfd; + pthread_mutex_t lock; + pthread_cond_t cond; + gf_boolean_t notify; } changelog_rollover_t; typedef struct changelog_fsync { - /* fsync() thread */ - pthread_t fsync_th; + /* fsync() thread */ + pthread_t fsync_th; - xlator_t *this; + xlator_t *this; } changelog_fsync_t; /* Draining during changelog rollover (for geo-rep snapshot dependency): @@ -144,201 +138,219 @@ typedef struct changelog_fsync { */ typedef enum chlog_fop_color { - FOP_COLOR_BLACK, - FOP_COLOR_WHITE + FOP_COLOR_BLACK, + FOP_COLOR_WHITE } chlog_fop_color_t; /* Barrier notify variable */ typedef struct barrier_notify { - pthread_mutex_t bnotify_mutex; - pthread_cond_t bnotify_cond; - gf_boolean_t bnotify; - gf_boolean_t bnotify_error; + pthread_mutex_t bnotify_mutex; + pthread_cond_t bnotify_cond; + gf_boolean_t bnotify; + gf_boolean_t bnotify_error; } barrier_notify_t; /* Two separate mutex and conditional variable set is used * to drain white and black fops. 
*/ typedef struct drain_mgmt { - pthread_mutex_t drain_black_mutex; - pthread_cond_t drain_black_cond; - pthread_mutex_t drain_white_mutex; - pthread_cond_t drain_white_cond; - /* Represents black fops count in-transit */ - unsigned long black_fop_cnt; - /* Represents white fops count in-transit */ - unsigned long white_fop_cnt; - gf_boolean_t drain_wait_black; - gf_boolean_t drain_wait_white; + pthread_mutex_t drain_black_mutex; + pthread_cond_t drain_black_cond; + pthread_mutex_t drain_white_mutex; + pthread_cond_t drain_white_cond; + /* Represents black fops count in-transit */ + unsigned long black_fop_cnt; + /* Represents white fops count in-transit */ + unsigned long white_fop_cnt; + gf_boolean_t drain_wait_black; + gf_boolean_t drain_wait_white; } drain_mgmt_t; /* External barrier as a result of snap on/off indicating flag*/ typedef struct barrier_flags { - gf_lock_t lock; - gf_boolean_t barrier_ext; + gf_lock_t lock; + gf_boolean_t barrier_ext; } barrier_flags_t; /* Event selection */ typedef struct changelog_ev_selector { - gf_lock_t reflock; + gf_lock_t reflock; - /** - * Array of references for each selection bit. - */ - unsigned int ref[CHANGELOG_EV_SELECTION_RANGE]; + /** + * Array of references for each selection bit. 
+ */ + unsigned int ref[CHANGELOG_EV_SELECTION_RANGE]; } changelog_ev_selector_t; - /* changelog's private structure */ struct changelog_priv { - gf_boolean_t active; + /* changelog journalling */ + gf_boolean_t active; + + /* changelog live notifications */ + gf_boolean_t rpc_active; + + /* to generate unique socket file per brick */ + char *changelog_brick; + + /* logging directory */ + char *changelog_dir; - /* to generate unique socket file per brick */ - char *changelog_brick; + /* htime directory */ + char *htime_dir; - /* logging directory */ - char *changelog_dir; + /* one file for all changelog types */ + int changelog_fd; - /* htime directory */ - char *htime_dir; + /* htime fd for current changelog session */ + int htime_fd; - /* one file for all changelog types */ - int changelog_fd; + /* c_snap_fd is fd for call-path changelog */ + int c_snap_fd; - /* htime fd for current changelog session */ - int htime_fd; + /* rollover_count used by htime */ + int rollover_count; - /* c_snap_fd is fd for call-path changelog */ - int c_snap_fd; + gf_lock_t lock; - /* rollover_count used by htime */ - int rollover_count; + /* lock to synchronize CSNAP updation */ + gf_lock_t c_snap_lock; - gf_lock_t lock; + /* written end of the pipe */ + int wfd; - /* lock to synchronize CSNAP updation */ - gf_lock_t c_snap_lock; + /* rollover time */ + int32_t rollover_time; - /* written end of the pipe */ - int wfd; + /* fsync() interval */ + int32_t fsync_interval; - /* rollover time */ - int32_t rollover_time; + /* changelog type maps */ + const char *maps[CHANGELOG_MAX_TYPE]; - /* fsync() interval */ - int32_t fsync_interval; + /* time slicer */ + changelog_time_slice_t slice; - /* changelog type maps */ - const char *maps[CHANGELOG_MAX_TYPE]; + /* context of the updater */ + changelog_dispatcher_t cd; - /* time slicer */ - changelog_time_slice_t slice; + /* context of the rollover thread */ + changelog_rollover_t cr; - /* context of the updater */ - changelog_dispatcher_t cd; + 
/* context of fsync thread */ + changelog_fsync_t cf; - /* context of the rollover thread */ - changelog_rollover_t cr; + /* operation mode */ + changelog_mode_t op_mode; - /* context of fsync thread */ - changelog_fsync_t cf; + /* bootstrap routine for 'current' logger */ + struct changelog_bootstrap *cb; - /* operation mode */ - changelog_mode_t op_mode; + /* encoder mode */ + changelog_encoder_t encode_mode; - /* bootstrap routine for 'current' logger */ - struct changelog_bootstrap *cb; + /* encoder */ + struct changelog_encoder *ce; - /* encoder mode */ - changelog_encoder_t encode_mode; + /** + * snapshot dependency changes + */ - /* encoder */ - struct changelog_encoder *ce; + /* Draining of fops*/ + drain_mgmt_t dm; - /** - * snapshot dependency changes - */ + /* Represents the active color. Initially by default black */ + chlog_fop_color_t current_color; - /* Draining of fops*/ - drain_mgmt_t dm; + /* flag to determine explicit rollover is triggered */ + gf_boolean_t explicit_rollover; - /* Represents the active color. Initially by default black */ - chlog_fop_color_t current_color; + /* barrier notification variable protected by mutex */ + barrier_notify_t bn; - /* write end of pipe to do explicit rollover on barrier during snap */ - int cr_wfd; + /* barrier on/off indicating flags */ + barrier_flags_t bflags; - /* flag to determine explicit rollover is triggered */ - gf_boolean_t explicit_rollover; + /* changelog barrier on/off indicating flag */ + gf_boolean_t barrier_enabled; + struct list_head queue; + uint32_t queue_size; + gf_timer_t *timer; + struct timespec timeout; - /* barrier notification variable protected by mutex */ - barrier_notify_t bn; + /** + * buffers, RPC, event selection, notifications and other + * beasts. 
+ */ - /* barrier on/off indicating flags */ - barrier_flags_t bflags; + /* epoll pthread */ + pthread_t poller; - /* changelog barrier on/off indicating flag */ - gf_boolean_t barrier_enabled; - struct list_head queue; - uint32_t queue_size; - gf_timer_t *timer; - struct timespec timeout; + /* rotational buffer */ + rbuf_t *rbuf; - /** - * buffers, RPC, event selection, notifications and other - * beasts. - */ + /* changelog RPC server */ + rpcsvc_t *rpc; - /* epoll pthread */ - pthread_t poller; + /* event selection */ + changelog_ev_selector_t ev_selection; - /* rotational buffer */ - rbuf_t *rbuf; + /* client handling (reverse connection) */ + pthread_t connector; - /* changelog RPC server */ - rpcsvc_t *rpc; + int nr_dispatchers; + pthread_t *ev_dispatcher; - /* event selection */ - changelog_ev_selector_t ev_selection; + changelog_clnt_t connections; - /* client handling (reverse connection) */ - pthread_t connector; + /* glusterfind dependency to capture paths on deleted entries*/ + gf_boolean_t capture_del_path; - int nr_dispatchers; - pthread_t *ev_dispatcher; + /* Save total no. of listners */ + gf_atomic_t listnercnt; - changelog_clnt_t connections; + /* Save total no. of xprt are associated with listner */ + gf_atomic_t xprtcnt; - /* glusterfind dependency to capture paths on deleted entries*/ - gf_boolean_t capture_del_path; + /* Save xprt list */ + struct list_head xprt_list; + + /* Save total no. of client connection */ + gf_atomic_t clntcnt; + + /* Save cleanup brick in victim */ + xlator_t *victim; + + /* Status to save cleanup notify status */ + gf_boolean_t notify_down; }; struct changelog_local { - inode_t *inode; - gf_boolean_t update_no_check; + inode_t *inode; + gf_boolean_t update_no_check; - changelog_log_data_t cld; + changelog_log_data_t cld; - /** - * ->prev_entry is used in cases when there needs to be - * additional changelog entry for the parent (eg. 
rename) - * It's analogous to ->next in single linked list world, - * but we call it as ->prev_entry... ha ha ha - */ - struct changelog_local *prev_entry; + /** + * ->prev_entry is used in cases when there needs to be + * additional changelog entry for the parent (eg. rename) + * It's analogous to ->next in single linked list world, + * but we call it as ->prev_entry... ha ha ha + */ + struct changelog_local *prev_entry; - /* snap dependency changes */ - chlog_fop_color_t color; + /* snap dependency changes */ + chlog_fop_color_t color; }; typedef struct changelog_local changelog_local_t; /* inode version is stored in inode ctx */ typedef struct changelog_inode_ctx { - unsigned long iversion[CHANGELOG_MAX_TYPE]; + unsigned long iversion[CHANGELOG_MAX_TYPE]; } changelog_inode_ctx_t; -#define CHANGELOG_INODE_VERSION_TYPE(ctx, type) &(ctx->iversion[type]) +#define CHANGELOG_INODE_VERSION_TYPE(ctx, type) &(ctx->iversion[type]) /** * Optional Records: @@ -346,268 +358,276 @@ typedef struct changelog_inode_ctx { * @changelog_opt_t struct. The array is allocated via @iobufs. */ typedef enum { - CHANGELOG_OPT_REC_FOP, - CHANGELOG_OPT_REC_ENTRY, - CHANGELOG_OPT_REC_UINT32, + CHANGELOG_OPT_REC_FOP, + CHANGELOG_OPT_REC_ENTRY, + CHANGELOG_OPT_REC_UINT32, } changelog_optional_rec_type_t; struct changelog_entry_fields { - uuid_t cef_uuid; - char *cef_bname; - char *cef_path; + uuid_t cef_uuid; + char *cef_bname; + char *cef_path; }; typedef struct { - /** - * @co_covert can be used to do post-processing of the record before - * it's persisted to the CHANGELOG. If this is NULL, then the record - * is persisted as per it's in memory format. - */ - size_t (*co_convert) (void *data, char *buffer, gf_boolean_t encode); - - /* release routines */ - void (*co_free) (void *data); - - /* type of the field */ - changelog_optional_rec_type_t co_type; - - /** - * sizeof of the 'valid' field in the union. This field is not used if - * @co_convert is specified. 
- */ - size_t co_len; - - union { - unsigned int co_uint32; - glusterfs_fop_t co_fop; - struct changelog_entry_fields co_entry; - }; + /** + * @co_covert can be used to do post-processing of the record before + * it's persisted to the CHANGELOG. If this is NULL, then the record + * is persisted as per it's in memory format. + */ + size_t (*co_convert)(void *data, char *buffer, gf_boolean_t encode); + + /* release routines */ + void (*co_free)(void *data); + + /* type of the field */ + changelog_optional_rec_type_t co_type; + + /** + * sizeof of the 'valid' field in the union. This field is not used if + * @co_convert is specified. + */ + size_t co_len; + + union { + unsigned int co_uint32; + glusterfs_fop_t co_fop; + struct changelog_entry_fields co_entry; + }; } changelog_opt_t; -#define CHANGELOG_OPT_RECORD_LEN sizeof (changelog_opt_t) +#define CHANGELOG_OPT_RECORD_LEN sizeof(changelog_opt_t) /** * helpers routines */ int -changelog_thread_cleanup (xlator_t *this, pthread_t thr_id); +changelog_thread_cleanup(xlator_t *this, pthread_t thr_id); void * -changelog_get_usable_buffer (changelog_local_t *local); +changelog_get_usable_buffer(changelog_local_t *local); void -changelog_set_usable_record_and_length (changelog_local_t *local, - size_t len, int xr); +changelog_set_usable_record_and_length(changelog_local_t *local, size_t len, + int xr); void -changelog_local_cleanup (xlator_t *xl, changelog_local_t *local); +changelog_local_cleanup(xlator_t *xl, changelog_local_t *local); changelog_local_t * -changelog_local_init (xlator_t *this, inode_t *inode, uuid_t gfid, - int xtra_records, gf_boolean_t update_flag); +changelog_local_init(xlator_t *this, inode_t *inode, uuid_t gfid, + int xtra_records, gf_boolean_t update_flag); int -changelog_start_next_change (xlator_t *this, - changelog_priv_t *priv, - unsigned long ts, gf_boolean_t finale); +changelog_start_next_change(xlator_t *this, changelog_priv_t *priv, time_t ts, + gf_boolean_t finale); int 
-changelog_open_journal (xlator_t *this, changelog_priv_t *priv); -int -changelog_fill_rollover_data (changelog_log_data_t *cld, gf_boolean_t is_last); +changelog_open_journal(xlator_t *this, changelog_priv_t *priv); +void +changelog_fill_rollover_data(changelog_log_data_t *cld, gf_boolean_t is_last); int -changelog_inject_single_event (xlator_t *this, - changelog_priv_t *priv, - changelog_log_data_t *cld); +changelog_inject_single_event(xlator_t *this, changelog_priv_t *priv, + changelog_log_data_t *cld); size_t -changelog_entry_length (); +changelog_entry_length(); int -changelog_write (int fd, char *buffer, size_t len); +changelog_write(int fd, char *buffer, size_t len); int -changelog_write_change (changelog_priv_t *priv, char *buffer, size_t len); +changelog_write_change(changelog_priv_t *priv, char *buffer, size_t len); int -changelog_handle_change (xlator_t *this, - changelog_priv_t *priv, changelog_log_data_t *cld); +changelog_handle_change(xlator_t *this, changelog_priv_t *priv, + changelog_log_data_t *cld); void -changelog_update (xlator_t *this, changelog_priv_t *priv, - changelog_local_t *local, changelog_log_type type); +changelog_update(xlator_t *this, changelog_priv_t *priv, + changelog_local_t *local, changelog_log_type type); void * -changelog_rollover (void *data); +changelog_rollover(void *data); void * -changelog_fsync_thread (void *data); +changelog_fsync_thread(void *data); int -changelog_forget (xlator_t *this, inode_t *inode); +changelog_forget(xlator_t *this, inode_t *inode); int -htime_update (xlator_t *this, changelog_priv_t *priv, - unsigned long ts, char * buffer); +htime_update(xlator_t *this, changelog_priv_t *priv, time_t ts, char *buffer); int -htime_open (xlator_t *this, changelog_priv_t *priv, unsigned long ts); +htime_open(xlator_t *this, changelog_priv_t *priv, time_t ts); int -htime_create (xlator_t *this, changelog_priv_t *priv, unsigned long ts); +htime_create(xlator_t *this, changelog_priv_t *priv, time_t ts); /* Geo-Rep 
snapshot dependency changes */ void -changelog_color_fop_and_inc_cnt (xlator_t *this, changelog_priv_t *priv, - changelog_local_t *local); +changelog_color_fop_and_inc_cnt(xlator_t *this, changelog_priv_t *priv, + changelog_local_t *local); void -changelog_inc_fop_cnt (xlator_t *this, changelog_priv_t *priv, - changelog_local_t *local); +changelog_inc_fop_cnt(xlator_t *this, changelog_priv_t *priv, + changelog_local_t *local); void -changelog_dec_fop_cnt (xlator_t *this, changelog_priv_t *priv, - changelog_local_t *local); +changelog_dec_fop_cnt(xlator_t *this, changelog_priv_t *priv, + changelog_local_t *local); int -changelog_barrier_notify (changelog_priv_t *priv, char* buf); +changelog_barrier_notify(changelog_priv_t *priv, char *buf); void -changelog_barrier_cleanup (xlator_t *this, changelog_priv_t *priv, - struct list_head *queue); +changelog_barrier_cleanup(xlator_t *this, changelog_priv_t *priv, + struct list_head *queue); void -changelog_drain_white_fops (xlator_t *this, changelog_priv_t *priv); +changelog_drain_white_fops(xlator_t *this, changelog_priv_t *priv); void -changelog_drain_black_fops (xlator_t *this, changelog_priv_t *priv); +changelog_drain_black_fops(xlator_t *this, changelog_priv_t *priv); /* Crash consistency of changelog wrt snapshot */ int -changelog_snap_logging_stop ( xlator_t *this, changelog_priv_t *priv); +changelog_snap_logging_stop(xlator_t *this, changelog_priv_t *priv); int -changelog_snap_logging_start ( xlator_t *this, changelog_priv_t *priv); +changelog_snap_logging_start(xlator_t *this, changelog_priv_t *priv); int -changelog_snap_open ( xlator_t *this, changelog_priv_t *priv); +changelog_snap_open(xlator_t *this, changelog_priv_t *priv); int -changelog_snap_handle_ascii_change (xlator_t *this, - changelog_log_data_t *cld); +changelog_snap_handle_ascii_change(xlator_t *this, changelog_log_data_t *cld); int -changelog_snap_write_change (changelog_priv_t *priv, char *buffer, size_t len); 
+changelog_snap_write_change(changelog_priv_t *priv, char *buffer, size_t len); /* Changelog barrier routines */ -void __chlog_barrier_enqueue (xlator_t *this, call_stub_t *stub); -void __chlog_barrier_disable (xlator_t *this, struct list_head *queue); -void chlog_barrier_dequeue_all (xlator_t *this, struct list_head *queue); -call_stub_t *__chlog_barrier_dequeue (xlator_t *this, struct list_head *queue); -int __chlog_barrier_enable (xlator_t *this, changelog_priv_t *priv); +void +__chlog_barrier_enqueue(xlator_t *this, call_stub_t *stub); +void +__chlog_barrier_disable(xlator_t *this, struct list_head *queue); +void +chlog_barrier_dequeue_all(xlator_t *this, struct list_head *queue); +call_stub_t * +__chlog_barrier_dequeue(xlator_t *this, struct list_head *queue); +int +__chlog_barrier_enable(xlator_t *this, changelog_priv_t *priv); int32_t -changelog_fill_entry_buf (call_frame_t *frame, xlator_t *this, - loc_t *loc, changelog_local_t **local); +changelog_fill_entry_buf(call_frame_t *frame, xlator_t *this, loc_t *loc, + changelog_local_t **local); /* event selection routines */ -void changelog_select_event (xlator_t *, - changelog_ev_selector_t *, unsigned int); -void changelog_deselect_event (xlator_t *, - changelog_ev_selector_t *, unsigned int); -int changelog_init_event_selection (xlator_t *, - changelog_ev_selector_t *); -int changelog_cleanup_event_selection (xlator_t *, - changelog_ev_selector_t *); -int changelog_ev_selected (xlator_t *, - changelog_ev_selector_t *, unsigned int); void -changelog_dispatch_event (xlator_t *, changelog_priv_t *, changelog_event_t *); +changelog_select_event(xlator_t *, changelog_ev_selector_t *, unsigned int); +void +changelog_deselect_event(xlator_t *, changelog_ev_selector_t *, unsigned int); +int +changelog_init_event_selection(xlator_t *, changelog_ev_selector_t *); +int +changelog_ev_selected(xlator_t *, changelog_ev_selector_t *, unsigned int); +void +changelog_dispatch_event(xlator_t *, changelog_priv_t *, 
changelog_event_t *); changelog_inode_ctx_t * -__changelog_inode_ctx_get (xlator_t *, inode_t *, unsigned long **, - unsigned long *, changelog_log_type); +__changelog_inode_ctx_get(xlator_t *, inode_t *, unsigned long **, + unsigned long *, changelog_log_type); int -resolve_pargfid_to_path (xlator_t *this, uuid_t gfid, char **path, char *bname); +resolve_pargfid_to_path(xlator_t *this, const uuid_t gfid, char **path, + char *bname); /* macros */ -#define CHANGELOG_STACK_UNWIND(fop, frame, params ...) do { \ - changelog_local_t *__local = NULL; \ - xlator_t *__xl = NULL; \ - if (frame) { \ - __local = frame->local; \ - __xl = frame->this; \ - frame->local = NULL; \ - } \ - STACK_UNWIND_STRICT (fop, frame, params); \ - if (__local && __local->prev_entry) \ - changelog_local_cleanup (__xl, \ - __local->prev_entry); \ - changelog_local_cleanup (__xl, __local); \ - } while (0) - -#define CHANGELOG_IOBUF_REF(iobuf) do { \ - if (iobuf) \ - iobuf_ref (iobuf); \ - } while (0) - -#define CHANGELOG_IOBUF_UNREF(iobuf) do { \ - if (iobuf) \ - iobuf_unref (iobuf); \ - } while (0) - -#define CHANGELOG_FILL_BUFFER(buffer, off, val, len) do { \ - memcpy (buffer + off, val, len); \ - off += len; \ - } while (0) - -#define SLICE_VERSION_UPDATE(slice) do { \ - int i = 0; \ - for (; i < CHANGELOG_MAX_TYPE; i++) { \ - slice->changelog_version[i]++; \ - } \ - } while (0) - -#define CHANGELOG_FILL_UINT32(co, number, converter, xlen) do { \ - co->co_convert = converter; \ - co->co_free = NULL; \ - co->co_type = CHANGELOG_OPT_REC_UINT32; \ - co->co_uint32 = number; \ - xlen += sizeof (unsigned int); \ - } while (0) - -#define CHANGLOG_FILL_FOP_NUMBER(co, fop, converter, xlen) do { \ - co->co_convert = converter; \ - co->co_free = NULL; \ - co->co_type = CHANGELOG_OPT_REC_FOP; \ - co->co_fop = fop; \ - xlen += sizeof (fop); \ - } while (0) - -#define CHANGELOG_FILL_ENTRY(co, pargfid, bname, \ - converter, freefn, xlen, label) \ - do { \ - co->co_convert = converter; \ - co->co_free = 
freefn; \ - co->co_type = CHANGELOG_OPT_REC_ENTRY; \ - gf_uuid_copy (co->co_entry.cef_uuid, pargfid); \ - co->co_entry.cef_bname = gf_strdup(bname); \ - if (!co->co_entry.cef_bname) \ - goto label; \ - xlen += (UUID_CANONICAL_FORM_LEN + strlen (bname)); \ - } while (0) - -#define CHANGELOG_FILL_ENTRY_DIR_PATH(co, pargfid, bname, converter, \ - del_freefn, xlen, label, capture_del) \ - do { \ - co->co_convert = converter; \ - co->co_free = del_freefn; \ - co->co_type = CHANGELOG_OPT_REC_ENTRY; \ - gf_uuid_copy (co->co_entry.cef_uuid, pargfid); \ - co->co_entry.cef_bname = gf_strdup(bname); \ - if (!co->co_entry.cef_bname) \ - goto label; \ - xlen += (UUID_CANONICAL_FORM_LEN + strlen (bname)); \ - if (!capture_del || resolve_pargfid_to_path (this, pargfid, \ - &(co->co_entry.cef_path), co->co_entry.cef_bname)) { \ - co->co_entry.cef_path = gf_strdup ("\0"); \ - xlen += 1; \ - } else { \ - xlen += (strlen (co->co_entry.cef_path)); \ - } \ - } while (0) - -#define CHANGELOG_INIT(this, local, inode, gfid, xrec) \ - local = changelog_local_init (this, inode, gfid, xrec, _gf_false) - -#define CHANGELOG_INIT_NOCHECK(this, local, inode, gfid, xrec) \ - local = changelog_local_init (this, inode, gfid, xrec, _gf_true) - -#define CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, label) do { \ - if (!priv->active) \ - goto label; \ - /* ignore rebalance process's activity. */ \ - if ((frame->root->pid == GF_CLIENT_PID_DEFRAG) || \ - (frame->root->pid == GF_CLIENT_PID_TIER_DEFRAG)) \ - goto label; \ - } while (0) +#define CHANGELOG_STACK_UNWIND(fop, frame, params...) 
\ + do { \ + changelog_local_t *__local = NULL; \ + xlator_t *__xl = NULL; \ + if (frame) { \ + __local = frame->local; \ + __xl = frame->this; \ + frame->local = NULL; \ + } \ + STACK_UNWIND_STRICT(fop, frame, params); \ + if (__local && __local->prev_entry) \ + changelog_local_cleanup(__xl, __local->prev_entry); \ + changelog_local_cleanup(__xl, __local); \ + } while (0) + +#define CHANGELOG_IOBUF_REF(iobuf) \ + do { \ + if (iobuf) \ + iobuf_ref(iobuf); \ + } while (0) + +#define CHANGELOG_IOBUF_UNREF(iobuf) \ + do { \ + if (iobuf) \ + iobuf_unref(iobuf); \ + } while (0) + +#define CHANGELOG_FILL_BUFFER(buffer, off, val, len) \ + do { \ + memcpy(buffer + off, val, len); \ + off += len; \ + } while (0) + +#define SLICE_VERSION_UPDATE(slice) \ + do { \ + int i = 0; \ + for (; i < CHANGELOG_MAX_TYPE; i++) { \ + slice->changelog_version[i]++; \ + } \ + } while (0) + +#define CHANGELOG_FILL_UINT32(co, number, converter, xlen) \ + do { \ + co->co_convert = converter; \ + co->co_free = NULL; \ + co->co_type = CHANGELOG_OPT_REC_UINT32; \ + co->co_uint32 = number; \ + xlen += sizeof(unsigned int); \ + } while (0) + +#define CHANGLOG_FILL_FOP_NUMBER(co, fop, converter, xlen) \ + do { \ + co->co_convert = converter; \ + co->co_free = NULL; \ + co->co_type = CHANGELOG_OPT_REC_FOP; \ + co->co_fop = fop; \ + xlen += sizeof(fop); \ + } while (0) + +#define CHANGELOG_FILL_ENTRY(co, pargfid, bname, converter, freefn, xlen, \ + label) \ + do { \ + co->co_convert = converter; \ + co->co_free = freefn; \ + co->co_type = CHANGELOG_OPT_REC_ENTRY; \ + gf_uuid_copy(co->co_entry.cef_uuid, pargfid); \ + co->co_entry.cef_bname = gf_strdup(bname); \ + if (!co->co_entry.cef_bname) \ + goto label; \ + xlen += (UUID_CANONICAL_FORM_LEN + strlen(bname)); \ + } while (0) + +#define CHANGELOG_FILL_ENTRY_DIR_PATH(co, pargfid, bname, converter, \ + del_freefn, xlen, label, capture_del) \ + do { \ + co->co_convert = converter; \ + co->co_free = del_freefn; \ + co->co_type = CHANGELOG_OPT_REC_ENTRY; \ 
+ gf_uuid_copy(co->co_entry.cef_uuid, pargfid); \ + co->co_entry.cef_bname = gf_strdup(bname); \ + if (!co->co_entry.cef_bname) \ + goto label; \ + xlen += (UUID_CANONICAL_FORM_LEN + strlen(bname)); \ + if (!capture_del || \ + resolve_pargfid_to_path(this, pargfid, &(co->co_entry.cef_path), \ + co->co_entry.cef_bname)) { \ + co->co_entry.cef_path = gf_strdup("\0"); \ + xlen += 1; \ + } else { \ + xlen += (strlen(co->co_entry.cef_path)); \ + } \ + } while (0) + +#define CHANGELOG_INIT(this, local, inode, gfid, xrec) \ + local = changelog_local_init(this, inode, gfid, xrec, _gf_false) + +#define CHANGELOG_INIT_NOCHECK(this, local, inode, gfid, xrec) \ + local = changelog_local_init(this, inode, gfid, xrec, _gf_true) + +#define CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, label) \ + do { \ + if (!priv->active) \ + goto label; \ + /* ignore rebalance process's activity. */ \ + if ((frame->root->pid == GF_CLIENT_PID_DEFRAG) || \ + (frame->root->pid == GF_CLIENT_PID_TIER_DEFRAG)) \ + goto label; \ + } while (0) /* If it is a METADATA entry and fop num being GF_FOP_NULL, don't * log in the changelog as it is of no use. And also if it is @@ -616,66 +636,81 @@ resolve_pargfid_to_path (xlator_t *this, uuid_t gfid, char **path, char *bname); * to same changelog will be missed. Hence check for boundary * condition. 
*/ -#define CHANGELOG_OP_BOUNDARY_CHECK(frame, label) do { \ - if (frame->root->op <= GF_FOP_NULL || \ - frame->root->op >= GF_FOP_MAXVALUE) \ - goto label; \ - } while (0) +#define CHANGELOG_OP_BOUNDARY_CHECK(frame, label) \ + do { \ + if (frame->root->op <= GF_FOP_NULL || \ + frame->root->op >= GF_FOP_MAXVALUE) \ + goto label; \ + } while (0) /** * ignore internal fops for all clients except AFR self-heal daemon */ -#define CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO(frame, dict, label) do { \ - if ((frame->root->pid != GF_CLIENT_PID_AFR_SELF_HEALD) \ - && dict \ - && dict_get (dict, GLUSTERFS_INTERNAL_FOP_KEY)) \ - goto label; \ - } while (0) - -#define CHANGELOG_COND_GOTO(priv, cond, label) do { \ - if (!priv->active || cond) \ - goto label; \ - } while (0) +#define CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO(frame, dict, label) \ + do { \ + if ((frame->root->pid != GF_CLIENT_PID_SELF_HEALD) && dict && \ + dict_get(dict, GLUSTERFS_INTERNAL_FOP_KEY)) \ + goto label; \ + } while (0) + +#define CHANGELOG_COND_GOTO(priv, cond, label) \ + do { \ + if (!priv->active || cond) \ + goto label; \ + } while (0) /* Begin: Geo-Rep snapshot dependency changes */ -#define DICT_ERROR -1 -#define BARRIER_OFF 0 -#define BARRIER_ON 1 -#define DICT_DEFAULT 2 - -#define CHANGELOG_NOT_ON_THEN_GOTO(priv, ret, label) do { \ - if (!priv->active) { \ - gf_msg (this->name, GF_LOG_WARNING, 0, \ - CHANGELOG_MSG_NOT_ACTIVE, \ - "Changelog is not active, return success"); \ - ret = 0; \ - goto label; \ - } \ - } while (0) +#define DICT_ERROR -1 +#define BARRIER_OFF 0 +#define BARRIER_ON 1 +#define DICT_DEFAULT 2 + +#define CHANGELOG_NOT_ON_THEN_GOTO(priv, ret, label) \ + do { \ + if (!priv->active) { \ + gf_smsg(this->name, GF_LOG_WARNING, 0, \ + CHANGELOG_MSG_CHANGELOG_NOT_ACTIVE, NULL); \ + ret = 0; \ + goto label; \ + } \ + } while (0) /* Log pthread error and goto label */ -#define CHANGELOG_PTHREAD_ERROR_HANDLE_0(ret, label) do { \ - if (ret) { \ - gf_msg (this->name, GF_LOG_ERROR, \ - 0, 
CHANGELOG_MSG_PTHREAD_ERROR, \ - "pthread error: Error: %d", ret); \ - ret = -1; \ - goto label; \ - } \ - } while (0); +#define CHANGELOG_PTHREAD_ERROR_HANDLE_0(ret, label) \ + do { \ + if (ret) { \ + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_PTHREAD_ERROR, \ + "error=%d", ret, NULL); \ + ret = -1; \ + goto label; \ + } \ + } while (0); /* Log pthread error, set flag and goto label */ -#define CHANGELOG_PTHREAD_ERROR_HANDLE_1(ret, label, flag) do { \ - if (ret) { \ - gf_msg (this->name, GF_LOG_ERROR, 0, \ - CHANGELOG_MSG_PTHREAD_ERROR, \ - "pthread error: Error: %d", ret); \ - ret = -1; \ - flag = _gf_true; \ - goto label; \ - } \ - } while (0) +#define CHANGELOG_PTHREAD_ERROR_HANDLE_1(ret, label, flag) \ + do { \ + if (ret) { \ + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_PTHREAD_ERROR, \ + "error=%d", ret, NULL); \ + ret = -1; \ + flag = _gf_true; \ + goto label; \ + } \ + } while (0) + +/* Log pthread error, unlock mutex and goto label */ +#define CHANGELOG_PTHREAD_ERROR_HANDLE_2(ret, label, mutex) \ + do { \ + if (ret) { \ + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_PTHREAD_ERROR, \ + "error=%d", ret, NULL); \ + ret = -1; \ + pthread_mutex_unlock(&mutex); \ + goto label; \ + } \ + } while (0) + /* End: Geo-Rep snapshot dependency changes */ #endif /* _CHANGELOG_HELPERS_H */ diff --git a/xlators/features/changelog/src/changelog-mem-types.h b/xlators/features/changelog/src/changelog-mem-types.h index 1618f722f6c..a2d8a9cbe93 100644 --- a/xlators/features/changelog/src/changelog-mem-types.h +++ b/xlators/features/changelog/src/changelog-mem-types.h @@ -11,25 +11,24 @@ #ifndef _CHANGELOG_MEM_TYPES_H #define _CHANGELOG_MEM_TYPES_H -#include "mem-types.h" +#include <glusterfs/mem-types.h> enum gf_changelog_mem_types { - gf_changelog_mt_priv_t = gf_common_mt_end + 1, - gf_changelog_mt_str_t = gf_common_mt_end + 2, - gf_changelog_mt_batch_t = gf_common_mt_end + 3, - gf_changelog_mt_rt_t = gf_common_mt_end + 4, - gf_changelog_mt_inode_ctx_t = 
gf_common_mt_end + 5, - gf_changelog_mt_rpc_clnt_t = gf_common_mt_end + 6, - gf_changelog_mt_libgfchangelog_t = gf_common_mt_end + 7, - gf_changelog_mt_libgfchangelog_entry_t = gf_common_mt_end + 8, - gf_changelog_mt_libgfchangelog_rl_t = gf_common_mt_end + 9, - gf_changelog_mt_libgfchangelog_dirent_t = gf_common_mt_end + 10, - gf_changelog_mt_changelog_buffer_t = gf_common_mt_end + 11, - gf_changelog_mt_history_data_t = gf_common_mt_end + 12, - gf_changelog_mt_libgfchangelog_call_pool_t = gf_common_mt_end + 13, - gf_changelog_mt_libgfchangelog_event_t = gf_common_mt_end + 14, - gf_changelog_mt_ev_dispatcher_t = gf_common_mt_end + 15, - gf_changelog_mt_end + gf_changelog_mt_priv_t = gf_common_mt_end + 1, + gf_changelog_mt_str_t = gf_common_mt_end + 2, + gf_changelog_mt_batch_t = gf_common_mt_end + 3, + gf_changelog_mt_rt_t = gf_common_mt_end + 4, + gf_changelog_mt_inode_ctx_t = gf_common_mt_end + 5, + gf_changelog_mt_rpc_clnt_t = gf_common_mt_end + 6, + gf_changelog_mt_libgfchangelog_t = gf_common_mt_end + 7, + gf_changelog_mt_libgfchangelog_entry_t = gf_common_mt_end + 8, + gf_changelog_mt_libgfchangelog_rl_t = gf_common_mt_end + 9, + gf_changelog_mt_changelog_buffer_t = gf_common_mt_end + 10, + gf_changelog_mt_history_data_t = gf_common_mt_end + 11, + gf_changelog_mt_libgfchangelog_call_pool_t = gf_common_mt_end + 12, + gf_changelog_mt_libgfchangelog_event_t = gf_common_mt_end + 13, + gf_changelog_mt_ev_dispatcher_t = gf_common_mt_end + 14, + gf_changelog_mt_end }; #endif diff --git a/xlators/features/changelog/src/changelog-messages.h b/xlators/features/changelog/src/changelog-messages.h index e65a457b7c0..cb0e16c85d8 100644 --- a/xlators/features/changelog/src/changelog-messages.h +++ b/xlators/features/changelog/src/changelog-messages.h @@ -11,440 +11,162 @@ #ifndef _CHANGELOG_MESSAGES_H_ #define _CHANGELOG_MESSAGES_H_ -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - -#include "glfs-message-id.h" - -/*! 
\file changelog-messages.h - * \brief CHANGELOG log-message IDs and their descriptions. - */ - -/* NOTE: Rules for message additions - * 1) Each instance of a message is _better_ left with a unique message ID, even - * if the message format is the same. Reasoning is that, if the message - * format needs to change in one instance, the other instances are not - * impacted or the new change does not change the ID of the instance being - * modified. - * 2) Addition of a message, - * - Should increment the GLFS_NUM_MESSAGES - * - Append to the list of messages defined, towards the end - * - Retain macro naming as glfs_msg_X (for readability across developers) - * NOTE: Rules for message format modifications - * 3) Check acorss the code if the message ID macro in question is reused - * anywhere. If reused then then the modifications should ensure correctness - * everywhere, or needs a new message ID as (1) above was not adhered to. If - * not used anywhere, proceed with the required modification. - * NOTE: Rules for message deletion - * 4) Check (3) and if used anywhere else, then cannot be deleted. If not used - * anywhere, then can be deleted, but will leave a hole by design, as - * addition rules specify modification to the end of the list and not filling - * holes. - */ - -#define GLFS_COMP_BASE_CHANGELOG GLFS_MSGID_COMP_CHANGELOG -#define GLFS_NUM_MESSAGES 54 -#define GLFS_MSGID_END (GLFS_COMP_BASE_CHANGELOG + GLFS_NUM_MESSAGES + 1) - -#define glfs_msg_start_x GLFS_COMP_BASE_CHANGELOG, "Invalid: Start of messages" - -/*! - * @messageid - * @diagnosis open/opendir failed on a brick. - * @recommended action Error number in the log should give the reason why it - * failed. Also observe brick logs for more information. +#include <glusterfs/glfs-message-id.h> + +/* To add new message IDs, append new identifiers at the end of the list. + * + * Never remove a message ID. If it's not used anymore, you can rename it or + * leave it as it is, but not delete it. 
This is to prevent reutilization of + * IDs by other messages. + * + * The component name must match one of the entries defined in + * glfs-message-id.h. */ -#define CHANGELOG_MSG_OPEN_FAILED (GLFS_COMP_BASE_CHANGELOG + 1) - -/*! - * @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_NO_MEMORY (GLFS_COMP_BASE_CHANGELOG + 2) -/*! - * @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_VOL_MISCONFIGURED (GLFS_COMP_BASE_CHANGELOG + 3) - -/*! - * @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_RENAME_ERROR (GLFS_COMP_BASE_CHANGELOG + 4) - -/*! - * @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_READ_ERROR (GLFS_COMP_BASE_CHANGELOG + 5) - -/*! - * @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_HTIME_ERROR (GLFS_COMP_BASE_CHANGELOG + 6) - -/*! - * @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED (GLFS_COMP_BASE_CHANGELOG + 7) - -/*! - * @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_PTHREAD_COND_INIT_FAILED (GLFS_COMP_BASE_CHANGELOG + 8) - -/*! - * @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_CHILD_MISCONFIGURED (GLFS_COMP_BASE_CHANGELOG + 9) - -/*! - * @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_DIR_OPTIONS_NOT_SET (GLFS_COMP_BASE_CHANGELOG + 10) - -/*! - * @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_CLOSE_ERROR (GLFS_COMP_BASE_CHANGELOG + 11) - -/*! - * @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_PIPE_CREATION_ERROR (GLFS_COMP_BASE_CHANGELOG + 12) - -/*! - * @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_DICT_GET_FAILED (GLFS_COMP_BASE_CHANGELOG + 13) - -/*! - * @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_BARRIER_INFO (GLFS_COMP_BASE_CHANGELOG + 14) - -/*! 
- * @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_BARRIER_ERROR (GLFS_COMP_BASE_CHANGELOG + 15) - -/*! - * @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_GET_TIME_OP_FAILED (GLFS_COMP_BASE_CHANGELOG + 16) - -/*! - * @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_WRITE_FAILED (GLFS_COMP_BASE_CHANGELOG + 17) - -/*! - * @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_PTHREAD_ERROR (GLFS_COMP_BASE_CHANGELOG + 18) - -/*! - * @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_INODE_NOT_FOUND (GLFS_COMP_BASE_CHANGELOG + 19) - -/*! - * @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_FSYNC_OP_FAILED (GLFS_COMP_BASE_CHANGELOG + 20) - -/*! - * @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_TOTAL_LOG_INFO (GLFS_COMP_BASE_CHANGELOG + 21) - -/*! - * @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_SNAP_INFO (GLFS_COMP_BASE_CHANGELOG + 22) - -/*! - * @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_SELECT_FAILED (GLFS_COMP_BASE_CHANGELOG + 23) - -/*! - * @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_FCNTL_FAILED (GLFS_COMP_BASE_CHANGELOG + 24) - -/*! - * @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_BNOTIFY_INFO (GLFS_COMP_BASE_CHANGELOG + 25) - -/*! - * @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_ENTRY_BUF_INFO (GLFS_COMP_BASE_CHANGELOG + 26) - -/*! - * @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_NOT_ACTIVE (GLFS_COMP_BASE_CHANGELOG + 27) - -/*! - @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_LOCAL_INIT_FAILED (GLFS_COMP_BASE_CHANGELOG + 28) - -/*! 
- @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_NOTIFY_REGISTER_FAILED (GLFS_COMP_BASE_CHANGELOG + 28) - -/*! - @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_PROGRAM_NAME_REG_FAILED (GLFS_COMP_BASE_CHANGELOG + 29) - -/*! - @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_HANDLE_PROBE_ERROR (GLFS_COMP_BASE_CHANGELOG + 30) - -/*! - @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_SET_FD_CONTEXT (GLFS_COMP_BASE_CHANGELOG + 31) - -/*! - @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_FREEUP_FAILED (GLFS_COMP_BASE_CHANGELOG + 32) - -/*! - @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_HTIME_INFO (GLFS_COMP_BASE_CHANGELOG + 33) - -/*! - @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_RPC_SUBMIT_REPLY_FAILED (GLFS_COMP_BASE_CHANGELOG + 34) - -/*! - @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_RPC_BUILD_ERROR (GLFS_COMP_BASE_CHANGELOG + 35) - -/*! - @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_RPC_CONNECT_ERROR (GLFS_COMP_BASE_CHANGELOG + 36) - -/*! - @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_RPC_START_ERROR (GLFS_COMP_BASE_CHANGELOG + 37) - -/*! - @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_BUFFER_STARVATION_ERROR (GLFS_COMP_BASE_CHANGELOG + 3) - -/*! - @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_SCAN_DIR_FAILED (GLFS_COMP_BASE_CHANGELOG + 39) - -/*! - @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_FSETXATTR_FAILED (GLFS_COMP_BASE_CHANGELOG + 40) - -/*! - @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_FGETXATTR_FAILED (GLFS_COMP_BASE_CHANGELOG + 41) - -/*! 
- @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_CLEANUP_ON_ACTIVE_REF \ - (GLFS_COMP_BASE_CHANGELOG + 42) - -/*! - @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_DISPATCH_EVENT_FAILED (GLFS_COMP_BASE_CHANGELOG + 43) - -/*! - @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_PUT_BUFFER_FAILED (GLFS_COMP_BASE_CHANGELOG + 44) - -/*! - * @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_PTHREAD_COND_WAIT_FAILED (GLFS_COMP_BASE_CHANGELOG + 45) - -/*! - @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_PTHREAD_CANCEL_FAILED (GLFS_COMP_BASE_CHANGELOG + 46) - -/*! - @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_INJECT_FSYNC_FAILED (GLFS_COMP_BASE_CHANGELOG + 47) - -/*! - @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_CREATE_FRAME_FAILED (GLFS_COMP_BASE_CHANGELOG + 48) - -/*! - @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_FSTAT_OP_FAILED (GLFS_COMP_BASE_CHANGELOG + 49) - -/*! - @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_LSEEK_OP_FAILED (GLFS_COMP_BASE_CHANGELOG + 50) - -/*! - @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_STRSTR_OP_FAILED (GLFS_COMP_BASE_CHANGELOG + 51) - -/*! - @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_UNLINK_OP_FAILED (GLFS_COMP_BASE_CHANGELOG + 52) - -/*! - @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_DETECT_EMPTY_CHANGELOG_FAILED \ - (GLFS_COMP_BASE_CHANGELOG + 53) - -/*! - @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_READLINK_OP_FAILED (GLFS_COMP_BASE_CHANGELOG + 54) - -/*! 
- @messageid - * @diagnosis - * @recommended action -*/ -#define CHANGELOG_MSG_EXPLICIT_ROLLOVER_FAILED (GLFS_COMP_BASE_CHANGELOG + 55) - - -#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages" +GLFS_MSGID( + CHANGELOG, CHANGELOG_MSG_OPEN_FAILED, CHANGELOG_MSG_BARRIER_FOP_FAILED, + CHANGELOG_MSG_VOL_MISCONFIGURED, CHANGELOG_MSG_RENAME_ERROR, + CHANGELOG_MSG_READ_ERROR, CHANGELOG_MSG_HTIME_ERROR, + CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED, + CHANGELOG_MSG_PTHREAD_COND_INIT_FAILED, CHANGELOG_MSG_CHILD_MISCONFIGURED, + CHANGELOG_MSG_DIR_OPTIONS_NOT_SET, CHANGELOG_MSG_CLOSE_ERROR, + CHANGELOG_MSG_PIPE_CREATION_ERROR, CHANGELOG_MSG_DICT_GET_FAILED, + CHANGELOG_MSG_BARRIER_INFO, CHANGELOG_MSG_BARRIER_ERROR, + CHANGELOG_MSG_GET_TIME_OP_FAILED, CHANGELOG_MSG_WRITE_FAILED, + CHANGELOG_MSG_PTHREAD_ERROR, CHANGELOG_MSG_INODE_NOT_FOUND, + CHANGELOG_MSG_FSYNC_OP_FAILED, CHANGELOG_MSG_TOTAL_LOG_INFO, + CHANGELOG_MSG_SNAP_INFO, CHANGELOG_MSG_SELECT_FAILED, + CHANGELOG_MSG_FCNTL_FAILED, CHANGELOG_MSG_BNOTIFY_INFO, + CHANGELOG_MSG_ENTRY_BUF_INFO, CHANGELOG_MSG_CHANGELOG_NOT_ACTIVE, + CHANGELOG_MSG_LOCAL_INIT_FAILED, CHANGELOG_MSG_NOTIFY_REGISTER_FAILED, + CHANGELOG_MSG_PROGRAM_NAME_REG_FAILED, CHANGELOG_MSG_HANDLE_PROBE_ERROR, + CHANGELOG_MSG_SET_FD_CONTEXT, CHANGELOG_MSG_FREEUP_FAILED, + CHANGELOG_MSG_RECONFIGURE, CHANGELOG_MSG_RPC_SUBMIT_REPLY_FAILED, + CHANGELOG_MSG_RPC_BUILD_ERROR, CHANGELOG_MSG_RPC_CONNECT_ERROR, + CHANGELOG_MSG_RPC_START_ERROR, CHANGELOG_MSG_BUFFER_STARVATION_ERROR, + CHANGELOG_MSG_SCAN_DIR_FAILED, CHANGELOG_MSG_FSETXATTR_FAILED, + CHANGELOG_MSG_FGETXATTR_FAILED, CHANGELOG_MSG_CLEANUP_ON_ACTIVE_REF, + CHANGELOG_MSG_DISPATCH_EVENT_FAILED, CHANGELOG_MSG_PUT_BUFFER_FAILED, + CHANGELOG_MSG_PTHREAD_COND_WAIT_FAILED, CHANGELOG_MSG_PTHREAD_CANCEL_FAILED, + CHANGELOG_MSG_INJECT_FSYNC_FAILED, CHANGELOG_MSG_CREATE_FRAME_FAILED, + CHANGELOG_MSG_FSTAT_OP_FAILED, CHANGELOG_MSG_LSEEK_OP_FAILED, + CHANGELOG_MSG_STRSTR_OP_FAILED, CHANGELOG_MSG_UNLINK_OP_FAILED, + 
CHANGELOG_MSG_DETECT_EMPTY_CHANGELOG_FAILED, + CHANGELOG_MSG_READLINK_OP_FAILED, CHANGELOG_MSG_EXPLICIT_ROLLOVER_FAILED, + CHANGELOG_MSG_RPCSVC_NOTIFY_FAILED, CHANGELOG_MSG_MEMORY_INIT_FAILED, + CHANGELOG_MSG_NO_MEMORY, CHANGELOG_MSG_HTIME_STAT_ERROR, + CHANGELOG_MSG_HTIME_CURRENT_ERROR, CHANGELOG_MSG_BNOTIFY_COND_INFO, + CHANGELOG_MSG_NO_HTIME_CURRENT, CHANGELOG_MSG_HTIME_CURRENT, + CHANGELOG_MSG_NEW_HTIME_FILE, CHANGELOG_MSG_MKDIR_ERROR, + CHANGELOG_MSG_PATH_NOT_FOUND, CHANGELOG_MSG_XATTR_INIT_FAILED, + CHANGELOG_MSG_WROTE_TO_CSNAP, CHANGELOG_MSG_UNUSED_0, + CHANGELOG_MSG_GET_BUFFER_FAILED, CHANGELOG_MSG_BARRIER_STATE_NOTIFY, + CHANGELOG_MSG_BARRIER_DISABLED, CHANGELOG_MSG_BARRIER_ALREADY_DISABLED, + CHANGELOG_MSG_BARRIER_ON_ERROR, CHANGELOG_MSG_BARRIER_ENABLE, + CHANGELOG_MSG_BARRIER_KEY_NOT_FOUND, CHANGELOG_MSG_ERROR_IN_DICT_GET, + CHANGELOG_MSG_UNUSED_1, CHANGELOG_MSG_UNUSED_2, + CHANGELOG_MSG_DEQUEUING_BARRIER_FOPS, + CHANGELOG_MSG_DEQUEUING_BARRIER_FOPS_FINISHED, + CHANGELOG_MSG_BARRIER_TIMEOUT, CHANGELOG_MSG_TIMEOUT_ADD_FAILED, + CHANGELOG_MSG_CLEANUP_ALREADY_SET); + +#define CHANGELOG_MSG_BARRIER_FOP_FAILED_STR \ + "failed to barrier FOPs, disabling changelog barrier" +#define CHANGELOG_MSG_MEMORY_INIT_FAILED_STR "memory accounting init failed" +#define CHANGELOG_MSG_NO_MEMORY_STR "failed to create local memory pool" +#define CHANGELOG_MSG_ENTRY_BUF_INFO_STR \ + "Entry cannot be captured for gfid, Capturing DATA entry." +#define CHANGELOG_MSG_PTHREAD_ERROR_STR "pthread error" +#define CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED_STR "pthread_mutex_init failed" +#define CHANGELOG_MSG_PTHREAD_COND_INIT_FAILED_STR "pthread_cond_init failed" +#define CHANGELOG_MSG_HTIME_ERROR_STR "failed to update HTIME file" +#define CHANGELOG_MSG_HTIME_STAT_ERROR_STR "unable to stat htime file" +#define CHANGELOG_MSG_HTIME_CURRENT_ERROR_STR "Error extracting HTIME_CURRENT." 
+#define CHANGELOG_MSG_UNLINK_OP_FAILED_STR "error unlinking empty changelog" +#define CHANGELOG_MSG_RENAME_ERROR_STR "error renaming" +#define CHANGELOG_MSG_MKDIR_ERROR_STR "unable to create directory" +#define CHANGELOG_MSG_BNOTIFY_INFO_STR \ + "Explicit rollover changelog signaling bnotify" +#define CHANGELOG_MSG_BNOTIFY_COND_INFO_STR "Woke up: bnotify conditional wait" +#define CHANGELOG_MSG_RECONFIGURE_STR "Reconfigure: Changelog Enable" +#define CHANGELOG_MSG_NO_HTIME_CURRENT_STR \ + "HTIME_CURRENT not found. Changelog enabled before init" +#define CHANGELOG_MSG_HTIME_CURRENT_STR "HTIME_CURRENT" +#define CHANGELOG_MSG_NEW_HTIME_FILE_STR \ + "Changelog enable: Creating new HTIME file" +#define CHANGELOG_MSG_FGETXATTR_FAILED_STR "fgetxattr failed" +#define CHANGELOG_MSG_TOTAL_LOG_INFO_STR "changelog info" +#define CHANGELOG_MSG_PTHREAD_COND_WAIT_FAILED_STR "pthread cond wait failed" +#define CHANGELOG_MSG_INODE_NOT_FOUND_STR "inode not found" +#define CHANGELOG_MSG_READLINK_OP_FAILED_STR \ + "could not read the link from the gfid handle" +#define CHANGELOG_MSG_OPEN_FAILED_STR "unable to open file" +#define CHANGELOG_MSG_RPC_CONNECT_ERROR_STR "failed to connect back" +#define CHANGELOG_MSG_BUFFER_STARVATION_ERROR_STR \ + "Failed to get buffer for RPC dispatch" +#define CHANGELOG_MSG_PTHREAD_CANCEL_FAILED_STR "could not cancel thread" +#define CHANGELOG_MSG_FSTAT_OP_FAILED_STR "Could not stat (CHANGELOG)" +#define CHANGELOG_MSG_LSEEK_OP_FAILED_STR "Could not lseek (changelog)" +#define CHANGELOG_MSG_PATH_NOT_FOUND_STR \ + "Could not find CHANGELOG in changelog path" +#define CHANGELOG_MSG_FSYNC_OP_FAILED_STR "fsync failed" +#define CHANGELOG_MSG_DETECT_EMPTY_CHANGELOG_FAILED_STR \ + "Error detecting empty changelog" +#define CHANGELOG_MSG_EXPLICIT_ROLLOVER_FAILED_STR \ + "Fail snapshot because of previous errors" +#define CHANGELOG_MSG_SCAN_DIR_FAILED_STR "scandir failed" +#define CHANGELOG_MSG_FSETXATTR_FAILED_STR "fsetxattr failed" +#define 
CHANGELOG_MSG_XATTR_INIT_FAILED_STR "Htime xattr initialization failed" +#define CHANGELOG_MSG_SNAP_INFO_STR "log in call path" +#define CHANGELOG_MSG_WRITE_FAILED_STR "error writing to disk" +#define CHANGELOG_MSG_WROTE_TO_CSNAP_STR "Successfully wrote to csnap" +#define CHANGELOG_MSG_GET_TIME_OP_FAILED_STR "Problem rolling over changelog(s)" +#define CHANGELOG_MSG_BARRIER_INFO_STR "Explicit wakeup on barrier notify" +#define CHANGELOG_MSG_SELECT_FAILED_STR "pthread_cond_timedwait failed" +#define CHANGELOG_MSG_INJECT_FSYNC_FAILED_STR "failed to inject fsync event" +#define CHANGELOG_MSG_LOCAL_INIT_FAILED_STR \ + "changelog local initialization failed" +#define CHANGELOG_MSG_GET_BUFFER_FAILED_STR "Failed to get buffer" +#define CHANGELOG_MSG_SET_FD_CONTEXT_STR \ + "could not set fd context(for release cbk)" +#define CHANGELOG_MSG_DICT_GET_FAILED_STR "Barrier failed" +#define CHANGELOG_MSG_BARRIER_STATE_NOTIFY_STR "Barrier notification" +#define CHANGELOG_MSG_BARRIER_ERROR_STR \ + "Received another barrier off notification while already off" +#define CHANGELOG_MSG_BARRIER_DISABLED_STR "disabled changelog barrier" +#define CHANGELOG_MSG_BARRIER_ALREADY_DISABLED_STR \ + "Changelog barrier already disabled" +#define CHANGELOG_MSG_BARRIER_ON_ERROR_STR \ + "Received another barrier on notification when last one is not served yet" +#define CHANGELOG_MSG_BARRIER_ENABLE_STR "Enabled changelog barrier" +#define CHANGELOG_MSG_BARRIER_KEY_NOT_FOUND_STR "barrier key not found" +#define CHANGELOG_MSG_ERROR_IN_DICT_GET_STR \ + "Something went wrong in dict_get_str_boolean" +#define CHANGELOG_MSG_DIR_OPTIONS_NOT_SET_STR "changelog-dir option is not set" +#define CHANGELOG_MSG_FREEUP_FAILED_STR "could not cleanup bootstrapper" +#define CHANGELOG_MSG_CHILD_MISCONFIGURED_STR \ + "translator needs a single subvolume" +#define CHANGELOG_MSG_VOL_MISCONFIGURED_STR \ + "dangling volume. 
please check volfile" +#define CHANGELOG_MSG_DEQUEUING_BARRIER_FOPS_STR \ + "Dequeuing all the changelog barriered fops" +#define CHANGELOG_MSG_DEQUEUING_BARRIER_FOPS_FINISHED_STR \ + "Dequeuing changelog barriered fops is finished" +#define CHANGELOG_MSG_BARRIER_TIMEOUT_STR \ + "Disabling changelog barrier because of the timeout" +#define CHANGELOG_MSG_TIMEOUT_ADD_FAILED_STR \ + "Couldn't add changelog barrier timeout event" +#define CHANGELOG_MSG_RPC_BUILD_ERROR_STR "failed to build rpc options" +#define CHANGELOG_MSG_NOTIFY_REGISTER_FAILED_STR "failed to register notify" +#define CHANGELOG_MSG_RPC_START_ERROR_STR "failed to start rpc" +#define CHANGELOG_MSG_CREATE_FRAME_FAILED_STR "failed to create frame" +#define CHANGELOG_MSG_RPC_SUBMIT_REPLY_FAILED_STR "failed to serialize reply" +#define CHANGELOG_MSG_PROGRAM_NAME_REG_FAILED_STR "cannot register program" +#define CHANGELOG_MSG_CHANGELOG_NOT_ACTIVE_STR \ + "Changelog is not active, return success" +#define CHANGELOG_MSG_PUT_BUFFER_FAILED_STR \ + "failed to put buffer after consumption" +#define CHANGELOG_MSG_CLEANUP_ALREADY_SET_STR \ + "cleanup_starting flag is already set for xl" +#define CHANGELOG_MSG_HANDLE_PROBE_ERROR_STR "xdr decoding error" #endif /* !_CHANGELOG_MESSAGES_H_ */ diff --git a/xlators/features/changelog/src/changelog-misc.h b/xlators/features/changelog/src/changelog-misc.h index 778f79c82c5..e2addc09414 100644 --- a/xlators/features/changelog/src/changelog-misc.h +++ b/xlators/features/changelog/src/changelog-misc.h @@ -11,10 +11,10 @@ #ifndef _CHANGELOG_MISC_H #define _CHANGELOG_MISC_H -#include "glusterfs.h" -#include "common-utils.h" +#include <glusterfs/glusterfs.h> +#include <glusterfs/common-utils.h> -#define CHANGELOG_MAX_TYPE 3 +#define CHANGELOG_MAX_TYPE 4 #define CHANGELOG_FILE_NAME "CHANGELOG" #define HTIME_FILE_NAME "HTIME" #define CSNAP_FILE_NAME "CHANGELOG.SNAP" @@ -22,110 +22,110 @@ #define HTIME_CURRENT "trusted.glusterfs.current_htime" #define HTIME_INITIAL_VALUE "0:0" 
-#define CHANGELOG_VERSION_MAJOR 1 -#define CHANGELOG_VERSION_MINOR 2 +#define CHANGELOG_VERSION_MAJOR 1 +#define CHANGELOG_VERSION_MINOR 2 -#define CHANGELOG_UNIX_SOCK DEFAULT_VAR_RUN_DIRECTORY"/changelog-%s.sock" -#define CHANGELOG_TMP_UNIX_SOCK DEFAULT_VAR_RUN_DIRECTORY"/.%s%lu.sock" +#define CHANGELOG_UNIX_SOCK DEFAULT_VAR_RUN_DIRECTORY "/changelog-%s.sock" +#define CHANGELOG_TMP_UNIX_SOCK DEFAULT_VAR_RUN_DIRECTORY "/.%s%lu.sock" /** * header starts with the version and the format of the changelog. * 'version' not much of a use now. */ -#define CHANGELOG_HEADER \ - "GlusterFS Changelog | version: v%d.%d | encoding : %d\n" - -#define CHANGELOG_MAKE_SOCKET_PATH(brick_path, sockpath, len) do { \ - char md5_sum[MD5_DIGEST_LENGTH*2+1] = {0,}; \ - md5_wrapper((unsigned char *) brick_path, \ - strlen(brick_path), \ - md5_sum); \ - (void) snprintf (sockpath, len, \ - CHANGELOG_UNIX_SOCK, md5_sum); \ - } while (0) - -#define CHANGELOG_MAKE_TMP_SOCKET_PATH(brick_path, sockpath, len) do { \ - unsigned long pid = 0; \ - char md5_sum[MD5_DIGEST_LENGTH*2+1] = {0,}; \ - pid = (unsigned long) getpid (); \ - md5_wrapper((unsigned char *) brick_path, \ - strlen(brick_path), \ - md5_sum); \ - (void) snprintf (sockpath, \ - len, CHANGELOG_TMP_UNIX_SOCK, \ - md5_sum, pid); \ - } while (0) - +#define CHANGELOG_HEADER \ + "GlusterFS Changelog | version: v%d.%d | encoding : %d\n" + +#define CHANGELOG_MAKE_SOCKET_PATH(brick_path, sockpath, len) \ + do { \ + char xxh64[GF_XXH64_DIGEST_LENGTH * 2 + 1] = { \ + 0, \ + }; \ + gf_xxh64_wrapper((unsigned char *)brick_path, strlen(brick_path), \ + GF_XXHSUM64_DEFAULT_SEED, xxh64); \ + (void)snprintf(sockpath, len, CHANGELOG_UNIX_SOCK, xxh64); \ + } while (0) + +#define CHANGELOG_MAKE_TMP_SOCKET_PATH(brick_path, sockpath, len) \ + do { \ + unsigned long pid = 0; \ + char xxh64[GF_XXH64_DIGEST_LENGTH * 2 + 1] = { \ + 0, \ + }; \ + pid = (unsigned long)getpid(); \ + gf_xxh64_wrapper((unsigned char *)brick_path, strlen(brick_path), \ + 
GF_XXHSUM64_DEFAULT_SEED, xxh64); \ + (void)snprintf(sockpath, len, CHANGELOG_TMP_UNIX_SOCK, xxh64, pid); \ + } while (0) /** * ... used by libgfchangelog. */ -#define CHANGELOG_GET_HEADER_INFO(fd, buffer, len, enc, maj, min, elen) do { \ - FILE *fp; \ - int fd_dup; \ - \ - enc = -1; \ - maj = -1; \ - min = -1; \ - fd_dup = dup (fd); \ - \ - if (fd_dup != -1) { \ - fp = fdopen (fd_dup, "r"); \ - if (fp) { \ - if (fgets (buffer, len, fp)) { \ - elen = strlen (buffer); \ - sscanf (buffer, \ - CHANGELOG_HEADER, \ - &maj, &min, &enc); \ - } \ - fclose (fp); \ - } else { \ - sys_close (fd_dup); \ - } \ - } \ - } while (0) - -#define CHANGELOG_FILL_HTIME_DIR(changelog_dir, path) do { \ - strncpy (path, changelog_dir, sizeof (path) - 1); \ - strcat (path, "/htime"); \ - } while(0) - -#define CHANGELOG_FILL_CSNAP_DIR(changelog_dir, path) do { \ - strncpy (path, changelog_dir, sizeof (path) - 1); \ - strcat (path, "/csnap"); \ - } while(0) +#define CHANGELOG_GET_HEADER_INFO(fd, buffer, len, enc, maj, min, elen) \ + do { \ + FILE *fp; \ + int fd_dup; \ + \ + enc = -1; \ + maj = -1; \ + min = -1; \ + fd_dup = dup(fd); \ + \ + if (fd_dup != -1) { \ + fp = fdopen(fd_dup, "r"); \ + if (fp) { \ + if (fgets(buffer, len, fp)) { \ + elen = strlen(buffer); \ + sscanf(buffer, CHANGELOG_HEADER, &maj, &min, &enc); \ + } \ + fclose(fp); \ + } else { \ + sys_close(fd_dup); \ + } \ + } \ + } while (0) + +#define CHANGELOG_FILL_HTIME_DIR(changelog_dir, path) \ + do { \ + snprintf(path, sizeof(path), "%s/htime", changelog_dir); \ + } while (0) + +#define CHANGELOG_FILL_CSNAP_DIR(changelog_dir, path) \ + do { \ + snprintf(path, sizeof(path), "%s/csnap", changelog_dir); \ + } while (0) /** - * everything after 'CHANGELOG_TYPE_ENTRY' are internal types + * everything after 'CHANGELOG_TYPE_METADATA_XATTR' are internal types * (ie. 
none of the fops trigger this type of event), hence - * CHANGELOG_MAX_TYPE = 3 + * CHANGELOG_MAX_TYPE = 4 */ typedef enum { - CHANGELOG_TYPE_DATA = 0, - CHANGELOG_TYPE_METADATA, - CHANGELOG_TYPE_ENTRY, - CHANGELOG_TYPE_ROLLOVER, - CHANGELOG_TYPE_FSYNC, + CHANGELOG_TYPE_DATA = 0, + CHANGELOG_TYPE_METADATA, + CHANGELOG_TYPE_ENTRY, + CHANGELOG_TYPE_METADATA_XATTR, + CHANGELOG_TYPE_ROLLOVER, + CHANGELOG_TYPE_FSYNC, } changelog_log_type; /* operation modes - RT for now */ typedef enum { - CHANGELOG_MODE_RT = 0, + CHANGELOG_MODE_RT = 0, } changelog_mode_t; /* encoder types */ typedef enum { - CHANGELOG_ENCODE_MIN = 0, - CHANGELOG_ENCODE_BINARY, - CHANGELOG_ENCODE_ASCII, - CHANGELOG_ENCODE_MAX, + CHANGELOG_ENCODE_MIN = 0, + CHANGELOG_ENCODE_BINARY, + CHANGELOG_ENCODE_ASCII, + CHANGELOG_ENCODE_MAX, } changelog_encoder_t; -#define CHANGELOG_VALID_ENCODING(enc) \ - (enc > CHANGELOG_ENCODE_MIN && enc < CHANGELOG_ENCODE_MAX) +#define CHANGELOG_VALID_ENCODING(enc) \ + (enc > CHANGELOG_ENCODE_MIN && enc < CHANGELOG_ENCODE_MAX) -#define CHANGELOG_TYPE_IS_ENTRY(type) (type == CHANGELOG_TYPE_ENTRY) -#define CHANGELOG_TYPE_IS_ROLLOVER(type) (type == CHANGELOG_TYPE_ROLLOVER) -#define CHANGELOG_TYPE_IS_FSYNC(type) (type == CHANGELOG_TYPE_FSYNC) +#define CHANGELOG_TYPE_IS_ENTRY(type) (type == CHANGELOG_TYPE_ENTRY) +#define CHANGELOG_TYPE_IS_ROLLOVER(type) (type == CHANGELOG_TYPE_ROLLOVER) +#define CHANGELOG_TYPE_IS_FSYNC(type) (type == CHANGELOG_TYPE_FSYNC) #endif /* _CHANGELOG_MISC_H */ diff --git a/xlators/features/changelog/src/changelog-rpc-common.c b/xlators/features/changelog/src/changelog-rpc-common.c index 4525923d34d..125246a17e1 100644 --- a/xlators/features/changelog/src/changelog-rpc-common.c +++ b/xlators/features/changelog/src/changelog-rpc-common.c @@ -11,7 +11,7 @@ #include "changelog-rpc-common.h" #include "changelog-messages.h" -#include "syscall.h" +#include <glusterfs/syscall.h> /** ***************************************************** Client Interface @@ -24,65 
+24,63 @@ */ void * -changelog_rpc_poller (void *arg) +changelog_rpc_poller(void *arg) { - xlator_t *this = arg; + xlator_t *this = arg; - (void) event_dispatch (this->ctx->event_pool); - return NULL; + (void)gf_event_dispatch(this->ctx->event_pool); + return NULL; } struct rpc_clnt * -changelog_rpc_client_init (xlator_t *this, void *cbkdata, - char *sockfile, rpc_clnt_notify_t fn) +changelog_rpc_client_init(xlator_t *this, void *cbkdata, char *sockfile, + rpc_clnt_notify_t fn) { - int ret = 0; - struct rpc_clnt *rpc = NULL; - dict_t *options = NULL; - - if (!cbkdata) - cbkdata = this; - - options = dict_new (); - if (!options) - goto error_return; - - ret = rpc_transport_unix_options_build (&options, sockfile, 0); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_RPC_BUILD_ERROR, - "failed to build rpc options"); - goto dealloc_dict; - } - - rpc = rpc_clnt_new (options, this, this->name, 16); - if (!rpc) - goto dealloc_dict; - - ret = rpc_clnt_register_notify (rpc, fn, cbkdata); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_NOTIFY_REGISTER_FAILED, - "failed to register notify"); - goto dealloc_rpc_clnt; - } - - ret = rpc_clnt_start (rpc); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_RPC_START_ERROR, - "failed to start rpc"); - goto dealloc_rpc_clnt; - } - - return rpc; - - dealloc_rpc_clnt: - rpc_clnt_unref (rpc); - dealloc_dict: - dict_unref (options); - error_return: - return NULL; + int ret = 0; + struct rpc_clnt *rpc = NULL; + dict_t *options = NULL; + + if (!cbkdata) + cbkdata = this; + + options = dict_new(); + if (!options) + goto error_return; + + ret = rpc_transport_unix_options_build(options, sockfile, 0); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_RPC_BUILD_ERROR, + NULL); + goto dealloc_dict; + } + + rpc = rpc_clnt_new(options, this, this->name, 16); + if (!rpc) + goto dealloc_dict; + + ret = rpc_clnt_register_notify(rpc, fn, cbkdata); + if (ret) { + 
gf_smsg(this->name, GF_LOG_ERROR, 0, + CHANGELOG_MSG_NOTIFY_REGISTER_FAILED, NULL); + goto dealloc_rpc_clnt; + } + + ret = rpc_clnt_start(rpc); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_RPC_START_ERROR, + NULL); + goto dealloc_rpc_clnt; + } + + dict_unref(options); + return rpc; + +dealloc_rpc_clnt: + rpc_clnt_unref(rpc); +dealloc_dict: + dict_unref(options); +error_return: + return NULL; } /** @@ -90,96 +88,96 @@ changelog_rpc_client_init (xlator_t *this, void *cbkdata, * RPC server. */ int -changelog_rpc_sumbit_req (struct rpc_clnt *rpc, void *req, - call_frame_t *frame, rpc_clnt_prog_t *prog, - int procnum, struct iovec *payload, int payloadcnt, - struct iobref *iobref, xlator_t *this, - fop_cbk_fn_t cbkfn, xdrproc_t xdrproc) +changelog_rpc_sumbit_req(struct rpc_clnt *rpc, void *req, call_frame_t *frame, + rpc_clnt_prog_t *prog, int procnum, + struct iovec *payload, int payloadcnt, + struct iobref *iobref, xlator_t *this, + fop_cbk_fn_t cbkfn, xdrproc_t xdrproc) { - int ret = 0; - int count = 0; - struct iovec iov = {0, }; - struct iobuf *iobuf = NULL; - char new_iobref = 0; - ssize_t xdr_size = 0; + int ret = 0; + int count = 0; + struct iovec iov = { + 0, + }; + struct iobuf *iobuf = NULL; + char new_iobref = 0; + ssize_t xdr_size = 0; - GF_ASSERT (this); + GF_ASSERT(this); - if (req) { - xdr_size = xdr_sizeof (xdrproc, req); + if (req) { + xdr_size = xdr_sizeof(xdrproc, req); - iobuf = iobuf_get2 (this->ctx->iobuf_pool, xdr_size); - if (!iobuf) { - goto out; - }; + iobuf = iobuf_get2(this->ctx->iobuf_pool, xdr_size); + if (!iobuf) { + goto out; + }; - if (!iobref) { - iobref = iobref_new (); - if (!iobref) { - goto out; - } - - new_iobref = 1; - } + if (!iobref) { + iobref = iobref_new(); + if (!iobref) { + goto out; + } - iobref_add (iobref, iobuf); + new_iobref = 1; + } - iov.iov_base = iobuf->ptr; - iov.iov_len = iobuf_size (iobuf); + iobref_add(iobref, iobuf); - /* Create the xdr payload */ - ret = xdr_serialize_generic (iov, req, 
xdrproc); - if (ret == -1) { - goto out; - } + iov.iov_base = iobuf->ptr; + iov.iov_len = iobuf_size(iobuf); - iov.iov_len = ret; - count = 1; + /* Create the xdr payload */ + ret = xdr_serialize_generic(iov, req, xdrproc); + if (ret == -1) { + goto out; } - ret = rpc_clnt_submit (rpc, prog, procnum, cbkfn, &iov, count, - payload, payloadcnt, iobref, frame, NULL, - 0, NULL, 0, NULL); + iov.iov_len = ret; + count = 1; + } - out: - if (new_iobref) - iobref_unref (iobref); - if (iobuf) - iobuf_unref (iobuf); - return ret; + ret = rpc_clnt_submit(rpc, prog, procnum, cbkfn, &iov, count, payload, + payloadcnt, iobref, frame, NULL, 0, NULL, 0, NULL); + +out: + if (new_iobref) + iobref_unref(iobref); + if (iobuf) + iobuf_unref(iobuf); + return ret; } /** * Entry point to perform a remote procedure call */ int -changelog_invoke_rpc (xlator_t *this, struct rpc_clnt *rpc, - rpc_clnt_prog_t *prog, int procidx, void *arg) +changelog_invoke_rpc(xlator_t *this, struct rpc_clnt *rpc, + rpc_clnt_prog_t *prog, int procidx, void *arg) { - int ret = 0; - call_frame_t *frame = NULL; - rpc_clnt_procedure_t *proc = NULL; - - if (!this || !prog) - goto error_return; - - frame = create_frame (this, this->ctx->pool); - if (!frame) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_CREATE_FRAME_FAILED, - "failed to create frame"); - goto error_return; - } + int ret = 0; + call_frame_t *frame = NULL; + rpc_clnt_procedure_t *proc = NULL; - proc = &prog->proctable[procidx]; - if (proc->fn) - ret = proc->fn (frame, this, arg); + if (!this || !prog) + goto error_return; - STACK_DESTROY (frame->root); - return ret; + frame = create_frame(this, this->ctx->pool); + if (!frame) { + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_CREATE_FRAME_FAILED, + NULL); + goto error_return; + } - error_return: - return -1; + proc = &prog->proctable[procidx]; + if (proc->fn) + ret = proc->fn(frame, this, arg); + + STACK_DESTROY(frame->root); + return ret; + +error_return: + return -1; } /** @@ -189,161 
+187,173 @@ changelog_invoke_rpc (xlator_t *this, struct rpc_clnt *rpc, */ struct iobuf * -__changelog_rpc_serialize_reply (rpcsvc_request_t *req, void *arg, - struct iovec *outmsg, xdrproc_t xdrproc) +__changelog_rpc_serialize_reply(rpcsvc_request_t *req, void *arg, + struct iovec *outmsg, xdrproc_t xdrproc) { - struct iobuf *iob = NULL; - ssize_t retlen = 0; - ssize_t rsp_size = 0; + struct iobuf *iob = NULL; + ssize_t retlen = 0; + ssize_t rsp_size = 0; - rsp_size = xdr_sizeof (xdrproc, arg); - iob = iobuf_get2 (req->svc->ctx->iobuf_pool, rsp_size); - if (!iob) - goto error_return; + rsp_size = xdr_sizeof(xdrproc, arg); + iob = iobuf_get2(req->svc->ctx->iobuf_pool, rsp_size); + if (!iob) + goto error_return; - iobuf_to_iovec (iob, outmsg); + iobuf_to_iovec(iob, outmsg); - retlen = xdr_serialize_generic (*outmsg, arg, xdrproc); - if (retlen == -1) - goto unref_iob; + retlen = xdr_serialize_generic(*outmsg, arg, xdrproc); + if (retlen == -1) + goto unref_iob; - outmsg->iov_len = retlen; - return iob; + outmsg->iov_len = retlen; + return iob; - unref_iob: - iobuf_unref (iob); - error_return: - return NULL; +unref_iob: + iobuf_unref(iob); +error_return: + return NULL; } int -changelog_rpc_sumbit_reply (rpcsvc_request_t *req, - void *arg, struct iovec *payload, int payloadcount, - struct iobref *iobref, xdrproc_t xdrproc) +changelog_rpc_sumbit_reply(rpcsvc_request_t *req, void *arg, + struct iovec *payload, int payloadcount, + struct iobref *iobref, xdrproc_t xdrproc) { - int ret = -1; - struct iobuf *iob = NULL; - struct iovec iov = {0,}; - char new_iobref = 0; - - if (!req) - goto return_ret; - - if (!iobref) { - iobref = iobref_new (); - if (!iobref) - goto return_ret; - new_iobref = 1; - } - - iob = __changelog_rpc_serialize_reply (req, arg, &iov, xdrproc); - if (!iob) - gf_msg ("", GF_LOG_ERROR, 0, - CHANGELOG_MSG_RPC_SUBMIT_REPLY_FAILED, - "failed to serialize reply"); - else - iobref_add (iobref, iob); - - ret = rpcsvc_submit_generic (req, &iov, - 1, payload, 
payloadcount, iobref); - - if (new_iobref) - iobref_unref (iobref); - if (iob) - iobuf_unref (iob); - return_ret: - return ret; + int ret = -1; + struct iobuf *iob = NULL; + struct iovec iov = { + 0, + }; + char new_iobref = 0; + + if (!req) + goto return_ret; + + if (!iobref) { + iobref = iobref_new(); + if (!iobref) + goto return_ret; + new_iobref = 1; + } + + iob = __changelog_rpc_serialize_reply(req, arg, &iov, xdrproc); + if (!iob) + gf_smsg("", GF_LOG_ERROR, 0, CHANGELOG_MSG_RPC_SUBMIT_REPLY_FAILED, + NULL); + else + iobref_add(iobref, iob); + + ret = rpcsvc_submit_generic(req, &iov, 1, payload, payloadcount, iobref); + + if (new_iobref) + iobref_unref(iobref); + if (iob) + iobuf_unref(iob); +return_ret: + return ret; } void -changelog_rpc_server_destroy (xlator_t *this, rpcsvc_t *rpc, char *sockfile, - rpcsvc_notify_t fn, struct rpcsvc_program **progs) +changelog_rpc_server_destroy(xlator_t *this, rpcsvc_t *rpc, char *sockfile, + rpcsvc_notify_t fn, struct rpcsvc_program **progs) { - rpcsvc_listener_t *listener = NULL; - rpcsvc_listener_t *next = NULL; - struct rpcsvc_program *prog = NULL; - - while (*progs) { - prog = *progs; - (void) rpcsvc_program_unregister (rpc, prog); + rpcsvc_listener_t *listener = NULL; + rpcsvc_listener_t *next = NULL; + struct rpcsvc_program *prog = NULL; + rpc_transport_t *trans = NULL; + + if (!rpc) + return; + + while (*progs) { + prog = *progs; + (void)rpcsvc_program_unregister(rpc, prog); + progs++; + } + + list_for_each_entry_safe(listener, next, &rpc->listeners, list) + { + if (listener->trans) { + trans = listener->trans; + rpc_transport_disconnect(trans, _gf_false); } - - list_for_each_entry_safe (listener, next, &rpc->listeners, list) { - rpcsvc_listener_destroy (listener); + } + + (void)rpcsvc_unregister_notify(rpc, fn, this); + + /* TODO Avoid freeing rpc object in case of brick multiplex + after freeing rpc object svc->rpclock corrupted and it takes + more time to detach a brick + */ + if (!this->cleanup_starting) { + 
if (rpc->rxpool) { + mem_pool_destroy(rpc->rxpool); + rpc->rxpool = NULL; } - - (void) rpcsvc_unregister_notify (rpc, fn, this); - sys_unlink (sockfile); - - GF_FREE (rpc); + GF_FREE(rpc); + } } rpcsvc_t * -changelog_rpc_server_init (xlator_t *this, char *sockfile, void *cbkdata, - rpcsvc_notify_t fn, struct rpcsvc_program **progs) +changelog_rpc_server_init(xlator_t *this, char *sockfile, void *cbkdata, + rpcsvc_notify_t fn, struct rpcsvc_program **progs) { - int j = 0; - int ret = 0; - rpcsvc_t *rpc = NULL; - dict_t *options = NULL; - struct rpcsvc_program *prog = NULL; - - if (!cbkdata) - cbkdata = this; - - options = dict_new (); - if (!options) - goto error_return; - - ret = rpcsvc_transport_unix_options_build (&options, sockfile); - if (ret) - goto dealloc_dict; - - rpc = rpcsvc_init (this, this->ctx, options, 8); - if (rpc == NULL) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_RPC_START_ERROR, - "failed to init rpc"); - goto dealloc_dict; - } + int ret = 0; + rpcsvc_t *rpc = NULL; + dict_t *options = NULL; + struct rpcsvc_program *prog = NULL; - ret = rpcsvc_register_notify (rpc, fn, cbkdata); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_NOTIFY_REGISTER_FAILED, - "failed to register notify function"); - goto dealloc_rpc; - } + if (!cbkdata) + cbkdata = this; - ret = rpcsvc_create_listeners (rpc, options, this->name); - if (ret != 1) { - gf_msg_debug (this->name, - 0, "failed to create listeners"); - goto dealloc_rpc; - } + options = dict_new(); + if (!options) + return NULL; - while (*progs) { - prog = *progs; - ret = rpcsvc_program_register (rpc, prog); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_PROGRAM_NAME_REG_FAILED, - "cannot register program " - "(name: %s, prognum: %d, pogver: %d)", - prog->progname, prog->prognum, prog->progver); - goto dealloc_rpc; - } - - progs++; + ret = rpcsvc_transport_unix_options_build(options, sockfile); + if (ret) + goto dealloc_dict; + + rpc = rpcsvc_init(this, 
this->ctx, options, 8); + if (rpc == NULL) { + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_RPC_START_ERROR, + NULL); + goto dealloc_dict; + } + + ret = rpcsvc_register_notify(rpc, fn, cbkdata); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, + CHANGELOG_MSG_NOTIFY_REGISTER_FAILED, NULL); + goto dealloc_rpc; + } + + ret = rpcsvc_create_listeners(rpc, options, this->name); + if (ret != 1) { + gf_msg_debug(this->name, 0, "failed to create listeners"); + goto dealloc_rpc; + } + + while (*progs) { + prog = *progs; + ret = rpcsvc_program_register(rpc, prog, _gf_false); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, + CHANGELOG_MSG_PROGRAM_NAME_REG_FAILED, "name%s", + prog->progname, "prognum=%d", prog->prognum, "pogver=%d", + prog->progver, NULL); + goto dealloc_rpc; } - dict_unref (options); - return rpc; + progs++; + } - dealloc_rpc: - GF_FREE (rpc); - dealloc_dict: - dict_unref (options); - error_return: - return NULL; + dict_unref(options); + return rpc; + +dealloc_rpc: + GF_FREE(rpc); +dealloc_dict: + dict_unref(options); + return NULL; } diff --git a/xlators/features/changelog/src/changelog-rpc-common.h b/xlators/features/changelog/src/changelog-rpc-common.h index 95c850c9400..4d9aa2c694b 100644 --- a/xlators/features/changelog/src/changelog-rpc-common.h +++ b/xlators/features/changelog/src/changelog-rpc-common.h @@ -13,8 +13,8 @@ #include "rpcsvc.h" #include "rpc-clnt.h" -#include "event.h" -#include "call-stub.h" +#include <glusterfs/gf-event.h> +#include <glusterfs/call-stub.h> #include "changelog-xdr.h" #include "xdr-generic.h" @@ -24,61 +24,62 @@ /** * Let's keep this non-configurable for now. 
*/ -#define NR_ROTT_BUFFS 4 +#define NR_ROTT_BUFFS 4 #define NR_DISPATCHERS (NR_ROTT_BUFFS - 1) enum changelog_rpc_procnum { - CHANGELOG_RPC_PROC_NULL = 0, - CHANGELOG_RPC_PROBE_FILTER = 1, - CHANGELOG_RPC_PROC_MAX = 2, + CHANGELOG_RPC_PROC_NULL = 0, + CHANGELOG_RPC_PROBE_FILTER = 1, + CHANGELOG_RPC_PROC_MAX = 2, }; -#define CHANGELOG_RPC_PROGNUM 1885957735 -#define CHANGELOG_RPC_PROGVER 1 +#define CHANGELOG_RPC_PROGNUM 1885957735 +#define CHANGELOG_RPC_PROGVER 1 /** * reverse connection: data xfer path */ enum changelog_reverse_rpc_procnum { - CHANGELOG_REV_PROC_NULL = 0, - CHANGELOG_REV_PROC_EVENT = 1, - CHANGELOG_REV_PROC_MAX = 2, + CHANGELOG_REV_PROC_NULL = 0, + CHANGELOG_REV_PROC_EVENT = 1, + CHANGELOG_REV_PROC_MAX = 2, }; -#define CHANGELOG_REV_RPC_PROCNUM 1886350951 -#define CHANGELOG_REV_RPC_PROCVER 1 +#define CHANGELOG_REV_RPC_PROCNUM 1886350951 +#define CHANGELOG_REV_RPC_PROCVER 1 typedef struct changelog_rpc { - rpcsvc_t *svc; - struct rpc_clnt *rpc; - char sock[UNIX_PATH_MAX]; /* tied to server */ + rpcsvc_t *svc; + struct rpc_clnt *rpc; + char sock[UNIX_PATH_MAX]; /* tied to server */ } changelog_rpc_t; /* event poller */ -void *changelog_rpc_poller (void *); +void * +changelog_rpc_poller(void *); /* CLIENT API */ struct rpc_clnt * -changelog_rpc_client_init (xlator_t *, void *, char *, rpc_clnt_notify_t); +changelog_rpc_client_init(xlator_t *, void *, char *, rpc_clnt_notify_t); int -changelog_rpc_sumbit_req (struct rpc_clnt *, void *, call_frame_t *, - rpc_clnt_prog_t *, int , struct iovec *, int, - struct iobref *, xlator_t *, fop_cbk_fn_t, xdrproc_t); +changelog_rpc_sumbit_req(struct rpc_clnt *, void *, call_frame_t *, + rpc_clnt_prog_t *, int, struct iovec *, int, + struct iobref *, xlator_t *, fop_cbk_fn_t, xdrproc_t); int -changelog_invoke_rpc (xlator_t *, struct rpc_clnt *, - rpc_clnt_prog_t *, int , void *); +changelog_invoke_rpc(xlator_t *, struct rpc_clnt *, rpc_clnt_prog_t *, int, + void *); /* SERVER API */ int -changelog_rpc_sumbit_reply 
(rpcsvc_request_t *, void *, - struct iovec *, int, struct iobref *, xdrproc_t); +changelog_rpc_sumbit_reply(rpcsvc_request_t *, void *, struct iovec *, int, + struct iobref *, xdrproc_t); rpcsvc_t * -changelog_rpc_server_init (xlator_t *, char *, void*, - rpcsvc_notify_t, struct rpcsvc_program **); +changelog_rpc_server_init(xlator_t *, char *, void *, rpcsvc_notify_t, + struct rpcsvc_program **); void -changelog_rpc_server_destroy (xlator_t *, rpcsvc_t *, char *, - rpcsvc_notify_t, struct rpcsvc_program **); +changelog_rpc_server_destroy(xlator_t *, rpcsvc_t *, char *, rpcsvc_notify_t, + struct rpcsvc_program **); #endif diff --git a/xlators/features/changelog/src/changelog-rpc.c b/xlators/features/changelog/src/changelog-rpc.c index b07030ec28e..440b88091a6 100644 --- a/xlators/features/changelog/src/changelog-rpc.c +++ b/xlators/features/changelog/src/changelog-rpc.c @@ -8,216 +8,346 @@ cases as published by the Free Software Foundation. */ +#include <glusterfs/syscall.h> #include "changelog-rpc.h" #include "changelog-mem-types.h" #include "changelog-ev-handle.h" -struct rpcsvc_program *changelog_programs[]; +static struct rpcsvc_program *changelog_programs[]; static void -changelog_cleanup_dispatchers (xlator_t *this, - changelog_priv_t *priv, int count) +changelog_cleanup_dispatchers(xlator_t *this, changelog_priv_t *priv, int count) { - for (; count >= 0; count--) { - (void) changelog_thread_cleanup - (this, priv->ev_dispatcher[count]); - } + for (count--; count >= 0; count--) { + (void)changelog_thread_cleanup(this, priv->ev_dispatcher[count]); + priv->ev_dispatcher[count] = 0; + } } -static int -changelog_cleanup_rpc_threads (xlator_t *this, changelog_priv_t *priv) +int +changelog_cleanup_rpc_threads(xlator_t *this, changelog_priv_t *priv) { - int ret = 0; - changelog_clnt_t *conn = NULL; - - conn = &priv->connections; - if (!conn) - return 0; - - /** terminate RPC thread(s) */ - ret = changelog_thread_cleanup (this, priv->connector); - if (ret != 0) - 
goto error_return; - /** terminate dispatcher thread(s) */ - changelog_cleanup_dispatchers (this, priv, priv->nr_dispatchers); - - /* TODO: what about pending and waiting connections? */ - changelog_ev_cleanup_connections (this, conn); - - /* destroy locks */ - ret = pthread_mutex_destroy (&conn->pending_lock); - if (ret != 0) - goto error_return; - ret = pthread_cond_destroy (&conn->pending_cond); - if (ret != 0) - goto error_return; - ret = LOCK_DESTROY (&conn->active_lock); - if (ret != 0) - goto error_return; - ret = LOCK_DESTROY (&conn->wait_lock); - if (ret != 0) - goto error_return; + int ret = 0; + changelog_clnt_t *conn = NULL; + + conn = &priv->connections; + if (!conn) return 0; - error_return: - return -1; + /** terminate RPC thread(s) */ + ret = changelog_thread_cleanup(this, priv->connector); + if (ret != 0) + goto error_return; + priv->connector = 0; + + /** terminate dispatcher thread(s) */ + changelog_cleanup_dispatchers(this, priv, priv->nr_dispatchers); + + /* destroy locks */ + ret = pthread_mutex_destroy(&conn->pending_lock); + if (ret != 0) + goto error_return; + ret = pthread_cond_destroy(&conn->pending_cond); + if (ret != 0) + goto error_return; + ret = LOCK_DESTROY(&conn->active_lock); + if (ret != 0) + goto error_return; + ret = LOCK_DESTROY(&conn->wait_lock); + if (ret != 0) + goto error_return; + return 0; + +error_return: + return -1; } static int -changelog_init_rpc_threads (xlator_t *this, changelog_priv_t *priv, - rbuf_t *rbuf, int nr_dispatchers) +changelog_init_rpc_threads(xlator_t *this, changelog_priv_t *priv, rbuf_t *rbuf, + int nr_dispatchers) +{ + int j = 0; + int ret = 0; + changelog_clnt_t *conn = NULL; + + conn = &priv->connections; + + conn->this = this; + conn->rbuf = rbuf; + conn->sequence = 1; /* start with sequence number one */ + + INIT_LIST_HEAD(&conn->pending); + INIT_LIST_HEAD(&conn->active); + INIT_LIST_HEAD(&conn->waitq); + + ret = pthread_mutex_init(&conn->pending_lock, NULL); + if (ret) + goto error_return; + 
ret = pthread_cond_init(&conn->pending_cond, NULL); + if (ret) + goto cleanup_pending_lock; + + ret = LOCK_INIT(&conn->active_lock); + if (ret) + goto cleanup_pending_cond; + ret = LOCK_INIT(&conn->wait_lock); + if (ret) + goto cleanup_active_lock; + + /* spawn reverse connection thread */ + ret = gf_thread_create(&priv->connector, NULL, changelog_ev_connector, conn, + "clogecon"); + if (ret != 0) + goto cleanup_wait_lock; + + /* spawn dispatcher thread(s) */ + priv->ev_dispatcher = GF_CALLOC(nr_dispatchers, sizeof(pthread_t), + gf_changelog_mt_ev_dispatcher_t); + if (!priv->ev_dispatcher) + goto cleanup_connector; + + /* spawn dispatcher threads */ + for (; j < nr_dispatchers; j++) { + ret = gf_thread_create(&priv->ev_dispatcher[j], NULL, + changelog_ev_dispatch, conn, "clogd%03hx", + j & 0x3ff); + if (ret != 0) { + changelog_cleanup_dispatchers(this, priv, j); + break; + } + } + + if (ret != 0) + goto cleanup_connector; + + priv->nr_dispatchers = nr_dispatchers; + return 0; + +cleanup_connector: + (void)pthread_cancel(priv->connector); +cleanup_wait_lock: + LOCK_DESTROY(&conn->wait_lock); +cleanup_active_lock: + LOCK_DESTROY(&conn->active_lock); +cleanup_pending_cond: + (void)pthread_cond_destroy(&conn->pending_cond); +cleanup_pending_lock: + (void)pthread_mutex_destroy(&conn->pending_lock); +error_return: + return -1; +} + +int +changelog_rpcsvc_notify(rpcsvc_t *rpc, void *xl, rpcsvc_event_t event, + void *data) { - int j = 0; - int ret = 0; - changelog_clnt_t *conn = NULL; - - conn = &priv->connections; - - conn->this = this; - conn->rbuf = rbuf; - conn->sequence = 1; /* start with sequence number one */ - - INIT_LIST_HEAD (&conn->pending); - INIT_LIST_HEAD (&conn->active); - INIT_LIST_HEAD (&conn->waitq); - - ret = pthread_mutex_init (&conn->pending_lock, NULL); - if (ret) - goto error_return; - ret = pthread_cond_init (&conn->pending_cond, NULL); - if (ret) - goto cleanup_pending_lock; - - ret = LOCK_INIT (&conn->active_lock); - if (ret) - goto 
cleanup_pending_cond; - ret = LOCK_INIT (&conn->wait_lock); - if (ret) - goto cleanup_active_lock; - - /* spawn reverse connection thread */ - ret = pthread_create (&priv->connector, - NULL, changelog_ev_connector, conn); - if (ret != 0) - goto cleanup_wait_lock; - - /* spawn dispatcher thread(s) */ - priv->ev_dispatcher = GF_CALLOC (nr_dispatchers, sizeof(pthread_t), - gf_changelog_mt_ev_dispatcher_t); - if (!priv->ev_dispatcher) - goto cleanup_connector; - - /* spawn dispatcher threads */ - for (; j < nr_dispatchers; j++) { - ret = pthread_create (&priv->ev_dispatcher[j], - NULL, changelog_ev_dispatch, conn); - if (ret != 0) { - changelog_cleanup_dispatchers (this, priv, --j); - break; + xlator_t *this = NULL; + rpc_transport_t *trans = NULL; + rpc_transport_t *xprt = NULL; + rpc_transport_t *xp_next = NULL; + changelog_priv_t *priv = NULL; + uint64_t listnercnt = 0; + uint64_t xprtcnt = 0; + uint64_t clntcnt = 0; + rpcsvc_listener_t *listener = NULL; + rpcsvc_listener_t *next = NULL; + gf_boolean_t listner_found = _gf_false; + socket_private_t *sockpriv = NULL; + + if (!xl || !data || !rpc) { + gf_msg_callingfn("changelog", GF_LOG_WARNING, 0, + CHANGELOG_MSG_RPCSVC_NOTIFY_FAILED, + "Calling rpc_notify without initializing"); + goto out; + } + + this = xl; + trans = data; + priv = this->private; + + if (!priv) { + gf_msg_callingfn("changelog", GF_LOG_WARNING, 0, + CHANGELOG_MSG_RPCSVC_NOTIFY_FAILED, + "Calling rpc_notify without priv initializing"); + goto out; + } + + if (event == RPCSVC_EVENT_ACCEPT) { + GF_ATOMIC_INC(priv->xprtcnt); + LOCK(&priv->lock); + { + list_add_tail(&trans->list, &priv->xprt_list); + } + UNLOCK(&priv->lock); + goto out; + } + + if (event == RPCSVC_EVENT_DISCONNECT) { + list_for_each_entry_safe(listener, next, &rpc->listeners, list) + { + if (listener && listener->trans) { + if (listener->trans == trans) { + listnercnt = GF_ATOMIC_DEC(priv->listnercnt); + listner_found = _gf_true; + rpcsvc_listener_destroy(listener); } + } } - if (ret != 
0) - goto cleanup_connector; + if (listnercnt > 0) { + goto out; + } + if (listner_found) { + LOCK(&priv->lock); + list_for_each_entry_safe(xprt, xp_next, &priv->xprt_list, list) + { + sockpriv = (socket_private_t *)(xprt->private); + gf_log("changelog", GF_LOG_INFO, + "Send disconnect" + " on socket %d", + sockpriv->sock); + rpc_transport_disconnect(xprt, _gf_false); + } + UNLOCK(&priv->lock); + goto out; + } + LOCK(&priv->lock); + { + list_del_init(&trans->list); + } + UNLOCK(&priv->lock); - priv->nr_dispatchers = nr_dispatchers; - return 0; + xprtcnt = GF_ATOMIC_DEC(priv->xprtcnt); + clntcnt = GF_ATOMIC_GET(priv->clntcnt); + if (!xprtcnt && !clntcnt) { + changelog_process_cleanup_event(this); + } + } - cleanup_connector: - (void) pthread_cancel (priv->connector); - cleanup_wait_lock: - (void) LOCK_DESTROY (&conn->wait_lock); - cleanup_active_lock: - (void) LOCK_DESTROY (&conn->active_lock); - cleanup_pending_cond: - (void) pthread_cond_destroy (&conn->pending_cond); - cleanup_pending_lock: - (void) pthread_mutex_destroy (&conn->pending_lock); - error_return: - return -1; +out: + return 0; } -int -changelog_rpcsvc_notify (rpcsvc_t *rpc, - void *xl, rpcsvc_event_t event, void *data) +void +changelog_process_cleanup_event(xlator_t *this) { - return 0; + gf_boolean_t cleanup_notify = _gf_false; + changelog_priv_t *priv = NULL; + char sockfile[UNIX_PATH_MAX] = { + 0, + }; + + if (!this) + return; + priv = this->private; + if (!priv) + return; + + LOCK(&priv->lock); + { + cleanup_notify = priv->notify_down; + priv->notify_down = _gf_true; + } + UNLOCK(&priv->lock); + + if (priv->victim && !cleanup_notify) { + default_notify(this, GF_EVENT_PARENT_DOWN, priv->victim); + + if (priv->rpc) { + /* sockfile path could have been saved to avoid this */ + CHANGELOG_MAKE_SOCKET_PATH(priv->changelog_brick, sockfile, + UNIX_PATH_MAX); + sys_unlink(sockfile); + (void)rpcsvc_unregister_notify(priv->rpc, changelog_rpcsvc_notify, + this); + if (priv->rpc->rxpool) { + 
mem_pool_destroy(priv->rpc->rxpool); + priv->rpc->rxpool = NULL; + } + GF_FREE(priv->rpc); + priv->rpc = NULL; + } + } } void -changelog_destroy_rpc_listner (xlator_t *this, changelog_priv_t *priv) +changelog_destroy_rpc_listner(xlator_t *this, changelog_priv_t *priv) { - char sockfile[UNIX_PATH_MAX] = {0,}; - - /* sockfile path could have been saved to avoid this */ - CHANGELOG_MAKE_SOCKET_PATH (priv->changelog_brick, - sockfile, UNIX_PATH_MAX); - changelog_rpc_server_destroy (this, - priv->rpc, sockfile, - changelog_rpcsvc_notify, - changelog_programs); - (void) changelog_cleanup_rpc_threads (this, priv); + char sockfile[UNIX_PATH_MAX] = { + 0, + }; + + /* sockfile path could have been saved to avoid this */ + CHANGELOG_MAKE_SOCKET_PATH(priv->changelog_brick, sockfile, UNIX_PATH_MAX); + changelog_rpc_server_destroy(this, priv->rpc, sockfile, + changelog_rpcsvc_notify, changelog_programs); } rpcsvc_t * -changelog_init_rpc_listner (xlator_t *this, changelog_priv_t *priv, +changelog_init_rpc_listener(xlator_t *this, changelog_priv_t *priv, rbuf_t *rbuf, int nr_dispatchers) { - int ret = 0; - char sockfile[UNIX_PATH_MAX] = {0,}; - - ret = changelog_init_rpc_threads (this, priv, rbuf, nr_dispatchers); - if (ret) - return NULL; - - CHANGELOG_MAKE_SOCKET_PATH (priv->changelog_brick, - sockfile, UNIX_PATH_MAX); - return changelog_rpc_server_init (this, sockfile, NULL, - changelog_rpcsvc_notify, - changelog_programs); + int ret = 0; + char sockfile[UNIX_PATH_MAX] = { + 0, + }; + rpcsvc_t *svcp; + + ret = changelog_init_rpc_threads(this, priv, rbuf, nr_dispatchers); + if (ret) + return NULL; + + CHANGELOG_MAKE_SOCKET_PATH(priv->changelog_brick, sockfile, UNIX_PATH_MAX); + (void)sys_unlink(sockfile); + svcp = changelog_rpc_server_init( + this, sockfile, NULL, changelog_rpcsvc_notify, changelog_programs); + return svcp; } void -changelog_rpc_clnt_cleanup (changelog_rpc_clnt_t *crpc) +changelog_rpc_clnt_cleanup(changelog_rpc_clnt_t *crpc) { - if (!crpc) - return; - 
crpc->c_clnt = NULL; - (void) LOCK_DESTROY (&crpc->lock); - GF_FREE (crpc); + if (!crpc) + return; + crpc->c_clnt = NULL; + LOCK_DESTROY(&crpc->lock); + GF_FREE(crpc); } static changelog_rpc_clnt_t * -changelog_rpc_clnt_init (xlator_t *this, - changelog_probe_req *rpc_req, changelog_clnt_t *c_clnt) +changelog_rpc_clnt_init(xlator_t *this, changelog_probe_req *rpc_req, + changelog_clnt_t *c_clnt) { - int ret = 0; - changelog_rpc_clnt_t *crpc = NULL; - - crpc = GF_CALLOC (1, sizeof (*crpc), gf_changelog_mt_rpc_clnt_t); - if (!crpc) - goto error_return; - INIT_LIST_HEAD (&crpc->list); - - crpc->ref = 0; - changelog_set_disconnect_flag (crpc, _gf_false); - - crpc->filter = rpc_req->filter; - (void) memcpy (crpc->sock, rpc_req->sock, strlen (rpc_req->sock)); - - crpc->this = this; - crpc->c_clnt = c_clnt; - crpc->cleanup = changelog_rpc_clnt_cleanup; - - ret = LOCK_INIT (&crpc->lock); - if (ret != 0) - goto dealloc_crpc; - return crpc; - - dealloc_crpc: - GF_FREE (crpc); - error_return: - return NULL; + int ret = 0; + changelog_rpc_clnt_t *crpc = NULL; + + crpc = GF_CALLOC(1, sizeof(*crpc), gf_changelog_mt_rpc_clnt_t); + if (!crpc) + goto error_return; + INIT_LIST_HEAD(&crpc->list); + + /* Take a ref, the last unref will be on RPC_CLNT_DESTROY + * which comes as a result of last rpc_clnt_unref. 
+ */ + GF_ATOMIC_INIT(crpc->ref, 1); + changelog_set_disconnect_flag(crpc, _gf_false); + + crpc->filter = rpc_req->filter; + (void)memcpy(crpc->sock, rpc_req->sock, strlen(rpc_req->sock)); + + crpc->this = this; + crpc->c_clnt = c_clnt; + crpc->cleanup = changelog_rpc_clnt_cleanup; + + ret = LOCK_INIT(&crpc->lock); + if (ret != 0) + goto dealloc_crpc; + return crpc; + +dealloc_crpc: + GF_FREE(crpc); +error_return: + return NULL; } /** @@ -231,72 +361,80 @@ changelog_rpc_clnt_init (xlator_t *this, */ int -changelog_handle_probe (rpcsvc_request_t *req) +changelog_handle_probe(rpcsvc_request_t *req) { - int ret = 0; - xlator_t *this = NULL; - rpcsvc_t *svc = NULL; - changelog_priv_t *priv = NULL; - changelog_clnt_t *c_clnt = NULL; - changelog_rpc_clnt_t *crpc = NULL; - - changelog_probe_req rpc_req = {0,}; - changelog_probe_rsp rpc_rsp = {0,}; - - ret = xdr_to_generic (req->msg[0], - &rpc_req, (xdrproc_t)xdr_changelog_probe_req); - if (ret < 0) { - gf_msg ("", GF_LOG_ERROR, 0, - CHANGELOG_MSG_HANDLE_PROBE_ERROR, - "xdr decoding error"); - req->rpc_err = GARBAGE_ARGS; - goto handle_xdr_error; - } - - /* ->xl hidden in rpcsvc */ - svc = rpcsvc_request_service (req); - this = svc->xl; - priv = this->private; - c_clnt = &priv->connections; - - crpc = changelog_rpc_clnt_init (this, &rpc_req, c_clnt); - if (!crpc) - goto handle_xdr_error; - - changelog_ev_queue_connection (c_clnt, crpc); - rpc_rsp.op_ret = 0; - - goto submit_rpc; - - handle_xdr_error: - rpc_rsp.op_ret = -1; - submit_rpc: - (void) changelog_rpc_sumbit_reply (req, &rpc_rsp, NULL, 0, NULL, - (xdrproc_t)xdr_changelog_probe_rsp); + int ret = 0; + xlator_t *this = NULL; + rpcsvc_t *svc = NULL; + changelog_priv_t *priv = NULL; + changelog_clnt_t *c_clnt = NULL; + changelog_rpc_clnt_t *crpc = NULL; + + changelog_probe_req rpc_req = { + 0, + }; + changelog_probe_rsp rpc_rsp = { + 0, + }; + + this = req->trans->xl; + if (this->cleanup_starting) { + gf_smsg(this->name, GF_LOG_DEBUG, 0, 
CHANGELOG_MSG_CLEANUP_ALREADY_SET, + NULL); return 0; + } + + ret = xdr_to_generic(req->msg[0], &rpc_req, + (xdrproc_t)xdr_changelog_probe_req); + if (ret < 0) { + gf_smsg("", GF_LOG_ERROR, 0, CHANGELOG_MSG_HANDLE_PROBE_ERROR, NULL); + req->rpc_err = GARBAGE_ARGS; + goto handle_xdr_error; + } + + /* ->xl hidden in rpcsvc */ + svc = rpcsvc_request_service(req); + this = svc->xl; + priv = this->private; + c_clnt = &priv->connections; + + crpc = changelog_rpc_clnt_init(this, &rpc_req, c_clnt); + if (!crpc) + goto handle_xdr_error; + + changelog_ev_queue_connection(c_clnt, crpc); + rpc_rsp.op_ret = 0; + + goto submit_rpc; + +handle_xdr_error: + rpc_rsp.op_ret = -1; +submit_rpc: + (void)changelog_rpc_sumbit_reply(req, &rpc_rsp, NULL, 0, NULL, + (xdrproc_t)xdr_changelog_probe_rsp); + return 0; } /** * RPC declarations */ -rpcsvc_actor_t changelog_svc_actors[CHANGELOG_RPC_PROC_MAX] = { - [CHANGELOG_RPC_PROBE_FILTER] = { - "CHANGELOG PROBE FILTER", CHANGELOG_RPC_PROBE_FILTER, - changelog_handle_probe, NULL, 0, DRC_NA - }, +static rpcsvc_actor_t changelog_svc_actors[CHANGELOG_RPC_PROC_MAX] = { + [CHANGELOG_RPC_PROBE_FILTER] = {"CHANGELOG PROBE FILTER", + changelog_handle_probe, NULL, + CHANGELOG_RPC_PROBE_FILTER, DRC_NA, 0}, }; -struct rpcsvc_program changelog_svc_prog = { - .progname = CHANGELOG_RPC_PROGNAME, - .prognum = CHANGELOG_RPC_PROGNUM, - .progver = CHANGELOG_RPC_PROGVER, - .numactors = CHANGELOG_RPC_PROC_MAX, - .actors = changelog_svc_actors, - .synctask = _gf_true, +static struct rpcsvc_program changelog_svc_prog = { + .progname = CHANGELOG_RPC_PROGNAME, + .prognum = CHANGELOG_RPC_PROGNUM, + .progver = CHANGELOG_RPC_PROGVER, + .numactors = CHANGELOG_RPC_PROC_MAX, + .actors = changelog_svc_actors, + .synctask = _gf_true, }; -struct rpcsvc_program *changelog_programs[] = { - &changelog_svc_prog, - NULL, +static struct rpcsvc_program *changelog_programs[] = { + &changelog_svc_prog, + NULL, }; diff --git a/xlators/features/changelog/src/changelog-rpc.h 
b/xlators/features/changelog/src/changelog-rpc.h index 0df96684b6c..b1707565249 100644 --- a/xlators/features/changelog/src/changelog-rpc.h +++ b/xlators/features/changelog/src/changelog-rpc.h @@ -11,19 +11,21 @@ #ifndef __CHANGELOG_RPC_H #define __CHANGELOG_RPC_H -#include "xlator.h" +#include <glusterfs/xlator.h> #include "changelog-helpers.h" /* one time */ #include "socket.h" #include "changelog-rpc-common.h" -#define CHANGELOG_RPC_PROGNAME "GlusterFS Changelog" +#define CHANGELOG_RPC_PROGNAME "GlusterFS Changelog" rpcsvc_t * -changelog_init_rpc_listner (xlator_t *, changelog_priv_t *, rbuf_t *, int); +changelog_init_rpc_listener(xlator_t *, changelog_priv_t *, rbuf_t *, int); void -changelog_destroy_rpc_listner (xlator_t *, changelog_priv_t *); +changelog_destroy_rpc_listner(xlator_t *, changelog_priv_t *); +int +changelog_cleanup_rpc_threads(xlator_t *this, changelog_priv_t *priv); #endif diff --git a/xlators/features/changelog/src/changelog-rt.c b/xlators/features/changelog/src/changelog-rt.c index c262820c64c..841545ae359 100644 --- a/xlators/features/changelog/src/changelog-rt.c +++ b/xlators/features/changelog/src/changelog-rt.c @@ -8,60 +8,59 @@ cases as published by the Free Software Foundation. 
*/ -#include "xlator.h" -#include "defaults.h" -#include "logging.h" +#include <glusterfs/xlator.h> +#include <glusterfs/defaults.h> +#include <glusterfs/logging.h> #include "changelog-rt.h" #include "changelog-mem-types.h" int -changelog_rt_init (xlator_t *this, changelog_dispatcher_t *cd) +changelog_rt_init(xlator_t *this, changelog_dispatcher_t *cd) { - changelog_rt_t *crt = NULL; + changelog_rt_t *crt = NULL; - crt = GF_CALLOC (1, sizeof (*crt), - gf_changelog_mt_rt_t); - if (!crt) - return -1; + crt = GF_CALLOC(1, sizeof(*crt), gf_changelog_mt_rt_t); + if (!crt) + return -1; - LOCK_INIT (&crt->lock); + LOCK_INIT(&crt->lock); - cd->cd_data = crt; - cd->dispatchfn = &changelog_rt_enqueue; + cd->cd_data = crt; + cd->dispatchfn = &changelog_rt_enqueue; - return 0; + return 0; } int -changelog_rt_fini (xlator_t *this, changelog_dispatcher_t *cd) +changelog_rt_fini(xlator_t *this, changelog_dispatcher_t *cd) { - changelog_rt_t *crt = NULL; + changelog_rt_t *crt = NULL; - crt = cd->cd_data; + crt = cd->cd_data; - LOCK_DESTROY (&crt->lock); - GF_FREE (crt); + LOCK_DESTROY(&crt->lock); + GF_FREE(crt); - return 0; + return 0; } int -changelog_rt_enqueue (xlator_t *this, changelog_priv_t *priv, void *cbatch, - changelog_log_data_t *cld_0, changelog_log_data_t *cld_1) +changelog_rt_enqueue(xlator_t *this, changelog_priv_t *priv, void *cbatch, + changelog_log_data_t *cld_0, changelog_log_data_t *cld_1) { - int ret = 0; - changelog_rt_t *crt = NULL; + int ret = 0; + changelog_rt_t *crt = NULL; - crt = (changelog_rt_t *) cbatch; + crt = (changelog_rt_t *)cbatch; - LOCK (&crt->lock); - { - ret = changelog_handle_change (this, priv, cld_0); - if (!ret && cld_1) - ret = changelog_handle_change (this, priv, cld_1); - } - UNLOCK (&crt->lock); + LOCK(&crt->lock); + { + ret = changelog_handle_change(this, priv, cld_0); + if (!ret && cld_1) + ret = changelog_handle_change(this, priv, cld_1); + } + UNLOCK(&crt->lock); - return ret; + return ret; } diff --git 
a/xlators/features/changelog/src/changelog-rt.h b/xlators/features/changelog/src/changelog-rt.h index 1fc2bbc5bb9..28b9827d85b 100644 --- a/xlators/features/changelog/src/changelog-rt.h +++ b/xlators/features/changelog/src/changelog-rt.h @@ -11,23 +11,23 @@ #ifndef _CHANGELOG_RT_H #define _CHANGELOG_RT_H -#include "locking.h" -#include "timer.h" +#include <glusterfs/locking.h> +#include <glusterfs/timer.h> #include "pthread.h" #include "changelog-helpers.h" /* unused as of now - may be you would need it later */ typedef struct changelog_rt { - gf_lock_t lock; + gf_lock_t lock; } changelog_rt_t; int -changelog_rt_init (xlator_t *this, changelog_dispatcher_t *cd); +changelog_rt_init(xlator_t *this, changelog_dispatcher_t *cd); int -changelog_rt_fini (xlator_t *this, changelog_dispatcher_t *cd); +changelog_rt_fini(xlator_t *this, changelog_dispatcher_t *cd); int -changelog_rt_enqueue (xlator_t *this, changelog_priv_t *priv, void *cbatch, - changelog_log_data_t *cld_0, changelog_log_data_t *cld_1); +changelog_rt_enqueue(xlator_t *this, changelog_priv_t *priv, void *cbatch, + changelog_log_data_t *cld_0, changelog_log_data_t *cld_1); #endif /* _CHANGELOG_RT_H */ diff --git a/xlators/features/changelog/src/changelog.c b/xlators/features/changelog/src/changelog.c index c954e5e0616..6a6e5af859e 100644 --- a/xlators/features/changelog/src/changelog.c +++ b/xlators/features/changelog/src/changelog.c @@ -8,11 +8,11 @@ cases as published by the Free Software Foundation. 
*/ -#include "xlator.h" -#include "defaults.h" -#include "syscall.h" -#include "logging.h" -#include "iobuf.h" +#include <glusterfs/xlator.h> +#include <glusterfs/defaults.h> +#include <glusterfs/syscall.h> +#include <glusterfs/logging.h> +#include <glusterfs/iobuf.h> #include "changelog-rt.h" @@ -21,19 +21,25 @@ #include "changelog-messages.h" #include <pthread.h> +#include <signal.h> #include "changelog-rpc.h" #include "errno.h" -static struct changelog_bootstrap -cb_bootstrap[] = { - { - .mode = CHANGELOG_MODE_RT, - .ctor = changelog_rt_init, - .dtor = changelog_rt_fini, - }, +static struct changelog_bootstrap cb_bootstrap[] = { + { + .mode = CHANGELOG_MODE_RT, + .ctor = changelog_rt_init, + .dtor = changelog_rt_fini, + }, }; +static int +changelog_init_rpc(xlator_t *this, changelog_priv_t *priv); + +static int +changelog_init(xlator_t *this, changelog_priv_t *priv); + /* Entry operations - TYPE III */ /** @@ -45,1109 +51,1052 @@ cb_bootstrap[] = { /* rmdir */ int32_t -changelog_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +changelog_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - changelog_priv_t *priv = NULL; - changelog_local_t *local = NULL; + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; - priv = this->private; - local = frame->local; + priv = this->private; + local = frame->local; - CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind); + CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind); - changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY); + changelog_update(this, priv, local, CHANGELOG_TYPE_ENTRY); - unwind: - changelog_dec_fop_cnt (this, priv, local); - CHANGELOG_STACK_UNWIND (rmdir, frame, op_ret, op_errno, - preparent, postparent, xdata); - return 0; +unwind: + 
changelog_dec_fop_cnt(this, priv, local); + CHANGELOG_STACK_UNWIND(rmdir, frame, op_ret, op_errno, preparent, + postparent, xdata); + return 0; } int32_t -changelog_rmdir_resume (call_frame_t *frame, xlator_t *this, - loc_t *loc, int xflags, dict_t *xdata) +changelog_rmdir_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, + int xflags, dict_t *xdata) { - changelog_priv_t *priv = NULL; + changelog_priv_t *priv = NULL; - priv = this->private; + priv = this->private; - gf_msg_debug (this->name, 0, "Dequeue rmdir"); - changelog_color_fop_and_inc_cnt (this, priv, - frame->local); - STACK_WIND (frame, changelog_rmdir_cbk, - FIRST_CHILD (this), FIRST_CHILD (this)->fops->rmdir, - loc, xflags, xdata); - return 0; + gf_msg_debug(this->name, 0, "Dequeue rmdir"); + changelog_color_fop_and_inc_cnt(this, priv, frame->local); + STACK_WIND(frame, changelog_rmdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rmdir, loc, xflags, xdata); + return 0; } int32_t -changelog_rmdir (call_frame_t *frame, xlator_t *this, - loc_t *loc, int xflags, dict_t *xdata) -{ - size_t xtra_len = 0; - changelog_priv_t *priv = NULL; - changelog_opt_t *co = NULL; - call_stub_t *stub = NULL; - struct list_head queue = {0, }; - gf_boolean_t barrier_enabled = _gf_false; - - INIT_LIST_HEAD (&queue); - - priv = this->private; - CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind); - - CHANGELOG_INIT_NOCHECK (this, frame->local, - NULL, loc->inode->gfid, 2); - - co = changelog_get_usable_buffer (frame->local); - if (!co) - goto wind; - - CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len); - - co++; - if (priv->capture_del_path) { - CHANGELOG_FILL_ENTRY_DIR_PATH (co, loc->pargfid, loc->name, - del_entry_fn, del_entry_free_fn, - xtra_len, wind, _gf_true); +changelog_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags, + dict_t *xdata) +{ + size_t xtra_len = 0; + changelog_priv_t *priv = NULL; + changelog_opt_t *co = NULL; + call_stub_t *stub = NULL; + struct list_head queue = { 
+ 0, + }; + gf_boolean_t barrier_enabled = _gf_false; + + INIT_LIST_HEAD(&queue); + + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind); + + CHANGELOG_INIT_NOCHECK(this, frame->local, NULL, loc->inode->gfid, 2); + + co = changelog_get_usable_buffer(frame->local); + if (!co) + goto wind; + + CHANGLOG_FILL_FOP_NUMBER(co, frame->root->op, fop_fn, xtra_len); + + co++; + if (priv->capture_del_path) { + CHANGELOG_FILL_ENTRY_DIR_PATH(co, loc->pargfid, loc->name, del_entry_fn, + del_entry_free_fn, xtra_len, wind, + _gf_true); + } else { + CHANGELOG_FILL_ENTRY_DIR_PATH(co, loc->pargfid, loc->name, del_entry_fn, + del_entry_free_fn, xtra_len, wind, + _gf_false); + } + + changelog_set_usable_record_and_length(frame->local, xtra_len, 2); + + /* changelog barrier */ + /* Color assignment and increment of fop_cnt for rmdir/unlink/rename + * should be made with in priv lock if changelog barrier is not enabled. + * Because if counter is not incremented yet, draining wakes up and + * publishes the changelog but later these fops might hit the disk and + * present in snapped volume but where as the intention is these fops + * should not be present in snapped volume. + */ + LOCK(&priv->lock); + { + if ((barrier_enabled = priv->barrier_enabled)) { + stub = fop_rmdir_stub(frame, changelog_rmdir_resume, loc, xflags, + xdata); + if (!stub) + __chlog_barrier_disable(this, &queue); + else + __chlog_barrier_enqueue(this, stub); } else { - CHANGELOG_FILL_ENTRY_DIR_PATH (co, loc->pargfid, loc->name, - del_entry_fn, del_entry_free_fn, - xtra_len, wind, _gf_false); - } - - changelog_set_usable_record_and_length (frame->local, xtra_len, 2); - -/* changelog barrier */ - /* Color assignment and increment of fop_cnt for rmdir/unlink/rename - * should be made with in priv lock if changelog barrier is not enabled. 
- * Because if counter is not incremented yet, draining wakes up and - * publishes the changelog but later these fops might hit the disk and - * present in snapped volume but where as the intention is these fops - * should not be present in snapped volume. - */ - LOCK (&priv->lock); - { - if ((barrier_enabled = priv->barrier_enabled)) { - stub = fop_rmdir_stub (frame, changelog_rmdir_resume, - loc, xflags, xdata); - if (!stub) - __chlog_barrier_disable (this, &queue); - else - __chlog_barrier_enqueue (this, stub); - } else { - ((changelog_local_t *)frame->local)->color - = priv->current_color; - changelog_inc_fop_cnt (this, priv, frame->local); - } - } - UNLOCK (&priv->lock); - - if (barrier_enabled && stub) { - gf_msg_debug (this->name, 0, "Enqueue rmdir"); - goto out; - } - if (barrier_enabled && !stub) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - CHANGELOG_MSG_NO_MEMORY, - "Failed to barrier FOPs, disabling changelog barrier " - "FOP: rmdir"); - chlog_barrier_dequeue_all (this, &queue); - } - -/* changelog barrier */ - - wind: - STACK_WIND (frame, changelog_rmdir_cbk, - FIRST_CHILD (this), FIRST_CHILD (this)->fops->rmdir, - loc, xflags, xdata); - out: - return 0; + ((changelog_local_t *)frame->local)->color = priv->current_color; + changelog_inc_fop_cnt(this, priv, frame->local); + } + } + UNLOCK(&priv->lock); + + if (barrier_enabled && stub) { + gf_msg_debug(this->name, 0, "Enqueue rmdir"); + goto out; + } + if (barrier_enabled && !stub) { + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, + CHANGELOG_MSG_BARRIER_FOP_FAILED, "fop=rmdir", NULL); + chlog_barrier_dequeue_all(this, &queue); + } + + /* changelog barrier */ + +wind: + STACK_WIND(frame, changelog_rmdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rmdir, loc, xflags, xdata); +out: + return 0; } /* unlink */ int32_t -changelog_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) 
+changelog_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - changelog_priv_t *priv = NULL; - changelog_local_t *local = NULL; + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; - priv = this->private; - local = frame->local; + priv = this->private; + local = frame->local; - CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind); + CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind); - changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY); + changelog_update(this, priv, local, CHANGELOG_TYPE_ENTRY); - unwind: - changelog_dec_fop_cnt (this, priv, local); - CHANGELOG_STACK_UNWIND (unlink, frame, op_ret, op_errno, - preparent, postparent, xdata); - return 0; +unwind: + changelog_dec_fop_cnt(this, priv, local); + CHANGELOG_STACK_UNWIND(unlink, frame, op_ret, op_errno, preparent, + postparent, xdata); + return 0; } int32_t -changelog_unlink_resume (call_frame_t *frame, xlator_t *this, - loc_t *loc, int xflags, dict_t *xdata) +changelog_unlink_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, + int xflags, dict_t *xdata) { - changelog_priv_t *priv = NULL; + changelog_priv_t *priv = NULL; - priv = this->private; + priv = this->private; - gf_msg_debug (this->name, 0, "Dequeue unlink"); - changelog_color_fop_and_inc_cnt - (this, priv, frame->local); - STACK_WIND (frame, changelog_unlink_cbk, - FIRST_CHILD (this), FIRST_CHILD (this)->fops->unlink, - loc, xflags, xdata); - return 0; + gf_msg_debug(this->name, 0, "Dequeue unlink"); + changelog_color_fop_and_inc_cnt(this, priv, frame->local); + STACK_WIND(frame, changelog_unlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, loc, xflags, xdata); + return 0; } int32_t -changelog_unlink (call_frame_t *frame, xlator_t *this, - loc_t *loc, int xflags, dict_t *xdata) -{ - size_t xtra_len = 0; - changelog_priv_t *priv = NULL; - changelog_opt_t *co = NULL; - 
call_stub_t *stub = NULL; - struct list_head queue = {0, }; - gf_boolean_t barrier_enabled = _gf_false; - dht_changelog_rename_info_t *info = NULL; - int ret = 0; - char old_name[NAME_MAX] = {0}; - char new_name[NAME_MAX] = {0}; - char *nname = NULL; - - INIT_LIST_HEAD (&queue); - priv = this->private; - - CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind); - - ret = dict_get_bin (xdata, DHT_CHANGELOG_RENAME_OP_KEY, (void **)&info); - if (!ret) { /* special case: unlink considered as rename */ - /* 3 == fop + oldloc + newloc */ - CHANGELOG_INIT_NOCHECK (this, frame->local, - NULL, loc->inode->gfid, 3); - - co = changelog_get_usable_buffer (frame->local); - if (!co) - goto wind; - - CHANGLOG_FILL_FOP_NUMBER (co, GF_FOP_RENAME, fop_fn, xtra_len); - - co++; - strncpy (old_name, info->buffer, info->oldname_len); - CHANGELOG_FILL_ENTRY (co, info->old_pargfid, old_name, - entry_fn, entry_free_fn, xtra_len, wind); - - co++; - /* new name resides just after old name */ - nname = info->buffer + info->oldname_len; - strncpy (new_name, nname, info->newname_len); - CHANGELOG_FILL_ENTRY (co, info->new_pargfid, new_name, - entry_fn, entry_free_fn, xtra_len, wind); - - changelog_set_usable_record_and_length (frame->local, - xtra_len, 3); - } else { /* default unlink */ - CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, wind); - CHANGELOG_INIT_NOCHECK (this, frame->local, NULL, - loc->inode->gfid, 2); - - co = changelog_get_usable_buffer (frame->local); - if (!co) - goto wind; - - CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, - fop_fn, xtra_len); - - co++; - if (priv->capture_del_path) { - CHANGELOG_FILL_ENTRY_DIR_PATH (co, loc->pargfid, - loc->name, del_entry_fn, del_entry_free_fn, - xtra_len, wind, _gf_true); - } else { - CHANGELOG_FILL_ENTRY_DIR_PATH (co, loc->pargfid, - loc->name, del_entry_fn, del_entry_free_fn, - xtra_len, wind, _gf_false); - } +changelog_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags, + dict_t *xdata) +{ + size_t xtra_len = 0; + 
changelog_priv_t *priv = NULL; + changelog_opt_t *co = NULL; + call_stub_t *stub = NULL; + struct list_head queue = { + 0, + }; + gf_boolean_t barrier_enabled = _gf_false; + dht_changelog_rename_info_t *info = NULL; + int ret = 0; + char *old_name = NULL; + char *new_name = NULL; + char *nname = NULL; + + INIT_LIST_HEAD(&queue); + priv = this->private; + + CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind); + + ret = dict_get_bin(xdata, DHT_CHANGELOG_RENAME_OP_KEY, (void **)&info); + if (!ret) { /* special case: unlink considered as rename */ + /* 3 == fop + oldloc + newloc */ + old_name = alloca(info->oldname_len); + new_name = alloca(info->newname_len); + CHANGELOG_INIT_NOCHECK(this, frame->local, NULL, loc->inode->gfid, 3); - changelog_set_usable_record_and_length (frame->local, - xtra_len, 2); - } + co = changelog_get_usable_buffer(frame->local); + if (!co) + goto wind; -/* changelog barrier */ - LOCK (&priv->lock); - { - if ((barrier_enabled = priv->barrier_enabled)) { - stub = fop_unlink_stub (frame, changelog_unlink_resume, - loc, xflags, xdata); - if (!stub) - __chlog_barrier_disable (this, &queue); - else - __chlog_barrier_enqueue (this, stub); - } else { - ((changelog_local_t *)frame->local)->color - = priv->current_color; - changelog_inc_fop_cnt (this, priv, frame->local); - } - } - UNLOCK (&priv->lock); + CHANGLOG_FILL_FOP_NUMBER(co, GF_FOP_RENAME, fop_fn, xtra_len); - if (barrier_enabled && stub) { - gf_msg_debug (this->name, 0, "Enqueue unlink"); - goto out; - } - if (barrier_enabled && !stub) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - CHANGELOG_MSG_NO_MEMORY, - "Failed to barrier FOPs, disabling changelog barrier " - "FOP: unlink"); - chlog_barrier_dequeue_all (this, &queue); - } + co++; + strncpy(old_name, info->buffer, info->oldname_len); + CHANGELOG_FILL_ENTRY(co, info->old_pargfid, old_name, entry_fn, + entry_free_fn, xtra_len, wind); + + co++; + /* new name resides just after old name */ + nname = info->buffer + info->oldname_len; + 
strncpy(new_name, nname, info->newname_len); + CHANGELOG_FILL_ENTRY(co, info->new_pargfid, new_name, entry_fn, + entry_free_fn, xtra_len, wind); + + changelog_set_usable_record_and_length(frame->local, xtra_len, 3); + } else { /* default unlink */ + CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, wind); + CHANGELOG_INIT_NOCHECK(this, frame->local, NULL, loc->inode->gfid, 2); + + co = changelog_get_usable_buffer(frame->local); + if (!co) + goto wind; -/* changelog barrier */ + CHANGLOG_FILL_FOP_NUMBER(co, frame->root->op, fop_fn, xtra_len); - wind: - STACK_WIND (frame, changelog_unlink_cbk, - FIRST_CHILD (this), FIRST_CHILD (this)->fops->unlink, - loc, xflags, xdata); - out: - return 0; + co++; + if (priv->capture_del_path) { + CHANGELOG_FILL_ENTRY_DIR_PATH(co, loc->pargfid, loc->name, + del_entry_fn, del_entry_free_fn, + xtra_len, wind, _gf_true); + } else { + CHANGELOG_FILL_ENTRY_DIR_PATH(co, loc->pargfid, loc->name, + del_entry_fn, del_entry_free_fn, + xtra_len, wind, _gf_false); + } + + changelog_set_usable_record_and_length(frame->local, xtra_len, 2); + } + + /* changelog barrier */ + LOCK(&priv->lock); + { + if ((barrier_enabled = priv->barrier_enabled)) { + stub = fop_unlink_stub(frame, changelog_unlink_resume, loc, xflags, + xdata); + if (!stub) + __chlog_barrier_disable(this, &queue); + else + __chlog_barrier_enqueue(this, stub); + } else { + ((changelog_local_t *)frame->local)->color = priv->current_color; + changelog_inc_fop_cnt(this, priv, frame->local); + } + } + UNLOCK(&priv->lock); + + if (barrier_enabled && stub) { + gf_msg_debug(this->name, 0, "Enqueue unlink"); + goto out; + } + if (barrier_enabled && !stub) { + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, + CHANGELOG_MSG_BARRIER_FOP_FAILED, "fop=unlink", NULL); + chlog_barrier_dequeue_all(this, &queue); + } + + /* changelog barrier */ + +wind: + STACK_WIND(frame, changelog_unlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, loc, xflags, xdata); +out: + return 0; } /* rename */ int32_t 
-changelog_rename_cbk (call_frame_t *frame, - void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *buf, struct iatt *preoldparent, - struct iatt *postoldparent, struct iatt *prenewparent, - struct iatt *postnewparent, dict_t *xdata) -{ - changelog_priv_t *priv = NULL; - changelog_local_t *local = NULL; - - priv = this->private; - local = frame->local; - CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind); - changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY); - unwind: - changelog_dec_fop_cnt (this, priv, local); - CHANGELOG_STACK_UNWIND (rename, frame, op_ret, op_errno, - buf, preoldparent, postoldparent, - prenewparent, postnewparent, xdata); - return 0; +changelog_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + struct iatt *preoldparent, struct iatt *postoldparent, + struct iatt *prenewparent, struct iatt *postnewparent, + dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; + + priv = this->private; + local = frame->local; + CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind); + changelog_update(this, priv, local, CHANGELOG_TYPE_ENTRY); +unwind: + changelog_dec_fop_cnt(this, priv, local); + CHANGELOG_STACK_UNWIND(rename, frame, op_ret, op_errno, buf, preoldparent, + postoldparent, prenewparent, postnewparent, xdata); + return 0; } int32_t -changelog_rename_resume (call_frame_t *frame, xlator_t *this, - loc_t *oldloc, loc_t *newloc, dict_t *xdata) +changelog_rename_resume(call_frame_t *frame, xlator_t *this, loc_t *oldloc, + loc_t *newloc, dict_t *xdata) { - changelog_priv_t *priv = NULL; + changelog_priv_t *priv = NULL; - priv = this->private; + priv = this->private; - gf_msg_debug (this->name, 0, "Dequeue rename"); - changelog_color_fop_and_inc_cnt - (this, priv, frame->local); - STACK_WIND (frame, changelog_rename_cbk, - FIRST_CHILD (this), FIRST_CHILD (this)->fops->rename, - oldloc, newloc, xdata); - 
return 0; + gf_msg_debug(this->name, 0, "Dequeue rename"); + changelog_color_fop_and_inc_cnt(this, priv, frame->local); + STACK_WIND(frame, changelog_rename_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); + return 0; } int32_t -changelog_rename (call_frame_t *frame, xlator_t *this, - loc_t *oldloc, loc_t *newloc, dict_t *xdata) -{ - size_t xtra_len = 0; - changelog_priv_t *priv = NULL; - changelog_opt_t *co = NULL; - call_stub_t *stub = NULL; - struct list_head queue = {0, }; - gf_boolean_t barrier_enabled = _gf_false; - dht_changelog_rename_info_t *info = NULL; - int ret = 0; - - INIT_LIST_HEAD (&queue); - - priv = this->private; - CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind); - - ret = dict_get_bin (xdata, DHT_CHANGELOG_RENAME_OP_KEY, (void **)&info); - if (ret && oldloc->inode->ia_type != IA_IFDIR) { - /* xdata "NOT" set for a non-directory, - * Special rename => avoid logging */ - goto wind; - } - - /* 3 == fop + oldloc + newloc */ - CHANGELOG_INIT_NOCHECK (this, frame->local, - NULL, oldloc->inode->gfid, 3); - - co = changelog_get_usable_buffer (frame->local); - if (!co) - goto wind; - - CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len); - - co++; - CHANGELOG_FILL_ENTRY (co, oldloc->pargfid, oldloc->name, - entry_fn, entry_free_fn, xtra_len, wind); - - co++; - CHANGELOG_FILL_ENTRY (co, newloc->pargfid, newloc->name, - entry_fn, entry_free_fn, xtra_len, wind); - - changelog_set_usable_record_and_length (frame->local, xtra_len, 3); -/* changelog barrier */ - LOCK (&priv->lock); - { - if ((barrier_enabled = priv->barrier_enabled)) { - stub = fop_rename_stub (frame, changelog_rename_resume, - oldloc, newloc, xdata); - if (!stub) - __chlog_barrier_disable (this, &queue); - else - __chlog_barrier_enqueue (this, stub); - } else { - ((changelog_local_t *)frame->local)->color - = priv->current_color; - changelog_inc_fop_cnt (this, priv, frame->local); - } - } - UNLOCK (&priv->lock); - - if (barrier_enabled && 
stub) { - gf_msg_debug (this->name, 0, "Enqueue rename"); - goto out; - } - if (barrier_enabled && !stub) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - CHANGELOG_MSG_NO_MEMORY, - "Failed to barrier FOPs, disabling changelog barrier " - "FOP: rename"); - chlog_barrier_dequeue_all (this, &queue); - } -/* changelog barrier */ - - wind: - STACK_WIND (frame, changelog_rename_cbk, - FIRST_CHILD (this), FIRST_CHILD (this)->fops->rename, - oldloc, newloc, xdata); - out: - return 0; +changelog_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, + loc_t *newloc, dict_t *xdata) +{ + size_t xtra_len = 0; + changelog_priv_t *priv = NULL; + changelog_opt_t *co = NULL; + call_stub_t *stub = NULL; + struct list_head queue = { + 0, + }; + gf_boolean_t barrier_enabled = _gf_false; + dht_changelog_rename_info_t *info = NULL; + int ret = 0; + + INIT_LIST_HEAD(&queue); + + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind); + + ret = dict_get_bin(xdata, DHT_CHANGELOG_RENAME_OP_KEY, (void **)&info); + if (ret && oldloc->inode->ia_type != IA_IFDIR) { + /* xdata "NOT" set for a non-directory, + * Special rename => avoid logging */ + goto wind; + } + + /* 3 == fop + oldloc + newloc */ + CHANGELOG_INIT_NOCHECK(this, frame->local, NULL, oldloc->inode->gfid, 3); + + co = changelog_get_usable_buffer(frame->local); + if (!co) + goto wind; + + CHANGLOG_FILL_FOP_NUMBER(co, frame->root->op, fop_fn, xtra_len); + + co++; + CHANGELOG_FILL_ENTRY(co, oldloc->pargfid, oldloc->name, entry_fn, + entry_free_fn, xtra_len, wind); + + co++; + CHANGELOG_FILL_ENTRY(co, newloc->pargfid, newloc->name, entry_fn, + entry_free_fn, xtra_len, wind); + + changelog_set_usable_record_and_length(frame->local, xtra_len, 3); + /* changelog barrier */ + LOCK(&priv->lock); + { + if ((barrier_enabled = priv->barrier_enabled)) { + stub = fop_rename_stub(frame, changelog_rename_resume, oldloc, + newloc, xdata); + if (!stub) + __chlog_barrier_disable(this, &queue); + else + 
__chlog_barrier_enqueue(this, stub); + } else { + ((changelog_local_t *)frame->local)->color = priv->current_color; + changelog_inc_fop_cnt(this, priv, frame->local); + } + } + UNLOCK(&priv->lock); + + if (barrier_enabled && stub) { + gf_msg_debug(this->name, 0, "Enqueue rename"); + goto out; + } + if (barrier_enabled && !stub) { + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, + CHANGELOG_MSG_BARRIER_FOP_FAILED, "fop=rename", NULL); + chlog_barrier_dequeue_all(this, &queue); + } + /* changelog barrier */ + +wind: + STACK_WIND(frame, changelog_rename_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); +out: + return 0; } /* link */ int32_t -changelog_link_cbk (call_frame_t *frame, - void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +changelog_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - changelog_priv_t *priv = NULL; - changelog_local_t *local = NULL; + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; - priv = this->private; - local = frame->local; + priv = this->private; + local = frame->local; - CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind); + CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind); - changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY); + changelog_update(this, priv, local, CHANGELOG_TYPE_ENTRY); - unwind: - changelog_dec_fop_cnt (this, priv, local); - CHANGELOG_STACK_UNWIND (link, frame, op_ret, op_errno, - inode, buf, preparent, postparent, xdata); - return 0; +unwind: + changelog_dec_fop_cnt(this, priv, local); + CHANGELOG_STACK_UNWIND(link, frame, op_ret, op_errno, inode, buf, preparent, + postparent, xdata); + return 0; } int32_t -changelog_link_resume (call_frame_t *frame, xlator_t *this, - loc_t 
*oldloc, loc_t *newloc, dict_t *xdata) +changelog_link_resume(call_frame_t *frame, xlator_t *this, loc_t *oldloc, + loc_t *newloc, dict_t *xdata) { - changelog_priv_t *priv = NULL; + changelog_priv_t *priv = NULL; - GF_VALIDATE_OR_GOTO ("changelog", this, out); - GF_VALIDATE_OR_GOTO ("changelog", this->fops, out); - GF_VALIDATE_OR_GOTO ("changelog", frame, out); + GF_VALIDATE_OR_GOTO("changelog", this, out); + GF_VALIDATE_OR_GOTO("changelog", this->fops, out); + GF_VALIDATE_OR_GOTO("changelog", frame, out); - priv = this->private; + priv = this->private; - gf_msg_debug (this->name, 0, "Dequeuing link"); - changelog_color_fop_and_inc_cnt - (this, priv, frame->local); - STACK_WIND (frame, changelog_link_cbk, - FIRST_CHILD (this), FIRST_CHILD (this)->fops->link, - oldloc, newloc, xdata); - return 0; + gf_msg_debug(this->name, 0, "Dequeuing link"); + changelog_color_fop_and_inc_cnt(this, priv, frame->local); + STACK_WIND(frame, changelog_link_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata); + return 0; out: - return -1; + return -1; } int32_t -changelog_link (call_frame_t *frame, - xlator_t *this, loc_t *oldloc, - loc_t *newloc, dict_t *xdata) -{ - size_t xtra_len = 0; - changelog_priv_t *priv = NULL; - changelog_opt_t *co = NULL; - call_stub_t *stub = NULL; - struct list_head queue = {0, }; - gf_boolean_t barrier_enabled = _gf_false; - - priv = this->private; - - CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind); - CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, wind); - - CHANGELOG_INIT_NOCHECK (this, frame->local, NULL, oldloc->gfid, 2); - - co = changelog_get_usable_buffer (frame->local); - if (!co) - goto wind; - - CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len); - - co++; - CHANGELOG_FILL_ENTRY (co, newloc->pargfid, newloc->name, - entry_fn, entry_free_fn, xtra_len, wind); - - changelog_set_usable_record_and_length (frame->local, xtra_len, 2); - - LOCK (&priv->lock); - { - if ((barrier_enabled = 
priv->barrier_enabled)) { - stub = fop_link_stub (frame, changelog_link_resume, - oldloc, newloc, xdata); - if (!stub) - __chlog_barrier_disable (this, &queue); - else - __chlog_barrier_enqueue (this, stub); - } else { - ((changelog_local_t *)frame->local)->color - = priv->current_color; - changelog_inc_fop_cnt (this, priv, frame->local); - } - } - UNLOCK (&priv->lock); - - if (barrier_enabled && stub) { - gf_msg_debug (this->name, 0, "Enqueued link"); - goto out; - } - - if (barrier_enabled && !stub) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_NO_MEMORY, - "Failed to barrier FOPs, disabling changelog barrier " - "FOP: link"); - chlog_barrier_dequeue_all (this, &queue); - } - wind: - STACK_WIND (frame, changelog_link_cbk, - FIRST_CHILD (this), FIRST_CHILD (this)->fops->link, - oldloc, newloc, xdata); +changelog_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, + loc_t *newloc, dict_t *xdata) +{ + size_t xtra_len = 0; + changelog_priv_t *priv = NULL; + changelog_opt_t *co = NULL; + call_stub_t *stub = NULL; + struct list_head queue = { + 0, + }; + gf_boolean_t barrier_enabled = _gf_false; + + priv = this->private; + + CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind); + CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, wind); + + CHANGELOG_INIT_NOCHECK(this, frame->local, NULL, oldloc->gfid, 2); + + co = changelog_get_usable_buffer(frame->local); + if (!co) + goto wind; + + CHANGLOG_FILL_FOP_NUMBER(co, frame->root->op, fop_fn, xtra_len); + + co++; + CHANGELOG_FILL_ENTRY(co, newloc->pargfid, newloc->name, entry_fn, + entry_free_fn, xtra_len, wind); + + changelog_set_usable_record_and_length(frame->local, xtra_len, 2); + + LOCK(&priv->lock); + { + if ((barrier_enabled = priv->barrier_enabled)) { + stub = fop_link_stub(frame, changelog_link_resume, oldloc, newloc, + xdata); + if (!stub) + __chlog_barrier_disable(this, &queue); + else + __chlog_barrier_enqueue(this, stub); + } else { + ((changelog_local_t *)frame->local)->color = priv->current_color; 
+ changelog_inc_fop_cnt(this, priv, frame->local); + } + } + UNLOCK(&priv->lock); + + if (barrier_enabled && stub) { + gf_msg_debug(this->name, 0, "Enqueued link"); + goto out; + } + + if (barrier_enabled && !stub) { + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_BARRIER_FOP_FAILED, + "fop=link", NULL); + chlog_barrier_dequeue_all(this, &queue); + } +wind: + STACK_WIND(frame, changelog_link_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata); out: - return 0; + return 0; } /* mkdir */ int32_t -changelog_mkdir_cbk (call_frame_t *frame, - void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +changelog_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - changelog_priv_t *priv = NULL; - changelog_local_t *local = NULL; + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; - priv = this->private; - local = frame->local; + priv = this->private; + local = frame->local; - CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind); + CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind); - changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY); + changelog_update(this, priv, local, CHANGELOG_TYPE_ENTRY); - unwind: - changelog_dec_fop_cnt (this, priv, local); - CHANGELOG_STACK_UNWIND (mkdir, frame, op_ret, op_errno, - inode, buf, preparent, postparent, xdata); - return 0; +unwind: + changelog_dec_fop_cnt(this, priv, local); + CHANGELOG_STACK_UNWIND(mkdir, frame, op_ret, op_errno, inode, buf, + preparent, postparent, xdata); + return 0; } int32_t -changelog_mkdir_resume (call_frame_t *frame, xlator_t *this, - loc_t *loc, mode_t mode, - mode_t umask, dict_t *xdata) +changelog_mkdir_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, + mode_t mode, 
mode_t umask, dict_t *xdata) { - changelog_priv_t *priv = NULL; + changelog_priv_t *priv = NULL; - GF_VALIDATE_OR_GOTO ("changelog", this, out); - GF_VALIDATE_OR_GOTO ("changelog", this->fops, out); - GF_VALIDATE_OR_GOTO ("changelog", frame, out); + GF_VALIDATE_OR_GOTO("changelog", this, out); + GF_VALIDATE_OR_GOTO("changelog", this->fops, out); + GF_VALIDATE_OR_GOTO("changelog", frame, out); - priv = this->private; + priv = this->private; - gf_msg_debug (this->name, 0, "Dequeuing mkdir"); - changelog_color_fop_and_inc_cnt - (this, priv, frame->local); - STACK_WIND (frame, changelog_mkdir_cbk, - FIRST_CHILD (this), FIRST_CHILD (this)->fops->mkdir, - loc, mode, umask, xdata); - return 0; + gf_msg_debug(this->name, 0, "Dequeuing mkdir"); + changelog_color_fop_and_inc_cnt(this, priv, frame->local); + STACK_WIND(frame, changelog_mkdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata); + return 0; out: - return -1; + return -1; } int32_t -changelog_mkdir (call_frame_t *frame, xlator_t *this, - loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata) -{ - int ret = -1; - uuid_t gfid = {0,}; - void *uuid_req = NULL; - size_t xtra_len = 0; - changelog_priv_t *priv = NULL; - changelog_opt_t *co = NULL; - call_stub_t *stub = NULL; - struct list_head queue = {0, }; - gf_boolean_t barrier_enabled = _gf_false; - - priv = this->private; - CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind); - - ret = dict_get_ptr (xdata, "gfid-req", &uuid_req); - if (ret) { - gf_msg_debug (this->name, 0, - "failed to get gfid from dict"); - goto wind; - } - gf_uuid_copy (gfid, uuid_req); - - CHANGELOG_INIT_NOCHECK (this, frame->local, NULL, gfid, 5); - - co = changelog_get_usable_buffer (frame->local); - if (!co) - goto wind; - - CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len); - co++; - - CHANGELOG_FILL_UINT32 (co, S_IFDIR | mode, number_fn, xtra_len); - co++; - - CHANGELOG_FILL_UINT32 (co, frame->root->uid, number_fn, xtra_len); - co++; - - 
CHANGELOG_FILL_UINT32 (co, frame->root->gid, number_fn, xtra_len); - co++; - - CHANGELOG_FILL_ENTRY (co, loc->pargfid, loc->name, - entry_fn, entry_free_fn, xtra_len, wind); - - changelog_set_usable_record_and_length (frame->local, xtra_len, 5); - - LOCK (&priv->lock); - { - if ((barrier_enabled = priv->barrier_enabled)) { - stub = fop_mkdir_stub (frame, changelog_mkdir_resume, - loc, mode, umask, xdata); - if (!stub) - __chlog_barrier_disable (this, &queue); - else - __chlog_barrier_enqueue (this, stub); - } else { - ((changelog_local_t *)frame->local)->color - = priv->current_color; - changelog_inc_fop_cnt (this, priv, frame->local); - } - } - UNLOCK (&priv->lock); - - if (barrier_enabled && stub) { - gf_msg_debug (this->name, 0, "Enqueued mkdir"); - goto out; - } - - if (barrier_enabled && !stub) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - CHANGELOG_MSG_NO_MEMORY, - "Failed to barrier FOPs, disabling changelog barrier " - "FOP: mkdir"); - chlog_barrier_dequeue_all (this, &queue); - } - - wind: - STACK_WIND (frame, changelog_mkdir_cbk, - FIRST_CHILD (this), FIRST_CHILD (this)->fops->mkdir, - loc, mode, umask, xdata); +changelog_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + mode_t umask, dict_t *xdata) +{ + int ret = -1; + uuid_t gfid = { + 0, + }; + size_t xtra_len = 0; + changelog_priv_t *priv = NULL; + changelog_opt_t *co = NULL; + call_stub_t *stub = NULL; + struct list_head queue = { + 0, + }; + gf_boolean_t barrier_enabled = _gf_false; + + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind); + + ret = dict_get_gfuuid(xdata, "gfid-req", &gfid); + if (ret) { + gf_msg_debug(this->name, 0, "failed to get gfid from dict"); + goto wind; + } + + CHANGELOG_INIT_NOCHECK(this, frame->local, NULL, gfid, 5); + + co = changelog_get_usable_buffer(frame->local); + if (!co) + goto wind; + + CHANGLOG_FILL_FOP_NUMBER(co, frame->root->op, fop_fn, xtra_len); + co++; + + CHANGELOG_FILL_UINT32(co, S_IFDIR | mode, number_fn, 
xtra_len); + co++; + + CHANGELOG_FILL_UINT32(co, frame->root->uid, number_fn, xtra_len); + co++; + + CHANGELOG_FILL_UINT32(co, frame->root->gid, number_fn, xtra_len); + co++; + + CHANGELOG_FILL_ENTRY(co, loc->pargfid, loc->name, entry_fn, entry_free_fn, + xtra_len, wind); + + changelog_set_usable_record_and_length(frame->local, xtra_len, 5); + + LOCK(&priv->lock); + { + if ((barrier_enabled = priv->barrier_enabled)) { + stub = fop_mkdir_stub(frame, changelog_mkdir_resume, loc, mode, + umask, xdata); + if (!stub) + __chlog_barrier_disable(this, &queue); + else + __chlog_barrier_enqueue(this, stub); + } else { + ((changelog_local_t *)frame->local)->color = priv->current_color; + changelog_inc_fop_cnt(this, priv, frame->local); + } + } + UNLOCK(&priv->lock); + + if (barrier_enabled && stub) { + gf_msg_debug(this->name, 0, "Enqueued mkdir"); + goto out; + } + + if (barrier_enabled && !stub) { + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, + CHANGELOG_MSG_BARRIER_FOP_FAILED, "fop=mkdir", NULL); + chlog_barrier_dequeue_all(this, &queue); + } + +wind: + STACK_WIND(frame, changelog_mkdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata); out: - return 0; + return 0; } /* symlink */ int32_t -changelog_symlink_cbk (call_frame_t *frame, - void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - inode_t *inode, struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +changelog_symlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - changelog_priv_t *priv = NULL; - changelog_local_t *local = NULL; + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; - priv = this->private; - local = frame->local; + priv = this->private; + local = frame->local; - CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind); + CHANGELOG_COND_GOTO(priv, ((op_ret < 0) 
|| !local), unwind); - changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY); + changelog_update(this, priv, local, CHANGELOG_TYPE_ENTRY); - unwind: - changelog_dec_fop_cnt (this, priv, local); - CHANGELOG_STACK_UNWIND (symlink, frame, op_ret, op_errno, - inode, buf, preparent, postparent, xdata); - return 0; +unwind: + changelog_dec_fop_cnt(this, priv, local); + CHANGELOG_STACK_UNWIND(symlink, frame, op_ret, op_errno, inode, buf, + preparent, postparent, xdata); + return 0; } - int32_t -changelog_symlink_resume (call_frame_t *frame, xlator_t *this, - const char *linkname, loc_t *loc, - mode_t umask, dict_t *xdata) +changelog_symlink_resume(call_frame_t *frame, xlator_t *this, + const char *linkname, loc_t *loc, mode_t umask, + dict_t *xdata) { - changelog_priv_t *priv = NULL; + changelog_priv_t *priv = NULL; - GF_VALIDATE_OR_GOTO ("changelog", this, out); - GF_VALIDATE_OR_GOTO ("changelog", this->fops, out); - GF_VALIDATE_OR_GOTO ("changelog", frame, out); + GF_VALIDATE_OR_GOTO("changelog", this, out); + GF_VALIDATE_OR_GOTO("changelog", this->fops, out); + GF_VALIDATE_OR_GOTO("changelog", frame, out); - priv = this->private; + priv = this->private; - gf_msg_debug (this->name, 0, "Dequeuing symlink"); - changelog_color_fop_and_inc_cnt - (this, priv, frame->local); - STACK_WIND (frame, changelog_symlink_cbk, - FIRST_CHILD (this), FIRST_CHILD (this)->fops->symlink, - linkname, loc, umask, xdata); - return 0; + gf_msg_debug(this->name, 0, "Dequeuing symlink"); + changelog_color_fop_and_inc_cnt(this, priv, frame->local); + STACK_WIND(frame, changelog_symlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->symlink, linkname, loc, umask, xdata); + return 0; out: - return -1; + return -1; } int32_t -changelog_symlink (call_frame_t *frame, xlator_t *this, - const char *linkname, loc_t *loc, - mode_t umask, dict_t *xdata) -{ - int ret = -1; - size_t xtra_len = 0; - uuid_t gfid = {0,}; - void *uuid_req = NULL; - changelog_priv_t *priv = NULL; - changelog_opt_t *co = 
NULL; - call_stub_t *stub = NULL; - struct list_head queue = {0, }; - gf_boolean_t barrier_enabled = _gf_false; - - priv = this->private; - CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind); - - ret = dict_get_ptr (xdata, "gfid-req", &uuid_req); - if (ret) { - gf_msg_debug (this->name, 0, - "failed to get gfid from dict"); - goto wind; - } - gf_uuid_copy (gfid, uuid_req); - - CHANGELOG_INIT_NOCHECK (this, frame->local, NULL, gfid, 2); - - co = changelog_get_usable_buffer (frame->local); - if (!co) - goto wind; - - CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len); - co++; - - CHANGELOG_FILL_ENTRY (co, loc->pargfid, loc->name, - entry_fn, entry_free_fn, xtra_len, wind); - - changelog_set_usable_record_and_length (frame->local, xtra_len, 2); - - LOCK (&priv->lock); - { - if ((barrier_enabled = priv->barrier_enabled)) { - stub = fop_symlink_stub (frame, - changelog_symlink_resume, - linkname, loc, umask, xdata); - if (!stub) - __chlog_barrier_disable (this, &queue); - else - __chlog_barrier_enqueue (this, stub); - } else { - ((changelog_local_t *)frame->local)->color - = priv->current_color; - changelog_inc_fop_cnt (this, priv, frame->local); - } - } - UNLOCK (&priv->lock); - - if (barrier_enabled && stub) { - gf_msg_debug (this->name, 0, "Enqueued symlink"); - goto out; - } - - if (barrier_enabled && !stub) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - CHANGELOG_MSG_NO_MEMORY, - "Failed to barrier FOPs, disabling changelog barrier " - "FOP: symlink"); - chlog_barrier_dequeue_all (this, &queue); - } - - wind: - STACK_WIND (frame, changelog_symlink_cbk, - FIRST_CHILD (this), FIRST_CHILD (this)->fops->symlink, - linkname, loc, umask, xdata); +changelog_symlink(call_frame_t *frame, xlator_t *this, const char *linkname, + loc_t *loc, mode_t umask, dict_t *xdata) +{ + int ret = -1; + size_t xtra_len = 0; + uuid_t gfid = { + 0, + }; + changelog_priv_t *priv = NULL; + changelog_opt_t *co = NULL; + call_stub_t *stub = NULL; + struct list_head queue = { + 0, 
+ }; + gf_boolean_t barrier_enabled = _gf_false; + + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind); + + ret = dict_get_gfuuid(xdata, "gfid-req", &gfid); + if (ret) { + gf_msg_debug(this->name, 0, "failed to get gfid from dict"); + goto wind; + } + + CHANGELOG_INIT_NOCHECK(this, frame->local, NULL, gfid, 2); + + co = changelog_get_usable_buffer(frame->local); + if (!co) + goto wind; + + CHANGLOG_FILL_FOP_NUMBER(co, frame->root->op, fop_fn, xtra_len); + co++; + + CHANGELOG_FILL_ENTRY(co, loc->pargfid, loc->name, entry_fn, entry_free_fn, + xtra_len, wind); + + changelog_set_usable_record_and_length(frame->local, xtra_len, 2); + + LOCK(&priv->lock); + { + if ((barrier_enabled = priv->barrier_enabled)) { + stub = fop_symlink_stub(frame, changelog_symlink_resume, linkname, + loc, umask, xdata); + if (!stub) + __chlog_barrier_disable(this, &queue); + else + __chlog_barrier_enqueue(this, stub); + } else { + ((changelog_local_t *)frame->local)->color = priv->current_color; + changelog_inc_fop_cnt(this, priv, frame->local); + } + } + UNLOCK(&priv->lock); + + if (barrier_enabled && stub) { + gf_msg_debug(this->name, 0, "Enqueued symlink"); + goto out; + } + + if (barrier_enabled && !stub) { + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, + CHANGELOG_MSG_BARRIER_FOP_FAILED, "fop=symlink", NULL); + chlog_barrier_dequeue_all(this, &queue); + } + +wind: + STACK_WIND(frame, changelog_symlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->symlink, linkname, loc, umask, xdata); out: - return 0; + return 0; } /* mknod */ int32_t -changelog_mknod_cbk (call_frame_t *frame, - void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +changelog_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - 
changelog_priv_t *priv = NULL; - changelog_local_t *local = NULL; + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; - priv = this->private; - local = frame->local; + priv = this->private; + local = frame->local; - CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind); + CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind); - changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY); + changelog_update(this, priv, local, CHANGELOG_TYPE_ENTRY); - unwind: - changelog_dec_fop_cnt (this, priv, local); - CHANGELOG_STACK_UNWIND (mknod, frame, op_ret, op_errno, - inode, buf, preparent, postparent, xdata); - return 0; +unwind: + changelog_dec_fop_cnt(this, priv, local); + CHANGELOG_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, buf, + preparent, postparent, xdata); + return 0; } int32_t -changelog_mknod_resume (call_frame_t *frame, xlator_t *this, - loc_t *loc, mode_t mode, dev_t rdev, - mode_t umask, dict_t *xdata) +changelog_mknod_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, + mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata) { - changelog_priv_t *priv = NULL; + changelog_priv_t *priv = NULL; - GF_VALIDATE_OR_GOTO ("changelog", this, out); - GF_VALIDATE_OR_GOTO ("changelog", this->fops, out); - GF_VALIDATE_OR_GOTO ("changelog", frame, out); + GF_VALIDATE_OR_GOTO("changelog", this, out); + GF_VALIDATE_OR_GOTO("changelog", this->fops, out); + GF_VALIDATE_OR_GOTO("changelog", frame, out); - priv = this->private; + priv = this->private; - gf_msg_debug (this->name, 0, "Dequeuing mknod"); - changelog_color_fop_and_inc_cnt - (this, priv, frame->local); - STACK_WIND (frame, changelog_mknod_cbk, - FIRST_CHILD (this), FIRST_CHILD (this)->fops->mknod, - loc, mode, rdev, umask, xdata); - return 0; + gf_msg_debug(this->name, 0, "Dequeuing mknod"); + changelog_color_fop_and_inc_cnt(this, priv, frame->local); + STACK_WIND(frame, changelog_mknod_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata); + 
return 0; out: - return -1; + return -1; } int32_t -changelog_mknod (call_frame_t *frame, - xlator_t *this, loc_t *loc, - mode_t mode, dev_t dev, mode_t umask, dict_t *xdata) -{ - int ret = -1; - uuid_t gfid = {0,}; - void *uuid_req = NULL; - size_t xtra_len = 0; - changelog_priv_t *priv = NULL; - changelog_opt_t *co = NULL; - call_stub_t *stub = NULL; - struct list_head queue = {0, }; - gf_boolean_t barrier_enabled = _gf_false; - - priv = this->private; - - /* Check whether changelog active */ - if (!(priv->active)) - goto wind; - - /* Check whether rebalance activity */ - if (frame->root->pid == GF_CLIENT_PID_DEFRAG) - goto wind; - - /* If tier-dht linkto is SET, ignore about verifiying : - * 1. Whether internal fop AND - * 2. Whether tier rebalance process activity (this will help in - * recording mknod if tier rebalance process calls this mknod) */ - if (!(dict_get (xdata, "trusted.tier.tier-dht.linkto"))) { - CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, wind); - if (frame->root->pid == GF_CLIENT_PID_TIER_DEFRAG) - goto wind; - } - - ret = dict_get_ptr (xdata, "gfid-req", &uuid_req); - if (ret) { - gf_msg_debug (this->name, 0, - "failed to get gfid from dict"); - goto wind; - } - gf_uuid_copy (gfid, uuid_req); - - CHANGELOG_INIT_NOCHECK (this, frame->local, NULL, gfid, 5); - - co = changelog_get_usable_buffer (frame->local); - if (!co) - goto wind; - - CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len); - co++; - - CHANGELOG_FILL_UINT32 (co, mode, number_fn, xtra_len); - co++; - - CHANGELOG_FILL_UINT32 (co, frame->root->uid, number_fn, xtra_len); - co++; - - CHANGELOG_FILL_UINT32 (co, frame->root->gid, number_fn, xtra_len); - co++; - - CHANGELOG_FILL_ENTRY (co, loc->pargfid, loc->name, - entry_fn, entry_free_fn, xtra_len, wind); - - changelog_set_usable_record_and_length (frame->local, xtra_len, 5); - - LOCK (&priv->lock); - { - if ((barrier_enabled = priv->barrier_enabled)) { - stub = fop_mknod_stub (frame, changelog_mknod_resume, - loc, 
mode, dev, umask, xdata); - if (!stub) - __chlog_barrier_disable (this, &queue); - else - __chlog_barrier_enqueue (this, stub); - } else { - ((changelog_local_t *)frame->local)->color - = priv->current_color; - changelog_inc_fop_cnt (this, priv, frame->local); - } - } - UNLOCK (&priv->lock); - - if (barrier_enabled && stub) { - gf_msg_debug (this->name, 0, "Enqueued mknod"); - goto out; - } - - if (barrier_enabled && !stub) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - CHANGELOG_MSG_NO_MEMORY, - "Failed to barrier FOPs, disabling changelog barrier " - "FOP: mknod"); - chlog_barrier_dequeue_all (this, &queue); - } - - wind: - STACK_WIND (frame, changelog_mknod_cbk, - FIRST_CHILD (this), FIRST_CHILD (this)->fops->mknod, - loc, mode, dev, umask, xdata); +changelog_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + dev_t dev, mode_t umask, dict_t *xdata) +{ + int ret = -1; + uuid_t gfid = { + 0, + }; + size_t xtra_len = 0; + changelog_priv_t *priv = NULL; + changelog_opt_t *co = NULL; + call_stub_t *stub = NULL; + struct list_head queue = { + 0, + }; + gf_boolean_t barrier_enabled = _gf_false; + + priv = this->private; + + /* Check whether changelog active */ + if (!(priv->active)) + goto wind; + + /* Check whether rebalance activity */ + if (frame->root->pid == GF_CLIENT_PID_DEFRAG) + goto wind; + + /* If tier-dht linkto is SET, ignore about verifiying : + * 1. Whether internal fop AND + * 2. 
Whether tier rebalance process activity (this will help in + * recording mknod if tier rebalance process calls this mknod) */ + if (!(dict_get(xdata, "trusted.tier.tier-dht.linkto"))) { + CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, wind); + if (frame->root->pid == GF_CLIENT_PID_TIER_DEFRAG) + goto wind; + } + + ret = dict_get_gfuuid(xdata, "gfid-req", &gfid); + if (ret) { + gf_msg_debug(this->name, 0, "failed to get gfid from dict"); + goto wind; + } + + CHANGELOG_INIT_NOCHECK(this, frame->local, NULL, gfid, 5); + + co = changelog_get_usable_buffer(frame->local); + if (!co) + goto wind; + + CHANGLOG_FILL_FOP_NUMBER(co, frame->root->op, fop_fn, xtra_len); + co++; + + CHANGELOG_FILL_UINT32(co, mode, number_fn, xtra_len); + co++; + + CHANGELOG_FILL_UINT32(co, frame->root->uid, number_fn, xtra_len); + co++; + + CHANGELOG_FILL_UINT32(co, frame->root->gid, number_fn, xtra_len); + co++; + + CHANGELOG_FILL_ENTRY(co, loc->pargfid, loc->name, entry_fn, entry_free_fn, + xtra_len, wind); + + changelog_set_usable_record_and_length(frame->local, xtra_len, 5); + + LOCK(&priv->lock); + { + if ((barrier_enabled = priv->barrier_enabled)) { + stub = fop_mknod_stub(frame, changelog_mknod_resume, loc, mode, dev, + umask, xdata); + if (!stub) + __chlog_barrier_disable(this, &queue); + else + __chlog_barrier_enqueue(this, stub); + } else { + ((changelog_local_t *)frame->local)->color = priv->current_color; + changelog_inc_fop_cnt(this, priv, frame->local); + } + } + UNLOCK(&priv->lock); + + if (barrier_enabled && stub) { + gf_msg_debug(this->name, 0, "Enqueued mknod"); + goto out; + } + + if (barrier_enabled && !stub) { + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, + CHANGELOG_MSG_BARRIER_FOP_FAILED, "fop=mknod", NULL); + chlog_barrier_dequeue_all(this, &queue); + } + +wind: + STACK_WIND(frame, changelog_mknod_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mknod, loc, mode, dev, umask, xdata); out: - return 0; + return 0; } -/* creat */ +/* create */ int32_t -changelog_create_cbk 
(call_frame_t *frame, - void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - fd_t *fd, inode_t *inode, struct iatt *buf, - struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +changelog_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - int32_t ret = 0; - changelog_priv_t *priv = NULL; - changelog_local_t *local = NULL; - changelog_event_t ev = {0,}; - - priv = this->private; - local = frame->local; - - CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind); - - /* fill the event structure.. similar to open() */ - ev.ev_type = CHANGELOG_OP_TYPE_CREATE; - gf_uuid_copy (ev.u.create.gfid, buf->ia_gfid); - ev.u.create.flags = fd->flags; - changelog_dispatch_event (this, priv, &ev); - - if (changelog_ev_selected - (this, &priv->ev_selection, CHANGELOG_OP_TYPE_RELEASE)) { - ret = fd_ctx_set (fd, this, (uint64_t)(long) 0x1); - if (ret) - gf_msg (this->name, GF_LOG_WARNING, 0, - CHANGELOG_MSG_SET_FD_CONTEXT, - "could not set fd context (for release cbk)"); - } - - changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY); + int32_t ret = 0; + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; + changelog_event_t ev = { + 0, + }; - unwind: - changelog_dec_fop_cnt (this, priv, local); - CHANGELOG_STACK_UNWIND (create, frame, - op_ret, op_errno, fd, inode, - buf, preparent, postparent, xdata); - return 0; -} + priv = this->private; + local = frame->local; -int32_t -changelog_create_resume (call_frame_t *frame, xlator_t *this, - loc_t *loc, int32_t flags, mode_t mode, - mode_t umask, fd_t *fd, dict_t *xdata) -{ - changelog_priv_t *priv = NULL; + CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind); - GF_VALIDATE_OR_GOTO ("changelog", this, out); - GF_VALIDATE_OR_GOTO ("changelog", this->fops, out); - GF_VALIDATE_OR_GOTO ("changelog", frame, out); + /* fill the 
event structure.. similar to open() */ + ev.ev_type = CHANGELOG_OP_TYPE_CREATE; + gf_uuid_copy(ev.u.create.gfid, buf->ia_gfid); + ev.u.create.flags = fd->flags; + changelog_dispatch_event(this, priv, &ev); - priv = this->private; + if (changelog_ev_selected(this, &priv->ev_selection, + CHANGELOG_OP_TYPE_RELEASE)) { + ret = fd_ctx_set(fd, this, (uint64_t)(long)0x1); + if (ret) + gf_smsg(this->name, GF_LOG_WARNING, 0, CHANGELOG_MSG_SET_FD_CONTEXT, + NULL); + } - gf_msg_debug (this->name, 0, "Dequeuing create"); - changelog_color_fop_and_inc_cnt - (this, priv, frame->local); - STACK_WIND (frame, changelog_create_cbk, - FIRST_CHILD (this), FIRST_CHILD (this)->fops->create, - loc, flags, mode, umask, fd, xdata); - return 0; + changelog_update(this, priv, local, CHANGELOG_TYPE_ENTRY); -out: - return -1; +unwind: + changelog_dec_fop_cnt(this, priv, local); + CHANGELOG_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, buf, + preparent, postparent, xdata); + return 0; } int32_t -changelog_create (call_frame_t *frame, xlator_t *this, - loc_t *loc, int32_t flags, mode_t mode, - mode_t umask, fd_t *fd, dict_t *xdata) -{ - int ret = -1; - uuid_t gfid = {0,}; - void *uuid_req = NULL; - changelog_opt_t *co = NULL; - changelog_priv_t *priv = NULL; - size_t xtra_len = 0; - call_stub_t *stub = NULL; - struct list_head queue = {0, }; - gf_boolean_t barrier_enabled = _gf_false; - - priv = this->private; - CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind); - - ret = dict_get_ptr (xdata, "gfid-req", &uuid_req); - if (ret) { - gf_msg_debug (this->name, 0, - "failed to get gfid from dict"); - goto wind; - } - gf_uuid_copy (gfid, uuid_req); - - /* init with two extra records */ - CHANGELOG_INIT_NOCHECK (this, frame->local, NULL, gfid, 5); - if (!frame->local) - goto wind; - - co = changelog_get_usable_buffer (frame->local); - if (!co) - goto wind; - - CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len); - co++; - - CHANGELOG_FILL_UINT32 (co, mode, number_fn, xtra_len); 
- co++; - - CHANGELOG_FILL_UINT32 (co, frame->root->uid, number_fn, xtra_len); - co++; - - CHANGELOG_FILL_UINT32 (co, frame->root->gid, number_fn, xtra_len); - co++; +changelog_create_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, + int32_t flags, mode_t mode, mode_t umask, fd_t *fd, + dict_t *xdata) +{ + changelog_priv_t *priv = NULL; - CHANGELOG_FILL_ENTRY (co, loc->pargfid, loc->name, - entry_fn, entry_free_fn, xtra_len, wind); + GF_VALIDATE_OR_GOTO("changelog", this, out); + GF_VALIDATE_OR_GOTO("changelog", this->fops, out); + GF_VALIDATE_OR_GOTO("changelog", frame, out); - changelog_set_usable_record_and_length (frame->local, xtra_len, 5); + priv = this->private; - LOCK (&priv->lock); - { - if ((barrier_enabled = priv->barrier_enabled)) { - stub = fop_create_stub (frame, changelog_create_resume, - loc, flags, mode, umask, fd, - xdata); - if (!stub) - __chlog_barrier_disable (this, &queue); - else - __chlog_barrier_enqueue (this, stub); - } else { - ((changelog_local_t *)frame->local)->color - = priv->current_color; - changelog_inc_fop_cnt (this, priv, frame->local); - } - } - UNLOCK (&priv->lock); - - if (barrier_enabled && stub) { - gf_msg_debug (this->name, 0, "Enqueued create"); - goto out; - } + gf_msg_debug(this->name, 0, "Dequeuing create"); + changelog_color_fop_and_inc_cnt(this, priv, frame->local); + STACK_WIND(frame, changelog_create_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd, + xdata); + return 0; - if (barrier_enabled && !stub) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - CHANGELOG_MSG_NO_MEMORY, - "Failed to barrier FOPs, disabling changelog barrier " - "FOP: create"); - chlog_barrier_dequeue_all (this, &queue); - } +out: + return -1; +} - wind: - STACK_WIND (frame, changelog_create_cbk, - FIRST_CHILD (this), FIRST_CHILD (this)->fops->create, - loc, flags, mode, umask, fd, xdata); +int32_t +changelog_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t 
umask, fd_t *fd, dict_t *xdata) +{ + int ret = -1; + uuid_t gfid = { + 0, + }; + changelog_opt_t *co = NULL; + changelog_priv_t *priv = NULL; + size_t xtra_len = 0; + call_stub_t *stub = NULL; + struct list_head queue = { + 0, + }; + gf_boolean_t barrier_enabled = _gf_false; + + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind); + + ret = dict_get_gfuuid(xdata, "gfid-req", &gfid); + if (ret) { + gf_msg_debug(this->name, 0, "failed to get gfid from dict"); + goto wind; + } + + /* init with two extra records */ + CHANGELOG_INIT_NOCHECK(this, frame->local, NULL, gfid, 5); + if (!frame->local) + goto wind; + + co = changelog_get_usable_buffer(frame->local); + if (!co) + goto wind; + + CHANGLOG_FILL_FOP_NUMBER(co, frame->root->op, fop_fn, xtra_len); + co++; + + CHANGELOG_FILL_UINT32(co, mode, number_fn, xtra_len); + co++; + + CHANGELOG_FILL_UINT32(co, frame->root->uid, number_fn, xtra_len); + co++; + + CHANGELOG_FILL_UINT32(co, frame->root->gid, number_fn, xtra_len); + co++; + + CHANGELOG_FILL_ENTRY(co, loc->pargfid, loc->name, entry_fn, entry_free_fn, + xtra_len, wind); + + changelog_set_usable_record_and_length(frame->local, xtra_len, 5); + + LOCK(&priv->lock); + { + if ((barrier_enabled = priv->barrier_enabled)) { + stub = fop_create_stub(frame, changelog_create_resume, loc, flags, + mode, umask, fd, xdata); + if (!stub) + __chlog_barrier_disable(this, &queue); + else + __chlog_barrier_enqueue(this, stub); + } else { + ((changelog_local_t *)frame->local)->color = priv->current_color; + changelog_inc_fop_cnt(this, priv, frame->local); + } + } + UNLOCK(&priv->lock); + + if (barrier_enabled && stub) { + gf_msg_debug(this->name, 0, "Enqueued create"); + goto out; + } + + if (barrier_enabled && !stub) { + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, + CHANGELOG_MSG_BARRIER_FOP_FAILED, "fop=create", NULL); + chlog_barrier_dequeue_all(this, &queue); + } + +wind: + STACK_WIND(frame, changelog_create_cbk, FIRST_CHILD(this), + 
FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd, + xdata); out: - return 0; + return 0; } /* }}} */ - /* Metadata modification fops - TYPE II */ /* {{{ */ @@ -1155,261 +1104,253 @@ out: /* {f}setattr */ int32_t -changelog_fsetattr_cbk (call_frame_t *frame, - void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, struct iatt *preop_stbuf, - struct iatt *postop_stbuf, dict_t *xdata) +changelog_fsetattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *preop_stbuf, struct iatt *postop_stbuf, + dict_t *xdata) { - changelog_priv_t *priv = NULL; - changelog_local_t *local = NULL; + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; - priv = this->private; - local = frame->local; + priv = this->private; + local = frame->local; - CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind); + CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind); - changelog_update (this, priv, local, CHANGELOG_TYPE_METADATA); - - unwind: - changelog_dec_fop_cnt (this, priv, local); - CHANGELOG_STACK_UNWIND (fsetattr, frame, op_ret, op_errno, - preop_stbuf, postop_stbuf, xdata); - - return 0; + changelog_update(this, priv, local, CHANGELOG_TYPE_METADATA); +unwind: + changelog_dec_fop_cnt(this, priv, local); + CHANGELOG_STACK_UNWIND(fsetattr, frame, op_ret, op_errno, preop_stbuf, + postop_stbuf, xdata); + return 0; } int32_t -changelog_fsetattr (call_frame_t *frame, - xlator_t *this, fd_t *fd, - struct iatt *stbuf, int32_t valid, dict_t *xdata) +changelog_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iatt *stbuf, int32_t valid, dict_t *xdata) { - changelog_priv_t *priv = NULL; - changelog_opt_t *co = NULL; - size_t xtra_len = 0; + changelog_priv_t *priv = NULL; + changelog_opt_t *co = NULL; + size_t xtra_len = 0; - priv = this->private; - CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind); + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind); - 
CHANGELOG_OP_BOUNDARY_CHECK (frame, wind); + CHANGELOG_OP_BOUNDARY_CHECK(frame, wind); - CHANGELOG_INIT (this, frame->local, - fd->inode, fd->inode->gfid, 1); - if (!frame->local) - goto wind; + CHANGELOG_INIT(this, frame->local, fd->inode, fd->inode->gfid, 1); + if (!frame->local) + goto wind; - co = changelog_get_usable_buffer (frame->local); - if (!co) - goto wind; + co = changelog_get_usable_buffer(frame->local); + if (!co) + goto wind; - CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len); - - changelog_set_usable_record_and_length (frame->local, xtra_len, 1); - - wind: - changelog_color_fop_and_inc_cnt (this, priv, frame->local); - STACK_WIND (frame, changelog_fsetattr_cbk, - FIRST_CHILD (this), FIRST_CHILD (this)->fops->fsetattr, - fd, stbuf, valid, xdata); - return 0; + CHANGLOG_FILL_FOP_NUMBER(co, frame->root->op, fop_fn, xtra_len); + changelog_set_usable_record_and_length(frame->local, xtra_len, 1); +wind: + changelog_color_fop_and_inc_cnt(this, priv, frame->local); + STACK_WIND(frame, changelog_fsetattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata); + return 0; } int32_t -changelog_setattr_cbk (call_frame_t *frame, - void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, struct iatt *preop_stbuf, - struct iatt *postop_stbuf, dict_t *xdata) +changelog_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *preop_stbuf, struct iatt *postop_stbuf, + dict_t *xdata) { - changelog_priv_t *priv = NULL; - changelog_local_t *local = NULL; + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; - priv = this->private; - local = frame->local; + priv = this->private; + local = frame->local; - CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind); + CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind); - changelog_update (this, priv, local, CHANGELOG_TYPE_METADATA); + changelog_update(this, priv, local, 
CHANGELOG_TYPE_METADATA); - unwind: - changelog_dec_fop_cnt (this, priv, local); - CHANGELOG_STACK_UNWIND (setattr, frame, op_ret, op_errno, - preop_stbuf, postop_stbuf, xdata); +unwind: + changelog_dec_fop_cnt(this, priv, local); + CHANGELOG_STACK_UNWIND(setattr, frame, op_ret, op_errno, preop_stbuf, + postop_stbuf, xdata); - return 0; + return 0; } int32_t -changelog_setattr (call_frame_t *frame, - xlator_t *this, loc_t *loc, - struct iatt *stbuf, int32_t valid, dict_t *xdata) +changelog_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + struct iatt *stbuf, int32_t valid, dict_t *xdata) { - changelog_priv_t *priv = NULL; - changelog_opt_t *co = NULL; - size_t xtra_len = 0; + changelog_priv_t *priv = NULL; + changelog_opt_t *co = NULL; + size_t xtra_len = 0; + uuid_t shard_root_gfid = { + 0, + }; - priv = this->private; - CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind); + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind); - CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, wind); + CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, wind); - CHANGELOG_OP_BOUNDARY_CHECK (frame, wind); + /* Do not record META on .shard */ + gf_uuid_parse(SHARD_ROOT_GFID, shard_root_gfid); + if (gf_uuid_compare(loc->gfid, shard_root_gfid) == 0) { + goto wind; + } - CHANGELOG_INIT (this, frame->local, - loc->inode, loc->inode->gfid, 1); - if (!frame->local) - goto wind; + CHANGELOG_OP_BOUNDARY_CHECK(frame, wind); - co = changelog_get_usable_buffer (frame->local); - if (!co) - goto wind; + CHANGELOG_INIT(this, frame->local, loc->inode, loc->inode->gfid, 1); + if (!frame->local) + goto wind; - CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len); + co = changelog_get_usable_buffer(frame->local); + if (!co) + goto wind; - changelog_set_usable_record_and_length (frame->local, xtra_len, 1); + CHANGLOG_FILL_FOP_NUMBER(co, frame->root->op, fop_fn, xtra_len); - wind: - changelog_color_fop_and_inc_cnt (this, priv, frame->local); - STACK_WIND 
(frame, changelog_setattr_cbk, - FIRST_CHILD (this), FIRST_CHILD (this)->fops->setattr, - loc, stbuf, valid, xdata); - return 0; + changelog_set_usable_record_and_length(frame->local, xtra_len, 1); + +wind: + changelog_color_fop_and_inc_cnt(this, priv, frame->local); + STACK_WIND(frame, changelog_setattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata); + return 0; } /* {f}removexattr */ int32_t -changelog_fremovexattr_cbk (call_frame_t *frame, - void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +changelog_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - changelog_priv_t *priv = NULL; - changelog_local_t *local = NULL; + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; - priv = this->private; - local = frame->local; + priv = this->private; + local = frame->local; - CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind); + CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind); - changelog_update (this, priv, local, CHANGELOG_TYPE_METADATA); + changelog_update(this, priv, local, CHANGELOG_TYPE_METADATA_XATTR); - unwind: - changelog_dec_fop_cnt (this, priv, local); - CHANGELOG_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, xdata); +unwind: + changelog_dec_fop_cnt(this, priv, local); + CHANGELOG_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, xdata); - return 0; + return 0; } int32_t -changelog_fremovexattr (call_frame_t *frame, xlator_t *this, - fd_t *fd, const char *name, dict_t *xdata) +changelog_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *name, dict_t *xdata) { - changelog_priv_t *priv = NULL; - changelog_opt_t *co = NULL; - size_t xtra_len = 0; + changelog_priv_t *priv = NULL; + changelog_opt_t *co = NULL; + size_t xtra_len = 0; - priv = this->private; - CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind); + priv = this->private; + 
CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind); - CHANGELOG_OP_BOUNDARY_CHECK (frame, wind); + CHANGELOG_OP_BOUNDARY_CHECK(frame, wind); - CHANGELOG_INIT (this, frame->local, - fd->inode, fd->inode->gfid, 1); + CHANGELOG_INIT(this, frame->local, fd->inode, fd->inode->gfid, 1); - co = changelog_get_usable_buffer (frame->local); - if (!co) - goto wind; + co = changelog_get_usable_buffer(frame->local); + if (!co) + goto wind; - CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len); + CHANGLOG_FILL_FOP_NUMBER(co, frame->root->op, fop_fn, xtra_len); - changelog_set_usable_record_and_length (frame->local, xtra_len, 1); + changelog_set_usable_record_and_length(frame->local, xtra_len, 1); - wind: - changelog_color_fop_and_inc_cnt (this, priv, frame->local); - STACK_WIND (frame, changelog_fremovexattr_cbk, - FIRST_CHILD (this), FIRST_CHILD (this)->fops->fremovexattr, - fd, name, xdata); - return 0; +wind: + changelog_color_fop_and_inc_cnt(this, priv, frame->local); + STACK_WIND(frame, changelog_fremovexattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata); + return 0; } int32_t -changelog_removexattr_cbk (call_frame_t *frame, - void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +changelog_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - changelog_priv_t *priv = NULL; - changelog_local_t *local = NULL; + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; - priv = this->private; - local = frame->local; + priv = this->private; + local = frame->local; - CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind); + CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind); - changelog_update (this, priv, local, CHANGELOG_TYPE_METADATA); + changelog_update(this, priv, local, CHANGELOG_TYPE_METADATA_XATTR); - unwind: - changelog_dec_fop_cnt (this, priv, local); - CHANGELOG_STACK_UNWIND (removexattr, frame, op_ret, 
op_errno, xdata); +unwind: + changelog_dec_fop_cnt(this, priv, local); + CHANGELOG_STACK_UNWIND(removexattr, frame, op_ret, op_errno, xdata); - return 0; + return 0; } int32_t -changelog_removexattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, const char *name, dict_t *xdata) +changelog_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) { - changelog_priv_t *priv = NULL; - changelog_opt_t *co = NULL; - size_t xtra_len = 0; + changelog_priv_t *priv = NULL; + changelog_opt_t *co = NULL; + size_t xtra_len = 0; - priv = this->private; - CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind); + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind); - CHANGELOG_OP_BOUNDARY_CHECK (frame, wind); + CHANGELOG_OP_BOUNDARY_CHECK(frame, wind); - CHANGELOG_INIT (this, frame->local, - loc->inode, loc->inode->gfid, 1); + CHANGELOG_INIT(this, frame->local, loc->inode, loc->inode->gfid, 1); - co = changelog_get_usable_buffer (frame->local); - if (!co) - goto wind; + co = changelog_get_usable_buffer(frame->local); + if (!co) + goto wind; - CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len); + CHANGLOG_FILL_FOP_NUMBER(co, frame->root->op, fop_fn, xtra_len); - changelog_set_usable_record_and_length (frame->local, xtra_len, 1); + changelog_set_usable_record_and_length(frame->local, xtra_len, 1); - wind: - changelog_color_fop_and_inc_cnt (this, priv, frame->local); - STACK_WIND (frame, changelog_removexattr_cbk, - FIRST_CHILD (this), FIRST_CHILD (this)->fops->removexattr, - loc, name, xdata); - return 0; +wind: + changelog_color_fop_and_inc_cnt(this, priv, frame->local); + STACK_WIND(frame, changelog_removexattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, loc, name, xdata); + return 0; } /* {f}setxattr */ int32_t -changelog_setxattr_cbk (call_frame_t *frame, - void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +changelog_setxattr_cbk(call_frame_t *frame, 
void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - changelog_priv_t *priv = NULL; - changelog_local_t *local = NULL; + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; - priv = this->private; - local = frame->local; + priv = this->private; + local = frame->local; - CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind); + CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind); - changelog_update (this, priv, local, CHANGELOG_TYPE_METADATA); + changelog_update(this, priv, local, CHANGELOG_TYPE_METADATA_XATTR); - unwind: - changelog_dec_fop_cnt (this, priv, local); - CHANGELOG_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata); +unwind: + changelog_dec_fop_cnt(this, priv, local); + CHANGELOG_STACK_UNWIND(setxattr, frame, op_ret, op_errno, xdata); - return 0; + return 0; } /* changelog_handle_virtual_xattr: @@ -1422,153 +1363,254 @@ changelog_setxattr_cbk (call_frame_t *frame, * any other value: ENOTSUP is returned. */ static void -changelog_handle_virtual_xattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, dict_t *dict) +changelog_handle_virtual_xattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + dict_t *dict) { - changelog_priv_t *priv = NULL; - changelog_local_t *local = NULL; - int32_t value = 0; - int ret = 0; - int dict_ret = 0; - gf_boolean_t valid = _gf_false; + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; + int32_t value = 0; + int ret = 0; + int dict_ret = 0; + gf_boolean_t valid = _gf_false; - priv = this->private; - GF_ASSERT (priv); + priv = this->private; + GF_ASSERT(priv); - dict_ret = dict_get_int32 (dict, GF_XATTR_TRIGGER_SYNC, &value); + dict_ret = dict_get_int32(dict, GF_XATTR_TRIGGER_SYNC, &value); - if ((dict_ret == 0 && value == 1) && ((loc->inode->ia_type == IA_IFDIR) - || (loc->inode->ia_type == IA_IFREG))) - valid = _gf_true; + if ((dict_ret == 0 && value == 1) && ((loc->inode->ia_type == IA_IFDIR) || + (loc->inode->ia_type == IA_IFREG))) + valid 
= _gf_true; - if (valid) { - ret = changelog_fill_entry_buf (frame, this, loc, &local); - if (ret) { - gf_msg (this->name, GF_LOG_INFO, 0, - CHANGELOG_MSG_ENTRY_BUF_INFO, - "Entry cannot be" - " captured for gfid: %s. Capturing DATA" - " entry.", uuid_utoa (loc->inode->gfid)); - goto unwind; - } - changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY); - - unwind: - /* Capture DATA only if it's a file. */ - if (loc->inode->ia_type != IA_IFDIR) - changelog_update (this, priv, frame->local, - CHANGELOG_TYPE_DATA); - /* Assign local to prev_entry, so unwind will take - * care of cleanup. */ - ((changelog_local_t *)(frame->local))->prev_entry = local; - CHANGELOG_STACK_UNWIND (setxattr, frame, 0, 0, NULL); - return; - } else { - CHANGELOG_STACK_UNWIND (setxattr, frame, -1, ENOTSUP, NULL); - return; - } + if (valid) { + ret = changelog_fill_entry_buf(frame, this, loc, &local); + if (ret) { + gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_ENTRY_BUF_INFO, + "gfid=%s", uuid_utoa(loc->inode->gfid), NULL); + goto unwind; + } + changelog_update(this, priv, local, CHANGELOG_TYPE_ENTRY); + + unwind: + /* Capture DATA only if it's a file. */ + if (loc->inode->ia_type != IA_IFDIR) + changelog_update(this, priv, frame->local, CHANGELOG_TYPE_DATA); + /* Assign local to prev_entry, so unwind will take + * care of cleanup. 
*/ + ((changelog_local_t *)(frame->local))->prev_entry = local; + CHANGELOG_STACK_UNWIND(setxattr, frame, 0, 0, NULL); + return; + } else { + CHANGELOG_STACK_UNWIND(setxattr, frame, -1, ENOTSUP, NULL); + return; + } } int32_t -changelog_setxattr (call_frame_t *frame, - xlator_t *this, loc_t *loc, - dict_t *dict, int32_t flags, dict_t *xdata) +changelog_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + dict_t *dict, int32_t flags, dict_t *xdata) { - changelog_priv_t *priv = NULL; - changelog_opt_t *co = NULL; - size_t xtra_len = 0; + changelog_priv_t *priv = NULL; + changelog_opt_t *co = NULL; + size_t xtra_len = 0; - priv = this->private; - CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind); + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind); - CHANGELOG_OP_BOUNDARY_CHECK (frame, wind); + CHANGELOG_OP_BOUNDARY_CHECK(frame, wind); - CHANGELOG_INIT (this, frame->local, - loc->inode, loc->inode->gfid, 1); + CHANGELOG_INIT(this, frame->local, loc->inode, loc->inode->gfid, 1); - /* On setting this virtual xattr on a file, an explicit data - * sync is triggered from geo-rep as CREATE|DATA entry is - * recorded in changelog based on xattr value. - */ - if (dict_get (dict, GF_XATTR_TRIGGER_SYNC)) { - changelog_handle_virtual_xattr (frame, this, loc, dict); - return 0; - } + /* On setting this virtual xattr on a file, an explicit data + * sync is triggered from geo-rep as CREATE|DATA entry is + * recorded in changelog based on xattr value. 
+ */ + if (dict_get(dict, GF_XATTR_TRIGGER_SYNC)) { + changelog_handle_virtual_xattr(frame, this, loc, dict); + return 0; + } - co = changelog_get_usable_buffer (frame->local); - if (!co) - goto wind; + co = changelog_get_usable_buffer(frame->local); + if (!co) + goto wind; - CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len); + CHANGLOG_FILL_FOP_NUMBER(co, frame->root->op, fop_fn, xtra_len); - changelog_set_usable_record_and_length (frame->local, xtra_len, 1); + changelog_set_usable_record_and_length(frame->local, xtra_len, 1); - wind: - changelog_color_fop_and_inc_cnt (this, priv, frame->local); - STACK_WIND (frame, changelog_setxattr_cbk, - FIRST_CHILD (this), FIRST_CHILD (this)->fops->setxattr, - loc, dict, flags, xdata); - return 0; +wind: + changelog_color_fop_and_inc_cnt(this, priv, frame->local); + STACK_WIND(frame, changelog_setxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata); + return 0; } int32_t -changelog_fsetxattr_cbk (call_frame_t *frame, - void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, dict_t *xdata) +changelog_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - changelog_priv_t *priv = NULL; - changelog_local_t *local = NULL; + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; - priv = this->private; - local = frame->local; + priv = this->private; + local = frame->local; - CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind); + CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind); - changelog_update (this, priv, local, CHANGELOG_TYPE_METADATA); + changelog_update(this, priv, local, CHANGELOG_TYPE_METADATA_XATTR); - unwind: - changelog_dec_fop_cnt (this, priv, local); - CHANGELOG_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, xdata); +unwind: + changelog_dec_fop_cnt(this, priv, local); + CHANGELOG_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, xdata); - return 0; + return 
0; } int32_t -changelog_fsetxattr (call_frame_t *frame, - xlator_t *this, fd_t *fd, dict_t *dict, - int32_t flags, dict_t *xdata) +changelog_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, + int32_t flags, dict_t *xdata) { - changelog_priv_t *priv = NULL; - changelog_opt_t *co = NULL; - size_t xtra_len = 0; + changelog_priv_t *priv = NULL; + changelog_opt_t *co = NULL; + size_t xtra_len = 0; - priv = this->private; - CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind); - CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, wind); + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind); + CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, wind); - CHANGELOG_OP_BOUNDARY_CHECK (frame, wind); + CHANGELOG_OP_BOUNDARY_CHECK(frame, wind); - CHANGELOG_INIT (this, frame->local, - fd->inode, fd->inode->gfid, 1); + CHANGELOG_INIT(this, frame->local, fd->inode, fd->inode->gfid, 1); - co = changelog_get_usable_buffer (frame->local); - if (!co) - goto wind; + co = changelog_get_usable_buffer(frame->local); + if (!co) + goto wind; - CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len); + CHANGLOG_FILL_FOP_NUMBER(co, frame->root->op, fop_fn, xtra_len); - changelog_set_usable_record_and_length (frame->local, xtra_len, 1); + changelog_set_usable_record_and_length(frame->local, xtra_len, 1); - wind: - changelog_color_fop_and_inc_cnt (this, priv, frame->local); - STACK_WIND (frame, changelog_fsetxattr_cbk, - FIRST_CHILD (this), FIRST_CHILD (this)->fops->fsetxattr, - fd, dict, flags, xdata); - return 0; +wind: + changelog_color_fop_and_inc_cnt(this, priv, frame->local); + STACK_WIND(frame, changelog_fsetxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata); + return 0; } -/* }}} */ +int32_t +changelog_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xattr, + dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + changelog_local_t *local = 
NULL; + + priv = this->private; + local = frame->local; + + CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind); + + changelog_update(this, priv, local, CHANGELOG_TYPE_METADATA); + +unwind: + changelog_dec_fop_cnt(this, priv, local); + CHANGELOG_STACK_UNWIND(xattrop, frame, op_ret, op_errno, xattr, xdata); + + return 0; +} + +int32_t +changelog_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc, + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + changelog_opt_t *co = NULL; + size_t xtra_len = 0; + int ret = 0; + void *size_attr = NULL; + + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind); + ret = dict_get_ptr(xattr, GF_XATTR_SHARD_FILE_SIZE, &size_attr); + if (ret) + goto wind; + + CHANGELOG_OP_BOUNDARY_CHECK(frame, wind); + + CHANGELOG_INIT(this, frame->local, loc->inode, loc->inode->gfid, 1); + + co = changelog_get_usable_buffer(frame->local); + if (!co) + goto wind; + + CHANGLOG_FILL_FOP_NUMBER(co, frame->root->op, fop_fn, xtra_len); + + changelog_set_usable_record_and_length(frame->local, xtra_len, 1); + +wind: + changelog_color_fop_and_inc_cnt(this, priv, frame->local); + STACK_WIND(frame, changelog_xattrop_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->xattrop, loc, optype, xattr, xdata); + return 0; +} + +int32_t +changelog_fxattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xattr, + dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; + + priv = this->private; + local = frame->local; + + CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind); + + changelog_update(this, priv, local, CHANGELOG_TYPE_METADATA_XATTR); + +unwind: + changelog_dec_fop_cnt(this, priv, local); + CHANGELOG_STACK_UNWIND(fxattrop, frame, op_ret, op_errno, xattr, xdata); + + return 0; +} + +int32_t +changelog_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd, + gf_xattrop_flags_t optype, dict_t *xattr, 
dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + changelog_opt_t *co = NULL; + size_t xtra_len = 0; + void *size_attr = NULL; + int ret = 0; + + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind); + ret = dict_get_ptr(xattr, GF_XATTR_SHARD_FILE_SIZE, &size_attr); + if (ret) + goto wind; + CHANGELOG_OP_BOUNDARY_CHECK(frame, wind); + + CHANGELOG_INIT(this, frame->local, fd->inode, fd->inode->gfid, 1); + + co = changelog_get_usable_buffer(frame->local); + if (!co) + goto wind; + + CHANGLOG_FILL_FOP_NUMBER(co, frame->root->op, fop_fn, xtra_len); + + changelog_set_usable_record_and_length(frame->local, xtra_len, 1); + +wind: + changelog_color_fop_and_inc_cnt(this, priv, frame->local); + STACK_WIND(frame, changelog_fxattrop_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fxattrop, fd, optype, xattr, xdata); + return 0; +} +/* }}} */ /* Data modification fops - TYPE I */ @@ -1577,164 +1619,151 @@ changelog_fsetxattr (call_frame_t *frame, /* {f}truncate() */ int32_t -changelog_truncate_cbk (call_frame_t *frame, - void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) +changelog_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - changelog_priv_t *priv = NULL; - changelog_local_t *local = NULL; + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; - priv = this->private; - local = frame->local; + priv = this->private; + local = frame->local; - CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind); + CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind); - changelog_update (this, priv, local, CHANGELOG_TYPE_DATA); + changelog_update(this, priv, local, CHANGELOG_TYPE_DATA); - unwind: - changelog_dec_fop_cnt (this, priv, local); - CHANGELOG_STACK_UNWIND (truncate, frame, - op_ret, op_errno, prebuf, postbuf, xdata); - return 0; +unwind: + 
changelog_dec_fop_cnt(this, priv, local); + CHANGELOG_STACK_UNWIND(truncate, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + return 0; } int32_t -changelog_truncate (call_frame_t *frame, - xlator_t *this, loc_t *loc, off_t offset, dict_t *xdata) +changelog_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, + off_t offset, dict_t *xdata) { - changelog_priv_t *priv = NULL; + changelog_priv_t *priv = NULL; - priv = this->private; - CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind); + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind); - CHANGELOG_INIT (this, frame->local, - loc->inode, loc->inode->gfid, 0); - LOCK(&priv->c_snap_lock); - { - if (priv->c_snap_fd != -1 && - priv->barrier_enabled == _gf_true) { - changelog_snap_handle_ascii_change (this, - &( ((changelog_local_t *)(frame->local))->cld)); - } + CHANGELOG_INIT(this, frame->local, loc->inode, loc->inode->gfid, 0); + LOCK(&priv->c_snap_lock); + { + if (priv->c_snap_fd != -1 && priv->barrier_enabled == _gf_true) { + changelog_snap_handle_ascii_change( + this, &(((changelog_local_t *)(frame->local))->cld)); } - UNLOCK(&priv->c_snap_lock); - + } + UNLOCK(&priv->c_snap_lock); - wind: - changelog_color_fop_and_inc_cnt (this, priv, frame->local); - STACK_WIND (frame, changelog_truncate_cbk, - FIRST_CHILD (this), FIRST_CHILD (this)->fops->truncate, - loc, offset, xdata); - return 0; +wind: + changelog_color_fop_and_inc_cnt(this, priv, frame->local); + STACK_WIND(frame, changelog_truncate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); + return 0; } int32_t -changelog_ftruncate_cbk (call_frame_t *frame, - void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) +changelog_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - changelog_priv_t *priv = NULL; - 
changelog_local_t *local = NULL; + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; - priv = this->private; - local = frame->local; + priv = this->private; + local = frame->local; - CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind); + CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind); - changelog_update (this, priv, local, CHANGELOG_TYPE_DATA); + changelog_update(this, priv, local, CHANGELOG_TYPE_DATA); - unwind: - changelog_dec_fop_cnt (this, priv, local); - CHANGELOG_STACK_UNWIND (ftruncate, frame, - op_ret, op_errno, prebuf, postbuf, xdata); - return 0; +unwind: + changelog_dec_fop_cnt(this, priv, local); + CHANGELOG_STACK_UNWIND(ftruncate, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + return 0; } int32_t -changelog_ftruncate (call_frame_t *frame, - xlator_t *this, fd_t *fd, off_t offset, dict_t *xdata) +changelog_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) { - changelog_priv_t *priv = NULL; + changelog_priv_t *priv = NULL; - priv = this->private; - CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind); + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind); - CHANGELOG_INIT (this, frame->local, - fd->inode, fd->inode->gfid, 0); - LOCK(&priv->c_snap_lock); - { - if (priv->c_snap_fd != -1 && - priv->barrier_enabled == _gf_true) { - changelog_snap_handle_ascii_change (this, - &( ((changelog_local_t *)(frame->local))->cld)); - } + CHANGELOG_INIT(this, frame->local, fd->inode, fd->inode->gfid, 0); + LOCK(&priv->c_snap_lock); + { + if (priv->c_snap_fd != -1 && priv->barrier_enabled == _gf_true) { + changelog_snap_handle_ascii_change( + this, &(((changelog_local_t *)(frame->local))->cld)); } - UNLOCK(&priv->c_snap_lock); + } + UNLOCK(&priv->c_snap_lock); - wind: - changelog_color_fop_and_inc_cnt (this, priv, frame->local); - STACK_WIND (frame, changelog_ftruncate_cbk, - FIRST_CHILD (this), FIRST_CHILD (this)->fops->ftruncate, - fd, offset, xdata); - return 0; 
+wind: + changelog_color_fop_and_inc_cnt(this, priv, frame->local); + STACK_WIND(frame, changelog_ftruncate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); + return 0; } /* writev() */ int32_t -changelog_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, - dict_t *xdata) +changelog_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - changelog_priv_t *priv = NULL; - changelog_local_t *local = NULL; + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; - priv = this->private; - local = frame->local; + priv = this->private; + local = frame->local; - CHANGELOG_COND_GOTO (priv, ((op_ret <= 0) || !local), unwind); + CHANGELOG_COND_GOTO(priv, ((op_ret <= 0) || !local), unwind); - changelog_update (this, priv, local, CHANGELOG_TYPE_DATA); + changelog_update(this, priv, local, CHANGELOG_TYPE_DATA); - unwind: - changelog_dec_fop_cnt (this, priv, local); - CHANGELOG_STACK_UNWIND (writev, frame, - op_ret, op_errno, prebuf, postbuf, xdata); - return 0; +unwind: + changelog_dec_fop_cnt(this, priv, local); + CHANGELOG_STACK_UNWIND(writev, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + return 0; } int32_t -changelog_writev (call_frame_t *frame, - xlator_t *this, fd_t *fd, struct iovec *vector, - int32_t count, off_t offset, uint32_t flags, - struct iobref *iobref, dict_t *xdata) +changelog_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iovec *vector, int32_t count, off_t offset, + uint32_t flags, struct iobref *iobref, dict_t *xdata) { - changelog_priv_t *priv = NULL; + changelog_priv_t *priv = NULL; - priv = this->private; - CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind); + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind); - CHANGELOG_INIT (this, frame->local, - fd->inode, 
fd->inode->gfid, 0); - LOCK(&priv->c_snap_lock); - { - if (priv->c_snap_fd != -1 && - priv->barrier_enabled == _gf_true) { - changelog_snap_handle_ascii_change (this, - &( ((changelog_local_t *)(frame->local))->cld)); - } + CHANGELOG_INIT(this, frame->local, fd->inode, fd->inode->gfid, 0); + LOCK(&priv->c_snap_lock); + { + if (priv->c_snap_fd != -1 && priv->barrier_enabled == _gf_true) { + changelog_snap_handle_ascii_change( + this, &(((changelog_local_t *)(frame->local))->cld)); } - UNLOCK(&priv->c_snap_lock); + } + UNLOCK(&priv->c_snap_lock); - wind: - changelog_color_fop_and_inc_cnt (this, priv, frame->local); - STACK_WIND (frame, changelog_writev_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->writev, fd, vector, - count, offset, flags, iobref, xdata); - return 0; +wind: + changelog_color_fop_and_inc_cnt(this, priv, frame->local); + STACK_WIND(frame, changelog_writev_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->writev, fd, vector, count, offset, + flags, iobref, xdata); + return 0; } /* }}} */ @@ -1743,84 +1772,79 @@ changelog_writev (call_frame_t *frame, /* {{{ */ - - int -changelog_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, fd_t *fd, dict_t *xdata) -{ - int ret = 0; - void *opaque = NULL; - char *buf = NULL; - ssize_t buflen = 0; - changelog_priv_t *priv = NULL; - changelog_event_t ev = {0,}; - gf_boolean_t logopen = _gf_false; - - priv = this->private; - if (frame->local) { - frame->local = NULL; - logopen = _gf_true; - } - - CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !logopen), unwind); - - /* fill the event structure */ - ev.ev_type = CHANGELOG_OP_TYPE_OPEN; - gf_uuid_copy (ev.u.open.gfid, fd->inode->gfid); - ev.u.open.flags = fd->flags; - changelog_dispatch_event (this, priv, &ev); - - if (changelog_ev_selected - (this, &priv->ev_selection, CHANGELOG_OP_TYPE_RELEASE)) { - ret = fd_ctx_set (fd, this, (uint64_t)(long) 0x1); - if (ret) - gf_msg (this->name, GF_LOG_WARNING, 0, - 
CHANGELOG_MSG_SET_FD_CONTEXT, - "could not set fd context (for release cbk)"); - } +changelog_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, fd_t *fd, dict_t *xdata) +{ + int ret = 0; + changelog_priv_t *priv = NULL; + changelog_event_t ev = { + 0, + }; + gf_boolean_t logopen = _gf_false; + + priv = this->private; + if (frame->local) { + frame->local = NULL; + logopen = _gf_true; + } + + CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !logopen), unwind); + + /* fill the event structure */ + ev.ev_type = CHANGELOG_OP_TYPE_OPEN; + gf_uuid_copy(ev.u.open.gfid, fd->inode->gfid); + ev.u.open.flags = fd->flags; + changelog_dispatch_event(this, priv, &ev); + + if (changelog_ev_selected(this, &priv->ev_selection, + CHANGELOG_OP_TYPE_RELEASE)) { + ret = fd_ctx_set(fd, this, (uint64_t)(long)0x1); + if (ret) + gf_smsg(this->name, GF_LOG_WARNING, 0, CHANGELOG_MSG_SET_FD_CONTEXT, + NULL); + } - unwind: - CHANGELOG_STACK_UNWIND (open, frame, op_ret, op_errno, fd, xdata); - return 0; +unwind: + CHANGELOG_STACK_UNWIND(open, frame, op_ret, op_errno, fd, xdata); + return 0; } int -changelog_open (call_frame_t *frame, xlator_t *this, - loc_t *loc, int flags, fd_t *fd, dict_t *xdata) +changelog_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + fd_t *fd, dict_t *xdata) { - changelog_priv_t *priv = NULL; + changelog_priv_t *priv = NULL; - priv = this->private; - CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind); + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind); - frame->local = (void *)0x1; /* do not dereference in ->cbk */ + frame->local = (void *)0x1; /* do not dereference in ->cbk */ - wind: - STACK_WIND (frame, changelog_open_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->open, loc, flags, fd, xdata); - return 0; +wind: + STACK_WIND(frame, changelog_open_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); + return 0; } /* }}} */ /* {{{ */ - /* }}} */ int32_t 
-_changelog_generic_dispatcher (dict_t *dict, - char *key, data_t *value, void *data) +_changelog_generic_dispatcher(dict_t *dict, char *key, data_t *value, + void *data) { - xlator_t *this = NULL; - changelog_priv_t *priv = NULL; + xlator_t *this = NULL; + changelog_priv_t *priv = NULL; - this = data; - priv = this->private; + this = data; + priv = this->private; - changelog_dispatch_event (this, priv, (changelog_event_t *)value->data); - return 0; + changelog_dispatch_event(this, priv, (changelog_event_t *)value->data); + return 0; } /** @@ -1829,46 +1853,45 @@ _changelog_generic_dispatcher (dict_t *dict, * traverses the dictionary). */ int32_t -changelog_ipc (call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata) +changelog_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata) { - if (op != GF_IPC_TARGET_CHANGELOG) - goto wind; + if (op != GF_IPC_TARGET_CHANGELOG) + goto wind; - /* it's for us, do the job */ - if (xdata) - (void) dict_foreach (xdata, - _changelog_generic_dispatcher, this); + /* it's for us, do the job */ + if (xdata) + (void)dict_foreach(xdata, _changelog_generic_dispatcher, this); - STACK_UNWIND_STRICT (ipc, frame, 0, 0, NULL); - return 0; + STACK_UNWIND_STRICT(ipc, frame, 0, 0, NULL); + return 0; - wind: - STACK_WIND (frame, default_ipc_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->ipc, op, xdata); - return 0; +wind: + STACK_WIND(frame, default_ipc_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ipc, op, xdata); + return 0; } - /* {{{ */ int32_t -changelog_release (xlator_t *this, fd_t *fd) +changelog_release(xlator_t *this, fd_t *fd) { - changelog_event_t ev = {0,}; - changelog_priv_t *priv = NULL; + changelog_event_t ev = { + 0, + }; + changelog_priv_t *priv = NULL; - priv = this->private; + priv = this->private; - ev.ev_type = CHANGELOG_OP_TYPE_RELEASE; - gf_uuid_copy (ev.u.release.gfid, fd->inode->gfid); - changelog_dispatch_event (this, priv, &ev); + ev.ev_type = CHANGELOG_OP_TYPE_RELEASE; + 
gf_uuid_copy(ev.u.release.gfid, fd->inode->gfid); + changelog_dispatch_event(this, priv, &ev); - (void) fd_ctx_del (fd, this, NULL); + (void)fd_ctx_del(fd, this, NULL); - return 0; + return 0; } - /* }}} */ /** @@ -1883,978 +1906,1084 @@ changelog_release (xlator_t *this, fd_t *fd) * needed if there are more operation modes in the future. */ static void -changelog_assign_opmode (changelog_priv_t *priv, char *mode) +changelog_assign_opmode(changelog_priv_t *priv, char *mode) { - if ( strncmp (mode, "realtime", 8) == 0 ) { - priv->op_mode = CHANGELOG_MODE_RT; - } + if (strncmp(mode, "realtime", 8) == 0) { + priv->op_mode = CHANGELOG_MODE_RT; + } } static void -changelog_assign_encoding (changelog_priv_t *priv, char *enc) +changelog_assign_encoding(changelog_priv_t *priv, char *enc) { - if ( strncmp (enc, "binary", 6) == 0 ) { - priv->encode_mode = CHANGELOG_ENCODE_BINARY; - } else if ( strncmp (enc, "ascii", 5) == 0 ) { - priv->encode_mode = CHANGELOG_ENCODE_ASCII; - } + if (strncmp(enc, "binary", 6) == 0) { + priv->encode_mode = CHANGELOG_ENCODE_BINARY; + } else if (strncmp(enc, "ascii", 5) == 0) { + priv->encode_mode = CHANGELOG_ENCODE_ASCII; + } } static void changelog_assign_barrier_timeout(changelog_priv_t *priv, uint32_t timeout) { - LOCK (&priv->lock); - { - priv->timeout.tv_sec = timeout; - } - UNLOCK (&priv->lock); + LOCK(&priv->lock); + { + priv->timeout.tv_sec = timeout; + } + UNLOCK(&priv->lock); } /* cleanup any helper threads that are running */ static void -changelog_cleanup_helper_threads (xlator_t *this, changelog_priv_t *priv) -{ - int ret = 0; - - if (priv->cr.rollover_th) { - (void) changelog_thread_cleanup (this, priv->cr.rollover_th); - priv->cr.rollover_th = 0; - ret = sys_close (priv->cr_wfd); - if (ret) - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_CLOSE_ERROR, - "error closing write end of rollover pipe"); - } +changelog_cleanup_helper_threads(xlator_t *this, changelog_priv_t *priv) +{ + if (priv->cr.rollover_th) { + 
(void)changelog_thread_cleanup(this, priv->cr.rollover_th); + priv->cr.rollover_th = 0; + } - if (priv->cf.fsync_th) { - (void) changelog_thread_cleanup (this, priv->cf.fsync_th); - priv->cf.fsync_th = 0; - } + if (priv->cf.fsync_th) { + (void)changelog_thread_cleanup(this, priv->cf.fsync_th); + priv->cf.fsync_th = 0; + } } /* spawn helper thread; cleaning up in case of errors */ static int -changelog_spawn_helper_threads (xlator_t *this, changelog_priv_t *priv) -{ - int ret = 0; - int flags = 0; - int pipe_fd[2] = {0, 0}; - - /* Geo-Rep snapshot dependency: - * - * To implement explicit rollover of changlog journal on barrier - * notification, a pipe is created to communicate between - * 'changelog_rollover' thread and changelog main thread. The select - * call used to wait till roll-over time in changelog_rollover thread - * is modified to wait on read end of the pipe. When barrier - * notification comes (i.e, in 'reconfigure'), select in - * changelog_rollover thread is woken up explicitly by writing into - * the write end of the pipe in 'reconfigure'. - */ - - ret = pipe (pipe_fd); - if (ret == -1) { - gf_msg (this->name, GF_LOG_ERROR, - errno, CHANGELOG_MSG_PIPE_CREATION_ERROR, - "Cannot create pipe"); - goto out; - } +changelog_spawn_helper_threads(xlator_t *this, changelog_priv_t *priv) +{ + int ret = 0; + + /* Geo-Rep snapshot dependency: + * + * To implement explicit rollover of changlog journal on barrier + * notification, a pipe is created to communicate between + * 'changelog_rollover' thread and changelog main thread. The select + * call used to wait till roll-over time in changelog_rollover thread + * is modified to wait on read end of the pipe. When barrier + * notification comes (i.e, in 'reconfigure'), select in + * changelog_rollover thread is woken up explicitly by writing into + * the write end of the pipe in 'reconfigure'. 
+ */ + + priv->cr.notify = _gf_false; + priv->cr.this = this; + ret = gf_thread_create(&priv->cr.rollover_th, NULL, changelog_rollover, + priv, "clogro"); + if (ret) + goto out; + + if (priv->fsync_interval) { + priv->cf.this = this; + ret = gf_thread_create(&priv->cf.fsync_th, NULL, changelog_fsync_thread, + priv, "clogfsyn"); + } + + if (ret) + changelog_cleanup_helper_threads(this, priv); - /* writer is non-blocking */ - flags = fcntl (pipe_fd[1], F_GETFL); - flags |= O_NONBLOCK; +out: + return ret; +} - ret = fcntl (pipe_fd[1], F_SETFL, flags); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_FCNTL_FAILED, - "failed to set O_NONBLOCK flag"); - goto out; +int +notify(xlator_t *this, int event, void *data, ...) +{ + changelog_priv_t *priv = NULL; + dict_t *dict = NULL; + char buf[1] = {1}; + int barrier = DICT_DEFAULT; + gf_boolean_t bclean_req = _gf_false; + int ret = 0; + int ret1 = 0; + struct list_head queue = { + 0, + }; + uint64_t xprtcnt = 0; + uint64_t clntcnt = 0; + changelog_clnt_t *conn = NULL; + gf_boolean_t cleanup_notify = _gf_false; + char sockfile[UNIX_PATH_MAX] = { + 0, + }; + rpcsvc_listener_t *listener = NULL; + rpcsvc_listener_t *next = NULL; + + INIT_LIST_HEAD(&queue); + + priv = this->private; + if (!priv) + goto out; + + if (event == GF_EVENT_PARENT_DOWN) { + priv->victim = data; + gf_log(this->name, GF_LOG_INFO, + "cleanup changelog rpc connection of brick %s", + priv->victim->name); + + if (priv->rpc_active) { + this->cleanup_starting = 1; + changelog_destroy_rpc_listner(this, priv); + conn = &priv->connections; + if (conn) + changelog_ev_cleanup_connections(this, conn); + xprtcnt = GF_ATOMIC_GET(priv->xprtcnt); + clntcnt = GF_ATOMIC_GET(priv->clntcnt); + if (!xprtcnt && !clntcnt) { + LOCK(&priv->lock); + { + cleanup_notify = priv->notify_down; + priv->notify_down = _gf_true; + } + UNLOCK(&priv->lock); + if (priv->rpc) { + list_for_each_entry_safe(listener, next, + &priv->rpc->listeners, list) + { + if 
(listener->trans) { + rpc_transport_unref(listener->trans); + } + } + rpcsvc_destroy(priv->rpc); + priv->rpc = NULL; + } + CHANGELOG_MAKE_SOCKET_PATH(priv->changelog_brick, sockfile, + UNIX_PATH_MAX); + sys_unlink(sockfile); + if (!cleanup_notify) + default_notify(this, GF_EVENT_PARENT_DOWN, data); + } + } else { + default_notify(this, GF_EVENT_PARENT_DOWN, data); } + goto out; + } - priv->cr_wfd = pipe_fd[1]; - priv->cr.rfd = pipe_fd[0]; + if (event == GF_EVENT_TRANSLATOR_OP) { + dict = data; - priv->cr.this = this; - ret = gf_thread_create (&priv->cr.rollover_th, - NULL, changelog_rollover, priv); - if (ret) + barrier = dict_get_str_boolean(dict, "barrier", DICT_DEFAULT); + + switch (barrier) { + case DICT_ERROR: + gf_smsg(this->name, GF_LOG_ERROR, 0, + CHANGELOG_MSG_DICT_GET_FAILED, "dict_get_str_boolean", + NULL); + ret = -1; goto out; - if (priv->fsync_interval) { - priv->cf.this = this; - ret = gf_thread_create (&priv->cf.fsync_th, - NULL, changelog_fsync_thread, priv); - } + case BARRIER_OFF: + gf_smsg(this->name, GF_LOG_INFO, 0, + CHANGELOG_MSG_BARRIER_STATE_NOTIFY, "off", NULL); - if (ret) - changelog_cleanup_helper_threads (this, priv); + CHANGELOG_NOT_ON_THEN_GOTO(priv, ret, out); + LOCK(&priv->c_snap_lock); + { + changelog_snap_logging_stop(this, priv); + } + UNLOCK(&priv->c_snap_lock); - out: - return ret; -} + LOCK(&priv->bflags.lock); + { + if (priv->bflags.barrier_ext == _gf_false) + ret = -1; + } + UNLOCK(&priv->bflags.lock); -int -notify (xlator_t *this, int event, void *data, ...) 
-{ - changelog_priv_t *priv = NULL; - dict_t *dict = NULL; - char buf[1] = {1}; - int barrier = DICT_DEFAULT; - gf_boolean_t bclean_req = _gf_false; - int ret = 0; - int ret1 = 0; - struct list_head queue = {0, }; + if (ret == -1) { + gf_smsg(this->name, GF_LOG_ERROR, 0, + CHANGELOG_MSG_BARRIER_ERROR, NULL); + goto out; + } + + /* Stop changelog barrier and dequeue all fops */ + LOCK(&priv->lock); + { + if (priv->barrier_enabled == _gf_true) + __chlog_barrier_disable(this, &queue); + else + ret = -1; + } + UNLOCK(&priv->lock); + /* If ret = -1, then changelog barrier is already + * disabled because of error or timeout. + */ + if (ret == 0) { + chlog_barrier_dequeue_all(this, &queue); + gf_smsg(this->name, GF_LOG_INFO, 0, + CHANGELOG_MSG_BARRIER_DISABLED, NULL); + } else { + gf_smsg(this->name, GF_LOG_ERROR, 0, + CHANGELOG_MSG_BARRIER_ALREADY_DISABLED, NULL); + } - INIT_LIST_HEAD (&queue); + LOCK(&priv->bflags.lock); + { + priv->bflags.barrier_ext = _gf_false; + } + UNLOCK(&priv->bflags.lock); - priv = this->private; - if (!priv) goto out; - if (event == GF_EVENT_TRANSLATOR_OP) { + case BARRIER_ON: + gf_smsg(this->name, GF_LOG_INFO, 0, + CHANGELOG_MSG_BARRIER_STATE_NOTIFY, "on", NULL); - dict = data; - - barrier = dict_get_str_boolean (dict, "barrier", DICT_DEFAULT); + CHANGELOG_NOT_ON_THEN_GOTO(priv, ret, out); + LOCK(&priv->c_snap_lock); + { + changelog_snap_logging_start(this, priv); + } + UNLOCK(&priv->c_snap_lock); - switch (barrier) { - case DICT_ERROR: - gf_msg (this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_DICT_GET_FAILED, - "Barrier dict_get_str_boolean failed"); + LOCK(&priv->bflags.lock); + { + if (priv->bflags.barrier_ext == _gf_true) ret = -1; - goto out; - - case BARRIER_OFF: - gf_msg (this->name, GF_LOG_INFO, 0, - CHANGELOG_MSG_BARRIER_INFO, - "Barrier off notification"); - - CHANGELOG_NOT_ON_THEN_GOTO(priv, ret, out); - LOCK(&priv->c_snap_lock); - { - changelog_snap_logging_stop (this, priv); - } - UNLOCK(&priv->c_snap_lock); - - LOCK 
(&priv->bflags.lock); - { - if (priv->bflags.barrier_ext == _gf_false) - ret = -1; - } - UNLOCK (&priv->bflags.lock); - - if (ret == -1 ) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_BARRIER_ERROR, - "Received another barrier off" - " notification while already off"); - goto out; - } + else + priv->bflags.barrier_ext = _gf_true; + } + UNLOCK(&priv->bflags.lock); - /* Stop changelog barrier and dequeue all fops */ - LOCK (&priv->lock); - { - if (priv->barrier_enabled == _gf_true) - __chlog_barrier_disable (this, &queue); - else - ret = -1; - } - UNLOCK (&priv->lock); - /* If ret = -1, then changelog barrier is already - * disabled because of error or timeout. - */ - if (ret == 0) { - chlog_barrier_dequeue_all(this, &queue); - gf_msg(this->name, GF_LOG_INFO, 0, - CHANGELOG_MSG_BARRIER_INFO, - "Disabled changelog barrier"); - } else { - gf_msg (this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_BARRIER_ERROR, - "Changelog barrier already disabled"); - } + if (ret == -1) { + gf_smsg(this->name, GF_LOG_ERROR, 0, + CHANGELOG_MSG_BARRIER_ON_ERROR, NULL); + goto out; + } - LOCK (&priv->bflags.lock); - { - priv->bflags.barrier_ext = _gf_false; - } - UNLOCK (&priv->bflags.lock); + ret = pthread_mutex_lock(&priv->bn.bnotify_mutex); + CHANGELOG_PTHREAD_ERROR_HANDLE_1(ret, out, bclean_req); + { + priv->bn.bnotify = _gf_true; + } + ret = pthread_mutex_unlock(&priv->bn.bnotify_mutex); + CHANGELOG_PTHREAD_ERROR_HANDLE_1(ret, out, bclean_req); - goto out; + /* Start changelog barrier */ + LOCK(&priv->lock); + { + ret = __chlog_barrier_enable(this, priv); + } + UNLOCK(&priv->lock); + if (ret == -1) { + changelog_barrier_cleanup(this, priv, &queue); + goto out; + } - case BARRIER_ON: - gf_msg (this->name, GF_LOG_INFO, 0, - CHANGELOG_MSG_BARRIER_INFO, - "Barrier on notification"); + gf_smsg(this->name, GF_LOG_INFO, 0, + CHANGELOG_MSG_BARRIER_ENABLE, NULL); - CHANGELOG_NOT_ON_THEN_GOTO(priv, ret, out); - LOCK(&priv->c_snap_lock); - { - changelog_snap_logging_start (this, priv); - 
} - UNLOCK(&priv->c_snap_lock); - - LOCK (&priv->bflags.lock); - { - if (priv->bflags.barrier_ext == _gf_true) - ret = -1; - else - priv->bflags.barrier_ext = _gf_true; - } - UNLOCK (&priv->bflags.lock); - - if (ret == -1 ) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_BARRIER_ERROR, - "Received another barrier on" - "notification when last one is" - "not served yet"); - goto out; - } + ret = changelog_barrier_notify(priv, buf); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, + CHANGELOG_MSG_WRITE_FAILED, "Explicit roll over", + NULL); + changelog_barrier_cleanup(this, priv, &queue); + ret = -1; + goto out; + } - ret = pthread_mutex_lock (&priv->bn.bnotify_mutex); - CHANGELOG_PTHREAD_ERROR_HANDLE_1 (ret, out, - bclean_req); - { - priv->bn.bnotify = _gf_true; - } - ret = pthread_mutex_unlock (&priv->bn.bnotify_mutex); - CHANGELOG_PTHREAD_ERROR_HANDLE_1 (ret, out, - bclean_req); - - /* Start changelog barrier */ - LOCK (&priv->lock); - { - ret = __chlog_barrier_enable (this, priv); - } - UNLOCK (&priv->lock); - if (ret == -1) { - changelog_barrier_cleanup (this, priv, &queue); - goto out; - } + ret = pthread_mutex_lock(&priv->bn.bnotify_mutex); + CHANGELOG_PTHREAD_ERROR_HANDLE_1(ret, out, bclean_req); + { + /* The while condition check is required here to + * handle spurious wakeup of cond wait that can + * happen with pthreads. 
See man page */ + while (priv->bn.bnotify == _gf_true) { + ret = pthread_cond_wait(&priv->bn.bnotify_cond, + &priv->bn.bnotify_mutex); + CHANGELOG_PTHREAD_ERROR_HANDLE_1(ret, out, bclean_req); + } + if (priv->bn.bnotify_error == _gf_true) { + ret = -1; + priv->bn.bnotify_error = _gf_false; + } + } + ret1 = pthread_mutex_unlock(&priv->bn.bnotify_mutex); + CHANGELOG_PTHREAD_ERROR_HANDLE_1(ret1, out, bclean_req); + gf_smsg(this->name, GF_LOG_INFO, 0, + CHANGELOG_MSG_BNOTIFY_COND_INFO, NULL); - gf_msg(this->name, GF_LOG_INFO, 0, - CHANGELOG_MSG_BARRIER_INFO, - "Enabled changelog barrier"); - - ret = changelog_barrier_notify(priv, buf); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_WRITE_FAILED, - "Explicit roll over: write failed"); - changelog_barrier_cleanup (this, priv, &queue); - ret = -1; - goto out; - } + goto out; - ret = pthread_mutex_lock (&priv->bn.bnotify_mutex); - CHANGELOG_PTHREAD_ERROR_HANDLE_1 (ret, out, - bclean_req); - { - /* The while condition check is required here to - * handle spurious wakeup of cond wait that can - * happen with pthreads. 
See man page */ - while (priv->bn.bnotify == _gf_true) { - ret = pthread_cond_wait ( - &priv->bn.bnotify_cond, - &priv->bn.bnotify_mutex); - CHANGELOG_PTHREAD_ERROR_HANDLE_1 (ret, - out, - bclean_req); - } - if (priv->bn.bnotify_error == _gf_true) { - ret = -1; - priv->bn.bnotify_error = _gf_false; - } - } - ret1 = pthread_mutex_unlock (&priv->bn.bnotify_mutex); - CHANGELOG_PTHREAD_ERROR_HANDLE_1 (ret1, out, - bclean_req); - gf_msg (this->name, GF_LOG_INFO, 0, - CHANGELOG_MSG_BNOTIFY_INFO, - "Woke up: bnotify conditional wait"); - - goto out; - - case DICT_DEFAULT: - gf_msg (this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_DICT_GET_FAILED, - "barrier key not found"); - ret = -1; - goto out; + case DICT_DEFAULT: + gf_smsg(this->name, GF_LOG_ERROR, 0, + CHANGELOG_MSG_BARRIER_KEY_NOT_FOUND, NULL); + ret = -1; + goto out; - default: - gf_msg (this->name, GF_LOG_ERROR, EINVAL, - CHANGELOG_MSG_DICT_GET_FAILED, - "Something went bad in dict_get_str_boolean"); - ret = -1; - goto out; - } - } else { - ret = default_notify (this, event, data); + default: + gf_smsg(this->name, GF_LOG_ERROR, EINVAL, + CHANGELOG_MSG_ERROR_IN_DICT_GET, NULL); + ret = -1; + goto out; } + } else { + ret = default_notify(this, event, data); + } - out: - if (bclean_req) - changelog_barrier_cleanup (this, priv, &queue); +out: + if (bclean_req) + changelog_barrier_cleanup(this, priv, &queue); - return ret; + return ret; } int32_t -mem_acct_init (xlator_t *this) +mem_acct_init(xlator_t *this) { - int ret = -1; - - if (!this) - return ret; + int ret = -1; - ret = xlator_mem_acct_init (this, gf_changelog_mt_end + 1); + if (!this) + return ret; - if (ret != 0) { - gf_msg (this->name, GF_LOG_WARNING, ENOMEM, - CHANGELOG_MSG_NO_MEMORY, "Memory accounting" - " init failed"); - return ret; - } + ret = xlator_mem_acct_init(this, gf_changelog_mt_end + 1); + if (ret != 0) { + gf_smsg(this->name, GF_LOG_WARNING, ENOMEM, + CHANGELOG_MSG_MEMORY_INIT_FAILED, NULL); return ret; + } + + return ret; } static int 
-changelog_init (xlator_t *this, changelog_priv_t *priv) +changelog_init(xlator_t *this, changelog_priv_t *priv) { - int i = 0; - int ret = -1; - struct timeval tv = {0,}; - changelog_log_data_t cld = {0,}; - - ret = gettimeofday (&tv, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_GET_TIME_OP_FAILED, - "gettimeofday() failure"); - goto out; - } + int i = 0; + int ret = 0; + changelog_log_data_t cld = { + 0, + }; - priv->slice.tv_start = tv; + priv->maps[CHANGELOG_TYPE_DATA] = "D "; + priv->maps[CHANGELOG_TYPE_METADATA] = "M "; + priv->maps[CHANGELOG_TYPE_METADATA_XATTR] = "M "; + priv->maps[CHANGELOG_TYPE_ENTRY] = "E "; - priv->maps[CHANGELOG_TYPE_DATA] = "D "; - priv->maps[CHANGELOG_TYPE_METADATA] = "M "; - priv->maps[CHANGELOG_TYPE_ENTRY] = "E "; + for (; i < CHANGELOG_MAX_TYPE; i++) { + /* start with version 1 */ + priv->slice.changelog_version[i] = 1; + } - for (; i < CHANGELOG_MAX_TYPE; i++) { - /* start with version 1 */ - priv->slice.changelog_version[i] = 1; - } - - if (!priv->active) - return ret; + if (!priv->active) + return ret; - /** - * start with a fresh changelog file every time. this is done - * in case there was an encoding change. so... things are kept - * simple here. - */ - ret = changelog_fill_rollover_data (&cld, _gf_false); - if(ret) - goto out; + /** + * start with a fresh changelog file every time. this is done + * in case there was an encoding change. so... things are kept + * simple here. 
+ */ + changelog_fill_rollover_data(&cld, _gf_false); - ret = htime_open (this, priv, cld.cld_roll_time); - /* call htime open with cld's rollover_time */ - if (ret) - goto out; + ret = htime_open(this, priv, cld.cld_roll_time); + /* call htime open with cld's rollover_time */ + if (ret) + goto out; - LOCK (&priv->lock); - { - ret = changelog_inject_single_event (this, priv, &cld); - } - UNLOCK (&priv->lock); + LOCK(&priv->lock); + { + ret = changelog_inject_single_event(this, priv, &cld); + } + UNLOCK(&priv->lock); - /* ... and finally spawn the helpers threads */ - ret = changelog_spawn_helper_threads (this, priv); + /* ... and finally spawn the helpers threads */ + ret = changelog_spawn_helper_threads(this, priv); - out: - return ret; +out: + return ret; } /** * Init barrier related condition variables and locks */ static int -changelog_barrier_pthread_init (xlator_t *this, changelog_priv_t *priv) -{ - gf_boolean_t bn_mutex_init = _gf_false; - gf_boolean_t bn_cond_init = _gf_false; - gf_boolean_t dm_mutex_black_init = _gf_false; - gf_boolean_t dm_cond_black_init = _gf_false; - gf_boolean_t dm_mutex_white_init = _gf_false; - gf_boolean_t dm_cond_white_init = _gf_false; - int ret = 0; - - if ((ret = pthread_mutex_init(&priv->bn.bnotify_mutex, NULL)) != 0) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED, - "bnotify pthread_mutex_init failed (%d)", ret); - ret = -1; - goto out; - } - bn_mutex_init = _gf_true; - - if ((ret = pthread_cond_init(&priv->bn.bnotify_cond, NULL)) != 0) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_PTHREAD_COND_INIT_FAILED, - "bnotify pthread_cond_init failed (%d)", ret); - ret = -1; - goto out; - } - bn_cond_init = _gf_true; - - if ((ret = pthread_mutex_init(&priv->dm.drain_black_mutex, NULL)) != 0) - { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED, - "drain_black pthread_mutex_init failed (%d)", ret); - ret = -1; - goto out; - } - 
dm_mutex_black_init = _gf_true; - - if ((ret = pthread_cond_init(&priv->dm.drain_black_cond, NULL)) != 0) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_PTHREAD_COND_INIT_FAILED, - "drain_black pthread_cond_init failed (%d)", ret); - ret = -1; - goto out; - } - dm_cond_black_init = _gf_true; - - if ((ret = pthread_mutex_init(&priv->dm.drain_white_mutex, NULL)) != 0) - { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED, - "drain_white pthread_mutex_init failed (%d)", ret); - ret = -1; - goto out; - } - dm_mutex_white_init = _gf_true; - - if ((ret = pthread_cond_init(&priv->dm.drain_white_cond, NULL)) != 0) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CHANGELOG_MSG_PTHREAD_COND_INIT_FAILED, - "drain_white pthread_cond_init failed (%d)", ret); - ret = -1; - goto out; - } - dm_cond_white_init = _gf_true; - out: - if (ret) { - if (bn_mutex_init) - pthread_mutex_destroy(&priv->bn.bnotify_mutex); - if (bn_cond_init) - pthread_cond_destroy (&priv->bn.bnotify_cond); - if (dm_mutex_black_init) - pthread_mutex_destroy(&priv->dm.drain_black_mutex); - if (dm_cond_black_init) - pthread_cond_destroy (&priv->dm.drain_black_cond); - if (dm_mutex_white_init) - pthread_mutex_destroy(&priv->dm.drain_white_mutex); - if (dm_cond_white_init) - pthread_cond_destroy (&priv->dm.drain_white_cond); - } - return ret; +changelog_barrier_pthread_init(xlator_t *this, changelog_priv_t *priv) +{ + gf_boolean_t bn_mutex_init = _gf_false; + gf_boolean_t bn_cond_init = _gf_false; + gf_boolean_t dm_mutex_black_init = _gf_false; + gf_boolean_t dm_cond_black_init = _gf_false; + gf_boolean_t dm_mutex_white_init = _gf_false; + gf_boolean_t dm_cond_white_init = _gf_false; + gf_boolean_t cr_mutex_init = _gf_false; + gf_boolean_t cr_cond_init = _gf_false; + int ret = 0; + + if ((ret = pthread_mutex_init(&priv->bn.bnotify_mutex, NULL)) != 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED, "name=bnotify", + "ret=%d", 
ret, NULL); + ret = -1; + goto out; + } + bn_mutex_init = _gf_true; + + if ((ret = pthread_cond_init(&priv->bn.bnotify_cond, NULL)) != 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_PTHREAD_COND_INIT_FAILED, "name=bnotify", + "ret=%d", ret, NULL); + ret = -1; + goto out; + } + bn_cond_init = _gf_true; + + if ((ret = pthread_mutex_init(&priv->dm.drain_black_mutex, NULL)) != 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED, "name=drain_black", + "ret=%d", ret, NULL); + ret = -1; + goto out; + } + dm_mutex_black_init = _gf_true; + + if ((ret = pthread_cond_init(&priv->dm.drain_black_cond, NULL)) != 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_PTHREAD_COND_INIT_FAILED, "name=drain_black", + "ret=%d", ret, NULL); + ret = -1; + goto out; + } + dm_cond_black_init = _gf_true; + + if ((ret = pthread_mutex_init(&priv->dm.drain_white_mutex, NULL)) != 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED, "name=drain_white", + "ret=%d", ret, NULL); + ret = -1; + goto out; + } + dm_mutex_white_init = _gf_true; + + if ((ret = pthread_cond_init(&priv->dm.drain_white_cond, NULL)) != 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_PTHREAD_COND_INIT_FAILED, "name=drain_white", + "ret=%d", ret, NULL); + ret = -1; + goto out; + } + dm_cond_white_init = _gf_true; + + if ((pthread_mutex_init(&priv->cr.lock, NULL)) != 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED, + "name=changelog_rollover", "ret=%d", ret, NULL); + ret = -1; + goto out; + } + cr_mutex_init = _gf_true; + + if ((pthread_cond_init(&priv->cr.cond, NULL)) != 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_PTHREAD_COND_INIT_FAILED, + "changelog_rollover cond init failed", "ret=%d", ret, NULL); + ret = -1; + goto out; + } + cr_cond_init = _gf_true; +out: + if (ret) { + if (bn_mutex_init) + pthread_mutex_destroy(&priv->bn.bnotify_mutex); + if 
(bn_cond_init) + pthread_cond_destroy(&priv->bn.bnotify_cond); + if (dm_mutex_black_init) + pthread_mutex_destroy(&priv->dm.drain_black_mutex); + if (dm_cond_black_init) + pthread_cond_destroy(&priv->dm.drain_black_cond); + if (dm_mutex_white_init) + pthread_mutex_destroy(&priv->dm.drain_white_mutex); + if (dm_cond_white_init) + pthread_cond_destroy(&priv->dm.drain_white_cond); + if (cr_mutex_init) + pthread_mutex_destroy(&priv->cr.lock); + if (cr_cond_init) + pthread_cond_destroy(&priv->cr.cond); + } + return ret; } /* Destroy barrier related condition variables and locks */ static void -changelog_barrier_pthread_destroy (changelog_priv_t *priv) +changelog_barrier_pthread_destroy(changelog_priv_t *priv) { - pthread_mutex_destroy (&priv->bn.bnotify_mutex); - pthread_cond_destroy (&priv->bn.bnotify_cond); - pthread_mutex_destroy (&priv->dm.drain_black_mutex); - pthread_cond_destroy (&priv->dm.drain_black_cond); - pthread_mutex_destroy (&priv->dm.drain_white_mutex); - pthread_cond_destroy (&priv->dm.drain_white_cond); - LOCK_DESTROY (&priv->bflags.lock); + pthread_mutex_destroy(&priv->bn.bnotify_mutex); + pthread_cond_destroy(&priv->bn.bnotify_cond); + pthread_mutex_destroy(&priv->dm.drain_black_mutex); + pthread_cond_destroy(&priv->dm.drain_black_cond); + pthread_mutex_destroy(&priv->dm.drain_white_mutex); + pthread_cond_destroy(&priv->dm.drain_white_cond); + pthread_mutex_destroy(&priv->cr.lock); + pthread_cond_destroy(&priv->cr.cond); + LOCK_DESTROY(&priv->bflags.lock); } -int -reconfigure (xlator_t *this, dict_t *options) -{ - int ret = 0; - char *tmp = NULL; - changelog_priv_t *priv = NULL; - gf_boolean_t active_earlier = _gf_true; - gf_boolean_t active_now = _gf_true; - changelog_time_slice_t *slice = NULL; - changelog_log_data_t cld = {0,}; - char htime_dir[PATH_MAX] = {0,}; - char csnap_dir[PATH_MAX] = {0,}; - struct timeval tv = {0,}; - uint32_t timeout = 0; - - priv = this->private; - if (!priv) - goto out; - - ret = -1; - active_earlier = priv->active; - - 
/* first stop the rollover and the fsync thread */ - changelog_cleanup_helper_threads (this, priv); - - GF_OPTION_RECONF ("changelog-dir", tmp, options, str, out); - if (!tmp) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_DIR_OPTIONS_NOT_SET, - "\"changelog-dir\" option is not set"); - goto out; - } - - GF_FREE (priv->changelog_dir); - priv->changelog_dir = gf_strdup (tmp); - if (!priv->changelog_dir) - goto out; +static void +changelog_cleanup_rpc(xlator_t *this, changelog_priv_t *priv) +{ + /* terminate rpc server */ + if (!this->cleanup_starting) + changelog_destroy_rpc_listner(this, priv); - ret = mkdir_p (priv->changelog_dir, 0600, _gf_true); + (void)changelog_cleanup_rpc_threads(this, priv); + /* cleanup rot buffs */ + rbuf_dtor(priv->rbuf); - if (ret) - goto out; - CHANGELOG_FILL_HTIME_DIR(priv->changelog_dir, htime_dir); - ret = mkdir_p (htime_dir, 0600, _gf_true); + /* cleanup poller thread */ + if (priv->poller) + (void)changelog_thread_cleanup(this, priv->poller); +} +int +reconfigure(xlator_t *this, dict_t *options) +{ + int ret = 0; + char *tmp = NULL; + changelog_priv_t *priv = NULL; + gf_boolean_t active_earlier = _gf_true; + gf_boolean_t active_now = _gf_true; + gf_boolean_t rpc_active_earlier = _gf_true; + gf_boolean_t rpc_active_now = _gf_true; + gf_boolean_t iniate_rpc = _gf_false; + changelog_time_slice_t *slice = NULL; + changelog_log_data_t cld = { + 0, + }; + char htime_dir[PATH_MAX] = { + 0, + }; + char csnap_dir[PATH_MAX] = { + 0, + }; + uint32_t timeout = 0; + + priv = this->private; + if (!priv) + goto out; + + ret = -1; + active_earlier = priv->active; + rpc_active_earlier = priv->rpc_active; + + /* first stop the rollover and the fsync thread */ + changelog_cleanup_helper_threads(this, priv); + + GF_OPTION_RECONF("changelog-dir", tmp, options, str, out); + if (!tmp) { + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_DIR_OPTIONS_NOT_SET, + NULL); + goto out; + } + + GF_FREE(priv->changelog_dir); + priv->changelog_dir = 
gf_strdup(tmp); + if (!priv->changelog_dir) + goto out; + + ret = mkdir_p(priv->changelog_dir, 0600, _gf_true); + + if (ret) + goto out; + CHANGELOG_FILL_HTIME_DIR(priv->changelog_dir, htime_dir); + ret = mkdir_p(htime_dir, 0600, _gf_true); + + if (ret) + goto out; + + CHANGELOG_FILL_CSNAP_DIR(priv->changelog_dir, csnap_dir); + ret = mkdir_p(csnap_dir, 0600, _gf_true); + + if (ret) + goto out; + + GF_OPTION_RECONF("changelog", active_now, options, bool, out); + GF_OPTION_RECONF("changelog-notification", rpc_active_now, options, bool, + out); + + /* If journalling is enabled, enable rpc notifications */ + if (active_now && !active_earlier) { + if (!rpc_active_earlier) + iniate_rpc = _gf_true; + } + + if (rpc_active_now && !rpc_active_earlier) { + iniate_rpc = _gf_true; + } + + /* TODO: Disable of changelog-notifications is not supported for now + * as there is no clean way of cleaning up of rpc resources + */ + + if (iniate_rpc) { + ret = changelog_init_rpc(this, priv); if (ret) - goto out; - - CHANGELOG_FILL_CSNAP_DIR(priv->changelog_dir, csnap_dir); - ret = mkdir_p (csnap_dir, 0600, _gf_true); + goto out; + priv->rpc_active = _gf_true; + } - if (ret) - goto out; + /** + * changelog_handle_change() handles changes that could possibly + * have been submit changes before changelog deactivation. + */ + if (!active_now) + priv->active = _gf_false; - GF_OPTION_RECONF ("changelog", active_now, options, bool, out); + GF_OPTION_RECONF("op-mode", tmp, options, str, out); + changelog_assign_opmode(priv, tmp); - /** - * changelog_handle_change() handles changes that could possibly - * have been submit changes before changelog deactivation. 
- */ - if (!active_now) - priv->active = _gf_false; + tmp = NULL; - GF_OPTION_RECONF ("op-mode", tmp, options, str, out); - changelog_assign_opmode (priv, tmp); + GF_OPTION_RECONF("encoding", tmp, options, str, out); + changelog_assign_encoding(priv, tmp); - tmp = NULL; + GF_OPTION_RECONF("rollover-time", priv->rollover_time, options, int32, out); + GF_OPTION_RECONF("fsync-interval", priv->fsync_interval, options, int32, + out); + GF_OPTION_RECONF("changelog-barrier-timeout", timeout, options, time, out); + changelog_assign_barrier_timeout(priv, timeout); - GF_OPTION_RECONF ("encoding", tmp, options, str, out); - changelog_assign_encoding (priv, tmp); + GF_OPTION_RECONF("capture-del-path", priv->capture_del_path, options, bool, + out); - GF_OPTION_RECONF ("rollover-time", - priv->rollover_time, options, int32, out); - GF_OPTION_RECONF ("fsync-interval", - priv->fsync_interval, options, int32, out); - GF_OPTION_RECONF ("changelog-barrier-timeout", - timeout, options, time, out); - changelog_assign_barrier_timeout (priv, timeout); + if (active_now || active_earlier) { + changelog_fill_rollover_data(&cld, !active_now); - GF_OPTION_RECONF ("capture-del-path", priv->capture_del_path, options, - bool, out); + slice = &priv->slice; - if (active_now || active_earlier) { - ret = changelog_fill_rollover_data (&cld, !active_now); - if (ret) - goto out; + LOCK(&priv->lock); + { + ret = changelog_inject_single_event(this, priv, &cld); + if (!ret && active_now) + SLICE_VERSION_UPDATE(slice); + } + UNLOCK(&priv->lock); - slice = &priv->slice; + if (ret) + goto out; - LOCK (&priv->lock); - { - ret = changelog_inject_single_event (this, priv, &cld); - if (!ret && active_now) - SLICE_VERSION_UPDATE (slice); - } - UNLOCK (&priv->lock); - - if (ret) - goto out; - - if (active_now) { - if (!active_earlier) { - gf_msg (this->name, GF_LOG_INFO, 0, - CHANGELOG_MSG_HTIME_INFO, - "Reconfigure: Changelog Enable"); - if (gettimeofday(&tv, NULL) ) { - gf_msg (this->name, GF_LOG_ERROR, 0, - 
CHANGELOG_MSG_HTIME_ERROR, - "unable to fetch htime"); - ret = -1; - goto out; - } - htime_create (this, priv, tv.tv_sec); - } - ret = changelog_spawn_helper_threads (this, priv); - } + if (active_now) { + if (!active_earlier) { + gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_RECONFIGURE, + NULL); + htime_create(this, priv, gf_time()); + } + ret = changelog_spawn_helper_threads(this, priv); } + } - out: - if (ret) { - /* TODO */ - } else { - gf_msg_debug (this->name, 0, - "changelog reconfigured"); - if (active_now && priv) - priv->active = _gf_true; - } +out: + if (ret) { + /* TODO */ + } else { + gf_msg_debug(this->name, 0, "changelog reconfigured"); + if (active_now && priv) + priv->active = _gf_true; + } - return ret; + return ret; } static void -changelog_freeup_options (xlator_t *this, changelog_priv_t *priv) +changelog_freeup_options(xlator_t *this, changelog_priv_t *priv) { - int ret = 0; + int ret = 0; - ret = priv->cb->dtor (this, &priv->cd); - if (ret) - gf_msg (this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_FREEUP_FAILED, - "could not cleanup bootstrapper"); - GF_FREE (priv->changelog_brick); - GF_FREE (priv->changelog_dir); + ret = priv->cb->dtor(this, &priv->cd); + if (ret) + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_FREEUP_FAILED, NULL); + GF_FREE(priv->changelog_brick); + GF_FREE(priv->changelog_dir); } static int -changelog_init_options (xlator_t *this, changelog_priv_t *priv) +changelog_init_options(xlator_t *this, changelog_priv_t *priv) { - int ret = 0; - char *tmp = NULL; - uint32_t timeout = 0; - char htime_dir[PATH_MAX] = {0,}; - char csnap_dir[PATH_MAX] = {0,}; + int ret = 0; + char *tmp = NULL; + uint32_t timeout = 0; + char htime_dir[PATH_MAX] = { + 0, + }; + char csnap_dir[PATH_MAX] = { + 0, + }; - GF_OPTION_INIT ("changelog-brick", tmp, str, error_return); - priv->changelog_brick = gf_strdup (tmp); - if (!priv->changelog_brick) - goto error_return; + GF_OPTION_INIT("changelog-brick", tmp, str, error_return); + 
priv->changelog_brick = gf_strdup(tmp); + if (!priv->changelog_brick) + goto error_return; - tmp = NULL; + tmp = NULL; - GF_OPTION_INIT ("changelog-dir", tmp, str, dealloc_1); - priv->changelog_dir = gf_strdup (tmp); - if (!priv->changelog_dir) - goto dealloc_1; + GF_OPTION_INIT("changelog-dir", tmp, str, dealloc_1); + priv->changelog_dir = gf_strdup(tmp); + if (!priv->changelog_dir) + goto dealloc_1; - tmp = NULL; + tmp = NULL; - /** - * create the directory even if change-logging would be inactive - * so that consumers can _look_ into it (finding nothing...) - */ - ret = mkdir_p (priv->changelog_dir, 0600, _gf_true); + /** + * create the directory even if change-logging would be inactive + * so that consumers can _look_ into it (finding nothing...) + */ + ret = mkdir_p(priv->changelog_dir, 0600, _gf_true); - if (ret) - goto dealloc_2; + if (ret) + goto dealloc_2; - CHANGELOG_FILL_HTIME_DIR (priv->changelog_dir, htime_dir); - ret = mkdir_p (htime_dir, 0600, _gf_true); - if (ret) - goto dealloc_2; + CHANGELOG_FILL_HTIME_DIR(priv->changelog_dir, htime_dir); + ret = mkdir_p(htime_dir, 0600, _gf_true); + if (ret) + goto dealloc_2; - CHANGELOG_FILL_CSNAP_DIR (priv->changelog_dir, csnap_dir); - ret = mkdir_p (csnap_dir, 0600, _gf_true); - if (ret) - goto dealloc_2; + CHANGELOG_FILL_CSNAP_DIR(priv->changelog_dir, csnap_dir); + ret = mkdir_p(csnap_dir, 0600, _gf_true); + if (ret) + goto dealloc_2; - GF_OPTION_INIT ("changelog", priv->active, bool, dealloc_2); - GF_OPTION_INIT ("capture-del-path", priv->capture_del_path, - bool, dealloc_2); + GF_OPTION_INIT("changelog", priv->active, bool, dealloc_2); + GF_OPTION_INIT("changelog-notification", priv->rpc_active, bool, dealloc_2); + GF_OPTION_INIT("capture-del-path", priv->capture_del_path, bool, dealloc_2); - GF_OPTION_INIT ("op-mode", tmp, str, dealloc_2); - changelog_assign_opmode (priv, tmp); + GF_OPTION_INIT("op-mode", tmp, str, dealloc_2); + changelog_assign_opmode(priv, tmp); - tmp = NULL; + tmp = NULL; - 
GF_OPTION_INIT ("encoding", tmp, str, dealloc_2); - changelog_assign_encoding (priv, tmp); - changelog_encode_change (priv); + GF_OPTION_INIT("encoding", tmp, str, dealloc_2); + changelog_assign_encoding(priv, tmp); + changelog_encode_change(priv); - GF_OPTION_INIT ("rollover-time", - priv->rollover_time, int32, dealloc_2); + GF_OPTION_INIT("rollover-time", priv->rollover_time, int32, dealloc_2); - GF_OPTION_INIT ("fsync-interval", - priv->fsync_interval, int32, dealloc_2); + GF_OPTION_INIT("fsync-interval", priv->fsync_interval, int32, dealloc_2); - GF_OPTION_INIT ("changelog-barrier-timeout", - timeout, time, dealloc_2); - changelog_assign_barrier_timeout (priv, timeout); + GF_OPTION_INIT("changelog-barrier-timeout", timeout, time, dealloc_2); + changelog_assign_barrier_timeout(priv, timeout); - GF_ASSERT (cb_bootstrap[priv->op_mode].mode == priv->op_mode); - priv->cb = &cb_bootstrap[priv->op_mode]; + GF_ASSERT(cb_bootstrap[priv->op_mode].mode == priv->op_mode); + priv->cb = &cb_bootstrap[priv->op_mode]; - /* ... now bootstrap the logger */ - ret = priv->cb->ctor (this, &priv->cd); - if (ret) - goto dealloc_2; + /* ... 
now bootstrap the logger */ + ret = priv->cb->ctor(this, &priv->cd); + if (ret) + goto dealloc_2; - priv->changelog_fd = -1; + priv->changelog_fd = -1; - return 0; + return 0; - dealloc_2: - GF_FREE (priv->changelog_dir); - dealloc_1: - GF_FREE (priv->changelog_brick); - error_return: - return -1; -} - -static void -changelog_cleanup_rpc (xlator_t *this, changelog_priv_t *priv) -{ - /* terminate rpc server */ - changelog_destroy_rpc_listner (this, priv); - - /* cleanup rot buffs */ - rbuf_dtor (priv->rbuf); - - /* cleanup poller thread */ - if (priv->poller) - (void) changelog_thread_cleanup (this, priv->poller); +dealloc_2: + GF_FREE(priv->changelog_dir); +dealloc_1: + GF_FREE(priv->changelog_brick); +error_return: + return -1; } static int -changelog_init_rpc (xlator_t *this, changelog_priv_t *priv) +changelog_init_rpc(xlator_t *this, changelog_priv_t *priv) { - int ret = 0; - rpcsvc_t *rpc = NULL; - changelog_ev_selector_t *selection = NULL; + rpcsvc_t *rpc = NULL; + changelog_ev_selector_t *selection = NULL; - selection = &priv->ev_selection; + selection = &priv->ev_selection; - /* initialize event selection */ - changelog_init_event_selection (this, selection); + /* initialize event selection */ + changelog_init_event_selection(this, selection); - priv->rbuf = rbuf_init (NR_ROTT_BUFFS); - if (!priv->rbuf) - goto cleanup_thread; + priv->rbuf = rbuf_init(NR_ROTT_BUFFS); + if (!priv->rbuf) + goto cleanup_thread; - rpc = changelog_init_rpc_listner (this, priv, - priv->rbuf, NR_DISPATCHERS); - if (!rpc) - goto cleanup_rbuf; - priv->rpc = rpc; + rpc = changelog_init_rpc_listener(this, priv, priv->rbuf, NR_DISPATCHERS); + if (!rpc) + goto cleanup_rbuf; + priv->rpc = rpc; - return 0; + return 0; - cleanup_rbuf: - rbuf_dtor (priv->rbuf); - cleanup_thread: - if (priv->poller) - (void) changelog_thread_cleanup (this, priv->poller); +cleanup_rbuf: + rbuf_dtor(priv->rbuf); +cleanup_thread: + if (priv->poller) + (void)changelog_thread_cleanup(this, priv->poller); - return 
-1; + return -1; } int32_t -init (xlator_t *this) -{ - int ret = -1; - char *tmp = NULL; - changelog_priv_t *priv = NULL; - - GF_VALIDATE_OR_GOTO ("changelog", this, error_return); - - if (!this->children || this->children->next) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_CHILD_MISCONFIGURED, - "translator needs a single subvolume"); - goto error_return; - } - - if (!this->parents) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CHANGELOG_MSG_VOL_MISCONFIGURED, - "dangling volume. please check volfile"); - goto error_return; - } - - priv = GF_CALLOC (1, sizeof (*priv), gf_changelog_mt_priv_t); - if (!priv) - goto error_return; - - this->local_pool = mem_pool_new (changelog_local_t, 64); - if (!this->local_pool) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - CHANGELOG_MSG_NO_MEMORY, - "failed to create local memory pool"); - goto cleanup_priv; - } - - LOCK_INIT (&priv->lock); - LOCK_INIT (&priv->c_snap_lock); - - ret = changelog_init_options (this, priv); - if (ret) - goto cleanup_mempool; - - /* snap dependency changes */ - priv->dm.black_fop_cnt = 0; - priv->dm.white_fop_cnt = 0; - priv->dm.drain_wait_black = _gf_false; - priv->dm.drain_wait_white = _gf_false; - priv->current_color = FOP_COLOR_BLACK; - priv->explicit_rollover = _gf_false; - - /* Mutex is not needed as threads are not spawned yet */ - priv->bn.bnotify = _gf_false; - priv->bn.bnotify_error = _gf_false; - ret = changelog_barrier_pthread_init (this, priv); - if (ret) - goto cleanup_options; - LOCK_INIT (&priv->bflags.lock); - priv->bflags.barrier_ext = _gf_false; - - /* Changelog barrier init */ - INIT_LIST_HEAD (&priv->queue); - priv->barrier_enabled = _gf_false; - +init(xlator_t *this) +{ + int ret = -1; + changelog_priv_t *priv = NULL; + + GF_VALIDATE_OR_GOTO("changelog", this, error_return); + + if (!this->children || this->children->next) { + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_CHILD_MISCONFIGURED, + NULL); + goto error_return; + } + + if (!this->parents) { + 
gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_VOL_MISCONFIGURED, + NULL); + goto error_return; + } + + priv = GF_CALLOC(1, sizeof(*priv), gf_changelog_mt_priv_t); + if (!priv) + goto error_return; + + this->local_pool = mem_pool_new(changelog_local_t, 64); + if (!this->local_pool) { + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, CHANGELOG_MSG_NO_MEMORY, + NULL); + goto cleanup_priv; + } + + LOCK_INIT(&priv->lock); + LOCK_INIT(&priv->c_snap_lock); + GF_ATOMIC_INIT(priv->listnercnt, 0); + GF_ATOMIC_INIT(priv->clntcnt, 0); + GF_ATOMIC_INIT(priv->xprtcnt, 0); + INIT_LIST_HEAD(&priv->xprt_list); + priv->htime_fd = -1; + + ret = changelog_init_options(this, priv); + if (ret) + goto cleanup_mempool; + + /* snap dependency changes */ + priv->dm.black_fop_cnt = 0; + priv->dm.white_fop_cnt = 0; + priv->dm.drain_wait_black = _gf_false; + priv->dm.drain_wait_white = _gf_false; + priv->current_color = FOP_COLOR_BLACK; + priv->explicit_rollover = _gf_false; + + priv->cr.notify = _gf_false; + /* Mutex is not needed as threads are not spawned yet */ + priv->bn.bnotify = _gf_false; + priv->bn.bnotify_error = _gf_false; + ret = changelog_barrier_pthread_init(this, priv); + if (ret) + goto cleanup_options; + LOCK_INIT(&priv->bflags.lock); + priv->bflags.barrier_ext = _gf_false; + + /* Changelog barrier init */ + INIT_LIST_HEAD(&priv->queue); + priv->barrier_enabled = _gf_false; + + if (priv->rpc_active || priv->active) { /* RPC ball rolling.. 
*/ - ret = changelog_init_rpc (this, priv); + ret = changelog_init_rpc(this, priv); if (ret) - goto cleanup_barrier; - - ret = changelog_init (this, priv); - if (ret) - goto cleanup_rpc; - - gf_msg_debug (this->name, 0, "changelog translator loaded"); - - this->private = priv; - return 0; - - cleanup_rpc: - changelog_cleanup_rpc (this, priv); - cleanup_barrier: - changelog_barrier_pthread_destroy (priv); - cleanup_options: - changelog_freeup_options (this, priv); - cleanup_mempool: - mem_pool_destroy (this->local_pool); - cleanup_priv: - GF_FREE (priv); - error_return: - this->private = NULL; - return -1; + goto cleanup_barrier; + priv->rpc_active = _gf_true; + } + + ret = changelog_init(this, priv); + if (ret) + goto cleanup_rpc; + + gf_msg_debug(this->name, 0, "changelog translator loaded"); + + this->private = priv; + return 0; + +cleanup_rpc: + if (priv->rpc_active) { + changelog_cleanup_rpc(this, priv); + } +cleanup_barrier: + changelog_barrier_pthread_destroy(priv); +cleanup_options: + changelog_freeup_options(this, priv); +cleanup_mempool: + mem_pool_destroy(this->local_pool); + this->local_pool = NULL; +cleanup_priv: + GF_FREE(priv); +error_return: + this->private = NULL; + return -1; } void -fini (xlator_t *this) +fini(xlator_t *this) { - changelog_priv_t *priv = NULL; + changelog_priv_t *priv = NULL; + struct list_head queue = { + 0, + }; + + priv = this->private; + + if (priv) { + if (priv->active || priv->rpc_active) { + /* terminate RPC server/threads */ + changelog_cleanup_rpc(this, priv); + GF_FREE(priv->ev_dispatcher); + } + /* call barrier_disable to cancel timer */ + if (priv->barrier_enabled) + __chlog_barrier_disable(this, &queue); - priv = this->private; + /* cleanup barrier related objects */ + changelog_barrier_pthread_destroy(priv); - if (priv) { - /* terminate RPC server/threads */ - changelog_cleanup_rpc (this, priv); + /* cleanup helper threads */ + changelog_cleanup_helper_threads(this, priv); - /* cleanup barrier related objects */ - 
changelog_barrier_pthread_destroy (priv); + /* cleanup allocated options */ + changelog_freeup_options(this, priv); - /* cleanup allocated options */ - changelog_freeup_options (this, priv); + /* deallocate mempool */ + mem_pool_destroy(this->local_pool); - /* deallocate mempool */ - mem_pool_destroy (this->local_pool); - /* finally, dealloac private variable */ - GF_FREE (priv); + if (priv->htime_fd != -1) { + sys_close(priv->htime_fd); } - this->private = NULL; + /* finally, dealloac private variable */ + GF_FREE(priv); + } - return; + this->private = NULL; + this->local_pool = NULL; + + return; } struct xlator_fops fops = { - .open = changelog_open, - .mknod = changelog_mknod, - .mkdir = changelog_mkdir, - .create = changelog_create, - .symlink = changelog_symlink, - .writev = changelog_writev, - .truncate = changelog_truncate, - .ftruncate = changelog_ftruncate, - .link = changelog_link, - .rename = changelog_rename, - .unlink = changelog_unlink, - .rmdir = changelog_rmdir, - .setattr = changelog_setattr, - .fsetattr = changelog_fsetattr, - .setxattr = changelog_setxattr, - .fsetxattr = changelog_fsetxattr, - .removexattr = changelog_removexattr, - .fremovexattr = changelog_fremovexattr, - .ipc = changelog_ipc, + .open = changelog_open, + .mknod = changelog_mknod, + .mkdir = changelog_mkdir, + .create = changelog_create, + .symlink = changelog_symlink, + .writev = changelog_writev, + .truncate = changelog_truncate, + .ftruncate = changelog_ftruncate, + .link = changelog_link, + .rename = changelog_rename, + .unlink = changelog_unlink, + .rmdir = changelog_rmdir, + .setattr = changelog_setattr, + .fsetattr = changelog_fsetattr, + .setxattr = changelog_setxattr, + .fsetxattr = changelog_fsetxattr, + .removexattr = changelog_removexattr, + .fremovexattr = changelog_fremovexattr, + .ipc = changelog_ipc, + .xattrop = changelog_xattrop, + .fxattrop = changelog_fxattrop, }; struct xlator_cbks cbks = { - .forget = changelog_forget, - .release = changelog_release, + 
.forget = changelog_forget, + .release = changelog_release, }; struct volume_options options[] = { - {.key = {"changelog"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", - .description = "enable/disable change-logging" - }, - {.key = {"changelog-brick"}, - .type = GF_OPTION_TYPE_PATH, - .description = "brick path to generate unique socket file name." - " should be the export directory of the volume strictly." - }, - {.key = {"changelog-dir"}, - .type = GF_OPTION_TYPE_PATH, - .description = "directory for the changelog files" - }, - {.key = {"op-mode"}, - .type = GF_OPTION_TYPE_STR, - .default_value = "realtime", - .value = {"realtime"}, - .description = "operation mode - futuristic operation modes" - }, - {.key = {"encoding"}, - .type = GF_OPTION_TYPE_STR, - .default_value = "ascii", - .value = {"binary", "ascii"}, - .description = "encoding type for changelogs" - }, - {.key = {"rollover-time"}, - .default_value = "15", - .type = GF_OPTION_TYPE_TIME, - .description = "time to switch to a new changelog file (in seconds)" - }, - {.key = {"fsync-interval"}, - .type = GF_OPTION_TYPE_TIME, - .default_value = "5", - .description = "do not open CHANGELOG file with O_SYNC mode." 
- " instead perform fsync() at specified intervals" - }, - { .key = {"changelog-barrier-timeout"}, - .type = GF_OPTION_TYPE_TIME, - .default_value = BARRIER_TIMEOUT, - .description = "After 'timeout' seconds since the time 'barrier' " - "option was set to \"on\", unlink/rmdir/rename " - "operations are no longer blocked and previously " - "blocked fops are allowed to go through" - }, - {.key = {"capture-del-path"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", - .description = "enable/disable capturing paths of deleted entries" - }, - {.key = {NULL} - }, + {.key = {"changelog"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "enable/disable change-logging", + .op_version = {3}, + .flags = OPT_FLAG_SETTABLE, + .level = OPT_STATUS_BASIC, + .tags = {"journal", "georep", "glusterfind"}}, + {.key = {"changelog-notification"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "enable/disable changelog live notification", + .op_version = {3}, + .level = OPT_STATUS_BASIC, + .tags = {"bitrot", "georep"}}, + {.key = {"changelog-brick"}, + .type = GF_OPTION_TYPE_PATH, + .description = "brick path to generate unique socket file name." 
+ " should be the export directory of the volume strictly.", + .default_value = "{{ brick.path }}", + .op_version = {3}, + .tags = {"journal"}}, + {.key = {"changelog-dir"}, + .type = GF_OPTION_TYPE_PATH, + .description = "directory for the changelog files", + .default_value = "{{ brick.path }}/.glusterfs/changelogs", + .op_version = {3}, + .flags = OPT_FLAG_SETTABLE, + .level = OPT_STATUS_ADVANCED, + .tags = {"journal", "georep", "glusterfind"}}, + {.key = {"op-mode"}, + .type = GF_OPTION_TYPE_STR, + .default_value = "realtime", + .value = {"realtime"}, + .description = "operation mode - futuristic operation modes", + .op_version = {3}, + .tags = {"journal"}}, + {.key = {"encoding"}, + .type = GF_OPTION_TYPE_STR, + .default_value = "ascii", + .value = {"binary", "ascii"}, + .description = "encoding type for changelogs", + .op_version = {3}, + .flags = OPT_FLAG_SETTABLE, + .level = OPT_STATUS_ADVANCED, + .tags = {"journal"}}, + {.key = {"rollover-time"}, + .default_value = "15", + .type = GF_OPTION_TYPE_TIME, + .description = "time to switch to a new changelog file (in seconds)", + .op_version = {3}, + .flags = OPT_FLAG_SETTABLE, + .level = OPT_STATUS_ADVANCED, + .tags = {"journal", "georep", "glusterfind"}}, + {.key = {"fsync-interval"}, + .type = GF_OPTION_TYPE_TIME, + .default_value = "5", + .description = "do not open CHANGELOG file with O_SYNC mode." 
+ " instead perform fsync() at specified intervals", + .op_version = {3}, + .flags = OPT_FLAG_SETTABLE, + .level = OPT_STATUS_ADVANCED, + .tags = {"journal"}}, + {.key = {"changelog-barrier-timeout"}, + .type = GF_OPTION_TYPE_TIME, + .default_value = BARRIER_TIMEOUT, + .description = "After 'timeout' seconds since the time 'barrier' " + "option was set to \"on\", unlink/rmdir/rename " + "operations are no longer blocked and previously " + "blocked fops are allowed to go through", + .op_version = {3}, + .flags = OPT_FLAG_SETTABLE, + .level = OPT_STATUS_ADVANCED, + .tags = {"journal"}}, + {.key = {"capture-del-path"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "enable/disable capturing paths of deleted entries", + .op_version = {3}, + .flags = OPT_FLAG_SETTABLE, + .level = OPT_STATUS_BASIC, + .tags = {"journal", "glusterfind"}}, + {.key = {NULL}}, +}; + +xlator_api_t xlator_api = { + .init = init, + .fini = fini, + .notify = notify, + .reconfigure = reconfigure, + .mem_acct_init = mem_acct_init, + .op_version = {1}, /* Present from the initial version */ + .fops = &fops, + .cbks = &cbks, + .options = options, + .identifier = "changelog", + .category = GF_MAINTAINED, }; diff --git a/xlators/features/changetimerecorder/src/Makefile.am b/xlators/features/changetimerecorder/src/Makefile.am deleted file mode 100644 index 5af50983fef..00000000000 --- a/xlators/features/changetimerecorder/src/Makefile.am +++ /dev/null @@ -1,23 +0,0 @@ -xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features - -# changetimerecorder can only get build when libgfdb is enabled -if BUILD_GFDB - xlator_LTLIBRARIES = changetimerecorder.la -endif - -changetimerecorder_la_LDFLAGS = $(GF_XLATOR_DEFAULT_LDFLAGS) - -changetimerecorder_la_SOURCES = changetimerecorder.c ctr-helper.c ctr-xlator-ctx.c - -changetimerecorder_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la\ - $(top_builddir)/libglusterfs/src/gfdb/libgfdb.la - -noinst_HEADERS = 
changetimerecorder.h ctr_mem_types.h ctr-helper.h ctr-xlator-ctx.h - -AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ - -I$(top_srcdir)/libglusterfs/src/gfdb \ - -DDATADIR=\"$(localstatedir)\" - -AM_CFLAGS = -Wall $(GF_CFLAGS) $(SQLITE_CFLAGS) - -CLEANFILES = diff --git a/xlators/features/changetimerecorder/src/changetimerecorder.c b/xlators/features/changetimerecorder/src/changetimerecorder.c deleted file mode 100644 index 1831316f8a3..00000000000 --- a/xlators/features/changetimerecorder/src/changetimerecorder.c +++ /dev/null @@ -1,2120 +0,0 @@ -/* - Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ -#include <ctype.h> -#include <sys/uio.h> - -#include "gfdb_sqlite3.h" -#include "ctr-helper.h" -#include "ctr-messages.h" - -/*******************************inode forget***********************************/ - -int -ctr_forget (xlator_t *this, inode_t *inode) -{ - fini_ctr_xlator_ctx (this, inode); - return 0; -} - -/************************** Look up heal **************************************/ -/* -Problem: The CTR xlator records file meta (heat/hardlinks) -into the data. This works fine for files which are created -after ctr xlator is switched ON. But for files which were -created before CTR xlator is ON, CTR xlator is not able to -record either of the meta i.e heat or hardlinks. Thus making -those files immune to promotions/demotions. - -Solution: The solution that is implemented in this patch is -do ctr-db heal of all those pre-existent files, using named lookup. -For this purpose we use the inode-xlator context variable option -in gluster. -The inode-xlator context variable for ctr xlator will have the -following, - a. 
A Lock for the context variable - b. A hardlink list: This list represents the successful looked - up hardlinks. -These are the scenarios when the hardlink list is updated: -1) Named-Lookup: Whenever a named lookup happens on a file, in the - wind path we copy all required hardlink and inode information to - ctr_db_record structure, which resides in the frame->local variable. - We dont update the database in wind. During the unwind, we read the - information from the ctr_db_record and , - Check if the inode context variable is created, if not we create it. - Check if the hard link is there in the hardlink list. - If its not there we add it to the list and send a update to the - database using libgfdb. - Please note: The database transaction can fail(and we ignore) as there - already might be a record in the db. This update to the db is to heal - if its not there. - If its there in the list we ignore it. -2) Inode Forget: Whenever an inode forget hits we clear the hardlink list in - the inode context variable and delete the inode context variable. - Please note: An inode forget may happen for two reason, - a. when the inode is delete. - b. the in-memory inode is evicted from the inode table due to cache limits. -3) create: whenever a create happens we create the inode context variable and - add the hardlink. The database updation is done as usual by ctr. -4) link: whenever a hardlink is created for the inode, we create the inode - context variable, if not present, and add the hardlink to the list. -5) unlink: whenever a unlink happens we delete the hardlink from the list. -6) mknod: same as create. -7) rename: whenever a rename happens we update the hardlink in list. if the - hardlink was not present for updation, we add the hardlink to the list. - -What is pending: -1) This solution will only work for named lookups. -2) We dont track afr-self-heal/dht-rebalancer traffic for healing. 
- -*/ - - -/* This function doesnot write anything to the db, - * just created the local variable - * for the frame and sets values for the ctr_db_record */ -static int -ctr_lookup_wind(call_frame_t *frame, - xlator_t *this, - gf_ctr_inode_context_t *ctr_inode_cx) -{ - int ret = -1; - gf_ctr_private_t *_priv = NULL; - gf_ctr_local_t *ctr_local = NULL; - - GF_ASSERT(frame); - GF_ASSERT(frame->root); - GF_ASSERT(this); - IS_CTR_INODE_CX_SANE(ctr_inode_cx); - - _priv = this->private; - GF_ASSERT (_priv); - - if (_priv->ctr_record_wind && ctr_inode_cx->ia_type != IA_IFDIR) { - - frame->local = init_ctr_local_t (this); - if (!frame->local) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_CREATE_CTR_LOCAL_ERROR_WIND, - "WIND: Error while creating ctr local"); - goto out; - }; - ctr_local = frame->local; - ctr_local->client_pid = frame->root->pid; - /*Definately no internal fops will reach here*/ - ctr_local->is_internal_fop = _gf_false; - /*Dont record counters*/ - CTR_DB_REC(ctr_local).do_record_counters = _gf_false; - /*Don't record time at all*/ - CTR_DB_REC(ctr_local).do_record_times = _gf_false; - - /* Copy gfid into db record*/ - gf_uuid_copy (CTR_DB_REC(ctr_local).gfid, - *(ctr_inode_cx->gfid)); - - /* Set fop_path and fop_type, required by libgfdb to make - * decision while inserting the record */ - CTR_DB_REC(ctr_local).gfdb_fop_path = ctr_inode_cx->fop_path; - CTR_DB_REC(ctr_local).gfdb_fop_type = ctr_inode_cx->fop_type; - - /* Copy hard link info*/ - gf_uuid_copy (CTR_DB_REC(ctr_local).pargfid, - *((NEW_LINK_CX(ctr_inode_cx))->pargfid)); - strcpy (CTR_DB_REC(ctr_local).file_name, - NEW_LINK_CX(ctr_inode_cx)->basename); - strcpy (CTR_DB_REC(ctr_local).file_path, - NEW_LINK_CX(ctr_inode_cx)->basepath); - - /* Since we are in lookup we can ignore errors while - * Inserting in the DB, because there may be many - * to write to the DB attempts for healing. 
- * We dont want to log all failed attempts and - * bloat the log*/ - ctr_local->gfdb_db_record.ignore_errors = _gf_true; - } - - ret = 0; - -out: - - if (ret) { - free_ctr_local (ctr_local); - frame->local = NULL; - } - - return ret; -} - - -/* This function inserts the ctr_db_record populated by ctr_lookup_wind - * in to the db. It also destroys the frame->local created by ctr_lookup_wind */ -static int -ctr_lookup_unwind (call_frame_t *frame, - xlator_t *this) -{ - int ret = -1; - gf_ctr_private_t *_priv = NULL; - gf_ctr_local_t *ctr_local = NULL; - - GF_ASSERT(frame); - GF_ASSERT(this); - - _priv = this->private; - GF_ASSERT (_priv); - - GF_ASSERT(_priv->_db_conn); - - ctr_local = frame->local; - - if (ctr_local && (ctr_local->ia_inode_type != IA_IFDIR)) { - - ret = insert_record(_priv->_db_conn, - &ctr_local->gfdb_db_record); - if (ret == -1) { - gf_msg (this->name, - _gfdb_log_level (GF_LOG_ERROR, - ctr_local-> - gfdb_db_record.ignore_errors), - 0, CTR_MSG_FILL_CTR_LOCAL_ERROR_UNWIND, - "UNWIND: Error filling ctr local"); - goto out; - } - } - ret = 0; -out: - free_ctr_local (ctr_local); - frame->local = NULL; - return ret; -} - -/****************************************************************************** - * - * FOPS HANDLING BELOW - * - * ***************************************************************************/ - -/****************************LOOKUP********************************************/ - - -int32_t -ctr_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *dict, struct iatt *postparent) -{ - int ret = -1; - ctr_xlator_ctx_t *ctr_xlator_ctx = NULL; - gf_ctr_local_t *ctr_local = NULL; - ctr_heal_ret_val_t ret_val = CTR_CTX_ERROR; - gf_boolean_t _is_heal_needed = _gf_false; - - CTR_IS_DISABLED_THEN_GOTO(this, out); - - /* if the lookup failed lookup dont do anything*/ - if (op_ret == -1) { - gf_msg_trace (this->name, 0, "lookup failed with %s", - strerror 
(op_errno)); - goto out; - } - - /* Ignore directory lookups */ - if (inode->ia_type == IA_IFDIR) { - goto out; - } - - /* if frame local was not set by the ctr_lookup() - * so dont so anything*/ - if (!frame->local) { - goto out; - } - - /* if the lookup is for dht link donot record*/ - if (dht_is_linkfile (buf, dict)) { - gf_msg_trace (this->name, 0, "Ignoring Lookup " - "for dht link file"); - goto out; - } - - ctr_local = frame->local; - /*Assign the proper inode type*/ - ctr_local->ia_inode_type = inode->ia_type; - - /* Copy gfid directly from inode */ - gf_uuid_copy (CTR_DB_REC(ctr_local).gfid, inode->gfid); - - /* Checking if gfid and parent gfid is valid */ - if (gf_uuid_is_null(CTR_DB_REC(ctr_local).gfid) || - gf_uuid_is_null(CTR_DB_REC(ctr_local).pargfid)) { - gf_msg_trace (this->name, 0, - "Invalid GFID"); - goto out; - } - - /* if its a first entry - * then mark the ctr_record for create - * A create will attempt a file and a hard link created in the db*/ - ctr_xlator_ctx = get_ctr_xlator_ctx (this, inode); - if (!ctr_xlator_ctx) { - /* This marks inode heal */ - CTR_DB_REC(ctr_local).gfdb_fop_type = GFDB_FOP_CREATE_WRITE; - _is_heal_needed = _gf_true; - } - - /* Copy the correct gfid from resolved inode */ - gf_uuid_copy (CTR_DB_REC(ctr_local).gfid, inode->gfid); - - /* Add hard link to the list */ - ret_val = add_hard_link_ctx (frame, this, inode); - if (ret_val == CTR_CTX_ERROR) { - gf_msg_trace (this->name, 0, - "Failed adding hardlink to list"); - goto out; - } - /* If inode needs healing then heal the hardlink also */ - else if (ret_val & CTR_TRY_INODE_HEAL) { - /* This marks inode heal */ - CTR_DB_REC(ctr_local).gfdb_fop_type = GFDB_FOP_CREATE_WRITE; - _is_heal_needed = _gf_true; - } - /* If hardlink needs healing */ - else if (ret_val & CTR_TRY_HARDLINK_HEAL) { - _is_heal_needed = _gf_true; - } - - /* If lookup heal needed */ - if (!_is_heal_needed) - goto out; - - /* FINALLY HEAL : Inserts the ctr_db_record populated by ctr_lookup_wind - * in 
to the db. It also destroys the frame->local - * created by ctr_lookup_wind */ - ret = ctr_lookup_unwind(frame, this); - if (ret) { - gf_msg_trace (this->name, 0, - "Failed healing/inserting link"); - } - - -out: - free_ctr_local ((gf_ctr_local_t *)frame->local); - frame->local = NULL; - - STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf, - dict, postparent); - - return 0; -} - - - -int32_t -ctr_lookup (call_frame_t *frame, xlator_t *this, - loc_t *loc, dict_t *xdata) -{ - gf_ctr_inode_context_t ctr_inode_cx; - gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; - gf_ctr_link_context_t ctr_link_cx; - gf_ctr_link_context_t *_link_cx = &ctr_link_cx; - int ret = -1; - - CTR_IS_DISABLED_THEN_GOTO(this, out); - CTR_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, out); - - GF_ASSERT(frame); - GF_ASSERT(frame->root); - - /* Dont handle nameless lookups*/ - if (!loc->parent) - goto out; - - /*fill ctr link context*/ - FILL_CTR_LINK_CX(_link_cx, loc->parent->gfid, loc->name, - loc->path, out); - - /* Fill ctr inode context*/ - /* IA_IFREG : We assume its a file in the wind - * but in the unwind we are sure what the inode is a file - * or directory - * gfid: we are just filling loc->gfid which is not correct. 
- * In unwind we fill the correct gfid for successful lookup*/ - FILL_CTR_INODE_CONTEXT(_inode_cx, IA_IFREG, - loc->gfid, _link_cx, NULL, - GFDB_FOP_DENTRY_WRITE, GFDB_FOP_WIND); - - /* Create the frame->local and populate ctr_db_record - * No writing to the db yet */ - ret = ctr_lookup_wind(frame, this, _inode_cx); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_INSERT_LINK_WIND_FAILED, - "Failed to insert link wind"); - } - -out: - STACK_WIND (frame, ctr_lookup_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, loc, xdata); - return 0; -} - - - - -/****************************WRITEV********************************************/ -int32_t -ctr_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, - dict_t *xdata) -{ - int ret = -1; - - CTR_IS_DISABLED_THEN_GOTO(this, out); - - ret = ctr_insert_unwind(frame, this, - GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_INSERT_WRITEV_UNWIND_FAILED, - "Failed to insert writev unwind"); - } - - -out: - STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, - postbuf, xdata); - - return 0; -} - -int32_t -ctr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iovec *vector, int32_t count, off_t off, - uint32_t flags, - struct iobref *iobref, dict_t *xdata) -{ - int ret = -1; - gf_ctr_inode_context_t ctr_inode_cx; - gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; - - CTR_IS_DISABLED_THEN_GOTO(this, out); - CTR_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, out); - - /*Fill ctr inode context*/ - FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, - fd->inode->gfid, NULL, NULL, - GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND); - - /*record into the database*/ - ret = ctr_insert_wind(frame, this, _inode_cx); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_INSERT_WRITEV_WIND_FAILED, - "Failed to insert writev wind"); - } - -out: - STACK_WIND 
(frame, ctr_writev_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->writev, fd, vector, count, - off, flags, iobref, xdata); - - return 0; -} - -/******************************setattr*****************************************/ - -int32_t -ctr_setattr_cbk (call_frame_t *frame, - void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, struct iatt *preop_stbuf, - struct iatt *postop_stbuf, dict_t *xdata) -{ - - int ret = -1; - - CTR_IS_DISABLED_THEN_GOTO(this, out); - - ret = ctr_insert_unwind(frame, this, - GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_INSERT_SETATTR_UNWIND_FAILED, - "Failed to insert setattr unwind"); - } - -out: - STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, preop_stbuf, - postop_stbuf, xdata); - - return 0; -} - -int32_t -ctr_setattr (call_frame_t *frame, - xlator_t *this, loc_t *loc, - struct iatt *stbuf, int32_t valid, dict_t *xdata) -{ - - int ret = -1; - gf_ctr_inode_context_t ctr_inode_cx; - gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; - - CTR_IS_DISABLED_THEN_GOTO(this, out); - CTR_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, out); - CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO (this, out); - - /*Fill ctr inode context*/ - FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, - loc->inode->gfid, NULL, NULL, GFDB_FOP_INODE_WRITE, - GFDB_FOP_WIND); - - /*record into the database*/ - ret = ctr_insert_wind(frame, this, _inode_cx); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_INSERT_SETATTR_WIND_FAILED, - "Failed to insert setattr wind"); - } -out: - - STACK_WIND (frame, ctr_setattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setattr, loc, stbuf, - valid, xdata); - - return 0; -} - -/*************************** fsetattr ***************************************/ -int32_t -ctr_fsetattr_cbk (call_frame_t *frame, - void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, struct iatt *preop_stbuf, - struct iatt *postop_stbuf, dict_t 
*xdata) -{ - int ret = -1; - - CTR_IS_DISABLED_THEN_GOTO(this, out); - - ret = ctr_insert_unwind(frame, this, - GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_INSERT_SETATTR_UNWIND_FAILED, - "Failed to insert fsetattr unwind"); - } - -out: - STACK_UNWIND_STRICT (fsetattr, frame, op_ret, op_errno, - preop_stbuf, postop_stbuf, xdata); - - return 0; -} - - -int32_t -ctr_fsetattr (call_frame_t *frame, - xlator_t *this, fd_t *fd, - struct iatt *stbuf, int32_t valid, dict_t *xdata) -{ - int ret = -1; - gf_ctr_inode_context_t ctr_inode_cx; - gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; - - CTR_IS_DISABLED_THEN_GOTO(this, out); - CTR_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, out); - CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO (this, out); - - /*Fill ctr inode context*/ - FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, - fd->inode->gfid, NULL, NULL, GFDB_FOP_INODE_WRITE, - GFDB_FOP_WIND); - - /*record into the database*/ - ret = ctr_insert_wind(frame, this, _inode_cx); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_INSERT_SETATTR_WIND_FAILED, - "Failed to insert fsetattr wind"); - } -out: - STACK_WIND (frame, ctr_fsetattr_cbk, - FIRST_CHILD (this), FIRST_CHILD (this)->fops->fsetattr, - fd, stbuf, valid, xdata); - - return 0; -} -/****************************fremovexattr************************************/ - -int32_t -ctr_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - int ret = -1; - - CTR_IS_DISABLED_THEN_GOTO(this, out); - - - ret = ctr_insert_unwind(frame, this, - GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_INSERT_FREMOVEXATTR_UNWIND_FAILED, - "Failed to insert fremovexattr unwind"); - } - -out: - STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, xdata); - - return 0; -} - -int32_t -ctr_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t 
*fd, - const char *name, dict_t *xdata) -{ - int ret = -1; - gf_ctr_inode_context_t ctr_inode_cx; - gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; - - CTR_IS_DISABLED_THEN_GOTO(this, out); - CTR_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, out); - CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO (this, out); - - /*Fill ctr inode context*/ - FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, - fd->inode->gfid, NULL, NULL, GFDB_FOP_INODE_WRITE, - GFDB_FOP_WIND); - - /*record into the database*/ - ret = ctr_insert_wind(frame, this, _inode_cx); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_INSERT_FREMOVEXATTR_WIND_FAILED, - "Failed to insert fremovexattr wind"); - } - -out: - STACK_WIND (frame, ctr_fremovexattr_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fremovexattr, - fd, name, xdata); - return 0; -} - -/****************************removexattr*************************************/ - -int32_t -ctr_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - int ret = -1; - - CTR_IS_DISABLED_THEN_GOTO(this, out); - CTR_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, out); - - - ret = ctr_insert_unwind(frame, this, - GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_INSERT_REMOVEXATTR_UNWIND_FAILED, - "Failed to insert removexattr unwind"); - } - -out: - STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, xdata); - - return 0; -} - -int32_t -ctr_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *name, dict_t *xdata) -{ - int ret = -1; - gf_ctr_inode_context_t ctr_inode_cx; - gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; - - CTR_IS_DISABLED_THEN_GOTO(this, out); - CTR_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, out); - CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO (this, out); - - /*Fill ctr inode context*/ - FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, - loc->inode->gfid, NULL, NULL, 
GFDB_FOP_INODE_WRITE, - GFDB_FOP_WIND); - - /*record into the database*/ - ret = ctr_insert_wind(frame, this, _inode_cx); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_INSERT_REMOVEXATTR_WIND_FAILED, - "Failed to insert removexattr wind"); - } - -out: - STACK_WIND (frame, ctr_removexattr_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->removexattr, - loc, name, xdata); - return 0; -} - -/****************************truncate****************************************/ - -int32_t -ctr_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) -{ - int ret = -1; - - CTR_IS_DISABLED_THEN_GOTO(this, out); - - - ret = ctr_insert_unwind(frame, this, - GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_INSERT_TRUNCATE_UNWIND_FAILED, - "Failed to insert truncate unwind"); - } - - -out: - STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf, - postbuf, xdata); - - return 0; -} - -int32_t -ctr_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, - off_t offset, dict_t *xdata) -{ - int ret = -1; - gf_ctr_inode_context_t ctr_inode_cx; - gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; - - CTR_IS_DISABLED_THEN_GOTO(this, out); - CTR_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, out); - - /*Fill ctr inode context*/ - FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, - loc->inode->gfid, NULL, NULL, GFDB_FOP_INODE_WRITE, - GFDB_FOP_WIND); - - /*record into the database*/ - ret = ctr_insert_wind(frame, this, _inode_cx); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_INSERT_TRUNCATE_WIND_FAILED, - "Failed to insert truncate wind"); - } -out: - STACK_WIND (frame, ctr_truncate_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->truncate, - loc, offset, xdata); - return 0; -} - -/****************************ftruncate***************************************/ - -int32_t -ctr_ftruncate_cbk 
(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) -{ - int ret = -1; - - CTR_IS_DISABLED_THEN_GOTO(this, out); - - ret = ctr_insert_unwind(frame, this, - GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_INSERT_FTRUNCATE_UNWIND_FAILED, - "Failed to insert ftruncate unwind"); - } - -out: - STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, prebuf, - postbuf, xdata); - - return 0; -} - -int32_t -ctr_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, - off_t offset, dict_t *xdata) -{ - int ret = -1; - gf_ctr_inode_context_t ctr_inode_cx; - gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; - - CTR_IS_DISABLED_THEN_GOTO(this, out); - CTR_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, out); - - /*Fill ctr inode context*/ - FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, - fd->inode->gfid, NULL, NULL, GFDB_FOP_INODE_WRITE, - GFDB_FOP_WIND); - - /*record into the database*/ - ret = ctr_insert_wind(frame, this, _inode_cx); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_INSERT_FTRUNCATE_WIND_FAILED, - "Failed to insert ftruncate wind"); - } - -out: - STACK_WIND (frame, ctr_ftruncate_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->ftruncate, - fd, offset, xdata); - return 0; -} - -/****************************rename******************************************/ - -int32_t -ctr_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, - struct iatt *preoldparent, struct iatt *postoldparent, - struct iatt *prenewparent, struct iatt *postnewparent, - dict_t *xdata) -{ - int ret = -1; - - CTR_IS_DISABLED_THEN_GOTO(this, out); - - ret = ctr_insert_unwind(frame, this, - GFDB_FOP_DENTRY_WRITE, GFDB_FOP_UNWIND); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_INSERT_RENAME_UNWIND_FAILED, - "Failed to insert rename unwind"); - } - 
-out: - STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, buf, - preoldparent, postoldparent, prenewparent, - postnewparent, - xdata); - - return 0; -} - -int32_t -ctr_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, - loc_t *newloc, dict_t *xdata) -{ - int ret = -1; - gf_ctr_inode_context_t ctr_inode_cx; - gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; - gf_ctr_link_context_t new_link_cx, old_link_cx; - gf_ctr_link_context_t *_nlink_cx = &new_link_cx; - gf_ctr_link_context_t *_olink_cx = &old_link_cx; - - CTR_IS_DISABLED_THEN_GOTO(this, out); - CTR_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, out); - - /*Fill old link context*/ - FILL_CTR_LINK_CX(_olink_cx, oldloc->pargfid, oldloc->name, - oldloc->path, out); - - /*Fill new link context*/ - FILL_CTR_LINK_CX(_nlink_cx, newloc->pargfid, newloc->name, - newloc->path, out); - - /*Fill ctr inode context*/ - FILL_CTR_INODE_CONTEXT(_inode_cx, oldloc->inode->ia_type, - oldloc->inode->gfid, _nlink_cx, _olink_cx, - GFDB_FOP_DENTRY_WRITE, GFDB_FOP_WIND); - - /* Is a metatdata fop */ - _inode_cx->is_metadata_fop = _gf_true; - - /*record into the database*/ - ret = ctr_insert_wind(frame, this, _inode_cx); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_INSERT_RENAME_WIND_FAILED, - "Failed to insert rename wind"); - } else { - /* We are doing updation of hard link in inode context in wind - * As we dont get the "inode" in the call back for rename */ - ret = update_hard_link_ctx (frame, this, oldloc->inode); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_UPDATE_HARDLINK_FAILED, "Failed " - "updating hard link in ctr inode context"); - } - } - -out: - STACK_WIND (frame, ctr_rename_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->rename, - oldloc, newloc, xdata); - return 0; -} - -/****************************unlink******************************************/ -int32_t -ctr_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt 
*preparent, - struct iatt *postparent, dict_t *xdata) -{ - int ret = -1; - uint32_t remaining_links = -1; - - CTR_IS_DISABLED_THEN_GOTO(this, out); - - if (!xdata) - goto out; - - /* - * - * Extracting GF_RESPONSE_LINK_COUNT_XDATA from POSIX Xlator - * - * */ - ret = dict_get_uint32 (xdata , GF_RESPONSE_LINK_COUNT_XDATA, - &remaining_links); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_GET_CTR_RESPONSE_LINK_COUNT_XDATA_FAILED, - "Failed to getting GF_RESPONSE_LINK_COUNT_XDATA"); - remaining_links = -1; - } - - /*This is not the only link*/ - if (remaining_links != 1) { - - ret = ctr_insert_unwind(frame, this, GFDB_FOP_DENTRY_WRITE, - GFDB_FOP_UNDEL); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_INSERT_UNLINK_UNWIND_FAILED, - "Failed to insert unlink unwind"); - } - } - /*Last link that was deleted*/ - else if (remaining_links == 1) { - - ret = ctr_insert_unwind(frame, this, GFDB_FOP_DENTRY_WRITE, - GFDB_FOP_UNDEL_ALL); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_INSERT_UNLINK_UNWIND_FAILED, - "Failed to insert unlink unwind"); - } - } - -out: - STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno, preparent, - postparent, xdata); - - return 0; -} - -int32_t -ctr_unlink (call_frame_t *frame, xlator_t *this, - loc_t *loc, int xflag, dict_t *xdata) -{ - int ret = -1; - gf_ctr_inode_context_t ctr_inode_cx; - gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; - gf_ctr_link_context_t ctr_link_cx; - gf_ctr_link_context_t *_link_cx = &ctr_link_cx; - gf_boolean_t is_xdata_created = _gf_false; - struct iatt dummy_stat = {0}; - - GF_ASSERT (frame); - - CTR_IS_DISABLED_THEN_GOTO(this, out); - - /*Fill link context*/ - FILL_CTR_LINK_CX(_link_cx, loc->pargfid, loc->name, loc->path, out); - - /*Fill ctr inode context*/ - FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, - loc->inode->gfid, _link_cx, NULL, - GFDB_FOP_DENTRY_WRITE, GFDB_FOP_WDEL); - - /*Internal FOP*/ - _inode_cx->is_internal_fop = is_internal_fop (frame, 
xdata); - - /* Is a metadata FOP */ - _inode_cx->is_metadata_fop = _gf_true; - - /* If its a internal FOP and dht link file donot record*/ - if (_inode_cx->is_internal_fop && - dht_is_linkfile (&dummy_stat, xdata)) { - goto out; - } - - /*record into the database*/ - ret = ctr_insert_wind(frame, this, _inode_cx); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_INSERT_UNLINK_UNWIND_FAILED, - "Failed to insert unlink wind"); - } else { - /* We are doing delete of hard link in inode context in wind - * As we dont get the "inode" in the call back for rename */ - ret = delete_hard_link_ctx (frame, this, loc->inode); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_DELETE_HARDLINK_FAILED, "Failed " - "deleting hard link from ctr inode context"); - } - } - - /* - * - * Sending GF_REQUEST_LINK_COUNT_XDATA - * to POSIX Xlator to send link count in unwind path - * - * */ - /*create xdata if NULL*/ - if (!xdata) { - xdata = dict_new(); - is_xdata_created = (xdata) ? _gf_true : _gf_false; - } - if (!xdata) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_XDATA_NULL, "xdata is NULL :Cannot send " - "GF_REQUEST_LINK_COUNT_XDATA to posix"); - goto out; - } - - ret = dict_set_int32 (xdata, GF_REQUEST_LINK_COUNT_XDATA, 1); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_SET_CTR_RESPONSE_LINK_COUNT_XDATA_FAILED, - "Failed setting GF_REQUEST_LINK_COUNT_XDATA"); - if (is_xdata_created) { - dict_unref (xdata); - } - goto out; - } - -out: - STACK_WIND (frame, ctr_unlink_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->unlink, - loc, xflag, xdata); - - if (is_xdata_created) - dict_unref (xdata); - - return 0; -} - -/****************************fsync******************************************/ -int32_t -ctr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) -{ - int ret = -1; - - CTR_IS_DISABLED_THEN_GOTO(this, out); - - ret = 
ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, - GFDB_FOP_UNWIND); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_INSERT_FSYNC_UNWIND_FAILED, - "Failed to insert fsync unwind"); - } - -out: - STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf, - xdata); - - return 0; -} - -int32_t -ctr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, - int32_t flags, dict_t *xdata) -{ - int ret = -1; - gf_ctr_inode_context_t ctr_inode_cx; - gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; - - CTR_IS_DISABLED_THEN_GOTO(this, out); - CTR_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, out); - - /*Fill ctr inode context*/ - FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, - fd->inode->gfid, NULL, NULL, - GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND); - - /*record into the database*/ - ret = ctr_insert_wind(frame, this, _inode_cx); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_INSERT_FSYNC_WIND_FAILED, - "Failed to insert fsync wind"); - } - -out: - STACK_WIND (frame, ctr_fsync_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fsync, - fd, flags, xdata); - return 0; -} - -/****************************setxattr****************************************/ - -int -ctr_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - int ret = -1; - - CTR_IS_DISABLED_THEN_GOTO(this, out); - - ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, - GFDB_FOP_UNWIND); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_INSERT_FSYNC_UNWIND_FAILED, - "Failed to insert setxattr unwind"); - } - -out: - STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata); - - return 0; -} - -int -ctr_setxattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, dict_t *xattr, int flags, dict_t *xdata) -{ - int ret = -1; - gf_ctr_inode_context_t ctr_inode_cx; - gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; - - CTR_IS_DISABLED_THEN_GOTO(this, out); - CTR_IF_INTERNAL_FOP_THEN_GOTO 
(frame, xdata, out); - CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO (this, out); - - /*Fill ctr inode context*/ - FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, - loc->inode->gfid, NULL, NULL, - GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND); - - /*record into the database*/ - ret = ctr_insert_wind(frame, this, _inode_cx); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_INSERT_SETATTR_WIND_FAILED, - "Failed to insert setxattr wind"); - } - -out: - STACK_WIND (frame, ctr_setxattr_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->setxattr, - loc, xattr, flags, xdata); - return 0; -} -/**************************** fsetxattr *************************************/ -int32_t -ctr_fsetxattr_cbk (call_frame_t *frame, - void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, dict_t *xdata) -{ - int ret = -1; - - CTR_IS_DISABLED_THEN_GOTO(this, out); - - ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, - GFDB_FOP_UNWIND); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_INSERT_FSYNC_UNWIND_FAILED, - "Failed to insert fsetxattr unwind"); - } - -out: - STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata); - - return 0; -} - -int32_t -ctr_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, - int32_t flags, dict_t *xdata) -{ - int ret = -1; - gf_ctr_inode_context_t ctr_inode_cx; - gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; - - CTR_IS_DISABLED_THEN_GOTO(this, out); - CTR_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, out); - CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO (this, out); - - /*Fill ctr inode context*/ - FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, - fd->inode->gfid, NULL, NULL, - GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND); - - /*record into the database*/ - ret = ctr_insert_wind(frame, this, _inode_cx); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_INSERT_SETATTR_WIND_FAILED, - "Failed to insert fsetxattr wind"); - } - -out: - STACK_WIND (frame, ctr_fsetxattr_cbk, - 
FIRST_CHILD (this), FIRST_CHILD (this)->fops->fsetxattr, - fd, dict, flags, xdata); - return 0; -} -/****************************mknod*******************************************/ - - -int32_t -ctr_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - int ret = -1; - ctr_heal_ret_val_t ret_val = CTR_CTX_ERROR; - - CTR_IS_DISABLED_THEN_GOTO(this, out); - - /* Add hard link to the list */ - ret_val = add_hard_link_ctx (frame, this, inode); - if (ret_val == CTR_CTX_ERROR) { - gf_msg_trace (this->name, 0, "Failed adding hard link"); - } - - ret = ctr_insert_unwind(frame, this, GFDB_FOP_CREATE_WRITE, - GFDB_FOP_UNWIND); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_INSERT_MKNOD_UNWIND_FAILED, - "Failed to insert mknod unwind"); - } - -out: - STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, inode, buf, - preparent, postparent, xdata); - - return 0; -} - - -int -ctr_mknod (call_frame_t *frame, xlator_t *this, - loc_t *loc, mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata) -{ - int ret = -1; - gf_ctr_inode_context_t ctr_inode_cx; - gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; - gf_ctr_link_context_t ctr_link_cx; - gf_ctr_link_context_t *_link_cx = &ctr_link_cx; - void *uuid_req = NULL; - uuid_t gfid = {0,}; - uuid_t *ptr_gfid = &gfid; - - CTR_IS_DISABLED_THEN_GOTO(this, out); - CTR_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, out); - - GF_ASSERT(frame); - GF_ASSERT(frame->root); - - /*get gfid from xdata dict*/ - ret = dict_get_ptr (xdata, "gfid-req", &uuid_req); - if (ret) { - gf_msg_debug (this->name, 0, "failed to get gfid from dict"); - goto out; - } - gf_uuid_copy (gfid, uuid_req); - - /*fill ctr link context*/ - FILL_CTR_LINK_CX (_link_cx, loc->pargfid, loc->name, loc->path, out); - - /*Fill ctr inode context*/ - FILL_CTR_INODE_CONTEXT (_inode_cx, loc->inode->ia_type, - *ptr_gfid, _link_cx, 
NULL, - GFDB_FOP_CREATE_WRITE, GFDB_FOP_WIND); - - /*record into the database*/ - ret = ctr_insert_wind(frame, this, _inode_cx); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_INSERT_MKNOD_WIND_FAILED, - "Failed to insert mknod wind"); - } - -out: - STACK_WIND (frame, ctr_mknod_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->mknod, - loc, mode, rdev, umask, xdata); - return 0; -} - -/****************************create******************************************/ -int -ctr_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, - fd_t *fd, inode_t *inode, struct iatt *stbuf, - struct iatt *preparent, struct iatt *postparent, - dict_t *xdata) -{ - int ret = -1; - - CTR_IS_DISABLED_THEN_GOTO(this, out); - - - ret = add_hard_link_ctx (frame, this, inode); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_ADD_HARDLINK_FAILED, - "Failed adding hard link"); - } - - ret = ctr_insert_unwind(frame, this, GFDB_FOP_CREATE_WRITE, - GFDB_FOP_UNWIND); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_INSERT_CREATE_UNWIND_FAILED, - "Failed to insert create unwind"); - } - -out: - STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, - stbuf, - preparent, postparent, xdata); - - return 0; -} - -int -ctr_create (call_frame_t *frame, xlator_t *this, - loc_t *loc, int32_t flags, mode_t mode, - mode_t umask, fd_t *fd, dict_t *xdata) -{ - int ret = -1; - gf_ctr_inode_context_t ctr_inode_cx; - gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; - gf_ctr_link_context_t ctr_link_cx; - gf_ctr_link_context_t *_link_cx = &ctr_link_cx; - void *uuid_req = NULL; - uuid_t gfid = {0,}; - uuid_t *ptr_gfid = &gfid; - struct iatt dummy_stat = {0}; - - CTR_IS_DISABLED_THEN_GOTO(this, out); - - GF_ASSERT(frame); - GF_ASSERT(frame->root); - - /*Get GFID from Xdata dict*/ - ret = dict_get_ptr (xdata, "gfid-req", &uuid_req); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_GET_GFID_FROM_DICT_FAILED, - 
"failed to get gfid from dict"); - goto out; - } - gf_uuid_copy (gfid, uuid_req); - - /*fill ctr link context*/ - FILL_CTR_LINK_CX(_link_cx, loc->pargfid, loc->name, loc->path, out); - - /*Fill ctr inode context*/ - FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, - *ptr_gfid, _link_cx, NULL, - GFDB_FOP_CREATE_WRITE, GFDB_FOP_WIND); - - /*Internal FOP*/ - _inode_cx->is_internal_fop = is_internal_fop (frame, xdata); - - /* If its a internal FOP and dht link file donot record*/ - if (_inode_cx->is_internal_fop && - dht_is_linkfile (&dummy_stat, xdata)) { - goto out; - } - - /*record into the database*/ - ret = ctr_insert_wind(frame, this, &ctr_inode_cx); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_INSERT_CREATE_WIND_FAILED, - "Failed to insert create wind"); - } -out: - STACK_WIND (frame, ctr_create_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->create, - loc, flags, mode, umask, fd, xdata); - return 0; -} - -/****************************link********************************************/ - -int -ctr_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, - inode_t *inode, struct iatt *stbuf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - int ret = -1; - - CTR_IS_DISABLED_THEN_GOTO(this, out); - - /* Add hard link to the list */ - ret = add_hard_link_ctx (frame, this, inode); - if (ret) { - gf_msg_trace (this->name, 0, "Failed adding hard link"); - } - - ret = ctr_insert_unwind(frame, this, GFDB_FOP_DENTRY_WRITE, - GFDB_FOP_UNWIND); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_INSERT_CREATE_UNWIND_FAILED, - "Failed to insert create unwind"); - } - -out: - STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, stbuf, - preparent, postparent, xdata); - return 0; -} - -int -ctr_link (call_frame_t *frame, xlator_t *this, - loc_t *oldloc, loc_t *newloc, dict_t *xdata) -{ - int ret = -1; - gf_ctr_inode_context_t ctr_inode_cx; - gf_ctr_inode_context_t *_inode_cx = 
&ctr_inode_cx; - gf_ctr_link_context_t ctr_link_cx; - gf_ctr_link_context_t *_link_cx = &ctr_link_cx; - struct iatt dummy_stat = {0}; - - CTR_IS_DISABLED_THEN_GOTO(this, out); - - GF_ASSERT(frame); - GF_ASSERT(frame->root); - - /*fill ctr link context*/ - FILL_CTR_LINK_CX(_link_cx, newloc->pargfid, newloc->name, - newloc->path, out); - - /*Fill ctr inode context*/ - FILL_CTR_INODE_CONTEXT(_inode_cx, oldloc->inode->ia_type, - oldloc->inode->gfid, _link_cx, NULL, - GFDB_FOP_DENTRY_WRITE, GFDB_FOP_WIND); - - /*Internal FOP*/ - _inode_cx->is_internal_fop = is_internal_fop (frame, xdata); - - /* Is a metadata fop */ - _inode_cx->is_metadata_fop = _gf_true; - - /* If its a internal FOP and dht link file donot record*/ - if (_inode_cx->is_internal_fop && - dht_is_linkfile (&dummy_stat, xdata)) { - goto out; - } - - - /*record into the database*/ - ret = ctr_insert_wind(frame, this, _inode_cx); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_INSERT_LINK_WIND_FAILED, - "Failed to insert link wind"); - } - -out: - STACK_WIND (frame, ctr_link_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->link, - oldloc, newloc, xdata); - return 0; -} - -/******************************readv*****************************************/ -int ctr_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, - struct iovec *vector, int count, struct iatt *stbuf, - struct iobref *iobref, dict_t *xdata) { - - int ret = -1; - - CTR_IS_DISABLED_THEN_GOTO(this, out); - - ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_READ, - GFDB_FOP_UNWIND); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_INSERT_CREATE_UNWIND_FAILED, - "Failed to insert create unwind"); - } - -out: - STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count, - stbuf, iobref, xdata); - return 0; -} - - -int -ctr_readv (call_frame_t *frame, xlator_t *this, - fd_t *fd, size_t size, off_t off, uint32_t flags, dict_t *xdata) -{ - int ret = -1; - 
gf_ctr_inode_context_t ctr_inode_cx; - gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; - - CTR_IS_DISABLED_THEN_GOTO(this, out); - CTR_IF_INTERNAL_FOP_THEN_GOTO (frame, xdata, out); - - /*Fill ctr inode context*/ - FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, - fd->inode->gfid, NULL, NULL, - GFDB_FOP_INODE_READ, GFDB_FOP_WIND); - - /*record into the database*/ - ret = ctr_insert_wind(frame, this, _inode_cx); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_INSERT_READV_WIND_FAILED, - "Failed to insert readv wind"); - } - -out: - STACK_WIND (frame, ctr_readv_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->readv, - fd, size, off, flags, xdata); - return 0; -} - -/*******************************ctr_ipc****************************************/ - -/*This is the call back function per record/file from data base*/ -static int -ctr_db_query_callback (gfdb_query_record_t *gfdb_query_record, - void *args) { - int ret = -1; - ctr_query_cbk_args_t *query_cbk_args = args; - - GF_VALIDATE_OR_GOTO ("ctr", query_cbk_args, out); - - ret = gfdb_write_query_record (query_cbk_args->query_fd, - gfdb_query_record); - if (ret) { - gf_msg ("ctr", GF_LOG_ERROR, 0, - CTR_MSG_FATAL_ERROR, - "Failed to write to query file"); - goto out; - } - - query_cbk_args->count++; - - ret = 0; -out: - return ret; -} - -/* This function does all the db queries related to tiering and - * generates/populates new/existing query file - * inputs: - * xlator_t *this : CTR Translator - * void *conn_node : Database connection - * char *query_file: the query file that needs to be updated - * gfdb_ipc_ctr_params_t *ipc_ctr_params: the query parameters - * Return: - * On success 0 - * On failure -1 - * */ -int -ctr_db_query (xlator_t *this, - void *conn_node, - char *query_file, - gfdb_ipc_ctr_params_t *ipc_ctr_params) -{ - int ret = -1; - ctr_query_cbk_args_t query_cbk_args = {0}; - - GF_VALIDATE_OR_GOTO ("ctr", this, out); - GF_VALIDATE_OR_GOTO (this->name, conn_node, out); - 
GF_VALIDATE_OR_GOTO (this->name, query_file, out); - GF_VALIDATE_OR_GOTO (this->name, ipc_ctr_params, out); - - /*Query for eligible files from db*/ - query_cbk_args.query_fd = open (query_file, - O_WRONLY | O_CREAT | O_APPEND, - S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); - if (query_cbk_args.query_fd < 0) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CTR_MSG_FATAL_ERROR, - "Failed to open query file %s", query_file); - goto out; - } - if (!ipc_ctr_params->is_promote) { - if (ipc_ctr_params->write_freq_threshold == 0 && - ipc_ctr_params->read_freq_threshold == 0) { - ret = find_unchanged_for_time ( - conn_node, - ctr_db_query_callback, - (void *)&query_cbk_args, - &ipc_ctr_params->time_stamp); - } else { - ret = find_unchanged_for_time_freq ( - conn_node, - ctr_db_query_callback, - (void *)&query_cbk_args, - &ipc_ctr_params->time_stamp, - ipc_ctr_params->write_freq_threshold, - ipc_ctr_params->read_freq_threshold, - _gf_false); - } - } else { - if (ipc_ctr_params->write_freq_threshold == 0 && - ipc_ctr_params->read_freq_threshold == 0) { - ret = find_recently_changed_files ( - conn_node, - ctr_db_query_callback, - (void *)&query_cbk_args, - &ipc_ctr_params->time_stamp); - } else { - ret = find_recently_changed_files_freq ( - conn_node, - ctr_db_query_callback, - (void *)&query_cbk_args, - &ipc_ctr_params->time_stamp, - ipc_ctr_params->write_freq_threshold, - ipc_ctr_params->read_freq_threshold, - _gf_false); - } - } - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_FATAL_ERROR, - "FATAL: query from db failed"); - goto out; - } - - ret = clear_files_heat (conn_node); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_FATAL_ERROR, - "FATAL: Failed to clear db entries"); - goto out; - } - - ret = 0; -out: - - if (!ret) - ret = query_cbk_args.count; - - if (query_cbk_args.query_fd >= 0) { - close (query_cbk_args.query_fd); - query_cbk_args.query_fd = -1; - } - - return ret; -} - - -int -ctr_ipc_helper (xlator_t *this, dict_t *in_dict, - dict_t 
*out_dict) -{ - int ret = -1; - char *ctr_ipc_ops = NULL; - gf_ctr_private_t *priv = NULL; - char *db_version = NULL; - char *db_param_key = NULL; - char *db_param = NULL; - char *query_file = NULL; - gfdb_ipc_ctr_params_t *ipc_ctr_params = NULL; - - - GF_VALIDATE_OR_GOTO ("ctr", this, out); - GF_VALIDATE_OR_GOTO (this->name, this->private, out); - priv = this->private; - GF_VALIDATE_OR_GOTO (this->name, priv->_db_conn, out); - GF_VALIDATE_OR_GOTO (this->name, in_dict, out); - GF_VALIDATE_OR_GOTO (this->name, out_dict, out); - - GET_DB_PARAM_FROM_DICT(this->name, in_dict, GFDB_IPC_CTR_KEY, - ctr_ipc_ops, out); - - /*if its a db clear operation */ - if (strncmp (ctr_ipc_ops, GFDB_IPC_CTR_CLEAR_OPS, - strlen (GFDB_IPC_CTR_CLEAR_OPS)) == 0) { - - ret = clear_files_heat (priv->_db_conn); - if (ret) - goto out; - - } /* if its a query operation, in which case its query + clear db*/ - else if (strncmp (ctr_ipc_ops, GFDB_IPC_CTR_QUERY_OPS, - strlen (GFDB_IPC_CTR_QUERY_OPS)) == 0) { - - ret = dict_get_str (in_dict, GFDB_IPC_CTR_GET_QFILE_PATH, - &query_file); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, - "Failed extracting query file path"); - goto out; - } - - ret = dict_get_bin (in_dict, GFDB_IPC_CTR_GET_QUERY_PARAMS, - (void *)&ipc_ctr_params); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, - "Failed extracting query parameters"); - goto out; - } - - ret = ctr_db_query (this, priv->_db_conn, query_file, - ipc_ctr_params); - - ret = dict_set_int32 (out_dict, - GFDB_IPC_CTR_RET_QUERY_COUNT, ret); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, - "Failed setting query reply"); - goto out; - } - - } /* if its a query for db version */ - else if (strncmp (ctr_ipc_ops, GFDB_IPC_CTR_GET_DB_VERSION_OPS, - strlen (GFDB_IPC_CTR_GET_DB_VERSION_OPS)) == 0) { - - ret = get_db_version (priv->_db_conn, &db_version); - if (ret == -1 || !db_version) { - gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, - "Failed extracting db version 
"); - goto out; - } - - SET_DB_PARAM_TO_DICT(this->name, out_dict, - GFDB_IPC_CTR_RET_DB_VERSION, - db_version, ret, error); - - } /* if its a query for a db setting */ - else if (strncmp (ctr_ipc_ops, GFDB_IPC_CTR_GET_DB_PARAM_OPS, - strlen (GFDB_IPC_CTR_GET_DB_PARAM_OPS)) == 0) { - - ret = dict_get_str (in_dict, GFDB_IPC_CTR_GET_DB_KEY, - &db_param_key); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, - "Failed extracting db param key"); - goto out; - } - - ret = get_db_setting (priv->_db_conn, db_param_key, &db_param); - if (ret == -1 || !db_param) { - goto out; - } - - SET_DB_PARAM_TO_DICT(this->name, out_dict, - db_param_key, - db_param, ret, error); - } /* default case */ - else { - goto out; - } - - - ret = 0; - goto out; -error: - GF_FREE (db_param_key); - GF_FREE (db_param); - GF_FREE (db_version); -out: - return ret; -} - - -/* IPC Call from tier migrator to clear the heat on the DB */ -int32_t -ctr_ipc (call_frame_t *frame, xlator_t *this, int32_t op, - dict_t *in_dict) -{ - int ret = -1; - gf_ctr_private_t *priv = NULL; - dict_t *out_dict = NULL; - - GF_ASSERT(this); - priv = this->private; - GF_ASSERT (priv); - GF_ASSERT(priv->_db_conn); - GF_VALIDATE_OR_GOTO (this->name, in_dict, wind); - - - if (op != GF_IPC_TARGET_CTR) - goto wind; - - out_dict = dict_new(); - if (!out_dict) { - goto out; - } - - ret = ctr_ipc_helper (this, in_dict, out_dict); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, - "Failed in ctr_ipc_helper"); - } -out: - - STACK_UNWIND_STRICT (ipc, frame, ret, 0, out_dict); - - if (out_dict) - dict_unref(out_dict); - - return 0; - - wind: - STACK_WIND (frame, default_ipc_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->ipc, op, in_dict); - - - - return 0; -} - - -/******************************************************************************/ - -int -reconfigure (xlator_t *this, dict_t *options) -{ - char *temp_str = NULL; - int ret = 0; - gf_ctr_private_t *priv = NULL; - - priv = this->private; - if 
(dict_get_str(options, "changetimerecorder.frequency", - &temp_str)) { - gf_msg(this->name, GF_LOG_INFO, 0, CTR_MSG_SET, "set!"); - } - - GF_OPTION_RECONF ("ctr-enabled", priv->enabled, options, - bool, out); - - GF_OPTION_RECONF ("record-counters", priv->ctr_record_counter, options, - bool, out); - - GF_OPTION_RECONF ("ctr-record-metadata-heat", - priv->ctr_record_metadata_heat, options, - bool, out); - - GF_OPTION_RECONF ("ctr_link_consistency", priv->ctr_link_consistency, - options, bool, out); - - GF_OPTION_RECONF ("ctr_inode_heal_expire_period", - priv->ctr_inode_heal_expire_period, - options, uint64, out); - - GF_OPTION_RECONF ("ctr_hardlink_heal_expire_period", - priv->ctr_hardlink_heal_expire_period, - options, uint64, out); - - GF_OPTION_RECONF ("record-exit", priv->ctr_record_unwind, options, - bool, out); - - GF_OPTION_RECONF ("record-entry", priv->ctr_record_wind, options, - bool, out); - -out: - - return ret; -} - -/****************************init********************************************/ - -int32_t -init (xlator_t *this) -{ - gf_ctr_private_t *priv = NULL; - int ret_db = -1; - dict_t *params_dict = NULL; - - GF_VALIDATE_OR_GOTO ("ctr", this, error); - - if (!this->children || this->children->next) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_FATAL_ERROR, - "FATAL: ctr should have exactly one child"); - goto error; - } - - if (!this->parents) { - gf_msg (this->name, GF_LOG_WARNING, 0, - CTR_MSG_DANGLING_VOLUME, - "dangling volume. 
check volfile "); - } - - priv = GF_CALLOC (1, sizeof (*priv), gf_ctr_mt_private_t); - if (!priv) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - CTR_MSG_CALLOC_FAILED, - "Calloc didnt work!!!"); - goto error; - } - - /*Default values for the translator*/ - priv->ctr_record_wind = _gf_true; - priv->ctr_record_unwind = _gf_false; - priv->ctr_hot_brick = _gf_false; - priv->gfdb_db_type = GFDB_SQLITE3; - priv->gfdb_sync_type = GFDB_DB_SYNC; - priv->enabled = _gf_true; - priv->_db_conn = NULL; - priv->ctr_hardlink_heal_expire_period = - CTR_DEFAULT_HARDLINK_EXP_PERIOD; - priv->ctr_inode_heal_expire_period = - CTR_DEFAULT_INODE_EXP_PERIOD; - - /*Extract ctr xlator options*/ - ret_db = extract_ctr_options (this, priv); - if (ret_db) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_EXTRACT_CTR_XLATOR_OPTIONS_FAILED, - "Failed extracting ctr xlator options"); - goto error; - } - - params_dict = dict_new (); - if (!params_dict) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_INIT_DB_PARAMS_FAILED, - "DB Params cannot initialized!"); - goto error; - } - - /*Extract db params options*/ - ret_db = extract_db_params(this, params_dict, priv->gfdb_db_type); - if (ret_db) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_EXTRACT_DB_PARAM_OPTIONS_FAILED, - "Failed extracting db params options"); - goto error; - } - - /*Create a memory pool for ctr xlator*/ - this->local_pool = mem_pool_new (gf_ctr_local_t, 64); - if (!this->local_pool) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_CREATE_LOCAL_MEMORY_POOL_FAILED, - "failed to create local memory pool"); - goto error; - } - - /*Initialize Database Connection*/ - priv->_db_conn = init_db(params_dict, priv->gfdb_db_type); - if (!priv->_db_conn) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_FATAL_ERROR, - "FATAL: Failed initializing data base"); - goto error; - } - - ret_db = 0; - goto out; - -/*Error handling */ -error: - - if (this) - mem_pool_destroy (this->local_pool); - - if (priv) { - GF_FREE (priv->ctr_db_path); 
- } - GF_FREE (priv); - - if (params_dict) - dict_unref (params_dict); - - return -1; - -out: - - if (params_dict) - dict_unref (params_dict); - - this->private = (void *)priv; - return 0; -} - -int32_t -mem_acct_init (xlator_t *this) -{ - int ret = -1; - - GF_VALIDATE_OR_GOTO ("ctr", this, out); - - ret = xlator_mem_acct_init (this, gf_ctr_mt_end + 1); - - if (ret != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_MEM_ACC_INIT_FAILED, "Memory accounting init" - "failed"); - return ret; - } -out: - return ret; -} - - -void -fini (xlator_t *this) -{ - gf_ctr_private_t *priv = NULL; - - priv = this->private; - - if (priv) { - if (fini_db (priv->_db_conn)) { - gf_msg (this->name, GF_LOG_WARNING, 0, - CTR_MSG_CLOSE_DB_CONN_FAILED, "Failed closing " - "db connection"); - } - GF_FREE (priv->ctr_db_path); - } - GF_FREE (priv); - mem_pool_destroy (this->local_pool); - - return; -} - -struct xlator_fops fops = { - /*lookup*/ - .lookup = ctr_lookup, - /*write fops */ - .mknod = ctr_mknod, - .create = ctr_create, - .truncate = ctr_truncate, - .ftruncate = ctr_ftruncate, - .setxattr = ctr_setxattr, - .fsetxattr = ctr_fsetxattr, - .removexattr = ctr_removexattr, - .fremovexattr = ctr_fremovexattr, - .unlink = ctr_unlink, - .link = ctr_link, - .rename = ctr_rename, - .writev = ctr_writev, - .setattr = ctr_setattr, - .fsetattr = ctr_fsetattr, - /*read fops*/ - .readv = ctr_readv, - /* IPC call*/ - .ipc = ctr_ipc -}; - -struct xlator_cbks cbks = { - .forget = ctr_forget -}; - -struct volume_options options[] = { - { .key = {"ctr-enabled",}, - .type = GF_OPTION_TYPE_BOOL, - .value = {"on", "off"}, - .default_value = "off", - .description = "Enables the CTR" - }, - { .key = {"record-entry"}, - .type = GF_OPTION_TYPE_BOOL, - .value = {"on", "off"}, - .default_value = "on" - }, - { .key = {"record-exit"}, - .type = GF_OPTION_TYPE_BOOL, - .value = {"on", "off"}, - .default_value = "off" - }, - { .key = {"record-counters"}, - .type = GF_OPTION_TYPE_BOOL, - .value = {"on", "off"}, 
- .default_value = "off" - }, - { .key = {"ctr-record-metadata-heat"}, - .type = GF_OPTION_TYPE_BOOL, - .value = {"on", "off"}, - .default_value = "off" - }, - { .key = {"ctr_link_consistency"}, - .type = GF_OPTION_TYPE_BOOL, - .value = {"on", "off"}, - .default_value = "off" - }, - { .key = {"ctr_hardlink_heal_expire_period"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "300" - }, - { .key = {"ctr_inode_heal_expire_period"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "300" - }, - { .key = {"hot-brick"}, - .type = GF_OPTION_TYPE_BOOL, - .value = {"on", "off"}, - .default_value = "off" - }, - { .key = {"db-type"}, - .type = GF_OPTION_TYPE_STR, - .value = {"hashfile", "rocksdb", "changelog", "sqlite3", - "hyperdex"}, - .default_value = "sqlite3" - }, - { .key = {"db-sync"}, - .type = GF_OPTION_TYPE_STR, - .value = {"sync", "async"}, - .default_value = "sync" - }, - { .key = {"db-path"}, - .type = GF_OPTION_TYPE_PATH - }, - { .key = {"db-name"}, - .type = GF_OPTION_TYPE_STR - }, - { .key = {GFDB_SQL_PARAM_SYNC}, - .type = GF_OPTION_TYPE_STR, - .value = {"off", "normal", "full"}, - .default_value = "normal" - }, - { .key = {GFDB_SQL_PARAM_JOURNAL_MODE}, - .type = GF_OPTION_TYPE_STR, - .value = {"delete", "truncate", "persist", "memory", "wal", "off"}, - .default_value = "wal" - }, - { .key = {GFDB_SQL_PARAM_AUTO_VACUUM}, - .type = GF_OPTION_TYPE_STR, - .value = {"off", "full", "incr"}, - .default_value = "off" - }, - { .key = {GFDB_SQL_PARAM_WAL_AUTOCHECK}, - .type = GF_OPTION_TYPE_INT, - .default_value = "1000" - }, - { .key = {GFDB_SQL_PARAM_CACHE_SIZE}, - .type = GF_OPTION_TYPE_INT, - .default_value = "1000" - }, - { .key = {GFDB_SQL_PARAM_PAGE_SIZE}, - .type = GF_OPTION_TYPE_INT, - .default_value = "4096" - }, - { .key = {NULL} }, -}; diff --git a/xlators/features/changetimerecorder/src/changetimerecorder.h b/xlators/features/changetimerecorder/src/changetimerecorder.h deleted file mode 100644 index 2a8bbd18c5b..00000000000 --- 
a/xlators/features/changetimerecorder/src/changetimerecorder.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - Copyright (c) 2006-2015 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -#ifndef __CTR_H -#define __CTR_H - -#include "glusterfs.h" -#include "xlator.h" -#include "logging.h" -#include "common-utils.h" -#include "ctr_mem_types.h" -#include "ctr-helper.h" - -#endif /* __CTR_H */ diff --git a/xlators/features/changetimerecorder/src/ctr-helper.c b/xlators/features/changetimerecorder/src/ctr-helper.c deleted file mode 100644 index ab918eac825..00000000000 --- a/xlators/features/changetimerecorder/src/ctr-helper.c +++ /dev/null @@ -1,305 +0,0 @@ -/* - Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. 
-*/ - -#include "gfdb_sqlite3.h" -#include "ctr-helper.h" -#include "ctr-messages.h" - -/******************************************************************************* - * - * Fill unwind into db record - * - ******************************************************************************/ -int -fill_db_record_for_unwind(xlator_t *this, - gf_ctr_local_t *ctr_local, - gfdb_fop_type_t fop_type, - gfdb_fop_path_t fop_path) -{ - int ret = -1; - gfdb_time_t *ctr_uwtime = NULL; - gf_ctr_private_t *_priv = NULL; - - GF_ASSERT (this); - _priv = this->private; - GF_ASSERT (_priv); - - GF_ASSERT(ctr_local); - - /*If not unwind path error*/ - if (!isunwindpath(fop_path)) { - gf_msg (this->name, GF_LOG_ERROR, 0, CTR_MSG_WRONG_FOP_PATH, - "Wrong fop_path. Should be unwind"); - goto out; - } - - ctr_uwtime = &CTR_DB_REC(ctr_local).gfdb_unwind_change_time; - CTR_DB_REC(ctr_local).gfdb_fop_path = fop_path; - CTR_DB_REC(ctr_local).gfdb_fop_type = fop_type; - - ret = gettimeofday (ctr_uwtime, NULL); - if (ret == -1) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CTR_MSG_FILL_UNWIND_TIME_REC_ERROR, "Error " - "filling unwind time record %s", - strerror(errno)); - goto out; - } - - /* Special case i.e if its a tier rebalance - * + cold tier brick - * + its a create/mknod FOP - * we record unwind time as zero */ - if (ctr_local->client_pid == GF_CLIENT_PID_TIER_DEFRAG - && (!_priv->ctr_hot_brick) - && isdentrycreatefop(fop_type)) { - memset(ctr_uwtime, 0, sizeof(*ctr_uwtime)); - } - ret = 0; -out: - return ret; -} - - -/******************************************************************************* - * - * Fill wind into db record - * - ******************************************************************************/ -int -fill_db_record_for_wind (xlator_t *this, - gf_ctr_local_t *ctr_local, - gf_ctr_inode_context_t *ctr_inode_cx) -{ - int ret = -1; - gfdb_time_t *ctr_wtime = NULL; - gf_ctr_private_t *_priv = NULL; - - GF_ASSERT (this); - _priv = this->private; - GF_ASSERT (_priv); - 
GF_ASSERT (ctr_local); - IS_CTR_INODE_CX_SANE (ctr_inode_cx); - - /*if not wind path error!*/ - if (!iswindpath(ctr_inode_cx->fop_path)) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_WRONG_FOP_PATH, - "Wrong fop_path. Should be wind"); - goto out; - } - - ctr_wtime = &CTR_DB_REC(ctr_local).gfdb_wind_change_time; - CTR_DB_REC(ctr_local).gfdb_fop_path = ctr_inode_cx->fop_path; - CTR_DB_REC(ctr_local).gfdb_fop_type = ctr_inode_cx->fop_type; - CTR_DB_REC(ctr_local).link_consistency = _priv->ctr_link_consistency; - - ret = gettimeofday (ctr_wtime, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, errno, - CTR_MSG_FILL_UNWIND_TIME_REC_ERROR, - "Error filling wind time record %s", - strerror(errno)); - goto out; - } - - /* Special case i.e if its a tier rebalance - * + cold tier brick - * + its a create/mknod FOP - * we record wind time as zero */ - if (ctr_local->client_pid == GF_CLIENT_PID_TIER_DEFRAG - && (!_priv->ctr_hot_brick) - && isdentrycreatefop(ctr_inode_cx->fop_type)) { - memset(ctr_wtime, 0, sizeof(*ctr_wtime)); - } - - /*Copy gfid into db record*/ - gf_uuid_copy (CTR_DB_REC(ctr_local).gfid, *(ctr_inode_cx->gfid)); - - /*Hard Links*/ - if (isdentryfop(ctr_inode_cx->fop_type)) { - /*new link fop*/ - if (NEW_LINK_CX(ctr_inode_cx)) { - gf_uuid_copy (CTR_DB_REC(ctr_local).pargfid, - *((NEW_LINK_CX(ctr_inode_cx))->pargfid)); - strcpy (CTR_DB_REC(ctr_local).file_name, - NEW_LINK_CX(ctr_inode_cx)->basename); - strcpy (CTR_DB_REC(ctr_local).file_path, - NEW_LINK_CX(ctr_inode_cx)->basepath); - } - /*rename fop*/ - if (OLD_LINK_CX(ctr_inode_cx)) { - gf_uuid_copy (CTR_DB_REC(ctr_local).old_pargfid, - *((OLD_LINK_CX(ctr_inode_cx))->pargfid)); - strcpy (CTR_DB_REC(ctr_local).old_file_name, - OLD_LINK_CX(ctr_inode_cx)->basename); - strcpy (CTR_DB_REC(ctr_local).old_path, - OLD_LINK_CX(ctr_inode_cx)->basepath); - } - } - - ret = 0; -out: - /*On error roll back and clean the record*/ - if (ret == -1) { - CLEAR_CTR_DB_RECORD (ctr_local); - } - return ret; -} - - 
-/****************************************************************************** - * - * CTR xlator init related functions - * - * - * ****************************************************************************/ -static int -extract_sql_params(xlator_t *this, dict_t *params_dict) -{ - int ret = -1; - char *db_path = NULL; - char *db_name = NULL; - char *db_full_path = NULL; - - GF_ASSERT (this); - GF_ASSERT (params_dict); - - /*Extract the path of the db*/ - db_path = NULL; - GET_DB_PARAM_FROM_DICT_DEFAULT(this->name, this->options, "db-path", - db_path, "/var/run/gluster/"); - - /*Extract the name of the db*/ - db_name = NULL; - GET_DB_PARAM_FROM_DICT_DEFAULT(this->name, this->options, "db-name", - db_name, "gf_ctr_db.db"); - - /*Construct full path of the db*/ - ret = gf_asprintf(&db_full_path, "%s/%s", db_path, db_name); - if (ret < 0) { - gf_msg (GFDB_DATA_STORE, GF_LOG_ERROR, 0, - CTR_MSG_CONSTRUCT_DB_PATH_FAILED, - "Construction of full db path failed!"); - goto out; - } - - /*Setting the SQL DB Path*/ - SET_DB_PARAM_TO_DICT(this->name, params_dict, GFDB_SQL_PARAM_DBPATH, - db_full_path, ret, out); - - /*Extact rest of the sql params*/ - ret = gfdb_set_sql_params(this->name, this->options, params_dict); - if (ret) { - gf_msg (GFDB_DATA_STORE, GF_LOG_ERROR, 0, - CTR_MSG_SET_VALUE_TO_SQL_PARAM_FAILED, - "Failed setting values to sql param dict!"); - } - - ret = 0; - -out: - if (ret) - GF_FREE (db_full_path); - return ret; -} - - - -int extract_db_params(xlator_t *this, dict_t *params_dict, - gfdb_db_type_t db_type) { - - int ret = -1; - - GF_ASSERT (this); - GF_ASSERT (params_dict); - - switch (db_type) { - case GFDB_SQLITE3: - ret = extract_sql_params(this, params_dict); - if (ret) - goto out; - break; - case GFDB_ROCKS_DB: - case GFDB_HYPERDEX: - case GFDB_HASH_FILE_STORE: - case GFDB_INVALID_DB: - case GFDB_DB_END: - ret = -1; - break; - } - ret = 0; -out: - return ret; -} - -int extract_ctr_options (xlator_t *this, gf_ctr_private_t *_priv) { - int ret = 
-1; - char *_val_str = NULL; - - GF_ASSERT (this); - GF_ASSERT (_priv); - - /*Checking if the CTR Translator is enabled. By default its disabled*/ - _priv->enabled = _gf_false; - GF_OPTION_INIT ("ctr-enabled", _priv->enabled, bool, out); - if (!_priv->enabled) { - gf_msg (GFDB_DATA_STORE, GF_LOG_INFO, 0, - CTR_MSG_XLATOR_DISABLED, - "CTR Xlator is disabled."); - ret = 0; - goto out; - } - - /*Extract db type*/ - GF_OPTION_INIT ("db-type", _val_str, str, out); - _priv->gfdb_db_type = gf_string2gfdbdbtype(_val_str); - - /*Extract flag for record on wind*/ - GF_OPTION_INIT ("record-entry", _priv->ctr_record_wind, bool, out); - - /*Extract flag for record on unwind*/ - GF_OPTION_INIT ("record-exit", _priv->ctr_record_unwind, bool, out); - - /*Extract flag for record on counters*/ - GF_OPTION_INIT ("record-counters", _priv->ctr_record_counter, bool, - out); - - /* Extract flag for record metadata heat */ - GF_OPTION_INIT ("ctr-record-metadata-heat", - _priv->ctr_record_metadata_heat, bool, - out); - - /*Extract flag for link consistency*/ - GF_OPTION_INIT ("ctr_link_consistency", _priv->ctr_link_consistency, - bool, out); - - /*Extract ctr_inode_heal_expire_period */ - GF_OPTION_INIT ("ctr_inode_heal_expire_period", - _priv->ctr_inode_heal_expire_period, - uint64, out); - - /*Extract ctr_hardlink_heal_expire_period*/ - GF_OPTION_INIT ("ctr_hardlink_heal_expire_period", - _priv->ctr_hardlink_heal_expire_period, - uint64, out); - - /*Extract flag for hot tier brick*/ - GF_OPTION_INIT ("hot-brick", _priv->ctr_hot_brick, bool, out); - - /*Extract flag for sync mode*/ - GF_OPTION_INIT ("db-sync", _val_str, str, out); - _priv->gfdb_sync_type = gf_string2gfdbdbsync(_val_str); - - ret = 0; - -out: - return ret; -} diff --git a/xlators/features/changetimerecorder/src/ctr-helper.h b/xlators/features/changetimerecorder/src/ctr-helper.h deleted file mode 100644 index 161fff2d774..00000000000 --- a/xlators/features/changetimerecorder/src/ctr-helper.h +++ /dev/null @@ -1,803 +0,0 @@ 
-/* - Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -#ifndef __CTR_HELPER_H -#define __CTR_HELPER_H - - -#include "xlator.h" -#include "ctr_mem_types.h" -#include "iatt.h" -#include "glusterfs.h" -#include "xlator.h" -#include "defaults.h" -#include "logging.h" -#include "common-utils.h" -#include <time.h> -#include <sys/time.h> - -#include "gfdb_data_store.h" -#include "ctr-xlator-ctx.h" -#include "ctr-messages.h" - -#define CTR_DEFAULT_HARDLINK_EXP_PERIOD 300 /* Five mins */ -#define CTR_DEFAULT_INODE_EXP_PERIOD 300 /* Five mins */ - - -typedef struct ctr_query_cbk_args { - int query_fd; - int count; -} ctr_query_cbk_args_t; - - -/*CTR Xlator Private structure*/ -typedef struct gf_ctr_private { - gf_boolean_t enabled; - char *ctr_db_path; - gf_boolean_t ctr_hot_brick; - gf_boolean_t ctr_record_wind; - gf_boolean_t ctr_record_unwind; - gf_boolean_t ctr_record_counter; - gf_boolean_t ctr_record_metadata_heat; - gf_boolean_t ctr_link_consistency; - gfdb_db_type_t gfdb_db_type; - gfdb_sync_type_t gfdb_sync_type; - gfdb_conn_node_t *_db_conn; - uint64_t ctr_hardlink_heal_expire_period; - uint64_t ctr_inode_heal_expire_period; -} gf_ctr_private_t; - - -/* - * gf_ctr_local_t is the ctr xlator local data structure that is stored in - * the call_frame of each FOP. - * - * gfdb_db_record: The gf_ctr_local contains a gfdb_db_record object, which is - * used by the insert_record() api from the libgfdb. The gfdb_db_record object - * will contain all the inode and hardlink(only for dentry fops: create, - * mknod,link, unlink, rename).The ctr_local is keep alive till the unwind - * call and will be release during the unwind. 
The same gfdb_db_record will - * used for the unwind insert_record() api, to record unwind in the database. - * - * ia_inode_type in gf_ctr_local will tell the type of the inode. This is - * important for during the unwind path. As we will not have the inode during - * the unwind path. We would have include this in the gfdb_db_record itself - * but currently we record only file inode information. - * - * is_internal_fop in gf_ctr_local will tell us if this is a internal fop and - * take special/no action. We dont record change/acces times or increement heat - * counter for internal fops from rebalancer. - * */ -typedef struct gf_ctr_local { - gfdb_db_record_t gfdb_db_record; - ia_type_t ia_inode_type; - gf_boolean_t is_internal_fop; - gf_client_pid_t client_pid; -} gf_ctr_local_t; -/* - * Easy access of gfdb_db_record of ctr_local - * */ -#define CTR_DB_REC(ctr_local)\ - (ctr_local->gfdb_db_record) - -/*Clear db record*/ -#define CLEAR_CTR_DB_RECORD(ctr_local)\ -do {\ - ctr_local->gfdb_db_record.gfdb_fop_path = GFDB_FOP_INVALID;\ - memset(&(ctr_local->gfdb_db_record.gfdb_wind_change_time),\ - 0, sizeof(gfdb_time_t));\ - memset(&(ctr_local->gfdb_db_record.gfdb_unwind_change_time),\ - 0, sizeof(gfdb_time_t));\ - gf_uuid_clear (ctr_local->gfdb_db_record.gfid);\ - gf_uuid_clear (ctr_local->gfdb_db_record.pargfid);\ - memset(ctr_local->gfdb_db_record.file_name, 0, PATH_MAX);\ - memset(ctr_local->gfdb_db_record.old_file_name, 0, PATH_MAX);\ - ctr_local->gfdb_db_record.gfdb_fop_type = GFDB_FOP_INVALID_OP;\ - ctr_local->ia_inode_type = IA_INVAL;\ -} while (0) - - -static gf_ctr_local_t * -init_ctr_local_t (xlator_t *this) { - - gf_ctr_local_t *ctr_local = NULL; - - GF_ASSERT(this); - - ctr_local = mem_get0 (this->local_pool); - if (!ctr_local) { - gf_msg (GFDB_DATA_STORE, GF_LOG_ERROR, 0, - CTR_MSG_CREATE_CTR_LOCAL_ERROR_WIND, - "Error while creating ctr local"); - goto out; - } - - CLEAR_CTR_DB_RECORD (ctr_local); -out: - return ctr_local; -} - -static void 
-free_ctr_local (gf_ctr_local_t *ctr_local) -{ - if (ctr_local) - mem_put (ctr_local); -} - - - -/****************************************************************************** - * - * - * Context Carrier Structures - * - * - * ****************************************************************************/ - -/* - * Context Carrier structures are used to carry relavent information about - * inodes and links from the fops calls to the ctr_insert_wind. - * These structure just have pointers to the original data and donot - * do a deep copy of any data. This info is deep copied to - * ctr_local->gfdb_db_record and passed to insert_record() api of libgfdb. This - * info remains persistent for the unwind in ctr_local->gfdb_db_record - * and once used will be destroyed. - * - * gf_ctr_link_context_t : Context structure for hard links - * gf_ctr_inode_context_t : Context structure for inodes - * - * */ - - /*Context Carrier Structure for hard links*/ -typedef struct gf_ctr_link_context { - uuid_t *pargfid; - const char *basename; - /*basepath is redundent. 
Will go off*/ - const char *basepath; -} gf_ctr_link_context_t; - - /*Context Carrier Structure for inodes*/ -typedef struct gf_ctr_inode_context { - ia_type_t ia_type; - uuid_t *gfid; - gf_ctr_link_context_t *new_link_cx; - gf_ctr_link_context_t *old_link_cx; - gfdb_fop_type_t fop_type; - gfdb_fop_path_t fop_path; - gf_boolean_t is_internal_fop; - /* Indicating metadata fops */ - gf_boolean_t is_metadata_fop; -} gf_ctr_inode_context_t; - - -/*******************Util Macros for Context Carrier Structures*****************/ - -/*Checks if ctr_link_cx is sane!*/ -#define IS_CTR_LINK_CX_SANE(ctr_link_cx)\ -do {\ - if (ctr_link_cx) {\ - if (ctr_link_cx->pargfid)\ - GF_ASSERT (*(ctr_link_cx->pargfid));\ - GF_ASSERT (ctr_link_cx->basename);\ - GF_ASSERT (ctr_link_cx->basepath);\ - };\ -} while (0) - -/*Clear and fill the ctr_link_context with values*/ -#define FILL_CTR_LINK_CX(ctr_link_cx, _pargfid, _basename, _basepath, label)\ -do {\ - GF_VALIDATE_OR_GOTO ("ctr", ctr_link_cx, label);\ - GF_VALIDATE_OR_GOTO ("ctr", _pargfid, label);\ - GF_VALIDATE_OR_GOTO ("ctr", _basename, label);\ - GF_VALIDATE_OR_GOTO ("ctr", _basepath, label);\ - memset (ctr_link_cx, 0, sizeof (*ctr_link_cx));\ - ctr_link_cx->pargfid = &_pargfid;\ - ctr_link_cx->basename = _basename;\ - ctr_link_cx->basepath = _basepath;\ -} while (0) - -#define NEW_LINK_CX(ctr_inode_cx)\ - ctr_inode_cx->new_link_cx\ - -#define OLD_LINK_CX(ctr_inode_cx)\ - ctr_inode_cx->old_link_cx\ - -/*Checks if ctr_inode_cx is sane!*/ -#define IS_CTR_INODE_CX_SANE(ctr_inode_cx)\ -do {\ - GF_ASSERT (ctr_inode_cx);\ - GF_ASSERT (ctr_inode_cx->gfid);\ - GF_ASSERT (*(ctr_inode_cx->gfid));\ - GF_ASSERT (ctr_inode_cx->fop_type != GFDB_FOP_INVALID_OP);\ - GF_ASSERT (ctr_inode_cx->fop_path != GFDB_FOP_INVALID);\ - IS_CTR_LINK_CX_SANE (NEW_LINK_CX(ctr_inode_cx));\ - IS_CTR_LINK_CX_SANE (OLD_LINK_CX(ctr_inode_cx));\ -} while (0) - -/*Clear and fill the ctr_inode_context with values*/ -#define FILL_CTR_INODE_CONTEXT(ctr_inode_cx,\ - 
_ia_type,\ - _gfid,\ - _new_link_cx,\ - _old_link_cx,\ - _fop_type,\ - _fop_path)\ -do {\ - GF_ASSERT(ctr_inode_cx);\ - GF_ASSERT(_gfid);\ - GF_ASSERT(_fop_type != GFDB_FOP_INVALID_OP);\ - GF_ASSERT(_fop_path != GFDB_FOP_INVALID);\ - memset(ctr_inode_cx, 0, sizeof(*ctr_inode_cx));\ - ctr_inode_cx->ia_type = _ia_type;\ - ctr_inode_cx->gfid = &_gfid;\ - IS_CTR_LINK_CX_SANE(NEW_LINK_CX(ctr_inode_cx));\ - if (_new_link_cx)\ - NEW_LINK_CX(ctr_inode_cx) = _new_link_cx;\ - IS_CTR_LINK_CX_SANE(OLD_LINK_CX(ctr_inode_cx));\ - if (_old_link_cx)\ - OLD_LINK_CX(ctr_inode_cx) = _old_link_cx;\ - ctr_inode_cx->fop_type = _fop_type;\ - ctr_inode_cx->fop_path = _fop_path;\ -} while (0) - -/****************************************************************************** - * - * Util functions or macros used by - * insert wind and insert unwind - * - * ****************************************************************************/ - -/* - * If a bitrot fop - * */ -#define BITROT_FOP(frame)\ - (frame->root->pid == GF_CLIENT_PID_BITD ||\ - frame->root->pid == GF_CLIENT_PID_SCRUB) - - -/* - * If a rebalancer fop - * */ -#define REBALANCE_FOP(frame)\ - (frame->root->pid == GF_CLIENT_PID_DEFRAG) - -/* - * If its a tiering rebalancer fop - * */ -#define TIER_REBALANCE_FOP(frame)\ - (frame->root->pid == GF_CLIENT_PID_TIER_DEFRAG) - -/* - * If its a AFR SELF HEAL - * */ - #define AFR_SELF_HEAL_FOP(frame)\ - (frame->root->pid == GF_CLIENT_PID_AFR_SELF_HEALD) - -/* - * if a rebalancer fop goto - * */ -#define CTR_IF_REBALANCE_FOP_THEN_GOTO(frame, label)\ -do {\ - if (REBALANCE_FOP (frame))\ - goto label;\ -} while (0) - -/* - * Internal fop - * - * */ -static inline -gf_boolean_t is_internal_fop (call_frame_t *frame, - dict_t *xdata) -{ - gf_boolean_t ret = _gf_false; - - GF_ASSERT(frame); - GF_ASSERT(frame->root); - - if (AFR_SELF_HEAL_FOP (frame)) { - ret = _gf_true; - } - if (BITROT_FOP (frame)) { - ret = _gf_true; - } - if (REBALANCE_FOP (frame) || TIER_REBALANCE_FOP (frame)) { - ret = 
_gf_true; - if (xdata && dict_get (xdata, CTR_ATTACH_TIER_LOOKUP)) { - ret = _gf_false; - } - } - if (xdata && dict_get (xdata, GLUSTERFS_INTERNAL_FOP_KEY)) { - ret = _gf_true; - } - - return ret; -} - -#define CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, dict, label)\ -do {\ - if (is_internal_fop (frame, dict)) \ - goto label; \ -} while (0) - - -/* - * IS CTR Xlator is disabled then goto to label - * */ - #define CTR_IS_DISABLED_THEN_GOTO(this, label)\ - do {\ - gf_ctr_private_t *_priv = NULL;\ - GF_ASSERT (this);\ - GF_ASSERT (this->private);\ - _priv = this->private;\ - if (!_priv->enabled)\ - goto label;\ - } while (0) - -/* - * IS CTR record metadata heat is disabled then goto to label - * */ - #define CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, label)\ - do {\ - gf_ctr_private_t *_priv = NULL;\ - GF_ASSERT (this);\ - GF_ASSERT (this->private);\ - _priv = this->private;\ - if (!_priv->ctr_record_metadata_heat)\ - goto label;\ - } while (0) - -int -fill_db_record_for_unwind (xlator_t *this, - gf_ctr_local_t *ctr_local, - gfdb_fop_type_t fop_type, - gfdb_fop_path_t fop_path); - -int -fill_db_record_for_wind (xlator_t *this, - gf_ctr_local_t *ctr_local, - gf_ctr_inode_context_t *ctr_inode_cx); - -/******************************************************************************* - * CTR INSERT WIND - * ***************************************************************************** - * Function used to insert/update record into the database during a wind fop - * This function creates ctr_local structure into the frame of the fop - * call. 
- * ****************************************************************************/ -static inline int -ctr_insert_wind (call_frame_t *frame, - xlator_t *this, - gf_ctr_inode_context_t *ctr_inode_cx) -{ - int ret = -1; - gf_ctr_private_t *_priv = NULL; - gf_ctr_local_t *ctr_local = NULL; - - GF_ASSERT(frame); - GF_ASSERT(frame->root); - GF_ASSERT(this); - IS_CTR_INODE_CX_SANE(ctr_inode_cx); - - _priv = this->private; - GF_ASSERT (_priv); - - GF_ASSERT(_priv->_db_conn); - - /*If record_wind option of CTR is on record wind for - * regular files only*/ - if (_priv->ctr_record_wind && ctr_inode_cx->ia_type != IA_IFDIR) { - frame->local = init_ctr_local_t (this); - if (!frame->local) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_CREATE_CTR_LOCAL_ERROR_WIND, - "WIND: Error while creating ctr local"); - goto out; - }; - ctr_local = frame->local; - ctr_local->client_pid = frame->root->pid; - ctr_local->is_internal_fop = ctr_inode_cx->is_internal_fop; - - /* Decide whether to record counters or not */ - CTR_DB_REC(ctr_local).do_record_counters = _gf_false; - /* If record counter is enabled */ - if (_priv->ctr_record_counter) { - /* If not a internal fop */ - if (!(ctr_local->is_internal_fop)) { - /* If its a metadata fop AND - * record metadata heat - * OR - * its NOT a metadata fop */ - if ((ctr_inode_cx->is_metadata_fop - && _priv->ctr_record_metadata_heat) - || - (!ctr_inode_cx->is_metadata_fop)) { - CTR_DB_REC(ctr_local).do_record_counters - = _gf_true; - } - } - } - - /* Decide whether to record times or not - * For non internal FOPS record times as usual*/ - CTR_DB_REC(ctr_local).do_record_times = _gf_false; - if (!ctr_local->is_internal_fop) { - /* If its a metadata fop AND - * record metadata heat - * OR - * its NOT a metadata fop */ - if ((ctr_inode_cx->is_metadata_fop && - _priv->ctr_record_metadata_heat) - || - (!ctr_inode_cx->is_metadata_fop)) { - CTR_DB_REC(ctr_local).do_record_times = - (_priv->ctr_record_wind - || _priv->ctr_record_unwind); - } - } - /* 
when its a internal FOPS*/ - else { - /* Record times only for create - * i.e when the inode is created */ - CTR_DB_REC(ctr_local).do_record_times = - (isdentrycreatefop(ctr_inode_cx->fop_type)) ? - _gf_true : _gf_false; - } - - /*Fill the db record for insertion*/ - ret = fill_db_record_for_wind (this, ctr_local, ctr_inode_cx); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_FILL_CTR_LOCAL_ERROR_WIND, - "WIND: Error filling ctr local"); - goto out; - } - - /*Insert the db record*/ - ret = insert_record (_priv->_db_conn, - &ctr_local->gfdb_db_record); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_INSERT_RECORD_WIND_FAILED, - "WIND: Inserting of record failed!"); - goto out; - } - } - ret = 0; -out: - - if (ret) { - free_ctr_local (ctr_local); - frame->local = NULL; - } - - return ret; -} - - - - -/******************************************************************************* - * CTR INSERT UNWIND - * ***************************************************************************** - * Function used to insert/update record into the database during a unwind fop - * This function destroys ctr_local structure into the frame of the fop - * call at the end. 
- * ****************************************************************************/ -static inline int -ctr_insert_unwind (call_frame_t *frame, - xlator_t *this, - gfdb_fop_type_t fop_type, - gfdb_fop_path_t fop_path) -{ - int ret = -1; - gf_ctr_private_t *_priv = NULL; - gf_ctr_local_t *ctr_local = NULL; - - GF_ASSERT(frame); - GF_ASSERT(this); - - _priv = this->private; - GF_ASSERT (_priv); - - GF_ASSERT(_priv->_db_conn); - - ctr_local = frame->local; - - if (ctr_local - && (_priv->ctr_record_unwind || isdentryfop(fop_type)) - && (ctr_local->ia_inode_type != IA_IFDIR)) { - - CTR_DB_REC(ctr_local).do_record_uwind_time = - _priv->ctr_record_unwind; - - ret = fill_db_record_for_unwind(this, ctr_local, fop_type, - fop_path); - if (ret == -1) { - gf_msg(this->name, GF_LOG_ERROR, 0, - CTR_MSG_FILL_CTR_LOCAL_ERROR_UNWIND, - "UNWIND: Error filling ctr local"); - goto out; - } - - ret = insert_record(_priv->_db_conn, - &ctr_local->gfdb_db_record); - if (ret == -1) { - gf_msg(this->name, GF_LOG_ERROR, 0, - CTR_MSG_FILL_CTR_LOCAL_ERROR_UNWIND, - "UNWIND: Error filling ctr local"); - goto out; - } - } - ret = 0; -out: - free_ctr_local (ctr_local); - frame->local = NULL; - return ret; -} - -/******************************* Hard link function ***************************/ - -static inline gf_boolean_t -__is_inode_expired (ctr_xlator_ctx_t *ctr_xlator_ctx, - gf_ctr_private_t *_priv, - gfdb_time_t *current_time) -{ - gf_boolean_t ret = _gf_false; - uint64_t time_diff = 0; - - GF_ASSERT (ctr_xlator_ctx); - GF_ASSERT (_priv); - GF_ASSERT (current_time); - - time_diff = current_time->tv_sec - - ctr_xlator_ctx->inode_heal_period; - - ret = (time_diff >= _priv->ctr_inode_heal_expire_period) ? 
- _gf_true : _gf_false; - return ret; -} - -static inline gf_boolean_t -__is_hardlink_expired (ctr_hard_link_t *ctr_hard_link, - gf_ctr_private_t *_priv, - gfdb_time_t *current_time) -{ - gf_boolean_t ret = _gf_false; - uint64_t time_diff = 0; - - GF_ASSERT (ctr_hard_link); - GF_ASSERT (_priv); - GF_ASSERT (current_time); - - time_diff = current_time->tv_sec - - ctr_hard_link->hardlink_heal_period; - - ret = ret || (time_diff >= _priv->ctr_hardlink_heal_expire_period) ? - _gf_true : _gf_false; - - return ret; -} - - -/* Return values of heal*/ -typedef enum ctr_heal_ret_val { - CTR_CTX_ERROR = -1, - /* No healing required */ - CTR_TRY_NO_HEAL = 0, - /* Try healing hard link */ - CTR_TRY_HARDLINK_HEAL = 1, - /* Try healing inode */ - CTR_TRY_INODE_HEAL = 2, -} ctr_heal_ret_val_t; - - - -/** - * @brief Function to add hard link to the inode context variable. - * The inode context maintainences a in-memory list. This is used - * smart healing of database. - * @param frame of the FOP - * @param this is the Xlator instant - * @param inode - * @return Return ctr_heal_ret_val_t - */ - -static inline ctr_heal_ret_val_t -add_hard_link_ctx (call_frame_t *frame, - xlator_t *this, - inode_t *inode) -{ - ctr_heal_ret_val_t ret_val = CTR_TRY_NO_HEAL; - int ret = -1; - gf_ctr_local_t *ctr_local = NULL; - ctr_xlator_ctx_t *ctr_xlator_ctx = NULL; - ctr_hard_link_t *ctr_hard_link = NULL; - gf_ctr_private_t *_priv = NULL; - gfdb_time_t current_time = {0}; - - - GF_ASSERT (frame); - GF_ASSERT (this); - GF_ASSERT (inode); - GF_ASSERT (this->private); - - _priv = this->private; - - ctr_local = frame->local; - if (!ctr_local) { - goto out; - } - - ctr_xlator_ctx = init_ctr_xlator_ctx (this, inode); - if (!ctr_xlator_ctx) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_ACCESS_CTR_INODE_CONTEXT_FAILED, - "Failed accessing ctr inode context"); - goto out; - } - - LOCK (&ctr_xlator_ctx->lock); - - /* Check if the hard link already exists - * in the ctr inode context*/ - ctr_hard_link = 
ctr_search_hard_link_ctx (this, - ctr_xlator_ctx, - CTR_DB_REC(ctr_local).pargfid, - CTR_DB_REC(ctr_local).file_name); - /* if there then ignore */ - if (ctr_hard_link) { - - ret = gettimeofday (¤t_time, NULL); - if (ret == -1) { - gf_log (this->name, GF_LOG_ERROR, - "Failed to get current time"); - ret_val = CTR_CTX_ERROR; - goto unlock; - } - - if (__is_hardlink_expired (ctr_hard_link, - _priv, ¤t_time)) { - ctr_hard_link->hardlink_heal_period = - current_time.tv_sec; - ret_val = ret_val | CTR_TRY_HARDLINK_HEAL; - } - - if (__is_inode_expired (ctr_xlator_ctx, - _priv, ¤t_time)) { - ctr_xlator_ctx->inode_heal_period = - current_time.tv_sec; - ret_val = ret_val | CTR_TRY_INODE_HEAL; - } - - goto unlock; - } - - /* Add the hard link to the list*/ - ret = ctr_add_hard_link (this, ctr_xlator_ctx, - CTR_DB_REC(ctr_local).pargfid, - CTR_DB_REC(ctr_local).file_name); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_ADD_HARDLINK_TO_CTR_INODE_CONTEXT_FAILED, - "Failed to add hardlink to the ctr inode context"); - ret_val = CTR_CTX_ERROR; - goto unlock; - } - - ret_val = CTR_TRY_NO_HEAL; -unlock: - UNLOCK (&ctr_xlator_ctx->lock); -out: - return ret_val; -} - -static inline int -delete_hard_link_ctx (call_frame_t *frame, - xlator_t *this, - inode_t *inode) -{ - int ret = -1; - ctr_xlator_ctx_t *ctr_xlator_ctx = NULL; - gf_ctr_local_t *ctr_local = NULL; - - GF_ASSERT (frame); - GF_ASSERT (this); - GF_ASSERT (inode); - - ctr_local = frame->local; - if (!ctr_local) { - goto out; - } - - ctr_xlator_ctx = get_ctr_xlator_ctx (this, inode); - if (!ctr_xlator_ctx) { - /* Since there is no ctr inode context so nothing more to do */ - ret = 0; - goto out; - } - - ret = ctr_delete_hard_link (this, ctr_xlator_ctx, - CTR_DB_REC(ctr_local).pargfid, - CTR_DB_REC(ctr_local).file_name); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_DELETE_HARDLINK_FAILED, - "Failed to delete hard link"); - goto out; - } - - ret = 0; - -out: - return ret; -} - -static inline int 
-update_hard_link_ctx (call_frame_t *frame, - xlator_t *this, - inode_t *inode) -{ - int ret = -1; - ctr_xlator_ctx_t *ctr_xlator_ctx = NULL; - gf_ctr_local_t *ctr_local = NULL; - - GF_ASSERT (frame); - GF_ASSERT (this); - GF_ASSERT (inode); - - ctr_local = frame->local; - if (!ctr_local) { - goto out; - } - - ctr_xlator_ctx = init_ctr_xlator_ctx (this, inode); - if (!ctr_xlator_ctx) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_ACCESS_CTR_INODE_CONTEXT_FAILED, - "Failed accessing ctr inode context"); - goto out; - } - - ret = ctr_update_hard_link (this, ctr_xlator_ctx, - CTR_DB_REC(ctr_local).pargfid, - CTR_DB_REC(ctr_local).file_name, - CTR_DB_REC(ctr_local).old_pargfid, - CTR_DB_REC(ctr_local).old_file_name); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_DELETE_HARDLINK_FAILED, - "Failed to delete hard link"); - goto out; - } - - ret = 0; - -out: - return ret; -} - - -/****************************************************************************** - * - * CTR xlator init related functions - * - * - * ****************************************************************************/ -int -extract_db_params (xlator_t *this, - dict_t *params_dict, - gfdb_db_type_t db_type); - -int -extract_ctr_options (xlator_t *this, - gf_ctr_private_t *_priv); - -#endif diff --git a/xlators/features/changetimerecorder/src/ctr-xlator-ctx.c b/xlators/features/changetimerecorder/src/ctr-xlator-ctx.c deleted file mode 100644 index c5900eac929..00000000000 --- a/xlators/features/changetimerecorder/src/ctr-xlator-ctx.c +++ /dev/null @@ -1,409 +0,0 @@ -/* - Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. 
-*/ - -#include "ctr-xlator-ctx.h" -#include "ctr-messages.h" -#include <time.h> -#include <sys/time.h> - -#define IS_THE_ONLY_HARDLINK(ctr_hard_link)\ - (ctr_hard_link->list.next == ctr_hard_link->list.prev) - - -static void -fini_ctr_hard_link (ctr_hard_link_t **ctr_hard_link) { - - GF_ASSERT (ctr_hard_link); - - if (*ctr_hard_link) - return; - GF_FREE ((*ctr_hard_link)->base_name); - GF_FREE (*ctr_hard_link); - *ctr_hard_link = NULL; -} - - -/* Please lock the ctr_xlator_ctx before using this function */ -ctr_hard_link_t * -ctr_search_hard_link_ctx (xlator_t *this, - ctr_xlator_ctx_t *ctr_xlator_ctx, - uuid_t pgfid, - const char *base_name) -{ - ctr_hard_link_t *_hard_link = NULL; - ctr_hard_link_t *searched_hardlink = NULL; - - GF_ASSERT (this); - GF_ASSERT (ctr_xlator_ctx); - - if (pgfid == NULL || base_name == NULL) - goto out; - - /*linear search*/ - list_for_each_entry (_hard_link, &ctr_xlator_ctx->hardlink_list, list) { - if (gf_uuid_compare (_hard_link->pgfid, pgfid) == 0 - && _hard_link->base_name - && strcmp(_hard_link->base_name, base_name) == 0) { - searched_hardlink = _hard_link; - break; - } - } - -out: - return searched_hardlink; -} - - - - -/* Please lock the ctr_xlator_ctx before using this function */ -int -ctr_add_hard_link (xlator_t *this, - ctr_xlator_ctx_t *ctr_xlator_ctx, - uuid_t pgfid, - const char *base_name) -{ - int ret = -1; - ctr_hard_link_t *ctr_hard_link = NULL; - struct timeval current_time = {0}; - - GF_ASSERT (this); - GF_ASSERT (ctr_xlator_ctx); - - if (pgfid == NULL || base_name == NULL) - goto out; - - ctr_hard_link = GF_CALLOC (1, sizeof (*ctr_hard_link), - gf_ctr_mt_hard_link_t); - if (!ctr_hard_link) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - CTR_MSG_CALLOC_FAILED, "Failed allocating " - "ctr_hard_link"); - goto out; - } - - /*Initialize the ctr_hard_link object and - * Assign the values : parent GFID and basename*/ - INIT_LIST_HEAD (&ctr_hard_link->list); - gf_uuid_copy (ctr_hard_link->pgfid, pgfid); - ret = 
gf_asprintf(&ctr_hard_link->base_name, "%s", base_name); - if (ret < 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_COPY_FAILED, "Failed copying basename" - "to ctr_hard_link"); - goto error; - } - - ret = gettimeofday (¤t_time, NULL); - if (ret == -1) { - gf_log (this->name, GF_LOG_ERROR, - "Failed to get current time"); - goto error; - } - - /*Add the hard link to the list*/ - list_add_tail (&ctr_hard_link->list, - &ctr_xlator_ctx->hardlink_list); - - ctr_hard_link->hardlink_heal_period = current_time.tv_sec; - - /*aal izz well!*/ - ret = 0; - goto out; -error: - GF_FREE (ctr_hard_link); -out: - return ret; -} - -static void -__delete_hard_link_from_list (ctr_hard_link_t **ctr_hard_link) -{ - GF_ASSERT (ctr_hard_link); - GF_ASSERT (*ctr_hard_link); - - /*Remove hard link from list*/ - list_del(&(*ctr_hard_link)->list); - fini_ctr_hard_link (ctr_hard_link); -} - - -int -ctr_delete_hard_link (xlator_t *this, - ctr_xlator_ctx_t *ctr_xlator_ctx, - uuid_t pgfid, - const char *base_name) -{ - int ret = -1; - ctr_hard_link_t *ctr_hard_link = NULL; - - GF_ASSERT (this); - GF_ASSERT (ctr_xlator_ctx); - - - LOCK (&ctr_xlator_ctx->lock); - - /*Check if the hard link is present */ - ctr_hard_link = ctr_search_hard_link_ctx (this, ctr_xlator_ctx, - pgfid, base_name); - if (!ctr_hard_link) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_HARDLINK_MISSING_IN_LIST, - "Hard link doesnt exist in the list"); - goto out; - } - - __delete_hard_link_from_list (&ctr_hard_link); - ctr_hard_link = NULL; - - ret = 0; -out: - UNLOCK (&ctr_xlator_ctx->lock); - - return ret; -} - - - - -int -ctr_update_hard_link (xlator_t *this, - ctr_xlator_ctx_t *ctr_xlator_ctx, - uuid_t pgfid, - const char *base_name, - uuid_t old_pgfid, - const char *old_base_name) -{ - int ret = -1; - ctr_hard_link_t *ctr_hard_link = NULL; - struct timeval current_time = {0}; - - GF_ASSERT (this); - GF_ASSERT (ctr_xlator_ctx); - - - LOCK (&ctr_xlator_ctx->lock); - - /*Check if the hard link is present */ - 
ctr_hard_link = ctr_search_hard_link_ctx (this, ctr_xlator_ctx, - old_pgfid, old_base_name); - if (!ctr_hard_link) { - gf_msg_trace (this->name, 0, "Hard link doesnt exist" - " in the list"); - /* Since the hard link is not present in the list - * we add it to the list */ - ret = ctr_add_hard_link (this, ctr_xlator_ctx, - pgfid, base_name); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_ADD_HARDLINK_TO_LIST_FAILED, - "Failed adding hard link to the list"); - goto out; - } - ret = 0; - goto out; - } - - /* update the hard link */ - gf_uuid_copy (ctr_hard_link->pgfid, pgfid); - GF_FREE (ctr_hard_link->base_name); - ret = gf_asprintf(&ctr_hard_link->base_name, "%s", base_name); - if (ret < 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - CTR_MSG_COPY_FAILED, "Failed copying basename" - "to ctr_hard_link"); - /* delete the corrupted entry */ - __delete_hard_link_from_list (&ctr_hard_link); - ctr_hard_link = NULL; - goto out; - } - - ret = gettimeofday (¤t_time, NULL); - if (ret == -1) { - gf_log (this->name, GF_LOG_ERROR, - "Failed to get current time"); - ctr_hard_link->hardlink_heal_period = 0; - } else { - ctr_hard_link->hardlink_heal_period = current_time.tv_sec; - } - - ret = 0; - -out: - UNLOCK (&ctr_xlator_ctx->lock); - - return ret; -} - - - - -/* Delete all hardlinks */ -static int -ctr_delete_all_hard_link (xlator_t *this, - ctr_xlator_ctx_t *ctr_xlator_ctx) -{ - int ret = -1; - ctr_hard_link_t *ctr_hard_link = NULL; - ctr_hard_link_t *tmp = NULL; - - GF_ASSERT (ctr_xlator_ctx); - - LOCK (&ctr_xlator_ctx->lock); - - list_for_each_entry_safe(ctr_hard_link, tmp, - &ctr_xlator_ctx->hardlink_list, list) - { - /*Remove hard link from list*/ - __delete_hard_link_from_list (&ctr_hard_link); - ctr_hard_link = NULL; - - } - - - UNLOCK (&ctr_xlator_ctx->lock); - - ret = 0; - - return ret; -} - - -/* Please lock the inode before using this function */ -static ctr_xlator_ctx_t * -__get_ctr_xlator_ctx (xlator_t *this, - inode_t *inode) -{ - int ret = 0; - 
uint64_t _addr = 0; - ctr_xlator_ctx_t *ctr_xlator_ctx = NULL; - - GF_ASSERT (this); - GF_ASSERT (inode); - - ret = __inode_ctx_get (inode, this, &_addr); - if (ret < 0) - _addr = 0; - if (_addr != 0) { - ctr_xlator_ctx = (ctr_xlator_ctx_t *) (long)_addr; - } - - return ctr_xlator_ctx; -} - - -ctr_xlator_ctx_t * -init_ctr_xlator_ctx (xlator_t *this, - inode_t *inode) -{ - int ret = -1; - uint64_t _addr = 0; - ctr_xlator_ctx_t *ctr_xlator_ctx = NULL; - struct timeval current_time = {0}; - - GF_ASSERT (this); - GF_ASSERT (inode); - - LOCK (&inode->lock); - { - ctr_xlator_ctx = __get_ctr_xlator_ctx (this, inode); - if (ctr_xlator_ctx) { - ret = 0; - goto out; - } - ctr_xlator_ctx = GF_CALLOC (1, sizeof (*ctr_xlator_ctx), - gf_ctr_mt_xlator_ctx); - if (!ctr_xlator_ctx) - goto out; - - ret = LOCK_INIT (&ctr_xlator_ctx->lock); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, ret, - CTR_MSG_INIT_LOCK_FAILED, - "Failed init lock %s", strerror(ret)); - goto out; - } - _addr = (uint64_t) ctr_xlator_ctx; - - ret = __inode_ctx_set (inode, this, &_addr); - if (ret) { - goto out; - } - - INIT_LIST_HEAD (&ctr_xlator_ctx->hardlink_list); - - ret = gettimeofday (¤t_time, NULL); - if (ret == -1) { - gf_log (this->name, GF_LOG_ERROR, - "Failed to get current time"); - goto out; - } - - ctr_xlator_ctx->inode_heal_period = current_time.tv_sec; - } - ret = 0; -out: - if (ret) { - GF_FREE (ctr_xlator_ctx); - ctr_xlator_ctx = NULL; - } - - UNLOCK (&inode->lock); - - return ctr_xlator_ctx; -} - - - - -void -fini_ctr_xlator_ctx (xlator_t *this, - inode_t *inode) -{ - int ret = 0; - uint64_t _addr = 0; - ctr_xlator_ctx_t *ctr_xlator_ctx = NULL; - - - inode_ctx_del (inode, this, &_addr); - if (!_addr) - return; - - ctr_xlator_ctx = (ctr_xlator_ctx_t *) (long) _addr; - - ret = ctr_delete_all_hard_link (this, ctr_xlator_ctx); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING , 0, - CTR_MSG_DELETE_HARDLINK_FAILED, "Failed deleting all " - "hard links from inode context"); - } - - LOCK_DESTROY 
(&ctr_xlator_ctx->lock); - - GF_FREE (ctr_xlator_ctx); - -} - - - - -ctr_xlator_ctx_t * -get_ctr_xlator_ctx (xlator_t *this, - inode_t *inode) -{ - ctr_xlator_ctx_t *ctr_xlator_ctx = NULL; - - LOCK (&inode->lock); - ctr_xlator_ctx = __get_ctr_xlator_ctx (this, inode); - UNLOCK (&inode->lock); - - return ctr_xlator_ctx; -} - diff --git a/xlators/features/changetimerecorder/src/ctr-xlator-ctx.h b/xlators/features/changetimerecorder/src/ctr-xlator-ctx.h deleted file mode 100644 index 7f1c6cb1712..00000000000 --- a/xlators/features/changetimerecorder/src/ctr-xlator-ctx.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -#ifndef __CTR_XLATOR_CTX_H -#define __CTR_XLATOR_CTX_H - -#include "xlator.h" -#include "ctr_mem_types.h" -#include "iatt.h" -#include "glusterfs.h" -#include "xlator.h" -#include "logging.h" -#include "locking.h" -#include "common-utils.h" -#include <time.h> -#include <sys/time.h> - -typedef struct ctr_hard_link { - uuid_t pgfid; - char *base_name; - /* Hardlink expiry : Defines the expiry period after which a - * database heal is attempted. 
*/ - uint64_t hardlink_heal_period; - struct list_head list; -} ctr_hard_link_t; - -typedef struct ctr_xlator_ctx { - /* This represents the looked up hardlinks - * NOTE: This doesn't represent all physical hardlinks of the inode*/ - struct list_head hardlink_list; - uint64_t inode_heal_period; - gf_lock_t lock; -} ctr_xlator_ctx_t; - - -ctr_hard_link_t * -ctr_search_hard_link_ctx (xlator_t *this, - ctr_xlator_ctx_t *ctr_xlator_ctx, - uuid_t pgfid, - const char *base_name); - - -int -ctr_add_hard_link (xlator_t *this, - ctr_xlator_ctx_t *ctr_xlator_ctx, - uuid_t pgfid, - const char *base_name); - - - -int -ctr_delete_hard_link (xlator_t *this, - ctr_xlator_ctx_t *ctr_xlator_ctx, - uuid_t pgfid, - const char *base_name); - - -int -ctr_update_hard_link (xlator_t *this, - ctr_xlator_ctx_t *ctr_xlator_ctx, - uuid_t pgfid, - const char *base_name, - uuid_t old_pgfid, - const char *old_base_name); - - -ctr_xlator_ctx_t * -get_ctr_xlator_ctx (xlator_t *this, - inode_t *inode); - - - - -ctr_xlator_ctx_t * -init_ctr_xlator_ctx (xlator_t *this, - inode_t *inode); - - -void -fini_ctr_xlator_ctx (xlator_t *this, - inode_t *inode); - -#endif diff --git a/xlators/features/changetimerecorder/Makefile.am b/xlators/features/cloudsync/Makefile.am index a985f42a877..a985f42a877 100644 --- a/xlators/features/changetimerecorder/Makefile.am +++ b/xlators/features/cloudsync/Makefile.am diff --git a/xlators/features/cloudsync/src/Makefile.am b/xlators/features/cloudsync/src/Makefile.am new file mode 100644 index 00000000000..e2a277e372b --- /dev/null +++ b/xlators/features/cloudsync/src/Makefile.am @@ -0,0 +1,46 @@ +SUBDIRS = cloudsync-plugins + +xlator_LTLIBRARIES = cloudsync.la + +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features + +cloudsync_sources = cloudsync.c + +CLOUDSYNC_SRC = $(top_srcdir)/xlators/features/cloudsync/src +CLOUDSYNC_BLD = $(top_builddir)/xlators/features/cloudsync/src + +cloudsynccommon_sources = $(CLOUDSYNC_SRC)/cloudsync-common.c + 
+noinst_HEADERS = $(CLOUDSYNC_BLD)/cloudsync.h \ + $(CLOUDSYNC_BLD)/cloudsync-mem-types.h \ + $(CLOUDSYNC_BLD)/cloudsync-messages.h \ + $(CLOUDSYNC_BLD)/cloudsync-common.h + +cloudsync_la_SOURCES = $(cloudsync_sources) $(cloudsynccommon_sources) + +nodist_cloudsync_la_SOURCES = cloudsync-autogen-fops.c cloudsync-autogen-fops.h +BUILT_SOURCES = cloudsync-autogen-fops.h + +cloudsync_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) + +cloudsync_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(LIB_DL) + +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \ + -DCS_PLUGINDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/cloudsync-plugins\" +AM_CFLAGS = -Wall -fno-strict-aliasing $(GF_CFLAGS) + +noinst_PYTHON = cloudsync-fops-c.py cloudsync-fops-h.py +EXTRA_DIST = cloudsync-autogen-fops-tmpl.c cloudsync-autogen-fops-tmpl.h + +cloudsync-autogen-fops.c: cloudsync-fops-c.py cloudsync-autogen-fops-tmpl.c + $(PYTHON) $(CLOUDSYNC_SRC)/cloudsync-fops-c.py \ + $(CLOUDSYNC_SRC)/cloudsync-autogen-fops-tmpl.c > $@ + +cloudsync-autogen-fops.h: cloudsync-fops-h.py cloudsync-autogen-fops-tmpl.h + $(PYTHON) $(CLOUDSYNC_SRC)/cloudsync-fops-h.py \ + $(CLOUDSYNC_SRC)/cloudsync-autogen-fops-tmpl.h > $@ + +CLEANFILES = $(nodist_cloudsync_la_SOURCES) + +uninstall-local: + rm -f $(DESTDIR)$(xlatordir)/cloudsync.so diff --git a/xlators/features/cloudsync/src/cloudsync-autogen-fops-tmpl.c b/xlators/features/cloudsync/src/cloudsync-autogen-fops-tmpl.c new file mode 100644 index 00000000000..ee63f983980 --- /dev/null +++ b/xlators/features/cloudsync/src/cloudsync-autogen-fops-tmpl.c @@ -0,0 +1,30 @@ +/* + Copyright (c) 2008-2015 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. 
+ + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +/* File: cloudsync-autogen-fops-tmpl.c + * This file contains the CLOUDSYNC autogenerated FOPs. This is run through + * the code generator, generator.py to generate the required FOPs. + */ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include <dlfcn.h> + +#include <glusterfs/glusterfs.h> +#include <glusterfs/xlator.h> +#include <glusterfs/defaults.h> +#include "cloudsync.h" +#include "cloudsync-common.h" +#include <glusterfs/call-stub.h> + +#pragma generate diff --git a/xlators/features/changetimerecorder/src/ctr_mem_types.h b/xlators/features/cloudsync/src/cloudsync-autogen-fops-tmpl.h index f408c028e24..d922c77d8aa 100644 --- a/xlators/features/changetimerecorder/src/ctr_mem_types.h +++ b/xlators/features/cloudsync/src/cloudsync-autogen-fops-tmpl.h @@ -8,17 +8,17 @@ cases as published by the Free Software Foundation. */ +/* File: clousync-autogen-fops-tmpl.h + * This file contains the cloudsync autogenerated FOPs declarations. + */ -#ifndef __CTR_MEM_TYPES_H__ -#define __CTR_MEM_TYPES_H__ +#ifndef _CLOUDSYNC_AUTOGEN_FOPS_H +#define _CLOUDSYNC_AUTOGEN_FOPS_H -#include "gfdb_mem-types.h" +#include <glusterfs/xlator.h> +#include "cloudsync.h" +#include "cloudsync-common.h" -enum gf_ctr_mem_types_ { - gf_ctr_mt_private_t = gfdb_mt_end + 1, - gf_ctr_mt_xlator_ctx, - gf_ctr_mt_hard_link_t, - gf_ctr_mt_end -}; -#endif +#pragma generate +#endif /* _CLOUDSYNC_AUTOGEN_FOPS_H */ diff --git a/xlators/features/cloudsync/src/cloudsync-common.c b/xlators/features/cloudsync/src/cloudsync-common.c new file mode 100644 index 00000000000..445a31b90e7 --- /dev/null +++ b/xlators/features/cloudsync/src/cloudsync-common.c @@ -0,0 +1,60 @@ +/* + Copyright (c) 2018 Red Hat, Inc. 
<http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include "cloudsync-common.h" + +void +cs_xattrinfo_wipe(cs_local_t *local) +{ + if (local->xattrinfo.lxattr) { + if (local->xattrinfo.lxattr->file_path) + GF_FREE(local->xattrinfo.lxattr->file_path); + + if (local->xattrinfo.lxattr->volname) + GF_FREE(local->xattrinfo.lxattr->volname); + + GF_FREE(local->xattrinfo.lxattr); + } +} + +void +cs_local_wipe(xlator_t *this, cs_local_t *local) +{ + if (!local) + return; + + loc_wipe(&local->loc); + + if (local->fd) { + fd_unref(local->fd); + local->fd = NULL; + } + + if (local->stub) { + call_stub_destroy(local->stub); + local->stub = NULL; + } + + if (local->xattr_req) + dict_unref(local->xattr_req); + + if (local->xattr_rsp) + dict_unref(local->xattr_rsp); + + if (local->dlfd) + fd_unref(local->dlfd); + + if (local->remotepath) + GF_FREE(local->remotepath); + + cs_xattrinfo_wipe(local); + + mem_put(local); +} diff --git a/xlators/features/cloudsync/src/cloudsync-common.h b/xlators/features/cloudsync/src/cloudsync-common.h new file mode 100644 index 00000000000..11d233460a4 --- /dev/null +++ b/xlators/features/cloudsync/src/cloudsync-common.h @@ -0,0 +1,134 @@ +/* + Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ +#ifndef _CLOUDSYNC_COMMON_H +#define _CLOUDSYNC_COMMON_H + +#include <glusterfs/glusterfs.h> +#include <glusterfs/call-stub.h> +#include <glusterfs/xlator.h> +#include <glusterfs/syncop.h> +#include <glusterfs/compat-errno.h> +#include "cloudsync-mem-types.h" +#include "cloudsync-messages.h" + +typedef struct cs_loc_xattr { + char *file_path; + uuid_t uuid; + uuid_t gfid; + char *volname; +} cs_loc_xattr_t; + +typedef struct cs_size_xattr { + uint64_t size; + uint64_t blksize; + uint64_t blocks; +} cs_size_xattr_t; + +typedef struct cs_local { + loc_t loc; + fd_t *fd; + call_stub_t *stub; + call_frame_t *main_frame; + int op_errno; + int op_ret; + fd_t *dlfd; + off_t dloffset; + struct iatt stbuf; + dict_t *xattr_rsp; + dict_t *xattr_req; + glusterfs_fop_t fop; + gf_boolean_t locked; + int call_cnt; + inode_t *inode; + char *remotepath; + + struct { + /* offset, flags and size are the information needed + * by read fop for remote read operation. These will be + * populated in cloudsync read fop, before being passed + * on to the plugin performing remote read. 
+ */ + off_t offset; + uint32_t flags; + size_t size; + cs_loc_xattr_t *lxattr; + } xattrinfo; + +} cs_local_t; + +typedef int (*fop_download_t)(call_frame_t *frame, void *config); + +typedef int (*fop_remote_read_t)(call_frame_t *, void *); + +typedef void *(*store_init)(xlator_t *this); + +typedef int (*store_reconfigure)(xlator_t *this, dict_t *options); + +typedef void (*store_fini)(void *config); + +struct cs_remote_stores { + char *name; /* store name */ + void *config; /* store related information */ + fop_download_t dlfop; /* store specific download function */ + fop_remote_read_t rdfop; /* store specific read function */ + store_init init; /* store init to initialize store config */ + store_reconfigure reconfigure; /* reconfigure store config */ + store_fini fini; + void *handle; /* shared library handle*/ +}; + +typedef struct cs_private { + xlator_t *this; + struct cs_remote_stores *stores; + gf_boolean_t abortdl; + pthread_spinlock_t lock; + gf_boolean_t remote_read; +} cs_private_t; + +void +cs_local_wipe(xlator_t *this, cs_local_t *local); + +void +cs_xattrinfo_wipe(cs_local_t *local); + +#define CS_STACK_UNWIND(fop, frame, params...) 
\ + do { \ + cs_local_t *__local = NULL; \ + xlator_t *__xl = NULL; \ + if (frame) { \ + __xl = frame->this; \ + __local = frame->local; \ + frame->local = NULL; \ + } \ + STACK_UNWIND_STRICT(fop, frame, params); \ + cs_local_wipe(__xl, __local); \ + } while (0) + +#define CS_STACK_DESTROY(frame) \ + do { \ + cs_local_t *__local = NULL; \ + xlator_t *__xl = NULL; \ + __xl = frame->this; \ + __local = frame->local; \ + frame->local = NULL; \ + STACK_DESTROY(frame->root); \ + cs_local_wipe(__xl, __local); \ + } while (0) + +typedef struct store_methods { + int (*fop_download)(call_frame_t *frame, void *config); + int (*fop_remote_read)(call_frame_t *, void *); + /* return type should be the store config */ + void *(*fop_init)(xlator_t *this); + int (*fop_reconfigure)(xlator_t *this, dict_t *options); + void (*fop_fini)(void *config); +} store_methods_t; + +#endif /* _CLOUDSYNC_COMMON_H */ diff --git a/xlators/features/cloudsync/src/cloudsync-fops-c.py b/xlators/features/cloudsync/src/cloudsync-fops-c.py new file mode 100755 index 00000000000..c27df97ae58 --- /dev/null +++ b/xlators/features/cloudsync/src/cloudsync-fops-c.py @@ -0,0 +1,324 @@ +#!/usr/bin/python3 + +from __future__ import print_function +import os +import sys + +curdir = os.path.dirname(sys.argv[0]) +gendir = os.path.join(curdir, '../../../../libglusterfs/src') +sys.path.append(gendir) +from generator import ops, fop_subs, cbk_subs, generate + +FD_DATA_MODIFYING_OP_FOP_TEMPLATE = """ +int32_t +cs_@NAME@ (call_frame_t *frame, xlator_t *this, + @LONG_ARGS@) +{ + int op_errno = EINVAL ; + cs_local_t *local = NULL; + int ret = 0; + cs_inode_ctx_t *ctx = NULL; + gf_cs_obj_state state = -1; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + local = cs_local_init (this, frame, NULL, fd, GF_FOP_@UPNAME@); + if (!local) { + + gf_msg (this->name, GF_LOG_ERROR, 0, 0, "local init failed"); + op_errno = ENOMEM; + goto err; + } + + __cs_inode_ctx_get (this, 
fd->inode, &ctx); + + if (ctx) + state = __cs_get_file_state (fd->inode, ctx); + else + state = GF_CS_LOCAL; + + xdata = xdata ? dict_ref (xdata) : dict_new (); + + if (!xdata) { + gf_msg (this->name, GF_LOG_ERROR, 0, 0, "insufficient memory"); + op_errno = ENOMEM; + goto err; + } + + local->xattr_req = xdata; + + ret = dict_set_uint32 (local->xattr_req, GF_CS_OBJECT_STATUS, 1); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, 0, "dict_set failed key:" + " %s", GF_CS_OBJECT_STATUS); + goto err; + } + + local->stub = fop_@NAME@_stub (frame, cs_resume_@NAME@, + @SHORT_ARGS@); + if (!local->stub) { + gf_msg (this->name, GF_LOG_ERROR, 0, 0, "insufficient memory"); + op_errno = ENOMEM; + goto err; + } + + + if (state == GF_CS_LOCAL) { + STACK_WIND (frame, cs_@NAME@_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->@NAME@, + @SHORT_ARGS@); + } else { + local->call_cnt++; + ret = locate_and_execute (frame); + if (ret) { + op_errno = ENOMEM; + goto err; + } + } + + return 0; + +err: + CS_STACK_UNWIND (@NAME@, frame, -1, op_errno, @CBK_ERROR_ARGS@); + + return 0; +} +""" + +FD_DATA_MODIFYING_RESUME_OP_FOP_TEMPLATE = """ +int32_t +cs_resume_@NAME@ (call_frame_t *frame, xlator_t *this, + @LONG_ARGS@) +{ + int ret = 0; + + ret = cs_resume_postprocess (this, frame, fd->inode); + if (ret) { + goto unwind; + } + + cs_inodelk_unlock (frame); + + STACK_WIND (frame, cs_@NAME@_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->@NAME@, + @SHORT_ARGS@); + + return 0; + +unwind: + + cs_inodelk_unlock (frame); + + cs_common_cbk (frame); + + return 0; +} +""" +FD_DATA_MODIFYING_OP_FOP_CBK_TEMPLATE = """ +int32_t +cs_@NAME@_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + @LONG_ARGS@) +{ + cs_local_t *local = NULL; + int ret = 0; + uint64_t val = 0; + fd_t *fd = NULL; + + local = frame->local; + fd = local->fd; + + /* Do we need lock here? 
*/ + local->call_cnt++; + + if (op_ret == -1) { + ret = dict_get_uint64 (xdata, GF_CS_OBJECT_STATUS, &val); + if (ret == 0) { + if (val == GF_CS_ERROR) { + gf_msg (this->name, GF_LOG_ERROR, 0, 0, + "could not get file state, unwinding"); + op_ret = -1; + op_errno = EIO; + goto unwind; + } else { + __cs_inode_ctx_update (this, fd->inode, val); + gf_msg (this->name, GF_LOG_INFO, 0, 0, + " state = %" PRIu64, val); + + if (local->call_cnt == 1 && + (val == GF_CS_REMOTE || + val == GF_CS_DOWNLOADING)) { + gf_msg (this->name, GF_LOG_INFO, 0, + 0, " will repair and download " + "the file, current state : %" + PRIu64, val); + goto repair; + } else { + gf_msg (this->name, GF_LOG_ERROR, 0, 0, + "second @NAME@, Unwinding"); + goto unwind; + } + } + } else { + gf_msg (this->name, GF_LOG_ERROR, 0, 0, "file state " + "could not be figured, unwinding"); + goto unwind; + } + } else { + /* successful @NAME@ => file is local */ + __cs_inode_ctx_update (this, fd->inode, GF_CS_LOCAL); + gf_msg (this->name, GF_LOG_INFO, 0, 0, "state : GF_CS_LOCAL" + ", @NAME@ successful"); + + goto unwind; + } + +repair: + ret = locate_and_execute (frame); + if (ret) { + goto unwind; + } + + return 0; + +unwind: + CS_STACK_UNWIND (@NAME@, frame, op_ret, op_errno, @SHORT_ARGS@); + + return 0; +} +""" + +LOC_STAT_OP_FOP_TEMPLATE = """ +int32_t +cs_@NAME@ (call_frame_t *frame, xlator_t *this, + @LONG_ARGS@) +{ + int op_errno = EINVAL; + cs_local_t *local = NULL; + int ret = 0; + + local = cs_local_init (this, frame, loc, NULL, GF_FOP_@UPNAME@); + if (!local) { + gf_msg (this->name, GF_LOG_ERROR, 0, 0, "local is NULL"); + op_errno = ENOMEM; + goto err; + } + + if (loc->inode->ia_type == IA_IFDIR) + goto wind; + + xdata = xdata ? 
dict_ref (xdata) : dict_new (); + + if (!xdata) { + gf_msg (this->name, GF_LOG_ERROR, 0, 0, "insufficient memory"); + op_errno = ENOMEM; + goto err; + } + + local->xattr_req = xdata; + + ret = dict_set_uint32 (local->xattr_req, GF_CS_OBJECT_STATUS, 1); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, 0, "dict_set failed key:" + " %s", GF_CS_OBJECT_STATUS); + goto err; + } + +wind: + STACK_WIND (frame, cs_@NAME@_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->@NAME@, + @SHORT_ARGS@); + + return 0; +err: + CS_STACK_UNWIND (@NAME@, frame, -1, op_errno, @CBK_ERROR_ARGS@); + + return 0; +} +""" + +LOC_STAT_OP_FOP_CBK_TEMPLATE = """ +int32_t +cs_@NAME@_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + @LONG_ARGS@) +{ + int ret = 0; + uint64_t val = 0; + loc_t *loc = NULL; + cs_local_t *local = NULL; + + local = frame->local; + + loc = &local->loc; + + if (op_ret == 0) { + ret = dict_get_uint64 (xdata, GF_CS_OBJECT_STATUS, &val); + if (!ret) { + ret = __cs_inode_ctx_update (this, loc->inode, val); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, 0, + "ctx update failed"); + } + } + } else { + cs_inode_ctx_reset (this, loc->inode); + } + + CS_STACK_UNWIND (@NAME@, frame, op_ret, op_errno, @SHORT_ARGS@); + + return 0; +} +""" + +# All xlator FOPs are covered in the following section just to create a clarity +# The lists themselves are not used. 
+entry_ops = ['mknod', 'mkdir', 'unlink', 'rmdir', 'symlink', 'rename', 'link', + 'create'] +special_ops = ['statfs', 'lookup', 'ipc', 'compound', 'icreate', 'namelink'] +ignored_ops = ['getspec'] +inode_ops = ['stat', 'readlink', 'truncate', 'open', 'setxattr', 'getxattr', + 'removexattr', 'opendir', 'access', 'inodelk', 'entrylk', + 'xattrop', 'setattr', 'lease', 'getactivelk', 'setactivelk', + 'discover'] +fd_ops = ['readv', 'writev', 'flush', 'fsync', 'fsyncdir', 'ftruncate', + 'fstat', 'lk', 'readdir', 'finodelk', 'fentrylk', 'fxattrop', + 'fsetxattr', 'fgetxattr', 'rchecksum', 'fsetattr', 'readdirp', + 'fremovexattr', 'fallocate', 'discard', 'zerofill', 'seek'] + + +# These are the current actual lists used to generate the code + +# The following list contains fops which are fd based that modifies data +fd_data_modify_op_fop_template = ['writev', 'flush', 'fsync', + 'ftruncate', 'rchecksum', 'fallocate', + 'discard', 'zerofill', 'seek'] + +# The following list contains fops which are entry based that does not change +# data +loc_stat_op_fop_template = ['lookup', 'stat', 'discover', 'access', 'setattr', + 'getattr'] + +# These fops need a separate implementation +special_fops = ['statfs', 'setxattr', 'unlink', 'getxattr', + 'truncate', 'fstat', 'readv', 'readdirp'] + +def gen_defaults(): + for name in ops: + if name in fd_data_modify_op_fop_template: + print(generate(FD_DATA_MODIFYING_OP_FOP_CBK_TEMPLATE, name, cbk_subs)) + print(generate(FD_DATA_MODIFYING_RESUME_OP_FOP_TEMPLATE, name, fop_subs)) + print(generate(FD_DATA_MODIFYING_OP_FOP_TEMPLATE, name, fop_subs)) + elif name in loc_stat_op_fop_template: + print(generate(LOC_STAT_OP_FOP_CBK_TEMPLATE, name, cbk_subs)) + print(generate(LOC_STAT_OP_FOP_TEMPLATE, name, fop_subs)) + +for l in open(sys.argv[1], 'r').readlines(): + if l.find('#pragma generate') != -1: + print("/* BEGIN GENERATED CODE - DO NOT MODIFY */") + gen_defaults() + print("/* END GENERATED CODE */") + else: + print(l[:-1]) diff --git 
a/xlators/features/cloudsync/src/cloudsync-fops-h.py b/xlators/features/cloudsync/src/cloudsync-fops-h.py new file mode 100755 index 00000000000..faa2de651a7 --- /dev/null +++ b/xlators/features/cloudsync/src/cloudsync-fops-h.py @@ -0,0 +1,31 @@ +#!/usr/bin/python3 + +from __future__ import print_function +import os +import sys + +curdir = os.path.dirname(sys.argv[0]) +gendir = os.path.join(curdir, '../../../../libglusterfs/src') +sys.path.append(gendir) +from generator import ops, fop_subs, cbk_subs, generate + +OP_FOP_TEMPLATE = """ +int32_t +cs_@NAME@ (call_frame_t *frame, xlator_t *this, + @LONG_ARGS@); +""" + +def gen_defaults(): + for name, value in ops.items(): + if name == 'getspec': + continue + print(generate(OP_FOP_TEMPLATE, name, fop_subs)) + + +for l in open(sys.argv[1], 'r').readlines(): + if l.find('#pragma generate') != -1: + print("/* BEGIN GENERATED CODE - DO NOT MODIFY */") + gen_defaults() + print("/* END GENERATED CODE */") + else: + print(l[:-1]) diff --git a/xlators/features/cloudsync/src/cloudsync-mem-types.h b/xlators/features/cloudsync/src/cloudsync-mem-types.h new file mode 100644 index 00000000000..220346405d0 --- /dev/null +++ b/xlators/features/cloudsync/src/cloudsync-mem-types.h @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com> + * This file is part of GlusterFS. + * + * This file is licensed to you under your choice of the GNU Lesser + * General Public License, version 3 or any later version (LGPLv3 or + * later), or the GNU General Public License, version 2 (GPLv2), in all + * cases as published by the Free Software Foundation. 
+ */ + +#ifndef __CLOUDSYNC_MEM_TYPES_H__ +#define __CLOUDSYNC_MEM_TYPES_H__ + +#include <glusterfs/mem-types.h> +enum cs_mem_types_ { + gf_cs_mt_cs_private_t = gf_common_mt_end + 1, + gf_cs_mt_cs_remote_stores_t, + gf_cs_mt_cs_inode_ctx_t, + gf_cs_mt_cs_lxattr_t, + gf_cs_mt_end +}; +#endif /* __CLOUDSYNC_MEM_TYPES_H__ */ diff --git a/xlators/features/cloudsync/src/cloudsync-messages.h b/xlators/features/cloudsync/src/cloudsync-messages.h new file mode 100644 index 00000000000..fb08f72de7f --- /dev/null +++ b/xlators/features/cloudsync/src/cloudsync-messages.h @@ -0,0 +1,16 @@ +/* + * Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com> + * This file is part of GlusterFS. + * + * This file is licensed to you under your choice of the GNU Lesser + * General Public License, version 3 or any later version (LGPLv3 or + * later), or the GNU General Public License, version 2 (GPLv2), in all + * cases as published by the Free Software Foundation. + */ + +#ifndef __CLOUDSYNC_MESSAGES_H__ +#define __CLOUDSYNC_MESSAGES_H__ + +/*TODO: define relevant message ids */ + +#endif /* __CLOUDSYNC_MESSAGES_H__ */ diff --git a/xlators/features/ganesha/Makefile.am b/xlators/features/cloudsync/src/cloudsync-plugins/Makefile.am index a985f42a877..a985f42a877 100644 --- a/xlators/features/ganesha/Makefile.am +++ b/xlators/features/cloudsync/src/cloudsync-plugins/Makefile.am diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/Makefile.am b/xlators/features/cloudsync/src/cloudsync-plugins/src/Makefile.am new file mode 100644 index 00000000000..fb6b0580c6d --- /dev/null +++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/Makefile.am @@ -0,0 +1,11 @@ +if BUILD_AMAZONS3_PLUGIN + AMAZONS3_DIR = cloudsyncs3 +endif + +if BUILD_CVLT_PLUGIN + CVLT_DIR = cvlt +endif + +SUBDIRS = ${AMAZONS3_DIR} ${CVLT_DIR} + +CLEANFILES = diff --git a/xlators/features/filter/Makefile.am b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/Makefile.am index 
d471a3f9243..a985f42a877 100644 --- a/xlators/features/filter/Makefile.am +++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/Makefile.am @@ -1,3 +1,3 @@ SUBDIRS = src -CLEANFILES = +CLEANFILES = diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/Makefile.am b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/Makefile.am new file mode 100644 index 00000000000..6509426ef87 --- /dev/null +++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/Makefile.am @@ -0,0 +1,12 @@ +csp_LTLIBRARIES = cloudsyncs3.la +cspdir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/cloudsync-plugins + +cloudsyncs3_la_SOURCES = libcloudsyncs3.c $(top_srcdir)/xlators/features/cloudsync/src/cloudsync-common.c +cloudsyncs3_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la +cloudsyncs3_la_LDFLAGS = -module -export-symbols $(top_srcdir)/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.sym $(GF_XLATOR_LDFLAGS) +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src -lcurlpp -lcryptopp +noinst_HEADERS = libcloudsyncs3.h libcloudsyncs3-mem-types.h +AM_CFLAGS = -Wall -fno-strict-aliasing $(GF_CFLAGS) -lcurl -lcrypto -I$(top_srcdir)/xlators/features/cloudsync/src +CLEANFILES = + +EXTRA_DIST = libcloudsyncs3.sym diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3-mem-types.h b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3-mem-types.h new file mode 100644 index 00000000000..7ccfcc9f4b6 --- /dev/null +++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3-mem-types.h @@ -0,0 +1,19 @@ +/* + * Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com> + * This file is part of GlusterFS. 
+ * + * This file is licensed to you under your choice of the GNU Lesser + * General Public License, version 3 or any later version (LGPLv3 or + * later), or the GNU General Public License, version 2 (GPLv2), in all + * cases as published by the Free Software Foundation. + */ + +#ifndef __LIBAWS_MEM_TYPES_H__ +#define __LIBAWS_MEM_TYPES_H__ + +#include <glusterfs/mem-types.h> +enum libaws_mem_types_ { + gf_libaws_mt_aws_private_t = gf_common_mt_end + 1, + gf_libaws_mt_end +}; +#endif /* __LIBAWS_MEM_TYPES_H__ */ diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c new file mode 100644 index 00000000000..23c3599825a --- /dev/null +++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c @@ -0,0 +1,584 @@ +/* + Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#include <stdlib.h> +#include <openssl/hmac.h> +#include <openssl/evp.h> +#include <openssl/bio.h> +#include <openssl/buffer.h> +#include <openssl/crypto.h> +#include <curl/curl.h> +#include <glusterfs/xlator.h> +#include <glusterfs/glusterfs.h> +#include "libcloudsyncs3.h" +#include "cloudsync-common.h" + +#define RESOURCE_SIZE 4096 + +store_methods_t store_ops = { + .fop_download = aws_download_s3, + .fop_init = aws_init, + .fop_reconfigure = aws_reconfigure, + .fop_fini = aws_fini, +}; + +typedef struct aws_private { + char *hostname; + char *bucketid; + char *awssekey; + char *awskeyid; + gf_boolean_t abortdl; + pthread_spinlock_t lock; +} aws_private_t; + +void * +aws_init(xlator_t *this) +{ + aws_private_t *priv = NULL; + char *temp_str = NULL; + int ret = 0; + + priv = GF_CALLOC(1, sizeof(aws_private_t), gf_libaws_mt_aws_private_t); + if (!priv) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, "insufficient memory"); + return NULL; + } + + priv->abortdl = _gf_false; + + pthread_spin_init(&priv->lock, PTHREAD_PROCESS_PRIVATE); + + pthread_spin_lock(&(priv->lock)); + { + if (dict_get_str(this->options, "s3plugin-seckey", &temp_str) == 0) { + priv->awssekey = gf_strdup(temp_str); + if (!priv->awssekey) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0, + "initializing aws secret key failed"); + ret = -1; + goto unlock; + } + } + + if (dict_get_str(this->options, "s3plugin-keyid", &temp_str) == 0) { + priv->awskeyid = gf_strdup(temp_str); + if (!priv->awskeyid) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0, + "initializing aws key ID failed"); + ret = -1; + goto unlock; + } + } + + if (dict_get_str(this->options, "s3plugin-bucketid", &temp_str) == 0) { + priv->bucketid = gf_strdup(temp_str); + if (!priv->bucketid) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0, + "initializing aws bucketid failed"); + + ret = -1; + goto unlock; + } + } + + if (dict_get_str(this->options, "s3plugin-hostname", &temp_str) == 0) { + priv->hostname = gf_strdup(temp_str); + if 
(!priv->hostname) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0, + "initializing aws hostname failed"); + + ret = -1; + goto unlock; + } + } + + gf_msg_debug(this->name, 0, + "stored key: %s id: %s " + "bucketid %s hostname: %s", + priv->awssekey, priv->awskeyid, priv->bucketid, + priv->hostname); + } +unlock: + pthread_spin_unlock(&(priv->lock)); + + if (ret == -1) { + GF_FREE(priv->awskeyid); + GF_FREE(priv->awssekey); + GF_FREE(priv->bucketid); + GF_FREE(priv->hostname); + GF_FREE(priv); + priv = NULL; + } + + return (void *)priv; +} + +int +aws_reconfigure(xlator_t *this, dict_t *options) +{ + aws_private_t *priv = NULL; + char *temp_str = NULL; + int ret = 0; + cs_private_t *cspriv = NULL; + + cspriv = this->private; + + priv = cspriv->stores->config; + + if (!priv) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, "null priv"); + return -1; + } + + pthread_spin_lock(&(priv->lock)); + { + if (dict_get_str(options, "s3plugin-seckey", &temp_str) == 0) { + priv->awssekey = gf_strdup(temp_str); + if (!priv->awssekey) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0, + "initializing aws secret key failed"); + ret = -1; + goto out; + } + } + + if (dict_get_str(options, "s3plugin-keyid", &temp_str) == 0) { + priv->awskeyid = gf_strdup(temp_str); + if (!priv->awskeyid) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0, + "initializing aws key ID failed"); + ret = -1; + goto out; + } + } + + if (dict_get_str(options, "s3plugin-bucketid", &temp_str) == 0) { + priv->bucketid = gf_strdup(temp_str); + if (!priv->bucketid) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0, + "initializing aws bucketid failed"); + ret = -1; + goto out; + } + } + + if (dict_get_str(options, "s3plugin-hostname", &temp_str) == 0) { + priv->hostname = gf_strdup(temp_str); + if (!priv->hostname) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0, + "initializing aws hostname failed"); + ret = -1; + goto out; + } + } + } +out: + pthread_spin_unlock(&(priv->lock)); + + gf_msg_debug(this->name, 0, + "stored key: %s 
id: %s " + "bucketid %s hostname: %s", + priv->awssekey, priv->awskeyid, priv->bucketid, + priv->hostname); + + return ret; +} + +/* + * Release a store configuration: frees the four option strings, destroys the + * spinlock and frees the structure itself. A NULL config is a no-op. + * NOTE(review): config is presumably the aws_private_t built by aws_init(); + * confirm against the caller of store_ops.fop_fini. + */ +void +aws_fini(void *config) +{ + aws_private_t *priv = NULL; + + /* The original cast priv itself here, so priv stayed NULL and nothing + * was ever released. */ + priv = (aws_private_t *)config; + + if (priv) { + GF_FREE(priv->hostname); + GF_FREE(priv->bucketid); + GF_FREE(priv->awssekey); + GF_FREE(priv->awskeyid); + + pthread_spin_destroy(&priv->lock); + GF_FREE(priv); + } +} + +/* Set up memory accounting for this plugin's mem-types. Returns 0 on + * success, -1 if this is NULL, otherwise the xlator_mem_acct_init() result. */ +int32_t +mem_acct_init(xlator_t *this) +{ + int ret = -1; + + GF_VALIDATE_OR_GOTO("CS", this, out); + + ret = xlator_mem_acct_init(this, gf_libaws_mt_end + 1); + + if (ret != 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, "Memory accounting init failed"); + return ret; + } +out: + return ret; +} + +/* + * Build the string that gets signed for an S3 request. + * Writes "<bucketid>/<filepath>" into resource (a RESOURCE_SIZE byte buffer), + * stores a newly allocated HTTP date string in *date and returns the newly + * allocated string to sign, or NULL on failure. aws_download_s3() frees both + * *date and the returned string. + */ +char * +aws_form_request(char *resource, char **date, char *reqtype, char *bucketid, + char *filepath) +{ + char httpdate[256]; + time_t ctime; + struct tm gtime = { + 0, + }; + char *sign_req = NULL; + int signreq_len = -1; + int date_len = -1; + int res_len = -1; + + ctime = gf_time(); + /* gmtime() hands back a shared static buffer; use the reentrant form + * and do not feed a failed conversion to strftime(). */ + if (gmtime_r(&ctime, &gtime) == NULL) { + gf_msg("CS", GF_LOG_ERROR, 0, 0, "gmtime_r failed"); + goto out; + } + + date_len = strftime(httpdate, sizeof(httpdate), + "%a, %d %b %Y %H:%M:%S +0000", &gtime); + + *date = gf_strndup(httpdate, date_len); + if (*date == NULL) { + gf_msg("CS", GF_LOG_ERROR, ENOMEM, 0, + "memory allocation " + "failure for date"); + goto out; + } + + res_len = snprintf(resource, RESOURCE_SIZE, "%s/%s", bucketid, filepath); + + gf_msg_debug("CS", 0, "resource %s", resource); + + /* 6 accounts for the 4 new line chars, one forward slash and + * one null char */ + signreq_len = res_len + date_len + strlen(reqtype) + 6; + + sign_req = GF_MALLOC(signreq_len, gf_common_mt_char); + if (sign_req == NULL) { + gf_msg("CS", GF_LOG_ERROR, ENOMEM, 0, + "memory allocation " + "failure for sign_req"); + goto out; + } + + snprintf(sign_req, signreq_len, "%s\n\n%s\n%s\n/%s", reqtype, "", *date, + resource); + +out: + return sign_req; +} + +char * +aws_b64_encode(const unsigned char *input, int length) +{ + BIO *bio, *b64; + BUF_MEM *bptr; + char *buff = NULL; + + 
b64 = BIO_new(BIO_f_base64()); + bio = BIO_new(BIO_s_mem()); + if (!b64 || !bio) { + /* BIO_free_all() does nothing for NULL, so this covers either + * allocation failing. */ + BIO_free_all(b64); + BIO_free_all(bio); + return NULL; + } + + b64 = BIO_push(b64, bio); + BIO_write(b64, input, length); + BIO_flush(b64); + bptr = NULL; + BIO_get_mem_ptr(b64, &bptr); + if (!bptr || bptr->length == 0) { + /* nothing was produced; length - 1 below would wrap around */ + BIO_free_all(b64); + return NULL; + } + + buff = GF_MALLOC(bptr->length, gf_common_mt_char); + if (buff) { + /* Copy all but the final byte (presumably the newline the base64 + * BIO appends) and terminate the string in its place. */ + memcpy(buff, bptr->data, bptr->length - 1); + buff[bptr->length - 1] = 0; + } + + BIO_free_all(b64); + + return buff; +} + +/* + * HMAC-SHA1 sign str with awssekey and return the digest base64 encoded. + * The result is allocated by aws_b64_encode() and freed by the caller; + * NULL is returned when the HMAC context or the encoding fails. + */ +char * +aws_sign_request(char *const str, char *awssekey) +{ +#if (OPENSSL_VERSION_NUMBER < 0x1010002f) + HMAC_CTX ctx; +#endif + HMAC_CTX *pctx = NULL; + + unsigned char md[256]; + unsigned len; + char *base64 = NULL; + +#if (OPENSSL_VERSION_NUMBER < 0x1010002f) + HMAC_CTX_init(&ctx); + pctx = &ctx; +#else + pctx = HMAC_CTX_new(); + if (!pctx) + return NULL; +#endif + HMAC_Init_ex(pctx, awssekey, strlen(awssekey), EVP_sha1(), NULL); + HMAC_Update(pctx, (unsigned char *)str, strlen(str)); + HMAC_Final(pctx, (unsigned char *)md, &len); + +#if (OPENSSL_VERSION_NUMBER < 0x1010002f) + HMAC_CTX_cleanup(pctx); +#else + HMAC_CTX_free(pctx); +#endif + base64 = aws_b64_encode(md, len); + + return base64; +} + +int +aws_dlwritev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, struct iatt *prebuf, struct iatt *postbuf, + dict_t *xdata) +{ + aws_private_t *priv = NULL; + + if (op_ret == -1) { + /* errnum is the third gf_msg() argument and msgid the fourth, as in + * every other call in this file */ + gf_msg(this->name, GF_LOG_ERROR, op_errno, 0, + "write failed " + ". 
Aborting Download"); + + /* NOTE(review): aws_write_callback() and aws_reconfigure() reach the + * plugin state through ((cs_private_t *)this->private)->stores->config, + * while this->private is used directly as aws_private_t here. Confirm + * which xlator "this" is in this callback. Also note abortdl is only + * set to false in aws_init(), so once raised it stays raised. */ + priv = this->private; + pthread_spin_lock(&(priv->lock)); + { + priv->abortdl = _gf_true; + } + pthread_spin_unlock(&(priv->lock)); + } + + CS_STACK_DESTROY(frame); + + return op_ret; +} + +/* + * Write callback registered with libcurl (CURLOPT_WRITEFUNCTION). + * Copies the received chunk (size * nitems bytes at dlbuf) into an iobuf and + * winds a writev to the first child at local->dloffset, then advances the + * offset. mainframe is the call frame passed as CURLOPT_WRITEDATA. + * Returns the number of bytes consumed; any value other than size * nitems + * makes libcurl abort the transfer, so 0 is returned on every failure. + */ +size_t +aws_write_callback(void *dlbuf, size_t size, size_t nitems, void *mainframe) +{ + call_frame_t *frame = NULL; + fd_t *dlfd = NULL; + int ret = 0; + cs_local_t *local = NULL; + struct iovec iov = { + 0, + }; + struct iobref *iobref = NULL; + struct iobuf *iobuf = NULL; + struct iovec dliov = { + 0, + }; + size_t tsize = 0; + xlator_t *this = NULL; + cs_private_t *xl_priv = NULL; + aws_private_t *priv = NULL; + call_frame_t *dlframe = NULL; + + frame = (call_frame_t *)mainframe; + this = frame->this; + xl_priv = this->private; + priv = xl_priv->stores->config; + + pthread_spin_lock(&(priv->lock)); + { + /* returning size other than the size passed from curl will + * abort further download*/ + if (priv->abortdl) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, "aborting download"); + pthread_spin_unlock(&(priv->lock)); + return 0; + } + } + pthread_spin_unlock(&(priv->lock)); + + local = frame->local; + dlfd = local->dlfd; + tsize = size * nitems; + + dliov.iov_base = (void *)dlbuf; + dliov.iov_len = tsize; + + ret = iobuf_copy(this->ctx->iobuf_pool, &dliov, 1, &iobref, &iobuf, &iov); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, "iobuf_copy failed"); + /* Report the chunk as not consumed so libcurl stops; otherwise the + * data is dropped and later chunks land at the wrong offset. */ + tsize = 0; + goto out; + } + + /* copy frame */ + dlframe = copy_frame(frame); + if (!dlframe) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, "copy_frame failed"); + tsize = 0; + goto out; + } + + STACK_WIND(dlframe, aws_dlwritev_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->writev, dlfd, &iov, 1, local->dloffset, + 0, iobref, NULL); + + local->dloffset += tsize; + +out: + if (iobuf) + iobuf_unref(iobuf); + if (iobref) + iobref_unref(iobref); + + return tsize; +} + +int +aws_download_s3(call_frame_t *frame, void *config) +{ + char *buf; + int bufsize = -1; + CURL *handle = NULL; + struct curl_slist *slist = 
NULL; + struct curl_slist *tmp = NULL; + xlator_t *this = NULL; + int ret = 0; + /* curl_easy_setopt() expects a long for CURLOPT_VERBOSE */ + long debug = 1; + CURLcode res; + /* Start empty: strlen() is called on this buffer after a failed + * transfer, and libcurl may leave it untouched when it has no detail. */ + char errbuf[CURL_ERROR_SIZE] = { + 0, + }; + size_t len = 0; + long responsecode; + char *sign_req = NULL; + char *date = NULL; + char *const reqtype = "GET"; + char *signature = NULL; + cs_local_t *local = NULL; + char resource[RESOURCE_SIZE] = { + 0, + }; + aws_private_t *priv = NULL; + + this = frame->this; + + local = frame->local; + + priv = (aws_private_t *)config; + + if (!priv->bucketid || !priv->hostname || !priv->awssekey || + !priv->awskeyid) { + ret = -1; + goto out; + } + + sign_req = aws_form_request(resource, &date, reqtype, priv->bucketid, + local->remotepath); + if (!sign_req) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "null sign_req, " + "aborting download"); + ret = -1; + goto out; + } + + gf_msg_debug("CS", 0, "sign_req %s date %s", sign_req, date); + + signature = aws_sign_request(sign_req, priv->awssekey); + if (!signature) { + gf_msg("CS", GF_LOG_ERROR, 0, 0, + "null signature, " + "aborting download"); + ret = -1; + goto out; + } + + handle = curl_easy_init(); + if (!handle) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, "curl_easy_init failed"); + ret = -1; + goto out; + } + this = frame->this; + + /* special numbers 6, 20, 10 accounts for static characters in the + * below snprintf string format arguments. strlen(resource) is included + * because the same buffer later holds the URL "https://host/resource"; + * without it a long bucket/path was silently truncated. */ + bufsize = strlen(date) + 6 + strlen(priv->awskeyid) + strlen(signature) + + 20 + strlen(priv->hostname) + 10 + strlen(resource); + + buf = (char *)alloca(bufsize); + if (!buf) { + gf_msg("CS", GF_LOG_ERROR, ENOMEM, 0, + "mem allocation " + "failed for buf"); + ret = -1; + goto out; + } + + snprintf(buf, bufsize, "Date: %s", date); + slist = curl_slist_append(slist, buf); + snprintf(buf, bufsize, "Authorization: AWS %s:%s", priv->awskeyid, + signature); + slist = curl_slist_append(slist, buf); + snprintf(buf, bufsize, "https://%s/%s", priv->hostname, resource); + + if (gf_log_get_loglevel() >= GF_LOG_DEBUG) { + tmp = slist; + while (tmp) { + gf_msg_debug(this->name, 0, "slist for curl - %s", tmp->data); + tmp = tmp->next; + } + } + + curl_easy_setopt(handle, 
CURLOPT_HTTPHEADER, slist); + curl_easy_setopt(handle, CURLOPT_URL, buf); + curl_easy_setopt(handle, CURLOPT_WRITEFUNCTION, aws_write_callback); + curl_easy_setopt(handle, CURLOPT_WRITEDATA, frame); + curl_easy_setopt(handle, CURLOPT_VERBOSE, debug); + curl_easy_setopt(handle, CURLOPT_ERRORBUFFER, errbuf); + + res = curl_easy_perform(handle); + if (res != CURLE_OK) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, "download failed. err: %s\n", + curl_easy_strerror(res)); + ret = -1; + len = strlen(errbuf); + if (len) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, "curl failure %s", errbuf); + } else { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "curl error " + "%s\n", + curl_easy_strerror(res)); + } + } + + if (res == CURLE_OK) { + curl_easy_getinfo(handle, CURLINFO_RESPONSE_CODE, &responsecode); + gf_msg_debug(this->name, 0, "response code %ld", responsecode); + if (responsecode != 200) { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, 0, "curl download failed"); + } + } + + curl_slist_free_all(slist); + curl_easy_cleanup(handle); + +out: + if (sign_req) + GF_FREE(sign_req); + if (date) + GF_FREE(date); + if (signature) + GF_FREE(signature); + + return ret; +} + +struct volume_options cs_options[] = { + {.key = {"s3plugin-seckey"}, + .type = GF_OPTION_TYPE_STR, + .description = "aws secret key"}, + {.key = {"s3plugin-keyid"}, + .type = GF_OPTION_TYPE_STR, + .description = "aws key ID" + + }, + {.key = {"s3plugin-bucketid"}, + .type = GF_OPTION_TYPE_STR, + .description = "aws bucketid"}, + {.key = {"s3plugin-hostname"}, + .type = GF_OPTION_TYPE_STR, + .description = "aws hostname e.g. 
s3.amazonaws.com"}, + {.key = {NULL}}, +}; diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.h b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.h new file mode 100644 index 00000000000..85ae669486b --- /dev/null +++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.h @@ -0,0 +1,50 @@ +/* + Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef _LIBAWS_H +#define _LIBAWS_H + +#include <glusterfs/glusterfs.h> +#include <glusterfs/call-stub.h> +#include <glusterfs/xlator.h> +#include <glusterfs/syncop.h> +#include <curl/curl.h> +#include "cloudsync-common.h" +#include "libcloudsyncs3-mem-types.h" + +char * +aws_b64_encode(const unsigned char *input, int length); + +size_t +aws_write_callback(void *dlbuf, size_t size, size_t nitems, void *mainframe); + +int +aws_download_s3(call_frame_t *frame, void *config); + +int +aws_dlwritev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, struct iatt *prebuf, struct iatt *postbuf, + dict_t *xdata); + +void * +aws_init(xlator_t *this); + +int +aws_reconfigure(xlator_t *this, dict_t *options); + +char * +aws_form_request(char *resource, char **date, char *reqtype, char *bucketid, + char *filepath); +char * +aws_sign_request(char *const str, char *awssekey); + +void +aws_fini(void *config); + +#endif diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.sym b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.sym new file mode 100644 index 00000000000..0bc273670d5 --- /dev/null +++ 
b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.sym @@ -0,0 +1 @@ +store_ops diff --git a/xlators/features/protect/Makefile.am b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/Makefile.am index d471a3f9243..a985f42a877 100644 --- a/xlators/features/protect/Makefile.am +++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/Makefile.am @@ -1,3 +1,3 @@ SUBDIRS = src -CLEANFILES = +CLEANFILES = diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/Makefile.am b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/Makefile.am new file mode 100644 index 00000000000..b512464f157 --- /dev/null +++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/Makefile.am @@ -0,0 +1,12 @@ +csp_LTLIBRARIES = cloudsynccvlt.la +cspdir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/cloudsync-plugins + +cloudsynccvlt_la_SOURCES = libcvlt.c $(top_srcdir)/xlators/features/cloudsync/src/cloudsync-common.c +cloudsynccvlt_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la +cloudsynccvlt_la_LDFLAGS = -module -avoid-version -export-symbols $(top_srcdir)/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcloudsynccvlt.sym +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src +noinst_HEADERS = archivestore.h libcvlt.h libcvlt-mem-types.h cvlt-messages.h +AM_CFLAGS = -Wall -fno-strict-aliasing $(GF_CFLAGS) -I$(top_srcdir)/xlators/features/cloudsync/src +CLEANFILES = + +EXTRA_DIST = libcloudsynccvlt.sym diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/archivestore.h b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/archivestore.h new file mode 100644 index 00000000000..7230ef77337 --- /dev/null +++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/archivestore.h @@ -0,0 +1,203 @@ +/* + Copyright (c) 2018 Commvault Systems, Inc. 
<http://www.commvault.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef __ARCHIVESTORE_H__ +#define __ARCHIVESTORE_H__ + +#include <stdlib.h> +#include <stddef.h> +#include <stdint.h> +#include <dlfcn.h> +#include <uuid/uuid.h> + +#define CS_XATTR_ARCHIVE_UUID "trusted.cloudsync.uuid" +#define CS_XATTR_PRODUCT_ID "trusted.cloudsync.product-id" +#define CS_XATTR_STORE_ID "trusted.cloudsync.store-id" + +struct _archstore_methods; +typedef struct _archstore_methods archstore_methods_t; + +struct _archstore_desc { + void *priv; /* Private field for store mgmt. */ + /* To be used only by archive store*/ +}; +typedef struct _archstore_desc archstore_desc_t; + +struct _archstore_info { + char *id; /* Identifier for the archivestore */ + uint32_t idlen; /* Length of identifier string */ + char *prod; /* Name of the data mgmt. product */ + uint32_t prodlen; /* Length of the product string */ +}; +typedef struct _archstore_info archstore_info_t; + +struct _archstore_fileinfo { + uuid_t uuid; /* uuid of the file */ + char *path; /* file path */ + uint32_t pathlength; /* length of file path */ +}; +typedef struct _archstore_fileinfo archstore_fileinfo_t; + +struct _app_callback_info { + archstore_info_t *src_archstore; + archstore_fileinfo_t *src_archfile; + archstore_info_t *dest_archstore; + archstore_fileinfo_t *dest_archfile; +}; +typedef struct _app_callback_info app_callback_info_t; + +typedef void (*app_callback_t)(archstore_desc_t *, app_callback_info_t *, + void *, int64_t, int32_t); + +enum _archstore_scan_type { FULL = 1, INCREMENTAL = 2 }; +typedef enum _archstore_scan_type archstore_scan_type_t; + +typedef int32_t archstore_errno_t; + +/* + * Initialize archive store. 
+ * arg1 pointer to structure containing archive store information + * arg2 error number if any generated during the initialization + * arg3 name of the log file + */ +typedef int32_t (*init_archstore_t)(archstore_desc_t *, archstore_errno_t *, + const char *); + +/* + * Clean up archive store. + * arg1 pointer to structure containing archive store information + * arg2 error number if any generated during the cleanup + */ +typedef int32_t (*term_archstore_t)(archstore_desc_t *, archstore_errno_t *); + +/* + * Read the contents of the file from archive store + * arg1 pointer to structure containing archive store description + * arg2 pointer to structure containing archive store information + * arg3 pointer to structure containing information about file to be read + * arg4 offset in the file from which data should be read + * arg5 buffer where the data should be read + * arg6 number of bytes of data to be read + * arg7 error number if any generated during the read from file + * arg8 callback handler to be invoked after the data is read + * arg9 cookie to be passed when callback is invoked + */ +typedef int32_t (*read_archstore_t)(archstore_desc_t *, archstore_info_t *, + archstore_fileinfo_t *, off_t, char *, + size_t, archstore_errno_t *, app_callback_t, + void *); + +/* + * Restore the contents of the file from archive store + * This is basically in-place restore + * arg1 pointer to structure containing archive store description + * arg2 pointer to structure containing archive store information + * arg3 pointer to structure containing information about file to be restored + * arg4 error number if any generated during the file restore + * arg5 callback to be invoked after the file is restored + * arg6 cookie to be passed when callback is invoked + */ +typedef int32_t (*recall_archstore_t)(archstore_desc_t *, archstore_info_t *, + archstore_fileinfo_t *, + archstore_errno_t *, app_callback_t, + void *); + +/* + * Restore the contents of the file from archive store to 
a different store + * This is basically out-of-place restore + * arg1 pointer to structure containing archive store description + * arg2 pointer to structure containing source archive store information + * arg3 pointer to structure containing information about file to be restored + * arg4 pointer to structure containing destination archive store information + * arg5 pointer to structure containing information about the location to + which the file will be restored + * arg6 error number if any generated during the file restore + * arg7 callback to be invoked after the file is restored + * arg8 cookie to be passed when callback is invoked + */ +typedef int32_t (*restore_archstore_t)(archstore_desc_t *, archstore_info_t *, + archstore_fileinfo_t *, + archstore_info_t *, + archstore_fileinfo_t *, + archstore_errno_t *, app_callback_t, + void *); + +/* + * Archive the contents of the file to archive store + * arg1 pointer to structure containing archive store description + * arg2 pointer to structure containing source archive store information + * arg3 pointer to structure containing information about files to be archived + * arg4 pointer to structure containing destination archive store information + * arg5 pointer to structure containing information about files that failed + * to be archived + * arg6 error number if any generated during the file archival + * arg7 callback to be invoked after the file is archived + * arg8 cookie to be passed when callback is invoked + */ +typedef int32_t (*archive_archstore_t)(archstore_desc_t *, archstore_info_t *, + archstore_fileinfo_t *, + archstore_info_t *, + archstore_fileinfo_t *, + archstore_errno_t *, app_callback_t, + void *); + +/* + * Backup list of files provided in the input file + * arg1 pointer to structure containing archive store description + * arg2 pointer to structure containing source archive store information + * arg3 pointer to structure containing information about files to be backed up + * arg4 pointer to 
structure containing destination archive store information + * arg5 pointer to structure containing information about files that failed + * to be backed up + * arg6 error number if any generated during the file archival + * arg7 callback to be invoked after the file is archived + * arg8 cookie to be passed when callback is invoked + */ +typedef int32_t (*backup_archstore_t)(archstore_desc_t *, archstore_info_t *, + archstore_fileinfo_t *, + archstore_info_t *, + archstore_fileinfo_t *, + archstore_errno_t *, app_callback_t, + void *); + +/* + * Scan the contents of a store and determine the files which need to be + * backed up. + * arg1 pointer to structure containing archive store description + * arg2 pointer to structure containing archive store information + * arg3 type of scan whether full or incremental + * arg4 path to file that contains list of files to be backed up + * arg5 error number if any generated during scan operation + */ +typedef int32_t (*scan_archstore_t)(archstore_desc_t *, archstore_info_t *, + archstore_scan_type_t, char *, + archstore_errno_t *); + +struct _archstore_methods { + init_archstore_t init; + term_archstore_t fini; + backup_archstore_t backup; + archive_archstore_t archive; + scan_archstore_t scan; + restore_archstore_t restore; + recall_archstore_t recall; + read_archstore_t read; +}; + +typedef int (*get_archstore_methods_t)(archstore_methods_t *); + +/* + * Single function that will be invoked by applications for extracting + * the function pointers to all data management functions. 
+ */ +int32_t +get_archstore_methods(archstore_methods_t *); + +#endif /* End of __ARCHIVESTORE_H__ */ diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/cvlt-messages.h b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/cvlt-messages.h new file mode 100644 index 00000000000..57c9aa77da0 --- /dev/null +++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/cvlt-messages.h @@ -0,0 +1,30 @@ +/* + Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. + */ + +#ifndef _CVLT_MESSAGES_H_ +#define _CVLT_MESSAGES_H_ + +#include <glusterfs/glfs-message-id.h> + +/* To add new message IDs, append new identifiers at the end of the list. + * + * Never remove a message ID. If it's not used anymore, you can rename it or + * leave it as it is, but not delete it. This is to prevent reutilization of + * IDs by other messages. + * + * The component name must match one of the entries defined in + * glfs-message-id.h. 
+ */ + +GLFS_MSGID(CVLT, CVLT_EXTRACTION_FAILED, CVLT_FREE, + CVLT_RESOURCE_ALLOCATION_FAILED, CVLT_RESTORE_FAILED, + CVLT_READ_FAILED, CVLT_NO_MEMORY, CVLT_DLOPEN_FAILED); + +#endif /* !_CVLT_MESSAGES_H_ */ diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcloudsynccvlt.sym b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcloudsynccvlt.sym new file mode 100644 index 00000000000..0bc273670d5 --- /dev/null +++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcloudsynccvlt.sym @@ -0,0 +1 @@ +store_ops diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt-mem-types.h b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt-mem-types.h new file mode 100644 index 00000000000..c24fab8bfe7 --- /dev/null +++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt-mem-types.h @@ -0,0 +1,19 @@ +/* + * Copyright (c) 2018 Commvault Systems, Inc. <http://www.commvault.com> + * This file is part of GlusterFS. + * + * This file is licensed to you under your choice of the GNU Lesser + * General Public License, version 3 or any later version (LGPLv3 or + * later), or the GNU General Public License, version 2 (GPLv2), in all + * cases as published by the Free Software Foundation. 
+ */ + +#ifndef __LIBCVLT_MEM_TYPES_H__ +#define __LIBCVLT_MEM_TYPES_H__ + +#include <glusterfs/mem-types.h> +enum libcvlt_mem_types_ { + gf_libcvlt_mt_cvlt_private_t = gf_common_mt_end + 1, + gf_libcvlt_mt_end +}; +#endif /* __LIBCVLT_MEM_TYPES_H__ */ diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.c b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.c new file mode 100644 index 00000000000..5b7272bb448 --- /dev/null +++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.c @@ -0,0 +1,842 @@ +#include <stdlib.h> +#include <glusterfs/xlator.h> +#include <glusterfs/glusterfs.h> +#include "libcvlt.h" +#include "cloudsync-common.h" +#include "cvlt-messages.h" + +#define LIBARCHIVE_SO "libopenarchive.so" +#define ALIGN_SIZE 4096 +#define CVLT_TRAILER "cvltv1" + +store_methods_t store_ops = { + .fop_download = cvlt_download, + .fop_init = cvlt_init, + .fop_reconfigure = cvlt_reconfigure, + .fop_fini = cvlt_fini, + .fop_remote_read = cvlt_read, +}; + +static const int32_t num_req = 32; +static const int32_t num_iatt = 32; +static char *plugin = "cvlt_cloudSync"; + +int32_t +mem_acct_init(xlator_t *this) +{ + int ret = -1; + + if (!this) + return ret; + + ret = xlator_mem_acct_init(this, gf_libcvlt_mt_end + 1); + + if (ret != 0) { + return ret; + } + + return ret; +} + +static void +cvlt_free_resources(archive_t *arch) +{ + /* + * We will release all the resources that were allocated by the xlator. + * Check whether there are any buffers which have not been released + * back to a mempool. 
+ */ + + if (arch->handle) { + dlclose(arch->handle); + } + + if (arch->iobuf_pool) { + iobuf_pool_destroy(arch->iobuf_pool); + } + + if (arch->req_pool) { + mem_pool_destroy(arch->req_pool); + arch->req_pool = NULL; + } + + return; +} + +static int32_t +cvlt_extract_store_fops(xlator_t *this, archive_t *arch) +{ + int32_t op_ret = -1; + get_archstore_methods_t get_archstore_methods; + + /* + * libopenarchive.so defines methods for performing data management + * operations. We will extract the methods from library and these + * methods will be invoked for moving data between glusterfs volume + * and the data management product. + */ + + VALIDATE_OR_GOTO(arch, err); + + arch->handle = dlopen(LIBARCHIVE_SO, RTLD_NOW); + if (!arch->handle) { + gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_DLOPEN_FAILED, + " failed to open %s ", LIBARCHIVE_SO); + return op_ret; + } + + dlerror(); /* Clear any existing error */ + + get_archstore_methods = dlsym(arch->handle, "get_archstore_methods"); + if (!get_archstore_methods) { + gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED, + " Error extracting get_archstore_methods()"); + dlclose(arch->handle); + arch->handle = NULL; + return op_ret; + } + + op_ret = get_archstore_methods(&(arch->fops)); + if (op_ret) { + gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED, + " Failed to extract methods in get_archstore_methods"); + dlclose(arch->handle); + arch->handle = NULL; + return op_ret; + } + +err: + return op_ret; +} + +static int32_t +cvlt_alloc_resources(xlator_t *this, archive_t *arch, int num_req, int num_iatt) +{ + /* + * Initialize information about all the memory pools that will be + * used by this xlator. 
+ */ + arch->nreqs = 0; + + arch->req_pool = NULL; + + arch->handle = NULL; + arch->xl = this; + + arch->req_pool = mem_pool_new(cvlt_request_t, num_req); + if (!arch->req_pool) { + goto err; + } + + arch->iobuf_pool = iobuf_pool_new(); + if (!arch->iobuf_pool) { + goto err; + } + + if (cvlt_extract_store_fops(this, arch)) { + goto err; + } + + return 0; + +err: + + return -1; +} + +static void +cvlt_req_init(cvlt_request_t *req) +{ + sem_init(&(req->sem), 0, 0); + + return; +} + +static void +cvlt_req_destroy(cvlt_request_t *req) +{ + if (req->iobuf) { + iobuf_unref(req->iobuf); + } + + if (req->iobref) { + iobref_unref(req->iobref); + } + + sem_destroy(&(req->sem)); + + return; +} + +static cvlt_request_t * +cvlt_alloc_req(archive_t *arch) +{ + cvlt_request_t *reqptr = NULL; + + if (!arch) { + goto err; + } + + if (arch->req_pool) { + reqptr = mem_get0(arch->req_pool); + if (reqptr) { + cvlt_req_init(reqptr); + } + } + + if (reqptr) { + LOCK(&(arch->lock)); + arch->nreqs++; + UNLOCK(&(arch->lock)); + } + +err: + return reqptr; +} + +static int32_t +cvlt_free_req(archive_t *arch, cvlt_request_t *reqptr) +{ + if (!reqptr) { + goto err; + } + + if (!arch) { + goto err; + } + + if (arch->req_pool) { + /* + * Free the request resources if they exist. + */ + + cvlt_req_destroy(reqptr); + mem_put(reqptr); + + LOCK(&(arch->lock)); + arch->nreqs--; + UNLOCK(&(arch->lock)); + } + + return 0; + +err: + return -1; +} + +static int32_t +cvlt_init_xlator(xlator_t *this, archive_t *arch, int num_req, int num_iatt) +{ + int32_t ret = -1; + int32_t errnum = -1; + int32_t locked = 0; + + /* + * Perform all the initializations needed for brining up the xlator. 
+ */ + if (!arch) { + goto err; + } + + LOCK_INIT(&(arch->lock)); + LOCK(&(arch->lock)); + + locked = 1; + + ret = cvlt_alloc_resources(this, arch, num_req, num_iatt); + + if (ret) { + goto err; + } + + /* + * Now that the fops have been extracted initialize the store + */ + ret = arch->fops.init(&(arch->descinfo), &errnum, plugin); + if (ret) { + goto err; + } + + UNLOCK(&(arch->lock)); + locked = 0; + ret = 0; + + return ret; + +err: + if (arch) { + cvlt_free_resources(arch); + + if (locked) { + UNLOCK(&(arch->lock)); + } + } + + return ret; +} + +static int32_t +cvlt_term_xlator(archive_t *arch) +{ + int32_t errnum = -1; + + if (!arch) { + goto err; + } + + LOCK(&(arch->lock)); + + /* + * Release the resources that have been allocated inside store + */ + arch->fops.fini(&(arch->descinfo), &errnum); + + cvlt_free_resources(arch); + + UNLOCK(&(arch->lock)); + + GF_FREE(arch); + + return 0; + +err: + return -1; +} + +static int32_t +cvlt_init_store_info(archive_t *priv, archstore_info_t *store_info) +{ + if (!store_info) { + return -1; + } + + store_info->prod = priv->product_id; + store_info->prodlen = strlen(priv->product_id); + + store_info->id = priv->store_id; + store_info->idlen = strlen(priv->store_id); + + return 0; +} + +static int32_t +cvlt_init_file_info(cs_loc_xattr_t *xattr, archstore_fileinfo_t *file_info) +{ + if (!xattr || !file_info) { + return -1; + } + + gf_uuid_copy(file_info->uuid, xattr->uuid); + file_info->path = xattr->file_path; + file_info->pathlength = strlen(xattr->file_path); + + return 0; +} + +static int32_t +cvlt_init_gluster_store_info(cs_loc_xattr_t *xattr, + archstore_info_t *store_info) +{ + static char *product = "glusterfs"; + + if (!xattr || !store_info) { + return -1; + } + + store_info->prod = product; + store_info->prodlen = strlen(product); + + store_info->id = xattr->volname; + store_info->idlen = strlen(xattr->volname); + + return 0; +} + +static int32_t +cvlt_init_gluster_file_info(cs_loc_xattr_t *xattr, + 
archstore_fileinfo_t *file_info) +{ + if (!xattr || !file_info) { + return -1; + } + + gf_uuid_copy(file_info->uuid, xattr->gfid); + file_info->path = xattr->file_path; + file_info->pathlength = strlen(xattr->file_path); + + return 0; +} + +static void +cvlt_copy_stat_info(struct iatt *buf, cs_size_xattr_t *xattrs) +{ + /* + * If the file was archived then the reported size will not be a + * correct one. We need to fix this. + */ + if (buf && xattrs) { + buf->ia_size = xattrs->size; + buf->ia_blksize = xattrs->blksize; + buf->ia_blocks = xattrs->blocks; + } + + return; +} + +static void +cvlt_readv_complete(archstore_desc_t *desc, app_callback_info_t *cbkinfo, + void *cookie, int64_t op_ret, int32_t op_errno) +{ + struct iovec iov; + xlator_t *this = NULL; + struct iatt postbuf = { + 0, + }; + call_frame_t *frame = NULL; + cvlt_request_t *req = (cvlt_request_t *)cookie; + cs_local_t *local = NULL; + cs_private_t *cspriv = NULL; + archive_t *priv = NULL; + + frame = req->frame; + this = frame->this; + local = frame->local; + + cspriv = this->private; + priv = (archive_t *)cspriv->stores->config; + + if (strcmp(priv->trailer, CVLT_TRAILER)) { + op_ret = -1; + op_errno = EINVAL; + goto out; + } + + gf_msg_debug(plugin, 0, + " Read callback invoked offset:%" PRIu64 "bytes: %" PRIu64 + " op : %d ret : %" PRId64 " errno : %d", + req->offset, req->bytes, req->op_type, op_ret, op_errno); + + if (op_ret < 0) { + goto out; + } + + req->iobref = iobref_new(); + if (!req->iobref) { + op_ret = -1; + op_errno = ENOMEM; + goto out; + } + + iobref_add(req->iobref, req->iobuf); + iov.iov_base = iobuf_ptr(req->iobuf); + iov.iov_len = op_ret; + + cvlt_copy_stat_info(&postbuf, &(req->szxattr)); + + /* + * Hack to notify higher layers of EOF. 
+ */ + if (!postbuf.ia_size || (req->offset + iov.iov_len >= postbuf.ia_size)) { + gf_msg_debug(plugin, 0, " signalling end-of-file for uuid=%s", + uuid_utoa(req->file_info.uuid)); + op_errno = ENOENT; + } + +out: + + STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, &iov, 1, &postbuf, + req->iobref, local->xattr_rsp); + + cvlt_free_req(priv, req); + + return; +} + +static void +cvlt_download_complete(archstore_desc_t *store, app_callback_info_t *cbk_info, + void *cookie, int64_t ret, int errcode) +{ + cvlt_request_t *req = (cvlt_request_t *)cookie; + + gf_msg_debug(plugin, 0, + " Download callback invoked ret : %" PRId64 " errno : %d", + ret, errcode); + + req->op_ret = ret; + req->op_errno = errcode; + sem_post(&(req->sem)); + + return; +} + +void * +cvlt_init(xlator_t *this) +{ + int ret = 0; + archive_t *priv = NULL; + + if (!this->children || this->children->next) { + gf_msg(plugin, GF_LOG_ERROR, ENOMEM, 0, + "should have exactly one child"); + ret = -1; + goto out; + } + + if (!this->parents) { + gf_msg(plugin, GF_LOG_ERROR, ENOMEM, 0, + "dangling volume. 
check volfile"); + ret = -1; + goto out; + } + + priv = GF_CALLOC(1, sizeof(archive_t), gf_libcvlt_mt_cvlt_private_t); + if (!priv) { + ret = -1; + goto out; + } + + priv->trailer = CVLT_TRAILER; + if (cvlt_init_xlator(this, priv, num_req, num_iatt)) { + gf_msg(plugin, GF_LOG_ERROR, ENOMEM, 0, "xlator init failed"); + ret = -1; + goto out; + } + + GF_OPTION_INIT("cloudsync-store-id", priv->store_id, str, out); + GF_OPTION_INIT("cloudsync-product-id", priv->product_id, str, out); + + gf_msg(plugin, GF_LOG_INFO, 0, 0, + "store id is : %s " + "product id is : %s.", + priv->store_id, priv->product_id); +out: + if (ret == -1) { + cvlt_term_xlator(priv); + return (NULL); + } + return priv; +} + +int +cvlt_reconfigure(xlator_t *this, dict_t *options) +{ + cs_private_t *cspriv = NULL; + archive_t *priv = NULL; + + cspriv = this->private; + priv = (archive_t *)cspriv->stores->config; + + if (strcmp(priv->trailer, CVLT_TRAILER)) + goto out; + + GF_OPTION_RECONF("cloudsync-store-id", priv->store_id, options, str, out); + + GF_OPTION_RECONF("cloudsync-product-id", priv->product_id, options, str, + out); + gf_msg_debug(plugin, 0, + "store id is : %s " + "product id is : %s.", + priv->store_id, priv->product_id); + return 0; +out: + return -1; +} + +void +cvlt_fini(void *config) +{ + archive_t *priv = NULL; + + priv = (archive_t *)config; + + if (strcmp(priv->trailer, CVLT_TRAILER)) + return; + + cvlt_term_xlator(priv); + gf_msg(plugin, GF_LOG_INFO, 0, CVLT_FREE, " released xlator resources"); + return; +} + +int +cvlt_download(call_frame_t *frame, void *config) +{ + archive_t *parch = NULL; + cs_local_t *local = frame->local; + cs_loc_xattr_t *locxattr = local->xattrinfo.lxattr; + cvlt_request_t *req = NULL; + archstore_info_t dest_storeinfo; + archstore_fileinfo_t dest_fileinfo; + int32_t op_ret, op_errno; + + parch = (archive_t *)config; + + if (strcmp(parch->trailer, CVLT_TRAILER)) { + op_ret = -1; + op_errno = EINVAL; + goto err; + } + + gf_msg_debug(plugin, 0, " download 
invoked for uuid = %s gfid=%s ", + locxattr->uuid, uuid_utoa(locxattr->gfid)); + + if (!(parch->fops.restore)) { + op_errno = ELIBBAD; + goto err; + } + + /* + * Download needs to be processed. Allocate a request. + */ + req = cvlt_alloc_req(parch); + + if (!req) { + gf_msg(plugin, GF_LOG_ERROR, ENOMEM, CVLT_RESOURCE_ALLOCATION_FAILED, + " failed to allocated request for gfid=%s", + uuid_utoa(locxattr->gfid)); + op_errno = ENOMEM; + goto err; + } + + /* + * Initialize the request object. + */ + req->op_type = CVLT_RESTORE_OP; + req->frame = frame; + + /* + * The file is currently residing inside a data management store. + * To restore the file contents we need to provide the information + * about data management store. + */ + op_ret = cvlt_init_store_info(parch, &(req->store_info)); + if (op_ret < 0) { + gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED, + " failed to extract store info for gfid=%s", + uuid_utoa(locxattr->gfid)); + goto err; + } + + op_ret = cvlt_init_file_info(locxattr, &(req->file_info)); + if (op_ret < 0) { + gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED, + " failed to extract file info for gfid=%s", + uuid_utoa(locxattr->gfid)); + goto err; + } + + /* + * We need to perform in-place restore of the file from data management + * store to gusterfs volume. + */ + op_ret = cvlt_init_gluster_store_info(locxattr, &dest_storeinfo); + if (op_ret < 0) { + gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED, + " failed to extract destination store info for gfid=%s", + uuid_utoa(locxattr->gfid)); + goto err; + } + + op_ret = cvlt_init_gluster_file_info(locxattr, &dest_fileinfo); + if (op_ret < 0) { + gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED, + " failed to extract file info for gfid=%s", + uuid_utoa(locxattr->gfid)); + goto err; + } + + /* + * Submit the restore request. 
+ */ + op_ret = parch->fops.restore(&(parch->descinfo), &(req->store_info), + &(req->file_info), &dest_storeinfo, + &dest_fileinfo, &op_errno, + cvlt_download_complete, req); + if (op_ret < 0) { + gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_RESTORE_FAILED, + " failed to restore file gfid=%s from data management store", + uuid_utoa(locxattr->gfid)); + goto err; + } + + /* + * Wait for the restore to complete. + */ + sem_wait(&(req->sem)); + + if (req->op_ret < 0) { + gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_RESTORE_FAILED, + " restored failed for gfid=%s", uuid_utoa(locxattr->gfid)); + goto err; + } + + if (req) { + cvlt_free_req(parch, req); + } + + return 0; + +err: + + if (req) { + cvlt_free_req(parch, req); + } + + return -1; +} + +int +cvlt_read(call_frame_t *frame, void *config) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + archive_t *parch = NULL; + cvlt_request_t *req = NULL; + struct iovec iov = { + 0, + }; + struct iobref *iobref; + size_t size = 0; + off_t off = 0; + + cs_local_t *local = frame->local; + cs_loc_xattr_t *locxattr = local->xattrinfo.lxattr; + + size = local->xattrinfo.size; + off = local->xattrinfo.offset; + + parch = (archive_t *)config; + + if (strcmp(parch->trailer, CVLT_TRAILER)) { + op_ret = -1; + op_errno = EINVAL; + goto err; + } + + gf_msg_debug(plugin, 0, + " read invoked for gfid = %s offset = %" PRIu64 + " file_size = %" PRIu64, + uuid_utoa(locxattr->gfid), off, local->stbuf.ia_size); + + if (off >= local->stbuf.ia_size) { + /* + * Hack to notify higher layers of EOF. 
+ */ + + op_errno = ENOENT; + op_ret = 0; + + gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_READ_FAILED, + " reporting end-of-file for gfid=%s", uuid_utoa(locxattr->gfid)); + + goto err; + } + + if (!size) { + op_errno = EINVAL; + + gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_READ_FAILED, + " zero size read attempted on gfid=%s", + uuid_utoa(locxattr->gfid)); + goto err; + } + + if (!(parch->fops.read)) { + op_errno = ELIBBAD; + goto err; + } + + /* + * The read request need to be processed. Allocate a request. + */ + req = cvlt_alloc_req(parch); + + if (!req) { + gf_msg(plugin, GF_LOG_ERROR, ENOMEM, CVLT_NO_MEMORY, + " failed to allocated request for gfid=%s", + uuid_utoa(locxattr->gfid)); + op_errno = ENOMEM; + goto err; + } + + req->iobuf = iobuf_get_page_aligned(parch->iobuf_pool, size, ALIGN_SIZE); + if (!req->iobuf) { + op_errno = ENOMEM; + goto err; + } + + /* + * Initialize the request object. + */ + req->op_type = CVLT_READ_OP; + req->offset = off; + req->bytes = size; + req->frame = frame; + req->szxattr.size = local->stbuf.ia_size; + req->szxattr.blocks = local->stbuf.ia_blocks; + req->szxattr.blksize = local->stbuf.ia_blksize; + + /* + * The file is currently residing inside a data management store. + * To read the file contents we need to provide the information + * about data management store. + */ + op_ret = cvlt_init_store_info(parch, &(req->store_info)); + if (op_ret < 0) { + gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED, + " failed to extract store info for gfid=%s" + " offset=%" PRIu64 " size=%" GF_PRI_SIZET + ", " + " buf=%p", + uuid_utoa(locxattr->gfid), off, size, req->iobuf->ptr); + goto err; + } + + op_ret = cvlt_init_file_info(locxattr, &(req->file_info)); + if (op_ret < 0) { + gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED, + " failed to extract file info for gfid=%s" + " offset=%" PRIu64 " size=%" GF_PRI_SIZET + ", " + " buf=%p", + uuid_utoa(locxattr->gfid), off, size, req->iobuf->ptr); + goto err; + } + + /* + * Submit the read request. 
+ */ + op_ret = parch->fops.read(&(parch->descinfo), &(req->store_info), + &(req->file_info), off, req->iobuf->ptr, size, + &op_errno, cvlt_readv_complete, req); + + if (op_ret < 0) { + gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED, + " read failed on gfid=%s" + " offset=%" PRIu64 " size=%" GF_PRI_SIZET + ", " + " buf=%p", + uuid_utoa(locxattr->gfid), off, size, req->iobuf->ptr); + goto err; + } + + return 0; + +err: + + iobref = iobref_new(); + gf_msg_debug(plugin, 0, " read unwinding stack op_ret = %d, op_errno = %d", + op_ret, op_errno); + + STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, &iov, 1, + &(local->stbuf), iobref, local->xattr_rsp); + + if (iobref) { + iobref_unref(iobref); + } + + if (req) { + cvlt_free_req(parch, req); + } + + return 0; +} diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.h b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.h new file mode 100644 index 00000000000..c45ac948f6c --- /dev/null +++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.h @@ -0,0 +1,84 @@ +/* + Copyright (c) 2018 Commvault Systems, Inc. <http://www.commvault.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ +#ifndef _LIBCVLT_H +#define _LIBCVLT_H + +#include <semaphore.h> +#include <glusterfs/xlator.h> +#include <glusterfs/glusterfs.h> +#include <glusterfs/call-stub.h> +#include <glusterfs/syncop.h> +#include <glusterfs/compat-errno.h> +#include "cloudsync-common.h" +#include "libcvlt-mem-types.h" +#include "archivestore.h" + +enum _cvlt_op { + CVLT_READ_OP = 1, + CVLT_WRITE_OP = 2, + CVLT_RESTORE_OP = 3, + CVLT_ARCHIVE_OP = 4, + CVLT_LOOKUP_OP = 5, + CVLT_XATTR_OP = 6, + CVLT_STAT_OP = 7, + CVLT_FSTAT_op = 8, + CVLT_UNDEF_OP = 127 +}; +typedef enum _cvlt_op cvlt_op_t; + +struct _archive; +struct _cvlt_request { + uint64_t offset; + uint64_t bytes; + struct iobuf *iobuf; + struct iobref *iobref; + call_frame_t *frame; + cvlt_op_t op_type; + int32_t op_ret; + int32_t op_errno; + xlator_t *this; + sem_t sem; + archstore_info_t store_info; + archstore_fileinfo_t file_info; + cs_size_xattr_t szxattr; +}; +typedef struct _cvlt_request cvlt_request_t; + +struct _archive { + gf_lock_t lock; /* lock for controlling access */ + xlator_t *xl; /* xlator */ + void *handle; /* handle returned from dlopen */ + int32_t nreqs; /* num requests active */ + struct mem_pool *req_pool; /* pool for requests */ + struct iobuf_pool *iobuf_pool; /* iobuff pool */ + archstore_desc_t descinfo; /* Archive store descriptor info */ + archstore_methods_t fops; /* function pointers */ + char *product_id; + char *store_id; + char *trailer; +}; +typedef struct _archive archive_t; + +void * +cvlt_init(xlator_t *); + +int +cvlt_reconfigure(xlator_t *, dict_t *); + +void +cvlt_fini(void *); + +int +cvlt_download(call_frame_t *, void *); + +int +cvlt_read(call_frame_t *, void *); + +#endif diff --git a/xlators/features/cloudsync/src/cloudsync.c b/xlators/features/cloudsync/src/cloudsync.c new file mode 100644 index 00000000000..7f0b9e563b8 --- /dev/null +++ b/xlators/features/cloudsync/src/cloudsync.c @@ -0,0 +1,2076 @@ +/* + * Copyright (c) 2018 Red Hat, Inc. 
<http://www.redhat.com> + * This file is part of GlusterFS. + * + * This file is licensed to you under your choice of the GNU Lesser + * General Public License, version 3 or any later version (LGPLv3 or + * later), or the GNU General Public License, version 2 (GPLv2), in all + * cases as published by the Free Software Foundation. + */ + +#include <glusterfs/glusterfs.h> +#include <glusterfs/xlator.h> +#include <glusterfs/defaults.h> +#include "cloudsync.h" +#include "cloudsync-common.h" +#include <glusterfs/call-stub.h> +#include "cloudsync-autogen-fops.h" + +#include <string.h> +#include <dlfcn.h> + +static void +cs_cleanup_private(cs_private_t *priv) +{ + if (priv) { + if (priv->stores) { + priv->stores->fini(priv->stores->config); + GF_FREE(priv->stores); + } + + pthread_spin_destroy(&priv->lock); + GF_FREE(priv); + } + + return; +} + +static struct cs_plugin plugins[] = { + {.name = "cloudsyncs3", + .library = "cloudsyncs3.so", + .description = "cloudsync s3 store."}, +#if defined(__linux__) + {.name = "cvlt", + .library = "cloudsynccvlt.so", + .description = "Commvault content store."}, +#endif + {.name = NULL}, +}; + +int +cs_init(xlator_t *this) +{ + cs_private_t *priv = NULL; + gf_boolean_t per_vol = _gf_false; + int ret = 0; + char *libpath = NULL; + store_methods_t *store_methods = NULL; + void *handle = NULL; + char *temp_str = NULL; + int index = 0; + char *libname = NULL; + + priv = GF_CALLOC(1, sizeof(*priv), gf_cs_mt_cs_private_t); + if (!priv) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, "insufficient memory"); + goto out; + } + + priv->this = this; + + this->local_pool = mem_pool_new(cs_local_t, 512); + if (!this->local_pool) { + gf_msg(this->name, GF_LOG_ERROR, 0, ENOMEM, "initialisation failed."); + ret = -1; + goto out; + } + + this->private = priv; + + GF_OPTION_INIT("cloudsync-remote-read", priv->remote_read, bool, out); + + /* temp workaround. 
Should be configurable through glusterd*/ + per_vol = _gf_true; + + if (per_vol) { + if (dict_get_str_sizen(this->options, "cloudsync-storetype", + &temp_str) == 0) { + for (index = 0; plugins[index].name; index++) { + if (!strcmp(temp_str, plugins[index].name)) { + libname = plugins[index].library; + break; + } + } + } else { + ret = 0; + } + + if (!libname) { + gf_msg(this->name, GF_LOG_WARNING, 0, 0, "no plugin enabled"); + ret = 0; + goto out; + } + + ret = gf_asprintf(&libpath, "%s/%s", CS_PLUGINDIR, libname); + if (ret == -1) { + goto out; + } + + handle = dlopen(libpath, RTLD_NOW); + if (!handle) { + gf_msg(this->name, GF_LOG_WARNING, 0, 0, + "could not " + "load the required library. %s", + dlerror()); + ret = 0; + goto out; + } else { + gf_msg(this->name, GF_LOG_INFO, 0, 0, + "loading library:%s successful", libname); + } + + priv->stores = GF_CALLOC(1, sizeof(struct cs_remote_stores), + gf_cs_mt_cs_remote_stores_t); + if (!priv->stores) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "Could not " + "allocate memory for priv->stores"); + ret = -1; + goto out; + } + + (void)dlerror(); /* clear out previous error string */ + + /* load library methods */ + store_methods = (store_methods_t *)dlsym(handle, "store_ops"); + if (!store_methods) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, "null store_methods %s", + dlerror()); + ret = -1; + goto out; + } + + (void)dlerror(); + + if (priv->remote_read) { + priv->stores->rdfop = store_methods->fop_remote_read; + if (!priv->stores->rdfop) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "failed to get" + " read fop %s", + dlerror()); + ret = -1; + goto out; + } + } + + priv->stores->dlfop = store_methods->fop_download; + if (!priv->stores->dlfop) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "failed to get" + " download fop %s", + dlerror()); + ret = -1; + goto out; + } + + (void)dlerror(); + priv->stores->init = store_methods->fop_init; + if (!priv->stores->init) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "failed to get" + 
" init fop %s", + dlerror()); + ret = -1; + goto out; + } + + (void)dlerror(); + priv->stores->reconfigure = store_methods->fop_reconfigure; + if (!priv->stores->reconfigure) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "failed to get" + " reconfigure fop %s", + dlerror()); + ret = -1; + goto out; + } + + priv->stores->handle = handle; + + priv->stores->config = (void *)((priv->stores->init)(this)); + if (!priv->stores->config) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, "null config"); + ret = -1; + goto out; + } + } + + ret = 0; + +out: + if (ret == -1) { + if (this->local_pool) { + mem_pool_destroy(this->local_pool); + this->local_pool = NULL; + } + + cs_cleanup_private(priv); + + if (handle) { + dlclose(handle); + } + } + + GF_FREE(libpath); + + return ret; +} + +int +cs_forget(xlator_t *this, inode_t *inode) +{ + uint64_t ctx_int = 0; + cs_inode_ctx_t *ctx = NULL; + + inode_ctx_del(inode, this, &ctx_int); + if (!ctx_int) + return 0; + + ctx = (cs_inode_ctx_t *)(uintptr_t)ctx_int; + + GF_FREE(ctx); + return 0; +} + +void +cs_fini(xlator_t *this) +{ + cs_private_t *priv = NULL; + priv = this->private; + + cs_cleanup_private(priv); +} + +int +cs_reconfigure(xlator_t *this, dict_t *options) +{ + cs_private_t *priv = NULL; + int ret = 0; + + priv = this->private; + if (!priv) { + ret = -1; + goto out; + } + + GF_OPTION_RECONF("cloudsync-remote-read", priv->remote_read, options, bool, + out); + + /* needed only for per volume configuration*/ + ret = priv->stores->reconfigure(this, options); + +out: + return ret; +} + +int32_t +cs_mem_acct_init(xlator_t *this) +{ + int ret = -1; + + GF_VALIDATE_OR_GOTO("cloudsync", this, out); + + ret = xlator_mem_acct_init(this, gf_cs_mt_end + 1); + + if (ret != 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, "Memory accounting init failed"); + return ret; + } +out: + return ret; +} + +int32_t +cs_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *xdata) +{ + int ret = 0; + int op_errno = ENOMEM; 
+ + if (!xdata) { + xdata = dict_new(); + if (!xdata) { + gf_msg(this->name, GF_LOG_ERROR, 0, ENOMEM, + "failed to create " + "dict"); + goto err; + } + } + + ret = dict_set_uint32(xdata, GF_CS_OBJECT_STATUS, 1); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "dict_set failed key:" + " %s", + GF_CS_OBJECT_STATUS); + goto err; + } + + STACK_WIND(frame, default_readdirp_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdirp, fd, size, off, xdata); + return 0; +err: + STACK_UNWIND_STRICT(readdirp, frame, -1, op_errno, NULL, NULL); + return 0; +} + +int32_t +cs_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + cs_local_t *local = NULL; + int ret = 0; + uint64_t val = 0; + + local = frame->local; + + local->call_cnt++; + + if (op_ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, "truncate failed"); + ret = dict_get_uint64(xdata, GF_CS_OBJECT_STATUS, &val); + if (ret == 0) { + if (val == GF_CS_ERROR) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "could not get file state, unwinding"); + op_ret = -1; + op_errno = EIO; + goto unwind; + } else { + __cs_inode_ctx_update(this, local->loc.inode, val); + gf_msg(this->name, GF_LOG_INFO, 0, 0, " state = %" PRIu64, val); + + if (local->call_cnt == 1 && + (val == GF_CS_REMOTE || val == GF_CS_DOWNLOADING)) { + gf_msg(this->name, GF_LOG_WARNING, 0, 0, + "will repair and download " + "the file, current state : %" PRIu64, + val); + goto repair; + } else { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "second truncate, Unwinding"); + goto unwind; + } + } + } else { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "file state " + "could not be figured, unwinding"); + goto unwind; + } + } else { + /* successful write => file is local */ + __cs_inode_ctx_update(this, local->loc.inode, GF_CS_LOCAL); + gf_msg(this->name, GF_LOG_INFO, 0, 0, + "state : GF_CS_LOCAL" + ", truncate successful"); + + goto unwind; + } + 
+repair: + ret = locate_and_execute(frame); + if (ret) { + goto unwind; + } + + return 0; + +unwind: + CS_STACK_UNWIND(truncate, frame, op_ret, op_errno, prebuf, postbuf, xdata); + return 0; +} + +int32_t +cs_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) +{ + cs_local_t *local = NULL; + int ret = 0; + cs_inode_ctx_t *ctx = NULL; + gf_cs_obj_state state = -1; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(loc, err); + + local = cs_local_init(this, frame, loc, NULL, GF_FOP_TRUNCATE); + if (!local) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, "local init failed"); + goto err; + } + + __cs_inode_ctx_get(this, loc->inode, &ctx); + + if (ctx) + state = __cs_get_file_state(loc->inode, ctx); + else + state = GF_CS_LOCAL; + + local->xattr_req = xdata ? dict_ref(xdata) : (xdata = dict_new()); + + ret = dict_set_uint32(local->xattr_req, GF_CS_OBJECT_STATUS, 1); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "dict_set failed key:" + " %s", + GF_CS_OBJECT_STATUS); + goto err; + } + + local->stub = fop_truncate_stub(frame, cs_resume_truncate, loc, offset, + xdata); + if (!local->stub) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, "insufficient memory"); + goto err; + } + + if (state == GF_CS_LOCAL) { + STACK_WIND(frame, cs_truncate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); + + } else { + local->call_cnt++; + ret = locate_and_execute(frame); + if (ret) { + goto err; + } + } + + return 0; +err: + CS_STACK_UNWIND(truncate, frame, -1, ENOMEM, NULL, NULL, NULL); + return 0; +} + +int32_t +cs_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct statvfs *buf, dict_t *xdata) +{ + STACK_UNWIND_STRICT(statfs, frame, op_ret, op_errno, buf, xdata); + return 0; +} + +int32_t +cs_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +{ + STACK_WIND(frame, cs_statfs_cbk, FIRST_CHILD(this), + 
FIRST_CHILD(this)->fops->statfs, loc, xdata); + return 0; +} + +int32_t +cs_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata) +{ + STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, dict, xdata); + return 0; +} + +int32_t +cs_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name, + dict_t *xattr_req) +{ + STACK_WIND(frame, cs_getxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->getxattr, loc, name, xattr_req); + return 0; +} + +int32_t +cs_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + cs_local_t *local = NULL; + + local = frame->local; + + if (local->locked) + cs_inodelk_unlock(frame); + + CS_STACK_UNWIND(setxattr, frame, op_ret, op_errno, xdata); + + return 0; +} + +int32_t +cs_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + int32_t flags, dict_t *xdata) +{ + data_t *tmp = NULL; + cs_local_t *local = NULL; + int ret = 0; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + + local = cs_local_init(this, frame, loc, NULL, GF_FOP_SETXATTR); + if (!local) { + ret = -1; + goto err; + } + + local->xattr_req = xdata ? 
dict_ref(xdata) : (xdata = dict_new()); + + tmp = dict_get_sizen(dict, GF_CS_OBJECT_UPLOAD_COMPLETE); + if (tmp) { + /* Value of key should be the atime */ + local->stub = fop_setxattr_stub(frame, cs_resume_setxattr, loc, dict, + flags, xdata); + + if (!local->stub) + goto err; + + ret = locate_and_execute(frame); + if (ret) { + goto err; + } + + return 0; + } + + STACK_WIND(frame, cs_setxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata); + return 0; +err: + CS_STACK_UNWIND(setxattr, frame, -1, errno, NULL); + return 0; +} + +int32_t +cs_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata) +{ + STACK_UNWIND_STRICT(fgetxattr, frame, op_ret, op_errno, dict, xdata); + return 0; +} + +int32_t +cs_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, + dict_t *xdata) +{ + STACK_WIND(frame, cs_fgetxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata); + return 0; +} + +int32_t +cs_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, xdata); + return 0; +} + +int32_t +cs_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, + int32_t flags, dict_t *xdata) +{ + STACK_WIND(frame, cs_fsetxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata); + return 0; +} + +int32_t +cs_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) +{ + STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, preparent, postparent, + xdata); + return 0; +} + +int32_t +cs_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + dict_t *xattr_req) +{ + cs_local_t *local = NULL; + int ret = 0; + + local = cs_local_init(this, frame, loc, 
NULL, GF_FOP_UNLINK); + if (!local) + goto err; + + local->xattr_req = xattr_req ? dict_ref(xattr_req) : dict_new(); + + ret = dict_set_uint32(local->xattr_req, GF_CS_OBJECT_STATUS, 1); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "dict_set failed key:" + " %s", + GF_CS_OBJECT_STATUS); + goto err; + } + STACK_WIND(frame, cs_unlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, loc, flags, local->xattr_req); + return 0; +err: + CS_STACK_UNWIND(unlink, frame, -1, errno, NULL, NULL, NULL); + return 0; +} + +int32_t +cs_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, fd_t *fd, dict_t *xdata) +{ + int ret = 0; + uint64_t val = 0; + + if (op_ret == 0) { + ret = dict_get_uint64(xdata, GF_CS_OBJECT_STATUS, &val); + if (!ret) { + ret = __cs_inode_ctx_update(this, fd->inode, val); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, "ctx update failed"); + } + } + } else { + cs_inode_ctx_reset(this, fd->inode); + } + + CS_STACK_UNWIND(open, frame, op_ret, op_errno, fd, xdata); + return 0; +} + +int32_t +cs_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + fd_t *fd, dict_t *xattr_req) +{ + cs_local_t *local = NULL; + int ret = 0; + + local = cs_local_init(this, frame, NULL, fd, GF_FOP_OPEN); + if (!local) + goto err; + + local->xattr_req = xattr_req ? 
dict_ref(xattr_req) : dict_new(); + + ret = dict_set_uint32(local->xattr_req, GF_CS_OBJECT_STATUS, 1); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "dict_set failed key:" + " %s", + GF_CS_OBJECT_STATUS); + goto err; + } + + STACK_WIND(frame, cs_open_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->open, loc, flags, fd, local->xattr_req); + return 0; +err: + CS_STACK_UNWIND(open, frame, -1, errno, NULL, NULL); + return 0; +} + +int32_t +cs_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iatt *buf, dict_t *xdata) +{ + int ret = 0; + uint64_t val = 0; + fd_t *fd = NULL; + cs_local_t *local = NULL; + + local = frame->local; + + fd = local->fd; + + if (op_ret == 0) { + ret = dict_get_uint64(xdata, GF_CS_OBJECT_STATUS, &val); + if (!ret) { + gf_msg_debug(this->name, 0, "state %" PRIu64, val); + ret = __cs_inode_ctx_update(this, fd->inode, val); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, "ctx update failed"); + } + } + } else { + cs_inode_ctx_reset(this, fd->inode); + } + + CS_STACK_UNWIND(fstat, frame, op_ret, op_errno, buf, xdata); + + return 0; +} + +int32_t +cs_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xattr_req) +{ + cs_local_t *local = NULL; + int ret = 0; + + local = cs_local_init(this, frame, NULL, fd, GF_FOP_FSTAT); + if (!local) + goto err; + + if (fd->inode->ia_type == IA_IFDIR) + goto wind; + + local->xattr_req = xattr_req ? 
dict_ref(xattr_req) : dict_new(); + + ret = dict_set_uint32(local->xattr_req, GF_CS_OBJECT_STATUS, 1); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "dict_set failed key:" + " %s", + GF_CS_OBJECT_STATUS); + goto err; + } + +wind: + STACK_WIND(frame, cs_fstat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, fd, local->xattr_req); + return 0; +err: + CS_STACK_UNWIND(fstat, frame, -1, errno, NULL, NULL); + return 0; +} + +cs_local_t * +cs_local_init(xlator_t *this, call_frame_t *frame, loc_t *loc, fd_t *fd, + glusterfs_fop_t fop) +{ + cs_local_t *local = NULL; + int ret = 0; + + local = mem_get0(this->local_pool); + if (!local) + goto out; + + if (loc) { + ret = loc_copy(&local->loc, loc); + if (ret) + goto out; + } + + if (fd) { + local->fd = fd_ref(fd); + } + + local->op_ret = -1; + local->op_errno = EUCLEAN; + local->fop = fop; + local->dloffset = 0; + frame->local = local; + local->locked = _gf_false; + local->call_cnt = 0; +out: + if (ret) { + if (local) + mem_put(local); + local = NULL; + } + + return local; +} + +call_frame_t * +cs_lock_frame(call_frame_t *parent_frame) +{ + call_frame_t *lock_frame = NULL; + + lock_frame = copy_frame(parent_frame); + + if (lock_frame == NULL) + goto out; + + set_lk_owner_from_ptr(&lock_frame->root->lk_owner, parent_frame->root); + +out: + return lock_frame; +} + +void +cs_lock_wipe(call_frame_t *lock_frame) +{ + CS_STACK_DESTROY(lock_frame); +} + +int32_t +cs_inodelk_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + cs_lock_wipe(frame); + + return 0; +} + +int +cs_inodelk_unlock(call_frame_t *main_frame) +{ + xlator_t *this = NULL; + struct gf_flock flock = { + 0, + }; + call_frame_t *lock_frame = NULL; + cs_local_t *lock_local = NULL; + cs_local_t *main_local = NULL; + int ret = 0; + + this = main_frame->this; + main_local = main_frame->local; + + lock_frame = cs_lock_frame(main_frame); + if (!lock_frame) + goto out; + + lock_local = 
cs_local_init(this, lock_frame, NULL, NULL, 0); + if (!lock_local) + goto out; + + ret = cs_build_loc(&lock_local->loc, main_frame); + if (ret) { + goto out; + } + + flock.l_type = F_UNLCK; + + main_local->locked = _gf_false; + + STACK_WIND(lock_frame, cs_inodelk_unlock_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->inodelk, CS_LOCK_DOMAIN, + &lock_local->loc, F_SETLKW, &flock, NULL); + + return 0; + +out: + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "Stale lock would be found on" + " server"); + + if (lock_frame) + cs_lock_wipe(lock_frame); + + return 0; +} + +int +cs_download_task(void *arg) +{ + call_frame_t *frame = NULL; + xlator_t *this = NULL; + cs_private_t *priv = NULL; + int ret = -1; + char *sign_req = NULL; + fd_t *fd = NULL; + cs_local_t *local = NULL; + dict_t *dict = NULL; + + frame = (call_frame_t *)arg; + + this = frame->this; + + priv = this->private; + + if (!priv->stores) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "No remote store " + "plugins found"); + ret = -1; + goto out; + } + + local = frame->local; + + if (local->fd) + fd = fd_anonymous(local->fd->inode); + else + fd = fd_anonymous(local->loc.inode); + + if (!fd) { + gf_msg("CS", GF_LOG_ERROR, 0, 0, "fd creation failed"); + ret = -1; + goto out; + } + + local->dlfd = fd; + local->dloffset = 0; + + dict = dict_new(); + if (!dict) { + gf_msg(this->name, GF_LOG_ERROR, 0, ENOMEM, + "failed to create " + "dict"); + ret = -1; + goto out; + } + + ret = dict_set_uint32(dict, GF_CS_OBJECT_DOWNLOADING, 1); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, "dict_set failed"); + ret = -1; + goto out; + } + + ret = syncop_fsetxattr(this, local->fd, dict, 0, NULL, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "fsetxattr failed " + "key %s", + GF_CS_OBJECT_DOWNLOADING); + ret = -1; + goto out; + } + /*this calling method is for per volume setting */ + ret = priv->stores->dlfop(frame, priv->stores->config); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "download failed" 
+ ", remotepath: %s", + local->remotepath); + + /*using dlfd as it is anonymous and have RDWR flag*/ + ret = syncop_ftruncate(FIRST_CHILD(this), local->dlfd, 0, NULL, NULL, + NULL, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, -ret, "ftruncate failed"); + } else { + gf_msg_debug(this->name, 0, "ftruncate succeed"); + } + + ret = -1; + goto out; + } else { + gf_msg(this->name, GF_LOG_INFO, 0, 0, + "download success, path" + " : %s", + local->remotepath); + + ret = syncop_fremovexattr(this, local->fd, GF_CS_OBJECT_REMOTE, NULL, + NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, -ret, + "removexattr failed, remotexattr"); + ret = -1; + goto out; + } else { + gf_msg_debug(this->name, 0, + "fremovexattr success, " + "path : %s", + local->remotepath); + } + + ret = syncop_fremovexattr(this, local->fd, GF_CS_OBJECT_DOWNLOADING, + NULL, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, -ret, + "removexattr failed, downloading xattr, path %s", + local->remotepath); + ret = -1; + goto out; + } else { + gf_msg_debug(this->name, 0, + "fremovexattr success" + " path %s", + local->remotepath); + } + } + +out: + GF_FREE(sign_req); + + if (dict) + dict_unref(dict); + + if (fd) { + fd_unref(fd); + local->dlfd = NULL; + } + + return ret; +} + +int +cs_download(call_frame_t *frame) +{ + int ret = 0; + cs_local_t *local = NULL; + xlator_t *this = NULL; + + local = frame->local; + this = frame->this; + + if (!local->remotepath) { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "remote path not" + " available. Check posix logs to resolve"); + goto out; + } + + ret = cs_download_task((void *)frame); +out: + return ret; +} + +int +cs_set_xattr_req(call_frame_t *frame) +{ + cs_local_t *local = NULL; + GF_UNUSED int ret = 0; + + local = frame->local; + + /* When remote reads are performed (i.e. reads on remote store), + * there needs to be a way to associate a file on gluster volume + * with its correspnding file on the remote store. 
In order to do + * that, a unique key can be maintained as an xattr + * (GF_CS_XATTR_ARCHIVE_UUID)on the stub file on gluster bricks. + * This xattr should be provided to the plugin to + * perform the read fop on the correct file. This assumes that the file + * hierarchy and name need not be the same on remote store as that of + * the gluster volume. + */ + ret = dict_set_sizen_str_sizen(local->xattr_req, GF_CS_XATTR_ARCHIVE_UUID, + "1"); + + return 0; +} + +int +cs_update_xattrs(call_frame_t *frame, dict_t *xdata) +{ + cs_local_t *local = NULL; + xlator_t *this = NULL; + int size = -1; + GF_UNUSED int ret = 0; + + local = frame->local; + this = frame->this; + + local->xattrinfo.lxattr = GF_CALLOC(1, sizeof(cs_loc_xattr_t), + gf_cs_mt_cs_lxattr_t); + if (!local->xattrinfo.lxattr) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto err; + } + + gf_uuid_copy(local->xattrinfo.lxattr->gfid, local->loc.gfid); + + if (local->remotepath) { + local->xattrinfo.lxattr->file_path = gf_strdup(local->remotepath); + if (!local->xattrinfo.lxattr->file_path) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto err; + } + } + + ret = dict_get_gfuuid(xdata, GF_CS_XATTR_ARCHIVE_UUID, + &(local->xattrinfo.lxattr->uuid)); + + if (ret) { + gf_uuid_clear(local->xattrinfo.lxattr->uuid); + } + size = strlen(this->name) - strlen("-cloudsync") + 1; + local->xattrinfo.lxattr->volname = GF_CALLOC(1, size, gf_common_mt_char); + if (!local->xattrinfo.lxattr->volname) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto err; + } + strncpy(local->xattrinfo.lxattr->volname, this->name, size - 1); + local->xattrinfo.lxattr->volname[size - 1] = '\0'; + + return 0; +err: + cs_xattrinfo_wipe(local); + return -1; +} + +int +cs_serve_readv(call_frame_t *frame, off_t offset, size_t size, uint32_t flags) +{ + xlator_t *this = NULL; + cs_private_t *priv = NULL; + int ret = -1; + fd_t *fd = NULL; + cs_local_t *local = NULL; + + local = frame->local; + this = frame->this; + priv = 
this->private; + + if (!local->remotepath) { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "remote path not" + " available. Check posix logs to resolve"); + goto out; + } + + if (!priv->stores) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "No remote store " + "plugins found"); + ret = -1; + goto out; + } + + if (local->fd) { + fd = fd_anonymous(local->fd->inode); + } else { + fd = fd_anonymous(local->loc.inode); + } + + local->xattrinfo.size = size; + local->xattrinfo.offset = offset; + local->xattrinfo.flags = flags; + + if (!fd) { + gf_msg("CS", GF_LOG_ERROR, 0, 0, "fd creation failed"); + ret = -1; + goto out; + } + + local->dlfd = fd; + local->dloffset = offset; + + /*this calling method is for per volume setting */ + ret = priv->stores->rdfop(frame, priv->stores->config); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "read failed" + ", remotepath: %s", + local->remotepath); + ret = -1; + goto out; + } else { + gf_msg(this->name, GF_LOG_INFO, 0, 0, + "read success, path" + " : %s", + local->remotepath); + } + +out: + if (fd) { + fd_unref(fd); + local->dlfd = NULL; + } + return ret; +} + +int32_t +cs_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iovec *vector, int32_t count, + struct iatt *stbuf, struct iobref *iobref, dict_t *xdata) +{ + cs_local_t *local = NULL; + int ret = 0; + uint64_t val = 0; + fd_t *fd = NULL; + + local = frame->local; + fd = local->fd; + + local->call_cnt++; + + if (op_ret == -1) { + ret = dict_get_uint64(xdata, GF_CS_OBJECT_STATUS, &val); + if (ret == 0) { + if (val == GF_CS_ERROR) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "could not get file state, unwinding"); + op_ret = -1; + op_errno = EIO; + goto unwind; + } else { + __cs_inode_ctx_update(this, fd->inode, val); + gf_msg(this->name, GF_LOG_INFO, 0, 0, " state = %" PRIu64, val); + + if (local->call_cnt == 1 && + (val == GF_CS_REMOTE || val == GF_CS_DOWNLOADING)) { + gf_msg(this->name, GF_LOG_INFO, 0, 0, + 
" will read from remote : %" PRIu64, val); + goto repair; + } else { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "second readv, Unwinding"); + goto unwind; + } + } + } else { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "file state " + "could not be figured, unwinding"); + goto unwind; + } + } else { + /* successful readv => file is local */ + __cs_inode_ctx_update(this, fd->inode, GF_CS_LOCAL); + gf_msg(this->name, GF_LOG_INFO, 0, 0, + "state : GF_CS_LOCAL" + ", readv successful"); + + goto unwind; + } + +repair: + ret = locate_and_execute(frame); + if (ret) { + goto unwind; + } + + return 0; + +unwind: + CS_STACK_UNWIND(readv, frame, op_ret, op_errno, vector, count, stbuf, + iobref, xdata); + + return 0; +} + +int32_t +cs_resume_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) +{ + int ret = 0; + + ret = cs_resume_postprocess(this, frame, fd->inode); + if (ret) { + goto unwind; + } + + cs_inodelk_unlock(frame); + + STACK_WIND(frame, cs_readv_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata); + + return 0; + +unwind: + cs_inodelk_unlock(frame); + + cs_common_cbk(frame); + + return 0; +} + +int32_t +cs_resume_remote_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, + size_t size, off_t offset, uint32_t flags, dict_t *xdata) +{ + int ret = 0; + cs_local_t *local = NULL; + gf_cs_obj_state state = -1; + cs_inode_ctx_t *ctx = NULL; + + cs_inodelk_unlock(frame); + + local = frame->local; + if (!local) { + ret = -1; + goto unwind; + } + + __cs_inode_ctx_get(this, fd->inode, &ctx); + + state = __cs_get_file_state(fd->inode, ctx); + if (state == GF_CS_ERROR) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "status is GF_CS_ERROR." + " Aborting readv"); + local->op_ret = -1; + local->op_errno = EREMOTE; + ret = -1; + goto unwind; + } + + /* Serve readv from remote store only if it is remote. */ + gf_msg_debug(this->name, 0, "status of file %s is %d", + local->remotepath ? 
local->remotepath : "", state); + + /* We will reach this condition if local inode ctx had REMOTE + * state when the control was in cs_readv but after stat + * we got an updated state saying that the file is LOCAL. + */ + if (state == GF_CS_LOCAL) { + STACK_WIND(frame, cs_readv_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, + xdata); + } else if (state == GF_CS_REMOTE) { + ret = cs_resume_remote_readv_postprocess(this, frame, fd->inode, offset, + size, flags); + /* Failed to submit the remote readv fop to plugin */ + if (ret) { + local->op_ret = -1; + local->op_errno = EREMOTE; + goto unwind; + } + /* When the file is in any other intermediate state, + * we should not perform remote reads. + */ + } else { + local->op_ret = -1; + local->op_errno = EINVAL; + goto unwind; + } + + return 0; + +unwind: + cs_common_cbk(frame); + + return 0; +} + +int32_t +cs_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) +{ + int op_errno = ENOMEM; + cs_local_t *local = NULL; + int ret = 0; + cs_inode_ctx_t *ctx = NULL; + gf_cs_obj_state state = -1; + cs_private_t *priv = NULL; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(fd, err); + + priv = this->private; + + local = cs_local_init(this, frame, NULL, fd, GF_FOP_READ); + if (!local) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, "local init failed"); + goto err; + } + + __cs_inode_ctx_get(this, fd->inode, &ctx); + + if (ctx) + state = __cs_get_file_state(fd->inode, ctx); + else + state = GF_CS_LOCAL; + + local->xattr_req = xdata ? 
dict_ref(xdata) : (xdata = dict_new()); + + ret = dict_set_uint32(local->xattr_req, GF_CS_OBJECT_STATUS, 1); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "dict_set failed key:" + " %s", + GF_CS_OBJECT_STATUS); + goto err; + } + + if (priv->remote_read) { + local->stub = fop_readv_stub(frame, cs_resume_remote_readv, fd, size, + offset, flags, xdata); + } else { + local->stub = fop_readv_stub(frame, cs_resume_readv, fd, size, offset, + flags, xdata); + } + if (!local->stub) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, "insufficient memory"); + goto err; + } + + if (state == GF_CS_LOCAL) { + STACK_WIND(frame, cs_readv_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, + xdata); + } else { + local->call_cnt++; + ret = locate_and_execute(frame); + if (ret) { + goto err; + } + } + + return 0; + +err: + CS_STACK_UNWIND(readv, frame, -1, op_errno, NULL, -1, NULL, NULL, NULL); + + return 0; +} + +int +cs_resume_remote_readv_postprocess(xlator_t *this, call_frame_t *frame, + inode_t *inode, off_t offset, size_t size, + uint32_t flags) +{ + int ret = 0; + + ret = cs_serve_readv(frame, offset, size, flags); + + return ret; +} + +int +cs_stat_check_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, struct iatt *stbuf, dict_t *xdata) +{ + cs_local_t *local = NULL; + call_stub_t *stub = NULL; + char *filepath = NULL; + int ret = 0; + inode_t *inode = NULL; + uint64_t val = 0; + + local = frame->local; + + if (op_ret == -1) { + local->op_ret = op_ret; + local->op_errno = op_errno; + gf_msg(this->name, GF_LOG_ERROR, 0, op_errno, "stat check failed"); + goto err; + } else { + if (local->fd) + inode = local->fd->inode; + else + inode = local->loc.inode; + + if (!inode) { + local->op_ret = -1; + local->op_errno = EINVAL; + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "null inode " + "returned"); + goto err; + } + + ret = dict_get_uint64(xdata, GF_CS_OBJECT_STATUS, &val); + if (ret == 0) { + if (val == GF_CS_ERROR) { 
+ cs_inode_ctx_reset(this, inode); + local->op_ret = -1; + local->op_errno = EIO; + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "status = GF_CS_ERROR. failed to get " + " file state"); + goto err; + } else { + ret = __cs_inode_ctx_update(this, inode, val); + gf_msg_debug(this->name, 0, "status : %" PRIu64, val); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, "ctx update failed"); + local->op_ret = -1; + local->op_errno = ENOMEM; + goto err; + } + } + } else { + gf_msg_debug(this->name, 0, "status not found in dict"); + local->op_ret = -1; + local->op_errno = ENOMEM; + goto err; + } + + ret = dict_get_str_sizen(xdata, GF_CS_OBJECT_REMOTE, &filepath); + if (filepath) { + gf_msg_debug(this->name, 0, "filepath returned %s", filepath); + local->remotepath = gf_strdup(filepath); + if (!local->remotepath) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto err; + } + } else { + gf_msg_debug(this->name, 0, "NULL filepath"); + } + + ret = cs_update_xattrs(frame, xdata); + if (ret) + goto err; + + local->op_ret = 0; + local->xattr_rsp = dict_ref(xdata); + memcpy(&local->stbuf, stbuf, sizeof(struct iatt)); + } + + stub = local->stub; + local->stub = NULL; + call_resume(stub); + + return 0; +err: + cs_inodelk_unlock(frame); + + cs_common_cbk(frame); + + return 0; +} + +int +cs_do_stat_check(call_frame_t *main_frame) +{ + cs_local_t *local = NULL; + xlator_t *this = NULL; + int ret = 0; + + local = main_frame->local; + this = main_frame->this; + + ret = dict_set_uint32(local->xattr_req, GF_CS_OBJECT_REPAIR, 256); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, "dict_set failed"); + goto err; + } + + cs_set_xattr_req(main_frame); + + if (local->fd) { + STACK_WIND(main_frame, cs_stat_check_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, local->fd, local->xattr_req); + } else { + STACK_WIND(main_frame, cs_stat_check_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->stat, &local->loc, + local->xattr_req); + } + + return 0; + +err: + 
cs_inodelk_unlock(main_frame); + + cs_common_cbk(main_frame); + + return 0; +} + +void +cs_common_cbk(call_frame_t *frame) +{ + glusterfs_fop_t fop = -1; + cs_local_t *local = NULL; + + local = frame->local; + + fop = local->fop; + + /*Note: Only the failure case needs to be handled here. Since for + * successful stat check the fop will resume anyway. The unwind can + * happen from the fop_cbk and each cbk can unlock the inodelk in case + * a lock was taken before. The lock status can be stored in frame */ + + /* for failure case */ + + /*TODO: add other fops*/ + switch (fop) { + case GF_FOP_WRITE: + CS_STACK_UNWIND(writev, frame, local->op_ret, local->op_errno, NULL, + NULL, NULL); + break; + + case GF_FOP_SETXATTR: + CS_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno, + NULL); + break; + case GF_FOP_READ: + CS_STACK_UNWIND(readv, frame, local->op_ret, local->op_errno, NULL, + 0, NULL, NULL, NULL); + break; + case GF_FOP_FTRUNCATE: + CS_STACK_UNWIND(ftruncate, frame, local->op_ret, local->op_errno, + NULL, NULL, NULL); + break; + + case GF_FOP_TRUNCATE: + CS_STACK_UNWIND(truncate, frame, local->op_ret, local->op_errno, + NULL, NULL, NULL); + break; + default: + break; + } + + return; +} + +int +cs_blocking_inodelk_cbk(call_frame_t *lock_frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + cs_local_t *main_local = NULL; + call_frame_t *main_frame = NULL; + cs_local_t *lock_local = NULL; + + lock_local = lock_frame->local; + + main_frame = lock_local->main_frame; + main_local = main_frame->local; + + if (op_ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, "inodelk failed"); + main_local->op_errno = op_errno; + main_local->op_ret = op_ret; + goto err; + } + + main_local->locked = _gf_true; + + cs_lock_wipe(lock_frame); + + cs_do_stat_check(main_frame); + + return 0; +err: + cs_common_cbk(main_frame); + + cs_lock_wipe(lock_frame); + + return 0; +} + +int +cs_build_loc(loc_t *loc, call_frame_t *frame) +{ + cs_local_t 
*local = NULL; + int ret = -1; + + local = frame->local; + + if (local->fd) { + loc->inode = inode_ref(local->fd->inode); + if (loc->inode) { + gf_uuid_copy(loc->gfid, loc->inode->gfid); + ret = 0; + goto out; + } else { + ret = -1; + goto out; + } + } else { + loc->inode = inode_ref(local->loc.inode); + if (loc->inode) { + gf_uuid_copy(loc->gfid, loc->inode->gfid); + ret = 0; + goto out; + } else { + ret = -1; + goto out; + } + } +out: + return ret; +} + +int +cs_blocking_inodelk(call_frame_t *parent_frame) +{ + call_frame_t *lock_frame = NULL; + cs_local_t *lock_local = NULL; + xlator_t *this = NULL; + struct gf_flock flock = { + 0, + }; + int ret = 0; + + this = parent_frame->this; + + lock_frame = cs_lock_frame(parent_frame); + if (!lock_frame) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, "insuffcient memory"); + goto err; + } + + lock_local = cs_local_init(this, lock_frame, NULL, NULL, 0); + if (!lock_local) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, "local init failed"); + goto err; + } + + lock_local->main_frame = parent_frame; + + flock.l_type = F_WRLCK; + + ret = cs_build_loc(&lock_local->loc, parent_frame); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, "build_loc failed"); + goto err; + } + + STACK_WIND(lock_frame, cs_blocking_inodelk_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->inodelk, CS_LOCK_DOMAIN, + &lock_local->loc, F_SETLKW, &flock, NULL); + + return 0; +err: + if (lock_frame) + cs_lock_wipe(lock_frame); + + return -1; +} + +int +locate_and_execute(call_frame_t *frame) +{ + int ret = 0; + + ret = cs_blocking_inodelk(frame); + + if (ret) + return -1; + else + return 0; +} + +int32_t +cs_resume_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, + off_t offset, dict_t *xattr_req) +{ + cs_local_t *local = NULL; + int ret = 0; + + local = frame->local; + + ret = cs_resume_postprocess(this, frame, loc->inode); + if (ret) { + goto unwind; + } + + cs_inodelk_unlock(frame); + + STACK_WIND(frame, cs_truncate_cbk, FIRST_CHILD(this), + 
FIRST_CHILD(this)->fops->truncate, loc, offset, + local->xattr_req); + + return 0; + +unwind: + cs_inodelk_unlock(frame); + + cs_common_cbk(frame); + + return 0; +} + +int32_t +cs_resume_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + dict_t *dict, int32_t flags, dict_t *xdata) +{ + cs_local_t *local = NULL; + cs_inode_ctx_t *ctx = NULL; + gf_cs_obj_state state = GF_CS_ERROR; + + local = frame->local; + + __cs_inode_ctx_get(this, loc->inode, &ctx); + + state = __cs_get_file_state(loc->inode, ctx); + + if (state == GF_CS_ERROR) { + /* file is already remote */ + local->op_ret = -1; + local->op_errno = EINVAL; + gf_msg(this->name, GF_LOG_WARNING, 0, 0, + "file %s , could not figure file state", loc->path); + goto unwind; + } + + if (state == GF_CS_REMOTE) { + /* file is already remote */ + local->op_ret = -1; + local->op_errno = EINVAL; + gf_msg(this->name, GF_LOG_WARNING, 0, EINVAL, + "file %s is already remote", loc->path); + goto unwind; + } + + if (state == GF_CS_DOWNLOADING) { + gf_msg(this->name, GF_LOG_WARNING, 0, 0, + " file is in downloading state."); + local->op_ret = -1; + local->op_errno = EINVAL; + goto unwind; + } + + STACK_WIND(frame, cs_setxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, + local->xattr_req); + + return 0; +unwind: + cs_inodelk_unlock(frame); + + cs_common_cbk(frame); + + return 0; +} + +gf_cs_obj_state +__cs_get_file_state(inode_t *inode, cs_inode_ctx_t *ctx) +{ + gf_cs_obj_state state = -1; + + if (!ctx) + return GF_CS_ERROR; + + LOCK(&inode->lock); + { + state = ctx->state; + } + UNLOCK(&inode->lock); + + return state; +} + +void +__cs_inode_ctx_get(xlator_t *this, inode_t *inode, cs_inode_ctx_t **ctx) +{ + uint64_t ctxint = 0; + int ret = 0; + + LOCK(&inode->lock); + { + ret = __inode_ctx_get(inode, this, &ctxint); + } + UNLOCK(&inode->lock); + + if (ret) + *ctx = NULL; + else + *ctx = (cs_inode_ctx_t *)(uintptr_t)ctxint; + + return; +} + +int +__cs_inode_ctx_update(xlator_t *this, 
inode_t *inode, uint64_t val) +{ + cs_inode_ctx_t *ctx = NULL; + uint64_t ctxint = 0; + int ret = 0; + + LOCK(&inode->lock); + { + ret = __inode_ctx_get(inode, this, &ctxint); + if (ret) { + ctx = GF_CALLOC(1, sizeof(*ctx), gf_cs_mt_cs_inode_ctx_t); + if (!ctx) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, "ctx allocation failed"); + ret = -1; + goto out; + } + + ctx->state = val; + + ctxint = (uint64_t)(uintptr_t)ctx; + + ret = __inode_ctx_set(inode, this, &ctxint); + if (ret) { + GF_FREE(ctx); + goto out; + } + } else { + ctx = (cs_inode_ctx_t *)(uintptr_t)ctxint; + + ctx->state = val; + } + } + +out: + UNLOCK(&inode->lock); + + return ret; +} + +int +cs_inode_ctx_reset(xlator_t *this, inode_t *inode) +{ + cs_inode_ctx_t *ctx = NULL; + uint64_t ctxint = 0; + + inode_ctx_del(inode, this, &ctxint); + if (!ctxint) { + return 0; + } + + ctx = (cs_inode_ctx_t *)(uintptr_t)ctxint; + + GF_FREE(ctx); + return 0; +} + +int +cs_resume_postprocess(xlator_t *this, call_frame_t *frame, inode_t *inode) +{ + cs_local_t *local = NULL; + gf_cs_obj_state state = -1; + cs_inode_ctx_t *ctx = NULL; + int ret = 0; + + local = frame->local; + if (!local) { + ret = -1; + goto out; + } + + __cs_inode_ctx_get(this, inode, &ctx); + + state = __cs_get_file_state(inode, ctx); + if (state == GF_CS_ERROR) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "status is GF_CS_ERROR." 
+ " Aborting write"); + local->op_ret = -1; + local->op_errno = EREMOTE; + ret = -1; + goto out; + } + + if (state == GF_CS_REMOTE || state == GF_CS_DOWNLOADING) { + gf_msg_debug(this->name, 0, "status is %d", state); + ret = cs_download(frame); + if (ret == 0) { + gf_msg_debug(this->name, 0, "Winding for Final Write"); + } else { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + " download failed, unwinding writev"); + local->op_ret = -1; + local->op_errno = EREMOTE; + ret = -1; + } + } +out: + return ret; +} + +int32_t +cs_fdctx_to_dict(xlator_t *this, fd_t *fd, dict_t *dict) +{ + return 0; +} + +int32_t +cs_inode(xlator_t *this) +{ + return 0; +} + +int32_t +cs_inode_to_dict(xlator_t *this, dict_t *dict) +{ + return 0; +} + +int32_t +cs_history(xlator_t *this) +{ + return 0; +} + +int32_t +cs_fd(xlator_t *this) +{ + return 0; +} + +int32_t +cs_fd_to_dict(xlator_t *this, dict_t *dict) +{ + return 0; +} + +int32_t +cs_fdctx(xlator_t *this, fd_t *fd) +{ + return 0; +} + +int32_t +cs_inodectx(xlator_t *this, inode_t *ino) +{ + return 0; +} + +int32_t +cs_inodectx_to_dict(xlator_t *this, inode_t *ino, dict_t *dict) +{ + return 0; +} + +int32_t +cs_priv_to_dict(xlator_t *this, dict_t *dict, char *brickname) +{ + return 0; +} + +int32_t +cs_priv(xlator_t *this) +{ + return 0; +} + +int +cs_notify(xlator_t *this, int event, void *data, ...) 
+{ + return default_notify(this, event, data); +} + +struct xlator_fops cs_fops = { + .stat = cs_stat, + .readdirp = cs_readdirp, + .truncate = cs_truncate, + .seek = cs_seek, + .statfs = cs_statfs, + .fallocate = cs_fallocate, + .discard = cs_discard, + .getxattr = cs_getxattr, + .writev = cs_writev, + .setxattr = cs_setxattr, + .fgetxattr = cs_fgetxattr, + .lookup = cs_lookup, + .fsetxattr = cs_fsetxattr, + .readv = cs_readv, + .ftruncate = cs_ftruncate, + .rchecksum = cs_rchecksum, + .unlink = cs_unlink, + .open = cs_open, + .fstat = cs_fstat, + .zerofill = cs_zerofill, +}; + +struct xlator_cbks cs_cbks = { + .forget = cs_forget, +}; + +struct xlator_dumpops cs_dumpops = { + .fdctx_to_dict = cs_fdctx_to_dict, + .inode = cs_inode, + .inode_to_dict = cs_inode_to_dict, + .history = cs_history, + .fd = cs_fd, + .fd_to_dict = cs_fd_to_dict, + .fdctx = cs_fdctx, + .inodectx = cs_inodectx, + .inodectx_to_dict = cs_inodectx_to_dict, + .priv_to_dict = cs_priv_to_dict, + .priv = cs_priv, +}; + +struct volume_options cs_options[] = { + {.key = {"cloudsync-storetype"}, + .type = GF_OPTION_TYPE_STR, + .description = "Defines which remote store is enabled"}, + {.key = {"cloudsync-remote-read"}, + .type = GF_OPTION_TYPE_BOOL, + .description = "Defines a remote read fop when on"}, + {.key = {"cloudsync-store-id"}, + .type = GF_OPTION_TYPE_STR, + .description = "Defines a volume wide store id"}, + {.key = {"cloudsync-product-id"}, + .type = GF_OPTION_TYPE_STR, + .description = "Defines a volume wide product id"}, + {.key = {NULL}}, +}; + +xlator_api_t xlator_api = { + .init = cs_init, + .fini = cs_fini, + .notify = cs_notify, + .reconfigure = cs_reconfigure, + .mem_acct_init = cs_mem_acct_init, + .dumpops = &cs_dumpops, + .fops = &cs_fops, + .cbks = &cs_cbks, + .options = cs_options, + .identifier = "cloudsync", + .category = GF_TECH_PREVIEW, +}; diff --git a/xlators/features/cloudsync/src/cloudsync.h b/xlators/features/cloudsync/src/cloudsync.h new file mode 100644 index 
00000000000..d24141978d6 --- /dev/null +++ b/xlators/features/cloudsync/src/cloudsync.h @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com> + * This file is part of GlusterFS. + * + * This file is licensed to you under your choice of the GNU Lesser + * General Public License, version 3 or any later version (LGPLv3 or + * later), or the GNU General Public License, version 2 (GPLv2), in all + * cases as published by the Free Software Foundation. + */ + +#ifndef __CLOUDSYNC_H__ +#define __CLOUDSYNC_H__ + +#include <glusterfs/glusterfs.h> +#include <glusterfs/xlator.h> +#include <glusterfs/defaults.h> +#include <glusterfs/syncop.h> +#include <glusterfs/call-stub.h> +#include "cloudsync-common.h" +#include "cloudsync-autogen-fops.h" + +#define ALIGN_SIZE 4096 +#define CS_LOCK_DOMAIN "cs.protect.file.stat" +typedef struct cs_dlstore { + off_t off; + struct iovec *vector; + int32_t count; + struct iobref *iobref; + uint32_t flags; +} cs_dlstore; + +typedef struct cs_inode_ctx { + cs_loc_xattr_t locxattr; + gf_cs_obj_state state; +} cs_inode_ctx_t; + +struct cs_plugin { + char *name; /* store name */ + char *library; /* library to load for the given store */ + char *description; /* description about the store */ +}; + +cs_local_t * +cs_local_init(xlator_t *this, call_frame_t *frame, loc_t *loc, fd_t *fd, + glusterfs_fop_t fop); + +int +locate_and_execute(call_frame_t *frame); + +int32_t +cs_resume_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + dict_t *dict, int32_t flags, dict_t *xdata); + +int32_t +cs_inodelk_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata); + +size_t +cs_write_callback(void *lcurlbuf, size_t size, size_t nitems, void *frame); + +void +cs_common_cbk(call_frame_t *frame); + +gf_boolean_t +cs_is_file_remote(struct iatt *stbuf, dict_t *xattr); + +int32_t +cs_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t 
op_errno, dict_t *xdata); +int +cs_build_loc(loc_t *loc, call_frame_t *frame); + +int +cs_blocking_inodelk_cbk(call_frame_t *lock_frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata); + +int +cs_read_authinfo(xlator_t *this); + +int +__cs_inode_ctx_update(xlator_t *this, inode_t *inode, uint64_t val); + +int +cs_inode_ctx_reset(xlator_t *this, inode_t *inode); + +void +__cs_inode_ctx_get(xlator_t *this, inode_t *inode, cs_inode_ctx_t **ctx); + +gf_cs_obj_state +__cs_get_file_state(inode_t *inode, cs_inode_ctx_t *ctx); + +int +cs_inodelk_unlock(call_frame_t *main_frame); + +int +cs_resume_postprocess(xlator_t *this, call_frame_t *frame, inode_t *inode); + +int32_t +cs_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata); +int32_t +cs_resume_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, + off_t offset, dict_t *xattr_req); + +int32_t +cs_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iovec *vector, int32_t count, + struct iatt *stbuf, struct iobref *iobref, dict_t *xdata); +int32_t +cs_resume_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata); +int32_t +cs_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata); + +int +cs_resume_remote_readv_postprocess(xlator_t *this, call_frame_t *frame, + inode_t *inode, off_t offset, size_t size, + uint32_t flags); +int +cs_serve_readv(call_frame_t *frame, off_t offset, size_t size, uint32_t flags); +#endif /* __CLOUDSYNC_H__ */ diff --git a/xlators/features/compress/src/Makefile.am b/xlators/features/compress/src/Makefile.am index 1c4c80eec90..98271a9f3fc 100644 --- a/xlators/features/compress/src/Makefile.am +++ b/xlators/features/compress/src/Makefile.am @@ -4,13 +4,15 @@ xlatordir = 
$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features noinst_HEADERS = cdc.h cdc-mem-types.h -cdc_la_LDFLAGS = $(GF_XLATOR_DEFAULT_LDFLAGS) +cdc_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) cdc_la_SOURCES = cdc.c cdc-helper.c cdc_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(ZLIB_LIBS) -AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src -fPIC \ - -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS) $(LIBZ_CFLAGS) +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ + -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \ + -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS) \ + $(LIBZ_CFLAGS) AM_CFLAGS = -Wall $(GF_CFLAGS) diff --git a/xlators/features/compress/src/cdc-helper.c b/xlators/features/compress/src/cdc-helper.c index 0a9a0e3d29c..f973ff56cf5 100644 --- a/xlators/features/compress/src/cdc-helper.c +++ b/xlators/features/compress/src/cdc-helper.c @@ -8,9 +8,9 @@ cases as published by the Free Software Foundation. */ -#include "glusterfs.h" -#include "logging.h" -#include "syscall.h" +#include <glusterfs/glusterfs.h> +#include <glusterfs/logging.h> +#include <glusterfs/syscall.h> #include "cdc.h" #include "cdc-mem-types.h" @@ -34,118 +34,110 @@ * gzip_header is added only during debugging. 
* Refer to the function cdc_dump_iovec_to_disk */ -static const char gzip_header[10] = - { - '\037', '\213', Z_DEFLATED, 0, - 0, 0, 0, 0, - 0, GF_CDC_OS_ID - }; +static const char gzip_header[10] = {'\037', '\213', Z_DEFLATED, 0, 0, 0, 0, + 0, 0, GF_CDC_OS_ID}; static int32_t -cdc_next_iovec (xlator_t *this, cdc_info_t *ci) +cdc_next_iovec(xlator_t *this, cdc_info_t *ci) { - int ret = -1; - - ci->ncount++; - /* check for iovec overflow -- should not happen */ - if (ci->ncount == MAX_IOVEC) { - gf_log (this->name, GF_LOG_ERROR, - "Zlib output buffer overflow" - " ->ncount (%d) | ->MAX_IOVEC (%d)", - ci->ncount, MAX_IOVEC); - goto out; - } - - ret = 0; - - out: - return ret; + int ret = -1; + + ci->ncount++; + /* check for iovec overflow -- should not happen */ + if (ci->ncount == MAX_IOVEC) { + gf_log(this->name, GF_LOG_ERROR, + "Zlib output buffer overflow" + " ->ncount (%d) | ->MAX_IOVEC (%d)", + ci->ncount, MAX_IOVEC); + goto out; + } + + ret = 0; + +out: + return ret; } static void -cdc_put_long (unsigned char *string, unsigned long x) +cdc_put_long(unsigned char *string, unsigned long x) { - string[0] = (unsigned char) (x & 0xff); - string[1] = (unsigned char) ((x & 0xff00) >> 8); - string[2] = (unsigned char) ((x & 0xff0000) >> 16); - string[3] = (unsigned char) ((x & 0xff000000) >> 24); + string[0] = (unsigned char)(x & 0xff); + string[1] = (unsigned char)((x & 0xff00) >> 8); + string[2] = (unsigned char)((x & 0xff0000) >> 16); + string[3] = (unsigned char)((x & 0xff000000) >> 24); } static unsigned long -cdc_get_long (unsigned char *buf) +cdc_get_long(unsigned char *buf) { - return ((unsigned long) buf[0]) - | (((unsigned long) buf[1]) << 8) - | (((unsigned long) buf[2]) << 16) - | (((unsigned long) buf[3]) << 24); + return ((unsigned long)buf[0]) | (((unsigned long)buf[1]) << 8) | + (((unsigned long)buf[2]) << 16) | (((unsigned long)buf[3]) << 24); } static int32_t -cdc_init_gzip_trailer (xlator_t *this, cdc_priv_t *priv, cdc_info_t *ci) 
+cdc_init_gzip_trailer(xlator_t *this, cdc_priv_t *priv, cdc_info_t *ci) { - int ret = -1; - char *buf = NULL; + int ret = -1; + char *buf = NULL; - ret = cdc_next_iovec (this, ci); - if (ret) - goto out; + ret = cdc_next_iovec(this, ci); + if (ret) + goto out; - buf = CURR_VEC(ci).iov_base = - (char *) GF_CALLOC (1, GF_CDC_VALIDATION_SIZE, - gf_cdc_mt_gzip_trailer_t); + buf = CURR_VEC(ci).iov_base = (char *)GF_CALLOC(1, GF_CDC_VALIDATION_SIZE, + gf_cdc_mt_gzip_trailer_t); - if (!CURR_VEC(ci).iov_base) - goto out; + if (!CURR_VEC(ci).iov_base) + goto out; - CURR_VEC(ci).iov_len = GF_CDC_VALIDATION_SIZE; + CURR_VEC(ci).iov_len = GF_CDC_VALIDATION_SIZE; - cdc_put_long ((unsigned char *)&buf[0], ci->crc); - cdc_put_long ((unsigned char *)&buf[4], ci->stream.total_in); + cdc_put_long((unsigned char *)&buf[0], ci->crc); + cdc_put_long((unsigned char *)&buf[4], ci->stream.total_in); - ret = 0; + ret = 0; - out: - return ret; +out: + return ret; } static int32_t -cdc_alloc_iobuf_and_init_vec (xlator_t *this, - cdc_priv_t *priv, cdc_info_t *ci, - int size) +cdc_alloc_iobuf_and_init_vec(xlator_t *this, cdc_priv_t *priv, cdc_info_t *ci, + int size) { - int ret = -1; - int alloc_len = 0; - struct iobuf *iobuf = NULL; + int ret = -1; + int alloc_len = 0; + struct iobuf *iobuf = NULL; - ret = cdc_next_iovec (this, ci); - if (ret) - goto out; + ret = cdc_next_iovec(this, ci); + if (ret) + goto out; - alloc_len = size ? size : ci->buffer_size; + alloc_len = size ? 
size : ci->buffer_size; - iobuf = iobuf_get2 (this->ctx->iobuf_pool, alloc_len); - if (!iobuf) - goto out; + iobuf = iobuf_get2(this->ctx->iobuf_pool, alloc_len); + if (!iobuf) + goto out; - ret = iobref_add (ci->iobref, iobuf); - if (ret) - goto out; + ret = iobref_add(ci->iobref, iobuf); + if (ret) + goto out; - /* Initialize this iovec */ - CURR_VEC(ci).iov_base = iobuf->ptr; - CURR_VEC(ci).iov_len = alloc_len; + /* Initialize this iovec */ + CURR_VEC(ci).iov_base = iobuf->ptr; + CURR_VEC(ci).iov_len = alloc_len; - ret = 0; + ret = 0; - out: - return ret; +out: + return ret; } static void -cdc_init_zlib_output_stream (cdc_priv_t *priv, cdc_info_t *ci, int size) +cdc_init_zlib_output_stream(cdc_priv_t *priv, cdc_info_t *ci, int size) { - ci->stream.next_out = (unsigned char *) CURR_VEC(ci).iov_base; - ci->stream.avail_out = size ? size : ci->buffer_size; + ci->stream.next_out = (unsigned char *)CURR_VEC(ci).iov_base; + ci->stream.avail_out = size ? size : ci->buffer_size; } /* This routine is for testing and debugging only. @@ -153,391 +145,383 @@ cdc_init_zlib_output_stream (cdc_priv_t *priv, cdc_info_t *ci, int size) * So each gzip dump file is at least 18 bytes in size. 
*/ void -cdc_dump_iovec_to_disk (xlator_t *this, cdc_info_t *ci, const char *file) +cdc_dump_iovec_to_disk(xlator_t *this, cdc_info_t *ci, const char *file) { - int i = 0; - int fd = 0; - size_t written = 0; - size_t total_written = 0; - - fd = open (file, O_WRONLY|O_CREAT|O_TRUNC, 0777 ); - if (fd < 0) { - gf_log (this->name, GF_LOG_ERROR, - "Cannot open file: %s", file); - return; - } - - written = sys_write (fd, (char *) gzip_header, 10); + int i = 0; + int fd = 0; + size_t written = 0; + size_t total_written = 0; + + fd = open(file, O_WRONLY | O_CREAT | O_TRUNC, 0777); + if (fd < 0) { + gf_log(this->name, GF_LOG_ERROR, "Cannot open file: %s", file); + return; + } + + written = sys_write(fd, (char *)gzip_header, 10); + total_written += written; + for (i = 0; i < ci->ncount; i++) { + written = sys_write(fd, (char *)ci->vec[i].iov_base, + ci->vec[i].iov_len); total_written += written; - for (i = 0; i < ci->ncount; i++) { - written = sys_write (fd, (char *) ci->vec[i].iov_base, ci->vec[i].iov_len); - total_written += written; - } + } - gf_log (this->name, GF_LOG_DEBUG, - "dump'd %zu bytes to %s", total_written, GF_CDC_DEBUG_DUMP_FILE ); + gf_log(this->name, GF_LOG_DEBUG, "dump'd %zu bytes to %s", total_written, + GF_CDC_DEBUG_DUMP_FILE); - sys_close (fd); + sys_close(fd); } static int32_t -cdc_flush_libz_buffer (cdc_priv_t *priv, xlator_t *this, cdc_info_t *ci, - int (*libz_func)(z_streamp, int), - int flush) +cdc_flush_libz_buffer(cdc_priv_t *priv, xlator_t *this, cdc_info_t *ci, + int (*libz_func)(z_streamp, int), int flush) { - int32_t ret = Z_OK; - int done = 0; - unsigned int deflate_len = 0; + int32_t ret = Z_OK; + int done = 0; + unsigned int deflate_len = 0; - for (;;) { - deflate_len = ci->buffer_size - ci->stream.avail_out; + for (;;) { + deflate_len = ci->buffer_size - ci->stream.avail_out; - if (deflate_len != 0) { - CURR_VEC(ci).iov_len = deflate_len; + if (deflate_len != 0) { + CURR_VEC(ci).iov_len = deflate_len; - ret = cdc_alloc_iobuf_and_init_vec 
(this, priv, ci, 0); - if (ret) { - ret = Z_MEM_ERROR; - break; - } + ret = cdc_alloc_iobuf_and_init_vec(this, priv, ci, 0); + if (ret) { + ret = Z_MEM_ERROR; + break; + } - /* Re-position Zlib output buffer */ - cdc_init_zlib_output_stream (priv, ci, 0); - } + /* Re-position Zlib output buffer */ + cdc_init_zlib_output_stream(priv, ci, 0); + } - if (done) { - ci->ncount--; - break; - } + if (done) { + ci->ncount--; + break; + } - ret = libz_func (&ci->stream, flush); + ret = libz_func(&ci->stream, flush); - if (ret == Z_BUF_ERROR) { - ret = Z_OK; - ci->ncount--; - break; - } + if (ret == Z_BUF_ERROR) { + ret = Z_OK; + ci->ncount--; + break; + } - done = (ci->stream.avail_out != 0 || ret == Z_STREAM_END); + done = (ci->stream.avail_out != 0 || ret == Z_STREAM_END); - if (ret != Z_OK && ret != Z_STREAM_END) - break; - } + if (ret != Z_OK && ret != Z_STREAM_END) + break; + } - return ret; + return ret; } static int32_t -do_cdc_compress (struct iovec *vec, xlator_t *this, cdc_priv_t *priv, - cdc_info_t *ci) +do_cdc_compress(struct iovec *vec, xlator_t *this, cdc_priv_t *priv, + cdc_info_t *ci) { - int ret = -1; + int ret = -1; - /* Initialize defalte */ - ret = deflateInit2 (&ci->stream, priv->cdc_level, Z_DEFLATED, - priv->window_size, priv->mem_level, - Z_DEFAULT_STRATEGY); + /* Initialize defalte */ + ret = deflateInit2(&ci->stream, priv->cdc_level, Z_DEFLATED, + priv->window_size, priv->mem_level, Z_DEFAULT_STRATEGY); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "unable to init Zlib (retval: %d)", ret); - goto out; - } + if (ret) { + gf_log(this->name, GF_LOG_ERROR, "unable to init Zlib (retval: %d)", + ret); + goto out; + } - ret = cdc_alloc_iobuf_and_init_vec (this, priv, ci, 0); - if (ret) - goto out; + ret = cdc_alloc_iobuf_and_init_vec(this, priv, ci, 0); + if (ret) + goto out; - /* setup output buffer */ - cdc_init_zlib_output_stream (priv, ci, 0); + /* setup output buffer */ + cdc_init_zlib_output_stream(priv, ci, 0); - /* setup input buffer */ - 
ci->stream.next_in = (unsigned char *) vec->iov_base; - ci->stream.avail_in = vec->iov_len; + /* setup input buffer */ + ci->stream.next_in = (unsigned char *)vec->iov_base; + ci->stream.avail_in = vec->iov_len; - ci->crc = crc32 (ci->crc, (const Bytef *) vec->iov_base, vec->iov_len); + ci->crc = crc32(ci->crc, (const Bytef *)vec->iov_base, vec->iov_len); - gf_log (this->name, GF_LOG_DEBUG, "crc=%lu len=%d buffer_size=%d", - ci->crc, ci->stream.avail_in, ci->buffer_size); + gf_log(this->name, GF_LOG_DEBUG, "crc=%lu len=%d buffer_size=%d", ci->crc, + ci->stream.avail_in, ci->buffer_size); - /* compress !! */ - while (ci->stream.avail_in != 0) { - if (ci->stream.avail_out == 0) { + /* compress !! */ + while (ci->stream.avail_in != 0) { + if (ci->stream.avail_out == 0) { + CURR_VEC(ci).iov_len = ci->buffer_size; - CURR_VEC(ci).iov_len = ci->buffer_size; + ret = cdc_alloc_iobuf_and_init_vec(this, priv, ci, 0); + if (ret) + break; - ret = cdc_alloc_iobuf_and_init_vec (this, priv, ci, 0); - if (ret) - break; - - /* Re-position Zlib output buffer */ - cdc_init_zlib_output_stream (priv, ci, 0); - } - - ret = deflate (&ci->stream, Z_NO_FLUSH); - if (ret != Z_OK) - break; + /* Re-position Zlib output buffer */ + cdc_init_zlib_output_stream(priv, ci, 0); } - out: - return ret; + ret = deflate(&ci->stream, Z_NO_FLUSH); + if (ret != Z_OK) + break; + } + +out: + return ret; } int32_t -cdc_compress (xlator_t *this, cdc_priv_t *priv, cdc_info_t *ci, - dict_t **xdata) +cdc_compress(xlator_t *this, cdc_priv_t *priv, cdc_info_t *ci, dict_t **xdata) { - int ret = -1; - int i = 0; + int ret = -1; + int i = 0; - ci->iobref = iobref_new (); - if (!ci->iobref) - goto out; + ci->iobref = iobref_new(); + if (!ci->iobref) + goto out; + if (!*xdata) { + *xdata = dict_new(); if (!*xdata) { - *xdata = dict_new (); - if (!*xdata) { - gf_log (this->name, GF_LOG_ERROR, "Cannot allocate xdata" - " dict"); - goto out; - } - } - - /* data */ - for (i = 0; i < ci->count; i++) { - ret = do_cdc_compress 
(&ci->vector[i], this, priv, ci); - if (ret != Z_OK) - goto deflate_cleanup_out; - } - - /* flush zlib buffer */ - ret = cdc_flush_libz_buffer (priv, this, ci, deflate, Z_FINISH); - if (!(ret == Z_OK || ret == Z_STREAM_END)) { - gf_log (this->name, GF_LOG_ERROR, - "Compression Error: ret (%d)", ret); - ret = -1; - goto deflate_cleanup_out; - } - - /* trailer */ - ret = cdc_init_gzip_trailer (this, priv, ci); - if (ret) - goto deflate_cleanup_out; - - gf_log (this->name, GF_LOG_DEBUG, - "Compressed %ld to %ld bytes", - ci->stream.total_in, ci->stream.total_out); - - ci->nbytes = ci->stream.total_out + GF_CDC_VALIDATION_SIZE; - - /* set deflated canary value for identification */ - ret = dict_set_int32 (*xdata, GF_CDC_DEFLATE_CANARY_VAL, 1); - if (ret) { - /* Send uncompressed data if we can't _tell_ the client - * that deflated data is on it's way. So, we just log - * the faliure and continue as usual. - */ - gf_log (this->name, GF_LOG_ERROR, - "Data deflated, but could not set canary" - " value in dict for identification"); + gf_log(this->name, GF_LOG_ERROR, + "Cannot allocate xdata" + " dict"); + goto out; } + } + + /* data */ + for (i = 0; i < ci->count; i++) { + ret = do_cdc_compress(&ci->vector[i], this, priv, ci); + if (ret != Z_OK) + goto deflate_cleanup_out; + } + + /* flush zlib buffer */ + ret = cdc_flush_libz_buffer(priv, this, ci, deflate, Z_FINISH); + if (!(ret == Z_OK || ret == Z_STREAM_END)) { + gf_log(this->name, GF_LOG_ERROR, "Compression Error: ret (%d)", ret); + ret = -1; + goto deflate_cleanup_out; + } + + /* trailer */ + ret = cdc_init_gzip_trailer(this, priv, ci); + if (ret) + goto deflate_cleanup_out; + + gf_log(this->name, GF_LOG_DEBUG, "Compressed %ld to %ld bytes", + ci->stream.total_in, ci->stream.total_out); + + ci->nbytes = ci->stream.total_out + GF_CDC_VALIDATION_SIZE; + + /* set deflated canary value for identification */ + ret = dict_set_int32(*xdata, GF_CDC_DEFLATE_CANARY_VAL, 1); + if (ret) { + /* Send uncompressed data if we can't 
_tell_ the client + * that deflated data is on it's way. So, we just log + * the failure and continue as usual. + */ + gf_log(this->name, GF_LOG_ERROR, + "Data deflated, but could not set canary" + " value in dict for identification"); + } - /* This is to be used in testing */ - if ( priv->debug ) { - cdc_dump_iovec_to_disk (this, ci, GF_CDC_DEBUG_DUMP_FILE ); - } + /* This is to be used in testing */ + if (priv->debug) { + cdc_dump_iovec_to_disk(this, ci, GF_CDC_DEBUG_DUMP_FILE); + } - deflate_cleanup_out: - (void) deflateEnd(&ci->stream); +deflate_cleanup_out: + (void)deflateEnd(&ci->stream); - out: - return ret; +out: + return ret; } - /* deflate content is checked by the presence of a canary * value in the dict as the key */ static int32_t -cdc_check_content_for_deflate (dict_t *xdata) +cdc_check_content_for_deflate(dict_t *xdata) { - return dict_get (xdata, GF_CDC_DEFLATE_CANARY_VAL) ? -1 : 0; + return dict_get(xdata, GF_CDC_DEFLATE_CANARY_VAL) ? -1 : 0; } static unsigned long -cdc_extract_crc (char *trailer) +cdc_extract_crc(char *trailer) { - return cdc_get_long ((unsigned char *) &trailer[0]); + return cdc_get_long((unsigned char *)&trailer[0]); } static unsigned long -cdc_extract_size (char *trailer) +cdc_extract_size(char *trailer) { - return cdc_get_long ((unsigned char *) &trailer[4]); + return cdc_get_long((unsigned char *)&trailer[4]); } static int32_t -cdc_validate_inflate (cdc_info_t *ci, unsigned long crc, - unsigned long len) +cdc_validate_inflate(cdc_info_t *ci, unsigned long crc, unsigned long len) { - return !((crc == ci->crc) - /* inflated length is hidden inside - * Zlib stream struct */ - && (len == ci->stream.total_out)); + return !((crc == ci->crc) + /* inflated length is hidden inside + * Zlib stream struct */ + && (len == ci->stream.total_out)); } static int32_t -do_cdc_decompress (xlator_t *this, cdc_priv_t *priv, cdc_info_t *ci) +do_cdc_decompress(xlator_t *this, cdc_priv_t *priv, cdc_info_t *ci) { - int ret = -1; - int i = 0; - int 
len = 0; - char *inflte = NULL; - char *trailer = NULL; - struct iovec vec = {0,}; - unsigned long computed_crc = 0; - unsigned long computed_len = 0; - - ret = inflateInit2 (&ci->stream, priv->window_size); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "Zlib: Unable to initialize inflate"); - goto out; + int ret = -1; + int i = 0; + int len = 0; + char *inflte = NULL; + char *trailer = NULL; + struct iovec vec = { + 0, + }; + unsigned long computed_crc = 0; + unsigned long computed_len = 0; + + ret = inflateInit2(&ci->stream, priv->window_size); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, "Zlib: Unable to initialize inflate"); + goto out; + } + + vec = THIS_VEC(ci, 0); + + trailer = (char *)(((char *)vec.iov_base) + vec.iov_len - + GF_CDC_VALIDATION_SIZE); + + /* CRC of uncompressed data */ + computed_crc = cdc_extract_crc(trailer); + + /* size of uncomrpessed data */ + computed_len = cdc_extract_size(trailer); + + gf_log(this->name, GF_LOG_DEBUG, "crc=%lu len=%lu buffer_size=%d", + computed_crc, computed_len, ci->buffer_size); + + inflte = vec.iov_base; + len = vec.iov_len - GF_CDC_VALIDATION_SIZE; + + /* allocate buffer of the original length of the data */ + ret = cdc_alloc_iobuf_and_init_vec(this, priv, ci, 0); + if (ret) + goto out; + + /* setup output buffer */ + cdc_init_zlib_output_stream(priv, ci, 0); + + /* setup input buffer */ + ci->stream.next_in = (unsigned char *)inflte; + ci->stream.avail_in = len; + + while (ci->stream.avail_in != 0) { + if (ci->stream.avail_out == 0) { + CURR_VEC(ci).iov_len = ci->buffer_size; + + ret = cdc_alloc_iobuf_and_init_vec(this, priv, ci, 0); + if (ret) + break; + + /* Re-position Zlib output buffer */ + cdc_init_zlib_output_stream(priv, ci, 0); } - vec = THIS_VEC(ci, 0); - - trailer = (char *) (((char *) vec.iov_base) + vec.iov_len - - GF_CDC_VALIDATION_SIZE); - - /* CRC of uncompressed data */ - computed_crc = cdc_extract_crc (trailer); - - /* size of uncomrpessed data */ - computed_len = cdc_extract_size 
(trailer); - - gf_log (this->name, GF_LOG_DEBUG, "crc=%lu len=%lu buffer_size=%d", - computed_crc, computed_len, ci->buffer_size); - - inflte = vec.iov_base ; - len = vec.iov_len - GF_CDC_VALIDATION_SIZE; - - /* allocate buffer of the original length of the data */ - ret = cdc_alloc_iobuf_and_init_vec (this, priv, ci, 0); - if (ret) - goto out; - - /* setup output buffer */ - cdc_init_zlib_output_stream (priv, ci, 0); - - /* setup input buffer */ - ci->stream.next_in = (unsigned char *) inflte; - ci->stream.avail_in = len; - - while (ci->stream.avail_in != 0) { - if (ci->stream.avail_out == 0) { - CURR_VEC(ci).iov_len = ci->buffer_size; - - ret = cdc_alloc_iobuf_and_init_vec (this, priv, ci, 0); - if (ret) - break; - - /* Re-position Zlib output buffer */ - cdc_init_zlib_output_stream (priv, ci, 0); - } - - ret = inflate (&ci->stream, Z_NO_FLUSH); - if (ret == Z_STREAM_ERROR) - break; - } - - /* flush zlib buffer */ - ret = cdc_flush_libz_buffer (priv, this, ci, inflate, Z_SYNC_FLUSH); - if (!(ret == Z_OK || ret == Z_STREAM_END)) { - gf_log (this->name, GF_LOG_ERROR, - "Decompression Error: ret (%d)", ret); - ret = -1; - goto out; - } - - /* compute CRC of the uncompresses data to check for - * correctness */ - - for (i = 0; i < ci->ncount; i++) { - ci->crc = crc32 (ci->crc, - (const Bytef *) ci->vec[i].iov_base, - ci->vec[i].iov_len); - } - - /* validate inflated data */ - ret = cdc_validate_inflate (ci, computed_crc, computed_len); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "Checksum or length mismatched in inflated data"); - } - - out: - return ret; + ret = inflate(&ci->stream, Z_NO_FLUSH); + if (ret == Z_STREAM_ERROR) + break; + } + + /* flush zlib buffer */ + ret = cdc_flush_libz_buffer(priv, this, ci, inflate, Z_SYNC_FLUSH); + if (!(ret == Z_OK || ret == Z_STREAM_END)) { + gf_log(this->name, GF_LOG_ERROR, "Decompression Error: ret (%d)", ret); + ret = -1; + goto out; + } + + /* compute CRC of the uncompresses data to check for + * correctness */ + + 
for (i = 0; i < ci->ncount; i++) { + ci->crc = crc32(ci->crc, (const Bytef *)ci->vec[i].iov_base, + ci->vec[i].iov_len); + } + + /* validate inflated data */ + ret = cdc_validate_inflate(ci, computed_crc, computed_len); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, + "Checksum or length mismatched in inflated data"); + } + +out: + return ret; } int32_t -cdc_decompress (xlator_t *this, cdc_priv_t *priv, cdc_info_t *ci, - dict_t *xdata) +cdc_decompress(xlator_t *this, cdc_priv_t *priv, cdc_info_t *ci, dict_t *xdata) { - int32_t ret = -1; - - /* check for deflate content */ - if (!cdc_check_content_for_deflate (xdata)) { - gf_log (this->name, GF_LOG_DEBUG, - "Content not deflated, passing through ..."); - goto passthrough_out; - } - - ci->iobref = iobref_new (); - if (!ci->iobref) - goto passthrough_out; - - /* do we need to do this? can we assume that one iovec - * will hold per request data every time? - * - * server/client protocol seems to deal with a single - * iovec even if op_ret > 1M. So, it looks ok to - * assume that a single iovec will contain all the - * data (This saves us a lot from finding the trailer - * and the data since it could have been split-up onto - * two adjacent iovec's. - * - * But, in case this translator is loaded above quick-read - * for some reason, then it's entirely possible that we get - * multiple iovec's... - * - * This case (handled below) is not tested. 
(by loading the - * xlator below quick-read) - */ - - /* @@ I_HOPE_THIS_IS_NEVER_HIT */ - if (ci->count > 1) { - gf_log (this->name, GF_LOG_WARNING, "unable to handle" - " multiple iovecs (%d in number)", ci->count); - goto inflate_cleanup_out; - /* TODO: coallate all iovecs in one */ - } - - ret = do_cdc_decompress (this, priv, ci); - if (ret) - goto inflate_cleanup_out; - - ci->nbytes = ci->stream.total_out; - - gf_log (this->name, GF_LOG_DEBUG, - "Inflated %ld to %ld bytes", - ci->stream.total_in, ci->stream.total_out); - - inflate_cleanup_out: - (void) inflateEnd (&ci->stream); - - passthrough_out: - return ret; + int32_t ret = -1; + + /* check for deflate content */ + if (!cdc_check_content_for_deflate(xdata)) { + gf_log(this->name, GF_LOG_DEBUG, + "Content not deflated, passing through ..."); + goto passthrough_out; + } + + ci->iobref = iobref_new(); + if (!ci->iobref) + goto passthrough_out; + + /* do we need to do this? can we assume that one iovec + * will hold per request data every time? + * + * server/client protocol seems to deal with a single + * iovec even if op_ret > 1M. So, it looks ok to + * assume that a single iovec will contain all the + * data (This saves us a lot from finding the trailer + * and the data since it could have been split-up onto + * two adjacent iovec's. + * + * But, in case this translator is loaded above quick-read + * for some reason, then it's entirely possible that we get + * multiple iovec's... + * + * This case (handled below) is not tested. 
(by loading the + * xlator below quick-read) + */ + + /* @@ I_HOPE_THIS_IS_NEVER_HIT */ + if (ci->count > 1) { + gf_log(this->name, GF_LOG_WARNING, + "unable to handle" + " multiple iovecs (%d in number)", + ci->count); + goto inflate_cleanup_out; + /* TODO: coallate all iovecs in one */ + } + + ret = do_cdc_decompress(this, priv, ci); + if (ret) + goto inflate_cleanup_out; + + ci->nbytes = ci->stream.total_out; + + gf_log(this->name, GF_LOG_DEBUG, "Inflated %ld to %ld bytes", + ci->stream.total_in, ci->stream.total_out); + +inflate_cleanup_out: + (void)inflateEnd(&ci->stream); + +passthrough_out: + return ret; } #endif diff --git a/xlators/features/compress/src/cdc-mem-types.h b/xlators/features/compress/src/cdc-mem-types.h index ead2c70ba6e..928afdd2efe 100644 --- a/xlators/features/compress/src/cdc-mem-types.h +++ b/xlators/features/compress/src/cdc-mem-types.h @@ -11,13 +11,13 @@ #ifndef __CDC_MEM_TYPES_H #define __CDC_MEM_TYPES_H -#include "mem-types.h" +#include <glusterfs/mem-types.h> enum gf_cdc_mem_types { - gf_cdc_mt_priv_t = gf_common_mt_end + 1, - gf_cdc_mt_vec_t = gf_common_mt_end + 2, - gf_cdc_mt_gzip_trailer_t = gf_common_mt_end + 3, - gf_cdc_mt_end = gf_common_mt_end + 4, + gf_cdc_mt_priv_t = gf_common_mt_end + 1, + gf_cdc_mt_vec_t = gf_common_mt_end + 2, + gf_cdc_mt_gzip_trailer_t = gf_common_mt_end + 3, + gf_cdc_mt_end = gf_common_mt_end + 4, }; #endif diff --git a/xlators/features/compress/src/cdc.c b/xlators/features/compress/src/cdc.c index e33d4efc1a1..b0b51e914ed 100644 --- a/xlators/features/compress/src/cdc.c +++ b/xlators/features/compress/src/cdc.c @@ -10,347 +10,339 @@ #include <sys/uio.h> -#include "xlator.h" -#include "defaults.h" -#include "logging.h" +#include <glusterfs/xlator.h> +#include <glusterfs/defaults.h> +#include <glusterfs/logging.h> #include "cdc.h" #include "cdc-mem-types.h" static void -cdc_cleanup_iobref (cdc_info_t *ci) +cdc_cleanup_iobref(cdc_info_t *ci) { - assert(ci->iobref != NULL); - iobref_clear (ci->iobref); + 
assert(ci->iobref != NULL); + iobref_clear(ci->iobref); } int32_t -cdc_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iovec *vector, int32_t count, - struct iatt *stbuf, struct iobref *iobref, - dict_t *xdata) +cdc_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iovec *vector, int32_t count, + struct iatt *stbuf, struct iobref *iobref, dict_t *xdata) { - int ret = -1; - cdc_priv_t *priv = NULL; - cdc_info_t ci = {0,}; - - GF_VALIDATE_OR_GOTO ("cdc", this, default_out); - GF_VALIDATE_OR_GOTO (this->name, frame, default_out); - - priv = this->private; - - if (op_ret <= 0) - goto default_out; - - if ( (priv->min_file_size != 0) - && (op_ret < priv->min_file_size) ) - goto default_out; - - ci.count = count; - ci.ibytes = op_ret; - ci.vector = vector; - ci.buf = NULL; - ci.iobref = NULL; - ci.ncount = 0; - ci.crc = 0; - ci.buffer_size = GF_CDC_DEF_BUFFERSIZE; - -/* A readv compresses on the server side and decompresses on the client side - */ - if (priv->op_mode == GF_CDC_MODE_SERVER) { - ret = cdc_compress (this, priv, &ci, &xdata); - } else if (priv->op_mode == GF_CDC_MODE_CLIENT) { - ret = cdc_decompress (this, priv, &ci, xdata); - } else { - gf_log (this->name, GF_LOG_ERROR, - "Invalid operation mode (%d)", priv->op_mode); - } - - if (ret) - goto default_out; - - STACK_UNWIND_STRICT (readv, frame, ci.nbytes, op_errno, - ci.vec, ci.ncount, stbuf, iobref, - xdata); - cdc_cleanup_iobref (&ci); - return 0; - - default_out: - STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, - vector, count, stbuf, iobref, xdata); - return 0; + int ret = -1; + cdc_priv_t *priv = NULL; + cdc_info_t ci = { + 0, + }; + + GF_VALIDATE_OR_GOTO("cdc", this, default_out); + GF_VALIDATE_OR_GOTO(this->name, frame, default_out); + + priv = this->private; + + if (op_ret <= 0) + goto default_out; + + if ((priv->min_file_size != 0) && (op_ret < priv->min_file_size)) + goto 
default_out; + + ci.count = count; + ci.ibytes = op_ret; + ci.vector = vector; + ci.buf = NULL; + ci.iobref = NULL; + ci.ncount = 0; + ci.crc = 0; + ci.buffer_size = GF_CDC_DEF_BUFFERSIZE; + + /* A readv compresses on the server side and decompresses on the client side + */ + if (priv->op_mode == GF_CDC_MODE_SERVER) { + ret = cdc_compress(this, priv, &ci, &xdata); + } else if (priv->op_mode == GF_CDC_MODE_CLIENT) { + ret = cdc_decompress(this, priv, &ci, xdata); + } else { + gf_log(this->name, GF_LOG_ERROR, "Invalid operation mode (%d)", + priv->op_mode); + } + + if (ret) + goto default_out; + + STACK_UNWIND_STRICT(readv, frame, ci.nbytes, op_errno, ci.vec, ci.ncount, + stbuf, iobref, xdata); + cdc_cleanup_iobref(&ci); + return 0; + +default_out: + STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, vector, count, stbuf, + iobref, xdata); + return 0; } int32_t -cdc_readv (call_frame_t *frame, xlator_t *this, - fd_t *fd, size_t size, off_t offset, uint32_t flags, - dict_t *xdata) +cdc_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) { - fop_readv_cbk_t cbk = NULL; + fop_readv_cbk_t cbk = NULL; #ifdef HAVE_LIB_Z - cbk = cdc_readv_cbk; + cbk = cdc_readv_cbk; #else - cbk = default_readv_cbk; + cbk = default_readv_cbk; #endif - STACK_WIND (frame, cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readv, - fd, size, offset, flags, xdata); - return 0; + STACK_WIND(frame, cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->readv, + fd, size, offset, flags, xdata); + return 0; } int32_t -cdc_writev_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) +cdc_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - - STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf, xdata); - return 0; + 
STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + return 0; } int32_t -cdc_writev (call_frame_t *frame, - xlator_t *this, - fd_t *fd, - struct iovec *vector, - int32_t count, - off_t offset, - uint32_t flags, - struct iobref *iobref, dict_t *xdata) +cdc_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, + int32_t count, off_t offset, uint32_t flags, struct iobref *iobref, + dict_t *xdata) { - int ret = -1; - cdc_priv_t *priv = NULL; - cdc_info_t ci = {0,}; - size_t isize = 0; - - GF_VALIDATE_OR_GOTO ("cdc", this, default_out); - GF_VALIDATE_OR_GOTO (this->name, frame, default_out); - - priv = this->private; - - isize = iov_length(vector, count); - - if (isize <= 0) - goto default_out; - - if ( (priv->min_file_size != 0) && (isize < priv->min_file_size) ) - goto default_out; - - ci.count = count; - ci.ibytes = isize; - ci.vector = vector; - ci.buf = NULL; - ci.iobref = NULL; - ci.ncount = 0; - ci.crc = 0; - ci.buffer_size = GF_CDC_DEF_BUFFERSIZE; - -/* A writev compresses on the client side and decompresses on the server side - */ - if (priv->op_mode == GF_CDC_MODE_CLIENT) { - ret = cdc_compress (this, priv, &ci, &xdata); - } else if (priv->op_mode == GF_CDC_MODE_SERVER) { - ret = cdc_decompress (this, priv, &ci, xdata); - } else { - gf_log (this->name, GF_LOG_ERROR, "Invalid operation mode (%d) ", priv->op_mode); - } - - if (ret) - goto default_out; - - STACK_WIND (frame, - cdc_writev_cbk, - FIRST_CHILD (this), - FIRST_CHILD (this)->fops->writev, - fd, ci.vec, ci.ncount, offset, flags, - iobref, xdata); - - cdc_cleanup_iobref (&ci); - return 0; - - default_out: - STACK_WIND (frame, - cdc_writev_cbk, - FIRST_CHILD (this), - FIRST_CHILD (this)->fops->writev, - fd, vector, count, offset, flags, - iobref, xdata); - return 0; + int ret = -1; + cdc_priv_t *priv = NULL; + cdc_info_t ci = { + 0, + }; + size_t isize = 0; + + GF_VALIDATE_OR_GOTO("cdc", this, err); + GF_VALIDATE_OR_GOTO(this->name, frame, err); + + priv 
= this->private; + + isize = iov_length(vector, count); + + if (isize <= 0) + goto default_out; + + if ((priv->min_file_size != 0) && (isize < priv->min_file_size)) + goto default_out; + + ci.count = count; + ci.ibytes = isize; + ci.vector = vector; + ci.buf = NULL; + ci.iobref = NULL; + ci.ncount = 0; + ci.crc = 0; + ci.buffer_size = GF_CDC_DEF_BUFFERSIZE; + + /* A writev compresses on the client side and decompresses on the server + * side + */ + if (priv->op_mode == GF_CDC_MODE_CLIENT) { + ret = cdc_compress(this, priv, &ci, &xdata); + } else if (priv->op_mode == GF_CDC_MODE_SERVER) { + ret = cdc_decompress(this, priv, &ci, xdata); + } else { + gf_log(this->name, GF_LOG_ERROR, "Invalid operation mode (%d) ", + priv->op_mode); + } + + if (ret) + goto default_out; + + STACK_WIND(frame, cdc_writev_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->writev, fd, ci.vec, ci.ncount, offset, + flags, iobref, xdata); + + cdc_cleanup_iobref(&ci); + return 0; + +default_out: + STACK_WIND(frame, cdc_writev_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->writev, fd, vector, count, offset, + flags, iobref, xdata); + return 0; +err: + STACK_UNWIND_STRICT(writev, frame, -1, EINVAL, NULL, NULL, NULL); + return 0; } int32_t -mem_acct_init (xlator_t *this) +mem_acct_init(xlator_t *this) { - int ret = -1; + int ret = -1; - if (!this) - return ret; - - ret = xlator_mem_acct_init (this, gf_cdc_mt_end); + if (!this) + return ret; - if (ret != 0) { - gf_log(this->name, GF_LOG_ERROR, "Memory accounting init" - "failed"); - return ret; - } + ret = xlator_mem_acct_init(this, gf_cdc_mt_end); + if (ret != 0) { + gf_log(this->name, GF_LOG_ERROR, + "Memory accounting init" + "failed"); return ret; + } + + return ret; } int32_t -init (xlator_t *this) +init(xlator_t *this) { - int ret = -1; - char *temp_str = NULL; - cdc_priv_t *priv = NULL; - - GF_VALIDATE_OR_GOTO ("cdc", this, err); - - if (!this->children || this->children->next) { - gf_log (this->name, GF_LOG_ERROR, - "Need subvolume == 
1"); - goto err; - } - - if (!this->parents) { - gf_log (this->name, GF_LOG_WARNING, - "Dangling volume. Check volfile"); - } - - priv = GF_CALLOC (1, sizeof (*priv), gf_cdc_mt_priv_t); - if (!priv) { - goto err; - } - - /* Check if debug mode is turned on */ - GF_OPTION_INIT ("debug", priv->debug, bool, err); - if( priv->debug ) { - gf_log (this->name, GF_LOG_DEBUG, "CDC debug option turned on"); - } - - /* Set Gzip Window Size */ - GF_OPTION_INIT ("window-size", priv->window_size, int32, err); - if ( (priv->window_size > GF_CDC_MAX_WINDOWSIZE) - || (priv->window_size < GF_CDC_DEF_WINDOWSIZE) ) { - gf_log (this->name, GF_LOG_WARNING, - "Invalid gzip window size (%d), using default", - priv->window_size); - priv->window_size = GF_CDC_DEF_WINDOWSIZE; - } - - /* Set Gzip (De)Compression Level */ - GF_OPTION_INIT ("compression-level", priv->cdc_level, int32, err); - if ( ((priv->cdc_level < 1) || (priv->cdc_level > 9)) - && (priv->cdc_level != GF_CDC_DEF_COMPRESSION) ) { - gf_log (this->name, GF_LOG_WARNING, - "Invalid gzip (de)compression level (%d)," - " using default", priv->cdc_level); - priv->cdc_level = GF_CDC_DEF_COMPRESSION; - } - - /* Set Gzip Memory Level */ - GF_OPTION_INIT ("mem-level", priv->mem_level, int32, err); - if ( (priv->mem_level < 1) || (priv->mem_level > 9) ) { - gf_log (this->name, GF_LOG_WARNING, - "Invalid gzip memory level, using the default"); - priv->mem_level = GF_CDC_DEF_MEMLEVEL; - } - - /* Set min file size to enable compression */ - GF_OPTION_INIT ("min-size", priv->min_file_size, int32, err); - - /* Mode of operation - Server/Client */ - ret = dict_get_str (this->options, "mode", &temp_str); - if (ret) { - gf_log (this->name, GF_LOG_CRITICAL, - "Operation mode not specified !!"); - goto err; - } - - if (GF_CDC_MODE_IS_CLIENT (temp_str)) { - priv->op_mode = GF_CDC_MODE_CLIENT; - } else if (GF_CDC_MODE_IS_SERVER (temp_str)) { - priv->op_mode = GF_CDC_MODE_SERVER; - } else { - gf_log (this->name, GF_LOG_CRITICAL, - "Bogus operation 
mode (%s) specified", temp_str); - goto err; - } - - this->private = priv; - gf_log (this->name, GF_LOG_DEBUG, "CDC xlator loaded in (%s) mode",temp_str); - return 0; - - err: - if (priv) - GF_FREE (priv); - - return -1; + int ret = -1; + char *temp_str = NULL; + cdc_priv_t *priv = NULL; + + GF_VALIDATE_OR_GOTO("cdc", this, err); + + if (!this->children || this->children->next) { + gf_log(this->name, GF_LOG_ERROR, "Need subvolume == 1"); + goto err; + } + + if (!this->parents) { + gf_log(this->name, GF_LOG_WARNING, "Dangling volume. Check volfile"); + } + + priv = GF_CALLOC(1, sizeof(*priv), gf_cdc_mt_priv_t); + if (!priv) { + goto err; + } + + /* Check if debug mode is turned on */ + GF_OPTION_INIT("debug", priv->debug, bool, err); + if (priv->debug) { + gf_log(this->name, GF_LOG_DEBUG, "CDC debug option turned on"); + } + + /* Set Gzip Window Size */ + GF_OPTION_INIT("window-size", priv->window_size, int32, err); + if ((priv->window_size > GF_CDC_MAX_WINDOWSIZE) || + (priv->window_size < GF_CDC_DEF_WINDOWSIZE)) { + gf_log(this->name, GF_LOG_WARNING, + "Invalid gzip window size (%d), using default", + priv->window_size); + priv->window_size = GF_CDC_DEF_WINDOWSIZE; + } + + /* Set Gzip (De)Compression Level */ + GF_OPTION_INIT("compression-level", priv->cdc_level, int32, err); + if (((priv->cdc_level < 1) || (priv->cdc_level > 9)) && + (priv->cdc_level != GF_CDC_DEF_COMPRESSION)) { + gf_log(this->name, GF_LOG_WARNING, + "Invalid gzip (de)compression level (%d)," + " using default", + priv->cdc_level); + priv->cdc_level = GF_CDC_DEF_COMPRESSION; + } + + /* Set Gzip Memory Level */ + GF_OPTION_INIT("mem-level", priv->mem_level, int32, err); + if ((priv->mem_level < 1) || (priv->mem_level > 9)) { + gf_log(this->name, GF_LOG_WARNING, + "Invalid gzip memory level, using the default"); + priv->mem_level = GF_CDC_DEF_MEMLEVEL; + } + + /* Set min file size to enable compression */ + GF_OPTION_INIT("min-size", priv->min_file_size, int32, err); + + /* Mode of operation - 
Server/Client */ + ret = dict_get_str(this->options, "mode", &temp_str); + if (ret) { + gf_log(this->name, GF_LOG_CRITICAL, "Operation mode not specified !!"); + goto err; + } + + if (GF_CDC_MODE_IS_CLIENT(temp_str)) { + priv->op_mode = GF_CDC_MODE_CLIENT; + } else if (GF_CDC_MODE_IS_SERVER(temp_str)) { + priv->op_mode = GF_CDC_MODE_SERVER; + } else { + gf_log(this->name, GF_LOG_CRITICAL, + "Bogus operation mode (%s) specified", temp_str); + goto err; + } + + this->private = priv; + gf_log(this->name, GF_LOG_DEBUG, "CDC xlator loaded in (%s) mode", + temp_str); + return 0; + +err: + if (priv) + GF_FREE(priv); + + return -1; } void -fini (xlator_t *this) +fini(xlator_t *this) { - cdc_priv_t *priv = this->private; + cdc_priv_t *priv = this->private; - if (priv) - GF_FREE (priv); - this->private = NULL; - return; + if (priv) + GF_FREE(priv); + this->private = NULL; + return; } struct xlator_fops fops = { - .readv = cdc_readv, - .writev = cdc_writev, + .readv = cdc_readv, + .writev = cdc_writev, }; -struct xlator_cbks cbks = { -}; +struct xlator_cbks cbks = {}; struct volume_options options[] = { - { .key = {"window-size"}, - .default_value = "-15", - .type = GF_OPTION_TYPE_INT, - .description = "Size of the zlib history buffer." - }, - { .key = {"mem-level"}, - .default_value = "8", - .type = GF_OPTION_TYPE_INT, - .description = "Memory allocated for internal compression state. " - "1 uses minimum memory but is slow and reduces " - "compression ratio; memLevel=9 uses maximum memory " - "for optimal speed. The default value is 8." - }, - { .key = {"compression-level"}, - .default_value = "-1", - .type = GF_OPTION_TYPE_INT, - .description = "Compression levels \n" - "0 : no compression, 1 : best speed, \n" - "9 : best compression, -1 : default compression " - }, - { .key = {"min-size"}, - .default_value = "0", - .type = GF_OPTION_TYPE_INT, - .description = "Data is compressed only when its size exceeds this." 
- }, - { .key = {"mode"}, - .value = {"server", "client"}, - .type = GF_OPTION_TYPE_STR, - .description = "Set on the basis of where the xlator is loaded. " - "This option should NOT be configured by user." - }, - { .key = {"debug"}, - .default_value = "false", - .type = GF_OPTION_TYPE_BOOL, - .description = "This is used in testing. Will dump compressed data " - "to disk as a gzip file." - }, - { .key = {NULL} - }, + {.key = {"window-size"}, + .default_value = "-15", + .type = GF_OPTION_TYPE_INT, + .description = "Size of the zlib history buffer."}, + {.key = {"mem-level"}, + .default_value = "8", + .type = GF_OPTION_TYPE_INT, + .description = "Memory allocated for internal compression state. " + "1 uses minimum memory but is slow and reduces " + "compression ratio; memLevel=9 uses maximum memory " + "for optimal speed. The default value is 8."}, + {.key = {"compression-level"}, + .default_value = "-1", + .type = GF_OPTION_TYPE_INT, + .description = "Compression levels \n" + "0 : no compression, 1 : best speed, \n" + "9 : best compression, -1 : default compression "}, + {.key = {"min-size"}, + .default_value = "0", + .type = GF_OPTION_TYPE_INT, + .description = "Data is compressed only when its size exceeds this."}, + {.key = {"mode"}, + .value = {"server", "client"}, + .type = GF_OPTION_TYPE_STR, + .description = "Set on the basis of where the xlator is loaded. " + "This option should NOT be configured by user."}, + {.key = {"debug"}, + .default_value = "false", + .type = GF_OPTION_TYPE_BOOL, + .description = "This is used in testing. 
Will dump compressed data " + "to disk as a gzip file."}, + {.key = {NULL}}, +}; + +xlator_api_t xlator_api = { + .init = init, + .fini = fini, + .mem_acct_init = mem_acct_init, + .op_version = {GD_OP_VERSION_3_9_0}, + .fops = &fops, + .cbks = &cbks, + .options = options, + .identifier = "cdc", + .category = GF_TECH_PREVIEW, }; diff --git a/xlators/features/compress/src/cdc.h b/xlators/features/compress/src/cdc.h index 71f4d2317bb..cb87b06a989 100644 --- a/xlators/features/compress/src/cdc.h +++ b/xlators/features/compress/src/cdc.h @@ -15,41 +15,41 @@ #include "zlib.h" #endif -#include "xlator.h" +#include <glusterfs/xlator.h> #ifndef MAX_IOVEC #define MAX_IOVEC 16 #endif typedef struct cdc_priv { - int window_size; - int mem_level; - int cdc_level; - int min_file_size; - int op_mode; - gf_boolean_t debug; - gf_lock_t lock; + int window_size; + int mem_level; + int cdc_level; + int min_file_size; + int op_mode; + gf_boolean_t debug; + gf_lock_t lock; } cdc_priv_t; typedef struct cdc_info { - /* input bits */ - int count; - int32_t ibytes; - struct iovec *vector; - struct iatt *buf; - - /* output bits */ - int ncount; - int nbytes; - int buffer_size; - struct iovec vec[MAX_IOVEC]; - struct iobref *iobref; - - /* zlib bits */ + /* input bits */ + int count; + int32_t ibytes; + struct iovec *vector; + struct iatt *buf; + + /* output bits */ + int ncount; + int nbytes; + int buffer_size; + struct iovec vec[MAX_IOVEC]; + struct iobref *iobref; + + /* zlib bits */ #ifdef HAVE_LIB_Z - z_stream stream; + z_stream stream; #endif - unsigned long crc; + unsigned long crc; } cdc_info_t; #define NVEC(ci) (ci->ncount - 1) @@ -57,8 +57,8 @@ typedef struct cdc_info { #define THIS_VEC(ci, i) ci->vector[i] /* Gzip defaults */ -#define GF_CDC_DEF_WINDOWSIZE -15 /* default value */ -#define GF_CDC_MAX_WINDOWSIZE -8 /* max value */ +#define GF_CDC_DEF_WINDOWSIZE -15 /* default value */ +#define GF_CDC_MAX_WINDOWSIZE -8 /* max value */ #ifdef HAVE_LIB_Z #define GF_CDC_DEF_COMPRESSION 
Z_DEFAULT_COMPRESSION @@ -66,15 +66,15 @@ typedef struct cdc_info { #define GF_CDC_DEF_COMPRESSION -1 #endif -#define GF_CDC_DEF_MEMLEVEL 8 -#define GF_CDC_DEF_BUFFERSIZE 262144 // 256K - default compression buffer size +#define GF_CDC_DEF_MEMLEVEL 8 +#define GF_CDC_DEF_BUFFERSIZE 262144 // 256K - default compression buffer size /* Operation mode * If xlator is loaded on client, readv decompresses and writev compresses * If xlator is loaded on server, readv compresses and writev decompresses */ -#define GF_CDC_MODE_CLIENT 0 -#define GF_CDC_MODE_SERVER 1 +#define GF_CDC_MODE_CLIENT 0 +#define GF_CDC_MODE_SERVER 1 /* min size of data to do cmpression * 0 == compress even 1byte @@ -87,21 +87,13 @@ typedef struct cdc_info { #define GF_CDC_DEFLATE_CANARY_VAL "deflate" #define GF_CDC_DEBUG_DUMP_FILE "/tmp/cdcdump.gz" -#define GF_CDC_MODE_IS_CLIENT(m) \ - (strcmp (m, "client") == 0) +#define GF_CDC_MODE_IS_CLIENT(m) (strcmp(m, "client") == 0) -#define GF_CDC_MODE_IS_SERVER(m) \ - (strcmp (m, "server") == 0) +#define GF_CDC_MODE_IS_SERVER(m) (strcmp(m, "server") == 0) int32_t -cdc_compress (xlator_t *this, - cdc_priv_t *priv, - cdc_info_t *ci, - dict_t **xdata); +cdc_compress(xlator_t *this, cdc_priv_t *priv, cdc_info_t *ci, dict_t **xdata); int32_t -cdc_decompress (xlator_t *this, - cdc_priv_t *priv, - cdc_info_t *ci, - dict_t *xdata); +cdc_decompress(xlator_t *this, cdc_priv_t *priv, cdc_info_t *ci, dict_t *xdata); #endif diff --git a/xlators/features/filter/src/Makefile.am b/xlators/features/filter/src/Makefile.am deleted file mode 100644 index 49ea3a45fb7..00000000000 --- a/xlators/features/filter/src/Makefile.am +++ /dev/null @@ -1,16 +0,0 @@ -xlator_LTLIBRARIES = filter.la -xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/testing/features - -filter_la_LDFLAGS = $(GF_XLATOR_DEFAULT_LDFLAGS) - -filter_la_SOURCES = filter.c -filter_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la - -noinst_HEADERS = filter-mem-types.h - -AM_CPPFLAGS = $(GF_CPPFLAGS) 
-I$(top_srcdir)/libglusterfs/src - -AM_CFLAGS = -Wall $(GF_CFLAGS) - -CLEANFILES = - diff --git a/xlators/features/filter/src/filter.c b/xlators/features/filter/src/filter.c deleted file mode 100644 index 3fd7dc8c8fb..00000000000 --- a/xlators/features/filter/src/filter.c +++ /dev/null @@ -1,1729 +0,0 @@ -/* - Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ -#include <sys/types.h> -#include <sys/stat.h> -#include <unistd.h> - -#include "glusterfs.h" -#include "logging.h" -#include "dict.h" -#include "xlator.h" -#include "filter-mem-types.h" - -#define GF_FILTER_NOBODY_UID 65534 -#define GF_FILTER_NOBODY_GID 65534 -#define GF_FILTER_ROOT_UID 0 -#define GF_FILTER_ROOT_GID 0 - -#define GF_MAXIMUM_FILTERING_ALLOWED 32 - -/* - option root-filtering on (off by default) - option translate-uid <uid-range=newuid,uid=newuid> - option translate-gid <gid-range=newgid,gid=newgid> - option read-only <yes|true> - option fixed-uid <uid> - option fixed-gid <gid> - option filter-uid <uid-range,uid> - option filter-gid <gid-range,gid> // not supported yet - -*/ - -struct gf_filter { - /* Flags */ - gf_boolean_t complete_read_only; - char fixed_uid_set; - char fixed_gid_set; - char partial_filter; - - /* Options */ - /* Mapping/Filtering/Translate whatever you want to call */ - int translate_num_uid_entries; - int translate_num_gid_entries; - int translate_input_uid[GF_MAXIMUM_FILTERING_ALLOWED][2]; - int translate_output_uid[GF_MAXIMUM_FILTERING_ALLOWED]; - int translate_input_gid[GF_MAXIMUM_FILTERING_ALLOWED][2]; - int translate_output_gid[GF_MAXIMUM_FILTERING_ALLOWED]; - - /* Fixed uid/gid */ - int fixed_uid; - int fixed_gid; - - /* Filter */ - int filter_num_uid_entries; - 
int filter_num_gid_entries; - int filter_input_uid[GF_MAXIMUM_FILTERING_ALLOWED][2]; - int filter_input_gid[GF_MAXIMUM_FILTERING_ALLOWED][2]; - -}; - -/* update_frame: The main logic of the whole translator. - Return values: - 0: no change - // TRANSLATE - 1: only uid changed - 2: only gid changed - 3: both uid/gid changed - // FILTER - 4: uid in filter range - 5: gid in filter range // not supported yet - 6: complete fs is readonly -*/ - -#define GF_FILTER_NO_CHANGE 0 -#define GF_FILTER_MAP_UID 1 -#define GF_FILTER_MAP_GID 2 -#define GF_FILTER_MAP_BOTH 3 -#define GF_FILTER_FILTER_UID 4 -#define GF_FILTER_FILTER_GID 5 -#define GF_FILTER_RO_FS 6 - -static int32_t -update_frame (call_frame_t *frame, - inode_t *inode, - struct gf_filter *filter) -{ - uid_t uid = 0; - int32_t idx = 0; - int32_t ret = 0; - int32_t dictret = 0; - uint64_t tmp_uid = 0; - - for (idx = 0; idx < filter->translate_num_uid_entries; idx++) { - if ((frame->root->uid >=filter->translate_input_uid[idx][0]) && - (frame->root->uid <=filter->translate_input_uid[idx][1])) { - dictret = inode_ctx_get (inode, frame->this, &tmp_uid); - uid = (uid_t)tmp_uid; - if (dictret == 0) { - if (frame->root->uid != uid) - ret = GF_FILTER_MAP_UID; - } else { - ret = GF_FILTER_MAP_UID; - } - break; - } - } - - for (idx = 0; idx < filter->translate_num_gid_entries; idx++) { - if ((frame->root->gid >=filter->translate_input_gid[idx][0]) && - (frame->root->gid <=filter->translate_input_gid[idx][1])) { - if (ret == GF_FILTER_NO_CHANGE) - ret = GF_FILTER_MAP_GID; - else - ret = GF_FILTER_MAP_BOTH; - break; - } - } - - - if (filter->complete_read_only) - return GF_FILTER_RO_FS; - - if (filter->partial_filter) { - dictret = inode_ctx_get (inode, frame->this, &tmp_uid); - uid = (uid_t)tmp_uid; - if (dictret != -1) { - for (idx = 0; idx < filter->filter_num_uid_entries; - idx++) { - if ((uid >=filter->filter_input_uid[idx][0]) && - (uid <=filter->filter_input_uid[idx][1])) { - return GF_FILTER_FILTER_UID; - } - } - } - } - - 
return ret; -} - -/* if 'root' don't change the uid/gid */ -static int32_t -update_stat (struct iatt *stbuf, - struct gf_filter *filter) -{ - int32_t idx = 0; - for (idx = 0; idx < filter->translate_num_uid_entries; idx++) { - if (stbuf->ia_uid == GF_FILTER_ROOT_UID) - continue; - if ((stbuf->ia_uid >= filter->translate_input_uid[idx][0]) && - (stbuf->ia_uid <= filter->translate_input_uid[idx][1])) { - stbuf->ia_uid = filter->translate_output_uid[idx]; - break; - } - } - - for (idx = 0; idx < filter->translate_num_gid_entries; idx++) { - if (stbuf->ia_gid == GF_FILTER_ROOT_GID) - continue; - if ((stbuf->ia_gid >= filter->translate_input_gid[idx][0]) && - (stbuf->ia_gid <= filter->translate_input_gid[idx][1])) { - stbuf->ia_gid = filter->translate_output_gid[idx]; - break; - } - } - - if (filter->fixed_uid_set) { - stbuf->ia_uid = filter->fixed_uid; - } - - if (filter->fixed_gid_set) { - stbuf->ia_gid = filter->fixed_gid; - } - - return 0; -} - -static int32_t -filter_lookup_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - inode_t *inode, - struct iatt *buf, - dict_t *dict, - struct iatt *postparent) -{ - int ret = 0; - if (op_ret >= 0) { - update_stat (buf, this->private); - ret = inode_ctx_put (inode, this, (uint64_t)(long)buf->ia_uid); - if (ret == -1) { - gf_log (this->name, GF_LOG_ERROR, - "couldn't set context"); - } - - update_stat (postparent, this->private); - } - STACK_UNWIND (frame, op_ret, op_errno, inode, buf, dict, postparent); - return 0; -} - -int32_t -filter_lookup (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - dict_t *xattr_req) -{ - STACK_WIND (frame, - filter_lookup_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, - loc, - xattr_req); - return 0; -} - - -static int32_t -filter_stat_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - struct iatt *buf) -{ - if (op_ret >= 0) { - update_stat (buf, this->private); - } - STACK_UNWIND 
(frame, op_ret, op_errno, buf); - return 0; -} - -int32_t -filter_stat (call_frame_t *frame, - xlator_t *this, - loc_t *loc) -{ - STACK_WIND (frame, - filter_stat_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->stat, - loc); - return 0; -} - -static int32_t -filter_setattr_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - struct iatt *preop, - struct iatt *postop) -{ - if (op_ret >= 0) { - update_stat (preop, this->private); - update_stat (postop, this->private); - } - STACK_UNWIND (frame, op_ret, op_errno, preop, postop); - return 0; -} - -int32_t -filter_setattr (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - struct iatt *stbuf, - int32_t valid) -{ - int32_t ret = 0; - ret = update_frame (frame, loc->inode, this->private); - switch (ret) { - case GF_FILTER_MAP_UID: - if (loc->inode->st_mode & S_IWGRP) - break; - case GF_FILTER_MAP_BOTH: - if (loc->inode->st_mode & S_IWOTH) - break; - gf_log (this->name, GF_LOG_DEBUG, - "%s: returning permission denied", loc->path); - STACK_UNWIND (frame, -1, EPERM, NULL, NULL, NULL); - return 0; - - case GF_FILTER_FILTER_UID: - case GF_FILTER_FILTER_GID: - case GF_FILTER_RO_FS: - STACK_UNWIND (frame, -1, EROFS, NULL, NULL); - return 0; - default: - break; - } - - STACK_WIND (frame, - filter_setattr_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setattr, - loc, - stbuf, valid); - return 0; -} - -static int32_t -filter_fsetattr_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - struct iatt *preop, - struct iatt *postop) -{ - if (op_ret >= 0) { - update_stat (preop, this->private); - update_stat (postop, this->private); - } - STACK_UNWIND (frame, - op_ret, - op_errno, - preop, postop); - return 0; -} - -int32_t -filter_fsetattr (call_frame_t *frame, - xlator_t *this, - fd_t *fd, - struct iatt *stbuf, - int32_t valid) -{ - STACK_WIND (frame, - filter_fsetattr_cbk, - FIRST_CHILD(this), - 
FIRST_CHILD(this)->fops->fsetattr, - fd, - stbuf, valid); - return 0; -} - - -static int32_t -filter_truncate_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - struct iatt *prebuf, - struct iatt *postbuf) -{ - if (op_ret >= 0) { - update_stat (prebuf, this->private); - update_stat (postbuf, this->private); - } - STACK_UNWIND (frame, op_ret, op_errno, prebuf, postbuf); - return 0; -} - -int32_t -filter_truncate (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - off_t offset) -{ - int32_t ret = 0; - ret = update_frame (frame, loc->inode, this->private); - switch (ret) { - case GF_FILTER_MAP_UID: - if (loc->inode->st_mode & S_IWGRP) - break; - case GF_FILTER_MAP_BOTH: - if (loc->inode->st_mode & S_IWOTH) - break; - gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path); - STACK_UNWIND (frame, -1, EPERM, NULL, NULL); - return 0; - - case GF_FILTER_FILTER_UID: - case GF_FILTER_FILTER_GID: - case GF_FILTER_RO_FS: - STACK_UNWIND (frame, -1, EROFS, NULL, NULL); - return 0; - } - - STACK_WIND (frame, - filter_truncate_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->truncate, - loc, - offset); - return 0; -} - -static int32_t -filter_ftruncate_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - struct iatt *prebuf, - struct iatt *postbuf) -{ - if (op_ret >= 0) { - update_stat (prebuf, this->private); - update_stat (postbuf, this->private); - } - STACK_UNWIND (frame, op_ret, op_errno, prebuf, postbuf); - return 0; -} - -int32_t -filter_ftruncate (call_frame_t *frame, - xlator_t *this, - fd_t *fd, - off_t offset) -{ - STACK_WIND (frame, - filter_ftruncate_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->ftruncate, - fd, - offset); - return 0; -} - - -static int32_t -filter_readlink_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - const char *path, - struct iatt *sbuf) -{ - if (op_ret >= 0) - 
update_stat (sbuf, this->private); - - STACK_UNWIND (frame, op_ret, op_errno, path, sbuf); - return 0; -} - -int32_t -filter_readlink (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - size_t size) -{ - int32_t ret = 0; - ret = update_frame (frame, loc->inode, this->private); - switch (ret) { - case GF_FILTER_MAP_UID: - if (loc->inode->st_mode & S_IRGRP) - break; - case GF_FILTER_MAP_BOTH: - if (loc->inode->st_mode & S_IROTH) - break; - gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path); - STACK_UNWIND (frame, -1, EPERM, NULL); - return 0; - } - STACK_WIND (frame, - filter_readlink_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readlink, - loc, - size); - return 0; -} - - -static int32_t -filter_mknod_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - inode_t *inode, - struct iatt *buf, - struct iatt *preparent, - struct iatt *postparent) -{ - int ret = 0; - - if (op_ret >= 0) { - update_stat (buf, this->private); - ret = inode_ctx_put (inode, this, (uint64_t)(long)buf->ia_uid); - if (ret == -1) { - gf_log (this->name, GF_LOG_ERROR, - "couldn't set context"); - } - - update_stat (preparent, this->private); - update_stat (postparent, this->private); - } - STACK_UNWIND (frame, op_ret, op_errno, inode, buf, - preparent, postparent); - return 0; -} - -int32_t -filter_mknod (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - mode_t mode, - dev_t rdev) -{ - int ret = 0; - inode_t *parent = loc->parent; - ret = update_frame (frame, loc->inode, this->private); - switch (ret) { - case GF_FILTER_MAP_UID: - if (parent->st_mode & S_IWGRP) - break; - case GF_FILTER_MAP_BOTH: - if (parent->st_mode & S_IWOTH) - break; - gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path); - STACK_UNWIND (frame, -1, EPERM, NULL, NULL, - NULL, NULL); - return 0; - - case GF_FILTER_FILTER_UID: - case GF_FILTER_FILTER_GID: - case GF_FILTER_RO_FS: - STACK_UNWIND (frame, -1, EROFS, 
NULL, NULL, - NULL, NULL); - return 0; - } - STACK_WIND (frame, - filter_mknod_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->mknod, - loc, mode, rdev); - return 0; -} - -static int32_t -filter_mkdir_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - inode_t *inode, - struct iatt *buf, - struct iatt *preparent, - struct iatt *postparent) -{ - int ret = 0; - if (op_ret >= 0) { - update_stat (buf, this->private); - ret = inode_ctx_put (inode, this, (uint64_t)(long)buf->ia_uid); - if (ret == -1) { - gf_log (this->name, GF_LOG_ERROR, - "couldn't set context"); - } - - update_stat (preparent, this->private); - update_stat (postparent, this->private); - } - STACK_UNWIND (frame, op_ret, op_errno, inode, buf, - preparent, postparent); - return 0; -} - -int32_t -filter_mkdir (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - mode_t mode) -{ - int ret = 0; - inode_t *parent = loc->parent; - ret = update_frame (frame, loc->inode, this->private); - switch (ret) { - case GF_FILTER_MAP_UID: - if (parent->st_mode & S_IWGRP) - break; - case GF_FILTER_MAP_BOTH: - if (parent->st_mode & S_IWOTH) - break; - gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path); - STACK_UNWIND (frame, -1, EPERM, NULL, NULL, - NULL, NULL); - return 0; - - case GF_FILTER_FILTER_UID: - case GF_FILTER_FILTER_GID: - case GF_FILTER_RO_FS: - STACK_UNWIND (frame, -1, EROFS, NULL, NULL, - NULL, NULL); - return 0; - } - STACK_WIND (frame, - filter_mkdir_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->mkdir, - loc, mode); - return 0; -} - -static int32_t -filter_unlink_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - struct iatt *preparent, - struct iatt *postparent) -{ - if (op_ret >= 0) { - update_stat (preparent, this->private); - update_stat (postparent, this->private); - } - - STACK_UNWIND (frame, op_ret, op_errno, preparent, postparent); - return 0; -} - -int32_t 
-filter_unlink (call_frame_t *frame, - xlator_t *this, - loc_t *loc) -{ - int32_t ret = 0; - inode_t *parent = loc->parent; - if (!parent) - parent = inode_parent (loc->inode, 0, NULL); - ret = update_frame (frame, loc->inode, this->private); - switch (ret) { - case GF_FILTER_MAP_UID: - if (parent->st_mode & S_IWGRP) - break; - if (loc->inode->st_mode & S_IWGRP) - break; - case GF_FILTER_MAP_BOTH: - if (parent->st_mode & S_IWOTH) - break; - if (loc->inode->st_mode & S_IWOTH) - break; - gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path); - STACK_UNWIND (frame, -1, EPERM, NULL, NULL); - return 0; - case GF_FILTER_FILTER_UID: - case GF_FILTER_FILTER_GID: - case GF_FILTER_RO_FS: - STACK_UNWIND (frame, -1, EROFS, NULL, NULL); - return 0; - } - STACK_WIND (frame, - filter_unlink_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, - loc); - return 0; -} - -static int32_t -filter_rmdir_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - struct iatt *preparent, - struct iatt *postparent) -{ - if (op_ret >= 0) { - update_stat (preparent, this->private); - update_stat (postparent, this->private); - } - - STACK_UNWIND (frame, op_ret, op_errno, preparent, postparent); - return 0; -} - -int32_t -filter_rmdir (call_frame_t *frame, - xlator_t *this, - loc_t *loc) -{ - int32_t ret = 0; - inode_t *parent = loc->parent; - if (!parent) - parent = inode_parent (loc->inode, 0, NULL); - ret = update_frame (frame, loc->inode, this->private); - switch (ret) { - case GF_FILTER_MAP_UID: - if (parent->st_mode & S_IWGRP) - break; - if (loc->inode->st_mode & S_IWGRP) - break; - case GF_FILTER_MAP_BOTH: - if (parent->st_mode & S_IWOTH) - break; - if (loc->inode->st_mode & S_IWOTH) - break; - gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path); - STACK_UNWIND (frame, -1, EPERM, NULL, NULL); - return 0; - case GF_FILTER_FILTER_UID: - case GF_FILTER_FILTER_GID: - case GF_FILTER_RO_FS: - 
STACK_UNWIND (frame, -1, EROFS, NULL, NULL); - return 0; - } - STACK_WIND (frame, - filter_rmdir_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rmdir, - loc); - return 0; -} - -static int32_t -filter_symlink_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - inode_t *inode, - struct iatt *buf, - struct iatt *preparent, - struct iatt *postparent) -{ - int ret = 0; - if (op_ret >= 0) { - update_stat (buf, this->private); - ret = inode_ctx_put (inode, this, (uint64_t)(long)buf->ia_uid); - if (ret == -1) { - gf_log (this->name, GF_LOG_ERROR, - "couldn't set context"); - } - - update_stat (preparent, this->private); - update_stat (postparent, this->private); - } - STACK_UNWIND (frame, op_ret, op_errno, inode, buf, - preparent, postparent); - return 0; -} - -int32_t -filter_symlink (call_frame_t *frame, - xlator_t *this, - const char *linkpath, - loc_t *loc) -{ - int ret = 0; - inode_t *parent = loc->parent; - ret = update_frame (frame, loc->inode, this->private); - switch (ret) { - case GF_FILTER_MAP_UID: - if (parent->st_mode & S_IWGRP) - break; - case GF_FILTER_MAP_BOTH: - if (parent->st_mode & S_IWOTH) - break; - gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path); - STACK_UNWIND (frame, -1, EPERM, NULL, NULL, - NULL, NULL); - return 0; - - case GF_FILTER_FILTER_UID: - case GF_FILTER_FILTER_GID: - case GF_FILTER_RO_FS: - STACK_UNWIND (frame, -1, EROFS, NULL, NULL, - NULL, NULL); - return 0; - } - STACK_WIND (frame, - filter_symlink_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->symlink, - linkpath, loc); - return 0; -} - - -static int32_t -filter_rename_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - struct iatt *buf, - struct iatt *preoldparent, - struct iatt *postoldparent, - struct iatt *prenewparent, - struct iatt *postnewparent) -{ - if (op_ret >= 0) { - update_stat (buf, this->private); - - update_stat (preoldparent, 
this->private); - update_stat (postoldparent, this->private); - - update_stat (prenewparent, this->private); - update_stat (postnewparent, this->private); - } - - STACK_UNWIND (frame, op_ret, op_errno, buf, - preoldparent, postoldparent, - prenewparent, postnewparent); - return 0; -} - -int32_t -filter_rename (call_frame_t *frame, - xlator_t *this, - loc_t *oldloc, - loc_t *newloc) -{ - int32_t ret = 0; - inode_t *parent = oldloc->parent; - if (!parent) - parent = inode_parent (oldloc->inode, 0, NULL); - ret = update_frame (frame, oldloc->inode, this->private); - switch (ret) { - case GF_FILTER_MAP_UID: - if (parent->st_mode & S_IWGRP) - break; - if (oldloc->inode->st_mode & S_IWGRP) - break; - case GF_FILTER_MAP_BOTH: - if (parent->st_mode & S_IWOTH) - break; - if (oldloc->inode->st_mode & S_IWOTH) - break; - gf_log (this->name, GF_LOG_DEBUG, - "%s -> %s: returning permission denied", oldloc->path, newloc->path); - STACK_UNWIND (frame, -1, EPERM, NULL, - NULL, NULL, - NULL, NULL); - return 0; - - case GF_FILTER_FILTER_UID: - case GF_FILTER_FILTER_GID: - case GF_FILTER_RO_FS: - STACK_UNWIND (frame, -1, EROFS, NULL, - NULL, NULL, - NULL, NULL); - return 0; - } - STACK_WIND (frame, - filter_rename_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rename, - oldloc, newloc); - return 0; -} - - -static int32_t -filter_link_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - inode_t *inode, - struct iatt *buf, - struct iatt *preparent, - struct iatt *postparent) -{ - int ret = 0; - if (op_ret >= 0) { - update_stat (buf, this->private); - ret = inode_ctx_put (inode, this, (uint64_t)(long)buf->ia_uid); - if (ret == -1) { - gf_log (this->name, GF_LOG_ERROR, - "couldn't set context"); - } - - update_stat (preparent, this->private); - update_stat (postparent, this->private); - } - STACK_UNWIND (frame, op_ret, op_errno, inode, buf, - preparent, postparent); - return 0; -} - -int32_t -filter_link (call_frame_t *frame, - 
xlator_t *this, - loc_t *oldloc, - loc_t *newloc) -{ - int ret = 0; - ret = update_frame (frame, oldloc->inode, this->private); - switch (ret) { - case GF_FILTER_FILTER_UID: - case GF_FILTER_FILTER_GID: - case GF_FILTER_RO_FS: - STACK_UNWIND (frame, -1, EROFS, NULL, NULL, - NULL, NULL); - return 0; - } - STACK_WIND (frame, - filter_link_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->link, - oldloc, newloc); - return 0; -} - - -static int32_t -filter_create_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - fd_t *fd, - inode_t *inode, - struct iatt *buf, - struct iatt *preparent, - struct iatt *postparent) -{ - int ret = 0; - if (op_ret >= 0) { - update_stat (buf, this->private); - ret = inode_ctx_put (inode, this, (uint64_t)(long)buf->ia_uid); - if (ret == -1) { - gf_log (this->name, GF_LOG_ERROR, - "couldn't set context"); - } - update_stat (preparent, this->private); - update_stat (postparent, this->private); - } - STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf, - preparent, postparent); - return 0; -} - -int32_t -filter_create (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - int32_t flags, - mode_t mode, fd_t *fd) -{ - int ret = 0; - inode_t *parent = loc->parent; - ret = update_frame (frame, loc->inode, this->private); - switch (ret) { - case GF_FILTER_MAP_UID: - if (parent->st_mode & S_IWGRP) - break; - case GF_FILTER_MAP_BOTH: - if (parent->st_mode & S_IWOTH) - break; - gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path); - STACK_UNWIND (frame, -1, EPERM, NULL, NULL, NULL, - NULL, NULL); - return 0; - - case GF_FILTER_FILTER_UID: - case GF_FILTER_FILTER_GID: - case GF_FILTER_RO_FS: - STACK_UNWIND (frame, -1, EROFS, NULL, NULL, NULL, - NULL, NULL); - return 0; - } - STACK_WIND (frame, filter_create_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->create, - loc, flags, mode, fd); - return 0; -} - -static int32_t -filter_open_cbk (call_frame_t *frame, - void 
*cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - fd_t *fd) -{ - STACK_UNWIND (frame, op_ret, op_errno, fd); - return 0; -} - -int32_t -filter_open (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - int32_t flags, - fd_t *fd, - int32_t wbflags) -{ - int32_t ret = 0; - ret = update_frame (frame, loc->inode, this->private); - switch (ret) { - case GF_FILTER_MAP_UID: - if (loc->inode->st_mode & S_IWGRP) - break; - if (!(((flags & O_ACCMODE) == O_WRONLY) - || ((flags & O_ACCMODE) == O_RDWR)) - && (loc->inode->st_mode & S_IRGRP)) - break; - case GF_FILTER_MAP_BOTH: - if (loc->inode->st_mode & S_IWOTH) - break; - if (!(((flags & O_ACCMODE) == O_WRONLY) - || ((flags & O_ACCMODE) == O_RDWR)) - && (loc->inode->st_mode & S_IROTH)) - break; - gf_log (this->name, GF_LOG_DEBUG, - "%s: returning permission denied (mode: 0%o, flag=0%o)", - loc->path, loc->inode->st_mode, flags); - STACK_UNWIND (frame, -1, EPERM, fd); - return 0; - case GF_FILTER_FILTER_UID: - case GF_FILTER_FILTER_GID: - case GF_FILTER_RO_FS: - if (!(((flags & O_ACCMODE) == O_WRONLY) - || ((flags & O_ACCMODE) == O_RDWR))) - break; - STACK_UNWIND (frame, -1, EROFS, NULL); - return 0; - - } - STACK_WIND (frame, - filter_open_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, - loc, flags, fd, wbflags); - return 0; -} - -static int32_t -filter_readv_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - struct iovec *vector, - int32_t count, - struct iatt *stbuf, - struct iobref *iobref) -{ - if (op_ret >= 0) { - update_stat (stbuf, this->private); - } - STACK_UNWIND (frame, - op_ret, - op_errno, - vector, - count, - stbuf, - iobref); - return 0; -} - -int32_t -filter_readv (call_frame_t *frame, - xlator_t *this, - fd_t *fd, - size_t size, - off_t offset) -{ - STACK_WIND (frame, - filter_readv_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readv, - fd, - size, - offset); - return 0; -} - - -static int32_t -filter_writev_cbk 
(call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - struct iatt *prebuf, - struct iatt *postbuf) -{ - if (op_ret >= 0) { - update_stat (prebuf, this->private); - update_stat (postbuf, this->private); - } - STACK_UNWIND (frame, - op_ret, - op_errno, - prebuf, - postbuf); - return 0; -} - -int32_t -filter_writev (call_frame_t *frame, - xlator_t *this, - fd_t *fd, - struct iovec *vector, - int32_t count, - off_t off, - struct iobref *iobref) -{ - int32_t ret = 0; - ret = update_frame (frame, fd->inode, this->private); - switch (ret) { - case GF_FILTER_FILTER_UID: - case GF_FILTER_FILTER_GID: - case GF_FILTER_RO_FS: - STACK_UNWIND (frame, -1, EROFS, NULL, NULL); - return 0; - } - - STACK_WIND (frame, - filter_writev_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->writev, - fd, - vector, - count, - off, - iobref); - return 0; -} - -static int32_t -filter_fstat_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - struct iatt *buf) -{ - if (op_ret >= 0) { - update_stat (buf, this->private); - } - STACK_UNWIND (frame, - op_ret, - op_errno, - buf); - return 0; -} - -int32_t -filter_fstat (call_frame_t *frame, - xlator_t *this, - fd_t *fd) -{ - STACK_WIND (frame, - filter_fstat_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fstat, - fd); - return 0; -} - -static int32_t -filter_opendir_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - fd_t *fd) -{ - STACK_UNWIND (frame, - op_ret, - op_errno, - fd); - return 0; -} - -int32_t -filter_opendir (call_frame_t *frame, - xlator_t *this, - loc_t *loc, fd_t *fd) -{ - int32_t ret = 0; - ret = update_frame (frame, loc->inode, this->private); - switch (ret) { - case GF_FILTER_MAP_UID: - if (loc->inode->st_mode & S_IWGRP) - break; - if (loc->inode->st_mode & S_IRGRP) - break; - case GF_FILTER_MAP_BOTH: - if (loc->inode->st_mode & S_IWOTH) - break; - if (loc->inode->st_mode & S_IROTH) - 
break; - gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path); - STACK_UNWIND (frame, -1, EPERM, fd); - return 0; - } - STACK_WIND (frame, - filter_opendir_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->opendir, - loc, fd); - return 0; -} - - -static int32_t -filter_setxattr_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno) -{ - STACK_UNWIND (frame, - op_ret, - op_errno); - return 0; -} - -int32_t -filter_setxattr (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - dict_t *dict, - int32_t flags) -{ - - int32_t ret = 0; - ret = update_frame (frame, loc->inode, this->private); - switch (ret) { - case GF_FILTER_MAP_UID: - if (loc->inode->st_mode & S_IWGRP) - break; - case GF_FILTER_MAP_BOTH: - if (loc->inode->st_mode & S_IWOTH) - break; - gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path); - STACK_UNWIND (frame, -1, EPERM); - return 0; - case GF_FILTER_FILTER_UID: - case GF_FILTER_FILTER_GID: - case GF_FILTER_RO_FS: - STACK_UNWIND (frame, -1, EROFS); - return 0; - } - - STACK_WIND (frame, - filter_setxattr_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setxattr, - loc, - dict, - flags); - return 0; -} - -static int32_t -filter_getxattr_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - dict_t *dict) -{ - STACK_UNWIND (frame, - op_ret, - op_errno, - dict); - return 0; -} - -int32_t -filter_getxattr (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - const char *name) -{ - int32_t ret = 0; - ret = update_frame (frame, loc->inode, this->private); - switch (ret) { - case GF_FILTER_MAP_UID: - if (loc->inode->st_mode & S_IRGRP) - break; - case GF_FILTER_MAP_BOTH: - if (loc->inode->st_mode & S_IROTH) - break; - gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path); - STACK_UNWIND (frame, -1, EPERM, NULL); - return 0; - } - - STACK_WIND (frame, - filter_getxattr_cbk, - 
FIRST_CHILD(this), - FIRST_CHILD(this)->fops->getxattr, - loc, - name); - return 0; -} - -static int32_t -filter_removexattr_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno) -{ - STACK_UNWIND (frame, op_ret, op_errno); - return 0; -} - -int32_t -filter_removexattr (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - const char *name) -{ - int32_t ret = 0; - ret = update_frame (frame, loc->inode, this->private); - switch (ret) { - case GF_FILTER_MAP_UID: - if (loc->inode->st_mode & S_IWGRP) - break; - case GF_FILTER_MAP_BOTH: - if (loc->inode->st_mode & S_IWOTH) - break; - gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path); - STACK_UNWIND (frame, -1, EPERM); - return 0; - case GF_FILTER_FILTER_UID: - case GF_FILTER_FILTER_GID: - case GF_FILTER_RO_FS: - STACK_UNWIND (frame, -1, EROFS); - return 0; - } - - STACK_WIND (frame, - filter_removexattr_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->removexattr, - loc, - name); - return 0; -} - -int32_t -mem_acct_init (xlator_t *this) -{ - int ret = -1; - - if (!this) - return ret; - - ret = xlator_mem_acct_init (this, gf_filter_mt_end + 1); - - if (ret != 0) { - gf_log (this->name, GF_LOG_ERROR, "Memory accounting init" - "failed"); - return ret; - } - - return ret; -} - -int32_t -init (xlator_t *this) -{ - char *value = NULL; - char *tmp_str = NULL; - char *tmp_str1 = NULL; - char *tmp_str2 = NULL; - char *dup_str = NULL; - char *input_value_str1 = NULL; - char *input_value_str2 = NULL; - char *output_value_str = NULL; - int32_t input_value = 0; - int32_t output_value = 0; - data_t *option_data = NULL; - struct gf_filter *filter = NULL; - gf_boolean_t tmp_bool = 0; - - if (!this->children || this->children->next) { - gf_log (this->name, - GF_LOG_ERROR, - "translator not configured with exactly one child"); - return -1; - } - - if (!this->parents) { - gf_log (this->name, GF_LOG_WARNING, - "dangling volume. 
check volfile "); - } - - filter = GF_CALLOC (sizeof (*filter), 1, gf_filter_mt_gf_filter); - ERR_ABORT (filter); - - if (dict_get (this->options, "read-only")) { - value = data_to_str (dict_get (this->options, "read-only")); - if (gf_string2boolean (value, &filter->complete_read_only) == -1) { - gf_log (this->name, GF_LOG_ERROR, - "wrong value provided for 'read-only'"); - return -1; - } - } - - if (dict_get (this->options, "root-squashing")) { - value = data_to_str (dict_get (this->options, "root-squashing")); - if (gf_string2boolean (value, &tmp_bool) == -1) { - gf_log (this->name, GF_LOG_ERROR, - "wrong value provided for 'root-squashing'"); - return -1; - } - if (tmp_bool) { - filter->translate_num_uid_entries = 1; - filter->translate_num_gid_entries = 1; - filter->translate_input_uid[0][0] = GF_FILTER_ROOT_UID; /* root */ - filter->translate_input_uid[0][1] = GF_FILTER_ROOT_UID; /* root */ - filter->translate_input_gid[0][0] = GF_FILTER_ROOT_GID; /* root */ - filter->translate_input_gid[0][1] = GF_FILTER_ROOT_GID; /* root */ - filter->translate_output_uid[0] = GF_FILTER_NOBODY_UID; - filter->translate_output_gid[0] = GF_FILTER_NOBODY_GID; - } - } - - if (dict_get (this->options, "translate-uid")) { - option_data = dict_get (this->options, "translate-uid"); - value = strtok_r (option_data->data, ",", &tmp_str); - while (value) { - dup_str = gf_strdup (value); - input_value_str1 = strtok_r (dup_str, "=", &tmp_str1); - if (input_value_str1) { - /* Check for n-m */ - char *temp_string = gf_strdup (input_value_str1); - input_value_str2 = strtok_r (temp_string, "-", &tmp_str2); - if (gf_string2int (input_value_str2, &input_value) != 0) { - gf_log (this->name, GF_LOG_ERROR, - "invalid number format \"%s\"", - input_value_str2); - return -1; - } - filter->translate_input_uid[filter->translate_num_uid_entries][0] = input_value; - input_value_str2 = strtok_r (NULL, "-", &tmp_str2); - if (input_value_str2) { - if (gf_string2int (input_value_str2, &input_value) != 0) { - 
gf_log (this->name, GF_LOG_ERROR, - "invalid number format \"%s\"", - input_value_str2); - return -1; - } - } - filter->translate_input_uid[filter->translate_num_uid_entries][1] = input_value; - GF_FREE (temp_string); - output_value_str = strtok_r (NULL, "=", &tmp_str1); - if (output_value_str) { - if (gf_string2int (output_value_str, &output_value) != 0) { - gf_log (this->name, GF_LOG_ERROR, - "invalid number format \"%s\"", - output_value_str); - return -1; - } - } else { - gf_log (this->name, GF_LOG_ERROR, - "mapping string not valid"); - return -1; - } - } else { - gf_log (this->name, GF_LOG_ERROR, - "mapping string not valid"); - return -1; - } - filter->translate_output_uid[filter->translate_num_uid_entries] = output_value; - gf_log (this->name, - GF_LOG_DEBUG, - "pair %d: input uid '%d' will be changed to uid '%d'", - filter->translate_num_uid_entries, input_value, output_value); - - filter->translate_num_uid_entries++; - if (filter->translate_num_uid_entries == GF_MAXIMUM_FILTERING_ALLOWED) - break; - value = strtok_r (NULL, ",", &tmp_str); - GF_FREE (dup_str); - } - } - - tmp_str1 = NULL; - tmp_str2 = NULL; - tmp_str = NULL; - - if (dict_get (this->options, "translate-gid")) { - option_data = dict_get (this->options, "translate-gid"); - value = strtok_r (option_data->data, ",", &tmp_str); - while (value) { - dup_str = gf_strdup (value); - input_value_str1 = strtok_r (dup_str, "=", &tmp_str1); - if (input_value_str1) { - /* Check for n-m */ - char *temp_string = gf_strdup (input_value_str1); - input_value_str2 = strtok_r (temp_string, "-", &tmp_str2); - if (gf_string2int (input_value_str2, &input_value) != 0) { - gf_log (this->name, GF_LOG_ERROR, - "invalid number format \"%s\"", - input_value_str2); - return -1; - } - filter->translate_input_gid[filter->translate_num_gid_entries][0] = input_value; - input_value_str2 = strtok_r (NULL, "-", &tmp_str2); - if (input_value_str2) { - if (gf_string2int (input_value_str2, &input_value) != 0) { - gf_log 
(this->name, GF_LOG_ERROR, - "invalid number format \"%s\"", - input_value_str2); - return -1; - } - } - filter->translate_input_gid[filter->translate_num_gid_entries][1] = input_value; - GF_FREE (temp_string); - output_value_str = strtok_r (NULL, "=", &tmp_str1); - if (output_value_str) { - if (gf_string2int (output_value_str, &output_value) != 0) { - gf_log (this->name, GF_LOG_ERROR, - "invalid number format \"%s\"", - output_value_str); - return -1; - } - } else { - gf_log (this->name, GF_LOG_ERROR, - "translate-gid value not valid"); - return -1; - } - } else { - gf_log (this->name, GF_LOG_ERROR, - "translate-gid value not valid"); - return -1; - } - - filter->translate_output_gid[filter->translate_num_gid_entries] = output_value; - - gf_log (this->name, GF_LOG_DEBUG, - "pair %d: input gid '%d' will be changed to gid '%d'", - filter->translate_num_gid_entries, input_value, output_value); - - filter->translate_num_gid_entries++; - if (filter->translate_num_gid_entries == GF_MAXIMUM_FILTERING_ALLOWED) - break; - value = strtok_r (NULL, ",", &tmp_str); - GF_FREE (dup_str); - } - } - - tmp_str = NULL; - tmp_str1 = NULL; - - if (dict_get (this->options, "filter-uid")) { - option_data = dict_get (this->options, "filter-uid"); - value = strtok_r (option_data->data, ",", &tmp_str); - while (value) { - dup_str = gf_strdup (value); - /* Check for n-m */ - input_value_str1 = strtok_r (dup_str, "-", &tmp_str1); - if (gf_string2int (input_value_str1, &input_value) != 0) { - gf_log (this->name, GF_LOG_ERROR, - "invalid number format \"%s\"", - input_value_str1); - return -1; - } - filter->filter_input_uid[filter->filter_num_uid_entries][0] = input_value; - input_value_str1 = strtok_r (NULL, "-", &tmp_str1); - if (input_value_str1) { - if (gf_string2int (input_value_str1, &input_value) != 0) { - gf_log (this->name, GF_LOG_ERROR, - "invalid number format \"%s\"", - input_value_str1); - return -1; - } - } - filter->filter_input_uid[filter->filter_num_uid_entries][1] = 
input_value; - - gf_log (this->name, - GF_LOG_DEBUG, - "filter [%d]: input uid(s) '%s' will be filtered", - filter->filter_num_uid_entries, dup_str); - - filter->filter_num_uid_entries++; - if (filter->filter_num_uid_entries == GF_MAXIMUM_FILTERING_ALLOWED) - break; - value = strtok_r (NULL, ",", &tmp_str); - GF_FREE (dup_str); - } - filter->partial_filter = 1; - } - - tmp_str = NULL; - tmp_str1 = NULL; - - if (dict_get (this->options, "filter-gid")) { - option_data = dict_get (this->options, "filter-gid"); - value = strtok_r (option_data->data, ",", &tmp_str); - while (value) { - dup_str = gf_strdup (value); - /* Check for n-m */ - input_value_str1 = strtok_r (dup_str, "-", &tmp_str1); - if (gf_string2int (input_value_str1, &input_value) != 0) { - gf_log (this->name, GF_LOG_ERROR, - "invalid number format \"%s\"", - input_value_str1); - return -1; - } - filter->filter_input_gid[filter->filter_num_gid_entries][0] = input_value; - input_value_str1 = strtok_r (NULL, "-", &tmp_str1); - if (input_value_str1) { - if (gf_string2int (input_value_str1, &input_value) != 0) { - gf_log (this->name, GF_LOG_ERROR, - "invalid number format \"%s\"", - input_value_str1); - return -1; - } - } - filter->filter_input_gid[filter->filter_num_gid_entries][1] = input_value; - - gf_log (this->name, - GF_LOG_DEBUG, - "filter [%d]: input gid(s) '%s' will be filtered", - filter->filter_num_gid_entries, dup_str); - - filter->filter_num_gid_entries++; - if (filter->filter_num_gid_entries == GF_MAXIMUM_FILTERING_ALLOWED) - break; - value = strtok_r (NULL, ",", &tmp_str); - GF_FREE (dup_str); - } - gf_log (this->name, GF_LOG_ERROR, "this option is not supported currently.. 
exiting"); - return -1; - filter->partial_filter = 1; - } - - if (dict_get (this->options, "fixed-uid")) { - option_data = dict_get (this->options, "fixed-uid"); - if (gf_string2int (option_data->data, &input_value) != 0) { - gf_log (this->name, GF_LOG_ERROR, - "invalid number format \"%s\"", - option_data->data); - return -1; - } - filter->fixed_uid = input_value; - filter->fixed_uid_set = 1; - } - - if (dict_get (this->options, "fixed-gid")) { - option_data = dict_get (this->options, "fixed-gid"); - if (gf_string2int (option_data->data, &input_value) != 0) { - gf_log (this->name, GF_LOG_ERROR, - "invalid number format \"%s\"", - option_data->data); - return -1; - } - filter->fixed_gid = input_value; - filter->fixed_gid_set = 1; - } - - this->private = filter; - return 0; -} - - -void -fini (xlator_t *this) -{ - struct gf_filter *filter = this->private; - - GF_FREE (filter); - - return; -} - - -struct xlator_fops fops = { - .lookup = filter_lookup, - .stat = filter_stat, - .fstat = filter_fstat, - .readlink = filter_readlink, - .mknod = filter_mknod, - .mkdir = filter_mkdir, - .unlink = filter_unlink, - .rmdir = filter_rmdir, - .symlink = filter_symlink, - .rename = filter_rename, - .link = filter_link, - .truncate = filter_truncate, - .ftruncate = filter_ftruncate, - .create = filter_create, - .open = filter_open, - .readv = filter_readv, - .writev = filter_writev, - .setxattr = filter_setxattr, - .getxattr = filter_getxattr, - .removexattr = filter_removexattr, - .opendir = filter_opendir, - .setattr = filter_setattr, - .fsetattr = filter_fsetattr, -}; - -struct xlator_cbks cbks = { -}; - -struct volume_options options[] = { - { .key = { "root-squashing" }, - .type = GF_OPTION_TYPE_BOOL - }, - { .key = { "read-only" }, - .type = GF_OPTION_TYPE_BOOL - }, - { .key = { "fixed-uid" }, - .type = GF_OPTION_TYPE_INT - }, - { .key = { "fixed-gid" }, - .type = GF_OPTION_TYPE_INT - }, - { .key = { "translate-uid" }, - .type = GF_OPTION_TYPE_ANY - }, - { .key = { 
"translate-gid" }, - .type = GF_OPTION_TYPE_ANY - }, - { .key = { "filter-uid" }, - .type = GF_OPTION_TYPE_ANY - }, - { .key = { "filter-gid" }, - .type = GF_OPTION_TYPE_ANY - }, - { .key = {NULL} }, -}; diff --git a/xlators/features/ganesha/src/Makefile.am b/xlators/features/ganesha/src/Makefile.am deleted file mode 100644 index 3a245703eb1..00000000000 --- a/xlators/features/ganesha/src/Makefile.am +++ /dev/null @@ -1,18 +0,0 @@ -xlator_LTLIBRARIES = ganesha.la - -xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features - -noinst_HEADERS = ganesha.h ganesha-mem-types.h - -ganesha_la_LDFLAGS = $(GF_XLATOR_DEFAULT_LDFLAGS) - -ganesha_la_SOURCES = ganesha.c - -AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ - -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS)\ - -DGANESHA_DIR=\"$(sysconfdir)/ganesha\" \ - -DGYSNCD_PREFIX=\"$(libexecdir)/glusterfs\" - -AM_CFLAGS = -Wall $(GF_CFLAGS) - -CLEANFILES = diff --git a/xlators/features/ganesha/src/ganesha-mem-types.h b/xlators/features/ganesha/src/ganesha-mem-types.h deleted file mode 100644 index c4976c01afc..00000000000 --- a/xlators/features/ganesha/src/ganesha-mem-types.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. 
-*/ -#ifndef __GANESHA_MEM_TYPES_H__ -#define __GANESHA_MEM_TYPES_H__ - - -#include "mem-types.h" - -enum gf_ganesha_mem_types_ { - gf_ganesha_mt_priv_t = gf_common_mt_end + 1, - gf_ganesha_mt_end -}; - -#endif diff --git a/xlators/features/ganesha/src/ganesha.c b/xlators/features/ganesha/src/ganesha.c deleted file mode 100644 index 859915420ac..00000000000 --- a/xlators/features/ganesha/src/ganesha.c +++ /dev/null @@ -1,90 +0,0 @@ -/* - Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - - -#include "ganesha.h" -#include "ganesha-mem-types.h" - - -int32_t -mem_acct_init (xlator_t *this) -{ - int ret = -1; - - if (!this) - return ret; - - ret = xlator_mem_acct_init (this, gf_ganesha_mt_end + 1); - - if (ret != 0) - gf_log (this->name, GF_LOG_WARNING, "Memory accounting" - "init failed"); - - return ret; -} - -int32_t -init (xlator_t *this) -{ - int ret = -1; - ganesha_priv_t *priv = NULL; - - if (!this->children || this->children->next) { - gf_log (this->name, GF_LOG_ERROR, - "Need subvolume == 1"); - goto err; - } - - if (!this->parents) { - gf_log (this->name, GF_LOG_WARNING, - "Dangling volume. 
Check volfile"); - goto err; - } - - priv = GF_CALLOC (1, sizeof (*priv), gf_ganesha_mt_priv_t); - if (!priv) - goto err; - - this->private = priv; - ret = 0; - -err: - return ret; -} - - -void -fini (xlator_t *this) -{ - ganesha_priv_t *priv = this->private; - - this->private = NULL; - if (priv) - GF_FREE (priv); - - return; -} - -struct xlator_fops fops = { -}; - -struct xlator_cbks cbks = { -}; - -struct volume_options options[] = { - - { .key = {"ganesha.enable"}, - .default_value = "off", - .type = GF_OPTION_TYPE_BOOL, - .description = "export volume via NFS-Ganesha" - }, - { .key = {NULL} - }, -}; diff --git a/xlators/features/ganesha/src/ganesha.h b/xlators/features/ganesha/src/ganesha.h deleted file mode 100644 index 86320e9da28..00000000000 --- a/xlators/features/ganesha/src/ganesha.h +++ /dev/null @@ -1,18 +0,0 @@ -/* - Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. 
-*/ - -#include "xlator.h" -#include "ganesha-mem-types.h" - -typedef struct { - char *host_name; -} ganesha_priv_t; - - diff --git a/xlators/features/gfid-access/src/Makefile.am b/xlators/features/gfid-access/src/Makefile.am index ba95c7c0184..ff95604c4de 100644 --- a/xlators/features/gfid-access/src/Makefile.am +++ b/xlators/features/gfid-access/src/Makefile.am @@ -1,14 +1,15 @@ xlator_LTLIBRARIES = gfid-access.la xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features -gfid_access_la_LDFLAGS = $(GF_XLATOR_DEFAULT_LDFLAGS) +gfid_access_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) gfid_access_la_SOURCES = gfid-access.c gfid_access_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la noinst_HEADERS = gfid-access.h gfid-access-mem-types.h -AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ + -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src AM_CFLAGS = -Wall $(GF_CFLAGS) diff --git a/xlators/features/gfid-access/src/gfid-access-mem-types.h b/xlators/features/gfid-access/src/gfid-access-mem-types.h index 168d67b431f..1c4d0b93de2 100644 --- a/xlators/features/gfid-access/src/gfid-access-mem-types.h +++ b/xlators/features/gfid-access/src/gfid-access-mem-types.h @@ -11,13 +11,12 @@ #ifndef _GFID_ACCESS_MEM_TYPES_H #define _GFID_ACCESS_MEM_TYPES_H -#include "mem-types.h" +#include <glusterfs/mem-types.h> enum gf_changelog_mem_types { - gf_gfid_access_mt_priv_t = gf_common_mt_end + 1, - gf_gfid_access_mt_gfid_t, - gf_gfid_access_mt_end + gf_gfid_access_mt_priv_t = gf_common_mt_end + 1, + gf_gfid_access_mt_gfid_t, + gf_gfid_access_mt_end }; #endif - diff --git a/xlators/features/gfid-access/src/gfid-access.c b/xlators/features/gfid-access/src/gfid-access.c index a714b66a34d..3fea5672a21 100644 --- a/xlators/features/gfid-access/src/gfid-access.c +++ b/xlators/features/gfid-access/src/gfid-access.c @@ -8,1420 +8,1413 @@ cases as published by the Free Software 
Foundation. */ #include "gfid-access.h" -#include "inode.h" -#include "byte-order.h" -#include "statedump.h" - +#include <glusterfs/inode.h> +#include <glusterfs/byte-order.h> +#include <glusterfs/statedump.h> int -ga_valid_inode_loc_copy (loc_t *dst, loc_t *src, xlator_t *this) +ga_valid_inode_loc_copy(loc_t *dst, loc_t *src, xlator_t *this) { - int ret = 0; - uint64_t value = 0; - - /* if its an entry operation, on the virtual */ - /* directory inode as parent, we need to handle */ - /* it properly */ - ret = loc_copy (dst, src); - if (ret < 0) - goto out; - - /* - * Change ALL virtual inodes with real-inodes in loc - */ - if (dst->parent) { - ret = inode_ctx_get (dst->parent, this, &value); - if (ret < 0) { - ret = 0; //real-inode - goto out; - } - inode_unref (dst->parent); - dst->parent = inode_ref ((inode_t*)value); - gf_uuid_copy (dst->pargfid, dst->parent->gfid); + int ret = 0; + uint64_t value = 0; + + /* if its an entry operation, on the virtual */ + /* directory inode as parent, we need to handle */ + /* it properly */ + ret = loc_copy(dst, src); + if (ret < 0) + goto out; + + /* + * Change ALL virtual inodes with real-inodes in loc + */ + if (dst->parent) { + ret = inode_ctx_get(dst->parent, this, &value); + if (ret < 0) { + ret = 0; // real-inode + goto out; } + inode_unref(dst->parent); + dst->parent = inode_ref((inode_t *)(uintptr_t)value); + gf_uuid_copy(dst->pargfid, dst->parent->gfid); + } - if (dst->inode) { - ret = inode_ctx_get (dst->inode, this, &value); - if (ret < 0) { - ret = 0; //real-inode - goto out; - } - inode_unref (dst->inode); - dst->inode = inode_ref ((inode_t*)value); - gf_uuid_copy (dst->gfid, dst->inode->gfid); + if (dst->inode) { + ret = inode_ctx_get(dst->inode, this, &value); + if (ret < 0) { + ret = 0; // real-inode + goto out; } + inode_unref(dst->inode); + dst->inode = inode_ref((inode_t *)(uintptr_t)value); + gf_uuid_copy(dst->gfid, dst->inode->gfid); + } out: - return ret; + return ret; } void -ga_newfile_args_free 
(ga_newfile_args_t *args) +ga_newfile_args_free(ga_newfile_args_t *args) { - if (!args) - goto out; + if (!args) + goto out; - GF_FREE (args->bname); + GF_FREE(args->bname); - if (S_ISLNK (args->st_mode) && args->args.symlink.linkpath) { - GF_FREE (args->args.symlink.linkpath); - args->args.symlink.linkpath = NULL; - } + if (S_ISLNK(args->st_mode) && args->args.symlink.linkpath) { + GF_FREE(args->args.symlink.linkpath); + args->args.symlink.linkpath = NULL; + } - mem_put (args); + mem_put(args); out: - return; + return; } - void -ga_heal_args_free (ga_heal_args_t *args) +ga_heal_args_free(ga_heal_args_t *args) { - if (!args) - goto out; + if (!args) + goto out; - GF_FREE (args->bname); + GF_FREE(args->bname); - mem_put (args); + mem_put(args); out: - return; + return; } - ga_newfile_args_t * -ga_newfile_parse_args (xlator_t *this, data_t *data) +ga_newfile_parse_args(xlator_t *this, data_t *data) { - ga_newfile_args_t *args = NULL; - ga_private_t *priv = NULL; - int len = 0; - int blob_len = 0; - int min_len = 0; - void *blob = NULL; - - priv = this->private; - - blob = data->data; - blob_len = data->len; - - min_len = sizeof (args->uid) + sizeof (args->gid) + sizeof (args->gfid) - + sizeof (args->st_mode) + 2 + 2; - if (blob_len < min_len) { - gf_log (this->name, GF_LOG_ERROR, - "Invalid length: Total length is less " - "than minimum length."); - goto err; + ga_newfile_args_t *args = NULL; + ga_private_t *priv = NULL; + int len = 0; + int blob_len = 0; + int min_len = 0; + void *blob = NULL; + + priv = this->private; + + blob = data->data; + blob_len = data->len; + + min_len = sizeof(args->uid) + sizeof(args->gid) + sizeof(args->gfid) + + sizeof(args->st_mode) + 2 + 2; + if (blob_len < min_len) { + gf_log(this->name, GF_LOG_ERROR, + "Invalid length: Total length is less " + "than minimum length."); + goto err; + } + + args = mem_get0(priv->newfile_args_pool); + if (args == NULL) + goto err; + + args->uid = ntoh32(*(uint32_t *)blob); + blob += sizeof(uint32_t); + 
blob_len -= sizeof(uint32_t); + + args->gid = ntoh32(*(uint32_t *)blob); + blob += sizeof(uint32_t); + blob_len -= sizeof(uint32_t); + + memcpy(args->gfid, blob, sizeof(args->gfid)); + blob += sizeof(args->gfid); + blob_len -= sizeof(args->gfid); + + args->st_mode = ntoh32(*(uint32_t *)blob); + blob += sizeof(uint32_t); + blob_len -= sizeof(uint32_t); + + len = strnlen(blob, blob_len); + if (len == blob_len) { + gf_log(this->name, GF_LOG_ERROR, "gfid: %s. No null byte present.", + args->gfid); + goto err; + } + + args->bname = GF_MALLOC(len + 1, gf_common_mt_char); + if (args->bname == NULL) + goto err; + + memcpy(args->bname, blob, (len + 1)); + blob += (len + 1); + blob_len -= (len + 1); + + if (S_ISDIR(args->st_mode)) { + if (blob_len < sizeof(uint32_t)) { + gf_log(this->name, GF_LOG_ERROR, "gfid: %s. Invalid length", + args->gfid); + goto err; } - - args = mem_get0 (priv->newfile_args_pool); - if (args == NULL) - goto err; - - args->uid = ntoh32 (*(uint32_t *)blob); - blob += sizeof (uint32_t); - blob_len -= sizeof (uint32_t); - - args->gid = ntoh32 (*(uint32_t *)blob); - blob += sizeof (uint32_t); - blob_len -= sizeof (uint32_t); - - memcpy (args->gfid, blob, sizeof (args->gfid)); - blob += sizeof (args->gfid); - blob_len -= sizeof (args->gfid); - - args->st_mode = ntoh32 (*(uint32_t *)blob); - blob += sizeof (uint32_t); - blob_len -= sizeof (uint32_t); - - len = strnlen (blob, blob_len); + args->args.mkdir.mode = ntoh32(*(uint32_t *)blob); + blob += sizeof(uint32_t); + blob_len -= sizeof(uint32_t); + + if (blob_len < sizeof(uint32_t)) { + gf_log(this->name, GF_LOG_ERROR, "gfid: %s. Invalid length", + args->gfid); + goto err; + } + args->args.mkdir.umask = ntoh32(*(uint32_t *)blob); + blob_len -= sizeof(uint32_t); + if (blob_len < 0) { + gf_log(this->name, GF_LOG_ERROR, "gfid: %s. 
Invalid length", + args->gfid); + goto err; + } + } else if (S_ISLNK(args->st_mode)) { + len = strnlen(blob, blob_len); if (len == blob_len) { - gf_log (this->name, GF_LOG_ERROR, - "gfid: %s. No null byte present.", - args->gfid); - goto err; + gf_log(this->name, GF_LOG_ERROR, "gfid: %s. Invalid length", + args->gfid); + goto err; } + args->args.symlink.linkpath = GF_MALLOC(len + 1, gf_common_mt_char); + if (args->args.symlink.linkpath == NULL) + goto err; - args->bname = GF_CALLOC (1, (len + 1), gf_common_mt_char); - if (args->bname == NULL) - goto err; - - memcpy (args->bname, blob, (len + 1)); - blob += (len + 1); + memcpy(args->args.symlink.linkpath, blob, (len + 1)); blob_len -= (len + 1); - - if (S_ISDIR (args->st_mode)) { - if (blob_len < sizeof (uint32_t)) { - gf_log (this->name, GF_LOG_ERROR, - "gfid: %s. Invalid length", - args->gfid); - goto err; - } - args->args.mkdir.mode = ntoh32 (*(uint32_t *)blob); - blob += sizeof (uint32_t); - blob_len -= sizeof (uint32_t); - - if (blob_len < sizeof (uint32_t)) { - gf_log (this->name, GF_LOG_ERROR, - "gfid: %s. Invalid length", - args->gfid); - goto err; - } - args->args.mkdir.umask = ntoh32 (*(uint32_t *)blob); - blob_len -= sizeof (uint32_t); - if (blob_len < 0) { - gf_log (this->name, GF_LOG_ERROR, - "gfid: %s. Invalid length", - args->gfid); - goto err; - } - } else if (S_ISLNK (args->st_mode)) { - len = strnlen (blob, blob_len); - if (len == blob_len) { - gf_log (this->name, GF_LOG_ERROR, - "gfid: %s. Invalid length", - args->gfid); - goto err; - } - args->args.symlink.linkpath = GF_CALLOC (1, len + 1, - gf_common_mt_char); - if (args->args.symlink.linkpath == NULL) - goto err; - - memcpy (args->args.symlink.linkpath, blob, (len + 1)); - blob_len -= (len + 1); - } else { - if (blob_len < sizeof (uint32_t)) { - gf_log (this->name, GF_LOG_ERROR, - "gfid: %s. 
Invalid length", - args->gfid); - goto err; - } - args->args.mknod.mode = ntoh32 (*(uint32_t *)blob); - blob += sizeof (uint32_t); - blob_len -= sizeof (uint32_t); - - if (blob_len < sizeof (uint32_t)) { - gf_log (this->name, GF_LOG_ERROR, - "gfid: %s. Invalid length", - args->gfid); - goto err; - } - args->args.mknod.rdev = ntoh32 (*(uint32_t *)blob); - blob += sizeof (uint32_t); - blob_len -= sizeof (uint32_t); - - if (blob_len < sizeof (uint32_t)) { - gf_log (this->name, GF_LOG_ERROR, - "gfid: %s. Invalid length", - args->gfid); - goto err; - } - args->args.mknod.umask = ntoh32 (*(uint32_t *)blob); - blob_len -= sizeof (uint32_t); + } else { + if (blob_len < sizeof(uint32_t)) { + gf_log(this->name, GF_LOG_ERROR, "gfid: %s. Invalid length", + args->gfid); + goto err; } - - if (blob_len) { - gf_log (this->name, GF_LOG_ERROR, - "gfid: %s. Invalid length", - args->gfid); - goto err; + args->args.mknod.mode = ntoh32(*(uint32_t *)blob); + blob += sizeof(uint32_t); + blob_len -= sizeof(uint32_t); + + if (blob_len < sizeof(uint32_t)) { + gf_log(this->name, GF_LOG_ERROR, "gfid: %s. Invalid length", + args->gfid); + goto err; + } + args->args.mknod.rdev = ntoh32(*(uint32_t *)blob); + blob += sizeof(uint32_t); + blob_len -= sizeof(uint32_t); + + if (blob_len < sizeof(uint32_t)) { + gf_log(this->name, GF_LOG_ERROR, "gfid: %s. Invalid length", + args->gfid); + goto err; } + args->args.mknod.umask = ntoh32(*(uint32_t *)blob); + blob_len -= sizeof(uint32_t); + } + + if (blob_len) { + gf_log(this->name, GF_LOG_ERROR, "gfid: %s. 
Invalid length", + args->gfid); + goto err; + } - return args; + return args; err: - if (args) - ga_newfile_args_free (args); + if (args) + ga_newfile_args_free(args); - return NULL; + return NULL; } ga_heal_args_t * -ga_heal_parse_args (xlator_t *this, data_t *data) +ga_heal_parse_args(xlator_t *this, data_t *data) { - ga_heal_args_t *args = NULL; - ga_private_t *priv = NULL; - void *blob = NULL; - int len = 0; - int blob_len = 0; + ga_heal_args_t *args = NULL; + ga_private_t *priv = NULL; + void *blob = NULL; + int len = 0; + int blob_len = 0; - blob = data->data; - blob_len = data->len; + blob = data->data; + blob_len = data->len; - priv = this->private; + priv = this->private; - /* bname should at least contain a character */ - if (blob_len < (sizeof (args->gfid) + 2)) - goto err; + /* bname should at least contain a character */ + if (blob_len < (sizeof(args->gfid) + 2)) + goto err; - args = mem_get0 (priv->heal_args_pool); - if (!args) - goto err; + args = mem_get0(priv->heal_args_pool); + if (!args) + goto err; - memcpy (args->gfid, blob, sizeof (args->gfid)); - blob += sizeof (args->gfid); - blob_len -= sizeof (args->gfid); + memcpy(args->gfid, blob, sizeof(args->gfid)); + blob += sizeof(args->gfid); + blob_len -= sizeof(args->gfid); - len = strnlen (blob, blob_len); - if (len == blob_len) - goto err; + len = strnlen(blob, blob_len); + if (len == blob_len) + goto err; - args->bname = GF_CALLOC (1, len + 1, gf_common_mt_char); - if (!args->bname) - goto err; + args->bname = GF_MALLOC(len + 1, gf_common_mt_char); + if (!args->bname) + goto err; - memcpy (args->bname, blob, len); - blob_len -= (len + 1); + memcpy(args->bname, blob, len); + args->bname[len] = '\0'; + blob_len -= (len + 1); - if (blob_len) - goto err; + if (blob_len) + goto err; - return args; + return args; err: - if (args) - ga_heal_args_free (args); + if (args) + ga_heal_args_free(args); - return NULL; + return NULL; } static int32_t -ga_fill_tmp_loc (loc_t *loc, xlator_t *this, uuid_t gfid, 
- char *bname, dict_t *xdata, loc_t *new_loc) +ga_fill_tmp_loc(loc_t *loc, xlator_t *this, uuid_t gfid, char *bname, + dict_t *xdata, loc_t *new_loc) { - int ret = -1; - uint64_t value = 0; - inode_t *parent = NULL; - uuid_t *gfid_ptr = NULL; - - parent = loc->inode; - ret = inode_ctx_get (loc->inode, this, &value); - if (!ret) { - parent = (void *)value; - if (gf_uuid_is_null (parent->gfid)) - parent = loc->inode; - } - - /* parent itself should be looked up */ - gf_uuid_copy (new_loc->pargfid, parent->gfid); - new_loc->parent = inode_ref (parent); - - new_loc->inode = inode_grep (parent->table, parent, bname); - if (!new_loc->inode) { - new_loc->inode = inode_new (parent->table); - gf_uuid_copy (new_loc->inode->gfid, gfid); - } - - loc_path (new_loc, bname); - if (new_loc->path) { - new_loc->name = strrchr (new_loc->path, '/'); - if (new_loc->name) - new_loc->name++; - } - - gfid_ptr = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t); - if (!gfid_ptr) { - ret = -1; - goto out; - } - gf_uuid_copy (*gfid_ptr, gfid); - ret = dict_set_dynptr (xdata, "gfid-req", gfid_ptr, sizeof (uuid_t)); - if (ret < 0) - goto out; - - ret = 0; + int ret = -1; + uint64_t value = 0; + inode_t *parent = NULL; + unsigned char *gfid_ptr = NULL; + + parent = loc->inode; + ret = inode_ctx_get(loc->inode, this, &value); + if (!ret) { + parent = (void *)(uintptr_t)value; + if (gf_uuid_is_null(parent->gfid)) + parent = loc->inode; + } + + /* parent itself should be looked up */ + gf_uuid_copy(new_loc->pargfid, parent->gfid); + new_loc->parent = inode_ref(parent); + + new_loc->inode = inode_grep(parent->table, parent, bname); + if (!new_loc->inode) { + new_loc->inode = inode_new(parent->table); + gf_uuid_copy(new_loc->inode->gfid, gfid); + } + + loc_path(new_loc, bname); + if (new_loc->path) { + new_loc->name = strrchr(new_loc->path, '/'); + if (new_loc->name) + new_loc->name++; + } + + gfid_ptr = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); + if (!gfid_ptr) { + ret = -1; + goto out; + } 
+ gf_uuid_copy(gfid_ptr, gfid); + ret = dict_set_gfuuid(xdata, "gfid-req", gfid_ptr, false); + if (ret < 0) + goto out; + + ret = 0; out: - if (ret && gfid_ptr) - GF_FREE (gfid_ptr); - return ret; + if (ret && gfid_ptr) + GF_FREE(gfid_ptr); + return ret; } - - static gf_boolean_t -__is_gfid_access_dir (uuid_t gfid) +__is_gfid_access_dir(uuid_t gfid) { - uuid_t aux_gfid; + static uuid_t aux_gfid = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, GF_AUX_GFID}; - memset (aux_gfid, 0, 16); - aux_gfid[15] = GF_AUX_GFID; + if (gf_uuid_compare(gfid, aux_gfid) == 0) + return _gf_true; - if (gf_uuid_compare (gfid, aux_gfid) == 0) - return _gf_true; - - return _gf_false; + return _gf_false; } int32_t -ga_forget (xlator_t *this, inode_t *inode) +ga_forget(xlator_t *this, inode_t *inode) { - int ret = -1; - uint64_t value = 0; - inode_t *tmp_inode = NULL; + int ret = -1; + uint64_t value = 0; + inode_t *tmp_inode = NULL; - ret = inode_ctx_del (inode, this, &value); - if (ret) - goto out; + ret = inode_ctx_del(inode, this, &value); + if (ret) + goto out; - tmp_inode = (void *)value; - inode_unref (tmp_inode); + tmp_inode = (void *)(uintptr_t)value; + inode_unref(tmp_inode); out: - return 0; + return 0; } - static int -ga_heal_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - inode_t *inode, struct iatt *stat, dict_t *dict, - struct iatt *postparent) +ga_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, inode_t *inode, struct iatt *stat, dict_t *dict, + struct iatt *postparent) { - call_frame_t *orig_frame = NULL; + call_frame_t *orig_frame = NULL; - orig_frame = frame->local; - frame->local = NULL; + orig_frame = frame->local; + frame->local = NULL; - /* don't worry about inode linking and other stuff. They'll happen on - * the next lookup. - */ - STACK_DESTROY (frame->root); + /* don't worry about inode linking and other stuff. They'll happen on + * the next lookup. 
+ */ + STACK_DESTROY(frame->root); - STACK_UNWIND_STRICT (setxattr, orig_frame, op_ret, op_errno, dict); + STACK_UNWIND_STRICT(setxattr, orig_frame, op_ret, op_errno, dict); - return 0; + return 0; } static int -ga_newentry_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - inode_t *inode, struct iatt *buf, - struct iatt *preparent, struct iatt *postparent, - dict_t *xdata) +ga_newentry_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - ga_local_t *local = NULL; + ga_local_t *local = NULL; - local = frame->local; + local = frame->local; - /* don't worry about inode linking and other stuff. They'll happen on - * the next lookup. - */ - frame->local = NULL; - STACK_DESTROY (frame->root); + /* don't worry about inode linking and other stuff. They'll happen on + * the next lookup. + */ + frame->local = NULL; + STACK_DESTROY(frame->root); - STACK_UNWIND_STRICT (setxattr, local->orig_frame, op_ret, - op_errno, xdata); + STACK_UNWIND_STRICT(setxattr, local->orig_frame, op_ret, op_errno, xdata); - if (local->xdata) - dict_unref (local->xdata); - loc_wipe (&local->loc); - mem_put (local); + if (local->xdata) + dict_unref(local->xdata); + loc_wipe(&local->loc); + mem_put(local); - return 0; + return 0; } static int -ga_newentry_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *stat, dict_t *xdata, - struct iatt *postparent) +ga_newentry_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *stat, dict_t *xdata, + struct iatt *postparent) { - ga_local_t *local = NULL; + ga_local_t *local = NULL; - local = frame->local; + local = frame->local; - if ((op_ret < 0) && ((op_errno != ENOENT) && (op_errno != ESTALE))) - goto err; + if ((op_ret < 
0) && ((op_errno != ENOENT) && (op_errno != ESTALE))) + goto err; - STACK_WIND (frame, ga_newentry_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->mknod, &local->loc, local->mode, - local->rdev, local->umask, local->xdata); - return 0; + STACK_WIND(frame, ga_newentry_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mknod, &local->loc, local->mode, + local->rdev, local->umask, local->xdata); + return 0; err: - frame->local = NULL; - STACK_DESTROY (frame->root); - STACK_UNWIND_STRICT (setxattr, local->orig_frame, op_ret, op_errno, - xdata); - if (local->xdata) - dict_unref (local->xdata); - loc_wipe (&local->loc); - mem_put (local); - - return 0; + frame->local = NULL; + STACK_DESTROY(frame->root); + STACK_UNWIND_STRICT(setxattr, local->orig_frame, op_ret, op_errno, xdata); + if (local->xdata) + dict_unref(local->xdata); + loc_wipe(&local->loc); + mem_put(local); + + return 0; } int32_t -ga_new_entry (call_frame_t *frame, xlator_t *this, loc_t *loc, data_t *data, - dict_t *xdata) +ga_new_entry(call_frame_t *frame, xlator_t *this, loc_t *loc, data_t *data, + dict_t *xdata) { - int ret = -1; - ga_newfile_args_t *args = NULL; - loc_t tmp_loc = {0,}; - call_frame_t *new_frame = NULL; - ga_local_t *local = NULL; - uuid_t gfid = {0,}; - - args = ga_newfile_parse_args (this, data); - if (!args) - goto out; - - ret = gf_uuid_parse (args->gfid, gfid); - if (ret) - goto out; - - if (!xdata) { - xdata = dict_new (); - } else { - xdata = dict_ref (xdata); - } - - if (!xdata) { - ret = -1; - goto out; - } - - ret = ga_fill_tmp_loc (loc, this, gfid, - args->bname, xdata, &tmp_loc); - if (ret) - goto out; - - new_frame = copy_frame (frame); - if (!new_frame) - goto out; - - local = mem_get0 (this->local_pool); - local->orig_frame = frame; - - loc_copy (&local->loc, &tmp_loc); - - new_frame->local = local; - new_frame->root->uid = args->uid; - new_frame->root->gid = args->gid; - - if (S_ISDIR (args->st_mode)) { - STACK_WIND (new_frame, ga_newentry_cbk, - FIRST_CHILD(this), 
FIRST_CHILD(this)->fops->mkdir, - &tmp_loc, args->args.mkdir.mode, - args->args.mkdir.umask, xdata); - } else if (S_ISLNK (args->st_mode)) { - STACK_WIND (new_frame, ga_newentry_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->symlink, - args->args.symlink.linkpath, - &tmp_loc, 0, xdata); - } else { - /* use 07777 (4 7s) for considering the Sticky bits etc) */ - ((ga_local_t *)new_frame->local)->mode = - (S_IFMT & args->st_mode) | (07777 & args->args.mknod.mode); - - ((ga_local_t *)new_frame->local)->umask = - args->args.mknod.umask; - ((ga_local_t *)new_frame->local)->rdev = args->args.mknod.rdev; - ((ga_local_t *)new_frame->local)->xdata = dict_ref (xdata); - - /* send a named lookup, so that dht can cleanup up stale linkto - * files etc. - */ - STACK_WIND (new_frame, ga_newentry_lookup_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, - &tmp_loc, NULL); - } + int ret = -1; + ga_newfile_args_t *args = NULL; + loc_t tmp_loc = { + 0, + }; + call_frame_t *new_frame = NULL; + ga_local_t *local = NULL; + uuid_t gfid = { + 0, + }; + + if (!xdata) { + xdata = dict_new(); + } else { + xdata = dict_ref(xdata); + } + + if (!xdata) { + ret = -1; + goto out; + } + + args = ga_newfile_parse_args(this, data); + if (!args) + goto out; + + ret = gf_uuid_parse(args->gfid, gfid); + if (ret) + goto out; + + ret = ga_fill_tmp_loc(loc, this, gfid, args->bname, xdata, &tmp_loc); + if (ret) + goto out; + + new_frame = copy_frame(frame); + if (!new_frame) + goto out; + + local = mem_get0(this->local_pool); + local->orig_frame = frame; + + loc_copy(&local->loc, &tmp_loc); + + new_frame->local = local; + new_frame->root->uid = args->uid; + new_frame->root->gid = args->gid; + + if (S_ISDIR(args->st_mode)) { + STACK_WIND(new_frame, ga_newentry_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mkdir, &tmp_loc, + args->args.mkdir.mode, args->args.mkdir.umask, xdata); + } else if (S_ISLNK(args->st_mode)) { + STACK_WIND(new_frame, ga_newentry_cbk, FIRST_CHILD(this), + 
FIRST_CHILD(this)->fops->symlink, + args->args.symlink.linkpath, &tmp_loc, 0, xdata); + } else { + /* use 07777 (4 7s) for considering the Sticky bits etc) */ + ((ga_local_t *)new_frame->local)->mode = (S_IFMT & args->st_mode) | + (07777 & + args->args.mknod.mode); + + ((ga_local_t *)new_frame->local)->umask = args->args.mknod.umask; + ((ga_local_t *)new_frame->local)->rdev = args->args.mknod.rdev; + ((ga_local_t *)new_frame->local)->xdata = dict_ref(xdata); + + /* send a named lookup, so that dht can cleanup up stale linkto + * files etc. + */ + STACK_WIND(new_frame, ga_newentry_lookup_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, &tmp_loc, NULL); + } - ret = 0; + ret = 0; out: - ga_newfile_args_free (args); + ga_newfile_args_free(args); - if (xdata) - dict_unref (xdata); + if (xdata) + dict_unref(xdata); - loc_wipe (&tmp_loc); + loc_wipe(&tmp_loc); - return ret; + return ret; } int32_t -ga_heal_entry (call_frame_t *frame, xlator_t *this, loc_t *loc, data_t *data, - dict_t *xdata) +ga_heal_entry(call_frame_t *frame, xlator_t *this, loc_t *loc, data_t *data, + dict_t *xdata) { - int ret = -1; - ga_heal_args_t *args = NULL; - loc_t tmp_loc = {0,}; - call_frame_t *new_frame = NULL; - uuid_t gfid = {0,}; - - args = ga_heal_parse_args (this, data); - if (!args) - goto out; - - ret = gf_uuid_parse (args->gfid, gfid); - if (ret) - goto out; - - if (!xdata) - xdata = dict_new (); - else - xdata = dict_ref (xdata); - - if (!xdata) { - ret = -1; - goto out; - } - - ret = ga_fill_tmp_loc (loc, this, gfid, args->bname, - xdata, &tmp_loc); - if (ret) - goto out; - - new_frame = copy_frame (frame); - if (!new_frame) - goto out; - - new_frame->local = (void *)frame; - - STACK_WIND (new_frame, ga_heal_cbk, FIRST_CHILD (this), - FIRST_CHILD(this)->fops->lookup, - &tmp_loc, xdata); - - ret = 0; + int ret = -1; + ga_heal_args_t *args = NULL; + loc_t tmp_loc = { + 0, + }; + call_frame_t *new_frame = NULL; + uuid_t gfid = { + 0, + }; + + args = ga_heal_parse_args(this, 
data); + if (!args) + goto out; + + ret = gf_uuid_parse(args->gfid, gfid); + if (ret) + goto out; + + if (!xdata) + xdata = dict_new(); + else + xdata = dict_ref(xdata); + + if (!xdata) { + ret = -1; + goto out; + } + + ret = ga_fill_tmp_loc(loc, this, gfid, args->bname, xdata, &tmp_loc); + if (ret) + goto out; + + new_frame = copy_frame(frame); + if (!new_frame) + goto out; + + new_frame->local = (void *)frame; + + STACK_WIND(new_frame, ga_heal_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, &tmp_loc, xdata); + + ret = 0; out: - if (args) - ga_heal_args_free (args); + if (args) + ga_heal_args_free(args); - loc_wipe (&tmp_loc); + loc_wipe(&tmp_loc); - if (xdata) - dict_unref (xdata); + if (xdata) + dict_unref(xdata); - return ret; + return ret; } int32_t -ga_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - dict_t *xdata) +ga_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata); - return 0; + STACK_UNWIND_STRICT(setxattr, frame, op_ret, op_errno, xdata); + return 0; } int32_t -ga_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, - int32_t flags, dict_t *xdata) +ga_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + int32_t flags, dict_t *xdata) { - data_t *data = NULL; - int op_errno = ENOMEM; - int ret = 0; - loc_t ga_loc = {0, }; - - GFID_ACCESS_INODE_OP_CHECK (loc, op_errno, err); - - data = dict_get (dict, GF_FUSE_AUX_GFID_NEWFILE); - if (data) { - ret = ga_new_entry (frame, this, loc, data, xdata); - if (ret) - goto err; - return 0; - } + data_t *data = NULL; + int op_errno = ENOMEM; + int ret = 0; + loc_t ga_loc = { + 0, + }; + + GFID_ACCESS_INODE_OP_CHECK(loc, op_errno, err); + + data = dict_get(dict, GF_FUSE_AUX_GFID_NEWFILE); + if (data) { + ret = ga_new_entry(frame, this, loc, data, xdata); + if (ret) + goto err; + return 
0; + } - data = dict_get (dict, GF_FUSE_AUX_GFID_HEAL); - if (data) { - ret = ga_heal_entry (frame, this, loc, data, xdata); - if (ret) - goto err; - return 0; - } + data = dict_get(dict, GF_FUSE_AUX_GFID_HEAL); + if (data) { + ret = ga_heal_entry(frame, this, loc, data, xdata); + if (ret) + goto err; + return 0; + } - //If the inode is a virtual inode change the inode otherwise perform - //the operation on same inode - ret = ga_valid_inode_loc_copy (&ga_loc, loc, this); - if (ret < 0) - goto err; + // If the inode is a virtual inode change the inode otherwise perform + // the operation on same inode + ret = ga_valid_inode_loc_copy(&ga_loc, loc, this); + if (ret < 0) + goto err; - STACK_WIND (frame, ga_setxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setxattr, &ga_loc, dict, flags, - xdata); + STACK_WIND(frame, ga_setxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, &ga_loc, dict, flags, xdata); - loc_wipe (&ga_loc); - return 0; + loc_wipe(&ga_loc); + return 0; err: - STACK_UNWIND_STRICT (setxattr, frame, -1, op_errno, xdata); - return 0; + STACK_UNWIND_STRICT(setxattr, frame, -1, op_errno, xdata); + return 0; } - int32_t -ga_virtual_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *xdata, struct iatt *postparent) +ga_virtual_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, struct iatt *postparent) { - int j = 0; - int i = 0; - int ret = 0; - uint64_t temp_ino = 0; - inode_t *cbk_inode = NULL; - inode_t *true_inode = NULL; - uuid_t random_gfid = {0,}; - inode_t *linked_inode = NULL; - - if (frame->local) - cbk_inode = frame->local; - else - cbk_inode = inode_ref (inode); - - frame->local = NULL; - if (op_ret) - goto unwind; - - if (!IA_ISDIR (buf->ia_type)) + int ret = 0; + inode_t *cbk_inode = NULL; + inode_t *true_inode = NULL; + uuid_t 
random_gfid = { + 0, + }; + inode_t *linked_inode = NULL; + + if (frame->local) + cbk_inode = frame->local; + else + cbk_inode = inode_ref(inode); + + frame->local = NULL; + if (op_ret) + goto unwind; + + if (!IA_ISDIR(buf->ia_type)) + goto unwind; + + /* need to send back a different inode for linking in itable */ + if (cbk_inode == inode) { + /* check if the inode is in the 'itable' or + if its just previously discover()'d inode */ + true_inode = inode_find(inode->table, buf->ia_gfid); + if (!true_inode) { + /* This unref is for 'inode_ref()' done in beginning. + This is needed as cbk_inode is allocated new inode + whose unref is taken at the end*/ + inode_unref(cbk_inode); + cbk_inode = inode_new(inode->table); + + if (!cbk_inode) { + op_ret = -1; + op_errno = ENOMEM; goto unwind; + } + /* the inode is not present in itable, ie, the actual + path is not yet looked up. Use the current inode + itself for now */ - /* need to send back a different inode for linking in itable */ - if (cbk_inode == inode) { - /* check if the inode is in the 'itable' or - if its just previously discover()'d inode */ - true_inode = inode_find (inode->table, buf->ia_gfid); - if (!true_inode) { - /* This unref is for 'inode_ref()' done in beginning. - This is needed as cbk_inode is allocated new inode - whose unref is taken at the end*/ - inode_unref (cbk_inode); - cbk_inode = inode_new (inode->table); - - if (!cbk_inode) { - op_ret = -1; - op_errno = ENOMEM; - goto unwind; - } - /* the inode is not present in itable, ie, the actual - path is not yet looked up. 
Use the current inode - itself for now */ - - linked_inode = inode_link (inode, NULL, NULL, buf); - inode = linked_inode; - } else { - /* 'inode_ref()' has been done in inode_find() */ - inode = true_inode; - } - - ret = inode_ctx_put (cbk_inode, this, (uint64_t)inode); - if (ret) { - gf_log (this->name, GF_LOG_WARNING, - "failed to set the inode ctx with" - "the actual inode"); - if (inode) - inode_unref (inode); - } - inode = NULL; + linked_inode = inode_link(inode, NULL, NULL, buf); + inode = linked_inode; + } else { + /* 'inode_ref()' has been done in inode_find() */ + inode = true_inode; } - if (!gf_uuid_is_null (cbk_inode->gfid)) { - /* if the previous linked inode is used, use the - same gfid */ - gf_uuid_copy (random_gfid, cbk_inode->gfid); - } else { - /* replace the buf->ia_gfid to a random gfid - for directory, for files, what we received is fine */ - gf_uuid_generate (random_gfid); + ret = inode_ctx_put(cbk_inode, this, (uint64_t)(uintptr_t)inode); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, + "failed to set the inode ctx with" + "the actual inode"); + if (inode) + inode_unref(inode); } + inode = NULL; + } - gf_uuid_copy (buf->ia_gfid, random_gfid); + if (!gf_uuid_is_null(cbk_inode->gfid)) { + /* if the previous linked inode is used, use the + same gfid */ + gf_uuid_copy(random_gfid, cbk_inode->gfid); + } else { + /* replace the buf->ia_gfid to a random gfid + for directory, for files, what we received is fine */ + gf_uuid_generate(random_gfid); + } - for (i = 15; i > (15 - 8); i--) { - temp_ino += (uint64_t)(buf->ia_gfid[i]) << j; - j += 8; - } - buf->ia_ino = temp_ino; + gf_uuid_copy(buf->ia_gfid, random_gfid); + + buf->ia_ino = gfid_to_ino(buf->ia_gfid); unwind: - /* Lookup on non-existing gfid returns ESTALE. - Convert into ENOENT for virtual lookup*/ - if (op_errno == ESTALE) - op_errno = ENOENT; + /* Lookup on non-existing gfid returns ESTALE. 
+ Convert into ENOENT for virtual lookup*/ + if (op_errno == ESTALE) + op_errno = ENOENT; - STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, cbk_inode, buf, - xdata, postparent); + STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, cbk_inode, buf, xdata, + postparent); - /* Also handles inode_unref of frame->local if done in ga_lookup */ - if (cbk_inode) - inode_unref (cbk_inode); + /* Also handles inode_unref of frame->local if done in ga_lookup */ + if (cbk_inode) + inode_unref(cbk_inode); - return 0; + return 0; } int32_t -ga_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *xdata, struct iatt *postparent) +ga_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, inode_t *inode, struct iatt *buf, dict_t *xdata, + struct iatt *postparent) { - ga_private_t *priv = NULL; + ga_private_t *priv = NULL; - /* if the entry in question is not 'root', - then follow the normal path */ - if (op_ret || !__is_root_gfid(buf->ia_gfid)) - goto unwind; + /* if the entry in question is not 'root', + then follow the normal path */ + if (op_ret || !__is_root_gfid(buf->ia_gfid)) + goto unwind; - priv = this->private; + priv = this->private; - /* do we need to copy root stbuf everytime? */ - /* mostly yes, as we want to have the 'stat' info show latest - in every _cbk() */ + /* do we need to copy root stbuf every time? 
*/ + /* mostly yes, as we want to have the 'stat' info show latest + in every _cbk() */ - /* keep the reference for root stat buf */ - priv->root_stbuf = *buf; - priv->gfiddir_stbuf = priv->root_stbuf; - priv->gfiddir_stbuf.ia_gfid[15] = GF_AUX_GFID; - priv->gfiddir_stbuf.ia_ino = GF_AUX_GFID; + /* keep the reference for root stat buf */ + priv->root_stbuf = *buf; + priv->gfiddir_stbuf = priv->root_stbuf; + priv->gfiddir_stbuf.ia_gfid[15] = GF_AUX_GFID; + priv->gfiddir_stbuf.ia_ino = GF_AUX_GFID; unwind: - STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf, - xdata, postparent); - return 0; + STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, xdata, + postparent); + return 0; } int32_t -ga_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +ga_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { - ga_private_t *priv = NULL; - int ret = -1; - uuid_t tmp_gfid = {0,}; - loc_t tmp_loc = {0,}; - uint64_t value = 0; - inode_t *inode = NULL; - inode_t *true_inode = NULL; - int32_t op_errno = ENOENT; - - /* if its discover(), no need for any action here */ - if (!loc->name) - goto wind; - - /* if its revalidate, and inode is not of type directory, - proceed with 'wind' */ - if (loc->inode && loc->inode->ia_type && - !IA_ISDIR (loc->inode->ia_type)) { - - /* a revalidate on ".gfid/<dentry>" is possible, check for it */ - if (((loc->parent && - __is_gfid_access_dir (loc->parent->gfid)) || - __is_gfid_access_dir (loc->pargfid))) { - - /* here, just send 'loc->gfid' and 'loc->inode' */ - tmp_loc.inode = inode_ref (loc->inode); - gf_uuid_copy (tmp_loc.gfid, loc->inode->gfid); - - STACK_WIND (frame, default_lookup_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, - &tmp_loc, xdata); - - inode_unref (tmp_loc.inode); - - return 0; - } - - /* not something to bother, continue the flow */ - goto wind; - } + ga_private_t *priv = NULL; + int ret = -1; + uuid_t tmp_gfid = { + 0, + }; + loc_t tmp_loc = { + 
0, + }; + uint64_t value = 0; + inode_t *inode = NULL; + inode_t *true_inode = NULL; + int32_t op_errno = ENOENT; + + priv = this->private; + + /* Handle nameless lookup on ".gfid" */ + if (!loc->parent && __is_gfid_access_dir(loc->gfid)) { + STACK_UNWIND_STRICT(lookup, frame, 0, 0, loc->inode, + &priv->gfiddir_stbuf, xdata, &priv->root_stbuf); + return 0; + } - priv = this->private; + /* if its discover(), no need for any action here */ + if (!loc->name) + goto wind; - /* need to check if the lookup is on virtual dir */ - if ((loc->name && !strcmp (GF_GFID_DIR, loc->name)) && - ((loc->parent && __is_root_gfid (loc->parent->gfid)) || - __is_root_gfid (loc->pargfid))) { - /* this means, the query is on '/.gfid', return the fake stat, - and say success */ + /* if its revalidate, and inode is not of type directory, + proceed with 'wind' */ + if (loc->inode && loc->inode->ia_type && !IA_ISDIR(loc->inode->ia_type)) { + /* a revalidate on ".gfid/<dentry>" is possible, check for it */ + if (((loc->parent && __is_gfid_access_dir(loc->parent->gfid)) || + __is_gfid_access_dir(loc->pargfid))) { + /* here, just send 'loc->gfid' and 'loc->inode' */ + tmp_loc.inode = inode_ref(loc->inode); + gf_uuid_copy(tmp_loc.gfid, loc->inode->gfid); - STACK_UNWIND_STRICT (lookup, frame, 0, 0, loc->inode, - &priv->gfiddir_stbuf, xdata, - &priv->root_stbuf); - return 0; - } + STACK_WIND(frame, default_lookup_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, &tmp_loc, xdata); - /* now, check if the lookup() is on an existing entry, - but on gfid-path */ - if (!((loc->parent && __is_gfid_access_dir (loc->parent->gfid)) || - __is_gfid_access_dir (loc->pargfid))) { - if (!loc->parent) - goto wind; + inode_unref(tmp_loc.inode); - ret = inode_ctx_get (loc->parent, this, &value); - if (ret) - goto wind; + return 0; + } - inode = (inode_t *) value; + /* not something to bother, continue the flow */ + goto wind; + } - ret = loc_copy_overload_parent (&tmp_loc, loc, inode); - if (ret) - goto 
err; + /* need to check if the lookup is on virtual dir */ + if ((loc->name && !strcmp(GF_GFID_DIR, loc->name)) && + ((loc->parent && __is_root_gfid(loc->parent->gfid)) || + __is_root_gfid(loc->pargfid))) { + /* this means, the query is on '/.gfid', return the fake stat, + and say success */ - STACK_WIND (frame, ga_lookup_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->lookup, &tmp_loc, xdata); + STACK_UNWIND_STRICT(lookup, frame, 0, 0, loc->inode, + &priv->gfiddir_stbuf, xdata, &priv->root_stbuf); + return 0; + } - loc_wipe (&tmp_loc); - return 0; - } + /* now, check if the lookup() is on an existing entry, + but on gfid-path */ + if (!((loc->parent && __is_gfid_access_dir(loc->parent->gfid)) || + __is_gfid_access_dir(loc->pargfid))) { + if (!loc->parent) + goto wind; - /* make sure the 'basename' is actually a 'canonical-gfid', - otherwise, return error */ - ret = gf_uuid_parse (loc->name, tmp_gfid); + ret = inode_ctx_get(loc->parent, this, &value); if (ret) - goto err; + goto wind; - /* if its fresh lookup, go ahead and send it down, if not, - for directory, we need indirection to actual dir inode */ - if (!(loc->inode && loc->inode->ia_type)) - goto discover; + inode = (inode_t *)(uintptr_t)value; - /* revalidate on directory */ - ret = inode_ctx_get (loc->inode, this, &value); + ret = loc_copy_overload_parent(&tmp_loc, loc, inode); if (ret) - goto err; - - inode = (void *)value; + goto err; - /* valid inode, already looked up, work on that */ - if (inode->ia_type) - goto discover; + STACK_WIND(frame, ga_lookup_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, &tmp_loc, xdata); - /* check if the inode is in the 'itable' or - if its just previously discover()'d inode */ - true_inode = inode_find (loc->inode->table, tmp_gfid); - if (true_inode) { - /* time do another lookup and update the context - with proper inode */ - op_errno = ESTALE; - /* 'inode_ref()' done in inode_find */ - inode_unref (true_inode); - goto err; - } + loc_wipe(&tmp_loc); + 
return 0; + } + + /* make sure the 'basename' is actually a 'canonical-gfid', + otherwise, return error */ + ret = gf_uuid_parse(loc->name, tmp_gfid); + if (ret) + goto err; + + /* if its fresh lookup, go ahead and send it down, if not, + for directory, we need indirection to actual dir inode */ + if (!(loc->inode && loc->inode->ia_type)) + goto discover; + + /* revalidate on directory */ + ret = inode_ctx_get(loc->inode, this, &value); + if (ret) + goto err; + + inode = (void *)(uintptr_t)value; + + /* valid inode, already looked up, work on that */ + if (inode->ia_type) + goto discover; + + /* check if the inode is in the 'itable' or + if its just previously discover()'d inode */ + true_inode = inode_find(loc->inode->table, tmp_gfid); + if (true_inode) { + /* time do another lookup and update the context + with proper inode */ + op_errno = ESTALE; + /* 'inode_ref()' done in inode_find */ + inode_unref(true_inode); + goto err; + } discover: - /* for the virtual entries, we don't need to send 'gfid-req' key, as - for these entries, we don't want to 'set' a new gfid */ - if (xdata) - dict_del (xdata, "gfid-req"); + /* for the virtual entries, we don't need to send 'gfid-req' key, as + for these entries, we don't want to 'set' a new gfid */ + if (xdata) + dict_del(xdata, "gfid-req"); - gf_uuid_copy (tmp_loc.gfid, tmp_gfid); + gf_uuid_copy(tmp_loc.gfid, tmp_gfid); - /* if revalidate, then we need to have the proper reference */ - if (inode) { - tmp_loc.inode = inode_ref (inode); - frame->local = inode_ref (loc->inode); - } else { - tmp_loc.inode = inode_ref (loc->inode); - } + /* if revalidate, then we need to have the proper reference */ + if (inode) { + tmp_loc.inode = inode_ref(inode); + frame->local = inode_ref(loc->inode); + } else { + tmp_loc.inode = inode_ref(loc->inode); + } - STACK_WIND (frame, ga_virtual_lookup_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, &tmp_loc, xdata); + STACK_WIND(frame, ga_virtual_lookup_cbk, FIRST_CHILD(this), + 
FIRST_CHILD(this)->fops->lookup, &tmp_loc, xdata); - inode_unref (tmp_loc.inode); + inode_unref(tmp_loc.inode); - return 0; + return 0; wind: - /* used for all the normal lookup path */ - STACK_WIND (frame, ga_lookup_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, loc, xdata); + /* used for all the normal lookup path */ + STACK_WIND(frame, ga_lookup_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, loc, xdata); - return 0; + return 0; err: - STACK_UNWIND_STRICT (lookup, frame, -1, op_errno, loc->inode, - &priv->gfiddir_stbuf, xdata, - &priv->root_stbuf); - return 0; + STACK_UNWIND_STRICT(lookup, frame, -1, op_errno, loc->inode, + &priv->gfiddir_stbuf, xdata, &priv->root_stbuf); + return 0; } int -ga_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, - mode_t umask, dict_t *xdata) +ga_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + mode_t umask, dict_t *xdata) { - int op_errno = ENOMEM; + int op_errno = ENOMEM; - GFID_ACCESS_ENTRY_OP_CHECK (loc, op_errno, err); + GFID_ACCESS_ENTRY_OP_CHECK(loc, op_errno, err); - STACK_WIND (frame, default_mkdir_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, - xdata); + STACK_WIND(frame, default_mkdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata); - return 0; + return 0; err: - STACK_UNWIND_STRICT (mkdir, frame, -1, op_errno, loc->inode, - NULL, NULL, NULL, xdata); - return 0; + STACK_UNWIND_STRICT(mkdir, frame, -1, op_errno, loc->inode, NULL, NULL, + NULL, xdata); + return 0; } - int -ga_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, - mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) +ga_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) { - int op_errno = ENOMEM; + int op_errno = ENOMEM; - GFID_ACCESS_ENTRY_OP_CHECK (loc, op_errno, err); + GFID_ACCESS_ENTRY_OP_CHECK(loc, op_errno, err); - STACK_WIND (frame, 
default_create_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->create, - loc, flags, mode, umask, fd, xdata); - return 0; + STACK_WIND(frame, default_create_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd, + xdata); + return 0; err: - STACK_UNWIND_STRICT (create, frame, -1, op_errno, NULL, - NULL, NULL, NULL, NULL, xdata); - - return 0; + STACK_UNWIND_STRICT(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, + NULL, xdata); + return 0; } int -ga_symlink (call_frame_t *frame, xlator_t *this, const char *linkname, - loc_t *loc, mode_t umask, dict_t *xdata) +ga_symlink(call_frame_t *frame, xlator_t *this, const char *linkname, + loc_t *loc, mode_t umask, dict_t *xdata) { - int op_errno = ENOMEM; + int op_errno = ENOMEM; - GFID_ACCESS_ENTRY_OP_CHECK (loc, op_errno, err); + GFID_ACCESS_ENTRY_OP_CHECK(loc, op_errno, err); - STACK_WIND (frame, default_symlink_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->symlink, - linkname, loc, umask, xdata); - return 0; + STACK_WIND(frame, default_symlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->symlink, linkname, loc, umask, xdata); + return 0; err: - STACK_UNWIND_STRICT (symlink, frame, -1, op_errno, NULL, - NULL, NULL, NULL, xdata); + STACK_UNWIND_STRICT(symlink, frame, -1, op_errno, NULL, NULL, NULL, NULL, + xdata); - return 0; + return 0; } int -ga_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, - dev_t rdev, mode_t umask, dict_t *xdata) +ga_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + dev_t rdev, mode_t umask, dict_t *xdata) { - int op_errno = ENOMEM; + int op_errno = ENOMEM; - GFID_ACCESS_ENTRY_OP_CHECK (loc, op_errno, err); + GFID_ACCESS_ENTRY_OP_CHECK(loc, op_errno, err); - STACK_WIND (frame, default_mknod_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, - umask, xdata); + STACK_WIND(frame, default_mknod_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata); - 
return 0; + return 0; err: - STACK_UNWIND_STRICT (mknod, frame, -1, op_errno, NULL, - NULL, NULL, NULL, xdata); + STACK_UNWIND_STRICT(mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL, + xdata); - return 0; + return 0; } int -ga_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flag, - dict_t *xdata) +ga_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flag, + dict_t *xdata) { - int op_errno = ENOMEM; - int ret = -1; - loc_t ga_loc = {0, }; + int op_errno = ENOMEM; + int ret = -1; + loc_t ga_loc = { + 0, + }; - GFID_ACCESS_ENTRY_OP_CHECK (loc, op_errno, err); + GFID_ACCESS_ENTRY_OP_CHECK(loc, op_errno, err); - ret = ga_valid_inode_loc_copy (&ga_loc, loc, this); - if (ret < 0) - goto err; + ret = ga_valid_inode_loc_copy(&ga_loc, loc, this); + if (ret < 0) + goto err; - STACK_WIND (frame, default_rmdir_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->rmdir, - &ga_loc, flag, xdata); + STACK_WIND(frame, default_rmdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rmdir, &ga_loc, flag, xdata); - loc_wipe (&ga_loc); - return 0; + loc_wipe(&ga_loc); + return 0; err: - STACK_UNWIND_STRICT (rmdir, frame, -1, op_errno, NULL, - NULL, xdata); + STACK_UNWIND_STRICT(rmdir, frame, -1, op_errno, NULL, NULL, xdata); - return 0; + return 0; } int -ga_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t xflag, - dict_t *xdata) +ga_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t xflag, + dict_t *xdata) { - int op_errno = ENOMEM; - int ret = -1; - loc_t ga_loc = {0, }; + int op_errno = ENOMEM; + int ret = -1; + loc_t ga_loc = { + 0, + }; - GFID_ACCESS_ENTRY_OP_CHECK (loc, op_errno, err); + GFID_ACCESS_ENTRY_OP_CHECK(loc, op_errno, err); - ret = ga_valid_inode_loc_copy (&ga_loc, loc, this); - if (ret < 0) - goto err; + ret = ga_valid_inode_loc_copy(&ga_loc, loc, this); + if (ret < 0) + goto err; - STACK_WIND (frame, default_unlink_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink, - &ga_loc, xflag, xdata); + 
STACK_WIND(frame, default_unlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, &ga_loc, xflag, xdata); - loc_wipe (&ga_loc); - return 0; + loc_wipe(&ga_loc); + return 0; err: - STACK_UNWIND_STRICT (unlink, frame, -1, op_errno, NULL, - NULL, xdata); + STACK_UNWIND_STRICT(unlink, frame, -1, op_errno, NULL, NULL, xdata); - return 0; + return 0; } int -ga_rename (call_frame_t *frame, xlator_t *this, - loc_t *oldloc, loc_t *newloc, dict_t *xdata) +ga_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) { - int op_errno = ENOMEM; - int ret = 0; - loc_t ga_oldloc = {0, }; - loc_t ga_newloc = {0, }; - - GFID_ACCESS_ENTRY_OP_CHECK (oldloc, op_errno, err); - GFID_ACCESS_ENTRY_OP_CHECK (newloc, op_errno, err); - - ret = ga_valid_inode_loc_copy (&ga_oldloc, oldloc, this); - if (ret < 0) - goto err; - - ret = ga_valid_inode_loc_copy (&ga_newloc, newloc, this); - if (ret < 0) { - loc_wipe (&ga_oldloc); - goto err; - } - - STACK_WIND (frame, default_rename_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->rename, - &ga_oldloc, &ga_newloc, xdata); - - loc_wipe (&ga_newloc); - loc_wipe (&ga_oldloc); - return 0; + int op_errno = ENOMEM; + int ret = 0; + loc_t ga_oldloc = { + 0, + }; + loc_t ga_newloc = { + 0, + }; + + GFID_ACCESS_ENTRY_OP_CHECK(oldloc, op_errno, err); + GFID_ACCESS_ENTRY_OP_CHECK(newloc, op_errno, err); + + ret = ga_valid_inode_loc_copy(&ga_oldloc, oldloc, this); + if (ret < 0) + goto err; + + ret = ga_valid_inode_loc_copy(&ga_newloc, newloc, this); + if (ret < 0) { + loc_wipe(&ga_oldloc); + goto err; + } + + STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rename, &ga_oldloc, &ga_newloc, xdata); + + loc_wipe(&ga_newloc); + loc_wipe(&ga_oldloc); + return 0; err: - STACK_UNWIND_STRICT (rename, frame, -1, op_errno, NULL, - NULL, NULL, NULL, NULL, xdata); + STACK_UNWIND_STRICT(rename, frame, -1, op_errno, NULL, NULL, NULL, NULL, + NULL, xdata); - return 0; + return 0; } - int 
-ga_link (call_frame_t *frame, xlator_t *this, - loc_t *oldloc, loc_t *newloc, dict_t *xdata) +ga_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) { - int op_errno = ENOMEM; - int ret = 0; - loc_t ga_oldloc = {0, }; - loc_t ga_newloc = {0, }; - - GFID_ACCESS_ENTRY_OP_CHECK (oldloc, op_errno, err); - GFID_ACCESS_ENTRY_OP_CHECK (newloc, op_errno, err); - - ret = ga_valid_inode_loc_copy (&ga_oldloc, oldloc, this); - if (ret < 0) - goto err; - - ret = ga_valid_inode_loc_copy (&ga_newloc, newloc, this); - if (ret < 0) { - loc_wipe (&ga_oldloc); - goto err; - } - - STACK_WIND (frame, default_link_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->link, - &ga_oldloc, &ga_newloc, xdata); - - loc_wipe (&ga_newloc); - loc_wipe (&ga_oldloc); - return 0; + int op_errno = ENOMEM; + int ret = 0; + loc_t ga_oldloc = { + 0, + }; + loc_t ga_newloc = { + 0, + }; + + GFID_ACCESS_ENTRY_OP_CHECK(oldloc, op_errno, err); + GFID_ACCESS_ENTRY_OP_CHECK(newloc, op_errno, err); + + ret = ga_valid_inode_loc_copy(&ga_oldloc, oldloc, this); + if (ret < 0) + goto err; + + ret = ga_valid_inode_loc_copy(&ga_newloc, newloc, this); + if (ret < 0) { + loc_wipe(&ga_oldloc); + goto err; + } + + STACK_WIND(frame, default_link_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->link, &ga_oldloc, &ga_newloc, xdata); + + loc_wipe(&ga_newloc); + loc_wipe(&ga_oldloc); + return 0; err: - STACK_UNWIND_STRICT (link, frame, -1, op_errno, NULL, - NULL, NULL, NULL, xdata); + STACK_UNWIND_STRICT(link, frame, -1, op_errno, NULL, NULL, NULL, NULL, + xdata); - return 0; + return 0; } int32_t -ga_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, - fd_t *fd, dict_t *xdata) +ga_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, + dict_t *xdata) { - int op_errno = ENOMEM; + int op_errno = ENOMEM; - GFID_ACCESS_INODE_OP_CHECK (loc, op_errno, err); + GFID_ACCESS_INODE_OP_CHECK(loc, op_errno, err); - /* also check if the loc->inode itself is virtual - inode, if 
yes, return with failure, mainly because we - can't handle all the readdirp and other things on it. */ - if (inode_ctx_get (loc->inode, this, NULL) == 0) { - op_errno = ENOTSUP; - goto err; - } + /* also check if the loc->inode itself is virtual + inode, if yes, return with failure, mainly because we + can't handle all the readdirp and other things on it. */ + if (inode_ctx_get(loc->inode, this, NULL) == 0) { + op_errno = ENOTSUP; + goto err; + } - STACK_WIND (frame, default_opendir_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->opendir, - loc, fd, xdata); - return 0; + STACK_WIND(frame, default_opendir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->opendir, loc, fd, xdata); + return 0; err: - STACK_UNWIND_STRICT (opendir, frame, -1, op_errno, NULL, xdata); + STACK_UNWIND_STRICT(opendir, frame, -1, op_errno, NULL, xdata); - return 0; + return 0; } int32_t -ga_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *name, dict_t *xdata) +ga_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name, + dict_t *xdata) { - int op_errno = ENOMEM; - int ret = -1; - loc_t ga_loc = {0, }; + int op_errno = ENOMEM; + int ret = -1; + loc_t ga_loc = { + 0, + }; - GFID_ACCESS_INODE_OP_CHECK (loc, op_errno, err); - ret = ga_valid_inode_loc_copy (&ga_loc, loc, this); - if (ret < 0) - goto err; + GFID_ACCESS_INODE_OP_CHECK(loc, op_errno, err); + ret = ga_valid_inode_loc_copy(&ga_loc, loc, this); + if (ret < 0) + goto err; - STACK_WIND (frame, default_getxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->getxattr, &ga_loc, name, xdata); + STACK_WIND(frame, default_getxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->getxattr, &ga_loc, name, xdata); - loc_wipe (&ga_loc); + loc_wipe(&ga_loc); - return 0; + return 0; err: - STACK_UNWIND_STRICT (getxattr, frame, -1, op_errno, NULL, xdata); + STACK_UNWIND_STRICT(getxattr, frame, -1, op_errno, NULL, xdata); - return 0; + return 0; } int32_t -ga_stat (call_frame_t *frame, xlator_t 
*this, loc_t *loc, - dict_t *xdata) +ga_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { - int op_errno = ENOMEM; - int ret = -1; - loc_t ga_loc = {0, }; - ga_private_t *priv = NULL; - - priv = this->private; - /* If stat is on ".gfid" itself, do not wind further, - * return fake stat and return success. - */ - if (__is_gfid_access_dir(loc->gfid)) - goto out; - - ret = ga_valid_inode_loc_copy (&ga_loc, loc, this); - if (ret < 0) - goto err; - - STACK_WIND (frame, default_stat_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->stat, &ga_loc, xdata); - - loc_wipe (&ga_loc); - return 0; + int op_errno = ENOMEM; + int ret = -1; + loc_t ga_loc = { + 0, + }; + ga_private_t *priv = NULL; + + priv = this->private; + /* If stat is on ".gfid" itself, do not wind further, + * return fake stat and return success. + */ + if (__is_gfid_access_dir(loc->gfid)) + goto out; + + ret = ga_valid_inode_loc_copy(&ga_loc, loc, this); + if (ret < 0) + goto err; + + STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->stat, &ga_loc, xdata); + + loc_wipe(&ga_loc); + return 0; err: - STACK_UNWIND_STRICT (stat, frame, -1, op_errno, NULL, xdata); + STACK_UNWIND_STRICT(stat, frame, -1, op_errno, NULL, xdata); - return 0; + return 0; out: - STACK_UNWIND_STRICT (stat, frame, 0, 0, &priv->gfiddir_stbuf, xdata); - return 0; + STACK_UNWIND_STRICT(stat, frame, 0, 0, &priv->gfiddir_stbuf, xdata); + return 0; } int32_t -ga_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - struct iatt *stbuf, int32_t valid, - dict_t *xdata) +ga_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf, + int32_t valid, dict_t *xdata) { - int op_errno = ENOMEM; - int ret = -1; - loc_t ga_loc = {0, }; - - GFID_ACCESS_INODE_OP_CHECK (loc, op_errno, err); - ret = ga_valid_inode_loc_copy (&ga_loc, loc, this); - if (ret < 0) - goto err; - - STACK_WIND (frame, default_setattr_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->setattr, &ga_loc, 
stbuf, valid, - xdata); - - loc_wipe (&ga_loc); - return 0; + int op_errno = ENOMEM; + int ret = -1; + loc_t ga_loc = { + 0, + }; + + GFID_ACCESS_INODE_OP_CHECK(loc, op_errno, err); + ret = ga_valid_inode_loc_copy(&ga_loc, loc, this); + if (ret < 0) + goto err; + + STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setattr, &ga_loc, stbuf, valid, xdata); + + loc_wipe(&ga_loc); + return 0; err: - STACK_UNWIND_STRICT (setattr, frame, -1, op_errno, NULL, NULL, xdata); + STACK_UNWIND_STRICT(setattr, frame, -1, op_errno, NULL, NULL, xdata); - return 0; + return 0; } int32_t -ga_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *name, dict_t *xdata) +ga_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) { - int op_errno = ENOMEM; - int ret = -1; - loc_t ga_loc = {0, }; + int op_errno = ENOMEM; + int ret = -1; + loc_t ga_loc = { + 0, + }; - GFID_ACCESS_INODE_OP_CHECK (loc, op_errno, err); - ret = ga_valid_inode_loc_copy (&ga_loc, loc, this); - if (ret < 0) - goto err; + GFID_ACCESS_INODE_OP_CHECK(loc, op_errno, err); + ret = ga_valid_inode_loc_copy(&ga_loc, loc, this); + if (ret < 0) + goto err; - STACK_WIND (frame, default_removexattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->removexattr, &ga_loc, name, - xdata); + STACK_WIND(frame, default_removexattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, &ga_loc, name, xdata); - loc_wipe (&ga_loc); - return 0; + loc_wipe(&ga_loc); + return 0; err: - STACK_UNWIND_STRICT (removexattr, frame, -1, op_errno, xdata); + STACK_UNWIND_STRICT(removexattr, frame, -1, op_errno, xdata); - return 0; + return 0; } - int32_t -mem_acct_init (xlator_t *this) +mem_acct_init(xlator_t *this) { - int ret = -1; + int ret = -1; - if (!this) - return ret; - - ret = xlator_mem_acct_init (this, gf_gfid_access_mt_end + 1); + if (!this) + return ret; - if (ret != 0) { - gf_log (this->name, GF_LOG_WARNING, "Memory accounting" - 
" init failed"); - return ret; - } + ret = xlator_mem_acct_init(this, gf_gfid_access_mt_end + 1); + if (ret != 0) { + gf_log(this->name, GF_LOG_WARNING, + "Memory accounting" + " init failed"); return ret; + } + + return ret; } int32_t -init (xlator_t *this) +init(xlator_t *this) { - ga_private_t *priv = NULL; - int ret = -1; - - if (!this->children || this->children->next) { - gf_log (this->name, GF_LOG_ERROR, - "not configured with exactly one child. exiting"); - goto out; - } - - /* This can be the top of graph in certain cases */ - if (!this->parents) { - gf_log (this->name, GF_LOG_DEBUG, - "dangling volume. check volfile "); - } - - /* TODO: define a mem-type structure */ - priv = GF_CALLOC (1, sizeof (*priv), gf_gfid_access_mt_priv_t); - if (!priv) - goto out; - - priv->newfile_args_pool = mem_pool_new (ga_newfile_args_t, 512); - if (!priv->newfile_args_pool) - goto out; - - priv->heal_args_pool = mem_pool_new (ga_heal_args_t, 512); - if (!priv->heal_args_pool) - goto out; - - this->local_pool = mem_pool_new (ga_local_t, 16); - if (!this->local_pool) { - gf_log (this->name, GF_LOG_ERROR, - "failed to create local_t's memory pool"); - goto out; - } - - this->private = priv; - - ret = 0; + ga_private_t *priv = NULL; + int ret = -1; + + if (!this->children || this->children->next) { + gf_log(this->name, GF_LOG_ERROR, + "not configured with exactly one child. exiting"); + goto out; + } + + /* This can be the top of graph in certain cases */ + if (!this->parents) { + gf_log(this->name, GF_LOG_DEBUG, "dangling volume. 
check volfile "); + } + + /* TODO: define a mem-type structure */ + priv = GF_CALLOC(1, sizeof(*priv), gf_gfid_access_mt_priv_t); + if (!priv) + goto out; + + priv->newfile_args_pool = mem_pool_new(ga_newfile_args_t, 512); + if (!priv->newfile_args_pool) + goto out; + + priv->heal_args_pool = mem_pool_new(ga_heal_args_t, 512); + if (!priv->heal_args_pool) + goto out; + + this->local_pool = mem_pool_new(ga_local_t, 16); + if (!this->local_pool) { + gf_log(this->name, GF_LOG_ERROR, + "failed to create local_t's memory pool"); + goto out; + } + + this->private = priv; + + ret = 0; out: - if (ret && priv) { - if (priv->newfile_args_pool) - mem_pool_destroy (priv->newfile_args_pool); - GF_FREE (priv); - } + if (ret && priv) { + if (priv->newfile_args_pool) + mem_pool_destroy(priv->newfile_args_pool); + GF_FREE(priv); + } - return ret; + return ret; } void -fini (xlator_t *this) +fini(xlator_t *this) { - ga_private_t *priv = NULL; - priv = this->private; - this->private = NULL; - - if (priv) { - if (priv->newfile_args_pool) - mem_pool_destroy (priv->newfile_args_pool); - if (priv->heal_args_pool) - mem_pool_destroy (priv->heal_args_pool); - GF_FREE (priv); - } - - return; + ga_private_t *priv = NULL; + priv = this->private; + this->private = NULL; + + if (priv) { + if (priv->newfile_args_pool) + mem_pool_destroy(priv->newfile_args_pool); + if (priv->heal_args_pool) + mem_pool_destroy(priv->heal_args_pool); + GF_FREE(priv); + } + + return; } int32_t -ga_dump_inodectx (xlator_t *this, inode_t *inode) +ga_dump_inodectx(xlator_t *this, inode_t *inode) { - int ret = -1; - uint64_t value = 0; - inode_t *tmp_inode = NULL; - char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, }; - - ret = inode_ctx_get (inode, this, &value); - if (ret == 0) { - tmp_inode = (void*) value; - gf_proc_dump_build_key (key_prefix, this->name, "inode"); - gf_proc_dump_add_section (key_prefix); - gf_proc_dump_write ("real-gfid", "%s", - uuid_utoa (tmp_inode->gfid)); - } - - return 0; + int ret = -1; + uint64_t 
value = 0; + inode_t *tmp_inode = NULL; + char key_prefix[GF_DUMP_MAX_BUF_LEN] = { + 0, + }; + + ret = inode_ctx_get(inode, this, &value); + if (ret == 0) { + tmp_inode = (void *)(uintptr_t)value; + gf_proc_dump_build_key(key_prefix, this->name, "inode"); + gf_proc_dump_add_section("%s", key_prefix); + gf_proc_dump_write("real-gfid", "%s", uuid_utoa(tmp_inode->gfid)); + } + + return 0; } struct xlator_fops fops = { - .lookup = ga_lookup, - - /* entry fops */ - .mkdir = ga_mkdir, - .mknod = ga_mknod, - .create = ga_create, - .symlink = ga_symlink, - .link = ga_link, - .unlink = ga_unlink, - .rmdir = ga_rmdir, - .rename = ga_rename, - - /* handle any other directory operations here */ - .opendir = ga_opendir, - .stat = ga_stat, - .setattr = ga_setattr, - .getxattr = ga_getxattr, - .removexattr = ga_removexattr, - - /* special fop to handle more entry creations */ - .setxattr = ga_setxattr, + .lookup = ga_lookup, + + /* entry fops */ + .mkdir = ga_mkdir, + .mknod = ga_mknod, + .create = ga_create, + .symlink = ga_symlink, + .link = ga_link, + .unlink = ga_unlink, + .rmdir = ga_rmdir, + .rename = ga_rename, + + /* handle any other directory operations here */ + .opendir = ga_opendir, + .stat = ga_stat, + .setattr = ga_setattr, + .getxattr = ga_getxattr, + .removexattr = ga_removexattr, + + /* special fop to handle more entry creations */ + .setxattr = ga_setxattr, }; struct xlator_cbks cbks = { - .forget = ga_forget, + .forget = ga_forget, }; struct xlator_dumpops dumpops = { - .inodectx = ga_dump_inodectx, + .inodectx = ga_dump_inodectx, }; struct volume_options options[] = { - /* This translator doesn't take any options, or provide any options */ - { .key = {NULL} }, + /* This translator doesn't take any options, or provide any options */ + {.key = {NULL}}, +}; + +xlator_api_t xlator_api = { + .init = init, + .fini = fini, + .mem_acct_init = mem_acct_init, + .op_version = {1}, + .fops = &fops, + .cbks = &cbks, + .options = options, + .identifier = "gfid-access", + 
.category = GF_MAINTAINED, }; diff --git a/xlators/features/gfid-access/src/gfid-access.h b/xlators/features/gfid-access/src/gfid-access.h index 2b5e4fd4184..b1e255e56c0 100644 --- a/xlators/features/gfid-access/src/gfid-access.h +++ b/xlators/features/gfid-access/src/gfid-access.h @@ -10,97 +10,97 @@ #ifndef __GFID_ACCESS_H__ #define __GFID_ACCESS_H__ -#include "glusterfs.h" -#include "logging.h" -#include "dict.h" -#include "xlator.h" -#include "defaults.h" +#include <glusterfs/glusterfs.h> +#include <glusterfs/logging.h> +#include <glusterfs/dict.h> +#include <glusterfs/xlator.h> +#include <glusterfs/defaults.h> #include "gfid-access-mem-types.h" #define UUID_CANONICAL_FORM_LEN 36 #define GF_FUSE_AUX_GFID_NEWFILE "glusterfs.gfid.newfile" -#define GF_FUSE_AUX_GFID_HEAL "glusterfs.gfid.heal" +#define GF_FUSE_AUX_GFID_HEAL "glusterfs.gfid.heal" #define GF_GFID_KEY "GLUSTERFS_GFID" #define GF_GFID_DIR ".gfid" #define GF_AUX_GFID 0xd -#define GFID_ACCESS_ENTRY_OP_CHECK(loc,err,lbl) do { \ - /* need to check if the lookup is on virtual dir */ \ - if ((loc->name && !strcmp (GF_GFID_DIR, loc->name)) && \ - ((loc->parent && \ - __is_root_gfid (loc->parent->gfid)) || \ - __is_root_gfid (loc->pargfid))) { \ - err = ENOTSUP; \ - goto lbl; \ - } \ - \ - /* now, check if the lookup() is on an existing */ \ - /* entry, but on gfid-path */ \ - if ((loc->parent && \ - __is_gfid_access_dir (loc->parent->gfid)) || \ - __is_gfid_access_dir (loc->pargfid)) { \ - err = EPERM; \ - goto lbl; \ - } \ - } while (0) +#define GFID_ACCESS_ENTRY_OP_CHECK(loc, err, lbl) \ + do { \ + /* need to check if the lookup is on virtual dir */ \ + if ((loc->name && !strcmp(GF_GFID_DIR, loc->name)) && \ + ((loc->parent && __is_root_gfid(loc->parent->gfid)) || \ + __is_root_gfid(loc->pargfid))) { \ + err = ENOTSUP; \ + goto lbl; \ + } \ + \ + /* now, check if the lookup() is on an existing */ \ + /* entry, but on gfid-path */ \ + if ((loc->parent && __is_gfid_access_dir(loc->parent->gfid)) || \ + 
__is_gfid_access_dir(loc->pargfid)) { \ + err = EPERM; \ + goto lbl; \ + } \ + } while (0) -#define GFID_ACCESS_INODE_OP_CHECK(loc,err,lbl) do { \ - /*Check if it is on .gfid*/ \ - if (__is_gfid_access_dir(loc->gfid)) { \ - err = ENOTSUP; \ - goto lbl; \ - } \ - } while (0) +#define GFID_ACCESS_INODE_OP_CHECK(loc, err, lbl) \ + do { \ + /*Check if it is on .gfid*/ \ + if (__is_gfid_access_dir(loc->gfid)) { \ + err = ENOTSUP; \ + goto lbl; \ + } \ + } while (0) typedef struct { - unsigned int uid; - unsigned int gid; - char gfid[UUID_CANONICAL_FORM_LEN + 1]; - unsigned int st_mode; - char *bname; + unsigned int uid; + unsigned int gid; + char gfid[UUID_CANONICAL_FORM_LEN + 1]; + unsigned int st_mode; + char *bname; - union { - struct _symlink_in { - char *linkpath; - } __attribute__ ((__packed__)) symlink; + union { + struct _symlink_in { + char *linkpath; + } __attribute__((__packed__)) symlink; - struct _mknod_in { - unsigned int mode; - unsigned int rdev; - unsigned int umask; - } __attribute__ ((__packed__)) mknod; + struct _mknod_in { + unsigned int mode; + unsigned int rdev; + unsigned int umask; + } __attribute__((__packed__)) mknod; - struct _mkdir_in { - unsigned int mode; - unsigned int umask; - } __attribute__ ((__packed__)) mkdir; - } __attribute__ ((__packed__)) args; + struct _mkdir_in { + unsigned int mode; + unsigned int umask; + } __attribute__((__packed__)) mkdir; + } __attribute__((__packed__)) args; } __attribute__((__packed__)) ga_newfile_args_t; typedef struct { - char gfid[UUID_CANONICAL_FORM_LEN + 1]; - char *bname; /* a null terminated basename */ + char gfid[UUID_CANONICAL_FORM_LEN + 1]; + char *bname; /* a null terminated basename */ } __attribute__((__packed__)) ga_heal_args_t; struct ga_private { - /* root inode's stbuf */ - struct iatt root_stbuf; - struct iatt gfiddir_stbuf; - struct mem_pool *newfile_args_pool; - struct mem_pool *heal_args_pool; + /* root inode's stbuf */ + struct iatt root_stbuf; + struct iatt gfiddir_stbuf; + struct 
mem_pool *newfile_args_pool; + struct mem_pool *heal_args_pool; }; typedef struct ga_private ga_private_t; struct __ga_local { - call_frame_t *orig_frame; - unsigned int uid; - unsigned int gid; - loc_t loc; - mode_t mode; - dev_t rdev; - mode_t umask; - dict_t *xdata; + call_frame_t *orig_frame; + unsigned int uid; + unsigned int gid; + loc_t loc; + mode_t mode; + dev_t rdev; + mode_t umask; + dict_t *xdata; }; typedef struct __ga_local ga_local_t; diff --git a/xlators/features/glupy/Makefile.am b/xlators/features/glupy/Makefile.am deleted file mode 100644 index 060429ecf0f..00000000000 --- a/xlators/features/glupy/Makefile.am +++ /dev/null @@ -1,3 +0,0 @@ -SUBDIRS = src examples - -CLEANFILES = diff --git a/xlators/features/glupy/doc/README.md b/xlators/features/glupy/doc/README.md deleted file mode 100644 index 4b8b863ef39..00000000000 --- a/xlators/features/glupy/doc/README.md +++ /dev/null @@ -1,44 +0,0 @@ -This is just the very start for a GlusterFS[1] meta-translator that will -allow translator code to be written in Python. It's based on the standard -Python embedding (not extending) techniques, plus a dash of the ctypes module. -The interface is a pretty minimal adaptation of the dispatches and callbacks -from the C API[2] to Python, as follows: - -* Dispatch functions and callbacks must be defined on an "xlator" class - derived from gluster.Translator so that they'll be auto-registered with - the C translator during initialization. - -* For each dispatch or callback function you want to intercept, you define a - Python function using the xxx\_fop\_t or xxx\_cbk\_t decorator. - -* The arguments for each operation are different, so you'll need to refer to - the C API. GlusterFS-specific types are used (though only loc\_t is fully - defined so far) and type correctness is enforced by ctypes. 
- -* If you do intercept a dispatch function, it is your responsibility to call - xxx\_wind (like STACK\_WIND in the C API but operation-specific) to pass - the request to the next translator. If you do not intercept a function, it - will default the same way as for C (pass through to the same operation with - the same arguments on the first child translator). - -* If you intercept a callback function, it is your responsibility to call - xxx\_unwind (like STACK\_UNWIND\_STRICT in the C API) to pass the request back - to the caller. - -So far only the lookup and create operations are handled this way, to support -the "negative lookup" example. Now that the basic infrastructure is in place, -adding more functions should be very quick, though with that much boilerplate I -might pause to write a code generator. I also plan to add structure -definitions and interfaces for some of the utility functions in libglusterfs -(especially those having to do with inode and fd context) in the fairly near -future. Note that you can also use ctypes to get at anything not explicitly -exposed to Python already. - -_If you're coming here because of the Linux Journal article, please note that -the code has evolved since that was written. The version that matches the -article is here:_ - -https://github.com/jdarcy/glupy/tree/4bbae91ba459ea46ef32f2966562492e4ca9187a - -[1] http://www.gluster.org -[2] http://pl.atyp.us/hekafs.org/dist/xlator_api_2.html diff --git a/xlators/features/glupy/doc/TESTING b/xlators/features/glupy/doc/TESTING deleted file mode 100644 index e05f17f498f..00000000000 --- a/xlators/features/glupy/doc/TESTING +++ /dev/null @@ -1,9 +0,0 @@ -Loading a translator written in Python using the glupy meta translator -------------------------------------------------------------------------------- -'test.vol' is a simple volfile with the debug-trace Python translator on top -of a brick. The volfile can be mounted using the following command. 
- -$ glusterfs --debug -f test.vol /path/to/mntpt - -If then file operations are performed on the newly mounted file system, log -output would be printed by the Python translator on the standard output. diff --git a/xlators/features/glupy/doc/test.vol b/xlators/features/glupy/doc/test.vol deleted file mode 100644 index 0751a488c1f..00000000000 --- a/xlators/features/glupy/doc/test.vol +++ /dev/null @@ -1,10 +0,0 @@ -volume vol-posix - type storage/posix - option directory /path/to/brick -end-volume - -volume vol-glupy - type features/glupy - option module-name debug-trace - subvolumes vol-posix -end-volume diff --git a/xlators/features/glupy/examples/Makefile.am b/xlators/features/glupy/examples/Makefile.am deleted file mode 100644 index c26abeaafb6..00000000000 --- a/xlators/features/glupy/examples/Makefile.am +++ /dev/null @@ -1,5 +0,0 @@ -xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features - -glupyexamplesdir = $(xlatordir)/glupy - -glupyexamples_PYTHON = negative.py helloworld.py debug-trace.py diff --git a/xlators/features/glupy/examples/debug-trace.py b/xlators/features/glupy/examples/debug-trace.py deleted file mode 100644 index 6eef1b58b8f..00000000000 --- a/xlators/features/glupy/examples/debug-trace.py +++ /dev/null @@ -1,775 +0,0 @@ -import sys -import stat -from uuid import UUID -from time import strftime, localtime -from gluster.glupy import * - -# This translator was written primarily to test the fop entry point definitions -# and structure definitions in 'glupy.py'. - -# It is similar to the C language debug-trace translator, which logs the -# arguments passed to the fops and their corresponding cbk functions. 
- -dl.get_id.restype = c_long -dl.get_id.argtypes = [ POINTER(call_frame_t) ] - -dl.get_rootunique.restype = c_uint64 -dl.get_rootunique.argtypes = [ POINTER(call_frame_t) ] - -def uuid2str (gfid): - return str(UUID(''.join(map("{0:02x}".format, gfid)))) - - -def st_mode_from_ia (prot, filetype): - st_mode = 0 - type_bit = 0 - prot_bit = 0 - - if filetype == IA_IFREG: - type_bit = stat.S_IFREG - elif filetype == IA_IFDIR: - type_bit = stat.S_IFDIR - elif filetype == IA_IFLNK: - type_bit = stat.S_IFLNK - elif filetype == IA_IFBLK: - type_bit = stat.S_IFBLK - elif filetype == IA_IFCHR: - type_bit = stat.S_IFCHR - elif filetype == IA_IFIFO: - type_bit = stat.S_IFIFO - elif filetype == IA_IFSOCK: - type_bit = stat.S_IFSOCK - elif filetype == IA_INVAL: - pass - - - if prot.suid: - prot_bit |= stat.S_ISUID - if prot.sgid: - prot_bit |= stat.S_ISGID - if prot.sticky: - prot_bit |= stat.S_ISVTX - - if prot.owner.read: - prot_bit |= stat.S_IRUSR - if prot.owner.write: - prot_bit |= stat.S_IWUSR - if prot.owner.execn: - prot_bit |= stat.S_IXUSR - - if prot.group.read: - prot_bit |= stat.S_IRGRP - if prot.group.write: - prot_bit |= stat.S_IWGRP - if prot.group.execn: - prot_bit |= stat.S_IXGRP - - if prot.other.read: - prot_bit |= stat.S_IROTH - if prot.other.write: - prot_bit |= stat.S_IWOTH - if prot.other.execn: - prot_bit |= stat.S_IXOTH - - st_mode = (type_bit | prot_bit) - - return st_mode - - -def trace_stat2str (buf): - gfid = uuid2str(buf.contents.ia_gfid) - mode = st_mode_from_ia(buf.contents.ia_prot, buf.contents.ia_type) - atime_buf = strftime("[%b %d %H:%M:%S]", - localtime(buf.contents.ia_atime)) - mtime_buf = strftime("[%b %d %H:%M:%S]", - localtime(buf.contents.ia_mtime)) - ctime_buf = strftime("[%b %d %H:%M:%S]", - localtime(buf.contents.ia_ctime)) - return ("(gfid={0:s}, ino={1:d}, mode={2:o}, nlink={3:d}, uid ={4:d}, "+ - "gid ={5:d}, size={6:d}, blocks={7:d}, atime={8:s}, mtime={9:s}, "+ - "ctime={10:s})").format(gfid, buf.contents.ia_no, mode, - 
buf.contents.ia_nlink, - buf.contents.ia_uid, - buf.contents.ia_gid, - buf.contents.ia_size, - buf.contents.ia_blocks, - atime_buf, mtime_buf, - ctime_buf) - -class xlator(Translator): - - def __init__(self, c_this): - Translator.__init__(self, c_this) - self.gfids = {} - - def lookup_fop(self, frame, this, loc, xdata): - unique = dl.get_rootunique(frame) - key = dl.get_id(frame) - gfid = uuid2str(loc.contents.gfid) - print("GLUPY TRACE LOOKUP FOP- {0:d}: gfid={1:s}; " + - "path={2:s}").format(unique, gfid, loc.contents.path) - self.gfids[key] = gfid - dl.wind_lookup(frame, POINTER(xlator_t)(), loc, xdata) - return 0 - - def lookup_cbk(self, frame, cookie, this, op_ret, op_errno, - inode, buf, xdata, postparent): - unique =dl.get_rootunique(frame) - key =dl.get_id(frame) - if op_ret == 0: - gfid = uuid2str(buf.contents.ia_gfid) - statstr = trace_stat2str(buf) - postparentstr = trace_stat2str(postparent) - print("GLUPY TRACE LOOKUP CBK- {0:d}: gfid={1:s}; "+ - "op_ret={2:d}; *buf={3:s}; " + - "*postparent={4:s}").format(unique, gfid, - op_ret, statstr, - postparentstr) - else: - gfid = self.gfids[key] - print("GLUPY TRACE LOOKUP CBK - {0:d}: gfid={1:s};" + - " op_ret={2:d}; op_errno={3:d}").format(unique, - gfid, - op_ret, - op_errno) - del self.gfids[key] - dl.unwind_lookup(frame, cookie, this, op_ret, op_errno, - inode, buf, xdata, postparent) - return 0 - - def create_fop(self, frame, this, loc, flags, mode, umask, fd, - xdata): - unique = dl.get_rootunique(frame) - gfid = uuid2str(loc.contents.gfid) - print("GLUPY TRACE CREATE FOP- {0:d}: gfid={1:s}; path={2:s}; " + - "fd={3:s}; flags=0{4:o}; mode=0{5:o}; " + - "umask=0{6:o}").format(unique, gfid, loc.contents.path, - fd, flags, mode, umask) - dl.wind_create(frame, POINTER(xlator_t)(), loc, flags,mode, - umask, fd, xdata) - return 0 - - def create_cbk(self, frame, cookie, this, op_ret, op_errno, fd, - inode, buf, preparent, postparent, xdata): - unique = dl.get_rootunique(frame) - if op_ret >= 0: - gfid = 
uuid2str(inode.contents.gfid) - statstr = trace_stat2str(buf) - preparentstr = trace_stat2str(preparent) - postparentstr = trace_stat2str(postparent) - print("GLUPY TRACE CREATE CBK- {0:d}: gfid={1:s};" + - " op_ret={2:d}; fd={3:s}; *stbuf={4:s}; " + - "*preparent={5:s};" + - " *postparent={6:s}").format(unique, gfid, op_ret, - fd, statstr, - preparentstr, - postparentstr) - else: - print ("GLUPY TRACE CREATE CBK- {0:d}: op_ret={1:d}; " + - "op_errno={2:d}").format(unique, op_ret, op_errno) - dl.unwind_create(frame, cookie, this, op_ret, op_errno, fd, - inode, buf, preparent, postparent, xdata) - return 0 - - def open_fop(self, frame, this, loc, flags, fd, xdata): - unique = dl.get_rootunique(frame) - key = dl.get_id(frame) - gfid = uuid2str(loc.contents.inode.contents.gfid) - print("GLUPY TRACE OPEN FOP- {0:d}: gfid={1:s}; path={2:s}; "+ - "flags={3:d}; fd={4:s}").format(unique, gfid, - loc.contents.path, flags, - fd) - self.gfids[key] = gfid - dl.wind_open(frame, POINTER(xlator_t)(), loc, flags, fd, xdata) - return 0 - - def open_cbk(self, frame, cookie, this, op_ret, op_errno, fd, xdata): - unique = dl.get_rootunique(frame) - key = dl.get_id(frame) - gfid = self.gfids[key] - print("GLUPY TRACE OPEN CBK- {0:d}: gfid={1:s}; op_ret={2:d}; " - "op_errno={3:d}; *fd={4:s}").format(unique, gfid, - op_ret, op_errno, fd) - del self.gfids[key] - dl.unwind_open(frame, cookie, this, op_ret, op_errno, fd, - xdata) - return 0 - - def readv_fop(self, frame, this, fd, size, offset, flags, xdata): - unique = dl.get_rootunique(frame) - key = dl.get_id(frame) - gfid = uuid2str(fd.contents.inode.contents.gfid) - print("GLUPY TRACE READV FOP- {0:d}: gfid={1:s}; "+ - "fd={2:s}; size ={3:d}; offset={4:d}; " + - "flags=0{5:x}").format(unique, gfid, fd, size, offset, - flags) - self.gfids[key] = gfid - dl.wind_readv (frame, POINTER(xlator_t)(), fd, size, offset, - flags, xdata) - return 0 - - def readv_cbk(self, frame, cookie, this, op_ret, op_errno, vector, - count, buf, iobref, 
xdata): - unique = dl.get_rootunique(frame) - key = dl.get_id(frame) - gfid = self.gfids[key] - if op_ret >= 0: - statstr = trace_stat2str(buf) - print("GLUPY TRACE READV CBK- {0:d}: gfid={1:s}, "+ - "op_ret={2:d}; *buf={3:s};").format(unique, gfid, - op_ret, - statstr) - - else: - print("GLUPY TRACE READV CBK- {0:d}: gfid={1:s}, "+ - "op_ret={2:d}; op_errno={3:d}").format(unique, - gfid, - op_ret, - op_errno) - del self.gfids[key] - dl.unwind_readv (frame, cookie, this, op_ret, op_errno, - vector, count, buf, iobref, xdata) - return 0 - - def writev_fop(self, frame, this, fd, vector, count, offset, flags, - iobref, xdata): - unique = dl.get_rootunique(frame) - key = dl.get_id(frame) - gfid = uuid2str(fd.contents.inode.contents.gfid) - print("GLUPY TRACE WRITEV FOP- {0:d}: gfid={1:s}; " + - "fd={2:s}; count={3:d}; offset={4:d}; " + - "flags=0{5:x}").format(unique, gfid, fd, count, offset, - flags) - self.gfids[key] = gfid - dl.wind_writev(frame, POINTER(xlator_t)(), fd, vector, count, - offset, flags, iobref, xdata) - return 0 - - def writev_cbk(self, frame, cookie, this, op_ret, op_errno, prebuf, - postbuf, xdata): - unique = dl.get_rootunique(frame) - key = dl.get_id(frame) - if op_ret >= 0: - preopstr = trace_stat2str(prebuf) - postopstr = trace_stat2str(postbuf) - print("GLUPY TRACE WRITEV CBK- {0:d}: op_ret={1:d}; " + - "*prebuf={2:s}; " + - "*postbuf={3:s}").format(unique, op_ret, preopstr, - postopstr) - else: - gfid = self.gfids[key] - print("GLUPY TRACE WRITEV CBK- {0:d}: gfid={1:s}; "+ - "op_ret={2:d}; op_errno={3:d}").format(unique, - gfid, - op_ret, - op_errno) - del self.gfids[key] - dl.unwind_writev (frame, cookie, this, op_ret, op_errno, - prebuf, postbuf, xdata) - return 0 - - def opendir_fop(self, frame, this, loc, fd, xdata): - unique = dl.get_rootunique(frame) - key = dl.get_id(frame) - gfid = uuid2str(loc.contents.inode.contents.gfid) - print("GLUPY TRACE OPENDIR FOP- {0:d}: gfid={1:s}; path={2:s}; "+ - "fd={3:s}").format(unique, gfid, 
loc.contents.path, fd) - self.gfids[key] = gfid - dl.wind_opendir(frame, POINTER(xlator_t)(), loc, fd, xdata) - return 0 - - def opendir_cbk(self, frame, cookie, this, op_ret, op_errno, fd, - xdata): - unique = dl.get_rootunique(frame) - key = dl.get_id(frame) - gfid = self.gfids[key] - print("GLUPY TRACE OPENDIR CBK- {0:d}: gfid={1:s}; op_ret={2:d};"+ - " op_errno={3:d}; fd={4:s}").format(unique, gfid, op_ret, - op_errno, fd) - del self.gfids[key] - dl.unwind_opendir(frame, cookie, this, op_ret, op_errno, - fd, xdata) - return 0 - - def readdir_fop(self, frame, this, fd, size, offset, xdata): - unique = dl.get_rootunique(frame) - key = dl.get_id(frame) - gfid = uuid2str(fd.contents.inode.contents.gfid) - print("GLUPY TRACE READDIR FOP- {0:d}: gfid={1:s}; fd={2:s}; " + - "size={3:d}; offset={4:d}").format(unique, gfid, fd, size, - offset) - self.gfids[key] = gfid - dl.wind_readdir(frame, POINTER(xlator_t)(), fd, size, offset, - xdata) - return 0 - - def readdir_cbk(self, frame, cookie, this, op_ret, op_errno, buf, - xdata): - unique = dl.get_rootunique(frame) - key = dl.get_id(frame) - gfid = self.gfids[key] - print("GLUPY TRACE READDIR CBK- {0:d}: gfid={1:s}; op_ret={2:d};"+ - " op_errno={3:d}").format(unique, gfid, op_ret, op_errno) - del self.gfids[key] - dl.unwind_readdir(frame, cookie, this, op_ret, op_errno, buf, - xdata) - return 0 - - def readdirp_fop(self, frame, this, fd, size, offset, dictionary): - unique = dl.get_rootunique(frame) - key = dl.get_id(frame) - gfid = uuid2str(fd.contents.inode.contents.gfid) - print("GLUPY TRACE READDIRP FOP- {0:d}: gfid={1:s}; fd={2:s}; "+ - " size={3:d}; offset={4:d}").format(unique, gfid, fd, size, - offset) - self.gfids[key] = gfid - dl.wind_readdirp(frame, POINTER(xlator_t)(), fd, size, offset, - dictionary) - return 0 - - def readdirp_cbk(self, frame, cookie, this, op_ret, op_errno, buf, - xdata): - unique = dl.get_rootunique(frame) - key = dl.get_id(frame) - gfid = self.gfids[key] - print("GLUPY TRACE READDIRP CBK- 
{0:d}: gfid={1:s}; "+ - "op_ret={2:d}; op_errno={3:d}").format(unique, gfid, - op_ret, op_errno) - del self.gfids[key] - dl.unwind_readdirp(frame, cookie, this, op_ret, op_errno, buf, - xdata) - return 0 - - def mkdir_fop(self, frame, this, loc, mode, umask, xdata): - unique = dl.get_rootunique(frame) - gfid = uuid2str(loc.contents.inode.contents.gfid) - print("GLUPY TRACE MKDIR FOP- {0:d}: gfid={1:s}; path={2:s}; " + - "mode={3:d}; umask=0{4:o}").format(unique, gfid, - loc.contents.path, mode, - umask) - dl.wind_mkdir(frame, POINTER(xlator_t)(), loc, mode, umask, - xdata) - return 0 - - def mkdir_cbk(self, frame, cookie, this, op_ret, op_errno, inode, buf, - preparent, postparent, xdata): - unique = dl.get_rootunique(frame) - if op_ret == 0: - gfid = uuid2str(inode.contents.gfid) - statstr = trace_stat2str(buf) - preparentstr = trace_stat2str(preparent) - postparentstr = trace_stat2str(postparent) - print("GLUPY TRACE MKDIR CBK- {0:d}: gfid={1:s}; "+ - "op_ret={2:d}; *stbuf={3:s}; *prebuf={4:s}; "+ - "*postbuf={5:s} ").format(unique, gfid, op_ret, - statstr, - preparentstr, - postparentstr) - else: - print("GLUPY TRACE MKDIR CBK- {0:d}: op_ret={1:d}; "+ - "op_errno={2:d}").format(unique, op_ret, op_errno) - dl.unwind_mkdir(frame, cookie, this, op_ret, op_errno, inode, - buf, preparent, postparent, xdata) - return 0 - - def rmdir_fop(self, frame, this, loc, flags, xdata): - unique = dl.get_rootunique(frame) - key = dl.get_id(frame) - gfid = uuid2str(loc.contents.inode.contents.gfid) - print("GLUPY TRACE RMDIR FOP- {0:d}: gfid={1:s}; path={2:s}; "+ - "flags={3:d}").format(unique, gfid, loc.contents.path, - flags) - self.gfids[key] = gfid - dl.wind_rmdir(frame, POINTER(xlator_t)(), loc, flags, xdata) - return 0 - - def rmdir_cbk(self, frame, cookie, this, op_ret, op_errno, preparent, - postparent, xdata): - unique = dl.get_rootunique(frame) - key = dl.get_id(frame) - gfid = self.gfids[key] - if op_ret == 0: - preparentstr = trace_stat2str(preparent) - postparentstr = 
trace_stat2str(postparent) - print("GLUPY TRACE RMDIR CBK- {0:d}: gfid={1:s}; "+ - "op_ret={2:d}; *prebuf={3:s}; "+ - "*postbuf={4:s}").format(unique, gfid, op_ret, - preparentstr, - postparentstr) - else: - print("GLUPY TRACE RMDIR CBK- {0:d}: gfid={1:s}; "+ - "op_ret={2:d}; op_errno={3:d}").format(unique, - gfid, - op_ret, - op_errno) - del self.gfids[key] - dl.unwind_rmdir(frame, cookie, this, op_ret, op_errno, - preparent, postparent, xdata) - return 0 - - def stat_fop(self, frame, this, loc, xdata): - unique = dl.get_rootunique(frame) - key = dl.get_id(frame) - gfid = uuid2str(loc.contents.inode.contents.gfid) - print("GLUPY TRACE STAT FOP- {0:d}: gfid={1:s}; " + - " path={2:s}").format(unique, gfid, loc.contents.path) - self.gfids[key] = gfid - dl.wind_stat(frame, POINTER(xlator_t)(), loc, xdata) - return 0 - - def stat_cbk(self, frame, cookie, this, op_ret, op_errno, buf, - xdata): - unique = dl.get_rootunique(frame) - key = dl.get_id(frame) - gfid = self.gfids[key] - if op_ret == 0: - statstr = trace_stat2str(buf) - print("GLUPY TRACE STAT CBK- {0:d}: gfid={1:s}; "+ - "op_ret={2:d}; *buf={3:s};").format(unique, - gfid, - op_ret, - statstr) - else: - print("GLUPY TRACE STAT CBK- {0:d}: gfid={1:s}; "+ - "op_ret={2:d}; op_errno={3:d}").format(unique, - gfid, - op_ret, - op_errno) - del self.gfids[key] - dl.unwind_stat(frame, cookie, this, op_ret, op_errno, - buf, xdata) - return 0 - - def fstat_fop(self, frame, this, fd, xdata): - unique = dl.get_rootunique(frame) - key = dl.get_id(frame) - gfid = uuid2str(fd.contents.inode.contents.gfid) - print("GLUPY TRACE FSTAT FOP- {0:d}: gfid={1:s}; " + - "fd={2:s}").format(unique, gfid, fd) - self.gfids[key] = gfid - dl.wind_fstat(frame, POINTER(xlator_t)(), fd, xdata) - return 0 - - def fstat_cbk(self, frame, cookie, this, op_ret, op_errno, buf, - xdata): - unique = dl.get_rootunique(frame) - key = dl.get_id(frame) - gfid = self.gfids[key] - if op_ret == 0: - statstr = trace_stat2str(buf) - print("GLUPY TRACE FSTAT 
CBK- {0:d}: gfid={1:s} "+ - " op_ret={2:d}; *buf={3:s}").format(unique, - gfid, - op_ret, - statstr) - else: - print("GLUPY TRACE FSTAT CBK- {0:d}: gfid={1:s} "+ - "op_ret={2:d}; op_errno={3:d}").format(unique. - gfid, - op_ret, - op_errno) - del self.gfids[key] - dl.unwind_fstat(frame, cookie, this, op_ret, op_errno, - buf, xdata) - return 0 - - def statfs_fop(self, frame, this, loc, xdata): - unique = dl.get_rootunique(frame) - if loc.contents.inode: - gfid = uuid2str(loc.contents.inode.contents.gfid) - else: - gfid = "0" - print("GLUPY TRACE STATFS FOP- {0:d}: gfid={1:s}; "+ - "path={2:s}").format(unique, gfid, loc.contents.path) - dl.wind_statfs(frame, POINTER(xlator_t)(), loc, xdata) - return 0 - - def statfs_cbk(self, frame, cookie, this, op_ret, op_errno, buf, - xdata): - unique = dl.get_rootunique(frame) - if op_ret == 0: - #TBD: print buf (pointer to an iovec type object) - print("GLUPY TRACE STATFS CBK {0:d}: "+ - "op_ret={1:d}").format(unique, op_ret) - else: - print("GLUPY TRACE STATFS CBK- {0:d}"+ - "op_ret={1:d}; op_errno={2:d}").format(unique, - op_ret, - op_errno) - dl.unwind_statfs(frame, cookie, this, op_ret, op_errno, - buf, xdata) - return 0 - - def getxattr_fop(self, frame, this, loc, name, xdata): - unique = dl.get_rootunique(frame) - key = dl.get_id(frame) - gfid = uuid2str(loc.contents.inode.contents.gfid) - print("GLUPY TRACE GETXATTR FOP- {0:d}: gfid={1:s}; path={2:s};"+ - " name={3:s}").format(unique, gfid, loc.contents.path, - name) - self.gfids[key]=gfid - dl.wind_getxattr(frame, POINTER(xlator_t)(), loc, name, xdata) - return 0 - - def getxattr_cbk(self, frame, cookie, this, op_ret, op_errno, - dictionary, xdata): - unique = dl.get_rootunique(frame) - key = dl.get_id(frame) - gfid = self.gfids[key] - print("GLUPY TRACE GETXATTR CBK- {0:d}: gfid={1:s}; "+ - "op_ret={2:d}; op_errno={3:d}; "+ - " dictionary={4:s}").format(unique, gfid, op_ret, op_errno, - dictionary) - del self.gfids[key] - dl.unwind_getxattr(frame, cookie, this, op_ret, 
op_errno, - dictionary, xdata) - return 0 - - def fgetxattr_fop(self, frame, this, fd, name, xdata): - unique = dl.get_rootunique(frame) - key = dl.get_id(frame) - gfid = uuid2str(fd.contents.inode.contents.gfid) - print("GLUPY TRACE FGETXATTR FOP- {0:d}: gfid={1:s}; fd={2:s}; "+ - "name={3:s}").format(unique, gfid, fd, name) - self.gfids[key] = gfid - dl.wind_fgetxattr(frame, POINTER(xlator_t)(), fd, name, xdata) - return 0 - - def fgetxattr_cbk(self, frame, cookie, this, op_ret, op_errno, - dictionary, xdata): - unique = dl.get_rootunique(frame) - key = dl.get_id(frame) - gfid = self.gfids[key] - print("GLUPY TRACE FGETXATTR CBK- {0:d}: gfid={1:s}; "+ - "op_ret={2:d}; op_errno={3:d};"+ - " dictionary={4:s}").format(unique, gfid, op_ret, - op_errno, dictionary) - del self.gfids[key] - dl.unwind_fgetxattr(frame, cookie, this, op_ret, op_errno, - dictionary, xdata) - return 0 - - def setxattr_fop(self, frame, this, loc, dictionary, flags, xdata): - unique = dl.get_rootunique(frame) - key = dl.get_id(frame) - gfid = uuid2str(loc.contents.inode.contents.gfid) - print("GLUPY TRACE SETXATTR FOP- {0:d}: gfid={1:s}; path={2:s};"+ - " flags={3:d}").format(unique, gfid, loc.contents.path, - flags) - self.gfids[key] = gfid - dl.wind_setxattr(frame, POINTER(xlator_t)(), loc, dictionary, - flags, xdata) - return 0 - - def setxattr_cbk(self, frame, cookie, this, op_ret, op_errno, xdata): - unique = dl.get_rootunique(frame) - key = dl.get_id(frame) - gfid = self.gfids[key] - print("GLUPY TRACE SETXATTR CBK- {0:d}: gfid={1:s}; "+ - "op_ret={2:d}; op_errno={3:d}").format(unique, gfid, - op_ret, op_errno) - del self.gfids[key] - dl.unwind_setxattr(frame, cookie, this, op_ret, op_errno, - xdata) - return 0 - - def fsetxattr_fop(self, frame, this, fd, dictionary, flags, xdata): - unique = dl.get_rootunique(frame) - key = dl.get_id(frame) - gfid = uuid2str(fd.contents.inode.contents.gfid) - print("GLUPY TRACE FSETXATTR FOP- {0:d}: gfid={1:s}; fd={2:p}; "+ - 
"flags={3:d}").format(unique, gfid, fd, flags) - self.gfids[key] = gfid - dl.wind_fsetxattr(frame, POINTER(xlator_t)(), fd, dictionary, - flags, xdata) - return 0 - - def fsetxattr_cbk(self, frame, cookie, this, op_ret, op_errno, xdata): - unique = dl.get_rootunique(frame) - key = dl.get_id(frame) - gfid = self.gfids[key] - print("GLUPY TRACE FSETXATTR CBK- {0:d}: gfid={1:s}; "+ - "op_ret={2:d}; op_errno={3:d}").format(unique, gfid, - op_ret, op_errno) - del self.gfids[key] - dl.unwind_fsetxattr(frame, cookie, this, op_ret, op_errno, - xdata) - return 0 - - def removexattr_fop(self, frame, this, loc, name, xdata): - unique = dl.get_rootunique(frame) - key = dl.get_id(frame) - gfid = uuid2str(loc.contents.inode.contents.gfid) - print("GLUPY TRACE REMOVEXATTR FOP- {0:d}: gfid={1:s}; "+ - "path={2:s}; name={3:s}").format(unique, gfid, - loc.contents.path, - name) - self.gfids[key] = gfid - dl.wind_removexattr(frame, POINTER(xlator_t)(), loc, name, - xdata) - return 0 - - def removexattr_cbk(self, frame, cookie, this, op_ret, op_errno, - xdata): - unique = dl.get_rootunique(frame) - key = dl.get_id(frame) - gfid = self.gfids[key] - print("GLUPY TRACE REMOVEXATTR CBK- {0:d}: gfid={1:s} "+ - " op_ret={2:d}; op_errno={3:d}").format(unique, gfid, - op_ret, op_errno) - del self.gfids[key] - dl.unwind_removexattr(frame, cookie, this, op_ret, op_errno, - xdata) - return 0 - - def link_fop(self, frame, this, oldloc, newloc, xdata): - unique = dl.get_rootunique(frame) - key = dl.get_id(frame) - if (newloc.contents.inode): - newgfid = uuid2str(newloc.contents.inode.contents.gfid) - else: - newgfid = "0" - oldgfid = uuid2str(oldloc.contents.inode.contents.gfid) - print("GLUPY TRACE LINK FOP-{0:d}: oldgfid={1:s}; oldpath={2:s};"+ - "newgfid={3:s};"+ - "newpath={4:s}").format(unique, oldgfid, - oldloc.contents.path, - newgfid, - newloc.contents.path) - self.gfids[key] = oldgfid - dl.wind_link(frame, POINTER(xlator_t)(), oldloc, newloc, - xdata) - return 0 - - def link_cbk(self, 
frame, cookie, this, op_ret, op_errno, inode, buf, - preparent, postparent, xdata): - unique = dl.get_rootunique(frame) - key = dl.get_id(frame) - gfid = self.gfids[key] - if op_ret == 0: - statstr = trace_stat2str(buf) - preparentstr = trace_stat2str(preparent) - postparentstr = trace_stat2str(postparent) - print("GLUPY TRACE LINK CBK- {0:d}: op_ret={1:d} "+ - "*stbuf={2:s}; *prebuf={3:s}; "+ - "*postbuf={4:s} ").format(unique, op_ret, statstr, - preparentstr, - postparentstr) - else: - print("GLUPY TRACE LINK CBK- {0:d}: gfid={1:s}; "+ - "op_ret={2:d}; "+ - "op_errno={3:d}").format(unique, gfid, - op_ret, op_errno) - del self.gfids[key] - dl.unwind_link(frame, cookie, this, op_ret, op_errno, inode, - buf, preparent, postparent, xdata) - return 0 - - def unlink_fop(self, frame, this, loc, xflag, xdata): - unique = dl.get_rootunique(frame) - key = dl.get_id(frame) - gfid = uuid2str(loc.contents.inode.contents.gfid) - print("GLUPY TRACE UNLINK FOP- {0:d}; gfid={1:s}; path={2:s}; "+ - "flag={3:d}").format(unique, gfid, loc.contents.path, - xflag) - self.gfids[key] = gfid - dl.wind_unlink(frame, POINTER(xlator_t)(), loc, xflag, - xdata) - return 0 - - def unlink_cbk(self, frame, cookie, this, op_ret, op_errno, - preparent, postparent, xdata): - unique = dl.get_rootunique(frame) - key = dl.get_id(frame) - gfid = self.gfids[key] - if op_ret == 0: - preparentstr = trace_stat2str(preparent) - postparentstr = trace_stat2str(postparent) - print("GLUPY TRACE UNLINK CBK- {0:d}: gfid ={1:s}; "+ - "op_ret={2:d}; *prebuf={3:s}; "+ - "*postbuf={4:s} ").format(unique, gfid, op_ret, - preparentstr, - postparentstr) - else: - print("GLUPY TRACE UNLINK CBK: {0:d}: gfid ={1:s}; "+ - "op_ret={2:d}; "+ - "op_errno={3:d}").format(unique, gfid, op_ret, - op_errno) - del self.gfids[key] - dl.unwind_unlink(frame, cookie, this, op_ret, op_errno, - preparent, postparent, xdata) - return 0 - - def readlink_fop(self, frame, this, loc, size, xdata): - unique = dl.get_rootunique(frame) - key = 
dl.get_id(frame) - gfid = uuid2str(loc.contents.inode.contents.gfid) - print("GLUPY TRACE READLINK FOP- {0:d}: gfid={1:s}; path={2:s};"+ - " size={3:d}").format(unique, gfid, loc.contents.path, - size) - self.gfids[key] = gfid - dl.wind_readlink(frame, POINTER(xlator_t)(), loc, size, - xdata) - return 0 - - def readlink_cbk(self, frame, cookie, this, op_ret, op_errno, - buf, stbuf, xdata): - unique = dl.get_rootunique(frame) - key = dl.get_id(frame) - gfid = self.gfids[key] - if op_ret == 0: - statstr = trace_stat2str(stbuf) - print("GLUPY TRACE READLINK CBK- {0:d}: gfid={1:s} "+ - " op_ret={2:d}; op_errno={3:d}; *prebuf={4:s}; "+ - "*postbuf={5:s} ").format(unique, gfid, - op_ret, op_errno, - buf, statstr) - else: - print("GLUPY TRACE READLINK CBK- {0:d}: gfid={1:s} "+ - " op_ret={2:d}; op_errno={3:d}").format(unique, - gfid, - op_ret, - op_errno) - del self.gfids[key] - dl.unwind_readlink(frame, cookie, this, op_ret, op_errno, buf, - stbuf, xdata) - return 0 - - def symlink_fop(self, frame, this, linkpath, loc, umask, xdata): - unique = dl.get_rootunique(frame) - key = dl.get_id(frame) - gfid = uuid2str(loc.contents.inode.contents.gfid) - print("GLUPY TRACE SYMLINK FOP- {0:d}: gfid={1:s}; "+ - "linkpath={2:s}; path={3:s};"+ - "umask=0{4:o}").format(unique, gfid, linkpath, - loc.contents.path, umask) - self.gfids[key] = gfid - dl.wind_symlink(frame, POINTER(xlator_t)(), linkpath, loc, - umask, xdata) - return 0 - - def symlink_cbk(self, frame, cookie, this, op_ret, op_errno, - inode, buf, preparent, postparent, xdata): - unique = dl.get_rootunique(frame) - key = dl.get_id(frame) - gfid = self.gfids[key] - if op_ret == 0: - statstr = trace_stat2str(buf) - preparentstr = trace_stat2str(preparent) - postparentstr = trace_stat2str(postparent) - print("GLUPY TRACE SYMLINK CBK- {0:d}: gfid={1:s}; "+ - "op_ret={2:d}; *stbuf={3:s}; *preparent={4:s}; "+ - "*postparent={5:s}").format(unique, gfid, - op_ret, statstr, - preparentstr, - postparentstr) - else: - print("GLUPY 
TRACE SYMLINK CBK- {0:d}: gfid={1:s}; "+ - "op_ret={2:d}; op_errno={3:d}").format(unique, - gfid, - op_ret, - op_errno) - del self.gfids[key] - dl.unwind_symlink(frame, cookie, this, op_ret, op_errno, - inode, buf, preparent, postparent, xdata) - return 0 diff --git a/xlators/features/glupy/examples/helloworld.py b/xlators/features/glupy/examples/helloworld.py deleted file mode 100644 index b565a4e5bc3..00000000000 --- a/xlators/features/glupy/examples/helloworld.py +++ /dev/null @@ -1,19 +0,0 @@ -import sys -from gluster.glupy import * - -class xlator (Translator): - - def __init__(self, c_this): - Translator.__init__(self, c_this) - - def lookup_fop(self, frame, this, loc, xdata): - print "Python xlator: Hello!" - dl.wind_lookup(frame, POINTER(xlator_t)(), loc, xdata) - return 0 - - def lookup_cbk(self, frame, cookie, this, op_ret, op_errno, inode, buf, - xdata, postparent): - print "Python xlator: Hello again!" - dl.unwind_lookup(frame, cookie, this, op_ret, op_errno, inode, buf, - xdata, postparent) - return 0 diff --git a/xlators/features/glupy/examples/negative.py b/xlators/features/glupy/examples/negative.py deleted file mode 100644 index e7a4fc07ced..00000000000 --- a/xlators/features/glupy/examples/negative.py +++ /dev/null @@ -1,91 +0,0 @@ -import sys -from uuid import UUID -from gluster.glupy import * - -# Negative-lookup-caching example. If a file wasn't there the last time we -# looked, it's probably still not there. This translator keeps track of -# those failed lookups for us, and returns ENOENT without needing to pass the -# call any further for repeated requests. - -# If we were doing this for real, we'd need separate caches for each xlator -# instance. The easiest way to do this would be to have xlator.__init__ -# "register" each instance in a module-global dict, with the key as the C -# translator address and the value as the xlator object itself. For testing -# and teaching, it's sufficient just to have one cache. 
The keys are parent -# GFIDs, and the entries are lists of names within that parent that we know -# don't exist. -cache = {} - -# TBD: we need a better way of handling per-request data (frame->local in C). -dl.get_id.restype = c_long -dl.get_id.argtypes = [ POINTER(call_frame_t) ] - -def uuid2str (gfid): - return str(UUID(''.join(map("{0:02x}".format, gfid)))) - -class xlator (Translator): - - def __init__ (self, c_this): - self.requests = {} - Translator.__init__(self,c_this) - - def lookup_fop (self, frame, this, loc, xdata): - pargfid = uuid2str(loc.contents.pargfid) - print "lookup FOP: %s:%s" % (pargfid, loc.contents.name) - # Check the cache. - if cache.has_key(pargfid): - if loc.contents.name in cache[pargfid]: - print "short-circuiting for %s:%s" % (pargfid, - loc.contents.name) - dl.unwind_lookup(frame,0,this,-1,2,None,None,None,None) - return 0 - key = dl.get_id(frame) - self.requests[key] = (pargfid, loc.contents.name[:]) - # TBD: get real child xl from init, pass it here - dl.wind_lookup(frame,POINTER(xlator_t)(),loc,xdata) - return 0 - - def lookup_cbk (self, frame, cookie, this, op_ret, op_errno, inode, buf, - xdata, postparent): - print "lookup CBK: %d (%d)" % (op_ret, op_errno) - key = dl.get_id(frame) - pargfid, name = self.requests[key] - # Update the cache. 
- if op_ret == 0: - print "found %s, removing from cache" % name - if cache.has_key(pargfid): - cache[pargfid].discard(name) - elif op_errno == 2: # ENOENT - print "failed to find %s, adding to cache" % name - if cache.has_key(pargfid): - cache[pargfid].add(name) - else: - cache[pargfid] = set([name]) - del self.requests[key] - dl.unwind_lookup(frame,cookie,this,op_ret,op_errno, - inode,buf,xdata,postparent) - return 0 - - def create_fop (self, frame, this, loc, flags, mode, umask, fd, xdata): - pargfid = uuid2str(loc.contents.pargfid) - print "create FOP: %s:%s" % (pargfid, loc.contents.name) - key = dl.get_id(frame) - self.requests[key] = (pargfid, loc.contents.name[:]) - # TBD: get real child xl from init, pass it here - dl.wind_create(frame,POINTER(xlator_t)(),loc,flags,mode,umask,fd,xdata) - return 0 - - def create_cbk (self, frame, cookie, this, op_ret, op_errno, fd, inode, - buf, preparent, postparent, xdata): - print "create CBK: %d (%d)" % (op_ret, op_errno) - key = dl.get_id(frame) - pargfid, name = self.requests[key] - # Update the cache. 
- if op_ret == 0: - print "created %s, removing from cache" % name - if cache.has_key(pargfid): - cache[pargfid].discard(name) - del self.requests[key] - dl.unwind_create(frame,cookie,this,op_ret,op_errno,fd,inode,buf, - preparent,postparent,xdata) - return 0 diff --git a/xlators/features/glupy/src/Makefile.am b/xlators/features/glupy/src/Makefile.am deleted file mode 100644 index eadccf75c15..00000000000 --- a/xlators/features/glupy/src/Makefile.am +++ /dev/null @@ -1,26 +0,0 @@ -xlator_LTLIBRARIES = glupy.la - -# Ensure GLUSTER_PYTHON_PATH is passed to glupy.so -xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features -glupydir = $(xlatordir)/glupy -AM_CPPFLAGS = $(PYTHONDEV_CPPFLAGS) $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src -isystem $(BUILD_PYTHON_INC) -AM_CFLAGS = $(PYTHONDEV_CFLAGS) -Wall -fno-strict-aliasing -DGLUSTER_PYTHON_PATH=\"$(glupydir)\" -DPATH_GLUSTERFS_GLUPY_MODULE=\"${xlatordir}/glupy${shrext_cmds}\" $(GF_CFLAGS) - -# Flags to build glupy.so with -glupy_la_LDFLAGS = $(PYTHONDEV_LDFLAGS) -module -avoid-version -nostartfiles -export-symbols $(top_srcdir)/xlators/features/glupy/src/glupy.sym -glupy_la_SOURCES = glupy.c -glupy_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ - -lpthread -l$(BUILD_PYTHON_LIB) - -noinst_HEADERS = glupy.h - -# Install __init__.py into the Python site-packages area -pyglupydir = @BUILD_PYTHON_SITE_PACKAGES@/gluster -pyglupy_PYTHON = __init__.py - -# Install glupy/__init_-.py into the Python site-packages area -SUBDIRS = glupy - -CLEANFILES = - -EXTRA_DIST = glupy.sym diff --git a/xlators/features/glupy/src/__init__.py.in b/xlators/features/glupy/src/__init__.py.in deleted file mode 100644 index 3ad9513f40e..00000000000 --- a/xlators/features/glupy/src/__init__.py.in +++ /dev/null @@ -1,2 +0,0 @@ -from pkgutil import extend_path -__path__ = extend_path(__path__, __name__) diff --git a/xlators/features/glupy/src/glupy.c b/xlators/features/glupy/src/glupy.c deleted file mode 100644 index 
bca476427c8..00000000000 --- a/xlators/features/glupy/src/glupy.c +++ /dev/null @@ -1,2496 +0,0 @@ -/* - Copyright (c) 2006-2014 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -#include <ctype.h> -#include <sys/uio.h> -#include <Python.h> - -#include "glusterfs.h" -#include "xlator.h" -#include "logging.h" -#include "defaults.h" - -#include "glupy.h" - -/* UTILITY FUNCTIONS FOR FOP-SPECIFIC CODE */ - -pthread_key_t gil_init_key; - -PyGILState_STATE -glupy_enter (void) -{ - if (!pthread_getspecific(gil_init_key)) { - PyEval_ReleaseLock(); - (void)pthread_setspecific(gil_init_key,(void *)1); - } - - return PyGILState_Ensure(); -} - -void -glupy_leave (PyGILState_STATE gstate) -{ - PyGILState_Release(gstate); -} - -/* FOP: LOOKUP */ - -int32_t -glupy_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *xdata, struct iatt *postparent) -{ - glupy_private_t *priv = this->private; - PyGILState_STATE gstate; - int32_t ret; - - if (!priv->cbks[GLUPY_LOOKUP]) { - goto unwind; - } - - gstate = glupy_enter(); - ret = ((fop_lookup_cbk_t)(priv->cbks[GLUPY_LOOKUP]))( - frame, cookie, this, op_ret, op_errno, - inode, buf, xdata, postparent); - glupy_leave(gstate); - - return ret; - -unwind: - frame->local = NULL; - STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf, - xdata, postparent); - return 0; -} - -int32_t -glupy_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, - dict_t *xdata) -{ - glupy_private_t *priv = this->private; - PyGILState_STATE gstate; - int32_t ret; - static long next_id = 0; - - if (!priv->fops[GLUPY_LOOKUP]) { - goto wind; - } - - gstate = glupy_enter(); - 
frame->local = (void *)++next_id; - ret = ((fop_lookup_t)(priv->fops[GLUPY_LOOKUP]))( - frame, this, loc, xdata); - glupy_leave(gstate); - - return ret; - -wind: - STACK_WIND (frame, glupy_lookup_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, loc, xdata); - return 0; -} - -void -wind_lookup (call_frame_t *frame, xlator_t *xl, loc_t *loc, dict_t *xdata) -{ - xlator_t *this = THIS; - - if (!xl || (xl == this)) { - xl = FIRST_CHILD(this); - } - - STACK_WIND(frame,glupy_lookup_cbk,xl,xl->fops->lookup,loc,xdata); -} - -void -unwind_lookup (call_frame_t *frame, long cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *xdata, struct iatt *postparent) -{ - frame->local = NULL; - STACK_UNWIND_STRICT(lookup,frame,op_ret,op_errno, - inode,buf,xdata,postparent); -} - -void -set_lookup_fop (long py_this, fop_lookup_t fop) -{ - glupy_private_t *priv = ((xlator_t *)py_this)->private; - - priv->fops[GLUPY_LOOKUP] = (long)fop; -} - -void -set_lookup_cbk (long py_this, fop_lookup_cbk_t cbk) -{ - glupy_private_t *priv = ((xlator_t *)py_this)->private; - - priv->cbks[GLUPY_LOOKUP] = (long)cbk; -} - -/* FOP: CREATE */ - -int32_t -glupy_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - glupy_private_t *priv = this->private; - PyGILState_STATE gstate; - int32_t ret; - - if (!priv->cbks[GLUPY_CREATE]) { - goto unwind; - } - - gstate = glupy_enter(); - ret = ((fop_create_cbk_t)(priv->cbks[GLUPY_CREATE]))( - frame, cookie, this, op_ret, op_errno, - fd, inode, buf, preparent, postparent, xdata); - glupy_leave(gstate); - - return ret; - -unwind: - frame->local = NULL; - STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf, - preparent, postparent, xdata); - return 0; -} - -int32_t -glupy_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t 
flags, - mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) -{ - glupy_private_t *priv = this->private; - PyGILState_STATE gstate; - int32_t ret; - static long next_id = 0; - - if (!priv->fops[GLUPY_CREATE]) { - goto wind; - } - - gstate = glupy_enter(); - frame->local = (void *)++next_id; - ret = ((fop_create_t)(priv->fops[GLUPY_CREATE]))( - frame, this, loc, flags, mode, umask, fd, xdata); - glupy_leave(gstate); - - return ret; - -wind: - STACK_WIND (frame, glupy_create_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, - fd, xdata); - return 0; -} - -void -wind_create (call_frame_t *frame, xlator_t *xl, loc_t *loc, int32_t flags, - mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) -{ - xlator_t *this = THIS; - - if (!xl || (xl == this)) { - xl = FIRST_CHILD(this); - } - - STACK_WIND (frame, glupy_create_cbk,xl, xl->fops->create, - loc, flags, mode, umask, fd, xdata); -} - -void -unwind_create (call_frame_t *frame, long cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - frame->local = NULL; - STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf, - preparent, postparent, xdata); -} - -void -set_create_fop (long py_this, fop_create_t fop) -{ - glupy_private_t *priv = ((xlator_t *)py_this)->private; - - priv->fops[GLUPY_CREATE] = (long)fop; -} - -void -set_create_cbk (long py_this, fop_create_cbk_t cbk) -{ - glupy_private_t *priv = ((xlator_t *)py_this)->private; - - priv->cbks[GLUPY_CREATE] = (long)cbk; -} - -/* FOP: OPEN */ - -int32_t -glupy_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) -{ - glupy_private_t *priv = this->private; - PyGILState_STATE gstate; - int32_t ret; - - if (!priv->cbks[GLUPY_OPEN]) { - goto unwind; - } - - gstate = glupy_enter(); - ret = ((fop_open_cbk_t)(priv->cbks[GLUPY_OPEN]))( - frame, 
cookie, this, op_ret, op_errno, - fd, xdata); - glupy_leave(gstate); - - return ret; - -unwind: - frame->local = NULL; - STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata); - return 0; -} - -int32_t -glupy_open (call_frame_t *frame, xlator_t *this, loc_t *loc, - int32_t flags, fd_t *fd, dict_t *xdata) -{ - glupy_private_t *priv = this->private; - PyGILState_STATE gstate; - int32_t ret; - static long next_id = 0; - - if (!priv->fops[GLUPY_OPEN]) { - goto wind; - } - - gstate = glupy_enter(); - frame->local = (void *)++next_id; - ret = ((fop_open_t)(priv->fops[GLUPY_OPEN]))( - frame, this, loc, flags, fd, xdata); - glupy_leave(gstate); - - return ret; - -wind: - STACK_WIND (frame, glupy_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); - return 0; -} - -void -wind_open (call_frame_t *frame, xlator_t *xl, loc_t *loc, int32_t flags, - fd_t *fd, dict_t *xdata) -{ - xlator_t *this = THIS; - - if (!xl || (xl == this)) { - xl = FIRST_CHILD(this); - } - - STACK_WIND (frame, glupy_open_cbk, xl, xl->fops->open, loc, flags, - fd, xdata); -} - -void -unwind_open (call_frame_t *frame, long cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) -{ - frame->local = NULL; - STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata); -} - -void -set_open_fop (long py_this, fop_open_t fop) -{ - glupy_private_t *priv = ((xlator_t *)py_this)->private; - priv->fops[GLUPY_OPEN] = (long)fop; -} - -void -set_open_cbk (long py_this, fop_open_cbk_t cbk) -{ - glupy_private_t *priv = ((xlator_t *)py_this)->private; - priv->cbks[GLUPY_OPEN] = (long)cbk; -} - -/* FOP: READV */ - -int32_t -glupy_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iovec *vector, - int32_t count, struct iatt *stbuf, struct iobref *iobref, - dict_t *xdata) -{ - glupy_private_t *priv = this->private; - PyGILState_STATE gstate; - int32_t ret; - - if (!priv->cbks[GLUPY_READV]) { - goto 
unwind; - } - - gstate = glupy_enter(); - ret = ((fop_readv_cbk_t)(priv->cbks[GLUPY_READV]))( - frame, cookie, this, op_ret, op_errno, - vector, count, stbuf, iobref, xdata); - glupy_leave(gstate); - - return ret; - -unwind: - frame->local = NULL; - STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, - count, stbuf, iobref, xdata); - return 0; -} - -int32_t -glupy_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, - size_t size, off_t offset, uint32_t flags, dict_t *xdata) -{ - glupy_private_t *priv = this->private; - PyGILState_STATE gstate; - int32_t ret; - static long next_id = 0; - - if (!priv->fops[GLUPY_READV]) { - goto wind; - } - - gstate = glupy_enter(); - frame->local = (void *)++next_id; - ret = ((fop_readv_t)(priv->fops[GLUPY_READV]))( - frame, this, fd, size, offset, flags, xdata); - glupy_leave(gstate); - - return ret; - -wind: - STACK_WIND (frame, glupy_readv_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readv, fd, size, offset, - flags, xdata); - return 0; -} - -void -wind_readv (call_frame_t *frame, xlator_t *xl, fd_t *fd, size_t size, - off_t offset, uint32_t flags, dict_t *xdata) -{ - xlator_t *this = THIS; - - if (!xl || (xl == this)) { - xl = FIRST_CHILD(this); - } - - STACK_WIND (frame, glupy_readv_cbk, xl, xl->fops->readv, fd, size, - offset, flags, xdata); -} - -void -unwind_readv (call_frame_t *frame, long cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iovec *vector, - int32_t count, struct iatt *stbuf, struct iobref *iobref, - dict_t *xdata) -{ - frame->local = NULL; - STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, - count, stbuf, iobref, xdata); -} - -void -set_readv_fop (long py_this, fop_readv_t fop) -{ - glupy_private_t *priv = ((xlator_t *)py_this)->private; - priv->fops[GLUPY_READV] = (long)fop; -} - -void -set_readv_cbk (long py_this, fop_readv_cbk_t cbk) -{ - glupy_private_t *priv = ((xlator_t *)py_this)->private; - priv->cbks[GLUPY_READV] = (long)cbk; -} - -/* FOP: WRITEV */ - 
-int32_t -glupy_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) -{ - glupy_private_t *priv = this->private; - PyGILState_STATE gstate; - int32_t ret; - - if (!priv->cbks[GLUPY_WRITEV]) { - goto unwind; - } - - gstate = glupy_enter(); - ret = ((fop_writev_cbk_t)(priv->cbks[GLUPY_WRITEV]))( - frame, cookie, this, op_ret, op_errno, - prebuf, postbuf, xdata); - glupy_leave(gstate); - - return ret; - -unwind: - frame->local = NULL; - STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, - postbuf, xdata); - return 0; -} - -int32_t -glupy_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iovec *vector, int32_t count, off_t offset, - uint32_t flags, struct iobref *iobref, dict_t *xdata) -{ - glupy_private_t *priv = this->private; - PyGILState_STATE gstate; - int32_t ret; - static long next_id = 0; - - if (!priv->fops[GLUPY_WRITEV]) { - goto wind; - } - - gstate = glupy_enter(); - frame->local = (void *)++next_id; - ret = ((fop_writev_t)(priv->fops[GLUPY_WRITEV]))( - frame, this, fd, vector, count, offset, flags, - iobref, xdata); - glupy_leave(gstate); - - return ret; - -wind: - STACK_WIND (frame, glupy_writev_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->writev, fd, vector, count, - offset, flags, iobref, xdata); - return 0; -} - -void -wind_writev (call_frame_t *frame, xlator_t *xl, fd_t *fd, struct iovec *vector, - int32_t count, off_t offset, uint32_t flags, struct iobref *iobref, - dict_t *xdata) -{ - xlator_t *this = THIS; - - if (!xl || (xl == this)) { - xl = FIRST_CHILD(this); - } - - STACK_WIND (frame, glupy_writev_cbk, xl, xl->fops->writev, fd, vector, - count, offset, flags, iobref, xdata); -} - -void -unwind_writev (call_frame_t *frame, long cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) -{ - frame->local = NULL; - STACK_UNWIND_STRICT (writev, frame, 
op_ret, op_errno, prebuf, - postbuf, xdata); -} - -void -set_writev_fop (long py_this, fop_writev_t fop) -{ - glupy_private_t *priv = ((xlator_t *)py_this)->private; - priv->fops[GLUPY_WRITEV] = (long)fop; -} - -void -set_writev_cbk (long py_this, fop_writev_cbk_t cbk) -{ - glupy_private_t *priv = ((xlator_t *)py_this)->private; - priv->cbks[GLUPY_WRITEV] = (long)cbk; -} - - -/* FOP: OPENDIR */ - -int32_t -glupy_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd, - dict_t *xdata) -{ - glupy_private_t *priv = this->private; - PyGILState_STATE gstate; - int32_t ret; - - if (!priv->cbks[GLUPY_OPENDIR]) { - goto unwind; - } - - gstate = glupy_enter(); - ret = ((fop_opendir_cbk_t)(priv->cbks[GLUPY_OPENDIR]))( - frame, cookie, this, op_ret, op_errno, - fd, xdata); - glupy_leave(gstate); - - return ret; - -unwind: - frame->local = NULL; - STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, xdata); - return 0; -} - -int32_t -glupy_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, - fd_t *fd, dict_t *xdata) -{ - glupy_private_t *priv = this->private; - PyGILState_STATE gstate; - int32_t ret; - static long next_id = 0; - - if (!priv->fops[GLUPY_OPENDIR]) { - goto wind; - } - - gstate = glupy_enter(); - frame->local = (void *)++next_id; - ret = ((fop_opendir_t)(priv->fops[GLUPY_OPENDIR]))( - frame, this, loc, fd, xdata); - glupy_leave(gstate); - - return ret; - -wind: - STACK_WIND (frame, glupy_opendir_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->opendir, loc, fd, xdata); - return 0; -} - -void -wind_opendir (call_frame_t *frame, xlator_t *xl, loc_t *loc, fd_t *fd, dict_t *xdata) -{ - xlator_t *this = THIS; - - if (!xl || (xl == this)) { - xl = FIRST_CHILD(this); - } - - STACK_WIND(frame,glupy_opendir_cbk,xl,xl->fops->opendir,loc,fd,xdata); -} - -void -unwind_opendir (call_frame_t *frame, long cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) -{ - frame->local = 
NULL; - STACK_UNWIND_STRICT(opendir,frame,op_ret,op_errno, - fd,xdata); -} - -void -set_opendir_fop (long py_this, fop_opendir_t fop) -{ - glupy_private_t *priv = ((xlator_t *)py_this)->private; - - priv->fops[GLUPY_OPENDIR] = (long)fop; -} - -void -set_opendir_cbk (long py_this, fop_opendir_cbk_t cbk) -{ - glupy_private_t *priv = ((xlator_t *)py_this)->private; - - priv->cbks[GLUPY_OPENDIR] = (long)cbk; -} - -/* FOP: READDIR */ - -int32_t -glupy_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, - dict_t *xdata) -{ - glupy_private_t *priv = this->private; - PyGILState_STATE gstate; - int32_t ret; - - if (!priv->cbks[GLUPY_READDIR]) { - goto unwind; - } - - gstate = glupy_enter(); - ret = ((fop_readdir_cbk_t)(priv->cbks[GLUPY_READDIR]))( - frame, cookie, this, op_ret, op_errno, - entries, xdata); - glupy_leave(gstate); - - return ret; - -unwind: - frame->local = NULL; - STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, entries, - xdata); - return 0; -} - -int32_t -glupy_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, - size_t size, off_t offset, dict_t *xdata) -{ - glupy_private_t *priv = this->private; - PyGILState_STATE gstate; - int32_t ret; - static long next_id = 0; - - if (!priv->fops[GLUPY_READDIR]) { - goto wind; - } - - gstate = glupy_enter(); - frame->local = (void *)++next_id; - ret = ((fop_readdir_t)(priv->fops[GLUPY_READDIR]))( - frame, this, fd, size, offset, xdata); - glupy_leave(gstate); - - return ret; - -wind: - STACK_WIND (frame, glupy_readdir_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readdir,fd, size, offset, xdata); - return 0; -} - -void -wind_readdir(call_frame_t *frame, xlator_t *xl, fd_t *fd, size_t size, - off_t offset, dict_t *xdata) -{ - xlator_t *this = THIS; - - if (!xl || (xl == this)) { - xl = FIRST_CHILD(this); - } - - STACK_WIND(frame,glupy_readdir_cbk,xl,xl->fops->readdir,fd,size,offset,xdata); -} - -void -unwind_readdir (call_frame_t 
*frame, long cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, - dict_t *xdata) -{ - frame->local = NULL; - STACK_UNWIND_STRICT(readdir,frame,op_ret,op_errno, - entries, xdata); -} - -void -set_readdir_fop (long py_this, fop_readdir_t fop) -{ - glupy_private_t *priv = ((xlator_t *)py_this)->private; - - priv->fops[GLUPY_READDIR] = (long)fop; -} - -void -set_readdir_cbk (long py_this, fop_readdir_cbk_t cbk) -{ - glupy_private_t *priv = ((xlator_t *)py_this)->private; - - priv->cbks[GLUPY_READDIR] = (long)cbk; -} - - -/* FOP: READDIRP */ - -int32_t -glupy_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, - dict_t *xdata) -{ - glupy_private_t *priv = this->private; - PyGILState_STATE gstate; - int32_t ret; - - if (!priv->cbks[GLUPY_READDIRP]) { - goto unwind; - } - - gstate = glupy_enter(); - ret = ((fop_readdirp_cbk_t)(priv->cbks[GLUPY_READDIRP]))( - frame, cookie, this, op_ret, op_errno, - entries, xdata); - glupy_leave(gstate); - - return ret; - -unwind: - frame->local = NULL; - STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, - xdata); - return 0; -} - -int32_t -glupy_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, - size_t size, off_t offset, dict_t *xdata) -{ - glupy_private_t *priv = this->private; - PyGILState_STATE gstate; - int32_t ret; - static long next_id = 0; - - if (!priv->fops[GLUPY_READDIRP]) { - goto wind; - } - - gstate = glupy_enter(); - frame->local = (void *)++next_id; - ret = ((fop_readdirp_t)(priv->fops[GLUPY_READDIRP]))( - frame, this, fd, size, offset, xdata); - glupy_leave(gstate); - - return ret; - -wind: - STACK_WIND (frame, glupy_readdirp_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readdirp,fd, size, offset, xdata); - return 0; -} - -void -wind_readdirp (call_frame_t *frame, xlator_t *xl, fd_t *fd, size_t size, - off_t offset, dict_t *xdata) -{ - xlator_t *this = THIS; - - if (!xl || (xl == this)) { - xl 
= FIRST_CHILD(this); - } - - STACK_WIND(frame,glupy_readdirp_cbk,xl,xl->fops->readdirp,fd,size,offset,xdata); -} - -void -unwind_readdirp (call_frame_t *frame, long cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, - dict_t *xdata) -{ - frame->local = NULL; - STACK_UNWIND_STRICT(readdirp,frame,op_ret,op_errno, - entries, xdata); -} - -void -set_readdirp_fop (long py_this, fop_readdirp_t fop) -{ - glupy_private_t *priv = ((xlator_t *)py_this)->private; - - priv->fops[GLUPY_READDIRP] = (long)fop; -} - -void -set_readdirp_cbk (long py_this, fop_readdirp_cbk_t cbk) -{ - glupy_private_t *priv = ((xlator_t *)py_this)->private; - - priv->cbks[GLUPY_READDIRP] = (long)cbk; -} - - -/* FOP:STAT */ - -int32_t -glupy_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata) -{ - glupy_private_t *priv = this->private; - PyGILState_STATE gstate; - int32_t ret; - - if (!priv->cbks[GLUPY_STAT]) { - goto unwind; - } - - gstate = glupy_enter(); - ret = ((fop_stat_cbk_t)(priv->cbks[GLUPY_STAT]))( - frame, cookie, this, op_ret, op_errno, - buf, xdata); - glupy_leave(gstate); - - return ret; - -unwind: - frame->local = NULL; - STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf, xdata); - return 0; -} - -int32_t -glupy_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, - dict_t *xdata) -{ - glupy_private_t *priv = this->private; - PyGILState_STATE gstate; - int32_t ret; - static long next_id = 0; - - if (!priv->fops[GLUPY_STAT]) { - goto wind; - } - - gstate = glupy_enter(); - frame->local = (void *)++next_id; - ret = ((fop_stat_t)(priv->fops[GLUPY_STAT]))( - frame, this, loc, xdata); - glupy_leave(gstate); - - return ret; - -wind: - STACK_WIND (frame, glupy_stat_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->stat, loc, xdata); - return 0; -} - -void -wind_stat (call_frame_t *frame, xlator_t *xl, loc_t *loc, dict_t *xdata) -{ - xlator_t *this = THIS; - - if (!xl || (xl == 
this)) { - xl = FIRST_CHILD(this); - } - - STACK_WIND(frame,glupy_stat_cbk,xl,xl->fops->stat,loc,xdata); -} - -void -unwind_stat (call_frame_t *frame, long cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, - dict_t *xdata) -{ - frame->local = NULL; - STACK_UNWIND_STRICT(stat,frame,op_ret,op_errno, - buf,xdata); -} - -void -set_stat_fop (long py_this, fop_stat_t fop) -{ - glupy_private_t *priv = ((xlator_t *)py_this)->private; - - priv->fops[GLUPY_STAT] = (long)fop; -} - -void -set_stat_cbk (long py_this, fop_stat_cbk_t cbk) -{ - glupy_private_t *priv = ((xlator_t *)py_this)->private; - - priv->cbks[GLUPY_STAT] = (long)cbk; -} - - -/* FOP: FSTAT */ - -int32_t -glupy_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata) -{ - glupy_private_t *priv = this->private; - PyGILState_STATE gstate; - int32_t ret; - - if (!priv->cbks[GLUPY_FSTAT]) { - goto unwind; - } - - gstate = glupy_enter(); - ret = ((fop_fstat_cbk_t)(priv->cbks[GLUPY_FSTAT]))( - frame, cookie, this, op_ret, op_errno, - buf, xdata); - glupy_leave(gstate); - - return ret; - -unwind: - frame->local = NULL; - STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf, xdata); - return 0; -} - -int32_t -glupy_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, - dict_t *xdata) -{ - glupy_private_t *priv = this->private; - PyGILState_STATE gstate; - int32_t ret; - static long next_id = 0; - - if (!priv->fops[GLUPY_FSTAT]) { - goto wind; - } - - gstate = glupy_enter(); - frame->local = (void *)++next_id; - ret = ((fop_fstat_t)(priv->fops[GLUPY_FSTAT]))( - frame, this, fd, xdata); - glupy_leave(gstate); - - return ret; - -wind: - STACK_WIND (frame, glupy_fstat_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fstat, fd, xdata); - return 0; -} - -void -wind_fstat (call_frame_t *frame, xlator_t *xl, fd_t *fd, dict_t *xdata) -{ - xlator_t *this = THIS; - - if (!xl || (xl == this)) { - xl = FIRST_CHILD(this); - } 
- - STACK_WIND(frame,glupy_fstat_cbk,xl,xl->fops->fstat,fd,xdata); -} - -void -unwind_fstat (call_frame_t *frame, long cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, - dict_t *xdata) -{ - frame->local = NULL; - STACK_UNWIND_STRICT(fstat,frame,op_ret,op_errno, - buf,xdata); -} - -void -set_fstat_fop (long py_this, fop_fstat_t fop) -{ - glupy_private_t *priv = ((xlator_t *)py_this)->private; - - priv->fops[GLUPY_FSTAT] = (long)fop; -} - -void -set_fstat_cbk (long py_this, fop_fstat_cbk_t cbk) -{ - glupy_private_t *priv = ((xlator_t *)py_this)->private; - - priv->cbks[GLUPY_FSTAT] = (long)cbk; -} - -/* FOP:STATFS */ - -int32_t -glupy_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct statvfs *buf, dict_t *xdata) -{ - glupy_private_t *priv = this->private; - PyGILState_STATE gstate; - int32_t ret; - - if (!priv->cbks[GLUPY_STATFS]) { - goto unwind; - } - - gstate = glupy_enter(); - ret = ((fop_statfs_cbk_t)(priv->cbks[GLUPY_STATFS]))( - frame, cookie, this, op_ret, op_errno, - buf, xdata); - glupy_leave(gstate); - - return ret; - -unwind: - frame->local = NULL; - STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf, xdata); - return 0; -} - -int32_t -glupy_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, - dict_t *xdata) -{ - glupy_private_t *priv = this->private; - PyGILState_STATE gstate; - int32_t ret; - static long next_id = 0; - - if (!priv->fops[GLUPY_STATFS]) { - goto wind; - } - - gstate = glupy_enter(); - frame->local = (void *)++next_id; - ret = ((fop_statfs_t)(priv->fops[GLUPY_STATFS]))( - frame, this, loc, xdata); - glupy_leave(gstate); - - return ret; - -wind: - STACK_WIND (frame, glupy_statfs_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->statfs, loc, xdata); - return 0; -} - -void -wind_statfs (call_frame_t *frame, xlator_t *xl, loc_t *loc, dict_t *xdata) -{ - xlator_t *this = THIS; - - if (!xl || (xl == this)) { - xl = FIRST_CHILD(this); - } - - 
STACK_WIND(frame,glupy_statfs_cbk,xl,xl->fops->statfs,loc,xdata); -} - -void -unwind_statfs (call_frame_t *frame, long cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct statvfs *buf, - dict_t *xdata) -{ - frame->local = NULL; - STACK_UNWIND_STRICT(statfs,frame,op_ret,op_errno, - buf,xdata); -} - -void -set_statfs_fop (long py_this, fop_statfs_t fop) -{ - glupy_private_t *priv = ((xlator_t *)py_this)->private; - - priv->fops[GLUPY_STATFS] = (long)fop; -} - -void -set_statfs_cbk (long py_this, fop_statfs_cbk_t cbk) -{ - glupy_private_t *priv = ((xlator_t *)py_this)->private; - - priv->cbks[GLUPY_STATFS] = (long)cbk; -} - - -/* FOP: SETXATTR */ - -int32_t -glupy_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - glupy_private_t *priv = this->private; - PyGILState_STATE gstate; - int32_t ret; - - if (!priv->cbks[GLUPY_SETXATTR]) { - goto unwind; - } - - gstate = glupy_enter(); - ret = ((fop_setxattr_cbk_t)(priv->cbks[GLUPY_SETXATTR]))( - frame, cookie, this, op_ret, op_errno, - xdata); - glupy_leave(gstate); - - return ret; - -unwind: - frame->local = NULL; - STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata); - return 0; -} - -int32_t -glupy_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - dict_t *dict, int32_t flags, dict_t *xdata) -{ - glupy_private_t *priv = this->private; - PyGILState_STATE gstate; - int32_t ret; - static long next_id = 0; - - if (!priv->fops[GLUPY_SETXATTR]) { - goto wind; - } - - gstate = glupy_enter(); - frame->local = (void *)++next_id; - ret = ((fop_setxattr_t)(priv->fops[GLUPY_SETXATTR]))( - frame, this, loc, dict, flags, xdata); - glupy_leave(gstate); - - return ret; - -wind: - STACK_WIND (frame, glupy_setxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setxattr, loc, dict, - flags, xdata); - return 0; -} - -void -wind_setxattr (call_frame_t *frame, xlator_t *xl, loc_t *loc, - dict_t *dict, int32_t flags, dict_t *xdata) -{ 
- xlator_t *this = THIS; - - if (!xl || (xl == this)) { - xl = FIRST_CHILD(this); - } - - STACK_WIND (frame, glupy_setxattr_cbk, xl, xl->fops->setxattr, - loc, dict, flags, xdata); -} - - -void -unwind_setxattr (call_frame_t *frame, long cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - frame->local = NULL; - STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata); - -} - -void -set_setxattr_fop (long py_this, fop_setxattr_t fop) -{ - glupy_private_t *priv = ((xlator_t *)py_this)->private; - - priv->fops[GLUPY_SETXATTR] = (long)fop; -} - -void -set_setxattr_cbk (long py_this, fop_setxattr_cbk_t cbk) -{ - glupy_private_t *priv = ((xlator_t *)py_this)->private; - - priv->cbks[GLUPY_SETXATTR] = (long)cbk; -} - -/* FOP: GETXATTR */ - -int32_t -glupy_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict, - dict_t *xdata) -{ - glupy_private_t *priv = this->private; - PyGILState_STATE gstate; - int32_t ret; - - if (!priv->cbks[GLUPY_GETXATTR]) { - goto unwind; - } - - gstate = glupy_enter(); - ret = ((fop_getxattr_cbk_t)(priv->cbks[GLUPY_GETXATTR]))( - frame, cookie, this, op_ret, op_errno, dict, - xdata); - glupy_leave(gstate); - - return ret; - -unwind: - frame->local = NULL; - STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, - xdata); - return 0; -} - -int32_t -glupy_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *name, dict_t *xdata) -{ - glupy_private_t *priv = this->private; - PyGILState_STATE gstate; - int32_t ret; - static long next_id = 0; - - if (!priv->fops[GLUPY_GETXATTR]) { - goto wind; - } - - gstate = glupy_enter(); - frame->local = (void *)++next_id; - ret = ((fop_getxattr_t)(priv->fops[GLUPY_GETXATTR]))( - frame, this, loc, name, xdata); - glupy_leave(gstate); - - return ret; - -wind: - STACK_WIND (frame, glupy_getxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->getxattr, loc, name, - xdata); - return 0; -} - -void 
-wind_getxattr (call_frame_t *frame, xlator_t *xl, loc_t *loc, - const char *name, dict_t *xdata) -{ - xlator_t *this = THIS; - - if (!xl || (xl == this)) { - xl = FIRST_CHILD(this); - } - - STACK_WIND (frame, glupy_getxattr_cbk, xl, xl->fops->getxattr, - loc, name, xdata); -} - - -void -unwind_getxattr (call_frame_t *frame, long cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict, - dict_t *xdata) -{ - frame->local = NULL; - STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, - xdata); - -} - - -void -set_getxattr_fop (long py_this, fop_getxattr_t fop) -{ - glupy_private_t *priv = ((xlator_t *)py_this)->private; - - priv->fops[GLUPY_GETXATTR] = (long)fop; -} - - -void -set_getxattr_cbk (long py_this, fop_getxattr_cbk_t cbk) -{ - glupy_private_t *priv = ((xlator_t *)py_this)->private; - - priv->cbks[GLUPY_GETXATTR] = (long)cbk; -} - -/* FOP: FSETXATTR */ - -int32_t -glupy_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - glupy_private_t *priv = this->private; - PyGILState_STATE gstate; - int32_t ret; - - if (!priv->cbks[GLUPY_FSETXATTR]) { - goto unwind; - } - - gstate = glupy_enter(); - ret = ((fop_fsetxattr_cbk_t)(priv->cbks[GLUPY_FSETXATTR]))( - frame, cookie, this, op_ret, op_errno, - xdata); - glupy_leave(gstate); - - return ret; - -unwind: - frame->local = NULL; - STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata); - return 0; -} - -int32_t -glupy_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, - dict_t *dict, int32_t flags, dict_t *xdata) -{ - glupy_private_t *priv = this->private; - PyGILState_STATE gstate; - int32_t ret; - static long next_id = 0; - - if (!priv->fops[GLUPY_FSETXATTR]) { - goto wind; - } - - gstate = glupy_enter(); - frame->local = (void *)++next_id; - ret = ((fop_fsetxattr_t)(priv->fops[GLUPY_FSETXATTR]))( - frame, this, fd, dict, flags, xdata); - glupy_leave(gstate); - - return ret; - -wind: - STACK_WIND (frame, 
glupy_fsetxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsetxattr, fd, dict, - flags, xdata); - return 0; -} - -void -wind_fsetxattr (call_frame_t *frame, xlator_t *xl, fd_t *fd, - dict_t *dict, int32_t flags, dict_t *xdata) -{ - xlator_t *this = THIS; - - if (!xl || (xl == this)) { - xl = FIRST_CHILD(this); - } - - STACK_WIND (frame, glupy_fsetxattr_cbk, xl, xl->fops->fsetxattr, - fd, dict, flags, xdata); -} - - -void -unwind_fsetxattr (call_frame_t *frame, long cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - frame->local = NULL; - STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata); - -} - -void -set_fsetxattr_fop (long py_this, fop_fsetxattr_t fop) -{ - glupy_private_t *priv = ((xlator_t *)py_this)->private; - - priv->fops[GLUPY_FSETXATTR] = (long)fop; -} - -void -set_fsetxattr_cbk (long py_this, fop_fsetxattr_cbk_t cbk) -{ - glupy_private_t *priv = ((xlator_t *)py_this)->private; - - priv->cbks[GLUPY_FSETXATTR] = (long)cbk; -} - -/* FOP: FGETXATTR */ - -int32_t -glupy_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict, - dict_t *xdata) -{ - glupy_private_t *priv = this->private; - PyGILState_STATE gstate; - int32_t ret; - - if (!priv->cbks[GLUPY_FGETXATTR]) { - goto unwind; - } - - gstate = glupy_enter(); - ret = ((fop_fgetxattr_cbk_t)(priv->cbks[GLUPY_FGETXATTR]))( - frame, cookie, this, op_ret, op_errno, dict, - xdata); - glupy_leave(gstate); - - return ret; - -unwind: - frame->local = NULL; - STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, - xdata); - return 0; -} - -int32_t -glupy_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, - const char *name, dict_t *xdata) -{ - glupy_private_t *priv = this->private; - PyGILState_STATE gstate; - int32_t ret; - static long next_id = 0; - - if (!priv->fops[GLUPY_FGETXATTR]) { - goto wind; - } - - gstate = glupy_enter(); - frame->local = (void *)++next_id; - ret = 
((fop_fgetxattr_t)(priv->fops[GLUPY_FGETXATTR]))( - frame, this, fd, name, xdata); - glupy_leave(gstate); - - return ret; - -wind: - STACK_WIND (frame, glupy_fgetxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fgetxattr, fd, name, - xdata); - return 0; -} - -void -wind_fgetxattr (call_frame_t *frame, xlator_t *xl, fd_t *fd, - const char *name, dict_t *xdata) -{ - xlator_t *this = THIS; - - if (!xl || (xl == this)) { - xl = FIRST_CHILD(this); - } - - STACK_WIND (frame, glupy_fgetxattr_cbk, xl, xl->fops->fgetxattr, - fd, name, xdata); -} - - -void -unwind_fgetxattr (call_frame_t *frame, long cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict, - dict_t *xdata) -{ - frame->local = NULL; - STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, - xdata); - -} - - -void -set_fgetxattr_fop (long py_this, fop_fgetxattr_t fop) -{ - glupy_private_t *priv = ((xlator_t *)py_this)->private; - - priv->fops[GLUPY_FGETXATTR] = (long)fop; -} - - -void -set_fgetxattr_cbk (long py_this, fop_fgetxattr_cbk_t cbk) -{ - glupy_private_t *priv = ((xlator_t *)py_this)->private; - - priv->cbks[GLUPY_FGETXATTR] = (long)cbk; -} - -/* FOP:REMOVEXATTR */ - -int32_t -glupy_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - glupy_private_t *priv = this->private; - PyGILState_STATE gstate; - int32_t ret; - - if (!priv->cbks[GLUPY_REMOVEXATTR]) { - goto unwind; - } - - gstate = glupy_enter(); - ret = ((fop_removexattr_cbk_t)(priv->cbks[GLUPY_REMOVEXATTR]))( - frame, cookie, this, op_ret, op_errno, xdata); - glupy_leave(gstate); - - return ret; - -unwind: - frame->local = NULL; - STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, xdata); - return 0; -} - -int32_t -glupy_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *name, dict_t *xdata) -{ - glupy_private_t *priv = this->private; - PyGILState_STATE gstate; - int32_t ret; - static long next_id = 0; - - if 
(!priv->fops[GLUPY_REMOVEXATTR]) { - goto wind; - } - - gstate = glupy_enter(); - frame->local = (void *)++next_id; - ret = ((fop_removexattr_t)(priv->fops[GLUPY_REMOVEXATTR]))( - frame, this, loc, name, xdata); - glupy_leave(gstate); - - return ret; - -wind: - STACK_WIND (frame, glupy_removexattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->removexattr, loc, name, - xdata); - return 0; -} - -void -wind_removexattr (call_frame_t *frame, xlator_t *xl, loc_t *loc, - const char *name, dict_t *xdata) -{ - xlator_t *this = THIS; - - if (!xl || (xl == this)) { - xl = FIRST_CHILD(this); - } - - STACK_WIND (frame, glupy_removexattr_cbk, xl, xl->fops->removexattr, - loc, name, xdata); -} - - -void -unwind_removexattr (call_frame_t *frame, long cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - frame->local = NULL; - STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, xdata); - -} - -void -set_removexattr_fop (long py_this, fop_removexattr_t fop) -{ - glupy_private_t *priv = ((xlator_t *)py_this)->private; - - priv->fops[GLUPY_REMOVEXATTR] = (long)fop; -} - -void -set_removexattr_cbk (long py_this, fop_removexattr_cbk_t cbk) -{ - glupy_private_t *priv = ((xlator_t *)py_this)->private; - - priv->cbks[GLUPY_REMOVEXATTR] = (long)cbk; -} - - -/* FOP:FREMOVEXATTR */ - -int32_t -glupy_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - glupy_private_t *priv = this->private; - PyGILState_STATE gstate; - int32_t ret; - - if (!priv->cbks[GLUPY_FREMOVEXATTR]) { - goto unwind; - } - - gstate = glupy_enter(); - ret = ((fop_fremovexattr_cbk_t)(priv->cbks[GLUPY_FREMOVEXATTR]))( - frame, cookie, this, op_ret, op_errno, xdata); - glupy_leave(gstate); - - return ret; - -unwind: - frame->local = NULL; - STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, xdata); - return 0; -} - -int32_t -glupy_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd, - const char 
*name, dict_t *xdata) -{ - glupy_private_t *priv = this->private; - PyGILState_STATE gstate; - int32_t ret; - static long next_id = 0; - - if (!priv->fops[GLUPY_FREMOVEXATTR]) { - goto wind; - } - - gstate = glupy_enter(); - frame->local = (void *)++next_id; - ret = ((fop_fremovexattr_t)(priv->fops[GLUPY_FREMOVEXATTR]))( - frame, this, fd, name, xdata); - glupy_leave(gstate); - - return ret; - -wind: - STACK_WIND (frame, glupy_fremovexattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fremovexattr, fd, name, - xdata); - return 0; -} - -void -wind_fremovexattr (call_frame_t *frame, xlator_t *xl, fd_t *fd, - const char *name, dict_t *xdata) -{ - xlator_t *this = THIS; - - if (!xl || (xl == this)) { - xl = FIRST_CHILD(this); - } - - STACK_WIND (frame, glupy_fremovexattr_cbk, xl, xl->fops->fremovexattr, - fd, name, xdata); -} - - -void -unwind_fremovexattr (call_frame_t *frame, long cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - frame->local = NULL; - STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, xdata); - -} - -void -set_fremovexattr_fop (long py_this, fop_fremovexattr_t fop) -{ - glupy_private_t *priv = ((xlator_t *)py_this)->private; - - priv->fops[GLUPY_FREMOVEXATTR] = (long)fop; -} - -void -set_fremovexattr_cbk (long py_this, fop_fremovexattr_cbk_t cbk) -{ - glupy_private_t *priv = ((xlator_t *)py_this)->private; - - priv->cbks[GLUPY_FREMOVEXATTR] = (long)cbk; -} - - -/* FOP: LINK*/ -int32_t -glupy_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - glupy_private_t *priv = this->private; - PyGILState_STATE gstate; - int32_t ret; - - if (!priv->cbks[GLUPY_LINK]) { - goto unwind; - } - - gstate = glupy_enter(); - ret = ((fop_link_cbk_t)(priv->cbks[GLUPY_LINK]))( - frame, cookie, this, op_ret, op_errno, - inode, buf, preparent, postparent, xdata); - 
glupy_leave(gstate); - - return ret; - -unwind: - frame->local = NULL; - STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, buf, - preparent, postparent, xdata); - return 0; -} - -int32_t -glupy_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, - dict_t *xdata) -{ - glupy_private_t *priv = this->private; - PyGILState_STATE gstate; - int32_t ret; - static long next_id = 0; - - if (!priv->fops[GLUPY_LINK]) { - goto wind; - } - - gstate = glupy_enter(); - frame->local = (void *)++next_id; - ret = ((fop_link_t)(priv->fops[GLUPY_LINK]))( - frame, this, oldloc, newloc, xdata); - glupy_leave(gstate); - - return ret; - -wind: - STACK_WIND (frame, glupy_link_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->link, oldloc, newloc, - xdata); - return 0; -} - -void -wind_link (call_frame_t *frame, xlator_t *xl, loc_t *oldloc, loc_t *newloc, - dict_t *xdata) -{ - xlator_t *this = THIS; - - if (!xl || (xl == this)) { - xl = FIRST_CHILD(this); - } - - STACK_WIND (frame, glupy_link_cbk, xl, xl->fops->link, - oldloc, newloc, xdata); -} - -void -unwind_link (call_frame_t *frame, long cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - frame->local = NULL; - STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, buf, - preparent, postparent, xdata); -} - -void -set_link_fop (long py_this, fop_link_t fop) -{ - glupy_private_t *priv = ((xlator_t *)py_this)->private; - - priv->fops[GLUPY_LINK] = (long)fop; -} - -void -set_link_cbk (long py_this, fop_link_cbk_t cbk) -{ - glupy_private_t *priv = ((xlator_t *)py_this)->private; - - priv->cbks[GLUPY_LINK] = (long)cbk; -} - -/* FOP: SYMLINK*/ -int32_t -glupy_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - glupy_private_t *priv = 
this->private; - PyGILState_STATE gstate; - int32_t ret; - - if (!priv->cbks[GLUPY_SYMLINK]) { - goto unwind; - } - - gstate = glupy_enter(); - ret = ((fop_symlink_cbk_t)(priv->cbks[GLUPY_SYMLINK]))( - frame, cookie, this, op_ret, op_errno, - inode, buf, preparent, postparent, xdata); - glupy_leave(gstate); - - return ret; - -unwind: - frame->local = NULL; - STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf, - preparent, postparent, xdata); - return 0; -} - -int32_t -glupy_symlink(call_frame_t *frame, xlator_t *this, const char *linkname, - loc_t *loc, mode_t umask, dict_t *xdata) -{ - glupy_private_t *priv = this->private; - PyGILState_STATE gstate; - int32_t ret; - static long next_id = 0; - - if (!priv->fops[GLUPY_SYMLINK]) { - goto wind; - } - - gstate = glupy_enter(); - frame->local = (void *)++next_id; - ret = ((fop_symlink_t)(priv->fops[GLUPY_SYMLINK]))( - frame, this, linkname, loc, umask, xdata); - glupy_leave(gstate); - - return ret; - -wind: - STACK_WIND (frame, glupy_symlink_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->symlink, linkname, loc, - umask, xdata); - return 0; -} - -void -wind_symlink (call_frame_t *frame, xlator_t *xl, const char *linkname, - loc_t *loc, mode_t umask, dict_t *xdata) -{ - xlator_t *this = THIS; - - if (!xl || (xl == this)) { - xl = FIRST_CHILD(this); - } - - STACK_WIND (frame, glupy_symlink_cbk, xl, xl->fops->symlink, - linkname, loc, umask, xdata); -} - -void -unwind_symlink (call_frame_t *frame, long cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - frame->local = NULL; - STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf, - preparent, postparent, xdata); -} - -void -set_symlink_fop (long py_this, fop_symlink_t fop) -{ - glupy_private_t *priv = ((xlator_t *)py_this)->private; - - priv->fops[GLUPY_SYMLINK] = (long)fop; -} - -void -set_symlink_cbk (long py_this, 
fop_symlink_cbk_t cbk) -{ - glupy_private_t *priv = ((xlator_t *)py_this)->private; - - priv->cbks[GLUPY_SYMLINK] = (long)cbk; -} - - -/* FOP: READLINK */ -int32_t -glupy_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, const char *path, - struct iatt *buf, dict_t *xdata) -{ - glupy_private_t *priv = this->private; - PyGILState_STATE gstate; - int32_t ret; - - if (!priv->cbks[GLUPY_READLINK]) { - goto unwind; - } - - gstate = glupy_enter(); - ret = ((fop_readlink_cbk_t)(priv->cbks[GLUPY_READLINK]))( - frame, cookie, this, op_ret, op_errno, - path, buf, xdata); - glupy_leave(gstate); - - return ret; - -unwind: - frame->local = NULL; - STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, path, - buf, xdata); - return 0; -} - -int32_t -glupy_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, - size_t size, dict_t *xdata) -{ - glupy_private_t *priv = this->private; - PyGILState_STATE gstate; - int32_t ret; - static long next_id = 0; - - if (!priv->fops[GLUPY_READLINK]) { - goto wind; - } - - gstate = glupy_enter(); - frame->local = (void *)++next_id; - ret = ((fop_readlink_t)(priv->fops[GLUPY_READLINK]))( - frame, this, loc, size, xdata); - glupy_leave(gstate); - - return ret; - -wind: - STACK_WIND (frame, glupy_readlink_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readlink, loc, - size, xdata); - return 0; -} - -void -wind_readlink (call_frame_t *frame, xlator_t *xl, loc_t *loc, - size_t size, dict_t *xdata) -{ - xlator_t *this = THIS; - - if (!xl || (xl == this)) { - xl = FIRST_CHILD(this); - } - - STACK_WIND (frame, glupy_readlink_cbk, xl, xl->fops->readlink, - loc, size, xdata); -} - -void -unwind_readlink (call_frame_t *frame, long cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, const char *path, - struct iatt *buf, dict_t *xdata) -{ - frame->local = NULL; - STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, path, buf, - xdata); -} - -void -set_readlink_fop (long py_this, 
fop_readlink_t fop) -{ - glupy_private_t *priv = ((xlator_t *)py_this)->private; - - priv->fops[GLUPY_READLINK] = (long)fop; -} - -void -set_readlink_cbk (long py_this, fop_readlink_cbk_t cbk) -{ - glupy_private_t *priv = ((xlator_t *)py_this)->private; - - priv->cbks[GLUPY_READLINK] = (long)cbk; -} - - -/* FOP: UNLINK */ - -int32_t -glupy_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *preparent, struct iatt *postparent, - dict_t *xdata) -{ - glupy_private_t *priv = this->private; - PyGILState_STATE gstate; - int32_t ret; - - if (!priv->cbks[GLUPY_UNLINK]) { - goto unwind; - } - - gstate = glupy_enter(); - ret = ((fop_unlink_cbk_t)(priv->cbks[GLUPY_UNLINK]))( - frame, cookie, this, op_ret, op_errno, - preparent, postparent, xdata); - glupy_leave(gstate); - - return ret; - -unwind: - frame->local = NULL; - STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno, preparent, - postparent, xdata); - return 0; -} - -int32_t -glupy_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, - int xflags, dict_t *xdata) -{ - glupy_private_t *priv = this->private; - PyGILState_STATE gstate; - int32_t ret; - static long next_id = 0; - - if (!priv->fops[GLUPY_UNLINK]) { - goto wind; - } - - gstate = glupy_enter(); - frame->local = (void *)++next_id; - ret = ((fop_unlink_t)(priv->fops[GLUPY_UNLINK]))( - frame, this, loc, xflags, xdata); - glupy_leave(gstate); - - return ret; - -wind: - STACK_WIND (frame, glupy_unlink_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, loc, - xflags, xdata); - return 0; -} - -void -wind_unlink (call_frame_t *frame, xlator_t *xl, loc_t *loc, - int xflags, dict_t *xdata) -{ - xlator_t *this = THIS; - - if (!xl || (xl == this)) { - xl = FIRST_CHILD(this); - } - - STACK_WIND (frame, glupy_unlink_cbk, xl, xl->fops->unlink, - loc, xflags, xdata); -} - -void -unwind_unlink (call_frame_t *frame, long cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *preparent, 
struct iatt *postparent, - dict_t *xdata) -{ - frame->local = NULL; - STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno, - preparent, postparent, xdata); -} - -void -set_unlink_fop (long py_this, fop_unlink_t fop) -{ - glupy_private_t *priv = ((xlator_t *)py_this)->private; - - priv->fops[GLUPY_UNLINK] = (long)fop; -} - -void -set_unlink_cbk (long py_this, fop_unlink_cbk_t cbk) -{ - glupy_private_t *priv = ((xlator_t *)py_this)->private; - - priv->cbks[GLUPY_UNLINK] = (long)cbk; -} - - -/* FOP: MKDIR */ - -int32_t -glupy_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - glupy_private_t *priv = this->private; - PyGILState_STATE gstate; - int32_t ret; - - if (!priv->cbks[GLUPY_MKDIR]) { - goto unwind; - } - - gstate = glupy_enter(); - ret = ((fop_mkdir_cbk_t)(priv->cbks[GLUPY_MKDIR]))( - frame, cookie, this, op_ret, op_errno, - inode, buf, preparent, postparent, xdata); - glupy_leave(gstate); - - return ret; - -unwind: - frame->local = NULL; - STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, inode, buf, - preparent, postparent, xdata); - return 0; -} - -int32_t -glupy_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, - mode_t umask, dict_t *xdata) -{ - glupy_private_t *priv = this->private; - PyGILState_STATE gstate; - int32_t ret; - static long next_id = 0; - - if (!priv->fops[GLUPY_MKDIR]) { - goto wind; - } - - gstate = glupy_enter(); - frame->local = (void *)++next_id; - ret = ((fop_mkdir_t)(priv->fops[GLUPY_MKDIR]))( - frame, this, loc, mode, umask, xdata); - glupy_leave(gstate); - - return ret; - -wind: - STACK_WIND (frame, glupy_mkdir_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, - xdata); - return 0; -} - -void -wind_mkdir (call_frame_t *frame, xlator_t *xl, loc_t *loc, mode_t mode, - mode_t umask, dict_t *xdata) -{ - - xlator_t *this = THIS; - - if 
(!xl || (xl == this)) { - xl = FIRST_CHILD(this); - } - - STACK_WIND (frame, glupy_mkdir_cbk, xl, xl->fops->mkdir, - loc, mode, umask, xdata); -} - -void -unwind_mkdir (call_frame_t *frame, long cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - frame->local = NULL; - STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, inode, buf, - preparent, postparent, xdata); -} - -void -set_mkdir_fop (long py_this, fop_mkdir_t fop) -{ - glupy_private_t *priv = ((xlator_t *)py_this)->private; - - priv->fops[GLUPY_MKDIR] = (long)fop; -} - -void -set_mkdir_cbk (long py_this, fop_mkdir_cbk_t cbk) -{ - glupy_private_t *priv = ((xlator_t *)py_this)->private; - - priv->cbks[GLUPY_MKDIR] = (long)cbk; -} - - -/* FOP: RMDIR */ - -int32_t -glupy_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *preparent, struct iatt *postparent, - dict_t *xdata) -{ - glupy_private_t *priv = this->private; - PyGILState_STATE gstate; - int32_t ret; - - if (!priv->cbks[GLUPY_RMDIR]) { - goto unwind; - } - - gstate = glupy_enter(); - ret = ((fop_rmdir_cbk_t)(priv->cbks[GLUPY_RMDIR]))( - frame, cookie, this, op_ret, op_errno, - preparent, postparent, xdata); - glupy_leave(gstate); - - return ret; - -unwind: - frame->local = NULL; - STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno, preparent, - postparent, xdata); - return 0; -} - -int32_t -glupy_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, - int xflags, dict_t *xdata) -{ - glupy_private_t *priv = this->private; - PyGILState_STATE gstate; - int32_t ret; - static long next_id = 0; - - if (!priv->fops[GLUPY_RMDIR]) { - goto wind; - } - - gstate = glupy_enter(); - frame->local = (void *)++next_id; - ret = ((fop_rmdir_t)(priv->fops[GLUPY_RMDIR]))( - frame, this, loc, xflags, xdata); - glupy_leave(gstate); - - return ret; - -wind: - STACK_WIND (frame, glupy_rmdir_cbk, 
FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rmdir, loc, - xflags, xdata); - return 0; -} - -void -wind_rmdir (call_frame_t *frame, xlator_t *xl, loc_t *loc, - int xflags, dict_t *xdata) -{ - - xlator_t *this = THIS; - - if (!xl || (xl == this)) { - xl = FIRST_CHILD(this); - } - - STACK_WIND (frame, glupy_rmdir_cbk, xl, xl->fops->rmdir, - loc, xflags, xdata); -} - -void -unwind_rmdir (call_frame_t *frame, long cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *preparent, struct iatt *postparent, - dict_t *xdata) -{ - frame->local = NULL; - STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno, - preparent, postparent, xdata); -} - -void -set_rmdir_fop (long py_this, fop_rmdir_t fop) -{ - glupy_private_t *priv = ((xlator_t *)py_this)->private; - - priv->fops[GLUPY_RMDIR] = (long)fop; -} - -void -set_rmdir_cbk (long py_this, fop_rmdir_cbk_t cbk) -{ - glupy_private_t *priv = ((xlator_t *)py_this)->private; - - priv->cbks[GLUPY_RMDIR] = (long)cbk; -} - - -/* NON-FOP-SPECIFIC CODE */ - - -long -get_id (call_frame_t *frame) -{ - return (long)(frame->local); -} - -uint64_t -get_rootunique (call_frame_t *frame) -{ - return frame->root->unique; -} - -int32_t -mem_acct_init (xlator_t *this) -{ - int ret = -1; - - if (!this) - return ret; - - ret = xlator_mem_acct_init (this, gf_glupy_mt_end); - - if (ret != 0) { - gf_log(this->name, GF_LOG_ERROR, "Memory accounting init" - " failed"); - return ret; - } - - return ret; -} - -int32_t -init (xlator_t *this) -{ - glupy_private_t *priv = NULL; - char *module_name = NULL; - PyObject *py_mod_name = NULL; - PyObject *py_init_func = NULL; - PyObject *py_args = NULL; - PyObject *syspath = NULL; - PyObject *path = NULL; - PyObject *error_type = NULL; - PyObject *error_msg = NULL; - PyObject *error_bt = NULL; - static gf_boolean_t py_inited = _gf_false; - void * err_cleanup = &&err_return; - - if (dict_get_str(this->options,"module-name",&module_name) != 0) { - gf_log (this->name, GF_LOG_ERROR, "missing 
module-name"); - return -1; - } - - priv = GF_CALLOC (1, sizeof (glupy_private_t), gf_glupy_mt_priv); - if (!priv) { - goto *err_cleanup; - } - this->private = priv; - err_cleanup = &&err_free_priv; - - if (!py_inited) { - /* - * This must be done before Py_Initialize(), - * because it will duplicate the environment, - * and fail to see later environment updates. - */ - setenv("PATH_GLUSTERFS_GLUPY_MODULE", - PATH_GLUSTERFS_GLUPY_MODULE, 1); - - Py_Initialize(); - PyEval_InitThreads(); - - (void)pthread_key_create(&gil_init_key,NULL); - (void)pthread_setspecific(gil_init_key,(void *)1); - - /* PyEval_InitThreads takes this "for" us. No thanks. */ - PyEval_ReleaseLock(); - py_inited = _gf_true; - } - - /* Adjust python's path */ - syspath = PySys_GetObject("path"); - path = PyString_FromString(GLUSTER_PYTHON_PATH); - PyList_Append(syspath, path); - Py_DECREF(path); - - py_mod_name = PyString_FromString(module_name); - if (!py_mod_name) { - gf_log (this->name, GF_LOG_ERROR, "could not create name"); - if (PyErr_Occurred()) { - PyErr_Fetch (&error_type, &error_msg, &error_bt); - gf_log (this->name, GF_LOG_ERROR, "Python error: %s", - PyString_AsString(error_msg)); - } - goto *err_cleanup; - } - - gf_log (this->name, GF_LOG_DEBUG, "py_mod_name = %s", module_name); - priv->py_module = PyImport_Import(py_mod_name); - Py_DECREF(py_mod_name); - if (!priv->py_module) { - gf_log (this->name, GF_LOG_ERROR, "Python import of %s failed", - module_name); - if (PyErr_Occurred()) { - PyErr_Fetch (&error_type, &error_msg, &error_bt); - gf_log (this->name, GF_LOG_ERROR, "Python error: %s", - PyString_AsString(error_msg)); - } - goto *err_cleanup; - } - gf_log (this->name, GF_LOG_INFO, "Import of %s succeeded", module_name); - err_cleanup = &&err_deref_module; - - py_init_func = PyObject_GetAttrString(priv->py_module, "xlator"); - if (!py_init_func || !PyCallable_Check(py_init_func)) { - gf_log (this->name, GF_LOG_ERROR, "missing init func"); - if (PyErr_Occurred()) { - PyErr_Fetch 
(&error_type, &error_msg, &error_bt); - gf_log (this->name, GF_LOG_ERROR, "Python error: %s", - PyString_AsString(error_msg)); - } - goto *err_cleanup; - } - err_cleanup = &&err_deref_init; - - py_args = PyTuple_New(1); - if (!py_args) { - gf_log (this->name, GF_LOG_ERROR, "could not create args"); - if (PyErr_Occurred()) { - PyErr_Fetch (&error_type, &error_msg, &error_bt); - gf_log (this->name, GF_LOG_ERROR, "Python error: %s", - PyString_AsString(error_msg)); - } - goto *err_cleanup; - } - PyTuple_SetItem(py_args,0,PyLong_FromLong((long)this)); - - /* TBD: pass in list of children */ - priv->py_xlator = PyObject_CallObject(py_init_func, py_args); - Py_DECREF(py_args); - if (!priv->py_xlator) { - gf_log (this->name, GF_LOG_ERROR, "Python init failed"); - if (PyErr_Occurred()) { - PyErr_Fetch (&error_type, &error_msg, &error_bt); - gf_log (this->name, GF_LOG_ERROR, "Python error: %s", - PyString_AsString(error_msg)); - } - goto *err_cleanup; - } - gf_log (this->name, GF_LOG_DEBUG, "init returned %p", priv->py_xlator); - - return 0; - -err_deref_init: - Py_DECREF(py_init_func); -err_deref_module: - Py_DECREF(priv->py_module); -err_free_priv: - GF_FREE(priv); -err_return: - return -1; -} - -void -fini (xlator_t *this) -{ - glupy_private_t *priv = this->private; - - if (!priv) - return; - Py_DECREF(priv->py_xlator); - Py_DECREF(priv->py_module); - this->private = NULL; - GF_FREE (priv); - - return; -} - -struct xlator_fops fops = { - .lookup = glupy_lookup, - .create = glupy_create, - .open = glupy_open, - .readv = glupy_readv, - .writev = glupy_writev, - .opendir = glupy_opendir, - .readdir = glupy_readdir, - .stat = glupy_stat, - .fstat = glupy_fstat, - .setxattr = glupy_setxattr, - .getxattr = glupy_getxattr, - .fsetxattr = glupy_fsetxattr, - .fgetxattr = glupy_fgetxattr, - .removexattr = glupy_removexattr, - .fremovexattr = glupy_fremovexattr, - .link = glupy_link, - .unlink = glupy_unlink, - .readlink = glupy_readlink, - .symlink = glupy_symlink, - .mkdir = 
glupy_mkdir, - .rmdir = glupy_rmdir, - .statfs = glupy_statfs, - .readdirp = glupy_readdirp -}; - -struct xlator_cbks cbks = { -}; - -struct volume_options options[] = { - { .key = {NULL} }, -}; diff --git a/xlators/features/glupy/src/glupy.h b/xlators/features/glupy/src/glupy.h deleted file mode 100644 index 1488c55c331..00000000000 --- a/xlators/features/glupy/src/glupy.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - Copyright (c) 2006-2014 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -#ifndef __GLUPY_H__ -#define __GLUPY_H__ - -#include "mem-types.h" - -enum { - GLUPY_LOOKUP = 0, - GLUPY_CREATE, - GLUPY_OPEN, - GLUPY_READV, - GLUPY_WRITEV, - GLUPY_OPENDIR, - GLUPY_READDIR, - GLUPY_READDIRP, - GLUPY_STAT, - GLUPY_FSTAT, - GLUPY_STATFS, - GLUPY_SETXATTR, - GLUPY_GETXATTR, - GLUPY_FSETXATTR, - GLUPY_FGETXATTR, - GLUPY_REMOVEXATTR, - GLUPY_FREMOVEXATTR, - GLUPY_LINK, - GLUPY_UNLINK, - GLUPY_READLINK, - GLUPY_SYMLINK, - GLUPY_MKNOD, - GLUPY_MKDIR, - GLUPY_RMDIR, - GLUPY_N_FUNCS -}; - -typedef struct { - PyObject *py_module; - PyObject *py_xlator; - long fops[GLUPY_N_FUNCS]; - long cbks[GLUPY_N_FUNCS]; -} glupy_private_t; - -enum gf_glupy_mem_types_ { - gf_glupy_mt_priv = gf_common_mt_end + 1, - gf_glupy_mt_end -}; - -#endif /* __GLUPY_H__ */ diff --git a/xlators/features/glupy/src/glupy.sym b/xlators/features/glupy/src/glupy.sym deleted file mode 100644 index 55d9a300108..00000000000 --- a/xlators/features/glupy/src/glupy.sym +++ /dev/null @@ -1,101 +0,0 @@ -init -fini -fops -cbks -options -notify -mem_acct_init -reconfigure -dumpops -set_lookup_fop -set_lookup_cbk -set_create_fop -set_create_cbk -set_open_fop -set_open_cbk -set_readv_fop -set_readv_cbk -set_writev_fop -set_writev_cbk 
-set_opendir_fop -set_opendir_cbk -set_readdir_fop -set_readdir_cbk -set_readdirp_fop -set_readdirp_cbk -set_stat_fop -set_stat_cbk -set_fstat_fop -set_fstat_cbk -set_statfs_fop -set_statfs_cbk -set_setxattr_fop -set_setxattr_cbk -set_getxattr_fop -set_getxattr_cbk -set_fsetxattr_fop -set_fsetxattr_cbk -set_fgetxattr_fop -set_fgetxattr_cbk -set_removexattr_fop -set_removexattr_cbk -set_fremovexattr_fop -set_fremovexattr_cbk -set_link_fop -set_link_cbk -set_symlink_fop -set_symlink_cbk -set_readlink_fop -set_readlink_cbk -set_unlink_fop -set_unlink_cbk -set_mkdir_fop -set_mkdir_cbk -set_rmdir_fop -set_rmdir_cbk -wind_lookup -wind_create -wind_open -wind_readv -wind_writev -wind_opendir -wind_readdir -wind_readdirp -wind_stat -wind_fstat -wind_statfs -wind_setxattr -wind_getxattr -wind_fsetxattr -wind_fgetxattr -wind_removexattr -wind_fremovexattr -wind_link -wind_symlink -wind_readlink -wind_unlink -wind_mkdir -wind_rmdir -unwind_lookup -unwind_create -unwind_open -unwind_readv -unwind_writev -unwind_opendir -unwind_readdir -unwind_readdirp -unwind_stat -unwind_fstat -unwind_statfs -unwind_setxattr -unwind_getxattr -unwind_fsetxattr -unwind_fgetxattr -unwind_removexattr -unwind_fremovexattr -unwind_link -unwind_symlink -unwind_readlink -unwind_unlink -unwind_mkdir -unwind_rmdir diff --git a/xlators/features/glupy/src/glupy/Makefile.am b/xlators/features/glupy/src/glupy/Makefile.am deleted file mode 100644 index 573d2da12e1..00000000000 --- a/xlators/features/glupy/src/glupy/Makefile.am +++ /dev/null @@ -1,5 +0,0 @@ -# Install __init__.py into the Python site-packages area -pyglupydir = @BUILD_PYTHON_SITE_PACKAGES@/gluster/glupy -pyglupy_PYTHON = __init__.py - -CLEANFILES = diff --git a/xlators/features/glupy/src/glupy/__init__.py b/xlators/features/glupy/src/glupy/__init__.py deleted file mode 100644 index b9fc3700fa6..00000000000 --- a/xlators/features/glupy/src/glupy/__init__.py +++ /dev/null @@ -1,852 +0,0 @@ -## -## Copyright (c) 2006-2014 Red Hat, Inc. 
<http://www.redhat.com> -## This file is part of GlusterFS. -## -## This file is licensed to you under your choice of the GNU Lesser -## General Public License, version 3 or any later version (LGPLv3 or -## later), or the GNU General Public License, version 2 (GPLv2), in all -## cases as published by the Free Software Foundation. -## - -import sys -import os -from ctypes import * - -dl = CDLL(os.getenv("PATH_GLUSTERFS_GLUPY_MODULE", ""),RTLD_GLOBAL) - - -class call_frame_t (Structure): - pass - -class dev_t (Structure): - pass - - -class dict_t (Structure): - pass - - -class gf_dirent_t (Structure): - pass - - -class iobref_t (Structure): - pass - - -class iovec_t (Structure): - pass - - -class list_head (Structure): - pass - -list_head._fields_ = [ - ("next", POINTER(list_head)), - ("prev", POINTER(list_head)) - ] - - -class rwxperm_t (Structure): - _fields_ = [ - ("read", c_uint8, 1), - ("write", c_uint8, 1), - ("execn", c_uint8, 1) - ] - - -class statvfs_t (Structure): - pass - - -class xlator_t (Structure): - pass - - -class ia_prot_t (Structure): - _fields_ = [ - ("suid", c_uint8, 1), - ("sgid", c_uint8, 1), - ("sticky", c_uint8, 1), - ("owner", rwxperm_t), - ("group", rwxperm_t), - ("other", rwxperm_t) - ] - -# For checking file type. 
-(IA_INVAL, IA_IFREG, IA_IFDIR, IA_IFLNK, IA_IFBLK, IA_IFCHR, IA_IFIFO, - IA_IFSOCK) = xrange(8) - - -class iatt_t (Structure): - _fields_ = [ - ("ia_no", c_uint64), - ("ia_gfid", c_ubyte * 16), - ("ia_dev", c_uint64), - ("ia_type", c_uint), - ("ia_prot", ia_prot_t), - ("ia_nlink", c_uint32), - ("ia_uid", c_uint32), - ("ia_gid", c_uint32), - ("ia_rdev", c_uint64), - ("ia_size", c_uint64), - ("ia_blksize", c_uint32), - ("ia_blocks", c_uint64), - ("ia_atime", c_uint32 ), - ("ia_atime_nsec", c_uint32), - ("ia_mtime", c_uint32), - ("ia_mtime_nsec", c_uint32), - ("ia_ctime", c_uint32), - ("ia_ctime_nsec", c_uint32) - ] - - -class mem_pool (Structure): - _fields_ = [ - ("list", list_head), - ("hot_count", c_int), - ("cold_count", c_int), - ("lock", c_void_p), - ("padded_sizeof_type", c_ulong), - ("pool", c_void_p), - ("pool_end", c_void_p), - ("real_sizeof_type", c_int), - ("alloc_count", c_uint64), - ("pool_misses", c_uint64), - ("max_alloc", c_int), - ("curr_stdalloc", c_int), - ("max_stdalloc", c_int), - ("name", c_char_p), - ("global_list", list_head) - ] - - -class U_ctx_key_inode (Union): - _fields_ = [ - ("key", c_uint64), - ("xl_key", POINTER(xlator_t)) - ] - - -class U_ctx_value1 (Union): - _fields_ = [ - ("value1", c_uint64), - ("ptr1", c_void_p) - ] - - -class U_ctx_value2 (Union): - _fields_ = [ - ("value2", c_uint64), - ("ptr2", c_void_p) - ] - -class inode_ctx (Structure): - _anonymous_ = ("u_key","u_value1","u_value2",) - _fields_ = [ - ("u_key", U_ctx_key_inode), - ("u_value1", U_ctx_value1), - ("u_value2", U_ctx_value2) - ] - -class inode_t (Structure): - pass - -class inode_table_t (Structure): - _fields_ = [ - ("lock", c_void_p), - ("hashsize", c_size_t), - ("name", c_char_p), - ("root", POINTER(inode_t)), - ("xl", POINTER(xlator_t)), - ("lru_limit", c_uint32), - ("inode_hash", POINTER(list_head)), - ("name_hash", POINTER(list_head)), - ("active", list_head), - ("active_size", c_uint32), - ("lru", list_head), - ("lru_size", c_uint32), - ("purge", 
list_head), - ("purge_size", c_uint32), - ("inode_pool", POINTER(mem_pool)), - ("dentry_pool", POINTER(mem_pool)), - ("fd_mem_pool", POINTER(mem_pool)) - ] - -inode_t._fields_ = [ - ("table", POINTER(inode_table_t)), - ("gfid", c_ubyte * 16), - ("lock", c_void_p), - ("nlookup", c_uint64), - ("fd_count", c_uint32), - ("ref", c_uint32), - ("ia_type", c_uint), - ("fd_list", list_head), - ("dentry_list", list_head), - ("hashv", list_head), - ("listv", list_head), - ("ctx", POINTER(inode_ctx)) - ] - - - -class U_ctx_key_fd (Union): - _fields_ = [ - ("key", c_uint64), - ("xl_key", c_void_p) - ] - -class fd_lk_ctx (Structure): - _fields_ = [ - ("lk_list", list_head), - ("ref", c_int), - ("lock", c_void_p) - ] - -class fd_ctx (Structure): - _anonymous_ = ("u_key","u_value1") - _fields_ = [ - ("u_key", U_ctx_key_fd), - ("u_value1", U_ctx_value1) - ] - -class fd_t (Structure): - _fields_ = [ - ("pid", c_uint64), - ("flags", c_int32), - ("refcount", c_int32), - ("inode_list", list_head), - ("inode", POINTER(inode_t)), - ("lock", c_void_p), - ("ctx", POINTER(fd_ctx)), - ("xl_count", c_int), - ("lk_ctx", POINTER(fd_lk_ctx)), - ("anonymous", c_uint) - ] - -class loc_t (Structure): - _fields_ = [ - ("path", c_char_p), - ("name", c_char_p), - ("inode", POINTER(inode_t)), - ("parent", POINTER(inode_t)), - ("gfid", c_ubyte * 16), - ("pargfid", c_ubyte * 16), - ] - - - -def _init_op (a_class, fop, cbk, wind, unwind): - # Decorators, used by translators. We could pass the signatures as - # parameters, but it's actually kind of nice to keep them around for - # inspection. - a_class.fop_type = apply(CFUNCTYPE,a_class.fop_sig) - a_class.cbk_type = apply(CFUNCTYPE,a_class.cbk_sig) - # Dispatch-function registration. - fop.restype = None - fop.argtypes = [ c_long, a_class.fop_type ] - # Callback-function registration. - cbk.restype = None - cbk.argtypes = [ c_long, a_class.cbk_type ] - # STACK_WIND function. 
- wind.restype = None - wind.argtypes = list(a_class.fop_sig[1:]) - # STACK_UNWIND function. - unwind.restype = None - unwind.argtypes = list(a_class.cbk_sig[1:]) - -class OpLookup: - fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t), - POINTER(loc_t), POINTER(dict_t)) - cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t), - c_int, c_int, POINTER(inode_t), POINTER(iatt_t), - POINTER(dict_t), POINTER(iatt_t)) -_init_op (OpLookup, dl.set_lookup_fop, dl.set_lookup_cbk, - dl.wind_lookup, dl.unwind_lookup) - -class OpCreate: - fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t), - POINTER(loc_t), c_int, c_uint, c_uint, POINTER(fd_t), - POINTER(dict_t)) - cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t), - c_int, c_int, POINTER(fd_t), POINTER(inode_t), - POINTER(iatt_t), POINTER(iatt_t), POINTER(iatt_t), - POINTER(dict_t)) -_init_op (OpCreate, dl.set_create_fop, dl.set_create_cbk, - dl.wind_create, dl.unwind_create) - -class OpOpen: - fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t), - POINTER(loc_t), c_int, POINTER(fd_t), POINTER(dict_t)) - cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t), - c_int, c_int, POINTER(fd_t), POINTER(dict_t)) -_init_op (OpOpen, dl.set_open_fop, dl.set_open_cbk, - dl.wind_open, dl.unwind_open) - -class OpReadv: - fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t), - POINTER(fd_t), c_size_t, c_long, c_uint32, POINTER(dict_t)) - cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t), - c_int, c_int, POINTER(iovec_t), c_int, POINTER(iatt_t), - POINTER(iobref_t), POINTER(dict_t)) -_init_op (OpReadv, dl.set_readv_fop, dl.set_readv_cbk, - dl.wind_readv, dl.unwind_readv) -class OpWritev: - fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t), - POINTER(fd_t), POINTER(iovec_t), c_int, c_long, c_uint32, - POINTER(iobref_t), POINTER(dict_t)) - cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t), - c_int, c_int, POINTER(iatt_t), 
POINTER(iatt_t), - POINTER(dict_t)) -_init_op (OpWritev, dl.set_writev_fop, dl.set_writev_cbk, - dl.wind_writev, dl.unwind_writev) - -class OpOpendir: - fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t), - POINTER(loc_t), POINTER(fd_t) ,POINTER(dict_t)) - cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t), - c_int, c_int, POINTER(fd_t), POINTER(dict_t)) -_init_op (OpOpendir, dl.set_opendir_fop, dl.set_opendir_cbk, - dl.wind_opendir, dl.unwind_opendir) - -class OpReaddir: - fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t), - POINTER(fd_t), c_size_t, c_long, POINTER(dict_t)) - cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t), - c_int, c_int, POINTER(gf_dirent_t), POINTER(dict_t)) -_init_op (OpReaddir, dl.set_readdir_fop, dl.set_readdir_cbk, - dl.wind_readdir, dl.unwind_readdir) - -class OpReaddirp: - fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t), - POINTER(fd_t), c_size_t, c_long, POINTER(dict_t)) - cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t), - c_int, c_int, POINTER(gf_dirent_t), POINTER(dict_t)) -_init_op (OpReaddirp, dl.set_readdirp_fop, dl.set_readdirp_cbk, - dl.wind_readdirp, dl.unwind_readdirp) - -class OpStat: - fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t), - POINTER(loc_t), POINTER(dict_t)) - cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t), - c_int, c_int, POINTER(iatt_t), POINTER(dict_t)) -_init_op (OpStat, dl.set_stat_fop, dl.set_stat_cbk, - dl.wind_stat, dl.unwind_stat) - -class OpFstat: - fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t), - POINTER(fd_t), POINTER(dict_t)) - cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t), - c_int, c_int, POINTER(iatt_t), POINTER(dict_t)) -_init_op (OpFstat, dl.set_fstat_fop, dl.set_fstat_cbk, - dl.wind_fstat, dl.unwind_fstat) - -class OpStatfs: - fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t), - POINTER(loc_t), POINTER(dict_t)) - cbk_sig = (c_int, 
POINTER(call_frame_t), c_long, POINTER(xlator_t), - c_int, c_int, POINTER(statvfs_t), POINTER(dict_t)) -_init_op (OpStatfs, dl.set_statfs_fop, dl.set_statfs_cbk, - dl.wind_statfs, dl.unwind_statfs) - - -class OpSetxattr: - fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t), - POINTER(loc_t), POINTER(dict_t), c_int32, - POINTER (dict_t)) - cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t), - c_int, c_int, POINTER(dict_t)) -_init_op (OpSetxattr, dl.set_setxattr_fop, dl.set_setxattr_cbk, - dl.wind_setxattr, dl.unwind_setxattr) - -class OpGetxattr: - fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t), - POINTER(loc_t), c_char_p, POINTER (dict_t)) - cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t), - c_int, c_int, POINTER(dict_t), POINTER(dict_t)) -_init_op (OpGetxattr, dl.set_getxattr_fop, dl.set_getxattr_cbk, - dl.wind_getxattr, dl.unwind_getxattr) - -class OpFsetxattr: - fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t), - POINTER(fd_t), POINTER(dict_t), c_int32, - POINTER (dict_t)) - cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t), - c_int, c_int, POINTER(dict_t)) -_init_op (OpFsetxattr, dl.set_fsetxattr_fop, dl.set_fsetxattr_cbk, - dl.wind_fsetxattr, dl.unwind_fsetxattr) - -class OpFgetxattr: - fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t), - POINTER(fd_t), c_char_p, POINTER (dict_t)) - cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t), - c_int, c_int, POINTER(dict_t), POINTER(dict_t)) -_init_op (OpFgetxattr, dl.set_fgetxattr_fop, dl.set_fgetxattr_cbk, - dl.wind_fgetxattr, dl.unwind_fgetxattr) - -class OpRemovexattr: - fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t), - POINTER(loc_t), c_char_p, POINTER(dict_t)) - cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t), - c_int, c_int, POINTER(dict_t)) -_init_op (OpRemovexattr, dl.set_removexattr_fop, dl.set_removexattr_cbk, - dl.wind_removexattr, dl.unwind_removexattr) - - -class 
OpFremovexattr: - fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t), - POINTER(fd_t), c_char_p, POINTER(dict_t)) - cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t), - c_int, c_int, POINTER(dict_t)) -_init_op (OpFremovexattr, dl.set_fremovexattr_fop, dl.set_fremovexattr_cbk, - dl.wind_fremovexattr, dl.unwind_fremovexattr) - -class OpLink: - fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t), - POINTER(loc_t), POINTER(loc_t), POINTER(dict_t)) - cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t), - c_int, c_int, POINTER(inode_t), POINTER(iatt_t), - POINTER(iatt_t), POINTER(iatt_t), POINTER(dict_t)) -_init_op (OpLink, dl.set_link_fop, dl.set_link_cbk, - dl.wind_link, dl.unwind_link) - -class OpSymlink: - fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t), - c_char_p, POINTER(loc_t), c_uint, POINTER(dict_t)) - cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t), - c_int, c_int, POINTER(inode_t), POINTER(iatt_t), - POINTER(iatt_t), POINTER(iatt_t), POINTER(dict_t)) -_init_op (OpSymlink, dl.set_symlink_fop, dl.set_symlink_cbk, - dl.wind_symlink, dl.unwind_symlink) - -class OpUnlink: - fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t), - POINTER(loc_t), c_int, POINTER(dict_t)) - cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t), - c_int, c_int, POINTER(iatt_t), POINTER(iatt_t), - POINTER(dict_t)) -_init_op (OpUnlink, dl.set_unlink_fop, dl.set_unlink_cbk, - dl.wind_unlink, dl.unwind_unlink) - -class OpReadlink: - fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t), - POINTER(loc_t), c_size_t, POINTER(dict_t)) - cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t), - c_int, c_int, c_char_p, POINTER(iatt_t), POINTER(dict_t)) -_init_op (OpReadlink, dl.set_readlink_fop, dl.set_readlink_cbk, - dl.wind_readlink, dl.unwind_readlink) - -class OpMkdir: - fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t), - POINTER(loc_t), c_uint, c_uint, POINTER(dict_t)) 
- cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t), - c_int, c_int, POINTER(inode_t), POINTER(iatt_t), - POINTER(iatt_t), POINTER(iatt_t), POINTER(dict_t)) -_init_op (OpMkdir, dl.set_mkdir_fop, dl.set_mkdir_cbk, - dl.wind_mkdir, dl.unwind_mkdir) - -class OpRmdir: - fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t), - POINTER(loc_t), c_int, POINTER(dict_t)) - cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t), - c_int, c_int, POINTER(iatt_t), POINTER(iatt_t), - POINTER(dict_t)) -_init_op (OpRmdir, dl.set_rmdir_fop, dl.set_rmdir_cbk, - dl.wind_rmdir, dl.unwind_rmdir) - - -class Translator: - def __init__ (self, c_this): - # This is only here to keep references to the stubs we create, - # because ctypes doesn't and glupy.so can't because it doesn't - # get a pointer to the actual Python object. It's a dictionary - # instead of a list in case we ever allow changing fops/cbks - # after initialization and need to look them up. - self.stub_refs = {} - funcs = dir(self.__class__) - if "lookup_fop" in funcs: - @OpLookup.fop_type - def stub (frame, this, loc, xdata, s=self): - return s.lookup_fop (frame, this, loc, xdata) - self.stub_refs["lookup_fop"] = stub - dl.set_lookup_fop(c_this,stub) - if "lookup_cbk" in funcs: - @OpLookup.cbk_type - def stub (frame, cookie, this, op_ret, op_errno, inode, - buf, xdata, postparent, s=self): - return s.lookup_cbk(frame, cookie, this, op_ret, - op_errno, inode, buf, xdata, - postparent) - self.stub_refs["lookup_cbk"] = stub - dl.set_lookup_cbk(c_this,stub) - if "create_fop" in funcs: - @OpCreate.fop_type - def stub (frame, this, loc, flags, mode, umask, fd, - xdata, s=self): - return s.create_fop (frame, this, loc, flags, - mode, umask, fd, xdata) - self.stub_refs["create_fop"] = stub - dl.set_create_fop(c_this,stub) - if "create_cbk" in funcs: - @OpCreate.cbk_type - def stub (frame, cookie, this, op_ret, op_errno, fd, - inode, buf, preparent, postparent, xdata, - s=self): - return s.create_cbk 
(frame, cookie, this, - op_ret, op_errno, fd, - inode, buf, preparent, - postparent, xdata) - self.stub_refs["create_cbk"] = stub - dl.set_create_cbk(c_this,stub) - if "open_fop" in funcs: - @OpOpen.fop_type - def stub (frame, this, loc, flags, fd, - xdata, s=self): - return s.open_fop (frame, this, loc, flags, - fd, xdata) - self.stub_refs["open_fop"] = stub - dl.set_open_fop(c_this,stub) - if "open_cbk" in funcs: - @OpOpen.cbk_type - def stub (frame, cookie, this, op_ret, op_errno, fd, - xdata, s=self): - return s.open_cbk (frame, cookie, this, - op_ret, op_errno, fd, - xdata) - self.stub_refs["open_cbk"] = stub - dl.set_open_cbk(c_this,stub) - if "readv_fop" in funcs: - @OpReadv.fop_type - def stub (frame, this, fd, size, offset, flags, - xdata, s=self): - return s.readv_fop (frame, this, fd, size, - offset, flags, xdata) - self.stub_refs["readv_fop"] = stub - dl.set_readv_fop(c_this,stub) - if "readv_cbk" in funcs: - @OpReadv.cbk_type - def stub (frame, cookie, this, op_ret, op_errno, - vector, count, stbuf, iobref, xdata, - s=self): - return s.readv_cbk (frame, cookie, this, - op_ret, op_errno, vector, - count, stbuf, iobref, - xdata) - self.stub_refs["readv_cbk"] = stub - dl.set_readv_cbk(c_this,stub) - if "writev_fop" in funcs: - @OpWritev.fop_type - def stub (frame, this, fd, vector, count, - offset, flags, iobref, xdata, s=self): - return s.writev_fop (frame, this, fd, vector, - count, offset, flags, - iobref, xdata) - self.stub_refs["writev_fop"] = stub - dl.set_writev_fop(c_this,stub) - if "writev_cbk" in funcs: - @OpWritev.cbk_type - def stub (frame, cookie, this, op_ret, op_errno, - prebuf, postbuf, xdata, s=self): - return s.writev_cbk (frame, cookie, this, - op_ret, op_errno, prebuf, - postbuf, xdata) - self.stub_refs["writev_cbk"] = stub - dl.set_writev_cbk(c_this,stub) - if "opendir_fop" in funcs: - @OpOpendir.fop_type - def stub (frame, this, loc, fd, xdata, s=self): - return s.opendir_fop (frame, this, loc, fd, - xdata) - 
self.stub_refs["opendir_fop"] = stub - dl.set_opendir_fop(c_this,stub) - if "opendir_cbk" in funcs: - @OpOpendir.cbk_type - def stub (frame, cookie, this, op_ret, op_errno, fd, - xdata, s=self): - return s.opendir_cbk(frame, cookie, this, - op_ret, op_errno, fd, - xdata) - self.stub_refs["opendir_cbk"] = stub - dl.set_opendir_cbk(c_this,stub) - if "readdir_fop" in funcs: - @OpReaddir.fop_type - def stub (frame, this, fd, size, offset, xdata, s=self): - return s.readdir_fop (frame, this, fd, size, - offset, xdata) - self.stub_refs["readdir_fop"] = stub - dl.set_readdir_fop(c_this,stub) - if "readdir_cbk" in funcs: - @OpReaddir.cbk_type - def stub (frame, cookie, this, op_ret, op_errno, - entries, xdata, s=self): - return s.readdir_cbk(frame, cookie, this, - op_ret, op_errno, entries, - xdata) - self.stub_refs["readdir_cbk"] = stub - dl.set_readdir_cbk(c_this,stub) - if "readdirp_fop" in funcs: - @OpReaddirp.fop_type - def stub (frame, this, fd, size, offset, xdata, s=self): - return s.readdirp_fop (frame, this, fd, size, - offset, xdata) - self.stub_refs["readdirp_fop"] = stub - dl.set_readdirp_fop(c_this,stub) - if "readdirp_cbk" in funcs: - @OpReaddirp.cbk_type - def stub (frame, cookie, this, op_ret, op_errno, - entries, xdata, s=self): - return s.readdirp_cbk (frame, cookie, this, - op_ret, op_errno, - entries, xdata) - self.stub_refs["readdirp_cbk"] = stub - dl.set_readdirp_cbk(c_this,stub) - if "stat_fop" in funcs: - @OpStat.fop_type - def stub (frame, this, loc, xdata, s=self): - return s.stat_fop (frame, this, loc, xdata) - self.stub_refs["stat_fop"] = stub - dl.set_stat_fop(c_this,stub) - if "stat_cbk" in funcs: - @OpStat.cbk_type - def stub (frame, cookie, this, op_ret, op_errno, buf, - xdata, s=self): - return s.stat_cbk(frame, cookie, this, op_ret, - op_errno, buf, xdata) - self.stub_refs["stat_cbk"] = stub - dl.set_stat_cbk(c_this,stub) - if "fstat_fop" in funcs: - @OpFstat.fop_type - def stub (frame, this, fd, xdata, s=self): - return s.fstat_fop 
(frame, this, fd, xdata) - self.stub_refs["fstat_fop"] = stub - dl.set_fstat_fop(c_this,stub) - if "fstat_cbk" in funcs: - @OpFstat.cbk_type - def stub (frame, cookie, this, op_ret, op_errno, buf, - xdata, s=self): - return s.fstat_cbk(frame, cookie, this, op_ret, - op_errno, buf, xdata) - self.stub_refs["fstat_cbk"] = stub - dl.set_fstat_cbk(c_this,stub) - if "statfs_fop" in funcs: - @OpStatfs.fop_type - def stub (frame, this, loc, xdata, s=self): - return s.statfs_fop (frame, this, loc, xdata) - self.stub_refs["statfs_fop"] = stub - dl.set_statfs_fop(c_this,stub) - if "statfs_cbk" in funcs: - @OpStatfs.cbk_type - def stub (frame, cookie, this, op_ret, op_errno, buf, - xdata, s=self): - return s.statfs_cbk (frame, cookie, this, - op_ret, op_errno, buf, - xdata) - self.stub_refs["statfs_cbk"] = stub - dl.set_statfs_cbk(c_this,stub) - if "setxattr_fop" in funcs: - @OpSetxattr.fop_type - def stub (frame, this, loc, dictionary, flags, xdata, - s=self): - return s.setxattr_fop (frame, this, loc, - dictionary, flags, - xdata) - self.stub_refs["setxattr_fop"] = stub - dl.set_setxattr_fop(c_this,stub) - if "setxattr_cbk" in funcs: - @OpSetxattr.cbk_type - def stub (frame, cookie, this, op_ret, op_errno, xdata, - s=self): - return s.setxattr_cbk(frame, cookie, this, - op_ret, op_errno, xdata) - self.stub_refs["setxattr_cbk"] = stub - dl.set_setxattr_cbk(c_this,stub) - if "getxattr_fop" in funcs: - @OpGetxattr.fop_type - def stub (frame, this, loc, name, xdata, s=self): - return s.getxattr_fop (frame, this, loc, name, - xdata) - self.stub_refs["getxattr_fop"] = stub - dl.set_getxattr_fop(c_this,stub) - if "getxattr_cbk" in funcs: - @OpGetxattr.cbk_type - def stub (frame, cookie, this, op_ret, op_errno, - dictionary, xdata, s=self): - return s.getxattr_cbk(frame, cookie, this, - op_ret, op_errno, - dictionary, xdata) - self.stub_refs["getxattr_cbk"] = stub - dl.set_getxattr_cbk(c_this,stub) - if "fsetxattr_fop" in funcs: - @OpFsetxattr.fop_type - def stub (frame, this, fd, 
dictionary, flags, xdata, - s=self): - return s.fsetxattr_fop (frame, this, fd, - dictionary, flags, - xdata) - self.stub_refs["fsetxattr_fop"] = stub - dl.set_fsetxattr_fop(c_this,stub) - if "fsetxattr_cbk" in funcs: - @OpFsetxattr.cbk_type - def stub (frame, cookie, this, op_ret, op_errno, xdata, - s=self): - return s.fsetxattr_cbk(frame, cookie, this, - op_ret, op_errno, xdata) - self.stub_refs["fsetxattr_cbk"] = stub - dl.set_fsetxattr_cbk(c_this,stub) - if "fgetxattr_fop" in funcs: - @OpFgetxattr.fop_type - def stub (frame, this, fd, name, xdata, s=self): - return s.fgetxattr_fop (frame, this, fd, name, - xdata) - self.stub_refs["fgetxattr_fop"] = stub - dl.set_fgetxattr_fop(c_this,stub) - if "fgetxattr_cbk" in funcs: - @OpFgetxattr.cbk_type - def stub (frame, cookie, this, op_ret, op_errno, - dictionary, xdata, s=self): - return s.fgetxattr_cbk(frame, cookie, this, - op_ret, op_errno, - dictionary, xdata) - self.stub_refs["fgetxattr_cbk"] = stub - dl.set_fgetxattr_cbk(c_this,stub) - if "removexattr_fop" in funcs: - @OpRemovexattr.fop_type - def stub (frame, this, loc, name, xdata, s=self): - return s.removexattr_fop (frame, this, loc, - name, xdata) - self.stub_refs["removexattr_fop"] = stub - dl.set_removexattr_fop(c_this,stub) - if "removexattr_cbk" in funcs: - @OpRemovexattr.cbk_type - def stub (frame, cookie, this, op_ret, op_errno, - xdata, s=self): - return s.removexattr_cbk(frame, cookie, this, - op_ret, op_errno, - xdata) - self.stub_refs["removexattr_cbk"] = stub - dl.set_removexattr_cbk(c_this,stub) - if "fremovexattr_fop" in funcs: - @OpFremovexattr.fop_type - def stub (frame, this, fd, name, xdata, s=self): - return s.fremovexattr_fop (frame, this, fd, - name, xdata) - self.stub_refs["fremovexattr_fop"] = stub - dl.set_fremovexattr_fop(c_this,stub) - if "fremovexattr_cbk" in funcs: - @OpFremovexattr.cbk_type - def stub (frame, cookie, this, op_ret, op_errno, - xdata, s=self): - return s.fremovexattr_cbk(frame, cookie, this, - op_ret, op_errno, - 
xdata) - self.stub_refs["fremovexattr_cbk"] = stub - dl.set_fremovexattr_cbk(c_this,stub) - if "link_fop" in funcs: - @OpLink.fop_type - def stub (frame, this, oldloc, newloc, - xdata, s=self): - return s.link_fop (frame, this, oldloc, - newloc, xdata) - self.stub_refs["link_fop"] = stub - dl.set_link_fop(c_this,stub) - if "link_cbk" in funcs: - @OpLink.cbk_type - def stub (frame, cookie, this, op_ret, op_errno, - inode, buf, preparent, postparent, xdata, - s=self): - return s.link_cbk (frame, cookie, this, - op_ret, op_errno, inode, - buf, preparent, - postparent, xdata) - self.stub_refs["link_cbk"] = stub - dl.set_link_cbk(c_this,stub) - if "symlink_fop" in funcs: - @OpSymlink.fop_type - def stub (frame, this, linkname, loc, - umask, xdata, s=self): - return s.symlink_fop (frame, this, linkname, - loc, umask, xdata) - self.stub_refs["symlink_fop"] = stub - dl.set_symlink_fop(c_this,stub) - if "symlink_cbk" in funcs: - @OpSymlink.cbk_type - def stub (frame, cookie, this, op_ret, op_errno, - inode, buf, preparent, postparent, xdata, - s=self): - return s.symlink_cbk (frame, cookie, this, - op_ret, op_errno, inode, - buf, preparent, - postparent, xdata) - self.stub_refs["symlink_cbk"] = stub - dl.set_symlink_cbk(c_this,stub) - if "unlink_fop" in funcs: - @OpUnlink.fop_type - def stub (frame, this, loc, xflags, - xdata, s=self): - return s.unlink_fop (frame, this, loc, - xflags, xdata) - self.stub_refs["unlink_fop"] = stub - dl.set_unlink_fop(c_this,stub) - if "unlink_cbk" in funcs: - @OpUnlink.cbk_type - def stub (frame, cookie, this, op_ret, op_errno, - preparent, postparent, xdata, s=self): - return s.unlink_cbk (frame, cookie, this, - op_ret, op_errno, - preparent, postparent, - xdata) - self.stub_refs["unlink_cbk"] = stub - dl.set_unlink_cbk(c_this,stub) - if "readlink_fop" in funcs: - @OpReadlink.fop_type - def stub (frame, this, loc, size, - xdata, s=self): - return s.readlink_fop (frame, this, loc, - size, xdata) - self.stub_refs["readlink_fop"] = stub - 
dl.set_readlink_fop(c_this,stub) - if "readlink_cbk" in funcs: - @OpReadlink.cbk_type - def stub (frame, cookie, this, op_ret, op_errno, - path, buf, xdata, s=self): - return s.readlink_cbk (frame, cookie, this, - op_ret, op_errno, - path, buf, xdata) - self.stub_refs["readlink_cbk"] = stub - dl.set_readlink_cbk(c_this,stub) - if "mkdir_fop" in funcs: - @OpMkdir.fop_type - def stub (frame, this, loc, mode, umask, xdata, - s=self): - return s.mkdir_fop (frame, this, loc, mode, - umask, xdata) - self.stub_refs["mkdir_fop"] = stub - dl.set_mkdir_fop(c_this,stub) - if "mkdir_cbk" in funcs: - @OpMkdir.cbk_type - def stub (frame, cookie, this, op_ret, op_errno, inode, - buf, preparent, postparent, xdata, s=self): - return s.mkdir_cbk (frame, cookie, this, - op_ret, op_errno, inode, - buf, preparent, - postparent, xdata) - self.stub_refs["mkdir_cbk"] = stub - dl.set_mkdir_cbk(c_this,stub) - if "rmdir_fop" in funcs: - @OpRmdir.fop_type - def stub (frame, this, loc, xflags, - xdata, s=self): - return s.rmdir_fop (frame, this, loc, - xflags, xdata) - self.stub_refs["rmdir_fop"] = stub - dl.set_rmdir_fop(c_this,stub) - if "rmdir_cbk" in funcs: - @OpRmdir.cbk_type - def stub (frame, cookie, this, op_ret, op_errno, - preparent, postparent, xdata, s=self): - return s.rmdir_cbk (frame, cookie, this, - op_ret, op_errno, - preparent, postparent, - xdata) - self.stub_refs["rmdir_cbk"] = stub - dl.set_rmdir_cbk(c_this,stub) diff --git a/xlators/features/glupy/src/setup.py.in b/xlators/features/glupy/src/setup.py.in deleted file mode 100644 index 611e9695f76..00000000000 --- a/xlators/features/glupy/src/setup.py.in +++ /dev/null @@ -1,24 +0,0 @@ -from distutils.core import setup - -DESC = """GlusterFS is a distributed file-system capable of scaling to -several petabytes. It aggregates various storage bricks over Infiniband -RDMA or TCP/IP interconnect into one large parallel network file system. 
-GlusterFS is one of the most sophisticated file systems in terms of -features and extensibility. It borrows a powerful concept called -Translators from GNU Hurd kernel. Much of the code in GlusterFS is in -user space and easily manageable. - -This package contains Glupy, the Python translator interface for GlusterFS.""" - -setup( - name='glusterfs-glupy', - version='@PACKAGE_VERSION@', - description='Glupy is the Python translator interface for GlusterFS', - long_description=DESC, - author='Gluster Community', - author_email='gluster-devel@gluster.org', - license='LGPLv3', - url='http://gluster.org/', - package_dir={'gluster':''}, - packages=['gluster'] -) diff --git a/xlators/features/index/src/Makefile.am b/xlators/features/index/src/Makefile.am index a19026c6f94..c71c238c163 100644 --- a/xlators/features/index/src/Makefile.am +++ b/xlators/features/index/src/Makefile.am @@ -1,15 +1,17 @@ +if WITH_SERVER xlator_LTLIBRARIES = index.la +endif xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features -index_la_LDFLAGS = $(GF_XLATOR_DEFAULT_LDFLAGS) +index_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) index_la_SOURCES = index.c index_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la -noinst_HEADERS = index.h index-mem-types.h +noinst_HEADERS = index.h index-mem-types.h index-messages.h -AM_CPPFLAGS = $(GF_CPPFLAGS) \ - -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/xdr/src \ +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ + -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \ -I$(top_srcdir)/rpc/rpc-lib/src AM_CFLAGS = -Wall $(GF_CFLAGS) diff --git a/xlators/features/index/src/index-mem-types.h b/xlators/features/index/src/index-mem-types.h index 553d492dfbf..58833d0ec9b 100644 --- a/xlators/features/index/src/index-mem-types.h +++ b/xlators/features/index/src/index-mem-types.h @@ -8,15 +8,16 @@ cases as published by the Free Software Foundation. 
*/ -#ifndef __QUIESCE_MEM_TYPES_H__ -#define __QUIESCE_MEM_TYPES_H__ +#ifndef __INDEX_MEM_TYPES_H__ +#define __INDEX_MEM_TYPES_H__ -#include "mem-types.h" +#include <glusterfs/mem-types.h> enum gf_index_mem_types_ { - gf_index_mt_priv_t = gf_common_mt_end + 1, - gf_index_inode_ctx_t = gf_common_mt_end + 2, - gf_index_fd_ctx_t = gf_common_mt_end + 3, - gf_index_mt_end + gf_index_mt_priv_t = gf_common_mt_end + 1, + gf_index_inode_ctx_t, + gf_index_fd_ctx_t, + gf_index_mt_local_t, + gf_index_mt_end }; #endif diff --git a/xlators/features/index/src/index-messages.h b/xlators/features/index/src/index-messages.h new file mode 100644 index 00000000000..364f17cd34e --- /dev/null +++ b/xlators/features/index/src/index-messages.h @@ -0,0 +1,33 @@ +/* + Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. + */ + +#ifndef _INDEX_MESSAGES_H_ +#define _INDEX_MESSAGES_H_ + +#include <glusterfs/glfs-message-id.h> + +/* To add new message IDs, append new identifiers at the end of the list. + * + * Never remove a message ID. If it's not used anymore, you can rename it or + * leave it as it is, but not delete it. This is to prevent reutilization of + * IDs by other messages. + * + * The component name must match one of the entries defined in + * glfs-message-id.h. 
+ */ + +GLFS_MSGID(INDEX, INDEX_MSG_INDEX_DIR_CREATE_FAILED, + INDEX_MSG_INDEX_READDIR_FAILED, INDEX_MSG_INDEX_ADD_FAILED, + INDEX_MSG_INDEX_DEL_FAILED, INDEX_MSG_DICT_SET_FAILED, + INDEX_MSG_INODE_CTX_GET_SET_FAILED, INDEX_MSG_INVALID_ARGS, + INDEX_MSG_FD_OP_FAILED, INDEX_MSG_WORKER_THREAD_CREATE_FAILED, + INDEX_MSG_INVALID_GRAPH); + +#endif /* !_INDEX_MESSAGES_H_ */ diff --git a/xlators/features/index/src/index.c b/xlators/features/index/src/index.c index 5de8d9b4668..4abb2c73ce5 100644 --- a/xlators/features/index/src/index.c +++ b/xlators/features/index/src/index.c @@ -8,1442 +8,2675 @@ cases as published by the Free Software Foundation. */ #include "index.h" -#include "options.h" +#include <glusterfs/options.h> #include "glusterfs3-xdr.h" -#include "syscall.h" +#include <glusterfs/syscall.h> +#include <glusterfs/syncop.h> +#include <glusterfs/common-utils.h> +#include "index-messages.h" +#include <ftw.h> +#include <libgen.h> /* for dirname() */ +#include <signal.h> #define XATTROP_SUBDIR "xattrop" +#define DIRTY_SUBDIR "dirty" +#define ENTRY_CHANGES_SUBDIR "entry-changes" -call_stub_t * -__index_dequeue (struct list_head *callstubs) +struct index_syncop_args { + inode_t *parent; + gf_dirent_t *entries; + char *path; +}; + +static char *index_vgfid_xattrs[XATTROP_TYPE_END] = { + [XATTROP] = GF_XATTROP_INDEX_GFID, + [DIRTY] = GF_XATTROP_DIRTY_GFID, + [ENTRY_CHANGES] = GF_XATTROP_ENTRY_CHANGES_GFID}; + +static char *index_subdirs[XATTROP_TYPE_END] = { + [XATTROP] = XATTROP_SUBDIR, + [DIRTY] = DIRTY_SUBDIR, + [ENTRY_CHANGES] = ENTRY_CHANGES_SUBDIR}; + +int +index_get_type_from_vgfid(index_priv_t *priv, uuid_t vgfid) { - call_stub_t *stub = NULL; + int i = 0; - if (!list_empty (callstubs)) { - stub = list_entry (callstubs->next, call_stub_t, list); - list_del_init (&stub->list); - } + for (i = 0; i < XATTROP_TYPE_END; i++) { + if (gf_uuid_compare(priv->internal_vgfid[i], vgfid) == 0) + return i; + } + return -1; +} - return stub; +gf_boolean_t 
+index_is_virtual_gfid(index_priv_t *priv, uuid_t vgfid) +{ + if (index_get_type_from_vgfid(priv, vgfid) < 0) + return _gf_false; + return _gf_true; } -static void -__index_enqueue (struct list_head *callstubs, call_stub_t *stub) +static int +__index_inode_ctx_get(inode_t *inode, xlator_t *this, index_inode_ctx_t **ctx) { - list_add_tail (&stub->list, callstubs); + int ret = 0; + index_inode_ctx_t *ictx = NULL; + uint64_t tmpctx = 0; + + ret = __inode_ctx_get(inode, this, &tmpctx); + if (!ret) { + ictx = (index_inode_ctx_t *)(long)tmpctx; + goto out; + } + ictx = GF_CALLOC(1, sizeof(*ictx), gf_index_inode_ctx_t); + if (!ictx) { + ret = -1; + goto out; + } + + INIT_LIST_HEAD(&ictx->callstubs); + ret = __inode_ctx_put(inode, this, (uint64_t)(uintptr_t)ictx); + if (ret) { + GF_FREE(ictx); + ictx = NULL; + goto out; + } +out: + if (ictx) + *ctx = ictx; + return ret; } -static void -worker_enqueue (xlator_t *this, call_stub_t *stub) +static int +index_inode_ctx_get(inode_t *inode, xlator_t *this, index_inode_ctx_t **ctx) { - index_priv_t *priv = NULL; + int ret = 0; - priv = this->private; - pthread_mutex_lock (&priv->mutex); - { - __index_enqueue (&priv->callstubs, stub); - pthread_cond_signal (&priv->cond); - } - pthread_mutex_unlock (&priv->mutex); + LOCK(&inode->lock); + { + ret = __index_inode_ctx_get(inode, this, ctx); + } + UNLOCK(&inode->lock); + + return ret; } -void * -index_worker (void *data) -{ - index_priv_t *priv = NULL; - xlator_t *this = NULL; - call_stub_t *stub = NULL; - int ret = 0; - - THIS = data; - this = data; - priv = this->private; - - for (;;) { - pthread_mutex_lock (&priv->mutex); - { - while (list_empty (&priv->callstubs)) { - ret = pthread_cond_wait (&priv->cond, - &priv->mutex); - } - - stub = __index_dequeue (&priv->callstubs); - } - pthread_mutex_unlock (&priv->mutex); +static gf_boolean_t +index_is_subdir_of_entry_changes(xlator_t *this, inode_t *inode) +{ + index_inode_ctx_t *ctx = NULL; + int ret = 0; - if (stub) /* guard against 
spurious wakeups */ - call_resume (stub); - } + if (!inode) + return _gf_false; - return NULL; + ret = index_inode_ctx_get(inode, this, &ctx); + if ((ret == 0) && !gf_uuid_is_null(ctx->virtual_pargfid)) + return _gf_true; + return _gf_false; } -int -__index_inode_ctx_get (inode_t *inode, xlator_t *this, index_inode_ctx_t **ctx) + +static int +index_get_type_from_vgfid_xattr(const char *name) { - int ret = 0; - index_inode_ctx_t *ictx = NULL; - uint64_t tmpctx = 0; + int i = 0; - ret = __inode_ctx_get (inode, this, &tmpctx); - if (!ret) { - ictx = (index_inode_ctx_t*) (long) tmpctx; - goto out; - } - ictx = GF_CALLOC (1, sizeof (*ictx), gf_index_inode_ctx_t); - if (!ictx) { - ret = -1; - goto out; - } + for (i = 0; i < XATTROP_TYPE_END; i++) { + if (strcmp(name, index_vgfid_xattrs[i]) == 0) + return i; + } + return -1; +} - INIT_LIST_HEAD (&ictx->callstubs); - ret = __inode_ctx_put (inode, this, (uint64_t)ictx); - if (ret) { - GF_FREE (ictx); - ictx = NULL; - goto out; - } -out: - if (ictx) - *ctx = ictx; - return ret; +gf_boolean_t +index_is_fop_on_internal_inode(xlator_t *this, inode_t *inode, uuid_t gfid) +{ + index_priv_t *priv = this->private; + uuid_t vgfid = {0}; + + if (!inode) + return _gf_false; + + if (gfid && !gf_uuid_is_null(gfid)) + gf_uuid_copy(vgfid, gfid); + else + gf_uuid_copy(vgfid, inode->gfid); + + if (index_is_virtual_gfid(priv, vgfid)) + return _gf_true; + if (index_is_subdir_of_entry_changes(this, inode)) + return _gf_true; + return _gf_false; } -int -index_inode_ctx_get (inode_t *inode, xlator_t *this, index_inode_ctx_t **ctx) +static gf_boolean_t +index_is_vgfid_xattr(const char *name) { - int ret = 0; + if (index_get_type_from_vgfid_xattr(name) < 0) + return _gf_false; + return _gf_true; +} - LOCK (&inode->lock); - { - ret = __index_inode_ctx_get (inode, this, ctx); - } - UNLOCK (&inode->lock); +call_stub_t * +__index_dequeue(struct list_head *callstubs) +{ + call_stub_t *stub = NULL; + + if (!list_empty(callstubs)) { + stub = 
list_entry(callstubs->next, call_stub_t, list); + list_del_init(&stub->list); + } - return ret; + return stub; } static void -make_index_dir_path (char *base, const char *subdir, - char *index_dir, size_t len) +__index_enqueue(struct list_head *callstubs, call_stub_t *stub) { - snprintf (index_dir, len, "%s/%s", base, subdir); + list_add_tail(&stub->list, callstubs); } -int -index_dir_create (xlator_t *this, const char *subdir) -{ - int ret = 0; - struct stat st = {0}; - char fullpath[PATH_MAX] = {0}; - char path[PATH_MAX] = {0}; - char *dir = NULL; - index_priv_t *priv = NULL; - size_t len = 0; - size_t pathlen = 0; - - priv = this->private; - make_index_dir_path (priv->index_basepath, subdir, fullpath, - sizeof (fullpath)); - ret = sys_stat (fullpath, &st); - if (!ret) { - if (!S_ISDIR (st.st_mode)) - ret = -2; - goto out; +static void +worker_enqueue(xlator_t *this, call_stub_t *stub) +{ + index_priv_t *priv = NULL; + + priv = this->private; + pthread_mutex_lock(&priv->mutex); + { + __index_enqueue(&priv->callstubs, stub); + GF_ATOMIC_INC(priv->stub_cnt); + pthread_cond_signal(&priv->cond); + } + pthread_mutex_unlock(&priv->mutex); +} + +void * +index_worker(void *data) +{ + index_priv_t *priv = NULL; + xlator_t *this = NULL; + call_stub_t *stub = NULL; + gf_boolean_t bye = _gf_false; + + THIS = data; + this = data; + priv = this->private; + + for (;;) { + pthread_mutex_lock(&priv->mutex); + { + while (list_empty(&priv->callstubs)) { + if (priv->down) { + bye = _gf_true; /*Avoid wait*/ + break; + } + (void)pthread_cond_wait(&priv->cond, &priv->mutex); + if (priv->down) { + bye = _gf_true; + break; + } + } + if (!bye) + stub = __index_dequeue(&priv->callstubs); + if (bye) { + priv->curr_count--; + if (priv->curr_count == 0) + pthread_cond_broadcast(&priv->cond); + } } + pthread_mutex_unlock(&priv->mutex); - pathlen = strlen (fullpath); - if ((pathlen > 1) && fullpath[pathlen - 1] == '/') - fullpath[pathlen - 1] = '\0'; - dir = strchr (fullpath, '/'); - while 
(dir) { - dir = strchr (dir + 1, '/'); - if (dir) - len = pathlen - strlen (dir); - else - len = pathlen; - strncpy (path, fullpath, len); - path[len] = '\0'; - ret = sys_mkdir (path, 0600); - if (ret && (errno != EEXIST)) - goto out; + if (stub) { /* guard against spurious wakeups */ + call_resume(stub); + GF_ATOMIC_DEC(priv->stub_cnt); } - ret = 0; + stub = NULL; + if (bye) + break; + } + + return NULL; +} + +static void +make_index_dir_path(char *base, const char *subdir, char *index_dir, size_t len) +{ + snprintf(index_dir, len, "%s/%s", base, subdir); +} + +int +index_dir_create(xlator_t *this, const char *subdir) +{ + int ret = 0; + struct stat st = {0}; + char fullpath[PATH_MAX] = {0}; + char path[PATH_MAX] = {0}; + char *dir = NULL; + index_priv_t *priv = NULL; + size_t len = 0; + size_t pathlen = 0; + + priv = this->private; + make_index_dir_path(priv->index_basepath, subdir, fullpath, + sizeof(fullpath)); + ret = sys_stat(fullpath, &st); + if (!ret) { + if (!S_ISDIR(st.st_mode)) + ret = -2; + goto out; + } + + pathlen = strlen(fullpath); + if ((pathlen > 1) && fullpath[pathlen - 1] == '/') + fullpath[pathlen - 1] = '\0'; + dir = strchr(fullpath, '/'); + while (dir) { + dir = strchr(dir + 1, '/'); + if (dir) + len = pathlen - strlen(dir); + else + len = pathlen; + strncpy(path, fullpath, len); + path[len] = '\0'; + ret = sys_mkdir(path, 0600); + if (ret && (errno != EEXIST)) + goto out; + } + ret = 0; out: - if (ret == -1) { - gf_log (this->name, GF_LOG_ERROR, "%s/%s: Failed to " - "create (%s)", priv->index_basepath, subdir, - strerror (errno)); - } else if (ret == -2) { - gf_log (this->name, GF_LOG_ERROR, "%s/%s: Failed to create, " - "path exists, not a directory ", priv->index_basepath, - subdir); - } - return ret; + if (ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, errno, + INDEX_MSG_INDEX_DIR_CREATE_FAILED, + "%s/%s: Failed to " + "create", + priv->index_basepath, subdir); + } else if (ret == -2) { + gf_msg(this->name, GF_LOG_ERROR, ENOTDIR, + 
INDEX_MSG_INDEX_DIR_CREATE_FAILED, + "%s/%s: Failed to " + "create, path exists, not a directory ", + priv->index_basepath, subdir); + } + return ret; } void -index_get_index (index_priv_t *priv, uuid_t index) +index_get_index(index_priv_t *priv, uuid_t index) { - LOCK (&priv->lock); - { - gf_uuid_copy (index, priv->index); - } - UNLOCK (&priv->lock); + LOCK(&priv->lock); + { + gf_uuid_copy(index, priv->index); + } + UNLOCK(&priv->lock); } void -index_generate_index (index_priv_t *priv, uuid_t index) +index_generate_index(index_priv_t *priv, uuid_t index) { - LOCK (&priv->lock); - { - //To prevent duplicate generates. - //This method fails if number of contending threads is greater - //than MAX_LINK count of the fs - if (!gf_uuid_compare (priv->index, index)) - gf_uuid_generate (priv->index); - gf_uuid_copy (index, priv->index); - } - UNLOCK (&priv->lock); + LOCK(&priv->lock); + { + // To prevent duplicate generates. + // This method fails if number of contending threads is greater + // than MAX_LINK count of the fs + if (!gf_uuid_compare(priv->index, index)) + gf_uuid_generate(priv->index); + gf_uuid_copy(index, priv->index); + } + UNLOCK(&priv->lock); } static void -make_index_path (char *base, const char *subdir, uuid_t index, - char *index_path, size_t len) +make_index_path(char *base, const char *subdir, uuid_t index, char *index_path, + size_t len) { - make_index_dir_path (base, subdir, index_path, len); - snprintf (index_path + strlen (index_path), len - strlen (index_path), - "/%s-%s", subdir, uuid_utoa (index)); + make_index_dir_path(base, subdir, index_path, len); + snprintf(index_path + strlen(index_path), len - strlen(index_path), + "/%s-%s", subdir, uuid_utoa(index)); } static void -make_gfid_path (char *base, const char *subdir, uuid_t gfid, - char *gfid_path, size_t len) +make_gfid_path(char *base, const char *subdir, uuid_t gfid, char *gfid_path, + size_t len) { - make_index_dir_path (base, subdir, gfid_path, len); - snprintf (gfid_path + strlen 
(gfid_path), len - strlen (gfid_path), - "/%s", uuid_utoa (gfid)); + make_index_dir_path(base, subdir, gfid_path, len); + snprintf(gfid_path + strlen(gfid_path), len - strlen(gfid_path), "/%s", + uuid_utoa(gfid)); } static void -make_file_path (char *base, const char *subdir, const char *filename, - char *file_path, size_t len) +make_file_path(char *base, const char *subdir, const char *filename, + char *file_path, size_t len) { - make_index_dir_path (base, subdir, file_path, len); - snprintf (file_path + strlen (file_path), len - strlen (file_path), - "/%s", filename); + make_index_dir_path(base, subdir, file_path, len); + snprintf(file_path + strlen(file_path), len - strlen(file_path), "/%s", + filename); } static int -is_index_file_current (char *filename, uuid_t priv_index) +is_index_file_current(char *filename, uuid_t priv_index, char *subdir) { - char current_index[GF_UUID_BUF_SIZE + 16] = {0, }; + char current_index[GF_UUID_BUF_SIZE + 16] = { + 0, + }; - snprintf (current_index, sizeof current_index, - "xattrop-%s", uuid_utoa(priv_index)); - return (!strcmp(filename, current_index)); + snprintf(current_index, sizeof current_index, "%s-%s", subdir, + uuid_utoa(priv_index)); + return (!strcmp(filename, current_index)); } static void -check_delete_stale_index_file (xlator_t *this, char *filename) +check_delete_stale_index_file(xlator_t *this, char *filename, char *subdir) { - int ret = 0; - struct stat st = {0}; - char filepath[PATH_MAX] = {0}; - index_priv_t *priv = NULL; + int ret = 0; + struct stat st = {0}; + char filepath[PATH_MAX] = {0}; + index_priv_t *priv = NULL; - priv = this->private; + priv = this->private; - if (is_index_file_current (filename, priv->index)) - return; + if (is_index_file_current(filename, priv->index, subdir)) + return; - make_file_path (priv->index_basepath, XATTROP_SUBDIR, - filename, filepath, sizeof (filepath)); - ret = sys_stat (filepath, &st); - if (!ret && st.st_nlink == 1) - sys_unlink (filepath); + 
make_file_path(priv->index_basepath, subdir, filename, filepath, + sizeof(filepath)); + ret = sys_stat(filepath, &st); + if (!ret && st.st_nlink == 1) + sys_unlink(filepath); +} + +static void +index_set_link_count(index_priv_t *priv, int64_t count, + index_xattrop_type_t type) +{ + switch (type) { + case XATTROP: + LOCK(&priv->lock); + { + priv->pending_count = count; + } + UNLOCK(&priv->lock); + break; + default: + break; + } +} + +static void +index_get_link_count(index_priv_t *priv, int64_t *count, + index_xattrop_type_t type) +{ + switch (type) { + case XATTROP: + LOCK(&priv->lock); + { + *count = priv->pending_count; + } + UNLOCK(&priv->lock); + break; + default: + break; + } +} + +static void +index_dec_link_count(index_priv_t *priv, index_xattrop_type_t type) +{ + switch (type) { + case XATTROP: + LOCK(&priv->lock); + { + priv->pending_count--; + if (priv->pending_count == 0) + priv->pending_count--; + } + UNLOCK(&priv->lock); + break; + default: + break; + } +} + +char * +index_get_subdir_from_type(index_xattrop_type_t type) +{ + if (type < XATTROP || type >= XATTROP_TYPE_END) + return NULL; + return index_subdirs[type]; +} + +char * +index_get_subdir_from_vgfid(index_priv_t *priv, uuid_t vgfid) +{ + return index_get_subdir_from_type(index_get_type_from_vgfid(priv, vgfid)); } static int -index_fill_readdir (fd_t *fd, index_fd_ctx_t *fctx, DIR *dir, off_t off, - size_t size, gf_dirent_t *entries) -{ - off_t in_case = -1; - off_t last_off = 0; - size_t filled = 0; - int count = 0; - char entrybuf[sizeof(struct dirent) + 256 + 8]; - struct dirent *entry = NULL; - int32_t this_size = -1; - gf_dirent_t *this_entry = NULL; - xlator_t *this = NULL; - - this = THIS; - if (!off) { - rewinddir (dir); - } else { - seekdir (dir, off); +index_fill_readdir(fd_t *fd, index_fd_ctx_t *fctx, DIR *dir, off_t off, + size_t size, gf_dirent_t *entries) +{ + off_t in_case = -1; + off_t last_off = 0; + size_t filled = 0; + int count = 0; + struct dirent *entry = NULL; + struct 
dirent scratch[2] = { + { + 0, + }, + }; + int32_t this_size = -1; + gf_dirent_t *this_entry = NULL; + xlator_t *this = NULL; + + this = THIS; + if (!off) { + rewinddir(dir); + } else { + seekdir(dir, off); #ifndef GF_LINUX_HOST_OS - if ((u_long)telldir(dir) != off && off != fctx->dir_eof) { - gf_log (THIS->name, GF_LOG_ERROR, - "seekdir(0x%llx) failed on dir=%p: " - "Invalid argument (offset reused from " - "another DIR * structure?)", off, dir); - errno = EINVAL; - count = -1; - goto out; - } -#endif /* GF_LINUX_HOST_OS */ + if ((u_long)telldir(dir) != off && off != fctx->dir_eof) { + gf_msg(THIS->name, GF_LOG_ERROR, EINVAL, + INDEX_MSG_INDEX_READDIR_FAILED, + "seekdir(0x%llx) failed on dir=%p: " + "Invalid argument (offset reused from " + "another DIR * structure?)", + off, dir); + errno = EINVAL; + count = -1; + goto out; } +#endif /* GF_LINUX_HOST_OS */ + } - while (filled <= size) { - in_case = (u_long)telldir (dir); + while (filled <= size) { + in_case = (u_long)telldir(dir); - if (in_case == -1) { - gf_log (THIS->name, GF_LOG_ERROR, - "telldir failed on dir=%p: %s", - dir, strerror (errno)); - goto out; - } + if (in_case == -1) { + gf_msg(THIS->name, GF_LOG_ERROR, errno, + INDEX_MSG_INDEX_READDIR_FAILED, "telldir failed on dir=%p", + dir); + goto out; + } - errno = 0; - entry = NULL; - readdir_r (dir, (struct dirent *)entrybuf, &entry); - - if (!entry) { - if (errno == EBADF) { - gf_log (THIS->name, GF_LOG_WARNING, - "readdir failed on dir=%p: %s", - dir, strerror (errno)); - goto out; - } - break; - } + errno = 0; + entry = sys_readdir(dir, scratch); + if (!entry || errno != 0) { + if (errno == EBADF) { + gf_msg(THIS->name, GF_LOG_WARNING, errno, + INDEX_MSG_INDEX_READDIR_FAILED, + "readdir failed on dir=%p", dir); + goto out; + } + break; + } - if (!strncmp (entry->d_name, XATTROP_SUBDIR"-", - strlen (XATTROP_SUBDIR"-"))) { - check_delete_stale_index_file (this, entry->d_name); - continue; - } + if (!strncmp(entry->d_name, XATTROP_SUBDIR "-", + 
strlen(XATTROP_SUBDIR "-"))) { + check_delete_stale_index_file(this, entry->d_name, XATTROP_SUBDIR); + continue; + } else if (!strncmp(entry->d_name, DIRTY_SUBDIR "-", + strlen(DIRTY_SUBDIR "-"))) { + check_delete_stale_index_file(this, entry->d_name, DIRTY_SUBDIR); + continue; + } - this_size = max (sizeof (gf_dirent_t), - sizeof (gfs3_dirplist)) - + strlen (entry->d_name) + 1; + this_size = max(sizeof(gf_dirent_t), sizeof(gfs3_dirplist)) + + strlen(entry->d_name) + 1; - if (this_size + filled > size) { - seekdir (dir, in_case); + if (this_size + filled > size) { + seekdir(dir, in_case); #ifndef GF_LINUX_HOST_OS - if ((u_long)telldir(dir) != in_case && - in_case != fctx->dir_eof) { - gf_log (THIS->name, GF_LOG_ERROR, - "seekdir(0x%llx) failed on dir=%p: " - "Invalid argument (offset reused from " - "another DIR * structure?)", - in_case, dir); - errno = EINVAL; - count = -1; - goto out; - } + if ((u_long)telldir(dir) != in_case && in_case != fctx->dir_eof) { + gf_msg(THIS->name, GF_LOG_ERROR, EINVAL, + INDEX_MSG_INDEX_READDIR_FAILED, + "seekdir(0x%llx) failed on dir=%p: " + "Invalid argument (offset reused from " + "another DIR * structure?)", + in_case, dir); + errno = EINVAL; + count = -1; + goto out; + } #endif /* GF_LINUX_HOST_OS */ - break; - } - - this_entry = gf_dirent_for_name (entry->d_name); - - if (!this_entry) { - gf_log (THIS->name, GF_LOG_ERROR, - "could not create gf_dirent for entry %s: (%s)", - entry->d_name, strerror (errno)); - goto out; - } - /* - * we store the offset of next entry here, which is - * probably not intended, but code using syncop_readdir() - * (glfs-heal.c, afr-self-heald.c, pump.c) rely on it - * for directory read resumption. 
- */ - last_off = (u_long)telldir(dir); - this_entry->d_off = last_off; - this_entry->d_ino = entry->d_ino; - - list_add_tail (&this_entry->list, &entries->list); - - filled += this_size; - count ++; + break; } - if ((!sys_readdir (dir) && (errno == 0))) { - /* Indicate EOF */ - errno = ENOENT; - /* Remember EOF offset for later detection */ - fctx->dir_eof = last_off; + this_entry = gf_dirent_for_name(entry->d_name); + + if (!this_entry) { + gf_msg(THIS->name, GF_LOG_ERROR, errno, + INDEX_MSG_INDEX_READDIR_FAILED, + "could not create gf_dirent for entry %s", entry->d_name); + goto out; } + /* + * we store the offset of next entry here, which is + * probably not intended, but code using syncop_readdir() + * (glfs-heal.c, afr-self-heald.c, pump.c) rely on it + * for directory read resumption. + */ + last_off = (u_long)telldir(dir); + this_entry->d_off = last_off; + this_entry->d_ino = entry->d_ino; + + list_add_tail(&this_entry->list, &entries->list); + + filled += this_size; + count++; + } + + errno = 0; + + if ((!sys_readdir(dir, scratch) && (errno == 0))) { + /* Indicate EOF */ + errno = ENOENT; + /* Remember EOF offset for later detection */ + fctx->dir_eof = last_off; + } out: - return count; + return count; } int -index_add (xlator_t *this, uuid_t gfid, const char *subdir) -{ - int32_t op_errno = 0; - char gfid_path[PATH_MAX] = {0}; - char index_path[PATH_MAX] = {0}; - int ret = 0; - uuid_t index = {0}; - index_priv_t *priv = NULL; - struct stat st = {0}; - int fd = 0; - - priv = this->private; - GF_ASSERT_AND_GOTO_WITH_ERROR (this->name, !gf_uuid_is_null (gfid), - out, op_errno, EINVAL); - - make_gfid_path (priv->index_basepath, subdir, gfid, - gfid_path, sizeof (gfid_path)); - - ret = sys_stat (gfid_path, &st); - if (!ret) - goto out; - index_get_index (priv, index); - make_index_path (priv->index_basepath, subdir, - index, index_path, sizeof (index_path)); - ret = sys_link (index_path, gfid_path); - if (!ret || (errno == EEXIST)) { - ret = 0; - goto out; - 
} +index_link_to_base(xlator_t *this, char *fpath, const char *subdir) +{ + int ret = 0; + int fd = 0; + int op_errno = 0; + uuid_t index = {0}; + index_priv_t *priv = this->private; + char base[PATH_MAX] = {0}; + + index_get_index(priv, index); + make_index_path(priv->index_basepath, subdir, index, base, sizeof(base)); + + ret = sys_link(base, fpath); + if (!ret || (errno == EEXIST)) { + ret = 0; + goto out; + } - op_errno = errno; - if (op_errno == ENOENT) { - ret = index_dir_create (this, subdir); - if (ret) - goto out; - } else if (op_errno == EMLINK) { - index_generate_index (priv, index); - make_index_path (priv->index_basepath, subdir, - index, index_path, sizeof (index_path)); - } else { - goto out; + op_errno = errno; + if (op_errno == ENOENT) { + ret = index_dir_create(this, subdir); + if (ret) { + op_errno = errno; + goto out; } + } else if (op_errno == EMLINK) { + index_generate_index(priv, index); + make_index_path(priv->index_basepath, subdir, index, base, + sizeof(base)); + } else { + goto out; + } + + op_errno = 0; + fd = sys_creat(base, 0); + if ((fd < 0) && (errno != EEXIST)) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, op_errno, INDEX_MSG_INDEX_ADD_FAILED, + "%s: Not able to " + "create index", + fpath); + goto out; + } + + if (fd >= 0) + sys_close(fd); + + ret = sys_link(base, fpath); + if (ret && (errno != EEXIST)) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, INDEX_MSG_INDEX_ADD_FAILED, + "%s: Not able to " + "add to index", + fpath); + goto out; + } +out: + return -op_errno; +} - fd = sys_creat (index_path, 0); - if ((fd < 0) && (errno != EEXIST)) { - ret = -1; - gf_log (this->name, GF_LOG_ERROR, "%s: Not able to " - "create index (%s)", uuid_utoa (gfid), - strerror (errno)); - goto out; - } +int +index_add(xlator_t *this, uuid_t gfid, const char *subdir, + index_xattrop_type_t type) +{ + char gfid_path[PATH_MAX] = {0}; + int ret = -1; + index_priv_t *priv = NULL; + struct stat st = {0}; - if (fd >= 0) - 
sys_close (fd); + priv = this->private; - ret = sys_link (index_path, gfid_path); - if (ret && (errno != EEXIST)) { - gf_log (this->name, GF_LOG_ERROR, "%s: Not able to " - "add to index (%s)", uuid_utoa (gfid), - strerror (errno)); - goto out; - } + if (gf_uuid_is_null(gfid)) { + GF_ASSERT(0); + goto out; + } - ret = 0; + make_gfid_path(priv->index_basepath, subdir, gfid, gfid_path, + sizeof(gfid_path)); + + ret = sys_stat(gfid_path, &st); + if (!ret) + goto out; + ret = index_link_to_base(this, gfid_path, subdir); out: - return ret; + return ret; } int -index_del (xlator_t *this, uuid_t gfid, const char *subdir) -{ - int32_t op_errno __attribute__((unused)) = 0; - index_priv_t *priv = NULL; - int ret = 0; - char gfid_path[PATH_MAX] = {0}; - - priv = this->private; - GF_ASSERT_AND_GOTO_WITH_ERROR (this->name, !gf_uuid_is_null (gfid), - out, op_errno, EINVAL); - make_gfid_path (priv->index_basepath, subdir, gfid, - gfid_path, sizeof (gfid_path)); - ret = sys_unlink (gfid_path); - if (ret && (errno != ENOENT)) { - gf_log (this->name, GF_LOG_ERROR, - "%s: failed to delete from index (%s)", - gfid_path, strerror (errno)); - ret = -errno; - goto out; +index_del(xlator_t *this, uuid_t gfid, const char *subdir, int type) +{ + int32_t op_errno __attribute__((unused)) = 0; + index_priv_t *priv = NULL; + int ret = 0; + char gfid_path[PATH_MAX] = {0}; + char rename_dst[PATH_MAX] = { + 0, + }; + uuid_t uuid; + + priv = this->private; + GF_ASSERT_AND_GOTO_WITH_ERROR(this->name, !gf_uuid_is_null(gfid), out, + op_errno, EINVAL); + make_gfid_path(priv->index_basepath, subdir, gfid, gfid_path, + sizeof(gfid_path)); + + if ((strcmp(subdir, ENTRY_CHANGES_SUBDIR)) == 0) { + ret = sys_rmdir(gfid_path); + /* rmdir above could fail with ENOTEMPTY if the indices under + * it were created when granular-entry-heal was enabled, whereas + * the actual heal that happened was non-granular (or full) in + * nature, resulting in name indices getting left out. 
To + * clean up this directory without it affecting the IO path perf, + * the directory is renamed to a unique name under + * indices/entry-changes. Self-heal will pick up this entry + * during crawl and on lookup into the file system figure that + * the index is stale and subsequently wipe it out using rmdir(). + */ + if ((ret) && (errno == ENOTEMPTY)) { + gf_uuid_generate(uuid); + make_gfid_path(priv->index_basepath, subdir, uuid, rename_dst, + sizeof(rename_dst)); + ret = sys_rename(gfid_path, rename_dst); } - ret = 0; + } else { + ret = sys_unlink(gfid_path); + } + + if (ret && (errno != ENOENT)) { + gf_msg(this->name, GF_LOG_ERROR, errno, INDEX_MSG_INDEX_DEL_FAILED, + "%s: failed to delete" + " from index", + gfid_path); + ret = -errno; + goto out; + } + + index_dec_link_count(priv, type); + ret = 0; out: - return ret; + return ret; +} + +static gf_boolean_t +_is_xattr_in_watchlist(dict_t *d, char *k, data_t *v, void *tmp) +{ + if (!strncmp(k, tmp, strlen(k))) + return _gf_true; + + return _gf_false; +} + +static gf_boolean_t +is_xattr_in_watchlist(dict_t *this, char *key, data_t *value, void *matchdata) +{ + int ret = -1; + + // matchdata is a list of xattrs + // key is strncmp'ed with each xattr in matchdata. + // ret will be 0 if key pattern is not present in the matchdata + // else ret will be count number of xattrs the key pattern-matches with. 
+ ret = dict_foreach_match(matchdata, _is_xattr_in_watchlist, key, + dict_null_foreach_fn, NULL); + + if (ret > 0) + return _gf_true; + return _gf_false; } static int -_check_key_is_zero_filled (dict_t *d, char *k, data_t *v, - void *tmp) +index_find_xattr_type(dict_t *d, char *k, data_t *v) { - if (mem_0filled ((const char*)v->data, v->len)) { - /* -1 means, no more iterations, treat as 'break' */ - return -1; - } + int idx = -1; + index_priv_t *priv = THIS->private; + + if (priv->dirty_watchlist && + is_xattr_in_watchlist(d, k, v, priv->dirty_watchlist)) + idx = DIRTY; + else if (priv->pending_watchlist && + is_xattr_in_watchlist(d, k, v, priv->pending_watchlist)) + idx = XATTROP; + + return idx; +} + +int +index_fill_zero_array(dict_t *d, char *k, data_t *v, void *adata) +{ + int idx = -1; + int *zfilled = adata; + // zfilled array contains `state` for all types xattrs. + // state : whether the gfid file of this file exists in + // corresponding xattr directory or not. + + idx = index_find_xattr_type(d, k, v); + if (idx == -1) return 0; + zfilled[idx] = 0; + return 0; } -void -_index_action (xlator_t *this, inode_t *inode, gf_boolean_t zero_xattr) +static int +_check_key_is_zero_filled(dict_t *d, char *k, data_t *v, void *tmp) { - int ret = 0; - index_inode_ctx_t *ctx = NULL; + int *zfilled = tmp; + int idx = -1; - ret = index_inode_ctx_get (inode, this, &ctx); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Not able to %s %s -> index", - zero_xattr?"del":"add", uuid_utoa (inode->gfid)); - goto out; + idx = index_find_xattr_type(d, k, v); + if (idx == -1) + return 0; + + /* Along with checking that the value of a key is zero filled + * the key's corresponding index should be assigned + * appropriate value. + * zfilled[idx] will be 0(false) if value not zero. + * will be 1(true) if value is zero. 
+ */ + if (mem_0filled((const char *)v->data, v->len)) { + zfilled[idx] = 0; + return 0; + } + + /* If zfilled[idx] was previously 0, it means at least + * one xattr of its "kind" is non-zero. Keep its value + * the same. + */ + if (zfilled[idx]) + zfilled[idx] = 1; + return 0; +} + +int +index_entry_create(xlator_t *this, inode_t *inode, char *filename) +{ + int ret = -1; + int op_errno = 0; + char pgfid_path[PATH_MAX] = {0}; + char entry_path[PATH_MAX] = {0}; + index_priv_t *priv = NULL; + index_inode_ctx_t *ctx = NULL; + int32_t len = 0; + + priv = this->private; + + GF_ASSERT_AND_GOTO_WITH_ERROR(this->name, !gf_uuid_is_null(inode->gfid), + out, op_errno, EINVAL); + GF_ASSERT_AND_GOTO_WITH_ERROR(this->name, filename, out, op_errno, EINVAL); + + ret = index_inode_ctx_get(inode, this, &ctx); + if (ret) { + op_errno = EINVAL; + gf_msg(this->name, GF_LOG_ERROR, op_errno, + INDEX_MSG_INODE_CTX_GET_SET_FAILED, + "Not able to get inode ctx for %s", uuid_utoa(inode->gfid)); + goto out; + } + + make_gfid_path(priv->index_basepath, ENTRY_CHANGES_SUBDIR, inode->gfid, + pgfid_path, sizeof(pgfid_path)); + + if (ctx->state[ENTRY_CHANGES] != IN) { + ret = sys_mkdir(pgfid_path, 0600); + if (ret != 0 && errno != EEXIST) { + op_errno = errno; + goto out; } - if (zero_xattr) { - if (ctx->state == NOTIN) - goto out; - ret = index_del (this, inode->gfid, XATTROP_SUBDIR); - if (!ret) - ctx->state = NOTIN; - } else { - if (ctx->state == IN) - goto out; - ret = index_add (this, inode->gfid, XATTROP_SUBDIR); - if (!ret) - ctx->state = IN; + ctx->state[ENTRY_CHANGES] = IN; + } + + if (strchr(filename, '/')) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, INDEX_MSG_INDEX_ADD_FAILED, + "Got invalid entry (%s) for pargfid path (%s)", filename, + pgfid_path); + op_errno = EINVAL; + goto out; + } + + len = snprintf(entry_path, sizeof(entry_path), "%s/%s", pgfid_path, + filename); + if ((len < 0) || (len >= sizeof(entry_path))) { + op_errno = EINVAL; + goto out; + } + + op_errno = 0; + + ret = 
index_link_to_base(this, entry_path, ENTRY_CHANGES_SUBDIR); +out: + if (op_errno) + ret = -op_errno; + return ret; +} + +int +index_entry_delete(xlator_t *this, uuid_t pgfid, char *filename) +{ + int ret = 0; + int op_errno = 0; + char pgfid_path[PATH_MAX] = {0}; + char entry_path[PATH_MAX] = {0}; + index_priv_t *priv = NULL; + int32_t len = 0; + + priv = this->private; + + GF_ASSERT_AND_GOTO_WITH_ERROR(this->name, !gf_uuid_is_null(pgfid), out, + op_errno, EINVAL); + GF_ASSERT_AND_GOTO_WITH_ERROR(this->name, filename, out, op_errno, EINVAL); + + make_gfid_path(priv->index_basepath, ENTRY_CHANGES_SUBDIR, pgfid, + pgfid_path, sizeof(pgfid_path)); + + if (strchr(filename, '/')) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, INDEX_MSG_INDEX_DEL_FAILED, + "Got invalid entry (%s) for pargfid path (%s)", filename, + pgfid_path); + op_errno = EINVAL; + goto out; + } + + len = snprintf(entry_path, sizeof(entry_path), "%s/%s", pgfid_path, + filename); + if ((len < 0) || (len >= sizeof(entry_path))) { + op_errno = EINVAL; + goto out; + } + + ret = sys_unlink(entry_path); + if (ret && (errno != ENOENT)) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, op_errno, INDEX_MSG_INDEX_DEL_FAILED, + "%s: failed to delete from index/entry-changes", entry_path); + } + +out: + return -op_errno; +} + +int +index_entry_action(xlator_t *this, inode_t *inode, dict_t *xdata, char *key) +{ + int ret = 0; + char *filename = NULL; + + ret = dict_get_str(xdata, key, &filename); + if (ret != 0) { + ret = 0; + goto out; + } + + if (strcmp(key, GF_XATTROP_ENTRY_IN_KEY) == 0) + ret = index_entry_create(this, inode, filename); + else if (strcmp(key, GF_XATTROP_ENTRY_OUT_KEY) == 0) + ret = index_entry_delete(this, inode->gfid, filename); + +out: + return ret; +} + +void +_index_action(xlator_t *this, inode_t *inode, int *zfilled) +{ + int ret = 0; + int i = 0; + index_inode_ctx_t *ctx = NULL; + char *subdir = NULL; + + ret = index_inode_ctx_get(inode, this, &ctx); + if (ret) { + gf_msg(this->name, 
GF_LOG_ERROR, EINVAL, + INDEX_MSG_INODE_CTX_GET_SET_FAILED, + "Not able to get" + " inode context for %s.", + uuid_utoa(inode->gfid)); + goto out; + } + + for (i = 0; i < XATTROP_TYPE_END; i++) { + subdir = index_get_subdir_from_type(i); + if (zfilled[i] == 1) { + if (ctx->state[i] == NOTIN) + continue; + ret = index_del(this, inode->gfid, subdir, i); + if (!ret) + ctx->state[i] = NOTIN; + } else if (zfilled[i] == 0) { + if (ctx->state[i] == IN) + continue; + ret = index_add(this, inode->gfid, subdir, i); + if (!ret) + ctx->state[i] = IN; } + } out: - return; + return; } -static gf_boolean_t -is_xattr_in_watchlist (dict_t *this, char *key, data_t *value, void *matchdata) +static void +index_init_state(xlator_t *this, inode_t *inode, index_inode_ctx_t *ctx, + char *subdir) { - if (dict_get (matchdata, key)) - return _gf_true; + int ret = -1; + char pgfid_path[PATH_MAX] = {0}; + struct stat st = {0}; + index_priv_t *priv = NULL; - return _gf_false; + priv = this->private; + + make_gfid_path(priv->index_basepath, subdir, inode->gfid, pgfid_path, + sizeof(pgfid_path)); + + ret = sys_stat(pgfid_path, &st); + if (ret == 0) + ctx->state[ENTRY_CHANGES] = IN; + else if (ret != 0 && errno == ENOENT) + ctx->state[ENTRY_CHANGES] = NOTIN; + + return; } void -xattrop_index_action (xlator_t *this, inode_t *inode, dict_t *xattr, - dict_match_t match, void *match_data) +xattrop_index_action(xlator_t *this, index_local_t *local, dict_t *xattr, + dict_match_t match, void *match_data) { - gf_boolean_t zero_xattr = _gf_true; - int ret = 0; + int ret = 0; + int zfilled[XATTROP_TYPE_END] = { + 0, + }; + int8_t value = 0; + char *subdir = NULL; + dict_t *req_xdata = NULL; + inode_t *inode = NULL; + index_inode_ctx_t *ctx = NULL; + + inode = local->inode; + req_xdata = local->xdata; + + memset(zfilled, -1, sizeof(zfilled)); + ret = dict_foreach_match(xattr, match, match_data, + _check_key_is_zero_filled, zfilled); + _index_action(this, inode, zfilled); + + if (req_xdata) { + ret = 
index_entry_action(this, inode, req_xdata, + GF_XATTROP_ENTRY_OUT_KEY); + + ret = dict_get_int8(req_xdata, GF_XATTROP_PURGE_INDEX, &value); + if ((ret) || (value == 0)) + goto out; + } + + if (zfilled[XATTROP] != 1) + goto out; + + if (inode->ia_type != IA_IFDIR) + goto out; + + subdir = index_get_subdir_from_type(ENTRY_CHANGES); + ret = index_inode_ctx_get(inode, this, &ctx); + if (ctx->state[ENTRY_CHANGES] == UNKNOWN) + index_init_state(this, inode, ctx, subdir); + if (ctx->state[ENTRY_CHANGES] == IN) { + ret = index_del(this, inode->gfid, subdir, ENTRY_CHANGES); + ctx->state[ENTRY_CHANGES] = NOTIN; + } - ret = dict_foreach_match (xattr, match, match_data, - _check_key_is_zero_filled, NULL); - if (ret == -1) - zero_xattr = _gf_false; - _index_action (this, inode, zero_xattr); - return; +out: + return; } static gf_boolean_t -index_xattrop_track (xlator_t *this, gf_xattrop_flags_t flags, dict_t *dict) +index_xattrop_track(xlator_t *this, gf_xattrop_flags_t flags, dict_t *dict) { - index_priv_t *priv = this->private; + index_priv_t *priv = this->private; - if (flags == GF_XATTROP_ADD_ARRAY) - return _gf_true; + if (flags == GF_XATTROP_ADD_ARRAY) + return _gf_true; - if (flags != GF_XATTROP_ADD_ARRAY64) - return _gf_false; + if (flags != GF_XATTROP_ADD_ARRAY64) + return _gf_false; - if (!priv->xattrop64_watchlist) - return _gf_false; + if (!priv->pending_watchlist) + return _gf_false; - if (dict_foreach_match (dict, is_xattr_in_watchlist, - priv->xattrop64_watchlist, dict_null_foreach_fn, - NULL) > 0) - return _gf_true; + if (dict_foreach_match(dict, is_xattr_in_watchlist, priv->pending_watchlist, + dict_null_foreach_fn, NULL) > 0) + return _gf_true; - return _gf_false; + return _gf_false; } int -__index_fd_ctx_get (fd_t *fd, xlator_t *this, index_fd_ctx_t **ctx) +index_inode_path(xlator_t *this, inode_t *inode, char *dirpath, size_t len) { - int ret = 0; - index_fd_ctx_t *fctx = NULL; - uint64_t tmpctx = 0; - char index_dir[PATH_MAX] = {0}; - index_priv_t *priv = 
NULL; - - priv = this->private; - if (gf_uuid_compare (fd->inode->gfid, priv->xattrop_vgfid)) { - ret = -EINVAL; - goto out; - } - - ret = __fd_ctx_get (fd, this, &tmpctx); - if (!ret) { - fctx = (index_fd_ctx_t*) (long) tmpctx; - goto out; + char *subdir = NULL; + int ret = 0; + index_priv_t *priv = NULL; + index_inode_ctx_t *ictx = NULL; + + priv = this->private; + if (!index_is_fop_on_internal_inode(this, inode, NULL)) { + ret = -EINVAL; + goto out; + } + + subdir = index_get_subdir_from_vgfid(priv, inode->gfid); + if (subdir) { + if (len <= strlen(priv->index_basepath) + 1 /*'/'*/ + strlen(subdir)) { + ret = -EINVAL; + goto out; } - - fctx = GF_CALLOC (1, sizeof (*fctx), gf_index_fd_ctx_t); - if (!fctx) { - ret = -ENOMEM; - goto out; + make_index_dir_path(priv->index_basepath, subdir, dirpath, len); + } else { + ret = index_inode_ctx_get(inode, this, &ictx); + if (ret) + goto out; + if (gf_uuid_is_null(ictx->virtual_pargfid)) { + ret = -EINVAL; + goto out; } - - make_index_dir_path (priv->index_basepath, XATTROP_SUBDIR, - index_dir, sizeof (index_dir)); - fctx->dir = sys_opendir (index_dir); - if (!fctx->dir) { - ret = -errno; - GF_FREE (fctx); - fctx = NULL; - goto out; + make_index_dir_path(priv->index_basepath, ENTRY_CHANGES_SUBDIR, dirpath, + len); + if (len <= strlen(dirpath) + 1 /*'/'*/ + SLEN(UUID0_STR)) { + ret = -EINVAL; + goto out; } - fctx->dir_eof = -1; + strcat(dirpath, "/"); + strcat(dirpath, uuid_utoa(ictx->virtual_pargfid)); + } +out: + return ret; +} - ret = __fd_ctx_set (fd, this, (uint64_t)(long)fctx); - if (ret) { - sys_closedir (fctx->dir); - GF_FREE (fctx); - fctx = NULL; - ret = -EINVAL; - goto out; - } +int +__index_fd_ctx_get(fd_t *fd, xlator_t *this, index_fd_ctx_t **ctx) +{ + int ret = 0; + index_fd_ctx_t *fctx = NULL; + uint64_t tmpctx = 0; + char dirpath[PATH_MAX] = {0}; + + ret = __fd_ctx_get(fd, this, &tmpctx); + if (!ret) { + fctx = (index_fd_ctx_t *)(long)tmpctx; + *ctx = fctx; + goto out; + } + + ret = index_inode_path(this, 
fd->inode, dirpath, sizeof(dirpath)); + if (ret) + goto out; + + fctx = GF_CALLOC(1, sizeof(*fctx), gf_index_fd_ctx_t); + if (!fctx) { + ret = -ENOMEM; + goto out; + } + + fctx->dir = sys_opendir(dirpath); + if (!fctx->dir) { + ret = -errno; + GF_FREE(fctx); + fctx = NULL; + goto out; + } + fctx->dir_eof = -1; + + ret = __fd_ctx_set(fd, this, (uint64_t)(long)fctx); + if (ret) { + (void)sys_closedir(fctx->dir); + GF_FREE(fctx); + fctx = NULL; + ret = -EINVAL; + goto out; + } + *ctx = fctx; out: - if (fctx) - *ctx = fctx; - return ret; + return ret; } int -index_fd_ctx_get (fd_t *fd, xlator_t *this, index_fd_ctx_t **ctx) +index_fd_ctx_get(fd_t *fd, xlator_t *this, index_fd_ctx_t **ctx) { - int ret = 0; - LOCK (&fd->lock); - { - ret = __index_fd_ctx_get (fd, this, ctx); - } - UNLOCK (&fd->lock); - return ret; + int ret = 0; + LOCK(&fd->lock); + { + ret = __index_fd_ctx_get(fd, this, ctx); + } + UNLOCK(&fd->lock); + return ret; } -//new - Not NULL means start a fop -//new - NULL means done processing the fop +// new - Not NULL means start a fop +// new - NULL means done processing the fop void -index_queue_process (xlator_t *this, inode_t *inode, call_stub_t *new) +index_queue_process(xlator_t *this, inode_t *inode, call_stub_t *new) { - call_stub_t *stub = NULL; - index_inode_ctx_t *ctx = NULL; - int ret = 0; - call_frame_t *frame = NULL; + call_stub_t *stub = NULL; + index_inode_ctx_t *ctx = NULL; + int ret = 0; + call_frame_t *frame = NULL; + + LOCK(&inode->lock); + { + ret = __index_inode_ctx_get(inode, this, &ctx); + if (ret) + goto unlock; - LOCK (&inode->lock); - { - ret = __index_inode_ctx_get (inode, this, &ctx); - if (ret) - goto unlock; - - if (new) { - __index_enqueue (&ctx->callstubs, new); - new = NULL; - } else { - ctx->processing = _gf_false; - } + if (new) { + __index_enqueue(&ctx->callstubs, new); + new = NULL; + } else { + ctx->processing = _gf_false; + } - if (!ctx->processing) { - stub = __index_dequeue (&ctx->callstubs); - if (stub) - 
ctx->processing = _gf_true; - else - ctx->processing = _gf_false; - } + if (!ctx->processing) { + stub = __index_dequeue(&ctx->callstubs); + if (stub) + ctx->processing = _gf_true; + else + ctx->processing = _gf_false; } + } unlock: - UNLOCK (&inode->lock); - - if (ret && new) { - frame = new->frame; - if (new->fop == GF_FOP_XATTROP) { - INDEX_STACK_UNWIND (xattrop, frame, -1, ENOMEM, - NULL, NULL); - } else if (new->fop == GF_FOP_FXATTROP) { - INDEX_STACK_UNWIND (fxattrop, frame, -1, ENOMEM, - NULL, NULL); - } - call_stub_destroy (new); - } else if (stub) { - call_resume (stub); + UNLOCK(&inode->lock); + + if (ret && new) { + frame = new->frame; + if (new->fop == GF_FOP_XATTROP) { + INDEX_STACK_UNWIND(xattrop, frame, -1, ENOMEM, NULL, NULL); + } else if (new->fop == GF_FOP_FXATTROP) { + INDEX_STACK_UNWIND(fxattrop, frame, -1, ENOMEM, NULL, NULL); } - return; + call_stub_destroy(new); + } else if (stub) { + call_resume(stub); + } + return; } static int -xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xattr, - dict_t *xdata, dict_match_t match, dict_t *matchdata) +xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *xattr, dict_t *xdata, dict_match_t match, + dict_t *matchdata) { - inode_t *inode = NULL; + inode_t *inode = NULL; + index_local_t *local = NULL; - inode = inode_ref (frame->local); - if (op_ret < 0) - goto out; + local = frame->local; + inode = inode_ref(local->inode); - xattrop_index_action (this, frame->local, xattr, match, matchdata); + if (op_ret < 0) + goto out; + + xattrop_index_action(this, local, xattr, match, matchdata); out: - INDEX_STACK_UNWIND (xattrop, frame, op_ret, op_errno, xattr, xdata); - index_queue_process (this, inode, NULL); - inode_unref (inode); + INDEX_STACK_UNWIND(xattrop, frame, op_ret, op_errno, xattr, xdata); + index_queue_process(this, inode, NULL); + inode_unref(inode); - return 0; + return 0; } int32_t 
-index_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xattr, - dict_t *xdata) +index_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xattr, + dict_t *xdata) { - return xattrop_cbk (frame, cookie, this, op_ret, op_errno, xattr, xdata, - dict_match_everything, NULL); + index_priv_t *priv = this->private; + + xattrop_cbk(frame, cookie, this, op_ret, op_errno, xattr, xdata, + is_xattr_in_watchlist, priv->complete_watchlist); + return 0; } int32_t -index_xattrop64_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xattr, - dict_t *xdata) +index_xattrop64_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xattr, + dict_t *xdata) { - index_priv_t *priv = this->private; + index_priv_t *priv = this->private; - return xattrop_cbk (frame, cookie, this, op_ret, op_errno, xattr, xdata, - is_xattr_in_watchlist, priv->xattrop64_watchlist); + return xattrop_cbk(frame, cookie, this, op_ret, op_errno, xattr, xdata, + is_xattr_in_watchlist, priv->pending_watchlist); } -int -index_xattrop_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, - gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) +void +index_xattrop_do(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) { - fop_xattrop_cbk_t cbk = NULL; - //In wind phase bring the gfid into index. This way if the brick crashes - //just after posix performs xattrop before _cbk reaches index xlator - //we will still have the gfid in index. 
- _index_action (this, frame->local, _gf_false); - - if (optype == GF_XATTROP_ADD_ARRAY) - cbk = index_xattrop_cbk; - else - cbk = index_xattrop64_cbk; - - STACK_WIND (frame, cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->xattrop, loc, optype, xattr, - xdata); - return 0; + int ret = -1; + int zfilled[XATTROP_TYPE_END] = { + 0, + }; + index_local_t *local = NULL; + fop_xattrop_cbk_t x_cbk = NULL; + + local = frame->local; + + if (optype == GF_XATTROP_ADD_ARRAY) + x_cbk = index_xattrop_cbk; + else + x_cbk = index_xattrop64_cbk; + + // In wind phase bring the gfid into index. This way if the brick crashes + // just after posix performs xattrop before _cbk reaches index xlator + // we will still have the gfid in index. + memset(zfilled, -1, sizeof(zfilled)); + + /* Foreach xattr, set corresponding index of zfilled to 1 + * zfilled[index] = 1 implies the xattr's value is zero filled + * and should be added in its corresponding subdir. + * + * zfilled should be set to 1 only for those index that + * exist in xattr variable. This is to distinguish + * between different types of volumes. + * For e.g., if the check is not made, + * zfilled[DIRTY] is set to 1 for EC volumes, + * index file will be tried to create in indices/dirty dir + * which doesn't exist for an EC volume. 
+ */ + ret = dict_foreach(xattr, index_fill_zero_array, zfilled); + + _index_action(this, local->inode, zfilled); + if (xdata) + ret = index_entry_action(this, local->inode, xdata, + GF_XATTROP_ENTRY_IN_KEY); + if (ret < 0) { + x_cbk(frame, NULL, this, -1, -ret, NULL, NULL); + return; + } + + if (loc) + STACK_WIND(frame, x_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->xattrop, loc, optype, xattr, xdata); + else + STACK_WIND(frame, x_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fxattrop, fd, optype, xattr, xdata); } int -index_fxattrop_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd, - gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) +index_xattrop_wrapper(call_frame_t *frame, xlator_t *this, loc_t *loc, + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) { - fop_fxattrop_cbk_t cbk = NULL; - //In wind phase bring the gfid into index. This way if the brick crashes - //just after posix performs xattrop before _cbk reaches index xlator - //we will still have the gfid in index. 
- _index_action (this, frame->local, _gf_false); - - if (optype == GF_XATTROP_ADD_ARRAY) - cbk = index_xattrop_cbk; - else - cbk = index_xattrop64_cbk; + index_xattrop_do(frame, this, loc, NULL, optype, xattr, xdata); + return 0; +} - STACK_WIND (frame, cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fxattrop, fd, optype, xattr, - xdata); - return 0; +int +index_fxattrop_wrapper(call_frame_t *frame, xlator_t *this, fd_t *fd, + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) +{ + index_xattrop_do(frame, this, NULL, fd, optype, xattr, xdata); + return 0; } int32_t -index_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, - gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata) +index_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc, + gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata) { - call_stub_t *stub = NULL; - - if (!index_xattrop_track (this, flags, dict)) - goto out; - - frame->local = inode_ref (loc->inode); - stub = fop_xattrop_stub (frame, index_xattrop_wrapper, - loc, flags, dict, xdata); - if (!stub) { - INDEX_STACK_UNWIND (xattrop, frame, -1, ENOMEM, NULL, NULL); - return 0; - } - - index_queue_process (this, loc->inode, stub); + call_stub_t *stub = NULL; + index_local_t *local = NULL; + + if (!index_xattrop_track(this, flags, dict)) + goto out; + + local = mem_get0(this->local_pool); + if (!local) + goto err; + + frame->local = local; + local->inode = inode_ref(loc->inode); + if (xdata) + local->xdata = dict_ref(xdata); + stub = fop_xattrop_stub(frame, index_xattrop_wrapper, loc, flags, dict, + xdata); + +err: + if ((!local) || (!stub)) { + INDEX_STACK_UNWIND(xattrop, frame, -1, ENOMEM, NULL, NULL); return 0; + } + + index_queue_process(this, loc->inode, stub); + return 0; out: - STACK_WIND (frame, default_xattrop_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->xattrop, loc, flags, dict, xdata); - return 0; + STACK_WIND(frame, default_xattrop_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->xattrop, loc, flags, 
dict, xdata); + return 0; } int32_t -index_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd, - gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata) +index_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd, + gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata) { - call_stub_t *stub = NULL; - - if (!index_xattrop_track (this, flags, dict)) - goto out; - - frame->local = inode_ref (fd->inode); - stub = fop_fxattrop_stub (frame, index_fxattrop_wrapper, - fd, flags, dict, xdata); - if (!stub) { - INDEX_STACK_UNWIND (fxattrop, frame, -1, ENOMEM, NULL, xdata); - return 0; - } - - index_queue_process (this, fd->inode, stub); + call_stub_t *stub = NULL; + index_local_t *local = NULL; + + if (!index_xattrop_track(this, flags, dict)) + goto out; + + local = mem_get0(this->local_pool); + if (!local) + goto err; + + frame->local = local; + local->inode = inode_ref(fd->inode); + if (xdata) + local->xdata = dict_ref(xdata); + stub = fop_fxattrop_stub(frame, index_fxattrop_wrapper, fd, flags, dict, + xdata); + +err: + if ((!local) || (!stub)) { + INDEX_STACK_UNWIND(fxattrop, frame, -1, ENOMEM, NULL, xdata); return 0; + } + + index_queue_process(this, fd->inode, stub); + return 0; out: - STACK_WIND (frame, default_fxattrop_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fxattrop, fd, flags, dict, xdata); - return 0; + STACK_WIND(frame, default_fxattrop_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fxattrop, fd, flags, dict, xdata); + return 0; } uint64_t -index_entry_count (xlator_t *this, char *subdir) +index_entry_count(xlator_t *this, char *subdir) { - index_priv_t *priv = NULL; - char index_dir[PATH_MAX]; - DIR *dirp = NULL; - uint64_t count = 0; - struct dirent buf; - struct dirent *entry = NULL; + uint64_t count = 0; + index_priv_t *priv = NULL; + DIR *dirp = NULL; + struct dirent *entry = NULL; + struct dirent scratch[2] = { + { + 0, + }, + }; + char index_dir[PATH_MAX] = { + 0, + }; + + priv = this->private; - priv = this->private; + 
make_index_dir_path(priv->index_basepath, subdir, index_dir, + sizeof(index_dir)); - make_index_dir_path (priv->index_basepath, subdir, - index_dir, sizeof (index_dir)); + dirp = sys_opendir(index_dir); + if (!dirp) + return 0; - dirp = sys_opendir (index_dir); - if (!dirp) - return 0; + for (;;) { + errno = 0; + entry = sys_readdir(dirp, scratch); + if (!entry || errno != 0) + break; - while (readdir_r (dirp, &buf, &entry) == 0) { - if (!entry) - break; - if (!strcmp (entry->d_name, ".") || - !strcmp (entry->d_name, "..")) - continue; - if (!strncmp (entry->d_name, subdir, strlen (subdir))) - continue; - count++; - } - sys_closedir (dirp); + if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) + continue; - return count; -} + if (!strncmp(entry->d_name, subdir, strlen(subdir))) + continue; + count++; + } + + (void)sys_closedir(dirp); + + return count; +} int32_t -index_getxattr_wrapper (call_frame_t *frame, xlator_t *this, - loc_t *loc, const char *name, dict_t *xdata) +index_getxattr_wrapper(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) { - index_priv_t *priv = NULL; - dict_t *xattr = NULL; - int ret = 0; - uint64_t count = 0; + index_priv_t *priv = NULL; + dict_t *xattr = NULL; + int ret = 0; + int vgfid_type = 0; + uint64_t count = 0; + + priv = this->private; + + xattr = dict_new(); + if (!xattr) { + ret = -ENOMEM; + goto done; + } + + vgfid_type = index_get_type_from_vgfid_xattr(name); + if (vgfid_type >= 0) { + ret = dict_set_static_bin(xattr, (char *)name, + priv->internal_vgfid[vgfid_type], + sizeof(priv->internal_vgfid[vgfid_type])); + if (ret) { + ret = -EINVAL; + gf_msg(this->name, GF_LOG_ERROR, -ret, INDEX_MSG_DICT_SET_FAILED, + "xattrop index " + "gfid set failed"); + goto done; + } + } - priv = this->private; + /* TODO: Need to check what kind of link-counts are needed for + * ENTRY-CHANGES before refactor of this block with array*/ + if (strcmp(name, GF_XATTROP_INDEX_COUNT) == 0) { + count = 
index_entry_count(this, XATTROP_SUBDIR); - xattr = dict_new (); - if (!xattr) { - ret = -ENOMEM; - goto done; + ret = dict_set_uint64(xattr, (char *)name, count); + if (ret) { + ret = -EINVAL; + gf_msg(this->name, GF_LOG_ERROR, -ret, INDEX_MSG_DICT_SET_FAILED, + "xattrop index " + "count set failed"); + goto done; } + } else if (strcmp(name, GF_XATTROP_DIRTY_COUNT) == 0) { + count = index_entry_count(this, DIRTY_SUBDIR); - if (strcmp (name, GF_XATTROP_INDEX_GFID) == 0) { - ret = dict_set_static_bin (xattr, (char*)name, priv->xattrop_vgfid, - sizeof (priv->xattrop_vgfid)); - if (ret) { - ret = -ENOMEM; - gf_log (this->name, GF_LOG_ERROR, "xattrop index " - "gfid set failed"); - goto done; - } - } else if (strcmp (name, GF_XATTROP_INDEX_COUNT) == 0) { - count = index_entry_count (this, XATTROP_SUBDIR); - - ret = dict_set_uint64 (xattr, (char *)name, count); - if (ret) { - ret = -ENOMEM; - gf_log (this->name, GF_LOG_ERROR, "xattrop index " - "count set failed"); - goto done; - } - } + ret = dict_set_uint64(xattr, (char *)name, count); + if (ret) { + ret = -EINVAL; + gf_msg(this->name, GF_LOG_ERROR, -ret, INDEX_MSG_DICT_SET_FAILED, + "dirty index " + "count set failed"); + goto done; + } + } done: - if (ret) - STACK_UNWIND_STRICT (getxattr, frame, -1, -ret, xattr, xdata); - else - STACK_UNWIND_STRICT (getxattr, frame, 0, 0, xattr, xdata); + if (ret) + STACK_UNWIND_STRICT(getxattr, frame, -1, -ret, xattr, NULL); + else + STACK_UNWIND_STRICT(getxattr, frame, 0, 0, xattr, NULL); - if (xattr) - dict_unref (xattr); + if (xattr) + dict_unref(xattr); + return 0; +} + +static int +index_save_pargfid_for_entry_changes(xlator_t *this, loc_t *loc, char *path) +{ + index_priv_t *priv = NULL; + index_inode_ctx_t *ctx = NULL; + int ret = 0; + + priv = this->private; + if (!loc) + return -1; + if (gf_uuid_compare(loc->pargfid, priv->internal_vgfid[ENTRY_CHANGES])) return 0; + + ret = index_inode_ctx_get(loc->inode, this, &ctx); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, 
+ INDEX_MSG_INODE_CTX_GET_SET_FAILED, + "Unable to get inode context for %s", path); + return -EINVAL; + } + ret = gf_uuid_parse(loc->name, ctx->virtual_pargfid); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, + INDEX_MSG_INODE_CTX_GET_SET_FAILED, + "Unable to store " + "virtual gfid in inode context for %s", + path); + return -EINVAL; + } + return 0; } int32_t -index_lookup_wrapper (call_frame_t *frame, xlator_t *this, - loc_t *loc, dict_t *xattr_req) -{ - index_priv_t *priv = NULL; - struct stat lstatbuf = {0}; - int ret = 0; - int32_t op_errno = EINVAL; - int32_t op_ret = -1; - char path[PATH_MAX] = {0}; - struct iatt stbuf = {0, }; - struct iatt postparent = {0,}; - dict_t *xattr = NULL; - gf_boolean_t is_dir = _gf_false; - - priv = this->private; - - VALIDATE_OR_GOTO (loc, done); - if (!gf_uuid_compare (loc->gfid, priv->xattrop_vgfid)) { - make_index_dir_path (priv->index_basepath, XATTROP_SUBDIR, - path, sizeof (path)); - is_dir = _gf_true; - } else if (!gf_uuid_compare (loc->pargfid, priv->xattrop_vgfid)) { - make_file_path (priv->index_basepath, XATTROP_SUBDIR, - loc->name, path, sizeof (path)); +index_lookup_wrapper(call_frame_t *frame, xlator_t *this, loc_t *loc, + dict_t *xattr_req) +{ + index_priv_t *priv = NULL; + struct stat lstatbuf = {0}; + int ret = 0; + int32_t op_errno = EINVAL; + int32_t op_ret = -1; + uint64_t val = IA_INVAL; + char path[PATH_MAX] = {0}; + struct iatt stbuf = { + 0, + }; + struct iatt postparent = { + 0, + }; + dict_t *xattr = NULL; + gf_boolean_t is_dir = _gf_false; + char *subdir = NULL; + loc_t iloc = {0}; + + priv = this->private; + loc_copy(&iloc, loc); + + VALIDATE_OR_GOTO(loc, done); + if (index_is_fop_on_internal_inode(this, loc->parent, loc->pargfid)) { + subdir = index_get_subdir_from_vgfid(priv, loc->pargfid); + ret = index_inode_path(this, loc->parent, path, sizeof(path)); + if (ret < 0) { + op_errno = -ret; + goto done; } + ret = snprintf(path + strlen(path), PATH_MAX - strlen(path), "/%s", + loc->name); - 
ret = sys_lstat (path, &lstatbuf); + if ((ret < 0) || (ret > (PATH_MAX - strlen(path)))) { + op_errno = EINVAL; + op_ret = -1; + goto done; + } + + } else if (index_is_virtual_gfid(priv, loc->gfid)) { + subdir = index_get_subdir_from_vgfid(priv, loc->gfid); + make_index_dir_path(priv->index_basepath, subdir, path, sizeof(path)); + is_dir = _gf_true; + + if ((xattr_req) && (dict_get(xattr_req, GF_INDEX_IA_TYPE_GET_REQ))) { + if (0 == strcmp(subdir, index_get_subdir_from_type(ENTRY_CHANGES))) + val = IA_IFDIR; + else + val = IA_IFREG; + } + } else { + if (!inode_is_linked(loc->inode)) { + inode_unref(iloc.inode); + iloc.inode = inode_find(loc->inode->table, loc->gfid); + } + ret = index_inode_path(this, iloc.inode, path, sizeof(path)); + if (ret < 0) { + op_errno = -ret; + goto done; + } + } + ret = sys_lstat(path, &lstatbuf); + if (ret) { + gf_msg_debug(this->name, errno, "Stat failed on %s dir ", path); + op_errno = errno; + goto done; + } else if (!S_ISDIR(lstatbuf.st_mode) && is_dir) { + op_errno = ENOTDIR; + gf_msg_debug(this->name, op_errno, + "Stat failed on %s dir, " + "not a directory", + path); + goto done; + } + xattr = dict_new(); + if (!xattr) { + op_errno = ENOMEM; + goto done; + } + + if (val != IA_INVAL) { + ret = dict_set_uint64(xattr, GF_INDEX_IA_TYPE_GET_RSP, val); if (ret) { - gf_log (this->name, GF_LOG_DEBUG, "Stat failed on index dir " - "(%s)", strerror (errno)); - op_errno = errno; - goto done; - } else if (!S_ISDIR (lstatbuf.st_mode) && is_dir) { - gf_log (this->name, GF_LOG_DEBUG, "Stat failed on index dir, " - "not a directory"); - op_errno = ENOENT; - goto done; + op_ret = -1; + op_errno = -ret; + goto done; } - xattr = dict_new (); - if (!xattr) { - op_errno = ENOMEM; - goto done; + } + + iatt_from_stat(&stbuf, &lstatbuf); + if (is_dir || inode_is_linked(iloc.inode)) + loc_gfid(&iloc, stbuf.ia_gfid); + else + gf_uuid_generate(stbuf.ia_gfid); + + ret = index_save_pargfid_for_entry_changes(this, &iloc, path); + if (ret) { + op_ret = -1; + 
op_errno = -ret; + goto done; + } + + stbuf.ia_ino = -1; + op_ret = 0; +done: + STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, + loc ? loc->inode : NULL, &stbuf, xattr, &postparent); + if (xattr) + dict_unref(xattr); + loc_wipe(&iloc); + return 0; +} + +int +index_get_gfid_type(void *opaque) +{ + gf_dirent_t *entry = NULL; + xlator_t *this = THIS; + struct index_syncop_args *args = opaque; + loc_t loc = {0}; + struct iatt iatt = {0}; + int ret = 0; + + list_for_each_entry(entry, &args->entries->list, list) + { + if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) + continue; + + loc_wipe(&loc); + + entry->d_type = gf_d_type_from_ia_type(IA_INVAL); + entry->d_stat.ia_type = IA_INVAL; + if (gf_uuid_parse(entry->d_name, loc.gfid)) + continue; + + loc.inode = inode_find(args->parent->table, loc.gfid); + if (loc.inode) { + entry->d_stat.ia_type = loc.inode->ia_type; + entry->d_type = gf_d_type_from_ia_type(loc.inode->ia_type); + continue; + } + loc.inode = inode_new(args->parent->table); + if (!loc.inode) + continue; + ret = syncop_lookup(FIRST_CHILD(this), &loc, &iatt, 0, 0, 0); + if (ret == 0) { + entry->d_type = gf_d_type_from_ia_type(iatt.ia_type); + entry->d_stat = iatt; } + } + loc_wipe(&loc); - iatt_from_stat (&stbuf, &lstatbuf); - if (is_dir) - gf_uuid_copy (stbuf.ia_gfid, priv->xattrop_vgfid); - else - gf_uuid_generate (stbuf.ia_gfid); - stbuf.ia_ino = -1; - op_ret = 0; -done: - STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, - loc->inode, &stbuf, xattr, &postparent); - if (xattr) - dict_unref (xattr); - return 0; + return 0; } int32_t -index_readdir_wrapper (call_frame_t *frame, xlator_t *this, - fd_t *fd, size_t size, off_t off, dict_t *xdata) +index_readdir_wrapper(call_frame_t *frame, xlator_t *this, fd_t *fd, + size_t size, off_t off, dict_t *xdata) { - index_fd_ctx_t *fctx = NULL; - DIR *dir = NULL; - int ret = -1; - int32_t op_ret = -1; - int32_t op_errno = 0; - int count = 0; - gf_dirent_t entries; + index_fd_ctx_t *fctx 
= NULL; + index_priv_t *priv = NULL; + DIR *dir = NULL; + int ret = -1; + int32_t op_ret = -1; + int32_t op_errno = 0; + int count = 0; + gf_dirent_t entries; + struct index_syncop_args args = {0}; + + priv = this->private; + INIT_LIST_HEAD(&entries.list); + + ret = index_fd_ctx_get(fd, this, &fctx); + if (ret < 0) { + op_errno = -ret; + gf_msg(this->name, GF_LOG_WARNING, op_errno, INDEX_MSG_FD_OP_FAILED, + "pfd is NULL, fd=%p", fd); + goto done; + } + + dir = fctx->dir; + if (!dir) { + op_errno = EINVAL; + gf_msg(this->name, GF_LOG_WARNING, op_errno, + INDEX_MSG_INDEX_READDIR_FAILED, "dir is NULL for fd=%p", fd); + goto done; + } + + count = index_fill_readdir(fd, fctx, dir, off, size, &entries); + + /* pick ENOENT to indicate EOF */ + op_errno = errno; + op_ret = count; + if (index_is_virtual_gfid(priv, fd->inode->gfid) && xdata && + dict_get(xdata, "get-gfid-type")) { + args.parent = fd->inode; + args.entries = &entries; + ret = synctask_new(this->ctx->env, index_get_gfid_type, NULL, NULL, + &args); + } +done: + STACK_UNWIND_STRICT(readdir, frame, op_ret, op_errno, &entries, NULL); + gf_dirent_free(&entries); + return 0; +} - INIT_LIST_HEAD (&entries.list); +int +deletion_handler(const char *fpath, const struct stat *sb, int typeflag, + struct FTW *ftwbuf) +{ + ia_type_t type = IA_INVAL; + + switch (sb->st_mode & S_IFMT) { + case S_IFREG: + sys_unlink(fpath); + break; + + case S_IFDIR: + sys_rmdir(fpath); + break; + default: + type = ia_type_from_st_mode(sb->st_mode); + gf_msg(THIS->name, GF_LOG_WARNING, EINVAL, INDEX_MSG_INVALID_ARGS, + "%s neither a regular file nor a directory - type:%s", fpath, + gf_inode_type_to_str(type)); + break; + } + return 0; +} - ret = index_fd_ctx_get (fd, this, &fctx); - if (ret < 0) { - gf_log (this->name, GF_LOG_WARNING, - "pfd is NULL, fd=%p", fd); - op_errno = -ret; - goto done; - } +static int +index_wipe_index_subdir(void *opaque) +{ + struct index_syncop_args *args = opaque; - dir = fctx->dir; + nftw(args->path, 
deletion_handler, 1, FTW_DEPTH | FTW_PHYS); + return 0; +} - if (!dir) { - gf_log (this->name, GF_LOG_WARNING, - "dir is NULL for fd=%p", fd); - op_errno = EINVAL; - goto done; - } +static void +index_get_parent_iatt(struct iatt *parent, char *path, loc_t *loc, + int32_t *op_ret, int32_t *op_errno) +{ + int ret = -1; + struct stat lstatbuf = { + 0, + }; + + ret = sys_lstat(path, &lstatbuf); + if (ret < 0) { + *op_ret = -1; + *op_errno = errno; + return; + } - count = index_fill_readdir (fd, fctx, dir, off, size, &entries); + iatt_from_stat(parent, &lstatbuf); + gf_uuid_copy(parent->ia_gfid, loc->pargfid); + parent->ia_ino = -1; - /* pick ENOENT to indicate EOF */ - op_errno = errno; - op_ret = count; -done: - STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, &entries, xdata); - gf_dirent_free (&entries); - return 0; + return; } int -index_unlink_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, int flag, - dict_t *xdata) -{ - index_priv_t *priv = NULL; - int32_t op_ret = 0; - int32_t op_errno = 0; - int ret = 0; - struct iatt preparent = {0}; - struct iatt postparent = {0}; - char index_dir[PATH_MAX] = {0}; - struct stat lstatbuf = {0}; - uuid_t gfid = {0}; - - priv = this->private; - make_index_dir_path (priv->index_basepath, XATTROP_SUBDIR, - index_dir, sizeof (index_dir)); - ret = sys_lstat (index_dir, &lstatbuf); +index_rmdir_wrapper(call_frame_t *frame, xlator_t *this, loc_t *loc, int flag, + dict_t *xdata) +{ + int ret = 0; + int32_t op_ret = 0; + int32_t op_errno = 0; + char *subdir = NULL; + char index_dir[PATH_MAX] = {0}; + char index_subdir[PATH_MAX] = {0}; + uuid_t gfid = {0}; + struct iatt preparent = {0}; + struct iatt postparent = {0}; + index_priv_t *priv = NULL; + index_xattrop_type_t type = XATTROP_TYPE_UNSET; + struct index_syncop_args args = { + 0, + }; + + priv = this->private; + + type = index_get_type_from_vgfid(priv, loc->pargfid); + subdir = index_get_subdir_from_vgfid(priv, loc->pargfid); + 
make_index_dir_path(priv->index_basepath, subdir, index_dir, + sizeof(index_dir)); + + index_get_parent_iatt(&preparent, index_dir, loc, &op_ret, &op_errno); + if (op_ret < 0) + goto done; + + gf_uuid_parse(loc->name, gfid); + make_gfid_path(priv->index_basepath, subdir, gfid, index_subdir, + sizeof(index_subdir)); + + if (flag == 0) { + ret = index_del(this, gfid, subdir, type); if (ret < 0) { - op_ret = -1; - op_errno = errno; - goto done; + op_ret = -1; + op_errno = -ret; + goto done; } + } else { + args.path = index_subdir; + ret = synctask_new(this->ctx->env, index_wipe_index_subdir, NULL, NULL, + &args); + } - iatt_from_stat (&preparent, &lstatbuf); - gf_uuid_copy (preparent.ia_gfid, priv->xattrop_vgfid); - preparent.ia_ino = -1; - gf_uuid_parse (loc->name, gfid); - ret = index_del (this, gfid, XATTROP_SUBDIR); - if (ret < 0) { - op_ret = -1; - op_errno = -ret; - goto done; + index_get_parent_iatt(&postparent, index_dir, loc, &op_ret, &op_errno); + if (op_ret < 0) + goto done; + +done: + INDEX_STACK_UNWIND(rmdir, frame, op_ret, op_errno, &preparent, &postparent, + xdata); + return 0; +} + +int +index_unlink_wrapper(call_frame_t *frame, xlator_t *this, loc_t *loc, int flag, + dict_t *xdata) +{ + index_priv_t *priv = NULL; + index_inode_ctx_t *ictx = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + int ret = 0; + index_xattrop_type_t type = XATTROP_TYPE_UNSET; + struct iatt preparent = {0}; + struct iatt postparent = {0}; + char index_dir[PATH_MAX] = {0}; + char filepath[PATH_MAX] = {0}; + uuid_t gfid = {0}; + char *subdir = NULL; + + priv = this->private; + type = index_get_type_from_vgfid(priv, loc->pargfid); + ret = index_inode_path(this, loc->parent, index_dir, sizeof(index_dir)); + if (ret < 0) { + op_ret = -1; + op_errno = -ret; + goto done; + } + + index_get_parent_iatt(&preparent, index_dir, loc, &op_ret, &op_errno); + if (op_ret < 0) + goto done; + + if (type <= XATTROP_TYPE_UNSET) { + ret = index_inode_ctx_get(loc->parent, this, &ictx); + if 
((ret == 0) && gf_uuid_is_null(ictx->virtual_pargfid)) { + ret = -EINVAL; } - memset (&lstatbuf, 0, sizeof (lstatbuf)); - ret = sys_lstat (index_dir, &lstatbuf); - if (ret < 0) { - op_ret = -1; - op_errno = errno; - goto done; + if (ret == 0) { + ret = index_entry_delete(this, ictx->virtual_pargfid, + (char *)loc->name); } - iatt_from_stat (&postparent, &lstatbuf); - gf_uuid_copy (postparent.ia_gfid, priv->xattrop_vgfid); - postparent.ia_ino = -1; + } else if (type == ENTRY_CHANGES) { + make_file_path(priv->index_basepath, ENTRY_CHANGES_SUBDIR, + (char *)loc->name, filepath, sizeof(filepath)); + ret = sys_unlink(filepath); + } else { + subdir = index_get_subdir_from_type(type); + gf_uuid_parse(loc->name, gfid); + ret = index_del(this, gfid, subdir, type); + } + if (ret < 0) { + op_ret = -1; + op_errno = -ret; + goto done; + } + + index_get_parent_iatt(&postparent, index_dir, loc, &op_ret, &op_errno); + if (op_ret < 0) + goto done; done: - INDEX_STACK_UNWIND (unlink, frame, op_ret, op_errno, &preparent, - &postparent, xdata); - return 0; + INDEX_STACK_UNWIND(unlink, frame, op_ret, op_errno, &preparent, &postparent, + xdata); + return 0; } int32_t -index_getxattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, const char *name, dict_t *xdata) +index_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) { - call_stub_t *stub = NULL; - index_priv_t *priv = NULL; + call_stub_t *stub = NULL; - priv = this->private; + if (!name || + (!index_is_vgfid_xattr(name) && strcmp(GF_XATTROP_INDEX_COUNT, name) && + strcmp(GF_XATTROP_DIRTY_COUNT, name))) + goto out; - if (!name || (strcmp (GF_XATTROP_INDEX_GFID, name) && - strcmp (GF_XATTROP_INDEX_COUNT, name))) - goto out; - - stub = fop_getxattr_stub (frame, index_getxattr_wrapper, loc, name, - xdata); - if (!stub) { - STACK_UNWIND_STRICT (getxattr, frame, -1, ENOMEM, NULL, NULL); - return 0; - } - worker_enqueue (this, stub); + stub = fop_getxattr_stub(frame, index_getxattr_wrapper, loc, 
name, xdata); + if (!stub) { + STACK_UNWIND_STRICT(getxattr, frame, -1, ENOMEM, NULL, NULL); return 0; + } + worker_enqueue(this, stub); + return 0; out: - STACK_WIND (frame, default_getxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->getxattr, loc, name, xdata); - return 0; + STACK_WIND(frame, default_getxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->getxattr, loc, name, xdata); + return 0; } -int32_t -index_lookup (call_frame_t *frame, xlator_t *this, - loc_t *loc, dict_t *xattr_req) +int64_t +index_fetch_link_count(xlator_t *this, index_xattrop_type_t type) { - call_stub_t *stub = NULL; - index_priv_t *priv = NULL; + index_priv_t *priv = this->private; + char *subdir = NULL; + struct stat lstatbuf = { + 0, + }; + int ret = -1; + int64_t count = -1; + DIR *dirp = NULL; + struct dirent *entry = NULL; + struct dirent scratch[2] = { + { + 0, + }, + }; + char index_dir[PATH_MAX] = { + 0, + }; + char index_path[PATH_MAX] = { + 0, + }; + + subdir = index_get_subdir_from_type(type); + make_index_dir_path(priv->index_basepath, subdir, index_dir, + sizeof(index_dir)); + + dirp = sys_opendir(index_dir); + if (!dirp) + goto out; + + for (;;) { + errno = 0; + entry = sys_readdir(dirp, scratch); + if (!entry || errno != 0) { + if (count == -1) + count = 0; + goto out; + } - priv = this->private; + if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) + continue; - if (gf_uuid_compare (loc->gfid, priv->xattrop_vgfid) && - gf_uuid_compare (loc->pargfid, priv->xattrop_vgfid)) - goto normal; + make_file_path(priv->index_basepath, subdir, entry->d_name, index_path, + sizeof(index_path)); - stub = fop_lookup_stub (frame, index_lookup_wrapper, loc, xattr_req); - if (!stub) { - STACK_UNWIND_STRICT (lookup, frame, -1, ENOMEM, loc->inode, - NULL, NULL, NULL); - return 0; + ret = sys_lstat(index_path, &lstatbuf); + if (ret < 0) { + count = -2; + continue; + } else { + count = lstatbuf.st_nlink - 1; + if (count == 0) + continue; + else + break; } - 
worker_enqueue (this, stub); - return 0; -normal: - STACK_WIND (frame, default_lookup_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, loc, xattr_req); + } +out: + if (dirp) + (void)sys_closedir(dirp); + return count; +} - return 0; +dict_t * +index_fill_link_count(xlator_t *this, dict_t *xdata) +{ + int ret = -1; + index_priv_t *priv = NULL; + int64_t count = -1; + + priv = this->private; + xdata = (xdata) ? dict_ref(xdata) : dict_new(); + if (!xdata) + goto out; + + index_get_link_count(priv, &count, XATTROP); + if (count < 0) { + count = index_fetch_link_count(this, XATTROP); + index_set_link_count(priv, count, XATTROP); + } + + if (count == 0) { + ret = dict_set_int8(xdata, "link-count", 0); + if (ret < 0) + gf_msg(this->name, GF_LOG_ERROR, EINVAL, INDEX_MSG_DICT_SET_FAILED, + "Unable to set link-count"); + } else { + ret = dict_set_int8(xdata, "link-count", 1); + if (ret < 0) + gf_msg(this->name, GF_LOG_ERROR, EINVAL, INDEX_MSG_DICT_SET_FAILED, + "Unable to set link-count"); + } + +out: + return xdata; } int32_t -index_opendir (call_frame_t *frame, xlator_t *this, - loc_t *loc, fd_t *fd, dict_t *xdata) +index_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, struct iatt *postparent) { - index_priv_t *priv = NULL; + xdata = index_fill_link_count(this, xdata); + STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, xdata, + postparent); + if (xdata) + dict_unref(xdata); + return 0; +} - priv = this->private; - if (gf_uuid_compare (fd->inode->gfid, priv->xattrop_vgfid)) +int32_t +index_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) +{ + inode_t *inode = NULL; + call_stub_t *stub = NULL; + char *flag = NULL; + int ret = -1; + + if (!index_is_fop_on_internal_inode(this, loc->parent, loc->pargfid) && + !index_is_fop_on_internal_inode(this, loc->inode, loc->gfid)) { + if (!inode_is_linked(loc->inode)) { + inode = 
inode_find(loc->inode->table, loc->gfid); + if (!index_is_fop_on_internal_inode(this, inode, loc->gfid)) { + inode_unref(inode); goto normal; + } + inode_unref(inode); + } else { + goto normal; + } + } - frame->local = NULL; - STACK_UNWIND_STRICT (opendir, frame, 0, 0, fd, NULL); + stub = fop_lookup_stub(frame, index_lookup_wrapper, loc, xattr_req); + if (!stub) { + STACK_UNWIND_STRICT(lookup, frame, -1, ENOMEM, loc->inode, NULL, NULL, + NULL); return 0; - + } + worker_enqueue(this, stub); + return 0; normal: - STACK_WIND (frame, default_opendir_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->opendir, loc, fd, xdata); - return 0; + ret = dict_get_str_sizen(xattr_req, "link-count", &flag); + if ((ret == 0) && (strcmp(flag, GF_XATTROP_INDEX_COUNT) == 0)) { + STACK_WIND(frame, index_lookup_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, loc, xattr_req); + } else { + STACK_WIND(frame, default_lookup_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, loc, xattr_req); + } + + return 0; } int32_t -index_readdir (call_frame_t *frame, xlator_t *this, - fd_t *fd, size_t size, off_t off, dict_t *xdata) +index_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + dict_t *xdata) { - call_stub_t *stub = NULL; - index_priv_t *priv = NULL; + xdata = index_fill_link_count(this, xdata); + STACK_UNWIND_STRICT(fstat, frame, op_ret, op_errno, buf, xdata); + if (xdata) + dict_unref(xdata); + return 0; +} - priv = this->private; - if (gf_uuid_compare (fd->inode->gfid, priv->xattrop_vgfid)) - goto out; - stub = fop_readdir_stub (frame, index_readdir_wrapper, fd, size, off, - xdata); - if (!stub) { - STACK_UNWIND_STRICT (readdir, frame, -1, ENOMEM, NULL, NULL); - return 0; - } - worker_enqueue (this, stub); - return 0; -out: - STACK_WIND (frame, default_readdir_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readdir, fd, size, off, xdata); - return 0; +int32_t +index_fstat(call_frame_t *frame, xlator_t 
*this, fd_t *fd, dict_t *xdata) +{ + int ret = -1; + char *flag = NULL; + + ret = dict_get_str(xdata, "link-count", &flag); + if ((ret == 0) && (strcmp(flag, GF_XATTROP_INDEX_COUNT) == 0)) { + STACK_WIND(frame, index_fstat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, fd, xdata); + } else { + STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, fd, xdata); + } + + return 0; } -int -index_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, +int32_t +index_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, dict_t *xdata) { - call_stub_t *stub = NULL; - index_priv_t *priv = NULL; + if (!index_is_fop_on_internal_inode(this, fd->inode, NULL)) + goto normal; - priv = this->private; - if (gf_uuid_compare (loc->pargfid, priv->xattrop_vgfid)) - goto out; + frame->local = NULL; + STACK_UNWIND_STRICT(opendir, frame, 0, 0, fd, NULL); + return 0; - stub = fop_unlink_stub (frame, index_unlink_wrapper, loc, xflag, xdata); - if (!stub) { - STACK_UNWIND_STRICT (unlink, frame, -1, ENOMEM, NULL, NULL, - NULL); - return 0; - } - worker_enqueue (this, stub); - return 0; -out: - STACK_WIND (frame, default_unlink_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); - return 0; +normal: + STACK_WIND(frame, default_opendir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->opendir, loc, fd, xdata); + return 0; } -int -index_make_xattrop64_watchlist (xlator_t *this, index_priv_t *priv, - char *watchlist) -{ - char *delim = NULL; - char *dup_watchlist = NULL; - char *key = NULL; - char *saveptr = NULL; - dict_t *xattrs = NULL; - data_t *dummy = NULL; - int ret = 0; - - if (!watchlist) - return 0; - - dup_watchlist = gf_strdup (watchlist); - if (!dup_watchlist) - return -1; - - xattrs = dict_new (); - if (!xattrs) { - ret = -1; - goto out; - } - - dummy = int_to_data (1); - if (!dummy) { - ret = -1; - goto out; - } - - data_ref (dummy); - - delim = ","; - key = strtok_r (dup_watchlist, 
delim, &saveptr); - while (key) { - if (strlen (key) == 0) { - ret = -1; - goto out; - } - - ret = dict_set (xattrs, key, dummy); - if (ret) - goto out; - - key = strtok_r (NULL, delim, &saveptr); - } +int32_t +index_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *xdata) +{ + call_stub_t *stub = NULL; - priv->xattrop64_watchlist = xattrs; - xattrs = NULL; + if (!index_is_fop_on_internal_inode(this, fd->inode, NULL)) + goto out; - ret = 0; + stub = fop_readdir_stub(frame, index_readdir_wrapper, fd, size, off, xdata); + if (!stub) { + STACK_UNWIND_STRICT(readdir, frame, -1, ENOMEM, NULL, NULL); + return 0; + } + worker_enqueue(this, stub); + return 0; out: - if (xattrs) - dict_unref (xattrs); + STACK_WIND(frame, default_readdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdir, fd, size, off, xdata); + return 0; +} - GF_FREE (dup_watchlist); +int +index_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, + dict_t *xdata) +{ + call_stub_t *stub = NULL; - if (dummy) - data_unref (dummy); + if (!index_is_fop_on_internal_inode(this, loc->parent, NULL)) + goto out; - return ret; + stub = fop_unlink_stub(frame, index_unlink_wrapper, loc, xflag, xdata); + if (!stub) { + STACK_UNWIND_STRICT(unlink, frame, -1, ENOMEM, NULL, NULL, NULL); + return 0; + } + worker_enqueue(this, stub); + return 0; +out: + STACK_WIND(frame, default_unlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); + return 0; } -int32_t -mem_acct_init (xlator_t *this) +int +index_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + dict_t *xdata) { - int ret = -1; + call_stub_t *stub = NULL; - ret = xlator_mem_acct_init (this, gf_index_mt_end + 1); + if (!index_is_fop_on_internal_inode(this, loc->parent, NULL)) + goto out; - return ret; + stub = fop_rmdir_stub(frame, index_rmdir_wrapper, loc, flags, xdata); + if (!stub) { + STACK_UNWIND_STRICT(rmdir, frame, -1, ENOMEM, NULL, NULL, NULL); + return 
0; + } + worker_enqueue(this, stub); + return 0; +out: + STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->rmdir, + loc, flags, xdata); + return 0; } int -init (xlator_t *this) -{ - int ret = -1; - index_priv_t *priv = NULL; - pthread_t thread; - pthread_attr_t w_attr; - gf_boolean_t mutex_inited = _gf_false; - gf_boolean_t cond_inited = _gf_false; - gf_boolean_t attr_inited = _gf_false; - char *watchlist = NULL; - - if (!this->children || this->children->next) { - gf_log (this->name, GF_LOG_ERROR, - "'index' not configured with exactly one child"); - goto out; - } +index_make_xattrop_watchlist(xlator_t *this, index_priv_t *priv, + char *watchlist, index_xattrop_type_t type) +{ + char *delim = NULL; + char *dup_watchlist = NULL; + char *key = NULL; + char *saveptr = NULL; + dict_t *xattrs = NULL; + data_t *dummy = NULL; + int ret = 0; + + if (!watchlist) + return 0; - if (!this->parents) { - gf_log (this->name, GF_LOG_WARNING, - "dangling volume. check volfile "); - } + dup_watchlist = gf_strdup(watchlist); + if (!dup_watchlist) + return -1; + + xattrs = dict_new(); + if (!xattrs) { + ret = -1; + goto out; + } + + dummy = int_to_data(1); + if (!dummy) { + ret = -1; + goto out; + } + + data_ref(dummy); + + delim = ","; + key = strtok_r(dup_watchlist, delim, &saveptr); + while (key) { + if (strlen(key) == 0) { + ret = -1; + goto out; + } - priv = GF_CALLOC (1, sizeof (*priv), gf_index_mt_priv_t); - if (!priv) - goto out; + ret = dict_set(xattrs, key, dummy); + if (ret) + goto out; - LOCK_INIT (&priv->lock); - if ((ret = pthread_cond_init(&priv->cond, NULL)) != 0) { - gf_log (this->name, GF_LOG_ERROR, - "pthread_cond_init failed (%d)", ret); - goto out; - } - cond_inited = _gf_true; + key = strtok_r(NULL, delim, &saveptr); + } - if ((ret = pthread_mutex_init(&priv->mutex, NULL)) != 0) { - gf_log (this->name, GF_LOG_ERROR, - "pthread_mutex_init failed (%d)", ret); + switch (type) { + case DIRTY: + priv->dirty_watchlist = dict_copy_with_ref(xattrs, + 
priv->dirty_watchlist); + if (!priv->dirty_watchlist) { + ret = -1; goto out; - } - mutex_inited = _gf_true; - - if ((ret = pthread_attr_init (&w_attr)) != 0) { - gf_log (this->name, GF_LOG_ERROR, - "pthread_attr_init failed (%d)", ret); + } + break; + case XATTROP: + priv->pending_watchlist = dict_copy_with_ref( + xattrs, priv->pending_watchlist); + if (!priv->pending_watchlist) { + ret = -1; goto out; - } - attr_inited = _gf_true; + } + break; + default: + break; + } - ret = pthread_attr_setstacksize (&w_attr, INDEX_THREAD_STACK_SIZE); - if (ret == EINVAL) { - gf_log (this->name, GF_LOG_WARNING, - "Using default thread stack size"); - } + ret = 0; +out: + if (xattrs) + dict_unref(xattrs); - GF_OPTION_INIT ("index-base", priv->index_basepath, path, out); + GF_FREE(dup_watchlist); - GF_OPTION_INIT ("xattrop64-watchlist", watchlist, str, out); - ret = index_make_xattrop64_watchlist (this, priv, watchlist); - if (ret) - goto out; + if (dummy) + data_unref(dummy); - gf_uuid_generate (priv->index); - gf_uuid_generate (priv->xattrop_vgfid); - INIT_LIST_HEAD (&priv->callstubs); + return ret; +} - this->private = priv; +int32_t +mem_acct_init(xlator_t *this) +{ + int ret = -1; - ret = index_dir_create (this, XATTROP_SUBDIR); - if (ret < 0) - goto out; + ret = xlator_mem_acct_init(this, gf_index_mt_end + 1); - ret = gf_thread_create (&thread, &w_attr, index_worker, this); - if (ret) { - gf_log (this->name, GF_LOG_WARNING, "Failed to create " - "worker thread, aborting"); - goto out; - } + return ret; +} - ret = 0; +int +init(xlator_t *this) +{ + int i = 0; + int ret = -1; + int64_t count = -1; + index_priv_t *priv = NULL; + pthread_attr_t w_attr; + gf_boolean_t mutex_inited = _gf_false; + gf_boolean_t cond_inited = _gf_false; + gf_boolean_t attr_inited = _gf_false; + char *watchlist = NULL; + char *dirtylist = NULL; + char *pendinglist = NULL; + char *index_base_parent = NULL; + char *tmp = NULL; + + if (!this->children || this->children->next) { + gf_msg(this->name, 
GF_LOG_ERROR, EINVAL, INDEX_MSG_INVALID_GRAPH, + "'index' not configured with exactly one child"); + goto out; + } + + if (!this->parents) { + gf_msg(this->name, GF_LOG_WARNING, EINVAL, INDEX_MSG_INVALID_GRAPH, + "dangling volume. check volfile "); + } + + priv = GF_CALLOC(1, sizeof(*priv), gf_index_mt_priv_t); + if (!priv) + goto out; + + LOCK_INIT(&priv->lock); + if ((ret = pthread_cond_init(&priv->cond, NULL)) != 0) { + gf_msg(this->name, GF_LOG_ERROR, ret, INDEX_MSG_INVALID_ARGS, + "pthread_cond_init failed"); + goto out; + } + cond_inited = _gf_true; + + if ((ret = pthread_mutex_init(&priv->mutex, NULL)) != 0) { + gf_msg(this->name, GF_LOG_ERROR, ret, INDEX_MSG_INVALID_ARGS, + "pthread_mutex_init failed"); + goto out; + } + mutex_inited = _gf_true; + + if ((ret = pthread_attr_init(&w_attr)) != 0) { + gf_msg(this->name, GF_LOG_ERROR, ret, INDEX_MSG_INVALID_ARGS, + "pthread_attr_init failed"); + goto out; + } + attr_inited = _gf_true; + + ret = pthread_attr_setstacksize(&w_attr, INDEX_THREAD_STACK_SIZE); + if (ret == EINVAL) { + gf_msg(this->name, GF_LOG_WARNING, ret, INDEX_MSG_INVALID_ARGS, + "Using default thread stack size"); + } + + GF_OPTION_INIT("index-base", priv->index_basepath, path, out); + tmp = gf_strdup(priv->index_basepath); + index_base_parent = dirname(tmp); + if (gf_lstat_dir(index_base_parent, NULL) != 0) { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, errno, + INDEX_MSG_INDEX_DIR_CREATE_FAILED, + "Failed to find parent dir (%s) of index basepath %s.", + index_base_parent, priv->index_basepath); + goto out; + } + + GF_OPTION_INIT("xattrop64-watchlist", watchlist, str, out); + ret = index_make_xattrop_watchlist(this, priv, watchlist, XATTROP); + if (ret) + goto out; + + GF_OPTION_INIT("xattrop-dirty-watchlist", dirtylist, str, out); + ret = index_make_xattrop_watchlist(this, priv, dirtylist, DIRTY); + if (ret) + goto out; + + GF_OPTION_INIT("xattrop-pending-watchlist", pendinglist, str, out); + ret = index_make_xattrop_watchlist(this, priv, 
pendinglist, XATTROP); + if (ret) + goto out; + + if (priv->dirty_watchlist) + priv->complete_watchlist = dict_copy_with_ref(priv->dirty_watchlist, + priv->complete_watchlist); + if (priv->pending_watchlist) + priv->complete_watchlist = dict_copy_with_ref(priv->pending_watchlist, + priv->complete_watchlist); + + gf_uuid_generate(priv->index); + for (i = 0; i < XATTROP_TYPE_END; i++) + gf_uuid_generate(priv->internal_vgfid[i]); + + INIT_LIST_HEAD(&priv->callstubs); + GF_ATOMIC_INIT(priv->stub_cnt, 0); + + this->local_pool = mem_pool_new(index_local_t, 64); + if (!this->local_pool) { + ret = -1; + goto out; + } + + this->private = priv; + + ret = index_dir_create(this, XATTROP_SUBDIR); + if (ret < 0) + goto out; + + if (priv->dirty_watchlist) { + ret = index_dir_create(this, DIRTY_SUBDIR); + if (ret < 0) + goto out; + } + + ret = index_dir_create(this, ENTRY_CHANGES_SUBDIR); + if (ret < 0) + goto out; + + /*init indices files counts*/ + count = index_fetch_link_count(this, XATTROP); + index_set_link_count(priv, count, XATTROP); + priv->down = _gf_false; + + priv->curr_count = 0; + ret = gf_thread_create(&priv->thread, &w_attr, index_worker, this, + "idxwrker"); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, ret, + INDEX_MSG_WORKER_THREAD_CREATE_FAILED, + "Failed to create worker thread, aborting"); + goto out; + } + priv->curr_count++; + ret = 0; out: - if (ret) { - if (cond_inited) - pthread_cond_destroy (&priv->cond); - if (mutex_inited) - pthread_mutex_destroy (&priv->mutex); - if (priv && priv->xattrop64_watchlist) - dict_unref (priv->xattrop64_watchlist); - if (priv) - GF_FREE (priv); - this->private = NULL; - } - if (attr_inited) - pthread_attr_destroy (&w_attr); - return ret; + GF_FREE(tmp); + + if (ret) { + if (cond_inited) + pthread_cond_destroy(&priv->cond); + if (mutex_inited) + pthread_mutex_destroy(&priv->mutex); + if (priv && priv->dirty_watchlist) + dict_unref(priv->dirty_watchlist); + if (priv && priv->pending_watchlist) + 
dict_unref(priv->pending_watchlist); + if (priv && priv->complete_watchlist) + dict_unref(priv->complete_watchlist); + if (priv) + GF_FREE(priv); + this->private = NULL; + mem_pool_destroy(this->local_pool); + this->local_pool = NULL; + } + + if (attr_inited) + pthread_attr_destroy(&w_attr); + return ret; } void -fini (xlator_t *this) +fini(xlator_t *this) { - index_priv_t *priv = NULL; - - priv = this->private; - if (!priv) - goto out; - this->private = NULL; - LOCK_DESTROY (&priv->lock); - pthread_cond_destroy (&priv->cond); - pthread_mutex_destroy (&priv->mutex); - if (priv->xattrop64_watchlist) - dict_unref (priv->xattrop64_watchlist); - GF_FREE (priv); + index_priv_t *priv = NULL; + + priv = this->private; + if (!priv) + goto out; + + priv->down = _gf_true; + pthread_cond_broadcast(&priv->cond); + if (priv->thread) { + gf_thread_cleanup_xint(priv->thread); + priv->thread = 0; + } + this->private = NULL; + LOCK_DESTROY(&priv->lock); + pthread_cond_destroy(&priv->cond); + pthread_mutex_destroy(&priv->mutex); + if (priv->dirty_watchlist) + dict_unref(priv->dirty_watchlist); + if (priv->pending_watchlist) + dict_unref(priv->pending_watchlist); + if (priv->complete_watchlist) + dict_unref(priv->complete_watchlist); + GF_FREE(priv); + + if (this->local_pool) { + mem_pool_destroy(this->local_pool); + this->local_pool = NULL; + } out: - return; + return; } int -index_forget (xlator_t *this, inode_t *inode) +index_forget(xlator_t *this, inode_t *inode) { - uint64_t tmp_cache = 0; - if (!inode_ctx_del (inode, this, &tmp_cache)) - GF_FREE ((index_inode_ctx_t*) (long)tmp_cache); + uint64_t tmp_cache = 0; + if (!inode_ctx_del(inode, this, &tmp_cache)) + GF_FREE((index_inode_ctx_t *)(long)tmp_cache); - return 0; + return 0; } int32_t -index_releasedir (xlator_t *this, fd_t *fd) +index_releasedir(xlator_t *this, fd_t *fd) { - index_fd_ctx_t *fctx = NULL; - uint64_t ctx = 0; - int ret = 0; + index_fd_ctx_t *fctx = NULL; + uint64_t ctx = 0; + int ret = 0; - ret = fd_ctx_del 
(fd, this, &ctx); - if (ret < 0) - goto out; + ret = fd_ctx_del(fd, this, &ctx); + if (ret < 0) + goto out; - fctx = (index_fd_ctx_t*) (long) ctx; - if (fctx->dir) { - ret = sys_closedir (fctx->dir); - if (ret) - gf_log (this->name, GF_LOG_ERROR, "closedir error: %s", strerror (errno)); - } + fctx = (index_fd_ctx_t *)(long)ctx; + if (fctx->dir) { + ret = sys_closedir(fctx->dir); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, errno, INDEX_MSG_FD_OP_FAILED, + "closedir error"); + } - GF_FREE (fctx); + GF_FREE(fctx); out: - return 0; + return 0; } int32_t -index_release (xlator_t *this, fd_t *fd) +index_release(xlator_t *this, fd_t *fd) { - index_fd_ctx_t *fctx = NULL; - uint64_t ctx = 0; - int ret = 0; + index_fd_ctx_t *fctx = NULL; + uint64_t ctx = 0; + int ret = 0; - ret = fd_ctx_del (fd, this, &ctx); - if (ret < 0) - goto out; + ret = fd_ctx_del(fd, this, &ctx); + if (ret < 0) + goto out; - fctx = (index_fd_ctx_t*) (long) ctx; - GF_FREE (fctx); + fctx = (index_fd_ctx_t *)(long)ctx; + GF_FREE(fctx); out: - return 0; + return 0; } int -notify (xlator_t *this, int event, void *data, ...) +notify(xlator_t *this, int event, void *data, ...) 
{ - int ret = 0; - ret = default_notify (this, event, data); - return ret; + int ret = 0; + index_priv_t *priv = NULL; + uint64_t stub_cnt = 0; + xlator_t *victim = data; + struct timespec sleep_till = { + 0, + }; + + if (!this) + return 0; + + priv = this->private; + if (!priv) + return 0; + + if ((event == GF_EVENT_PARENT_DOWN) && victim->cleanup_starting) { + stub_cnt = GF_ATOMIC_GET(priv->stub_cnt); + timespec_now_realtime(&sleep_till); + sleep_till.tv_sec += 1; + + /* Wait for draining stub from queue before notify PARENT_DOWN */ + pthread_mutex_lock(&priv->mutex); + { + while (stub_cnt) { + (void)pthread_cond_timedwait(&priv->cond, &priv->mutex, + &sleep_till); + stub_cnt = GF_ATOMIC_GET(priv->stub_cnt); + } + } + pthread_mutex_unlock(&priv->mutex); + gf_log(this->name, GF_LOG_INFO, + "Notify GF_EVENT_PARENT_DOWN for brick %s", victim->name); + } + + if ((event == GF_EVENT_CHILD_DOWN) && victim->cleanup_starting) { + pthread_mutex_lock(&priv->mutex); + { + priv->down = _gf_true; + pthread_cond_broadcast(&priv->cond); + while (priv->curr_count) + pthread_cond_wait(&priv->cond, &priv->mutex); + } + pthread_mutex_unlock(&priv->mutex); + + gf_log(this->name, GF_LOG_INFO, + "Notify GF_EVENT_CHILD_DOWN for brick %s", victim->name); + } + + ret = default_notify(this, event, data); + return ret; } struct xlator_fops fops = { - .xattrop = index_xattrop, - .fxattrop = index_fxattrop, - - //interface functions follow - .getxattr = index_getxattr, - .lookup = index_lookup, - .opendir = index_opendir, - .readdir = index_readdir, - .unlink = index_unlink + .xattrop = index_xattrop, + .fxattrop = index_fxattrop, + + // interface functions follow + .getxattr = index_getxattr, + .lookup = index_lookup, + .opendir = index_opendir, + .readdir = index_readdir, + .unlink = index_unlink, + .rmdir = index_rmdir, + .fstat = index_fstat, }; struct xlator_dumpops dumpops; -struct xlator_cbks cbks = { - .forget = index_forget, - .release = index_release, - .releasedir = 
index_releasedir -}; +struct xlator_cbks cbks = {.forget = index_forget, + .release = index_release, + .releasedir = index_releasedir}; struct volume_options options[] = { - { .key = {"index-base" }, - .type = GF_OPTION_TYPE_PATH, - .description = "path where the index files need to be stored", - }, - { .key = {"xattrop64-watchlist" }, - .type = GF_OPTION_TYPE_STR, - .description = "Comma separated list of xattrs that are watched", - }, - { .key = {NULL} }, + {.key = {"index-base"}, + .type = GF_OPTION_TYPE_PATH, + .description = "path where the index files need to be stored", + .default_value = "{{ brick.path }}/.glusterfs/indices"}, + {.key = {"xattrop64-watchlist"}, + .type = GF_OPTION_TYPE_STR, + .description = "Comma separated list of xattrs that are watched", + .default_value = "trusted.ec.dirty"}, + {.key = {"xattrop-dirty-watchlist"}, + .type = GF_OPTION_TYPE_STR, + .description = "Comma separated list of xattrs that are watched", + .default_value = "trusted.afr.dirty"}, + {.key = {"xattrop-pending-watchlist"}, + .type = GF_OPTION_TYPE_STR, + .description = "Comma separated list of xattrs that are watched", + .default_value = "trusted.afr.{{ volume.name }}"}, + {.key = {NULL}}, +}; + +xlator_api_t xlator_api = { + .init = init, + .fini = fini, + .notify = notify, + .mem_acct_init = mem_acct_init, + .op_version = {1}, /* Present from the initial version */ + .dumpops = &dumpops, + .fops = &fops, + .cbks = &cbks, + .options = options, + .identifier = "index", + .category = GF_MAINTAINED, }; diff --git a/xlators/features/index/src/index.h b/xlators/features/index/src/index.h index a8dfe067ae1..a2b6e6e2570 100644 --- a/xlators/features/index/src/index.h +++ b/xlators/features/index/src/index.h @@ -11,51 +11,76 @@ #ifndef __INDEX_H__ #define __INDEX_H__ -#include "xlator.h" -#include "call-stub.h" -#include "defaults.h" -#include "byte-order.h" -#include "common-utils.h" +#include <glusterfs/xlator.h> +#include <glusterfs/call-stub.h> +#include 
<glusterfs/defaults.h> +#include <glusterfs/byte-order.h> +#include <glusterfs/common-utils.h> #include "index-mem-types.h" -#define INDEX_THREAD_STACK_SIZE ((size_t)(1024*1024)) +#define INDEX_THREAD_STACK_SIZE ((size_t)(1024 * 1024)) + +typedef enum { UNKNOWN, IN, NOTIN } index_state_t; typedef enum { - UNKNOWN, - IN, - NOTIN -} index_state_t; + XATTROP_TYPE_UNSET = -1, + XATTROP, + DIRTY, + ENTRY_CHANGES, + XATTROP_TYPE_END +} index_xattrop_type_t; typedef struct index_inode_ctx { - gf_boolean_t processing; - struct list_head callstubs; - index_state_t state; + gf_boolean_t processing; + struct list_head callstubs; + int state[XATTROP_TYPE_END]; + uuid_t virtual_pargfid; /* virtual gfid of dir under + .glusterfs/indices/entry-changes. */ } index_inode_ctx_t; typedef struct index_fd_ctx { - DIR *dir; - off_t dir_eof; + DIR *dir; + off_t dir_eof; } index_fd_ctx_t; typedef struct index_priv { - char *index_basepath; - uuid_t index; - gf_lock_t lock; - uuid_t xattrop_vgfid;//virtual gfid of the xattrop index dir - struct list_head callstubs; - pthread_mutex_t mutex; - pthread_cond_t cond; - dict_t *xattrop64_watchlist; + char *index_basepath; + char *dirty_basepath; + uuid_t index; + gf_lock_t lock; + uuid_t internal_vgfid[XATTROP_TYPE_END]; + struct list_head callstubs; + pthread_mutex_t mutex; + pthread_cond_t cond; + dict_t *dirty_watchlist; + dict_t *pending_watchlist; + dict_t *complete_watchlist; + int64_t pending_count; + pthread_t thread; + gf_boolean_t down; + gf_atomic_t stub_cnt; + int32_t curr_count; } index_priv_t; -#define INDEX_STACK_UNWIND(fop, frame, params ...) \ -do { \ - if (frame) { \ - inode_t *_inode = frame->local; \ - frame->local = NULL; \ - inode_unref (_inode); \ - } \ - STACK_UNWIND_STRICT (fop, frame, params); \ -} while (0) +typedef struct index_local { + inode_t *inode; + dict_t *xdata; +} index_local_t; + +#define INDEX_STACK_UNWIND(fop, frame, params...) 
\ + do { \ + index_local_t *__local = NULL; \ + if (frame) { \ + __local = frame->local; \ + frame->local = NULL; \ + } \ + STACK_UNWIND_STRICT(fop, frame, params); \ + if (__local) { \ + inode_unref(__local->inode); \ + if (__local->xdata) \ + dict_unref(__local->xdata); \ + mem_put(__local); \ + } \ + } while (0) #endif diff --git a/xlators/features/mac-compat/Makefile.am b/xlators/features/leases/Makefile.am index d471a3f9243..a985f42a877 100644 --- a/xlators/features/mac-compat/Makefile.am +++ b/xlators/features/leases/Makefile.am @@ -1,3 +1,3 @@ SUBDIRS = src -CLEANFILES = +CLEANFILES = diff --git a/xlators/features/leases/src/Makefile.am b/xlators/features/leases/src/Makefile.am new file mode 100644 index 00000000000..a1aef10e299 --- /dev/null +++ b/xlators/features/leases/src/Makefile.am @@ -0,0 +1,20 @@ +if WITH_SERVER +xlator_LTLIBRARIES = leases.la +endif +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features + +leases_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) + +leases_la_SOURCES = leases.c leases-internal.c + +leases_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +noinst_HEADERS = leases.h leases-mem-types.h leases-messages.h + +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ + -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \ + -I$(CONTRIBDIR)/timer-wheel + +AM_CFLAGS = -Wall $(GF_CFLAGS) + +CLEANFILES = diff --git a/xlators/features/leases/src/leases-internal.c b/xlators/features/leases/src/leases-internal.c new file mode 100644 index 00000000000..56dee244281 --- /dev/null +++ b/xlators/features/leases/src/leases-internal.c @@ -0,0 +1,1412 @@ +/* + Copyright (c) 2015-2016 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. 
+ + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "leases.h" + +/* Mutex locks used in this xlator and their order of acquisition: + * Check lease conflict: + * lease_ctx lock + * add_timer => internal timer locks + * lease_ctx unlock + * + * Add/remove lease: + * lease_ctx lock + * add_timer => internal timer locks + * OR + * priv lock => Adding/removing to/from the cleanup client list + * priv unlock + * lease_ctx unlock + * + * Timer thread: + * Timer internal lock + * priv lock => By timer handler + * priv unlock + * Timer internal unlock + * + * Expired recall cleanup thread: + * priv lock + * priv condwait + * priv unlock + * lease_ctx lock + * priv lock + * priv unlock + * lease_ctx unlock + */ + +/* + * Check if lease_lk is enabled + * Return Value: + * _gf_true - lease lock option enabled + * _gf_false - lease lock option disabled + */ +gf_boolean_t +is_leases_enabled(xlator_t *this) +{ + leases_private_t *priv = NULL; + gf_boolean_t is_enabled = _gf_false; + + GF_VALIDATE_OR_GOTO("leases", this, out); + + if (this->private) { + priv = (leases_private_t *)this->private; + is_enabled = priv->leases_enabled; + } +out: + return is_enabled; +} + +/* + * Get the recall_leaselk_timeout + * Return Value: + * timeout value(in seconds) set as an option to this xlator. 
+ * -1 error case + */ +static int32_t +get_recall_lease_timeout(xlator_t *this) +{ + leases_private_t *priv = NULL; + int32_t timeout = -1; + + GF_VALIDATE_OR_GOTO("leases", this, out); + + if (this->private) { + priv = (leases_private_t *)this->private; + timeout = priv->recall_lease_timeout; + } +out: + return timeout; +} + +static void +__dump_leases_info(xlator_t *this, lease_inode_ctx_t *lease_ctx) +{ + lease_id_entry_t *lease_entry = NULL; + lease_id_entry_t *tmp = NULL; + + GF_VALIDATE_OR_GOTO("leases", this, out); + GF_VALIDATE_OR_GOTO("leases", lease_ctx, out); + + gf_msg_debug(this->name, 0, + "Lease held on this inode, lease_type: %d," + " lease_cnt:%" PRIu64 + ", RD lease:%d, RW lease:%d, " + "openfd cnt:%" PRIu64, + lease_ctx->lease_type, lease_ctx->lease_cnt, + lease_ctx->lease_type_cnt[GF_RD_LEASE], + lease_ctx->lease_type_cnt[GF_RW_LEASE], lease_ctx->openfd_cnt); + + list_for_each_entry_safe(lease_entry, tmp, &lease_ctx->lease_id_list, + lease_id_list) + { + gf_msg_debug(this->name, 0, + "Leases held by client: %s, lease " + "ID:%s, RD lease:%d, RW lease:%d, lease_type: %d, " + "lease_cnt:%" PRIu64, + lease_entry->client_uid, lease_entry->lease_id, + lease_entry->lease_type_cnt[GF_RD_LEASE], + lease_entry->lease_type_cnt[GF_RW_LEASE], + lease_entry->lease_type, lease_entry->lease_cnt); + } +out: + return; +} + +static int +__lease_ctx_set(inode_t *inode, xlator_t *this) +{ + lease_inode_ctx_t *inode_ctx = NULL; + int ret = -1; + uint64_t ctx = 0; + + GF_VALIDATE_OR_GOTO("leases", inode, out); + GF_VALIDATE_OR_GOTO("leases", this, out); + + ret = __inode_ctx_get(inode, this, &ctx); + if (!ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, LEASE_MSG_INVAL_INODE_CTX, + "inode_ctx_get failed"); + goto out; + } + + inode_ctx = GF_CALLOC(1, sizeof(*inode_ctx), + gf_leases_mt_lease_inode_ctx_t); + GF_CHECK_ALLOC(inode_ctx, ret, out); + + pthread_mutex_init(&inode_ctx->lock, NULL); + INIT_LIST_HEAD(&inode_ctx->lease_id_list); + 
INIT_LIST_HEAD(&inode_ctx->blocked_list); + + inode_ctx->lease_cnt = 0; + + ret = __inode_ctx_set(inode, this, (uint64_t *)inode_ctx); + if (ret) { + GF_FREE(inode_ctx); + gf_msg(this->name, GF_LOG_INFO, 0, LEASE_MSG_INVAL_INODE_CTX, + "failed to set inode ctx (%p)", inode); + } +out: + return ret; +} + +static lease_inode_ctx_t * +__lease_ctx_get(inode_t *inode, xlator_t *this) +{ + lease_inode_ctx_t *inode_ctx = NULL; + uint64_t ctx = 0; + int ret = 0; + + GF_VALIDATE_OR_GOTO("leases", inode, out); + GF_VALIDATE_OR_GOTO("leases", this, out); + + ret = __inode_ctx_get(inode, this, &ctx); + if (ret < 0) { + ret = __lease_ctx_set(inode, this); + if (ret < 0) + goto out; + + ret = __inode_ctx_get(inode, this, &ctx); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, LEASE_MSG_INVAL_INODE_CTX, + "failed to get inode ctx (%p)", inode); + goto out; + } + } + + inode_ctx = (lease_inode_ctx_t *)(long)ctx; +out: + return inode_ctx; +} + +lease_inode_ctx_t * +lease_ctx_get(inode_t *inode, xlator_t *this) +{ + lease_inode_ctx_t *inode_ctx = NULL; + + GF_VALIDATE_OR_GOTO("leases", inode, out); + GF_VALIDATE_OR_GOTO("leases", this, out); + + LOCK(&inode->lock); + { + inode_ctx = __lease_ctx_get(inode, this); + } + UNLOCK(&inode->lock); +out: + return inode_ctx; +} + +static lease_id_entry_t * +new_lease_id_entry(call_frame_t *frame, const char *lease_id) +{ + lease_id_entry_t *lease_entry = NULL; + + GF_VALIDATE_OR_GOTO("leases", frame, out); + GF_VALIDATE_OR_GOTO("leases", lease_id, out); + + lease_entry = GF_CALLOC(1, sizeof(*lease_entry), + gf_leases_mt_lease_id_entry_t); + if (!lease_entry) { + gf_msg(frame->this->name, GF_LOG_ERROR, ENOMEM, LEASE_MSG_NO_MEM, + "Memory allocation for lease_entry failed"); + return NULL; + } + + INIT_LIST_HEAD(&lease_entry->lease_id_list); + lease_entry->lease_type = NONE; + lease_entry->lease_cnt = 0; + lease_entry->recall_time = get_recall_lease_timeout(frame->this); + lease_entry->client_uid = 
gf_strdup(frame->root->client->client_uid); + if (!lease_entry->client_uid) { + gf_msg(frame->this->name, GF_LOG_ERROR, ENOMEM, LEASE_MSG_NO_MEM, + "Memory allocation for client_uid failed"); + GF_FREE(lease_entry); + lease_entry = NULL; + goto out; + } + + memcpy(lease_entry->lease_id, lease_id, LEASE_ID_SIZE); +out: + return lease_entry; +} + +static void +__destroy_lease_id_entry(lease_id_entry_t *lease_entry) +{ + GF_VALIDATE_OR_GOTO("leases", lease_entry, out); + + list_del_init(&lease_entry->lease_id_list); + GF_FREE(lease_entry->client_uid); + GF_FREE(lease_entry); +out: + return; +} + +static inline gf_boolean_t +__is_same_lease_id(const char *k1, const char *k2) +{ + if (memcmp(k1, k2, strlen(k1)) == 0) + return _gf_true; + + return _gf_false; +} + +/* Checks if there are any leases, other than the leases taken + * by the given lease_id + */ +static gf_boolean_t +__another_lease_found(lease_inode_ctx_t *lease_ctx, const char *lease_id) +{ + lease_id_entry_t *lease_entry = NULL; + lease_id_entry_t *tmp = NULL; + gf_boolean_t found_lease = _gf_false; + + GF_VALIDATE_OR_GOTO("leases", lease_id, out); + GF_VALIDATE_OR_GOTO("leases", lease_ctx, out); + + list_for_each_entry_safe(lease_entry, tmp, &lease_ctx->lease_id_list, + lease_id_list) + { + if (!__is_same_lease_id(lease_id, lease_entry->lease_id)) { + if (lease_entry->lease_cnt > 0) { + found_lease = _gf_true; + break; + } + } + } +out: + return found_lease; +} + +/* Returns the lease_id_entry for a given lease_id and a given inode. 
+ * Return values: + * NULL - If no client entry found + * lease_id_entry_t* - a pointer to the client entry if found + */ +static lease_id_entry_t * +__get_lease_id_entry(lease_inode_ctx_t *lease_ctx, const char *lease_id) +{ + lease_id_entry_t *lease_entry = NULL; + lease_id_entry_t *tmp = NULL; + lease_id_entry_t *found = NULL; + + GF_VALIDATE_OR_GOTO("leases", lease_id, out); + GF_VALIDATE_OR_GOTO("leases", lease_ctx, out); + + list_for_each_entry_safe(lease_entry, tmp, &lease_ctx->lease_id_list, + lease_id_list) + { + if (__is_same_lease_id(lease_id, lease_entry->lease_id)) { + found = lease_entry; + gf_msg_debug("leases", 0, + "lease ID entry found " + "Client UID:%s, lease id:%s", + lease_entry->client_uid, + leaseid_utoa(lease_entry->lease_id)); + break; + } + } +out: + return found; +} + +/* Returns the lease_id_entry for a given lease_id and a given inode, + * if none found creates one. + * Return values: + * lease_id_entry_t* - a pointer to the client entry + */ +static lease_id_entry_t * +__get_or_new_lease_entry(call_frame_t *frame, const char *lease_id, + lease_inode_ctx_t *lease_ctx) +{ + lease_id_entry_t *lease_entry = NULL; + + GF_VALIDATE_OR_GOTO("leases", frame, out); + GF_VALIDATE_OR_GOTO("leases", lease_id, out); + GF_VALIDATE_OR_GOTO("leases", lease_ctx, out); + + lease_entry = __get_lease_id_entry(lease_ctx, lease_id); + if (!lease_entry) { /* create one */ + lease_entry = new_lease_id_entry(frame, lease_id); + if (!lease_entry) + goto out; + + list_add_tail(&lease_entry->lease_id_list, &lease_ctx->lease_id_list); + + gf_msg_debug(frame->this->name, 0, + "lease ID entry added," + " Client UID:%s, lease id:%s", + lease_entry->client_uid, + leaseid_utoa(lease_entry->lease_id)); + } +out: + return lease_entry; +} + +static lease_inode_t * +new_lease_inode(inode_t *inode) +{ + lease_inode_t *l_inode = GF_MALLOC(sizeof(*l_inode), + gf_leases_mt_lease_inode_t); + if (!l_inode) + goto out; + + INIT_LIST_HEAD(&l_inode->list); + l_inode->inode = 
inode_ref(inode); +out: + return l_inode; +} + +static void +__destroy_lease_inode(lease_inode_t *l_inode) +{ + list_del_init(&l_inode->list); + inode_unref(l_inode->inode); + GF_FREE(l_inode); +} + +static lease_client_t * +new_lease_client(const char *client_uid) +{ + lease_client_t *clnt = GF_MALLOC(sizeof(*clnt), + gf_leases_mt_lease_client_t); + if (!clnt) + goto out; + + INIT_LIST_HEAD(&clnt->client_list); + INIT_LIST_HEAD(&clnt->inode_list); + clnt->client_uid = gf_strdup(client_uid); +out: + return clnt; +} + +static void +__destroy_lease_client(lease_client_t *clnt) +{ + list_del_init(&clnt->inode_list); + list_del_init(&clnt->client_list); + GF_FREE(clnt); + + return; +} + +static lease_client_t * +__get_lease_client(xlator_t *this, leases_private_t *priv, + const char *client_uid) +{ + lease_client_t *clnt = NULL; + lease_client_t *tmp = NULL; + lease_client_t *found = NULL; + + list_for_each_entry_safe(clnt, tmp, &priv->client_list, client_list) + { + if ((strcmp(clnt->client_uid, client_uid) == 0)) { + found = clnt; + gf_msg_debug(this->name, 0, + "Client:%s already found " + "in the cleanup list", + client_uid); + break; + } + } + return found; +} + +static lease_client_t * +__get_or_new_lease_client(xlator_t *this, leases_private_t *priv, + const char *client_uid) +{ + lease_client_t *found = NULL; + + found = __get_lease_client(this, priv, client_uid); + if (!found) { + found = new_lease_client(client_uid); + if (!found) + goto out; + list_add_tail(&found->client_list, &priv->client_list); + gf_msg_debug(this->name, 0, + "Adding a new client:%s entry " + "to the cleanup list", + client_uid); + } +out: + return found; +} + +static int +add_inode_to_client_list(xlator_t *this, inode_t *inode, const char *client_uid) +{ + leases_private_t *priv = this->private; + lease_client_t *clnt = NULL; + + lease_inode_t *lease_inode = new_lease_inode(inode); + if (!lease_inode) + return -ENOMEM; + + pthread_mutex_lock(&priv->mutex); + { + clnt = 
__get_or_new_lease_client(this, priv, client_uid); + if (!clnt) { + pthread_mutex_unlock(&priv->mutex); + __destroy_lease_inode(lease_inode); + return -ENOMEM; + } + list_add_tail(&clnt->inode_list, &lease_inode->list); + } + pthread_mutex_unlock(&priv->mutex); + gf_msg_debug(this->name, 0, + "Added a new inode:%p to the client(%s) " + "cleanup list, gfid(%s)", + inode, client_uid, uuid_utoa(inode->gfid)); + return 0; +} + +/* Add lease entry to the corresponding client entry. + * Return values: + * 0 Success + * -1 Failure + */ +static int +__add_lease(call_frame_t *frame, inode_t *inode, lease_inode_ctx_t *lease_ctx, + const char *client_uid, struct gf_lease *lease) +{ + lease_id_entry_t *lease_entry = NULL; + int ret = -1; + + GF_VALIDATE_OR_GOTO("leases", frame, out); + GF_VALIDATE_OR_GOTO("leases", client_uid, out); + GF_VALIDATE_OR_GOTO("leases", lease_ctx, out); + GF_VALIDATE_OR_GOTO("leases", inode, out); + GF_VALIDATE_OR_GOTO("leases", lease, out); + + gf_msg_trace(frame->this->name, 0, + "Granting lease lock to client %s with lease id %s" + " on gfid(%s)", + client_uid, leaseid_utoa(lease->lease_id), + uuid_utoa(inode->gfid)); + + lease_entry = __get_or_new_lease_entry(frame, lease->lease_id, lease_ctx); + if (!lease_entry) { + errno = ENOMEM; + goto out; + } + + lease_entry->lease_type_cnt[lease->lease_type]++; + lease_entry->lease_cnt++; + lease_entry->lease_type |= lease->lease_type; + /* If this is the first lease taken by the client on the file, then + * add this inode/file to the client disconnect cleanup list + */ + if (lease_entry->lease_cnt == 1) { + add_inode_to_client_list(frame->this, inode, client_uid); + } + + lease_ctx->lease_cnt++; + lease_ctx->lease_type_cnt[lease->lease_type]++; + lease_ctx->lease_type |= lease->lease_type; + + /* Take a ref for the first lock taken on this inode. 
Corresponding + * unref when all the leases are unlocked or during DISCONNECT + * Ref is required because the inode on which lease is acquired should + * not be deleted when lru cleanup kicks in*/ + if (lease_ctx->lease_cnt == 1) { + lease_ctx->inode = inode_ref(inode); + } + + ret = 0; +out: + return ret; +} + +static gf_boolean_t +__is_clnt_lease_none(const char *client_uid, lease_inode_ctx_t *lease_ctx) +{ + gf_boolean_t lease_none = _gf_true; + lease_id_entry_t *lease_entry = NULL; + lease_id_entry_t *tmp = NULL; + + list_for_each_entry_safe(lease_entry, tmp, &lease_ctx->lease_id_list, + lease_id_list) + { + if ((strcmp(client_uid, lease_entry->client_uid) == 0) && + (lease_entry->lease_cnt != 0)) { + lease_none = _gf_false; + break; + } + } + + return lease_none; +} + +static int +__remove_inode_from_clnt_list(xlator_t *this, lease_client_t *clnt, + inode_t *inode) +{ + int ret = -1; + lease_inode_t *l_inode = NULL; + lease_inode_t *tmp1 = NULL; + + list_for_each_entry_safe(l_inode, tmp1, &clnt->inode_list, list) + { + if (l_inode->inode == inode) { + __destroy_lease_inode(l_inode); + gf_msg_debug(this->name, 0, + "Removed the inode from the client cleanup list"); + ret = 0; + } + } + /* TODO: Remove the client entry from the cleanup list */ + + return ret; +} + +static int +remove_from_clnt_list(xlator_t *this, const char *client_uid, inode_t *inode) +{ + leases_private_t *priv = NULL; + int ret = -1; + lease_client_t *clnt = NULL; + + priv = this->private; + if (!priv) + goto out; + + pthread_mutex_lock(&priv->mutex); + { + clnt = __get_lease_client(this, priv, client_uid); + if (!clnt) { + pthread_mutex_unlock(&priv->mutex); + gf_msg(this->name, GF_LOG_ERROR, 0, LEASE_MSG_CLNT_NOTFOUND, + "There is no client entry found in the cleanup list"); + goto out; + } + ret = __remove_inode_from_clnt_list(this, clnt, inode); + if (ret) { + pthread_mutex_unlock(&priv->mutex); + gf_msg(this->name, GF_LOG_ERROR, 0, LEASE_MSG_INODE_NOTFOUND, + "There is no inode entry 
found in the cleanup list"); + goto out; + } + } + pthread_mutex_unlock(&priv->mutex); +out: + return ret; +} + +/* Remove lease entry in the corresponding client entry. + */ +static int +__remove_lease(xlator_t *this, inode_t *inode, lease_inode_ctx_t *lease_ctx, + const char *client_uid, struct gf_lease *lease) +{ + lease_id_entry_t *lease_entry = NULL; + int ret = 0; + int32_t lease_type = 0; + leases_private_t *priv = NULL; + + GF_VALIDATE_OR_GOTO("leases", lease_ctx, out); + GF_VALIDATE_OR_GOTO("leases", lease, out); + + priv = this->private; + + gf_msg_trace(this->name, 0, + "Removing lease entry for client: %s, " + "lease type:%d, lease id:%s", + client_uid, lease->lease_type, leaseid_utoa(lease->lease_id)); + + /* There could be a race where in server recalled the lease and by the time + * client sends lease_unlock request, server may have revoked it. To handle + * such cases, if lease doesnt exist treat it as noop and return success. + */ + lease_entry = __get_lease_id_entry(lease_ctx, lease->lease_id); + if (!lease_entry) { + gf_msg(this->name, GF_LOG_INFO, 0, LEASE_MSG_INVAL_UNLK_LEASE, + "Got unlock lease request from client:%s, but has no " + "corresponding lock", + client_uid); + ret = 0; + goto out; + } + + if (!(lease_entry->lease_type & lease->lease_type)) { + gf_msg(this->name, GF_LOG_INFO, 0, LEASE_MSG_INVAL_UNLK_LEASE, + "Got unlock lease request from client:%s for an invalid " + "lease_type", + client_uid); + ret = -EINVAL; + errno = EINVAL; + goto out; + } + lease_type = lease->lease_type; + lease_entry->lease_type_cnt[lease_type]--; + lease_entry->lease_cnt--; + + lease_ctx->lease_type_cnt[lease_type]--; + lease_ctx->lease_cnt--; + + if (lease_entry->lease_type_cnt[lease_type] == 0) + lease_entry->lease_type = lease_entry->lease_type & (~lease_type); + + if (lease_ctx->lease_type_cnt[lease_type] == 0) + lease_ctx->lease_type = lease_ctx->lease_type & (~lease_type); + + if (lease_entry->lease_cnt == 0) { + if (__is_clnt_lease_none(client_uid, 
lease_ctx)) { + gf_msg_trace(this->name, 0, + "Client(%s) has no leases" + " on gfid (%s), hence removing the inode" + " from the client cleanup list", + client_uid, uuid_utoa(inode->gfid)); + remove_from_clnt_list(this, client_uid, lease_ctx->inode); + } + __destroy_lease_id_entry(lease_entry); + lease_ctx->blocked_fops_resuming = _gf_true; + } + + if (lease_ctx->lease_cnt == 0 && lease_ctx->timer) { + ret = gf_tw_del_timer(priv->timer_wheel, lease_ctx->timer); + lease_ctx->recall_in_progress = _gf_false; + lease_ctx->timer = NULL; + } +out: + return ret; +} + +static gf_boolean_t +__is_lease_grantable(xlator_t *this, lease_inode_ctx_t *lease_ctx, + struct gf_lease *lease, inode_t *inode) +{ + uint32_t fd_count = 0; + int32_t flags = 0; + fd_t *iter_fd = NULL; + gf_boolean_t grant = _gf_false; + int ret = 0; + lease_fd_ctx_t *fd_ctx = NULL; + uint64_t ctx = 0; + + GF_VALIDATE_OR_GOTO("leases", lease_ctx, out); + GF_VALIDATE_OR_GOTO("leases", lease, out); + GF_VALIDATE_OR_GOTO("leases", inode, out); + + if (lease_ctx->recall_in_progress) { + gf_msg_debug(this->name, 0, + "Recall in progress, hence " + "failing the lease request"); + grant = _gf_false; + goto out; + } + + if (lease_ctx->blocked_fops_resuming) { + gf_msg_debug(this->name, 0, + "Previously blocked fops resuming, hence " + "failing the lease request"); + grant = _gf_false; + goto out; + } + + LOCK(&inode->lock); + { + list_for_each_entry(iter_fd, &inode->fd_list, inode_list) + { + ret = fd_ctx_get(iter_fd, this, &ctx); + if (ret < 0) { + grant = _gf_false; + UNLOCK(&inode->lock); + gf_msg(this->name, GF_LOG_ERROR, 0, LEASE_MSG_INVAL_FD_CTX, + "Unable to get fd ctx"); + goto out; + } + fd_ctx = (lease_fd_ctx_t *)(long)ctx; + + /* Check for open fd conflict, note that open fds from + * the same lease id is not checked for conflict, as it is + * lease id based lease. 
+ */ + if (fd_ctx->client_uid != NULL && + !__is_same_lease_id(fd_ctx->lease_id, lease->lease_id)) { + fd_count++; + flags |= iter_fd->flags; + } + } + } + UNLOCK(&inode->lock); + + gf_msg_debug(this->name, 0, "open fd count:%d flags:%d", fd_count, flags); + + __dump_leases_info(this, lease_ctx); + + switch (lease->lease_type) { + case GF_RD_LEASE: + /* check open fd conflict */ + if ((fd_count > 0) && ((flags & O_WRONLY) || (flags & O_RDWR))) { + grant = _gf_false; + break; + } + + /* check for conflict with existing leases */ + if (lease_ctx->lease_type == NONE || + lease_ctx->lease_type == GF_RD_LEASE || + !(__another_lease_found(lease_ctx, lease->lease_id))) + grant = _gf_true; + else + grant = _gf_false; + break; + + case GF_RW_LEASE: + /* check open fd conflict; conflict if there are any fds open + * other than the client on which the lease is requested. */ + if (fd_count > 0) { + grant = _gf_false; + break; + } + + /* check existing lease conflict */ + if (lease_ctx->lease_type == NONE || + !(__another_lease_found(lease_ctx, lease->lease_id))) + grant = _gf_true; + else + grant = _gf_false; + break; + + default: + gf_msg(this->name, GF_LOG_ERROR, EINVAL, LEASE_MSG_INVAL_LEASE_TYPE, + "Invalid lease type specified"); + break; + } +out: + return grant; +} + +static void +do_blocked_fops(xlator_t *this, lease_inode_ctx_t *lease_ctx) +{ + struct list_head wind_list; + fop_stub_t *blk_fop = NULL; + fop_stub_t *tmp = NULL; + + INIT_LIST_HEAD(&wind_list); + + pthread_mutex_lock(&lease_ctx->lock); + { + if (!lease_ctx->blocked_fops_resuming) { + /* lease_ctx->blocked_fops_resuming will be set + * only when the last lease is released. That + * is when we need to resume blocked fops and unref + * the inode taken in __add_lease (when lease_cnt == 1). + * Return otherwise. 
+ */ + pthread_mutex_unlock(&lease_ctx->lock); + return; + } + + list_for_each_entry_safe(blk_fop, tmp, &lease_ctx->blocked_list, list) + { + list_del_init(&blk_fop->list); + list_add_tail(&blk_fop->list, &wind_list); + } + } + pthread_mutex_unlock(&lease_ctx->lock); + + gf_msg_trace(this->name, 0, "Executing the blocked stubs on gfid(%s)", + uuid_utoa(lease_ctx->inode->gfid)); + list_for_each_entry_safe(blk_fop, tmp, &wind_list, list) + { + list_del_init(&blk_fop->list); + gf_msg_trace(this->name, 0, "Executing fop:%d", blk_fop->stub->fop); + call_resume(blk_fop->stub); + GF_FREE(blk_fop); + } + + pthread_mutex_lock(&lease_ctx->lock); + { + lease_ctx->lease_type = NONE; + /* unref the inode taken in __add_lease + * (when lease_cnt == 1) */ + lease_ctx->blocked_fops_resuming = _gf_false; + inode_unref(lease_ctx->inode); + lease_ctx->inode = NULL; + } + pthread_mutex_unlock(&lease_ctx->lock); + + return; +} + +/* Timer callback registered by __recall_lease. Queues the inode on + * priv->recall_list and wakes the waiter on priv->cond, then releases + * everything __recall_lease attached to the timer: the inode ref, the + * timer_data and the timer itself. + */ +void +recall_lease_timer_handler(struct gf_tw_timer_list *timer, void *data, + unsigned long calltime) +{ + inode_t *inode = NULL; + lease_inode_t *lease_inode = NULL; + leases_private_t *priv = NULL; + lease_timer_data_t *timer_data = NULL; + + timer_data = data; + + priv = timer_data->this->private; + inode = timer_data->inode; + lease_inode = new_lease_inode(inode); + if (!lease_inode) { + errno = ENOMEM; + goto out; + } + pthread_mutex_lock(&priv->mutex); + { + list_add_tail(&lease_inode->list, &priv->recall_list); + pthread_cond_broadcast(&priv->cond); + } + pthread_mutex_unlock(&priv->mutex); +out: + /* unref the inode_ref taken by timer_data in __recall_lease */ + inode_unref(timer_data->inode); + + /* timer_data was GF_MALLOC'ed in __recall_lease and is reachable only + * through this timer, so it has to be released here. */ + GF_FREE(timer_data); + GF_FREE(timer); +} + +static void +__recall_lease(xlator_t *this, lease_inode_ctx_t *lease_ctx) +{ + lease_id_entry_t *lease_entry = NULL; + lease_id_entry_t *tmp = NULL; + struct gf_upcall up_req = { + 0, + }; + struct gf_upcall_recall_lease recall_req = { + 0, + }; + int notify_ret = -1; + struct gf_tw_timer_list *timer = NULL; + leases_private_t *priv = 
NULL; + lease_timer_data_t *timer_data = NULL; + time_t recall_time; + + if (lease_ctx->recall_in_progress) { + gf_msg_debug(this->name, 0, + "Lease recall is already in " + "progress, hence not sending another recall"); + goto out; + } + + priv = this->private; + recall_time = gf_time(); + list_for_each_entry_safe(lease_entry, tmp, &lease_ctx->lease_id_list, + lease_id_list) + { + gf_uuid_copy(up_req.gfid, lease_ctx->inode->gfid); + up_req.client_uid = lease_entry->client_uid; + up_req.event_type = GF_UPCALL_RECALL_LEASE; + up_req.data = &recall_req; + + notify_ret = this->notify(this, GF_EVENT_UPCALL, &up_req); + if (notify_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, LEASE_MSG_RECALL_FAIL, + "Recall notification to client: %s failed", + lease_entry->client_uid); + /* Do not return from here, continue registering the timer, + this is required mostly o keep replicas in sync*/ + } else { + gf_msg_debug(this->name, 0, + "Recall lease (all)" + "notification sent to client %s", + lease_entry->client_uid); + } + + lease_ctx->recall_in_progress = _gf_true; + lease_entry->recall_time = recall_time; + } + timer = GF_MALLOC(sizeof(*timer), gf_common_mt_tw_timer_list); + if (!timer) { + goto out; + } + timer_data = GF_MALLOC(sizeof(lease_timer_data_t), + gf_leases_mt_timer_data_t); + if (!timer_data) { + GF_FREE(timer); + goto out; + } + + timer_data->inode = inode_ref(lease_ctx->inode); + timer_data->this = this; + timer->data = timer_data; + + INIT_LIST_HEAD(&timer->entry); + timer->expires = get_recall_lease_timeout(this); + timer->function = recall_lease_timer_handler; + lease_ctx->timer = timer; + gf_tw_add_timer(priv->timer_wheel, timer); + gf_msg_trace(this->name, 0, + "Registering timer " + "%p, after " + "sending recall", + timer); +out: + return; +} + +/* ret = 0; STACK_UNWIND Success + * ret = -1; STACK_UNWIND failure + */ +int +process_lease_req(call_frame_t *frame, xlator_t *this, inode_t *inode, + struct gf_lease *lease) +{ + int ret = 0; + char 
*client_uid = NULL; + lease_inode_ctx_t *lease_ctx = NULL; + + GF_VALIDATE_OR_GOTO("leases", frame, out); + GF_VALIDATE_OR_GOTO("leases", this, out); + GF_VALIDATE_OR_GOTO("leases", inode, out); + GF_VALIDATE_OR_GOTO("leases", lease, out); + + client_uid = frame->root->client->client_uid; + + if (!is_valid_lease_id(lease->lease_id)) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, LEASE_MSG_INVAL_LEASE_ID, + "Invalid lease id, from" + "client:%s", + client_uid); + ret = -EINVAL; + errno = EINVAL; + goto out; + } + + lease_ctx = lease_ctx_get(inode, this); + if (!lease_ctx) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, LEASE_MSG_NO_MEM, + "Unable to create/get inode ctx, " + "inode:%p", + inode); + ret = -ENOMEM; + errno = ENOMEM; + goto out; + } + + gf_msg_debug(this->name, 0, + "Lease request from client: %s, " + "lease type:%d, lease cmd:%d, lease ID:%s, gfid:%s", + client_uid, lease->lease_type, lease->cmd, + leaseid_utoa(lease->lease_id), uuid_utoa(inode->gfid)); + + pthread_mutex_lock(&lease_ctx->lock); + { + switch (lease->cmd) { + case GF_GET_LEASE: + lease->lease_type = lease_ctx->lease_type; + gf_msg_debug(this->name, 0, + "Get lease, existing lease" + "type: %d", + lease_ctx->lease_type); + /*TODO:Should it consider lease id or client_uid?*/ + break; + + case GF_SET_LEASE: + if (__is_lease_grantable(this, lease_ctx, lease, inode)) { + __add_lease(frame, inode, lease_ctx, client_uid, lease); + ret = 0; + } else { + gf_msg_debug(this->name, GF_LOG_DEBUG, + "Not granting the conflicting lease" + " request from %s on gfid(%s)", + client_uid, uuid_utoa(inode->gfid)); + __recall_lease(this, lease_ctx); + ret = -1; + } + break; + case GF_UNLK_LEASE: + ret = __remove_lease(this, inode, lease_ctx, client_uid, lease); + if ((ret >= 0) && (lease_ctx->lease_cnt == 0)) { + pthread_mutex_unlock(&lease_ctx->lock); + goto unblock; + } + break; + default: + ret = -EINVAL; + break; + } + } + pthread_mutex_unlock(&lease_ctx->lock); + + return ret; + +unblock: + 
do_blocked_fops(this, lease_ctx); +out: + return ret; +} + +/* ret = 1 conflict + * ret = 0 no conflict + */ +gf_boolean_t +__check_lease_conflict(call_frame_t *frame, lease_inode_ctx_t *lease_ctx, + const char *lease_id, gf_boolean_t is_write) +{ + gf_lease_types_t lease_type = { + 0, + }; + gf_boolean_t conflicts = _gf_false; + lease_id_entry_t *lease_entry = NULL; + + GF_VALIDATE_OR_GOTO("leases", frame, out); + GF_VALIDATE_OR_GOTO("leases", lease_ctx, out); + + lease_type = lease_ctx->lease_type; + + /* If the fop is rename or unlink conflict the lease even if its + * from the same client?? + */ + if ((frame->root->op == GF_FOP_RENAME) || + (frame->root->op == GF_FOP_UNLINK)) { + conflicts = _gf_true; + goto recall; + } + + /* As internal fops are used to maintain data integrity but do not + * make modififications to the client data, no need to conflict with + * them. + * + * @todo: like for locks, even lease state has to be handled by + * rebalance or self-heal daemon process. */ + if (frame->root->pid < 0) { + conflicts = _gf_false; + goto recall; + } + + /* If lease_id is not sent, set conflicts = true if there is + * an existing lease */ + if (!lease_id && (lease_ctx->lease_cnt > 0)) { + conflicts = _gf_true; + goto recall; + } + + switch (lease_type) { + case (GF_RW_LEASE | GF_RD_LEASE): + case GF_RW_LEASE: + lease_entry = __get_lease_id_entry(lease_ctx, lease_id); + if (lease_entry && (lease_entry->lease_type & GF_RW_LEASE)) + conflicts = _gf_false; + else + conflicts = _gf_true; + break; + case GF_RD_LEASE: + if (is_write && __another_lease_found(lease_ctx, lease_id)) + conflicts = _gf_true; + else + conflicts = _gf_false; + break; + default: + break; + } + +recall: + /* If there is a conflict found and recall is not already sent to all + * the clients, then send recall to each of the client holding lease. 
+ */ + if (conflicts) + __recall_lease(frame->this, lease_ctx); +out: + return conflicts; +} + +/* Return values: + * -1 : error, unwind the fop + * WIND_FOP: No conflict, wind the fop + * BLOCK_FOP: Found a conflicting lease, block the fop + */ +int +check_lease_conflict(call_frame_t *frame, inode_t *inode, const char *lease_id, + uint32_t fop_flags) +{ + lease_inode_ctx_t *lease_ctx = NULL; + gf_boolean_t is_blocking_fop = _gf_false; + gf_boolean_t is_write_fop = _gf_false; + gf_boolean_t conflicts = _gf_false; + int ret = WIND_FOP; + + lease_ctx = lease_ctx_get(inode, frame->this); + if (!lease_ctx) { + gf_msg(frame->this->name, GF_LOG_WARNING, ENOMEM, LEASE_MSG_NO_MEM, + "Unable to create/get inode ctx"); + ret = -1; + errno = ENOMEM; + goto out; + } + + is_blocking_fop = ((fop_flags & BLOCKING_FOP) != 0); + is_write_fop = ((fop_flags & DATA_MODIFY_FOP) != 0); + + pthread_mutex_lock(&lease_ctx->lock); + { + if (lease_ctx->lease_type == NONE) { + pthread_mutex_unlock(&lease_ctx->lock); + gf_msg_debug(frame->this->name, 0, + "No leases found continuing with the" + " fop:%s", + gf_fop_list[frame->root->op]); + ret = WIND_FOP; + goto out; + } + conflicts = __check_lease_conflict(frame, lease_ctx, lease_id, + is_write_fop); + if (conflicts) { + if (is_blocking_fop) { + gf_msg_debug(frame->this->name, 0, + "Fop: %s " + "conflicting existing " + "lease: %d, blocking the" + "fop", + gf_fop_list[frame->root->op], + lease_ctx->lease_type); + ret = BLOCK_FOP; + } else { + gf_msg_debug(frame->this->name, 0, + "Fop: %s " + "conflicting existing " + "lease: %d, sending " + "EAGAIN", + gf_fop_list[frame->root->op], + lease_ctx->lease_type); + errno = EAGAIN; + ret = -1; + } + } + } + pthread_mutex_unlock(&lease_ctx->lock); +out: + return ret; +} + +static int +remove_clnt_leases(const char *client_uid, inode_t *inode, xlator_t *this) +{ + lease_inode_ctx_t *lease_ctx = NULL; + lease_id_entry_t *lease_entry = NULL; + lease_id_entry_t *tmp = NULL; + int ret = 0; + int i = 0; + 
+ lease_ctx = lease_ctx_get(inode, this); + if (!lease_ctx) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, LEASE_MSG_INVAL_INODE_CTX, + "Unable to create/get inode ctx"); + ret = -1; + errno = ENOMEM; + goto out; + } + + pthread_mutex_lock(&lease_ctx->lock); + { + list_for_each_entry_safe(lease_entry, tmp, &lease_ctx->lease_id_list, + lease_id_list) + { + if (strcmp(client_uid, lease_entry->client_uid) == 0) { + for (i = 0; i < GF_LEASE_MAX_TYPE; i++) { + lease_ctx->lease_type_cnt[i] -= lease_entry + ->lease_type_cnt[i]; + } + lease_ctx->lease_cnt -= lease_entry->lease_cnt; + __destroy_lease_id_entry(lease_entry); + if (lease_ctx->lease_cnt == 0) { + lease_ctx->blocked_fops_resuming = _gf_true; + pthread_mutex_unlock(&lease_ctx->lock); + goto unblock; + } + } + } + } + pthread_mutex_unlock(&lease_ctx->lock); +out: + return ret; + +unblock: + do_blocked_fops(this, lease_ctx); + return ret; +} + +int +cleanup_client_leases(xlator_t *this, const char *client_uid) +{ + lease_client_t *clnt = NULL; + lease_client_t *tmp = NULL; + struct list_head cleanup_list = { + 0, + }; + lease_inode_t *l_inode = NULL; + lease_inode_t *tmp1 = NULL; + leases_private_t *priv = NULL; + int ret = 0; + + priv = this->private; + if (!priv) { + ret = -1; + errno = EINVAL; + goto out; + } + + INIT_LIST_HEAD(&cleanup_list); + pthread_mutex_lock(&priv->mutex); + { + list_for_each_entry_safe(clnt, tmp, &priv->client_list, client_list) + { + if ((strcmp(clnt->client_uid, client_uid) == 0)) { + list_for_each_entry_safe(l_inode, tmp1, &clnt->inode_list, list) + { + list_del_init(&l_inode->list); + list_add_tail(&l_inode->list, &cleanup_list); + } + __destroy_lease_client(clnt); + break; + } + } + } + pthread_mutex_unlock(&priv->mutex); + + l_inode = tmp1 = NULL; + list_for_each_entry_safe(l_inode, tmp1, &cleanup_list, list) + { + remove_clnt_leases(client_uid, l_inode->inode, this); + __destroy_lease_inode(l_inode); + } +out: + return ret; +} + +static void +__remove_all_leases(xlator_t *this, 
lease_inode_ctx_t *lease_ctx) +{ + int i = 0; + lease_id_entry_t *lease_entry = NULL; + lease_id_entry_t *tmp = NULL; + + if (lease_ctx->lease_cnt == 0) { + /* No leases to remove. Return */ + return; + } + __dump_leases_info(this, lease_ctx); + + list_for_each_entry_safe(lease_entry, tmp, &lease_ctx->lease_id_list, + lease_id_list) + { + lease_entry->lease_cnt = 0; + remove_from_clnt_list(this, lease_entry->client_uid, lease_ctx->inode); + __destroy_lease_id_entry(lease_entry); + } + INIT_LIST_HEAD(&lease_ctx->lease_id_list); + for (i = 0; i <= GF_LEASE_MAX_TYPE; i++) + lease_ctx->lease_type_cnt[i] = 0; + lease_ctx->lease_type = 0; + lease_ctx->lease_cnt = 0; + lease_ctx->recall_in_progress = _gf_false; + lease_ctx->timer = NULL; + lease_ctx->blocked_fops_resuming = _gf_true; + + /* TODO: + * - Mark the corresponding fd bad. Could be done on client side + * as a result of recall + * - Free the lease_ctx + */ + return; +} + +static int +remove_all_leases(xlator_t *this, inode_t *inode) +{ + lease_inode_ctx_t *lease_ctx = NULL; + int ret = 0; + + GF_VALIDATE_OR_GOTO("leases", inode, out); + + lease_ctx = lease_ctx_get(inode, this); + if (!lease_ctx) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, LEASE_MSG_INVAL_INODE_CTX, + "Unable to create/get inode ctx"); + ret = -1; + errno = ENOMEM; + goto out; + } + + pthread_mutex_lock(&lease_ctx->lock); + { + __remove_all_leases(this, lease_ctx); + } + pthread_mutex_unlock(&lease_ctx->lock); + + do_blocked_fops(this, lease_ctx); +out: + return ret; +} + +void * +expired_recall_cleanup(void *data) +{ + struct timespec sleep_till = { + 0, + }; + struct list_head recall_cleanup_list; + lease_inode_t *recall_entry = NULL; + lease_inode_t *tmp = NULL; + leases_private_t *priv = NULL; + xlator_t *this = NULL; + time_t time_now; + + GF_VALIDATE_OR_GOTO("leases", data, out); + + this = data; + priv = this->private; + + gf_msg_debug(this->name, 0, "Started the expired_recall_cleanup thread"); + + while (1) { + time_now = gf_time(); + 
pthread_mutex_lock(&priv->mutex); + { + if (priv->fini) { + pthread_mutex_unlock(&priv->mutex); + goto out; + } + INIT_LIST_HEAD(&recall_cleanup_list); + if (list_empty(&priv->recall_list)) { + sleep_till.tv_sec = time_now + 600; + pthread_cond_timedwait(&priv->cond, &priv->mutex, &sleep_till); + } + if (!list_empty(&priv->recall_list)) { + gf_msg_debug(this->name, 0, "Found expired recalls"); + list_for_each_entry_safe(recall_entry, tmp, &priv->recall_list, + list) + { + list_del_init(&recall_entry->list); + list_add_tail(&recall_entry->list, &recall_cleanup_list); + } + } + } + pthread_mutex_unlock(&priv->mutex); + + recall_entry = tmp = NULL; + list_for_each_entry_safe(recall_entry, tmp, &recall_cleanup_list, list) + { + gf_msg_debug(this->name, 0, + "Recall lease was sent on" + " inode:%p, recall timer has expired" + " and clients haven't unlocked the lease" + " hence cleaning up leases on the inode", + recall_entry->inode); + remove_all_leases(this, recall_entry->inode); + /* no need to take priv->mutex lock as this entry + * reference is removed from global recall list. */ + __destroy_lease_inode(recall_entry); + } + } + +out: + return NULL; +} diff --git a/xlators/features/leases/src/leases-mem-types.h b/xlators/features/leases/src/leases-mem-types.h new file mode 100644 index 00000000000..25664b44156 --- /dev/null +++ b/xlators/features/leases/src/leases-mem-types.h @@ -0,0 +1,27 @@ +/* + Copyright (c) 2015-2016 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#ifndef __LEASES_MEM_TYPES_H__ +#define __LEASES_MEM_TYPES_H__ + +#include <glusterfs/mem-types.h> + +enum gf_leases_mem_types_ { + gf_leases_mt_private_t = gf_common_mt_end + 1, + gf_leases_mt_lease_client_t, + gf_leases_mt_lease_inode_t, + gf_leases_mt_fd_ctx_t, + gf_leases_mt_lease_inode_ctx_t, + gf_leases_mt_lease_id_entry_t, + gf_leases_mt_fop_stub_t, + gf_leases_mt_timer_data_t, + gf_leases_mt_end +}; +#endif diff --git a/xlators/features/leases/src/leases-messages.h b/xlators/features/leases/src/leases-messages.h new file mode 100644 index 00000000000..da696b832de --- /dev/null +++ b/xlators/features/leases/src/leases-messages.h @@ -0,0 +1,33 @@ +/* + Copyright (c) 2015-2016 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. + */ + +#ifndef _LEASES_MESSAGES_H_ +#define _LEASES_MESSAGES_H_ + +#include <glusterfs/glfs-message-id.h> + +/* To add new message IDs, append new identifiers at the end of the list. + * + * Never remove a message ID. If it's not used anymore, you can rename it or + * leave it as it is, but not delete it. This is to prevent reutilization of + * IDs by other messages. + * + * The component name must match one of the entries defined in + * glfs-message-id.h. 
+ */ + +GLFS_MSGID(LEASES, LEASE_MSG_NO_MEM, LEASE_MSG_RECALL_FAIL, + LEASE_MSG_INVAL_LEASE_ID, LEASE_MSG_INVAL_UNLK_LEASE, + LEASE_MSG_INVAL_INODE_CTX, LEASE_MSG_NOT_ENABLED, + LEASE_MSG_NO_TIMER_WHEEL, LEASE_MSG_CLNT_NOTFOUND, + LEASE_MSG_INODE_NOTFOUND, LEASE_MSG_INVAL_FD_CTX, + LEASE_MSG_INVAL_LEASE_TYPE); + +#endif /* !_LEASES_MESSAGES_H_ */ diff --git a/xlators/features/leases/src/leases.c b/xlators/features/leases/src/leases.c new file mode 100644 index 00000000000..04bee50ba3f --- /dev/null +++ b/xlators/features/leases/src/leases.c @@ -0,0 +1,1168 @@ +/* + Copyright (c) 2015-2016 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "leases.h" + +int32_t +leases_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) +{ + STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, xdata); + + return 0; +} + +int32_t +leases_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + fd_t *fd, dict_t *xdata) +{ + uint32_t fop_flags = 0; + int32_t op_errno = EINVAL; + int ret = 0; + lease_fd_ctx_t *fd_ctx = NULL; + char *lease_id = NULL; + + EXIT_IF_LEASES_OFF(this, out); + EXIT_IF_INTERNAL_FOP(frame, xdata, out); + + fd_ctx = GF_CALLOC(1, sizeof(*fd_ctx), gf_leases_mt_fd_ctx_t); + if (!fd_ctx) { + op_errno = ENOMEM; + goto err; + } + + fd_ctx->client_uid = gf_strdup(frame->root->client->client_uid); + if (!fd_ctx->client_uid) { + op_errno = ENOMEM; + goto err; + } + + GET_FLAGS(frame->root->op, flags); + GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid); + if (lease_id != NULL) + memcpy(fd_ctx->lease_id, lease_id, 
LEASE_ID_SIZE); + else + memset(fd_ctx->lease_id, 0, LEASE_ID_SIZE); + + ret = fd_ctx_set(fd, this, (uint64_t)(uintptr_t)fd_ctx); + if (ret) { + op_errno = ENOMEM; + goto err; + } + + ret = check_lease_conflict(frame, fd->inode, lease_id, fop_flags); + if (ret < 0) + goto err; + else if (ret == BLOCK_FOP) + goto block; + else if (ret == WIND_FOP) + goto out; + +block: + LEASE_BLOCK_FOP(fd->inode, open, frame, this, loc, flags, fd, xdata); + return 0; + +out: + STACK_WIND(frame, leases_open_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); + return 0; + +err: + if (fd_ctx) { + GF_FREE(fd_ctx->client_uid); + GF_FREE(fd_ctx); + } + + STACK_UNWIND_STRICT(open, frame, -1, op_errno, NULL, NULL); + return 0; +} + +int32_t +leases_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, struct iatt *prebuf, struct iatt *postbuf, + dict_t *xdata) +{ + STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + + return 0; +} + +int32_t +leases_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iovec *vector, int count, off_t off, uint32_t flags, + struct iobref *iobref, dict_t *xdata) +{ + uint32_t fop_flags = 0; + char *lease_id = NULL; + int ret = 0; + + EXIT_IF_LEASES_OFF(this, out); + EXIT_IF_INTERNAL_FOP(frame, xdata, out); + + GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid); + GET_FLAGS(frame->root->op, fd->flags); + + ret = check_lease_conflict(frame, fd->inode, lease_id, fop_flags); + if (ret < 0) + goto err; + else if (ret == BLOCK_FOP) + goto block; + else if (ret == WIND_FOP) + goto out; + +block: + LEASE_BLOCK_FOP(fd->inode, writev, frame, this, fd, vector, count, off, + flags, iobref, xdata); + return 0; + +out: + STACK_WIND(frame, leases_writev_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->writev, fd, vector, count, off, flags, + iobref, xdata); + return 0; + +err: + STACK_UNWIND_STRICT(writev, frame, -1, errno, NULL, NULL, NULL); + return 0; 
+} + +int32_t +leases_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, struct iovec *vector, int count, + struct iatt *stbuf, struct iobref *iobref, dict_t *xdata) +{ + STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, vector, count, stbuf, + iobref, xdata); + + return 0; +} + +int32_t +leases_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) +{ + uint32_t fop_flags = 0; + char *lease_id = NULL; + int ret = 0; + + EXIT_IF_LEASES_OFF(this, out); + EXIT_IF_INTERNAL_FOP(frame, xdata, out); + + GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid); + GET_FLAGS(frame->root->op, fd->flags); + + ret = check_lease_conflict(frame, fd->inode, lease_id, fop_flags); + if (ret < 0) + goto err; + else if (ret == BLOCK_FOP) + goto block; + else if (ret == WIND_FOP) + goto out; + +block: + LEASE_BLOCK_FOP(fd->inode, readv, frame, this, fd, size, offset, flags, + xdata); + return 0; + +out: + STACK_WIND(frame, leases_readv_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata); + return 0; + +err: + STACK_UNWIND_STRICT(readv, frame, -1, errno, NULL, 0, NULL, NULL, NULL); + return 0; +} + +int32_t +leases_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct gf_flock *lock, dict_t *xdata) +{ + STACK_UNWIND_STRICT(lk, frame, op_ret, op_errno, lock, xdata); + + return 0; +} + +int32_t +leases_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, + struct gf_flock *flock, dict_t *xdata) +{ + uint32_t fop_flags = 0; + char *lease_id = NULL; + int ret = 0; + + EXIT_IF_LEASES_OFF(this, out); + EXIT_IF_INTERNAL_FOP(frame, xdata, out); + + GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid); + GET_FLAGS_LK(cmd, flock->l_type, fd->flags); + + ret = check_lease_conflict(frame, fd->inode, lease_id, fop_flags); + if (ret < 0) + goto err; + else if (ret == BLOCK_FOP) + goto block; + 
else if (ret == WIND_FOP) + goto out; + +block: + LEASE_BLOCK_FOP(fd->inode, lk, frame, this, fd, cmd, flock, xdata); + return 0; + +out: + STACK_WIND(frame, leases_lk_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lk, fd, cmd, flock, xdata); + return 0; + +err: + STACK_UNWIND_STRICT(lk, frame, -1, errno, NULL, NULL); + return 0; +} + +int32_t +leases_lease(call_frame_t *frame, xlator_t *this, loc_t *loc, + struct gf_lease *lease, dict_t *xdata) +{ + int32_t op_errno = 0; + int ret = 0; + struct gf_lease nullease = { + 0, + }; + int32_t op_ret = 0; + + EXIT_IF_LEASES_OFF(this, out); + EXIT_IF_INTERNAL_FOP(frame, xdata, out); + + ret = process_lease_req(frame, this, loc->inode, lease); + if (ret < 0) { + op_errno = -ret; + op_ret = -1; + } + goto unwind; + +out: + gf_msg(this->name, GF_LOG_ERROR, EINVAL, LEASE_MSG_NOT_ENABLED, + "\"features/leases\" translator is not enabled. " + "You need to enable it for proper functioning of your " + "application"); + op_errno = ENOSYS; + op_ret = -1; + +unwind: + STACK_UNWIND_STRICT(lease, frame, op_ret, op_errno, + (op_errno == ENOSYS) ? 
&nullease : lease, xdata); + return 0; +} + +int32_t +leases_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + + return 0; +} + +int32_t +leases_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) +{ + uint32_t fop_flags = 0; + char *lease_id = NULL; + int ret = 0; + + EXIT_IF_LEASES_OFF(this, out); + EXIT_IF_INTERNAL_FOP(frame, xdata, out); + + GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid); + GET_FLAGS(frame->root->op, 0); + + ret = check_lease_conflict(frame, loc->inode, lease_id, fop_flags); + if (ret < 0) + goto err; + else if (ret == BLOCK_FOP) + goto block; + else if (ret == WIND_FOP) + goto out; + +block: + LEASE_BLOCK_FOP(loc->inode, truncate, frame, this, loc, offset, xdata); + return 0; + +out: + STACK_WIND(frame, leases_truncate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); + return 0; + +err: + STACK_UNWIND_STRICT(truncate, frame, -1, errno, NULL, NULL, NULL); + return 0; +} + +int32_t +leases_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct iatt *statpre, + struct iatt *statpost, dict_t *xdata) +{ + STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, statpre, statpost, + xdata); + + return 0; +} + +int32_t +leases_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + struct iatt *stbuf, int32_t valid, dict_t *xdata) +{ + uint32_t fop_flags = 0; + char *lease_id = NULL; + int ret = 0; + + EXIT_IF_LEASES_OFF(this, out); + EXIT_IF_INTERNAL_FOP(frame, xdata, out); + + GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid); + GET_FLAGS(frame->root->op, 0); + + ret = check_lease_conflict(frame, loc->inode, lease_id, fop_flags); + if (ret < 0) + goto err; + else if (ret == BLOCK_FOP) + goto block; + else if (ret == 
WIND_FOP) + goto out; + +block: + LEASE_BLOCK_FOP(loc->inode, setattr, frame, this, loc, stbuf, valid, xdata); + return 0; + +out: + STACK_WIND(frame, leases_setattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata); + return 0; + +err: + STACK_UNWIND_STRICT(setattr, frame, -1, errno, NULL, NULL, NULL); + return 0; +} + +int32_t +leases_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *stbuf, + struct iatt *preoldparent, struct iatt *postoldparent, + struct iatt *prenewparent, struct iatt *postnewparent, + dict_t *xdata) +{ + STACK_UNWIND_STRICT(rename, frame, op_ret, op_errno, stbuf, preoldparent, + postoldparent, prenewparent, postnewparent, xdata); + + return 0; +} + +int32_t +leases_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) +{ + uint32_t fop_flags = 0; + char *lease_id = NULL; + int ret = 0; + + EXIT_IF_LEASES_OFF(this, out); + EXIT_IF_INTERNAL_FOP(frame, xdata, out); + + /* should the lease be also checked for newloc */ + GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid); + GET_FLAGS(frame->root->op, 0); + + ret = check_lease_conflict(frame, oldloc->inode, lease_id, fop_flags); + if (ret < 0) + goto err; + else if (ret == BLOCK_FOP) + goto block; + else if (ret == WIND_FOP) + goto out; + +block: + LEASE_BLOCK_FOP(oldloc->inode, rename, frame, this, oldloc, newloc, xdata); + return 0; + +out: + STACK_WIND(frame, leases_rename_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); + return 0; + +err: + STACK_UNWIND_STRICT(rename, frame, -1, errno, NULL, NULL, NULL, NULL, NULL, + NULL); + return 0; +} + +int32_t +leases_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) +{ + STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, preparent, postparent, + xdata); + + return 0; +} 
+ +int32_t +leases_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, + dict_t *xdata) +{ + uint32_t fop_flags = 0; + char *lease_id = NULL; + int ret = 0; + + EXIT_IF_LEASES_OFF(this, out); + EXIT_IF_INTERNAL_FOP(frame, xdata, out); + + GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid); + GET_FLAGS(frame->root->op, 0); + + ret = check_lease_conflict(frame, loc->inode, lease_id, fop_flags); + if (ret < 0) + goto err; + else if (ret == BLOCK_FOP) + goto block; + else if (ret == WIND_FOP) + goto out; + +block: + LEASE_BLOCK_FOP(loc->inode, unlink, frame, this, loc, xflag, xdata); + return 0; + +out: + STACK_WIND(frame, leases_unlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); + return 0; + +err: + STACK_UNWIND_STRICT(unlink, frame, -1, errno, NULL, NULL, NULL); + return 0; +} + +int32_t +leases_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, inode_t *inode, struct iatt *stbuf, + struct iatt *preparent, struct iatt *postparent, dict_t *xdata) +{ + STACK_UNWIND_STRICT(link, frame, op_ret, op_errno, inode, stbuf, preparent, + postparent, xdata); + + return 0; +} + +int32_t +leases_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) +{ + uint32_t fop_flags = 0; + char *lease_id = NULL; + int ret = 0; + + EXIT_IF_LEASES_OFF(this, out); + EXIT_IF_INTERNAL_FOP(frame, xdata, out); + + GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid); + GET_FLAGS(frame->root->op, 0); + + ret = check_lease_conflict(frame, oldloc->inode, lease_id, fop_flags); + if (ret < 0) + goto err; + else if (ret == BLOCK_FOP) + goto block; + else if (ret == WIND_FOP) + goto out; + +block: + LEASE_BLOCK_FOP(oldloc->inode, link, frame, this, oldloc, newloc, xdata); + return 0; +out: + STACK_WIND(frame, leases_link_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata); + return 0; + +err: + STACK_UNWIND_STRICT(link, frame, -1, 
errno, NULL, NULL, NULL, NULL, NULL); + return 0; +} + +int32_t +leases_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, fd_t *fd, inode_t *inode, struct iatt *stbuf, + struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) +{ + STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, stbuf, + preparent, postparent, xdata); + + return 0; +} + +int32_t +leases_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) +{ + uint32_t fop_flags = 0; + char *lease_id = NULL; + int ret = 0; + + EXIT_IF_LEASES_OFF(this, out); + EXIT_IF_INTERNAL_FOP(frame, xdata, out); + + GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid); + GET_FLAGS(frame->root->op, flags); + + ret = check_lease_conflict(frame, fd->inode, lease_id, fop_flags); + if (ret < 0) + goto err; + else if (ret == BLOCK_FOP) + goto block; + else if (ret == WIND_FOP) + goto out; + +block: + LEASE_BLOCK_FOP(fd->inode, create, frame, this, loc, flags, mode, umask, fd, + xdata); + return 0; + +out: + STACK_WIND(frame, leases_create_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd, + xdata); + return 0; + +err: + STACK_UNWIND_STRICT(create, frame, -1, errno, NULL, NULL, NULL, NULL, NULL, + NULL); + return 0; +} + +int32_t +leases_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + STACK_UNWIND_STRICT(fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata); + return 0; +} + +int32_t +leases_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, + dict_t *xdata) +{ + uint32_t fop_flags = 0; + char *lease_id = NULL; + int ret = 0; + + EXIT_IF_LEASES_OFF(this, out); + EXIT_IF_INTERNAL_FOP(frame, xdata, out); + + GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid); + GET_FLAGS(frame->root->op, fd->flags); + + ret = 
check_lease_conflict(frame, fd->inode, lease_id, fop_flags); + if (ret < 0) + goto err; + else if (ret == BLOCK_FOP) + goto block; + else if (ret == WIND_FOP) + goto out; + +block: + LEASE_BLOCK_FOP(fd->inode, fsync, frame, this, fd, flags, xdata); + return 0; + +out: + STACK_WIND(frame, leases_fsync_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsync, fd, flags, xdata); + return 0; +err: + STACK_UNWIND_STRICT(fsync, frame, -1, errno, NULL, NULL, NULL); + return 0; +} + +int32_t +leases_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + STACK_UNWIND_STRICT(ftruncate, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + return 0; +} + +int32_t +leases_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) +{ + uint32_t fop_flags = 0; + char *lease_id = NULL; + int ret = 0; + + EXIT_IF_LEASES_OFF(this, out); + EXIT_IF_INTERNAL_FOP(frame, xdata, out); + + GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid); + GET_FLAGS(frame->root->op, 0); /* TODO:fd->flags?*/ + + ret = check_lease_conflict(frame, fd->inode, lease_id, fop_flags); + if (ret < 0) + goto err; + else if (ret == BLOCK_FOP) + goto block; + else if (ret == WIND_FOP) + goto out; + +block: + LEASE_BLOCK_FOP(fd->inode, ftruncate, frame, this, fd, offset, xdata); + return 0; + +out: + STACK_WIND(frame, leases_ftruncate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); + return 0; + +err: + STACK_UNWIND_STRICT(ftruncate, frame, -1, errno, NULL, NULL, NULL); + return 0; +} + +int32_t +leases_fsetattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *statpre, + struct iatt *statpost, dict_t *xdata) +{ + STACK_UNWIND_STRICT(fsetattr, frame, op_ret, op_errno, statpre, statpost, + xdata); + return 0; +} + +int32_t +leases_fsetattr(call_frame_t *frame, xlator_t *this, fd_t 
*fd, + struct iatt *stbuf, int32_t valid, dict_t *xdata) +{ + uint32_t fop_flags = 0; + char *lease_id = NULL; + int ret = 0; + + EXIT_IF_LEASES_OFF(this, out); + EXIT_IF_INTERNAL_FOP(frame, xdata, out); + + GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid); + GET_FLAGS(frame->root->op, fd->flags); + + ret = check_lease_conflict(frame, fd->inode, lease_id, fop_flags); + if (ret < 0) + goto err; + else if (ret == BLOCK_FOP) + goto block; + else if (ret == WIND_FOP) + goto out; + +block: + LEASE_BLOCK_FOP(fd->inode, fsetattr, frame, this, fd, stbuf, valid, xdata); + return 0; + +out: + STACK_WIND(frame, leases_fsetattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata); + return 0; + +err: + STACK_UNWIND_STRICT(fsetattr, frame, -1, errno, NULL, NULL, NULL); + return 0; +} + +int32_t +leases_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *pre, + struct iatt *post, dict_t *xdata) +{ + STACK_UNWIND_STRICT(fallocate, frame, op_ret, op_errno, pre, post, xdata); + + return 0; +} + +int32_t +leases_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, + off_t offset, size_t len, dict_t *xdata) +{ + uint32_t fop_flags = 0; + char *lease_id = NULL; + int ret = 0; + + EXIT_IF_LEASES_OFF(this, out); + EXIT_IF_INTERNAL_FOP(frame, xdata, out); + + GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid); + GET_FLAGS(frame->root->op, fd->flags); + + ret = check_lease_conflict(frame, fd->inode, lease_id, fop_flags); + if (ret < 0) + goto err; + else if (ret == BLOCK_FOP) + goto block; + else if (ret == WIND_FOP) + goto out; + +block: + LEASE_BLOCK_FOP(fd->inode, fallocate, frame, this, fd, mode, offset, len, + xdata); + return 0; + +out: + STACK_WIND(frame, leases_fallocate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len, + xdata); + return 0; + +err: + STACK_UNWIND_STRICT(fallocate, frame, -1, errno, NULL, 
NULL, NULL); + return 0; +} + +int32_t +leases_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *pre, + struct iatt *post, dict_t *xdata) +{ + STACK_UNWIND_STRICT(discard, frame, op_ret, op_errno, pre, post, xdata); + + return 0; +} + +int32_t +leases_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + uint32_t fop_flags = 0; + char *lease_id = NULL; + int ret = 0; + + EXIT_IF_LEASES_OFF(this, out); + EXIT_IF_INTERNAL_FOP(frame, xdata, out); + + GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid); + GET_FLAGS(frame->root->op, fd->flags); + + ret = check_lease_conflict(frame, fd->inode, lease_id, fop_flags); + if (ret < 0) + goto err; + else if (ret == BLOCK_FOP) + goto block; + else if (ret == WIND_FOP) + goto out; + +block: + LEASE_BLOCK_FOP(fd->inode, discard, frame, this, fd, offset, len, xdata); + return 0; + +out: + STACK_WIND(frame, leases_discard_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata); + return 0; + +err: + STACK_UNWIND_STRICT(discard, frame, -1, errno, NULL, NULL, NULL); + return 0; +} + +int32_t +leases_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *pre, + struct iatt *post, dict_t *xdata) +{ + STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, pre, post, xdata); + + return 0; +} + +int +leases_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + off_t len, dict_t *xdata) +{ + uint32_t fop_flags = 0; + char *lease_id = NULL; + int ret = 0; + + EXIT_IF_LEASES_OFF(this, out); + EXIT_IF_INTERNAL_FOP(frame, xdata, out); + + GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid); + GET_FLAGS(frame->root->op, fd->flags); + + ret = check_lease_conflict(frame, fd->inode, lease_id, fop_flags); + if (ret < 0) + goto err; + else if (ret == BLOCK_FOP) + goto block; + else if (ret == WIND_FOP) + goto out; + 
+block: + LEASE_BLOCK_FOP(fd->inode, zerofill, frame, this, fd, offset, len, xdata); + return 0; + +out: + STACK_WIND(frame, leases_zerofill_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata); + return 0; + +err: + STACK_UNWIND_STRICT(zerofill, frame, -1, errno, NULL, NULL, NULL); + return 0; +} + +int +leases_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + STACK_UNWIND_STRICT(flush, frame, op_ret, op_errno, xdata); + + return 0; +} + +int +leases_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) +{ + uint32_t fop_flags = 0; + char *lease_id = NULL; + int ret = 0; + lease_fd_ctx_t *fd_ctx = NULL; + uint64_t ctx = 0; + + EXIT_IF_LEASES_OFF(this, out); + EXIT_IF_INTERNAL_FOP(frame, xdata, out); + + GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid); + GET_FLAGS(frame->root->op, fd->flags); + + ret = check_lease_conflict(frame, fd->inode, lease_id, fop_flags); + if (ret < 0) + goto err; + else if (ret == BLOCK_FOP) + goto block; + else if (ret == WIND_FOP) + goto out; + +block: + LEASE_BLOCK_FOP(fd->inode, flush, frame, this, fd, xdata); + return 0; + +out: + /* * + * currently release is not called after the close fop from the + * application. Hence lease fd ctx is reset on here. + * This is actually not the right way, since flush can be called + * not only from the close op. + * TODO : + * - Either identify the flush is called from close call on fd from + * from the application. 
+ * OR + * - Find why release is not called post the last close call + */ + ret = fd_ctx_get(fd, this, &ctx); + if (ret == 0) { + fd_ctx = (lease_fd_ctx_t *)(long)ctx; + if (fd_ctx->client_uid) { + GF_FREE(fd_ctx->client_uid); + fd_ctx->client_uid = NULL; + } + memset(fd_ctx->lease_id, 0, LEASE_ID_SIZE); + } + STACK_WIND(frame, leases_flush_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->flush, fd, xdata); + return 0; + +err: + STACK_UNWIND_STRICT(create, frame, -1, errno, NULL, NULL, NULL, NULL, NULL, + NULL); + return 0; +} + +int32_t +mem_acct_init(xlator_t *this) +{ + int ret = -1; + + if (!this) + return ret; + + ret = xlator_mem_acct_init(this, gf_leases_mt_end + 1); + + if (ret != 0) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, LEASE_MSG_NO_MEM, + "mem account init failed"); + return ret; + } + + return ret; +} + +static int +leases_init_priv(xlator_t *this) +{ + int ret = 0; + leases_private_t *priv = NULL; + + priv = this->private; + GF_ASSERT(priv); + + if (!priv->timer_wheel) { + priv->timer_wheel = glusterfs_ctx_tw_get(this->ctx); + if (!priv->timer_wheel) { + ret = -1; + goto out; + } + } + + if (!priv->inited_recall_thr) { + ret = gf_thread_create(&priv->recall_thr, NULL, expired_recall_cleanup, + this, "leasercl"); + if (!ret) + priv->inited_recall_thr = _gf_true; + } + +out: + return ret; +} + +int +reconfigure(xlator_t *this, dict_t *options) +{ + leases_private_t *priv = NULL; + int ret = -1; + + priv = this->private; + GF_ASSERT(priv); + + /* TODO: In case of reconfigure, if its enabling the leases + * its not an issue, but if its disabling the leases, there + * is more to it, like recall all the existing leases, wait + * for unlock of all the leases etc., hence not supporting the + * reconfigure for now. 
+ + GF_OPTION_RECONF ("leases", priv->leases_enabled, + options, bool, out); + + if (priv->leases_enabled) { + ret = leases_init_priv (this); + if (ret) + goto out; + } + */ + + GF_OPTION_RECONF("lease-lock-recall-timeout", priv->recall_lease_timeout, + options, int32, out); + + ret = 0; +out: + return ret; +} + +int +init(xlator_t *this) +{ + int ret = -1; + leases_private_t *priv = NULL; + + priv = GF_CALLOC(1, sizeof(*priv), gf_leases_mt_private_t); + if (!priv) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, LEASE_MSG_NO_MEM, + "Leases init failed"); + goto out; + } + + GF_OPTION_INIT("leases", priv->leases_enabled, bool, out); + GF_OPTION_INIT("lease-lock-recall-timeout", priv->recall_lease_timeout, + int32, out); + pthread_mutex_init(&priv->mutex, NULL); + INIT_LIST_HEAD(&priv->client_list); + INIT_LIST_HEAD(&priv->recall_list); + + this->private = priv; + + if (priv->leases_enabled) { + ret = leases_init_priv(this); + if (ret) + goto out; + } + + ret = 0; + +out: + if (ret) { + GF_FREE(priv); + this->private = NULL; + } + + return ret; +} + +void +fini(xlator_t *this) +{ + leases_private_t *priv = NULL; + + priv = this->private; + if (!priv) { + return; + } + this->private = NULL; + + priv->fini = _gf_true; + pthread_cond_broadcast(&priv->cond); + if (priv->recall_thr) { + gf_thread_cleanup_xint(priv->recall_thr); + priv->recall_thr = 0; + priv->inited_recall_thr = _gf_false; + } + + if (priv->timer_wheel) { + glusterfs_ctx_tw_put(this->ctx); + } + + GF_FREE(priv); + return; +} + +static int +leases_forget(xlator_t *this, inode_t *inode) +{ + /* TODO:leases_cleanup_inode_ctx (this, inode); */ + return 0; +} + +static int +leases_release(xlator_t *this, fd_t *fd) +{ + int ret = -1; + uint64_t tmp = 0; + lease_fd_ctx_t *fd_ctx = NULL; + + if (fd == NULL) { + goto out; + } + + gf_log(this->name, GF_LOG_TRACE, "Releasing all leases with fd %p", fd); + + ret = fd_ctx_del(fd, this, &tmp); + if (ret) { + gf_log(this->name, GF_LOG_DEBUG, "Could not get fdctx"); + 
goto out; + } + + fd_ctx = (lease_fd_ctx_t *)(long)tmp; + if (fd_ctx) + GF_FREE(fd_ctx); +out: + return ret; +} + +static int +leases_clnt_disconnect_cbk(xlator_t *this, client_t *client) +{ + int ret = 0; + + EXIT_IF_LEASES_OFF(this, out); + + ret = cleanup_client_leases(this, client->client_uid); +out: + return ret; +} + +struct xlator_fops fops = { + /* Metadata modifying fops */ + .fsetattr = leases_fsetattr, + .setattr = leases_setattr, + + /* File Data reading fops */ + .open = leases_open, + .readv = leases_readv, + + /* File Data modifying fops */ + .truncate = leases_truncate, + .ftruncate = leases_ftruncate, + .writev = leases_writev, + .zerofill = leases_zerofill, + .fallocate = leases_fallocate, + .discard = leases_discard, + .lk = leases_lk, + .fsync = leases_fsync, + .flush = leases_flush, + .lease = leases_lease, + + /* Directory Data modifying fops */ + .create = leases_create, + .rename = leases_rename, + .unlink = leases_unlink, + .link = leases_link, + +#ifdef NOT_SUPPORTED + /* internal lk fops */ + .inodelk = leases_inodelk, + .finodelk = leases_finodelk, + .entrylk = leases_entrylk, + .fentrylk = leases_fentrylk, + + /* Internal special fops*/ + .xattrop = leases_xattrop, + .fxattrop = leases_fxattrop, +#endif +}; + +struct xlator_cbks cbks = { + .forget = leases_forget, + .release = leases_release, + .client_disconnect = leases_clnt_disconnect_cbk, +}; + +struct volume_options options[] = { + {.key = {"leases"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .op_version = {GD_OP_VERSION_3_8_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .description = "When \"on\", enables leases support"}, + {.key = {"lease-lock-recall-timeout"}, + .type = GF_OPTION_TYPE_INT, + .default_value = RECALL_LEASE_LK_TIMEOUT, + .op_version = {GD_OP_VERSION_3_8_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .description = "After 'timeout' seconds since the recall_lease" + " request has been sent to the client, the lease lock" + " will be 
forcefully purged by the server."}, + {.key = {NULL}}, +}; + +xlator_api_t xlator_api = { + .init = init, + .fini = fini, + .reconfigure = reconfigure, + .mem_acct_init = mem_acct_init, + .op_version = {1}, /* Present from the initial version */ + .fops = &fops, + .cbks = &cbks, + .options = options, + .identifier = "leases", + .category = GF_MAINTAINED, +}; diff --git a/xlators/features/leases/src/leases.h b/xlators/features/leases/src/leases.h new file mode 100644 index 00000000000..a6e8a6824cc --- /dev/null +++ b/xlators/features/leases/src/leases.h @@ -0,0 +1,259 @@ +/* + Copyright (c) 2015-2016 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _LEASES_H +#define _LEASES_H + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include <glusterfs/common-utils.h> +#include <glusterfs/glusterfs.h> +#include <glusterfs/xlator.h> +#include <glusterfs/call-stub.h> +#include <glusterfs/logging.h> +#include <glusterfs/client_t.h> +#include <glusterfs/lkowner.h> +#include <glusterfs/locking.h> +#include <glusterfs/upcall-utils.h> +#include "timer-wheel.h" +#include "leases-mem-types.h" +#include "leases-messages.h" + +/* The time period for which a client lease lock will be stored after its been + * recalled for the first time. 
*/ +#define RECALL_LEASE_LK_TIMEOUT "60" + +#define DATA_MODIFY_FOP 0x0001 +#define BLOCKING_FOP 0x0002 + +#define BLOCK_FOP 0x0001 +#define WIND_FOP 0x0002 + +#define EXIT_IF_LEASES_OFF(this, label) \ + do { \ + if (!is_leases_enabled(this)) \ + goto label; \ + } while (0) + +#define EXIT_IF_INTERNAL_FOP(frame, xdata, label) \ + do { \ + if (frame->root->pid < 0) \ + goto label; \ + if (xdata && dict_get(xdata, GLUSTERFS_INTERNAL_FOP_KEY)) \ + goto label; \ + } while (0) + +#define GET_LEASE_ID(xdata, lease_id, client_uid) \ + do { \ + int ret_val = -1; \ + ret_val = dict_get_bin(xdata, "lease-id", (void **)&lease_id); \ + if (ret_val) { \ + ret_val = 0; \ + gf_msg_debug("leases", 0, "Lease id is not set for client:%s", \ + client_uid); \ + } \ + } while (0) + +#define GET_FLAGS(fop, fd_flags) \ + do { \ + if ((fd_flags & (O_WRONLY | O_RDWR)) && fop == GF_FOP_OPEN) \ + fop_flags = DATA_MODIFY_FOP; \ + \ + if (fop == GF_FOP_UNLINK || fop == GF_FOP_RENAME || \ + fop == GF_FOP_TRUNCATE || fop == GF_FOP_FTRUNCATE || \ + fop == GF_FOP_FLUSH || fop == GF_FOP_FSYNC || \ + fop == GF_FOP_WRITE || fop == GF_FOP_FALLOCATE || \ + fop == GF_FOP_DISCARD || fop == GF_FOP_ZEROFILL || \ + fop == GF_FOP_SETATTR || fop == GF_FOP_FSETATTR || \ + fop == GF_FOP_LINK) \ + fop_flags = DATA_MODIFY_FOP; \ + \ + if (!(fd_flags & (O_NONBLOCK | O_NDELAY))) \ + fop_flags |= BLOCKING_FOP; \ + \ + } while (0) + +#define GET_FLAGS_LK(cmd, l_type, fd_flags) \ + do { \ + /* TODO: handle F_RESLK_LCK and other glusterfs_lk_recovery_cmds_t */ \ + if ((cmd == F_SETLKW || cmd == F_SETLKW64 || cmd == F_SETLK || \ + cmd == F_SETLK64) && \ + l_type == F_WRLCK) \ + fop_flags = DATA_MODIFY_FOP; \ + \ + if (fd_flags & (O_NONBLOCK | O_NDELAY) && \ + (cmd == F_SETLKW || cmd == F_SETLKW64)) \ + fop_flags |= BLOCKING_FOP; \ + \ + } while (0) + +#define LEASE_BLOCK_FOP(inode, fop_name, frame, this, params...) 
\ + do { \ + call_stub_t *__stub = NULL; \ + fop_stub_t *blk_fop = NULL; \ + lease_inode_ctx_t *lease_ctx = NULL; \ + \ + __stub = fop_##fop_name##_stub(frame, default_##fop_name##_resume, \ + params); \ + if (!__stub) { \ + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, LEASE_MSG_NO_MEM, \ + "Unable to create stub"); \ + ret = -ENOMEM; \ + goto __out; \ + } \ + \ + blk_fop = GF_CALLOC(1, sizeof(*blk_fop), gf_leases_mt_fop_stub_t); \ + if (!blk_fop) { \ + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, LEASE_MSG_NO_MEM, \ + "Unable to create lease fop stub"); \ + ret = -ENOMEM; \ + goto __out; \ + } \ + \ + lease_ctx = lease_ctx_get(inode, this); \ + if (!lease_ctx) { \ + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, LEASE_MSG_NO_MEM, \ + "Unable to create/get inode ctx"); \ + ret = -ENOMEM; \ + goto __out; \ + } \ + \ + blk_fop->stub = __stub; \ + pthread_mutex_lock(&lease_ctx->lock); \ + { \ + /*TODO: If the lease is unlocked btw check lease conflict and \ + * by now, then this fop shouldn't be add to the blocked fop \ + * list, can use generation number for the same?*/ \ + list_add_tail(&blk_fop->list, &lease_ctx->blocked_list); \ + } \ + pthread_mutex_unlock(&lease_ctx->lock); \ + \ + __out: \ + if (ret < 0) { \ + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, LEASE_MSG_NO_MEM, \ + "Unable to create stub for blocking the fop:%s (%s)", \ + gf_fop_list[frame->root->op], strerror(ENOMEM)); \ + if (__stub != NULL) { \ + call_stub_destroy(__stub); \ + } \ + GF_FREE(blk_fop); \ + goto err; \ + } \ + } while (0) + +struct _leases_private { + struct list_head client_list; + struct list_head recall_list; + struct tvec_base *timer_wheel; /* timer wheel where the recall request + is qued and waits for unlock/expiry */ + pthread_t recall_thr; + pthread_mutex_t mutex; + pthread_cond_t cond; + int32_t recall_lease_timeout; + gf_boolean_t inited_recall_thr; + gf_boolean_t fini; + gf_boolean_t leases_enabled; + + char _pad[1]; /* manual padding */ +}; +typedef struct _leases_private 
leases_private_t; + +struct _lease_client { + char *client_uid; + struct list_head client_list; + struct list_head inode_list; +}; +typedef struct _lease_client lease_client_t; + +struct _lease_inode { + inode_t *inode; + struct list_head + list; /* This can be part of both inode_list and recall_list */ +}; +typedef struct _lease_inode lease_inode_t; + +struct _lease_fd_ctx { + char *client_uid; + char lease_id[LEASE_ID_SIZE]; +}; +typedef struct _lease_fd_ctx lease_fd_ctx_t; + +struct _lease_inode_ctx { + struct list_head lease_id_list; /* clients that have taken leases */ + int lease_type_cnt[GF_LEASE_MAX_TYPE + 1]; + uint64_t lease_cnt; /* Total number of leases on this inode */ + uint64_t openfd_cnt; /* number of fds open */ + struct list_head blocked_list; /* List of fops blocked until the + lease recall is complete */ + inode_t *inode; /* this represents the inode on which the + lock was taken, required mainly during + disconnect cleanup */ + struct gf_tw_timer_list *timer; + pthread_mutex_t lock; + int lease_type; /* Types of leases acquired */ + gf_boolean_t recall_in_progress; /* if lease recall is sent on this inode */ + gf_boolean_t blocked_fops_resuming; /* if blocked fops are being resumed */ + + char _pad[2]; /* manual padding */ +}; +typedef struct _lease_inode_ctx lease_inode_ctx_t; + +struct _lease_id_entry { + struct list_head lease_id_list; + char lease_id[LEASE_ID_SIZE]; + char *client_uid; /* uid of the client that has + taken the lease */ + int lease_type_cnt[GF_LEASE_MAX_TYPE + 1]; /* count of each lease type */ + uint64_t lease_cnt; /* Number of leases taken under the + given lease id */ + time_t recall_time; /* time @ which recall was sent */ + int lease_type; /* Union of all the leases taken + under the given lease id */ + char _pad[4]; /* manual padding */ +}; +typedef struct _lease_id_entry lease_id_entry_t; + +/* Required? 
as stub itself will have list */ +struct __fop_stub { + struct list_head list; + call_stub_t *stub; +}; +typedef struct __fop_stub fop_stub_t; + +struct __lease_timer_data { + inode_t *inode; + xlator_t *this; +}; +typedef struct __lease_timer_data lease_timer_data_t; + +gf_boolean_t +is_leases_enabled(xlator_t *this); + +lease_inode_ctx_t * +lease_ctx_get(inode_t *inode, xlator_t *this); + +int +process_lease_req(call_frame_t *frame, xlator_t *this, inode_t *inode, + struct gf_lease *lease); + +int +check_lease_conflict(call_frame_t *frame, inode_t *inode, const char *lease_id, + uint32_t fop_flags); + +int +cleanup_client_leases(xlator_t *this, const char *client_uid); + +void * +expired_recall_cleanup(void *data); + +#endif /* _LEASES_H */ diff --git a/xlators/features/locks/src/Makefile.am b/xlators/features/locks/src/Makefile.am index e486e32fa1e..0b174c19d2d 100644 --- a/xlators/features/locks/src/Makefile.am +++ b/xlators/features/locks/src/Makefile.am @@ -1,23 +1,29 @@ +if WITH_SERVER xlator_LTLIBRARIES = locks.la +endif xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features -locks_la_LDFLAGS = $(GF_XLATOR_DEFAULT_LDFLAGS) +locks_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) locks_la_SOURCES = common.c posix.c entrylk.c inodelk.c reservelk.c \ - clear.c + clear.c + locks_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la -noinst_HEADERS = locks.h common.h locks-mem-types.h clear.h +noinst_HEADERS = locks.h common.h locks-mem-types.h clear.h pl-messages.h -AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ + -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src AM_CFLAGS = -Wall -fno-strict-aliasing $(GF_CFLAGS) CLEANFILES = +if WITH_SERVER uninstall-local: rm -f $(DESTDIR)$(xlatordir)/posix-locks.so install-data-hook: ln -sf locks.so $(DESTDIR)$(xlatordir)/posix-locks.so +endif diff --git a/xlators/features/locks/src/clear.c 
b/xlators/features/locks/src/clear.c index 640c6bb5553..ab1eac68a53 100644 --- a/xlators/features/locks/src/clear.c +++ b/xlators/features/locks/src/clear.c @@ -12,407 +12,449 @@ #include <limits.h> #include <pthread.h> -#include "glusterfs.h" -#include "compat.h" -#include "xlator.h" -#include "inode.h" -#include "logging.h" -#include "common-utils.h" +#include <glusterfs/glusterfs.h> +#include <glusterfs/compat.h> +#include <glusterfs/xlator.h> +#include <glusterfs/logging.h> +#include <glusterfs/common-utils.h> #include "locks.h" #include "common.h" -#include "statedump.h" +#include <glusterfs/statedump.h> #include "clear.h" +const char *clrlk_type_names[CLRLK_TYPE_MAX] = { + [CLRLK_INODE] = "inode", + [CLRLK_ENTRY] = "entry", + [CLRLK_POSIX] = "posix", +}; + int -clrlk_get_kind (char *kind) +clrlk_get_kind(char *kind) { - char *clrlk_kinds[CLRLK_KIND_MAX] = {"dummy", "blocked", "granted", - "all"}; - int ret_kind = CLRLK_KIND_MAX; - int i = 0; - - for (i = CLRLK_BLOCKED; i < CLRLK_KIND_MAX; i++) { - if (!strcmp (clrlk_kinds[i], kind)) { - ret_kind = i; - break; - } + char *clrlk_kinds[CLRLK_KIND_MAX] = {"dummy", "blocked", "granted", "all"}; + int ret_kind = CLRLK_KIND_MAX; + int i = 0; + + for (i = CLRLK_BLOCKED; i < CLRLK_KIND_MAX; i++) { + if (!strcmp(clrlk_kinds[i], kind)) { + ret_kind = i; + break; } + } - return ret_kind; + return ret_kind; } int -clrlk_get_type (char *type) +clrlk_get_type(char *type) { - char *clrlk_types[CLRLK_TYPE_MAX] = {"inode", "entry", "posix"}; - int ret_type = CLRLK_TYPE_MAX; - int i = 0; - - for (i = CLRLK_INODE; i < CLRLK_TYPE_MAX; i++) { - if (!strcmp (clrlk_types[i], type)) { - ret_type = i; - break; - } + char *clrlk_types[CLRLK_TYPE_MAX] = {"inode", "entry", "posix"}; + int ret_type = CLRLK_TYPE_MAX; + int i = 0; + + for (i = CLRLK_INODE; i < CLRLK_TYPE_MAX; i++) { + if (!strcmp(clrlk_types[i], type)) { + ret_type = i; + break; } + } - return ret_type; + return ret_type; } int -clrlk_get_lock_range (char *range_str, struct 
gf_flock *ulock, - gf_boolean_t *chk_range) +clrlk_get_lock_range(char *range_str, struct gf_flock *ulock, + gf_boolean_t *chk_range) { - int ret = -1; - - if (!chk_range) - goto out; + int ret = -1; - if (!range_str) { - ret = 0; - *chk_range = _gf_false; - goto out; - } - - if (sscanf (range_str, "%hd,%"PRId64"-""%"PRId64, &ulock->l_whence, - &ulock->l_start, &ulock->l_len) != 3) { - goto out; - } + if (!chk_range) + goto out; + if (!range_str) { ret = 0; - *chk_range = _gf_true; + *chk_range = _gf_false; + goto out; + } + + if (sscanf(range_str, + "%hd,%" PRId64 "-" + "%" PRId64, + &ulock->l_whence, &ulock->l_start, &ulock->l_len) != 3) { + goto out; + } + + ret = 0; + *chk_range = _gf_true; out: - return ret; + return ret; } int -clrlk_parse_args (const char* cmd, clrlk_args *args) +clrlk_parse_args(const char *cmd, clrlk_args *args) { - char *opts = NULL; - char *cur = NULL; - char *tok = NULL; - char *sptr = NULL; - char *free_ptr = NULL; - char kw[KW_MAX] = {[KW_TYPE] = 't', - [KW_KIND] = 'k', - }; - int ret = -1; - int i = 0; - - GF_ASSERT (cmd); - free_ptr = opts = GF_CALLOC (1, strlen (cmd), gf_common_mt_char); - if (!opts) - goto out; - - if (sscanf (cmd, GF_XATTR_CLRLK_CMD".%s", opts) < 1) { - ret = -1; - goto out; + char *opts = NULL; + char *cur = NULL; + char *tok = NULL; + char *sptr = NULL; + char *free_ptr = NULL; + char kw[KW_MAX] = { + [KW_TYPE] = 't', + [KW_KIND] = 'k', + }; + int ret = -1; + int i = 0; + + GF_ASSERT(cmd); + free_ptr = opts = GF_CALLOC(1, strlen(cmd), gf_common_mt_char); + if (!opts) + goto out; + + if (sscanf(cmd, GF_XATTR_CLRLK_CMD ".%s", opts) < 1) { + ret = -1; + goto out; + } + + /*clr_lk_prefix.ttype.kkind.args, args - type specific*/ + cur = opts; + for (i = 0; i < KW_MAX && (tok = strtok_r(cur, ".", &sptr)); + cur = NULL, i++) { + if (tok[0] != kw[i]) { + ret = -1; + goto out; } - - /*clr_lk_prefix.ttype.kkind.args, args - type specific*/ - cur = opts; - for (i = 0; i < KW_MAX && (tok = strtok_r (cur, ".", &sptr)); - 
cur = NULL, i++) { - if (tok[0] != kw[i]) { - ret = -1; - goto out; - } - if (i == KW_TYPE) - args->type = clrlk_get_type (tok+1); - if (i == KW_KIND) - args->kind = clrlk_get_kind (tok+1); - } - - if ((args->type == CLRLK_TYPE_MAX) || (args->kind == CLRLK_KIND_MAX)) - goto out; - - /*optional args, neither range nor basename can 'legally' contain - * "/" in them*/ - tok = strtok_r (NULL, "/", &sptr); - if (tok) - args->opts = gf_strdup (tok); - - ret = 0; + if (i == KW_TYPE) + args->type = clrlk_get_type(tok + 1); + if (i == KW_KIND) + args->kind = clrlk_get_kind(tok + 1); + } + + if ((args->type == CLRLK_TYPE_MAX) || (args->kind == CLRLK_KIND_MAX)) + goto out; + + /*optional args, neither range nor basename can 'legally' contain + * "/" in them*/ + tok = strtok_r(NULL, "/", &sptr); + if (tok) + args->opts = gf_strdup(tok); + + ret = 0; out: - GF_FREE (free_ptr); - return ret; + GF_FREE(free_ptr); + return ret; } int -clrlk_clear_posixlk (xlator_t *this, pl_inode_t *pl_inode, clrlk_args *args, - int *blkd, int *granted, int *op_errno) +clrlk_clear_posixlk(xlator_t *this, pl_inode_t *pl_inode, clrlk_args *args, + int *blkd, int *granted, int *op_errno) { - posix_lock_t *plock = NULL; - posix_lock_t *tmp = NULL; - struct gf_flock ulock = {0, }; - int ret = -1; - int bcount = 0; - int gcount = 0; - gf_boolean_t chk_range = _gf_false; - - if (clrlk_get_lock_range (args->opts, &ulock, &chk_range)) { - *op_errno = EINVAL; - goto out; - } - - pthread_mutex_lock (&pl_inode->mutex); + posix_lock_t *plock = NULL; + posix_lock_t *tmp = NULL; + struct gf_flock ulock = { + 0, + }; + int ret = -1; + int bcount = 0; + int gcount = 0; + gf_boolean_t chk_range = _gf_false; + + if (clrlk_get_lock_range(args->opts, &ulock, &chk_range)) { + *op_errno = EINVAL; + goto out; + } + + pthread_mutex_lock(&pl_inode->mutex); + { + list_for_each_entry_safe(plock, tmp, &pl_inode->ext_list, list) { - list_for_each_entry_safe (plock, tmp, &pl_inode->ext_list, - list) { - if ((plock->blocked && - 
!(args->kind & CLRLK_BLOCKED)) || - (!plock->blocked && - !(args->kind & CLRLK_GRANTED))) - continue; - - if (chk_range && - (plock->user_flock.l_whence != ulock.l_whence - || plock->user_flock.l_start != ulock.l_start - || plock->user_flock.l_len != ulock.l_len)) - continue; - - list_del_init (&plock->list); - if (plock->blocked) { - bcount++; - pl_trace_out (this, plock->frame, NULL, NULL, - F_SETLKW, &plock->user_flock, - -1, EAGAIN, NULL); - - STACK_UNWIND_STRICT (lk, plock->frame, -1, EAGAIN, - &plock->user_flock, NULL); - - } else { - gcount++; - } - GF_FREE (plock); - } + if ((plock->blocked && !(args->kind & CLRLK_BLOCKED)) || + (!plock->blocked && !(args->kind & CLRLK_GRANTED))) + continue; + + if (chk_range && (plock->user_flock.l_whence != ulock.l_whence || + plock->user_flock.l_start != ulock.l_start || + plock->user_flock.l_len != ulock.l_len)) + continue; + + list_del_init(&plock->list); + if (plock->blocked) { + bcount++; + pl_trace_out(this, plock->frame, NULL, NULL, F_SETLKW, + &plock->user_flock, -1, EINTR, NULL); + + STACK_UNWIND_STRICT(lk, plock->frame, -1, EINTR, + &plock->user_flock, NULL); + + } else { + gcount++; + } + __destroy_lock(plock); } - pthread_mutex_unlock (&pl_inode->mutex); - grant_blocked_locks (this, pl_inode); - ret = 0; + } + pthread_mutex_unlock(&pl_inode->mutex); + grant_blocked_locks(this, pl_inode); + ret = 0; out: - *blkd = bcount; - *granted = gcount; - return ret; + *blkd = bcount; + *granted = gcount; + return ret; } /* Returns 0 on success and -1 on failure */ int -clrlk_clear_inodelk (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom, - clrlk_args *args, int *blkd, int *granted, int *op_errno) +clrlk_clear_inodelk(xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom, + clrlk_args *args, int *blkd, int *granted, int *op_errno) { - pl_inode_lock_t *ilock = NULL; - pl_inode_lock_t *tmp = NULL; - struct gf_flock ulock = {0, }; - int ret = -1; - int bcount = 0; - int gcount = 0; - gf_boolean_t chk_range = 
_gf_false; - struct list_head released; - - INIT_LIST_HEAD (&released); - if (clrlk_get_lock_range (args->opts, &ulock, &chk_range)) { - *op_errno = EINVAL; - goto out; - } - - if (args->kind & CLRLK_BLOCKED) - goto blkd; - - if (args->kind & CLRLK_GRANTED) - goto granted; + posix_locks_private_t *priv; + pl_inode_lock_t *ilock = NULL; + pl_inode_lock_t *tmp = NULL; + struct gf_flock ulock = { + 0, + }; + int ret = -1; + int bcount = 0; + int gcount = 0; + gf_boolean_t chk_range = _gf_false; + struct list_head *pcontend = NULL; + struct list_head released; + struct list_head contend; + struct timespec now = {}; + + INIT_LIST_HEAD(&released); + + priv = this->private; + if (priv->notify_contention) { + pcontend = &contend; + INIT_LIST_HEAD(pcontend); + timespec_now(&now); + } + + if (clrlk_get_lock_range(args->opts, &ulock, &chk_range)) { + *op_errno = EINVAL; + goto out; + } + + if (args->kind & CLRLK_BLOCKED) + goto blkd; + + if (args->kind & CLRLK_GRANTED) + goto granted; blkd: - pthread_mutex_lock (&pl_inode->mutex); + pthread_mutex_lock(&pl_inode->mutex); + { + list_for_each_entry_safe(ilock, tmp, &dom->blocked_inodelks, + blocked_locks) { - list_for_each_entry_safe (ilock, tmp, &dom->blocked_inodelks, - blocked_locks) { - if (chk_range && - (ilock->user_flock.l_whence != ulock.l_whence - || ilock->user_flock.l_start != ulock.l_start - || ilock->user_flock.l_len != ulock.l_len)) - continue; - - bcount++; - list_del_init (&ilock->blocked_locks); - list_add (&ilock->blocked_locks, &released); - } - } - pthread_mutex_unlock (&pl_inode->mutex); - - list_for_each_entry_safe (ilock, tmp, &released, blocked_locks) { - list_del_init (&ilock->blocked_locks); - pl_trace_out (this, ilock->frame, NULL, NULL, F_SETLKW, - &ilock->user_flock, -1, EAGAIN, - ilock->volume); - STACK_UNWIND_STRICT (inodelk, ilock->frame, -1, - EAGAIN, NULL); - //No need to take lock as the locks are only in one list - __pl_inodelk_unref (ilock); + if (chk_range && (ilock->user_flock.l_whence != 
ulock.l_whence || + ilock->user_flock.l_start != ulock.l_start || + ilock->user_flock.l_len != ulock.l_len)) + continue; + + bcount++; + list_del_init(&ilock->client_list); + list_del_init(&ilock->blocked_locks); + list_add(&ilock->blocked_locks, &released); } + } + pthread_mutex_unlock(&pl_inode->mutex); - if (!(args->kind & CLRLK_GRANTED)) { - ret = 0; - goto out; + if (!list_empty(&released)) { + list_for_each_entry_safe(ilock, tmp, &released, blocked_locks) + { + list_del_init(&ilock->blocked_locks); + pl_trace_out(this, ilock->frame, NULL, NULL, F_SETLKW, + &ilock->user_flock, -1, EAGAIN, ilock->volume); + STACK_UNWIND_STRICT(inodelk, ilock->frame, -1, EAGAIN, NULL); + // No need to take lock as the locks are only in one list + __pl_inodelk_unref(ilock); } + } + + if (!(args->kind & CLRLK_GRANTED)) { + ret = 0; + goto out; + } granted: - pthread_mutex_lock (&pl_inode->mutex); + pthread_mutex_lock(&pl_inode->mutex); + { + list_for_each_entry_safe(ilock, tmp, &dom->inodelk_list, list) { - list_for_each_entry_safe (ilock, tmp, &dom->inodelk_list, - list) { - if (chk_range && - (ilock->user_flock.l_whence != ulock.l_whence - || ilock->user_flock.l_start != ulock.l_start - || ilock->user_flock.l_len != ulock.l_len)) - continue; - - gcount++; - list_del_init (&ilock->list); - list_add (&ilock->list, &released); - } + if (chk_range && (ilock->user_flock.l_whence != ulock.l_whence || + ilock->user_flock.l_start != ulock.l_start || + ilock->user_flock.l_len != ulock.l_len)) + continue; + + gcount++; + list_del_init(&ilock->client_list); + list_del_init(&ilock->list); + list_add(&ilock->list, &released); } - pthread_mutex_unlock (&pl_inode->mutex); + } + pthread_mutex_unlock(&pl_inode->mutex); - list_for_each_entry_safe (ilock, tmp, &released, list) { - list_del_init (&ilock->list); - //No need to take lock as the locks are only in one list - __pl_inodelk_unref (ilock); - } + list_for_each_entry_safe(ilock, tmp, &released, list) + { + list_del_init(&ilock->list); + // 
No need to take lock as the locks are only in one list + __pl_inodelk_unref(ilock); + } - ret = 0; + ret = 0; out: - grant_blocked_inode_locks (this, pl_inode, dom); - *blkd = bcount; - *granted = gcount; - return ret; + grant_blocked_inode_locks(this, pl_inode, dom, &now, pcontend); + if (pcontend != NULL) { + inodelk_contention_notify(this, pcontend); + } + *blkd = bcount; + *granted = gcount; + return ret; } /* Returns 0 on success and -1 on failure */ int -clrlk_clear_entrylk (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom, - clrlk_args *args, int *blkd, int *granted, int *op_errno) +clrlk_clear_entrylk(xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom, + clrlk_args *args, int *blkd, int *granted, int *op_errno) { - pl_entry_lock_t *elock = NULL; - pl_entry_lock_t *tmp = NULL; - int bcount = 0; - int gcount = 0; - int ret = -1; - struct list_head removed; - struct list_head released; - - INIT_LIST_HEAD (&released); - if (args->kind & CLRLK_BLOCKED) - goto blkd; - - if (args->kind & CLRLK_GRANTED) - goto granted; + posix_locks_private_t *priv; + pl_entry_lock_t *elock = NULL; + pl_entry_lock_t *tmp = NULL; + int bcount = 0; + int gcount = 0; + int ret = -1; + struct list_head *pcontend = NULL; + struct list_head removed; + struct list_head released; + struct list_head contend; + struct timespec now; + + INIT_LIST_HEAD(&released); + + priv = this->private; + if (priv->notify_contention) { + pcontend = &contend; + INIT_LIST_HEAD(pcontend); + timespec_now(&now); + } + + if (args->kind & CLRLK_BLOCKED) + goto blkd; + + if (args->kind & CLRLK_GRANTED) + goto granted; blkd: - pthread_mutex_lock (&pl_inode->mutex); + pthread_mutex_lock(&pl_inode->mutex); + { + list_for_each_entry_safe(elock, tmp, &dom->blocked_entrylks, + blocked_locks) { - list_for_each_entry_safe (elock, tmp, &dom->blocked_entrylks, - blocked_locks) { - if (args->opts) { - if (!elock->basename || - strcmp (elock->basename, args->opts)) - continue; - } - - bcount++; - - list_del_init 
(&elock->blocked_locks); - list_add_tail (&elock->blocked_locks, &released); - } - } - pthread_mutex_unlock (&pl_inode->mutex); + if (args->opts) { + if (!elock->basename || strcmp(elock->basename, args->opts)) + continue; + } - list_for_each_entry_safe (elock, tmp, &released, blocked_locks) { - list_del_init (&elock->blocked_locks); - entrylk_trace_out (this, elock->frame, elock->volume, NULL, NULL, - elock->basename, ENTRYLK_LOCK, elock->type, - -1, EAGAIN); - STACK_UNWIND_STRICT (entrylk, elock->frame, -1, EAGAIN, NULL); + bcount++; - __pl_entrylk_unref (elock); + list_del_init(&elock->client_list); + list_del_init(&elock->blocked_locks); + list_add_tail(&elock->blocked_locks, &released); } + } + pthread_mutex_unlock(&pl_inode->mutex); + + if (!list_empty(&released)) { + list_for_each_entry_safe(elock, tmp, &released, blocked_locks) + { + list_del_init(&elock->blocked_locks); + entrylk_trace_out(this, elock->frame, elock->volume, NULL, NULL, + elock->basename, ENTRYLK_LOCK, elock->type, -1, + EAGAIN); + STACK_UNWIND_STRICT(entrylk, elock->frame, -1, EAGAIN, NULL); - if (!(args->kind & CLRLK_GRANTED)) { - ret = 0; - goto out; + __pl_entrylk_unref(elock); } + } + + if (!(args->kind & CLRLK_GRANTED)) { + ret = 0; + goto out; + } granted: - INIT_LIST_HEAD (&removed); - pthread_mutex_lock (&pl_inode->mutex); + INIT_LIST_HEAD(&removed); + pthread_mutex_lock(&pl_inode->mutex); + { + list_for_each_entry_safe(elock, tmp, &dom->entrylk_list, domain_list) { - list_for_each_entry_safe (elock, tmp, &dom->entrylk_list, - domain_list) { - if (args->opts) { - if (!elock->basename || - strcmp (elock->basename, args->opts)) - continue; - } - - gcount++; - list_del_init (&elock->domain_list); - list_add_tail (&elock->domain_list, &removed); - - __pl_entrylk_unref (elock); - } + if (args->opts) { + if (!elock->basename || strcmp(elock->basename, args->opts)) + continue; + } + + gcount++; + list_del_init(&elock->client_list); + list_del_init(&elock->domain_list); + 
list_add_tail(&elock->domain_list, &removed); + + __pl_entrylk_unref(elock); } - pthread_mutex_unlock (&pl_inode->mutex); + } + pthread_mutex_unlock(&pl_inode->mutex); - grant_blocked_entry_locks (this, pl_inode, dom); + grant_blocked_entry_locks(this, pl_inode, dom, &now, pcontend); + if (pcontend != NULL) { + entrylk_contention_notify(this, pcontend); + } - ret = 0; + ret = 0; out: - *blkd = bcount; - *granted = gcount; - return ret; + *blkd = bcount; + *granted = gcount; + return ret; } int -clrlk_clear_lks_in_all_domains (xlator_t *this, pl_inode_t *pl_inode, - clrlk_args *args, int *blkd, int *granted, - int *op_errno) +clrlk_clear_lks_in_all_domains(xlator_t *this, pl_inode_t *pl_inode, + clrlk_args *args, int *blkd, int *granted, + int *op_errno) { - pl_dom_list_t *dom = NULL; - int ret = -1; - int tmp_bcount = 0; - int tmp_gcount = 0; - - if (list_empty (&pl_inode->dom_list)) { - ret = 0; - goto out; - } + pl_dom_list_t *dom = NULL; + int ret = -1; + int tmp_bcount = 0; + int tmp_gcount = 0; - list_for_each_entry (dom, &pl_inode->dom_list, inode_list) { - tmp_bcount = tmp_gcount = 0; - - switch (args->type) - { - case CLRLK_INODE: - ret = clrlk_clear_inodelk (this, pl_inode, dom, args, - &tmp_bcount, &tmp_gcount, - op_errno); - if (ret) - goto out; - break; - case CLRLK_ENTRY: - ret = clrlk_clear_entrylk (this, pl_inode, dom, args, - &tmp_bcount, &tmp_gcount, - op_errno); - if (ret) - goto out; - break; - } - - *blkd += tmp_bcount; - *granted += tmp_gcount; + if (list_empty(&pl_inode->dom_list)) { + ret = 0; + goto out; + } + + list_for_each_entry(dom, &pl_inode->dom_list, inode_list) + { + tmp_bcount = tmp_gcount = 0; + + switch (args->type) { + case CLRLK_INODE: + ret = clrlk_clear_inodelk(this, pl_inode, dom, args, + &tmp_bcount, &tmp_gcount, op_errno); + if (ret) + goto out; + break; + case CLRLK_ENTRY: + ret = clrlk_clear_entrylk(this, pl_inode, dom, args, + &tmp_bcount, &tmp_gcount, op_errno); + if (ret) + goto out; + break; } - ret = 0; + *blkd += 
tmp_bcount; + *granted += tmp_gcount; + } + + ret = 0; out: - return ret; + return ret; } diff --git a/xlators/features/locks/src/clear.h b/xlators/features/locks/src/clear.h index 78fc5ae3398..bc118cb1b81 100644 --- a/xlators/features/locks/src/clear.h +++ b/xlators/features/locks/src/clear.h @@ -10,62 +10,64 @@ #ifndef __CLEAR_H__ #define __CLEAR_H__ -#include "compat-errno.h" -#include "stack.h" -#include "call-stub.h" +#include <glusterfs/compat-errno.h> +#include <glusterfs/stack.h> +#include <glusterfs/call-stub.h> #include "locks.h" typedef enum { - CLRLK_INODE, - CLRLK_ENTRY, - CLRLK_POSIX, - CLRLK_TYPE_MAX + CLRLK_INODE, + CLRLK_ENTRY, + CLRLK_POSIX, + CLRLK_TYPE_MAX } clrlk_type; +extern const char *clrlk_type_names[]; + typedef enum { - CLRLK_BLOCKED = 1, - CLRLK_GRANTED, - CLRLK_ALL, - CLRLK_KIND_MAX + CLRLK_BLOCKED = 1, + CLRLK_GRANTED, + CLRLK_ALL, + CLRLK_KIND_MAX } clrlk_kind; typedef enum { - KW_TYPE, - KW_KIND, - /*add new keywords here*/ - KW_MAX + KW_TYPE, + KW_KIND, + /*add new keywords here*/ + KW_MAX } clrlk_opts; struct _clrlk_args; typedef struct _clrlk_args clrlk_args; struct _clrlk_args { - int type; - int kind; - char *opts; + int type; + int kind; + char *opts; }; int -clrlk_get__kind (char *kind); +clrlk_get__kind(char *kind); int -clrlk_get_type (char *type); +clrlk_get_type(char *type); int -clrlk_get_lock_range (char *range_str, struct gf_flock *ulock, - gf_boolean_t *chk_range); +clrlk_get_lock_range(char *range_str, struct gf_flock *ulock, + gf_boolean_t *chk_range); int -clrlk_parse_args (const char* cmd, clrlk_args *args); +clrlk_parse_args(const char *cmd, clrlk_args *args); int -clrlk_clear_posixlk (xlator_t *this, pl_inode_t *pl_inode, clrlk_args *args, - int *blkd, int *granted, int *op_errno); +clrlk_clear_posixlk(xlator_t *this, pl_inode_t *pl_inode, clrlk_args *args, + int *blkd, int *granted, int *op_errno); int -clrlk_clear_inodelk (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom, - clrlk_args *args, int *blkd, 
int *granted, int *op_errno); +clrlk_clear_inodelk(xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom, + clrlk_args *args, int *blkd, int *granted, int *op_errno); int -clrlk_clear_entrylk (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom, - clrlk_args *args, int *blkd, int *granted, int *op_errno); +clrlk_clear_entrylk(xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom, + clrlk_args *args, int *blkd, int *granted, int *op_errno); int -clrlk_clear_lks_in_all_domains (xlator_t *this, pl_inode_t *pl_inode, - clrlk_args *args, int *blkd, int *granted, - int *op_errno); +clrlk_clear_lks_in_all_domains(xlator_t *this, pl_inode_t *pl_inode, + clrlk_args *args, int *blkd, int *granted, + int *op_errno); #endif /* __CLEAR_H__ */ diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c index 0abc9353c9e..a2c6be93e03 100644 --- a/xlators/features/locks/src/common.c +++ b/xlators/features/locks/src/common.c @@ -1,5 +1,5 @@ /* - Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + Copyright (c) 2006-2012, 2015-2016 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. 
This file is licensed to you under your choice of the GNU Lesser @@ -12,719 +12,775 @@ #include <limits.h> #include <pthread.h> -#include "glusterfs.h" -#include "compat.h" -#include "xlator.h" -#include "inode.h" -#include "logging.h" -#include "common-utils.h" +#include <glusterfs/glusterfs.h> +#include <glusterfs/compat.h> +#include <glusterfs/logging.h> +#include <glusterfs/syncop.h> #include "locks.h" #include "common.h" - static int -__is_lock_grantable (pl_inode_t *pl_inode, posix_lock_t *lock); +__is_lock_grantable(pl_inode_t *pl_inode, posix_lock_t *lock); static void -__insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock); +__insert_and_merge(pl_inode_t *pl_inode, posix_lock_t *lock); static int -pl_send_prelock_unlock (xlator_t *this, pl_inode_t *pl_inode, - posix_lock_t *old_lock); +pl_send_prelock_unlock(xlator_t *this, pl_inode_t *pl_inode, + posix_lock_t *old_lock); static pl_dom_list_t * -__allocate_domain (const char *volume) +__allocate_domain(const char *volume) { - pl_dom_list_t *dom = NULL; + pl_dom_list_t *dom = NULL; - dom = GF_CALLOC (1, sizeof (*dom), - gf_locks_mt_pl_dom_list_t); - if (!dom) - goto out; + dom = GF_CALLOC(1, sizeof(*dom), gf_locks_mt_pl_dom_list_t); + if (!dom) + goto out; - dom->domain = gf_strdup(volume); - if (!dom->domain) - goto out; + dom->domain = gf_strdup(volume); + if (!dom->domain) + goto out; - gf_log ("posix-locks", GF_LOG_TRACE, - "New domain allocated: %s", dom->domain); + gf_log("posix-locks", GF_LOG_TRACE, "New domain allocated: %s", + dom->domain); - INIT_LIST_HEAD (&dom->inode_list); - INIT_LIST_HEAD (&dom->entrylk_list); - INIT_LIST_HEAD (&dom->blocked_entrylks); - INIT_LIST_HEAD (&dom->inodelk_list); - INIT_LIST_HEAD (&dom->blocked_inodelks); + INIT_LIST_HEAD(&dom->inode_list); + INIT_LIST_HEAD(&dom->entrylk_list); + INIT_LIST_HEAD(&dom->blocked_entrylks); + INIT_LIST_HEAD(&dom->inodelk_list); + INIT_LIST_HEAD(&dom->blocked_inodelks); out: - if (dom && (NULL == dom->domain)) { - GF_FREE (dom); - 
dom = NULL; - } + if (dom && (NULL == dom->domain)) { + GF_FREE(dom); + dom = NULL; + } - return dom; + return dom; } /* Returns domain for the lock. If domain is not present, * allocates a domain and returns it */ pl_dom_list_t * -get_domain (pl_inode_t *pl_inode, const char *volume) +get_domain(pl_inode_t *pl_inode, const char *volume) { - pl_dom_list_t *dom = NULL; + pl_dom_list_t *dom = NULL; - GF_VALIDATE_OR_GOTO ("posix-locks", pl_inode, out); - GF_VALIDATE_OR_GOTO ("posix-locks", volume, out); + GF_VALIDATE_OR_GOTO("posix-locks", pl_inode, out); + GF_VALIDATE_OR_GOTO("posix-locks", volume, out); - pthread_mutex_lock (&pl_inode->mutex); + pthread_mutex_lock(&pl_inode->mutex); + { + list_for_each_entry(dom, &pl_inode->dom_list, inode_list) { - list_for_each_entry (dom, &pl_inode->dom_list, inode_list) { - if (strcmp (dom->domain, volume) == 0) - goto unlock; - } - - dom = __allocate_domain (volume); - if (dom) - list_add (&dom->inode_list, &pl_inode->dom_list); + if (strcmp(dom->domain, volume) == 0) + goto unlock; } + + dom = __allocate_domain(volume); + if (dom) + list_add(&dom->inode_list, &pl_inode->dom_list); + } unlock: - pthread_mutex_unlock (&pl_inode->mutex); - if (dom) { - gf_log ("posix-locks", GF_LOG_TRACE, "Domain %s found", volume); - } else { - gf_log ("posix-locks", GF_LOG_TRACE, "Domain %s not found", volume); - } + pthread_mutex_unlock(&pl_inode->mutex); + if (dom) { + gf_log("posix-locks", GF_LOG_TRACE, "Domain %s found", volume); + } else { + gf_log("posix-locks", GF_LOG_TRACE, "Domain %s not found", volume); + } out: - return dom; + return dom; } unsigned long -fd_to_fdnum (fd_t *fd) +fd_to_fdnum(fd_t *fd) { - return ((unsigned long) fd); + return ((unsigned long)fd); } fd_t * -fd_from_fdnum (posix_lock_t *lock) +fd_from_fdnum(posix_lock_t *lock) { - return ((fd_t *) lock->fd_num); + return ((fd_t *)lock->fd_num); } int -__pl_inode_is_empty (pl_inode_t *pl_inode) +__pl_inode_is_empty(pl_inode_t *pl_inode) { - return (list_empty 
(&pl_inode->ext_list)); + return (list_empty(&pl_inode->ext_list)); } void -pl_print_locker (char *str, int size, xlator_t *this, call_frame_t *frame) +pl_print_locker(char *str, int size, xlator_t *this, call_frame_t *frame) { - snprintf (str, size, "Pid=%llu, lk-owner=%s, Client=%p, Frame=%llu", - (unsigned long long) frame->root->pid, - lkowner_utoa (&frame->root->lk_owner), - frame->root->client, - (unsigned long long) frame->root->unique); + snprintf(str, size, "Pid=%llu, lk-owner=%s, Client=%p, Frame=%llu", + (unsigned long long)frame->root->pid, + lkowner_utoa(&frame->root->lk_owner), frame->root->client, + (unsigned long long)frame->root->unique); } - void -pl_print_lockee (char *str, int size, fd_t *fd, loc_t *loc) +pl_print_lockee(char *str, int size, fd_t *fd, loc_t *loc) { - inode_t *inode = NULL; - char *ipath = NULL; - int ret = 0; + inode_t *inode = NULL; + char *ipath = NULL; + int ret = 0; - if (fd) - inode = fd->inode; - if (loc) - inode = loc->inode; + if (fd) + inode = fd->inode; + if (loc) + inode = loc->inode; - if (!inode) { - snprintf (str, size, "<nul>"); - return; - } + if (!inode) { + snprintf(str, size, "<nul>"); + return; + } - if (loc && loc->path) { - ipath = gf_strdup (loc->path); - } else { - ret = inode_path (inode, NULL, &ipath); - if (ret <= 0) - ipath = NULL; - } + if (loc && loc->path) { + ipath = gf_strdup(loc->path); + } else { + ret = inode_path(inode, NULL, &ipath); + if (ret <= 0) + ipath = NULL; + } - snprintf (str, size, "gfid=%s, fd=%p, path=%s", - uuid_utoa (inode->gfid), fd, - ipath ? ipath : "<nul>"); + snprintf(str, size, "gfid=%s, fd=%p, path=%s", uuid_utoa(inode->gfid), fd, + ipath ? 
ipath : "<nul>"); - GF_FREE (ipath); + GF_FREE(ipath); } - void -pl_print_lock (char *str, int size, int cmd, - struct gf_flock *flock, gf_lkowner_t *owner) +pl_print_lock(char *str, int size, int cmd, struct gf_flock *flock, + gf_lkowner_t *owner) { - char *cmd_str = NULL; - char *type_str = NULL; + char *cmd_str = NULL; + char *type_str = NULL; - switch (cmd) { + switch (cmd) { #if F_GETLK != F_GETLK64 case F_GETLK64: #endif case F_GETLK: - cmd_str = "GETLK"; - break; + cmd_str = "GETLK"; + break; #if F_SETLK != F_SETLK64 case F_SETLK64: #endif case F_SETLK: - cmd_str = "SETLK"; - break; + cmd_str = "SETLK"; + break; #if F_SETLKW != F_SETLKW64 case F_SETLKW64: #endif case F_SETLKW: - cmd_str = "SETLKW"; - break; + cmd_str = "SETLKW"; + break; default: - cmd_str = "UNKNOWN"; - break; - } + cmd_str = "UNKNOWN"; + break; + } - switch (flock->l_type) { + switch (flock->l_type) { case F_RDLCK: - type_str = "READ"; - break; + type_str = "READ"; + break; case F_WRLCK: - type_str = "WRITE"; - break; + type_str = "WRITE"; + break; case F_UNLCK: - type_str = "UNLOCK"; - break; + type_str = "UNLOCK"; + break; default: - type_str = "UNKNOWN"; - break; - } - - snprintf (str, size, "lock=FCNTL, cmd=%s, type=%s, " - "start=%llu, len=%llu, pid=%llu, lk-owner=%s", - cmd_str, type_str, (unsigned long long) flock->l_start, - (unsigned long long) flock->l_len, - (unsigned long long) flock->l_pid, - lkowner_utoa (owner)); + type_str = "UNKNOWN"; + break; + } + + snprintf(str, size, + "lock=FCNTL, cmd=%s, type=%s, " + "start=%llu, len=%llu, pid=%llu, lk-owner=%s", + cmd_str, type_str, (unsigned long long)flock->l_start, + (unsigned long long)flock->l_len, (unsigned long long)flock->l_pid, + lkowner_utoa(owner)); } - void -pl_trace_in (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc, - int cmd, struct gf_flock *flock, const char *domain) +pl_trace_in(xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc, int cmd, + struct gf_flock *flock, const char *domain) { - 
posix_locks_private_t *priv = NULL; - char pl_locker[256]; - char pl_lockee[256]; - char pl_lock[256]; + posix_locks_private_t *priv = this->private; + char pl_locker[256]; + char pl_lockee[256]; + char pl_lock[256]; - priv = this->private; - - if (!priv->trace) - return; + if (!priv->trace) + return; - pl_print_locker (pl_locker, 256, this, frame); - pl_print_lockee (pl_lockee, 256, fd, loc); - if (domain) - pl_print_inodelk (pl_lock, 256, cmd, flock, domain); - else - pl_print_lock (pl_lock, 256, cmd, flock, &frame->root->lk_owner); + pl_print_locker(pl_locker, 256, this, frame); + pl_print_lockee(pl_lockee, 256, fd, loc); + if (domain) + pl_print_inodelk(pl_lock, 256, cmd, flock, domain); + else + pl_print_lock(pl_lock, 256, cmd, flock, &frame->root->lk_owner); - gf_log (this->name, GF_LOG_INFO, - "[REQUEST] Locker = {%s} Lockee = {%s} Lock = {%s}", - pl_locker, pl_lockee, pl_lock); + gf_log(this->name, GF_LOG_INFO, + "[REQUEST] Locker = {%s} Lockee = {%s} Lock = {%s}", pl_locker, + pl_lockee, pl_lock); } - void -pl_print_verdict (char *str, int size, int op_ret, int op_errno) +pl_print_verdict(char *str, int size, int op_ret, int op_errno) { - char *verdict = NULL; - - if (op_ret == 0) { - verdict = "GRANTED"; - } else { - switch (op_errno) { - case EAGAIN: - verdict = "TRYAGAIN"; - break; - default: - verdict = strerror (op_errno); - } + char *verdict = NULL; + + if (op_ret == 0) { + verdict = "GRANTED"; + } else { + switch (op_errno) { + case EAGAIN: + verdict = "TRYAGAIN"; + break; + default: + verdict = strerror(op_errno); } + } - snprintf (str, size, "%s", verdict); + snprintf(str, size, "%s", verdict); } - void -pl_trace_out (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc, - int cmd, struct gf_flock *flock, int op_ret, int op_errno, const char *domain) +pl_trace_out(xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc, int cmd, + struct gf_flock *flock, int op_ret, int op_errno, + const char *domain) { - posix_locks_private_t *priv = NULL; 
- char pl_locker[256]; - char pl_lockee[256]; - char pl_lock[256]; - char verdict[32]; + posix_locks_private_t *priv = NULL; + char pl_locker[256]; + char pl_lockee[256]; + char pl_lock[256]; + char verdict[32]; - priv = this->private; + priv = this->private; - if (!priv->trace) - return; + if (!priv->trace) + return; - pl_print_locker (pl_locker, 256, this, frame); - pl_print_lockee (pl_lockee, 256, fd, loc); - if (domain) - pl_print_inodelk (pl_lock, 256, cmd, flock, domain); - else - pl_print_lock (pl_lock, 256, cmd, flock, &frame->root->lk_owner); + pl_print_locker(pl_locker, 256, this, frame); + pl_print_lockee(pl_lockee, 256, fd, loc); + if (domain) + pl_print_inodelk(pl_lock, 256, cmd, flock, domain); + else + pl_print_lock(pl_lock, 256, cmd, flock, &frame->root->lk_owner); - pl_print_verdict (verdict, 32, op_ret, op_errno); + pl_print_verdict(verdict, 32, op_ret, op_errno); - gf_log (this->name, GF_LOG_INFO, - "[%s] Locker = {%s} Lockee = {%s} Lock = {%s}", - verdict, pl_locker, pl_lockee, pl_lock); + gf_log(this->name, GF_LOG_INFO, + "[%s] Locker = {%s} Lockee = {%s} Lock = {%s}", verdict, pl_locker, + pl_lockee, pl_lock); } - void -pl_trace_block (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc, - int cmd, struct gf_flock *flock, const char *domain) +pl_trace_block(xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc, + int cmd, struct gf_flock *flock, const char *domain) { - posix_locks_private_t *priv = NULL; - char pl_locker[256]; - char pl_lockee[256]; - char pl_lock[256]; + posix_locks_private_t *priv = this->private; + char pl_locker[256]; + char pl_lockee[256]; + char pl_lock[256]; - priv = this->private; - - if (!priv->trace) - return; + if (!priv->trace) + return; - pl_print_locker (pl_locker, 256, this, frame); - pl_print_lockee (pl_lockee, 256, fd, loc); - if (domain) - pl_print_inodelk (pl_lock, 256, cmd, flock, domain); - else - pl_print_lock (pl_lock, 256, cmd, flock, &frame->root->lk_owner); + pl_print_locker(pl_locker, 256, 
this, frame); + pl_print_lockee(pl_lockee, 256, fd, loc); + if (domain) + pl_print_inodelk(pl_lock, 256, cmd, flock, domain); + else + pl_print_lock(pl_lock, 256, cmd, flock, &frame->root->lk_owner); - gf_log (this->name, GF_LOG_INFO, - "[BLOCKED] Locker = {%s} Lockee = {%s} Lock = {%s}", - pl_locker, pl_lockee, pl_lock); + gf_log(this->name, GF_LOG_INFO, + "[BLOCKED] Locker = {%s} Lockee = {%s} Lock = {%s}", pl_locker, + pl_lockee, pl_lock); } - void -pl_trace_flush (xlator_t *this, call_frame_t *frame, fd_t *fd) +pl_trace_flush(xlator_t *this, call_frame_t *frame, fd_t *fd) { - posix_locks_private_t *priv = NULL; - char pl_locker[256]; - char pl_lockee[256]; - pl_inode_t *pl_inode = NULL; + posix_locks_private_t *priv = NULL; + char pl_locker[256]; + char pl_lockee[256]; + pl_inode_t *pl_inode = NULL; - priv = this->private; + priv = this->private; - if (!priv->trace) - return; + if (!priv->trace) + return; - pl_inode = pl_inode_get (this, fd->inode); + pl_inode = pl_inode_get(this, fd->inode, NULL); - if (pl_inode && __pl_inode_is_empty (pl_inode)) - return; + if (pl_inode && __pl_inode_is_empty(pl_inode)) + return; - pl_print_locker (pl_locker, 256, this, frame); - pl_print_lockee (pl_lockee, 256, fd, NULL); + pl_print_locker(pl_locker, 256, this, frame); + pl_print_lockee(pl_lockee, 256, fd, NULL); - gf_log (this->name, GF_LOG_INFO, - "[FLUSH] Locker = {%s} Lockee = {%s}", - pl_locker, pl_lockee); + gf_log(this->name, GF_LOG_INFO, "[FLUSH] Locker = {%s} Lockee = {%s}", + pl_locker, pl_lockee); } void -pl_trace_release (xlator_t *this, fd_t *fd) +pl_trace_release(xlator_t *this, fd_t *fd) { - posix_locks_private_t *priv = NULL; - char pl_lockee[256]; + posix_locks_private_t *priv = NULL; + char pl_lockee[256]; - priv = this->private; + priv = this->private; - if (!priv->trace) - return; + if (!priv->trace) + return; - pl_print_lockee (pl_lockee, 256, fd, NULL); + pl_print_lockee(pl_lockee, 256, fd, NULL); - gf_log (this->name, GF_LOG_INFO, - "[RELEASE] Lockee = 
{%s}", pl_lockee); + gf_log(this->name, GF_LOG_INFO, "[RELEASE] Lockee = {%s}", pl_lockee); } - void -pl_update_refkeeper (xlator_t *this, inode_t *inode) +pl_update_refkeeper(xlator_t *this, inode_t *inode) { - pl_inode_t *pl_inode = NULL; - int is_empty = 0; - int need_unref = 0; - int need_ref = 0; + pl_inode_t *pl_inode = NULL; + int is_empty = 0; + int need_unref = 0; + int need_ref = 0; - pl_inode = pl_inode_get (this, inode); + pl_inode = pl_inode_get(this, inode, NULL); + if (!pl_inode) + return; - pthread_mutex_lock (&pl_inode->mutex); - { - is_empty = __pl_inode_is_empty (pl_inode); + pthread_mutex_lock(&pl_inode->mutex); + { + is_empty = __pl_inode_is_empty(pl_inode); - if (is_empty && pl_inode->refkeeper) { - need_unref = 1; - pl_inode->refkeeper = NULL; - } + if (is_empty && pl_inode->refkeeper) { + need_unref = 1; + pl_inode->refkeeper = NULL; + } - if (!is_empty && !pl_inode->refkeeper) { - need_ref = 1; - pl_inode->refkeeper = inode; - } + if (!is_empty && !pl_inode->refkeeper) { + need_ref = 1; + pl_inode->refkeeper = inode; } - pthread_mutex_unlock (&pl_inode->mutex); + } + pthread_mutex_unlock(&pl_inode->mutex); - if (need_unref) - inode_unref (inode); + if (need_unref) + inode_unref(inode); - if (need_ref) - inode_ref (inode); + if (need_ref) + inode_ref(inode); } - -pl_inode_t * -pl_inode_get (xlator_t *this, inode_t *inode) +/* Get lock enforcement info from disk */ +int +pl_fetch_mlock_info_from_disk(xlator_t *this, pl_inode_t *pl_inode, + pl_local_t *local) { - uint64_t tmp_pl_inode = 0; - pl_inode_t *pl_inode = NULL; - int ret = 0; - - LOCK (&inode->lock); - { - ret = __inode_ctx_get (inode, this, &tmp_pl_inode); - if (ret == 0) { - pl_inode = (pl_inode_t *)(long)tmp_pl_inode; - goto unlock; - } - pl_inode = GF_CALLOC (1, sizeof (*pl_inode), - gf_locks_mt_pl_inode_t); - if (!pl_inode) { - goto unlock; - } + dict_t *xdata_rsp = NULL; + int ret = 0; + int op_ret = 0; + + if (!local) { + return -1; + } + + if (local->fd) { + op_ret = 
syncop_fgetxattr(this, local->fd, &xdata_rsp, + GF_ENFORCE_MANDATORY_LOCK, NULL, NULL); + } else { + op_ret = syncop_getxattr(this, &local->loc[0], &xdata_rsp, + GF_ENFORCE_MANDATORY_LOCK, NULL, NULL); + } + + pthread_mutex_lock(&pl_inode->mutex); + { + if (op_ret >= 0) { + pl_inode->mlock_enforced = _gf_true; + pl_inode->check_mlock_info = _gf_false; + } else { + gf_msg(this->name, GF_LOG_WARNING, -op_ret, 0, + "getxattr failed with %d", op_ret); + pl_inode->mlock_enforced = _gf_false; + + if (-op_ret == ENODATA) { + pl_inode->check_mlock_info = _gf_false; + } else { + pl_inode->check_mlock_info = _gf_true; + } + } + } + pthread_mutex_unlock(&pl_inode->mutex); - gf_log (this->name, GF_LOG_TRACE, - "Allocating new pl inode"); + return ret; +} - pthread_mutex_init (&pl_inode->mutex, NULL); +pl_inode_t * +pl_inode_get(xlator_t *this, inode_t *inode, pl_local_t *local) +{ + uint64_t tmp_pl_inode = 0; + pl_inode_t *pl_inode = NULL; + int ret = 0; + + LOCK(&inode->lock); + { + ret = __inode_ctx_get(inode, this, &tmp_pl_inode); + if (ret == 0) { + pl_inode = (pl_inode_t *)(long)tmp_pl_inode; + goto unlock; + } - INIT_LIST_HEAD (&pl_inode->dom_list); - INIT_LIST_HEAD (&pl_inode->ext_list); - INIT_LIST_HEAD (&pl_inode->rw_list); - INIT_LIST_HEAD (&pl_inode->reservelk_list); - INIT_LIST_HEAD (&pl_inode->blocked_reservelks); - INIT_LIST_HEAD (&pl_inode->blocked_calls); - gf_uuid_copy (pl_inode->gfid, inode->gfid); + pl_inode = GF_CALLOC(1, sizeof(*pl_inode), gf_locks_mt_pl_inode_t); + if (!pl_inode) { + goto unlock; + } - __inode_ctx_put (inode, this, (uint64_t)(long)(pl_inode)); + gf_log(this->name, GF_LOG_TRACE, "Allocating new pl inode"); + + pthread_mutex_init(&pl_inode->mutex, NULL); + pthread_cond_init(&pl_inode->check_fop_wind_count, 0); + + INIT_LIST_HEAD(&pl_inode->dom_list); + INIT_LIST_HEAD(&pl_inode->ext_list); + INIT_LIST_HEAD(&pl_inode->rw_list); + INIT_LIST_HEAD(&pl_inode->reservelk_list); + INIT_LIST_HEAD(&pl_inode->blocked_reservelks); + 
INIT_LIST_HEAD(&pl_inode->blocked_calls); + INIT_LIST_HEAD(&pl_inode->metalk_list); + INIT_LIST_HEAD(&pl_inode->queued_locks); + INIT_LIST_HEAD(&pl_inode->waiting); + gf_uuid_copy(pl_inode->gfid, inode->gfid); + + pl_inode->check_mlock_info = _gf_true; + pl_inode->mlock_enforced = _gf_false; + + /* -2 means never looked up. -1 means something went wrong and link + * tracking is disabled. */ + pl_inode->links = -2; + + ret = __inode_ctx_put(inode, this, (uint64_t)(long)(pl_inode)); + if (ret) { + pthread_mutex_destroy(&pl_inode->mutex); + GF_FREE(pl_inode); + pl_inode = NULL; + goto unlock; } + } unlock: - UNLOCK (&inode->lock); + UNLOCK(&inode->lock); - return pl_inode; -} + if ((pl_inode != NULL) && pl_is_mandatory_locking_enabled(pl_inode) && + pl_inode->check_mlock_info && local) { + /* Note: The lock enforcement information per file can be stored in the + attribute flag of stat(x) in posix. With that there won't be a need + for doing getxattr post a reboot + */ + pl_fetch_mlock_info_from_disk(this, pl_inode, local); + } + return pl_inode; +} /* Create a new posix_lock_t */ posix_lock_t * -new_posix_lock (struct gf_flock *flock, client_t *client, pid_t client_pid, - gf_lkowner_t *owner, fd_t *fd) +new_posix_lock(struct gf_flock *flock, client_t *client, pid_t client_pid, + gf_lkowner_t *owner, fd_t *fd, uint32_t lk_flags, int blocking, + int32_t *op_errno) { - posix_lock_t *lock = NULL; + posix_lock_t *lock = NULL; - GF_VALIDATE_OR_GOTO ("posix-locks", flock, out); - GF_VALIDATE_OR_GOTO ("posix-locks", client, out); - GF_VALIDATE_OR_GOTO ("posix-locks", fd, out); + GF_VALIDATE_OR_GOTO("posix-locks", flock, out); + GF_VALIDATE_OR_GOTO("posix-locks", client, out); + GF_VALIDATE_OR_GOTO("posix-locks", fd, out); - lock = GF_CALLOC (1, sizeof (posix_lock_t), - gf_locks_mt_posix_lock_t); - if (!lock) { - goto out; - } + if (!pl_is_lk_owner_valid(owner, client)) { + *op_errno = EINVAL; + goto out; + } + + lock = GF_CALLOC(1, sizeof(posix_lock_t), 
gf_locks_mt_posix_lock_t); + if (!lock) { + *op_errno = ENOMEM; + goto out; + } - lock->fl_start = flock->l_start; - lock->fl_type = flock->l_type; + lock->fl_start = flock->l_start; + lock->fl_type = flock->l_type; - if (flock->l_len == 0) - lock->fl_end = LLONG_MAX; - else - lock->fl_end = flock->l_start + flock->l_len - 1; + if (flock->l_len == 0) + lock->fl_end = LLONG_MAX; + else + lock->fl_end = flock->l_start + flock->l_len - 1; - lock->client = client; - lock->fd_num = fd_to_fdnum (fd); - lock->fd = fd; - lock->client_pid = client_pid; - lock->owner = *owner; + lock->client = client; - INIT_LIST_HEAD (&lock->list); + lock->client_uid = gf_strdup(client->client_uid); + if (lock->client_uid == NULL) { + GF_FREE(lock); + lock = NULL; + *op_errno = ENOMEM; + goto out; + } + + lock->fd_num = fd_to_fdnum(fd); + lock->fd = fd; + lock->client_pid = client_pid; + lock->owner = *owner; + lock->lk_flags = lk_flags; + + lock->blocking = blocking; + memcpy(&lock->user_flock, flock, sizeof(lock->user_flock)); + + INIT_LIST_HEAD(&lock->list); out: - return lock; + return lock; } - /* Delete a lock from the inode's lock list */ void -__delete_lock (pl_inode_t *pl_inode, posix_lock_t *lock) +__delete_lock(posix_lock_t *lock) { - list_del_init (&lock->list); + list_del_init(&lock->list); } - /* Destroy a posix_lock */ void -__destroy_lock (posix_lock_t *lock) +__destroy_lock(posix_lock_t *lock) { - GF_FREE (lock); + GF_FREE(lock->client_uid); + GF_FREE(lock); } +static posix_lock_t * +__copy_lock(posix_lock_t *src) +{ + posix_lock_t *dst; + + dst = GF_MALLOC(sizeof(posix_lock_t), gf_locks_mt_posix_lock_t); + if (dst != NULL) { + memcpy(dst, src, sizeof(posix_lock_t)); + dst->client_uid = gf_strdup(src->client_uid); + if (dst->client_uid == NULL) { + GF_FREE(dst); + dst = NULL; + } + + if (dst != NULL) + INIT_LIST_HEAD(&dst->list); + } + + return dst; +} /* Convert a posix_lock to a struct gf_flock */ void -posix_lock_to_flock (posix_lock_t *lock, struct gf_flock *flock) 
+posix_lock_to_flock(posix_lock_t *lock, struct gf_flock *flock) { - flock->l_pid = lock->client_pid; - flock->l_type = lock->fl_type; - flock->l_start = lock->fl_start; - flock->l_owner = lock->owner; - - if (lock->fl_end == LLONG_MAX) - flock->l_len = 0; - else - flock->l_len = lock->fl_end - lock->fl_start + 1; + flock->l_pid = lock->user_flock.l_pid; + flock->l_type = lock->fl_type; + flock->l_start = lock->fl_start; + flock->l_owner = lock->owner; + + if (lock->fl_end == LLONG_MAX) + flock->l_len = 0; + else + flock->l_len = lock->fl_end - lock->fl_start + 1; } /* Insert the lock into the inode's lock list */ static void -__insert_lock (pl_inode_t *pl_inode, posix_lock_t *lock) +__insert_lock(pl_inode_t *pl_inode, posix_lock_t *lock) { - if (lock->blocked) - gettimeofday (&lock->blkd_time, NULL); - else - gettimeofday (&lock->granted_time, NULL); + if (lock->blocked) + lock->blkd_time = gf_time(); + else + lock->granted_time = gf_time(); - list_add_tail (&lock->list, &pl_inode->ext_list); - - return; + list_add_tail(&lock->list, &pl_inode->ext_list); } - /* Return true if the locks overlap, false otherwise */ int -locks_overlap (posix_lock_t *l1, posix_lock_t *l2) +locks_overlap(posix_lock_t *l1, posix_lock_t *l2) { - /* - Note: - FUSE always gives us absolute offsets, so no need to worry - about SEEK_CUR or SEEK_END - */ + /* + Note: + FUSE always gives us absolute offsets, so no need to worry + about SEEK_CUR or SEEK_END + */ - return ((l1->fl_end >= l2->fl_start) && - (l2->fl_end >= l1->fl_start)); + return ((l1->fl_end >= l2->fl_start) && (l2->fl_end >= l1->fl_start)); } - /* Return true if the locks have the same owner */ int -same_owner (posix_lock_t *l1, posix_lock_t *l2) +same_owner(posix_lock_t *l1, posix_lock_t *l2) { - - return (is_same_lkowner (&l1->owner, &l2->owner) && - (l1->client == l2->client)); - + return (is_same_lkowner(&l1->owner, &l2->owner) && + (l1->client == l2->client)); } - /* Delete all F_UNLCK locks */ void -__delete_unlck_locks 
(pl_inode_t *pl_inode) +__delete_unlck_locks(pl_inode_t *pl_inode) { - posix_lock_t *l = NULL; - posix_lock_t *tmp = NULL; - - list_for_each_entry_safe (l, tmp, &pl_inode->ext_list, list) { - if (l->fl_type == F_UNLCK) { - __delete_lock (pl_inode, l); - __destroy_lock (l); - } + posix_lock_t *l = NULL; + posix_lock_t *tmp = NULL; + + list_for_each_entry_safe(l, tmp, &pl_inode->ext_list, list) + { + if (l->fl_type == F_UNLCK) { + __delete_lock(l); + __destroy_lock(l); } + } } - /* Add two locks */ static posix_lock_t * -add_locks (posix_lock_t *l1, posix_lock_t *l2) +add_locks(posix_lock_t *l1, posix_lock_t *l2, posix_lock_t *dst) { - posix_lock_t *sum = NULL; + posix_lock_t *sum = NULL; - sum = GF_CALLOC (1, sizeof (posix_lock_t), - gf_locks_mt_posix_lock_t); - if (!sum) - return NULL; + sum = __copy_lock(dst); + if (!sum) + return NULL; + + sum->fl_start = min(l1->fl_start, l2->fl_start); + sum->fl_end = max(l1->fl_end, l2->fl_end); - sum->fl_start = min (l1->fl_start, l2->fl_start); - sum->fl_end = max (l1->fl_end, l2->fl_end); + posix_lock_to_flock(sum, &sum->user_flock); - return sum; + return sum; } /* Subtract two locks */ struct _values { - posix_lock_t *locks[3]; + posix_lock_t *locks[3]; }; /* {big} must always be contained inside {small} */ static struct _values -subtract_locks (posix_lock_t *big, posix_lock_t *small) +subtract_locks(posix_lock_t *big, posix_lock_t *small) { + struct _values v = {.locks = {0, 0, 0}}; - struct _values v = { .locks = {0, 0, 0} }; - - if ((big->fl_start == small->fl_start) && - (big->fl_end == small->fl_end)) { - /* both edges coincide with big */ - v.locks[0] = GF_CALLOC (1, sizeof (posix_lock_t), - gf_locks_mt_posix_lock_t); - if (!v.locks[0]) - goto out; - memcpy (v.locks[0], big, sizeof (posix_lock_t)); - v.locks[0]->fl_type = small->fl_type; - goto done; + if ((big->fl_start == small->fl_start) && (big->fl_end == small->fl_end)) { + /* both edges coincide with big */ + v.locks[0] = __copy_lock(big); + if (!v.locks[0]) { 
+ goto out; } - if ((small->fl_start > big->fl_start) && - (small->fl_end < big->fl_end)) { - /* both edges lie inside big */ - v.locks[0] = GF_CALLOC (1, sizeof (posix_lock_t), - gf_locks_mt_posix_lock_t); - if (!v.locks[0]) - goto out; - - v.locks[1] = GF_CALLOC (1, sizeof (posix_lock_t), - gf_locks_mt_posix_lock_t); - if (!v.locks[1]) - goto out; - - v.locks[2] = GF_CALLOC (1, sizeof (posix_lock_t), - gf_locks_mt_posix_lock_t); - if (!v.locks[1]) - goto out; - - memcpy (v.locks[0], big, sizeof (posix_lock_t)); - v.locks[0]->fl_end = small->fl_start - 1; - - memcpy (v.locks[1], small, sizeof (posix_lock_t)); - - memcpy (v.locks[2], big, sizeof (posix_lock_t)); - v.locks[2]->fl_start = small->fl_end + 1; - goto done; - + v.locks[0]->fl_type = small->fl_type; + v.locks[0]->user_flock.l_type = small->fl_type; + goto done; + } + + if ((small->fl_start > big->fl_start) && (small->fl_end < big->fl_end)) { + /* both edges lie inside big */ + v.locks[0] = __copy_lock(big); + v.locks[1] = __copy_lock(small); + v.locks[2] = __copy_lock(big); + if ((v.locks[0] == NULL) || (v.locks[1] == NULL) || + (v.locks[2] == NULL)) { + goto out; } - /* one edge coincides with big */ - if (small->fl_start == big->fl_start) { - v.locks[0] = GF_CALLOC (1, sizeof (posix_lock_t), - gf_locks_mt_posix_lock_t); - if (!v.locks[0]) - goto out; - - v.locks[1] = GF_CALLOC (1, sizeof (posix_lock_t), - gf_locks_mt_posix_lock_t); - if (!v.locks[1]) - goto out; - - memcpy (v.locks[0], big, sizeof (posix_lock_t)); - v.locks[0]->fl_start = small->fl_end + 1; - - memcpy (v.locks[1], small, sizeof (posix_lock_t)); - goto done; + v.locks[0]->fl_end = small->fl_start - 1; + v.locks[2]->fl_start = small->fl_end + 1; + posix_lock_to_flock(v.locks[0], &v.locks[0]->user_flock); + posix_lock_to_flock(v.locks[2], &v.locks[2]->user_flock); + goto done; + } + + /* one edge coincides with big */ + if (small->fl_start == big->fl_start) { + v.locks[0] = __copy_lock(big); + v.locks[1] = __copy_lock(small); + if 
((v.locks[0] == NULL) || (v.locks[1] == NULL)) { + goto out; } - if (small->fl_end == big->fl_end) { - v.locks[0] = GF_CALLOC (1, sizeof (posix_lock_t), - gf_locks_mt_posix_lock_t); - if (!v.locks[0]) - goto out; - - v.locks[1] = GF_CALLOC (1, sizeof (posix_lock_t), - gf_locks_mt_posix_lock_t); - if (!v.locks[1]) - goto out; + v.locks[0]->fl_start = small->fl_end + 1; + posix_lock_to_flock(v.locks[0], &v.locks[0]->user_flock); + goto done; + } - memcpy (v.locks[0], big, sizeof (posix_lock_t)); - v.locks[0]->fl_end = small->fl_start - 1; - - memcpy (v.locks[1], small, sizeof (posix_lock_t)); - goto done; + if (small->fl_end == big->fl_end) { + v.locks[0] = __copy_lock(big); + v.locks[1] = __copy_lock(small); + if ((v.locks[0] == NULL) || (v.locks[1] == NULL)) { + goto out; } - GF_ASSERT (0); - gf_log ("posix-locks", GF_LOG_ERROR, "Unexpected case in subtract_locks"); + v.locks[0]->fl_end = small->fl_start - 1; + posix_lock_to_flock(v.locks[0], &v.locks[0]->user_flock); + goto done; + } + + GF_ASSERT(0); + gf_log("posix-locks", GF_LOG_ERROR, "Unexpected case in subtract_locks"); out: - if (v.locks[0]) { - GF_FREE (v.locks[0]); - v.locks[0] = NULL; - } - if (v.locks[1]) { - GF_FREE (v.locks[1]); - v.locks[1] = NULL; - } - if (v.locks[2]) { - GF_FREE (v.locks[2]); - v.locks[2] = NULL; - } + if (v.locks[0]) { + __destroy_lock(v.locks[0]); + v.locks[0] = NULL; + } + if (v.locks[1]) { + __destroy_lock(v.locks[1]); + v.locks[1] = NULL; + } + if (v.locks[2]) { + __destroy_lock(v.locks[2]); + v.locks[2] = NULL; + } done: - return v; + return v; } static posix_lock_t * -first_conflicting_overlap (pl_inode_t *pl_inode, posix_lock_t *lock) +first_conflicting_overlap(pl_inode_t *pl_inode, posix_lock_t *lock) { - posix_lock_t *l = NULL; - posix_lock_t *conf = NULL; + posix_lock_t *l = NULL; + posix_lock_t *conf = NULL; - pthread_mutex_lock (&pl_inode->mutex); + pthread_mutex_lock(&pl_inode->mutex); + { + list_for_each_entry(l, &pl_inode->ext_list, list) { - list_for_each_entry 
(l, &pl_inode->ext_list, list) { - if (l->blocked) - continue; - - if (locks_overlap (l, lock)) { - if (same_owner (l, lock)) - continue; - - if ((l->fl_type == F_WRLCK) || - (lock->fl_type == F_WRLCK)) { - conf = l; - goto unlock; - } - } + if (l->blocked) + continue; + + if (locks_overlap(l, lock)) { + if (same_owner(l, lock)) + continue; + + if ((l->fl_type == F_WRLCK) || (lock->fl_type == F_WRLCK)) { + conf = l; + goto unlock; } + } } + } unlock: - pthread_mutex_unlock (&pl_inode->mutex); + pthread_mutex_unlock(&pl_inode->mutex); - return conf; + return conf; } /* @@ -733,351 +789,803 @@ unlock: If {begin} is NULL, then start from the beginning of the list */ static posix_lock_t * -first_overlap (pl_inode_t *pl_inode, posix_lock_t *lock) +first_overlap(pl_inode_t *pl_inode, posix_lock_t *lock) { - posix_lock_t *l = NULL; + posix_lock_t *l = NULL; - list_for_each_entry (l, &pl_inode->ext_list, list) { - if (l->blocked) - continue; + list_for_each_entry(l, &pl_inode->ext_list, list) + { + if (l->blocked) + continue; - if (locks_overlap (l, lock)) - return l; - } + if (locks_overlap(l, lock)) + return l; + } - return NULL; + return NULL; } - - /* Return true if lock is grantable */ static int -__is_lock_grantable (pl_inode_t *pl_inode, posix_lock_t *lock) +__is_lock_grantable(pl_inode_t *pl_inode, posix_lock_t *lock) { - posix_lock_t *l = NULL; - int ret = 1; - - list_for_each_entry (l, &pl_inode->ext_list, list) { - if (!l->blocked && locks_overlap (lock, l)) { - if (((l->fl_type == F_WRLCK) - || (lock->fl_type == F_WRLCK)) - && (lock->fl_type != F_UNLCK) - && !same_owner (l, lock)) { - ret = 0; - break; - } - } + posix_lock_t *l = NULL; + int ret = 1; + + list_for_each_entry(l, &pl_inode->ext_list, list) + { + if (!l->blocked && locks_overlap(lock, l)) { + if (((l->fl_type == F_WRLCK) || (lock->fl_type == F_WRLCK)) && + (lock->fl_type != F_UNLCK) && !same_owner(l, lock)) { + ret = 0; + break; + } } - return ret; + } + return ret; } - -extern void do_blocked_rw 
(pl_inode_t *); - +extern void +do_blocked_rw(pl_inode_t *); static void -__insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock) +__insert_and_merge(pl_inode_t *pl_inode, posix_lock_t *lock) { - posix_lock_t *conf = NULL; - posix_lock_t *t = NULL; - posix_lock_t *sum = NULL; - int i = 0; - struct _values v = { .locks = {0, 0, 0} }; - - list_for_each_entry_safe (conf, t, &pl_inode->ext_list, list) { - if (conf->blocked) - continue; - if (!locks_overlap (conf, lock)) - continue; + posix_lock_t *conf = NULL; + posix_lock_t *t = NULL; + posix_lock_t *sum = NULL; + int i = 0; + struct _values v = {.locks = {0, 0, 0}}; + + list_for_each_entry_safe(conf, t, &pl_inode->ext_list, list) + { + if (conf->blocked) + continue; + if (!locks_overlap(conf, lock)) + continue; + + if (same_owner(conf, lock)) { + if (conf->fl_type == lock->fl_type && + conf->lk_flags == lock->lk_flags) { + sum = add_locks(lock, conf, lock); + + __delete_lock(conf); + __destroy_lock(conf); + + __destroy_lock(lock); + INIT_LIST_HEAD(&sum->list); + posix_lock_to_flock(sum, &sum->user_flock); + __insert_and_merge(pl_inode, sum); - if (same_owner (conf, lock)) { - if (conf->fl_type == lock->fl_type) { - sum = add_locks (lock, conf); + return; + } else { + sum = add_locks(lock, conf, conf); - sum->fl_type = lock->fl_type; - sum->client = lock->client; - sum->fd_num = lock->fd_num; - sum->client_pid = lock->client_pid; - sum->owner = lock->owner; + v = subtract_locks(sum, lock); - __delete_lock (pl_inode, conf); - __destroy_lock (conf); + __delete_lock(conf); + __destroy_lock(conf); - __destroy_lock (lock); - INIT_LIST_HEAD (&sum->list); - posix_lock_to_flock (sum, &sum->user_flock); - __insert_and_merge (pl_inode, sum); + __delete_lock(lock); + __destroy_lock(lock); - return; - } else { - sum = add_locks (lock, conf); + __destroy_lock(sum); - sum->fl_type = conf->fl_type; - sum->client = conf->client; - sum->fd_num = conf->fd_num; - sum->client_pid = conf->client_pid; - sum->owner = conf->owner; + 
for (i = 0; i < 3; i++) { + if (!v.locks[i]) + continue; - v = subtract_locks (sum, lock); + __insert_and_merge(pl_inode, v.locks[i]); + } - __delete_lock (pl_inode, conf); - __destroy_lock (conf); + __delete_unlck_locks(pl_inode); + return; + } + } - __delete_lock (pl_inode, lock); - __destroy_lock (lock); + if (lock->fl_type == F_UNLCK) { + continue; + } - __destroy_lock (sum); + if ((conf->fl_type == F_RDLCK) && (lock->fl_type == F_RDLCK)) { + __insert_lock(pl_inode, lock); + return; + } + } + + /* no conflicts, so just insert */ + if (lock->fl_type != F_UNLCK) { + __insert_lock(pl_inode, lock); + } else { + __destroy_lock(lock); + } +} - for (i = 0; i < 3; i++) { - if (!v.locks[i]) - continue; +void +__grant_blocked_locks(xlator_t *this, pl_inode_t *pl_inode, + struct list_head *granted) +{ + struct list_head tmp_list; + posix_lock_t *l = NULL; + posix_lock_t *tmp = NULL; + posix_lock_t *conf = NULL; + + INIT_LIST_HEAD(&tmp_list); + + list_for_each_entry_safe(l, tmp, &pl_inode->ext_list, list) + { + if (l->blocked) { + conf = first_overlap(pl_inode, l); + if (conf) + continue; + + l->blocked = 0; + list_move_tail(&l->list, &tmp_list); + } + } - INIT_LIST_HEAD (&v.locks[i]->list); - posix_lock_to_flock (v.locks[i], - &v.locks[i]->user_flock); - __insert_and_merge (pl_inode, - v.locks[i]); - } + list_for_each_entry_safe(l, tmp, &tmp_list, list) + { + list_del_init(&l->list); - __delete_unlck_locks (pl_inode); - return; - } - } + if (__is_lock_grantable(pl_inode, l)) { + conf = GF_CALLOC(1, sizeof(*conf), gf_locks_mt_posix_lock_t); - if (lock->fl_type == F_UNLCK) { - continue; - } + if (!conf) { + l->blocked = 1; + __insert_lock(pl_inode, l); + continue; + } - if ((conf->fl_type == F_RDLCK) && (lock->fl_type == F_RDLCK)) { - __insert_lock (pl_inode, lock); - return; - } - } + conf->frame = l->frame; + l->frame = NULL; + + posix_lock_to_flock(l, &conf->user_flock); + + gf_log(this->name, GF_LOG_TRACE, + "%s (pid=%d) lk-owner:%s %" PRId64 " - %" PRId64 + " => 
Granted", + l->fl_type == F_UNLCK ? "Unlock" : "Lock", l->client_pid, + lkowner_utoa(&l->owner), l->user_flock.l_start, + l->user_flock.l_len); - /* no conflicts, so just insert */ - if (lock->fl_type != F_UNLCK) { - __insert_lock (pl_inode, lock); + __insert_and_merge(pl_inode, l); + + list_add(&conf->list, granted); } else { - __destroy_lock (lock); + l->blocked = 1; + __insert_lock(pl_inode, l); } + } } - void -__grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode, struct list_head *granted) +grant_blocked_locks(xlator_t *this, pl_inode_t *pl_inode) { - struct list_head tmp_list; - posix_lock_t *l = NULL; - posix_lock_t *tmp = NULL; - posix_lock_t *conf = NULL; + struct list_head granted_list; + posix_lock_t *tmp = NULL; + posix_lock_t *lock = NULL; + pl_local_t *local = NULL; + INIT_LIST_HEAD(&granted_list); + + pthread_mutex_lock(&pl_inode->mutex); + { + __grant_blocked_locks(this, pl_inode, &granted_list); + } + pthread_mutex_unlock(&pl_inode->mutex); + + list_for_each_entry_safe(lock, tmp, &granted_list, list) + { + list_del_init(&lock->list); + + pl_trace_out(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock, + 0, 0, NULL); + local = lock->frame->local; + PL_STACK_UNWIND_AND_FREE(local, lk, lock->frame, 0, 0, + &lock->user_flock, NULL); + __destroy_lock(lock); + } + + return; +} - INIT_LIST_HEAD (&tmp_list); +static int +pl_send_prelock_unlock(xlator_t *this, pl_inode_t *pl_inode, + posix_lock_t *old_lock) +{ + struct gf_flock flock = { + 0, + }; + posix_lock_t *unlock_lock = NULL; + int32_t op_errno = 0; - list_for_each_entry_safe (l, tmp, &pl_inode->ext_list, list) { - if (l->blocked) { - conf = first_overlap (pl_inode, l); - if (conf) - continue; + struct list_head granted_list; + posix_lock_t *tmp = NULL; + posix_lock_t *lock = NULL; + pl_local_t *local = NULL; - l->blocked = 0; - list_move_tail (&l->list, &tmp_list); - } - } + int ret = -1; - list_for_each_entry_safe (l, tmp, &tmp_list, list) { - list_del_init (&l->list); + 
INIT_LIST_HEAD(&granted_list); - if (__is_lock_grantable (pl_inode, l)) { - conf = GF_CALLOC (1, sizeof (*conf), - gf_locks_mt_posix_lock_t); + flock.l_type = F_UNLCK; + flock.l_whence = old_lock->user_flock.l_whence; + flock.l_start = old_lock->user_flock.l_start; + flock.l_len = old_lock->user_flock.l_len; + flock.l_pid = old_lock->user_flock.l_pid; - if (!conf) { - l->blocked = 1; - __insert_lock (pl_inode, l); - continue; - } + unlock_lock = new_posix_lock(&flock, old_lock->client, old_lock->client_pid, + &old_lock->owner, old_lock->fd, + old_lock->lk_flags, 0, &op_errno); + GF_VALIDATE_OR_GOTO(this->name, unlock_lock, out); + ret = 0; - conf->frame = l->frame; - l->frame = NULL; + __insert_and_merge(pl_inode, unlock_lock); - posix_lock_to_flock (l, &conf->user_flock); + __grant_blocked_locks(this, pl_inode, &granted_list); - gf_log (this->name, GF_LOG_TRACE, - "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => Granted", - l->fl_type == F_UNLCK ? "Unlock" : "Lock", - l->client_pid, lkowner_utoa (&l->owner), - l->user_flock.l_start, - l->user_flock.l_len); + list_for_each_entry_safe(lock, tmp, &granted_list, list) + { + list_del_init(&lock->list); - __insert_and_merge (pl_inode, l); + pl_trace_out(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock, + 0, 0, NULL); + local = lock->frame->local; + PL_STACK_UNWIND_AND_FREE(local, lk, lock->frame, 0, 0, + &lock->user_flock, NULL); + __destroy_lock(lock); + } - list_add (&conf->list, granted); - } else { - l->blocked = 1; - __insert_lock (pl_inode, l); - } - } +out: + return ret; } +int +pl_setlk(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock, + int can_block) +{ + int ret = 0; + + errno = 0; -void -grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode) + pthread_mutex_lock(&pl_inode->mutex); + { + /* Send unlock before the actual lock to + prevent lock upgrade / downgrade + problems only if: + - it is a blocking call + - it has other conflicting locks + */ + + if (can_block && 
!(__is_lock_grantable(pl_inode, lock))) { + ret = pl_send_prelock_unlock(this, pl_inode, lock); + if (ret) + gf_log(this->name, GF_LOG_DEBUG, + "Could not send pre-lock " + "unlock"); + } + + if (__is_lock_grantable(pl_inode, lock)) { + if (pl_metalock_is_active(pl_inode)) { + __pl_queue_lock(pl_inode, lock); + pthread_mutex_unlock(&pl_inode->mutex); + ret = -2; + goto out; + } + gf_log(this->name, GF_LOG_TRACE, + "%s (pid=%d) lk-owner:%s %" PRId64 " - %" PRId64 " => OK", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", + lock->client_pid, lkowner_utoa(&lock->owner), + lock->user_flock.l_start, lock->user_flock.l_len); + __insert_and_merge(pl_inode, lock); + } else if (can_block) { + if (pl_metalock_is_active(pl_inode)) { + __pl_queue_lock(pl_inode, lock); + pthread_mutex_unlock(&pl_inode->mutex); + ret = -2; + goto out; + } + gf_log(this->name, GF_LOG_TRACE, + "%s (pid=%d) lk-owner:%s %" PRId64 " - %" PRId64 + " => Blocked", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", + lock->client_pid, lkowner_utoa(&lock->owner), + lock->user_flock.l_start, lock->user_flock.l_len); + + pl_trace_block(this, lock->frame, NULL, NULL, F_SETLKW, + &lock->user_flock, NULL); + + lock->blocked = 1; + __insert_lock(pl_inode, lock); + ret = -1; + } else { + gf_log(this->name, GF_LOG_TRACE, + "%s (pid=%d) lk-owner:%s %" PRId64 " - %" PRId64 " => NOK", + lock->fl_type == F_UNLCK ? 
"Unlock" : "Lock", + lock->client_pid, lkowner_utoa(&lock->owner), + lock->user_flock.l_start, lock->user_flock.l_len); + errno = EAGAIN; + ret = -1; + } + } + pthread_mutex_unlock(&pl_inode->mutex); + + grant_blocked_locks(this, pl_inode); + + do_blocked_rw(pl_inode); + +out: + return ret; +} + +posix_lock_t * +pl_getlk(pl_inode_t *pl_inode, posix_lock_t *lock) { - struct list_head granted_list; - posix_lock_t *tmp = NULL; - posix_lock_t *lock = NULL; + posix_lock_t *conf = first_conflicting_overlap(pl_inode, lock); + if (conf == NULL) { + lock->fl_type = F_UNLCK; + return lock; + } + + return conf; +} - INIT_LIST_HEAD (&granted_list); +gf_boolean_t +pl_does_monkey_want_stuck_lock() +{ + long int monkey_unlock_rand = 0; + long int monkey_unlock_rand_rem = 0; + + /* coverity[DC.WEAK_CRYPTO] */ + monkey_unlock_rand = random(); + monkey_unlock_rand_rem = monkey_unlock_rand % 100; + if (monkey_unlock_rand_rem == 0) + return _gf_true; + return _gf_false; +} - pthread_mutex_lock (&pl_inode->mutex); +int +pl_lock_preempt(pl_inode_t *pl_inode, posix_lock_t *reqlock) +{ + posix_lock_t *lock = NULL; + posix_lock_t *i = NULL; + pl_rw_req_t *rw = NULL; + pl_rw_req_t *itr = NULL; + struct list_head unwind_blist = { + 0, + }; + struct list_head unwind_rw_list = { + 0, + }; + int ret = 0; + + INIT_LIST_HEAD(&unwind_blist); + INIT_LIST_HEAD(&unwind_rw_list); + + pthread_mutex_lock(&pl_inode->mutex); + { + /* + - go through the lock list + - remove all locks from different owners + - same owner locks will be added or substracted based on + the new request + - add the new lock + */ + list_for_each_entry_safe(lock, i, &pl_inode->ext_list, list) { - __grant_blocked_locks (this, pl_inode, &granted_list); + if (lock->blocked) { + list_del_init(&lock->list); + list_add(&lock->list, &unwind_blist); + continue; + } + + if (locks_overlap(lock, reqlock)) { + if (same_owner(lock, reqlock)) + continue; + + /* remove conflicting locks */ + list_del_init(&lock->list); + __delete_lock(lock); + 
__destroy_lock(lock); + } } - pthread_mutex_unlock (&pl_inode->mutex); - list_for_each_entry_safe (lock, tmp, &granted_list, list) { - list_del_init (&lock->list); + __insert_and_merge(pl_inode, reqlock); - pl_trace_out (this, lock->frame, NULL, NULL, F_SETLKW, - &lock->user_flock, 0, 0, NULL); + list_for_each_entry_safe(rw, itr, &pl_inode->rw_list, list) + { + list_del_init(&rw->list); + list_add(&rw->list, &unwind_rw_list); + } + } + pthread_mutex_unlock(&pl_inode->mutex); + + /* unwind blocked locks */ + list_for_each_entry_safe(lock, i, &unwind_blist, list) + { + PL_STACK_UNWIND_AND_FREE(((pl_local_t *)lock->frame->local), lk, + lock->frame, -1, EBUSY, &lock->user_flock, + NULL); + __destroy_lock(lock); + } + + /* unwind blocked IOs */ + list_for_each_entry_safe(rw, itr, &unwind_rw_list, list) + { + pl_clean_local(rw->stub->frame->local); + call_unwind_error(rw->stub, -1, EBUSY); + } + + return ret; +} - STACK_UNWIND_STRICT (lk, lock->frame, 0, 0, - &lock->user_flock, NULL); +/* Return true in case we need to ensure mandatory-locking + * semantics under different modes. 
+ */ +gf_boolean_t +pl_is_mandatory_locking_enabled(pl_inode_t *pl_inode) +{ + posix_locks_private_t *priv = THIS->private; - GF_FREE (lock); - } + if (priv->mandatory_mode == MLK_FILE_BASED && pl_inode->mandatory) + return _gf_true; + else if (priv->mandatory_mode == MLK_FORCED || + priv->mandatory_mode == MLK_OPTIMAL) + return _gf_true; - return; + return _gf_false; } -static int -pl_send_prelock_unlock (xlator_t *this, pl_inode_t *pl_inode, - posix_lock_t *old_lock) +void +pl_clean_local(pl_local_t *local) { - struct gf_flock flock = {0,}; - posix_lock_t *unlock_lock = NULL; + if (!local) + return; - struct list_head granted_list; - posix_lock_t *tmp = NULL; - posix_lock_t *lock = NULL; + if (local->inodelk_dom_count_req) + data_unref(local->inodelk_dom_count_req); + loc_wipe(&local->loc[0]); + loc_wipe(&local->loc[1]); + if (local->fd) + fd_unref(local->fd); + if (local->inode) + inode_unref(local->inode); + mem_put(local); +} + +/* +TODO: detach local initialization from PL_LOCAL_GET_REQUESTS and add it here +*/ +int +pl_local_init(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd) +{ + pl_local_t *local = NULL; + + if (!loc && !fd) { + return -1; + } + + if (!frame->local) { + local = mem_get0(this->local_pool); + if (!local) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0, + "mem allocation failed"); + return -1; + } - int ret = -1; + local->inode = (loc ? 
inode_ref(loc->inode) : inode_ref(fd->inode)); - INIT_LIST_HEAD (&granted_list); + frame->local = local; + } - flock.l_type = F_UNLCK; - flock.l_whence = old_lock->user_flock.l_whence; - flock.l_start = old_lock->user_flock.l_start; - flock.l_len = old_lock->user_flock.l_len; + return 0; +} +gf_boolean_t +pl_is_lk_owner_valid(gf_lkowner_t *owner, client_t *client) +{ + if (client && (client->opversion < GD_OP_VERSION_7_0)) { + return _gf_true; + } + + if (is_lk_owner_null(owner)) { + return _gf_false; + } + return _gf_true; +} - unlock_lock = new_posix_lock (&flock, old_lock->client, - old_lock->client_pid, &old_lock->owner, - old_lock->fd); - GF_VALIDATE_OR_GOTO (this->name, unlock_lock, out); - ret = 0; +static int32_t +pl_inode_from_loc(loc_t *loc, inode_t **pinode) +{ + inode_t *inode = NULL; + int32_t error = 0; + + if (loc->inode != NULL) { + inode = inode_ref(loc->inode); + goto done; + } + + if (loc->parent == NULL) { + error = EINVAL; + goto done; + } + + if (!gf_uuid_is_null(loc->gfid)) { + inode = inode_find(loc->parent->table, loc->gfid); + if (inode != NULL) { + goto done; + } + } - __insert_and_merge (pl_inode, unlock_lock); + if (loc->name == NULL) { + error = EINVAL; + goto done; + } - __grant_blocked_locks (this, pl_inode, &granted_list); + inode = inode_grep(loc->parent->table, loc->parent, loc->name); + if (inode == NULL) { + /* We haven't found any inode. This means that the file doesn't exist + * or that even if it exists, we don't have any knowledge about it, so + * we don't have locks on it either, which is fine for our purposes. 
*/ + goto done; + } - list_for_each_entry_safe (lock, tmp, &granted_list, list) { - list_del_init (&lock->list); +done: + *pinode = inode; - pl_trace_out (this, lock->frame, NULL, NULL, F_SETLKW, - &lock->user_flock, 0, 0, NULL); + return error; +} - STACK_UNWIND_STRICT (lk, lock->frame, 0, 0, - &lock->user_flock, NULL); +static gf_boolean_t +pl_inode_has_owners(xlator_t *xl, client_t *client, pl_inode_t *pl_inode, + struct timespec *now, struct list_head *contend) +{ + pl_dom_list_t *dom; + pl_inode_lock_t *lock; + gf_boolean_t has_owners = _gf_false; - GF_FREE (lock); + list_for_each_entry(dom, &pl_inode->dom_list, inode_list) + { + list_for_each_entry(lock, &dom->inodelk_list, list) + { + /* If the lock belongs to the same client, we assume it's related + * to the same operation, so we allow the removal to continue. */ + if (lock->client == client) { + continue; + } + /* If the lock belongs to an internal process, we don't block the + * removal. */ + if (lock->client_pid < 0) { + continue; + } + if (contend == NULL) { + return _gf_true; + } + has_owners = _gf_true; + inodelk_contention_notify_check(xl, lock, now, contend); } + } -out: - return ret; + return has_owners; } -int -pl_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock, - int can_block) +int32_t +pl_inode_remove_prepare(xlator_t *xl, call_frame_t *frame, loc_t *loc, + pl_inode_t **ppl_inode, struct list_head *contend) { - int ret = 0; + struct timespec now; + inode_t *inode; + pl_inode_t *pl_inode; + int32_t error; + + pl_inode = NULL; + + error = pl_inode_from_loc(loc, &inode); + if ((error != 0) || (inode == NULL)) { + goto done; + } + + pl_inode = pl_inode_get(xl, inode, NULL); + if (pl_inode == NULL) { + inode_unref(inode); + error = ENOMEM; + goto done; + } + + /* pl_inode_from_loc() already increments ref count for inode, so + * we only assign here our reference. 
*/ + pl_inode->inode = inode; + + timespec_now(&now); + + pthread_mutex_lock(&pl_inode->mutex); + + if (pl_inode->removed) { + error = ESTALE; + goto unlock; + } + + if (pl_inode_has_owners(xl, frame->root->client, pl_inode, &now, contend)) { + error = -1; + /* We skip the unlock here because the caller must create a stub when + * we return -1 and do a call to pl_inode_remove_complete(), which + * assumes the lock is still acquired and will release it once + * everything else is prepared. */ + goto done; + } + + pl_inode->is_locked = _gf_true; + pl_inode->remove_running++; - errno = 0; +unlock: + pthread_mutex_unlock(&pl_inode->mutex); - pthread_mutex_lock (&pl_inode->mutex); - { - /* Send unlock before the actual lock to - prevent lock upgrade / downgrade - problems only if: - - it is a blocking call - - it has other conflicting locks - */ - - if (can_block && - !(__is_lock_grantable (pl_inode, lock))) { - ret = pl_send_prelock_unlock (this, pl_inode, - lock); - if (ret) - gf_log (this->name, GF_LOG_DEBUG, - "Could not send pre-lock " - "unlock"); - } +done: + *ppl_inode = pl_inode; - if (__is_lock_grantable (pl_inode, lock)) { - gf_log (this->name, GF_LOG_TRACE, - "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => OK", - lock->fl_type == F_UNLCK ? "Unlock" : "Lock", - lock->client_pid, - lkowner_utoa (&lock->owner), - lock->user_flock.l_start, - lock->user_flock.l_len); - __insert_and_merge (pl_inode, lock); - } else if (can_block) { - gf_log (this->name, GF_LOG_TRACE, - "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => Blocked", - lock->fl_type == F_UNLCK ? "Unlock" : "Lock", - lock->client_pid, - lkowner_utoa (&lock->owner), - lock->user_flock.l_start, - lock->user_flock.l_len); - lock->blocked = 1; - __insert_lock (pl_inode, lock); - ret = -1; - } else { - gf_log (this->name, GF_LOG_TRACE, - "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => NOK", - lock->fl_type == F_UNLCK ? 
"Unlock" : "Lock", - lock->client_pid, - lkowner_utoa (&lock->owner), - lock->user_flock.l_start, - lock->user_flock.l_len); - errno = EAGAIN; - ret = -1; - } + return error; +} + +int32_t +pl_inode_remove_complete(xlator_t *xl, pl_inode_t *pl_inode, call_stub_t *stub, + struct list_head *contend) +{ + pl_inode_lock_t *lock; + int32_t error = -1; + + if (stub != NULL) { + list_add_tail(&stub->list, &pl_inode->waiting); + pl_inode->is_locked = _gf_true; + } else { + error = ENOMEM; + + while (!list_empty(contend)) { + lock = list_first_entry(contend, pl_inode_lock_t, list); + list_del_init(&lock->list); + __pl_inodelk_unref(lock); } - pthread_mutex_unlock (&pl_inode->mutex); + } - grant_blocked_locks (this, pl_inode); + pthread_mutex_unlock(&pl_inode->mutex); - do_blocked_rw (pl_inode); + if (error < 0) { + inodelk_contention_notify(xl, contend); + } - return ret; + inode_unref(pl_inode->inode); + + return error; } +void +pl_inode_remove_wake(struct list_head *list) +{ + call_stub_t *stub; + + while (!list_empty(list)) { + stub = list_first_entry(list, call_stub_t, list); + list_del_init(&stub->list); -posix_lock_t * -pl_getlk (pl_inode_t *pl_inode, posix_lock_t *lock) + call_resume(stub); + } +} + +void +pl_inode_remove_cbk(xlator_t *xl, pl_inode_t *pl_inode, int32_t error) { - posix_lock_t *conf = NULL; + struct list_head contend, granted; + struct timespec now; + pl_dom_list_t *dom; + + if (pl_inode == NULL) { + return; + } - conf = first_conflicting_overlap (pl_inode, lock); + INIT_LIST_HEAD(&contend); + INIT_LIST_HEAD(&granted); + timespec_now(&now); - if (conf == NULL) { - lock->fl_type = F_UNLCK; - return lock; + pthread_mutex_lock(&pl_inode->mutex); + + if (error == 0) { + if (pl_inode->links >= 0) { + pl_inode->links--; + } + if (pl_inode->links == 0) { + pl_inode->removed = _gf_true; + } + } + + pl_inode->remove_running--; + + if ((pl_inode->remove_running == 0) && list_empty(&pl_inode->waiting)) { + pl_inode->is_locked = _gf_false; + + 
list_for_each_entry(dom, &pl_inode->dom_list, inode_list) + { + __grant_blocked_inode_locks(xl, pl_inode, &granted, dom, &now, + &contend); } + } + + pthread_mutex_unlock(&pl_inode->mutex); - return conf; + unwind_granted_inodes(xl, pl_inode, &granted); + + inodelk_contention_notify(xl, &contend); + + inode_unref(pl_inode->inode); +} + +void +pl_inode_remove_unlocked(xlator_t *xl, pl_inode_t *pl_inode, + struct list_head *list) +{ + call_stub_t *stub, *tmp; + + if (!pl_inode->is_locked) { + return; + } + + list_for_each_entry_safe(stub, tmp, &pl_inode->waiting, list) + { + if (!pl_inode_has_owners(xl, stub->frame->root->client, pl_inode, NULL, + NULL)) { + list_move_tail(&stub->list, list); + } + } } +/* This function determines if an inodelk attempt can be done now or it needs + * to wait. + * + * Possible return values: + * < 0: An error occurred. Currently only -ESTALE can be returned if the + * inode has been deleted previously by unlink/rmdir/rename + * = 0: The lock can be attempted. + * > 0: The lock needs to wait because a conflicting remove operation is + * ongoing. + */ +int32_t +pl_inode_remove_inodelk(pl_inode_t *pl_inode, pl_inode_lock_t *lock) +{ + pl_dom_list_t *dom; + pl_inode_lock_t *ilock; + + /* If the inode has been deleted, we won't allow any lock. */ + if (pl_inode->removed) { + return -ESTALE; + } + + /* We only synchronize with locks made for regular operations coming from + * the user. Locks done for internal purposes are hard to control and could + * lead to long delays or deadlocks quite easily. */ + if (lock->client_pid < 0) { + return 0; + } + if (!pl_inode->is_locked) { + return 0; + } + if (pl_inode->remove_running > 0) { + return 1; + } + + list_for_each_entry(dom, &pl_inode->dom_list, inode_list) + { + list_for_each_entry(ilock, &dom->inodelk_list, list) + { + /* If a lock from the same client is already granted, we allow this + * one to continue. 
This is necessary to prevent deadlocks when + * multiple locks are taken for the same operation. + * + * On the other side it's unlikely that the same client sends + * completely unrelated locks for the same inode. + */ + if (ilock->client == lock->client) { + return 0; + } + } + } + + return 1; +} diff --git a/xlators/features/locks/src/common.h b/xlators/features/locks/src/common.h index 5ec630ee857..281223bf3b8 100644 --- a/xlators/features/locks/src/common.h +++ b/xlators/features/locks/src/common.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + Copyright (c) 2006-2012, 2015-2016 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. This file is licensed to you under your choice of the GNU Lesser @@ -10,149 +10,253 @@ #ifndef __COMMON_H__ #define __COMMON_H__ -#include "lkowner.h" /*dump locks format strings */ -#define RANGE_FMT "type=%s, whence=%hd, start=%llu, len=%llu" -#define ENTRY_FMT "type=%s on basename=%s" -#define DUMP_GEN_FMT "pid = %llu, owner=%s, client=%p" -#define GRNTD_AT "granted at %s" -#define BLKD_AT "blocked at %s" -#define CONN_ID "connection-id=%s" -#define DUMP_BLKD_FMT DUMP_GEN_FMT", "CONN_ID", "BLKD_AT -#define DUMP_GRNTD_FMT DUMP_GEN_FMT", "CONN_ID", "GRNTD_AT -#define DUMP_BLKD_GRNTD_FMT DUMP_GEN_FMT", "CONN_ID", "BLKD_AT", "GRNTD_AT - -#define ENTRY_BLKD_FMT ENTRY_FMT", "DUMP_BLKD_FMT -#define ENTRY_GRNTD_FMT ENTRY_FMT", "DUMP_GRNTD_FMT -#define ENTRY_BLKD_GRNTD_FMT ENTRY_FMT", "DUMP_BLKD_GRNTD_FMT - -#define RANGE_BLKD_FMT RANGE_FMT", "DUMP_BLKD_FMT -#define RANGE_GRNTD_FMT RANGE_FMT", "DUMP_GRNTD_FMT -#define RANGE_BLKD_GRNTD_FMT RANGE_FMT", "DUMP_BLKD_GRNTD_FMT +#define RANGE_FMT "type=%s, whence=%hd, start=%llu, len=%llu" +#define ENTRY_FMT "type=%s on basename=%s" +#define DUMP_GEN_FMT "pid = %llu, owner=%s, client=%p" +#define GRNTD_AT "granted at %s" +#define BLKD_AT "blocked at %s" +#define CONN_ID "connection-id=%s" +#define DUMP_BLKD_FMT DUMP_GEN_FMT ", " CONN_ID ", " 
BLKD_AT +#define DUMP_GRNTD_FMT DUMP_GEN_FMT ", " CONN_ID ", " GRNTD_AT +#define DUMP_BLKD_GRNTD_FMT DUMP_GEN_FMT ", " CONN_ID ", " BLKD_AT ", " GRNTD_AT + +#define ENTRY_BLKD_FMT ENTRY_FMT ", " DUMP_BLKD_FMT +#define ENTRY_GRNTD_FMT ENTRY_FMT ", " DUMP_GRNTD_FMT +#define ENTRY_BLKD_GRNTD_FMT ENTRY_FMT ", " DUMP_BLKD_GRNTD_FMT + +#define RANGE_BLKD_FMT RANGE_FMT ", " DUMP_BLKD_FMT +#define RANGE_GRNTD_FMT RANGE_FMT ", " DUMP_GRNTD_FMT +#define RANGE_BLKD_GRNTD_FMT RANGE_FMT ", " DUMP_BLKD_GRNTD_FMT #define SET_FLOCK_PID(flock, lock) ((flock)->l_pid = lock->client_pid) +#define PL_STACK_UNWIND_AND_FREE(__local, fop, frame, op_ret, params...) \ + do { \ + frame->local = NULL; \ + STACK_UNWIND_STRICT(fop, frame, op_ret, params); \ + if (__local) { \ + if (__local->inodelk_dom_count_req) \ + data_unref(__local->inodelk_dom_count_req); \ + loc_wipe(&__local->loc[0]); \ + loc_wipe(&__local->loc[1]); \ + if (__local->fd) \ + fd_unref(__local->fd); \ + if (__local->inode) \ + inode_unref(__local->inode); \ + if (__local->xdata) { \ + dict_unref(__local->xdata); \ + __local->xdata = NULL; \ + } \ + mem_put(__local); \ + } \ + } while (0) posix_lock_t * -new_posix_lock (struct gf_flock *flock, client_t *client, pid_t client_pid, - gf_lkowner_t *owner, fd_t *fd); +new_posix_lock(struct gf_flock *flock, client_t *client, pid_t client_pid, + gf_lkowner_t *owner, fd_t *fd, uint32_t lk_flags, int blocking, + int32_t *op_errno); pl_inode_t * -pl_inode_get (xlator_t *this, inode_t *inode); +pl_inode_get(xlator_t *this, inode_t *inode, pl_local_t *local); posix_lock_t * -pl_getlk (pl_inode_t *inode, posix_lock_t *lock); +pl_getlk(pl_inode_t *inode, posix_lock_t *lock); int -pl_setlk (xlator_t *this, pl_inode_t *inode, posix_lock_t *lock, - int can_block); +pl_setlk(xlator_t *this, pl_inode_t *inode, posix_lock_t *lock, int can_block); + +int +pl_lock_preempt(pl_inode_t *pl_inode, posix_lock_t *reqlock); void -grant_blocked_locks (xlator_t *this, pl_inode_t *inode); 
+grant_blocked_locks(xlator_t *this, pl_inode_t *inode); void -posix_lock_to_flock (posix_lock_t *lock, struct gf_flock *flock); +posix_lock_to_flock(posix_lock_t *lock, struct gf_flock *flock); int -locks_overlap (posix_lock_t *l1, posix_lock_t *l2); +locks_overlap(posix_lock_t *l1, posix_lock_t *l2); int -same_owner (posix_lock_t *l1, posix_lock_t *l2); +same_owner(posix_lock_t *l1, posix_lock_t *l2); -void __delete_lock (pl_inode_t *, posix_lock_t *); +void +__delete_lock(posix_lock_t *); -void __destroy_lock (posix_lock_t *); +void +__destroy_lock(posix_lock_t *); pl_dom_list_t * -get_domain (pl_inode_t *pl_inode, const char *volume); +get_domain(pl_inode_t *pl_inode, const char *volume); void -grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode, - pl_dom_list_t *dom); +grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode, + pl_dom_list_t *dom, struct timespec *now, + struct list_head *contend); void -__delete_inode_lock (pl_inode_lock_t *lock); +inodelk_contention_notify(xlator_t *this, struct list_head *contend); void -__pl_inodelk_unref (pl_inode_lock_t *lock); +__delete_inode_lock(pl_inode_lock_t *lock); void -grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode, - pl_dom_list_t *dom); +__pl_inodelk_unref(pl_inode_lock_t *lock); -void pl_update_refkeeper (xlator_t *this, inode_t *inode); +void +__grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode, + struct list_head *granted, pl_dom_list_t *dom, + struct timespec *now, struct list_head *contend); + +void +unwind_granted_inodes(xlator_t *this, pl_inode_t *pl_inode, + struct list_head *granted); + +void +grant_blocked_entry_locks(xlator_t *this, pl_inode_t *pl_inode, + pl_dom_list_t *dom, struct timespec *now, + struct list_head *contend); + +void +entrylk_contention_notify(xlator_t *this, struct list_head *contend); + +void +pl_update_refkeeper(xlator_t *this, inode_t *inode); int32_t -__get_inodelk_count (xlator_t *this, pl_inode_t *pl_inode, char *domname); 
+__get_inodelk_count(xlator_t *this, pl_inode_t *pl_inode, char *domname); int32_t -get_inodelk_count (xlator_t *this, inode_t *inode, char *domname); +get_inodelk_count(xlator_t *this, inode_t *inode, char *domname); int32_t -__get_entrylk_count (xlator_t *this, pl_inode_t *pl_inode); +__get_entrylk_count(xlator_t *this, pl_inode_t *pl_inode); int32_t -get_entrylk_count (xlator_t *this, inode_t *inode); +get_entrylk_count(xlator_t *this, inode_t *inode); -void pl_trace_in (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc, - int cmd, struct gf_flock *flock, const char *domain); +void +pl_trace_in(xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc, int cmd, + struct gf_flock *flock, const char *domain); -void pl_trace_out (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc, - int cmd, struct gf_flock *flock, int op_ret, int op_errno, const char *domain); +void +pl_trace_out(xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc, int cmd, + struct gf_flock *flock, int op_ret, int op_errno, + const char *domain); -void pl_trace_block (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc, - int cmd, struct gf_flock *flock, const char *domain); +void +pl_trace_block(xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc, + int cmd, struct gf_flock *flock, const char *domain); -void pl_trace_flush (xlator_t *this, call_frame_t *frame, fd_t *fd); +void +pl_trace_flush(xlator_t *this, call_frame_t *frame, fd_t *fd); -void entrylk_trace_in (xlator_t *this, call_frame_t *frame, const char *volume, - fd_t *fd, loc_t *loc, const char *basename, - entrylk_cmd cmd, entrylk_type type); +void +entrylk_trace_in(xlator_t *this, call_frame_t *frame, const char *volume, + fd_t *fd, loc_t *loc, const char *basename, entrylk_cmd cmd, + entrylk_type type); -void entrylk_trace_out (xlator_t *this, call_frame_t *frame, const char *volume, - fd_t *fd, loc_t *loc, const char *basename, - entrylk_cmd cmd, entrylk_type type, - int op_ret, int op_errno); +void 
+entrylk_trace_out(xlator_t *this, call_frame_t *frame, const char *volume, + fd_t *fd, loc_t *loc, const char *basename, entrylk_cmd cmd, + entrylk_type type, int op_ret, int op_errno); -void entrylk_trace_block (xlator_t *this, call_frame_t *frame, const char *volume, - fd_t *fd, loc_t *loc, const char *basename, - entrylk_cmd cmd, entrylk_type type); +void +entrylk_trace_block(xlator_t *this, call_frame_t *frame, const char *volume, + fd_t *fd, loc_t *loc, const char *basename, entrylk_cmd cmd, + entrylk_type type); void -pl_print_verdict (char *str, int size, int op_ret, int op_errno); +pl_print_verdict(char *str, int size, int op_ret, int op_errno); void -pl_print_lockee (char *str, int size, fd_t *fd, loc_t *loc); +pl_print_lockee(char *str, int size, fd_t *fd, loc_t *loc); void -pl_print_locker (char *str, int size, xlator_t *this, call_frame_t *frame); +pl_print_locker(char *str, int size, xlator_t *this, call_frame_t *frame); void -pl_print_inodelk (char *str, int size, int cmd, struct gf_flock *flock, const char *domain); +pl_print_inodelk(char *str, int size, int cmd, struct gf_flock *flock, + const char *domain); void -pl_trace_release (xlator_t *this, fd_t *fd); +pl_trace_release(xlator_t *this, fd_t *fd); unsigned long -fd_to_fdnum (fd_t *fd); +fd_to_fdnum(fd_t *fd); fd_t * -fd_from_fdnum (posix_lock_t *lock); +fd_from_fdnum(posix_lock_t *lock); + +int +pl_reserve_setlk(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock, + int can_block); +int +reservelks_equal(posix_lock_t *l1, posix_lock_t *l2); int -pl_reserve_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock, - int can_block); +pl_verify_reservelk(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock, + int can_block); int -reservelks_equal (posix_lock_t *l1, posix_lock_t *l2); +pl_reserve_unlock(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *reqlock); + +int32_t +check_entrylk_on_basename(xlator_t *this, inode_t *parent, char *basename); + +void 
+__pl_inodelk_unref(pl_inode_lock_t *lock); +void +__pl_entrylk_unref(pl_entry_lock_t *lock); int -pl_verify_reservelk (xlator_t *this, pl_inode_t *pl_inode, - posix_lock_t *lock, int can_block); +pl_metalock_is_active(pl_inode_t *pl_inode); + +void +__pl_queue_lock(pl_inode_t *pl_inode, posix_lock_t *reqlock); + +void +inodelk_contention_notify_check(xlator_t *xl, pl_inode_lock_t *lock, + struct timespec *now, + struct list_head *contend); + +void +entrylk_contention_notify_check(xlator_t *xl, pl_entry_lock_t *lock, + struct timespec *now, + struct list_head *contend); + +gf_boolean_t +pl_does_monkey_want_stuck_lock(); + +gf_boolean_t +pl_is_mandatory_locking_enabled(pl_inode_t *pl_inode); + +void +pl_clean_local(pl_local_t *local); + int -pl_reserve_unlock (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *reqlock); +pl_local_init(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd); + +gf_boolean_t +pl_is_lk_owner_valid(gf_lkowner_t *owner, client_t *client); + +int32_t +pl_inode_remove_prepare(xlator_t *xl, call_frame_t *frame, loc_t *loc, + pl_inode_t **ppl_inode, struct list_head *contend); + +int32_t +pl_inode_remove_complete(xlator_t *xl, pl_inode_t *pl_inode, call_stub_t *stub, + struct list_head *contend); -uint32_t -check_entrylk_on_basename (xlator_t *this, inode_t *parent, char *basename); +void +pl_inode_remove_wake(struct list_head *list); + +void +pl_inode_remove_cbk(xlator_t *xl, pl_inode_t *pl_inode, int32_t error); -void __pl_inodelk_unref (pl_inode_lock_t *lock); -void __pl_entrylk_unref (pl_entry_lock_t *lock); +void +pl_inode_remove_unlocked(xlator_t *xl, pl_inode_t *pl_inode, + struct list_head *list); + +int32_t +pl_inode_remove_inodelk(pl_inode_t *pl_inode, pl_inode_lock_t *lock); #endif /* __COMMON_H__ */ diff --git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c index 31553c12be2..fd772c850dd 100644 --- a/xlators/features/locks/src/entrylk.c +++ b/xlators/features/locks/src/entrylk.c @@ -7,72 +7,77 @@ 
later), or the GNU General Public License, version 2 (GPLv2), in all cases as published by the Free Software Foundation. */ -#include "glusterfs.h" -#include "compat.h" -#include "xlator.h" -#include "inode.h" -#include "logging.h" -#include "common-utils.h" -#include "list.h" +#include <glusterfs/glusterfs.h> +#include <glusterfs/compat.h> +#include <glusterfs/xlator.h> +#include <glusterfs/logging.h> +#include <glusterfs/common-utils.h> +#include <glusterfs/list.h> +#include <glusterfs/upcall-utils.h> #include "locks.h" +#include "clear.h" #include "common.h" - +#include "pl-messages.h" void -__pl_entrylk_unref (pl_entry_lock_t *lock) +__pl_entrylk_unref(pl_entry_lock_t *lock) { - lock->ref--; - if (!lock->ref) { - GF_FREE ((char *)lock->basename); - GF_FREE (lock->connection_id); - GF_FREE (lock); - } + lock->ref--; + if (!lock->ref) { + GF_FREE((char *)lock->basename); + GF_FREE(lock->connection_id); + GF_FREE(lock); + } } - static void -__pl_entrylk_ref (pl_entry_lock_t *lock) +__pl_entrylk_ref(pl_entry_lock_t *lock) { - lock->ref++; + lock->ref++; } - static pl_entry_lock_t * -new_entrylk_lock (pl_inode_t *pinode, const char *basename, entrylk_type type, - const char *domain, call_frame_t *frame, char *conn_id) +new_entrylk_lock(pl_inode_t *pinode, const char *basename, entrylk_type type, + const char *domain, call_frame_t *frame, char *conn_id, + int32_t *op_errno) { - pl_entry_lock_t *newlock = NULL; - - newlock = GF_CALLOC (1, sizeof (pl_entry_lock_t), - gf_locks_mt_pl_entry_lock_t); - if (!newlock) { - goto out; - } - - newlock->basename = basename ? 
gf_strdup (basename) : NULL; - newlock->type = type; - newlock->client = frame->root->client; - newlock->client_pid = frame->root->pid; - newlock->volume = domain; - newlock->owner = frame->root->lk_owner; - newlock->frame = frame; - newlock->this = frame->this; - - if (conn_id) { - newlock->connection_id = gf_strdup (conn_id); - } - - INIT_LIST_HEAD (&newlock->domain_list); - INIT_LIST_HEAD (&newlock->blocked_locks); - INIT_LIST_HEAD (&newlock->client_list); - - __pl_entrylk_ref (newlock); + pl_entry_lock_t *newlock = NULL; + + if (!pl_is_lk_owner_valid(&frame->root->lk_owner, frame->root->client)) { + *op_errno = EINVAL; + goto out; + } + + newlock = GF_CALLOC(1, sizeof(pl_entry_lock_t), + gf_locks_mt_pl_entry_lock_t); + if (!newlock) { + *op_errno = ENOMEM; + goto out; + } + + newlock->basename = basename ? gf_strdup(basename) : NULL; + newlock->type = type; + newlock->client = frame->root->client; + newlock->client_pid = frame->root->pid; + newlock->volume = domain; + newlock->owner = frame->root->lk_owner; + newlock->frame = frame; + newlock->this = frame->this; + + if (conn_id) { + newlock->connection_id = gf_strdup(conn_id); + } + + INIT_LIST_HEAD(&newlock->domain_list); + INIT_LIST_HEAD(&newlock->blocked_locks); + INIT_LIST_HEAD(&newlock->client_list); + + __pl_entrylk_ref(newlock); out: - return newlock; + return newlock; } - /** * all_names - does a basename represent all names? 
* @basename: name to check @@ -87,28 +92,220 @@ out: */ static int -names_conflict (const char *n1, const char *n2) +names_conflict(const char *n1, const char *n2) { - return all_names (n1) || all_names (n2) || !strcmp (n1, n2); + return all_names(n1) || all_names(n2) || !strcmp(n1, n2); } - static int -__same_entrylk_owner (pl_entry_lock_t *l1, pl_entry_lock_t *l2) +__same_entrylk_owner(pl_entry_lock_t *l1, pl_entry_lock_t *l2) { - return (is_same_lkowner (&l1->owner, &l2->owner) && - (l1->client == l2->client)); + return (is_same_lkowner(&l1->owner, &l2->owner) && + (l1->client == l2->client)); } /* Just as in inodelk, allow conflicting name locks from same (lk_owner, conn)*/ static int -__conflicting_entrylks (pl_entry_lock_t *l1, pl_entry_lock_t *l2) +__conflicting_entrylks(pl_entry_lock_t *l1, pl_entry_lock_t *l2) { - if (names_conflict (l1->basename, l2->basename) - && !__same_entrylk_owner (l1, l2)) - return 1; + if (names_conflict(l1->basename, l2->basename) && + !__same_entrylk_owner(l1, l2)) + return 1; + + return 0; +} - return 0; +/* See comments in inodelk.c for details */ +static inline gf_boolean_t +__stale_entrylk(xlator_t *this, pl_entry_lock_t *candidate_lock, + pl_entry_lock_t *requested_lock, time_t *lock_age_sec) +{ + posix_locks_private_t *priv = NULL; + + priv = this->private; + + /* Question: Should we just prune them all given the + * chance? Or just the locks we are attempting to acquire? 
+ */ + if (names_conflict(candidate_lock->basename, requested_lock->basename)) { + *lock_age_sec = gf_time() - candidate_lock->granted_time; + if (*lock_age_sec > priv->revocation_secs) + return _gf_true; + } + return _gf_false; +} + +/* See comments in inodelk.c for details */ +static gf_boolean_t +__entrylk_prune_stale(xlator_t *this, pl_inode_t *pinode, pl_dom_list_t *dom, + pl_entry_lock_t *lock) +{ + posix_locks_private_t *priv = NULL; + pl_entry_lock_t *tmp = NULL; + pl_entry_lock_t *lk = NULL; + gf_boolean_t revoke_lock = _gf_false; + int bcount = 0; + int gcount = 0; + int op_errno = 0; + clrlk_args args; + args.opts = NULL; + time_t lk_age_sec = 0; + uint32_t max_blocked = 0; + char *reason_str = NULL; + + priv = this->private; + args.type = CLRLK_ENTRY; + if (priv->revocation_clear_all == _gf_true) + args.kind = CLRLK_ALL; + else + args.kind = CLRLK_GRANTED; + + if (list_empty(&dom->entrylk_list)) + goto out; + + pthread_mutex_lock(&pinode->mutex); + lock->pinode = pinode; + list_for_each_entry_safe(lk, tmp, &dom->entrylk_list, domain_list) + { + if (__stale_entrylk(this, lk, lock, &lk_age_sec) == _gf_true) { + revoke_lock = _gf_true; + reason_str = "age"; + break; + } + } + max_blocked = priv->revocation_max_blocked; + if (max_blocked != 0 && revoke_lock == _gf_false) { + list_for_each_entry_safe(lk, tmp, &dom->blocked_entrylks, blocked_locks) + { + max_blocked--; + if (max_blocked == 0) { + revoke_lock = _gf_true; + reason_str = "max blocked"; + break; + } + } + } + pthread_mutex_unlock(&pinode->mutex); + +out: + if (revoke_lock == _gf_true) { + clrlk_clear_entrylk(this, pinode, dom, &args, &bcount, &gcount, + &op_errno); + gf_log(this->name, GF_LOG_WARNING, + "Lock revocation [reason: %s; gfid: %s; domain: %s; " + "age: %ld sec] - Entry lock revoked: %d granted & %d " + "blocked locks cleared", + reason_str, uuid_utoa(pinode->gfid), dom->domain, lk_age_sec, + gcount, bcount); + } + + return revoke_lock; +} + +void 
+entrylk_contention_notify_check(xlator_t *this, pl_entry_lock_t *lock, + struct timespec *now, struct list_head *contend) +{ + posix_locks_private_t *priv; + int64_t elapsed; + + priv = this->private; + + /* If this lock is in a list, it means that we are about to send a + * notification for it, so no need to do anything else. */ + if (!list_empty(&lock->contend)) { + return; + } + + elapsed = now->tv_sec; + elapsed -= lock->contention_time.tv_sec; + if (now->tv_nsec < lock->contention_time.tv_nsec) { + elapsed--; + } + if (elapsed < priv->notify_contention_delay) { + return; + } + + /* All contention notifications will be sent outside of the locked + * region. This means that currently granted locks might have already + * been unlocked by that time. To avoid the lock or the inode to be + * destroyed before we process them, we take an additional reference + * on both. */ + inode_ref(lock->pinode->inode); + __pl_entrylk_ref(lock); + + lock->contention_time = *now; + + list_add_tail(&lock->contend, contend); +} + +void +entrylk_contention_notify(xlator_t *this, struct list_head *contend) +{ + struct gf_upcall up; + struct gf_upcall_entrylk_contention lc; + pl_entry_lock_t *lock; + pl_inode_t *pl_inode; + client_t *client; + gf_boolean_t notify; + + while (!list_empty(contend)) { + lock = list_first_entry(contend, pl_entry_lock_t, contend); + + pl_inode = lock->pinode; + + pthread_mutex_lock(&pl_inode->mutex); + + /* If the lock has already been released, no notification is + * sent. We clear the notification time in this case. 
*/ + notify = !list_empty(&lock->domain_list); + if (!notify) { + lock->contention_time.tv_sec = 0; + lock->contention_time.tv_nsec = 0; + } else { + lc.type = lock->type; + lc.name = lock->basename; + lc.pid = lock->client_pid; + lc.domain = lock->volume; + lc.xdata = NULL; + + gf_uuid_copy(up.gfid, lock->pinode->gfid); + client = (client_t *)lock->client; + if (client == NULL) { + /* A NULL client can be found if the entrylk + * was issued by a server side xlator. */ + up.client_uid = NULL; + } else { + up.client_uid = client->client_uid; + } + } + + pthread_mutex_unlock(&pl_inode->mutex); + + if (notify) { + up.event_type = GF_UPCALL_ENTRYLK_CONTENTION; + up.data = &lc; + + if (this->notify(this, GF_EVENT_UPCALL, &up) < 0) { + gf_msg_debug(this->name, 0, + "Entrylk contention notification " + "failed"); + } else { + gf_msg_debug(this->name, 0, + "Entrylk contention notification " + "sent"); + } + } + + pthread_mutex_lock(&pl_inode->mutex); + + list_del_init(&lock->contend); + __pl_entrylk_unref(lock); + + pthread_mutex_unlock(&pl_inode->mutex); + + inode_unref(pl_inode->inode); + } } /** @@ -118,184 +315,188 @@ __conflicting_entrylks (pl_entry_lock_t *l1, pl_entry_lock_t *l2) * @type: type of lock */ static pl_entry_lock_t * -__entrylk_grantable (pl_dom_list_t *dom, pl_entry_lock_t *lock) +__entrylk_grantable(xlator_t *this, pl_dom_list_t *dom, pl_entry_lock_t *lock, + struct timespec *now, struct list_head *contend) { - pl_entry_lock_t *tmp = NULL; - - if (list_empty (&dom->entrylk_list)) - return NULL; - - list_for_each_entry (tmp, &dom->entrylk_list, domain_list) { - if (__conflicting_entrylks (tmp, lock)) - return tmp; + pl_entry_lock_t *tmp = NULL; + pl_entry_lock_t *ret = NULL; + + list_for_each_entry(tmp, &dom->entrylk_list, domain_list) + { + if (__conflicting_entrylks(tmp, lock)) { + if (ret == NULL) { + ret = tmp; + if (contend == NULL) { + break; + } + } + entrylk_contention_notify_check(this, tmp, now, contend); } + } - return NULL; + return ret; } 
static pl_entry_lock_t * -__blocked_entrylk_conflict (pl_dom_list_t *dom, pl_entry_lock_t *lock) +__blocked_entrylk_conflict(pl_dom_list_t *dom, pl_entry_lock_t *lock) { - pl_entry_lock_t *tmp = NULL; + pl_entry_lock_t *tmp = NULL; - if (list_empty (&dom->blocked_entrylks)) - return NULL; - - list_for_each_entry (tmp, &dom->blocked_entrylks, blocked_locks) { - if (names_conflict (tmp->basename, lock->basename)) - return lock; - } + list_for_each_entry(tmp, &dom->blocked_entrylks, blocked_locks) + { + if (names_conflict(tmp->basename, lock->basename)) + return lock; + } - return NULL; + return NULL; } static int -__owner_has_lock (pl_dom_list_t *dom, pl_entry_lock_t *newlock) +__owner_has_lock(pl_dom_list_t *dom, pl_entry_lock_t *newlock) { - pl_entry_lock_t *lock = NULL; + pl_entry_lock_t *lock = NULL; - list_for_each_entry (lock, &dom->entrylk_list, domain_list) { - if (__same_entrylk_owner (lock, newlock)) - return 1; - } + list_for_each_entry(lock, &dom->entrylk_list, domain_list) + { + if (__same_entrylk_owner(lock, newlock)) + return 1; + } - list_for_each_entry (lock, &dom->blocked_entrylks, blocked_locks) { - if (__same_entrylk_owner (lock, newlock)) - return 1; - } + list_for_each_entry(lock, &dom->blocked_entrylks, blocked_locks) + { + if (__same_entrylk_owner(lock, newlock)) + return 1; + } - return 0; + return 0; } static int -names_equal (const char *n1, const char *n2) +names_equal(const char *n1, const char *n2) { - return (n1 == NULL && n2 == NULL) || (n1 && n2 && !strcmp (n1, n2)); + return (n1 == NULL && n2 == NULL) || (n1 && n2 && !strcmp(n1, n2)); } void -pl_print_entrylk (char *str, int size, entrylk_cmd cmd, entrylk_type type, - const char *basename, const char *domain) +pl_print_entrylk(char *str, int size, entrylk_cmd cmd, entrylk_type type, + const char *basename, const char *domain) { - char *cmd_str = NULL; - char *type_str = NULL; + char *cmd_str = NULL; + char *type_str = NULL; - switch (cmd) { + switch (cmd) { case ENTRYLK_LOCK: - 
cmd_str = "LOCK"; - break; + cmd_str = "LOCK"; + break; case ENTRYLK_LOCK_NB: - cmd_str = "LOCK_NB"; - break; + cmd_str = "LOCK_NB"; + break; case ENTRYLK_UNLOCK: - cmd_str = "UNLOCK"; - break; + cmd_str = "UNLOCK"; + break; default: - cmd_str = "UNKNOWN"; - break; - } + cmd_str = "UNKNOWN"; + break; + } - switch (type) { + switch (type) { case ENTRYLK_RDLCK: - type_str = "READ"; - break; + type_str = "READ"; + break; case ENTRYLK_WRLCK: - type_str = "WRITE"; - break; + type_str = "WRITE"; + break; default: - type_str = "UNKNOWN"; - break; - } + type_str = "UNKNOWN"; + break; + } - snprintf (str, size, "lock=ENTRYLK, cmd=%s, type=%s, basename=%s, domain: %s", - cmd_str, type_str, basename, domain); + snprintf(str, size, + "lock=ENTRYLK, cmd=%s, type=%s, basename=%s, domain: %s", cmd_str, + type_str, basename, domain); } - void -entrylk_trace_in (xlator_t *this, call_frame_t *frame, const char *domain, - fd_t *fd, loc_t *loc, const char *basename, - entrylk_cmd cmd, entrylk_type type) +entrylk_trace_in(xlator_t *this, call_frame_t *frame, const char *domain, + fd_t *fd, loc_t *loc, const char *basename, entrylk_cmd cmd, + entrylk_type type) { - posix_locks_private_t *priv = NULL; - char pl_locker[256]; - char pl_lockee[256]; - char pl_entrylk[256]; + posix_locks_private_t *priv = NULL; + char pl_locker[256]; + char pl_lockee[256]; + char pl_entrylk[256]; - priv = this->private; + priv = this->private; - if (!priv->trace) - return; + if (!priv->trace) + return; - pl_print_locker (pl_locker, 256, this, frame); - pl_print_lockee (pl_lockee, 256, fd, loc); - pl_print_entrylk (pl_entrylk, 256, cmd, type, basename, domain); + pl_print_locker(pl_locker, 256, this, frame); + pl_print_lockee(pl_lockee, 256, fd, loc); + pl_print_entrylk(pl_entrylk, 256, cmd, type, basename, domain); - gf_log (this->name, GF_LOG_INFO, - "[REQUEST] Locker = {%s} Lockee = {%s} Lock = {%s}", - pl_locker, pl_lockee, pl_entrylk); + gf_log(this->name, GF_LOG_INFO, + "[REQUEST] Locker = {%s} Lockee = 
{%s} Lock = {%s}", pl_locker, + pl_lockee, pl_entrylk); } - void -entrylk_trace_out (xlator_t *this, call_frame_t *frame, const char *domain, - fd_t *fd, loc_t *loc, const char *basename, - entrylk_cmd cmd, entrylk_type type, int op_ret, int op_errno) +entrylk_trace_out(xlator_t *this, call_frame_t *frame, const char *domain, + fd_t *fd, loc_t *loc, const char *basename, entrylk_cmd cmd, + entrylk_type type, int op_ret, int op_errno) { - posix_locks_private_t *priv = NULL; - char pl_locker[256]; - char pl_lockee[256]; - char pl_entrylk[256]; - char verdict[32]; + posix_locks_private_t *priv = NULL; + char pl_locker[256]; + char pl_lockee[256]; + char pl_entrylk[256]; + char verdict[32]; - priv = this->private; + priv = this->private; - if (!priv->trace) - return; + if (!priv->trace) + return; - pl_print_locker (pl_locker, 256, this, frame); - pl_print_lockee (pl_lockee, 256, fd, loc); - pl_print_entrylk (pl_entrylk, 256, cmd, type, basename, domain); - pl_print_verdict (verdict, 32, op_ret, op_errno); + pl_print_locker(pl_locker, 256, this, frame); + pl_print_lockee(pl_lockee, 256, fd, loc); + pl_print_entrylk(pl_entrylk, 256, cmd, type, basename, domain); + pl_print_verdict(verdict, 32, op_ret, op_errno); - gf_log (this->name, GF_LOG_INFO, - "[%s] Locker = {%s} Lockee = {%s} Lock = {%s}", - verdict, pl_locker, pl_lockee, pl_entrylk); + gf_log(this->name, GF_LOG_INFO, + "[%s] Locker = {%s} Lockee = {%s} Lock = {%s}", verdict, pl_locker, + pl_lockee, pl_entrylk); } - void -entrylk_trace_block (xlator_t *this, call_frame_t *frame, const char *volume, - fd_t *fd, loc_t *loc, const char *basename, - entrylk_cmd cmd, entrylk_type type) +entrylk_trace_block(xlator_t *this, call_frame_t *frame, const char *volume, + fd_t *fd, loc_t *loc, const char *basename, entrylk_cmd cmd, + entrylk_type type) { - posix_locks_private_t *priv = NULL; - char pl_locker[256]; - char pl_lockee[256]; - char pl_entrylk[256]; + posix_locks_private_t *priv = NULL; + char pl_locker[256]; + char 
pl_lockee[256]; + char pl_entrylk[256]; - priv = this->private; + priv = this->private; - if (!priv->trace) - return; + if (!priv->trace) + return; - pl_print_locker (pl_locker, 256, this, frame); - pl_print_lockee (pl_lockee, 256, fd, loc); - pl_print_entrylk (pl_entrylk, 256, cmd, type, basename, volume); + pl_print_locker(pl_locker, 256, this, frame); + pl_print_lockee(pl_lockee, 256, fd, loc); + pl_print_entrylk(pl_entrylk, 256, cmd, type, basename, volume); - gf_log (this->name, GF_LOG_INFO, - "[BLOCKED] Locker = {%s} Lockee = {%s} Lock = {%s}", - pl_locker, pl_lockee, pl_entrylk); + gf_log(this->name, GF_LOG_INFO, + "[BLOCKED] Locker = {%s} Lockee = {%s} Lock = {%s}", pl_locker, + pl_lockee, pl_entrylk); } /** - * __find_most_matching_lock - find the lock struct which most matches in order of: - * lock on the exact basename || - * an all_names lock + * __find_most_matching_lock - find the lock struct which most matches in order + * of: lock on the exact basename || an all_names lock * * * @inode: inode in which to look @@ -303,37 +504,57 @@ entrylk_trace_block (xlator_t *this, call_frame_t *frame, const char *volume, */ static pl_entry_lock_t * -__find_most_matching_lock (pl_dom_list_t *dom, const char *basename) +__find_most_matching_lock(pl_dom_list_t *dom, const char *basename) { - pl_entry_lock_t *lock; - pl_entry_lock_t *all = NULL; - pl_entry_lock_t *exact = NULL; + pl_entry_lock_t *lock; + pl_entry_lock_t *all = NULL; + pl_entry_lock_t *exact = NULL; - if (list_empty (&dom->entrylk_list)) - return NULL; + if (list_empty(&dom->entrylk_list)) + return NULL; - list_for_each_entry (lock, &dom->entrylk_list, domain_list) { - if (all_names (lock->basename)) - all = lock; - else if (names_equal (lock->basename, basename)) - exact = lock; - } + list_for_each_entry(lock, &dom->entrylk_list, domain_list) + { + if (all_names(lock->basename)) + all = lock; + else if (names_equal(lock->basename, basename)) + exact = lock; + } - return (exact ? 
exact : all); + return (exact ? exact : all); } -static pl_entry_lock_t* -__find_matching_lock (pl_dom_list_t *dom, pl_entry_lock_t *lock) +static pl_entry_lock_t * +__find_matching_lock(pl_dom_list_t *dom, pl_entry_lock_t *lock) { - pl_entry_lock_t *tmp = NULL; + pl_entry_lock_t *tmp = NULL; + + list_for_each_entry(tmp, &dom->entrylk_list, domain_list) + { + if (names_equal(lock->basename, tmp->basename) && + __same_entrylk_owner(lock, tmp) && (lock->type == tmp->type)) + return tmp; + } + return NULL; +} - list_for_each_entry (tmp, &dom->entrylk_list, domain_list) { - if (names_equal (lock->basename, tmp->basename) - && __same_entrylk_owner (lock, tmp) - && (lock->type == tmp->type)) - return tmp; - } - return NULL; +static int +__lock_blocked_add(xlator_t *this, pl_inode_t *pinode, pl_dom_list_t *dom, + pl_entry_lock_t *lock, int nonblock) +{ + if (nonblock) + goto out; + + lock->blkd_time = gf_time(); + list_add_tail(&lock->blocked_locks, &dom->blocked_entrylks); + + gf_msg_trace(this->name, 0, "Blocking lock: {pinode=%p, basename=%s}", + pinode, lock->basename); + + entrylk_trace_block(this, lock->frame, NULL, NULL, NULL, lock->basename, + ENTRYLK_LOCK, lock->type); +out: + return -EAGAIN; } /** @@ -348,63 +569,49 @@ __find_matching_lock (pl_dom_list_t *dom, pl_entry_lock_t *lock) */ int -__lock_entrylk (xlator_t *this, pl_inode_t *pinode, pl_entry_lock_t *lock, - int nonblock, pl_dom_list_t *dom) +__lock_entrylk(xlator_t *this, pl_inode_t *pinode, pl_entry_lock_t *lock, + int nonblock, pl_dom_list_t *dom, struct timespec *now, + struct list_head *contend) { - pl_entry_lock_t *conf = NULL; - int ret = -EAGAIN; - - conf = __entrylk_grantable (dom, lock); - if (conf) { - ret = -EAGAIN; - if (nonblock) - goto out; - - gettimeofday (&lock->blkd_time, NULL); - list_add_tail (&lock->blocked_locks, &dom->blocked_entrylks); - - gf_log (this->name, GF_LOG_TRACE, - "Blocking lock: {pinode=%p, basename=%s}", - pinode, lock->basename); - - goto out; + pl_entry_lock_t 
*conf = NULL; + int ret = -EAGAIN; + + conf = __entrylk_grantable(this, dom, lock, now, contend); + if (conf) { + ret = __lock_blocked_add(this, pinode, dom, lock, nonblock); + goto out; + } + + /* To prevent blocked locks starvation, check if there are any blocked + * locks thay may conflict with this lock. If there is then don't grant + * the lock. BUT grant the lock if the owner already has lock to allow + * nested locks. + * Example: SHD from Machine1 takes (gfid, basename=257-length-name) + * and is granted. + * SHD from machine2 takes (gfid, basename=NULL) and is blocked. + * When SHD from Machine1 takes (gfid, basename=NULL) it needs to be + * granted, without which self-heal can't progress. + * TODO: Find why 'owner_has_lock' is checked even for blocked locks. + */ + if (__blocked_entrylk_conflict(dom, lock) && + !(__owner_has_lock(dom, lock))) { + if (nonblock == 0) { + gf_log(this->name, GF_LOG_DEBUG, + "Lock is grantable, but blocking to prevent " + "starvation"); } - /* To prevent blocked locks starvation, check if there are any blocked - * locks thay may conflict with this lock. If there is then don't grant - * the lock. BUT grant the lock if the owner already has lock to allow - * nested locks. - * Example: SHD from Machine1 takes (gfid, basename=257-length-name) - * and is granted. - * SHD from machine2 takes (gfid, basename=NULL) and is blocked. - * When SHD from Machine1 takes (gfid, basename=NULL) it needs to be - * granted, without which self-heal can't progress. - * TODO: Find why 'owner_has_lock' is checked even for blocked locks. 
- */ - if (__blocked_entrylk_conflict (dom, lock) && !(__owner_has_lock (dom, lock))) { - ret = -EAGAIN; - if (nonblock) - goto out; - - gettimeofday (&lock->blkd_time, NULL); - list_add_tail (&lock->blocked_locks, &dom->blocked_entrylks); - - gf_log (this->name, GF_LOG_DEBUG, - "Lock is grantable, but blocking to prevent starvation"); - gf_log (this->name, GF_LOG_TRACE, - "Blocking lock: {pinode=%p, basename=%s}", - pinode, lock->basename); - - goto out; - } + ret = __lock_blocked_add(this, pinode, dom, lock, nonblock); + goto out; + } - __pl_entrylk_ref (lock); - gettimeofday (&lock->granted_time, NULL); - list_add (&lock->domain_list, &dom->entrylk_list); + __pl_entrylk_ref(lock); + lock->granted_time = gf_time(); + list_add(&lock->domain_list, &dom->entrylk_list); - ret = 0; + ret = 0; out: - return ret; + return ret; } /** @@ -415,291 +622,322 @@ out: */ pl_entry_lock_t * -__unlock_entrylk (pl_dom_list_t *dom, pl_entry_lock_t *lock) +__unlock_entrylk(pl_dom_list_t *dom, pl_entry_lock_t *lock) { - pl_entry_lock_t *ret_lock = NULL; + pl_entry_lock_t *ret_lock = NULL; - ret_lock = __find_matching_lock (dom, lock); + ret_lock = __find_matching_lock(dom, lock); - if (ret_lock) { - list_del_init (&ret_lock->domain_list); - } else { - gf_log ("locks", GF_LOG_ERROR, "unlock on %s " - "(type=ENTRYLK_WRLCK) attempted but no matching lock " - "found", lock->basename); - } + if (ret_lock) { + list_del_init(&ret_lock->domain_list); + } else { + gf_log("locks", GF_LOG_ERROR, + "unlock on %s " + "(type=ENTRYLK_WRLCK) attempted but no matching lock " + "found", + lock->basename); + } - return ret_lock; + return ret_lock; } -uint32_t -check_entrylk_on_basename (xlator_t *this, inode_t *parent, char *basename) +int32_t +check_entrylk_on_basename(xlator_t *this, inode_t *parent, char *basename) { - uint32_t entrylk = 0; - pl_inode_t *pinode = 0; - pl_dom_list_t *dom = NULL; - pl_entry_lock_t *conf = NULL; - - pinode = pl_inode_get (this, parent); - if (!pinode) - goto out; - 
pthread_mutex_lock (&pinode->mutex); + int32_t entrylk = 0; + pl_dom_list_t *dom = NULL; + pl_entry_lock_t *conf = NULL; + + pl_inode_t *pinode = pl_inode_get(this, parent, NULL); + if (!pinode) + goto out; + pthread_mutex_lock(&pinode->mutex); + { + list_for_each_entry(dom, &pinode->dom_list, inode_list) { - list_for_each_entry (dom, &pinode->dom_list, inode_list) { - conf = __find_most_matching_lock (dom, basename); - if (conf && conf->basename) { - entrylk = 1; - break; - } - } + conf = __find_most_matching_lock(dom, basename); + if (conf && conf->basename) { + entrylk = 1; + break; + } } - pthread_mutex_unlock (&pinode->mutex); + } + pthread_mutex_unlock(&pinode->mutex); out: - return entrylk; + return entrylk; } void -__grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode, - pl_dom_list_t *dom, struct list_head *granted) +__grant_blocked_entry_locks(xlator_t *this, pl_inode_t *pl_inode, + pl_dom_list_t *dom, struct list_head *granted, + struct timespec *now, struct list_head *contend) { - int bl_ret = 0; - pl_entry_lock_t *bl = NULL; - pl_entry_lock_t *tmp = NULL; + int bl_ret = 0; + pl_entry_lock_t *bl = NULL; + pl_entry_lock_t *tmp = NULL; - struct list_head blocked_list; + struct list_head blocked_list; - INIT_LIST_HEAD (&blocked_list); - list_splice_init (&dom->blocked_entrylks, &blocked_list); + INIT_LIST_HEAD(&blocked_list); + list_splice_init(&dom->blocked_entrylks, &blocked_list); - list_for_each_entry_safe (bl, tmp, &blocked_list, blocked_locks) { + list_for_each_entry_safe(bl, tmp, &blocked_list, blocked_locks) + { + list_del_init(&bl->blocked_locks); - list_del_init (&bl->blocked_locks); + bl_ret = __lock_entrylk(bl->this, pl_inode, bl, 0, dom, now, contend); - bl_ret = __lock_entrylk (bl->this, pl_inode, bl, 0, dom); - - if (bl_ret == 0) { - list_add (&bl->blocked_locks, granted); - } + if (bl_ret == 0) { + list_add_tail(&bl->blocked_locks, granted); } - return; + } } /* Grants locks if possible which are blocked on a lock */ void 
-grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode, - pl_dom_list_t *dom) +grant_blocked_entry_locks(xlator_t *this, pl_inode_t *pl_inode, + pl_dom_list_t *dom, struct timespec *now, + struct list_head *contend) { - struct list_head granted_list; - pl_entry_lock_t *tmp = NULL; - pl_entry_lock_t *lock = NULL; - - INIT_LIST_HEAD (&granted_list); - - pthread_mutex_lock (&pl_inode->mutex); + struct list_head granted_list; + pl_entry_lock_t *tmp = NULL; + pl_entry_lock_t *lock = NULL; + + INIT_LIST_HEAD(&granted_list); + + pthread_mutex_lock(&pl_inode->mutex); + { + __grant_blocked_entry_locks(this, pl_inode, dom, &granted_list, now, + contend); + } + pthread_mutex_unlock(&pl_inode->mutex); + + list_for_each_entry_safe(lock, tmp, &granted_list, blocked_locks) + { + entrylk_trace_out(this, lock->frame, NULL, NULL, NULL, lock->basename, + ENTRYLK_LOCK, lock->type, 0, 0); + + STACK_UNWIND_STRICT(entrylk, lock->frame, 0, 0, NULL); + lock->frame = NULL; + } + + pthread_mutex_lock(&pl_inode->mutex); + { + list_for_each_entry_safe(lock, tmp, &granted_list, blocked_locks) { - __grant_blocked_entry_locks (this, pl_inode, dom, - &granted_list); + list_del_init(&lock->blocked_locks); + __pl_entrylk_unref(lock); } - pthread_mutex_unlock (&pl_inode->mutex); - - list_for_each_entry_safe (lock, tmp, &granted_list, blocked_locks) { - entrylk_trace_out (this, lock->frame, NULL, NULL, NULL, - lock->basename, ENTRYLK_LOCK, lock->type, - 0, 0); - - STACK_UNWIND_STRICT (entrylk, lock->frame, 0, 0, NULL); - lock->frame = NULL; - } - - pthread_mutex_lock (&pl_inode->mutex); - { - list_for_each_entry_safe (lock, tmp, &granted_list, blocked_locks) { - list_del_init (&lock->blocked_locks); - __pl_entrylk_unref (lock); - } - } - pthread_mutex_unlock (&pl_inode->mutex); - - return; + } + pthread_mutex_unlock(&pl_inode->mutex); } - /* Common entrylk code called by pl_entrylk and pl_fentrylk */ int -pl_common_entrylk (call_frame_t *frame, xlator_t *this, - const char *volume, inode_t 
*inode, const char *basename, - entrylk_cmd cmd, entrylk_type type, loc_t *loc, fd_t *fd, - dict_t *xdata) - -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - int ret = -1; - char unwind = 1; - GF_UNUSED int dict_ret = -1; - pl_inode_t *pinode = NULL; - pl_entry_lock_t *reqlock = NULL; - pl_entry_lock_t *unlocked = NULL; - pl_dom_list_t *dom = NULL; - char *conn_id = NULL; - pl_ctx_t *ctx = NULL; - int nonblock = 0; - gf_boolean_t need_inode_unref = _gf_false; - - if (xdata) - dict_ret = dict_get_str (xdata, "connection-id", &conn_id); - - pinode = pl_inode_get (this, inode); - if (!pinode) { - op_errno = ENOMEM; - goto out; - } +pl_common_entrylk(call_frame_t *frame, xlator_t *this, const char *volume, + inode_t *inode, const char *basename, entrylk_cmd cmd, + entrylk_type type, loc_t *loc, fd_t *fd, dict_t *xdata) - if (frame->root->client) { - ctx = pl_ctx_get (frame->root->client, this); - if (!ctx) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_INFO, "pl_ctx_get() failed"); - goto unwind; - } - } - - dom = get_domain (pinode, volume); - if (!dom){ - op_errno = ENOMEM; +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + int ret = -1; + char unwind = 1; + GF_UNUSED int dict_ret = -1; + pl_inode_t *pinode = NULL; + pl_entry_lock_t *reqlock = NULL; + pl_entry_lock_t *unlocked = NULL; + pl_dom_list_t *dom = NULL; + char *conn_id = NULL; + pl_ctx_t *ctx = NULL; + int nonblock = 0; + gf_boolean_t need_inode_unref = _gf_false; + posix_locks_private_t *priv = NULL; + struct list_head *pcontend = NULL; + struct list_head contend; + struct timespec now = {}; + + priv = this->private; + + if (priv->notify_contention) { + pcontend = &contend; + INIT_LIST_HEAD(pcontend); + timespec_now(&now); + } + + if (xdata) + dict_ret = dict_get_str(xdata, "connection-id", &conn_id); + + pinode = pl_inode_get(this, inode, NULL); + if (!pinode) { + op_errno = ENOMEM; + goto out; + } + + if (frame->root->client) { + ctx = pl_ctx_get(frame->root->client, this); + if (!ctx) { + 
op_errno = ENOMEM; + gf_log(this->name, GF_LOG_INFO, "pl_ctx_get() failed"); + goto unwind; + } + } + + dom = get_domain(pinode, volume); + if (!dom) { + op_errno = ENOMEM; + goto out; + } + + entrylk_trace_in(this, frame, volume, fd, loc, basename, cmd, type); + + reqlock = new_entrylk_lock(pinode, basename, type, dom->domain, frame, + conn_id, &op_errno); + if (!reqlock) { + op_ret = -1; + goto unwind; + } + + /* Ideally, AFTER a successful lock (both blocking and non-blocking) or + * an unsuccessful blocking lock operation, the inode needs to be ref'd. + * + * But doing so might give room to a race where the lock-requesting + * client could send a DISCONNECT just before this thread refs the inode + * after the locking is done, and the epoll thread could unref the inode + * in cleanup which means the inode's refcount would come down to 0, and + * the call to pl_forget() at this point destroys @pinode. Now when + * the io-thread executing this function tries to access pinode, + * it could crash on account of illegal memory access. + * + * To get around this problem, the inode is ref'd once even before + * adding the lock into client_list as a precautionary measure. + * This way even if there are DISCONNECTs, there will always be 1 extra + * ref on the inode, so @pinode is still alive until after the + * current stack unwinds. 
+ */ + pinode->inode = inode_ref(inode); + if (priv->revocation_secs != 0) { + if (cmd != ENTRYLK_UNLOCK) { + __entrylk_prune_stale(this, pinode, dom, reqlock); + } else if (priv->monkey_unlocking == _gf_true) { + if (pl_does_monkey_want_stuck_lock()) { + gf_log(this->name, GF_LOG_WARNING, + "MONKEY LOCKING (forcing stuck lock)!"); + op_ret = 0; + need_inode_unref = _gf_true; + pthread_mutex_lock(&pinode->mutex); + { + __pl_entrylk_unref(reqlock); + } + pthread_mutex_unlock(&pinode->mutex); goto out; + } } + } - entrylk_trace_in (this, frame, volume, fd, loc, basename, cmd, type); + switch (cmd) { + case ENTRYLK_LOCK_NB: + nonblock = 1; + /* fall through */ + case ENTRYLK_LOCK: + if (ctx) + pthread_mutex_lock(&ctx->lock); + pthread_mutex_lock(&pinode->mutex); + { + reqlock->pinode = pinode; + + ret = __lock_entrylk(this, pinode, reqlock, nonblock, dom, &now, + pcontend); + if (ret == 0) { + reqlock->frame = NULL; + op_ret = 0; + } else { + op_errno = -ret; + } - reqlock = new_entrylk_lock (pinode, basename, type, dom->domain, frame, - conn_id); - if (!reqlock) { - op_ret = -1; - op_errno = ENOMEM; - goto unwind; - } + if (ctx && (!ret || !nonblock)) + list_add(&reqlock->client_list, &ctx->entrylk_lockers); - /* Ideally, AFTER a successful lock (both blocking and non-blocking) or - * an unsuccessful blocking lock operation, the inode needs to be ref'd. - * - * But doing so might give room to a race where the lock-requesting - * client could send a DISCONNECT just before this thread refs the inode - * after the locking is done, and the epoll thread could unref the inode - * in cleanup which means the inode's refcount would come down to 0, and - * the call to pl_forget() at this point destroys @pinode. Now when - * the io-thread executing this function tries to access pinode, - * it could crash on account of illegal memory access. - * - * To get around this problem, the inode is ref'd once even before - * adding the lock into client_list as a precautionary measure. 
- * This way even if there are DISCONNECTs, there will always be 1 extra - * ref on the inode, so @pinode is still alive until after the - * current stack unwinds. - */ - pinode->inode = inode_ref (inode); - - switch (cmd) { - case ENTRYLK_LOCK_NB: - nonblock = 1; - /* fall through */ - case ENTRYLK_LOCK: - if (ctx) - pthread_mutex_lock (&ctx->lock); - pthread_mutex_lock (&pinode->mutex); - { - reqlock->pinode = pinode; - - ret = __lock_entrylk (this, pinode, reqlock, nonblock, dom); - if (ret == 0) { - reqlock->frame = NULL; - op_ret = 0; - } else { - op_errno = -ret; - } - - if (ctx && (!ret || !nonblock)) - list_add (&reqlock->client_list, - &ctx->entrylk_lockers); - - if (ret == -EAGAIN && !nonblock) { - /* blocked */ - unwind = 0; - } else { - __pl_entrylk_unref (reqlock); - } - - /* For all but the case where a non-blocking lock - * attempt fails, the extra ref taken before the switch - * block must be negated. - */ - if ((ret == -EAGAIN) && (nonblock)) - need_inode_unref = _gf_true; + if (ret == -EAGAIN && !nonblock) { + /* blocked */ + unwind = 0; + } else { + __pl_entrylk_unref(reqlock); } - pthread_mutex_unlock (&pinode->mutex); - if (ctx) - pthread_mutex_unlock (&ctx->lock); - break; + + /* For all but the case where a non-blocking lock + * attempt fails, the extra ref taken before the switch + * block must be negated. + */ + if ((ret == -EAGAIN) && (nonblock)) + need_inode_unref = _gf_true; + } + pthread_mutex_unlock(&pinode->mutex); + if (ctx) + pthread_mutex_unlock(&ctx->lock); + break; case ENTRYLK_UNLOCK: - if (ctx) - pthread_mutex_lock (&ctx->lock); - pthread_mutex_lock (&pinode->mutex); - { - /* Irrespective of whether unlock succeeds or not, - * the extra inode ref that was done before the switch - * block must be negated. Towards this, - * @need_inode_unref flag is set unconditionally here. 
- */ - need_inode_unref = _gf_true; - unlocked = __unlock_entrylk (dom, reqlock); - if (unlocked) { - list_del_init (&unlocked->client_list); - __pl_entrylk_unref (unlocked); - op_ret = 0; - } else { - op_errno = EINVAL; - } - __pl_entrylk_unref (reqlock); + if (ctx) + pthread_mutex_lock(&ctx->lock); + pthread_mutex_lock(&pinode->mutex); + { + /* Irrespective of whether unlock succeeds or not, + * the extra inode ref that was done before the switch + * block must be negated. Towards this, + * @need_inode_unref flag is set unconditionally here. + */ + need_inode_unref = _gf_true; + unlocked = __unlock_entrylk(dom, reqlock); + if (unlocked) { + list_del_init(&unlocked->client_list); + __pl_entrylk_unref(unlocked); + op_ret = 0; + } else { + op_errno = EINVAL; } - pthread_mutex_unlock (&pinode->mutex); - if (ctx) - pthread_mutex_unlock (&ctx->lock); + __pl_entrylk_unref(reqlock); + } + pthread_mutex_unlock(&pinode->mutex); + if (ctx) + pthread_mutex_unlock(&ctx->lock); - grant_blocked_entry_locks (this, pinode, dom); + grant_blocked_entry_locks(this, pinode, dom, &now, pcontend); - break; + break; default: - inode_unref (pinode->inode); - gf_log (this->name, GF_LOG_ERROR, - "Unexpected case in entrylk (cmd=%d). Please file" - "a bug report at http://bugs.gluster.com", cmd); - goto out; - } - if (need_inode_unref) - inode_unref (pinode->inode); - - /* The following (extra) unref corresponds to the ref that - * was done at the time the lock was granted. - */ - if ((cmd == ENTRYLK_UNLOCK) && (op_ret == 0)) - inode_unref (pinode->inode); + need_inode_unref = _gf_true; + gf_log(this->name, GF_LOG_ERROR, + "Unexpected case in entrylk (cmd=%d). Please file" + "a bug report at http://bugs.gluster.com", + cmd); + goto out; + } + /* The following (extra) unref corresponds to the ref that + * was done at the time the lock was granted. 
+ */ + if ((cmd == ENTRYLK_UNLOCK) && (op_ret == 0)) + inode_unref(pinode->inode); out: - if (unwind) { - entrylk_trace_out (this, frame, volume, fd, loc, basename, - cmd, type, op_ret, op_errno); -unwind: - STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno, NULL); - } else { - entrylk_trace_block (this, frame, volume, fd, loc, basename, - cmd, type); - } + if (need_inode_unref) + inode_unref(pinode->inode); + + if (unwind) { + entrylk_trace_out(this, frame, volume, fd, loc, basename, cmd, type, + op_ret, op_errno); + unwind: + STACK_UNWIND_STRICT(entrylk, frame, op_ret, op_errno, NULL); + } - return 0; + if (pcontend != NULL) { + entrylk_contention_notify(this, pcontend); + } + + return 0; } /** @@ -709,17 +947,16 @@ unwind: */ int -pl_entrylk (call_frame_t *frame, xlator_t *this, - const char *volume, loc_t *loc, const char *basename, - entrylk_cmd cmd, entrylk_type type, dict_t *xdata) +pl_entrylk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc, + const char *basename, entrylk_cmd cmd, entrylk_type type, + dict_t *xdata) { - pl_common_entrylk (frame, this, volume, loc->inode, basename, cmd, - type, loc, NULL, xdata); + pl_common_entrylk(frame, this, volume, loc->inode, basename, cmd, type, loc, + NULL, xdata); - return 0; + return 0; } - /** * pl_fentrylk: * @@ -727,176 +964,190 @@ pl_entrylk (call_frame_t *frame, xlator_t *this, */ int -pl_fentrylk (call_frame_t *frame, xlator_t *this, - const char *volume, fd_t *fd, const char *basename, - entrylk_cmd cmd, entrylk_type type, dict_t *xdata) +pl_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, + const char *basename, entrylk_cmd cmd, entrylk_type type, + dict_t *xdata) { - pl_common_entrylk (frame, this, volume, fd->inode, basename, cmd, - type, NULL, fd, xdata); + pl_common_entrylk(frame, this, volume, fd->inode, basename, cmd, type, NULL, + fd, xdata); - return 0; + return 0; } - static void -pl_entrylk_log_cleanup (pl_entry_lock_t *lock) 
+pl_entrylk_log_cleanup(pl_entry_lock_t *lock) { - pl_inode_t *pinode = NULL; + pl_inode_t *pinode = NULL; - pinode = lock->pinode; + pinode = lock->pinode; - gf_log (THIS->name, GF_LOG_WARNING, - "releasing lock on %s held by " - "{client=%p, pid=%"PRId64" lk-owner=%s}", - uuid_utoa (pinode->gfid), lock->client, - (uint64_t) lock->client_pid, lkowner_utoa (&lock->owner)); + gf_log(THIS->name, GF_LOG_WARNING, + "releasing lock on %s held by " + "{client=%p, pid=%" PRId64 " lk-owner=%s}", + uuid_utoa(pinode->gfid), lock->client, (uint64_t)lock->client_pid, + lkowner_utoa(&lock->owner)); } - /* Release all entrylks from this client */ int -pl_entrylk_client_cleanup (xlator_t *this, pl_ctx_t *ctx) +pl_entrylk_client_cleanup(xlator_t *this, pl_ctx_t *ctx) { - pl_entry_lock_t *tmp = NULL; - pl_entry_lock_t *l = NULL; - pl_dom_list_t *dom = NULL; - pl_inode_t *pinode = NULL; - - struct list_head released; - struct list_head unwind; - - INIT_LIST_HEAD (&released); - INIT_LIST_HEAD (&unwind); - - pthread_mutex_lock (&ctx->lock); + posix_locks_private_t *priv; + pl_entry_lock_t *tmp = NULL; + pl_entry_lock_t *l = NULL; + pl_dom_list_t *dom = NULL; + pl_inode_t *pinode = NULL; + struct list_head *pcontend = NULL; + struct list_head released; + struct list_head unwind; + struct list_head contend; + struct timespec now = {}; + + INIT_LIST_HEAD(&released); + INIT_LIST_HEAD(&unwind); + + priv = this->private; + if (priv->notify_contention) { + pcontend = &contend; + INIT_LIST_HEAD(pcontend); + timespec_now(&now); + } + + pthread_mutex_lock(&ctx->lock); + { + list_for_each_entry_safe(l, tmp, &ctx->entrylk_lockers, client_list) { - list_for_each_entry_safe (l, tmp, &ctx->entrylk_lockers, - client_list) { - list_del_init (&l->client_list); - - pl_entrylk_log_cleanup (l); - - pinode = l->pinode; - - pthread_mutex_lock (&pinode->mutex); - { - /* If the entrylk object is part of granted list but not - * blocked list, then perform the following actions: - * i. 
delete the object from granted list; - * ii. grant other locks (from other clients) that may - * have been blocked on this entrylk; and - * iii. unref the object. - * - * If the entrylk object (L1) is part of both granted - * and blocked lists, then this means that a parallel - * unlock on another entrylk (L2 say) may have 'granted' - * L1 and added it to 'granted' list in - * __grant_blocked_entry_locks() (although using the - * 'blocked_locks' member). In that case, the cleanup - * codepath must try and grant other overlapping - * blocked entrylks from other clients, now that L1 is - * out of their way and then unref L1 in the end, and - * leave it to the other thread (the one executing - * unlock codepath) to unwind L1's frame, delete it from - * blocked_locks list, and perform the last unref on L1. - * - * If the entrylk object (L1) is part of blocked list - * only, the cleanup code path must: - * i. delete it from the blocked_locks list inside - * this critical section, - * ii. unwind its frame with EAGAIN, - * iii. try and grant blocked entry locks from other - * clients that were otherwise grantable, but were - * blocked to avoid leaving L1 to starve forever. - * iv. unref the object. - */ - if (!list_empty (&l->domain_list)) { - list_del_init (&l->domain_list); - list_add_tail (&l->client_list, - &released); - } else { - list_del_init (&l->blocked_locks); - list_add_tail (&l->client_list, - &unwind); - } - } - pthread_mutex_unlock (&pinode->mutex); + pl_entrylk_log_cleanup(l); + + pinode = l->pinode; + + pthread_mutex_lock(&pinode->mutex); + { + /* If the entrylk object is part of granted list but not + * blocked list, then perform the following actions: + * i. delete the object from granted list; + * ii. grant other locks (from other clients) that may + * have been blocked on this entrylk; and + * iii. unref the object. 
+ * + * If the entrylk object (L1) is part of both granted + * and blocked lists, then this means that a parallel + * unlock on another entrylk (L2 say) may have 'granted' + * L1 and added it to 'granted' list in + * __grant_blocked_entry_locks() (although using the + * 'blocked_locks' member). In that case, the cleanup + * codepath must try and grant other overlapping + * blocked entrylks from other clients, now that L1 is + * out of their way and then unref L1 in the end, and + * leave it to the other thread (the one executing + * unlock codepath) to unwind L1's frame, delete it from + * blocked_locks list, and perform the last unref on L1. + * + * If the entrylk object (L1) is part of blocked list + * only, the cleanup code path must: + * i. delete it from the blocked_locks list inside + * this critical section, + * ii. unwind its frame with EAGAIN, + * iii. try and grant blocked entry locks from other + * clients that were otherwise grantable, but were + * blocked to avoid leaving L1 to starve forever. + * iv. unref the object. 
+ */ + list_del_init(&l->client_list); + + if (!list_empty(&l->domain_list)) { + list_del_init(&l->domain_list); + list_add_tail(&l->client_list, &released); + } else { + list_del_init(&l->blocked_locks); + list_add_tail(&l->client_list, &unwind); } - } - pthread_mutex_unlock (&ctx->lock); + } + pthread_mutex_unlock(&pinode->mutex); + } + } + pthread_mutex_unlock(&ctx->lock); - list_for_each_entry_safe (l, tmp, &unwind, client_list) { - list_del_init (&l->client_list); + if (!list_empty(&unwind)) { + list_for_each_entry_safe(l, tmp, &unwind, client_list) + { + list_del_init(&l->client_list); - if (l->frame) - STACK_UNWIND_STRICT (entrylk, l->frame, -1, EAGAIN, - NULL); - list_add_tail (&l->client_list, &released); + if (l->frame) + STACK_UNWIND_STRICT(entrylk, l->frame, -1, EAGAIN, NULL); + list_add_tail(&l->client_list, &released); } + } - list_for_each_entry_safe (l, tmp, &released, client_list) { - list_del_init (&l->client_list); + if (!list_empty(&released)) { + list_for_each_entry_safe(l, tmp, &released, client_list) + { + list_del_init(&l->client_list); - pinode = l->pinode; + pinode = l->pinode; - dom = get_domain (pinode, l->volume); + dom = get_domain(pinode, l->volume); - grant_blocked_entry_locks (this, pinode, dom); + grant_blocked_entry_locks(this, pinode, dom, &now, pcontend); - pthread_mutex_lock (&pinode->mutex); - { - __pl_entrylk_unref (l); - } - pthread_mutex_unlock (&pinode->mutex); - inode_unref (pinode->inode); + pthread_mutex_lock(&pinode->mutex); + { + __pl_entrylk_unref(l); + } + pthread_mutex_unlock(&pinode->mutex); + + inode_unref(pinode->inode); } + } - return 0; -} + if (pcontend != NULL) { + entrylk_contention_notify(this, pcontend); + } + return 0; +} int32_t -__get_entrylk_count (xlator_t *this, pl_inode_t *pl_inode) +__get_entrylk_count(xlator_t *this, pl_inode_t *pl_inode) { - int32_t count = 0; - pl_entry_lock_t *lock = NULL; - pl_dom_list_t *dom = NULL; + int32_t count = 0; + pl_entry_lock_t *lock = NULL; + pl_dom_list_t *dom = 
NULL; - list_for_each_entry (dom, &pl_inode->dom_list, inode_list) { - list_for_each_entry (lock, &dom->entrylk_list, domain_list) { - count++; - } - - list_for_each_entry (lock, &dom->blocked_entrylks, blocked_locks) { - count++; - } + list_for_each_entry(dom, &pl_inode->dom_list, inode_list) + { + list_for_each_entry(lock, &dom->entrylk_list, domain_list) { count++; } + list_for_each_entry(lock, &dom->blocked_entrylks, blocked_locks) + { + count++; } + } - return count; + return count; } int32_t -get_entrylk_count (xlator_t *this, inode_t *inode) +get_entrylk_count(xlator_t *this, inode_t *inode) { - pl_inode_t *pl_inode = NULL; - uint64_t tmp_pl_inode = 0; - int ret = 0; - int32_t count = 0; + pl_inode_t *pl_inode = NULL; + uint64_t tmp_pl_inode = 0; + int ret = 0; + int32_t count = 0; - ret = inode_ctx_get (inode, this, &tmp_pl_inode); - if (ret != 0) { - goto out; - } + ret = inode_ctx_get(inode, this, &tmp_pl_inode); + if (ret != 0) { + goto out; + } - pl_inode = (pl_inode_t *)(long) tmp_pl_inode; + pl_inode = (pl_inode_t *)(long)tmp_pl_inode; - pthread_mutex_lock (&pl_inode->mutex); - { - count = __get_entrylk_count (this, pl_inode); - } - pthread_mutex_unlock (&pl_inode->mutex); + pthread_mutex_lock(&pl_inode->mutex); + { + count = __get_entrylk_count(this, pl_inode); + } + pthread_mutex_unlock(&pl_inode->mutex); out: - return count; + return count; } diff --git a/xlators/features/locks/src/inodelk.c b/xlators/features/locks/src/inodelk.c index 1564f26b8fb..d4e51d6e0a1 100644 --- a/xlators/features/locks/src/inodelk.c +++ b/xlators/features/locks/src/inodelk.c @@ -7,888 +7,1168 @@ later), or the GNU General Public License, version 2 (GPLv2), in all cases as published by the Free Software Foundation. 
*/ -#include "glusterfs.h" -#include "compat.h" -#include "xlator.h" -#include "inode.h" -#include "logging.h" -#include "common-utils.h" -#include "list.h" +#include <glusterfs/glusterfs.h> +#include <glusterfs/compat.h> +#include <glusterfs/dict.h> +#include <glusterfs/logging.h> +#include <glusterfs/list.h> +#include <glusterfs/upcall-utils.h> #include "locks.h" +#include "clear.h" #include "common.h" void -__delete_inode_lock (pl_inode_lock_t *lock) +__delete_inode_lock(pl_inode_lock_t *lock) { - list_del_init (&lock->list); + list_del_init(&lock->list); } static void -__pl_inodelk_ref (pl_inode_lock_t *lock) +__pl_inodelk_ref(pl_inode_lock_t *lock) { - lock->ref++; + lock->ref++; } void -__pl_inodelk_unref (pl_inode_lock_t *lock) +__pl_inodelk_unref(pl_inode_lock_t *lock) { - lock->ref--; - if (!lock->ref) { - GF_FREE (lock->connection_id); - GF_FREE (lock); - } + lock->ref--; + if (!lock->ref) { + GF_FREE(lock->connection_id); + GF_FREE(lock); + } } -/* Check if 2 inodelks are conflicting on type. Only 2 shared locks don't conflict */ +/* Check if 2 inodelks are conflicting on type. 
Only 2 shared locks don't + * conflict */ static int -inodelk_type_conflict (pl_inode_lock_t *l1, pl_inode_lock_t *l2) +inodelk_type_conflict(pl_inode_lock_t *l1, pl_inode_lock_t *l2) { - if (l2->fl_type == F_WRLCK || l1->fl_type == F_WRLCK) - return 1; + if (l2->fl_type == F_WRLCK || l1->fl_type == F_WRLCK) + return 1; - return 0; + return 0; } void -pl_print_inodelk (char *str, int size, int cmd, struct gf_flock *flock, const char *domain) +pl_print_inodelk(char *str, int size, int cmd, struct gf_flock *flock, + const char *domain) { - char *cmd_str = NULL; - char *type_str = NULL; + char *cmd_str = NULL; + char *type_str = NULL; - switch (cmd) { + switch (cmd) { #if F_GETLK != F_GETLK64 case F_GETLK64: #endif case F_GETLK: - cmd_str = "GETLK"; - break; + cmd_str = "GETLK"; + break; #if F_SETLK != F_SETLK64 case F_SETLK64: #endif case F_SETLK: - cmd_str = "SETLK"; - break; + cmd_str = "SETLK"; + break; #if F_SETLKW != F_SETLKW64 case F_SETLKW64: #endif case F_SETLKW: - cmd_str = "SETLKW"; - break; + cmd_str = "SETLKW"; + break; default: - cmd_str = "UNKNOWN"; - break; - } + cmd_str = "UNKNOWN"; + break; + } - switch (flock->l_type) { + switch (flock->l_type) { case F_RDLCK: - type_str = "READ"; - break; + type_str = "READ"; + break; case F_WRLCK: - type_str = "WRITE"; - break; + type_str = "WRITE"; + break; case F_UNLCK: - type_str = "UNLOCK"; - break; + type_str = "UNLOCK"; + break; default: - type_str = "UNKNOWN"; - break; - } - - snprintf (str, size, "lock=INODELK, cmd=%s, type=%s, " - "domain: %s, start=%llu, len=%llu, pid=%llu", - cmd_str, type_str, domain, - (unsigned long long) flock->l_start, - (unsigned long long) flock->l_len, - (unsigned long long) flock->l_pid); + type_str = "UNKNOWN"; + break; + } + + snprintf(str, size, + "lock=INODELK, cmd=%s, type=%s, " + "domain: %s, start=%llu, len=%llu, pid=%llu", + cmd_str, type_str, domain, (unsigned long long)flock->l_start, + (unsigned long long)flock->l_len, + (unsigned long long)flock->l_pid); } /* 
Determine if the two inodelks overlap reach other's lock regions */ static int -inodelk_overlap (pl_inode_lock_t *l1, pl_inode_lock_t *l2) +inodelk_overlap(pl_inode_lock_t *l1, pl_inode_lock_t *l2) { - return ((l1->fl_end >= l2->fl_start) && - (l2->fl_end >= l1->fl_start)); + return ((l1->fl_end >= l2->fl_start) && (l2->fl_end >= l1->fl_start)); } /* Returns true if the 2 inodelks have the same owner */ static int -same_inodelk_owner (pl_inode_lock_t *l1, pl_inode_lock_t *l2) +same_inodelk_owner(pl_inode_lock_t *l1, pl_inode_lock_t *l2) { - return (is_same_lkowner (&l1->owner, &l2->owner) && - (l1->client == l2->client)); + return (is_same_lkowner(&l1->owner, &l2->owner) && + (l1->client == l2->client)); } /* Returns true if the 2 inodelks conflict with each other */ static int -inodelk_conflict (pl_inode_lock_t *l1, pl_inode_lock_t *l2) +inodelk_conflict(pl_inode_lock_t *l1, pl_inode_lock_t *l2) { - return (inodelk_overlap (l1, l2) && - inodelk_type_conflict (l1, l2)); + return (inodelk_overlap(l1, l2) && inodelk_type_conflict(l1, l2)); } -/* Determine if lock is grantable or not */ -static pl_inode_lock_t * -__inodelk_grantable (pl_dom_list_t *dom, pl_inode_lock_t *lock) +/* + * Check to see if the candidate lock overlaps/conflicts with the + * requested lock. If so, determine how old the lock is and return + * true if it exceeds the configured threshold, false otherwise. + */ +static inline gf_boolean_t +__stale_inodelk(xlator_t *this, pl_inode_lock_t *candidate_lock, + pl_inode_lock_t *requested_lock, time_t *lock_age_sec) { - pl_inode_lock_t *l = NULL; - pl_inode_lock_t *ret = NULL; - if (list_empty (&dom->inodelk_list)) - goto out; - list_for_each_entry (l, &dom->inodelk_list, list){ - if (inodelk_conflict (lock, l) && - !same_inodelk_owner (lock, l)) { - ret = l; - goto out; - } - } -out: - return ret; + posix_locks_private_t *priv = NULL; + + priv = this->private; + /* Question: Should we just prune them all given the + * chance? 
Or just the locks we are attempting to acquire? + */ + if (inodelk_conflict(candidate_lock, requested_lock)) { + *lock_age_sec = gf_time() - candidate_lock->granted_time; + if (*lock_age_sec > priv->revocation_secs) + return _gf_true; + } + return _gf_false; } -static pl_inode_lock_t * -__blocked_lock_conflict (pl_dom_list_t *dom, pl_inode_lock_t *lock) +/* Examine any locks held on this inode and potentially revoke the lock + * if the age exceeds revocation_secs. We will clear _only_ those locks + * which are granted, and then grant those locks which are blocked. + * + * Depending on how this patch works in the wild, we may expand this and + * introduce a heuristic which clears blocked locks as well if they + * are beyond a threshold. + */ +static gf_boolean_t +__inodelk_prune_stale(xlator_t *this, pl_inode_t *pinode, pl_dom_list_t *dom, + pl_inode_lock_t *lock) { - pl_inode_lock_t *l = NULL; - pl_inode_lock_t *ret = NULL; - - if (list_empty (&dom->blocked_inodelks)) - return NULL; + posix_locks_private_t *priv = NULL; + pl_inode_lock_t *tmp = NULL; + pl_inode_lock_t *lk = NULL; + gf_boolean_t revoke_lock = _gf_false; + int bcount = 0; + int gcount = 0; + int op_errno = 0; + clrlk_args args; + args.opts = NULL; + time_t lk_age_sec = 0; + uint32_t max_blocked = 0; + char *reason_str = NULL; + + priv = this->private; + + args.type = CLRLK_INODE; + if (priv->revocation_clear_all == _gf_true) + args.kind = CLRLK_ALL; + else + args.kind = CLRLK_GRANTED; + + if (list_empty(&dom->inodelk_list)) + goto out; + + pthread_mutex_lock(&pinode->mutex); + list_for_each_entry_safe(lk, tmp, &dom->inodelk_list, list) + { + if (__stale_inodelk(this, lk, lock, &lk_age_sec) == _gf_true) { + revoke_lock = _gf_true; + reason_str = "age"; + break; + } + } - list_for_each_entry (l, &dom->blocked_inodelks, blocked_locks) { - if (inodelk_conflict (lock, l)) { - ret = l; - goto out; - } + max_blocked = priv->revocation_max_blocked; + if (max_blocked != 0 && revoke_lock == _gf_false) { + 
list_for_each_entry_safe(lk, tmp, &dom->blocked_inodelks, blocked_locks) + { + max_blocked--; + if (max_blocked == 0) { + revoke_lock = _gf_true; + reason_str = "max blocked"; + break; + } } + } + pthread_mutex_unlock(&pinode->mutex); out: - return ret; + if (revoke_lock == _gf_true) { + clrlk_clear_inodelk(this, pinode, dom, &args, &bcount, &gcount, + &op_errno); + gf_log(this->name, GF_LOG_WARNING, + "Lock revocation [reason: %s; gfid: %s; domain: %s; " + "age: %ld sec] - Inode lock revoked: %d granted & %d " + "blocked locks cleared", + reason_str, uuid_utoa(pinode->gfid), dom->domain, lk_age_sec, + gcount, bcount); + } + return revoke_lock; } -static int -__owner_has_lock (pl_dom_list_t *dom, pl_inode_lock_t *newlock) +void +inodelk_contention_notify_check(xlator_t *this, pl_inode_lock_t *lock, + struct timespec *now, struct list_head *contend) { - pl_inode_lock_t *lock = NULL; + posix_locks_private_t *priv; + int64_t elapsed; - list_for_each_entry (lock, &dom->inodelk_list, list) { - if (same_inodelk_owner (lock, newlock)) - return 1; - } + priv = this->private; - list_for_each_entry (lock, &dom->blocked_inodelks, blocked_locks) { - if (same_inodelk_owner (lock, newlock)) - return 1; - } + /* If this lock is in a list, it means that we are about to send a + * notification for it, so no need to do anything else. */ + if (!list_empty(&lock->contend)) { + return; + } + + elapsed = now->tv_sec; + elapsed -= lock->contention_time.tv_sec; + if (now->tv_nsec < lock->contention_time.tv_nsec) { + elapsed--; + } + if (elapsed < priv->notify_contention_delay) { + return; + } - return 0; -} + /* All contention notifications will be sent outside of the locked + * region. This means that currently granted locks might have already + * been unlocked by that time. To avoid the lock or the inode to be + * destroyed before we process them, we take an additional reference + * on both. 
*/ + inode_ref(lock->pl_inode->inode); + __pl_inodelk_ref(lock); + lock->contention_time = *now; -/* Determines if lock can be granted and adds the lock. If the lock - * is blocking, adds it to the blocked_inodelks list of the domain. - */ -static int -__lock_inodelk (xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock, - int can_block, pl_dom_list_t *dom) + list_add_tail(&lock->contend, contend); +} + +void +inodelk_contention_notify(xlator_t *this, struct list_head *contend) { - pl_inode_lock_t *conf = NULL; - int ret = -EINVAL; + struct gf_upcall up; + struct gf_upcall_inodelk_contention lc; + pl_inode_lock_t *lock; + pl_inode_t *pl_inode; + client_t *client; + gf_boolean_t notify; + + while (!list_empty(contend)) { + lock = list_first_entry(contend, pl_inode_lock_t, contend); + + pl_inode = lock->pl_inode; + + pthread_mutex_lock(&pl_inode->mutex); + + /* If the lock has already been released, no notification is + * sent. We clear the notification time in this case. */ + notify = !list_empty(&lock->list); + if (!notify) { + lock->contention_time.tv_sec = 0; + lock->contention_time.tv_nsec = 0; + } else { + memcpy(&lc.flock, &lock->user_flock, sizeof(lc.flock)); + lc.pid = lock->client_pid; + lc.domain = lock->volume; + lc.xdata = NULL; + + gf_uuid_copy(up.gfid, lock->pl_inode->gfid); + client = (client_t *)lock->client; + if (client == NULL) { + /* A NULL client can be found if the inodelk + * was issued by a server side xlator. 
*/ + up.client_uid = NULL; + } else { + up.client_uid = client->client_uid; + } + } - conf = __inodelk_grantable (dom, lock); - if (conf) { - ret = -EAGAIN; - if (can_block == 0) - goto out; + pthread_mutex_unlock(&pl_inode->mutex); + + if (notify) { + up.event_type = GF_UPCALL_INODELK_CONTENTION; + up.data = &lc; + + if (this->notify(this, GF_EVENT_UPCALL, &up) < 0) { + gf_msg_debug(this->name, 0, + "Inodelk contention notification " + "failed"); + } else { + gf_msg_debug(this->name, 0, + "Inodelk contention notification " + "sent"); + } + } - gettimeofday (&lock->blkd_time, NULL); - list_add_tail (&lock->blocked_locks, &dom->blocked_inodelks); + pthread_mutex_lock(&pl_inode->mutex); - gf_log (this->name, GF_LOG_TRACE, - "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => Blocked", - lock->fl_type == F_UNLCK ? "Unlock" : "Lock", - lock->client_pid, - lkowner_utoa (&lock->owner), - lock->user_flock.l_start, - lock->user_flock.l_len); + list_del_init(&lock->contend); + __pl_inodelk_unref(lock); + pthread_mutex_unlock(&pl_inode->mutex); - goto out; + inode_unref(pl_inode->inode); + } +} + +/* Determine if lock is grantable or not */ +static pl_inode_lock_t * +__inodelk_grantable(xlator_t *this, pl_dom_list_t *dom, pl_inode_lock_t *lock, + struct timespec *now, struct list_head *contend) +{ + pl_inode_lock_t *l = NULL; + pl_inode_lock_t *ret = NULL; + + list_for_each_entry(l, &dom->inodelk_list, list) + { + if (inodelk_conflict(lock, l) && !same_inodelk_owner(lock, l)) { + if (ret == NULL) { + ret = l; + if (contend == NULL) { + break; + } + } + inodelk_contention_notify_check(this, l, now, contend); } + } - /* To prevent blocked locks starvation, check if there are any blocked - * locks thay may conflict with this lock. If there is then don't grant - * the lock. BUT grant the lock if the owner already has lock to allow - * nested locks. - * Example: - * SHD from Machine1 takes (gfid, 0-infinity) and is granted. 
- * SHD from machine2 takes (gfid, 0-infinity) and is blocked. - * When SHD from Machine1 takes (gfid, 0-128KB) it - * needs to be granted, without which the earlier lock on 0-infinity - * will not be unlocked by SHD from Machine1. - * TODO: Find why 'owner_has_lock' is checked even for blocked locks. - */ - if (__blocked_lock_conflict (dom, lock) && !(__owner_has_lock (dom, lock))) { - ret = -EAGAIN; - if (can_block == 0) - goto out; + return ret; +} - gettimeofday (&lock->blkd_time, NULL); - list_add_tail (&lock->blocked_locks, &dom->blocked_inodelks); +static pl_inode_lock_t * +__blocked_lock_conflict(pl_dom_list_t *dom, pl_inode_lock_t *lock) +{ + pl_inode_lock_t *l = NULL; - gf_log (this->name, GF_LOG_DEBUG, - "Lock is grantable, but blocking to prevent starvation"); - gf_log (this->name, GF_LOG_TRACE, - "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => Blocked", - lock->fl_type == F_UNLCK ? "Unlock" : "Lock", - lock->client_pid, - lkowner_utoa (&lock->owner), - lock->user_flock.l_start, - lock->user_flock.l_len); + list_for_each_entry(l, &dom->blocked_inodelks, blocked_locks) + { + if (inodelk_conflict(lock, l)) { + return l; + } + } + return NULL; +} - goto out; - } - __pl_inodelk_ref (lock); - gettimeofday (&lock->granted_time, NULL); - list_add (&lock->list, &dom->inodelk_list); +static int +__owner_has_lock(pl_dom_list_t *dom, pl_inode_lock_t *newlock) +{ + pl_inode_lock_t *lock = NULL; + + list_for_each_entry(lock, &dom->inodelk_list, list) + { + if (same_inodelk_owner(lock, newlock)) + return 1; + } - ret = 0; + list_for_each_entry(lock, &dom->blocked_inodelks, blocked_locks) + { + if (same_inodelk_owner(lock, newlock)) + return 1; + } + return 0; +} + +static int +__lock_blocked_add(xlator_t *this, pl_dom_list_t *dom, pl_inode_lock_t *lock, + int can_block) +{ + if (can_block == 0) { + goto out; + } + + lock->blkd_time = gf_time(); + list_add_tail(&lock->blocked_locks, &dom->blocked_inodelks); + + gf_msg_trace(this->name, 0, + "%s (pid=%d) 
(lk-owner=%s) %" PRId64 + " - " + "%" PRId64 " => Blocked", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", lock->client_pid, + lkowner_utoa(&lock->owner), lock->user_flock.l_start, + lock->user_flock.l_len); + + pl_trace_block(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock, + lock->volume); out: + return -EAGAIN; +} + +/* Determines if lock can be granted and adds the lock. If the lock + * is blocking, adds it to the blocked_inodelks list of the domain. + */ +static int +__lock_inodelk(xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock, + int can_block, pl_dom_list_t *dom, struct timespec *now, + struct list_head *contend) +{ + pl_inode_lock_t *conf = NULL; + int ret; + + ret = pl_inode_remove_inodelk(pl_inode, lock); + if (ret < 0) { return ret; + } + if (ret == 0) { + conf = __inodelk_grantable(this, dom, lock, now, contend); + } + if ((ret > 0) || (conf != NULL)) { + return __lock_blocked_add(this, dom, lock, can_block); + } + + /* To prevent blocked locks starvation, check if there are any blocked + * locks thay may conflict with this lock. If there is then don't grant + * the lock. BUT grant the lock if the owner already has lock to allow + * nested locks. + * Example: + * SHD from Machine1 takes (gfid, 0-infinity) and is granted. + * SHD from machine2 takes (gfid, 0-infinity) and is blocked. + * When SHD from Machine1 takes (gfid, 0-128KB) it + * needs to be granted, without which the earlier lock on 0-infinity + * will not be unlocked by SHD from Machine1. + * TODO: Find why 'owner_has_lock' is checked even for blocked locks. 
+ */ + if (__blocked_lock_conflict(dom, lock) && !(__owner_has_lock(dom, lock))) { + if (can_block != 0) { + gf_log(this->name, GF_LOG_DEBUG, + "Lock is grantable, but blocking to prevent " + "starvation"); + } + + return __lock_blocked_add(this, dom, lock, can_block); + } + __pl_inodelk_ref(lock); + lock->granted_time = gf_time(); + list_add(&lock->list, &dom->inodelk_list); + + return 0; } /* Return true if the two inodelks have exactly same lock boundaries */ static int -inodelks_equal (pl_inode_lock_t *l1, pl_inode_lock_t *l2) +inodelks_equal(pl_inode_lock_t *l1, pl_inode_lock_t *l2) { - if ((l1->fl_start == l2->fl_start) && - (l1->fl_end == l2->fl_end)) - return 1; + if ((l1->fl_start == l2->fl_start) && (l1->fl_end == l2->fl_end)) + return 1; - return 0; + return 0; } - static pl_inode_lock_t * -find_matching_inodelk (pl_inode_lock_t *lock, pl_dom_list_t *dom) +find_matching_inodelk(pl_inode_lock_t *lock, pl_dom_list_t *dom) { - pl_inode_lock_t *l = NULL; - list_for_each_entry (l, &dom->inodelk_list, list) { - if (inodelks_equal (l, lock) && - same_inodelk_owner (l, lock)) - return l; - } - return NULL; + pl_inode_lock_t *l = NULL; + list_for_each_entry(l, &dom->inodelk_list, list) + { + if (inodelks_equal(l, lock) && same_inodelk_owner(l, lock)) + return l; + } + return NULL; } /* Set F_UNLCK removes a lock which has the exact same lock boundaries * as the UNLCK lock specifies. 
If such a lock is not found, returns invalid */ static pl_inode_lock_t * -__inode_unlock_lock (xlator_t *this, pl_inode_lock_t *lock, pl_dom_list_t *dom) +__inode_unlock_lock(xlator_t *this, pl_inode_lock_t *lock, pl_dom_list_t *dom) { - - pl_inode_lock_t *conf = NULL; - - conf = find_matching_inodelk (lock, dom); - if (!conf) { - gf_log (this->name, GF_LOG_ERROR, - " Matching lock not found for unlock %llu-%llu, by %s " - "on %p", (unsigned long long)lock->fl_start, - (unsigned long long)lock->fl_end, - lkowner_utoa (&lock->owner), lock->client); - goto out; - } - __delete_inode_lock (conf); - gf_log (this->name, GF_LOG_DEBUG, - " Matching lock found for unlock %llu-%llu, by %s on %p", - (unsigned long long)lock->fl_start, - (unsigned long long)lock->fl_end, lkowner_utoa (&lock->owner), - lock->client); + pl_inode_lock_t *conf = NULL; + inode_t *inode = NULL; + + inode = lock->pl_inode->inode; + + conf = find_matching_inodelk(lock, dom); + if (!conf) { + gf_log(this->name, GF_LOG_ERROR, + " Matching lock not found for unlock %llu-%llu, by %s " + "on %p for gfid:%s", + (unsigned long long)lock->fl_start, + (unsigned long long)lock->fl_end, lkowner_utoa(&lock->owner), + lock->client, inode ? uuid_utoa(inode->gfid) : "UNKNOWN"); + goto out; + } + __delete_inode_lock(conf); + gf_log(this->name, GF_LOG_DEBUG, + " Matching lock found for unlock %llu-%llu, by %s on %p for gfid:%s", + (unsigned long long)lock->fl_start, (unsigned long long)lock->fl_end, + lkowner_utoa(&lock->owner), lock->client, + inode ? 
uuid_utoa(inode->gfid) : "UNKNOWN"); out: - return conf; + return conf; } - -static void -__grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode, - struct list_head *granted, pl_dom_list_t *dom) +void +__grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode, + struct list_head *granted, pl_dom_list_t *dom, + struct timespec *now, struct list_head *contend) { - int bl_ret = 0; - pl_inode_lock_t *bl = NULL; - pl_inode_lock_t *tmp = NULL; + pl_inode_lock_t *bl = NULL; + pl_inode_lock_t *tmp = NULL; - struct list_head blocked_list; + struct list_head blocked_list; - INIT_LIST_HEAD (&blocked_list); - list_splice_init (&dom->blocked_inodelks, &blocked_list); + INIT_LIST_HEAD(&blocked_list); + list_splice_init(&dom->blocked_inodelks, &blocked_list); - list_for_each_entry_safe (bl, tmp, &blocked_list, blocked_locks) { + list_for_each_entry_safe(bl, tmp, &blocked_list, blocked_locks) + { + list_del_init(&bl->blocked_locks); - list_del_init (&bl->blocked_locks); + bl->status = __lock_inodelk(this, pl_inode, bl, 1, dom, now, contend); - bl_ret = __lock_inodelk (this, pl_inode, bl, 1, dom); - - if (bl_ret == 0) { - list_add (&bl->blocked_locks, granted); - } + if (bl->status != -EAGAIN) { + list_add_tail(&bl->blocked_locks, granted); } - return; + } } -/* Grant all inodelks blocked on a lock */ void -grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode, - pl_dom_list_t *dom) +unwind_granted_inodes(xlator_t *this, pl_inode_t *pl_inode, + struct list_head *granted) { - struct list_head granted; - pl_inode_lock_t *lock; - pl_inode_lock_t *tmp; + pl_inode_lock_t *lock; + pl_inode_lock_t *tmp; + int32_t op_ret; + int32_t op_errno; + + list_for_each_entry_safe(lock, tmp, granted, blocked_locks) + { + if (lock->status == 0) { + op_ret = 0; + op_errno = 0; + gf_log(this->name, GF_LOG_TRACE, + "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64 + " => Granted", + lock->fl_type == F_UNLCK ? 
"Unlock" : "Lock", + lock->client_pid, lkowner_utoa(&lock->owner), + lock->user_flock.l_start, lock->user_flock.l_len); + } else { + op_ret = -1; + op_errno = -lock->status; + } + pl_trace_out(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock, + op_ret, op_errno, lock->volume); - INIT_LIST_HEAD (&granted); + STACK_UNWIND_STRICT(inodelk, lock->frame, op_ret, op_errno, NULL); + lock->frame = NULL; + } - pthread_mutex_lock (&pl_inode->mutex); + pthread_mutex_lock(&pl_inode->mutex); + { + list_for_each_entry_safe(lock, tmp, granted, blocked_locks) { - __grant_blocked_inode_locks (this, pl_inode, &granted, dom); - } - pthread_mutex_unlock (&pl_inode->mutex); - - list_for_each_entry_safe (lock, tmp, &granted, blocked_locks) { - gf_log (this->name, GF_LOG_TRACE, - "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => Granted", - lock->fl_type == F_UNLCK ? "Unlock" : "Lock", - lock->client_pid, - lkowner_utoa (&lock->owner), - lock->user_flock.l_start, - lock->user_flock.l_len); - - pl_trace_out (this, lock->frame, NULL, NULL, F_SETLKW, - &lock->user_flock, 0, 0, lock->volume); - - STACK_UNWIND_STRICT (inodelk, lock->frame, 0, 0, NULL); - lock->frame = NULL; + list_del_init(&lock->blocked_locks); + __pl_inodelk_unref(lock); } - - pthread_mutex_lock (&pl_inode->mutex); - { - list_for_each_entry_safe (lock, tmp, &granted, blocked_locks) { - list_del_init (&lock->blocked_locks); - __pl_inodelk_unref (lock); - } - } - pthread_mutex_unlock (&pl_inode->mutex); + } + pthread_mutex_unlock(&pl_inode->mutex); } +/* Grant all inodelks blocked on a lock */ +void +grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode, + pl_dom_list_t *dom, struct timespec *now, + struct list_head *contend) +{ + struct list_head granted; + + INIT_LIST_HEAD(&granted); + + pthread_mutex_lock(&pl_inode->mutex); + { + __grant_blocked_inode_locks(this, pl_inode, &granted, dom, now, + contend); + } + pthread_mutex_unlock(&pl_inode->mutex); + + unwind_granted_inodes(this, pl_inode, &granted); 
+} static void -pl_inodelk_log_cleanup (pl_inode_lock_t *lock) +pl_inodelk_log_cleanup(pl_inode_lock_t *lock) { - pl_inode_t *pl_inode = NULL; + pl_inode_t *pl_inode = NULL; - pl_inode = lock->pl_inode; + pl_inode = lock->pl_inode; - gf_log (THIS->name, GF_LOG_WARNING, "releasing lock on %s held by " - "{client=%p, pid=%"PRId64" lk-owner=%s}", - uuid_utoa (pl_inode->gfid), lock->client, - (uint64_t) lock->client_pid, lkowner_utoa (&lock->owner)); + gf_log(THIS->name, GF_LOG_WARNING, + "releasing lock on %s held by " + "{client=%p, pid=%" PRId64 " lk-owner=%s}", + uuid_utoa(pl_inode->gfid), lock->client, (uint64_t)lock->client_pid, + lkowner_utoa(&lock->owner)); } - /* Release all inodelks from this client */ int -pl_inodelk_client_cleanup (xlator_t *this, pl_ctx_t *ctx) +pl_inodelk_client_cleanup(xlator_t *this, pl_ctx_t *ctx) { - pl_inode_lock_t *tmp = NULL; - pl_inode_lock_t *l = NULL; - pl_dom_list_t *dom = NULL; - pl_inode_t *pl_inode = NULL; - - struct list_head released; - struct list_head unwind; - - INIT_LIST_HEAD (&released); - INIT_LIST_HEAD (&unwind); - - pthread_mutex_lock (&ctx->lock); + posix_locks_private_t *priv; + pl_inode_lock_t *tmp = NULL; + pl_inode_lock_t *l = NULL; + pl_dom_list_t *dom = NULL; + pl_inode_t *pl_inode = NULL; + struct list_head *pcontend = NULL; + struct list_head released; + struct list_head unwind; + struct list_head contend; + struct timespec now = {}; + + priv = this->private; + + INIT_LIST_HEAD(&released); + INIT_LIST_HEAD(&unwind); + + if (priv->notify_contention) { + pcontend = &contend; + INIT_LIST_HEAD(pcontend); + timespec_now(&now); + } + + pthread_mutex_lock(&ctx->lock); + { + list_for_each_entry_safe(l, tmp, &ctx->inodelk_lockers, client_list) { - list_for_each_entry_safe (l, tmp, &ctx->inodelk_lockers, - client_list) { - list_del_init (&l->client_list); - - pl_inodelk_log_cleanup (l); - - pl_inode = l->pl_inode; - - pthread_mutex_lock (&pl_inode->mutex); - { - /* If the inodelk object is part of granted list but 
not - * blocked list, then perform the following actions: - * i. delete the object from granted list; - * ii. grant other locks (from other clients) that may - * have been blocked on this inodelk; and - * iii. unref the object. - * - * If the inodelk object (L1) is part of both granted - * and blocked lists, then this means that a parallel - * unlock on another inodelk (L2 say) may have 'granted' - * L1 and added it to 'granted' list in - * __grant_blocked_node_locks() (although using the - * 'blocked_locks' member). In that case, the cleanup - * codepath must try and grant other overlapping - * blocked inodelks from other clients, now that L1 is - * out of their way and then unref L1 in the end, and - * leave it to the other thread (the one executing - * unlock codepath) to unwind L1's frame, delete it from - * blocked_locks list, and perform the last unref on L1. - * - * If the inodelk object (L1) is part of blocked list - * only, the cleanup code path must: - * i. delete it from the blocked_locks list inside - * this critical section, - * ii. unwind its frame with EAGAIN, - * iii. try and grant blocked inode locks from other - * clients that were otherwise grantable, but just - * got blocked to avoid leaving L1 to starve - * forever. - * iv. unref the object. - */ - if (!list_empty (&l->list)) { - __delete_inode_lock (l); - list_add_tail (&l->client_list, - &released); - } else { - list_del_init(&l->blocked_locks); - list_add_tail (&l->client_list, - &unwind); - } - } - pthread_mutex_unlock (&pl_inode->mutex); + pl_inodelk_log_cleanup(l); + + pl_inode = l->pl_inode; + + pthread_mutex_lock(&pl_inode->mutex); + { + /* If the inodelk object is part of granted list but not + * blocked list, then perform the following actions: + * i. delete the object from granted list; + * ii. grant other locks (from other clients) that may + * have been blocked on this inodelk; and + * iii. unref the object. 
+ * + * If the inodelk object (L1) is part of both granted + * and blocked lists, then this means that a parallel + * unlock on another inodelk (L2 say) may have 'granted' + * L1 and added it to 'granted' list in + * __grant_blocked_inode_locks() (although using the + * 'blocked_locks' member). In that case, the cleanup + * codepath must try and grant other overlapping + * blocked inodelks from other clients, now that L1 is + * out of their way and then unref L1 in the end, and + * leave it to the other thread (the one executing + * unlock codepath) to unwind L1's frame, delete it from + * blocked_locks list, and perform the last unref on L1. + * + * If the inodelk object (L1) is part of blocked list + * only, the cleanup code path must: + * i. delete it from the blocked_locks list inside + * this critical section, + * ii. unwind its frame with EAGAIN, + * iii. try and grant blocked inode locks from other + * clients that were otherwise grantable, but just + * got blocked to avoid leaving L1 to starve + * forever. + * iv. unref the object. 
+ */ + list_del_init(&l->client_list); + + if (!list_empty(&l->list)) { + __delete_inode_lock(l); + list_add_tail(&l->client_list, &released); + } else { + list_del_init(&l->blocked_locks); + list_add_tail(&l->client_list, &unwind); } - } - pthread_mutex_unlock (&ctx->lock); - - list_for_each_entry_safe (l, tmp, &unwind, client_list) { - list_del_init (&l->client_list); + } + pthread_mutex_unlock(&pl_inode->mutex); + } + } + pthread_mutex_unlock(&ctx->lock); - if (l->frame) - STACK_UNWIND_STRICT (inodelk, l->frame, -1, EAGAIN, - NULL); - list_add_tail (&l->client_list, &released); + if (!list_empty(&unwind)) { + list_for_each_entry_safe(l, tmp, &unwind, client_list) + { + list_del_init(&l->client_list); + if (l->frame) + STACK_UNWIND_STRICT(inodelk, l->frame, -1, EAGAIN, NULL); + list_add_tail(&l->client_list, &released); } + } - list_for_each_entry_safe (l, tmp, &released, client_list) { - list_del_init (&l->client_list); + if (!list_empty(&released)) { + list_for_each_entry_safe(l, tmp, &released, client_list) + { + list_del_init(&l->client_list); - pl_inode = l->pl_inode; + pl_inode = l->pl_inode; - dom = get_domain (pl_inode, l->volume); + dom = get_domain(pl_inode, l->volume); - grant_blocked_inode_locks (this, pl_inode, dom); + grant_blocked_inode_locks(this, pl_inode, dom, &now, pcontend); - pthread_mutex_lock (&pl_inode->mutex); - { - __pl_inodelk_unref (l); - } - pthread_mutex_unlock (&pl_inode->mutex); - inode_unref (pl_inode->inode); + pthread_mutex_lock(&pl_inode->mutex); + { + __pl_inodelk_unref(l); + } + pthread_mutex_unlock(&pl_inode->mutex); + inode_unref(pl_inode->inode); } + } - return 0; -} + if (pcontend != NULL) { + inodelk_contention_notify(this, pcontend); + } + return 0; +} static int -pl_inode_setlk (xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode, - pl_inode_lock_t *lock, int can_block, pl_dom_list_t *dom, - inode_t *inode) -{ - int ret = -EINVAL; - pl_inode_lock_t *retlock = NULL; - gf_boolean_t unref = _gf_true; - gf_boolean_t 
need_inode_unref = _gf_false; - short fl_type; - - lock->pl_inode = pl_inode; - fl_type = lock->fl_type; - - /* Ideally, AFTER a successful lock (both blocking and non-blocking) or - * an unsuccessful blocking lock operation, the inode needs to be ref'd. - * - * But doing so might give room to a race where the lock-requesting - * client could send a DISCONNECT just before this thread refs the inode - * after the locking is done, and the epoll thread could unref the inode - * in cleanup which means the inode's refcount would come down to 0, and - * the call to pl_forget() at this point destroys @pl_inode. Now when - * the io-thread executing this function tries to access pl_inode, - * it could crash on account of illegal memory access. - * - * To get around this problem, the inode is ref'd once even before - * adding the lock into client_list as a precautionary measure. - * This way even if there are DISCONNECTs, there will always be 1 extra - * ref on the inode, so @pl_inode is still alive until after the - * current stack unwinds. - */ - pl_inode->inode = inode_ref (inode); - - if (ctx) - pthread_mutex_lock (&ctx->lock); - pthread_mutex_lock (&pl_inode->mutex); - { - if (lock->fl_type != F_UNLCK) { - ret = __lock_inodelk (this, pl_inode, lock, can_block, dom); - if (ret == 0) { - lock->frame = NULL; - gf_log (this->name, GF_LOG_TRACE, - "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => OK", - lock->fl_type == F_UNLCK ? "Unlock" : "Lock", - lock->client_pid, - lkowner_utoa (&lock->owner), - lock->fl_start, - lock->fl_end); - } else if (ret == -EAGAIN) { - gf_log (this->name, GF_LOG_TRACE, - "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => NOK", - lock->fl_type == F_UNLCK ? 
"Unlock" : "Lock", - lock->client_pid, - lkowner_utoa (&lock->owner), - lock->user_flock.l_start, - lock->user_flock.l_len); - if (can_block) - unref = _gf_false; - /* For all but the case where a non-blocking - * lock attempt fails, the extra ref taken at - * the start of this function must be negated. - */ - else - need_inode_unref = _gf_true; - } - - if (ctx && (!ret || can_block)) - list_add_tail (&lock->client_list, - &ctx->inodelk_lockers); - } else { - /* Irrespective of whether unlock succeeds or not, - * the extra inode ref that was done at the start of - * this function must be negated. Towards this, - * @need_inode_unref flag is set unconditionally here. - */ - need_inode_unref = _gf_true; - retlock = __inode_unlock_lock (this, lock, dom); - if (!retlock) { - gf_log (this->name, GF_LOG_DEBUG, - "Bad Unlock issued on Inode lock"); - ret = -EINVAL; - goto out; - } - list_del_init (&retlock->client_list); - __pl_inodelk_unref (retlock); - - ret = 0; +pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode, + pl_inode_lock_t *lock, int can_block, pl_dom_list_t *dom, + inode_t *inode) +{ + posix_locks_private_t *priv = NULL; + int ret = -EINVAL; + pl_inode_lock_t *retlock = NULL; + gf_boolean_t unref = _gf_true; + gf_boolean_t need_inode_unref = _gf_false; + struct list_head *pcontend = NULL; + struct list_head contend; + struct list_head wake; + struct timespec now = {}; + short fl_type; + + lock->pl_inode = pl_inode; + fl_type = lock->fl_type; + + priv = this->private; + + /* Ideally, AFTER a successful lock (both blocking and non-blocking) or + * an unsuccessful blocking lock operation, the inode needs to be ref'd. 
+ * + * But doing so might give room to a race where the lock-requesting + * client could send a DISCONNECT just before this thread refs the inode + * after the locking is done, and the epoll thread could unref the inode + * in cleanup which means the inode's refcount would come down to 0, and + * the call to pl_forget() at this point destroys @pl_inode. Now when + * the io-thread executing this function tries to access pl_inode, + * it could crash on account of illegal memory access. + * + * To get around this problem, the inode is ref'd once even before + * adding the lock into client_list as a precautionary measure. + * This way even if there are DISCONNECTs, there will always be 1 extra + * ref on the inode, so @pl_inode is still alive until after the + * current stack unwinds. + */ + pl_inode->inode = inode_ref(inode); + + if (priv->revocation_secs != 0) { + if (lock->fl_type != F_UNLCK) { + __inodelk_prune_stale(this, pl_inode, dom, lock); + } else if (priv->monkey_unlocking == _gf_true) { + if (pl_does_monkey_want_stuck_lock()) { + pthread_mutex_lock(&pl_inode->mutex); + { + __pl_inodelk_unref(lock); } -out: - if (unref) - __pl_inodelk_unref (lock); - } - pthread_mutex_unlock (&pl_inode->mutex); - if (ctx) - pthread_mutex_unlock (&ctx->lock); - - if (need_inode_unref) - inode_unref (pl_inode->inode); - - /* The following (extra) unref corresponds to the ref that - * was done at the time the lock was granted. 
- */ - if ((fl_type == F_UNLCK) && (ret == 0)) { - inode_unref (pl_inode->inode); - grant_blocked_inode_locks (this, pl_inode, dom); + pthread_mutex_unlock(&pl_inode->mutex); + inode_unref(pl_inode->inode); + gf_log(this->name, GF_LOG_WARNING, + "MONKEY LOCKING (forcing stuck lock)!"); + return 0; + } } + } + + if (priv->notify_contention) { + pcontend = &contend; + INIT_LIST_HEAD(pcontend); + timespec_now(&now); + } + + INIT_LIST_HEAD(&wake); + + if (ctx) + pthread_mutex_lock(&ctx->lock); + pthread_mutex_lock(&pl_inode->mutex); + { + if (lock->fl_type != F_UNLCK) { + ret = __lock_inodelk(this, pl_inode, lock, can_block, dom, &now, + pcontend); + if (ret == 0) { + lock->frame = NULL; + gf_log(this->name, GF_LOG_TRACE, + "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64 + " => OK", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", + lock->client_pid, lkowner_utoa(&lock->owner), + lock->fl_start, lock->fl_end); + } else if (ret == -EAGAIN) { + gf_log(this->name, GF_LOG_TRACE, + "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64 + " => NOK", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", + lock->client_pid, lkowner_utoa(&lock->owner), + lock->user_flock.l_start, lock->user_flock.l_len); + if (can_block) { + unref = _gf_false; + } + } + /* For all but the case where a non-blocking lock attempt fails + * with -EAGAIN, the extra ref taken at the start of this function + * must be negated. */ + need_inode_unref = (ret != 0) && ((ret != -EAGAIN) || !can_block); + if (ctx && !need_inode_unref) { + list_add_tail(&lock->client_list, &ctx->inodelk_lockers); + } + } else { + /* Irrespective of whether unlock succeeds or not, + * the extra inode ref that was done at the start of + * this function must be negated. Towards this, + * @need_inode_unref flag is set unconditionally here. 
+ */ + need_inode_unref = _gf_true; + retlock = __inode_unlock_lock(this, lock, dom); + if (!retlock) { + gf_log(this->name, GF_LOG_DEBUG, + "Bad Unlock issued on Inode lock"); + ret = -EINVAL; + goto out; + } + list_del_init(&retlock->client_list); + __pl_inodelk_unref(retlock); - return ret; + pl_inode_remove_unlocked(this, pl_inode, &wake); + + ret = 0; + } + out: + if (unref) + __pl_inodelk_unref(lock); + } + pthread_mutex_unlock(&pl_inode->mutex); + if (ctx) + pthread_mutex_unlock(&ctx->lock); + + pl_inode_remove_wake(&wake); + + /* The following (extra) unref corresponds to the ref that + * was done at the time the lock was granted. + */ + if ((fl_type == F_UNLCK) && (ret == 0)) { + inode_unref(pl_inode->inode); + grant_blocked_inode_locks(this, pl_inode, dom, &now, pcontend); + } + + if (need_inode_unref) { + inode_unref(pl_inode->inode); + } + + if (pcontend != NULL) { + inodelk_contention_notify(this, pcontend); + } + + return ret; } /* Create a new inode_lock_t */ -pl_inode_lock_t * -new_inode_lock (struct gf_flock *flock, client_t *client, pid_t client_pid, - call_frame_t *frame, xlator_t *this, const char *volume, - char *conn_id) +static pl_inode_lock_t * +new_inode_lock(struct gf_flock *flock, client_t *client, pid_t client_pid, + call_frame_t *frame, xlator_t *this, const char *volume, + char *conn_id, int32_t *op_errno) { - pl_inode_lock_t *lock = NULL; - - lock = GF_CALLOC (1, sizeof (*lock), - gf_locks_mt_pl_inode_lock_t); - if (!lock) { - return NULL; - } - - lock->fl_start = flock->l_start; - lock->fl_type = flock->l_type; - - if (flock->l_len == 0) - lock->fl_end = LLONG_MAX; - else - lock->fl_end = flock->l_start + flock->l_len - 1; - - lock->client = client; - lock->client_pid = client_pid; - lock->volume = volume; - lock->owner = frame->root->lk_owner; - lock->frame = frame; - lock->this = this; - - if (conn_id) { - lock->connection_id = gf_strdup (conn_id); - } + pl_inode_lock_t *lock = NULL; + + if 
(!pl_is_lk_owner_valid(&frame->root->lk_owner, frame->root->client)) { + *op_errno = EINVAL; + goto out; + } + + lock = GF_CALLOC(1, sizeof(*lock), gf_locks_mt_pl_inode_lock_t); + if (!lock) { + *op_errno = ENOMEM; + goto out; + } + + lock->fl_start = flock->l_start; + lock->fl_type = flock->l_type; + + if (flock->l_len == 0) + lock->fl_end = LLONG_MAX; + else + lock->fl_end = flock->l_start + flock->l_len - 1; + + lock->client = client; + lock->client_pid = client_pid; + lock->volume = volume; + lock->owner = frame->root->lk_owner; + lock->frame = frame; + lock->this = this; + + if (conn_id) { + lock->connection_id = gf_strdup(conn_id); + } + + INIT_LIST_HEAD(&lock->list); + INIT_LIST_HEAD(&lock->blocked_locks); + INIT_LIST_HEAD(&lock->client_list); + INIT_LIST_HEAD(&lock->contend); + __pl_inodelk_ref(lock); - INIT_LIST_HEAD (&lock->list); - INIT_LIST_HEAD (&lock->blocked_locks); - INIT_LIST_HEAD (&lock->client_list); - __pl_inodelk_ref (lock); - - return lock; +out: + return lock; } int32_t -_pl_convert_volume (const char *volume, char **res) +_pl_convert_volume(const char *volume, char **res) { - char *mdata_vol = NULL; - int ret = 0; - - mdata_vol = strrchr (volume, ':'); - //if the volume already ends with :metadata don't bother - if (mdata_vol && (strcmp (mdata_vol, ":metadata") == 0)) - return 0; + char *mdata_vol = NULL; + int ret = 0; - ret = gf_asprintf (res, "%s:metadata", volume); - if (ret <= 0) - return ENOMEM; + mdata_vol = strrchr(volume, ':'); + // if the volume already ends with :metadata don't bother + if (mdata_vol && (strcmp(mdata_vol, ":metadata") == 0)) return 0; + + ret = gf_asprintf(res, "%s:metadata", volume); + if (ret <= 0) + return ENOMEM; + return 0; } int32_t -_pl_convert_volume_for_special_range (struct gf_flock *flock, - const char *volume, char **res) +_pl_convert_volume_for_special_range(struct gf_flock *flock, const char *volume, + char **res) { - int32_t ret = 0; + int32_t ret = 0; - if ((flock->l_start == LLONG_MAX -1) && - 
(flock->l_len == 0)) { - ret = _pl_convert_volume (volume, res); - } + if ((flock->l_start == LLONG_MAX - 1) && (flock->l_len == 0)) { + ret = _pl_convert_volume(volume, res); + } - return ret; + return ret; } /* Common inodelk code called from pl_inodelk and pl_finodelk */ int -pl_common_inodelk (call_frame_t *frame, xlator_t *this, - const char *volume, inode_t *inode, int32_t cmd, - struct gf_flock *flock, loc_t *loc, fd_t *fd, dict_t *xdata) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - int ret = -1; - GF_UNUSED int dict_ret = -1; - int can_block = 0; - pl_inode_t * pinode = NULL; - pl_inode_lock_t * reqlock = NULL; - pl_dom_list_t * dom = NULL; - char *res = NULL; - char *res1 = NULL; - char *conn_id = NULL; - pl_ctx_t *ctx = NULL; - - if (xdata) - dict_ret = dict_get_str (xdata, "connection-id", &conn_id); - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (inode, unwind); - VALIDATE_OR_GOTO (flock, unwind); - - if ((flock->l_start < 0) || (flock->l_len < 0)) { - op_errno = EINVAL; - goto unwind; - } - - op_errno = _pl_convert_volume_for_special_range (flock, volume, &res); - if (op_errno) - goto unwind; - if (res) - volume = res; - - pl_trace_in (this, frame, fd, loc, cmd, flock, volume); - - if (frame->root->client) { - ctx = pl_ctx_get (frame->root->client, this); - if (!ctx) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_INFO, "pl_ctx_get() failed"); - goto unwind; - } - } - - pinode = pl_inode_get (this, inode); - if (!pinode) { - op_errno = ENOMEM; - goto unwind; +pl_common_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, + inode_t *inode, int32_t cmd, struct gf_flock *flock, + loc_t *loc, fd_t *fd, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + int ret = -1; + GF_UNUSED int dict_ret = -1; + int can_block = 0; + short lock_type = 0; + pl_inode_t *pinode = NULL; + pl_inode_lock_t *reqlock = NULL; + pl_dom_list_t *dom = NULL; + char *res = NULL; + char *res1 = NULL; + char *conn_id = NULL; + pl_ctx_t *ctx 
= NULL; + + if (xdata) + dict_ret = dict_get_str(xdata, "connection-id", &conn_id); + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(inode, unwind); + VALIDATE_OR_GOTO(flock, unwind); + + if ((flock->l_start < 0) || (flock->l_len < 0)) { + op_errno = EINVAL; + goto unwind; + } + + op_errno = _pl_convert_volume_for_special_range(flock, volume, &res); + if (op_errno) + goto unwind; + if (res) + volume = res; + + pl_trace_in(this, frame, fd, loc, cmd, flock, volume); + + if (frame->root->client) { + ctx = pl_ctx_get(frame->root->client, this); + if (!ctx) { + op_errno = ENOMEM; + gf_log(this->name, GF_LOG_INFO, "pl_ctx_get() failed"); + goto unwind; } + } - dom = get_domain (pinode, volume); - if (!dom) { - op_errno = ENOMEM; - goto unwind; - } + pinode = pl_inode_get(this, inode, NULL); + if (!pinode) { + op_errno = ENOMEM; + goto unwind; + } - reqlock = new_inode_lock (flock, frame->root->client, frame->root->pid, - frame, this, dom->domain, conn_id); + dom = get_domain(pinode, volume); + if (!dom) { + op_errno = ENOMEM; + goto unwind; + } - if (!reqlock) { - op_ret = -1; - op_errno = ENOMEM; - goto unwind; - } + reqlock = new_inode_lock(flock, frame->root->client, frame->root->pid, + frame, this, dom->domain, conn_id, &op_errno); + if (!reqlock) { + op_ret = -1; + goto unwind; + } - switch (cmd) { + switch (cmd) { case F_SETLKW: - can_block = 1; + can_block = 1; - /* fall through */ + /* fall through */ case F_SETLK: - memcpy (&reqlock->user_flock, flock, sizeof (struct gf_flock)); - ret = pl_inode_setlk (this, ctx, pinode, reqlock, can_block, - dom, inode); - - if (ret < 0) { - if ((can_block) && (F_UNLCK != flock->l_type)) { - pl_trace_block (this, frame, fd, loc, - cmd, flock, volume); - goto out; - } - gf_log (this->name, GF_LOG_TRACE, "returning EAGAIN"); - op_errno = -ret; - goto unwind; + lock_type = flock->l_type; + memcpy(&reqlock->user_flock, flock, sizeof(struct gf_flock)); + ret = pl_inode_setlk(this, ctx, pinode, reqlock, can_block, dom, + inode); 
+ + if (ret < 0) { + if (ret == -EAGAIN) { + if (can_block && (F_UNLCK != lock_type)) { + goto out; + } + gf_log(this->name, GF_LOG_TRACE, "returning EAGAIN"); + } else { + gf_log(this->name, GF_LOG_TRACE, "returning %d", ret); } - break; + op_errno = -ret; + goto unwind; + } + break; default: - op_errno = ENOTSUP; - gf_log (this->name, GF_LOG_DEBUG, - "Lock command F_GETLK not supported for [f]inodelk " - "(cmd=%d)", - cmd); - goto unwind; - } + op_errno = ENOTSUP; + gf_log(this->name, GF_LOG_DEBUG, + "Lock command F_GETLK not supported for [f]inodelk " + "(cmd=%d)", + cmd); + goto unwind; + } - op_ret = 0; + op_ret = 0; unwind: - if (flock != NULL) - pl_trace_out (this, frame, fd, loc, cmd, flock, op_ret, - op_errno, volume); + if (flock != NULL) + pl_trace_out(this, frame, fd, loc, cmd, flock, op_ret, op_errno, + volume); - STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno, NULL); + STACK_UNWIND_STRICT(inodelk, frame, op_ret, op_errno, NULL); out: - GF_FREE (res); - GF_FREE (res1); - return 0; + GF_FREE(res); + GF_FREE(res1); + return 0; } int -pl_inodelk (call_frame_t *frame, xlator_t *this, - const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *flock, - dict_t *xdata) +pl_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc, + int32_t cmd, struct gf_flock *flock, dict_t *xdata) { - pl_common_inodelk (frame, this, volume, loc->inode, cmd, flock, - loc, NULL, xdata); + pl_common_inodelk(frame, this, volume, loc->inode, cmd, flock, loc, NULL, + xdata); - return 0; + return 0; } int -pl_finodelk (call_frame_t *frame, xlator_t *this, - const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *flock, - dict_t *xdata) +pl_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, + int32_t cmd, struct gf_flock *flock, dict_t *xdata) { - pl_common_inodelk (frame, this, volume, fd->inode, cmd, flock, - NULL, fd, xdata); - - return 0; + pl_common_inodelk(frame, this, volume, fd->inode, cmd, flock, NULL, fd, + 
xdata); + return 0; } static int32_t -__get_inodelk_dom_count (pl_dom_list_t *dom) +__get_inodelk_dom_count(pl_dom_list_t *dom) { - pl_inode_lock_t *lock = NULL; - int32_t count = 0; - - list_for_each_entry (lock, &dom->inodelk_list, list) { - count++; - } - list_for_each_entry (lock, &dom->blocked_inodelks, blocked_locks) { - count++; - } - return count; + pl_inode_lock_t *lock = NULL; + int32_t count = 0; + + list_for_each_entry(lock, &dom->inodelk_list, list) { count++; } + list_for_each_entry(lock, &dom->blocked_inodelks, blocked_locks) + { + count++; + } + return count; } /* Returns the no. of locks (blocked/granted) held on a given domain name * If @domname is NULL, returns the no. of locks in all the domains present. * If @domname is non-NULL and non-existent, returns 0 */ int32_t -__get_inodelk_count (xlator_t *this, pl_inode_t *pl_inode, char *domname) +__get_inodelk_count(xlator_t *this, pl_inode_t *pl_inode, char *domname) { - int32_t count = 0; - pl_dom_list_t *dom = NULL; - - list_for_each_entry (dom, &pl_inode->dom_list, inode_list) { - if (domname) { - if (strcmp (domname, dom->domain) == 0) { - count = __get_inodelk_dom_count (dom); - goto out; - } - - } else { - /* Counting locks from all domains */ - count += __get_inodelk_dom_count (dom); + int32_t count = 0; + pl_dom_list_t *dom = NULL; + + list_for_each_entry(dom, &pl_inode->dom_list, inode_list) + { + if (domname) { + if (strcmp(domname, dom->domain) == 0) { + count = __get_inodelk_dom_count(dom); + goto out; + } - } + } else { + /* Counting locks from all domains */ + count += __get_inodelk_dom_count(dom); } + } out: - return count; + return count; } int32_t -get_inodelk_count (xlator_t *this, inode_t *inode, char *domname) +get_inodelk_count(xlator_t *this, inode_t *inode, char *domname) { - pl_inode_t *pl_inode = NULL; - uint64_t tmp_pl_inode = 0; - int ret = 0; - int32_t count = 0; + pl_inode_t *pl_inode = NULL; + uint64_t tmp_pl_inode = 0; + int ret = 0; + int32_t count = 0; - ret = 
inode_ctx_get (inode, this, &tmp_pl_inode); - if (ret != 0) { - goto out; - } + ret = inode_ctx_get(inode, this, &tmp_pl_inode); + if (ret != 0) { + goto out; + } - pl_inode = (pl_inode_t *)(long) tmp_pl_inode; + pl_inode = (pl_inode_t *)(long)tmp_pl_inode; - pthread_mutex_lock (&pl_inode->mutex); - { - count = __get_inodelk_count (this, pl_inode, domname); - } - pthread_mutex_unlock (&pl_inode->mutex); + pthread_mutex_lock(&pl_inode->mutex); + { + count = __get_inodelk_count(this, pl_inode, domname); + } + pthread_mutex_unlock(&pl_inode->mutex); out: - return count; + return count; } diff --git a/xlators/features/locks/src/locks-mem-types.h b/xlators/features/locks/src/locks-mem-types.h index 08aeb0a7925..a76605027b3 100644 --- a/xlators/features/locks/src/locks-mem-types.h +++ b/xlators/features/locks/src/locks-mem-types.h @@ -11,19 +11,18 @@ #ifndef __LOCKS_MEM_TYPES_H__ #define __LOCKS_MEM_TYPES_H__ -#include "mem-types.h" +#include <glusterfs/mem-types.h> enum gf_locks_mem_types_ { - gf_locks_mt_pl_dom_list_t = gf_common_mt_end + 1, - gf_locks_mt_pl_inode_t, - gf_locks_mt_posix_lock_t, - gf_locks_mt_pl_entry_lock_t, - gf_locks_mt_pl_inode_lock_t, - gf_locks_mt_truncate_ops, - gf_locks_mt_pl_rw_req_t, - gf_locks_mt_posix_locks_private_t, - gf_locks_mt_pl_fdctx_t, - gf_locks_mt_end + gf_locks_mt_pl_dom_list_t = gf_common_mt_end + 1, + gf_locks_mt_pl_inode_t, + gf_locks_mt_posix_lock_t, + gf_locks_mt_pl_entry_lock_t, + gf_locks_mt_pl_inode_lock_t, + gf_locks_mt_pl_rw_req_t, + gf_locks_mt_posix_locks_private_t, + gf_locks_mt_pl_fdctx_t, + gf_locks_mt_pl_meta_lock_t, + gf_locks_mt_end }; #endif - diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h index 8c24eb8cb9a..c868eb494a2 100644 --- a/xlators/features/locks/src/locks.h +++ b/xlators/features/locks/src/locks.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + Copyright (c) 2006-2012, 2015-2016 Red Hat, Inc. 
<http://www.redhat.com> This file is part of GlusterFS. This file is licensed to you under your choice of the GNU Lesser @@ -10,194 +10,283 @@ #ifndef __POSIX_LOCKS_H__ #define __POSIX_LOCKS_H__ -#include "compat-errno.h" -#include "stack.h" -#include "call-stub.h" +#include <glusterfs/compat-errno.h> +#include <glusterfs/stack.h> +#include <glusterfs/call-stub.h> #include "locks-mem-types.h" -#include "client_t.h" +#include <glusterfs/client_t.h> -#include "lkowner.h" +#include <glusterfs/lkowner.h> + +typedef enum { + MLK_NONE, + MLK_FILE_BASED, + MLK_FORCED, + MLK_OPTIMAL +} mlk_mode_t; /* defines different mandatory locking modes*/ struct __pl_fd; struct __posix_lock { - struct list_head list; + struct list_head list; + + off_t fl_start; + off_t fl_end; + uint32_t lk_flags; + + short fl_type; + short blocked; /* waiting to acquire */ + struct gf_flock user_flock; /* the flock supplied by the user */ + xlator_t *this; /* required for blocked locks */ + unsigned long fd_num; + + fd_t *fd; + call_frame_t *frame; - short fl_type; - off_t fl_start; - off_t fl_end; + time_t blkd_time; /* time at which lock was queued into blkd list */ + time_t granted_time; /* time at which lock was queued into active list */ - short blocked; /* waiting to acquire */ - struct gf_flock user_flock; /* the flock supplied by the user */ - xlator_t *this; /* required for blocked locks */ - unsigned long fd_num; + /* These two together serve to uniquely identify each process + across nodes */ - fd_t *fd; - call_frame_t *frame; + void *client; /* to identify client node */ - struct timeval blkd_time; /*time at which lock was queued into blkd list*/ - struct timeval granted_time; /*time at which lock was queued into active list*/ + /* This field uniquely identifies the client the lock belongs to. As + * lock migration is handled by rebalance, the client_t object will be + * overwritten by rebalance and can't be deemed as the owner of the + * lock on destination. 
Hence, the below field is migrated from + * source to destination by lock_migration_info_t and updated on the + * destination. So that on client-server disconnection, server can + * cleanup the locks properly. */ - /* These two together serve to uniquely identify each process - across nodes */ + char *client_uid; + gf_lkowner_t owner; + pid_t client_pid; /* pid of client process */ - void *client; /* to identify client node */ - gf_lkowner_t owner; - pid_t client_pid; /* pid of client process */ + int blocking; }; typedef struct __posix_lock posix_lock_t; struct __pl_inode_lock { - struct list_head list; - struct list_head blocked_locks; /* list_head pointing to blocked_inodelks */ - int ref; + struct list_head list; + struct list_head blocked_locks; /* list_head pointing to blocked_inodelks */ + struct list_head contend; /* list of contending locks */ + int ref; - short fl_type; - off_t fl_start; - off_t fl_end; + off_t fl_start; + off_t fl_end; - const char *volume; + const char *volume; - struct gf_flock user_flock; /* the flock supplied by the user */ - xlator_t *this; /* required for blocked locks */ - struct __pl_inode *pl_inode; + struct gf_flock user_flock; /* the flock supplied by the user */ + xlator_t *this; /* required for blocked locks */ + struct __pl_inode *pl_inode; - call_frame_t *frame; + call_frame_t *frame; - struct timeval blkd_time; /*time at which lock was queued into blkd list*/ - struct timeval granted_time; /*time at which lock was queued into active list*/ + time_t blkd_time; /* time at which lock was queued into blkd list */ + time_t granted_time; /* time at which lock was queued into active list */ - /* These two together serve to uniquely identify each process - across nodes */ + /*last time at which lock contention was detected and notified*/ + struct timespec contention_time; - void *client; /* to identify client node */ - gf_lkowner_t owner; - pid_t client_pid; /* pid of client process */ + /* These two together serve to uniquely
identify each process + across nodes */ - char *connection_id; /* stores the client connection id */ + void *client; /* to identify client node */ + gf_lkowner_t owner; + pid_t client_pid; /* pid of client process */ - struct list_head client_list; /* list of all locks from a client */ + char *connection_id; /* stores the client connection id */ + + struct list_head client_list; /* list of all locks from a client */ + short fl_type; + + int32_t status; /* Error code when we try to grant a lock in blocked + state */ }; typedef struct __pl_inode_lock pl_inode_lock_t; -struct __pl_rw_req_t { - struct list_head list; - call_stub_t *stub; - posix_lock_t region; +struct _pl_rw_req { + struct list_head list; + call_stub_t *stub; + posix_lock_t region; }; -typedef struct __pl_rw_req_t pl_rw_req_t; - -struct __pl_dom_list_t { - struct list_head inode_list; /* list_head back to pl_inode_t */ - const char *domain; - struct list_head entrylk_list; /* List of entry locks */ - struct list_head blocked_entrylks; /* List of all blocked entrylks */ - struct list_head inodelk_list; /* List of inode locks */ - struct list_head blocked_inodelks; /* List of all blocked inodelks */ +typedef struct _pl_rw_req pl_rw_req_t; + +struct _pl_dom_list { + struct list_head inode_list; /* list_head back to pl_inode_t */ + const char *domain; + struct list_head entrylk_list; /* List of entry locks */ + struct list_head blocked_entrylks; /* List of all blocked entrylks */ + struct list_head inodelk_list; /* List of inode locks */ + struct list_head blocked_inodelks; /* List of all blocked inodelks */ }; -typedef struct __pl_dom_list_t pl_dom_list_t; +typedef struct _pl_dom_list pl_dom_list_t; struct __entry_lock { - struct list_head domain_list; /* list_head back to pl_dom_list_t */ - struct list_head blocked_locks; /* list_head back to blocked_entrylks */ - int ref; + struct list_head domain_list; /* list_head back to pl_dom_list_t */ + struct list_head blocked_locks; /* list_head back to 
blocked_entrylks */ + struct list_head contend; /* list of contending locks */ + int ref; + + call_frame_t *frame; + xlator_t *this; + struct __pl_inode *pinode; - call_frame_t *frame; - xlator_t *this; - struct __pl_inode *pinode; + const char *volume; - const char *volume; + const char *basename; - const char *basename; - entrylk_type type; + time_t blkd_time; /* time at which lock was queued into blkd list */ + time_t granted_time; /* time at which lock was queued into active list */ - struct timeval blkd_time; /*time at which lock was queued into blkd list*/ - struct timeval granted_time; /*time at which lock was queued into active list*/ + /*last time at which lock contention was detected and notified*/ + struct timespec contention_time; - void *client; - gf_lkowner_t owner; - pid_t client_pid; /* pid of client process */ + void *client; + gf_lkowner_t owner; + pid_t client_pid; /* pid of client process */ - char *connection_id; /* stores the client connection id */ + char *connection_id; /* stores the client connection id */ - struct list_head client_list; /* list of all locks from a client */ + struct list_head client_list; /* list of all locks from a client */ + entrylk_type type; }; typedef struct __entry_lock pl_entry_lock_t; - /* The "simulated" inode. 
This contains a list of all the locks associated with this file */ struct __pl_inode { - pthread_mutex_t mutex; - - struct list_head dom_list; /* list of domains */ - struct list_head ext_list; /* list of fcntl locks */ - struct list_head rw_list; /* list of waiting r/w requests */ - struct list_head reservelk_list; /* list of reservelks */ - struct list_head blocked_reservelks; /* list of blocked reservelks */ - struct list_head blocked_calls; /* List of blocked lock calls while a reserve is held*/ - int mandatory; /* if mandatory locking is enabled */ - - inode_t *refkeeper; /* hold refs on an inode while locks are - held to prevent pruning */ - uuid_t gfid; /* placeholder for gfid of the inode */ - inode_t *inode; /* pointer to be used for ref and unref - of inode_t as long as there are - locks on it */ + pthread_mutex_t mutex; + + struct list_head dom_list; /* list of domains */ + struct list_head ext_list; /* list of fcntl locks */ + struct list_head rw_list; /* list of waiting r/w requests */ + struct list_head reservelk_list; /* list of reservelks */ + struct list_head blocked_reservelks; /* list of blocked reservelks */ + struct list_head blocked_calls; /* List of blocked lock calls while a + reserve is held*/ + struct list_head metalk_list; /* Meta lock list */ + struct list_head queued_locks; /* This is to store the incoming lock + requests while meta lock is enabled */ + struct list_head waiting; /* List of pending fops waiting to unlink/rmdir + the inode. 
*/ + int mandatory; /* if mandatory locking is enabled */ + + inode_t *refkeeper; /* hold refs on an inode while locks are + held to prevent pruning */ + uuid_t gfid; /* placeholder for gfid of the inode */ + inode_t *inode; /* pointer to be used for ref and unref + of inode_t as long as there are + locks on it */ + gf_boolean_t migrated; + + /* Flag to indicate whether to read mlock-enforce xattr from disk */ + gf_boolean_t check_mlock_info; + + /* Mandatory_lock enforce: IO will be allowed if and only if the lkowner has + held the lock. + + Note: An xattr is set on the file to recover this information post + reboot. If client does not want mandatory lock to be enforced, then it + should remove this xattr explicitly + */ + gf_boolean_t mlock_enforced; + /* There are scenarios where mandatory lock is granted but there are IOs + pending at posix level. To avoid this before preempting the previous lock + owner, we wait for all the fops to be unwound. + */ + int fop_wind_count; + pthread_cond_t check_fop_wind_count; + + gf_boolean_t track_fop_wind_count; + + int32_t links; /* Number of hard links the inode has. */ + uint32_t remove_running; /* Number of remove operations running. */ + gf_boolean_t is_locked; /* Regular locks will be blocked. */ + gf_boolean_t removed; /* The inode has been deleted. 
*/ }; typedef struct __pl_inode pl_inode_t; +struct __pl_metalk { + pthread_mutex_t mutex; + /* For pl_inode meta lock list */ + struct list_head list; + /* For pl_ctx_t list */ + struct list_head client_list; + char *client_uid; + + pl_inode_t *pl_inode; + int ref; +}; +typedef struct __pl_metalk pl_meta_lock_t; typedef struct { - gf_boolean_t mandatory; /* if mandatory locking is enabled */ - gf_boolean_t trace; /* trace lock requests in and out */ - char *brickname; + char *brickname; + uint32_t revocation_secs; + uint32_t revocation_max_blocked; + uint32_t notify_contention_delay; + mlk_mode_t mandatory_mode; /* holds current mandatory locking mode */ + gf_boolean_t trace; /* trace lock requests in and out */ + gf_boolean_t monkey_unlocking; + gf_boolean_t revocation_clear_all; + gf_boolean_t notify_contention; + gf_boolean_t mlock_enforced; } posix_locks_private_t; - typedef struct { - gf_boolean_t entrylk_count_req; - gf_boolean_t inodelk_count_req; - gf_boolean_t posixlk_count_req; - gf_boolean_t parent_entrylk_req; - data_t *inodelk_dom_count_req; - - dict_t *xdata; - /* used by {f,}truncate */ - loc_t loc; - fd_t *fd; - off_t offset; - enum {TRUNCATE, FTRUNCATE} op; + data_t *inodelk_dom_count_req; + + dict_t *xdata; + loc_t loc[2]; + fd_t *fd; + inode_t *inode; + off_t offset; + glusterfs_fop_t op; + gf_boolean_t entrylk_count_req; + gf_boolean_t inodelk_count_req; + gf_boolean_t posixlk_count_req; + gf_boolean_t parent_entrylk_req; + gf_boolean_t multiple_dom_lk_requests; + int update_mlock_enforced_flag; } pl_local_t; - typedef struct { - struct list_head locks_list; + struct list_head locks_list; } pl_fdctx_t; - struct _locker { - struct list_head lockers; - char *volume; - inode_t *inode; - gf_lkowner_t owner; + struct list_head lockers; + char *volume; + inode_t *inode; + gf_lkowner_t owner; }; typedef struct _locks_ctx { - pthread_mutex_t lock; - struct list_head inodelk_lockers; - struct list_head entrylk_lockers; + pthread_mutex_t lock; + struct 
list_head inodelk_lockers; + struct list_head entrylk_lockers; + struct list_head metalk_list; } pl_ctx_t; +typedef struct _multi_dom_lk_data { + xlator_t *this; + inode_t *inode; + dict_t *xdata_rsp; + gf_boolean_t keep_max; +} multi_dom_lk_data; + +typedef enum { DECREMENT, INCREMENT } pl_count_op_t; pl_ctx_t * -pl_ctx_get (client_t *client, xlator_t *xlator); +pl_ctx_get(client_t *client, xlator_t *xlator); int -pl_inodelk_client_cleanup (xlator_t *this, pl_ctx_t *ctx); +pl_inodelk_client_cleanup(xlator_t *this, pl_ctx_t *ctx); int -pl_entrylk_client_cleanup (xlator_t *this, pl_ctx_t *ctx); +pl_entrylk_client_cleanup(xlator_t *this, pl_ctx_t *ctx); #endif /* __POSIX_LOCKS_H__ */ diff --git a/xlators/features/locks/src/pl-messages.h b/xlators/features/locks/src/pl-messages.h new file mode 100644 index 00000000000..e2d3d7ca974 --- /dev/null +++ b/xlators/features/locks/src/pl-messages.h @@ -0,0 +1,29 @@ +/* + Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _PL_MESSAGES_H_ +#define _PL_MESSAGES_H_ + +#include <glusterfs/glfs-message-id.h> + +/* To add new message IDs, append new identifiers at the end of the list. + * + * Never remove a message ID. If it's not used anymore, you can rename it or + * leave it as it is, but not delete it. This is to prevent reutilization of + * IDs by other messages. + * + * The component name must match one of the entries defined in + * glfs-message-id.h. 
+ */ + +GLFS_MSGID(PL, PL_MSG_LOCK_NUMBER, PL_MSG_INODELK_CONTENTION_FAILED, + PL_MSG_ENTRYLK_CONTENTION_FAILED); + +#endif /* !_PL_MESSAGES_H_ */ diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c index 8d1e67e703c..cf0ae4c57dd 100644 --- a/xlators/features/locks/src/posix.c +++ b/xlators/features/locks/src/posix.c @@ -1,5 +1,5 @@ /* - Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + Copyright (c) 2006-2012, 2016 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. This file is licensed to you under your choice of the GNU Lesser @@ -12,2791 +12,5084 @@ #include <limits.h> #include <pthread.h> -#include "glusterfs.h" -#include "compat.h" -#include "xlator.h" -#include "inode.h" -#include "logging.h" -#include "common-utils.h" +#include <glusterfs/compat.h> +#include <glusterfs/logging.h> #include "locks.h" #include "common.h" -#include "statedump.h" +#include <glusterfs/statedump.h> #include "clear.h" -#include "defaults.h" -#include "syncop.h" +#include <glusterfs/defaults.h> +#include <glusterfs/syncop.h> #ifndef LLONG_MAX #define LLONG_MAX LONG_LONG_MAX /* compat with old gcc */ -#endif /* LLONG_MAX */ +#endif /* LLONG_MAX */ /* Forward declarations */ +void +do_blocked_rw(pl_inode_t *); +static int +__rw_allowable(pl_inode_t *, posix_lock_t *, glusterfs_fop_t); +static int +format_brickname(char *); +int +pl_lockinfo_get_brickname(xlator_t *, inode_t *, int32_t *); +static int +fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **); -void do_blocked_rw (pl_inode_t *); -static int __rw_allowable (pl_inode_t *, posix_lock_t *, glusterfs_fop_t); -static int format_brickname(char *); -int pl_lockinfo_get_brickname (xlator_t *, inode_t *, int32_t *); -static int fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **); - -#define PL_STACK_UNWIND(fop, xdata, frame, op_ret, params ...) 
\ - do { \ - pl_local_t *__local = NULL; \ - inode_t *__parent = NULL; \ - inode_t *__inode = NULL; \ - char *__name = NULL; \ - dict_t *__unref = NULL; \ - \ - __local = frame->local; \ - if (op_ret >= 0 && pl_needs_xdata_response (frame->local)) {\ - if (xdata) \ - dict_ref (xdata); \ - else \ - xdata = dict_new(); \ - if (xdata) { \ - __unref = xdata; \ - pl_get_xdata_rsp_args (__local, \ - #fop, &__parent, &__inode, \ - &__name); \ - pl_set_xdata_response (frame->this, \ - __local, __parent, __inode, __name, \ - xdata); \ - } \ - } \ - frame->local = NULL; \ - STACK_UNWIND_STRICT (fop, frame, op_ret, params); \ - if (__local) { \ - if (__local->inodelk_dom_count_req) \ - data_unref (__local->inodelk_dom_count_req);\ - loc_wipe (&__local->loc); \ - if (__local->fd) \ - fd_unref (__local->fd); \ - mem_put (__local); \ - } \ - if (__unref) \ - dict_unref (__unref); \ - } while (0) - -#define PL_LOCAL_GET_REQUESTS(frame, this, xdata, __fd, __loc) \ - do { \ - if (pl_has_xdata_requests (xdata)) { \ - frame->local = mem_get0 (this->local_pool); \ - pl_local_t *__local = frame->local; \ - if (__local) { \ - if (__fd) \ - __local->fd = fd_ref (__fd); \ - else \ - loc_copy (&__local->loc, __loc);\ - pl_get_xdata_requests (__local, xdata); \ - } \ - } \ - } while (0) +/* + * The client is always requesting data, but older + * servers were not returning it. Newer ones are, so + * the client is receiving a mix of NULL and non-NULL + * xdata in the answers when bricks are of different + * versions. This triggers a bug in older clients. + * To prevent that, we avoid returning extra xdata to + * older clients (making the newer brick to behave as + * an old brick). + */ +#define PL_STACK_UNWIND_FOR_CLIENT(fop, xdata, frame, op_ret, params...) 
\ + do { \ + pl_local_t *__local = NULL; \ + if (frame->root->client && \ + (frame->root->client->opversion < GD_OP_VERSION_3_10_0)) { \ + __local = frame->local; \ + PL_STACK_UNWIND_AND_FREE(__local, fop, frame, op_ret, params); \ + } else { \ + PL_STACK_UNWIND(fop, xdata, frame, op_ret, params); \ + } \ + } while (0) + +#define PL_STACK_UNWIND(fop, xdata, frame, op_ret, params...) \ + do { \ + pl_local_t *__local = NULL; \ + inode_t *__parent = NULL; \ + inode_t *__inode = NULL; \ + char *__name = NULL; \ + dict_t *__unref = NULL; \ + int __i = 0; \ + __local = frame->local; \ + if (op_ret >= 0 && pl_needs_xdata_response(frame->local)) { \ + if (xdata) \ + dict_ref(xdata); \ + else \ + xdata = dict_new(); \ + if (xdata) { \ + __unref = xdata; \ + while (__local->fd || __local->loc[__i].inode) { \ + pl_get_xdata_rsp_args(__local, #fop, &__parent, &__inode, \ + &__name, __i); \ + pl_set_xdata_response(frame->this, __local, __parent, \ + __inode, __name, xdata, __i > 0); \ + if (__local->fd || __i == 1) \ + break; \ + __i++; \ + } \ + } \ + } \ + PL_STACK_UNWIND_AND_FREE(__local, fop, frame, op_ret, params); \ + if (__unref) \ + dict_unref(__unref); \ + } while (0) + +#define PL_LOCAL_GET_REQUESTS(frame, this, xdata, __fd, __loc, __newloc) \ + do { \ + if (pl_has_xdata_requests(xdata)) { \ + if (!frame->local) \ + frame->local = mem_get0(this->local_pool); \ + pl_local_t *__local = frame->local; \ + if (__local) { \ + if (__fd) { \ + __local->fd = fd_ref(__fd); \ + __local->inode = inode_ref(__fd->inode); \ + } else { \ + if (__loc) \ + loc_copy(&__local->loc[0], __loc); \ + if (__newloc) \ + loc_copy(&__local->loc[1], __newloc); \ + __local->inode = inode_ref(__local->loc[0].inode); \ + } \ + pl_get_xdata_requests(__local, xdata); \ + } \ + } \ + } while (0) + +#define PL_CHECK_LOCK_ENFORCE_KEY(frame, dict, name, this, loc, fd, priv) \ + do { \ + if ((dict && (dict_get(dict, GF_ENFORCE_MANDATORY_LOCK))) || \ + (name && (strcmp(name, GF_ENFORCE_MANDATORY_LOCK) == 
0))) { \ + inode_t *__inode = (loc ? loc->inode : fd->inode); \ + pl_inode_t *__pl_inode = pl_inode_get(this, __inode, NULL); \ + if (__pl_inode == NULL) { \ + op_ret = -1; \ + op_errno = ENOMEM; \ + goto unwind; \ + } \ + if (!pl_is_mandatory_locking_enabled(__pl_inode) || \ + !priv->mlock_enforced) { \ + op_ret = -1; \ + gf_msg(this->name, GF_LOG_DEBUG, EINVAL, 0, \ + "option %s would need mandatory lock to be enabled " \ + "and feature.enforce-mandatory-lock option to be set " \ + "to on", \ + GF_ENFORCE_MANDATORY_LOCK); \ + op_errno = EINVAL; \ + goto unwind; \ + } \ + \ + op_ret = pl_local_init(frame, this, loc, fd); \ + if (op_ret) { \ + op_errno = ENOMEM; \ + goto unwind; \ + } \ + \ + ((pl_local_t *)(frame->local))->update_mlock_enforced_flag = 1; \ + } \ + } while (0) + +#define PL_INODE_REMOVE(_fop, _frame, _xl, _loc1, _loc2, _cont, _cbk, \ + _args...) \ + ({ \ + struct list_head contend; \ + pl_inode_t *__pl_inode; \ + call_stub_t *__stub; \ + int32_t __error; \ + INIT_LIST_HEAD(&contend); \ + __error = pl_inode_remove_prepare(_xl, _frame, _loc2 ? 
_loc2 : _loc1, \ + &__pl_inode, &contend); \ + if (__error < 0) { \ + __stub = fop_##_fop##_stub(_frame, _cont, ##_args); \ + __error = pl_inode_remove_complete(_xl, __pl_inode, __stub, \ + &contend); \ + } else if (__error == 0) { \ + PL_LOCAL_GET_REQUESTS(_frame, _xl, xdata, ((fd_t *)NULL), _loc1, \ + _loc2); \ + STACK_WIND_COOKIE(_frame, _cbk, __pl_inode, FIRST_CHILD(_xl), \ + FIRST_CHILD(_xl)->fops->_fop, ##_args); \ + } \ + __error; \ + }) gf_boolean_t -pl_has_xdata_requests (dict_t *xdata) +pl_has_xdata_requests(dict_t *xdata) { - char *reqs[] = {GLUSTERFS_ENTRYLK_COUNT, GLUSTERFS_INODELK_COUNT, - GLUSTERFS_INODELK_DOM_COUNT, GLUSTERFS_POSIXLK_COUNT, - GLUSTERFS_PARENT_ENTRYLK, NULL}; - int i = 0; + static char *reqs[] = {GLUSTERFS_ENTRYLK_COUNT, + GLUSTERFS_INODELK_COUNT, + GLUSTERFS_INODELK_DOM_COUNT, + GLUSTERFS_POSIXLK_COUNT, + GLUSTERFS_PARENT_ENTRYLK, + GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS, + NULL}; + static int reqs_size[] = {SLEN(GLUSTERFS_ENTRYLK_COUNT), + SLEN(GLUSTERFS_INODELK_COUNT), + SLEN(GLUSTERFS_INODELK_DOM_COUNT), + SLEN(GLUSTERFS_POSIXLK_COUNT), + SLEN(GLUSTERFS_PARENT_ENTRYLK), + SLEN(GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS), + 0}; + int i = 0; + + if (!xdata) + return _gf_false; - if (!xdata) - return _gf_false; + for (i = 0; reqs[i]; i++) + if (dict_getn(xdata, reqs[i], reqs_size[i])) + return _gf_true; - for (i = 0; reqs[i]; i++) - if (dict_get (xdata, reqs[i])) - return _gf_true; + return _gf_false; +} - return _gf_false; +static int +dict_delete_domain_key(dict_t *dict, char *key, data_t *value, void *data) +{ + dict_del(dict, key); + return 0; } void -pl_get_xdata_requests (pl_local_t *local, dict_t *xdata) +pl_get_xdata_requests(pl_local_t *local, dict_t *xdata) { - if (!local || !xdata) - return; - - if (dict_get (xdata, GLUSTERFS_ENTRYLK_COUNT)) { - local->entrylk_count_req = 1; - dict_del (xdata, GLUSTERFS_ENTRYLK_COUNT); - } - if (dict_get (xdata, GLUSTERFS_INODELK_COUNT)) { - local->inodelk_count_req = 1; - dict_del (xdata, 
GLUSTERFS_INODELK_COUNT); - } - - local->inodelk_dom_count_req = dict_get (xdata, GLUSTERFS_INODELK_DOM_COUNT); - if (local->inodelk_dom_count_req) { - data_ref (local->inodelk_dom_count_req); - dict_del (xdata, GLUSTERFS_INODELK_DOM_COUNT); - } - - if (dict_get (xdata, GLUSTERFS_POSIXLK_COUNT)) { - local->posixlk_count_req = 1; - dict_del (xdata, GLUSTERFS_POSIXLK_COUNT); - } + if (!local || !xdata) + return; - if (dict_get (xdata, GLUSTERFS_PARENT_ENTRYLK)) { - local->parent_entrylk_req = 1; - dict_del (xdata, GLUSTERFS_PARENT_ENTRYLK); - } + GF_ASSERT(local->xdata == NULL); + local->xdata = dict_copy_with_ref(xdata, NULL); + + if (dict_get_sizen(xdata, GLUSTERFS_ENTRYLK_COUNT)) { + local->entrylk_count_req = 1; + dict_del_sizen(xdata, GLUSTERFS_ENTRYLK_COUNT); + } + if (dict_get_sizen(xdata, GLUSTERFS_INODELK_COUNT)) { + local->inodelk_count_req = 1; + dict_del_sizen(xdata, GLUSTERFS_INODELK_COUNT); + } + if (dict_get_sizen(xdata, GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS)) { + local->multiple_dom_lk_requests = 1; + dict_del_sizen(xdata, GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS); + dict_foreach_fnmatch(xdata, GLUSTERFS_INODELK_DOM_PREFIX "*", + dict_delete_domain_key, NULL); + } + + local->inodelk_dom_count_req = dict_get_sizen(xdata, + GLUSTERFS_INODELK_DOM_COUNT); + if (local->inodelk_dom_count_req) { + data_ref(local->inodelk_dom_count_req); + dict_del_sizen(xdata, GLUSTERFS_INODELK_DOM_COUNT); + } + + if (dict_get_sizen(xdata, GLUSTERFS_POSIXLK_COUNT)) { + local->posixlk_count_req = 1; + dict_del_sizen(xdata, GLUSTERFS_POSIXLK_COUNT); + } + + if (dict_get_sizen(xdata, GLUSTERFS_PARENT_ENTRYLK)) { + local->parent_entrylk_req = 1; + dict_del_sizen(xdata, GLUSTERFS_PARENT_ENTRYLK); + } } gf_boolean_t -pl_needs_xdata_response (pl_local_t *local) +pl_needs_xdata_response(pl_local_t *local) { - if (!local) - return _gf_false; - - if (local->parent_entrylk_req) - return _gf_true; - - if (local->entrylk_count_req) - return _gf_true; - - if (local->inodelk_dom_count_req) 
- return _gf_true; + if (!local) + return _gf_false; - if (local->inodelk_count_req) - return _gf_true; + if (local->parent_entrylk_req || local->entrylk_count_req || + local->inodelk_dom_count_req || local->inodelk_count_req || + local->posixlk_count_req || local->multiple_dom_lk_requests) + return _gf_true; - if (local->posixlk_count_req) - return _gf_true; - return _gf_false; + return _gf_false; } void -pl_get_xdata_rsp_args (pl_local_t *local, char *fop, inode_t **parent, - inode_t **inode, char **name) +pl_get_xdata_rsp_args(pl_local_t *local, char *fop, inode_t **parent, + inode_t **inode, char **name, int i) { - if (strcmp (fop, "lookup") == 0) { - *parent = local->loc.parent; - *inode = local->loc.inode; - *name = (char *)local->loc.name; + if (strcmp(fop, "lookup") == 0) { + *parent = local->loc[0].parent; + *inode = local->loc[0].inode; + *name = (char *)local->loc[0].name; + } else { + if (local->fd) { + *inode = local->fd->inode; } else { - if (local->fd) { - *inode = local->fd->inode; - } else { - *inode = local->loc.parent; - } + *inode = local->loc[i].parent; } + } } -int32_t -__get_posixlk_count (xlator_t *this, pl_inode_t *pl_inode) +static inline int +pl_track_io_fop_count(pl_local_t *local, xlator_t *this, pl_count_op_t op) { - posix_lock_t *lock = NULL; - int32_t count = 0; + pl_inode_t *pl_inode = NULL; + + if (!local) + return -1; - list_for_each_entry (lock, &pl_inode->ext_list, list) { + pl_inode = pl_inode_get(this, local->inode, NULL); + if (!pl_inode) + return -1; - count++; + if (pl_inode->mlock_enforced && pl_inode->track_fop_wind_count) { + pthread_mutex_lock(&pl_inode->mutex); + { + if (op == DECREMENT) { + pl_inode->fop_wind_count--; + /* fop_wind_count can go negative when lock enforcement is + * enabled on unwind path of an IO. Hence the "<" comparision. 
+ */ + if (pl_inode->fop_wind_count <= 0) { + pthread_cond_broadcast(&pl_inode->check_fop_wind_count); + pl_inode->track_fop_wind_count = _gf_false; + pl_inode->fop_wind_count = 0; + } + } else { + pl_inode->fop_wind_count++; + } } + pthread_mutex_unlock(&pl_inode->mutex); + } + + return 0; +} + +static int32_t +__get_posixlk_count(pl_inode_t *pl_inode) +{ + posix_lock_t *lock = NULL; + int32_t count = 0; - return count; + list_for_each_entry(lock, &pl_inode->ext_list, list) { count++; } + + return count; } int32_t -get_posixlk_count (xlator_t *this, inode_t *inode) +get_posixlk_count(xlator_t *this, inode_t *inode) { - pl_inode_t *pl_inode = NULL; - uint64_t tmp_pl_inode = 0; - int ret = 0; - int32_t count = 0; + pl_inode_t *pl_inode = NULL; + uint64_t tmp_pl_inode = 0; + int32_t count = 0; - ret = inode_ctx_get (inode, this, &tmp_pl_inode); - if (ret != 0) { - goto out; - } + int ret = inode_ctx_get(inode, this, &tmp_pl_inode); + if (ret != 0) { + goto out; + } - pl_inode = (pl_inode_t *)(long) tmp_pl_inode; + pl_inode = (pl_inode_t *)(long)tmp_pl_inode; - pthread_mutex_lock (&pl_inode->mutex); - { - count = __get_posixlk_count (this, pl_inode); - } - pthread_mutex_unlock (&pl_inode->mutex); + pthread_mutex_lock(&pl_inode->mutex); + { + count = __get_posixlk_count(pl_inode); + } + pthread_mutex_unlock(&pl_inode->mutex); out: - return count; + return count; } void -pl_parent_entrylk_xattr_fill (xlator_t *this, inode_t *parent, - char *basename, dict_t *dict) +pl_parent_entrylk_xattr_fill(xlator_t *this, inode_t *parent, char *basename, + dict_t *dict, gf_boolean_t keep_max) { - uint32_t entrylk = 0; - int ret = -1; - - if (!parent || !basename || !strlen (basename)) - goto out; - entrylk = check_entrylk_on_basename (this, parent, basename); + int32_t entrylk = 0; + int32_t maxcount = -1; + int ret = -1; + + if (!parent || !basename) + goto out; + if (keep_max) { + ret = dict_get_int32_sizen(dict, GLUSTERFS_PARENT_ENTRYLK, &maxcount); + if (ret < 0) + 
gf_msg_debug(this->name, 0, " Failed to fetch the value for key %s", + GLUSTERFS_PARENT_ENTRYLK); + } + entrylk = check_entrylk_on_basename(this, parent, basename); + if (maxcount >= entrylk) + return; out: - ret = dict_set_uint32 (dict, GLUSTERFS_PARENT_ENTRYLK, entrylk); - if (ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, - " dict_set failed on key %s", GLUSTERFS_PARENT_ENTRYLK); - } + ret = dict_set_int32_sizen(dict, GLUSTERFS_PARENT_ENTRYLK, entrylk); + if (ret < 0) { + gf_msg_debug(this->name, 0, " dict_set failed on key %s", + GLUSTERFS_PARENT_ENTRYLK); + } } void -pl_entrylk_xattr_fill (xlator_t *this, inode_t *inode, - dict_t *dict) +pl_entrylk_xattr_fill(xlator_t *this, inode_t *inode, dict_t *dict, + gf_boolean_t keep_max) { - int32_t count = 0; - int ret = -1; - - count = get_entrylk_count (this, inode); - ret = dict_set_int32 (dict, GLUSTERFS_ENTRYLK_COUNT, count); - if (ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, - " dict_set failed on key %s", GLUSTERFS_ENTRYLK_COUNT); - } + int32_t count = 0; + int32_t maxcount = -1; + int ret = -1; + + if (keep_max) { + ret = dict_get_int32_sizen(dict, GLUSTERFS_ENTRYLK_COUNT, &maxcount); + if (ret < 0) + gf_msg_debug(this->name, 0, " Failed to fetch the value for key %s", + GLUSTERFS_ENTRYLK_COUNT); + } + count = get_entrylk_count(this, inode); + if (maxcount >= count) + return; + ret = dict_set_int32_sizen(dict, GLUSTERFS_ENTRYLK_COUNT, count); + if (ret < 0) { + gf_msg_debug(this->name, 0, " dict_set failed on key %s", + GLUSTERFS_ENTRYLK_COUNT); + } } void -pl_inodelk_xattr_fill (xlator_t *this, inode_t *inode, dict_t *dict, - char *domname) +pl_inodelk_xattr_fill(xlator_t *this, inode_t *inode, dict_t *dict, + char *domname, gf_boolean_t keep_max) { - int32_t count = 0; - int ret = -1; + int32_t count = 0; + int32_t maxcount = -1; + int ret = -1; + + if (keep_max) { + ret = dict_get_int32_sizen(dict, GLUSTERFS_INODELK_COUNT, &maxcount); + if (ret < 0) + gf_msg_debug(this->name, 0, " Failed to fetch the 
value for key %s", + GLUSTERFS_INODELK_COUNT); + } + count = get_inodelk_count(this, inode, domname); + if (maxcount >= count) + return; + ret = dict_set_int32_sizen(dict, GLUSTERFS_INODELK_COUNT, count); + if (ret < 0) { + gf_msg_debug(this->name, 0, + "Failed to set count for " + "key %s", + GLUSTERFS_INODELK_COUNT); + } - count = get_inodelk_count (this, inode, domname); + return; +} - ret = dict_set_int32 (dict, GLUSTERFS_INODELK_COUNT, count); - if (ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, "Failed to set count for " - "key %s", GLUSTERFS_INODELK_COUNT); - } +void +pl_posixlk_xattr_fill(xlator_t *this, inode_t *inode, dict_t *dict, + gf_boolean_t keep_max) +{ + int32_t count = 0; + int32_t maxcount = -1; + int ret = -1; + + if (keep_max) { + ret = dict_get_int32_sizen(dict, GLUSTERFS_POSIXLK_COUNT, &maxcount); + if (ret < 0) + gf_msg_debug(this->name, 0, " Failed to fetch the value for key %s", + GLUSTERFS_POSIXLK_COUNT); + } + count = get_posixlk_count(this, inode); + if (maxcount >= count) + return; + ret = dict_set_int32_sizen(dict, GLUSTERFS_POSIXLK_COUNT, count); + if (ret < 0) { + gf_msg_debug(this->name, 0, " dict_set failed on key %s", + GLUSTERFS_POSIXLK_COUNT); + } +} + +void +pl_inodelk_xattr_fill_each(xlator_t *this, inode_t *inode, dict_t *dict, + char *domname, gf_boolean_t keep_max, char *key) +{ + int32_t count = 0; + int32_t maxcount = -1; + int ret = -1; + + if (keep_max) { + ret = dict_get_int32(dict, key, &maxcount); + if (ret < 0) + gf_msg_debug(this->name, 0, " Failed to fetch the value for key %s", + GLUSTERFS_INODELK_COUNT); + } + count = get_inodelk_count(this, inode, domname); + if (maxcount >= count) return; + + ret = dict_set_int32(dict, key, count); + if (ret < 0) { + gf_msg_debug(this->name, 0, + "Failed to set count for " + "key %s", + key); + } + + return; +} + +static int +pl_inodelk_xattr_fill_multiple(dict_t *this, char *key, data_t *value, + void *data) +{ + multi_dom_lk_data *d = data; + char *tmp_key = NULL; + char 
*save_ptr = NULL; + + tmp_key = gf_strdup(key); + if (!tmp_key) + return -1; + + strtok_r(tmp_key, ":", &save_ptr); + if (!*save_ptr) { + if (tmp_key) + GF_FREE(tmp_key); + gf_msg(THIS->name, GF_LOG_ERROR, 0, EINVAL, + "Could not tokenize domain string from key %s", key); + return -1; + } + + pl_inodelk_xattr_fill_each(d->this, d->inode, d->xdata_rsp, save_ptr, + d->keep_max, key); + if (tmp_key) + GF_FREE(tmp_key); + + return 0; } void -pl_posixlk_xattr_fill (xlator_t *this, inode_t *inode, - dict_t *dict) +pl_fill_multiple_dom_lk_requests(xlator_t *this, pl_local_t *local, + inode_t *inode, dict_t *dict, + gf_boolean_t keep_max) { - int32_t count = 0; - int ret = -1; + multi_dom_lk_data data; - count = get_posixlk_count (this, inode); - ret = dict_set_int32 (dict, GLUSTERFS_POSIXLK_COUNT, count); - if (ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, - " dict_set failed on key %s", GLUSTERFS_POSIXLK_COUNT); - } + data.this = this; + data.inode = inode; + data.xdata_rsp = dict; + data.keep_max = keep_max; + dict_foreach_fnmatch(local->xdata, GLUSTERFS_INODELK_DOM_PREFIX "*", + pl_inodelk_xattr_fill_multiple, &data); } void -pl_set_xdata_response (xlator_t *this, pl_local_t *local, inode_t *parent, - inode_t *inode, char *name, dict_t *xdata) +pl_set_xdata_response(xlator_t *this, pl_local_t *local, inode_t *parent, + inode_t *inode, char *name, dict_t *xdata, + gf_boolean_t max_lock) { - if (!xdata || !local) - return; + if (!xdata || !local) + return; + + if (local->parent_entrylk_req && parent && name && name[0] != '\0') + pl_parent_entrylk_xattr_fill(this, parent, name, xdata, max_lock); - if (local->parent_entrylk_req && parent && name && strlen (name)) - pl_parent_entrylk_xattr_fill (this, parent, name, xdata); + if (!inode) + return; + + if (local->entrylk_count_req) + pl_entrylk_xattr_fill(this, inode, xdata, max_lock); - if (local->entrylk_count_req && inode) - pl_entrylk_xattr_fill (this, inode, xdata); + if (local->inodelk_dom_count_req) + 
pl_inodelk_xattr_fill(this, inode, xdata, + data_to_str(local->inodelk_dom_count_req), + max_lock); - if (local->inodelk_dom_count_req && inode) - pl_inodelk_xattr_fill (this, inode, xdata, - data_to_str (local->inodelk_dom_count_req)); + if (local->inodelk_count_req) + pl_inodelk_xattr_fill(this, inode, xdata, NULL, max_lock); - if (local->inodelk_count_req && inode) - pl_inodelk_xattr_fill (this, inode, xdata, NULL); + if (local->posixlk_count_req) + pl_posixlk_xattr_fill(this, inode, xdata, max_lock); - if (local->posixlk_count_req && inode) - pl_posixlk_xattr_fill (this, inode, xdata); + if (local->multiple_dom_lk_requests) + pl_fill_multiple_dom_lk_requests(this, local, inode, xdata, max_lock); } -static pl_fdctx_t * -pl_new_fdctx () +/* Checks whether the region where fop is acting upon conflicts + * with existing locks. If there is no conflict function returns + * 1 else returns 0 with can_block boolean set accordingly to + * indicate block/fail the fop. + */ +int +pl_is_fop_allowed(pl_inode_t *pl_inode, posix_lock_t *region, fd_t *fd, + glusterfs_fop_t op, gf_boolean_t *can_block) { - pl_fdctx_t *fdctx = NULL; + int ret = 0; + + if (!__rw_allowable(pl_inode, region, op)) { + if (pl_inode->mlock_enforced) { + *can_block = _gf_false; + } else if ((!fd) || (fd && (fd->flags & O_NONBLOCK))) { + gf_log("locks", GF_LOG_TRACE, + "returning EAGAIN" + " because fd is O_NONBLOCK"); + *can_block = _gf_false; + } else { + *can_block = _gf_true; + } + } else { + ret = 1; + } + + return ret; +} - fdctx = GF_CALLOC (1, sizeof (*fdctx), - gf_locks_mt_pl_fdctx_t); - GF_VALIDATE_OR_GOTO ("posix-locks", fdctx, out); +static pl_fdctx_t * +pl_new_fdctx() +{ + pl_fdctx_t *fdctx = GF_MALLOC(sizeof(*fdctx), gf_locks_mt_pl_fdctx_t); + GF_VALIDATE_OR_GOTO("posix-locks", fdctx, out); - INIT_LIST_HEAD (&fdctx->locks_list); + INIT_LIST_HEAD(&fdctx->locks_list); out: - return fdctx; + return fdctx; } static pl_fdctx_t * -pl_check_n_create_fdctx (xlator_t *this, fd_t *fd) 
+pl_check_n_create_fdctx(xlator_t *this, fd_t *fd) { - int ret = 0; - uint64_t tmp = 0; - pl_fdctx_t *fdctx = NULL; - - GF_VALIDATE_OR_GOTO ("posix-locks", this, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - - LOCK (&fd->lock); - { - ret = __fd_ctx_get (fd, this, &tmp); - if ((ret != 0) || (tmp == 0)) { - fdctx = pl_new_fdctx (); - if (fdctx == NULL) { - goto unlock; - } - } + int ret = 0; + uint64_t tmp = 0; + pl_fdctx_t *fdctx = NULL; + + GF_VALIDATE_OR_GOTO("posix-locks", this, out); + GF_VALIDATE_OR_GOTO(this->name, fd, out); + + LOCK(&fd->lock); + { + ret = __fd_ctx_get(fd, this, &tmp); + if ((ret != 0) || (tmp == 0)) { + fdctx = pl_new_fdctx(); + if (fdctx == NULL) { + goto unlock; + } + } - ret = __fd_ctx_set (fd, this, (uint64_t)(long)fdctx); - if (ret != 0) { - GF_FREE (fdctx); - fdctx = NULL; - gf_log (this->name, GF_LOG_DEBUG, - "failed to set fd ctx"); - } + ret = __fd_ctx_set(fd, this, (uint64_t)(long)fdctx); + if (ret != 0) { + GF_FREE(fdctx); + fdctx = NULL; + UNLOCK(&fd->lock); + gf_log(this->name, GF_LOG_DEBUG, "failed to set fd ctx"); + goto out; } + } unlock: - UNLOCK (&fd->lock); + UNLOCK(&fd->lock); out: - return fdctx; + return fdctx; +} + +int32_t +pl_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + pl_track_io_fop_count(frame->local, this, DECREMENT); + + PL_STACK_UNWIND(discard, xdata, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + return 0; } int -pl_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) +pl_discard_cont(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) { - pl_local_t *local = NULL; + pl_track_io_fop_count(frame->local, this, INCREMENT); - local = frame->local; + STACK_WIND(frame, pl_discard_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->discard, 
fd, offset, len, xdata); + return 0; +} - if (local->op == TRUNCATE) - loc_wipe (&local->loc); +int32_t +pl_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + pl_local_t *local = NULL; + pl_inode_t *pl_inode = NULL; + pl_rw_req_t *rw = NULL; + posix_lock_t region = { + .list = + { + 0, + }, + }; + gf_boolean_t enabled = _gf_false; + gf_boolean_t can_block = _gf_true; + int op_ret = 0; + int op_errno = 0; + int allowed = 1; + + GF_VALIDATE_OR_GOTO("locks", this, unwind); + + local = mem_get0(this->local_pool); + if (!local) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + + frame->local = local; + local->inode = inode_ref(fd->inode); + local->fd = fd_ref(fd); + + pl_inode = pl_inode_get(this, fd->inode, local); + if (!pl_inode) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + + if (frame->root->pid < 0) + enabled = _gf_false; + else + enabled = pl_is_mandatory_locking_enabled(pl_inode); + + if (enabled) { + region.fl_start = offset; + region.fl_end = offset + len - 1; + region.client = frame->root->client; + region.fd_num = fd_to_fdnum(fd); + region.client_pid = frame->root->pid; + region.owner = frame->root->lk_owner; + + pthread_mutex_lock(&pl_inode->mutex); + { + allowed = pl_is_fop_allowed(pl_inode, ®ion, fd, GF_FOP_DISCARD, + &can_block); + if (allowed == 1) { + if (pl_inode->mlock_enforced && + pl_inode->track_fop_wind_count) { + pl_inode->fop_wind_count++; + } + goto unlock; + } else if (!can_block) { + op_errno = EAGAIN; + op_ret = -1; + goto unlock; + } - if (local->xdata) - dict_unref (local->xdata); - if (local->fd) - fd_unref (local->fd); + rw = GF_MALLOC(sizeof(*rw), gf_locks_mt_pl_rw_req_t); + if (!rw) { + op_errno = ENOMEM; + op_ret = -1; + goto unlock; + } - STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, - prebuf, postbuf, xdata); - return 0; + rw->stub = fop_discard_stub(frame, pl_discard_cont, fd, offset, len, + xdata); + if (!rw->stub) { + op_errno = ENOMEM; + op_ret 
= -1; + GF_FREE(rw); + goto unlock; + } + + rw->region = region; + + list_add_tail(&rw->list, &pl_inode->rw_list); + } + unlock: + pthread_mutex_unlock(&pl_inode->mutex); + } + + if (allowed == 1) + STACK_WIND(frame, pl_discard_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata); +unwind: + if (op_ret == -1) + PL_STACK_UNWIND(discard, xdata, frame, op_ret, op_errno, NULL, NULL, + NULL); + + return 0; } +int32_t +pl_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + pl_track_io_fop_count(frame->local, this, DECREMENT); + + PL_STACK_UNWIND(zerofill, xdata, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + return 0; +} -static int -truncate_allowed (pl_inode_t *pl_inode, - client_t *client, pid_t client_pid, - gf_lkowner_t *owner, off_t offset) +int +pl_zerofill_cont(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + off_t len, dict_t *xdata) { - posix_lock_t *l = NULL; - posix_lock_t region = {.list = {0, }, }; - int ret = 1; + pl_track_io_fop_count(frame->local, this, INCREMENT); - region.fl_start = offset; - region.fl_end = LLONG_MAX; - region.client = client; - region.client_pid = client_pid; - region.owner = *owner; + STACK_WIND(frame, pl_zerofill_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata); + return 0; +} - pthread_mutex_lock (&pl_inode->mutex); +int32_t +pl_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + off_t len, dict_t *xdata) +{ + pl_local_t *local = NULL; + pl_inode_t *pl_inode = NULL; + pl_rw_req_t *rw = NULL; + posix_lock_t region = { + .list = + { + 0, + }, + }; + gf_boolean_t enabled = _gf_false; + gf_boolean_t can_block = _gf_true; + int op_ret = 0; + int op_errno = 0; + int allowed = 1; + + GF_VALIDATE_OR_GOTO("locks", this, unwind); + + local = mem_get0(this->local_pool); + if (!local) { + op_ret = -1; + op_errno = ENOMEM; + 
goto unwind; + } + + frame->local = local; + local->inode = inode_ref(fd->inode); + local->fd = fd_ref(fd); + + pl_inode = pl_inode_get(this, fd->inode, local); + if (!pl_inode) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + + if (frame->root->pid < 0) + enabled = _gf_false; + else + enabled = pl_is_mandatory_locking_enabled(pl_inode); + + if (enabled) { + region.fl_start = offset; + region.fl_end = offset + len - 1; + region.client = frame->root->client; + region.fd_num = fd_to_fdnum(fd); + region.client_pid = frame->root->pid; + region.owner = frame->root->lk_owner; + + pthread_mutex_lock(&pl_inode->mutex); { - list_for_each_entry (l, &pl_inode->ext_list, list) { - if (!l->blocked - && locks_overlap (®ion, l) - && !same_owner (®ion, l)) { - ret = 0; - gf_log ("posix-locks", GF_LOG_TRACE, "Truncate " - "allowed"); - break; - } + allowed = pl_is_fop_allowed(pl_inode, ®ion, fd, GF_FOP_ZEROFILL, + &can_block); + if (allowed == 1) { + if (pl_inode->mlock_enforced && + pl_inode->track_fop_wind_count) { + pl_inode->fop_wind_count++; } + goto unlock; + } else if (!can_block) { + op_errno = EAGAIN; + op_ret = -1; + goto unlock; + } + + rw = GF_MALLOC(sizeof(*rw), gf_locks_mt_pl_rw_req_t); + if (!rw) { + op_errno = ENOMEM; + op_ret = -1; + goto unlock; + } + + rw->stub = fop_zerofill_stub(frame, pl_zerofill_cont, fd, offset, + len, xdata); + if (!rw->stub) { + op_errno = ENOMEM; + op_ret = -1; + GF_FREE(rw); + goto unlock; + } + + rw->region = region; + + list_add_tail(&rw->list, &pl_inode->rw_list); } - pthread_mutex_unlock (&pl_inode->mutex); + unlock: + pthread_mutex_unlock(&pl_inode->mutex); + } - return ret; + if (allowed == 1) + STACK_WIND(frame, pl_zerofill_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata); +unwind: + if (op_ret == -1) + PL_STACK_UNWIND(zerofill, xdata, frame, op_ret, op_errno, NULL, NULL, + NULL); + + return 0; } +int +pl_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, 
int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + pl_local_t *local = frame->local; -static int -truncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, - dict_t *xdata) + pl_track_io_fop_count(local, this, DECREMENT); + + if (local->op == GF_FOP_TRUNCATE) + PL_STACK_UNWIND(truncate, xdata, frame, op_ret, op_errno, prebuf, + postbuf, xdata); + else + PL_STACK_UNWIND(ftruncate, xdata, frame, op_ret, op_errno, prebuf, + postbuf, xdata); + return 0; +} + +int +pl_ftruncate_cont(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) { - posix_locks_private_t *priv = NULL; - pl_local_t *local = NULL; - inode_t *inode = NULL; - pl_inode_t *pl_inode = NULL; + pl_track_io_fop_count(frame->local, this, INCREMENT); + STACK_WIND(frame, pl_truncate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); + return 0; +} - priv = this->private; - local = frame->local; +int +pl_truncate_cont(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) +{ + pl_track_io_fop_count(frame->local, this, INCREMENT); - if (op_ret != 0) { - gf_log (this->name, GF_LOG_ERROR, - "got error (errno=%d, stderror=%s) from child", - op_errno, strerror (op_errno)); - goto unwind; - } + STACK_WIND(frame, pl_truncate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); + return 0; +} - if (local->op == TRUNCATE) - inode = local->loc.inode; - else - inode = local->fd->inode; +static int +truncate_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + dict_t *xdata) +{ + pl_local_t *local = frame->local; + inode_t *inode = NULL; + pl_inode_t *pl_inode = NULL; + pl_rw_req_t *rw = NULL; + posix_lock_t region = { + .list = + { + 0, + }, + }; + gf_boolean_t enabled = _gf_false; + gf_boolean_t can_block = _gf_true; + int allowed = 1; + + 
GF_VALIDATE_OR_GOTO("locks", this, unwind); + + if (op_ret != 0) { + gf_log(this->name, GF_LOG_ERROR, + "got error (errno=%d, stderror=%s) from child", op_errno, + strerror(op_errno)); + goto unwind; + } + + if (local->op == GF_FOP_TRUNCATE) + inode = local->loc[0].inode; + else + inode = local->fd->inode; + + local->inode = inode_ref(inode); + + pl_inode = pl_inode_get(this, inode, local); + if (!pl_inode) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + + if (frame->root->pid < 0) + enabled = _gf_false; + else + enabled = pl_is_mandatory_locking_enabled(pl_inode); + + if (enabled) { + region.fl_start = local->offset; + region.fl_end = LLONG_MAX; + region.client = frame->root->client; + region.fd_num = fd_to_fdnum(local->fd); + region.client_pid = frame->root->pid; + region.owner = frame->root->lk_owner; + pthread_mutex_lock(&pl_inode->mutex); + { + allowed = pl_is_fop_allowed(pl_inode, ®ion, local->fd, local->op, + &can_block); - pl_inode = pl_inode_get (this, inode); - if (!pl_inode) { - op_ret = -1; + if (allowed == 1) { + if (pl_inode->mlock_enforced && + pl_inode->track_fop_wind_count) { + pl_inode->fop_wind_count++; + } + goto unlock; + } else if (!can_block) { + op_errno = EAGAIN; + op_ret = -1; + goto unlock; + } + + rw = GF_MALLOC(sizeof(*rw), gf_locks_mt_pl_rw_req_t); + if (!rw) { op_errno = ENOMEM; - goto unwind; - } + op_ret = -1; + goto unlock; + } + + if (local->op == GF_FOP_TRUNCATE) + rw->stub = fop_truncate_stub(frame, pl_truncate_cont, + &local->loc[0], local->offset, + local->xdata); + else + rw->stub = fop_ftruncate_stub(frame, pl_ftruncate_cont, + local->fd, local->offset, + local->xdata); + if (!rw->stub) { + op_errno = ENOMEM; + op_ret = -1; + GF_FREE(rw); + goto unlock; + } - if (priv->mandatory - && pl_inode->mandatory - && !truncate_allowed (pl_inode, frame->root->client, - frame->root->pid, &frame->root->lk_owner, - local->offset)) { - op_ret = -1; - op_errno = EAGAIN; - goto unwind; + rw->region = region; + + 
list_add_tail(&rw->list, &pl_inode->rw_list); } + unlock: + pthread_mutex_unlock(&pl_inode->mutex); + } + if (allowed == 1) { switch (local->op) { - case TRUNCATE: - STACK_WIND (frame, pl_truncate_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->truncate, - &local->loc, local->offset, local->xdata); + case GF_FOP_TRUNCATE: + STACK_WIND(frame, pl_truncate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, &local->loc[0], + local->offset, local->xdata); + break; + case GF_FOP_FTRUNCATE: + STACK_WIND(frame, pl_truncate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ftruncate, local->fd, + local->offset, local->xdata); break; - case FTRUNCATE: - STACK_WIND (frame, pl_truncate_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->ftruncate, - local->fd, local->offset, local->xdata); + default: break; } - - return 0; - + } unwind: - gf_log (this->name, GF_LOG_ERROR, "truncate failed with ret: %d, " - "error: %s", op_ret, strerror (op_errno)); - if (local->op == TRUNCATE) - loc_wipe (&local->loc); - if (local->xdata) - dict_unref (local->xdata); - if (local->fd) - fd_unref (local->fd); - - STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, buf, NULL, xdata); - return 0; -} + if (op_ret == -1) { + gf_log(this ? 
this->name : "locks", GF_LOG_ERROR, + "truncate failed with " + "ret: %d, error: %s", + op_ret, strerror(op_errno)); + switch (local->op) { + case GF_FOP_TRUNCATE: + PL_STACK_UNWIND(truncate, xdata, frame, op_ret, op_errno, buf, + NULL, xdata); + break; + case GF_FOP_FTRUNCATE: + PL_STACK_UNWIND(ftruncate, xdata, frame, op_ret, op_errno, buf, + NULL, xdata); + break; + default: + break; + } + } + return 0; +} int -pl_truncate (call_frame_t *frame, xlator_t *this, - loc_t *loc, off_t offset, dict_t *xdata) +pl_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) { - pl_local_t *local = NULL; + pl_local_t *local = NULL; + int ret = -1; - local = mem_get0 (this->local_pool); - GF_VALIDATE_OR_GOTO (this->name, local, unwind); + GF_VALIDATE_OR_GOTO("locks", this, unwind); - local->op = TRUNCATE; - local->offset = offset; - loc_copy (&local->loc, loc); - if (xdata) - local->xdata = dict_ref (xdata); + local = mem_get0(this->local_pool); + GF_VALIDATE_OR_GOTO(this->name, local, unwind); - frame->local = local; + local->op = GF_FOP_TRUNCATE; + local->offset = offset; + loc_copy(&local->loc[0], loc); + if (xdata) + local->xdata = dict_ref(xdata); - STACK_WIND (frame, truncate_stat_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->stat, loc, NULL); + frame->local = local; - return 0; + STACK_WIND(frame, truncate_stat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->stat, loc, NULL); + ret = 0; unwind: - gf_log (this->name, GF_LOG_ERROR, "truncate for %s failed with ret: %d, " - "error: %s", loc->path, -1, strerror (ENOMEM)); - STACK_UNWIND_STRICT (truncate, frame, -1, ENOMEM, NULL, NULL, NULL); - - return 0; + if (ret == -1) { + gf_log(this ? 
this->name : "locks", GF_LOG_ERROR, + "truncate on %s failed with" + " ret: %d, error: %s", + loc->path, -1, strerror(ENOMEM)); + STACK_UNWIND_STRICT(truncate, frame, -1, ENOMEM, NULL, NULL, NULL); + } + return 0; } - int -pl_ftruncate (call_frame_t *frame, xlator_t *this, - fd_t *fd, off_t offset, dict_t *xdata) +pl_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) { - pl_local_t *local = NULL; + pl_local_t *local = NULL; + int ret = -1; - local = mem_get0 (this->local_pool); - GF_VALIDATE_OR_GOTO (this->name, local, unwind); + GF_VALIDATE_OR_GOTO("locks", this, unwind); + local = mem_get0(this->local_pool); + GF_VALIDATE_OR_GOTO(this->name, local, unwind); - local->op = FTRUNCATE; - local->offset = offset; - local->fd = fd_ref (fd); - if (xdata) - local->xdata = dict_ref (xdata); - - frame->local = local; + local->op = GF_FOP_FTRUNCATE; + local->offset = offset; + local->fd = fd_ref(fd); + if (xdata) + local->xdata = dict_ref(xdata); - STACK_WIND (frame, truncate_stat_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fstat, fd, xdata); - return 0; + frame->local = local; + STACK_WIND(frame, truncate_stat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, fd, xdata); + ret = 0; unwind: - gf_log (this->name, GF_LOG_ERROR, "ftruncate failed with ret: %d, " - "error: %s", -1, strerror (ENOMEM)); - STACK_UNWIND_STRICT (ftruncate, frame, -1, ENOMEM, NULL, NULL, NULL); - - return 0; + if (ret == -1) { + gf_log(this ? 
this->name : "locks", GF_LOG_ERROR, + "ftruncate failed with" + " ret: %d, error: %s", + -1, strerror(ENOMEM)); + STACK_UNWIND_STRICT(ftruncate, frame, -1, ENOMEM, NULL, NULL, NULL); + } + return 0; } int -pl_locks_by_fd (pl_inode_t *pl_inode, fd_t *fd) +pl_locks_by_fd(pl_inode_t *pl_inode, fd_t *fd) { - posix_lock_t *l = NULL; - int found = 0; - - pthread_mutex_lock (&pl_inode->mutex); - { - - list_for_each_entry (l, &pl_inode->ext_list, list) { - if (l->fd_num == fd_to_fdnum(fd)) { - found = 1; - break; - } - } + posix_lock_t *l = NULL; + int found = 0; - } - pthread_mutex_unlock (&pl_inode->mutex); - return found; + pthread_mutex_lock(&pl_inode->mutex); + { + list_for_each_entry(l, &pl_inode->ext_list, list) + { + if (l->fd_num == fd_to_fdnum(fd)) { + found = 1; + break; + } + } + } + pthread_mutex_unlock(&pl_inode->mutex); + return found; } static void -delete_locks_of_fd (xlator_t *this, pl_inode_t *pl_inode, fd_t *fd) +delete_locks_of_fd(xlator_t *this, pl_inode_t *pl_inode, fd_t *fd) { - posix_lock_t *tmp = NULL; - posix_lock_t *l = NULL; - - struct list_head blocked_list; + posix_lock_t *tmp = NULL; + posix_lock_t *l = NULL; - INIT_LIST_HEAD (&blocked_list); + struct list_head blocked_list; - pthread_mutex_lock (&pl_inode->mutex); - { + INIT_LIST_HEAD(&blocked_list); - list_for_each_entry_safe (l, tmp, &pl_inode->ext_list, list) { - if (l->fd_num == fd_to_fdnum(fd)) { - if (l->blocked) { - list_move_tail (&l->list, &blocked_list); - continue; - } - __delete_lock (pl_inode, l); - __destroy_lock (l); - } - } - - } - pthread_mutex_unlock (&pl_inode->mutex); - - list_for_each_entry_safe (l, tmp, &blocked_list, list) { - list_del_init(&l->list); - STACK_UNWIND_STRICT (lk, l->frame, -1, EAGAIN, &l->user_flock, - NULL); - __destroy_lock (l); - } + pthread_mutex_lock(&pl_inode->mutex); + { + list_for_each_entry_safe(l, tmp, &pl_inode->ext_list, list) + { + if (l->fd_num == fd_to_fdnum(fd)) { + if (l->blocked) { + list_move_tail(&l->list, &blocked_list); + continue; 
+ } + __delete_lock(l); + __destroy_lock(l); + } + } + } + pthread_mutex_unlock(&pl_inode->mutex); - grant_blocked_locks (this, pl_inode); + list_for_each_entry_safe(l, tmp, &blocked_list, list) + { + list_del_init(&l->list); + STACK_UNWIND_STRICT(lk, l->frame, -1, EAGAIN, &l->user_flock, NULL); + __destroy_lock(l); + } - do_blocked_rw (pl_inode); + grant_blocked_locks(this, pl_inode); + do_blocked_rw(pl_inode); } static void -__delete_locks_of_owner (pl_inode_t *pl_inode, - client_t *client, gf_lkowner_t *owner) -{ - posix_lock_t *tmp = NULL; - posix_lock_t *l = NULL; - - /* TODO: what if it is a blocked lock with pending l->frame */ - - list_for_each_entry_safe (l, tmp, &pl_inode->ext_list, list) { - if (l->blocked) - continue; - if ((l->client == client) && - is_same_lkowner (&l->owner, owner)) { - gf_log ("posix-locks", GF_LOG_TRACE, - " Flushing lock" - "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" state: %s", - l->fl_type == F_UNLCK ? "Unlock" : "Lock", - l->client_pid, - lkowner_utoa (&l->owner), - l->user_flock.l_start, - l->user_flock.l_len, - l->blocked == 1 ? "Blocked" : "Active"); - - __delete_lock (pl_inode, l); - __destroy_lock (l); - } +__delete_locks_of_owner(pl_inode_t *pl_inode, client_t *client, + gf_lkowner_t *owner) +{ + posix_lock_t *tmp = NULL; + posix_lock_t *l = NULL; + + /* TODO: what if it is a blocked lock with pending l->frame */ + + list_for_each_entry_safe(l, tmp, &pl_inode->ext_list, list) + { + if (l->blocked) + continue; + if ((l->client == client) && is_same_lkowner(&l->owner, owner)) { + gf_log("posix-locks", GF_LOG_TRACE, + " Flushing lock" + "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64 + " state: %s", + l->fl_type == F_UNLCK ? "Unlock" : "Lock", l->client_pid, + lkowner_utoa(&l->owner), l->user_flock.l_start, + l->user_flock.l_len, l->blocked == 1 ? 
"Blocked" : "Active"); + + __delete_lock(l); + __destroy_lock(l); } + } - return; + return; } - int32_t -pl_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata) +pl_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata) { - STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata); - return 0; - + STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, dict, xdata); + return 0; } -int32_t -pl_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *name, dict_t *xdata) -{ - int32_t op_errno = EINVAL; - int op_ret = -1; - int32_t bcount = 0; - int32_t gcount = 0; - char key[PATH_MAX] = {0, }; - char *lk_summary = NULL; - pl_inode_t *pl_inode = NULL; - dict_t *dict = NULL; - clrlk_args args = {0,}; - char *brickname = NULL; - - if (!name) - goto usual; - - if (strncmp (name, GF_XATTR_CLRLK_CMD, strlen (GF_XATTR_CLRLK_CMD))) - goto usual; - - if (clrlk_parse_args (name, &args)) { - op_errno = EINVAL; - goto out; +static int32_t +pl_getxattr_clrlk(xlator_t *this, const char *name, inode_t *inode, + dict_t **dict, int32_t *op_errno) +{ + int32_t bcount = 0; + int32_t gcount = 0; + char *key = NULL; + char *lk_summary = NULL; + pl_inode_t *pl_inode = NULL; + clrlk_args args = { + 0, + }; + char *brickname = NULL; + int32_t op_ret = -1; + + *op_errno = EINVAL; + + if (clrlk_parse_args(name, &args)) { + *op_errno = EINVAL; + goto out; + } + + *dict = dict_new(); + if (!*dict) { + *op_errno = ENOMEM; + goto out; + } + + pl_inode = pl_inode_get(this, inode, NULL); + if (!pl_inode) { + *op_errno = ENOMEM; + goto out; + } + + switch (args.type) { + case CLRLK_INODE: + case CLRLK_ENTRY: + op_ret = clrlk_clear_lks_in_all_domains(this, pl_inode, &args, + &bcount, &gcount, op_errno); + break; + case CLRLK_POSIX: + op_ret = clrlk_clear_posixlk(this, pl_inode, &args, &bcount, + &gcount, op_errno); + break; + 
default: + op_ret = -1; + *op_errno = EINVAL; + } + if (op_ret) { + if (args.type >= CLRLK_TYPE_MAX) { + gf_log(this->name, GF_LOG_ERROR, + "clear locks: invalid lock type %d", args.type); + } else { + gf_log(this->name, GF_LOG_ERROR, + "clear locks of type %s failed: %s", + clrlk_type_names[args.type], strerror(*op_errno)); } - dict = dict_new (); - if (!dict) { - op_errno = ENOMEM; - goto out; - } + goto out; + } - pl_inode = pl_inode_get (this, loc->inode); - if (!pl_inode) { - op_errno = ENOMEM; - goto out; + op_ret = fetch_pathinfo(this, inode, op_errno, &brickname); + if (op_ret) { + gf_log(this->name, GF_LOG_WARNING, "Couldn't get brickname"); + } else { + op_ret = format_brickname(brickname); + if (op_ret) { + gf_log(this->name, GF_LOG_WARNING, "Couldn't format brickname"); + GF_FREE(brickname); + brickname = NULL; } + } - switch (args.type) { - case CLRLK_INODE: - case CLRLK_ENTRY: - op_ret = clrlk_clear_lks_in_all_domains (this, pl_inode, - &args, &bcount, - &gcount, - &op_errno); - if (op_ret) - goto out; - break; - case CLRLK_POSIX: - op_ret = clrlk_clear_posixlk (this, pl_inode, &args, - &bcount, &gcount, - &op_errno); - if (op_ret) - goto out; - break; - case CLRLK_TYPE_MAX: - op_errno = EINVAL; - goto out; + if (!gcount && !bcount) { + if (gf_asprintf(&lk_summary, "No locks cleared.") == -1) { + op_ret = -1; + *op_errno = ENOMEM; + goto out; } + } else if (gf_asprintf(&lk_summary, + "%s: %s blocked locks=%d " + "granted locks=%d", + (brickname == NULL) ? 
this->name : brickname, + clrlk_type_names[args.type], bcount, gcount) == -1) { + op_ret = -1; + *op_errno = ENOMEM; + goto out; + } + gf_log(this->name, GF_LOG_DEBUG, "%s", lk_summary); + + key = gf_strdup(name); + if (!key) { + op_ret = -1; + goto out; + } + if (dict_set_dynstr(*dict, key, lk_summary)) { + op_ret = -1; + *op_errno = ENOMEM; + goto out; + } + + op_ret = 0; - op_ret = fetch_pathinfo (this, loc->inode, &op_errno, &brickname); - if (op_ret) { - gf_log (this->name, GF_LOG_WARNING, - "Couldn't get brickname"); - } else { - op_ret = format_brickname(brickname); - if (op_ret) { - gf_log (this->name, GF_LOG_WARNING, - "Couldn't format brickname"); - GF_FREE(brickname); - brickname = NULL; - } - } +out: + GF_FREE(brickname); + GF_FREE(args.opts); + GF_FREE(key); + if (op_ret) { + GF_FREE(lk_summary); + } + + return op_ret; +} - if (!gcount && !bcount) { - if (gf_asprintf (&lk_summary, "No locks cleared.") == -1) { - op_ret = -1; - op_errno = ENOMEM; - goto out; - } - } else if (gf_asprintf (&lk_summary, "%s: %s blocked locks=%d " - "granted locks=%d", - (brickname == NULL)? this->name : brickname, - (args.type == CLRLK_INODE)? "inode": - (args.type == CLRLK_ENTRY)? "entry": - (args.type == CLRLK_POSIX)? 
"posix": " ", - bcount, gcount) == -1) { - op_ret = -1; - op_errno = ENOMEM; - goto out; - } +int32_t +pl_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name, + dict_t *xdata) +{ + int32_t op_errno = EINVAL; + int32_t op_ret = -1; + dict_t *dict = NULL; - strncpy (key, name, strlen (name)); - if (dict_set_dynstr (dict, key, lk_summary)) { - op_ret = -1; - op_errno = ENOMEM; - goto out; - } + if (!name) + goto usual; - op_ret = 0; -out: - GF_FREE(brickname); - STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata); + if (strncmp(name, GF_XATTR_CLRLK_CMD, SLEN(GF_XATTR_CLRLK_CMD))) + goto usual; - GF_FREE (args.opts); - if (op_ret && lk_summary) - GF_FREE (lk_summary); - if (dict) - dict_unref (dict); - return 0; + op_ret = pl_getxattr_clrlk(this, name, loc->inode, &dict, &op_errno); + + STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, dict, xdata); + + if (dict) + dict_unref(dict); + return 0; usual: - STACK_WIND (frame, pl_getxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->getxattr, loc, name, xdata); - return 0; + STACK_WIND(frame, pl_getxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->getxattr, loc, name, xdata); + return 0; } static int format_brickname(char *brickname) { - int ret = -1; - char *hostname = NULL; - char *volume = NULL; - char *saveptr = NULL; + int ret = -1; + char *hostname = NULL; + char *volume = NULL; + char *saveptr = NULL; - if (!brickname) - goto out; + if (!brickname) + goto out; - strtok_r(brickname, ":", &saveptr); - hostname = gf_strdup(strtok_r(NULL, ":", &saveptr)); - if (hostname == NULL) - goto out; - volume = gf_strdup(strtok_r(NULL, ".", &saveptr)); - if (volume == NULL) - goto out; + strtok_r(brickname, ":", &saveptr); + hostname = gf_strdup(strtok_r(NULL, ":", &saveptr)); + if (hostname == NULL) + goto out; + volume = gf_strdup(strtok_r(NULL, ".", &saveptr)); + if (volume == NULL) + goto out; - sprintf(brickname, "%s:%s", hostname, volume); + sprintf(brickname, 
"%s:%s", hostname, volume); - ret = 0; + ret = 0; out: - GF_FREE(hostname); - GF_FREE(volume); - return ret; + GF_FREE(hostname); + GF_FREE(volume); + return ret; } static int -fetch_pathinfo (xlator_t *this, inode_t *inode, int32_t *op_errno, - char **brickname) +fetch_pathinfo(xlator_t *this, inode_t *inode, int32_t *op_errno, + char **brickname) { - int ret = -1; - loc_t loc = {0, }; - dict_t *dict = NULL; - - if (!brickname) - goto out; - - if (!op_errno) - goto out; - - gf_uuid_copy (loc.gfid, inode->gfid); - loc.inode = inode_ref (inode); - - ret = syncop_getxattr (FIRST_CHILD(this), &loc, &dict, - GF_XATTR_PATHINFO_KEY, NULL, NULL); - if (ret < 0) { - *op_errno = -ret; - ret = -1; - goto out; - } - - ret = dict_get_str (dict, GF_XATTR_PATHINFO_KEY, brickname); - if (ret) - goto out; - - *brickname = gf_strdup(*brickname); - if (*brickname == NULL) { - ret = -1; - goto out; - } - - ret = 0; + int ret = -1; + loc_t loc = { + 0, + }; + dict_t *dict = NULL; + + if (!brickname) + goto out; + + if (!op_errno) + goto out; + + gf_uuid_copy(loc.gfid, inode->gfid); + loc.inode = inode_ref(inode); + + ret = syncop_getxattr(FIRST_CHILD(this), &loc, &dict, GF_XATTR_PATHINFO_KEY, + NULL, NULL); + if (ret < 0) { + *op_errno = -ret; + ret = -1; + goto out; + } + + ret = dict_get_str_sizen(dict, GF_XATTR_PATHINFO_KEY, brickname); + if (ret) + goto out; + + *brickname = gf_strdup(*brickname); + if (*brickname == NULL) { + ret = -1; + goto out; + } + + ret = 0; out: - if (dict != NULL) { - dict_unref (dict); - } - loc_wipe(&loc); + if (dict != NULL) { + dict_unref(dict); + } + loc_wipe(&loc); - return ret; + return ret; } - int -pl_lockinfo_get_brickname (xlator_t *this, inode_t *inode, int32_t *op_errno) +pl_lockinfo_get_brickname(xlator_t *this, inode_t *inode, int32_t *op_errno) { - int ret = -1; - posix_locks_private_t *priv = NULL; - char *brickname = NULL; - char *end = NULL; - char *tmp = NULL; - - priv = this->private; + posix_locks_private_t *priv = this->private; + 
char *brickname = NULL; + char *end = NULL; + char *tmp = NULL; - ret = fetch_pathinfo (this, inode, op_errno, &brickname); - if (ret) - goto out; - - end = strrchr (brickname, ':'); - if (!end) { - GF_FREE(brickname); - ret = -1; - goto out; - } - - tmp = brickname; - brickname = gf_strndup (brickname, (end - brickname)); - if (brickname == NULL) { - ret = -1; - goto out; - } + int ret = fetch_pathinfo(this, inode, op_errno, &brickname); + if (ret) + goto out; - priv->brickname = brickname; - ret = 0; + end = strrchr(brickname, ':'); + if (!end) { + GF_FREE(brickname); + ret = -1; + goto out; + } + + tmp = brickname; + brickname = gf_strndup(brickname, (end - brickname)); + if (brickname == NULL) { + ret = -1; + goto out; + } + + priv->brickname = brickname; + ret = 0; out: - GF_FREE(tmp); - return ret; + GF_FREE(tmp); + return ret; } char * -pl_lockinfo_key (xlator_t *this, inode_t *inode, int32_t *op_errno) +pl_lockinfo_key(xlator_t *this, inode_t *inode, int32_t *op_errno) { - posix_locks_private_t *priv = NULL; - char *key = NULL; - int ret = 0; + posix_locks_private_t *priv = this->private; + char *key = NULL; + int ret = 0; - priv = this->private; - - if (priv->brickname == NULL) { - ret = pl_lockinfo_get_brickname (this, inode, op_errno); - if (ret < 0) { - gf_log (this->name, GF_LOG_WARNING, - "cannot get brickname"); - goto out; - } + if (priv->brickname == NULL) { + ret = pl_lockinfo_get_brickname(this, inode, op_errno); + if (ret < 0) { + gf_log(this->name, GF_LOG_WARNING, "cannot get brickname"); + goto out; } + } - key = priv->brickname; + key = priv->brickname; out: - return key; + return key; } int32_t -pl_fgetxattr_handle_lockinfo (xlator_t *this, fd_t *fd, - dict_t *dict, int32_t *op_errno) +pl_fgetxattr_handle_lockinfo(xlator_t *this, fd_t *fd, dict_t *dict, + int32_t *op_errno) { - pl_inode_t *pl_inode = NULL; - char *key = NULL, *buf = NULL; - int32_t op_ret = 0; - unsigned long fdnum = 0; - int32_t len = 0; - dict_t *tmp = NULL; - - pl_inode = 
pl_inode_get (this, fd->inode); - - if (!pl_inode) { - gf_log (this->name, GF_LOG_DEBUG, "Could not get inode."); - *op_errno = EBADFD; - op_ret = -1; - goto out; - } - - if (!pl_locks_by_fd (pl_inode, fd)) { - op_ret = 0; - goto out; - } - - fdnum = fd_to_fdnum (fd); - - key = pl_lockinfo_key (this, fd->inode, op_errno); - if (key == NULL) { - op_ret = -1; - goto out; - } - - tmp = dict_new (); - if (tmp == NULL) { - op_ret = -1; - *op_errno = ENOMEM; - goto out; - } - - op_ret = dict_set_uint64 (tmp, key, fdnum); - if (op_ret < 0) { - *op_errno = -op_ret; - op_ret = -1; - gf_log (this->name, GF_LOG_WARNING, "setting lockinfo value " - "(%lu) for fd (ptr:%p inode-gfid:%s) failed (%s)", - fdnum, fd, uuid_utoa (fd->inode->gfid), - strerror (*op_errno)); - goto out; - } - - len = dict_serialized_length (tmp); - if (len < 0) { - *op_errno = -op_ret; - op_ret = -1; - gf_log (this->name, GF_LOG_WARNING, - "dict_serialized_length failed (%s) while handling " - "lockinfo for fd (ptr:%p inode-gfid:%s)", - strerror (*op_errno), fd, uuid_utoa (fd->inode->gfid)); - goto out; - } - - buf = GF_CALLOC (1, len, gf_common_mt_char); - if (buf == NULL) { - op_ret = -1; - *op_errno = ENOMEM; - goto out; - } - - op_ret = dict_serialize (tmp, buf); - if (op_ret < 0) { - *op_errno = -op_ret; - op_ret = -1; - gf_log (this->name, GF_LOG_WARNING, - "dict_serialize failed (%s) while handling lockinfo " - "for fd (ptr: %p inode-gfid:%s)", strerror (*op_errno), - fd, uuid_utoa (fd->inode->gfid)); - goto out; - } - - op_ret = dict_set_dynptr (dict, GF_XATTR_LOCKINFO_KEY, buf, len); - if (op_ret < 0) { - *op_errno = -op_ret; - op_ret = -1; - gf_log (this->name, GF_LOG_WARNING, "setting lockinfo value " - "(%lu) for fd (ptr:%p inode-gfid:%s) failed (%s)", - fdnum, fd, uuid_utoa (fd->inode->gfid), - strerror (*op_errno)); - goto out; - } - - buf = NULL; + char *key = NULL, *buf = NULL; + int32_t op_ret = 0; + unsigned long fdnum = 0; + int32_t len = 0; + dict_t *tmp = NULL; + + pl_inode_t 
*pl_inode = pl_inode_get(this, fd->inode, NULL); + + if (!pl_inode) { + gf_log(this->name, GF_LOG_DEBUG, "Could not get inode."); + *op_errno = EBADFD; + op_ret = -1; + goto out; + } + + if (!pl_locks_by_fd(pl_inode, fd)) { + op_ret = 0; + goto out; + } + + fdnum = fd_to_fdnum(fd); + + key = pl_lockinfo_key(this, fd->inode, op_errno); + if (key == NULL) { + op_ret = -1; + goto out; + } + + tmp = dict_new(); + if (tmp == NULL) { + op_ret = -1; + *op_errno = ENOMEM; + goto out; + } + + op_ret = dict_set_uint64(tmp, key, fdnum); + if (op_ret < 0) { + *op_errno = -op_ret; + op_ret = -1; + gf_log(this->name, GF_LOG_WARNING, + "setting lockinfo value " + "(%lu) for fd (ptr:%p inode-gfid:%s) failed (%s)", + fdnum, fd, uuid_utoa(fd->inode->gfid), strerror(*op_errno)); + goto out; + } + + op_ret = dict_allocate_and_serialize(tmp, (char **)&buf, + (unsigned int *)&len); + if (op_ret != 0) { + *op_errno = -op_ret; + op_ret = -1; + gf_log(this->name, GF_LOG_WARNING, + "dict_serialized_length failed (%s) while handling " + "lockinfo for fd (ptr:%p inode-gfid:%s)", + strerror(*op_errno), fd, uuid_utoa(fd->inode->gfid)); + goto out; + } + + op_ret = dict_set_dynptr(dict, GF_XATTR_LOCKINFO_KEY, buf, len); + if (op_ret < 0) { + *op_errno = -op_ret; + op_ret = -1; + gf_log(this->name, GF_LOG_WARNING, + "setting lockinfo value " + "(%lu) for fd (ptr:%p inode-gfid:%s) failed (%s)", + fdnum, fd, uuid_utoa(fd->inode->gfid), strerror(*op_errno)); + goto out; + } + + buf = NULL; out: - if (tmp != NULL) { - dict_unref (tmp); - } + if (tmp != NULL) { + dict_unref(tmp); + } - if (buf != NULL) { - GF_FREE (buf); - } + if (buf != NULL) { + GF_FREE(buf); + } - return op_ret; + return op_ret; } - int32_t -pl_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, - const char *name, dict_t *xdata) +pl_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, + dict_t *xdata) { - int32_t op_ret = 0, op_errno = 0; - dict_t *dict = NULL; - - if (!name) { - goto usual; + int32_t 
op_ret = 0, op_errno = 0; + dict_t *dict = NULL; + + if (!name) { + goto usual; + } + + if (strcmp(name, GF_XATTR_LOCKINFO_KEY) == 0) { + dict = dict_new(); + if (dict == NULL) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; } - if (strcmp (name, GF_XATTR_LOCKINFO_KEY) == 0) { - dict = dict_new (); - if (dict == NULL) { - op_ret = -1; - op_errno = ENOMEM; - goto unwind; - } + op_ret = pl_fgetxattr_handle_lockinfo(this, fd, dict, &op_errno); + if (op_ret < 0) { + gf_log(this->name, GF_LOG_WARNING, + "getting lockinfo on fd (ptr:%p inode-gfid:%s) " + "failed (%s)", + fd, uuid_utoa(fd->inode->gfid), strerror(op_errno)); + } - op_ret = pl_fgetxattr_handle_lockinfo (this, fd, dict, - &op_errno); - if (op_ret < 0) { - gf_log (this->name, GF_LOG_WARNING, - "getting lockinfo on fd (ptr:%p inode-gfid:%s) " - "failed (%s)", fd, uuid_utoa (fd->inode->gfid), - strerror (op_errno)); - } + goto unwind; + } else if (strncmp(name, GF_XATTR_CLRLK_CMD, SLEN(GF_XATTR_CLRLK_CMD)) == + 0) { + op_ret = pl_getxattr_clrlk(this, name, fd->inode, &dict, &op_errno); - goto unwind; - } else { - goto usual; - } + goto unwind; + } else { + goto usual; + } unwind: - STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, NULL); - if (dict != NULL) { - dict_unref (dict); - } + STACK_UNWIND_STRICT(fgetxattr, frame, op_ret, op_errno, dict, NULL); + if (dict != NULL) { + dict_unref(dict); + } - return 0; + return 0; usual: - STACK_WIND (frame, default_fgetxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata); - return 0; + STACK_WIND(frame, default_fgetxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata); + return 0; } int32_t -pl_migrate_locks (call_frame_t *frame, fd_t *newfd, uint64_t oldfd_num, - int32_t *op_errno) +pl_migrate_locks(call_frame_t *frame, fd_t *newfd, uint64_t oldfd_num, + int32_t *op_errno) { - pl_inode_t *pl_inode = NULL; - uint64_t newfd_num = 0; - posix_lock_t *l = NULL; - int32_t op_ret = 0; - - 
newfd_num = fd_to_fdnum (newfd); - - pl_inode = pl_inode_get (frame->this, newfd->inode); - if (pl_inode == NULL) { - op_ret = -1; - *op_errno = EBADFD; - goto out; - } - - pthread_mutex_lock (&pl_inode->mutex); + posix_lock_t *l = NULL; + int32_t op_ret = 0; + uint64_t newfd_num = fd_to_fdnum(newfd); + + pl_inode_t *pl_inode = pl_inode_get(frame->this, newfd->inode, NULL); + if (pl_inode == NULL) { + op_ret = -1; + *op_errno = EBADFD; + goto out; + } + + pthread_mutex_lock(&pl_inode->mutex); + { + list_for_each_entry(l, &pl_inode->ext_list, list) { - list_for_each_entry (l, &pl_inode->ext_list, list) { - if (l->fd_num == oldfd_num) { - l->fd_num = newfd_num; - l->client = frame->root->client; - } - } + if (l->fd_num == oldfd_num) { + l->fd_num = newfd_num; + l->client = frame->root->client; + } } - pthread_mutex_unlock (&pl_inode->mutex); + } + pthread_mutex_unlock(&pl_inode->mutex); - op_ret = 0; + op_ret = 0; out: - return op_ret; + return op_ret; } int32_t -pl_fsetxattr_handle_lockinfo (call_frame_t *frame, fd_t *fd, char *lockinfo_buf, - int len, int32_t *op_errno) +pl_fsetxattr_handle_lockinfo(call_frame_t *frame, fd_t *fd, char *lockinfo_buf, + int len, int32_t *op_errno) { - int32_t op_ret = -1; - dict_t *lockinfo = NULL; - uint64_t oldfd_num = 0; - char *key = NULL; - - lockinfo = dict_new (); - if (lockinfo == NULL) { - op_ret = -1; - *op_errno = ENOMEM; - goto out; - } + int32_t op_ret = -1; + uint64_t oldfd_num = 0; + char *key = NULL; + + dict_t *lockinfo = dict_new(); + if (lockinfo == NULL) { + op_ret = -1; + *op_errno = ENOMEM; + goto out; + } + + op_ret = dict_unserialize(lockinfo_buf, len, &lockinfo); + if (op_ret < 0) { + *op_errno = -op_ret; + op_ret = -1; + goto out; + } + + key = pl_lockinfo_key(frame->this, fd->inode, op_errno); + if (key == NULL) { + op_ret = -1; + goto out; + } + + op_ret = dict_get_uint64(lockinfo, key, &oldfd_num); + + if (oldfd_num == 0) { + op_ret = 0; + goto out; + } + + op_ret = pl_migrate_locks(frame, fd, oldfd_num, 
op_errno); + if (op_ret < 0) { + gf_log(frame->this->name, GF_LOG_WARNING, + "migration of locks from oldfd (ptr:%p) to newfd " + "(ptr:%p) (inode-gfid:%s)", + (void *)(uintptr_t)oldfd_num, fd, uuid_utoa(fd->inode->gfid)); + goto out; + } - op_ret = dict_unserialize (lockinfo_buf, len, &lockinfo); - if (op_ret < 0) { - *op_errno = -op_ret; - op_ret = -1; - goto out; - } +out: + dict_unref(lockinfo); - key = pl_lockinfo_key (frame->this, fd->inode, op_errno); - if (key == NULL) { - op_ret = -1; - goto out; - } + return op_ret; +} - op_ret = dict_get_uint64 (lockinfo, key, &oldfd_num); +int32_t +pl_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + pl_local_t *local = NULL; + pl_inode_t *pl_inode = NULL; - if (oldfd_num == 0) { - op_ret = 0; - goto out; + local = frame->local; + if (local && local->update_mlock_enforced_flag && op_ret != -1) { + pl_inode = pl_inode_get(this, local->inode, NULL); + if (!pl_inode) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; } - op_ret = pl_migrate_locks (frame, fd, oldfd_num, op_errno); - if (op_ret < 0) { - gf_log (frame->this->name, GF_LOG_WARNING, - "migration of locks from oldfd (ptr:%p) to newfd " - "(ptr:%p) (inode-gfid:%s)", (void *)oldfd_num, fd, - uuid_utoa (fd->inode->gfid)); - goto out; + pthread_mutex_lock(&pl_inode->mutex); + { + pl_inode->mlock_enforced = _gf_true; + pl_inode->check_mlock_info = _gf_false; } + pthread_mutex_unlock(&pl_inode->mutex); + } -out: - dict_unref (lockinfo); - - return op_ret; +unwind: + PL_STACK_UNWIND_FOR_CLIENT(fsetxattr, xdata, frame, op_ret, op_errno, + xdata); + return 0; } int32_t -pl_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, - int32_t flags, dict_t *xdata) +pl_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, + int32_t flags, dict_t *xdata) { - int32_t op_ret = 0, op_errno = 0; - void *lockinfo_buf = NULL; - int len = 0; + int32_t op_errno = 0; + void 
*lockinfo_buf = NULL; + int len = 0; + char *name = NULL; + posix_locks_private_t *priv = this->private; + + int32_t op_ret = dict_get_ptr_and_len(dict, GF_XATTR_LOCKINFO_KEY, + &lockinfo_buf, &len); + if (lockinfo_buf == NULL) { + goto usual; + } + + op_ret = pl_fsetxattr_handle_lockinfo(frame, fd, lockinfo_buf, len, + &op_errno); + if (op_ret < 0) { + goto unwind; + } - op_ret = dict_get_ptr_and_len (dict, GF_XATTR_LOCKINFO_KEY, - &lockinfo_buf, &len); - if (lockinfo_buf == NULL) { - goto usual; - } +usual: + PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL); - op_ret = pl_fsetxattr_handle_lockinfo (frame, fd, lockinfo_buf, len, - &op_errno); - if (op_ret < 0) { - goto unwind; - } + PL_CHECK_LOCK_ENFORCE_KEY(frame, dict, name, this, ((loc_t *)NULL), fd, + priv); -usual: - STACK_WIND (frame, default_fsetxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata); - return 0; + STACK_WIND(frame, pl_fsetxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata); + return 0; unwind: - STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, NULL); - return 0; + PL_STACK_UNWIND_FOR_CLIENT(fsetxattr, xdata, frame, op_ret, op_errno, NULL); + + return 0; } int32_t -pl_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) +pl_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) { - pl_fdctx_t *fdctx = NULL; + pl_fdctx_t *fdctx = NULL; - if (op_ret < 0) - goto unwind; + if (op_ret < 0) + goto unwind; - fdctx = pl_check_n_create_fdctx (this, fd); - if (!fdctx) { - op_errno = ENOMEM; - op_ret = -1; - goto unwind; - } + fdctx = pl_check_n_create_fdctx(this, fd); + if (!fdctx) { + op_errno = ENOMEM; + op_ret = -1; + goto unwind; + } unwind: - PL_STACK_UNWIND (opendir, xdata, frame, op_ret, op_errno, fd, xdata); + PL_STACK_UNWIND(opendir, xdata, frame, op_ret, 
op_errno, fd, xdata); - return 0; + return 0; } int32_t -pl_opendir (call_frame_t *frame, xlator_t *this, - loc_t *loc, fd_t *fd, dict_t *xdata) +pl_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, + dict_t *xdata) { - PL_LOCAL_GET_REQUESTS (frame, this, xdata, fd, NULL); - STACK_WIND (frame, pl_opendir_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->opendir, loc, fd, xdata); - return 0; + PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL); + STACK_WIND(frame, pl_opendir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->opendir, loc, fd, xdata); + return 0; } int -pl_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +pl_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *xdata) { - STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, xdata); + PL_STACK_UNWIND_FOR_CLIENT(flush, xdata, frame, op_ret, op_errno, xdata); - return 0; + return 0; } - int -pl_flush (call_frame_t *frame, xlator_t *this, - fd_t *fd, dict_t *xdata) +pl_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { - pl_inode_t *pl_inode = NULL; - - pl_inode = pl_inode_get (this, fd->inode); - - if (!pl_inode) { - gf_log (this->name, GF_LOG_DEBUG, "Could not get inode."); - STACK_UNWIND_STRICT (flush, frame, -1, EBADFD, NULL); - return 0; - } - - pl_trace_flush (this, frame, fd); - - if (frame->root->lk_owner.len == 0) { - /* Handle special case when protocol/server sets lk-owner to zero. - * This usually happens due to a client disconnection. Hence, free - * all locks opened with this fd. 
- */ - gf_log (this->name, GF_LOG_TRACE, - "Releasing all locks with fd %p", fd); - delete_locks_of_fd (this, pl_inode, fd); - goto wind; - - } - pthread_mutex_lock (&pl_inode->mutex); - { - __delete_locks_of_owner (pl_inode, frame->root->client, - &frame->root->lk_owner); + pl_inode_t *pl_inode = pl_inode_get(this, fd->inode, NULL); + if (!pl_inode) { + gf_log(this->name, GF_LOG_DEBUG, "Could not get inode."); + STACK_UNWIND_STRICT(flush, frame, -1, EBADFD, NULL); + return 0; + } + + pthread_mutex_lock(&pl_inode->mutex); + { + if (pl_inode->migrated) { + pthread_mutex_unlock(&pl_inode->mutex); + STACK_UNWIND_STRICT(flush, frame, -1, EREMOTE, NULL); + return 0; } - pthread_mutex_unlock (&pl_inode->mutex); - - grant_blocked_locks (this, pl_inode); - - do_blocked_rw (pl_inode); + } + pthread_mutex_unlock(&pl_inode->mutex); + + pl_trace_flush(this, frame, fd); + + if (frame->root->lk_owner.len == 0) { + /* Handle special case when protocol/server sets lk-owner to zero. + * This usually happens due to a client disconnection. Hence, free + * all locks opened with this fd. 
+ */ + gf_log(this->name, GF_LOG_TRACE, "Releasing all locks with fd %p", fd); + delete_locks_of_fd(this, pl_inode, fd); + goto wind; + } + pthread_mutex_lock(&pl_inode->mutex); + { + __delete_locks_of_owner(pl_inode, frame->root->client, + &frame->root->lk_owner); + } + pthread_mutex_unlock(&pl_inode->mutex); + + grant_blocked_locks(this, pl_inode); + + do_blocked_rw(pl_inode); wind: - STACK_WIND (frame, pl_flush_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->flush, fd, xdata); - return 0; + PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL); + STACK_WIND(frame, pl_flush_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->flush, fd, xdata); + return 0; } - int -pl_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) +pl_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, fd_t *fd, dict_t *xdata) { - pl_fdctx_t *fdctx = NULL; + pl_fdctx_t *fdctx = NULL; - if (op_ret < 0) - goto unwind; + if (op_ret < 0) + goto unwind; - fdctx = pl_check_n_create_fdctx (this, fd); - if (!fdctx) { - op_errno = ENOMEM; - op_ret = -1; - goto unwind; - } + fdctx = pl_check_n_create_fdctx(this, fd); + if (!fdctx) { + op_errno = ENOMEM; + op_ret = -1; + goto unwind; + } unwind: - STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata); + STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, xdata); - return 0; + return 0; } int -pl_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - fd_t *fd, dict_t *xdata) +pl_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + fd_t *fd, dict_t *xdata) { - STACK_WIND (frame, pl_open_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->open, - loc, flags, fd, xdata); + int op_ret = -1; + int op_errno = EINVAL; + pl_inode_t *pl_inode = NULL; + posix_lock_t *l = NULL; + posix_locks_private_t *priv = this->private; + + GF_VALIDATE_OR_GOTO("locks", this, unwind); + + op_ret = 0, op_errno = 0; + 
pl_inode = pl_inode_get(this, fd->inode, NULL); + if (!pl_inode) { + gf_msg(this->name, GF_LOG_ERROR, 0, ENOMEM, "Could not get inode"); + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + + /* As per design, under forced and file-based mandatory locking modes + * it doesn't matter whether inodes's lock list contain advisory or + * mandatory type locks. So we just check whether inode's lock list is + * empty or not to make sure that no locks are being held for the file. + * Whereas under optimal mandatory locking mode, we strictly fail open + * if and only if lock list contain mandatory locks. + */ + if (((priv->mandatory_mode == MLK_FILE_BASED) && pl_inode->mandatory) || + priv->mandatory_mode == MLK_FORCED) { + if (fd->flags & O_TRUNC) { + pthread_mutex_lock(&pl_inode->mutex); + { + if (!list_empty(&pl_inode->ext_list)) { + op_ret = -1; + op_errno = EAGAIN; + } + } + pthread_mutex_unlock(&pl_inode->mutex); + } + } else if (priv->mandatory_mode == MLK_OPTIMAL) { + if (fd->flags & O_TRUNC) { + pthread_mutex_lock(&pl_inode->mutex); + { + list_for_each_entry(l, &pl_inode->ext_list, list) + { + if ((l->lk_flags & GF_LK_MANDATORY)) { + op_ret = -1; + op_errno = EAGAIN; + break; + } + } + } + pthread_mutex_unlock(&pl_inode->mutex); + } + } - return 0; +unwind: + if (op_ret == -1) + STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, NULL, NULL); + else + STACK_WIND(frame, pl_open_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); + return 0; } - int -pl_create_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - fd_t *fd, inode_t *inode, struct iatt *buf, - struct iatt *preparent, struct iatt *postparent, dict_t *xdata) +pl_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, fd_t *fd, inode_t *inode, struct iatt *buf, + struct iatt *preparent, struct iatt *postparent, dict_t *xdata) { - pl_fdctx_t *fdctx = NULL; + pl_fdctx_t *fdctx = NULL; - if 
(op_ret < 0) - goto unwind; + if (op_ret < 0) + goto unwind; - fdctx = pl_check_n_create_fdctx (this, fd); - if (!fdctx) { - op_errno = ENOMEM; - op_ret = -1; - goto unwind; - } + fdctx = pl_check_n_create_fdctx(this, fd); + if (!fdctx) { + op_errno = ENOMEM; + op_ret = -1; + goto unwind; + } unwind: - PL_STACK_UNWIND (create, xdata, frame, op_ret, op_errno, fd, inode, buf, - preparent, postparent, xdata); + PL_STACK_UNWIND(create, xdata, frame, op_ret, op_errno, fd, inode, buf, + preparent, postparent, xdata); - return 0; + return 0; } - int -pl_create (call_frame_t *frame, xlator_t *this, - loc_t *loc, int32_t flags, mode_t mode, mode_t umask, fd_t *fd, - dict_t *xdata) +pl_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) { - PL_LOCAL_GET_REQUESTS (frame, this, xdata, NULL, loc); - STACK_WIND (frame, pl_create_cbk, - FIRST_CHILD (this), FIRST_CHILD (this)->fops->create, - loc, flags, mode, umask, fd, xdata); - return 0; -} - -int32_t -pl_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - PL_STACK_UNWIND (unlink, xdata, frame, op_ret, op_errno, preparent, - postparent, xdata); - return 0; -} + PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL); -int32_t -pl_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, - dict_t *xdata) -{ - PL_LOCAL_GET_REQUESTS (frame, this, xdata, NULL, loc); - STACK_WIND (frame, pl_unlink_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); - return 0; + STACK_WIND(frame, pl_create_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd, + xdata); + return 0; } int -pl_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iovec *vector, int32_t count, struct iatt *stbuf, - struct iobref *iobref, dict_t *xdata) 
+pl_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iovec *vector, int32_t count, + struct iatt *stbuf, struct iobref *iobref, dict_t *xdata) { - PL_STACK_UNWIND (readv, xdata, frame, op_ret, op_errno, - vector, count, stbuf, iobref, xdata); + pl_track_io_fop_count(frame->local, this, DECREMENT); - return 0; + PL_STACK_UNWIND(readv, xdata, frame, op_ret, op_errno, vector, count, stbuf, + iobref, xdata); + + return 0; } int -pl_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) +pl_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf, + dict_t *xdata) { - PL_STACK_UNWIND (writev, xdata, frame, op_ret, op_errno, prebuf, - postbuf, xdata); + pl_track_io_fop_count(frame->local, this, DECREMENT); - return 0; + PL_STACK_UNWIND(writev, xdata, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + + return 0; } void -do_blocked_rw (pl_inode_t *pl_inode) +do_blocked_rw(pl_inode_t *pl_inode) { - struct list_head wind_list; - pl_rw_req_t *rw = NULL; - pl_rw_req_t *tmp = NULL; + struct list_head wind_list; + pl_rw_req_t *rw = NULL; + pl_rw_req_t *tmp = NULL; - INIT_LIST_HEAD (&wind_list); + INIT_LIST_HEAD(&wind_list); - pthread_mutex_lock (&pl_inode->mutex); + pthread_mutex_lock(&pl_inode->mutex); + { + list_for_each_entry_safe(rw, tmp, &pl_inode->rw_list, list) { - list_for_each_entry_safe (rw, tmp, &pl_inode->rw_list, list) { - if (__rw_allowable (pl_inode, &rw->region, - rw->stub->fop)) { - list_del_init (&rw->list); - list_add_tail (&rw->list, &wind_list); - } + if (__rw_allowable(pl_inode, &rw->region, rw->stub->fop)) { + list_del_init(&rw->list); + list_add_tail(&rw->list, &wind_list); + if (pl_inode->mlock_enforced && + pl_inode->track_fop_wind_count) { + pl_inode->fop_wind_count++; } + } } - pthread_mutex_unlock 
(&pl_inode->mutex); + } + pthread_mutex_unlock(&pl_inode->mutex); - list_for_each_entry_safe (rw, tmp, &wind_list, list) { - list_del_init (&rw->list); - call_resume (rw->stub); - GF_FREE (rw); - } + list_for_each_entry_safe(rw, tmp, &wind_list, list) + { + list_del_init(&rw->list); + call_resume(rw->stub); + GF_FREE(rw); + } - return; + return; } +/* when mandatory lock is enforced: + If an IO request comes on a region which is out of the boundary of the + granted mandatory lock, it will be rejected. + + Note: There is no IO blocking with mandatory lock enforced as it may be + a stale data from an old client. + */ +gf_boolean_t static within_range(posix_lock_t *existing, posix_lock_t *new) +{ + if (existing->fl_start <= new->fl_start && existing->fl_end >= new->fl_end) + return _gf_true; + + return _gf_false; +} static int -__rw_allowable (pl_inode_t *pl_inode, posix_lock_t *region, - glusterfs_fop_t op) +__rw_allowable(pl_inode_t *pl_inode, posix_lock_t *region, glusterfs_fop_t op) { - posix_lock_t *l = NULL; - int ret = 1; + posix_lock_t *l = NULL; + posix_locks_private_t *priv = THIS->private; + int ret = 1; - list_for_each_entry (l, &pl_inode->ext_list, list) { - if (locks_overlap (l, region) && !same_owner (l, region)) { - if ((op == GF_FOP_READ) && (l->fl_type != F_WRLCK)) - continue; - ret = 0; - break; + if (pl_inode->mlock_enforced) { + list_for_each_entry(l, &pl_inode->ext_list, list) + { + /* + with lock enforced (fencing) there should not be any blocking + lock coexisting. + */ + if (same_owner(l, region)) { + /* Should range check be strict for same owner with fencing? */ + if (locks_overlap(l, region)) { + if (within_range(l, region)) { + return 1; + } else { + /* + Should we allow read fop if it does not fit it in the + range? 
+ if (op == GF_FOP_READ && l->fl_type != F_WRLCK) { + return 1; + } + */ + return 0; + } + } + } else { + if (locks_overlap(l, region)) { + /* + with fencing should a read from a different owner be + allowed if the mandatory lock taken is F_RDLCK? + if (op == GF_FOP_READ && l->fl_type != F_WRLCK) { + return 1; + } + */ + return 0; } + } } - return ret; -} + /* No lock has been taken by this owner */ + return 0; + } + + list_for_each_entry(l, &pl_inode->ext_list, list) + { + if (!l->blocked && locks_overlap(l, region) && !same_owner(l, region)) { + if ((op == GF_FOP_READ) && (l->fl_type != F_WRLCK)) + continue; + /* Check for mandatory lock under optimal + * mandatory-locking mode */ + if (priv->mandatory_mode == MLK_OPTIMAL && + !(l->lk_flags & GF_LK_MANDATORY)) + continue; + ret = 0; + break; + } + } + return ret; +} int -pl_readv_cont (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset, uint32_t flags, dict_t *xdata) +pl_readv_cont(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) { - STACK_WIND (frame, pl_readv_cbk, - FIRST_CHILD (this), FIRST_CHILD (this)->fops->readv, - fd, size, offset, flags, xdata); + pl_track_io_fop_count(frame->local, this, INCREMENT); - return 0; -} + STACK_WIND(frame, pl_readv_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata); + return 0; +} int -pl_readv (call_frame_t *frame, xlator_t *this, - fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata) +pl_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) { - posix_locks_private_t *priv = NULL; - pl_inode_t *pl_inode = NULL; - pl_rw_req_t *rw = NULL; - posix_lock_t region = {.list = {0, }, }; - int op_ret = 0; - int op_errno = 0; - char wind_needed = 1; - - - priv = this->private; - pl_inode = pl_inode_get (this, fd->inode); - - PL_LOCAL_GET_REQUESTS (frame, this, xdata, fd, NULL); - - if 
(priv->mandatory && pl_inode->mandatory) { - region.fl_start = offset; - region.fl_end = offset + size - 1; - region.client = frame->root->client; - region.fd_num = fd_to_fdnum(fd); - region.client_pid = frame->root->pid; - region.owner = frame->root->lk_owner; - - pthread_mutex_lock (&pl_inode->mutex); - { - wind_needed = __rw_allowable (pl_inode, ®ion, - GF_FOP_READ); - if (wind_needed) { - goto unlock; - } - - if (fd->flags & O_NONBLOCK) { - gf_log (this->name, GF_LOG_TRACE, - "returning EAGAIN as fd is O_NONBLOCK"); - op_errno = EAGAIN; - op_ret = -1; - goto unlock; - } - - rw = GF_CALLOC (1, sizeof (*rw), - gf_locks_mt_pl_rw_req_t); - if (!rw) { - op_errno = ENOMEM; - op_ret = -1; - goto unlock; - } - - rw->stub = fop_readv_stub (frame, pl_readv_cont, - fd, size, offset, flags, - xdata); - if (!rw->stub) { - op_errno = ENOMEM; - op_ret = -1; - GF_FREE (rw); - goto unlock; - } + pl_local_t *local = NULL; + pl_inode_t *pl_inode = NULL; + pl_rw_req_t *rw = NULL; + posix_lock_t region = { + .list = + { + 0, + }, + }; + gf_boolean_t enabled = _gf_false; + gf_boolean_t can_block = _gf_true; + int op_ret = 0; + int op_errno = 0; + int allowed = 1; + + GF_VALIDATE_OR_GOTO("locks", this, unwind); + + PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL); + + if (!frame->local) { + frame->local = mem_get0(this->local_pool); + local = frame->local; + local->inode = inode_ref(fd->inode); + local->fd = fd_ref(fd); + } + + pl_inode = pl_inode_get(this, fd->inode, local); + if (!pl_inode) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + + if (frame->root->pid < 0) + enabled = _gf_false; + else + enabled = pl_is_mandatory_locking_enabled(pl_inode); + + if (enabled) { + region.fl_start = offset; + region.fl_end = offset + size - 1; + region.client = frame->root->client; + region.fd_num = fd_to_fdnum(fd); + region.client_pid = frame->root->pid; + region.owner = frame->root->lk_owner; + + pthread_mutex_lock(&pl_inode->mutex); + { + allowed = 
pl_is_fop_allowed(pl_inode, ®ion, fd, GF_FOP_READ, + &can_block); + if (allowed == 1) { + if (pl_inode->mlock_enforced && + pl_inode->track_fop_wind_count) { + pl_inode->fop_wind_count++; + } + goto unlock; + } else if (!can_block) { + op_errno = EAGAIN; + op_ret = -1; + goto unlock; + } - rw->region = region; + rw = GF_MALLOC(sizeof(*rw), gf_locks_mt_pl_rw_req_t); + if (!rw) { + op_errno = ENOMEM; + op_ret = -1; + goto unlock; + } - list_add_tail (&rw->list, &pl_inode->rw_list); - } - unlock: - pthread_mutex_unlock (&pl_inode->mutex); - } + rw->stub = fop_readv_stub(frame, pl_readv_cont, fd, size, offset, + flags, xdata); + if (!rw->stub) { + op_errno = ENOMEM; + op_ret = -1; + GF_FREE(rw); + goto unlock; + } + rw->region = region; - if (wind_needed) { - STACK_WIND (frame, pl_readv_cbk, - FIRST_CHILD (this), FIRST_CHILD (this)->fops->readv, - fd, size, offset, flags, xdata); + list_add_tail(&rw->list, &pl_inode->rw_list); } + unlock: + pthread_mutex_unlock(&pl_inode->mutex); + } + + if (allowed == 1) { + STACK_WIND(frame, pl_readv_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, + xdata); + } +unwind: + if (op_ret == -1) + PL_STACK_UNWIND(readv, xdata, frame, op_ret, op_errno, NULL, 0, NULL, + NULL, NULL); - if (op_ret == -1) - STACK_UNWIND_STRICT (readv, frame, -1, op_errno, - NULL, 0, NULL, NULL, NULL); - - return 0; + return 0; } - int -pl_writev_cont (call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iovec *vector, int count, off_t offset, - uint32_t flags, struct iobref *iobref, dict_t *xdata) +pl_writev_cont(call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iovec *vector, int count, off_t offset, uint32_t flags, + struct iobref *iobref, dict_t *xdata) { - STACK_WIND (frame, pl_writev_cbk, - FIRST_CHILD (this), FIRST_CHILD (this)->fops->writev, - fd, vector, count, offset, flags, iobref, xdata); + pl_track_io_fop_count(frame->local, this, INCREMENT); - return 0; -} + STACK_WIND(frame, pl_writev_cbk, 
FIRST_CHILD(this), + FIRST_CHILD(this)->fops->writev, fd, vector, count, offset, + flags, iobref, xdata); + return 0; +} int -pl_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iovec *vector, int32_t count, off_t offset, - uint32_t flags, struct iobref *iobref, dict_t *xdata) -{ - posix_locks_private_t *priv = NULL; - pl_inode_t *pl_inode = NULL; - pl_rw_req_t *rw = NULL; - posix_lock_t region = {.list = {0, }, }; - int op_ret = 0; - int op_errno = 0; - char wind_needed = 1; - - priv = this->private; - pl_inode = pl_inode_get (this, fd->inode); - - PL_LOCAL_GET_REQUESTS (frame, this, xdata, fd, NULL); - - if (priv->mandatory && pl_inode->mandatory) { - region.fl_start = offset; - region.fl_end = offset + iov_length (vector, count) - 1; - region.client = frame->root->client; - region.fd_num = fd_to_fdnum(fd); - region.client_pid = frame->root->pid; - region.owner = frame->root->lk_owner; - - pthread_mutex_lock (&pl_inode->mutex); - { - wind_needed = __rw_allowable (pl_inode, ®ion, - GF_FOP_WRITE); - if (wind_needed) - goto unlock; - - if (fd->flags & O_NONBLOCK) { - gf_log (this->name, GF_LOG_TRACE, - "returning EAGAIN because fd is " - "O_NONBLOCK"); - op_errno = EAGAIN; - op_ret = -1; - goto unlock; - } - - rw = GF_CALLOC (1, sizeof (*rw), - gf_locks_mt_pl_rw_req_t); - if (!rw) { - op_errno = ENOMEM; - op_ret = -1; - goto unlock; - } +pl_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, + int32_t count, off_t offset, uint32_t flags, struct iobref *iobref, + dict_t *xdata) +{ + pl_local_t *local = NULL; + pl_inode_t *pl_inode = NULL; + pl_rw_req_t *rw = NULL; + posix_lock_t region = { + .list = + { + 0, + }, + }; + gf_boolean_t enabled = _gf_false; + gf_boolean_t can_block = _gf_true; + int op_ret = 0; + int op_errno = 0; + int allowed = 1; + + GF_VALIDATE_OR_GOTO("locks", this, unwind); + + PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL); + + if (!frame->local) { + frame->local = mem_get0(this->local_pool); + 
local = frame->local; + local->inode = inode_ref(fd->inode); + local->fd = fd_ref(fd); + } + + pl_inode = pl_inode_get(this, fd->inode, local); + if (!pl_inode) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + + if (frame->root->pid < 0) + enabled = _gf_false; + else + enabled = pl_is_mandatory_locking_enabled(pl_inode); + + if (enabled) { + region.fl_start = offset; + region.fl_end = offset + iov_length(vector, count) - 1; + region.client = frame->root->client; + region.fd_num = fd_to_fdnum(fd); + region.client_pid = frame->root->pid; + region.owner = frame->root->lk_owner; + + pthread_mutex_lock(&pl_inode->mutex); + { + allowed = pl_is_fop_allowed(pl_inode, ®ion, fd, GF_FOP_WRITE, + &can_block); + if (allowed == 1) { + if (pl_inode->mlock_enforced && + pl_inode->track_fop_wind_count) { + pl_inode->fop_wind_count++; + } + goto unlock; + } else if (!can_block) { + if (pl_inode->mlock_enforced) { + op_errno = EBUSY; + } else { + op_errno = EAGAIN; + } - rw->stub = fop_writev_stub (frame, pl_writev_cont, - fd, vector, count, offset, - flags, iobref, xdata); - if (!rw->stub) { - op_errno = ENOMEM; - op_ret = -1; - GF_FREE (rw); - goto unlock; - } + op_ret = -1; + goto unlock; + } - rw->region = region; + rw = GF_MALLOC(sizeof(*rw), gf_locks_mt_pl_rw_req_t); + if (!rw) { + op_errno = ENOMEM; + op_ret = -1; + goto unlock; + } - list_add_tail (&rw->list, &pl_inode->rw_list); - } - unlock: - pthread_mutex_unlock (&pl_inode->mutex); - } + rw->stub = fop_writev_stub(frame, pl_writev_cont, fd, vector, count, + offset, flags, iobref, xdata); + if (!rw->stub) { + op_errno = ENOMEM; + op_ret = -1; + GF_FREE(rw); + goto unlock; + } + rw->region = region; - if (wind_needed) { - STACK_WIND (frame, pl_writev_cbk, - FIRST_CHILD (this), FIRST_CHILD (this)->fops->writev, - fd, vector, count, offset, flags, iobref, xdata); + list_add_tail(&rw->list, &pl_inode->rw_list); } + unlock: + pthread_mutex_unlock(&pl_inode->mutex); + } + + if (allowed == 1) { + STACK_WIND(frame, 
pl_writev_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->writev, fd, vector, count, offset, + flags, iobref, xdata); + } +unwind: + if (op_ret == -1) + PL_STACK_UNWIND(writev, xdata, frame, op_ret, op_errno, NULL, NULL, + NULL); - if (op_ret == -1) - STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL, - NULL); - - return 0; + return 0; } static int -__fd_has_locks (pl_inode_t *pl_inode, fd_t *fd) +__fd_has_locks(pl_inode_t *pl_inode, fd_t *fd) { - int found = 0; - posix_lock_t *l = NULL; + posix_lock_t *l = NULL; - list_for_each_entry (l, &pl_inode->ext_list, list) { - if (l->fd_num == fd_to_fdnum(fd)) { - found = 1; - break; - } + list_for_each_entry(l, &pl_inode->ext_list, list) + { + if (l->fd_num == fd_to_fdnum(fd)) { + return 1; } + } - return found; + return 0; } static posix_lock_t * -lock_dup (posix_lock_t *lock) +lock_dup(posix_lock_t *lock) { - posix_lock_t *new_lock = NULL; - - new_lock = new_posix_lock (&lock->user_flock, lock->client, - lock->client_pid, &lock->owner, - (fd_t *)lock->fd_num); - return new_lock; + int32_t op_errno = 0; + return new_posix_lock(&lock->user_flock, lock->client, lock->client_pid, + &lock->owner, (fd_t *)lock->fd_num, lock->lk_flags, + lock->blocking, &op_errno); } static int -__dup_locks_to_fdctx (pl_inode_t *pl_inode, fd_t *fd, - pl_fdctx_t *fdctx) -{ - posix_lock_t *l = NULL; - posix_lock_t *duplock = NULL; - int ret = 0; - - list_for_each_entry (l, &pl_inode->ext_list, list) { - if (l->fd_num == fd_to_fdnum(fd)) { - duplock = lock_dup (l); - if (!duplock) { - ret = -1; - break; - } +__dup_locks_to_fdctx(pl_inode_t *pl_inode, fd_t *fd, pl_fdctx_t *fdctx) +{ + posix_lock_t *l = NULL; + posix_lock_t *duplock = NULL; + int ret = 0; + + list_for_each_entry(l, &pl_inode->ext_list, list) + { + if (l->fd_num == fd_to_fdnum(fd)) { + duplock = lock_dup(l); + if (!duplock) { + ret = -1; + break; + } - list_add_tail (&duplock->list, &fdctx->locks_list); - } + list_add_tail(&duplock->list, &fdctx->locks_list); } + } - 
return ret; + return ret; } static int -__copy_locks_to_fdctx (pl_inode_t *pl_inode, fd_t *fd, - pl_fdctx_t *fdctx) +__copy_locks_to_fdctx(pl_inode_t *pl_inode, fd_t *fd, pl_fdctx_t *fdctx) { - int ret = 0; - - ret = __dup_locks_to_fdctx (pl_inode, fd, fdctx); - if (ret) - goto out; - -out: - return ret; - + return __dup_locks_to_fdctx(pl_inode, fd, fdctx); } static void -pl_mark_eol_lock (posix_lock_t *lock) +pl_mark_eol_lock(posix_lock_t *lock) { - lock->user_flock.l_type = GF_LK_EOL; - return; + lock->user_flock.l_type = GF_LK_EOL; + return; } static posix_lock_t * -__get_next_fdctx_lock (pl_fdctx_t *fdctx) +__get_next_fdctx_lock(pl_fdctx_t *fdctx) { - posix_lock_t *lock = NULL; + posix_lock_t *lock = NULL; - GF_ASSERT (fdctx); + GF_ASSERT(fdctx); - if (list_empty (&fdctx->locks_list)) { - gf_log (THIS->name, GF_LOG_DEBUG, - "fdctx lock list empty"); - goto out; - } + if (list_empty(&fdctx->locks_list)) { + gf_log(THIS->name, GF_LOG_DEBUG, "fdctx lock list empty"); + goto out; + } - lock = list_entry (fdctx->locks_list.next, typeof (*lock), - list); + lock = list_entry(fdctx->locks_list.next, typeof(*lock), list); - GF_ASSERT (lock); + GF_ASSERT(lock); - list_del_init (&lock->list); + list_del_init(&lock->list); out: - return lock; + return lock; } static int -__set_next_lock_fd (pl_fdctx_t *fdctx, posix_lock_t *reqlock) +__set_next_lock_fd(pl_fdctx_t *fdctx, posix_lock_t *reqlock) { - posix_lock_t *lock = NULL; - int ret = 0; + posix_lock_t *lock = NULL; + int ret = 0; - GF_ASSERT (fdctx); + GF_ASSERT(fdctx); - lock = __get_next_fdctx_lock (fdctx); - if (!lock) { - gf_log (THIS->name, GF_LOG_DEBUG, - "marking EOL in reqlock"); - pl_mark_eol_lock (reqlock); - goto out; - } + lock = __get_next_fdctx_lock(fdctx); + if (!lock) { + gf_log(THIS->name, GF_LOG_DEBUG, "marking EOL in reqlock"); + pl_mark_eol_lock(reqlock); + goto out; + } - reqlock->user_flock = lock->user_flock; - reqlock->fl_start = lock->fl_start; - reqlock->fl_type = lock->fl_type; - reqlock->fl_end 
= lock->fl_end; - reqlock->owner = lock->owner; + reqlock->user_flock = lock->user_flock; + reqlock->fl_start = lock->fl_start; + reqlock->fl_type = lock->fl_type; + reqlock->fl_end = lock->fl_end; + reqlock->owner = lock->owner; out: - if (lock) - __destroy_lock (lock); + if (lock) + __destroy_lock(lock); - return ret; + return ret; } static int -pl_getlk_fd (xlator_t *this, pl_inode_t *pl_inode, - fd_t *fd, posix_lock_t *reqlock) +pl_getlk_fd(xlator_t *this, pl_inode_t *pl_inode, fd_t *fd, + posix_lock_t *reqlock) { - uint64_t tmp = 0; - pl_fdctx_t *fdctx = NULL; - int ret = 0; - - pthread_mutex_lock (&pl_inode->mutex); - { - if (!__fd_has_locks (pl_inode, fd)) { - gf_log (this->name, GF_LOG_DEBUG, - "fd=%p has no active locks", fd); - ret = 0; - goto unlock; - } + uint64_t tmp = 0; + pl_fdctx_t *fdctx = NULL; + int ret = 0; + + pthread_mutex_lock(&pl_inode->mutex); + { + if (!__fd_has_locks(pl_inode, fd)) { + pthread_mutex_unlock(&pl_inode->mutex); + gf_log(this->name, GF_LOG_DEBUG, "fd=%p has no active locks", fd); + ret = 0; + goto out; + } - gf_log (this->name, GF_LOG_DEBUG, - "There are active locks on fd"); + gf_log(this->name, GF_LOG_DEBUG, "There are active locks on fd"); - ret = fd_ctx_get (fd, this, &tmp); - fdctx = (pl_fdctx_t *)(long) tmp; + ret = fd_ctx_get(fd, this, &tmp); + fdctx = (pl_fdctx_t *)(long)tmp; - if (list_empty (&fdctx->locks_list)) { - gf_log (this->name, GF_LOG_TRACE, - "no fdctx -> copying all locks on fd"); + if (list_empty(&fdctx->locks_list)) { + gf_log(this->name, GF_LOG_TRACE, + "no fdctx -> copying all locks on fd"); - ret = __copy_locks_to_fdctx (pl_inode, fd, fdctx); - if (ret) { - goto unlock; - } + ret = __copy_locks_to_fdctx(pl_inode, fd, fdctx); + if (ret) { + goto unlock; + } - ret = __set_next_lock_fd (fdctx, reqlock); + ret = __set_next_lock_fd(fdctx, reqlock); - } else { - gf_log (this->name, GF_LOG_TRACE, - "fdctx present -> returning the next lock"); - ret = __set_next_lock_fd (fdctx, reqlock); - if (ret) { - gf_log 
(this->name, GF_LOG_DEBUG, - "could not get next lock of fd"); - goto unlock; - } - } + } else { + gf_log(this->name, GF_LOG_TRACE, + "fdctx present -> returning the next lock"); + ret = __set_next_lock_fd(fdctx, reqlock); + if (ret) { + pthread_mutex_unlock(&pl_inode->mutex); + gf_log(this->name, GF_LOG_DEBUG, + "could not get next lock of fd"); + goto out; + } } + } unlock: - pthread_mutex_unlock (&pl_inode->mutex); - return ret; - + pthread_mutex_unlock(&pl_inode->mutex); +out: + return ret; } int -pl_lk (call_frame_t *frame, xlator_t *this, - fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata) -{ - pl_inode_t *pl_inode = NULL; - int op_ret = 0; - int op_errno = 0; - int can_block = 0; - posix_lock_t *reqlock = NULL; - posix_lock_t *conf = NULL; - int ret = 0; - - if ((flock->l_start < 0) || (flock->l_len < 0)) { - op_ret = -1; - op_errno = EINVAL; - goto unwind; - } +pl_metalock_is_active(pl_inode_t *pl_inode) +{ + if (list_empty(&pl_inode->metalk_list)) + return 0; + else + return 1; +} - pl_inode = pl_inode_get (this, fd->inode); - if (!pl_inode) { - op_ret = -1; - op_errno = ENOMEM; - goto unwind; - } +void +__pl_queue_lock(pl_inode_t *pl_inode, posix_lock_t *reqlock) +{ + list_add_tail(&reqlock->list, &pl_inode->queued_locks); +} + +int +pl_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, + struct gf_flock *flock, dict_t *xdata) +{ + pl_inode_t *pl_inode = NULL; + int op_ret = 0; + int op_errno = 0; + int can_block = 0; + posix_lock_t *reqlock = NULL; + posix_lock_t *conf = NULL; + uint32_t lk_flags = 0; + posix_locks_private_t *priv = this->private; + pl_local_t *local = NULL; + short lock_type = 0; + + int ret = dict_get_uint32(xdata, GF_LOCK_MODE, &lk_flags); + if (ret == 0) { + if (priv->mandatory_mode == MLK_NONE) + gf_log(this->name, GF_LOG_DEBUG, + "Lock flags received " + "in a non-mandatory locking environment, " + "continuing"); + else + gf_log(this->name, GF_LOG_DEBUG, + "Lock flags received, " + "continuing"); + } + + if 
((flock->l_start < 0) || ((flock->l_start + flock->l_len) < 0)) { + op_ret = -1; + op_errno = EINVAL; + goto unwind; + } + + /* As per 'man 3 fcntl', the value of l_len may be + * negative. In such cases, lock request should be + * considered for the range starting at 'l_start+l_len' + * and ending at 'l_start-1'. Update the fields accordingly. + */ + if (flock->l_len < 0) { + flock->l_start += flock->l_len; + flock->l_len = labs(flock->l_len); + } + + local = mem_get0(this->local_pool); + if (!local) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } else { + frame->local = local; + local->fd = fd_ref(fd); + } - reqlock = new_posix_lock (flock, frame->root->client, frame->root->pid, - &frame->root->lk_owner, fd); + pl_inode = pl_inode_get(this, fd->inode, local); + if (!pl_inode) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } - if (!reqlock) { - op_ret = -1; - op_errno = ENOMEM; - goto unwind; - } + reqlock = new_posix_lock(flock, frame->root->client, frame->root->pid, + &frame->root->lk_owner, fd, lk_flags, can_block, + &op_errno); - pl_trace_in (this, frame, fd, NULL, cmd, flock, NULL); + if (!reqlock) { + op_ret = -1; + goto unwind; + } - switch (cmd) { + pl_trace_in(this, frame, fd, NULL, cmd, flock, NULL); + switch (cmd) { case F_RESLK_LCKW: - can_block = 1; + can_block = 1; - /* fall through */ + /* fall through */ case F_RESLK_LCK: - memcpy (&reqlock->user_flock, flock, sizeof (struct gf_flock)); - reqlock->frame = frame; - reqlock->this = this; + reqlock->frame = frame; + reqlock->this = this; - ret = pl_reserve_setlk (this, pl_inode, reqlock, - can_block); - if (ret < 0) { - if (can_block) - goto out; + ret = pl_reserve_setlk(this, pl_inode, reqlock, can_block); + if (ret < 0) { + if (can_block) + goto out; - op_ret = -1; - op_errno = -ret; - __destroy_lock (reqlock); - goto unwind; - } - /* Finally a getlk and return the call */ - conf = pl_getlk (pl_inode, reqlock); - if (conf) - posix_lock_to_flock (conf, flock); - break; + op_ret = -1; 
+ op_errno = -ret; + __destroy_lock(reqlock); + goto unwind; + } + /* Finally a getlk and return the call */ + conf = pl_getlk(pl_inode, reqlock); + if (conf) + posix_lock_to_flock(conf, flock); + break; case F_RESLK_UNLCK: - reqlock->frame = frame; - reqlock->this = this; - ret = pl_reserve_unlock (this, pl_inode, reqlock); - if (ret < 0) { - op_ret = -1; - op_errno = -ret; - } - __destroy_lock (reqlock); - goto unwind; + reqlock->frame = frame; + reqlock->this = this; + ret = pl_reserve_unlock(this, pl_inode, reqlock); + if (ret < 0) { + op_ret = -1; + op_errno = -ret; + } + __destroy_lock(reqlock); + goto unwind; - break; + break; case F_GETLK_FD: - reqlock->frame = frame; - reqlock->this = this; - ret = pl_verify_reservelk (this, pl_inode, reqlock, can_block); - GF_ASSERT (ret >= 0); - - ret = pl_getlk_fd (this, pl_inode, fd, reqlock); - if (ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, - "getting locks on fd failed"); - op_ret = -1; - op_errno = ENOLCK; - goto unwind; - } + reqlock->frame = frame; + reqlock->this = this; + ret = pl_verify_reservelk(this, pl_inode, reqlock, can_block); + GF_ASSERT(ret >= 0); + + ret = pl_getlk_fd(this, pl_inode, fd, reqlock); + if (ret < 0) { + gf_log(this->name, GF_LOG_DEBUG, "getting locks on fd failed"); + op_ret = -1; + op_errno = ENOLCK; + goto unwind; + } - gf_log (this->name, GF_LOG_TRACE, - "Replying with a lock on fd for healing"); + gf_log(this->name, GF_LOG_TRACE, + "Replying with a lock on fd for healing"); - posix_lock_to_flock (reqlock, flock); - __destroy_lock (reqlock); + posix_lock_to_flock(reqlock, flock); + __destroy_lock(reqlock); - break; + break; #if F_GETLK != F_GETLK64 case F_GETLK64: #endif case F_GETLK: - conf = pl_getlk (pl_inode, reqlock); - posix_lock_to_flock (conf, flock); - __destroy_lock (reqlock); + conf = pl_getlk(pl_inode, reqlock); + posix_lock_to_flock(conf, flock); + __destroy_lock(reqlock); - break; + break; #if F_SETLKW != F_SETLKW64 case F_SETLKW64: #endif case F_SETLKW: - can_block = 
1; - reqlock->frame = frame; - reqlock->this = this; - - /* fall through */ + can_block = 1; + reqlock->frame = frame; + reqlock->this = this; + reqlock->blocking = can_block; + /* fall through */ #if F_SETLK != F_SETLK64 case F_SETLK64: #endif case F_SETLK: - memcpy (&reqlock->user_flock, flock, sizeof (struct gf_flock)); - ret = pl_verify_reservelk (this, pl_inode, reqlock, can_block); - if (ret < 0) { - gf_log (this->name, GF_LOG_TRACE, - "Lock blocked due to conflicting reserve lock"); - goto out; + reqlock->frame = frame; + reqlock->this = this; + lock_type = flock->l_type; + + pthread_mutex_lock(&pl_inode->mutex); + { + if (pl_inode->migrated) { + op_errno = EREMOTE; + pthread_mutex_unlock(&pl_inode->mutex); + STACK_UNWIND_STRICT(lk, frame, -1, op_errno, flock, xdata); + + __destroy_lock(reqlock); + goto out; } - ret = pl_setlk (this, pl_inode, reqlock, - can_block); + } + pthread_mutex_unlock(&pl_inode->mutex); + ret = pl_verify_reservelk(this, pl_inode, reqlock, can_block); + if (ret < 0) { + gf_log(this->name, GF_LOG_TRACE, + "Lock blocked due to conflicting reserve lock"); + goto out; + } + + if (reqlock->fl_type != F_UNLCK && pl_inode->mlock_enforced) { + ret = pl_lock_preempt(pl_inode, reqlock); if (ret == -1) { - if ((can_block) && (F_UNLCK != flock->l_type)) { - pl_trace_block (this, frame, fd, NULL, cmd, flock, NULL); - goto out; - } - gf_log (this->name, GF_LOG_DEBUG, "returning EAGAIN"); - op_ret = -1; - op_errno = EAGAIN; - __destroy_lock (reqlock); - - } else if ((0 == ret) && (F_UNLCK == flock->l_type)) { - /* For NLM's last "unlock on fd" detection */ - if (pl_locks_by_fd (pl_inode, fd)) - flock->l_type = F_RDLCK; - else - flock->l_type = F_UNLCK; + gf_log(this->name, GF_LOG_ERROR, "lock preempt failed"); + op_ret = -1; + op_errno = EAGAIN; + __destroy_lock(reqlock); + goto out; } - } -unwind: - pl_trace_out (this, frame, fd, NULL, cmd, flock, op_ret, op_errno, NULL); - pl_update_refkeeper (this, fd->inode); + pl_trace_block(this, frame, fd, 
NULL, cmd, flock, NULL); + goto unwind; + } + + ret = pl_setlk(this, pl_inode, reqlock, can_block); + if (ret == -1) { + if ((can_block) && (F_UNLCK != lock_type)) { + goto out; + } + gf_log(this->name, GF_LOG_DEBUG, "returning EAGAIN"); + op_ret = -1; + op_errno = EAGAIN; + __destroy_lock(reqlock); + } else if (ret == -2) { + goto out; + } else if ((0 == ret) && (F_UNLCK == flock->l_type)) { + /* For NLM's last "unlock on fd" detection */ + if (pl_locks_by_fd(pl_inode, fd)) + flock->l_type = F_RDLCK; + else + flock->l_type = F_UNLCK; + } + } +unwind: + pl_trace_out(this, frame, fd, NULL, cmd, flock, op_ret, op_errno, NULL); + pl_update_refkeeper(this, fd->inode); - STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, flock, xdata); + PL_STACK_UNWIND(lk, xdata, frame, op_ret, op_errno, flock, xdata); out: - return 0; + return 0; } - /* TODO: this function just logs, no action required?? */ int -pl_forget (xlator_t *this, - inode_t *inode) +pl_forget(xlator_t *this, inode_t *inode) { - pl_inode_t *pl_inode = NULL; + pl_inode_t *pl_inode = NULL; - posix_lock_t *ext_tmp = NULL; - posix_lock_t *ext_l = NULL; - struct list_head posixlks_released; + posix_lock_t *ext_tmp = NULL; + posix_lock_t *ext_l = NULL; + struct list_head posixlks_released; - pl_inode_lock_t *ino_tmp = NULL; - pl_inode_lock_t *ino_l = NULL; - struct list_head inodelks_released; + pl_inode_lock_t *ino_tmp = NULL; + pl_inode_lock_t *ino_l = NULL; + struct list_head inodelks_released; - pl_rw_req_t *rw_tmp = NULL; - pl_rw_req_t *rw_req = NULL; + pl_rw_req_t *rw_tmp = NULL; + pl_rw_req_t *rw_req = NULL; - pl_entry_lock_t *entry_tmp = NULL; - pl_entry_lock_t *entry_l = NULL; - struct list_head entrylks_released; + pl_entry_lock_t *entry_tmp = NULL; + pl_entry_lock_t *entry_l = NULL; + struct list_head entrylks_released; - pl_dom_list_t *dom = NULL; - pl_dom_list_t *dom_tmp = NULL; + pl_dom_list_t *dom = NULL; + pl_dom_list_t *dom_tmp = NULL; - INIT_LIST_HEAD (&posixlks_released); - INIT_LIST_HEAD 
(&inodelks_released); - INIT_LIST_HEAD (&entrylks_released); + INIT_LIST_HEAD(&posixlks_released); + INIT_LIST_HEAD(&inodelks_released); + INIT_LIST_HEAD(&entrylks_released); - pl_inode = pl_inode_get (this, inode); + pl_inode = pl_inode_get(this, inode, NULL); + if (!pl_inode) + return 0; - pthread_mutex_lock (&pl_inode->mutex); - { + pthread_mutex_lock(&pl_inode->mutex); + { + if (!list_empty(&pl_inode->rw_list)) { + gf_log(this->name, GF_LOG_WARNING, + "Pending R/W requests found, releasing."); + + list_for_each_entry_safe(rw_req, rw_tmp, &pl_inode->rw_list, list) + { + list_del(&rw_req->list); + call_stub_destroy(rw_req->stub); + GF_FREE(rw_req); + } + } - if (!list_empty (&pl_inode->rw_list)) { - gf_log (this->name, GF_LOG_WARNING, - "Pending R/W requests found, releasing."); + if (!list_empty(&pl_inode->ext_list)) { + gf_log(this->name, GF_LOG_WARNING, + "Pending fcntl locks found, releasing."); - list_for_each_entry_safe (rw_req, rw_tmp, &pl_inode->rw_list, - list) { + list_for_each_entry_safe(ext_l, ext_tmp, &pl_inode->ext_list, list) + { + __delete_lock(ext_l); + if (ext_l->blocked) { + list_add_tail(&ext_l->list, &posixlks_released); + continue; + } + __destroy_lock(ext_l); + } + } - list_del (&rw_req->list); - GF_FREE (rw_req); - } + list_for_each_entry_safe(dom, dom_tmp, &pl_inode->dom_list, inode_list) + { + if (!list_empty(&dom->inodelk_list)) { + gf_log(this->name, GF_LOG_WARNING, + "Pending inode locks found, releasing."); + + list_for_each_entry_safe(ino_l, ino_tmp, &dom->inodelk_list, + list) + { + __delete_inode_lock(ino_l); + __pl_inodelk_unref(ino_l); } - if (!list_empty (&pl_inode->ext_list)) { - gf_log (this->name, GF_LOG_WARNING, - "Pending fcntl locks found, releasing."); + list_splice_init(&dom->blocked_inodelks, &inodelks_released); + } + if (!list_empty(&dom->entrylk_list)) { + gf_log(this->name, GF_LOG_WARNING, + "Pending entry locks found, releasing."); - list_for_each_entry_safe (ext_l, ext_tmp, &pl_inode->ext_list, - list) { + 
list_for_each_entry_safe(entry_l, entry_tmp, &dom->entrylk_list, + domain_list) + { + list_del_init(&entry_l->domain_list); - __delete_lock (pl_inode, ext_l); - if (ext_l->blocked) { - list_add_tail (&ext_l->list, &posixlks_released); - continue; - } - __destroy_lock (ext_l); - } + GF_FREE((char *)entry_l->basename); + GF_FREE(entry_l->connection_id); + GF_FREE(entry_l); } + list_splice_init(&dom->blocked_entrylks, &entrylks_released); + } - list_for_each_entry_safe (dom, dom_tmp, &pl_inode->dom_list, inode_list) { + list_del(&dom->inode_list); + gf_log("posix-locks", GF_LOG_TRACE, " Cleaning up domain: %s", + dom->domain); + GF_FREE((char *)(dom->domain)); + GF_FREE(dom); + } + } + pthread_mutex_unlock(&pl_inode->mutex); - if (!list_empty (&dom->inodelk_list)) { - gf_log (this->name, GF_LOG_WARNING, - "Pending inode locks found, releasing."); + if (!list_empty(&posixlks_released)) { + list_for_each_entry_safe(ext_l, ext_tmp, &posixlks_released, list) + { + STACK_UNWIND_STRICT(lk, ext_l->frame, -1, 0, &ext_l->user_flock, + NULL); + __destroy_lock(ext_l); + } + } - list_for_each_entry_safe (ino_l, ino_tmp, &dom->inodelk_list, list) { - __delete_inode_lock (ino_l); - __pl_inodelk_unref (ino_l); - } + if (!list_empty(&inodelks_released)) { + list_for_each_entry_safe(ino_l, ino_tmp, &inodelks_released, + blocked_locks) + { + STACK_UNWIND_STRICT(inodelk, ino_l->frame, -1, 0, NULL); + __pl_inodelk_unref(ino_l); + } + } - list_splice_init (&dom->blocked_inodelks, &inodelks_released); + if (!list_empty(&entrylks_released)) { + list_for_each_entry_safe(entry_l, entry_tmp, &entrylks_released, + blocked_locks) + { + STACK_UNWIND_STRICT(entrylk, entry_l->frame, -1, 0, NULL); + GF_FREE((char *)entry_l->basename); + GF_FREE(entry_l->connection_id); + GF_FREE(entry_l); + } + } + pthread_mutex_destroy(&pl_inode->mutex); - } - if (!list_empty (&dom->entrylk_list)) { - gf_log (this->name, GF_LOG_WARNING, - "Pending entry locks found, releasing."); + GF_FREE(pl_inode); - 
list_for_each_entry_safe (entry_l, entry_tmp, &dom->entrylk_list, domain_list) { - list_del_init (&entry_l->domain_list); + return 0; +} - GF_FREE ((char *)entry_l->basename); - GF_FREE (entry_l->connection_id); - GF_FREE (entry_l); - } +int +pl_release(xlator_t *this, fd_t *fd) +{ + pl_inode_t *pl_inode = NULL; + uint64_t tmp_pl_inode = 0; + int ret = -1; + uint64_t tmp = 0; + pl_fdctx_t *fdctx = NULL; - list_splice_init (&dom->blocked_entrylks, &entrylks_released); - } + if (fd == NULL) { + goto out; + } - list_del (&dom->inode_list); - gf_log ("posix-locks", GF_LOG_TRACE, - " Cleaning up domain: %s", dom->domain); - GF_FREE ((char *)(dom->domain)); - GF_FREE (dom); - } + ret = inode_ctx_get(fd->inode, this, &tmp_pl_inode); + if (ret != 0) + goto clean; - } - pthread_mutex_unlock (&pl_inode->mutex); + pl_inode = (pl_inode_t *)(long)tmp_pl_inode; - list_for_each_entry_safe (ext_l, ext_tmp, &posixlks_released, list) { + pl_trace_release(this, fd); - STACK_UNWIND_STRICT (lk, ext_l->frame, -1, 0, - &ext_l->user_flock, NULL); - __destroy_lock (ext_l); - } + gf_log(this->name, GF_LOG_TRACE, "Releasing all locks with fd %p", fd); - list_for_each_entry_safe (ino_l, ino_tmp, &inodelks_released, blocked_locks) { + delete_locks_of_fd(this, pl_inode, fd); + pl_update_refkeeper(this, fd->inode); - STACK_UNWIND_STRICT (inodelk, ino_l->frame, -1, 0, NULL); - __pl_inodelk_unref (ino_l); - } +clean: + ret = fd_ctx_del(fd, this, &tmp); + if (ret) { + gf_log(this->name, GF_LOG_DEBUG, "Could not get fdctx"); + goto out; + } - list_for_each_entry_safe (entry_l, entry_tmp, &entrylks_released, blocked_locks) { + fdctx = (pl_fdctx_t *)(long)tmp; - STACK_UNWIND_STRICT (entrylk, entry_l->frame, -1, 0, NULL); - GF_FREE ((char *)entry_l->basename); - GF_FREE (entry_l->connection_id); - GF_FREE (entry_l); + GF_FREE(fdctx); +out: + return ret; +} - } +int +pl_releasedir(xlator_t *this, fd_t *fd) +{ + int ret = -1; + uint64_t tmp = 0; + pl_fdctx_t *fdctx = NULL; - GF_FREE (pl_inode); + if (fd 
== NULL) { + goto out; + } - return 0; + ret = fd_ctx_del(fd, this, &tmp); + if (ret) { + gf_log(this->name, GF_LOG_DEBUG, "Could not get fdctx"); + goto out; + } + + fdctx = (pl_fdctx_t *)(long)tmp; + + GF_FREE(fdctx); +out: + return ret; } -int -pl_release (xlator_t *this, fd_t *fd) +static int32_t +pl_request_link_count(dict_t **pxdata) { - pl_inode_t *pl_inode = NULL; - uint64_t tmp_pl_inode = 0; - int ret = -1; - uint64_t tmp = 0; - pl_fdctx_t *fdctx = NULL; + dict_t *xdata; - if (fd == NULL) { - goto out; + xdata = *pxdata; + if (xdata == NULL) { + xdata = dict_new(); + if (xdata == NULL) { + return ENOMEM; } + } else { + dict_ref(xdata); + } - ret = inode_ctx_get (fd->inode, this, &tmp_pl_inode); - if (ret != 0) - goto out; + if (dict_set_uint32(xdata, GET_LINK_COUNT, 0) != 0) { + dict_unref(xdata); + return ENOMEM; + } - pl_inode = (pl_inode_t *)(long)tmp_pl_inode; + *pxdata = xdata; - pl_trace_release (this, fd); + return 0; +} - gf_log (this->name, GF_LOG_TRACE, - "Releasing all locks with fd %p", fd); +static int32_t +pl_check_link_count(dict_t *xdata) +{ + int32_t count; - delete_locks_of_fd (this, pl_inode, fd); - pl_update_refkeeper (this, fd->inode); + /* In case we are unable to read the link count from xdata, we take a + * conservative approach and return -2, which will prevent the inode from + * being considered deleted. In fact it will cause link tracking for this + * inode to be disabled completely to avoid races. 
*/ - ret = fd_ctx_del (fd, this, &tmp); - if (ret) { - gf_log (this->name, GF_LOG_DEBUG, - "Could not get fdctx"); - goto out; - } + if (xdata == NULL) { + return -2; + } - fdctx = (pl_fdctx_t *)(long)tmp; + if (dict_get_int32(xdata, GET_LINK_COUNT, &count) != 0) { + return -2; + } - GF_FREE (fdctx); -out: - return ret; + return count; } -int -pl_releasedir (xlator_t *this, fd_t *fd) +int32_t +pl_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, inode_t *inode, struct iatt *buf, dict_t *xdata, + struct iatt *postparent) { - int ret = -1; - uint64_t tmp = 0; - pl_fdctx_t *fdctx = NULL; + pl_inode_t *pl_inode; - if (fd == NULL) { - goto out; + if (op_ret >= 0) { + pl_inode = pl_inode_get(this, inode, NULL); + if (pl_inode == NULL) { + PL_STACK_UNWIND(lookup, xdata, frame, -1, ENOMEM, NULL, NULL, NULL, + NULL); + return 0; } - ret = fd_ctx_del (fd, this, &tmp); - if (ret) { - gf_log (this->name, GF_LOG_DEBUG, - "Could not get fdctx"); - goto out; + pthread_mutex_lock(&pl_inode->mutex); + + /* We only update the link count if we previously didn't know it. + * Doing it always can lead to races since lookup is not executed + * atomically most of the times. */ + if (pl_inode->links == -2) { + pl_inode->links = pl_check_link_count(xdata); + if (buf->ia_type == IA_IFDIR) { + /* Directories have at least 2 links. To avoid special handling + * for directories, we simply decrement the value here to make + * them equivalent to regular files. 
*/ + pl_inode->links--; + } } - fdctx = (pl_fdctx_t *)(long)tmp; + pthread_mutex_unlock(&pl_inode->mutex); + } - GF_FREE (fdctx); -out: - return ret; + PL_STACK_UNWIND(lookup, xdata, frame, op_ret, op_errno, inode, buf, xdata, + postparent); + return 0; } int32_t -pl_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *xdata, struct iatt *postparent) +pl_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { - PL_STACK_UNWIND (lookup, xdata, frame, op_ret, op_errno, inode, buf, - xdata, postparent); - return 0; + int32_t error; + + error = pl_request_link_count(&xdata); + if (error == 0) { + PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL); + STACK_WIND(frame, pl_lookup_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, loc, xdata); + dict_unref(xdata); + } else { + STACK_UNWIND_STRICT(lookup, frame, -1, error, NULL, NULL, NULL, NULL); + } + return 0; } int32_t -pl_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +pl_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iatt *buf, dict_t *xdata) { - PL_LOCAL_GET_REQUESTS (frame, this, xdata, NULL, loc); - STACK_WIND (frame, pl_lookup_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, loc, xdata); - return 0; + PL_STACK_UNWIND(fstat, xdata, frame, op_ret, op_errno, buf, xdata); + return 0; +} + +int32_t +pl_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) +{ + PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL); + STACK_WIND(frame, pl_fstat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, fd, xdata); + return 0; } int -pl_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, gf_dirent_t *entries, dict_t *xdata) +pl_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, gf_dirent_t *entries, 
dict_t *xdata) { - pl_local_t *local = NULL; - gf_dirent_t *entry = NULL; + pl_local_t *local = NULL; + gf_dirent_t *entry = NULL; - if (op_ret <= 0) - goto unwind; + if (op_ret <= 0) + goto unwind; - local = frame->local; - if (!local) - goto unwind; + local = frame->local; + if (!local) + goto unwind; - list_for_each_entry (entry, &entries->list, list) { - pl_set_xdata_response (this, local, local->fd->inode, - entry->inode, entry->d_name, - entry->dict); - } + list_for_each_entry(entry, &entries->list, list) + { + pl_set_xdata_response(this, local, local->fd->inode, entry->inode, + entry->d_name, entry->dict, 0); + } unwind: - PL_STACK_UNWIND (readdirp, xdata, frame, op_ret, op_errno, entries, - xdata); + PL_STACK_UNWIND(readdirp, xdata, frame, op_ret, op_errno, entries, xdata); - return 0; + return 0; } int -pl_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset, dict_t *xdata) +pl_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, dict_t *xdata) { - PL_LOCAL_GET_REQUESTS (frame, this, xdata, fd, NULL); - STACK_WIND (frame, pl_readdirp_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp, - fd, size, offset, xdata); + PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL); + STACK_WIND(frame, pl_readdirp_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdirp, fd, size, offset, xdata); - return 0; + return 0; } +lock_migration_info_t * +gf_mig_info_for_lock(posix_lock_t *lock) +{ + lock_migration_info_t *new = GF_MALLOC(sizeof(lock_migration_info_t), + gf_common_mt_lock_mig); + if (new == NULL) { + goto out; + } -void -pl_dump_lock (char *str, int size, struct gf_flock *flock, - gf_lkowner_t *owner, void *trans, char *conn_id, - time_t *granted_time, time_t *blkd_time, gf_boolean_t active) -{ - char *type_str = NULL; - char granted[256] = {0,}; - char blocked[256] = {0,}; - - if (granted_time) - gf_time_fmt (granted, sizeof (granted), *granted_time, - gf_timefmt_FT); - if 
(blkd_time) - gf_time_fmt (blocked, sizeof (blocked), *blkd_time, - gf_timefmt_FT); - switch (flock->l_type) { - case F_RDLCK: - type_str = "READ"; - break; - case F_WRLCK: - type_str = "WRITE"; - break; - case F_UNLCK: - type_str = "UNLOCK"; - break; - default: - type_str = "UNKNOWN"; - break; + INIT_LIST_HEAD(&new->list); + + posix_lock_to_flock(lock, &new->flock); + + new->lk_flags = lock->lk_flags; + + new->client_uid = gf_strdup(lock->client_uid); + +out: + return new; +} + +int +pl_fill_active_locks(pl_inode_t *pl_inode, lock_migration_info_t *lmi) +{ + posix_lock_t *temp = NULL; + lock_migration_info_t *newlock = NULL; + int count = 0; + + pthread_mutex_lock(&pl_inode->mutex); + { + if (list_empty(&pl_inode->ext_list)) { + count = 0; + goto unlock; } - if (active) { - if (blkd_time && *blkd_time == 0) { - snprintf (str, size, RANGE_GRNTD_FMT, - type_str, flock->l_whence, - (unsigned long long) flock->l_start, - (unsigned long long) flock->l_len, - (unsigned long long) flock->l_pid, - lkowner_utoa (owner), trans, conn_id, - granted); - } else { - snprintf (str, size, RANGE_BLKD_GRNTD_FMT, - type_str, flock->l_whence, - (unsigned long long) flock->l_start, - (unsigned long long) flock->l_len, - (unsigned long long) flock->l_pid, - lkowner_utoa (owner), trans, conn_id, - blocked, granted); - } - } else { - snprintf (str, size, RANGE_BLKD_FMT, - type_str, flock->l_whence, - (unsigned long long) flock->l_start, - (unsigned long long) flock->l_len, - (unsigned long long) flock->l_pid, - lkowner_utoa (owner), trans, conn_id, blocked); + list_for_each_entry(temp, &pl_inode->ext_list, list) + { + if (temp->blocked) + continue; + + newlock = gf_mig_info_for_lock(temp); + if (!newlock) { + pthread_mutex_unlock(&pl_inode->mutex); + gf_msg(THIS->name, GF_LOG_ERROR, 0, 0, "lock_dup failed"); + count = -1; + goto out; + } + + list_add_tail(&newlock->list, &lmi->list); + count++; } + } +unlock: + pthread_mutex_unlock(&pl_inode->mutex); +out: + return count; } -void 
-__dump_entrylks (pl_inode_t *pl_inode) -{ - pl_dom_list_t *dom = NULL; - pl_entry_lock_t *lock = NULL; - char blocked[256] = {0,}; - char granted[256] = {0,}; - int count = 0; - char key[GF_DUMP_MAX_BUF_LEN] = {0,}; - char *k = "xlator.feature.locks.lock-dump.domain.entrylk"; - - char tmp[4098]; - - list_for_each_entry (dom, &pl_inode->dom_list, inode_list) { - - count = 0; - - gf_proc_dump_build_key(key, - "lock-dump.domain", - "domain"); - gf_proc_dump_write(key, "%s", dom->domain); - - list_for_each_entry (lock, &dom->entrylk_list, domain_list) { - - gf_time_fmt (granted, sizeof (granted), - lock->granted_time.tv_sec, gf_timefmt_FT); - gf_proc_dump_build_key(key, k, - "entrylk[%d](ACTIVE)", count ); - if (lock->blkd_time.tv_sec == 0) { - snprintf (tmp, sizeof (tmp), ENTRY_GRNTD_FMT, - lock->type == ENTRYLK_RDLCK ? - "ENTRYLK_RDLCK" : "ENTRYLK_WRLCK", - lock->basename, - (unsigned long long) lock->client_pid, - lkowner_utoa (&lock->owner), - lock->client, - lock->connection_id, granted); - } else { - gf_time_fmt (blocked, sizeof (blocked), - lock->blkd_time.tv_sec, - gf_timefmt_FT); - snprintf (tmp, sizeof (tmp), - ENTRY_BLKD_GRNTD_FMT, - lock->type == ENTRYLK_RDLCK ? 
- "ENTRYLK_RDLCK" : "ENTRYLK_WRLCK", - lock->basename, - (unsigned long long) lock->client_pid, - lkowner_utoa (&lock->owner), - lock->client, - lock->connection_id, - blocked, granted); - } +/* This function reads only active locks */ +static int +pl_getactivelk(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +{ + pl_inode_t *pl_inode = NULL; + lock_migration_info_t locks; + int op_ret = 0; + int op_errno = 0; + int count = 0; - gf_proc_dump_write(key, tmp); + INIT_LIST_HEAD(&locks.list); - count++; - } + pl_inode = pl_inode_get(this, loc->inode, NULL); + if (!pl_inode) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, "pl_inode_get failed"); - list_for_each_entry (lock, &dom->blocked_entrylks, - blocked_locks) { + op_ret = -1; + op_errno = ENOMEM; + goto out; + } - gf_time_fmt (blocked, sizeof (blocked), - lock->blkd_time.tv_sec, gf_timefmt_FT); + count = pl_fill_active_locks(pl_inode, &locks); - gf_proc_dump_build_key(key, k, - "entrylk[%d](BLOCKED)", count ); - snprintf (tmp, sizeof (tmp), ENTRY_BLKD_FMT, - lock->type == ENTRYLK_RDLCK ? 
- "ENTRYLK_RDLCK" : "ENTRYLK_WRLCK", - lock->basename, - (unsigned long long) lock->client_pid, - lkowner_utoa (&lock->owner), lock->client, - lock->connection_id, blocked); + op_ret = count; - gf_proc_dump_write(key, tmp); +out: + STACK_UNWIND_STRICT(getactivelk, frame, op_ret, op_errno, &locks, NULL); - count++; - } + gf_free_mig_locks(&locks); - } + return 0; +} + +void +pl_metalk_unref(pl_meta_lock_t *lock) +{ + lock->ref--; + if (!lock->ref) { + GF_FREE(lock->client_uid); + GF_FREE(lock); + } } void -dump_entrylks (pl_inode_t *pl_inode) +__pl_metalk_ref(pl_meta_lock_t *lock) +{ + lock->ref++; +} + +pl_meta_lock_t * +new_meta_lock(call_frame_t *frame, xlator_t *this) +{ + pl_meta_lock_t *lock = GF_CALLOC(1, sizeof(*lock), + gf_locks_mt_pl_meta_lock_t); + + if (!lock) { + gf_msg(this->name, GF_LOG_ERROR, 0, ENOMEM, + "mem allocation" + " failed for meta lock"); + goto out; + } + + INIT_LIST_HEAD(&lock->list); + INIT_LIST_HEAD(&lock->client_list); + + lock->client_uid = gf_strdup(frame->root->client->client_uid); + if (!lock->client_uid) { + gf_msg(this->name, GF_LOG_ERROR, 0, ENOMEM, + "mem allocation" + " failed for client_uid"); + GF_FREE(lock); + lock = NULL; + goto out; + } + + __pl_metalk_ref(lock); +out: + return lock; +} + +int +pl_insert_metalk(pl_inode_t *pl_inode, pl_ctx_t *ctx, pl_meta_lock_t *lock) { - pthread_mutex_lock (&pl_inode->mutex); + int ret = 0; + + if (!pl_inode || !ctx || !lock) { + gf_msg(THIS->name, GF_LOG_INFO, 0, 0, "NULL parameter"); + ret = -1; + goto out; + } + + lock->pl_inode = pl_inode; + + /* refer function pl_inode_setlk for more info for this ref. + * This should be unrefed on meta-unlock triggered by rebalance or + * in cleanup with client disconnect*/ + /*TODO: unref this in cleanup code for disconnect and meta-unlock*/ + pl_inode->inode = inode_ref(pl_inode->inode); + + /* NOTE:In case of a client-server disconnect we need to cleanup metalk. + * Hence, adding the metalk to pl_ctx_t as well. 
The mutex lock order + * should always be on ctx and then on pl_inode*/ + + pthread_mutex_lock(&ctx->lock); + { + pthread_mutex_lock(&pl_inode->mutex); { - __dump_entrylks (pl_inode); + list_add_tail(&lock->list, &pl_inode->metalk_list); } - pthread_mutex_unlock (&pl_inode->mutex); + pthread_mutex_unlock(&pl_inode->mutex); + list_add_tail(&lock->client_list, &ctx->metalk_list); + } + pthread_mutex_unlock(&ctx->lock); + +out: + return ret; } -void -__dump_inodelks (pl_inode_t *pl_inode) +int32_t +pl_metalk(call_frame_t *frame, xlator_t *this, inode_t *inode) { - pl_dom_list_t *dom = NULL; - pl_inode_lock_t *lock = NULL; - int count = 0; - char key[GF_DUMP_MAX_BUF_LEN]; + pl_inode_t *pl_inode = NULL; + int ret = 0; + pl_meta_lock_t *reqlk = NULL; + pl_ctx_t *ctx = NULL; + + pl_inode = pl_inode_get(this, inode, NULL); + if (!pl_inode) { + gf_msg(this->name, GF_LOG_ERROR, 0, ENOMEM, + "pl_inode mem allocation failedd"); + + ret = -1; + goto out; + } + + /* Non rebalance process trying to do metalock */ + if (frame->root->pid != GF_CLIENT_PID_DEFRAG) { + ret = -1; + goto out; + } + + /* Note: In the current scheme of glusterfs where lock migration is + * experimental, (ideally) the rebalance process which is migrating + * the file should request for a metalock. Hence, the metalock count + * should not be more than one for an inode. In future, if there is a + * need for meta-lock from other clients, the following block can be + * removed. + * + * Since pl_metalk is called as part of setxattr operation, any client + * process(non-rebalance) residing outside trusted network can exhaust + * memory of the server node by issuing setxattr repetitively on the + * metalock key. 
The following code makes sure that more than + * one metalock cannot be granted on an inode*/ + pthread_mutex_lock(&pl_inode->mutex); + { + if (pl_metalock_is_active(pl_inode)) { + ret = -1; + } + } + pthread_mutex_unlock(&pl_inode->mutex); + + if (ret == -1) { + gf_msg(this->name, GF_LOG_WARNING, EINVAL, 0, + "More than one meta-lock cannot be granted on" + " the inode"); + goto out; + } + + if (frame->root->client) { + ctx = pl_ctx_get(frame->root->client, this); + if (!ctx) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, "pl_ctx_get failed"); + + ret = -1; + goto out; + } + } else { + gf_msg(this->name, GF_LOG_INFO, 0, 0, + "frame-root-client " + "is NULL"); + + ret = -1; + goto out; + } + + reqlk = new_meta_lock(frame, this); + if (!reqlk) { + ret = -1; + goto out; + } + + ret = pl_insert_metalk(pl_inode, ctx, reqlk); + if (ret < 0) { + pl_metalk_unref(reqlk); + } - char tmp[4098]; +out: + return ret; +} - list_for_each_entry (dom, &pl_inode->dom_list, inode_list) { +static void +__unwind_queued_locks(pl_inode_t *pl_inode, struct list_head *tmp_list) +{ + if (list_empty(&pl_inode->queued_locks)) + return; - count = 0; + list_splice_init(&pl_inode->queued_locks, tmp_list); +} - gf_proc_dump_build_key(key, - "lock-dump.domain", - "domain"); - gf_proc_dump_write(key, "%s", dom->domain); +static void +__unwind_blocked_locks(pl_inode_t *pl_inode, struct list_head *tmp_list) +{ + posix_lock_t *lock = NULL; + posix_lock_t *tmp = NULL; - list_for_each_entry (lock, &dom->inodelk_list, list) { + if (list_empty(&pl_inode->ext_list)) + return; - gf_proc_dump_build_key(key, - "inodelk", - "inodelk[%d](ACTIVE)",count ); + list_for_each_entry_safe(lock, tmp, &pl_inode->ext_list, list) + { + if (!lock->blocking) + continue; - SET_FLOCK_PID (&lock->user_flock, lock); - pl_dump_lock (tmp, sizeof (tmp), &lock->user_flock, - &lock->owner, - lock->client, lock->connection_id, - &lock->granted_time.tv_sec, - &lock->blkd_time.tv_sec, - _gf_true); - gf_proc_dump_write(key, tmp); + 
list_del_init(&lock->list); + list_add_tail(&lock->list, tmp_list); + } +} - count++; - } +int +pl_metaunlock(call_frame_t *frame, xlator_t *this, inode_t *inode, dict_t *dict) +{ + pl_inode_t *pl_inode = NULL; + int ret = 0; + pl_meta_lock_t *meta_lock = NULL; + pl_meta_lock_t *tmp_metalk = NULL; + pl_ctx_t *ctx = NULL; + posix_lock_t *posix_lock = NULL; + posix_lock_t *tmp_posixlk = NULL; + struct list_head tmp_posixlk_list; + + INIT_LIST_HEAD(&tmp_posixlk_list); + + if (frame->root->client) { + ctx = pl_ctx_get(frame->root->client, this); + if (!ctx) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, "pl_ctx_get failed"); + + ret = -1; + goto out; + } + } else { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "frame-root-client is " + "NULL"); + ret = -1; + goto out; + } + + pl_inode = pl_inode_get(this, inode, NULL); + if (!pl_inode) { + ret = -1; + goto out; + } + + pthread_mutex_lock(&ctx->lock); + { + pthread_mutex_lock(&pl_inode->mutex); + { + /* Unwind queued locks regardless of migration status */ + __unwind_queued_locks(pl_inode, &tmp_posixlk_list); - list_for_each_entry (lock, &dom->blocked_inodelks, blocked_locks) { + /* Unwind blocked locks only for successful migration */ + if (dict_get_sizen(dict, "status")) { + /* unwind all blocked locks */ + __unwind_blocked_locks(pl_inode, &tmp_posixlk_list); + } - gf_proc_dump_build_key(key, - "inodelk", - "inodelk[%d](BLOCKED)",count ); - SET_FLOCK_PID (&lock->user_flock, lock); - pl_dump_lock (tmp, sizeof (tmp), &lock->user_flock, - &lock->owner, - lock->client, lock->connection_id, - 0, &lock->blkd_time.tv_sec, - _gf_false); - gf_proc_dump_write(key, tmp); + /* unlock metalk */ + /* if this list is empty then pl_inode->metalk_list + * should be empty too. 
meta lock should in all cases + * be added/removed from both pl_ctx_t and pl_inode */ - count++; - } + if (list_empty(&ctx->metalk_list)) + goto unlock; + list_for_each_entry_safe(meta_lock, tmp_metalk, &ctx->metalk_list, + client_list) + { + list_del_init(&meta_lock->client_list); + + pl_inode = meta_lock->pl_inode; + + list_del_init(&meta_lock->list); + + pl_metalk_unref(meta_lock); + + /* The corresponding ref is taken in + * pl_insert_metalk*/ + inode_unref(pl_inode->inode); + } + + if (dict_get_sizen(dict, "status")) + pl_inode->migrated = _gf_true; + else + pl_inode->migrated = _gf_false; } + unlock: + + pthread_mutex_unlock(&pl_inode->mutex); + } + pthread_mutex_unlock(&ctx->lock); + +out: + list_for_each_entry_safe(posix_lock, tmp_posixlk, &tmp_posixlk_list, list) + { + list_del_init(&posix_lock->list); + + STACK_UNWIND_STRICT(lk, posix_lock->frame, -1, EREMOTE, + &posix_lock->user_flock, NULL); + __destroy_lock(posix_lock); + } + + return ret; } -void -dump_inodelks (pl_inode_t *pl_inode) +int32_t +pl_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - pthread_mutex_lock (&pl_inode->mutex); + pl_local_t *local = NULL; + pl_inode_t *pl_inode = NULL; + local = frame->local; + if (local && local->update_mlock_enforced_flag && op_ret != -1) { + pl_inode = pl_inode_get(this, local->inode, NULL); + if (!pl_inode) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + + pthread_mutex_lock(&pl_inode->mutex); { - __dump_inodelks (pl_inode); + while (pl_inode->fop_wind_count > 0) { + gf_msg(this->name, GF_LOG_INFO, 0, 0, + "waiting for existing fops (count %d) to drain for " + "gfid %s", + pl_inode->fop_wind_count, uuid_utoa(pl_inode->gfid)); + pthread_cond_wait(&pl_inode->check_fop_wind_count, + &pl_inode->mutex); + } + pl_inode->mlock_enforced = _gf_true; + pl_inode->check_mlock_info = _gf_false; } - pthread_mutex_unlock (&pl_inode->mutex); + pthread_mutex_unlock(&pl_inode->mutex); + } +unwind: 
+ PL_STACK_UNWIND_FOR_CLIENT(setxattr, xdata, frame, op_ret, op_errno, xdata); + return 0; } -void -__dump_posixlks (pl_inode_t *pl_inode) +int32_t +pl_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + int flags, dict_t *xdata) { - posix_lock_t *lock = NULL; - int count = 0; - char key[GF_DUMP_MAX_BUF_LEN]; + int op_ret = 0; + int op_errno = EINVAL; + dict_t *xdata_rsp = NULL; + char *name = NULL; + posix_locks_private_t *priv = this->private; - char tmp[4098]; + PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL); - list_for_each_entry (lock, &pl_inode->ext_list, list) { + if (dict_get_sizen(dict, GF_META_LOCK_KEY)) { + op_ret = pl_metalk(frame, this, loc->inode); - SET_FLOCK_PID (&lock->user_flock, lock); - gf_proc_dump_build_key(key, - "posixlk", - "posixlk[%d](%s)", - count, - lock->blocked ? "BLOCKED" : "ACTIVE"); - pl_dump_lock (tmp, sizeof (tmp), &lock->user_flock, - &lock->owner, lock->client, NULL, - &lock->granted_time.tv_sec, &lock->blkd_time.tv_sec, - (lock->blocked)? 
_gf_false: _gf_true); - gf_proc_dump_write(key, tmp); + } else if (dict_get_sizen(dict, GF_META_UNLOCK_KEY)) { + op_ret = pl_metaunlock(frame, this, loc->inode, dict); + } else { + goto usual; + } - count++; + PL_STACK_UNWIND_FOR_CLIENT(setxattr, xdata_rsp, frame, op_ret, op_errno, + xdata_rsp); + return 0; + +usual: + PL_CHECK_LOCK_ENFORCE_KEY(frame, dict, name, this, loc, ((fd_t *)NULL), + priv); + + STACK_WIND(frame, pl_setxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata); + return 0; + +unwind: + PL_STACK_UNWIND_FOR_CLIENT(setxattr, xdata, frame, op_ret, op_errno, xdata); + + return 0; +} + +void +pl_dump_lock(char *str, int size, struct gf_flock *flock, gf_lkowner_t *owner, + void *trans, char *conn_id, time_t *granted_time, + time_t *blkd_time, gf_boolean_t active) +{ + char *type_str = NULL; + char granted[GF_TIMESTR_SIZE] = { + 0, + }; + char blocked[GF_TIMESTR_SIZE] = { + 0, + }; + + if (granted_time) + gf_time_fmt(granted, sizeof(granted), *granted_time, gf_timefmt_FT); + if (blkd_time) + gf_time_fmt(blocked, sizeof(blocked), *blkd_time, gf_timefmt_FT); + switch (flock->l_type) { + case F_RDLCK: + type_str = "READ"; + break; + case F_WRLCK: + type_str = "WRITE"; + break; + case F_UNLCK: + type_str = "UNLOCK"; + break; + default: + type_str = "UNKNOWN"; + break; + } + + if (active) { + if (blkd_time && *blkd_time == 0) { + snprintf(str, size, RANGE_GRNTD_FMT, type_str, flock->l_whence, + (unsigned long long)flock->l_start, + (unsigned long long)flock->l_len, + (unsigned long long)flock->l_pid, lkowner_utoa(owner), + trans, conn_id, granted); + } else { + snprintf(str, size, RANGE_BLKD_GRNTD_FMT, type_str, flock->l_whence, + (unsigned long long)flock->l_start, + (unsigned long long)flock->l_len, + (unsigned long long)flock->l_pid, lkowner_utoa(owner), + trans, conn_id, blocked, granted); } + } else { + snprintf(str, size, RANGE_BLKD_FMT, type_str, flock->l_whence, + (unsigned long long)flock->l_start, + (unsigned 
long long)flock->l_len, + (unsigned long long)flock->l_pid, lkowner_utoa(owner), trans, + conn_id, blocked); + } } void -dump_posixlks (pl_inode_t *pl_inode) +__dump_entrylks(pl_inode_t *pl_inode) { - pthread_mutex_lock (&pl_inode->mutex); + pl_dom_list_t *dom = NULL; + pl_entry_lock_t *lock = NULL; + char blocked[GF_TIMESTR_SIZE] = { + 0, + }; + char granted[GF_TIMESTR_SIZE] = { + 0, + }; + int count = 0; + char key[GF_DUMP_MAX_BUF_LEN] = { + 0, + }; + char *k = "xlator.feature.locks.lock-dump.domain.entrylk"; + + char tmp[4098]; + + list_for_each_entry(dom, &pl_inode->dom_list, inode_list) + { + count = 0; + + gf_proc_dump_build_key(key, "lock-dump.domain", "domain"); + gf_proc_dump_write(key, "%s", dom->domain); + + list_for_each_entry(lock, &dom->entrylk_list, domain_list) + { + gf_time_fmt(granted, sizeof(granted), lock->granted_time, + gf_timefmt_FT); + gf_proc_dump_build_key(key, k, "entrylk[%d](ACTIVE)", count); + if (lock->blkd_time == 0) { + snprintf(tmp, sizeof(tmp), ENTRY_GRNTD_FMT, + lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" + : "ENTRYLK_WRLCK", + lock->basename, (unsigned long long)lock->client_pid, + lkowner_utoa(&lock->owner), lock->client, + lock->connection_id, granted); + } else { + gf_time_fmt(blocked, sizeof(blocked), lock->blkd_time, + gf_timefmt_FT); + snprintf(tmp, sizeof(tmp), ENTRY_BLKD_GRNTD_FMT, + lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" + : "ENTRYLK_WRLCK", + lock->basename, (unsigned long long)lock->client_pid, + lkowner_utoa(&lock->owner), lock->client, + lock->connection_id, blocked, granted); + } + + gf_proc_dump_write(key, "%s", tmp); + + count++; + } + + list_for_each_entry(lock, &dom->blocked_entrylks, blocked_locks) { - __dump_posixlks (pl_inode); + gf_time_fmt(blocked, sizeof(blocked), lock->blkd_time, + gf_timefmt_FT); + + gf_proc_dump_build_key(key, k, "entrylk[%d](BLOCKED)", count); + snprintf( + tmp, sizeof(tmp), ENTRY_BLKD_FMT, + lock->type == ENTRYLK_RDLCK ? 
"ENTRYLK_RDLCK" : "ENTRYLK_WRLCK", + lock->basename, (unsigned long long)lock->client_pid, + lkowner_utoa(&lock->owner), lock->client, lock->connection_id, + blocked); + + gf_proc_dump_write(key, "%s", tmp); + + count++; } - pthread_mutex_unlock (&pl_inode->mutex); + } +} +void +dump_entrylks(pl_inode_t *pl_inode) +{ + pthread_mutex_lock(&pl_inode->mutex); + { + __dump_entrylks(pl_inode); + } + pthread_mutex_unlock(&pl_inode->mutex); } -int32_t -pl_dump_inode_priv (xlator_t *this, inode_t *inode) +void +__dump_inodelks(pl_inode_t *pl_inode) { + pl_dom_list_t *dom = NULL; + pl_inode_lock_t *lock = NULL; + int count = 0; + char key[GF_DUMP_MAX_BUF_LEN]; - int ret = -1; - uint64_t tmp_pl_inode = 0; - pl_inode_t *pl_inode = NULL; - char *pathname = NULL; - gf_boolean_t section_added = _gf_false; + char tmp[4098]; - int count = 0; + list_for_each_entry(dom, &pl_inode->dom_list, inode_list) + { + count = 0; - if (!inode) { - errno = EINVAL; - goto out; + gf_proc_dump_build_key(key, "lock-dump.domain", "domain"); + gf_proc_dump_write(key, "%s", dom->domain); + + list_for_each_entry(lock, &dom->inodelk_list, list) + { + gf_proc_dump_build_key(key, "inodelk", "inodelk[%d](ACTIVE)", + count); + + SET_FLOCK_PID(&lock->user_flock, lock); + pl_dump_lock(tmp, sizeof(tmp), &lock->user_flock, &lock->owner, + lock->client, lock->connection_id, &lock->granted_time, + &lock->blkd_time, _gf_true); + gf_proc_dump_write(key, "%s", tmp); + + count++; } - ret = TRY_LOCK (&inode->lock); - if (ret) - goto out; + list_for_each_entry(lock, &dom->blocked_inodelks, blocked_locks) { - ret = __inode_ctx_get (inode, this, &tmp_pl_inode); - if (ret) - goto unlock; + gf_proc_dump_build_key(key, "inodelk", "inodelk[%d](BLOCKED)", + count); + SET_FLOCK_PID(&lock->user_flock, lock); + pl_dump_lock(tmp, sizeof(tmp), &lock->user_flock, &lock->owner, + lock->client, lock->connection_id, 0, &lock->blkd_time, + _gf_false); + gf_proc_dump_write(key, "%s", tmp); + + count++; } -unlock: - UNLOCK 
(&inode->lock); + } +} + +void +dump_inodelks(pl_inode_t *pl_inode) +{ + pthread_mutex_lock(&pl_inode->mutex); + { + __dump_inodelks(pl_inode); + } + pthread_mutex_unlock(&pl_inode->mutex); +} + +void +__dump_posixlks(pl_inode_t *pl_inode) +{ + posix_lock_t *lock = NULL; + int count = 0; + char key[GF_DUMP_MAX_BUF_LEN]; + + char tmp[4098]; + + list_for_each_entry(lock, &pl_inode->ext_list, list) + { + SET_FLOCK_PID(&lock->user_flock, lock); + gf_proc_dump_build_key(key, "posixlk", "posixlk[%d](%s)", count, + lock->blocked ? "BLOCKED" : "ACTIVE"); + pl_dump_lock(tmp, sizeof(tmp), &lock->user_flock, &lock->owner, + lock->client, lock->client_uid, &lock->granted_time, + &lock->blkd_time, (lock->blocked) ? _gf_false : _gf_true); + gf_proc_dump_write(key, "%s", tmp); + + count++; + } +} + +void +dump_posixlks(pl_inode_t *pl_inode) +{ + pthread_mutex_lock(&pl_inode->mutex); + { + __dump_posixlks(pl_inode); + } + pthread_mutex_unlock(&pl_inode->mutex); +} + +int32_t +pl_dump_inode_priv(xlator_t *this, inode_t *inode) +{ + int ret = -1; + uint64_t tmp_pl_inode = 0; + pl_inode_t *pl_inode = NULL; + char *pathname = NULL; + gf_boolean_t section_added = _gf_false; + + int count = 0; + + if (!inode) { + errno = EINVAL; + goto out; + } + + ret = TRY_LOCK(&inode->lock); + if (ret) + goto out; + { + ret = __inode_ctx_get(inode, this, &tmp_pl_inode); if (ret) - goto out; + goto unlock; + } +unlock: + UNLOCK(&inode->lock); + if (ret) + goto out; + + pl_inode = (pl_inode_t *)(long)tmp_pl_inode; + if (!pl_inode) { + ret = -1; + goto out; + } + + gf_proc_dump_add_section("xlator.features.locks.%s.inode", this->name); + section_added = _gf_true; + + /*We are safe to call __inode_path since we have the + * inode->table->lock */ + __inode_path(inode, NULL, &pathname); + if (pathname) + gf_proc_dump_write("path", "%s", pathname); + + gf_proc_dump_write("mandatory", "%d", pl_inode->mandatory); + + ret = pthread_mutex_trylock(&pl_inode->mutex); + if (ret) + goto out; + { + count = 
__get_entrylk_count(this, pl_inode); + if (count) { + gf_proc_dump_write("entrylk-count", "%d", count); + __dump_entrylks(pl_inode); + } - pl_inode = (pl_inode_t *)(long)tmp_pl_inode; - if (!pl_inode) { - ret = -1; - goto out; + count = __get_inodelk_count(this, pl_inode, NULL); + if (count) { + gf_proc_dump_write("inodelk-count", "%d", count); + __dump_inodelks(pl_inode); } - gf_proc_dump_add_section("xlator.features.locks.%s.inode", this->name); - section_added = _gf_true; + count = __get_posixlk_count(pl_inode); + if (count) { + gf_proc_dump_write("posixlk-count", "%d", count); + __dump_posixlks(pl_inode); + } - /*We are safe to call __inode_path since we have the - * inode->table->lock */ - __inode_path (inode, NULL, &pathname); - if (pathname) - gf_proc_dump_write ("path", "%s", pathname); + gf_proc_dump_write("links", "%d", pl_inode->links); + gf_proc_dump_write("removes_pending", "%u", pl_inode->remove_running); + gf_proc_dump_write("removed", "%u", pl_inode->removed); + } + pthread_mutex_unlock(&pl_inode->mutex); - gf_proc_dump_write("mandatory", "%d", pl_inode->mandatory); +out: + GF_FREE(pathname); + + if (ret && inode) { + if (!section_added) + gf_proc_dump_add_section( + "xlator.features.locks.%s." 
+ "inode", + this->name); + gf_proc_dump_write("Unable to print lock state", + "(Lock " + "acquisition failure) %s", + uuid_utoa(inode->gfid)); + } + return ret; +} - ret = pthread_mutex_trylock (&pl_inode->mutex); - if (ret) - goto out; +int32_t +mem_acct_init(xlator_t *this) +{ + int ret = -1; + + if (!this) + return ret; + + ret = xlator_mem_acct_init(this, gf_locks_mt_end + 1); + + if (ret != 0) { + gf_log(this->name, GF_LOG_ERROR, + "Memory accounting init" + "failed"); + return ret; + } + + return ret; +} + +pl_ctx_t * +pl_ctx_get(client_t *client, xlator_t *xlator) +{ + void *tmp = NULL; + pl_ctx_t *ctx = NULL; + pl_ctx_t *setted_ctx = NULL; + + client_ctx_get(client, xlator, &tmp); + + ctx = tmp; + + if (ctx != NULL) + goto out; + + ctx = GF_CALLOC(1, sizeof(pl_ctx_t), gf_locks_mt_posix_lock_t); + + if (ctx == NULL) + goto out; + + pthread_mutex_init(&ctx->lock, NULL); + INIT_LIST_HEAD(&ctx->inodelk_lockers); + INIT_LIST_HEAD(&ctx->entrylk_lockers); + INIT_LIST_HEAD(&ctx->metalk_list); + + setted_ctx = client_ctx_set(client, xlator, ctx); + if (ctx != setted_ctx) { + pthread_mutex_destroy(&ctx->lock); + GF_FREE(ctx); + ctx = setted_ctx; + } +out: + return ctx; +} + +int +pl_metalk_client_cleanup(xlator_t *this, pl_ctx_t *ctx) +{ + pl_meta_lock_t *meta_lock = NULL; + pl_meta_lock_t *tmp_metalk = NULL; + pl_inode_t *pl_inode = NULL; + posix_lock_t *posix_lock = NULL; + posix_lock_t *tmp_posixlk = NULL; + struct list_head tmp_posixlk_list; + + INIT_LIST_HEAD(&tmp_posixlk_list); + + pthread_mutex_lock(&ctx->lock); + { + /* if this list is empty then pl_inode->metalk_list should be + * empty too. 
meta lock should in all cases be added/removed + * from both pl_ctx_t and pl_inode */ + if (list_empty(&ctx->metalk_list)) + goto unlock; + + list_for_each_entry_safe(meta_lock, tmp_metalk, &ctx->metalk_list, + client_list) { - count = __get_entrylk_count (this, pl_inode); - if (count) { - gf_proc_dump_write("entrylk-count", "%d", count); - __dump_entrylks (pl_inode); - } + list_del_init(&meta_lock->client_list); - count = __get_inodelk_count (this, pl_inode, NULL); - if (count) { - gf_proc_dump_write("inodelk-count", "%d", count); - __dump_inodelks (pl_inode); - } + pl_inode = meta_lock->pl_inode; - count = __get_posixlk_count (this, pl_inode); - if (count) { - gf_proc_dump_write("posixlk-count", "%d", count); - __dump_posixlks (pl_inode); - } + pthread_mutex_lock(&pl_inode->mutex); + + { + /* Since the migration status is unknown here + * unwind all queued and blocked locks to check + * migration status and find the correct + * destination */ + __unwind_queued_locks(pl_inode, &tmp_posixlk_list); + + __unwind_blocked_locks(pl_inode, &tmp_posixlk_list); + + list_del_init(&meta_lock->list); + + pl_metalk_unref(meta_lock); + } + pthread_mutex_unlock(&pl_inode->mutex); + + /* The corresponding ref is taken in + * pl_insert_metalk*/ + inode_unref(pl_inode->inode); } - pthread_mutex_unlock (&pl_inode->mutex); + } + +unlock: + pthread_mutex_unlock(&ctx->lock); + + list_for_each_entry_safe(posix_lock, tmp_posixlk, &tmp_posixlk_list, list) + { + list_del_init(&posix_lock->list); + + STACK_UNWIND_STRICT(lk, posix_lock->frame, -1, EREMOTE, + &posix_lock->user_flock, NULL); + + __destroy_lock(posix_lock); + } + return 0; +} + +static int +pl_client_disconnect_cbk(xlator_t *this, client_t *client) +{ + pl_ctx_t *pl_ctx = pl_ctx_get(client, this); + if (pl_ctx) { + pl_inodelk_client_cleanup(this, pl_ctx); + pl_entrylk_client_cleanup(this, pl_ctx); + pl_metalk_client_cleanup(this, pl_ctx); + } + + return 0; +} + +static int +pl_client_destroy_cbk(xlator_t *this, client_t 
*client) +{ + void *tmp = NULL; + pl_ctx_t *pl_ctx = NULL; + + pl_client_disconnect_cbk(this, client); + + client_ctx_del(client, this, &tmp); + + if (tmp == NULL) + return 0; + + pl_ctx = tmp; + + GF_ASSERT(list_empty(&pl_ctx->inodelk_lockers)); + GF_ASSERT(list_empty(&pl_ctx->entrylk_lockers)); + + pthread_mutex_destroy(&pl_ctx->lock); + GF_FREE(pl_ctx); + + return 0; +} + +int +reconfigure(xlator_t *this, dict_t *options) +{ + posix_locks_private_t *priv = this->private; + int ret = -1; + char *tmp_str = NULL; + + GF_OPTION_RECONF("trace", priv->trace, options, bool, out); + + GF_OPTION_RECONF("monkey-unlocking", priv->monkey_unlocking, options, bool, + out); + + GF_OPTION_RECONF("revocation-secs", priv->revocation_secs, options, uint32, + out); + + GF_OPTION_RECONF("revocation-clear-all", priv->revocation_clear_all, + options, bool, out); + + GF_OPTION_RECONF("revocation-max-blocked", priv->revocation_max_blocked, + options, uint32, out); + + GF_OPTION_RECONF("notify-contention", priv->notify_contention, options, + bool, out); + + GF_OPTION_RECONF("notify-contention-delay", priv->notify_contention_delay, + options, uint32, out); + + GF_OPTION_RECONF("mandatory-locking", tmp_str, options, str, out); + + GF_OPTION_RECONF("enforce-mandatory-lock", priv->mlock_enforced, options, + bool, out); + + if (!strcmp(tmp_str, "forced")) + priv->mandatory_mode = MLK_FORCED; + else if (!strcmp(tmp_str, "file")) + priv->mandatory_mode = MLK_FILE_BASED; + else if (!strcmp(tmp_str, "optimal")) + priv->mandatory_mode = MLK_OPTIMAL; + else + priv->mandatory_mode = MLK_NONE; + + ret = 0; out: - GF_FREE (pathname); + return ret; +} - if (ret && inode) { - if (!section_added) - gf_proc_dump_add_section ("xlator.features.locks.%s." 
- "inode", this->name); - gf_proc_dump_write ("Unable to print lock state", "(Lock " - "acquisition failure) %s", - uuid_utoa (inode->gfid)); - } - return ret; +int +init(xlator_t *this) +{ + posix_locks_private_t *priv = NULL; + xlator_list_t *trav = NULL; + char *tmp_str = NULL; + int ret = -1; + + if (!this->children || this->children->next) { + gf_log(this->name, GF_LOG_CRITICAL, + "FATAL: posix-locks should have exactly one child"); + goto out; + } + + if (!this->parents) { + gf_log(this->name, GF_LOG_WARNING, + "Volume is dangling. Please check the volume file."); + } + + trav = this->children; + while (trav->xlator->children) + trav = trav->xlator->children; + + if (strncmp("storage/", trav->xlator->type, 8)) { + gf_log(this->name, GF_LOG_CRITICAL, + "'locks' translator is not loaded over a storage " + "translator"); + goto out; + } + + priv = GF_CALLOC(1, sizeof(*priv), gf_locks_mt_posix_locks_private_t); + + GF_OPTION_INIT("mandatory-locking", tmp_str, str, out); + if (!strcmp(tmp_str, "forced")) + priv->mandatory_mode = MLK_FORCED; + else if (!strcmp(tmp_str, "file")) + priv->mandatory_mode = MLK_FILE_BASED; + else if (!strcmp(tmp_str, "optimal")) + priv->mandatory_mode = MLK_OPTIMAL; + else + priv->mandatory_mode = MLK_NONE; + + tmp_str = NULL; + + GF_OPTION_INIT("trace", priv->trace, bool, out); + + GF_OPTION_INIT("monkey-unlocking", priv->monkey_unlocking, bool, out); + + GF_OPTION_INIT("revocation-secs", priv->revocation_secs, uint32, out); + + GF_OPTION_INIT("revocation-clear-all", priv->revocation_clear_all, bool, + out); + + GF_OPTION_INIT("revocation-max-blocked", priv->revocation_max_blocked, + uint32, out); + + GF_OPTION_INIT("notify-contention", priv->notify_contention, bool, out); + + GF_OPTION_INIT("notify-contention-delay", priv->notify_contention_delay, + uint32, out); + + GF_OPTION_INIT("enforce-mandatory-lock", priv->mlock_enforced, bool, out); + + this->local_pool = mem_pool_new(pl_local_t, 32); + if (!this->local_pool) { + ret = -1; + 
gf_log(this->name, GF_LOG_ERROR, + "failed to create local_t's memory pool"); + goto out; + } + + this->private = priv; + ret = 0; + +out: + if (ret) { + GF_FREE(priv); + } + return ret; +} + +void +fini(xlator_t *this) +{ + posix_locks_private_t *priv = this->private; + if (!priv) + return; + this->private = NULL; + if (this->local_pool) { + mem_pool_destroy(this->local_pool); + this->local_pool = NULL; + } + GF_FREE(priv->brickname); + GF_FREE(priv); + + return; } +int +pl_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc, + int32_t cmd, struct gf_flock *flock, dict_t *xdata); + +int +pl_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, + int32_t cmd, struct gf_flock *flock, dict_t *xdata); + +int +pl_entrylk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc, + const char *basename, entrylk_cmd cmd, entrylk_type type, + dict_t *xdata); + +int +pl_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, + const char *basename, entrylk_cmd cmd, entrylk_type type, + dict_t *xdata); + int32_t -mem_acct_init (xlator_t *this) +pl_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iatt *buf, struct iatt *preoldparent, + struct iatt *postoldparent, struct iatt *prenewparent, + struct iatt *postnewparent, dict_t *xdata) { - int ret = -1; + pl_inode_remove_cbk(this, cookie, op_ret < 0 ? 
op_errno : 0); - if (!this) - return ret; + PL_STACK_UNWIND(rename, xdata, frame, op_ret, op_errno, buf, preoldparent, + postoldparent, prenewparent, postnewparent, xdata); - ret = xlator_mem_acct_init (this, gf_locks_mt_end + 1); + return 0; +} - if (ret != 0) { - gf_log (this->name, GF_LOG_ERROR, "Memory accounting init" - "failed"); - return ret; - } +int32_t +pl_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) +{ + int32_t error; - return ret; + error = PL_INODE_REMOVE(rename, frame, this, oldloc, newloc, pl_rename, + pl_rename_cbk, oldloc, newloc, xdata); + if (error > 0) { + STACK_UNWIND_STRICT(rename, frame, -1, error, NULL, NULL, NULL, NULL, + NULL, NULL); + } + + return 0; } +posix_lock_t * +gf_lkmig_info_to_posix_lock(call_frame_t *frame, lock_migration_info_t *lmi) +{ + posix_lock_t *lock = GF_CALLOC(1, sizeof(posix_lock_t), + gf_locks_mt_posix_lock_t); + if (!lock) + goto out; + + lock->fl_start = lmi->flock.l_start; + lock->fl_type = lmi->flock.l_type; + + if (lmi->flock.l_len == 0) + lock->fl_end = LLONG_MAX; + else + lock->fl_end = lmi->flock.l_start + lmi->flock.l_len - 1; + + lock->client = frame->root->client; + + lock->lk_flags = lmi->lk_flags; + + lock->client_uid = gf_strdup(lmi->client_uid); + if (lock->client_uid == NULL) { + GF_FREE(lock); + lock = NULL; + goto out; + } + + lock->client_pid = lmi->flock.l_pid; + lock->owner = lmi->flock.l_owner; -pl_ctx_t* -pl_ctx_get (client_t *client, xlator_t *xlator) + INIT_LIST_HEAD(&lock->list); + +out: + return lock; +} + +/* This function is supposed to write the active locks from the source brick(in + * rebalance context) and write here. 
Hence, will add the locks directly to the + * pl_inode->ext_list*/ +int +pl_write_active_locks(call_frame_t *frame, pl_inode_t *pl_inode, + lock_migration_info_t *locklist) { - void *tmp = NULL; - pl_ctx_t *ctx = NULL; + posix_lock_t *newlock = NULL; + lock_migration_info_t *temp = NULL; + int ret = 0; + + pthread_mutex_lock(&pl_inode->mutex); + { + /* Just making sure the activelk list is empty. Should not + * happen though*/ + if (!list_empty(&pl_inode->ext_list)) { + pthread_mutex_unlock(&pl_inode->mutex); + gf_msg(THIS->name, GF_LOG_ERROR, 0, 0, "invalid locks found"); + + ret = -1; + goto out; + } + + /* This list also should not be empty */ + if (list_empty(&locklist->list)) { + pthread_mutex_unlock(&pl_inode->mutex); + gf_msg(THIS->name, GF_LOG_ERROR, 0, 0, "empty lock list"); - client_ctx_get (client, xlator, &tmp); + ret = -1; + goto out; + } - ctx = tmp; + list_for_each_entry(temp, &locklist->list, list) + { + newlock = gf_lkmig_info_to_posix_lock(frame, temp); + if (!newlock) { + pthread_mutex_unlock(&pl_inode->mutex); + gf_msg(THIS->name, GF_LOG_ERROR, 0, 0, + "mem allocation failed for newlock"); - if (ctx != NULL) + ret = -1; goto out; + } + list_add_tail(&newlock->list, &pl_inode->ext_list); + } + } + /*TODO: What if few lock add failed with ENOMEM. 
Should the already + * added locks be clearted */ + pthread_mutex_unlock(&pl_inode->mutex); +out: + return ret; +} - ctx = GF_CALLOC (1, sizeof (pl_ctx_t), gf_locks_mt_posix_lock_t); +static int +pl_setactivelk(call_frame_t *frame, xlator_t *this, loc_t *loc, + lock_migration_info_t *locklist, dict_t *xdata) +{ + int op_ret = 0; + int op_errno = 0; + int ret = 0; - if (ctx == NULL) - goto out; + pl_inode_t *pl_inode = pl_inode_get(this, loc->inode, NULL); + if (!pl_inode) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, "pl_inode_get failed"); - pthread_mutex_init (&ctx->lock, NULL); - INIT_LIST_HEAD (&ctx->inodelk_lockers); - INIT_LIST_HEAD (&ctx->entrylk_lockers); + op_ret = -1; + op_errno = ENOMEM; + goto out; + } + ret = pl_write_active_locks(frame, pl_inode, locklist); + + op_ret = ret; - if (client_ctx_set (client, xlator, ctx) != 0) { - pthread_mutex_destroy (&ctx->lock); - GF_FREE (ctx); - ctx = NULL; - } out: - return ctx; + STACK_UNWIND_STRICT(setactivelk, frame, op_ret, op_errno, NULL); + + return 0; } +int32_t +pl_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) +{ + pl_inode_remove_cbk(this, cookie, op_ret < 0 ? 
op_errno : 0); -static int -pl_client_disconnect_cbk (xlator_t *this, client_t *client) + PL_STACK_UNWIND(unlink, xdata, frame, op_ret, op_errno, preparent, + postparent, xdata); + + return 0; +} + +int32_t +pl_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, + dict_t *xdata) { - pl_ctx_t *pl_ctx = NULL; + int32_t error; - pl_ctx = pl_ctx_get (client, this); + error = PL_INODE_REMOVE(unlink, frame, this, loc, NULL, pl_unlink, + pl_unlink_cbk, loc, xflag, xdata); + if (error > 0) { + STACK_UNWIND_STRICT(unlink, frame, -1, error, NULL, NULL, NULL); + } - pl_inodelk_client_cleanup (this, pl_ctx); + return 0; +} - pl_entrylk_client_cleanup (this, pl_ctx); +int32_t +pl_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, inode_t *inode, struct iatt *buf, + struct iatt *preparent, struct iatt *postparent, dict_t *xdata) +{ + PL_STACK_UNWIND_FOR_CLIENT(mkdir, xdata, frame, op_ret, op_errno, inode, + buf, preparent, postparent, xdata); + return 0; +} - return 0; +int +pl_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + mode_t umask, dict_t *xdata) +{ + PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL); + STACK_WIND(frame, pl_mkdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata); + return 0; } +int32_t +pl_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iatt *buf, dict_t *xdata) +{ + PL_STACK_UNWIND_FOR_CLIENT(stat, xdata, frame, op_ret, op_errno, buf, + xdata); + return 0; +} -static int -pl_client_destroy_cbk (xlator_t *this, client_t *client) +int +pl_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +{ + PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL); + STACK_WIND(frame, pl_stat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->stat, loc, xdata); + return 0; +} + +int32_t +pl_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, 
int32_t op_ret, + int32_t op_errno, inode_t *inode, struct iatt *buf, + struct iatt *preparent, struct iatt *postparent, dict_t *xdata) +{ + PL_STACK_UNWIND_FOR_CLIENT(mknod, xdata, frame, op_ret, op_errno, inode, + buf, preparent, postparent, xdata); + return 0; +} + +int +pl_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + dev_t rdev, mode_t umask, dict_t *xdata) { - void *tmp = NULL; - pl_ctx_t *pl_ctx = NULL; + PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL); + STACK_WIND(frame, pl_mknod_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata); + return 0; +} - pl_client_disconnect_cbk (this, client); +int32_t +pl_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) +{ + pl_inode_remove_cbk(this, cookie, op_ret < 0 ? op_errno : 0); - client_ctx_del (client, this, &tmp); + PL_STACK_UNWIND_FOR_CLIENT(rmdir, xdata, frame, op_ret, op_errno, preparent, + postparent, xdata); - if (tmp == NULL) - return 0; + return 0; +} - pl_ctx = tmp; +int +pl_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags, + dict_t *xdata) +{ + int32_t error; - GF_ASSERT (list_empty(&pl_ctx->inodelk_lockers)); - GF_ASSERT (list_empty(&pl_ctx->entrylk_lockers)); + error = PL_INODE_REMOVE(rmdir, frame, this, loc, NULL, pl_rmdir, + pl_rmdir_cbk, loc, xflags, xdata); + if (error > 0) { + STACK_UNWIND_STRICT(rmdir, frame, -1, error, NULL, NULL, NULL); + } - pthread_mutex_destroy (&pl_ctx->lock); - GF_FREE (pl_ctx); + return 0; +} +int32_t +pl_symlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + PL_STACK_UNWIND_FOR_CLIENT(symlink, xdata, frame, op_ret, op_errno, inode, + buf, preparent, postparent, xdata); + return 0; +} + +int +pl_symlink(call_frame_t 
*frame, xlator_t *this, const char *linkname, + loc_t *loc, mode_t umask, dict_t *xdata) +{ + PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL); + STACK_WIND(frame, pl_symlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->symlink, linkname, loc, umask, xdata); + return 0; +} + +int32_t +pl_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, inode_t *inode, struct iatt *buf, + struct iatt *preparent, struct iatt *postparent, dict_t *xdata) +{ + pl_inode_t *pl_inode = (pl_inode_t *)cookie; + + if (op_ret >= 0) { + pthread_mutex_lock(&pl_inode->mutex); + + /* TODO: can happen pl_inode->links == 0 ? */ + if (pl_inode->links >= 0) { + pl_inode->links++; + } + + pthread_mutex_unlock(&pl_inode->mutex); + } + + PL_STACK_UNWIND_FOR_CLIENT(link, xdata, frame, op_ret, op_errno, inode, buf, + preparent, postparent, xdata); + return 0; +} + +int +pl_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) +{ + pl_inode_t *pl_inode; + + pl_inode = pl_inode_get(this, oldloc->inode, NULL); + if (pl_inode == NULL) { + STACK_UNWIND_STRICT(link, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, + NULL); return 0; + } + + PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), oldloc, newloc); + STACK_WIND_COOKIE(frame, pl_link_cbk, pl_inode, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata); + return 0; +} + +int32_t +pl_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf, + dict_t *xdata) +{ + PL_STACK_UNWIND_FOR_CLIENT(fsync, xdata, frame, op_ret, op_errno, prebuf, + postbuf, xdata); + return 0; } int -reconfigure (xlator_t *this, dict_t *options) +pl_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, + dict_t *xdata) { - posix_locks_private_t *priv = NULL; - int ret = -1; + PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL); + STACK_WIND(frame, 
pl_fsync_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata); + return 0; +} - priv = this->private; +int32_t +pl_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, + dict_t *xdata) +{ + PL_STACK_UNWIND_FOR_CLIENT(readdir, xdata, frame, op_ret, op_errno, entries, + xdata); + return 0; +} - GF_OPTION_RECONF ("trace", priv->trace, options, bool, out); +int +pl_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, dict_t *xdata) +{ + PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL); + STACK_WIND(frame, pl_readdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdir, fd, size, offset, xdata); + return 0; +} - ret = 0; -out: - return ret; +int32_t +pl_fsyncdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + PL_STACK_UNWIND_FOR_CLIENT(fsyncdir, xdata, frame, op_ret, op_errno, xdata); + return 0; } int -init (xlator_t *this) +pl_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, + dict_t *xdata) { - posix_locks_private_t *priv = NULL; - xlator_list_t *trav = NULL; - data_t *mandatory = NULL; - int ret = -1; + PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL); + STACK_WIND(frame, pl_fsyncdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsyncdir, fd, datasync, xdata); + return 0; +} - if (!this->children || this->children->next) { - gf_log (this->name, GF_LOG_CRITICAL, - "FATAL: posix-locks should have exactly one child"); - goto out; +int32_t +pl_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct statvfs *buf, dict_t *xdata) +{ + PL_STACK_UNWIND_FOR_CLIENT(statfs, xdata, frame, op_ret, op_errno, buf, + xdata); + return 0; +} + +int +pl_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +{ + PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL); + 
STACK_WIND(frame, pl_statfs_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->statfs, loc, xdata); + return 0; +} + +int32_t +pl_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + pl_local_t *local = NULL; + pl_inode_t *pl_inode = NULL; + + local = frame->local; + if (local && local->update_mlock_enforced_flag && op_ret != -1) { + pl_inode = pl_inode_get(this, local->inode, NULL); + if (!pl_inode) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; } - if (!this->parents) { - gf_log (this->name, GF_LOG_WARNING, - "Volume is dangling. Please check the volume file."); + pthread_mutex_lock(&pl_inode->mutex); + { + pl_inode->mlock_enforced = _gf_false; + pl_inode->check_mlock_info = _gf_false; + pl_inode->track_fop_wind_count = _gf_true; } + pthread_mutex_unlock(&pl_inode->mutex); + } - trav = this->children; - while (trav->xlator->children) - trav = trav->xlator->children; +unwind: + PL_STACK_UNWIND_FOR_CLIENT(removexattr, xdata, frame, op_ret, op_errno, + xdata); + return 0; +} - if (strncmp ("storage/", trav->xlator->type, 8)) { - gf_log (this->name, GF_LOG_CRITICAL, - "'locks' translator is not loaded over a storage " - "translator"); - goto out; - } +int +pl_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) +{ + int op_ret = 0; + int op_errno = EINVAL; + posix_locks_private_t *priv = this->private; - priv = GF_CALLOC (1, sizeof (*priv), - gf_locks_mt_posix_locks_private_t); + PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL); - mandatory = dict_get (this->options, "mandatory-locks"); - if (mandatory) - gf_log (this->name, GF_LOG_WARNING, - "mandatory locks not supported in this minor release."); + PL_CHECK_LOCK_ENFORCE_KEY(frame, ((dict_t *)NULL), name, this, loc, + ((fd_t *)NULL), priv); - GF_OPTION_INIT ("trace", priv->trace, bool, out); + STACK_WIND(frame, pl_removexattr_cbk, FIRST_CHILD(this), + 
FIRST_CHILD(this)->fops->removexattr, loc, name, xdata); + return 0; - this->local_pool = mem_pool_new (pl_local_t, 32); - if (!this->local_pool) { - ret = -1; - gf_log (this->name, GF_LOG_ERROR, - "failed to create local_t's memory pool"); - goto out; - } +unwind: + PL_STACK_UNWIND_FOR_CLIENT(removexattr, xdata, frame, op_ret, op_errno, + NULL); - this->private = priv; - ret = 0; + return 0; +} -out: - if (ret) { - GF_FREE (priv); +int32_t +pl_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + pl_local_t *local = NULL; + pl_inode_t *pl_inode = NULL; + + local = frame->local; + if (local && local->update_mlock_enforced_flag && op_ret != -1) { + pl_inode = pl_inode_get(this, local->inode, NULL); + if (!pl_inode) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; } - return ret; + + pthread_mutex_lock(&pl_inode->mutex); + { + pl_inode->mlock_enforced = _gf_false; + pl_inode->check_mlock_info = _gf_false; + } + pthread_mutex_unlock(&pl_inode->mutex); + } + +unwind: + PL_STACK_UNWIND_FOR_CLIENT(fremovexattr, xdata, frame, op_ret, op_errno, + xdata); + return 0; +} + +int +pl_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, + dict_t *xdata) +{ + int op_ret = -1; + int op_errno = EINVAL; + posix_locks_private_t *priv = this->private; + + PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL); + + PL_CHECK_LOCK_ENFORCE_KEY(frame, ((dict_t *)NULL), name, this, + ((loc_t *)NULL), fd, priv); + + STACK_WIND(frame, pl_fremovexattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata); + return 0; + +unwind: + PL_STACK_UNWIND_FOR_CLIENT(fremovexattr, xdata, frame, op_ret, op_errno, + NULL); + return 0; } +int32_t +pl_rchecksum_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, uint32_t weak_cksum, + uint8_t *strong_cksum, dict_t *xdata) +{ + PL_STACK_UNWIND_FOR_CLIENT(rchecksum, xdata, frame, op_ret, 
op_errno, + weak_cksum, strong_cksum, xdata); + return 0; +} int -fini (xlator_t *this) +pl_rchecksum(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + int32_t len, dict_t *xdata) { - posix_locks_private_t *priv = NULL; + PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL); + STACK_WIND(frame, pl_rchecksum_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rchecksum, fd, offset, len, xdata); + return 0; +} - priv = this->private; - if (!priv) - return 0; - this->private = NULL; - GF_FREE (priv->brickname); - GF_FREE (priv); +int32_t +pl_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata) +{ + PL_STACK_UNWIND_FOR_CLIENT(xattrop, xdata, frame, op_ret, op_errno, dict, + xdata); + return 0; +} - return 0; +int +pl_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc, + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) +{ + PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL); + STACK_WIND(frame, pl_xattrop_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->xattrop, loc, optype, xattr, xdata); + return 0; } +int32_t +pl_fxattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata) +{ + PL_STACK_UNWIND_FOR_CLIENT(fxattrop, xdata, frame, op_ret, op_errno, dict, + xdata); + return 0; +} int -pl_inodelk (call_frame_t *frame, xlator_t *this, - const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *flock, - dict_t *xdata); +pl_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd, + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) +{ + PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL); + STACK_WIND(frame, pl_fxattrop_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fxattrop, fd, optype, xattr, xdata); + return 0; +} + +int32_t +pl_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *statpre, + struct iatt 
*statpost, dict_t *xdata) +{ + PL_STACK_UNWIND_FOR_CLIENT(setattr, xdata, frame, op_ret, op_errno, statpre, + statpost, xdata); + return 0; +} int -pl_finodelk (call_frame_t *frame, xlator_t *this, - const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *flock, - dict_t *xdata); +pl_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf, + int32_t valid, dict_t *xdata) +{ + PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL); + STACK_WIND(frame, pl_setattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata); + return 0; +} + +int32_t +pl_fsetattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *statpre, + struct iatt *statpost, dict_t *xdata) +{ + PL_STACK_UNWIND_FOR_CLIENT(fsetattr, xdata, frame, op_ret, op_errno, + statpre, statpost, xdata); + return 0; +} int -pl_entrylk (call_frame_t *frame, xlator_t *this, - const char *volume, loc_t *loc, const char *basename, - entrylk_cmd cmd, entrylk_type type, dict_t *xdata); +pl_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf, + int32_t valid, dict_t *xdata) +{ + PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL); + STACK_WIND(frame, pl_fsetattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata); + return 0; +} + +int32_t +pl_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *pre, + struct iatt *post, dict_t *xdata) +{ + PL_STACK_UNWIND_FOR_CLIENT(fallocate, xdata, frame, op_ret, op_errno, pre, + post, xdata); + return 0; +} + +int +pl_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t keep_size, + off_t offset, size_t len, dict_t *xdata) +{ + PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL); + STACK_WIND(frame, pl_fallocate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fallocate, fd, keep_size, offset, len, + xdata); + return 0; +} + 
+int32_t +pl_readlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, const char *path, + struct iatt *buf, dict_t *xdata) +{ + PL_STACK_UNWIND_FOR_CLIENT(readlink, xdata, frame, op_ret, op_errno, path, + buf, xdata); + return 0; +} int -pl_fentrylk (call_frame_t *frame, xlator_t *this, - const char *volume, fd_t *fd, const char *basename, - entrylk_cmd cmd, entrylk_type type, dict_t *xdata); +pl_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size, + dict_t *xdata) +{ + PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL); + STACK_WIND(frame, pl_readlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readlink, loc, size, xdata); + return 0; +} + +int32_t +pl_access_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *xdata) +{ + PL_STACK_UNWIND_FOR_CLIENT(access, xdata, frame, op_ret, op_errno, xdata); + return 0; +} + +int +pl_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask, + dict_t *xdata) +{ + PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL); + STACK_WIND(frame, pl_access_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->access, loc, mask, xdata); + return 0; +} + +int32_t +pl_seek_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, off_t offset, dict_t *xdata) +{ + PL_STACK_UNWIND_FOR_CLIENT(seek, xdata, frame, op_ret, op_errno, offset, + xdata); + return 0; +} + +int32_t +pl_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + gf_seek_what_t what, dict_t *xdata) +{ + PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL); + STACK_WIND(frame, pl_seek_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->seek, fd, offset, what, xdata); + return 0; +} struct xlator_fops fops = { - .lookup = pl_lookup, - .create = pl_create, - .truncate = pl_truncate, - .ftruncate = pl_ftruncate, - .open = pl_open, - .readv = pl_readv, - .writev = pl_writev, - 
.lk = pl_lk, - .inodelk = pl_inodelk, - .finodelk = pl_finodelk, - .entrylk = pl_entrylk, - .fentrylk = pl_fentrylk, - .flush = pl_flush, - .opendir = pl_opendir, - .readdirp = pl_readdirp, - .getxattr = pl_getxattr, - .fgetxattr = pl_fgetxattr, - .fsetxattr = pl_fsetxattr, + .lookup = pl_lookup, + .create = pl_create, + .fstat = pl_fstat, + .truncate = pl_truncate, + .ftruncate = pl_ftruncate, + .discard = pl_discard, + .zerofill = pl_zerofill, + .open = pl_open, + .readv = pl_readv, + .writev = pl_writev, + .lk = pl_lk, + .inodelk = pl_inodelk, + .finodelk = pl_finodelk, + .entrylk = pl_entrylk, + .fentrylk = pl_fentrylk, + .flush = pl_flush, + .opendir = pl_opendir, + .readdirp = pl_readdirp, + .setxattr = pl_setxattr, + .fsetxattr = pl_fsetxattr, + .getxattr = pl_getxattr, + .fgetxattr = pl_fgetxattr, + .removexattr = pl_removexattr, + .fremovexattr = pl_fremovexattr, + .rename = pl_rename, + .getactivelk = pl_getactivelk, + .setactivelk = pl_setactivelk, + .unlink = pl_unlink, + .access = pl_access, + .readlink = pl_readlink, + .fallocate = pl_fallocate, + .fsetattr = pl_fsetattr, + .setattr = pl_setattr, + .fxattrop = pl_fxattrop, + .xattrop = pl_xattrop, + .rchecksum = pl_rchecksum, + .statfs = pl_statfs, + .fsyncdir = pl_fsyncdir, + .readdir = pl_readdir, + .symlink = pl_symlink, + .link = pl_link, + .rmdir = pl_rmdir, + .mknod = pl_mknod, + .stat = pl_stat, + .seek = pl_seek, }; struct xlator_dumpops dumpops = { - .inodectx = pl_dump_inode_priv, + .inodectx = pl_dump_inode_priv, }; struct xlator_cbks cbks = { - .forget = pl_forget, - .release = pl_release, - .releasedir = pl_releasedir, - .client_destroy = pl_client_destroy_cbk, - .client_disconnect = pl_client_disconnect_cbk, + .forget = pl_forget, + .release = pl_release, + .releasedir = pl_releasedir, + .client_destroy = pl_client_destroy_cbk, + .client_disconnect = pl_client_disconnect_cbk, }; - struct volume_options options[] = { - { .key = { "mandatory-locks", "mandatory" }, - .type = 
GF_OPTION_TYPE_BOOL - }, - { .key = { "trace" }, - .type = GF_OPTION_TYPE_BOOL - }, - { .key = {NULL} }, + {.key = {"mandatory-locking"}, + .type = GF_OPTION_TYPE_STR, + .default_value = "off", + .op_version = {GD_OP_VERSION_3_8_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"locks"}, + .description = "Specifies the mandatory-locking mode. Valid options " + "are 'file' to use linux style mandatory locks, " + "'forced' to use volume strictly under mandatory lock " + "semantics only and 'optimal' to treat advisory and " + "mandatory locks separately on their own."}, + {.key = {"trace"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .op_version = {GD_OP_VERSION_3_7_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"locks"}, + .description = "Trace the different lock requests " + "to logs."}, + {.key = {"monkey-unlocking"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "false", + .op_version = {GD_OP_VERSION_3_9_0}, + .flags = OPT_FLAG_SETTABLE, + .tags = {"locks"}, + .description = "Ignore a random number of unlock requests. 
Useful " + "for testing/creating robust lock recovery mechanisms."}, + { + .key = {"revocation-secs"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .max = INT_MAX, + .default_value = "0", + .op_version = {GD_OP_VERSION_3_9_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"locks"}, + .description = "Maximum time a lock can be taken out, before" + "being revoked.", + }, + { + .key = {"revocation-clear-all"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "false", + .op_version = {GD_OP_VERSION_3_9_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"locks"}, + .description = "If set to true, will revoke BOTH granted and blocked " + "(pending) lock requests if a revocation threshold is " + "hit.", + }, + {.key = {"revocation-max-blocked"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .max = INT_MAX, + .default_value = "0", + .op_version = {GD_OP_VERSION_3_9_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"locks"}, + .description = "A number of blocked lock requests after which a lock " + "will be revoked to allow the others to proceed. Can " + "be used in conjunction w/ revocation-clear-all."}, + {.key = {"notify-contention"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "yes", + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .op_version = {GD_OP_VERSION_4_0_0}, + .tags = {"locks", "contention"}, + .description = "When this option is enabled and a lock request " + "conflicts with a currently granted lock, an upcall " + "notification will be sent to the current owner of " + "the lock to request it to be released as soon as " + "possible."}, + {.key = {"notify-contention-delay"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, /* An upcall notification is sent every time a conflict is + * detected. 
*/ + .max = 60, + .default_value = "5", + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .op_version = {GD_OP_VERSION_4_0_0}, + .tags = {"locks", "contention", "timeout"}, + .description = "This value determines the minimum amount of time " + "(in seconds) between upcall contention notifications " + "on the same inode. If multiple lock requests are " + "received during this period, only one upcall will " + "be sent."}, + {.key = {"enforce-mandatory-lock"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .flags = OPT_FLAG_SETTABLE, + .op_version = {GD_OP_VERSION_6_0}, + .description = "option to enable lock enforcement"}, + {.key = {NULL}}, +}; + +xlator_api_t xlator_api = { + .init = init, + .fini = fini, + .reconfigure = reconfigure, + .mem_acct_init = mem_acct_init, + .op_version = {1}, /* Present from the initial version */ + .dumpops = &dumpops, + .fops = &fops, + .cbks = &cbks, + .options = options, + .identifier = "locks", + .category = GF_MAINTAINED, }; diff --git a/xlators/features/locks/src/reservelk.c b/xlators/features/locks/src/reservelk.c index 8eb08d0ef79..604691fd887 100644 --- a/xlators/features/locks/src/reservelk.c +++ b/xlators/features/locks/src/reservelk.c @@ -7,432 +7,376 @@ later), or the GNU General Public License, version 2 (GPLv2), in all cases as published by the Free Software Foundation. 
*/ -#include "glusterfs.h" -#include "compat.h" -#include "xlator.h" -#include "inode.h" -#include "logging.h" -#include "common-utils.h" -#include "list.h" +#include <glusterfs/glusterfs.h> +#include <glusterfs/compat.h> +#include <glusterfs/xlator.h> +#include <glusterfs/logging.h> +#include <glusterfs/common-utils.h> +#include <glusterfs/list.h> #include "locks.h" #include "common.h" -void -__delete_reserve_lock (posix_lock_t *lock) -{ - list_del (&lock->list); -} - -void -__destroy_reserve_lock (posix_lock_t *lock) -{ - GF_FREE (lock); -} - /* Return true if the two reservelks have exactly same lock boundaries */ int -reservelks_equal (posix_lock_t *l1, posix_lock_t *l2) +reservelks_equal(posix_lock_t *l1, posix_lock_t *l2) { - if ((l1->fl_start == l2->fl_start) && - (l1->fl_end == l2->fl_end)) - return 1; + if ((l1->fl_start == l2->fl_start) && (l1->fl_end == l2->fl_end)) + return 1; - return 0; + return 0; } /* Determine if lock is grantable or not */ static posix_lock_t * -__reservelk_grantable (pl_inode_t *pl_inode, posix_lock_t *lock) +__reservelk_grantable(pl_inode_t *pl_inode, posix_lock_t *lock) { - xlator_t *this = NULL; - posix_lock_t *l = NULL; - posix_lock_t *ret_lock = NULL; - - this = THIS; - - if (list_empty (&pl_inode->reservelk_list)) { - gf_log (this->name, GF_LOG_TRACE, - "No reservelks in list"); - goto out; - } - list_for_each_entry (l, &pl_inode->reservelk_list, list){ - if (reservelks_equal (lock, l)) { - ret_lock = l; - break; - } + xlator_t *this = THIS; + posix_lock_t *l = NULL; + posix_lock_t *ret_lock = NULL; + + if (list_empty(&pl_inode->reservelk_list)) { + gf_log(this->name, GF_LOG_TRACE, "No reservelks in list"); + goto out; + } + list_for_each_entry(l, &pl_inode->reservelk_list, list) + { + if (reservelks_equal(lock, l)) { + ret_lock = l; + break; } + } out: - return ret_lock; + return ret_lock; } static int -__same_owner_reservelk (posix_lock_t *l1, posix_lock_t *l2) +__same_owner_reservelk(posix_lock_t *l1, posix_lock_t *l2) { 
- return (is_same_lkowner (&l1->owner, &l2->owner)); - + return (is_same_lkowner(&l1->owner, &l2->owner)); } static posix_lock_t * -__matching_reservelk (pl_inode_t *pl_inode, posix_lock_t *lock) +__matching_reservelk(pl_inode_t *pl_inode, posix_lock_t *lock) { - posix_lock_t *l = NULL; + posix_lock_t *l = NULL; - if (list_empty (&pl_inode->reservelk_list)) { - gf_log ("posix-locks", GF_LOG_TRACE, - "reservelk list empty"); - return NULL; - } + if (list_empty(&pl_inode->reservelk_list)) { + gf_log("posix-locks", GF_LOG_TRACE, "reservelk list empty"); + return NULL; + } - list_for_each_entry (l, &pl_inode->reservelk_list, list) { - if (reservelks_equal (l, lock)) { - gf_log ("posix-locks", GF_LOG_TRACE, - "equal reservelk found"); - break; - } + list_for_each_entry(l, &pl_inode->reservelk_list, list) + { + if (reservelks_equal(l, lock)) { + gf_log("posix-locks", GF_LOG_TRACE, "equal reservelk found"); + break; } + } - return l; + return l; } static int -__reservelk_conflict (xlator_t *this, pl_inode_t *pl_inode, - posix_lock_t *lock) +__reservelk_conflict(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock) { - posix_lock_t *conf = NULL; - int ret = 0; - - conf = __matching_reservelk (pl_inode, lock); - if (conf) { - gf_log (this->name, GF_LOG_TRACE, - "Matching reservelk found"); - if (__same_owner_reservelk (lock, conf)) { - list_del_init (&conf->list); - gf_log (this->name, GF_LOG_TRACE, - "Removing the matching reservelk for setlk to progress"); - GF_FREE (conf); - ret = 0; - } else { - gf_log (this->name, GF_LOG_TRACE, - "Conflicting reservelk found"); - ret = 1; - } - + int ret = 0; + + posix_lock_t *conf = __matching_reservelk(pl_inode, lock); + if (conf) { + gf_log(this->name, GF_LOG_TRACE, "Matching reservelk found"); + if (__same_owner_reservelk(lock, conf)) { + list_del_init(&conf->list); + gf_log(this->name, GF_LOG_TRACE, + "Removing the matching reservelk for setlk to progress"); + __destroy_lock(conf); + ret = 0; + } else { + gf_log(this->name, 
GF_LOG_TRACE, "Conflicting reservelk found"); + ret = 1; } - return ret; - + } + return ret; } int -pl_verify_reservelk (xlator_t *this, pl_inode_t *pl_inode, - posix_lock_t *lock, int can_block) +pl_verify_reservelk(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock, + const int can_block) { - int ret = 0; - - pthread_mutex_lock (&pl_inode->mutex); - { - if (__reservelk_conflict (this, pl_inode, lock)) { - gf_log (this->name, GF_LOG_TRACE, - "Found conflicting reservelk. Blocking until reservelk is unlocked."); - lock->blocked = can_block; - list_add_tail (&lock->list, &pl_inode->blocked_calls); - ret = -1; - goto unlock; - } - - gf_log (this->name, GF_LOG_TRACE, - "no conflicting reservelk found. Call continuing"); - ret = 0; - + int ret = 0; + + pthread_mutex_lock(&pl_inode->mutex); + { + if (__reservelk_conflict(this, pl_inode, lock)) { + lock->blocked = can_block; + list_add_tail(&lock->list, &pl_inode->blocked_calls); + pthread_mutex_unlock(&pl_inode->mutex); + gf_log(this->name, GF_LOG_TRACE, + "Found conflicting reservelk. Blocking until reservelk is " + "unlocked."); + ret = -1; + goto out; } -unlock: - pthread_mutex_unlock (&pl_inode->mutex); - - return ret; - + } + pthread_mutex_unlock(&pl_inode->mutex); + gf_log(this->name, GF_LOG_TRACE, + "no conflicting reservelk found. Call continuing"); + ret = 0; +out: + return ret; } - /* Determines if lock can be granted and adds the lock. If the lock * is blocking, adds it to the blocked_reservelks. 
*/ static int -__lock_reservelk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock, - int can_block) +__lock_reservelk(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock, + const int can_block) { - posix_lock_t *conf = NULL; - int ret = -EINVAL; - - conf = __reservelk_grantable (pl_inode, lock); - if (conf){ - ret = -EAGAIN; - if (can_block == 0) - goto out; + int ret = -EINVAL; - list_add_tail (&lock->list, &pl_inode->blocked_reservelks); + posix_lock_t *conf = __reservelk_grantable(pl_inode, lock); + if (conf) { + ret = -EAGAIN; + if (can_block == 0) + goto out; - gf_log (this->name, GF_LOG_TRACE, - "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => Blocked", - lock->fl_type == F_UNLCK ? "Unlock" : "Lock", - lock->client_pid, - lkowner_utoa (&lock->owner), - lock->user_flock.l_start, - lock->user_flock.l_len); + list_add_tail(&lock->list, &pl_inode->blocked_reservelks); + gf_log(this->name, GF_LOG_TRACE, + "%s (pid=%d) lk-owner:%s %" PRId64 " - %" PRId64 " => Blocked", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", lock->client_pid, + lkowner_utoa(&lock->owner), lock->user_flock.l_start, + lock->user_flock.l_len); - goto out; - } + goto out; + } - list_add (&lock->list, &pl_inode->reservelk_list); + list_add(&lock->list, &pl_inode->reservelk_list); - ret = 0; + ret = 0; out: - return ret; + return ret; } static posix_lock_t * -find_matching_reservelk (posix_lock_t *lock, pl_inode_t *pl_inode) +find_matching_reservelk(posix_lock_t *lock, pl_inode_t *pl_inode) { - posix_lock_t *l = NULL; - list_for_each_entry (l, &pl_inode->reservelk_list, list) { - if (reservelks_equal (l, lock)) - return l; - } - return NULL; + posix_lock_t *l = NULL; + list_for_each_entry(l, &pl_inode->reservelk_list, list) + { + if (reservelks_equal(l, lock)) + return l; + } + return NULL; } /* Set F_UNLCK removes a lock which has the exact same lock boundaries * as the UNLCK lock specifies. 
If such a lock is not found, returns invalid */ static posix_lock_t * -__reserve_unlock_lock (xlator_t *this, posix_lock_t *lock, pl_inode_t *pl_inode) +__reserve_unlock_lock(xlator_t *this, posix_lock_t *lock, pl_inode_t *pl_inode) { - - posix_lock_t *conf = NULL; - - conf = find_matching_reservelk (lock, pl_inode); - if (!conf) { - gf_log (this->name, GF_LOG_DEBUG, - " Matching lock not found for unlock"); - goto out; - } - __delete_reserve_lock (conf); - gf_log (this->name, GF_LOG_DEBUG, - " Matching lock found for unlock"); + posix_lock_t *conf = find_matching_reservelk(lock, pl_inode); + if (!conf) { + gf_log(this->name, GF_LOG_DEBUG, " Matching lock not found for unlock"); + goto out; + } + __delete_lock(conf); + gf_log(this->name, GF_LOG_DEBUG, " Matching lock found for unlock"); out: - return conf; - - + return conf; } static void -__grant_blocked_reserve_locks (xlator_t *this, pl_inode_t *pl_inode, - struct list_head *granted) +__grant_blocked_reserve_locks(xlator_t *this, pl_inode_t *pl_inode, + struct list_head *granted) { - int bl_ret = 0; - posix_lock_t *bl = NULL; - posix_lock_t *tmp = NULL; - - struct list_head blocked_list; + int bl_ret = 0; + posix_lock_t *bl = NULL; + posix_lock_t *tmp = NULL; - INIT_LIST_HEAD (&blocked_list); - list_splice_init (&pl_inode->blocked_reservelks, &blocked_list); + struct list_head blocked_list; - list_for_each_entry_safe (bl, tmp, &blocked_list, list) { + INIT_LIST_HEAD(&blocked_list); + list_splice_init(&pl_inode->blocked_reservelks, &blocked_list); - list_del_init (&bl->list); + list_for_each_entry_safe(bl, tmp, &blocked_list, list) + { + list_del_init(&bl->list); - bl_ret = __lock_reservelk (this, pl_inode, bl, 1); + bl_ret = __lock_reservelk(this, pl_inode, bl, 1); - if (bl_ret == 0) { - list_add (&bl->list, granted); - } + if (bl_ret == 0) { + list_add(&bl->list, granted); } - return; + } + return; } /* Grant all reservelks blocked on lock(s) */ void -grant_blocked_reserve_locks (xlator_t *this, pl_inode_t 
*pl_inode) +grant_blocked_reserve_locks(xlator_t *this, pl_inode_t *pl_inode) { - struct list_head granted; - posix_lock_t *lock = NULL; - posix_lock_t *tmp = NULL; + struct list_head granted; + posix_lock_t *lock = NULL; + posix_lock_t *tmp = NULL; - INIT_LIST_HEAD (&granted); - - if (list_empty (&pl_inode->blocked_reservelks)) { - gf_log (this->name, GF_LOG_TRACE, - "No blocked locks to be granted"); - return; - } - - pthread_mutex_lock (&pl_inode->mutex); - { - __grant_blocked_reserve_locks (this, pl_inode, &granted); - } - pthread_mutex_unlock (&pl_inode->mutex); - - list_for_each_entry_safe (lock, tmp, &granted, list) { - gf_log (this->name, GF_LOG_TRACE, - "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => Granted", - lock->fl_type == F_UNLCK ? "Unlock" : "Lock", - lock->client_pid, - lkowner_utoa (&lock->owner), - lock->user_flock.l_start, - lock->user_flock.l_len); - - STACK_UNWIND_STRICT (lk, lock->frame, 0, 0, &lock->user_flock, - NULL); - } + INIT_LIST_HEAD(&granted); + if (list_empty(&pl_inode->blocked_reservelks)) { + gf_log(this->name, GF_LOG_TRACE, "No blocked locks to be granted"); + return; + } + + pthread_mutex_lock(&pl_inode->mutex); + { + __grant_blocked_reserve_locks(this, pl_inode, &granted); + } + pthread_mutex_unlock(&pl_inode->mutex); + + list_for_each_entry_safe(lock, tmp, &granted, list) + { + gf_log(this->name, GF_LOG_TRACE, + "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64 " => Granted", + lock->fl_type == F_UNLCK ? 
"Unlock" : "Lock", lock->client_pid, + lkowner_utoa(&lock->owner), lock->user_flock.l_start, + lock->user_flock.l_len); + + STACK_UNWIND_STRICT(lk, lock->frame, 0, 0, &lock->user_flock, NULL); + } } static void -__grant_blocked_lock_calls (xlator_t *this, pl_inode_t *pl_inode, - struct list_head *granted) +__grant_blocked_lock_calls(xlator_t *this, pl_inode_t *pl_inode, + struct list_head *granted) { - int bl_ret = 0; - posix_lock_t *bl = NULL; - posix_lock_t *tmp = NULL; + int bl_ret = 0; + posix_lock_t *bl = NULL; + posix_lock_t *tmp = NULL; - struct list_head blocked_list; + struct list_head blocked_list; - INIT_LIST_HEAD (&blocked_list); - list_splice_init (&pl_inode->blocked_reservelks, &blocked_list); + INIT_LIST_HEAD(&blocked_list); + list_splice_init(&pl_inode->blocked_reservelks, &blocked_list); - list_for_each_entry_safe (bl, tmp, &blocked_list, list) { + list_for_each_entry_safe(bl, tmp, &blocked_list, list) + { + list_del_init(&bl->list); - list_del_init (&bl->list); + bl_ret = pl_verify_reservelk(this, pl_inode, bl, bl->blocked); - bl_ret = pl_verify_reservelk (this, pl_inode, bl, bl->blocked); - - if (bl_ret == 0) { - list_add_tail (&bl->list, granted); - } + if (bl_ret == 0) { + list_add_tail(&bl->list, granted); } - return; + } + return; } void -grant_blocked_lock_calls (xlator_t *this, pl_inode_t *pl_inode) +grant_blocked_lock_calls(xlator_t *this, pl_inode_t *pl_inode) { - struct list_head granted; - posix_lock_t *lock = NULL; - posix_lock_t *tmp = NULL; - fd_t *fd = NULL; - - int can_block = 0; - int32_t cmd = 0; - int ret = 0; - - if (list_empty (&pl_inode->blocked_calls)) { - gf_log (this->name, GF_LOG_TRACE, - "No blocked lock calls to be granted"); - return; - } + struct list_head granted; + posix_lock_t *lock = NULL; + posix_lock_t *tmp = NULL; + fd_t *fd = NULL; - pthread_mutex_lock (&pl_inode->mutex); - { - __grant_blocked_lock_calls (this, pl_inode, &granted); - } - pthread_mutex_unlock (&pl_inode->mutex); - - list_for_each_entry_safe 
(lock, tmp, &granted, list) { - fd = fd_from_fdnum (lock); - - if (lock->blocked) { - can_block = 1; - cmd = F_SETLKW; - } - else - cmd = F_SETLK; - - lock->blocked = 0; - ret = pl_setlk (this, pl_inode, lock, can_block); - if (ret == -1) { - if (can_block) { - pl_trace_block (this, lock->frame, fd, NULL, - cmd, &lock->user_flock, NULL); - continue; - } else { - gf_log (this->name, GF_LOG_DEBUG, "returning EAGAIN"); - pl_trace_out (this, lock->frame, fd, NULL, cmd, - &lock->user_flock, -1, EAGAIN, NULL); - pl_update_refkeeper (this, fd->inode); - STACK_UNWIND_STRICT (lk, lock->frame, -1, - EAGAIN, &lock->user_flock, - NULL); - __destroy_lock (lock); - } - } + int can_block = 0; + int32_t cmd = 0; + int ret = 0; + if (list_empty(&pl_inode->blocked_calls)) { + gf_log(this->name, GF_LOG_TRACE, "No blocked lock calls to be granted"); + return; + } + + pthread_mutex_lock(&pl_inode->mutex); + { + __grant_blocked_lock_calls(this, pl_inode, &granted); + } + pthread_mutex_unlock(&pl_inode->mutex); + + list_for_each_entry_safe(lock, tmp, &granted, list) + { + fd = fd_from_fdnum(lock); + + if (lock->blocked) { + can_block = 1; + cmd = F_SETLKW; + } else + cmd = F_SETLK; + + lock->blocked = 0; + ret = pl_setlk(this, pl_inode, lock, can_block); + if (ret == -1) { + if (can_block) { + continue; + } else { + gf_log(this->name, GF_LOG_DEBUG, "returning EAGAIN"); + pl_trace_out(this, lock->frame, fd, NULL, cmd, + &lock->user_flock, -1, EAGAIN, NULL); + pl_update_refkeeper(this, fd->inode); + STACK_UNWIND_STRICT(lk, lock->frame, -1, EAGAIN, + &lock->user_flock, NULL); + __destroy_lock(lock); + } } - + } } - int -pl_reserve_unlock (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock) +pl_reserve_unlock(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock) { - posix_lock_t *retlock = NULL; - int ret = -1; - - pthread_mutex_lock (&pl_inode->mutex); - { - retlock = __reserve_unlock_lock (this, lock, pl_inode); - if (!retlock) { - gf_log (this->name, GF_LOG_DEBUG, - "Bad Unlock 
issued on Inode lock"); - ret = -EINVAL; - goto out; - } - - gf_log (this->name, GF_LOG_TRACE, - "Reservelk Unlock successful"); - __destroy_reserve_lock (retlock); - ret = 0; + posix_lock_t *retlock = NULL; + int ret = -1; + + pthread_mutex_lock(&pl_inode->mutex); + { + retlock = __reserve_unlock_lock(this, lock, pl_inode); + if (!retlock) { + pthread_mutex_unlock(&pl_inode->mutex); + gf_log(this->name, GF_LOG_DEBUG, "Bad Unlock issued on Inode lock"); + ret = -EINVAL; + goto out; } -out: - pthread_mutex_unlock (&pl_inode->mutex); - - grant_blocked_reserve_locks (this, pl_inode); - grant_blocked_lock_calls (this, pl_inode); - return ret; + gf_log(this->name, GF_LOG_TRACE, "Reservelk Unlock successful"); + __destroy_lock(retlock); + ret = 0; + } + pthread_mutex_unlock(&pl_inode->mutex); +out: + grant_blocked_reserve_locks(this, pl_inode); + grant_blocked_lock_calls(this, pl_inode); + return ret; } int -pl_reserve_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock, - int can_block) +pl_reserve_setlk(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock, + int can_block) { - int ret = -EINVAL; - - pthread_mutex_lock (&pl_inode->mutex); - { - - ret = __lock_reservelk (this, pl_inode, lock, can_block); - if (ret < 0) - gf_log (this->name, GF_LOG_TRACE, - "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => NOK", - lock->fl_type == F_UNLCK ? "Unlock" : "Lock", - lock->client_pid, - lkowner_utoa (&lock->owner), - lock->user_flock.l_start, - lock->user_flock.l_len); - else - gf_log (this->name, GF_LOG_TRACE, - "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => OK", - lock->fl_type == F_UNLCK ? 
"Unlock" : "Lock", - lock->client_pid, - lkowner_utoa (&lock->owner), - lock->fl_start, - lock->fl_end); - - } - pthread_mutex_unlock (&pl_inode->mutex); - return ret; + int ret = -EINVAL; + + pthread_mutex_lock(&pl_inode->mutex); + { + ret = __lock_reservelk(this, pl_inode, lock, can_block); + } + pthread_mutex_unlock(&pl_inode->mutex); + + if (ret < 0) + gf_log(this->name, GF_LOG_TRACE, + "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64 " => NOK", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", lock->client_pid, + lkowner_utoa(&lock->owner), lock->user_flock.l_start, + lock->user_flock.l_len); + else + gf_log(this->name, GF_LOG_TRACE, + "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64 " => OK", + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", lock->client_pid, + lkowner_utoa(&lock->owner), lock->fl_start, lock->fl_end); + + return ret; } diff --git a/xlators/features/locks/tests/unit-test.c b/xlators/features/locks/tests/unit-test.c index dec2ba85909..d285b12b5aa 100644 --- a/xlators/features/locks/tests/unit-test.c +++ b/xlators/features/locks/tests/unit-test.c @@ -7,54 +7,71 @@ later), or the GNU General Public License, version 2 (GPLv2), in all cases as published by the Free Software Foundation. 
*/ -#include "glusterfs.h" -#include "compat.h" -#include "xlator.h" -#include "inode.h" -#include "logging.h" -#include "common-utils.h" -#include "list.h" +#include <glusterfs/glusterfs.h> +#include <glusterfs/compat.h> +#include <glusterfs/xlator.h> +#include <glusterfs/logging.h> +#include <glusterfs/common-utils.h> +#include <glusterfs/list.h> #include "locks.h" #include "common.h" -#define expect(cond) if (!(cond)) { goto out; } +#define expect(cond) \ + if (!(cond)) { \ + goto out; \ + } -extern int lock_name (pl_inode_t *, const char *, entrylk_type); -extern int unlock_name (pl_inode_t *, const char *, entrylk_type); +extern int +lock_name(pl_inode_t *, const char *, entrylk_type); +extern int +unlock_name(pl_inode_t *, const char *, entrylk_type); -int main (int argc, char **argv) +int +main(int argc, char **argv) { - int ret = 1; - int r = -1; - - pl_inode_t *pinode = CALLOC (sizeof (pl_inode_t), 1); - pthread_mutex_init (&pinode->dir_lock_mutex, NULL); - INIT_LIST_HEAD (&pinode->gf_dir_locks); - - r = lock_name (pinode, NULL, ENTRYLK_WRLCK); expect (r == 0); - { - r = lock_name (pinode, "foo", ENTRYLK_WRLCK); expect (r == -EAGAIN); - } - r = unlock_name (pinode, NULL, ENTRYLK_WRLCK); expect (r == 0); - - r = lock_name (pinode, "foo", ENTRYLK_RDLCK); expect (r == 0); - { - r = lock_name (pinode, "foo", ENTRYLK_RDLCK); expect (r == 0); - { - r = lock_name (pinode, "foo", ENTRYLK_WRLCK); expect (r == -EAGAIN); - } - r = unlock_name (pinode, "foo", ENTRYLK_RDLCK); expect (r == 0); - } - r = unlock_name (pinode, "foo", ENTRYLK_RDLCK); expect (r == 0); - - r = lock_name (pinode, "foo", ENTRYLK_WRLCK); expect (r == 0); - r = unlock_name (pinode, "foo", ENTRYLK_WRLCK); expect (r == 0); - - r = lock_name (pinode, "baz", ENTRYLK_WRLCK); expect (r == 0); - r = lock_name (pinode, "baz", ENTRYLK_RDLCK); expect (r == -EAGAIN); - - ret = 0; + int ret = 1; + int r = -1; + + pl_inode_t *pinode = CALLOC(sizeof(pl_inode_t), 1); + 
pthread_mutex_init(&pinode->dir_lock_mutex, NULL); + INIT_LIST_HEAD(&pinode->gf_dir_locks); + + r = lock_name(pinode, NULL, ENTRYLK_WRLCK); + expect(r == 0); + { + r = lock_name(pinode, "foo", ENTRYLK_WRLCK); + expect(r == -EAGAIN); + } + r = unlock_name(pinode, NULL, ENTRYLK_WRLCK); + expect(r == 0); + + r = lock_name(pinode, "foo", ENTRYLK_RDLCK); + expect(r == 0); + { + r = lock_name(pinode, "foo", ENTRYLK_RDLCK); + expect(r == 0); + { + r = lock_name(pinode, "foo", ENTRYLK_WRLCK); + expect(r == -EAGAIN); + } + r = unlock_name(pinode, "foo", ENTRYLK_RDLCK); + expect(r == 0); + } + r = unlock_name(pinode, "foo", ENTRYLK_RDLCK); + expect(r == 0); + + r = lock_name(pinode, "foo", ENTRYLK_WRLCK); + expect(r == 0); + r = unlock_name(pinode, "foo", ENTRYLK_WRLCK); + expect(r == 0); + + r = lock_name(pinode, "baz", ENTRYLK_WRLCK); + expect(r == 0); + r = lock_name(pinode, "baz", ENTRYLK_RDLCK); + expect(r == -EAGAIN); + + ret = 0; out: - return ret; + return ret; } diff --git a/xlators/features/mac-compat/src/Makefile.am b/xlators/features/mac-compat/src/Makefile.am deleted file mode 100644 index c178a2542e8..00000000000 --- a/xlators/features/mac-compat/src/Makefile.am +++ /dev/null @@ -1,15 +0,0 @@ -xlator_LTLIBRARIES = mac-compat.la -xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features - -mac_compat_la_LDFLAGS = $(GF_XLATOR_DEFAULT_LDFLAGS) - -mac_compat_la_SOURCES = mac-compat.c -mac_compat_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la - -noinst_HEADERS = mac-compat.h - -AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src - -AM_CFLAGS = -Wall $(GF_CFLAGS) - -CLEANFILES = diff --git a/xlators/features/mac-compat/src/mac-compat.c b/xlators/features/mac-compat/src/mac-compat.c deleted file mode 100644 index 795a387d484..00000000000 --- a/xlators/features/mac-compat/src/mac-compat.c +++ /dev/null @@ -1,344 +0,0 @@ -/* - Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. 
- - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ -#include "xlator.h" -#include "defaults.h" -#include "compat-errno.h" -#include "syscall.h" -#include "mem-pool.h" -#include "mac-compat.h" - -static int -dict_key_remove_namespace(dict_t *dict, char *key, data_t *value, void *data) -{ - /* - char buffer[3*value->len+1]; - int index = 0; - for (index = 0; index < value->len; index++) - sprintf(buffer+3*index, " %02x", value->data[index]); - */ - xlator_t *this = (xlator_t *) data; - if (strncmp(key, "user.", 5) == 0) { - dict_set (dict, key + 5, value); - gf_log (this->name, GF_LOG_DEBUG, - "remove_namespace_dict: %s -> %s ", key, key + 5); - dict_del (dict, key); - } - return 0; -} - -int32_t -maccomp_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict, - dict_t *xdata) -{ - intptr_t ax = (intptr_t)this->private; - int i = 0; - - gf_log (this->name, GF_LOG_DEBUG, - "getxattr_cbk: dict %p private: %p xdata %p ", dict, - this->private, xdata); - - if (dict) { - dict_foreach(dict, dict_key_remove_namespace, this); - } - else { - // TODO: we expect dict to exist here, don't know why this - // this is needed - dict = dict_new(); - } - gf_log (this->name, GF_LOG_DEBUG, - "getxattr_cbk: dict %p ax: %ld op_ret %d op_err %d ", dict, ax, - op_ret, op_errno); - if ((ax == GF_XATTR_ALL && op_ret >= 0) || ax != GF_XATTR_NONE) { - op_ret = op_errno = 0; - for (i = 0; i < GF_XATTR_ALL; i++) { - if (dict_get (dict, apple_xattr_name[i])) - continue; - /* set dummy data */ - gf_log (this->name, GF_LOG_DEBUG, - "getxattr_cbk: setting dummy data %p, %s", dict, - apple_xattr_name[i]); - if (dict_set (dict, apple_xattr_name[i], - bin_to_data ((void *)apple_xattr_value[i], - apple_xattr_len[i])) == -1) { - op_ret 
= -1; - op_errno = ENOATTR; - - break; - } - } - } - STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata); - return 0; -} - - -static -int prepend_xattr_user_namespace(dict_t *dict, char *key, data_t *value, void *obj) -{ - xlator_t *this = (xlator_t *) obj; - dict_t *newdict = (dict_t *) this->private; - char *newkey = NULL; - gf_add_prefix(XATTR_USER_PREFIX, key, &newkey); - key = newkey; - dict_set(newdict, (char *)key, value); - if (newkey) - GF_FREE(newkey); - return 0; -} - -intptr_t -check_name(const char *name, char **newkey) -{ - intptr_t ax = GF_XATTR_NONE; - if (name) { - int i = 0; - for (i = 0; i < GF_XATTR_ALL; i++) { - if (strcmp (apple_xattr_name[i], name) == 0) { - ax = i; - break; - } - } - gf_add_prefix("user.", name, newkey); - } else - ax = GF_XATTR_ALL; - return ax; -} - -int32_t -maccomp_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *name, dict_t *xdata) -{ - char *newkey = NULL; - this->private = (void *) check_name(name, &newkey); - - gf_log (this->name, GF_LOG_DEBUG, - "getxattr: name %s private: %p xdata %p ", name, - this->private, xdata); - STACK_WIND (frame, maccomp_getxattr_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->getxattr, - loc, newkey, xdata); - return 0; -} - - -int32_t -maccomp_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, - const char *name, dict_t *xdata) -{ - char *newkey = NULL; - this->private = (void *) check_name(name, &newkey); - - STACK_WIND (frame, maccomp_getxattr_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fgetxattr, - fd, newkey, xdata); - GF_FREE(newkey); - return 0; -} - -int32_t -maccomp_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - intptr_t ax = (intptr_t)this->private; - - if (op_ret == -1 && ax != GF_XATTR_NONE) - op_ret = op_errno = 0; - gf_log (this->name, GF_LOG_DEBUG, - "setxattr_cbk op_ret %d op_errno %d private: %p xdata %p ", - op_ret, op_errno, 
this->private, xdata); - STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata); - return 0; -} - -int32_t -maccomp_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *iatt1, - struct iatt *iattr2, dict_t *xdata) -{ - gf_log (this->name, GF_LOG_DEBUG, - "setattr_cbk op_ret %d op_errno %d private: %p xdata %p ", - op_ret, op_errno, this->private, xdata); - STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, - iatt1, iattr2, xdata); - return 0; -} - -int map_flags(int flags) -{ - /* DARWIN has different defines on XATTR_ flags. - There do not seem to be a POSIX standard - Parse any other flags over. - NOFOLLOW is always true on Linux and Darwin - */ - int linux_flags = flags & ~(GF_XATTR_CREATE | GF_XATTR_REPLACE | XATTR_REPLACE); - if (XATTR_CREATE & flags) - linux_flags |= GF_XATTR_CREATE; - if (XATTR_REPLACE & flags) - linux_flags |= GF_XATTR_REPLACE; - return linux_flags; -} - -int32_t -maccomp_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd, - const char *name, dict_t *xdata) -{ - char *newkey = NULL; - - this->private = (void *) check_name(name, &newkey); - - STACK_WIND (frame, default_fremovexattr_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fremovexattr, - fd, newkey, xdata); - GF_FREE(newkey); - return 0; -} - -int32_t -maccomp_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, - int32_t flags, dict_t *xdata) -{ - intptr_t ax = GF_XATTR_NONE; - int i = 0; - - for (i = 0; i < GF_XATTR_ALL; i++) { - if (dict_get (dict, apple_xattr_name[i])) { - ax = i; - - break; - } - } - dict_t *newdict = dict_new(); - this->private = (void *) newdict; - dict_foreach(dict, prepend_xattr_user_namespace, this); - - this->private = (void *)ax; - int linux_flags = map_flags(flags); - gf_log (this->name, GF_LOG_DEBUG, - "setxattr flags: %d -> %d dict %p private: %p xdata %p ", - flags, linux_flags, dict, this->private, xdata); - STACK_WIND (frame, 
maccomp_setxattr_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setxattr, - loc, newdict, linux_flags, xdata); - dict_unref(newdict); - return 0; -} - -int32_t -maccomp_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *iattr, - int32_t flags, dict_t *xdata) -{ - gf_log (this->name, GF_LOG_DEBUG, - "setattr iattr %p private: %p xdata %p ", - iattr, this->private, xdata); - STACK_WIND (frame, maccomp_setattr_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setattr, - loc, iattr, flags, xdata); - return 0; -} - -int32_t -maccomp_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *name, dict_t *xdata) -{ - char *newkey = NULL; - this->private = (void *) check_name(name, &newkey); - - STACK_WIND (frame, default_removexattr_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->removexattr, - loc, newkey, xdata); - - gf_log (this->name, GF_LOG_TRACE, - "removeattr name %p private: %p xdata %p ", - name, this->private, xdata); - GF_FREE(newkey); - return 0; - -} - -int32_t -maccomp_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, - int32_t flags, dict_t *xdata) -{ - intptr_t ax = GF_XATTR_NONE; - int i = 0; - - for (i = 0; i < GF_XATTR_ALL; i++) { - if (dict_get (dict, apple_xattr_name[i])) { - ax = i; - - break; - } - } - - dict_t *newdict = dict_new(); - this->private = (void *) newdict; - dict_foreach(dict, prepend_xattr_user_namespace, this); - - this->private = (void *)ax; - int linux_flags = map_flags(flags); - gf_log (this->name, GF_LOG_DEBUG, - "fsetxattr flags: %d -> %d dict %p private: %p xdata %p ", - flags, linux_flags, dict, this->private, xdata); - STACK_WIND (frame, maccomp_setxattr_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsetxattr, - fd, newdict, linux_flags, xdata); - dict_unref(newdict); - return 0; -} - - -int32_t -init (xlator_t *this) -{ - if (!this->children || this->children->next) { - gf_log (this->name, GF_LOG_ERROR, - "translator not configured with exactly 
one child"); - return -1; - } - - if (!this->parents) { - gf_log (this->name, GF_LOG_WARNING, - "dangling volume. check volfile "); - } - - return 0; -} - - -void -fini (xlator_t *this) -{ - return; -} - - -struct xlator_fops fops = { - .getxattr = maccomp_getxattr, - .fgetxattr = maccomp_fgetxattr, - .setxattr = maccomp_setxattr, - .setattr = maccomp_setattr, - .fsetxattr = maccomp_fsetxattr, - .removexattr = maccomp_removexattr, - .fremovexattr = maccomp_fremovexattr, -}; - -struct xlator_cbks cbks; - -struct volume_options options[] = { - { .key = {NULL} }, -}; diff --git a/xlators/features/mac-compat/src/mac-compat.h b/xlators/features/mac-compat/src/mac-compat.h deleted file mode 100644 index b033ca0e4d8..00000000000 --- a/xlators/features/mac-compat/src/mac-compat.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. 
-*/ - -#ifndef __MAC_COMPAT_H__ -#define __MAC_COMPAT_H__ - -enum apple_xattr { - GF_FINDER_INFO_XATTR, - GF_RESOURCE_FORK_XATTR, - GF_XATTR_ALL, - GF_XATTR_NONE -}; - -static char *apple_xattr_name[] = { - [GF_FINDER_INFO_XATTR] = "com.apple.FinderInfo", - [GF_RESOURCE_FORK_XATTR] = "com.apple.ResourceFork" -}; - -static const char *apple_xattr_value[] = { - [GF_FINDER_INFO_XATTR] = - /* 1 2 3 4 5 6 7 8 */ - "\0\0\0\0\0\0\0\0" - "\0\0\0\0\0\0\0\0" - "\0\0\0\0\0\0\0\0" - "\0\0\0\0\0\0\0\0", - [GF_RESOURCE_FORK_XATTR] = "" -}; - -static int32_t apple_xattr_len[] = { - [GF_FINDER_INFO_XATTR] = 32, - [GF_RESOURCE_FORK_XATTR] = 1 -}; - -#endif /* __MAC_COMPAT_H__ */ diff --git a/xlators/features/marker/src/Makefile.am b/xlators/features/marker/src/Makefile.am index d122c0bfc08..58056b36511 100644 --- a/xlators/features/marker/src/Makefile.am +++ b/xlators/features/marker/src/Makefile.am @@ -1,14 +1,21 @@ +if WITH_SERVER xlator_LTLIBRARIES = marker.la +endif xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features -marker_la_LDFLAGS = $(GF_XLATOR_DEFAULT_LDFLAGS) +marker_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) + +marker_la_SOURCES = marker.c marker-quota.c marker-quota-helper.c \ + marker-common.c -marker_la_SOURCES = marker.c marker-quota.c marker-quota-helper.c marker-common.c marker_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la -noinst_HEADERS = marker-mem-types.h marker.h marker-quota.h marker-quota-helper.h marker-common.h $(top_builddir)/xlators/lib/src/libxlator.h +noinst_HEADERS = marker-mem-types.h marker.h marker-quota.h \ + marker-quota-helper.h marker-common.h \ + $(top_builddir)/xlators/lib/src/libxlator.h AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ + -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \ -I$(top_srcdir)/xlators/lib/src AM_CFLAGS = -Wall -fno-strict-aliasing $(GF_CFLAGS) diff --git a/xlators/features/marker/src/marker-common.c b/xlators/features/marker/src/marker-common.c index 
6ec5e3cc8bc..9c9047005d6 100644 --- a/xlators/features/marker/src/marker-common.c +++ b/xlators/features/marker/src/marker-common.c @@ -11,55 +11,47 @@ #include "marker-common.h" marker_inode_ctx_t * -marker_inode_ctx_new () +marker_inode_ctx_new() { - marker_inode_ctx_t *ctx = NULL; + marker_inode_ctx_t *ctx = NULL; - ctx = GF_CALLOC (1, sizeof (marker_inode_ctx_t), - gf_marker_mt_marker_inode_ctx_t); - if (ctx == NULL) - goto out; + ctx = GF_CALLOC(1, sizeof(marker_inode_ctx_t), + gf_marker_mt_marker_inode_ctx_t); + if (ctx == NULL) + goto out; - ctx->quota_ctx = NULL; + ctx->quota_ctx = NULL; out: - return ctx; + return ctx; } int32_t -marker_force_inode_ctx_get (inode_t *inode, xlator_t *this, - marker_inode_ctx_t **ctx) +marker_force_inode_ctx_get(inode_t *inode, xlator_t *this, + marker_inode_ctx_t **ctx) { - int32_t ret = -1; - uint64_t ctx_int = 0; - - LOCK (&inode->lock); - { - ret = __inode_ctx_get (inode, this, &ctx_int); - if (ret == 0) - *ctx = (marker_inode_ctx_t *) (unsigned long)ctx_int; - else { - *ctx = marker_inode_ctx_new (); - if (*ctx == NULL) - goto unlock; - - ret = __inode_ctx_put (inode, this, - (uint64_t )(unsigned long) *ctx); - if (ret == -1) { - GF_FREE (*ctx); - goto unlock; - } - ret = 0; - } + int32_t ret = -1; + uint64_t ctx_int = 0; + + LOCK(&inode->lock); + { + ret = __inode_ctx_get(inode, this, &ctx_int); + if (ret == 0) + *ctx = (marker_inode_ctx_t *)(unsigned long)ctx_int; + else { + *ctx = marker_inode_ctx_new(); + if (*ctx == NULL) + goto unlock; + + ret = __inode_ctx_put(inode, this, (uint64_t)(unsigned long)*ctx); + if (ret == -1) { + GF_FREE(*ctx); + goto unlock; + } + ret = 0; } -unlock: UNLOCK (&inode->lock); + } +unlock: + UNLOCK(&inode->lock); - return ret; -} - -int -marker_filter_quota_xattr (dict_t *dict, char *key, - data_t *value, void *data) -{ - dict_del (dict, key); - return 0; + return ret; } diff --git a/xlators/features/marker/src/marker-common.h b/xlators/features/marker/src/marker-common.h index 
c6ca422dd6a..7f8cffe7d35 100644 --- a/xlators/features/marker/src/marker-common.h +++ b/xlators/features/marker/src/marker-common.h @@ -10,13 +10,10 @@ #ifndef _MARKER_COMMON_H #define _MARKER_COMMON_H -#include "inode.h" -#include "xlator.h" +#include <glusterfs/xlator.h> #include "marker.h" int32_t -marker_force_inode_ctx_get (inode_t *, xlator_t *, marker_inode_ctx_t **); +marker_force_inode_ctx_get(inode_t *, xlator_t *, marker_inode_ctx_t **); -int -marker_filter_quota_xattr (dict_t *, char *, data_t *, void *); #endif diff --git a/xlators/features/marker/src/marker-mem-types.h b/xlators/features/marker/src/marker-mem-types.h index dc5ad16ed76..aedfdb4a1b7 100644 --- a/xlators/features/marker/src/marker-mem-types.h +++ b/xlators/features/marker/src/marker-mem-types.h @@ -10,18 +10,19 @@ #ifndef __MARKER_MEM_TYPES_H__ #define __MARKER_MEM_TYPES_H__ -#include "mem-types.h" +#include <glusterfs/mem-types.h> enum gf_marker_mem_types_ { - gf_marker_mt_marker_conf_t = gf_common_mt_end + 1, - gf_marker_mt_loc_t, - gf_marker_mt_volume_mark, - gf_marker_mt_int64_t, - gf_marker_mt_quota_inode_ctx_t, - gf_marker_mt_marker_inode_ctx_t, - gf_marker_mt_inode_contribution_t, - gf_marker_mt_quota_meta_t, - gf_marker_mt_quota_synctask_t, - gf_marker_mt_end + /* Those are used by ALLOCATE_OR_GOTO macro */ + gf_marker_mt_marker_conf_t = gf_common_mt_end + 1, + gf_marker_mt_loc_t, + gf_marker_mt_volume_mark, + gf_marker_mt_int64_t, + gf_marker_mt_quota_inode_ctx_t, + gf_marker_mt_marker_inode_ctx_t, + gf_marker_mt_inode_contribution_t, + gf_marker_mt_quota_meta_t, + gf_marker_mt_quota_synctask_t, + gf_marker_mt_end }; #endif diff --git a/xlators/features/marker/src/marker-quota-helper.c b/xlators/features/marker/src/marker-quota-helper.c index d1fabccf18e..ecd85d67b2b 100644 --- a/xlators/features/marker/src/marker-quota-helper.c +++ b/xlators/features/marker/src/marker-quota-helper.c @@ -7,454 +7,374 @@ later), or the GNU General Public License, version 2 (GPLv2), in all cases 
as published by the Free Software Foundation. */ -#include "locking.h" +#include <glusterfs/locking.h> #include "marker-quota.h" #include "marker-common.h" #include "marker-quota-helper.h" #include "marker-mem-types.h" int -mq_loc_fill (loc_t *loc, inode_t *inode, inode_t *parent, char *path) +mq_loc_fill(loc_t *loc, inode_t *inode, inode_t *parent, char *path) { - int ret = -1; + int ret = -1; - GF_VALIDATE_OR_GOTO ("marker", loc, out); - GF_VALIDATE_OR_GOTO ("marker", inode, out); - GF_VALIDATE_OR_GOTO ("marker", path, out); - /* Not checking for parent because while filling - * loc of root, parent will be NULL - */ + GF_VALIDATE_OR_GOTO("marker", loc, out); + GF_VALIDATE_OR_GOTO("marker", inode, out); + GF_VALIDATE_OR_GOTO("marker", path, out); + /* Not checking for parent because while filling + * loc of root, parent will be NULL + */ - if (inode) { - loc->inode = inode_ref (inode); - } + if (inode) { + loc->inode = inode_ref(inode); + } - if (parent) - loc->parent = inode_ref (parent); + if (parent) + loc->parent = inode_ref(parent); - if (!gf_uuid_is_null (inode->gfid)) - gf_uuid_copy (loc->gfid, inode->gfid); + if (!gf_uuid_is_null(inode->gfid)) + gf_uuid_copy(loc->gfid, inode->gfid); - loc->path = gf_strdup (path); - if (!loc->path) { - gf_log ("loc fill", GF_LOG_ERROR, "strdup failed"); - goto out; - } + loc->path = gf_strdup(path); + if (!loc->path) { + gf_log("loc fill", GF_LOG_ERROR, "strdup failed"); + goto out; + } - loc->name = strrchr (loc->path, '/'); - if (loc->name) - loc->name++; - else - goto out; + loc->name = strrchr(loc->path, '/'); + if (loc->name) + loc->name++; + else + goto out; - ret = 0; + ret = 0; out: - if (ret < 0) - loc_wipe (loc); + if (ret < 0) + loc_wipe(loc); - return ret; + return ret; } - int32_t -mq_inode_loc_fill (const char *parent_gfid, inode_t *inode, loc_t *loc) +mq_inode_loc_fill(const char *parent_gfid, inode_t *inode, loc_t *loc) { - char *resolvedpath = NULL; - inode_t *parent = NULL; - int ret = -1; + char 
*resolvedpath = NULL; + inode_t *parent = NULL; + quota_inode_ctx_t *ctx = NULL; + xlator_t *this = NULL; + int ret = -1; + + this = THIS; + + if (inode == NULL) { + gf_log_callingfn("marker", GF_LOG_ERROR, + "loc fill failed, " + "inode is NULL"); + return ret; + } - if ((!inode) || (!loc)) - return ret; + if (loc == NULL) + return ret; - if ((inode) && __is_root_gfid (inode->gfid)) { - loc->parent = NULL; - goto ignore_parent; - } + if ((inode) && __is_root_gfid(inode->gfid)) { + loc->parent = NULL; + goto ignore_parent; + } - if (parent_gfid == NULL) - parent = inode_parent (inode, 0, NULL); - else - parent = inode_find (inode->table, - (unsigned char *) parent_gfid); + if (parent_gfid == NULL) + parent = inode_parent(inode, 0, NULL); + else + parent = inode_find(inode->table, (unsigned char *)parent_gfid); - if (parent == NULL) { - gf_log ("marker", GF_LOG_ERROR, "parent is NULL for %s", - uuid_utoa(inode->gfid)); - goto err; - } + if (parent == NULL) { + gf_log("marker", GF_LOG_ERROR, "parent is NULL for %s", + uuid_utoa(inode->gfid)); + goto err; + } ignore_parent: - ret = inode_path (inode, NULL, &resolvedpath); - if (ret < 0) { - gf_log ("marker", GF_LOG_ERROR, "failed to resolve path for %s", - uuid_utoa(inode->gfid)); - goto err; - } - - ret = mq_loc_fill (loc, inode, parent, resolvedpath); - if (ret < 0) - goto err; + ret = inode_path(inode, NULL, &resolvedpath); + if (ret < 0) { + gf_log("marker", GF_LOG_ERROR, "failed to resolve path for %s", + uuid_utoa(inode->gfid)); + goto err; + } + + ret = mq_loc_fill(loc, inode, parent, resolvedpath); + if (ret < 0) + goto err; + + ret = mq_inode_ctx_get(inode, this, &ctx); + if (ret < 0 || ctx == NULL) + ctx = mq_inode_ctx_new(inode, this); + if (ctx == NULL) { + gf_log(this->name, GF_LOG_WARNING, + "mq_inode_ctx_new " + "failed for %s", + uuid_utoa(inode->gfid)); + ret = -1; + goto err; + } + ret = 0; err: - if (parent) - inode_unref (parent); + if (parent) + inode_unref(parent); - GF_FREE (resolvedpath); + 
GF_FREE(resolvedpath); - return ret; + return ret; } - quota_inode_ctx_t * -mq_alloc_inode_ctx () +mq_alloc_inode_ctx() { - int32_t ret = -1; - quota_inode_ctx_t *ctx = NULL; - - QUOTA_ALLOC (ctx, quota_inode_ctx_t, ret); - if (ret == -1) - goto out; - - ctx->size = 0; - ctx->dirty = 0; - ctx->updation_status = _gf_false; - LOCK_INIT (&ctx->lock); - INIT_LIST_HEAD (&ctx->contribution_head); + int32_t ret = -1; + quota_inode_ctx_t *ctx = NULL; + + QUOTA_ALLOC(ctx, quota_inode_ctx_t, ret); + if (ret == -1) + goto out; + + ctx->size = 0; + ctx->dirty = 0; + ctx->updation_status = _gf_false; + LOCK_INIT(&ctx->lock); + INIT_LIST_HEAD(&ctx->contribution_head); out: - return ctx; + return ctx; } -void -mq_contri_fini (void *data) +static void +mq_contri_fini(inode_contribution_t *contri) { - inode_contribution_t *contri = data; - - LOCK_DESTROY (&contri->lock); - GF_FREE (contri); + LOCK_DESTROY(&contri->lock); + GF_FREE(contri); } -inode_contribution_t* -mq_contri_init (inode_t *inode) +inode_contribution_t * +mq_contri_init(inode_t *inode) { - inode_contribution_t *contri = NULL; - int32_t ret = 0; + inode_contribution_t *contri = NULL; + int32_t ret = 0; - QUOTA_ALLOC (contri, inode_contribution_t, ret); - if (ret == -1) - goto out; + QUOTA_ALLOC(contri, inode_contribution_t, ret); + if (ret == -1) + goto out; - GF_REF_INIT (contri, mq_contri_fini); + GF_REF_INIT(contri, mq_contri_fini); - contri->contribution = 0; - contri->file_count = 0; - contri->dir_count = 0; - gf_uuid_copy (contri->gfid, inode->gfid); + contri->contribution = 0; + contri->file_count = 0; + contri->dir_count = 0; + gf_uuid_copy(contri->gfid, inode->gfid); - LOCK_INIT (&contri->lock); - INIT_LIST_HEAD (&contri->contri_list); + LOCK_INIT(&contri->lock); + INIT_LIST_HEAD(&contri->contri_list); out: - return contri; + return contri; } inode_contribution_t * -mq_get_contribution_node (inode_t *inode, quota_inode_ctx_t *ctx) +mq_get_contribution_node(inode_t *inode, quota_inode_ctx_t *ctx) { - 
inode_contribution_t *contri = NULL; - inode_contribution_t *temp = NULL; + inode_contribution_t *contri = NULL; + inode_contribution_t *temp = NULL; + + if (!inode || !ctx) + goto out; - if (!inode || !ctx) - goto out; + LOCK(&ctx->lock); + { + if (list_empty(&ctx->contribution_head)) + goto unlock; - LOCK (&ctx->lock); + list_for_each_entry(temp, &ctx->contribution_head, contri_list) { - if (list_empty (&ctx->contribution_head)) - goto unlock; - - list_for_each_entry (temp, &ctx->contribution_head, - contri_list) { - if (gf_uuid_compare (temp->gfid, inode->gfid) == 0) { - contri = temp; - GF_REF_GET (contri); - break; - } - } + if (gf_uuid_compare(temp->gfid, inode->gfid) == 0) { + contri = temp; + GF_REF_GET(contri); + break; + } } + } unlock: - UNLOCK (&ctx->lock); + UNLOCK(&ctx->lock); out: - return contri; + return contri; } inode_contribution_t * -__mq_add_new_contribution_node (xlator_t *this, quota_inode_ctx_t *ctx, - loc_t *loc) +__mq_add_new_contribution_node(xlator_t *this, quota_inode_ctx_t *ctx, + loc_t *loc) { - inode_contribution_t *contribution = NULL; - - if (!loc->parent) { - if (!gf_uuid_is_null (loc->pargfid)) - loc->parent = inode_find (loc->inode->table, - loc->pargfid); - - if (!loc->parent) - loc->parent = inode_parent (loc->inode, loc->pargfid, - loc->name); - if (!loc->parent) - goto out; + inode_contribution_t *contribution = NULL; + + if (!loc->parent) { + if (!gf_uuid_is_null(loc->pargfid)) + loc->parent = inode_find(loc->inode->table, loc->pargfid); + + if (!loc->parent) + loc->parent = inode_parent(loc->inode, loc->pargfid, loc->name); + if (!loc->parent) + goto out; + } + + list_for_each_entry(contribution, &ctx->contribution_head, contri_list) + { + if (loc->parent && + gf_uuid_compare(contribution->gfid, loc->parent->gfid) == 0) { + goto out; } + } - list_for_each_entry (contribution, &ctx->contribution_head, - contri_list) { - if (loc->parent && - gf_uuid_compare (contribution->gfid, loc->parent->gfid) == 0) { - goto out; - } - } 
- - contribution = mq_contri_init (loc->parent); - if (contribution == NULL) - goto out; + contribution = mq_contri_init(loc->parent); + if (contribution == NULL) + goto out; - list_add_tail (&contribution->contri_list, &ctx->contribution_head); + list_add_tail(&contribution->contri_list, &ctx->contribution_head); out: - return contribution; + return contribution; } - inode_contribution_t * -mq_add_new_contribution_node (xlator_t *this, quota_inode_ctx_t *ctx, - loc_t *loc) +mq_add_new_contribution_node(xlator_t *this, quota_inode_ctx_t *ctx, loc_t *loc) { - inode_contribution_t *contribution = NULL; + inode_contribution_t *contribution = NULL; - if ((ctx == NULL) || (loc == NULL)) - return NULL; + if ((ctx == NULL) || (loc == NULL)) + return NULL; - if (((loc->path) && (strcmp (loc->path, "/") == 0)) - || (!loc->path && gf_uuid_is_null (loc->pargfid))) - return NULL; + if (((loc->path) && (strcmp(loc->path, "/") == 0)) || + (!loc->path && gf_uuid_is_null(loc->pargfid))) + return NULL; - LOCK (&ctx->lock); - { - contribution = __mq_add_new_contribution_node (this, ctx, loc); - if (contribution) - GF_REF_GET (contribution); - } - UNLOCK (&ctx->lock); + LOCK(&ctx->lock); + { + contribution = __mq_add_new_contribution_node(this, ctx, loc); + if (contribution) + GF_REF_GET(contribution); + } + UNLOCK(&ctx->lock); - return contribution; + return contribution; } - int32_t -mq_dict_set_contribution (xlator_t *this, dict_t *dict, loc_t *loc, - uuid_t gfid, char *contri_key) +mq_dict_set_contribution(xlator_t *this, dict_t *dict, loc_t *loc, uuid_t gfid, + char *contri_key) { - int32_t ret = -1; - char key[QUOTA_KEY_MAX] = {0, }; - - GF_VALIDATE_OR_GOTO ("marker", this, out); - GF_VALIDATE_OR_GOTO ("marker", dict, out); - GF_VALIDATE_OR_GOTO ("marker", loc, out); - - if (gfid && !gf_uuid_is_null(gfid)) { - GET_CONTRI_KEY (this, key, gfid, ret); - } else if (loc->parent) { - GET_CONTRI_KEY (this, key, loc->parent->gfid, ret); - } else { - /* nameless lookup, fetch 
contributions to all parents */ - GET_CONTRI_KEY (this, key, NULL, ret); + int32_t ret = -1; + char key[QUOTA_KEY_MAX] = { + 0, + }; + + GF_VALIDATE_OR_GOTO("marker", this, out); + GF_VALIDATE_OR_GOTO("marker", dict, out); + GF_VALIDATE_OR_GOTO("marker", loc, out); + + if (gfid && !gf_uuid_is_null(gfid)) { + GET_CONTRI_KEY(this, key, gfid, ret); + } else if (loc->parent) { + GET_CONTRI_KEY(this, key, loc->parent->gfid, ret); + } else { + /* nameless lookup, fetch contributions to all parents */ + GET_CONTRI_KEY(this, key, NULL, ret); + } + + if (ret < 0) + goto out; + + ret = dict_set_int64(dict, key, 0); + if (ret < 0) + goto out; + + if (contri_key) + if (snprintf(contri_key, QUOTA_KEY_MAX, "%s", key) >= QUOTA_KEY_MAX) { + ret = -1; + goto out; } - if (ret < 0) - goto out; - - ret = dict_set_int64 (dict, key, 0); - if (ret < 0) - goto out; - - if (contri_key) - strncpy (contri_key, key, QUOTA_KEY_MAX); - out: - if (ret < 0) - gf_log_callingfn (this->name, GF_LOG_ERROR, "dict set failed"); + if (ret < 0) + gf_log_callingfn(this ? 
this->name : "Marker", GF_LOG_ERROR, + "dict set failed"); - return ret; + return ret; } - int32_t -mq_inode_ctx_get (inode_t *inode, xlator_t *this, - quota_inode_ctx_t **ctx) +mq_inode_ctx_get(inode_t *inode, xlator_t *this, quota_inode_ctx_t **ctx) { - int32_t ret = -1; - uint64_t ctx_int = 0; - marker_inode_ctx_t *mark_ctx = NULL; + int32_t ret = -1; + uint64_t ctx_int = 0; + marker_inode_ctx_t *mark_ctx = NULL; - GF_VALIDATE_OR_GOTO ("marker", inode, out); - GF_VALIDATE_OR_GOTO ("marker", this, out); - GF_VALIDATE_OR_GOTO ("marker", ctx, out); + GF_VALIDATE_OR_GOTO("marker", inode, out); + GF_VALIDATE_OR_GOTO("marker", this, out); + GF_VALIDATE_OR_GOTO("marker", ctx, out); - ret = inode_ctx_get (inode, this, &ctx_int); - if (ret < 0) { - ret = -1; - *ctx = NULL; - goto out; - } + ret = inode_ctx_get(inode, this, &ctx_int); + if (ret < 0) { + ret = -1; + *ctx = NULL; + goto out; + } - mark_ctx = (marker_inode_ctx_t *) (unsigned long)ctx_int; - if (mark_ctx->quota_ctx == NULL) { - ret = -1; - goto out; - } + mark_ctx = (marker_inode_ctx_t *)(unsigned long)ctx_int; + if (mark_ctx->quota_ctx == NULL) { + ret = -1; + goto out; + } - *ctx = mark_ctx->quota_ctx; + *ctx = mark_ctx->quota_ctx; - ret = 0; + ret = 0; out: - return ret; + return ret; } - quota_inode_ctx_t * -__mq_inode_ctx_new (inode_t *inode, xlator_t *this) +__mq_inode_ctx_new(inode_t *inode, xlator_t *this) { - int32_t ret = -1; - quota_inode_ctx_t *quota_ctx = NULL; - marker_inode_ctx_t *mark_ctx = NULL; - - ret = marker_force_inode_ctx_get (inode, this, &mark_ctx); - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, - "marker_force_inode_ctx_get() failed"); - goto out; + int32_t ret = -1; + quota_inode_ctx_t *quota_ctx = NULL; + marker_inode_ctx_t *mark_ctx = NULL; + + ret = marker_force_inode_ctx_get(inode, this, &mark_ctx); + if (ret < 0) { + gf_log(this->name, GF_LOG_ERROR, "marker_force_inode_ctx_get() failed"); + goto out; + } + + LOCK(&inode->lock); + { + if (mark_ctx->quota_ctx == NULL) { + 
quota_ctx = mq_alloc_inode_ctx(); + if (quota_ctx == NULL) { + ret = -1; + goto unlock; + } + mark_ctx->quota_ctx = quota_ctx; + } else { + quota_ctx = mark_ctx->quota_ctx; } - LOCK (&inode->lock); - { - if (mark_ctx->quota_ctx == NULL) { - quota_ctx = mq_alloc_inode_ctx (); - if (quota_ctx == NULL) { - ret = -1; - goto unlock; - } - mark_ctx->quota_ctx = quota_ctx; - } else { - quota_ctx = mark_ctx->quota_ctx; - } - - ret = 0; - } + ret = 0; + } unlock: - UNLOCK (&inode->lock); + UNLOCK(&inode->lock); out: - return quota_ctx; + return quota_ctx; } - quota_inode_ctx_t * -mq_inode_ctx_new (inode_t * inode, xlator_t *this) -{ - return __mq_inode_ctx_new (inode, this); -} - -quota_local_t * -mq_local_new () -{ - quota_local_t *local = NULL; - - local = mem_get0 (THIS->local_pool); - if (!local) - goto out; - - local->ref = 1; - LOCK_INIT (&local->lock); - - local->ctx = NULL; - local->contri = NULL; - -out: - return local; -} - -quota_local_t * -mq_local_ref (quota_local_t *local) -{ - LOCK (&local->lock); - { - local->ref ++; - } - UNLOCK (&local->lock); - - return local; -} - - -int32_t -mq_local_unref (xlator_t *this, quota_local_t *local) +mq_inode_ctx_new(inode_t *inode, xlator_t *this) { - int32_t ref = 0; - if (local == NULL) - goto out; - - QUOTA_SAFE_DECREMENT (&local->lock, local->ref, ref); - - if (ref != 0) - goto out; - - if (local->fd != NULL) - fd_unref (local->fd); - - if (local->contri) - GF_REF_PUT (local->contri); - - if (local->xdata) - dict_unref (local->xdata); - - loc_wipe (&local->loc); - - loc_wipe (&local->parent_loc); - - LOCK_DESTROY (&local->lock); - - mem_put (local); -out: - return 0; -} - - -inode_contribution_t * -mq_get_contribution_from_loc (xlator_t *this, loc_t *loc) -{ - int32_t ret = 0; - quota_inode_ctx_t *ctx = NULL; - inode_contribution_t *contribution = NULL; - - ret = mq_inode_ctx_get (loc->inode, this, &ctx); - if (ret < 0) { - gf_log_callingfn (this->name, GF_LOG_WARNING, - "cannot get marker-quota context from inode " - 
"(gfid:%s, path:%s)", - uuid_utoa (loc->inode->gfid), loc->path); - goto err; - } - - contribution = mq_get_contribution_node (loc->parent, ctx); - if (contribution == NULL) { - gf_log_callingfn (this->name, GF_LOG_WARNING, - "inode (gfid:%s, path:%s) has " - "no contribution towards parent (gfid:%s)", - uuid_utoa (loc->inode->gfid), - loc->path, uuid_utoa (loc->parent->gfid)); - goto err; - } - -err: - return contribution; + return __mq_inode_ctx_new(inode, this); } diff --git a/xlators/features/marker/src/marker-quota-helper.h b/xlators/features/marker/src/marker-quota-helper.h index bf417aa8241..d4091dd2180 100644 --- a/xlators/features/marker/src/marker-quota-helper.h +++ b/xlators/features/marker/src/marker-quota-helper.h @@ -13,69 +13,54 @@ #include "marker.h" -#define QUOTA_FREE_CONTRIBUTION_NODE(ctx, _contribution) \ - do { \ - LOCK (&ctx->lock); \ - { \ - list_del_init (&_contribution->contri_list); \ - GF_REF_PUT (_contribution); \ - } \ - UNLOCK (&ctx->lock); \ - } while (0) - -#define QUOTA_SAFE_INCREMENT(lock, var) \ - do { \ - LOCK (lock); \ - var ++; \ - UNLOCK (lock); \ - } while (0) - -#define QUOTA_SAFE_DECREMENT(lock, var, value) \ - do { \ - LOCK (lock); \ - { \ - value = --var; \ - } \ - UNLOCK (lock); \ - } while (0) +#define QUOTA_FREE_CONTRIBUTION_NODE(ctx, _contribution) \ + do { \ + LOCK(&ctx->lock); \ + { \ + list_del_init(&_contribution->contri_list); \ + GF_REF_PUT(_contribution); \ + } \ + UNLOCK(&ctx->lock); \ + } while (0) + +#define QUOTA_SAFE_INCREMENT(lock, var) \ + do { \ + LOCK(lock); \ + var++; \ + UNLOCK(lock); \ + } while (0) + +#define QUOTA_SAFE_DECREMENT(lock, var, value) \ + do { \ + LOCK(lock); \ + { \ + value = --var; \ + } \ + UNLOCK(lock); \ + } while (0) inode_contribution_t * -mq_add_new_contribution_node (xlator_t *, quota_inode_ctx_t *, loc_t *); +mq_add_new_contribution_node(xlator_t *, quota_inode_ctx_t *, loc_t *); int32_t -mq_dict_set_contribution (xlator_t *, dict_t *, loc_t *, uuid_t, char *); 
+mq_dict_set_contribution(xlator_t *, dict_t *, loc_t *, uuid_t, char *); quota_inode_ctx_t * -mq_inode_ctx_new (inode_t *, xlator_t *); - -int32_t -mq_inode_ctx_get (inode_t *, xlator_t *, quota_inode_ctx_t **); +mq_inode_ctx_new(inode_t *, xlator_t *); int32_t -mq_delete_contribution_node (dict_t *, char *, inode_contribution_t *); +mq_inode_ctx_get(inode_t *, xlator_t *, quota_inode_ctx_t **); int32_t -mq_inode_loc_fill (const char *, inode_t *, loc_t *); - -quota_local_t * -mq_local_new (); - -quota_local_t * -mq_local_ref (quota_local_t *); +mq_delete_contribution_node(dict_t *, char *, inode_contribution_t *); int32_t -mq_local_unref (xlator_t *, quota_local_t *); - -void -mq_contri_fini (void *data); - -inode_contribution_t* -mq_contri_init (inode_t *inode); +mq_inode_loc_fill(const char *, inode_t *, loc_t *); inode_contribution_t * -mq_get_contribution_node (inode_t *, quota_inode_ctx_t *); +mq_contri_init(inode_t *inode); inode_contribution_t * -mq_get_contribution_from_loc (xlator_t *this, loc_t *loc); +mq_get_contribution_node(inode_t *, quota_inode_ctx_t *); #endif diff --git a/xlators/features/marker/src/marker-quota.c b/xlators/features/marker/src/marker-quota.c index e12cfab2527..3de2ea1c92c 100644 --- a/xlators/features/marker/src/marker-quota.c +++ b/xlators/features/marker/src/marker-quota.c @@ -7,161 +7,261 @@ later), or the GNU General Public License, version 2 (GPLv2), in all cases as published by the Free Software Foundation. 
*/ -#include "dict.h" -#include "xlator.h" -#include "defaults.h" +#include <glusterfs/dict.h> +#include <glusterfs/xlator.h> +#include <glusterfs/defaults.h> #include "libxlator.h" -#include "common-utils.h" -#include "byte-order.h" +#include <glusterfs/common-utils.h> +#include <glusterfs/byte-order.h> #include "marker-quota.h" #include "marker-quota-helper.h" -#include "syncop.h" -#include "quota-common-utils.h" +#include <glusterfs/syncop.h> +#include <glusterfs/quota-common-utils.h> int -mq_loc_copy (loc_t *dst, loc_t *src) +mq_loc_copy(loc_t *dst, loc_t *src) { - int ret = -1; + int ret = -1; - GF_VALIDATE_OR_GOTO ("marker", dst, out); - GF_VALIDATE_OR_GOTO ("marker", src, out); + GF_VALIDATE_OR_GOTO("marker", dst, out); + GF_VALIDATE_OR_GOTO("marker", src, out); - if (src->inode == NULL || - ((src->parent == NULL) && (gf_uuid_is_null (src->pargfid)) - && !__is_root_gfid (src->inode->gfid))) { - gf_log ("marker", GF_LOG_WARNING, - "src loc is not valid"); - goto out; - } + if (src->inode == NULL || + ((src->parent == NULL) && (gf_uuid_is_null(src->pargfid)) && + !__is_root_gfid(src->inode->gfid))) { + gf_log("marker", GF_LOG_WARNING, "src loc is not valid"); + goto out; + } - ret = loc_copy (dst, src); + ret = loc_copy(dst, src); out: - return ret; + return ret; } static void -mq_set_ctx_status (quota_inode_ctx_t *ctx, gf_boolean_t *flag, - gf_boolean_t status) +mq_set_ctx_status(quota_inode_ctx_t *ctx, gf_boolean_t *flag, + gf_boolean_t status) { - LOCK (&ctx->lock); - { - *flag = status; - } - UNLOCK (&ctx->lock); + LOCK(&ctx->lock); + { + *flag = status; + } + UNLOCK(&ctx->lock); } static void -mq_test_and_set_ctx_status (quota_inode_ctx_t *ctx, gf_boolean_t *flag, - gf_boolean_t *status) +mq_test_and_set_ctx_status(quota_inode_ctx_t *ctx, gf_boolean_t *flag, + gf_boolean_t *status) { - gf_boolean_t temp = _gf_false; - - LOCK (&ctx->lock); - { - temp = *status; - *status = *flag; - *flag = temp; - } - UNLOCK (&ctx->lock); + gf_boolean_t temp = _gf_false; + 
+ LOCK(&ctx->lock); + { + temp = *status; + *status = *flag; + *flag = temp; + } + UNLOCK(&ctx->lock); } static void -mq_get_ctx_status (quota_inode_ctx_t *ctx, gf_boolean_t *flag, - gf_boolean_t *status) +mq_get_ctx_status(quota_inode_ctx_t *ctx, gf_boolean_t *flag, + gf_boolean_t *status) { - LOCK (&ctx->lock); - { - *status = *flag; - } - UNLOCK (&ctx->lock); + LOCK(&ctx->lock); + { + *status = *flag; + } + UNLOCK(&ctx->lock); } int32_t -mq_get_ctx_updation_status (quota_inode_ctx_t *ctx, - gf_boolean_t *status) +mq_get_ctx_updation_status(quota_inode_ctx_t *ctx, gf_boolean_t *status) { - GF_VALIDATE_OR_GOTO ("marker", ctx, out); - GF_VALIDATE_OR_GOTO ("marker", status, out); + GF_VALIDATE_OR_GOTO("marker", ctx, out); + GF_VALIDATE_OR_GOTO("marker", status, out); - mq_get_ctx_status (ctx, &ctx->updation_status, status); - return 0; + mq_get_ctx_status(ctx, &ctx->updation_status, status); + return 0; out: - return -1; + return -1; } int32_t -mq_set_ctx_updation_status (quota_inode_ctx_t *ctx, - gf_boolean_t status) +mq_set_ctx_updation_status(quota_inode_ctx_t *ctx, gf_boolean_t status) { - GF_VALIDATE_OR_GOTO ("marker", ctx, out); + GF_VALIDATE_OR_GOTO("marker", ctx, out); - mq_set_ctx_status (ctx, &ctx->updation_status, status); - return 0; + mq_set_ctx_status(ctx, &ctx->updation_status, status); + return 0; out: - return -1; + return -1; } int32_t -mq_test_and_set_ctx_updation_status (quota_inode_ctx_t *ctx, - gf_boolean_t *status) +mq_test_and_set_ctx_updation_status(quota_inode_ctx_t *ctx, + gf_boolean_t *status) { - GF_VALIDATE_OR_GOTO ("marker", ctx, out); - GF_VALIDATE_OR_GOTO ("marker", status, out); + GF_VALIDATE_OR_GOTO("marker", ctx, out); + GF_VALIDATE_OR_GOTO("marker", status, out); - mq_test_and_set_ctx_status (ctx, &ctx->updation_status, status); - return 0; + mq_test_and_set_ctx_status(ctx, &ctx->updation_status, status); + return 0; out: - return -1; + return -1; } int32_t -mq_set_ctx_create_status (quota_inode_ctx_t *ctx, - gf_boolean_t status) 
+mq_set_ctx_create_status(quota_inode_ctx_t *ctx, gf_boolean_t status) { - GF_VALIDATE_OR_GOTO ("marker", ctx, out); + GF_VALIDATE_OR_GOTO("marker", ctx, out); - mq_set_ctx_status (ctx, &ctx->create_status, status); - return 0; + mq_set_ctx_status(ctx, &ctx->create_status, status); + return 0; out: - return -1; + return -1; } int32_t -mq_test_and_set_ctx_create_status (quota_inode_ctx_t *ctx, - gf_boolean_t *status) +mq_test_and_set_ctx_create_status(quota_inode_ctx_t *ctx, gf_boolean_t *status) { - GF_VALIDATE_OR_GOTO ("marker", ctx, out); - GF_VALIDATE_OR_GOTO ("marker", status, out); + GF_VALIDATE_OR_GOTO("marker", ctx, out); + GF_VALIDATE_OR_GOTO("marker", status, out); - mq_test_and_set_ctx_status (ctx, &ctx->create_status, status); - return 0; + mq_test_and_set_ctx_status(ctx, &ctx->create_status, status); + return 0; out: - return -1; + return -1; } -int32_t -mq_set_ctx_dirty_status (quota_inode_ctx_t *ctx, - gf_boolean_t status) +static void +mq_set_ctx_dirty_status(quota_inode_ctx_t *ctx, gf_boolean_t status) { - GF_VALIDATE_OR_GOTO ("marker", ctx, out); + GF_VALIDATE_OR_GOTO("marker", ctx, out); - mq_set_ctx_status (ctx, &ctx->dirty_status, status); - return 0; + mq_set_ctx_status(ctx, &ctx->dirty_status, status); out: - return -1; + return; } -int32_t -mq_test_and_set_ctx_dirty_status (quota_inode_ctx_t *ctx, - gf_boolean_t *status) +int +mq_build_ancestry(xlator_t *this, loc_t *loc) { - GF_VALIDATE_OR_GOTO ("marker", ctx, out); - GF_VALIDATE_OR_GOTO ("marker", status, out); + int32_t ret = -1; + fd_t *fd = NULL; + gf_dirent_t entries; + gf_dirent_t *entry = NULL; + dict_t *xdata = NULL; + inode_t *tmp_parent = NULL; + inode_t *tmp_inode = NULL; + inode_t *linked_inode = NULL; + quota_inode_ctx_t *ctx = NULL; + + INIT_LIST_HEAD(&entries.list); + + xdata = dict_new(); + if (xdata == NULL) { + gf_log(this->name, GF_LOG_ERROR, "dict_new failed"); + ret = -ENOMEM; + goto out; + } + + ret = dict_set_int8(xdata, GET_ANCESTRY_DENTRY_KEY, 1); + if (ret < 0) + 
goto out; + + fd = fd_anonymous(loc->inode); + if (fd == NULL) { + gf_log(this->name, GF_LOG_ERROR, "fd creation failed"); + ret = -ENOMEM; + goto out; + } + + fd_bind(fd); + + ret = syncop_readdirp(this, fd, 131072, 0, &entries, xdata, NULL); + if (ret < 0) { + gf_log(this->name, + (-ret == ENOENT || -ret == ESTALE) ? GF_LOG_DEBUG : GF_LOG_ERROR, + "readdirp failed " + "for %s: %s", + loc->path, strerror(-ret)); + goto out; + } + + if (list_empty(&entries.list)) { + ret = -1; + goto out; + } + + list_for_each_entry(entry, &entries.list, list) + { + if (__is_root_gfid(entry->inode->gfid)) { + /* The list contains a sub-list for each possible path + * to the target inode. Each sub-list starts with the + * root entry of the tree and is followed by the child + * entries for a particular path to the target entry. + * The root entry is an implied sub-list delimiter, + * as it denotes we have started processing a new path. + * Reset the parent pointer and continue + */ + + tmp_parent = NULL; + } else { + linked_inode = inode_link(entry->inode, tmp_parent, entry->d_name, + &entry->d_stat); + if (linked_inode) { + tmp_inode = entry->inode; + entry->inode = linked_inode; + inode_unref(tmp_inode); + } else { + gf_log(this->name, GF_LOG_ERROR, "inode link failed"); + ret = -EINVAL; + goto out; + } + } + + ctx = mq_inode_ctx_new(entry->inode, this); + if (ctx == NULL) { + gf_log(this->name, GF_LOG_WARNING, + "mq_inode_ctx_new " + "failed for %s", + uuid_utoa(entry->inode->gfid)); + ret = -ENOMEM; + goto out; + } + + /* For non-directory, posix_get_ancestry_non_directory returns + * all hard-links that are represented by nodes adjacent to + * each other in the dentry-list. + * (Unlike the directory case where adjacent nodes either have + * a parent/child relationship or belong to different paths). 
+ */ + if (entry->inode->ia_type == IA_IFDIR) + tmp_parent = entry->inode; + } + + if (loc->parent) + inode_unref(loc->parent); + + loc->parent = inode_parent(loc->inode, 0, NULL); + if (loc->parent == NULL) { + ret = -1; + goto out; + } + + ret = 0; - mq_test_and_set_ctx_status (ctx, &ctx->dirty_status, status); - return 0; out: - return -1; + gf_dirent_free(&entries); + + if (fd) + fd_unref(fd); + + if (xdata) + dict_unref(xdata); + + return ret; } /* This function should be used only in inspect_directory and inspect_file @@ -173,1886 +273,2025 @@ out: * This function returns success even is inode-quota xattrs are missing and * hence no healing performed. */ -int32_t -_quota_dict_get_meta (xlator_t *this, dict_t *dict, char *key, - quota_meta_t *meta, ia_type_t ia_type, - gf_boolean_t add_delta) +static int32_t +_quota_dict_get_meta(xlator_t *this, dict_t *dict, char *key, const int keylen, + quota_meta_t *meta, ia_type_t ia_type, + gf_boolean_t add_delta) { - int32_t ret = 0; - marker_conf_t *priv = NULL; - - priv = this->private; - - ret = quota_dict_get_inode_meta (dict, key, meta); - if (ret == -2 && (priv->feature_enabled & GF_INODE_QUOTA) == 0) { - /* quota_dict_get_inode_meta returns -2 if - * inode quota xattrs are not present. - * if inode quota self heal is turned off, - * then we should skip healing inode quotas - */ - - gf_log (this->name, GF_LOG_DEBUG, "inode quota disabled. " - "inode quota self heal will not be performed"); - ret = 0; - if (add_delta) { - if (ia_type == IA_IFDIR) - meta->dir_count = 1; - else - meta->file_count = 1; - } + int32_t ret = 0; + marker_conf_t *priv = NULL; + + priv = this->private; + + ret = quota_dict_get_inode_meta(dict, key, keylen, meta); + if (ret == -2 && (priv->feature_enabled & GF_INODE_QUOTA) == 0) { + /* quota_dict_get_inode_meta returns -2 if + * inode quota xattrs are not present. 
+ * if inode quota self heal is turned off, + * then we should skip healing inode quotas + */ + + gf_log(this->name, GF_LOG_DEBUG, + "inode quota disabled. " + "inode quota self heal will not be performed"); + ret = 0; + if (add_delta) { + if (ia_type == IA_IFDIR) + meta->dir_count = 1; + else + meta->file_count = 1; } + } - return ret; + return ret; } int32_t -quota_dict_set_size_meta (xlator_t *this, dict_t *dict, - const quota_meta_t *meta) +quota_dict_set_size_meta(xlator_t *this, dict_t *dict, const quota_meta_t *meta) { - int32_t ret = -ENOMEM; - quota_meta_t *value = NULL; - char size_key[QUOTA_KEY_MAX] = {0, }; - - value = GF_CALLOC (2, sizeof (quota_meta_t), gf_common_quota_meta_t); - if (value == NULL) { - goto out; - } - value[0].size = hton64 (meta->size); - value[0].file_count = hton64 (meta->file_count); - value[0].dir_count = hton64 (meta->dir_count); + int32_t ret = -ENOMEM; + quota_meta_t *value = NULL; + char size_key[QUOTA_KEY_MAX] = { + 0, + }; + + value = GF_MALLOC(2 * sizeof(quota_meta_t), gf_common_quota_meta_t); + if (value == NULL) { + goto out; + } + value[0].size = hton64(meta->size); + value[0].file_count = hton64(meta->file_count); + value[0].dir_count = hton64(meta->dir_count); - value[1].size = 0; - value[1].file_count = 0; - value[1].dir_count = hton64 (1); + value[1].size = 0; + value[1].file_count = 0; + value[1].dir_count = hton64(1); - GET_SIZE_KEY (this, size_key, ret); - if (ret < 0) - goto out; - ret = dict_set_bin (dict, size_key, value, - (sizeof (quota_meta_t) * 2)); - if (ret < 0) { - gf_log_callingfn ("quota", GF_LOG_ERROR, "dict set failed"); - GF_FREE (value); - } + GET_SIZE_KEY(this, size_key, ret); + if (ret < 0) + goto out; + ret = dict_set_bin(dict, size_key, value, (sizeof(quota_meta_t) * 2)); + if (ret < 0) { + gf_log_callingfn("quota", GF_LOG_ERROR, "dict set failed"); + GF_FREE(value); + } out: - return ret; + return ret; } void -mq_compute_delta (quota_meta_t *delta, const quota_meta_t *op1, - const 
quota_meta_t *op2) +mq_compute_delta(quota_meta_t *delta, const quota_meta_t *op1, + const quota_meta_t *op2) { - delta->size = op1->size - op2->size; - delta->file_count = op1->file_count - op2->file_count; - delta->dir_count = op1->dir_count - op2->dir_count; + delta->size = op1->size - op2->size; + delta->file_count = op1->file_count - op2->file_count; + delta->dir_count = op1->dir_count - op2->dir_count; } void -mq_add_meta (quota_meta_t *dst, const quota_meta_t *src) +mq_add_meta(quota_meta_t *dst, const quota_meta_t *src) { - dst->size += src->size; - dst->file_count += src->file_count; - dst->dir_count += src->dir_count; + dst->size += src->size; + dst->file_count += src->file_count; + dst->dir_count += src->dir_count; } void -mq_sub_meta (quota_meta_t *dst, const quota_meta_t *src) +mq_sub_meta(quota_meta_t *dst, const quota_meta_t *src) { - if (src == NULL) { - dst->size = -dst->size; - dst->file_count = -dst->file_count; - dst->dir_count = -dst->dir_count; - } else { - dst->size = src->size - dst->size; - dst->file_count = src->file_count - dst->file_count; - dst->dir_count = src->dir_count - dst->dir_count; - } -} - -gf_boolean_t -quota_meta_is_null (const quota_meta_t *meta) -{ - if (meta->size == 0 && - meta->file_count == 0 && - meta->dir_count == 0) - return _gf_true; - - return _gf_false; + if (src == NULL) { + dst->size = -dst->size; + dst->file_count = -dst->file_count; + dst->dir_count = -dst->dir_count; + } else { + dst->size = src->size - dst->size; + dst->file_count = src->file_count - dst->file_count; + dst->dir_count = src->dir_count - dst->dir_count; + } } int32_t -mq_are_xattrs_set (xlator_t *this, loc_t *loc, gf_boolean_t *contri_set, - gf_boolean_t *size_set) +mq_are_xattrs_set(xlator_t *this, loc_t *loc, gf_boolean_t *contri_set, + gf_boolean_t *size_set) { - int32_t ret = -1; - char contri_key[QUOTA_KEY_MAX] = {0, }; - char size_key[QUOTA_KEY_MAX] = {0, }; - quota_meta_t meta = {0, }; - struct iatt stbuf = {0,}; - dict_t *dict = NULL; 
- dict_t *rsp_dict = NULL; - - dict = dict_new (); - if (dict == NULL) { - gf_log (this->name, GF_LOG_ERROR, "dict_new failed"); - goto out; - } - - ret = mq_req_xattr (this, loc, dict, contri_key, size_key); - if (ret < 0) - goto out; + int32_t ret = -1; + char contri_key[QUOTA_KEY_MAX] = { + 0, + }; + char size_key[QUOTA_KEY_MAX] = { + 0, + }; + quota_meta_t meta = { + 0, + }; + struct iatt stbuf = { + 0, + }; + dict_t *dict = NULL; + dict_t *rsp_dict = NULL; + + dict = dict_new(); + if (dict == NULL) { + gf_log(this->name, GF_LOG_ERROR, "dict_new failed"); + goto out; + } - ret = syncop_lookup (FIRST_CHILD(this), loc, &stbuf, NULL, - dict, &rsp_dict); - if (ret < 0) { - gf_log_callingfn (this->name, (-ret == ENOENT || -ret == ESTALE) - ? GF_LOG_DEBUG:GF_LOG_ERROR, "lookup failed " - "for %s: %s", loc->path, strerror (-ret)); - goto out; - } + ret = mq_req_xattr(this, loc, dict, contri_key, size_key); + if (ret < 0) + goto out; - if (rsp_dict == NULL) - goto out; + ret = syncop_lookup(FIRST_CHILD(this), loc, &stbuf, NULL, dict, &rsp_dict); + if (ret < 0) { + gf_log_callingfn( + this->name, + (-ret == ENOENT || -ret == ESTALE) ? 
GF_LOG_DEBUG : GF_LOG_ERROR, + "lookup failed " + "for %s: %s", + loc->path, strerror(-ret)); + goto out; + } - *contri_set = _gf_true; - *size_set = _gf_true; - if (loc->inode->ia_type == IA_IFDIR) { - ret = quota_dict_get_inode_meta (rsp_dict, size_key, &meta); - if (ret < 0 || meta.dir_count == 0) - *size_set = _gf_false; - } + if (rsp_dict == NULL) + goto out; - if (!loc_is_root(loc)) { - ret = quota_dict_get_inode_meta (rsp_dict, contri_key, &meta); - if (ret < 0) - *contri_set = _gf_false; - } + *contri_set = _gf_true; + *size_set = _gf_true; + if (loc->inode->ia_type == IA_IFDIR) { + ret = quota_dict_get_inode_meta(rsp_dict, size_key, strlen(size_key), + &meta); + if (ret < 0 || meta.dir_count == 0) + *size_set = _gf_false; + } + + if (!loc_is_root(loc)) { + ret = quota_dict_get_inode_meta(rsp_dict, contri_key, + strlen(contri_key), &meta); + if (ret < 0) + *contri_set = _gf_false; + } - ret = 0; + ret = 0; out: - if (dict) - dict_unref (dict); + if (dict) + dict_unref(dict); - if (rsp_dict) - dict_unref (rsp_dict); + if (rsp_dict) + dict_unref(rsp_dict); - return ret; + return ret; } int32_t -mq_create_size_xattrs (xlator_t *this, quota_inode_ctx_t *ctx, loc_t *loc) +mq_create_size_xattrs(xlator_t *this, quota_inode_ctx_t *ctx, loc_t *loc) { - int32_t ret = -1; - quota_meta_t size = {0, }; - dict_t *dict = NULL; + int32_t ret = -1; + quota_meta_t size = { + 0, + }; + dict_t *dict = NULL; - GF_VALIDATE_OR_GOTO ("marker", loc, out); - GF_VALIDATE_OR_GOTO ("marker", loc->inode, out); + GF_VALIDATE_OR_GOTO("marker", loc, out); + GF_VALIDATE_OR_GOTO("marker", loc->inode, out); - if (loc->inode->ia_type != IA_IFDIR) { - ret = 0; - goto out; - } - - dict = dict_new (); - if (!dict) { - gf_log (this->name, GF_LOG_ERROR, "dict_new failed"); - ret = -1; - goto out; - } + if (loc->inode->ia_type != IA_IFDIR) { + ret = 0; + goto out; + } - ret = quota_dict_set_size_meta (this, dict, &size); - if (ret < 0) - goto out; + dict = dict_new(); + if (!dict) { + 
gf_log(this->name, GF_LOG_ERROR, "dict_new failed"); + ret = -1; + goto out; + } - ret = syncop_xattrop (FIRST_CHILD(this), loc, - GF_XATTROP_ADD_ARRAY64_WITH_DEFAULT, dict, NULL, - NULL); + ret = quota_dict_set_size_meta(this, dict, &size); + if (ret < 0) + goto out; - if (ret < 0) { - gf_log_callingfn (this->name, (-ret == ENOENT || -ret == ESTALE) - ? GF_LOG_DEBUG:GF_LOG_ERROR, "xattrop failed " - "for %s: %s", loc->path, strerror (-ret)); - goto out; - } + ret = syncop_xattrop(FIRST_CHILD(this), loc, + GF_XATTROP_ADD_ARRAY64_WITH_DEFAULT, dict, NULL, NULL, + NULL); + + if (ret < 0) { + gf_log_callingfn( + this->name, + (-ret == ENOENT || -ret == ESTALE) ? GF_LOG_DEBUG : GF_LOG_ERROR, + "xattrop failed " + "for %s: %s", + loc->path, strerror(-ret)); + goto out; + } out: - if (dict) - dict_unref (dict); + if (dict) + dict_unref(dict); - return ret; + return ret; } int32_t -mq_lock (xlator_t *this, loc_t *loc, short l_type) +mq_lock(xlator_t *this, loc_t *loc, short l_type) { - struct gf_flock lock = {0, }; - int32_t ret = -1; - - GF_VALIDATE_OR_GOTO ("marker", loc, out); - GF_VALIDATE_OR_GOTO ("marker", loc->inode, out); - - gf_log (this->name, GF_LOG_DEBUG, "set lock type %d on %s", - l_type, loc->path); - - lock.l_len = 0; - lock.l_start = 0; - lock.l_type = l_type; - lock.l_whence = SEEK_SET; - - ret = syncop_inodelk (FIRST_CHILD(this), this->name, loc, F_SETLKW, - &lock, NULL, NULL); - if (ret < 0) - gf_log_callingfn (this->name, (-ret == ENOENT || -ret == ESTALE) - ? 
GF_LOG_DEBUG:GF_LOG_ERROR, "inodelk failed " - "for %s: %s", loc->path, strerror (-ret)); + struct gf_flock lock = { + 0, + }; + int32_t ret = -1; + + GF_VALIDATE_OR_GOTO("marker", loc, out); + GF_VALIDATE_OR_GOTO("marker", loc->inode, out); + + gf_log(this->name, GF_LOG_DEBUG, "set lock type %d on %s", l_type, + loc->path); + + lock.l_len = 0; + lock.l_start = 0; + lock.l_type = l_type; + lock.l_whence = SEEK_SET; + + ret = syncop_inodelk(FIRST_CHILD(this), this->name, loc, F_SETLKW, &lock, + NULL, NULL); + if (ret < 0) + gf_log_callingfn( + this->name, + (-ret == ENOENT || -ret == ESTALE) ? GF_LOG_DEBUG : GF_LOG_ERROR, + "inodelk failed " + "for %s: %s", + loc->path, strerror(-ret)); out: - return ret; + return ret; } int32_t -mq_get_dirty (xlator_t *this, loc_t *loc, int32_t *dirty) +mq_get_dirty(xlator_t *this, loc_t *loc, int32_t *dirty) { - int32_t ret = -1; - int8_t value = 0; - dict_t *dict = NULL; - dict_t *rsp_dict = NULL; - struct iatt stbuf = {0,}; - - dict = dict_new (); - if (dict == NULL) { - gf_log (this->name, GF_LOG_ERROR, "dict_new failed"); - goto out; - } - - ret = dict_set_int64 (dict, QUOTA_DIRTY_KEY, 0); - if (ret < 0) { - gf_log (this->name, GF_LOG_WARNING, "dict set failed"); - goto out; - } + int32_t ret = -1; + int8_t value = 0; + dict_t *dict = NULL; + dict_t *rsp_dict = NULL; + struct iatt stbuf = { + 0, + }; + + dict = dict_new(); + if (dict == NULL) { + gf_log(this->name, GF_LOG_ERROR, "dict_new failed"); + goto out; + } - ret = syncop_lookup (FIRST_CHILD(this), loc, &stbuf, NULL, - dict, &rsp_dict); - if (ret < 0) { - gf_log_callingfn (this->name, (-ret == ENOENT || -ret == ESTALE) - ? 
GF_LOG_DEBUG:GF_LOG_ERROR, "lookup failed " - "for %s: %s", loc->path, strerror (-ret)); - goto out; - } + ret = dict_set_int64(dict, QUOTA_DIRTY_KEY, 0); + if (ret < 0) { + gf_log(this->name, GF_LOG_WARNING, "dict set failed"); + goto out; + } + + ret = syncop_lookup(FIRST_CHILD(this), loc, &stbuf, NULL, dict, &rsp_dict); + if (ret < 0) { + gf_log_callingfn( + this->name, + (-ret == ENOENT || -ret == ESTALE) ? GF_LOG_DEBUG : GF_LOG_ERROR, + "lookup failed " + "for %s: %s", + loc->path, strerror(-ret)); + goto out; + } - ret = dict_get_int8 (rsp_dict, QUOTA_DIRTY_KEY, &value); - if (ret < 0) - goto out; + ret = dict_get_int8(rsp_dict, QUOTA_DIRTY_KEY, &value); + if (ret < 0) + goto out; - *dirty = value; + *dirty = value; out: - if (dict) - dict_unref (dict); + if (dict) + dict_unref(dict); - if (rsp_dict) - dict_unref (rsp_dict); + if (rsp_dict) + dict_unref(rsp_dict); - return ret; + return ret; } int32_t -mq_get_set_dirty (xlator_t *this, loc_t *loc, int32_t dirty, - int32_t *prev_dirty) +mq_get_set_dirty(xlator_t *this, loc_t *loc, int32_t dirty, int32_t *prev_dirty) { - int32_t ret = -1; - int8_t value = 0; - quota_inode_ctx_t *ctx = NULL; - dict_t *dict = NULL; - dict_t *rsp_dict = NULL; - - GF_VALIDATE_OR_GOTO ("marker", loc, out); - GF_VALIDATE_OR_GOTO ("marker", loc->inode, out); - GF_VALIDATE_OR_GOTO ("marker", prev_dirty, out); - - ret = mq_inode_ctx_get (loc->inode, this, &ctx); - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, "failed to get inode ctx for " - "%s", loc->path); - goto out; - } - - dict = dict_new (); - if (!dict) { - gf_log (this->name, GF_LOG_ERROR, "dict_new failed"); - ret = -1; - goto out; - } - - ret = dict_set_int8 (dict, QUOTA_DIRTY_KEY, dirty); - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, "dict_set failed"); - goto out; - } - - ret = syncop_xattrop (FIRST_CHILD(this), loc, GF_XATTROP_GET_AND_SET, - dict, NULL, &rsp_dict); - if (ret < 0) { - gf_log_callingfn (this->name, (-ret == ENOENT || -ret == ESTALE) - ? 
GF_LOG_DEBUG:GF_LOG_ERROR, "xattrop failed " - "for %s: %s", loc->path, strerror (-ret)); - goto out; - } + int32_t ret = -1; + int8_t value = 0; + quota_inode_ctx_t *ctx = NULL; + dict_t *dict = NULL; + dict_t *rsp_dict = NULL; + + GF_VALIDATE_OR_GOTO("marker", loc, out); + GF_VALIDATE_OR_GOTO("marker", loc->inode, out); + GF_VALIDATE_OR_GOTO("marker", prev_dirty, out); + + ret = mq_inode_ctx_get(loc->inode, this, &ctx); + if (ret < 0) { + gf_log(this->name, GF_LOG_ERROR, + "failed to get inode ctx for " + "%s", + loc->path); + goto out; + } - *prev_dirty = 0; - if (rsp_dict) { - ret = dict_get_int8 (rsp_dict, QUOTA_DIRTY_KEY, &value); - if (ret == 0) - *prev_dirty = value; - } + dict = dict_new(); + if (!dict) { + gf_log(this->name, GF_LOG_ERROR, "dict_new failed"); + ret = -1; + goto out; + } - LOCK (&ctx->lock); - { - ctx->dirty = dirty; - } - UNLOCK (&ctx->lock); - ret = 0; + ret = dict_set_int8(dict, QUOTA_DIRTY_KEY, dirty); + if (ret < 0) { + gf_log(this->name, GF_LOG_ERROR, "dict_set failed"); + goto out; + } + + ret = syncop_xattrop(FIRST_CHILD(this), loc, GF_XATTROP_GET_AND_SET, dict, + NULL, NULL, &rsp_dict); + if (ret < 0) { + gf_log_callingfn( + this->name, + (-ret == ENOENT || -ret == ESTALE) ? 
GF_LOG_DEBUG : GF_LOG_ERROR, + "xattrop failed " + "for %s: %s", + loc->path, strerror(-ret)); + goto out; + } + + *prev_dirty = 0; + if (rsp_dict) { + ret = dict_get_int8(rsp_dict, QUOTA_DIRTY_KEY, &value); + if (ret == 0) + *prev_dirty = value; + } + + LOCK(&ctx->lock); + { + ctx->dirty = dirty; + } + UNLOCK(&ctx->lock); + ret = 0; out: - if (dict) - dict_unref (dict); + if (dict) + dict_unref(dict); - if (rsp_dict) - dict_unref (rsp_dict); + if (rsp_dict) + dict_unref(rsp_dict); - return ret; + return ret; } int32_t -mq_mark_dirty (xlator_t *this, loc_t *loc, int32_t dirty) +mq_mark_dirty(xlator_t *this, loc_t *loc, int32_t dirty) { - int32_t ret = -1; - dict_t *dict = NULL; - quota_inode_ctx_t *ctx = NULL; - - GF_VALIDATE_OR_GOTO ("marker", loc, out); - GF_VALIDATE_OR_GOTO ("marker", loc->inode, out); - - ret = mq_inode_ctx_get (loc->inode, this, &ctx); - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, "failed to get inode ctx for " - "%s", loc->path); - ret = 0; - goto out; - } - - dict = dict_new (); - if (!dict) { - ret = -1; - gf_log (this->name, GF_LOG_ERROR, "dict_new failed"); - goto out; - } + int32_t ret = -1; + dict_t *dict = NULL; + quota_inode_ctx_t *ctx = NULL; + + GF_VALIDATE_OR_GOTO("marker", loc, out); + GF_VALIDATE_OR_GOTO("marker", loc->inode, out); + + ret = mq_inode_ctx_get(loc->inode, this, &ctx); + if (ret < 0) { + gf_log(this->name, GF_LOG_ERROR, + "failed to get inode ctx for " + "%s", + loc->path); + ret = 0; + goto out; + } - ret = dict_set_int8 (dict, QUOTA_DIRTY_KEY, dirty); - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, "dict_set failed"); - goto out; - } + dict = dict_new(); + if (!dict) { + ret = -1; + gf_log(this->name, GF_LOG_ERROR, "dict_new failed"); + goto out; + } - ret = syncop_setxattr (FIRST_CHILD(this), loc, dict, 0, NULL, NULL); - if (ret < 0) { - gf_log_callingfn (this->name, (-ret == ENOENT || -ret == ESTALE) - ? 
GF_LOG_DEBUG:GF_LOG_ERROR, "setxattr dirty = %d " - "failed for %s: %s", dirty, loc->path, strerror (-ret)); - goto out; - } + ret = dict_set_int8(dict, QUOTA_DIRTY_KEY, dirty); + if (ret < 0) { + gf_log(this->name, GF_LOG_ERROR, "dict_set failed"); + goto out; + } + + ret = syncop_setxattr(FIRST_CHILD(this), loc, dict, 0, NULL, NULL); + if (ret < 0) { + gf_log_callingfn( + this->name, + (-ret == ENOENT || -ret == ESTALE) ? GF_LOG_DEBUG : GF_LOG_ERROR, + "setxattr dirty = %d " + "failed for %s: %s", + dirty, loc->path, strerror(-ret)); + goto out; + } - LOCK (&ctx->lock); - { - ctx->dirty = dirty; - } - UNLOCK (&ctx->lock); + LOCK(&ctx->lock); + { + ctx->dirty = dirty; + } + UNLOCK(&ctx->lock); out: - if (dict) - dict_unref (dict); + if (dict) + dict_unref(dict); - return ret; + return ret; } int32_t -_mq_get_metadata (xlator_t *this, loc_t *loc, quota_meta_t *contri, - quota_meta_t *size, uuid_t contri_gfid) +_mq_get_metadata(xlator_t *this, loc_t *loc, quota_meta_t *contri, + quota_meta_t *size, uuid_t contri_gfid) { - int32_t ret = -1; - quota_meta_t meta = {0, }; - char contri_key[QUOTA_KEY_MAX] = {0, }; - char size_key[QUOTA_KEY_MAX] = {0, }; - dict_t *dict = NULL; - dict_t *rsp_dict = NULL; - struct iatt stbuf = {0,}; - - GF_VALIDATE_OR_GOTO ("marker", loc, out); - GF_VALIDATE_OR_GOTO ("marker", loc->inode, out); - - if (size == NULL && contri == NULL) - goto out; + int32_t ret = -1; + quota_meta_t meta = { + 0, + }; + char contri_key[QUOTA_KEY_MAX] = { + 0, + }; + char size_key[QUOTA_KEY_MAX] = { + 0, + }; + int keylen = 0; + dict_t *dict = NULL; + dict_t *rsp_dict = NULL; + struct iatt stbuf = { + 0, + }; + + GF_VALIDATE_OR_GOTO("marker", loc, out); + GF_VALIDATE_OR_GOTO("marker", loc->inode, out); + + if (size == NULL && contri == NULL) + goto out; - dict = dict_new (); - if (dict == NULL) { - gf_log (this->name, GF_LOG_ERROR, "dict_new failed"); - goto out; - } + dict = dict_new(); + if (dict == NULL) { + gf_log(this->name, GF_LOG_ERROR, "dict_new 
failed"); + goto out; + } - if (size && loc->inode->ia_type == IA_IFDIR) { - GET_SIZE_KEY (this, size_key, ret); - if (ret < 0) - goto out; - ret = dict_set_int64 (dict, size_key, 0); - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, "dict_set failed."); - goto out; - } + if (size && loc->inode->ia_type == IA_IFDIR) { + GET_SIZE_KEY(this, size_key, keylen); + if (keylen < 0) + goto out; + ret = dict_set_int64(dict, size_key, 0); + if (ret < 0) { + gf_log(this->name, GF_LOG_ERROR, "dict_set failed."); + goto out; } + } - if (contri && !loc_is_root(loc)) { - ret = mq_dict_set_contribution (this, dict, loc, contri_gfid, - contri_key); - if (ret < 0) - goto out; - } + if (contri && !loc_is_root(loc)) { + ret = mq_dict_set_contribution(this, dict, loc, contri_gfid, + contri_key); + if (ret < 0) + goto out; + } + + ret = syncop_lookup(FIRST_CHILD(this), loc, &stbuf, NULL, dict, &rsp_dict); + if (ret < 0) { + gf_log_callingfn( + this->name, + (-ret == ENOENT || -ret == ESTALE) ? GF_LOG_DEBUG : GF_LOG_ERROR, + "lookup failed " + "for %s: %s", + loc->path, strerror(-ret)); + goto out; + } - ret = syncop_lookup (FIRST_CHILD(this), loc, &stbuf, NULL, - dict, &rsp_dict); - if (ret < 0) { - gf_log_callingfn (this->name, (-ret == ENOENT || -ret == ESTALE) - ? 
GF_LOG_DEBUG:GF_LOG_ERROR, "lookup failed " - "for %s: %s", loc->path, strerror (-ret)); + if (size) { + if (loc->inode->ia_type == IA_IFDIR) { + ret = quota_dict_get_meta(rsp_dict, size_key, keylen, &meta); + if (ret < 0) { + gf_log(this->name, GF_LOG_ERROR, "dict_get failed."); goto out; - } + } - if (size) { - if (loc->inode->ia_type == IA_IFDIR) { - ret = quota_dict_get_meta (rsp_dict, size_key, - &meta); - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, - "dict_get failed."); - goto out; - } - - size->size = meta.size; - size->file_count = meta.file_count; - size->dir_count = meta.dir_count; - } else { - size->size = stbuf.ia_blocks * 512; - size->file_count = 1; - size->dir_count = 0; - } + size->size = meta.size; + size->file_count = meta.file_count; + size->dir_count = meta.dir_count; + } else { + size->size = stbuf.ia_blocks * 512; + size->file_count = 1; + size->dir_count = 0; } + } - if (contri && !loc_is_root(loc)) { - ret = quota_dict_get_meta (rsp_dict, contri_key, &meta); - if (ret < 0) { - contri->size = 0; - contri->file_count = 0; - contri->dir_count = 0; - } else { - contri->size = meta.size; - contri->file_count = meta.file_count; - contri->dir_count = meta.dir_count; - } + if (contri && !loc_is_root(loc)) { + ret = quota_dict_get_meta(rsp_dict, contri_key, strlen(contri_key), + &meta); + if (ret < 0) { + contri->size = 0; + contri->file_count = 0; + contri->dir_count = 0; + } else { + contri->size = meta.size; + contri->file_count = meta.file_count; + contri->dir_count = meta.dir_count; } + } - ret = 0; + ret = 0; out: - if (dict) - dict_unref (dict); + if (dict) + dict_unref(dict); - if (rsp_dict) - dict_unref (rsp_dict); + if (rsp_dict) + dict_unref(rsp_dict); - return ret; + return ret; } int32_t -mq_get_metadata (xlator_t *this, loc_t *loc, quota_meta_t *contri, - quota_meta_t *size, quota_inode_ctx_t *ctx, - inode_contribution_t *contribution) +mq_get_metadata(xlator_t *this, loc_t *loc, quota_meta_t *contri, + quota_meta_t *size, 
quota_inode_ctx_t *ctx, + inode_contribution_t *contribution) { - int32_t ret = -1; + int32_t ret = -1; - GF_VALIDATE_OR_GOTO ("marker", loc, out); - GF_VALIDATE_OR_GOTO ("marker", loc->inode, out); - GF_VALIDATE_OR_GOTO ("marker", ctx, out); - GF_VALIDATE_OR_GOTO ("marker", contribution, out); + GF_VALIDATE_OR_GOTO("marker", loc, out); + GF_VALIDATE_OR_GOTO("marker", loc->inode, out); + GF_VALIDATE_OR_GOTO("marker", ctx, out); + GF_VALIDATE_OR_GOTO("marker", contribution, out); - if (size == NULL && contri == NULL) { - ret = 0; - goto out; - } + if (size == NULL && contri == NULL) { + ret = 0; + goto out; + } - ret = _mq_get_metadata (this, loc, contri, size, contribution->gfid); - if (ret < 0) - goto out; + ret = _mq_get_metadata(this, loc, contri, size, contribution->gfid); + if (ret < 0) + goto out; - if (size) { - LOCK (&ctx->lock); - { - ctx->size = size->size; - ctx->file_count = size->file_count; - ctx->dir_count = size->dir_count; - } - UNLOCK (&ctx->lock); + if (size) { + LOCK(&ctx->lock); + { + ctx->size = size->size; + ctx->file_count = size->file_count; + ctx->dir_count = size->dir_count; } + UNLOCK(&ctx->lock); + } - if (contri) { - LOCK (&contribution->lock); - { - contribution->contribution = contri->size; - contribution->file_count = contri->file_count; - contribution->dir_count = contri->dir_count; - } - UNLOCK (&contribution->lock); + if (contri) { + LOCK(&contribution->lock); + { + contribution->contribution = contri->size; + contribution->file_count = contri->file_count; + contribution->dir_count = contri->dir_count; } + UNLOCK(&contribution->lock); + } out: - return ret; + return ret; } int32_t -mq_get_size (xlator_t *this, loc_t *loc, quota_meta_t *size) +mq_get_delta(xlator_t *this, loc_t *loc, quota_meta_t *delta, + quota_inode_ctx_t *ctx, inode_contribution_t *contribution) { - return _mq_get_metadata (this, loc, NULL, size, 0); -} - -int32_t -mq_get_contri (xlator_t *this, loc_t *loc, quota_meta_t *contri, - uuid_t contri_gfid) -{ - 
return _mq_get_metadata (this, loc, contri, NULL, contri_gfid); -} - -int32_t -mq_get_delta (xlator_t *this, loc_t *loc, quota_meta_t *delta, - quota_inode_ctx_t *ctx, inode_contribution_t *contribution) -{ - int32_t ret = -1; - quota_meta_t size = {0, }; - quota_meta_t contri = {0, }; - - GF_VALIDATE_OR_GOTO ("marker", loc, out); - GF_VALIDATE_OR_GOTO ("marker", loc->inode, out); - GF_VALIDATE_OR_GOTO ("marker", ctx, out); - GF_VALIDATE_OR_GOTO ("marker", contribution, out); - - ret = mq_get_metadata (this, loc, &contri, &size, ctx, contribution); - if (ret < 0) - goto out; + int32_t ret = -1; + quota_meta_t size = { + 0, + }; + quota_meta_t contri = { + 0, + }; + + GF_VALIDATE_OR_GOTO("marker", loc, out); + GF_VALIDATE_OR_GOTO("marker", loc->inode, out); + GF_VALIDATE_OR_GOTO("marker", ctx, out); + GF_VALIDATE_OR_GOTO("marker", contribution, out); + + ret = mq_get_metadata(this, loc, &contri, &size, ctx, contribution); + if (ret < 0) + goto out; - mq_compute_delta (delta, &size, &contri); + mq_compute_delta(delta, &size, &contri); out: - return ret; + return ret; } int32_t -mq_remove_contri (xlator_t *this, loc_t *loc, quota_inode_ctx_t *ctx, - inode_contribution_t *contri, quota_meta_t *delta, - uint32_t nlink) +mq_remove_contri(xlator_t *this, loc_t *loc, quota_inode_ctx_t *ctx, + inode_contribution_t *contri, quota_meta_t *delta, + uint32_t nlink) { - int32_t ret = -1; - char contri_key[QUOTA_KEY_MAX] = {0, }; - - if (nlink == 1) { - /*File was a last link and has been deleted */ - ret = 0; - goto done; - } + int32_t ret = -1; + char contri_key[QUOTA_KEY_MAX] = { + 0, + }; - GET_CONTRI_KEY (this, contri_key, contri->gfid, ret); - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, "get contri_key " - "failed for %s", uuid_utoa(contri->gfid)); - goto out; - } - - ret = syncop_removexattr (FIRST_CHILD(this), loc, contri_key, 0, NULL); - if (ret < 0) { - if (-ret == ENOENT || -ret == ESTALE || -ret == ENODATA || - -ret == ENOATTR) { - /* Remove contri in done when 
unlink operation is - * performed, so return success on ENOENT/ESTSLE - * rename operation removes xattr earlier, - * so return success on ENODATA - */ - ret = 0; - } else { - gf_log_callingfn (this->name, GF_LOG_ERROR, - "removexattr %s failed for %s: %s", - contri_key, loc->path, - strerror (-ret)); - goto out; - } + if (nlink == 1) { + /*File was a last link and has been deleted */ + ret = 0; + goto done; + } + + GET_CONTRI_KEY(this, contri_key, contri->gfid, ret); + if (ret < 0) { + gf_log(this->name, GF_LOG_ERROR, + "get contri_key " + "failed for %s", + uuid_utoa(contri->gfid)); + goto out; + } + + ret = syncop_removexattr(FIRST_CHILD(this), loc, contri_key, 0, NULL); + if (ret < 0) { + if (-ret == ENOENT || -ret == ESTALE || -ret == ENODATA || + -ret == ENOATTR) { + /* Remove contri in done when unlink operation is + * performed, so return success on ENOENT/ESTSLE + * rename operation removes xattr earlier, + * so return success on ENODATA + */ + ret = 0; + } else { + gf_log_callingfn(this->name, GF_LOG_ERROR, + "removexattr %s failed for %s: %s", contri_key, + loc->path, strerror(-ret)); + goto out; } + } done: - LOCK (&contri->lock); - { - contri->contribution += delta->size; - contri->file_count += delta->file_count; - contri->dir_count += delta->dir_count; - } - UNLOCK (&contri->lock); + LOCK(&contri->lock); + { + contri->contribution += delta->size; + contri->file_count += delta->file_count; + contri->dir_count += delta->dir_count; + } + UNLOCK(&contri->lock); - ret = 0; + ret = 0; out: - QUOTA_FREE_CONTRIBUTION_NODE (ctx, contri); + QUOTA_FREE_CONTRIBUTION_NODE(ctx, contri); - return ret; + return ret; } int32_t -mq_update_contri (xlator_t *this, loc_t *loc, inode_contribution_t *contri, - quota_meta_t *delta) +mq_update_contri(xlator_t *this, loc_t *loc, inode_contribution_t *contri, + quota_meta_t *delta) { - int32_t ret = -1; - char contri_key[QUOTA_KEY_MAX] = {0, }; - dict_t *dict = NULL; - - GF_VALIDATE_OR_GOTO ("marker", loc, out); - 
GF_VALIDATE_OR_GOTO ("marker", loc->inode, out); - GF_VALIDATE_OR_GOTO ("marker", delta, out); - GF_VALIDATE_OR_GOTO ("marker", contri, out); - - if (quota_meta_is_null (delta)) { - ret = 0; - goto out; - } - - dict = dict_new (); - if (!dict) { - gf_log (this->name, GF_LOG_ERROR, "dict_new failed"); - ret = -1; - goto out; - } + int32_t ret = -1; + char contri_key[QUOTA_KEY_MAX] = { + 0, + }; + dict_t *dict = NULL; + + GF_VALIDATE_OR_GOTO("marker", loc, out); + GF_VALIDATE_OR_GOTO("marker", loc->inode, out); + GF_VALIDATE_OR_GOTO("marker", delta, out); + GF_VALIDATE_OR_GOTO("marker", contri, out); + + if (quota_meta_is_null(delta)) { + ret = 0; + goto out; + } - GET_CONTRI_KEY (this, contri_key, contri->gfid, ret); - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, "get contri_key " - "failed for %s", uuid_utoa(contri->gfid)); - goto out; - } + dict = dict_new(); + if (!dict) { + gf_log(this->name, GF_LOG_ERROR, "dict_new failed"); + ret = -1; + goto out; + } + + GET_CONTRI_KEY(this, contri_key, contri->gfid, ret); + if (ret < 0) { + gf_log(this->name, GF_LOG_ERROR, + "get contri_key " + "failed for %s", + uuid_utoa(contri->gfid)); + goto out; + } - ret = quota_dict_set_meta (dict, contri_key, delta, - loc->inode->ia_type); - if (ret < 0) - goto out; + ret = quota_dict_set_meta(dict, contri_key, delta, loc->inode->ia_type); + if (ret < 0) + goto out; - ret = syncop_xattrop(FIRST_CHILD(this), loc, GF_XATTROP_ADD_ARRAY64, - dict, NULL, NULL); - if (ret < 0) { - gf_log_callingfn (this->name, (-ret == ENOENT || -ret == ESTALE) - ? GF_LOG_DEBUG:GF_LOG_ERROR, "xattrop failed " - "for %s: %s", loc->path, strerror (-ret)); - goto out; - } + ret = syncop_xattrop(FIRST_CHILD(this), loc, GF_XATTROP_ADD_ARRAY64, dict, + NULL, NULL, NULL); + if (ret < 0) { + gf_log_callingfn( + this->name, + (-ret == ENOENT || -ret == ESTALE) ? 
GF_LOG_DEBUG : GF_LOG_ERROR, + "xattrop failed " + "for %s: %s", + loc->path, strerror(-ret)); + goto out; + } - LOCK (&contri->lock); - { - contri->contribution += delta->size; - contri->file_count += delta->file_count; - contri->dir_count += delta->dir_count; - } - UNLOCK (&contri->lock); + LOCK(&contri->lock); + { + contri->contribution += delta->size; + contri->file_count += delta->file_count; + contri->dir_count += delta->dir_count; + } + UNLOCK(&contri->lock); out: - if (dict) - dict_unref (dict); + if (dict) + dict_unref(dict); - return ret; + return ret; } int32_t -mq_update_size (xlator_t *this, loc_t *loc, quota_meta_t *delta) +mq_update_size(xlator_t *this, loc_t *loc, quota_meta_t *delta) { - int32_t ret = -1; - quota_inode_ctx_t *ctx = NULL; - dict_t *dict = NULL; + int32_t ret = -1; + quota_inode_ctx_t *ctx = NULL; + dict_t *dict = NULL; - GF_VALIDATE_OR_GOTO ("marker", loc, out); - GF_VALIDATE_OR_GOTO ("marker", loc->inode, out); - GF_VALIDATE_OR_GOTO ("marker", delta, out); + GF_VALIDATE_OR_GOTO("marker", loc, out); + GF_VALIDATE_OR_GOTO("marker", loc->inode, out); + GF_VALIDATE_OR_GOTO("marker", delta, out); - if (quota_meta_is_null (delta)) { - ret = 0; - goto out; - } - - ret = mq_inode_ctx_get (loc->inode, this, &ctx); - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, "failed to get inode ctx for " - "%s", loc->path); - goto out; - } - - dict = dict_new (); - if (!dict) { - gf_log (this->name, GF_LOG_ERROR, "dict_new failed"); - ret = -1; - goto out; - } + if (quota_meta_is_null(delta)) { + ret = 0; + goto out; + } + + ret = mq_inode_ctx_get(loc->inode, this, &ctx); + if (ret < 0) { + gf_log(this->name, GF_LOG_ERROR, + "failed to get inode ctx for " + "%s", + loc->path); + goto out; + } - ret = quota_dict_set_size_meta (this, dict, delta); - if (ret < 0) - goto out; + dict = dict_new(); + if (!dict) { + gf_log(this->name, GF_LOG_ERROR, "dict_new failed"); + ret = -1; + goto out; + } - ret = syncop_xattrop(FIRST_CHILD(this), loc, - 
GF_XATTROP_ADD_ARRAY64_WITH_DEFAULT, dict, NULL, - NULL); - if (ret < 0) { - gf_log_callingfn (this->name, (-ret == ENOENT || -ret == ESTALE) - ? GF_LOG_DEBUG:GF_LOG_ERROR, "xattrop failed " - "for %s: %s", loc->path, strerror (-ret)); - goto out; - } + ret = quota_dict_set_size_meta(this, dict, delta); + if (ret < 0) + goto out; - LOCK (&ctx->lock); - { - ctx->size += delta->size; - ctx->file_count += delta->file_count; - if (ctx->dir_count == 0) - ctx->dir_count += delta->dir_count + 1; - else - ctx->dir_count += delta->dir_count; - } - UNLOCK (&ctx->lock); + ret = syncop_xattrop(FIRST_CHILD(this), loc, + GF_XATTROP_ADD_ARRAY64_WITH_DEFAULT, dict, NULL, NULL, + NULL); + if (ret < 0) { + gf_log_callingfn( + this->name, + (-ret == ENOENT || -ret == ESTALE) ? GF_LOG_DEBUG : GF_LOG_ERROR, + "xattrop failed " + "for %s: %s", + loc->path, strerror(-ret)); + goto out; + } + + LOCK(&ctx->lock); + { + ctx->size += delta->size; + ctx->file_count += delta->file_count; + if (ctx->dir_count == 0) + ctx->dir_count += delta->dir_count + 1; + else + ctx->dir_count += delta->dir_count; + } + UNLOCK(&ctx->lock); out: - if (dict) - dict_unref (dict); + if (dict) + dict_unref(dict); - return ret; + return ret; } int -mq_synctask_cleanup (int ret, call_frame_t *frame, void *opaque) +mq_synctask_cleanup(int ret, call_frame_t *frame, void *opaque) { - quota_synctask_t *args = NULL; + quota_synctask_t *args = NULL; - GF_ASSERT (opaque); + GF_ASSERT(opaque); - args = (quota_synctask_t *) opaque; - loc_wipe (&args->loc); + args = (quota_synctask_t *)opaque; + loc_wipe(&args->loc); - if (!args->is_static) - GF_FREE (args); + if (args->stub) + call_resume(args->stub); - return 0; + if (!args->is_static) + GF_FREE(args); + + return 0; } int -mq_synctask1 (xlator_t *this, synctask_fn_t task, gf_boolean_t spawn, - loc_t *loc, quota_meta_t *contri, uint32_t nlink) +mq_synctask1(xlator_t *this, synctask_fn_t task, gf_boolean_t spawn, loc_t *loc, + quota_meta_t *contri, uint32_t nlink, 
call_stub_t *stub) { - int32_t ret = -1; - quota_synctask_t *args = NULL; - quota_synctask_t static_args = {0, }; - - if (spawn) { - QUOTA_ALLOC_OR_GOTO (args, quota_synctask_t, ret, out); - args->is_static = _gf_false; - } else { - args = &static_args; - args->is_static = _gf_true; - } - - args->this = this; - loc_copy (&args->loc, loc); - args->ia_nlink = nlink; - - if (contri) { - args->contri = *contri; - } else { - args->contri.size = -1; - args->contri.file_count = -1; - args->contri.dir_count = -1; - } - - if (spawn) { - ret = synctask_new1 (this->ctx->env, 1024 * 16, task, - mq_synctask_cleanup, NULL, args); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Failed to spawn " - "new synctask"); - mq_synctask_cleanup (ret, NULL, args); - } - } else { - ret = task (args); - mq_synctask_cleanup (ret, NULL, args); - } + int32_t ret = -1; + quota_synctask_t *args = NULL; + quota_synctask_t static_args = { + 0, + }; + + if (spawn) { + QUOTA_ALLOC_OR_GOTO(args, quota_synctask_t, ret, out); + args->is_static = _gf_false; + } else { + args = &static_args; + args->is_static = _gf_true; + } + + args->this = this; + args->stub = stub; + loc_copy(&args->loc, loc); + args->ia_nlink = nlink; + + if (contri) { + args->contri = *contri; + } else { + args->contri.size = -1; + args->contri.file_count = -1; + args->contri.dir_count = -1; + } + + if (spawn) { + ret = synctask_new1(this->ctx->env, 1024 * 16, task, + mq_synctask_cleanup, NULL, args); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, + "Failed to spawn " + "new synctask"); + mq_synctask_cleanup(ret, NULL, args); + } + } else { + ret = task(args); + mq_synctask_cleanup(ret, NULL, args); + } out: - return ret; + return ret; } int -mq_synctask (xlator_t *this, synctask_fn_t task, gf_boolean_t spawn, loc_t *loc) +mq_synctask(xlator_t *this, synctask_fn_t task, gf_boolean_t spawn, loc_t *loc) { - return mq_synctask1 (this, task, spawn, loc, NULL, -1); + return mq_synctask1(this, task, spawn, loc, NULL, -1, NULL); } 
int32_t -mq_prevalidate_txn (xlator_t *this, loc_t *origin_loc, loc_t *loc, - quota_inode_ctx_t **ctx, struct iatt *buf) +mq_prevalidate_txn(xlator_t *this, loc_t *origin_loc, loc_t *loc, + quota_inode_ctx_t **ctx, struct iatt *buf) { - int32_t ret = -1; - quota_inode_ctx_t *ctxtmp = NULL; + int32_t ret = -1; + quota_inode_ctx_t *ctxtmp = NULL; - if (buf) { - if (buf->ia_type == IA_IFREG && IS_DHT_LINKFILE_MODE(buf)) - goto out; + if (buf) { + if (buf->ia_type == IA_IFREG && IS_DHT_LINKFILE_MODE(buf)) + goto out; - if (buf->ia_type != IA_IFREG && buf->ia_type != IA_IFLNK && - buf->ia_type != IA_IFDIR) - goto out; - } + if (buf->ia_type != IA_IFREG && buf->ia_type != IA_IFLNK && + buf->ia_type != IA_IFDIR) + goto out; + } - if (origin_loc == NULL || origin_loc->inode == NULL || - gf_uuid_is_null(origin_loc->inode->gfid)) - goto out; + if (origin_loc == NULL || origin_loc->inode == NULL || + gf_uuid_is_null(origin_loc->inode->gfid)) + goto out; - loc_copy (loc, origin_loc); + loc_copy(loc, origin_loc); - if (gf_uuid_is_null (loc->gfid)) - gf_uuid_copy (loc->gfid, loc->inode->gfid); + if (gf_uuid_is_null(loc->gfid)) + gf_uuid_copy(loc->gfid, loc->inode->gfid); - if (!loc_is_root(loc) && loc->parent == NULL) { - loc->parent = inode_parent (loc->inode, 0, NULL); - if (loc->parent == NULL) { - ret = -1; - goto out; - } - } + if (!loc_is_root(loc) && loc->parent == NULL) + loc->parent = inode_parent(loc->inode, 0, NULL); - if (ctx) - ret = mq_inode_ctx_get (loc->inode, this, ctx); - else - ret = mq_inode_ctx_get (loc->inode, this, &ctxtmp); - - if (ret < 0) { - if (ctx) { - *ctx = mq_inode_ctx_new (loc->inode, this); - if (*ctx == NULL) { - gf_log_callingfn (this->name, GF_LOG_WARNING, - "mq_inode_ctx_new failed for " - "%s", loc->path); - ret = -1; - goto out; - } - } else { - gf_log_callingfn (this->name, GF_LOG_WARNING, "ctx for " - "is NULL for %s", loc->path); - } - } + ret = mq_inode_ctx_get(loc->inode, this, &ctxtmp); + if (ret < 0) { + gf_log_callingfn(this->name, 
GF_LOG_WARNING, + "inode ctx for " + "is NULL for %s", + loc->path); + goto out; + } + if (ctx) + *ctx = ctxtmp; - ret = 0; + ret = 0; out: - return ret; + return ret; } int -mq_create_xattrs_task (void *opaque) +mq_create_xattrs_task(void *opaque) { - int32_t ret = -1; - gf_boolean_t locked = _gf_false; - gf_boolean_t contri_set = _gf_false; - gf_boolean_t size_set = _gf_false; - gf_boolean_t need_txn = _gf_false; - quota_synctask_t *args = NULL; - quota_inode_ctx_t *ctx = NULL; - xlator_t *this = NULL; - loc_t *loc = NULL; - gf_boolean_t status = _gf_false; - - GF_ASSERT (opaque); - - args = (quota_synctask_t *) opaque; - loc = &args->loc; - this = args->this; - THIS = this; - - ret = mq_inode_ctx_get (loc->inode, this, &ctx); - if (ret < 0) { - gf_log (this->name, GF_LOG_WARNING, "Failed to" - "get inode ctx, aborting quota create txn"); - goto out; - } + int32_t ret = -1; + gf_boolean_t locked = _gf_false; + gf_boolean_t contri_set = _gf_false; + gf_boolean_t size_set = _gf_false; + gf_boolean_t need_txn = _gf_false; + quota_synctask_t *args = NULL; + quota_inode_ctx_t *ctx = NULL; + xlator_t *this = NULL; + loc_t *loc = NULL; + gf_boolean_t status = _gf_false; + + GF_ASSERT(opaque); + + args = (quota_synctask_t *)opaque; + loc = &args->loc; + this = args->this; + THIS = this; + + ret = mq_inode_ctx_get(loc->inode, this, &ctx); + if (ret < 0) { + gf_log(this->name, GF_LOG_WARNING, + "Failed to" + "get inode ctx, aborting quota create txn"); + goto out; + } - if (loc->inode->ia_type == IA_IFDIR) { - /* lock not required for files */ - ret = mq_lock (this, loc, F_WRLCK); - if (ret < 0) - goto out; - locked = _gf_true; - } + if (loc->inode->ia_type == IA_IFDIR) { + /* lock not required for files */ + ret = mq_lock(this, loc, F_WRLCK); + if (ret < 0) + goto out; + locked = _gf_true; + } - ret = mq_are_xattrs_set (this, loc, &contri_set, &size_set); - if (ret < 0 || (contri_set && size_set)) - goto out; + ret = mq_are_xattrs_set(this, loc, &contri_set, &size_set); + 
if (ret < 0 || (contri_set && size_set)) + goto out; - mq_set_ctx_create_status (ctx, _gf_false); - status = _gf_true; + mq_set_ctx_create_status(ctx, _gf_false); + status = _gf_true; - if (loc->inode->ia_type == IA_IFDIR && size_set == _gf_false) { - ret = mq_create_size_xattrs (this, ctx, loc); - if (ret < 0) - goto out; - } + if (loc->inode->ia_type == IA_IFDIR && size_set == _gf_false) { + ret = mq_create_size_xattrs(this, ctx, loc); + if (ret < 0) + goto out; + } - need_txn = _gf_true; + need_txn = _gf_true; out: - if (locked) - ret = mq_lock (this, loc, F_UNLCK); + if (locked) + ret = mq_lock(this, loc, F_UNLCK); - if (status == _gf_false) - mq_set_ctx_create_status (ctx, _gf_false); + if (status == _gf_false) + mq_set_ctx_create_status(ctx, _gf_false); - if (need_txn) - ret = mq_initiate_quota_blocking_txn (this, loc, NULL); + if (need_txn) + ret = mq_initiate_quota_blocking_txn(this, loc, NULL); - return ret; + return ret; } static int -_mq_create_xattrs_txn (xlator_t *this, loc_t *origin_loc, struct iatt *buf, - gf_boolean_t spawn) +_mq_create_xattrs_txn(xlator_t *this, loc_t *origin_loc, struct iatt *buf, + gf_boolean_t spawn) { - int32_t ret = -1; - quota_inode_ctx_t *ctx = NULL; - gf_boolean_t status = _gf_true; - loc_t loc = {0, }; - inode_contribution_t *contribution = NULL; - - ret = mq_prevalidate_txn (this, origin_loc, &loc, &ctx, buf); - if (ret < 0) - goto out; + int32_t ret = -1; + quota_inode_ctx_t *ctx = NULL; + gf_boolean_t status = _gf_true; + loc_t loc = { + 0, + }; + inode_contribution_t *contribution = NULL; + + ret = mq_prevalidate_txn(this, origin_loc, &loc, &ctx, buf); + if (ret < 0) + goto out; - ret = mq_test_and_set_ctx_create_status (ctx, &status); - if (ret < 0 || status == _gf_true) - goto out; + ret = mq_test_and_set_ctx_create_status(ctx, &status); + if (ret < 0 || status == _gf_true) + goto out; - if (!loc_is_root(&loc)) { - contribution = mq_add_new_contribution_node (this, ctx, &loc); - if (contribution == NULL) { - gf_log 
(this->name, GF_LOG_WARNING, - "cannot add a new contribution node " - "(%s)", uuid_utoa (loc.gfid)); - ret = -1; - goto out; - } else { - GF_REF_PUT (contribution); - } + if (!loc_is_root(&loc) && loc.parent) { + contribution = mq_add_new_contribution_node(this, ctx, &loc); + if (contribution == NULL) { + gf_log(this->name, GF_LOG_WARNING, + "cannot add a new contribution node " + "(%s)", + uuid_utoa(loc.gfid)); + ret = -1; + goto out; + } else { + GF_REF_PUT(contribution); } + } - ret = mq_synctask (this, mq_create_xattrs_task, spawn, &loc); + ret = mq_synctask(this, mq_create_xattrs_task, spawn, &loc); out: - if (ret < 0 && status == _gf_false) - mq_set_ctx_create_status (ctx, _gf_false); + if (ret < 0 && status == _gf_false) + mq_set_ctx_create_status(ctx, _gf_false); - loc_wipe (&loc); - return ret; + loc_wipe(&loc); + return ret; } int -mq_create_xattrs_txn (xlator_t *this, loc_t *loc, struct iatt *buf) +mq_create_xattrs_txn(xlator_t *this, loc_t *loc, struct iatt *buf) { - int32_t ret = -1; + int32_t ret = -1; - GF_VALIDATE_OR_GOTO ("marker", loc, out); - GF_VALIDATE_OR_GOTO ("marker", loc->inode, out); + GF_VALIDATE_OR_GOTO("marker", loc, out); + GF_VALIDATE_OR_GOTO("marker", loc->inode, out); - ret = _mq_create_xattrs_txn (this, loc, buf, _gf_true); + ret = _mq_create_xattrs_txn(this, loc, buf, _gf_true); out: - return ret; -} - -int -mq_create_xattrs_blocking_txn (xlator_t *this, loc_t *loc, struct iatt *buf) -{ - int32_t ret = -1; - - GF_VALIDATE_OR_GOTO ("marker", loc, out); - GF_VALIDATE_OR_GOTO ("marker", loc->inode, out); - - ret = _mq_create_xattrs_txn (this, loc, buf, _gf_false); -out: - return ret; + return ret; } int32_t -mq_reduce_parent_size_task (void *opaque) +mq_reduce_parent_size_task(void *opaque) { - int32_t ret = -1; - int32_t prev_dirty = 0; - quota_inode_ctx_t *ctx = NULL; - quota_inode_ctx_t *parent_ctx = NULL; - inode_contribution_t *contribution = NULL; - quota_meta_t delta = {0, }; - quota_meta_t contri = {0, }; - loc_t parent_loc 
= {0,}; - gf_boolean_t locked = _gf_false; - gf_boolean_t dirty = _gf_false; - quota_synctask_t *args = NULL; - xlator_t *this = NULL; - loc_t *loc = NULL; - gf_boolean_t remove_xattr = _gf_true; - uint32_t nlink = 0; - - GF_ASSERT (opaque); - - args = (quota_synctask_t *) opaque; - loc = &args->loc; - contri = args->contri; - nlink = args->ia_nlink; - this = args->this; - THIS = this; - - ret = mq_inode_loc_fill (NULL, loc->parent, &parent_loc); + int32_t ret = -1; + int32_t prev_dirty = 0; + quota_inode_ctx_t *ctx = NULL; + quota_inode_ctx_t *parent_ctx = NULL; + inode_contribution_t *contribution = NULL; + quota_meta_t delta = { + 0, + }; + quota_meta_t contri = { + 0, + }; + loc_t parent_loc = { + 0, + }; + gf_boolean_t locked = _gf_false; + gf_boolean_t dirty = _gf_false; + quota_synctask_t *args = NULL; + xlator_t *this = NULL; + loc_t *loc = NULL; + gf_boolean_t remove_xattr = _gf_true; + uint32_t nlink = 0; + + GF_ASSERT(opaque); + + args = (quota_synctask_t *)opaque; + loc = &args->loc; + contri = args->contri; + nlink = args->ia_nlink; + this = args->this; + THIS = this; + + ret = mq_inode_loc_fill(NULL, loc->parent, &parent_loc); + if (ret < 0) { + gf_log(this->name, GF_LOG_ERROR, + "parent_loc fill failed for " + "child inode %s: ", + uuid_utoa(loc->inode->gfid)); + goto out; + } + + ret = mq_lock(this, &parent_loc, F_WRLCK); + if (ret < 0) + goto out; + locked = _gf_true; + + if (contri.size >= 0) { + /* contri parameter is supplied only for rename operation. 
+ * remove xattr is alreday performed, we need to skip + * removexattr for rename operation + */ + remove_xattr = _gf_false; + delta.size = contri.size; + delta.file_count = contri.file_count; + delta.dir_count = contri.dir_count; + } else { + remove_xattr = _gf_true; + + ret = mq_inode_ctx_get(loc->inode, this, &ctx); if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, "loc fill failed"); - goto out; + gf_log_callingfn(this->name, GF_LOG_WARNING, + "ctx for" + " the node %s is NULL", + loc->path); + goto out; } - ret = mq_lock (this, &parent_loc, F_WRLCK); - if (ret < 0) - goto out; - locked = _gf_true; + contribution = mq_get_contribution_node(loc->parent, ctx); + if (contribution == NULL) { + ret = -1; + gf_log(this->name, GF_LOG_DEBUG, + "contribution for the node %s is NULL", loc->path); + goto out; + } - if (contri.size >= 0) { - /* contri paramater is supplied only for rename operation. - * remove xattr is alreday performed, we need to skip - * removexattr for rename operation - */ - remove_xattr = _gf_false; - delta.size = contri.size; - delta.file_count = contri.file_count; - delta.dir_count = contri.dir_count; - } else { - remove_xattr = _gf_true; - - ret = mq_inode_ctx_get (loc->inode, this, &ctx); - if (ret < 0) { - gf_log_callingfn (this->name, GF_LOG_WARNING, "ctx for" - " the node %s is NULL", loc->path); - goto out; - } - - contribution = mq_get_contribution_node (loc->parent, ctx); - if (contribution == NULL) { - ret = -1; - gf_log (this->name, GF_LOG_DEBUG, - "contribution for the node %s is NULL", - loc->path); - goto out; - } - - LOCK (&contribution->lock); - { - delta.size = contribution->contribution; - delta.file_count = contribution->file_count; - delta.dir_count = contribution->dir_count; - } - UNLOCK (&contribution->lock); + LOCK(&contribution->lock); + { + delta.size = contribution->contribution; + delta.file_count = contribution->file_count; + delta.dir_count = contribution->dir_count; } + UNLOCK(&contribution->lock); + } - ret = 
mq_get_set_dirty (this, &parent_loc, 1, &prev_dirty); - if (ret < 0) - goto out; - dirty = _gf_true; + ret = mq_get_set_dirty(this, &parent_loc, 1, &prev_dirty); + if (ret < 0) + goto out; + dirty = _gf_true; - mq_sub_meta (&delta, NULL); + mq_sub_meta(&delta, NULL); - if (remove_xattr) { - ret = mq_remove_contri (this, loc, ctx, contribution, &delta, - nlink); - if (ret < 0) - goto out; - } + if (remove_xattr) { + ret = mq_remove_contri(this, loc, ctx, contribution, &delta, nlink); + if (ret < 0) + goto out; + } - if (quota_meta_is_null (&delta)) - goto out; + if (quota_meta_is_null(&delta)) + goto out; - ret = mq_update_size (this, &parent_loc, &delta); - if (ret < 0) - goto out; + ret = mq_update_size(this, &parent_loc, &delta); + if (ret < 0) + goto out; out: - if (dirty) { - if (ret < 0 || prev_dirty) { - /* On failure clear dirty status flag. - * In the next lookup inspect_directory_xattr - * can set the status flag and fix the - * dirty directory. - * Do the same if dir was dirty before - * the txn - */ - ret = mq_inode_ctx_get (parent_loc.inode, this, - &parent_ctx); - mq_set_ctx_dirty_status (parent_ctx, _gf_false); - } else { - ret = mq_mark_dirty (this, &parent_loc, 0); - } + if (dirty) { + if (ret < 0 || prev_dirty) { + /* On failure clear dirty status flag. + * In the next lookup inspect_directory_xattr + * can set the status flag and fix the + * dirty directory. 
+ * Do the same if dir was dirty before + * the txn + */ + ret = mq_inode_ctx_get(parent_loc.inode, this, &parent_ctx); + if (ret == 0) + mq_set_ctx_dirty_status(parent_ctx, _gf_false); + } else { + ret = mq_mark_dirty(this, &parent_loc, 0); } + } - if (locked) - ret = mq_lock (this, &parent_loc, F_UNLCK); + if (locked) + ret = mq_lock(this, &parent_loc, F_UNLCK); - if (ret >= 0) - ret = mq_initiate_quota_blocking_txn (this, &parent_loc, NULL); + if (ret >= 0) + ret = mq_initiate_quota_blocking_txn(this, &parent_loc, NULL); - loc_wipe (&parent_loc); + loc_wipe(&parent_loc); - if (contribution) - GF_REF_PUT (contribution); + if (contribution) + GF_REF_PUT(contribution); - return ret; + return ret; } int32_t -mq_reduce_parent_size_txn (xlator_t *this, loc_t *origin_loc, - quota_meta_t *contri, uint32_t nlink) +mq_reduce_parent_size_txn(xlator_t *this, loc_t *origin_loc, + quota_meta_t *contri, uint32_t nlink, + call_stub_t *stub) { - int32_t ret = -1; - loc_t loc = {0, }; + int32_t ret = -1; + loc_t loc = { + 0, + }; + gf_boolean_t resume_stub = _gf_true; - GF_VALIDATE_OR_GOTO ("marker", this, out); - GF_VALIDATE_OR_GOTO ("marker", origin_loc, out); + GF_VALIDATE_OR_GOTO("marker", this, out); + GF_VALIDATE_OR_GOTO("marker", origin_loc, out); - ret = mq_prevalidate_txn (this, origin_loc, &loc, NULL, NULL); - if (ret < 0) - goto out; + ret = mq_prevalidate_txn(this, origin_loc, &loc, NULL, NULL); + if (ret < 0) + goto out; - if (loc_is_root(&loc)) { - ret = 0; - goto out; - } + if (loc_is_root(&loc)) { + ret = 0; + goto out; + } - ret = mq_synctask1 (this, mq_reduce_parent_size_task, _gf_true, &loc, - contri, nlink); + resume_stub = _gf_false; + ret = mq_synctask1(this, mq_reduce_parent_size_task, _gf_true, &loc, contri, + nlink, stub); out: - loc_wipe (&loc); - return ret; + loc_wipe(&loc); + + if (resume_stub && stub) + call_resume(stub); + + if (ret) + gf_log_callingfn(this ? 
this->name : "Marker", GF_LOG_ERROR, + "mq_reduce_parent_size_txn failed"); + + return ret; } int -mq_initiate_quota_task (void *opaque) +mq_initiate_quota_task(void *opaque) { - int32_t ret = -1; - int32_t prev_dirty = 0; - loc_t child_loc = {0,}; - loc_t parent_loc = {0,}; - gf_boolean_t locked = _gf_false; - gf_boolean_t dirty = _gf_false; - gf_boolean_t status = _gf_false; - quota_meta_t delta = {0, }; - quota_synctask_t *args = NULL; - xlator_t *this = NULL; - loc_t *loc = NULL; - inode_contribution_t *contri = NULL; - quota_inode_ctx_t *ctx = NULL; - quota_inode_ctx_t *parent_ctx = NULL; - inode_t *tmp_parent = NULL; - - GF_VALIDATE_OR_GOTO ("marker", opaque, out); - - args = (quota_synctask_t *) opaque; - loc = &args->loc; - this = args->this; - - GF_VALIDATE_OR_GOTO ("marker", this, out); - THIS = this; - - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, loc->inode, out); - - ret = mq_loc_copy (&child_loc, loc); + int32_t ret = -1; + int32_t prev_dirty = 0; + loc_t child_loc = { + 0, + }; + loc_t parent_loc = { + 0, + }; + gf_boolean_t locked = _gf_false; + gf_boolean_t dirty = _gf_false; + gf_boolean_t status = _gf_false; + quota_meta_t delta = { + 0, + }; + quota_synctask_t *args = NULL; + xlator_t *this = NULL; + loc_t *loc = NULL; + inode_contribution_t *contri = NULL; + quota_inode_ctx_t *ctx = NULL; + quota_inode_ctx_t *parent_ctx = NULL; + inode_t *tmp_parent = NULL; + + GF_VALIDATE_OR_GOTO("marker", opaque, out); + + args = (quota_synctask_t *)opaque; + loc = &args->loc; + this = args->this; + + GF_VALIDATE_OR_GOTO("marker", this, out); + THIS = this; + + GF_VALIDATE_OR_GOTO(this->name, loc, out); + GF_VALIDATE_OR_GOTO(this->name, loc->inode, out); + + ret = mq_loc_copy(&child_loc, loc); + if (ret < 0) { + gf_log(this->name, GF_LOG_ERROR, "loc copy failed"); + goto out; + } + + while (!__is_root_gfid(child_loc.gfid)) { + ret = mq_inode_ctx_get(child_loc.inode, this, &ctx); if (ret < 0) { - gf_log (this->name, 
GF_LOG_ERROR, "loc copy failed"); + gf_log(this->name, GF_LOG_WARNING, + "inode ctx get failed for %s, " + "aborting update txn", + child_loc.path); + goto out; + } + + /* To improve performance, abort current transaction + * if one is already in progress for same inode + */ + if (status == _gf_true) { + /* status will already set before txn start, + * so it should not be set in first + * loop iteration + */ + ret = mq_test_and_set_ctx_updation_status(ctx, &status); + if (ret < 0 || status == _gf_true) + goto out; + } + + if (child_loc.parent == NULL) { + ret = mq_build_ancestry(this, &child_loc); + if (ret < 0 || child_loc.parent == NULL) { + /* If application performs parallel remove + * operations on same set of files/directories + * then we may get ENOENT/ESTALE + */ + gf_log(this->name, + (-ret == ENOENT || -ret == ESTALE) ? GF_LOG_DEBUG + : GF_LOG_ERROR, + "build ancestry failed for inode %s", + uuid_utoa(child_loc.inode->gfid)); + ret = -1; goto out; + } + } + + ret = mq_inode_loc_fill(NULL, child_loc.parent, &parent_loc); + if (ret < 0) { + gf_log(this->name, GF_LOG_ERROR, + "parent_loc fill " + "failed for child inode %s: ", + uuid_utoa(child_loc.inode->gfid)); + goto out; } - while (!__is_root_gfid (child_loc.gfid)) { + ret = mq_lock(this, &parent_loc, F_WRLCK); + if (ret < 0) + goto out; + locked = _gf_true; - ret = mq_inode_ctx_get (child_loc.inode, this, &ctx); - if (ret < 0) { - gf_log (this->name, GF_LOG_WARNING, - "inode ctx get failed for %s, " - "aborting update txn", child_loc.path); - goto out; - } + mq_set_ctx_updation_status(ctx, _gf_false); + status = _gf_true; - /* To improve performance, abort current transaction - * if one is already in progress for same inode + /* Contribution node can be NULL in below scenarios and + create if needed: + + Scenario 1) + In this case create a new contribution node + Suppose hard link for a file f1 present in a directory d1 is + created in the directory d2 (as f2). 
Now, since d2's + contribution is not there in f1's inode ctx, d2's + contribution xattr won't be created and will create problems + for quota operations. + + Don't create contribution if parent has been changed after + taking a lock, this can happen when rename is performed + and writes is still in-progress for the same file + + Scenario 2) + When a rename operation is performed, contribution node + for olp path will be removed. + + Create contribution node only if oldparent is same as + newparent. + Consider below example + 1) rename FOP invoked on file 'x' + 2) write is still in progress for file 'x' + 3) rename takes a lock on old-parent + 4) write-update txn blocked on old-parent to acquire lock + 5) in rename_cbk, contri xattrs are removed and contribution + is deleted and lock is released + 6) now write-update txn gets the lock and updates the + wrong parent as it was holding lock on old parent + so validate parent once the lock is acquired + + For more information on this problem, please see + doc for marker_rename in file marker.c + */ + contri = mq_get_contribution_node(child_loc.parent, ctx); + if (contri == NULL) { + tmp_parent = inode_parent(child_loc.inode, 0, NULL); + if (tmp_parent == NULL) { + /* This can happen if application performs + * parallel remove operations on same set + * of files/directories */ - if (status == _gf_true) { - /* status will alreday set before txn start, - * so it should not be set in first - * loop iteration - */ - ret = mq_test_and_set_ctx_updation_status (ctx, - &status); - if (ret < 0 || status == _gf_true) - goto out; - } - - ret = mq_inode_loc_fill (NULL, child_loc.parent, &parent_loc); - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, "loc fill failed"); - goto out; - } - - ret = mq_lock (this, &parent_loc, F_WRLCK); - if (ret < 0) - goto out; - locked = _gf_true; - - mq_set_ctx_updation_status (ctx, _gf_false); - status = _gf_true; - - /* Contribution node can be NULL in below scenarios and - create if needed: - 
- Scenario 1) - In this case create a new contribution node - Suppose hard link for a file f1 present in a directory d1 is - created in the directory d2 (as f2). Now, since d2's - contribution is not there in f1's inode ctx, d2's - contribution xattr wont be created and will create problems - for quota operations. - - Don't create contribution if parent has been changed after - taking a lock, this can happen when rename is performed - and writes is still in-progress for the same file - - Scenario 2) - When a rename operation is performed, contribution node - for olp path will be removed. - - Create contribution node only if oldparent is same as - newparent. - Consider below example - 1) rename FOP invoked on file 'x' - 2) write is still in progress for file 'x' - 3) rename takes a lock on old-parent - 4) write-update txn blocked on old-parent to acquire lock - 5) in rename_cbk, contri xattrs are removed and contribution - is deleted and lock is released - 6) now write-update txn gets the lock and updates the - wrong parent as it was holding lock on old parent - so validate parent once the lock is acquired - - For more information on thsi problem, please see - doc for marker_rename in file marker.c - */ - contri = mq_get_contribution_node (child_loc.parent, ctx); - if (contri == NULL) { - tmp_parent = inode_parent (child_loc.inode, 0, NULL); - if (tmp_parent == NULL) { - ret = -1; - goto out; - } - if (gf_uuid_compare(tmp_parent->gfid, - parent_loc.gfid)) { - /* abort txn if parent has changed */ - ret = 0; - goto out; - } - - inode_unref (tmp_parent); - tmp_parent = NULL; - - contri = mq_add_new_contribution_node (this, ctx, - &child_loc); - if (contri == NULL) { - gf_log (this->name, GF_LOG_ERROR, "Failed to " - "create contribution node for %s, " - "abort update txn", child_loc.path); - ret = -1; - goto out; - } - } - - ret = mq_get_delta (this, &child_loc, &delta, ctx, contri); - if (ret < 0) - goto out; - - if (quota_meta_is_null (&delta)) - goto out; - - 
prev_dirty = 0; - ret = mq_get_set_dirty (this, &parent_loc, 1, &prev_dirty); - if (ret < 0) - goto out; - dirty = _gf_true; - - ret = mq_update_contri (this, &child_loc, contri, &delta); - if (ret < 0) - goto out; - - ret = mq_update_size (this, &parent_loc, &delta); - if (ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, "rollback " - "contri updation"); - mq_sub_meta (&delta, NULL); - mq_update_contri (this, &child_loc, contri, &delta); - goto out; - } - - if (prev_dirty == 0) { - ret = mq_mark_dirty (this, &parent_loc, 0); - dirty = _gf_false; - } - - ret = mq_lock (this, &parent_loc, F_UNLCK); - locked = _gf_false; - - if (__is_root_gfid (parent_loc.gfid)) - break; - - /* Repeate above steps upwards till the root */ - loc_wipe (&child_loc); - ret = mq_loc_copy (&child_loc, &parent_loc); - if (ret < 0) - goto out; - - loc_wipe (&parent_loc); - GF_REF_PUT (contri); - contri = NULL; + gf_log(this->name, GF_LOG_WARNING, + "parent is " + "NULL for inode %s", + uuid_utoa(child_loc.inode->gfid)); + ret = -1; + goto out; + } + if (gf_uuid_compare(tmp_parent->gfid, parent_loc.gfid)) { + /* abort txn if parent has changed */ + ret = 0; + goto out; + } + + inode_unref(tmp_parent); + tmp_parent = NULL; + + contri = mq_add_new_contribution_node(this, ctx, &child_loc); + if (contri == NULL) { + gf_log(this->name, GF_LOG_ERROR, + "Failed to " + "create contribution node for %s, " + "abort update txn", + child_loc.path); + ret = -1; + goto out; + } } -out: - if (dirty) { - if (ret < 0 || prev_dirty) { - /* On failure clear dirty status flag. - * In the next lookup inspect_directory_xattr - * can set the status flag and fix the - * dirty directory. 
- * Do the same if the dir was dirty before - * txn - */ - ret = mq_inode_ctx_get (parent_loc.inode, this, - &parent_ctx); - mq_set_ctx_dirty_status (parent_ctx, _gf_false); - } else { - ret = mq_mark_dirty (this, &parent_loc, 0); - } + ret = mq_get_delta(this, &child_loc, &delta, ctx, contri); + if (ret < 0) + goto out; + + if (quota_meta_is_null(&delta)) + goto out; + + ret = mq_get_set_dirty(this, &parent_loc, 1, &prev_dirty); + if (ret < 0) + goto out; + dirty = _gf_true; + + ret = mq_update_contri(this, &child_loc, contri, &delta); + if (ret < 0) + goto out; + + ret = mq_update_size(this, &parent_loc, &delta); + if (ret < 0) { + gf_log(this->name, GF_LOG_DEBUG, + "rollback " + "contri updation"); + mq_sub_meta(&delta, NULL); + mq_update_contri(this, &child_loc, contri, &delta); + goto out; } - if (locked) - ret = mq_lock (this, &parent_loc, F_UNLCK); + if (prev_dirty == 0) { + ret = mq_mark_dirty(this, &parent_loc, 0); + } else { + ret = mq_inode_ctx_get(parent_loc.inode, this, &parent_ctx); + if (ret == 0) + mq_set_ctx_dirty_status(parent_ctx, _gf_false); + } + dirty = _gf_false; + prev_dirty = 0; - if (ctx && status == _gf_false) - mq_set_ctx_updation_status (ctx, _gf_false); + ret = mq_lock(this, &parent_loc, F_UNLCK); + locked = _gf_false; - loc_wipe (&child_loc); - loc_wipe (&parent_loc); + if (__is_root_gfid(parent_loc.gfid)) + break; - if (tmp_parent) - inode_unref (tmp_parent); + /* Repeate above steps upwards till the root */ + loc_wipe(&child_loc); + ret = mq_loc_copy(&child_loc, &parent_loc); + if (ret < 0) + goto out; - if (contri) - GF_REF_PUT (contri); + loc_wipe(&parent_loc); + GF_REF_PUT(contri); + contri = NULL; + } - return 0; +out: + if ((dirty) && (ret < 0)) { + /* On failure clear dirty status flag. + * In the next lookup inspect_directory_xattr + * can set the status flag and fix the + * dirty directory. 
+ * Do the same if the dir was dirty before + * txn + */ + ret = mq_inode_ctx_get(parent_loc.inode, this, &parent_ctx); + if (ret == 0) + mq_set_ctx_dirty_status(parent_ctx, _gf_false); + } + + if (locked) + ret = mq_lock(this, &parent_loc, F_UNLCK); + + if (ctx && status == _gf_false) + mq_set_ctx_updation_status(ctx, _gf_false); + + loc_wipe(&child_loc); + loc_wipe(&parent_loc); + + if (tmp_parent) + inode_unref(tmp_parent); + + if (contri) + GF_REF_PUT(contri); + + return 0; } int -_mq_initiate_quota_txn (xlator_t *this, loc_t *origin_loc, struct iatt *buf, - gf_boolean_t spawn) +_mq_initiate_quota_txn(xlator_t *this, loc_t *origin_loc, struct iatt *buf, + gf_boolean_t spawn) { - int32_t ret = -1; - quota_inode_ctx_t *ctx = NULL; - gf_boolean_t status = _gf_true; - loc_t loc = {0,}; - - ret = mq_prevalidate_txn (this, origin_loc, &loc, &ctx, buf); - if (ret < 0) - goto out; + int32_t ret = -1; + quota_inode_ctx_t *ctx = NULL; + gf_boolean_t status = _gf_true; + loc_t loc = { + 0, + }; + + ret = mq_prevalidate_txn(this, origin_loc, &loc, &ctx, buf); + if (ret < 0) + goto out; - if (loc_is_root(&loc)) { - ret = 0; - goto out; - } + if (loc_is_root(&loc)) { + ret = 0; + goto out; + } - ret = mq_test_and_set_ctx_updation_status (ctx, &status); - if (ret < 0 || status == _gf_true) - goto out; + ret = mq_test_and_set_ctx_updation_status(ctx, &status); + if (ret < 0 || status == _gf_true) + goto out; - ret = mq_synctask (this, mq_initiate_quota_task, spawn, &loc); + ret = mq_synctask(this, mq_initiate_quota_task, spawn, &loc); out: - if (ret < 0 && status == _gf_false) - mq_set_ctx_updation_status (ctx, _gf_false); + if (ret < 0 && status == _gf_false) + mq_set_ctx_updation_status(ctx, _gf_false); - loc_wipe (&loc); - return ret; + loc_wipe(&loc); + return ret; } int -mq_initiate_quota_txn (xlator_t *this, loc_t *loc, struct iatt *buf) +mq_initiate_quota_txn(xlator_t *this, loc_t *loc, struct iatt *buf) { - int32_t ret = -1; + int32_t ret = -1; - GF_VALIDATE_OR_GOTO 
("marker", this, out); - GF_VALIDATE_OR_GOTO ("marker", loc, out); - GF_VALIDATE_OR_GOTO ("marker", loc->inode, out); + GF_VALIDATE_OR_GOTO("marker", this, out); + GF_VALIDATE_OR_GOTO("marker", loc, out); + GF_VALIDATE_OR_GOTO("marker", loc->inode, out); - ret = _mq_initiate_quota_txn (this, loc, buf, _gf_true); + ret = _mq_initiate_quota_txn(this, loc, buf, _gf_true); out: - return ret; + return ret; } int -mq_initiate_quota_blocking_txn (xlator_t *this, loc_t *loc, struct iatt *buf) +mq_initiate_quota_blocking_txn(xlator_t *this, loc_t *loc, struct iatt *buf) { - int32_t ret = -1; + int32_t ret = -1; - GF_VALIDATE_OR_GOTO ("marker", this, out); - GF_VALIDATE_OR_GOTO ("marker", loc, out); - GF_VALIDATE_OR_GOTO ("marker", loc->inode, out); + GF_VALIDATE_OR_GOTO("marker", this, out); + GF_VALIDATE_OR_GOTO("marker", loc, out); + GF_VALIDATE_OR_GOTO("marker", loc->inode, out); - ret = _mq_initiate_quota_txn (this, loc, buf, _gf_false); + ret = _mq_initiate_quota_txn(this, loc, buf, _gf_false); out: - return ret; + return ret; } int -mq_update_dirty_inode_task (void *opaque) +mq_update_dirty_inode_task(void *opaque) { - int32_t ret = -1; - fd_t *fd = NULL; - off_t offset = 0; - loc_t child_loc = {0, }; - gf_dirent_t entries; - gf_dirent_t *entry = NULL; - gf_boolean_t locked = _gf_false; - gf_boolean_t free_entries = _gf_false; - gf_boolean_t updated = _gf_false; - int32_t dirty = 0; - quota_meta_t contri = {0, }; - quota_meta_t size = {0, }; - quota_meta_t contri_sum = {0, }; - quota_meta_t delta = {0, }; - quota_synctask_t *args = NULL; - xlator_t *this = NULL; - loc_t *loc = NULL; - quota_inode_ctx_t *ctx = NULL; - - GF_ASSERT (opaque); - - args = (quota_synctask_t *) opaque; - loc = &args->loc; - this = args->this; - THIS = this; - - ret = mq_inode_ctx_get (loc->inode, this, &ctx); - if (ret < 0) - goto out; + int32_t ret = -1; + fd_t *fd = NULL; + off_t offset = 0; + gf_dirent_t entries; + gf_dirent_t *entry = NULL; + gf_boolean_t locked = _gf_false; + 
gf_boolean_t updated = _gf_false; + int32_t dirty = 0; + quota_meta_t contri = { + 0, + }; + quota_meta_t size = { + 0, + }; + quota_meta_t contri_sum = { + 0, + }; + quota_meta_t delta = { + 0, + }; + quota_synctask_t *args = NULL; + xlator_t *this = NULL; + loc_t *loc = NULL; + quota_inode_ctx_t *ctx = NULL; + dict_t *xdata = NULL; + char contri_key[QUOTA_KEY_MAX] = { + 0, + }; + int keylen = 0; + + GF_ASSERT(opaque); + + args = (quota_synctask_t *)opaque; + loc = &args->loc; + this = args->this; + THIS = this; + INIT_LIST_HEAD(&entries.list); + + ret = mq_inode_ctx_get(loc->inode, this, &ctx); + if (ret < 0) + goto out; - ret = mq_lock (this, loc, F_WRLCK); - if (ret < 0) - goto out; - locked = _gf_true; + GET_CONTRI_KEY(this, contri_key, loc->gfid, keylen); + if (keylen < 0) { + ret = keylen; + goto out; + } - ret = mq_get_dirty (this, loc, &dirty); - if (ret < 0 || dirty == 0) { - ret = 0; - goto out; - } + xdata = dict_new(); + if (xdata == NULL) { + gf_log(this->name, GF_LOG_ERROR, "dict_new failed"); + ret = -1; + goto out; + } - fd = fd_create (loc->inode, 0); - if (!fd) { - gf_log (this->name, GF_LOG_ERROR, "Failed to create fd"); - ret = -1; - goto out; - } + ret = dict_set_int64(xdata, contri_key, 0); + if (ret < 0) { + gf_log(this->name, GF_LOG_ERROR, "dict_set failed"); + goto out; + } + + ret = mq_lock(this, loc, F_WRLCK); + if (ret < 0) + goto out; + locked = _gf_true; + + ret = mq_get_dirty(this, loc, &dirty); + if (ret < 0 || dirty == 0) { + ret = 0; + goto out; + } + + fd = fd_create(loc->inode, 0); + if (!fd) { + gf_log(this->name, GF_LOG_ERROR, "Failed to create fd"); + ret = -1; + goto out; + } + + ret = syncop_opendir(this, loc, fd, NULL, NULL); + if (ret < 0) { + gf_log(this->name, + (-ret == ENOENT || -ret == ESTALE) ? 
GF_LOG_DEBUG : GF_LOG_ERROR, + "opendir failed " + "for %s: %s", + loc->path, strerror(-ret)); + goto out; + } - ret = syncop_opendir (this, loc, fd, NULL, NULL); + fd_bind(fd); + while ((ret = syncop_readdirp(this, fd, 131072, offset, &entries, xdata, + NULL)) != 0) { if (ret < 0) { - gf_log (this->name, (-ret == ENOENT || -ret == ESTALE) - ? GF_LOG_DEBUG:GF_LOG_ERROR, "opendir failed " - "for %s: %s", loc->path, strerror (-ret)); - goto out; + gf_log(this->name, + (-ret == ENOENT || -ret == ESTALE) ? GF_LOG_DEBUG + : GF_LOG_ERROR, + "readdirp failed " + "for %s: %s", + loc->path, strerror(-ret)); + goto out; } - fd_bind (fd); - INIT_LIST_HEAD (&entries.list); - while ((ret = syncop_readdirp (this, fd, 131072, offset, &entries, - NULL, NULL)) != 0) { - if (ret < 0) { - gf_log (this->name, (-ret == ENOENT || -ret == ESTALE) - ? GF_LOG_DEBUG:GF_LOG_ERROR, "readdirp failed " - "for %s: %s", loc->path, strerror (-ret)); - goto out; - } - - if (list_empty (&entries.list)) - break; - - free_entries = _gf_true; - list_for_each_entry (entry, &entries.list, list) { - offset = entry->d_off; - - if (!strcmp (entry->d_name, ".") || - !strcmp (entry->d_name, "..")) - continue; - - ret = loc_build_child (&child_loc, loc, entry->d_name); - if (ret < 0) { - gf_log (this->name, GF_LOG_WARNING, - "Couldn't build loc for %s/%s " - "returning from updation of dirty " - "inode", loc->path, entry->d_name); - goto out; - } - - ret = mq_get_contri (this, &child_loc, &contri, - loc->gfid); - if (ret < 0) - goto out; - - mq_add_meta (&contri_sum, &contri); - loc_wipe (&child_loc); - } - - gf_dirent_free (&entries); - free_entries = _gf_false; + if (list_empty(&entries.list)) + break; + + list_for_each_entry(entry, &entries.list, list) + { + offset = entry->d_off; + + if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) + continue; + + memset(&contri, 0, sizeof(contri)); + quota_dict_get_meta(entry->dict, contri_key, keylen, &contri); + if (quota_meta_is_null(&contri)) + 
continue; + + mq_add_meta(&contri_sum, &contri); } - /* Inculde for self */ - contri_sum.dir_count++; - ret = mq_get_size (this, loc, &size); - if (ret < 0) - goto out; + gf_dirent_free(&entries); + } + /* Inculde for self */ + contri_sum.dir_count++; - mq_compute_delta (&delta, &contri_sum, &size); + ret = _mq_get_metadata(this, loc, NULL, &size, 0); + if (ret < 0) + goto out; - if (quota_meta_is_null (&delta)) - goto out; + mq_compute_delta(&delta, &contri_sum, &size); - gf_log (this->name, GF_LOG_INFO, "calculated size = %"PRId64 - ", original size = %"PRIu64 ", diff = %"PRIu64 - ", path = %s ", contri_sum.size, size.size, delta.size, - loc->path); + if (quota_meta_is_null(&delta)) + goto out; - gf_log (this->name, GF_LOG_INFO, "calculated f_count = %"PRId64 - ", original f_count = %"PRIu64 ", diff = %"PRIu64 - ", path = %s ", contri_sum.file_count, size.file_count, - delta.file_count, loc->path); + gf_log(this->name, GF_LOG_INFO, + "calculated size = %" PRId64 ", original size = %" PRIu64 + ", diff = %" PRIu64 ", path = %s ", + contri_sum.size, size.size, delta.size, loc->path); - gf_log (this->name, GF_LOG_INFO, "calculated d_count = %"PRId64 - ", original d_count = %"PRIu64 ", diff = %"PRIu64 - ", path = %s ", contri_sum.dir_count, size.dir_count, - delta.dir_count, loc->path); + gf_log(this->name, GF_LOG_INFO, + "calculated f_count = %" PRId64 ", original f_count = %" PRIu64 + ", diff = %" PRIu64 ", path = %s ", + contri_sum.file_count, size.file_count, delta.file_count, loc->path); + gf_log(this->name, GF_LOG_INFO, + "calculated d_count = %" PRId64 ", original d_count = %" PRIu64 + ", diff = %" PRIu64 ", path = %s ", + contri_sum.dir_count, size.dir_count, delta.dir_count, loc->path); - ret = mq_update_size (this, loc, &delta); - if (ret < 0) - goto out; + ret = mq_update_size(this, loc, &delta); + if (ret < 0) + goto out; - updated = _gf_true; + updated = _gf_true; out: - if (free_entries) - gf_dirent_free (&entries); + gf_dirent_free(&entries); - if (fd) 
- fd_unref (fd); + if (fd) + fd_unref(fd); - if (ret < 0) { - /* On failure clear dirty status flag. - * In the next lookup inspect_directory_xattr - * can set the status flag and fix the - * dirty directory - */ - mq_set_ctx_dirty_status (ctx, _gf_false); - } else if (dirty) { - mq_mark_dirty (this, loc, 0); - } + if (xdata) + dict_unref(xdata); - if (locked) - mq_lock (this, loc, F_UNLCK); + if (ret < 0) { + /* On failure clear dirty status flag. + * In the next lookup inspect_directory_xattr + * can set the status flag and fix the + * dirty directory + */ + if (ctx) + mq_set_ctx_dirty_status(ctx, _gf_false); + } else if (dirty) { + mq_mark_dirty(this, loc, 0); + } - loc_wipe(&child_loc); + if (locked) + mq_lock(this, loc, F_UNLCK); - if (updated) - mq_initiate_quota_blocking_txn (this, loc, NULL); + if (updated) + mq_initiate_quota_blocking_txn(this, loc, NULL); - return ret; + return ret; } int32_t -mq_update_dirty_inode_txn (xlator_t *this, loc_t *loc, quota_inode_ctx_t *ctx) +mq_update_dirty_inode_txn(xlator_t *this, loc_t *loc, quota_inode_ctx_t *ctx) { - int32_t ret = -1; - gf_boolean_t status = _gf_true; + int32_t ret = -1; + gf_boolean_t status = _gf_true; - GF_VALIDATE_OR_GOTO ("marker", loc, out); - GF_VALIDATE_OR_GOTO ("marker", loc->inode, out); + GF_VALIDATE_OR_GOTO("marker", loc, out); + GF_VALIDATE_OR_GOTO("marker", loc->inode, out); - ret = mq_test_and_set_ctx_dirty_status (ctx, &status); - if (ret < 0 || status == _gf_true) - goto out; + mq_test_and_set_ctx_status(ctx, &ctx->dirty_status, &status); + if (status == _gf_true) + goto out; - ret = mq_synctask (this, mq_update_dirty_inode_task, _gf_true, loc); + ret = mq_synctask(this, mq_update_dirty_inode_task, _gf_true, loc); out: - if (ret < 0 && status == _gf_false) - mq_set_ctx_dirty_status (ctx, _gf_false); + if (ret < 0 && status == _gf_false) + mq_set_ctx_dirty_status(ctx, _gf_false); - return ret; + return ret; } int32_t -mq_inspect_directory_xattr (xlator_t *this, quota_inode_ctx_t *ctx, - 
inode_contribution_t *contribution, loc_t *loc, - dict_t *dict, struct iatt buf) +mq_inspect_directory_xattr(xlator_t *this, quota_inode_ctx_t *ctx, + inode_contribution_t *contribution, loc_t *loc, + dict_t *dict) { - int32_t ret = -1; - int8_t dirty = -1; - quota_meta_t size = {0, }; - quota_meta_t contri = {0, }; - quota_meta_t delta = {0, }; - char contri_key[QUOTA_KEY_MAX] = {0, }; - char size_key[QUOTA_KEY_MAX] = {0, }; - gf_boolean_t status = _gf_false; - - ret = dict_get_int8 (dict, QUOTA_DIRTY_KEY, &dirty); - if (ret < 0) { - /* dirty is set only on the first file write operation - * so ignore this error - */ - ret = 0; - dirty = 0; - } + int32_t ret = -1; + int8_t dirty = -1; + quota_meta_t size = { + 0, + }; + quota_meta_t contri = { + 0, + }; + quota_meta_t delta = { + 0, + }; + char contri_key[QUOTA_KEY_MAX] = { + 0, + }; + char size_key[QUOTA_KEY_MAX] = { + 0, + }; + int keylen = 0; + gf_boolean_t status = _gf_false; + + ret = dict_get_int8(dict, QUOTA_DIRTY_KEY, &dirty); + if (ret < 0) { + /* dirty is set only on the first file write operation + * so ignore this error + */ + ret = 0; + dirty = 0; + } - GET_SIZE_KEY (this, size_key, ret); - if (ret < 0) - goto out; - ret = _quota_dict_get_meta (this, dict, size_key, &size, - IA_IFDIR, _gf_false); + GET_SIZE_KEY(this, size_key, keylen); + if (keylen < 0) { + ret = -1; + goto out; + } + ret = _quota_dict_get_meta(this, dict, size_key, keylen, &size, IA_IFDIR, + _gf_false); + if (ret < 0) + goto create_xattr; + + if (!contribution) + goto create_xattr; + + if (!loc_is_root(loc)) { + GET_CONTRI_KEY(this, contri_key, contribution->gfid, keylen); + if (keylen < 0) { + ret = -1; + goto out; + } + ret = _quota_dict_get_meta(this, dict, contri_key, keylen, &contri, + IA_IFDIR, _gf_false); if (ret < 0) - goto create_xattr; - - if (!loc_is_root(loc)) { - GET_CONTRI_KEY (this, contri_key, contribution->gfid, ret); - if (ret < 0) - goto out; - - ret = _quota_dict_get_meta (this, dict, contri_key, &contri, - 
IA_IFDIR, _gf_false); - if (ret < 0) - goto create_xattr; - - LOCK (&contribution->lock); - { - contribution->contribution = contri.size; - contribution->file_count = contri.file_count; - contribution->dir_count = contri.dir_count; - } - UNLOCK (&contribution->lock); - } + goto create_xattr; - LOCK (&ctx->lock); + LOCK(&contribution->lock); { - ctx->size = size.size; - ctx->file_count = size.file_count; - ctx->dir_count = size.dir_count; - ctx->dirty = dirty; - } - UNLOCK (&ctx->lock); - - ret = mq_get_ctx_updation_status (ctx, &status); - if (ret < 0 || status == _gf_true) { - /* If the update txn is in progress abort inspection */ - ret = 0; - goto out; - } + contribution->contribution = contri.size; + contribution->file_count = contri.file_count; + contribution->dir_count = contri.dir_count; + } + UNLOCK(&contribution->lock); + } + + LOCK(&ctx->lock); + { + ctx->size = size.size; + ctx->file_count = size.file_count; + ctx->dir_count = size.dir_count; + ctx->dirty = dirty; + } + UNLOCK(&ctx->lock); + + ret = mq_get_ctx_updation_status(ctx, &status); + if (ret < 0 || status == _gf_true) { + /* If the update txn is in progress abort inspection */ + ret = 0; + goto out; + } - mq_compute_delta (&delta, &size, &contri); + mq_compute_delta(&delta, &size, &contri); - if (dirty) { - ret = mq_update_dirty_inode_txn (this, loc, ctx); - goto out; - } + if (dirty) { + ret = mq_update_dirty_inode_txn(this, loc, ctx); + goto out; + } - if (!loc_is_root(loc) && - !quota_meta_is_null (&delta)) - mq_initiate_quota_txn (this, loc, NULL); + if (!loc_is_root(loc) && !quota_meta_is_null(&delta)) + mq_initiate_quota_txn(this, loc, NULL); - ret = 0; - goto out; + ret = 0; + goto out; create_xattr: - if (ret < 0) - ret = mq_create_xattrs_txn (this, loc, NULL); + if (ret < 0) + ret = mq_create_xattrs_txn(this, loc, NULL); out: - return ret; + return ret; } int32_t -mq_inspect_file_xattr (xlator_t *this, quota_inode_ctx_t *ctx, - inode_contribution_t *contribution, loc_t *loc, - dict_t 
*dict, struct iatt buf) +mq_inspect_file_xattr(xlator_t *this, quota_inode_ctx_t *ctx, + inode_contribution_t *contribution, loc_t *loc, + dict_t *dict, struct iatt *buf) { - int32_t ret = -1; - quota_meta_t size = {0, }; - quota_meta_t contri = {0, }; - quota_meta_t delta = {0, }; - char contri_key[QUOTA_KEY_MAX] = {0, }; - gf_boolean_t status = _gf_false; - - LOCK (&ctx->lock); - { - ctx->size = 512 * buf.ia_blocks; - ctx->file_count = 1; - ctx->dir_count = 0; + int32_t ret = -1; + quota_meta_t size = { + 0, + }; + quota_meta_t contri = { + 0, + }; + quota_meta_t delta = { + 0, + }; + char contri_key[QUOTA_KEY_MAX] = { + 0, + }; + int keylen = 0; + gf_boolean_t status = _gf_false; + + if (!buf || !contribution || !ctx) + goto out; - size.size = ctx->size; - size.file_count = ctx->file_count; - size.dir_count = ctx->dir_count; + LOCK(&ctx->lock); + { + ctx->size = 512 * buf->ia_blocks; + ctx->file_count = 1; + ctx->dir_count = 0; + + size.size = ctx->size; + size.file_count = ctx->file_count; + size.dir_count = ctx->dir_count; + } + UNLOCK(&ctx->lock); + + GET_CONTRI_KEY(this, contri_key, contribution->gfid, keylen); + if (keylen < 0) { + ret = -1; + goto out; + } + + ret = _quota_dict_get_meta(this, dict, contri_key, keylen, &contri, + IA_IFREG, _gf_true); + if (ret < 0) { + ret = mq_create_xattrs_txn(this, loc, NULL); + } else { + LOCK(&contribution->lock); + { + contribution->contribution = contri.size; + contribution->file_count = contri.file_count; + contribution->dir_count = contri.dir_count; } - UNLOCK (&ctx->lock); + UNLOCK(&contribution->lock); - GET_CONTRI_KEY (this, contri_key, contribution->gfid, ret); - if (ret < 0) - goto out; - - ret = _quota_dict_get_meta (this, dict, contri_key, &contri, - IA_IFREG, _gf_true); - if (ret < 0) { - ret = mq_create_xattrs_txn (this, loc, NULL); - } else { - LOCK (&contribution->lock); - { - contribution->contribution = contri.size; - contribution->file_count = contri.file_count; - contribution->dir_count = 
contri.dir_count; - } - UNLOCK (&contribution->lock); - - ret = mq_get_ctx_updation_status (ctx, &status); - if (ret < 0 || status == _gf_true) { - /* If the update txn is in progress abort inspection */ - ret = 0; - goto out; - } - - mq_compute_delta (&delta, &size, &contri); - if (!quota_meta_is_null (&delta)) - mq_initiate_quota_txn (this, loc, NULL); + ret = mq_get_ctx_updation_status(ctx, &status); + if (ret < 0 || status == _gf_true) { + /* If the update txn is in progress abort inspection */ + ret = 0; + goto out; } - /* TODO: revist this code when fixing hardlinks */ + + mq_compute_delta(&delta, &size, &contri); + if (!quota_meta_is_null(&delta)) + mq_initiate_quota_txn(this, loc, NULL); + } + /* TODO: revist this code when fixing hardlinks */ out: - return ret; + return ret; } int32_t -mq_xattr_state (xlator_t *this, loc_t *origin_loc, dict_t *dict, - struct iatt buf) +mq_xattr_state(xlator_t *this, loc_t *origin_loc, dict_t *dict, + struct iatt *buf) { - int32_t ret = -1; - quota_inode_ctx_t *ctx = NULL; - loc_t loc = {0, }; - inode_contribution_t *contribution = NULL; - - ret = mq_prevalidate_txn (this, origin_loc, &loc, &ctx, &buf); - if (ret < 0) - goto out; - - if (!loc_is_root(&loc)) { - contribution = mq_add_new_contribution_node (this, ctx, &loc); - if (contribution == NULL) { - if (!gf_uuid_is_null (loc.inode->gfid)) - gf_log (this->name, GF_LOG_WARNING, - "cannot add a new contribution node " - "(%s)", uuid_utoa (loc.gfid)); - ret = -1; - goto out; - } - } + int32_t ret = -1; + quota_inode_ctx_t *ctx = NULL; + loc_t loc = { + 0, + }; + inode_contribution_t *contribution = NULL; + + ret = mq_prevalidate_txn(this, origin_loc, &loc, &ctx, buf); + if (ret < 0 || loc.parent == NULL) + goto out; - if (buf.ia_type == IA_IFDIR) - mq_inspect_directory_xattr (this, ctx, contribution, &loc, dict, - buf); + if (!loc_is_root(&loc)) { + contribution = mq_add_new_contribution_node(this, ctx, &loc); + if (contribution == NULL) { + if 
(!gf_uuid_is_null(loc.inode->gfid)) + gf_log(this->name, GF_LOG_WARNING, + "cannot add a new contribution node " + "(%s)", + uuid_utoa(loc.gfid)); + ret = -1; + goto out; + } + if (buf->ia_type == IA_IFDIR) + mq_inspect_directory_xattr(this, ctx, contribution, &loc, dict); else - mq_inspect_file_xattr (this, ctx, contribution, &loc, dict, - buf); + mq_inspect_file_xattr(this, ctx, contribution, &loc, dict, buf); + } else { + mq_inspect_directory_xattr(this, ctx, 0, &loc, dict); + } out: - loc_wipe (&loc); + loc_wipe(&loc); - if (contribution) - GF_REF_PUT (contribution); + if (contribution) + GF_REF_PUT(contribution); - return ret; + return ret; } int32_t -mq_req_xattr (xlator_t *this, loc_t *loc, dict_t *dict, - char *contri_key, char *size_key) +mq_req_xattr(xlator_t *this, loc_t *loc, dict_t *dict, char *contri_key, + char *size_key) { - int32_t ret = -1; - char key[QUOTA_KEY_MAX] = {0, }; - - GF_VALIDATE_OR_GOTO ("marker", this, out); - GF_VALIDATE_OR_GOTO ("marker", loc, out); - GF_VALIDATE_OR_GOTO ("marker", dict, out); - - if (!loc_is_root(loc)) { - ret = mq_dict_set_contribution (this, dict, loc, NULL, - contri_key); - if (ret < 0) - goto out; - } + int32_t ret = -1; + char key[QUOTA_KEY_MAX] = { + 0, + }; - GET_SIZE_KEY (this, key, ret); - if (ret < 0) - goto out; - if (size_key) - strncpy (size_key, key, QUOTA_KEY_MAX); + GF_VALIDATE_OR_GOTO("marker", this, out); + GF_VALIDATE_OR_GOTO("marker", loc, out); + GF_VALIDATE_OR_GOTO("marker", dict, out); - ret = dict_set_uint64 (dict, key, 0); + if (!loc_is_root(loc)) { + ret = mq_dict_set_contribution(this, dict, loc, NULL, contri_key); if (ret < 0) - goto out; + goto out; + } - ret = dict_set_int8 (dict, QUOTA_DIRTY_KEY, 0); + GET_SIZE_KEY(this, key, ret); + if (ret < 0) + goto out; + if (size_key) + if (snprintf(size_key, QUOTA_KEY_MAX, "%s", key) >= QUOTA_KEY_MAX) { + ret = -1; + goto out; + } -out: - if (ret < 0) - gf_log_callingfn (this->name, GF_LOG_ERROR, "dict set failed"); + ret = 
dict_set_uint64(dict, key, 0); + if (ret < 0) + goto out; - return ret; -} + ret = dict_set_int8(dict, QUOTA_DIRTY_KEY, 0); +out: + if (ret < 0) + gf_log_callingfn(this ? this->name : "Marker", GF_LOG_ERROR, + "dict set failed"); + return ret; +} int32_t -mq_forget (xlator_t *this, quota_inode_ctx_t *ctx) +mq_forget(xlator_t *this, quota_inode_ctx_t *ctx) { - inode_contribution_t *contri = NULL; - inode_contribution_t *next = NULL; + inode_contribution_t *contri = NULL; + inode_contribution_t *next = NULL; - GF_VALIDATE_OR_GOTO ("marker", this, out); - GF_VALIDATE_OR_GOTO ("marker", ctx, out); + GF_VALIDATE_OR_GOTO("marker", this, out); + GF_VALIDATE_OR_GOTO("marker", ctx, out); - list_for_each_entry_safe (contri, next, &ctx->contribution_head, - contri_list) { - list_del_init (&contri->contri_list); - GF_REF_PUT (contri); - } + list_for_each_entry_safe(contri, next, &ctx->contribution_head, contri_list) + { + list_del_init(&contri->contri_list); + GF_REF_PUT(contri); + } - LOCK_DESTROY (&ctx->lock); - GF_FREE (ctx); + LOCK_DESTROY(&ctx->lock); + GF_FREE(ctx); out: - return 0; + return 0; } diff --git a/xlators/features/marker/src/marker-quota.h b/xlators/features/marker/src/marker-quota.h index dc953704d11..4bbf6878b22 100644 --- a/xlators/features/marker/src/marker-quota.h +++ b/xlators/features/marker/src/marker-quota.h @@ -10,145 +10,131 @@ #ifndef _MARKER_QUOTA_H #define _MARKER_QUOTA_H -#include "xlator.h" +#include <glusterfs/xlator.h> #include "marker-mem-types.h" -#include "refcount.h" -#include "quota-common-utils.h" +#include <glusterfs/refcount.h> +#include <glusterfs/quota-common-utils.h> +#include <glusterfs/call-stub.h> #define QUOTA_XATTR_PREFIX "trusted.glusterfs" #define QUOTA_DIRTY_KEY "trusted.glusterfs.quota.dirty" -#define CONTRIBUTION "contri" +#define CONTRIBUTION "contri" #define QUOTA_KEY_MAX 512 #define READDIR_BUF 4096 - -#define QUOTA_STACK_DESTROY(_frame, _this) \ - do { \ - quota_local_t *_local = NULL; \ - _local = _frame->local; \ - 
_frame->local = NULL; \ - STACK_DESTROY (_frame->root); \ - mq_local_unref (_this, _local); \ - } while (0) - - -#define QUOTA_ALLOC(var, type, ret) \ - do { \ - ret = 0; \ - var = GF_CALLOC (sizeof (type), 1, \ - gf_marker_mt_##type); \ - if (!var) { \ - ret = -1; \ - } \ - } while (0); - -#define QUOTA_ALLOC_OR_GOTO(var, type, ret, label) \ - do { \ - var = GF_CALLOC (sizeof (type), 1, \ - gf_marker_mt_##type); \ - if (!var) { \ - gf_log ("", GF_LOG_ERROR, \ - "out of memory"); \ - ret = -1; \ - goto label; \ - } \ - ret = 0; \ - } while (0); - -#define GET_CONTRI_KEY(_this, var, _gfid, _ret) \ - do { \ - marker_conf_t *_priv = _this->private; \ - if (_gfid != NULL) { \ - char _gfid_unparsed[40]; \ - gf_uuid_unparse (_gfid, _gfid_unparsed); \ - _ret = snprintf (var, QUOTA_KEY_MAX, \ - QUOTA_XATTR_PREFIX \ - ".%s.%s." CONTRIBUTION ".%d", \ - "quota", _gfid_unparsed, \ - _priv->version); \ - } else { \ - _ret = snprintf (var, QUOTA_KEY_MAX, \ - QUOTA_XATTR_PREFIX \ - ".%s.." CONTRIBUTION ".%d", \ - "quota", _priv->version); \ - } \ - } while (0) - -#define GET_QUOTA_KEY(_this, var, key, _ret) \ - do { \ - marker_conf_t *_priv = _this->private; \ - if (_priv->version > 0) \ - _ret = snprintf (var, QUOTA_KEY_MAX, "%s.%d", \ - key, _priv->version); \ - else \ - _ret = snprintf (var, QUOTA_KEY_MAX, "%s", key); \ - } while (0) - -#define GET_SIZE_KEY(_this, var, _ret) \ - { \ - GET_QUOTA_KEY (_this, var, QUOTA_SIZE_KEY, _ret); \ - } - -#define QUOTA_SAFE_INCREMENT(lock, var) \ - do { \ - LOCK (lock); \ - var ++; \ - UNLOCK (lock); \ - } while (0) +#define QUOTA_ALLOC(var, type, ret) \ + do { \ + ret = 0; \ + var = GF_CALLOC(sizeof(type), 1, gf_marker_mt_##type); \ + if (!var) { \ + ret = -1; \ + } \ + } while (0); + +#define QUOTA_ALLOC_OR_GOTO(var, type, ret, label) \ + do { \ + var = GF_CALLOC(sizeof(type), 1, gf_marker_mt_##type); \ + if (!var) { \ + gf_log("", GF_LOG_ERROR, "out of memory"); \ + ret = -1; \ + goto label; \ + } \ + ret = 0; \ + } while (0); + 
+#define GET_QUOTA_KEY(_this, var, key, _ret) \ + do { \ + marker_conf_t *_priv = _this->private; \ + if (_priv->version > 0) \ + _ret = snprintf(var, QUOTA_KEY_MAX, "%s.%d", key, _priv->version); \ + else \ + _ret = snprintf(var, QUOTA_KEY_MAX, "%s", key); \ + } while (0) + +#define GET_CONTRI_KEY(_this, var, _gfid, _ret) \ + do { \ + char _tmp_var[QUOTA_KEY_MAX] = { \ + 0, \ + }; \ + if (_gfid != NULL) { \ + char _gfid_unparsed[40]; \ + gf_uuid_unparse(_gfid, _gfid_unparsed); \ + _ret = snprintf(_tmp_var, QUOTA_KEY_MAX, \ + QUOTA_XATTR_PREFIX ".%s.%s." CONTRIBUTION, \ + "quota", _gfid_unparsed); \ + } else { \ + _ret = snprintf(_tmp_var, QUOTA_KEY_MAX, \ + QUOTA_XATTR_PREFIX ".%s.." CONTRIBUTION, "quota"); \ + } \ + GET_QUOTA_KEY(_this, var, _tmp_var, _ret); \ + } while (0) + +#define GET_SIZE_KEY(_this, var, _ret) \ + { \ + GET_QUOTA_KEY(_this, var, QUOTA_SIZE_KEY, _ret); \ + } + +#define QUOTA_SAFE_INCREMENT(lock, var) \ + do { \ + LOCK(lock); \ + var++; \ + UNLOCK(lock); \ + } while (0) struct quota_inode_ctx { - int64_t size; - int64_t file_count; - int64_t dir_count; - int8_t dirty; - gf_boolean_t create_status; - gf_boolean_t updation_status; - gf_boolean_t dirty_status; - gf_lock_t lock; - struct list_head contribution_head; + int64_t size; + int64_t file_count; + int64_t dir_count; + int8_t dirty; + gf_boolean_t create_status; + gf_boolean_t updation_status; + gf_boolean_t dirty_status; + gf_lock_t lock; + struct list_head contribution_head; }; typedef struct quota_inode_ctx quota_inode_ctx_t; struct quota_synctask { - xlator_t *this; - loc_t loc; - quota_meta_t contri; - gf_boolean_t is_static; - uint32_t ia_nlink; + xlator_t *this; + loc_t loc; + quota_meta_t contri; + gf_boolean_t is_static; + uint32_t ia_nlink; + call_stub_t *stub; }; typedef struct quota_synctask quota_synctask_t; struct inode_contribution { - struct list_head contri_list; - int64_t contribution; - int64_t file_count; - int64_t dir_count; - uuid_t gfid; - gf_lock_t lock; - 
GF_REF_DECL; + struct list_head contri_list; + int64_t contribution; + int64_t file_count; + int64_t dir_count; + uuid_t gfid; + gf_lock_t lock; + GF_REF_DECL; }; typedef struct inode_contribution inode_contribution_t; int32_t -mq_req_xattr (xlator_t *, loc_t *, dict_t *, char *, char *); +mq_req_xattr(xlator_t *, loc_t *, dict_t *, char *, char *); int32_t -mq_xattr_state (xlator_t *, loc_t *, dict_t *, struct iatt); +mq_xattr_state(xlator_t *, loc_t *, dict_t *, struct iatt *); int -mq_initiate_quota_txn (xlator_t *, loc_t *, struct iatt *); +mq_initiate_quota_txn(xlator_t *, loc_t *, struct iatt *); int -mq_initiate_quota_blocking_txn (xlator_t *, loc_t *, struct iatt *); +mq_initiate_quota_blocking_txn(xlator_t *, loc_t *, struct iatt *); int -mq_create_xattrs_txn (xlator_t *this, loc_t *loc, struct iatt *buf); +mq_create_xattrs_txn(xlator_t *this, loc_t *loc, struct iatt *buf); int32_t -mq_reduce_parent_size_txn (xlator_t *, loc_t *, quota_meta_t *, - uint32_t nlink); +mq_reduce_parent_size_txn(xlator_t *, loc_t *, quota_meta_t *, uint32_t nlink, + call_stub_t *stub); int32_t -mq_forget (xlator_t *, quota_inode_ctx_t *); +mq_forget(xlator_t *, quota_inode_ctx_t *); #endif diff --git a/xlators/features/marker/src/marker.c b/xlators/features/marker/src/marker.c index 8007933101d..1375ccc498c 100644 --- a/xlators/features/marker/src/marker.c +++ b/xlators/features/marker/src/marker.c @@ -7,34 +7,34 @@ later), or the GNU General Public License, version 2 (GPLv2), in all cases as published by the Free Software Foundation. 
*/ -#include "xlator.h" -#include "defaults.h" +#include <glusterfs/xlator.h> +#include <glusterfs/defaults.h> #include "libxlator.h" #include "marker.h" #include "marker-mem-types.h" #include "marker-quota.h" #include "marker-quota-helper.h" #include "marker-common.h" -#include "byte-order.h" -#include "syncop.h" -#include "syscall.h" +#include <glusterfs/byte-order.h> +#include <glusterfs/syncop.h> +#include <glusterfs/syscall.h> #include <fnmatch.h> #define _GF_UID_GID_CHANGED 1 static char *mq_ext_xattrs[] = { - QUOTA_SIZE_KEY, - QUOTA_LIMIT_KEY, - QUOTA_LIMIT_OBJECTS_KEY, - NULL, + QUOTA_SIZE_KEY, + QUOTA_LIMIT_KEY, + QUOTA_LIMIT_OBJECTS_KEY, + NULL, }; void -fini (xlator_t *this); +fini(xlator_t *this); int32_t -marker_start_setxattr (call_frame_t *, xlator_t *); +marker_start_setxattr(call_frame_t *, xlator_t *); /* When client/quotad request for quota xattrs, * replace the key-name by adding the version number @@ -46,1491 +46,1566 @@ marker_start_setxattr (call_frame_t *, xlator_t *); * version for the key-name */ int -marker_key_replace_with_ver (xlator_t *this, dict_t *dict) +marker_key_replace_with_ver(xlator_t *this, dict_t *dict) { - int ret = -1; - int i = 0; - marker_conf_t *priv = NULL; - char key[QUOTA_KEY_MAX] = {0, }; + int ret = -1; + int i = 0; + marker_conf_t *priv = NULL; + char key[QUOTA_KEY_MAX] = { + 0, + }; - priv = this->private; + priv = this->private; - if (dict == NULL || priv->version <= 0) { - ret = 0; + if (dict == NULL || priv->version <= 0) { + ret = 0; + goto out; + } + + for (i = 0; mq_ext_xattrs[i]; i++) { + if (dict_get(dict, mq_ext_xattrs[i])) { + GET_QUOTA_KEY(this, key, mq_ext_xattrs[i], ret); + if (ret < 0) goto out; - } - for (i = 0; mq_ext_xattrs[i]; i++) { - if (dict_get (dict, mq_ext_xattrs[i])) { - GET_QUOTA_KEY (this, key, mq_ext_xattrs[i], ret); - if (ret < 0) - goto out; - - ret = dict_set (dict, key, - dict_get (dict, mq_ext_xattrs[i])); - if (ret < 0) - goto out; - - dict_del (dict, mq_ext_xattrs[i]); - } + ret 
= dict_set(dict, key, dict_get(dict, mq_ext_xattrs[i])); + if (ret < 0) + goto out; + + dict_del(dict, mq_ext_xattrs[i]); } + } - ret = 0; + ret = 0; out: - return ret; + return ret; } int -marker_key_set_ver (xlator_t *this, dict_t *dict) +marker_key_set_ver(xlator_t *this, dict_t *dict) { - int ret = -1; - int i = -1; - marker_conf_t *priv = NULL; - char key[QUOTA_KEY_MAX] = {0, }; + int ret = -1; + int i = -1; + marker_conf_t *priv = NULL; + char key[QUOTA_KEY_MAX] = { + 0, + }; - priv = this->private; + priv = this->private; - if (dict == NULL || priv->version <= 0) { - ret = 0; - goto out; - } + if (dict == NULL || priv->version <= 0) { + ret = 0; + goto out; + } - for (i = 0; mq_ext_xattrs[i]; i++) { - GET_QUOTA_KEY (this, key, mq_ext_xattrs[i], ret); - if (ret < 0) - goto out; + for (i = 0; mq_ext_xattrs[i]; i++) { + GET_QUOTA_KEY(this, key, mq_ext_xattrs[i], ret); + if (ret < 0) + goto out; - if (dict_get (dict, key)) - dict_set (dict, mq_ext_xattrs[i], dict_get (dict, key)); - } + if (dict_get(dict, key)) + dict_set(dict, mq_ext_xattrs[i], dict_get(dict, key)); + } - ret = 0; + ret = 0; out: - return ret; + return ret; } marker_local_t * -marker_local_ref (marker_local_t *local) +marker_local_ref(marker_local_t *local) { - GF_VALIDATE_OR_GOTO ("marker", local, err); + GF_VALIDATE_OR_GOTO("marker", local, err); - LOCK (&local->lock); - { - local->ref++; - } - UNLOCK (&local->lock); + LOCK(&local->lock); + { + local->ref++; + } + UNLOCK(&local->lock); - return local; + return local; err: - return NULL; + return NULL; } int -marker_loc_fill (loc_t *loc, inode_t *inode, inode_t *parent, char *path) +marker_loc_fill(loc_t *loc, inode_t *inode, inode_t *parent, char *path) { - int ret = -1; + int ret = -1; - if (!loc) - return ret; + if (!loc) + return ret; - if (inode) { - loc->inode = inode_ref (inode); - if (gf_uuid_is_null (loc->gfid)) { - gf_uuid_copy (loc->gfid, loc->inode->gfid); - } + if (inode) { + loc->inode = inode_ref(inode); + if 
(gf_uuid_is_null(loc->gfid)) { + gf_uuid_copy(loc->gfid, loc->inode->gfid); } + } - if (parent) - loc->parent = inode_ref (parent); - - if (path) { - loc->path = gf_strdup (path); - if (!loc->path) { - gf_log ("loc fill", GF_LOG_ERROR, "strdup failed"); - goto loc_wipe; - } - - loc->name = strrchr (loc->path, '/'); - if (loc->name) - loc->name++; + if (parent) + loc->parent = inode_ref(parent); + + if (path) { + loc->path = gf_strdup(path); + if (!loc->path) { + gf_log("loc fill", GF_LOG_ERROR, "strdup failed"); + goto loc_wipe; } - ret = 0; + loc->name = strrchr(loc->path, '/'); + if (loc->name) + loc->name++; + } + + ret = 0; loc_wipe: - if (ret < 0) - loc_wipe (loc); + if (ret < 0) + loc_wipe(loc); - return ret; + return ret; } int -_marker_inode_loc_fill (inode_t *inode, inode_t *parent, char *name, loc_t *loc) +_marker_inode_loc_fill(inode_t *inode, inode_t *parent, char *name, loc_t *loc) { - char *resolvedpath = NULL; - int ret = -1; - gf_boolean_t free_parent = _gf_false; + char *resolvedpath = NULL; + int ret = -1; + gf_boolean_t free_parent = _gf_false; - if ((!inode) || (!loc)) - return ret; + if ((!inode) || (!loc)) + return ret; - if (parent && name) - ret = inode_path (parent, name, &resolvedpath); - else - ret = inode_path (inode, NULL, &resolvedpath); - if (ret < 0) - goto err; + if (parent && name) + ret = inode_path(parent, name, &resolvedpath); + else + ret = inode_path(inode, NULL, &resolvedpath); + if (ret < 0) + goto err; - if (parent == NULL) { - parent = inode_parent (inode, NULL, NULL); - free_parent = _gf_true; - } + if (parent == NULL) { + parent = inode_parent(inode, NULL, NULL); + free_parent = _gf_true; + } - ret = marker_loc_fill (loc, inode, parent, resolvedpath); - if (ret < 0) - goto err; + ret = marker_loc_fill(loc, inode, parent, resolvedpath); + if (ret < 0) + goto err; err: - if (free_parent) - inode_unref (parent); + if (free_parent) + inode_unref(parent); - GF_FREE (resolvedpath); + GF_FREE(resolvedpath); - return ret; + 
return ret; } int -marker_inode_loc_fill (inode_t *inode, loc_t *loc) +marker_inode_loc_fill(inode_t *inode, loc_t *loc) { - return _marker_inode_loc_fill (inode, NULL, NULL, loc); + return _marker_inode_loc_fill(inode, NULL, NULL, loc); } int32_t -marker_trav_parent (marker_local_t *local) +marker_trav_parent(marker_local_t *local) { - int32_t ret = 0; - loc_t loc = {0, }; - inode_t *parent = NULL; - int8_t need_unref = 0; + int32_t ret = 0; + loc_t loc = { + 0, + }; + inode_t *parent = NULL; + int8_t need_unref = 0; - if (!local->loc.parent) { - parent = inode_parent (local->loc.inode, NULL, NULL); - if (parent) - need_unref = 1; - } else - parent = local->loc.parent; + if (!local->loc.parent) { + parent = inode_parent(local->loc.inode, NULL, NULL); + if (parent) + need_unref = 1; + } else + parent = local->loc.parent; - ret = marker_inode_loc_fill (parent, &loc); + ret = marker_inode_loc_fill(parent, &loc); - if (ret < 0) { - ret = -1; - goto out; - } + if (ret < 0) { + ret = -1; + goto out; + } - loc_wipe (&local->loc); + loc_wipe(&local->loc); - local->loc = loc; + local->loc = loc; out: - if (need_unref) - inode_unref (parent); + if (need_unref) + inode_unref(parent); - return ret; + return ret; } -int32_t -marker_error_handler (xlator_t *this, marker_local_t *local, int32_t op_errno) +void +marker_error_handler(xlator_t *this, marker_local_t *local, int32_t op_errno) { - marker_conf_t *priv = NULL; - const char *path = NULL; - - priv = (marker_conf_t *) this->private; - path = local - ? (local->loc.path - ? local->loc.path : uuid_utoa(local->loc.gfid)) - : "<nul>"; + marker_conf_t *priv = (marker_conf_t *)this->private; + const char *path = local ? ((local->loc.path) ? local->loc.path + : uuid_utoa(local->loc.gfid)) + : "<nul>"; - gf_log (this->name, GF_LOG_CRITICAL, - "Indexing gone corrupt at %s (reason: %s)." 
- " Geo-replication slave content needs to be revalidated", - path, strerror (op_errno)); - sys_unlink (priv->timestamp_file); - - return 0; + gf_log(this->name, GF_LOG_CRITICAL, + "Indexing gone corrupt at %s (reason: %s)." + " Geo-replication slave content needs to be revalidated", + path, strerror(op_errno)); + sys_unlink(priv->timestamp_file); } int32_t -marker_local_unref (marker_local_t *local) +marker_local_unref(marker_local_t *local) { - int32_t var = 0; - - if (local == NULL) - return -1; - - LOCK (&local->lock); - { - var = --local->ref; - } - UNLOCK (&local->lock); + int32_t var = 0; - if (var != 0) - goto out; + if (local == NULL) + return -1; - loc_wipe (&local->loc); - loc_wipe (&local->parent_loc); - if (local->xdata) - dict_unref (local->xdata); + LOCK(&local->lock); + { + var = --local->ref; + } + UNLOCK(&local->lock); - if (local->lk_frame) { - STACK_DESTROY (local->lk_frame->root); - local->lk_frame = NULL; - } + if (var != 0) + goto out; - if (local->oplocal) { - marker_local_unref (local->oplocal); - local->oplocal = NULL; - } - mem_put (local); + loc_wipe(&local->loc); + loc_wipe(&local->parent_loc); + if (local->xdata) + dict_unref(local->xdata); + + if (local->lk_frame) { + STACK_DESTROY(local->lk_frame->root); + local->lk_frame = NULL; + } + + if (local->oplocal) { + marker_local_unref(local->oplocal); + local->oplocal = NULL; + } + mem_put(local); out: - return 0; + return 0; } int32_t -stat_stampfile (xlator_t *this, marker_conf_t *priv, - struct volume_mark **status) +stat_stampfile(xlator_t *this, marker_conf_t *priv, struct volume_mark **status) { - struct stat buf = {0, }; - struct volume_mark *vol_mark = NULL; + struct stat buf = { + 0, + }; + struct volume_mark *vol_mark = NULL; - vol_mark = GF_CALLOC (sizeof (struct volume_mark), 1, - gf_marker_mt_volume_mark); + vol_mark = GF_CALLOC(sizeof(struct volume_mark), 1, + gf_marker_mt_volume_mark); - vol_mark->major = 1; - vol_mark->minor = 0; + vol_mark->major = 1; + vol_mark->minor = 
0; - GF_ASSERT (sizeof (priv->volume_uuid_bin) == 16); - memcpy (vol_mark->uuid, priv->volume_uuid_bin, 16); + GF_ASSERT(sizeof(priv->volume_uuid_bin) == 16); + memcpy(vol_mark->uuid, priv->volume_uuid_bin, 16); - if (sys_stat (priv->timestamp_file, &buf) != -1) { - vol_mark->retval = 0; - vol_mark->sec = htonl (buf.st_mtime); - vol_mark->usec = htonl (ST_MTIM_NSEC (&buf)/1000); - } else - vol_mark->retval = 1; + if (sys_stat(priv->timestamp_file, &buf) != -1) { + vol_mark->retval = 0; + vol_mark->sec = htonl(buf.st_mtime); + vol_mark->usec = htonl(ST_MTIM_NSEC(&buf) / 1000); + } else + vol_mark->retval = 1; - *status = vol_mark; + *status = vol_mark; - return 0; + return 0; } int32_t -marker_getxattr_stampfile_cbk (call_frame_t *frame, xlator_t *this, - const char *name, struct volume_mark *vol_mark, - dict_t *xdata) +marker_getxattr_stampfile_cbk(call_frame_t *frame, xlator_t *this, + const char *name, struct volume_mark *vol_mark, + dict_t *xdata) { - int32_t ret = -1; - dict_t *dict = NULL; + int32_t ret = -1; + dict_t *dict = NULL; - if (vol_mark == NULL){ - STACK_UNWIND_STRICT (getxattr, frame, -1, ENOMEM, NULL, NULL); + if (vol_mark == NULL) { + STACK_UNWIND_STRICT(getxattr, frame, -1, ENOMEM, NULL, NULL); - goto out; - } + goto out; + } - dict = dict_new (); + dict = dict_new(); - ret = dict_set_bin (dict, (char *)name, vol_mark, - sizeof (struct volume_mark)); - if (ret) { - GF_FREE (vol_mark); - gf_log (this->name, GF_LOG_WARNING, "failed to set key %s", - name); - } + ret = dict_set_bin(dict, (char *)name, vol_mark, + sizeof(struct volume_mark)); + if (ret) { + GF_FREE(vol_mark); + gf_log(this->name, GF_LOG_WARNING, "failed to set key %s", name); + } - STACK_UNWIND_STRICT (getxattr, frame, 0, 0, dict, xdata); + STACK_UNWIND_STRICT(getxattr, frame, 0, 0, dict, xdata); - if (dict) - dict_unref (dict); + if (dict) + dict_unref(dict); out: - return 0; + return 0; } gf_boolean_t -call_from_special_client (call_frame_t *frame, xlator_t *this, const char *name) 
+call_from_special_client(call_frame_t *frame, xlator_t *this, const char *name) { - struct volume_mark *vol_mark = NULL; - marker_conf_t *priv = NULL; - gf_boolean_t is_true = _gf_true; + struct volume_mark *vol_mark = NULL; + marker_conf_t *priv = NULL; + gf_boolean_t is_true = _gf_true; - priv = (marker_conf_t *)this->private; + priv = (marker_conf_t *)this->private; - if (frame->root->pid != GF_CLIENT_PID_GSYNCD || name == NULL || - strcmp (name, MARKER_XATTR_PREFIX "." VOLUME_MARK) != 0) { - is_true = _gf_false; - goto out; - } + if (frame->root->pid != GF_CLIENT_PID_GSYNCD || name == NULL || + strcmp(name, MARKER_XATTR_PREFIX "." VOLUME_MARK) != 0) { + is_true = _gf_false; + goto out; + } - stat_stampfile (this, priv, &vol_mark); + stat_stampfile(this, priv, &vol_mark); - marker_getxattr_stampfile_cbk (frame, this, name, vol_mark, NULL); + marker_getxattr_stampfile_cbk(frame, this, name, vol_mark, NULL); out: - return is_true; + return is_true; } static gf_boolean_t -_is_quota_internal_xattr (dict_t *d, char *k, data_t *v, void *data) +_is_quota_internal_xattr(dict_t *d, char *k, data_t *v, void *data) { - int i = 0; - char **external_xattrs = data; + int i = 0; + char **external_xattrs = data; - for (i = 0; external_xattrs && external_xattrs[i]; i++) { - if (strcmp (k, external_xattrs[i]) == 0) - return _gf_false; - } + for (i = 0; external_xattrs && external_xattrs[i]; i++) { + if (strcmp(k, external_xattrs[i]) == 0) + return _gf_false; + } - if (fnmatch ("trusted.glusterfs.quota*", k, 0) == 0) - return _gf_true; + if (fnmatch("trusted.glusterfs.quota*", k, 0) == 0) + return _gf_true; - /* It would be nice if posix filters pgfid xattrs. But since marker - * also takes up responsibility to clean these up, adding the filtering - * here (Check 'quota_xattr_cleaner') - */ - if (fnmatch (PGFID_XATTR_KEY_PREFIX"*", k, 0) == 0) - return _gf_true; + /* It would be nice if posix filters pgfid xattrs. 
But since marker + * also takes up responsibility to clean these up, adding the filtering + * here (Check 'quota_xattr_cleaner') + */ + if (fnmatch(PGFID_XATTR_KEY_PREFIX "*", k, 0) == 0) + return _gf_true; - return _gf_false; + return _gf_false; } static void -marker_filter_internal_xattrs (xlator_t *this, dict_t *xattrs) +marker_filter_internal_xattrs(xlator_t *this, dict_t *xattrs) { - marker_conf_t *priv = NULL; - char **ext = NULL; + marker_conf_t *priv = NULL; + char **ext = NULL; - priv = this->private; - if (priv->feature_enabled & GF_QUOTA) - ext = mq_ext_xattrs; + priv = this->private; + if (priv->feature_enabled & GF_QUOTA) + ext = mq_ext_xattrs; - dict_foreach_match (xattrs, _is_quota_internal_xattr, ext, - dict_remove_foreach_fn, NULL); - return; + dict_foreach_match(xattrs, _is_quota_internal_xattr, ext, + dict_remove_foreach_fn, NULL); } -int32_t -marker_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict, - dict_t *xdata) +static void +marker_filter_gsyncd_xattrs(call_frame_t *frame, xlator_t *this, dict_t *xattrs) { - int32_t ret = -1; + marker_conf_t *priv = NULL; - if (op_ret < 0) - goto unwind; + priv = this->private; + GF_ASSERT(priv); + GF_ASSERT(frame); - ret = marker_key_set_ver (this, dict); - if (ret < 0) { - op_ret = -1; - op_errno = ENOMEM; - goto unwind; - } + if (xattrs && frame->root->pid != GF_CLIENT_PID_GSYNCD) { + GF_REMOVE_INTERNAL_XATTR(GF_XATTR_XTIME_PATTERN, xattrs); + } + return; +} - if (cookie) { - gf_log (this->name, GF_LOG_DEBUG, - "Filtering the quota extended attributes"); - - /* If the getxattr is from a non special client, then do not - copy the quota related xattrs (except the quota limit key - i.e trusted.glusterfs.quota.limit-set which has been set by - glusterd on the directory on which quota limit is set.) for - directories. Let the healing of xattrs happen upon lookup. 
- NOTE: setting of trusted.glusterfs.quota.limit-set as of now - happens from glusterd. It should be moved to quotad. Also - trusted.glusterfs.quota.limit-set is set on directory which - is permanent till quota is removed on that directory or limit - is changed. So let that xattr be healed by other xlators - properly whenever directory healing is done. - */ - /* - * Except limit-set xattr, rest of the xattrs are maintained - * by quota xlator. Don't expose them to other xlators. - * This filter makes sure quota xattrs are not healed as part of - * metadata self-heal - */ - marker_filter_internal_xattrs (frame->this, dict); - } +int32_t +marker_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t *xdata) +{ + int32_t ret = -1; + if (op_ret < 0) + goto unwind; + + ret = marker_key_set_ver(this, dict); + if (ret < 0) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + + if (cookie) { + gf_log(this->name, GF_LOG_DEBUG, + "Filtering the quota extended attributes"); + + /* If the getxattr is from a non special client, then do not + copy the quota related xattrs (except the quota limit key + i.e trusted.glusterfs.quota.limit-set which has been set by + glusterd on the directory on which quota limit is set.) for + directories. Let the healing of xattrs happen upon lookup. + NOTE: setting of trusted.glusterfs.quota.limit-set as of now + happens from glusterd. It should be moved to quotad. Also + trusted.glusterfs.quota.limit-set is set on directory which + is permanent till quota is removed on that directory or limit + is changed. So let that xattr be healed by other xlators + properly whenever directory healing is done. + */ + /* + * Except limit-set xattr, rest of the xattrs are maintained + * by quota xlator. Don't expose them to other xlators. 
+ * This filter makes sure quota xattrs are not healed as part of + * metadata self-heal + */ + marker_filter_internal_xattrs(frame->this, dict); + } + + /* Filter gsyncd xtime xattr for non gsyncd clients */ + marker_filter_gsyncd_xattrs(frame, frame->this, dict); unwind: - MARKER_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata); - return 0; + MARKER_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata); + return 0; } int32_t -marker_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *name, dict_t *xdata) -{ - gf_boolean_t is_true = _gf_false; - marker_conf_t *priv = NULL; - unsigned long cookie = 0; - marker_local_t *local = NULL; - char key[QUOTA_KEY_MAX] = {0, }; - int32_t ret = -1; - int32_t i = 0; - - priv = this->private; - - if (name) { - for (i = 0; mq_ext_xattrs[i]; i++) { - if (strcmp (name, mq_ext_xattrs[i])) - continue; - - GET_QUOTA_KEY (this, key, mq_ext_xattrs[i], ret); - if (ret < 0) - goto out; - name = key; - break; - } - } +marker_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) +{ + gf_boolean_t is_true = _gf_false; + marker_conf_t *priv = NULL; + unsigned long cookie = 0; + marker_local_t *local = NULL; + char key[QUOTA_KEY_MAX] = { + 0, + }; + int32_t ret = -1; + int32_t i = 0; + + priv = this->private; + + if (name) { + for (i = 0; mq_ext_xattrs[i]; i++) { + if (strcmp(name, mq_ext_xattrs[i])) + continue; - frame->local = mem_get0 (this->local_pool); - local = frame->local; - if (local == NULL) + GET_QUOTA_KEY(this, key, mq_ext_xattrs[i], ret); + if (ret < 0) goto out; + name = key; + break; + } + } + + frame->local = mem_get0(this->local_pool); + local = frame->local; + if (local == NULL) + goto out; + + MARKER_INIT_LOCAL(frame, local); + + if ((loc_copy(&local->loc, loc)) < 0) + goto out; + + gf_log(this->name, GF_LOG_DEBUG, "USER:PID = %d", frame->root->pid); + + if (priv && priv->feature_enabled & GF_XTIME) + is_true = call_from_special_client(frame, 
this, name); - MARKER_INIT_LOCAL (frame, local); - - if ((loc_copy (&local->loc, loc)) < 0) - goto out; - - gf_log (this->name, GF_LOG_DEBUG, "USER:PID = %d", frame->root->pid); - - if (priv && priv->feature_enabled & GF_XTIME) - is_true = call_from_special_client (frame, this, name); - - if (is_true == _gf_false) { - if (name == NULL) { - /* Signifies that marker translator - * has to filter the quota's xattr's, - * this is to prevent afr from performing - * self healing on marker-quota xattrs' - */ - cookie = 1; - } - STACK_WIND_COOKIE (frame, marker_getxattr_cbk, - (void *)cookie, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->getxattr, - loc, name, xdata); + if (is_true == _gf_false) { + if (name == NULL) { + /* Signifies that marker translator + * has to filter the quota's xattr's, + * this is to prevent afr from performing + * self healing on marker-quota xattrs' + */ + cookie = 1; } + STACK_WIND_COOKIE(frame, marker_getxattr_cbk, (void *)cookie, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->getxattr, + loc, name, xdata); + } - return 0; + return 0; out: - MARKER_STACK_UNWIND (getxattr, frame, -1, ENOMEM, NULL, NULL); - return 0; + MARKER_STACK_UNWIND(getxattr, frame, -1, ENOMEM, NULL, NULL); + return 0; } int32_t -marker_setxattr_done (call_frame_t *frame) +marker_setxattr_done(call_frame_t *frame) { - marker_local_t *local = NULL; + marker_local_t *local = NULL; - local = (marker_local_t *) frame->local; + local = (marker_local_t *)frame->local; - frame->local = NULL; + frame->local = NULL; - STACK_DESTROY (frame->root); + STACK_DESTROY(frame->root); - marker_local_unref (local); + marker_local_unref(local); - return 0; + return 0; } int -marker_specific_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +marker_specific_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - int32_t ret = 0; - int32_t done = 0; - marker_local_t 
*local = NULL; + int32_t ret = 0; + int32_t done = 1; + marker_local_t *local = NULL; - local = (marker_local_t*) frame->local; + local = (marker_local_t *)frame->local; - if (op_ret == -1 && op_errno == ENOSPC) { - marker_error_handler (this, local, op_errno); - done = 1; - goto out; - } + if (op_ret == -1 && op_errno == ENOSPC) { + marker_error_handler(this, local, op_errno); + goto out; + } - if (local) { - if (local->loc.path && strcmp (local->loc.path, "/") == 0) { - done = 1; - goto out; - } - if (__is_root_gfid (local->loc.gfid)) { - done = 1; - goto out; - } + if (local) { + if (local->loc.path && strcmp(local->loc.path, "/") == 0) { + goto out; } - - ret = (local) ? marker_trav_parent (local) : -1; - - if (ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, "Error occurred " - "while traversing to the parent, stopping marker"); - - done = 1; - - goto out; + if (__is_root_gfid(local->loc.gfid)) { + goto out; } + } - marker_start_setxattr (frame, this); + ret = (local) ? marker_trav_parent(local) : -1; + + if (ret == -1) { + gf_log(this->name, GF_LOG_DEBUG, + "Error occurred " + "while traversing to the parent, stopping marker"); + goto out; + } + marker_start_setxattr(frame, this); + done = 0; out: - if (done) { - marker_setxattr_done (frame); - } + if (done) { + marker_setxattr_done(frame); + } - return 0; + return 0; } int32_t -marker_start_setxattr (call_frame_t *frame, xlator_t *this) +marker_start_setxattr(call_frame_t *frame, xlator_t *this) { - int32_t ret = -1; - dict_t *dict = NULL; - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + int32_t ret = -1; + dict_t *dict = NULL; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - priv = this->private; + priv = this->private; - local = (marker_local_t*) frame->local; + local = (marker_local_t *)frame->local; - if (!local) - goto out; + if (!local) + goto out; - dict = dict_new (); + dict = dict_new(); - if (!dict) - goto out; + if (!dict) + goto out; - if (local->loc.inode && 
gf_uuid_is_null (local->loc.gfid)) - gf_uuid_copy (local->loc.gfid, local->loc.inode->gfid); + if (local->loc.inode && gf_uuid_is_null(local->loc.gfid)) + gf_uuid_copy(local->loc.gfid, local->loc.inode->gfid); - GF_UUID_ASSERT (local->loc.gfid); + GF_UUID_ASSERT(local->loc.gfid); - ret = dict_set_static_bin (dict, priv->marker_xattr, - (void *)local->timebuf, 8); - if (ret) { - gf_log (this->name, GF_LOG_WARNING, - "failed to set marker xattr (%s)", local->loc.path); - goto out; - } + ret = dict_set_static_bin(dict, priv->marker_xattr, (void *)local->timebuf, + 8); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, "failed to set marker xattr (%s)", + local->loc.path); + goto out; + } - STACK_WIND (frame, marker_specific_setxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setxattr, &local->loc, dict, 0, - NULL); + STACK_WIND(frame, marker_specific_setxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, &local->loc, dict, 0, NULL); - ret = 0; + ret = 0; out: - if (dict) - dict_unref (dict); + if (dict) + dict_unref(dict); - return ret; + return ret; } void -marker_gettimeofday (marker_local_t *local) +marker_gettimeofday(marker_local_t *local) { - struct timeval tv = {0, }; + struct timeval tv = { + 0, + }; - gettimeofday (&tv, NULL); + gettimeofday(&tv, NULL); - local->timebuf [0] = htonl (tv.tv_sec); - local->timebuf [1] = htonl (tv.tv_usec); + local->timebuf[0] = htonl(tv.tv_sec); + local->timebuf[1] = htonl(tv.tv_usec); - return; + return; } int32_t -marker_create_frame (xlator_t *this, marker_local_t *local) +marker_create_frame(xlator_t *this, marker_local_t *local) { - call_frame_t *frame = NULL; + call_frame_t *frame = NULL; - frame = create_frame (this, this->ctx->pool); + frame = create_frame(this, this->ctx->pool); - if (!frame) - return -1; + if (!frame) + return -1; - frame->local = (void *) local; + frame->local = (void *)local; - marker_start_setxattr (frame, this); + marker_start_setxattr(frame, this); - return 0; + return 0; } 
int32_t -marker_xtime_update_marks (xlator_t *this, marker_local_t *local) +marker_xtime_update_marks(xlator_t *this, marker_local_t *local) { - marker_conf_t *priv = NULL; + marker_conf_t *priv = NULL; - GF_VALIDATE_OR_GOTO ("marker", this, out); - GF_VALIDATE_OR_GOTO (this->name, local, out); + GF_VALIDATE_OR_GOTO("marker", this, out); + GF_VALIDATE_OR_GOTO(this->name, local, out); - priv = this->private; + priv = this->private; - if ((local->pid == GF_CLIENT_PID_GSYNCD - && !(priv->feature_enabled & GF_XTIME_GSYNC_FORCE)) - || (local->pid == GF_CLIENT_PID_DEFRAG)) - goto out; + if ((local->pid == GF_CLIENT_PID_GSYNCD && + !(priv->feature_enabled & GF_XTIME_GSYNC_FORCE)) || + (local->pid == GF_CLIENT_PID_DEFRAG)) + goto out; - marker_gettimeofday (local); + marker_gettimeofday(local); - marker_local_ref (local); + marker_local_ref(local); - marker_create_frame (this, local); + marker_create_frame(this, local); out: - return 0; + return 0; } - int32_t -marker_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +marker_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - marker_conf_t *priv = NULL; - marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + quota_inode_ctx_t *ctx = NULL; - if (op_ret == -1) { - gf_log (this->name, GF_LOG_TRACE, "error occurred " - "while creating directory %s", strerror (op_errno)); - } + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, + "error occurred " + "while creating directory %s", + strerror(op_errno)); + } - local = (marker_local_t *) frame->local; + local = (marker_local_t *)frame->local; - frame->local = NULL; + frame->local = NULL; + priv = this->private; - STACK_UNWIND_STRICT (mkdir, 
frame, op_ret, op_errno, inode, - buf, preparent, postparent, xdata); + if (op_ret >= 0 && inode && (priv->feature_enabled & GF_QUOTA)) { + ctx = mq_inode_ctx_new(inode, this); + if (ctx == NULL) { + gf_log(this->name, GF_LOG_WARNING, + "mq_inode_ctx_new " + "failed for %s", + uuid_utoa(inode->gfid)); + op_ret = -1; + op_errno = ENOMEM; + } + } - if (op_ret == -1 || local == NULL) - goto out; + STACK_UNWIND_STRICT(mkdir, frame, op_ret, op_errno, inode, buf, preparent, + postparent, xdata); - if (gf_uuid_is_null (local->loc.gfid)) - gf_uuid_copy (local->loc.gfid, buf->ia_gfid); + if (op_ret == -1 || local == NULL) + goto out; - priv = this->private; + if (gf_uuid_is_null(local->loc.gfid)) + gf_uuid_copy(local->loc.gfid, buf->ia_gfid); - if (priv->feature_enabled & GF_QUOTA) - mq_create_xattrs_txn (this, &local->loc, NULL); + if (priv->feature_enabled & GF_QUOTA) + mq_create_xattrs_txn(this, &local->loc, NULL); - if (priv->feature_enabled & GF_XTIME) - marker_xtime_update_marks (this, local); + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); out: - marker_local_unref (local); + marker_local_unref(local); - return 0; + return 0; } int -marker_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, - mode_t umask, dict_t *xdata) +marker_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + mode_t umask, dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - priv = this->private; + priv = this->private; - if (priv->feature_enabled == 0) - goto wind; + if (priv->feature_enabled == 0) + goto wind; - local = mem_get0 (this->local_pool); + local = mem_get0(this->local_pool); - MARKER_INIT_LOCAL (frame, local); + MARKER_INIT_LOCAL(frame, local); - ret = loc_copy (&local->loc, loc); + ret = loc_copy(&local->loc, loc); - if (ret == -1) - goto err; + if (ret == -1) + goto err; wind: - 
STACK_WIND (frame, marker_mkdir_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata); + STACK_WIND(frame, marker_mkdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata); - return 0; + return 0; err: - MARKER_STACK_UNWIND (mkdir, frame, -1, ENOMEM, NULL, - NULL, NULL, NULL, NULL); + MARKER_STACK_UNWIND(mkdir, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, NULL); - return 0; + return 0; } - int32_t -marker_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +marker_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + quota_inode_ctx_t *ctx = NULL; - if (op_ret == -1) { - gf_log (this->name, GF_LOG_TRACE, "error occurred " - "while creating file %s", strerror (op_errno)); - } + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, + "error occurred " + "while creating file %s", + strerror(op_errno)); + } - local = (marker_local_t *) frame->local; + local = (marker_local_t *)frame->local; - frame->local = NULL; + frame->local = NULL; + priv = this->private; - STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf, - preparent, postparent, xdata); + if (op_ret >= 0 && inode && (priv->feature_enabled & GF_QUOTA)) { + ctx = mq_inode_ctx_new(inode, this); + if (ctx == NULL) { + gf_log(this->name, GF_LOG_WARNING, + "mq_inode_ctx_new " + "failed for %s", + uuid_utoa(inode->gfid)); + op_ret = -1; + op_errno = ENOMEM; + } + } - if (op_ret == -1 || local == NULL) - goto out; + STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, buf, + preparent, postparent, 
xdata); - if (gf_uuid_is_null (local->loc.gfid)) - gf_uuid_copy (local->loc.gfid, buf->ia_gfid); + if (op_ret == -1 || local == NULL) + goto out; - priv = this->private; + if (gf_uuid_is_null(local->loc.gfid)) + gf_uuid_copy(local->loc.gfid, buf->ia_gfid); - if (priv->feature_enabled & GF_QUOTA) - mq_create_xattrs_txn (this, &local->loc, buf); + if (priv->feature_enabled & GF_QUOTA) + mq_create_xattrs_txn(this, &local->loc, buf); - if (priv->feature_enabled & GF_XTIME) - marker_xtime_update_marks (this, local); + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); out: - marker_local_unref (local); + marker_local_unref(local); - return 0; + return 0; } int32_t -marker_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) +marker_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - priv = this->private; + priv = this->private; - if (priv->feature_enabled == 0) - goto wind; + if (priv->feature_enabled == 0) + goto wind; - local = mem_get0 (this->local_pool); + local = mem_get0(this->local_pool); - MARKER_INIT_LOCAL (frame, local); + MARKER_INIT_LOCAL(frame, local); - ret = loc_copy (&local->loc, loc); + ret = loc_copy(&local->loc, loc); - if (ret == -1) - goto err; + if (ret == -1) + goto err; wind: - STACK_WIND (frame, marker_create_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, - fd, xdata); - return 0; + STACK_WIND(frame, marker_create_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd, + xdata); + return 0; err: - MARKER_STACK_UNWIND (create, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, - NULL, NULL); + MARKER_STACK_UNWIND(create, frame, -1, ENOMEM, NULL, NULL, 
NULL, NULL, NULL, + NULL); - return 0; + return 0; } - int32_t -marker_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) +marker_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - marker_conf_t *priv = NULL; - marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + marker_local_t *local = NULL; - if (op_ret == -1) { - gf_log (this->name, GF_LOG_TRACE, "error occurred " - "while write, %s", strerror (op_errno)); - } + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, + "error occurred " + "while write, %s", + strerror(op_errno)); + } - local = (marker_local_t *) frame->local; + local = (marker_local_t *)frame->local; - frame->local = NULL; + frame->local = NULL; - STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf, - xdata); + STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, prebuf, postbuf, + xdata); - if (op_ret == -1 || local == NULL) - goto out; + if (op_ret == -1 || local == NULL) + goto out; - priv = this->private; + priv = this->private; - if (priv->feature_enabled & GF_QUOTA) - mq_initiate_quota_txn (this, &local->loc, postbuf); + if (priv->feature_enabled & GF_QUOTA) + mq_initiate_quota_txn(this, &local->loc, postbuf); - if (priv->feature_enabled & GF_XTIME) - marker_xtime_update_marks (this, local); + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); out: - marker_local_unref (local); + marker_local_unref(local); - return 0; + return 0; } int32_t -marker_writev (call_frame_t *frame, - xlator_t *this, - fd_t *fd, - struct iovec *vector, - int32_t count, - off_t offset, uint32_t flags, - struct iobref *iobref, dict_t *xdata) +marker_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iovec *vector, int32_t count, off_t offset, uint32_t flags, + struct iobref *iobref, 
dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - priv = this->private; + priv = this->private; - if (priv->feature_enabled == 0) - goto wind; + if (priv->feature_enabled == 0) + goto wind; - local = mem_get0 (this->local_pool); + local = mem_get0(this->local_pool); - MARKER_INIT_LOCAL (frame, local); + MARKER_INIT_LOCAL(frame, local); - ret = marker_inode_loc_fill (fd->inode, &local->loc); + ret = marker_inode_loc_fill(fd->inode, &local->loc); - if (ret == -1) - goto err; + if (ret == -1) + goto err; wind: - STACK_WIND (frame, marker_writev_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->writev, fd, vector, count, offset, - flags, iobref, xdata); - return 0; + STACK_WIND(frame, marker_writev_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->writev, fd, vector, count, offset, + flags, iobref, xdata); + return 0; err: - MARKER_STACK_UNWIND (writev, frame, -1, ENOMEM, NULL, NULL, NULL); + MARKER_STACK_UNWIND(writev, frame, -1, ENOMEM, NULL, NULL, NULL); - return 0; + return 0; } - int32_t -marker_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +marker_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - marker_conf_t *priv = NULL; - marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + call_stub_t *stub = NULL; - if (op_ret == -1) { - gf_log (this->name, GF_LOG_TRACE, "error occurred " - "rmdir %s", strerror (op_errno)); - } + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, + "error occurred " + "rmdir %s", + strerror(op_errno)); + } - local = (marker_local_t *) frame->local; + local = (marker_local_t *)frame->local; - frame->local = NULL; + frame->local = NULL; 
+ priv = this->private; - STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno, preparent, - postparent, xdata); + if (op_ret == -1 || local == NULL) + goto out; - if (op_ret == -1 || local == NULL) - goto out; + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); - priv = this->private; + if (priv->feature_enabled & GF_QUOTA) { + /* If a 'rm -rf' is performed by a client, rmdir can be faster + than marker background mq_reduce_parent_size_txn. + In this case, as part of rmdir parent child association + will be removed in the server protocol. + This can lead to mq_reduce_parent_size_txn failures. - if (priv->feature_enabled & GF_QUOTA) - mq_reduce_parent_size_txn (this, &local->loc, NULL, 1); + So perform mq_reduce_parent_size_txn in foreground + and unwind to server once txn is complete + */ + + stub = fop_rmdir_cbk_stub(frame, default_rmdir_cbk, op_ret, op_errno, + preparent, postparent, xdata); + mq_reduce_parent_size_txn(this, &local->loc, NULL, 1, stub); + + if (stub) { + marker_local_unref(local); + return 0; + } + } - if (priv->feature_enabled & GF_XTIME) - marker_xtime_update_marks (this, local); out: - marker_local_unref (local); + STACK_UNWIND_STRICT(rmdir, frame, op_ret, op_errno, preparent, postparent, + xdata); - return 0; + marker_local_unref(local); + + return 0; } int32_t -marker_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, - dict_t *xdata) +marker_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - priv = this->private; + priv = this->private; - if (priv->feature_enabled == 0) - goto wind; + if (priv->feature_enabled == 0) + goto wind; - local = mem_get0 (this->local_pool); + local = mem_get0(this->local_pool); - MARKER_INIT_LOCAL (frame, local); + MARKER_INIT_LOCAL(frame, local); - ret = loc_copy 
(&local->loc, loc); + ret = loc_copy(&local->loc, loc); - if (ret == -1) - goto err; + if (ret == -1) + goto err; wind: - STACK_WIND (frame, marker_rmdir_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rmdir, loc, flags, xdata); - return 0; + STACK_WIND(frame, marker_rmdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rmdir, loc, flags, xdata); + return 0; err: - MARKER_STACK_UNWIND (rmdir, frame, -1, ENOMEM, NULL, NULL, NULL); + MARKER_STACK_UNWIND(rmdir, frame, -1, ENOMEM, NULL, NULL, NULL); - return 0; + return 0; } - int32_t -marker_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +marker_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - marker_conf_t *priv = NULL; - marker_local_t *local = NULL; - uint32_t nlink = -1; - int32_t ret = 0; + marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + uint32_t nlink = -1; + GF_UNUSED int32_t ret = 0; + call_stub_t *stub = NULL; - if (op_ret == -1) { - gf_log (this->name, GF_LOG_TRACE, - "%s occurred in unlink", strerror (op_errno)); - } + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, "%s occurred in unlink", + strerror(op_errno)); + } - local = (marker_local_t *) frame->local; + local = (marker_local_t *)frame->local; - frame->local = NULL; + frame->local = NULL; + priv = this->private; - STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno, preparent, - postparent, xdata); + if (op_ret == -1 || local == NULL) + goto out; - if (op_ret == -1 || local == NULL) - goto out; + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); - priv = this->private; + if (priv->feature_enabled & GF_QUOTA) { + if (local->skip_txn) + goto out; - if (priv->feature_enabled & GF_QUOTA) { - if (!local->skip_txn) { - if (xdata) - ret = dict_get_uint32 (xdata, - 
GF_RESPONSE_LINK_COUNT_XDATA, &nlink); - - mq_reduce_parent_size_txn (this, &local->loc, NULL, - nlink); - } + if (xdata) { + ret = dict_get_uint32(xdata, GF_RESPONSE_LINK_COUNT_XDATA, &nlink); + if (ret) { + gf_log(this->name, GF_LOG_TRACE, "dict get failed %s ", + strerror(-ret)); + } + } + + /* If a 'rm -rf' is performed by a client, unlink can be faster + than marker background mq_reduce_parent_size_txn. + In this case, as part of unlink parent child association + will be removed in the server protocol. + This can lead to mq_reduce_parent_size_txn failures. + + So perform mq_reduce_parent_size_txn in foreground + and unwind to server once txn is complete + */ + + stub = fop_unlink_cbk_stub(frame, default_unlink_cbk, op_ret, op_errno, + preparent, postparent, xdata); + mq_reduce_parent_size_txn(this, &local->loc, NULL, nlink, stub); + + if (stub) { + marker_local_unref(local); + return 0; } + } - if (priv->feature_enabled & GF_XTIME) - marker_xtime_update_marks (this, local); out: - marker_local_unref (local); + STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, preparent, postparent, + xdata); - return 0; -} + marker_local_unref(local); + return 0; +} int32_t -marker_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, - dict_t *xdata) +marker_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, + dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; - gf_boolean_t dict_free = _gf_false; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + gf_boolean_t dict_free = _gf_false; - priv = this->private; + priv = this->private; - if (priv->feature_enabled == 0) - goto unlink_wind; + if (priv->feature_enabled == 0) + goto unlink_wind; - local = mem_get0 (this->local_pool); - local->xflag = xflag; - if (xdata) - local->xdata = dict_ref (xdata); - MARKER_INIT_LOCAL (frame, local); + local = mem_get0(this->local_pool); + local->xflag = xflag; + if (xdata) + 
local->xdata = dict_ref(xdata); + MARKER_INIT_LOCAL(frame, local); - ret = loc_copy (&local->loc, loc); + ret = loc_copy(&local->loc, loc); - if (ret == -1) - goto err; + if (ret == -1) + goto err; - if (xdata && dict_get (xdata, GLUSTERFS_MARKER_DONT_ACCOUNT_KEY)) { - local->skip_txn = 1; - goto unlink_wind; - } + if (xdata && dict_get(xdata, GLUSTERFS_MARKER_DONT_ACCOUNT_KEY)) { + local->skip_txn = 1; + goto unlink_wind; + } - if (xdata == NULL) { - xdata = dict_new (); - dict_free = _gf_true; - } + if (xdata == NULL) { + xdata = dict_new(); + dict_free = _gf_true; + } - ret = dict_set_int32 (xdata, GF_REQUEST_LINK_COUNT_XDATA, 1); - if (ret < 0) - goto err; + ret = dict_set_int32(xdata, GF_REQUEST_LINK_COUNT_XDATA, 1); + if (ret < 0) + goto err; unlink_wind: - STACK_WIND (frame, marker_unlink_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); - goto out; + STACK_WIND(frame, marker_unlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); + goto out; err: - MARKER_STACK_UNWIND (unlink, frame, -1, ENOMEM, NULL, NULL, NULL); + MARKER_STACK_UNWIND(unlink, frame, -1, ENOMEM, NULL, NULL, NULL); out: - if (dict_free) - dict_unref (xdata); - return 0; + if (dict_free) + dict_unref(xdata); + return 0; } - int32_t -marker_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +marker_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_TRACE, "%s occurred while " - "linking a file ", strerror (op_errno)); - } + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - local = (marker_local_t *) frame->local; + if 
(op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, + "%s occurred while " + "linking a file ", + strerror(op_errno)); + } - frame->local = NULL; + local = (marker_local_t *)frame->local; - STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, buf, - preparent, postparent, xdata); + frame->local = NULL; - if (op_ret == -1 || local == NULL) - goto out; + STACK_UNWIND_STRICT(link, frame, op_ret, op_errno, inode, buf, preparent, + postparent, xdata); - priv = this->private; + if (op_ret == -1 || local == NULL) + goto out; - if (priv->feature_enabled & GF_QUOTA) { - if (!local->skip_txn) - mq_create_xattrs_txn (this, &local->loc, buf); - } + priv = this->private; + if (priv->feature_enabled & GF_QUOTA) { + if (!local->skip_txn) + mq_create_xattrs_txn(this, &local->loc, buf); + } - if (priv->feature_enabled & GF_XTIME) - marker_xtime_update_marks (this, local); + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); out: - marker_local_unref (local); + marker_local_unref(local); - return 0; + return 0; } int32_t -marker_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, - dict_t *xdata) +marker_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - priv = this->private; + priv = this->private; - if (priv->feature_enabled == 0) - goto wind; + if (priv->feature_enabled == 0) + goto wind; - local = mem_get0 (this->local_pool); + local = mem_get0(this->local_pool); - MARKER_INIT_LOCAL (frame, local); + MARKER_INIT_LOCAL(frame, local); - ret = loc_copy (&local->loc, newloc); + ret = loc_copy(&local->loc, newloc); - if (ret == -1) - goto err; + if (ret == -1) + goto err; - if (xdata && dict_get (xdata, GLUSTERFS_MARKER_DONT_ACCOUNT_KEY)) - local->skip_txn = 1; + if (xdata && dict_get(xdata, GLUSTERFS_MARKER_DONT_ACCOUNT_KEY)) + 
local->skip_txn = 1; wind: - STACK_WIND (frame, marker_link_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata); - return 0; + STACK_WIND(frame, marker_link_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata); + return 0; err: - MARKER_STACK_UNWIND (link, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, - NULL); + MARKER_STACK_UNWIND(link, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, NULL); - return 0; + return 0; } - int32_t -marker_rename_done (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +marker_rename_done(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - marker_local_t *local = NULL, *oplocal = NULL; - loc_t newloc = {0, }; - marker_conf_t *priv = NULL; - - local = frame->local; - oplocal = local->oplocal; - - priv = this->private; - - frame->local = NULL; - - if (op_ret < 0) { - gf_log (this->name, GF_LOG_WARNING, - "inodelk (UNLOCK) failed on path:%s (gfid:%s) (%s)", - oplocal->parent_loc.path, - uuid_utoa (oplocal->parent_loc.inode->gfid), - strerror (op_errno)); - } + marker_local_t *local = NULL, *oplocal = NULL; + loc_t newloc = { + 0, + }; + marker_conf_t *priv = NULL; - if (local->err != 0) - goto err; + local = frame->local; + oplocal = local->oplocal; - mq_reduce_parent_size_txn (this, &oplocal->loc, &oplocal->contribution, - -1); + priv = this->private; - if (local->loc.inode != NULL) { - /* If destination file exits before rename, it would have - * been unlinked while renaming a file - */ - mq_reduce_parent_size_txn (this, &local->loc, NULL, - local->ia_nlink); - } + frame->local = NULL; - newloc.inode = inode_ref (oplocal->loc.inode); - newloc.path = gf_strdup (local->loc.path); - newloc.name = strrchr (newloc.path, '/'); - if (newloc.name) - newloc.name++; - newloc.parent = inode_ref (local->loc.parent); + if (op_ret < 0) { + gf_log(this->name, GF_LOG_WARNING, + "inodelk (UNLOCK) failed 
on path:%s (gfid:%s) (%s)", + oplocal->parent_loc.path, + uuid_utoa(oplocal->parent_loc.inode->gfid), strerror(op_errno)); + } - mq_create_xattrs_txn (this, &newloc, &local->buf); + if (local->err != 0) + goto err; - loc_wipe (&newloc); + mq_reduce_parent_size_txn(this, &oplocal->loc, &oplocal->contribution, -1, + NULL); - if (priv->feature_enabled & GF_XTIME) { - //update marks on oldpath - gf_uuid_copy (local->loc.gfid, oplocal->loc.inode->gfid); - marker_xtime_update_marks (this, oplocal); - marker_xtime_update_marks (this, local); - } + if (local->loc.inode != NULL) { + /* If destination file exits before rename, it would have + * been unlinked while renaming a file + */ + mq_reduce_parent_size_txn(this, &local->loc, NULL, local->ia_nlink, + NULL); + } + + newloc.inode = inode_ref(oplocal->loc.inode); + newloc.path = gf_strdup(local->loc.path); + newloc.name = strrchr(newloc.path, '/'); + if (newloc.name) + newloc.name++; + newloc.parent = inode_ref(local->loc.parent); + + mq_create_xattrs_txn(this, &newloc, &local->buf); + + loc_wipe(&newloc); + + if (priv->feature_enabled & GF_XTIME) { + if (!local->loc.inode) + local->loc.inode = inode_ref(oplocal->loc.inode); + // update marks on oldpath + gf_uuid_copy(local->loc.gfid, oplocal->loc.inode->gfid); + marker_xtime_update_marks(this, oplocal); + marker_xtime_update_marks(this, local); + } err: - marker_local_unref (local); - marker_local_unref (oplocal); + marker_local_unref(local); + marker_local_unref(oplocal); - return 0; + return 0; } - void -marker_rename_release_oldp_lock (marker_local_t *local, xlator_t *this) +marker_rename_release_oldp_lock(marker_local_t *local, xlator_t *this) { - marker_local_t *oplocal = NULL; - call_frame_t *lk_frame = NULL; - struct gf_flock lock = {0, }; + marker_local_t *oplocal = NULL; + call_frame_t *lk_frame = NULL; + struct gf_flock lock = { + 0, + }; - oplocal = local->oplocal; - lk_frame = local->lk_frame; + oplocal = local->oplocal; + lk_frame = local->lk_frame; - if 
(lk_frame == NULL) - goto err; + if (lk_frame == NULL) + goto err; - lock.l_type = F_UNLCK; - lock.l_whence = SEEK_SET; - lock.l_start = 0; - lock.l_len = 0; - lock.l_pid = 0; + lock.l_type = F_UNLCK; + lock.l_whence = SEEK_SET; + lock.l_start = 0; + lock.l_len = 0; + lock.l_pid = 0; - STACK_WIND (lk_frame, - marker_rename_done, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->inodelk, - this->name, &oplocal->parent_loc, F_SETLKW, &lock, NULL); + STACK_WIND(lk_frame, marker_rename_done, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->inodelk, this->name, + &oplocal->parent_loc, F_SETLKW, &lock, NULL); - return; + return; err: - marker_local_unref (local); - marker_local_unref (oplocal); + marker_local_unref(local); + marker_local_unref(oplocal); } - int32_t -marker_rename_unwind (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +marker_rename_unwind(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - marker_local_t *local = NULL; - marker_local_t *oplocal = NULL; - quota_inode_ctx_t *ctx = NULL; - inode_contribution_t *contri = NULL; + marker_local_t *local = NULL; + marker_local_t *oplocal = NULL; + quota_inode_ctx_t *ctx = NULL; + inode_contribution_t *contri = NULL; - local = frame->local; - oplocal = local->oplocal; - frame->local = NULL; + local = frame->local; + oplocal = local->oplocal; + frame->local = NULL; - //Reset frame uid and gid if set. - if (cookie == (void *) _GF_UID_GID_CHANGED) - MARKER_RESET_UID_GID (frame, frame->root, local); - - if (op_ret < 0) - local->err = op_errno ? 
op_errno : EINVAL; - - if (local->stub != NULL) { - /* Remove contribution node from in-memory even if - * remove-xattr has failed as the rename is already performed - * if local->stub is set, which means rename was sucessful - */ - mq_inode_ctx_get (oplocal->loc.inode, this, &ctx); - if (ctx) { - contri = mq_get_contribution_node (oplocal->loc.parent, - ctx); - if (contri) { - QUOTA_FREE_CONTRIBUTION_NODE (ctx, contri); - GF_REF_PUT (contri); - } - } - - call_resume (local->stub); - local->stub = NULL; - local->err = 0; - } else if (local->err != 0) { - STACK_UNWIND_STRICT (rename, frame, -1, local->err, NULL, NULL, - NULL, NULL, NULL, NULL); - } else { - gf_log (this->name, GF_LOG_CRITICAL, - "continuation stub to unwind the call is absent, hence " - "call will be hung (call-stack id = %"PRIu64")", - frame->root->unique); - } + // Reset frame uid and gid if set. + if (cookie == (void *)_GF_UID_GID_CHANGED) + MARKER_RESET_UID_GID(frame, frame->root, local); - /* If there are in-progress writes on old-path when during rename - * operation, update txn will update the wrong path if lock - * is released before rename unwind. - * So release lock only after rename unwind - */ - marker_rename_release_oldp_lock (local, this); + if (op_ret < 0) + local->err = op_errno ? 
op_errno : EINVAL; - return 0; + if (local->stub != NULL) { + /* Remove contribution node from in-memory even if + * remove-xattr has failed as the rename is already performed + * if local->stub is set, which means rename was successful + */ + (void)mq_inode_ctx_get(oplocal->loc.inode, this, &ctx); + if (ctx) { + contri = mq_get_contribution_node(oplocal->loc.parent, ctx); + if (contri) { + QUOTA_FREE_CONTRIBUTION_NODE(ctx, contri); + GF_REF_PUT(contri); + } + } + + call_resume(local->stub); + local->stub = NULL; + local->err = 0; + } else if (local->err != 0) { + STACK_UNWIND_STRICT(rename, frame, -1, local->err, NULL, NULL, NULL, + NULL, NULL, NULL); + } else { + gf_log(this->name, GF_LOG_CRITICAL, + "continuation stub to unwind the call is absent, hence " + "call will be hung (call-stack id = %" PRIu64 ")", + frame->root->unique); + } + + /* If there are in-progress writes on old-path when during rename + * operation, update txn will update the wrong path if lock + * is released before rename unwind. 
+ * So release lock only after rename unwind + */ + marker_rename_release_oldp_lock(local, this); + + return 0; } - int32_t -marker_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, - struct iatt *preoldparent, struct iatt *postoldparent, - struct iatt *prenewparent, struct iatt *postnewparent, - dict_t *xdata) -{ - marker_conf_t *priv = NULL; - marker_local_t *local = NULL; - marker_local_t *oplocal = NULL; - call_stub_t *stub = NULL; - int32_t ret = 0; - char contri_key[QUOTA_KEY_MAX] = {0, }; - loc_t newloc = {0, }; - - local = (marker_local_t *) frame->local; +marker_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + struct iatt *preoldparent, struct iatt *postoldparent, + struct iatt *prenewparent, struct iatt *postnewparent, + dict_t *xdata) +{ + marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + marker_local_t *oplocal = NULL; + call_stub_t *stub = NULL; + int32_t ret = 0; + char contri_key[QUOTA_KEY_MAX] = { + 0, + }; + loc_t newloc = { + 0, + }; + + local = (marker_local_t *)frame->local; + + if (local != NULL) { + oplocal = local->oplocal; + } + priv = this->private; + + if (op_ret < 0) { if (local != NULL) { - oplocal = local->oplocal; + local->err = op_errno; } - priv = this->private; - - if (op_ret < 0) { - if (local != NULL) { - local->err = op_errno; - } + gf_log(this->name, GF_LOG_TRACE, + "%s occurred while " + "renaming a file ", + strerror(op_errno)); + } - gf_log (this->name, GF_LOG_TRACE, "%s occurred while " - "renaming a file ", strerror (op_errno)); + if (priv->feature_enabled & GF_QUOTA) { + if ((op_ret < 0) || (local == NULL)) { + goto quota_err; } - if (priv->feature_enabled & GF_QUOTA) { - if ((op_ret < 0) || (local == NULL)) { - goto quota_err; - } - - local->ia_nlink = 0; - if (xdata) - ret = dict_get_uint32 (xdata, - GF_RESPONSE_LINK_COUNT_XDATA, - &local->ia_nlink); - - local->buf = *buf; - 
stub = fop_rename_cbk_stub (frame, default_rename_cbk, op_ret, - op_errno, buf, preoldparent, - postoldparent, prenewparent, - postnewparent, xdata); - if (stub == NULL) { - local->err = ENOMEM; - goto quota_err; - } - - local->stub = stub; - - GET_CONTRI_KEY (this, contri_key, oplocal->loc.parent->gfid, - ret); - if (ret < 0) { - local->err = ENOMEM; - goto quota_err; - } - - /* Removexattr requires uid and gid to be 0, - * reset them in the callback. - */ - MARKER_SET_UID_GID (frame, local, frame->root); - - newloc.inode = inode_ref (oplocal->loc.inode); - newloc.path = gf_strdup (local->loc.path); - newloc.name = strrchr (newloc.path, '/'); - if (newloc.name) - newloc.name++; - newloc.parent = inode_ref (local->loc.parent); - gf_uuid_copy (newloc.gfid, oplocal->loc.inode->gfid); - - STACK_WIND_COOKIE (frame, marker_rename_unwind, - frame->cookie, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->removexattr, - &newloc, contri_key, NULL); - - loc_wipe (&newloc); - } else { - frame->local = NULL; - - STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, buf, - preoldparent, postoldparent, prenewparent, - postnewparent, xdata); - - if ((op_ret < 0) || (local == NULL)) { - goto out; - } - - if (priv->feature_enabled & GF_XTIME) { - //update marks on oldpath - gf_uuid_copy (local->loc.gfid, oplocal->loc.inode->gfid); - marker_xtime_update_marks (this, oplocal); - marker_xtime_update_marks (this, local); - } + local->ia_nlink = 0; + if (xdata) + ret = dict_get_uint32(xdata, GF_RESPONSE_LINK_COUNT_XDATA, + &local->ia_nlink); + + local->buf = *buf; + stub = fop_rename_cbk_stub(frame, default_rename_cbk, op_ret, op_errno, + buf, preoldparent, postoldparent, + prenewparent, postnewparent, xdata); + if (stub == NULL) { + local->err = ENOMEM; + goto quota_err; } -out: - if (!(priv->feature_enabled & GF_QUOTA)) { - marker_local_unref (local); - marker_local_unref (oplocal); + local->stub = stub; + + GET_CONTRI_KEY(this, contri_key, oplocal->loc.parent->gfid, ret); + if (ret < 0) 
{ + local->err = ENOMEM; + goto quota_err; } - return 0; + /* Removexattr requires uid and gid to be 0, + * reset them in the callback. + */ + MARKER_SET_UID_GID(frame, local, frame->root); -quota_err: - marker_rename_unwind (frame, NULL, this, 0, 0, NULL); - return 0; -} + newloc.inode = inode_ref(oplocal->loc.inode); + newloc.path = gf_strdup(local->loc.path); + newloc.name = strrchr(newloc.path, '/'); + if (newloc.name) + newloc.name++; + newloc.parent = inode_ref(local->loc.parent); + gf_uuid_copy(newloc.gfid, oplocal->loc.inode->gfid); + STACK_WIND_COOKIE( + frame, marker_rename_unwind, frame->cookie, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, &newloc, contri_key, NULL); -int32_t -marker_do_rename (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata) -{ - marker_local_t *local = NULL; - marker_local_t *oplocal = NULL; - char contri_key[QUOTA_KEY_MAX] = {0, }; - int32_t ret = 0; - quota_meta_t contribution = {0, }; + loc_wipe(&newloc); + } else { + frame->local = NULL; - local = frame->local; - oplocal = local->oplocal; + STACK_UNWIND_STRICT(rename, frame, op_ret, op_errno, buf, preoldparent, + postoldparent, prenewparent, postnewparent, xdata); - //Reset frame uid and gid if set. - if (cookie == (void *) _GF_UID_GID_CHANGED) - MARKER_RESET_UID_GID (frame, frame->root, local); - - if ((op_ret < 0) && (op_errno != ENOATTR) && (op_errno != ENODATA)) { - local->err = op_errno ? op_errno : EINVAL; - gf_log (this->name, GF_LOG_WARNING, - "fetching contribution values from %s (gfid:%s) " - "failed (%s)", oplocal->loc.path, - uuid_utoa (oplocal->loc.inode->gfid), - strerror (op_errno)); - goto err; + if ((op_ret < 0) || (local == NULL)) { + goto out; } - GET_CONTRI_KEY (this, contri_key, oplocal->loc.parent->gfid, ret); - if (ret < 0) { - local->err = errno ? 
errno : ENOMEM; - goto err; + if (priv->feature_enabled & GF_XTIME) { + // update marks on oldpath + if (!local->loc.inode) + local->loc.inode = inode_ref(oplocal->loc.inode); + gf_uuid_copy(local->loc.gfid, oplocal->loc.inode->gfid); + marker_xtime_update_marks(this, oplocal); + marker_xtime_update_marks(this, local); } - quota_dict_get_meta (dict, contri_key, &contribution); - oplocal->contribution = contribution; + } - STACK_WIND (frame, marker_rename_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rename, &oplocal->loc, - &local->loc, local->xdata); +out: + if (!(priv->feature_enabled & GF_QUOTA)) { + marker_local_unref(local); + marker_local_unref(oplocal); + } - return 0; + return 0; -err: - marker_rename_unwind (frame, NULL, this, 0, 0, NULL); - return 0; +quota_err: + marker_rename_unwind(frame, NULL, this, 0, 0, NULL); + return 0; } int32_t -marker_get_oldpath_contribution (call_frame_t *lk_frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno, dict_t *xdata) -{ - call_frame_t *frame = NULL; - marker_local_t *local = NULL; - marker_local_t *oplocal = NULL; - char contri_key[QUOTA_KEY_MAX] = {0, }; - int32_t ret = 0; +marker_do_rename(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata) +{ + marker_local_t *local = NULL; + marker_local_t *oplocal = NULL; + char contri_key[QUOTA_KEY_MAX] = { + 0, + }; + int keylen = 0; + quota_meta_t contribution = { + 0, + }; + + local = frame->local; + oplocal = local->oplocal; + + // Reset frame uid and gid if set. + if (cookie == (void *)_GF_UID_GID_CHANGED) + MARKER_RESET_UID_GID(frame, frame->root, local); + + if ((op_ret < 0) && (op_errno != ENOATTR) && (op_errno != ENODATA)) { + local->err = op_errno ? 
op_errno : EINVAL; + gf_log(this->name, GF_LOG_WARNING, + "fetching contribution values from %s (gfid:%s) " + "failed (%s)", + oplocal->loc.path, uuid_utoa(oplocal->loc.inode->gfid), + strerror(op_errno)); + goto err; + } + + GET_CONTRI_KEY(this, contri_key, oplocal->loc.parent->gfid, keylen); + if (keylen < 0) { + local->err = errno ? errno : ENOMEM; + goto err; + } + quota_dict_get_meta(dict, contri_key, keylen, &contribution); + oplocal->contribution = contribution; + + STACK_WIND(frame, marker_rename_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rename, &oplocal->loc, &local->loc, + local->xdata); + + return 0; - local = lk_frame->local; - oplocal = local->oplocal; - frame = local->frame; - - if (op_ret < 0) { - local->err = op_errno ? op_errno : EINVAL; - gf_log (this->name, GF_LOG_WARNING, - "cannot hold inodelk on %s (gfid:%s) (%s)", - oplocal->loc.path, uuid_utoa (oplocal->loc.inode->gfid), - strerror (op_errno)); - goto err; +err: + marker_rename_unwind(frame, NULL, this, 0, 0, NULL); + return 0; +} - STACK_DESTROY (local->lk_frame->root); - local->lk_frame = NULL; +int32_t +marker_get_oldpath_contribution(call_frame_t *lk_frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *xdata) +{ + call_frame_t *frame = NULL; + marker_local_t *local = NULL; + marker_local_t *oplocal = NULL; + char contri_key[QUOTA_KEY_MAX] = { + 0, + }; + int32_t ret = 0; + + local = lk_frame->local; + oplocal = local->oplocal; + frame = local->frame; + + if (op_ret < 0) { + local->err = op_errno ? op_errno : EINVAL; + gf_log(this->name, GF_LOG_WARNING, + "cannot hold inodelk on %s (gfid:%s) (%s)", oplocal->loc.path, + uuid_utoa(oplocal->loc.inode->gfid), strerror(op_errno)); + if (local->lk_frame) { + STACK_DESTROY(local->lk_frame->root); + local->lk_frame = NULL; } + goto err; + } - GET_CONTRI_KEY (this, contri_key, oplocal->loc.parent->gfid, ret); - if (ret < 0) { - local->err = errno ? 
errno : ENOMEM; - goto err; - } + GET_CONTRI_KEY(this, contri_key, oplocal->loc.parent->gfid, ret); + if (ret < 0) { + local->err = errno ? errno : ENOMEM; + goto err; + } - /* getxattr requires uid and gid to be 0, - * reset them in the callback. - */ - MARKER_SET_UID_GID (frame, local, frame->root); + /* getxattr requires uid and gid to be 0, + * reset them in the callback. + */ + MARKER_SET_UID_GID(frame, local, frame->root); - if (gf_uuid_is_null (oplocal->loc.gfid)) - gf_uuid_copy (oplocal->loc.gfid, - oplocal->loc.inode->gfid); + if (gf_uuid_is_null(oplocal->loc.gfid)) + gf_uuid_copy(oplocal->loc.gfid, oplocal->loc.inode->gfid); - GF_UUID_ASSERT (oplocal->loc.gfid); + GF_UUID_ASSERT(oplocal->loc.gfid); - STACK_WIND_COOKIE (frame, marker_do_rename, - frame->cookie, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->getxattr, - &oplocal->loc, contri_key, NULL); + STACK_WIND_COOKIE(frame, marker_do_rename, frame->cookie, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->getxattr, &oplocal->loc, + contri_key, NULL); - return 0; + return 0; err: - marker_rename_unwind (frame, NULL, this, 0, 0, NULL); - return 0; + marker_rename_unwind(frame, NULL, this, 0, 0, NULL); + return 0; } - /* For a marker_rename FOP, following is the algorithm used for Quota * accounting. The use-case considered is: * 1. rename (src, dst) @@ -1554,7 +1629,7 @@ err: * b) we should subtract from src-parent exactly what we contributed to * src-parent * So, We hold a lock on src-parent to block any parallel transcations on - * src-inode (since thats the one which survives rename). + * src-inode (since that's the one which survives rename). * * If there are any parallel transactions on dst-inode they keep succeeding * till the association of dst-inode with dst-parent is broken because of an @@ -1616,589 +1691,631 @@ err: * 10) create contribution xattr to dst-parent on src-inode. 
*/ int32_t -marker_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, - loc_t *newloc, dict_t *xdata) +marker_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; - marker_local_t *oplocal = NULL; - marker_conf_t *priv = NULL; - struct gf_flock lock = {0, }; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_local_t *oplocal = NULL; + marker_conf_t *priv = NULL; + struct gf_flock lock = { + 0, + }; - priv = this->private; + priv = this->private; - if (priv->feature_enabled == 0) - goto rename_wind; + if (priv->feature_enabled == 0) + goto rename_wind; - local = mem_get0 (this->local_pool); + local = mem_get0(this->local_pool); - MARKER_INIT_LOCAL (frame, local); + MARKER_INIT_LOCAL(frame, local); - oplocal = mem_get0 (this->local_pool); + oplocal = mem_get0(this->local_pool); - MARKER_INIT_LOCAL (frame, oplocal); + MARKER_INIT_LOCAL(frame, oplocal); - frame->local = local; + frame->local = local; - local->oplocal = marker_local_ref (oplocal); + local->oplocal = marker_local_ref(oplocal); - ret = loc_copy (&local->loc, newloc); - if (ret < 0) - goto err; + ret = loc_copy(&local->loc, newloc); + if (ret < 0) + goto err; - ret = loc_copy (&oplocal->loc, oldloc); - if (ret < 0) - goto err; + ret = loc_copy(&oplocal->loc, oldloc); + if (ret < 0) + goto err; - if (!(priv->feature_enabled & GF_QUOTA)) { - goto rename_wind; - } + if (!(priv->feature_enabled & GF_QUOTA)) { + goto rename_wind; + } - ret = mq_inode_loc_fill (NULL, newloc->parent, &local->parent_loc); - if (ret < 0) - goto err; + ret = mq_inode_loc_fill(NULL, newloc->parent, &local->parent_loc); + if (ret < 0) + goto err; - ret = mq_inode_loc_fill (NULL, oldloc->parent, &oplocal->parent_loc); - if (ret < 0) - goto err; + ret = mq_inode_loc_fill(NULL, oldloc->parent, &oplocal->parent_loc); + if (ret < 0) + goto err; - lock.l_len = 0; - lock.l_start = 0; - lock.l_type = F_WRLCK; - lock.l_whence = 
SEEK_SET; + lock.l_len = 0; + lock.l_start = 0; + lock.l_type = F_WRLCK; + lock.l_whence = SEEK_SET; - local->xdata = xdata ? dict_ref (xdata) : dict_new (); - ret = dict_set_int32 (local->xdata, GF_REQUEST_LINK_COUNT_XDATA, 1); - if (ret < 0) - goto err; + local->xdata = xdata ? dict_ref(xdata) : dict_new(); + ret = dict_set_int32(local->xdata, GF_REQUEST_LINK_COUNT_XDATA, 1); + if (ret < 0) + goto err; - local->frame = frame; - local->lk_frame = create_frame (this, this->ctx->pool); - if (local->lk_frame == NULL) - goto err; + local->frame = frame; + local->lk_frame = create_frame(this, this->ctx->pool); + if (local->lk_frame == NULL) + goto err; - local->lk_frame->root->uid = 0; - local->lk_frame->root->gid = 0; - local->lk_frame->local = local; - set_lk_owner_from_ptr (&local->lk_frame->root->lk_owner, - local->lk_frame->root); + local->lk_frame->root->uid = 0; + local->lk_frame->root->gid = 0; + local->lk_frame->local = local; + set_lk_owner_from_ptr(&local->lk_frame->root->lk_owner, + local->lk_frame->root); - STACK_WIND (local->lk_frame, - marker_get_oldpath_contribution, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->inodelk, - this->name, &oplocal->parent_loc, - F_SETLKW, &lock, NULL); + STACK_WIND(local->lk_frame, marker_get_oldpath_contribution, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->inodelk, this->name, + &oplocal->parent_loc, F_SETLKW, &lock, NULL); - return 0; + return 0; rename_wind: - STACK_WIND (frame, marker_rename_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); + STACK_WIND(frame, marker_rename_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); - return 0; + return 0; err: - MARKER_STACK_UNWIND (rename, frame, -1, ENOMEM, NULL, - NULL, NULL, NULL, NULL, NULL); - marker_local_unref (oplocal); + MARKER_STACK_UNWIND(rename, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, NULL, + NULL); + marker_local_unref(oplocal); - return 0; + return 0; } - int32_t -marker_truncate_cbk 
(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) +marker_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - if (op_ret == -1) { - gf_log (this->name, GF_LOG_TRACE, "%s occurred while " - "truncating a file ", strerror (op_errno)); - } + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, + "%s occurred while " + "truncating a file ", + strerror(op_errno)); + } - local = (marker_local_t *) frame->local; + local = (marker_local_t *)frame->local; - frame->local = NULL; + frame->local = NULL; - STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf, - postbuf, xdata); - - if (op_ret == -1 || local == NULL) - goto out; + STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, prebuf, postbuf, + xdata); - priv = this->private; + if (op_ret == -1 || local == NULL) + goto out; - if (priv->feature_enabled & GF_QUOTA) - mq_initiate_quota_txn (this, &local->loc, postbuf); + priv = this->private; + + if (priv->feature_enabled & GF_QUOTA) { + /* DHT Rebalance process, at the end of migration will + * first make the src file as a linkto file and then + * truncate the file. By doing a truncate after making the + * src file as linkto file, the contri which is already + * accounted is left over. + * So, we need to account for the linkto file when a truncate + * happens, thereby updating the contri properly. + * By passing NULL for postbuf, mq_prevalidate does not check + * for linkto file. + * Same happens with ftruncate as well. 
+ */ + if (postbuf && IS_DHT_LINKFILE_MODE(postbuf)) + mq_initiate_quota_txn(this, &local->loc, NULL); + else + mq_initiate_quota_txn(this, &local->loc, postbuf); + } - if (priv->feature_enabled & GF_XTIME) - marker_xtime_update_marks (this, local); + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); out: - marker_local_unref (local); + marker_local_unref(local); - return 0; + return 0; } int32_t -marker_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, - dict_t *xdata) +marker_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - priv = this->private; + priv = this->private; - if (priv->feature_enabled == 0) - goto wind; + if (priv->feature_enabled == 0) + goto wind; - local = mem_get0 (this->local_pool); + local = mem_get0(this->local_pool); - MARKER_INIT_LOCAL (frame, local); + MARKER_INIT_LOCAL(frame, local); - ret = loc_copy (&local->loc, loc); + ret = loc_copy(&local->loc, loc); - if (ret == -1) - goto err; + if (ret == -1) + goto err; wind: - STACK_WIND (frame, marker_truncate_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); - return 0; + STACK_WIND(frame, marker_truncate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); + return 0; err: - MARKER_STACK_UNWIND (truncate, frame, -1, ENOMEM, NULL, NULL, NULL); + MARKER_STACK_UNWIND(truncate, frame, -1, ENOMEM, NULL, NULL, NULL); - return 0; + return 0; } - int32_t -marker_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) +marker_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { 
- marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - if (op_ret == -1) { - gf_log (this->name, GF_LOG_TRACE, "%s occurred while " - "truncating a file ", strerror (op_errno)); - } + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, + "%s occurred while " + "truncating a file ", + strerror(op_errno)); + } - local = (marker_local_t *) frame->local; + local = (marker_local_t *)frame->local; - frame->local = NULL; + frame->local = NULL; - STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, prebuf, - postbuf, xdata); + STACK_UNWIND_STRICT(ftruncate, frame, op_ret, op_errno, prebuf, postbuf, + xdata); - if (op_ret == -1 || local == NULL) - goto out; + if (op_ret == -1 || local == NULL) + goto out; - priv = this->private; + priv = this->private; - if (priv->feature_enabled & GF_QUOTA) - mq_initiate_quota_txn (this, &local->loc, postbuf); + if (priv->feature_enabled & GF_QUOTA) { + if (postbuf && IS_DHT_LINKFILE_MODE(postbuf)) + mq_initiate_quota_txn(this, &local->loc, NULL); + else + mq_initiate_quota_txn(this, &local->loc, postbuf); + } - if (priv->feature_enabled & GF_XTIME) - marker_xtime_update_marks (this, local); + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); out: - marker_local_unref (local); + marker_local_unref(local); - return 0; + return 0; } int32_t -marker_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, - dict_t *xdata) +marker_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - priv = this->private; + priv = this->private; - if (priv->feature_enabled == 0) - goto wind; + if (priv->feature_enabled == 0) + goto wind; - local = mem_get0 (this->local_pool); + local = mem_get0(this->local_pool); - MARKER_INIT_LOCAL (frame, 
local); + MARKER_INIT_LOCAL(frame, local); - ret = marker_inode_loc_fill (fd->inode, &local->loc); + ret = marker_inode_loc_fill(fd->inode, &local->loc); - if (ret == -1) - goto err; + if (ret == -1) + goto err; wind: - STACK_WIND (frame, marker_ftruncate_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); - return 0; + STACK_WIND(frame, marker_ftruncate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); + return 0; err: - MARKER_STACK_UNWIND (ftruncate, frame, -1, ENOMEM, NULL, NULL, NULL); + MARKER_STACK_UNWIND(ftruncate, frame, -1, ENOMEM, NULL, NULL, NULL); - return 0; + return 0; } - int32_t -marker_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +marker_symlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - marker_conf_t *priv = NULL; - marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + quota_inode_ctx_t *ctx = NULL; - if (op_ret == -1) { - gf_log (this->name, GF_LOG_TRACE, "%s occurred while " - "creating symlinks ", strerror (op_errno)); - } + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, + "%s occurred while " + "creating symlinks ", + strerror(op_errno)); + } - local = (marker_local_t *) frame->local; + local = (marker_local_t *)frame->local; - frame->local = NULL; + frame->local = NULL; + priv = this->private; - STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf, - preparent, postparent, xdata); + if (op_ret >= 0 && inode && (priv->feature_enabled & GF_QUOTA)) { + ctx = mq_inode_ctx_new(inode, this); + if (ctx == NULL) { + gf_log(this->name, GF_LOG_WARNING, + "mq_inode_ctx_new " + "failed for %s", + uuid_utoa(inode->gfid)); + 
op_ret = -1; + op_errno = ENOMEM; + } + } - if (op_ret == -1 || local == NULL) - goto out; + STACK_UNWIND_STRICT(symlink, frame, op_ret, op_errno, inode, buf, preparent, + postparent, xdata); - if (gf_uuid_is_null (local->loc.gfid)) - gf_uuid_copy (local->loc.gfid, buf->ia_gfid); + if (op_ret == -1 || local == NULL) + goto out; - priv = this->private; + if (gf_uuid_is_null(local->loc.gfid)) + gf_uuid_copy(local->loc.gfid, buf->ia_gfid); - if (priv->feature_enabled & GF_QUOTA) { - mq_create_xattrs_txn (this, &local->loc, buf); - } + if (priv->feature_enabled & GF_QUOTA) { + mq_create_xattrs_txn(this, &local->loc, buf); + } - if (priv->feature_enabled & GF_XTIME) - marker_xtime_update_marks (this, local); + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); out: - marker_local_unref (local); + marker_local_unref(local); - return 0; + return 0; } int -marker_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath, - loc_t *loc, mode_t umask, dict_t *xdata) +marker_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath, + loc_t *loc, mode_t umask, dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - priv = this->private; + priv = this->private; - if (priv->feature_enabled == 0) - goto wind; + if (priv->feature_enabled == 0) + goto wind; - local = mem_get0 (this->local_pool); + local = mem_get0(this->local_pool); - MARKER_INIT_LOCAL (frame, local); + MARKER_INIT_LOCAL(frame, local); - ret = loc_copy (&local->loc, loc); + ret = loc_copy(&local->loc, loc); - if (ret == -1) - goto err; + if (ret == -1) + goto err; wind: - STACK_WIND (frame, marker_symlink_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->symlink, linkpath, loc, umask, - xdata); - return 0; + STACK_WIND(frame, marker_symlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->symlink, linkpath, loc, umask, xdata); + return 0; err: 
- MARKER_STACK_UNWIND (symlink, frame, -1, ENOMEM, NULL, - NULL, NULL, NULL, NULL); + MARKER_STACK_UNWIND(symlink, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, + NULL); - return 0; + return 0; } - int32_t -marker_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +marker_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + quota_inode_ctx_t *ctx = NULL; - if (op_ret == -1) { - gf_log (this->name, GF_LOG_TRACE, "%s occurred with " - "mknod ", strerror (op_errno)); - } + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, + "%s occurred with " + "mknod ", + strerror(op_errno)); + } - local = (marker_local_t *) frame->local; + local = (marker_local_t *)frame->local; - frame->local = NULL; + frame->local = NULL; + priv = this->private; - STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, inode, - buf, preparent, postparent, xdata); + if (op_ret >= 0 && inode && (priv->feature_enabled & GF_QUOTA)) { + ctx = mq_inode_ctx_new(inode, this); + if (ctx == NULL) { + gf_log(this->name, GF_LOG_WARNING, + "mq_inode_ctx_new " + "failed for %s", + uuid_utoa(inode->gfid)); + op_ret = -1; + op_errno = ENOMEM; + } + } - if (op_ret == -1 || local == NULL) - goto out; + STACK_UNWIND_STRICT(mknod, frame, op_ret, op_errno, inode, buf, preparent, + postparent, xdata); - if (gf_uuid_is_null (local->loc.gfid)) - gf_uuid_copy (local->loc.gfid, buf->ia_gfid); + if (op_ret == -1 || local == NULL) + goto out; - priv = this->private; + if (gf_uuid_is_null(local->loc.gfid)) + gf_uuid_copy(local->loc.gfid, buf->ia_gfid); - if ((priv->feature_enabled & GF_QUOTA) && (S_ISREG 
(local->mode))) { - mq_create_xattrs_txn (this, &local->loc, buf); - } + if ((priv->feature_enabled & GF_QUOTA) && (S_ISREG(local->mode))) { + mq_create_xattrs_txn(this, &local->loc, buf); + } - if (priv->feature_enabled & GF_XTIME) - marker_xtime_update_marks (this, local); + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); out: - marker_local_unref (local); + marker_local_unref(local); - return 0; + return 0; } int -marker_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, - dev_t rdev, mode_t umask, dict_t *xdata) +marker_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + dev_t rdev, mode_t umask, dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - priv = this->private; + priv = this->private; - if (priv->feature_enabled == 0) - goto wind; + if (priv->feature_enabled == 0) + goto wind; - local = mem_get0 (this->local_pool); + local = mem_get0(this->local_pool); - MARKER_INIT_LOCAL (frame, local); + MARKER_INIT_LOCAL(frame, local); - ret = loc_copy (&local->loc, loc); + ret = loc_copy(&local->loc, loc); - local->mode = mode; + local->mode = mode; - if (ret == -1) - goto err; + if (ret == -1) + goto err; wind: - STACK_WIND (frame, marker_mknod_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, - xdata); - return 0; + STACK_WIND(frame, marker_mknod_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata); + return 0; err: - MARKER_STACK_UNWIND (mknod, frame, -1, ENOMEM, NULL, - NULL, NULL, NULL, NULL); + MARKER_STACK_UNWIND(mknod, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, NULL); - return 0; + return 0; } - int32_t -marker_fallocate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) 
+marker_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - if (op_ret == -1) { - gf_log (this->name, GF_LOG_TRACE, "%s occurred while " - "fallocating a file ", strerror (op_errno)); - } + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, + "%s occurred while " + "fallocating a file ", + strerror(op_errno)); + } - local = (marker_local_t *) frame->local; + local = (marker_local_t *)frame->local; - frame->local = NULL; + frame->local = NULL; - STACK_UNWIND_STRICT (fallocate, frame, op_ret, op_errno, prebuf, - postbuf, xdata); + STACK_UNWIND_STRICT(fallocate, frame, op_ret, op_errno, prebuf, postbuf, + xdata); - if (op_ret == -1 || local == NULL) - goto out; + if (op_ret == -1 || local == NULL) + goto out; - priv = this->private; + priv = this->private; - if (priv->feature_enabled & GF_QUOTA) - mq_initiate_quota_txn (this, &local->loc, postbuf); + if (priv->feature_enabled & GF_QUOTA) + mq_initiate_quota_txn(this, &local->loc, postbuf); - if (priv->feature_enabled & GF_XTIME) - marker_xtime_update_marks (this, local); + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); out: - marker_local_unref (local); + marker_local_unref(local); - return 0; + return 0; } int32_t marker_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, - off_t offset, size_t len, dict_t *xdata) + off_t offset, size_t len, dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - priv = this->private; + priv = this->private; - if (priv->feature_enabled == 0) - goto wind; + if (priv->feature_enabled == 0) + goto wind; - local = mem_get0 (this->local_pool); + local = 
mem_get0(this->local_pool); - MARKER_INIT_LOCAL (frame, local); + MARKER_INIT_LOCAL(frame, local); - ret = marker_inode_loc_fill (fd->inode, &local->loc); + ret = marker_inode_loc_fill(fd->inode, &local->loc); - if (ret == -1) - goto err; + if (ret == -1) + goto err; wind: - STACK_WIND (frame, marker_fallocate_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len, - xdata); - return 0; + STACK_WIND(frame, marker_fallocate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len, + xdata); + return 0; err: - MARKER_STACK_UNWIND (fallocate, frame, -1, ENOMEM, NULL, NULL, NULL); + MARKER_STACK_UNWIND(fallocate, frame, -1, ENOMEM, NULL, NULL, NULL); - return 0; + return 0; } - int32_t marker_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) { - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - if (op_ret == -1) { - gf_log (this->name, GF_LOG_TRACE, "%s occurred during discard", - strerror (op_errno)); - } + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, "%s occurred during discard", + strerror(op_errno)); + } - local = (marker_local_t *) frame->local; + local = (marker_local_t *)frame->local; - frame->local = NULL; + frame->local = NULL; - STACK_UNWIND_STRICT (discard, frame, op_ret, op_errno, prebuf, - postbuf, xdata); + STACK_UNWIND_STRICT(discard, frame, op_ret, op_errno, prebuf, postbuf, + xdata); - if (op_ret == -1 || local == NULL) - goto out; + if (op_ret == -1 || local == NULL) + goto out; - priv = this->private; + priv = this->private; - if (priv->feature_enabled & GF_QUOTA) - mq_initiate_quota_txn (this, &local->loc, postbuf); + if (priv->feature_enabled & GF_QUOTA) + mq_initiate_quota_txn(this, &local->loc, postbuf); - if (priv->feature_enabled & GF_XTIME) - marker_xtime_update_marks (this, local); + if 
(priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); out: - marker_local_unref (local); + marker_local_unref(local); - return 0; + return 0; } int32_t marker_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, - size_t len, dict_t *xdata) + size_t len, dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - priv = this->private; + priv = this->private; - if (priv->feature_enabled == 0) - goto wind; + if (priv->feature_enabled == 0) + goto wind; - local = mem_get0 (this->local_pool); + local = mem_get0(this->local_pool); - MARKER_INIT_LOCAL (frame, local); + MARKER_INIT_LOCAL(frame, local); - ret = marker_inode_loc_fill (fd->inode, &local->loc); + ret = marker_inode_loc_fill(fd->inode, &local->loc); - if (ret == -1) - goto err; + if (ret == -1) + goto err; wind: - STACK_WIND (frame, marker_discard_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata); - return 0; + STACK_WIND(frame, marker_discard_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata); + return 0; err: - MARKER_STACK_UNWIND (discard, frame, -1, ENOMEM, NULL, NULL, NULL); + MARKER_STACK_UNWIND(discard, frame, -1, ENOMEM, NULL, NULL, NULL); - return 0; + return 0; } int32_t marker_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - if (op_ret == -1) { - gf_log (this->name, GF_LOG_TRACE, "%s occurred during zerofill", - strerror (op_errno)); - } + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, "%s occurred during zerofill", + strerror(op_errno)); + 
} - local = (marker_local_t *) frame->local; + local = (marker_local_t *)frame->local; - frame->local = NULL; + frame->local = NULL; - STACK_UNWIND_STRICT (zerofill, frame, op_ret, op_errno, prebuf, - postbuf, xdata); + STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, prebuf, postbuf, + xdata); - if (op_ret == -1 || local == NULL) - goto out; + if (op_ret == -1 || local == NULL) + goto out; - priv = this->private; + priv = this->private; - if (priv->feature_enabled & GF_QUOTA) - mq_initiate_quota_txn (this, &local->loc, postbuf); + if (priv->feature_enabled & GF_QUOTA) + mq_initiate_quota_txn(this, &local->loc, postbuf); - if (priv->feature_enabled & GF_XTIME) - marker_xtime_update_marks (this, local); + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); out: - marker_local_unref (local); + marker_local_unref(local); - return 0; + return 0; } int32_t marker_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, - off_t len, dict_t *xdata) + off_t len, dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - priv = this->private; + priv = this->private; - if (priv->feature_enabled == 0) - goto wind; + if (priv->feature_enabled == 0) + goto wind; - local = mem_get0 (this->local_pool); + local = mem_get0(this->local_pool); - MARKER_INIT_LOCAL (frame, local); + MARKER_INIT_LOCAL(frame, local); - ret = marker_inode_loc_fill (fd->inode, &local->loc); + ret = marker_inode_loc_fill(fd->inode, &local->loc); - if (ret == -1) - goto err; + if (ret == -1) + goto err; wind: - STACK_WIND (frame, marker_zerofill_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata); - return 0; + STACK_WIND(frame, marker_zerofill_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata); + return 0; err: - MARKER_STACK_UNWIND (zerofill, frame, -1, ENOMEM, NULL, NULL, 
NULL); + MARKER_STACK_UNWIND(zerofill, frame, -1, ENOMEM, NULL, NULL, NULL); - return 0; + return 0; } - /* when a call from the special client is received on * key trusted.glusterfs.volume-mark with value "RESET" * or if the value is 0length, update the change the @@ -2206,1182 +2323,1246 @@ err: * timestamp file. */ int32_t -call_from_sp_client_to_reset_tmfile (call_frame_t *frame, - xlator_t *this, - dict_t *dict) +call_from_sp_client_to_reset_tmfile(call_frame_t *frame, xlator_t *this, + dict_t *dict) { - int32_t fd = 0; - int32_t op_ret = 0; - int32_t op_errno = 0; - data_t *data = NULL; - marker_conf_t *priv = NULL; + int32_t fd = 0; + int32_t op_ret = 0; + int32_t op_errno = 0; + data_t *data = NULL; + marker_conf_t *priv = NULL; - if (frame == NULL || this == NULL || dict == NULL) - return -1; + if (frame == NULL || this == NULL || dict == NULL) + return -1; - priv = this->private; + priv = this->private; - data = dict_get (dict, "trusted.glusterfs.volume-mark"); - if (data == NULL) - return -1; + data = dict_get(dict, "trusted.glusterfs.volume-mark"); + if (data == NULL) + return -1; - if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { - op_ret = -1; - op_errno = EPERM; + if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { + op_ret = -1; + op_errno = EPERM; - goto out; + goto out; + } + + if (data->len == 0 || + (data->len == 5 && memcmp(data->data, "RESET", 5) == 0)) { + fd = open(priv->timestamp_file, O_WRONLY | O_TRUNC); + if (fd != -1) { + /* TODO check whether the O_TRUNC would update the + * timestamps on a zero length file on all machies. + */ + sys_close(fd); } - if (data->len == 0 || (data->len == 5 && - memcmp (data->data, "RESET", 5) == 0)) { - fd = open (priv->timestamp_file, O_WRONLY|O_TRUNC); - if (fd != -1) { - /* TODO check whether the O_TRUNC would update the - * timestamps on a zero length file on all machies. 
- */ - sys_close (fd); - } - - if (fd != -1 || errno == ENOENT) { - op_ret = 0; - op_errno = 0; - } else { - op_ret = -1; - op_errno = errno; - } + if (fd != -1 || errno == ENOENT) { + op_ret = 0; + op_errno = 0; } else { - op_ret = -1; - op_errno = EINVAL; + op_ret = -1; + op_errno = errno; } + } else { + op_ret = -1; + op_errno = EINVAL; + } out: - STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, NULL); + STACK_UNWIND_STRICT(setxattr, frame, op_ret, op_errno, NULL); - return 0; + return 0; } - int32_t -marker_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +marker_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - if (op_ret == -1) { - gf_log (this->name, GF_LOG_TRACE, "%s occurred in " - "setxattr ", strerror (op_errno)); - } + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, + "%s occurred in " + "setxattr ", + strerror(op_errno)); + } - local = (marker_local_t *) frame->local; + local = (marker_local_t *)frame->local; - frame->local = NULL; + frame->local = NULL; - STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata); + STACK_UNWIND_STRICT(setxattr, frame, op_ret, op_errno, xdata); - if (op_ret == -1 || local == NULL) - goto out; + if (op_ret == -1 || local == NULL) + goto out; - priv = this->private; + priv = this->private; - if (priv->feature_enabled & GF_XTIME) - marker_xtime_update_marks (this, local); + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); out: - marker_local_unref (local); + marker_local_unref(local); - return 0; + return 0; } int -remove_quota_keys (dict_t *dict, char *k, data_t *v, void *data) -{ - call_frame_t *frame = data; - marker_local_t *local = frame->local; - xlator_t *this = frame->this; - int ret = -1; - - ret = 
syncop_removexattr (FIRST_CHILD (this), &local->loc, k, 0, NULL); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "%s: Failed to remove " - "extended attribute: %s", local->loc.path, k); - return -1; - } - return 0; +remove_quota_keys(dict_t *dict, char *k, data_t *v, void *data) +{ + call_frame_t *frame = data; + marker_local_t *local = frame->local; + xlator_t *this = frame->this; + marker_conf_t *priv = NULL; + char ver_str[NAME_MAX] = { + 0, + }; + char *dot = NULL; + int ret = -1; + + priv = this->private; + + /* If quota is enabled immediately after disable. + * quota healing starts creating new xattrs + * before completing the cleanup operation. + * So we should check if the xattr is the new. + * Do not remove xattr if its xattr + * version is same as current version + */ + if ((priv->feature_enabled & GF_QUOTA) && priv->version > 0) { + snprintf(ver_str, sizeof(ver_str), ".%d", priv->version); + dot = strrchr(k, '.'); + if (dot && !strcmp(dot, ver_str)) + return 0; + } + + ret = syncop_removexattr(FIRST_CHILD(this), &local->loc, k, 0, NULL); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, + "%s: Failed to remove " + "extended attribute: %s", + local->loc.path, k); + return -1; + } + return 0; } int -quota_xattr_cleaner_cbk (int ret, call_frame_t *frame, void *args) +quota_xattr_cleaner_cbk(int ret, call_frame_t *frame, void *args) { - dict_t *xdata = args; - int op_ret = -1; - int op_errno = 0; + dict_t *xdata = args; + int op_ret = -1; + int op_errno = 0; - op_ret = (ret < 0)? -1: 0; - op_errno = -ret; + op_ret = (ret < 0) ? 
-1 : 0; + op_errno = -ret; - MARKER_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata); - return ret; + MARKER_STACK_UNWIND(setxattr, frame, op_ret, op_errno, xdata); + return ret; } int -quota_xattr_cleaner (void *args) +quota_xattr_cleaner(void *args) { - struct synctask *task = NULL; - call_frame_t *frame = NULL; - xlator_t *this = NULL; - marker_local_t *local = NULL; - dict_t *xdata = NULL; - int ret = -1; + struct synctask *task = NULL; + call_frame_t *frame = NULL; + xlator_t *this = NULL; + marker_local_t *local = NULL; + dict_t *xdata = NULL; + int ret = -1; - task = synctask_get (); - if (!task) - goto out; + task = synctask_get(); + if (!task) + goto out; - frame = task->frame; - this = frame->this; - local = frame->local; + frame = task->frame; + this = frame->this; + local = frame->local; - ret = syncop_listxattr (FIRST_CHILD(this), &local->loc, &xdata, NULL, - NULL); - if (ret == -1) { - ret = -errno; - goto out; - } + ret = syncop_listxattr(FIRST_CHILD(this), &local->loc, &xdata, NULL, NULL); + if (ret == -1) { + ret = -errno; + goto out; + } - ret = dict_foreach_fnmatch (xdata, "trusted.glusterfs.quota.*", - remove_quota_keys, frame); - if (ret == -1) { - ret = -errno; - goto out; - } - ret = dict_foreach_fnmatch (xdata, PGFID_XATTR_KEY_PREFIX"*", - remove_quota_keys, frame); - if (ret == -1) { - ret = -errno; - goto out; - } + ret = dict_foreach_fnmatch(xdata, "trusted.glusterfs.quota.*", + remove_quota_keys, frame); + if (ret == -1) { + ret = -errno; + goto out; + } + ret = dict_foreach_fnmatch(xdata, PGFID_XATTR_KEY_PREFIX "*", + remove_quota_keys, frame); + if (ret == -1) { + ret = -errno; + goto out; + } - ret = 0; + ret = 0; out: - if (xdata) - dict_unref (xdata); + if (xdata) + dict_unref(xdata); - return ret; + return ret; } int -marker_do_xattr_cleanup (call_frame_t *frame, xlator_t *this, dict_t *xdata, +marker_do_xattr_cleanup(call_frame_t *frame, xlator_t *this, dict_t *xdata, loc_t *loc) { - int ret = -1; - marker_local_t *local = 
NULL; + int ret = -1; + marker_local_t *local = NULL; - local = mem_get0 (this->local_pool); - if (!local) - goto out; + local = mem_get0(this->local_pool); + if (!local) + goto out; - MARKER_INIT_LOCAL (frame, local); + MARKER_INIT_LOCAL(frame, local); - loc_copy (&local->loc, loc); - ret = synctask_new (this->ctx->env, quota_xattr_cleaner, - quota_xattr_cleaner_cbk, frame, xdata); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Failed to create synctask " - "for cleaning up quota extended attributes"); - goto out; - } + loc_copy(&local->loc, loc); + ret = synctask_new(this->ctx->env, quota_xattr_cleaner, + quota_xattr_cleaner_cbk, frame, xdata); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, + "Failed to create synctask " + "for cleaning up quota extended attributes"); + goto out; + } - ret = 0; + ret = 0; out: - if (ret) - MARKER_STACK_UNWIND (setxattr, frame, -1, ENOMEM, xdata); + if (ret) + MARKER_STACK_UNWIND(setxattr, frame, -1, ENOMEM, xdata); - return ret; + return ret; } static gf_boolean_t -marker_xattr_cleanup_cmd (dict_t *dict) +marker_xattr_cleanup_cmd(dict_t *dict) { - return (dict_get (dict, VIRTUAL_QUOTA_XATTR_CLEANUP_KEY) != NULL); + return (dict_get(dict, VIRTUAL_QUOTA_XATTR_CLEANUP_KEY) != NULL); } int32_t -marker_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, - int32_t flags, dict_t *xdata) +marker_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + int32_t flags, dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; - int op_errno = ENOMEM; - - priv = this->private; - - if (marker_xattr_cleanup_cmd (dict)) { - if (frame->root->uid != 0 || frame->root->gid != 0) { - op_errno = EPERM; - ret = -1; - goto err; - } - - /* The following function does the cleanup and then unwinds the - * corresponding call*/ - loc_path (loc, NULL); - marker_do_xattr_cleanup (frame, this, xdata, loc); - return 0; + int32_t ret = 0; + marker_local_t *local = NULL; + 
marker_conf_t *priv = NULL; + int op_errno = ENOMEM; + + priv = this->private; + + if (marker_xattr_cleanup_cmd(dict)) { + if (frame->root->uid != 0 || frame->root->gid != 0) { + op_errno = EPERM; + ret = -1; + goto err; } - ret = marker_key_replace_with_ver (this, dict); - if (ret < 0) - goto err; + /* The following function does the cleanup and then unwinds the + * corresponding call*/ + loc_path(loc, NULL); + marker_do_xattr_cleanup(frame, this, xdata, loc); + return 0; + } - if (priv->feature_enabled == 0) - goto wind; + ret = marker_key_replace_with_ver(this, dict); + if (ret < 0) + goto err; - ret = call_from_sp_client_to_reset_tmfile (frame, this, dict); - if (ret == 0) - return 0; + if (priv->feature_enabled == 0) + goto wind; - local = mem_get0 (this->local_pool); + ret = call_from_sp_client_to_reset_tmfile(frame, this, dict); + if (ret == 0) + return 0; - MARKER_INIT_LOCAL (frame, local); + local = mem_get0(this->local_pool); - ret = loc_copy (&local->loc, loc); + MARKER_INIT_LOCAL(frame, local); - if (ret == -1) - goto err; + ret = loc_copy(&local->loc, loc); + + if (ret == -1) + goto err; wind: - STACK_WIND (frame, marker_setxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata); - return 0; + STACK_WIND(frame, marker_setxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata); + return 0; err: - MARKER_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL); + MARKER_STACK_UNWIND(setxattr, frame, -1, op_errno, NULL); - return 0; + return 0; } - int32_t -marker_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +marker_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - if (op_ret == -1) { - gf_log (this->name, GF_LOG_TRACE, "%s 
occurred in " - "fsetxattr", strerror (op_errno)); - } + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, + "%s occurred in " + "fsetxattr", + strerror(op_errno)); + } - local = (marker_local_t *) frame->local; + local = (marker_local_t *)frame->local; - frame->local = NULL; + frame->local = NULL; - STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata); + STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, xdata); - if (op_ret == -1 || local == NULL) - goto out; + if (op_ret == -1 || local == NULL) + goto out; - priv = this->private; + priv = this->private; - if (priv->feature_enabled & GF_XTIME) - marker_xtime_update_marks (this, local); + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); out: - marker_local_unref (local); + marker_local_unref(local); - return 0; + return 0; } int32_t -marker_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, - int32_t flags, dict_t *xdata) +marker_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, + int32_t flags, dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - priv = this->private; + priv = this->private; - if (priv->feature_enabled == 0) - goto wind; + if (priv->feature_enabled == 0) + goto wind; - ret = call_from_sp_client_to_reset_tmfile (frame, this, dict); - if (ret == 0) - return 0; + ret = call_from_sp_client_to_reset_tmfile(frame, this, dict); + if (ret == 0) + return 0; - local = mem_get0 (this->local_pool); + local = mem_get0(this->local_pool); - MARKER_INIT_LOCAL (frame, local); + MARKER_INIT_LOCAL(frame, local); - ret = marker_inode_loc_fill (fd->inode, &local->loc); + ret = marker_inode_loc_fill(fd->inode, &local->loc); - if (ret == -1) - goto err; + if (ret == -1) + goto err; wind: - STACK_WIND (frame, marker_fsetxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsetxattr, fd, dict, 
flags, xdata); - return 0; + STACK_WIND(frame, marker_fsetxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata); + return 0; err: - MARKER_STACK_UNWIND (fsetxattr, frame, -1, ENOMEM, NULL); + MARKER_STACK_UNWIND(fsetxattr, frame, -1, ENOMEM, NULL); - return 0; + return 0; } - int32_t -marker_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *statpre, - struct iatt *statpost, dict_t *xdata) +marker_fsetattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *statpre, + struct iatt *statpost, dict_t *xdata) { - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - if (op_ret == -1) { - gf_log (this->name, GF_LOG_TRACE, "%s occurred in " - "fsetattr ", strerror (op_errno)); - } + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, + "%s occurred in " + "fsetattr ", + strerror(op_errno)); + } - local = (marker_local_t *) frame->local; + local = (marker_local_t *)frame->local; - frame->local = NULL; + frame->local = NULL; - STACK_UNWIND_STRICT (fsetattr, frame, op_ret, op_errno, statpre, - statpost, xdata); + STACK_UNWIND_STRICT(fsetattr, frame, op_ret, op_errno, statpre, statpost, + xdata); - if (op_ret == -1 || local == NULL) - goto out; + if (op_ret == -1 || local == NULL) + goto out; - priv = this->private; + priv = this->private; - if (priv->feature_enabled & GF_XTIME) - marker_xtime_update_marks (this, local); + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); out: - marker_local_unref (local); + marker_local_unref(local); - return 0; + return 0; } - int32_t -marker_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iatt *stbuf, int32_t valid, dict_t *xdata) +marker_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iatt *stbuf, int32_t valid, dict_t *xdata) { - int32_t ret = 0; - 
marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - priv = this->private; + priv = this->private; - if (priv->feature_enabled == 0) - goto wind; + if (priv->feature_enabled == 0) + goto wind; - local = mem_get0 (this->local_pool); + local = mem_get0(this->local_pool); - MARKER_INIT_LOCAL (frame, local); + MARKER_INIT_LOCAL(frame, local); - ret = marker_inode_loc_fill (fd->inode, &local->loc); + ret = marker_inode_loc_fill(fd->inode, &local->loc); - if (ret == -1) - goto err; + if (ret == -1) + goto err; wind: - STACK_WIND (frame, marker_fsetattr_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fsetattr, fd, stbuf, valid, xdata); - return 0; + STACK_WIND(frame, marker_fsetattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata); + return 0; err: - MARKER_STACK_UNWIND (fsetattr, frame, -1, ENOMEM, NULL, NULL, NULL); + MARKER_STACK_UNWIND(fsetattr, frame, -1, ENOMEM, NULL, NULL, NULL); - return 0; + return 0; } - int32_t -marker_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *statpre, - struct iatt *statpost, dict_t *xdata) +marker_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *statpre, + struct iatt *statpost, dict_t *xdata) { - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - local = (marker_local_t *) frame->local; + local = (marker_local_t *)frame->local; - frame->local = NULL; + frame->local = NULL; - if (op_ret == -1) { - gf_log (this->name, GF_LOG_TRACE, - "%s occurred during setattr of %s", - strerror (op_errno), - (local ? local->loc.path : "<nul>")); - } + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, "%s occurred during setattr of %s", + strerror(op_errno), (local ? 
local->loc.path : "<nul>")); + } - STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, statpre, - statpost, xdata); + STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, statpre, statpost, + xdata); - if (op_ret == -1 || local == NULL) - goto out; + if (op_ret == -1 || local == NULL) + goto out; - priv = this->private; + priv = this->private; - if (priv->feature_enabled & GF_XTIME) - marker_xtime_update_marks (this, local); + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); out: - marker_local_unref (local); + marker_local_unref(local); - return 0; + return 0; } int32_t -marker_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - struct iatt *stbuf, int32_t valid, dict_t *xdata) +marker_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + struct iatt *stbuf, int32_t valid, dict_t *xdata) { - int32_t ret = 0; - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - priv = this->private; + priv = this->private; - if (priv->feature_enabled == 0) - goto wind; + if (priv->feature_enabled == 0) + goto wind; - local = mem_get0 (this->local_pool); + local = mem_get0(this->local_pool); - MARKER_INIT_LOCAL (frame, local); + MARKER_INIT_LOCAL(frame, local); - ret = loc_copy (&local->loc, loc); + ret = loc_copy(&local->loc, loc); - if (ret == -1) - goto err; + if (ret == -1) + goto err; wind: - STACK_WIND (frame, marker_setattr_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->setattr, loc, stbuf, valid, xdata); - return 0; + STACK_WIND(frame, marker_setattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata); + return 0; err: - MARKER_STACK_UNWIND (setattr, frame, -1, ENOMEM, NULL, NULL, NULL); + MARKER_STACK_UNWIND(setattr, frame, -1, ENOMEM, NULL, NULL, NULL); - return 0; + return 0; } - int32_t -marker_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t 
op_errno, dict_t *xdata) +marker_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - if (op_ret == -1) { - gf_log (this->name, GF_LOG_TRACE, - "%s occurred while " - "removing extended attribute", - strerror (op_errno)); - } + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, + "%s occurred while " + "removing extended attribute", + strerror(op_errno)); + } - local = (marker_local_t *) frame->local; + local = (marker_local_t *)frame->local; - frame->local = NULL; + frame->local = NULL; - STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, xdata); + STACK_UNWIND_STRICT(removexattr, frame, op_ret, op_errno, xdata); - if (op_ret == -1 || local == NULL) - goto out; + if (op_ret == -1 || local == NULL) + goto out; - priv = this->private; + priv = this->private; - if (priv->feature_enabled & GF_XTIME) - marker_xtime_update_marks (this, local); + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks(this, local); out: - marker_local_unref (local); + marker_local_unref(local); - return 0; + return 0; } int32_t -marker_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *name, dict_t *xdata) -{ - int32_t ret = -1; - int32_t i = 0; - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; - char key[QUOTA_KEY_MAX] = {0, }; - - priv = this->private; - - if (name) { - for (i = 0; mq_ext_xattrs[i]; i++) { - if (strcmp (name, mq_ext_xattrs[i])) - continue; - - GET_QUOTA_KEY (this, key, mq_ext_xattrs[i], ret); - if (ret < 0) - goto err; - name = key; - break; - } +marker_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) +{ + int32_t ret = -1; + int32_t i = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + char key[QUOTA_KEY_MAX] = { + 0, + }; + + priv = this->private; + + if 
(name) { + for (i = 0; mq_ext_xattrs[i]; i++) { + if (strcmp(name, mq_ext_xattrs[i])) + continue; + + GET_QUOTA_KEY(this, key, mq_ext_xattrs[i], ret); + if (ret < 0) + goto err; + name = key; + break; } + } - if (priv->feature_enabled == 0) - goto wind; + if (priv->feature_enabled == 0) + goto wind; - local = mem_get0 (this->local_pool); + local = mem_get0(this->local_pool); - MARKER_INIT_LOCAL (frame, local); + MARKER_INIT_LOCAL(frame, local); - ret = loc_copy (&local->loc, loc); + ret = loc_copy(&local->loc, loc); - if (ret == -1) - goto err; + if (ret == -1) + goto err; wind: - STACK_WIND (frame, marker_removexattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->removexattr, loc, name, xdata); - return 0; + STACK_WIND(frame, marker_removexattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, loc, name, xdata); + return 0; err: - MARKER_STACK_UNWIND (removexattr, frame, -1, ENOMEM, NULL); + MARKER_STACK_UNWIND(removexattr, frame, -1, ENOMEM, NULL); - return 0; + return 0; } static gf_boolean_t -__has_quota_xattrs (dict_t *xattrs) +__has_quota_xattrs(dict_t *xattrs) { - if (dict_foreach_match (xattrs, _is_quota_internal_xattr, NULL, - dict_null_foreach_fn, NULL) > 0) - return _gf_true; + if (dict_foreach_match(xattrs, _is_quota_internal_xattr, NULL, + dict_null_foreach_fn, NULL) > 0) + return _gf_true; - return _gf_false; + return _gf_false; } int32_t -marker_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *dict, struct iatt *postparent) -{ - marker_conf_t *priv = NULL; - marker_local_t *local = NULL; - dict_t *xattrs = NULL; - int32_t ret = -1; - - priv = this->private; - local = (marker_local_t *) frame->local; - frame->local = NULL; - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_TRACE, "lookup failed with %s", - strerror (op_errno)); - goto unwind; - } - - ret = marker_key_set_ver (this, dict); - if (ret < 0) { - op_ret = -1; - op_errno = 
ENOMEM; - goto unwind; +marker_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *dict, struct iatt *postparent) +{ + marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + dict_t *xattrs = NULL; + quota_inode_ctx_t *ctx = NULL; + int32_t ret = -1; + + priv = this->private; + local = (marker_local_t *)frame->local; + frame->local = NULL; + + if (op_ret == -1) { + gf_log(this->name, GF_LOG_TRACE, "lookup failed with %s", + strerror(op_errno)); + goto unwind; + } + + ret = marker_key_set_ver(this, dict); + if (ret < 0) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + + if (dict && __has_quota_xattrs(dict)) { + xattrs = dict_copy_with_ref(dict, NULL); + if (!xattrs) { + op_ret = -1; + op_errno = ENOMEM; + } else { + marker_filter_internal_xattrs(this, xattrs); } + } else if (dict) { + xattrs = dict_ref(dict); + } - if (dict && __has_quota_xattrs (dict)) { - xattrs = dict_copy_with_ref (dict, NULL); - if (!xattrs) { - op_ret = -1; - op_errno = ENOMEM; - } else { - marker_filter_internal_xattrs (this, xattrs); - } - } else if (dict) { - xattrs = dict_ref (dict); + if (op_ret >= 0 && inode && (priv->feature_enabled & GF_QUOTA)) { + ctx = mq_inode_ctx_new(inode, this); + if (ctx == NULL) { + gf_log(this->name, GF_LOG_WARNING, + "mq_inode_ctx_new " + "failed for %s", + uuid_utoa(inode->gfid)); + op_ret = -1; + op_errno = ENOMEM; } + } unwind: - STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf, - xattrs, postparent); - - if (op_ret == -1 || local == NULL) - goto out; + STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, xattrs, + postparent); - /* copy the gfid from the stat structure instead of inode, - * since if the lookup is fresh lookup, then the inode - * would have not yet linked to the inode table which happens - * in protocol/server. 
- */ - if (gf_uuid_is_null (local->loc.gfid)) - gf_uuid_copy (local->loc.gfid, buf->ia_gfid); + if (op_ret == -1 || local == NULL) + goto out; + /* copy the gfid from the stat structure instead of inode, + * since if the lookup is fresh lookup, then the inode + * would have not yet linked to the inode table which happens + * in protocol/server. + */ + if (gf_uuid_is_null(local->loc.gfid)) + gf_uuid_copy(local->loc.gfid, buf->ia_gfid); - if (priv->feature_enabled & GF_QUOTA) { - mq_xattr_state (this, &local->loc, dict, *buf); - } + if (priv->feature_enabled & GF_QUOTA) { + mq_xattr_state(this, &local->loc, dict, buf); + } out: - marker_local_unref (local); - if (xattrs) - dict_unref (xattrs); + marker_local_unref(local); + if (xattrs) + dict_unref(xattrs); - return 0; + return 0; } int32_t -marker_lookup (call_frame_t *frame, xlator_t *this, - loc_t *loc, dict_t *xattr_req) +marker_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, + dict_t *xattr_req) { - int32_t ret = 0; - marker_local_t *local = NULL; - marker_conf_t *priv = NULL; + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; - priv = this->private; + priv = this->private; - xattr_req = xattr_req ? dict_ref (xattr_req) : dict_new (); - if (!xattr_req) - goto err; + xattr_req = xattr_req ? 
dict_ref(xattr_req) : dict_new(); + if (!xattr_req) + goto err; - ret = marker_key_replace_with_ver (this, xattr_req); - if (ret < 0) - goto err; + ret = marker_key_replace_with_ver(this, xattr_req); + if (ret < 0) + goto err; - if (priv->feature_enabled == 0) - goto wind; + if (priv->feature_enabled == 0) + goto wind; - local = mem_get0 (this->local_pool); - if (local == NULL) - goto err; + local = mem_get0(this->local_pool); + if (local == NULL) + goto err; - MARKER_INIT_LOCAL (frame, local); + MARKER_INIT_LOCAL(frame, local); - ret = loc_copy (&local->loc, loc); - if (ret == -1) - goto err; + ret = loc_copy(&local->loc, loc); + if (ret == -1) + goto err; - if ((priv->feature_enabled & GF_QUOTA)) - mq_req_xattr (this, loc, xattr_req, NULL, NULL); + if ((priv->feature_enabled & GF_QUOTA)) + mq_req_xattr(this, loc, xattr_req, NULL, NULL); wind: - STACK_WIND (frame, marker_lookup_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, loc, xattr_req); + STACK_WIND(frame, marker_lookup_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, loc, xattr_req); - dict_unref (xattr_req); + dict_unref(xattr_req); - return 0; + return 0; err: - MARKER_STACK_UNWIND (lookup, frame, -1, ENOMEM, NULL, NULL, NULL, NULL); + MARKER_STACK_UNWIND(lookup, frame, -1, ENOMEM, NULL, NULL, NULL, NULL); - if (xattr_req) - dict_unref (xattr_req); + if (xattr_req) + dict_unref(xattr_req); - return 0; + return 0; } - int -marker_build_ancestry_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, gf_dirent_t *entries, - dict_t *xdata) +marker_build_ancestry_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, gf_dirent_t *entries, + dict_t *xdata) { - gf_dirent_t *entry = NULL; - loc_t loc = {0, }; - inode_t *parent = NULL; - int ret = -1; + gf_dirent_t *entry = NULL; + quota_inode_ctx_t *ctx = NULL; + int ret = -1; - if ((op_ret <= 0) || (entries == NULL)) { - goto out; - } + if ((op_ret <= 0) || (entries == NULL)) { + goto 
out; + } - list_for_each_entry (entry, &entries->list, list) { - if (entry->inode == entry->inode->table->root) { - inode_unref (parent); - parent = NULL; - } - - if (parent) - _marker_inode_loc_fill (entry->inode, parent, - entry->d_name, &loc); - else - ret = marker_inode_loc_fill (entry->inode, &loc); - - if (ret) { - gf_log (this->name, GF_LOG_WARNING, "Couldn't build " - "loc for %s/%s", - parent? uuid_utoa (parent->gfid): NULL, - entry->d_name); - continue; - } - - inode_unref (parent); - parent = inode_ref (entry->inode); - loc_wipe (&loc); - - ret = marker_key_set_ver (this, entry->dict); - if (ret < 0) { - op_ret = -1; - op_errno = ENOMEM; - break; - } + list_for_each_entry(entry, &entries->list, list) + { + if (entry->inode == NULL) + continue; + + ret = marker_key_set_ver(this, entry->dict); + if (ret < 0) { + op_ret = -1; + op_errno = ENOMEM; + break; } - if (parent) - inode_unref (parent); + ctx = mq_inode_ctx_new(entry->inode, this); + if (ctx == NULL) + gf_log(this->name, GF_LOG_WARNING, + "mq_inode_ctx_new " + "failed for %s", + uuid_utoa(entry->inode->gfid)); + } out: - STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata); - return 0; + STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, entries, xdata); + return 0; } int -marker_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, gf_dirent_t *entries, - dict_t *xdata) -{ - gf_dirent_t *entry = NULL; - marker_conf_t *priv = NULL; - marker_local_t *local = NULL; - loc_t loc = {0, }; - int ret = -1; - char *resolvedpath = NULL; +marker_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, gf_dirent_t *entries, + dict_t *xdata) +{ + gf_dirent_t *entry = NULL; + marker_conf_t *priv = NULL; + marker_local_t *local = NULL; + loc_t loc = { + 0, + }; + int ret = -1; + char *resolvedpath = NULL; + quota_inode_ctx_t *ctx = NULL; + + if (op_ret <= 0) + goto unwind; + + priv = this->private; + local = 
frame->local; + + if (!(priv->feature_enabled & GF_QUOTA) || (local == NULL)) { + goto unwind; + } + + list_for_each_entry(entry, &entries->list, list) + { + if ((strcmp(entry->d_name, ".") == 0) || + (strcmp(entry->d_name, "..") == 0) || entry->inode == NULL) + continue; + + loc.parent = inode_ref(local->loc.inode); + loc.inode = inode_ref(entry->inode); + ret = inode_path(loc.parent, entry->d_name, &resolvedpath); + if (ret < 0) { + gf_log(this->name, GF_LOG_ERROR, + "failed to get the " + "path for the entry %s", + entry->d_name); + loc_wipe(&loc); + continue; + } - if (op_ret <= 0) - goto unwind; + loc.path = resolvedpath; + resolvedpath = NULL; - priv = this->private; - local = frame->local; + ctx = mq_inode_ctx_new(loc.inode, this); + if (ctx == NULL) + gf_log(this->name, GF_LOG_WARNING, + "mq_inode_ctx_new " + "failed for %s", + uuid_utoa(loc.inode->gfid)); - if (!(priv->feature_enabled & GF_QUOTA) || (local == NULL)) { - goto unwind; - } + mq_xattr_state(this, &loc, entry->dict, &entry->d_stat); + loc_wipe(&loc); - list_for_each_entry (entry, &entries->list, list) { - if ((strcmp (entry->d_name, ".") == 0) || - (strcmp (entry->d_name, "..") == 0) || - entry->inode == NULL) - continue; - - loc.parent = inode_ref (local->loc.inode); - loc.inode = inode_ref (entry->inode); - ret = inode_path (loc.parent, entry->d_name, &resolvedpath); - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, "failed to get the " - "path for the entry %s", entry->d_name); - loc_wipe (&loc); - continue; - } - - loc.path = gf_strdup (resolvedpath); - if (!loc.path) { - gf_log (this->name, GF_LOG_ERROR, "strdup of path " - "failed for the entry %s (path: %s)", - entry->d_name, resolvedpath); - loc_wipe (&loc); - continue; - } - - mq_xattr_state (this, &loc, entry->dict, entry->d_stat); - loc_wipe (&loc); - GF_FREE (resolvedpath); - resolvedpath = NULL; - - ret = marker_key_set_ver (this, entry->dict); - if (ret < 0) { - op_ret = -1; - op_errno = ENOMEM; - goto unwind; - } + ret = 
marker_key_set_ver(this, entry->dict); + if (ret < 0) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; } + } unwind: - MARKER_STACK_UNWIND (readdirp, frame, op_ret, op_errno, entries, xdata); + MARKER_STACK_UNWIND(readdirp, frame, op_ret, op_errno, entries, xdata); - return 0; + return 0; } int -marker_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset, dict_t *dict) -{ - marker_conf_t *priv = NULL; - loc_t loc = {0, }; - marker_local_t *local = NULL; - int ret = -1; - - priv = this->private; - - dict = dict ? dict_ref(dict) : dict_new(); - if (!dict) - goto unwind; - - ret = marker_key_replace_with_ver (this, dict); - if (ret < 0) - goto unwind; - - if (dict_get (dict, GET_ANCESTRY_DENTRY_KEY)) { - STACK_WIND (frame, marker_build_ancestry_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readdirp, - fd, size, offset, dict); - } else { - if (priv->feature_enabled & GF_QUOTA) { - local = mem_get0 (this->local_pool); - - MARKER_INIT_LOCAL (frame, local); +marker_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, dict_t *dict) +{ + marker_conf_t *priv = NULL; + loc_t loc = { + 0, + }; + marker_local_t *local = NULL; + int ret = -1; + + priv = this->private; + + dict = dict ? 
dict_ref(dict) : dict_new(); + if (!dict) + goto unwind; + + ret = marker_key_replace_with_ver(this, dict); + if (ret < 0) + goto unwind; + + if (dict_get(dict, GET_ANCESTRY_DENTRY_KEY)) { + STACK_WIND(frame, marker_build_ancestry_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdirp, fd, size, offset, dict); + } else { + if (priv->feature_enabled & GF_QUOTA) { + local = mem_get0(this->local_pool); - loc.parent = local->loc.inode = inode_ref (fd->inode); + MARKER_INIT_LOCAL(frame, local); - mq_req_xattr (this, &loc, dict, NULL, NULL); - } + loc.parent = local->loc.inode = inode_ref(fd->inode); - STACK_WIND (frame, marker_readdirp_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readdirp, - fd, size, offset, dict); + mq_req_xattr(this, &loc, dict, NULL, NULL); } - dict_unref (dict); - return 0; + STACK_WIND(frame, marker_readdirp_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdirp, fd, size, offset, dict); + } + + dict_unref(dict); + return 0; unwind: - MARKER_STACK_UNWIND (readdirp, frame, -1, ENOMEM, NULL, NULL); - return 0; + MARKER_STACK_UNWIND(readdirp, frame, -1, ENOMEM, NULL, NULL); + return 0; } int32_t -mem_acct_init (xlator_t *this) +mem_acct_init(xlator_t *this) { - int ret = -1; + int ret = -1; - if (!this) - return ret; + if (!this) + return ret; - ret = xlator_mem_acct_init (this, gf_marker_mt_end + 1); - - if (ret != 0) { - gf_log(this->name, GF_LOG_ERROR, "Memory accounting init" - " failed"); - return ret; - } + ret = xlator_mem_acct_init(this, gf_marker_mt_end + 1); + if (ret != 0) { + gf_log(this->name, GF_LOG_ERROR, + "Memory accounting init" + " failed"); return ret; -} + } + return ret; +} int32_t -init_xtime_priv (xlator_t *this, dict_t *options) +init_xtime_priv(xlator_t *this, dict_t *options) { - data_t *data = NULL; - int32_t ret = -1; - marker_conf_t *priv = NULL; + int32_t ret = -1; + marker_conf_t *priv = NULL; + char *tmp_opt = NULL; - GF_VALIDATE_OR_GOTO ("marker", this, out); - GF_VALIDATE_OR_GOTO (this->name, 
options, out); - GF_VALIDATE_OR_GOTO (this->name, this->private, out); + GF_VALIDATE_OR_GOTO("marker", this, out); + GF_VALIDATE_OR_GOTO(this->name, options, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); - priv = this->private; + priv = this->private; - if((data = dict_get (options, VOLUME_UUID)) != NULL) { - priv->volume_uuid = data->data; + ret = dict_get_str(options, "volume-uuid", &tmp_opt); - ret = gf_uuid_parse (priv->volume_uuid, priv->volume_uuid_bin); - if (ret == -1) { - gf_log (this->name, GF_LOG_ERROR, - "invalid volume uuid %s", priv->volume_uuid); - goto out; - } + if (ret) { + priv->volume_uuid = NULL; + tmp_opt = ""; - ret = gf_asprintf (& (priv->marker_xattr), "%s.%s.%s", - MARKER_XATTR_PREFIX, priv->volume_uuid, - XTIME); + gf_log(this->name, GF_LOG_ERROR, + "please specify the volume-uuid" + "in the translator options"); - if (ret == -1){ - priv->marker_xattr = NULL; - goto out; - } + return -1; + } + gf_asprintf(&priv->volume_uuid, "%s", tmp_opt); - gf_log (this->name, GF_LOG_DEBUG, - "volume-uuid = %s", priv->volume_uuid); - } else { - priv->volume_uuid = NULL; + ret = gf_uuid_parse(priv->volume_uuid, priv->volume_uuid_bin); - gf_log (this->name, GF_LOG_ERROR, - "please specify the volume-uuid" - "in the translator options"); + if (ret == -1) { + gf_log(this->name, GF_LOG_ERROR, "invalid volume uuid %s", + priv->volume_uuid); + goto out; + } - return -1; - } + ret = gf_asprintf(&(priv->marker_xattr), "%s.%s.%s", MARKER_XATTR_PREFIX, + priv->volume_uuid, XTIME); - if ((data = dict_get (options, TIMESTAMP_FILE)) != NULL) { - priv->timestamp_file = data->data; + if (ret == -1) { + priv->marker_xattr = NULL; + goto out; + } - gf_log (this->name, GF_LOG_DEBUG, - "the timestamp-file is = %s", - priv->timestamp_file); + gf_log(this->name, GF_LOG_DEBUG, "volume-uuid = %s", priv->volume_uuid); - } else { - priv->timestamp_file = NULL; + ret = dict_get_str(options, "timestamp-file", &tmp_opt); + if (ret) { + priv->timestamp_file = NULL; + 
tmp_opt = ""; - gf_log (this->name, GF_LOG_ERROR, - "please specify the timestamp-file" - "in the translator options"); + gf_log(this->name, GF_LOG_ERROR, + "please specify the timestamp-file" + "in the translator options"); - goto out; - } + goto out; + } - ret = 0; + ret = gf_asprintf(&priv->timestamp_file, "%s", tmp_opt); + if (ret == -1) { + priv->timestamp_file = NULL; + goto out; + } + + gf_log(this->name, GF_LOG_DEBUG, "the timestamp-file is = %s", + priv->timestamp_file); + + ret = 0; out: - return ret; + return ret; } void -marker_xtime_priv_cleanup (xlator_t *this) +marker_xtime_priv_cleanup(xlator_t *this) { - marker_conf_t *priv = NULL; + marker_conf_t *priv = NULL; - GF_VALIDATE_OR_GOTO ("marker", this, out); + GF_VALIDATE_OR_GOTO("marker", this, out); - priv = (marker_conf_t *) this->private; + priv = (marker_conf_t *)this->private; - GF_VALIDATE_OR_GOTO (this->name, priv, out); + GF_VALIDATE_OR_GOTO(this->name, priv, out); - GF_FREE (priv->volume_uuid); + GF_FREE(priv->volume_uuid); - GF_FREE (priv->timestamp_file); + GF_FREE(priv->timestamp_file); - GF_FREE (priv->marker_xattr); + GF_FREE(priv->marker_xattr); out: - return; + return; } void -marker_priv_cleanup (xlator_t *this) +marker_priv_cleanup(xlator_t *this) { - marker_conf_t *priv = NULL; + marker_conf_t *priv = NULL; + + GF_VALIDATE_OR_GOTO("marker", this, out); + + priv = (marker_conf_t *)this->private; - GF_VALIDATE_OR_GOTO ("marker", this, out); + GF_VALIDATE_OR_GOTO(this->name, priv, out); - priv = (marker_conf_t *) this->private; + marker_xtime_priv_cleanup(this); - GF_VALIDATE_OR_GOTO (this->name, priv, out); + LOCK_DESTROY(&priv->lock); - marker_xtime_priv_cleanup (this); + GF_FREE(priv); - LOCK_DESTROY (&priv->lock); + if (this->local_pool) { + mem_pool_destroy(this->local_pool); + this->local_pool = NULL; + } - GF_FREE (priv); out: - return; + return; } int32_t -reconfigure (xlator_t *this, dict_t *options) +reconfigure(xlator_t *this, dict_t *options) { - int32_t ret = 0; - data_t 
*data = NULL; - gf_boolean_t flag = _gf_false; - marker_conf_t *priv = NULL; - int32_t version = 0; + int32_t ret = 0; + data_t *data = NULL; + gf_boolean_t flag = _gf_false; + marker_conf_t *priv = NULL; + int32_t version = 0; - GF_ASSERT (this); - GF_ASSERT (this->private); + GF_ASSERT(this); + GF_ASSERT(this->private); - priv = this->private; + priv = this->private; - priv->feature_enabled = 0; + priv->feature_enabled = 0; - GF_VALIDATE_OR_GOTO (this->name, options, out); + GF_VALIDATE_OR_GOTO(this->name, options, out); - data = dict_get (options, "quota"); - if (data) { - ret = gf_string2boolean (data->data, &flag); - if (ret == 0 && flag == _gf_true) - priv->feature_enabled |= GF_QUOTA; - } + data = dict_get(options, "quota"); + if (data) { + ret = gf_string2boolean(data->data, &flag); + if (ret == 0 && flag == _gf_true) + priv->feature_enabled |= GF_QUOTA; + } - data = dict_get (options, "inode-quota"); - if (data) { - ret = gf_string2boolean (data->data, &flag); - if (ret == 0 && flag == _gf_true) - priv->feature_enabled |= GF_INODE_QUOTA; - } + data = dict_get(options, "inode-quota"); + if (data) { + ret = gf_string2boolean(data->data, &flag); + if (ret == 0 && flag == _gf_true) + priv->feature_enabled |= GF_INODE_QUOTA; + } - data = dict_get (options, "quota-version"); - if (data) - ret = gf_string2int32 (data->data, &version); + data = dict_get(options, "quota-version"); + if (data) + ret = gf_string2int32(data->data, &version); - if (priv->feature_enabled) { - if (version >= 0) - priv->version = version; - else - gf_log (this->name, GF_LOG_ERROR, "Invalid quota " - "version %d", priv->version); - } - - data = dict_get (options, "xtime"); - if (data) { - ret = gf_string2boolean (data->data, &flag); - if (ret == 0 && flag == _gf_true) { - marker_xtime_priv_cleanup (this); - - ret = init_xtime_priv (this, options); - if (ret < 0) { - gf_log (this->name, GF_LOG_WARNING, - "failed to initialize xtime private, " - "xtime updation will fail"); - } else { - 
priv->feature_enabled |= GF_XTIME; - data = dict_get (options, "gsync-force-xtime"); - if (!data) - goto out; - ret = gf_string2boolean (data->data, &flag); - if (ret == 0 && flag) - priv->feature_enabled |= GF_XTIME_GSYNC_FORCE; - } - } - } + if (priv->feature_enabled) { + if (version >= 0) + priv->version = version; + else + gf_log(this->name, GF_LOG_ERROR, + "Invalid quota " + "version %d", + priv->version); + } + + data = dict_get(options, "xtime"); + if (data) { + ret = gf_string2boolean(data->data, &flag); + if (ret == 0 && flag == _gf_true) { + marker_xtime_priv_cleanup(this); + + ret = init_xtime_priv(this, options); + if (ret < 0) { + gf_log(this->name, GF_LOG_WARNING, + "failed to initialize xtime private, " + "xtime updation will fail"); + } else { + priv->feature_enabled |= GF_XTIME; + data = dict_get(options, "gsync-force-xtime"); + if (!data) + goto out; + ret = gf_string2boolean(data->data, &flag); + if (ret == 0 && flag) + priv->feature_enabled |= GF_XTIME_GSYNC_FORCE; + } + } + } out: - return ret; + return ret; } - int32_t -init (xlator_t *this) -{ - dict_t *options = NULL; - data_t *data = NULL; - int32_t ret = 0; - gf_boolean_t flag = _gf_false; - marker_conf_t *priv = NULL; - - if (!this->children) { - gf_log (this->name, GF_LOG_ERROR, - "marker translator needs subvolume defined."); - return -1; - } +init(xlator_t *this) +{ + dict_t *options = NULL; + data_t *data = NULL; + int32_t ret = 0; + gf_boolean_t flag = _gf_false; + marker_conf_t *priv = NULL; - if (!this->parents) { - gf_log (this->name, GF_LOG_WARNING, - "Volume is dangling."); - return -1; - } + if (!this->children) { + gf_log(this->name, GF_LOG_ERROR, + "marker translator needs subvolume defined."); + return -1; + } - options = this->options; + if (!this->parents) { + gf_log(this->name, GF_LOG_WARNING, "Volume is dangling."); + return -1; + } - ALLOCATE_OR_GOTO (this->private, marker_conf_t, err); + options = this->options; - priv = this->private; + ALLOCATE_OR_GOTO(this->private, 
marker_conf_t, err); - priv->feature_enabled = 0; - priv->version = 0; + priv = this->private; - LOCK_INIT (&priv->lock); + priv->feature_enabled = 0; + priv->version = 0; - data = dict_get (options, "quota"); - if (data) { - ret = gf_string2boolean (data->data, &flag); - if (ret == 0 && flag == _gf_true) - priv->feature_enabled |= GF_QUOTA; - } + LOCK_INIT(&priv->lock); - data = dict_get (options, "inode-quota"); - if (data) { - ret = gf_string2boolean (data->data, &flag); - if (ret == 0 && flag == _gf_true) - priv->feature_enabled |= GF_INODE_QUOTA; - } + data = dict_get(options, "quota"); + if (data) { + ret = gf_string2boolean(data->data, &flag); + if (ret == 0 && flag == _gf_true) + priv->feature_enabled |= GF_QUOTA; + } - data = dict_get (options, "quota-version"); - if (data) - ret = gf_string2int32 (data->data, &priv->version); + data = dict_get(options, "inode-quota"); + if (data) { + ret = gf_string2boolean(data->data, &flag); + if (ret == 0 && flag == _gf_true) + priv->feature_enabled |= GF_INODE_QUOTA; + } - if (priv->feature_enabled && priv->version < 0) { - gf_log (this->name, GF_LOG_ERROR, "Invalid quota version %d", - priv->version); - goto err; - } + data = dict_get(options, "quota-version"); + if (data) + ret = gf_string2int32(data->data, &priv->version); - data = dict_get (options, "xtime"); - if (data) { - ret = gf_string2boolean (data->data, &flag); - if (ret == 0 && flag == _gf_true) { - ret = init_xtime_priv (this, options); - if (ret < 0) - goto err; - - priv->feature_enabled |= GF_XTIME; - data = dict_get (options, "gsync-force-xtime"); - if (!data) - goto cont; - ret = gf_string2boolean (data->data, &flag); - if (ret == 0 && flag) - priv->feature_enabled |= GF_XTIME_GSYNC_FORCE; - } - } + if ((ret == 0) && priv->feature_enabled && priv->version < 0) { + gf_log(this->name, GF_LOG_ERROR, "Invalid quota version %d", + priv->version); + goto err; + } - cont: - this->local_pool = mem_pool_new (marker_local_t, 128); - if (!this->local_pool) { - 
gf_log (this->name, GF_LOG_ERROR, - "failed to create local_t's memory pool"); + data = dict_get(options, "xtime"); + if (data) { + ret = gf_string2boolean(data->data, &flag); + if (ret == 0 && flag == _gf_true) { + ret = init_xtime_priv(this, options); + if (ret < 0) goto err; - } - return 0; + priv->feature_enabled |= GF_XTIME; + data = dict_get(options, "gsync-force-xtime"); + if (!data) + goto cont; + ret = gf_string2boolean(data->data, &flag); + if (ret == 0 && flag) + priv->feature_enabled |= GF_XTIME_GSYNC_FORCE; + } + } + +cont: + this->local_pool = mem_pool_new(marker_local_t, 128); + if (!this->local_pool) { + gf_log(this->name, GF_LOG_ERROR, + "failed to create local_t's memory pool"); + goto err; + } + + return 0; err: - marker_priv_cleanup (this); + marker_priv_cleanup(this); - return -1; + return -1; } int32_t -marker_forget (xlator_t *this, inode_t *inode) +marker_forget(xlator_t *this, inode_t *inode) { - marker_inode_ctx_t *ctx = NULL; - uint64_t value = 0; + marker_inode_ctx_t *ctx = NULL; + uint64_t value = 0; - if (inode_ctx_del (inode, this, &value) != 0) - goto out; + if (inode_ctx_del(inode, this, &value) != 0) + goto out; - ctx = (marker_inode_ctx_t *)(unsigned long)value; - if (ctx == NULL) { - goto out; - } + ctx = (marker_inode_ctx_t *)(unsigned long)value; + if (ctx == NULL) { + goto out; + } - mq_forget (this, ctx->quota_ctx); + mq_forget(this, ctx->quota_ctx); - GF_FREE (ctx); + GF_FREE(ctx); out: - return 0; + return 0; } void -fini (xlator_t *this) +fini(xlator_t *this) { - marker_priv_cleanup (this); + marker_priv_cleanup(this); } struct xlator_fops fops = { - .lookup = marker_lookup, - .create = marker_create, - .mkdir = marker_mkdir, - .writev = marker_writev, - .truncate = marker_truncate, - .ftruncate = marker_ftruncate, - .symlink = marker_symlink, - .link = marker_link, - .unlink = marker_unlink, - .rmdir = marker_rmdir, - .rename = marker_rename, - .mknod = marker_mknod, - .setxattr = marker_setxattr, - .fsetxattr = 
marker_fsetxattr, - .setattr = marker_setattr, - .fsetattr = marker_fsetattr, - .removexattr = marker_removexattr, - .getxattr = marker_getxattr, - .readdirp = marker_readdirp, - .fallocate = marker_fallocate, - .discard = marker_discard, - .zerofill = marker_zerofill, + .lookup = marker_lookup, + .create = marker_create, + .mkdir = marker_mkdir, + .writev = marker_writev, + .truncate = marker_truncate, + .ftruncate = marker_ftruncate, + .symlink = marker_symlink, + .link = marker_link, + .unlink = marker_unlink, + .rmdir = marker_rmdir, + .rename = marker_rename, + .mknod = marker_mknod, + .setxattr = marker_setxattr, + .fsetxattr = marker_fsetxattr, + .setattr = marker_setattr, + .fsetattr = marker_fsetattr, + .removexattr = marker_removexattr, + .getxattr = marker_getxattr, + .readdirp = marker_readdirp, + .fallocate = marker_fallocate, + .discard = marker_discard, + .zerofill = marker_zerofill, }; -struct xlator_cbks cbks = { - .forget = marker_forget -}; +struct xlator_cbks cbks = {.forget = marker_forget}; struct volume_options options[] = { - {.key = {"volume-uuid"}}, - {.key = {"timestamp-file"}}, - {.key = {"quota"}}, - {.key = {"inode-quota"} }, - {.key = {"xtime"}}, - {.key = {"gsync-force-xtime"}}, - {.key = {NULL}} + {.key = {"volume-uuid"}, .default_value = "{{ volume.id }}"}, + {.key = {"timestamp-file"}}, + { + .key = {"quota"}, + .op_version = {1}, + .flags = OPT_FLAG_NONE, + .tags = {}, + }, + { + .key = {"inode-quota"}, + .op_version = {1}, + .flags = OPT_FLAG_NONE, + .tags = {}, + }, + { + .key = {"xtime"}, + .op_version = {1}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_FORCE, + .tags = {}, + }, + { + .key = {"gsync-force-xtime"}, + .op_version = {2}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_FORCE, + .tags = {}, + }, + { + .key = {"quota-version"}, + .flags = OPT_FLAG_NONE, + }, + {.key = {NULL}}}; + +xlator_api_t xlator_api = { + .init = init, + .fini = fini, + .reconfigure = reconfigure, + .mem_acct_init = mem_acct_init, + .op_version = {1}, /* 
Present from the initial version */ + .fops = &fops, + .cbks = &cbks, + .options = options, + .identifier = "marker", + .category = GF_MAINTAINED, }; diff --git a/xlators/features/marker/src/marker.h b/xlators/features/marker/src/marker.h index 4726880b82f..4821094c14b 100644 --- a/xlators/features/marker/src/marker.h +++ b/xlators/features/marker/src/marker.h @@ -11,138 +11,137 @@ #define _MARKER_H #include "marker-quota.h" -#include "xlator.h" -#include "defaults.h" -#include "compat-uuid.h" -#include "call-stub.h" +#include <glusterfs/xlator.h> +#include <glusterfs/defaults.h> +#include <glusterfs/compat-uuid.h> +#include <glusterfs/call-stub.h> #define MARKER_XATTR_PREFIX "trusted.glusterfs" -#define XTIME "xtime" -#define VOLUME_MARK "volume-mark" -#define VOLUME_UUID "volume-uuid" -#define TIMESTAMP_FILE "timestamp-file" +#define XTIME "xtime" +#define VOLUME_MARK "volume-mark" +#define VOLUME_UUID "volume-uuid" +#define TIMESTAMP_FILE "timestamp-file" enum { - GF_QUOTA = 1, - GF_XTIME = 2, - GF_XTIME_GSYNC_FORCE = 4, - GF_INODE_QUOTA = 8, + GF_QUOTA = 1, + GF_XTIME = 2, + GF_XTIME_GSYNC_FORCE = 4, + GF_INODE_QUOTA = 8, }; /*initialize the local variable*/ -#define MARKER_INIT_LOCAL(_frame,_local) do { \ - _frame->local = _local; \ - _local->pid = _frame->root->pid; \ - memset (&_local->loc, 0, sizeof (loc_t)); \ - _local->ref = 1; \ - _local->uid = -1; \ - _local->gid = -1; \ - LOCK_INIT (&_local->lock); \ - _local->oplocal = NULL; \ - } while (0) +#define MARKER_INIT_LOCAL(_frame, _local) \ + do { \ + _frame->local = _local; \ + _local->pid = _frame->root->pid; \ + memset(&_local->loc, 0, sizeof(loc_t)); \ + _local->ref = 1; \ + _local->uid = -1; \ + _local->gid = -1; \ + LOCK_INIT(&_local->lock); \ + _local->oplocal = NULL; \ + } while (0) /* try alloc and if it fails, goto label */ -#define ALLOCATE_OR_GOTO(var, type, label) do { \ - var = GF_CALLOC (sizeof (type), 1, \ - gf_marker_mt_##type); \ - if (!var) { \ - gf_log (this->name, GF_LOG_ERROR, \ - "out 
of memory :("); \ - goto label; \ - } \ - } while (0) - -#define _MARKER_SET_UID_GID(dest, src) \ - do { \ - if (src->uid != -1 && \ - src->gid != -1) { \ - dest->uid = src->uid; \ - dest->gid = src->gid; \ - } \ - } while (0) - -#define MARKER_SET_UID_GID(frame, dest, src) \ - do { \ - _MARKER_SET_UID_GID (dest, src); \ - frame->root->uid = 0; \ - frame->root->gid = 0; \ - frame->cookie = (void *) _GF_UID_GID_CHANGED; \ - } while (0) - -#define MARKER_RESET_UID_GID(frame, dest, src) \ - do { \ - _MARKER_SET_UID_GID (dest, src); \ - frame->cookie = NULL; \ - } while (0) - -#define MARKER_STACK_UNWIND(fop, frame, params...) \ - do { \ - quota_local_t *_local = NULL; \ - if (frame) { \ - _local = frame->local; \ - frame->local = NULL; \ - } \ - STACK_UNWIND_STRICT (fop, frame, params); \ - if (_local) \ - marker_local_unref (_local); \ - } while (0) - -struct marker_local{ - uint32_t timebuf[2]; - pid_t pid; - loc_t loc; - loc_t parent_loc; - uid_t uid; - gid_t gid; - int32_t ref; - uint32_t ia_nlink; - struct iatt buf; - gf_lock_t lock; - mode_t mode; - int32_t err; - call_stub_t *stub; - call_frame_t *lk_frame; - quota_meta_t contribution; - struct marker_local *oplocal; - - /* marker quota specific */ - int64_t delta; - int64_t d_off; - int64_t sum; - int64_t size; - int32_t hl_count; - int32_t dentry_child_count; - - fd_t *fd; - call_frame_t *frame; - - quota_inode_ctx_t *ctx; - inode_contribution_t *contri; - - int xflag; - dict_t *xdata; - gf_boolean_t skip_txn; +#define ALLOCATE_OR_GOTO(var, type, label) \ + do { \ + var = GF_CALLOC(sizeof(type), 1, gf_marker_mt_##type); \ + if (!var) { \ + gf_log(this->name, GF_LOG_ERROR, "out of memory :("); \ + goto label; \ + } \ + } while (0) + +#define _MARKER_SET_UID_GID(dest, src) \ + do { \ + if (src->uid != -1 && src->gid != -1) { \ + dest->uid = src->uid; \ + dest->gid = src->gid; \ + } \ + } while (0) + +#define MARKER_SET_UID_GID(frame, dest, src) \ + do { \ + _MARKER_SET_UID_GID(dest, src); \ + frame->root->uid = 
0; \ + frame->root->gid = 0; \ + frame->cookie = (void *)_GF_UID_GID_CHANGED; \ + } while (0) + +#define MARKER_RESET_UID_GID(frame, dest, src) \ + do { \ + _MARKER_SET_UID_GID(dest, src); \ + frame->cookie = NULL; \ + } while (0) + +#define MARKER_STACK_UNWIND(fop, frame, params...) \ + do { \ + quota_local_t *_local = NULL; \ + if (frame) { \ + _local = frame->local; \ + frame->local = NULL; \ + } \ + STACK_UNWIND_STRICT(fop, frame, params); \ + if (_local) \ + marker_local_unref(_local); \ + } while (0) + +struct marker_local { + uint32_t timebuf[2]; + pid_t pid; + loc_t loc; + loc_t parent_loc; + uid_t uid; + gid_t gid; + int32_t ref; + uint32_t ia_nlink; + struct iatt buf; + gf_lock_t lock; + mode_t mode; + int32_t err; + call_stub_t *stub; + call_frame_t *lk_frame; + quota_meta_t contribution; + struct marker_local *oplocal; + + /* marker quota specific */ + int64_t delta; + int64_t d_off; + int64_t sum; + int64_t size; + int32_t hl_count; + int32_t dentry_child_count; + + fd_t *fd; + call_frame_t *frame; + + quota_inode_ctx_t *ctx; + inode_contribution_t *contri; + + int xflag; + dict_t *xdata; + gf_boolean_t skip_txn; }; typedef struct marker_local marker_local_t; #define quota_local_t marker_local_t struct marker_inode_ctx { - struct quota_inode_ctx *quota_ctx; + struct quota_inode_ctx *quota_ctx; }; typedef struct marker_inode_ctx marker_inode_ctx_t; -struct marker_conf{ - char feature_enabled; - char *size_key; - char *dirty_key; - char *volume_uuid; - uuid_t volume_uuid_bin; - char *timestamp_file; - char *marker_xattr; - uint64_t quota_lk_owner; - gf_lock_t lock; - int32_t version; +struct marker_conf { + char feature_enabled; + char *size_key; + char *dirty_key; + char *volume_uuid; + uuid_t volume_uuid_bin; + char *timestamp_file; + char *marker_xattr; + uint64_t quota_lk_owner; + gf_lock_t lock; + int32_t version; }; typedef struct marker_conf marker_conf_t; diff --git a/xlators/features/path-convertor/Makefile.am 
b/xlators/features/metadisp/Makefile.am index d471a3f9243..a985f42a877 100644 --- a/xlators/features/path-convertor/Makefile.am +++ b/xlators/features/metadisp/Makefile.am @@ -1,3 +1,3 @@ SUBDIRS = src -CLEANFILES = +CLEANFILES = diff --git a/xlators/features/metadisp/src/Makefile.am b/xlators/features/metadisp/src/Makefile.am new file mode 100644 index 00000000000..1520ad8c424 --- /dev/null +++ b/xlators/features/metadisp/src/Makefile.am @@ -0,0 +1,38 @@ +noinst_PYTHON = gen-fops.py + +EXTRA_DIST = fops-tmpl.c + +xlator_LTLIBRARIES = metadisp.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features + +nodist_metadisp_la_SOURCES = fops.c + +BUILT_SOURCES = fops.c + +metadisp_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) + +metadisp_la_SOURCES = metadisp.c \ + metadisp-unlink.c \ + metadisp-stat.c \ + metadisp-lookup.c \ + metadisp-readdir.c \ + metadisp-create.c \ + metadisp-open.c \ + metadisp-fsync.c \ + metadisp-setattr.c \ + backend.c + +metadisp_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +noinst_HEADERS = metadisp.h metadisp-fops.h + +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ + -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src + +AM_CFLAGS = -Wall $(GF_CFLAGS) + +fops.c: fops-tmpl.c $(top_srcdir)/libglusterfs/src/generator.py gen-fops.py + PYTHONPATH=$(top_srcdir)/libglusterfs/src \ + $(PYTHON) $(srcdir)/gen-fops.py $(srcdir)/fops-tmpl.c > $@ + +CLEANFILES = $(nodist_metadisp_la_SOURCES) diff --git a/xlators/features/metadisp/src/backend.c b/xlators/features/metadisp/src/backend.c new file mode 100644 index 00000000000..ee2c25bfaa7 --- /dev/null +++ b/xlators/features/metadisp/src/backend.c @@ -0,0 +1,45 @@ +#define GFID_STR_LEN 37 + +#include "metadisp.h" + +/* + * backend.c + * + * functions responsible for converting user-facing paths to backend-style + * "/$GFID" paths. 
+ */ + +int32_t +build_backend_loc(uuid_t gfid, loc_t *src_loc, loc_t *dst_loc) +{ + static uuid_t root = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; + char gfid_buf[GFID_STR_LEN + 1] = { + 0, + }; + char *path = NULL; + + GF_VALIDATE_OR_GOTO("metadisp", src_loc, out); + GF_VALIDATE_OR_GOTO("metadisp", dst_loc, out); + + loc_copy(dst_loc, src_loc); + memcpy(dst_loc->pargfid, root, sizeof(root)); + GF_FREE((char *)dst_loc->path); // we are overwriting path so nuke + // whatever loc_copy gave us + + uuid_utoa_r(gfid, gfid_buf); + + path = GF_CALLOC(GFID_STR_LEN + 1, sizeof(char), + gf_common_mt_char); // freed via loc_wipe + + path[0] = '/'; + strncpy(path + 1, gfid_buf, GFID_STR_LEN); + path[GFID_STR_LEN] = 0; + dst_loc->path = path; + if (src_loc->name) + dst_loc->name = strrchr(dst_loc->path, '/'); + if (dst_loc->name) + dst_loc->name++; + return 0; +out: + return -1; +} diff --git a/xlators/features/metadisp/src/fops-tmpl.c b/xlators/features/metadisp/src/fops-tmpl.c new file mode 100644 index 00000000000..4385b7dd5b7 --- /dev/null +++ b/xlators/features/metadisp/src/fops-tmpl.c @@ -0,0 +1,10 @@ +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include <glusterfs/xlator.h> +#include "metadisp.h" +#include "metadisp-fops.h" + +#pragma generate diff --git a/xlators/features/metadisp/src/gen-fops.py b/xlators/features/metadisp/src/gen-fops.py new file mode 100644 index 00000000000..8b5e120fdec --- /dev/null +++ b/xlators/features/metadisp/src/gen-fops.py @@ -0,0 +1,160 @@ +#!/usr/bin/python + +import sys +from generator import fop_subs, generate + +FN_METADATA_CHILD_GENERIC = """ +int32_t +metadisp_@NAME@ (call_frame_t *frame, xlator_t *this, + @LONG_ARGS@) +{ + METADISP_TRACE("@NAME@ metadata"); + STACK_WIND (frame, default_@NAME@_cbk, + METADATA_CHILD(this), METADATA_CHILD(this)->fops->@NAME@, + @SHORT_ARGS@); + return 0; +} +""" + +FN_GENERIC_TEMPLATE = """ +int32_t +metadisp_@NAME@ (call_frame_t *frame, xlator_t *this, + @LONG_ARGS@) 
+{ + METADISP_TRACE("@NAME@ generic"); + STACK_WIND (frame, default_@NAME@_cbk, + DATA_CHILD(this), DATA_CHILD(this)->fops->@NAME@, + @SHORT_ARGS@); + return 0; +} +""" + +FN_DATAFD_TEMPLATE = """ +int32_t +metadisp_@NAME@ (call_frame_t *frame, xlator_t *this, + @LONG_ARGS@) +{ + METADISP_TRACE("@NAME@ datafd"); + xlator_t *child = NULL; + child = DATA_CHILD(this); + STACK_WIND (frame, default_@NAME@_cbk, + child, child->fops->@NAME@, + @SHORT_ARGS@); + return 0; +} +""" + +FN_DATALOC_TEMPLATE = """ +int32_t +metadisp_@NAME@ (call_frame_t *frame, xlator_t *this, + @LONG_ARGS@) +{ + METADISP_TRACE("@NAME@ dataloc"); + loc_t backend_loc = { + 0, + }; + if (build_backend_loc(loc->gfid, loc, &backend_loc)) { + goto unwind; + } + xlator_t *child = NULL; + child = DATA_CHILD(this); + STACK_WIND (frame, default_@NAME@_cbk, + child, child->fops->@NAME@, + @SHORT_ARGS@); + return 0; + +unwind: + STACK_UNWIND_STRICT(lookup, frame, -1, EINVAL, NULL, NULL, NULL, NULL); + return 0; +} +""" + +FOPS_LINE_TEMPLATE = "\t.@NAME@ = metadisp_@NAME@," + +skipped = [ + "readdir", + "readdirp", + "lookup", + "fsync", + "stat", + "open", + "create", + "unlink", + "setattr", + # TODO: implement "inodelk", +] + + +def gen_fops(): + done = skipped + + # + # these are fops that wind to the DATA_CHILD + # + # NOTE: re-written in order from google doc: + # https://docs.google.com/document/d/1KEwVtSNvDhs4qb63gWx2ulCp5GJjge77NGJk4p_Ms4Q + for name in [ + "writev", + "readv", + "ftruncate", + "zerofill", + "discard", + "seek", + "fstat", + ]: + done = done + [name] + print(generate(FN_DATAFD_TEMPLATE, name, fop_subs)) + + for name in ["truncate"]: + done = done + [name] + print(generate(FN_DATALOC_TEMPLATE, name, fop_subs)) + + # these are fops that operate solely on dentries, folders, + # or extended attributes. 
Therefore, they must always + # wind to METADATA_CHILD and should never perform + # any path rewriting + # + # NOTE: re-written in order from google doc: + # https://docs.google.com/document/d/1KEwVtSNvDhs4qb63gWx2ulCp5GJjge77NGJk4p_Ms4Q + for name in [ + "mkdir", + "symlink", + "link", + "rename", + "mknod", + "opendir", + # "readdir, # special-cased + # "readdirp, # special-cased + "fsyncdir", + # "setattr", # special-cased + "readlink", + "fentrylk", + "access", + # TODO: these wind to both, + # data for backend-attributes and metadata for the rest + "xattrop", + "setxattr", + "getxattr", + "removexattr", + "fgetxattr", + "fsetxattr", + "fremovexattr", + ]: + + done = done + [name] + print(generate(FN_METADATA_CHILD_GENERIC, name, fop_subs)) + + print("struct xlator_fops fops = {") + for name in done: + print(generate(FOPS_LINE_TEMPLATE, name, fop_subs)) + + print("};") + + +for l in open(sys.argv[1], "r").readlines(): + if l.find("#pragma generate") != -1: + print("/* BEGIN GENERATED CODE - DO NOT MODIFY */") + gen_fops() + print("/* END GENERATED CODE */") + else: + print(l[:-1]) diff --git a/xlators/features/metadisp/src/metadisp-create.c b/xlators/features/metadisp/src/metadisp-create.c new file mode 100644 index 00000000000..f8c9798dd59 --- /dev/null +++ b/xlators/features/metadisp/src/metadisp-create.c @@ -0,0 +1,101 @@ +#include "metadisp.h" +#include <glusterfs/call-stub.h> + +/** + * Create, like stat, is a two-step process. We send a create + * to the METADATA_CHILD, then send another create to the DATA_CHILD. + * + * We do the metadata child first to ensure that the ACLs are enforced. 
+ */ + +int32_t +metadisp_create_dentry_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, + inode_t *inode, struct iatt *buf, + struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) +{ + STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, buf, + preparent, postparent, xdata); + return 0; +} + +int32_t +metadisp_create_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, + int32_t flags, mode_t mode, mode_t umask, fd_t *fd, + dict_t *xdata) +{ + // create the backend data inode + STACK_WIND(frame, metadisp_create_dentry_cbk, DATA_CHILD(this), + DATA_CHILD(this)->fops->create, loc, flags, mode, umask, fd, + xdata); + return 0; +} + +int32_t +metadisp_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + METADISP_TRACE("%d %d", op_ret, op_errno); + call_stub_t *stub = cookie; + if (op_ret != 0) { + STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, buf, + preparent, postparent, xdata); + return 0; + } + + if (stub == NULL) { + goto unwind; + } + + if (stub->poison) { + call_stub_destroy(stub); + return 0; + } + + call_resume(stub); + return 0; + +unwind: + STACK_UNWIND_STRICT(create, frame, -1, EINVAL, NULL, NULL, NULL, NULL, NULL, + NULL); + return 0; +} + +int32_t +metadisp_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) +{ + METADISP_TRACE("."); + + loc_t backend_loc = { + 0, + }; + call_stub_t *stub = NULL; + uuid_t *gfid_req = NULL; + + RESOLVE_GFID_REQ(xdata, gfid_req, out); + + if (build_backend_loc(*gfid_req, loc, &backend_loc)) { + goto unwind; + } + + frame->local = loc; + + stub = fop_create_stub(frame, metadisp_create_resume, &backend_loc, flags, + mode, umask, fd, xdata); + + STACK_WIND_COOKIE(frame, metadisp_create_cbk, stub, 
METADATA_CHILD(this), + METADATA_CHILD(this)->fops->create, loc, flags, mode, + umask, fd, xdata); + return 0; + +unwind: + STACK_UNWIND_STRICT(create, frame, -1, EINVAL, NULL, NULL, NULL, NULL, NULL, + NULL); + return 0; +out: + return -1; +} diff --git a/xlators/features/metadisp/src/metadisp-fops.h b/xlators/features/metadisp/src/metadisp-fops.h new file mode 100644 index 00000000000..56dd427cf34 --- /dev/null +++ b/xlators/features/metadisp/src/metadisp-fops.h @@ -0,0 +1,51 @@ +#ifndef GF_METADISP_FOPS_H_ +#define GF_METADISP_FOPS_H_ + +#include <glusterfs/xlator.h> +#include <glusterfs/dict.h> +#include <glusterfs/glusterfs.h> + +#include <sys/types.h> + +/* fops in here are defined in their own file. Every other fop is just defined + * inline of fops.c */ + +int +metadisp_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *xdata); + +int +metadisp_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *dict); + +int +metadisp_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata); + +int +metadisp_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata); + +int +metadisp_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + fd_t *fd, dict_t *xdata); + +int +metadisp_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata); + +int +metadisp_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, + loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata); + +int +metadisp_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, + dict_t *xdata); + +int +metadisp_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, + dict_t *xdata); + +int +metadisp_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + struct iatt *stbuf, int32_t valid, dict_t *xdata); + +#endif diff --git a/xlators/features/metadisp/src/metadisp-fsync.c 
b/xlators/features/metadisp/src/metadisp-fsync.c new file mode 100644 index 00000000000..2e46fa84eac --- /dev/null +++ b/xlators/features/metadisp/src/metadisp-fsync.c @@ -0,0 +1,54 @@ + +#include "metadisp.h" +#include <glusterfs/call-stub.h> + +int32_t +metadisp_fsync_resume(call_frame_t *frame, xlator_t *this, fd_t *fd, + int32_t flags, dict_t *xdata) +{ + STACK_WIND(frame, default_fsync_cbk, DATA_CHILD(this), + DATA_CHILD(this)->fops->fsync, fd, flags, xdata); + return 0; +} + +int32_t +metadisp_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + call_stub_t *stub = NULL; + if (cookie) { + stub = cookie; + } + + if (op_ret != 0) { + goto unwind; + } + + if (stub->poison) { + call_stub_destroy(stub); + stub = NULL; + return 0; + } + + call_resume(stub); + return 0; + +unwind: + if (stub) { + call_stub_destroy(stub); + } + STACK_UNWIND_STRICT(fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata); + return 0; +} + +int32_t +metadisp_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, + dict_t *xdata) +{ + call_stub_t *stub = NULL; + stub = fop_fsync_stub(frame, metadisp_fsync_resume, fd, flags, xdata); + STACK_WIND_COOKIE(frame, metadisp_fsync_cbk, stub, METADATA_CHILD(this), + METADATA_CHILD(this)->fops->fsync, fd, flags, xdata); + return 0; +} diff --git a/xlators/features/metadisp/src/metadisp-lookup.c b/xlators/features/metadisp/src/metadisp-lookup.c new file mode 100644 index 00000000000..27d90c9f746 --- /dev/null +++ b/xlators/features/metadisp/src/metadisp-lookup.c @@ -0,0 +1,90 @@ +#include "metadisp.h" +#include <glusterfs/call-stub.h> + +/** + * Lookup, like stat, is a two-step process for grabbing the metadata details + * as well as the data details. 
+ */ + +int32_t +metadisp_backend_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, + struct iatt *postparent) +{ + METADISP_TRACE("backend_lookup_cbk"); + if (op_errno == ENOENT) { + op_errno = ENODATA; + op_ret = -1; + } + STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, xdata, + postparent); + return 0; +} + +int32_t +metadisp_backend_lookup_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, + dict_t *xdata) +{ + METADISP_TRACE("backend_lookup_resume"); + loc_t backend_loc = { + 0, + }; + if (build_backend_loc(loc->gfid, loc, &backend_loc)) { + goto unwind; + } + + STACK_WIND(frame, metadisp_backend_lookup_cbk, DATA_CHILD(this), + DATA_CHILD(this)->fops->lookup, &backend_loc, xdata); + return 0; + +unwind: + STACK_UNWIND_STRICT(lookup, frame, -1, EINVAL, NULL, NULL, NULL, NULL); + return 0; +} + +int32_t +metadisp_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, struct iatt *postparent) +{ + METADISP_TRACE("%d %d", op_ret, op_errno); + call_stub_t *stub = NULL; + stub = cookie; + + if (op_ret != 0) { + goto unwind; + } + + if (!IA_ISREG(buf->ia_type)) { + goto unwind; + } else if (!stub) { + op_errno = EINVAL; + goto unwind; + } + + METADISP_TRACE("resuming stub"); + + // memcpy(stub->args.loc.gfid, buf->ia_gfid, sizeof(uuid_t)); + call_resume(stub); + return 0; +unwind: + METADISP_TRACE("unwinding %d %d", op_ret, op_errno); + STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, xdata, + postparent); + if (stub) { + call_stub_destroy(stub); + } + return 0; +} + +int32_t +metadisp_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +{ + METADISP_TRACE("lookup"); + call_stub_t *stub = NULL; + stub = fop_lookup_stub(frame, metadisp_backend_lookup_resume, loc, xdata); + STACK_WIND_COOKIE(frame, metadisp_lookup_cbk, 
stub, METADATA_CHILD(this), + METADATA_CHILD(this)->fops->lookup, loc, xdata); + return 0; +} diff --git a/xlators/features/metadisp/src/metadisp-open.c b/xlators/features/metadisp/src/metadisp-open.c new file mode 100644 index 00000000000..64814afe636 --- /dev/null +++ b/xlators/features/metadisp/src/metadisp-open.c @@ -0,0 +1,70 @@ +#include <glusterfs/call-stub.h> +#include "metadisp.h" + +int32_t +metadisp_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) +{ + METADISP_TRACE("got open results %d %d", op_ret, op_errno); + + call_stub_t *stub = NULL; + if (cookie) { + stub = cookie; + } + + if (op_ret != 0) { + goto unwind; + } + + if (!stub) { + goto unwind; + } + + if (stub->poison) { + call_stub_destroy(stub); + stub = NULL; + return 0; + } + + call_resume(stub); + return 0; + +unwind: + if (stub) { + call_stub_destroy(stub); + } + STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, xdata); + return 0; +} + +int32_t +metadisp_open_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, + int32_t flags, fd_t *fd, dict_t *xdata) +{ + STACK_WIND_COOKIE(frame, metadisp_open_cbk, NULL, DATA_CHILD(this), + DATA_CHILD(this)->fops->open, loc, flags, fd, xdata); + return 0; +} + +int32_t +metadisp_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + fd_t *fd, dict_t *xdata) +{ + call_stub_t *stub = NULL; + loc_t backend_loc = { + 0, + }; + + if (build_backend_loc(loc->gfid, loc, &backend_loc)) { + goto unwind; + } + + stub = fop_open_stub(frame, metadisp_open_resume, &backend_loc, flags, fd, + xdata); + STACK_WIND_COOKIE(frame, metadisp_open_cbk, stub, METADATA_CHILD(this), + METADATA_CHILD(this)->fops->open, loc, flags, fd, xdata); + return 0; +unwind: + STACK_UNWIND_STRICT(open, frame, -1, EINVAL, NULL, NULL); + return 0; +} diff --git a/xlators/features/metadisp/src/metadisp-readdir.c b/xlators/features/metadisp/src/metadisp-readdir.c new file mode 100644 index 
00000000000..5f840b1e88f --- /dev/null +++ b/xlators/features/metadisp/src/metadisp-readdir.c @@ -0,0 +1,65 @@ +#include "metadisp.h" + +/** + * With a change to the posix xlator, readdir and readdirp are shockingly + * simple. + * + * The issue with separating the backend data of the files + * with the metadata is that readdirs must now read from multiple sources + * to coalesce the directory entries. + * + * The way we do this is to tell the METADATA_CHILD that when it's + * running readdirp, each file entry should have a stat wound to + * 'stat-source-of-truth'. + * + * see metadisp_stat for how it handles winds _from_posix. + */ + +int32_t +metadisp_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *xdata) +{ + METADISP_TRACE("."); + /* + * Always use readdirp, even if the original was readdir. Why? Because NFS. + * There are multiple translations between Gluster, UNIX, and NFS stat + * structures in that path. One of them uses the type etc. from the stat + * structure, which is only filled in by readdirp. If we use readdir, the + * entries do actually go all the way back to the client and are visible in + * getdents, but then the readdir throws them away because of the + * uninitialized type. + */ + GF_UNUSED int32_t ret; + if (!xdata) { + xdata = dict_new(); + } + + // ret = dict_set_int32 (xdata, "list-xattr", 1); + + // I'm my own source of truth! + ret = dict_set_static_ptr(xdata, "stat-source-of-truth", (void *)this); + + STACK_WIND(frame, default_readdirp_cbk, METADATA_CHILD(this), + METADATA_CHILD(this)->fops->readdirp, fd, size, off, xdata); + + return 0; +} + +int32_t +metadisp_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *xdata) +{ + METADISP_TRACE("."); + if (!xdata) { + xdata = dict_new(); + } + GF_UNUSED int32_t ret; + // ret = dict_set_int32 (xdata, "list-xattr", 1); + + // I'm my own source of truth! 
+ ret = dict_set_static_ptr(xdata, "stat-source-of-truth", (void *)this); + + STACK_WIND(frame, default_readdirp_cbk, METADATA_CHILD(this), + METADATA_CHILD(this)->fops->readdirp, fd, size, off, xdata); + return 0; +} diff --git a/xlators/features/metadisp/src/metadisp-setattr.c b/xlators/features/metadisp/src/metadisp-setattr.c new file mode 100644 index 00000000000..6991cf644f3 --- /dev/null +++ b/xlators/features/metadisp/src/metadisp-setattr.c @@ -0,0 +1,90 @@ +#include "metadisp.h" +#include <glusterfs/call-stub.h> + +int32_t +metadisp_backend_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *statpre, struct iatt *statpost, + dict_t *xdata) + +{ + METADISP_TRACE("backend_setattr_cbk"); + if (op_errno == ENOENT) { + op_errno = ENODATA; + op_ret = -1; + } + STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, statpre, statpost, + xdata); + return 0; +} + +int32_t +metadisp_backend_setattr_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, + struct iatt *stbuf, int32_t valid, + dict_t *xdata) + +{ + METADISP_TRACE("backend_setattr_resume"); + loc_t backend_loc = { + 0, + }; + if (build_backend_loc(loc->gfid, loc, &backend_loc)) { + goto unwind; + } + + STACK_WIND(frame, metadisp_backend_setattr_cbk, DATA_CHILD(this), + DATA_CHILD(this)->fops->setattr, &backend_loc, stbuf, valid, + xdata); + return 0; + +unwind: + STACK_UNWIND_STRICT(setattr, frame, -1, EINVAL, NULL, NULL, NULL); + return 0; +} + +int32_t +metadisp_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *statpre, + struct iatt *statpost, dict_t *xdata) +{ + METADISP_TRACE("%d %d", op_ret, op_errno); + call_stub_t *stub = NULL; + stub = cookie; + + if (op_ret != 0) { + goto unwind; + } + + if (!IA_ISREG(statpost->ia_type)) { + goto unwind; + } else if (!stub) { + op_errno = EINVAL; + goto unwind; + } + + METADISP_TRACE("resuming stub"); + call_resume(stub); + return 0; 
+unwind: + METADISP_TRACE("unwinding %d %d", op_ret, op_errno); + STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, statpre, statpost, + xdata); + if (stub) { + call_stub_destroy(stub); + } + return 0; +} + +int32_t +metadisp_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + struct iatt *stbuf, int32_t valid, dict_t *xdata) +{ + METADISP_TRACE("setattr"); + call_stub_t *stub = NULL; + stub = fop_setattr_stub(frame, metadisp_backend_setattr_resume, loc, stbuf, + valid, xdata); + STACK_WIND_COOKIE(frame, metadisp_setattr_cbk, stub, METADATA_CHILD(this), + METADATA_CHILD(this)->fops->setattr, loc, stbuf, valid, + xdata); + return 0; +} diff --git a/xlators/features/metadisp/src/metadisp-stat.c b/xlators/features/metadisp/src/metadisp-stat.c new file mode 100644 index 00000000000..b06d0dbcddd --- /dev/null +++ b/xlators/features/metadisp/src/metadisp-stat.c @@ -0,0 +1,124 @@ +#include "metadisp.h" +#include <glusterfs/call-stub.h> + +/** + * The stat flow in METADISP is complicated because we must + * do ensure a few things: + * 1. stat, on the path within the metadata layer, + * MUST get the backend FD of the data layer. + * --- we wind to the metadata layer, then the data layer. + * + * 2. the metadata layer MUST be able to ask the data + * layer for stat information. + * --- this is 'syncop-internal-from-posix' + * + * 3. when the metadata exists BUT the data is missing, + * we MUST mark the backend file as bad and heal it. 
+ */ + +int32_t +metadisp_stat_backend_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + dict_t *xdata) +{ + METADISP_TRACE("got backend stat results %d %d", op_ret, op_errno); + if (op_errno == ENOENT) { + STACK_UNWIND_STRICT(open, frame, -1, ENODATA, NULL, NULL); + return 0; + } + STACK_UNWIND_STRICT(stat, frame, op_ret, op_errno, buf, xdata); + return 0; +} + +int32_t +metadisp_stat_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, + dict_t *xdata) +{ + METADISP_TRACE("winding stat to path %s", loc->path); + if (gf_uuid_is_null(loc->gfid)) { + METADISP_TRACE("bad object, sending EUCLEAN"); + STACK_UNWIND_STRICT(open, frame, -1, EUCLEAN, NULL, NULL); + return 0; + } + + STACK_WIND(frame, metadisp_stat_backend_cbk, SECOND_CHILD(this), + SECOND_CHILD(this)->fops->stat, loc, xdata); + return 0; +} + +int32_t +metadisp_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + dict_t *xdata) +{ + call_stub_t *stub = NULL; + + METADISP_TRACE("got stat results %d %d", op_ret, op_errno); + + if (cookie) { + stub = cookie; + } + + if (op_ret != 0) { + goto unwind; + } + + // only use the stub for the files + if (!IA_ISREG(buf->ia_type)) { + goto unwind; + } + + if (stub->poison) { + call_stub_destroy(stub); + stub = NULL; + return 0; + } + + call_resume(stub); + return 0; + +unwind: + if (stub) { + call_stub_destroy(stub); + } + STACK_UNWIND_STRICT(stat, frame, op_ret, op_errno, buf, xdata); + return 0; +} + +int32_t +metadisp_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +{ + call_stub_t *stub = NULL; + int32_t ret = 0; + loc_t backend_loc = { + 0, + }; + METADISP_FILTER_ROOT(stat, loc, xdata); + + if (build_backend_loc(loc->gfid, loc, &backend_loc)) { + goto unwind; + } + + if (dict_get_int32(xdata, "syncop-internal-from-posix", &ret) == 0) { + // if we've just been sent a stat from posix, then we know + // that we must send 
down a stat for a file to the second child. + // + // that means we can skip the stat for the first child and just + // send to the data disk. + METADISP_TRACE("got syncop-internal-from-posix"); + STACK_WIND(frame, default_stat_cbk, DATA_CHILD(this), + DATA_CHILD(this)->fops->stat, &backend_loc, xdata); + return 0; + } + + // we do not know if the request is for a file, folder, etc. wind + // to first child to find out. + stub = fop_stat_stub(frame, metadisp_stat_resume, &backend_loc, xdata); + METADISP_TRACE("winding stat to first child %s", loc->path); + STACK_WIND_COOKIE(frame, metadisp_stat_cbk, stub, METADATA_CHILD(this), + METADATA_CHILD(this)->fops->stat, loc, xdata); + return 0; +unwind: + STACK_UNWIND_STRICT(stat, frame, -1, EINVAL, NULL, NULL); + return 0; +} diff --git a/xlators/features/metadisp/src/metadisp-unlink.c b/xlators/features/metadisp/src/metadisp-unlink.c new file mode 100644 index 00000000000..1f6a8eb35ce --- /dev/null +++ b/xlators/features/metadisp/src/metadisp-unlink.c @@ -0,0 +1,160 @@ + +#include "metadisp.h" +#include <glusterfs/call-stub.h> + +/** + * The unlink flow in metadisp is complicated because we must + * do ensure that UNLINK causes both the metadata objects + * to get removed and the data objects to get removed. + */ + +int32_t +metadisp_unlink_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, + int xflag, dict_t *xdata) +{ + METADISP_TRACE("winding backend unlink to path %s", loc->path); + STACK_WIND(frame, default_unlink_cbk, DATA_CHILD(this), + DATA_CHILD(this)->fops->unlink, loc, xflag, xdata); + return 0; +} + +int32_t +metadisp_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + METADISP_TRACE(". 
%d %d", op_ret, op_errno); + + int ret = 0; + call_stub_t *stub = NULL; + int nlink = 0; + + if (cookie) { + stub = cookie; + } + + if (op_ret != 0) { + goto unwind; + } + + if (stub->poison) { + call_stub_destroy(stub); + stub = NULL; + return 0; + } + + ret = dict_get_uint32(xdata, GF_RESPONSE_LINK_COUNT_XDATA, &nlink); + if (ret != 0) { + op_errno = EINVAL; + op_ret = -1; + goto unwind; + } + METADISP_TRACE("frontend hardlink count %d %d", ret, nlink); + if (nlink > 1) { + goto unwind; + } + + call_resume(stub); + return 0; + +unwind: + if (stub) { + call_stub_destroy(stub); + } + STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, preparent, postparent, + xdata); + return 0; +} + +int32_t +metadisp_unlink_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, + struct iatt *postparent) +{ + call_stub_t *stub = NULL; + + if (cookie) { + stub = cookie; + } + + if (op_ret != 0) { + goto unwind; + } + + // fail fast on empty gfid so we don't loop forever + if (gf_uuid_is_null(buf->ia_gfid)) { + op_ret = -1; + op_errno = ENODATA; + goto unwind; + } + + // fill gfid since the stub is incomplete + memcpy(stub->args.loc.gfid, buf->ia_gfid, sizeof(uuid_t)); + memcpy(stub->args.loc.pargfid, postparent->ia_gfid, sizeof(uuid_t)); + + if (stub->poison) { + call_stub_destroy(stub); + stub = NULL; + return 0; + } + + call_resume(stub); + return 0; + +unwind: + if (stub) { + call_stub_destroy(stub); + } + STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, NULL, NULL, NULL); + return 0; +} + +int32_t +metadisp_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, + dict_t *xdata) +{ + call_stub_t *stub = NULL; + loc_t backend_loc = { + 0, + }; + + if (gf_uuid_is_null(loc->gfid)) { + METADISP_TRACE("winding lookup for unlink to path %s", loc->path); + + // loop back to ourselves after a lookup + stub = fop_unlink_stub(frame, metadisp_unlink, loc, xflag, xdata); + 
STACK_WIND_COOKIE(frame, metadisp_unlink_lookup_cbk, stub, + METADATA_CHILD(this), + METADATA_CHILD(this)->fops->lookup, loc, xdata); + return 0; + } + + if (build_backend_loc(loc->gfid, loc, &backend_loc)) { + goto unwind; + } + + // + // ensure we get the link count on the unlink response, so we can + // account for hardlinks before winding to the backend. + // NOTE: + // multiple xlators use GF_REQUEST_LINK_COUNT_XDATA. confirmation + // is needed to ensure that multiple requests will work in the same + // xlator stack. + // + if (!xdata) { + xdata = dict_new(); + } + dict_set_int32(xdata, GF_REQUEST_LINK_COUNT_XDATA, 1); + + METADISP_TRACE("winding frontend unlink to path %s", loc->path); + stub = fop_unlink_stub(frame, metadisp_unlink_resume, &backend_loc, xflag, + xdata); + + STACK_WIND_COOKIE(frame, metadisp_unlink_cbk, stub, METADATA_CHILD(this), + METADATA_CHILD(this)->fops->unlink, loc, xflag, xdata); + return 0; +unwind: + STACK_UNWIND_STRICT(unlink, frame, -1, EINVAL, NULL, NULL, NULL); + return 0; +} diff --git a/xlators/features/metadisp/src/metadisp.c b/xlators/features/metadisp/src/metadisp.c new file mode 100644 index 00000000000..3c8f150cebc --- /dev/null +++ b/xlators/features/metadisp/src/metadisp.c @@ -0,0 +1,46 @@ +#include <glusterfs/call-stub.h> + +#include "metadisp.h" +#include "metadisp-fops.h" + +int32_t +init(xlator_t *this) +{ + if (!this->children) { + gf_log(this->name, GF_LOG_ERROR, + "not configured with children. exiting"); + return -1; + } + + if (!this->parents) { + gf_log(this->name, GF_LOG_WARNING, "dangling volume. 
check volfile "); + } + + return 0; +} + +void +fini(xlator_t *this) +{ + return; +} + +/* defined in fops.c */ +struct xlator_fops fops; + +struct xlator_cbks cbks = {}; + +struct volume_options options[] = { + {.key = {NULL}}, +}; + +xlator_api_t xlator_api = { + .init = init, + .fini = fini, + .fops = &fops, + .cbks = &cbks, + .options = options, + .op_version = {1}, + .identifier = "metadisp", + .category = GF_EXPERIMENTAL, +}; diff --git a/xlators/features/metadisp/src/metadisp.h b/xlators/features/metadisp/src/metadisp.h new file mode 100644 index 00000000000..c8fd7a13c04 --- /dev/null +++ b/xlators/features/metadisp/src/metadisp.h @@ -0,0 +1,45 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef GF_METADISP_H_ +#define GF_METADISP_H_ + +#include <glusterfs/glusterfs.h> +#include <glusterfs/logging.h> +#include <glusterfs/dict.h> +#include <glusterfs/xlator.h> +#include <glusterfs/defaults.h> + +#define METADATA_CHILD(_this) FIRST_CHILD(_this) +#define DATA_CHILD(_this) SECOND_CHILD(_this) + +int32_t +build_backend_loc(uuid_t gfid, loc_t *src_loc, loc_t *dst_loc); + +#define METADISP_TRACE(_args...) gf_log("metadisp", GF_LOG_INFO, _args) + +#define METADISP_FILTER_ROOT(_op, _args...) \ + if (strcmp(loc->path, "/") == 0) { \ + STACK_WIND(frame, default_##_op##_cbk, METADATA_CHILD(this), \ + METADATA_CHILD(this)->fops->_op, _args); \ + return 0; \ + } + +#define METADISP_FILTER_ROOT_BY_GFID(_op, _gfid, _args...) 
\ + if (__is_root_gfid(_gfid)) { \ + STACK_WIND(frame, default_##_op##_cbk, METADATA_CHILD(this), \ + METADATA_CHILD(this)->fops->_op, _args); \ + return 0; \ + } + +#define RESOLVE_GFID_REQ(_dict, _dest, _lbl) \ + VALIDATE_OR_GOTO(dict_get_ptr(_dict, "gfid-req", (void **)&_dest) == 0, \ + _lbl) + +#endif /* __TEMPLATE_H__ */ diff --git a/xlators/features/namespace/Makefile.am b/xlators/features/namespace/Makefile.am new file mode 100644 index 00000000000..a985f42a877 --- /dev/null +++ b/xlators/features/namespace/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/features/namespace/src/Makefile.am b/xlators/features/namespace/src/Makefile.am new file mode 100644 index 00000000000..e355d42cf4e --- /dev/null +++ b/xlators/features/namespace/src/Makefile.am @@ -0,0 +1,17 @@ +xlator_LTLIBRARIES = namespace.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features + +namespace_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) + +namespace_la_SOURCES = namespace.c +namespace_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +noinst_HEADERS = namespace.h + +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ + -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \ + -I$(top_srcdir)/xlators/lib/src + +AM_CFLAGS = -Wall $(GF_CFLAGS) + +CLEANFILES = diff --git a/xlators/features/namespace/src/namespace.c b/xlators/features/namespace/src/namespace.c new file mode 100644 index 00000000000..86c5ebee900 --- /dev/null +++ b/xlators/features/namespace/src/namespace.c @@ -0,0 +1,1344 @@ +/* + * Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + * This file is part of GlusterFS. + * + * This file is licensed to you under your choice of the GNU Lesser + * General Public License, version 3 or any later version (LGPLv3 or + * later), or the GNU General Public License, version 2 (GPLv2), in all + * cases as published by the Free Software Foundation. 
+ * + * xlators/features/namespace: + * This translator tags each request with a namespace hash, + * which then can be used in later translators to track and + * throttle fops per namespace. + */ + +#include <sys/types.h> + +#include <glusterfs/defaults.h> +#include <glusterfs/hashfn.h> +#include <glusterfs/logging.h> +#include "namespace.h" + +/* Return codes for common path parsing functions. */ +enum _path_parse_result { + PATH_PARSE_RESULT_NO_PATH = 0, + PATH_PARSE_RESULT_FOUND = 1, + PATH_PARSE_RESULT_IS_GFID = 2, +}; + +typedef enum _path_parse_result path_parse_result_t; + +/* Clean up an ns_local struct. Wipe a loc (its inode is ref'd, so we're good.) + */ +static inline void +ns_local_cleanup(ns_local_t *local) +{ + if (!local) { + return; + } + + loc_wipe(&local->loc); + GF_FREE(local); +} + +/* Create a new ns_local. We ref the inode, fake a new loc struct, and stash + * the stub given to us. */ +static inline ns_local_t * +ns_local_new(call_stub_t *stub, inode_t *inode) +{ + ns_local_t *local = NULL; + loc_t loc = { + 0, + }; + + if (!stub || !inode) { + goto out; + } + + local = GF_CALLOC(1, sizeof(ns_local_t), 0); + if (local == NULL) { + goto out; + } + + /* Set up a fake loc_t struct to give to the getxattr call. */ + gf_uuid_copy(loc.gfid, inode->gfid); + loc.inode = inode_ref(inode); + + /* If for some reason inode_ref() fails, then just give up. */ + if (!loc.inode) { + GF_FREE(local); + goto out; + } + + local->stub = stub; + local->loc = loc; + +out: + return local; +} + +/* Try parsing a path string. If the path string is a GFID, then return + * with PATH_PARSE_RESULT_IS_GFID. If we have no namespace (i.e. '/') then + * return PATH_PARSE_RESULT_NO_PATH and set the hash to 1. Otherwise, hash the + * namespace and store it in the info struct. 
*/ +static path_parse_result_t +parse_path(ns_info_t *info, const char *path) +{ + int len = 0; + const char *ns_begin = path; + const char *ns_end = NULL; + + if (!path || strlen(path) == 0) { + return PATH_PARSE_RESULT_NO_PATH; + } + + if (path[0] == '<') { + return PATH_PARSE_RESULT_IS_GFID; + } + + /* Right now we only want the top-level directory, so + * skip the initial '/' and read until the next '/'. */ + while (*ns_begin == '/') { + ns_begin++; + } + + /* ns_end will point to the next '/' or NULL if there is no delimiting + * '/' (i.e. "/directory" or the top level "/") */ + ns_end = strchr(ns_begin, '/'); + len = ns_end ? (ns_end - ns_begin) : strlen(ns_begin); + + if (len != 0) { + info->hash = SuperFastHash(ns_begin, len); + } else { + /* If our substring is empty, then we can hash '/' instead. + * '/' is used in the namespace config for the top-level + * namespace. */ + info->hash = SuperFastHash("/", 1); + } + + info->found = _gf_true; + return PATH_PARSE_RESULT_FOUND; +} + +/* Cache namespace info stored in the stack (info) into the inode. */ +static int +ns_inode_ctx_put(inode_t *inode, xlator_t *this, ns_info_t *info) +{ + ns_info_t *cached_ns_info = NULL; + uint64_t ns_as_64 = 0; + int ret = -1; + + if (!inode || !this) { + gf_log(this ? this->name : "namespace", GF_LOG_WARNING, + "Need a valid inode and xlator to cache ns_info."); + ret = -1; + goto out; + } + + cached_ns_info = GF_CALLOC(1, sizeof(ns_info_t), 0); + + /* If we've run out of memory, then return ENOMEM. 
*/ + if (cached_ns_info == NULL) { + gf_log(this->name, GF_LOG_WARNING, "No memory to cache ns_info."); + ret = -(ENOMEM); + goto out; + } + + *cached_ns_info = *info; + ns_as_64 = (uint64_t)(uintptr_t)cached_ns_info; + + ret = inode_ctx_put(inode, this, ns_as_64); + + if (ret) { + goto out; + } + + ret = 0; +out: + if (ret && cached_ns_info) { + GF_FREE(cached_ns_info); + } + + return ret; +} + +/* Retrieve namespace info cached in the inode into the stack for use in later + * translators. */ +static int +ns_inode_ctx_get(inode_t *inode, xlator_t *this, ns_info_t *info) +{ + ns_info_t *cached_ns_info = NULL; + uint64_t ns_as_64 = 0; + int ret = -1; + + if (!inode) { + ret = -ENOENT; + goto out; + } + + ret = inode_ctx_get(inode, this, &ns_as_64); + + if (!ret) { + cached_ns_info = (ns_info_t *)(uintptr_t)ns_as_64; + *info = *cached_ns_info; + } + +out: + return ret; +} + +/* This callback is the top of the unwind path of our attempt to get the path + * manually from the posix translator. We'll try to parse the path returned + * if it exists, then cache the hash if possible. Then just return to the + * default stub that we provide in the local, since there's nothing else to do + * once we've gotten the namespace hash. */ +int32_t +get_path_resume_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t *xdata) +{ + path_parse_result_t ret = PATH_PARSE_RESULT_NO_PATH; + call_frame_t *resume_frame = NULL; + ns_local_t *local = NULL; + call_stub_t *stub = NULL; + ns_info_t *info = NULL; + char *path = NULL; + + GF_VALIDATE_OR_GOTO(this->name, frame, out); + local = frame->local; + + GF_VALIDATE_OR_GOTO(this->name, local, out); + stub = local->stub; + + GF_VALIDATE_OR_GOTO(this->name, stub, out); + /* Get the ns_info from the frame that we will eventually resume, + * not the frame that we're going to destroy (frame). 
*/ + resume_frame = stub->frame; + + GF_VALIDATE_OR_GOTO(this->name, resume_frame, out); + GF_VALIDATE_OR_GOTO(this->name, resume_frame->root, out); + info = &resume_frame->root->ns_info; + + GF_VALIDATE_OR_GOTO(this->name, dict, out); + + /* If we get a value back for the GET_ANCESTRY_PATH_KEY, then we + * try to access it and parse it like a path. */ + if (!op_ret && !dict_get_str(dict, GET_ANCESTRY_PATH_KEY, &path)) { + gf_log(this->name, GF_LOG_DEBUG, "G>P %s retrieved path %s", + uuid_utoa(local->loc.gfid), path); + /* Now let's parse a path, finally. */ + ret = parse_path(info, path); + } + + if (ret == PATH_PARSE_RESULT_FOUND) { + /* If we finally found namespace, then stash it. */ + ns_inode_ctx_put(local->loc.inode, this, info); + + gf_log(this->name, GF_LOG_DEBUG, "G>P %s %10u namespace found %s", + uuid_utoa(local->loc.inode->gfid), info->hash, path); + } else if (ret == PATH_PARSE_RESULT_NO_PATH) { + gf_log(this->name, GF_LOG_WARNING, "G>P %s has no path", + uuid_utoa(local->loc.inode->gfid)); + } else if (ret == PATH_PARSE_RESULT_IS_GFID) { + gf_log(this->name, GF_LOG_WARNING, + "G>P %s winding failed, still have gfid", + uuid_utoa(local->loc.inode->gfid)); + } + +out: + /* Make sure to clean up local finally. */ + + if (frame) { + frame->local = NULL; + STACK_DESTROY(frame->root); + } + + if (local) { + ns_local_cleanup(local); + } + + if (stub) { + call_resume(stub); + } + + return 0; +} + +/* This function tries first to set a namespace based on the information that + * it can retrieve from an `loc_t`. This includes first looking for a cached + * namespace in the inode, then trying to parse the path string in the `loc_t` + * struct. If this fails, then it will try to call inode_path. 
*/
+/* Outcome reported to the caller:
+ *   PATH_PARSE_RESULT_FOUND   - ns_inode_ctx_get() or parse_path() filled in
+ *                               frame->root->ns_info;
+ *   PATH_PARSE_RESULT_IS_GFID - only a gfid is known, so the caller is
+ *                               expected to wind GET_ANCESTRY_PATH_KEY;
+ *   PATH_PARSE_RESULT_NO_PATH - nothing usable, or tagging is switched off.
+ * `fn` is only used as a prefix for the log messages. */
+static path_parse_result_t
+set_ns_from_loc(const char *fn, call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+    path_parse_result_t ret = PATH_PARSE_RESULT_NO_PATH;
+    ns_private_t *priv = (ns_private_t *)this->private;
+    ns_info_t *info = &frame->root->ns_info;
+    char *path = NULL;
+
+    /* Start from a clean "not found" state on every fop. */
+    info->hash = 0;
+    info->found = _gf_false;
+
+    if (!priv->tag_namespaces) {
+        return ret;
+    }
+
+    /* This is our first pass at trying to get a path. Try getting
+     * from the inode context, then from the loc's path itself. */
+    if (!loc || !loc->path || !loc->inode) {
+        ret = PATH_PARSE_RESULT_NO_PATH;
+    } else if (!ns_inode_ctx_get(loc->inode, this, info)) {
+        ret = PATH_PARSE_RESULT_FOUND;
+    } else {
+        ret = parse_path(info, loc->path);
+        gf_log(this->name, GF_LOG_DEBUG, "%s: LOC retrieved path %s", fn,
+               loc->path);
+
+        if (ret == PATH_PARSE_RESULT_FOUND) {
+            ns_inode_ctx_put(loc->inode, this, info);
+        }
+    }
+
+    /* Keep trying by calling inode_path next, making sure to copy
+       the loc's gfid into its inode if necessary. This copy also serves
+       the eventual GET_ANCESTRY_PATH_KEY wind, so it is done only here. */
+    if (ret == PATH_PARSE_RESULT_IS_GFID) {
+        if (gf_uuid_is_null(loc->inode->gfid)) {
+            gf_uuid_copy(loc->inode->gfid, loc->gfid);
+        }
+
+        if (inode_path(loc->inode, NULL, &path) >= 0 && path) {
+            /* Parse the path inode_path() just built. Re-parsing
+             * loc->path here could only yield IS_GFID again. */
+            ret = parse_path(info, path);
+            gf_log(this->name, GF_LOG_DEBUG, "%s: LOC retrieved path %s", fn,
+                   path);
+
+            if (ret == PATH_PARSE_RESULT_FOUND) {
+                ns_inode_ctx_put(loc->inode, this, info);
+            }
+        }
+
+        if (path) {
+            GF_FREE(path);
+        }
+    }
+
+    /* Report our status, and if we have a GFID, we'll eventually try a
+     * GET_ANCESTRY_PATH_KEY wind when we return from this function. */
+    if (ret == PATH_PARSE_RESULT_FOUND) {
+        gf_log(this->name, GF_LOG_DEBUG,
+               "%s: LOC %s %10u namespace found for %s", fn,
+               uuid_utoa(loc->inode->gfid), info->hash, loc->path);
+    } else if (ret == PATH_PARSE_RESULT_NO_PATH) {
+        gf_log(this->name, GF_LOG_WARNING, "%s: LOC has no path", fn);
+    } else if (ret == PATH_PARSE_RESULT_IS_GFID) {
+        gf_log(this->name, GF_LOG_DEBUG, "%s: LOC %s winding, looking for path",
+               fn, uuid_utoa(loc->inode->gfid));
+    }
+
+    return ret;
+}
+
+/* This function tries first to set a namespace based on the information that
+ * it can retrieve from an `fd_t`. This includes first looking for a cached
+ * namespace in the inode, then trying to call inode_path manually.
+ *
+ * The return value follows the same convention as set_ns_from_loc():
+ * FOUND, IS_GFID (caller should wind GET_ANCESTRY_PATH_KEY) or NO_PATH.
+ * `fn` is only used as a prefix for the log messages. */
+static path_parse_result_t
+set_ns_from_fd(const char *fn, call_frame_t *frame, xlator_t *this, fd_t *fd)
+{
+    path_parse_result_t ret = PATH_PARSE_RESULT_NO_PATH;
+    ns_private_t *priv = (ns_private_t *)this->private;
+    ns_info_t *info = &frame->root->ns_info;
+    char *path = NULL;
+
+    /* Start from a clean "not found" state on every fop. */
+    info->hash = 0;
+    info->found = _gf_false;
+
+    if (!priv->tag_namespaces) {
+        return ret;
+    }
+
+    /* This is our first pass at trying to get a path. Try getting
+     * from the inode context, then inode_path. */
+    if (!fd || !fd->inode) {
+        ret = PATH_PARSE_RESULT_NO_PATH;
+    } else if (!ns_inode_ctx_get(fd->inode, this, info)) {
+        ret = PATH_PARSE_RESULT_FOUND;
+    } else if (inode_path(fd->inode, NULL, &path) >= 0 && path) {
+        ret = parse_path(info, path);
+        gf_log(this->name, GF_LOG_DEBUG, "%s: FD retrieved path %s", fn, path);
+
+        if (ret == PATH_PARSE_RESULT_FOUND) {
+            ns_inode_ctx_put(fd->inode, this, info);
+        }
+    }
+
+    if (path) {
+        GF_FREE(path);
+    }
+
+    /* Report our status, and if we have a GFID, we'll eventually try a
+     * GET_ANCESTRY_PATH_KEY wind when we return from this function. */
+    if (ret == PATH_PARSE_RESULT_FOUND) {
+        gf_log(this->name, GF_LOG_DEBUG, "%s: FD %s %10u namespace found", fn,
+               uuid_utoa(fd->inode->gfid), info->hash);
+    } else if (ret == PATH_PARSE_RESULT_NO_PATH) {
+        gf_log(this->name, GF_LOG_WARNING, "%s: FD has no path", fn);
+    } else if (ret == PATH_PARSE_RESULT_IS_GFID) {
+        gf_log(this->name, GF_LOG_DEBUG, "%s: FD %s winding, looking for path",
+               fn, uuid_utoa(fd->inode->gfid));
+    }
+
+    return ret;
+}
+
+/* This macro does the work of winding down a call of `getxattr` in the case
+ * that we have to retrieve the path manually. It assumes that there is a label
+ * called `wind` and the existence of several basic variables (frame, this),
+ * but otherwise is general enough for any fop (fd- or loc-based.)
+ *
+ * On any allocation failure it releases what it already allocated and jumps
+ * to `wind`, so the fop continues untagged instead of failing. */
+#define GET_ANCESTRY_PATH_WIND(fop, inode, args...) \
+    do { \
+        ns_info_t *info = &frame->root->ns_info; \
+        call_frame_t *new_frame = NULL; \
+        ns_local_t *local = NULL; \
+        call_stub_t *stub = NULL; \
+        \
+        gf_log(this->name, GF_LOG_DEBUG, " %s winding, looking for path", \
+               uuid_utoa(inode->gfid)); \
+        \
+        new_frame = create_frame(this, this->ctx->pool); \
+        if (!new_frame) { \
+            gf_log(this->name, GF_LOG_ERROR, \
+                   "Cannot allocate new call frame."); \
+            goto wind; \
+        } \
+        \
+        stub = fop_##fop##_stub(frame, default_##fop, args); \
+        if (!stub) { \
+            gf_log(this->name, GF_LOG_ERROR, \
+                   "Cannot allocate function stub."); \
+            /* The helper frame was never wound; do not leak it. */ \
+            STACK_DESTROY(new_frame->root); \
+            goto wind; \
+        } \
+        \
+        new_frame->root->uid = 0; \
+        new_frame->root->gid = 0; \
+        /* Put a phony "not found" NS info into this call. */ \
+        new_frame->root->ns_info = *info; \
+        \
+        local = ns_local_new(stub, inode); \
+        if (!local) { \
+            gf_log(this->name, GF_LOG_ERROR, \
+                   "Cannot allocate function local."); \
+            /* NOTE(review): assumes ns_local_new() does not free `stub` \
+             * when it fails - confirm against its definition. */ \
+            call_stub_destroy(stub); \
+            STACK_DESTROY(new_frame->root); \
+            goto wind; \
+        } \
+        \
+        new_frame->local = local; \
+        /* After allocating a new frame, a call stub (to \
+         * resume our current fop), and a local variables \
+         * struct (for our loc to getxattr and our resume \
+         * stub), call getxattr and unwind to get_path_resume_cbk. \
+         */ \
+        STACK_WIND(new_frame, get_path_resume_cbk, FIRST_CHILD(this), \
+                   FIRST_CHILD(this)->fops->getxattr, &local->loc, \
+                   GET_ANCESTRY_PATH_KEY, NULL); \
+    } while (0)
+
+/* rmdir: namespace is resolved from `loc`. */
+int32_t
+ns_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags,
+         dict_t *xdata)
+{
+    if (set_ns_from_loc(__FUNCTION__, frame, this, loc) !=
+        PATH_PARSE_RESULT_IS_GFID) {
+        goto wind;
+    }
+
+    /* Only a gfid is known: fetch the ancestry path, then resume. */
+    GET_ANCESTRY_PATH_WIND(rmdir, loc->inode, loc, xflags, xdata);
+    return 0;
+
+wind:
+    STACK_WIND(frame, default_rmdir_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->rmdir, loc, xflags, xdata);
+    return 0;
+}
+
+/* unlink: namespace is resolved from `loc`. */
+int32_t
+ns_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags,
+          dict_t *xdata)
+{
+    if (set_ns_from_loc(__FUNCTION__, frame, this, loc) !=
+        PATH_PARSE_RESULT_IS_GFID) {
+        goto wind;
+    }
+
+    GET_ANCESTRY_PATH_WIND(unlink, loc->inode, loc, xflags, xdata);
+    return 0;
+
+wind:
+    STACK_WIND(frame, default_unlink_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->unlink, loc, xflags, xdata);
+    return 0;
+}
+
+/* rename: namespace is resolved from the destination, `newloc`. */
+int32_t
+ns_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+          dict_t *xdata)
+{
+    if (set_ns_from_loc(__FUNCTION__, frame, this, newloc) !=
+        PATH_PARSE_RESULT_IS_GFID) {
+        goto wind;
+    }
+
+    GET_ANCESTRY_PATH_WIND(rename, newloc->inode, oldloc, newloc, xdata);
+    return 0;
+
+wind:
+    STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
+    return 0;
+}
+
+/* link: namespace is resolved from the destination, `newloc`. */
+int32_t
+ns_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+        dict_t *xdata)
+{
+    if (set_ns_from_loc(__FUNCTION__, frame, this, newloc) !=
+        PATH_PARSE_RESULT_IS_GFID) {
+        goto wind;
+    }
+
+    GET_ANCESTRY_PATH_WIND(link, newloc->inode, oldloc, newloc, xdata);
+    return 0;
+
+wind:
+    STACK_WIND(frame, default_link_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
+    return 0;
+}
+
+int32_t
+ns_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+         mode_t umask, dict_t *xdata)
+{
+    /* mkdir: namespace comes from `loc`; a bare gfid takes the detour. */
+    if (set_ns_from_loc(__FUNCTION__, frame, this, loc) !=
+        PATH_PARSE_RESULT_IS_GFID) {
+        goto wind;
+    }
+
+    GET_ANCESTRY_PATH_WIND(mkdir, loc->inode, loc, mode, umask, xdata);
+    return 0;
+
+wind:
+    STACK_WIND(frame, default_mkdir_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata);
+    return 0;
+}
+
+/* symlink: namespace comes from `loc`. */
+int32_t
+ns_symlink(call_frame_t *frame, xlator_t *this, const char *linkname,
+           loc_t *loc, mode_t umask, dict_t *xdata)
+{
+    if (set_ns_from_loc(__FUNCTION__, frame, this, loc) !=
+        PATH_PARSE_RESULT_IS_GFID) {
+        goto wind;
+    }
+
+    GET_ANCESTRY_PATH_WIND(symlink, loc->inode, linkname, loc, umask, xdata);
+    return 0;
+
+wind:
+    STACK_WIND(frame, default_symlink_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->symlink, linkname, loc, umask, xdata);
+    return 0;
+}
+
+/* mknod: namespace comes from `loc`. */
+int32_t
+ns_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+         dev_t dev, mode_t umask, dict_t *xdata)
+{
+    if (set_ns_from_loc(__FUNCTION__, frame, this, loc) !=
+        PATH_PARSE_RESULT_IS_GFID) {
+        goto wind;
+    }
+
+    GET_ANCESTRY_PATH_WIND(mknod, loc->inode, loc, mode, dev, umask, xdata);
+    return 0;
+
+wind:
+    STACK_WIND(frame, default_mknod_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->mknod, loc, mode, dev, umask, xdata);
+    return 0;
+}
+
+/* create: namespace comes from `loc` (not from the new `fd`). */
+int32_t
+ns_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+          mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+    if (set_ns_from_loc(__FUNCTION__, frame, this, loc) !=
+        PATH_PARSE_RESULT_IS_GFID) {
+        goto wind;
+    }
+
+    GET_ANCESTRY_PATH_WIND(create, loc->inode, loc, flags, mode, umask, fd,
+                           xdata);
+    return 0;
+
+wind:
+    STACK_WIND(frame, default_create_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
+               xdata);
+    return 0;
+}
+
+/* fsetattr: namespace comes from `fd`. */
+int32_t
+ns_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
+            int32_t valid, dict_t *xdata)
+{
+    if (set_ns_from_fd(__FUNCTION__, frame, this, fd) !=
+        PATH_PARSE_RESULT_IS_GFID) {
+        goto wind;
+    }
+
+    GET_ANCESTRY_PATH_WIND(fsetattr, fd->inode, fd, stbuf, valid, xdata);
+    return 0;
+
+wind:
+    STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
+    return 0;
+}
+
+/* setattr: namespace comes from `loc`. */
+int32_t
+ns_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
+           int32_t valid, dict_t *xdata)
+{
+    if (set_ns_from_loc(__FUNCTION__, frame, this, loc) !=
+        PATH_PARSE_RESULT_IS_GFID) {
+        goto wind;
+    }
+
+    GET_ANCESTRY_PATH_WIND(setattr, loc->inode, loc, stbuf, valid, xdata);
+    return 0;
+
+wind:
+    STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
+    return 0;
+}
+
+/* fremovexattr: namespace comes from `fd`. */
+int32_t
+ns_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
+                dict_t *xdata)
+{
+    if (set_ns_from_fd(__FUNCTION__, frame, this, fd) !=
+        PATH_PARSE_RESULT_IS_GFID) {
+        goto wind;
+    }
+
+    GET_ANCESTRY_PATH_WIND(fremovexattr, fd->inode, fd, name, xdata);
+    return 0;
+
+wind:
+    STACK_WIND(frame, default_fremovexattr_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
+    return 0;
+}
+
+/* removexattr: namespace comes from `loc`. */
+int32_t
+ns_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+               const char *name, dict_t *xdata)
+{
+    if (set_ns_from_loc(__FUNCTION__, frame, this, loc) !=
+        PATH_PARSE_RESULT_IS_GFID) {
+        goto wind;
+    }
+
+    GET_ANCESTRY_PATH_WIND(removexattr, loc->inode, loc, name, xdata);
+    return 0;
+
+wind:
+    STACK_WIND(frame, default_removexattr_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
+    return 0;
+}
+
+/* setxattr: namespace comes from `loc`. */
+int32_t
+ns_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+            int32_t flags, dict_t *xdata)
+{
+    if (set_ns_from_loc(__FUNCTION__, frame, this, loc) !=
+        PATH_PARSE_RESULT_IS_GFID) {
+        goto wind;
+    }
+
+    GET_ANCESTRY_PATH_WIND(setxattr, loc->inode, loc, dict, flags, xdata);
+    return 0;
+
+wind:
+    STACK_WIND(frame, default_setxattr_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata);
+    return 0;
+}
+
+/* fsetxattr: namespace comes from `fd`. */
+int32_t
+ns_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+             int32_t flags, dict_t *xdata)
+{
+    if (set_ns_from_fd(__FUNCTION__, frame, this, fd) !=
+        PATH_PARSE_RESULT_IS_GFID) {
+        goto wind;
+    }
+
+    GET_ANCESTRY_PATH_WIND(fsetxattr, fd->inode, fd, dict, flags, xdata);
+    return 0;
+
+wind:
+    STACK_WIND(frame, default_fsetxattr_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
+    return 0;
+}
+
+/* truncate: namespace comes from `loc`. */
+int32_t
+ns_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+            dict_t *xdata)
+{
+    if (set_ns_from_loc(__FUNCTION__, frame, this, loc) !=
+        PATH_PARSE_RESULT_IS_GFID) {
+        goto wind;
+    }
+
+    GET_ANCESTRY_PATH_WIND(truncate, loc->inode, loc, offset, xdata);
+    return 0;
+
+wind:
+    STACK_WIND(frame, default_truncate_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+    return 0;
+}
+
+/* ftruncate: namespace comes from `fd`. */
+int32_t
+ns_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+             dict_t *xdata)
+{
+    if (set_ns_from_fd(__FUNCTION__, frame, this, fd) !=
+        PATH_PARSE_RESULT_IS_GFID) {
+        goto wind;
+    }
+
+    GET_ANCESTRY_PATH_WIND(ftruncate, fd->inode, fd, offset, xdata);
+    return 0;
+
+wind:
+    STACK_WIND(frame, default_ftruncate_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+    return 0;
+}
+
+/* writev: namespace comes from `fd`. */
+int32_t
+ns_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+          int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,
+          dict_t *xdata)
+{
+    if (set_ns_from_fd(__FUNCTION__, frame, this, fd) !=
+        PATH_PARSE_RESULT_IS_GFID) {
+        goto wind;
+    }
+
+    GET_ANCESTRY_PATH_WIND(writev, fd->inode, fd, vector, count, offset, flags,
+                           iobref, xdata);
+    return 0;
+
+wind:
+    STACK_WIND(frame, default_writev_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->writev, fd, vector, count, offset,
+               flags, iobref, xdata);
+    return 0;
+}
+
+/* lookup: namespace comes from `loc`. */
+int32_t
+ns_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+    if (set_ns_from_loc(__FUNCTION__, frame, this, loc) !=
+        PATH_PARSE_RESULT_IS_GFID) {
+        goto wind;
+    }
+
+    GET_ANCESTRY_PATH_WIND(lookup, loc->inode, loc, xdata);
+    return 0;
+
+wind:
+    STACK_WIND(frame, default_lookup_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->lookup, loc, xdata);
+    return 0;
+}
+
+/* stat: namespace comes from `loc`. */
+int32_t
+ns_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+    if (set_ns_from_loc(__FUNCTION__, frame, this, loc) !=
+        PATH_PARSE_RESULT_IS_GFID) {
+        goto wind;
+    }
+
+    GET_ANCESTRY_PATH_WIND(stat, loc->inode, loc, xdata);
+    return 0;
+
+wind:
+    STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->stat, loc, xdata);
+    return 0;
+}
+
+/* fstat: namespace comes from `fd`. */
+int32_t
+ns_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+    if (set_ns_from_fd(__FUNCTION__, frame, this, fd) !=
+        PATH_PARSE_RESULT_IS_GFID) {
+        goto wind;
+    }
+
+    GET_ANCESTRY_PATH_WIND(fstat, fd->inode, fd, xdata);
+    return 0;
+
+wind:
+    STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->fstat, fd, xdata);
+    return 0;
+}
+
+/* readlink: namespace comes from `loc`. */
+int32_t
+ns_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+            dict_t *xdata)
+{
+    if (set_ns_from_loc(__FUNCTION__, frame, this, loc) !=
+        PATH_PARSE_RESULT_IS_GFID) {
+        goto wind;
+    }
+
+    GET_ANCESTRY_PATH_WIND(readlink, loc->inode, loc, size, xdata);
+    return 0;
+
+wind:
+    STACK_WIND(frame, default_readlink_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->readlink, loc, size, xdata);
+    return 0;
+}
+
+int32_t
+ns_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+          dict_t *xdata)
+{
+    /* access: namespace comes from `loc`; a bare gfid takes the detour. */
+    if (set_ns_from_loc(__FUNCTION__, frame, this, loc) !=
+        PATH_PARSE_RESULT_IS_GFID) {
+        goto wind;
+    }
+
+    GET_ANCESTRY_PATH_WIND(access, loc->inode, loc, mask, xdata);
+    return 0;
+
+wind:
+    STACK_WIND(frame, default_access_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->access, loc, mask, xdata);
+    return 0;
+}
+
+/* open: namespace comes from `fd` even though a `loc` is also passed. */
+int32_t
+ns_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+        fd_t *fd, dict_t *xdata)
+{
+    if (set_ns_from_fd(__FUNCTION__, frame, this, fd) !=
+        PATH_PARSE_RESULT_IS_GFID) {
+        goto wind;
+    }
+
+    GET_ANCESTRY_PATH_WIND(open, fd->inode, loc, flags, fd, xdata);
+    return 0;
+
+wind:
+    STACK_WIND(frame, default_open_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
+    return 0;
+}
+
+/* readv: namespace comes from `fd`. */
+int32_t
+ns_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+         off_t offset, uint32_t flags, dict_t *xdata)
+{
+    if (set_ns_from_fd(__FUNCTION__, frame, this, fd) !=
+        PATH_PARSE_RESULT_IS_GFID) {
+        goto wind;
+    }
+
+    GET_ANCESTRY_PATH_WIND(readv, fd->inode, fd, size, offset, flags, xdata);
+    return 0;
+
+wind:
+    STACK_WIND(frame, default_readv_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata);
+    return 0;
+}
+
+/* flush: namespace comes from `fd`. */
+int32_t
+ns_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+    if (set_ns_from_fd(__FUNCTION__, frame, this, fd) !=
+        PATH_PARSE_RESULT_IS_GFID) {
+        goto wind;
+    }
+
+    GET_ANCESTRY_PATH_WIND(flush, fd->inode, fd, xdata);
+    return 0;
+
+wind:
+    STACK_WIND(frame, default_flush_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->flush, fd, xdata);
+    return 0;
+}
+
+/* fsync: namespace comes from `fd`. */
+int32_t
+ns_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+         dict_t *xdata)
+{
+    if (set_ns_from_fd(__FUNCTION__, frame, this, fd) !=
+        PATH_PARSE_RESULT_IS_GFID) {
+        goto wind;
+    }
+
+    GET_ANCESTRY_PATH_WIND(fsync, fd->inode, fd, datasync, xdata);
+    return 0;
+
+wind:
+    STACK_WIND(frame, default_fsync_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata);
+    return 0;
+}
+
+/* opendir: namespace comes from `loc` (not from the new `fd`). */
+int32_t
+ns_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+           dict_t *xdata)
+{
+    if (set_ns_from_loc(__FUNCTION__, frame, this, loc) !=
+        PATH_PARSE_RESULT_IS_GFID) {
+        goto wind;
+    }
+
+    GET_ANCESTRY_PATH_WIND(opendir, loc->inode, loc, fd, xdata);
+    return 0;
+
+wind:
+    STACK_WIND(frame, default_opendir_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->opendir, loc, fd, xdata);
+    return 0;
+}
+
+/* fsyncdir: namespace comes from `fd`. */
+int32_t
+ns_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+            dict_t *xdata)
+{
+    if (set_ns_from_fd(__FUNCTION__, frame, this, fd) !=
+        PATH_PARSE_RESULT_IS_GFID) {
+        goto wind;
+    }
+
+    GET_ANCESTRY_PATH_WIND(fsyncdir, fd->inode, fd, datasync, xdata);
+    return 0;
+
+wind:
+    STACK_WIND(frame, default_fsyncdir_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->fsyncdir, fd, datasync, xdata);
+    return 0;
+}
+
+/* rchecksum: namespace comes from `fd`. */
+int32_t
+ns_rchecksum(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+             int32_t len, dict_t *xdata)
+{
+    if (set_ns_from_fd(__FUNCTION__, frame, this, fd) !=
+        PATH_PARSE_RESULT_IS_GFID) {
+        goto wind;
+    }
+
+    GET_ANCESTRY_PATH_WIND(rchecksum, fd->inode, fd, offset, len, xdata);
+    return 0;
+
+wind:
+    STACK_WIND(frame, default_rchecksum_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->rchecksum, fd, offset, len, xdata);
+    return 0;
+}
+
+/* statfs: namespace comes from `loc`. */
+int32_t
+ns_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+    if (set_ns_from_loc(__FUNCTION__, frame, this, loc) !=
+        PATH_PARSE_RESULT_IS_GFID) {
+        goto wind;
+    }
+
+    GET_ANCESTRY_PATH_WIND(statfs, loc->inode, loc, xdata);
+    return 0;
+
+wind:
+    STACK_WIND(frame, default_statfs_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->statfs, loc, xdata);
+    return 0;
+}
+
+/* inodelk: namespace comes from `loc`. */
+int32_t
+ns_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
+           int32_t cmd, struct gf_flock *flock, dict_t *xdata)
+{
+    if (set_ns_from_loc(__FUNCTION__, frame, this, loc) !=
+        PATH_PARSE_RESULT_IS_GFID) {
+        goto wind;
+    }
+
+    /* Only a gfid is known: fetch the ancestry path, then resume. */
+    GET_ANCESTRY_PATH_WIND(inodelk, loc->inode, volume, loc, cmd, flock, xdata);
+    return 0;
+
+wind:
+    STACK_WIND(frame, default_inodelk_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->inodelk, volume, loc, cmd, flock,
+               xdata);
+    return 0;
+}
+
+/* finodelk: namespace comes from `fd`. */
+int32_t
+ns_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+            int32_t cmd, struct gf_flock *flock, dict_t *xdata)
+{
+    if (set_ns_from_fd(__FUNCTION__, frame, this, fd) !=
+        PATH_PARSE_RESULT_IS_GFID) {
+        goto wind;
+    }
+
+    GET_ANCESTRY_PATH_WIND(finodelk, fd->inode, volume, fd, cmd, flock, xdata);
+    return 0;
+
+wind:
+    STACK_WIND(frame, default_finodelk_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->finodelk, volume, fd, cmd, flock,
+               xdata);
+    return 0;
+}
+
+/* entrylk: namespace comes from `loc`. */
+int32_t
+ns_entrylk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
+           const char *basename, entrylk_cmd cmd, entrylk_type type,
+           dict_t *xdata)
+{
+    if (set_ns_from_loc(__FUNCTION__, frame, this, loc) !=
+        PATH_PARSE_RESULT_IS_GFID) {
+        goto wind;
+    }
+
+    GET_ANCESTRY_PATH_WIND(entrylk, loc->inode, volume, loc, basename, cmd,
+                           type, xdata);
+    return 0;
+
+wind:
+    STACK_WIND(frame, default_entrylk_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->entrylk, volume, loc, basename, cmd,
+               type, xdata);
+    return 0;
+}
+
+/* fentrylk: namespace comes from `fd`. */
+int32_t
+ns_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+            const char *basename, entrylk_cmd cmd, entrylk_type type,
+            dict_t *xdata)
+{
+    if (set_ns_from_fd(__FUNCTION__, frame, this, fd) !=
+        PATH_PARSE_RESULT_IS_GFID) {
+        goto wind;
+    }
+
+    GET_ANCESTRY_PATH_WIND(fentrylk, fd->inode, volume, fd, basename, cmd, type,
+                           xdata);
+    return 0;
+
+wind:
+    STACK_WIND(frame, default_fentrylk_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->fentrylk, volume, fd, basename, cmd,
+               type, xdata);
+    return 0;
+}
+
+/* fgetxattr: namespace comes from `fd`. */
+int32_t
+ns_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
+             dict_t *xdata)
+{
+    if (set_ns_from_fd(__FUNCTION__, frame, this, fd) !=
+        PATH_PARSE_RESULT_IS_GFID) {
+        goto wind;
+    }
+
+    GET_ANCESTRY_PATH_WIND(fgetxattr, fd->inode, fd, name, xdata);
+    return 0;
+
+wind:
+    STACK_WIND(frame, default_fgetxattr_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
+    return 0;
+}
+
+/* getxattr: namespace comes from `loc`. */
+int32_t
+ns_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name,
+            dict_t *xdata)
+{
+    if (set_ns_from_loc(__FUNCTION__, frame, this, loc) !=
+        PATH_PARSE_RESULT_IS_GFID) {
+        goto wind;
+    }
+
+    GET_ANCESTRY_PATH_WIND(getxattr, loc->inode, loc, name, xdata);
+    return 0;
+
+wind:
+    STACK_WIND(frame, default_getxattr_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
+    return 0;
+}
+
+/* lk: namespace comes from `fd`. */
+int32_t
+ns_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
+      struct gf_flock *flock, dict_t *xdata)
+{
+    if (set_ns_from_fd(__FUNCTION__, frame, this, fd) !=
+        PATH_PARSE_RESULT_IS_GFID) {
+        goto wind;
+    }
+
+    GET_ANCESTRY_PATH_WIND(lk, fd->inode, fd, cmd, flock, xdata);
+    return 0;
+
+wind:
+    STACK_WIND(frame, default_lk_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->lk, fd, cmd, flock, xdata);
+    return 0;
+}
+
+/* readdir: namespace comes from `fd`. */
+int32_t
+ns_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+           off_t offset, dict_t *xdata)
+{
+    if (set_ns_from_fd(__FUNCTION__, frame, this, fd) !=
+        PATH_PARSE_RESULT_IS_GFID) {
+        goto wind;
+    }
+
+    GET_ANCESTRY_PATH_WIND(readdir, fd->inode, fd, size, offset, xdata);
+    return 0;
+
+wind:
+    STACK_WIND(frame, default_readdir_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->readdir, fd, size, offset, xdata);
+    return 0;
+}
+
+int32_t
+ns_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+            off_t offset, dict_t *dict)
+{
+    /* readdirp: namespace comes from `fd`; a bare gfid takes the detour. */
+    if (set_ns_from_fd(__FUNCTION__, frame, this, fd) !=
+        PATH_PARSE_RESULT_IS_GFID) {
+        goto wind;
+    }
+
+    GET_ANCESTRY_PATH_WIND(readdirp, fd->inode, fd, size, offset, dict);
+    return 0;
+
+wind:
+    STACK_WIND(frame, default_readdirp_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->readdirp, fd, size, offset, dict);
+    return 0;
+}
+
+/* xattrop: namespace comes from `loc`. */
+int32_t
+ns_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc,
+           gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+{
+    if (set_ns_from_loc(__FUNCTION__, frame, this, loc) !=
+        PATH_PARSE_RESULT_IS_GFID) {
+        goto wind;
+    }
+
+    GET_ANCESTRY_PATH_WIND(xattrop, loc->inode, loc, flags, dict, xdata);
+    return 0;
+
+wind:
+    STACK_WIND(frame, default_xattrop_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->xattrop, loc, flags, dict, xdata);
+    return 0;
+}
+
+/* fxattrop: namespace comes from `fd`. */
+int32_t
+ns_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
+            gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+{
+    if (set_ns_from_fd(__FUNCTION__, frame, this, fd) !=
+        PATH_PARSE_RESULT_IS_GFID) {
+        goto wind;
+    }
+
+    GET_ANCESTRY_PATH_WIND(fxattrop, fd->inode, fd, flags, dict, xdata);
+    return 0;
+
+wind:
+    STACK_WIND(frame, default_fxattrop_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->fxattrop, fd, flags, dict, xdata);
+    return 0;
+}
+
+/* getspec carries neither a loc nor an fd, so it is passed through
+ * without any namespace tagging. */
+int32_t
+ns_getspec(call_frame_t *frame, xlator_t *this, const char *key, int32_t flag)
+{
+    STACK_WIND(frame, default_getspec_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->getspec, key, flag);
+    return 0;
+}
+
+/* fallocate: namespace comes from `fd`. */
+int32_t
+ns_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t keep_size,
+             off_t offset, size_t len, dict_t *xdata)
+{
+    if (set_ns_from_fd(__FUNCTION__, frame, this, fd) !=
+        PATH_PARSE_RESULT_IS_GFID) {
+        goto wind;
+    }
+
+    GET_ANCESTRY_PATH_WIND(fallocate, fd->inode, fd, keep_size, offset, len,
+                           xdata);
+    return 0;
+
+wind:
+    STACK_WIND(frame, default_fallocate_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->fallocate, fd, keep_size, offset, len,
+               xdata);
+    return 0;
+}
+
+/* discard: namespace comes from `fd`. */
+int32_t
+ns_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+           size_t len, dict_t *xdata)
+{
+    if (set_ns_from_fd(__FUNCTION__, frame, this, fd) !=
+        PATH_PARSE_RESULT_IS_GFID) {
+        goto wind;
+    }
+
+    GET_ANCESTRY_PATH_WIND(discard, fd->inode, fd, offset, len, xdata);
+    return 0;
+
+wind:
+    STACK_WIND(frame, default_discard_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata);
+    return 0;
+}
+
+/* zerofill: namespace comes from `fd`. */
+int32_t
+ns_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+            off_t len, dict_t *xdata)
+{
+    if (set_ns_from_fd(__FUNCTION__, frame, this, fd) !=
+        PATH_PARSE_RESULT_IS_GFID) {
+        goto wind;
+    }
+
+    GET_ANCESTRY_PATH_WIND(zerofill, fd->inode, fd, offset, len, xdata);
+    return 0;
+
+wind:
+    STACK_WIND(frame, default_zerofill_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata);
+    return 0;
+}
+
+/* forget callback: removes this xlator's context value from `inode` and,
+ * when one was stored, frees the ns_info_t it points to. Always returns 0. */
+int
+ns_forget(xlator_t *this, inode_t *inode)
+{
+    uint64_t ns_as_64 = 0;
+    ns_info_t *info = NULL;
+
+    inode_ctx_del(inode, this, &ns_as_64);
+
+    /* Nothing was cached for this inode. */
+    if (!ns_as_64) {
+        return 0;
+    }
+
+    info = (ns_info_t *)(uintptr_t)ns_as_64;
+    GF_FREE(info);
+
+    return 0;
+}
+
+/* Translator entry point: checks that exactly one subvolume and at least one
+ * parent are configured, allocates the private state and reads the
+ * "tag-namespaces" option. Returns 0 on success, -1 on any failure (the
+ * private state is freed again in that case). */
+int32_t
+init(xlator_t *this)
+{
+    int32_t ret = -1;
+    ns_private_t *priv = NULL;
+
+    GF_VALIDATE_OR_GOTO(GF_NAMESPACE, this, out);
+
+    if (!this->children || this->children->next) {
+        gf_log(this->name, GF_LOG_ERROR,
+               "translator needs a single subvolume.");
+        goto out;
+    }
+
+    if (!this->parents) {
+        gf_log(this->name, GF_LOG_ERROR,
+               "dangling volume. please check volfile.");
+        goto out;
+    }
+
+    priv = GF_CALLOC(1, sizeof(ns_private_t), 0);
+
+    if (!priv) {
+        gf_log(this->name, GF_LOG_ERROR, "Can't allocate ns_priv structure.");
+        goto out;
+    }
+
+    GF_OPTION_INIT("tag-namespaces", priv->tag_namespaces, bool, out);
+
+    gf_log(this->name, GF_LOG_INFO, "Namespace xlator loaded");
+    this->private = priv;
+    ret = 0;
+
+out:
+    if (ret) {
+        GF_FREE(priv);
+    }
+
+    return ret;
+}
+
+/* Translator teardown: releases the private state allocated by init() and
+ * clears the pointer, so that a later reconfigure() (which validates
+ * this->private) or a repeated fini() cannot touch freed memory. */
+void
+fini(xlator_t *this)
+{
+    if (!this) {
+        return;
+    }
+
+    GF_FREE(this->private);
+    this->private = NULL;
+}
+
+/* Re-reads the "tag-namespaces" option from `options` into the private
+ * state. Returns 0 on success, -1 if the private state or `options` is
+ * missing or the option cannot be applied. */
+int
+reconfigure(xlator_t *this, dict_t *options)
+{
+    int ret = -1;
+    ns_private_t *priv = NULL;
+
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+    GF_VALIDATE_OR_GOTO(this->name, options, out);
+
+    priv = (ns_private_t *)this->private;
+
+    GF_OPTION_RECONF("tag-namespaces", priv->tag_namespaces, options, bool,
+                     out);
+
+    ret = 0;
+out:
+    return ret;
+}
+
+/* Every fop below is routed through its ns_* wrapper defined in this file. */
+struct xlator_fops fops = {
+    .lookup = ns_lookup,
+    .stat = ns_stat,
+    .fstat = ns_fstat,
+    .truncate = ns_truncate,
+    .ftruncate = ns_ftruncate,
+    .access = ns_access,
+    .readlink = ns_readlink,
+    .mknod = ns_mknod,
+    .mkdir = ns_mkdir,
+    .unlink = ns_unlink,
+    .rmdir = ns_rmdir,
+    .symlink = ns_symlink,
+    .rename = ns_rename,
+    .link = ns_link,
+    .create = ns_create,
+    .open = ns_open,
+    .readv = ns_readv,
+    .writev = ns_writev,
+    .flush = ns_flush,
+    .fsync = ns_fsync,
+    .opendir = ns_opendir,
+    .readdir = ns_readdir,
+    .readdirp = ns_readdirp,
+    .fsyncdir = ns_fsyncdir,
+    .statfs = ns_statfs,
+    .setxattr = ns_setxattr,
+    .getxattr = ns_getxattr,
+    .fsetxattr = ns_fsetxattr,
+    .fgetxattr = ns_fgetxattr,
+    .removexattr = ns_removexattr,
+    .fremovexattr = ns_fremovexattr,
+    .lk = ns_lk,
+    .inodelk = ns_inodelk,
+    .finodelk = ns_finodelk,
+    .entrylk = ns_entrylk,
+    .fentrylk = ns_fentrylk,
+    .rchecksum = ns_rchecksum,
+    .xattrop = ns_xattrop,
+    .fxattrop = ns_fxattrop,
+    .setattr = ns_setattr,
+    .fsetattr = ns_fsetattr,
+    .getspec = ns_getspec,
+    .fallocate = ns_fallocate,
+    .discard = ns_discard,
+    .zerofill = ns_zerofill,
+};
+
+struct xlator_cbks cbks = {
+    .forget = ns_forget,
+};
+
+struct xlator_dumpops dumpops;
+
+struct volume_options options[] = {
+    {
+        .key = {"tag-namespaces"},
+        .type = GF_OPTION_TYPE_BOOL,
+        .default_value = "off",
+        .description = "This option enables this translator's functionality "
+                       "that tags every fop with a namespace hash for later "
+                       "throttling, stats collection, logging, etc.",
+        .op_version = {GD_OP_VERSION_4_1_0},
+        .tags = {"namespace"},
+        .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+    },
+    {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+    .init = init,
+    .fini = fini,
+    .reconfigure = reconfigure,
+    .op_version = {GD_OP_VERSION_3_12_0},
+    .dumpops = &dumpops,
+    .fops = &fops,
+    .cbks = &cbks,
+    .options = options,
+    .identifier = "namespace",
+    .category = GF_TECH_PREVIEW,
+};
diff --git a/xlators/features/namespace/src/namespace.h b/xlators/features/namespace/src/namespace.h
new file mode 100644
index 00000000000..3a9b84d6426
--- /dev/null
+++ b/xlators/features/namespace/src/namespace.h
@@ -0,0 +1,23 @@
+#ifndef __NAMESPACE_H__
+#define __NAMESPACE_H__
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/call-stub.h>
+
+#define GF_NAMESPACE "namespace"
+
+typedef struct {
+    gf_boolean_t tag_namespaces;
+} ns_private_t;
+
+typedef struct {
+    loc_t loc;         /* We store a "fake" loc_t for the getxattr wind. */
+    call_stub_t *stub; /* A stub back to the function we're resuming. */
+} ns_local_t;
+
+#endif /* __NAMESPACE_H__ */
diff --git a/xlators/features/path-convertor/src/Makefile.am b/xlators/features/path-convertor/src/Makefile.am deleted file mode 100644 index 9b5c7f45d12..00000000000 --- a/xlators/features/path-convertor/src/Makefile.am +++ /dev/null @@ -1,15 +0,0 @@ - -xlator_LTLIBRARIES = path-converter.la -xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/testing/features - -path_converter_la_LDFLAGS = $(GF_XLATOR_DEFAULT_LDFLAGS) - -path_converter_la_SOURCES = path.c -path_converter_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la - -AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src - -AM_CFLAGS = -Wall $(GF_CFLAGS) - -CLEANFILES = - - diff --git a/xlators/features/path-convertor/src/path-mem-types.h b/xlators/features/path-convertor/src/path-mem-types.h deleted file mode 100644 index 77ada8d537a..00000000000 --- a/xlators/features/path-convertor/src/path-mem-types.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ -#ifndef __PATH_MEM_TYPES_H__ -#define __PATH_MEM_TYPES_H__ - -#include "mem-types.h" - -enum gf_path_mem_types_ { - gf_path_mt_path_private_t = gf_common_mt_end + 1, - gf_path_mt_char, - gf_path_mt_regex_t, - gf_path_mt_end -}; -#endif - diff --git a/xlators/features/path-convertor/src/path.c b/xlators/features/path-convertor/src/path.c deleted file mode 100644 index b0e5d6cc625..00000000000 --- a/xlators/features/path-convertor/src/path.c +++ /dev/null @@ -1,1223 +0,0 @@ -/* - Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS.
- - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ -/* TODO: add gf_log to all the cases returning errors */ - -/** - * xlators/features/path-translator: - * This translator converts the path it gets into user specified targets. - */ - -#include <sys/types.h> -#include <regex.h> -#include <time.h> -#include <errno.h> -#include "glusterfs.h" -#include "xlator.h" -#include "path-mem-types.h" - -typedef struct path_private -{ - int32_t this_len; - int32_t start_off; - int32_t end_off; - char *this; - char *that; - char *path; - regex_t *preg; -} path_private_t; - -static char * -name_this_to_that (xlator_t *xl, const char *path, const char *name) -{ - path_private_t *priv = xl->private; - char priv_path[PATH_MAX] = {0,}; - char *tmp_name = NULL; - int32_t path_len = strlen (path); - int32_t name_len = strlen (name) - ZR_FILE_CONTENT_STRLEN; - int32_t total_len = path_len + name_len; - int32_t i = 0, j = 0; - - if (path_len >= priv->end_off) - return (char *)name; - - if (priv->end_off && (total_len > priv->end_off)) { - j = priv->start_off; - tmp_name = GF_CALLOC (1, (total_len + - ZR_FILE_CONTENT_STRLEN), - gf_path_mt_char); - ERR_ABORT (tmp_name); - - /* Get the complete path for the file first */ - strcpy (tmp_name, path); - strcat (tmp_name, name + ZR_FILE_CONTENT_STRLEN); - - strncpy (priv_path, tmp_name, priv->start_off); - for (i = priv->start_off; i < priv->end_off; i++) { - if (tmp_name[i] == '/') - continue; - priv_path[j++] = tmp_name[i]; - } - memcpy ((priv_path + j), - (tmp_name + priv->end_off), - (total_len - priv->end_off)); - priv_path[(total_len - (priv->end_off - j))] = '\0'; - - strcpy (tmp_name, ZR_FILE_CONTENT_STR); - strcat (tmp_name, priv_path); - - return tmp_name; - } - - return (char *)name; -} - -/* This function should return - 
* NULL - - * converted path - if path match - * same path - if it doesn't match - */ -static char * -path_this_to_that (xlator_t *xl, const char *path) -{ - path_private_t *priv = xl->private; - char *priv_path = NULL; - int32_t path_len = strlen (path); - int32_t i = 0, j = 0; - - if (priv->end_off && (path_len > priv->start_off)) { - priv_path = GF_CALLOC (1, path_len, gf_path_mt_char); - ERR_ABORT (priv_path); - - if (priv->start_off && (path_len > priv->start_off)) - memcpy (priv_path, path, priv->start_off); - if (path_len > priv->end_off) { - j = priv->start_off; - for (i = priv->start_off; i < priv->end_off; i++) { - if (path[i] == '/') - continue; - priv_path[j++] = path[i]; - } - memcpy ((priv_path + j), - (path + priv->end_off), - (path_len - priv->end_off)); - priv_path[(path_len - (priv->end_off - j))] = '\0'; - } - return priv_path; - } - return (char *)path; -} - -int32_t -path_create_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - fd_t *fd, - inode_t *inode, - struct iatt *buf, - struct iatt *preparent, - struct iatt *postparent) -{ - STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf); - return 0; -} - -int32_t -path_open_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - fd_t *fd) -{ - STACK_UNWIND (frame, op_ret, op_errno, fd); - return 0; -} - -int32_t -path_getdents_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - dir_entry_t *entries, - int32_t count) -{ - STACK_UNWIND (frame, op_ret, op_errno, entries, count); - return 0; -} - -int32_t -path_readdir_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - gf_dirent_t *buf) -{ - STACK_UNWIND (frame, op_ret, op_errno, buf); - return 0; -} - - -int32_t -path_readlink_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - const char *buf, - struct iatt *sbuf) 
-{ - STACK_UNWIND (frame, op_ret, op_errno, buf, sbuf); - return 0; -} - -int32_t -path_lookup_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - inode_t *inode, - struct iatt *buf, - dict_t *xattr, - struct iatt *postparent) -{ - STACK_UNWIND (frame, op_ret, op_errno, inode, buf, xattr); - return 0; -} - - -int32_t -path_symlink_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - inode_t *inode, - struct iatt *buf, - struct iatt *preparent, - struct iatt *postparent) -{ - STACK_UNWIND (frame, op_ret, op_errno, inode, buf); - return 0; -} - -int32_t -path_mknod_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - inode_t *inode, - struct iatt *buf, - struct iatt *preparent, - struct iatt *postparent) -{ - STACK_UNWIND (frame, op_ret, op_errno, inode, buf); - return 0; -} - - -int32_t -path_mkdir_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - inode_t *inode, - struct iatt *buf, - struct iatt *preparent, - struct iatt *postparent) -{ - STACK_UNWIND (frame, op_ret, op_errno, inode, buf); - return 0; -} - -int32_t -path_link_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - inode_t *inode, - struct iatt *buf, - struct iatt *preparent, - struct iatt *postparent) -{ - STACK_UNWIND (frame, op_ret, op_errno, inode, buf); - return 0; -} - -int32_t -path_opendir_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - fd_t *fd) -{ - STACK_UNWIND (frame, op_ret, op_errno, fd); - return 0; -} - - -int32_t -path_rename_buf_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - struct iatt *buf, - struct iatt *preoldparent, - struct iatt *postoldparent, - struct iatt *prenewparent, - struct iatt *postnewparent) -{ - STACK_UNWIND (frame, op_ret, 
op_errno, buf); - return 0; -} - - - -int32_t -path_common_buf_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - struct iatt *buf) -{ - STACK_UNWIND (frame, op_ret, op_errno, buf); - return 0; -} - -int32_t -path_common_dict_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - dict_t *dict) -{ - STACK_UNWIND (frame, op_ret, op_errno, dict); - return 0; -} - -int32_t -path_common_remove_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno,struct iatt *preparent, - struct iatt *postparent) -{ - STACK_UNWIND (frame, op_ret, op_errno); - return 0; -} - -int32_t -path_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno,struct iatt *prebuf, - struct iatt *postbuf) -{ - STACK_UNWIND (frame, op_ret, op_errno, prebuf, postbuf); - return 0; -} - - -int32_t -path_common_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno) -{ - STACK_UNWIND (frame, op_ret, op_errno); - return 0; -} - -/* */ -int32_t -path_lookup (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - dict_t *xattr_req) -{ - char *loc_path = (char *)loc->path; - char *tmp_path = NULL; - - if (!(tmp_path = path_this_to_that (this, loc->path))) { - STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); - return 0; - } - loc->path = tmp_path; - - STACK_WIND (frame, path_lookup_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, - loc, xattr_req); - - loc->path = loc_path; - if (tmp_path != loc_path) - GF_FREE (tmp_path); - - return 0; -} - -int32_t -path_stat (call_frame_t *frame, - xlator_t *this, - loc_t *loc) -{ - char *loc_path = (char *)loc->path; - char *tmp_path = NULL; - - if (!(tmp_path = path_this_to_that (this, loc->path))) { - STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); - return 0; - } - loc->path = tmp_path; - - STACK_WIND (frame, - path_common_buf_cbk, - FIRST_CHILD(this), - 
FIRST_CHILD(this)->fops->stat, - loc); - - loc->path = loc_path; - if (tmp_path != loc_path) - GF_FREE (tmp_path); - - return 0; -} - -int32_t -path_readlink (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - size_t size) -{ - char *loc_path = (char *)loc->path; - char *tmp_path = NULL; - - if (!(tmp_path = path_this_to_that (this, loc->path))) { - STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); - return 0; - } - loc->path = tmp_path; - - STACK_WIND (frame, - path_readlink_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readlink, - loc, - size); - - loc->path = loc_path; - if (tmp_path != loc_path) - GF_FREE (tmp_path); - - return 0; -} - -int32_t -path_mknod (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - mode_t mode, - dev_t dev) -{ - char *loc_path = (char *)loc->path; - char *tmp_path = NULL; - - if (!(tmp_path = path_this_to_that (this, loc->path))) { - STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); - return 0; - } - loc->path = tmp_path; - - STACK_WIND (frame, - path_mknod_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->mknod, - loc, - mode, - dev); - - loc->path = loc_path; - if (tmp_path != loc_path) - GF_FREE (tmp_path); - - return 0; -} - -int32_t -path_mkdir (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - mode_t mode) -{ - char *loc_path = (char *)loc->path; - char *tmp_path = NULL; - - if (!(tmp_path = path_this_to_that (this, loc->path))) { - STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); - return 0; - } - loc->path = tmp_path; - - STACK_WIND (frame, - path_mkdir_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->mkdir, - loc, - mode); - - loc->path = loc_path; - if (tmp_path != loc_path) - GF_FREE (tmp_path); - - return 0; -} - -int32_t -path_unlink (call_frame_t *frame, - xlator_t *this, - loc_t *loc) -{ - char *loc_path = (char *)loc->path; - char *tmp_path = NULL; - - if (!(tmp_path = path_this_to_that (this, loc->path))) { - STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); - return 0; - } - loc->path = tmp_path; - - 
STACK_WIND (frame, - path_common_remove_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, - loc); - - loc->path = loc_path; - if (tmp_path != loc_path) - GF_FREE (tmp_path); - - return 0; -} - -int32_t -path_rmdir (call_frame_t *frame, - xlator_t *this, - loc_t *loc) -{ - char *loc_path = (char *)loc->path; - char *tmp_path = NULL; - - if (!(tmp_path = path_this_to_that (this, loc->path))) { - STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); - return 0; - } - loc->path = tmp_path; - - STACK_WIND (frame, - path_common_remove_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rmdir, - loc); - - loc->path = loc_path; - if (tmp_path != loc_path) - GF_FREE (tmp_path); - - return 0; -} - -int32_t -path_symlink (call_frame_t *frame, - xlator_t *this, - const char *linkpath, - loc_t *loc) -{ - char *loc_path = (char *)loc->path; - char *tmp_path = NULL; - - if (!(tmp_path = path_this_to_that (this, loc->path))) { - STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); - return 0; - } - loc->path = tmp_path; - - STACK_WIND (frame, - path_symlink_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->symlink, - linkpath, - loc); - - loc->path = loc_path; - if (tmp_path != loc_path) - GF_FREE (tmp_path); - - return 0; -} - -int32_t -path_rename (call_frame_t *frame, - xlator_t *this, - loc_t *oldloc, - loc_t *newloc) -{ - char *oldloc_path = (char *)oldloc->path; - char *tmp_oldloc_path = NULL; - - char *newloc_path = (char *)newloc->path; - char *tmp_newloc_path = NULL; - - if (!(tmp_oldloc_path = path_this_to_that (this, oldloc->path))) { - STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); - return 0; - } - oldloc->path = tmp_oldloc_path; - - if (!(tmp_newloc_path = path_this_to_that (this, newloc->path))) { - STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); - return 0; - } - newloc->path = tmp_newloc_path; - - STACK_WIND (frame, - path_rename_buf_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rename, - oldloc, - newloc); - - oldloc->path = oldloc_path; - if (tmp_oldloc_path 
!= oldloc_path) - GF_FREE (tmp_oldloc_path); - - newloc->path = newloc_path; - if (tmp_newloc_path != newloc_path) - GF_FREE (tmp_newloc_path); - - return 0; -} - -int32_t -path_link (call_frame_t *frame, - xlator_t *this, - loc_t *oldloc, - loc_t *newloc) -{ - char *oldloc_path = (char *)oldloc->path; - char *tmp_oldloc_path = NULL; - - char *newloc_path = (char *)newloc->path; - char *tmp_newloc_path = NULL; - - if (!(tmp_oldloc_path = path_this_to_that (this, oldloc->path))) { - STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); - return 0; - } - oldloc->path = tmp_oldloc_path; - - if (!(tmp_newloc_path = path_this_to_that (this, newloc->path))) { - STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); - return 0; - } - newloc->path = tmp_newloc_path; - - STACK_WIND (frame, - path_link_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->link, - oldloc, - newloc); - - oldloc->path = oldloc_path; - if (tmp_oldloc_path != oldloc_path) - GF_FREE (tmp_oldloc_path); - - newloc->path = newloc_path; - if (tmp_newloc_path != newloc_path) - GF_FREE (tmp_newloc_path); - - return 0; -} - -int32_t -path_setattr_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - struct iatt *preop, - struct iatt *postop) -{ - STACK_UNWIND (frame, op_ret, op_errno, preop, postop); - return 0; -} - -int32_t -path_setattr (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - struct iatt *stbuf, - int32_t valid) -{ - char *loc_path = (char *)loc->path; - char *tmp_path = NULL; - - if (!(tmp_path = path_this_to_that (this, loc->path))) { - STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); - return 0; - } - loc->path = tmp_path; - - STACK_WIND (frame, - path_setattr_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setattr, - loc, - stbuf, valid); - - loc->path = loc_path; - if (tmp_path != loc_path) - GF_FREE (tmp_path); - - return 0; -} - - -int32_t -path_truncate (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - off_t offset) -{ - char *loc_path = 
(char *)loc->path; - char *tmp_path = NULL; - - if (!(tmp_path = path_this_to_that (this, loc->path))) { - STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); - return 0; - } - loc->path = tmp_path; - - STACK_WIND (frame, - path_truncate_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->truncate, - loc, - offset); - - loc->path = loc_path; - if (tmp_path != loc_path) - GF_FREE (tmp_path); - - return 0; -} - - -int32_t -path_open (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - int32_t flags, - fd_t *fd, - int32_t wbflags) -{ - char *loc_path = (char *)loc->path; - char *tmp_path = NULL; - - if (!(tmp_path = path_this_to_that (this, loc->path))) { - STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); - return 0; - } - loc->path = tmp_path; - - STACK_WIND (frame, - path_open_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, - loc, - flags, - fd, - wbflags); - - loc->path = loc_path; - if (tmp_path != loc_path) - GF_FREE (tmp_path); - - return 0; -} - -int32_t -path_create (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - int32_t flags, - mode_t mode, - fd_t *fd) -{ - char *loc_path = (char *)loc->path; - char *tmp_path = NULL; - - if (!(tmp_path = path_this_to_that (this, loc->path))) { - STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); - return 0; - } - loc->path = tmp_path; - - STACK_WIND (frame, - path_create_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->create, - loc, - flags, - mode, - fd); - - loc->path = loc_path; - if (tmp_path != loc_path) - GF_FREE (tmp_path); - - return 0; -} - -int32_t -path_setxattr (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - dict_t *dict, - int32_t flags) -{ - char *tmp_name = NULL; - data_pair_t *trav = dict->members_list; - char *loc_path = (char *)loc->path; - char *tmp_path = NULL; - - if (!(tmp_path = path_this_to_that (this, loc->path))) { - STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); - return 0; - } - loc->path = tmp_path; - - if (ZR_FILE_CONTENT_REQUEST(trav->key)) { - tmp_name = name_this_to_that 
(this, loc->path, trav->key); - if (tmp_name != trav->key) { - trav->key = tmp_name; - } else { - tmp_name = NULL; - } - } - - STACK_WIND (frame, - path_common_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setxattr, - loc, - dict, - flags); - - loc->path = loc_path; - if (tmp_path != loc_path) - GF_FREE (tmp_path); - - GF_FREE (tmp_name); - - return 0; -} - -int32_t -path_getxattr (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - const char *name) -{ - char *tmp_name = (char *)name; - char *loc_path = (char *)loc->path; - char *tmp_path = NULL; - - if (!(tmp_path = path_this_to_that (this, loc->path))) { - STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); - return 0; - } - loc->path = tmp_path; - - if (ZR_FILE_CONTENT_REQUEST(name)) { - tmp_name = name_this_to_that (this, loc->path, name); - } - - STACK_WIND (frame, - path_common_dict_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->getxattr, - loc, - tmp_name); - - loc->path = loc_path; - if (tmp_path != loc_path) - GF_FREE (tmp_path); - - if (tmp_name != name) - GF_FREE (tmp_name); - - return 0; -} - -int32_t -path_removexattr (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - const char *name) -{ - char *tmp_name = (char *)name; - char *loc_path = (char *)loc->path; - char *tmp_path = NULL; - - if (!(tmp_path = path_this_to_that (this, loc->path))) { - STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); - return 0; - } - loc->path = tmp_path; - - if (ZR_FILE_CONTENT_REQUEST(name)) { - tmp_name = name_this_to_that (this, loc->path, name); - } - - STACK_WIND (frame, - path_common_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->removexattr, - loc, - tmp_name); - - loc->path = loc_path; - if (tmp_path != loc_path) - GF_FREE (tmp_path); - - if (tmp_name != name) - GF_FREE (tmp_name); - - return 0; -} - -int32_t -path_opendir (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - fd_t *fd) -{ - char *loc_path = (char *)loc->path; - char *tmp_path = NULL; - - if (!(tmp_path = path_this_to_that (this, 
loc->path))) { - STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); - return 0; - } - loc->path = tmp_path; - - STACK_WIND (frame, - path_opendir_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->opendir, - loc, - fd); - - loc->path = loc_path; - if (tmp_path != loc_path) - GF_FREE (tmp_path); - - return 0; -} - -int32_t -path_access (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - int32_t mask) -{ - char *loc_path = (char *)loc->path; - char *tmp_path = NULL; - - if (!(tmp_path = path_this_to_that (this, loc->path))) { - STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); - return 0; - } - loc->path = tmp_path; - - STACK_WIND (frame, - path_common_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->access, - loc, - mask); - - loc->path = loc_path; - if (tmp_path != loc_path) - GF_FREE (tmp_path); - - return 0; -} - -int32_t -path_checksum_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - uint8_t *fchecksum, - uint8_t *dchecksum) -{ - STACK_UNWIND (frame, op_ret, op_errno, fchecksum, dchecksum); - return 0; -} - -int32_t -path_checksum (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - int32_t flag) -{ - char *loc_path = (char *)loc->path; - char *tmp_path = NULL; - - if (!(tmp_path = path_this_to_that (this, loc->path))) { - STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); - return 0; - } - loc->path = tmp_path; - - STACK_WIND (frame, - path_checksum_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->checksum, - loc, - flag); - - loc->path = loc_path; - if (tmp_path != loc_path) - GF_FREE (tmp_path); - - return 0; -} - - -int32_t -path_entrylk (call_frame_t *frame, xlator_t *this, - const char *volume, loc_t *loc, const char *basename, - entrylk_cmd cmd, entrylk_type type) -{ - char *loc_path = (char *)loc->path; - char *tmp_path = NULL; - - if (!(tmp_path = path_this_to_that (this, loc->path))) { - STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); - return 0; - } - loc->path = tmp_path; - - STACK_WIND (frame, 
path_common_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->entrylk, - volume, loc, basename, cmd, type); - - loc->path = loc_path; - if (tmp_path != loc_path) - GF_FREE (tmp_path); - - return 0; -} - -int32_t -path_inodelk (call_frame_t *frame, xlator_t *this, - const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *lock) -{ - char *loc_path = (char *)loc->path; - char *tmp_path = NULL; - - if (!(tmp_path = path_this_to_that (this, loc->path))) { - STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); - return 0; - } - loc->path = tmp_path; - - STACK_WIND (frame, - path_common_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->inodelk, - volume, loc, cmd, lock); - - loc->path = loc_path; - if (tmp_path != loc_path) - GF_FREE (tmp_path); - - return 0; -} - - -int32_t -path_xattrop (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - gf_xattrop_flags_t flags, - dict_t *dict) -{ - char *loc_path = (char *)loc->path; - char *tmp_path = NULL; - - if (!(tmp_path = path_this_to_that (this, loc->path))) { - STACK_UNWIND (frame, -1, ENOENT, NULL, NULL); - return 0; - } - loc->path = tmp_path; - - STACK_WIND (frame, - path_common_dict_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->xattrop, - loc, - flags, - dict); - - loc->path = loc_path; - if (tmp_path != loc_path) - GF_FREE (tmp_path); - - return 0; -} - -int32_t -mem_acct_init (xlator_t *this) -{ - int ret = -1; - - if (!this) - return ret; - - ret = xlator_mem_acct_init (this, gf_path_mt_end + 1); - - if (ret != 0) { - gf_log (this->name, GF_LOG_ERROR, "Memory accounting init" - "failed"); - return ret; - } - - return ret; -} - -int32_t -init (xlator_t *this) -{ - dict_t *options = this->options; - path_private_t *priv = NULL; - - if (!this->children || this->children->next) { - gf_log (this->name, GF_LOG_ERROR, - "path translator requires exactly one subvolume"); - return -1; - } - - if (!this->parents) { - gf_log (this->name, GF_LOG_WARNING, - "dangling volume. 
check volfile "); - } - - priv = GF_CALLOC (1, sizeof (*priv), gf_path_mt_path_private_t); - ERR_ABORT (priv); - if (dict_get (options, "start-offset")) { - priv->start_off = data_to_int32 (dict_get (options, - "start-offset")); - } - if (dict_get (options, "end-offset")) { - priv->end_off = data_to_int32 (dict_get (options, - "end-offset")); - } - - if (dict_get (options, "regex")) { - int32_t ret = 0; - priv->preg = GF_CALLOC (1, sizeof (regex_t), - gf_path_mt_regex_t); - ERR_ABORT (priv->preg); - ret = regcomp (priv->preg, - data_to_str (dict_get (options, "regex")), - REG_EXTENDED); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "Failed to compile the 'option regex'"); - GF_FREE (priv); - return -1; - } - if (dict_get (options, "replace-with")) { - priv->that = data_to_str (dict_get (options, - "replace-with")); - } else { - priv->that = ""; - } - } - - this->private = priv; - return 0; -} - -void -fini (xlator_t *this) -{ - return; -} - -struct xlator_fops fops = { - .stat = path_stat, - .readlink = path_readlink, - .mknod = path_mknod, - .mkdir = path_mkdir, - .unlink = path_unlink, - .rmdir = path_rmdir, - .symlink = path_symlink, - .rename = path_rename, - .link = path_link, - .truncate = path_truncate, - .open = path_open, - .setxattr = path_setxattr, - .getxattr = path_getxattr, - .removexattr = path_removexattr, - .opendir = path_opendir, - .access = path_access, - .create = path_create, - .lookup = path_lookup, - .checksum = path_checksum, - .xattrop = path_xattrop, - .entrylk = path_entrylk, - .inodelk = path_inodelk, - .setattr = path_setattr, -}; - -struct xlator_cbks cbks = { -}; - -struct volume_options options[] = { - { .key = {"start-offset"}, - .type = GF_OPTION_TYPE_INT, - .min = 0, - .max = 4095 - }, - { .key = {"end-offset"}, - .type = GF_OPTION_TYPE_INT, - .min = 1, - .max = 4096 - }, - { .key = {"replace-with"}, - .type = GF_OPTION_TYPE_ANY - }, - { .key = {NULL} }, -}; diff --git a/xlators/features/protect/src/Makefile.am 
b/xlators/features/protect/src/Makefile.am deleted file mode 100644 index bc0643d6806..00000000000 --- a/xlators/features/protect/src/Makefile.am +++ /dev/null @@ -1,21 +0,0 @@ -xlator_LTLIBRARIES = prot_dht.la prot_client.la prot_server.la - -xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features - -prot_dht_la_LDFLAGS = $(GF_XLATOR_DEFAULT_LDFLAGS) -prot_dht_la_SOURCES = prot_dht.c -prot_dht_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la - -prot_client_la_LDFLAGS = $(GF_XLATOR_DEFAULT_LDFLAGS) -prot_client_la_SOURCES = prot_client.c -prot_client_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la - -prot_server_la_LDFLAGS = $(GF_XLATOR_DEFAULT_LDFLAGS) -prot_server_la_SOURCES = prot_server.c -prot_server_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la - -AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ - -I$(CONTRIBDIR)/libexecinfo -AM_CFLAGS = -Wall $(GF_CFLAGS) - -CLEANFILES = diff --git a/xlators/features/protect/src/prot_client.c b/xlators/features/protect/src/prot_client.c deleted file mode 100644 index 79636410b94..00000000000 --- a/xlators/features/protect/src/prot_client.c +++ /dev/null @@ -1,213 +0,0 @@ -/* - Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. 
-*/ -#include "xlator.h" -#include "defaults.h" - -#ifdef HAVE_BACKTRACE -#include <execinfo.h> -#else -#include "execinfo_compat.h" -#endif - -#define NUM_FRAMES 20 - -static char PROTECT_KEY[] = "trusted.glusterfs.protect"; - -enum { - PROT_ACT_NONE = 0, - PROT_ACT_LOG, - PROT_ACT_REJECT, -}; - -void -pcli_print_trace (char *name, call_frame_t *frame) -{ - void *frames[NUM_FRAMES]; - char **symbols; - int size; - int i; - - gf_log (name, GF_LOG_INFO, "Translator stack:"); - list_for_each_entry (frame, &frame->root->myframes, frames) { - gf_log (name, GF_LOG_INFO, "%s (%s)", - frame->wind_from, frame->this->name); - } - - size = backtrace (frames, NUM_FRAMES); - if (size <= 0) { - return; - } - symbols = backtrace_symbols (frames, size); - if (!symbols) { - return; - } - - gf_log (name, GF_LOG_INFO, "Processor stack:"); - for (i = 0; i < size; ++i) { - gf_log (name, GF_LOG_INFO, "%s", symbols[i]); - } - free (symbols); -} - -int32_t -pcli_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, - loc_t *newloc, dict_t *xdata) -{ - uint64_t value; - - if (newloc->parent == oldloc->parent) { - gf_log (this->name, GF_LOG_DEBUG, "rename in same directory"); - goto simple_unwind; - } - if (!oldloc->parent) { - goto simple_unwind; - } - if (inode_ctx_get (oldloc->parent, this, &value) != 0) { - goto simple_unwind; - } - - if (value != PROT_ACT_NONE) { - gf_log (this->name, GF_LOG_WARNING, - "got rename for protected %s", oldloc->path); - pcli_print_trace (this->name, frame); - if (value == PROT_ACT_REJECT) { - STACK_UNWIND_STRICT (rename, frame, -1, EPERM, - NULL, NULL, NULL, NULL, NULL, - xdata); - return 0; - } - } - -simple_unwind: - STACK_WIND_TAIL (frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rename, oldloc, newloc, - xdata); - return 0; -} - -int32_t -pcli_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, - int32_t flags, dict_t *xdata) -{ - data_t *data; - uint64_t value; - - /* - * We can't use dict_get_str and strcmp here, 
because the value comes - * directly from the user and might not be NUL-terminated (it would - * be if we had set it ourselves. - */ - - data = dict_get(dict,PROTECT_KEY); - if (!data) { - goto simple_wind; - } - - if (dict->count > 1) { - gf_log (this->name, GF_LOG_WARNING, - "attempted to mix %s with other keys", PROTECT_KEY); - goto simple_wind; - } - - gf_log (this->name, GF_LOG_DEBUG, "got %s request", PROTECT_KEY); - if (!strncmp(data->data,"log",data->len)) { - gf_log (this->name, GF_LOG_DEBUG, - "logging removals on %s", loc->path); - value = PROT_ACT_LOG; - } - else if (!strncmp(data->data,"reject",data->len)) { - gf_log (this->name, GF_LOG_DEBUG, - "rejecting removals on %s", loc->path); - value = PROT_ACT_REJECT; - } - else { - gf_log (this->name, GF_LOG_DEBUG, - "removing protection on %s", loc->path); - value = PROT_ACT_NONE; - } - /* Right now the value doesn't matter - just the presence. */ - if (inode_ctx_set(loc->inode,this,&value) != 0) { - gf_log (this->name, GF_LOG_WARNING, - "failed to set protection status for %s", loc->path); - } - STACK_UNWIND_STRICT (setxattr, frame, 0, 0, NULL); - return 0; - -simple_wind: - STACK_WIND_TAIL (frame, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr, - loc, dict, flags, xdata); - return 0; -} - -int32_t -pcli_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, - dict_t *xdata) -{ - uint64_t value; - - if (!loc->parent || (inode_ctx_get(loc->parent,this,&value) != 0)) { - goto simple_unwind; - } - - if (value != PROT_ACT_NONE) { - gf_log (this->name, GF_LOG_WARNING, - "got unlink for protected %s", loc->path); - pcli_print_trace(this->name, frame); - if (value == PROT_ACT_REJECT) { - STACK_UNWIND_STRICT (unlink, frame, -1, EPERM, - NULL, NULL, NULL); - return 0; - } - } - -simple_unwind: - STACK_WIND_TAIL (frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); - return 0; -} - -int32_t -init (xlator_t *this) -{ - if (!this->children || this->children->next) { - 
gf_log (this->name, GF_LOG_ERROR, - "translator not configured with exactly one child"); - return -1; - } - - if (!this->parents) { - gf_log (this->name, GF_LOG_WARNING, - "dangling volume. check volfile "); - } - - return 0; -} - - -void -fini (xlator_t *this) -{ - return; -} - - -struct xlator_fops fops = { - .rename = pcli_rename, - .setxattr = pcli_setxattr, - .unlink = pcli_unlink, -}; - -struct xlator_cbks cbks = { -}; - -struct volume_options options[] = { - { .key = {NULL} }, -}; diff --git a/xlators/features/protect/src/prot_dht.c b/xlators/features/protect/src/prot_dht.c deleted file mode 100644 index 1fc8cc1ffde..00000000000 --- a/xlators/features/protect/src/prot_dht.c +++ /dev/null @@ -1,163 +0,0 @@ -/* - Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. 
-*/ -#include "xlator.h" -#include "defaults.h" - -enum gf_pdht_mem_types_ { - gf_pdht_mt_coord_t = gf_common_mt_end + 1, - gf_pdht_mt_end -}; - -typedef struct { - pthread_mutex_t lock; - uint16_t refs; - int32_t op_ret; - int32_t op_errno; - dict_t *xdata; -} pdht_coord_t; - -static char PROTECT_KEY[] = "trusted.glusterfs.protect"; - -void -pdht_unref_and_unlock (call_frame_t *frame, xlator_t *this, - pdht_coord_t *coord) -{ - gf_boolean_t should_unwind; - - should_unwind = (--(coord->refs) == 0); - pthread_mutex_unlock(&coord->lock); - - if (should_unwind) { - STACK_UNWIND_STRICT (setxattr, frame, - coord->op_ret, coord->op_errno, - coord->xdata); - if (coord->xdata) { - dict_unref(coord->xdata); - } - GF_FREE(coord); - } -} - -int32_t -pdht_recurse_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - pdht_coord_t *coord = cookie; - - pthread_mutex_lock(&coord->lock); - if (op_ret) { - coord->op_ret = op_ret; - coord->op_errno = op_errno; - } - if (xdata) { - if (coord->xdata) { - dict_unref(coord->xdata); - } - coord->xdata = dict_ref(xdata); - } - pdht_unref_and_unlock(frame,this,coord); - - return 0; -} - -void -pdht_recurse (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, - int32_t flags, dict_t *xdata, xlator_t *xl, pdht_coord_t *coord) -{ - xlator_list_t *iter; - - if (!strcmp(xl->type,"features/prot_client")) { - pthread_mutex_lock(&coord->lock); - ++(coord->refs); - pthread_mutex_unlock(&coord->lock); - STACK_WIND_COOKIE (frame, pdht_recurse_cbk, coord, xl, - xl->fops->setxattr, loc, dict, flags, xdata); - } - - else for (iter = xl->children; iter; iter = iter->next) { - pdht_recurse (frame, this, loc, dict, flags, xdata, - iter->xlator, coord); - } -} - -int32_t -pdht_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, - int32_t flags, dict_t *xdata) -{ - pdht_coord_t *coord; - - if (!dict_get(dict,PROTECT_KEY)) { - goto simple_wind; - } - - if (dict->count > 
1) { - gf_log (this->name, GF_LOG_WARNING, - "attempted to mix %s with other keys", PROTECT_KEY); - goto simple_wind; - } - - coord = GF_CALLOC(1,sizeof(*coord),gf_pdht_mt_coord_t); - if (!coord) { - gf_log (this->name, GF_LOG_WARNING, "allocation failed"); - goto simple_wind; - } - - pthread_mutex_init(&coord->lock,NULL); - coord->refs = 1; - coord->op_ret = 0; - coord->xdata = NULL; - - pdht_recurse(frame,this,loc,dict,flags,xdata,this,coord); - pthread_mutex_lock(&coord->lock); - pdht_unref_and_unlock(frame,this,coord); - - return 0; - -simple_wind: - STACK_WIND_TAIL (frame, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr, - loc, dict, flags, xdata); - return 0; -} - -int32_t -init (xlator_t *this) -{ - if (!this->children || this->children->next) { - gf_log (this->name, GF_LOG_ERROR, - "translator not configured with exactly one child"); - return -1; - } - - if (!this->parents) { - gf_log (this->name, GF_LOG_WARNING, - "dangling volume. check volfile "); - } - - return 0; -} - - -void -fini (xlator_t *this) -{ - return; -} - -struct xlator_fops fops = { - .setxattr = pdht_setxattr, -}; - -struct xlator_cbks cbks = { -}; - -struct volume_options options[] = { - { .key = {NULL} }, -}; diff --git a/xlators/features/protect/src/prot_server.c b/xlators/features/protect/src/prot_server.c deleted file mode 100644 index 8ebace240f3..00000000000 --- a/xlators/features/protect/src/prot_server.c +++ /dev/null @@ -1,46 +0,0 @@ -/* - Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. 
-*/ -#include "xlator.h" -#include "defaults.h" - -int32_t -init (xlator_t *this) -{ - if (!this->children || this->children->next) { - gf_log (this->name, GF_LOG_ERROR, - "translator not configured with exactly one child"); - return -1; - } - - if (!this->parents) { - gf_log (this->name, GF_LOG_WARNING, - "dangling volume. check volfile "); - } - - return 0; -} - - -void -fini (xlator_t *this) -{ - return; -} - - -struct xlator_fops fops = { -}; - -struct xlator_cbks cbks = { -}; - -struct volume_options options[] = { - { .key = {NULL} }, -}; diff --git a/xlators/features/qemu-block/Makefile.am b/xlators/features/qemu-block/Makefile.am deleted file mode 100644 index af437a64d6d..00000000000 --- a/xlators/features/qemu-block/Makefile.am +++ /dev/null @@ -1 +0,0 @@ -SUBDIRS = src diff --git a/xlators/features/qemu-block/src/Makefile.am b/xlators/features/qemu-block/src/Makefile.am deleted file mode 100644 index e32fc54605b..00000000000 --- a/xlators/features/qemu-block/src/Makefile.am +++ /dev/null @@ -1,156 +0,0 @@ -if ENABLE_QEMU_BLOCK -xlator_LTLIBRARIES = qemu-block.la -xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features - -qemu_block_la_LDFLAGS = -module -avoid-version -qemu_block_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ - $(GLIB_LIBS) $(ZLIB_LIBS) - -qemu_block_la_SOURCES_qemu = \ - $(CONTRIBDIR)/qemu/qemu-coroutine.c \ - $(CONTRIBDIR)/qemu/qemu-coroutine-lock.c \ - $(CONTRIBDIR)/qemu/qemu-coroutine-sleep.c \ - $(CONTRIBDIR)/qemu/coroutine-ucontext.c \ - $(CONTRIBDIR)/qemu/block.c \ - $(CONTRIBDIR)/qemu/nop-symbols.c - -qemu_block_la_SOURCES_qemu_util = \ - $(CONTRIBDIR)/qemu/util/aes.c \ - $(CONTRIBDIR)/qemu/util/bitmap.c \ - $(CONTRIBDIR)/qemu/util/bitops.c \ - $(CONTRIBDIR)/qemu/util/cutils.c \ - $(CONTRIBDIR)/qemu/util/error.c \ - $(CONTRIBDIR)/qemu/util/hbitmap.c \ - $(CONTRIBDIR)/qemu/util/iov.c \ - $(CONTRIBDIR)/qemu/util/module.c \ - $(CONTRIBDIR)/qemu/util/oslib-posix.c \ - $(CONTRIBDIR)/qemu/util/qemu-option.c \ - 
$(CONTRIBDIR)/qemu/util/qemu-error.c \ - $(CONTRIBDIR)/qemu/util/qemu-thread-posix.c \ - $(CONTRIBDIR)/qemu/util/unicode.c \ - $(CONTRIBDIR)/qemu/util/hexdump.c - -qemu_block_la_SOURCES_qemu_block = \ - $(CONTRIBDIR)/qemu/block/snapshot.c \ - $(CONTRIBDIR)/qemu/block/qcow2-cache.c \ - $(CONTRIBDIR)/qemu/block/qcow2-cluster.c \ - $(CONTRIBDIR)/qemu/block/qcow2-refcount.c \ - $(CONTRIBDIR)/qemu/block/qcow2-snapshot.c \ - $(CONTRIBDIR)/qemu/block/qcow2.c \ - $(CONTRIBDIR)/qemu/block/qed-check.c \ - $(CONTRIBDIR)/qemu/block/qed-cluster.c \ - $(CONTRIBDIR)/qemu/block/qed-gencb.c \ - $(CONTRIBDIR)/qemu/block/qed-l2-cache.c \ - $(CONTRIBDIR)/qemu/block/qed-table.c \ - $(CONTRIBDIR)/qemu/block/qed.c - -qemu_block_la_SOURCES_qemu_qobject = \ - $(CONTRIBDIR)/qemu/qobject/json-lexer.c \ - $(CONTRIBDIR)/qemu/qobject/json-parser.c \ - $(CONTRIBDIR)/qemu/qobject/json-streamer.c \ - $(CONTRIBDIR)/qemu/qobject/qbool.c \ - $(CONTRIBDIR)/qemu/qobject/qdict.c \ - $(CONTRIBDIR)/qemu/qobject/qerror.c \ - $(CONTRIBDIR)/qemu/qobject/qfloat.c \ - $(CONTRIBDIR)/qemu/qobject/qint.c \ - $(CONTRIBDIR)/qemu/qobject/qjson.c \ - $(CONTRIBDIR)/qemu/qobject/qlist.c \ - $(CONTRIBDIR)/qemu/qobject/qstring.c - -qemu_block_la_SOURCES = \ - $(qemu_block_la_SOURCES_qemu) \ - $(qemu_block_la_SOURCES_qemu_util) \ - $(qemu_block_la_SOURCES_qemu_block) \ - $(qemu_block_la_SOURCES_qemu_qobject) \ - bdrv-xlator.c \ - coroutine-synctask.c \ - bh-syncop.c \ - monitor-logging.c \ - clock-timer.c \ - qemu-block.c \ - qb-coroutines.c - -noinst_HEADERS_qemu = \ - $(CONTRIBDIR)/qemu/config-host.h \ - $(CONTRIBDIR)/qemu/qapi-types.h \ - $(CONTRIBDIR)/qemu/qmp-commands.h \ - $(CONTRIBDIR)/qemu/trace/generated-tracers.h \ - $(CONTRIBDIR)/qemu/include/config.h \ - $(CONTRIBDIR)/qemu/include/glib-compat.h \ - $(CONTRIBDIR)/qemu/include/qemu-common.h \ - $(CONTRIBDIR)/qemu/include/trace.h \ - $(CONTRIBDIR)/qemu/include/block/coroutine.h \ - $(CONTRIBDIR)/qemu/include/block/aio.h \ - 
$(CONTRIBDIR)/qemu/include/block/block.h \ - $(CONTRIBDIR)/qemu/include/block/block_int.h \ - $(CONTRIBDIR)/qemu/include/block/blockjob.h \ - $(CONTRIBDIR)/qemu/include/block/coroutine.h \ - $(CONTRIBDIR)/qemu/include/block/coroutine_int.h \ - $(CONTRIBDIR)/qemu/include/block/snapshot.h \ - $(CONTRIBDIR)/qemu/include/exec/cpu-common.h \ - $(CONTRIBDIR)/qemu/include/exec/hwaddr.h \ - $(CONTRIBDIR)/qemu/include/exec/poison.h \ - $(CONTRIBDIR)/qemu/include/fpu/softfloat.h \ - $(CONTRIBDIR)/qemu/include/migration/migration.h \ - $(CONTRIBDIR)/qemu/include/migration/qemu-file.h \ - $(CONTRIBDIR)/qemu/include/migration/vmstate.h \ - $(CONTRIBDIR)/qemu/include/monitor/monitor.h \ - $(CONTRIBDIR)/qemu/include/monitor/readline.h \ - $(CONTRIBDIR)/qemu/include/qapi/error.h \ - $(CONTRIBDIR)/qemu/include/qapi/qmp/json-lexer.h \ - $(CONTRIBDIR)/qemu/include/qapi/qmp/json-parser.h \ - $(CONTRIBDIR)/qemu/include/qapi/qmp/json-streamer.h \ - $(CONTRIBDIR)/qemu/include/qapi/qmp/qbool.h \ - $(CONTRIBDIR)/qemu/include/qapi/qmp/qdict.h \ - $(CONTRIBDIR)/qemu/include/qapi/qmp/qerror.h \ - $(CONTRIBDIR)/qemu/include/qapi/qmp/qfloat.h \ - $(CONTRIBDIR)/qemu/include/qapi/qmp/qint.h \ - $(CONTRIBDIR)/qemu/include/qapi/qmp/qjson.h \ - $(CONTRIBDIR)/qemu/include/qapi/qmp/qlist.h \ - $(CONTRIBDIR)/qemu/include/qapi/qmp/qobject.h \ - $(CONTRIBDIR)/qemu/include/qapi/qmp/qstring.h \ - $(CONTRIBDIR)/qemu/include/qapi/qmp/types.h \ - $(CONTRIBDIR)/qemu/include/qemu/aes.h \ - $(CONTRIBDIR)/qemu/include/qemu/atomic.h \ - $(CONTRIBDIR)/qemu/include/qemu/bitmap.h \ - $(CONTRIBDIR)/qemu/include/qemu/bitops.h \ - $(CONTRIBDIR)/qemu/include/qemu/bswap.h \ - $(CONTRIBDIR)/qemu/include/qemu/compiler.h \ - $(CONTRIBDIR)/qemu/include/qemu/error-report.h \ - $(CONTRIBDIR)/qemu/include/qemu/event_notifier.h \ - $(CONTRIBDIR)/qemu/include/qemu/hbitmap.h \ - $(CONTRIBDIR)/qemu/include/qemu/host-utils.h \ - $(CONTRIBDIR)/qemu/include/qemu/iov.h \ - $(CONTRIBDIR)/qemu/include/qemu/main-loop.h \ - 
$(CONTRIBDIR)/qemu/include/qemu/module.h \ - $(CONTRIBDIR)/qemu/include/qemu/notify.h \ - $(CONTRIBDIR)/qemu/include/qemu/option.h \ - $(CONTRIBDIR)/qemu/include/qemu/option_int.h \ - $(CONTRIBDIR)/qemu/include/qemu/osdep.h \ - $(CONTRIBDIR)/qemu/include/qemu/queue.h \ - $(CONTRIBDIR)/qemu/include/qemu/sockets.h \ - $(CONTRIBDIR)/qemu/include/qemu/thread-posix.h \ - $(CONTRIBDIR)/qemu/include/qemu/thread.h \ - $(CONTRIBDIR)/qemu/include/qemu/timer.h \ - $(CONTRIBDIR)/qemu/include/qemu/typedefs.h \ - $(CONTRIBDIR)/qemu/include/sysemu/sysemu.h \ - $(CONTRIBDIR)/qemu/include/sysemu/os-posix.h \ - $(CONTRIBDIR)/qemu/block/qcow2.h \ - $(CONTRIBDIR)/qemu/block/qed.h - -noinst_HEADERS = \ - $(noinst_HEADERS_qemu) \ - qemu-block.h \ - qemu-block-memory-types.h \ - qb-coroutines.h - -AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ - -I$(CONTRIBDIR)/qemu \ - -I$(CONTRIBDIR)/qemu/include \ - -DGLUSTER_XLATOR - -AM_CFLAGS = -fno-strict-aliasing -Wall $(GF_CFLAGS) $(GLIB_CFLAGS) - -CLEANFILES = - -endif diff --git a/xlators/features/qemu-block/src/bdrv-xlator.c b/xlators/features/qemu-block/src/bdrv-xlator.c deleted file mode 100644 index dcb8304712d..00000000000 --- a/xlators/features/qemu-block/src/bdrv-xlator.c +++ /dev/null @@ -1,386 +0,0 @@ -/* - Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. 
-*/ - - -#include "inode.h" -#include "syncop.h" -#include "qemu-block.h" -#include "block/block_int.h" - -typedef struct BDRVGlusterState { - inode_t *inode; -} BDRVGlusterState; - -static QemuOptsList runtime_opts = { - .name = "gluster", - .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head), - .desc = { - { - .name = "filename", - .type = QEMU_OPT_STRING, - .help = "GFID of file", - }, - { /* end of list */ } - }, -}; - -inode_t * -qb_inode_from_filename (const char *filename) -{ - const char *iptr = NULL; - inode_t *inode = NULL; - - iptr = filename + 17; - sscanf (iptr, "%p", &inode); - - return inode; -} - - -int -qb_inode_to_filename (inode_t *inode, char *filename, int size) -{ - return snprintf (filename, size, "gluster://inodep:%p", inode); -} - - -static fd_t * -fd_from_bs (BlockDriverState *bs) -{ - BDRVGlusterState *s = bs->opaque; - - return fd_anonymous (s->inode); -} - - -static int -qemu_gluster_open (BlockDriverState *bs, QDict *options, int bdrv_flags) -{ - inode_t *inode = NULL; - BDRVGlusterState *s = bs->opaque; - QemuOpts *opts = NULL; - Error *local_err = NULL; - const char *filename = NULL; - char gfid_str[128]; - int ret; - qb_conf_t *conf = THIS->private; - - opts = qemu_opts_create_nofail(&runtime_opts); - qemu_opts_absorb_qdict(opts, options, &local_err); - if (error_is_set(&local_err)) { - qerror_report_err(local_err); - error_free(local_err); - return -EINVAL; - } - - filename = qemu_opt_get(opts, "filename"); - - /* - * gfid:<gfid> format means we're opening a backing image. 
- */ - ret = sscanf(filename, "gluster://gfid:%s", gfid_str); - if (ret) { - loc_t loc = {0,}; - struct iatt buf = {0,}; - uuid_t gfid; - - gf_uuid_parse(gfid_str, gfid); - - loc.inode = inode_find(conf->root_inode->table, gfid); - if (!loc.inode) { - loc.inode = inode_new(conf->root_inode->table); - gf_uuid_copy(loc.inode->gfid, gfid); - } - - gf_uuid_copy(loc.gfid, loc.inode->gfid); - ret = syncop_lookup(FIRST_CHILD(THIS), &loc, &buf, NULL, - NULL, NULL); - if (ret) { - loc_wipe(&loc); - return ret; - } - - s->inode = inode_ref(loc.inode); - loc_wipe(&loc); - } else { - inode = qb_inode_from_filename (filename); - if (!inode) - return -EINVAL; - - s->inode = inode_ref(inode); - } - - return 0; -} - - -static int -qemu_gluster_create (const char *filename, QEMUOptionParameter *options) -{ - uint64_t total_size = 0; - inode_t *inode = NULL; - fd_t *fd = NULL; - struct iatt stat = {0, }; - int ret = 0; - - inode = qb_inode_from_filename (filename); - if (!inode) - return -EINVAL; - - while (options && options->name) { - if (!strcmp(options->name, BLOCK_OPT_SIZE)) { - total_size = options->value.n / BDRV_SECTOR_SIZE; - } - options++; - } - - fd = fd_anonymous (inode); - if (!fd) - return -ENOMEM; - - ret = syncop_fstat (FIRST_CHILD(THIS), fd, &stat, NULL, NULL); - if (ret) { - fd_unref (fd); - return ret; - } - - if (stat.ia_size) { - /* format ONLY if the filesize is 0 bytes */ - fd_unref (fd); - return -EFBIG; - } - - if (total_size) { - ret = syncop_ftruncate (FIRST_CHILD(THIS), fd, total_size, - NULL, NULL); - if (ret) { - fd_unref (fd); - return ret; - } - } - - fd_unref (fd); - return 0; -} - - -static int -qemu_gluster_co_readv (BlockDriverState *bs, int64_t sector_num, int nb_sectors, - QEMUIOVector *qiov) -{ - fd_t *fd = NULL; - off_t offset = 0; - size_t size = 0; - struct iovec *iov = NULL; - int count = 0; - struct iobref *iobref = NULL; - int ret = 0; - - fd = fd_from_bs (bs); - if (!fd) - return -EIO; - - offset = sector_num * BDRV_SECTOR_SIZE; - size = 
nb_sectors * BDRV_SECTOR_SIZE; - - ret = syncop_readv (FIRST_CHILD(THIS), fd, size, offset, 0, - &iov, &count, &iobref, NULL, NULL); - if (ret < 0) - goto out; - - iov_copy (qiov->iov, qiov->niov, iov, count); /* *choke!* */ - -out: - GF_FREE (iov); - if (iobref) - iobref_unref (iobref); - fd_unref (fd); - return ret; -} - - -static int -qemu_gluster_co_writev (BlockDriverState *bs, int64_t sector_num, int nb_sectors, - QEMUIOVector *qiov) -{ - fd_t *fd = NULL; - off_t offset = 0; - size_t size = 0; - struct iobref *iobref = NULL; - struct iobuf *iobuf = NULL; - struct iovec iov = {0, }; - int ret = -ENOMEM; - - fd = fd_from_bs (bs); - if (!fd) - return -EIO; - - offset = sector_num * BDRV_SECTOR_SIZE; - size = nb_sectors * BDRV_SECTOR_SIZE; - - iobuf = iobuf_get2 (THIS->ctx->iobuf_pool, size); - if (!iobuf) - goto out; - - iobref = iobref_new (); - if (!iobref) { - goto out; - } - - iobref_add (iobref, iobuf); - - iov_unload (iobuf_ptr (iobuf), qiov->iov, qiov->niov); /* *choke!* */ - - iov.iov_base = iobuf_ptr (iobuf); - iov.iov_len = size; - - ret = syncop_writev (FIRST_CHILD(THIS), fd, &iov, 1, offset, iobref, 0, - NULL, NULL); - -out: - if (iobuf) - iobuf_unref (iobuf); - if (iobref) - iobref_unref (iobref); - fd_unref (fd); - return ret; -} - - -static int -qemu_gluster_co_flush (BlockDriverState *bs) -{ - fd_t *fd = NULL; - int ret = 0; - - fd = fd_from_bs (bs); - - ret = syncop_flush (FIRST_CHILD(THIS), fd, NULL, NULL); - - fd_unref (fd); - - return ret; -} - - -static int -qemu_gluster_co_fsync (BlockDriverState *bs) -{ - fd_t *fd = NULL; - int ret = 0; - - fd = fd_from_bs (bs); - - ret = syncop_fsync (FIRST_CHILD(THIS), fd, 0, NULL, NULL); - - fd_unref (fd); - - return ret; -} - - -static int -qemu_gluster_truncate (BlockDriverState *bs, int64_t offset) -{ - fd_t *fd = NULL; - int ret = 0; - - fd = fd_from_bs (bs); - - ret = syncop_ftruncate (FIRST_CHILD(THIS), fd, offset, NULL, NULL); - - fd_unref (fd); - - return ret; -} - - -static int64_t 
-qemu_gluster_getlength (BlockDriverState *bs) -{ - fd_t *fd = NULL; - int ret = 0; - struct iatt iatt = {0, }; - - fd = fd_from_bs (bs); - - ret = syncop_fstat (FIRST_CHILD(THIS), fd, &iatt, NULL, NULL); - if (ret < 0) - return -1; - - return iatt.ia_size; -} - - -static int64_t -qemu_gluster_allocated_file_size (BlockDriverState *bs) -{ - fd_t *fd = NULL; - int ret = 0; - struct iatt iatt = {0, }; - - fd = fd_from_bs (bs); - - ret = syncop_fstat (FIRST_CHILD(THIS), fd, &iatt, NULL, NULL); - if (ret < 0) - return -1; - - return iatt.ia_blocks * 512; -} - - -static void -qemu_gluster_close (BlockDriverState *bs) -{ - BDRVGlusterState *s = NULL; - - s = bs->opaque; - - inode_unref (s->inode); - - return; -} - - -static QEMUOptionParameter qemu_gluster_create_options[] = { - { - .name = BLOCK_OPT_SIZE, - .type = OPT_SIZE, - .help = "Virtual disk size" - }, - { NULL } -}; - - -static BlockDriver bdrv_gluster = { - .format_name = "gluster", - .protocol_name = "gluster", - .instance_size = sizeof(BDRVGlusterState), - .bdrv_file_open = qemu_gluster_open, - .bdrv_close = qemu_gluster_close, - .bdrv_create = qemu_gluster_create, - .bdrv_getlength = qemu_gluster_getlength, - .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, - .bdrv_co_readv = qemu_gluster_co_readv, - .bdrv_co_writev = qemu_gluster_co_writev, - .bdrv_co_flush_to_os = qemu_gluster_co_flush, - .bdrv_co_flush_to_disk = qemu_gluster_co_fsync, - .bdrv_truncate = qemu_gluster_truncate, - .create_options = qemu_gluster_create_options, -}; - - -static void bdrv_gluster_init(void) -{ - bdrv_register(&bdrv_gluster); -} - - -block_init(bdrv_gluster_init); diff --git a/xlators/features/qemu-block/src/bh-syncop.c b/xlators/features/qemu-block/src/bh-syncop.c deleted file mode 100644 index 4374b5abcee..00000000000 --- a/xlators/features/qemu-block/src/bh-syncop.c +++ /dev/null @@ -1,43 +0,0 @@ -/* - Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. 
- - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - - -#include "glusterfs.h" -#include "logging.h" -#include "dict.h" -#include "xlator.h" -#include "syncop.h" -#include "qemu-block-memory-types.h" - -#include "block/aio.h" - -void -qemu_bh_schedule (QEMUBH *bh) -{ - return; -} - -void -qemu_bh_cancel (QEMUBH *bh) -{ - return; -} - -void -qemu_bh_delete (QEMUBH *bh) -{ - -} - -QEMUBH * -qemu_bh_new (QEMUBHFunc *cb, void *opaque) -{ - return NULL; -} diff --git a/xlators/features/qemu-block/src/clock-timer.c b/xlators/features/qemu-block/src/clock-timer.c deleted file mode 100644 index 36c08a2e766..00000000000 --- a/xlators/features/qemu-block/src/clock-timer.c +++ /dev/null @@ -1,55 +0,0 @@ -/* - Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. 
-*/ - - -#include "glusterfs.h" -#include "logging.h" -#include "dict.h" -#include "xlator.h" -#include "syncop.h" -#include "qemu-block-memory-types.h" - -#include "qemu/timer.h" - -QEMUClock *vm_clock; -int use_rt_clock = 0; - -QEMUTimer *qemu_new_timer (QEMUClock *clock, int scale, - QEMUTimerCB *cb, void *opaque) -{ - return NULL; -} - -int64_t qemu_get_clock_ns (QEMUClock *clock) -{ - return 0; -} - -void qemu_mod_timer (QEMUTimer *ts, int64_t expire_time) -{ - return; -} - -void qemu_free_timer (QEMUTimer *ts) -{ - -} - -void qemu_del_timer (QEMUTimer *ts) -{ - -} - -bool qemu_aio_wait() -{ - synctask_wake (synctask_get()); - synctask_yield (synctask_get()); - return 0; -} diff --git a/xlators/features/qemu-block/src/coroutine-synctask.c b/xlators/features/qemu-block/src/coroutine-synctask.c deleted file mode 100644 index d7d43831af4..00000000000 --- a/xlators/features/qemu-block/src/coroutine-synctask.c +++ /dev/null @@ -1,111 +0,0 @@ -/* - Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - - -#include "glusterfs.h" -#include "logging.h" -#include "dict.h" -#include "xlator.h" -#include "syncop.h" -#include "qemu-block-memory-types.h" - -#include "qemu-block.h" - -/* - * This code serves as the bridge from the main glusterfs context to the qemu - * coroutine context via synctask. We create a single threaded syncenv with a - * single synctask responsible for processing a queue of coroutines. The qemu - * code invoked from within the synctask function handlers uses the ucontext - * coroutine implementation and scheduling logic internal to qemu. This - * effectively donates a thread of execution to qemu and its internal coroutine - * management. 
- * - * NOTE: The existence of concurrent synctasks has proven quite racy with regard - * to qemu coroutine management, particularly related to the lifecycle - * differences with top-level synctasks and internally created coroutines and - * interactions with qemu-internal queues (and locks, in turn). We explicitly - * disallow this scenario, via the queue, until it is more well supported. - */ - -static struct { - struct list_head queue; - gf_lock_t lock; - struct synctask *task; -} qb_co; - -static void -init_qbco() -{ - INIT_LIST_HEAD(&qb_co.queue); - LOCK_INIT(&qb_co.lock); -} - -static int -synctask_nop_cbk (int ret, call_frame_t *frame, void *opaque) -{ - return 0; -} - -static int -qb_synctask_wrap (void *opaque) -{ - qb_local_t *qb_local, *tmp; - - LOCK(&qb_co.lock); - - while (!list_empty(&qb_co.queue)) { - list_for_each_entry_safe(qb_local, tmp, &qb_co.queue, list) { - list_del_init(&qb_local->list); - break; - } - - UNLOCK(&qb_co.lock); - - qb_local->synctask_fn(qb_local); - /* qb_local is now unwound and gone! */ - - LOCK(&qb_co.lock); - } - - qb_co.task = NULL; - - UNLOCK(&qb_co.lock); - - return 0; -} - -int -qb_coroutine (call_frame_t *frame, synctask_fn_t fn) -{ - qb_local_t *qb_local = NULL; - qb_conf_t *qb_conf = NULL; - static int init = 0; - - qb_local = frame->local; - qb_local->synctask_fn = fn; - qb_conf = frame->this->private; - - if (!init) { - init = 1; - init_qbco(); - } - - LOCK(&qb_co.lock); - - if (!qb_co.task) - qb_co.task = synctask_create(qb_conf->env, 0, qb_synctask_wrap, - synctask_nop_cbk, frame, NULL); - - list_add_tail(&qb_local->list, &qb_co.queue); - - UNLOCK(&qb_co.lock); - - return 0; -} diff --git a/xlators/features/qemu-block/src/monitor-logging.c b/xlators/features/qemu-block/src/monitor-logging.c deleted file mode 100644 index c4dc12c1d5e..00000000000 --- a/xlators/features/qemu-block/src/monitor-logging.c +++ /dev/null @@ -1,45 +0,0 @@ -/* - Copyright (c) 2013 Red Hat, Inc. 
<http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - - -#include "glusterfs.h" -#include "logging.h" -#include "dict.h" -#include "xlator.h" -#include "qemu-block-memory-types.h" - -#include "block/block_int.h" - -Monitor *cur_mon; - -int -monitor_cur_is_qmp() -{ - /* No QMP support here */ - return 0; -} - -void -monitor_set_error (Monitor *mon, QError *qerror) -{ - /* NOP here */ - return; -} - - -void -monitor_vprintf(Monitor *mon, const char *fmt, va_list ap) -{ - char buf[4096]; - - vsnprintf(buf, sizeof(buf), fmt, ap); - - gf_log (THIS->name, GF_LOG_ERROR, "%s", buf); -} diff --git a/xlators/features/qemu-block/src/qb-coroutines.c b/xlators/features/qemu-block/src/qb-coroutines.c deleted file mode 100644 index 0c2fa62bb4f..00000000000 --- a/xlators/features/qemu-block/src/qb-coroutines.c +++ /dev/null @@ -1,662 +0,0 @@ -/* - Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. 
-*/ - - -#include "glusterfs.h" -#include "logging.h" -#include "dict.h" -#include "xlator.h" -#include "inode.h" -#include "call-stub.h" -#include "defaults.h" -#include "qemu-block-memory-types.h" -#include "qemu-block.h" -#include "qb-coroutines.h" - - -int -qb_format_and_resume (void *opaque) -{ - qb_local_t *local = NULL; - call_frame_t *frame = NULL; - call_stub_t *stub = NULL; - inode_t *inode = NULL; - char filename[64]; - char base_filename[128]; - int use_base = 0; - qb_inode_t *qb_inode = NULL; - Error *local_err = NULL; - fd_t *fd = NULL; - dict_t *xattr = NULL; - qb_conf_t *qb_conf = NULL; - int ret = -1; - - local = opaque; - frame = local->frame; - stub = local->stub; - inode = local->inode; - qb_conf = frame->this->private; - - qb_inode_to_filename (inode, filename, 64); - - qb_inode = qb_inode_ctx_get (frame->this, inode); - - /* - * See if the caller specified a backing image. - */ - if (!gf_uuid_is_null(qb_inode->backing_gfid) || qb_inode->backing_fname) { - loc_t loc = {0,}; - char gfid_str[64]; - struct iatt buf; - - if (!gf_uuid_is_null(qb_inode->backing_gfid)) { - loc.inode = inode_find(qb_conf->root_inode->table, - qb_inode->backing_gfid); - if (!loc.inode) { - loc.inode = inode_new(qb_conf->root_inode->table); - gf_uuid_copy(loc.inode->gfid, - qb_inode->backing_gfid); - } - gf_uuid_copy(loc.gfid, loc.inode->gfid); - } else if (qb_inode->backing_fname) { - loc.inode = inode_new(qb_conf->root_inode->table); - loc.name = qb_inode->backing_fname; - loc.parent = inode_parent(inode, NULL, NULL); - loc_path(&loc, loc.name); - } - - /* - * Lookup the backing image. Verify existence and/or get the - * gfid if we don't already have it. 
- */ - ret = syncop_lookup(FIRST_CHILD(frame->this), &loc, &buf, NULL, - NULL, NULL); - GF_FREE(qb_inode->backing_fname); - if (ret) { - loc_wipe(&loc); - ret = -ret; - goto err; - } - - gf_uuid_copy(qb_inode->backing_gfid, buf.ia_gfid); - loc_wipe(&loc); - - /* - * We pass the filename of the backing image into the qemu block - * subsystem as the associated gfid. This is embedded into the - * clone image and passed along to the gluster bdrv backend when - * the block subsystem needs to operate on the backing image on - * behalf of the clone. - */ - gf_uuid_unparse(qb_inode->backing_gfid, gfid_str); - snprintf(base_filename, sizeof(base_filename), - "gluster://gfid:%s", gfid_str); - use_base = 1; - } - - bdrv_img_create (filename, qb_inode->fmt, - use_base ? base_filename : NULL, 0, 0, qb_inode->size, - 0, &local_err, true); - - if (error_is_set (&local_err)) { - gf_log (frame->this->name, GF_LOG_ERROR, "%s", - error_get_pretty (local_err)); - error_free (local_err); - QB_STUB_UNWIND (stub, -1, EIO); - return 0; - } - - fd = fd_anonymous (inode); - if (!fd) { - gf_log (frame->this->name, GF_LOG_ERROR, - "could not create anonymous fd for %s", - uuid_utoa (inode->gfid)); - QB_STUB_UNWIND (stub, -1, ENOMEM); - return 0; - } - - xattr = dict_new (); - if (!xattr) { - gf_log (frame->this->name, GF_LOG_ERROR, - "could not allocate xattr dict for %s", - uuid_utoa (inode->gfid)); - QB_STUB_UNWIND (stub, -1, ENOMEM); - fd_unref (fd); - return 0; - } - - ret = dict_set_str (xattr, qb_conf->qb_xattr_key, local->fmt); - if (ret) { - gf_log (frame->this->name, GF_LOG_ERROR, - "could not dict_set for %s", - uuid_utoa (inode->gfid)); - QB_STUB_UNWIND (stub, -1, ENOMEM); - fd_unref (fd); - dict_unref (xattr); - return 0; - } - - ret = syncop_fsetxattr (FIRST_CHILD(THIS), fd, xattr, 0, NULL, NULL); - if (ret) { - gf_log (frame->this->name, GF_LOG_ERROR, - "failed to setxattr for %s", - uuid_utoa (inode->gfid)); - QB_STUB_UNWIND (stub, -1, -ret); - fd_unref (fd); - dict_unref 
(xattr); - return 0; - } - - fd_unref (fd); - dict_unref (xattr); - - QB_STUB_UNWIND (stub, 0, 0); - - return 0; - -err: - QB_STUB_UNWIND(stub, -1, ret); - return 0; -} - - -static BlockDriverState * -qb_bs_create (inode_t *inode, const char *fmt) -{ - char filename[64]; - BlockDriverState *bs = NULL; - BlockDriver *drv = NULL; - int op_errno = 0; - int ret = 0; - - bs = bdrv_new (uuid_utoa (inode->gfid)); - if (!bs) { - op_errno = ENOMEM; - gf_log (THIS->name, GF_LOG_ERROR, - "could not allocate @bdrv for gfid:%s", - uuid_utoa (inode->gfid)); - goto err; - } - - drv = bdrv_find_format (fmt); - if (!drv) { - op_errno = EINVAL; - gf_log (THIS->name, GF_LOG_ERROR, - "Unknown file format: %s for gfid:%s", - fmt, uuid_utoa (inode->gfid)); - goto err; - } - - qb_inode_to_filename (inode, filename, 64); - - ret = bdrv_open (bs, filename, NULL, BDRV_O_RDWR, drv); - if (ret < 0) { - op_errno = -ret; - gf_log (THIS->name, GF_LOG_ERROR, - "Unable to bdrv_open() gfid:%s (%s)", - uuid_utoa (inode->gfid), strerror (op_errno)); - goto err; - } - - return bs; -err: - errno = op_errno; - return NULL; -} - - -int -qb_co_open (void *opaque) -{ - qb_local_t *local = NULL; - call_frame_t *frame = NULL; - call_stub_t *stub = NULL; - inode_t *inode = NULL; - qb_inode_t *qb_inode = NULL; - - local = opaque; - frame = local->frame; - stub = local->stub; - inode = local->inode; - - qb_inode = qb_inode_ctx_get (frame->this, inode); - if (!qb_inode->bs) { - /* FIXME: we need locks around this when - enabling multithreaded syncop/coroutine - for qemu-block - */ - - qb_inode->bs = qb_bs_create (inode, qb_inode->fmt); - if (!qb_inode->bs) { - QB_STUB_UNWIND (stub, -1, errno); - return 0; - } - } - qb_inode->refcnt++; - - QB_STUB_RESUME (stub); - - return 0; -} - - -int -qb_co_writev (void *opaque) -{ - qb_local_t *local = NULL; - call_frame_t *frame = NULL; - call_stub_t *stub = NULL; - inode_t *inode = NULL; - qb_inode_t *qb_inode = NULL; - QEMUIOVector qiov = {0, }; - int ret = 0; - - local = 
opaque; - frame = local->frame; - stub = local->stub; - inode = local->inode; - - qb_inode = qb_inode_ctx_get (frame->this, inode); - if (!qb_inode->bs) { - /* FIXME: we need locks around this when - enabling multithreaded syncop/coroutine - for qemu-block - */ - - qb_inode->bs = qb_bs_create (inode, qb_inode->fmt); - if (!qb_inode->bs) { - QB_STUB_UNWIND (stub, -1, errno); - return 0; - } - } - - qemu_iovec_init_external (&qiov, stub->args.vector, stub->args.count); - - ret = bdrv_pwritev (qb_inode->bs, stub->args.offset, &qiov); - - if (ret < 0) { - QB_STUB_UNWIND (stub, -1, -ret); - } else { - QB_STUB_UNWIND (stub, ret, 0); - } - - return 0; -} - - -int -qb_co_readv (void *opaque) -{ - qb_local_t *local = NULL; - call_frame_t *frame = NULL; - call_stub_t *stub = NULL; - inode_t *inode = NULL; - qb_inode_t *qb_inode = NULL; - struct iobuf *iobuf = NULL; - struct iobref *iobref = NULL; - struct iovec iov = {0, }; - int ret = 0; - - local = opaque; - frame = local->frame; - stub = local->stub; - inode = local->inode; - - qb_inode = qb_inode_ctx_get (frame->this, inode); - if (!qb_inode->bs) { - /* FIXME: we need locks around this when - enabling multithreaded syncop/coroutine - for qemu-block - */ - - qb_inode->bs = qb_bs_create (inode, qb_inode->fmt); - if (!qb_inode->bs) { - QB_STUB_UNWIND (stub, -1, errno); - return 0; - } - } - - if (stub->args.offset >= qb_inode->size) { - QB_STUB_UNWIND (stub, 0, 0); - return 0; - } - - iobuf = iobuf_get2 (frame->this->ctx->iobuf_pool, stub->args.size); - if (!iobuf) { - QB_STUB_UNWIND (stub, -1, ENOMEM); - return 0; - } - - iobref = iobref_new (); - if (!iobref) { - QB_STUB_UNWIND (stub, -1, ENOMEM); - iobuf_unref (iobuf); - return 0; - } - - if (iobref_add (iobref, iobuf) < 0) { - iobuf_unref (iobuf); - iobref_unref (iobref); - QB_STUB_UNWIND (stub, -1, ENOMEM); - return 0; - } - - ret = bdrv_pread (qb_inode->bs, stub->args.offset, iobuf_ptr (iobuf), - stub->args.size); - - if (ret < 0) { - QB_STUB_UNWIND (stub, -1, -ret); 
- iobref_unref (iobref); - return 0; - } - - iov.iov_base = iobuf_ptr (iobuf); - iov.iov_len = ret; - - stub->args_cbk.vector = iov_dup (&iov, 1); - stub->args_cbk.count = 1; - stub->args_cbk.iobref = iobref; - - QB_STUB_UNWIND (stub, ret, 0); - - return 0; -} - - -int -qb_co_fsync (void *opaque) -{ - qb_local_t *local = NULL; - call_frame_t *frame = NULL; - call_stub_t *stub = NULL; - inode_t *inode = NULL; - qb_inode_t *qb_inode = NULL; - int ret = 0; - - local = opaque; - frame = local->frame; - stub = local->stub; - inode = local->inode; - - qb_inode = qb_inode_ctx_get (frame->this, inode); - if (!qb_inode->bs) { - /* FIXME: we need locks around this when - enabling multithreaded syncop/coroutine - for qemu-block - */ - - qb_inode->bs = qb_bs_create (inode, qb_inode->fmt); - if (!qb_inode->bs) { - QB_STUB_UNWIND (stub, -1, errno); - return 0; - } - } - - ret = bdrv_flush (qb_inode->bs); - - if (ret < 0) { - QB_STUB_UNWIND (stub, -1, -ret); - } else { - QB_STUB_UNWIND (stub, ret, 0); - } - - return 0; -} - - -static void -qb_update_size_xattr (xlator_t *this, fd_t *fd, const char *fmt, off_t offset) -{ - char val[QB_XATTR_VAL_MAX]; - qb_conf_t *qb_conf = NULL; - dict_t *xattr = NULL; - - qb_conf = this->private; - - snprintf (val, QB_XATTR_VAL_MAX, "%s:%llu", - fmt, (long long unsigned) offset); - - xattr = dict_new (); - if (!xattr) - return; - - if (dict_set_str (xattr, qb_conf->qb_xattr_key, val) != 0) { - dict_unref (xattr); - return; - } - - syncop_fsetxattr (FIRST_CHILD(this), fd, xattr, 0, NULL, NULL); - dict_unref (xattr); -} - - -int -qb_co_truncate (void *opaque) -{ - qb_local_t *local = NULL; - call_frame_t *frame = NULL; - call_stub_t *stub = NULL; - inode_t *inode = NULL; - qb_inode_t *qb_inode = NULL; - int ret = 0; - off_t offset = 0; - xlator_t *this = NULL; - - this = THIS; - - local = opaque; - frame = local->frame; - stub = local->stub; - inode = local->inode; - - qb_inode = qb_inode_ctx_get (frame->this, inode); - if (!qb_inode->bs) { - /* 
FIXME: we need locks around this when - enabling multithreaded syncop/coroutine - for qemu-block - */ - - qb_inode->bs = qb_bs_create (inode, qb_inode->fmt); - if (!qb_inode->bs) { - QB_STUB_UNWIND (stub, -1, errno); - return 0; - } - } - - ret = syncop_fstat (FIRST_CHILD(this), local->fd, - &stub->args_cbk.prestat, NULL, NULL); - if (ret < 0) - goto out; - stub->args_cbk.prestat.ia_size = qb_inode->size; - - ret = bdrv_truncate (qb_inode->bs, stub->args.offset); - if (ret < 0) - goto out; - - offset = bdrv_getlength (qb_inode->bs); - - qb_inode->size = offset; - - ret = syncop_fstat (FIRST_CHILD(this), local->fd, - &stub->args_cbk.poststat, NULL, NULL); - if (ret < 0) - goto out; - stub->args_cbk.poststat.ia_size = qb_inode->size; - - qb_update_size_xattr (this, local->fd, qb_inode->fmt, qb_inode->size); - -out: - if (ret < 0) { - QB_STUB_UNWIND (stub, -1, -ret); - } else { - QB_STUB_UNWIND (stub, ret, 0); - } - - return 0; -} - - -int -qb_co_close (void *opaque) -{ - qb_local_t *local = NULL; - call_frame_t *frame = NULL; - inode_t *inode = NULL; - qb_inode_t *qb_inode = NULL; - BlockDriverState *bs = NULL; - - local = opaque; - inode = local->inode; - - qb_inode = qb_inode_ctx_get (THIS, inode); - - if (!--qb_inode->refcnt) { - bs = qb_inode->bs; - qb_inode->bs = NULL; - bdrv_delete (bs); - } - - frame = local->frame; - frame->local = NULL; - qb_local_free (THIS, local); - STACK_DESTROY (frame->root); - - return 0; -} - - -int -qb_snapshot_create (void *opaque) -{ - qb_local_t *local = NULL; - call_frame_t *frame = NULL; - call_stub_t *stub = NULL; - inode_t *inode = NULL; - qb_inode_t *qb_inode = NULL; - QEMUSnapshotInfo sn; - struct timeval tv = {0, }; - int ret = 0; - - local = opaque; - frame = local->frame; - stub = local->stub; - inode = local->inode; - - qb_inode = qb_inode_ctx_get (frame->this, inode); - if (!qb_inode->bs) { - /* FIXME: we need locks around this when - enabling multithreaded syncop/coroutine - for qemu-block - */ - - qb_inode->bs = 
qb_bs_create (inode, qb_inode->fmt); - if (!qb_inode->bs) { - QB_STUB_UNWIND (stub, -1, errno); - return 0; - } - } - - memset (&sn, 0, sizeof (sn)); - pstrcpy (sn.name, sizeof(sn.name), local->name); - gettimeofday (&tv, NULL); - sn.date_sec = tv.tv_sec; - sn.date_nsec = tv.tv_usec * 1000; - - ret = bdrv_snapshot_create (qb_inode->bs, &sn); - if (ret < 0) { - QB_STUB_UNWIND (stub, -1, -ret); - } else { - QB_STUB_UNWIND (stub, ret, 0); - } - - return 0; -} - - -int -qb_snapshot_delete (void *opaque) -{ - qb_local_t *local = NULL; - call_frame_t *frame = NULL; - call_stub_t *stub = NULL; - inode_t *inode = NULL; - qb_inode_t *qb_inode = NULL; - int ret = 0; - - local = opaque; - frame = local->frame; - stub = local->stub; - inode = local->inode; - - qb_inode = qb_inode_ctx_get (frame->this, inode); - if (!qb_inode->bs) { - /* FIXME: we need locks around this when - enabling multithreaded syncop/coroutine - for qemu-block - */ - - qb_inode->bs = qb_bs_create (inode, qb_inode->fmt); - if (!qb_inode->bs) { - QB_STUB_UNWIND (stub, -1, errno); - return 0; - } - } - - ret = bdrv_snapshot_delete (qb_inode->bs, local->name); - - if (ret < 0) { - QB_STUB_UNWIND (stub, -1, -ret); - } else { - QB_STUB_UNWIND (stub, ret, 0); - } - - return 0; -} - - -int -qb_snapshot_goto (void *opaque) -{ - qb_local_t *local = NULL; - call_frame_t *frame = NULL; - call_stub_t *stub = NULL; - inode_t *inode = NULL; - qb_inode_t *qb_inode = NULL; - int ret = 0; - - local = opaque; - frame = local->frame; - stub = local->stub; - inode = local->inode; - - qb_inode = qb_inode_ctx_get (frame->this, inode); - if (!qb_inode->bs) { - /* FIXME: we need locks around this when - enabling multithreaded syncop/coroutine - for qemu-block - */ - - qb_inode->bs = qb_bs_create (inode, qb_inode->fmt); - if (!qb_inode->bs) { - QB_STUB_UNWIND (stub, -1, errno); - return 0; - } - } - - ret = bdrv_snapshot_goto (qb_inode->bs, local->name); - - if (ret < 0) { - QB_STUB_UNWIND (stub, -1, -ret); - } else { - 
QB_STUB_UNWIND (stub, ret, 0); - } - - return 0; -} diff --git a/xlators/features/qemu-block/src/qb-coroutines.h b/xlators/features/qemu-block/src/qb-coroutines.h deleted file mode 100644 index 583319f3b06..00000000000 --- a/xlators/features/qemu-block/src/qb-coroutines.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -#ifndef __QB_COROUTINES_H -#define __QB_COROUTINES_H - -#include "syncop.h" -#include "call-stub.h" -#include "block/block_int.h" -#include "monitor/monitor.h" - -int qb_format_and_resume (void *opaque); -int qb_snapshot_create (void *opaque); -int qb_snapshot_delete (void *opaque); -int qb_snapshot_goto (void *opaque); -int qb_co_open (void *opaque); -int qb_co_close (void *opaque); -int qb_co_writev (void *opaque); -int qb_co_readv (void *opaque); -int qb_co_fsync (void *opaque); -int qb_co_truncate (void *opaque); - -#endif /* __QB_COROUTINES_H */ diff --git a/xlators/features/qemu-block/src/qemu-block-memory-types.h b/xlators/features/qemu-block/src/qemu-block-memory-types.h deleted file mode 100644 index 267b3893fed..00000000000 --- a/xlators/features/qemu-block/src/qemu-block-memory-types.h +++ /dev/null @@ -1,25 +0,0 @@ -/* - Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. 
-*/ - - -#ifndef __QB_MEM_TYPES_H__ -#define __QB_MEM_TYPES_H__ - -#include "mem-types.h" - -enum gf_qb_mem_types_ { - gf_qb_mt_qb_conf_t = gf_common_mt_end + 1, - gf_qb_mt_qb_inode_t, - gf_qb_mt_qb_local_t, - gf_qb_mt_coroutinesynctask_t, - gf_qb_mt_end -}; -#endif - diff --git a/xlators/features/qemu-block/src/qemu-block.c b/xlators/features/qemu-block/src/qemu-block.c deleted file mode 100644 index 9aa9f43ab19..00000000000 --- a/xlators/features/qemu-block/src/qemu-block.c +++ /dev/null @@ -1,1134 +0,0 @@ -/* - Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - - -#include "glusterfs.h" -#include "logging.h" -#include "dict.h" -#include "xlator.h" -#include "inode.h" -#include "call-stub.h" -#include "defaults.h" -#include "qemu-block-memory-types.h" -#include "qemu-block.h" -#include "qb-coroutines.h" - - -qb_inode_t * -__qb_inode_ctx_get (xlator_t *this, inode_t *inode) -{ - uint64_t value = 0; - qb_inode_t *qb_inode = NULL; - - __inode_ctx_get (inode, this, &value); - qb_inode = (qb_inode_t *)(unsigned long) value; - - return qb_inode; -} - - -qb_inode_t * -qb_inode_ctx_get (xlator_t *this, inode_t *inode) -{ - qb_inode_t *qb_inode = NULL; - - LOCK (&inode->lock); - { - qb_inode = __qb_inode_ctx_get (this, inode); - } - UNLOCK (&inode->lock); - - return qb_inode; -} - - -qb_inode_t * -qb_inode_ctx_del (xlator_t *this, inode_t *inode) -{ - uint64_t value = 0; - qb_inode_t *qb_inode = NULL; - - inode_ctx_del (inode, this, &value); - qb_inode = (qb_inode_t *)(unsigned long) value; - - return qb_inode; -} - - -int -qb_inode_cleanup (xlator_t *this, inode_t *inode, int warn) -{ - qb_inode_t *qb_inode = NULL; - - qb_inode = qb_inode_ctx_del (this, inode); - - if 
(!qb_inode) - return 0; - - if (warn) - gf_log (this->name, GF_LOG_WARNING, - "inode %s no longer block formatted", - uuid_utoa (inode->gfid)); - - /* free (qb_inode->bs); */ - - GF_FREE (qb_inode); - - return 0; -} - - -int -qb_iatt_fixup (xlator_t *this, inode_t *inode, struct iatt *iatt) -{ - qb_inode_t *qb_inode = NULL; - - qb_inode = qb_inode_ctx_get (this, inode); - if (!qb_inode) - return 0; - - iatt->ia_size = qb_inode->size; - - return 0; -} - - -int -qb_format_extract (xlator_t *this, char *format, inode_t *inode) -{ - char *s, *save; - uint64_t size = 0; - char fmt[QB_XATTR_VAL_MAX+1] = {0, }; - qb_inode_t *qb_inode = NULL; - char *formatstr = NULL; - uuid_t gfid = {0,}; - char gfid_str[64] = {0,}; - int ret; - - strncpy(fmt, format, QB_XATTR_VAL_MAX); - - s = strtok_r(fmt, ":", &save); - if (!s) - goto invalid; - formatstr = gf_strdup(s); - - s = strtok_r(NULL, ":", &save); - if (!s) - goto invalid; - if (gf_string2bytesize (s, &size)) - goto invalid; - if (!size) - goto invalid; - - s = strtok_r(NULL, "\0", &save); - if (s && !strncmp(s, "<gfid:", strlen("<gfid:"))) { - /* - * Check for valid gfid backing image specifier. - */ - if (strlen(s) + 1 > sizeof(gfid_str)) - goto invalid; - ret = sscanf(s, "<gfid:%[^>]s", gfid_str); - if (ret == 1) { - ret = gf_uuid_parse(gfid_str, gfid); - if (ret < 0) - goto invalid; - } - } - - qb_inode = qb_inode_ctx_get (this, inode); - if (!qb_inode) - qb_inode = GF_CALLOC (1, sizeof (*qb_inode), - gf_qb_mt_qb_inode_t); - if (!qb_inode) { - GF_FREE(formatstr); - return ENOMEM; - } - - strncpy(qb_inode->fmt, formatstr, QB_XATTR_VAL_MAX); - qb_inode->size = size; - - /* - * If a backing gfid was not specified, interpret any remaining bytes - * associated with a backing image as a filename local to the parent - * directory. The format processing will validate further. 
- */ - if (!gf_uuid_is_null(gfid)) - gf_uuid_copy(qb_inode->backing_gfid, gfid); - else if (s) - qb_inode->backing_fname = gf_strdup(s); - - inode_ctx_set (inode, this, (void *)&qb_inode); - - GF_FREE(formatstr); - - return 0; - -invalid: - GF_FREE(formatstr); - - gf_log (this->name, GF_LOG_WARNING, - "invalid format '%s' in inode %s", format, - uuid_utoa (inode->gfid)); - return EINVAL; -} - - -void -qb_local_free (xlator_t *this, qb_local_t *local) -{ - if (local->inode) - inode_unref (local->inode); - if (local->fd) - fd_unref (local->fd); - GF_FREE (local); -} - - -int -qb_local_init (call_frame_t *frame) -{ - qb_local_t *qb_local = NULL; - - qb_local = GF_CALLOC (1, sizeof (*qb_local), gf_qb_mt_qb_local_t); - if (!qb_local) - return -1; - INIT_LIST_HEAD(&qb_local->list); - - qb_local->frame = frame; - frame->local = qb_local; - - return 0; -} - - -int -qb_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, inode_t *inode, struct iatt *buf, - dict_t *xdata, struct iatt *postparent) -{ - char *format = NULL; - qb_conf_t *conf = NULL; - - conf = this->private; - - if (op_ret == -1) - goto out; - - /* - * Cache the root inode for dealing with backing images. The format - * coroutine and the gluster qemu backend driver both use the root inode - * table to verify and/or redirect I/O to the backing image via - * anonymous fd's. 
- */ - if (!conf->root_inode && __is_root_gfid(inode->gfid)) - conf->root_inode = inode_ref(inode); - - if (!xdata) - goto out; - - if (dict_get_str (xdata, conf->qb_xattr_key, &format)) - goto out; - - if (!format) { - qb_inode_cleanup (this, inode, 1); - goto out; - } - - op_errno = qb_format_extract (this, format, inode); - if (op_errno) - op_ret = -1; - - qb_iatt_fixup (this, inode, buf); -out: - QB_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, buf, - xdata, postparent); - return 0; -} - - -int -qb_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) -{ - qb_conf_t *conf = NULL; - - conf = this->private; - - xdata = xdata ? dict_ref (xdata) : dict_new (); - - if (!xdata) - goto enomem; - - if (dict_set_int32 (xdata, conf->qb_xattr_key, 0)) - goto enomem; - - STACK_WIND (frame, qb_lookup_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, loc, xdata); - dict_unref (xdata); - return 0; -enomem: - QB_STACK_UNWIND (lookup, frame, -1, ENOMEM, 0, 0, 0, 0); - if (xdata) - dict_unref (xdata); - return 0; -} - - -int -qb_setxattr_format (call_frame_t *frame, xlator_t *this, call_stub_t *stub, - dict_t *xattr, inode_t *inode) -{ - char *format = NULL; - int op_errno = 0; - qb_local_t *qb_local = NULL; - data_t *data = NULL; - qb_inode_t *qb_inode; - - if (!(data = dict_get (xattr, "trusted.glusterfs.block-format"))) { - QB_STUB_RESUME (stub); - return 0; - } - - format = alloca (data->len + 1); - memcpy (format, data->data, data->len); - format[data->len] = 0; - - op_errno = qb_format_extract (this, format, inode); - if (op_errno) { - QB_STUB_UNWIND (stub, -1, op_errno); - return 0; - } - qb_inode = qb_inode_ctx_get(this, inode); - - qb_local = frame->local; - - qb_local->stub = stub; - qb_local->inode = inode_ref (inode); - - snprintf(qb_local->fmt, QB_XATTR_VAL_MAX, "%s:%" PRId64, qb_inode->fmt, - qb_inode->size); - - qb_coroutine (frame, qb_format_and_resume); - - return 0; -} - - -int -qb_setxattr_snapshot_create (call_frame_t 
*frame, xlator_t *this, - call_stub_t *stub, dict_t *xattr, inode_t *inode) -{ - qb_local_t *qb_local = NULL; - char *name = NULL; - data_t *data = NULL; - - if (!(data = dict_get (xattr, "trusted.glusterfs.block-snapshot-create"))) { - QB_STUB_RESUME (stub); - return 0; - } - - name = alloca (data->len + 1); - memcpy (name, data->data, data->len); - name[data->len] = 0; - - qb_local = frame->local; - - qb_local->stub = stub; - qb_local->inode = inode_ref (inode); - strncpy (qb_local->name, name, 128); - - qb_coroutine (frame, qb_snapshot_create); - - return 0; -} - - -int -qb_setxattr_snapshot_delete (call_frame_t *frame, xlator_t *this, - call_stub_t *stub, dict_t *xattr, inode_t *inode) -{ - qb_local_t *qb_local = NULL; - char *name = NULL; - data_t *data = NULL; - - if (!(data = dict_get (xattr, "trusted.glusterfs.block-snapshot-delete"))) { - QB_STUB_RESUME (stub); - return 0; - } - - name = alloca (data->len + 1); - memcpy (name, data->data, data->len); - name[data->len] = 0; - - qb_local = frame->local; - - qb_local->stub = stub; - qb_local->inode = inode_ref (inode); - strncpy (qb_local->name, name, 128); - - qb_coroutine (frame, qb_snapshot_delete); - - return 0; -} - -int -qb_setxattr_snapshot_goto (call_frame_t *frame, xlator_t *this, - call_stub_t *stub, dict_t *xattr, inode_t *inode) -{ - qb_local_t *qb_local = NULL; - char *name = NULL; - data_t *data = NULL; - - if (!(data = dict_get (xattr, "trusted.glusterfs.block-snapshot-goto"))) { - QB_STUB_RESUME (stub); - return 0; - } - - name = alloca (data->len + 1); - memcpy (name, data->data, data->len); - name[data->len] = 0; - - qb_local = frame->local; - - qb_local->stub = stub; - qb_local->inode = inode_ref (inode); - strncpy (qb_local->name, name, 128); - - qb_coroutine (frame, qb_snapshot_goto); - - return 0; -} - - -int -qb_setxattr_common (call_frame_t *frame, xlator_t *this, call_stub_t *stub, - dict_t *xattr, inode_t *inode) -{ - - if (dict_get (xattr, "trusted.glusterfs.block-format")) { - 
qb_setxattr_format (frame, this, stub, xattr, inode); - return 0; - } - - if (dict_get (xattr, "trusted.glusterfs.block-snapshot-create")) { - qb_setxattr_snapshot_create (frame, this, stub, xattr, inode); - return 0; - } - - if (dict_get (xattr, "trusted.glusterfs.block-snapshot-delete")) { - qb_setxattr_snapshot_delete (frame, this, stub, xattr, inode); - return 0; - } - - if (dict_get (xattr, "trusted.glusterfs.block-snapshot-goto")) { - qb_setxattr_snapshot_goto (frame, this, stub, xattr, inode); - return 0; - } - - QB_STUB_RESUME (stub); - - return 0; -} - - -int -qb_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr, - int flags, dict_t *xdata) -{ - call_stub_t *stub = NULL; - - if (qb_local_init (frame) != 0) - goto enomem; - - stub = fop_setxattr_stub (frame, default_setxattr_resume, loc, xattr, - flags, xdata); - if (!stub) - goto enomem; - - qb_setxattr_common (frame, this, stub, xattr, loc->inode); - - return 0; -enomem: - QB_STACK_UNWIND (setxattr, frame, -1, ENOMEM, 0); - return 0; -} - - -int -qb_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xattr, - int flags, dict_t *xdata) -{ - call_stub_t *stub = NULL; - - if (qb_local_init (frame) != 0) - goto enomem; - - stub = fop_fsetxattr_stub (frame, default_fsetxattr_resume, fd, xattr, - flags, xdata); - if (!stub) - goto enomem; - - qb_setxattr_common (frame, this, stub, xattr, fd->inode); - - return 0; -enomem: - QB_STACK_UNWIND (fsetxattr, frame, -1, ENOMEM, 0); - return 0; -} - - -int -qb_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, fd_t *fd, dict_t *xdata) -{ - call_stub_t *stub = NULL; - qb_local_t *qb_local = NULL; - - qb_local = frame->local; - - if (op_ret < 0) - goto unwind; - - if (!qb_inode_ctx_get (this, qb_local->inode)) - goto unwind; - - stub = fop_open_cbk_stub (frame, NULL, op_ret, op_errno, fd, xdata); - if (!stub) { - op_ret = -1; - op_errno = ENOMEM; - goto unwind; - } - - qb_local->stub = stub; - - 
qb_coroutine (frame, qb_co_open); - - return 0; -unwind: - QB_STACK_UNWIND (open, frame, op_ret, op_errno, fd, xdata); - return 0; -} - - -int -qb_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, - fd_t *fd, dict_t *xdata) -{ - qb_local_t *qb_local = NULL; - qb_inode_t *qb_inode = NULL; - - qb_inode = qb_inode_ctx_get (this, loc->inode); - if (!qb_inode) { - STACK_WIND (frame, default_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, loc, flags, fd, - xdata); - return 0; - } - - if (qb_local_init (frame) != 0) - goto enomem; - - qb_local = frame->local; - - qb_local->inode = inode_ref (loc->inode); - qb_local->fd = fd_ref (fd); - - STACK_WIND (frame, qb_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); - return 0; -enomem: - QB_STACK_UNWIND (open, frame, -1, ENOMEM, 0, 0); - return 0; -} - - -int -qb_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, - int count, off_t offset, uint32_t flags, struct iobref *iobref, - dict_t *xdata) -{ - qb_local_t *qb_local = NULL; - qb_inode_t *qb_inode = NULL; - - qb_inode = qb_inode_ctx_get (this, fd->inode); - if (!qb_inode) { - STACK_WIND (frame, default_writev_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->writev, fd, vector, count, - offset, flags, iobref, xdata); - return 0; - } - - if (qb_local_init (frame) != 0) - goto enomem; - - qb_local = frame->local; - - qb_local->inode = inode_ref (fd->inode); - qb_local->fd = fd_ref (fd); - - qb_local->stub = fop_writev_stub (frame, NULL, fd, vector, count, - offset, flags, iobref, xdata); - if (!qb_local->stub) - goto enomem; - - qb_coroutine (frame, qb_co_writev); - - return 0; -enomem: - QB_STACK_UNWIND (writev, frame, -1, ENOMEM, 0, 0, 0); - return 0; -} - - -int -qb_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset, uint32_t flags, dict_t *xdata) -{ - qb_local_t *qb_local = NULL; - qb_inode_t *qb_inode = NULL; - - qb_inode = qb_inode_ctx_get (this, 
fd->inode); - if (!qb_inode) { - STACK_WIND (frame, default_readv_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readv, fd, size, offset, - flags, xdata); - return 0; - } - - if (qb_local_init (frame) != 0) - goto enomem; - - qb_local = frame->local; - - qb_local->inode = inode_ref (fd->inode); - qb_local->fd = fd_ref (fd); - - qb_local->stub = fop_readv_stub (frame, NULL, fd, size, offset, - flags, xdata); - if (!qb_local->stub) - goto enomem; - - qb_coroutine (frame, qb_co_readv); - - return 0; -enomem: - QB_STACK_UNWIND (readv, frame, -1, ENOMEM, 0, 0, 0, 0, 0); - return 0; -} - - -int -qb_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int dsync, - dict_t *xdata) -{ - qb_local_t *qb_local = NULL; - qb_inode_t *qb_inode = NULL; - - qb_inode = qb_inode_ctx_get (this, fd->inode); - if (!qb_inode) { - STACK_WIND (frame, default_fsync_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsync, fd, dsync, xdata); - return 0; - } - - if (qb_local_init (frame) != 0) - goto enomem; - - qb_local = frame->local; - - qb_local->inode = inode_ref (fd->inode); - qb_local->fd = fd_ref (fd); - - qb_local->stub = fop_fsync_stub (frame, NULL, fd, dsync, xdata); - - if (!qb_local->stub) - goto enomem; - - qb_coroutine (frame, qb_co_fsync); - - return 0; -enomem: - QB_STACK_UNWIND (fsync, frame, -1, ENOMEM, 0, 0, 0); - return 0; -} - - -int -qb_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) -{ - qb_local_t *qb_local = NULL; - qb_inode_t *qb_inode = NULL; - - qb_inode = qb_inode_ctx_get (this, fd->inode); - if (!qb_inode) { - STACK_WIND (frame, default_flush_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->flush, fd, xdata); - return 0; - } - - if (qb_local_init (frame) != 0) - goto enomem; - - qb_local = frame->local; - - qb_local->inode = inode_ref (fd->inode); - qb_local->fd = fd_ref (fd); - - qb_local->stub = fop_flush_stub (frame, NULL, fd, xdata); - - if (!qb_local->stub) - goto enomem; - - qb_coroutine (frame, qb_co_fsync); - - return 0; 
-enomem: - QB_STACK_UNWIND (flush, frame, -1, ENOMEM, 0); - return 0; -} - -static int32_t -qb_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, - dict_t *xdata) -{ - qb_conf_t *conf = this->private; - gf_dirent_t *entry; - char *format; - - list_for_each_entry(entry, &entries->list, list) { - if (!entry->inode || !entry->dict) - continue; - - format = NULL; - if (dict_get_str(entry->dict, conf->qb_xattr_key, &format)) - continue; - - if (!format) { - qb_inode_cleanup(this, entry->inode, 1); - continue; - } - - if (qb_format_extract(this, format, entry->inode)) - continue; - - qb_iatt_fixup(this, entry->inode, &entry->d_stat); - } - - STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, entries, xdata); - return 0; -} - -static int32_t -qb_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t off, dict_t *xdata) -{ - qb_conf_t *conf = this->private; - - xdata = xdata ? dict_ref(xdata) : dict_new(); - if (!xdata) - goto enomem; - - if (dict_set_int32 (xdata, conf->qb_xattr_key, 0)) - goto enomem; - - STACK_WIND(frame, qb_readdirp_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readdirp, fd, size, off, xdata); - - dict_unref(xdata); - return 0; - -enomem: - QB_STACK_UNWIND(readdirp, frame, -1, ENOMEM, NULL, NULL); - if (xdata) - dict_unref(xdata); - return 0; -} - -int -qb_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, - dict_t *xdata) -{ - qb_local_t *qb_local = NULL; - qb_inode_t *qb_inode = NULL; - - qb_inode = qb_inode_ctx_get (this, loc->inode); - if (!qb_inode) { - STACK_WIND (frame, default_truncate_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->truncate, loc, offset, - xdata); - return 0; - } - - if (qb_local_init (frame) != 0) - goto enomem; - - qb_local = frame->local; - - qb_local->inode = inode_ref (loc->inode); - qb_local->fd = fd_anonymous (loc->inode); - - qb_local->stub = fop_truncate_stub (frame, NULL, loc, offset, xdata); 
- - if (!qb_local->stub) - goto enomem; - - qb_coroutine (frame, qb_co_truncate); - - return 0; -enomem: - QB_STACK_UNWIND (truncate, frame, -1, ENOMEM, 0, 0, 0); - return 0; -} - - -int -qb_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, - dict_t *xdata) -{ - qb_local_t *qb_local = NULL; - qb_inode_t *qb_inode = NULL; - - qb_inode = qb_inode_ctx_get (this, fd->inode); - if (!qb_inode) { - STACK_WIND (frame, default_ftruncate_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->ftruncate, fd, offset, - xdata); - return 0; - } - - if (qb_local_init (frame) != 0) - goto enomem; - - qb_local = frame->local; - - qb_local->inode = inode_ref (fd->inode); - qb_local->fd = fd_ref (fd); - - qb_local->stub = fop_ftruncate_stub (frame, NULL, fd, offset, xdata); - - if (!qb_local->stub) - goto enomem; - - qb_coroutine (frame, qb_co_truncate); - - return 0; -enomem: - QB_STACK_UNWIND (ftruncate, frame, -1, ENOMEM, 0, 0, 0); - return 0; -} - - -int -qb_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct iatt *iatt, dict_t *xdata) -{ - inode_t *inode = NULL; - - inode = frame->local; - frame->local = NULL; - - if (inode) { - qb_iatt_fixup (this, inode, iatt); - inode_unref (inode); - } - - QB_STACK_UNWIND (stat, frame, op_ret, op_errno, iatt, xdata); - - return 0; -} - -int -qb_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) -{ - if (qb_inode_ctx_get (this, loc->inode)) - frame->local = inode_ref (loc->inode); - - STACK_WIND (frame, qb_stat_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->stat, loc, xdata); - return 0; -} - - -int -qb_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct iatt *iatt, dict_t *xdata) -{ - inode_t *inode = NULL; - - inode = frame->local; - frame->local = NULL; - - if (inode) { - qb_iatt_fixup (this, inode, iatt); - inode_unref (inode); - } - - QB_STACK_UNWIND (fstat, frame, op_ret, op_errno, iatt, xdata); - - return 0; 
-} - - -int -qb_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) -{ - if (qb_inode_ctx_get (this, fd->inode)) - frame->local = inode_ref (fd->inode); - - STACK_WIND (frame, qb_fstat_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fstat, fd, xdata); - return 0; -} - - -int -qb_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct iatt *pre, struct iatt *post, - dict_t *xdata) -{ - inode_t *inode = NULL; - - inode = frame->local; - frame->local = NULL; - - if (inode) { - qb_iatt_fixup (this, inode, pre); - qb_iatt_fixup (this, inode, post); - inode_unref (inode); - } - - QB_STACK_UNWIND (setattr, frame, op_ret, op_errno, pre, post, xdata); - - return 0; -} - - -int -qb_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *buf, - int valid, dict_t *xdata) -{ - if (qb_inode_ctx_get (this, loc->inode)) - frame->local = inode_ref (loc->inode); - - STACK_WIND (frame, qb_setattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setattr, loc, buf, valid, xdata); - return 0; -} - - -int -qb_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct iatt *pre, struct iatt *post, - dict_t *xdata) -{ - inode_t *inode = NULL; - - inode = frame->local; - frame->local = NULL; - - if (inode) { - qb_iatt_fixup (this, inode, pre); - qb_iatt_fixup (this, inode, post); - inode_unref (inode); - } - - QB_STACK_UNWIND (fsetattr, frame, op_ret, op_errno, pre, post, xdata); - - return 0; -} - - -int -qb_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *buf, - int valid, dict_t *xdata) -{ - if (qb_inode_ctx_get (this, fd->inode)) - frame->local = inode_ref (fd->inode); - - STACK_WIND (frame, qb_setattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsetattr, fd, buf, valid, xdata); - return 0; -} - - -int -qb_forget (xlator_t *this, inode_t *inode) -{ - return qb_inode_cleanup (this, inode, 0); -} - - -int -qb_release (xlator_t *this, fd_t 
*fd) -{ - call_frame_t *frame = NULL; - - frame = create_frame (this, this->ctx->pool); - if (!frame) { - gf_log (this->name, GF_LOG_ERROR, - "Could not allocate frame. " - "Leaking QEMU BlockDriverState"); - return -1; - } - - if (qb_local_init (frame) != 0) { - gf_log (this->name, GF_LOG_ERROR, - "Could not allocate local. " - "Leaking QEMU BlockDriverState"); - STACK_DESTROY (frame->root); - return -1; - } - - if (qb_coroutine (frame, qb_co_close) != 0) { - gf_log (this->name, GF_LOG_ERROR, - "Could not allocate coroutine. " - "Leaking QEMU BlockDriverState"); - qb_local_free (this, frame->local); - frame->local = NULL; - STACK_DESTROY (frame->root); - } - - return 0; -} - -int -mem_acct_init (xlator_t *this) -{ - int ret = -1; - - ret = xlator_mem_acct_init (this, gf_qb_mt_end + 1); - - if (ret) - gf_log (this->name, GF_LOG_ERROR, "Memory accounting init " - "failed"); - return ret; -} - - -int -reconfigure (xlator_t *this, dict_t *options) -{ - return 0; -} - - -int -init (xlator_t *this) -{ - qb_conf_t *conf = NULL; - int32_t ret = -1; - static int bdrv_inited = 0; - - if (!this->children || this->children->next) { - gf_log (this->name, GF_LOG_ERROR, - "FATAL: qemu-block (%s) not configured with exactly " - "one child", this->name); - goto out; - } - - conf = GF_CALLOC (1, sizeof (*conf), gf_qb_mt_qb_conf_t); - if (!conf) - goto out; - - /* configure 'option window-size <size>' */ - GF_OPTION_INIT ("default-password", conf->default_password, str, out); - - /* qemu coroutines use "co_mutex" for synchronizing among themselves. - However "co_mutex" itself is not threadsafe if the coroutine framework - is multithreaded (which usually is not). However synctasks are - fundamentally multithreaded, so for now create a syncenv which has - scaling limits set to max 1 thread so that the qemu coroutines can - execute "safely". - - Future work: provide an implementation of "co_mutex" which is - threadsafe and use the global multithreaded ctx->env syncenv. 
- */ - conf->env = syncenv_new (0, 1, 1); - - this->private = conf; - - ret = 0; - - snprintf (conf->qb_xattr_key, QB_XATTR_KEY_MAX, QB_XATTR_KEY_FMT, - this->name); - - cur_mon = (void *) 1; - - if (!bdrv_inited) { - bdrv_init (); - bdrv_inited = 1; - } - -out: - if (ret) - GF_FREE (conf); - - return ret; -} - - -void -fini (xlator_t *this) -{ - qb_conf_t *conf = NULL; - - conf = this->private; - - this->private = NULL; - - /* No need to do inode_unref of conf->root_inode as ref/unref doesn't - * apply for it*/ - GF_FREE (conf); - - return; -} - - -struct xlator_fops fops = { - .lookup = qb_lookup, - .fsetxattr = qb_fsetxattr, - .setxattr = qb_setxattr, - .open = qb_open, - .writev = qb_writev, - .readv = qb_readv, - .fsync = qb_fsync, - .truncate = qb_truncate, - .ftruncate = qb_ftruncate, - .stat = qb_stat, - .fstat = qb_fstat, - .setattr = qb_setattr, - .fsetattr = qb_fsetattr, - .flush = qb_flush, -/* - .getxattr = qb_getxattr, - .fgetxattr = qb_fgetxattr -*/ - .readdirp = qb_readdirp, -}; - - -struct xlator_cbks cbks = { - .forget = qb_forget, - .release = qb_release, -}; - - -struct xlator_dumpops dumpops = { -}; - - -struct volume_options options[] = { - { .key = {"default-password"}, - .type = GF_OPTION_TYPE_STR, - .default_value = "", - .description = "Default password for the AES encrypted block images." - }, - { .key = {NULL} }, -}; diff --git a/xlators/features/qemu-block/src/qemu-block.h b/xlators/features/qemu-block/src/qemu-block.h deleted file mode 100644 index 21cdcec2613..00000000000 --- a/xlators/features/qemu-block/src/qemu-block.h +++ /dev/null @@ -1,109 +0,0 @@ -/* - Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. 
-*/ - -#ifndef __QEMU_BLOCK_H -#define __QEMU_BLOCK_H - -#include "syncop.h" -#include "call-stub.h" -#include "block/block_int.h" -#include "monitor/monitor.h" - -/* QB_XATTR_KEY_FMT is the on-disk xattr stored in the inode which - indicates that the file must be "interpreted" by the block format - logic. The value of the key is of the pattern: - - "format:virtual_size" - - e.g - - "qcow2:20GB" or "qed:100GB" - - The format and virtual size are colon separated. The format is - a case sensitive string which qemu recognizes. virtual_size is - specified as a size which glusterfs recognizes as size (i.e., - value accepted by gf_string2bytesize()) -*/ -#define QB_XATTR_KEY_FMT "trusted.glusterfs.%s.format" - -#define QB_XATTR_KEY_MAX 64 - -#define QB_XATTR_VAL_MAX 64 - - -typedef struct qb_inode { - char fmt[QB_XATTR_VAL_MAX]; /* this is only the format, not "format:size" */ - uint64_t size; /* virtual size in bytes */ - BlockDriverState *bs; - int refcnt; - uuid_t backing_gfid; - char *backing_fname; -} qb_inode_t; - - -typedef struct qb_conf { - Monitor *mon; - struct syncenv *env; - char qb_xattr_key[QB_XATTR_KEY_MAX]; - char *default_password; - inode_t *root_inode; -} qb_conf_t; - - -typedef struct qb_local { - call_frame_t *frame; /* backpointer */ - call_stub_t *stub; - inode_t *inode; - fd_t *fd; - char fmt[QB_XATTR_VAL_MAX+1]; - char name[256]; - synctask_fn_t synctask_fn; - struct list_head list; -} qb_local_t; - -void qb_local_free (xlator_t *this, qb_local_t *local); -int qb_coroutine (call_frame_t *frame, synctask_fn_t fn); -inode_t *qb_inode_from_filename (const char *filename); -int qb_inode_to_filename (inode_t *inode, char *filename, int size); -int qb_format_extract (xlator_t *this, char *format, inode_t *inode); - -qb_inode_t *qb_inode_ctx_get (xlator_t *this, inode_t *inode); - -#define QB_STACK_UNWIND(typ, frame, args ...) 
do { \ - qb_local_t *__local = frame->local; \ - xlator_t *__this = frame->this; \ - \ - frame->local = NULL; \ - STACK_UNWIND_STRICT (typ, frame, args); \ - if (__local) \ - qb_local_free (__this, __local); \ - } while (0) - -#define QB_STUB_UNWIND(stub, op_ret, op_errno) do { \ - qb_local_t *__local = stub->frame->local; \ - xlator_t *__this = stub->frame->this; \ - \ - stub->frame->local = NULL; \ - call_unwind_error (stub, op_ret, op_errno); \ - if (__local) \ - qb_local_free (__this, __local); \ - } while (0) - -#define QB_STUB_RESUME(stub_errno) do { \ - qb_local_t *__local = stub->frame->local; \ - xlator_t *__this = stub->frame->this; \ - \ - stub->frame->local = NULL; \ - call_resume (stub); \ - if (__local) \ - qb_local_free (__this, __local); \ - } while (0) - -#endif /* !__QEMU_BLOCK_H */ diff --git a/xlators/features/quiesce/src/Makefile.am b/xlators/features/quiesce/src/Makefile.am index 862ca76f3e5..74ea999c045 100644 --- a/xlators/features/quiesce/src/Makefile.am +++ b/xlators/features/quiesce/src/Makefile.am @@ -1,14 +1,15 @@ xlator_LTLIBRARIES = quiesce.la xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features -quiesce_la_LDFLAGS = $(GF_XLATOR_DEFAULT_LDFLAGS) +quiesce_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) quiesce_la_SOURCES = quiesce.c quiesce_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la -noinst_HEADERS = quiesce.h quiesce-mem-types.h +noinst_HEADERS = quiesce.h quiesce-mem-types.h quiesce-messages.h -AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ + -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src AM_CFLAGS = -Wall $(GF_CFLAGS) diff --git a/xlators/features/quiesce/src/quiesce-mem-types.h b/xlators/features/quiesce/src/quiesce-mem-types.h index 6e582f424ea..416456b13af 100644 --- a/xlators/features/quiesce/src/quiesce-mem-types.h +++ b/xlators/features/quiesce/src/quiesce-mem-types.h @@ -11,10 +11,11 @@ #ifndef 
__QUIESCE_MEM_TYPES_H__ #define __QUIESCE_MEM_TYPES_H__ -#include "mem-types.h" +#include <glusterfs/mem-types.h> enum gf_quiesce_mem_types_ { - gf_quiesce_mt_priv_t = gf_common_mt_end + 1, - gf_quiesce_mt_end + gf_quiesce_mt_priv_t = gf_common_mt_end + 1, + gf_quiesce_mt_failover_hosts, + gf_quiesce_mt_end }; #endif diff --git a/xlators/features/quiesce/src/quiesce-messages.h b/xlators/features/quiesce/src/quiesce-messages.h new file mode 100644 index 00000000000..32ffd409807 --- /dev/null +++ b/xlators/features/quiesce/src/quiesce-messages.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> + * This file is part of GlusterFS. + * + * This file is licensed to you under your choice of the GNU Lesser + * General Public License, version 3 or any later version (LGPLv3 or + * later), or the GNU General Public License, version 2 (GPLv2), in all + * cases as published by the Free Software Foundation. + */ + +#ifndef __QUIESCE_MESSAGES_H__ +#define __QUIESCE_MESSAGES_H__ + +#include <glusterfs/glfs-message-id.h> + +/* To add new message IDs, append new identifiers at the end of the list. + * + * Never remove a message ID. If it's not used anymore, you can rename it or + * leave it as it is, but not delete it. This is to prevent reutilization of + * IDs by other messages. + * + * The component name must match one of the entries defined in + * glfs-message-id.h. + */ + +GLFS_MSGID(QUIESCE, QUIESCE_MSG_INVAL_HOST, QUIESCE_MSG_FAILOVER_FAILED); + +#endif /* __QUIESCE_MESSAGES_H__ */ diff --git a/xlators/features/quiesce/src/quiesce.c b/xlators/features/quiesce/src/quiesce.c index 3a4100f796e..0e5eb60a16f 100644 --- a/xlators/features/quiesce/src/quiesce.c +++ b/xlators/features/quiesce/src/quiesce.c @@ -8,663 +8,814 @@ cases as published by the Free Software Foundation. 
*/ #include "quiesce.h" -#include "defaults.h" -#include "call-stub.h" +#include <glusterfs/defaults.h> +#include <glusterfs/call-stub.h> /* TODO: */ /* Think about 'writev/_*_lk/setattr/xattrop/' fops to do re-transmittion */ +void +gf_quiesce_timeout(void *data); /* Quiesce Specific Functions */ void -gf_quiesce_local_wipe (xlator_t *this, quiesce_local_t *local) +gf_quiesce_local_wipe(xlator_t *this, quiesce_local_t *local) { - if (!local || !this || !this->private) - return; + if (!local || !this || !this->private) + return; - if (local->loc.inode) - loc_wipe (&local->loc); - if (local->fd) - fd_unref (local->fd); - GF_FREE (local->name); - GF_FREE (local->volname); - if (local->dict) - dict_unref (local->dict); - if (local->iobref) - iobref_unref (local->iobref); - GF_FREE (local->vector); + if (local->loc.inode) + loc_wipe(&local->loc); + if (local->fd) + fd_unref(local->fd); + GF_FREE(local->name); + GF_FREE(local->volname); + if (local->dict) + dict_unref(local->dict); + if (local->iobref) + iobref_unref(local->iobref); + GF_FREE(local->vector); - mem_put (local); + mem_put(local); } -call_stub_t * -gf_quiesce_dequeue (xlator_t *this) +void +__gf_quiesce_start_timer(xlator_t *this, quiesce_priv_t *priv) { - call_stub_t *stub = NULL; - quiesce_priv_t *priv = NULL; - - priv = this->private; + struct timespec timeout = { + 0, + }; - if (!priv || list_empty (&priv->req)) - return NULL; + if (!priv->timer) { + timeout.tv_sec = priv->timeout; + timeout.tv_nsec = 0; - LOCK (&priv->lock); - { - stub = list_entry (priv->req.next, call_stub_t, list); - list_del_init (&stub->list); - priv->queue_size--; + priv->timer = gf_timer_call_after(this->ctx, timeout, + gf_quiesce_timeout, (void *)this); + if (priv->timer == NULL) { + gf_log(this->name, GF_LOG_ERROR, "Cannot create timer"); } - UNLOCK (&priv->lock); + } +} + +static void +__gf_quiesce_cleanup_failover_hosts(xlator_t *this, quiesce_priv_t *priv) +{ + quiesce_failover_hosts_t *tmp = NULL; + 
quiesce_failover_hosts_t *failover_host = NULL; - return stub; + list_for_each_entry_safe(failover_host, tmp, &priv->failover_list, list) + { + GF_FREE(failover_host->addr); + list_del(&failover_host->list); + GF_FREE(failover_host); + } + return; } -void * -gf_quiesce_dequeue_start (void *data) +void +gf_quiesce_populate_failover_hosts(xlator_t *this, quiesce_priv_t *priv, + const char *value) +{ + char *dup_val = NULL; + char *addr_tok = NULL; + char *save_ptr = NULL; + quiesce_failover_hosts_t *failover_host = NULL; + + if (!value) + goto out; + + dup_val = gf_strdup(value); + if (!dup_val) + goto out; + + addr_tok = strtok_r(dup_val, ",", &save_ptr); + LOCK(&priv->lock); + { + if (!list_empty(&priv->failover_list)) + __gf_quiesce_cleanup_failover_hosts(this, priv); + + while (addr_tok) { + if (!valid_internet_address(addr_tok, _gf_true, _gf_false)) { + gf_msg(this->name, GF_LOG_INFO, 0, QUIESCE_MSG_INVAL_HOST, + "Specified " + "invalid internet address:%s", + addr_tok); + continue; + } + failover_host = GF_CALLOC(1, sizeof(*failover_host), + gf_quiesce_mt_failover_hosts); + failover_host->addr = gf_strdup(addr_tok); + INIT_LIST_HEAD(&failover_host->list); + list_add(&failover_host->list, &priv->failover_list); + addr_tok = strtok_r(NULL, ",", &save_ptr); + } + } + UNLOCK(&priv->lock); + GF_FREE(dup_val); +out: + return; +} + +int32_t +gf_quiesce_failover_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - xlator_t *this = NULL; - quiesce_priv_t *priv = NULL; - call_stub_t *stub = NULL; + quiesce_priv_t *priv = NULL; - this = data; - priv = this->private; - THIS = this; + if (op_ret < 0) { + /* Failure here doesn't mean the failover to another host didn't + * succeed, we will know if failover succeeds or not by the + * CHILD_UP/CHILD_DOWN event. 
A failure here indicates something + * went wrong with the submission of failover command, hence + * just abort the failover attempts without retrying with other + * hosts. + */ + gf_msg(this->name, GF_LOG_INFO, op_errno, QUIESCE_MSG_FAILOVER_FAILED, + "Initiating failover to host:%s failed:", (char *)cookie); + } - while (!list_empty (&priv->req)) { - stub = gf_quiesce_dequeue (this); - if (stub) { - call_resume (stub); - } - } + GF_FREE(cookie); + STACK_DESTROY(frame->root); - return 0; + priv = this->private; + __gf_quiesce_start_timer(this, priv); + + return 0; } +int +__gf_quiesce_perform_failover(xlator_t *this) +{ + int ret = 0; + call_frame_t *frame = NULL; + dict_t *dict = NULL; + quiesce_priv_t *priv = NULL; + quiesce_failover_hosts_t *failover_host = NULL; + quiesce_failover_hosts_t *host = NULL; + + priv = this->private; + + if (priv->pass_through) { + gf_msg_trace(this->name, 0, + "child is up, hence not " + "performing any failover"); + goto out; + } + + list_for_each_entry(failover_host, &priv->failover_list, list) + { + if (failover_host->tried == 0) { + host = failover_host; + failover_host->tried = 1; + break; + } + } + if (!host) { + /*TODO: Keep trying until any of the gfproxy comes back up. 
+ Currently it tries failing over once for each host, + if it doesn't succeed then returns error to mount point + list_for_each_entry (failover_host, + &priv->failover_list, list) { + failover_host->tried = 0; + }*/ + gf_msg_debug(this->name, 0, + "all the failover hosts have " + "been tried and looks like didn't succeed"); + ret = -1; + goto out; + } + + frame = create_frame(this, this->ctx->pool); + if (!frame) { + gf_msg_debug(this->name, 0, "failed to create the frame"); + ret = -1; + goto out; + } + + dict = dict_new(); + + ret = dict_set_dynstr(dict, CLIENT_CMD_CONNECT, gf_strdup(host->addr)); + + gf_msg_trace(this->name, 0, "Initiating failover to:%s", host->addr); + + STACK_WIND_COOKIE(frame, gf_quiesce_failover_cbk, NULL, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, NULL, dict, 0, NULL); +out: -void -gf_quiesce_timeout (void *data) + if (dict) + dict_unref(dict); + + return ret; +} + +call_stub_t * +gf_quiesce_dequeue(xlator_t *this) { - xlator_t *this = NULL; - quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + quiesce_priv_t *priv = NULL; - this = data; - priv = this->private; - THIS = this; + priv = this->private; - LOCK (&priv->lock); - { - priv->pass_through = _gf_true; - } - UNLOCK (&priv->lock); + if (!priv || list_empty(&priv->req)) + return NULL; - gf_quiesce_dequeue_start (this); + LOCK(&priv->lock); + { + stub = list_entry(priv->req.next, call_stub_t, list); + list_del_init(&stub->list); + priv->queue_size--; + } + UNLOCK(&priv->lock); - return; + return stub; } -void -gf_quiesce_enqueue (xlator_t *this, call_stub_t *stub) +void * +gf_quiesce_dequeue_start(void *data) { - quiesce_priv_t *priv = NULL; - struct timespec timeout = {0,}; + xlator_t *this = NULL; + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; - priv = this->private; - if (!priv) { - gf_log_callingfn (this->name, GF_LOG_ERROR, - "this->private == NULL"); - return; - } + this = data; + priv = this->private; + THIS = this; - LOCK (&priv->lock); - { - 
list_add_tail (&stub->list, &priv->req); - priv->queue_size++; + while (!list_empty(&priv->req)) { + stub = gf_quiesce_dequeue(this); + if (stub) { + call_resume(stub); } - UNLOCK (&priv->lock); + } - if (!priv->timer) { - timeout.tv_sec = 20; - timeout.tv_nsec = 0; + return 0; +} + +void +gf_quiesce_timeout(void *data) +{ + xlator_t *this = NULL; + quiesce_priv_t *priv = NULL; + int ret = -1; + + this = data; + priv = this->private; + THIS = this; - priv->timer = gf_timer_call_after (this->ctx, - timeout, - gf_quiesce_timeout, - (void *) this); + LOCK(&priv->lock); + { + priv->timer = NULL; + if (priv->pass_through) { + UNLOCK(&priv->lock); + goto out; } + ret = __gf_quiesce_perform_failover(THIS); + } + UNLOCK(&priv->lock); - return; + if (ret < 0) { + priv->pass_through = _gf_true; + gf_quiesce_dequeue_start(this); + } + +out: + return; } +void +gf_quiesce_enqueue(xlator_t *this, call_stub_t *stub) +{ + quiesce_priv_t *priv = NULL; + priv = this->private; + if (!priv) { + gf_log_callingfn(this->name, GF_LOG_ERROR, "this->private == NULL"); + return; + } + + LOCK(&priv->lock); + { + list_add_tail(&stub->list, &priv->req); + priv->queue_size++; + __gf_quiesce_start_timer(this, priv); + } + UNLOCK(&priv->lock); + + return; +} /* _CBK function section */ int32_t -quiesce_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *dict, struct iatt *postparent) +quiesce_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *dict, struct iatt *postparent) { - call_stub_t *stub = NULL; - quiesce_local_t *local = NULL; - - local = frame->local; - frame->local = NULL; - if ((op_ret == -1) && (op_errno == ENOTCONN)) { - /* Re-transmit (by putting in the queue) */ - stub = fop_lookup_stub (frame, default_lookup_resume, - &local->loc, local->dict); - if (!stub) { - STACK_UNWIND_STRICT (lookup, frame, -1, 
ENOMEM, - NULL, NULL, NULL, NULL); - goto out; - } + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; - gf_quiesce_enqueue (this, stub); - goto out; + local = frame->local; + frame->local = NULL; + if ((op_ret == -1) && (op_errno == ENOTCONN)) { + /* Re-transmit (by putting in the queue) */ + stub = fop_lookup_stub(frame, default_lookup_resume, &local->loc, + local->dict); + if (!stub) { + STACK_UNWIND_STRICT(lookup, frame, -1, ENOMEM, NULL, NULL, NULL, + NULL); + goto out; } - STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf, - dict, postparent); + gf_quiesce_enqueue(this, stub); + goto out; + } + + STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, dict, + postparent); out: - gf_quiesce_local_wipe (this, local); + gf_quiesce_local_wipe(this, local); - return 0; + return 0; } int32_t -quiesce_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, - dict_t *xdata) +quiesce_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + dict_t *xdata) { - call_stub_t *stub = NULL; - quiesce_local_t *local = NULL; - - local = frame->local; - frame->local = NULL; - if ((op_ret == -1) && (op_errno == ENOTCONN)) { - /* Re-transmit (by putting in the queue) */ - stub = fop_stat_stub (frame, default_stat_resume, - &local->loc, xdata); - if (!stub) { - STACK_UNWIND_STRICT (stat, frame, -1, ENOMEM, - NULL, NULL); - goto out; - } + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; - gf_quiesce_enqueue (this, stub); - goto out; + local = frame->local; + frame->local = NULL; + if ((op_ret == -1) && (op_errno == ENOTCONN)) { + /* Re-transmit (by putting in the queue) */ + stub = fop_stat_stub(frame, default_stat_resume, &local->loc, xdata); + if (!stub) { + STACK_UNWIND_STRICT(stat, frame, -1, ENOMEM, NULL, NULL); + goto out; } - STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf, xdata); + gf_quiesce_enqueue(this, 
stub); + goto out; + } + + STACK_UNWIND_STRICT(stat, frame, op_ret, op_errno, buf, xdata); out: - gf_quiesce_local_wipe (this, local); + gf_quiesce_local_wipe(this, local); - return 0; + return 0; } int32_t -quiesce_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +quiesce_access_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - call_stub_t *stub = NULL; - quiesce_local_t *local = NULL; - - local = frame->local; - frame->local = NULL; - if ((op_ret == -1) && (op_errno == ENOTCONN)) { - /* Re-transmit (by putting in the queue) */ - stub = fop_access_stub (frame, default_access_resume, - &local->loc, local->flag, xdata); - if (!stub) { - STACK_UNWIND_STRICT (access, frame, -1, ENOMEM, NULL); - goto out; - } + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; - gf_quiesce_enqueue (this, stub); - goto out; + local = frame->local; + frame->local = NULL; + if ((op_ret == -1) && (op_errno == ENOTCONN)) { + /* Re-transmit (by putting in the queue) */ + stub = fop_access_stub(frame, default_access_resume, &local->loc, + local->flag, xdata); + if (!stub) { + STACK_UNWIND_STRICT(access, frame, -1, ENOMEM, NULL); + goto out; } - STACK_UNWIND_STRICT (access, frame, op_ret, op_errno, xdata); + gf_quiesce_enqueue(this, stub); + goto out; + } + + STACK_UNWIND_STRICT(access, frame, op_ret, op_errno, xdata); out: - gf_quiesce_local_wipe (this, local); + gf_quiesce_local_wipe(this, local); - return 0; + return 0; } int32_t -quiesce_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, const char *path, - struct iatt *buf, dict_t *xdata) +quiesce_readlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, const char *path, + struct iatt *buf, dict_t *xdata) { - call_stub_t *stub = NULL; - quiesce_local_t *local = NULL; - - local = frame->local; - frame->local = NULL; 
- if ((op_ret == -1) && (op_errno == ENOTCONN)) { - /* Re-transmit (by putting in the queue) */ - stub = fop_readlink_stub (frame, default_readlink_resume, - &local->loc, local->size, xdata); - if (!stub) { - STACK_UNWIND_STRICT (readlink, frame, -1, ENOMEM, - NULL, NULL, NULL); - goto out; - } + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; - gf_quiesce_enqueue (this, stub); - goto out; + local = frame->local; + frame->local = NULL; + if ((op_ret == -1) && (op_errno == ENOTCONN)) { + /* Re-transmit (by putting in the queue) */ + stub = fop_readlink_stub(frame, default_readlink_resume, &local->loc, + local->size, xdata); + if (!stub) { + STACK_UNWIND_STRICT(readlink, frame, -1, ENOMEM, NULL, NULL, NULL); + goto out; } - STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, path, buf, xdata); + gf_quiesce_enqueue(this, stub); + goto out; + } + + STACK_UNWIND_STRICT(readlink, frame, op_ret, op_errno, path, buf, xdata); out: - gf_quiesce_local_wipe (this, local); + gf_quiesce_local_wipe(this, local); - return 0; + return 0; } int32_t -quiesce_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) +quiesce_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) { - call_stub_t *stub = NULL; - quiesce_local_t *local = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; - local = frame->local; - frame->local = NULL; - if ((op_ret == -1) && (op_errno == ENOTCONN)) { - /* Re-transmit (by putting in the queue) */ - stub = fop_open_stub (frame, default_open_resume, - &local->loc, local->flag, local->fd, - xdata); - if (!stub) { - STACK_UNWIND_STRICT (open, frame, -1, ENOMEM, - NULL, NULL); - goto out; - } - - gf_quiesce_enqueue (this, stub); - goto out; + local = frame->local; + frame->local = NULL; + if ((op_ret == -1) && (op_errno == ENOTCONN)) { + /* Re-transmit (by putting in the queue) */ + stub = 
fop_open_stub(frame, default_open_resume, &local->loc, + local->flag, local->fd, xdata); + if (!stub) { + STACK_UNWIND_STRICT(open, frame, -1, ENOMEM, NULL, NULL); + goto out; } - STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata); + gf_quiesce_enqueue(this, stub); + goto out; + } + + STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, xdata); out: - gf_quiesce_local_wipe (this, local); + gf_quiesce_local_wipe(this, local); - return 0; + return 0; } int32_t -quiesce_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iovec *vector, - int32_t count, struct iatt *stbuf, struct iobref *iobref, dict_t *xdata) +quiesce_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iovec *vector, + int32_t count, struct iatt *stbuf, struct iobref *iobref, + dict_t *xdata) { - call_stub_t *stub = NULL; - quiesce_local_t *local = NULL; - - local = frame->local; - frame->local = NULL; - if ((op_ret == -1) && (op_errno == ENOTCONN)) { - /* Re-transmit (by putting in the queue) */ - stub = fop_readv_stub (frame, default_readv_resume, - local->fd, local->size, local->offset, - local->io_flag, xdata); - if (!stub) { - STACK_UNWIND_STRICT (readv, frame, -1, ENOMEM, - NULL, 0, NULL, NULL, NULL); - goto out; - } + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; - gf_quiesce_enqueue (this, stub); - goto out; + local = frame->local; + frame->local = NULL; + if ((op_ret == -1) && (op_errno == ENOTCONN)) { + /* Re-transmit (by putting in the queue) */ + stub = fop_readv_stub(frame, default_readv_resume, local->fd, + local->size, local->offset, local->io_flag, + xdata); + if (!stub) { + STACK_UNWIND_STRICT(readv, frame, -1, ENOMEM, NULL, 0, NULL, NULL, + NULL); + goto out; } - STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count, - stbuf, iobref, xdata); + gf_quiesce_enqueue(this, stub); + goto out; + } + + STACK_UNWIND_STRICT(readv, frame, op_ret, 
op_errno, vector, count, stbuf, + iobref, xdata); out: - gf_quiesce_local_wipe (this, local); + gf_quiesce_local_wipe(this, local); - return 0; + return 0; } int32_t -quiesce_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +quiesce_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - call_stub_t *stub = NULL; - quiesce_local_t *local = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; - local = frame->local; - frame->local = NULL; - if ((op_ret == -1) && (op_errno == ENOTCONN)) { - /* Re-transmit (by putting in the queue) */ - stub = fop_flush_stub (frame, default_flush_resume, - local->fd, xdata); - if (!stub) { - STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM, NULL); - goto out; - } - - gf_quiesce_enqueue (this, stub); - goto out; + local = frame->local; + frame->local = NULL; + if ((op_ret == -1) && (op_errno == ENOTCONN)) { + /* Re-transmit (by putting in the queue) */ + stub = fop_flush_stub(frame, default_flush_resume, local->fd, xdata); + if (!stub) { + STACK_UNWIND_STRICT(flush, frame, -1, ENOMEM, NULL); + goto out; } - STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, xdata); + gf_quiesce_enqueue(this, stub); + goto out; + } + + STACK_UNWIND_STRICT(flush, frame, op_ret, op_errno, xdata); out: - gf_quiesce_local_wipe (this, local); + gf_quiesce_local_wipe(this, local); - return 0; + return 0; } - - int32_t -quiesce_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) +quiesce_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - call_stub_t *stub = NULL; - quiesce_local_t *local = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; - local = frame->local; - frame->local = NULL; - if 
((op_ret == -1) && (op_errno == ENOTCONN)) { - /* Re-transmit (by putting in the queue) */ - stub = fop_fsync_stub (frame, default_fsync_resume, - local->fd, local->flag, xdata); - if (!stub) { - STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM, - NULL, NULL, NULL); - goto out; - } - - gf_quiesce_enqueue (this, stub); - goto out; + local = frame->local; + frame->local = NULL; + if ((op_ret == -1) && (op_errno == ENOTCONN)) { + /* Re-transmit (by putting in the queue) */ + stub = fop_fsync_stub(frame, default_fsync_resume, local->fd, + local->flag, xdata); + if (!stub) { + STACK_UNWIND_STRICT(fsync, frame, -1, ENOMEM, NULL, NULL, NULL); + goto out; } - STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata); + gf_quiesce_enqueue(this, stub); + goto out; + } + + STACK_UNWIND_STRICT(fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata); out: - gf_quiesce_local_wipe (this, local); + gf_quiesce_local_wipe(this, local); - return 0; + return 0; } int32_t -quiesce_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata) +quiesce_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + dict_t *xdata) { - call_stub_t *stub = NULL; - quiesce_local_t *local = NULL; - - local = frame->local; - frame->local = NULL; - if ((op_ret == -1) && (op_errno == ENOTCONN)) { - /* Re-transmit (by putting in the queue) */ - stub = fop_fstat_stub (frame, default_fstat_resume, - local->fd, xdata); - if (!stub) { - STACK_UNWIND_STRICT (fstat, frame, -1, ENOMEM, - NULL, NULL); - goto out; - } + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; - gf_quiesce_enqueue (this, stub); - goto out; + local = frame->local; + frame->local = NULL; + if ((op_ret == -1) && (op_errno == ENOTCONN)) { + /* Re-transmit (by putting in the queue) */ + stub = fop_fstat_stub(frame, default_fstat_resume, local->fd, xdata); + if (!stub) { + 
STACK_UNWIND_STRICT(fstat, frame, -1, ENOMEM, NULL, NULL); + goto out; } - STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf, xdata); + gf_quiesce_enqueue(this, stub); + goto out; + } + + STACK_UNWIND_STRICT(fstat, frame, op_ret, op_errno, buf, xdata); out: - gf_quiesce_local_wipe (this, local); + gf_quiesce_local_wipe(this, local); - return 0; + return 0; } int32_t -quiesce_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) +quiesce_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) { - call_stub_t *stub = NULL; - quiesce_local_t *local = NULL; - - local = frame->local; - frame->local = NULL; - if ((op_ret == -1) && (op_errno == ENOTCONN)) { - /* Re-transmit (by putting in the queue) */ - stub = fop_opendir_stub (frame, default_opendir_resume, - &local->loc, local->fd, xdata); - if (!stub) { - STACK_UNWIND_STRICT (opendir, frame, -1, ENOMEM, - NULL, NULL); - goto out; - } + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; - gf_quiesce_enqueue (this, stub); - goto out; + local = frame->local; + frame->local = NULL; + if ((op_ret == -1) && (op_errno == ENOTCONN)) { + /* Re-transmit (by putting in the queue) */ + stub = fop_opendir_stub(frame, default_opendir_resume, &local->loc, + local->fd, xdata); + if (!stub) { + STACK_UNWIND_STRICT(opendir, frame, -1, ENOMEM, NULL, NULL); + goto out; } - STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, xdata); + gf_quiesce_enqueue(this, stub); + goto out; + } + + STACK_UNWIND_STRICT(opendir, frame, op_ret, op_errno, fd, xdata); out: - gf_quiesce_local_wipe (this, local); + gf_quiesce_local_wipe(this, local); - return 0; + return 0; } int32_t -quiesce_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +quiesce_fsyncdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, 
int32_t op_errno, dict_t *xdata) { - call_stub_t *stub = NULL; - quiesce_local_t *local = NULL; - - local = frame->local; - frame->local = NULL; - if ((op_ret == -1) && (op_errno == ENOTCONN)) { - /* Re-transmit (by putting in the queue) */ - stub = fop_fsyncdir_stub (frame, default_fsyncdir_resume, - local->fd, local->flag, xdata); - if (!stub) { - STACK_UNWIND_STRICT (fsyncdir, frame, -1, ENOMEM, NULL); - goto out; - } + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; - gf_quiesce_enqueue (this, stub); - goto out; + local = frame->local; + frame->local = NULL; + if ((op_ret == -1) && (op_errno == ENOTCONN)) { + /* Re-transmit (by putting in the queue) */ + stub = fop_fsyncdir_stub(frame, default_fsyncdir_resume, local->fd, + local->flag, xdata); + if (!stub) { + STACK_UNWIND_STRICT(fsyncdir, frame, -1, ENOMEM, NULL); + goto out; } - STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno, xdata); + gf_quiesce_enqueue(this, stub); + goto out; + } + + STACK_UNWIND_STRICT(fsyncdir, frame, op_ret, op_errno, xdata); out: - gf_quiesce_local_wipe (this, local); + gf_quiesce_local_wipe(this, local); - return 0; + return 0; } int32_t -quiesce_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct statvfs *buf, dict_t *xdata) +quiesce_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct statvfs *buf, + dict_t *xdata) { - call_stub_t *stub = NULL; - quiesce_local_t *local = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; - local = frame->local; - frame->local = NULL; - if ((op_ret == -1) && (op_errno == ENOTCONN)) { - /* Re-transmit (by putting in the queue) */ - stub = fop_statfs_stub (frame, default_statfs_resume, - &local->loc, xdata); - if (!stub) { - STACK_UNWIND_STRICT (statfs, frame, -1, ENOMEM, - NULL, NULL); - goto out; - } - - gf_quiesce_enqueue (this, stub); - goto out; + local = frame->local; + frame->local = NULL; + if ((op_ret == 
-1) && (op_errno == ENOTCONN)) { + /* Re-transmit (by putting in the queue) */ + stub = fop_statfs_stub(frame, default_statfs_resume, &local->loc, + xdata); + if (!stub) { + STACK_UNWIND_STRICT(statfs, frame, -1, ENOMEM, NULL, NULL); + goto out; } - STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf, xdata); + gf_quiesce_enqueue(this, stub); + goto out; + } + + STACK_UNWIND_STRICT(statfs, frame, op_ret, op_errno, buf, xdata); out: - gf_quiesce_local_wipe (this, local); + gf_quiesce_local_wipe(this, local); - return 0; + return 0; } int32_t -quiesce_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata) +quiesce_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t *xdata) { - call_stub_t *stub = NULL; - quiesce_local_t *local = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; - local = frame->local; - frame->local = NULL; - if ((op_ret == -1) && (op_errno == ENOTCONN)) { - /* Re-transmit (by putting in the queue) */ - stub = fop_fgetxattr_stub (frame, default_fgetxattr_resume, - local->fd, local->name, xdata); - if (!stub) { - STACK_UNWIND_STRICT (fgetxattr, frame, -1, ENOMEM, - NULL, NULL); - goto out; - } - - gf_quiesce_enqueue (this, stub); - goto out; + local = frame->local; + frame->local = NULL; + if ((op_ret == -1) && (op_errno == ENOTCONN)) { + /* Re-transmit (by putting in the queue) */ + stub = fop_fgetxattr_stub(frame, default_fgetxattr_resume, local->fd, + local->name, xdata); + if (!stub) { + STACK_UNWIND_STRICT(fgetxattr, frame, -1, ENOMEM, NULL, NULL); + goto out; } - STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, xdata); + gf_quiesce_enqueue(this, stub); + goto out; + } + + STACK_UNWIND_STRICT(fgetxattr, frame, op_ret, op_errno, dict, xdata); out: - gf_quiesce_local_wipe (this, local); + gf_quiesce_local_wipe(this, local); - return 0; + return 0; } - int32_t 
-quiesce_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata) +quiesce_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t *xdata) { - call_stub_t *stub = NULL; - quiesce_local_t *local = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; - local = frame->local; - frame->local = NULL; - if ((op_ret == -1) && (op_errno == ENOTCONN)) { - /* Re-transmit (by putting in the queue) */ - stub = fop_getxattr_stub (frame, default_getxattr_resume, - &local->loc, local->name, xdata); - if (!stub) { - STACK_UNWIND_STRICT (getxattr, frame, -1, ENOMEM, - NULL, NULL); - goto out; - } - - gf_quiesce_enqueue (this, stub); - goto out; + local = frame->local; + frame->local = NULL; + if ((op_ret == -1) && (op_errno == ENOTCONN)) { + /* Re-transmit (by putting in the queue) */ + stub = fop_getxattr_stub(frame, default_getxattr_resume, &local->loc, + local->name, xdata); + if (!stub) { + STACK_UNWIND_STRICT(getxattr, frame, -1, ENOMEM, NULL, NULL); + goto out; } - STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata); + gf_quiesce_enqueue(this, stub); + goto out; + } + + STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, dict, xdata); out: - gf_quiesce_local_wipe (this, local); + gf_quiesce_local_wipe(this, local); - return 0; + return 0; } - int32_t -quiesce_rchecksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, uint32_t weak_checksum, - uint8_t *strong_checksum, dict_t *xdata) +quiesce_rchecksum_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, uint32_t weak_checksum, + uint8_t *strong_checksum, dict_t *xdata) { - call_stub_t *stub = NULL; - quiesce_local_t *local = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; - local = frame->local; - frame->local = NULL; - if ((op_ret == -1) && (op_errno == 
ENOTCONN)) { - /* Re-transmit (by putting in the queue) */ - stub = fop_rchecksum_stub (frame, default_rchecksum_resume, - local->fd, local->offset, local->flag, xdata); - if (!stub) { - STACK_UNWIND_STRICT (rchecksum, frame, -1, ENOMEM, - 0, NULL, NULL); - goto out; - } - - gf_quiesce_enqueue (this, stub); - goto out; + local = frame->local; + frame->local = NULL; + if ((op_ret == -1) && (op_errno == ENOTCONN)) { + /* Re-transmit (by putting in the queue) */ + stub = fop_rchecksum_stub(frame, default_rchecksum_resume, local->fd, + local->offset, local->flag, xdata); + if (!stub) { + STACK_UNWIND_STRICT(rchecksum, frame, -1, ENOMEM, 0, NULL, NULL); + goto out; } - STACK_UNWIND_STRICT (rchecksum, frame, op_ret, op_errno, weak_checksum, - strong_checksum, xdata); + gf_quiesce_enqueue(this, stub); + goto out; + } + + STACK_UNWIND_STRICT(rchecksum, frame, op_ret, op_errno, weak_checksum, + strong_checksum, xdata); out: - gf_quiesce_local_wipe (this, local); + gf_quiesce_local_wipe(this, local); - return 0; + return 0; } - int32_t -quiesce_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, dict_t *xdata) +quiesce_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, + dict_t *xdata) { - call_stub_t *stub = NULL; - quiesce_local_t *local = NULL; - - local = frame->local; - frame->local = NULL; - if ((op_ret == -1) && (op_errno == ENOTCONN)) { - /* Re-transmit (by putting in the queue) */ - stub = fop_readdir_stub (frame, default_readdir_resume, - local->fd, local->size, local->offset, xdata); - if (!stub) { - STACK_UNWIND_STRICT (readdir, frame, -1, ENOMEM, - NULL, NULL); - goto out; - } + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; - gf_quiesce_enqueue (this, stub); - goto out; + local = frame->local; + frame->local = NULL; + if ((op_ret == -1) && (op_errno == ENOTCONN)) { + /* Re-transmit (by putting in the queue) */ + 
stub = fop_readdir_stub(frame, default_readdir_resume, local->fd, + local->size, local->offset, xdata); + if (!stub) { + STACK_UNWIND_STRICT(readdir, frame, -1, ENOMEM, NULL, NULL); + goto out; } - STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, entries, xdata); + gf_quiesce_enqueue(this, stub); + goto out; + } + + STACK_UNWIND_STRICT(readdir, frame, op_ret, op_errno, entries, xdata); out: - gf_quiesce_local_wipe (this, local); + gf_quiesce_local_wipe(this, local); - return 0; + return 0; } - int32_t -quiesce_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, dict_t *xdata) +quiesce_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, + dict_t *xdata) { - call_stub_t *stub = NULL; - quiesce_local_t *local = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; - local = frame->local; - frame->local = NULL; - if ((op_ret == -1) && (op_errno == ENOTCONN)) { - /* Re-transmit (by putting in the queue) */ - stub = fop_readdirp_stub (frame, default_readdirp_resume, - local->fd, local->size, local->offset, - local->dict); - if (!stub) { - STACK_UNWIND_STRICT (readdirp, frame, -1, ENOMEM, - NULL, NULL); - goto out; - } - - gf_quiesce_enqueue (this, stub); - goto out; + local = frame->local; + frame->local = NULL; + if ((op_ret == -1) && (op_errno == ENOTCONN)) { + /* Re-transmit (by putting in the queue) */ + stub = fop_readdirp_stub(frame, default_readdirp_resume, local->fd, + local->size, local->offset, local->dict); + if (!stub) { + STACK_UNWIND_STRICT(readdirp, frame, -1, ENOMEM, NULL, NULL); + goto out; } - STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata); + gf_quiesce_enqueue(this, stub); + goto out; + } + + STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, entries, xdata); out: - gf_quiesce_local_wipe (this, local); + gf_quiesce_local_wipe(this, local); - return 0; + return 0; } - 
#if 0 int32_t @@ -1010,1596 +1161,1544 @@ out: #endif /* if 0 */ - /* FOP */ /* No retransmittion */ int32_t -quiesce_removexattr (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - const char *name, dict_t *xdata) +quiesce_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) { - quiesce_priv_t *priv = NULL; - call_stub_t *stub = NULL; + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; - priv = this->private; + priv = this->private; - if (priv->pass_through) { - STACK_WIND (frame, - default_removexattr_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->removexattr, - loc, - name, xdata); - return 0; - } + if (priv->pass_through) { + STACK_WIND(frame, default_removexattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, loc, name, xdata); + return 0; + } - stub = fop_removexattr_stub (frame, default_removexattr_resume, - loc, name, xdata); - if (!stub) { - STACK_UNWIND_STRICT (removexattr, frame, -1, ENOMEM, NULL); - return 0; - } + stub = fop_removexattr_stub(frame, default_removexattr_resume, loc, name, + xdata); + if (!stub) { + STACK_UNWIND_STRICT(removexattr, frame, -1, ENOMEM, NULL); + return 0; + } - gf_quiesce_enqueue (this, stub); + gf_quiesce_enqueue(this, stub); - return 0; + return 0; } int32_t -quiesce_truncate (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - off_t offset, dict_t *xdata) +quiesce_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *name, dict_t *xdata) { - quiesce_priv_t *priv = NULL; - call_stub_t *stub = NULL; + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; - priv = this->private; + priv = this->private; - if (priv->pass_through) { - STACK_WIND (frame, - default_truncate_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->truncate, - loc, - offset, xdata); - return 0; - } + if (priv->pass_through) { + STACK_WIND(frame, default_fremovexattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata); + 
return 0; + } - stub = fop_truncate_stub (frame, default_truncate_resume, loc, offset, xdata); - if (!stub) { - STACK_UNWIND_STRICT (truncate, frame, -1, ENOMEM, NULL, NULL, NULL); - return 0; - } + stub = fop_fremovexattr_stub(frame, default_fremovexattr_resume, fd, name, + xdata); + if (!stub) { + STACK_UNWIND_STRICT(fremovexattr, frame, -1, ENOMEM, NULL); + return 0; + } - gf_quiesce_enqueue (this, stub); + gf_quiesce_enqueue(this, stub); - return 0; + return 0; } int32_t -quiesce_fsetxattr (call_frame_t *frame, - xlator_t *this, - fd_t *fd, - dict_t *dict, - int32_t flags, dict_t *xdata) +quiesce_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) { - quiesce_priv_t *priv = NULL; - call_stub_t *stub = NULL; + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; - priv = this->private; + priv = this->private; - if (priv->pass_through) { - STACK_WIND (frame, - default_fsetxattr_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsetxattr, - fd, - dict, - flags, xdata); - return 0; - } + if (priv->pass_through) { + STACK_WIND(frame, default_truncate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); + return 0; + } - stub = fop_fsetxattr_stub (frame, default_fsetxattr_resume, - fd, dict, flags, xdata); - if (!stub) { - STACK_UNWIND_STRICT (fsetxattr, frame, -1, ENOMEM, NULL); - return 0; - } + stub = fop_truncate_stub(frame, default_truncate_resume, loc, offset, + xdata); + if (!stub) { + STACK_UNWIND_STRICT(truncate, frame, -1, ENOMEM, NULL, NULL, NULL); + return 0; + } - gf_quiesce_enqueue (this, stub); + gf_quiesce_enqueue(this, stub); - return 0; + return 0; } int32_t -quiesce_setxattr (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - dict_t *dict, - int32_t flags, dict_t *xdata) +quiesce_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, + int32_t flags, dict_t *xdata) { - quiesce_priv_t *priv = NULL; - call_stub_t *stub = NULL; + quiesce_priv_t *priv = NULL; + 
call_stub_t *stub = NULL; - priv = this->private; + priv = this->private; - if (priv->pass_through) { - STACK_WIND (frame, - default_setxattr_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setxattr, - loc, - dict, - flags, xdata); - return 0; - } + if (priv->pass_through) { + STACK_WIND(frame, default_fsetxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata); + return 0; + } - stub = fop_setxattr_stub (frame, default_setxattr_resume, - loc, dict, flags, xdata); - if (!stub) { - STACK_UNWIND_STRICT (setxattr, frame, -1, ENOMEM, NULL); - return 0; - } + stub = fop_fsetxattr_stub(frame, default_fsetxattr_resume, fd, dict, flags, + xdata); + if (!stub) { + STACK_UNWIND_STRICT(fsetxattr, frame, -1, ENOMEM, NULL); + return 0; + } - gf_quiesce_enqueue (this, stub); + gf_quiesce_enqueue(this, stub); - return 0; + return 0; } int32_t -quiesce_create (call_frame_t *frame, xlator_t *this, - loc_t *loc, int32_t flags, mode_t mode, - mode_t umask, fd_t *fd, dict_t *xdata) +quiesce_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + int32_t flags, dict_t *xdata) { - quiesce_priv_t *priv = NULL; - call_stub_t *stub = NULL; + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; - priv = this->private; + priv = this->private; - if (priv->pass_through) { - /* Don't send O_APPEND below, as write() re-transmittions can - fail with O_APPEND */ - STACK_WIND (frame, default_create_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->create, - loc, (flags & ~O_APPEND), mode, umask, fd, xdata); - return 0; - } + if (priv->pass_through) { + STACK_WIND(frame, default_setxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata); + return 0; + } - stub = fop_create_stub (frame, default_create_resume, - loc, (flags & ~O_APPEND), mode, umask, fd, xdata); - if (!stub) { - STACK_UNWIND_STRICT (create, frame, -1, ENOMEM, - NULL, NULL, NULL, NULL, NULL, NULL); - return 0; - } + stub = 
fop_setxattr_stub(frame, default_setxattr_resume, loc, dict, flags, + xdata); + if (!stub) { + STACK_UNWIND_STRICT(setxattr, frame, -1, ENOMEM, NULL); + return 0; + } - gf_quiesce_enqueue (this, stub); + gf_quiesce_enqueue(this, stub); - return 0; + return 0; } int32_t -quiesce_link (call_frame_t *frame, - xlator_t *this, - loc_t *oldloc, - loc_t *newloc, dict_t *xdata) +quiesce_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) { - quiesce_priv_t *priv = NULL; - call_stub_t *stub = NULL; + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; - priv = this->private; + priv = this->private; - if (priv->pass_through) { - STACK_WIND (frame, - default_link_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->link, - oldloc, newloc, xdata); - return 0; - } + if (priv->pass_through) { + /* Don't send O_APPEND below, as write() re-transmittions can + fail with O_APPEND */ + STACK_WIND(frame, default_create_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->create, loc, (flags & ~O_APPEND), + mode, umask, fd, xdata); + return 0; + } - stub = fop_link_stub (frame, default_link_resume, oldloc, newloc, xdata); - if (!stub) { - STACK_UNWIND_STRICT (link, frame, -1, ENOMEM, - NULL, NULL, NULL, NULL, NULL); - return 0; - } + stub = fop_create_stub(frame, default_create_resume, loc, + (flags & ~O_APPEND), mode, umask, fd, xdata); + if (!stub) { + STACK_UNWIND_STRICT(create, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, + NULL, NULL); + return 0; + } - gf_quiesce_enqueue (this, stub); + gf_quiesce_enqueue(this, stub); - return 0; + return 0; } int32_t -quiesce_rename (call_frame_t *frame, - xlator_t *this, - loc_t *oldloc, - loc_t *newloc, dict_t *xdata) +quiesce_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) { - quiesce_priv_t *priv = NULL; - call_stub_t *stub = NULL; + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; - priv = this->private; + priv = 
this->private; - if (priv->pass_through) { - STACK_WIND (frame, - default_rename_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rename, - oldloc, newloc, xdata); - return 0; - } + if (priv->pass_through) { + STACK_WIND(frame, default_link_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata); + return 0; + } - stub = fop_rename_stub (frame, default_rename_resume, oldloc, newloc, xdata); - if (!stub) { - STACK_UNWIND_STRICT (rename, frame, -1, ENOMEM, - NULL, NULL, NULL, NULL, NULL, NULL); - return 0; - } + stub = fop_link_stub(frame, default_link_resume, oldloc, newloc, xdata); + if (!stub) { + STACK_UNWIND_STRICT(link, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, + NULL); + return 0; + } - gf_quiesce_enqueue (this, stub); + gf_quiesce_enqueue(this, stub); - return 0; + return 0; } +int32_t +quiesce_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, + loc_t *newloc, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + + priv = this->private; + + if (priv->pass_through) { + STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); + return 0; + } + + stub = fop_rename_stub(frame, default_rename_resume, oldloc, newloc, xdata); + if (!stub) { + STACK_UNWIND_STRICT(rename, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, + NULL, NULL); + return 0; + } + + gf_quiesce_enqueue(this, stub); + + return 0; +} int -quiesce_symlink (call_frame_t *frame, xlator_t *this, - const char *linkpath, loc_t *loc, mode_t umask, dict_t *xdata) +quiesce_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath, + loc_t *loc, mode_t umask, dict_t *xdata) { - quiesce_priv_t *priv = NULL; - call_stub_t *stub = NULL; + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; - priv = this->private; + priv = this->private; - if (priv->pass_through) { - STACK_WIND (frame, default_symlink_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->symlink, - linkpath, loc, umask, 
xdata); - return 0; - } + if (priv->pass_through) { + STACK_WIND(frame, default_symlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->symlink, linkpath, loc, umask, + xdata); + return 0; + } - stub = fop_symlink_stub (frame, default_symlink_resume, - linkpath, loc, umask, xdata); - if (!stub) { - STACK_UNWIND_STRICT (symlink, frame, -1, ENOMEM, - NULL, NULL, NULL, NULL, NULL); - return 0; - } + stub = fop_symlink_stub(frame, default_symlink_resume, linkpath, loc, umask, + xdata); + if (!stub) { + STACK_UNWIND_STRICT(symlink, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, + NULL); + return 0; + } - gf_quiesce_enqueue (this, stub); + gf_quiesce_enqueue(this, stub); - return 0; + return 0; } - int -quiesce_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, dict_t *xdata) +quiesce_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + dict_t *xdata) { - quiesce_priv_t *priv = NULL; - call_stub_t *stub = NULL; + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; - priv = this->private; + priv = this->private; - if (priv->pass_through) { - STACK_WIND (frame, default_rmdir_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rmdir, - loc, flags, xdata); - return 0; - } + if (priv->pass_through) { + STACK_WIND(frame, default_rmdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rmdir, loc, flags, xdata); + return 0; + } - stub = fop_rmdir_stub (frame, default_rmdir_resume, loc, flags, xdata); - if (!stub) { - STACK_UNWIND_STRICT (rmdir, frame, -1, ENOMEM, NULL, NULL, NULL); - return 0; - } + stub = fop_rmdir_stub(frame, default_rmdir_resume, loc, flags, xdata); + if (!stub) { + STACK_UNWIND_STRICT(rmdir, frame, -1, ENOMEM, NULL, NULL, NULL); + return 0; + } - gf_quiesce_enqueue (this, stub); + gf_quiesce_enqueue(this, stub); - return 0; + return 0; } int32_t -quiesce_unlink (call_frame_t *frame, - xlator_t *this, - loc_t *loc, int xflag, dict_t *xdata) +quiesce_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, + 
dict_t *xdata) { - quiesce_priv_t *priv = NULL; - call_stub_t *stub = NULL; + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; - priv = this->private; + priv = this->private; - if (priv->pass_through) { - STACK_WIND (frame, - default_unlink_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, - loc, xflag, xdata); - return 0; - } + if (priv->pass_through) { + STACK_WIND(frame, default_unlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); + return 0; + } - stub = fop_unlink_stub (frame, default_unlink_resume, loc, xflag, xdata); - if (!stub) { - STACK_UNWIND_STRICT (unlink, frame, -1, ENOMEM, NULL, NULL, NULL); - return 0; - } + stub = fop_unlink_stub(frame, default_unlink_resume, loc, xflag, xdata); + if (!stub) { + STACK_UNWIND_STRICT(unlink, frame, -1, ENOMEM, NULL, NULL, NULL); + return 0; + } - gf_quiesce_enqueue (this, stub); + gf_quiesce_enqueue(this, stub); - return 0; + return 0; } int -quiesce_mkdir (call_frame_t *frame, xlator_t *this, - loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata) +quiesce_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + mode_t umask, dict_t *xdata) { - quiesce_priv_t *priv = NULL; - call_stub_t *stub = NULL; + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; - priv = this->private; + priv = this->private; - if (priv->pass_through) { - STACK_WIND (frame, default_mkdir_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->mkdir, - loc, mode, umask, xdata); - return 0; - } + if (priv->pass_through) { + STACK_WIND(frame, default_mkdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata); + return 0; + } - stub = fop_mkdir_stub (frame, default_mkdir_resume, - loc, mode, umask, xdata); - if (!stub) { - STACK_UNWIND_STRICT (mkdir, frame, -1, ENOMEM, - NULL, NULL, NULL, NULL, NULL); - return 0; - } + stub = fop_mkdir_stub(frame, default_mkdir_resume, loc, mode, umask, xdata); + if (!stub) { + STACK_UNWIND_STRICT(mkdir, frame, -1, 
ENOMEM, NULL, NULL, NULL, NULL, + NULL); + return 0; + } - gf_quiesce_enqueue (this, stub); + gf_quiesce_enqueue(this, stub); - return 0; + return 0; } - int -quiesce_mknod (call_frame_t *frame, xlator_t *this, - loc_t *loc, mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata) +quiesce_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + dev_t rdev, mode_t umask, dict_t *xdata) { - quiesce_priv_t *priv = NULL; - call_stub_t *stub = NULL; + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; - priv = this->private; + priv = this->private; - if (priv->pass_through) { - STACK_WIND (frame, default_mknod_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->mknod, - loc, mode, rdev, umask, xdata); - return 0; - } + if (priv->pass_through) { + STACK_WIND(frame, default_mknod_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, + xdata); + return 0; + } - stub = fop_mknod_stub (frame, default_mknod_resume, - loc, mode, rdev, umask, xdata); - if (!stub) { - STACK_UNWIND_STRICT (mknod, frame, -1, ENOMEM, - NULL, NULL, NULL, NULL, NULL); - return 0; - } + stub = fop_mknod_stub(frame, default_mknod_resume, loc, mode, rdev, umask, + xdata); + if (!stub) { + STACK_UNWIND_STRICT(mknod, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, + NULL); + return 0; + } - gf_quiesce_enqueue (this, stub); + gf_quiesce_enqueue(this, stub); - return 0; + return 0; } int32_t -quiesce_ftruncate (call_frame_t *frame, - xlator_t *this, - fd_t *fd, - off_t offset, dict_t *xdata) +quiesce_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) { - quiesce_priv_t *priv = NULL; - call_stub_t *stub = NULL; + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; - priv = this->private; + priv = this->private; - if (priv->pass_through) { - STACK_WIND (frame, - default_ftruncate_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->ftruncate, - fd, - offset, xdata); - return 0; - } + if (priv->pass_through) { + 
STACK_WIND(frame, default_ftruncate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); + return 0; + } - stub = fop_ftruncate_stub (frame, default_ftruncate_resume, fd, offset, xdata); - if (!stub) { - STACK_UNWIND_STRICT (ftruncate, frame, -1, ENOMEM, NULL, NULL, NULL); - return 0; - } + stub = fop_ftruncate_stub(frame, default_ftruncate_resume, fd, offset, + xdata); + if (!stub) { + STACK_UNWIND_STRICT(ftruncate, frame, -1, ENOMEM, NULL, NULL, NULL); + return 0; + } - gf_quiesce_enqueue (this, stub); + gf_quiesce_enqueue(this, stub); - return 0; + return 0; } /* Re-transmittion */ int32_t -quiesce_readlink (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - size_t size, dict_t *xdata) +quiesce_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size, + dict_t *xdata) { - quiesce_priv_t *priv = NULL; - call_stub_t *stub = NULL; - quiesce_local_t *local = NULL; - - priv = this->private; + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; - if (priv && priv->pass_through) { - local = mem_get0 (priv->local_pool); - loc_dup (loc, &local->loc); - local->size = size; - frame->local = local; - - STACK_WIND (frame, - quiesce_readlink_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readlink, - loc, - size, xdata); - return 0; - } + priv = this->private; - stub = fop_readlink_stub (frame, default_readlink_resume, loc, size, xdata); - if (!stub) { - STACK_UNWIND_STRICT (readlink, frame, -1, ENOMEM, NULL, NULL, NULL); - return 0; - } + if (priv && priv->pass_through) { + local = mem_get0(priv->local_pool); + loc_dup(loc, &local->loc); + local->size = size; + frame->local = local; - gf_quiesce_enqueue (this, stub); + STACK_WIND(frame, quiesce_readlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readlink, loc, size, xdata); + return 0; + } + stub = fop_readlink_stub(frame, default_readlink_resume, loc, size, xdata); + if (!stub) { + STACK_UNWIND_STRICT(readlink, frame, -1, 
ENOMEM, NULL, NULL, NULL); return 0; -} + } + + gf_quiesce_enqueue(this, stub); + return 0; +} int32_t -quiesce_access (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - int32_t mask, dict_t *xdata) +quiesce_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask, + dict_t *xdata) { - quiesce_priv_t *priv = NULL; - call_stub_t *stub = NULL; - quiesce_local_t *local = NULL; + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; - priv = this->private; - - if (priv && priv->pass_through) { - local = mem_get0 (priv->local_pool); - loc_dup (loc, &local->loc); - local->flag = mask; - frame->local = local; - - STACK_WIND (frame, - quiesce_access_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->access, - loc, - mask, xdata); - return 0; - } + priv = this->private; - stub = fop_access_stub (frame, default_access_resume, loc, mask, xdata); - if (!stub) { - STACK_UNWIND_STRICT (access, frame, -1, ENOMEM, NULL); - return 0; - } + if (priv && priv->pass_through) { + local = mem_get0(priv->local_pool); + loc_dup(loc, &local->loc); + local->flag = mask; + frame->local = local; - gf_quiesce_enqueue (this, stub); + STACK_WIND(frame, quiesce_access_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->access, loc, mask, xdata); + return 0; + } + stub = fop_access_stub(frame, default_access_resume, loc, mask, xdata); + if (!stub) { + STACK_UNWIND_STRICT(access, frame, -1, ENOMEM, NULL); return 0; + } + + gf_quiesce_enqueue(this, stub); + + return 0; } int32_t -quiesce_fgetxattr (call_frame_t *frame, - xlator_t *this, - fd_t *fd, - const char *name, dict_t *xdata) +quiesce_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *name, dict_t *xdata) { - quiesce_priv_t *priv = NULL; - call_stub_t *stub = NULL; - quiesce_local_t *local = NULL; + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; - priv = this->private; + priv = this->private; - if (priv && priv->pass_through) { 
- local = mem_get0 (priv->local_pool); - local->fd = fd_ref (fd); - if (name) - local->name = gf_strdup (name); - - frame->local = local; - - STACK_WIND (frame, - quiesce_fgetxattr_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fgetxattr, - fd, - name, xdata); - return 0; - } + if (priv && priv->pass_through) { + local = mem_get0(priv->local_pool); + local->fd = fd_ref(fd); + if (name) + local->name = gf_strdup(name); - stub = fop_fgetxattr_stub (frame, default_fgetxattr_resume, fd, name, xdata); - if (!stub) { - STACK_UNWIND_STRICT (fgetxattr, frame, -1, ENOMEM, NULL, NULL); - return 0; - } + frame->local = local; - gf_quiesce_enqueue (this, stub); + STACK_WIND(frame, quiesce_fgetxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata); + return 0; + } + stub = fop_fgetxattr_stub(frame, default_fgetxattr_resume, fd, name, xdata); + if (!stub) { + STACK_UNWIND_STRICT(fgetxattr, frame, -1, ENOMEM, NULL, NULL); return 0; + } + + gf_quiesce_enqueue(this, stub); + + return 0; } int32_t -quiesce_statfs (call_frame_t *frame, - xlator_t *this, - loc_t *loc, dict_t *xdata) +quiesce_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { - quiesce_priv_t *priv = NULL; - call_stub_t *stub = NULL; - quiesce_local_t *local = NULL; + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; - priv = this->private; + priv = this->private; - if (priv && priv->pass_through) { - local = mem_get0 (priv->local_pool); - loc_dup (loc, &local->loc); - frame->local = local; - - STACK_WIND (frame, - quiesce_statfs_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->statfs, - loc, xdata); - return 0; - } + if (priv && priv->pass_through) { + local = mem_get0(priv->local_pool); + loc_dup(loc, &local->loc); + frame->local = local; - stub = fop_statfs_stub (frame, default_statfs_resume, loc, xdata); - if (!stub) { - STACK_UNWIND_STRICT (statfs, frame, -1, ENOMEM, NULL, NULL); - return 0; - } - - 
gf_quiesce_enqueue (this, stub); + STACK_WIND(frame, quiesce_statfs_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->statfs, loc, xdata); + return 0; + } + stub = fop_statfs_stub(frame, default_statfs_resume, loc, xdata); + if (!stub) { + STACK_UNWIND_STRICT(statfs, frame, -1, ENOMEM, NULL, NULL); return 0; + } + + gf_quiesce_enqueue(this, stub); + + return 0; } int32_t -quiesce_fsyncdir (call_frame_t *frame, - xlator_t *this, - fd_t *fd, - int32_t flags, dict_t *xdata) +quiesce_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, + dict_t *xdata) { - quiesce_priv_t *priv = NULL; - call_stub_t *stub = NULL; - quiesce_local_t *local = NULL; + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; - priv = this->private; + priv = this->private; - if (priv && priv->pass_through) { - local = mem_get0 (priv->local_pool); - local->fd = fd_ref (fd); - local->flag = flags; - frame->local = local; - - STACK_WIND (frame, - quiesce_fsyncdir_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsyncdir, - fd, - flags, xdata); - return 0; - } + if (priv && priv->pass_through) { + local = mem_get0(priv->local_pool); + local->fd = fd_ref(fd); + local->flag = flags; + frame->local = local; - stub = fop_fsyncdir_stub (frame, default_fsyncdir_resume, fd, flags, xdata); - if (!stub) { - STACK_UNWIND_STRICT (fsyncdir, frame, -1, ENOMEM, NULL); - return 0; - } - - gf_quiesce_enqueue (this, stub); + STACK_WIND(frame, quiesce_fsyncdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsyncdir, fd, flags, xdata); + return 0; + } + stub = fop_fsyncdir_stub(frame, default_fsyncdir_resume, fd, flags, xdata); + if (!stub) { + STACK_UNWIND_STRICT(fsyncdir, frame, -1, ENOMEM, NULL); return 0; + } + + gf_quiesce_enqueue(this, stub); + + return 0; } int32_t -quiesce_opendir (call_frame_t *frame, - xlator_t *this, - loc_t *loc, fd_t *fd, dict_t *xdata) +quiesce_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, + dict_t 
*xdata) { - quiesce_priv_t *priv = NULL; - call_stub_t *stub = NULL; - quiesce_local_t *local = NULL; - - priv = this->private; + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; - if (priv && priv->pass_through) { - local = mem_get0 (priv->local_pool); - loc_dup (loc, &local->loc); - local->fd = fd_ref (fd); - frame->local = local; - - STACK_WIND (frame, - quiesce_opendir_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->opendir, - loc, fd, xdata); - return 0; - } + priv = this->private; - stub = fop_opendir_stub (frame, default_opendir_resume, loc, fd, xdata); - if (!stub) { - STACK_UNWIND_STRICT (opendir, frame, -1, ENOMEM, NULL, NULL); - return 0; - } + if (priv && priv->pass_through) { + local = mem_get0(priv->local_pool); + loc_dup(loc, &local->loc); + local->fd = fd_ref(fd); + frame->local = local; - gf_quiesce_enqueue (this, stub); + STACK_WIND(frame, quiesce_opendir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->opendir, loc, fd, xdata); + return 0; + } + stub = fop_opendir_stub(frame, default_opendir_resume, loc, fd, xdata); + if (!stub) { + STACK_UNWIND_STRICT(opendir, frame, -1, ENOMEM, NULL, NULL); return 0; + } + + gf_quiesce_enqueue(this, stub); + + return 0; } int32_t -quiesce_fstat (call_frame_t *frame, - xlator_t *this, - fd_t *fd, dict_t *xdata) +quiesce_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { - quiesce_priv_t *priv = NULL; - call_stub_t *stub = NULL; - quiesce_local_t *local = NULL; - - priv = this->private; + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; - if (priv && priv->pass_through) { - local = mem_get0 (priv->local_pool); - local->fd = fd_ref (fd); - frame->local = local; - - STACK_WIND (frame, - quiesce_fstat_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fstat, - fd, xdata); - return 0; - } + priv = this->private; - stub = fop_fstat_stub (frame, default_fstat_resume, fd, xdata); - if (!stub) { - 
STACK_UNWIND_STRICT (fstat, frame, -1, ENOMEM, NULL, NULL); - return 0; - } + if (priv && priv->pass_through) { + local = mem_get0(priv->local_pool); + local->fd = fd_ref(fd); + frame->local = local; - gf_quiesce_enqueue (this, stub); + STACK_WIND(frame, quiesce_fstat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, fd, xdata); + return 0; + } + stub = fop_fstat_stub(frame, default_fstat_resume, fd, xdata); + if (!stub) { + STACK_UNWIND_STRICT(fstat, frame, -1, ENOMEM, NULL, NULL); return 0; + } + + gf_quiesce_enqueue(this, stub); + + return 0; } int32_t -quiesce_fsync (call_frame_t *frame, - xlator_t *this, - fd_t *fd, - int32_t flags, dict_t *xdata) +quiesce_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, + dict_t *xdata) { - quiesce_priv_t *priv = NULL; - call_stub_t *stub = NULL; - quiesce_local_t *local = NULL; + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; - priv = this->private; + priv = this->private; - if (priv && priv->pass_through) { - local = mem_get0 (priv->local_pool); - local->fd = fd_ref (fd); - local->flag = flags; - frame->local = local; - - STACK_WIND (frame, - quiesce_fsync_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsync, - fd, - flags, xdata); - return 0; - } + if (priv && priv->pass_through) { + local = mem_get0(priv->local_pool); + local->fd = fd_ref(fd); + local->flag = flags; + frame->local = local; - stub = fop_fsync_stub (frame, default_fsync_resume, fd, flags, xdata); - if (!stub) { - STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM, NULL, NULL, NULL); - return 0; - } - - gf_quiesce_enqueue (this, stub); + STACK_WIND(frame, quiesce_fsync_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsync, fd, flags, xdata); + return 0; + } + stub = fop_fsync_stub(frame, default_fsync_resume, fd, flags, xdata); + if (!stub) { + STACK_UNWIND_STRICT(fsync, frame, -1, ENOMEM, NULL, NULL, NULL); return 0; + } + + gf_quiesce_enqueue(this, stub); + + return 0; } int32_t 
-quiesce_flush (call_frame_t *frame, - xlator_t *this, - fd_t *fd, dict_t *xdata) +quiesce_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { - quiesce_priv_t *priv = NULL; - call_stub_t *stub = NULL; - quiesce_local_t *local = NULL; - - priv = this->private; + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; - if (priv && priv->pass_through) { - local = mem_get0 (priv->local_pool); - local->fd = fd_ref (fd); - frame->local = local; - - STACK_WIND (frame, - quiesce_flush_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->flush, - fd, xdata); - return 0; - } + priv = this->private; - stub = fop_flush_stub (frame, default_flush_resume, fd, xdata); - if (!stub) { - STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM, NULL); - return 0; - } + if (priv && priv->pass_through) { + local = mem_get0(priv->local_pool); + local->fd = fd_ref(fd); + frame->local = local; - gf_quiesce_enqueue (this, stub); + STACK_WIND(frame, quiesce_flush_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->flush, fd, xdata); + return 0; + } + stub = fop_flush_stub(frame, default_flush_resume, fd, xdata); + if (!stub) { + STACK_UNWIND_STRICT(flush, frame, -1, ENOMEM, NULL); return 0; + } + + gf_quiesce_enqueue(this, stub); + + return 0; } int32_t -quiesce_writev (call_frame_t *frame, - xlator_t *this, - fd_t *fd, - struct iovec *vector, - int32_t count, - off_t off, uint32_t flags, - struct iobref *iobref, dict_t *xdata) +quiesce_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iovec *vector, int32_t count, off_t off, uint32_t flags, + struct iobref *iobref, dict_t *xdata) { - quiesce_priv_t *priv = NULL; - call_stub_t *stub = NULL; + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; - priv = this->private; + priv = this->private; - if (priv && priv->pass_through) { - STACK_WIND (frame, - default_writev_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->writev, - fd, - vector, - count, - off, flags, - iobref, xdata); - 
return 0; - } + if (priv && priv->pass_through) { + STACK_WIND(frame, default_writev_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->writev, fd, vector, count, off, + flags, iobref, xdata); + return 0; + } - stub = fop_writev_stub (frame, default_writev_resume, - fd, vector, count, off, flags, iobref, xdata); - if (!stub) { - STACK_UNWIND_STRICT (writev, frame, -1, ENOMEM, NULL, NULL, NULL); - return 0; - } + stub = fop_writev_stub(frame, default_writev_resume, fd, vector, count, off, + flags, iobref, xdata); + if (!stub) { + STACK_UNWIND_STRICT(writev, frame, -1, ENOMEM, NULL, NULL, NULL); + return 0; + } - gf_quiesce_enqueue (this, stub); + gf_quiesce_enqueue(this, stub); - return 0; + return 0; } int32_t -quiesce_readv (call_frame_t *frame, - xlator_t *this, - fd_t *fd, - size_t size, - off_t offset, uint32_t flags, dict_t *xdata) +quiesce_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) { - quiesce_priv_t *priv = NULL; - call_stub_t *stub = NULL; - quiesce_local_t *local = NULL; + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; - priv = this->private; - - if (priv && priv->pass_through) { - local = mem_get0 (priv->local_pool); - local->fd = fd_ref (fd); - local->size = size; - local->offset = offset; - local->io_flag = flags; - frame->local = local; - - STACK_WIND (frame, - quiesce_readv_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readv, - fd, - size, - offset, flags, xdata); - return 0; - } + priv = this->private; - stub = fop_readv_stub (frame, default_readv_resume, fd, size, offset, - flags, xdata); - if (!stub) { - STACK_UNWIND_STRICT (readv, frame, -1, ENOMEM, - NULL, 0, NULL, NULL, NULL); - return 0; - } + if (priv && priv->pass_through) { + local = mem_get0(priv->local_pool); + local->fd = fd_ref(fd); + local->size = size; + local->offset = offset; + local->io_flag = flags; + frame->local = local; - gf_quiesce_enqueue (this, stub); + 
STACK_WIND(frame, quiesce_readv_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, + xdata); + return 0; + } + stub = fop_readv_stub(frame, default_readv_resume, fd, size, offset, flags, + xdata); + if (!stub) { + STACK_UNWIND_STRICT(readv, frame, -1, ENOMEM, NULL, 0, NULL, NULL, + NULL); return 0; -} + } + + gf_quiesce_enqueue(this, stub); + return 0; +} int32_t -quiesce_open (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - int32_t flags, fd_t *fd, - dict_t *xdata) +quiesce_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + fd_t *fd, dict_t *xdata) { - quiesce_priv_t *priv = NULL; - call_stub_t *stub = NULL; - quiesce_local_t *local = NULL; + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; - priv = this->private; + priv = this->private; - if (priv && priv->pass_through) { - local = mem_get0 (priv->local_pool); - loc_dup (loc, &local->loc); - local->fd = fd_ref (fd); - - /* Don't send O_APPEND below, as write() re-transmittions can - fail with O_APPEND */ - local->flag = (flags & ~O_APPEND); - frame->local = local; - - STACK_WIND (frame, - quiesce_open_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, - loc, (flags & ~O_APPEND), fd, xdata); - return 0; - } + if (priv && priv->pass_through) { + local = mem_get0(priv->local_pool); + loc_dup(loc, &local->loc); + local->fd = fd_ref(fd); - stub = fop_open_stub (frame, default_open_resume, loc, - (flags & ~O_APPEND), fd, xdata); - if (!stub) { - STACK_UNWIND_STRICT (open, frame, -1, ENOMEM, NULL, NULL); - return 0; - } + /* Don't send O_APPEND below, as write() re-transmittions can + fail with O_APPEND */ + local->flag = (flags & ~O_APPEND); + frame->local = local; - gf_quiesce_enqueue (this, stub); + STACK_WIND(frame, quiesce_open_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->open, loc, (flags & ~O_APPEND), fd, + xdata); + return 0; + } + stub = fop_open_stub(frame, default_open_resume, loc, (flags & 
~O_APPEND), + fd, xdata); + if (!stub) { + STACK_UNWIND_STRICT(open, frame, -1, ENOMEM, NULL, NULL); return 0; + } + + gf_quiesce_enqueue(this, stub); + + return 0; } int32_t -quiesce_getxattr (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - const char *name, dict_t *xdata) +quiesce_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) { - quiesce_priv_t *priv = NULL; - call_stub_t *stub = NULL; - quiesce_local_t *local = NULL; + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; - priv = this->private; + priv = this->private; - if (priv && priv->pass_through) { - local = mem_get0 (priv->local_pool); - loc_dup (loc, &local->loc); - if (name) - local->name = gf_strdup (name); - - frame->local = local; - - STACK_WIND (frame, - quiesce_getxattr_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->getxattr, - loc, - name, xdata); - return 0; - } + if (priv && priv->pass_through) { + local = mem_get0(priv->local_pool); + loc_dup(loc, &local->loc); + if (name) + local->name = gf_strdup(name); - stub = fop_getxattr_stub (frame, default_getxattr_resume, loc, name, xdata); - if (!stub) { - STACK_UNWIND_STRICT (getxattr, frame, -1, ENOMEM, NULL, NULL); - return 0; - } + frame->local = local; - gf_quiesce_enqueue (this, stub); + STACK_WIND(frame, quiesce_getxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->getxattr, loc, name, xdata); + return 0; + } + stub = fop_getxattr_stub(frame, default_getxattr_resume, loc, name, xdata); + if (!stub) { + STACK_UNWIND_STRICT(getxattr, frame, -1, ENOMEM, NULL, NULL); return 0; -} + } + + gf_quiesce_enqueue(this, stub); + return 0; +} int32_t -quiesce_xattrop (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - gf_xattrop_flags_t flags, - dict_t *dict, dict_t *xdata) +quiesce_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc, + gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata) { - quiesce_priv_t *priv = NULL; - call_stub_t *stub = 
NULL; + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; - priv = this->private; + priv = this->private; - if (priv && priv->pass_through) { - STACK_WIND (frame, - default_xattrop_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->xattrop, - loc, - flags, - dict, xdata); - return 0; - } + if (priv && priv->pass_through) { + STACK_WIND(frame, default_xattrop_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->xattrop, loc, flags, dict, xdata); + return 0; + } - stub = fop_xattrop_stub (frame, default_xattrop_resume, - loc, flags, dict, xdata); - if (!stub) { - STACK_UNWIND_STRICT (xattrop, frame, -1, ENOMEM, NULL, NULL); - return 0; - } + stub = fop_xattrop_stub(frame, default_xattrop_resume, loc, flags, dict, + xdata); + if (!stub) { + STACK_UNWIND_STRICT(xattrop, frame, -1, ENOMEM, NULL, NULL); + return 0; + } - gf_quiesce_enqueue (this, stub); + gf_quiesce_enqueue(this, stub); - return 0; + return 0; } int32_t -quiesce_fxattrop (call_frame_t *frame, - xlator_t *this, - fd_t *fd, - gf_xattrop_flags_t flags, - dict_t *dict, dict_t *xdata) +quiesce_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd, + gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata) { - quiesce_priv_t *priv = NULL; - call_stub_t *stub = NULL; + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; - priv = this->private; + priv = this->private; - if (priv && priv->pass_through) { - STACK_WIND (frame, - default_fxattrop_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fxattrop, - fd, - flags, - dict, xdata); - return 0; - } + if (priv && priv->pass_through) { + STACK_WIND(frame, default_fxattrop_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fxattrop, fd, flags, dict, xdata); + return 0; + } - stub = fop_fxattrop_stub (frame, default_fxattrop_resume, - fd, flags, dict, xdata); - if (!stub) { - STACK_UNWIND_STRICT (fxattrop, frame, -1, ENOMEM, NULL, NULL); - return 0; - } + stub = fop_fxattrop_stub(frame, default_fxattrop_resume, fd, flags, dict, + xdata); + if (!stub) { 
+ STACK_UNWIND_STRICT(fxattrop, frame, -1, ENOMEM, NULL, NULL); + return 0; + } - gf_quiesce_enqueue (this, stub); + gf_quiesce_enqueue(this, stub); + return 0; +} + +int32_t +quiesce_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, + struct gf_flock *lock, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + + priv = this->private; + + if (priv && priv->pass_through) { + STACK_WIND(frame, default_lk_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lk, fd, cmd, lock, xdata); return 0; + } + + stub = fop_lk_stub(frame, default_lk_resume, fd, cmd, lock, xdata); + if (!stub) { + STACK_UNWIND_STRICT(lk, frame, -1, ENOMEM, NULL, NULL); + return 0; + } + + gf_quiesce_enqueue(this, stub); + + return 0; } int32_t -quiesce_lk (call_frame_t *frame, - xlator_t *this, - fd_t *fd, - int32_t cmd, - struct gf_flock *lock, dict_t *xdata) +quiesce_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, + loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata) { - quiesce_priv_t *priv = NULL; - call_stub_t *stub = NULL; + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; - priv = this->private; + priv = this->private; - if (priv && priv->pass_through) { - STACK_WIND (frame, - default_lk_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lk, - fd, - cmd, + if (priv && priv->pass_through) { + STACK_WIND(frame, default_inodelk_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->inodelk, volume, loc, cmd, lock, + xdata); + return 0; + } + + stub = fop_inodelk_stub(frame, default_inodelk_resume, volume, loc, cmd, lock, xdata); - return 0; - } + if (!stub) { + STACK_UNWIND_STRICT(inodelk, frame, -1, ENOMEM, NULL); + return 0; + } - stub = fop_lk_stub (frame, default_lk_resume, fd, cmd, lock, xdata); - if (!stub) { - STACK_UNWIND_STRICT (lk, frame, -1, ENOMEM, NULL, NULL); - return 0; - } + gf_quiesce_enqueue(this, stub); + + return 0; +} + +int32_t +quiesce_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, + 
fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata) +{ + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; - gf_quiesce_enqueue (this, stub); + priv = this->private; + if (priv && priv->pass_through) { + STACK_WIND(frame, default_finodelk_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->finodelk, volume, fd, cmd, lock, + xdata); return 0; -} + } + stub = fop_finodelk_stub(frame, default_finodelk_resume, volume, fd, cmd, + lock, xdata); + if (!stub) { + STACK_UNWIND_STRICT(finodelk, frame, -1, ENOMEM, NULL); + return 0; + } + + gf_quiesce_enqueue(this, stub); + + return 0; +} int32_t -quiesce_inodelk (call_frame_t *frame, xlator_t *this, - const char *volume, loc_t *loc, int32_t cmd, - struct gf_flock *lock, dict_t *xdata) +quiesce_entrylk(call_frame_t *frame, xlator_t *this, const char *volume, + loc_t *loc, const char *basename, entrylk_cmd cmd, + entrylk_type type, dict_t *xdata) { - quiesce_priv_t *priv = NULL; - call_stub_t *stub = NULL; + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; - priv = this->private; + priv = this->private; - if (priv && priv->pass_through) { - STACK_WIND (frame, - default_inodelk_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->inodelk, - volume, loc, cmd, lock, xdata); - return 0; - } + if (priv && priv->pass_through) { + STACK_WIND(frame, default_entrylk_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->entrylk, volume, loc, basename, cmd, + type, xdata); + return 0; + } - stub = fop_inodelk_stub (frame, default_inodelk_resume, - volume, loc, cmd, lock, xdata); - if (!stub) { - STACK_UNWIND_STRICT (inodelk, frame, -1, ENOMEM, NULL); - return 0; - } + stub = fop_entrylk_stub(frame, default_entrylk_resume, volume, loc, + basename, cmd, type, xdata); + if (!stub) { + STACK_UNWIND_STRICT(entrylk, frame, -1, ENOMEM, NULL); + return 0; + } - gf_quiesce_enqueue (this, stub); + gf_quiesce_enqueue(this, stub); - return 0; + return 0; } int32_t -quiesce_finodelk (call_frame_t *frame, xlator_t *this, - const 
char *volume, fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata) +quiesce_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, + fd_t *fd, const char *basename, entrylk_cmd cmd, + entrylk_type type, dict_t *xdata) { - quiesce_priv_t *priv = NULL; - call_stub_t *stub = NULL; + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; - priv = this->private; + priv = this->private; - if (priv && priv->pass_through) { - STACK_WIND (frame, - default_finodelk_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->finodelk, - volume, fd, cmd, lock, xdata); - return 0; - } + if (priv && priv->pass_through) { + STACK_WIND(frame, default_fentrylk_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fentrylk, volume, fd, basename, cmd, + type, xdata); + return 0; + } - stub = fop_finodelk_stub (frame, default_finodelk_resume, - volume, fd, cmd, lock, xdata); - if (!stub) { - STACK_UNWIND_STRICT (finodelk, frame, -1, ENOMEM, NULL); - return 0; - } + stub = fop_fentrylk_stub(frame, default_fentrylk_resume, volume, fd, + basename, cmd, type, xdata); + if (!stub) { + STACK_UNWIND_STRICT(fentrylk, frame, -1, ENOMEM, NULL); + return 0; + } - gf_quiesce_enqueue (this, stub); + gf_quiesce_enqueue(this, stub); - return 0; + return 0; } int32_t -quiesce_entrylk (call_frame_t *frame, xlator_t *this, - const char *volume, loc_t *loc, const char *basename, - entrylk_cmd cmd, entrylk_type type, dict_t *xdata) +quiesce_rchecksum(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + int32_t len, dict_t *xdata) { - quiesce_priv_t *priv = NULL; - call_stub_t *stub = NULL; - - priv = this->private; + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; - if (priv && priv->pass_through) { - STACK_WIND (frame, default_entrylk_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->entrylk, - volume, loc, basename, cmd, type, xdata); - return 0; - } + priv = this->private; - stub = fop_entrylk_stub (frame, default_entrylk_resume, - 
volume, loc, basename, cmd, type, xdata); - if (!stub) { - STACK_UNWIND_STRICT (entrylk, frame, -1, ENOMEM, NULL); - return 0; - } + if (priv && priv->pass_through) { + local = mem_get0(priv->local_pool); + local->fd = fd_ref(fd); + local->offset = offset; + local->flag = len; + frame->local = local; - gf_quiesce_enqueue (this, stub); + STACK_WIND(frame, quiesce_rchecksum_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rchecksum, fd, offset, len, xdata); + return 0; + } + stub = fop_rchecksum_stub(frame, default_rchecksum_resume, fd, offset, len, + xdata); + if (!stub) { + STACK_UNWIND_STRICT(rchecksum, frame, -1, ENOMEM, 0, NULL, NULL); return 0; + } + + gf_quiesce_enqueue(this, stub); + + return 0; } int32_t -quiesce_fentrylk (call_frame_t *frame, xlator_t *this, - const char *volume, fd_t *fd, const char *basename, - entrylk_cmd cmd, entrylk_type type, dict_t *xdata) +quiesce_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *xdata) { - quiesce_priv_t *priv = NULL; - call_stub_t *stub = NULL; + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; - priv = this->private; + priv = this->private; - if (priv && priv->pass_through) { - STACK_WIND (frame, default_fentrylk_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fentrylk, - volume, fd, basename, cmd, type, xdata); - return 0; - } + if (priv && priv->pass_through) { + local = mem_get0(priv->local_pool); + local->fd = fd_ref(fd); + local->size = size; + local->offset = off; + frame->local = local; - stub = fop_fentrylk_stub (frame, default_fentrylk_resume, - volume, fd, basename, cmd, type, xdata); - if (!stub) { - STACK_UNWIND_STRICT (fentrylk, frame, -1, ENOMEM, NULL); - return 0; - } - - gf_quiesce_enqueue (this, stub); + STACK_WIND(frame, quiesce_readdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdir, fd, size, off, xdata); + return 0; + } + stub = fop_readdir_stub(frame, default_readdir_resume, fd, size, off, + 
xdata); + if (!stub) { + STACK_UNWIND_STRICT(readdir, frame, -1, ENOMEM, NULL, NULL); return 0; + } + + gf_quiesce_enqueue(this, stub); + + return 0; } int32_t -quiesce_rchecksum (call_frame_t *frame, - xlator_t *this, - fd_t *fd, off_t offset, - int32_t len, dict_t *xdata) +quiesce_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *dict) { - quiesce_priv_t *priv = NULL; - call_stub_t *stub = NULL; - quiesce_local_t *local = NULL; - - priv = this->private; + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; - if (priv && priv->pass_through) { - local = mem_get0 (priv->local_pool); - local->fd = fd_ref (fd); - local->offset = offset; - local->flag = len; - frame->local = local; - - STACK_WIND (frame, - quiesce_rchecksum_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rchecksum, - fd, offset, len, xdata); - return 0; - } + priv = this->private; - stub = fop_rchecksum_stub (frame, default_rchecksum_resume, - fd, offset, len, xdata); - if (!stub) { - STACK_UNWIND_STRICT (rchecksum, frame, -1, ENOMEM, 0, NULL, NULL); - return 0; - } + if (priv && priv->pass_through) { + local = mem_get0(priv->local_pool); + local->fd = fd_ref(fd); + local->size = size; + local->offset = off; + local->dict = dict_ref(dict); + frame->local = local; - gf_quiesce_enqueue (this, stub); + STACK_WIND(frame, quiesce_readdirp_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdirp, fd, size, off, dict); + return 0; + } + stub = fop_readdirp_stub(frame, default_readdirp_resume, fd, size, off, + dict); + if (!stub) { + STACK_UNWIND_STRICT(readdirp, frame, -1, ENOMEM, NULL, NULL); return 0; -} + } + + gf_quiesce_enqueue(this, stub); + return 0; +} int32_t -quiesce_readdir (call_frame_t *frame, - xlator_t *this, - fd_t *fd, - size_t size, - off_t off, dict_t *xdata) +quiesce_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + struct iatt *stbuf, int32_t valid, dict_t *xdata) { - quiesce_priv_t *priv = 
NULL; - call_stub_t *stub = NULL; - quiesce_local_t *local = NULL; + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; - priv = this->private; + priv = this->private; - if (priv && priv->pass_through) { - local = mem_get0 (priv->local_pool); - local->fd = fd_ref (fd); - local->size = size; - local->offset = off; - frame->local = local; - - STACK_WIND (frame, - quiesce_readdir_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readdir, - fd, size, off, xdata); - return 0; - } + if (priv && priv->pass_through) { + STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata); + return 0; + } - stub = fop_readdir_stub (frame, default_readdir_resume, fd, size, off, xdata); - if (!stub) { - STACK_UNWIND_STRICT (readdir, frame, -1, ENOMEM, NULL, NULL); - return 0; - } + stub = fop_setattr_stub(frame, default_setattr_resume, loc, stbuf, valid, + xdata); + if (!stub) { + STACK_UNWIND_STRICT(setattr, frame, -1, ENOMEM, NULL, NULL, NULL); + return 0; + } - gf_quiesce_enqueue (this, stub); + gf_quiesce_enqueue(this, stub); - return 0; + return 0; } - int32_t -quiesce_readdirp (call_frame_t *frame, - xlator_t *this, - fd_t *fd, - size_t size, - off_t off, dict_t *dict) +quiesce_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { - quiesce_priv_t *priv = NULL; - call_stub_t *stub = NULL; - quiesce_local_t *local = NULL; + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; - priv = this->private; + priv = this->private; - if (priv && priv->pass_through) { - local = mem_get0 (priv->local_pool); - local->fd = fd_ref (fd); - local->size = size; - local->offset = off; - local->dict = dict_ref (dict); - frame->local = local; - - STACK_WIND (frame, - quiesce_readdirp_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readdirp, - fd, size, off, dict); - return 0; - } + if (priv && priv->pass_through) { + local = mem_get0(priv->local_pool); + loc_dup(loc, 
&local->loc); + frame->local = local; - stub = fop_readdirp_stub (frame, default_readdirp_resume, fd, size, - off, dict); - if (!stub) { - STACK_UNWIND_STRICT (readdirp, frame, -1, ENOMEM, NULL, NULL); - return 0; - } - - gf_quiesce_enqueue (this, stub); + STACK_WIND(frame, quiesce_stat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->stat, loc, xdata); + return 0; + } + stub = fop_stat_stub(frame, default_stat_resume, loc, xdata); + if (!stub) { + STACK_UNWIND_STRICT(stat, frame, -1, ENOMEM, NULL, NULL); return 0; + } + + gf_quiesce_enqueue(this, stub); + + return 0; } int32_t -quiesce_setattr (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - struct iatt *stbuf, - int32_t valid, dict_t *xdata) +quiesce_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, + dict_t *xattr_req) { - quiesce_priv_t *priv = NULL; - call_stub_t *stub = NULL; + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; - priv = this->private; - - if (priv && priv->pass_through) { - STACK_WIND (frame, - default_setattr_cbk, - FIRST_CHILD (this), - FIRST_CHILD (this)->fops->setattr, - loc, stbuf, valid, xdata); - return 0; - } + priv = this->private; - stub = fop_setattr_stub (frame, default_setattr_resume, - loc, stbuf, valid, xdata); - if (!stub) { - STACK_UNWIND_STRICT (setattr, frame, -1, ENOMEM, NULL, NULL, NULL); - return 0; - } + if (priv && priv->pass_through) { + local = mem_get0(priv->local_pool); + loc_dup(loc, &local->loc); + local->dict = dict_ref(xattr_req); + frame->local = local; - gf_quiesce_enqueue (this, stub); + STACK_WIND(frame, quiesce_lookup_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, loc, xattr_req); + return 0; + } + stub = fop_lookup_stub(frame, default_lookup_resume, loc, xattr_req); + if (!stub) { + STACK_UNWIND_STRICT(lookup, frame, -1, ENOMEM, NULL, NULL, NULL, NULL); return 0; -} + } + + gf_quiesce_enqueue(this, stub); + return 0; +} int32_t -quiesce_stat (call_frame_t *frame, - xlator_t *this, - loc_t 
*loc, dict_t *xdata) +quiesce_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iatt *stbuf, int32_t valid, dict_t *xdata) { - quiesce_priv_t *priv = NULL; - call_stub_t *stub = NULL; - quiesce_local_t *local = NULL; + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; - priv = this->private; + priv = this->private; - if (priv && priv->pass_through) { - local = mem_get0 (priv->local_pool); - loc_dup (loc, &local->loc); - frame->local = local; - - STACK_WIND (frame, - quiesce_stat_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->stat, - loc, xdata); - return 0; - } + if (priv && priv->pass_through) { + STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata); + return 0; + } - stub = fop_stat_stub (frame, default_stat_resume, loc, xdata); - if (!stub) { - STACK_UNWIND_STRICT (stat, frame, -1, ENOMEM, NULL, NULL); - return 0; - } + stub = fop_fsetattr_stub(frame, default_fsetattr_resume, fd, stbuf, valid, + xdata); + if (!stub) { + STACK_UNWIND_STRICT(fsetattr, frame, -1, ENOMEM, NULL, NULL, NULL); + return 0; + } - gf_quiesce_enqueue (this, stub); + gf_quiesce_enqueue(this, stub); - return 0; + return 0; } int32_t -quiesce_lookup (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - dict_t *xattr_req) +quiesce_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, + off_t offset, size_t len, dict_t *xdata) { - quiesce_priv_t *priv = NULL; - call_stub_t *stub = NULL; - quiesce_local_t *local = NULL; + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; - priv = this->private; + priv = this->private; - if (priv && priv->pass_through) { - local = mem_get0 (priv->local_pool); - loc_dup (loc, &local->loc); - local->dict = dict_ref (xattr_req); - frame->local = local; - - STACK_WIND (frame, - quiesce_lookup_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, - loc, xattr_req); - return 0; - } + if (priv && priv->pass_through) { + STACK_WIND(frame, 
default_fallocate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len, + xdata); + return 0; + } + + stub = fop_fallocate_stub(frame, default_fallocate_resume, fd, mode, offset, + len, xdata); + if (!stub) { + STACK_UNWIND_STRICT(fallocate, frame, -1, ENOMEM, NULL, NULL, NULL); + return 0; + } - stub = fop_lookup_stub (frame, default_lookup_resume, loc, xattr_req); + gf_quiesce_enqueue(this, stub); + + return 0; +} + +int +quiesce_seek_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, off_t offset, dict_t *xdata) +{ + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; + + local = frame->local; + frame->local = NULL; + if ((op_ret == -1) && (op_errno == ENOTCONN)) { + /* Re-transmit (by putting in the queue) */ + stub = fop_seek_stub(frame, default_seek_resume, local->fd, + local->offset, local->what, xdata); if (!stub) { - STACK_UNWIND_STRICT (lookup, frame, -1, ENOMEM, - NULL, NULL, NULL, NULL); - return 0; + STACK_UNWIND_STRICT(seek, frame, -1, ENOMEM, 0, NULL); + goto out; } - gf_quiesce_enqueue (this, stub); + gf_quiesce_enqueue(this, stub); + goto out; + } - return 0; + STACK_UNWIND_STRICT(seek, frame, op_ret, op_errno, offset, xdata); +out: + gf_quiesce_local_wipe(this, local); + + return 0; } -int32_t -quiesce_fsetattr (call_frame_t *frame, - xlator_t *this, - fd_t *fd, - struct iatt *stbuf, - int32_t valid, dict_t *xdata) +int +quiesce_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + gf_seek_what_t what, dict_t *xdata) { - quiesce_priv_t *priv = NULL; - call_stub_t *stub = NULL; + quiesce_priv_t *priv = NULL; + call_stub_t *stub = NULL; + quiesce_local_t *local = NULL; - priv = this->private; + priv = this->private; - if (priv && priv->pass_through) { - STACK_WIND (frame, - default_fsetattr_cbk, - FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fsetattr, - fd, stbuf, valid, xdata); - return 0; - } + if (priv && priv->pass_through) { + local = 
mem_get0(priv->local_pool); + local->fd = fd_ref(fd); + local->offset = offset; + local->what = what; - stub = fop_fsetattr_stub (frame, default_fsetattr_resume, - fd, stbuf, valid, xdata); - if (!stub) { - STACK_UNWIND_STRICT (fsetattr, frame, -1, ENOMEM, NULL, NULL, NULL); - return 0; - } + frame->local = local; - gf_quiesce_enqueue (this, stub); + STACK_WIND(frame, quiesce_seek_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->seek, fd, offset, what, xdata); + return 0; + } + stub = fop_seek_stub(frame, default_seek_resume, fd, offset, what, xdata); + if (!stub) { + STACK_UNWIND_STRICT(seek, frame, -1, ENOMEM, 0, NULL); return 0; + } + + gf_quiesce_enqueue(this, stub); + + return 0; } int32_t -mem_acct_init (xlator_t *this) +mem_acct_init(xlator_t *this) { - int ret = -1; + int ret = -1; - ret = xlator_mem_acct_init (this, gf_quiesce_mt_end + 1); + ret = xlator_mem_acct_init(this, gf_quiesce_mt_end + 1); - return ret; + return ret; } int -init (xlator_t *this) +reconfigure(xlator_t *this, dict_t *options) { - int ret = -1; - quiesce_priv_t *priv = NULL; + int32_t ret = -1; + quiesce_priv_t *priv = NULL; - if (!this->children || this->children->next) { - gf_log (this->name, GF_LOG_ERROR, - "'quiesce' not configured with exactly one child"); - goto out; - } + priv = this->private; - if (!this->parents) { - gf_log (this->name, GF_LOG_WARNING, - "dangling volume. 
check volfile "); - } + GF_OPTION_RECONF("timeout", priv->timeout, options, time, out); + GF_OPTION_RECONF("failover-hosts", priv->failover_hosts, options, str, out); + gf_quiesce_populate_failover_hosts(this, priv, priv->failover_hosts); - priv = GF_CALLOC (1, sizeof (*priv), gf_quiesce_mt_priv_t); - if (!priv) - goto out; + ret = 0; +out: + return ret; +} + +int +init(xlator_t *this) +{ + int ret = -1; + quiesce_priv_t *priv = NULL; - priv->local_pool = mem_pool_new (quiesce_local_t, - GF_FOPS_EXPECTED_IN_PARALLEL); + if (!this->children || this->children->next) { + gf_log(this->name, GF_LOG_ERROR, + "'quiesce' not configured with exactly one child"); + goto out; + } - LOCK_INIT (&priv->lock); - priv->pass_through = _gf_false; + if (!this->parents) { + gf_log(this->name, GF_LOG_WARNING, "dangling volume. check volfile "); + } - INIT_LIST_HEAD (&priv->req); + priv = GF_CALLOC(1, sizeof(*priv), gf_quiesce_mt_priv_t); + if (!priv) + goto out; - this->private = priv; - ret = 0; + INIT_LIST_HEAD(&priv->failover_list); + + GF_OPTION_INIT("timeout", priv->timeout, time, out); + GF_OPTION_INIT("failover-hosts", priv->failover_hosts, str, out); + gf_quiesce_populate_failover_hosts(this, priv, priv->failover_hosts); + + priv->local_pool = mem_pool_new(quiesce_local_t, + GF_FOPS_EXPECTED_IN_PARALLEL); + + LOCK_INIT(&priv->lock); + priv->pass_through = _gf_false; + + INIT_LIST_HEAD(&priv->req); + + this->private = priv; + ret = 0; out: - return ret; + return ret; } void -fini (xlator_t *this) +fini(xlator_t *this) { - quiesce_priv_t *priv = NULL; + quiesce_priv_t *priv = NULL; - priv = this->private; - if (!priv) - goto out; - this->private = NULL; + priv = this->private; + if (!priv) + goto out; + this->private = NULL; - mem_pool_destroy (priv->local_pool); - LOCK_DESTROY (&priv->lock); - GF_FREE (priv); + mem_pool_destroy(priv->local_pool); + priv->local_pool = NULL; + LOCK_DESTROY(&priv->lock); + GF_FREE(priv); out: - return; + return; } int -notify (xlator_t *this, int 
event, void *data, ...) -{ - int ret = 0; - quiesce_priv_t *priv = NULL; - struct timespec timeout = {0,}; - - priv = this->private; - if (!priv) - goto out; - - switch (event) { - case GF_EVENT_CHILD_UP: - { - ret = pthread_create (&priv->thr, NULL, gf_quiesce_dequeue_start, - this); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "failed to create the quiesce-dequeue thread"); - } - - LOCK (&priv->lock); - { - priv->pass_through = _gf_true; - } - UNLOCK (&priv->lock); - break; +notify(xlator_t *this, int event, void *data, ...) +{ + int ret = 0; + quiesce_priv_t *priv = NULL; + + priv = this->private; + if (!priv) + goto out; + + switch (event) { + case GF_EVENT_CHILD_UP: { + ret = gf_thread_create(&priv->thr, NULL, gf_quiesce_dequeue_start, + this, "quiesce"); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, + "failed to create the quiesce-dequeue thread"); + } + + LOCK(&priv->lock); + { + priv->pass_through = _gf_true; + } + UNLOCK(&priv->lock); + break; } case GF_EVENT_CHILD_DOWN: - LOCK (&priv->lock); - { - priv->pass_through = _gf_false; - } - UNLOCK (&priv->lock); - - if (priv->timer) - break; - timeout.tv_sec = 20; - timeout.tv_nsec = 0; - - priv->timer = gf_timer_call_after (this->ctx, - timeout, - gf_quiesce_timeout, - (void *) this); - - if (priv->timer == NULL) { - gf_log (this->name, GF_LOG_ERROR, - "Cannot create timer"); - } - - break; + LOCK(&priv->lock); + { + priv->pass_through = _gf_false; + __gf_quiesce_start_timer(this, priv); + } + UNLOCK(&priv->lock); + break; default: - break; - } + break; + } - ret = default_notify (this, event, data); + ret = default_notify(this, event, data); out: - return ret; + return ret; } - struct xlator_fops fops = { - /* write/modifying fops */ - .mknod = quiesce_mknod, - .create = quiesce_create, - .truncate = quiesce_truncate, - .ftruncate = quiesce_ftruncate, - .setxattr = quiesce_setxattr, - .removexattr = quiesce_removexattr, - .symlink = quiesce_symlink, - .unlink = quiesce_unlink, - .link = quiesce_link, 
- .mkdir = quiesce_mkdir, - .rmdir = quiesce_rmdir, - .rename = quiesce_rename, - - /* The below calls are known to change state, hence - re-transmittion is not advised */ - .lk = quiesce_lk, - .inodelk = quiesce_inodelk, - .finodelk = quiesce_finodelk, - .entrylk = quiesce_entrylk, - .fentrylk = quiesce_fentrylk, - .xattrop = quiesce_xattrop, - .fxattrop = quiesce_fxattrop, - .setattr = quiesce_setattr, - .fsetattr = quiesce_fsetattr, - - /* Special case, re-transmittion is not harmful * - * as offset is properly sent from above layers */ - /* TODO: not re-transmitted as of now */ - .writev = quiesce_writev, - - /* re-transmittable fops */ - .lookup = quiesce_lookup, - .stat = quiesce_stat, - .fstat = quiesce_fstat, - .access = quiesce_access, - .readlink = quiesce_readlink, - .getxattr = quiesce_getxattr, - .open = quiesce_open, - .readv = quiesce_readv, - .flush = quiesce_flush, - .fsync = quiesce_fsync, - .statfs = quiesce_statfs, - .opendir = quiesce_opendir, - .readdir = quiesce_readdir, - .readdirp = quiesce_readdirp, - .fsyncdir = quiesce_fsyncdir, - + /* write/modifying fops */ + .mknod = quiesce_mknod, + .create = quiesce_create, + .truncate = quiesce_truncate, + .ftruncate = quiesce_ftruncate, + .setxattr = quiesce_setxattr, + .fsetxattr = quiesce_fsetxattr, + .removexattr = quiesce_removexattr, + .fremovexattr = quiesce_fremovexattr, + .symlink = quiesce_symlink, + .unlink = quiesce_unlink, + .link = quiesce_link, + .mkdir = quiesce_mkdir, + .rmdir = quiesce_rmdir, + .rename = quiesce_rename, + .fallocate = quiesce_fallocate, + + /* The below calls are known to change state, hence + re-transmittion is not advised */ + .lk = quiesce_lk, + .inodelk = quiesce_inodelk, + .finodelk = quiesce_finodelk, + .entrylk = quiesce_entrylk, + .fentrylk = quiesce_fentrylk, + .xattrop = quiesce_xattrop, + .fxattrop = quiesce_fxattrop, + .setattr = quiesce_setattr, + .fsetattr = quiesce_fsetattr, + + /* Special case, re-transmittion is not harmful * + * as offset is 
properly sent from above layers */ + /* TODO: not re-transmitted as of now */ + .writev = quiesce_writev, + + /* re-transmittable fops */ + .lookup = quiesce_lookup, + .stat = quiesce_stat, + .fstat = quiesce_fstat, + .access = quiesce_access, + .readlink = quiesce_readlink, + .getxattr = quiesce_getxattr, + .fgetxattr = quiesce_fgetxattr, + .open = quiesce_open, + .readv = quiesce_readv, + .flush = quiesce_flush, + .fsync = quiesce_fsync, + .statfs = quiesce_statfs, + .opendir = quiesce_opendir, + .readdir = quiesce_readdir, + .readdirp = quiesce_readdirp, + .fsyncdir = quiesce_fsyncdir, + .seek = quiesce_seek, }; struct xlator_dumpops dumpops; - struct xlator_cbks cbks; - struct volume_options options[] = { - { .key = {NULL} }, + { + .key = {"timeout"}, + .type = GF_OPTION_TYPE_TIME, + .default_value = "45", + .description = + "After 'timeout' seconds since the time 'quiesce' " + "option was set to \"!pass-through\", acknowledgements to file " + "operations are no longer quiesced and previously " + "quiesced acknowledgements are sent to the application", + .op_version = {GD_OP_VERSION_4_0_0}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + }, + {.key = {"failover-hosts"}, + .type = GF_OPTION_TYPE_INTERNET_ADDRESS_LIST, + .op_version = {GD_OP_VERSION_4_0_0}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .description = "It is a comma separated list of hostname/IP " + "addresses. 
It Specifies the list of hosts where " + "the gfproxy daemons are running, to which the " + "the thin clients can failover to."}, + {.key = {NULL}}, +}; + +xlator_api_t xlator_api = { + .init = init, + .fini = fini, + .notify = notify, + .reconfigure = reconfigure, + .mem_acct_init = mem_acct_init, + .op_version = {GD_OP_VERSION_3_12_0}, + .dumpops = &dumpops, + .fops = &fops, + .cbks = &cbks, + .options = options, + .identifier = "quiesce", + .category = GF_TECH_PREVIEW, }; diff --git a/xlators/features/quiesce/src/quiesce.h b/xlators/features/quiesce/src/quiesce.h index 878ed77e928..6ab2af40a56 100644 --- a/xlators/features/quiesce/src/quiesce.h +++ b/xlators/features/quiesce/src/quiesce.h @@ -12,40 +12,54 @@ #define __QUIESCE_H__ #include "quiesce-mem-types.h" -#include "xlator.h" -#include "timer.h" +#include "quiesce-messages.h" +#include <glusterfs/xlator.h> +#include <glusterfs/timer.h> #define GF_FOPS_EXPECTED_IN_PARALLEL 512 typedef struct { - gf_timer_t *timer; - gf_boolean_t pass_through; - gf_lock_t lock; - struct list_head req; - int queue_size; - pthread_t thr; - struct mem_pool *local_pool; + struct list_head list; + char *addr; + gf_boolean_t tried; /* indicates attempted connecting */ +} quiesce_failover_hosts_t; + +typedef struct { + gf_timer_t *timer; + gf_boolean_t pass_through; + gf_lock_t lock; + struct list_head req; + int queue_size; + pthread_t thr; + struct mem_pool *local_pool; + uint32_t timeout; + char *failover_hosts; + struct list_head failover_list; } quiesce_priv_t; typedef struct { - fd_t *fd; - char *name; - char *volname; - loc_t loc; - off_t size; - off_t offset; - mode_t mode; - int32_t flag; - struct iatt stbuf; - struct iovec *vector; - struct iobref *iobref; - dict_t *dict; - struct gf_flock flock; - entrylk_cmd cmd; - entrylk_type type; - gf_xattrop_flags_t xattrop_flags; - int32_t wbflags; - uint32_t io_flag; + fd_t *fd; + char *name; + char *volname; + loc_t loc; + off_t size; + off_t offset; + mode_t mode; + int32_t 
flag; + struct iatt stbuf; + struct iovec *vector; + struct iobref *iobref; + dict_t *dict; + struct gf_flock flock; + entrylk_cmd cmd; + entrylk_type type; + gf_xattrop_flags_t xattrop_flags; + int32_t wbflags; + uint32_t io_flag; + /* for fallocate */ + size_t len; + /* for lseek */ + gf_seek_what_t what; } quiesce_local_t; #endif diff --git a/xlators/features/quota/src/Makefile.am b/xlators/features/quota/src/Makefile.am index a6d2c86bf58..1c2dcef0ca3 100644 --- a/xlators/features/quota/src/Makefile.am +++ b/xlators/features/quota/src/Makefile.am @@ -1,25 +1,29 @@ +if WITH_SERVER xlator_LTLIBRARIES = quota.la quotad.la +endif xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features -quota_la_LDFLAGS = $(GF_XLATOR_DEFAULT_LDFLAGS) -quotad_la_LDFLAGS = -module -avoid-version -export-symbols $(top_srcdir)/xlators/features/quota/src/quotad.sym +quota_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) +quotad_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) quota_la_SOURCES = quota.c quota-enforcer-client.c -quota_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la +quota_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ + $(top_builddir)/rpc/xdr/src/libgfxdr.la \ + $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la quotad_la_SOURCES = quotad.c quotad-helpers.c quotad-aggregator.c -quotad_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la +quotad_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ + $(top_builddir)/rpc/xdr/src/libgfxdr.la \ + $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la noinst_HEADERS = quota-mem-types.h quota.h quotad-aggregator.h \ - quotad-helpers.h quota-messages.h + quotad-helpers.h quota-messages.h AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ - -I$(top_srcdir)/xlators/cluster/dht/src -I$(top_srcdir)/rpc/xdr/src/ \ - -I$(top_srcdir)/rpc/rpc-lib/src + -I$(top_srcdir)/rpc/xdr/src/ -I$(top_builddir)/rpc/xdr/src/ \ + -I$(top_srcdir)/rpc/rpc-lib/src \ + 
-I$(top_srcdir)/xlators/cluster/dht/src AM_CFLAGS = -Wall $(GF_CFLAGS) CLEANFILES = - -EXTRA_DIST = quotad.sym - diff --git a/xlators/features/quota/src/quota-enforcer-client.c b/xlators/features/quota/src/quota-enforcer-client.c index 6f36c081dbc..480d64ade27 100644 --- a/xlators/features/quota/src/quota-enforcer-client.c +++ b/xlators/features/quota/src/quota-enforcer-client.c @@ -32,460 +32,472 @@ #include <malloc.h> #endif -#ifdef HAVE_MALLOC_STATS -#ifdef DEBUG -#include <mcheck.h> -#endif -#endif - #include "quota.h" #include "quota-messages.h" extern struct rpc_clnt_program quota_enforcer_clnt; int32_t -quota_validate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *xdata, struct iatt *postparent); +quota_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, struct iatt *postparent); int -quota_enforcer_submit_request (void *req, call_frame_t *frame, - rpc_clnt_prog_t *prog, - int procnum, struct iobref *iobref, - xlator_t *this, fop_cbk_fn_t cbkfn, - xdrproc_t xdrproc) +quota_enforcer_submit_request(void *req, call_frame_t *frame, + rpc_clnt_prog_t *prog, int procnum, + struct iobref *iobref, xlator_t *this, + fop_cbk_fn_t cbkfn, xdrproc_t xdrproc) { - int ret = -1; - int count = 0; - struct iovec iov = {0, }; - struct iobuf *iobuf = NULL; - char new_iobref = 0; - ssize_t xdr_size = 0; - quota_priv_t *priv = NULL; - - GF_ASSERT (this); - - priv = this->private; - - if (req) { - xdr_size = xdr_sizeof (xdrproc, req); - iobuf = iobuf_get2 (this->ctx->iobuf_pool, xdr_size); - if (!iobuf) { - goto out; - } - - if (!iobref) { - iobref = iobref_new (); - if (!iobref) { - goto out; - } - - new_iobref = 1; - } - - iobref_add (iobref, iobuf); - - iov.iov_base = iobuf->ptr; - iov.iov_len = iobuf_size (iobuf); - - /* Create the xdr payload */ - ret = xdr_serialize_generic (iov, req, 
xdrproc); - if (ret == -1) { - goto out; - } - iov.iov_len = ret; - count = 1; + int ret = -1; + int count = 0; + struct iovec iov = { + 0, + }; + struct iobuf *iobuf = NULL; + char new_iobref = 0; + ssize_t xdr_size = 0; + quota_priv_t *priv = NULL; + + GF_ASSERT(this); + + priv = this->private; + + if (req) { + xdr_size = xdr_sizeof(xdrproc, req); + iobuf = iobuf_get2(this->ctx->iobuf_pool, xdr_size); + if (!iobuf) { + goto out; + } + + if (!iobref) { + iobref = iobref_new(); + if (!iobref) { + goto out; + } + + new_iobref = 1; } - /* Send the msg */ - ret = rpc_clnt_submit (priv->rpc_clnt, prog, procnum, cbkfn, - &iov, count, - NULL, 0, iobref, frame, NULL, 0, NULL, 0, NULL); - ret = 0; + iobref_add(iobref, iobuf); + + iov.iov_base = iobuf->ptr; + iov.iov_len = iobuf_size(iobuf); + + /* Create the xdr payload */ + ret = xdr_serialize_generic(iov, req, xdrproc); + if (ret == -1) { + goto out; + } + iov.iov_len = ret; + count = 1; + } + + /* Send the msg */ + ret = rpc_clnt_submit(priv->rpc_clnt, prog, procnum, cbkfn, &iov, count, + NULL, 0, iobref, frame, NULL, 0, NULL, 0, NULL); + ret = 0; out: - if (new_iobref) - iobref_unref (iobref); - if (iobuf) - iobuf_unref (iobuf); + if (new_iobref) + iobref_unref(iobref); + if (iobuf) + iobuf_unref(iobuf); - return ret; + return ret; } int -quota_enforcer_lookup_cbk (struct rpc_req *req, struct iovec *iov, - int count, void *myframe) +quota_enforcer_lookup_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) { - quota_local_t *local = NULL; - call_frame_t *frame = NULL; - int ret = 0; - gfs3_lookup_rsp rsp = {0,}; - struct iatt stbuf = {0,}; - struct iatt postparent = {0,}; - int op_errno = EINVAL; - dict_t *xdata = NULL; - inode_t *inode = NULL; - xlator_t *this = NULL; - quota_priv_t *priv = NULL; - struct timespec retry_delay = {0,}; - gf_timer_t *timer = NULL; - - this = THIS; - - frame = myframe; - local = frame->local; - inode = local->validate_loc.inode; - priv = this->private; - - if (-1 == 
req->rpc_status) { - rsp.op_ret = -1; - op_errno = ENOTCONN; - goto out; - } + quota_local_t *local = NULL; + call_frame_t *frame = NULL; + int ret = 0; + gfs3_lookup_rsp rsp = { + 0, + }; + struct iatt stbuf = { + 0, + }; + struct iatt postparent = { + 0, + }; + int op_errno = EINVAL; + dict_t *xdata = NULL; + inode_t *inode = NULL; + xlator_t *this = NULL; + quota_priv_t *priv = NULL; + struct timespec retry_delay = { + 0, + }; + gf_timer_t *timer = NULL; + + this = THIS; + + frame = myframe; + local = frame->local; + inode = local->validate_loc.inode; + priv = this->private; + + if (-1 == req->rpc_status) { + rsp.op_ret = -1; + op_errno = ENOTCONN; + goto out; + } + + ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gfs3_lookup_rsp); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, Q_MSG_XDR_DECODING_FAILED, + "XDR decoding failed"); + rsp.op_ret = -1; + op_errno = EINVAL; + goto out; + } - ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_lookup_rsp); - if (ret < 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - Q_MSG_XDR_DECODING_FAILED, - "XDR decoding failed"); - rsp.op_ret = -1; - op_errno = EINVAL; - goto out; - } + op_errno = gf_error_to_errno(rsp.op_errno); + gf_stat_to_iatt(&rsp.postparent, &postparent); - op_errno = gf_error_to_errno (rsp.op_errno); - gf_stat_to_iatt (&rsp.postparent, &postparent); + if (rsp.op_ret == -1) + goto out; - if (rsp.op_ret == -1) - goto out; + rsp.op_ret = -1; + gf_stat_to_iatt(&rsp.stat, &stbuf); + + GF_PROTOCOL_DICT_UNSERIALIZE(frame->this, xdata, (rsp.xdata.xdata_val), + (rsp.xdata.xdata_len), rsp.op_ret, op_errno, + out); + if ((!gf_uuid_is_null(inode->gfid)) && + (gf_uuid_compare(stbuf.ia_gfid, inode->gfid) != 0)) { + gf_msg_debug(frame->this->name, ESTALE, "gfid changed for %s", + local->validate_loc.path); rsp.op_ret = -1; - gf_stat_to_iatt (&rsp.stat, &stbuf); - - GF_PROTOCOL_DICT_UNSERIALIZE (frame->this, xdata, (rsp.xdata.xdata_val), - (rsp.xdata.xdata_len), rsp.op_ret, - op_errno, out); - - if 
((!gf_uuid_is_null (inode->gfid)) - && (gf_uuid_compare (stbuf.ia_gfid, inode->gfid) != 0)) { - gf_msg_debug (frame->this->name, ESTALE, - "gfid changed for %s", local->validate_loc.path); - rsp.op_ret = -1; - op_errno = ESTALE; - goto out; - } + op_errno = ESTALE; + goto out; + } - rsp.op_ret = 0; + rsp.op_ret = 0; out: - rsp.op_errno = op_errno; - - /* We need to retry connecting to quotad on ENOTCONN error. - * Suppose if there are two volumes vol1 and vol2, - * and quota is enabled and limit is set on vol1. - * Now if IO is happening on vol1 and quota is enabled/disabled - * on vol2, quotad gets restarted and client will receive - * ENOTCONN in the IO path of vol1 - */ - if (rsp.op_ret == -1 && rsp.op_errno == ENOTCONN) { - if (local->quotad_conn_retry >= 12) { - priv->quotad_conn_status = 1; - gf_log (this->name, GF_LOG_WARNING, "failed to connect " - "to quotad after retry count %d)", - local->quotad_conn_retry); - } else { - local->quotad_conn_retry++; - } - - if (priv->quotad_conn_status == 0) { - /* retry connecting after 5secs for 12 retries - * (upto 60sec). - */ - gf_log (this->name, GF_LOG_DEBUG, "retry connecting to " - "quotad (retry count %d)", - local->quotad_conn_retry); - - retry_delay.tv_sec = 5; - retry_delay.tv_nsec = 0; - timer = gf_timer_call_after (this->ctx, retry_delay, - _quota_enforcer_lookup, - (void *) frame); - if (timer == NULL) { - gf_log (this->name, GF_LOG_WARNING, "failed to " - "set quota_enforcer_lookup with timer"); - } else { - goto clean; - } - } + rsp.op_errno = op_errno; + + /* We need to retry connecting to quotad on ENOTCONN error. + * Suppose if there are two volumes vol1 and vol2, + * and quota is enabled and limit is set on vol1. 
+ * Now if IO is happening on vol1 and quota is enabled/disabled + * on vol2, quotad gets restarted and client will receive + * ENOTCONN in the IO path of vol1 + */ + if (rsp.op_ret == -1 && rsp.op_errno == ENOTCONN) { + if (local->quotad_conn_retry >= 12) { + priv->quotad_conn_status = 1; + gf_log(this->name, GF_LOG_WARNING, + "failed to connect " + "to quotad after retry count %d)", + local->quotad_conn_retry); } else { - priv->quotad_conn_status = 0; + local->quotad_conn_retry++; } - if (rsp.op_ret == -1) { - /* any error other than ENOENT */ - if (rsp.op_errno != ENOENT) - gf_msg (this->name, GF_LOG_WARNING, rsp.op_errno, - Q_MSG_LOOKUP_FAILED, - "Getting cluster-wide size of directory failed " - "(path: %s gfid:%s)", local->validate_loc.path, - loc_gfid_utoa (&local->validate_loc)); - else - gf_msg_trace (this->name, ENOENT, - "not found on remote node"); - - } else if (local->quotad_conn_retry) { - gf_log (this->name, GF_LOG_DEBUG, "connected to quotad after " - "retry count %d", local->quotad_conn_retry); + if (priv->quotad_conn_status == 0) { + /* retry connecting after 5secs for 12 retries + * (up to 60sec). 
+ */ + gf_log(this->name, GF_LOG_DEBUG, + "retry connecting to " + "quotad (retry count %d)", + local->quotad_conn_retry); + + retry_delay.tv_sec = 5; + retry_delay.tv_nsec = 0; + timer = gf_timer_call_after(this->ctx, retry_delay, + _quota_enforcer_lookup, (void *)frame); + if (timer == NULL) { + gf_log(this->name, GF_LOG_WARNING, + "failed to " + "set quota_enforcer_lookup with timer"); + } else { + goto clean; + } } + } else { + priv->quotad_conn_status = 0; + } + + if (rsp.op_ret == -1) { + /* any error other than ENOENT */ + if (rsp.op_errno != ENOENT) + gf_msg( + this->name, GF_LOG_WARNING, rsp.op_errno, Q_MSG_LOOKUP_FAILED, + "Getting cluster-wide size of directory failed " + "(path: %s gfid:%s)", + local->validate_loc.path, loc_gfid_utoa(&local->validate_loc)); + else + gf_msg_trace(this->name, ENOENT, "not found on remote node"); - local->validate_cbk (frame, NULL, this, rsp.op_ret, rsp.op_errno, inode, - &stbuf, xdata, &postparent); + } else if (local->quotad_conn_retry) { + gf_log(this->name, GF_LOG_DEBUG, + "connected to quotad after " + "retry count %d", + local->quotad_conn_retry); + } + + local->validate_cbk(frame, NULL, this, rsp.op_ret, rsp.op_errno, inode, + &stbuf, xdata, &postparent); clean: - if (xdata) - dict_unref (xdata); + if (xdata) + dict_unref(xdata); - free (rsp.xdata.xdata_val); + free(rsp.xdata.xdata_val); - return 0; + return 0; } void -_quota_enforcer_lookup (void *data) +_quota_enforcer_lookup(void *data) { - quota_local_t *local = NULL; - gfs3_lookup_req req = {{0,},}; - int ret = 0; - int op_errno = ESTALE; - quota_priv_t *priv = NULL; - call_frame_t *frame = NULL; - loc_t *loc = NULL; - xlator_t *this = NULL; - char *dir_path = NULL; - - frame = data; - local = frame->local; - this = local->this; - loc = &local->validate_loc; - - priv = this->private; - - if (!(loc && loc->inode)) - goto unwind; - - if (!gf_uuid_is_null (loc->inode->gfid)) - memcpy (req.gfid, loc->inode->gfid, 16); - else - memcpy (req.gfid, loc->gfid, 16); - - 
if (local->validate_xdata) { - GF_PROTOCOL_DICT_SERIALIZE (this, local->validate_xdata, - (&req.xdata.xdata_val), - req.xdata.xdata_len, - op_errno, unwind); - } - - if (loc->name) - req.bname = (char *)loc->name; - else - req.bname = ""; - - if (loc->path) - dir_path = (char *)loc->path; - else - dir_path = ""; - - ret = quota_enforcer_submit_request (&req, frame, - priv->quota_enforcer, - GF_AGGREGATOR_LOOKUP, - NULL, this, - quota_enforcer_lookup_cbk, - (xdrproc_t)xdr_gfs3_lookup_req); - - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, 0, - Q_MSG_RPC_SUBMIT_FAILED, "Couldn't send the request to " - "fetch cluster wide size of directory (path:%s gfid:%s)" - , dir_path, req.gfid); - } - - GF_FREE (req.xdata.xdata_val); - - return; + quota_local_t *local = NULL; + gfs3_lookup_req req = { + { + 0, + }, + }; + int ret = 0; + int op_errno = ESTALE; + quota_priv_t *priv = NULL; + call_frame_t *frame = NULL; + loc_t *loc = NULL; + xlator_t *this = NULL; + char *dir_path = NULL; + + frame = data; + local = frame->local; + this = local->this; + loc = &local->validate_loc; + + priv = this->private; + + if (!(loc && loc->inode)) + goto unwind; + + if (!gf_uuid_is_null(loc->inode->gfid)) + memcpy(req.gfid, loc->inode->gfid, 16); + else + memcpy(req.gfid, loc->gfid, 16); + + if (local->validate_xdata) { + GF_PROTOCOL_DICT_SERIALIZE(this, local->validate_xdata, + (&req.xdata.xdata_val), req.xdata.xdata_len, + op_errno, unwind); + } + + if (loc->name) + req.bname = (char *)loc->name; + else + req.bname = ""; + + if (loc->path) + dir_path = (char *)loc->path; + else + dir_path = ""; + + ret = quota_enforcer_submit_request( + &req, frame, priv->quota_enforcer, GF_AGGREGATOR_LOOKUP, NULL, this, + quota_enforcer_lookup_cbk, (xdrproc_t)xdr_gfs3_lookup_req); + + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, Q_MSG_RPC_SUBMIT_FAILED, + "Couldn't send the request to " + "fetch cluster wide size of directory (path:%s gfid:%s)", + dir_path, req.gfid); + } + + 
GF_FREE(req.xdata.xdata_val); + + return; unwind: - local->validate_cbk (frame, NULL, this, -1, op_errno, NULL, NULL, NULL, - NULL); + local->validate_cbk(frame, NULL, this, -1, op_errno, NULL, NULL, NULL, + NULL); - GF_FREE (req.xdata.xdata_val); + GF_FREE(req.xdata.xdata_val); - return; + return; } int -quota_enforcer_lookup (call_frame_t *frame, xlator_t *this, dict_t *xdata, - fop_lookup_cbk_t validate_cbk) +quota_enforcer_lookup(call_frame_t *frame, xlator_t *this, dict_t *xdata, + fop_lookup_cbk_t validate_cbk) { - quota_local_t *local = NULL; + quota_local_t *local = NULL; - if (!frame || !this) - goto unwind; + if (!frame || !this) + goto unwind; - local = frame->local; - local->this = this; - local->validate_cbk = validate_cbk; - local->validate_xdata = dict_ref (xdata); + local = frame->local; + local->this = this; + local->validate_cbk = validate_cbk; + local->validate_xdata = dict_ref(xdata); - _quota_enforcer_lookup (frame); + _quota_enforcer_lookup(frame); - return 0; + return 0; unwind: - validate_cbk (frame, NULL, this, -1, ESTALE, NULL, NULL, NULL, NULL); + validate_cbk(frame, NULL, this, -1, ESTALE, NULL, NULL, NULL, NULL); - return 0; + return 0; } int -quota_enforcer_notify (struct rpc_clnt *rpc, void *mydata, - rpc_clnt_event_t event, void *data) +quota_enforcer_notify(struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data) { - xlator_t *this = NULL; - int ret = 0; - - this = mydata; - - switch (event) { - case RPC_CLNT_CONNECT: - { - gf_msg_trace (this->name, 0, "got RPC_CLNT_CONNECT"); - break; + xlator_t *this = NULL; + int ret = 0; + quota_priv_t *priv = NULL; + + this = mydata; + priv = this->private; + switch (event) { + case RPC_CLNT_CONNECT: { + pthread_mutex_lock(&priv->conn_mutex); + { + priv->conn_status = _gf_true; + } + pthread_mutex_unlock(&priv->conn_mutex); + gf_msg_trace(this->name, 0, "got RPC_CLNT_CONNECT"); + break; } - case RPC_CLNT_DISCONNECT: - { - gf_msg_trace (this->name, 0, "got RPC_CLNT_DISCONNECT"); 
- break; + case RPC_CLNT_DISCONNECT: { + pthread_mutex_lock(&priv->conn_mutex); + { + priv->conn_status = _gf_false; + pthread_cond_signal(&priv->conn_cond); + } + pthread_mutex_unlock(&priv->conn_mutex); + gf_msg_trace(this->name, 0, "got RPC_CLNT_DISCONNECT"); + break; } default: - gf_msg_trace (this->name, 0, - "got some other RPC event %d", event); - ret = 0; - break; - } + gf_msg_trace(this->name, 0, "got some other RPC event %d", event); + ret = 0; + break; + } - return ret; + return ret; } int -quota_enforcer_blocking_connect (rpc_clnt_t *rpc) +quota_enforcer_blocking_connect(rpc_clnt_t *rpc) { - dict_t *options = NULL; - int ret = -1; + dict_t *options = NULL; + int ret = -1; - options = dict_new (); - if (options == NULL) - goto out; + options = dict_new(); + if (options == NULL) + goto out; - ret = dict_set_str (options, "non-blocking-io", "no"); - if (ret) - goto out; + ret = dict_set_sizen_str_sizen(options, "non-blocking-io", "no"); + if (ret) + goto out; - rpc->conn.trans->reconfigure (rpc->conn.trans, options); + rpc->conn.trans->reconfigure(rpc->conn.trans, options); - rpc_clnt_start (rpc); + rpc_clnt_start(rpc); - ret = dict_set_str (options, "non-blocking-io", "yes"); - if (ret) - goto out; + ret = dict_set_sizen_str_sizen(options, "non-blocking-io", "yes"); + if (ret) + goto out; - rpc->conn.trans->reconfigure (rpc->conn.trans, options); + rpc->conn.trans->reconfigure(rpc->conn.trans, options); - ret = 0; + ret = 0; out: - if (options) - dict_unref (options); + if (options) + dict_unref(options); - return ret; + return ret; } -//Returns a started rpc_clnt. Creates a new rpc_clnt if quota_priv doesn't have -//one already +// Returns a started rpc_clnt. 
Creates a new rpc_clnt if quota_priv doesn't have +// one already struct rpc_clnt * -quota_enforcer_init (xlator_t *this, dict_t *options) +quota_enforcer_init(xlator_t *this, dict_t *options) { - struct rpc_clnt *rpc = NULL; - quota_priv_t *priv = NULL; - int ret = -1; + struct rpc_clnt *rpc = NULL; + quota_priv_t *priv = NULL; + int ret = -1; - priv = this->private; + priv = this->private; - LOCK (&priv->lock); - { - if (priv->rpc_clnt) { - ret = 0; - rpc = priv->rpc_clnt; - } + LOCK(&priv->lock); + { + if (priv->rpc_clnt) { + ret = 0; + rpc = priv->rpc_clnt; } - UNLOCK (&priv->lock); + } + UNLOCK(&priv->lock); - if (rpc) - goto out; + if (rpc) + goto out; - priv->quota_enforcer = &quota_enforcer_clnt; + priv->quota_enforcer = &quota_enforcer_clnt; - ret = dict_set_str (options, "transport.address-family", "unix"); - if (ret) - goto out; + ret = dict_set_sizen_str_sizen(options, "transport.address-family", "unix"); + if (ret) + goto out; - ret = dict_set_str (options, "transport-type", "socket"); - if (ret) - goto out; + ret = dict_set_sizen_str_sizen(options, "transport-type", "socket"); + if (ret) + goto out; - ret = dict_set_str (options, "transport.socket.connect-path", - "/var/run/gluster/quotad.socket"); - if (ret) - goto out; + ret = dict_set_sizen_str_sizen(options, "transport.socket.connect-path", + "/var/run/gluster/quotad.socket"); + if (ret) + goto out; - rpc = rpc_clnt_new (options, this, this->name, 16); - if (!rpc) { - ret = -1; - goto out; - } + rpc = rpc_clnt_new(options, this, this->name, 16); + if (!rpc) { + ret = -1; + goto out; + } - ret = rpc_clnt_register_notify (rpc, quota_enforcer_notify, this); - if (ret) { - gf_msg ("quota", GF_LOG_ERROR, 0, - Q_MSG_RPCCLNT_REGISTER_NOTIFY_FAILED, - "failed to register notify"); - goto out; - } + ret = rpc_clnt_register_notify(rpc, quota_enforcer_notify, this); + if (ret) { + gf_msg("quota", GF_LOG_ERROR, 0, Q_MSG_RPCCLNT_REGISTER_NOTIFY_FAILED, + "failed to register notify"); + goto out; + } - ret =
quota_enforcer_blocking_connect (rpc); - if (ret) - goto out; + ret = quota_enforcer_blocking_connect(rpc); + if (ret) + goto out; - ret = 0; + ret = 0; out: - if (ret) { - if (rpc) - rpc_clnt_unref (rpc); - rpc = NULL; - } + if (ret) { + if (rpc) + rpc_clnt_unref(rpc); + rpc = NULL; + } - return rpc; - } + return rpc; +} struct rpc_clnt_procedure quota_enforcer_actors[GF_AGGREGATOR_MAXVALUE] = { - [GF_AGGREGATOR_NULL] = {"NULL", NULL}, - [GF_AGGREGATOR_LOOKUP] = {"LOOKUP", NULL}, + [GF_AGGREGATOR_NULL] = {"NULL", NULL}, + [GF_AGGREGATOR_LOOKUP] = {"LOOKUP", NULL}, }; struct rpc_clnt_program quota_enforcer_clnt = { - .progname = "Quota enforcer", - .prognum = GLUSTER_AGGREGATOR_PROGRAM, - .progver = GLUSTER_AGGREGATOR_VERSION, - .numproc = GF_AGGREGATOR_MAXVALUE, - .proctable = quota_enforcer_actors, + .progname = "Quota enforcer", + .prognum = GLUSTER_AGGREGATOR_PROGRAM, + .progver = GLUSTER_AGGREGATOR_VERSION, + .numproc = GF_AGGREGATOR_MAXVALUE, + .proctable = quota_enforcer_actors, }; diff --git a/xlators/features/quota/src/quota-mem-types.h b/xlators/features/quota/src/quota-mem-types.h index 97d9165681f..782a7de96bb 100644 --- a/xlators/features/quota/src/quota-mem-types.h +++ b/xlators/features/quota/src/quota-mem-types.h @@ -10,21 +10,21 @@ #ifndef __QUOTA_MEM_TYPES_H__ #define __QUOTA_MEM_TYPES_H__ -#include "mem-types.h" +#include <glusterfs/mem-types.h> enum gf_quota_mem_types_ { - gf_quota_mt_quota_priv_t = gf_common_mt_end + 1, - gf_quota_mt_quota_inode_ctx_t, - gf_quota_mt_loc_t, - gf_quota_mt_char, - gf_quota_mt_int64_t, - gf_quota_mt_int32_t, - gf_quota_mt_limits_t, - gf_quota_mt_quota_dentry_t, - gf_quota_mt_quota_limits_level_t, - gf_quota_mt_qd_vols_conf_t, - gf_quota_mt_aggregator_state_t, - gf_quota_mt_end + /* Those are used by QUOTA_ALLOC_OR_GOTO macro */ + gf_quota_mt_quota_priv_t = gf_common_mt_end + 1, + gf_quota_mt_quota_inode_ctx_t, + gf_quota_mt_loc_t, + gf_quota_mt_char, + gf_quota_mt_int64_t, + gf_quota_mt_int32_t, + 
gf_quota_mt_limits_t, + gf_quota_mt_quota_dentry_t, + gf_quota_mt_quota_limits_level_t, + gf_quota_mt_qd_vols_conf_t, + gf_quota_mt_aggregator_state_t, + gf_quota_mt_end }; #endif - diff --git a/xlators/features/quota/src/quota-messages.h b/xlators/features/quota/src/quota-messages.h index b01fe98e908..d434ed75e76 100644 --- a/xlators/features/quota/src/quota-messages.h +++ b/xlators/features/quota/src/quota-messages.h @@ -11,237 +11,29 @@ #ifndef _QUOTA_MESSAGES_H_ #define _QUOTA_MESSAGES_H_ -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif +#include <glusterfs/glfs-message-id.h> -#include "glfs-message-id.h" - -/*! \file quota-messages.h - * \brief Quota log-message IDs and their descriptions +/* To add new message IDs, append new identifiers at the end of the list. * - */ - -/* NOTE: Rules for message additions - * 1) Each instance of a message is _better_ left with a unique message ID, even - * if the message format is the same. Reasoning is that, if the message - * format needs to change in one instance, the other instances are not - * impacted or the new change does not change the ID of the instance being - * modified. - * 2) Addition of a message, - * - Should increment the GLFS_NUM_MESSAGES - * - Append to the list of messages defined, towards the end - * - Retain macro naming as glfs_msg_X (for redability across developers) - * NOTE: Rules for message format modifications - * 3) Check across the code if the message ID macro in question is reused - * anywhere. If reused then the modifications should ensure correctness - * everywhere, or needs a new message ID as (1) above was not adhered to. If - * not used anywhere, proceed with the required modification. - * NOTE: Rules for message deletion - * 4) Check (3) and if used anywhere else, then cannot be deleted. If not used - * anywhere, then can be deleted, but will leave a hole by design, as - * addition rules specify modification to the end of the list and not filling - * holes. 
- */ - -#define GLFS_QUOTA_BASE GLFS_MSGID_COMP_QUOTA -#define GLFS_NUM_MESSAGES 23 -#define GLFS_MSGID_END (GLFS_QUOTA_BASE + GLFS_NUM_MESSAGES + 1) -/* Messaged with message IDs */ -#define glfs_msg_start_x GLFS_QUOTA_BASE, "Invalid: Start of messages" -/*------------*/ - -/*! - * @messageid 120001 - * @diagnosis Quota enforcement has failed. - * @recommendedaction None - */ -#define Q_MSG_ENFORCEMENT_FAILED (GLFS_QUOTA_BASE + 1) - - -/*! -* @messageid 120002 -* @diagnosis system is out of memory -* @recommendedaction None -*/ -#define Q_MSG_ENOMEM (GLFS_QUOTA_BASE + 2) - -/*! - * @messageid 120003 - * @diagnosis Parent inode is not present in the inode table due to the - * inode table limits or the brick was restarted recently. - * @recommendedaction If it is a brick restart then perform a crawl on the - * file system or the specific directory in which the problem is observed. - * If inode table limit has been reached,please increase the limit of - * network.inode-lru-limit to a higher value(can be set through CLI). - */ -#define Q_MSG_PARENT_NULL (GLFS_QUOTA_BASE + 3) - -/*! - * @messageid 120004 - * @diagnosis This is to inform the admin that the user has crossed the soft limit - * of the quota configured on the directory and expected to cross the hard limit soon. - * @recommendedaction You may reconfigure your quota limits. - */ -#define Q_MSG_CROSSED_SOFT_LIMIT (GLFS_QUOTA_BASE + 4) - -/*! - * @messageid 120005 - * @diagnosis Quota translator failed to connect to quotad. This could be - * due to one or more of the following reasons, (1) Quotad is not running. - * (2) Brick process has run out of memory. - * @recommendedaction If quotad is not running, consider starting quotad. - * else check system memory consumption. - */ -#define Q_MSG_QUOTA_ENFORCER_RPC_INIT_FAILED (GLFS_QUOTA_BASE + 5) - -/*! - * @messageid 120006 - * @diagnosis Getting cluster-wide size failed - * @recommendedaction Restart quotad. 
Kill quotad by searching - * "ps ax | grep quotad" and use volume start force to restart it. - */ - -#define Q_MSG_REMOTE_OPERATION_FAILED (GLFS_QUOTA_BASE + 6) - -/*! - * @messageid 120007 - * @diagnosis Updation of global quota size failed. This may be due to quotad - * is down or lost connection with quotad. - * @recommendedaction Please restart quotad. - */ - -#define Q_MSG_FAILED_TO_SEND_FOP (GLFS_QUOTA_BASE + 7) - -/*! - * @messageid 120008 - * @diagnosis - * @recommendedaction Check volfile for correctness - */ - -#define Q_MSG_INVALID_VOLFILE (GLFS_QUOTA_BASE + 8) - -/*! - * @messageid 120009 - * @diagnosis - * @recommendedaction - */ - -#define Q_MSG_INODE_PARENT_NOT_FOUND (GLFS_QUOTA_BASE + 9) - -/*! - * @messageid 120010 - * @diagnosis - * @recommendedaction - */ - -#define Q_MSG_XDR_DECODE_ERROR (GLFS_QUOTA_BASE + 10) - -/*! - * @messageid 120011 - * @diagnosis - * @recommendedaction - */ - -#define Q_MSG_DICT_UNSERIALIZE_FAIL (GLFS_QUOTA_BASE + 11) - -/*! - * @messageid 120012 - * @diagnosis - * @recommendedaction - */ - -#define Q_MSG_DICT_SERIALIZE_FAIL (GLFS_QUOTA_BASE + 12) - -/*! - * @messageid 120013 - * @diagnosis - * @recommendedaction - */ - -#define Q_MSG_RPCSVC_INIT_FAILED (GLFS_QUOTA_BASE + 13) - -/*! - * @messageid 120014 - * @diagnosis - * @recommendedaction - */ - -#define Q_MSG_RPCSVC_LISTENER_CREATION_FAILED (GLFS_QUOTA_BASE + 14) - -/*! - * @messageid 120015 - * @diagnosis - * @recommendedaction - */ - -#define Q_MSG_RPCSVC_REGISTER_FAILED (GLFS_QUOTA_BASE + 15) - -/*! - * @messageid 120016 - * @diagnosis - * @recommendedaction - */ - -#define Q_MSG_XDR_DECODING_FAILED (GLFS_QUOTA_BASE + 16) -/*! - * @messageid 120017 - * @diagnosis - * @recommendedaction - */ - -#define Q_MSG_RPCCLNT_REGISTER_NOTIFY_FAILED (GLFS_QUOTA_BASE + 17) -/*! - * @messageid 120018 - * @diagnosis - * @recommendedaction Umount and mount the corresponing volume - */ - -#define Q_MSG_ANCESTRY_BUILD_FAILED (GLFS_QUOTA_BASE + 18) - -/*! 
- * @messageid 120019 - * @diagnosis - * @recommendedaction - */ - -#define Q_MSG_SIZE_KEY_MISSING (GLFS_QUOTA_BASE + 19) - -/*! - * @messageid 120020 - * @diagnosis - * @recommendedaction - */ - -#define Q_MSG_INODE_CTX_GET_FAILED (GLFS_QUOTA_BASE + 20) - -/*! - * @messageid 120021 - * @diagnosis - * @recommendedaction - */ - -#define Q_MSG_INODE_CTX_SET_FAILED (GLFS_QUOTA_BASE + 21) - -/*! - * @messageid 120022 - * @diagnosis - * @recommendedaction - */ - -#define Q_MSG_LOOKUP_FAILED (GLFS_QUOTA_BASE + 22) - -/*! - * @messageid 120023 - * @diagnosis - * @recommendedaction - */ - -#define Q_MSG_RPC_SUBMIT_FAILED (GLFS_QUOTA_BASE + 23) - -/*------------*/ -#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages" + * Never remove a message ID. If it's not used anymore, you can rename it or + * leave it as it is, but not delete it. This is to prevent reutilization of + * IDs by other messages. + * + * The component name must match one of the entries defined in + * glfs-message-id.h. 
+ */ + +GLFS_MSGID(QUOTA, Q_MSG_ENFORCEMENT_FAILED, Q_MSG_ENOMEM, Q_MSG_PARENT_NULL, + Q_MSG_CROSSED_SOFT_LIMIT, Q_MSG_QUOTA_ENFORCER_RPC_INIT_FAILED, + Q_MSG_REMOTE_OPERATION_FAILED, Q_MSG_FAILED_TO_SEND_FOP, + Q_MSG_INVALID_VOLFILE, Q_MSG_INODE_PARENT_NOT_FOUND, + Q_MSG_XDR_DECODE_ERROR, Q_MSG_DICT_UNSERIALIZE_FAIL, + Q_MSG_DICT_SERIALIZE_FAIL, Q_MSG_RPCSVC_INIT_FAILED, + Q_MSG_RPCSVC_LISTENER_CREATION_FAILED, Q_MSG_RPCSVC_REGISTER_FAILED, + Q_MSG_XDR_DECODING_FAILED, Q_MSG_RPCCLNT_REGISTER_NOTIFY_FAILED, + Q_MSG_ANCESTRY_BUILD_FAILED, Q_MSG_SIZE_KEY_MISSING, + Q_MSG_INODE_CTX_GET_FAILED, Q_MSG_INODE_CTX_SET_FAILED, + Q_MSG_LOOKUP_FAILED, Q_MSG_RPC_SUBMIT_FAILED, + Q_MSG_ENFORCEMENT_SKIPPED, Q_MSG_INTERNAL_FOP_KEY_MISSING); #endif /* !_QUOTA_MESSAGES_H_ */ - diff --git a/xlators/features/quota/src/quota.c b/xlators/features/quota/src/quota.c index 53753559c5b..18df9ae6d19 100644 --- a/xlators/features/quota/src/quota.c +++ b/xlators/features/quota/src/quota.c @@ -7,4700 +7,4636 @@ later), or the GNU General Public License, version 2 (GPLv2), in all cases as published by the Free Software Foundation. 
*/ -#include <fnmatch.h> #include "quota.h" -#include "common-utils.h" -#include "defaults.h" -#include "statedump.h" -#include "quota-common-utils.h" +#include <glusterfs/statedump.h> #include "quota-messages.h" +#include <glusterfs/events.h> struct volume_options options[]; static int32_t -__quota_init_inode_ctx (inode_t *inode, xlator_t *this, - quota_inode_ctx_t **context) +__quota_init_inode_ctx(inode_t *inode, xlator_t *this, + quota_inode_ctx_t **context) { - int32_t ret = -1; - quota_inode_ctx_t *ctx = NULL; + int32_t ret = -1; + quota_inode_ctx_t *ctx = NULL; - if (inode == NULL) { - goto out; - } + if (inode == NULL) { + goto out; + } - QUOTA_ALLOC_OR_GOTO (ctx, quota_inode_ctx_t, out); + QUOTA_ALLOC_OR_GOTO(ctx, quota_inode_ctx_t, out); - LOCK_INIT(&ctx->lock); + LOCK_INIT(&ctx->lock); - if (context != NULL) { - *context = ctx; - } + if (context != NULL) { + *context = ctx; + } - INIT_LIST_HEAD (&ctx->parents); + INIT_LIST_HEAD(&ctx->parents); - ret = __inode_ctx_put (inode, this, (uint64_t )(long)ctx); - if (ret == -1) { - gf_msg (this->name, GF_LOG_WARNING, 0, - Q_MSG_INODE_CTX_SET_FAILED, "cannot set quota context " - "in inode (gfid:%s)", uuid_utoa (inode->gfid)); - } + ret = __inode_ctx_put(inode, this, (uint64_t)(long)ctx); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, Q_MSG_INODE_CTX_SET_FAILED, + "cannot set quota context " + "in inode (gfid:%s)", + uuid_utoa(inode->gfid)); + GF_FREE(ctx); + } out: - return ret; + return ret; } - static int32_t -quota_inode_ctx_get (inode_t *inode, xlator_t *this, - quota_inode_ctx_t **ctx, char create_if_absent) +quota_inode_ctx_get(inode_t *inode, xlator_t *this, quota_inode_ctx_t **ctx, + char create_if_absent) { - int32_t ret = 0; - uint64_t ctx_int; + int32_t ret = 0; + uint64_t ctx_int; - LOCK (&inode->lock); - { - ret = __inode_ctx_get (inode, this, &ctx_int); + LOCK(&inode->lock); + { + ret = __inode_ctx_get(inode, this, &ctx_int); - if ((ret == 0) && (ctx != NULL)) { - *ctx = (quota_inode_ctx_t *) 
(unsigned long)ctx_int; - } else if (create_if_absent) { - ret = __quota_init_inode_ctx (inode, this, ctx); - } + if ((ret == 0) && (ctx != NULL)) { + *ctx = (quota_inode_ctx_t *)(unsigned long)ctx_int; + } else if (create_if_absent) { + ret = __quota_init_inode_ctx(inode, this, ctx); } - UNLOCK (&inode->lock); + } + UNLOCK(&inode->lock); - return ret; + return ret; } int -quota_loc_fill (loc_t *loc, inode_t *inode, inode_t *parent, char *path) +quota_loc_fill(loc_t *loc, inode_t *inode, inode_t *parent, char *path) { - int ret = -1; + int ret = -1; - if (!loc || (inode == NULL)) - return ret; + if (!loc || (inode == NULL)) + return ret; - if (inode) { - loc->inode = inode_ref (inode); - gf_uuid_copy (loc->gfid, inode->gfid); - } + if (inode) { + loc->inode = inode_ref(inode); + gf_uuid_copy(loc->gfid, inode->gfid); + } - if (parent) { - loc->parent = inode_ref (parent); - } + if (parent) { + loc->parent = inode_ref(parent); + } - if (path != NULL) { - loc->path = gf_strdup (path); + if (path != NULL) { + loc->path = gf_strdup(path); - loc->name = strrchr (loc->path, '/'); - if (loc->name) { - loc->name++; - } + loc->name = strrchr(loc->path, '/'); + if (loc->name) { + loc->name++; } + } - ret = 0; + ret = 0; - return ret; + return ret; } - int -quota_inode_loc_fill (inode_t *inode, loc_t *loc) +quota_inode_loc_fill(inode_t *inode, loc_t *loc) { - char *resolvedpath = NULL; - inode_t *parent = NULL; - int ret = -1; - xlator_t *this = NULL; + char *resolvedpath = NULL; + inode_t *parent = NULL; + int ret = -1; + xlator_t *this = NULL; - if ((!inode) || (!loc)) { - return ret; - } + if ((!inode) || (!loc)) { + return ret; + } - this = THIS; + this = THIS; - if ((inode) && __is_root_gfid (inode->gfid)) { - loc->parent = NULL; - goto ignore_parent; - } + if ((inode) && __is_root_gfid(inode->gfid)) { + loc->parent = NULL; + goto ignore_parent; + } - parent = inode_parent (inode, 0, NULL); - if (!parent) { - gf_msg_debug (this->name, 0, "cannot find parent for " - "inode 
(gfid:%s)", uuid_utoa (inode->gfid)); - } + parent = inode_parent(inode, 0, NULL); + if (!parent) { + gf_msg_debug(this->name, 0, + "cannot find parent for " + "inode (gfid:%s)", + uuid_utoa(inode->gfid)); + } ignore_parent: - ret = inode_path (inode, NULL, &resolvedpath); - if (ret < 0) { - gf_msg_debug (this->name, 0, "cannot construct path for " - "inode (gfid:%s)", uuid_utoa (inode->gfid)); - } - - ret = quota_loc_fill (loc, inode, parent, resolvedpath); - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM, - "cannot fill loc"); - goto err; - } + ret = inode_path(inode, NULL, &resolvedpath); + if (ret < 0) { + gf_msg_debug(this->name, 0, + "cannot construct path for " + "inode (gfid:%s)", + uuid_utoa(inode->gfid)); + } + + ret = quota_loc_fill(loc, inode, parent, resolvedpath); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM, + "cannot fill loc"); + goto err; + } err: - if (parent) { - inode_unref (parent); - } + if (parent) { + inode_unref(parent); + } - GF_FREE (resolvedpath); + GF_FREE(resolvedpath); - return ret; + return ret; } - int32_t -quota_local_cleanup (quota_local_t *local) +quota_local_cleanup(quota_local_t *local) { - if (local == NULL) { - goto out; - } + if (local == NULL) { + goto out; + } - loc_wipe (&local->loc); - loc_wipe (&local->newloc); - loc_wipe (&local->oldloc); - loc_wipe (&local->validate_loc); + loc_wipe(&local->loc); + loc_wipe(&local->newloc); + loc_wipe(&local->oldloc); + loc_wipe(&local->validate_loc); - inode_unref (local->inode); + inode_unref(local->inode); - if (local->xdata) - dict_unref (local->xdata); + if (local->xdata) + dict_unref(local->xdata); - if (local->validate_xdata) - dict_unref (local->validate_xdata); + if (local->validate_xdata) + dict_unref(local->validate_xdata); - if (local->stub) - call_stub_destroy (local->stub); + if (local->stub) + call_stub_destroy(local->stub); - LOCK_DESTROY (&local->lock); + LOCK_DESTROY(&local->lock); - mem_put (local); + 
mem_put(local); out: - return 0; + return 0; } - static quota_local_t * -quota_local_new () +quota_local_new() { - quota_local_t *local = NULL; - local = mem_get0 (THIS->local_pool); - if (local == NULL) - goto out; + quota_local_t *local = NULL; + local = mem_get0(THIS->local_pool); + if (local == NULL) + goto out; - LOCK_INIT (&local->lock); - local->space_available = -1; + LOCK_INIT(&local->lock); + local->space_available = -1; out: - return local; + return local; } - quota_dentry_t * -__quota_dentry_new (quota_inode_ctx_t *ctx, char *name, uuid_t par) +__quota_dentry_new(quota_inode_ctx_t *ctx, char *name, uuid_t par) { - quota_dentry_t *dentry = NULL; - GF_UNUSED int32_t ret = 0; + quota_dentry_t *dentry = NULL; + GF_UNUSED int32_t ret = 0; - QUOTA_ALLOC_OR_GOTO (dentry, quota_dentry_t, err); + QUOTA_ALLOC_OR_GOTO(dentry, quota_dentry_t, err); - INIT_LIST_HEAD (&dentry->next); + INIT_LIST_HEAD(&dentry->next); - dentry->name = gf_strdup (name); - if (dentry->name == NULL) { - GF_FREE (dentry); - dentry = NULL; - goto err; - } + dentry->name = gf_strdup(name); + if (dentry->name == NULL) { + GF_FREE(dentry); + dentry = NULL; + goto err; + } - gf_uuid_copy (dentry->par, par); + gf_uuid_copy(dentry->par, par); - if (ctx != NULL) - list_add_tail (&dentry->next, &ctx->parents); + if (ctx != NULL) + list_add_tail(&dentry->next, &ctx->parents); err: - return dentry; + return dentry; } - void -__quota_dentry_free (quota_dentry_t *dentry) +__quota_dentry_free(quota_dentry_t *dentry) { - if (dentry == NULL) { - goto out; - } + if (dentry == NULL) { + goto out; + } - list_del_init (&dentry->next); + list_del_init(&dentry->next); - GF_FREE (dentry->name); - GF_FREE (dentry); + GF_FREE(dentry->name); + GF_FREE(dentry); out: - return; + return; } void -__quota_dentry_del (quota_inode_ctx_t *ctx, const char *name, uuid_t par) +__quota_dentry_del(quota_inode_ctx_t *ctx, const char *name, uuid_t par) { - quota_dentry_t *dentry = NULL; - quota_dentry_t *tmp = NULL; - - 
list_for_each_entry_safe (dentry, tmp, &ctx->parents, next) { - if ((strcmp (dentry->name, name) == 0) && - (gf_uuid_compare (dentry->par, par) == 0)) { - __quota_dentry_free (dentry); - break; - } + quota_dentry_t *dentry = NULL; + quota_dentry_t *tmp = NULL; + + list_for_each_entry_safe(dentry, tmp, &ctx->parents, next) + { + if ((strcmp(dentry->name, name) == 0) && + (gf_uuid_compare(dentry->par, par) == 0)) { + __quota_dentry_free(dentry); + break; } + } } void -quota_dentry_del (quota_inode_ctx_t *ctx, const char *name, uuid_t par) +quota_dentry_del(quota_inode_ctx_t *ctx, const char *name, uuid_t par) { - LOCK (&ctx->lock); - { - __quota_dentry_del (ctx, name, par); - } - UNLOCK (&ctx->lock); + LOCK(&ctx->lock); + { + __quota_dentry_del(ctx, name, par); + } + UNLOCK(&ctx->lock); } -static inode_t* -__quota_inode_parent (inode_t *inode, uuid_t pargfid, const char *name) +static inode_t * +__quota_inode_parent(inode_t *inode, uuid_t pargfid, const char *name) { - inode_t *parent = NULL; + inode_t *parent = NULL; - parent = inode_parent (inode, pargfid, name); - inode_unref (inode); - return parent; + parent = inode_parent(inode, pargfid, name); + inode_unref(inode); + return parent; } -static inode_t* -quota_inode_parent (inode_t *inode, uuid_t pargfid, const char *name) +static inode_t * +quota_inode_parent(inode_t *inode, uuid_t pargfid, const char *name) { - inode_t *parent = NULL; + inode_t *parent = NULL; - parent = __quota_inode_parent (inode, pargfid, name); - if (!parent) - gf_msg_callingfn (THIS->name, GF_LOG_ERROR, 0, - Q_MSG_PARENT_NULL, - "Failed to find " - "ancestor for inode (%s)", - uuid_utoa(inode->gfid)); + parent = __quota_inode_parent(inode, pargfid, name); + if (!parent) + gf_msg_callingfn(THIS->name, GF_LOG_ERROR, 0, Q_MSG_PARENT_NULL, + "Failed to find " + "ancestor for inode (%s)", + uuid_utoa(inode->gfid)); - return parent; + return parent; } int32_t -quota_inode_depth (inode_t *inode) +quota_inode_depth(inode_t *inode) { - int depth = 
0; - inode_t *cur_inode = NULL; - - cur_inode = inode_ref (inode); - while (cur_inode && !__is_root_gfid (cur_inode->gfid)) { - depth++; - cur_inode = quota_inode_parent (cur_inode, 0 , NULL); - if (!cur_inode) - depth = -1; - } + int depth = 0; + inode_t *cur_inode = NULL; + + cur_inode = inode_ref(inode); + while (cur_inode && !__is_root_gfid(cur_inode->gfid)) { + depth++; + cur_inode = quota_inode_parent(cur_inode, 0, NULL); + if (!cur_inode) + depth = -1; + } - if (cur_inode) - inode_unref (cur_inode); + if (cur_inode) + inode_unref(cur_inode); - return depth; + return depth; } -int32_t quota_find_common_ancestor (inode_t *inode1, inode_t *inode2, - uuid_t *common_ancestor) +int32_t +quota_find_common_ancestor(inode_t *inode1, inode_t *inode2, + uuid_t *common_ancestor) { - int32_t depth1 = 0; - int32_t depth2 = 0; - int32_t ret = -1; - inode_t *cur_inode1 = NULL; - inode_t *cur_inode2 = NULL; - - depth1 = quota_inode_depth (inode1); - if (depth1 < 0) - goto out; - - depth2 = quota_inode_depth (inode2); - if (depth2 < 0) - goto out; - - cur_inode1 = inode_ref (inode1); - cur_inode2 = inode_ref (inode2); - - while (cur_inode1 && depth1 > depth2) { - cur_inode1 = quota_inode_parent (cur_inode1, 0 , NULL); - depth1--; - } - - while (cur_inode2 && depth2 > depth1) { - cur_inode2 = quota_inode_parent (cur_inode2, 0 , NULL); - depth2--; - } - - while (depth1 && cur_inode1 && cur_inode2 && cur_inode1 != cur_inode2) { - cur_inode1 = quota_inode_parent (cur_inode1, 0 , NULL); - cur_inode2 = quota_inode_parent (cur_inode2, 0 , NULL); - depth1--; - } - - if (cur_inode1 && cur_inode2) { - gf_uuid_copy (*common_ancestor, cur_inode1->gfid); - ret = 0; - } + int32_t depth1 = 0; + int32_t depth2 = 0; + int32_t ret = -1; + inode_t *cur_inode1 = NULL; + inode_t *cur_inode2 = NULL; + + depth1 = quota_inode_depth(inode1); + if (depth1 < 0) + goto out; + + depth2 = quota_inode_depth(inode2); + if (depth2 < 0) + goto out; + + cur_inode1 = inode_ref(inode1); + cur_inode2 = 
inode_ref(inode2); + + while (cur_inode1 && depth1 > depth2) { + cur_inode1 = quota_inode_parent(cur_inode1, 0, NULL); + depth1--; + } + + while (cur_inode2 && depth2 > depth1) { + cur_inode2 = quota_inode_parent(cur_inode2, 0, NULL); + depth2--; + } + + while (depth1 && cur_inode1 && cur_inode2 && cur_inode1 != cur_inode2) { + cur_inode1 = quota_inode_parent(cur_inode1, 0, NULL); + cur_inode2 = quota_inode_parent(cur_inode2, 0, NULL); + depth1--; + } + + if (cur_inode1 && cur_inode2) { + gf_uuid_copy(*common_ancestor, cur_inode1->gfid); + ret = 0; + } out: - if (cur_inode1) - inode_unref (cur_inode1); + if (cur_inode1) + inode_unref(cur_inode1); - if (cur_inode2) - inode_unref (cur_inode2); + if (cur_inode2) + inode_unref(cur_inode2); - return ret; - } + return ret; +} void -check_ancestory_continue (struct list_head *parents, inode_t *inode, - int32_t op_ret, int32_t op_errno, void *data) +check_ancestory_continue(struct list_head *parents, inode_t *inode, + int32_t op_ret, int32_t op_errno, void *data) { - call_frame_t *frame = NULL; - quota_local_t *local = NULL; - uint32_t link_count = 0; - - frame = data; - local = frame->local; - - if (parents && list_empty (parents)) { - gf_msg (THIS->name, GF_LOG_WARNING, EIO, - Q_MSG_ANCESTRY_BUILD_FAILED, - "Couldn't build ancestry for inode (gfid:%s). " - "Without knowing ancestors till root, quota " - "cannot be enforced. " - "Hence, failing fop with EIO", - uuid_utoa (inode->gfid)); - op_errno = EIO; - op_ret = -1; - } - - LOCK (&local->lock); - { - link_count = --local->link_count; - if (op_ret < 0) { - local->op_ret = op_ret; - local->op_errno = op_errno; - } + call_frame_t *frame = NULL; + quota_local_t *local = NULL; + uint32_t link_count = 0; + + frame = data; + local = frame->local; + + if (parents && list_empty(parents)) { + gf_msg(THIS->name, GF_LOG_WARNING, EIO, Q_MSG_ANCESTRY_BUILD_FAILED, + "Couldn't build ancestry for inode (gfid:%s). " + "Without knowing ancestors till root, quota " + "cannot be enforced. 
" + "Hence, failing fop with EIO", + uuid_utoa(inode->gfid)); + op_errno = EIO; + op_ret = -1; + } + + LOCK(&local->lock); + { + link_count = --local->link_count; + if (op_ret < 0) { + local->op_ret = op_ret; + local->op_errno = op_errno; } - UNLOCK (&local->lock); + } + UNLOCK(&local->lock); - if (link_count == 0) - local->fop_continue_cbk (frame); + if (link_count == 0) + local->fop_continue_cbk(frame); } void -check_ancestory (call_frame_t *frame, inode_t *inode) +check_ancestory(call_frame_t *frame, inode_t *inode) { - inode_t *cur_inode = NULL; - inode_t *parent = NULL; - - cur_inode = inode_ref (inode); - while (cur_inode && !__is_root_gfid (cur_inode->gfid)) { - parent = inode_parent (cur_inode, 0, NULL); - if (!parent) { - quota_build_ancestry (cur_inode, - check_ancestory_continue, frame); - inode_unref (cur_inode); - return; - } - inode_unref (cur_inode); - cur_inode = parent; - } + inode_t *cur_inode = NULL; + inode_t *parent = NULL; - if (cur_inode) { - inode_unref (cur_inode); - check_ancestory_continue (NULL, NULL, 0, 0, frame); - } else { - check_ancestory_continue (NULL, NULL, -1, ESTALE, frame); - } + cur_inode = inode_ref(inode); + while (cur_inode && !__is_root_gfid(cur_inode->gfid)) { + parent = inode_parent(cur_inode, 0, NULL); + if (!parent) { + quota_build_ancestry(cur_inode, check_ancestory_continue, frame); + inode_unref(cur_inode); + return; + } + inode_unref(cur_inode); + cur_inode = parent; + } + + if (cur_inode) { + inode_unref(cur_inode); + check_ancestory_continue(NULL, NULL, 0, 0, frame); + } else { + check_ancestory_continue(NULL, NULL, -1, ESTALE, frame); + } } void -check_ancestory_2_cbk (struct list_head *parents, inode_t *inode, - int32_t op_ret, int32_t op_errno, void *data) +check_ancestory_2_cbk(struct list_head *parents, inode_t *inode, int32_t op_ret, + int32_t op_errno, void *data) { - inode_t *this_inode = NULL; - quota_inode_ctx_t *ctx = NULL; + inode_t *this_inode = NULL; + quota_inode_ctx_t *ctx = NULL; - this_inode = 
data; + this_inode = data; - if (op_ret < 0) - goto out; + if (op_ret < 0) + goto out; - if (parents == NULL || list_empty (parents)) { - gf_msg (THIS->name, GF_LOG_WARNING, 0, - Q_MSG_ENFORCEMENT_FAILED, - "Couldn't build ancestry for inode (gfid:%s). " - "Without knowing ancestors till root, quota " - "cannot be enforced.", - uuid_utoa (this_inode->gfid)); - goto out; - } + if (parents == NULL || list_empty(parents)) { + gf_msg(THIS->name, GF_LOG_WARNING, 0, Q_MSG_ENFORCEMENT_FAILED, + "Couldn't build ancestry for inode (gfid:%s). " + "Without knowing ancestors till root, quota " + "cannot be enforced.", + uuid_utoa(this_inode->gfid)); + goto out; + } - quota_inode_ctx_get (this_inode, THIS, &ctx, 0); - if (ctx) - ctx->ancestry_built = _gf_true; + quota_inode_ctx_get(this_inode, THIS, &ctx, 0); + if (ctx) + ctx->ancestry_built = _gf_true; out: - inode_unref (this_inode); + inode_unref(this_inode); } void -check_ancestory_2 (xlator_t *this, quota_local_t *local, inode_t *inode) +check_ancestory_2(xlator_t *this, quota_local_t *local, inode_t *inode) { - inode_t *cur_inode = NULL; - inode_t *parent = NULL; - quota_inode_ctx_t *ctx = NULL; - char *name = NULL; - uuid_t pgfid = {0}; - - name = (char *) local->loc.name; - if (local->loc.parent) { - gf_uuid_copy (pgfid, local->loc.parent->gfid); - parent = local->loc.parent; - } - - cur_inode = inode_ref (inode); - while (cur_inode && !__is_root_gfid (cur_inode->gfid)) { - quota_inode_ctx_get (cur_inode, this, &ctx, 0); - /* build ancestry is required only on the first lookup, - * so stop crawling when the inode_ctx is set for an inode - */ - if (ctx && ctx->ancestry_built) - goto setctx; - - parent = inode_parent (cur_inode, pgfid, name); - if (!parent) { - quota_build_ancestry (cur_inode, check_ancestory_2_cbk, - inode_ref (inode)); - goto out; - } + inode_t *cur_inode = NULL; + inode_t *parent = NULL; + quota_inode_ctx_t *ctx = NULL; + char *name = NULL; + uuid_t pgfid = {0}; + + name = (char *)local->loc.name; + if 
(local->loc.parent) { + gf_uuid_copy(pgfid, local->loc.parent->gfid); + } + + cur_inode = inode_ref(inode); + while (cur_inode && !__is_root_gfid(cur_inode->gfid)) { + quota_inode_ctx_get(cur_inode, this, &ctx, 0); + /* build ancestry is required only on the first lookup, + * so stop crawling when the inode_ctx is set for an inode + */ + if (ctx && ctx->ancestry_built) + goto setctx; - if (name != NULL) { - name = NULL; - gf_uuid_clear (pgfid); - } + parent = inode_parent(cur_inode, pgfid, name); + if (!parent) { + quota_build_ancestry(cur_inode, check_ancestory_2_cbk, + inode_ref(inode)); + goto out; + } - inode_unref (cur_inode); - cur_inode = parent; + if (name != NULL) { + name = NULL; + gf_uuid_clear(pgfid); } + inode_unref(cur_inode); + cur_inode = parent; + } + setctx: - if (cur_inode && cur_inode != inode) { - quota_inode_ctx_get (inode, this, &ctx, 0); - if (ctx) - ctx->ancestry_built = _gf_true; - } + if (cur_inode && cur_inode != inode) { + quota_inode_ctx_get(inode, this, &ctx, 0); + if (ctx) + ctx->ancestry_built = _gf_true; + } out: - if (cur_inode) - inode_unref (cur_inode); + if (cur_inode) + inode_unref(cur_inode); } static void -quota_link_count_decrement (call_frame_t *frame) +quota_link_count_decrement(call_frame_t *frame) { - call_frame_t *tmpframe = NULL; - quota_local_t *local = NULL; - call_stub_t *stub = NULL; - int link_count = -1; - - local = frame->local; - if (local && local->par_frame) { - local = local->par_frame->local; - tmpframe = frame; - } - - if (local == NULL) - goto out; - - LOCK (&local->lock); - { - link_count = --local->link_count; - if (link_count == 0) { - stub = local->stub; - local->stub = NULL; - } - } - UNLOCK (&local->lock); - - if (stub != NULL) { - call_resume (stub); - } + call_frame_t *tmpframe = NULL; + quota_local_t *local = NULL; + call_stub_t *stub = NULL; + int link_count = -1; + + local = frame->local; + if (local && local->par_frame) { + local = local->par_frame->local; + tmpframe = frame; + } + + if 
(local == NULL) + goto out; + + LOCK(&local->lock); + { + link_count = --local->link_count; + if (link_count == 0) { + stub = local->stub; + local->stub = NULL; + } + } + UNLOCK(&local->lock); + + if (stub != NULL) { + call_resume(stub); + } out: - if (tmpframe) { - local = tmpframe->local; - tmpframe->local = NULL; + if (tmpframe) { + local = tmpframe->local; + tmpframe->local = NULL; - STACK_DESTROY (frame->root); - if (local) - quota_local_cleanup (local); - } + STACK_DESTROY(frame->root); + if (local) + quota_local_cleanup(local); + } - return; + return; } static void -quota_handle_validate_error (call_frame_t *frame, int32_t op_ret, - int32_t op_errno) +quota_handle_validate_error(call_frame_t *frame, int32_t op_ret, + int32_t op_errno) { - quota_local_t *local; + quota_local_t *local; - local = frame->local; - if (local && local->par_frame) - local = local->par_frame->local; + local = frame->local; + if (local && local->par_frame) + local = local->par_frame->local; - if (local == NULL) - goto out; + if (local == NULL) + goto out; - LOCK (&local->lock); + if (op_ret < 0) { + LOCK(&local->lock); { - if (op_ret < 0) { - local->op_ret = op_ret; - local->op_errno = op_errno; - } + local->op_ret = op_ret; + local->op_errno = op_errno; } - UNLOCK (&local->lock); - - /* we abort checking limits on this path to root */ - quota_link_count_decrement (frame); + UNLOCK(&local->lock); + } + /* we abort checking limits on this path to root */ + quota_link_count_decrement(frame); out: - return; + return; } int32_t -quota_validate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *xdata, struct iatt *postparent) +quota_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, struct iatt *postparent) { - quota_local_t *local = NULL; - int32_t ret = 0; - quota_inode_ctx_t *ctx = NULL; - int64_t 
*object_size = 0; - uint64_t value = 0; - data_t *data = NULL; - quota_meta_t size = {0,}; - - local = frame->local; - - if (op_ret < 0) { - goto unwind; - } - - GF_ASSERT (local); - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO_WITH_ERROR ("quota", this, unwind, op_errno, - EINVAL); - GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, xdata, unwind, op_errno, - EINVAL); - - ret = inode_ctx_get (local->validate_loc.inode, this, &value); - - ctx = (quota_inode_ctx_t *)(unsigned long)value; - if ((ret == -1) || (ctx == NULL)) { - gf_msg (this->name, GF_LOG_WARNING, EINVAL, - Q_MSG_INODE_CTX_GET_FAILED, "quota context is" - " not present in inode (gfid:%s)", - uuid_utoa (local->validate_loc.inode->gfid)); - op_errno = EINVAL; - goto unwind; - } - - ret = quota_dict_get_meta (xdata, QUOTA_SIZE_KEY, &size); - if (ret == -1) { - gf_msg (this->name, GF_LOG_WARNING, EINVAL, - Q_MSG_SIZE_KEY_MISSING, "quota size key not present " - "in dict"); - op_errno = EINVAL; - } - - local->just_validated = 1; /* so that we don't go into infinite - * loop of validation and checking - * limit when timeout is zero. 
- */ - LOCK (&ctx->lock); - { - ctx->size = size.size; - ctx->file_count = size.file_count; - ctx->dir_count = size.dir_count; - gettimeofday (&ctx->tv, NULL); - } - UNLOCK (&ctx->lock); - - quota_check_limit (frame, local->validate_loc.inode, this); - return 0; + quota_local_t *local = NULL; + int32_t ret = 0; + quota_inode_ctx_t *ctx = NULL; + uint64_t value = 0; + quota_meta_t size = { + 0, + }; + + local = frame->local; + + if (op_ret < 0) { + goto unwind; + } + + GF_ASSERT(local); + GF_ASSERT(frame); + GF_VALIDATE_OR_GOTO_WITH_ERROR("quota", this, unwind, op_errno, EINVAL); + GF_VALIDATE_OR_GOTO_WITH_ERROR(this->name, xdata, unwind, op_errno, EINVAL); + + ret = inode_ctx_get(local->validate_loc.inode, this, &value); + + ctx = (quota_inode_ctx_t *)(unsigned long)value; + if ((ret == -1) || (ctx == NULL)) { + gf_msg(this->name, GF_LOG_WARNING, EINVAL, Q_MSG_INODE_CTX_GET_FAILED, + "quota context is" + " not present in inode (gfid:%s)", + uuid_utoa(local->validate_loc.inode->gfid)); + op_errno = EINVAL; + goto unwind; + } + + ret = quota_dict_get_meta(xdata, QUOTA_SIZE_KEY, SLEN(QUOTA_SIZE_KEY), + &size); + if (ret == -1) { + gf_msg(this->name, GF_LOG_WARNING, EINVAL, Q_MSG_SIZE_KEY_MISSING, + "quota size key not present " + "in dict"); + op_errno = EINVAL; + } + + local->just_validated = 1; /* so that we don't go into infinite + * loop of validation and checking + * limit when timeout is zero. 
+ */ + LOCK(&ctx->lock); + { + ctx->size = size.size; + ctx->validate_time = gf_time(); + ctx->file_count = size.file_count; + ctx->dir_count = size.dir_count; + } + UNLOCK(&ctx->lock); + + quota_check_limit(frame, local->validate_loc.inode, this); + return 0; unwind: - quota_handle_validate_error (frame, op_ret, op_errno); - return 0; + quota_handle_validate_error(frame, op_ret, op_errno); + return 0; } - -static uint64_t -quota_time_elapsed (struct timeval *now, struct timeval *then) +static inline gf_boolean_t +quota_timeout(time_t t, uint32_t timeout) { - return (now->tv_sec - then->tv_sec); -} - - -int32_t -quota_timeout (struct timeval *tv, int32_t timeout) -{ - struct timeval now = {0,}; - int32_t timed_out = 0; - - gettimeofday (&now, NULL); - - if (quota_time_elapsed (&now, tv) >= timeout) { - timed_out = 1; - } - - return timed_out; + return (gf_time() - t) >= timeout; } /* Return: 1 if new entry added * 0 no entry added + * -1 on errors */ static int32_t -quota_add_parent (struct list_head *list, char *name, uuid_t pgfid) +quota_add_parent(struct list_head *list, char *name, uuid_t pgfid) { - quota_dentry_t *entry = NULL; - gf_boolean_t found = _gf_false; + quota_dentry_t *entry = NULL; + gf_boolean_t found = _gf_false; + int ret = 0; - if (list == NULL) { + if (!list_empty(list)) { + list_for_each_entry(entry, list, next) + { + if (gf_uuid_compare(pgfid, entry->par) == 0) { + found = _gf_true; goto out; + } } + } - list_for_each_entry (entry, list, next) { - if (gf_uuid_compare (pgfid, entry->par) == 0) { - found = _gf_true; - goto out; - } - } - - entry = __quota_dentry_new (NULL, name, pgfid); - list_add_tail (&entry->next, list); + entry = __quota_dentry_new(NULL, name, pgfid); + if (entry) + list_add_tail(&entry->next, list); + else + ret = -1; out: - if (found) - return 0; - else - return 1; - + if (found) + return 0; + else if (ret == 0) + return 1; + else + return -1; } /* This function iterates the parent list in inode * context and add unique 
parent to the list - * Returns number of dentry added to the list + * Returns number of dentry added to the list, or -1 on errors */ static int32_t -quota_add_parents_from_ctx (quota_inode_ctx_t *ctx, struct list_head *list) +quota_add_parents_from_ctx(quota_inode_ctx_t *ctx, struct list_head *list) { - int ret = 0; - quota_dentry_t *dentry = NULL; - int32_t count = 0; + int ret = 0; + quota_dentry_t *dentry = NULL; + int32_t count = 0; - if (ctx == NULL || list == NULL) - goto out; + if (ctx == NULL || list == NULL) + goto out; - LOCK (&ctx->lock); + LOCK(&ctx->lock); + { + list_for_each_entry(dentry, &ctx->parents, next) { - list_for_each_entry (dentry, &ctx->parents, next) { - ret = quota_add_parent (list, dentry->name, - dentry->par); - - if (ret == 1) - count++; - } + ret = quota_add_parent(list, dentry->name, dentry->par); + if (ret == 1) + count++; + else if (ret == -1) + break; } - UNLOCK (&ctx->lock); + } + UNLOCK(&ctx->lock); out: - return count; + return (ret == -1) ? -1 : count; } int32_t -quota_build_ancestry_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - gf_dirent_t *entries, dict_t *xdata) +quota_build_ancestry_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, + dict_t *xdata) { - inode_t *parent = NULL, *tmp_parent = NULL; - gf_dirent_t *entry = NULL; - loc_t loc = {0, }; - quota_dentry_t *dentry = NULL, *tmp = NULL; - quota_inode_ctx_t *ctx = NULL; - struct list_head parents = {0, }; - quota_local_t *local = NULL; - - INIT_LIST_HEAD (&parents); - - local = frame->local; - frame->local = NULL; - - if (op_ret < 0) - goto err; - - parent = inode_parent (local->loc.inode, 0, NULL); - if (parent == NULL) { - gf_msg (this->name, GF_LOG_WARNING, EINVAL, - Q_MSG_PARENT_NULL, "parent is NULL"); - op_errno = EINVAL; - goto err; - } - - if ((op_ret > 0) && (entries != NULL)) { - list_for_each_entry (entry, &entries->list, list) { - if (__is_root_gfid 
(entry->inode->gfid)) { - /* The list contains a sub-list for each - * possible path to the target inode. Each - * sub-list starts with the root entry of the - * tree and is followed by the child entries - * for a particular path to the target entry. - * The root entry is an implied sub-list - * delimiter, as it denotes we have started - * processing a new path. Reset the parent - * pointer and continue - */ - - tmp_parent = NULL; - } - - gf_uuid_copy (loc.gfid, entry->d_stat.ia_gfid); - - loc.inode = inode_ref (entry->inode); - loc.parent = inode_ref (tmp_parent); - loc.name = entry->d_name; - - quota_fill_inodectx (this, entry->inode, entry->dict, - &loc, &entry->d_stat, &op_errno); - - tmp_parent = entry->inode; + inode_t *parent = NULL; + inode_t *tmp_parent = NULL; + inode_t *linked_inode = NULL; + inode_t *tmp_inode = NULL; + gf_dirent_t *entry = NULL; + loc_t loc = { + 0, + }; + quota_dentry_t *dentry = NULL; + quota_dentry_t *tmp = NULL; + quota_inode_ctx_t *ctx = NULL; + struct list_head parents; + quota_local_t *local = NULL; + int ret; + + INIT_LIST_HEAD(&parents); + + local = frame->local; + frame->local = NULL; + + if (op_ret < 0) + goto err; + + if ((op_ret > 0) && (entries != NULL)) { + list_for_each_entry(entry, &entries->list, list) + { + if (__is_root_gfid(entry->inode->gfid)) { + /* The list contains a sub-list for each + * possible path to the target inode. Each + * sub-list starts with the root entry of the + * tree and is followed by the child entries + * for a particular path to the target entry. + * The root entry is an implied sub-list + * delimiter, as it denotes we have started + * processing a new path. 
Reset the parent + * pointer and continue + */ - loc_wipe (&loc); + tmp_parent = NULL; + } else { + /* For a non-root entry, link this inode */ + linked_inode = inode_link(entry->inode, tmp_parent, + entry->d_name, &entry->d_stat); + if (linked_inode) { + tmp_inode = entry->inode; + entry->inode = linked_inode; + inode_unref(tmp_inode); + } else { + gf_msg(this->name, GF_LOG_WARNING, EINVAL, + Q_MSG_PARENT_NULL, "inode link failed"); + op_errno = EINVAL; + goto err; } + } + + gf_uuid_copy(loc.gfid, entry->d_stat.ia_gfid); + + loc.inode = inode_ref(entry->inode); + loc.parent = inode_ref(tmp_parent); + loc.name = entry->d_name; + + quota_fill_inodectx(this, entry->inode, entry->dict, &loc, + &entry->d_stat, &op_errno); + + /* For non-directory, posix_get_ancestry_non_directory + * returns all hard-links that are represented by nodes + * adjacent to each other in the dentry-list. + * (Unlike the directory case where adjacent nodes + * either have a parent/child relationship or belong to + * different paths). 
+ */ + if (entry->inode->ia_type == IA_IFDIR) + tmp_parent = entry->inode; + + loc_wipe(&loc); + } + } + + parent = inode_parent(local->loc.inode, 0, NULL); + if (parent == NULL) { + gf_msg(this->name, GF_LOG_WARNING, EINVAL, Q_MSG_PARENT_NULL, + "parent is NULL"); + op_errno = EINVAL; + goto err; + } + + quota_inode_ctx_get(local->loc.inode, this, &ctx, 0); + + ret = quota_add_parents_from_ctx(ctx, &parents); + if (ret == -1) { + op_errno = errno; + goto err; + } + + if (list_empty(&parents)) { + /* we built ancestry for a directory */ + list_for_each_entry(entry, &entries->list, list) + { + if (entry->inode == local->loc.inode) + break; } - quota_inode_ctx_get (local->loc.inode, this, &ctx, 0); - - quota_add_parents_from_ctx (ctx, &parents); - - if (list_empty (&parents)) { - /* we built ancestry for a directory */ - list_for_each_entry (entry, &entries->list, list) { - if (entry->inode == local->loc.inode) - break; - } - - GF_ASSERT (&entry->list != &entries->list); + /* Getting assertion here, need to investigate + comment for now + GF_ASSERT (&entry->list != &entries->list); + */ - quota_add_parent (&parents, entry->d_name, parent->gfid); + ret = quota_add_parent(&parents, entry->d_name, parent->gfid); + if (ret == -1) { + op_errno = errno; + goto err; } + } - local->ancestry_cbk (&parents, local->loc.inode, 0, 0, - local->ancestry_data); - goto cleanup; + local->ancestry_cbk(&parents, local->loc.inode, 0, 0, local->ancestry_data); + goto cleanup; err: - local->ancestry_cbk (NULL, NULL, -1, op_errno, local->ancestry_data); + local->ancestry_cbk(NULL, NULL, -1, op_errno, local->ancestry_data); cleanup: - STACK_DESTROY (frame->root); - quota_local_cleanup (local); + STACK_DESTROY(frame->root); + quota_local_cleanup(local); - if (parent != NULL) { - inode_unref (parent); - parent = NULL; - } + if (parent != NULL) { + inode_unref(parent); + parent = NULL; + } - list_for_each_entry_safe (dentry, tmp, &parents, next) { - __quota_dentry_free (dentry); + if 
(!list_empty(&parents)) { + list_for_each_entry_safe(dentry, tmp, &parents, next) + { + __quota_dentry_free(dentry); } + } - return 0; + return 0; } int -quota_build_ancestry (inode_t *inode, quota_ancestry_built_t ancestry_cbk, - void *data) +quota_build_ancestry(inode_t *inode, quota_ancestry_built_t ancestry_cbk, + void *data) { - fd_t *fd = NULL; - quota_local_t *local = NULL; - call_frame_t *new_frame = NULL; - int op_errno = ENOMEM; - int op_ret = -1; - xlator_t *this = NULL; - dict_t *xdata_req = NULL; - - this = THIS; - - xdata_req = dict_new (); - if (xdata_req == NULL) - goto err; + fd_t *fd = NULL; + quota_local_t *local = NULL; + call_frame_t *new_frame = NULL; + int op_errno = ENOMEM; + int op_ret = -1; + xlator_t *this = NULL; + dict_t *xdata_req = NULL; + + this = THIS; + + xdata_req = dict_new(); + if (xdata_req == NULL) + goto err; + + fd = fd_anonymous(inode); + if (fd == NULL) + goto err; + + new_frame = create_frame(this, this->ctx->pool); + if (new_frame == NULL) + goto err; + + local = quota_local_new(); + if (local == NULL) + goto err; + + new_frame->root->uid = new_frame->root->gid = 0; + new_frame->local = local; + local->ancestry_cbk = ancestry_cbk; + local->ancestry_data = data; + local->loc.inode = inode_ref(inode); + + op_ret = dict_set_int8(xdata_req, QUOTA_LIMIT_KEY, 1); + if (op_ret < 0) { + op_errno = -op_ret; + goto err; + } + + op_ret = dict_set_int8(xdata_req, QUOTA_LIMIT_OBJECTS_KEY, 1); + if (op_ret < 0) { + op_errno = -op_ret; + goto err; + } + + op_ret = dict_set_int8(xdata_req, GET_ANCESTRY_DENTRY_KEY, 1); + if (op_ret < 0) { + op_errno = -op_ret; + goto err; + } + + /* This would ask posix layer to construct dentry chain till root + * We don't need to do a opendir, we can use the anonymous fd + * here for the readidrp. 
+ * avoiding opendir also reduces the window size where another FOP + * can be executed before completion of build ancestry + */ + STACK_WIND(new_frame, quota_build_ancestry_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdirp, fd, 0, 0, xdata_req); + + op_ret = 0; - fd = fd_anonymous (inode); - if (fd == NULL) - goto err; - - new_frame = create_frame (this, this->ctx->pool); - if (new_frame == NULL) - goto err; +err: + if (fd) + fd_unref(fd); - local = quota_local_new (); - if (local == NULL) - goto err; + if (xdata_req) + dict_unref(xdata_req); - new_frame->root->uid = new_frame->root->gid = 0; - new_frame->local = local; - local->ancestry_cbk = ancestry_cbk; - local->ancestry_data = data; - local->loc.inode = inode_ref (inode); + if (op_ret < 0) { + ancestry_cbk(NULL, NULL, -1, op_errno, data); - op_ret = dict_set_int8 (xdata_req, QUOTA_LIMIT_KEY, 1); - if (op_ret < 0) { - op_errno = -op_ret; - goto err; + if (new_frame) { + local = new_frame->local; + new_frame->local = NULL; + STACK_DESTROY(new_frame->root); } - op_ret = dict_set_int8 (xdata_req, QUOTA_LIMIT_OBJECTS_KEY, 1); - if (op_ret < 0) { - op_errno = -op_ret; - goto err; - } + if (local) + quota_local_cleanup(local); + } - op_ret = dict_set_int8 (xdata_req, GET_ANCESTRY_DENTRY_KEY, 1); - if (op_ret < 0) { - op_errno = -op_ret; - goto err; - } - - /* This would ask posix layer to construct dentry chain till root - * We don't need to do a opendir, we can use the anonymous fd - * here for the readidrp. 
- * avoiding opendir also reduces the window size where another FOP - * can be executed before completion of build ancestry - */ - STACK_WIND (new_frame, quota_build_ancestry_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readdirp, fd, 0, 0, xdata_req); + return 0; +} - op_ret = 0; +int +quota_validate(call_frame_t *frame, inode_t *inode, xlator_t *this, + fop_lookup_cbk_t cbk_fn) +{ + quota_local_t *local = NULL; + int ret = 0; + dict_t *xdata = NULL; + quota_priv_t *priv = NULL; -err: - if (fd) - fd_unref (fd); + local = frame->local; + priv = this->private; - if (xdata_req) - dict_unref (xdata_req); + LOCK(&local->lock); + { + loc_wipe(&local->validate_loc); - if (op_ret < 0) { - ancestry_cbk (NULL, NULL, -1, op_errno, data); + ret = quota_inode_loc_fill(inode, &local->validate_loc); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENFORCEMENT_FAILED, + "cannot fill loc for inode (gfid:%s), hence " + "aborting quota-checks and continuing with fop", + uuid_utoa(inode->gfid)); + } + } + UNLOCK(&local->lock); + + if (ret < 0) { + ret = -ENOMEM; + goto err; + } + + xdata = dict_new(); + if (xdata == NULL) { + ret = -ENOMEM; + goto err; + } + + ret = dict_set_int8(xdata, QUOTA_SIZE_KEY, 1); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM, + "dict set failed"); + ret = -ENOMEM; + goto err; + } + + ret = dict_set_str(xdata, "volume-uuid", priv->volume_uuid); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM, + "dict set failed"); + ret = -ENOMEM; + goto err; + } + + ret = quota_enforcer_lookup(frame, this, xdata, cbk_fn); + if (ret < 0) { + ret = -ENOTCONN; + goto err; + } + + ret = 0; +err: + if (xdata) + dict_unref(xdata); - if (new_frame) { - local = new_frame->local; - new_frame->local = NULL; - STACK_DESTROY (new_frame->root); - } + return ret; +} - if (local) - quota_local_cleanup (local); +void +quota_check_limit_continuation(struct list_head *parents, inode_t *inode, + int32_t op_ret, 
int32_t op_errno, void *data) +{ + call_frame_t *frame = NULL; + xlator_t *this = NULL; + quota_local_t *local = NULL; + quota_local_t *par_local = NULL; + quota_dentry_t *entry = NULL; + inode_t *parent = NULL; + int parent_count = 0; + + frame = data; + local = frame->local; + this = THIS; + + if (local->par_frame) + par_local = local->par_frame->local; + else + par_local = local; + + if ((op_ret < 0) || list_empty(parents)) { + if (op_ret >= 0) { + gf_msg(this->name, GF_LOG_WARNING, EIO, Q_MSG_ANCESTRY_BUILD_FAILED, + "Couldn't build ancestry for inode (gfid:%s). " + "Without knowing ancestors till root, quota" + "cannot be enforced. " + "Hence, failing fop with EIO", + uuid_utoa(inode->gfid)); + op_errno = EIO; + } + + quota_handle_validate_error(frame, -1, op_errno); + goto out; + } + + list_for_each_entry(entry, parents, next) { parent_count++; } + + LOCK(&par_local->lock); + { + par_local->link_count += (parent_count - 1); + } + UNLOCK(&par_local->lock); + + if (local->par_frame) { + list_for_each_entry(entry, parents, next) + { + parent = inode_find(inode->table, entry->par); + quota_check_limit(frame, parent, this); + inode_unref(parent); } + } else { + list_for_each_entry(entry, parents, next) + { + parent = do_quota_check_limit(frame, inode, this, entry, _gf_true); + if (parent) + inode_unref(parent); + else + quota_link_count_decrement(frame); + } + } - return 0; +out: + return; } -int -quota_validate (call_frame_t *frame, inode_t *inode, xlator_t *this, - fop_lookup_cbk_t cbk_fn) +int32_t +quota_check_object_limit(call_frame_t *frame, quota_inode_ctx_t *ctx, + quota_priv_t *priv, inode_t *_inode, xlator_t *this, + int32_t *op_errno, int just_validated, + quota_local_t *local, gf_boolean_t *skip_check) { - quota_local_t *local = NULL; - int ret = 0; - dict_t *xdata = NULL; - quota_priv_t *priv = NULL; - - local = frame->local; - priv = this->private; - - LOCK (&local->lock); + int32_t ret = -1; + uint32_t timeout = 0; + char need_validate = 0; + 
gf_boolean_t hard_limit_exceeded = 0; + int64_t object_aggr_count = 0; + + GF_ASSERT(frame); + GF_ASSERT(priv); + GF_ASSERT(_inode); + GF_ASSERT(this); + GF_ASSERT(local); + + if (ctx != NULL && (ctx->object_hard_lim > 0 || ctx->object_soft_lim)) { + LOCK(&ctx->lock); { - loc_wipe (&local->validate_loc); - - ret = quota_inode_loc_fill (inode, &local->validate_loc); - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, ENOMEM, - Q_MSG_ENFORCEMENT_FAILED, - "cannot fill loc for inode (gfid:%s), hence " - "aborting quota-checks and continuing with fop", - uuid_utoa (inode->gfid)); - } - } - UNLOCK (&local->lock); + timeout = priv->soft_timeout; - if (ret < 0) { - ret = -ENOMEM; - goto err; - } + object_aggr_count = ctx->file_count + ctx->dir_count + 1; + if (((ctx->object_soft_lim >= 0) && + (object_aggr_count) > ctx->object_soft_lim)) { + timeout = priv->hard_timeout; + } - xdata = dict_new (); - if (xdata == NULL) { - ret = -ENOMEM; - goto err; + if (!just_validated && quota_timeout(ctx->validate_time, timeout)) { + need_validate = 1; + } else if ((object_aggr_count) > ctx->object_hard_lim) { + hard_limit_exceeded = 1; + } } + UNLOCK(&ctx->lock); - ret = dict_set_int8 (xdata, QUOTA_SIZE_KEY, 1); - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, ENOMEM, - Q_MSG_ENOMEM, "dict set failed"); - ret = -ENOMEM; - goto err; + if (need_validate && *skip_check != _gf_true) { + *skip_check = _gf_true; + ret = quota_validate(frame, _inode, this, quota_validate_cbk); + if (ret < 0) { + *op_errno = -ret; + *skip_check = _gf_false; + } + goto out; } - ret = dict_set_str (xdata, "volume-uuid", priv->volume_uuid); - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, ENOMEM, - Q_MSG_ENOMEM, "dict set failed"); - ret = -ENOMEM; - goto err; + if (hard_limit_exceeded) { + local->op_ret = -1; + local->op_errno = EDQUOT; + *op_errno = EDQUOT; + goto out; } - ret = quota_enforcer_lookup (frame, this, xdata, cbk_fn); - if (ret < 0) { - ret = -ENOTCONN; - goto err; - } + /*We log 
usage only if quota limit is configured on + that inode + */ + quota_log_usage(this, ctx, _inode, 0); + } - ret = 0; -err: - if (xdata) - dict_unref (xdata); + ret = 0; - return ret; +out: + return ret; } -void -quota_check_limit_continuation (struct list_head *parents, inode_t *inode, - int32_t op_ret, int32_t op_errno, void *data) +int32_t +quota_check_size_limit(call_frame_t *frame, quota_inode_ctx_t *ctx, + quota_priv_t *priv, inode_t *_inode, xlator_t *this, + int32_t *op_errno, int just_validated, int64_t delta, + quota_local_t *local, gf_boolean_t *skip_check) { - call_frame_t *frame = NULL; - xlator_t *this = NULL; - quota_local_t *local = NULL; - quota_local_t *par_local = NULL; - quota_dentry_t *entry = NULL; - inode_t *parent = NULL; - int parent_count = 0; - - frame = data; - local = frame->local; - this = THIS; - - if (local->par_frame) - par_local = local->par_frame->local; - else - par_local = local; - - - if ((op_ret < 0) || list_empty (parents)) { - if (op_ret >= 0) { - gf_msg (this->name, GF_LOG_WARNING, EIO, - Q_MSG_ANCESTRY_BUILD_FAILED, - "Couldn't build ancestry for inode (gfid:%s). " - "Without knowing ancestors till root, quota" - "cannot be enforced. 
" - "Hence, failing fop with EIO", - uuid_utoa (inode->gfid)); - op_errno = EIO; - } + int32_t ret = -1; + uint32_t timeout = 0; + char need_validate = 0; + gf_boolean_t hard_limit_exceeded = 0; + int64_t space_available = 0; + int64_t wouldbe_size = 0; + + GF_ASSERT(frame); + GF_ASSERT(priv); + GF_ASSERT(_inode); + GF_ASSERT(this); + GF_ASSERT(local); + + if (ctx != NULL && (ctx->hard_lim > 0 || ctx->soft_lim > 0)) { + wouldbe_size = ctx->size + delta; + + LOCK(&ctx->lock); + { + timeout = priv->soft_timeout; - quota_handle_validate_error (frame, -1, op_errno); - goto out; - } + if ((ctx->soft_lim >= 0) && (wouldbe_size > ctx->soft_lim)) { + timeout = priv->hard_timeout; + } - list_for_each_entry (entry, parents, next) { - parent_count++; + if (!just_validated && quota_timeout(ctx->validate_time, timeout)) { + need_validate = 1; + } else if (wouldbe_size >= ctx->hard_lim) { + hard_limit_exceeded = 1; + } } + UNLOCK(&ctx->lock); - LOCK (&par_local->lock); - { - par_local->link_count += (parent_count - 1); + if (need_validate && *skip_check != _gf_true) { + *skip_check = _gf_true; + ret = quota_validate(frame, _inode, this, quota_validate_cbk); + if (ret < 0) { + *op_errno = -ret; + *skip_check = _gf_false; + } + goto out; } - UNLOCK (&par_local->lock); - if (local->par_frame) { - list_for_each_entry (entry, parents, next) { - parent = inode_find (inode->table, entry->par); - quota_check_limit (frame, parent, this); - inode_unref (parent); - } - } else { - list_for_each_entry (entry, parents, next) { - parent = do_quota_check_limit (frame, inode, this, - entry, _gf_true); - if (parent) - inode_unref (parent); - else - quota_link_count_decrement (frame); - } - } + if (hard_limit_exceeded) { + local->op_ret = -1; + local->op_errno = EDQUOT; -out: - return; -} + space_available = ctx->hard_lim - ctx->size; -int32_t -quota_check_object_limit (call_frame_t *frame, quota_inode_ctx_t *ctx, - quota_priv_t *priv, inode_t *_inode, xlator_t *this, - int32_t *op_errno, int 
just_validated, - quota_local_t *local, gf_boolean_t *skip_check) -{ - int32_t ret = -1; - uint32_t timeout = 0; - char need_validate = 0; - gf_boolean_t hard_limit_exceeded = 0; - int64_t object_aggr_count = 0; - - GF_ASSERT (frame); - GF_ASSERT (priv); - GF_ASSERT (_inode); - GF_ASSERT (this); - GF_ASSERT (local); - - if (ctx != NULL && (ctx->object_hard_lim > 0 || - ctx->object_soft_lim)) { - LOCK (&ctx->lock); - { - timeout = priv->soft_timeout; - - object_aggr_count = ctx->file_count + - ctx->dir_count + 1; - if (((ctx->object_soft_lim >= 0) - && (object_aggr_count) > - ctx->object_soft_lim)) { - timeout = priv->hard_timeout; - } - - if (!just_validated - && quota_timeout (&ctx->tv, timeout)) { - need_validate = 1; - } else if ((object_aggr_count) > - ctx->object_hard_lim) { - hard_limit_exceeded = 1; - } - } - UNLOCK (&ctx->lock); - - if (need_validate && *skip_check != _gf_true) { - *skip_check = _gf_true; - ret = quota_validate (frame, _inode, this, - quota_validate_cbk); - if (ret < 0) { - *op_errno = -ret; - *skip_check = _gf_false; - } - goto out; - } + if (space_available < 0) + space_available = 0; - if (hard_limit_exceeded) { - local->op_ret = -1; - local->op_errno = EDQUOT; - *op_errno = EDQUOT; - goto out; - } + if ((local->space_available < 0) || + (local->space_available > space_available)) { + local->space_available = space_available; + } - /*We log usage only if quota limit is configured on - that inode - */ - quota_log_usage (this, ctx, _inode, 0); + if (space_available == 0) { + *op_errno = EDQUOT; + goto out; + } } - ret = 0; + /* We log usage only if quota limit is configured on + that inode. 
*/ + quota_log_usage(this, ctx, _inode, delta); + } + ret = 0; out: - return ret; + return ret; } - int32_t -quota_check_size_limit (call_frame_t *frame, quota_inode_ctx_t *ctx, - quota_priv_t *priv, inode_t *_inode, xlator_t *this, - int32_t *op_errno, int just_validated, int64_t delta, - quota_local_t *local, gf_boolean_t *skip_check) +quota_check_limit(call_frame_t *frame, inode_t *inode, xlator_t *this) { - int32_t ret = -1; - uint32_t timeout = 0; - char need_validate = 0; - gf_boolean_t hard_limit_exceeded = 0; - int64_t space_available = 0; - int64_t wouldbe_size = 0; - - GF_ASSERT (frame); - GF_ASSERT (priv); - GF_ASSERT (_inode); - GF_ASSERT (this); - GF_ASSERT (local); - - if (ctx != NULL && (ctx->hard_lim > 0 || ctx->soft_lim > 0)) { - wouldbe_size = ctx->size + delta; - - LOCK (&ctx->lock); - { - timeout = priv->soft_timeout; - - if ((ctx->soft_lim >= 0) - && (wouldbe_size > ctx->soft_lim)) { - timeout = priv->hard_timeout; - } - - if (!just_validated - && quota_timeout (&ctx->tv, timeout)) { - need_validate = 1; - } else if (wouldbe_size >= ctx->hard_lim) { - hard_limit_exceeded = 1; - } - } - UNLOCK (&ctx->lock); - - if (need_validate && *skip_check != _gf_true) { - *skip_check = _gf_true; - ret = quota_validate (frame, _inode, this, - quota_validate_cbk); - if (ret < 0) { - *op_errno = -ret; - *skip_check = _gf_false; - } - goto out; - } - - if (hard_limit_exceeded) { - local->op_ret = -1; - local->op_errno = EDQUOT; + int32_t ret = -1, op_errno = EINVAL; + inode_t *_inode = NULL, *parent = NULL; + quota_inode_ctx_t *ctx = NULL; + quota_priv_t *priv = NULL; + quota_local_t *local = NULL; + quota_local_t *par_local = NULL; + char just_validated = 0; + int64_t delta = 0; + int8_t object_delta = 0; + uint64_t value = 0; + gf_boolean_t skip_check = _gf_false; + + GF_VALIDATE_OR_GOTO("quota", this, err); + GF_VALIDATE_OR_GOTO(this->name, frame, err); + GF_VALIDATE_OR_GOTO(this->name, inode, err); + + local = frame->local; + GF_VALIDATE_OR_GOTO(this->name, 
local, err); + + if (local->par_frame) { + par_local = local->par_frame->local; + GF_VALIDATE_OR_GOTO(this->name, par_local, err); + } else { + par_local = local; + } + + delta = par_local->delta; + object_delta = par_local->object_delta; + + GF_VALIDATE_OR_GOTO(this->name, par_local->stub, err); + /* Allow all the trusted clients + * Don't block the gluster internal processes like rebalance, gsyncd, + * self heal etc from the disk quotas. + * + * Method: Allow all the clients with PID negative. This is by the + * assumption that any kernel assigned pid doesn't have the negative + * number. + */ + if (0 > frame->root->pid) { + ret = 0; + quota_link_count_decrement(frame); + goto done; + } - space_available = ctx->hard_lim - ctx->size; + priv = this->private; - if (space_available < 0) - space_available = 0; + inode_ctx_get(inode, this, &value); + ctx = (quota_inode_ctx_t *)(unsigned long)value; - if ((local->space_available < 0) - || (local->space_available - > space_available)){ - local->space_available - = space_available; + _inode = inode_ref(inode); - } + LOCK(&local->lock); + { + just_validated = local->just_validated; + local->just_validated = 0; + } + UNLOCK(&local->lock); - if (space_available == 0) { - *op_errno = EDQUOT; - goto out; - } - } - - /* We log usage only if quota limit is configured on - that inode. 
*/ - quota_log_usage (this, ctx, _inode, delta); + do { + /* In a rename operation, enforce should be stopped at common + ancestor */ + if (!gf_uuid_is_null(par_local->common_ancestor) && + !gf_uuid_compare(_inode->gfid, par_local->common_ancestor)) { + quota_link_count_decrement(frame); + break; } - ret = 0; -out: - return ret; -} + if (object_delta <= 0) + goto skip_check_object_limit; + ret = quota_check_object_limit(frame, ctx, priv, _inode, this, + &op_errno, just_validated, par_local, + &skip_check); + if (skip_check == _gf_true) + goto done; -int32_t -quota_check_limit (call_frame_t *frame, inode_t *inode, xlator_t *this) -{ - int32_t ret = -1, op_errno = EINVAL; - inode_t *_inode = NULL, *parent = NULL; - quota_inode_ctx_t *ctx = NULL; - quota_priv_t *priv = NULL; - quota_local_t *local = NULL; - quota_local_t *par_local = NULL; - char need_validate = 0; - char just_validated = 0; - gf_boolean_t hard_limit_exceeded = 0; - int64_t delta = 0; - uint64_t value = 0; - gf_boolean_t skip_check = _gf_false; - - GF_VALIDATE_OR_GOTO ("quota", this, err); - GF_VALIDATE_OR_GOTO (this->name, frame, err); - GF_VALIDATE_OR_GOTO (this->name, inode, err); - - local = frame->local; - GF_VALIDATE_OR_GOTO (this->name, local, err); - - if (local->par_frame) { - par_local = local->par_frame->local; - GF_VALIDATE_OR_GOTO (this->name, par_local, err); - } else { - par_local = local; + if (ret) { + if (op_errno != EDQUOT) + gf_msg(this->name, GF_LOG_ERROR, 0, Q_MSG_ENFORCEMENT_FAILED, + "Failed to " + "check quota object limit"); + goto err; } - delta = par_local->delta; + skip_check_object_limit: + ret = quota_check_size_limit(frame, ctx, priv, _inode, this, &op_errno, + just_validated, delta, par_local, + &skip_check); + if (skip_check == _gf_true) + goto done; - GF_VALIDATE_OR_GOTO (this->name, par_local->stub, err); - /* Allow all the trusted clients - * Don't block the gluster internal processes like rebalance, gsyncd, - * self heal etc from the disk quotas. 
- * - * Method: Allow all the clients with PID negative. This is by the - * assumption that any kernel assigned pid doesn't have the negative - * number. - */ - if (0 > frame->root->pid) { - ret = 0; - quota_link_count_decrement (frame); - goto done; + if (ret) { + if (op_errno != EDQUOT) + gf_msg(this->name, GF_LOG_ERROR, 0, Q_MSG_ENFORCEMENT_FAILED, + "Failed to " + "check quota size limit"); + goto err; } - priv = this->private; - - inode_ctx_get (inode, this, &value); - ctx = (quota_inode_ctx_t *)(unsigned long)value; - - _inode = inode_ref (inode); - - LOCK (&local->lock); - { - just_validated = local->just_validated; - local->just_validated = 0; + if (__is_root_gfid(_inode->gfid)) { + quota_link_count_decrement(frame); + break; } - UNLOCK (&local->lock); - - do { - /* In a rename operation, enforce should be stopped at common - ancestor */ - if (!gf_uuid_is_null (par_local->common_ancestor) && - !gf_uuid_compare (_inode->gfid, par_local->common_ancestor) - ) { - quota_link_count_decrement (frame); - break; - } - - ret = quota_check_object_limit (frame, ctx, priv, _inode, this, - &op_errno, just_validated, - par_local, &skip_check); - if (skip_check == _gf_true) - goto done; - - if (ret) { - if (op_errno != EDQUOT) - gf_msg (this->name, GF_LOG_ERROR, 0, - Q_MSG_ENFORCEMENT_FAILED, "Failed to " - "check quota object limit"); - goto err; - } - ret = quota_check_size_limit (frame, ctx, priv, _inode, this, - &op_errno, just_validated, delta, - par_local, &skip_check); - if (skip_check == _gf_true) - goto done; - - if (ret) { - if (op_errno != EDQUOT) - gf_msg (this->name, GF_LOG_ERROR, 0, - Q_MSG_ENFORCEMENT_FAILED, "Failed to " - "check quota size limit"); - goto err; - } - - if (__is_root_gfid (_inode->gfid)) { - quota_link_count_decrement (frame); - break; - } + parent = inode_parent(_inode, 0, NULL); + if (parent == NULL) { + ret = quota_build_ancestry(_inode, quota_check_limit_continuation, + frame); + if (ret < 0) { + op_errno = -ret; + goto err; + } - 
parent = inode_parent (_inode, 0, NULL); - if (parent == NULL) { - ret = quota_build_ancestry (_inode, - quota_check_limit_continuation, - frame); - if (ret < 0) { - op_errno = -ret; - goto err; - } - - break; - } + break; + } - inode_unref (_inode); - _inode = parent; - just_validated = 0; + inode_unref(_inode); + _inode = parent; + just_validated = 0; - value = 0; - inode_ctx_get (_inode, this, &value); - ctx = (quota_inode_ctx_t *)(unsigned long)value; - } while (1); + value = 0; + inode_ctx_get(_inode, this, &value); + ctx = (quota_inode_ctx_t *)(unsigned long)value; + } while (1); done: - if (_inode != NULL) { - inode_unref (_inode); - _inode = NULL; - } - return 0; + if (_inode != NULL) { + inode_unref(_inode); + _inode = NULL; + } + return 0; err: - quota_handle_validate_error (frame, -1, op_errno); + quota_handle_validate_error(frame, -1, op_errno); - inode_unref (_inode); - return 0; + inode_unref(_inode); + return 0; } inode_t * -do_quota_check_limit (call_frame_t *frame, inode_t *inode, xlator_t *this, - quota_dentry_t *dentry, gf_boolean_t force) +do_quota_check_limit(call_frame_t *frame, inode_t *inode, xlator_t *this, + quota_dentry_t *dentry, gf_boolean_t force) { - int32_t ret = -1; - inode_t *parent = NULL; - call_frame_t *new_frame = NULL; - quota_local_t *local = NULL; - quota_local_t *new_local = NULL; - - local = frame->local; - - parent = inode_parent (inode, dentry->par, dentry->name); - if (parent == NULL) { - if (force) - parent = inode_find (inode->table, dentry->par); - else - goto out; - } - if (parent == NULL) - goto out; + int32_t ret = -1; + inode_t *parent = NULL; + call_frame_t *new_frame = NULL; + quota_local_t *new_local = NULL; + + parent = inode_parent(inode, dentry->par, dentry->name); + if (parent == NULL) { + if (force) + parent = inode_find(inode->table, dentry->par); + else + goto out; + } + if (parent == NULL) + goto out; - new_frame = copy_frame (frame); - if (new_frame == NULL) - goto out; + new_frame = 
copy_frame(frame); + if (new_frame == NULL) + goto out; - new_local = quota_local_new (); - if (new_local == NULL) - goto out; + new_local = quota_local_new(); + if (new_local == NULL) + goto out; - new_frame->local = new_local; - new_local->par_frame = frame; + new_frame->local = new_local; + new_local->par_frame = frame; - quota_check_limit (new_frame, parent, this); + quota_check_limit(new_frame, parent, this); - ret = 0; + ret = 0; out: - if (ret < 0) { - if (parent) { - /* Caller should decrement link_count, in case parent is - * NULL - */ - quota_handle_validate_error (frame, -1, ENOMEM); - } - - if (new_frame) { - new_frame->local = NULL; - STACK_DESTROY (new_frame->root); - } + if (ret < 0) { + if (parent) { + /* Caller should decrement link_count, in case parent is + * NULL + */ + quota_handle_validate_error(frame, -1, ENOMEM); + } - if (new_local) - quota_local_cleanup (new_local); + if (new_frame) { + new_frame->local = NULL; + STACK_DESTROY(new_frame->root); } + } - return parent; + return parent; } static int -quota_get_limits (xlator_t *this, dict_t *dict, int64_t *hard_lim, - int64_t *soft_lim, int64_t *object_hard_limit, - int64_t *object_soft_limit) +quota_get_limits(xlator_t *this, dict_t *dict, int64_t *hard_lim, + int64_t *soft_lim, int64_t *object_hard_limit, + int64_t *object_soft_limit) { - quota_limits_t *limit = NULL; - quota_limits_t *object_limit = NULL; - quota_priv_t *priv = NULL; - int64_t soft_lim_percent = 0; - int64_t *ptr = NULL; - int ret = 0; - - if ((this == NULL) || (dict == NULL) || (hard_lim == NULL) - || (soft_lim == NULL)) - goto out; + quota_limits_t *limit = NULL; + quota_limits_t *object_limit = NULL; + quota_priv_t *priv = NULL; + int64_t soft_lim_percent = 0; + int64_t *ptr = NULL; + int ret = 0; - priv = this->private; + if ((this == NULL) || (dict == NULL) || (hard_lim == NULL) || + (soft_lim == NULL)) + goto out; - ret = dict_get_bin (dict, QUOTA_LIMIT_KEY, (void **) &ptr); - limit = (quota_limits_t *)ptr; + priv = 
this->private; - if (limit) { - *hard_lim = ntoh64 (limit->hl); - soft_lim_percent = ntoh64 (limit->sl); - } + ret = dict_get_bin(dict, QUOTA_LIMIT_KEY, (void **)&ptr); + limit = (quota_limits_t *)ptr; - if (soft_lim_percent < 0) { - soft_lim_percent = priv->default_soft_lim; - } + if (limit) { + *hard_lim = ntoh64(limit->hl); + soft_lim_percent = ntoh64(limit->sl); + } - if ((*hard_lim > 0) && (soft_lim_percent > 0)) { - *soft_lim = (soft_lim_percent * (*hard_lim))/100; - } + if (soft_lim_percent < 0) { + soft_lim_percent = priv->default_soft_lim; + } - ret = dict_get_bin (dict, QUOTA_LIMIT_OBJECTS_KEY, (void **) &ptr); - if (ret) - return 0; - object_limit = (quota_limits_t *)ptr; + if ((*hard_lim > 0) && (soft_lim_percent > 0)) { + *soft_lim = (soft_lim_percent * (*hard_lim)) / 100; + } - if (object_limit) { - *object_hard_limit = ntoh64 (object_limit->hl); - soft_lim_percent = ntoh64 (object_limit->sl); - } + ret = dict_get_bin(dict, QUOTA_LIMIT_OBJECTS_KEY, (void **)&ptr); + if (ret) + return 0; + object_limit = (quota_limits_t *)ptr; - if (soft_lim_percent < 0) { - soft_lim_percent = priv->default_soft_lim; - } + if (object_limit) { + *object_hard_limit = ntoh64(object_limit->hl); + soft_lim_percent = ntoh64(object_limit->sl); + } - if ((*object_hard_limit > 0) && (soft_lim_percent > 0)) { - *object_soft_limit = (soft_lim_percent * - (*object_hard_limit))/100; - } + if (soft_lim_percent < 0) { + soft_lim_percent = priv->default_soft_lim; + } + + if ((*object_hard_limit > 0) && (soft_lim_percent > 0)) { + *object_soft_limit = (soft_lim_percent * (*object_hard_limit)) / 100; + } out: - return 0; + return 0; } int -quota_fill_inodectx (xlator_t *this, inode_t *inode, dict_t *dict, - loc_t *loc, struct iatt *buf, int32_t *op_errno) +quota_fill_inodectx(xlator_t *this, inode_t *inode, dict_t *dict, loc_t *loc, + struct iatt *buf, int32_t *op_errno) { - int32_t ret = -1; - char found = 0; - quota_inode_ctx_t *ctx = NULL; - quota_dentry_t *dentry = NULL; - uint64_t 
value = 0; - int64_t hard_lim = 0; - int64_t soft_lim = 0; - int64_t object_hard_limit = 0; - int64_t object_soft_limit = 0; - - quota_get_limits (this, dict, &hard_lim, &soft_lim, &object_hard_limit, - &object_soft_limit); - - inode_ctx_get (inode, this, &value); - ctx = (quota_inode_ctx_t *)(unsigned long)value; - - if ((((ctx == NULL) || (ctx->hard_lim == hard_lim)) - && (hard_lim < 0) && !QUOTA_REG_OR_LNK_FILE (buf->ia_type))) { - ret = 0; - goto out; + int32_t ret = -1; + char found = 0; + quota_inode_ctx_t *ctx = NULL; + quota_dentry_t *dentry = NULL; + uint64_t value = 0; + int64_t hard_lim = 0; + int64_t soft_lim = 0; + int64_t object_hard_limit = 0; + int64_t object_soft_limit = 0; + + quota_get_limits(this, dict, &hard_lim, &soft_lim, &object_hard_limit, + &object_soft_limit); + + inode_ctx_get(inode, this, &value); + ctx = (quota_inode_ctx_t *)(unsigned long)value; + + if ((((ctx == NULL) || (ctx->hard_lim == hard_lim)) && (hard_lim < 0) && + !QUOTA_REG_OR_LNK_FILE(buf->ia_type))) { + ret = 0; + goto out; + } + + ret = quota_inode_ctx_get(inode, this, &ctx, 1); + if ((ret == -1) || (ctx == NULL)) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_INODE_CTX_GET_FAILED, + "cannot create quota " + "context in inode(gfid:%s)", + uuid_utoa(inode->gfid)); + ret = -1; + *op_errno = ENOMEM; + goto out; + } + + LOCK(&ctx->lock); + { + ctx->hard_lim = hard_lim; + ctx->soft_lim = soft_lim; + ctx->object_hard_lim = object_hard_limit; + ctx->object_soft_lim = object_soft_limit; + + ctx->buf = *buf; + + if (!QUOTA_REG_OR_LNK_FILE(buf->ia_type)) { + goto unlock; + } + + /* do nothing if it is a nameless lookup */ + if (loc->name == NULL || !loc->parent) + goto unlock; + + list_for_each_entry(dentry, &ctx->parents, next) + { + if ((strcmp(dentry->name, loc->name) == 0) && + (gf_uuid_compare(loc->parent->gfid, dentry->par) == 0)) { + found = 1; + break; + } } - ret = quota_inode_ctx_get (inode, this, &ctx, 1); - if ((ret == -1) || (ctx == NULL)) { - gf_msg (this->name, 
GF_LOG_WARNING, ENOMEM, - Q_MSG_INODE_CTX_GET_FAILED, "cannot create quota " - "context in inode(gfid:%s)", uuid_utoa (inode->gfid)); + if (!found) { + dentry = __quota_dentry_new(ctx, (char *)loc->name, + loc->parent->gfid); + if (dentry == NULL) { + /* + gf_msg (this->name, GF_LOG_WARNING, ENOMEM, + Q_MSG_ENOMEM, + "cannot create a new dentry (par:%" +- PRId64", name:%s) for inode(ino:%" +- PRId64", gfid:%s)", +- uuid_utoa (local->loc.inode->gfid)); + */ ret = -1; *op_errno = ENOMEM; - goto out; + goto unlock; + } } + } +unlock: + UNLOCK(&ctx->lock); - LOCK (&ctx->lock); - { - ctx->hard_lim = hard_lim; - ctx->soft_lim = soft_lim; - ctx->object_hard_lim = object_hard_limit; - ctx->object_soft_lim = object_soft_limit; - - ctx->buf = *buf; - - if (!QUOTA_REG_OR_LNK_FILE (buf->ia_type)) { - goto unlock; - } +out: + return ret; +} - /* do nothing if it is a nameless lookup */ - if (loc->name == NULL || !loc->parent) - goto unlock; - - list_for_each_entry (dentry, &ctx->parents, next) { - if ((strcmp (dentry->name, loc->name) == 0) && - (gf_uuid_compare (loc->parent->gfid, - dentry->par) == 0)) { - found = 1; - break; - } - } +/* + * return _gf_true if enforcement is needed and _gf_false otherwise + */ +gf_boolean_t +should_quota_enforce(xlator_t *this, dict_t *dict, glusterfs_fop_t fop) +{ + int ret = 0; - if (!found) { - dentry = __quota_dentry_new (ctx, - (char *)loc->name, - loc->parent->gfid); - if (dentry == NULL) { - /* - gf_msg (this->name, GF_LOG_WARNING, ENOMEM, - Q_MSG_ENOMEM, - "cannot create a new dentry (par:%" -- PRId64", name:%s) for inode(ino:%" -- PRId64", gfid:%s)", -- uuid_utoa (local->loc.inode->gfid)); - */ - ret = -1; - *op_errno = ENOMEM; - goto unlock; - } - } - } -unlock: - UNLOCK (&ctx->lock); + ret = dict_check_flag(dict, GF_INTERNAL_CTX_KEY, GF_DHT_HEAL_DIR); + if (fop == GF_FOP_MKDIR && ret == DICT_FLAG_SET) { + return _gf_false; + } else if (ret == -ENOENT) { + gf_msg(this->name, GF_LOG_DEBUG, EINVAL, Q_MSG_INTERNAL_FOP_KEY_MISSING, + "No 
internal fop context present"); + goto out; + } out: - return ret; + return _gf_true; } int32_t -quota_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *dict, struct iatt *postparent) +quota_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *dict, struct iatt *postparent) { - quota_local_t *local = NULL; - int32_t ret = 0; - inode_t *this_inode = NULL; + quota_local_t *local = NULL; + inode_t *this_inode = NULL; - local = frame->local; - frame->local = NULL; + local = frame->local; + frame->local = NULL; - if (op_ret >= 0 && inode) { - this_inode = inode_ref (inode); + if (op_ret >= 0 && inode) { + this_inode = inode_ref(inode); - op_ret = quota_fill_inodectx (this, inode, dict, &local->loc, - buf, &op_errno); - if (op_ret < 0) - op_errno = ENOMEM; - } + op_ret = quota_fill_inodectx(this, inode, dict, &local->loc, buf, + &op_errno); + if (op_ret < 0) + op_errno = ENOMEM; + } - QUOTA_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, buf, - dict, postparent); + QUOTA_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, buf, dict, + postparent); - if (op_ret < 0 || this_inode == NULL || gf_uuid_is_null(this_inode->gfid)) - goto out; + if (op_ret < 0 || this_inode == NULL || gf_uuid_is_null(this_inode->gfid)) + goto out; - check_ancestory_2 (this, local, this_inode); + check_ancestory_2(this, local, this_inode); out: - if (this_inode) - inode_unref (this_inode); + if (this_inode) + inode_unref(this_inode); - quota_local_cleanup (local); + quota_local_cleanup(local); - return 0; + return 0; } int32_t -quota_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, - dict_t *xattr_req) +quota_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) { - quota_priv_t *priv = NULL; - int32_t ret = -1; - quota_local_t *local = NULL; + quota_priv_t *priv = NULL; + int32_t 
ret = -1; + quota_local_t *local = NULL; - priv = this->private; + priv = this->private; - WIND_IF_QUOTAOFF (priv->is_quota_on, off); + WIND_IF_QUOTAOFF(priv->is_quota_on, off); - xattr_req = xattr_req ? dict_ref(xattr_req) : dict_new(); - if (!xattr_req) - goto err; + xattr_req = xattr_req ? dict_ref(xattr_req) : dict_new(); + if (!xattr_req) + goto err; - local = quota_local_new (); - if (local == NULL) { - goto err; - } + local = quota_local_new(); + if (local == NULL) { + goto err; + } - frame->local = local; - loc_copy (&local->loc, loc); + frame->local = local; + loc_copy(&local->loc, loc); - ret = dict_set_int8 (xattr_req, QUOTA_LIMIT_KEY, 1); - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, ENOMEM, - Q_MSG_ENOMEM, "dict set of key for " - "hard-limit failed"); - goto err; - } + ret = dict_set_int8(xattr_req, QUOTA_LIMIT_KEY, 1); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM, + "dict set of key for " + "hard-limit failed"); + goto err; + } - ret = dict_set_int8 (xattr_req, QUOTA_LIMIT_OBJECTS_KEY, 1); - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM, - "dict set of key for quota object limit failed"); - goto err; - } + ret = dict_set_int8(xattr_req, QUOTA_LIMIT_OBJECTS_KEY, 1); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM, + "dict set of key for quota object limit failed"); + goto err; + } - STACK_WIND (frame, quota_lookup_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, loc, xattr_req); + STACK_WIND(frame, quota_lookup_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, loc, xattr_req); - ret = 0; + ret = 0; err: - if (xattr_req) - dict_unref (xattr_req); + if (xattr_req) + dict_unref(xattr_req); - if (ret < 0) { - QUOTA_STACK_UNWIND (lookup, frame, -1, ENOMEM, - NULL, NULL, NULL, NULL); - } + if (ret < 0) { + QUOTA_STACK_UNWIND(lookup, frame, -1, ENOMEM, NULL, NULL, NULL, NULL); + } - return 0; + return 0; off: - STACK_WIND_TAIL (frame, 
FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, loc, xattr_req); - return 0; + STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, + loc, xattr_req); + return 0; } int32_t -quota_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) +quota_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - int32_t ret = 0; - uint64_t ctx_int = 0; - quota_inode_ctx_t *ctx = NULL; - quota_local_t *local = NULL; - - local = frame->local; - - if ((op_ret < 0) || (local == NULL) || (postbuf == NULL)) { - goto out; - } - - ret = inode_ctx_get (local->loc.inode, this, &ctx_int); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, 0, - Q_MSG_INODE_CTX_GET_FAILED, "%s: failed to get the " - "context", local->loc.path); - goto out; - } - - ctx = (quota_inode_ctx_t *)(unsigned long) ctx_int; - - if (ctx == NULL) { - gf_msg (this->name, GF_LOG_WARNING, 0, - Q_MSG_INODE_CTX_GET_FAILED, - "quota context not set in %s (gfid:%s)", - local->loc.path, uuid_utoa (local->loc.inode->gfid)); - goto out; - } - - LOCK (&ctx->lock); - { - ctx->buf = *postbuf; - } - UNLOCK (&ctx->lock); + int32_t ret = 0; + uint64_t ctx_int = 0; + quota_inode_ctx_t *ctx = NULL; + quota_local_t *local = NULL; + + local = frame->local; + + if ((op_ret < 0) || (local == NULL) || (postbuf == NULL)) { + goto out; + } + + ret = inode_ctx_get(local->loc.inode, this, &ctx_int); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, Q_MSG_INODE_CTX_GET_FAILED, + "%s: failed to get the " + "context", + local->loc.path); + goto out; + } + + ctx = (quota_inode_ctx_t *)(unsigned long)ctx_int; + + if (ctx == NULL) { + gf_msg(this->name, GF_LOG_WARNING, 0, Q_MSG_INODE_CTX_GET_FAILED, + "quota context not set in %s (gfid:%s)", local->loc.path, + uuid_utoa(local->loc.inode->gfid)); + goto out; + } + + 
LOCK(&ctx->lock); + { + ctx->buf = *postbuf; + } + UNLOCK(&ctx->lock); out: - QUOTA_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf, - xdata); - - return 0; -} - - -int32_t -quota_writev_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iovec *vector, int32_t count, off_t off, - uint32_t flags, struct iobref *iobref, dict_t *xdata) -{ - quota_local_t *local = NULL; - int32_t op_errno = EINVAL; - quota_priv_t *priv = NULL; - struct iovec *new_vector = NULL; - int32_t new_count = 0; - - priv = this->private; - - local = frame->local; - - GF_VALIDATE_OR_GOTO ("quota", local, unwind); - - if (local->op_ret == -1) { - op_errno = local->op_errno; - - if ((op_errno == EDQUOT) && (local->space_available > 0)) { - new_count = iov_subset (vector, count, 0, - local->space_available, NULL); - - new_vector = GF_CALLOC (new_count, - sizeof (struct iovec), - gf_common_mt_iovec); - if (new_vector == NULL) { - local->op_ret = -1; - local->op_errno = ENOMEM; - goto unwind; - } - - new_count = iov_subset (vector, count, 0, - local->space_available, - new_vector); - - vector = new_vector; - count = new_count; - } else if (op_errno == ENOENT || op_errno == ESTALE) { - /* We may get ENOENT/ESTALE in case of below scenario - * fd = open file.txt - * unlink file.txt - * write on fd - * Here build_ancestry can fail as the file is removed. 
- * For now ignore ENOENT/ESTALE with writes on active fd - * We need to re-visit this code once we understand - * how other file-system behave in this scenario - */ - gf_msg_debug (this->name, 0, "quota enforcer failed " - "with ENOENT/ESTALE on %s, cannot check " - "quota limits and allowing writes", - uuid_utoa (fd->inode->gfid)); - } else { - goto unwind; - } - } - - STACK_WIND (frame, quota_writev_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev, fd, - vector, count, off, flags, iobref, xdata); - - if (new_vector != NULL) - GF_FREE (new_vector); - - return 0; + QUOTA_STACK_UNWIND(writev, frame, op_ret, op_errno, prebuf, postbuf, xdata); -unwind: - QUOTA_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + return 0; } +static int gf_quota_enforcer_log; int32_t -quota_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iovec *vector, int32_t count, off_t off, - uint32_t flags, struct iobref *iobref, dict_t *xdata) +quota_writev_helper(call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iovec *vector, int32_t count, off_t off, + uint32_t flags, struct iobref *iobref, dict_t *xdata) { - quota_priv_t *priv = NULL; - int32_t ret = -1, op_errno = EINVAL; - int32_t parents = 0; - int32_t fail_count = 0; - uint64_t size = 0; - quota_local_t *local = NULL; - quota_inode_ctx_t *ctx = NULL; - quota_dentry_t *dentry = NULL, *tmp = NULL; - call_stub_t *stub = NULL; - struct list_head head = {0, }; - inode_t *par_inode = NULL; - - priv = this->private; + quota_local_t *local = NULL; + int32_t op_errno = EINVAL; + struct iovec *new_vector = NULL; + int32_t new_count = 0; - WIND_IF_QUOTAOFF (priv->is_quota_on, off); + local = frame->local; - INIT_LIST_HEAD (&head); + GF_VALIDATE_OR_GOTO("quota", local, unwind); - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO ("quota", this, unwind); - GF_VALIDATE_OR_GOTO (this->name, fd, unwind); + if (local->op_ret == -1) { + op_errno = local->op_errno; - local = quota_local_new (); - if (local == 
NULL) { + if ((op_errno == EDQUOT) && (local->space_available > 0)) { + new_count = iov_subset(vector, count, 0, local->space_available, + &new_vector, 0); + if (new_count < 0) { + local->op_ret = -1; + local->op_errno = ENOMEM; goto unwind; + } + + vector = new_vector; + count = new_count; + } else if (op_errno == ENOENT || op_errno == ESTALE) { + /* We may get ENOENT/ESTALE in case of below scenario + * fd = open file.txt + * unlink file.txt + * write on fd + * Here build_ancestry can fail as the file is removed. + * For now ignore ENOENT/ESTALE with writes on active fd + * We need to re-visit this code once we understand + * how other file-system behave in this scenario + */ + gf_msg_debug(this->name, 0, + "quota enforcer failed " + "with ENOENT/ESTALE on %s, cannot check " + "quota limits and allowing writes", + uuid_utoa(fd->inode->gfid)); + } else if ((op_errno == EINVAL) && + !inode_parent(local->loc.inode, 0, NULL)) { + /* We may get INVAL with parent == NULL, + * in case of below scenario + * 1. enable quota + * 2. glusterfsd stop/start + * 3. nameless lookup + * 4. write on fd + * Here build_ancestry can fail as the file's pgfid + * is't exist. + * For now ignore EINVAL with writes on active fd + * untils the pgfid is created at name lookup + */ + GF_LOG_OCCASIONALLY(gf_quota_enforcer_log, this->name, + GF_LOG_CRITICAL, + "Quota cannot be enforced as " + "parent is not available and writes are being " + "allowed without checking whether they are " + "within quota limits. This can happen if Quota " + "crawl is not complete. 
If crawl has been " + "completed, please file a bug."); + } else { + goto unwind; } + } - frame->local = local; - local->loc.inode = inode_ref (fd->inode); + STACK_WIND(frame, quota_writev_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->writev, fd, vector, count, off, flags, + iobref, xdata); - ret = quota_inode_ctx_get (fd->inode, this, &ctx, 0); - if (ctx == NULL) { - gf_msg_debug (this->name, 0, "quota context is NULL on inode" - " (%s). If quota is not enabled recently and " - "crawler has finished crawling, its an error", - uuid_utoa (fd->inode->gfid)); - } + if (new_vector != NULL) + GF_FREE(new_vector); - stub = fop_writev_stub (frame, quota_writev_helper, fd, vector, count, - off, flags, iobref, xdata); - if (stub == NULL) { - op_errno = ENOMEM; - goto unwind; - } - - priv = this->private; - GF_VALIDATE_OR_GOTO (this->name, priv, unwind); + return 0; - size = iov_length (vector, count); +unwind: + QUOTA_STACK_UNWIND(writev, frame, -1, op_errno, NULL, NULL, NULL); + return 0; +} - parents = quota_add_parents_from_ctx (ctx, &head); +int32_t +quota_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iovec *vector, int32_t count, off_t off, uint32_t flags, + struct iobref *iobref, dict_t *xdata) +{ + quota_priv_t *priv = NULL; + int32_t op_errno = EINVAL; + int32_t parents = 0; + int32_t fail_count = 0; + uint64_t size = 0; + quota_local_t *local = NULL; + quota_inode_ctx_t *ctx = NULL; + quota_dentry_t *dentry = NULL, *tmp = NULL; + call_stub_t *stub = NULL; + struct list_head head; + inode_t *par_inode = NULL; + + priv = this->private; + + WIND_IF_QUOTAOFF(priv->is_quota_on, off); + + INIT_LIST_HEAD(&head); + + GF_ASSERT(frame); + GF_VALIDATE_OR_GOTO("quota", this, unwind); + GF_VALIDATE_OR_GOTO(this->name, fd, unwind); + + local = quota_local_new(); + if (local == NULL) { + goto unwind; + } + + frame->local = local; + local->loc.inode = inode_ref(fd->inode); + + (void)quota_inode_ctx_get(fd->inode, this, &ctx, 0); + if (ctx == NULL) { + 
gf_msg_debug(this->name, 0, + "quota context is NULL on inode" + " (%s). If quota is not enabled recently and " + "crawler has finished crawling, its an error", + uuid_utoa(fd->inode->gfid)); + } + + stub = fop_writev_stub(frame, quota_writev_helper, fd, vector, count, off, + flags, iobref, xdata); + if (stub == NULL) { + op_errno = ENOMEM; + goto unwind; + } + + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, unwind); + + parents = quota_add_parents_from_ctx(ctx, &head); + if (parents == -1) { + op_errno = errno; + goto unwind; + } + + size = iov_length(vector, count); + + LOCK(&local->lock); + { + local->delta = size; + local->object_delta = 0; + local->link_count = (parents != 0) ? parents : 1; + local->stub = stub; + } + UNLOCK(&local->lock); - LOCK (&local->lock); + if (parents == 0) { + /* nameless lookup on this inode, allow quota to reconstruct + * ancestry as part of check_limit. + */ + quota_check_limit(frame, fd->inode, this); + } else { + list_for_each_entry_safe(dentry, tmp, &head, next) { - local->delta = size; - local->link_count = (parents != 0) ? parents : 1; - local->stub = stub; + par_inode = do_quota_check_limit(frame, fd->inode, this, dentry, + _gf_false); + if (par_inode == NULL) { + if (ctx) { + /* remove stale entry from inode ctx */ + quota_dentry_del(ctx, dentry->name, dentry->par); + parents--; + fail_count++; + } + } else { + inode_unref(par_inode); + } + __quota_dentry_free(dentry); } - UNLOCK (&local->lock); if (parents == 0) { - /* nameless lookup on this inode, allow quota to reconstruct - * ancestry as part of check_limit. 
- */ - quota_check_limit (frame, fd->inode, this); - } else { - list_for_each_entry_safe (dentry, tmp, &head, next) { - par_inode = do_quota_check_limit (frame, fd->inode, - this, dentry, - _gf_false); - if (par_inode == NULL) { - /* remove stale entry from inode ctx */ - quota_dentry_del (ctx, dentry->name, - dentry->par); - parents--; - fail_count++; - } else { - inode_unref (par_inode); - } - __quota_dentry_free (dentry); - } - - if (parents == 0) { - LOCK (&local->lock); - { - local->link_count++; - } - UNLOCK (&local->lock); - quota_check_limit (frame, fd->inode, this); - } + LOCK(&local->lock); + { + local->link_count++; + } + UNLOCK(&local->lock); + quota_check_limit(frame, fd->inode, this); + } - while (fail_count != 0) { - quota_link_count_decrement (frame); - fail_count--; - } + while (fail_count != 0) { + quota_link_count_decrement(frame); + fail_count--; } + } - return 0; + return 0; unwind: - QUOTA_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + QUOTA_STACK_UNWIND(writev, frame, -1, op_errno, NULL, NULL, NULL); + return 0; off: - STACK_WIND_TAIL (frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->writev, fd, - vector, count, off, flags, iobref, xdata); - return 0; + STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev, + fd, vector, count, off, flags, iobref, xdata); + return 0; } - int32_t -quota_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +quota_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - QUOTA_STACK_UNWIND (mkdir, frame, op_ret, op_errno, inode, - buf, preparent, postparent, xdata); - return 0; + QUOTA_STACK_UNWIND(mkdir, frame, op_ret, op_errno, inode, buf, preparent, + postparent, xdata); 
+ return 0; } - int32_t -quota_mkdir_helper (call_frame_t *frame, xlator_t *this, loc_t *loc, - mode_t mode, mode_t umask, dict_t *xdata) +quota_mkdir_helper(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + mode_t umask, dict_t *xdata) { - quota_local_t *local = NULL; - int32_t op_errno = EINVAL; + quota_local_t *local = NULL; + int32_t op_errno = EINVAL; - local = frame->local; + local = frame->local; - GF_VALIDATE_OR_GOTO ("quota", local, unwind); + GF_VALIDATE_OR_GOTO("quota", local, unwind); - op_errno = local->op_errno; + op_errno = local->op_errno; - if (local->op_ret == -1) { - goto unwind; - } + if (local->op_ret == -1) { + goto unwind; + } - STACK_WIND (frame, quota_mkdir_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir, loc, - mode, umask, xdata); + STACK_WIND(frame, quota_mkdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata); - return 0; + return 0; unwind: - QUOTA_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, - NULL, NULL, NULL); - return 0; + QUOTA_STACK_UNWIND(mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL, + NULL); + return 0; } - int32_t -quota_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, - mode_t umask, dict_t *xdata) +quota_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + mode_t umask, dict_t *xdata) { - quota_priv_t *priv = NULL; - int32_t ret = 0, op_errno = 0; - quota_local_t *local = NULL; - call_stub_t *stub = NULL; - - priv = this->private; - - WIND_IF_QUOTAOFF (priv->is_quota_on, off); - - local = quota_local_new (); - if (local == NULL) { - op_errno = ENOMEM; - goto err; - } - - frame->local = local; - - ret = loc_copy (&local->loc, loc); - if (ret) { - op_errno = ENOMEM; - gf_msg (this->name, GF_LOG_WARNING, ENOMEM, - Q_MSG_ENOMEM, "loc_copy failed"); - goto err; - } - - stub = fop_mkdir_stub (frame, quota_mkdir_helper, loc, mode, umask, - xdata); - if (stub == NULL) { - op_errno = ENOMEM; - goto err; - } - - LOCK 
(&local->lock); - { - local->stub = stub; - local->delta = 0; - local->link_count = 1; - } - UNLOCK (&local->lock); + quota_priv_t *priv = NULL; + int32_t ret = 0, op_errno = 0; + quota_local_t *local = NULL; + call_stub_t *stub = NULL; + + priv = this->private; + WIND_IF_QUOTAOFF(priv->is_quota_on, off); + + if (!should_quota_enforce(this, xdata, GF_FOP_MKDIR)) { + gf_msg(this->name, GF_LOG_DEBUG, 0, Q_MSG_ENFORCEMENT_SKIPPED, + "Enforcement has been skipped(internal fop)."); + goto off; + } + + local = quota_local_new(); + if (local == NULL) { + op_errno = ENOMEM; + goto err; + } + + frame->local = local; + + ret = loc_copy(&local->loc, loc); + if (ret) { + op_errno = ENOMEM; + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM, + "loc_copy failed"); + goto err; + } + + stub = fop_mkdir_stub(frame, quota_mkdir_helper, loc, mode, umask, xdata); + if (stub == NULL) { + op_errno = ENOMEM; + goto err; + } + + LOCK(&local->lock); + { + local->stub = stub; + local->delta = 0; + local->object_delta = 1; + local->link_count = 1; + } + UNLOCK(&local->lock); - quota_check_limit (frame, loc->parent, this); - return 0; + quota_check_limit(frame, loc->parent, this); + return 0; err: - QUOTA_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL, - NULL, NULL); + QUOTA_STACK_UNWIND(mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL, + NULL); - return 0; + return 0; off: - STACK_WIND_TAIL (frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->mkdir, - loc, mode, umask, xdata); + STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir, + loc, mode, umask, xdata); - return 0; + return 0; } - int32_t -quota_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +quota_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, + struct iatt 
*buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - int32_t ret = -1; - quota_local_t *local = NULL; - quota_inode_ctx_t *ctx = NULL; - quota_dentry_t *dentry = NULL; - - local = frame->local; - if (op_ret < 0) { - goto unwind; - } - - ret = quota_inode_ctx_get (inode, this, &ctx, 1); - if ((ret == -1) || (ctx == NULL)) { - gf_msg (this->name, GF_LOG_WARNING, ENOMEM, - Q_MSG_INODE_CTX_GET_FAILED, "cannot create quota " - "context in inode(gfid:%s)", uuid_utoa (inode->gfid)); - op_ret = -1; - op_errno = ENOMEM; - goto unwind; - } - - LOCK (&ctx->lock); - { - ctx->buf = *buf; - - dentry = __quota_dentry_new (ctx, (char *)local->loc.name, - local->loc.parent->gfid); - if (dentry == NULL) { - gf_msg (this->name, GF_LOG_WARNING, ENOMEM, - Q_MSG_ENOMEM, "cannot create a new dentry " - "(name:%s) for inode(gfid:%s)", local->loc.name, - uuid_utoa (local->loc.inode->gfid)); - op_ret = -1; - op_errno = ENOMEM; - goto unlock; - } - } + int32_t ret = -1; + quota_local_t *local = NULL; + quota_inode_ctx_t *ctx = NULL; + quota_dentry_t *dentry = NULL; + + local = frame->local; + if (op_ret < 0) { + goto unwind; + } + + ret = quota_inode_ctx_get(inode, this, &ctx, 1); + if ((ret == -1) || (ctx == NULL)) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_INODE_CTX_GET_FAILED, + "cannot create quota " + "context in inode(gfid:%s)", + uuid_utoa(inode->gfid)); + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + + LOCK(&ctx->lock); + { + ctx->buf = *buf; + + dentry = __quota_dentry_new(ctx, (char *)local->loc.name, + local->loc.parent->gfid); + if (dentry == NULL) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM, + "cannot create a new dentry " + "(name:%s) for inode(gfid:%s)", + local->loc.name, uuid_utoa(local->loc.inode->gfid)); + op_ret = -1; + op_errno = ENOMEM; + goto unlock; + } + } unlock: - UNLOCK (&ctx->lock); + UNLOCK(&ctx->lock); unwind: - QUOTA_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode, buf, - preparent, postparent, 
xdata); - return 0; + QUOTA_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, buf, + preparent, postparent, xdata); + return 0; } - int32_t -quota_create_helper (call_frame_t *frame, xlator_t *this, loc_t *loc, - int32_t flags, mode_t mode, mode_t umask, fd_t *fd, - dict_t *xdata) +quota_create_helper(call_frame_t *frame, xlator_t *this, loc_t *loc, + int32_t flags, mode_t mode, mode_t umask, fd_t *fd, + dict_t *xdata) { - quota_local_t *local = NULL; - int32_t op_errno = EINVAL; - quota_priv_t *priv = NULL; - - local = frame->local; + quota_local_t *local = NULL; + int32_t op_errno = EINVAL; - GF_VALIDATE_OR_GOTO ("quota", local, unwind); + local = frame->local; - priv = this->private; + GF_VALIDATE_OR_GOTO("quota", local, unwind); + if (local->op_ret == -1) { + op_errno = local->op_errno; + goto unwind; + } - if (local->op_ret == -1) { - op_errno = local->op_errno; - goto unwind; - } - - - STACK_WIND (frame, quota_create_cbk, - FIRST_CHILD (this), FIRST_CHILD (this)->fops->create, loc, - flags, mode, umask, fd, xdata); - return 0; + STACK_WIND(frame, quota_create_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd, + xdata); + return 0; unwind: - QUOTA_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, - NULL, NULL, NULL, NULL); - return 0; + QUOTA_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, + NULL, NULL); + return 0; } - int32_t -quota_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) +quota_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) { - quota_priv_t *priv = NULL; - int32_t ret = -1; - quota_local_t *local = NULL; - int32_t op_errno = 0; - call_stub_t *stub = NULL; - - priv = this->private; - - WIND_IF_QUOTAOFF (priv->is_quota_on, off); - QUOTA_WIND_FOR_INTERNAL_FOP (xdata, off); - - local = quota_local_new (); - if (local == NULL) { - 
op_errno = ENOMEM; - goto err; - } - - frame->local = local; - - ret = loc_copy (&local->loc, loc); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, ENOMEM, - Q_MSG_ENOMEM, "loc_copy failed"); - op_errno = ENOMEM; - goto err; - } - - stub = fop_create_stub (frame, quota_create_helper, loc, flags, mode, - umask, fd, xdata); - if (stub == NULL) { - goto err; - } - - LOCK (&local->lock); - { - local->link_count = 1; - local->stub = stub; - local->delta = 0; - } - UNLOCK (&local->lock); + quota_priv_t *priv = NULL; + int32_t ret = -1; + quota_local_t *local = NULL; + int32_t op_errno = 0; + call_stub_t *stub = NULL; + + priv = this->private; + + WIND_IF_QUOTAOFF(priv->is_quota_on, off); + QUOTA_WIND_FOR_INTERNAL_FOP(xdata, off); + + local = quota_local_new(); + if (local == NULL) { + op_errno = ENOMEM; + goto err; + } + + frame->local = local; + + ret = loc_copy(&local->loc, loc); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM, + "loc_copy failed"); + op_errno = ENOMEM; + goto err; + } + + stub = fop_create_stub(frame, quota_create_helper, loc, flags, mode, umask, + fd, xdata); + if (stub == NULL) { + goto err; + } + + LOCK(&local->lock); + { + local->link_count = 1; + local->stub = stub; + local->delta = 0; + local->object_delta = 1; + } + UNLOCK(&local->lock); - quota_check_limit (frame, loc->parent, this); - return 0; + quota_check_limit(frame, loc->parent, this); + return 0; err: - QUOTA_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL, - NULL, NULL, NULL); + QUOTA_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, + NULL, NULL); - return 0; + return 0; off: - STACK_WIND_TAIL (frame, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->create, loc, - flags, mode, umask, fd, xdata); - return 0; + STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->create, + loc, flags, mode, umask, fd, xdata); + return 0; } - int32_t -quota_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, 
int32_t op_errno, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +quota_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - quota_local_t *local = NULL; - quota_inode_ctx_t *ctx = NULL; - uint64_t value = 0; + quota_local_t *local = NULL; + quota_inode_ctx_t *ctx = NULL; + uint64_t value = 0; - if (op_ret < 0) { - goto out; - } + if (op_ret < 0) { + goto out; + } - local = (quota_local_t *) frame->local; + local = (quota_local_t *)frame->local; - inode_ctx_get (local->loc.inode, this, &value); - ctx = (quota_inode_ctx_t *)(unsigned long)value; + inode_ctx_get(local->loc.inode, this, &value); + ctx = (quota_inode_ctx_t *)(unsigned long)value; - if (ctx == NULL) { - gf_msg (this->name, GF_LOG_WARNING, EINVAL, - Q_MSG_INODE_CTX_GET_FAILED, - "quota context not set inode (gfid:%s)", - uuid_utoa (local->loc.inode->gfid)); - goto out; - } + if (ctx == NULL) { + gf_msg(this->name, GF_LOG_INFO, EINVAL, Q_MSG_INODE_CTX_GET_FAILED, + "quota context not set inode (gfid:%s)", + uuid_utoa(local->loc.gfid)); + goto out; + } - quota_dentry_del (ctx, local->loc.name, local->loc.parent->gfid); + quota_dentry_del(ctx, local->loc.name, local->loc.parent->gfid); out: - QUOTA_STACK_UNWIND (unlink, frame, op_ret, op_errno, preparent, - postparent, xdata); - return 0; + QUOTA_STACK_UNWIND(unlink, frame, op_ret, op_errno, preparent, postparent, + xdata); + return 0; } - int32_t -quota_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, - dict_t *xdata) +quota_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, + dict_t *xdata) { - quota_priv_t *priv = NULL; - int32_t ret = -1; - quota_local_t *local = NULL; + quota_priv_t *priv = NULL; + int32_t ret = -1; + quota_local_t *local = NULL; - priv = this->private; + priv = this->private; - WIND_IF_QUOTAOFF (priv->is_quota_on, off); + WIND_IF_QUOTAOFF(priv->is_quota_on, off); - 
local = quota_local_new (); - if (local == NULL) { - goto err; - } + local = quota_local_new(); + if (local == NULL) { + goto err; + } - frame->local = local; + frame->local = local; - ret = loc_copy (&local->loc, loc); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, ENOMEM, - Q_MSG_ENOMEM, "loc_copy failed"); - goto err; - } + ret = loc_copy(&local->loc, loc); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM, + "loc_copy failed"); + goto err; + } - STACK_WIND (frame, quota_unlink_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); + STACK_WIND(frame, quota_unlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); - ret = 0; + ret = 0; err: - if (ret == -1) { - QUOTA_STACK_UNWIND (unlink, frame, -1, 0, NULL, NULL, NULL); - } + if (ret == -1) { + QUOTA_STACK_UNWIND(unlink, frame, -1, 0, NULL, NULL, NULL); + } - return 0; + return 0; off: - STACK_WIND_TAIL (frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); - return 0; + STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink, + loc, xflag, xdata); + return 0; } - int32_t -quota_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +quota_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - int32_t ret = -1; - quota_local_t *local = NULL; - quota_inode_ctx_t *ctx = NULL; - quota_dentry_t *dentry = NULL; - char found = 0; - - if (op_ret < 0) { - goto out; - } - - local = (quota_local_t *) frame->local; - - ret = quota_inode_ctx_get (inode, this, &ctx, 0); - if ((ret == -1) || (ctx == NULL)) { - gf_msg_debug (this->name, 0, "quota context is NULL on inode" - " (%s). 
If quota is not enabled recently and " - "crawler has finished crawling, its an error", - uuid_utoa (inode->gfid)); - goto out; - } - - LOCK (&ctx->lock); + int32_t ret = -1; + quota_local_t *local = NULL; + quota_inode_ctx_t *ctx = NULL; + quota_dentry_t *dentry = NULL; + char found = 0; + + if (op_ret < 0) { + goto out; + } + + local = (quota_local_t *)frame->local; + + ret = quota_inode_ctx_get(inode, this, &ctx, 0); + if ((ret == -1) || (ctx == NULL)) { + gf_msg_debug(this->name, 0, + "quota context is NULL on inode" + " (%s). If quota is not enabled recently and " + "crawler has finished crawling, its an error", + uuid_utoa(inode->gfid)); + goto out; + } + + LOCK(&ctx->lock); + { + list_for_each_entry(dentry, &ctx->parents, next) { - list_for_each_entry (dentry, &ctx->parents, next) { - if ((strcmp (dentry->name, local->loc.name) == 0) && - (gf_uuid_compare (local->loc.parent->gfid, - dentry->par) == 0)) { - found = 1; - - gf_msg_debug (this->name, 0, "new entry being" - " linked (name:%s) for inode " - "(gfid:%s) is already present " - "in inode-dentry-list", - dentry->name, - uuid_utoa (local->loc.inode->gfid)); - break; - } - } - - if (!found) { - dentry = __quota_dentry_new (ctx, - (char *)local->loc.name, - local->loc.parent->gfid); - if (dentry == NULL) { - gf_msg (this->name, GF_LOG_WARNING, ENOMEM, - Q_MSG_ENOMEM, - "cannot create a new dentry (name:%s)" - "for inode(gfid:%s)", local->loc.name, - uuid_utoa (local->loc.inode->gfid)); - op_ret = -1; - op_errno = ENOMEM; - goto unlock; - } - } - - ctx->buf = *buf; + if ((strcmp(dentry->name, local->loc.name) == 0) && + (gf_uuid_compare(local->loc.parent->gfid, dentry->par) == 0)) { + found = 1; + + gf_msg_debug(this->name, 0, + "new entry being" + " linked (name:%s) for inode " + "(gfid:%s) is already present " + "in inode-dentry-list", + dentry->name, uuid_utoa(local->loc.inode->gfid)); + break; + } + } + + if (!found) { + dentry = __quota_dentry_new(ctx, (char *)local->loc.name, + 
local->loc.parent->gfid); + if (dentry == NULL) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM, + "cannot create a new dentry (name:%s)" + "for inode(gfid:%s)", + local->loc.name, uuid_utoa(local->loc.inode->gfid)); + op_ret = -1; + op_errno = ENOMEM; + goto unlock; + } } + + ctx->buf = *buf; + } unlock: - UNLOCK (&ctx->lock); + UNLOCK(&ctx->lock); out: - QUOTA_STACK_UNWIND (link, frame, op_ret, op_errno, inode, buf, - preparent, postparent, xdata); + QUOTA_STACK_UNWIND(link, frame, op_ret, op_errno, inode, buf, preparent, + postparent, xdata); - return 0; + return 0; } - int32_t -quota_link_helper (call_frame_t *frame, xlator_t *this, loc_t *oldloc, - loc_t *newloc, dict_t *xdata) +quota_link_helper(call_frame_t *frame, xlator_t *this, loc_t *oldloc, + loc_t *newloc, dict_t *xdata) { - quota_local_t *local = NULL; - int32_t op_errno = EINVAL; - quota_priv_t *priv = NULL; + quota_local_t *local = NULL; + int32_t op_errno = EINVAL; - priv = this->private; + local = frame->local; - local = frame->local; + GF_VALIDATE_OR_GOTO("quota", local, unwind); - GF_VALIDATE_OR_GOTO ("quota", local, unwind); + op_errno = local->op_errno; - op_errno = local->op_errno; + if (local->op_ret == -1) { + goto unwind; + } - if (local->op_ret == -1) { - goto unwind; - } - - STACK_WIND (frame, quota_link_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->link, oldloc, - newloc, xdata); - return 0; + STACK_WIND(frame, quota_link_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata); + return 0; unwind: - QUOTA_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, - NULL, NULL, NULL); - return 0; + QUOTA_STACK_UNWIND(link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL); + return 0; } void -quota_link_continue (call_frame_t *frame) +quota_link_continue(call_frame_t *frame) { - int32_t ret = -1; - int32_t op_errno = EIO; - quota_local_t *local = NULL; - uuid_t common_ancestor = {0}; - xlator_t *this = NULL; - quota_inode_ctx_t *ctx = NULL; - inode_t 
*src_parent = NULL; - inode_t *dst_parent = NULL; - - local = frame->local; - this = THIS; - - if (local->op_ret < 0) { - op_errno = local->op_errno; - goto err; - } - - if (local->xdata && - dict_get (local->xdata, GLUSTERFS_INTERNAL_FOP_KEY)) { - /* Treat link as rename, crawl upwards only till common ancestor - */ - ret = quota_find_common_ancestor (local->oldloc.inode, - local->newloc.parent, - &common_ancestor); - if (ret < 0 || gf_uuid_is_null(common_ancestor)) { - gf_msg (this->name, GF_LOG_ERROR, ESTALE, - Q_MSG_ANCESTRY_BUILD_FAILED, "failed to get " - "common_ancestor for %s and %s", - local->oldloc.path, local->newloc.path); - op_errno = ESTALE; - goto err; - } - } else { - /* Treat link as a new file. - * TODO: Currently marker accounts twice for the links created - * across directories. - * This needs re-vist if marker accounts only once - * for the links created across directories - */ - if (local->oldloc.parent) - src_parent = inode_ref (local->oldloc.parent); - else - src_parent = inode_parent (local->oldloc.inode, 0, - NULL); - dst_parent = local->newloc.parent; - - /* No need to check quota limit if src and dst parents are same - */ - if (src_parent == dst_parent || - gf_uuid_compare (src_parent->gfid, dst_parent->gfid) == 0) { - inode_unref (src_parent); - goto wind; - } - - inode_unref (src_parent); - } + int32_t ret = -1; + int32_t op_errno = EIO; + quota_local_t *local = NULL; + uuid_t common_ancestor = {0}; + xlator_t *this = NULL; + quota_inode_ctx_t *ctx = NULL; + inode_t *src_parent = NULL; + inode_t *dst_parent = NULL; + + local = frame->local; + this = THIS; + + if (local->op_ret < 0) { + op_errno = local->op_errno; + goto err; + } - quota_inode_ctx_get (local->oldloc.inode, this, &ctx, 0); - if (ctx == NULL) { - gf_msg_debug (this->name, 0, "quota context is NULL on inode" - " (%s). 
If quota is not enabled recently and " - "crawler has finished crawling, its an error", - uuid_utoa (local->oldloc.inode->gfid)); - } + if (local->xdata && dict_get(local->xdata, GLUSTERFS_INTERNAL_FOP_KEY)) { + /* Treat link as rename, crawl upwards only till common ancestor + */ + ret = quota_find_common_ancestor( + local->oldloc.inode, local->newloc.parent, &common_ancestor); + if (ret < 0 || gf_uuid_is_null(common_ancestor)) { + gf_msg(this->name, GF_LOG_ERROR, ESTALE, + Q_MSG_ANCESTRY_BUILD_FAILED, + "failed to get " + "common_ancestor for %s and %s", + local->oldloc.path, local->newloc.path); + op_errno = ESTALE; + goto err; + } + } else { + /* Treat link as a new file. + * TODO: Currently marker accounts twice for the links created + * across directories. + * This needs re-visit if marker accounts only once + * for the links created across directories + */ + if (local->oldloc.parent) + src_parent = inode_ref(local->oldloc.parent); + else + src_parent = inode_parent(local->oldloc.inode, 0, NULL); + dst_parent = local->newloc.parent; - LOCK (&local->lock); - { - local->link_count = 1; - local->delta = (ctx != NULL) ? ctx->buf.ia_blocks * 512 : 0; - gf_uuid_copy (local->common_ancestor, common_ancestor); - } - UNLOCK (&local->lock); + /* No need to check quota limit if src and dst parents are same + */ + if (src_parent == dst_parent || + gf_uuid_compare(src_parent->gfid, dst_parent->gfid) == 0) { + inode_unref(src_parent); + goto wind; + } + + inode_unref(src_parent); + } + + quota_inode_ctx_get(local->oldloc.inode, this, &ctx, 0); + if (ctx == NULL) { + gf_msg_debug(this->name, 0, + "quota context is NULL on inode" + " (%s). If quota is not enabled recently and " + "crawler has finished crawling, its an error", + uuid_utoa(local->oldloc.inode->gfid)); + } + + LOCK(&local->lock); + { + local->link_count = 1; + local->delta = (ctx != NULL) ? 
ctx->buf.ia_blocks * 512 : 0; + local->object_delta = 1; + gf_uuid_copy(local->common_ancestor, common_ancestor); + } + UNLOCK(&local->lock); - quota_check_limit (frame, local->newloc.parent, this); - return; + quota_check_limit(frame, local->newloc.parent, this); + return; err: - QUOTA_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, - NULL, NULL, NULL); - return; + QUOTA_STACK_UNWIND(link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL); + return; wind: - STACK_WIND (frame, quota_link_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->link, &(local->oldloc), - &(local->newloc), local->xdata); - return; + STACK_WIND(frame, quota_link_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->link, &(local->oldloc), + &(local->newloc), local->xdata); + return; } int32_t -quota_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, - dict_t *xdata) +quota_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) { - quota_priv_t *priv = NULL; - int32_t ret = -1; - int32_t op_errno = ENOMEM; - quota_local_t *local = NULL; - call_stub_t *stub = NULL; - - priv = this->private; - - WIND_IF_QUOTAOFF (priv->is_quota_on, off); - - local = quota_local_new (); - if (local == NULL) { - goto err; - } - - frame->local = (void *) local; - - if (xdata) - local->xdata = dict_ref (xdata); - - ret = loc_copy (&local->loc, newloc); - if (ret == -1) { - gf_msg (this->name, GF_LOG_WARNING, ENOMEM, - Q_MSG_ENOMEM, "loc_copy failed"); - goto err; - } - - ret = loc_copy (&local->oldloc, oldloc); - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM, - "loc_copy failed"); - goto err; - } - - ret = loc_copy (&local->newloc, newloc); - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM, - "loc_copy failed"); - goto err; - } - - /* No need to check quota limit if src and dst parents are same */ - if (oldloc->parent && newloc->parent && - !gf_uuid_compare(oldloc->parent->gfid, newloc->parent->gfid)) { 
- gf_msg_debug (this->name, GF_LOG_DEBUG, "link %s -> %s are " - "in the same directory, so skip check limit", - oldloc->path, newloc->path); - goto wind; - } - - stub = fop_link_stub (frame, quota_link_helper, oldloc, newloc, xdata); - if (stub == NULL) { - goto err; - } - - LOCK (&local->lock); - { - local->link_count = 2; - local->fop_continue_cbk = quota_link_continue; - local->stub = stub; - } - UNLOCK (&local->lock); + quota_priv_t *priv = NULL; + int32_t ret = -1; + int32_t op_errno = ENOMEM; + quota_local_t *local = NULL; + call_stub_t *stub = NULL; + + priv = this->private; + + WIND_IF_QUOTAOFF(priv->is_quota_on, off); + + local = quota_local_new(); + if (local == NULL) { + goto err; + } + + frame->local = (void *)local; + + if (xdata) + local->xdata = dict_ref(xdata); + + ret = loc_copy(&local->loc, newloc); + if (ret == -1) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM, + "loc_copy failed"); + goto err; + } + + ret = loc_copy(&local->oldloc, oldloc); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM, + "loc_copy failed"); + goto err; + } + + ret = loc_copy(&local->newloc, newloc); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM, + "loc_copy failed"); + goto err; + } + + /* No need to check quota limit if src and dst parents are same */ + if (oldloc->parent && newloc->parent && + !gf_uuid_compare(oldloc->parent->gfid, newloc->parent->gfid)) { + gf_msg_debug(this->name, GF_LOG_DEBUG, + "link %s -> %s are " + "in the same directory, so skip check limit", + oldloc->path, newloc->path); + goto wind; + } + + stub = fop_link_stub(frame, quota_link_helper, oldloc, newloc, xdata); + if (stub == NULL) { + goto err; + } + + LOCK(&local->lock); + { + local->link_count = 2; + local->fop_continue_cbk = quota_link_continue; + local->stub = stub; + } + UNLOCK(&local->lock); - check_ancestory (frame, newloc->parent); + check_ancestory(frame, newloc->parent); - /* source parent can be NULL, so do 
check_ancestory on a file */ - if (oldloc->parent) - check_ancestory (frame, oldloc->parent); - else - check_ancestory (frame, oldloc->inode); + /* source parent can be NULL, so do check_ancestry on a file */ + if (oldloc->parent) + check_ancestory(frame, oldloc->parent); + else + check_ancestory(frame, oldloc->inode); - return 0; + return 0; err: - QUOTA_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, - NULL, NULL, NULL); - return 0; + QUOTA_STACK_UNWIND(link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL); + return 0; off: - STACK_WIND_TAIL (frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->link, oldloc, - newloc, xdata); - return 0; + STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->link, + oldloc, newloc, xdata); + return 0; wind: - STACK_WIND (frame, quota_link_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->link, oldloc, - newloc, xdata); - return 0; + STACK_WIND(frame, quota_link_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata); + return 0; } - int32_t -quota_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, - struct iatt *preoldparent, struct iatt *postoldparent, - struct iatt *prenewparent, struct iatt *postnewparent, - dict_t *xdata) +quota_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + struct iatt *preoldparent, struct iatt *postoldparent, + struct iatt *prenewparent, struct iatt *postnewparent, + dict_t *xdata) { - int32_t ret = -1; - int64_t size = 0; - quota_local_t *local = NULL; - quota_inode_ctx_t *ctx = NULL; - quota_dentry_t *old_dentry = NULL, *dentry = NULL; - char new_dentry_found = 0; + int32_t ret = -1; + quota_local_t *local = NULL; + quota_inode_ctx_t *ctx = NULL; + quota_dentry_t *old_dentry = NULL, *dentry = NULL; + char new_dentry_found = 0; - if (op_ret < 0) { - goto out; - } + if (op_ret < 0) { + goto out; + } - local = frame->local; + 
local = frame->local; - GF_VALIDATE_OR_GOTO ("quota", local, out); + GF_VALIDATE_OR_GOTO("quota", local, out); - if (QUOTA_REG_OR_LNK_FILE (local->oldloc.inode->ia_type)) - size = buf->ia_blocks * 512; - else - goto out; + if (!QUOTA_REG_OR_LNK_FILE(local->oldloc.inode->ia_type)) + goto out; - ret = quota_inode_ctx_get (local->oldloc.inode, this, &ctx, 0); - if ((ret == -1) || (ctx == NULL)) { - gf_msg_debug (this->name, 0, "quota context is NULL on inode" - " (%s). If quota is not enabled recently and " - "crawler has finished crawling, its an error", - uuid_utoa (local->oldloc.inode->gfid)); + ret = quota_inode_ctx_get(local->oldloc.inode, this, &ctx, 0); + if ((ret == -1) || (ctx == NULL)) { + gf_msg_debug(this->name, 0, + "quota context is NULL on inode" + " (%s). If quota is not enabled recently and " + "crawler has finished crawling, its an error", + uuid_utoa(local->oldloc.inode->gfid)); - goto out; - } + goto out; + } - LOCK (&ctx->lock); + LOCK(&ctx->lock); + { + list_for_each_entry(dentry, &ctx->parents, next) { - list_for_each_entry (dentry, &ctx->parents, next) { - if ((strcmp (dentry->name, local->oldloc.name) == 0) && - (gf_uuid_compare (local->oldloc.parent->gfid, - dentry->par) == 0)) { - old_dentry = dentry; - } else if ((strcmp (dentry->name, - local->newloc.name) == 0) && - (gf_uuid_compare (local->newloc.parent->gfid, - dentry->par) == 0)) { - new_dentry_found = 1; - gf_msg_debug (this->name, 0, "new entry being " - "linked (name:%s) for inode (gfid:%s) " - "is in inode-dentry-list", dentry->name, - uuid_utoa (local->oldloc.inode->gfid)); - } - - if (old_dentry && new_dentry_found) - break; - } - - if (old_dentry != NULL) { - __quota_dentry_free (old_dentry); - } else { - gf_msg_debug (this->name, 0, "dentry corresponding" - "the path just renamed (name:%s) is not" - " present", local->oldloc.name); - } - - if (!new_dentry_found) { - dentry = __quota_dentry_new (ctx, - (char *)local->newloc.name, - local->newloc.parent->gfid); - if (dentry == 
NULL) { - gf_msg (this->name, GF_LOG_WARNING, ENOMEM, - Q_MSG_ENOMEM, - "cannot create a new dentry (name:%s) " - "for inode(gfid:%s)", - local->newloc.name, - uuid_utoa (local->newloc.inode->gfid)); - op_ret = -1; - op_errno = ENOMEM; - goto unlock; - } - } - - ctx->buf = *buf; + if ((strcmp(dentry->name, local->oldloc.name) == 0) && + (gf_uuid_compare(local->oldloc.parent->gfid, dentry->par) == + 0)) { + old_dentry = dentry; + } else if ((strcmp(dentry->name, local->newloc.name) == 0) && + (gf_uuid_compare(local->newloc.parent->gfid, + dentry->par) == 0)) { + new_dentry_found = 1; + gf_msg_debug(this->name, 0, + "new entry being " + "linked (name:%s) for inode (gfid:%s) " + "is in inode-dentry-list", + dentry->name, + uuid_utoa(local->oldloc.inode->gfid)); + } + + if (old_dentry && new_dentry_found) + break; + } + + if (old_dentry != NULL) { + __quota_dentry_free(old_dentry); + } else { + gf_msg_debug(this->name, 0, + "dentry corresponding" + "the path just renamed (name:%s) is not" + " present", + local->oldloc.name); + } + + if (!new_dentry_found) { + dentry = __quota_dentry_new(ctx, (char *)local->newloc.name, + local->newloc.parent->gfid); + if (dentry == NULL) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM, + "cannot create a new dentry (name:%s) " + "for inode(gfid:%s)", + local->newloc.name, + uuid_utoa(local->newloc.inode->gfid)); + op_ret = -1; + op_errno = ENOMEM; + goto unlock; + } } + + ctx->buf = *buf; + } unlock: - UNLOCK (&ctx->lock); + UNLOCK(&ctx->lock); out: - QUOTA_STACK_UNWIND (rename, frame, op_ret, op_errno, buf, preoldparent, - postoldparent, prenewparent, postnewparent, xdata); + QUOTA_STACK_UNWIND(rename, frame, op_ret, op_errno, buf, preoldparent, + postoldparent, prenewparent, postnewparent, xdata); - return 0; + return 0; } - int32_t -quota_rename_helper (call_frame_t *frame, xlator_t *this, loc_t *oldloc, - loc_t *newloc, dict_t *xdata) +quota_rename_helper(call_frame_t *frame, xlator_t *this, loc_t *oldloc, + loc_t 
*newloc, dict_t *xdata) { - quota_local_t *local = NULL; - int32_t op_errno = EINVAL; - quota_priv_t *priv = NULL; + quota_local_t *local = NULL; + int32_t op_errno = EINVAL; - priv = this->private; + local = frame->local; - local = frame->local; + GF_VALIDATE_OR_GOTO("quota", local, unwind); - GF_VALIDATE_OR_GOTO ("quota", local, unwind); + op_errno = local->op_errno; - op_errno = local->op_errno; + if (local->op_ret == -1) { + goto unwind; + } - if (local->op_ret == -1) { - goto unwind; - } - - STACK_WIND (frame, quota_rename_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->rename, oldloc, - newloc, xdata); + STACK_WIND(frame, quota_rename_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); - return 0; + return 0; unwind: - QUOTA_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, - NULL, NULL, NULL, NULL); - return 0; + QUOTA_STACK_UNWIND(rename, frame, -1, op_errno, NULL, NULL, NULL, NULL, + NULL, NULL); + return 0; } - static int32_t -quota_rename_get_size_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *xdata, - struct iatt *postparent) +quota_rename_get_size_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, + struct iatt *postparent) { - quota_local_t *local = NULL; - int32_t ret = 0; - int64_t *size = 0; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO_WITH_ERROR ("quota", this, out, op_errno, - EINVAL); - GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, xdata, out, op_errno, - EINVAL); - local = frame->local; - GF_ASSERT (local); - local->link_count = 1; - - if (op_ret < 0) - goto out; - - - ret = dict_get_bin (xdata, QUOTA_SIZE_KEY, (void **) &size); - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, EINVAL, - Q_MSG_SIZE_KEY_MISSING, "size key not present in dict"); - op_errno = EINVAL; - goto out; - } - local->delta = ntoh64 (*size); - 
quota_check_limit (frame, local->newloc.parent, this); - return 0; + quota_local_t *local = NULL; + int32_t ret = 0; + int64_t *size = 0; + + GF_ASSERT(frame); + GF_VALIDATE_OR_GOTO_WITH_ERROR("quota", this, out, op_errno, EINVAL); + GF_VALIDATE_OR_GOTO_WITH_ERROR(this->name, xdata, out, op_errno, EINVAL); + local = frame->local; + GF_ASSERT(local); + local->link_count = 1; + + if (op_ret < 0) + goto out; + + ret = dict_get_bin(xdata, QUOTA_SIZE_KEY, (void **)&size); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, EINVAL, Q_MSG_SIZE_KEY_MISSING, + "size key not present in dict"); + op_errno = EINVAL; + goto out; + } + local->delta = ntoh64(*size); + local->object_delta = 1; + quota_check_limit(frame, local->newloc.parent, this); + return 0; out: - quota_handle_validate_error (frame, -1, op_errno); - return 0; + quota_handle_validate_error(frame, -1, op_errno); + return 0; } void -quota_rename_continue (call_frame_t *frame) +quota_rename_continue(call_frame_t *frame) { - int32_t ret = -1; - int32_t op_errno = EIO; - quota_local_t *local = NULL; - uuid_t common_ancestor = {0}; - xlator_t *this = NULL; - quota_inode_ctx_t *ctx = NULL; - - local = frame->local; - this = THIS; - - if (local->op_ret < 0) { - op_errno = local->op_errno; - goto err; - } - - ret = quota_find_common_ancestor (local->oldloc.parent, - local->newloc.parent, - &common_ancestor); - if (ret < 0 || gf_uuid_is_null(common_ancestor)) { - gf_msg (this->name, GF_LOG_ERROR, ESTALE, - Q_MSG_ANCESTRY_BUILD_FAILED, "failed to get " - "common_ancestor for %s and %s", - local->oldloc.path, local->newloc.path); - op_errno = ESTALE; - goto err; - } - - LOCK (&local->lock); - { - local->link_count = 1; - gf_uuid_copy (local->common_ancestor, common_ancestor); - } - UNLOCK (&local->lock); - - if (QUOTA_REG_OR_LNK_FILE (local->oldloc.inode->ia_type)) { - ret = quota_inode_ctx_get (local->oldloc.inode, this, &ctx, 0); - if (ctx == NULL) { - gf_msg (this->name, GF_LOG_WARNING, 0, - Q_MSG_INODE_CTX_GET_FAILED, 
- "quota context not set in inode (gfid:%s), " - "considering file size as zero while enforcing " - "quota on new ancestry", - uuid_utoa (local->oldloc.inode->gfid)); + int32_t ret = -1; + int32_t op_errno = EIO; + quota_local_t *local = NULL; + uuid_t common_ancestor = {0}; + xlator_t *this = NULL; + quota_inode_ctx_t *ctx = NULL; + local = frame->local; + this = THIS; - local->delta = 0; - } else { + if (local->op_ret < 0) { + op_errno = local->op_errno; + goto err; + } + + ret = quota_find_common_ancestor(local->oldloc.parent, local->newloc.parent, + &common_ancestor); + if (ret < 0 || gf_uuid_is_null(common_ancestor)) { + gf_msg(this->name, GF_LOG_ERROR, ESTALE, Q_MSG_ANCESTRY_BUILD_FAILED, + "failed to get " + "common_ancestor for %s and %s", + local->oldloc.path, local->newloc.path); + op_errno = ESTALE; + goto err; + } + + LOCK(&local->lock); + { + local->link_count = 1; + gf_uuid_copy(local->common_ancestor, common_ancestor); + } + UNLOCK(&local->lock); - /* FIXME: We need to account for the size occupied by this - * inode on the target directory. To avoid double - * accounting, we need to modify enforcer to perform - * quota_check_limit only uptil the least common ancestor - * directory inode*/ + if (QUOTA_REG_OR_LNK_FILE(local->oldloc.inode->ia_type)) { + ret = quota_inode_ctx_get(local->oldloc.inode, this, &ctx, 0); + if (ctx == NULL) { + gf_msg(this->name, GF_LOG_WARNING, 0, Q_MSG_INODE_CTX_GET_FAILED, + "quota context not set in inode (gfid:%s), " + "considering file size as zero while enforcing " + "quota on new ancestry", + uuid_utoa(local->oldloc.inode->gfid)); + + local->delta = 0; + local->object_delta = 1; + } else { + /* FIXME: We need to account for the size occupied by + * this inode on the target directory. 
To avoid double + * accounting, we need to modify enforcer to perform + * quota_check_limit only up till the least common + * ancestor directory inode*/ - /* FIXME: The following code assumes that regular files and - * linkfiles are present, in their entirety, in a single - * brick. This *assumption is invalid in the case of - * stripe.*/ + /* FIXME: The following code assumes that regular files + * and link files are present, in their entirety, in a + * single brick. This *assumption is invalid in the + * case of stripe.*/ - local->delta = ctx->buf.ia_blocks * 512; - } - - } else if (IA_ISDIR (local->oldloc.inode->ia_type)) { - ret = quota_validate (frame, local->oldloc.inode, this, - quota_rename_get_size_cbk); - if (ret){ - op_errno = -ret; - goto err; - } + local->delta = ctx->buf.ia_blocks * 512; + local->object_delta = 1; + } - return; + } else if (IA_ISDIR(local->oldloc.inode->ia_type)) { + ret = quota_validate(frame, local->oldloc.inode, this, + quota_rename_get_size_cbk); + if (ret) { + op_errno = -ret; + goto err; } - quota_check_limit (frame, local->newloc.parent, this); return; + } -err: - QUOTA_STACK_UNWIND (rename, frame, -1, op_errno, NULL, - NULL, NULL, NULL, NULL, NULL); - return; + quota_check_limit(frame, local->newloc.parent, this); + return; +err: + QUOTA_STACK_UNWIND(rename, frame, -1, op_errno, NULL, NULL, NULL, NULL, + NULL, NULL); + return; } int32_t -quota_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, - loc_t *newloc, dict_t *xdata) +quota_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) { - quota_priv_t *priv = NULL; - int32_t ret = -1; - int32_t op_errno = ENOMEM; - quota_local_t *local = NULL; - call_stub_t *stub = NULL; - - priv = this->private; - - WIND_IF_QUOTAOFF (priv->is_quota_on, off); - - local = quota_local_new (); - if (local == NULL) { - goto err; - } - - frame->local = local; - - ret = loc_copy (&local->oldloc, oldloc); - if (ret < 0) { - gf_msg (this->name, 
GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM, - "loc_copy failed"); - goto err; - } - - ret = loc_copy (&local->newloc, newloc); - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM, - "loc_copy failed"); - goto err; - } - - /* No need to check quota limit if src and dst parents are same */ - if (oldloc->parent && newloc->parent && - !gf_uuid_compare(oldloc->parent->gfid, newloc->parent->gfid)) { - gf_msg_debug (this->name, 0, "rename %s -> %s are " - "in the same directory, so skip check limit", - oldloc->path, newloc->path); - goto wind; - } - - stub = fop_rename_stub (frame, quota_rename_helper, oldloc, newloc, - xdata); - if (stub == NULL) { - goto err; - } - - LOCK (&local->lock); - { - /* link_count here tell how many check_ancestory should be done - * before continuing the FOP - */ - local->link_count = 2; - local->stub = stub; - local->fop_continue_cbk = quota_rename_continue; - } - UNLOCK (&local->lock); + quota_priv_t *priv = NULL; + int32_t ret = -1; + int32_t op_errno = ENOMEM; + quota_local_t *local = NULL; + call_stub_t *stub = NULL; + + priv = this->private; + + WIND_IF_QUOTAOFF(priv->is_quota_on, off); + + local = quota_local_new(); + if (local == NULL) { + goto err; + } + + frame->local = local; + + ret = loc_copy(&local->oldloc, oldloc); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM, + "loc_copy failed"); + goto err; + } + + ret = loc_copy(&local->newloc, newloc); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM, + "loc_copy failed"); + goto err; + } + + /* No need to check quota limit if src and dst parents are same */ + if (oldloc->parent && newloc->parent && + !gf_uuid_compare(oldloc->parent->gfid, newloc->parent->gfid)) { + gf_msg_debug(this->name, 0, + "rename %s -> %s are " + "in the same directory, so skip check limit", + oldloc->path, newloc->path); + goto wind; + } + + stub = fop_rename_stub(frame, quota_rename_helper, oldloc, newloc, xdata); + if (stub == NULL) { + 
goto err; + } + + LOCK(&local->lock); + { + /* link_count here tell how many check_ancestry should be done + * before continuing the FOP + */ + local->link_count = 2; + local->stub = stub; + local->fop_continue_cbk = quota_rename_continue; + } + UNLOCK(&local->lock); - check_ancestory (frame, newloc->parent); - check_ancestory (frame, oldloc->parent); - return 0; + check_ancestory(frame, newloc->parent); + check_ancestory(frame, oldloc->parent); + return 0; err: - QUOTA_STACK_UNWIND (rename, frame, -1, op_errno, NULL, - NULL, NULL, NULL, NULL, NULL); - return 0; + QUOTA_STACK_UNWIND(rename, frame, -1, op_errno, NULL, NULL, NULL, NULL, + NULL, NULL); + return 0; off: - STACK_WIND_TAIL (frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rename, oldloc, - newloc, xdata); - return 0; + STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->rename, + oldloc, newloc, xdata); + return 0; wind: - STACK_WIND (frame, quota_rename_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->rename, oldloc, - newloc, xdata); - return 0; + STACK_WIND(frame, quota_rename_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); + return 0; } - int32_t -quota_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +quota_symlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - quota_local_t *local = NULL; - quota_inode_ctx_t *ctx = NULL; - quota_dentry_t *dentry = NULL; - - if (op_ret < 0) { - goto out; - } - - local = frame->local; - - quota_inode_ctx_get (local->loc.inode, this, &ctx, 1); - if (ctx == NULL) { - gf_msg_debug (this->name, 0, "quota context is NULL on inode" - " (%s). 
If quota is not enabled recently and " - "crawler has finished crawling, its an error", - uuid_utoa (local->loc.inode->gfid)); - - goto out; - } - - LOCK (&ctx->lock); - { - ctx->buf = *buf; - - dentry = __quota_dentry_new (ctx, (char *)local->loc.name, - local->loc.parent->gfid); - if (dentry == NULL) { - gf_msg (this->name, GF_LOG_WARNING, ENOMEM, - Q_MSG_ENOMEM, "cannot create " - "a new dentry (name:%s) for inode(gfid:%s)", - local->loc.name, - uuid_utoa (local->loc.inode->gfid)); - op_ret = -1; - op_errno = ENOMEM; - } + quota_local_t *local = NULL; + quota_inode_ctx_t *ctx = NULL; + quota_dentry_t *dentry = NULL; + int32_t ret = -1; + + if (op_ret < 0) { + goto out; + } + + local = frame->local; + + ret = quota_inode_ctx_get(local->loc.inode, this, &ctx, 1); + if ((ret == -1) || (ctx == NULL)) { + gf_msg_debug(this->name, 0, + "quota context is NULL on inode" + " (%s). If quota is not enabled recently and " + "crawler has finished crawling, its an error", + uuid_utoa(local->loc.inode->gfid)); + + goto out; + } + + LOCK(&ctx->lock); + { + ctx->buf = *buf; + + dentry = __quota_dentry_new(ctx, (char *)local->loc.name, + local->loc.parent->gfid); + if (dentry == NULL) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM, + "cannot create " + "a new dentry (name:%s) for inode(gfid:%s)", + local->loc.name, uuid_utoa(local->loc.inode->gfid)); + op_ret = -1; + op_errno = ENOMEM; } - UNLOCK (&ctx->lock); + } + UNLOCK(&ctx->lock); out: - QUOTA_STACK_UNWIND (symlink, frame, op_ret, op_errno, inode, buf, - preparent, postparent, xdata); + QUOTA_STACK_UNWIND(symlink, frame, op_ret, op_errno, inode, buf, preparent, + postparent, xdata); - return 0; + return 0; } - int -quota_symlink_helper (call_frame_t *frame, xlator_t *this, const char *linkpath, - loc_t *loc, mode_t umask, dict_t *xdata) +quota_symlink_helper(call_frame_t *frame, xlator_t *this, const char *linkpath, + loc_t *loc, mode_t umask, dict_t *xdata) { - quota_local_t *local = NULL; - int32_t op_errno = 
EINVAL; - quota_priv_t *priv = NULL; - - local = frame->local; + quota_local_t *local = NULL; + int32_t op_errno = EINVAL; - GF_VALIDATE_OR_GOTO ("quota", local, unwind); + local = frame->local; - priv = this->private; + GF_VALIDATE_OR_GOTO("quota", local, unwind); - if (local->op_ret == -1) { - op_errno = local->op_errno; - goto unwind; - } + if (local->op_ret == -1) { + op_errno = local->op_errno; + goto unwind; + } - STACK_WIND (frame, quota_symlink_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->symlink, - linkpath, loc, umask, xdata); - return 0; + STACK_WIND(frame, quota_symlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->symlink, linkpath, loc, umask, xdata); + return 0; unwind: - QUOTA_STACK_UNWIND (symlink, frame, -1, op_errno, NULL, NULL, - NULL, NULL, NULL); - return 0; + QUOTA_STACK_UNWIND(symlink, frame, -1, op_errno, NULL, NULL, NULL, NULL, + NULL); + return 0; } - int -quota_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath, - loc_t *loc, mode_t umask, dict_t *xdata) +quota_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath, + loc_t *loc, mode_t umask, dict_t *xdata) { - quota_priv_t *priv = NULL; - int32_t ret = -1; - int32_t op_errno = ENOMEM; - quota_local_t *local = NULL; - call_stub_t *stub = NULL; + quota_priv_t *priv = NULL; + int32_t ret = -1; + int32_t op_errno = ENOMEM; + quota_local_t *local = NULL; + call_stub_t *stub = NULL; - priv = this->private; + priv = this->private; - WIND_IF_QUOTAOFF (priv->is_quota_on, off); + WIND_IF_QUOTAOFF(priv->is_quota_on, off); - local = quota_local_new (); - if (local == NULL) { - goto err; - } + local = quota_local_new(); + if (local == NULL) { + goto err; + } - frame->local = local; + frame->local = local; - ret = loc_copy (&local->loc, loc); - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, ENOMEM, - Q_MSG_ENOMEM, "loc_copy failed"); - goto err; - } + ret = loc_copy(&local->loc, loc); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, 
Q_MSG_ENOMEM, + "loc_copy failed"); + goto err; + } - stub = fop_symlink_stub (frame, quota_symlink_helper, linkpath, loc, - umask, xdata); - if (stub == NULL) { - goto err; - } + stub = fop_symlink_stub(frame, quota_symlink_helper, linkpath, loc, umask, + xdata); + if (stub == NULL) { + goto err; + } - LOCK (&local->lock); - { - local->stub = stub; - local->delta = strlen (linkpath); - local->link_count = 1; - } - UNLOCK (&local->lock); + LOCK(&local->lock); + { + local->stub = stub; + local->delta = strlen(linkpath); + local->object_delta = 1; + local->link_count = 1; + } + UNLOCK(&local->lock); - quota_check_limit (frame, loc->parent, this); - return 0; + quota_check_limit(frame, loc->parent, this); + return 0; err: - QUOTA_STACK_UNWIND (symlink, frame, -1, op_errno, NULL, NULL, NULL, - NULL, NULL); + QUOTA_STACK_UNWIND(symlink, frame, -1, op_errno, NULL, NULL, NULL, NULL, + NULL); - return 0; + return 0; off: - STACK_WIND_TAIL (frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->symlink, - linkpath, loc, umask, xdata); - return 0; + STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->symlink, + linkpath, loc, umask, xdata); + return 0; } - int32_t -quota_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) +quota_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - quota_local_t *local = NULL; - quota_inode_ctx_t *ctx = NULL; + quota_local_t *local = NULL; + quota_inode_ctx_t *ctx = NULL; - if (op_ret < 0) { - goto out; - } + if (op_ret < 0) { + goto out; + } - local = frame->local; + local = frame->local; - GF_VALIDATE_OR_GOTO ("quota", local, out); + GF_VALIDATE_OR_GOTO("quota", local, out); - quota_inode_ctx_get (local->loc.inode, this, &ctx, 0); - if (ctx == NULL) { - gf_msg_debug (this->name, 0, "quota context is NULL on 
inode" - " (%s). If quota is not enabled recently and " - "crawler has finished crawling, its an error", - uuid_utoa (local->loc.inode->gfid)); - goto out; - } + quota_inode_ctx_get(local->loc.inode, this, &ctx, 0); + if (ctx == NULL) { + gf_msg_debug(this->name, 0, + "quota context is NULL on inode" + " (%s). If quota is not enabled recently and " + "crawler has finished crawling, its an error", + uuid_utoa(local->loc.inode->gfid)); + goto out; + } - LOCK (&ctx->lock); - { - ctx->buf = *postbuf; - } - UNLOCK (&ctx->lock); + LOCK(&ctx->lock); + { + ctx->buf = *postbuf; + } + UNLOCK(&ctx->lock); out: - QUOTA_STACK_UNWIND (truncate, frame, op_ret, op_errno, prebuf, - postbuf, xdata); - return 0; + QUOTA_STACK_UNWIND(truncate, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + return 0; } - int32_t -quota_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, - dict_t *xdata) +quota_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) { - quota_priv_t *priv = NULL; - int32_t ret = -1; - quota_local_t *local = NULL; + quota_priv_t *priv = NULL; + int32_t ret = -1; + quota_local_t *local = NULL; - priv = this->private; + priv = this->private; - WIND_IF_QUOTAOFF (priv->is_quota_on, off); + WIND_IF_QUOTAOFF(priv->is_quota_on, off); - local = quota_local_new (); - if (local == NULL) { - goto err; - } + local = quota_local_new(); + if (local == NULL) { + goto err; + } - frame->local = local; + frame->local = local; - ret = loc_copy (&local->loc, loc); - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, ENOMEM, - Q_MSG_ENOMEM, "loc_copy failed"); - goto err; - } + ret = loc_copy(&local->loc, loc); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM, + "loc_copy failed"); + goto err; + } - STACK_WIND (frame, quota_truncate_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); + STACK_WIND(frame, quota_truncate_cbk, FIRST_CHILD(this), + 
FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); - return 0; + return 0; err: - QUOTA_STACK_UNWIND (truncate, frame, -1, ENOMEM, NULL, NULL, NULL); + QUOTA_STACK_UNWIND(truncate, frame, -1, ENOMEM, NULL, NULL, NULL); - return 0; + return 0; off: - STACK_WIND_TAIL (frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); - return 0; + STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->truncate, + loc, offset, xdata); + return 0; } - int32_t -quota_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) +quota_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - quota_local_t *local = NULL; - quota_inode_ctx_t *ctx = NULL; + quota_local_t *local = NULL; + quota_inode_ctx_t *ctx = NULL; - if (op_ret < 0) { - goto out; - } + if (op_ret < 0) { + goto out; + } - local = frame->local; + local = frame->local; - GF_VALIDATE_OR_GOTO ("quota", local, out); + GF_VALIDATE_OR_GOTO("quota", local, out); - quota_inode_ctx_get (local->loc.inode, this, &ctx, 0); - if (ctx == NULL) { - gf_msg_debug (this->name, 0, "quota context is NULL on inode" - " (%s). If quota is not enabled recently and " - "crawler has finished crawling, its an error", - uuid_utoa (local->loc.inode->gfid)); - goto out; - } + quota_inode_ctx_get(local->loc.inode, this, &ctx, 0); + if (ctx == NULL) { + gf_msg_debug(this->name, 0, + "quota context is NULL on inode" + " (%s). 
If quota is not enabled recently and " + "crawler has finished crawling, its an error", + uuid_utoa(local->loc.inode->gfid)); + goto out; + } - LOCK (&ctx->lock); - { - ctx->buf = *postbuf; - } - UNLOCK (&ctx->lock); + LOCK(&ctx->lock); + { + ctx->buf = *postbuf; + } + UNLOCK(&ctx->lock); out: - QUOTA_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, prebuf, - postbuf, xdata); - return 0; + QUOTA_STACK_UNWIND(ftruncate, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + return 0; } - int32_t -quota_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, - dict_t *xdata) +quota_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) { - quota_priv_t *priv = NULL; - quota_local_t *local = NULL; + quota_priv_t *priv = NULL; + quota_local_t *local = NULL; - priv = this->private; + priv = this->private; - WIND_IF_QUOTAOFF (priv->is_quota_on, off); + WIND_IF_QUOTAOFF(priv->is_quota_on, off); - local = quota_local_new (); - if (local == NULL) - goto err; + local = quota_local_new(); + if (local == NULL) + goto err; - frame->local = local; + frame->local = local; - local->loc.inode = inode_ref (fd->inode); + local->loc.inode = inode_ref(fd->inode); - STACK_WIND (frame, quota_ftruncate_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->ftruncate, fd, - offset, xdata); + STACK_WIND(frame, quota_ftruncate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); - return 0; + return 0; err: - QUOTA_STACK_UNWIND (ftruncate, frame, -1, ENOMEM, NULL, NULL, NULL); + QUOTA_STACK_UNWIND(ftruncate, frame, -1, ENOMEM, NULL, NULL, NULL); - return 0; + return 0; off: - STACK_WIND_TAIL (frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->ftruncate, fd, - offset, xdata); - return 0; + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); + return 0; } - -int32_t -quota_send_dir_limit_to_cli (call_frame_t *frame, xlator_t *this, - inode_t *inode, const char *name) 
+static int32_t +quota_send_dir_limit_to_cli(call_frame_t *frame, xlator_t *this, inode_t *inode, + const char *name, const int namelen) { - int32_t ret = 0; - char dir_limit [1024] = {0, }; - dict_t *dict = NULL; - quota_inode_ctx_t *ctx = NULL; - uint64_t value = 0; - quota_priv_t *priv = NULL; - - priv = this->private; - if (!priv->is_quota_on) { - snprintf (dir_limit, 1024, "Quota is disabled please turn on"); - goto dict_set; - } - - ret = inode_ctx_get (inode, this, &value); - if (ret < 0) - goto out; - - ctx = (quota_inode_ctx_t *)(unsigned long)value; - snprintf (dir_limit, 1024, "%"PRId64",%"PRId64, ctx->size, - ctx->hard_lim); + int32_t ret = 0; + int dir_limit_len = 0; + char dir_limit[64] = { + 0, + }; + dict_t *dict = NULL; + quota_inode_ctx_t *ctx = NULL; + uint64_t value = 0; + quota_priv_t *priv = NULL; + + priv = this->private; + if (!priv->is_quota_on) { + dir_limit_len = snprintf(dir_limit, sizeof(dir_limit), + "Quota is disabled please turn on"); + goto dict_set; + } + + ret = inode_ctx_get(inode, this, &value); + if (ret < 0) + goto out; + + ctx = (quota_inode_ctx_t *)(unsigned long)value; + dir_limit_len = snprintf(dir_limit, sizeof(dir_limit), + "%" PRId64 ",%" PRId64, ctx->size, ctx->hard_lim); dict_set: - dict = dict_new (); - if (dict == NULL) { - ret = -1; - goto out; - } + dict = dict_new(); + if (dict == NULL) { + ret = -1; + goto out; + } - ret = dict_set_str (dict, (char *) name, dir_limit); - if (ret < 0) - goto out; + ret = dict_set_nstrn(dict, (char *)name, namelen, dir_limit, dir_limit_len); + if (ret < 0) + goto out; - gf_msg_debug (this->name, 0, "str = %s", dir_limit); + gf_msg_debug(this->name, 0, "str = %s", dir_limit); - QUOTA_STACK_UNWIND (getxattr, frame, 0, 0, dict, NULL); + QUOTA_STACK_UNWIND(getxattr, frame, 0, 0, dict, NULL); - ret = 0; + ret = 0; out: - if (dict) - dict_unref (dict); - return ret; + if (dict) + dict_unref(dict); + return ret; } - int32_t -quota_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, 
- const char *name, dict_t *xdata) +quota_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, + dict_t *xdata) { - int32_t ret = 0; + int32_t ret = 0; - if (name && strcasecmp (name, "trusted.limit.list") == 0) { - ret = quota_send_dir_limit_to_cli (frame, this, fd->inode, - name); - if (ret == 0) { - return 0; - } + if (name && strcasecmp(name, "trusted.limit.list") == 0) { + ret = quota_send_dir_limit_to_cli(frame, this, fd->inode, + "trusted.limit.list", + SLEN("trusted.limit.list")); + if (ret == 0) { + return 0; } + } - STACK_WIND (frame, default_fgetxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata); - return 0; + STACK_WIND(frame, default_fgetxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata); + return 0; } - int32_t -quota_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *name, dict_t *xdata) +quota_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) { - int32_t ret = 0; - - if ((name != NULL) && strcasecmp (name, "trusted.limit.list") == 0) { - ret = quota_send_dir_limit_to_cli (frame, this, loc->inode, - name); - if (ret == 0) - return 0; - } - - STACK_WIND (frame, default_getxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->getxattr, loc, name, xdata); - return 0; + int32_t ret = 0; + + if ((name != NULL) && strcasecmp(name, "trusted.limit.list") == 0) { + ret = quota_send_dir_limit_to_cli(frame, this, loc->inode, + "trusted.limit.list", + SLEN("trusted.limit.list")); + if (ret == 0) + return 0; + } + + STACK_WIND(frame, default_getxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->getxattr, loc, name, xdata); + return 0; } - int32_t -quota_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, - dict_t *xdata) +quota_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, 
+ dict_t *xdata) { - quota_local_t *local = NULL; - quota_inode_ctx_t *ctx = NULL; - - if (op_ret < 0) { - goto out; - } + quota_local_t *local = NULL; + quota_inode_ctx_t *ctx = NULL; - local = frame->local; + if (op_ret < 0) { + goto out; + } - GF_VALIDATE_OR_GOTO ("quota", local, out); + local = frame->local; - quota_inode_ctx_get (local->loc.inode, this, &ctx, 0); - if (ctx == NULL) { - if (!IA_ISDIR (buf->ia_type)) { - gf_msg_debug (this->name, 0, "quota context is NULL on inode" - " (%s). If quota is not enabled recently and " - "crawler has finished crawling, its an error", - uuid_utoa (local->loc.inode->gfid)); - } + GF_VALIDATE_OR_GOTO("quota", local, out); - goto out; + quota_inode_ctx_get(local->loc.inode, this, &ctx, 0); + if (ctx == NULL) { + if (!IA_ISDIR(buf->ia_type)) { + gf_msg_debug(this->name, 0, + "quota context is NULL on inode" + " (%s). If quota is not enabled recently and " + "crawler has finished crawling, its an error", + uuid_utoa(local->loc.inode->gfid)); } - LOCK (&ctx->lock); - { - if (buf) - ctx->buf = *buf; - } - UNLOCK (&ctx->lock); + goto out; + } + + if (buf) { + LOCK(&ctx->lock); + ctx->buf = *buf; + UNLOCK(&ctx->lock); + } out: - QUOTA_STACK_UNWIND (stat, frame, op_ret, op_errno, buf, xdata); - return 0; + QUOTA_STACK_UNWIND(stat, frame, op_ret, op_errno, buf, xdata); + return 0; } - int32_t -quota_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +quota_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { - quota_priv_t *priv = NULL; - quota_local_t *local = NULL; - int32_t ret = -1; + quota_priv_t *priv = NULL; + quota_local_t *local = NULL; + int32_t ret = -1; - priv = this->private; + priv = this->private; - WIND_IF_QUOTAOFF (priv->is_quota_on, off); + WIND_IF_QUOTAOFF(priv->is_quota_on, off); - local = quota_local_new (); - if (local == NULL) { - goto unwind; - } + local = quota_local_new(); + if (local == NULL) { + goto unwind; + } - frame->local = local; - ret = loc_copy (&local->loc, 
loc); - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, ENOMEM, - Q_MSG_ENOMEM, "loc_copy failed"); - goto unwind; - } + frame->local = local; + ret = loc_copy(&local->loc, loc); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM, + "loc_copy failed"); + goto unwind; + } - STACK_WIND (frame, quota_stat_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->stat, loc, - xdata); - return 0; + STACK_WIND(frame, quota_stat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->stat, loc, xdata); + return 0; unwind: - QUOTA_STACK_UNWIND (stat, frame, -1, ENOMEM, NULL, NULL); - return 0; + QUOTA_STACK_UNWIND(stat, frame, -1, ENOMEM, NULL, NULL); + return 0; off: - STACK_WIND_TAIL (frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->stat, loc, - xdata); - return 0; + STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->stat, + loc, xdata); + return 0; } - int32_t -quota_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, - dict_t *xdata) +quota_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + dict_t *xdata) { - quota_local_t *local = NULL; - quota_inode_ctx_t *ctx = NULL; - - if (op_ret < 0) { - goto out; - } + quota_local_t *local = NULL; + quota_inode_ctx_t *ctx = NULL; - local = frame->local; + if (op_ret < 0) { + goto out; + } - GF_VALIDATE_OR_GOTO ("quota", local, out); + local = frame->local; - quota_inode_ctx_get (local->loc.inode, this, &ctx, 0); - if (ctx == NULL) { - if (!IA_ISDIR (buf->ia_type)) { - gf_msg_debug (this->name, 0, "quota context is NULL on inode" - " (%s). 
If quota is not enabled recently and " - "crawler has finished crawling, its an error", - uuid_utoa (local->loc.inode->gfid)); - } + GF_VALIDATE_OR_GOTO("quota", local, out); - goto out; + quota_inode_ctx_get(local->loc.inode, this, &ctx, 0); + if (ctx == NULL) { + if (!IA_ISDIR(buf->ia_type)) { + gf_msg_debug(this->name, 0, + "quota context is NULL on inode" + " (%s). If quota is not enabled recently and " + "crawler has finished crawling, its an error", + uuid_utoa(local->loc.inode->gfid)); } - LOCK (&ctx->lock); - { - if (buf) - ctx->buf = *buf; - } - UNLOCK (&ctx->lock); + goto out; + } + + if (buf) { + LOCK(&ctx->lock); + ctx->buf = *buf; + UNLOCK(&ctx->lock); + } out: - QUOTA_STACK_UNWIND (fstat, frame, op_ret, op_errno, buf, xdata); - return 0; + QUOTA_STACK_UNWIND(fstat, frame, op_ret, op_errno, buf, xdata); + return 0; } - int32_t -quota_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) +quota_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { - quota_priv_t *priv = NULL; - quota_local_t *local = NULL; + quota_priv_t *priv = NULL; + quota_local_t *local = NULL; - priv = this->private; + priv = this->private; - WIND_IF_QUOTAOFF (priv->is_quota_on, off); + WIND_IF_QUOTAOFF(priv->is_quota_on, off); - local = quota_local_new (); - if (local == NULL) { - goto unwind; - } + local = quota_local_new(); + if (local == NULL) { + goto unwind; + } - frame->local = local; + frame->local = local; - local->loc.inode = inode_ref (fd->inode); + local->loc.inode = inode_ref(fd->inode); - STACK_WIND (frame, quota_fstat_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->fstat, fd, - xdata); - return 0; + STACK_WIND(frame, quota_fstat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, fd, xdata); + return 0; unwind: - QUOTA_STACK_UNWIND (fstat, frame, -1, ENOMEM, NULL, NULL); - return 0; + QUOTA_STACK_UNWIND(fstat, frame, -1, ENOMEM, NULL, NULL); + return 0; off: - STACK_WIND_TAIL (frame, FIRST_CHILD(this), - 
FIRST_CHILD(this)->fops->fstat, fd, - xdata); - return 0; + STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fstat, + fd, xdata); + return 0; } - int32_t -quota_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, const char *path, - struct iatt *buf, dict_t *xdata) +quota_readlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, const char *path, + struct iatt *buf, dict_t *xdata) { - quota_local_t *local = NULL; - quota_inode_ctx_t *ctx = NULL; + quota_local_t *local = NULL; + quota_inode_ctx_t *ctx = NULL; - if (op_ret < 0) { - goto out; - } + if (op_ret < 0) { + goto out; + } - local = frame->local; + local = frame->local; - GF_VALIDATE_OR_GOTO ("quota", local, out); + GF_VALIDATE_OR_GOTO("quota", local, out); - quota_inode_ctx_get (local->loc.inode, this, &ctx, 0); - if (ctx == NULL) { - gf_msg_debug (this->name, 0, "quota context is NULL on inode" - " (%s). If quota is not enabled recently and " - "crawler has finished crawling, its an error", - uuid_utoa (local->loc.inode->gfid)); - goto out; - } + quota_inode_ctx_get(local->loc.inode, this, &ctx, 0); + if (ctx == NULL) { + gf_msg_debug(this->name, 0, + "quota context is NULL on inode" + " (%s). 
If quota is not enabled recently and " + "crawler has finished crawling, its an error", + uuid_utoa(local->loc.inode->gfid)); + goto out; + } - LOCK (&ctx->lock); - { - ctx->buf = *buf; - } - UNLOCK (&ctx->lock); + LOCK(&ctx->lock); + { + ctx->buf = *buf; + } + UNLOCK(&ctx->lock); out: - QUOTA_STACK_UNWIND (readlink, frame, op_ret, op_errno, path, buf, - xdata); - return 0; + QUOTA_STACK_UNWIND(readlink, frame, op_ret, op_errno, path, buf, xdata); + return 0; } - int32_t -quota_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size, - dict_t *xdata) +quota_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size, + dict_t *xdata) { - quota_priv_t *priv = NULL; - quota_local_t *local = NULL; - int32_t ret = -1; + quota_priv_t *priv = NULL; + quota_local_t *local = NULL; + int32_t ret = -1; - priv = this->private; + priv = this->private; - WIND_IF_QUOTAOFF (priv->is_quota_on, off); + WIND_IF_QUOTAOFF(priv->is_quota_on, off); - local = quota_local_new (); - if (local == NULL) { - goto unwind; - } + local = quota_local_new(); + if (local == NULL) { + goto unwind; + } - frame->local = local; + frame->local = local; - ret = loc_copy (&local->loc, loc); - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, ENOMEM, - Q_MSG_ENOMEM, "loc_copy failed"); - goto unwind; - } + ret = loc_copy(&local->loc, loc); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM, + "loc_copy failed"); + goto unwind; + } - STACK_WIND (frame, quota_readlink_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->readlink, loc, - size, xdata); - return 0; + STACK_WIND(frame, quota_readlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readlink, loc, size, xdata); + return 0; unwind: - QUOTA_STACK_UNWIND (readlink, frame, -1, ENOMEM, NULL, NULL, NULL); - return 0; + QUOTA_STACK_UNWIND(readlink, frame, -1, ENOMEM, NULL, NULL, NULL); + return 0; off: - STACK_WIND_TAIL (frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readlink, loc, - size, 
xdata); - return 0; + STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->readlink, + loc, size, xdata); + return 0; } - int32_t -quota_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iovec *vector, - int32_t count, struct iatt *buf, struct iobref *iobref, - dict_t *xdata) +quota_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iovec *vector, + int32_t count, struct iatt *buf, struct iobref *iobref, + dict_t *xdata) { - quota_local_t *local = NULL; - quota_inode_ctx_t *ctx = NULL; + quota_local_t *local = NULL; + quota_inode_ctx_t *ctx = NULL; - if (op_ret < 0) { - goto out; - } + if (op_ret < 0) { + goto out; + } - local = frame->local; + local = frame->local; - GF_VALIDATE_OR_GOTO ("quota", local, out); + GF_VALIDATE_OR_GOTO("quota", local, out); - quota_inode_ctx_get (local->loc.inode, this, &ctx, 0); - if (ctx == NULL) { - gf_msg_debug (this->name, 0, "quota context is NULL on inode" - " (%s). If quota is not enabled recently and " - "crawler has finished crawling, its an error", - uuid_utoa (local->loc.inode->gfid)); - goto out; - } + quota_inode_ctx_get(local->loc.inode, this, &ctx, 0); + if (ctx == NULL) { + gf_msg_debug(this->name, 0, + "quota context is NULL on inode" + " (%s). 
If quota is not enabled recently and " + "crawler has finished crawling, its an error", + uuid_utoa(local->loc.inode->gfid)); + goto out; + } - LOCK (&ctx->lock); - { - ctx->buf = *buf; - } - UNLOCK (&ctx->lock); + LOCK(&ctx->lock); + { + ctx->buf = *buf; + } + UNLOCK(&ctx->lock); out: - QUOTA_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, count, - buf, iobref, xdata); - return 0; + QUOTA_STACK_UNWIND(readv, frame, op_ret, op_errno, vector, count, buf, + iobref, xdata); + return 0; } - int32_t -quota_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset, uint32_t flags, dict_t *xdata) +quota_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) { - quota_priv_t *priv = NULL; - quota_local_t *local = NULL; + quota_priv_t *priv = NULL; + quota_local_t *local = NULL; - priv = this->private; + priv = this->private; - WIND_IF_QUOTAOFF (priv->is_quota_on, off); + WIND_IF_QUOTAOFF(priv->is_quota_on, off); - local = quota_local_new (); - if (local == NULL) { - goto unwind; - } + local = quota_local_new(); + if (local == NULL) { + goto unwind; + } - frame->local = local; + frame->local = local; - local->loc.inode = inode_ref (fd->inode); + local->loc.inode = inode_ref(fd->inode); - STACK_WIND (frame, quota_readv_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->readv, fd, - size, offset, flags, xdata); - return 0; + STACK_WIND(frame, quota_readv_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata); + return 0; unwind: - QUOTA_STACK_UNWIND (readv, frame, -1, ENOMEM, NULL, -1, NULL, NULL, - NULL); - return 0; + QUOTA_STACK_UNWIND(readv, frame, -1, ENOMEM, NULL, -1, NULL, NULL, NULL); + return 0; off: - STACK_WIND_TAIL (frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readv, fd, - size, offset, flags, xdata); - return 0; + STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->readv, + fd, size, offset, flags, xdata); + return 0; } 
- int32_t -quota_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) +quota_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - quota_local_t *local = NULL; - quota_inode_ctx_t *ctx = NULL; + quota_local_t *local = NULL; + quota_inode_ctx_t *ctx = NULL; - if (op_ret < 0) { - goto out; - } + if (op_ret < 0) { + goto out; + } - local = frame->local; + local = frame->local; - GF_VALIDATE_OR_GOTO ("quota", local, out); + GF_VALIDATE_OR_GOTO("quota", local, out); - quota_inode_ctx_get (local->loc.inode, this, &ctx, 0); - if (ctx == NULL) { - gf_msg_debug (this->name, 0, "quota context is NULL on inode" - " (%s). If quota is not enabled recently and " - "crawler has finished crawling, its an error", - uuid_utoa (local->loc.inode->gfid)); - goto out; - } + quota_inode_ctx_get(local->loc.inode, this, &ctx, 0); + if (ctx == NULL) { + gf_msg_debug(this->name, 0, + "quota context is NULL on inode" + " (%s). 
If quota is not enabled recently and " + "crawler has finished crawling, its an error", + uuid_utoa(local->loc.inode->gfid)); + goto out; + } - LOCK (&ctx->lock); - { - ctx->buf = *postbuf; - } - UNLOCK (&ctx->lock); + LOCK(&ctx->lock); + { + ctx->buf = *postbuf; + } + UNLOCK(&ctx->lock); out: - QUOTA_STACK_UNWIND (fsync, frame, op_ret, op_errno, prebuf, postbuf, - xdata); - return 0; + QUOTA_STACK_UNWIND(fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata); + return 0; } - int32_t -quota_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, - dict_t *xdata) +quota_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, + dict_t *xdata) { - quota_priv_t *priv = NULL; - quota_local_t *local = NULL; + quota_priv_t *priv = NULL; + quota_local_t *local = NULL; - priv = this->private; + priv = this->private; - WIND_IF_QUOTAOFF (priv->is_quota_on, off); + WIND_IF_QUOTAOFF(priv->is_quota_on, off); - local = quota_local_new (); - if (local == NULL) { - goto unwind; - } + local = quota_local_new(); + if (local == NULL) { + goto unwind; + } - local->loc.inode = inode_ref (fd->inode); + local->loc.inode = inode_ref(fd->inode); - frame->local = local; + frame->local = local; - STACK_WIND (frame, quota_fsync_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsync, fd, - flags, xdata); - return 0; + STACK_WIND(frame, quota_fsync_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsync, fd, flags, xdata); + return 0; unwind: - QUOTA_STACK_UNWIND (fsync, frame, -1, ENOMEM, NULL, NULL, NULL); - return 0; + QUOTA_STACK_UNWIND(fsync, frame, -1, ENOMEM, NULL, NULL, NULL); + return 0; off: - STACK_WIND_TAIL (frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsync, fd, - flags, xdata); - return 0; + STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync, + fd, flags, xdata); + return 0; } - int32_t -quota_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *statpre, - struct 
iatt *statpost, dict_t *xdata) +quota_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *statpre, + struct iatt *statpost, dict_t *xdata) { - quota_local_t *local = NULL; - quota_inode_ctx_t *ctx = NULL; + quota_local_t *local = NULL; + quota_inode_ctx_t *ctx = NULL; - if (op_ret < 0) { - goto out; - } + if (op_ret < 0) { + goto out; + } - local = frame->local; + local = frame->local; - GF_VALIDATE_OR_GOTO ("quota", local, out); + GF_VALIDATE_OR_GOTO("quota", local, out); - quota_inode_ctx_get (local->loc.inode, this, &ctx, 0); - if (ctx == NULL) { - if (!IA_ISDIR (statpost->ia_type)) { - gf_msg_debug (this->name, 0, "quota context is NULL on inode" - " (%s). If quota is not enabled recently and " - "crawler has finished crawling, its an error", - uuid_utoa (local->loc.inode->gfid)); - } - - goto out; + quota_inode_ctx_get(local->loc.inode, this, &ctx, 0); + if (ctx == NULL) { + if (!IA_ISDIR(statpost->ia_type)) { + gf_msg_debug(this->name, 0, + "quota context is NULL on inode" + " (%s). 
If quota is not enabled recently and " + "crawler has finished crawling, its an error", + uuid_utoa(local->loc.inode->gfid)); } - LOCK (&ctx->lock); - { - if (statpost) - ctx->buf = *statpost; - } - UNLOCK (&ctx->lock); + goto out; + } + + if (statpost) { + LOCK(&ctx->lock); + ctx->buf = *statpost; + UNLOCK(&ctx->lock); + } out: - QUOTA_STACK_UNWIND (setattr, frame, op_ret, op_errno, statpre, - statpost, xdata); - return 0; + QUOTA_STACK_UNWIND(setattr, frame, op_ret, op_errno, statpre, statpost, + xdata); + return 0; } - int32_t -quota_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - struct iatt *stbuf, int32_t valid, dict_t *xdata) +quota_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + struct iatt *stbuf, int32_t valid, dict_t *xdata) { - quota_priv_t *priv = NULL; - quota_local_t *local = NULL; - int32_t ret = -1; + quota_priv_t *priv = NULL; + quota_local_t *local = NULL; + int32_t ret = -1; - priv = this->private; + priv = this->private; - WIND_IF_QUOTAOFF (priv->is_quota_on, off); + WIND_IF_QUOTAOFF(priv->is_quota_on, off); - local = quota_local_new (); - if (local == NULL) { - goto unwind; - } + local = quota_local_new(); + if (local == NULL) { + goto unwind; + } - frame->local = local; + frame->local = local; - ret = loc_copy (&local->loc, loc); - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, ENOMEM, - Q_MSG_ENOMEM, "loc_copy failed"); - goto unwind; - } + ret = loc_copy(&local->loc, loc); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM, + "loc_copy failed"); + goto unwind; + } - STACK_WIND (frame, quota_setattr_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->setattr, loc, - stbuf, valid, xdata); - return 0; + STACK_WIND(frame, quota_setattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata); + return 0; unwind: - QUOTA_STACK_UNWIND (setattr, frame, -1, ENOMEM, NULL, NULL, NULL); - return 0; + QUOTA_STACK_UNWIND(setattr, frame, -1, ENOMEM, NULL, NULL, NULL); + 
return 0; off: - STACK_WIND_TAIL (frame, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->setattr, loc, - stbuf, valid, xdata); - return 0; + STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->setattr, + loc, stbuf, valid, xdata); + return 0; } - int32_t -quota_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *statpre, - struct iatt *statpost, dict_t *xdata) +quota_fsetattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *statpre, + struct iatt *statpost, dict_t *xdata) { - quota_local_t *local = NULL; - quota_inode_ctx_t *ctx = NULL; + quota_local_t *local = NULL; + quota_inode_ctx_t *ctx = NULL; - if (op_ret < 0) { - goto out; - } + if (op_ret < 0) { + goto out; + } - local = frame->local; + local = frame->local; - GF_VALIDATE_OR_GOTO ("quota", local, out); + GF_VALIDATE_OR_GOTO("quota", local, out); - quota_inode_ctx_get (local->loc.inode, this, &ctx, 0); - if (ctx == NULL) { - if (!IA_ISDIR (statpost->ia_type)) { - gf_msg_debug (this->name, 0, "quota context is NULL on inode" - " (%s). If quota is not enabled recently and " - "crawler has finished crawling, its an error", - uuid_utoa (local->loc.inode->gfid)); - } - - goto out; + quota_inode_ctx_get(local->loc.inode, this, &ctx, 0); + if (ctx == NULL) { + if (!IA_ISDIR(statpost->ia_type)) { + gf_msg_debug(this->name, 0, + "quota context is NULL on inode" + " (%s). 
If quota is not enabled recently and " + "crawler has finished crawling, its an error", + uuid_utoa(local->loc.inode->gfid)); } - LOCK (&ctx->lock); - { - ctx->buf = *statpost; - } - UNLOCK (&ctx->lock); + goto out; + } + + LOCK(&ctx->lock); + { + ctx->buf = *statpost; + } + UNLOCK(&ctx->lock); out: - QUOTA_STACK_UNWIND (fsetattr, frame, op_ret, op_errno, statpre, - statpost, xdata); - return 0; + QUOTA_STACK_UNWIND(fsetattr, frame, op_ret, op_errno, statpre, statpost, + xdata); + return 0; } - int32_t -quota_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iatt *stbuf, int32_t valid, dict_t *xdata) +quota_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iatt *stbuf, int32_t valid, dict_t *xdata) { - quota_priv_t *priv = NULL; - quota_local_t *local = NULL; + quota_priv_t *priv = NULL; + quota_local_t *local = NULL; - priv = this->private; + priv = this->private; - WIND_IF_QUOTAOFF (priv->is_quota_on, off); + WIND_IF_QUOTAOFF(priv->is_quota_on, off); - local = quota_local_new (); - if (local == NULL) { - goto unwind; - } + local = quota_local_new(); + if (local == NULL) { + goto unwind; + } - frame->local = local; + frame->local = local; - local->loc.inode = inode_ref (fd->inode); + local->loc.inode = inode_ref(fd->inode); - STACK_WIND (frame, quota_fsetattr_cbk, - FIRST_CHILD (this), FIRST_CHILD (this)->fops->fsetattr, fd, - stbuf, valid, xdata); - return 0; + STACK_WIND(frame, quota_fsetattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata); + return 0; unwind: - QUOTA_STACK_UNWIND (fsetattr, frame, -1, ENOMEM, NULL, NULL, NULL); - return 0; + QUOTA_STACK_UNWIND(fsetattr, frame, -1, ENOMEM, NULL, NULL, NULL); + return 0; off: - STACK_WIND_TAIL (frame, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fsetattr, fd, - stbuf, valid, xdata); - return 0; + STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsetattr, + fd, stbuf, valid, xdata); + return 0; } - int32_t -quota_mknod_cbk 
(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +quota_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - int32_t ret = -1; - quota_local_t *local = NULL; - quota_inode_ctx_t *ctx = NULL; - quota_dentry_t *dentry = NULL; - - local = frame->local; - if (op_ret < 0) { - goto unwind; - } - - ret = quota_inode_ctx_get (inode, this, &ctx, 1); - if ((ret == -1) || (ctx == NULL)) { - gf_msg (this->name, GF_LOG_WARNING, 0, - Q_MSG_INODE_CTX_GET_FAILED, - "cannot create quota context in " - "inode(gfid:%s)", uuid_utoa (inode->gfid)); - op_ret = -1; - op_errno = ENOMEM; - goto unwind; - } - - LOCK (&ctx->lock); - { - ctx->buf = *buf; - - dentry = __quota_dentry_new (ctx, (char *)local->loc.name, - local->loc.parent->gfid); - if (dentry == NULL) { - gf_msg (this->name, GF_LOG_WARNING, ENOMEM, - Q_MSG_ENOMEM, "cannot create a new dentry " - "(name:%s) for inode(gfid:%s)", local->loc.name, - uuid_utoa (local->loc.inode->gfid)); - op_ret = -1; - op_errno = ENOMEM; - goto unlock; - } - } + int32_t ret = -1; + quota_local_t *local = NULL; + quota_inode_ctx_t *ctx = NULL; + quota_dentry_t *dentry = NULL; + + local = frame->local; + if (op_ret < 0) { + goto unwind; + } + + ret = quota_inode_ctx_get(inode, this, &ctx, 1); + if ((ret == -1) || (ctx == NULL)) { + gf_msg(this->name, GF_LOG_WARNING, 0, Q_MSG_INODE_CTX_GET_FAILED, + "cannot create quota context in " + "inode(gfid:%s)", + uuid_utoa(inode->gfid)); + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + + LOCK(&ctx->lock); + { + ctx->buf = *buf; + + dentry = __quota_dentry_new(ctx, (char *)local->loc.name, + local->loc.parent->gfid); + if (dentry == NULL) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM, + "cannot create a 
new dentry " + "(name:%s) for inode(gfid:%s)", + local->loc.name, uuid_utoa(local->loc.inode->gfid)); + op_ret = -1; + op_errno = ENOMEM; + goto unlock; + } + } unlock: - UNLOCK (&ctx->lock); + UNLOCK(&ctx->lock); unwind: - QUOTA_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode, - buf, preparent, postparent, xdata); - return 0; + QUOTA_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, buf, preparent, + postparent, xdata); + return 0; } - int -quota_mknod_helper (call_frame_t *frame, xlator_t *this, loc_t *loc, - mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata) +quota_mknod_helper(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + dev_t rdev, mode_t umask, dict_t *xdata) { - quota_local_t *local = NULL; - int32_t op_errno = EINVAL; - quota_priv_t *priv = NULL; - - local = frame->local; + quota_local_t *local = NULL; + int32_t op_errno = EINVAL; - GF_VALIDATE_OR_GOTO ("quota", local, unwind); + local = frame->local; - priv = this->private; + GF_VALIDATE_OR_GOTO("quota", local, unwind); - if (local->op_ret == -1) { - op_errno = local->op_errno; - goto unwind; - } + if (local->op_ret == -1) { + op_errno = local->op_errno; + goto unwind; + } - STACK_WIND (frame, quota_mknod_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod, loc, - mode, rdev, umask, xdata); + STACK_WIND(frame, quota_mknod_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata); - return 0; + return 0; unwind: - QUOTA_STACK_UNWIND (mknod, frame, -1, op_errno, NULL, NULL, - NULL, NULL, NULL); - return 0; + QUOTA_STACK_UNWIND(mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL, + NULL); + return 0; } - int -quota_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, - dev_t rdev, mode_t umask, dict_t *xdata) +quota_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + dev_t rdev, mode_t umask, dict_t *xdata) { - quota_priv_t *priv = NULL; - int32_t ret = -1; - quota_local_t *local = NULL; - call_stub_t *stub = 
NULL; - - priv = this->private; - - WIND_IF_QUOTAOFF (priv->is_quota_on, off); - QUOTA_WIND_FOR_INTERNAL_FOP (xdata, off); - - local = quota_local_new (); - if (local == NULL) { - goto err; - } - - frame->local = local; - - ret = loc_copy (&local->loc, loc); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, ENOMEM, - Q_MSG_ENOMEM, "loc_copy failed"); - goto err; - } - - stub = fop_mknod_stub (frame, quota_mknod_helper, loc, mode, rdev, - umask, xdata); - if (stub == NULL) { - goto err; - } - - LOCK (&local->lock); - { - local->link_count = 1; - local->stub = stub; - local->delta = 0; - } - UNLOCK (&local->lock); + quota_priv_t *priv = NULL; + int32_t ret = -1; + quota_local_t *local = NULL; + call_stub_t *stub = NULL; + + priv = this->private; + + WIND_IF_QUOTAOFF(priv->is_quota_on, off); + QUOTA_WIND_FOR_INTERNAL_FOP(xdata, off); + + local = quota_local_new(); + if (local == NULL) { + goto err; + } + + frame->local = local; + + ret = loc_copy(&local->loc, loc); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM, + "loc_copy failed"); + goto err; + } + + stub = fop_mknod_stub(frame, quota_mknod_helper, loc, mode, rdev, umask, + xdata); + if (stub == NULL) { + goto err; + } + + LOCK(&local->lock); + { + local->link_count = 1; + local->stub = stub; + local->delta = 0; + local->object_delta = 1; + } + UNLOCK(&local->lock); - quota_check_limit (frame, loc->parent, this); - return 0; + quota_check_limit(frame, loc->parent, this); + return 0; err: - QUOTA_STACK_UNWIND (mknod, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, - NULL); - return 0; + QUOTA_STACK_UNWIND(mknod, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, NULL); + return 0; off: - STACK_WIND_TAIL (frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->mknod, loc, - mode, rdev, umask, xdata); - return 0; + STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod, + loc, mode, rdev, umask, xdata); + return 0; } int -quota_setxattr_cbk (call_frame_t *frame, void *cookie, - xlator_t 
*this, int op_ret, int op_errno, dict_t *xdata) +quota_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xdata) { - quota_local_t *local = NULL; - quota_inode_ctx_t *ctx = NULL; - int ret = 0; - - if (op_ret < 0) { - goto out; - } - - local = frame->local; - if (!local) - goto out; - - ret = quota_inode_ctx_get (local->loc.inode, this, &ctx, 1); - if ((ret < 0) || (ctx == NULL)) { - op_errno = -1; - goto out; - } - - LOCK (&ctx->lock); - { - ctx->hard_lim = local->limit.hl; - ctx->soft_lim = local->limit.sl; - ctx->object_hard_lim = local->object_limit.hl; - ctx->object_soft_lim = local->object_limit.sl; - } - UNLOCK (&ctx->lock); + quota_local_t *local = NULL; + quota_inode_ctx_t *ctx = NULL; + int ret = 0; + + if (op_ret < 0) { + goto out; + } + + local = frame->local; + if (!local) + goto out; + + ret = quota_inode_ctx_get(local->loc.inode, this, &ctx, 1); + if ((ret < 0) || (ctx == NULL)) { + op_errno = -1; + goto out; + } + + LOCK(&ctx->lock); + { + ctx->hard_lim = local->limit.hl; + ctx->soft_lim = local->limit.sl; + ctx->object_hard_lim = local->object_limit.hl; + ctx->object_soft_lim = local->object_limit.sl; + } + UNLOCK(&ctx->lock); out: - QUOTA_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata); - return 0; + QUOTA_STACK_UNWIND(setxattr, frame, op_ret, op_errno, xdata); + return 0; } int -quota_setxattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, dict_t *dict, int flags, dict_t *xdata) +quota_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + int flags, dict_t *xdata) { - quota_priv_t *priv = NULL; - int op_errno = EINVAL; - int op_ret = -1; - int64_t hard_lim = -1; - int64_t soft_lim = -1; - int64_t object_hard_limit = -1; - int64_t object_soft_limit = -1; - quota_local_t *local = NULL; - - priv = this->private; - - WIND_IF_QUOTAOFF (priv->is_quota_on, off); - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - - if 
(frame->root->pid >= 0) { - GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.quota*", dict, - op_errno, err); - GF_IF_INTERNAL_XATTR_GOTO ("trusted.pgfid*", dict, op_errno, - err); - } - - quota_get_limits (this, dict, &hard_lim, &soft_lim, &object_hard_limit, - &object_soft_limit); - - if (hard_lim > 0 || object_hard_limit > 0) { - local = quota_local_new (); - if (local == NULL) { - op_errno = ENOMEM; - goto err; - } - frame->local = local; - loc_copy (&local->loc, loc); - } - - if (hard_lim > 0) { - local->limit.hl = hard_lim; - local->limit.sl = soft_lim; - } - - if (object_hard_limit > 0) { - local->object_limit.hl = object_hard_limit; - local->object_limit.sl = object_soft_limit; + quota_priv_t *priv = NULL; + int op_errno = EINVAL; + int op_ret = -1; + int64_t hard_lim = -1; + int64_t soft_lim = -1; + int64_t object_hard_limit = -1; + int64_t object_soft_limit = -1; + quota_local_t *local = NULL; + gf_boolean_t internal_fop = _gf_false; + + priv = this->private; + + WIND_IF_QUOTAOFF(priv->is_quota_on, off); + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(loc, err); + + if (xdata && dict_get_sizen(xdata, GLUSTERFS_INTERNAL_FOP_KEY)) + internal_fop = _gf_true; + + if (frame->root->pid >= 0 && internal_fop == _gf_false) { + GF_IF_INTERNAL_XATTR_GOTO("trusted.glusterfs.quota*", dict, op_errno, + err); + GF_IF_INTERNAL_XATTR_GOTO("trusted.pgfid*", dict, op_errno, err); + } + + quota_get_limits(this, dict, &hard_lim, &soft_lim, &object_hard_limit, + &object_soft_limit); + + if (hard_lim > 0 || object_hard_limit > 0) { + local = quota_local_new(); + if (local == NULL) { + op_errno = ENOMEM; + goto err; } - - STACK_WIND (frame, quota_setxattr_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr, loc, - dict, flags, xdata); - return 0; + frame->local = local; + loc_copy(&local->loc, loc); + } + + if (hard_lim > 0) { + local->limit.hl = hard_lim; + local->limit.sl = soft_lim; + } + + if (object_hard_limit > 0) { + 
local->object_limit.hl = object_hard_limit; + local->object_limit.sl = object_soft_limit; + } + + STACK_WIND(frame, quota_setxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata); + return 0; err: - QUOTA_STACK_UNWIND (setxattr, frame, op_ret, op_errno, NULL); - return 0; + QUOTA_STACK_UNWIND(setxattr, frame, op_ret, op_errno, NULL); + return 0; off: - STACK_WIND_TAIL (frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setxattr, loc, - dict, flags, xdata); - return 0; + STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr, + loc, dict, flags, xdata); + return 0; } int -quota_fsetxattr_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int op_ret, int op_errno, dict_t *xdata) +quota_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xdata) { - quota_inode_ctx_t *ctx = NULL; - quota_local_t *local = NULL; - - if (op_ret < 0) - goto out; - - local = frame->local; - if (!local) - goto out; - - op_ret = quota_inode_ctx_get (local->loc.inode, this, &ctx, 1); - if ((op_ret < 0) || (ctx == NULL)) { - op_errno = ENOMEM; - goto out; - } - - LOCK (&ctx->lock); - { - ctx->hard_lim = local->limit.hl; - ctx->soft_lim = local->limit.sl; - ctx->object_hard_lim = local->object_limit.hl; - ctx->object_soft_lim = local->object_limit.sl; - } - UNLOCK (&ctx->lock); + quota_inode_ctx_t *ctx = NULL; + quota_local_t *local = NULL; + + if (op_ret < 0) + goto out; + + local = frame->local; + if (!local) + goto out; + + op_ret = quota_inode_ctx_get(local->loc.inode, this, &ctx, 1); + if ((op_ret < 0) || (ctx == NULL)) { + op_errno = ENOMEM; + goto out; + } + + LOCK(&ctx->lock); + { + ctx->hard_lim = local->limit.hl; + ctx->soft_lim = local->limit.sl; + ctx->object_hard_lim = local->object_limit.hl; + ctx->object_soft_lim = local->object_limit.sl; + } + UNLOCK(&ctx->lock); out: - QUOTA_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, xdata); - return 0; + 
QUOTA_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, xdata); + return 0; } int -quota_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, - dict_t *dict, int flags, dict_t *xdata) +quota_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, + int flags, dict_t *xdata) { - quota_priv_t *priv = NULL; - int32_t op_ret = -1; - int32_t op_errno = EINVAL; - quota_local_t *local = NULL; - int64_t hard_lim = -1; - int64_t soft_lim = -1; - int64_t object_hard_limit = -1; - int64_t object_soft_limit = -1; - - priv = this->private; - - WIND_IF_QUOTAOFF (priv->is_quota_on, off); - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - - if (0 <= frame->root->pid) { - GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.quota*", - dict, op_errno, err); - GF_IF_INTERNAL_XATTR_GOTO ("trusted.pgfid*", dict, - op_errno, err); - } - - quota_get_limits (this, dict, &hard_lim, &soft_lim, &object_hard_limit, - &object_soft_limit); - - if (hard_lim > 0 || object_hard_limit > 0) { - local = quota_local_new (); - if (local == NULL) { - op_errno = ENOMEM; - goto err; - } - frame->local = local; - local->loc.inode = inode_ref (fd->inode); - } - - if (hard_lim > 0) { - local->limit.hl = hard_lim; - local->limit.sl = soft_lim; - } - - if (object_hard_limit > 0) { - local->object_limit.hl = object_hard_limit; - local->object_limit.sl = object_soft_limit; + quota_priv_t *priv = NULL; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + quota_local_t *local = NULL; + int64_t hard_lim = -1; + int64_t soft_lim = -1; + int64_t object_hard_limit = -1; + int64_t object_soft_limit = -1; + + priv = this->private; + + WIND_IF_QUOTAOFF(priv->is_quota_on, off); + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(fd, err); + + if (0 <= frame->root->pid) { + GF_IF_INTERNAL_XATTR_GOTO("trusted.glusterfs.quota*", dict, op_errno, + err); + GF_IF_INTERNAL_XATTR_GOTO("trusted.pgfid*", dict, op_errno, err); + } + + 
quota_get_limits(this, dict, &hard_lim, &soft_lim, &object_hard_limit, + &object_soft_limit); + + if (hard_lim > 0 || object_hard_limit > 0) { + local = quota_local_new(); + if (local == NULL) { + op_errno = ENOMEM; + goto err; } - - STACK_WIND (frame, quota_fsetxattr_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsetxattr, fd, - dict, flags, xdata); - return 0; + frame->local = local; + local->loc.inode = inode_ref(fd->inode); + } + + if (hard_lim > 0) { + local->limit.hl = hard_lim; + local->limit.sl = soft_lim; + } + + if (object_hard_limit > 0) { + local->object_limit.hl = object_hard_limit; + local->object_limit.sl = object_soft_limit; + } + + STACK_WIND(frame, quota_fsetxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata); + return 0; err: - QUOTA_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, NULL); - return 0; + QUOTA_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, NULL); + return 0; off: - STACK_WIND_TAIL (frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsetxattr, fd, - dict, flags, xdata); - return 0; + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata); + return 0; } - int -quota_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +quota_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - QUOTA_STACK_UNWIND (removexattr, frame, op_ret, op_errno, xdata); - return 0; + QUOTA_STACK_UNWIND(removexattr, frame, op_ret, op_errno, xdata); + return 0; } int -quota_removexattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, const char *name, dict_t *xdata) +quota_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) { - quota_priv_t *priv = NULL; - int32_t op_errno = EINVAL; + quota_priv_t *priv = NULL; + int32_t op_errno = EINVAL; - priv = this->private; + priv = this->private; - 
WIND_IF_QUOTAOFF (priv->is_quota_on, off); + WIND_IF_QUOTAOFF(priv->is_quota_on, off); - VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO(this, err); - /* all quota xattrs can be cleaned up by doing setxattr on special key. - * Hence its ok that we don't allow removexattr on quota keys here. - */ - if (frame->root->pid >= 0) { - GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.quota*", - name, op_errno, err); - GF_IF_NATIVE_XATTR_GOTO ("trusted.pgfid*", name, - op_errno, err); - } + /* all quota xattrs can be cleaned up by doing setxattr on special key. + * Hence its ok that we don't allow removexattr on quota keys here. + */ + if (frame->root->pid >= 0) { + GF_IF_NATIVE_XATTR_GOTO("trusted.glusterfs.quota*", name, op_errno, + err); + GF_IF_NATIVE_XATTR_GOTO("trusted.pgfid*", name, op_errno, err); + } - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(loc, err); - STACK_WIND (frame, quota_removexattr_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->removexattr, - loc, name, xdata); - return 0; + STACK_WIND(frame, quota_removexattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, loc, name, xdata); + return 0; err: - QUOTA_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL); - return 0; + QUOTA_STACK_UNWIND(removexattr, frame, -1, op_errno, NULL); + return 0; off: - STACK_WIND_TAIL (frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->removexattr, - loc, name, xdata); - return 0; + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, loc, name, xdata); + return 0; } - int -quota_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +quota_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - QUOTA_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, xdata); - return 0; + QUOTA_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, 
xdata); + return 0; } int -quota_fremovexattr (call_frame_t *frame, xlator_t *this, - fd_t *fd, const char *name, dict_t *xdata) +quota_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *name, dict_t *xdata) { - quota_priv_t *priv = NULL; - int32_t op_ret = -1; - int32_t op_errno = EINVAL; - - priv = this->private; - - WIND_IF_QUOTAOFF (priv->is_quota_on, off); - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - - if (frame->root->pid >= 0) { - GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.quota*", - name, op_errno, err); - GF_IF_NATIVE_XATTR_GOTO ("trusted.pgfid*", name, - op_errno, err); - } - STACK_WIND (frame, quota_fremovexattr_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->fremovexattr, - fd, name, xdata); - return 0; + quota_priv_t *priv = NULL; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + + priv = this->private; + + WIND_IF_QUOTAOFF(priv->is_quota_on, off); + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(fd, err); + + if (frame->root->pid >= 0) { + GF_IF_NATIVE_XATTR_GOTO("trusted.glusterfs.quota*", name, op_errno, + err); + GF_IF_NATIVE_XATTR_GOTO("trusted.pgfid*", name, op_errno, err); + } + STACK_WIND(frame, quota_fremovexattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata); + return 0; err: - QUOTA_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, NULL); - return 0; + QUOTA_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, NULL); + return 0; off: - STACK_WIND_TAIL (frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fremovexattr, - fd, name, xdata); - return 0; + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata); + return 0; } - int32_t -quota_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct statvfs *buf, - dict_t *xdata) +quota_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + 
int32_t op_ret, int32_t op_errno, struct statvfs *buf, + dict_t *xdata) { - inode_t *inode = NULL; - uint64_t value = 0; - int64_t usage = -1; - int64_t avail = -1; - int64_t blocks = 0; - quota_inode_ctx_t *ctx = NULL; - int ret = 0; - quota_local_t *local = frame->local; - - inode = cookie; - - /* This fop will fail mostly in case of client disconnect's, - * which is already logged. Hence, not logging here */ - if (op_ret == -1) - goto unwind; - /* - * We should never get here unless quota_statfs (below) sent us a - * cookie, and it would only do so if the value was non-NULL. This - * check is therefore just routine defensive coding. - */ - - GF_VALIDATE_OR_GOTO ("quota", inode, unwind); - - inode_ctx_get (inode, this, &value); - ctx = (quota_inode_ctx_t *)(unsigned long)value; - if (!ctx || ctx->hard_lim <= 0) - goto unwind; - - { /* statfs is adjusted in this code block */ - usage = (ctx->size) / buf->f_bsize; - - blocks = ctx->hard_lim / buf->f_bsize; - buf->f_blocks = blocks; - - avail = buf->f_blocks - usage; - avail = max (avail, 0); - - buf->f_bfree = avail; - /* - * We have to assume that the total assigned quota - * won't cause us to dip into the reserved space, - * because dealing with the overcommitted cases is - * just too hairy (especially when different bricks - * might be using different reserved percentages and - * such). - */ - buf->f_bavail = buf->f_bfree; - } + inode_t *inode = NULL; + uint64_t value = 0; + int64_t usage = -1; + int64_t avail = -1; + int64_t blocks = 0; + quota_inode_ctx_t *ctx = NULL; + int ret = 0; + + inode = cookie; + + /* This fop will fail mostly in case of client disconnect, + * which is already logged. Hence, not logging here */ + if (op_ret == -1) + goto unwind; + /* + * We should never get here unless quota_statfs (below) sent us a + * cookie, and it would only do so if the value was non-NULL. This + * check is therefore just routine defensive coding. 
+ */ + + GF_VALIDATE_OR_GOTO("quota", inode, unwind); + + inode_ctx_get(inode, this, &value); + ctx = (quota_inode_ctx_t *)(unsigned long)value; + if (!ctx || ctx->hard_lim <= 0) + goto unwind; + + { /* statfs is adjusted in this code block */ + usage = (ctx->size) / buf->f_bsize; + + blocks = ctx->hard_lim / buf->f_bsize; + buf->f_blocks = blocks; + + avail = buf->f_blocks - usage; + avail = max(avail, 0); + + buf->f_bfree = avail; + /* + * We have to assume that the total assigned quota + * won't cause us to dip into the reserved space, + * because dealing with the overcommitted cases is + * just too hairy (especially when different bricks + * might be using different reserved percentages and + * such). + */ + buf->f_bavail = buf->f_bfree; + } - xdata = xdata ? dict_ref(xdata) : dict_new(); - if (!xdata) - goto unwind; + xdata = xdata ? dict_ref(xdata) : dict_new(); + if (!xdata) + goto unwind; - ret = dict_set_int8 (xdata, "quota-deem-statfs", 1); - if (-1 == ret) - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - Q_MSG_ENOMEM, "Dict set failed, deem-statfs option may " - "have no effect"); + ret = dict_set_int8(xdata, "quota-deem-statfs", 1); + if (-1 == ret) + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, Q_MSG_ENOMEM, + "Dict set failed, deem-statfs option may " + "have no effect"); unwind: - QUOTA_STACK_UNWIND (statfs, frame, op_ret, op_errno, buf, xdata); + QUOTA_STACK_UNWIND(statfs, frame, op_ret, op_errno, buf, xdata); - if (xdata) - dict_unref (xdata); + if (xdata) + dict_unref(xdata); - return 0; + return 0; } - int32_t -quota_statfs_helper (call_frame_t *frame, xlator_t *this, loc_t *loc, - dict_t *xdata) +quota_statfs_helper(call_frame_t *frame, xlator_t *this, loc_t *loc, + dict_t *xdata) { - quota_local_t *local = frame->local; - int op_errno = EINVAL; + quota_local_t *local = frame->local; + int op_errno = EINVAL; - GF_VALIDATE_OR_GOTO ("quota", local, err); + GF_VALIDATE_OR_GOTO("quota", local, err); - if (-1 == local->op_ret) { - op_errno = 
local->op_errno; - goto err; - } + if (-1 == local->op_ret) { + op_errno = local->op_errno; + goto err; + } - STACK_WIND_COOKIE (frame, quota_statfs_cbk, local->inode, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->statfs, loc, xdata); - return 0; + STACK_WIND_COOKIE(frame, quota_statfs_cbk, local->inode, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->statfs, loc, xdata); + return 0; err: - QUOTA_STACK_UNWIND (statfs, frame, -1, op_errno, NULL, NULL); + QUOTA_STACK_UNWIND(statfs, frame, -1, op_errno, NULL, NULL); - return 0; + return 0; } int32_t -quota_statfs_validate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *xdata, - struct iatt *postparent) +quota_statfs_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, + struct iatt *postparent) { - quota_local_t *local = NULL; - int32_t ret = 0; - quota_inode_ctx_t *ctx = NULL; - uint64_t value = 0; - data_t *data = NULL; - quota_meta_t size = {0,}; - - local = frame->local; - - if (op_ret < 0) - goto resume; - - GF_ASSERT (local); - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO_WITH_ERROR ("quota", this, resume, op_errno, - EINVAL); - GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, xdata, resume, op_errno, - EINVAL); - - ret = inode_ctx_get (local->validate_loc.inode, this, &value); - - ctx = (quota_inode_ctx_t *)(unsigned long)value; - if ((ret == -1) || (ctx == NULL)) { - gf_msg (this->name, GF_LOG_WARNING, EINVAL, - Q_MSG_INODE_CTX_GET_FAILED, - "quota context is not present in inode (gfid:%s)", - uuid_utoa (local->validate_loc.inode->gfid)); - op_errno = EINVAL; - goto resume; - } - - ret = quota_dict_get_meta (xdata, QUOTA_SIZE_KEY, &size); - if (ret == -1) { - gf_msg (this->name, GF_LOG_WARNING, EINVAL, - Q_MSG_SIZE_KEY_MISSING, "size key not present in " - "dict"); - op_errno = EINVAL; - } - - LOCK (&ctx->lock); - { - ctx->size = 
size.size; - ctx->file_count = size.file_count; - ctx->dir_count = size.dir_count; - gettimeofday (&ctx->tv, NULL); - } - UNLOCK (&ctx->lock); + quota_local_t *local = NULL; + int32_t ret = 0; + quota_inode_ctx_t *ctx = NULL; + uint64_t value = 0; + quota_meta_t size = { + 0, + }; + + local = frame->local; + + if (op_ret < 0) + goto resume; + + GF_ASSERT(local); + GF_ASSERT(frame); + GF_VALIDATE_OR_GOTO_WITH_ERROR("quota", this, resume, op_errno, EINVAL); + GF_VALIDATE_OR_GOTO_WITH_ERROR(this->name, xdata, resume, op_errno, EINVAL); + + ret = inode_ctx_get(local->validate_loc.inode, this, &value); + + ctx = (quota_inode_ctx_t *)(unsigned long)value; + if ((ret == -1) || (ctx == NULL)) { + gf_msg(this->name, GF_LOG_WARNING, EINVAL, Q_MSG_INODE_CTX_GET_FAILED, + "quota context is not present in inode (gfid:%s)", + uuid_utoa(local->validate_loc.inode->gfid)); + op_errno = EINVAL; + goto resume; + } + + ret = quota_dict_get_meta(xdata, QUOTA_SIZE_KEY, SLEN(QUOTA_SIZE_KEY), + &size); + if (ret == -1) { + gf_msg(this->name, GF_LOG_WARNING, EINVAL, Q_MSG_SIZE_KEY_MISSING, + "size key not present in " + "dict"); + op_errno = EINVAL; + } + + LOCK(&ctx->lock); + { + ctx->size = size.size; + ctx->validate_time = gf_time(); + ctx->file_count = size.file_count; + ctx->dir_count = size.dir_count; + } + UNLOCK(&ctx->lock); resume: - quota_link_count_decrement (frame); - return 0; + local->op_errno = op_errno; + quota_link_count_decrement(frame); + return 0; } void -quota_get_limit_dir_continuation (struct list_head *parents, inode_t *inode, - int32_t op_ret, int32_t op_errno, void *data) +quota_get_limit_dir_continuation(struct list_head *parents, inode_t *inode, + int32_t op_ret, int32_t op_errno, void *data) { - call_frame_t *frame = NULL; - xlator_t *this = NULL; - quota_local_t *local = NULL; - quota_dentry_t *entry = NULL; - inode_t *parent = NULL; - - frame = data; - local = frame->local; - this = THIS; - - if ((op_ret < 0) || list_empty (parents)) { - if (op_ret >= 0) { - 
gf_msg (this->name, GF_LOG_WARNING, EIO, - Q_MSG_ANCESTRY_BUILD_FAILED, - "Couldn't build ancestry for inode (gfid:%s). " - "Without knowing ancestors till root, quota " - "cannot be enforced. " - "Hence, failing fop with EIO", - uuid_utoa (inode->gfid)); - op_errno = EIO; - } + call_frame_t *frame = NULL; + xlator_t *this = NULL; + quota_dentry_t *entry = NULL; + inode_t *parent = NULL; - quota_handle_validate_error (frame, -1, op_errno); - goto out; + frame = data; + this = THIS; + + if ((op_ret < 0) || list_empty(parents)) { + if (op_ret >= 0) { + gf_msg(this->name, GF_LOG_WARNING, EIO, Q_MSG_ANCESTRY_BUILD_FAILED, + "Couldn't build ancestry for inode (gfid:%s). " + "Without knowing ancestors till root, quota " + "cannot be enforced. " + "Hence, failing fop with EIO", + uuid_utoa(inode->gfid)); + op_errno = EIO; } - entry = list_entry (parents, quota_dentry_t, next); - parent = inode_find (inode->table, entry->par); + quota_handle_validate_error(frame, -1, op_errno); + goto out; + } - quota_get_limit_dir (frame, parent, this); + entry = list_entry(parents, quota_dentry_t, next); + parent = inode_find(inode->table, entry->par); - inode_unref (parent); + quota_get_limit_dir(frame, parent, this); + + inode_unref(parent); out: - return; + return; } void -quota_statfs_continue (call_frame_t *frame, xlator_t *this, inode_t *inode) +quota_statfs_continue(call_frame_t *frame, xlator_t *this, inode_t *inode) { - call_stub_t *stub = NULL; - quota_local_t *local = frame->local; - int ret = -1; - - LOCK (&local->lock); - { - local->inode = inode_ref (inode); - } - UNLOCK (&local->lock); - - ret = quota_validate (frame, local->inode, this, - quota_statfs_validate_cbk); - if (0 > ret) - quota_handle_validate_error (frame, -1, -ret); + quota_local_t *local = frame->local; + int ret = -1; + + LOCK(&local->lock); + { + local->inode = inode_ref(inode); + } + UNLOCK(&local->lock); + + ret = quota_validate(frame, local->inode, this, quota_statfs_validate_cbk); + if (0 > ret) + 
quota_handle_validate_error(frame, -1, -ret); } void -quota_get_limit_dir (call_frame_t *frame, inode_t *cur_inode, xlator_t *this) +quota_get_limit_dir(call_frame_t *frame, inode_t *cur_inode, xlator_t *this) { - inode_t *inode = NULL; - inode_t *parent = NULL; - uint64_t value = 0; - quota_inode_ctx_t *ctx = NULL; - int ret = -1; - quota_local_t *local = frame->local; + inode_t *inode = NULL; + inode_t *parent = NULL; + uint64_t value = 0; + quota_inode_ctx_t *ctx = NULL; + quota_local_t *local = frame->local; - if (!cur_inode) - goto out; + if (!cur_inode) + goto out; - inode = inode_ref (cur_inode); - while (inode) { - value = 0; - inode_ctx_get (inode, this, &value); + inode = inode_ref(cur_inode); + while (inode) { + value = 0; + inode_ctx_get(inode, this, &value); - if (value) { - ctx = (quota_inode_ctx_t *)(unsigned long)value; - if (ctx->hard_lim > 0) - break; - } - - if (__is_root_gfid (inode->gfid)) - goto off; + if (value) { + ctx = (quota_inode_ctx_t *)(unsigned long)value; + if (ctx->hard_lim > 0) + break; + } - parent = inode_parent (inode, 0, NULL); - if (!parent) { - ret = quota_build_ancestry - (inode, quota_get_limit_dir_continuation, - (void *)frame); - goto out; - } + if (__is_root_gfid(inode->gfid)) + goto off; - inode_unref (inode); - inode = parent; + parent = inode_parent(inode, 0, NULL); + if (!parent) { + (void)quota_build_ancestry(inode, quota_get_limit_dir_continuation, + frame); + goto out; } - quota_statfs_continue (frame, this, inode); - inode_unref (inode); - return; + inode_unref(inode); + inode = parent; + } + + quota_statfs_continue(frame, this, inode); + inode_unref(inode); + return; off: - gf_msg_debug (this->name, 0, - "No limit set on the inode or it's parents."); + gf_msg_debug(this->name, 0, "No limit set on the inode or it's parents."); - QUOTA_STACK_WIND_TAIL (frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->statfs, - &local->loc, local->xdata); + QUOTA_STACK_WIND_TAIL(frame, FIRST_CHILD(this), + 
FIRST_CHILD(this)->fops->statfs, &local->loc, + local->xdata); out: - inode_unref (inode); + inode_unref(inode); - return; + return; } int32_t -quota_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +quota_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { - int op_errno = 0; - int ret = -1; - int8_t ignore_deem_statfs = 0; - quota_priv_t *priv = NULL; - quota_local_t *local = NULL; - call_stub_t *stub = NULL; + int op_errno = 0; + int ret = -1; + int8_t ignore_deem_statfs = 0; + quota_priv_t *priv = NULL; + quota_local_t *local = NULL; + call_stub_t *stub = NULL; - priv = this->private; - GF_ASSERT (loc); + priv = this->private; + GF_ASSERT(loc); - WIND_IF_QUOTAOFF (priv->is_quota_on, off); + WIND_IF_QUOTAOFF(priv->is_quota_on, off); - ret = dict_get_int8 (xdata, GF_INTERNAL_IGNORE_DEEM_STATFS, - &ignore_deem_statfs); - ret = 0; + ret = dict_get_int8(xdata, GF_INTERNAL_IGNORE_DEEM_STATFS, + &ignore_deem_statfs); + ret = 0; - if (ignore_deem_statfs) - goto off; + if (ignore_deem_statfs) + goto off; - if (priv->consider_statfs && loc->inode) { - local = quota_local_new (); - if (!local) { - op_errno = ENOMEM; - goto err; - } - frame->local = local; + if (priv->consider_statfs && loc->inode) { + local = quota_local_new(); + if (!local) { + op_errno = ENOMEM; + goto err; + } + frame->local = local; - ret = loc_copy (&local->loc, loc); - if (-1 == ret) { - op_errno = ENOMEM; - goto err; - } + ret = loc_copy(&local->loc, loc); + if (-1 == ret) { + op_errno = ENOMEM; + goto err; + } - if (xdata) - local->xdata = dict_ref (xdata); + if (xdata) + local->xdata = dict_ref(xdata); - stub = fop_statfs_stub (frame, quota_statfs_helper, - &local->loc, local->xdata); - if (!stub) { - op_errno = ENOMEM; - goto err; - } + stub = fop_statfs_stub(frame, quota_statfs_helper, &local->loc, + local->xdata); + if (!stub) { + op_errno = ENOMEM; + goto err; + } - LOCK (&local->lock); - { - local->link_count = 1; - local->stub = stub; - } - UNLOCK 
(&local->lock); + LOCK(&local->lock); + { + local->link_count = 1; + local->stub = stub; + } + UNLOCK(&local->lock); - quota_get_limit_dir (frame, loc->inode, this); + quota_get_limit_dir(frame, loc->inode, this); - return 0; - } + return 0; + } - /* - * We have to make sure that we never get to quota_statfs_cbk - * with a cookie that points to something other than an inode, - * which is exactly what would happen with STACK_UNWIND using - * that as a callback. Therefore, use default_statfs_cbk in - * this case instead. - * - * Also if the option deem-statfs is not set to "on" don't - * bother calculating quota limit on / in statfs_cbk. - */ - if (priv->consider_statfs) - gf_log (this->name, GF_LOG_ERROR, - "Missing inode, can't adjust for quota"); + /* + * We have to make sure that we never get to quota_statfs_cbk + * with a cookie that points to something other than an inode, + * which is exactly what would happen with STACK_UNWIND using + * that as a callback. Therefore, use default_statfs_cbk in + * this case instead. + * + * Also if the option deem-statfs is not set to "on" don't + * bother calculating quota limit on / in statfs_cbk. 
+ */ + if (priv->consider_statfs) + gf_log(this->name, GF_LOG_ERROR, + "Missing inode, can't adjust for quota"); off: - STACK_WIND_TAIL (frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->statfs, loc, xdata); - return 0; + STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->statfs, + loc, xdata); + return 0; err: - QUOTA_STACK_UNWIND (statfs, frame, -1, op_errno, NULL, NULL); + QUOTA_STACK_UNWIND(statfs, frame, -1, op_errno, NULL, NULL); - return 0; + return 0; } int -quota_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, gf_dirent_t *entries, - dict_t *xdata) +quota_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, gf_dirent_t *entries, + dict_t *xdata) { - gf_dirent_t *entry = NULL; - quota_local_t *local = NULL; - loc_t loc = {0, }; + gf_dirent_t *entry = NULL; + quota_local_t *local = NULL; + loc_t loc = { + 0, + }; - if (op_ret <= 0) - goto unwind; + if (op_ret <= 0) + goto unwind; - local = frame->local; + local = frame->local; - list_for_each_entry (entry, &entries->list, list) { - if ((strcmp (entry->d_name, ".") == 0) || - (strcmp (entry->d_name, "..") == 0) || - entry->inode == NULL) - continue; + list_for_each_entry(entry, &entries->list, list) + { + if ((strcmp(entry->d_name, ".") == 0) || + (strcmp(entry->d_name, "..") == 0) || entry->inode == NULL) + continue; - gf_uuid_copy (loc.gfid, entry->d_stat.ia_gfid); - loc.inode = inode_ref (entry->inode); - loc.parent = inode_ref (local->loc.inode); - gf_uuid_copy (loc.pargfid, loc.parent->gfid); - loc.name = entry->d_name; + gf_uuid_copy(loc.gfid, entry->d_stat.ia_gfid); + loc.inode = inode_ref(entry->inode); + loc.parent = inode_ref(local->loc.inode); + gf_uuid_copy(loc.pargfid, loc.parent->gfid); + loc.name = entry->d_name; - quota_fill_inodectx (this, entry->inode, entry->dict, - &loc, &entry->d_stat, &op_errno); + quota_fill_inodectx(this, entry->inode, entry->dict, &loc, + &entry->d_stat, &op_errno); 
- loc_wipe (&loc); - } + loc_wipe(&loc); + } unwind: - QUOTA_STACK_UNWIND (readdirp, frame, op_ret, op_errno, entries, xdata); + QUOTA_STACK_UNWIND(readdirp, frame, op_ret, op_errno, entries, xdata); - return 0; + return 0; } int -quota_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset, dict_t *dict) +quota_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, dict_t *dict) { - quota_priv_t *priv = NULL; - int ret = 0; - gf_boolean_t new_dict = _gf_false; - quota_local_t *local = NULL; + quota_priv_t *priv = NULL; + int ret = 0; + gf_boolean_t new_dict = _gf_false; + quota_local_t *local = NULL; - priv = this->private; + priv = this->private; - WIND_IF_QUOTAOFF (priv->is_quota_on, off); + WIND_IF_QUOTAOFF(priv->is_quota_on, off); - local = quota_local_new (); + local = quota_local_new(); - if (local == NULL) { - goto err; - } + if (local == NULL) { + goto err; + } - frame->local = local; + frame->local = local; - local->loc.inode = inode_ref (fd->inode); + local->loc.inode = inode_ref(fd->inode); - if (dict == NULL) { - dict = dict_new (); - new_dict = _gf_true; - } + if (dict == NULL) { + dict = dict_new(); + new_dict = _gf_true; + } - if (dict) { - ret = dict_set_int8 (dict, QUOTA_LIMIT_KEY, 1); - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, ENOMEM, - Q_MSG_ENOMEM, - "dict set of key for hard-limit"); - goto err; - } + if (dict) { + ret = dict_set_int8(dict, QUOTA_LIMIT_KEY, 1); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM, + "dict set of key for hard-limit"); + goto err; } + } - if (dict) { - ret = dict_set_int8 (dict, QUOTA_LIMIT_OBJECTS_KEY, 1); - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, ENOMEM, - Q_MSG_ENOMEM, "dict set of key for hard-limit " - "failed"); - goto err; - } + if (dict) { + ret = dict_set_int8(dict, QUOTA_LIMIT_OBJECTS_KEY, 1); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM, + "dict set of key for 
hard-limit " + "failed"); + goto err; } + } - STACK_WIND (frame, quota_readdirp_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp, fd, - size, offset, dict); + STACK_WIND(frame, quota_readdirp_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdirp, fd, size, offset, dict); - if (new_dict) { - dict_unref (dict); - } + if (new_dict) { + dict_unref(dict); + } - return 0; + return 0; err: - STACK_UNWIND_STRICT (readdirp, frame, -1, EINVAL, NULL, NULL); + STACK_UNWIND_STRICT(readdirp, frame, -1, EINVAL, NULL, NULL); - if (new_dict) { - dict_unref (dict); - } + if (new_dict) { + dict_unref(dict); + } - return 0; + return 0; off: - STACK_WIND_TAIL (frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readdirp, fd, - size, offset, dict); - return 0; + STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp, + fd, size, offset, dict); + return 0; } int32_t @@ -4708,596 +4644,693 @@ quota_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) { - int32_t ret = 0; - uint64_t ctx_int = 0; - quota_inode_ctx_t *ctx = NULL; - quota_local_t *local = NULL; - - local = frame->local; - - if ((op_ret < 0) || (local == NULL)) { - goto out; - } - - ret = inode_ctx_get (local->loc.inode, this, &ctx_int); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, 0, - Q_MSG_INODE_CTX_GET_FAILED, - "%s: failed to get the context", local->loc.path); - goto out; - } - - ctx = (quota_inode_ctx_t *)(unsigned long) ctx_int; - - if (ctx == NULL) { - gf_msg (this->name, GF_LOG_WARNING, 0, - Q_MSG_INODE_CTX_GET_FAILED, - "quota context not set in %s (gfid:%s)", - local->loc.path, uuid_utoa (local->loc.inode->gfid)); - goto out; - } - - LOCK (&ctx->lock); - { - ctx->buf = *postbuf; - } - UNLOCK (&ctx->lock); + int32_t ret = 0; + uint64_t ctx_int = 0; + quota_inode_ctx_t *ctx = NULL; + quota_local_t *local = NULL; + + local = frame->local; + + if ((op_ret < 0) || (local == 
NULL)) { + goto out; + } + + ret = inode_ctx_get(local->loc.inode, this, &ctx_int); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, Q_MSG_INODE_CTX_GET_FAILED, + "%s: failed to get the context", local->loc.path); + goto out; + } + + ctx = (quota_inode_ctx_t *)(unsigned long)ctx_int; + + if (ctx == NULL) { + gf_msg(this->name, GF_LOG_WARNING, 0, Q_MSG_INODE_CTX_GET_FAILED, + "quota context not set in %s (gfid:%s)", local->loc.path, + uuid_utoa(local->loc.inode->gfid)); + goto out; + } + + LOCK(&ctx->lock); + { + ctx->buf = *postbuf; + } + UNLOCK(&ctx->lock); out: - QUOTA_STACK_UNWIND (fallocate, frame, op_ret, op_errno, prebuf, postbuf, - xdata); + QUOTA_STACK_UNWIND(fallocate, frame, op_ret, op_errno, prebuf, postbuf, + xdata); - return 0; + return 0; } - int32_t -quota_fallocate_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, - int32_t mode, off_t offset, size_t len, dict_t *xdata) +quota_fallocate_helper(call_frame_t *frame, xlator_t *this, fd_t *fd, + int32_t mode, off_t offset, size_t len, dict_t *xdata) { - quota_local_t *local = NULL; - int32_t op_errno = EINVAL; - quota_priv_t *priv = NULL; - - local = frame->local; - - GF_VALIDATE_OR_GOTO ("quota", local, unwind); - - priv = this->private; - - if (local->op_ret == -1) { - op_errno = local->op_errno; - if (op_errno == ENOENT || op_errno == ESTALE) { - /* We may get ENOENT/ESTALE in case of below scenario - * fd = open file.txt - * unlink file.txt - * fallocate on fd - * Here build_ancestry can fail as the file is removed. 
- * For now ignore ENOENT/ESTALE on active fd - * We need to re-visit this code once we understand - * how other file-system behave in this scenario - */ - gf_msg_debug (this->name, 0, "quota enforcer failed " - "with ENOENT/ESTALE on %s, cannot check " - "quota limits and allowing fallocate", - uuid_utoa (fd->inode->gfid)); - } else { - goto unwind; - } + quota_local_t *local = NULL; + int32_t op_errno = EINVAL; + + local = frame->local; + + GF_VALIDATE_OR_GOTO("quota", local, unwind); + + if (local->op_ret == -1) { + op_errno = local->op_errno; + if (op_errno == ENOENT || op_errno == ESTALE) { + /* We may get ENOENT/ESTALE in case of below scenario + * fd = open file.txt + * unlink file.txt + * fallocate on fd + * Here build_ancestry can fail as the file is removed. + * For now ignore ENOENT/ESTALE on active fd + * We need to re-visit this code once we understand + * how other file-system behave in this scenario + */ + gf_msg_debug(this->name, 0, + "quota enforcer failed " + "with ENOENT/ESTALE on %s, cannot check " + "quota limits and allowing fallocate", + uuid_utoa(fd->inode->gfid)); + } else { + goto unwind; } + } - STACK_WIND (frame, quota_fallocate_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len, - xdata); - return 0; + STACK_WIND(frame, quota_fallocate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len, + xdata); + return 0; unwind: - QUOTA_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + QUOTA_STACK_UNWIND(fallocate, frame, -1, op_errno, NULL, NULL, NULL); + return 0; } - int32_t quota_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, - off_t offset, size_t len, dict_t *xdata) + off_t offset, size_t len, dict_t *xdata) { - int32_t ret = -1, op_errno = EINVAL; - int32_t parents = 0; - int32_t fail_count = 0; - quota_local_t *local = NULL; - quota_inode_ctx_t *ctx = NULL; - quota_priv_t *priv = NULL; - quota_dentry_t *dentry = NULL; - 
quota_dentry_t *tmp = NULL; - call_stub_t *stub = NULL; - struct list_head head = {0, }; - inode_t *par_inode = NULL; - - priv = this->private; - GF_VALIDATE_OR_GOTO (this->name, priv, unwind); - - WIND_IF_QUOTAOFF (priv->is_quota_on, off); - - INIT_LIST_HEAD (&head); - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO ("quota", this, unwind); - GF_VALIDATE_OR_GOTO (this->name, fd, unwind); - - local = quota_local_new (); - if (local == NULL) { - goto unwind; - } - - frame->local = local; - local->loc.inode = inode_ref (fd->inode); - - ret = quota_inode_ctx_get (fd->inode, this, &ctx, 0); - if (ctx == NULL) { - gf_msg_debug (this->name, 0, "quota context is NULL on inode" - " (%s). If quota is not enabled recently and " - "crawler has finished crawling, its an error", - uuid_utoa (local->loc.inode->gfid)); - } - - stub = fop_fallocate_stub(frame, quota_fallocate_helper, fd, mode, - offset, len, xdata); - if (stub == NULL) { - op_errno = ENOMEM; - goto unwind; + int32_t op_errno = EINVAL; + int32_t parents = 0; + int32_t fail_count = 0; + quota_local_t *local = NULL; + quota_inode_ctx_t *ctx = NULL; + quota_priv_t *priv = NULL; + quota_dentry_t *dentry = NULL; + quota_dentry_t *tmp = NULL; + call_stub_t *stub = NULL; + struct list_head head = { + 0, + }; + inode_t *par_inode = NULL; + + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, unwind); + + WIND_IF_QUOTAOFF(priv->is_quota_on, off); + + INIT_LIST_HEAD(&head); + + GF_ASSERT(frame); + GF_VALIDATE_OR_GOTO("quota", this, unwind); + GF_VALIDATE_OR_GOTO(this->name, fd, unwind); + + local = quota_local_new(); + if (local == NULL) { + goto unwind; + } + + frame->local = local; + local->loc.inode = inode_ref(fd->inode); + + (void)quota_inode_ctx_get(fd->inode, this, &ctx, 0); + if (ctx == NULL) { + gf_msg_debug(this->name, 0, + "quota context is NULL on inode" + " (%s). 
If quota is not enabled recently and " + "crawler has finished crawling, its an error", + uuid_utoa(local->loc.inode->gfid)); + } + + stub = fop_fallocate_stub(frame, quota_fallocate_helper, fd, mode, offset, + len, xdata); + if (stub == NULL) { + op_errno = ENOMEM; + goto unwind; + } + + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, unwind); + + parents = quota_add_parents_from_ctx(ctx, &head); + if (parents == -1) { + op_errno = errno; + goto unwind; + } + + /* + * Note that by using len as the delta we're assuming the range from + * offset to offset+len has not already been allocated. This can result + * in ENOSPC errors attempting to allocate an already allocated range. + */ + local->delta = len; + local->object_delta = 0; + local->stub = stub; + local->link_count = parents; + + if (parents == 0) { + local->link_count = 1; + quota_check_limit(frame, fd->inode, this); + } else { + list_for_each_entry_safe(dentry, tmp, &head, next) + { + par_inode = do_quota_check_limit(frame, fd->inode, this, dentry, + _gf_false); + if (par_inode == NULL) { + /* remove stale entry from inode_ctx */ + quota_dentry_del(ctx, dentry->name, dentry->par); + parents--; + fail_count++; + } else { + inode_unref(par_inode); + } + __quota_dentry_free(dentry); } - priv = this->private; - GF_VALIDATE_OR_GOTO (this->name, priv, unwind); - - parents = quota_add_parents_from_ctx (ctx, &head); - - /* - * Note that by using len as the delta we're assuming the range from - * offset to offset+len has not already been allocated. This can result - * in ENOSPC errors attempting to allocate an already allocated range. 
- */ - local->delta = len; - local->stub = stub; - local->link_count = parents; - if (parents == 0) { - local->link_count = 1; - quota_check_limit (frame, fd->inode, this); - } else { - list_for_each_entry_safe (dentry, tmp, &head, next) { - par_inode = do_quota_check_limit (frame, fd->inode, - this, dentry, - _gf_false); - if (par_inode == NULL) { - /* remove stale entry from inode_ctx */ - quota_dentry_del (ctx, dentry->name, - dentry->par); - parents--; - fail_count++; - } else { - inode_unref (par_inode); - } - __quota_dentry_free (dentry); - } - - if (parents == 0) { - LOCK (&local->lock); - { - local->link_count++; - } - UNLOCK (&local->lock); - quota_check_limit (frame, fd->inode, this); - } + LOCK(&local->lock); + { + local->link_count++; + } + UNLOCK(&local->lock); + quota_check_limit(frame, fd->inode, this); + } - while (fail_count != 0) { - quota_link_count_decrement (frame); - fail_count--; - } + while (fail_count != 0) { + quota_link_count_decrement(frame); + fail_count--; } + } - return 0; + return 0; unwind: - QUOTA_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + QUOTA_STACK_UNWIND(fallocate, frame, -1, op_errno, NULL, NULL, NULL); + return 0; off: - STACK_WIND_TAIL (frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, - len, xdata); - return 0; + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len, + xdata); + return 0; } void -quota_log_helper (char **usage_str, int64_t cur_size, inode_t *inode, - char **path, struct timeval *cur_time) +quota_log_helper(char **usage_str, int64_t cur_size, inode_t *inode, + char **path, time_t *cur_time) { - xlator_t *this = THIS; + xlator_t *this = THIS; - if (!usage_str || !inode || !path || !cur_time) { - gf_log (this->name, GF_LOG_ERROR, "Received null argument"); - return; - } + if (!usage_str || !inode || !path || !cur_time) { + gf_log(this->name, GF_LOG_ERROR, "Received null argument"); + return; + } 
- *usage_str = gf_uint64_2human_readable (cur_size); - if (!(*usage_str)) - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, Q_MSG_ENOMEM, - "integer to string conversion failed Reason" - ":\"Cannot allocate memory\""); + *usage_str = gf_uint64_2human_readable(cur_size); + if (!(*usage_str)) + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, Q_MSG_ENOMEM, + "integer to string conversion failed Reason" + ":\"Cannot allocate memory\""); - inode_path (inode, NULL, path); - if (!(*path)) - *path = uuid_utoa (inode->gfid); + inode_path(inode, NULL, path); + if (!(*path)) + *path = uuid_utoa(inode->gfid); - gettimeofday (cur_time, NULL); + *cur_time = gf_time(); } /* Logs if -* i. Usage crossed soft limit -* ii. Usage above soft limit and alert-time elapsed -*/ + * i. Usage crossed soft limit + * ii. Usage above soft limit and alert-time elapsed + */ void -quota_log_usage (xlator_t *this, quota_inode_ctx_t *ctx, inode_t *inode, - int64_t delta) +quota_log_usage(xlator_t *this, quota_inode_ctx_t *ctx, inode_t *inode, + int64_t delta) { - struct timeval cur_time = {0,}; - char *usage_str = NULL; - char *path = NULL; - int64_t cur_size = 0; - quota_priv_t *priv = NULL; + time_t cur_time = 0; + char *usage_str = NULL; + char *path = NULL; + int64_t cur_size = 0; + quota_priv_t *priv = NULL; - priv = this->private; - cur_size = ctx->size + delta; + priv = this->private; + cur_size = ctx->size + delta; - if ((ctx->soft_lim <= 0) || cur_size < ctx->soft_lim) - return; + if ((ctx->soft_lim <= 0) || cur_size < ctx->soft_lim) + return; - /* Usage crossed/reached soft limit */ - if (DID_REACH_LIMIT (ctx->soft_lim, ctx->size, cur_size)) { + /* Usage crossed/reached soft limit */ + if (DID_REACH_LIMIT(ctx->soft_lim, ctx->size, cur_size)) { + quota_log_helper(&usage_str, cur_size, inode, &path, &cur_time); - quota_log_helper (&usage_str, cur_size, inode, - &path, &cur_time); + gf_msg(this->name, GF_LOG_ALERT, 0, Q_MSG_CROSSED_SOFT_LIMIT, + "Usage crossed soft limit: " + "%s used by %s", + usage_str, 
path); - gf_msg (this->name, GF_LOG_ALERT, 0, - Q_MSG_CROSSED_SOFT_LIMIT, "Usage crossed soft limit: " - "%s used by %s", usage_str, path); - ctx->prev_log = cur_time; - } - /* Usage is above soft limit */ - else if (cur_size > ctx->soft_lim && - quota_timeout (&ctx->prev_log, priv->log_timeout)) { + gf_event(EVENT_QUOTA_CROSSED_SOFT_LIMIT, + "Usage=%s;volume=%s;" + "path=%s", + usage_str, priv->volume_uuid, path); - quota_log_helper (&usage_str, cur_size, inode, - &path, &cur_time); + ctx->prev_log_time = cur_time; - gf_msg (this->name, GF_LOG_ALERT, 0, Q_MSG_CROSSED_SOFT_LIMIT, - "Usage is above soft limit: %s used by %s", - usage_str, path); - ctx->prev_log = cur_time; - } + } + /* Usage is above soft limit */ + else if (cur_size > ctx->soft_lim && + quota_timeout(ctx->prev_log_time, priv->log_timeout)) { + quota_log_helper(&usage_str, cur_size, inode, &path, &cur_time); + + gf_msg(this->name, GF_LOG_ALERT, 0, Q_MSG_CROSSED_SOFT_LIMIT, + "Usage is above soft limit: %s used by %s", usage_str, path); + + gf_event(EVENT_QUOTA_CROSSED_SOFT_LIMIT, + "Usage=%s;volume=%s;" + "path=%s", + usage_str, priv->volume_uuid, path); - if (usage_str) - GF_FREE (usage_str); + ctx->prev_log_time = cur_time; + } + + if (path) + GF_FREE(path); + + if (usage_str) + GF_FREE(usage_str); } int32_t -mem_acct_init (xlator_t *this) +mem_acct_init(xlator_t *this) { - int ret = -1; - - if (!this) - return ret; + int ret = -1; - ret = xlator_mem_acct_init (this, gf_quota_mt_end + 1); + if (!this) + return ret; - if (ret != 0) { - gf_msg (this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM, - "Memory accounting init failed"); - return ret; - } + ret = xlator_mem_acct_init(this, gf_quota_mt_end + 1); + if (ret != 0) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM, + "Memory accounting init failed"); return ret; -} + } + return ret; +} int32_t -quota_forget (xlator_t *this, inode_t *inode) +quota_forget(xlator_t *this, inode_t *inode) { - int32_t ret = 0; - uint64_t ctx_int = 0; - 
quota_inode_ctx_t *ctx = NULL; - quota_dentry_t *dentry = NULL, *tmp; + int32_t ret = 0; + uint64_t ctx_int = 0; + quota_inode_ctx_t *ctx = NULL; + quota_dentry_t *dentry = NULL, *tmp; - ret = inode_ctx_del (inode, this, &ctx_int); + ret = inode_ctx_del(inode, this, &ctx_int); - if (ret < 0) { - return 0; - } + if (ret < 0) { + return 0; + } - ctx = (quota_inode_ctx_t *) (long)ctx_int; + ctx = (quota_inode_ctx_t *)(long)ctx_int; - LOCK (&ctx->lock); + LOCK(&ctx->lock); + { + list_for_each_entry_safe(dentry, tmp, &ctx->parents, next) { - list_for_each_entry_safe (dentry, tmp, &ctx->parents, next) { - __quota_dentry_free (dentry); - } + __quota_dentry_free(dentry); } - UNLOCK (&ctx->lock); + } + UNLOCK(&ctx->lock); - LOCK_DESTROY (&ctx->lock); + LOCK_DESTROY(&ctx->lock); - GF_FREE (ctx); + GF_FREE(ctx); - return 0; + return 0; } -int32_t -init (xlator_t *this) +int +notify(xlator_t *this, int event, void *data, ...) { - int32_t ret = -1; - quota_priv_t *priv = NULL; - rpc_clnt_t *rpc = NULL; - - if ((this->children == NULL) - || this->children->next) { - gf_msg (this->name, GF_LOG_ERROR, 0, - Q_MSG_INVALID_VOLFILE, - "FATAL: quota (%s) not configured with " - "exactly one child", this->name); - return -1; - } - - if (this->parents == NULL) { - gf_msg (this->name, GF_LOG_WARNING, 0, - Q_MSG_INVALID_VOLFILE, - "dangling volume. 
check volfile"); - } - - QUOTA_ALLOC_OR_GOTO (priv, quota_priv_t, err); - - LOCK_INIT (&priv->lock); - - this->private = priv; - - GF_OPTION_INIT ("deem-statfs", priv->consider_statfs, bool, err); - GF_OPTION_INIT ("server-quota", priv->is_quota_on, bool, err); - GF_OPTION_INIT ("default-soft-limit", priv->default_soft_lim, percent, - err); - GF_OPTION_INIT ("soft-timeout", priv->soft_timeout, time, err); - GF_OPTION_INIT ("hard-timeout", priv->hard_timeout, time, err); - GF_OPTION_INIT ("alert-time", priv->log_timeout, time, err); - GF_OPTION_INIT ("volume-uuid", priv->volume_uuid, str, err); - - this->local_pool = mem_pool_new (quota_local_t, 64); - if (!this->local_pool) { - ret = -1; - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - Q_MSG_ENOMEM, "failed to create local_t's memory pool"); - goto err; + quota_priv_t *priv = NULL; + int ret = 0; + rpc_clnt_t *rpc = NULL; + gf_boolean_t conn_status = _gf_true; + xlator_t *victim = data; + + priv = this->private; + if (!priv || !priv->is_quota_on) + goto out; + + if (event == GF_EVENT_PARENT_DOWN) { + rpc = priv->rpc_clnt; + if (rpc) { + rpc_clnt_disable(rpc); + pthread_mutex_lock(&priv->conn_mutex); + { + conn_status = priv->conn_status; + while (conn_status) { + (void)pthread_cond_wait(&priv->conn_cond, + &priv->conn_mutex); + conn_status = priv->conn_status; + } + } + pthread_mutex_unlock(&priv->conn_mutex); + gf_log(this->name, GF_LOG_INFO, + "Notify GF_EVENT_PARENT_DOWN for brick %s", victim->name); } + } - if (priv->is_quota_on) { - rpc = quota_enforcer_init (this, this->options); - if (rpc == NULL) { - ret = -1; - gf_msg (this->name, GF_LOG_WARNING, 0, - Q_MSG_QUOTA_ENFORCER_RPC_INIT_FAILED, - "quota enforcer rpc init failed"); - goto err; - } +out: + ret = default_notify(this, event, data); + return ret; +} - LOCK (&priv->lock); - { - priv->rpc_clnt = rpc; - } - UNLOCK (&priv->lock); +int32_t +init(xlator_t *this) +{ + int32_t ret = -1; + quota_priv_t *priv = NULL; + rpc_clnt_t *rpc = NULL; + + if 
((this->children == NULL) || this->children->next) { + gf_msg(this->name, GF_LOG_ERROR, 0, Q_MSG_INVALID_VOLFILE, + "FATAL: quota (%s) not configured with " + "exactly one child", + this->name); + return -1; + } + + if (this->parents == NULL) { + gf_msg(this->name, GF_LOG_WARNING, 0, Q_MSG_INVALID_VOLFILE, + "dangling volume. check volfile"); + } + + QUOTA_ALLOC_OR_GOTO(priv, quota_priv_t, err); + + LOCK_INIT(&priv->lock); + + this->private = priv; + + GF_OPTION_INIT("deem-statfs", priv->consider_statfs, bool, err); + GF_OPTION_INIT("server-quota", priv->is_quota_on, bool, err); + GF_OPTION_INIT("default-soft-limit", priv->default_soft_lim, percent, err); + GF_OPTION_INIT("soft-timeout", priv->soft_timeout, time, err); + GF_OPTION_INIT("hard-timeout", priv->hard_timeout, time, err); + GF_OPTION_INIT("alert-time", priv->log_timeout, time, err); + GF_OPTION_INIT("volume-uuid", priv->volume_uuid, str, err); + + this->local_pool = mem_pool_new(quota_local_t, 64); + if (!this->local_pool) { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, Q_MSG_ENOMEM, + "failed to create local_t's memory pool"); + goto err; + } + + pthread_mutex_init(&priv->conn_mutex, NULL); + pthread_cond_init(&priv->conn_cond, NULL); + priv->conn_status = _gf_false; + + if (priv->is_quota_on) { + rpc = quota_enforcer_init(this, this->options); + if (rpc == NULL) { + ret = -1; + gf_msg(this->name, GF_LOG_WARNING, 0, + Q_MSG_QUOTA_ENFORCER_RPC_INIT_FAILED, + "quota enforcer rpc init failed"); + goto err; + } + + LOCK(&priv->lock); + { + priv->rpc_clnt = rpc; } + UNLOCK(&priv->lock); + } - ret = 0; + ret = 0; err: - return ret; + return ret; } int -reconfigure (xlator_t *this, dict_t *options) +reconfigure(xlator_t *this, dict_t *options) { - int32_t ret = -1; - quota_priv_t *priv = NULL; - gf_boolean_t quota_on = _gf_false; - rpc_clnt_t *rpc = NULL; - - priv = this->private; - - GF_OPTION_RECONF ("deem-statfs", priv->consider_statfs, options, bool, - out); - GF_OPTION_RECONF ("server-quota", 
quota_on, options, bool, - out); - GF_OPTION_RECONF ("default-soft-limit", priv->default_soft_lim, - options, percent, out); - GF_OPTION_RECONF ("alert-time", priv->log_timeout, options, - time, out); - GF_OPTION_RECONF ("soft-timeout", priv->soft_timeout, options, - time, out); - GF_OPTION_RECONF ("hard-timeout", priv->hard_timeout, options, - time, out); - - if (quota_on) { - priv->rpc_clnt = quota_enforcer_init (this, - this->options); - if (priv->rpc_clnt == NULL) { - ret = -1; - gf_msg (this->name, GF_LOG_WARNING, 0, - Q_MSG_QUOTA_ENFORCER_RPC_INIT_FAILED, - "quota enforcer rpc init failed"); - goto out; - } + int32_t ret = -1; + quota_priv_t *priv = NULL; + gf_boolean_t quota_on = _gf_false; + rpc_clnt_t *rpc = NULL; + + priv = this->private; + + GF_OPTION_RECONF("deem-statfs", priv->consider_statfs, options, bool, out); + GF_OPTION_RECONF("server-quota", quota_on, options, bool, out); + GF_OPTION_RECONF("default-soft-limit", priv->default_soft_lim, options, + percent, out); + GF_OPTION_RECONF("alert-time", priv->log_timeout, options, time, out); + GF_OPTION_RECONF("soft-timeout", priv->soft_timeout, options, time, out); + GF_OPTION_RECONF("hard-timeout", priv->hard_timeout, options, time, out); + + if (quota_on) { + priv->rpc_clnt = quota_enforcer_init(this, this->options); + if (priv->rpc_clnt == NULL) { + ret = -1; + gf_msg(this->name, GF_LOG_WARNING, 0, + Q_MSG_QUOTA_ENFORCER_RPC_INIT_FAILED, + "quota enforcer rpc init failed"); + goto out; + } + + } else { + LOCK(&priv->lock); + { + rpc = priv->rpc_clnt; + priv->rpc_clnt = NULL; + } + UNLOCK(&priv->lock); - } else { - LOCK (&priv->lock); - { - rpc = priv->rpc_clnt; - priv->rpc_clnt = NULL; - } - UNLOCK (&priv->lock); - - if (rpc != NULL) { - // Quotad is shutdown when there is no started volume - // which has quota enabled. So, we should disable the - // enforcer client when quota is disabled on a volume, - // to avoid spurious reconnect attempts to a service - // (quotad), that is known to be down. 
- rpc_clnt_unref (rpc); - } + if (rpc != NULL) { + // Quotad is shutdown when there is no started volume + // which has quota enabled. So, we should disable the + // enforcer client when quota is disabled on a volume, + // to avoid spurious reconnect attempts to a service + // (quotad), that is known to be down. + rpc_clnt_unref(rpc); } + } - priv->is_quota_on = quota_on; + priv->is_quota_on = quota_on; - ret = 0; + ret = 0; out: - return ret; + return ret; } int32_t -quota_priv_dump (xlator_t *this) +quota_priv_dump(xlator_t *this) { - quota_priv_t *priv = NULL; - int32_t ret = -1; - - - GF_ASSERT (this); - - priv = this->private; - - gf_proc_dump_add_section ("xlators.features.quota.priv", this->name); - - ret = TRY_LOCK (&priv->lock); - if (ret) - goto out; - else { - gf_proc_dump_write("soft-timeout", "%d", priv->soft_timeout); - gf_proc_dump_write("hard-timeout", "%d", priv->hard_timeout); - gf_proc_dump_write("alert-time", "%d", priv->log_timeout); - gf_proc_dump_write("quota-on", "%d", priv->is_quota_on); - gf_proc_dump_write("statfs", "%d", priv->consider_statfs); - gf_proc_dump_write("volume-uuid", "%s", priv->volume_uuid); - gf_proc_dump_write("validation-count", "%ld", - priv->validation_count); - } - UNLOCK (&priv->lock); + quota_priv_t *priv = NULL; + int32_t ret = -1; + + GF_ASSERT(this); + + priv = this->private; + if (!priv) + goto out; + + gf_proc_dump_add_section("xlators.features.quota.priv"); + + ret = TRY_LOCK(&priv->lock); + if (ret) + goto out; + else { + gf_proc_dump_write("soft-timeout", "%u", priv->soft_timeout); + gf_proc_dump_write("hard-timeout", "%u", priv->hard_timeout); + gf_proc_dump_write("alert-time", "%u", priv->log_timeout); + gf_proc_dump_write("quota-on", "%d", priv->is_quota_on); + gf_proc_dump_write("statfs", "%d", priv->consider_statfs); + gf_proc_dump_write("volume-uuid", "%s", priv->volume_uuid); + gf_proc_dump_write("validation-count", "%" PRIu64, + priv->validation_count); + } + UNLOCK(&priv->lock); out: - return 0; + 
return 0; } void -fini (xlator_t *this) +fini(xlator_t *this) { + quota_priv_t *priv = NULL; + rpc_clnt_t *rpc = NULL; + + priv = this->private; + if (!priv) return; + rpc = priv->rpc_clnt; + priv->rpc_clnt = NULL; + if (rpc) { + rpc_clnt_connection_cleanup(&rpc->conn); + rpc_clnt_unref(rpc); + } + + this->private = NULL; + LOCK_DESTROY(&priv->lock); + pthread_mutex_destroy(&priv->conn_mutex); + pthread_cond_destroy(&priv->conn_cond); + + GF_FREE(priv); + if (this->local_pool) { + mem_pool_destroy(this->local_pool); + this->local_pool = NULL; + } + return; } - struct xlator_fops fops = { - .statfs = quota_statfs, - .lookup = quota_lookup, - .writev = quota_writev, - .create = quota_create, - .mkdir = quota_mkdir, - .truncate = quota_truncate, - .ftruncate = quota_ftruncate, - .unlink = quota_unlink, - .symlink = quota_symlink, - .link = quota_link, - .rename = quota_rename, - .getxattr = quota_getxattr, - .fgetxattr = quota_fgetxattr, - .stat = quota_stat, - .fstat = quota_fstat, - .readlink = quota_readlink, - .readv = quota_readv, - .fsync = quota_fsync, - .setattr = quota_setattr, - .fsetattr = quota_fsetattr, - .mknod = quota_mknod, - .setxattr = quota_setxattr, - .fsetxattr = quota_fsetxattr, - .removexattr = quota_removexattr, - .fremovexattr = quota_fremovexattr, - .readdirp = quota_readdirp, - .fallocate = quota_fallocate, + .statfs = quota_statfs, + .lookup = quota_lookup, + .writev = quota_writev, + .create = quota_create, + .mkdir = quota_mkdir, + .truncate = quota_truncate, + .ftruncate = quota_ftruncate, + .unlink = quota_unlink, + .symlink = quota_symlink, + .link = quota_link, + .rename = quota_rename, + .getxattr = quota_getxattr, + .fgetxattr = quota_fgetxattr, + .stat = quota_stat, + .fstat = quota_fstat, + .readlink = quota_readlink, + .readv = quota_readv, + .fsync = quota_fsync, + .setattr = quota_setattr, + .fsetattr = quota_fsetattr, + .mknod = quota_mknod, + .setxattr = quota_setxattr, + .fsetxattr = quota_fsetxattr, + .removexattr = 
quota_removexattr, + .fremovexattr = quota_fremovexattr, + .readdirp = quota_readdirp, + .fallocate = quota_fallocate, }; -struct xlator_cbks cbks = { - .forget = quota_forget -}; +struct xlator_cbks cbks = {.forget = quota_forget}; struct xlator_dumpops dumpops = { - .priv = quota_priv_dump, + .priv = quota_priv_dump, }; struct volume_options options[] = { - {.key = {"limit-set"}}, - {.key = {"deem-statfs"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", - .description = "If set to on, it takes quota limits into" - "consideration while estimating fs size. (df command)" - " (Default is off)." - }, - {.key = {"server-quota"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", - .description = "Skip the quota enforcement if the feature is" - " not turned on. This is not a user exposed option." - }, - {.key = {"default-soft-limit"}, - .type = GF_OPTION_TYPE_PERCENT, - .default_value = "80%", - }, - {.key = {"soft-timeout"}, - .type = GF_OPTION_TYPE_TIME, - .min = 0, - .max = 1800, - .default_value = "60", - .description = "quota caches the directory sizes on client. " - "soft-timeout indicates the timeout for the validity of" - " cache before soft-limit has been crossed." - }, - {.key = {"hard-timeout"}, - .type = GF_OPTION_TYPE_TIME, - .min = 0, - .max = 60, - .default_value = "5", - .description = "quota caches the directory sizes on client. " - "hard-timeout indicates the timeout for the validity of" - " cache after soft-limit has been crossed." 
- }, - { .key = {"username"}, - .type = GF_OPTION_TYPE_ANY, - }, - { .key = {"password"}, - .type = GF_OPTION_TYPE_ANY, - }, - { .key = {"transport-type"}, - .value = {"tcp", "socket", "ib-verbs", "unix", "ib-sdp", - "tcp/client", "ib-verbs/client", "rdma"}, - .type = GF_OPTION_TYPE_STR, - }, - { .key = {"remote-host"}, - .type = GF_OPTION_TYPE_INTERNET_ADDRESS, - }, - { .key = {"remote-port"}, - .type = GF_OPTION_TYPE_INT, - }, - { .key = {"volume-uuid"}, - .type = GF_OPTION_TYPE_STR, - .description = "uuid of the volume this brick is part of." - }, - { .key = {"alert-time"}, - .type = GF_OPTION_TYPE_TIME, - .min = 0, - .max = 7*86400, - .default_value = "86400", - }, - {.key = {NULL}} + { + .key = {"enable"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "enable is the volume option that can be used " + "to turn on quota.", + .op_version = {1}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .level = OPT_STATUS_BASIC, + .tags = {}, + }, + { + .key = {"deem-statfs"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "on", + .description = "If set to on, it takes quota limits into" + " consideration while estimating fs size. (df command)" + " (Default is on).", + .op_version = {2}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {}, + }, + { + .key = {"server-quota"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "Skip the quota enforcement if the feature is" + " not turned on. This is not a user exposed option.", + .flags = OPT_FLAG_NONE, + }, + { + .key = {"default-soft-limit"}, + .type = GF_OPTION_TYPE_PERCENT, + .default_value = "80%", + .op_version = {3}, + .description = "Soft limit is expressed as a proportion of hard limit." 
+ " Default-soft-limit is the proportion used when the " + " user does not supply any soft limit value.", + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {}, + }, + { + .key = {"soft-timeout"}, + .type = GF_OPTION_TYPE_TIME, + .min = 0, + .max = 1800, + .default_value = "60", + .description = "quota caches the directory sizes on client. " + "soft-timeout indicates the timeout for the validity of" + " cache before soft-limit has been crossed.", + .op_version = {3}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {}, + }, + { + .key = {"hard-timeout"}, + .type = GF_OPTION_TYPE_TIME, + .min = 0, + .max = 60, + .default_value = "5", + .description = "quota caches the directory sizes on client. " + "hard-timeout indicates the timeout for the validity of" + " cache after soft-limit has been crossed.", + .op_version = {3}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {}, + }, + {.key = {"volume-uuid"}, + .type = GF_OPTION_TYPE_STR, + .default_value = "{{ volume.id }}", + .description = "uuid of the volume this brick is part of."}, + { + .key = {"alert-time"}, + .type = GF_OPTION_TYPE_TIME, + .min = 0, + .max = 7 * 86400, + .default_value = "86400", + .op_version = {3}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .description = "Frequency of limit breach messages in log.", + .tags = {}, + }, + {.key = {NULL}}}; + +xlator_api_t xlator_api = { + .init = init, + .fini = fini, + .notify = notify, + .reconfigure = reconfigure, + .mem_acct_init = mem_acct_init, + .op_version = {1}, /* Present from the initial version */ + .fops = &fops, + .cbks = &cbks, + .options = options, + .identifier = "quota", + .category = GF_MAINTAINED, }; diff --git a/xlators/features/quota/src/quota.h b/xlators/features/quota/src/quota.h index 1d9d88adad4..0395d78c9ef 100644 --- a/xlators/features/quota/src/quota.h +++ b/xlators/features/quota/src/quota.h @@ -10,272 +10,257 @@ #ifndef _QUOTA_H #define _QUOTA_H -#include "xlator.h" -#include "call-stub.h" -#include 
"defaults.h" -#include "common-utils.h" +#include <glusterfs/call-stub.h> #include "quota-mem-types.h" -#include "glusterfs.h" -#include "compat.h" -#include "logging.h" -#include "dict.h" -#include "stack.h" -#include "event.h" -#include "globals.h" +#include <glusterfs/glusterfs.h> +#include <glusterfs/compat.h> +#include <glusterfs/logging.h> +#include <glusterfs/dict.h> +#include <glusterfs/gf-event.h> #include "rpcsvc.h" #include "rpc-clnt.h" -#include "byte-order.h" +#include <glusterfs/byte-order.h> #include "glusterfs3-xdr.h" #include "glusterfs3.h" #include "xdr-generic.h" -#include "compat-errno.h" +#include <glusterfs/compat-errno.h> #include "protocol-common.h" -#include "quota-common-utils.h" +#include <glusterfs/quota-common-utils.h> #include "quota-messages.h" -#define DIRTY "dirty" -#define SIZE "size" -#define CONTRIBUTION "contri" -#define VAL_LENGTH 8 -#define READDIR_BUF 4096 +#define DIRTY "dirty" +#define SIZE "size" +#define CONTRIBUTION "contri" +#define VAL_LENGTH 8 +#define READDIR_BUF 4096 #ifndef UUID_CANONICAL_FORM_LEN #define UUID_CANONICAL_FORM_LEN 36 #endif -#define WIND_IF_QUOTAOFF(is_quota_on, label) \ - if (!is_quota_on) \ - goto label; - -#define QUOTA_WIND_FOR_INTERNAL_FOP(xdata, label) \ - do { \ - if (xdata && dict_get (xdata, GLUSTERFS_INTERNAL_FOP_KEY)) \ - goto label; \ - } while (0) - -#define DID_REACH_LIMIT(lim, prev_size, cur_size) \ - ((cur_size) >= (lim) && (prev_size) < (lim)) - -#define QUOTA_SAFE_INCREMENT(lock, var) \ - do { \ - LOCK (lock); \ - var ++; \ - UNLOCK (lock); \ - } while (0) - -#define QUOTA_SAFE_DECREMENT(lock, var) \ - do { \ - LOCK (lock); \ - var --; \ - UNLOCK (lock); \ - } while (0) - -#define QUOTA_ALLOC_OR_GOTO(var, type, label) \ - do { \ - var = GF_CALLOC (sizeof (type), 1, \ - gf_quota_mt_##type); \ - if (!var) { \ - gf_msg ("", GF_LOG_ERROR, \ - ENOMEM, Q_MSG_ENOMEM, \ - "out of memory"); \ - ret = -1; \ - goto label; \ - } \ - } while (0); - -#define QUOTA_STACK_WIND_TAIL(frame, 
params...) \ - do { \ - quota_local_t *_local = NULL; \ - xlator_t *_this = NULL; \ - \ - if (frame) { \ - _local = frame->local; \ - _this = frame->this; \ - frame->local = NULL; \ - } \ - \ - STACK_WIND_TAIL (frame, params); \ - \ - if (_local) \ - quota_local_cleanup (_local); \ - } while (0) - -#define QUOTA_STACK_UNWIND(fop, frame, params...) \ - do { \ - quota_local_t *_local = NULL; \ - xlator_t *_this = NULL; \ - if (frame) { \ - _local = frame->local; \ - _this = frame->this; \ - frame->local = NULL; \ - } \ - STACK_UNWIND_STRICT (fop, frame, params); \ - quota_local_cleanup (_local); \ - } while (0) - -#define QUOTA_FREE_CONTRIBUTION_NODE(_contribution) \ - do { \ - list_del (&_contribution->contri_list); \ - GF_FREE (_contribution); \ - } while (0) - -#define GET_CONTRI_KEY(var, _vol_name, _gfid, _ret) \ - do { \ - char _gfid_unparsed[40]; \ - if (_gfid != NULL) { \ - gf_uuid_unparse (_gfid, _gfid_unparsed);\ - _ret = gf_asprintf (var, QUOTA_XATTR_PREFIX \ - "%s.%s." CONTRIBUTION, \ - _vol_name, _gfid_unparsed); \ - } else { \ - _ret = gf_asprintf (var, QUOTA_XATTR_PREFIX \ - "%s.." CONTRIBUTION, \ - _vol_name); \ - } \ - } while (0) - - -#define GET_CONTRI_KEY_OR_GOTO(var, _vol_name, _gfid, label) \ - do { \ - GET_CONTRI_KEY(var, _vol_name, _gfid, ret); \ - if (ret == -1) \ - goto label; \ - } while (0) - -#define GET_DIRTY_KEY_OR_GOTO(var, _vol_name, label) \ - do { \ - ret = gf_asprintf (var, QUOTA_XATTR_PREFIX \ - "%s." 
DIRTY, _vol_name); \ - if (ret == -1) \ - goto label; \ - } while (0) - -#define QUOTA_REG_OR_LNK_FILE(ia_type) \ - (IA_ISREG (ia_type) || IA_ISLNK (ia_type)) - - +#define WIND_IF_QUOTAOFF(is_quota_on, label) \ + if (!is_quota_on) \ + goto label; + +#define QUOTA_WIND_FOR_INTERNAL_FOP(xdata, label) \ + do { \ + if (xdata && dict_get_sizen(xdata, GLUSTERFS_INTERNAL_FOP_KEY)) \ + goto label; \ + } while (0) + +#define DID_REACH_LIMIT(lim, prev_size, cur_size) \ + ((cur_size) >= (lim) && (prev_size) < (lim)) + +#define QUOTA_SAFE_INCREMENT(lock, var) \ + do { \ + LOCK(lock); \ + var++; \ + UNLOCK(lock); \ + } while (0) + +#define QUOTA_SAFE_DECREMENT(lock, var) \ + do { \ + LOCK(lock); \ + var--; \ + UNLOCK(lock); \ + } while (0) + +#define QUOTA_ALLOC_OR_GOTO(var, type, label) \ + do { \ + var = GF_CALLOC(sizeof(type), 1, gf_quota_mt_##type); \ + if (!var) { \ + gf_msg("", GF_LOG_ERROR, ENOMEM, Q_MSG_ENOMEM, "out of memory"); \ + ret = -1; \ + goto label; \ + } \ + } while (0); + +#define QUOTA_STACK_WIND_TAIL(frame, params...) \ + do { \ + quota_local_t *_local = NULL; \ + \ + if (frame) { \ + _local = frame->local; \ + frame->local = NULL; \ + } \ + \ + STACK_WIND_TAIL(frame, params); \ + \ + if (_local) \ + quota_local_cleanup(_local); \ + } while (0) + +#define QUOTA_STACK_UNWIND(fop, frame, params...) \ + do { \ + quota_local_t *_local = NULL; \ + if (frame) { \ + _local = frame->local; \ + frame->local = NULL; \ + } \ + STACK_UNWIND_STRICT(fop, frame, params); \ + quota_local_cleanup(_local); \ + } while (0) + +#define QUOTA_FREE_CONTRIBUTION_NODE(_contribution) \ + do { \ + list_del(&_contribution->contri_list); \ + GF_FREE(_contribution); \ + } while (0) + +#define GET_CONTRI_KEY(var, _vol_name, _gfid, _ret) \ + do { \ + char _gfid_unparsed[40]; \ + if (_gfid != NULL) { \ + gf_uuid_unparse(_gfid, _gfid_unparsed); \ + _ret = gf_asprintf(var, QUOTA_XATTR_PREFIX "%s.%s." 
CONTRIBUTION, \ + _vol_name, _gfid_unparsed); \ + } else { \ + _ret = gf_asprintf(var, QUOTA_XATTR_PREFIX "%s.." CONTRIBUTION, \ + _vol_name); \ + } \ + } while (0) + +#define GET_CONTRI_KEY_OR_GOTO(var, _vol_name, _gfid, label) \ + do { \ + GET_CONTRI_KEY(var, _vol_name, _gfid, ret); \ + if (ret == -1) \ + goto label; \ + } while (0) + +#define GET_DIRTY_KEY_OR_GOTO(var, _vol_name, label) \ + do { \ + ret = gf_asprintf(var, QUOTA_XATTR_PREFIX "%s." DIRTY, _vol_name); \ + if (ret == -1) \ + goto label; \ + } while (0) + +#define QUOTA_REG_OR_LNK_FILE(ia_type) (IA_ISREG(ia_type) || IA_ISLNK(ia_type)) struct quota_dentry { - char *name; - uuid_t par; - struct list_head next; + char *name; + uuid_t par; + struct list_head next; }; typedef struct quota_dentry quota_dentry_t; struct quota_inode_ctx { - int64_t size; - int64_t hard_lim; - int64_t soft_lim; - int64_t file_count; - int64_t dir_count; - int64_t object_hard_lim; - int64_t object_soft_lim; - struct iatt buf; - struct list_head parents; - struct timeval tv; - struct timeval prev_log; - gf_boolean_t ancestry_built; - gf_lock_t lock; + int64_t size; + int64_t hard_lim; + int64_t soft_lim; + int64_t file_count; + int64_t dir_count; + int64_t object_hard_lim; + int64_t object_soft_lim; + struct iatt buf; + struct list_head parents; + time_t validate_time; + time_t prev_log_time; + gf_boolean_t ancestry_built; + gf_lock_t lock; }; typedef struct quota_inode_ctx quota_inode_ctx_t; -typedef void -(*quota_ancestry_built_t) (struct list_head *parents, inode_t *inode, - int32_t op_ret, int32_t op_errno, void *data); +typedef void (*quota_ancestry_built_t)(struct list_head *parents, + inode_t *inode, int32_t op_ret, + int32_t op_errno, void *data); -typedef void -(*quota_fop_continue_t) (call_frame_t *frame); +typedef void (*quota_fop_continue_t)(call_frame_t *frame); struct quota_local { - gf_lock_t lock; - uint32_t link_count; - loc_t loc; - loc_t oldloc; - loc_t newloc; - loc_t validate_loc; - int64_t delta; - int32_t 
op_ret; - int32_t op_errno; - int64_t size; - char just_validated; - fop_lookup_cbk_t validate_cbk; - quota_fop_continue_t fop_continue_cbk; - inode_t *inode; - uuid_t common_ancestor; /* Used by quota_rename */ - call_stub_t *stub; - struct iobref *iobref; - quota_limits_t limit; - quota_limits_t object_limit; - int64_t space_available; - quota_ancestry_built_t ancestry_cbk; - void *ancestry_data; - dict_t *xdata; - dict_t *validate_xdata; - int32_t quotad_conn_retry; - xlator_t *this; - call_frame_t *par_frame; + gf_lock_t lock; + uint32_t link_count; + loc_t loc; + loc_t oldloc; + loc_t newloc; + loc_t validate_loc; + int64_t delta; + int8_t object_delta; + int32_t op_ret; + int32_t op_errno; + int64_t size; + char just_validated; + fop_lookup_cbk_t validate_cbk; + quota_fop_continue_t fop_continue_cbk; + inode_t *inode; + uuid_t common_ancestor; /* Used by quota_rename */ + call_stub_t *stub; + struct iobref *iobref; + quota_limits_t limit; + quota_limits_t object_limit; + int64_t space_available; + quota_ancestry_built_t ancestry_cbk; + void *ancestry_data; + dict_t *xdata; + dict_t *validate_xdata; + int32_t quotad_conn_retry; + xlator_t *this; + call_frame_t *par_frame; }; -typedef struct quota_local quota_local_t; +typedef struct quota_local quota_local_t; struct quota_priv { - uint32_t soft_timeout; - uint32_t hard_timeout; - uint32_t log_timeout; - double default_soft_lim; - gf_boolean_t is_quota_on; - gf_boolean_t consider_statfs; - gf_lock_t lock; - rpc_clnt_prog_t *quota_enforcer; - struct rpcsvc_program *quotad_aggregator; - struct rpc_clnt *rpc_clnt; - rpcsvc_t *rpcsvc; - inode_table_t *itable; - char *volume_uuid; - uint64_t validation_count; - int32_t quotad_conn_status; + /* FIXME: consider time_t for timeouts. 
*/ + uint32_t soft_timeout; + uint32_t hard_timeout; + uint32_t log_timeout; + double default_soft_lim; + gf_boolean_t is_quota_on; + gf_boolean_t consider_statfs; + gf_lock_t lock; + rpc_clnt_prog_t *quota_enforcer; + struct rpcsvc_program *quotad_aggregator; + struct rpc_clnt *rpc_clnt; + rpcsvc_t *rpcsvc; + inode_table_t *itable; + char *volume_uuid; + uint64_t validation_count; + int32_t quotad_conn_status; + pthread_mutex_t conn_mutex; + pthread_cond_t conn_cond; + gf_boolean_t conn_status; }; -typedef struct quota_priv quota_priv_t; +typedef struct quota_priv quota_priv_t; int -quota_enforcer_lookup (call_frame_t *frame, xlator_t *this, dict_t *xdata, - fop_lookup_cbk_t cbk); +quota_enforcer_lookup(call_frame_t *frame, xlator_t *this, dict_t *xdata, + fop_lookup_cbk_t cbk); void -_quota_enforcer_lookup (void *data); +_quota_enforcer_lookup(void *data); struct rpc_clnt * -quota_enforcer_init (xlator_t *this, dict_t *options); +quota_enforcer_init(xlator_t *this, dict_t *options); void -quota_log_usage (xlator_t *this, quota_inode_ctx_t *ctx, inode_t *inode, - int64_t delta); +quota_log_usage(xlator_t *this, quota_inode_ctx_t *ctx, inode_t *inode, + int64_t delta); int -quota_build_ancestry (inode_t *inode, quota_ancestry_built_t ancestry_cbk, - void *data); +quota_build_ancestry(inode_t *inode, quota_ancestry_built_t ancestry_cbk, + void *data); void -quota_get_limit_dir (call_frame_t *frame, inode_t *cur_inode, xlator_t *this); +quota_get_limit_dir(call_frame_t *frame, inode_t *cur_inode, xlator_t *this); int32_t -quota_check_limit (call_frame_t *frame, inode_t *inode, xlator_t *this); +quota_check_limit(call_frame_t *frame, inode_t *inode, xlator_t *this); inode_t * -do_quota_check_limit (call_frame_t *frame, inode_t *inode, xlator_t *this, - quota_dentry_t *dentry, gf_boolean_t force); +do_quota_check_limit(call_frame_t *frame, inode_t *inode, xlator_t *this, + quota_dentry_t *dentry, gf_boolean_t force); int -quota_fill_inodectx (xlator_t *this, inode_t 
*inode, dict_t *dict, - loc_t *loc, struct iatt *buf, int32_t *op_errno); +quota_fill_inodectx(xlator_t *this, inode_t *inode, dict_t *dict, loc_t *loc, + struct iatt *buf, int32_t *op_errno); int32_t -quota_check_size_limit (call_frame_t *frame, quota_inode_ctx_t *ctx, - quota_priv_t *priv, inode_t *_inode, xlator_t *this, - int32_t *op_errno, int just_validated, int64_t delta, - quota_local_t *local, gf_boolean_t *skip_check); +quota_check_size_limit(call_frame_t *frame, quota_inode_ctx_t *ctx, + quota_priv_t *priv, inode_t *_inode, xlator_t *this, + int32_t *op_errno, int just_validated, int64_t delta, + quota_local_t *local, gf_boolean_t *skip_check); int32_t -quota_check_object_limit (call_frame_t *frame, quota_inode_ctx_t *ctx, - quota_priv_t *priv, inode_t *_inode, xlator_t *this, - int32_t *op_errno, int just_validated, - quota_local_t *local, gf_boolean_t *skip_check); +quota_check_object_limit(call_frame_t *frame, quota_inode_ctx_t *ctx, + quota_priv_t *priv, inode_t *_inode, xlator_t *this, + int32_t *op_errno, int just_validated, + quota_local_t *local, gf_boolean_t *skip_check); #endif diff --git a/xlators/features/quota/src/quotad-aggregator.c b/xlators/features/quota/src/quotad-aggregator.c index 8a7cfdca3f5..75d47867b5b 100644 --- a/xlators/features/quota/src/quotad-aggregator.c +++ b/xlators/features/quota/src/quotad-aggregator.c @@ -13,443 +13,482 @@ #include "quotad-helpers.h" #include "quotad-aggregator.h" -struct rpcsvc_program quotad_aggregator_prog; +static char *qd_ext_xattrs[] = { + QUOTA_SIZE_KEY, + QUOTA_LIMIT_KEY, + QUOTA_LIMIT_OBJECTS_KEY, + NULL, +}; + +static struct rpcsvc_program quotad_aggregator_prog; struct iobuf * -quotad_serialize_reply (rpcsvc_request_t *req, void *arg, struct iovec *outmsg, - xdrproc_t xdrproc) +quotad_serialize_reply(rpcsvc_request_t *req, void *arg, struct iovec *outmsg, + xdrproc_t xdrproc) { - struct iobuf *iob = NULL; - ssize_t retlen = 0; - ssize_t xdr_size = 0; - - GF_VALIDATE_OR_GOTO ("server", req, 
ret); - - /* First, get the io buffer into which the reply in arg will - * be serialized. + struct iobuf *iob = NULL; + ssize_t retlen = 0; + ssize_t xdr_size = 0; + + GF_VALIDATE_OR_GOTO("server", req, ret); + + /* First, get the io buffer into which the reply in arg will + * be serialized. + */ + if (arg && xdrproc) { + xdr_size = xdr_sizeof(xdrproc, arg); + iob = iobuf_get2(req->svc->ctx->iobuf_pool, xdr_size); + if (!iob) { + gf_log_callingfn(THIS->name, GF_LOG_ERROR, "Failed to get iobuf"); + goto ret; + }; + + iobuf_to_iovec(iob, outmsg); + /* Use the given serializer to translate the given C structure + * in arg to XDR format which will be written into the buffer + * in outmsg. + */ + /* retlen is used to received the error since size_t is unsigned and we + * need -1 for error notification during encoding. */ - if (arg && xdrproc) { - xdr_size = xdr_sizeof (xdrproc, arg); - iob = iobuf_get2 (req->svc->ctx->iobuf_pool, xdr_size); - if (!iob) { - gf_log_callingfn (THIS->name, GF_LOG_ERROR, - "Failed to get iobuf"); - goto ret; - }; - - iobuf_to_iovec (iob, outmsg); - /* Use the given serializer to translate the give C structure in arg - * to XDR format which will be written into the buffer in outmsg. - */ - /* retlen is used to received the error since size_t is unsigned and we - * need -1 for error notification during encoding. - */ - - retlen = xdr_serialize_generic (*outmsg, arg, xdrproc); - if (retlen == -1) { - /* Failed to Encode 'GlusterFS' msg in RPC is not exactly - failure of RPC return values.. client should get - notified about this, so there are no missing frames */ - gf_log_callingfn ("", GF_LOG_ERROR, "Failed to encode message"); - req->rpc_err = GARBAGE_ARGS; - retlen = 0; - } + + retlen = xdr_serialize_generic(*outmsg, arg, xdrproc); + if (retlen == -1) { + /* Failed to Encode 'GlusterFS' msg in RPC is not exactly + failure of RPC return values.. 
Client should get + notified about this, so there are no missing frames */ + gf_log_callingfn("", GF_LOG_ERROR, "Failed to encode message"); + req->rpc_err = GARBAGE_ARGS; + retlen = 0; } - outmsg->iov_len = retlen; + } + outmsg->iov_len = retlen; ret: - return iob; + return iob; } int -quotad_aggregator_submit_reply (call_frame_t *frame, rpcsvc_request_t *req, - void *arg, struct iovec *payload, - int payloadcount, struct iobref *iobref, - xdrproc_t xdrproc) +quotad_aggregator_submit_reply(call_frame_t *frame, rpcsvc_request_t *req, + void *arg, struct iovec *payload, + int payloadcount, struct iobref *iobref, + xdrproc_t xdrproc) { - struct iobuf *iob = NULL; - int ret = -1; - struct iovec rsp = {0,}; - quotad_aggregator_state_t *state = NULL; - char new_iobref = 0; + struct iobuf *iob = NULL; + int ret = -1; + struct iovec rsp = { + 0, + }; + quotad_aggregator_state_t *state = NULL; + char new_iobref = 0; - GF_VALIDATE_OR_GOTO ("server", req, ret); + GF_VALIDATE_OR_GOTO("server", req, ret); - if (frame) { - state = frame->root->state; - frame->local = NULL; - } + if (frame) { + state = frame->root->state; + frame->local = NULL; + } + if (!iobref) { + iobref = iobref_new(); if (!iobref) { - iobref = iobref_new (); - if (!iobref) { - goto ret; - } - - new_iobref = 1; + goto ret; } - iob = quotad_serialize_reply (req, arg, &rsp, xdrproc); - if (!iob) { - gf_msg ("", GF_LOG_ERROR, 0, Q_MSG_DICT_SERIALIZE_FAIL, - "Failed to serialize reply"); - goto ret; - } + new_iobref = 1; + } + + iob = quotad_serialize_reply(req, arg, &rsp, xdrproc); + if (!iob) { + gf_msg("", GF_LOG_ERROR, 0, Q_MSG_DICT_SERIALIZE_FAIL, + "Failed to serialize reply"); + goto ret; + } - iobref_add (iobref, iob); + iobref_add(iobref, iob); - ret = rpcsvc_submit_generic (req, &rsp, 1, payload, payloadcount, - iobref); + ret = rpcsvc_submit_generic(req, &rsp, 1, payload, payloadcount, iobref); - iobuf_unref (iob); + iobuf_unref(iob); - ret = 0; + ret = 0; ret: - if (state) { - 
quotad_aggregator_free_state (state); - } + if (state) { + quotad_aggregator_free_state(state); + } - if (frame) - STACK_DESTROY (frame->root); + if (frame) + STACK_DESTROY(frame->root); - if (new_iobref) { - iobref_unref (iobref); - } + if (new_iobref) { + iobref_unref(iobref); + } - return ret; + return ret; } int -quotad_aggregator_getlimit_cbk (xlator_t *this, call_frame_t *frame, - void *lookup_rsp) +quotad_aggregator_getlimit_cbk(xlator_t *this, call_frame_t *frame, + void *lookup_rsp) { - gfs3_lookup_rsp *rsp = lookup_rsp; - gf_cli_rsp cli_rsp = {0,}; - dict_t *xdata = NULL; - quotad_aggregator_state_t *state = NULL; - int ret = -1; - int type = 0; - - GF_PROTOCOL_DICT_UNSERIALIZE (frame->this, xdata, - (rsp->xdata.xdata_val), - (rsp->xdata.xdata_len), rsp->op_ret, - rsp->op_errno, out); - - if (xdata) { - state = frame->root->state; - ret = dict_get_int32 (state->xdata, "type", &type); - if (ret < 0) - goto out; - - ret = dict_set_int32 (xdata, "type", type); - if (ret < 0) - goto out; - } + gfs3_lookup_rsp *rsp = lookup_rsp; + gf_cli_rsp cli_rsp = { + 0, + }; + dict_t *xdata = NULL; + quotad_aggregator_state_t *state = NULL; + int ret = -1; + int type = 0; + + if (!rsp || (rsp->op_ret == -1)) + goto reply; + + GF_PROTOCOL_DICT_UNSERIALIZE(frame->this, xdata, (rsp->xdata.xdata_val), + (rsp->xdata.xdata_len), rsp->op_ret, + rsp->op_errno, out); + + if (xdata) { + state = frame->root->state; + ret = dict_get_int32n(state->req_xdata, "type", SLEN("type"), &type); + if (ret < 0) + goto out; + + ret = dict_set_int32_sizen(xdata, "type", type); + if (ret < 0) + goto out; + } - ret = 0; + ret = 0; out: - rsp->op_ret = ret; - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - Q_MSG_DICT_UNSERIALIZE_FAIL, - "failed to unserialize " - "nameless lookup rsp"); - goto reply; - } - cli_rsp.op_ret = rsp->op_ret; - cli_rsp.op_errno = rsp->op_errno; - cli_rsp.op_errstr = ""; - if (xdata) { - GF_PROTOCOL_DICT_SERIALIZE (frame->this, xdata, - (&cli_rsp.dict.dict_val), - 
(cli_rsp.dict.dict_len), - cli_rsp.op_errno, reply); - } + rsp->op_ret = ret; + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, Q_MSG_DICT_UNSERIALIZE_FAIL, + "failed to unserialize " + "nameless lookup rsp"); + goto reply; + } + cli_rsp.op_ret = rsp->op_ret; + cli_rsp.op_errno = rsp->op_errno; + cli_rsp.op_errstr = ""; + if (xdata) { + GF_PROTOCOL_DICT_SERIALIZE(frame->this, xdata, (&cli_rsp.dict.dict_val), + (cli_rsp.dict.dict_len), cli_rsp.op_errno, + reply); + } reply: - quotad_aggregator_submit_reply (frame, frame->local, (void*)&cli_rsp, NULL, 0, - NULL, (xdrproc_t)xdr_gf_cli_rsp); + quotad_aggregator_submit_reply(frame, (frame) ? frame->local : NULL, + (void *)&cli_rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gf_cli_rsp); - dict_unref (xdata); - GF_FREE (cli_rsp.dict.dict_val); - return 0; + dict_unref(xdata); + GF_FREE(cli_rsp.dict.dict_val); + return 0; } int -quotad_aggregator_getlimit (rpcsvc_request_t *req) +quotad_aggregator_getlimit(rpcsvc_request_t *req) { - call_frame_t *frame = NULL; - gf_cli_req cli_req = {{0}, }; - gf_cli_rsp cli_rsp = {0}; - gfs3_lookup_req args = {{0,},}; - gfs3_lookup_rsp rsp = {0,}; - quotad_aggregator_state_t *state = NULL; - xlator_t *this = NULL; - dict_t *dict = NULL; - int ret = -1, op_errno = 0; - char *gfid_str = NULL; - uuid_t gfid = {0}; - - GF_VALIDATE_OR_GOTO ("quotad-aggregator", req, err); - - this = THIS; - - ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); - if (ret < 0) { - //failed to decode msg; - gf_msg ("this->name", GF_LOG_ERROR, 0, Q_MSG_XDR_DECODE_ERROR, - "xdr decoding error"); - req->rpc_err = GARBAGE_ARGS; - goto err; - } - - if (cli_req.dict.dict_len) { - dict = dict_new (); - ret = dict_unserialize (cli_req.dict.dict_val, - cli_req.dict.dict_len, &dict); - if (ret < 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - Q_MSG_DICT_UNSERIALIZE_FAIL, - "Failed to unserialize req-buffer to " - "dictionary"); - goto err; - } - } - - ret = dict_get_str (dict, "gfid", &gfid_str); - if (ret) { - 
goto err; - } - - gf_uuid_parse ((const char*)gfid_str, gfid); - - frame = quotad_aggregator_get_frame_from_req (req); - if (frame == NULL) { - rsp.op_errno = ENOMEM; - goto err; - } - state = frame->root->state; - state->xdata = dict; - - ret = dict_set_int32 (state->xdata, QUOTA_LIMIT_KEY, 42); - if (ret) - goto err; - - ret = dict_set_int32 (state->xdata, QUOTA_LIMIT_OBJECTS_KEY, 42); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, Q_MSG_ENOMEM, - "Failed to set QUOTA_LIMIT_OBJECTS_KEY"); - goto err; - } - - ret = dict_set_int32 (state->xdata, QUOTA_SIZE_KEY, 42); - if (ret) - goto err; - - ret = dict_set_int32 (state->xdata, GET_ANCESTRY_PATH_KEY, 42); - if (ret) - goto err; - - memcpy (&args.gfid, &gfid, 16); - - args.bname = alloca (req->msg[0].iov_len); - args.xdata.xdata_val = alloca (req->msg[0].iov_len); - - ret = qd_nameless_lookup (this, frame, &args, state->xdata, - quotad_aggregator_getlimit_cbk); - if (ret) { - rsp.op_errno = ret; - goto err; + call_frame_t *frame = NULL; + gf_cli_req cli_req = { + {0}, + }; + gf_cli_rsp cli_rsp = {0}; + quotad_aggregator_state_t *state = NULL; + xlator_t *this = NULL; + dict_t *dict = NULL; + int ret = -1, op_errno = 0; + char *gfid_str = NULL; + uuid_t gfid = {0}; + char *volume_uuid = NULL; + + GF_VALIDATE_OR_GOTO("quotad-aggregator", req, err); + + this = THIS; + + cli_req.dict.dict_val = alloca(req->msg[0].iov_len); + + ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + // failed to decode msg; + gf_msg("this->name", GF_LOG_ERROR, 0, Q_MSG_XDR_DECODE_ERROR, + "xdr decoding error"); + req->rpc_err = GARBAGE_ARGS; + goto err; + } + + if (cli_req.dict.dict_len) { + dict = dict_new(); + ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, Q_MSG_DICT_UNSERIALIZE_FAIL, + "Failed to unserialize req-buffer to " + "dictionary"); + goto err; } - - return ret; + } + + ret = dict_get_strn(dict, 
"gfid", SLEN("gfid"), &gfid_str); + if (ret) { + goto err; + } + + ret = dict_get_strn(dict, "volume-uuid", SLEN("volume-uuid"), &volume_uuid); + if (ret) { + goto err; + } + + gf_uuid_parse((const char *)gfid_str, gfid); + + frame = quotad_aggregator_get_frame_from_req(req); + if (frame == NULL) { + cli_rsp.op_errno = ENOMEM; + goto errx; + } + state = frame->root->state; + state->req_xdata = dict; + state->xdata = dict_new(); + dict = NULL; + + ret = dict_set_int32_sizen(state->xdata, QUOTA_LIMIT_KEY, 42); + if (ret) + goto err; + + ret = dict_set_int32_sizen(state->xdata, QUOTA_LIMIT_OBJECTS_KEY, 42); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, Q_MSG_ENOMEM, + "Failed to set QUOTA_LIMIT_OBJECTS_KEY"); + goto err; + } + + ret = dict_set_int32_sizen(state->xdata, QUOTA_SIZE_KEY, 42); + if (ret) + goto err; + + ret = dict_set_int32_sizen(state->xdata, GET_ANCESTRY_PATH_KEY, 42); + if (ret) + goto err; + + ret = qd_nameless_lookup(this, frame, (char *)gfid, state->xdata, + volume_uuid, quotad_aggregator_getlimit_cbk); + if (ret) { + cli_rsp.op_errno = ret; + goto errx; + } + + return ret; err: - cli_rsp.op_ret = -1; - cli_rsp.op_errno = op_errno; - cli_rsp.op_errstr = ""; - - quotad_aggregator_getlimit_cbk (this, frame, &cli_rsp); - if (dict) - dict_unref (dict); - - return ret; + cli_rsp.op_errno = op_errno; +errx: + cli_rsp.op_ret = -1; + cli_rsp.op_errstr = ""; + + quotad_aggregator_getlimit_cbk(this, frame, &cli_rsp); + if (dict) + dict_unref(dict); + return ret; } int -quotad_aggregator_lookup_cbk (xlator_t *this, call_frame_t *frame, - void *rsp) +quotad_aggregator_lookup_cbk(xlator_t *this, call_frame_t *frame, void *rsp) { - quotad_aggregator_submit_reply (frame, frame->local, rsp, NULL, 0, NULL, - (xdrproc_t)xdr_gfs3_lookup_rsp); + quotad_aggregator_submit_reply(frame, frame ? 
frame->local : NULL, rsp, + NULL, 0, NULL, + (xdrproc_t)xdr_gfs3_lookup_rsp); - return 0; + return 0; } - int -quotad_aggregator_lookup (rpcsvc_request_t *req) +quotad_aggregator_lookup(rpcsvc_request_t *req) { - call_frame_t *frame = NULL; - gfs3_lookup_req args = {{0,},}; - int ret = -1, op_errno = 0; - gfs3_lookup_rsp rsp = {0,}; - quotad_aggregator_state_t *state = NULL; - xlator_t *this = NULL; - - GF_VALIDATE_OR_GOTO ("quotad-aggregator", req, err); - - this = THIS; - - args.bname = alloca (req->msg[0].iov_len); - args.xdata.xdata_val = alloca (req->msg[0].iov_len); - - ret = xdr_to_generic (req->msg[0], &args, - (xdrproc_t)xdr_gfs3_lookup_req); - if (ret < 0) { - rsp.op_errno = EINVAL; + call_frame_t *frame = NULL; + gfs3_lookup_req args = { + { + 0, + }, + }; + int i = 0, ret = -1, op_errno = 0; + gfs3_lookup_rsp rsp = { + 0, + }; + quotad_aggregator_state_t *state = NULL; + xlator_t *this = NULL; + dict_t *dict = NULL; + char *volume_uuid = NULL; + + GF_VALIDATE_OR_GOTO("quotad-aggregator", req, err); + + this = THIS; + + args.bname = alloca(req->msg[0].iov_len); + args.xdata.xdata_val = alloca(req->msg[0].iov_len); + + ret = xdr_to_generic(req->msg[0], &args, (xdrproc_t)xdr_gfs3_lookup_req); + if (ret < 0) { + rsp.op_errno = EINVAL; + goto err; + } + + frame = quotad_aggregator_get_frame_from_req(req); + if (frame == NULL) { + rsp.op_errno = ENOMEM; + goto err; + } + + state = frame->root->state; + + GF_PROTOCOL_DICT_UNSERIALIZE(this, dict, (args.xdata.xdata_val), + (args.xdata.xdata_len), ret, op_errno, err); + + ret = dict_get_str(dict, "volume-uuid", &volume_uuid); + if (ret) { + goto err; + } + + state->xdata = dict_new(); + + for (i = 0; qd_ext_xattrs[i]; i++) { + if (dict_get(dict, qd_ext_xattrs[i])) { + ret = dict_set_uint32(state->xdata, qd_ext_xattrs[i], 1); + if (ret < 0) goto err; } + } - frame = quotad_aggregator_get_frame_from_req (req); - if (frame == NULL) { - rsp.op_errno = ENOMEM; - goto err; - } - - state = frame->root->state; - - 
GF_PROTOCOL_DICT_UNSERIALIZE (this, state->xdata, - (args.xdata.xdata_val), - (args.xdata.xdata_len), ret, - op_errno, err); + ret = qd_nameless_lookup(this, frame, args.gfid, state->xdata, volume_uuid, + quotad_aggregator_lookup_cbk); + if (ret) { + rsp.op_errno = ret; + goto err; + } + if (dict) + dict_unref(dict); - ret = qd_nameless_lookup (this, frame, &args, state->xdata, - quotad_aggregator_lookup_cbk); - if (ret) { - rsp.op_errno = ret; - goto err; - } - - return ret; + return ret; err: - rsp.op_ret = -1; - rsp.op_errno = op_errno; + rsp.op_ret = -1; + rsp.op_errno = op_errno; + + quotad_aggregator_lookup_cbk(this, frame, &rsp); + if (dict) + dict_unref(dict); - quotad_aggregator_lookup_cbk (this, frame, &rsp); - return ret; + return ret; } int -quotad_aggregator_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event, - void *data) +quotad_aggregator_rpc_notify(rpcsvc_t *rpc, void *xl, rpcsvc_event_t event, + void *data) { - if (!xl || !data) { - gf_log_callingfn ("server", GF_LOG_WARNING, - "Calling rpc_notify without initializing"); - goto out; - } + if (!xl || !data) { + gf_log_callingfn("server", GF_LOG_WARNING, + "Calling rpc_notify without initializing"); + goto out; + } - switch (event) { + switch (event) { case RPCSVC_EVENT_ACCEPT: - break; + break; case RPCSVC_EVENT_DISCONNECT: - break; + break; default: - break; - } + break; + } out: - return 0; + return 0; } int -quotad_aggregator_init (xlator_t *this) +quotad_aggregator_init(xlator_t *this) { - quota_priv_t *priv = NULL; - int ret = -1; - - priv = this->private; - - if (priv->rpcsvc) { - /* Listener already created */ - return 0; - } - - ret = dict_set_str (this->options, "transport.address-family", "unix"); - if (ret) - goto out; - - ret = dict_set_str (this->options, "transport-type", "socket"); - if (ret) - goto out; - - ret = dict_set_str (this->options, "transport.socket.listen-path", - "/var/run/gluster/quotad.socket"); - if (ret) - goto out; - - /* RPC related */ - priv->rpcsvc = 
rpcsvc_init (this, this->ctx, this->options, 0); - if (priv->rpcsvc == NULL) { - gf_msg (this->name, GF_LOG_WARNING, 0, - Q_MSG_RPCSVC_INIT_FAILED, - "creation of rpcsvc failed"); - ret = -1; - goto out; - } - - ret = rpcsvc_create_listeners (priv->rpcsvc, this->options, - this->name); - if (ret < 1) { - gf_msg (this->name, GF_LOG_WARNING, 0, - Q_MSG_RPCSVC_LISTENER_CREATION_FAILED, - "creation of listener failed"); - ret = -1; - goto out; - } + quota_priv_t *priv = NULL; + int ret = -1; - priv->quotad_aggregator = &quotad_aggregator_prog; - quotad_aggregator_prog.options = this->options; - - ret = rpcsvc_program_register (priv->rpcsvc, &quotad_aggregator_prog); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, 0, - Q_MSG_RPCSVC_REGISTER_FAILED, - "registration of program (name:%s, prognum:%d, " - "progver:%d) failed", quotad_aggregator_prog.progname, - quotad_aggregator_prog.prognum, - quotad_aggregator_prog.progver); - goto out; - } + priv = this->private; - ret = 0; + if (priv->rpcsvc) { + /* Listener already created */ + return 0; + } + + ret = dict_set_nstrn(this->options, "transport.address-family", + SLEN("transport.address-family"), "unix", + SLEN("unix")); + if (ret) + goto out; + + ret = dict_set_nstrn(this->options, "transport-type", + SLEN("transport-type"), "socket", SLEN("socket")); + if (ret) + goto out; + + ret = dict_set_nstrn(this->options, "transport.socket.listen-path", + SLEN("transport.socket.listen-path"), + "/var/run/gluster/quotad.socket", + SLEN("/var/run/gluster/quotad.socket")); + if (ret) + goto out; + + /* RPC related */ + priv->rpcsvc = rpcsvc_init(this, this->ctx, this->options, 0); + if (priv->rpcsvc == NULL) { + gf_msg(this->name, GF_LOG_WARNING, 0, Q_MSG_RPCSVC_INIT_FAILED, + "creation of rpcsvc failed"); + ret = -1; + goto out; + } + + ret = rpcsvc_create_listeners(priv->rpcsvc, this->options, this->name); + if (ret < 1) { + gf_msg(this->name, GF_LOG_WARNING, 0, + Q_MSG_RPCSVC_LISTENER_CREATION_FAILED, + "creation of listener failed"); 
+ ret = -1; + goto out; + } + + priv->quotad_aggregator = &quotad_aggregator_prog; + quotad_aggregator_prog.options = this->options; + + ret = rpcsvc_program_register(priv->rpcsvc, &quotad_aggregator_prog, + _gf_false); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, Q_MSG_RPCSVC_REGISTER_FAILED, + "registration of program (name:%s, prognum:%d, " + "progver:%d) failed", + quotad_aggregator_prog.progname, quotad_aggregator_prog.prognum, + quotad_aggregator_prog.progver); + goto out; + } + + ret = 0; out: - if (ret && priv->rpcsvc) { - GF_FREE (priv->rpcsvc); - priv->rpcsvc = NULL; - } + if (ret && priv->rpcsvc) { + GF_FREE(priv->rpcsvc); + priv->rpcsvc = NULL; + } - return ret; + return ret; } -rpcsvc_actor_t quotad_aggregator_actors[GF_AGGREGATOR_MAXVALUE] = { - [GF_AGGREGATOR_NULL] = {"NULL", GF_AGGREGATOR_NULL, NULL, NULL, 0, - DRC_NA}, - [GF_AGGREGATOR_LOOKUP] = {"LOOKUP", GF_AGGREGATOR_NULL, - quotad_aggregator_lookup, NULL, 0, DRC_NA}, - [GF_AGGREGATOR_GETLIMIT] = {"GETLIMIT", GF_AGGREGATOR_GETLIMIT, - quotad_aggregator_getlimit, NULL, 0, DRC_NA}, +static rpcsvc_actor_t quotad_aggregator_actors[GF_AGGREGATOR_MAXVALUE] = { + [GF_AGGREGATOR_NULL] = {"NULL", NULL, NULL, GF_AGGREGATOR_NULL, DRC_NA, 0}, + [GF_AGGREGATOR_LOOKUP] = {"LOOKUP", quotad_aggregator_lookup, NULL, + GF_AGGREGATOR_NULL, DRC_NA, 0}, + [GF_AGGREGATOR_GETLIMIT] = {"GETLIMIT", quotad_aggregator_getlimit, NULL, + GF_AGGREGATOR_GETLIMIT, DRC_NA, 0}, }; - -struct rpcsvc_program quotad_aggregator_prog = { - .progname = "GlusterFS 3.3", - .prognum = GLUSTER_AGGREGATOR_PROGRAM, - .progver = GLUSTER_AGGREGATOR_VERSION, - .numactors = GF_AGGREGATOR_MAXVALUE, - .actors = quotad_aggregator_actors -}; +static struct rpcsvc_program quotad_aggregator_prog = { + .progname = "GlusterFS 3.3", + .prognum = GLUSTER_AGGREGATOR_PROGRAM, + .progver = GLUSTER_AGGREGATOR_VERSION, + .numactors = GF_AGGREGATOR_MAXVALUE, + .actors = quotad_aggregator_actors}; diff --git a/xlators/features/quota/src/quotad-aggregator.h 
b/xlators/features/quota/src/quotad-aggregator.h index 5ddea5b3c46..706592c7d50 100644 --- a/xlators/features/quota/src/quotad-aggregator.h +++ b/xlators/features/quota/src/quotad-aggregator.h @@ -12,26 +12,27 @@ #define _QUOTAD_AGGREGATOR_H #include "quota.h" -#include "stack.h" +#include <glusterfs/stack.h> #include "glusterfs3-xdr.h" -#include "inode.h" +#include <glusterfs/inode.h> typedef struct { - void *pool; - xlator_t *this; - xlator_t *active_subvol; - inode_table_t *itable; - loc_t loc; - dict_t *xdata; + void *pool; + xlator_t *this; + xlator_t *active_subvol; + inode_table_t *itable; + loc_t loc; + dict_t *xdata; + dict_t *req_xdata; } quotad_aggregator_state_t; -typedef int (*quotad_aggregator_lookup_cbk_t) (xlator_t *this, - call_frame_t *frame, - void *rsp); +typedef int (*quotad_aggregator_lookup_cbk_t)(xlator_t *this, + call_frame_t *frame, void *rsp); int -qd_nameless_lookup (xlator_t *this, call_frame_t *frame, gfs3_lookup_req *req, - dict_t *xdata, quotad_aggregator_lookup_cbk_t lookup_cbk); +qd_nameless_lookup(xlator_t *this, call_frame_t *frame, char *gfid, + dict_t *xdata, char *volume_uuid, + quotad_aggregator_lookup_cbk_t lookup_cbk); int -quotad_aggregator_init (xlator_t *this); +quotad_aggregator_init(xlator_t *this); #endif diff --git a/xlators/features/quota/src/quotad-helpers.c b/xlators/features/quota/src/quotad-helpers.c index 70298fc87f5..51ff1d7e98d 100644 --- a/xlators/features/quota/src/quotad-helpers.c +++ b/xlators/features/quota/src/quotad-helpers.c @@ -11,97 +11,97 @@ #include "quotad-helpers.h" quotad_aggregator_state_t * -get_quotad_aggregator_state (xlator_t *this, rpcsvc_request_t *req) +get_quotad_aggregator_state(xlator_t *this, rpcsvc_request_t *req) { - quotad_aggregator_state_t *state = NULL; - xlator_t *active_subvol = NULL; - quota_priv_t *priv = NULL; + quotad_aggregator_state_t *state = NULL; + xlator_t *active_subvol = NULL; + quota_priv_t *priv = NULL; - state = (void *)GF_CALLOC (1, sizeof (*state), - 
gf_quota_mt_aggregator_state_t); - if (!state) - return NULL; + state = (void *)GF_CALLOC(1, sizeof(*state), + gf_quota_mt_aggregator_state_t); + if (!state) + return NULL; - state->this = THIS; - priv = this->private; + state->this = THIS; + priv = this->private; - LOCK (&priv->lock); - { - active_subvol = state->active_subvol = FIRST_CHILD (this); - } - UNLOCK (&priv->lock); + LOCK(&priv->lock); + { + active_subvol = state->active_subvol = FIRST_CHILD(this); + } + UNLOCK(&priv->lock); - if (active_subvol->itable == NULL) - active_subvol->itable = inode_table_new (4096, active_subvol); + if (active_subvol->itable == NULL) + active_subvol->itable = inode_table_new(4096, active_subvol); - state->itable = active_subvol->itable; + state->itable = active_subvol->itable; - state->pool = this->ctx->pool; + state->pool = this->ctx->pool; - return state; + return state; } void -quotad_aggregator_free_state (quotad_aggregator_state_t *state) +quotad_aggregator_free_state(quotad_aggregator_state_t *state) { - if (state->xdata) - dict_unref (state->xdata); + if (state->xdata) + dict_unref(state->xdata); - GF_FREE (state); + if (state->req_xdata) + dict_unref(state->req_xdata); + + GF_FREE(state); } call_frame_t * -quotad_aggregator_alloc_frame (rpcsvc_request_t *req) +quotad_aggregator_alloc_frame(rpcsvc_request_t *req) { - call_frame_t *frame = NULL; - quotad_aggregator_state_t *state = NULL; - xlator_t *this = NULL; + call_frame_t *frame = NULL; + quotad_aggregator_state_t *state = NULL; + xlator_t *this = NULL; - GF_VALIDATE_OR_GOTO ("server", req, out); - GF_VALIDATE_OR_GOTO ("server", req->trans, out); - GF_VALIDATE_OR_GOTO ("server", req->svc, out); - GF_VALIDATE_OR_GOTO ("server", req->svc->ctx, out); + GF_VALIDATE_OR_GOTO("server", req, out); + GF_VALIDATE_OR_GOTO("server", req->trans, out); + GF_VALIDATE_OR_GOTO("server", req->svc, out); + GF_VALIDATE_OR_GOTO("server", req->svc->ctx, out); - this = req->svc->xl; + this = req->svc->xl; - frame = create_frame (this, 
req->svc->ctx->pool); - if (!frame) - goto out; + frame = create_frame(this, req->svc->ctx->pool); + if (!frame) + goto out; - state = get_quotad_aggregator_state (this, req); - if (!state) - goto out; + state = get_quotad_aggregator_state(this, req); + if (!state) + goto out; - frame->root->state = state; - frame->root->unique = 0; + frame->root->state = state; - frame->this = this; + frame->this = this; out: - return frame; + return frame; } call_frame_t * -quotad_aggregator_get_frame_from_req (rpcsvc_request_t *req) +quotad_aggregator_get_frame_from_req(rpcsvc_request_t *req) { - call_frame_t *frame = NULL; - - GF_VALIDATE_OR_GOTO ("server", req, out); + call_frame_t *frame = NULL; - frame = quotad_aggregator_alloc_frame (req); - if (!frame) - goto out; + GF_VALIDATE_OR_GOTO("server", req, out); - frame->root->op = req->procnum; + frame = quotad_aggregator_alloc_frame(req); + if (!frame) + goto out; - frame->root->unique = req->xid; + frame->root->op = req->procnum; - frame->root->uid = req->uid; - frame->root->gid = req->gid; - frame->root->pid = req->pid; + frame->root->uid = req->uid; + frame->root->gid = req->gid; + frame->root->pid = req->pid; - frame->root->lk_owner = req->lk_owner; + frame->root->lk_owner = req->lk_owner; - frame->local = req; + frame->local = req; out: - return frame; + return frame; } diff --git a/xlators/features/quota/src/quotad-helpers.h b/xlators/features/quota/src/quotad-helpers.h index a10fb7fa82a..bcb39fe845e 100644 --- a/xlators/features/quota/src/quotad-helpers.h +++ b/xlators/features/quota/src/quotad-helpers.h @@ -16,9 +16,9 @@ #include "quotad-aggregator.h" void -quotad_aggregator_free_state (quotad_aggregator_state_t *state); +quotad_aggregator_free_state(quotad_aggregator_state_t *state); call_frame_t * -quotad_aggregator_get_frame_from_req (rpcsvc_request_t *req); +quotad_aggregator_get_frame_from_req(rpcsvc_request_t *req); #endif diff --git a/xlators/features/quota/src/quotad.c b/xlators/features/quota/src/quotad.c 
index 028c8047141..643f25c9c2a 100644 --- a/xlators/features/quota/src/quotad.c +++ b/xlators/features/quota/src/quotad.c @@ -9,226 +9,237 @@ */ #include "quota.h" #include "quotad-aggregator.h" -#include "common-utils.h" int -qd_notify (xlator_t *this, int32_t event, void *data, ...) +qd_notify(xlator_t *this, int32_t event, void *data, ...) { - switch (event) { + switch (event) { case GF_EVENT_PARENT_UP: - quotad_aggregator_init (this); - } + quotad_aggregator_init(this); + } - default_notify (this, event, data); - return 0; + default_notify(this, event, data); + return 0; } int32_t -mem_acct_init (xlator_t *this) +mem_acct_init(xlator_t *this) { - int ret = -1; - - if (!this) - return ret; + int ret = -1; - ret = xlator_mem_acct_init (this, gf_quota_mt_end + 1); + if (!this) + return ret; - if (0 != ret) { - gf_log (this->name, GF_LOG_WARNING, "Memory accounting " - "init failed"); - return ret; - } + ret = xlator_mem_acct_init(this, gf_quota_mt_end + 1); + if (0 != ret) { + gf_log(this->name, GF_LOG_WARNING, + "Memory accounting " + "init failed"); return ret; + } + + return ret; } int32_t -qd_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *xdata, struct iatt *postparent) +qd_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, inode_t *inode, struct iatt *buf, dict_t *xdata, + struct iatt *postparent) { - quotad_aggregator_lookup_cbk_t lookup_cbk = NULL; - gfs3_lookup_rsp rsp = {0, }; + quotad_aggregator_lookup_cbk_t lookup_cbk = NULL; + gfs3_lookup_rsp rsp = { + 0, + }; - lookup_cbk = cookie; + lookup_cbk = cookie; - rsp.op_ret = op_ret; - rsp.op_errno = op_errno; + rsp.op_ret = op_ret; + rsp.op_errno = op_errno; - gf_stat_from_iatt (&rsp.postparent, postparent); + gf_stat_from_iatt(&rsp.postparent, postparent); - GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&rsp.xdata.xdata_val), - rsp.xdata.xdata_len, rsp.op_errno, out); + 
GF_PROTOCOL_DICT_SERIALIZE(this, xdata, (&rsp.xdata.xdata_val), + rsp.xdata.xdata_len, rsp.op_errno, out); - gf_stat_from_iatt (&rsp.stat, buf); + gf_stat_from_iatt(&rsp.stat, buf); out: - lookup_cbk (this, frame, &rsp); + lookup_cbk(this, frame, &rsp); - GF_FREE (rsp.xdata.xdata_val); + GF_FREE(rsp.xdata.xdata_val); - inode_unref (inode); + inode_unref(inode); - return 0; + return 0; } xlator_t * -qd_find_subvol (xlator_t *this, char *volume_uuid) +qd_find_subvol(xlator_t *this, char *volume_uuid) { - xlator_list_t *child = NULL; - xlator_t *subvol = NULL; - char key[1024]; - char *optstr = NULL; - - if (!this || !volume_uuid) - goto out; - - for (child = this->children; child; child = child->next) { - snprintf(key, 1024, "%s.volume-id", child->xlator->name); - if (dict_get_str(this->options, key, &optstr) < 0) - continue; - - if (strcmp (optstr, volume_uuid) == 0) { - subvol = child->xlator; - break; - } + xlator_list_t *child = NULL; + xlator_t *subvol = NULL; + char key[1024]; + int keylen = 0; + char *optstr = NULL; + + if (!this || !volume_uuid) + goto out; + + for (child = this->children; child; child = child->next) { + keylen = snprintf(key, sizeof(key), "%s.volume-id", + child->xlator->name); + if (dict_get_strn(this->options, key, keylen, &optstr) < 0) + continue; + + if (strcmp(optstr, volume_uuid) == 0) { + subvol = child->xlator; + break; } + } out: - return subvol; + return subvol; } int -qd_nameless_lookup (xlator_t *this, call_frame_t *frame, gfs3_lookup_req *req, - dict_t *xdata, quotad_aggregator_lookup_cbk_t lookup_cbk) +qd_nameless_lookup(xlator_t *this, call_frame_t *frame, char *gfid, + dict_t *xdata, char *volume_uuid, + quotad_aggregator_lookup_cbk_t lookup_cbk) { - gfs3_lookup_rsp rsp = {0, }; - int op_errno = 0, ret = -1; - loc_t loc = {0, }; - quotad_aggregator_state_t *state = NULL; - quota_priv_t *priv = NULL; - xlator_t *subvol = NULL; - char *volume_uuid = NULL; - - priv = this->private; - state = frame->root->state; - - 
frame->root->op = GF_FOP_LOOKUP; - - loc.inode = inode_new (state->itable); - if (loc.inode == NULL) { - op_errno = ENOMEM; - goto out; - } - - memcpy (loc.gfid, req->gfid, 16); - - ret = dict_get_str (xdata, "volume-uuid", &volume_uuid); - if (ret < 0) { - op_errno = EINVAL; - goto out; - } - - subvol = qd_find_subvol (this, volume_uuid); - if (subvol == NULL) { - op_errno = EINVAL; - goto out; - } - - STACK_WIND_COOKIE (frame, qd_lookup_cbk, lookup_cbk, subvol, - subvol->fops->lookup, &loc, xdata); - return 0; + gfs3_lookup_rsp rsp = { + 0, + }; + int op_errno = 0, ret = -1; + loc_t loc = { + 0, + }; + quotad_aggregator_state_t *state = NULL; + xlator_t *subvol = NULL; + + state = frame->root->state; + + frame->root->op = GF_FOP_LOOKUP; + + loc.inode = inode_new(state->itable); + if (loc.inode == NULL) { + op_errno = ENOMEM; + goto out; + } + + memcpy(loc.gfid, gfid, 16); + + ret = dict_set_int8(xdata, QUOTA_READ_ONLY_KEY, 1); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM, + "dict set failed"); + ret = -ENOMEM; + goto out; + } + + subvol = qd_find_subvol(this, volume_uuid); + if (subvol == NULL) { + op_errno = EINVAL; + goto out; + } + + STACK_WIND_COOKIE(frame, qd_lookup_cbk, lookup_cbk, subvol, + subvol->fops->lookup, &loc, xdata); + return 0; out: - rsp.op_ret = -1; - rsp.op_errno = op_errno; + rsp.op_ret = -1; + rsp.op_errno = op_errno; - lookup_cbk (this, frame, &rsp); + lookup_cbk(this, frame, &rsp); - inode_unref (loc.inode); - return 0; + inode_unref(loc.inode); + return 0; } int -qd_reconfigure (xlator_t *this, dict_t *options) +qd_reconfigure(xlator_t *this, dict_t *options) { - /* As of now quotad is restarted upon alteration of volfile */ - return 0; + /* As of now quotad is restarted upon alteration of volfile */ + return 0; } void -qd_fini (xlator_t *this) +qd_fini(xlator_t *this) { - quota_priv_t *priv = NULL; + quota_priv_t *priv = NULL; - if (this == NULL || this->private == NULL) - goto out; + if (this == NULL || 
this->private == NULL) + goto out; - priv = this->private; + priv = this->private; - if (priv->rpcsvc) { - GF_FREE (priv->rpcsvc); - priv->rpcsvc = NULL; - } + if (priv->rpcsvc) { + GF_FREE(priv->rpcsvc); + priv->rpcsvc = NULL; + } - GF_FREE (priv); + GF_FREE(priv); out: - return; + return; } int32_t -qd_init (xlator_t *this) +qd_init(xlator_t *this) { - int32_t ret = -1; - quota_priv_t *priv = NULL; - - if (NULL == this->children) { - gf_log (this->name, GF_LOG_ERROR, - "FATAL: quota (%s) not configured for min of 1 child", - this->name); - ret = -1; - goto err; - } + int32_t ret = -1; + quota_priv_t *priv = NULL; - QUOTA_ALLOC_OR_GOTO (priv, quota_priv_t, err); - LOCK_INIT (&priv->lock); + if (NULL == this->children) { + gf_log(this->name, GF_LOG_ERROR, + "FATAL: quota (%s) not configured for min of 1 child", + this->name); + ret = -1; + goto err; + } - this->private = priv; + QUOTA_ALLOC_OR_GOTO(priv, quota_priv_t, err); + LOCK_INIT(&priv->lock); - ret = 0; + this->private = priv; + + ret = 0; err: - if (ret) { - GF_FREE (priv); - } - return ret; + if (ret) { + GF_FREE(priv); + } + return ret; } -class_methods_t class_methods = { - .init = qd_init, - .fini = qd_fini, - .reconfigure = qd_reconfigure, - .notify = qd_notify -}; +struct xlator_fops fops = {}; -struct xlator_fops fops = { -}; +struct xlator_cbks cbks = {}; -struct xlator_cbks cbks = { +struct volume_options options[] = { + {.key = {"transport-type"}, + .value = {"rpc", "rpc-over-rdma", "tcp", "socket", "ib-verbs", "unix", + "ib-sdp", "tcp/server", "ib-verbs/server", "rdma", + "rdma*([ \t]),*([ \t])socket", "rdma*([ \t]),*([ \t])tcp", + "tcp*([ \t]),*([ \t])rdma", "socket*([ \t]),*([ \t])rdma"}, + .type = GF_OPTION_TYPE_STR}, + { + .key = {"transport.*"}, + .type = GF_OPTION_TYPE_ANY, + }, + {.key = {NULL}}, }; -struct volume_options options[] = { - { .key = {"transport-type"}, - .value = {"rpc", "rpc-over-rdma", "tcp", "socket", "ib-verbs", - "unix", "ib-sdp", "tcp/server", "ib-verbs/server", "rdma", 
- "rdma*([ \t]),*([ \t])socket", - "rdma*([ \t]),*([ \t])tcp", - "tcp*([ \t]),*([ \t])rdma", - "socket*([ \t]),*([ \t])rdma"}, - .type = GF_OPTION_TYPE_STR - }, - { .key = {"transport.*"}, - .type = GF_OPTION_TYPE_ANY, - }, - {.key = {NULL}} +xlator_api_t xlator_api = { + .init = qd_init, + .fini = qd_fini, + .reconfigure = qd_reconfigure, + .notify = qd_notify, + .mem_acct_init = mem_acct_init, + .op_version = {1}, + .fops = &fops, + .cbks = &cbks, + .options = options, + .identifier = "quotad", + .category = GF_MAINTAINED, }; diff --git a/xlators/features/quota/src/quotad.sym b/xlators/features/quota/src/quotad.sym deleted file mode 100644 index 0829ffe1584..00000000000 --- a/xlators/features/quota/src/quotad.sym +++ /dev/null @@ -1,7 +0,0 @@ -fops -cbks -class_methods -options -mem_acct_init -reconfigure -dumpops diff --git a/xlators/features/read-only/src/Makefile.am b/xlators/features/read-only/src/Makefile.am index a44fd36f035..e4a2017ef0d 100644 --- a/xlators/features/read-only/src/Makefile.am +++ b/xlators/features/read-only/src/Makefile.am @@ -2,19 +2,20 @@ xlator_LTLIBRARIES = read-only.la worm.la xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features -noinst_HEADERS = read-only.h read-only-mem-types.h read-only-common.h +noinst_HEADERS = read-only.h read-only-mem-types.h read-only-common.h worm-helper.h -read_only_la_LDFLAGS = $(GF_XLATOR_DEFAULT_LDFLAGS) +read_only_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) read_only_la_SOURCES = read-only.c read-only-common.c read_only_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la -worm_la_LDFLAGS = $(GF_XLATOR_DEFAULT_LDFLAGS) +worm_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) -worm_la_SOURCES = read-only-common.c worm.c +worm_la_SOURCES = read-only-common.c worm-helper.c worm.c worm_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la -AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ + 
-I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src AM_CFLAGS = -Wall $(GF_CFLAGS) diff --git a/xlators/features/read-only/src/read-only-common.c b/xlators/features/read-only/src/read-only-common.c index ad2eaaa5e26..9640e7e3eee 100644 --- a/xlators/features/read-only/src/read-only-common.c +++ b/xlators/features/read-only/src/read-only-common.c @@ -9,409 +9,398 @@ */ #include "read-only.h" #include "read-only-mem-types.h" -#include "defaults.h" +#include <glusterfs/defaults.h> gf_boolean_t -is_readonly_or_worm_enabled (xlator_t *this) +is_readonly_or_worm_enabled(call_frame_t *frame, xlator_t *this) { - read_only_priv_t *priv = NULL; - gf_boolean_t readonly_or_worm_enabled = _gf_false; + read_only_priv_t *priv = NULL; + gf_boolean_t readonly_or_worm_enabled = _gf_false; - priv = this->private; - GF_ASSERT (priv); + priv = this->private; + GF_ASSERT(priv); - readonly_or_worm_enabled = priv->readonly_or_worm_enabled; + readonly_or_worm_enabled = priv->readonly_or_worm_enabled; - return readonly_or_worm_enabled; + if (frame->root->pid < GF_CLIENT_PID_MAX) + readonly_or_worm_enabled = _gf_false; + + return readonly_or_worm_enabled; } static int -_check_key_is_zero_filled (dict_t *d, char *k, data_t *v, - void *tmp) +_check_key_is_zero_filled(dict_t *d, char *k, data_t *v, void *tmp) { - if (mem_0filled ((const char *)v->data, v->len)) { - /* -1 means, no more iterations, treat as 'break' */ - return -1; - } - return 0; + if (mem_0filled((const char *)v->data, v->len)) { + /* -1 means, no more iterations, treat as 'break' */ + return -1; + } + return 0; } int32_t -ro_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, - gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata) +ro_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc, + gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata) { - gf_boolean_t allzero = _gf_false; - int ret = 0; - - ret = dict_foreach (dict, _check_key_is_zero_filled, NULL); - if (ret == 0) - allzero = _gf_true; - - if 
(is_readonly_or_worm_enabled (this) && !allzero) - STACK_UNWIND_STRICT (xattrop, frame, -1, EROFS, NULL, xdata); - else - STACK_WIND_TAIL (frame, FIRST_CHILD (this), - FIRST_CHILD(this)->fops->xattrop, - loc, flags, dict, xdata); - return 0; + gf_boolean_t allzero = _gf_false; + int ret = 0; + + ret = dict_foreach(dict, _check_key_is_zero_filled, NULL); + if (ret == 0) + allzero = _gf_true; + + if (is_readonly_or_worm_enabled(frame, this) && !allzero) + STACK_UNWIND_STRICT(xattrop, frame, -1, EROFS, NULL, xdata); + else + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->xattrop, loc, flags, dict, + xdata); + return 0; } int32_t -ro_fxattrop (call_frame_t *frame, xlator_t *this, - fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata) +ro_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd, + gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata) { - gf_boolean_t allzero = _gf_false; - int ret = 0; + gf_boolean_t allzero = _gf_false; + int ret = 0; - ret = dict_foreach (dict, _check_key_is_zero_filled, NULL); - if (ret == 0) - allzero = _gf_true; + ret = dict_foreach(dict, _check_key_is_zero_filled, NULL); + if (ret == 0) + allzero = _gf_true; - if (is_readonly_or_worm_enabled (this) && !allzero) - STACK_UNWIND_STRICT (fxattrop, frame, -1, EROFS, NULL, xdata); - else - STACK_WIND_TAIL (frame, FIRST_CHILD (this), - FIRST_CHILD(this)->fops->fxattrop, - fd, flags, dict, xdata); + if (is_readonly_or_worm_enabled(frame, this) && !allzero) + STACK_UNWIND_STRICT(fxattrop, frame, -1, EROFS, NULL, xdata); + else + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fxattrop, fd, flags, dict, + xdata); - return 0; + return 0; } int32_t -ro_entrylk (call_frame_t *frame, xlator_t *this, const char *volume, - loc_t *loc, const char *basename, entrylk_cmd cmd, - entrylk_type type, dict_t *xdata) +ro_entrylk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc, + const char *basename, entrylk_cmd cmd, entrylk_type 
type, + dict_t *xdata) { - STACK_WIND_TAIL (frame, FIRST_CHILD (this), - FIRST_CHILD(this)->fops->entrylk, - volume, loc, basename, cmd, type, xdata); + STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->entrylk, + volume, loc, basename, cmd, type, xdata); - return 0; + return 0; } int32_t -ro_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume, - fd_t *fd, const char *basename, entrylk_cmd cmd, entrylk_type type, - dict_t *xdata) +ro_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, + const char *basename, entrylk_cmd cmd, entrylk_type type, + dict_t *xdata) { - STACK_WIND_TAIL (frame, FIRST_CHILD (this), - FIRST_CHILD(this)->fops->fentrylk, - volume, fd, basename, cmd, type, xdata); + STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fentrylk, + volume, fd, basename, cmd, type, xdata); - return 0; + return 0; } int32_t -ro_inodelk (call_frame_t *frame, xlator_t *this, const char *volume, - loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata) +ro_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc, + int32_t cmd, struct gf_flock *lock, dict_t *xdata) { - STACK_WIND_TAIL (frame, FIRST_CHILD (this), - FIRST_CHILD(this)->fops->inodelk, - volume, loc, cmd, lock, xdata); + STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->inodelk, + volume, loc, cmd, lock, xdata); - return 0; + return 0; } int32_t -ro_finodelk (call_frame_t *frame, xlator_t *this, const char *volume, - fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata) +ro_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, + int32_t cmd, struct gf_flock *lock, dict_t *xdata) { - STACK_WIND_TAIL (frame, FIRST_CHILD (this), - FIRST_CHILD(this)->fops->finodelk, - volume, fd, cmd, lock, xdata); + STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->finodelk, + volume, fd, cmd, lock, xdata); - return 0; + return 0; } int32_t -ro_lk (call_frame_t *frame, 
xlator_t *this, fd_t *fd, int cmd, - struct gf_flock *flock, dict_t *xdata) +ro_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int cmd, + struct gf_flock *flock, dict_t *xdata) { - STACK_WIND_TAIL (frame, FIRST_CHILD (this), - FIRST_CHILD(this)->fops->lk, fd, cmd, flock, - xdata); + STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->lk, fd, + cmd, flock, xdata); - return 0; + return 0; } int32_t -ro_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - struct iatt *stbuf, int32_t valid, dict_t *xdata) +ro_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf, + int32_t valid, dict_t *xdata) { - if (is_readonly_or_worm_enabled (this)) - STACK_UNWIND_STRICT (setattr, frame, -1, EROFS, NULL, NULL, - xdata); - else - STACK_WIND_TAIL (frame, FIRST_CHILD (this), - FIRST_CHILD(this)->fops->setattr, loc, stbuf, - valid, xdata); + if (is_readonly_or_worm_enabled(frame, this)) + STACK_UNWIND_STRICT(setattr, frame, -1, EROFS, NULL, NULL, xdata); + else + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, + xdata); + + return 0; +} - return 0; +int32_t +ro_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf, + int32_t valid, dict_t *xdata) +{ + if (is_readonly_or_worm_enabled(frame, this)) + STACK_UNWIND_STRICT(fsetattr, frame, -1, EROFS, NULL, NULL, xdata); + else + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, + xdata); + + return 0; } int32_t -ro_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iatt *stbuf, int32_t valid, dict_t *xdata) +ro_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) { - if (is_readonly_or_worm_enabled (this)) - STACK_UNWIND_STRICT (fsetattr, frame, -1, EROFS, NULL, NULL, - xdata); - else - STACK_WIND_TAIL (frame, FIRST_CHILD (this), - FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, - valid, xdata); + if 
(is_readonly_or_worm_enabled(frame, this)) + STACK_UNWIND_STRICT(truncate, frame, -1, EROFS, NULL, NULL, xdata); + else + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); - return 0; + return 0; } - int32_t -ro_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, dict_t *xdata) +ro_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) { - if (is_readonly_or_worm_enabled (this)) - STACK_UNWIND_STRICT (truncate, frame, -1, EROFS, NULL, NULL, - xdata); - else - STACK_WIND_TAIL (frame, FIRST_CHILD (this), - FIRST_CHILD(this)->fops->truncate, loc, offset, - xdata); - - return 0; + if (is_readonly_or_worm_enabled(frame, this)) + STACK_UNWIND_STRICT(ftruncate, frame, -1, EROFS, NULL, NULL, xdata); + else + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); + + return 0; } int32_t -ro_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, dict_t *xdata) +ro_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, + off_t offset, size_t len, dict_t *xdata) { - if (is_readonly_or_worm_enabled (this)) - STACK_UNWIND_STRICT (ftruncate, frame, -1, EROFS, NULL, NULL, - xdata); - else - STACK_WIND_TAIL (frame, FIRST_CHILD (this), - FIRST_CHILD(this)->fops->ftruncate, fd, offset, - xdata); - - return 0; + if (is_readonly_or_worm_enabled(frame, this)) + STACK_UNWIND_STRICT(fallocate, frame, -1, EROFS, NULL, NULL, xdata); + else + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, + len, xdata); + return 0; } int -ro_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, - dev_t rdev, mode_t umask, dict_t *xdata) +ro_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + dev_t rdev, mode_t umask, dict_t *xdata) { - if (is_readonly_or_worm_enabled (this)) - STACK_UNWIND_STRICT (mknod, frame, -1, EROFS, NULL, NULL, NULL, - NULL, 
xdata); - else - STACK_WIND_TAIL (frame, FIRST_CHILD (this), - FIRST_CHILD(this)->fops->mknod, loc, mode, - rdev, umask, xdata); - - return 0; + if (is_readonly_or_worm_enabled(frame, this)) + STACK_UNWIND_STRICT(mknod, frame, -1, EROFS, NULL, NULL, NULL, NULL, + xdata); + else + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, + xdata); + + return 0; } - int -ro_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, - mode_t umask, dict_t *xdata) +ro_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + mode_t umask, dict_t *xdata) { - if (is_readonly_or_worm_enabled (this)) - STACK_UNWIND_STRICT (mkdir, frame, -1, EROFS, NULL, NULL, NULL, - NULL, xdata); - else - STACK_WIND_TAIL (frame, FIRST_CHILD (this), - FIRST_CHILD(this)->fops->mkdir, loc, mode, - umask, xdata); - - return 0; + if (is_readonly_or_worm_enabled(frame, this)) + STACK_UNWIND_STRICT(mkdir, frame, -1, EROFS, NULL, NULL, NULL, NULL, + xdata); + else + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, + xdata); + + return 0; } int32_t -ro_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, - dict_t *xdata) +ro_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, + dict_t *xdata) { - if (is_readonly_or_worm_enabled (this)) - STACK_UNWIND_STRICT (unlink, frame, -1, EROFS, NULL, NULL, - xdata); - else - STACK_WIND_TAIL (frame, FIRST_CHILD (this), - FIRST_CHILD(this)->fops->unlink, loc, xflag, - xdata); + if (is_readonly_or_worm_enabled(frame, this)) + STACK_UNWIND_STRICT(unlink, frame, -1, EROFS, NULL, NULL, xdata); + else + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); - return 0; + return 0; } - int -ro_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, - dict_t *xdata) +ro_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + dict_t *xdata) { - if 
(is_readonly_or_worm_enabled (this)) - STACK_UNWIND_STRICT (rmdir, frame, -1, EROFS, NULL, NULL, - xdata); - else - STACK_WIND_TAIL (frame, FIRST_CHILD (this), - FIRST_CHILD(this)->fops->rmdir, loc, flags, - xdata); + if (is_readonly_or_worm_enabled(frame, this)) + STACK_UNWIND_STRICT(rmdir, frame, -1, EROFS, NULL, NULL, xdata); + else + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rmdir, loc, flags, xdata); - return 0; + return 0; } - int -ro_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath, - loc_t *loc, mode_t umask, dict_t *xdata) +ro_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath, + loc_t *loc, mode_t umask, dict_t *xdata) { - if (is_readonly_or_worm_enabled (this)) - STACK_UNWIND_STRICT (symlink, frame, -1, EROFS, NULL, NULL, - NULL, NULL, xdata); - else - STACK_WIND_TAIL (frame, FIRST_CHILD (this), - FIRST_CHILD(this)->fops->symlink, linkpath, - loc, umask, xdata); - - return 0; + if (is_readonly_or_worm_enabled(frame, this)) + STACK_UNWIND_STRICT(symlink, frame, -1, EROFS, NULL, NULL, NULL, NULL, + xdata); + else + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->symlink, linkpath, loc, umask, + xdata); + + return 0; } - - int32_t -ro_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, - dict_t *xdata) +ro_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) { - if (is_readonly_or_worm_enabled (this)) - STACK_UNWIND_STRICT (rename, frame, -1, EROFS, NULL, NULL, NULL, - NULL, NULL, xdata); - else - STACK_WIND_TAIL (frame, FIRST_CHILD (this), - FIRST_CHILD(this)->fops->rename, oldloc, - newloc, xdata); - - return 0; + if (is_readonly_or_worm_enabled(frame, this)) + STACK_UNWIND_STRICT(rename, frame, -1, EROFS, NULL, NULL, NULL, NULL, + NULL, xdata); + else + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); + + return 0; } - int32_t -ro_link (call_frame_t *frame, 
xlator_t *this, loc_t *oldloc, loc_t *newloc, dict_t *xdata) +ro_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) { - if (is_readonly_or_worm_enabled (this)) - STACK_UNWIND_STRICT (link, frame, -1, EROFS, NULL, NULL, NULL, - NULL, xdata); - else - STACK_WIND_TAIL (frame, FIRST_CHILD (this), - FIRST_CHILD(this)->fops->link, oldloc, newloc, - xdata); - - return 0; + if (is_readonly_or_worm_enabled(frame, this)) + STACK_UNWIND_STRICT(link, frame, -1, EROFS, NULL, NULL, NULL, NULL, + xdata); + else + STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->link, + oldloc, newloc, xdata); + + return 0; } int32_t -ro_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) +ro_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) { - if (is_readonly_or_worm_enabled (this)) - STACK_UNWIND_STRICT (create, frame, -1, EROFS, NULL, NULL, NULL, - NULL, NULL, xdata); - else - STACK_WIND_TAIL (frame, FIRST_CHILD (this), - FIRST_CHILD(this)->fops->create, loc, flags, - mode, umask, fd, xdata); - - return 0; + if (is_readonly_or_worm_enabled(frame, this)) + STACK_UNWIND_STRICT(create, frame, -1, EROFS, NULL, NULL, NULL, NULL, + NULL, xdata); + else + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->create, loc, flags, mode, + umask, fd, xdata); + + return 0; } - static int32_t -ro_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, fd_t *fd, dict_t *xdata) +ro_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, fd_t *fd, dict_t *xdata) { - STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata); - return 0; + STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, xdata); + return 0; } int32_t -ro_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - fd_t *fd, 
dict_t *xdata) +ro_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + fd_t *fd, dict_t *xdata) { - if (is_readonly_or_worm_enabled (this) && - (((flags & O_ACCMODE) == O_WRONLY) || - ((flags & O_ACCMODE) == O_RDWR))) { - STACK_UNWIND_STRICT (open, frame, -1, EROFS, NULL, xdata); - return 0; - } - - STACK_WIND (frame, ro_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); - return 0; + if (is_readonly_or_worm_enabled(frame, this) && + (((flags & O_ACCMODE) == O_WRONLY) || + ((flags & O_ACCMODE) == O_RDWR))) { + STACK_UNWIND_STRICT(open, frame, -1, EROFS, NULL, xdata); + return 0; + } + + STACK_WIND(frame, ro_open_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); + return 0; } int32_t -ro_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, - int32_t flags, dict_t *xdata) +ro_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, + int32_t flags, dict_t *xdata) { - if (is_readonly_or_worm_enabled (this)) - STACK_UNWIND_STRICT (fsetxattr, frame, -1, EROFS, xdata); - else - STACK_WIND_TAIL (frame, FIRST_CHILD (this), - FIRST_CHILD(this)->fops->fsetxattr, fd, dict, - flags, xdata); - - return 0; + if (is_readonly_or_worm_enabled(frame, this)) + STACK_UNWIND_STRICT(fsetxattr, frame, -1, EROFS, xdata); + else + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, + xdata); + + return 0; } int32_t -ro_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, - dict_t *xdata) +ro_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, + dict_t *xdata) { - if (is_readonly_or_worm_enabled (this)) - STACK_UNWIND_STRICT (fsyncdir, frame, -1, EROFS, xdata); - else - STACK_WIND_TAIL (frame, FIRST_CHILD (this), - FIRST_CHILD(this)->fops->fsyncdir, fd, flags, - xdata); - - return 0; + if (is_readonly_or_worm_enabled(frame, this)) + STACK_UNWIND_STRICT(fsyncdir, frame, -1, EROFS, xdata); + 
else + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsyncdir, fd, flags, xdata); + + return 0; } int32_t -ro_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, - int32_t count, off_t off, uint32_t flags, struct iobref *iobref, - dict_t *xdata) +ro_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, + int32_t count, off_t off, uint32_t flags, struct iobref *iobref, + dict_t *xdata) { - if (is_readonly_or_worm_enabled (this)) - STACK_UNWIND_STRICT (writev, frame, -1, EROFS, NULL, NULL, - xdata); - else - STACK_WIND_TAIL (frame, FIRST_CHILD (this), - FIRST_CHILD(this)->fops->writev, fd, vector, - count, off, flags, iobref, xdata); - - return 0; + if (is_readonly_or_worm_enabled(frame, this)) + STACK_UNWIND_STRICT(writev, frame, -1, EROFS, NULL, NULL, xdata); + else + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->writev, fd, vector, count, off, + flags, iobref, xdata); + + return 0; } - int32_t -ro_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, - int32_t flags, dict_t *xdata) +ro_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + int32_t flags, dict_t *xdata) { - if (is_readonly_or_worm_enabled (this)) - STACK_UNWIND_STRICT (setxattr, frame, -1, EROFS, xdata); - else - STACK_WIND_TAIL (frame, FIRST_CHILD (this), - FIRST_CHILD(this)->fops->setxattr, loc, dict, - flags, xdata); - - return 0; + if (is_readonly_or_worm_enabled(frame, this)) + STACK_UNWIND_STRICT(setxattr, frame, -1, EROFS, xdata); + else + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, + xdata); + + return 0; } int32_t -ro_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *name, dict_t *xdata) +ro_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) { - if (is_readonly_or_worm_enabled (this)) - STACK_UNWIND_STRICT (removexattr, frame, -1, EROFS, 
xdata); - else - STACK_WIND_TAIL (frame, FIRST_CHILD (this), - FIRST_CHILD(this)->fops->removexattr, loc, - name, xdata); + if (is_readonly_or_worm_enabled(frame, this)) + STACK_UNWIND_STRICT(removexattr, frame, -1, EROFS, xdata); + else + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, loc, name, xdata); - return 0; + return 0; } diff --git a/xlators/features/read-only/src/read-only-common.h b/xlators/features/read-only/src/read-only-common.h index 248ca47b660..5561961ffa2 100644 --- a/xlators/features/read-only/src/read-only-common.h +++ b/xlators/features/read-only/src/read-only-common.h @@ -7,107 +7,115 @@ later), or the GNU General Public License, version 2 (GPLv2), in all cases as published by the Free Software Foundation. */ -#include "xlator.h" -#include "defaults.h" +#include <glusterfs/xlator.h> +#include <glusterfs/defaults.h> gf_boolean_t -is_readonly_or_worm_enabled (xlator_t *this); +is_readonly_or_worm_enabled(call_frame_t *frame, xlator_t *this); int32_t -ro_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, - gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata); +ro_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc, + gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata); int32_t -ro_fxattrop (call_frame_t *frame, xlator_t *this, - fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata); +ro_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd, + gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata); int32_t -ro_entrylk (call_frame_t *frame, xlator_t *this, const char *volume, - loc_t *loc, const char *basename, entrylk_cmd cmd, - entrylk_type type, dict_t *xdata); +ro_entrylk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc, + const char *basename, entrylk_cmd cmd, entrylk_type type, + dict_t *xdata); int32_t -ro_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume, - fd_t *fd, const char *basename, entrylk_cmd cmd, entrylk_type - type, dict_t *xdata); 
+ro_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, + const char *basename, entrylk_cmd cmd, entrylk_type type, + dict_t *xdata); int32_t -ro_inodelk (call_frame_t *frame, xlator_t *this, const char *volume, - loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata); +ro_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc, + int32_t cmd, struct gf_flock *lock, dict_t *xdata); int32_t -ro_finodelk (call_frame_t *frame, xlator_t *this, const char *volume, - fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata); +ro_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, + int32_t cmd, struct gf_flock *lock, dict_t *xdata); int32_t -ro_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int cmd, - struct gf_flock *flock, dict_t *xdata); +ro_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int cmd, + struct gf_flock *flock, dict_t *xdata); int32_t -ro_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - struct iatt *stbuf, int32_t valid, dict_t *xdata); +ro_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf, + int32_t valid, dict_t *xdata); int32_t -ro_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iatt *stbuf, int32_t valid, dict_t *xdata); - +ro_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf, + int32_t valid, dict_t *xdata); int32_t -ro_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, dict_t *xdata); +ro_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata); int32_t -ro_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, dict_t *xdata); +ro_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata); int -ro_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, - dev_t rdev, mode_t umask, dict_t *xdata); +ro_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + dev_t 
rdev, mode_t umask, dict_t *xdata); int -ro_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, - mode_t umask, dict_t *xdata); +ro_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + mode_t umask, dict_t *xdata); int32_t -ro_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, - dict_t *xdata); - -int -ro_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, +ro_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, dict_t *xdata); +int +ro_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + dict_t *xdata); int -ro_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath, - loc_t *loc, mode_t umask, dict_t *xdata); +ro_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath, + loc_t *loc, mode_t umask, dict_t *xdata); int32_t -ro_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, dict_t *xdata); +ro_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata); int32_t -ro_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, dict_t *xdata); +ro_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata); int32_t -ro_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata); +ro_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata); int32_t -ro_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - fd_t *fd, dict_t *xdata); +ro_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + fd_t *fd, dict_t *xdata); int32_t -ro_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, - int32_t flags, dict_t *xdata); +ro_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, + int32_t flags, dict_t *xdata); int32_t -ro_fsyncdir (call_frame_t *frame, xlator_t *this, 
fd_t *fd, int32_t flags, dict_t *xdata); +ro_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, + dict_t *xdata); int32_t -ro_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, - int32_t count, off_t off, uint32_t flags, struct iobref *iobref, dict_t *xdata); +ro_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, + int32_t count, off_t off, uint32_t flags, struct iobref *iobref, + dict_t *xdata); int32_t -ro_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, - int32_t flags, dict_t *xdata); +ro_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + int32_t flags, dict_t *xdata); + +int32_t +ro_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata); int32_t -ro_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *name, dict_t *xdata); +ro_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, + off_t offset, size_t len, dict_t *xdata); diff --git a/xlators/features/read-only/src/read-only-mem-types.h b/xlators/features/read-only/src/read-only-mem-types.h index 940700a017d..c67d6c02cd0 100644 --- a/xlators/features/read-only/src/read-only-mem-types.h +++ b/xlators/features/read-only/src/read-only-mem-types.h @@ -11,10 +11,10 @@ #ifndef __READONLY_MEM_TYPES_H__ #define __READONLY_MEM_TYPES_H__ -#include "mem-types.h" +#include <glusterfs/mem-types.h> enum gf_read_only_mem_types_ { - gf_read_only_mt_priv_t = gf_common_mt_end + 1, - gf_read_only_mt_end + gf_read_only_mt_priv_t = gf_common_mt_end + 1, + gf_read_only_mt_end }; #endif diff --git a/xlators/features/read-only/src/read-only.c b/xlators/features/read-only/src/read-only.c index 8733a40abce..48654998e63 100644 --- a/xlators/features/read-only/src/read-only.c +++ b/xlators/features/read-only/src/read-only.c @@ -7,124 +7,138 @@ later), or the GNU General Public License, version 2 (GPLv2), in all cases as published by the Free 
Software Foundation. */ -#include "defaults.h" #include "read-only-common.h" #include "read-only-mem-types.h" #include "read-only.h" int32_t -mem_acct_init (xlator_t *this) +mem_acct_init(xlator_t *this) { - int ret = -1; + int ret = -1; - ret = xlator_mem_acct_init (this, gf_read_only_mt_end + 1); - if (ret) - gf_log (this->name, GF_LOG_ERROR, "Memory accounting " - "initialization failed."); + ret = xlator_mem_acct_init(this, gf_read_only_mt_end + 1); + if (ret) + gf_log(this->name, GF_LOG_ERROR, + "Memory accounting " + "initialization failed."); - return ret; + return ret; } int32_t -init (xlator_t *this) +init(xlator_t *this) { - int ret = -1; - read_only_priv_t *priv = NULL; + int ret = -1; + read_only_priv_t *priv = NULL; - if (!this->children || this->children->next) { - gf_log (this->name, GF_LOG_ERROR, - "translator not configured with exactly one child"); - return -1; - } + if (!this->children || this->children->next) { + gf_log(this->name, GF_LOG_ERROR, + "translator not configured with exactly one child"); + return -1; + } - if (!this->parents) { - gf_log (this->name, GF_LOG_WARNING, - "dangling volume. check volfile "); - } + if (!this->parents) { + gf_log(this->name, GF_LOG_WARNING, "dangling volume. 
check volfile "); + } - priv = GF_CALLOC (1, sizeof (*priv), gf_read_only_mt_priv_t); - if (!priv) - goto out; + priv = GF_CALLOC(1, sizeof(*priv), gf_read_only_mt_priv_t); + if (!priv) + goto out; - GF_OPTION_INIT ("read-only", priv->readonly_or_worm_enabled, bool, out); + this->private = priv; - this->private = priv; - ret = 0; + GF_OPTION_INIT("read-only", priv->readonly_or_worm_enabled, bool, out); + + ret = 0; out: - return ret; + return ret; } int -reconfigure (xlator_t *this, dict_t *options) +reconfigure(xlator_t *this, dict_t *options) { - read_only_priv_t *priv = NULL; - int ret = -1; - gf_boolean_t readonly_or_worm_enabled = _gf_false; + read_only_priv_t *priv = NULL; + int ret = -1; + gf_boolean_t readonly_or_worm_enabled = _gf_false; - priv = this->private; - GF_ASSERT (priv); + priv = this->private; + GF_ASSERT(priv); - GF_OPTION_RECONF ("read-only", readonly_or_worm_enabled, options, bool, - out); - priv->readonly_or_worm_enabled = readonly_or_worm_enabled; - ret = 0; + GF_OPTION_RECONF("read-only", readonly_or_worm_enabled, options, bool, out); + priv->readonly_or_worm_enabled = readonly_or_worm_enabled; + ret = 0; out: - gf_log (this->name, GF_LOG_DEBUG, "returning %d", ret); - return ret; + gf_log(this->name, GF_LOG_DEBUG, "returning %d", ret); + return ret; } void -fini (xlator_t *this) +fini(xlator_t *this) { - read_only_priv_t *priv = NULL; + read_only_priv_t *priv = NULL; - priv = this->private; - if (!priv) - return; + priv = this->private; + if (!priv) + return; - this->private = NULL; - GF_FREE (priv); + this->private = NULL; + GF_FREE(priv); - return; + return; } - struct xlator_fops fops = { - .mknod = ro_mknod, - .mkdir = ro_mkdir, - .unlink = ro_unlink, - .rmdir = ro_rmdir, - .symlink = ro_symlink, - .rename = ro_rename, - .link = ro_link, - .truncate = ro_truncate, - .open = ro_open, - .writev = ro_writev, - .setxattr = ro_setxattr, - .fsetxattr = ro_fsetxattr, - .removexattr = ro_removexattr, - .fsyncdir = ro_fsyncdir, - .ftruncate = 
ro_ftruncate, - .create = ro_create, - .setattr = ro_setattr, - .fsetattr = ro_fsetattr, - .xattrop = ro_xattrop, - .fxattrop = ro_fxattrop, - .inodelk = ro_inodelk, - .finodelk = ro_finodelk, - .entrylk = ro_entrylk, - .fentrylk = ro_fentrylk, - .lk = ro_lk, + .mknod = ro_mknod, + .mkdir = ro_mkdir, + .unlink = ro_unlink, + .rmdir = ro_rmdir, + .symlink = ro_symlink, + .rename = ro_rename, + .link = ro_link, + .truncate = ro_truncate, + .open = ro_open, + .writev = ro_writev, + .setxattr = ro_setxattr, + .fsetxattr = ro_fsetxattr, + .removexattr = ro_removexattr, + .fsyncdir = ro_fsyncdir, + .ftruncate = ro_ftruncate, + .create = ro_create, + .setattr = ro_setattr, + .fsetattr = ro_fsetattr, + .xattrop = ro_xattrop, + .fxattrop = ro_fxattrop, + .inodelk = ro_inodelk, + .finodelk = ro_finodelk, + .entrylk = ro_entrylk, + .fentrylk = ro_fentrylk, + .lk = ro_lk, + .fallocate = ro_fallocate, }; -struct xlator_cbks cbks = { -}; +struct xlator_cbks cbks = {}; struct volume_options options[] = { - { .key = {"read-only"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", - .description = "When \"on\", makes a volume read-only. It is turned " - "\"off\" by default." - }, + {.key = {"read-only"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + /*.validate_fn = validate_boolean,*/ + .op_version = {1}, + .flags = OPT_FLAG_SETTABLE, + .description = "When \"on\", makes a volume read-only. 
It is turned " + "\"off\" by default."}, + {.key = {NULL}}, +}; + +xlator_api_t xlator_api = { + .init = init, + .fini = fini, + .reconfigure = reconfigure, + .mem_acct_init = mem_acct_init, + .op_version = {1}, /* Present from the initial version */ + .fops = &fops, + .cbks = &cbks, + .options = options, + .identifier = "read-only", + .category = GF_TECH_PREVIEW, }; diff --git a/xlators/features/read-only/src/read-only.h b/xlators/features/read-only/src/read-only.h index 8e7e1b68081..aced5d3c577 100644 --- a/xlators/features/read-only/src/read-only.h +++ b/xlators/features/read-only/src/read-only.h @@ -11,11 +11,27 @@ #ifndef __READONLY_H__ #define __READONLY_H__ -#include "read-only-mem-types.h" -#include "xlator.h" +#include <stdint.h> // for uint64_t, uint8_t +#include <sys/time.h> // for time_t +#include "glusterfs/glusterfs.h" // for gf_boolean_t typedef struct { - gf_boolean_t readonly_or_worm_enabled; + uint8_t worm : 1; + uint8_t retain : 1; + uint8_t legal_hold : 1; + uint8_t ret_mode : 1; + int64_t ret_period; + int64_t auto_commit_period; +} worm_reten_state_t; + +typedef struct { + gf_boolean_t readonly_or_worm_enabled; + gf_boolean_t worm_file; + gf_boolean_t worm_files_deletable; + int64_t reten_period; + int64_t com_period; + int reten_mode; + time_t start_time; } read_only_priv_t; #endif diff --git a/xlators/features/read-only/src/worm-helper.c b/xlators/features/read-only/src/worm-helper.c new file mode 100644 index 00000000000..df45f2a940b --- /dev/null +++ b/xlators/features/read-only/src/worm-helper.c @@ -0,0 +1,395 @@ +/* + Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ +#include "read-only-mem-types.h" +#include "read-only.h" +#include <glusterfs/xlator.h> +#include <glusterfs/syncop.h> +#include "worm-helper.h" + +/*Function to check whether file is read-only. + * The input *stbuf contains the attributes of the file, which is used to check + * the write protection bits for all the users of the file. + * Return true if all the write bits are disabled,false otherwise*/ +gf_boolean_t +gf_worm_write_disabled(struct iatt *stbuf) +{ + gf_boolean_t ret = _gf_false; + + GF_VALIDATE_OR_GOTO("worm", stbuf, out); + + if (stbuf->ia_prot.owner.write == 0 && stbuf->ia_prot.group.write == 0 && + stbuf->ia_prot.other.write == 0) + ret = _gf_true; +out: + return ret; +} + +int32_t +worm_init_state(xlator_t *this, gf_boolean_t fop_with_fd, void *file_ptr) +{ + int ret = -1; + uint64_t start_time = 0; + dict_t *dict = NULL; + + GF_VALIDATE_OR_GOTO("worm", this, out); + GF_VALIDATE_OR_GOTO(this->name, file_ptr, out); + + start_time = gf_time(); + dict = dict_new(); + if (!dict) { + gf_log(this->name, GF_LOG_ERROR, "Error creating the dict"); + goto out; + } + ret = dict_set_uint64(dict, "trusted.start_time", start_time); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, "Error in setting the dict"); + goto out; + } + if (fop_with_fd) + ret = syncop_fsetxattr(this, (fd_t *)file_ptr, dict, 0, NULL, NULL); + else + ret = syncop_setxattr(this, (loc_t *)file_ptr, dict, 0, NULL, NULL); +out: + if (dict) + dict_unref(dict); + return ret; +} + +/*Function to set the retention state for a file. 
+ * It loads the WORM/Retention state into the retention_state pointer.*/ +int32_t +worm_set_state(xlator_t *this, gf_boolean_t fop_with_fd, void *file_ptr, + worm_reten_state_t *retention_state, struct iatt *stbuf) +{ + read_only_priv_t *priv = NULL; + struct iatt stpre = { + 0, + }; + int ret = -1; + + GF_VALIDATE_OR_GOTO("worm", this, out); + GF_VALIDATE_OR_GOTO(this->name, file_ptr, out); + GF_VALIDATE_OR_GOTO(this->name, retention_state, out); + GF_VALIDATE_OR_GOTO(this->name, stbuf, out); + + priv = this->private; + GF_ASSERT(priv); + retention_state->worm = 1; + retention_state->retain = 1; + retention_state->legal_hold = 0; + retention_state->ret_mode = priv->reten_mode; + retention_state->ret_period = priv->reten_period; + retention_state->auto_commit_period = priv->com_period; + if (fop_with_fd) + ret = syncop_fstat(this, (fd_t *)file_ptr, &stpre, NULL, NULL); + else + ret = syncop_stat(this, (loc_t *)file_ptr, &stpre, NULL, NULL); + if (ret) + goto out; + stbuf->ia_mtime = stpre.ia_mtime; + stbuf->ia_atime = gf_time() + retention_state->ret_period; /* atime is set to now + retention period; gf_worm_state_transition later compares it against the current time */ + + if (fop_with_fd) + ret = syncop_fsetattr(this, (fd_t *)file_ptr, stbuf, GF_SET_ATTR_ATIME, + NULL, NULL, NULL, NULL); + else + ret = syncop_setattr(this, (loc_t *)file_ptr, stbuf, GF_SET_ATTR_ATIME, + NULL, NULL, NULL, NULL); + if (ret) + goto out; + + ret = gf_worm_set_xattr(this, retention_state, fop_with_fd, file_ptr); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, "Error setting xattr"); + goto out; + } + ret = 0; +out: + return ret; +} + +/*This function reads the trusted.reten_state xattr of the file (through the fd + * or the loc, depending on fop_with_fd) and deserializes it into *reten_state. + * Returns 0 on success, -1 when an argument is invalid or the xattr cannot be + * fetched, and -2 when the value is missing from the returned dict.*/ +int32_t +worm_get_state(xlator_t *this, gf_boolean_t fop_with_fd, void *file_ptr, + worm_reten_state_t *reten_state) +{ + dict_t *dict = NULL; + char *val = NULL; + int ret = -1; + + GF_VALIDATE_OR_GOTO("worm", this, out); + GF_VALIDATE_OR_GOTO(this->name, file_ptr, out); + GF_VALIDATE_OR_GOTO(this->name, reten_state, out); + + if (fop_with_fd) + ret = 
syncop_fgetxattr(this, (fd_t *)file_ptr, &dict, + "trusted.reten_state", NULL, NULL); + else + ret = syncop_getxattr(this, (loc_t *)file_ptr, &dict, + "trusted.reten_state", NULL, NULL); + if (ret < 0 || !dict) { + ret = -1; + goto out; + } + ret = dict_get_str(dict, "trusted.reten_state", &val); + if (ret) { + ret = -2; + gf_log(this->name, GF_LOG_ERROR, "Empty val"); /* NOTE(review): no goto out here; the call below still runs and relies on the NULL check of val inside gf_worm_deserialize_state */ + } + gf_worm_deserialize_state(val, reten_state); +out: + if (dict) + dict_unref(dict); + return ret; +} + +/*Function to end the retention of a file: it subtracts ret_period from the + * atime held in stbuf, clears retain, ret_period and auto_commit_period in + * reten_state (the worm flag is left untouched), stores the new state xattr + * and then writes the atime back to the file. + * It does not itself check whether the retention period has expired; + * gf_worm_state_transition does that before calling it. The function returns + * void, so a failure of either step is not reported to the caller.*/ +void +gf_worm_state_lookup(xlator_t *this, gf_boolean_t fop_with_fd, void *file_ptr, + worm_reten_state_t *reten_state, struct iatt *stbuf) +{ + int ret = -1; + + GF_VALIDATE_OR_GOTO("worm", this, out); + GF_VALIDATE_OR_GOTO(this->name, file_ptr, out); + GF_VALIDATE_OR_GOTO(this->name, reten_state, out); + GF_VALIDATE_OR_GOTO(this->name, stbuf, out); + + stbuf->ia_atime -= reten_state->ret_period; + reten_state->retain = 0; + reten_state->ret_period = 0; + reten_state->auto_commit_period = 0; + ret = gf_worm_set_xattr(this, reten_state, fop_with_fd, file_ptr); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, "Error setting xattr"); + goto out; + } + + if (fop_with_fd) + ret = syncop_fsetattr(this, (fd_t *)file_ptr, stbuf, GF_SET_ATTR_ATIME, + NULL, NULL, NULL, NULL); + else + ret = syncop_setattr(this, (loc_t *)file_ptr, stbuf, GF_SET_ATTR_ATIME, + NULL, NULL, NULL, NULL); + if (ret) + goto out; + gf_log(this->name, GF_LOG_INFO, "Retention state reset"); +out: + return; +} + +/*This function serializes the WORM/Retention state of a file as text: the + * worm, retain, legal_hold and ret_mode flags are packed into bits 0-3 of an + * integer, which is written to val followed by ret_period and + * auto_commit_period, separated by slashes. + * No buffer length is passed in, so val must be large enough for the + * formatted string.*/ +void +gf_worm_serialize_state(worm_reten_state_t *reten_state, char *val) +{ + uint32_t state = 0; + + GF_VALIDATE_OR_GOTO("worm", reten_state, out); + GF_VALIDATE_OR_GOTO("worm", 
val, out); + + state |= reten_state->worm << 0; + state |= reten_state->retain << 1; + state |= reten_state->legal_hold << 2; + state |= reten_state->ret_mode << 3; + sprintf(val, "%d/%" PRIu64 "/%" PRIu64, state, reten_state->ret_period, + reten_state->auto_commit_period); /* NOTE(review): unbounded sprintf, and the signed-int and PRIu64 conversions are applied to a uint32_t and to int64_t fields -- consider snprintf with matching specifiers */ + +out: + return; +} + +/*This function deserializes the data stored in the xattr of the file and loads + * the value to the reten_state structure. + * val is expected in the form flags/ret_period/auto_commit_period and is split + * in place with strtok, so the buffer passed in is modified. + * NOTE(review): the tokens returned by strtok are handed to atoi without a + * NULL check, so a value with fewer than three fields would pass NULL to atoi. + * strtok also keeps static state between calls and atoi only yields an int, + * while the periods are 64-bit -- consider strtok_r and strtoll.*/ +void +gf_worm_deserialize_state(char *val, worm_reten_state_t *reten_state) +{ + char *token = NULL; + uint32_t state = 0; + + GF_VALIDATE_OR_GOTO("worm", val, out); + GF_VALIDATE_OR_GOTO("worm", reten_state, out); + + token = strtok(val, "/"); + state = atoi(token); + reten_state->worm = (state >> 0) & 1; + reten_state->retain = (state >> 1) & 1; + reten_state->legal_hold = (state >> 2) & 1; + reten_state->ret_mode = (state >> 3) & 1; + token = strtok(NULL, "/"); + reten_state->ret_period = atoi(token); + token = strtok(NULL, "/"); + reten_state->auto_commit_period = atoi(token); + +out: + return; +} + +/*Function to set the xattr for a file. 
+ * If the xattr is already present then it will replace that.*/ +int32_t +gf_worm_set_xattr(xlator_t *this, worm_reten_state_t *reten_state, + gf_boolean_t fop_with_fd, void *file_ptr) +{ + char val[100] = ""; /* receives the serialized state text */ + int ret = -1; + dict_t *dict = NULL; + + GF_VALIDATE_OR_GOTO("worm", this, out); + GF_VALIDATE_OR_GOTO(this->name, reten_state, out); + GF_VALIDATE_OR_GOTO(this->name, file_ptr, out); + + gf_worm_serialize_state(reten_state, val); /* fills val with flags/ret_period/auto_commit_period */ + dict = dict_new(); + if (!dict) { + gf_log(this->name, GF_LOG_ERROR, "Error creating the dict"); + goto out; + } + ret = dict_set_str(dict, "trusted.reten_state", val); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, "Error in setting the dict"); + goto out; + } + if (fop_with_fd) + ret = syncop_fsetxattr(this, (fd_t *)file_ptr, dict, 0, NULL, NULL); /* file_ptr is an fd_t when fop_with_fd is true */ + else + ret = syncop_setxattr(this, (loc_t *)file_ptr, dict, 0, NULL, NULL); /* otherwise file_ptr is a loc_t */ +out: + if (dict) + dict_unref(dict); + return ret; +} + +/*This function checks whether the timeout for a file's state transition has + * elapsed and if yes, then it will do the transition from the current state + * to the appropriate state. It also decides whether to continue or to block + * the FOP. + * NOTE(review): the values 1 and 2 listed below do not match the code as + * written, which returns EROFS when the FOP is blocked and the error value of + * the failing call when a lookup or transition step fails -- confirm and + * update this list. + * Return: + * 0 : If the FOP should continue i.e., if the file is not in the WORM-Retained + * state or if the FOP is unlink and the file is not in the Retained state. + * 1: If the FOP should block i.e., if the file is in WORM-Retained/WORM state. 
+ * 2: Blocks the FOP if any operation fails while doing the state transition or + * fails to get the state of the file.*/ +int +gf_worm_state_transition(xlator_t *this, gf_boolean_t fop_with_fd, + void *file_ptr, glusterfs_fop_t op) +{ + int op_errno = EROFS; + int ret = -1; + time_t now = 0; + uint64_t com_period = 0; + uint64_t start_time = 0; + dict_t *dict = NULL; + worm_reten_state_t reten_state = { + 0, + }; + read_only_priv_t *priv = NULL; + struct iatt stbuf = { + 0, + }; + + priv = this->private; + GF_ASSERT(priv); + + if (fop_with_fd) + ret = syncop_fgetxattr(this, (fd_t *)file_ptr, &dict, + "trusted.start_time", NULL, NULL); + else + ret = syncop_getxattr(this, (loc_t *)file_ptr, &dict, + "trusted.start_time", NULL, NULL); + if (ret < 0 || !dict) { + op_errno = ret; + gf_msg(this->name, GF_LOG_ERROR, -ret, 0, "Error getting xattr"); + goto out; + } + ret = dict_get_uint64(dict, "trusted.start_time", &start_time); + if (ret) { + op_errno = ret; + gf_msg(this->name, GF_LOG_ERROR, -ret, 0, "Error getting start time"); + goto out; + } + + com_period = priv->com_period; + if (fop_with_fd) + ret = syncop_fstat(this, (fd_t *)file_ptr, &stbuf, NULL, NULL); + else + ret = syncop_stat(this, (loc_t *)file_ptr, &stbuf, NULL, NULL); + if (ret) { + op_errno = ret; + gf_msg(this->name, GF_LOG_ERROR, -ret, 0, "Error getting file stat"); + goto out; + } + + ret = worm_get_state(this, fop_with_fd, file_ptr, &reten_state); + if (ret == -2) { + op_errno = ret; + gf_msg(this->name, GF_LOG_ERROR, -ret, 0, + "Error getting worm/retention state"); + goto out; + } + + now = gf_time(); + + if (ret == -1 && (now - start_time) >= com_period) { + if ((now - stbuf.ia_mtime) >= com_period) { + ret = worm_set_state(this, fop_with_fd, file_ptr, &reten_state, + &stbuf); + if (ret) { + op_errno = ret; + gf_msg(this->name, GF_LOG_ERROR, -ret, 0, + "Error setting worm/retention state"); + goto out; + } + goto out; + } else { + op_errno = 0; + goto out; + } + } else if (ret == -1 && (now - 
start_time) < com_period) { + op_errno = 0; + goto out; + } else if (reten_state.retain && ((now >= stbuf.ia_atime))) { + gf_worm_state_lookup(this, fop_with_fd, file_ptr, &reten_state, &stbuf); + } + if (reten_state.worm && !reten_state.retain && priv->worm_files_deletable && + op == GF_FOP_UNLINK) { + op_errno = 0; + goto out; + } + +out: + if (dict) + dict_unref(dict); + return op_errno; +} + +/*Function to check whether a file is independently WORMed (i.e., file level + * WORM is set on the file). */ +int32_t +is_wormfile(xlator_t *this, gf_boolean_t fop_with_fd, void *file_ptr) +{ + int ret = -1; + dict_t *dict = NULL; + + if (fop_with_fd) + ret = syncop_fgetxattr(this, (fd_t *)file_ptr, &dict, + "trusted.worm_file", NULL, NULL); + else + ret = syncop_getxattr(this, (loc_t *)file_ptr, &dict, + "trusted.worm_file", NULL, NULL); + if (dict) { + ret = 0; + dict_unref(dict); + } + return ret; +} diff --git a/xlators/features/read-only/src/worm-helper.h b/xlators/features/read-only/src/worm-helper.h new file mode 100644 index 00000000000..b42f8d2b40c --- /dev/null +++ b/xlators/features/read-only/src/worm-helper.h @@ -0,0 +1,44 @@ +/* + Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +gf_boolean_t +gf_worm_write_disabled(struct iatt *stbuf); + +int32_t +worm_init_state(xlator_t *this, gf_boolean_t fop_with_fd, void *file_ptr); + +int32_t +worm_set_state(xlator_t *this, gf_boolean_t fop_with_fd, void *file_ptr, + worm_reten_state_t *retention_state, struct iatt *stbuf); + +int32_t +worm_get_state(xlator_t *this, gf_boolean_t fop_with_fd, void *file_ptr, + worm_reten_state_t *reten_state); + +void +gf_worm_state_lookup(xlator_t *this, gf_boolean_t fop_with_fd, void *file_ptr, + worm_reten_state_t *reten_state, struct iatt *stbuf); + +void +gf_worm_serialize_state(worm_reten_state_t *reten_state, char *val); + +void +gf_worm_deserialize_state(char *val, worm_reten_state_t *reten_state); + +int32_t +gf_worm_set_xattr(xlator_t *this, worm_reten_state_t *reten_state, + gf_boolean_t fop_with_fd, void *file_ptr); + +int +gf_worm_state_transition(xlator_t *this, gf_boolean_t fop_with_fd, + void *file_ptr, glusterfs_fop_t op); + +int32_t +is_wormfile(xlator_t *this, gf_boolean_t fop_with_fd, void *file_ptr); diff --git a/xlators/features/read-only/src/worm.c b/xlators/features/read-only/src/worm.c index f117e206285..1cc5526d5cd 100644 --- a/xlators/features/read-only/src/worm.c +++ b/xlators/features/read-only/src/worm.c @@ -1,5 +1,5 @@ /* - Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + Copyright (c) 2008-2012, 2016 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. This file is licensed to you under your choice of the GNU Lesser @@ -7,138 +7,716 @@ later), or the GNU General Public License, version 2 (GPLv2), in all cases as published by the Free Software Foundation. 
*/ -#include "xlator.h" -#include "defaults.h" +#include <glusterfs/xlator.h> +#include <glusterfs/defaults.h> #include "read-only-common.h" #include "read-only-mem-types.h" #include "read-only.h" +#include <glusterfs/syncop.h> +#include "worm-helper.h" int32_t -mem_acct_init (xlator_t *this) +mem_acct_init(xlator_t *this) { - int ret = -1; + int ret = -1; - ret = xlator_mem_acct_init (this, gf_read_only_mt_end + 1); - if (ret) - gf_log (this->name, GF_LOG_ERROR, "Memory accounting " - "initialization failed."); + ret = xlator_mem_acct_init(this, gf_read_only_mt_end + 1); + if (ret) + gf_log(this->name, GF_LOG_ERROR, + "Memory accounting " + "initialization failed."); - return ret; + return ret; } static int32_t -worm_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, fd_t *fd, dict_t *xdata) +worm_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + fd_t *fd, dict_t *xdata) { - STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata); + if (is_readonly_or_worm_enabled(frame, this) && + (flags & (O_WRONLY | O_RDWR | O_APPEND | O_TRUNC))) { + STACK_UNWIND_STRICT(open, frame, -1, EROFS, NULL, NULL); return 0; + } + + STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->open, + loc, flags, fd, xdata); + return 0; } -int32_t -worm_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - fd_t *fd, dict_t *xdata) +static int32_t +worm_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) { - if (is_readonly_or_worm_enabled (this) && - ((((flags & O_ACCMODE) == O_WRONLY) || - ((flags & O_ACCMODE) == O_RDWR)) && - !(flags & O_APPEND))) { - STACK_UNWIND_STRICT (open, frame, -1, EROFS, NULL, NULL); - return 0; + int op_errno = EROFS; + read_only_priv_t *priv = NULL; + + priv = this->private; + GF_ASSERT(priv); + if (is_readonly_or_worm_enabled(frame, this)) + goto out; + if (!priv->worm_file || (frame->root->pid < 0)) { + op_errno = 0; + goto 
out; + } + + gf_uuid_copy(oldloc->gfid, oldloc->inode->gfid); + if (is_wormfile(this, _gf_false, oldloc)) { + op_errno = 0; + goto out; + } + op_errno = gf_worm_state_transition(this, _gf_false, oldloc, GF_FOP_LINK); + +out: + if (op_errno) { + if (op_errno < 0) + op_errno = EROFS; + STACK_UNWIND_STRICT(link, frame, -1, op_errno, NULL, NULL, NULL, NULL, + NULL); + } else + STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->link, + oldloc, newloc, xdata); + return 0; +} + +static int32_t +worm_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + dict_t *xdata) +{ + int op_errno = EROFS; + read_only_priv_t *priv = NULL; + + priv = this->private; + GF_ASSERT(priv); + if (is_readonly_or_worm_enabled(frame, this)) { + goto out; + } + if (!priv->worm_file || (frame->root->pid < 0)) { + op_errno = 0; + goto out; + } + + gf_uuid_copy(loc->gfid, loc->inode->gfid); + if (is_wormfile(this, _gf_false, loc)) { + op_errno = 0; + goto out; + } + op_errno = gf_worm_state_transition(this, _gf_false, loc, GF_FOP_UNLINK); +out: + if (op_errno) { + if (op_errno < 0) + op_errno = EROFS; + STACK_UNWIND_STRICT(unlink, frame, -1, op_errno, NULL, NULL, NULL); + } else + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, loc, flags, xdata); + return 0; +} + +static int32_t +worm_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) +{ + int op_errno = EROFS; + read_only_priv_t *priv = NULL; + + priv = this->private; + GF_ASSERT(priv); + if (is_readonly_or_worm_enabled(frame, this)) + goto out; + if (!priv->worm_file || (frame->root->pid < 0)) { + op_errno = 0; + goto out; + } + + gf_uuid_copy(oldloc->gfid, oldloc->inode->gfid); + if (is_wormfile(this, _gf_false, oldloc)) { + op_errno = 0; + goto check_newloc; + } + op_errno = gf_worm_state_transition(this, _gf_false, oldloc, GF_FOP_RENAME); + + if (op_errno == 0) { + check_newloc: + if (newloc->inode != NULL) { + gf_uuid_copy(newloc->gfid, 
newloc->inode->gfid); + if (is_wormfile(this, _gf_false, newloc)) { + op_errno = 0; + goto out; + } + op_errno = gf_worm_state_transition(this, _gf_false, newloc, + GF_FOP_RENAME); } + } - STACK_WIND (frame, worm_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); - return 0; +out: + if (op_errno) { + if (op_errno < 0) + op_errno = EROFS; + STACK_UNWIND_STRICT(rename, frame, -1, op_errno, NULL, NULL, NULL, NULL, + NULL, NULL); + } else + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); + return 0; } -int32_t -init (xlator_t *this) +static int32_t +worm_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) +{ + int op_errno = EROFS; + read_only_priv_t *priv = NULL; + + priv = this->private; + GF_ASSERT(priv); + if (is_readonly_or_worm_enabled(frame, this)) + goto out; + if (!priv->worm_file || (frame->root->pid < 0)) { + op_errno = 0; + goto out; + } + + if (is_wormfile(this, _gf_false, loc)) { + op_errno = 0; + goto out; + } + op_errno = gf_worm_state_transition(this, _gf_false, loc, GF_FOP_TRUNCATE); + +out: + if (op_errno) { + if (op_errno < 0) + op_errno = EROFS; + STACK_UNWIND_STRICT(truncate, frame, -1, op_errno, NULL, NULL, NULL); + } else + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); + return 0; +} + +static int32_t +worm_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) { - int ret = -1; - read_only_priv_t *priv = NULL; + int op_errno = EROFS; + read_only_priv_t *priv = NULL; + + priv = this->private; + GF_ASSERT(priv); + if (is_readonly_or_worm_enabled(frame, this)) + goto out; + if (!priv->worm_file || (frame->root->pid < 0)) { + op_errno = 0; + goto out; + } - if (!this->children || this->children->next) { - gf_log (this->name, GF_LOG_ERROR, - "translator not configured with exactly one child"); - return -1; + if (is_wormfile(this, _gf_true, 
fd)) { + op_errno = 0; + goto out; + } + op_errno = gf_worm_state_transition(this, _gf_true, fd, GF_FOP_FTRUNCATE); + +out: + if (op_errno) { + if (op_errno < 0) + op_errno = EROFS; + STACK_UNWIND_STRICT(ftruncate, frame, -1, op_errno, NULL, NULL, NULL); + } else + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); + return 0; +} + +static int32_t +worm_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + struct iatt *stbuf, int32_t valid, dict_t *xdata) +{ + gf_boolean_t rd_only = _gf_false; + worm_reten_state_t reten_state = { + 0, + }; + struct iatt stpre = { + 0, + }; + read_only_priv_t *priv = NULL; + int op_errno = EROFS; + int ret = -1; + + priv = this->private; + GF_ASSERT(priv); + if (!priv->worm_file) { + op_errno = 0; + goto out; + } + + if (is_wormfile(this, _gf_false, loc)) { + op_errno = 0; + goto out; + } + if (valid & GF_SET_ATTR_MODE) { + rd_only = gf_worm_write_disabled(stbuf); + if (!rd_only) { + op_errno = 0; + goto out; } - if (!this->parents) { - gf_log (this->name, GF_LOG_WARNING, - "dangling volume. 
check volfile "); + ret = worm_set_state(this, _gf_false, loc, &reten_state, stbuf); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, "Error setting worm state"); + goto out; + } + } else if (valid & GF_SET_ATTR_ATIME) { + ret = worm_get_state(this, _gf_false, loc, &reten_state); + if (ret) { + op_errno = 0; + goto out; } + if (reten_state.retain) { + ret = syncop_stat(this, loc, &stpre, NULL, NULL); + if (ret) + goto out; + if (reten_state.ret_mode == 0) { + if (stbuf->ia_atime < stpre.ia_mtime) { + gf_log(this->name, GF_LOG_ERROR, + "Cannot set atime less than " + "the mtime for a WORM-Retained " + "file"); + goto out; + } + } else { + if (stbuf->ia_atime < stpre.ia_atime) { + gf_log(this->name, GF_LOG_ERROR, + "Cannot decrease the atime of a" + " WORM-Retained file in " + "Enterprise mode"); + goto out; + } + } + reten_state.ret_period = reten_state.ret_period + stbuf->ia_atime - + stpre.ia_atime; + ret = gf_worm_set_xattr(this, &reten_state, _gf_false, loc); + if (ret) { + goto out; + } + stbuf->ia_mtime = stpre.ia_mtime; + } + } + op_errno = 0; + +out: + if (op_errno) + STACK_UNWIND_STRICT(setattr, frame, -1, EROFS, NULL, NULL, NULL); + else + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, + xdata); + return 0; +} + +static int32_t +worm_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf, + int32_t valid, dict_t *xdata) +{ + gf_boolean_t rd_only = _gf_false; + worm_reten_state_t reten_state = { + 0, + }; + struct iatt stpre = { + 0, + }; + read_only_priv_t *priv = NULL; + int op_errno = EROFS; + int ret = -1; - priv = GF_CALLOC (1, sizeof (*priv), gf_read_only_mt_priv_t); - if (!priv) + priv = this->private; + GF_ASSERT(priv); + if (!priv->worm_file) { + op_errno = 0; + goto out; + } + + if (is_wormfile(this, _gf_true, fd)) { + op_errno = 0; + goto out; + } + if (valid & GF_SET_ATTR_MODE) { + rd_only = gf_worm_write_disabled(stbuf); + if (!rd_only) { + op_errno = 0; + goto out; + } + + 
ret = worm_set_state(this, _gf_true, fd, &reten_state, stbuf); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, "Error setting worm state"); + goto out; + } + } else if (valid & GF_SET_ATTR_ATIME) { + ret = worm_get_state(this, _gf_true, fd, &reten_state); + if (ret) { + op_errno = 0; + goto out; + } + if (reten_state.retain) { + ret = syncop_fstat(this, fd, &stpre, NULL, NULL); + if (ret) goto out; + if (reten_state.ret_mode == 0) { + if (stbuf->ia_atime < stpre.ia_mtime) { + gf_log(this->name, GF_LOG_ERROR, + "Cannot set atime less than " + "the mtime for a WORM-Retained " + "file"); + goto out; + } + } else { + if (stbuf->ia_atime < stpre.ia_atime) { + gf_log(this->name, GF_LOG_ERROR, + "Cannot decrease the atime of a" + " WORM-Retained file in " + "Enterprise mode"); + goto out; + } + } + reten_state.ret_period = reten_state.ret_period + stbuf->ia_atime - + stpre.ia_atime; + ret = gf_worm_set_xattr(this, &reten_state, _gf_true, fd); + if (ret) { + goto out; + } - GF_OPTION_INIT ("worm", priv->readonly_or_worm_enabled, bool, out); + stbuf->ia_mtime = stpre.ia_mtime; + } + } + op_errno = 0; - this->private = priv; - ret = 0; out: - return ret; + if (op_errno) + STACK_UNWIND_STRICT(fsetattr, frame, -1, op_errno, NULL, NULL, NULL); + else + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, + xdata); + return 0; } -int -reconfigure (xlator_t *this, dict_t *options) +static int32_t +worm_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, + int32_t count, off_t offset, uint32_t flags, struct iobref *iobref, + dict_t *xdata) { - read_only_priv_t *priv = NULL; - int ret = -1; - gf_boolean_t readonly_or_worm_enabled = _gf_false; + read_only_priv_t *priv = NULL; + int op_errno = EROFS; - priv = this->private; - GF_ASSERT (priv); + priv = this->private; + GF_ASSERT(priv); + if (!priv->worm_file || (frame->root->pid < 0)) { + op_errno = 0; + goto out; + } + if (is_wormfile(this, _gf_true, fd)) { + 
op_errno = 0; + goto out; + } + op_errno = gf_worm_state_transition(this, _gf_true, fd, GF_FOP_WRITE); - GF_OPTION_RECONF ("worm", readonly_or_worm_enabled, options, bool, out); +out: + if (op_errno) { + if (op_errno < 0) + op_errno = EROFS; + STACK_UNWIND_STRICT(writev, frame, -1, op_errno, NULL, NULL, NULL); + } else + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->writev, fd, vector, count, + offset, flags, iobref, xdata); + return 0; +} + +static int32_t +worm_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + int ret = 0; + read_only_priv_t *priv = NULL; + // In case of an error exit because fd can be NULL and this would + // cause an segfault when performing fsetxattr . We explicitly + // unwind to avoid future problems + if (op_ret < 0) { + goto out; + } + + priv = this->private; + GF_ASSERT(priv); + if (priv->worm_file) { + ret = fd_ctx_set(fd, this, 1); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, + "Failed to set the fd ctx " + "for gfid:%s . 
Worm feature may not work for the gfid", + uuid_utoa(inode->gfid)); + } + ret = worm_init_state(this, _gf_true, fd); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, "Error initializing state"); + } + } - priv->readonly_or_worm_enabled = readonly_or_worm_enabled; - ret = 0; out: - gf_log (this->name, GF_LOG_DEBUG, "returning %d", ret); - return ret; + STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, buf, + preparent, postparent, xdata); + return ret; } -void -fini (xlator_t *this) +static int32_t +worm_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) +{ + STACK_WIND(frame, worm_create_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd, + xdata); + return 0; +} + +static void +set_reten_mode(read_only_priv_t *priv, char *reten_mode) +{ + if (strcmp(reten_mode, "relax") == 0) + priv->reten_mode = 0; + else + priv->reten_mode = 1; +} + +int32_t +init(xlator_t *this) +{ + int ret = -1; + read_only_priv_t *priv = NULL; + char *reten_mode = NULL; + + if (!this->children || this->children->next) { + gf_log(this->name, GF_LOG_ERROR, + "translator not configured with exactly one child"); + return -1; + } + + if (!this->parents) { + gf_log(this->name, GF_LOG_WARNING, "dangling volume. 
check volfile "); + } + + this->local_pool = mem_pool_new(read_only_priv_t, 64); + if (!this->local_pool) { + ret = -1; + gf_log(this->name, GF_LOG_ERROR, + "failed to create read_only_priv_t's memory pool"); + goto out; + } + + priv = mem_get0(this->local_pool); + if (!priv) { + gf_log(this->name, GF_LOG_ERROR, "Error allocating priv"); + goto out; + } + + this->private = priv; + + GF_OPTION_INIT("worm", priv->readonly_or_worm_enabled, bool, out); + GF_OPTION_INIT("worm-file-level", priv->worm_file, bool, out); + GF_OPTION_INIT("default-retention-period", priv->reten_period, int64, out); + GF_OPTION_INIT("auto-commit-period", priv->com_period, int64, out); + GF_OPTION_INIT("retention-mode", reten_mode, str, out); + set_reten_mode(priv, reten_mode); + GF_OPTION_INIT("worm-files-deletable", priv->worm_files_deletable, bool, + out); + + ret = 0; +out: + return ret; +} + +int +reconfigure(xlator_t *this, dict_t *options) { - read_only_priv_t *priv = NULL; + read_only_priv_t *priv = NULL; + char *reten_mode = NULL; + int ret = -1; - priv = this->private; - if (!priv) - return; + priv = this->private; + GF_ASSERT(priv); - this->private = NULL; - GF_FREE (priv); + GF_OPTION_RECONF("worm", priv->readonly_or_worm_enabled, options, bool, + out); + GF_OPTION_RECONF("worm-file-level", priv->worm_file, options, bool, out); + GF_OPTION_RECONF("default-retention-period", priv->reten_period, options, + int64, out); + GF_OPTION_RECONF("retention-mode", reten_mode, options, str, out); + set_reten_mode(priv, reten_mode); + GF_OPTION_RECONF("auto-commit-period", priv->com_period, options, int64, + out); + GF_OPTION_RECONF("worm-files-deletable", priv->worm_files_deletable, + options, bool, out); + ret = 0; +out: + gf_log(this->name, GF_LOG_DEBUG, "returning %d", ret); + return ret; +} - return; +void +fini(xlator_t *this) +{ + read_only_priv_t *priv = NULL; + + priv = this->private; + if (!priv) + goto out; + mem_put(priv); + this->private = NULL; + 
mem_pool_destroy(this->local_pool); + this->local_pool = NULL; +out: + return; } struct xlator_fops fops = { - .open = worm_open, - - .unlink = ro_unlink, - .rmdir = ro_rmdir, - .rename = ro_rename, - .truncate = ro_truncate, - .removexattr = ro_removexattr, - .fsyncdir = ro_fsyncdir, - .xattrop = ro_xattrop, - .inodelk = ro_inodelk, - .finodelk = ro_finodelk, - .entrylk = ro_entrylk, - .fentrylk = ro_fentrylk, - .lk = ro_lk, + .open = worm_open, + .writev = worm_writev, + .setattr = worm_setattr, + .fsetattr = worm_fsetattr, + .rename = worm_rename, + .link = worm_link, + .unlink = worm_unlink, + .truncate = worm_truncate, + .ftruncate = worm_ftruncate, + .create = worm_create, + + .rmdir = ro_rmdir, + .removexattr = ro_removexattr, + .fsyncdir = ro_fsyncdir, + .xattrop = ro_xattrop, + .inodelk = ro_inodelk, + .finodelk = ro_finodelk, + .entrylk = ro_entrylk, + .fentrylk = ro_fentrylk, + .lk = ro_lk, }; -struct xlator_cbks cbks; +int32_t +worm_release(xlator_t *this, fd_t *fd) +{ + dict_t *dict = NULL; + int ret = -1; + dict = dict_new(); + uint64_t value = 0; + loc_t loc = { + 0, + }; + read_only_priv_t *priv = NULL; + priv = this->private; + + if (priv->worm_file) { + if (!dict) { + gf_log(this->name, GF_LOG_ERROR, "Error creating the dict"); + goto out; + } + + ret = fd_ctx_get(fd, this, &value); + if (ret) { + gf_log(this->name, GF_LOG_DEBUG, "Failed to get the fd ctx"); + } + if (!value) { + goto out; + } + + ret = dict_set_int8(dict, "trusted.worm_file", 1); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, + "Error in setting " + "the dict"); + goto out; + } + + loc.inode = inode_ref(fd->inode); + gf_uuid_copy(loc.gfid, fd->inode->gfid); + ret = syncop_setxattr(this, &loc, dict, 0, NULL, NULL); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, "Error setting xattr"); + goto out; + } + + gf_worm_state_transition(this, _gf_false, &loc, GF_FOP_WRITE); + } + +out: + loc_wipe(&loc); + if (dict) + dict_unref(dict); + return 0; +} + +struct xlator_cbks cbks = { + 
.release = worm_release, +}; struct volume_options options[] = { - { .key = {"worm"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", - .description = "When \"on\", makes a volume get write once read many " - " feature. It is turned \"off\" by default." - }, + {.key = {"worm"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + /*.validate_fn = validate_boolean,*/ + .op_version = {2}, + .flags = OPT_FLAG_SETTABLE, + .description = "When \"on\", makes a volume get write once read many " + " feature. It is turned \"off\" by default."}, + {.key = {"worm-file-level"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + /*.validate_fn = validate_boolean,*/ + .op_version = {GD_OP_VERSION_3_8_0}, + .flags = OPT_FLAG_SETTABLE, + .description = "When \"on\", activates the file level worm. " + "It is turned \"off\" by default."}, + {.key = {"worm-files-deletable"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "on", + /*.validate_fn = validate_boolean,*/ + .op_version = {GD_OP_VERSION_3_13_0}, + .flags = OPT_FLAG_SETTABLE, + .description = "When \"off\", doesn't allow the Worm files" + "to be deleted. It is turned \"on\" by default."}, + {.key = {"default-retention-period"}, + .type = GF_OPTION_TYPE_TIME, + .default_value = "120", + /*.validate_fn = validate_worm_period,*/ + .op_version = {GD_OP_VERSION_3_8_0}, + .flags = OPT_FLAG_SETTABLE, + .description = "The default retention period for the files."}, + {.key = {"retention-mode"}, + .type = GF_OPTION_TYPE_STR, + .default_value = "relax", + /*.validate_fn = validate_reten_mode,*/ + .op_version = {GD_OP_VERSION_3_8_0}, + .flags = OPT_FLAG_SETTABLE, + .description = "The mode of retention (relax/enterprise). 
" + "It is relax by default."}, + {.key = {"auto-commit-period"}, + .type = GF_OPTION_TYPE_TIME, + .default_value = "180", + /*.validate_fn = validate_worm_period,*/ + .op_version = {GD_OP_VERSION_3_8_0}, + .flags = OPT_FLAG_SETTABLE, + .description = "Auto commit period for the files."}, + {.key = {NULL}}, }; +xlator_api_t xlator_api = { + .init = init, + .fini = fini, + .reconfigure = reconfigure, + .mem_acct_init = mem_acct_init, + .op_version = {1}, /* Present from the initial version */ + .fops = &fops, + .cbks = &cbks, + .options = options, + .identifier = "worm", + .category = GF_TECH_PREVIEW, +}; diff --git a/xlators/features/sdfs/Makefile.am b/xlators/features/sdfs/Makefile.am new file mode 100644 index 00000000000..a985f42a877 --- /dev/null +++ b/xlators/features/sdfs/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/features/sdfs/src/Makefile.am b/xlators/features/sdfs/src/Makefile.am new file mode 100644 index 00000000000..6118d46ad22 --- /dev/null +++ b/xlators/features/sdfs/src/Makefile.am @@ -0,0 +1,19 @@ +if WITH_SERVER +xlator_LTLIBRARIES = sdfs.la +endif +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features + +sdfs_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) + +sdfs_la_SOURCES = sdfs.c +sdfs_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +noinst_HEADERS = sdfs.h sdfs-messages.h $(top_builddir)/xlators/lib/src/libxlator.h + +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ + -I$(top_srcdir)/xlators/lib/src \ + -I$(top_srcdir)/rpc/xdr/src/ -I$(top_builddir)/rpc/xdr/src/ + +AM_CFLAGS = -Wall -fno-strict-aliasing $(GF_CFLAGS) + +CLEANFILES = diff --git a/xlators/features/sdfs/src/sdfs-messages.h b/xlators/features/sdfs/src/sdfs-messages.h new file mode 100644 index 00000000000..3053efa8935 --- /dev/null +++ b/xlators/features/sdfs/src/sdfs-messages.h @@ -0,0 +1,67 @@ +/* + Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. 
+ + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. + */ + +#ifndef _SDFS_MESSAGES_H_ +#define _SDFS_MESSAGES_H_ + +#include <glusterfs/glfs-message-id.h> + +/* file sdfs-messages.h + * brief SDFS log-message IDs and their descriptions + */ + +/* NOTE: Rules for message additions + * 1) Each instance of a message is _better_ left with a unique message ID, even + * if the message format is the same. Reasoning is that, if the message + * format needs to change in one instance, the other instances are not + * impacted or the new change does not change the ID of the instance being + * modified. + * 2) Addition of a message, + * - Should increment the GLFS_SDFS_NUM_MESSAGES + * - Append to the list of messages defined, towards the end + * - Retain macro naming as glfs_msg_X (for readability across developers) + * NOTE: Rules for message format modifications + * 3) Check across the code if the message ID macro in question is reused + * anywhere. If reused then the modifications should ensure correctness + * everywhere, or needs a new message ID as (1) above was not adhered to. If + * not used anywhere, proceed with the required modification. + * NOTE: Rules for message deletion + * 4) Check (3) and if used anywhere else, then cannot be deleted. If not used + * anywhere, then can be deleted, but will leave a hole by design, as + * addition rules specify modification to the end of the list and not filling + * holes. + */ + +#define GLFS_SDFS_BASE GLFS_MSGID_COMP_SDFS +#define GLFS_SDFS_NUM_MESSAGES 2 +#define GLFS_MSGID_END (GLFS_SDFS_BASE + GLFS_SDFS_NUM_MESSAGES + 1) +/* Messages with message IDs */ +#define glfs_msg_start_x GLFS_SDFS_BASE, "Invalid: Start of messages" +/*------------*/ + +#define SDFS_MSG_ENTRYLK_ERROR (GLFS_SDFS_BASE + 1) +/*! 
+ * @messageid + * @diagnosis + * @recommendedaction + * + */ + +#define SDFS_MSG_MKDIR_ERROR (GLFS_SDFS_BASE + 2) +/*! + * @messageid + * @diagnosis + * @recommendedaction + * + */ +/*------------*/ + +#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages" +#endif /* !_SDFS_MESSAGES_H_ */ diff --git a/xlators/features/sdfs/src/sdfs.c b/xlators/features/sdfs/src/sdfs.c new file mode 100644 index 00000000000..aaf13f0852e --- /dev/null +++ b/xlators/features/sdfs/src/sdfs.c @@ -0,0 +1,1479 @@ +/* + Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#include <libgen.h> +#include "sdfs.h" + +static int +sdfs_frame_return(call_frame_t *frame) +{ + sdfs_local_t *local = NULL; + + if (!frame) + return -1; + + local = frame->local; + + return GF_ATOMIC_DEC(local->call_cnt); +} + +static void +sdfs_lock_free(sdfs_entry_lock_t *entrylk) +{ + if (entrylk == NULL) + goto out; + + loc_wipe(&entrylk->parent_loc); + GF_FREE(entrylk->basename); + +out: + return; +} + +static void +sdfs_lock_array_free(sdfs_lock_t *lock) +{ + sdfs_entry_lock_t *entrylk = NULL; + int i = 0; + + if (lock == NULL) + goto out; + + for (i = 0; i < lock->lock_count; i++) { + entrylk = &lock->entrylk[i]; + sdfs_lock_free(entrylk); + } + +out: + return; +} + +static void +sdfs_local_cleanup(sdfs_local_t *local) +{ + if (!local) + return; + + loc_wipe(&local->loc); + loc_wipe(&local->parent_loc); + + if (local->stub) { + call_stub_destroy(local->stub); + local->stub = NULL; + } + + sdfs_lock_array_free(local->lock); + GF_FREE(local->lock); + + mem_put(local); +} + +static int +sdfs_build_parent_loc(loc_t *parent, loc_t *child) +{ + int ret = -1; + char *path = NULL; + + if (!child->parent) { + 
goto out; + } + parent->inode = inode_ref(child->parent); + path = gf_strdup(child->path); + if (!path) { + ret = -ENOMEM; + goto out; + } + + parent->path = dirname(path); + if (!parent->path) { + goto out; + } + + gf_uuid_copy(parent->gfid, child->pargfid); + return 0; + +out: + GF_FREE(path); + return ret; +} + +static sdfs_local_t * +sdfs_local_init(call_frame_t *frame, xlator_t *this) +{ + sdfs_local_t *local = NULL; + + local = mem_get0(this->local_pool); + if (!local) + goto out; + + frame->local = local; +out: + return local; +} + +static int +sdfs_get_new_frame_common(call_frame_t *frame, call_frame_t **new_frame) +{ + int ret = -1; + sdfs_local_t *local = NULL; + client_t *client = NULL; + + *new_frame = copy_frame(frame); + if (!*new_frame) { + goto err; + } + + client = frame->root->client; + gf_client_ref(client); + (*new_frame)->root->client = client; + + local = sdfs_local_init(*new_frame, THIS); + if (!local) { + goto err; + } + + local->main_frame = frame; + /*Set unique lk-owner for the fop*/ + set_lk_owner_from_ptr(&(*new_frame)->root->lk_owner, (*new_frame)->root); + + ret = 0; +err: + if ((ret == -1) && (*new_frame)) { + SDFS_STACK_DESTROY((*new_frame)); + *new_frame = NULL; + } + + return ret; +} + +static int +sdfs_get_new_frame(call_frame_t *frame, loc_t *loc, call_frame_t **new_frame) +{ + int ret = -1; + sdfs_local_t *local = NULL; + + ret = sdfs_get_new_frame_common(frame, new_frame); + if (ret < 0) { + goto err; + } + + local = (*new_frame)->local; + + ret = sdfs_build_parent_loc(&local->parent_loc, loc); + if (ret) { + goto err; + } + + ret = loc_copy(&local->loc, loc); + if (ret == -1) { + goto err; + } + + ret = 0; +err: + if (ret && (*new_frame)) { + SDFS_STACK_DESTROY((*new_frame)); + *new_frame = NULL; + ret = -1; + } + + return ret; +} + +static int +sdfs_get_new_frame_readdirp(call_frame_t *frame, fd_t *fd, + call_frame_t **new_frame) +{ + int ret = -1; + sdfs_local_t *local = NULL; + + ret = sdfs_get_new_frame_common(frame, 
new_frame); + if (ret < 0) { + goto err; + } + + local = (*new_frame)->local; + local->parent_loc.inode = inode_ref(fd->inode); + gf_uuid_copy(local->parent_loc.gfid, fd->inode->gfid); + + ret = 0; +err: + return ret; +} + +int +sdfs_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + sdfs_local_t *local = NULL; + call_stub_t *stub = NULL; + + local = frame->local; + + local->op_ret = op_ret; + local->op_errno = op_errno; + + if (local->stub) { + stub = local->stub; + local->stub = NULL; + call_resume(stub); + } else { + if (op_ret < 0) + gf_msg(this->name, GF_LOG_ERROR, 0, SDFS_MSG_ENTRYLK_ERROR, + "Unlocking entry lock failed for %s", local->loc.name); + + SDFS_STACK_DESTROY(frame); + } + + return 0; +} + +int +sdfs_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *stbuf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + sdfs_local_t *local = NULL; + + local = frame->local; + + STACK_UNWIND_STRICT(mkdir, local->main_frame, op_ret, op_errno, inode, + stbuf, preparent, postparent, xdata); + + local->main_frame = NULL; + STACK_WIND(frame, sdfs_entrylk_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->entrylk, this->name, &local->parent_loc, + local->loc.name, ENTRYLK_UNLOCK, ENTRYLK_WRLCK, xdata); + return 0; +} + +int +sdfs_mkdir_helper(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + mode_t umask, dict_t *xdata) +{ + sdfs_local_t *local = NULL; + char gfid[GF_UUID_BUF_SIZE] = {0}; + int op_errno = -1; + + local = frame->local; + + gf_uuid_unparse(loc->pargfid, gfid); + + if (local->op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, SDFS_MSG_ENTRYLK_ERROR, + "Acquiring entry lock failed for directory %s " + "with parent gfid %s", + local->loc.name, gfid); + op_errno = local->op_errno; + goto err; + } + + STACK_WIND(frame, sdfs_mkdir_cbk, FIRST_CHILD(this), + 
FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata); + + return 0; +err: + STACK_UNWIND_STRICT(mkdir, local->main_frame, -1, op_errno, NULL, NULL, + NULL, NULL, NULL); + + local->main_frame = NULL; + SDFS_STACK_DESTROY(frame); + return 0; +} + +int +sdfs_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + mode_t umask, dict_t *xdata) +{ + sdfs_local_t *local = NULL; + call_frame_t *new_frame = NULL; + call_stub_t *stub = NULL; + int op_errno = 0; + + if (-1 == sdfs_get_new_frame(frame, loc, &new_frame)) { + op_errno = ENOMEM; + goto err; + } + + stub = fop_mkdir_stub(new_frame, sdfs_mkdir_helper, loc, mode, umask, + xdata); + if (!stub) { + op_errno = ENOMEM; + goto err; + } + + local = new_frame->local; + local->stub = stub; + + STACK_WIND(new_frame, sdfs_entrylk_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->entrylk, this->name, &local->parent_loc, + local->loc.name, ENTRYLK_LOCK, ENTRYLK_WRLCK, xdata); + + return 0; +err: + STACK_UNWIND_STRICT(mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL, + NULL); + + if (new_frame) + SDFS_STACK_DESTROY(new_frame); + + return 0; +} + +int +sdfs_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + sdfs_local_t *local = NULL; + + local = frame->local; + + STACK_UNWIND_STRICT(rmdir, local->main_frame, op_ret, op_errno, preparent, + postparent, xdata); + + local->main_frame = NULL; + STACK_WIND(frame, sdfs_entrylk_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->entrylk, this->name, &local->parent_loc, + local->loc.name, ENTRYLK_UNLOCK, ENTRYLK_WRLCK, xdata); + return 0; +} + +int +sdfs_rmdir_helper(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + dict_t *xdata) +{ + sdfs_local_t *local = NULL; + char gfid[GF_UUID_BUF_SIZE] = {0}; + + local = frame->local; + + gf_uuid_unparse(loc->pargfid, gfid); + + if (local->op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, 
SDFS_MSG_ENTRYLK_ERROR, + "Acquiring entry lock failed for directory %s " + "with parent gfid %s", + local->loc.name, gfid); + goto err; + } + + STACK_WIND(frame, sdfs_rmdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rmdir, loc, flags, xdata); + + return 0; +err: + STACK_UNWIND_STRICT(rmdir, local->main_frame, -1, local->op_errno, NULL, + NULL, NULL); + + local->main_frame = NULL; + SDFS_STACK_DESTROY(frame); + return 0; +} + +int +sdfs_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + dict_t *xdata) +{ + sdfs_local_t *local = NULL; + call_frame_t *new_frame = NULL; + call_stub_t *stub = NULL; + int op_errno = 0; + + if (-1 == sdfs_get_new_frame(frame, loc, &new_frame)) { + op_errno = ENOMEM; + goto err; + } + + stub = fop_rmdir_stub(new_frame, sdfs_rmdir_helper, loc, flags, xdata); + if (!stub) { + op_errno = ENOMEM; + goto err; + } + + local = new_frame->local; + local->stub = stub; + + STACK_WIND(new_frame, sdfs_entrylk_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->entrylk, this->name, &local->parent_loc, + local->loc.name, ENTRYLK_LOCK, ENTRYLK_WRLCK, xdata); + + return 0; +err: + STACK_UNWIND_STRICT(rmdir, frame, -1, op_errno, NULL, NULL, NULL); + + if (new_frame) + SDFS_STACK_DESTROY(new_frame); + + return 0; +} + +int +sdfs_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, + struct iatt *stbuf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + sdfs_local_t *local = NULL; + + local = frame->local; + + STACK_UNWIND_STRICT(create, local->main_frame, op_ret, op_errno, fd, inode, + stbuf, preparent, postparent, xdata); + + local->main_frame = NULL; + STACK_WIND(frame, sdfs_entrylk_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->entrylk, this->name, &local->parent_loc, + local->loc.name, ENTRYLK_UNLOCK, ENTRYLK_WRLCK, xdata); + return 0; +} + +int +sdfs_create_helper(call_frame_t *frame, xlator_t *this, loc_t *loc, + int32_t flags, 
mode_t mode, mode_t umask, fd_t *fd, + dict_t *xdata) +{ + sdfs_local_t *local = NULL; + char gfid[GF_UUID_BUF_SIZE] = {0}; + + local = frame->local; + + gf_uuid_unparse(loc->pargfid, gfid); + + if (local->op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, SDFS_MSG_ENTRYLK_ERROR, + "Acquiring entry lock failed for directory %s " + "with parent gfid %s", + local->loc.name, gfid); + goto err; + } + + STACK_WIND(frame, sdfs_create_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd, + xdata); + + return 0; +err: + STACK_UNWIND_STRICT(create, local->main_frame, -1, local->op_errno, NULL, + NULL, NULL, NULL, NULL, NULL); + + local->main_frame = NULL; + SDFS_STACK_DESTROY(frame); + return 0; +} + +int +sdfs_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) +{ + sdfs_local_t *local = NULL; + call_frame_t *new_frame = NULL; + call_stub_t *stub = NULL; + int op_errno = 0; + + if (-1 == sdfs_get_new_frame(frame, loc, &new_frame)) { + op_errno = ENOMEM; + goto err; + } + + stub = fop_create_stub(new_frame, sdfs_create_helper, loc, flags, mode, + umask, fd, xdata); + if (!stub) { + op_errno = ENOMEM; + goto err; + } + + local = new_frame->local; + local->stub = stub; + + STACK_WIND(new_frame, sdfs_entrylk_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->entrylk, this->name, &local->parent_loc, + local->loc.name, ENTRYLK_LOCK, ENTRYLK_WRLCK, xdata); + + return 0; +err: + STACK_UNWIND_STRICT(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, + NULL, NULL); + + if (new_frame) + SDFS_STACK_DESTROY(new_frame); + + return 0; +} + +int +sdfs_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + sdfs_local_t *local = NULL; + + local = frame->local; + + STACK_UNWIND_STRICT(unlink, local->main_frame, op_ret, op_errno, preparent, + postparent, xdata); + + 
local->main_frame = NULL; + STACK_WIND(frame, sdfs_entrylk_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->entrylk, this->name, &local->parent_loc, + local->loc.name, ENTRYLK_UNLOCK, ENTRYLK_WRLCK, xdata); + return 0; +} + +int +sdfs_unlink_helper(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + dict_t *xdata) +{ + sdfs_local_t *local = NULL; + char gfid[GF_UUID_BUF_SIZE] = {0}; + + local = frame->local; + + gf_uuid_unparse(loc->pargfid, gfid); + + if (local->op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, SDFS_MSG_ENTRYLK_ERROR, + "Acquiring entry lock failed for directory %s " + "with parent gfid %s", + local->loc.name, gfid); + goto err; + } + + STACK_WIND(frame, sdfs_unlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, loc, flags, xdata); + + return 0; +err: + STACK_UNWIND_STRICT(unlink, local->main_frame, -1, local->op_errno, NULL, + NULL, NULL); + + local->main_frame = NULL; + SDFS_STACK_DESTROY(frame); + return 0; +} + +int +sdfs_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + dict_t *xdata) +{ + sdfs_local_t *local = NULL; + call_frame_t *new_frame = NULL; + call_stub_t *stub = NULL; + int op_errno = 0; + + if (-1 == sdfs_get_new_frame(frame, loc, &new_frame)) { + op_errno = ENOMEM; + goto err; + } + + stub = fop_unlink_stub(new_frame, sdfs_unlink_helper, loc, flags, xdata); + if (!stub) { + op_errno = ENOMEM; + goto err; + } + + local = new_frame->local; + local->stub = stub; + + STACK_WIND(new_frame, sdfs_entrylk_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->entrylk, this->name, &local->parent_loc, + local->loc.name, ENTRYLK_LOCK, ENTRYLK_WRLCK, xdata); + + return 0; +err: + STACK_UNWIND_STRICT(unlink, frame, -1, op_errno, NULL, NULL, NULL); + + if (new_frame) + SDFS_STACK_DESTROY(new_frame); + + return 0; +} + +int +sdfs_symlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *stbuf, struct iatt *preparent, + struct iatt 
*postparent, dict_t *xdata) +{ + sdfs_local_t *local = NULL; + + local = frame->local; + + STACK_UNWIND_STRICT(link, local->main_frame, op_ret, op_errno, inode, stbuf, + preparent, postparent, xdata); + + local->main_frame = NULL; + STACK_WIND(frame, sdfs_entrylk_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->entrylk, this->name, &local->parent_loc, + local->loc.name, ENTRYLK_UNLOCK, ENTRYLK_WRLCK, xdata); + return 0; +} + +int +sdfs_symlink_helper(call_frame_t *frame, xlator_t *this, const char *linkname, + loc_t *loc, mode_t umask, dict_t *xdata) +{ + sdfs_local_t *local = NULL; + char gfid[GF_UUID_BUF_SIZE] = {0}; + + local = frame->local; + + gf_uuid_unparse(loc->pargfid, gfid); + + if (local->op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, SDFS_MSG_ENTRYLK_ERROR, + "Acquiring entry lock failed for directory %s " + "with parent gfid %s", + local->loc.name, gfid); + goto err; + } + + STACK_WIND(frame, sdfs_symlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->symlink, linkname, loc, umask, xdata); + + return 0; +err: + STACK_UNWIND_STRICT(link, local->main_frame, -1, local->op_errno, NULL, + NULL, NULL, NULL, NULL); + + local->main_frame = NULL; + SDFS_STACK_DESTROY(frame); + return 0; +} + +int +sdfs_symlink(call_frame_t *frame, xlator_t *this, const char *linkname, + loc_t *loc, mode_t umask, dict_t *xdata) +{ + sdfs_local_t *local = NULL; + call_frame_t *new_frame = NULL; + call_stub_t *stub = NULL; + int op_errno = 0; + + if (-1 == sdfs_get_new_frame(frame, loc, &new_frame)) { + op_errno = ENOMEM; + goto err; + } + + stub = fop_symlink_stub(new_frame, sdfs_symlink_helper, linkname, loc, + umask, xdata); + if (!stub) { + op_errno = ENOMEM; + goto err; + } + + local = new_frame->local; + local->stub = stub; + + STACK_WIND(new_frame, sdfs_entrylk_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->entrylk, this->name, &local->parent_loc, + local->loc.name, ENTRYLK_LOCK, ENTRYLK_WRLCK, xdata); + + return 0; +err: + STACK_UNWIND_STRICT(link, frame, 
-1, op_errno, NULL, NULL, NULL, NULL, + NULL); + + if (new_frame) + SDFS_STACK_DESTROY(new_frame); + + return 0; +} + +int +sdfs_common_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + sdfs_local_t *local = NULL; + int this_call_cnt = 0; + int lk_index = 0; + sdfs_lock_t *locks = NULL; + call_stub_t *stub = NULL; + + local = frame->local; + locks = local->lock; + lk_index = (long)cookie; + + if (op_ret < 0) { + local->op_ret = op_ret; + local->op_errno = op_errno; + } else { + locks->entrylk->locked[lk_index] = _gf_true; + } + + this_call_cnt = sdfs_frame_return(frame); + if (this_call_cnt > 0) { + gf_log(this->name, GF_LOG_DEBUG, + "As there are more callcnt (%d) returning without WIND", + this_call_cnt); + return 0; + } + + if (local->stub) { + stub = local->stub; + local->stub = NULL; + call_resume(stub); + } else { + if (local->op_ret < 0) + gf_msg(this->name, GF_LOG_ERROR, 0, SDFS_MSG_ENTRYLK_ERROR, + "unlocking entry lock failed "); + SDFS_STACK_DESTROY(frame); + } + + return 0; +} + +int +sdfs_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, inode_t *inode, struct iatt *stbuf, + struct iatt *preparent, struct iatt *postparent, dict_t *xdata) +{ + sdfs_local_t *local = NULL; + sdfs_lock_t *lock = NULL; + int i = 0; + int lock_count = 0; + + local = frame->local; + lock = local->lock; + + STACK_UNWIND_STRICT(link, local->main_frame, op_ret, op_errno, inode, stbuf, + preparent, postparent, xdata); + + local->main_frame = NULL; + lock_count = lock->lock_count; + for (i = 0; i < lock_count; i++) { + STACK_WIND_COOKIE(frame, sdfs_common_entrylk_cbk, (void *)(long)i, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->entrylk, + this->name, &lock->entrylk[i].parent_loc, + lock->entrylk[i].basename, ENTRYLK_UNLOCK, + ENTRYLK_WRLCK, xdata); + } + + return 0; +} + +int +sdfs_link_helper(call_frame_t *frame, xlator_t *this, loc_t *oldloc, + loc_t *newloc, 
dict_t *xdata) +{ + sdfs_local_t *local = NULL; + sdfs_lock_t *locks = NULL; + gf_boolean_t stack_destroy = _gf_true; + int lock_count = 0; + int i = 0; + + local = frame->local; + locks = local->lock; + + if (local->op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, SDFS_MSG_ENTRYLK_ERROR, + "Acquiring entry lock failed"); + goto err; + } + + STACK_WIND(frame, sdfs_link_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata); + + return 0; +err: + STACK_UNWIND_STRICT(link, local->main_frame, -1, local->op_errno, NULL, + NULL, NULL, NULL, NULL); + + local->main_frame = NULL; + for (i = 0; i < locks->lock_count && locks->entrylk->locked[i]; i++) { + lock_count++; + } + GF_ATOMIC_INIT(local->call_cnt, lock_count); + + for (i = 0; i < lock_count; i++) { + if (!locks->entrylk->locked[i]) { + lock_count++; + continue; + } + + stack_destroy = _gf_false; + STACK_WIND(frame, sdfs_common_entrylk_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->entrylk, this->name, + &locks->entrylk[i].parent_loc, locks->entrylk[i].basename, + ENTRYLK_UNLOCK, ENTRYLK_WRLCK, xdata); + } + + if (stack_destroy) + SDFS_STACK_DESTROY(frame); + + return 0; +} + +static int +sdfs_init_entry_lock(sdfs_entry_lock_t *lock, loc_t *loc) +{ + int ret = 0; + + ret = sdfs_build_parent_loc(&lock->parent_loc, loc); + if (ret) + return -1; + + lock->basename = gf_strdup(loc->name); + if (!lock->basename) + return -1; + + return 0; +} + +int +sdfs_entry_lock_cmp(const void *l1, const void *l2) +{ + const sdfs_entry_lock_t *r1 = l1; + const sdfs_entry_lock_t *r2 = l2; + int ret = 0; + uuid_t gfid1 = {0}; + uuid_t gfid2 = {0}; + + loc_gfid((loc_t *)&r1->parent_loc, gfid1); + loc_gfid((loc_t *)&r2->parent_loc, gfid2); + ret = gf_uuid_compare(gfid1, gfid2); + /*Entrylks with NULL basename are the 'smallest'*/ + if (ret == 0) { + if (!r1->basename) + return -1; + if (!r2->basename) + return 1; + ret = strcmp(r1->basename, r2->basename); + } + + if (ret <= 0) + return -1; + else + return 
1; +} + +int +sdfs_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) +{ + sdfs_local_t *local = NULL; + call_frame_t *new_frame = NULL; + call_stub_t *stub = NULL; + sdfs_lock_t *lock = NULL; + client_t *client = NULL; + int ret = 0; + int op_errno = ENOMEM; + + new_frame = copy_frame(frame); + if (!new_frame) { + op_errno = ENOMEM; + goto err; + } + /*Set unique lk-owner for the fop*/ + set_lk_owner_from_ptr(&new_frame->root->lk_owner, new_frame->root); + + gf_client_ref(client); + new_frame->root->client = client; + local = sdfs_local_init(new_frame, this); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + local->main_frame = frame; + + lock = GF_CALLOC(1, sizeof(*lock), gf_common_mt_char); + if (!lock) + goto err; + + local->lock = lock; + + ret = sdfs_init_entry_lock(&lock->entrylk[0], newloc); + if (ret) + goto err; + + ++lock->lock_count; + + local->lock = lock; + GF_ATOMIC_INIT(local->call_cnt, lock->lock_count); + + ret = loc_copy(&local->loc, newloc); + if (ret == -1) { + op_errno = ENOMEM; + goto err; + } + + stub = fop_link_stub(new_frame, sdfs_link_helper, oldloc, newloc, xdata); + if (!stub) { + op_errno = ENOMEM; + goto err; + } + + local->stub = stub; + + STACK_WIND_COOKIE(new_frame, sdfs_common_entrylk_cbk, 0, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->entrylk, this->name, + &lock->entrylk[0].parent_loc, lock->entrylk[0].basename, + ENTRYLK_LOCK, ENTRYLK_WRLCK, xdata); + + return 0; +err: + + STACK_UNWIND_STRICT(link, frame, -1, op_errno, NULL, NULL, NULL, NULL, + NULL); + + if (new_frame) + SDFS_STACK_DESTROY(new_frame); + + return 0; +} + +int +sdfs_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *stbuf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + sdfs_local_t *local = NULL; + + local = frame->local; + + STACK_UNWIND_STRICT(mknod, local->main_frame, op_ret, op_errno, inode, + stbuf, preparent, 
postparent, xdata); + + local->main_frame = NULL; + STACK_WIND(frame, sdfs_entrylk_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->entrylk, this->name, &local->parent_loc, + local->loc.name, ENTRYLK_UNLOCK, ENTRYLK_WRLCK, xdata); + return 0; +} + +int +sdfs_mknod_helper(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + dev_t rdev, mode_t umask, dict_t *xdata) +{ + sdfs_local_t *local = NULL; + char gfid[GF_UUID_BUF_SIZE] = {0}; + + local = frame->local; + + gf_uuid_unparse(loc->pargfid, gfid); + + if (local->op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, SDFS_MSG_ENTRYLK_ERROR, + "Acquiring entry lock failed for directory %s " + "with parent gfid %s", + local->loc.name, gfid); + goto err; + } + + STACK_WIND(frame, sdfs_mknod_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata); + + return 0; +err: + STACK_UNWIND_STRICT(mknod, local->main_frame, -1, local->op_errno, NULL, + NULL, NULL, NULL, NULL); + + local->main_frame = NULL; + SDFS_STACK_DESTROY(frame); + return 0; +} + +int +sdfs_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + dev_t rdev, mode_t umask, dict_t *xdata) +{ + sdfs_local_t *local = NULL; + call_frame_t *new_frame = NULL; + call_stub_t *stub = NULL; + int op_errno = 0; + + if (-1 == sdfs_get_new_frame(frame, loc, &new_frame)) { + op_errno = ENOMEM; + goto err; + } + + stub = fop_mknod_stub(new_frame, sdfs_mknod_helper, loc, mode, rdev, umask, + xdata); + if (!stub) { + op_errno = ENOMEM; + goto err; + } + + local = new_frame->local; + local->stub = stub; + + STACK_WIND(new_frame, sdfs_entrylk_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->entrylk, this->name, &local->parent_loc, + local->loc.name, ENTRYLK_LOCK, ENTRYLK_WRLCK, xdata); + + return 0; +err: + STACK_UNWIND_STRICT(mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL, + NULL); + + if (new_frame) + SDFS_STACK_DESTROY(new_frame); + + return 0; +} + +int +sdfs_rename_cbk(call_frame_t *frame, void *cookie, xlator_t 
*this, + int32_t op_ret, int32_t op_errno, struct iatt *stbuf, + struct iatt *preoldparent, struct iatt *postoldparent, + struct iatt *prenewparent, struct iatt *postnewparent, + dict_t *xdata) +{ + sdfs_local_t *local = NULL; + sdfs_lock_t *lock = NULL; + int i = 0; + int call_cnt = 0; + + local = frame->local; + lock = local->lock; + GF_ATOMIC_INIT(local->call_cnt, lock->lock_count); + + STACK_UNWIND_STRICT(rename, local->main_frame, op_ret, op_errno, stbuf, + preoldparent, postoldparent, prenewparent, + postnewparent, xdata); + + local->main_frame = NULL; + call_cnt = GF_ATOMIC_GET(local->call_cnt); + + for (i = 0; i < call_cnt; i++) { + STACK_WIND_COOKIE(frame, sdfs_common_entrylk_cbk, (void *)(long)i, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->entrylk, + this->name, &lock->entrylk[i].parent_loc, + lock->entrylk[i].basename, ENTRYLK_UNLOCK, + ENTRYLK_WRLCK, xdata); + } + + return 0; +} + +int +sdfs_rename_helper(call_frame_t *frame, xlator_t *this, loc_t *oldloc, + loc_t *newloc, dict_t *xdata) +{ + sdfs_local_t *local = NULL; + sdfs_lock_t *lock = NULL; + gf_boolean_t stack_destroy = _gf_true; + int lock_count = 0; + int i = 0; + + local = frame->local; + lock = local->lock; + + if (local->op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, SDFS_MSG_ENTRYLK_ERROR, + "Acquiring entry lock failed "); + goto err; + } + + STACK_WIND(frame, sdfs_rename_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); + + return 0; + +err: + STACK_UNWIND_STRICT(rename, local->main_frame, -1, local->op_errno, NULL, + NULL, NULL, NULL, NULL, NULL); + + local->main_frame = NULL; + for (i = 0; i < lock->lock_count && lock->entrylk->locked[i]; i++) { + lock_count++; + } + GF_ATOMIC_INIT(local->call_cnt, lock_count); + + for (i = 0; i < lock_count; i++) { + if (!lock->entrylk->locked[i]) { + lock_count++; + continue; + } + stack_destroy = _gf_false; + STACK_WIND(frame, sdfs_common_entrylk_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->entrylk, 
this->name, + &lock->entrylk[i].parent_loc, lock->entrylk[i].basename, + ENTRYLK_UNLOCK, ENTRYLK_WRLCK, xdata); + } + + if (stack_destroy) + SDFS_STACK_DESTROY(frame); + + return 0; +} + +int +sdfs_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) +{ + sdfs_local_t *local = NULL; + sdfs_lock_t *lock = NULL; + call_frame_t *new_frame = NULL; + call_stub_t *stub = NULL; + client_t *client = NULL; + int ret = 0; + int op_errno = ENOMEM; + int i = 0; + int call_cnt = 0; + + new_frame = copy_frame(frame); + if (!new_frame) { + op_errno = ENOMEM; + goto err; + } + /*Set unique lk-owner for the fop*/ + set_lk_owner_from_ptr(&new_frame->root->lk_owner, new_frame->root); + + gf_client_ref(client); + new_frame->root->client = client; + local = sdfs_local_init(new_frame, this); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + local->main_frame = frame; + + lock = GF_CALLOC(1, sizeof(*lock), gf_common_mt_char); + if (!lock) + goto err; + + local->lock = lock; + + ret = sdfs_init_entry_lock(&lock->entrylk[0], oldloc); + if (ret) + goto err; + lock->entrylk->locked[0] = _gf_false; + + ++lock->lock_count; + + ret = sdfs_init_entry_lock(&lock->entrylk[1], newloc); + if (ret) + goto err; + lock->entrylk->locked[1] = _gf_false; + + ++lock->lock_count; + + qsort(lock->entrylk, lock->lock_count, sizeof(*lock->entrylk), + sdfs_entry_lock_cmp); + + local->lock = lock; + GF_ATOMIC_INIT(local->call_cnt, lock->lock_count); + + stub = fop_rename_stub(new_frame, sdfs_rename_helper, oldloc, newloc, + xdata); + if (!stub) { + op_errno = ENOMEM; + goto err; + } + + local->stub = stub; + call_cnt = GF_ATOMIC_GET(local->call_cnt); + for (i = 0; i < call_cnt; i++) { + STACK_WIND_COOKIE(new_frame, sdfs_common_entrylk_cbk, (void *)(long)i, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->entrylk, + this->name, &lock->entrylk[i].parent_loc, + lock->entrylk[i].basename, ENTRYLK_LOCK, + ENTRYLK_WRLCK, xdata); + } + + return 0; +err: + + 
STACK_UNWIND_STRICT(rename, frame, -1, op_errno, NULL, NULL, NULL, NULL, + NULL, NULL); + + if (new_frame) + SDFS_STACK_DESTROY(new_frame); + + return 0; +} + +int +sdfs_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *stbuf, dict_t *xdata, struct iatt *postparent) +{ + sdfs_local_t *local = NULL; + + local = frame->local; + + if (!local->loc.parent) { + sdfs_local_cleanup(local); + frame->local = NULL; + STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, stbuf, + xdata, postparent); + return 0; + } + + STACK_UNWIND_STRICT(lookup, local->main_frame, op_ret, op_errno, inode, + stbuf, xdata, postparent); + + local->main_frame = NULL; + STACK_WIND(frame, sdfs_entrylk_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->entrylk, this->name, &local->parent_loc, + local->loc.name, ENTRYLK_UNLOCK, ENTRYLK_RDLCK, xdata); + return 0; +} + +int +sdfs_lookup_helper(call_frame_t *frame, xlator_t *this, loc_t *loc, + dict_t *xdata) +{ + sdfs_local_t *local = NULL; + char gfid[GF_UUID_BUF_SIZE] = {0}; + + local = frame->local; + + gf_uuid_unparse(loc->pargfid, gfid); + + if (local->op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, SDFS_MSG_ENTRYLK_ERROR, + "Acquiring entry lock failed for directory %s " + "with parent gfid %s", + local->loc.name, gfid); + goto err; + } + + STACK_WIND(frame, sdfs_lookup_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, loc, xdata); + + return 0; +err: + STACK_UNWIND_STRICT(lookup, local->main_frame, -1, local->op_errno, NULL, + NULL, NULL, NULL); + local->main_frame = NULL; + + SDFS_STACK_DESTROY(frame); + return 0; +} + +int +sdfs_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +{ + sdfs_local_t *local = NULL; + call_frame_t *new_frame = NULL; + call_stub_t *stub = NULL; + int op_errno = 0; + + if (!loc->parent) { + local = sdfs_local_init(frame, this); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + 
STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, loc, xdata); + return 0; + } + + if (-1 == sdfs_get_new_frame(frame, loc, &new_frame)) { + op_errno = ENOMEM; + goto err; + } + + stub = fop_lookup_stub(new_frame, sdfs_lookup_helper, loc, xdata); + if (!stub) { + op_errno = ENOMEM; + goto err; + } + + local = new_frame->local; + local->stub = stub; + + STACK_WIND(new_frame, sdfs_entrylk_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->entrylk, this->name, &local->parent_loc, + local->loc.name, ENTRYLK_LOCK, ENTRYLK_RDLCK, xdata); + + return 0; + +err: + STACK_UNWIND_STRICT(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL); + + if (new_frame) + SDFS_STACK_DESTROY(new_frame); + + return 0; +} + +int32_t +sdfs_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, + dict_t *xdata) +{ + sdfs_local_t *local = NULL; + + local = frame->local; + STACK_UNWIND_STRICT(readdirp, local->main_frame, op_ret, op_errno, entries, + xdata); + + local->main_frame = NULL; + STACK_WIND(frame, sdfs_entrylk_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->entrylk, this->name, &local->parent_loc, + NULL, ENTRYLK_UNLOCK, ENTRYLK_RDLCK, xdata); + return 0; +} + +int32_t +sdfs_readdirp_helper(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *xdata) +{ + sdfs_local_t *local = NULL; + char gfid[GF_UUID_BUF_SIZE] = {0}; + + local = frame->local; + + gf_uuid_unparse(fd->inode->gfid, gfid); + + if (local->op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, SDFS_MSG_ENTRYLK_ERROR, + "Acquiring entry lock failed for directory %s " + "with parent gfid %s", + local->loc.name, gfid); + goto err; + } + + STACK_WIND(frame, sdfs_readdirp_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdirp, fd, size, off, xdata); + + return 0; +err: + STACK_UNWIND_STRICT(readdirp, local->main_frame, -1, local->op_errno, NULL, + NULL); + + local->main_frame = NULL; + + 
SDFS_STACK_DESTROY(frame); + return 0; +} + +int32_t +sdfs_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *xdata) +{ + sdfs_local_t *local = NULL; + call_frame_t *new_frame = NULL; + call_stub_t *stub = NULL; + int op_errno = 0; + + if (-1 == sdfs_get_new_frame_readdirp(frame, fd, &new_frame)) { + op_errno = ENOMEM; + goto err; + } + + stub = fop_readdirp_stub(new_frame, sdfs_readdirp_helper, fd, size, off, + xdata); + if (!stub) { + op_errno = ENOMEM; + goto err; + } + + local = new_frame->local; + local->stub = stub; + + STACK_WIND(new_frame, sdfs_entrylk_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->entrylk, this->name, &local->parent_loc, + NULL, ENTRYLK_LOCK, ENTRYLK_RDLCK, xdata); + + return 0; + +err: + STACK_UNWIND_STRICT(readdirp, frame, -1, op_errno, NULL, NULL); + + if (new_frame) + SDFS_STACK_DESTROY(new_frame); + + return 0; +} + +int +init(xlator_t *this) +{ + int ret = -1; + + if (!this->children || this->children->next) { + gf_log(this->name, GF_LOG_ERROR, + "'dentry-fop-serializer' not configured with exactly one child"); + goto out; + } + + if (!this->parents) { + gf_log(this->name, GF_LOG_WARNING, "dangling volume. 
check volfile "); + } + + this->local_pool = mem_pool_new(sdfs_local_t, 512); + if (!this->local_pool) { + goto out; + } + + GF_OPTION_INIT("pass-through", this->pass_through, bool, out); + + ret = 0; + +out: + return ret; +} + +int +reconfigure(xlator_t *this, dict_t *options) +{ + int ret = -1; + + GF_OPTION_RECONF("pass-through", this->pass_through, options, bool, out); + + ret = 0; +out: + return ret; +} + +void +fini(xlator_t *this) +{ + mem_pool_destroy(this->local_pool); + this->local_pool = NULL; + return; +} + +struct xlator_fops fops = { + .mkdir = sdfs_mkdir, + .rmdir = sdfs_rmdir, + .create = sdfs_create, + .unlink = sdfs_unlink, + .symlink = sdfs_symlink, + .link = sdfs_link, + .mknod = sdfs_mknod, + .rename = sdfs_rename, + .lookup = sdfs_lookup, + .readdirp = sdfs_readdirp, +}; + +struct xlator_cbks cbks; + +struct volume_options options[] = { + {.key = {"pass-through"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "true", + .op_version = {GD_OP_VERSION_4_1_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT, + .tags = {"sdfs"}, + .description = "Enable/Disable dentry serialize functionality"}, + {.key = {NULL}}, +}; + +xlator_api_t xlator_api = { + .init = init, + .fini = fini, + .reconfigure = reconfigure, + .op_version = {GD_OP_VERSION_4_0_0}, + .fops = &fops, + .cbks = &cbks, + .options = options, + .identifier = "sdfs", + .category = GF_TECH_PREVIEW, +}; diff --git a/xlators/features/sdfs/src/sdfs.h b/xlators/features/sdfs/src/sdfs.h new file mode 100644 index 00000000000..dded5a2d7fc --- /dev/null +++ b/xlators/features/sdfs/src/sdfs.h @@ -0,0 +1,49 @@ +/* + Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#include <glusterfs/xlator.h> +#include <glusterfs/call-stub.h> +#include "sdfs-messages.h" +#include <glusterfs/atomic.h> + +#define SDFS_LOCK_COUNT_MAX 2 + +typedef struct { + loc_t parent_loc; + char *basename; + int locked[SDFS_LOCK_COUNT_MAX]; +} sdfs_entry_lock_t; + +typedef struct { + sdfs_entry_lock_t entrylk[SDFS_LOCK_COUNT_MAX]; + int lock_count; +} sdfs_lock_t; + +struct sdfs_local { + call_frame_t *main_frame; + loc_t loc; + loc_t parent_loc; + call_stub_t *stub; + sdfs_lock_t *lock; + int op_ret; + int op_errno; + gf_atomic_t call_cnt; +}; +typedef struct sdfs_local sdfs_local_t; + +#define SDFS_STACK_DESTROY(frame) \ + do { \ + sdfs_local_t *__local = NULL; \ + __local = frame->local; \ + frame->local = NULL; \ + gf_client_unref(frame->root->client); \ + STACK_DESTROY(frame->root); \ + sdfs_local_cleanup(__local); \ + } while (0) diff --git a/xlators/features/selinux/Makefile.am b/xlators/features/selinux/Makefile.am new file mode 100644 index 00000000000..a985f42a877 --- /dev/null +++ b/xlators/features/selinux/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/features/selinux/src/Makefile.am b/xlators/features/selinux/src/Makefile.am new file mode 100644 index 00000000000..4f1e5e149b3 --- /dev/null +++ b/xlators/features/selinux/src/Makefile.am @@ -0,0 +1,20 @@ +if WITH_SERVER +xlator_LTLIBRARIES = selinux.la +endif +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features + +selinux_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) + +selinux_la_SOURCES = selinux.c + +selinux_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +noinst_HEADERS = selinux.h selinux-messages.h selinux-mem-types.h + +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ + -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src + +AM_CFLAGS = -Wall $(GF_CFLAGS) + +CLEANFILES = + diff --git a/xlators/features/filter/src/filter-mem-types.h b/xlators/features/selinux/src/selinux-mem-types.h index 
47a17249b8d..553e59e5a9d 100644 --- a/xlators/features/filter/src/filter-mem-types.h +++ b/xlators/features/selinux/src/selinux-mem-types.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + Copyright (c) 2017 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. This file is licensed to you under your choice of the GNU Lesser @@ -7,14 +7,13 @@ later), or the GNU General Public License, version 2 (GPLv2), in all cases as published by the Free Software Foundation. */ -#ifndef __FILTER_MEM_TYPES_H__ -#define __FILTER_MEM_TYPES_H__ +#ifndef __SELINUX_MEM_TYPES_H__ +#define __SELINUX_MEM_TYPES_H__ -#include "mem-types.h" +#include <glusterfs/mem-types.h> -enum gf_filter_mem_types_ { - gf_filter_mt_gf_filter = gf_common_mt_end + 1, - gf_filter_mt_end +enum gf_selinux_mem_types_ { + gf_selinux_mt_selinux_priv_t = gf_common_mt_end + 1, + gf_selinux_mt_end }; #endif - diff --git a/xlators/features/selinux/src/selinux-messages.h b/xlators/features/selinux/src/selinux-messages.h new file mode 100644 index 00000000000..f49a54f956c --- /dev/null +++ b/xlators/features/selinux/src/selinux-messages.h @@ -0,0 +1,30 @@ +/* + Copyright (c) 2017 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _SELINUX_MESSAGES_H__ +#define _SELINUX_MESSAGES_H__ + +#include <glusterfs/glfs-message-id.h> + +/* To add new message IDs, append new identifiers at the end of the list. + * + * Never remove a message ID. If it's not used anymore, you can rename it or + * leave it as it is, but not delete it. This is to prevent reutilization of + * IDs by other messages. + * + * The component name must match one of the entries defined in + * glfs-message-id.h. 
+ */ + +GLFS_MSGID(SL, SL_MSG_INVALID_VOLFILE, SL_MSG_ENOMEM, + SL_MSG_MEM_ACCT_INIT_FAILED, SL_MSG_SELINUX_GLUSTER_XATTR_MISSING, + SL_MSG_SELINUX_XATTR_MISSING); + +#endif /*_SELINUX_MESSAGES_H */ diff --git a/xlators/features/selinux/src/selinux.c b/xlators/features/selinux/src/selinux.c new file mode 100644 index 00000000000..9b1b4b55e1a --- /dev/null +++ b/xlators/features/selinux/src/selinux.c @@ -0,0 +1,323 @@ +/* + Copyright (c) 2017 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include <glusterfs/xlator.h> + +#include "selinux.h" +#include "selinux-messages.h" +#include "selinux-mem-types.h" +#include <glusterfs/compat-errno.h> + +static int +selinux_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *dict, dict_t *xdata) +{ + int ret = 0; + char *name = cookie; + + if (op_errno == 0 && dict && name && + (!strcmp(name, SELINUX_GLUSTER_XATTR))) { + ret = dict_rename_key(dict, SELINUX_GLUSTER_XATTR, SELINUX_XATTR); + if (ret < 0) + gf_msg(this->name, GF_LOG_ERROR, op_errno, + SL_MSG_SELINUX_GLUSTER_XATTR_MISSING, + "getxattr failed for %s", SELINUX_XATTR); + } + + STACK_UNWIND_STRICT(fgetxattr, frame, op_ret, op_errno, dict, xdata); + return ret; +} + +static int +selinux_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *name, dict_t *xdata) +{ + selinux_priv_t *priv = NULL; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + char *xattr_name = (char *)name; + + priv = this->private; + + GF_VALIDATE_OR_GOTO("selinux", priv, err); + + /* name can be NULL for listxattr calls */ + if (!priv->selinux_enabled || !name) + goto off; + + if (strcmp(name, SELINUX_XATTR) == 0) + xattr_name = 
SELINUX_GLUSTER_XATTR; + +off: + STACK_WIND_COOKIE(frame, selinux_fgetxattr_cbk, xattr_name, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->fgetxattr, fd, + xattr_name, xdata); + return 0; +err: + STACK_UNWIND_STRICT(fgetxattr, frame, op_ret, op_errno, NULL, xdata); + + return 0; +} + +static int +selinux_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *dict, dict_t *xdata) +{ + int ret = 0; + char *name = cookie; + + if (op_errno == 0 && dict && name && + (!strcmp(name, SELINUX_GLUSTER_XATTR))) { + ret = dict_rename_key(dict, SELINUX_GLUSTER_XATTR, SELINUX_XATTR); + if (ret < 0) + gf_msg(this->name, GF_LOG_ERROR, op_errno, + SL_MSG_SELINUX_GLUSTER_XATTR_MISSING, + "getxattr failed for %s", SELINUX_XATTR); + } + + STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, dict, xdata); + + return 0; +} + +static int +selinux_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) +{ + selinux_priv_t *priv = NULL; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + char *xattr_name = (char *)name; + + priv = this->private; + + GF_VALIDATE_OR_GOTO("selinux", priv, err); + + /* name can be NULL for listxattr calls */ + if (!priv->selinux_enabled || !name) + goto off; + + if (strcmp(name, SELINUX_XATTR) == 0) + xattr_name = SELINUX_GLUSTER_XATTR; + +off: + STACK_WIND_COOKIE(frame, selinux_getxattr_cbk, xattr_name, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->getxattr, loc, + xattr_name, xdata); + return 0; +err: + STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, NULL, xdata); + return 0; +} + +static int +selinux_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xdata) +{ + STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, xdata); + return 0; +} + +static int +selinux_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, + int flags, dict_t *xdata) +{ + selinux_priv_t *priv = NULL; + int32_t op_ret = -1; + 
int32_t op_errno = EINVAL; + int32_t ret = -1; + + priv = this->private; + + GF_VALIDATE_OR_GOTO("selinux", priv, err); + + if (!priv->selinux_enabled && !dict) + goto off; + + ret = dict_rename_key(dict, SELINUX_XATTR, SELINUX_GLUSTER_XATTR); + if (ret < 0 && ret != -ENODATA) + goto err; + +off: + STACK_WIND(frame, selinux_fsetxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata); + + return 0; +err: + STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, xdata); + return 0; +} + +static int +selinux_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xdata) +{ + STACK_UNWIND_STRICT(setxattr, frame, op_ret, op_errno, xdata); + return 0; +} + +static int +selinux_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + int flags, dict_t *xdata) +{ + selinux_priv_t *priv = NULL; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + int32_t ret = -1; + + priv = this->private; + + GF_VALIDATE_OR_GOTO("selinux", priv, err); + + if (!priv->selinux_enabled && !dict) + goto off; + + ret = dict_rename_key(dict, SELINUX_XATTR, SELINUX_GLUSTER_XATTR); + if (ret < 0 && ret != -ENODATA) + goto err; + +off: + STACK_WIND(frame, selinux_setxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata); + return 0; +err: + STACK_UNWIND_STRICT(setxattr, frame, op_ret, op_errno, xdata); + return 0; +} + +int32_t +mem_acct_init(xlator_t *this) +{ + int ret = -1; + + GF_VALIDATE_OR_GOTO("selinux", this, out); + + ret = xlator_mem_acct_init(this, gf_selinux_mt_end + 1); + + if (ret != 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, SL_MSG_MEM_ACCT_INIT_FAILED, + "Memory accounting init failed"); + return ret; + } +out: + return ret; +} + +int32_t +init(xlator_t *this) +{ + int32_t ret = -1; + selinux_priv_t *priv = NULL; + + GF_VALIDATE_OR_GOTO("selinux", this, out); + + if (!this->children || this->children->next) { + gf_msg(this->name, GF_LOG_WARNING, 0, 
SL_MSG_INVALID_VOLFILE, + "Error: SELinux (%s) not configured with exactly one " + "child", + this->name); + return -1; + } + + if (this->parents == NULL) { + gf_msg(this->name, GF_LOG_WARNING, 0, SL_MSG_INVALID_VOLFILE, + "Dangling volume. Please check the volfile"); + } + + priv = GF_CALLOC(1, sizeof(*priv), gf_selinux_mt_selinux_priv_t); + if (!priv) { + gf_log(this->name, GF_LOG_ERROR, "out of memory"); + goto out; + } + + GF_OPTION_INIT("selinux", priv->selinux_enabled, bool, out); + + this->local_pool = mem_pool_new(selinux_priv_t, 64); + if (!this->local_pool) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, SL_MSG_ENOMEM, + "Failed to create local_t's memory pool"); + goto out; + } + + this->private = (void *)priv; + ret = 0; +out: + if (ret) { + GF_FREE(priv); + mem_pool_destroy(this->local_pool); + this->local_pool = NULL; + } + return ret; +} + +int +reconfigure(xlator_t *this, dict_t *options) +{ + int32_t ret = -1; + selinux_priv_t *priv = NULL; + + priv = this->private; + + GF_OPTION_RECONF("selinux", priv->selinux_enabled, options, bool, out); + + ret = 0; +out: + return ret; +} + +void +fini(xlator_t *this) +{ + selinux_priv_t *priv = NULL; + + priv = this->private; + GF_FREE(priv); + + mem_pool_destroy(this->local_pool); + this->local_pool = NULL; + + return; +} + +struct xlator_fops fops = { + .getxattr = selinux_getxattr, + .fgetxattr = selinux_fgetxattr, + .setxattr = selinux_setxattr, + .fsetxattr = selinux_fsetxattr, +}; + +struct xlator_cbks cbks = {}; + +struct volume_options options[] = { + { + .key = {"selinux"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "on", + .description = "Enable/disable selinux translator", + .op_version = {GD_OP_VERSION_3_11_0}, + .flags = OPT_FLAG_SETTABLE, + .tags = {"security", "linux"}, + }, + { + .key = {NULL}, + }}; + +xlator_api_t xlator_api = { + .init = init, + .fini = fini, + .reconfigure = reconfigure, + .mem_acct_init = mem_acct_init, + .op_version = {1}, /* Present from the initial version */ + 
.fops = &fops, + .cbks = &cbks, + .options = options, + .identifier = "selinux", + .category = GF_MAINTAINED, +}; diff --git a/xlators/features/selinux/src/selinux.h b/xlators/features/selinux/src/selinux.h new file mode 100644 index 00000000000..1bbdad3bb36 --- /dev/null +++ b/xlators/features/selinux/src/selinux.h @@ -0,0 +1,24 @@ +/* + Copyright (c) 2017 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef __SELINUX_H__ +#define __SELINUX_H__ + +#include <glusterfs/common-utils.h> + +#define SELINUX_XATTR "security.selinux" +#define SELINUX_GLUSTER_XATTR "trusted.glusterfs.selinux" + +struct selinux_priv { + gf_boolean_t selinux_enabled; +}; + +typedef struct selinux_priv selinux_priv_t; + +#endif diff --git a/xlators/features/shard/src/Makefile.am b/xlators/features/shard/src/Makefile.am index 3569eb69afa..bf5700d4bcc 100644 --- a/xlators/features/shard/src/Makefile.am +++ b/xlators/features/shard/src/Makefile.am @@ -1,7 +1,7 @@ xlator_LTLIBRARIES = shard.la xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features -shard_la_LDFLAGS = $(GF_XLATOR_DEFAULT_LDFLAGS) +shard_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) shard_la_SOURCES = shard.c @@ -9,7 +9,8 @@ shard_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la noinst_HEADERS = shard.h shard-mem-types.h shard-messages.h -AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ + -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src AM_CFLAGS = -Wall $(GF_CFLAGS) diff --git a/xlators/features/shard/src/shard-mem-types.h b/xlators/features/shard/src/shard-mem-types.h index 77f0cee7f58..1fe7e2e2798 100644 --- 
a/xlators/features/shard/src/shard-mem-types.h +++ b/xlators/features/shard/src/shard-mem-types.h @@ -10,14 +10,15 @@ #ifndef __SHARD_MEM_TYPES_H__ #define __SHARD_MEM_TYPES_H__ -#include "mem-types.h" +#include <glusterfs/mem-types.h> enum gf_shard_mem_types_ { - gf_shard_mt_priv_t = gf_common_mt_end + 1, - gf_shard_mt_inode_list, - gf_shard_mt_inode_ctx_t, - gf_shard_mt_iovec, - gf_shard_mt_int64_t, - gf_shard_mt_end + gf_shard_mt_priv_t = gf_common_mt_end + 1, + gf_shard_mt_inode_list, + gf_shard_mt_inode_ctx_t, + gf_shard_mt_iovec, + gf_shard_mt_int64_t, + gf_shard_mt_uint64_t, + gf_shard_mt_end }; #endif diff --git a/xlators/features/shard/src/shard-messages.h b/xlators/features/shard/src/shard-messages.h index be019a7ca5e..2d0867eb136 100644 --- a/xlators/features/shard/src/shard-messages.h +++ b/xlators/features/shard/src/shard-messages.h @@ -11,167 +11,29 @@ #ifndef _SHARD_MESSAGES_H_ #define _SHARD_MESSAGES_H_ -#include "glfs-message-id.h" +#include <glusterfs/glfs-message-id.h> + +/* To add new message IDs, append new identifiers at the end of the list. + * + * Never remove a message ID. If it's not used anymore, you can rename it or + * leave it as it is, but not delete it. This is to prevent reutilization of + * IDs by other messages. + * + * The component name must match one of the entries defined in + * glfs-message-id.h. 
+ */ + +GLFS_MSGID(SHARD, SHARD_MSG_BASE_FILE_LOOKUP_FAILED, SHARD_MSG_DICT_OP_FAILED, + SHARD_MSG_DOT_SHARD_NODIR, SHARD_MSG_FD_CTX_SET_FAILED, + SHARD_MSG_INODE_CTX_GET_FAILED, SHARD_MSG_INODE_CTX_SET_FAILED, + SHARD_MSG_INODE_PATH_FAILED, SHARD_MSG_INTERNAL_XATTR_MISSING, + SHARD_MSG_INVALID_VOLFILE, SHARD_MSG_LOOKUP_SHARD_FAILED, + SHARD_MSG_MEM_ACCT_INIT_FAILED, SHARD_MSG_NULL_THIS, + SHARD_MSG_SIZE_SET_FAILED, SHARD_MSG_STAT_FAILED, + SHARD_MSG_TRUNCATE_LAST_SHARD_FAILED, + SHARD_MSG_UPDATE_FILE_SIZE_FAILED, SHARD_MSG_FOP_NOT_SUPPORTED, + SHARD_MSG_INVALID_FOP, SHARD_MSG_MEMALLOC_FAILED, + SHARD_MSG_FOP_FAILED, SHARD_MSG_SHARDS_DELETION_FAILED, + SHARD_MSG_SHARD_DELETION_COMPLETED); -/*! \file shard-messages.h - * \brief shard log-message IDs and their descriptions. - */ - -/* NOTE: Rules for message additions - * 1) Each instance of a message is _better_ left with a unique message ID, even - * if the message format is the same. Reasoning is that, if the message - * format needs to change in one instance, the other instances are not - * impacted or the new change does not change the ID of the instance being - * modified. - * 2) Addition of a message, - * - Should increment the GLFS_NUM_MESSAGES - * - Append to the list of messages defined, towards the end - * - Retain macro naming as glfs_msg_X (for redability across developers) - * NOTE: Rules for message format modifications - * 3) Check across the code if the message ID macro in question is reused - * anywhere. If reused then the modifications should ensure correctness - * everywhere, or needs a new message ID as (1) above was not adhered to. If - * not used anywhere, proceed with the required modification. - * NOTE: Rules for message deletion - * 4) Check (3) and if used anywhere else, then cannot be deleted. If not used - * anywhere, then can be deleted, but will leave a hole by design, as - * addition rules specify modification to the end of the list and not filling - * holes. 
- */ - -#define GLFS_COMP_BASE_SHARD GLFS_MSGID_COMP_SHARD -#define GLFS_NUM_MESSAGES 17 -#define GLFS_MSGID_END (GLFS_COMP_BASE_SHARD + GLFS_NUM_MESSAGES + 1) - -#define glfs_msg_start_x GLFS_COMP_BASE_SHARD, "Invalid: Start of messages" - -/*! - * @messageid 133001 - * @diagnosis - * @recommendedaction - */ -#define SHARD_MSG_BASE_FILE_LOOKUP_FAILED (GLFS_COMP_BASE_SHARD + 1) - - -/*! - * @messageid 133002 - * @diagnosis - * @recommendedaction - */ -#define SHARD_MSG_DICT_SET_FAILED (GLFS_COMP_BASE_SHARD + 2) - - -/*! - * @messageid 133003 - * @diagnosis /.shard already exists and is not a directory. - * @recommendedaction Delete the /.shard file from the backend and try again. - */ -#define SHARD_MSG_DOT_SHARD_NODIR (GLFS_COMP_BASE_SHARD + 3) - - -/*! - * @messageid 133004 - * @diagnosis - * @recommendedaction - */ -#define SHARD_MSG_FD_CTX_SET_FAILED (GLFS_COMP_BASE_SHARD + 4) - - -/*! - * @messageid 133005 - * @diagnosis - * @recommendedaction - */ -#define SHARD_MSG_INODE_CTX_GET_FAILED (GLFS_COMP_BASE_SHARD + 5) - - -/*! - * @messageid 133006 - * @diagnosis - * @recommendedaction - */ -#define SHARD_MSG_INODE_CTX_SET_FAILED (GLFS_COMP_BASE_SHARD + 6) - - -/*! - * @messageid 133007 - * @diagnosis - * @recommendedaction -*/ -#define SHARD_MSG_INODE_PATH_FAILED (GLFS_COMP_BASE_SHARD + 7) - - -/*! - * @messageid 133008 - * @diagnosis - * @recommendedaction - */ -#define SHARD_MSG_INTERNAL_XATTR_MISSING (GLFS_COMP_BASE_SHARD + 8) - - -/*! - * @messageid 133009 - * @diagnosis The client process did not get launched due to incorrect volfile. - * @recommendedaction Possibly check to see if the volfile is correct. - */ -#define SHARD_MSG_INVALID_VOLFILE (GLFS_COMP_BASE_SHARD + 9) - - -/*! - * @messageid 133010 - * @diagnosis - * @recommendedaction -*/ -#define SHARD_MSG_LOOKUP_SHARD_FAILED (GLFS_COMP_BASE_SHARD + 10) - -/*! - * @messageid 133011 - * @diagnosis - * @recommendedaction -*/ -#define SHARD_MSG_MEM_ACCT_INIT_FAILED (GLFS_COMP_BASE_SHARD + 11) - -/*! 
- * @messageid 133012 - * @diagnosis - * @recommendedaction -*/ -#define SHARD_MSG_NULL_THIS (GLFS_COMP_BASE_SHARD + 12) - -/*! - * @messageid 133013 - * @diagnosis - * @recommendedaction -*/ -#define SHARD_MSG_SIZE_SET_FAILED (GLFS_COMP_BASE_SHARD + 13) - -/*! - * @messageid 133014 - * @diagnosis - * @recommendedaction -*/ -#define SHARD_MSG_STAT_FAILED (GLFS_COMP_BASE_SHARD + 14) - -/*! - * @messageid 133015 - * @diagnosis - * @recommendedaction -*/ -#define SHARD_MSG_TRUNCATE_LAST_SHARD_FAILED (GLFS_COMP_BASE_SHARD + 15) - -/*! - * @messageid 133016 - * @diagnosis - * @recommendedaction -*/ -#define SHARD_MSG_UPDATE_FILE_SIZE_FAILED (GLFS_COMP_BASE_SHARD + 16) - -/*! - * @messageid 133017 - * @diagnosis The operation invoked is not supported. - * @recommendedaction Use other syscalls to write to the file. -*/ -#define SHARD_MSG_FOP_NOT_SUPPORTED (GLFS_COMP_BASE_SHARD + 17) - -#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages" #endif /* !_SHARD_MESSAGES_H_ */ diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c index 35c9d1d9d45..e5f93063943 100644 --- a/xlators/features/shard/src/shard.c +++ b/xlators/features/shard/src/shard.c @@ -12,1842 +12,2741 @@ #include "shard.h" #include "shard-mem-types.h" -#include "byte-order.h" -#include "defaults.h" -#include "statedump.h" +#include <glusterfs/byte-order.h> +#include <glusterfs/defaults.h> +#include <glusterfs/statedump.h> static gf_boolean_t -__is_shard_dir (uuid_t gfid) +__is_shard_dir(uuid_t gfid) { - shard_priv_t *priv = THIS->private; + shard_priv_t *priv = THIS->private; - if (gf_uuid_compare (gfid, priv->dot_shard_gfid) == 0) - return _gf_true; + if (gf_uuid_compare(gfid, priv->dot_shard_gfid) == 0) + return _gf_true; - return _gf_false; + return _gf_false; } static gf_boolean_t -__is_gsyncd_on_shard_dir (call_frame_t *frame, loc_t *loc) +__is_gsyncd_on_shard_dir(call_frame_t *frame, loc_t *loc) { - if (frame->root->pid == GF_CLIENT_PID_GSYNCD && - 
(__is_shard_dir (loc->pargfid) || - (loc->parent && __is_shard_dir(loc->parent->gfid)))) - return _gf_true; + if (frame->root->pid == GF_CLIENT_PID_GSYNCD && + (__is_shard_dir(loc->pargfid) || + (loc->parent && __is_shard_dir(loc->parent->gfid)))) + return _gf_true; - return _gf_false; + return _gf_false; } void -shard_make_block_bname (int block_num, uuid_t gfid, char *buf, size_t len) +shard_make_block_bname(int block_num, uuid_t gfid, char *buf, size_t len) { - char gfid_str[GF_UUID_BUF_SIZE] = {0,}; + char gfid_str[GF_UUID_BUF_SIZE] = { + 0, + }; - gf_uuid_unparse (gfid, gfid_str); - snprintf (buf, len, "%s.%d", gfid_str, block_num); + gf_uuid_unparse(gfid, gfid_str); + snprintf(buf, len, "%s.%d", gfid_str, block_num); } void -shard_make_block_abspath (int block_num, uuid_t gfid, char *filepath, - size_t len) +shard_make_block_abspath(int block_num, uuid_t gfid, char *filepath, size_t len) { - char gfid_str[GF_UUID_BUF_SIZE] = {0,}; + char gfid_str[GF_UUID_BUF_SIZE] = { + 0, + }; - gf_uuid_unparse (gfid, gfid_str); - snprintf (filepath, len, "/%s/%s.%d", GF_SHARD_DIR, gfid_str, - block_num); + gf_uuid_unparse(gfid, gfid_str); + snprintf(filepath, len, "/%s/%s.%d", GF_SHARD_DIR, gfid_str, block_num); } int -__shard_inode_ctx_get (inode_t *inode, xlator_t *this, shard_inode_ctx_t **ctx) +__shard_inode_ctx_get(inode_t *inode, xlator_t *this, shard_inode_ctx_t **ctx) { - int ret = -1; - uint64_t ctx_uint = 0; - shard_inode_ctx_t *ctx_p = NULL; + int ret = -1; + uint64_t ctx_uint = 0; + shard_inode_ctx_t *ctx_p = NULL; - ret = __inode_ctx_get (inode, this, &ctx_uint); - if (ret == 0) { - *ctx = (shard_inode_ctx_t *) ctx_uint; - return ret; - } + ret = __inode_ctx_get(inode, this, &ctx_uint); + if (ret == 0) { + *ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + return ret; + } - ctx_p = GF_CALLOC (1, sizeof (*ctx_p), gf_shard_mt_inode_ctx_t); - if (!ctx_p) - return ret; + ctx_p = GF_CALLOC(1, sizeof(*ctx_p), gf_shard_mt_inode_ctx_t); + if (!ctx_p) + return ret; - 
INIT_LIST_HEAD (&ctx_p->ilist); + INIT_LIST_HEAD(&ctx_p->ilist); + INIT_LIST_HEAD(&ctx_p->to_fsync_list); - ret = __inode_ctx_set (inode, this, (uint64_t *)&ctx_p); - if (ret < 0) { - GF_FREE (ctx_p); - return ret; - } + ctx_uint = (uint64_t)(uintptr_t)ctx_p; + ret = __inode_ctx_set(inode, this, &ctx_uint); + if (ret < 0) { + GF_FREE(ctx_p); + return ret; + } - *ctx = ctx_p; + *ctx = ctx_p; - return ret; + return ret; } int -shard_inode_ctx_get (inode_t *inode, xlator_t *this, shard_inode_ctx_t **ctx) +shard_inode_ctx_get(inode_t *inode, xlator_t *this, shard_inode_ctx_t **ctx) { - int ret = 0; + int ret = 0; - LOCK(&inode->lock); - { - ret = __shard_inode_ctx_get (inode, this, ctx); - } - UNLOCK(&inode->lock); + LOCK(&inode->lock); + { + ret = __shard_inode_ctx_get(inode, this, ctx); + } + UNLOCK(&inode->lock); - return ret; + return ret; } int -__shard_inode_ctx_set (inode_t *inode, xlator_t *this, struct iatt *stbuf, - uint64_t block_size, int32_t valid) +__shard_inode_ctx_set(inode_t *inode, xlator_t *this, struct iatt *stbuf, + uint64_t block_size, int32_t valid) { - int ret = -1; - shard_inode_ctx_t *ctx = NULL; + int ret = -1; + shard_inode_ctx_t *ctx = NULL; - ret = __shard_inode_ctx_get (inode, this, &ctx); - if (ret) - return ret; - - if (valid & SHARD_MASK_BLOCK_SIZE) - ctx->block_size = block_size; + ret = __shard_inode_ctx_get(inode, this, &ctx); + if (ret) + return ret; - if (!stbuf) - return 0; + if (valid & SHARD_MASK_BLOCK_SIZE) + ctx->block_size = block_size; - if (valid & SHARD_MASK_PROT) - ctx->stat.ia_prot = stbuf->ia_prot; + if (valid & SHARD_MASK_PROT) + ctx->stat.ia_prot = stbuf->ia_prot; - if (valid & SHARD_MASK_NLINK) - ctx->stat.ia_nlink = stbuf->ia_nlink; + if (valid & SHARD_MASK_NLINK) + ctx->stat.ia_nlink = stbuf->ia_nlink; - if (valid & SHARD_MASK_UID) - ctx->stat.ia_uid = stbuf->ia_uid; + if (valid & SHARD_MASK_UID) + ctx->stat.ia_uid = stbuf->ia_uid; - if (valid & SHARD_MASK_GID) - ctx->stat.ia_gid = stbuf->ia_gid; + if (valid & 
SHARD_MASK_GID) + ctx->stat.ia_gid = stbuf->ia_gid; - if (valid & SHARD_MASK_SIZE) - ctx->stat.ia_size = stbuf->ia_size; + if (valid & SHARD_MASK_SIZE) + ctx->stat.ia_size = stbuf->ia_size; - if (valid & SHARD_MASK_BLOCKS) - ctx->stat.ia_blocks = stbuf->ia_blocks; + if (valid & SHARD_MASK_BLOCKS) + ctx->stat.ia_blocks = stbuf->ia_blocks; - if (valid & SHARD_MASK_TIMES) { - SHARD_TIME_UPDATE (ctx->stat.ia_mtime, ctx->stat.ia_mtime_nsec, - stbuf->ia_mtime, stbuf->ia_mtime_nsec); - SHARD_TIME_UPDATE (ctx->stat.ia_ctime, ctx->stat.ia_ctime_nsec, - stbuf->ia_ctime, stbuf->ia_ctime_nsec); - SHARD_TIME_UPDATE (ctx->stat.ia_atime, ctx->stat.ia_atime_nsec, - stbuf->ia_atime, stbuf->ia_atime_nsec); - } + if (valid & SHARD_MASK_TIMES) { + SHARD_TIME_UPDATE(ctx->stat.ia_mtime, ctx->stat.ia_mtime_nsec, + stbuf->ia_mtime, stbuf->ia_mtime_nsec); + SHARD_TIME_UPDATE(ctx->stat.ia_ctime, ctx->stat.ia_ctime_nsec, + stbuf->ia_ctime, stbuf->ia_ctime_nsec); + SHARD_TIME_UPDATE(ctx->stat.ia_atime, ctx->stat.ia_atime_nsec, + stbuf->ia_atime, stbuf->ia_atime_nsec); + } - if (valid & SHARD_MASK_OTHERS) { - ctx->stat.ia_ino = stbuf->ia_ino; - gf_uuid_copy (ctx->stat.ia_gfid, stbuf->ia_gfid); - ctx->stat.ia_dev = stbuf->ia_dev; - ctx->stat.ia_type = stbuf->ia_type; - ctx->stat.ia_rdev = stbuf->ia_rdev; - ctx->stat.ia_blksize = stbuf->ia_blksize; - } + if (valid & SHARD_MASK_OTHERS) { + ctx->stat.ia_ino = stbuf->ia_ino; + gf_uuid_copy(ctx->stat.ia_gfid, stbuf->ia_gfid); + ctx->stat.ia_dev = stbuf->ia_dev; + ctx->stat.ia_type = stbuf->ia_type; + ctx->stat.ia_rdev = stbuf->ia_rdev; + ctx->stat.ia_blksize = stbuf->ia_blksize; + } - if (valid & SHARD_MASK_REFRESH_RESET) - ctx->refresh = _gf_false; + if (valid & SHARD_MASK_REFRESH_RESET) + ctx->refresh = _gf_false; - return 0; + return 0; } int -shard_inode_ctx_set (inode_t *inode, xlator_t *this, struct iatt *stbuf, - uint64_t block_size, int32_t valid) +shard_inode_ctx_set(inode_t *inode, xlator_t *this, struct iatt *stbuf, + uint64_t block_size, 
int32_t valid) { - int ret = -1; + int ret = -1; - LOCK (&inode->lock); - { - ret = __shard_inode_ctx_set (inode, this, stbuf, block_size, - valid); - } - UNLOCK (&inode->lock); + LOCK(&inode->lock); + { + ret = __shard_inode_ctx_set(inode, this, stbuf, block_size, valid); + } + UNLOCK(&inode->lock); - return ret; + return ret; } int -__shard_inode_ctx_invalidate (inode_t *inode, xlator_t *this, struct iatt *stbuf) +__shard_inode_ctx_set_refresh_flag(inode_t *inode, xlator_t *this) { - int ret = -1; - shard_inode_ctx_t *ctx = NULL; + int ret = -1; + shard_inode_ctx_t *ctx = NULL; - ret = __shard_inode_ctx_get (inode, this, &ctx); - if (ret) - return ret; + ret = __shard_inode_ctx_get(inode, this, &ctx); + if (ret) + return ret; - if ((stbuf->ia_size != ctx->stat.ia_size) || - (stbuf->ia_blocks != ctx->stat.ia_blocks)) - ctx->refresh = _gf_true; + ctx->refresh = _gf_true; - return 0; + return 0; } - int -shard_inode_ctx_invalidate (inode_t *inode, xlator_t *this, struct iatt *stbuf) +shard_inode_ctx_set_refresh_flag(inode_t *inode, xlator_t *this) { - int ret = -1; + int ret = -1; - LOCK (&inode->lock); - { - ret = __shard_inode_ctx_invalidate (inode, this, stbuf); - } - UNLOCK (&inode->lock); + LOCK(&inode->lock); + { + ret = __shard_inode_ctx_set_refresh_flag(inode, this); + } + UNLOCK(&inode->lock); - return ret; + return ret; } int -__shard_inode_ctx_get_block_size (inode_t *inode, xlator_t *this, - uint64_t *block_size) +__shard_inode_ctx_mark_dir_refreshed(inode_t *inode, xlator_t *this) { - int ret = -1; - uint64_t ctx_uint = 0; - shard_inode_ctx_t *ctx = NULL; + int ret = -1; + shard_inode_ctx_t *ctx = NULL; - ret = __inode_ctx_get (inode, this, &ctx_uint); - if (ret < 0) - return ret; + ret = __shard_inode_ctx_get(inode, this, &ctx); + if (ret) + return ret; - ctx = (shard_inode_ctx_t *) ctx_uint; + ctx->refreshed = _gf_true; + return 0; +} - *block_size = ctx->block_size; +int +shard_inode_ctx_mark_dir_refreshed(inode_t *inode, xlator_t *this) +{ + int ret 
= -1; - return 0; + LOCK(&inode->lock); + { + ret = __shard_inode_ctx_mark_dir_refreshed(inode, this); + } + UNLOCK(&inode->lock); + + return ret; } int -shard_inode_ctx_get_block_size (inode_t *inode, xlator_t *this, - uint64_t *block_size) +__shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this, + inode_t *shard_inode) { - int ret = -1; + int ret = -1; + shard_inode_ctx_t *base_ictx = NULL; + shard_inode_ctx_t *shard_ictx = NULL; - LOCK (&inode->lock); - { - ret = __shard_inode_ctx_get_block_size (inode, this, - block_size); - } - UNLOCK (&inode->lock); + ret = __shard_inode_ctx_get(base_inode, this, &base_ictx); + if (ret) + return ret; + ret = __shard_inode_ctx_get(shard_inode, this, &shard_ictx); + if (ret) return ret; + + if (shard_ictx->fsync_needed) { + shard_ictx->fsync_needed++; + return 1; + } + + list_add_tail(&shard_ictx->to_fsync_list, &base_ictx->to_fsync_list); + shard_ictx->inode = shard_inode; + shard_ictx->fsync_needed++; + base_ictx->fsync_count++; + shard_ictx->base_inode = base_inode; + + return 0; } int -__shard_inode_ctx_get_all (inode_t *inode, xlator_t *this, - shard_inode_ctx_t *ctx_out) +shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this, + inode_t *shard_inode) { - int ret = -1; - uint64_t ctx_uint = 0; - shard_inode_ctx_t *ctx = NULL; + int ret = -1; - ret = __inode_ctx_get (inode, this, &ctx_uint); - if (ret < 0) - return ret; + /* This ref acts as a refkeepr on the base inode. We + * need to keep this inode alive as it holds the head + * of the to_fsync_list. 
+ */ + inode_ref(base_inode); + inode_ref(shard_inode); - ctx = (shard_inode_ctx_t *) ctx_uint; + LOCK(&base_inode->lock); + LOCK(&shard_inode->lock); + { + ret = __shard_inode_ctx_add_to_fsync_list(base_inode, this, + shard_inode); + } + UNLOCK(&shard_inode->lock); + UNLOCK(&base_inode->lock); - memcpy (ctx_out, ctx, sizeof (shard_inode_ctx_t)); - return 0; + /* Unref the base inode corresponding to the ref above, if the shard is + * found to be already part of the fsync list. + */ + if (ret != 0) { + inode_unref(base_inode); + inode_unref(shard_inode); + } + return ret; } -int -shard_inode_ctx_get_all (inode_t *inode, xlator_t *this, - shard_inode_ctx_t *ctx_out) +gf_boolean_t +__shard_inode_ctx_needs_lookup(inode_t *inode, xlator_t *this) { - int ret = -1; + int ret = -1; + shard_inode_ctx_t *ctx = NULL; - LOCK (&inode->lock); - { - ret = __shard_inode_ctx_get_all (inode, this, ctx_out); - } - UNLOCK (&inode->lock); + ret = __shard_inode_ctx_get(inode, this, &ctx); + /* If inode ctx get fails, better to err on the side of caution and + * try again? Unless the failure is due to mem-allocation. 
+ */ + if (ret) + return _gf_true; - return ret; + return !ctx->refreshed; } -int -__shard_inode_ctx_fill_iatt_from_cache (inode_t *inode, xlator_t *this, - struct iatt *buf, - gf_boolean_t *need_refresh) +gf_boolean_t +shard_inode_ctx_needs_lookup(inode_t *inode, xlator_t *this) { - int ret = -1; - uint64_t ctx_uint = 0; - shard_inode_ctx_t *ctx = NULL; + gf_boolean_t flag = _gf_false; - ret = __inode_ctx_get (inode, this, &ctx_uint); - if (ret < 0) - return ret; + LOCK(&inode->lock); + { + flag = __shard_inode_ctx_needs_lookup(inode, this); + } + UNLOCK(&inode->lock); - ctx = (shard_inode_ctx_t *) ctx_uint; + return flag; +} +int +__shard_inode_ctx_invalidate(inode_t *inode, xlator_t *this, struct iatt *stbuf) +{ + int ret = -1; + shard_inode_ctx_t *ctx = NULL; - if (ctx->refresh == _gf_false) - *buf = ctx->stat; - else - *need_refresh = _gf_true; + ret = __shard_inode_ctx_get(inode, this, &ctx); + if (ret) + return ret; - return 0; + if ((stbuf->ia_size != ctx->stat.ia_size) || + (stbuf->ia_blocks != ctx->stat.ia_blocks)) + ctx->refresh = _gf_true; + + return 0; } int -shard_inode_ctx_fill_iatt_from_cache (inode_t *inode, xlator_t *this, - struct iatt *buf, - gf_boolean_t *need_refresh) +shard_inode_ctx_invalidate(inode_t *inode, xlator_t *this, struct iatt *stbuf) { - int ret = -1; + int ret = -1; - LOCK (&inode->lock); - { - ret = __shard_inode_ctx_fill_iatt_from_cache (inode, this, buf, - need_refresh); - } - UNLOCK (&inode->lock); + LOCK(&inode->lock); + { + ret = __shard_inode_ctx_invalidate(inode, this, stbuf); + } + UNLOCK(&inode->lock); - return ret; + return ret; } -void -shard_local_wipe (shard_local_t *local) +int +__shard_inode_ctx_get_block_size(inode_t *inode, xlator_t *this, + uint64_t *block_size) { - int i = 0; - int count = 0; + int ret = -1; + uint64_t ctx_uint = 0; + shard_inode_ctx_t *ctx = NULL; - count = local->num_blocks; - - loc_wipe (&local->loc); - loc_wipe (&local->dot_shard_loc); - loc_wipe (&local->loc2); - loc_wipe 
(&local->tmp_loc); + ret = __inode_ctx_get(inode, this, &ctx_uint); + if (ret < 0) + return ret; - if (local->fd) - fd_unref (local->fd); + ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; - if (local->xattr_req) - dict_unref (local->xattr_req); - if (local->xattr_rsp) - dict_unref (local->xattr_rsp); + *block_size = ctx->block_size; - for (i = 0; i < count; i++) { - if (!local->inode_list) - break; + return 0; +} - if (local->inode_list[i]) - inode_unref (local->inode_list[i]); - } +int +shard_inode_ctx_get_block_size(inode_t *inode, xlator_t *this, + uint64_t *block_size) +{ + int ret = -1; - GF_FREE (local->inode_list); + LOCK(&inode->lock); + { + ret = __shard_inode_ctx_get_block_size(inode, this, block_size); + } + UNLOCK(&inode->lock); - GF_FREE (local->vector); - if (local->iobref) - iobref_unref (local->iobref); - if (local->list_inited) - gf_dirent_free (&local->entries_head); + return ret; } int -shard_modify_size_and_block_count (struct iatt *stbuf, dict_t *dict) +__shard_inode_ctx_get_fsync_count(inode_t *inode, xlator_t *this, + int *fsync_count) { - int ret = -1; - void *size_attr = NULL; - uint64_t size_array[4]; + int ret = -1; + uint64_t ctx_uint = 0; + shard_inode_ctx_t *ctx = NULL; - ret = dict_get_ptr (dict, GF_XATTR_SHARD_FILE_SIZE, &size_attr); - if (ret) { - gf_msg_callingfn (THIS->name, GF_LOG_ERROR, 0, - SHARD_MSG_INTERNAL_XATTR_MISSING, "Failed to " - "get "GF_XATTR_SHARD_FILE_SIZE" for %s", - uuid_utoa (stbuf->ia_gfid)); - return ret; - } + ret = __inode_ctx_get(inode, this, &ctx_uint); + if (ret < 0) + return ret; - memcpy (size_array, size_attr, sizeof (size_array)); + ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; - stbuf->ia_size = ntoh64 (size_array[0]); - stbuf->ia_blocks = ntoh64 (size_array[2]); + *fsync_count = ctx->fsync_needed; - return 0; + return 0; } int -shard_call_count_return (call_frame_t *frame) +shard_inode_ctx_get_fsync_count(inode_t *inode, xlator_t *this, + int *fsync_count) { - int call_count = 0; - shard_local_t 
*local = NULL; + int ret = -1; - local = frame->local; + LOCK(&inode->lock); + { + ret = __shard_inode_ctx_get_fsync_count(inode, this, fsync_count); + } + UNLOCK(&inode->lock); - LOCK (&frame->lock); - { - call_count = --local->call_count; - } - UNLOCK (&frame->lock); - - return call_count; + return ret; } +int +__shard_inode_ctx_get_all(inode_t *inode, xlator_t *this, + shard_inode_ctx_t *ctx_out) +{ + int ret = -1; + uint64_t ctx_uint = 0; + shard_inode_ctx_t *ctx = NULL; -static int -shard_init_dot_shard_loc (xlator_t *this, shard_local_t *local) -{ - int ret = -1; - loc_t *dot_shard_loc = NULL; - - if (!local) - return -1; - - dot_shard_loc = &local->dot_shard_loc; - dot_shard_loc->inode = inode_new (this->itable); - dot_shard_loc->parent = inode_ref (this->itable->root); - ret = inode_path (dot_shard_loc->parent, GF_SHARD_DIR, - (char **)&dot_shard_loc->path); - if (ret < 0 || !(dot_shard_loc->inode)) { - gf_msg (this->name, GF_LOG_ERROR, 0, - SHARD_MSG_INODE_PATH_FAILED, - "Inode path failed on %s", GF_SHARD_DIR); - goto out; - } + ret = __inode_ctx_get(inode, this, &ctx_uint); + if (ret < 0) + return ret; - dot_shard_loc->name = strrchr (dot_shard_loc->path, '/'); - if (dot_shard_loc->name) - dot_shard_loc->name++; + ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; - ret = 0; -out: - return ret; + memcpy(ctx_out, ctx, sizeof(shard_inode_ctx_t)); + return 0; } -void -__shard_update_shards_inode_list (inode_t *linked_inode, xlator_t *this, - inode_t *base_inode, int block_num) +int +shard_inode_ctx_get_all(inode_t *inode, xlator_t *this, + shard_inode_ctx_t *ctx_out) { - char block_bname[256] = {0,}; - inode_t *lru_inode = NULL; - shard_priv_t *priv = NULL; - shard_inode_ctx_t *ctx = NULL; - shard_inode_ctx_t *lru_inode_ctx = NULL; - - priv = this->private; + int ret = -1; - shard_inode_ctx_get (linked_inode, this, &ctx); + LOCK(&inode->lock); + { + ret = __shard_inode_ctx_get_all(inode, this, ctx_out); + } + UNLOCK(&inode->lock); - if (list_empty 
(&ctx->ilist)) { - if (priv->inode_count + 1 <= SHARD_MAX_INODES) { - /* If this inode was linked here for the first time (indicated - * by empty list), and if there is still space in the priv list, - * add this ctx to the tail of the list. - */ - gf_uuid_copy (ctx->base_gfid, base_inode->gfid); - ctx->block_num = block_num; - list_add_tail (&ctx->ilist, &priv->ilist_head); - priv->inode_count++; - } else { - /*If on the other hand there is no available slot for this inode - * in the list, delete the lru inode from the head of the list, - * unlink it. And in its place add this new inode into the list. - */ - lru_inode_ctx = list_first_entry (&priv->ilist_head, - shard_inode_ctx_t, - ilist); - GF_ASSERT (lru_inode_ctx->block_num > 0); - list_del_init (&lru_inode_ctx->ilist); - lru_inode = inode_find (linked_inode->table, - lru_inode_ctx->stat.ia_gfid); - shard_make_block_bname (lru_inode_ctx->block_num, - lru_inode_ctx->base_gfid, - block_bname, - sizeof (block_bname)); - inode_unlink (lru_inode, priv->dot_shard_inode, - block_bname); - /* The following unref corresponds to the ref held by - * inode_find() above. - */ - inode_forget (lru_inode, 0); - inode_unref (lru_inode); - gf_uuid_copy (ctx->base_gfid, base_inode->gfid); - ctx->block_num = block_num; - list_add_tail (&ctx->ilist, &priv->ilist_head); - } - } else { - /* If this is not the first time this inode is being operated on, move - * it to the most recently used end of the list. 
- */ - list_move_tail (&ctx->ilist, &priv->ilist_head); - } + return ret; } int -shard_common_resolve_shards (call_frame_t *frame, xlator_t *this, - inode_t *res_inode, - shard_post_resolve_fop_handler_t post_res_handler) +__shard_inode_ctx_fill_iatt_from_cache(inode_t *inode, xlator_t *this, + struct iatt *buf, + gf_boolean_t *need_refresh) { - int i = -1; - uint32_t shard_idx_iter = 0; - char path[PATH_MAX] = {0,}; - inode_t *inode = NULL; - shard_priv_t *priv = NULL; - shard_local_t *local = NULL; + int ret = -1; + uint64_t ctx_uint = 0; + shard_inode_ctx_t *ctx = NULL; - priv = this->private; - local = frame->local; - shard_idx_iter = local->first_block; + ret = __inode_ctx_get(inode, this, &ctx_uint); + if (ret < 0) + return ret; - while (shard_idx_iter <= local->last_block) { - i++; - if (shard_idx_iter == 0) { - local->inode_list[i] = inode_ref (res_inode); - shard_idx_iter++; - continue; - } + ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; - shard_make_block_abspath (shard_idx_iter, res_inode->gfid, path, - sizeof(path)); - - inode = NULL; - inode = inode_resolve (this->itable, path); - if (inode) { - gf_msg_debug (this->name, 0, "Shard %d already " - "present. gfid=%s. Saving inode for future.", - shard_idx_iter, uuid_utoa(inode->gfid)); - shard_idx_iter++; - local->inode_list[i] = inode; - /* Let the ref on the inodes that are already present - * in inode table still be held so that they don't get - * forgotten by the time the fop reaches the actual - * write stage. 
- */ - LOCK(&priv->lock); - { - __shard_update_shards_inode_list (inode, this, - res_inode, - shard_idx_iter); - } - UNLOCK(&priv->lock); - - continue; - } else { - local->call_count++; - shard_idx_iter++; - } - } + if (ctx->refresh == _gf_false) + *buf = ctx->stat; + else + *need_refresh = _gf_true; - post_res_handler (frame, this); - return 0; + return 0; } int -shard_update_file_size_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict, - dict_t *xdata) +shard_inode_ctx_fill_iatt_from_cache(inode_t *inode, xlator_t *this, + struct iatt *buf, + gf_boolean_t *need_refresh) { - inode_t *inode = NULL; - shard_local_t *local = NULL; + int ret = -1; - local = frame->local; + LOCK(&inode->lock); + { + ret = __shard_inode_ctx_fill_iatt_from_cache(inode, this, buf, + need_refresh); + } + UNLOCK(&inode->lock); - if ((local->fd) && (local->fd->inode)) - inode = local->fd->inode; - else if (local->loc.inode) - inode = local->loc.inode; + return ret; +} - if (op_ret < 0) { - gf_msg (this->name, GF_LOG_ERROR, op_errno, - SHARD_MSG_UPDATE_FILE_SIZE_FAILED, "Update to file size" - " xattr failed on %s", uuid_utoa (inode->gfid)); - local->op_ret = op_ret; - local->op_errno = op_errno; - goto err; - } +void +shard_local_wipe(shard_local_t *local) +{ + int i = 0; + int count = 0; - if (shard_modify_size_and_block_count (&local->postbuf, dict)) { - local->op_ret = -1; - local->op_errno = ENOMEM; - goto err; - } + count = local->num_blocks; - shard_inode_ctx_set (inode, this, &local->postbuf, 0, - SHARD_INODE_WRITE_MASK); + syncbarrier_destroy(&local->barrier); + loc_wipe(&local->loc); + loc_wipe(&local->dot_shard_loc); + loc_wipe(&local->dot_shard_rm_loc); + loc_wipe(&local->loc2); + loc_wipe(&local->tmp_loc); + loc_wipe(&local->int_inodelk.loc); + loc_wipe(&local->int_entrylk.loc); + loc_wipe(&local->newloc); -err: - local->post_update_size_handler (frame, this); - return 0; -} + if (local->name) + GF_FREE(local->name); -int 
-shard_set_size_attrs (int64_t size, int64_t block_count, int64_t **size_attr_p) -{ - int ret = -1; - int64_t *size_attr = NULL; + if (local->int_entrylk.basename) + GF_FREE(local->int_entrylk.basename); + if (local->fd) + fd_unref(local->fd); - if (!size_attr_p) - goto out; + if (local->xattr_req) + dict_unref(local->xattr_req); + if (local->xattr_rsp) + dict_unref(local->xattr_rsp); - size_attr = GF_CALLOC (4, sizeof (int64_t), gf_shard_mt_int64_t); - if (!size_attr) - goto out; + for (i = 0; i < count; i++) { + if (!local->inode_list) + break; - size_attr[0] = hton64 (size); - /* As sharding evolves, it _may_ be necessary to embed more pieces of - * information within the same xattr. So allocating slots for them in - * advance. For now, only bytes 0-63 and 128-191 which would make up the - * current size and block count respectively of the file are valid. - */ - size_attr[2] = hton64 (block_count); + if (local->inode_list[i]) + inode_unref(local->inode_list[i]); + } - *size_attr_p = size_attr; + GF_FREE(local->inode_list); - ret = 0; -out: - return ret; + GF_FREE(local->vector); + if (local->iobref) + iobref_unref(local->iobref); + if (local->list_inited) + gf_dirent_free(&local->entries_head); + if (local->inodelk_frame) + SHARD_STACK_DESTROY(local->inodelk_frame); + if (local->entrylk_frame) + SHARD_STACK_DESTROY(local->entrylk_frame); } int -shard_update_file_size (call_frame_t *frame, xlator_t *this, fd_t *fd, - loc_t *loc, - shard_post_update_size_fop_handler_t handler) -{ - int ret = -1; - int64_t *size_attr = NULL; - inode_t *inode = NULL; - shard_local_t *local = NULL; - dict_t *xattr_req = NULL; +shard_modify_size_and_block_count(struct iatt *stbuf, dict_t *dict) +{ + int ret = -1; + void *size_attr = NULL; + uint64_t size_array[4]; + + ret = dict_get_ptr(dict, GF_XATTR_SHARD_FILE_SIZE, &size_attr); + if (ret) { + gf_msg_callingfn(THIS->name, GF_LOG_ERROR, 0, + SHARD_MSG_INTERNAL_XATTR_MISSING, + "Failed to " + "get " GF_XATTR_SHARD_FILE_SIZE " for %s", 
+ uuid_utoa(stbuf->ia_gfid)); + return ret; + } - local = frame->local; - local->post_update_size_handler = handler; + memcpy(size_array, size_attr, sizeof(size_array)); - xattr_req = dict_new (); - if (!xattr_req) { - local->op_ret = -1; - local->op_errno = ENOMEM; - goto out; - } + stbuf->ia_size = ntoh64(size_array[0]); + stbuf->ia_blocks = ntoh64(size_array[2]); - if (fd) - inode = fd->inode; - else - inode = loc->inode; + return 0; +} - /* If both size and block count have not changed, then skip the xattrop. - */ - if ((local->delta_size + local->hole_size == 0) && - (local->delta_blocks == 0)) { - goto out; - } +int +shard_call_count_return(call_frame_t *frame) +{ + int call_count = 0; + shard_local_t *local = NULL; - ret = shard_set_size_attrs (local->delta_size + local->hole_size, - local->delta_blocks, &size_attr); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_SIZE_SET_FAILED, - "Failed to set size attrs for %s", - uuid_utoa (inode->gfid)); - local->op_ret = -1; - local->op_errno = ENOMEM; - goto out; - } + local = frame->local; - ret = dict_set_bin (xattr_req, GF_XATTR_SHARD_FILE_SIZE, size_attr, - 8 * 4); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_SET_FAILED, - "Failed to set key %s into dict. 
gfid=%s", - GF_XATTR_SHARD_FILE_SIZE, uuid_utoa (inode->gfid)); - GF_FREE (size_attr); - local->op_ret = -1; - local->op_errno = ENOMEM; - goto out; - } + LOCK(&frame->lock); + { + call_count = --local->call_count; + } + UNLOCK(&frame->lock); - if (fd) - STACK_WIND (frame, shard_update_file_size_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fxattrop, fd, - GF_XATTROP_ADD_ARRAY64, xattr_req, NULL); - else - STACK_WIND (frame, shard_update_file_size_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->xattrop, loc, - GF_XATTROP_ADD_ARRAY64, xattr_req, NULL); + return call_count; +} - dict_unref (xattr_req); - return 0; +static char * +shard_internal_dir_string(shard_internal_dir_type_t type) +{ + char *str = NULL; + + switch (type) { + case SHARD_INTERNAL_DIR_DOT_SHARD: + str = GF_SHARD_DIR; + break; + case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: + str = GF_SHARD_REMOVE_ME_DIR; + break; + default: + break; + } + return str; +} +static int +shard_init_internal_dir_loc(xlator_t *this, shard_local_t *local, + shard_internal_dir_type_t type) +{ + int ret = -1; + char *bname = NULL; + inode_t *parent = NULL; + loc_t *internal_dir_loc = NULL; + shard_priv_t *priv = NULL; + + priv = this->private; + if (!local) + return -1; + + switch (type) { + case SHARD_INTERNAL_DIR_DOT_SHARD: + internal_dir_loc = &local->dot_shard_loc; + bname = GF_SHARD_DIR; + parent = inode_ref(this->itable->root); + break; + case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: + internal_dir_loc = &local->dot_shard_rm_loc; + bname = GF_SHARD_REMOVE_ME_DIR; + parent = inode_ref(priv->dot_shard_inode); + break; + default: + break; + } + + internal_dir_loc->inode = inode_new(this->itable); + internal_dir_loc->parent = parent; + ret = inode_path(internal_dir_loc->parent, bname, + (char **)&internal_dir_loc->path); + if (ret < 0 || !(internal_dir_loc->inode)) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, + "Inode path failed on %s", bname); + goto out; + } + + internal_dir_loc->name 
= strrchr(internal_dir_loc->path, '/'); + if (internal_dir_loc->name) + internal_dir_loc->name++; + + ret = 0; out: - if (xattr_req) - dict_unref (xattr_req); - handler (frame, this); - return 0; + return ret; +} + +inode_t * +__shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this, + inode_t *base_inode, int block_num, + uuid_t gfid) +{ + char block_bname[256] = { + 0, + }; + inode_t *lru_inode = NULL; + shard_priv_t *priv = NULL; + shard_inode_ctx_t *ctx = NULL; + shard_inode_ctx_t *lru_inode_ctx = NULL; + shard_inode_ctx_t *lru_base_inode_ctx = NULL; + inode_t *fsync_inode = NULL; + inode_t *lru_base_inode = NULL; + gf_boolean_t do_fsync = _gf_false; + + priv = this->private; + + shard_inode_ctx_get(linked_inode, this, &ctx); + + if (list_empty(&ctx->ilist)) { + if (priv->inode_count + 1 <= priv->lru_limit) { + /* If this inode was linked here for the first time (indicated + * by empty list), and if there is still space in the priv list, + * add this ctx to the tail of the list. + */ + /* For as long as an inode is in lru list, we try to + * keep it alive by holding a ref on it. + */ + inode_ref(linked_inode); + if (base_inode) + gf_uuid_copy(ctx->base_gfid, base_inode->gfid); + else + gf_uuid_copy(ctx->base_gfid, gfid); + ctx->block_num = block_num; + list_add_tail(&ctx->ilist, &priv->ilist_head); + priv->inode_count++; + ctx->base_inode = inode_ref(base_inode); + } else { + /*If on the other hand there is no available slot for this inode + * in the list, delete the lru inode from the head of the list, + * unlink it. And in its place add this new inode into the list. 
+ */ + lru_inode_ctx = list_first_entry(&priv->ilist_head, + shard_inode_ctx_t, ilist); + GF_ASSERT(lru_inode_ctx->block_num > 0); + lru_base_inode = lru_inode_ctx->base_inode; + list_del_init(&lru_inode_ctx->ilist); + lru_inode = inode_find(linked_inode->table, + lru_inode_ctx->stat.ia_gfid); + /* If the lru inode was part of the pending-fsync list, + * the base inode needs to be unref'd, the lru inode + * deleted from fsync list and fsync'd in a new frame, + * and then unlinked in memory and forgotten. + */ + if (!lru_base_inode) + goto after_fsync_check; + LOCK(&lru_base_inode->lock); + LOCK(&lru_inode->lock); + { + if (!list_empty(&lru_inode_ctx->to_fsync_list)) { + list_del_init(&lru_inode_ctx->to_fsync_list); + lru_inode_ctx->fsync_needed = 0; + do_fsync = _gf_true; + __shard_inode_ctx_get(lru_base_inode, this, + &lru_base_inode_ctx); + lru_base_inode_ctx->fsync_count--; + } + } + UNLOCK(&lru_inode->lock); + UNLOCK(&lru_base_inode->lock); + + after_fsync_check: + if (!do_fsync) { + shard_make_block_bname(lru_inode_ctx->block_num, + lru_inode_ctx->base_gfid, block_bname, + sizeof(block_bname)); + /* The following unref corresponds to the ref held at + * the time the shard was added to the lru list. + */ + inode_unref(lru_inode); + inode_unlink(lru_inode, priv->dot_shard_inode, block_bname); + inode_forget(lru_inode, 0); + } else { + /* The following unref corresponds to the ref + * held when the shard was added to fsync list. + */ + inode_unref(lru_inode); + fsync_inode = lru_inode; + if (lru_base_inode) + inode_unref(lru_base_inode); + } + /* The following unref corresponds to the ref + * held by inode_find() above. + */ + inode_unref(lru_inode); + + /* The following unref corresponds to the ref held on the base shard + * at the time of adding shard inode to lru list + */ + if (lru_base_inode) + inode_unref(lru_base_inode); + + /* For as long as an inode is in lru list, we try to + * keep it alive by holding a ref on it. 
+ */ + inode_ref(linked_inode); + if (base_inode) + gf_uuid_copy(ctx->base_gfid, base_inode->gfid); + else + gf_uuid_copy(ctx->base_gfid, gfid); + ctx->block_num = block_num; + ctx->base_inode = inode_ref(base_inode); + list_add_tail(&ctx->ilist, &priv->ilist_head); + } + } else { + /* If this is not the first time this inode is being operated on, move + * it to the most recently used end of the list. + */ + list_move_tail(&ctx->ilist, &priv->ilist_head); + } + return fsync_inode; +} +int +shard_common_failure_unwind(glusterfs_fop_t fop, call_frame_t *frame, + int32_t op_ret, int32_t op_errno) +{ + switch (fop) { + case GF_FOP_LOOKUP: + SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, NULL, NULL, + NULL, NULL); + break; + case GF_FOP_STAT: + SHARD_STACK_UNWIND(stat, frame, op_ret, op_errno, NULL, NULL); + break; + case GF_FOP_FSTAT: + SHARD_STACK_UNWIND(fstat, frame, op_ret, op_errno, NULL, NULL); + break; + case GF_FOP_TRUNCATE: + SHARD_STACK_UNWIND(truncate, frame, op_ret, op_errno, NULL, NULL, + NULL); + break; + case GF_FOP_FTRUNCATE: + SHARD_STACK_UNWIND(ftruncate, frame, op_ret, op_errno, NULL, NULL, + NULL); + break; + case GF_FOP_MKNOD: + SHARD_STACK_UNWIND(mknod, frame, op_ret, op_errno, NULL, NULL, NULL, + NULL, NULL); + break; + case GF_FOP_LINK: + SHARD_STACK_UNWIND(link, frame, op_ret, op_errno, NULL, NULL, NULL, + NULL, NULL); + break; + case GF_FOP_CREATE: + SHARD_STACK_UNWIND(create, frame, op_ret, op_errno, NULL, NULL, + NULL, NULL, NULL, NULL); + break; + case GF_FOP_UNLINK: + SHARD_STACK_UNWIND(unlink, frame, op_ret, op_errno, NULL, NULL, + NULL); + break; + case GF_FOP_RENAME: + SHARD_STACK_UNWIND(rename, frame, op_ret, op_errno, NULL, NULL, + NULL, NULL, NULL, NULL); + break; + case GF_FOP_WRITE: + SHARD_STACK_UNWIND(writev, frame, op_ret, op_errno, NULL, NULL, + NULL); + break; + case GF_FOP_FALLOCATE: + SHARD_STACK_UNWIND(fallocate, frame, op_ret, op_errno, NULL, NULL, + NULL); + break; + case GF_FOP_ZEROFILL: + 
SHARD_STACK_UNWIND(zerofill, frame, op_ret, op_errno, NULL, NULL, + NULL); + break; + case GF_FOP_DISCARD: + SHARD_STACK_UNWIND(discard, frame, op_ret, op_errno, NULL, NULL, + NULL); + break; + case GF_FOP_READ: + SHARD_STACK_UNWIND(readv, frame, op_ret, op_errno, NULL, -1, NULL, + NULL, NULL); + break; + case GF_FOP_FSYNC: + SHARD_STACK_UNWIND(fsync, frame, op_ret, op_errno, NULL, NULL, + NULL); + break; + case GF_FOP_REMOVEXATTR: + SHARD_STACK_UNWIND(removexattr, frame, op_ret, op_errno, NULL); + break; + case GF_FOP_FREMOVEXATTR: + SHARD_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, NULL); + break; + case GF_FOP_FGETXATTR: + SHARD_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, NULL, NULL); + break; + case GF_FOP_GETXATTR: + SHARD_STACK_UNWIND(getxattr, frame, op_ret, op_errno, NULL, NULL); + break; + case GF_FOP_FSETXATTR: + SHARD_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, NULL); + break; + case GF_FOP_SETXATTR: + SHARD_STACK_UNWIND(setxattr, frame, op_ret, op_errno, NULL); + break; + case GF_FOP_SETATTR: + SHARD_STACK_UNWIND(setattr, frame, op_ret, op_errno, NULL, NULL, + NULL); + break; + case GF_FOP_FSETATTR: + SHARD_STACK_UNWIND(fsetattr, frame, op_ret, op_errno, NULL, NULL, + NULL); + break; + case GF_FOP_SEEK: + SHARD_STACK_UNWIND(seek, frame, op_ret, op_errno, 0, NULL); + break; + default: + gf_msg(THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, + "Invalid fop id = %d", fop); + break; + } + return 0; } -static void -shard_link_dot_shard_inode (shard_local_t *local, inode_t *inode, - struct iatt *buf) -{ - inode_t *linked_inode = NULL; - shard_priv_t *priv = NULL; +int +shard_common_inode_write_success_unwind(glusterfs_fop_t fop, + call_frame_t *frame, int32_t op_ret) +{ + shard_local_t *local = frame->local; + + /* the below 3 variables are required because, in SHARD_STACK_UNWIND() + macro, there is a check for local being null. 
So many static analyzers + backtrace the code with the assumption of a possible (local == NULL) case, + and complain about the lines below. By handling it like below, we avoid + the warnings */ + + struct iatt *prebuf = ((local) ? &local->prebuf : NULL); + struct iatt *postbuf = ((local) ? &local->postbuf : NULL); + dict_t *xattr_rsp = ((local) ? local->xattr_rsp : NULL); + + switch (fop) { + case GF_FOP_WRITE: + SHARD_STACK_UNWIND(writev, frame, op_ret, 0, prebuf, postbuf, + xattr_rsp); + break; + case GF_FOP_FALLOCATE: + SHARD_STACK_UNWIND(fallocate, frame, op_ret, 0, prebuf, postbuf, + xattr_rsp); + break; + case GF_FOP_ZEROFILL: + SHARD_STACK_UNWIND(zerofill, frame, op_ret, 0, prebuf, postbuf, + xattr_rsp); + break; + case GF_FOP_DISCARD: + SHARD_STACK_UNWIND(discard, frame, op_ret, 0, prebuf, postbuf, + xattr_rsp); + break; + default: + gf_msg(THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, + "Invalid fop id = %d", fop); + break; + } + return 0; +} - priv = THIS->private; +int +shard_evicted_inode_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *prebuf, struct iatt *postbuf, + dict_t *xdata) +{ + char block_bname[256] = { + 0, + }; + fd_t *anon_fd = cookie; + inode_t *shard_inode = NULL; + shard_inode_ctx_t *ctx = NULL; + shard_priv_t *priv = NULL; + + priv = this->private; + + if (anon_fd == NULL || op_ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, SHARD_MSG_MEMALLOC_FAILED, + "fsync failed on shard"); + goto out; + } + shard_inode = anon_fd->inode; + + LOCK(&priv->lock); + LOCK(&shard_inode->lock); + { + __shard_inode_ctx_get(shard_inode, this, &ctx); + if ((list_empty(&ctx->to_fsync_list)) && (list_empty(&ctx->ilist))) { + shard_make_block_bname(ctx->block_num, shard_inode->gfid, + block_bname, sizeof(block_bname)); + inode_unlink(shard_inode, priv->dot_shard_inode, block_bname); + /* The following unref corresponds to the ref held by + * inode_link() at the time the shard was created 
or + * looked up + */ + inode_unref(shard_inode); + inode_forget(shard_inode, 0); + } + } + UNLOCK(&shard_inode->lock); + UNLOCK(&priv->lock); - linked_inode = inode_link (inode, local->dot_shard_loc.parent, - local->dot_shard_loc.name, buf); - inode_lookup (linked_inode); - priv->dot_shard_inode = linked_inode; +out: + if (anon_fd) + fd_unref(anon_fd); + STACK_DESTROY(frame->root); + return 0; } int -shard_lookup_dot_shard_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *xdata, - struct iatt *postparent) -{ - shard_local_t *local = NULL; - - local = frame->local; +shard_initiate_evicted_inode_fsync(xlator_t *this, inode_t *inode) +{ + fd_t *anon_fd = NULL; + call_frame_t *fsync_frame = NULL; + + fsync_frame = create_frame(this, this->ctx->pool); + if (!fsync_frame) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, + "Failed to create new frame " + "to fsync shard"); + return -1; + } + + anon_fd = fd_anonymous(inode); + if (!anon_fd) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, + "Failed to create anon fd to" + " fsync shard"); + STACK_DESTROY(fsync_frame->root); + return -1; + } + + STACK_WIND_COOKIE(fsync_frame, shard_evicted_inode_fsync_cbk, anon_fd, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync, + anon_fd, 1, NULL); + return 0; +} - if (op_ret) { - local->op_ret = op_ret; - local->op_errno = op_errno; - goto unwind; - } +int +shard_common_inode_write_post_lookup_shards_handler(call_frame_t *frame, + xlator_t *this); - if (!IA_ISDIR (buf->ia_type)) { - gf_msg (this->name, GF_LOG_CRITICAL, 0, - SHARD_MSG_DOT_SHARD_NODIR, "/.shard already exists and " - "is not a directory. 
Please remove /.shard from all " - "bricks and try again"); - local->op_ret = -1; - local->op_errno = EIO; - goto unwind; +int +shard_common_resolve_shards(call_frame_t *frame, xlator_t *this, + shard_post_resolve_fop_handler_t post_res_handler) +{ + int i = -1; + uint32_t shard_idx_iter = 0; + char path[PATH_MAX] = { + 0, + }; + uuid_t gfid = { + 0, + }; + inode_t *inode = NULL; + inode_t *res_inode = NULL; + inode_t *fsync_inode = NULL; + shard_priv_t *priv = NULL; + shard_local_t *local = NULL; + uint64_t resolve_count = 0; + + priv = this->private; + local = frame->local; + local->call_count = 0; + shard_idx_iter = local->first_block; + res_inode = local->resolver_base_inode; + + if ((local->op_ret < 0) || (local->resolve_not)) + goto out; + + /* If this prealloc FOP is for fresh file creation, then the size of the + * file will be 0. Then there will be no shards associated with this file. + * So we can skip the lookup process for the shards which do not exist + * and directly issue mknod to create shards. 
+ * + * In case the prealloc fop is to extend the preallocated file to bigger + * size then just lookup and populate inodes of existing shards and + * update the create count + */ + if (local->fop == GF_FOP_FALLOCATE) { + if (!local->prebuf.ia_size) { + local->inode_list[0] = inode_ref(res_inode); + local->create_count = local->last_block; + shard_common_inode_write_post_lookup_shards_handler(frame, this); + return 0; + } + if (local->prebuf.ia_size < local->total_size) + local->create_count = local->last_block - + ((local->prebuf.ia_size - 1) / + local->block_size); + } + + resolve_count = local->last_block - local->create_count; + + if (res_inode) + gf_uuid_copy(gfid, res_inode->gfid); + else + gf_uuid_copy(gfid, local->base_gfid); + + while (shard_idx_iter <= resolve_count) { + i++; + if (shard_idx_iter == 0) { + local->inode_list[i] = inode_ref(res_inode); + shard_idx_iter++; + continue; + } + + shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path)); + + inode = NULL; + inode = inode_resolve(this->itable, path); + if (inode) { + gf_msg_debug(this->name, 0, + "Shard %d already " + "present. gfid=%s. Saving inode for future.", + shard_idx_iter, uuid_utoa(inode->gfid)); + local->inode_list[i] = inode; + /* Let the ref on the inodes that are already present + * in inode table still be held so that they don't get + * forgotten by the time the fop reaches the actual + * write stage. + */ + LOCK(&priv->lock); + { + fsync_inode = __shard_update_shards_inode_list( + inode, this, res_inode, shard_idx_iter, gfid); + } + UNLOCK(&priv->lock); + shard_idx_iter++; + if (fsync_inode) + shard_initiate_evicted_inode_fsync(this, fsync_inode); + continue; + } else { + local->call_count++; + shard_idx_iter++; } + } +out: + post_res_handler(frame, this); + return 0; +} - shard_link_dot_shard_inode (local, inode, buf); - shard_common_resolve_shards (frame, this, - (local->fop == GF_FOP_RENAME) ? 
- local->loc2.inode : local->loc.inode, - local->post_res_handler); - return 0; - -unwind: - local->post_res_handler (frame, this); - return 0; +int +shard_update_file_size_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t *xdata) +{ + inode_t *inode = NULL; + shard_local_t *local = NULL; + + local = frame->local; + + if ((local->fd) && (local->fd->inode)) + inode = local->fd->inode; + else if (local->loc.inode) + inode = local->loc.inode; + + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, op_errno, + SHARD_MSG_UPDATE_FILE_SIZE_FAILED, + "Update to file size" + " xattr failed on %s", + uuid_utoa(inode->gfid)); + local->op_ret = op_ret; + local->op_errno = op_errno; + goto err; + } + + if (shard_modify_size_and_block_count(&local->postbuf, dict)) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto err; + } +err: + local->post_update_size_handler(frame, this); + return 0; } int -shard_lookup_dot_shard (call_frame_t *frame, xlator_t *this, - shard_post_resolve_fop_handler_t post_res_handler) +shard_set_size_attrs(int64_t size, int64_t block_count, int64_t **size_attr_p) { - int ret = -1; - dict_t *xattr_req = NULL; - shard_priv_t *priv = NULL; - shard_local_t *local = NULL; + int ret = -1; + int64_t *size_attr = NULL; - local = frame->local; - priv = this->private; - local->post_res_handler = post_res_handler; + if (!size_attr_p) + goto out; - xattr_req = dict_new (); - if (!xattr_req) { - local->op_ret = -1; - local->op_errno = ENOMEM; - goto err; - } - - ret = dict_set_static_bin (xattr_req, "gfid-req", priv->dot_shard_gfid, - 16); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_SET_FAILED, - "Failed to set gfid of /.shard into dict"); - local->op_ret = -1; - local->op_errno = ENOMEM; - goto err; - } + size_attr = GF_CALLOC(4, sizeof(int64_t), gf_shard_mt_int64_t); + if (!size_attr) + goto out; - STACK_WIND (frame, shard_lookup_dot_shard_cbk, FIRST_CHILD(this), - 
FIRST_CHILD(this)->fops->lookup, &local->dot_shard_loc, - xattr_req); + size_attr[0] = hton64(size); + /* As sharding evolves, it _may_ be necessary to embed more pieces of + * information within the same xattr. So allocating slots for them in + * advance. For now, only bytes 0-63 and 128-191 which would make up the + * current size and block count respectively of the file are valid. + */ + size_attr[2] = hton64(block_count); - dict_unref (xattr_req); - return 0; + *size_attr_p = size_attr; -err: - if (xattr_req) - dict_unref (xattr_req); - post_res_handler (frame, this); - return 0; + ret = 0; +out: + return ret; } -static void -shard_inode_ctx_update (inode_t *inode, xlator_t *this, dict_t *xdata, - struct iatt *buf) -{ - int ret = 0; - uint64_t size = 0; - void *bsize = NULL; - - if (shard_inode_ctx_get_block_size (inode, this, &size)) { - /* Fresh lookup */ - ret = dict_get_ptr (xdata, GF_XATTR_SHARD_BLOCK_SIZE, &bsize); - if (!ret) - size = ntoh64 (*((uint64_t *)bsize)); - /* If the file is sharded, set its block size, otherwise just - * set 0. - */ +int +shard_update_file_size(call_frame_t *frame, xlator_t *this, fd_t *fd, + loc_t *loc, shard_post_update_size_fop_handler_t handler) +{ + int ret = -1; + int64_t *size_attr = NULL; + int64_t delta_blocks = 0; + inode_t *inode = NULL; + shard_local_t *local = NULL; + dict_t *xattr_req = NULL; + + local = frame->local; + local->post_update_size_handler = handler; + + xattr_req = dict_new(); + if (!xattr_req) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto out; + } + + if (fd) + inode = fd->inode; + else + inode = loc->inode; + + /* If both size and block count have not changed, then skip the xattrop. 
+ */ + delta_blocks = GF_ATOMIC_GET(local->delta_blocks); + if ((local->delta_size + local->hole_size == 0) && (delta_blocks == 0)) { + goto out; + } + + ret = shard_set_size_attrs(local->delta_size + local->hole_size, + delta_blocks, &size_attr); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SIZE_SET_FAILED, + "Failed to set size attrs for %s", uuid_utoa(inode->gfid)); + local->op_ret = -1; + local->op_errno = ENOMEM; + goto out; + } + + ret = dict_set_bin(xattr_req, GF_XATTR_SHARD_FILE_SIZE, size_attr, 8 * 4); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to set key %s into dict. gfid=%s", + GF_XATTR_SHARD_FILE_SIZE, uuid_utoa(inode->gfid)); + GF_FREE(size_attr); + local->op_ret = -1; + local->op_errno = ENOMEM; + goto out; + } + + if (fd) + STACK_WIND(frame, shard_update_file_size_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fxattrop, fd, + GF_XATTROP_ADD_ARRAY64, xattr_req, NULL); + else + STACK_WIND(frame, shard_update_file_size_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->xattrop, loc, + GF_XATTROP_ADD_ARRAY64, xattr_req, NULL); + + dict_unref(xattr_req); + return 0; - shard_inode_ctx_set (inode, this, buf, size, - SHARD_MASK_BLOCK_SIZE); - } - /* If the file is sharded, also set the remaining attributes, - * except for ia_size and ia_blocks. 
- */ - if (size) { - shard_inode_ctx_set (inode, this, buf, 0, SHARD_LOOKUP_MASK); - (void) shard_inode_ctx_invalidate (inode, this, buf); - } +out: + if (xattr_req) + dict_unref(xattr_req); + handler(frame, this); + return 0; +} + +static inode_t * +shard_link_internal_dir_inode(shard_local_t *local, inode_t *inode, + struct iatt *buf, shard_internal_dir_type_t type) +{ + inode_t *linked_inode = NULL; + shard_priv_t *priv = NULL; + char *bname = NULL; + inode_t **priv_inode = NULL; + inode_t *parent = NULL; + + priv = THIS->private; + + switch (type) { + case SHARD_INTERNAL_DIR_DOT_SHARD: + bname = GF_SHARD_DIR; + priv_inode = &priv->dot_shard_inode; + parent = inode->table->root; + break; + case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: + bname = GF_SHARD_REMOVE_ME_DIR; + priv_inode = &priv->dot_shard_rm_inode; + parent = priv->dot_shard_inode; + break; + default: + break; + } + + linked_inode = inode_link(inode, parent, bname, buf); + inode_lookup(linked_inode); + *priv_inode = linked_inode; + return linked_inode; } int -shard_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *xdata, struct iatt *postparent) +shard_refresh_internal_dir_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + inode_t *inode, struct iatt *buf, dict_t *xdata, + struct iatt *postparent) { - if (op_ret < 0) - goto unwind; + shard_local_t *local = NULL; + inode_t *linked_inode = NULL; + shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie; - if (IA_ISDIR (buf->ia_type)) - goto unwind; + local = frame->local; - /* Also, if the file is sharded, get the file size and block cnt xattr, - * and store them in the stbuf appropriately. - */ + if (op_ret) { + local->op_ret = op_ret; + local->op_errno = op_errno; + goto out; + } + + /* To-Do: Fix refcount increment per call to + * shard_link_internal_dir_inode(). 
+ */ + linked_inode = shard_link_internal_dir_inode(local, inode, buf, type); + shard_inode_ctx_mark_dir_refreshed(linked_inode, this); +out: + shard_common_resolve_shards(frame, this, local->post_res_handler); + return 0; +} - if (dict_get (xdata, GF_XATTR_SHARD_FILE_SIZE) && - frame->root->pid != GF_CLIENT_PID_GSYNCD) - shard_modify_size_and_block_count (buf, xdata); - - /* If this was a fresh lookup, there are two possibilities: - * 1) If the file is sharded (indicated by the presence of block size - * xattr), store this block size, along with rdev and mode in its - * inode ctx. - * 2) If the file is not sharded, store size along with rdev and mode - * (which are anyway don't cares) in inode ctx. Since @ctx_tmp is - * already initialised to all zeroes, nothing more needs to be done. - */ +int +shard_refresh_internal_dir(call_frame_t *frame, xlator_t *this, + shard_internal_dir_type_t type) +{ + loc_t loc = { + 0, + }; + inode_t *inode = NULL; + shard_priv_t *priv = NULL; + shard_local_t *local = NULL; + uuid_t gfid = { + 0, + }; + + local = frame->local; + priv = this->private; + + switch (type) { + case SHARD_INTERNAL_DIR_DOT_SHARD: + gf_uuid_copy(gfid, priv->dot_shard_gfid); + break; + case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: + gf_uuid_copy(gfid, priv->dot_shard_rm_gfid); + break; + default: + break; + } + + inode = inode_find(this->itable, gfid); + + if (!shard_inode_ctx_needs_lookup(inode, this)) { + local->op_ret = 0; + goto out; + } - (void) shard_inode_ctx_update (inode, this, xdata, buf); + /* Plain assignment because the ref is already taken above through + * call to inode_find() + */ + loc.inode = inode; + gf_uuid_copy(loc.gfid, gfid); -unwind: - SHARD_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, buf, - xdata, postparent); - return 0; + STACK_WIND_COOKIE(frame, shard_refresh_internal_dir_cbk, (void *)(long)type, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, &loc, + NULL); + loc_wipe(&loc); + + return 0; + +out: + 
shard_common_resolve_shards(frame, this, local->post_res_handler); + return 0; } int -shard_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, - dict_t *xattr_req) +shard_lookup_internal_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, + struct iatt *postparent) { - int ret = -1; - int32_t op_errno = ENOMEM; - uint64_t block_size = 0; - shard_local_t *local = NULL; + inode_t *link_inode = NULL; + shard_local_t *local = NULL; + shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie; - if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { - SHARD_ENTRY_FOP_CHECK (loc, op_errno, err); - } + local = frame->local; - local = mem_get0 (this->local_pool); - if (!local) - goto err; - - frame->local = local; + if (op_ret) { + local->op_ret = op_ret; + local->op_errno = op_errno; + goto unwind; + } + + if (!IA_ISDIR(buf->ia_type)) { + gf_msg(this->name, GF_LOG_CRITICAL, 0, SHARD_MSG_DOT_SHARD_NODIR, + "%s already exists and " + "is not a directory. Please remove it from all bricks " + "and try again", + shard_internal_dir_string(type)); + local->op_ret = -1; + local->op_errno = EIO; + goto unwind; + } + + link_inode = shard_link_internal_dir_inode(local, inode, buf, type); + if (link_inode != inode) { + shard_refresh_internal_dir(frame, this, type); + } else { + shard_inode_ctx_mark_dir_refreshed(link_inode, this); + shard_common_resolve_shards(frame, this, local->post_res_handler); + } + return 0; - loc_copy (&local->loc, loc); +unwind: + local->post_res_handler(frame, this); + return 0; +} - local->xattr_req = xattr_req ? 
dict_ref (xattr_req) : dict_new (); - if (!local->xattr_req) - goto err; +int +shard_lookup_internal_dir(call_frame_t *frame, xlator_t *this, + shard_post_resolve_fop_handler_t post_res_handler, + shard_internal_dir_type_t type) +{ + int ret = -1; + dict_t *xattr_req = NULL; + shard_priv_t *priv = NULL; + shard_local_t *local = NULL; + uuid_t *gfid = NULL; + loc_t *loc = NULL; + gf_boolean_t free_gfid = _gf_true; + + local = frame->local; + priv = this->private; + local->post_res_handler = post_res_handler; + + gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); + if (!gfid) + goto err; + + xattr_req = dict_new(); + if (!xattr_req) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto err; + } + + switch (type) { + case SHARD_INTERNAL_DIR_DOT_SHARD: + gf_uuid_copy(*gfid, priv->dot_shard_gfid); + loc = &local->dot_shard_loc; + break; + case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: + gf_uuid_copy(*gfid, priv->dot_shard_rm_gfid); + loc = &local->dot_shard_rm_loc; + break; + default: + bzero(*gfid, sizeof(uuid_t)); + break; + } + + ret = dict_set_gfuuid(xattr_req, "gfid-req", *gfid, false); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to set gfid of %s into dict", + shard_internal_dir_string(type)); + local->op_ret = -1; + local->op_errno = ENOMEM; + goto err; + } else { + free_gfid = _gf_false; + } + + STACK_WIND_COOKIE(frame, shard_lookup_internal_dir_cbk, (void *)(long)type, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, loc, + xattr_req); + + dict_unref(xattr_req); + return 0; - if (shard_inode_ctx_get_block_size (loc->inode, this, &block_size)) { - ret = dict_set_uint64 (local->xattr_req, - GF_XATTR_SHARD_BLOCK_SIZE, 0); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, 0, - SHARD_MSG_DICT_SET_FAILED, "Failed to set dict" - " value: key:%s for path %s", - GF_XATTR_SHARD_BLOCK_SIZE, loc->path); - goto err; - } - } +err: + if (xattr_req) + dict_unref(xattr_req); + if (free_gfid) + GF_FREE(gfid); + 
post_res_handler(frame, this); + return 0; +} - if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { - ret = dict_set_uint64 (local->xattr_req, - GF_XATTR_SHARD_FILE_SIZE, 8 * 4); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, 0, - SHARD_MSG_DICT_SET_FAILED, - "Failed to set dict value: key:%s for path %s.", - GF_XATTR_SHARD_FILE_SIZE, loc->path); - goto err; - } - } +static void +shard_inode_ctx_update(inode_t *inode, xlator_t *this, dict_t *xdata, + struct iatt *buf) +{ + int ret = 0; + uint64_t size = 0; + void *bsize = NULL; + + if (shard_inode_ctx_get_block_size(inode, this, &size)) { + /* Fresh lookup */ + ret = dict_get_ptr(xdata, GF_XATTR_SHARD_BLOCK_SIZE, &bsize); + if (!ret) + size = ntoh64(*((uint64_t *)bsize)); + /* If the file is sharded, set its block size, otherwise just + * set 0. + */ - if ((xattr_req) && (dict_get (xattr_req, GF_CONTENT_KEY))) - dict_del (xattr_req, GF_CONTENT_KEY); + shard_inode_ctx_set(inode, this, buf, size, SHARD_MASK_BLOCK_SIZE); + } + /* If the file is sharded, also set the remaining attributes, + * except for ia_size and ia_blocks. 
+ */ + if (size) { + shard_inode_ctx_set(inode, this, buf, 0, SHARD_LOOKUP_MASK); + (void)shard_inode_ctx_invalidate(inode, this, buf); + } +} - STACK_WIND (frame, shard_lookup_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->lookup, loc, local->xattr_req); +int +shard_delete_shards(void *opaque); - return 0; +int +shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data); +int +shard_start_background_deletion(xlator_t *this) +{ + int ret = 0; + gf_boolean_t i_cleanup = _gf_true; + shard_priv_t *priv = NULL; + call_frame_t *cleanup_frame = NULL; + + priv = this->private; + + LOCK(&priv->lock); + { + switch (priv->bg_del_state) { + case SHARD_BG_DELETION_NONE: + i_cleanup = _gf_true; + priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING; + break; + case SHARD_BG_DELETION_LAUNCHING: + i_cleanup = _gf_false; + break; + case SHARD_BG_DELETION_IN_PROGRESS: + priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING; + i_cleanup = _gf_false; + break; + default: + break; + } + } + UNLOCK(&priv->lock); + if (!i_cleanup) + return 0; + + cleanup_frame = create_frame(this, this->ctx->pool); + if (!cleanup_frame) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, + "Failed to create " + "new frame to delete shards"); + ret = -ENOMEM; + goto err; + } + + set_lk_owner_from_ptr(&cleanup_frame->root->lk_owner, cleanup_frame->root); + + ret = synctask_new(this->ctx->env, shard_delete_shards, + shard_delete_shards_cbk, cleanup_frame, cleanup_frame); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, errno, + SHARD_MSG_SHARDS_DELETION_FAILED, + "failed to create task to do background " + "cleanup of shards"); + STACK_DESTROY(cleanup_frame->root); + goto err; + } + return 0; err: - SHARD_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, - NULL, NULL); - return 0; - + LOCK(&priv->lock); + { + priv->bg_del_state = SHARD_BG_DELETION_NONE; + } + UNLOCK(&priv->lock); + return ret; } int -shard_lookup_base_file_cbk (call_frame_t *frame, void *cookie, xlator_t 
*this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *xdata, - struct iatt *postparent) +shard_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, struct iatt *postparent) { - int ret = -1; - int32_t mask = SHARD_INODE_WRITE_MASK; - shard_local_t *local = NULL; - shard_inode_ctx_t ctx = {0,}; + int ret = -1; + shard_priv_t *priv = NULL; + gf_boolean_t i_start_cleanup = _gf_false; - local = frame->local; + priv = this->private; - if (op_ret < 0) { - gf_msg (this->name, GF_LOG_ERROR, op_errno, - SHARD_MSG_BASE_FILE_LOOKUP_FAILED, "Lookup on base file" - " failed : %s", loc_gfid_utoa (&(local->loc))); - local->op_ret = op_ret; - local->op_errno = op_errno; - goto unwind; - } + if (op_ret < 0) + goto unwind; - local->prebuf = *buf; - if (shard_modify_size_and_block_count (&local->prebuf, xdata)) { - local->op_ret = -1; - local->op_errno = EINVAL; - goto unwind; + if (IA_ISDIR(buf->ia_type)) + goto unwind; + + /* Also, if the file is sharded, get the file size and block cnt xattr, + * and store them in the stbuf appropriately. + */ + + if (dict_get(xdata, GF_XATTR_SHARD_FILE_SIZE) && + frame->root->pid != GF_CLIENT_PID_GSYNCD) + shard_modify_size_and_block_count(buf, xdata); + + /* If this was a fresh lookup, there are two possibilities: + * 1) If the file is sharded (indicated by the presence of block size + * xattr), store this block size, along with rdev and mode in its + * inode ctx. + * 2) If the file is not sharded, store size along with rdev and mode + * (which are anyway don't cares) in inode ctx. Since @ctx_tmp is + * already initialised to all zeroes, nothing more needs to be done. 
+ */ + + (void)shard_inode_ctx_update(inode, this, xdata, buf); + + LOCK(&priv->lock); + { + if (priv->first_lookup_done == _gf_false) { + priv->first_lookup_done = _gf_true; + i_start_cleanup = _gf_true; } + } + UNLOCK(&priv->lock); - if (shard_inode_ctx_get_all (inode, this, &ctx)) - mask = SHARD_ALL_MASK; + if (!i_start_cleanup) + goto unwind; - ret = shard_inode_ctx_set (inode, this, &local->prebuf, 0, - (mask | SHARD_MASK_REFRESH_RESET)); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, - SHARD_MSG_INODE_CTX_SET_FAILED, 0, "Failed to set inode" - " write params into inode ctx for %s", - uuid_utoa (buf->ia_gfid)); - local->op_ret = -1; - local->op_errno = ENOMEM; - goto unwind; + ret = shard_start_background_deletion(this); + if (ret < 0) { + LOCK(&priv->lock); + { + priv->first_lookup_done = _gf_false; } + UNLOCK(&priv->lock); + } unwind: - local->handler (frame, this); - return 0; + SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, buf, xdata, + postparent); + return 0; } int -shard_lookup_base_file (call_frame_t *frame, xlator_t *this, loc_t *loc, - shard_post_fop_handler_t handler) +shard_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) { - int ret = -1; - shard_local_t *local = NULL; - dict_t *xattr_req = NULL; - gf_boolean_t need_refresh = _gf_false; + int ret = -1; + int32_t op_errno = ENOMEM; + uint64_t block_size = 0; + shard_local_t *local = NULL; - local = frame->local; - local->handler = handler; + this->itable = loc->inode->table; + if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && + (frame->root->pid != GF_CLIENT_PID_GLFS_HEAL)) { + SHARD_ENTRY_FOP_CHECK(loc, op_errno, err); + } - ret = shard_inode_ctx_fill_iatt_from_cache (loc->inode, this, - &local->prebuf, - &need_refresh); - /* By this time, inode ctx should have been created either in create, - * mknod, readdirp or lookup. If not it is a bug! 
- */ - if ((ret == 0) && (need_refresh == _gf_false)) { - gf_msg_debug (this->name, 0, "Skipping lookup on base file: %s" - "Serving prebuf off the inode ctx cache", - uuid_utoa (loc->gfid)); - goto out; + local = mem_get0(this->local_pool); + if (!local) + goto err; + + frame->local = local; + + loc_copy(&local->loc, loc); + + local->xattr_req = xattr_req ? dict_ref(xattr_req) : dict_new(); + if (!local->xattr_req) + goto err; + + if (shard_inode_ctx_get_block_size(loc->inode, this, &block_size)) { + ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to set dict" + " value: key:%s for path %s", + GF_XATTR_SHARD_BLOCK_SIZE, loc->path); + goto err; } + } - xattr_req = dict_new (); - if (!xattr_req) { - local->op_ret = -1; - local->op_errno = ENOMEM; - goto out; + if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { + ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_FILE_SIZE, + 8 * 4); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to set dict value: key:%s for path %s.", + GF_XATTR_SHARD_FILE_SIZE, loc->path); + goto err; } + } - SHARD_MD_READ_FOP_INIT_REQ_DICT (this, xattr_req, loc->gfid, - local, out); + if ((xattr_req) && (dict_get(xattr_req, GF_CONTENT_KEY))) + dict_del(xattr_req, GF_CONTENT_KEY); - STACK_WIND (frame, shard_lookup_base_file_cbk, FIRST_CHILD (this), - FIRST_CHILD(this)->fops->lookup, loc, xattr_req); + STACK_WIND(frame, shard_lookup_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, loc, local->xattr_req); + return 0; +err: + shard_common_failure_unwind(GF_FOP_LOOKUP, frame, -1, op_errno); + return 0; +} - dict_unref (xattr_req); - return 0; +int +shard_set_iattr_invoke_post_handler(call_frame_t *frame, xlator_t *this, + inode_t *inode, int32_t op_ret, + int32_t op_errno, struct iatt *buf, + dict_t *xdata) +{ + int ret = -1; + int32_t mask = SHARD_INODE_WRITE_MASK; + shard_local_t *local 
= frame->local; + shard_inode_ctx_t ctx = { + 0, + }; + + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, op_errno, + SHARD_MSG_BASE_FILE_LOOKUP_FAILED, + "Lookup on base file" + " failed : %s", + uuid_utoa(inode->gfid)); + local->op_ret = op_ret; + local->op_errno = op_errno; + goto unwind; + } + + local->prebuf = *buf; + if (shard_modify_size_and_block_count(&local->prebuf, xdata)) { + local->op_ret = -1; + local->op_errno = EINVAL; + goto unwind; + } + + if (shard_inode_ctx_get_all(inode, this, &ctx)) + mask = SHARD_ALL_MASK; + + ret = shard_inode_ctx_set(inode, this, &local->prebuf, 0, + (mask | SHARD_MASK_REFRESH_RESET)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, SHARD_MSG_INODE_CTX_SET_FAILED, 0, + "Failed to set inode" + " write params into inode ctx for %s", + uuid_utoa(buf->ia_gfid)); + local->op_ret = -1; + local->op_errno = ENOMEM; + goto unwind; + } -out: - if (xattr_req) - dict_unref (xattr_req); - handler (frame, this); - return 0; +unwind: + local->handler(frame, this); + return 0; +} + +int +shard_fstat_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + dict_t *xdata) +{ + shard_local_t *local = frame->local; + + shard_set_iattr_invoke_post_handler(frame, this, local->fd->inode, op_ret, + op_errno, buf, xdata); + return 0; +} + +int +shard_lookup_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, + struct iatt *postparent) +{ + /* In case of op_ret < 0, inode passed to this function will be NULL + ex: in case of op_errno = ENOENT. So refer prefilled inode data + which is part of local. 
+ Note: Reassigning/overriding the inode passed to this cbk with inode + which is part of *struct shard_local_t* won't cause any issue as + both inodes have same reference/address as of the inode passed */ + inode = ((shard_local_t *)frame->local)->loc.inode; + + shard_set_iattr_invoke_post_handler(frame, this, inode, op_ret, op_errno, + buf, xdata); + return 0; +} + +/* This function decides whether to make file based lookup or + * fd based lookup (fstat) depending on the 3rd and 4th arg. + * If fd != NULL and loc == NULL then call is for fstat + * If fd == NULL and loc != NULL then call is for file based + * lookup. Please pass args based on the requirement. + */ +int +shard_refresh_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc, + fd_t *fd, shard_post_fop_handler_t handler) +{ + int ret = -1; + inode_t *inode = NULL; + shard_local_t *local = NULL; + dict_t *xattr_req = NULL; + gf_boolean_t need_refresh = _gf_false; + + local = frame->local; + local->handler = handler; + inode = fd ? fd->inode : loc->inode; + + ret = shard_inode_ctx_fill_iatt_from_cache(inode, this, &local->prebuf, + &need_refresh); + /* By this time, inode ctx should have been created either in create, + * mknod, readdirp or lookup. If not it is a bug! 
+ */ + if ((ret == 0) && (need_refresh == _gf_false)) { + gf_msg_debug(this->name, 0, + "Skipping lookup on base file: %s" + "Serving prebuf off the inode ctx cache", + uuid_utoa(inode->gfid)); + goto out; + } + + xattr_req = dict_new(); + if (!xattr_req) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto out; + } + + SHARD_MD_READ_FOP_INIT_REQ_DICT(this, xattr_req, inode->gfid, local, out); + + if (fd) + STACK_WIND(frame, shard_fstat_base_file_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, fd, xattr_req); + else + STACK_WIND(frame, shard_lookup_base_file_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, loc, xattr_req); + + dict_unref(xattr_req); + return 0; +out: + if (xattr_req) + dict_unref(xattr_req); + handler(frame, this); + return 0; } int -shard_post_fstat_handler (call_frame_t *frame, xlator_t *this) +shard_post_fstat_handler(call_frame_t *frame, xlator_t *this) { - shard_local_t *local = NULL; + shard_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (local->op_ret >= 0) - shard_inode_ctx_set (local->fd->inode, this, &local->prebuf, 0, - SHARD_LOOKUP_MASK); + if (local->op_ret >= 0) + shard_inode_ctx_set(local->fd->inode, this, &local->prebuf, 0, + SHARD_LOOKUP_MASK); - SHARD_STACK_UNWIND (fstat, frame, local->op_ret, local->op_errno, - &local->prebuf, local->xattr_rsp); - return 0; + SHARD_STACK_UNWIND(fstat, frame, local->op_ret, local->op_errno, + &local->prebuf, local->xattr_rsp); + return 0; } int -shard_post_stat_handler (call_frame_t *frame, xlator_t *this) +shard_post_stat_handler(call_frame_t *frame, xlator_t *this) { - shard_local_t *local = NULL; + shard_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (local->op_ret >= 0) - shard_inode_ctx_set (local->loc.inode, this, &local->prebuf, 0, - SHARD_LOOKUP_MASK); + if (local->op_ret >= 0) + shard_inode_ctx_set(local->loc.inode, this, &local->prebuf, 0, + SHARD_LOOKUP_MASK); - SHARD_STACK_UNWIND (stat, frame, 
local->op_ret, local->op_errno, - &local->prebuf, local->xattr_rsp); - return 0; + SHARD_STACK_UNWIND(stat, frame, local->op_ret, local->op_errno, + &local->prebuf, local->xattr_rsp); + return 0; } int -shard_common_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, - dict_t *xdata) +shard_common_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + dict_t *xdata) { - inode_t *inode = NULL; - shard_local_t *local = NULL; + inode_t *inode = NULL; + shard_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (op_ret < 0) { - gf_msg (this->name, GF_LOG_ERROR, op_errno, - SHARD_MSG_STAT_FAILED, "stat failed: %s", - local->fd ? uuid_utoa (local->fd->inode->gfid) - : uuid_utoa ((local->loc.inode)->gfid)); - local->op_ret = op_ret; - local->op_errno = op_errno; - goto unwind; - } + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_STAT_FAILED, + "stat failed: %s", + local->fd ? 
uuid_utoa(local->fd->inode->gfid) + : uuid_utoa((local->loc.inode)->gfid)); + local->op_ret = op_ret; + local->op_errno = op_errno; + goto unwind; + } - local->prebuf = *buf; - if (shard_modify_size_and_block_count (&local->prebuf, xdata)) { - local->op_ret = -1; - local->op_errno = EINVAL; - goto unwind; - } - local->xattr_rsp = dict_ref (xdata); + local->prebuf = *buf; + if (shard_modify_size_and_block_count(&local->prebuf, xdata)) { + local->op_ret = -1; + local->op_errno = EINVAL; + goto unwind; + } + local->xattr_rsp = dict_ref(xdata); - if (local->loc.inode) - inode = local->loc.inode; - else - inode = local->fd->inode; + if (local->loc.inode) + inode = local->loc.inode; + else + inode = local->fd->inode; - shard_inode_ctx_invalidate (inode, this, buf); + shard_inode_ctx_invalidate(inode, this, &local->prebuf); unwind: - local->handler (frame, this); - return 0; + local->handler(frame, this); + return 0; } int -shard_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +shard_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { - int ret = -1; - uint64_t block_size = 0; - shard_local_t *local = NULL; - - if ((IA_ISDIR (loc->inode->ia_type)) || - (IA_ISLNK (loc->inode->ia_type))) { - STACK_WIND (frame, default_stat_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->stat, loc, xdata); - return 0; - } - - ret = shard_inode_ctx_get_block_size (loc->inode, this, &block_size); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - SHARD_MSG_INODE_CTX_GET_FAILED, "Failed to get block " - "size from inode ctx of %s", - uuid_utoa (loc->inode->gfid)); - goto err; - } + int ret = -1; + uint64_t block_size = 0; + shard_local_t *local = NULL; - if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { - STACK_WIND (frame, default_stat_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->stat, loc, xdata); - return 0; - } + if ((IA_ISDIR(loc->inode->ia_type)) || (IA_ISLNK(loc->inode->ia_type))) { + STACK_WIND(frame, 
default_stat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->stat, loc, xdata); + return 0; + } - local = mem_get0 (this->local_pool); - if (!local) - goto err; + ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + "Failed to get block " + "size from inode ctx of %s", + uuid_utoa(loc->inode->gfid)); + goto err; + } - frame->local = local; + if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { + STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->stat, loc, xdata); + return 0; + } - local->handler = shard_post_stat_handler; - loc_copy (&local->loc, loc); - local->xattr_req = (xdata) ? dict_ref (xdata) : dict_new (); - if (!local->xattr_req) - goto err; + local = mem_get0(this->local_pool); + if (!local) + goto err; - SHARD_MD_READ_FOP_INIT_REQ_DICT (this, local->xattr_req, - local->loc.gfid, local, err); + frame->local = local; - STACK_WIND (frame, shard_common_stat_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->stat, loc, local->xattr_req); + local->handler = shard_post_stat_handler; + loc_copy(&local->loc, loc); + local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); + if (!local->xattr_req) + goto err; - return 0; + SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, local->loc.gfid, + local, err); + STACK_WIND(frame, shard_common_stat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->stat, loc, local->xattr_req); + return 0; err: - SHARD_STACK_UNWIND (stat, frame, -1, ENOMEM, NULL, NULL); - return 0; + shard_common_failure_unwind(GF_FOP_STAT, frame, -1, ENOMEM); + return 0; } int -shard_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) +shard_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { - int ret = -1; - uint64_t block_size = 0; - shard_local_t *local = NULL; - - if ((IA_ISDIR (fd->inode->ia_type)) || - (IA_ISLNK (fd->inode->ia_type))) { - STACK_WIND (frame, default_fstat_cbk, FIRST_CHILD(this), - FIRST_CHILD (this)->fops->fstat, fd, xdata); - return 0; - } + int ret = -1; + uint64_t block_size = 0; + shard_local_t *local = NULL; - ret = shard_inode_ctx_get_block_size (fd->inode, this, &block_size); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - SHARD_MSG_INODE_CTX_GET_FAILED, "Failed to get block " - "size from inode ctx of %s", - uuid_utoa (fd->inode->gfid)); - goto err; - } + if ((IA_ISDIR(fd->inode->ia_type)) || (IA_ISLNK(fd->inode->ia_type))) { + STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, fd, xdata); + return 0; + } - if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { - STACK_WIND (frame, default_fstat_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fstat, fd, xdata); - return 0; - } + ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + "Failed to get block " + "size from inode ctx of %s", + uuid_utoa(fd->inode->gfid)); + goto err; + } - if (!this->itable) - this->itable = fd->inode->table; + if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { + STACK_WIND(frame, 
default_fstat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, fd, xdata); + return 0; + } - local = mem_get0 (this->local_pool); - if (!local) - goto err; + if (!this->itable) + this->itable = fd->inode->table; - frame->local = local; + local = mem_get0(this->local_pool); + if (!local) + goto err; - local->handler = shard_post_fstat_handler; - local->fd = fd_ref (fd); - local->xattr_req = (xdata) ? dict_ref (xdata) : dict_new (); - if (!local->xattr_req) - goto err; + frame->local = local; - SHARD_MD_READ_FOP_INIT_REQ_DICT (this, local->xattr_req, - fd->inode->gfid, local, err); + local->handler = shard_post_fstat_handler; + local->fd = fd_ref(fd); + local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); + if (!local->xattr_req) + goto err; - STACK_WIND (frame, shard_common_stat_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fstat, fd, local->xattr_req); - return 0; + SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid, + local, err); + STACK_WIND(frame, shard_common_stat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, fd, local->xattr_req); + return 0; err: - SHARD_STACK_UNWIND (fstat, frame, -1, ENOMEM, NULL, NULL); - return 0; + shard_common_failure_unwind(GF_FOP_FSTAT, frame, -1, ENOMEM); + return 0; } int -shard_post_update_size_truncate_handler (call_frame_t *frame, xlator_t *this) +shard_post_update_size_truncate_handler(call_frame_t *frame, xlator_t *this) { - shard_local_t *local = NULL; + shard_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (local->fop == GF_FOP_TRUNCATE) - SHARD_STACK_UNWIND (truncate, frame, local->op_ret, - local->op_errno, &local->prebuf, - &local->postbuf, NULL); - else - SHARD_STACK_UNWIND (ftruncate, frame, local->op_ret, - local->op_errno, &local->prebuf, - &local->postbuf, NULL); - return 0; + if (local->fop == GF_FOP_TRUNCATE) + SHARD_STACK_UNWIND(truncate, frame, local->op_ret, local->op_errno, + &local->prebuf, &local->postbuf, NULL); + else + 
SHARD_STACK_UNWIND(ftruncate, frame, local->op_ret, local->op_errno, + &local->prebuf, &local->postbuf, NULL); + return 0; } int -shard_truncate_last_shard_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - struct iatt *prebuf, struct iatt *postbuf, - dict_t *xdata) -{ - inode_t *inode = NULL; - shard_local_t *local = NULL; - - local = frame->local; - - SHARD_UNSET_ROOT_FS_ID (frame, local); - - inode = (local->fop == GF_FOP_TRUNCATE) ? local->loc.inode - : local->fd->inode; - if (op_ret < 0) { - gf_msg (this->name, GF_LOG_ERROR, op_errno, - SHARD_MSG_TRUNCATE_LAST_SHARD_FAILED, "truncate on last" - " shard failed : %s", uuid_utoa (inode->gfid)); - local->op_ret = op_ret; - local->op_errno = op_errno; - goto err; - } - - local->postbuf.ia_size = local->offset; - local->postbuf.ia_blocks -= (prebuf->ia_blocks - postbuf->ia_blocks); - /* Let the delta be negative. We want xattrop to do subtraction */ - local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size; - local->delta_blocks = postbuf->ia_blocks - prebuf->ia_blocks; - local->hole_size = 0; - - shard_inode_ctx_set (inode, this, postbuf, 0, SHARD_MASK_TIMES); - - shard_update_file_size (frame, this, NULL, &local->loc, - shard_post_update_size_truncate_handler); - return 0; - +shard_truncate_last_shard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *prebuf, struct iatt *postbuf, + dict_t *xdata) +{ + inode_t *inode = NULL; + int64_t delta_blocks = 0; + shard_local_t *local = NULL; + + local = frame->local; + + SHARD_UNSET_ROOT_FS_ID(frame, local); + + inode = (local->fop == GF_FOP_TRUNCATE) ? 
local->loc.inode + : local->fd->inode; + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, op_errno, + SHARD_MSG_TRUNCATE_LAST_SHARD_FAILED, + "truncate on last" + " shard failed : %s", + uuid_utoa(inode->gfid)); + local->op_ret = op_ret; + local->op_errno = op_errno; + goto err; + } + + local->postbuf.ia_size = local->offset; + /* Let the delta be negative. We want xattrop to do subtraction */ + local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size; + delta_blocks = GF_ATOMIC_ADD(local->delta_blocks, + postbuf->ia_blocks - prebuf->ia_blocks); + GF_ASSERT(delta_blocks <= 0); + local->postbuf.ia_blocks += delta_blocks; + local->hole_size = 0; + + shard_inode_ctx_set(inode, this, &local->postbuf, 0, SHARD_MASK_TIMES); + shard_update_file_size(frame, this, NULL, &local->loc, + shard_post_update_size_truncate_handler); + return 0; err: - if (local->fop == GF_FOP_TRUNCATE) - SHARD_STACK_UNWIND (truncate, frame, local->op_ret, - local->op_errno, NULL, NULL, NULL); - else - SHARD_STACK_UNWIND (ftruncate, frame, local->op_ret, - local->op_errno, NULL, NULL, NULL); - return 0; + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); + return 0; } int -shard_truncate_last_shard (call_frame_t *frame, xlator_t *this, inode_t *inode) +shard_truncate_last_shard(call_frame_t *frame, xlator_t *this, inode_t *inode) { - size_t last_shard_size_after = 0; - loc_t loc = {0,}; - shard_local_t *local = NULL; + size_t last_shard_size_after = 0; + loc_t loc = { + 0, + }; + shard_local_t *local = NULL; - local = frame->local; + local = frame->local; - /* A NULL inode could be due to the fact that the last shard which - * needs to be truncated does not exist due to it lying in a hole - * region. So the only thing left to do in that case would be an - * update to file size xattr. - */ - if (!inode) { - gf_msg_debug (this->name, 0, "Last shard to be truncated absent" - " in backend: %s. 
Directly proceeding to update " - "file size", uuid_utoa (inode->gfid)); - shard_update_file_size (frame, this, NULL, &local->loc, - shard_post_update_size_truncate_handler); - return 0; - } + /* A NULL inode could be due to the fact that the last shard which + * needs to be truncated does not exist due to it lying in a hole + * region. So the only thing left to do in that case would be an + * update to file size xattr. + */ + if (!inode) { + gf_msg_debug(this->name, 0, + "Last shard to be truncated absent in backend: %" PRIu64 + " of gfid %s. Directly proceeding to update file size", + local->first_block, uuid_utoa(local->loc.inode->gfid)); + shard_update_file_size(frame, this, NULL, &local->loc, + shard_post_update_size_truncate_handler); + return 0; + } - SHARD_SET_ROOT_FS_ID (frame, local); + SHARD_SET_ROOT_FS_ID(frame, local); - loc.inode = inode_ref (inode); - gf_uuid_copy (loc.gfid, inode->gfid); + loc.inode = inode_ref(inode); + gf_uuid_copy(loc.gfid, inode->gfid); - last_shard_size_after = (local->offset % local->block_size); + last_shard_size_after = (local->offset % local->block_size); - STACK_WIND (frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->truncate, &loc, - last_shard_size_after, NULL); - loc_wipe (&loc); - return 0; + STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, &loc, last_shard_size_after, + NULL); + loc_wipe(&loc); + return 0; } -int -shard_unlink_shards_do_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *preparent, struct iatt *postparent, - dict_t *xdata); +void +shard_unlink_block_inode(shard_local_t *local, int shard_block_num); int -shard_truncate_htol (call_frame_t *frame, xlator_t *this, inode_t *inode) +shard_truncate_htol_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) { - 
int i = 1; - int ret = -1; - int call_count = 0; - uint32_t cur_block = 0; - uint32_t last_block = 0; - char path[PATH_MAX] = {0,}; - char *bname = NULL; - loc_t loc = {0,}; - gf_boolean_t wind_failed = _gf_false; - shard_local_t *local = NULL; - shard_priv_t *priv = NULL; - - local = frame->local; - priv = this->private; + int ret = 0; + int call_count = 0; + int shard_block_num = (long)cookie; + uint64_t block_count = 0; + shard_local_t *local = NULL; - cur_block = local->first_block + 1; - last_block = local->last_block; + local = frame->local; - /* Determine call count */ - for (i = 1; i < local->num_blocks; i++) { - if (!local->inode_list[i]) - continue; - call_count++; - } - - if (!call_count) { - /* Call count = 0 implies that all of the shards that need to be - * unlinked do not exist. So shard xlator would now proceed to - * do the final truncate + size updates. - */ - gf_msg_debug (this->name, 0, "Shards to be unlinked as part of " - "truncate absent in backend: %s. Directly " - "proceeding to update file size", - uuid_utoa (inode->gfid)); - local->postbuf.ia_size = local->offset; - local->postbuf.ia_blocks = local->prebuf.ia_blocks; - local->delta_size = local->postbuf.ia_size - - local->prebuf.ia_size; - local->delta_blocks = 0; - local->hole_size = 0; - shard_update_file_size (frame, this, local->fd, &local->loc, - shard_post_update_size_truncate_handler); - return 0; - } - - local->call_count = call_count; - i = 1; - - SHARD_SET_ROOT_FS_ID (frame, local); - while (cur_block <= last_block) { - if (!local->inode_list[i]) { - cur_block++; - i++; - continue; - } - if (wind_failed) { - shard_unlink_shards_do_cbk (frame, - (void *)(long) cur_block, - this, -1, ENOMEM, NULL, - NULL, NULL); - goto next; - } - - shard_make_block_abspath (cur_block, inode->gfid, path, - sizeof (path)); - bname = strrchr (path, '/') + 1; - loc.parent = inode_ref (priv->dot_shard_inode); - ret = inode_path (loc.parent, bname, (char **)&(loc.path)); - if (ret < 0) { - gf_msg 
(this->name, GF_LOG_ERROR, 0, - SHARD_MSG_INODE_PATH_FAILED, "Inode path failed" - " on %s. Base file gfid = %s", bname, - uuid_utoa (inode->gfid)); - local->op_ret = -1; - local->op_errno = ENOMEM; - loc_wipe (&loc); - wind_failed = _gf_true; - shard_unlink_shards_do_cbk (frame, - (void *)(long) cur_block, - this, -1, ENOMEM, NULL, - NULL, NULL); - goto next; - } - loc.name = strrchr (loc.path, '/'); - if (loc.name) - loc.name++; - loc.inode = inode_ref (local->inode_list[i]); - - STACK_WIND_COOKIE (frame, shard_unlink_shards_do_cbk, - (void *) (long) cur_block, FIRST_CHILD(this), - FIRST_CHILD (this)->fops->unlink, &loc, - 0, NULL); - loc_wipe (&loc); -next: - i++; - cur_block++; - if (!--call_count) - break; - } - return 0; + if (op_ret < 0) { + local->op_ret = op_ret; + local->op_errno = op_errno; + goto done; + } + ret = dict_get_uint64(xdata, GF_GET_FILE_BLOCK_COUNT, &block_count); + if (!ret) { + GF_ATOMIC_SUB(local->delta_blocks, block_count); + } else { + /* dict_get failed possibly due to a heterogeneous cluster? 
*/ + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to get key %s from dict during truncate of gfid %s", + GF_GET_FILE_BLOCK_COUNT, + uuid_utoa(local->resolver_base_inode->gfid)); + } + + shard_unlink_block_inode(local, shard_block_num); +done: + call_count = shard_call_count_return(frame); + if (call_count == 0) { + SHARD_UNSET_ROOT_FS_ID(frame, local); + shard_truncate_last_shard(frame, this, local->inode_list[0]); + } + return 0; +} +int +shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode) +{ + int i = 1; + int ret = -1; + int call_count = 0; + uint32_t cur_block = 0; + uint32_t last_block = 0; + char path[PATH_MAX] = { + 0, + }; + char *bname = NULL; + loc_t loc = { + 0, + }; + gf_boolean_t wind_failed = _gf_false; + shard_local_t *local = NULL; + shard_priv_t *priv = NULL; + dict_t *xdata_req = NULL; + + local = frame->local; + priv = this->private; + + cur_block = local->first_block + 1; + last_block = local->last_block; + + /* Determine call count */ + for (i = 1; i < local->num_blocks; i++) { + if (!local->inode_list[i]) + continue; + call_count++; + } + + if (!call_count) { + /* Call count = 0 implies that all of the shards that need to be + * unlinked do not exist. So shard xlator would now proceed to + * do the final truncate + size updates. + */ + gf_msg_debug(this->name, 0, + "Shards to be unlinked as part of " + "truncate absent in backend: %s. 
Directly " + "proceeding to update file size", + uuid_utoa(inode->gfid)); + local->postbuf.ia_size = local->offset; + local->postbuf.ia_blocks = local->prebuf.ia_blocks; + local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size; + GF_ATOMIC_INIT(local->delta_blocks, 0); + local->hole_size = 0; + shard_update_file_size(frame, this, local->fd, &local->loc, + shard_post_update_size_truncate_handler); + return 0; + } + + local->call_count = call_count; + i = 1; + xdata_req = dict_new(); + if (!xdata_req) { + shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); + return 0; + } + ret = dict_set_uint64(xdata_req, GF_GET_FILE_BLOCK_COUNT, 8 * 8); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to set key %s into dict during truncate of %s", + GF_GET_FILE_BLOCK_COUNT, + uuid_utoa(local->resolver_base_inode->gfid)); + dict_unref(xdata_req); + shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); + return 0; + } + + SHARD_SET_ROOT_FS_ID(frame, local); + while (cur_block <= last_block) { + if (!local->inode_list[i]) { + cur_block++; + i++; + continue; + } + if (wind_failed) { + shard_truncate_htol_cbk(frame, (void *)(long)cur_block, this, -1, + ENOMEM, NULL, NULL, NULL); + goto next; + } + + shard_make_block_abspath(cur_block, inode->gfid, path, sizeof(path)); + bname = strrchr(path, '/') + 1; + loc.parent = inode_ref(priv->dot_shard_inode); + ret = inode_path(loc.parent, bname, (char **)&(loc.path)); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, + "Inode path failed" + " on %s. 
Base file gfid = %s", + bname, uuid_utoa(inode->gfid)); + local->op_ret = -1; + local->op_errno = ENOMEM; + loc_wipe(&loc); + wind_failed = _gf_true; + shard_truncate_htol_cbk(frame, (void *)(long)cur_block, this, -1, + ENOMEM, NULL, NULL, NULL); + goto next; + } + loc.name = strrchr(loc.path, '/'); + if (loc.name) + loc.name++; + loc.inode = inode_ref(local->inode_list[i]); + + STACK_WIND_COOKIE(frame, shard_truncate_htol_cbk, + (void *)(long)cur_block, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, &loc, 0, xdata_req); + loc_wipe(&loc); + next: + i++; + cur_block++; + if (!--call_count) + break; + } + dict_unref(xdata_req); + return 0; } int -shard_truncate_do (call_frame_t *frame, xlator_t *this) +shard_truncate_do(call_frame_t *frame, xlator_t *this) { - shard_local_t *local = NULL; + shard_local_t *local = NULL; - local = frame->local; + local = frame->local; - if ((local->num_blocks == 1)) { - /* This means that there are no shards to be unlinked. - * The fop boils down to truncating the last shard, updating - * the size and unwinding. - */ - shard_truncate_last_shard (frame, this, - local->inode_list[0]); - return 0; - } else { - shard_truncate_htol (frame, this, local->loc.inode); - } + if (local->num_blocks == 1) { + /* This means that there are no shards to be unlinked. + * The fop boils down to truncating the last shard, updating + * the size and unwinding. 
+ */ + shard_truncate_last_shard(frame, this, local->inode_list[0]); return 0; + } else { + shard_truncate_htol(frame, this, local->loc.inode); + } + return 0; } int -shard_post_lookup_shards_truncate_handler (call_frame_t *frame, xlator_t *this) +shard_post_lookup_shards_truncate_handler(call_frame_t *frame, xlator_t *this) { - shard_local_t *local = NULL; + shard_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (local->op_ret < 0) { - if (local->fop == GF_FOP_TRUNCATE) - SHARD_STACK_UNWIND (truncate, frame, local->op_ret, - local->op_errno, NULL, NULL, NULL); - else - SHARD_STACK_UNWIND (ftruncate, frame, local->op_ret, - local->op_errno, NULL, NULL, NULL); - return 0; - } - - shard_truncate_do (frame, this); + if (local->op_ret < 0) { + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); return 0; + } + + shard_truncate_do(frame, this); + return 0; } void -shard_link_block_inode (shard_local_t *local, int block_num, inode_t *inode, - struct iatt *buf) -{ - int list_index = 0; - char block_bname[256] = {0,}; - inode_t *linked_inode = NULL; - xlator_t *this = NULL; - shard_priv_t *priv = NULL; - - this = THIS; - priv = this->private; - - shard_make_block_bname (block_num, (local->loc.inode)->gfid, - block_bname, sizeof (block_bname)); - - shard_inode_ctx_set (inode, this, buf, 0, SHARD_LOOKUP_MASK); - linked_inode = inode_link (inode, priv->dot_shard_inode, block_bname, - buf); - inode_lookup (linked_inode); - list_index = block_num - local->first_block; - - /* Defer unref'ing the inodes until write is complete. These inodes are - * unref'd in the event of a failure or after successful fop completion - * in shard_local_wipe(). 
- */ - local->inode_list[list_index] = linked_inode; - - LOCK(&priv->lock); - { - __shard_update_shards_inode_list (linked_inode, this, - local->loc.inode, block_num); - } - UNLOCK(&priv->lock); +shard_link_block_inode(shard_local_t *local, int block_num, inode_t *inode, + struct iatt *buf) +{ + int list_index = 0; + char block_bname[256] = { + 0, + }; + uuid_t gfid = { + 0, + }; + inode_t *linked_inode = NULL; + xlator_t *this = NULL; + inode_t *fsync_inode = NULL; + shard_priv_t *priv = NULL; + inode_t *base_inode = NULL; + + this = THIS; + priv = this->private; + if (local->loc.inode) { + gf_uuid_copy(gfid, local->loc.inode->gfid); + base_inode = local->loc.inode; + } else if (local->resolver_base_inode) { + gf_uuid_copy(gfid, local->resolver_base_inode->gfid); + base_inode = local->resolver_base_inode; + } else { + gf_uuid_copy(gfid, local->base_gfid); + } + + shard_make_block_bname(block_num, gfid, block_bname, sizeof(block_bname)); + + shard_inode_ctx_set(inode, this, buf, 0, SHARD_LOOKUP_MASK); + linked_inode = inode_link(inode, priv->dot_shard_inode, block_bname, buf); + inode_lookup(linked_inode); + list_index = block_num - local->first_block; + local->inode_list[list_index] = linked_inode; + + LOCK(&priv->lock); + { + fsync_inode = __shard_update_shards_inode_list( + linked_inode, this, base_inode, block_num, gfid); + } + UNLOCK(&priv->lock); + if (fsync_inode) + shard_initiate_evicted_inode_fsync(this, fsync_inode); } int -shard_common_lookup_shards_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *xdata, - struct iatt *postparent) -{ - int call_count = 0; - int shard_block_num = (long) cookie; - shard_local_t *local = NULL; - - local = frame->local; - - if (op_ret < 0) { - /* Ignore absence of shards in the backend in truncate fop. 
*/ - if (((local->fop == GF_FOP_TRUNCATE) || - (local->fop == GF_FOP_FTRUNCATE) || - (local->fop == GF_FOP_RENAME) || - (local->fop == GF_FOP_UNLINK)) && (op_errno == ENOENT)) - goto done; - gf_msg (this->name, GF_LOG_ERROR, op_errno, - SHARD_MSG_LOOKUP_SHARD_FAILED, "Lookup on shard %d " - "failed. Base file gfid = %s", shard_block_num, - (local->fop == GF_FOP_RENAME) ? - uuid_utoa (local->loc2.inode->gfid) - : uuid_utoa (local->loc.inode->gfid)); - local->op_ret = op_ret; - local->op_errno = op_errno; - goto done; +shard_common_lookup_shards_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + inode_t *inode, struct iatt *buf, dict_t *xdata, + struct iatt *postparent) +{ + int call_count = 0; + int shard_block_num = (long)cookie; + uuid_t gfid = { + 0, + }; + shard_local_t *local = NULL; + + local = frame->local; + if (local->resolver_base_inode) + gf_uuid_copy(gfid, local->resolver_base_inode->gfid); + else + gf_uuid_copy(gfid, local->base_gfid); + + if (op_ret < 0) { + /* Ignore absence of shards in the backend in truncate fop. */ + switch (local->fop) { + case GF_FOP_TRUNCATE: + case GF_FOP_FTRUNCATE: + case GF_FOP_RENAME: + case GF_FOP_UNLINK: + if (op_errno == ENOENT) + goto done; + break; + case GF_FOP_WRITE: + case GF_FOP_READ: + case GF_FOP_ZEROFILL: + case GF_FOP_DISCARD: + case GF_FOP_FALLOCATE: + if ((!local->first_lookup_done) && (op_errno == ENOENT)) { + LOCK(&frame->lock); + { + local->create_count++; + } + UNLOCK(&frame->lock); + goto done; + } + break; + default: + break; } - shard_link_block_inode (local, shard_block_num, inode, buf); + /* else */ + gf_msg(this->name, GF_LOG_ERROR, op_errno, + SHARD_MSG_LOOKUP_SHARD_FAILED, + "Lookup on shard %d " + "failed. 
Base file gfid = %s", + shard_block_num, uuid_utoa(gfid)); + local->op_ret = op_ret; + local->op_errno = op_errno; + goto done; + } + + shard_link_block_inode(local, shard_block_num, inode, buf); done: - call_count = shard_call_count_return (frame); + if (local->lookup_shards_barriered) { + syncbarrier_wake(&local->barrier); + return 0; + } else { + call_count = shard_call_count_return(frame); if (call_count == 0) { - if (local->op_ret < 0) - goto unwind; - else - local->pls_fop_handler (frame, this); + if (!local->first_lookup_done) + local->first_lookup_done = _gf_true; + local->pls_fop_handler(frame, this); } - return 0; - -unwind: - local->pls_fop_handler (frame, this); - return 0; + } + return 0; } -dict_t* -shard_create_gfid_dict (dict_t *dict) +dict_t * +shard_create_gfid_dict(dict_t *dict) { - int ret = 0; - dict_t *new = NULL; - uuid_t *gfid = NULL; + int ret = 0; + dict_t *new = NULL; + unsigned char *gfid = NULL; - new = dict_copy_with_ref (dict, NULL); - if (!new) - return NULL; + new = dict_copy_with_ref(dict, NULL); + if (!new) + return NULL; - gfid = GF_CALLOC (1, sizeof (uuid_t), gf_common_mt_char); - if (!gfid) { - ret = -1; - goto out; - } + gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_char); + if (!gfid) { + ret = -1; + goto out; + } - gf_uuid_generate (*gfid); + gf_uuid_generate(gfid); - ret = dict_set_dynptr (new, "gfid-req", gfid, sizeof (uuid_t)); + ret = dict_set_gfuuid(new, "gfid-req", gfid, false); out: - if (ret) { - dict_unref (new); - new = NULL; - GF_FREE (gfid); - } - - return new; -} - -int -shard_common_lookup_shards (call_frame_t *frame, xlator_t *this, inode_t *inode, - shard_post_lookup_shards_fop_handler_t handler) -{ - int i = 0; - int ret = 0; - int call_count = 0; - int32_t shard_idx_iter = 0; - int last_block = 0; - char path[PATH_MAX] = {0,}; - char *bname = NULL; - loc_t loc = {0,}; - shard_local_t *local = NULL; - shard_priv_t *priv = NULL; - gf_boolean_t wind_failed = _gf_false; - dict_t *xattr_req = NULL; - - priv = 
this->private; - local = frame->local; - call_count = local->call_count; - shard_idx_iter = local->first_block; - last_block = local->last_block; - local->pls_fop_handler = handler; - - while (shard_idx_iter <= last_block) { - if (local->inode_list[i]) { - i++; - shard_idx_iter++; - continue; - } - - if (wind_failed) { - shard_common_lookup_shards_cbk (frame, - (void *) (long) shard_idx_iter, - this, -1, ENOMEM, NULL, NULL, - NULL, NULL); - goto next; - } - - shard_make_block_abspath (shard_idx_iter, inode->gfid, path, - sizeof(path)); - - bname = strrchr (path, '/') + 1; - loc.inode = inode_new (this->itable); - loc.parent = inode_ref (priv->dot_shard_inode); - ret = inode_path (loc.parent, bname, (char **) &(loc.path)); - if (ret < 0 || !(loc.inode)) { - gf_msg (this->name, GF_LOG_ERROR, 0, - SHARD_MSG_INODE_PATH_FAILED, "Inode path failed" - " on %s, base file gfid = %s", bname, - uuid_utoa (inode->gfid)); - local->op_ret = -1; - local->op_errno = ENOMEM; - loc_wipe (&loc); - wind_failed = _gf_true; - shard_common_lookup_shards_cbk (frame, - (void *) (long) shard_idx_iter, - this, -1, ENOMEM, NULL, NULL, - NULL, NULL); - goto next; - } - - loc.name = strrchr (loc.path, '/'); - if (loc.name) - loc.name++; - - xattr_req = shard_create_gfid_dict (local->xattr_req); - if (!xattr_req) { - local->op_ret = -1; - local->op_errno = ENOMEM; - wind_failed = _gf_true; - loc_wipe (&loc); - shard_common_lookup_shards_cbk (frame, - (void *) (long) shard_idx_iter, - this, -1, ENOMEM, NULL, NULL, - NULL, NULL); - goto next; - } + if (ret) { + dict_unref(new); + new = NULL; + GF_FREE(gfid); + } - STACK_WIND_COOKIE (frame, shard_common_lookup_shards_cbk, - (void *) (long) shard_idx_iter, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, &loc, - xattr_req); - loc_wipe (&loc); - dict_unref (xattr_req); -next: - shard_idx_iter++; - i++; - - if (!--call_count) - break; - } - - return 0; + return new; } int -shard_post_resolve_truncate_handler (call_frame_t *frame, xlator_t 
*this) -{ - shard_local_t *local = NULL; - - local = frame->local; +shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode, + shard_post_lookup_shards_fop_handler_t handler) +{ + int i = 0; + int ret = 0; + int count = 0; + int call_count = 0; + int32_t shard_idx_iter = 0; + int lookup_count = 0; + char path[PATH_MAX] = { + 0, + }; + char *bname = NULL; + uuid_t gfid = { + 0, + }; + loc_t loc = { + 0, + }; + shard_local_t *local = NULL; + shard_priv_t *priv = NULL; + gf_boolean_t wind_failed = _gf_false; + dict_t *xattr_req = NULL; + + priv = this->private; + local = frame->local; + count = call_count = local->call_count; + shard_idx_iter = local->first_block; + lookup_count = local->last_block - local->create_count; + local->pls_fop_handler = handler; + if (local->lookup_shards_barriered) + local->barrier.waitfor = local->call_count; + + if (inode) + gf_uuid_copy(gfid, inode->gfid); + else + gf_uuid_copy(gfid, local->base_gfid); + + while (shard_idx_iter <= lookup_count) { + if (local->inode_list[i]) { + i++; + shard_idx_iter++; + continue; + } + + if (wind_failed) { + shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, + this, -1, ENOMEM, NULL, NULL, NULL, + NULL); + goto next; + } + + shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path)); + + bname = strrchr(path, '/') + 1; + loc.inode = inode_new(this->itable); + loc.parent = inode_ref(priv->dot_shard_inode); + gf_uuid_copy(loc.pargfid, priv->dot_shard_gfid); + ret = inode_path(loc.parent, bname, (char **)&(loc.path)); + if (ret < 0 || !(loc.inode)) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, + "Inode path failed" + " on %s, base file gfid = %s", + bname, uuid_utoa(gfid)); + local->op_ret = -1; + local->op_errno = ENOMEM; + loc_wipe(&loc); + wind_failed = _gf_true; + shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, + this, -1, ENOMEM, NULL, NULL, NULL, + NULL); + goto next; + } + + loc.name = strrchr(loc.path, '/'); 
+ if (loc.name) + loc.name++; + + xattr_req = shard_create_gfid_dict(local->xattr_req); + if (!xattr_req) { + local->op_ret = -1; + local->op_errno = ENOMEM; + wind_failed = _gf_true; + loc_wipe(&loc); + shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, + this, -1, ENOMEM, NULL, NULL, NULL, + NULL); + goto next; + } + + STACK_WIND_COOKIE(frame, shard_common_lookup_shards_cbk, + (void *)(long)shard_idx_iter, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, &loc, xattr_req); + loc_wipe(&loc); + dict_unref(xattr_req); + next: + shard_idx_iter++; + i++; + + if (!--call_count) + break; + } + if (local->lookup_shards_barriered) { + syncbarrier_wait(&local->barrier, count); + local->pls_fop_handler(frame, this); + } + return 0; +} - if (local->op_ret < 0) { - if (local->op_errno == ENOENT) { - /* If lookup on /.shard fails with ENOENT, it means that - * the file was 0-byte in size but truncated sometime in - * the past to a higher size which is reflected in the - * size xattr, and now being truncated to a lower size. - * In this case, the only thing that needs to be done is - * to update the size xattr of the file and unwind. 
- */ - local->first_block = local->last_block = 0; - local->num_blocks = 1; - local->call_count = 0; - local->op_ret = 0; - local->postbuf.ia_size = local->offset; - shard_update_file_size (frame, this, local->fd, - &local->loc, - shard_post_update_size_truncate_handler); - return 0; - } else { - if (local->fop == GF_FOP_TRUNCATE) - SHARD_STACK_UNWIND (truncate, frame, - local->op_ret, - local->op_errno, NULL, NULL, - NULL); - else - SHARD_STACK_UNWIND (ftruncate, frame, - local->op_ret, - local->op_errno, NULL, NULL, - NULL); - return 0; - } +int +shard_post_resolve_truncate_handler(call_frame_t *frame, xlator_t *this) +{ + shard_local_t *local = NULL; + + local = frame->local; + + if (local->op_ret < 0) { + if (local->op_errno == ENOENT) { + /* If lookup on /.shard fails with ENOENT, it means that + * the file was 0-byte in size but truncated sometime in + * the past to a higher size which is reflected in the + * size xattr, and now being truncated to a lower size. + * In this case, the only thing that needs to be done is + * to update the size xattr of the file and unwind. 
+ */ + local->first_block = local->last_block = 0; + local->num_blocks = 1; + local->call_count = 0; + local->op_ret = 0; + local->postbuf.ia_size = local->offset; + shard_update_file_size(frame, this, local->fd, &local->loc, + shard_post_update_size_truncate_handler); + return 0; + } else { + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); + return 0; } + } - if (!local->call_count) - shard_truncate_do (frame, this); - else - shard_common_lookup_shards (frame, this, local->loc.inode, - shard_post_lookup_shards_truncate_handler); + if (!local->call_count) + shard_truncate_do(frame, this); + else + shard_common_lookup_shards(frame, this, local->loc.inode, + shard_post_lookup_shards_truncate_handler); - return 0; + return 0; } int -shard_truncate_begin (call_frame_t *frame, xlator_t *this) -{ - int ret = 0; - shard_local_t *local = NULL; - shard_priv_t *priv = NULL; - - priv = this->private; - local = frame->local; - - /* First participant block here is the lowest numbered block that would - * hold the last byte of the file post successful truncation. - * Last participant block is the block that contains the last byte in - * the current state of the file. - * If (first block == last_block): - * then that means that the file only needs truncation of the - * first (or last since both are same) block. - * Else - * if (new_size % block_size == 0) - * then that means there is no truncate to be done with - * only shards from first_block + 1 through the last - * block needing to be unlinked. - * else - * both truncate of the first block and unlink of the - * remaining shards until end of file is required. - */ - local->first_block = (local->offset == 0) ? 
0 - : get_lowest_block (local->offset - 1, - local->block_size); - local->last_block = get_highest_block (0, local->prebuf.ia_size, - local->block_size); - - local->num_blocks = local->last_block - local->first_block + 1; - - if ((local->first_block == 0) && (local->num_blocks == 1)) { - if (local->fop == GF_FOP_TRUNCATE) - STACK_WIND (frame, shard_truncate_last_shard_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->truncate, - &local->loc, local->offset, - local->xattr_req); - else - STACK_WIND (frame, shard_truncate_last_shard_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->ftruncate, - local->fd, local->offset, local->xattr_req); - return 0; - } +shard_truncate_begin(call_frame_t *frame, xlator_t *this) +{ + int ret = 0; + shard_local_t *local = NULL; + shard_priv_t *priv = NULL; + + priv = this->private; + local = frame->local; + + /* First participant block here is the lowest numbered block that would + * hold the last byte of the file post successful truncation. + * Last participant block is the block that contains the last byte in + * the current state of the file. + * If (first block == last_block): + * then that means that the file only needs truncation of the + * first (or last since both are same) block. + * Else + * if (new_size % block_size == 0) + * then that means there is no truncate to be done with + * only shards from first_block + 1 through the last + * block needing to be unlinked. + * else + * both truncate of the first block and unlink of the + * remaining shards until end of file is required. + */ + local->first_block = (local->offset == 0) + ? 0 + : get_lowest_block(local->offset - 1, + local->block_size); + local->last_block = get_highest_block(0, local->prebuf.ia_size, + local->block_size); + + local->num_blocks = local->last_block - local->first_block + 1; + GF_ASSERT(local->num_blocks > 0); + local->resolver_base_inode = (local->fop == GF_FOP_TRUNCATE) + ? 
local->loc.inode + : local->fd->inode; + + if ((local->first_block == 0) && (local->num_blocks == 1)) { + if (local->fop == GF_FOP_TRUNCATE) + STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, &local->loc, + local->offset, local->xattr_req); + else + STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ftruncate, local->fd, + local->offset, local->xattr_req); + return 0; + } - local->inode_list = GF_CALLOC (local->num_blocks, sizeof (inode_t *), - gf_shard_mt_inode_list); - if (!local->inode_list) - goto err; + local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *), + gf_shard_mt_inode_list); + if (!local->inode_list) + goto err; - local->dot_shard_loc.inode = inode_find (this->itable, - priv->dot_shard_gfid); - if (!local->dot_shard_loc.inode) { - ret = shard_init_dot_shard_loc (this, local); - if (ret) - goto err; - shard_lookup_dot_shard (frame, this, - shard_post_resolve_truncate_handler); - } else { - shard_common_resolve_shards (frame, this, - (local->fop == GF_FOP_TRUNCATE) ? 
- local->loc.inode : - local->fd->inode, - shard_post_resolve_truncate_handler); - } - return 0; + local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); + if (!local->dot_shard_loc.inode) { + ret = shard_init_internal_dir_loc(this, local, + SHARD_INTERNAL_DIR_DOT_SHARD); + if (ret) + goto err; + shard_lookup_internal_dir(frame, this, + shard_post_resolve_truncate_handler, + SHARD_INTERNAL_DIR_DOT_SHARD); + } else { + local->post_res_handler = shard_post_resolve_truncate_handler; + shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); + } + return 0; err: - if (local->fop == GF_FOP_TRUNCATE) - SHARD_STACK_UNWIND (truncate, frame, -1, ENOMEM, NULL, NULL, - NULL); - else - SHARD_STACK_UNWIND (ftruncate, frame, -1, ENOMEM, NULL, NULL, - NULL); - - return 0; + shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); + return 0; } int -shard_post_lookup_truncate_handler (call_frame_t *frame, xlator_t *this) +shard_post_lookup_truncate_handler(call_frame_t *frame, xlator_t *this) { - shard_local_t *local = NULL; - - local = frame->local; + shard_local_t *local = NULL; + struct iatt tmp_stbuf = { + 0, + }; - if (local->op_ret < 0) { - if (local->fop == GF_FOP_TRUNCATE) - SHARD_STACK_UNWIND (truncate, frame, local->op_ret, - local->op_errno, NULL, NULL, NULL); - else - SHARD_STACK_UNWIND (ftruncate, frame, local->op_ret, - local->op_errno, NULL, NULL, NULL); + local = frame->local; - return 0; - } + if (local->op_ret < 0) { + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); + return 0; + } - local->postbuf = local->prebuf; + local->postbuf = tmp_stbuf = local->prebuf; - if (local->prebuf.ia_size == local->offset) { - /* If the file size is same as requested size, unwind the call - * immediately. 
- */ - if (local->fop == GF_FOP_TRUNCATE) - SHARD_STACK_UNWIND (truncate, frame, 0, 0, - &local->prebuf, &local->postbuf, - NULL); - else - SHARD_STACK_UNWIND (ftruncate, frame, 0, 0, - &local->prebuf, &local->postbuf, - NULL); - } else if (local->offset > local->prebuf.ia_size) { - /* If the truncate is from a lower to a higher size, set the - * new size xattr and unwind. - */ - local->hole_size = local->offset - local->prebuf.ia_size; - local->delta_size = 0; - local->delta_blocks = 0; - local->postbuf.ia_size = local->offset; - shard_update_file_size (frame, this, NULL, &local->loc, - shard_post_update_size_truncate_handler); - } else { - /* ... else - * i. unlink all shards that need to be unlinked. - * ii. truncate the last of the shards. - * iii. update the new size using setxattr. - * and unwind the fop. - */ - local->hole_size = 0; - local->delta_size = (local->offset - local->prebuf.ia_size); - local->delta_blocks = 0; - shard_truncate_begin (frame, this); - } - return 0; + if (local->prebuf.ia_size == local->offset) { + /* If the file size is same as requested size, unwind the call + * immediately. + */ + if (local->fop == GF_FOP_TRUNCATE) + SHARD_STACK_UNWIND(truncate, frame, 0, 0, &local->prebuf, + &local->postbuf, NULL); + else + SHARD_STACK_UNWIND(ftruncate, frame, 0, 0, &local->prebuf, + &local->postbuf, NULL); + } else if (local->offset > local->prebuf.ia_size) { + /* If the truncate is from a lower to a higher size, set the + * new size xattr and unwind. + */ + local->hole_size = local->offset - local->prebuf.ia_size; + local->delta_size = 0; + GF_ATOMIC_INIT(local->delta_blocks, 0); + local->postbuf.ia_size = local->offset; + tmp_stbuf.ia_size = local->offset; + shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0, + SHARD_INODE_WRITE_MASK); + shard_update_file_size(frame, this, NULL, &local->loc, + shard_post_update_size_truncate_handler); + } else { + /* ... else + * i. unlink all shards that need to be unlinked. + * ii. 
truncate the last of the shards. + * iii. update the new size using setxattr. + * and unwind the fop. + */ + local->hole_size = 0; + local->delta_size = (local->offset - local->prebuf.ia_size); + GF_ATOMIC_INIT(local->delta_blocks, 0); + tmp_stbuf.ia_size = local->offset; + shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0, + SHARD_INODE_WRITE_MASK); + shard_truncate_begin(frame, this); + } + return 0; } /* TO-DO: @@ -1855,2808 +2754,4629 @@ shard_post_lookup_truncate_handler (call_frame_t *frame, xlator_t *this) */ int -shard_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, +shard_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) +{ + int ret = -1; + uint64_t block_size = 0; + shard_local_t *local = NULL; + + ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + "Failed to get block " + "size from inode ctx of %s", + uuid_utoa(loc->inode->gfid)); + goto err; + } + + if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { + STACK_WIND(frame, default_truncate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); + return 0; + } + + if (!this->itable) + this->itable = loc->inode->table; + + local = mem_get0(this->local_pool); + if (!local) + goto err; + + frame->local = local; + + ret = syncbarrier_init(&local->barrier); + if (ret) + goto err; + loc_copy(&local->loc, loc); + local->offset = offset; + local->block_size = block_size; + local->fop = GF_FOP_TRUNCATE; + local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); + if (!local->xattr_req) + goto err; + local->resolver_base_inode = loc->inode; + GF_ATOMIC_INIT(local->delta_blocks, 0); + + shard_refresh_base_file(frame, this, &local->loc, NULL, + shard_post_lookup_truncate_handler); + return 0; + +err: + shard_common_failure_unwind(GF_FOP_TRUNCATE, frame, -1, ENOMEM); + return 0; +} + +int +shard_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, dict_t *xdata) { - int ret = -1; - uint64_t block_size = 0; - shard_local_t *local = NULL; - - ret = shard_inode_ctx_get_block_size (loc->inode, this, &block_size); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - SHARD_MSG_INODE_CTX_GET_FAILED, "Failed to get block " - "size from inode ctx of %s", - uuid_utoa (loc->inode->gfid)); - goto err; - } + int ret = -1; + uint64_t block_size = 0; + shard_local_t *local = NULL; + + ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + "Failed to get block " + "size from inode ctx of %s", + uuid_utoa(fd->inode->gfid)); + goto err; + } + + if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { + STACK_WIND(frame, default_ftruncate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); + return 0; + } + + if (!this->itable) + this->itable = fd->inode->table; + + local = mem_get0(this->local_pool); + if (!local) + goto err; + + frame->local = local; + ret = syncbarrier_init(&local->barrier); + if (ret) + goto err; + local->fd = fd_ref(fd); + local->offset = offset; + local->block_size = block_size; + local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); + if (!local->xattr_req) + goto err; + local->fop = GF_FOP_FTRUNCATE; + + local->loc.inode = inode_ref(fd->inode); + gf_uuid_copy(local->loc.gfid, fd->inode->gfid); + local->resolver_base_inode = fd->inode; + GF_ATOMIC_INIT(local->delta_blocks, 0); + + shard_refresh_base_file(frame, this, NULL, fd, + shard_post_lookup_truncate_handler); + return 0; +err: + shard_common_failure_unwind(GF_FOP_FTRUNCATE, frame, -1, ENOMEM); + return 0; +} - if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { - STACK_WIND (frame, default_truncate_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->truncate, loc, offset, - xdata); - return 0; - } +int +shard_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + int ret = -1; + shard_local_t *local = NULL; - if (!this->itable) - this->itable = loc->inode->table; + local = frame->local; - local = mem_get0 (this->local_pool); - if (!local) - goto err; + if (op_ret == -1) + goto unwind; - frame->local = local; + ret = shard_inode_ctx_set(inode, this, buf, local->block_size, + SHARD_ALL_MASK); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INODE_CTX_SET_FAILED, + "Failed to set inode " + "ctx for %s", + uuid_utoa(inode->gfid)); - loc_copy (&local->loc, loc); - local->offset = offset; - local->block_size = block_size; - local->fop = GF_FOP_TRUNCATE; - local->xattr_req = (xdata) ? 
dict_ref (xdata) : dict_new (); - if (!local->xattr_req) - goto err; +unwind: + SHARD_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, buf, preparent, + postparent, xdata); - shard_lookup_base_file (frame, this, &local->loc, - shard_post_lookup_truncate_handler); - return 0; + return 0; +} +int +shard_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + dev_t rdev, mode_t umask, dict_t *xdata) +{ + shard_priv_t *priv = NULL; + shard_local_t *local = NULL; + + priv = this->private; + local = mem_get0(this->local_pool); + if (!local) + goto err; + + frame->local = local; + local->block_size = priv->block_size; + if (!__is_gsyncd_on_shard_dir(frame, loc)) { + SHARD_INODE_CREATE_INIT(this, local->block_size, xdata, loc, 0, 0, err); + } + + STACK_WIND(frame, shard_mknod_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata); + return 0; err: - SHARD_STACK_UNWIND (truncate, frame, -1, ENOMEM, NULL, NULL, NULL); - return 0; + shard_common_failure_unwind(GF_FOP_MKNOD, frame, -1, ENOMEM); + return 0; } -int -shard_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, - dict_t *xdata) +int32_t +shard_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - int ret = -1; - uint64_t block_size = 0; - shard_local_t *local = NULL; - - ret = shard_inode_ctx_get_block_size (fd->inode, this, &block_size); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - SHARD_MSG_INODE_CTX_GET_FAILED, "Failed to get block " - "size from inode ctx of %s", - uuid_utoa (fd->inode->gfid)); - goto err; - } - - if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { - STACK_WIND (frame, default_ftruncate_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->ftruncate, fd, offset, - xdata); - return 0; - } - - if (!this->itable) - this->itable = fd->inode->table; - - local = 
mem_get0 (this->local_pool); - if (!local) - goto err; + shard_local_t *local = NULL; - frame->local = local; - local->fd = fd_ref (fd); - local->offset = offset; - local->block_size = block_size; - local->xattr_req = (xdata) ? dict_ref (xdata) : dict_new (); - if (!local->xattr_req) - goto err; - local->fop = GF_FOP_FTRUNCATE; + local = frame->local; + if (op_ret < 0) + goto err; - local->loc.inode = inode_ref (fd->inode); - gf_uuid_copy (local->loc.gfid, fd->inode->gfid); + shard_inode_ctx_set(inode, this, buf, 0, + SHARD_MASK_NLINK | SHARD_MASK_TIMES); + buf->ia_size = local->prebuf.ia_size; + buf->ia_blocks = local->prebuf.ia_blocks; - shard_lookup_base_file (frame, this, &local->loc, - shard_post_lookup_truncate_handler); - return 0; + SHARD_STACK_UNWIND(link, frame, op_ret, op_errno, inode, buf, preparent, + postparent, xdata); + return 0; err: - - SHARD_STACK_UNWIND (ftruncate, frame, -1, ENOMEM, NULL, NULL, NULL); - return 0; + shard_common_failure_unwind(GF_FOP_LINK, frame, op_ret, op_errno); + return 0; } int -shard_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +shard_post_lookup_link_handler(call_frame_t *frame, xlator_t *this) { - int ret = -1; - shard_local_t *local = NULL; + shard_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (op_ret == -1) - goto unwind; - - ret = shard_inode_ctx_set (inode, this, buf, ntoh64 (local->block_size), - SHARD_ALL_MASK); - if (ret) - gf_msg (this->name, GF_LOG_WARNING, 0, - SHARD_MSG_INODE_CTX_SET_FAILED, "Failed to set inode " - "ctx for %s", uuid_utoa (inode->gfid)); + if (local->op_ret < 0) { + SHARD_STACK_UNWIND(link, frame, local->op_ret, local->op_errno, NULL, + NULL, NULL, NULL, NULL); + return 0; + } -unwind: - SHARD_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode, buf, - preparent, postparent, xdata); + STACK_WIND(frame, 
shard_link_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->link, &local->loc, &local->loc2, + local->xattr_req); + return 0; +} - return 0; +int32_t +shard_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) +{ + int ret = -1; + uint64_t block_size = 0; + shard_local_t *local = NULL; + + ret = shard_inode_ctx_get_block_size(oldloc->inode, this, &block_size); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + "Failed to get block " + "size from inode ctx of %s", + uuid_utoa(oldloc->inode->gfid)); + goto err; + } + + if (!block_size) { + STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->link, + oldloc, newloc, xdata); + return 0; + } + + if (!this->itable) + this->itable = oldloc->inode->table; + + local = mem_get0(this->local_pool); + if (!local) + goto err; + + frame->local = local; + + loc_copy(&local->loc, oldloc); + loc_copy(&local->loc2, newloc); + local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); + if (!local->xattr_req) + goto err; + + shard_refresh_base_file(frame, this, &local->loc, NULL, + shard_post_lookup_link_handler); + return 0; +err: + shard_common_failure_unwind(GF_FOP_LINK, frame, -1, ENOMEM); + return 0; } int -shard_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, - dev_t rdev, mode_t umask, dict_t *xdata) -{ - shard_local_t *local = NULL; +shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode); - local = mem_get0 (this->local_pool); - if (!local) - goto err; +int +shard_post_lookup_shards_unlink_handler(call_frame_t *frame, xlator_t *this) +{ + shard_local_t *local = NULL; + uuid_t gfid = { + 0, + }; - frame->local = local; - if (!__is_gsyncd_on_shard_dir (frame, loc)) { - SHARD_INODE_CREATE_INIT (this, local, xdata, loc, err); - } + local = frame->local; - STACK_WIND (frame, shard_mknod_cbk, FIRST_CHILD (this), - FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, - xdata); - return 0; + if 
(local->resolver_base_inode) + gf_uuid_copy(gfid, local->resolver_base_inode->gfid); + else + gf_uuid_copy(gfid, local->base_gfid); -err: - SHARD_STACK_UNWIND (mknod, frame, -1, ENOMEM, NULL, NULL, NULL, - NULL, NULL); + if ((local->op_ret < 0) && (local->op_errno != ENOENT)) { + gf_msg(this->name, GF_LOG_ERROR, local->op_errno, SHARD_MSG_FOP_FAILED, + "failed to delete shards of %s", uuid_utoa(gfid)); return 0; + } + local->op_ret = 0; + local->op_errno = 0; + shard_unlink_shards_do(frame, this, local->resolver_base_inode); + return 0; } -int32_t -shard_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, - dict_t *xdata) +int +shard_post_resolve_unlink_handler(call_frame_t *frame, xlator_t *this) { - if (op_ret < 0) - goto err; + shard_local_t *local = NULL; - shard_inode_ctx_set (inode, this, buf, 0, - SHARD_MASK_NLINK | SHARD_MASK_TIMES); + local = frame->local; + local->lookup_shards_barriered = _gf_true; - SHARD_STACK_UNWIND (link, frame, op_ret, op_errno, inode, buf, - preparent, postparent, xdata); - return 0; -err: - SHARD_STACK_UNWIND (link, frame, op_ret, op_errno, inode, NULL, NULL, - NULL, NULL); - return 0; + if (!local->call_count) + shard_unlink_shards_do(frame, this, local->resolver_base_inode); + else + shard_common_lookup_shards(frame, this, local->resolver_base_inode, + shard_post_lookup_shards_unlink_handler); + return 0; } -int32_t -shard_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, - dict_t *xdata) -{ - int ret = -1; - uint64_t block_size = 0; - - ret = shard_inode_ctx_get_block_size (oldloc->inode, this, &block_size); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - SHARD_MSG_INODE_CTX_GET_FAILED, "Failed to get block " - "size from inode ctx of %s", - uuid_utoa (oldloc->inode->gfid)); - goto err; - } +void +shard_unlink_block_inode(shard_local_t *local, int shard_block_num) +{ + 
char block_bname[256] = { + 0, + }; + uuid_t gfid = { + 0, + }; + inode_t *inode = NULL; + inode_t *base_inode = NULL; + xlator_t *this = NULL; + shard_priv_t *priv = NULL; + shard_inode_ctx_t *ctx = NULL; + shard_inode_ctx_t *base_ictx = NULL; + int unref_base_inode = 0; + int unref_shard_inode = 0; + + this = THIS; + priv = this->private; + + inode = local->inode_list[shard_block_num - local->first_block]; + shard_inode_ctx_get(inode, this, &ctx); + base_inode = ctx->base_inode; + if (base_inode) + gf_uuid_copy(gfid, base_inode->gfid); + else + gf_uuid_copy(gfid, ctx->base_gfid); + shard_make_block_bname(shard_block_num, gfid, block_bname, + sizeof(block_bname)); + + LOCK(&priv->lock); + if (base_inode) + LOCK(&base_inode->lock); + LOCK(&inode->lock); + { + __shard_inode_ctx_get(inode, this, &ctx); + if (!list_empty(&ctx->ilist)) { + list_del_init(&ctx->ilist); + priv->inode_count--; + unref_base_inode++; + unref_shard_inode++; + GF_ASSERT(priv->inode_count >= 0); + } + if (ctx->fsync_needed) { + unref_base_inode++; + unref_shard_inode++; + list_del_init(&ctx->to_fsync_list); + if (base_inode) { + __shard_inode_ctx_get(base_inode, this, &base_ictx); + base_ictx->fsync_count--; + } + } + } + UNLOCK(&inode->lock); + if (base_inode) + UNLOCK(&base_inode->lock); + + inode_unlink(inode, priv->dot_shard_inode, block_bname); + inode_ref_reduce_by_n(inode, unref_shard_inode); + inode_forget(inode, 0); + + if (base_inode && unref_base_inode) + inode_ref_reduce_by_n(base_inode, unref_base_inode); + UNLOCK(&priv->lock); +} - if (!block_size) { - STACK_WIND_TAIL (frame, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->link, oldloc, newloc, - xdata); - return 0; - } +int +shard_rename_cbk(call_frame_t *frame, xlator_t *this) +{ + shard_local_t *local = NULL; - STACK_WIND (frame, shard_link_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata); - return 0; + local = frame->local; -err: - SHARD_STACK_UNWIND (link, frame, -1, ENOMEM, NULL, NULL, NULL, 
NULL, - NULL); - return 0; + SHARD_STACK_UNWIND(rename, frame, local->op_ret, local->op_errno, + &local->prebuf, &local->preoldparent, + &local->postoldparent, &local->prenewparent, + &local->postnewparent, local->xattr_rsp); + return 0; } -int -shard_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +int32_t +shard_unlink_cbk(call_frame_t *frame, xlator_t *this) { - SHARD_STACK_UNWIND (unlink, frame, op_ret, op_errno, preparent, - postparent, xdata); + shard_local_t *local = frame->local; - return 0; + SHARD_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno, + &local->preoldparent, &local->postoldparent, + local->xattr_rsp); + return 0; } int -shard_unlink_base_file (call_frame_t *frame, xlator_t *this) +shard_unlink_shards_do_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) { - shard_local_t *local = NULL; + int shard_block_num = (long)cookie; + shard_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (local->op_ret < 0) { - shard_unlink_cbk (frame, 0, this, local->op_ret, - local->op_errno, NULL, NULL, NULL); - return 0; - } + if (op_ret < 0) { + local->op_ret = op_ret; + local->op_errno = op_errno; + goto done; + } - STACK_WIND (frame, shard_unlink_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, &local->loc, local->xflag, - local->xattr_req); - return 0; + shard_unlink_block_inode(local, shard_block_num); +done: + syncbarrier_wake(&local->barrier); + return 0; } -void -shard_unlink_block_inode (shard_local_t *local, int shard_block_num) -{ - char block_bname[256] = {0,}; - inode_t *inode = NULL; - xlator_t *this = NULL; - shard_priv_t *priv = NULL; - shard_inode_ctx_t *ctx = NULL; - - this = THIS; - priv = this->private; +int +shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t 
*inode) +{ + int i = 0; + int ret = -1; + int count = 0; + uint32_t cur_block = 0; + uint32_t cur_block_idx = 0; /*this is idx into inode_list[] array */ + char *bname = NULL; + char path[PATH_MAX] = { + 0, + }; + uuid_t gfid = { + 0, + }; + loc_t loc = { + 0, + }; + gf_boolean_t wind_failed = _gf_false; + shard_local_t *local = NULL; + shard_priv_t *priv = NULL; + + priv = this->private; + local = frame->local; + + if (inode) + gf_uuid_copy(gfid, inode->gfid); + else + gf_uuid_copy(gfid, local->base_gfid); + + for (i = 0; i < local->num_blocks; i++) { + if (!local->inode_list[i]) + continue; + count++; + } + + if (!count) { + /* callcount = 0 implies that all of the shards that need to be + * unlinked are non-existent (in other words the file is full of + * holes). + */ + gf_msg_debug(this->name, 0, + "All shards that need to be " + "unlinked are non-existent: %s", + uuid_utoa(gfid)); + return 0; + } - inode = local->inode_list[shard_block_num - local->first_block]; + SHARD_SET_ROOT_FS_ID(frame, local); + local->barrier.waitfor = count; + cur_block = cur_block_idx + local->first_block; - shard_make_block_bname (shard_block_num, (local->loc.inode)->gfid, - block_bname, sizeof (block_bname)); + while (cur_block_idx < local->num_blocks) { + if (!local->inode_list[cur_block_idx]) + goto next; - LOCK(&priv->lock); - { - shard_inode_ctx_get (inode, this, &ctx); - if (!list_empty (&ctx->ilist)) { - list_del_init (&ctx->ilist); - priv->inode_count--; - } - GF_ASSERT (priv->inode_count >= 0); - inode_unlink (inode, priv->dot_shard_inode, block_bname); - inode_forget (inode, 0); + if (wind_failed) { + shard_unlink_shards_do_cbk(frame, (void *)(long)cur_block, this, -1, + ENOMEM, NULL, NULL, NULL); + goto next; } - UNLOCK(&priv->lock); + shard_make_block_abspath(cur_block, gfid, path, sizeof(path)); + bname = strrchr(path, '/') + 1; + loc.parent = inode_ref(priv->dot_shard_inode); + ret = inode_path(loc.parent, bname, (char **)&(loc.path)); + if (ret < 0) { + 
gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, + "Inode path failed" + " on %s, base file gfid = %s", + bname, uuid_utoa(gfid)); + local->op_ret = -1; + local->op_errno = ENOMEM; + loc_wipe(&loc); + wind_failed = _gf_true; + shard_unlink_shards_do_cbk(frame, (void *)(long)cur_block, this, -1, + ENOMEM, NULL, NULL, NULL); + goto next; + } + + loc.name = strrchr(loc.path, '/'); + if (loc.name) + loc.name++; + loc.inode = inode_ref(local->inode_list[cur_block_idx]); + + STACK_WIND_COOKIE(frame, shard_unlink_shards_do_cbk, + (void *)(long)cur_block, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, &loc, local->xflag, + local->xattr_req); + loc_wipe(&loc); + next: + cur_block++; + cur_block_idx++; + } + syncbarrier_wait(&local->barrier, count); + SHARD_UNSET_ROOT_FS_ID(frame, local); + return 0; } int -shard_rename_cbk (call_frame_t *frame, xlator_t *this); +shard_regulated_shards_deletion(call_frame_t *cleanup_frame, xlator_t *this, + int now, int first_block, gf_dirent_t *entry) +{ + int i = 0; + int ret = 0; + shard_local_t *local = NULL; + uuid_t gfid = { + 0, + }; + + local = cleanup_frame->local; + + local->inode_list = GF_CALLOC(now, sizeof(inode_t *), + gf_shard_mt_inode_list); + if (!local->inode_list) + return -ENOMEM; + + local->first_block = first_block; + local->last_block = first_block + now - 1; + local->num_blocks = now; + gf_uuid_parse(entry->d_name, gfid); + gf_uuid_copy(local->base_gfid, gfid); + local->resolver_base_inode = inode_find(this->itable, gfid); + local->call_count = 0; + ret = syncbarrier_init(&local->barrier); + if (ret) { + GF_FREE(local->inode_list); + local->inode_list = NULL; + inode_unref(local->resolver_base_inode); + local->resolver_base_inode = NULL; + return -errno; + } + shard_common_resolve_shards(cleanup_frame, this, + shard_post_resolve_unlink_handler); + + for (i = 0; i < local->num_blocks; i++) { + if (local->inode_list[i]) + inode_unref(local->inode_list[i]); + } + GF_FREE(local->inode_list); + 
local->inode_list = NULL; + if (local->op_ret) + ret = -local->op_errno; + syncbarrier_destroy(&local->barrier); + inode_unref(local->resolver_base_inode); + local->resolver_base_inode = NULL; + STACK_RESET(cleanup_frame->root); + return ret; +} int -shard_unlink_shards_do_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *preparent, struct iatt *postparent, - dict_t *xdata) -{ - int call_count = 0; - int shard_block_num = (long) cookie; - shard_local_t *local = NULL; +__shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this, + gf_dirent_t *entry, inode_t *inode) +{ + int ret = 0; + int shard_count = 0; + int first_block = 0; + int now = 0; + uint64_t size = 0; + uint64_t block_size = 0; + uint64_t size_array[4] = { + 0, + }; + void *bsize = NULL; + void *size_attr = NULL; + dict_t *xattr_rsp = NULL; + loc_t loc = { + 0, + }; + shard_local_t *local = NULL; + shard_priv_t *priv = NULL; + + priv = this->private; + local = cleanup_frame->local; + ret = dict_reset(local->xattr_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to reset dict"); + ret = -ENOMEM; + goto err; + } + + ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to set dict value: key:%s", GF_XATTR_SHARD_BLOCK_SIZE); + ret = -ENOMEM; + goto err; + } + + ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_FILE_SIZE, 8 * 4); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to set dict value: key:%s", GF_XATTR_SHARD_FILE_SIZE); + ret = -ENOMEM; + goto err; + } + + loc.inode = inode_ref(inode); + loc.parent = inode_ref(priv->dot_shard_rm_inode); + ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path)); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, + "Inode path failed on %s", entry->d_name); 
+ ret = -ENOMEM; + goto err; + } + + loc.name = strrchr(loc.path, '/'); + if (loc.name) + loc.name++; + ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, local->xattr_req, + &xattr_rsp); + if (ret) + goto err; + + ret = dict_get_ptr(xattr_rsp, GF_XATTR_SHARD_BLOCK_SIZE, &bsize); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to get dict value: key:%s", GF_XATTR_SHARD_BLOCK_SIZE); + goto err; + } + block_size = ntoh64(*((uint64_t *)bsize)); + + ret = dict_get_ptr(xattr_rsp, GF_XATTR_SHARD_FILE_SIZE, &size_attr); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to get dict value: key:%s", GF_XATTR_SHARD_FILE_SIZE); + goto err; + } + + memcpy(size_array, size_attr, sizeof(size_array)); + size = ntoh64(size_array[0]); + + shard_count = (size / block_size) - 1; + if (shard_count < 0) { + gf_msg_debug(this->name, 0, + "Size of %s hasn't grown beyond " + "its shard-block-size. Nothing to delete. " + "Returning", + entry->d_name); + /* File size < shard-block-size, so nothing to delete */ + ret = 0; + goto delete_marker; + } + if ((size % block_size) > 0) + shard_count++; + + if (shard_count == 0) { + gf_msg_debug(this->name, 0, + "Size of %s is exactly equal to " + "its shard-block-size. Nothing to delete. " + "Returning", + entry->d_name); + ret = 0; + goto delete_marker; + } + gf_msg_debug(this->name, 0, + "base file = %s, " + "shard-block-size=%" PRIu64 ", file-size=%" PRIu64 + ", " + "shard_count=%d", + entry->d_name, block_size, size, shard_count); + + /* Perform a gfid-based lookup to see if gfid corresponding to marker + * file's base name exists. + */ + loc_wipe(&loc); + loc.inode = inode_new(this->itable); + if (!loc.inode) { + ret = -ENOMEM; + goto err; + } + gf_uuid_parse(entry->d_name, loc.gfid); + ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL); + if (!ret) { + gf_msg_debug(this->name, 0, + "Base shard corresponding to gfid " + "%s is present. 
Skipping shard deletion. " + "Returning", + entry->d_name); + ret = 0; + goto delete_marker; + } - local = frame->local; + first_block = 1; - if (op_ret < 0) { - local->op_ret = op_ret; - local->op_errno = op_errno; - goto done; + while (shard_count) { + if (shard_count < local->deletion_rate) { + now = shard_count; + shard_count = 0; + } else { + now = local->deletion_rate; + shard_count -= local->deletion_rate; } - shard_unlink_block_inode (local, shard_block_num); - -done: - call_count = shard_call_count_return (frame); - if (call_count == 0) { - SHARD_UNSET_ROOT_FS_ID (frame, local); - - if (local->fop == GF_FOP_UNLINK) - shard_unlink_base_file (frame, this); - else if (local->fop == GF_FOP_RENAME) - shard_rename_cbk (frame, this); - else - shard_truncate_last_shard (frame, this, - local->inode_list[0]); - } + gf_msg_debug(this->name, 0, + "deleting %d shards starting from " + "block %d of gfid %s", + now, first_block, entry->d_name); + ret = shard_regulated_shards_deletion(cleanup_frame, this, now, + first_block, entry); + if (ret) + goto err; + first_block += now; + } + +delete_marker: + loc_wipe(&loc); + loc.inode = inode_ref(inode); + loc.parent = inode_ref(priv->dot_shard_rm_inode); + ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path)); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, + "Inode path failed on %s", entry->d_name); + ret = -ENOMEM; + goto err; + } + loc.name = strrchr(loc.path, '/'); + if (loc.name) + loc.name++; + ret = syncop_unlink(FIRST_CHILD(this), &loc, NULL, NULL); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SHARDS_DELETION_FAILED, + "Failed to delete %s " + "from /%s", + entry->d_name, GF_SHARD_REMOVE_ME_DIR); +err: + if (xattr_rsp) + dict_unref(xattr_rsp); + loc_wipe(&loc); + return ret; +} - return 0; +int +shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this, + gf_dirent_t *entry, inode_t *inode) +{ + int ret = -1; + loc_t loc = { + 0, + }; + 
shard_priv_t *priv = NULL; + + priv = this->private; + loc.inode = inode_ref(priv->dot_shard_rm_inode); + + ret = syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name, + ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL, NULL); + if (ret < 0) { + if (ret == -EAGAIN) { + ret = 0; + } + goto out; + } + { + ret = __shard_delete_shards_of_entry(cleanup_frame, this, entry, inode); + } + syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name, + ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL, NULL); +out: + loc_wipe(&loc); + return ret; } int -shard_unlink_shards_do (call_frame_t *frame, xlator_t *this, inode_t *inode) +shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data) { - int i = 0; - int ret = -1; - int count = 0; - int call_count = 0; - uint32_t last_block = 0; - uint32_t cur_block = 0; - char *bname = NULL; - char path[PATH_MAX] = {0,}; - loc_t loc = {0,}; - gf_boolean_t wind_failed = _gf_false; - shard_local_t *local = NULL; - shard_priv_t *priv = NULL; - - priv = this->private; - local = frame->local; - local->call_count = call_count = local->num_blocks - 1; - last_block = local->last_block; - - for (i = 1; i < local->num_blocks; i++) { - if (!local->inode_list[i]) - continue; - count++; - } + SHARD_STACK_DESTROY(frame); + return 0; +} - if (!count) { - /* callcount = 0 implies that all of the shards that need to be - * unlinked are non-existent (in other words the file is full of - * holes). So shard xlator would now proceed to do the final - * unlink on the base file. 
- */ - gf_msg_debug (this->name, 0, "All shards that need to be " - "unlinked are non-existent: %s", - uuid_utoa (inode->gfid)); - local->num_blocks = 1; - if (local->fop == GF_FOP_UNLINK) { - gf_msg_debug (this->name, 0, "Proceeding to unlink the" - " base file"); - STACK_WIND (frame, shard_unlink_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, - &local->loc, local->flags, - local->xattr_req); - } else if (local->fop == GF_FOP_RENAME) { - gf_msg_debug (this->name, 0, "Resuming rename()"); - shard_rename_cbk (frame, this); - } - return 0; +int +shard_resolve_internal_dir(xlator_t *this, shard_local_t *local, + shard_internal_dir_type_t type) +{ + int ret = 0; + char *bname = NULL; + loc_t *loc = NULL; + shard_priv_t *priv = NULL; + uuid_t gfid = { + 0, + }; + struct iatt stbuf = { + 0, + }; + + priv = this->private; + + switch (type) { + case SHARD_INTERNAL_DIR_DOT_SHARD: + loc = &local->dot_shard_loc; + gf_uuid_copy(gfid, priv->dot_shard_gfid); + bname = GF_SHARD_DIR; + break; + case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: + loc = &local->dot_shard_rm_loc; + gf_uuid_copy(gfid, priv->dot_shard_rm_gfid); + bname = GF_SHARD_REMOVE_ME_DIR; + break; + default: + break; + } + + loc->inode = inode_find(this->itable, gfid); + if (!loc->inode) { + ret = shard_init_internal_dir_loc(this, local, type); + if (ret) + goto err; + ret = dict_reset(local->xattr_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to reset " + "dict"); + ret = -ENOMEM; + goto err; + } + ret = dict_set_gfuuid(local->xattr_req, "gfid-req", gfid, true); + ret = syncop_lookup(FIRST_CHILD(this), loc, &stbuf, NULL, + local->xattr_req, NULL); + if (ret < 0) { + if (ret != -ENOENT) + gf_msg(this->name, GF_LOG_ERROR, -ret, + SHARD_MSG_SHARDS_DELETION_FAILED, + "Lookup on %s failed, exiting", bname); + goto err; + } else { + shard_link_internal_dir_inode(local, loc->inode, &stbuf, type); } + } + ret = 0; +err: + return ret; +} - local->call_count = 
call_count = count; - cur_block = 1; - SHARD_SET_ROOT_FS_ID (frame, local); +int +shard_lookup_marker_entry(xlator_t *this, shard_local_t *local, + gf_dirent_t *entry) +{ + int ret = 0; + loc_t loc = { + 0, + }; + + loc.inode = inode_new(this->itable); + if (!loc.inode) { + ret = -ENOMEM; + goto err; + } + loc.parent = inode_ref(local->fd->inode); + + ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path)); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, + "Inode path failed on %s", entry->d_name); + ret = -ENOMEM; + goto err; + } + + loc.name = strrchr(loc.path, '/'); + if (loc.name) + loc.name++; + + ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL); + if (ret < 0) { + goto err; + } + entry->inode = inode_ref(loc.inode); + ret = 0; +err: + loc_wipe(&loc); + return ret; +} - while (cur_block <= last_block) { - if (!local->inode_list[cur_block]) { - cur_block++; +int +shard_delete_shards(void *opaque) +{ + int ret = 0; + off_t offset = 0; + loc_t loc = { + 0, + }; + inode_t *link_inode = NULL; + xlator_t *this = NULL; + shard_priv_t *priv = NULL; + shard_local_t *local = NULL; + gf_dirent_t entries; + gf_dirent_t *entry = NULL; + call_frame_t *cleanup_frame = NULL; + gf_boolean_t done = _gf_false; + + this = THIS; + priv = this->private; + INIT_LIST_HEAD(&entries.list); + + cleanup_frame = opaque; + + local = mem_get0(this->local_pool); + if (!local) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, + "Failed to create local to " + "delete shards"); + ret = -ENOMEM; + goto err; + } + cleanup_frame->local = local; + local->fop = GF_FOP_UNLINK; + + local->xattr_req = dict_new(); + if (!local->xattr_req) { + ret = -ENOMEM; + goto err; + } + local->deletion_rate = priv->deletion_rate; + + ret = shard_resolve_internal_dir(this, local, SHARD_INTERNAL_DIR_DOT_SHARD); + if (ret == -ENOENT) { + gf_msg_debug(this->name, 0, + ".shard absent. Nothing to" + " delete. 
Exiting"); + ret = 0; + goto err; + } else if (ret < 0) { + goto err; + } + + ret = shard_resolve_internal_dir(this, local, + SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); + if (ret == -ENOENT) { + gf_msg_debug(this->name, 0, + ".remove_me absent. " + "Nothing to delete. Exiting"); + ret = 0; + goto err; + } else if (ret < 0) { + goto err; + } + + local->fd = fd_anonymous(local->dot_shard_rm_loc.inode); + if (!local->fd) { + ret = -ENOMEM; + goto err; + } + + for (;;) { + offset = 0; + LOCK(&priv->lock); + { + if (priv->bg_del_state == SHARD_BG_DELETION_LAUNCHING) { + priv->bg_del_state = SHARD_BG_DELETION_IN_PROGRESS; + } else if (priv->bg_del_state == SHARD_BG_DELETION_IN_PROGRESS) { + priv->bg_del_state = SHARD_BG_DELETION_NONE; + done = _gf_true; + } + } + UNLOCK(&priv->lock); + if (done) + break; + while ( + (ret = syncop_readdirp(FIRST_CHILD(this), local->fd, 131072, offset, + &entries, local->xattr_req, NULL))) { + if (ret > 0) + ret = 0; + list_for_each_entry(entry, &entries.list, list) + { + offset = entry->d_off; + + if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) + continue; + + if (!entry->inode) { + ret = shard_lookup_marker_entry(this, local, entry); + if (ret < 0) continue; } - - if (wind_failed) { - shard_unlink_shards_do_cbk (frame, - (void *) (long) cur_block, - this, -1, ENOMEM, NULL, - NULL, NULL); - goto next; - } - - shard_make_block_abspath (cur_block, inode->gfid, path, - sizeof (path)); - bname = strrchr (path, '/') + 1; - loc.parent = inode_ref (priv->dot_shard_inode); - ret = inode_path (loc.parent, bname, (char **) &(loc.path)); - if (ret < 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - SHARD_MSG_INODE_PATH_FAILED, "Inode path failed" - " on %s, base file gfid = %s", bname, - uuid_utoa (inode->gfid)); - local->op_ret = -1; - local->op_errno = ENOMEM; - loc_wipe (&loc); - wind_failed = _gf_true; - shard_unlink_shards_do_cbk (frame, - (void *) (long) cur_block, - this, -1, ENOMEM, NULL, - NULL, NULL); - goto next; + link_inode = 
inode_link(entry->inode, local->fd->inode, + entry->d_name, &entry->d_stat); + + gf_msg_debug(this->name, 0, + "Initiating deletion of " + "shards of gfid %s", + entry->d_name); + ret = shard_delete_shards_of_entry(cleanup_frame, this, entry, + link_inode); + inode_unlink(link_inode, local->fd->inode, entry->d_name); + inode_unref(link_inode); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, + SHARD_MSG_SHARDS_DELETION_FAILED, + "Failed to clean up shards of gfid %s", + entry->d_name); + continue; } + gf_msg(this->name, GF_LOG_INFO, 0, + SHARD_MSG_SHARD_DELETION_COMPLETED, + "Deleted " + "shards of gfid=%s from backend", + entry->d_name); + } + gf_dirent_free(&entries); + if (ret) + break; + } + } + ret = 0; + loc_wipe(&loc); + return ret; - loc.name = strrchr (loc.path, '/'); - if (loc.name) - loc.name++; - loc.inode = inode_ref (local->inode_list[cur_block]); - - STACK_WIND_COOKIE (frame, shard_unlink_shards_do_cbk, - (void *) (long) cur_block, FIRST_CHILD(this), - FIRST_CHILD (this)->fops->unlink, &loc, - local->xflag, local->xattr_req); - loc_wipe (&loc); - -next: - cur_block++; - if (!--call_count) - break; - } - - return 0; +err: + LOCK(&priv->lock); + { + priv->bg_del_state = SHARD_BG_DELETION_NONE; + } + UNLOCK(&priv->lock); + loc_wipe(&loc); + return ret; } int -shard_post_lookup_shards_unlink_handler (call_frame_t *frame, xlator_t *this) +shard_unlock_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - shard_local_t *local = NULL; - - local = frame->local; - - if ((local->op_ret < 0) && (local->op_errno != ENOENT)) { - if (local->fop == GF_FOP_UNLINK) - SHARD_STACK_UNWIND (unlink, frame, local->op_ret, - local->op_errno, NULL, NULL, NULL); - else - SHARD_STACK_UNWIND (rename, frame, local->op_ret, - local->op_errno, NULL, NULL, NULL, - NULL, NULL, NULL); - return 0; - } - local->op_ret = 0; - local->op_errno = 0; - - shard_unlink_shards_do (frame, this, - (local->fop == GF_FOP_RENAME) 
- ? local->loc2.inode - : local->loc.inode); - return 0; + if (op_ret) + gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, + "Unlock failed. Please check brick logs for " + "more details"); + SHARD_STACK_DESTROY(frame); + return 0; } int -shard_post_resolve_unlink_handler (call_frame_t *frame, xlator_t *this) +shard_unlock_inodelk(call_frame_t *frame, xlator_t *this) { - shard_local_t *local = NULL; - - local = frame->local; + loc_t *loc = NULL; + call_frame_t *lk_frame = NULL; + shard_local_t *local = NULL; + shard_local_t *lk_local = NULL; + shard_inodelk_t *lock = NULL; - if (local->op_ret < 0) { - if (local->op_errno == ENOENT) { - /* If lookup on /.shard fails with ENOENT, it probably - * means that the file is being unlinked before it - * could grow beyond its first block. In this case, - * unlink boils down to unlinking the base file and - * unwinding the call. - */ - local->op_ret = 0; - local->first_block = local->last_block = 0; - local->num_blocks = 1; - if (local->fop == GF_FOP_UNLINK) - STACK_WIND (frame, shard_unlink_cbk, - FIRST_CHILD(this), - FIRST_CHILD (this)->fops->unlink, - &local->loc, local->xflag, - local->xattr_req); - else - shard_rename_cbk (frame, this); - return 0; - } else { - if (local->fop == GF_FOP_UNLINK) - SHARD_STACK_UNWIND (unlink, frame, - local->op_ret, - local->op_errno, NULL, NULL, - NULL); - else - shard_rename_cbk (frame, this); - return 0; - } - } + local = frame->local; + lk_frame = local->inodelk_frame; + lk_local = lk_frame->local; + local->inodelk_frame = NULL; + loc = &local->int_inodelk.loc; + lock = &lk_local->int_inodelk; + lock->flock.l_type = F_UNLCK; - if (!local->call_count) - shard_unlink_shards_do (frame, this, - (local->fop == GF_FOP_RENAME) - ? local->loc2.inode - : local->loc.inode); - else - shard_common_lookup_shards (frame, this, - (local->fop == GF_FOP_RENAME) - ? 
local->loc2.inode - : local->loc.inode, - shard_post_lookup_shards_unlink_handler); - return 0; + STACK_WIND(lk_frame, shard_unlock_inodelk_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->inodelk, lock->domain, loc, F_SETLK, + &lock->flock, NULL); + local->int_inodelk.acquired_lock = _gf_false; + return 0; } int -shard_post_lookup_unlink_handler (call_frame_t *frame, xlator_t *this) -{ - int ret = -1; - shard_priv_t *priv = NULL; - shard_local_t *local = NULL; - - priv = this->private; - local = frame->local; - - if (local->op_ret < 0) { - SHARD_STACK_UNWIND (unlink, frame, local->op_ret, - local->op_errno, NULL, NULL, NULL); - return 0; - } +shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + struct iatt *preoldparent, struct iatt *postoldparent, + struct iatt *prenewparent, struct iatt *postnewparent, + dict_t *xdata); +int +shard_rename_src_base_file(call_frame_t *frame, xlator_t *this) +{ + int ret = 0; + loc_t *dst_loc = NULL; + loc_t tmp_loc = { + 0, + }; + shard_local_t *local = frame->local; + + if (local->dst_block_size) { + tmp_loc.parent = inode_ref(local->loc2.parent); + ret = inode_path(tmp_loc.parent, local->loc2.name, + (char **)&tmp_loc.path); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, + "Inode path failed" + " on pargfid=%s bname=%s", + uuid_utoa(tmp_loc.parent->gfid), local->loc2.name); + local->op_ret = -1; + local->op_errno = ENOMEM; + goto err; + } + + tmp_loc.name = strrchr(tmp_loc.path, '/'); + if (tmp_loc.name) + tmp_loc.name++; + dst_loc = &tmp_loc; + } else { + dst_loc = &local->loc2; + } + + /* To-Do: Request open-fd count on dst base file */ + STACK_WIND(frame, shard_rename_src_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rename, &local->loc, dst_loc, + local->xattr_req); + loc_wipe(&tmp_loc); + return 0; +err: + loc_wipe(&tmp_loc); + shard_common_failure_unwind(local->fop, frame, local->op_ret, + 
local->op_errno); + return 0; +} - local->first_block = get_lowest_block (0, local->block_size); - local->last_block = get_highest_block (0, local->prebuf.ia_size, - local->block_size); - local->num_blocks = local->last_block - local->first_block + 1; - - if ((local->num_blocks == 1) || (local->prebuf.ia_nlink > 1)) { - /* num_blocks = 1 implies that the file has not crossed its - * shard block size. So unlink boils down to unlinking just the - * base file. - * Because link() does not create links for all but the - * base shard, unlink() must delete these shards only when the - * link count is 1. - */ - STACK_WIND (frame, shard_unlink_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->unlink, &local->loc, - local->xflag, local->xattr_req); - return 0; - } +int +shard_unlink_base_file(call_frame_t *frame, xlator_t *this); - local->inode_list = GF_CALLOC (local->num_blocks, sizeof (inode_t *), - gf_shard_mt_inode_list); - if (!local->inode_list) - goto out; +int +shard_set_size_attrs_on_marker_file_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *dict, + dict_t *xdata) +{ + shard_priv_t *priv = NULL; + shard_local_t *local = NULL; + + priv = this->private; + local = frame->local; + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, + "Xattrop on marker file failed " + "while performing %s; entry gfid=%s", + gf_fop_string(local->fop), local->newloc.name); + goto err; + } + + inode_unlink(local->newloc.inode, priv->dot_shard_rm_inode, + local->newloc.name); + + if (local->fop == GF_FOP_UNLINK) + shard_unlink_base_file(frame, this); + else if (local->fop == GF_FOP_RENAME) + shard_rename_src_base_file(frame, this); + return 0; +err: + shard_common_failure_unwind(local->fop, frame, op_ret, op_errno); + return 0; +} - local->dot_shard_loc.inode = inode_find (this->itable, - priv->dot_shard_gfid); - if (!local->dot_shard_loc.inode) { - ret = shard_init_dot_shard_loc (this, local); - if 
(ret) - goto out; - shard_lookup_dot_shard (frame, this, - shard_post_resolve_unlink_handler); - } else { - shard_common_resolve_shards (frame, this, local->loc.inode, - shard_post_resolve_unlink_handler); - } - return 0; +int +shard_set_size_attrs_on_marker_file(call_frame_t *frame, xlator_t *this) +{ + int op_errno = ENOMEM; + uint64_t bs = 0; + dict_t *xdata = NULL; + shard_local_t *local = NULL; + + local = frame->local; + xdata = dict_new(); + if (!xdata) + goto err; + + if (local->fop == GF_FOP_UNLINK) + bs = local->block_size; + else if (local->fop == GF_FOP_RENAME) + bs = local->dst_block_size; + SHARD_INODE_CREATE_INIT(this, bs, xdata, &local->newloc, + local->prebuf.ia_size, 0, err); + STACK_WIND(frame, shard_set_size_attrs_on_marker_file_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->xattrop, + &local->newloc, GF_XATTROP_GET_AND_SET, xdata, NULL); + dict_unref(xdata); + return 0; +err: + if (xdata) + dict_unref(xdata); + shard_common_failure_unwind(local->fop, frame, -1, op_errno); + return 0; +} -out: - SHARD_STACK_UNWIND (unlink, frame, -1, ENOMEM, NULL, NULL, NULL); - return 0; +int +shard_lookup_marker_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, + struct iatt *postparent) +{ + inode_t *linked_inode = NULL; + shard_priv_t *priv = NULL; + shard_local_t *local = NULL; + + local = frame->local; + priv = this->private; + + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, + "Lookup on marker file failed " + "while performing %s; entry gfid=%s", + gf_fop_string(local->fop), local->newloc.name); + goto err; + } + + linked_inode = inode_link(inode, priv->dot_shard_rm_inode, + local->newloc.name, buf); + inode_unref(local->newloc.inode); + local->newloc.inode = linked_inode; + shard_set_size_attrs_on_marker_file(frame, this); + return 0; +err: + shard_common_failure_unwind(local->fop, frame, op_ret, op_errno); + return 
0; } int -shard_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, - dict_t *xdata) +shard_lookup_marker_file(call_frame_t *frame, xlator_t *this) { - int ret = -1; - uint64_t block_size = 0; - shard_local_t *local = NULL; + int op_errno = ENOMEM; + dict_t *xattr_req = NULL; + shard_local_t *local = NULL; - ret = shard_inode_ctx_get_block_size (loc->inode, this, &block_size); - if ((ret) && (!IA_ISLNK(loc->inode->ia_type))) { - gf_msg (this->name, GF_LOG_ERROR, 0, - SHARD_MSG_INODE_CTX_GET_FAILED, "Failed to get block " - "size from inode ctx of %s", - uuid_utoa (loc->inode->gfid)); - goto err; - } + local = frame->local; - if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { - STACK_WIND (frame, default_unlink_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); - return 0; - } + xattr_req = shard_create_gfid_dict(local->xattr_req); + if (!xattr_req) + goto err; - local = mem_get0 (this->local_pool); - if (!local) - goto err; - - frame->local = local; + STACK_WIND(frame, shard_lookup_marker_file_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, &local->newloc, xattr_req); + dict_unref(xattr_req); + return 0; +err: + shard_common_failure_unwind(local->fop, frame, -1, op_errno); + return 0; +} - loc_copy (&local->loc, loc); - local->xflag = xflag; - local->xattr_req = (xdata) ? 
dict_ref (xdata) : dict_new (); - local->block_size = block_size; - local->fop = GF_FOP_UNLINK; - if (!this->itable) - this->itable = (local->loc.inode)->table; +int +shard_create_marker_file_under_remove_me_cbk( + call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, inode_t *inode, struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + inode_t *linked_inode = NULL; + shard_priv_t *priv = NULL; + shard_local_t *local = NULL; + + local = frame->local; + priv = this->private; + + SHARD_UNSET_ROOT_FS_ID(frame, local); + if (op_ret < 0) { + if ((op_errno != EEXIST) && (op_errno != ENODATA)) { + local->op_ret = op_ret; + local->op_errno = op_errno; + gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, + "Marker file creation " + "failed while performing %s; entry gfid=%s", + gf_fop_string(local->fop), local->newloc.name); + goto err; + } else { + shard_lookup_marker_file(frame, this); + return 0; + } + } - shard_lookup_base_file (frame, this, &local->loc, - shard_post_lookup_unlink_handler); + linked_inode = inode_link(inode, priv->dot_shard_rm_inode, + local->newloc.name, buf); + inode_unref(local->newloc.inode); + local->newloc.inode = linked_inode; - return 0; + if (local->fop == GF_FOP_UNLINK) + shard_unlink_base_file(frame, this); + else if (local->fop == GF_FOP_RENAME) + shard_rename_src_base_file(frame, this); + return 0; err: - SHARD_STACK_UNWIND (unlink, frame, -1, ENOMEM, NULL, NULL, NULL); - return 0; - + shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); + return 0; } int -shard_rename_cbk (call_frame_t *frame, xlator_t *this) -{ - shard_local_t *local = NULL; +shard_create_marker_file_under_remove_me(call_frame_t *frame, xlator_t *this, + loc_t *loc) +{ + int ret = 0; + int op_errno = ENOMEM; + uint64_t bs = 0; + char g1[64] = { + 0, + }; + char g2[64] = { + 0, + }; + dict_t *xattr_req = NULL; + shard_priv_t *priv = NULL; + shard_local_t *local = NULL; + + 
priv = this->private; + local = frame->local; + + SHARD_SET_ROOT_FS_ID(frame, local); + + xattr_req = shard_create_gfid_dict(local->xattr_req); + if (!xattr_req) + goto err; + + local->newloc.inode = inode_new(this->itable); + local->newloc.parent = inode_ref(priv->dot_shard_rm_inode); + ret = inode_path(local->newloc.parent, uuid_utoa(loc->inode->gfid), + (char **)&local->newloc.path); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, + "Inode path failed on " + "pargfid=%s bname=%s", + uuid_utoa_r(priv->dot_shard_rm_gfid, g1), + uuid_utoa_r(loc->inode->gfid, g2)); + goto err; + } + local->newloc.name = strrchr(local->newloc.path, '/'); + if (local->newloc.name) + local->newloc.name++; + + if (local->fop == GF_FOP_UNLINK) + bs = local->block_size; + else if (local->fop == GF_FOP_RENAME) + bs = local->dst_block_size; + + SHARD_INODE_CREATE_INIT(this, bs, xattr_req, &local->newloc, + local->prebuf.ia_size, 0, err); + + STACK_WIND(frame, shard_create_marker_file_under_remove_me_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod, + &local->newloc, 0, 0, 0644, xattr_req); + dict_unref(xattr_req); + return 0; - local = frame->local; - - SHARD_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno, - &local->prebuf, &local->preoldparent, - &local->postoldparent, &local->prenewparent, - &local->postnewparent, local->xattr_rsp); - return 0; +err: + if (xattr_req) + dict_unref(xattr_req); + shard_create_marker_file_under_remove_me_cbk(frame, 0, this, -1, op_errno, + NULL, NULL, NULL, NULL, NULL); + return 0; } int -shard_rename_unlink_dst_shards_do (call_frame_t *frame, xlator_t *this) +shard_unlock_entrylk(call_frame_t *frame, xlator_t *this); + +int +shard_unlink_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) { - int ret = -1; - shard_local_t *local = NULL; - shard_priv_t *priv = NULL; + int ret = 0; + 
shard_local_t *local = NULL; - local = frame->local; - priv = this->private; + local = frame->local; - local->first_block = get_lowest_block (0, local->dst_block_size); - local->last_block = get_highest_block (0, local->postbuf.ia_size, - local->dst_block_size); - local->num_blocks = local->last_block - local->first_block + 1; + if (op_ret < 0) { + local->op_ret = op_ret; + local->op_errno = op_errno; + } else { + shard_inode_ctx_set_refresh_flag(local->int_inodelk.loc.inode, this); + local->preoldparent = *preparent; + local->postoldparent = *postparent; + if (xdata) + local->xattr_rsp = dict_ref(xdata); + if (local->cleanup_required) + shard_start_background_deletion(this); + } - if ((local->num_blocks == 1) || (local->postbuf.ia_nlink > 1)) { - shard_rename_cbk (frame, this); - return 0; + if (local->entrylk_frame) { + ret = shard_unlock_entrylk(frame, this); + if (ret < 0) { + local->op_ret = -1; + local->op_errno = -ret; } + } - local->inode_list = GF_CALLOC (local->num_blocks, sizeof (inode_t *), - gf_shard_mt_inode_list); - if (!local->inode_list) - goto out; + ret = shard_unlock_inodelk(frame, this); + if (ret < 0) { + local->op_ret = -1; + local->op_errno = -ret; + } - local->dot_shard_loc.inode = inode_find (this->itable, - priv->dot_shard_gfid); - if (!local->dot_shard_loc.inode) { - ret = shard_init_dot_shard_loc (this, local); - if (ret) - goto out; - shard_lookup_dot_shard (frame, this, - shard_post_resolve_unlink_handler); - } else { - shard_common_resolve_shards (frame, this, local->loc2.inode, - shard_post_resolve_unlink_handler); - } + shard_unlink_cbk(frame, this); + return 0; +} - return 0; +int +shard_unlink_base_file(call_frame_t *frame, xlator_t *this) +{ + shard_local_t *local = frame->local; -out: - SHARD_STACK_UNWIND (rename, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, - NULL, NULL); - return 0; + /* To-Do: Request open-fd count on base file */ + STACK_WIND(frame, shard_unlink_base_file_cbk, FIRST_CHILD(this), + 
FIRST_CHILD(this)->fops->unlink, &local->loc, local->xflag, + local->xattr_req); + return 0; } int -shard_post_rename_lookup_handler (call_frame_t *frame, xlator_t *this) +shard_unlock_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - shard_local_t *local = NULL; - - local = frame->local; + if (op_ret) + gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, + "Unlock failed. Please check brick logs for " + "more details"); + SHARD_STACK_DESTROY(frame); + return 0; +} - if (local->op_ret < 0) { - SHARD_STACK_UNWIND (rename, frame, local->op_ret, - local->op_errno, NULL, NULL, NULL, NULL, - NULL, NULL); - return 0; - } +int +shard_unlock_entrylk(call_frame_t *frame, xlator_t *this) +{ + loc_t *loc = NULL; + call_frame_t *lk_frame = NULL; + shard_local_t *local = NULL; + shard_local_t *lk_local = NULL; + shard_entrylk_t *lock = NULL; - if (local->dst_block_size) - shard_rename_unlink_dst_shards_do (frame, this); - else - shard_rename_cbk (frame, this); + local = frame->local; + lk_frame = local->entrylk_frame; + lk_local = lk_frame->local; + local->entrylk_frame = NULL; + lock = &lk_local->int_entrylk; + loc = &lock->loc; - return 0; + STACK_WIND(lk_frame, shard_unlock_entrylk_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->entrylk, this->name, loc, + lk_local->int_entrylk.basename, ENTRYLK_UNLOCK, ENTRYLK_WRLCK, + NULL); + local->int_entrylk.acquired_lock = _gf_false; + return 0; } int -shard_rename_src_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, - struct iatt *preoldparent, struct iatt *postoldparent, - struct iatt *prenewparent, struct iatt *postnewparent, - dict_t *xdata) +shard_post_entrylk_fop_handler(call_frame_t *frame, xlator_t *this) { - shard_local_t *local = NULL; + shard_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (op_ret < 0) { - local->op_ret = op_ret; - local->op_errno = 
op_errno; - goto err; - } - - local->prebuf = *buf; - local->preoldparent = *preoldparent; - local->postoldparent = *postoldparent; - local->prenewparent = *prenewparent; - local->postnewparent = *postnewparent; - if (xdata) - local->xattr_rsp = dict_ref (xdata); + switch (local->fop) { + case GF_FOP_UNLINK: + case GF_FOP_RENAME: + shard_create_marker_file_under_remove_me(frame, this, + &local->int_inodelk.loc); + break; + default: + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, + "post-entrylk handler not defined. This case should not" + " be hit"); + break; + } + return 0; +} - /* Now the base file is looked up to gather the ia_size and ia_blocks.*/ +int +shard_acquire_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + call_frame_t *main_frame = NULL; + shard_local_t *local = NULL; + shard_local_t *main_local = NULL; - if (local->block_size) { - local->tmp_loc.inode = inode_new (this->itable); - gf_uuid_copy (local->tmp_loc.gfid, (local->loc.inode)->gfid); - shard_lookup_base_file (frame, this, &local->tmp_loc, - shard_post_rename_lookup_handler); - } else { - shard_rename_unlink_dst_shards_do (frame, this); - } + local = frame->local; + main_frame = local->main_frame; + main_local = main_frame->local; + if (local->op_ret < 0) { + shard_common_failure_unwind(main_local->fop, main_frame, op_ret, + op_errno); return 0; -err: - SHARD_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno, NULL, - NULL, NULL, NULL, NULL, NULL); - return 0; + } + main_local->int_entrylk.acquired_lock = _gf_true; + shard_post_entrylk_fop_handler(main_frame, this); + return 0; } int -shard_rename_src_base_file (call_frame_t *frame, xlator_t *this) -{ - shard_local_t *local = NULL; - - local = frame->local; +shard_acquire_entrylk(call_frame_t *frame, xlator_t *this, inode_t *inode, + uuid_t gfid) +{ + char gfid_str[GF_UUID_BUF_SIZE] = { + 0, + }; + shard_local_t *local = NULL; + shard_local_t 
*entrylk_local = NULL; + shard_entrylk_t *int_entrylk = NULL; + call_frame_t *entrylk_frame = NULL; + + local = frame->local; + entrylk_frame = create_frame(this, this->ctx->pool); + if (!entrylk_frame) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, + "Failed to create new frame " + "to lock marker file"); + goto err; + } + + entrylk_local = mem_get0(this->local_pool); + if (!entrylk_local) { + STACK_DESTROY(entrylk_frame->root); + goto err; + } + + entrylk_frame->local = entrylk_local; + entrylk_local->main_frame = frame; + int_entrylk = &entrylk_local->int_entrylk; + + int_entrylk->loc.inode = inode_ref(inode); + set_lk_owner_from_ptr(&entrylk_frame->root->lk_owner, entrylk_frame->root); + local->entrylk_frame = entrylk_frame; + gf_uuid_unparse(gfid, gfid_str); + int_entrylk->basename = gf_strdup(gfid_str); + + STACK_WIND(entrylk_frame, shard_acquire_entrylk_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->entrylk, this->name, &int_entrylk->loc, + int_entrylk->basename, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL); + return 0; +err: + shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); + return 0; +} - STACK_WIND (frame, shard_rename_src_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rename, &local->loc, &local->loc2, - local->xattr_req); - return 0; +int +shard_post_lookup_base_shard_rm_handler(call_frame_t *frame, xlator_t *this) +{ + shard_local_t *local = NULL; + shard_priv_t *priv = NULL; + + priv = this->private; + local = frame->local; + + if (local->op_ret < 0) { + shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); + return 0; + } + + if (local->prebuf.ia_nlink > 1) { + gf_msg_debug(this->name, 0, + "link count on %s > 1:%d, " + "performing rename()/unlink()", + local->int_inodelk.loc.path, local->prebuf.ia_nlink); + if (local->fop == GF_FOP_RENAME) + shard_rename_src_base_file(frame, this); + else if (local->fop == GF_FOP_UNLINK) + shard_unlink_base_file(frame, this); + } else { + gf_msg_debug(this->name, 
0, + "link count on %s = 1, creating " + "file under .remove_me", + local->int_inodelk.loc.path); + local->cleanup_required = _gf_true; + shard_acquire_entrylk(frame, this, priv->dot_shard_rm_inode, + local->prebuf.ia_gfid); + } + return 0; } int -shard_post_lookup_dst_base_file_handler (call_frame_t *frame, xlator_t *this) +shard_post_inodelk_fop_handler(call_frame_t *frame, xlator_t *this) { - shard_local_t *local = NULL; + shard_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (local->op_ret < 0) { - SHARD_STACK_UNWIND (rename, frame, local->op_ret, - local->op_errno, NULL, NULL, NULL, NULL, - NULL, NULL); - return 0; - } - - /* Save dst base file attributes into postbuf so the information is not - * lost when it is overwritten after lookup on base file of src in - * shard_lookup_base_file_cbk(). - */ - local->postbuf = local->prebuf; - shard_rename_src_base_file (frame, this); - return 0; + switch (local->fop) { + case GF_FOP_UNLINK: + case GF_FOP_RENAME: + shard_refresh_base_file(frame, this, &local->int_inodelk.loc, NULL, + shard_post_lookup_base_shard_rm_handler); + break; + default: + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, + "post-inodelk handler not defined. 
This case should not" + " be hit"); + break; + } + return 0; } int -shard_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, - dict_t *xdata) +shard_acquire_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - int ret = -1; - uint64_t block_size = 0; - uint64_t dst_block_size = 0; - shard_local_t *local = NULL; - - if (IA_ISDIR (oldloc->inode->ia_type)) { - STACK_WIND (frame, default_rename_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rename, oldloc, newloc, - xdata); - return 0; - } + call_frame_t *main_frame = NULL; + shard_local_t *local = NULL; + shard_local_t *main_local = NULL; - ret = shard_inode_ctx_get_block_size (oldloc->inode, this, &block_size); - if ((ret) && (!IA_ISLNK (oldloc->inode->ia_type))) { - gf_msg (this->name, GF_LOG_ERROR, 0, - SHARD_MSG_INODE_CTX_GET_FAILED, "Failed to get block " - "size from inode ctx of %s", - uuid_utoa (oldloc->inode->gfid)); - goto err; - } + local = frame->local; + main_frame = local->main_frame; + main_local = main_frame->local; - if (newloc->inode) - ret = shard_inode_ctx_get_block_size (newloc->inode, this, - &dst_block_size); - /* The following stack_wind covers the case where: - * a. the src file is not sharded and dst doesn't exist, OR - * b. the src and dst both exist but are not sharded. - */ - if (((!block_size) && (!dst_block_size)) || - frame->root->pid == GF_CLIENT_PID_GSYNCD) { - STACK_WIND (frame, default_rename_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rename, oldloc, newloc, - xdata); - return 0; - } - - local = mem_get0 (this->local_pool); - if (!local) - goto err; - - frame->local = local; - loc_copy (&local->loc, oldloc); - loc_copy (&local->loc2, newloc); - local->fop = GF_FOP_RENAME; - local->xattr_req = (xdata) ? 
dict_ref (xdata) : dict_new(); - if (!local->xattr_req) - goto err; - - local->block_size = block_size; - local->dst_block_size = dst_block_size; - if (!this->itable) - this->itable = (local->loc.inode)->table; - - if (local->dst_block_size) - /* The if block covers the case where the dst file exists and is - * sharded. So it is important to look up this inode, record its - * size, before renaming src to dst, so as to NOT lose this - * information. - */ - shard_lookup_base_file (frame, this, &local->loc2, - shard_post_lookup_dst_base_file_handler); - else - /* The following block covers the case where the dst either - * doesn't exist or is NOT sharded. In this case, shard xlator - * would go ahead and rename src to dst. - */ - shard_rename_src_base_file (frame, this); + if (local->op_ret < 0) { + shard_common_failure_unwind(main_local->fop, main_frame, op_ret, + op_errno); return 0; + } + main_local->int_inodelk.acquired_lock = _gf_true; + shard_post_inodelk_fop_handler(main_frame, this); + return 0; +} +int +shard_acquire_inodelk(call_frame_t *frame, xlator_t *this, loc_t *loc) +{ + call_frame_t *lk_frame = NULL; + shard_local_t *local = NULL; + shard_local_t *lk_local = NULL; + shard_inodelk_t *int_inodelk = NULL; + + local = frame->local; + lk_frame = create_frame(this, this->ctx->pool); + if (!lk_frame) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, + "Failed to create new frame " + "to lock base shard"); + goto err; + } + lk_local = mem_get0(this->local_pool); + if (!lk_local) { + STACK_DESTROY(lk_frame->root); + goto err; + } + + lk_frame->local = lk_local; + lk_local->main_frame = frame; + int_inodelk = &lk_local->int_inodelk; + + int_inodelk->flock.l_len = 0; + int_inodelk->flock.l_start = 0; + int_inodelk->domain = this->name; + int_inodelk->flock.l_type = F_WRLCK; + loc_copy(&local->int_inodelk.loc, loc); + set_lk_owner_from_ptr(&lk_frame->root->lk_owner, lk_frame->root); + local->inodelk_frame = lk_frame; + + 
STACK_WIND(lk_frame, shard_acquire_inodelk_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->inodelk, int_inodelk->domain, + &local->int_inodelk.loc, F_SETLKW, &int_inodelk->flock, NULL); + return 0; err: - SHARD_STACK_UNWIND (rename, frame, -1, ENOMEM, NULL, NULL, NULL, - NULL, NULL, NULL); - return 0; - + shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); + return 0; } - int -shard_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, - struct iatt *stbuf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +shard_post_mkdir_rm_handler(call_frame_t *frame, xlator_t *this) { - int ret = -1; - shard_local_t *local = NULL; + loc_t *loc = NULL; + shard_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (op_ret == -1) - goto unwind; - - ret = shard_inode_ctx_set (inode, this, stbuf, - ntoh64 (local->block_size), SHARD_ALL_MASK); - if (ret) - gf_msg (this->name, GF_LOG_WARNING, 0, - SHARD_MSG_INODE_CTX_SET_FAILED, "Failed to set inode " - "ctx for %s", uuid_utoa (inode->gfid)); - -unwind: - SHARD_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode, stbuf, - preparent, postparent, xdata); + if (local->op_ret < 0) { + shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); return 0; + } + if (local->fop == GF_FOP_UNLINK) + loc = &local->loc; + else if (local->fop == GF_FOP_RENAME) + loc = &local->loc2; + shard_acquire_inodelk(frame, this, loc); + return 0; } int -shard_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) +shard_mkdir_internal_dir(call_frame_t *frame, xlator_t *this, + shard_post_resolve_fop_handler_t handler, + shard_internal_dir_type_t type); +int +shard_pre_mkdir_rm_handler(call_frame_t *frame, xlator_t *this) { - shard_local_t *local = NULL; + shard_local_t *local = NULL; - local = mem_get0 (this->local_pool); - if (!local) - goto err; + local 
= frame->local; - frame->local = local; - - if (!__is_gsyncd_on_shard_dir (frame, loc)) { - SHARD_INODE_CREATE_INIT (this, local, xdata, loc, err); - } - - STACK_WIND (frame, shard_create_cbk, FIRST_CHILD (this), - FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, - fd, xdata); + if (local->op_ret < 0) { + shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); return 0; + } + shard_mkdir_internal_dir(frame, this, shard_post_mkdir_rm_handler, + SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); + return 0; +} -err: - SHARD_STACK_UNWIND (create, frame, -1, ENOMEM, NULL, NULL, NULL, - NULL, NULL, NULL); - return 0; +void +shard_begin_rm_resolution(call_frame_t *frame, xlator_t *this) +{ + shard_priv_t *priv = NULL; + shard_local_t *local = NULL; + priv = this->private; + local = frame->local; + + local->dot_shard_rm_loc.inode = inode_find(this->itable, + priv->dot_shard_rm_gfid); + if (!local->dot_shard_rm_loc.inode) { + local->dot_shard_loc.inode = inode_find(this->itable, + priv->dot_shard_gfid); + if (!local->dot_shard_loc.inode) { + shard_mkdir_internal_dir(frame, this, shard_pre_mkdir_rm_handler, + SHARD_INTERNAL_DIR_DOT_SHARD); + } else { + local->post_res_handler = shard_pre_mkdir_rm_handler; + shard_refresh_internal_dir(frame, this, + SHARD_INTERNAL_DIR_DOT_SHARD); + } + } else { + local->post_res_handler = shard_post_mkdir_rm_handler; + shard_refresh_internal_dir(frame, this, + SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); + } } int -shard_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) +shard_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, + dict_t *xdata) { - /* To-Do: Handle open with O_TRUNC under locks */ - SHARD_STACK_UNWIND (open, frame, op_ret, op_errno, fd, xdata); - return 0; + int ret = -1; + uint64_t block_size = 0; + shard_local_t *local = NULL; + + ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); + if ((ret) && 
(!IA_ISLNK(loc->inode->ia_type))) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + "Failed to get block " + "size from inode ctx of %s", + uuid_utoa(loc->inode->gfid)); + goto err; + } + + if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { + STACK_WIND(frame, default_unlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); + return 0; + } + + local = mem_get0(this->local_pool); + if (!local) + goto err; + + frame->local = local; + + loc_copy(&local->loc, loc); + local->xflag = xflag; + local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); + local->block_size = block_size; + local->resolver_base_inode = loc->inode; + local->fop = GF_FOP_UNLINK; + if (!this->itable) + this->itable = (local->loc.inode)->table; + + local->resolve_not = _gf_true; + shard_begin_rm_resolution(frame, this); + return 0; +err: + shard_common_failure_unwind(GF_FOP_UNLINK, frame, -1, ENOMEM); + return 0; } int -shard_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - fd_t *fd, dict_t *xdata) +shard_post_rename_lookup_handler(call_frame_t *frame, xlator_t *this) { - STACK_WIND (frame, shard_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); - return 0; + shard_rename_cbk(frame, this); + return 0; } int -shard_readv_do_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iovec *vector, - int32_t count, struct iatt *stbuf, struct iobref *iobref, - dict_t *xdata) +shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + struct iatt *preoldparent, struct iatt *postoldparent, + struct iatt *prenewparent, struct iatt *postnewparent, + dict_t *xdata) { - int i = 0; - int call_count = 0; - void *address = NULL; - uint64_t block_num = 0; - off_t off = 0; - struct iovec vec = {0,}; - shard_local_t *local = NULL; - fd_t *anon_fd = cookie; - - local = frame->local; + 
int ret = 0; + shard_local_t *local = NULL; - /* If shard has already seen a failure here before, there is no point - * in aggregating subsequent reads, so just go to out. - */ - if (local->op_ret < 0) - goto out; + local = frame->local; - if (op_ret < 0) { - local->op_ret = op_ret; - local->op_errno = op_errno; - goto out; + if (op_ret < 0) { + local->op_ret = op_ret; + local->op_errno = op_errno; + goto err; + } + /* Set ctx->refresh to TRUE to force a lookup on disk when + * shard_lookup_base_file() is called next to refresh the hard link + * count in ctx. Note that this is applicable only to the case where + * the rename dst is already existent and sharded. + */ + if ((local->dst_block_size) && (!local->cleanup_required)) + shard_inode_ctx_set_refresh_flag(local->int_inodelk.loc.inode, this); + + local->prebuf = *buf; + local->preoldparent = *preoldparent; + local->postoldparent = *postoldparent; + local->prenewparent = *prenewparent; + local->postnewparent = *postnewparent; + if (xdata) + local->xattr_rsp = dict_ref(xdata); + + if (local->dst_block_size) { + if (local->entrylk_frame) { + ret = shard_unlock_entrylk(frame, this); + if (ret < 0) { + local->op_ret = -1; + local->op_errno = -ret; + } } - if (local->op_ret >= 0) - local->op_ret += op_ret; - - fd_ctx_get (anon_fd, this, &block_num); - - if (block_num == local->first_block) { - address = local->iobuf->ptr; - } else { - /* else - * address to start writing to = beginning of buffer + - * number of bytes until end of first block + - * + block_size times number of blocks - * between the current block and the first - */ - address = (char *) local->iobuf->ptr + (local->block_size - - (local->offset % local->block_size)) + - ((block_num - local->first_block - 1) * - local->block_size); - } + ret = shard_unlock_inodelk(frame, this); + if (ret < 0) { + local->op_ret = -1; + local->op_errno = -ret; + goto err; + } + if (local->cleanup_required) + shard_start_background_deletion(this); + } + + /* Now the base 
file of src, if sharded, is looked up to gather ia_size + * and ia_blocks.*/ + if (local->block_size) { + local->tmp_loc.inode = inode_new(this->itable); + gf_uuid_copy(local->tmp_loc.gfid, (local->loc.inode)->gfid); + shard_refresh_base_file(frame, this, &local->tmp_loc, NULL, + shard_post_rename_lookup_handler); + } else { + shard_rename_cbk(frame, this); + } + return 0; +err: + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); + return 0; +} - for (i = 0; i < count; i++) { - address = (char *) address + off; - memcpy (address, vector[i].iov_base, vector[i].iov_len); - off += vector[i].iov_len; - } +int +shard_post_lookup_dst_base_file_handler(call_frame_t *frame, xlator_t *this) +{ + shard_local_t *local = NULL; -out: - if (anon_fd) - fd_unref (anon_fd); - call_count = shard_call_count_return (frame); - if (call_count == 0) { - SHARD_UNSET_ROOT_FS_ID (frame, local); - if (local->op_ret < 0) { - SHARD_STACK_UNWIND (readv, frame, local->op_ret, - local->op_errno, NULL, 0, NULL, - NULL, NULL); - } else { - if (xdata) - local->xattr_rsp = dict_ref (xdata); - vec.iov_base = local->iobuf->ptr; - vec.iov_len = local->total_size; - SHARD_STACK_UNWIND (readv, frame, local->total_size, - local->op_errno, &vec, 1, - &local->prebuf, local->iobref, - local->xattr_rsp); - return 0; - } - } + local = frame->local; + if (local->op_ret < 0) { + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); return 0; + } + + /* Save dst base file attributes into postbuf so the information is not + * lost when it is overwritten after lookup on base file of src in + * shard_lookup_base_file_cbk(). 
+ */ + local->postbuf = local->prebuf; + shard_rename_src_base_file(frame, this); + return 0; } int -shard_readv_do (call_frame_t *frame, xlator_t *this) +shard_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) { - int i = 0; - int ret = 0; - int call_count = 0; - int last_block = 0; - int cur_block = 0; - off_t orig_offset = 0; - off_t shard_offset = 0; - size_t read_size = 0; - size_t remaining_size = 0; - fd_t *fd = NULL; - fd_t *anon_fd = NULL; - shard_local_t *local = NULL; - gf_boolean_t wind_failed = _gf_false; - - local = frame->local; - fd = local->fd; + int ret = -1; + uint64_t block_size = 0; + uint64_t dst_block_size = 0; + shard_local_t *local = NULL; + + if (IA_ISDIR(oldloc->inode->ia_type)) { + STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); + return 0; + } + + ret = shard_inode_ctx_get_block_size(oldloc->inode, this, &block_size); + if ((ret) && (!IA_ISLNK(oldloc->inode->ia_type))) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + "Failed to get block " + "size from inode ctx of %s", + uuid_utoa(oldloc->inode->gfid)); + goto err; + } + + if (newloc->inode) + ret = shard_inode_ctx_get_block_size(newloc->inode, this, + &dst_block_size); + + /* The following stack_wind covers the case where: + * a. the src file is not sharded and dst doesn't exist, OR + * b. the src and dst both exist but are not sharded. + */ + if (((!block_size) && (!dst_block_size)) || + frame->root->pid == GF_CLIENT_PID_GSYNCD) { + STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); + return 0; + } + + local = mem_get0(this->local_pool); + if (!local) + goto err; + + frame->local = local; + loc_copy(&local->loc, oldloc); + loc_copy(&local->loc2, newloc); + local->resolver_base_inode = newloc->inode; + local->fop = GF_FOP_RENAME; + local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); + if (!local->xattr_req) + goto err; + + local->block_size = block_size; + local->dst_block_size = dst_block_size; + if (!this->itable) + this->itable = (local->loc.inode)->table; + local->resolve_not = _gf_true; + + /* The following if-block covers the case where the dst file exists + * and is sharded. + */ + if (local->dst_block_size) { + shard_begin_rm_resolution(frame, this); + } else { + /* The following block covers the case where the dst either doesn't + * exist or is NOT sharded but the src is sharded. In this case, shard + * xlator would go ahead and rename src to dst. Once done, it would also + * lookup the base shard of src to get the ia_size and ia_blocks xattr + * values. + */ + shard_rename_src_base_file(frame, this); + } + return 0; - orig_offset = local->offset; - cur_block = local->first_block; - last_block = local->last_block; - remaining_size = local->total_size; - local->call_count = call_count = local->num_blocks; +err: + shard_common_failure_unwind(GF_FOP_RENAME, frame, -1, ENOMEM); + return 0; +} - SHARD_SET_ROOT_FS_ID (frame, local); +int +shard_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, + struct iatt *stbuf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + int ret = -1; + shard_local_t *local = NULL; - while (cur_block <= last_block) { - if (wind_failed) { - shard_readv_do_cbk (frame, (void *) (long) 0, this, -1, - ENOMEM, NULL, 0, NULL, NULL, NULL); - goto next; - } + local = frame->local; - shard_offset = orig_offset % local->block_size; - read_size = local->block_size - shard_offset; - if (read_size > remaining_size) - read_size = remaining_size; - - remaining_size -= read_size; - - if (cur_block == 0) { - anon_fd = fd_ref (fd); - } else { - anon_fd = fd_anonymous (local->inode_list[i]); - if (!anon_fd) { - local->op_ret = -1; - local->op_errno = ENOMEM; - wind_failed = _gf_true; - shard_readv_do_cbk 
(frame, - (void *) (long) anon_fd, - this, -1, ENOMEM, NULL, 0, - NULL, NULL, NULL); - goto next; - } - } + if (op_ret == -1) + goto unwind; - ret = fd_ctx_set (anon_fd, this, cur_block); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - SHARD_MSG_FD_CTX_SET_FAILED, - "Failed to set fd ctx for block %d, gfid=%s", - cur_block, - uuid_utoa (local->inode_list[i]->gfid)); - local->op_ret = -1; - local->op_errno = ENOMEM; - wind_failed = _gf_true; - shard_readv_do_cbk (frame, (void *) (long) anon_fd, - this, -1, ENOMEM, NULL, 0, NULL, - NULL, NULL); - goto next; - } + ret = shard_inode_ctx_set(inode, this, stbuf, local->block_size, + SHARD_ALL_MASK); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INODE_CTX_SET_FAILED, + "Failed to set inode " + "ctx for %s", + uuid_utoa(inode->gfid)); - STACK_WIND_COOKIE (frame, shard_readv_do_cbk, anon_fd, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readv, anon_fd, - read_size, shard_offset, local->flags, - local->xattr_req); - - orig_offset += read_size; -next: - cur_block++; - i++; - call_count--; - } - return 0; +unwind: + SHARD_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf, + preparent, postparent, xdata); + return 0; } int -shard_post_lookup_shards_readv_handler (call_frame_t *frame, xlator_t *this) +shard_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) { - shard_local_t *local = NULL; + shard_priv_t *priv = NULL; + shard_local_t *local = NULL; - local = frame->local; + priv = this->private; + local = mem_get0(this->local_pool); + if (!local) + goto err; - if (local->op_ret < 0) { - SHARD_STACK_UNWIND (readv, frame, local->op_ret, - local->op_errno, NULL, 0, NULL, NULL, NULL); - return 0; - } + frame->local = local; + local->block_size = priv->block_size; - shard_readv_do (frame, this); + if (!__is_gsyncd_on_shard_dir(frame, loc)) { + SHARD_INODE_CREATE_INIT(this, local->block_size, xdata, loc, 0, 0, err); + } - 
return 0; + STACK_WIND(frame, shard_create_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd, + xdata); + return 0; +err: + shard_common_failure_unwind(GF_FOP_CREATE, frame, -1, ENOMEM); + return 0; } int -shard_post_mknod_readv_handler (call_frame_t *frame, xlator_t *this) +shard_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) { - shard_local_t *local = NULL; - - local = frame->local; - - if (local->op_ret < 0) { - SHARD_STACK_UNWIND (readv, frame, local->op_ret, - local->op_errno, NULL, 0, NULL, NULL, NULL); - return 0; - } + /* To-Do: Handle open with O_TRUNC under locks */ + SHARD_STACK_UNWIND(open, frame, op_ret, op_errno, fd, xdata); + return 0; +} - if (!local->eexist_count) { - shard_readv_do (frame, this); - } else { - local->call_count = local->eexist_count; - shard_common_lookup_shards (frame, this, local->loc.inode, - shard_post_lookup_shards_readv_handler); - } - return 0; +int +shard_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + fd_t *fd, dict_t *xdata) +{ + STACK_WIND(frame, shard_open_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); + return 0; } int -shard_common_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +shard_readv_do_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iovec *vector, + int32_t count, struct iatt *stbuf, struct iobref *iobref, + dict_t *xdata) { - int shard_block_num = (long) cookie; - int call_count = 0; - shard_local_t *local = NULL; + int i = 0; + int call_count = 0; + void *address = NULL; + uint64_t block_num = 0; + off_t off = 0; + struct iovec vec = { + 0, + }; + shard_local_t *local = NULL; + fd_t *anon_fd = cookie; + shard_inode_ctx_t *ctx = NULL; + + local 
= frame->local; + + /* If shard has already seen a failure here before, there is no point + * in aggregating subsequent reads, so just go to out. + */ + if (local->op_ret < 0) + goto out; + + if (op_ret < 0) { + local->op_ret = op_ret; + local->op_errno = op_errno; + goto out; + } - local = frame->local; + if (local->op_ret >= 0) + local->op_ret += op_ret; - if (op_ret < 0) { - if (op_errno == EEXIST) { - local->eexist_count++; - } else { - local->op_ret = op_ret; - local->op_errno = op_errno; - } - gf_msg_debug (this->name, 0, "mknod of shard %d " - "failed: %s", shard_block_num, strerror (op_errno)); - goto done; - } + shard_inode_ctx_get(anon_fd->inode, this, &ctx); + block_num = ctx->block_num; + + if (block_num == local->first_block) { + address = local->iobuf->ptr; + } else { + /* else + * address to start writing to = beginning of buffer + + * number of bytes until end of first block + + * + block_size times number of blocks + * between the current block and the first + */ + address = (char *)local->iobuf->ptr + + (local->block_size - (local->offset % local->block_size)) + + ((block_num - local->first_block - 1) * local->block_size); + } - shard_link_block_inode (local, shard_block_num, inode, buf); + for (i = 0; i < count; i++) { + address = (char *)address + off; + memcpy(address, vector[i].iov_base, vector[i].iov_len); + off += vector[i].iov_len; + } -done: - call_count = shard_call_count_return (frame); - if (call_count == 0) { - SHARD_UNSET_ROOT_FS_ID (frame, local); - local->post_mknod_handler (frame, this); +out: + if (anon_fd) + fd_unref(anon_fd); + call_count = shard_call_count_return(frame); + if (call_count == 0) { + SHARD_UNSET_ROOT_FS_ID(frame, local); + if (local->op_ret < 0) { + shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, + local->op_errno); + } else { + if (xdata) + local->xattr_rsp = dict_ref(xdata); + vec.iov_base = local->iobuf->ptr; + if (local->offset + local->req_size > local->prebuf.ia_size) + local->total_size = 
local->prebuf.ia_size - local->offset; + vec.iov_len = local->total_size; + local->op_ret = local->total_size; + SHARD_STACK_UNWIND(readv, frame, local->op_ret, local->op_errno, + &vec, 1, &local->prebuf, local->iobref, + local->xattr_rsp); + return 0; } + } - return 0; + return 0; } int -shard_common_resume_mknod (call_frame_t *frame, xlator_t *this, - shard_post_mknod_fop_handler_t post_mknod_handler) -{ - int i = 0; - int shard_idx_iter = 0; - int last_block = 0; - int ret = 0; - int call_count = 0; - char path[PATH_MAX] = {0,}; - mode_t mode = 0; - char *bname = NULL; - shard_priv_t *priv = NULL; - shard_inode_ctx_t ctx_tmp = {0,}; - shard_local_t *local = NULL; - gf_boolean_t wind_failed = _gf_false; - fd_t *fd = NULL; - loc_t loc = {0,}; - dict_t *xattr_req = NULL; - - local = frame->local; - priv = this->private; - fd = local->fd; - shard_idx_iter = local->first_block; - last_block = local->last_block; - call_count = local->call_count = local->create_count; - local->post_mknod_handler = post_mknod_handler; - - SHARD_SET_ROOT_FS_ID (frame, local); - - ret = shard_inode_ctx_get_all (fd->inode, this, &ctx_tmp); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - SHARD_MSG_INODE_CTX_GET_FAILED, "Failed to get inode " - "ctx for %s", uuid_utoa (fd->inode->gfid)); +shard_readv_do(call_frame_t *frame, xlator_t *this) +{ + int i = 0; + int call_count = 0; + int last_block = 0; + int cur_block = 0; + off_t orig_offset = 0; + off_t shard_offset = 0; + size_t read_size = 0; + size_t remaining_size = 0; + fd_t *fd = NULL; + fd_t *anon_fd = NULL; + shard_local_t *local = NULL; + gf_boolean_t wind_failed = _gf_false; + + local = frame->local; + fd = local->fd; + + orig_offset = local->offset; + cur_block = local->first_block; + last_block = local->last_block; + remaining_size = local->total_size; + local->call_count = call_count = local->num_blocks; + + SHARD_SET_ROOT_FS_ID(frame, local); + + if (fd->flags & O_DIRECT) + local->flags = O_DIRECT; + + while (cur_block <= 
last_block) { + if (wind_failed) { + shard_readv_do_cbk(frame, (void *)(long)0, this, -1, ENOMEM, NULL, + 0, NULL, NULL, NULL); + goto next; + } + + shard_offset = orig_offset % local->block_size; + read_size = local->block_size - shard_offset; + if (read_size > remaining_size) + read_size = remaining_size; + + remaining_size -= read_size; + + if (cur_block == 0) { + anon_fd = fd_ref(fd); + } else { + anon_fd = fd_anonymous(local->inode_list[i]); + if (!anon_fd) { local->op_ret = -1; local->op_errno = ENOMEM; - goto err; + wind_failed = _gf_true; + shard_readv_do_cbk(frame, (void *)(long)anon_fd, this, -1, + ENOMEM, NULL, 0, NULL, NULL, NULL); + goto next; + } } - mode = st_mode_from_ia (ctx_tmp.stat.ia_prot, ctx_tmp.stat.ia_type); - while (shard_idx_iter <= last_block) { - if (local->inode_list[i]) { - shard_idx_iter++; - i++; - continue; - } + STACK_WIND_COOKIE(frame, shard_readv_do_cbk, anon_fd, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readv, anon_fd, read_size, + shard_offset, local->flags, local->xattr_req); - if (wind_failed) { - shard_common_mknod_cbk (frame, - (void *) (long) shard_idx_iter, - this, -1, ENOMEM, NULL, NULL, - NULL, NULL, NULL); - goto next; - } + orig_offset += read_size; + next: + cur_block++; + i++; + call_count--; + } + return 0; +} - shard_make_block_abspath (shard_idx_iter, fd->inode->gfid, - path, sizeof(path)); +int +shard_common_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + int shard_block_num = (long)cookie; + int call_count = 0; + shard_local_t *local = NULL; + + local = frame->local; + + if (op_ret < 0) { + if (op_errno == EEXIST) { + LOCK(&frame->lock); + { + local->eexist_count++; + } + UNLOCK(&frame->lock); + } else { + local->op_ret = op_ret; + local->op_errno = op_errno; + } + gf_msg_debug(this->name, 0, + "mknod of shard %d " + "failed: %s", + shard_block_num, 
strerror(op_errno)); + goto done; + } - xattr_req = shard_create_gfid_dict (local->xattr_req); - if (!xattr_req) { - local->op_ret = -1; - local->op_errno = ENOMEM; - wind_failed = _gf_true; - shard_common_mknod_cbk (frame, - (void *) (long) shard_idx_iter, - this, -1, ENOMEM, NULL, NULL, - NULL, NULL, NULL); - goto next; - } + shard_link_block_inode(local, shard_block_num, inode, buf); - bname = strrchr (path, '/') + 1; - loc.inode = inode_new (this->itable); - loc.parent = inode_ref (priv->dot_shard_inode); - ret = inode_path (loc.parent, bname, - (char **) &(loc.path)); - if (ret < 0 || !(loc.inode)) { - gf_msg (this->name, GF_LOG_ERROR, 0, - SHARD_MSG_INODE_PATH_FAILED, "Inode path failed" - "on %s, base file gfid = %s", bname, - uuid_utoa (fd->inode->gfid)); - local->op_ret = -1; - local->op_errno = ENOMEM; - wind_failed = _gf_true; - loc_wipe (&loc); - dict_unref (xattr_req); - shard_common_mknod_cbk (frame, - (void *) (long) shard_idx_iter, - this, -1, ENOMEM, NULL, NULL, - NULL, NULL, NULL); - goto next; - } +done: + call_count = shard_call_count_return(frame); + if (call_count == 0) { + SHARD_UNSET_ROOT_FS_ID(frame, local); + local->create_count = 0; + local->post_mknod_handler(frame, this); + } - loc.name = strrchr (loc.path, '/'); - if (loc.name) - loc.name++; - - STACK_WIND_COOKIE (frame, shard_common_mknod_cbk, - (void *) (long) shard_idx_iter, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->mknod, &loc, - mode, ctx_tmp.stat.ia_rdev, 0, xattr_req); - loc_wipe (&loc); - dict_unref (xattr_req); - -next: - shard_idx_iter++; - i++; - if (!--call_count) - break; - } + return 0; +} - return 0; +int +shard_common_resume_mknod(call_frame_t *frame, xlator_t *this, + shard_post_mknod_fop_handler_t post_mknod_handler) +{ + int i = 0; + int shard_idx_iter = 0; + int last_block = 0; + int ret = 0; + int call_count = 0; + char path[PATH_MAX] = { + 0, + }; + mode_t mode = 0; + char *bname = NULL; + shard_priv_t *priv = NULL; + shard_inode_ctx_t ctx_tmp = { + 0, + }; 
+ shard_local_t *local = NULL; + gf_boolean_t wind_failed = _gf_false; + fd_t *fd = NULL; + loc_t loc = { + 0, + }; + dict_t *xattr_req = NULL; + + local = frame->local; + priv = this->private; + fd = local->fd; + shard_idx_iter = local->first_block; + last_block = local->last_block; + call_count = local->call_count = local->create_count; + local->post_mknod_handler = post_mknod_handler; + + SHARD_SET_ROOT_FS_ID(frame, local); + + ret = shard_inode_ctx_get_all(fd->inode, this, &ctx_tmp); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + "Failed to get inode " + "ctx for %s", + uuid_utoa(fd->inode->gfid)); + local->op_ret = -1; + local->op_errno = ENOMEM; + goto err; + } + mode = st_mode_from_ia(ctx_tmp.stat.ia_prot, ctx_tmp.stat.ia_type); + + while (shard_idx_iter <= last_block) { + if (local->inode_list[i]) { + shard_idx_iter++; + i++; + continue; + } + + if (wind_failed) { + shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, + -1, ENOMEM, NULL, NULL, NULL, NULL, NULL); + goto next; + } + + shard_make_block_abspath(shard_idx_iter, fd->inode->gfid, path, + sizeof(path)); + + xattr_req = shard_create_gfid_dict(local->xattr_req); + if (!xattr_req) { + local->op_ret = -1; + local->op_errno = ENOMEM; + wind_failed = _gf_true; + shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, + -1, ENOMEM, NULL, NULL, NULL, NULL, NULL); + goto next; + } + + bname = strrchr(path, '/') + 1; + loc.inode = inode_new(this->itable); + loc.parent = inode_ref(priv->dot_shard_inode); + ret = inode_path(loc.parent, bname, (char **)&(loc.path)); + if (ret < 0 || !(loc.inode)) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, + "Inode path failed" + "on %s, base file gfid = %s", + bname, uuid_utoa(fd->inode->gfid)); + local->op_ret = -1; + local->op_errno = ENOMEM; + wind_failed = _gf_true; + loc_wipe(&loc); + dict_unref(xattr_req); + shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, + -1, ENOMEM, 
NULL, NULL, NULL, NULL, NULL); + goto next; + } + + loc.name = strrchr(loc.path, '/'); + if (loc.name) + loc.name++; + + STACK_WIND_COOKIE(frame, shard_common_mknod_cbk, + (void *)(long)shard_idx_iter, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mknod, &loc, mode, + ctx_tmp.stat.ia_rdev, 0, xattr_req); + loc_wipe(&loc); + dict_unref(xattr_req); + + next: + shard_idx_iter++; + i++; + if (!--call_count) + break; + } + + return 0; err: - /* - * This block is for handling failure in shard_inode_ctx_get_all(). - * Failures in the while-loop are handled within the loop. - */ - SHARD_UNSET_ROOT_FS_ID (frame, local); - post_mknod_handler (frame, this); - return 0; + /* + * This block is for handling failure in shard_inode_ctx_get_all(). + * Failures in the while-loop are handled within the loop. + */ + SHARD_UNSET_ROOT_FS_ID(frame, local); + post_mknod_handler(frame, this); + return 0; } int -shard_post_resolve_readv_handler (call_frame_t *frame, xlator_t *this) +shard_post_mknod_readv_handler(call_frame_t *frame, xlator_t *this); + +int +shard_post_lookup_shards_readv_handler(call_frame_t *frame, xlator_t *this) { - shard_local_t *local = NULL; + shard_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (local->op_ret < 0) { - if (local->op_errno != ENOENT) { - SHARD_STACK_UNWIND (readv, frame, local->op_ret, - local->op_errno, NULL, 0, NULL, - NULL, NULL); - return 0; - } else { - struct iovec vec = {0,}; - - vec.iov_base = local->iobuf->ptr; - vec.iov_len = local->total_size; - SHARD_STACK_UNWIND (readv, frame, local->total_size, - 0, &vec, 1, &local->prebuf, - local->iobref, NULL); - return 0; - } - } + if (local->op_ret < 0) { + shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, + local->op_errno); + return 0; + } - if (local->call_count) { - local->create_count = local->call_count; - shard_common_resume_mknod (frame, this, - shard_post_mknod_readv_handler); - } else { - shard_readv_do (frame, this); - } + if (local->create_count) 
{ + shard_common_resume_mknod(frame, this, shard_post_mknod_readv_handler); + } else { + shard_readv_do(frame, this); + } - return 0; + return 0; } int -shard_post_lookup_readv_handler (call_frame_t *frame, xlator_t *this) +shard_post_mknod_readv_handler(call_frame_t *frame, xlator_t *this) { - int ret = 0; - size_t read_size = 0; - size_t actual_size = 0; - struct iobuf *iobuf = NULL; - shard_local_t *local = NULL; - shard_priv_t *priv = NULL; + shard_local_t *local = NULL; - priv = this->private; - local = frame->local; + local = frame->local; - if (local->op_ret < 0) { - SHARD_STACK_UNWIND (readv, frame, local->op_ret, - local->op_errno, NULL, 0, NULL, NULL, NULL); - return 0; - } + if (local->op_ret < 0) { + shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, + local->op_errno); + return 0; + } - if (local->offset >= local->prebuf.ia_size) { - /* If the read is being performed past the end of the file, - * unwind the FOP with 0 bytes read as status. - */ - struct iovec vec = {0,}; + if (!local->eexist_count) { + shard_readv_do(frame, this); + } else { + local->call_count = local->eexist_count; + shard_common_lookup_shards(frame, this, local->loc.inode, + shard_post_lookup_shards_readv_handler); + } + return 0; +} - iobuf = iobuf_get2 (this->ctx->iobuf_pool, local->req_size); - if (!iobuf) - goto err; +int +shard_post_resolve_readv_handler(call_frame_t *frame, xlator_t *this) +{ + shard_local_t *local = NULL; - vec.iov_base = iobuf->ptr; - vec.iov_len = 0; - local->iobref = iobref_new (); - iobref_add (local->iobref, iobuf); - iobuf_unref (iobuf); + local = frame->local; - SHARD_STACK_UNWIND (readv, frame, 0, 0, &vec, 1, &local->prebuf, - local->iobref, NULL); - return 0; - } + if (local->op_ret < 0) { + if (local->op_errno != ENOENT) { + shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, + local->op_errno); + return 0; + } else { + struct iovec vec = { + 0, + }; - read_size = (local->offset + local->req_size); - actual_size = 
local->prebuf.ia_size; + vec.iov_base = local->iobuf->ptr; + vec.iov_len = local->total_size; + local->op_ret = local->total_size; + SHARD_STACK_UNWIND(readv, frame, local->op_ret, 0, &vec, 1, + &local->prebuf, local->iobref, NULL); + return 0; + } + } - local->first_block = get_lowest_block (local->offset, - local->block_size); + if (local->call_count) { + shard_common_lookup_shards(frame, this, local->resolver_base_inode, + shard_post_lookup_shards_readv_handler); + } else { + shard_readv_do(frame, this); + } - /* If the end of read surpasses the file size, only resolve and read - * till the end of the file size. If the read is confined within the - * size of the file, read only the requested size. - */ + return 0; +} - if (read_size >= actual_size) - local->total_size = actual_size - local->offset; - else - local->total_size = local->req_size; +int +shard_post_lookup_readv_handler(call_frame_t *frame, xlator_t *this) +{ + int ret = 0; + struct iobuf *iobuf = NULL; + shard_local_t *local = NULL; + shard_priv_t *priv = NULL; - local->last_block = get_highest_block (local->offset, local->total_size, - local->block_size); + priv = this->private; + local = frame->local; - local->num_blocks = local->last_block - local->first_block + 1; + if (local->op_ret < 0) { + shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, + local->op_errno); + return 0; + } - local->inode_list = GF_CALLOC (local->num_blocks, sizeof (inode_t *), - gf_shard_mt_inode_list); - if (!local->inode_list) - goto err; + if (local->offset >= local->prebuf.ia_size) { + /* If the read is being performed past the end of the file, + * unwind the FOP with 0 bytes read as status. 
+ */ + struct iovec vec = { + 0, + }; - iobuf = iobuf_get2 (this->ctx->iobuf_pool, local->total_size); + iobuf = iobuf_get2(this->ctx->iobuf_pool, local->req_size); if (!iobuf) - goto err; - - local->iobref = iobref_new (); - if (!local->iobref) { - iobuf_unref (iobuf); - goto err; - } - - if (iobref_add (local->iobref, iobuf) != 0) { - iobuf_unref (iobuf); - goto err; - } + goto err; - iobuf_unref (iobuf); - local->iobuf = iobuf; - memset (iobuf->ptr, 0, local->total_size); + vec.iov_base = iobuf->ptr; + vec.iov_len = 0; + local->iobref = iobref_new(); + iobref_add(local->iobref, iobuf); + iobuf_unref(iobuf); - local->dot_shard_loc.inode = inode_find (this->itable, - priv->dot_shard_gfid); - if (!local->dot_shard_loc.inode) { - ret = shard_init_dot_shard_loc (this, local); - if (ret) - goto err; - shard_lookup_dot_shard (frame, this, - shard_post_resolve_readv_handler); - } else { - shard_common_resolve_shards (frame, this, local->loc.inode, - shard_post_resolve_readv_handler); - } + SHARD_STACK_UNWIND(readv, frame, 0, 0, &vec, 1, &local->prebuf, + local->iobref, NULL); return 0; + } -err: - SHARD_STACK_UNWIND (readv, frame, -1, ENOMEM, NULL, 0, NULL, NULL, - NULL); - return 0; -} - -int -shard_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset, uint32_t flags, dict_t *xdata) -{ - int ret = 0; - uint64_t block_size = 0; - shard_local_t *local = NULL; + local->first_block = get_lowest_block(local->offset, local->block_size); - ret = shard_inode_ctx_get_block_size (fd->inode, this, &block_size); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - SHARD_MSG_INODE_CTX_GET_FAILED, "Failed to get block " - "size for %s from its inode ctx", - uuid_utoa (fd->inode->gfid)); - goto err; - } - - if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { - /* block_size = 0 means that the file was created before - * sharding was enabled on the volume. 
- */ - STACK_WIND (frame, default_readv_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readv, fd, size, offset, - flags, xdata); - return 0; - } + local->total_size = local->req_size; - if (!this->itable) - this->itable = fd->inode->table; + local->last_block = get_highest_block(local->offset, local->total_size, + local->block_size); - local = mem_get0 (this->local_pool); - if (!local) - goto err; + local->num_blocks = local->last_block - local->first_block + 1; + GF_ASSERT(local->num_blocks > 0); + local->resolver_base_inode = local->loc.inode; - frame->local = local; + local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *), + gf_shard_mt_inode_list); + if (!local->inode_list) + goto err; - local->fd = fd_ref (fd); - local->block_size = block_size; - local->offset = offset; - local->req_size = size; - local->flags = flags; - local->xattr_req = (xdata) ? dict_ref (xdata) : dict_new (); - if (!local->xattr_req) - goto err; + iobuf = iobuf_get2(this->ctx->iobuf_pool, local->total_size); + if (!iobuf) + goto err; - local->loc.inode = inode_ref (fd->inode); - gf_uuid_copy (local->loc.gfid, fd->inode->gfid); + local->iobref = iobref_new(); + if (!local->iobref) { + iobuf_unref(iobuf); + goto err; + } - shard_lookup_base_file (frame, this, &local->loc, - shard_post_lookup_readv_handler); + if (iobref_add(local->iobref, iobuf) != 0) { + iobuf_unref(iobuf); + goto err; + } - return 0; + memset(iobuf->ptr, 0, local->total_size); + iobuf_unref(iobuf); + local->iobuf = iobuf; + local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); + if (!local->dot_shard_loc.inode) { + ret = shard_init_internal_dir_loc(this, local, + SHARD_INTERNAL_DIR_DOT_SHARD); + if (ret) + goto err; + shard_lookup_internal_dir(frame, this, shard_post_resolve_readv_handler, + SHARD_INTERNAL_DIR_DOT_SHARD); + } else { + local->post_res_handler = shard_post_resolve_readv_handler; + shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); + } + return 0; 
err: - SHARD_STACK_UNWIND (readv, frame, -1, ENOMEM, NULL, 0, NULL, NULL, - NULL); - return 0; + shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM); + return 0; +} +int +shard_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) +{ + int ret = 0; + uint64_t block_size = 0; + shard_local_t *local = NULL; + + ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + "Failed to get block " + "size for %s from its inode ctx", + uuid_utoa(fd->inode->gfid)); + goto err; + } + + if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { + /* block_size = 0 means that the file was created before + * sharding was enabled on the volume. + */ + STACK_WIND(frame, default_readv_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, + xdata); + return 0; + } + + if (!this->itable) + this->itable = fd->inode->table; + + local = mem_get0(this->local_pool); + if (!local) + goto err; + + frame->local = local; + + ret = syncbarrier_init(&local->barrier); + if (ret) + goto err; + local->fd = fd_ref(fd); + local->block_size = block_size; + local->offset = offset; + local->req_size = size; + local->flags = flags; + local->fop = GF_FOP_READ; + local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); + if (!local->xattr_req) + goto err; + + local->loc.inode = inode_ref(fd->inode); + gf_uuid_copy(local->loc.gfid, fd->inode->gfid); + + shard_refresh_base_file(frame, this, NULL, fd, + shard_post_lookup_readv_handler); + return 0; +err: + shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM); + return 0; } int -shard_post_update_size_writev_handler (call_frame_t *frame, xlator_t *this) +shard_common_inode_write_post_update_size_handler(call_frame_t *frame, + xlator_t *this) { - shard_local_t *local = NULL; + shard_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (local->op_ret < 0) { - SHARD_STACK_UNWIND (writev, frame, local->op_ret, - local->op_errno, NULL, NULL, NULL); - return 0; - } - - local->postbuf.ia_size += (local->delta_size + local->hole_size); - local->postbuf.ia_blocks += local->delta_blocks; + if (local->op_ret < 0) { + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); + } else { + shard_common_inode_write_success_unwind(local->fop, frame, + local->written_size); + } + return 0; +} - SHARD_STACK_UNWIND (writev, frame, local->written_size, local->op_errno, - &local->prebuf, &local->postbuf, local->xattr_rsp); - return 0; +static gf_boolean_t +shard_is_appending_write(shard_local_t *local) +{ + if (local->fop != GF_FOP_WRITE) + return _gf_false; + if (local->flags & O_APPEND) + return _gf_true; + if (local->fd->flags & O_APPEND) + return _gf_true; + return _gf_false; } int -__shard_get_delta_size_from_inode_ctx (shard_local_t *local, inode_t *inode, - xlator_t *this) +__shard_get_delta_size_from_inode_ctx(shard_local_t *local, inode_t *inode, + xlator_t *this) { - int ret = -1; - uint64_t ctx_uint = 0; - shard_inode_ctx_t *ctx = NULL; + int ret = -1; + uint64_t ctx_uint = 0; + shard_inode_ctx_t *ctx = NULL; - ret = __inode_ctx_get (inode, this, &ctx_uint); - if (ret < 0) - return ret; + ret = __inode_ctx_get(inode, this, &ctx_uint); + if (ret < 0) + return 
ret; - ctx = (shard_inode_ctx_t *) ctx_uint; + ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; - if (local->offset + local->total_size > ctx->stat.ia_size) { - local->delta_size = (local->offset + local->total_size) - - ctx->stat.ia_size; - ctx->stat.ia_size += (local->delta_size); - } else { - local->delta_size = 0; - } + if (shard_is_appending_write(local)) { + local->delta_size = local->total_size; + } else if (local->offset + local->total_size > ctx->stat.ia_size) { + local->delta_size = (local->offset + local->total_size) - + ctx->stat.ia_size; + } else { + local->delta_size = 0; + } + ctx->stat.ia_size += (local->delta_size); + local->postbuf = ctx->stat; - return 0; + return 0; } int -shard_get_delta_size_from_inode_ctx (shard_local_t *local, inode_t *inode, - xlator_t *this) +shard_get_delta_size_from_inode_ctx(shard_local_t *local, inode_t *inode, + xlator_t *this) { - int ret = -1; + int ret = -1; - LOCK (&inode->lock); - { - ret = __shard_get_delta_size_from_inode_ctx (local, inode, - this); - } - UNLOCK (&inode->lock); + LOCK(&inode->lock); + { + ret = __shard_get_delta_size_from_inode_ctx(local, inode, this); + } + UNLOCK(&inode->lock); - return ret; + return ret; } int -shard_writev_do_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) +shard_common_inode_write_do_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iatt *pre, + struct iatt *post, dict_t *xdata) { - int call_count = 0; - fd_t *anon_fd = cookie; - shard_local_t *local = NULL; + int call_count = 0; + fd_t *anon_fd = cookie; + shard_local_t *local = NULL; + glusterfs_fop_t fop = 0; - local = frame->local; + local = frame->local; + fop = local->fop; + LOCK(&frame->lock); + { if (op_ret < 0) { - local->op_ret = op_ret; - local->op_errno = op_errno; + local->op_ret = op_ret; + local->op_errno = op_errno; } else { - local->written_size += op_ret; 
- local->delta_blocks += (postbuf->ia_blocks - prebuf->ia_blocks); - local->delta_size += (postbuf->ia_size - prebuf->ia_size); - shard_inode_ctx_set (local->fd->inode, this, postbuf, 0, - SHARD_MASK_TIMES); + local->written_size += op_ret; + GF_ATOMIC_ADD(local->delta_blocks, + post->ia_blocks - pre->ia_blocks); + local->delta_size += (post->ia_size - pre->ia_size); + shard_inode_ctx_set(local->fd->inode, this, post, 0, + SHARD_MASK_TIMES); + if (local->fd->inode != anon_fd->inode) + shard_inode_ctx_add_to_fsync_list(local->fd->inode, this, + anon_fd->inode); + } + } + UNLOCK(&frame->lock); + + if (anon_fd) + fd_unref(anon_fd); + + call_count = shard_call_count_return(frame); + if (call_count == 0) { + SHARD_UNSET_ROOT_FS_ID(frame, local); + if (local->op_ret < 0) { + shard_common_failure_unwind(fop, frame, local->op_ret, + local->op_errno); + } else { + shard_get_delta_size_from_inode_ctx(local, local->fd->inode, this); + local->hole_size = 0; + if (xdata) + local->xattr_rsp = dict_ref(xdata); + shard_update_file_size( + frame, this, local->fd, NULL, + shard_common_inode_write_post_update_size_handler); } + } - if (anon_fd) - fd_unref (anon_fd); - - call_count = shard_call_count_return (frame); - if (call_count == 0) { - SHARD_UNSET_ROOT_FS_ID (frame, local); - if (local->op_ret < 0) { - SHARD_STACK_UNWIND (writev, frame, local->op_ret, - local->op_errno, NULL, NULL, NULL); - } else { - shard_get_delta_size_from_inode_ctx (local, - local->fd->inode, - this); - local->hole_size = 0; - if (xdata) - local->xattr_rsp = dict_ref (xdata); - shard_update_file_size (frame, this, local->fd, NULL, - shard_post_update_size_writev_handler); - } - } + return 0; +} - return 0; +int +shard_common_inode_write_wind(call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iovec *vec, int count, off_t shard_offset, + size_t size) +{ + shard_local_t *local = NULL; + + local = frame->local; + + switch (local->fop) { + case GF_FOP_WRITE: + STACK_WIND_COOKIE( + frame, 
shard_common_inode_write_do_cbk, fd, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->writev, fd, vec, count, shard_offset, + local->flags, local->iobref, local->xattr_req); + break; + case GF_FOP_FALLOCATE: + STACK_WIND_COOKIE( + frame, shard_common_inode_write_do_cbk, fd, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fallocate, fd, local->flags, + shard_offset, size, local->xattr_req); + break; + case GF_FOP_ZEROFILL: + STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->zerofill, fd, + shard_offset, size, local->xattr_req); + break; + case GF_FOP_DISCARD: + STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->discard, fd, + shard_offset, size, local->xattr_req); + break; + default: + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, + "Invalid fop id = %d", local->fop); + break; + } + return 0; } int -shard_writev_do (call_frame_t *frame, xlator_t *this) -{ - int i = 0; - int count = 0; - int call_count = 0; - int last_block = 0; - uint32_t cur_block = 0; - fd_t *fd = NULL; - fd_t *anon_fd = NULL; - shard_local_t *local = NULL; - struct iovec *vec = NULL; - gf_boolean_t wind_failed = _gf_false; - off_t orig_offset = 0; - off_t shard_offset = 0; - off_t vec_offset = 0; - size_t remaining_size = 0; - size_t write_size = 0; - - local = frame->local; - fd = local->fd; - - orig_offset = local->offset; - remaining_size = local->total_size; - cur_block = local->first_block; - local->call_count = call_count = local->num_blocks; - last_block = local->last_block; - - SHARD_SET_ROOT_FS_ID (frame, local); - - if (dict_set_uint32 (local->xattr_req, - GLUSTERFS_WRITE_UPDATE_ATOMIC, 4)) { - gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_SET_FAILED, - "Failed to set "GLUSTERFS_WRITE_UPDATE_ATOMIC" into " - "dict: %s", uuid_utoa (fd->inode->gfid)); +shard_common_inode_write_do(call_frame_t *frame, xlator_t *this) +{ + int i = 0; + int count = 0; + int 
call_count = 0; + int last_block = 0; + uint32_t cur_block = 0; + fd_t *fd = NULL; + fd_t *anon_fd = NULL; + shard_local_t *local = NULL; + struct iovec *vec = NULL; + gf_boolean_t wind_failed = _gf_false; + gf_boolean_t odirect = _gf_false; + off_t orig_offset = 0; + off_t shard_offset = 0; + off_t vec_offset = 0; + size_t remaining_size = 0; + size_t shard_write_size = 0; + + local = frame->local; + fd = local->fd; + + orig_offset = local->offset; + remaining_size = local->total_size; + cur_block = local->first_block; + local->call_count = call_count = local->num_blocks; + last_block = local->last_block; + + SHARD_SET_ROOT_FS_ID(frame, local); + + if (dict_set_uint32(local->xattr_req, GLUSTERFS_WRITE_UPDATE_ATOMIC, 4)) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to set " GLUSTERFS_WRITE_UPDATE_ATOMIC + " into " + "dict: %s", + uuid_utoa(fd->inode->gfid)); + local->op_ret = -1; + local->op_errno = ENOMEM; + local->call_count = 1; + shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1, + ENOMEM, NULL, NULL, NULL); + return 0; + } + + if ((fd->flags & O_DIRECT) && (local->fop == GF_FOP_WRITE)) + odirect = _gf_true; + + while (cur_block <= last_block) { + if (wind_failed) { + shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1, + ENOMEM, NULL, NULL, NULL); + goto next; + } + + shard_offset = orig_offset % local->block_size; + shard_write_size = local->block_size - shard_offset; + if (shard_write_size > remaining_size) + shard_write_size = remaining_size; + + remaining_size -= shard_write_size; + + if (local->fop == GF_FOP_WRITE) { + vec = NULL; + count = iov_subset(local->vector, local->count, vec_offset, + shard_write_size, &vec, 0); + if (count < 0) { local->op_ret = -1; local->op_errno = ENOMEM; - local->call_count = 1; - shard_writev_do_cbk (frame, (void *)(long)0, this, -1, ENOMEM, - NULL, NULL, NULL); - return 0; + wind_failed = _gf_true; + shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, + 
-1, ENOMEM, NULL, NULL, NULL); + goto next; + } } - while (cur_block <= last_block) { - if (wind_failed) { - shard_writev_do_cbk (frame, (void *) (long) 0, this, -1, - ENOMEM, NULL, NULL, NULL); - goto next; - } - - shard_offset = orig_offset % local->block_size; - write_size = local->block_size - shard_offset; - if (write_size > remaining_size) - write_size = remaining_size; - - remaining_size -= write_size; - - count = iov_subset (local->vector, local->count, vec_offset, - vec_offset + write_size, NULL); - - vec = GF_CALLOC (count, sizeof (struct iovec), - gf_shard_mt_iovec); - if (!vec) { - local->op_ret = -1; - local->op_errno = ENOMEM; - wind_failed = _gf_true; - GF_FREE (vec); - shard_writev_do_cbk (frame, (void *) (long) 0, this, -1, - ENOMEM, NULL, NULL, NULL); - goto next; - } - - count = iov_subset (local->vector, local->count, vec_offset, - vec_offset + write_size, vec); - - if (cur_block == 0) { - anon_fd = fd_ref (fd); - } else { - anon_fd = fd_anonymous (local->inode_list[i]); - if (!anon_fd) { - local->op_ret = -1; - local->op_errno = ENOMEM; - wind_failed = _gf_true; - GF_FREE (vec); - shard_writev_do_cbk (frame, - (void *) (long) anon_fd, - this, -1, ENOMEM, NULL, - NULL, NULL); - goto next; - } - } + if (cur_block == 0) { + anon_fd = fd_ref(fd); + } else { + anon_fd = fd_anonymous(local->inode_list[i]); + if (!anon_fd) { + local->op_ret = -1; + local->op_errno = ENOMEM; + wind_failed = _gf_true; + GF_FREE(vec); + shard_common_inode_write_do_cbk(frame, (void *)(long)anon_fd, + this, -1, ENOMEM, NULL, NULL, + NULL); + goto next; + } - STACK_WIND_COOKIE (frame, shard_writev_do_cbk, anon_fd, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->writev, anon_fd, - vec, count, shard_offset, local->flags, - local->iobref, local->xattr_req); - GF_FREE (vec); - vec = NULL; - orig_offset += write_size; - vec_offset += write_size; -next: - cur_block++; - i++; - call_count--; + if (local->fop == GF_FOP_WRITE) { + if (odirect) + local->flags = O_DIRECT; + else + 
local->flags = GF_ANON_FD_FLAGS; + } } - return 0; + + shard_common_inode_write_wind(frame, this, anon_fd, vec, count, + shard_offset, shard_write_size); + if (vec) + vec_offset += shard_write_size; + orig_offset += shard_write_size; + GF_FREE(vec); + vec = NULL; + next: + cur_block++; + i++; + call_count--; + } + return 0; } int -shard_post_lookup_shards_writev_handler (call_frame_t *frame, xlator_t *this) +shard_common_inode_write_post_mknod_handler(call_frame_t *frame, + xlator_t *this); + +int +shard_common_inode_write_post_lookup_shards_handler(call_frame_t *frame, + xlator_t *this) { - shard_local_t *local = NULL; + shard_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (local->op_ret < 0) { - SHARD_STACK_UNWIND (writev, frame, local->op_ret, - local->op_errno, NULL, NULL, NULL); - return 0; - } + if (local->op_ret < 0) { + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); + return 0; + } - shard_writev_do (frame, this); + if (local->create_count) { + shard_common_resume_mknod(frame, this, + shard_common_inode_write_post_mknod_handler); + } else { + shard_common_inode_write_do(frame, this); + } - return 0; + return 0; } int -shard_post_mknod_writev_handler (call_frame_t *frame, xlator_t *this) +shard_common_inode_write_post_mknod_handler(call_frame_t *frame, xlator_t *this) { - shard_local_t *local = NULL; + shard_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (local->op_ret < 0) { - SHARD_STACK_UNWIND (writev, frame, local->op_ret, - local->op_errno, NULL, NULL, NULL); - return 0; - } + if (local->op_ret < 0) { + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); + return 0; + } - if (!local->eexist_count) { - shard_writev_do (frame, this); - } else { - local->call_count = local->eexist_count; - shard_common_lookup_shards (frame, this, local->loc.inode, - shard_post_lookup_shards_writev_handler); - } + if (!local->eexist_count) { + 
shard_common_inode_write_do(frame, this); + } else { + local->call_count = local->eexist_count; + shard_common_lookup_shards( + frame, this, local->loc.inode, + shard_common_inode_write_post_lookup_shards_handler); + } - return 0; + return 0; } int -shard_post_lookup_writev_handler (call_frame_t *frame, xlator_t *this) +shard_common_inode_write_post_resolve_handler(call_frame_t *frame, + xlator_t *this) { - shard_local_t *local = NULL; + shard_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (local->op_ret < 0) { - SHARD_STACK_UNWIND (writev, frame, local->op_ret, - local->op_errno, NULL, NULL, NULL); - return 0; - } - - local->postbuf = local->prebuf; + if (local->op_ret < 0) { + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); + return 0; + } - if (local->create_count) - shard_common_resume_mknod (frame, this, - shard_post_mknod_writev_handler); - else - shard_writev_do (frame, this); + if (local->call_count) { + shard_common_lookup_shards( + frame, this, local->resolver_base_inode, + shard_common_inode_write_post_lookup_shards_handler); + } else if (local->create_count) { + shard_common_inode_write_post_lookup_shards_handler(frame, this); + } else { + shard_common_inode_write_do(frame, this); + } - return 0; + return 0; } int -shard_post_resolve_writev_handler (call_frame_t *frame, xlator_t *this) -{ - shard_local_t *local = NULL; - - local = frame->local; - - if (local->op_ret < 0) { - SHARD_STACK_UNWIND (writev, frame, local->op_ret, - local->op_errno, NULL, NULL, NULL); - return 0; - } - - local->create_count = local->call_count; - - shard_lookup_base_file (frame, this, &local->loc, - shard_post_lookup_writev_handler); - return 0; +shard_common_inode_write_post_lookup_handler(call_frame_t *frame, + xlator_t *this) +{ + shard_local_t *local = frame->local; + shard_priv_t *priv = this->private; + + if (local->op_ret < 0) { + shard_common_failure_unwind(local->fop, frame, local->op_ret, + 
local->op_errno); + return 0; + } + + local->postbuf = local->prebuf; + + /*Adjust offset to EOF so that correct shard is chosen for append*/ + if (shard_is_appending_write(local)) + local->offset = local->prebuf.ia_size; + + local->first_block = get_lowest_block(local->offset, local->block_size); + local->last_block = get_highest_block(local->offset, local->total_size, + local->block_size); + local->num_blocks = local->last_block - local->first_block + 1; + GF_ASSERT(local->num_blocks > 0); + local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *), + gf_shard_mt_inode_list); + if (!local->inode_list) { + shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); + return 0; + } + + gf_msg_trace(this->name, 0, + "%s: gfid=%s first_block=%" PRIu64 + " " + "last_block=%" PRIu64 " num_blocks=%" PRIu64 " offset=%" PRId64 + " total_size=%zu flags=%" PRId32 "", + gf_fop_list[local->fop], + uuid_utoa(local->resolver_base_inode->gfid), + local->first_block, local->last_block, local->num_blocks, + local->offset, local->total_size, local->flags); + + local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); + + if (!local->dot_shard_loc.inode) { + /*change handler*/ + shard_mkdir_internal_dir(frame, this, + shard_common_inode_write_post_resolve_handler, + SHARD_INTERNAL_DIR_DOT_SHARD); + } else { + /*change handler*/ + local->post_res_handler = shard_common_inode_write_post_resolve_handler; + shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); + } + return 0; } int -shard_writev_mkdir_dot_shard_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +shard_mkdir_internal_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - shard_local_t *local 
= NULL; - - local = frame->local; + inode_t *link_inode = NULL; + shard_local_t *local = NULL; + shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie; - SHARD_UNSET_ROOT_FS_ID (frame, local); + local = frame->local; - if (op_ret == -1) { - if (op_errno != EEXIST) { - goto unwind; - } else { - gf_msg_debug (this->name, 0, "mkdir on /.shard failed " - "with EEXIST. Attempting lookup now"); - shard_lookup_dot_shard (frame, this, - shard_post_resolve_writev_handler); - return 0; - } - } - - shard_link_dot_shard_inode (local, inode, buf); - shard_common_resolve_shards (frame, this, local->loc.inode, - shard_post_resolve_writev_handler); - return 0; + SHARD_UNSET_ROOT_FS_ID(frame, local); + if (op_ret == -1) { + if (op_errno != EEXIST) { + local->op_ret = op_ret; + local->op_errno = op_errno; + goto unwind; + } else { + gf_msg_debug(this->name, 0, + "mkdir on %s failed " + "with EEXIST. Attempting lookup now", + shard_internal_dir_string(type)); + shard_lookup_internal_dir(frame, this, local->post_res_handler, + type); + return 0; + } + } + + link_inode = shard_link_internal_dir_inode(local, inode, buf, type); + if (link_inode != inode) { + shard_refresh_internal_dir(frame, this, type); + } else { + shard_inode_ctx_mark_dir_refreshed(link_inode, this); + shard_common_resolve_shards(frame, this, local->post_res_handler); + } + return 0; unwind: - SHARD_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + shard_common_resolve_shards(frame, this, local->post_res_handler); + return 0; } int -shard_writev_mkdir_dot_shard (call_frame_t *frame, xlator_t *this) -{ - int ret = -1; - shard_local_t *local = NULL; - shard_priv_t *priv = NULL; - dict_t *xattr_req = NULL; - - local = frame->local; - priv = this->private; - - xattr_req = dict_new (); - if (!xattr_req) - goto err; - - ret = shard_init_dot_shard_loc (this, local); - if (ret) - goto err; - - ret = dict_set_static_bin (xattr_req, "gfid-req", priv->dot_shard_gfid, - 16); - if (ret) { - 
gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_SET_FAILED, - "Failed to set gfid-req for /.shard"); - goto err; - } - - SHARD_SET_ROOT_FS_ID (frame, local); - - STACK_WIND (frame, shard_writev_mkdir_dot_shard_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->mkdir, &local->dot_shard_loc, - 0755, 0, xattr_req); - dict_unref (xattr_req); - return 0; +shard_mkdir_internal_dir(call_frame_t *frame, xlator_t *this, + shard_post_resolve_fop_handler_t handler, + shard_internal_dir_type_t type) +{ + int ret = -1; + shard_local_t *local = NULL; + shard_priv_t *priv = NULL; + dict_t *xattr_req = NULL; + uuid_t *gfid = NULL; + loc_t *loc = NULL; + gf_boolean_t free_gfid = _gf_true; + + local = frame->local; + priv = this->private; + + local->post_res_handler = handler; + gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); + if (!gfid) + goto err; + + switch (type) { + case SHARD_INTERNAL_DIR_DOT_SHARD: + gf_uuid_copy(*gfid, priv->dot_shard_gfid); + loc = &local->dot_shard_loc; + break; + case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: + gf_uuid_copy(*gfid, priv->dot_shard_rm_gfid); + loc = &local->dot_shard_rm_loc; + break; + default: + bzero(*gfid, sizeof(uuid_t)); + break; + } + + xattr_req = dict_new(); + if (!xattr_req) + goto err; + + ret = shard_init_internal_dir_loc(this, local, type); + if (ret) + goto err; + + ret = dict_set_gfuuid(xattr_req, "gfid-req", *gfid, false); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to set gfid-req for %s", + shard_internal_dir_string(type)); + goto err; + } else { + free_gfid = _gf_false; + } + + SHARD_SET_ROOT_FS_ID(frame, local); + + STACK_WIND_COOKIE(frame, shard_mkdir_internal_dir_cbk, (void *)(long)type, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir, loc, + 0755, 0, xattr_req); + dict_unref(xattr_req); + return 0; err: - if (xattr_req) - dict_unref (xattr_req); - SHARD_STACK_UNWIND (writev, frame, -1, ENOMEM, NULL, NULL, NULL); - return 0; + if (xattr_req) + 
dict_unref(xattr_req); + local->op_ret = -1; + local->op_errno = ENOMEM; + if (free_gfid) + GF_FREE(gfid); + handler(frame, this); + return 0; } int -shard_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iovec *vector, int32_t count, off_t offset, uint32_t flags, - struct iobref *iobref, dict_t *xdata) +shard_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - int ret = 0; - int i = 0; - uint64_t block_size = 0; - shard_local_t *local = NULL; - shard_priv_t *priv = NULL; - - priv = this->private; - - ret = shard_inode_ctx_get_block_size (fd->inode, this, &block_size); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - SHARD_MSG_INODE_CTX_GET_FAILED, "Failed to get block " - "size for %s from its inode ctx", - uuid_utoa (fd->inode->gfid)); - goto out; - } - - if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { - /* block_size = 0 means that the file was created before - * sharding was enabled on the volume. - */ - STACK_WIND (frame, default_writev_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev, - fd, vector, count, offset, flags, iobref, xdata); - return 0; - } - - if (!this->itable) - this->itable = fd->inode->table; - - local = mem_get0 (this->local_pool); - if (!local) - goto out; - - frame->local = local; - - local->xattr_req = (xdata) ? 
dict_ref (xdata) : dict_new (); - if (!local->xattr_req) - goto out; - - local->vector = iov_dup (vector, count); - if (!local->vector) - goto out; + /* To-Do: Wind flush on all shards of the file */ + SHARD_STACK_UNWIND(flush, frame, op_ret, op_errno, xdata); + return 0; +} - for (i = 0; i < count; i++) - local->total_size += vector[i].iov_len; +int +shard_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) +{ + STACK_WIND(frame, shard_flush_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->flush, fd, xdata); + return 0; +} - local->count = count; - local->offset = offset; - local->flags = flags; - local->iobref = iobref_ref (iobref); - local->fd = fd_ref (fd); - local->block_size = block_size; - local->first_block = get_lowest_block (offset, local->block_size); - local->last_block = get_highest_block (offset, local->total_size, - local->block_size); - local->num_blocks = local->last_block - local->first_block + 1; - local->inode_list = GF_CALLOC (local->num_blocks, sizeof (inode_t *), - gf_shard_mt_inode_list); - if (!local->inode_list) - goto out; +int +__shard_get_timestamps_from_inode_ctx(shard_local_t *local, inode_t *inode, + xlator_t *this) +{ + int ret = -1; + uint64_t ctx_uint = 0; + shard_inode_ctx_t *ctx = NULL; - local->loc.inode = inode_ref (fd->inode); - gf_uuid_copy (local->loc.gfid, fd->inode->gfid); + ret = __inode_ctx_get(inode, this, &ctx_uint); + if (ret < 0) + return ret; - gf_msg_trace (this->name, 0, "gfid=%s first_block=%"PRIu32" " - "last_block=%"PRIu32" num_blocks=%"PRIu32" offset=%"PRId64" " - "total_size=%lu", uuid_utoa (fd->inode->gfid), - local->first_block, local->last_block, local->num_blocks, - offset, local->total_size); + ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; - local->dot_shard_loc.inode = inode_find (this->itable, - priv->dot_shard_gfid); - if (!local->dot_shard_loc.inode) - shard_writev_mkdir_dot_shard (frame, this); - else - shard_common_resolve_shards (frame, this, local->loc.inode, - 
shard_post_resolve_writev_handler); + local->postbuf.ia_ctime = ctx->stat.ia_ctime; + local->postbuf.ia_ctime_nsec = ctx->stat.ia_ctime_nsec; + local->postbuf.ia_atime = ctx->stat.ia_atime; + local->postbuf.ia_atime_nsec = ctx->stat.ia_atime_nsec; + local->postbuf.ia_mtime = ctx->stat.ia_mtime; + local->postbuf.ia_mtime_nsec = ctx->stat.ia_mtime_nsec; - return 0; -out: - SHARD_STACK_UNWIND (writev, frame, -1, ENOMEM, NULL, NULL, NULL); - return 0; + return 0; } int -shard_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +shard_get_timestamps_from_inode_ctx(shard_local_t *local, inode_t *inode, + xlator_t *this) { - /* To-Do: Wind flush on all shards of the file */ - SHARD_STACK_UNWIND (flush, frame, op_ret, op_errno, xdata); - return 0; + int ret = 0; + + LOCK(&inode->lock); + { + ret = __shard_get_timestamps_from_inode_ctx(local, inode, this); + } + UNLOCK(&inode->lock); + + return ret; } int -shard_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) -{ - STACK_WIND (frame, shard_flush_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->flush, fd, xdata); - return 0; +shard_fsync_shards_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + int call_count = 0; + uint64_t fsync_count = 0; + fd_t *anon_fd = cookie; + shard_local_t *local = NULL; + shard_inode_ctx_t *ctx = NULL; + shard_inode_ctx_t *base_ictx = NULL; + inode_t *base_inode = NULL; + gf_boolean_t unref_shard_inode = _gf_false; + + local = frame->local; + base_inode = local->fd->inode; + + if (local->op_ret < 0) + goto out; + + LOCK(&frame->lock); + { + if (op_ret < 0) { + local->op_ret = op_ret; + local->op_errno = op_errno; + UNLOCK(&frame->lock); + goto out; + } + shard_inode_ctx_set(local->fd->inode, this, postbuf, 0, + SHARD_MASK_TIMES); + } + UNLOCK(&frame->lock); + fd_ctx_get(anon_fd, this, &fsync_count); +out: + if 
(anon_fd && (base_inode != anon_fd->inode)) { + LOCK(&base_inode->lock); + LOCK(&anon_fd->inode->lock); + { + __shard_inode_ctx_get(anon_fd->inode, this, &ctx); + __shard_inode_ctx_get(base_inode, this, &base_ictx); + if (op_ret == 0) + ctx->fsync_needed -= fsync_count; + GF_ASSERT(ctx->fsync_needed >= 0); + if (ctx->fsync_needed != 0) { + list_add_tail(&ctx->to_fsync_list, &base_ictx->to_fsync_list); + base_ictx->fsync_count++; + } else { + unref_shard_inode = _gf_true; + } + } + UNLOCK(&anon_fd->inode->lock); + UNLOCK(&base_inode->lock); + } + + if (unref_shard_inode) + inode_unref(anon_fd->inode); + if (anon_fd) + fd_unref(anon_fd); + + call_count = shard_call_count_return(frame); + if (call_count != 0) + return 0; + + if (local->op_ret < 0) { + shard_common_failure_unwind(GF_FOP_FSYNC, frame, local->op_ret, + local->op_errno); + } else { + shard_get_timestamps_from_inode_ctx(local, base_inode, this); + SHARD_STACK_UNWIND(fsync, frame, local->op_ret, local->op_errno, + &local->prebuf, &local->postbuf, local->xattr_rsp); + } + return 0; } int -shard_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) -{ - /* To-Do: Wind fsync on all shards of the file */ - SHARD_STACK_UNWIND (fsync, frame, op_ret, op_errno, prebuf, postbuf, - xdata); - return 0; +shard_post_lookup_fsync_handler(call_frame_t *frame, xlator_t *this) +{ + int ret = 0; + int call_count = 0; + int fsync_count = 0; + fd_t *anon_fd = NULL; + inode_t *base_inode = NULL; + shard_local_t *local = NULL; + shard_inode_ctx_t *ctx = NULL; + shard_inode_ctx_t *iter = NULL; + struct list_head copy = { + 0, + }; + shard_inode_ctx_t *tmp = NULL; + + local = frame->local; + base_inode = local->fd->inode; + local->postbuf = local->prebuf; + INIT_LIST_HEAD(&copy); + + if (local->op_ret < 0) { + shard_common_failure_unwind(GF_FOP_FSYNC, frame, local->op_ret, + local->op_errno); + return 0; + } + + 
LOCK(&base_inode->lock); + { + __shard_inode_ctx_get(base_inode, this, &ctx); + list_splice_init(&ctx->to_fsync_list, &copy); + call_count = ctx->fsync_count; + ctx->fsync_count = 0; + } + UNLOCK(&base_inode->lock); + + local->call_count = ++call_count; + + /* Send fsync() on the base shard first */ + anon_fd = fd_ref(local->fd); + STACK_WIND_COOKIE(frame, shard_fsync_shards_cbk, anon_fd, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsync, anon_fd, local->datasync, + local->xattr_req); + call_count--; + anon_fd = NULL; + + list_for_each_entry_safe(iter, tmp, &copy, to_fsync_list) + { + list_del_init(&iter->to_fsync_list); + fsync_count = 0; + shard_inode_ctx_get_fsync_count(iter->inode, this, &fsync_count); + GF_ASSERT(fsync_count > 0); + anon_fd = fd_anonymous(iter->inode); + if (!anon_fd) { + local->op_ret = -1; + local->op_errno = ENOMEM; + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, + SHARD_MSG_MEMALLOC_FAILED, + "Failed to create " + "anon fd to fsync shard"); + shard_fsync_shards_cbk(frame, (void *)(long)anon_fd, this, -1, + ENOMEM, NULL, NULL, NULL); + continue; + } + + ret = fd_ctx_set(anon_fd, this, fsync_count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_FD_CTX_SET_FAILED, + "Failed to set fd " + "ctx for shard inode gfid=%s", + uuid_utoa(iter->inode->gfid)); + local->op_ret = -1; + local->op_errno = ENOMEM; + shard_fsync_shards_cbk(frame, (void *)(long)anon_fd, this, -1, + ENOMEM, NULL, NULL, NULL); + continue; + } + STACK_WIND_COOKIE(frame, shard_fsync_shards_cbk, anon_fd, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync, + anon_fd, local->datasync, local->xattr_req); + call_count--; + } + + return 0; } int -shard_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, - dict_t *xdata) -{ - STACK_WIND (frame, shard_fsync_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata); - return 0; +shard_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, + dict_t *xdata) +{ + int ret = 0; + 
uint64_t block_size = 0; + shard_local_t *local = NULL; + + ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + "Failed to get block " + "size for %s from its inode ctx", + uuid_utoa(fd->inode->gfid)); + goto err; + } + + if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { + STACK_WIND(frame, default_fsync_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata); + return 0; + } + + if (!this->itable) + this->itable = fd->inode->table; + + local = mem_get0(this->local_pool); + if (!local) + goto err; + + frame->local = local; + + local->fd = fd_ref(fd); + local->fop = GF_FOP_FSYNC; + local->datasync = datasync; + local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); + if (!local->xattr_req) + goto err; + + local->loc.inode = inode_ref(fd->inode); + gf_uuid_copy(local->loc.gfid, fd->inode->gfid); + + shard_refresh_base_file(frame, this, NULL, fd, + shard_post_lookup_fsync_handler); + return 0; +err: + shard_common_failure_unwind(GF_FOP_FSYNC, frame, -1, ENOMEM); + return 0; } int -shard_readdir_past_dot_shard_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno, gf_dirent_t *orig_entries, - dict_t *xdata) +shard_readdir_past_dot_shard_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, gf_dirent_t *orig_entries, + dict_t *xdata) { - gf_dirent_t *entry = NULL; - gf_dirent_t *tmp = NULL; - shard_local_t *local = NULL; + gf_dirent_t *entry = NULL; + gf_dirent_t *tmp = NULL; + shard_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (op_ret < 0) - goto unwind; + if (op_ret < 0) + goto unwind; - list_for_each_entry_safe (entry, tmp, (&orig_entries->list), list) { + list_for_each_entry_safe(entry, tmp, (&orig_entries->list), list) + { + list_del_init(&entry->list); + list_add_tail(&entry->list, &local->entries_head.list); - 
list_del_init (&entry->list); - list_add_tail (&entry->list, &local->entries_head.list); + if (!entry->dict) + continue; - if (!entry->dict) - continue; + if (IA_ISDIR(entry->d_stat.ia_type)) + continue; - if (IA_ISDIR (entry->d_stat.ia_type)) - continue; - - if (dict_get (entry->dict, GF_XATTR_SHARD_FILE_SIZE)) - shard_modify_size_and_block_count (&entry->d_stat, - entry->dict); - if (!entry->inode) - continue; + if (dict_get(entry->dict, GF_XATTR_SHARD_FILE_SIZE)) + shard_modify_size_and_block_count(&entry->d_stat, entry->dict); + if (!entry->inode) + continue; - shard_inode_ctx_update (entry->inode, this, entry->dict, - &entry->d_stat); - } - local->op_ret += op_ret; + shard_inode_ctx_update(entry->inode, this, entry->dict, &entry->d_stat); + } + local->op_ret += op_ret; unwind: - if (local->fop == GF_FOP_READDIR) - SHARD_STACK_UNWIND (readdir, frame, local->op_ret, - local->op_errno, - &local->entries_head, xdata); - else - SHARD_STACK_UNWIND (readdirp, frame, op_ret, op_errno, - &local->entries_head, xdata); - return 0; + if (local->fop == GF_FOP_READDIR) + SHARD_STACK_UNWIND(readdir, frame, local->op_ret, local->op_errno, + &local->entries_head, xdata); + else + SHARD_STACK_UNWIND(readdirp, frame, op_ret, op_errno, + &local->entries_head, xdata); + return 0; } int32_t -shard_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, gf_dirent_t *orig_entries, - dict_t *xdata) +shard_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, gf_dirent_t *orig_entries, + dict_t *xdata) { - fd_t *fd = NULL; - gf_dirent_t *entry = NULL; - gf_dirent_t *tmp = NULL; - shard_local_t *local = NULL; - gf_boolean_t last_entry = _gf_false; - - local = frame->local; - fd = local->fd; + fd_t *fd = NULL; + gf_dirent_t *entry = NULL; + gf_dirent_t *tmp = NULL; + shard_local_t *local = NULL; + gf_boolean_t last_entry = _gf_false; - if (op_ret < 0) - goto unwind; + local = frame->local; + fd = 
local->fd; - list_for_each_entry_safe (entry, tmp, (&orig_entries->list), list) { - if (last_entry) - last_entry = _gf_false; + if (op_ret < 0) + goto unwind; - if (__is_root_gfid (fd->inode->gfid) && - !(strcmp (entry->d_name, GF_SHARD_DIR))) { - local->offset = entry->d_off; - op_ret--; - last_entry = _gf_true; - continue; - } + list_for_each_entry_safe(entry, tmp, (&orig_entries->list), list) + { + if (last_entry) + last_entry = _gf_false; - list_del_init (&entry->list); - list_add_tail (&entry->list, &local->entries_head.list); + if (__is_root_gfid(fd->inode->gfid) && + !(strcmp(entry->d_name, GF_SHARD_DIR))) { + local->offset = entry->d_off; + op_ret--; + last_entry = _gf_true; + continue; + } - if (!entry->dict) - continue; + list_del_init(&entry->list); + list_add_tail(&entry->list, &local->entries_head.list); - if (IA_ISDIR (entry->d_stat.ia_type)) - continue; + if (!entry->dict) + continue; - if (dict_get (entry->dict, GF_XATTR_SHARD_FILE_SIZE) && - frame->root->pid != GF_CLIENT_PID_GSYNCD) - shard_modify_size_and_block_count (&entry->d_stat, - entry->dict); + if (IA_ISDIR(entry->d_stat.ia_type)) + continue; - if (!entry->inode) - continue; + if (dict_get(entry->dict, GF_XATTR_SHARD_FILE_SIZE) && + frame->root->pid != GF_CLIENT_PID_GSYNCD) + shard_modify_size_and_block_count(&entry->d_stat, entry->dict); - shard_inode_ctx_update (entry->inode, this, entry->dict, - &entry->d_stat); - } + if (!entry->inode) + continue; - local->op_ret = op_ret; + shard_inode_ctx_update(entry->inode, this, entry->dict, &entry->d_stat); + } - if (last_entry) { - if (local->fop == GF_FOP_READDIR) - STACK_WIND (frame, shard_readdir_past_dot_shard_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readdir, local->fd, - local->readdir_size, local->offset, - local->xattr_req); - else - STACK_WIND (frame, shard_readdir_past_dot_shard_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readdirp, - local->fd, local->readdir_size, - local->offset, local->xattr_req); - return 0; - } 
+ local->op_ret = op_ret; -unwind: + if (last_entry) { if (local->fop == GF_FOP_READDIR) - SHARD_STACK_UNWIND (readdir, frame, op_ret, op_errno, - &local->entries_head, xdata); + STACK_WIND(frame, shard_readdir_past_dot_shard_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdir, + local->fd, local->readdir_size, local->offset, + local->xattr_req); else - SHARD_STACK_UNWIND (readdirp, frame, op_ret, op_errno, - &local->entries_head, xdata); + STACK_WIND(frame, shard_readdir_past_dot_shard_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp, + local->fd, local->readdir_size, local->offset, + local->xattr_req); return 0; -} + } +unwind: + if (local->fop == GF_FOP_READDIR) + SHARD_STACK_UNWIND(readdir, frame, op_ret, op_errno, + &local->entries_head, xdata); + else + SHARD_STACK_UNWIND(readdirp, frame, op_ret, op_errno, + &local->entries_head, xdata); + return 0; +} int -shard_readdir_do (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset, int whichop, dict_t *xdata) -{ - int ret = 0; - shard_local_t *local = NULL; - - local = mem_get0 (this->local_pool); - if (!local) { - local->op_ret = -1; - local->op_errno = ENOMEM; - goto err; +shard_readdir_do(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, int whichop, dict_t *xdata) +{ + int ret = 0; + shard_local_t *local = NULL; + + local = mem_get0(this->local_pool); + if (!local) { + goto err; + } + + frame->local = local; + + local->fd = fd_ref(fd); + local->fop = whichop; + local->readdir_size = size; + INIT_LIST_HEAD(&local->entries_head.list); + local->list_inited = _gf_true; + + if (whichop == GF_FOP_READDIR) { + STACK_WIND(frame, shard_readdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdir, fd, size, offset, xdata); + } else { + local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); + SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid, + local, err); + ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, + "Failed to set " + "dict value: key:%s, directory gfid=%s", + GF_XATTR_SHARD_BLOCK_SIZE, uuid_utoa(fd->inode->gfid)); + goto err; } - frame->local = local; - - local->fd = fd_ref (fd); - local->fop = whichop; - local->readdir_size = size; - INIT_LIST_HEAD (&local->entries_head.list); - local->list_inited = _gf_true; - - if (whichop == GF_FOP_READDIR) { - STACK_WIND (frame, shard_readdir_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readdir, fd, size, offset, - xdata); - } else { - local->xattr_req = (xdata) ? dict_ref (xdata) : dict_new (); - SHARD_MD_READ_FOP_INIT_REQ_DICT (this, local->xattr_req, - fd->inode->gfid, local, err); - ret = dict_set_uint64 (local->xattr_req, - GF_XATTR_SHARD_BLOCK_SIZE, 0); - if (ret) { - gf_log (this->name, GF_LOG_WARNING, "Failed to set " - "dict value: key:%s, directory gfid=%s", - GF_XATTR_SHARD_BLOCK_SIZE, - uuid_utoa (fd->inode->gfid)); - local->op_ret = -1; - local->op_errno = ENOMEM; - goto err; - } - - STACK_WIND (frame, shard_readdir_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readdirp, fd, size, offset, - local->xattr_req); - } + STACK_WIND(frame, shard_readdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdirp, fd, size, offset, + local->xattr_req); + } - return 0; + return 0; err: - STACK_UNWIND_STRICT (readdir, frame, local->op_ret, local->op_errno, - NULL, NULL); - return 0; - + STACK_UNWIND_STRICT(readdir, frame, -1, ENOMEM, NULL, NULL); + return 0; } +int32_t +shard_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, dict_t *xdata) +{ + shard_readdir_do(frame, this, fd, size, offset, GF_FOP_READDIR, xdata); + return 0; +} int32_t -shard_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, 
+shard_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t offset, dict_t *xdata) { - shard_readdir_do (frame, this, fd, size, offset, GF_FOP_READDIR, xdata); - return 0; + shard_readdir_do(frame, this, fd, size, offset, GF_FOP_READDIRP, xdata); + return 0; } - int32_t -shard_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset, dict_t *xdata) -{ - shard_readdir_do (frame, this, fd, size, offset, GF_FOP_READDIRP, - xdata); - return 0; +shard_modify_and_set_iatt_in_dict(dict_t *xdata, shard_local_t *local, + char *key) +{ + int ret = 0; + struct iatt *tmpbuf = NULL; + struct iatt *stbuf = NULL; + data_t *data = NULL; + + if (!xdata) + return 0; + + data = dict_get(xdata, key); + if (!data) + return 0; + + tmpbuf = data_to_iatt(data, key); + stbuf = GF_MALLOC(sizeof(struct iatt), gf_common_mt_char); + if (stbuf == NULL) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto err; + } + *stbuf = *tmpbuf; + stbuf->ia_size = local->prebuf.ia_size; + stbuf->ia_blocks = local->prebuf.ia_blocks; + ret = dict_set_iatt(xdata, key, stbuf, false); + if (ret < 0) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto err; + } + return 0; + +err: + GF_FREE(stbuf); + return -1; } int32_t -shard_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *name, dict_t *xdata) +shard_common_remove_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - int op_errno = EINVAL; + int ret = -1; + shard_local_t *local = NULL; - if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { - GF_IF_NATIVE_XATTR_GOTO (SHARD_XATTR_PREFIX"*", - name, op_errno, out); - } - - if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { - dict_del (xdata, GF_XATTR_SHARD_BLOCK_SIZE); - dict_del (xdata, GF_XATTR_SHARD_FILE_SIZE); - } + local = frame->local; - STACK_WIND_TAIL (frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->removexattr, loc, name, - xdata); - return 0; + if 
(op_ret < 0) { + local->op_ret = op_ret; + local->op_errno = op_errno; + goto err; + } + + ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_PRESTAT); + if (ret < 0) + goto err; + + ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_POSTSTAT); + if (ret < 0) + goto err; + + if (local->fd) + SHARD_STACK_UNWIND(fremovexattr, frame, local->op_ret, local->op_errno, + xdata); + else + SHARD_STACK_UNWIND(removexattr, frame, local->op_ret, local->op_errno, + xdata); + return 0; -out: - SHARD_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL); - return 0; +err: + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); + return 0; } int32_t -shard_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd, - const char *name, dict_t *xdata) +shard_post_lookup_remove_xattr_handler(call_frame_t *frame, xlator_t *this) { - int op_errno = EINVAL; - - if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { - GF_IF_NATIVE_XATTR_GOTO (SHARD_XATTR_PREFIX"*", - name, op_errno, out); - } + shard_local_t *local = NULL; - if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { - dict_del (xdata, GF_XATTR_SHARD_BLOCK_SIZE); - dict_del (xdata, GF_XATTR_SHARD_FILE_SIZE); - } + local = frame->local; - STACK_WIND_TAIL (frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fremovexattr, fd, name, - xdata); + if (local->op_ret < 0) { + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); return 0; + } -out: - SHARD_STACK_UNWIND (fremovexattr, frame, -1, op_errno, NULL); + if (local->fd) + STACK_WIND(frame, shard_common_remove_xattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fremovexattr, local->fd, + local->name, local->xattr_req); + else + STACK_WIND(frame, shard_common_remove_xattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, &local->loc, + local->name, local->xattr_req); + return 0; +} + +int32_t +shard_common_remove_xattr(call_frame_t *frame, xlator_t *this, + glusterfs_fop_t fop, loc_t *loc, fd_t 
*fd, + const char *name, dict_t *xdata) +{ + int ret = -1; + int op_errno = ENOMEM; + uint64_t block_size = 0; + shard_local_t *local = NULL; + inode_t *inode = loc ? loc->inode : fd->inode; + + if ((IA_ISDIR(inode->ia_type)) || (IA_ISLNK(inode->ia_type))) { + if (loc) + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, loc, name, + xdata); + else + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fremovexattr, fd, name, + xdata); return 0; + } + + /* If shard's special xattrs are attempted to be removed, + * fail the fop with EPERM (except if the client is gsyncd). + */ + if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { + GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, err); + } + + /* Repeat the same check for bulk-removexattr */ + if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { + dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE); + dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE); + } + + ret = shard_inode_ctx_get_block_size(inode, this, &block_size); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + "Failed to get block size from inode ctx of %s", + uuid_utoa(inode->gfid)); + goto err; + } + + if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { + if (loc) + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, loc, name, + xdata); + else + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fremovexattr, fd, name, + xdata); + return 0; + } + + local = mem_get0(this->local_pool); + if (!local) + goto err; + + frame->local = local; + local->fop = fop; + if (loc) { + if (loc_copy(&local->loc, loc) != 0) + goto err; + } + + if (fd) { + local->fd = fd_ref(fd); + local->loc.inode = inode_ref(fd->inode); + gf_uuid_copy(local->loc.gfid, fd->inode->gfid); + } + + if (name) { + local->name = gf_strdup(name); + if (!local->name) + goto err; + } + + if (xdata) + local->xattr_req = dict_ref(xdata); + + 
shard_refresh_base_file(frame, this, loc, fd, + shard_post_lookup_remove_xattr_handler); + return 0; +err: + shard_common_failure_unwind(fop, frame, -1, op_errno); + return 0; } int32_t -shard_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict, - dict_t *xdata) +shard_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) { - if (op_ret < 0) - goto unwind; + shard_common_remove_xattr(frame, this, GF_FOP_REMOVEXATTR, loc, NULL, name, + xdata); + return 0; +} - if (dict && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { - dict_del (dict, GF_XATTR_SHARD_BLOCK_SIZE); - dict_del (dict, GF_XATTR_SHARD_FILE_SIZE); - } +int32_t +shard_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *name, dict_t *xdata) +{ + shard_common_remove_xattr(frame, this, GF_FOP_FREMOVEXATTR, NULL, fd, name, + xdata); + return 0; +} + +int32_t +shard_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t *xdata) +{ + if (op_ret < 0) + goto unwind; + + if (dict && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { + dict_del(dict, GF_XATTR_SHARD_BLOCK_SIZE); + dict_del(dict, GF_XATTR_SHARD_FILE_SIZE); + } unwind: - SHARD_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, dict, xdata); - return 0; + SHARD_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, dict, xdata); + return 0; } int32_t -shard_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, - const char *name, dict_t *xdata) +shard_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, + dict_t *xdata) { - int op_errno = EINVAL; - - if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && - (name) && (!strncmp (name, SHARD_XATTR_PREFIX, - strlen (SHARD_XATTR_PREFIX)))) { - op_errno = ENODATA; - goto out; - } + int op_errno = EINVAL; - STACK_WIND (frame, shard_fgetxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fgetxattr, fd, 
name, xdata); - return 0; + if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && (name) && + (!strncmp(name, SHARD_XATTR_PREFIX, SLEN(SHARD_XATTR_PREFIX)))) { + op_errno = ENODATA; + goto out; + } + STACK_WIND(frame, shard_fgetxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata); + return 0; out: - SHARD_STACK_UNWIND (fgetxattr, frame, -1, op_errno, NULL, NULL); - return 0; + shard_common_failure_unwind(GF_FOP_FGETXATTR, frame, -1, op_errno); + return 0; } - int32_t -shard_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict, - dict_t *xdata) +shard_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t *xdata) { - if (op_ret < 0) - goto unwind; + if (op_ret < 0) + goto unwind; - if (dict && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { - dict_del (dict, GF_XATTR_SHARD_BLOCK_SIZE); - dict_del (dict, GF_XATTR_SHARD_FILE_SIZE); - } + if (dict && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { + dict_del(dict, GF_XATTR_SHARD_BLOCK_SIZE); + dict_del(dict, GF_XATTR_SHARD_FILE_SIZE); + } unwind: - SHARD_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata); - return 0; + SHARD_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata); + return 0; } int32_t -shard_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *name, dict_t *xdata) +shard_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) { - int op_errno = EINVAL; + int op_errno = EINVAL; - if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && - (name) && (!strncmp (name, SHARD_XATTR_PREFIX, - strlen (SHARD_XATTR_PREFIX)))) { - op_errno = ENODATA; - goto out; - } - - STACK_WIND (frame, shard_getxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->getxattr, loc, name, xdata); - return 0; + if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && (name) && + (!strncmp(name, SHARD_XATTR_PREFIX, 
sizeof(SHARD_XATTR_PREFIX) - 1))) { + op_errno = ENODATA; + goto out; + } + STACK_WIND(frame, shard_getxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->getxattr, loc, name, xdata); + return 0; out: - SHARD_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL, NULL); - return 0; + shard_common_failure_unwind(GF_FOP_GETXATTR, frame, -1, op_errno); + return 0; } int32_t -shard_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, - int32_t flags, dict_t *xdata) +shard_common_set_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - int op_errno = EINVAL; + int ret = -1; + shard_local_t *local = NULL; - if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { - GF_IF_INTERNAL_XATTR_GOTO (SHARD_XATTR_PREFIX"*", dict, - op_errno, out); - } + local = frame->local; - STACK_WIND_TAIL (frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, - xdata); - return 0; + if (op_ret < 0) { + local->op_ret = op_ret; + local->op_errno = op_errno; + goto err; + } + + ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_PRESTAT); + if (ret < 0) + goto err; + + ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_POSTSTAT); + if (ret < 0) + goto err; + + if (local->fd) + SHARD_STACK_UNWIND(fsetxattr, frame, local->op_ret, local->op_errno, + xdata); + else + SHARD_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno, + xdata); + return 0; -out: - SHARD_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL); - return 0; +err: + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); + return 0; } int32_t -shard_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, - int32_t flags, dict_t *xdata) +shard_post_lookup_set_xattr_handler(call_frame_t *frame, xlator_t *this) { - int op_errno = EINVAL; + shard_local_t *local = NULL; - if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { - GF_IF_INTERNAL_XATTR_GOTO (SHARD_XATTR_PREFIX"*", dict, - 
op_errno, out); - } + local = frame->local; - STACK_WIND_TAIL (frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, - xdata); + if (local->op_ret < 0) { + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); return 0; + } -out: - SHARD_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL); + if (local->fd) + STACK_WIND(frame, shard_common_set_xattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetxattr, local->fd, + local->xattr_req, local->flags, local->xattr_rsp); + else + STACK_WIND(frame, shard_common_set_xattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, &local->loc, + local->xattr_req, local->flags, local->xattr_rsp); + return 0; +} + +int32_t +shard_common_set_xattr(call_frame_t *frame, xlator_t *this, glusterfs_fop_t fop, + loc_t *loc, fd_t *fd, dict_t *dict, int32_t flags, + dict_t *xdata) +{ + int ret = -1; + int op_errno = ENOMEM; + uint64_t block_size = 0; + shard_local_t *local = NULL; + inode_t *inode = loc ? loc->inode : fd->inode; + + if ((IA_ISDIR(inode->ia_type)) || (IA_ISLNK(inode->ia_type))) { + if (loc) + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, + xdata); + else + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, + xdata); + return 0; + } + + /* Sharded or not, if shard's special xattrs are attempted to be set, + * fail the fop with EPERM (except if the client is gsyncd. 
+ */ + if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { + GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, err); + } + + ret = shard_inode_ctx_get_block_size(inode, this, &block_size); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + "Failed to get block size from inode ctx of %s", + uuid_utoa(inode->gfid)); + goto err; + } + + if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { + if (loc) + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, + xdata); + else + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, + xdata); return 0; + } + + local = mem_get0(this->local_pool); + if (!local) + goto err; + + frame->local = local; + local->fop = fop; + if (loc) { + if (loc_copy(&local->loc, loc) != 0) + goto err; + } + + if (fd) { + local->fd = fd_ref(fd); + local->loc.inode = inode_ref(fd->inode); + gf_uuid_copy(local->loc.gfid, fd->inode->gfid); + } + local->flags = flags; + /* Reusing local->xattr_req and local->xattr_rsp to store the setxattr dict + * and the xdata dict + */ + if (dict) + local->xattr_req = dict_ref(dict); + if (xdata) + local->xattr_rsp = dict_ref(xdata); + + shard_refresh_base_file(frame, this, loc, fd, + shard_post_lookup_set_xattr_handler); + return 0; +err: + shard_common_failure_unwind(fop, frame, -1, op_errno); + return 0; +} + +int32_t +shard_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, + int32_t flags, dict_t *xdata) +{ + shard_common_set_xattr(frame, this, GF_FOP_FSETXATTR, NULL, fd, dict, flags, + xdata); + return 0; +} + +int32_t +shard_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + int32_t flags, dict_t *xdata) +{ + shard_common_set_xattr(frame, this, GF_FOP_SETXATTR, loc, NULL, dict, flags, + xdata); + return 0; } int -shard_post_setattr_handler (call_frame_t *frame, xlator_t *this) +shard_post_setattr_handler(call_frame_t *frame, 
xlator_t *this) { - shard_local_t *local = NULL; + shard_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (local->fop == GF_FOP_SETATTR) { - if (local->op_ret >= 0) - shard_inode_ctx_set (local->loc.inode, this, - &local->postbuf, 0, - SHARD_LOOKUP_MASK); - SHARD_STACK_UNWIND (setattr, frame, local->op_ret, - local->op_errno, &local->prebuf, - &local->postbuf, local->xattr_rsp); - } else if (local->fop == GF_FOP_FSETATTR) { - if (local->op_ret >= 0) - shard_inode_ctx_set (local->fd->inode, this, - &local->postbuf, 0, - SHARD_LOOKUP_MASK); - SHARD_STACK_UNWIND (fsetattr, frame, local->op_ret, - local->op_errno, &local->prebuf, - &local->postbuf, local->xattr_rsp); - } + if (local->fop == GF_FOP_SETATTR) { + if (local->op_ret >= 0) + shard_inode_ctx_set(local->loc.inode, this, &local->postbuf, 0, + SHARD_LOOKUP_MASK); + SHARD_STACK_UNWIND(setattr, frame, local->op_ret, local->op_errno, + &local->prebuf, &local->postbuf, local->xattr_rsp); + } else if (local->fop == GF_FOP_FSETATTR) { + if (local->op_ret >= 0) + shard_inode_ctx_set(local->fd->inode, this, &local->postbuf, 0, + SHARD_LOOKUP_MASK); + SHARD_STACK_UNWIND(fsetattr, frame, local->op_ret, local->op_errno, + &local->prebuf, &local->postbuf, local->xattr_rsp); + } - return 0; + return 0; } int -shard_common_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) +shard_common_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - shard_local_t *local = NULL; + shard_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (op_ret < 0) { - local->op_ret = op_ret; - local->op_errno = op_errno; - goto unwind; - } - - local->prebuf = *prebuf; - if (shard_modify_size_and_block_count (&local->prebuf, xdata)) { - local->op_ret = -1; - local->op_errno = EINVAL; - 
goto unwind; - } - if (xdata) - local->xattr_rsp = dict_ref (xdata); - local->postbuf = *postbuf; - local->postbuf.ia_size = local->prebuf.ia_size; - local->postbuf.ia_blocks = local->prebuf.ia_blocks; + if (op_ret < 0) { + local->op_ret = op_ret; + local->op_errno = op_errno; + goto unwind; + } + + local->prebuf = *prebuf; + if (shard_modify_size_and_block_count(&local->prebuf, xdata)) { + local->op_ret = -1; + local->op_errno = EINVAL; + goto unwind; + } + if (xdata) + local->xattr_rsp = dict_ref(xdata); + local->postbuf = *postbuf; + local->postbuf.ia_size = local->prebuf.ia_size; + local->postbuf.ia_blocks = local->prebuf.ia_blocks; unwind: - local->handler (frame, this); - return 0; + local->handler(frame, this); + return 0; } int -shard_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - struct iatt *stbuf, int32_t valid, dict_t *xdata) +shard_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + struct iatt *stbuf, int32_t valid, dict_t *xdata) { - int ret = -1; - uint64_t block_size = 0; - shard_local_t *local = NULL; + int ret = -1; + uint64_t block_size = 0; + shard_local_t *local = NULL; - if ((IA_ISDIR (loc->inode->ia_type)) || - (IA_ISLNK (loc->inode->ia_type))) { - STACK_WIND (frame, default_setattr_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->setattr, loc, stbuf, - valid, xdata); - return 0; - } - - ret = shard_inode_ctx_get_block_size (loc->inode, this, &block_size); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - SHARD_MSG_INODE_CTX_GET_FAILED, - "Failed to get block size from inode ctx of %s", - uuid_utoa (loc->inode->gfid)); - goto err; - } - - if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { - STACK_WIND (frame, default_setattr_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->setattr, loc, stbuf, - valid, xdata); - return 0; - } + if ((IA_ISDIR(loc->inode->ia_type)) || (IA_ISLNK(loc->inode->ia_type))) { + STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setattr, 
loc, stbuf, valid, xdata); + return 0; + } - local = mem_get0 (this->local_pool); - if (!local) - goto err; + ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + "Failed to get block size from inode ctx of %s", + uuid_utoa(loc->inode->gfid)); + goto err; + } - frame->local = local; + if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { + STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata); + return 0; + } - local->handler = shard_post_setattr_handler; - local->xattr_req = (xdata) ? dict_ref (xdata) : dict_new (); - if (!local->xattr_req) - goto err; - local->fop = GF_FOP_SETATTR; - loc_copy (&local->loc, loc); + local = mem_get0(this->local_pool); + if (!local) + goto err; - SHARD_MD_READ_FOP_INIT_REQ_DICT (this, local->xattr_req, - local->loc.gfid, local, err); + frame->local = local; - STACK_WIND (frame, shard_common_setattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, - local->xattr_req); + local->handler = shard_post_setattr_handler; + local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); + if (!local->xattr_req) + goto err; + local->fop = GF_FOP_SETATTR; + loc_copy(&local->loc, loc); - return 0; + SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, local->loc.gfid, + local, err); + STACK_WIND(frame, shard_common_setattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, + local->xattr_req); + return 0; err: - SHARD_STACK_UNWIND (setattr, frame, -1, ENOMEM, NULL, NULL, NULL); - return 0; + shard_common_failure_unwind(GF_FOP_SETATTR, frame, -1, ENOMEM); + return 0; } int -shard_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iatt *stbuf, int32_t valid, dict_t *xdata) +shard_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iatt *stbuf, int32_t valid, dict_t *xdata) { - int ret = -1; - uint64_t block_size = 0; - shard_local_t *local = NULL; - - if ((IA_ISDIR (fd->inode->ia_type)) || - (IA_ISLNK (fd->inode->ia_type))) { - STACK_WIND (frame, default_fsetattr_cbk, FIRST_CHILD(this), - FIRST_CHILD (this)->fops->fsetattr, fd, stbuf, - valid, xdata); - return 0; - } + int ret = -1; + uint64_t block_size = 0; + shard_local_t *local = NULL; - ret = shard_inode_ctx_get_block_size (fd->inode, this, &block_size); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - SHARD_MSG_INODE_CTX_GET_FAILED, - "Failed to get block size from inode ctx of %s", - uuid_utoa (fd->inode->gfid)); - goto err; - } + if ((IA_ISDIR(fd->inode->ia_type)) || (IA_ISLNK(fd->inode->ia_type))) { + STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata); + return 0; + } - if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { - STACK_WIND (frame, default_fsetattr_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fsetattr, fd, stbuf, - valid, xdata); - return 0; - } + ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + 
"Failed to get block size from inode ctx of %s", + uuid_utoa(fd->inode->gfid)); + goto err; + } - if (!this->itable) - this->itable = fd->inode->table; + if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { + STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata); + return 0; + } - local = mem_get0 (this->local_pool); - if (!local) - goto err; + if (!this->itable) + this->itable = fd->inode->table; - frame->local = local; + local = mem_get0(this->local_pool); + if (!local) + goto err; - local->handler = shard_post_setattr_handler; - local->xattr_req = (xdata) ? dict_ref (xdata) : dict_new (); - if (!local->xattr_req) - goto err; - local->fop = GF_FOP_FSETATTR; - local->fd = fd_ref (fd); + frame->local = local; - SHARD_MD_READ_FOP_INIT_REQ_DICT (this, local->xattr_req, - fd->inode->gfid, local, err); + local->handler = shard_post_setattr_handler; + local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); + if (!local->xattr_req) + goto err; + local->fop = GF_FOP_FSETATTR; + local->fd = fd_ref(fd); - STACK_WIND (frame, shard_common_setattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, - local->xattr_req); - return 0; + SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid, + local, err); + STACK_WIND(frame, shard_common_setattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, + local->xattr_req); + return 0; err: - SHARD_STACK_UNWIND (fsetattr, frame, -1, ENOMEM, NULL, NULL, NULL); - return 0; + shard_common_failure_unwind(GF_FOP_FSETATTR, frame, -1, ENOMEM); + return 0; } int -shard_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd, - int32_t keep_size, off_t offset, size_t len, dict_t *xdata) -{ - /* TBD */ - gf_msg (this->name, GF_LOG_INFO, ENOTSUP, SHARD_MSG_FOP_NOT_SUPPORTED, - "fallocate called on %s.", uuid_utoa (fd->inode->gfid)); - SHARD_STACK_UNWIND (fallocate, frame, -1, ENOTSUP, NULL, NULL, 
NULL); - return 0; +shard_common_inode_write_begin(call_frame_t *frame, xlator_t *this, + glusterfs_fop_t fop, fd_t *fd, + struct iovec *vector, int32_t count, + off_t offset, uint32_t flags, size_t len, + struct iobref *iobref, dict_t *xdata) +{ + int ret = 0; + int i = 0; + uint64_t block_size = 0; + shard_local_t *local = NULL; + + ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + "Failed to get block " + "size for %s from its inode ctx", + uuid_utoa(fd->inode->gfid)); + goto out; + } + + if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { + /* block_size = 0 means that the file was created before + * sharding was enabled on the volume. + */ + switch (fop) { + case GF_FOP_WRITE: + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->writev, fd, vector, + count, offset, flags, iobref, xdata); + break; + case GF_FOP_FALLOCATE: + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fallocate, fd, flags, + offset, len, xdata); + break; + case GF_FOP_ZEROFILL: + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->zerofill, fd, offset, + len, xdata); + break; + case GF_FOP_DISCARD: + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->discard, fd, offset, + len, xdata); + break; + default: + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, + "Invalid fop id = %d", fop); + break; + } + return 0; + } + + if (!this->itable) + this->itable = fd->inode->table; + + local = mem_get0(this->local_pool); + if (!local) + goto out; + + frame->local = local; + + ret = syncbarrier_init(&local->barrier); + if (ret) + goto out; + local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); + if (!local->xattr_req) + goto out; + + if (vector) { + local->vector = iov_dup(vector, count); + if (!local->vector) + goto out; + for (i = 0; i < count; i++) + local->total_size += vector[i].iov_len; + local->count = count; + } else { + local->total_size = len; + } + + local->fop = fop; + local->offset = offset; + local->flags = flags; + if (iobref) + local->iobref = iobref_ref(iobref); + local->fd = fd_ref(fd); + local->block_size = block_size; + local->resolver_base_inode = local->fd->inode; + GF_ATOMIC_INIT(local->delta_blocks, 0); + + local->loc.inode = inode_ref(fd->inode); + gf_uuid_copy(local->loc.gfid, fd->inode->gfid); + + shard_refresh_base_file(frame, this, NULL, fd, + shard_common_inode_write_post_lookup_handler); + return 0; +out: + shard_common_failure_unwind(fop, frame, -1, ENOMEM); + return 0; } int -shard_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, - size_t len, dict_t *xdata) +shard_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iovec *vector, int32_t count, off_t offset, uint32_t flags, + struct iobref *iobref, dict_t *xdata) { - /* TBD */ - gf_msg (this->name, GF_LOG_INFO, ENOTSUP, SHARD_MSG_FOP_NOT_SUPPORTED, - "discard called on %s.", uuid_utoa (fd->inode->gfid)); - SHARD_STACK_UNWIND (discard, frame, -1, ENOTSUP, NULL, NULL, NULL); - return 0; + shard_common_inode_write_begin(frame, this, GF_FOP_WRITE, fd, vector, count, + offset, flags, 0, iobref, xdata); + return 0; } int -shard_zerofill (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, - off_t len, dict_t *xdata) +shard_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, + int32_t keep_size, off_t offset, size_t len, dict_t *xdata) { - /* TBD */ - gf_msg (this->name, GF_LOG_INFO, ENOTSUP, SHARD_MSG_FOP_NOT_SUPPORTED, - "zerofill called on %s.", uuid_utoa (fd->inode->gfid)); - SHARD_STACK_UNWIND (zerofill, frame, -1, ENOTSUP, NULL, NULL, NULL); - return 0; + if ((keep_size != 0) && (keep_size != 
FALLOC_FL_ZERO_RANGE) && + (keep_size != (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))) + goto out; + + shard_common_inode_write_begin(frame, this, GF_FOP_FALLOCATE, fd, NULL, 0, + offset, keep_size, len, NULL, xdata); + return 0; +out: + shard_common_failure_unwind(GF_FOP_FALLOCATE, frame, -1, ENOTSUP); + return 0; } -int32_t -mem_acct_init (xlator_t *this) +int +shard_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + off_t len, dict_t *xdata) { - int ret = -1; - - if (!this) - return ret; - - ret = xlator_mem_acct_init (this, gf_shard_mt_end + 1); - - if (ret != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - SHARD_MSG_MEM_ACCT_INIT_FAILED, "Memory accounting init" - "failed"); - return ret; - } - - return ret; + shard_common_inode_write_begin(frame, this, GF_FOP_ZEROFILL, fd, NULL, 0, + offset, 0, len, NULL, xdata); + return 0; } int -init (xlator_t *this) +shard_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) { - int ret = -1; - shard_priv_t *priv = NULL; + shard_common_inode_write_begin(frame, this, GF_FOP_DISCARD, fd, NULL, 0, + offset, 0, len, NULL, xdata); + return 0; +} - if (!this) { - gf_msg ("shard", GF_LOG_ERROR, 0, SHARD_MSG_NULL_THIS, - "this is NULL. init() failed"); - goto out; - } +int32_t +shard_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + gf_seek_what_t what, dict_t *xdata) +{ + /* TBD */ + gf_msg(this->name, GF_LOG_INFO, ENOTSUP, SHARD_MSG_FOP_NOT_SUPPORTED, + "seek called on %s.", uuid_utoa(fd->inode->gfid)); + shard_common_failure_unwind(GF_FOP_SEEK, frame, -1, ENOTSUP); + return 0; +} - if (!this->parents) { - gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_INVALID_VOLFILE, - "Dangling volume. Check volfile"); - goto out; - } +int32_t +mem_acct_init(xlator_t *this) +{ + int ret = -1; - if (!this->children || this->children->next) { - gf_msg (this->name, GF_LOG_ERROR, 0, SHARD_MSG_INVALID_VOLFILE, - "shard not configured with exactly one sub-volume. 
" - "Check volfile"); - goto out; - } + if (!this) + return ret; - priv = GF_CALLOC (1, sizeof (shard_priv_t), gf_shard_mt_priv_t); - if (!priv) - goto out; + ret = xlator_mem_acct_init(this, gf_shard_mt_end + 1); - GF_OPTION_INIT ("shard-block-size", priv->block_size, size_uint64, out); + if (ret != 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_MEM_ACCT_INIT_FAILED, + "Memory accounting init" + "failed"); + return ret; + } - this->local_pool = mem_pool_new (shard_local_t, 128); - if (!this->local_pool) { - ret = -1; - goto out; - } - gf_uuid_parse (SHARD_ROOT_GFID, priv->dot_shard_gfid); + return ret; +} - this->private = priv; - LOCK_INIT (&priv->lock); - INIT_LIST_HEAD (&priv->ilist_head); - ret = 0; +int +init(xlator_t *this) +{ + int ret = -1; + shard_priv_t *priv = NULL; + + if (!this) { + gf_msg("shard", GF_LOG_ERROR, 0, SHARD_MSG_NULL_THIS, + "this is NULL. init() failed"); + return -1; + } + + if (!this->parents) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INVALID_VOLFILE, + "Dangling volume. Check volfile"); + goto out; + } + + if (!this->children || this->children->next) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INVALID_VOLFILE, + "shard not configured with exactly one sub-volume. 
" + "Check volfile"); + goto out; + } + + priv = GF_CALLOC(1, sizeof(shard_priv_t), gf_shard_mt_priv_t); + if (!priv) + goto out; + + GF_OPTION_INIT("shard-block-size", priv->block_size, size_uint64, out); + + GF_OPTION_INIT("shard-deletion-rate", priv->deletion_rate, uint32, out); + + GF_OPTION_INIT("shard-lru-limit", priv->lru_limit, uint64, out); + + this->local_pool = mem_pool_new(shard_local_t, 128); + if (!this->local_pool) { + ret = -1; + goto out; + } + gf_uuid_parse(SHARD_ROOT_GFID, priv->dot_shard_gfid); + gf_uuid_parse(DOT_SHARD_REMOVE_ME_GFID, priv->dot_shard_rm_gfid); + + this->private = priv; + LOCK_INIT(&priv->lock); + INIT_LIST_HEAD(&priv->ilist_head); + ret = 0; out: - if (ret) { - GF_FREE (priv); - mem_pool_destroy (this->local_pool); - } - - return ret; + if (ret) { + GF_FREE(priv); + mem_pool_destroy(this->local_pool); + } + return ret; } void -fini (xlator_t *this) +fini(xlator_t *this) { - shard_priv_t *priv = NULL; + shard_priv_t *priv = NULL; + + GF_VALIDATE_OR_GOTO("shard", this, out); - GF_VALIDATE_OR_GOTO ("shard", this, out); + /*Itable was not created by shard, hence setting to NULL.*/ + this->itable = NULL; - mem_pool_destroy (this->local_pool); - this->local_pool = NULL; + mem_pool_destroy(this->local_pool); + this->local_pool = NULL; - priv = this->private; - if (!priv) - goto out; + priv = this->private; + if (!priv) + goto out; - this->private = NULL; - LOCK_DESTROY (&priv->lock); - GF_FREE (priv); + this->private = NULL; + LOCK_DESTROY(&priv->lock); + GF_FREE(priv); out: - return; + return; } int -reconfigure (xlator_t *this, dict_t *options) +reconfigure(xlator_t *this, dict_t *options) { - int ret = -1; - shard_priv_t *priv = NULL; + int ret = -1; + shard_priv_t *priv = NULL; - priv = this->private; + priv = this->private; - GF_OPTION_RECONF ("shard-block-size", priv->block_size, options, size, - out); + GF_OPTION_RECONF("shard-block-size", priv->block_size, options, size, out); - ret = 0; + 
GF_OPTION_RECONF("shard-deletion-rate", priv->deletion_rate, options, + uint32, out); + ret = 0; out: - return ret; + return ret; } int -shard_forget (xlator_t *this, inode_t *inode) +shard_forget(xlator_t *this, inode_t *inode) { - uint64_t ctx_uint = 0; - shard_inode_ctx_t *ctx = NULL; + uint64_t ctx_uint = 0; + shard_inode_ctx_t *ctx = NULL; + shard_priv_t *priv = NULL; - inode_ctx_del (inode, this, &ctx_uint); - if (!ctx_uint) - return 0; + priv = this->private; + if (!priv) + return 0; - ctx = (shard_inode_ctx_t *)ctx_uint; + inode_ctx_del(inode, this, &ctx_uint); + if (!ctx_uint) + return 0; - GF_FREE (ctx); + ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; - return 0; + /* When LRU limit reaches inode will be forcefully removed from the + * table, inode needs to be removed from LRU of shard as well. + */ + if (!list_empty(&ctx->ilist)) { + LOCK(&priv->lock); + { + list_del_init(&ctx->ilist); + priv->inode_count--; + } + UNLOCK(&priv->lock); + } + GF_FREE(ctx); + + return 0; } int -shard_release (xlator_t *this, fd_t *fd) +shard_release(xlator_t *this, fd_t *fd) { - /* TBD */ - return 0; + /* TBD */ + return 0; } int -shard_priv_dump (xlator_t *this) +shard_priv_dump(xlator_t *this) { - shard_priv_t *priv = NULL; - char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0,}; + shard_priv_t *priv = NULL; + char key_prefix[GF_DUMP_MAX_BUF_LEN] = { + 0, + }; + char *str = NULL; - priv = this->private; + priv = this->private; - snprintf (key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, - this->name); - gf_proc_dump_add_section (key_prefix); - gf_proc_dump_write ("shard-block-size", "%s", - gf_uint64_2human_readable (priv->block_size)); - gf_proc_dump_write ("inode-count", "%d", priv->inode_count); - gf_proc_dump_write ("ilist_head", "%p", &priv->ilist_head); - gf_proc_dump_write ("lru-max-limit", "%d", SHARD_MAX_INODES); + snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name); + gf_proc_dump_add_section("%s", key_prefix); + str = 
gf_uint64_2human_readable(priv->block_size); + gf_proc_dump_write("shard-block-size", "%s", str); + gf_proc_dump_write("inode-count", "%d", priv->inode_count); + gf_proc_dump_write("ilist_head", "%p", &priv->ilist_head); + gf_proc_dump_write("lru-max-limit", "%" PRIu64, priv->lru_limit); - return 0; + GF_FREE(str); + + return 0; } int -shard_releasedir (xlator_t *this, fd_t *fd) +shard_releasedir(xlator_t *this, fd_t *fd) { - return 0; + return 0; } struct xlator_fops fops = { - .lookup = shard_lookup, - .open = shard_open, - .flush = shard_flush, - .fsync = shard_fsync, - .stat = shard_stat, - .fstat = shard_fstat, - .getxattr = shard_getxattr, - .fgetxattr = shard_fgetxattr, - .readv = shard_readv, - .writev = shard_writev, - .truncate = shard_truncate, - .ftruncate = shard_ftruncate, - .setxattr = shard_setxattr, - .fsetxattr = shard_fsetxattr, - .setattr = shard_setattr, - .fsetattr = shard_fsetattr, - .removexattr = shard_removexattr, - .fremovexattr = shard_fremovexattr, - .fallocate = shard_fallocate, - .discard = shard_discard, - .zerofill = shard_zerofill, - .readdir = shard_readdir, - .readdirp = shard_readdirp, - .create = shard_create, - .mknod = shard_mknod, - .link = shard_link, - .unlink = shard_unlink, - .rename = shard_rename, + .lookup = shard_lookup, + .open = shard_open, + .flush = shard_flush, + .fsync = shard_fsync, + .stat = shard_stat, + .fstat = shard_fstat, + .getxattr = shard_getxattr, + .fgetxattr = shard_fgetxattr, + .readv = shard_readv, + .writev = shard_writev, + .truncate = shard_truncate, + .ftruncate = shard_ftruncate, + .setxattr = shard_setxattr, + .fsetxattr = shard_fsetxattr, + .setattr = shard_setattr, + .fsetattr = shard_fsetattr, + .removexattr = shard_removexattr, + .fremovexattr = shard_fremovexattr, + .fallocate = shard_fallocate, + .discard = shard_discard, + .zerofill = shard_zerofill, + .readdir = shard_readdir, + .readdirp = shard_readdirp, + .create = shard_create, + .mknod = shard_mknod, + .link = shard_link, + 
.unlink = shard_unlink, + .rename = shard_rename, + .seek = shard_seek, }; struct xlator_cbks cbks = { - .forget = shard_forget, - .release = shard_release, - .releasedir = shard_releasedir, + .forget = shard_forget, + .release = shard_release, + .releasedir = shard_releasedir, }; struct xlator_dumpops dumpops = { - .priv = shard_priv_dump, + .priv = shard_priv_dump, }; struct volume_options options[] = { - { .key = {"shard-block-size"}, - .type = GF_OPTION_TYPE_SIZET, - .default_value = "4MB", - .min = SHARD_MIN_BLOCK_SIZE, - .max = SHARD_MAX_BLOCK_SIZE, - .description = "The size unit used to break a file into multiple " - "chunks", - }, - { .key = {NULL} }, + { + .key = {"shard"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "enable/disable shard", + .op_version = {GD_OP_VERSION_6_0}, + .flags = OPT_FLAG_SETTABLE, + }, + { + .key = {"shard-block-size"}, + .type = GF_OPTION_TYPE_SIZET, + .op_version = {GD_OP_VERSION_3_7_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC, + .tags = {"shard"}, + .default_value = "64MB", + .min = SHARD_MIN_BLOCK_SIZE, + .max = SHARD_MAX_BLOCK_SIZE, + .description = "The size unit used to break a file into multiple " + "chunks", + }, + { + .key = {"shard-deletion-rate"}, + .type = GF_OPTION_TYPE_INT, + .op_version = {GD_OP_VERSION_5_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC, + .tags = {"shard"}, + .default_value = "100", + .min = 100, + .max = INT_MAX, + .description = "The number of shards to send deletes on at a time", + }, + { + .key = {"shard-lru-limit"}, + .type = GF_OPTION_TYPE_INT, + .op_version = {GD_OP_VERSION_5_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT, + .tags = {"shard"}, + .default_value = "16384", + .min = 20, + .max = INT_MAX, + .description = "The number of resolved shard inodes to keep in " + "memory. 
A higher number means shards that are " + "resolved will remain in memory longer, avoiding " + "frequent lookups on them when they participate in " + "file operations. The option also has a bearing on " + "amount of memory consumed by these inodes and their " + "internal metadata", + }, + {.key = {NULL}}, +}; + +xlator_api_t xlator_api = { + .init = init, + .fini = fini, + .reconfigure = reconfigure, + .mem_acct_init = mem_acct_init, + .op_version = {1}, /* Present from the initial version */ + .dumpops = &dumpops, + .fops = &fops, + .cbks = &cbks, + .options = options, + .identifier = "shard", + .category = GF_MAINTAINED, }; diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h index ecb63dd157b..4fe181b64d5 100644 --- a/xlators/features/shard/src/shard.h +++ b/xlators/features/shard/src/shard.h @@ -8,267 +8,341 @@ cases as published by the Free Software Foundation. */ - #ifndef __SHARD_H__ #define __SHARD_H__ -#include "xlator.h" -#include "compat-errno.h" +#include <glusterfs/xlator.h> +#include <glusterfs/compat-errno.h> #include "shard-messages.h" +#include <glusterfs/syncop.h> #define GF_SHARD_DIR ".shard" -#define SHARD_MIN_BLOCK_SIZE (4 * GF_UNIT_MB) -#define SHARD_MAX_BLOCK_SIZE (4 * GF_UNIT_TB) +#define GF_SHARD_REMOVE_ME_DIR ".remove_me" +#define SHARD_MIN_BLOCK_SIZE (4 * GF_UNIT_MB) +#define SHARD_MAX_BLOCK_SIZE (4 * GF_UNIT_TB) #define SHARD_XATTR_PREFIX "trusted.glusterfs.shard." 
#define GF_XATTR_SHARD_BLOCK_SIZE "trusted.glusterfs.shard.block-size" -#define GF_XATTR_SHARD_FILE_SIZE "trusted.glusterfs.shard.file-size" -#define SHARD_ROOT_GFID "be318638-e8a0-4c6d-977d-7a937aa84806" -#define SHARD_INODE_LRU_LIMIT 4096 -#define SHARD_MAX_INODES 16384 /** * Bit masks for the valid flag, which is used while updating ctx -**/ -#define SHARD_MASK_BLOCK_SIZE (1 << 0) -#define SHARD_MASK_PROT (1 << 1) -#define SHARD_MASK_NLINK (1 << 2) -#define SHARD_MASK_UID (1 << 3) -#define SHARD_MASK_GID (1 << 4) -#define SHARD_MASK_SIZE (1 << 6) -#define SHARD_MASK_BLOCKS (1 << 7) -#define SHARD_MASK_TIMES (1 << 8) -#define SHARD_MASK_OTHERS (1 << 9) -#define SHARD_MASK_REFRESH_RESET (1 << 10) - -#define SHARD_INODE_WRITE_MASK (SHARD_MASK_SIZE | SHARD_MASK_BLOCKS \ - | SHARD_MASK_TIMES) + **/ +#define SHARD_MASK_BLOCK_SIZE (1 << 0) +#define SHARD_MASK_PROT (1 << 1) +#define SHARD_MASK_NLINK (1 << 2) +#define SHARD_MASK_UID (1 << 3) +#define SHARD_MASK_GID (1 << 4) +#define SHARD_MASK_SIZE (1 << 6) +#define SHARD_MASK_BLOCKS (1 << 7) +#define SHARD_MASK_TIMES (1 << 8) +#define SHARD_MASK_OTHERS (1 << 9) +#define SHARD_MASK_REFRESH_RESET (1 << 10) -#define SHARD_LOOKUP_MASK (SHARD_MASK_PROT | SHARD_MASK_NLINK | SHARD_MASK_UID \ - | SHARD_MASK_GID | SHARD_MASK_TIMES \ - | SHARD_MASK_OTHERS) +#define SHARD_INODE_WRITE_MASK \ + (SHARD_MASK_SIZE | SHARD_MASK_BLOCKS | SHARD_MASK_TIMES) -#define SHARD_ALL_MASK (SHARD_MASK_BLOCK_SIZE | SHARD_MASK_PROT \ - | SHARD_MASK_NLINK | SHARD_MASK_UID | SHARD_MASK_GID \ - | SHARD_MASK_SIZE | SHARD_MASK_BLOCKS \ - | SHARD_MASK_TIMES | SHARD_MASK_OTHERS) +#define SHARD_LOOKUP_MASK \ + (SHARD_MASK_PROT | SHARD_MASK_NLINK | SHARD_MASK_UID | SHARD_MASK_GID | \ + SHARD_MASK_TIMES | SHARD_MASK_OTHERS) +#define SHARD_ALL_MASK \ + (SHARD_MASK_BLOCK_SIZE | SHARD_MASK_PROT | SHARD_MASK_NLINK | \ + SHARD_MASK_UID | SHARD_MASK_GID | SHARD_MASK_SIZE | SHARD_MASK_BLOCKS | \ + SHARD_MASK_TIMES | SHARD_MASK_OTHERS) #define get_lowest_block(off, 
shard_size) ((off) / (shard_size)) -#define get_highest_block(off, len, shard_size) \ - (((((off)+(len)) == 0)?0:((off)+(len)-1)) / (shard_size)) +#define get_highest_block(off, len, shard_size) \ + (((((off) + (len)) == 0) ? 0 : ((off) + (len)-1)) / (shard_size)) + +int +shard_unlock_inodelk(call_frame_t *frame, xlator_t *this); + +int +shard_unlock_entrylk(call_frame_t *frame, xlator_t *this); -#define SHARD_ENTRY_FOP_CHECK(loc, op_errno, label) do { \ - if ((loc->name && !strcmp (GF_SHARD_DIR, loc->name)) && \ - (((loc->parent) && \ - __is_root_gfid (loc->parent->gfid)) || \ - __is_root_gfid (loc->pargfid))) { \ - op_errno = EPERM; \ - goto label; \ - } \ - \ - if ((loc->parent && \ - __is_shard_dir (loc->parent->gfid)) || \ - __is_shard_dir (loc->pargfid)) { \ - op_errno = EPERM; \ - goto label; \ - } \ -} while (0) +#define SHARD_ENTRY_FOP_CHECK(loc, op_errno, label) \ + do { \ + if ((loc->name && !strcmp(GF_SHARD_DIR, loc->name)) && \ + (((loc->parent) && __is_root_gfid(loc->parent->gfid)) || \ + __is_root_gfid(loc->pargfid))) { \ + op_errno = EPERM; \ + goto label; \ + } \ + \ + if ((loc->parent && __is_shard_dir(loc->parent->gfid)) || \ + __is_shard_dir(loc->pargfid)) { \ + op_errno = EPERM; \ + goto label; \ + } \ + } while (0) -#define SHARD_INODE_OP_CHECK(gfid, err, label) do { \ - if (__is_shard_dir(gfid)) { \ - err = EPERM; \ - goto label; \ - } \ -} while (0) +#define SHARD_INODE_OP_CHECK(gfid, err, label) \ + do { \ + if (__is_shard_dir(gfid)) { \ + err = EPERM; \ + goto label; \ + } \ + } while (0) -#define SHARD_STACK_UNWIND(fop, frame, params ...) do { \ - shard_local_t *__local = NULL; \ - if (frame) { \ - __local = frame->local; \ - frame->local = NULL; \ - } \ - STACK_UNWIND_STRICT (fop, frame, params); \ - if (__local) { \ - shard_local_wipe (__local); \ - mem_put (__local); \ - } \ -} while (0) +#define SHARD_STACK_UNWIND(fop, frame, params...) 
\ + do { \ + shard_local_t *__local = NULL; \ + if (frame) { \ + __local = frame->local; \ + if (__local && __local->int_inodelk.acquired_lock) \ + shard_unlock_inodelk(frame, frame->this); \ + if (__local && __local->int_entrylk.acquired_lock) \ + shard_unlock_entrylk(frame, frame->this); \ + frame->local = NULL; \ + } \ + STACK_UNWIND_STRICT(fop, frame, params); \ + if (__local) { \ + shard_local_wipe(__local); \ + mem_put(__local); \ + } \ + } while (0) +#define SHARD_STACK_DESTROY(frame) \ + do { \ + shard_local_t *__local = NULL; \ + __local = frame->local; \ + frame->local = NULL; \ + STACK_DESTROY(frame->root); \ + if (__local) { \ + shard_local_wipe(__local); \ + mem_put(__local); \ + } \ + } while (0); -#define SHARD_INODE_CREATE_INIT(this, local, xattr_req, loc, label) do { \ - int __ret = -1; \ - int64_t *__size_attr = NULL; \ - shard_priv_t *__priv = NULL; \ - \ - __priv = this->private; \ - \ - local->block_size = hton64 (__priv->block_size); \ - __ret = dict_set_static_bin (xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, \ - &local->block_size, \ - sizeof (local->block_size)); \ - if (__ret) { \ - gf_msg (this->name, GF_LOG_WARNING, 0, \ - SHARD_MSG_DICT_SET_FAILED, "Failed to set key: %s " \ - "on path %s", GF_XATTR_SHARD_BLOCK_SIZE, loc->path); \ - goto label; \ - } \ - \ - __ret = shard_set_size_attrs (0, 0, &__size_attr); \ - if (__ret) \ - goto label; \ - \ - __ret = dict_set_bin (xattr_req, GF_XATTR_SHARD_FILE_SIZE, \ - __size_attr, 8 * 4); \ - if (__ret) { \ - gf_msg (this->name, GF_LOG_WARNING, 0, \ - SHARD_MSG_DICT_SET_FAILED, "Failed to set key: %s " \ - "on path %s", GF_XATTR_SHARD_FILE_SIZE, loc->path); \ - GF_FREE (__size_attr); \ - goto label; \ - } \ -} while (0) +#define SHARD_INODE_CREATE_INIT(this, block_size, xattr_req, loc, size, \ + block_count, label) \ + do { \ + int __ret = -1; \ + int64_t *__size_attr = NULL; \ + uint64_t *__bs = 0; \ + \ + __bs = GF_MALLOC(sizeof(uint64_t), gf_shard_mt_uint64_t); \ + if (!__bs) \ + goto label; \ + 
*__bs = hton64(block_size); \ + __ret = dict_set_bin(xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, __bs, \ + sizeof(*__bs)); \ + if (__ret) { \ + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, \ + "Failed to set key: %s " \ + "on path %s", \ + GF_XATTR_SHARD_BLOCK_SIZE, (loc)->path); \ + GF_FREE(__bs); \ + goto label; \ + } \ + \ + __ret = shard_set_size_attrs(size, block_count, &__size_attr); \ + if (__ret) \ + goto label; \ + \ + __ret = dict_set_bin(xattr_req, GF_XATTR_SHARD_FILE_SIZE, __size_attr, \ + 8 * 4); \ + if (__ret) { \ + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, \ + "Failed to set key: %s " \ + "on path %s", \ + GF_XATTR_SHARD_FILE_SIZE, (loc)->path); \ + GF_FREE(__size_attr); \ + goto label; \ + } \ + } while (0) +#define SHARD_MD_READ_FOP_INIT_REQ_DICT(this, dict, gfid, local, label) \ + do { \ + int __ret = -1; \ + \ + __ret = dict_set_uint64(dict, GF_XATTR_SHARD_FILE_SIZE, 8 * 4); \ + if (__ret) { \ + local->op_ret = -1; \ + local->op_errno = ENOMEM; \ + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, \ + "Failed to set dict value:" \ + " key:%s for %s.", \ + GF_XATTR_SHARD_FILE_SIZE, uuid_utoa(gfid)); \ + goto label; \ + } \ + } while (0) -#define SHARD_MD_READ_FOP_INIT_REQ_DICT(this, dict, gfid, local, label) do { \ - int __ret = -1; \ - \ - __ret = dict_set_uint64 (dict, GF_XATTR_SHARD_FILE_SIZE, 8 * 4); \ - if (__ret) { \ - local->op_ret = -1; \ - local->op_errno = ENOMEM; \ - gf_msg (this->name, GF_LOG_WARNING, 0, \ - SHARD_MSG_DICT_SET_FAILED, "Failed to set dict value:"\ - " key:%s for %s.", GF_XATTR_SHARD_FILE_SIZE, \ - uuid_utoa (gfid)); \ - goto label; \ - } \ -} while (0) +#define SHARD_SET_ROOT_FS_ID(frame, local) \ + do { \ + if (!local->is_set_fsid) { \ + local->uid = frame->root->uid; \ + local->gid = frame->root->gid; \ + frame->root->uid = 0; \ + frame->root->gid = 0; \ + local->is_set_fsid = _gf_true; \ + } \ + } while (0) -#define SHARD_SET_ROOT_FS_ID(frame, local) do { \ - if 
(!local->is_set_fsid) { \ - local->uid = frame->root->uid; \ - local->gid = frame->root->gid; \ - frame->root->uid = 0; \ - frame->root->gid = 0; \ - local->is_set_fsid = _gf_true; \ - } \ -} while (0) +#define SHARD_UNSET_ROOT_FS_ID(frame, local) \ + do { \ + if (local->is_set_fsid) { \ + frame->root->uid = local->uid; \ + frame->root->gid = local->gid; \ + local->is_set_fsid = _gf_false; \ + } \ + } while (0) -#define SHARD_UNSET_ROOT_FS_ID(frame, local) do { \ - if (local->is_set_fsid) { \ - frame->root->uid = local->uid; \ - frame->root->gid = local->gid; \ - local->is_set_fsid = _gf_false; \ - } \ -} while (0) +#define SHARD_TIME_UPDATE(ctx_sec, ctx_nsec, new_sec, new_nsec) \ + do { \ + if (ctx_sec == new_sec) \ + ctx_nsec = new_nsec = max(new_nsec, ctx_nsec); \ + else if (ctx_sec > new_sec) { \ + new_sec = ctx_sec; \ + new_nsec = ctx_nsec; \ + } else { \ + ctx_sec = new_sec; \ + ctx_nsec = new_nsec; \ + } \ + } while (0) -#define SHARD_TIME_UPDATE(ctx_sec, ctx_nsec, new_sec, new_nsec) do { \ - if (ctx_sec == new_sec) \ - ctx_nsec = new_nsec = max (new_nsec, ctx_nsec); \ - else if (ctx_sec > new_sec) { \ - new_sec = ctx_sec; \ - new_nsec = ctx_nsec; \ - } else { \ - ctx_sec = new_sec; \ - ctx_nsec = new_nsec; \ - } \ - } while (0) +typedef enum { + SHARD_BG_DELETION_NONE = 0, + SHARD_BG_DELETION_LAUNCHING, + SHARD_BG_DELETION_IN_PROGRESS, +} shard_bg_deletion_state_t; +/* rm = "remove me" */ typedef struct shard_priv { - uint64_t block_size; - uuid_t dot_shard_gfid; - inode_t *dot_shard_inode; - gf_lock_t lock; - int inode_count; - struct list_head ilist_head; + uint64_t block_size; + uuid_t dot_shard_gfid; + uuid_t dot_shard_rm_gfid; + inode_t *dot_shard_inode; + inode_t *dot_shard_rm_inode; + gf_lock_t lock; + int inode_count; + struct list_head ilist_head; + uint32_t deletion_rate; + shard_bg_deletion_state_t bg_del_state; + gf_boolean_t first_lookup_done; + uint64_t lru_limit; } shard_priv_t; typedef struct { - loc_t *loc; - short type; - char *domain; -} 
shard_lock_t; + loc_t loc; + char *domain; + struct gf_flock flock; + gf_boolean_t acquired_lock; +} shard_inodelk_t; -typedef int32_t (*shard_post_fop_handler_t) (call_frame_t *frame, - xlator_t *this); -typedef int32_t (*shard_post_resolve_fop_handler_t) (call_frame_t *frame, - xlator_t *this); -typedef int32_t (*shard_post_lookup_shards_fop_handler_t) (call_frame_t *frame, - xlator_t *this); +typedef struct { + loc_t loc; + char *domain; + char *basename; + entrylk_cmd cmd; + entrylk_type type; + gf_boolean_t acquired_lock; +} shard_entrylk_t; + +typedef int32_t (*shard_post_fop_handler_t)(call_frame_t *frame, + xlator_t *this); +typedef int32_t (*shard_post_resolve_fop_handler_t)(call_frame_t *frame, + xlator_t *this); +typedef int32_t (*shard_post_lookup_shards_fop_handler_t)(call_frame_t *frame, + xlator_t *this); -typedef int32_t (*shard_post_mknod_fop_handler_t) (call_frame_t *frame, - xlator_t *this); +typedef int32_t (*shard_post_mknod_fop_handler_t)(call_frame_t *frame, + xlator_t *this); + +typedef int32_t (*shard_post_update_size_fop_handler_t)(call_frame_t *frame, + xlator_t *this); -typedef int32_t (*shard_post_update_size_fop_handler_t) (call_frame_t *frame, - xlator_t *this); typedef struct shard_local { - int op_ret; - int op_errno; - int first_block; - int last_block; - int num_blocks; - int call_count; - int eexist_count; - int create_count; - int xflag; - int count; - uint32_t flags; - uint32_t uid; - uint32_t gid; - uint64_t block_size; - uint64_t dst_block_size; - off_t offset; - size_t total_size; - size_t written_size; - size_t hole_size; - size_t req_size; - size_t readdir_size; - int64_t delta_size; - int delta_blocks; - loc_t loc; - loc_t dot_shard_loc; - loc_t loc2; - loc_t tmp_loc; - fd_t *fd; - dict_t *xattr_req; - dict_t *xattr_rsp; - inode_t **inode_list; - glusterfs_fop_t fop; - struct iatt prebuf; - struct iatt postbuf; - struct iatt preoldparent; - struct iatt postoldparent; - struct iatt prenewparent; - struct iatt 
postnewparent; - struct iovec *vector; - struct iobref *iobref; - struct iobuf *iobuf; - gf_dirent_t entries_head; - gf_boolean_t is_set_fsid; - gf_boolean_t list_inited; - shard_post_fop_handler_t handler; - shard_post_lookup_shards_fop_handler_t pls_fop_handler; - shard_post_resolve_fop_handler_t post_res_handler; - shard_post_mknod_fop_handler_t post_mknod_handler; - shard_post_update_size_fop_handler_t post_update_size_handler; - struct { - int lock_count; - fop_inodelk_cbk_t inodelk_cbk; - shard_lock_t *shard_lock; - } lock; + int op_ret; + int op_errno; + uint64_t first_block; + uint64_t last_block; + uint64_t num_blocks; + int call_count; + int eexist_count; + int create_count; + int xflag; + int count; + uint32_t flags; + uint32_t uid; + uint32_t gid; + uint64_t block_size; + uint64_t dst_block_size; + int32_t datasync; + off_t offset; + size_t total_size; + size_t written_size; + size_t hole_size; + size_t req_size; + size_t readdir_size; + int64_t delta_size; + gf_atomic_t delta_blocks; + loc_t loc; + loc_t dot_shard_loc; + loc_t dot_shard_rm_loc; + loc_t loc2; + loc_t tmp_loc; + fd_t *fd; + dict_t *xattr_req; + dict_t *xattr_rsp; + inode_t **inode_list; + glusterfs_fop_t fop; + struct iatt prebuf; + struct iatt postbuf; + struct iatt preoldparent; + struct iatt postoldparent; + struct iatt prenewparent; + struct iatt postnewparent; + struct iovec *vector; + struct iobref *iobref; + struct iobuf *iobuf; + gf_dirent_t entries_head; + gf_boolean_t is_set_fsid; + gf_boolean_t list_inited; + shard_post_fop_handler_t handler; + shard_post_lookup_shards_fop_handler_t pls_fop_handler; + shard_post_resolve_fop_handler_t post_res_handler; + shard_post_mknod_fop_handler_t post_mknod_handler; + shard_post_update_size_fop_handler_t post_update_size_handler; + shard_inodelk_t int_inodelk; + shard_entrylk_t int_entrylk; + inode_t *resolver_base_inode; + gf_boolean_t first_lookup_done; + syncbarrier_t barrier; + gf_boolean_t lookup_shards_barriered; + gf_boolean_t 
unlink_shards_barriered; + gf_boolean_t resolve_not; + loc_t newloc; + call_frame_t *main_frame; + call_frame_t *inodelk_frame; + call_frame_t *entrylk_frame; + uint32_t deletion_rate; + gf_boolean_t cleanup_required; + uuid_t base_gfid; + char *name; } shard_local_t; typedef struct shard_inode_ctx { - uint64_t block_size; /* The block size with which this inode is - sharded */ - struct iatt stat; - gf_boolean_t refresh; - /* The following members of inode ctx will be applicable only to the - * individual shards' ctx and never the base file ctx. - */ - struct list_head ilist; - uuid_t base_gfid; - int block_num; + uint64_t block_size; /* The block size with which this inode is + sharded */ + struct iatt stat; + gf_boolean_t refresh; + /* The following members of inode ctx will be applicable only to the + * individual shards' ctx and never the base file ctx. + */ + struct list_head ilist; + uuid_t base_gfid; + int block_num; + gf_boolean_t refreshed; + struct list_head to_fsync_list; + int fsync_needed; + inode_t *inode; + int fsync_count; + inode_t *base_inode; } shard_inode_ctx_t; +typedef enum { + SHARD_INTERNAL_DIR_DOT_SHARD = 1, + SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME, +} shard_internal_dir_type_t; + #endif /* __SHARD_H__ */ diff --git a/xlators/features/snapview-client/src/Makefile.am b/xlators/features/snapview-client/src/Makefile.am index 3b08e685092..fa08656c537 100644 --- a/xlators/features/snapview-client/src/Makefile.am +++ b/xlators/features/snapview-client/src/Makefile.am @@ -1,14 +1,15 @@ xlator_LTLIBRARIES = snapview-client.la xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features -snapview_client_la_LDFLAGS = $(GF_XLATOR_DEFAULT_LDFLAGS) +snapview_client_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) snapview_client_la_SOURCES = snapview-client.c snapview_client_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la -noinst_HEADERS = snapview-client.h snapview-client-mem-types.h +noinst_HEADERS = snapview-client.h 
snapview-client-mem-types.h snapview-client-messages.h -AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ + -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src AM_CFLAGS = -Wall $(GF_CFLAGS) diff --git a/xlators/features/snapview-client/src/snapview-client-mem-types.h b/xlators/features/snapview-client/src/snapview-client-mem-types.h index 1a0158d950e..3c3ab555a55 100644 --- a/xlators/features/snapview-client/src/snapview-client-mem-types.h +++ b/xlators/features/snapview-client/src/snapview-client-mem-types.h @@ -11,14 +11,14 @@ #ifndef _SVC_MEM_TYPES_H #define _SVC_MEM_TYPES_H -#include "mem-types.h" +#include <glusterfs/mem-types.h> enum svc_mem_types { - gf_svc_mt_svc_private_t = gf_common_mt_end + 1, - gf_svc_mt_svc_local_t, - gf_svc_mt_svc_inode_t, - gf_svc_mt_svc_fd_t, - gf_svc_mt_end + gf_svc_mt_svc_private_t = gf_common_mt_end + 1, + gf_svc_mt_svc_local_t, + gf_svc_mt_svc_inode_t, + gf_svc_mt_svc_fd_t, + gf_svc_mt_end }; #endif diff --git a/xlators/features/snapview-client/src/snapview-client-messages.h b/xlators/features/snapview-client/src/snapview-client-messages.h new file mode 100644 index 00000000000..c02fb154930 --- /dev/null +++ b/xlators/features/snapview-client/src/snapview-client-messages.h @@ -0,0 +1,71 @@ +/* + Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. + */ + +#ifndef _SNAPVIEW_CLIENT_MESSAGES_H_ +#define _SNAPVIEW_CLIENT_MESSAGES_H_ + +#include <glusterfs/glfs-message-id.h> + +/* To add new message IDs, append new identifiers at the end of the list. + * + * Never remove a message ID. 
If it's not used anymore, you can rename it or + * leave it as it is, but not delete it. This is to prevent reutilization of + * IDs by other messages. + * + * The component name must match one of the entries defined in + * glfs-message-id.h. + */ + +GLFS_MSGID(SNAPVIEW_CLIENT, SVC_MSG_NO_MEMORY, SVC_MSG_MEM_ACNT_FAILED, + SVC_MSG_SET_INODE_CONTEXT_FAILED, SVC_MSG_GET_INODE_CONTEXT_FAILED, + SVC_MSG_DELETE_INODE_CONTEXT_FAILED, SVC_MSG_SET_FD_CONTEXT_FAILED, + SVC_MSG_GET_FD_CONTEXT_FAILED, SVC_MSG_DICT_SET_FAILED, + SVC_MSG_SUBVOLUME_NULL, SVC_MSG_NO_CHILD_FOR_XLATOR, + SVC_MSG_XLATOR_CHILDREN_WRONG, SVC_MSG_NORMAL_GRAPH_LOOKUP_FAIL, + SVC_MSG_SNAPVIEW_GRAPH_LOOKUP_FAIL, SVC_MSG_OPENDIR_SPECIAL_DIR, + SVC_MSG_RENAME_SNAPSHOT_ENTRY, SVC_MSG_LINK_SNAPSHOT_ENTRY, + SVC_MSG_COPY_ENTRY_POINT_FAILED, SVC_MSG_ENTRY_POINT_SPECIAL_DIR, + SVC_MSG_STR_LEN, SVC_MSG_INVALID_ENTRY_POINT, SVC_MSG_NULL_PRIV, + SVC_MSG_PRIV_DESTROY_FAILED, SVC_MSG_ALLOC_FD_FAILED, + SVC_MSG_ALLOC_INODE_FAILED, SVC_MSG_NULL_SPECIAL_DIR, + SVC_MSG_MEM_POOL_GET_FAILED); + +#define SVC_MSG_ALLOC_FD_FAILED_STR "failed to allocate new fd context" +#define SVC_MSG_SET_FD_CONTEXT_FAILED_STR "failed to set fd context" +#define SVC_MSG_STR_LEN_STR \ + "destination buffer size is less than the length of entry point name" +#define SVC_MSG_NORMAL_GRAPH_LOOKUP_FAIL_STR "lookup failed on normal graph" +#define SVC_MSG_SNAPVIEW_GRAPH_LOOKUP_FAIL_STR "lookup failed on snapview graph" +#define SVC_MSG_SET_INODE_CONTEXT_FAILED_STR "failed to set inode context" +#define SVC_MSG_NO_MEMORY_STR "failed to allocate memory" +#define SVC_MSG_COPY_ENTRY_POINT_FAILED_STR \ + "failed to copy the entry point string" +#define SVC_MSG_GET_FD_CONTEXT_FAILED_STR "fd context not found" +#define SVC_MSG_GET_INODE_CONTEXT_FAILED_STR "failed to get inode context" +#define SVC_MSG_ALLOC_INODE_FAILED_STR "failed to allocate new inode" +#define SVC_MSG_DICT_SET_FAILED_STR "failed to set dict" +#define SVC_MSG_RENAME_SNAPSHOT_ENTRY_STR \ 
+ "rename happening on a entry residing in snapshot" +#define SVC_MSG_DELETE_INODE_CONTEXT_FAILED_STR "failed to delete inode context" +#define SVC_MSG_NULL_PRIV_STR "priv NULL" +#define SVC_MSG_INVALID_ENTRY_POINT_STR "not a valid entry point" +#define SVC_MSG_MEM_ACNT_FAILED_STR "Memory accouting init failed" +#define SVC_MSG_NO_CHILD_FOR_XLATOR_STR "configured without any child" +#define SVC_MSG_XLATOR_CHILDREN_WRONG_STR \ + "snap-view-client has got wrong subvolumes. It can have only 2" +#define SVC_MSG_ENTRY_POINT_SPECIAL_DIR_STR \ + "entry point directory cannot be part of special directory" +#define SVC_MSG_NULL_SPECIAL_DIR_STR "null special directory" +#define SVC_MSG_MEM_POOL_GET_FAILED_STR \ + "could not get mem pool for frame->local" +#define SVC_MSG_PRIV_DESTROY_FAILED_STR "failed to destroy private" +#define SVC_MSG_LINK_SNAPSHOT_ENTRY_STR \ + "link happening on a entry residin gin snapshot" +#endif /* !_SNAPVIEW_CLIENT_MESSAGES_H_ */ diff --git a/xlators/features/snapview-client/src/snapview-client.c b/xlators/features/snapview-client/src/snapview-client.c index 4a2bdfbd301..486c5179d5b 100644 --- a/xlators/features/snapview-client/src/snapview-client.c +++ b/xlators/features/snapview-client/src/snapview-client.c @@ -1,555 +1,593 @@ - /* - Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. +/* + Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
*/ #include "snapview-client.h" -#include "inode.h" -#include "byte-order.h" - +#include <glusterfs/inode.h> +#include <glusterfs/byte-order.h> -void -svc_local_free (svc_local_t *local) +static void +svc_local_free(svc_local_t *local) { - if (local) { - loc_wipe (&local->loc); - if (local->fd) - fd_unref (local->fd); - if (local->xdata) - dict_unref (local->xdata); - mem_put (local); - } + if (local) { + loc_wipe(&local->loc); + if (local->fd) + fd_unref(local->fd); + if (local->xdata) + dict_unref(local->xdata); + mem_put(local); + } } -xlator_t * -svc_get_subvolume (xlator_t *this, int inode_type) +static xlator_t * +svc_get_subvolume(xlator_t *this, int inode_type) { - xlator_t *subvolume = NULL; + xlator_t *subvolume = NULL; - GF_VALIDATE_OR_GOTO ("snapview-client", this, out); + GF_VALIDATE_OR_GOTO("snapview-client", this, out); - if (inode_type == VIRTUAL_INODE) - subvolume = SECOND_CHILD (this); - else - subvolume = FIRST_CHILD (this); + if (inode_type == VIRTUAL_INODE) + subvolume = SECOND_CHILD(this); + else + subvolume = FIRST_CHILD(this); out: - return subvolume; + return subvolume; } -int32_t -__svc_inode_ctx_set (xlator_t *this, inode_t *inode, int inode_type) +static int32_t +__svc_inode_ctx_set(xlator_t *this, inode_t *inode, int inode_type) { - uint64_t value = 0; - int32_t ret = -1; + uint64_t value = 0; + int32_t ret = -1; - GF_VALIDATE_OR_GOTO ("snapview-client", this, out); - GF_VALIDATE_OR_GOTO (this->name, inode, out); + GF_VALIDATE_OR_GOTO("snapview-client", this, out); + GF_VALIDATE_OR_GOTO(this->name, inode, out); - value = inode_type; + value = inode_type; - ret = __inode_ctx_set (inode, this, &value); + ret = __inode_ctx_set(inode, this, &value); out: - return ret; + return ret; } -int -__svc_inode_ctx_get (xlator_t *this, inode_t *inode, int *inode_type) +static int +__svc_inode_ctx_get(xlator_t *this, inode_t *inode, int *inode_type) { - uint64_t value = 0; - int ret = -1; + uint64_t value = 0; + int ret = -1; - GF_VALIDATE_OR_GOTO 
("snapview-client", this, out); - GF_VALIDATE_OR_GOTO (this->name, inode, out); + GF_VALIDATE_OR_GOTO("snapview-client", this, out); + GF_VALIDATE_OR_GOTO(this->name, inode, out); - ret = __inode_ctx_get (inode, this, &value); - if (ret < 0) - goto out; + ret = __inode_ctx_get(inode, this, &value); + if (ret < 0) + goto out; - *inode_type = (int)(value); + *inode_type = (int)(value); out: - return ret; + return ret; } -int -svc_inode_ctx_get (xlator_t *this, inode_t *inode, int *inode_type) +static int +svc_inode_ctx_get(xlator_t *this, inode_t *inode, int *inode_type) { - int ret = -1; + int ret = -1; - GF_VALIDATE_OR_GOTO ("snapview-client", this, out); - GF_VALIDATE_OR_GOTO (this->name, inode, out); + GF_VALIDATE_OR_GOTO("snapview-client", this, out); + GF_VALIDATE_OR_GOTO(this->name, inode, out); - LOCK (&inode->lock); - { - ret = __svc_inode_ctx_get (this, inode, inode_type); - } - UNLOCK (&inode->lock); + LOCK(&inode->lock); + { + ret = __svc_inode_ctx_get(this, inode, inode_type); + } + UNLOCK(&inode->lock); out: - return ret; + return ret; } -int32_t -svc_inode_ctx_set (xlator_t *this, inode_t *inode, int inode_type) +static int32_t +svc_inode_ctx_set(xlator_t *this, inode_t *inode, int inode_type) { - int32_t ret = -1; + int32_t ret = -1; - GF_VALIDATE_OR_GOTO ("snapview-client", this, out); - GF_VALIDATE_OR_GOTO (this->name, inode, out); + GF_VALIDATE_OR_GOTO("snapview-client", this, out); + GF_VALIDATE_OR_GOTO(this->name, inode, out); - LOCK (&inode->lock); - { - ret = __svc_inode_ctx_set (this, inode, inode_type); - } - UNLOCK (&inode->lock); + LOCK(&inode->lock); + { + ret = __svc_inode_ctx_set(this, inode, inode_type); + } + UNLOCK(&inode->lock); out: - return ret; + return ret; } -svc_fd_t * -svc_fd_new (void) +static svc_fd_t * +svc_fd_new(void) { - svc_fd_t *svc_fd = NULL; + svc_fd_t *svc_fd = NULL; - svc_fd = GF_CALLOC (1, sizeof (*svc_fd), gf_svc_mt_svc_fd_t); + svc_fd = GF_CALLOC(1, sizeof(*svc_fd), gf_svc_mt_svc_fd_t); - return svc_fd; + return 
svc_fd; } -svc_fd_t * -__svc_fd_ctx_get (xlator_t *this, fd_t *fd) +static svc_fd_t * +__svc_fd_ctx_get(xlator_t *this, fd_t *fd) { - svc_fd_t *svc_fd = NULL; - uint64_t value = 0; - int ret = -1; + svc_fd_t *svc_fd = NULL; + uint64_t value = 0; + int ret = -1; - GF_VALIDATE_OR_GOTO ("snapview-client", this, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); + GF_VALIDATE_OR_GOTO("snapview-client", this, out); + GF_VALIDATE_OR_GOTO(this->name, fd, out); - ret = __fd_ctx_get (fd, this, &value); - if (ret) - return NULL; + ret = __fd_ctx_get(fd, this, &value); + if (ret) + return NULL; - svc_fd = (svc_fd_t *) ((long) value); + svc_fd = (svc_fd_t *)((long)value); out: - return svc_fd; + return svc_fd; } -svc_fd_t * -svc_fd_ctx_get (xlator_t *this, fd_t *fd) +static svc_fd_t * +svc_fd_ctx_get(xlator_t *this, fd_t *fd) { - svc_fd_t *svc_fd = NULL; + svc_fd_t *svc_fd = NULL; - GF_VALIDATE_OR_GOTO ("snapview-client", this, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); + GF_VALIDATE_OR_GOTO("snapview-client", this, out); + GF_VALIDATE_OR_GOTO(this->name, fd, out); - LOCK (&fd->lock); - { - svc_fd = __svc_fd_ctx_get (this, fd); - } - UNLOCK (&fd->lock); + LOCK(&fd->lock); + { + svc_fd = __svc_fd_ctx_get(this, fd); + } + UNLOCK(&fd->lock); out: - return svc_fd; + return svc_fd; } -int -__svc_fd_ctx_set (xlator_t *this, fd_t *fd, svc_fd_t *svc_fd) +static int +__svc_fd_ctx_set(xlator_t *this, fd_t *fd, svc_fd_t *svc_fd) { - uint64_t value = 0; - int ret = -1; + uint64_t value = 0; + int ret = -1; - GF_VALIDATE_OR_GOTO ("snapview-client", this, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - GF_VALIDATE_OR_GOTO (this->name, svc_fd, out); + GF_VALIDATE_OR_GOTO("snapview-client", this, out); + GF_VALIDATE_OR_GOTO(this->name, fd, out); + GF_VALIDATE_OR_GOTO(this->name, svc_fd, out); - value = (uint64_t)(long) svc_fd; + value = (uint64_t)(long)svc_fd; - ret = __fd_ctx_set (fd, this, value); + ret = __fd_ctx_set(fd, this, value); out: - return ret; + return ret; } 
-int32_t -svc_fd_ctx_set (xlator_t *this, fd_t *fd, svc_fd_t *svc_fd) +static svc_fd_t * +__svc_fd_ctx_get_or_new(xlator_t *this, fd_t *fd) { - int32_t ret = -1; + svc_fd_t *svc_fd = NULL; + int ret = -1; + inode_t *inode = NULL; - GF_VALIDATE_OR_GOTO ("snapview-client", this, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - GF_VALIDATE_OR_GOTO (this->name, svc_fd, out); + GF_VALIDATE_OR_GOTO("snapview-client", this, out); + GF_VALIDATE_OR_GOTO(this->name, fd, out); - LOCK (&fd->lock); - { - ret = __svc_fd_ctx_set (this, fd, svc_fd); - } - UNLOCK (&fd->lock); + inode = fd->inode; + svc_fd = __svc_fd_ctx_get(this, fd); + if (svc_fd) { + ret = 0; + goto out; + } + + svc_fd = svc_fd_new(); + if (!svc_fd) { + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, SVC_MSG_ALLOC_FD_FAILED, + "gfid=%s", uuid_utoa(inode->gfid), NULL); + goto out; + } + + ret = __svc_fd_ctx_set(this, fd, svc_fd); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_FD_CONTEXT_FAILED, + "gfid=%s", uuid_utoa(inode->gfid), NULL); + ret = -1; + } out: - return ret; + if (ret) { + GF_FREE(svc_fd); + svc_fd = NULL; + } + + return svc_fd; } -svc_fd_t * -__svc_fd_ctx_get_or_new (xlator_t *this, fd_t *fd) +static svc_fd_t * +svc_fd_ctx_get_or_new(xlator_t *this, fd_t *fd) { - svc_fd_t *svc_fd = NULL; - int ret = -1; - inode_t *inode = NULL; + svc_fd_t *svc_fd = NULL; - GF_VALIDATE_OR_GOTO ("snapview-client", this, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); + GF_VALIDATE_OR_GOTO("snapview-client", this, out); + GF_VALIDATE_OR_GOTO(this->name, fd, out); - inode = fd->inode; - svc_fd = __svc_fd_ctx_get (this, fd); - if (svc_fd) { - ret = 0; - goto out; - } - - svc_fd = svc_fd_new (); - if (!svc_fd) { - gf_log (this->name, GF_LOG_ERROR, "failed to allocate new fd " - "context for gfid %s", uuid_utoa (inode->gfid)); - goto out; - } - - ret = __svc_fd_ctx_set (this, fd, svc_fd); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "failed to set fd context " - "for gfid %s", uuid_utoa 
(inode->gfid)); - ret = -1; - } + LOCK(&fd->lock); + { + svc_fd = __svc_fd_ctx_get_or_new(this, fd); + } + UNLOCK(&fd->lock); out: - if (ret) { - GF_FREE (svc_fd); - svc_fd = NULL; - } - - return svc_fd; + return svc_fd; } -svc_fd_t * -svc_fd_ctx_get_or_new (xlator_t *this, fd_t *fd) +/** + * @this: xlator + * @entry_point: pointer to the buffer provided by consumer + * + * This function is mainly for copying the entry point name + * (stored as string in priv->path) to a buffer point to by + * @entry_point within the lock. It is for the consumer to + * allocate the memory for the buffer. + * + * This function is called by all the functions (or fops) + * who need to use priv->path for avoiding the race. + * For example, either in lookup or in any other fop, + * while priv->path is being accessed, a reconfigure can + * happen to change priv->path. This ensures that, a lock + * is taken before accessing priv->path. + **/ +int +gf_svc_get_entry_point(xlator_t *this, char *entry_point, size_t dest_size) { - svc_fd_t *svc_fd = NULL; + int ret = -1; + svc_private_t *priv = NULL; - GF_VALIDATE_OR_GOTO ("snapview-client", this, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); + GF_VALIDATE_OR_GOTO("snapview-client", this, out); + GF_VALIDATE_OR_GOTO(this->name, entry_point, out); - LOCK (&fd->lock); - { - svc_fd = __svc_fd_ctx_get_or_new (this, fd); + priv = this->private; + + LOCK(&priv->lock); + { + if (dest_size <= strlen(priv->path)) { + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_STR_LEN, + "dest-size=%zu", dest_size, "priv-path-len=%zu", + strlen(priv->path), "path=%s", priv->path, NULL); + } else { + snprintf(entry_point, dest_size, "%s", priv->path); + ret = 0; } - UNLOCK (&fd->lock); + } + UNLOCK(&priv->lock); out: - return svc_fd; + return ret; } - static int32_t -svc_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *xdata, struct iatt *postparent) 
+gf_svc_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, struct iatt *postparent) { - svc_local_t *local = NULL; - inode_t *parent = NULL; - xlator_t *subvolume = NULL; - gf_boolean_t do_unwind = _gf_true; - int inode_type = -1; - int parent_type = -1; - int ret = -1; - - local = frame->local; - subvolume = local->subvolume; - if (!subvolume) { - gf_log_callingfn (this->name, GF_LOG_ERROR, "path: %s, " - "gfid: %s ", local->loc.path, - inode?uuid_utoa (inode->gfid):""); - GF_ASSERT (0); - } - - /* There is a possibility that, the client process just came online - and does not have the inode on which the lookup came. In that case, - the fresh inode created from fuse for the lookup fop, wont have - the inode context set without which svc cannot decide where to - STACK_WIND to. So by default it decides to send the fop to the - regular subvolume (i.e first child of the xlator). If lookup fails - on the regular volume, then there is a possibility that the lookup - is happening on a virtual inode (i.e history data residing in snaps). - So if lookup fails with ENOENT and the inode context is not there, - then send the lookup to the 2nd child of svc. - - If there are any changes in volfile/client-restarted then inode-ctx - is lost. In this case if nameless lookup fails with ESTALE, - then send the lookup to the 2nd child of svc. - */ - if (op_ret) { - if (subvolume == FIRST_CHILD (this)) { - gf_log (this->name, - (op_errno == ENOENT || op_errno == ESTALE) - ? GF_LOG_DEBUG:GF_LOG_ERROR, - "Lookup failed on normal graph with error %s", - strerror (op_errno)); - } else { - gf_log (this->name, - (op_errno == ENOENT || op_errno == ESTALE) - ? 
GF_LOG_DEBUG:GF_LOG_ERROR, - "Lookup failed on snapview graph with error %s", - strerror (op_errno)); - } - - if ((op_errno == ENOENT || op_errno == ESTALE) && - !gf_uuid_is_null (local->loc.gfid)) { - ret = svc_inode_ctx_get (this, inode, &inode_type); - if (ret < 0 && subvolume == FIRST_CHILD (this)) { - gf_log (this->name, GF_LOG_DEBUG, - "Lookup on normal graph failed. " - "Sending lookup to snapview-server"); - - subvolume = SECOND_CHILD (this); - local->subvolume = subvolume; - STACK_WIND (frame, svc_lookup_cbk, subvolume, - subvolume->fops->lookup, - &local->loc, xdata); - do_unwind = _gf_false; - } - } - - goto out; + svc_local_t *local = NULL; + xlator_t *subvolume = NULL; + gf_boolean_t do_unwind = _gf_true; + int inode_type = -1; + int ret = -1; + + local = frame->local; + subvolume = local->subvolume; + if (!subvolume) { + gf_msg_callingfn(this->name, GF_LOG_ERROR, 0, SVC_MSG_SUBVOLUME_NULL, + "path: %s gfid: %s ", local->loc.path, + inode ? uuid_utoa(inode->gfid) : ""); + GF_ASSERT(0); + } + + /* There is a possibility that, the client process just came online + and does not have the inode on which the lookup came. In that case, + the fresh inode created from fuse for the lookup fop, won't have + the inode context set without which svc cannot decide where to + STACK_WIND to. So by default it decides to send the fop to the + regular subvolume (i.e first child of the xlator). If lookup fails + on the regular volume, then there is a possibility that the lookup + is happening on a virtual inode (i.e history data residing in snaps). + So if lookup fails with ENOENT and the inode context is not there, + then send the lookup to the 2nd child of svc. + + If there are any changes in volfile/client-restarted then inode-ctx + is lost. In this case if nameless lookup fails with ESTALE, + then send the lookup to the 2nd child of svc. + */ + if (op_ret) { + if (subvolume == FIRST_CHILD(this)) { + gf_smsg(this->name, + (op_errno == ENOENT || op_errno == ESTALE) ? 
GF_LOG_DEBUG + : GF_LOG_ERROR, + op_errno, SVC_MSG_NORMAL_GRAPH_LOOKUP_FAIL, "error=%s", + strerror(op_errno), NULL); + } else { + gf_smsg(this->name, + (op_errno == ENOENT || op_errno == ESTALE) ? GF_LOG_DEBUG + : GF_LOG_ERROR, + op_errno, SVC_MSG_SNAPVIEW_GRAPH_LOOKUP_FAIL, "error=%s", + strerror(op_errno), NULL); + goto out; + } + + if ((op_errno == ENOENT || op_errno == ESTALE) && + !gf_uuid_is_null(local->loc.gfid)) { + if (inode != NULL) + ret = svc_inode_ctx_get(this, inode, &inode_type); + + if (ret < 0 || inode == NULL) { + gf_msg_debug(this->name, 0, + "Lookup on normal graph failed. " + " Sending lookup to snapview-server"); + subvolume = SECOND_CHILD(this); + local->subvolume = subvolume; + STACK_WIND(frame, gf_svc_lookup_cbk, subvolume, + subvolume->fops->lookup, &local->loc, xdata); + do_unwind = _gf_false; + } } - if (local->loc.parent) - parent = inode_ref (local->loc.parent); - else { - parent = inode_parent (inode, NULL, NULL); - if (!parent && !gf_uuid_is_null (local->loc.pargfid)) { - parent = inode_find (inode->table, - local->loc.pargfid); - } - } + goto out; + } - if (!__is_root_gfid (buf->ia_gfid) && parent) { - ret = svc_inode_ctx_get (this, parent, &parent_type); - if (ret < 0) { - op_ret = -1; - op_errno = EINVAL; - gf_log (this->name, GF_LOG_WARNING, - "Error fetching parent context"); - goto out; - } - } - - if (subvolume == FIRST_CHILD (this)) - inode_type = NORMAL_INODE; - else - inode_type = VIRTUAL_INODE; + if (subvolume == FIRST_CHILD(this)) + inode_type = NORMAL_INODE; + else + inode_type = VIRTUAL_INODE; - ret = svc_inode_ctx_set (this, inode, inode_type); - if (ret) - gf_log (this->name, GF_LOG_ERROR, "failed to set inode type" - "into the context"); + ret = svc_inode_ctx_set(this, inode, inode_type); + if (ret) + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED, + "gfid=%s", uuid_utoa(inode->gfid), NULL); out: - if (do_unwind) { - SVC_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, buf, - xdata, 
postparent); - } - - if (parent) - inode_unref (parent); + if (do_unwind) { + SVC_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, buf, xdata, + postparent); + } - return 0; + return 0; } static int32_t -svc_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, - dict_t *xdata) +gf_svc_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { - int32_t ret = -1; - svc_local_t *local = NULL; - xlator_t *subvolume = NULL; - int op_ret = -1; - int op_errno = EINVAL; - inode_t *parent = NULL; - svc_private_t *priv = NULL; - dict_t *new_xdata = NULL; - int inode_type = -1; - int parent_type = -1; - gf_boolean_t wind = _gf_false; - - GF_VALIDATE_OR_GOTO ("svc", this, out); - GF_VALIDATE_OR_GOTO (this->name, this->private, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, loc->inode, out); - - priv = this->private; - - ret = svc_inode_ctx_get (this, loc->inode, &inode_type); - if (!__is_root_gfid (loc->gfid)) { - if (loc->parent) { - parent = inode_ref (loc->parent); - ret = svc_inode_ctx_get (this, loc->parent, - &parent_type); - } else { - parent = inode_parent (loc->inode, loc->pargfid, NULL); - if (parent) - ret = svc_inode_ctx_get (this, parent, - &parent_type); - } - } - - local = mem_get0 (this->local_pool); - if (!local) { - gf_log (this->name, GF_LOG_ERROR, "failed to allocate local"); - op_ret = -1; - op_errno = ENOMEM; - goto out; - } - - frame->local = local; - loc_copy (&local->loc, loc); - - if (__is_root_gfid (loc->inode->gfid)) { - subvolume = FIRST_CHILD (this); - GF_ASSERT (subvolume); - local->subvolume = subvolume; - wind = _gf_true; - goto out; - } - - /* nfs sends nameless lookups directly using the gfid. In that case - loc->name will be NULL. So check if loc->name is NULL. If so, then - try to get the subvolume using inode context. But if the inode has - not been looked up yet, then send the lookup call to the first - subvolume. 
- */ - - if (!loc->name) { - if (gf_uuid_is_null (loc->inode->gfid)) { - subvolume = FIRST_CHILD (this); - local->subvolume = subvolume; - wind = _gf_true; - goto out; - } else { - if (inode_type >= 0) - subvolume = svc_get_subvolume (this, - inode_type); - else - subvolume = FIRST_CHILD (this); - local->subvolume = subvolume; - wind = _gf_true; - goto out; - } - } - - if (strcmp (loc->name, priv->path)) { - if (parent_type == NORMAL_INODE) { - subvolume = FIRST_CHILD (this); - local->subvolume = subvolume; - } else { - subvolume = SECOND_CHILD (this); - local->subvolume = subvolume; - } + int32_t ret = -1; + svc_local_t *local = NULL; + xlator_t *subvolume = NULL; + int op_ret = -1; + int op_errno = EINVAL; + inode_t *parent = NULL; + dict_t *new_xdata = NULL; + int inode_type = -1; + int parent_type = -1; + gf_boolean_t wind = _gf_false; + char entry_point[NAME_MAX + 1] = { + 0, + }; + + GF_VALIDATE_OR_GOTO("svc", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, loc, out); + GF_VALIDATE_OR_GOTO(this->name, loc->inode, out); + + ret = svc_inode_ctx_get(this, loc->inode, &inode_type); + if (!__is_root_gfid(loc->gfid)) { + if (loc->parent) { + parent = inode_ref(loc->parent); + ret = svc_inode_ctx_get(this, loc->parent, &parent_type); } else { - subvolume = SECOND_CHILD (this); - local->subvolume = subvolume; - if (parent_type == NORMAL_INODE) { - /* Indication of whether the lookup is happening on the - entry point or not, to the snapview-server. 
- */ - SVC_ENTRY_POINT_SET (this, xdata, op_ret, op_errno, - new_xdata, priv, ret, out); - } + parent = inode_parent(loc->inode, loc->pargfid, NULL); + if (parent) + ret = svc_inode_ctx_get(this, parent, &parent_type); + } + } + + local = mem_get0(this->local_pool); + if (!local) { + op_ret = -1; + op_errno = ENOMEM; + gf_smsg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_NO_MEMORY, NULL); + goto out; + } + + frame->local = local; + loc_copy(&local->loc, loc); + + if (__is_root_gfid(loc->inode->gfid)) { + subvolume = FIRST_CHILD(this); + GF_ASSERT(subvolume); + local->subvolume = subvolume; + wind = _gf_true; + goto out; + } + + /* nfs sends nameless lookups directly using the gfid. In that case + loc->name will be NULL. So check if loc->name is NULL. If so, then + try to get the subvolume using inode context. But if the inode has + not been looked up yet, then send the lookup call to the first + subvolume. + */ + + if (!loc->name) { + if (gf_uuid_is_null(loc->inode->gfid)) { + subvolume = FIRST_CHILD(this); + local->subvolume = subvolume; + wind = _gf_true; + goto out; + } else { + if (inode_type >= 0) + subvolume = svc_get_subvolume(this, inode_type); + else + subvolume = FIRST_CHILD(this); + local->subvolume = subvolume; + wind = _gf_true; + goto out; + } + } + + if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) { + gf_smsg(this->name, GF_LOG_WARNING, op_errno, + SVC_MSG_COPY_ENTRY_POINT_FAILED, NULL); + goto out; + } + + if (strcmp(loc->name, entry_point)) { + if (parent_type == VIRTUAL_INODE) { + subvolume = SECOND_CHILD(this); + } else { + /* + * Either parent type is normal graph, or the parent + * type is uncertain. + */ + subvolume = FIRST_CHILD(this); } + local->subvolume = subvolume; + } else { + subvolume = SECOND_CHILD(this); + local->subvolume = subvolume; + if (parent_type == NORMAL_INODE) { + /* Indication of whether the lookup is happening on the + entry point or not, to the snapview-server. 
+ */ + SVC_ENTRY_POINT_SET(this, xdata, op_ret, op_errno, new_xdata, ret, + out); + } + } - wind = _gf_true; + wind = _gf_true; out: - if (wind) - STACK_WIND (frame, svc_lookup_cbk, - subvolume, subvolume->fops->lookup, loc, xdata); - else - SVC_STACK_UNWIND (lookup, frame, op_ret, op_errno, NULL, - NULL, NULL, NULL); - if (new_xdata) - dict_unref (new_xdata); - - if (parent) - inode_unref (parent); - - return 0; + if (wind) + STACK_WIND(frame, gf_svc_lookup_cbk, subvolume, subvolume->fops->lookup, + loc, xdata); + else + SVC_STACK_UNWIND(lookup, frame, op_ret, op_errno, NULL, NULL, NULL, + NULL); + if (new_xdata) + dict_unref(new_xdata); + + if (parent) + inode_unref(parent); + + return 0; } static int32_t -svc_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, - dict_t *xdata) +gf_svc_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { - xlator_t *subvolume = NULL; - int32_t ret = -1; - int inode_type = -1; - int32_t op_ret = -1; - int32_t op_errno = EINVAL; - gf_boolean_t wind = _gf_false; - svc_private_t *priv = NULL; - const char *path = NULL; - int path_len = -1; - int snap_len = -1; - loc_t root_loc = {0,}; - loc_t *temp_loc = NULL; - - GF_VALIDATE_OR_GOTO ("svc", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, loc->inode, out); - - priv = this->private; - SVC_GET_SUBVOL_FROM_CTX (this, op_ret, op_errno, inode_type, ret, - loc->inode, subvolume, out); - path_len = strlen (loc->path); - snap_len = strlen (priv->path); - temp_loc = loc; - - if (path_len >= snap_len && inode_type == VIRTUAL_INODE) { - path = &loc->path[path_len - snap_len]; - if (!strcmp (path, priv->path)) { - /* - * statfs call for virtual snap directory. 
- * Sent the fops to parent volume by removing - * virtual directory from path - */ - subvolume = FIRST_CHILD (this); - root_loc.path = gf_strdup("/"); - gf_uuid_clear(root_loc.gfid); - root_loc.gfid[15] = 1; - root_loc.inode = inode_ref (loc->inode->table->root); - temp_loc = &root_loc; - } - } - - STACK_WIND_TAIL (frame, subvolume, subvolume->fops->statfs, - temp_loc, xdata); - if (temp_loc == &root_loc) - loc_wipe (temp_loc); - - wind = _gf_true; + xlator_t *subvolume = NULL; + int32_t ret = -1; + int inode_type = -1; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + gf_boolean_t wind = _gf_false; + svc_private_t *priv = NULL; + const char *path = NULL; + int path_len = -1; + int snap_len = -1; + loc_t root_loc = { + 0, + }; + loc_t *temp_loc = NULL; + char entry_point[NAME_MAX + 1] = { + 0, + }; + + GF_VALIDATE_OR_GOTO("svc", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, loc, out); + GF_VALIDATE_OR_GOTO(this->name, loc->inode, out); + + priv = this->private; + SVC_GET_SUBVOL_FROM_CTX(this, op_ret, op_errno, inode_type, ret, loc->inode, + subvolume, out); + path_len = strlen(loc->path); + snap_len = strlen(priv->path); + temp_loc = loc; + + if (path_len >= snap_len && inode_type == VIRTUAL_INODE) { + path = &loc->path[path_len - snap_len]; + if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) { + gf_smsg(this->name, GF_LOG_WARNING, op_errno, + SVC_MSG_COPY_ENTRY_POINT_FAILED, NULL); + goto out; + } + + if (!strcmp(path, entry_point)) { + /* + * statfs call for virtual snap directory. 
+ * Sent the fops to parent volume by removing + * virtual directory from path + */ + subvolume = FIRST_CHILD(this); + root_loc.path = gf_strdup("/"); + gf_uuid_clear(root_loc.gfid); + root_loc.gfid[15] = 1; + root_loc.inode = inode_ref(loc->inode->table->root); + temp_loc = &root_loc; + } + } + + STACK_WIND_TAIL(frame, subvolume, subvolume->fops->statfs, temp_loc, xdata); + if (temp_loc == &root_loc) + loc_wipe(temp_loc); + + wind = _gf_true; out: - if (!wind) - SVC_STACK_UNWIND (statfs, frame, op_ret, op_errno, - NULL, NULL); - return 0; + if (!wind) + SVC_STACK_UNWIND(statfs, frame, op_ret, op_errno, NULL, NULL); + return 0; +} + +static int32_t +gf_svc_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + dict_t *xdata) +{ + /* TODO: FIX ME + * Consider a testcase: + * #mount -t nfs host1:/vol1 /mnt + * #ls /mnt + * #ls /mnt/.snaps (As expected this fails) + * #gluster volume set vol1 features.uss enable + * Now `ls /mnt/.snaps` should work, but fails with No such file or + * directory. This is because NFS client (gNFS) caches the list of files + * in a directory. This cache is updated if there are any changes in the + * directory attributes. So, one way to solve this problem is to change + * 'ctime' attribute when USS is enabled as below. + * + * if (op_ret == 0 && IA_ISDIR(buf->ia_type)) + * buf->ia_ctime_nsec++; + * + * But this is not the ideal solution as applications see the unexpected + * ctime change causing failures. + */ + + SVC_STACK_UNWIND(stat, frame, op_ret, op_errno, buf, xdata); + return 0; } /* should all the fops be handled like lookup is supposed to be @@ -557,121 +595,119 @@ out: be sent and in the call back update the contexts. 
*/ static int32_t -svc_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, - dict_t *xdata) +gf_svc_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { - int32_t ret = -1; - int inode_type = -1; - xlator_t *subvolume = NULL; - int32_t op_ret = -1; - int32_t op_errno = EINVAL; - gf_boolean_t wind = _gf_false; + int32_t ret = -1; + int inode_type = -1; + xlator_t *subvolume = NULL; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + gf_boolean_t wind = _gf_false; - GF_VALIDATE_OR_GOTO ("svc", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, loc->inode, out); + GF_VALIDATE_OR_GOTO("svc", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, loc, out); + GF_VALIDATE_OR_GOTO(this->name, loc->inode, out); - SVC_GET_SUBVOL_FROM_CTX (this, op_ret, op_errno, inode_type, ret, - loc->inode, subvolume, out); + SVC_GET_SUBVOL_FROM_CTX(this, op_ret, op_errno, inode_type, ret, loc->inode, + subvolume, out); - STACK_WIND_TAIL (frame,subvolume, subvolume->fops->stat, loc, xdata); + STACK_WIND(frame, gf_svc_stat_cbk, subvolume, subvolume->fops->stat, loc, + xdata); - wind = _gf_true; + wind = _gf_true; out: - if (!wind) - SVC_STACK_UNWIND (stat, frame, op_ret, op_errno, - NULL, NULL); - return 0; + if (!wind) + SVC_STACK_UNWIND(stat, frame, op_ret, op_errno, NULL, NULL); + return 0; } static int32_t -svc_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) +gf_svc_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { - int32_t ret = -1; - int inode_type = -1; - xlator_t *subvolume = NULL; - int32_t op_ret = -1; - int32_t op_errno = EINVAL; - gf_boolean_t wind = _gf_false; + int32_t ret = -1; + int inode_type = -1; + xlator_t *subvolume = NULL; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + gf_boolean_t wind = _gf_false; - GF_VALIDATE_OR_GOTO ("svc", this, out); - GF_VALIDATE_OR_GOTO 
(this->name, frame, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - GF_VALIDATE_OR_GOTO (this->name, fd->inode, out); + GF_VALIDATE_OR_GOTO("svc", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, fd, out); + GF_VALIDATE_OR_GOTO(this->name, fd->inode, out); - SVC_GET_SUBVOL_FROM_CTX (this, op_ret, op_errno, inode_type, ret, - fd->inode, subvolume, out); + SVC_GET_SUBVOL_FROM_CTX(this, op_ret, op_errno, inode_type, ret, fd->inode, + subvolume, out); - STACK_WIND_TAIL (frame, subvolume, subvolume->fops->fstat, fd, xdata); + STACK_WIND_TAIL(frame, subvolume, subvolume->fops->fstat, fd, xdata); - wind = _gf_true; + wind = _gf_true; out: - if (!wind) - SVC_STACK_UNWIND (fstat, frame, op_ret, op_errno, NULL, NULL); + if (!wind) + SVC_STACK_UNWIND(fstat, frame, op_ret, op_errno, NULL, NULL); - return ret; + return ret; } static int32_t -svc_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) +gf_svc_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) { - svc_fd_t *svc_fd = NULL; - svc_local_t *local = NULL; - svc_private_t *priv = NULL; - gf_boolean_t special_dir = _gf_false; - char path[PATH_MAX] = {0, }; - - GF_VALIDATE_OR_GOTO ("snapview-client", this, out); - GF_VALIDATE_OR_GOTO (this->name, this->private, out); - - if (op_ret) - goto out; + svc_fd_t *svc_fd = NULL; + svc_local_t *local = NULL; + svc_private_t *priv = NULL; + gf_boolean_t special_dir = _gf_false; + char path[PATH_MAX] = { + 0, + }; + + GF_VALIDATE_OR_GOTO("snapview-client", this, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + + if (op_ret) + goto out; + + priv = this->private; + local = frame->local; + + if (local->subvolume == FIRST_CHILD(this) && priv->special_dir && + strcmp(priv->special_dir, "")) { + if (!__is_root_gfid(fd->inode->gfid)) + snprintf(path, sizeof(path), "%s/.", 
priv->special_dir); + else + snprintf(path, sizeof(path), "/."); - priv = this->private; - local = frame->local; - - if (local->subvolume == FIRST_CHILD (this) && priv->special_dir - && strcmp (priv->special_dir, "")) { - if (!__is_root_gfid (fd->inode->gfid)) - snprintf (path, sizeof (path), "%s/.", - priv->special_dir); - else - snprintf (path, sizeof (path), "/."); - - if (!strcmp (local->loc.path, priv->special_dir) || - !strcmp (local->loc.path, path)) { - gf_log_callingfn (this->name, GF_LOG_DEBUG, - "got opendir on special " - "directory %s (%s)", path, - uuid_utoa (fd->inode->gfid)); - special_dir = _gf_true; - } + if (!strcmp(local->loc.path, priv->special_dir) || + !strcmp(local->loc.path, path)) { + gf_msg_debug(this->name, 0, + "got opendir on special directory" + " %s (gfid: %s)", + path, uuid_utoa(fd->inode->gfid)); + special_dir = _gf_true; } + } - if (special_dir) { - svc_fd = svc_fd_ctx_get_or_new (this, fd); - if (!svc_fd) { - gf_log (this->name, GF_LOG_ERROR, - "fd context not found for %s", - uuid_utoa (fd->inode->gfid)); - goto out; - } - - svc_fd->last_offset = -1; - svc_fd->special_dir = special_dir; + if (special_dir) { + svc_fd = svc_fd_ctx_get_or_new(this, fd); + if (!svc_fd) { + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED, + "gfid=%s", uuid_utoa(fd->inode->gfid), NULL); + goto out; } + svc_fd->last_offset = -1; + svc_fd->special_dir = special_dir; + } + out: - STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, xdata); + STACK_UNWIND_STRICT(opendir, frame, op_ret, op_errno, fd, xdata); - return 0; + return 0; } - /* If the inode represents a directory which is actually present in a snapshot, then opendir on that directory should be sent to the snap-view-server which opens @@ -683,99 +719,98 @@ out: svc has to do things that open-behind is doing. 
*/ static int32_t -svc_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, - dict_t *xdata) +gf_svc_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, + dict_t *xdata) { - int32_t ret = -1; - int inode_type = -1; - xlator_t *subvolume = NULL; - int op_ret = -1; - int op_errno = EINVAL; - gf_boolean_t wind = _gf_false; - svc_local_t *local = NULL; - - GF_VALIDATE_OR_GOTO ("svc", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, loc->inode, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - - local = mem_get0 (this->local_pool); - if (!local) { - gf_log (this->name, GF_LOG_ERROR, "failed to allocate memory " - "for local (path: %s, gfid: %s)", loc->path, - uuid_utoa (fd->inode->gfid)); - op_errno = ENOMEM; - goto out; - } - - SVC_GET_SUBVOL_FROM_CTX (this, op_ret, op_errno, inode_type, ret, - loc->inode, subvolume, out); - - loc_copy (&local->loc, loc); - local->subvolume = subvolume; - frame->local = local; - - STACK_WIND (frame, svc_opendir_cbk, subvolume, subvolume->fops->opendir, - loc, fd, xdata); - - wind = _gf_true; + int32_t ret = -1; + int inode_type = -1; + xlator_t *subvolume = NULL; + int op_ret = -1; + int op_errno = EINVAL; + gf_boolean_t wind = _gf_false; + svc_local_t *local = NULL; + + GF_VALIDATE_OR_GOTO("svc", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, loc, out); + GF_VALIDATE_OR_GOTO(this->name, loc->inode, out); + GF_VALIDATE_OR_GOTO(this->name, fd, out); + + local = mem_get0(this->local_pool); + if (!local) { + op_errno = ENOMEM; + gf_smsg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_NO_MEMORY, + "path=%s", loc->path, "gfid=%s", uuid_utoa(fd->inode->gfid), + NULL); + goto out; + } + loc_copy(&local->loc, loc); + frame->local = local; + + SVC_GET_SUBVOL_FROM_CTX(this, op_ret, op_errno, inode_type, ret, loc->inode, + subvolume, out); + local->subvolume = subvolume; + + 
STACK_WIND(frame, gf_svc_opendir_cbk, subvolume, subvolume->fops->opendir, + loc, fd, xdata); + + wind = _gf_true; out: - if (!wind) - SVC_STACK_UNWIND (opendir, frame, op_ret, op_errno, NULL, NULL); + if (!wind) + SVC_STACK_UNWIND(opendir, frame, op_ret, op_errno, NULL, NULL); - return 0; + return 0; } static int32_t -svc_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - struct iatt *stbuf, int32_t valid, dict_t *xdata) +gf_svc_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + struct iatt *stbuf, int32_t valid, dict_t *xdata) { - int32_t ret = -1; - int inode_type = -1; - int op_ret = -1; - int op_errno = EINVAL; - gf_boolean_t wind = _gf_false; - - GF_VALIDATE_OR_GOTO ("svc", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, loc->inode, out); - - ret = svc_inode_ctx_get (this, loc->inode, &inode_type); - if (ret < 0) { - op_ret = -1; - op_errno = EINVAL; - gf_log (this->name, GF_LOG_ERROR, "failed to get the inode " - "context for %s (gfid: %s)", loc->path, - uuid_utoa (loc->inode->gfid)); - goto out; - } - - if (inode_type == NORMAL_INODE) { - STACK_WIND_TAIL (frame, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->setattr, loc, stbuf, - valid, xdata); - } else { - op_ret = -1; - op_errno = EROFS; - goto out; - } - - wind = _gf_true; + int32_t ret = -1; + int inode_type = -1; + int op_ret = -1; + int op_errno = EINVAL; + gf_boolean_t wind = _gf_false; + + GF_VALIDATE_OR_GOTO("svc", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, loc, out); + GF_VALIDATE_OR_GOTO(this->name, loc->inode, out); + + ret = svc_inode_ctx_get(this, loc->inode, &inode_type); + if (ret < 0) { + op_ret = -1; + op_errno = EINVAL; + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + SVC_MSG_GET_INODE_CONTEXT_FAILED, "path=%s", loc->path, + "gfid= %s", uuid_utoa(loc->inode->gfid), NULL); + goto out; + } + + if (inode_type == NORMAL_INODE) { + 
STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, + xdata); + } else { + op_ret = -1; + op_errno = EROFS; + goto out; + } + + wind = _gf_true; out: - if (!wind) - SVC_STACK_UNWIND (setattr, frame, op_ret, op_errno, - NULL, NULL, NULL); - return 0; + if (!wind) + SVC_STACK_UNWIND(setattr, frame, op_ret, op_errno, NULL, NULL, NULL); + return 0; } -/* XXX: This function is currently not used. Mark it 'static' when required */ -int32_t -svc_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf, - int32_t valid, dict_t *xdata) +/* XXX: This function is currently not used. Remove "#if 0" when required */ +#if 0 +static int32_t +gf_svc_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iatt *stbuf, int32_t valid, dict_t *xdata) { int32_t ret = -1; int inode_type = -1; @@ -790,10 +825,12 @@ svc_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf, ret = svc_inode_ctx_get (this, fd->inode, &inode_type); if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, "failed to get the inode " - "context for %s", uuid_utoa (fd->inode->gfid)); op_ret = -1; op_errno = EINVAL; + gf_msg (this->name, GF_LOG_ERROR, op_errno, + SVC_MSG_GET_INODE_CONTEXT_FAILED, "failed to " + "get the inode context for %s", + uuid_utoa (fd->inode->gfid)); goto out; } @@ -815,91 +852,98 @@ out: NULL, NULL, NULL); return 0; } +#endif /* gf_svc_fsetattr() is not used */ static int32_t -svc_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name, - dict_t *xdata) +gf_svc_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) { - int32_t ret = -1; - int inode_type = -1; - xlator_t *subvolume = NULL; - int op_ret = -1; - int op_errno = EINVAL; - gf_boolean_t wind = _gf_false; - svc_private_t *priv = NULL; - char attrname[PATH_MAX] = ""; - char attrval[64] = ""; - dict_t *dict = NULL; + int32_t ret = -1; + int inode_type = -1; + xlator_t *subvolume = NULL; 
+ int op_ret = -1; + int op_errno = EINVAL; + gf_boolean_t wind = _gf_false; + svc_private_t *priv = NULL; + char attrname[PATH_MAX] = ""; + char attrval[64] = ""; + dict_t *dict = NULL; + char entry_point[NAME_MAX + 1] = { + 0, + }; + + GF_VALIDATE_OR_GOTO("svc", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, loc, out); + GF_VALIDATE_OR_GOTO(this->name, loc->inode, out); + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, out); + + /* + * Samba sends this special key for case insensitive + * filename check. This request comes with a parent + * path and with a special key GF_XATTR_GET_REAL_FILENAME_KEY. + * e.g. "glusterfs.get_real_filename:.snaps". + * If the name variable matches this key then we have + * to send back .snaps as the real filename. + */ + if (!name) + goto stack_wind; + + sscanf(name, "%[^:]:%[^@]", attrname, attrval); + strcat(attrname, ":"); + + if (!strcmp(attrname, GF_XATTR_GET_REAL_FILENAME_KEY)) { + if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) { + gf_smsg(this->name, GF_LOG_WARNING, op_errno, + SVC_MSG_COPY_ENTRY_POINT_FAILED, NULL); + goto out; + } + + if (!strcasecmp(attrval, entry_point)) { + dict = dict_new(); + if (NULL == dict) { + op_errno = ENOMEM; + goto out; + } - GF_VALIDATE_OR_GOTO ("svc", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, loc->inode, out); - priv = this->private; - GF_VALIDATE_OR_GOTO (this->name, priv, out); - - /* - * Samba sends this special key for case insensitive - * filename check. This request comes with a parent - * path and with a special key GF_XATTR_GET_REAL_FILENAME_KEY. - * e.g. "glusterfs.get_real_filename:.snaps". - * If the name variable matches this key then we have - * to send back .snaps as the real filename. 
- */ - if (!name) - goto stack_wind; - - sscanf (name, "%[^:]:%[^@]", attrname, attrval); - strcat (attrname, ":"); - - if (!strcmp (attrname, GF_XATTR_GET_REAL_FILENAME_KEY)) { - if (!strcasecmp (attrval, priv->path)) { - dict = dict_new (); - if (NULL == dict) { - op_errno = ENOMEM; - goto out; - } - - ret = dict_set_dynstr_with_alloc (dict, - (char *)name, - priv->path); - - if (ret) { - op_errno = ENOMEM; - dict_unref (dict); - goto out; - } - - op_errno = 0; - op_ret = strlen (priv->path) + 1; - /* We should return from here */ - goto out; - } + ret = dict_set_dynstr_with_alloc(dict, (char *)name, entry_point); + + if (ret) { + op_errno = ENOMEM; + goto out; + } + + op_errno = 0; + op_ret = strlen(entry_point) + 1; + /* We should return from here */ + goto out; } + } stack_wind: - SVC_GET_SUBVOL_FROM_CTX (this, op_ret, op_errno, inode_type, ret, - loc->inode, subvolume, out); + SVC_GET_SUBVOL_FROM_CTX(this, op_ret, op_errno, inode_type, ret, loc->inode, + subvolume, out); - STACK_WIND_TAIL (frame, subvolume, subvolume->fops->getxattr, loc, name, - xdata); + STACK_WIND_TAIL(frame, subvolume, subvolume->fops->getxattr, loc, name, + xdata); - wind = _gf_true; + wind = _gf_true; out: - if (!wind) - SVC_STACK_UNWIND (getxattr, frame, op_ret, op_errno, - dict, NULL); + if (!wind) + SVC_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, NULL); - if (dict) - dict_unref (dict); + if (dict) + dict_unref(dict); - return 0; + return 0; } -/* XXX: This function is currently not used. Mark it 'static' when required */ -int32_t -svc_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, - dict_t *xdata) +/* XXX: This function is currently not used. 
Mark it '#if 0' when required */ +#if 0 +static int32_t +gf_svc_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *name, dict_t *xdata) { int32_t ret = -1; int inode_type = -1; @@ -927,281 +971,289 @@ out: NULL, NULL); return 0; } +#endif /* gf_svc_fgetxattr() is not used */ static int32_t -svc_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, - int32_t flags, dict_t *xdata) +gf_svc_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + int32_t flags, dict_t *xdata) { - int32_t ret = -1; - int inode_type = -1; - int op_ret = -1; - int op_errno = EINVAL; - gf_boolean_t wind = _gf_false; - - GF_VALIDATE_OR_GOTO ("svc", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, loc->inode, out); - - ret = svc_inode_ctx_get (this, loc->inode, &inode_type); - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, "failed to get inode context " - "for %s (gfid: %s)", loc->name, - uuid_utoa (loc->inode->gfid)); - op_ret = -1; - op_errno = EINVAL; - goto out; - } - - if (inode_type == NORMAL_INODE) { - STACK_WIND_TAIL (frame, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->setxattr, loc, dict, - flags, xdata); - } else { - op_ret = -1; - op_errno = EROFS; - goto out; - } - - wind = _gf_true; + int32_t ret = -1; + int inode_type = -1; + int op_ret = -1; + int op_errno = EINVAL; + gf_boolean_t wind = _gf_false; + + GF_VALIDATE_OR_GOTO("svc", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, loc, out); + GF_VALIDATE_OR_GOTO(this->name, loc->inode, out); + + ret = svc_inode_ctx_get(this, loc->inode, &inode_type); + if (ret < 0) { + op_ret = -1; + op_errno = EINVAL; + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + SVC_MSG_GET_INODE_CONTEXT_FAILED, "name=%s", loc->name, + "gfid=%s", uuid_utoa(loc->inode->gfid), NULL); + goto out; + } + + if (inode_type == NORMAL_INODE) { + STACK_WIND_TAIL(frame, 
FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, + xdata); + } else { + op_ret = -1; + op_errno = EROFS; + goto out; + } + + wind = _gf_true; out: - if (!wind) - SVC_STACK_UNWIND (setxattr, frame, op_ret, op_errno, - NULL); + if (!wind) + SVC_STACK_UNWIND(setxattr, frame, op_ret, op_errno, NULL); - return 0; + return 0; } static int32_t -svc_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, - int32_t flags, dict_t *xdata) +gf_svc_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, + int32_t flags, dict_t *xdata) { - int32_t ret = -1; - int inode_type = -1; - int op_ret = -1; - int op_errno = EINVAL; - gf_boolean_t wind = _gf_false; - - GF_VALIDATE_OR_GOTO ("svc", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - GF_VALIDATE_OR_GOTO (this->name, fd->inode, out); - - ret = svc_inode_ctx_get (this, fd->inode, &inode_type); - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, "failed to get inode context " - "for %s", uuid_utoa (fd->inode->gfid)); - op_ret = -1; - op_errno = EINVAL; - goto out; - } - - if (inode_type == NORMAL_INODE) { - STACK_WIND_TAIL (frame, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fsetxattr, fd, dict, - flags, xdata); - } else { - op_ret = -1; - op_errno = EROFS; - goto out; - } - - wind = _gf_true; + int32_t ret = -1; + int inode_type = -1; + int op_ret = -1; + int op_errno = EINVAL; + gf_boolean_t wind = _gf_false; + + GF_VALIDATE_OR_GOTO("svc", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, fd, out); + GF_VALIDATE_OR_GOTO(this->name, fd->inode, out); + + ret = svc_inode_ctx_get(this, fd->inode, &inode_type); + if (ret < 0) { + op_ret = -1; + op_errno = EINVAL; + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + SVC_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s", + uuid_utoa(fd->inode->gfid), NULL); + goto out; + } + + if (inode_type == NORMAL_INODE) { + STACK_WIND_TAIL(frame, 
FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, + xdata); + } else { + op_ret = -1; + op_errno = EROFS; + goto out; + } + + wind = _gf_true; out: - if (!wind) - STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, - NULL); + if (!wind) + STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, NULL); - return 0; + return 0; } static int32_t -svc_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, - dict_t *xdata) +gf_svc_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + dict_t *xdata) { - int inode_type = -1; - int ret = -1; - int op_ret = -1; - int op_errno = EINVAL; - gf_boolean_t wind = _gf_false; - - GF_VALIDATE_OR_GOTO ("svc", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, loc->inode, out); - - ret = svc_inode_ctx_get (this, loc->inode, &inode_type); - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, "failed to get the inode " - "context for %s (gfid: %s)", loc->name, - uuid_utoa (loc->inode->gfid)); - op_ret = -1; - op_errno = EINVAL; - goto out; - } - - if (inode_type == NORMAL_INODE) { - STACK_WIND_TAIL (frame, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->rmdir, loc, flags, - xdata); - } else { - op_ret = -1; - op_errno = EROFS; - goto out; - } - - wind = _gf_true; + int inode_type = -1; + int ret = -1; + int op_ret = -1; + int op_errno = EINVAL; + gf_boolean_t wind = _gf_false; + + GF_VALIDATE_OR_GOTO("svc", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, loc, out); + GF_VALIDATE_OR_GOTO(this->name, loc->inode, out); + + ret = svc_inode_ctx_get(this, loc->inode, &inode_type); + if (ret < 0) { + op_ret = -1; + op_errno = EINVAL; + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + SVC_MSG_GET_INODE_CONTEXT_FAILED, "name=%s", loc->name, + "gfid=%s", uuid_utoa(loc->inode->gfid), NULL); + goto out; + } + + if (inode_type == NORMAL_INODE) { + 
STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rmdir, loc, flags, xdata); + } else { + op_ret = -1; + op_errno = EROFS; + goto out; + } + + wind = _gf_true; out: - if (!wind) - SVC_STACK_UNWIND (rmdir, frame, op_ret, op_errno, - NULL, NULL, NULL); - return 0; + if (!wind) + SVC_STACK_UNWIND(rmdir, frame, op_ret, op_errno, NULL, NULL, NULL); + return 0; } static int32_t -svc_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +gf_svc_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - int inode_type = -1; - int ret = -1; + int inode_type = -1; + int ret = -1; - if (op_ret < 0) - goto out; - - inode_type = NORMAL_INODE; - ret = svc_inode_ctx_set (this, inode, inode_type); - if (ret) - gf_log (this->name, GF_LOG_ERROR, "failed to set inode " - "context"); + if (op_ret < 0) + goto out; + inode_type = NORMAL_INODE; + ret = svc_inode_ctx_set(this, inode, inode_type); + if (ret) + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED, + NULL); out: - SVC_STACK_UNWIND (mkdir, frame, op_ret, op_errno, inode, - buf, preparent, postparent, xdata); - return 0; + SVC_STACK_UNWIND(mkdir, frame, op_ret, op_errno, inode, buf, preparent, + postparent, xdata); + return 0; } static int32_t -svc_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, - mode_t umask, dict_t *xdata) +gf_svc_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + mode_t umask, dict_t *xdata) { - int parent_type = -1; - int ret = -1; - int op_ret = -1; - int op_errno = EINVAL; - svc_private_t *priv = NULL; - gf_boolean_t wind = _gf_false; - - GF_VALIDATE_OR_GOTO ("svc", this, out); - GF_VALIDATE_OR_GOTO (this->name, this->private, out); - 
GF_VALIDATE_OR_GOTO (this->name, frame, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, loc->inode, out); - - priv = this->private; - - ret = svc_inode_ctx_get (this, loc->parent, &parent_type); - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, "failed to get the inode " - "context for %s", uuid_utoa (loc->parent->gfid)); - op_ret = -1; - op_errno = EINVAL; - goto out; - } - - if (strcmp (loc->name, priv->path) && parent_type == NORMAL_INODE) { - STACK_WIND (frame, svc_mkdir_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->mkdir, loc, mode, - umask, xdata); - } else { - op_ret = -1; - op_errno = EROFS; - goto out; - } - - wind = _gf_true; + int parent_type = -1; + int ret = -1; + int op_ret = -1; + int op_errno = EINVAL; + gf_boolean_t wind = _gf_false; + char entry_point[NAME_MAX + 1] = { + 0, + }; + + GF_VALIDATE_OR_GOTO("svc", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, loc, out); + GF_VALIDATE_OR_GOTO(this->name, loc->inode, out); + + ret = svc_inode_ctx_get(this, loc->parent, &parent_type); + if (ret < 0) { + op_ret = -1; + op_errno = EINVAL; + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + SVC_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s", + uuid_utoa(loc->parent->gfid), NULL); + goto out; + } + + if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) { + gf_smsg(this->name, GF_LOG_WARNING, op_errno, + SVC_MSG_COPY_ENTRY_POINT_FAILED, NULL); + goto out; + } + + if (strcmp(loc->name, entry_point) && parent_type == NORMAL_INODE) { + STACK_WIND(frame, gf_svc_mkdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata); + } else { + op_ret = -1; + op_errno = EROFS; + goto out; + } + + wind = _gf_true; out: - if (!wind) - SVC_STACK_UNWIND (mkdir, frame, op_ret, op_errno, NULL, NULL, - NULL, NULL, NULL); - return 0; + if (!wind) + SVC_STACK_UNWIND(mkdir, frame, op_ret, op_errno, NULL, NULL, NULL, NULL, + NULL); + return 0; } static 
int32_t -svc_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +gf_svc_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - int inode_type = -1; - int ret = -1; + int inode_type = -1; + int ret = -1; - if (op_ret < 0) - goto out; + if (op_ret < 0) + goto out; - inode_type = NORMAL_INODE; - ret = svc_inode_ctx_set (this, inode, inode_type); - if (ret) - gf_log (this->name, GF_LOG_ERROR, "failed to set inode " - "context"); + inode_type = NORMAL_INODE; + ret = svc_inode_ctx_set(this, inode, inode_type); + if (ret) + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED, + NULL); out: - SVC_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode, - buf, preparent, postparent, xdata); - return 0; + SVC_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, buf, preparent, + postparent, xdata); + return 0; } static int32_t -svc_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, - dev_t rdev, mode_t umask, dict_t *xdata) +gf_svc_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + dev_t rdev, mode_t umask, dict_t *xdata) { - int parent_type = -1; - int ret = -1; - int op_ret = -1; - int op_errno = EINVAL; - svc_private_t *priv = NULL; - gf_boolean_t wind = _gf_false; - - GF_VALIDATE_OR_GOTO ("svc", this, out); - GF_VALIDATE_OR_GOTO (this->name, this->private, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, loc->inode, out); - - priv = this->private; - - ret = svc_inode_ctx_get (this, loc->parent, &parent_type); - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, "failed to get the inode " - "context for %s", uuid_utoa (loc->parent->gfid)); - 
op_ret = -1; - op_errno = EINVAL; - goto out; - } - - if (strcmp (loc->name, priv->path) && parent_type == NORMAL_INODE) { - STACK_WIND (frame, svc_mknod_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->mknod, loc, mode, - rdev, umask, xdata); - } else { - op_ret = -1; - op_errno = EROFS; - goto out; - } - - wind = _gf_true; + int parent_type = -1; + int ret = -1; + int op_ret = -1; + int op_errno = EINVAL; + gf_boolean_t wind = _gf_false; + char entry_point[NAME_MAX + 1] = { + 0, + }; + + GF_VALIDATE_OR_GOTO("svc", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, loc, out); + GF_VALIDATE_OR_GOTO(this->name, loc->inode, out); + + ret = svc_inode_ctx_get(this, loc->parent, &parent_type); + if (ret < 0) { + op_ret = -1; + op_errno = EINVAL; + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + SVC_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s", + uuid_utoa(loc->parent->gfid), NULL); + goto out; + } + + if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) { + gf_smsg(this->name, GF_LOG_WARNING, op_errno, + SVC_MSG_COPY_ENTRY_POINT_FAILED, NULL); + goto out; + } + + if (strcmp(loc->name, entry_point) && parent_type == NORMAL_INODE) { + STACK_WIND(frame, gf_svc_mknod_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, + xdata); + } else { + op_ret = -1; + op_errno = EROFS; + goto out; + } + + wind = _gf_true; out: - if (!wind) - SVC_STACK_UNWIND (mknod, frame, op_ret, op_errno, NULL, NULL, - NULL, NULL, NULL); - return 0; + if (!wind) + SVC_STACK_UNWIND(mknod, frame, op_ret, op_errno, NULL, NULL, NULL, NULL, + NULL); + return 0; } /* If the flags of the open call contain O_WRONLY or O_RDWR and the inode is @@ -1209,382 +1261,447 @@ out: STACK_WIND the call to the first child of svc xlator. 
*/ static int32_t -svc_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - fd_t *fd, dict_t *xdata) +gf_svc_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + fd_t *fd, dict_t *xdata) { - xlator_t *subvolume = NULL; - int inode_type = -1; - int op_ret = -1; - int op_errno = EINVAL; - int ret = -1; - gf_boolean_t wind = _gf_false; - - GF_VALIDATE_OR_GOTO ("svc", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, loc->inode, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - - /* Another way is to STACK_WIND to normal subvolume, if inode - type is not there in the context. If the file actually resides - in snapshots, then ENOENT would be returned. Needs more analysis. - */ - SVC_GET_SUBVOL_FROM_CTX (this, op_ret, op_errno, inode_type, ret, - loc->inode, subvolume, out); - - if (((flags & O_ACCMODE) == O_WRONLY) || - ((flags & O_ACCMODE) == O_RDWR)) { - if (subvolume != FIRST_CHILD (this)) { - op_ret = -1; - op_errno = EINVAL; - goto out; - } - } - - STACK_WIND_TAIL (frame, subvolume, subvolume->fops->open, loc, - flags, fd, xdata); - - wind = _gf_true; + xlator_t *subvolume = NULL; + int inode_type = -1; + int op_ret = -1; + int op_errno = EINVAL; + int ret = -1; + gf_boolean_t wind = _gf_false; + + GF_VALIDATE_OR_GOTO("svc", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, loc, out); + GF_VALIDATE_OR_GOTO(this->name, loc->inode, out); + GF_VALIDATE_OR_GOTO(this->name, fd, out); + + /* Another way is to STACK_WIND to normal subvolume, if inode + type is not there in the context. If the file actually resides + in snapshots, then ENOENT would be returned. Needs more analysis. 
+ */ + SVC_GET_SUBVOL_FROM_CTX(this, op_ret, op_errno, inode_type, ret, loc->inode, + subvolume, out); + + if (((flags & O_ACCMODE) == O_WRONLY) || ((flags & O_ACCMODE) == O_RDWR)) { + if (subvolume != FIRST_CHILD(this)) { + op_ret = -1; + op_errno = EINVAL; + goto out; + } + } + + STACK_WIND_TAIL(frame, subvolume, subvolume->fops->open, loc, flags, fd, + xdata); + + wind = _gf_true; out: - if (!wind) - SVC_STACK_UNWIND (open, frame, op_ret, op_errno, NULL, - NULL); - return 0; + if (!wind) + SVC_STACK_UNWIND(open, frame, op_ret, op_errno, NULL, NULL); + return 0; } static int32_t -svc_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, - struct iatt *stbuf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +gf_svc_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, + struct iatt *stbuf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - int inode_type = -1; - int ret = -1; + int inode_type = -1; + int ret = -1; - if (op_ret < 0) - goto out; + if (op_ret < 0) + goto out; - inode_type = NORMAL_INODE; - ret = svc_inode_ctx_set (this, inode, inode_type); - if (ret) - gf_log (this->name, GF_LOG_ERROR, "failed to set inode " - "context"); + inode_type = NORMAL_INODE; + ret = svc_inode_ctx_set(this, inode, inode_type); + if (ret) + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED, + NULL); out: - SVC_STACK_UNWIND (create, frame, op_ret, op_errno, fd, - inode, stbuf, preparent, postparent, xdata); + SVC_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf, + preparent, postparent, xdata); - return 0; + return 0; } static int32_t -svc_create (call_frame_t *frame, xlator_t *this, - loc_t *loc, int32_t flags, mode_t mode, - mode_t umask, fd_t *fd, dict_t *xdata) +gf_svc_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, 
fd_t *fd, dict_t *xdata) { - int parent_type = -1; - int ret = -1; - int op_ret = -1; - int op_errno = EINVAL; - svc_private_t *priv = NULL; - gf_boolean_t wind = _gf_false; - - GF_VALIDATE_OR_GOTO ("svc", this, out); - GF_VALIDATE_OR_GOTO (this->name, this->private, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, loc->inode, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - - priv = this->private; - - ret = svc_inode_ctx_get (this, loc->parent, &parent_type); - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, "failed to get the inode " - "context for %s", uuid_utoa (loc->parent->gfid)); - op_ret = -1; - op_errno = EINVAL; - goto out; - } - - if (strcmp (loc->name, priv->path) && parent_type == NORMAL_INODE) { - STACK_WIND (frame, svc_create_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->create, loc, flags, - mode, umask, fd, xdata); - } else { - op_ret = -1; - op_errno = EROFS; - goto out; - } - - wind = _gf_true; + int parent_type = -1; + int ret = -1; + int op_ret = -1; + int op_errno = EINVAL; + gf_boolean_t wind = _gf_false; + char entry_point[NAME_MAX + 1] = { + 0, + }; + + GF_VALIDATE_OR_GOTO("svc", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, loc, out); + GF_VALIDATE_OR_GOTO(this->name, loc->inode, out); + GF_VALIDATE_OR_GOTO(this->name, fd, out); + + ret = svc_inode_ctx_get(this, loc->parent, &parent_type); + if (ret < 0) { + op_ret = -1; + op_errno = EINVAL; + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + SVC_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s", + uuid_utoa(loc->parent->gfid), NULL); + goto out; + } + + if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) { + gf_smsg(this->name, GF_LOG_WARNING, op_errno, + SVC_MSG_COPY_ENTRY_POINT_FAILED, NULL); + goto out; + } + + if (strcmp(loc->name, entry_point) && parent_type == NORMAL_INODE) { + STACK_WIND(frame, gf_svc_create_cbk, FIRST_CHILD(this), + 
FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd, + xdata); + } else { + op_ret = -1; + op_errno = EROFS; + goto out; + } + + wind = _gf_true; out: - if (!wind) - SVC_STACK_UNWIND (create, frame, op_ret, op_errno, - NULL, NULL, NULL, NULL, NULL, NULL); - return 0; + if (!wind) + SVC_STACK_UNWIND(create, frame, op_ret, op_errno, NULL, NULL, NULL, + NULL, NULL, NULL); + return 0; } static int32_t -svc_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +gf_svc_symlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - int inode_type = -1; - int ret = -1; + int inode_type = -1; + int ret = -1; - if (op_ret < 0) - goto out; + if (op_ret < 0) + goto out; - inode_type = NORMAL_INODE; - ret = svc_inode_ctx_set (this, inode, inode_type); - if (ret) - gf_log (this->name, GF_LOG_ERROR, "failed to set inode " - "context"); + inode_type = NORMAL_INODE; + ret = svc_inode_ctx_set(this, inode, inode_type); + if (ret) + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED, + NULL); out: - SVC_STACK_UNWIND (symlink, frame, op_ret, op_errno, inode, - buf, preparent, postparent, xdata); + SVC_STACK_UNWIND(symlink, frame, op_ret, op_errno, inode, buf, preparent, + postparent, xdata); - return 0; + return 0; } static int32_t -svc_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath, - loc_t *loc, mode_t umask, dict_t *xdata) +gf_svc_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath, + loc_t *loc, mode_t umask, dict_t *xdata) { - int parent_type = -1; - int op_ret = -1; - int op_errno = EINVAL; - int ret = -1; - svc_private_t *priv = NULL; - gf_boolean_t wind = _gf_false; - - GF_VALIDATE_OR_GOTO ("svc", this, out); - GF_VALIDATE_OR_GOTO 
(this->name, frame, out); - GF_VALIDATE_OR_GOTO (this->name, this->private, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, loc->inode, out); - - priv = this->private; - - ret = svc_inode_ctx_get (this, loc->parent, &parent_type); - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, "failed to get the inode " - "context for %s", uuid_utoa (loc->parent->gfid)); - op_ret = -1; - op_errno = EINVAL; - goto out; - } - - if (strcmp (loc->name, priv->path) && parent_type == NORMAL_INODE) { - STACK_WIND (frame, svc_symlink_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->symlink, linkpath, loc, - umask, xdata); - } else { - op_ret = -1; - op_errno = EROFS; - goto out; - } - - wind = _gf_true; + int parent_type = -1; + int op_ret = -1; + int op_errno = EINVAL; + int ret = -1; + gf_boolean_t wind = _gf_false; + char entry_point[NAME_MAX + 1] = { + 0, + }; + + GF_VALIDATE_OR_GOTO("svc", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, loc, out); + GF_VALIDATE_OR_GOTO(this->name, loc->inode, out); + + ret = svc_inode_ctx_get(this, loc->parent, &parent_type); + if (ret < 0) { + op_ret = -1; + op_errno = EINVAL; + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + SVC_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s", + uuid_utoa(loc->parent->gfid), NULL); + goto out; + } + + if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) { + gf_smsg(this->name, GF_LOG_WARNING, op_errno, + SVC_MSG_COPY_ENTRY_POINT_FAILED, NULL); + goto out; + } + + if (strcmp(loc->name, entry_point) && parent_type == NORMAL_INODE) { + STACK_WIND(frame, gf_svc_symlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->symlink, linkpath, loc, umask, + xdata); + } else { + op_ret = -1; + op_errno = EROFS; + goto out; + } + + wind = _gf_true; out: - if (!wind) - SVC_STACK_UNWIND (symlink, frame, op_ret, op_errno, - NULL, NULL, NULL, NULL, NULL); - return 0; + if (!wind) + SVC_STACK_UNWIND(symlink, frame, op_ret, op_errno, 
NULL, NULL, NULL, + NULL, NULL); + return 0; } static int32_t -svc_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, - dict_t *xdata) +gf_svc_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + dict_t *xdata) { - int inode_type = -1; - int op_ret = -1; - int op_errno = EINVAL; - int ret = -1; - gf_boolean_t wind = _gf_false; - - GF_VALIDATE_OR_GOTO ("svc", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, loc->inode, out); - - ret = svc_inode_ctx_get (this, loc->inode, &inode_type); - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, "failed to get the inode " - "context for %s", uuid_utoa (loc->parent->gfid)); - op_ret = -1; - op_errno = EINVAL; - goto out; - } - - if (inode_type == NORMAL_INODE) { - STACK_WIND_TAIL (frame, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->unlink, loc, flags, - xdata); - } else { - op_ret = -1; - op_errno = EROFS; - goto out; - } - - wind = _gf_true; + int inode_type = -1; + int op_ret = -1; + int op_errno = EINVAL; + int ret = -1; + gf_boolean_t wind = _gf_false; + + GF_VALIDATE_OR_GOTO("svc", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, loc, out); + GF_VALIDATE_OR_GOTO(this->name, loc->inode, out); + + ret = svc_inode_ctx_get(this, loc->inode, &inode_type); + if (ret < 0) { + op_ret = -1; + op_errno = EINVAL; + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + SVC_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s", + uuid_utoa(loc->parent->gfid), NULL); + goto out; + } + + if (inode_type == NORMAL_INODE) { + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, loc, flags, xdata); + } else { + op_ret = -1; + op_errno = EROFS; + goto out; + } + + wind = _gf_true; out: - if (!wind) - SVC_STACK_UNWIND (unlink, frame, op_ret, op_errno, NULL, NULL, - NULL); - return 0; + if (!wind) + SVC_STACK_UNWIND(unlink, frame, op_ret, op_errno, NULL, NULL, NULL); + 
return 0; } static int32_t -svc_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset, uint32_t flags, dict_t *xdata) +gf_svc_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) { - int inode_type = -1; - xlator_t *subvolume = NULL; - int ret = -1; - int op_ret = -1; - int op_errno = EINVAL; - gf_boolean_t wind = _gf_false; + int inode_type = -1; + xlator_t *subvolume = NULL; + int ret = -1; + int op_ret = -1; + int op_errno = EINVAL; + gf_boolean_t wind = _gf_false; - GF_VALIDATE_OR_GOTO ("svc", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - GF_VALIDATE_OR_GOTO (this->name, fd->inode, out); + GF_VALIDATE_OR_GOTO("svc", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, fd, out); + GF_VALIDATE_OR_GOTO(this->name, fd->inode, out); - SVC_GET_SUBVOL_FROM_CTX (this, op_ret, op_errno, inode_type, ret, - fd->inode, subvolume, out); + SVC_GET_SUBVOL_FROM_CTX(this, op_ret, op_errno, inode_type, ret, fd->inode, + subvolume, out); - STACK_WIND_TAIL (frame, subvolume, subvolume->fops->readv, - fd, size, offset, flags, xdata); + STACK_WIND_TAIL(frame, subvolume, subvolume->fops->readv, fd, size, offset, + flags, xdata); - wind = _gf_true; + wind = _gf_true; out: - if (!wind) - SVC_STACK_UNWIND (readv, frame, op_ret, op_errno, NULL, 0, NULL, - NULL, NULL); - return 0; + if (!wind) + SVC_STACK_UNWIND(readv, frame, op_ret, op_errno, NULL, 0, NULL, NULL, + NULL); + return 0; } static int32_t -svc_readlink (call_frame_t *frame, xlator_t *this, - loc_t *loc, size_t size, dict_t *xdata) +gf_svc_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size, + dict_t *xdata) { - int inode_type = -1; - xlator_t *subvolume = NULL; - int ret = -1; - int op_ret = -1; - int op_errno = EINVAL; - gf_boolean_t wind = _gf_false; + int inode_type = -1; + xlator_t *subvolume = NULL; + int ret = -1; + 
int op_ret = -1; + int op_errno = EINVAL; + gf_boolean_t wind = _gf_false; - GF_VALIDATE_OR_GOTO ("svc", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, loc->inode, out); + GF_VALIDATE_OR_GOTO("svc", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, loc, out); + GF_VALIDATE_OR_GOTO(this->name, loc->inode, out); - SVC_GET_SUBVOL_FROM_CTX (this, op_ret, op_errno, inode_type, ret, - loc->inode, subvolume, out); + SVC_GET_SUBVOL_FROM_CTX(this, op_ret, op_errno, inode_type, ret, loc->inode, + subvolume, out); - STACK_WIND_TAIL (frame, subvolume, subvolume->fops->readlink, loc, size, - xdata); + STACK_WIND_TAIL(frame, subvolume, subvolume->fops->readlink, loc, size, + xdata); - wind = _gf_true; + wind = _gf_true; out: - if (!wind) - STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, NULL, NULL, - NULL); - return 0; + if (!wind) + STACK_UNWIND_STRICT(readlink, frame, op_ret, op_errno, NULL, NULL, + NULL); + return 0; } static int32_t -svc_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask, - dict_t *xdata) +gf_svc_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask, + dict_t *xdata) { - int ret = -1; - int inode_type = -1; - xlator_t *subvolume = NULL; - int op_ret = -1; - int op_errno = EINVAL; - gf_boolean_t wind = _gf_false; + int ret = -1; + int inode_type = -1; + xlator_t *subvolume = NULL; + int op_ret = -1; + int op_errno = EINVAL; + gf_boolean_t wind = _gf_false; - GF_VALIDATE_OR_GOTO ("svc", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, loc->inode, out); + GF_VALIDATE_OR_GOTO("svc", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, loc, out); + GF_VALIDATE_OR_GOTO(this->name, loc->inode, out); - SVC_GET_SUBVOL_FROM_CTX (this, op_ret, op_errno, 
inode_type, ret, - loc->inode, subvolume, out); + SVC_GET_SUBVOL_FROM_CTX(this, op_ret, op_errno, inode_type, ret, loc->inode, + subvolume, out); - STACK_WIND_TAIL (frame, subvolume, subvolume->fops->access, loc, mask, - xdata); + STACK_WIND_TAIL(frame, subvolume, subvolume->fops->access, loc, mask, + xdata); - wind = _gf_true; + wind = _gf_true; out: - if (!wind) - SVC_STACK_UNWIND (access, frame, op_ret, op_errno, NULL); + if (!wind) + SVC_STACK_UNWIND(access, frame, op_ret, op_errno, NULL); - return 0; + return 0; } -static int32_t -svc_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, - size_t size, off_t off, - dict_t *xdata) +int32_t +gf_svc_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, + dict_t *xdata) { - int inode_type = -1; - xlator_t *subvolume = NULL; - int ret = -1; - int op_ret = -1; - int op_errno = EINVAL; - gf_boolean_t wind = _gf_false; - svc_fd_t *svc_fd = NULL; - gf_dirent_t entries; - - INIT_LIST_HEAD (&entries); - - GF_VALIDATE_OR_GOTO ("svc", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - GF_VALIDATE_OR_GOTO (this->name, fd->inode, out); - - svc_fd = svc_fd_ctx_get_or_new (this, fd); - if (!svc_fd) - gf_log (this->name, GF_LOG_ERROR, "failed to get the fd " - "context for the inode %s", - uuid_utoa (fd->inode->gfid)); - else { - if (svc_fd->entry_point_handled && off == svc_fd->last_offset) { - op_ret = 0; - op_errno = ENOENT; - goto out; - } - } + gf_dirent_t *entry = NULL; + gf_dirent_t *tmpentry = NULL; + svc_local_t *local = NULL; + char entry_point[NAME_MAX + 1] = { + 0, + }; + + if (op_ret < 0) + goto out; + + local = frame->local; + + /* If .snaps pre-exists, then it should not be listed + * in the NORMAL INODE directory when USS is enabled, + * so filter the .snaps entry if exists. 
+ * However it is OK to list .snaps in VIRTUAL world + */ + if (local->subvolume != FIRST_CHILD(this)) + goto out; + + /* + * Better to goto out if getting the entry point + * fails. We might end up sending the directory + * entry for the snapview entry point in the readdir + * response. But, the intention is to avoid the race + * condition where priv->path is being changed in + * reconfigure while this is accessing it. + */ + if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) { + gf_smsg(this->name, GF_LOG_WARNING, op_errno, + SVC_MSG_COPY_ENTRY_POINT_FAILED, NULL); + goto out; + } + + list_for_each_entry_safe(entry, tmpentry, &entries->list, list) + { + if (strcmp(entry_point, entry->d_name) == 0) + gf_dirent_entry_free(entry); + } - SVC_GET_SUBVOL_FROM_CTX (this, op_ret, op_errno, inode_type, ret, - fd->inode, subvolume, out); - - STACK_WIND_TAIL (frame, subvolume, subvolume->fops->readdir, fd, size, - off, xdata); +out: + SVC_STACK_UNWIND(readdir, frame, op_ret, op_errno, entries, xdata); + return 0; +} - wind = _gf_true; +static int32_t +gf_svc_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *xdata) +{ + int inode_type = -1; + xlator_t *subvolume = NULL; + svc_local_t *local = NULL; + int ret = -1; + int op_ret = -1; + int op_errno = EINVAL; + gf_boolean_t wind = _gf_false; + svc_fd_t *svc_fd = NULL; + gf_dirent_t entries; + + INIT_LIST_HEAD(&entries); + + GF_VALIDATE_OR_GOTO("svc", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, fd, out); + GF_VALIDATE_OR_GOTO(this->name, fd->inode, out); + + svc_fd = svc_fd_ctx_get_or_new(this, fd); + if (!svc_fd) + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED, + "gfid=%s", uuid_utoa(fd->inode->gfid), NULL); + else { + if (svc_fd->entry_point_handled && off == svc_fd->last_offset) { + op_ret = 0; + op_errno = ENOENT; + goto out; + } + } + + SVC_GET_SUBVOL_FROM_CTX(this, op_ret, op_errno, inode_type, 
ret, fd->inode, + subvolume, out); + + local = mem_get0(this->local_pool); + if (!local) { + gf_smsg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_NO_MEMORY, + "inode-gfid=%s", uuid_utoa(fd->inode->gfid), NULL); + goto out; + } + local->subvolume = subvolume; + frame->local = local; + + STACK_WIND(frame, gf_svc_readdir_cbk, subvolume, subvolume->fops->readdir, + fd, size, off, xdata); + + wind = _gf_true; out: - if (!wind) - SVC_STACK_UNWIND (readdir, frame, op_ret, op_errno, &entries, - NULL); + if (!wind) + SVC_STACK_UNWIND(readdir, frame, op_ret, op_errno, &entries, NULL); - gf_dirent_free (&entries); + gf_dirent_free(&entries); - return 0; + return 0; } /* @@ -1612,801 +1729,1063 @@ out: */ static int32_t -svc_readdirp_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *xdata, - struct iatt *postparent) +gf_svc_readdirp_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, + struct iatt *postparent) { - gf_dirent_t entries; - gf_dirent_t *entry = NULL; - svc_private_t *private = NULL; - svc_fd_t *svc_fd = NULL; - svc_local_t *local = NULL; - int inode_type = -1; - int ret = -1; - - GF_VALIDATE_OR_GOTO ("snapview-client", this, out); - GF_VALIDATE_OR_GOTO (this->name, this->private, out); - - private = this->private; - INIT_LIST_HEAD (&entries.list); - - local = frame->local; - - if (op_ret) { - op_ret = 0; - op_errno = ENOENT; - goto out; - } - - svc_fd = svc_fd_ctx_get (this, local->fd); - if (!svc_fd) { - gf_log (this->name, GF_LOG_ERROR, "failed to get the fd " - "context for the inode %s", - uuid_utoa (local->fd->inode->gfid)); - op_ret = 0; - op_errno = ENOENT; - goto out; - } - - entry = gf_dirent_for_name (private->path); - if (!entry) { - gf_log (this->name, GF_LOG_ERROR, "failed to allocate memory " - "for the entry %s", private->path); - op_ret = 0; - op_errno = ENOMEM; 
- goto out; - } - - entry->inode = inode_ref (inode); - entry->d_off = svc_fd->last_offset + 22; - entry->d_ino = buf->ia_ino; - entry->d_type = DT_DIR; - entry->d_stat = *buf; - inode_type = VIRTUAL_INODE; - ret = svc_inode_ctx_set (this, entry->inode, inode_type); - if (ret) - gf_log (this->name, GF_LOG_ERROR, "failed to set the inode " - "context"); - - list_add_tail (&entry->list, &entries.list); - op_ret = 1; - svc_fd->last_offset = entry->d_off; - svc_fd->entry_point_handled = _gf_true; + gf_dirent_t entries; + gf_dirent_t *entry = NULL; + svc_fd_t *svc_fd = NULL; + svc_local_t *local = NULL; + int inode_type = -1; + int ret = -1; + char entry_point[NAME_MAX + 1] = { + 0, + }; + + GF_VALIDATE_OR_GOTO("snapview-client", this, out); + + INIT_LIST_HEAD(&entries.list); + + local = frame->local; + + if (op_ret) { + if (op_errno == ESTALE && !local->revalidate) { + local->revalidate = 1; + ret = gf_svc_special_dir_revalidate_lookup(frame, this, xdata); + + if (!ret) + return 0; + } + op_ret = 0; + op_errno = ENOENT; + goto out; + } + + svc_fd = svc_fd_ctx_get(this, local->fd); + if (!svc_fd) { + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED, + "gfid=%s", uuid_utoa(local->fd->inode->gfid), NULL); + op_ret = 0; + op_errno = ENOENT; + goto out; + } + + if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) { + gf_smsg(this->name, GF_LOG_WARNING, 0, SVC_MSG_COPY_ENTRY_POINT_FAILED, + NULL); + op_ret = 0; + op_errno = ENOENT; + goto out; + } + + entry = gf_dirent_for_name(entry_point); + if (!entry) { + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_NO_MEMORY, + "entry-point=%s", entry_point, NULL); + op_ret = 0; + op_errno = ENOMEM; + goto out; + } + + entry->inode = inode_ref(inode); + entry->d_off = svc_fd->last_offset + 22; + entry->d_ino = buf->ia_ino; + entry->d_type = DT_DIR; + entry->d_stat = *buf; + inode_type = VIRTUAL_INODE; + ret = svc_inode_ctx_set(this, entry->inode, inode_type); + if (ret) + gf_smsg(this->name, 
GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED, + "entry-name=%s", entry->d_name, NULL); + + list_add_tail(&entry->list, &entries.list); + op_ret = 1; + svc_fd->last_offset = entry->d_off; + svc_fd->entry_point_handled = _gf_true; out: - SVC_STACK_UNWIND (readdirp, frame, op_ret, op_errno, &entries, - local->xdata); + SVC_STACK_UNWIND(readdirp, frame, op_ret, op_errno, &entries, + local ? local->xdata : NULL); - gf_dirent_free (&entries); + gf_dirent_free(&entries); - return 0; + return 0; } -gf_boolean_t -svc_readdir_on_special_dir (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - gf_dirent_t *entries, dict_t *xdata) +int +gf_svc_special_dir_revalidate_lookup(call_frame_t *frame, xlator_t *this, + dict_t *xdata) { - svc_local_t *local = NULL; - svc_private_t *private = NULL; - inode_t *inode = NULL; - fd_t *fd = NULL; - char *path = NULL; - loc_t *loc = NULL; - dict_t *tmp_xdata = NULL; - int ret = -1; - gf_boolean_t unwind = _gf_true; - svc_fd_t *svc_fd = NULL; - - GF_VALIDATE_OR_GOTO ("snapview-client", this, out); - GF_VALIDATE_OR_GOTO (this->name, this->private, out); - - private = this->private; - local = frame->local; - - loc = &local->loc; - fd = local->fd; - svc_fd = svc_fd_ctx_get (this, fd); - if (!svc_fd) { - gf_log (this->name, GF_LOG_ERROR, "failed to get the fd " - "context for the inode %s", - uuid_utoa (fd->inode->gfid)); - goto out; - } - - /* - * check if its end of readdir operation from posix, if special_dir - * option is set, if readdir is done on special directory and if - * readdirp is from normal regular graph. 
- */ - - if (!private->show_entry_point) - goto out; - - if (op_ret == 0 && op_errno == ENOENT && private->special_dir && - strcmp (private->special_dir, "") && svc_fd->special_dir && - local->subvolume == FIRST_CHILD (this)) { - inode = inode_grep (fd->inode->table, fd->inode, - private->path); - if (!inode) { - inode = inode_new (fd->inode->table); - if (!inode) { - gf_log (this->name, GF_LOG_ERROR, "failed to " - "allocate new inode"); - goto out; - } - } - - gf_uuid_copy (local->loc.pargfid, fd->inode->gfid); - gf_uuid_copy (local->loc.gfid, inode->gfid); - if (gf_uuid_is_null (inode->gfid)) - ret = inode_path (fd->inode, private->path, &path); - else - ret = inode_path (inode, NULL, &path); - - if (ret < 0) - goto out; - loc->path = gf_strdup (path); - if (loc->path) { - if (!loc->name || - (loc->name && !strcmp (loc->name, ""))) { - loc->name = strrchr (loc->path, '/'); - if (loc->name) - loc->name++; - } - } - - loc->inode = inode; - loc->parent = inode_ref (fd->inode); - tmp_xdata = dict_new (); - if (!tmp_xdata) - goto out; - ret = dict_set_str (tmp_xdata, "entry-point", "true"); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "failed to set dict"); - goto out; - } - - local->cookie = cookie; - local->xdata = dict_ref (xdata); - STACK_WIND (frame, svc_readdirp_lookup_cbk, - SECOND_CHILD (this), - SECOND_CHILD (this)->fops->lookup, loc, tmp_xdata); - unwind = _gf_false; - } - + svc_local_t *local = NULL; + loc_t *loc = NULL; + dict_t *tmp_xdata = NULL; + char *path = NULL; + int ret = -1; + char entry_point[NAME_MAX + 1] = { + 0, + }; + + GF_VALIDATE_OR_GOTO("snapview-client", this, out); + + local = frame->local; + loc = &local->loc; + + if (local->xdata) { + dict_unref(local->xdata); + local->xdata = NULL; + } + + if (xdata) + local->xdata = dict_ref(xdata); + + inode_unref(loc->inode); + loc->inode = inode_new(loc->parent->table); + if (!loc->inode) { + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, SVC_MSG_ALLOC_INODE_FAILED, + NULL); + goto out; + } + + if 
(gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) { + gf_smsg(this->name, GF_LOG_WARNING, 0, SVC_MSG_COPY_ENTRY_POINT_FAILED, + NULL); + goto out; + } + + gf_uuid_copy(local->loc.gfid, loc->inode->gfid); + ret = inode_path(loc->parent, entry_point, &path); + if (ret < 0) + goto out; + + if (loc->path) + GF_FREE((char *)loc->path); + + loc->path = gf_strdup(path); + if (loc->path) { + if (!loc->name || (loc->name && !strcmp(loc->name, ""))) { + loc->name = strrchr(loc->path, '/'); + if (loc->name) + loc->name++; + } + } else + loc->path = NULL; + + tmp_xdata = dict_new(); + if (!tmp_xdata) { + ret = -1; + goto out; + } + + ret = dict_set_str(tmp_xdata, "entry-point", "true"); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_DICT_SET_FAILED, NULL); + goto out; + } + + STACK_WIND(frame, gf_svc_readdirp_lookup_cbk, SECOND_CHILD(this), + SECOND_CHILD(this)->fops->lookup, loc, tmp_xdata); out: - if (tmp_xdata) - dict_unref (tmp_xdata); + if (tmp_xdata) + dict_unref(tmp_xdata); - GF_FREE (path); - return unwind; + GF_FREE(path); + return ret; } -static int32_t -svc_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - gf_dirent_t *entries, dict_t *xdata) +static gf_boolean_t +gf_svc_readdir_on_special_dir(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + gf_dirent_t *entries, dict_t *xdata) { - gf_dirent_t *entry = NULL; - svc_local_t *local = NULL; - gf_boolean_t real = _gf_true; - int inode_type = -1; - int ret = -1; - svc_fd_t *svc_fd = NULL; - gf_boolean_t unwind = _gf_true; - - GF_VALIDATE_OR_GOTO ("snapview-client", this, out); - - if (op_ret < 0) + svc_local_t *local = NULL; + svc_private_t *private = NULL; + inode_t *inode = NULL; + fd_t *fd = NULL; + char *path = NULL; + loc_t *loc = NULL; + dict_t *tmp_xdata = NULL; + int ret = -1; + gf_boolean_t unwind = _gf_true; + svc_fd_t *svc_fd = NULL; + char entry_point[NAME_MAX + 1] = { + 0, + }; + + 
GF_VALIDATE_OR_GOTO("snapview-client", this, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + + private + = this->private; + local = frame->local; + + loc = &local->loc; + fd = local->fd; + svc_fd = svc_fd_ctx_get(this, fd); + if (!svc_fd) { + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED, + "gfid=%s", uuid_utoa(fd->inode->gfid), NULL); + goto out; + } + + /* + * check if its end of readdir operation from posix, if special_dir + * option is set, if readdir is done on special directory and if + * readdirp is from normal regular graph. + */ + + if (!private->show_entry_point) + goto out; + + if (op_ret == 0 && op_errno == ENOENT && private->special_dir && + strcmp(private->special_dir, "") && svc_fd->special_dir && + local->subvolume == FIRST_CHILD(this)) { + if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) { + gf_smsg(this->name, GF_LOG_WARNING, 0, + SVC_MSG_GET_FD_CONTEXT_FAILED, NULL); + goto out; + } + + inode = inode_grep(fd->inode->table, fd->inode, entry_point); + if (!inode) { + inode = inode_new(fd->inode->table); + if (!inode) { + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_ALLOC_INODE_FAILED, + NULL); goto out; + } + } - local = frame->local; + gf_uuid_copy(local->loc.pargfid, fd->inode->gfid); + gf_uuid_copy(local->loc.gfid, inode->gfid); + if (gf_uuid_is_null(inode->gfid)) + ret = inode_path(fd->inode, entry_point, &path); + else + ret = inode_path(inode, NULL, &path); - svc_fd = svc_fd_ctx_get (this, local->fd); - if (!svc_fd) { - gf_log (this->name, GF_LOG_WARNING, "failed to get the fd " - "context for the gfid %s", - uuid_utoa (local->fd->inode->gfid)); + if (ret < 0) + goto out; + loc->path = gf_strdup(path); + if (loc->path) { + if (!loc->name || (loc->name && !strcmp(loc->name, ""))) { + loc->name = strrchr(loc->path, '/'); + if (loc->name) + loc->name++; + } + } + + loc->inode = inode; + loc->parent = inode_ref(fd->inode); + tmp_xdata = dict_new(); + if (!tmp_xdata) + goto out; + ret = 
dict_set_str(tmp_xdata, "entry-point", "true"); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_DICT_SET_FAILED, NULL); + goto out; } - if (local->subvolume == FIRST_CHILD (this)) - real = _gf_true; - else - real = _gf_false; - - list_for_each_entry (entry, &entries->list, list) { - if (!entry->inode) - continue; - - if (real) - inode_type = NORMAL_INODE; - else - inode_type = VIRTUAL_INODE; - - ret = svc_inode_ctx_set (this, entry->inode, inode_type); - if (ret) - gf_log (this->name, GF_LOG_ERROR, "failed to set inode " - "context"); - if (svc_fd) - svc_fd->last_offset = entry->d_off; + local->cookie = cookie; + if (local->xdata) { + dict_unref(local->xdata); + local->xdata = NULL; } + if (xdata) + local->xdata = dict_ref(xdata); - unwind = svc_readdir_on_special_dir (frame, cookie, this, op_ret, - op_errno, entries, xdata); + STACK_WIND(frame, gf_svc_readdirp_lookup_cbk, SECOND_CHILD(this), + SECOND_CHILD(this)->fops->lookup, loc, tmp_xdata); + unwind = _gf_false; + } out: - if (unwind) - SVC_STACK_UNWIND (readdirp, frame, op_ret, op_errno, entries, - xdata); + if (tmp_xdata) + dict_unref(tmp_xdata); - return 0; + GF_FREE(path); + return unwind; } static int32_t -svc_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, - size_t size, off_t off, - dict_t *xdata) +gf_svc_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, + dict_t *xdata) { - int inode_type = -1; - xlator_t *subvolume = NULL; - svc_local_t *local = NULL; - int ret = -1; - int op_ret = -1; - int op_errno = EINVAL; - gf_boolean_t wind = _gf_false; - svc_fd_t *svc_fd = NULL; - gf_dirent_t entries; - - INIT_LIST_HEAD (&entries.list); - - GF_VALIDATE_OR_GOTO ("svc", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - GF_VALIDATE_OR_GOTO (this->name, fd->inode, out); - - local = mem_get0 (this->local_pool); - if (!local) { - gf_log (this->name, GF_LOG_ERROR, "failed 
to allocate local"); - op_errno = ENOMEM; - goto out; - } + gf_dirent_t *entry = NULL; + gf_dirent_t *tmpentry = NULL; + svc_local_t *local = NULL; + int inode_type = -1; + int ret = -1; + svc_fd_t *svc_fd = NULL; + gf_boolean_t unwind = _gf_true; + char entry_point[NAME_MAX + 1] = { + 0, + }; + + if (op_ret < 0) + goto out; + + GF_VALIDATE_OR_GOTO("snapview-client", this, out); + + local = frame->local; + + svc_fd = svc_fd_ctx_get(this, local->fd); + if (!svc_fd) { + gf_smsg(this->name, GF_LOG_WARNING, 0, SVC_MSG_GET_FD_CONTEXT_FAILED, + "gfid=%s", uuid_utoa(local->fd->inode->gfid), NULL); + } + + if (local->subvolume == FIRST_CHILD(this)) + inode_type = NORMAL_INODE; + else + inode_type = VIRTUAL_INODE; - /* - * This is mainly for samba shares (or windows clients). As part of - * readdirp on the directory used as samba share, the entry point - * directory would have been added at the end. So when a new readdirp - * request comes, we have to check if the entry point has been handled - * or not in readdirp. That information and the offset used for it - * is remembered in fd context. If it has been handled, then simply - * unwind indication end of readdir operation. + /* + * Better to goto out and return whatever is there in the + * readdirp response (even if the readdir response contains + * a directory entry for the snapshot entry point). Otherwise + * if we ignore the error, then there is a chance of race + * condition where, priv->path is changed in reconfigure + */ + if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) { + gf_smsg(this->name, GF_LOG_WARNING, 0, SVC_MSG_COPY_ENTRY_POINT_FAILED, + NULL); + goto out; + } + + list_for_each_entry_safe(entry, tmpentry, &entries->list, list) + { + /* If .snaps pre-exists, then it should not be listed + * in the NORMAL INODE directory when USS is enabled, + * so filter the .snaps entry if exists. 
+ * However it is OK to list .snaps in VIRTUAL world */ - svc_fd = svc_fd_ctx_get_or_new (this, fd); - if (!svc_fd) - gf_log (this->name, GF_LOG_ERROR, "failed to get the fd " - "context for the inode %s", - uuid_utoa (fd->inode->gfid)); - else { - if (svc_fd->entry_point_handled && off == svc_fd->last_offset) { - op_ret = 0; - op_errno = ENOENT; - goto out; - } + if (inode_type == NORMAL_INODE && !strcmp(entry_point, entry->d_name)) { + gf_dirent_entry_free(entry); + continue; } - SVC_GET_SUBVOL_FROM_CTX (this, op_ret, op_errno, inode_type, ret, - fd->inode, subvolume, out); + if (!entry->inode) + continue; - local->subvolume = subvolume; - local->fd = fd_ref (fd); - frame->local = local; + ret = svc_inode_ctx_set(this, entry->inode, inode_type); + if (ret) + gf_smsg(this->name, GF_LOG_ERROR, 0, + SVC_MSG_SET_INODE_CONTEXT_FAILED, NULL); + if (svc_fd) + svc_fd->last_offset = entry->d_off; + } - STACK_WIND (frame, svc_readdirp_cbk, subvolume, - subvolume->fops->readdirp, fd, size, off, xdata); + unwind = gf_svc_readdir_on_special_dir(frame, cookie, this, op_ret, + op_errno, entries, xdata); - wind = _gf_true; +out: + if (unwind) + SVC_STACK_UNWIND(readdirp, frame, op_ret, op_errno, entries, xdata); + + return 0; +} + +static int32_t +gf_svc_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *xdata) +{ + int inode_type = -1; + xlator_t *subvolume = NULL; + svc_local_t *local = NULL; + int ret = -1; + int op_ret = -1; + int op_errno = EINVAL; + gf_boolean_t wind = _gf_false; + svc_fd_t *svc_fd = NULL; + gf_dirent_t entries; + + INIT_LIST_HEAD(&entries.list); + + GF_VALIDATE_OR_GOTO("svc", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, fd, out); + GF_VALIDATE_OR_GOTO(this->name, fd->inode, out); + + local = mem_get0(this->local_pool); + if (!local) { + op_errno = ENOMEM; + gf_smsg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_NO_MEMORY, NULL); + goto out; + } + + /* + * This is mainly 
for samba shares (or windows clients). As part of + * readdirp on the directory used as samba share, the entry point + * directory would have been added at the end. So when a new readdirp + * request comes, we have to check if the entry point has been handled + * or not in readdirp. That information and the offset used for it + * is remembered in fd context. If it has been handled, then simply + * unwind indication end of readdir operation. + */ + svc_fd = svc_fd_ctx_get_or_new(this, fd); + if (!svc_fd) + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED, + "gfid=%s", uuid_utoa(fd->inode->gfid), NULL); + else { + if (svc_fd->entry_point_handled && off == svc_fd->last_offset) { + op_ret = 0; + op_errno = ENOENT; + goto out; + } + } + + SVC_GET_SUBVOL_FROM_CTX(this, op_ret, op_errno, inode_type, ret, fd->inode, + subvolume, out); + + local->subvolume = subvolume; + local->fd = fd_ref(fd); + frame->local = local; + + STACK_WIND(frame, gf_svc_readdirp_cbk, subvolume, subvolume->fops->readdirp, + fd, size, off, xdata); + + wind = _gf_true; out: - if (!wind) - SVC_STACK_UNWIND (readdirp, frame, op_ret, op_errno, &entries, - NULL); + if (!wind) + SVC_STACK_UNWIND(readdirp, frame, op_ret, op_errno, &entries, NULL); - gf_dirent_free (&entries); + gf_dirent_free(&entries); - return 0; + return 0; } /* Renaming the entries from or to snapshots is not allowed as the snapshots are read-only. 
*/ static int32_t -svc_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, - loc_t *newloc, dict_t *xdata) +gf_svc_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) { - int src_inode_type = -1; - int dst_inode_type = -1; - int dst_parent_type = -1; - int32_t op_ret = -1; - int32_t op_errno = 0; - int32_t ret = -1; - gf_boolean_t wind = _gf_false; - - GF_VALIDATE_OR_GOTO ("svc", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - GF_VALIDATE_OR_GOTO (this->name, oldloc, out); - GF_VALIDATE_OR_GOTO (this->name, oldloc->inode, out); - GF_VALIDATE_OR_GOTO (this->name, newloc, out); - - ret = svc_inode_ctx_get (this, oldloc->inode, &src_inode_type); - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, "failed to get the inode " - "context for the inode %s", - uuid_utoa (oldloc->inode->gfid)); - op_ret = -1; - op_errno = EINVAL; - goto out; - } - - if (src_inode_type == VIRTUAL_INODE) { - gf_log (this->name, GF_LOG_ERROR, "rename happening on a entry" - " %s residing in snapshot", oldloc->name); - op_ret = -1; - op_errno = EROFS; - goto out; - } - - if (newloc->inode) { - ret = svc_inode_ctx_get (this, newloc->inode, &dst_inode_type); - if (!ret && dst_inode_type == VIRTUAL_INODE) { - gf_log (this->name, GF_LOG_ERROR, "rename of %s " - "happening to a entry %s residing in snapshot", - oldloc->name, newloc->name); - op_ret = -1; - op_errno = EROFS; - goto out; - } - } - - if (dst_inode_type < 0) { - ret = svc_inode_ctx_get (this, newloc->parent, - &dst_parent_type); - if (!ret && dst_parent_type == VIRTUAL_INODE) { - gf_log (this->name, GF_LOG_ERROR, "rename of %s " - "happening to a entry %s residing in snapshot", - oldloc->name, newloc->name); - op_ret = -1; - op_errno = EROFS; - goto out; - } + int src_inode_type = -1; + int dst_inode_type = -1; + int dst_parent_type = -1; + int32_t op_ret = -1; + int32_t op_errno = 0; + int32_t ret = -1; + gf_boolean_t wind = _gf_false; + + GF_VALIDATE_OR_GOTO("svc", this, 
out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, oldloc, out); + GF_VALIDATE_OR_GOTO(this->name, oldloc->inode, out); + GF_VALIDATE_OR_GOTO(this->name, newloc, out); + + ret = svc_inode_ctx_get(this, oldloc->inode, &src_inode_type); + if (ret < 0) { + op_ret = -1; + op_errno = EINVAL; + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + SVC_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s", + uuid_utoa(oldloc->inode->gfid), NULL); + goto out; + } + + if (src_inode_type == VIRTUAL_INODE) { + op_ret = -1; + op_errno = EROFS; + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + SVC_MSG_RENAME_SNAPSHOT_ENTRY, "name=%s", oldloc->name, NULL); + goto out; + } + + if (newloc->inode) { + ret = svc_inode_ctx_get(this, newloc->inode, &dst_inode_type); + if (!ret && dst_inode_type == VIRTUAL_INODE) { + op_ret = -1; + op_errno = EROFS; + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + SVC_MSG_RENAME_SNAPSHOT_ENTRY, "oldloc-name=%s", + oldloc->name, "newloc-name=%s", newloc->name, NULL); + goto out; + } + } + + if (dst_inode_type < 0) { + ret = svc_inode_ctx_get(this, newloc->parent, &dst_parent_type); + if (!ret && dst_parent_type == VIRTUAL_INODE) { + op_ret = -1; + op_errno = EROFS; + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + SVC_MSG_RENAME_SNAPSHOT_ENTRY, "oldloc-name=%s", + oldloc->name, "newloc-name=%s", newloc->name, NULL); + goto out; } + } - STACK_WIND_TAIL (frame, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->rename, oldloc, newloc, - xdata); + STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->rename, + oldloc, newloc, xdata); - wind = _gf_true; + wind = _gf_true; out: - if (!wind) - SVC_STACK_UNWIND (rename, frame, op_ret, op_errno, NULL, - NULL, NULL, NULL, NULL, NULL); - return 0; + if (!wind) + SVC_STACK_UNWIND(rename, frame, op_ret, op_errno, NULL, NULL, NULL, + NULL, NULL, NULL); + return 0; } /* Creating hardlinks for the files from the snapshot is not allowed as it will be equivalent of creating hardlinks across different 
filesystems. - And so is vise versa. + And so is vice versa. */ static int32_t -svc_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, - dict_t *xdata) +gf_svc_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) { - int src_inode_type = -1; - int dst_parent_type = -1; - int32_t op_ret = -1; - int32_t op_errno = 0; - int32_t ret = -1; - gf_boolean_t wind = _gf_false; - - GF_VALIDATE_OR_GOTO ("svc", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - GF_VALIDATE_OR_GOTO (this->name, oldloc, out); - GF_VALIDATE_OR_GOTO (this->name, oldloc->inode, out); - GF_VALIDATE_OR_GOTO (this->name, newloc, out); - - ret = svc_inode_ctx_get (this, oldloc->inode, &src_inode_type); - if (!ret && src_inode_type == VIRTUAL_INODE) { - gf_log (this->name, GF_LOG_ERROR, "rename happening on a entry" - " %s residing in snapshot", oldloc->name); - op_ret = -1; - op_errno = EROFS; - goto out; - } - - ret = svc_inode_ctx_get (this, newloc->parent, &dst_parent_type); - if (!ret && dst_parent_type == VIRTUAL_INODE) { - gf_log (this->name, GF_LOG_ERROR, "rename of %s " - "happening to a entry %s residing in snapshot", - oldloc->name, newloc->name); - op_ret = -1; - op_errno = EROFS; - goto out; - } - - STACK_WIND_TAIL (frame, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->link, oldloc, newloc, xdata); - - wind = _gf_true; + int src_inode_type = -1; + int dst_parent_type = -1; + int32_t op_ret = -1; + int32_t op_errno = 0; + int32_t ret = -1; + gf_boolean_t wind = _gf_false; + + GF_VALIDATE_OR_GOTO("svc", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, oldloc, out); + GF_VALIDATE_OR_GOTO(this->name, oldloc->inode, out); + GF_VALIDATE_OR_GOTO(this->name, newloc, out); + + ret = svc_inode_ctx_get(this, oldloc->inode, &src_inode_type); + if (!ret && src_inode_type == VIRTUAL_INODE) { + op_ret = -1; + op_errno = EROFS; + gf_smsg(this->name, GF_LOG_ERROR, op_errno, 
SVC_MSG_LINK_SNAPSHOT_ENTRY, + "oldloc-name=%s", oldloc->name, NULL); + goto out; + } + + ret = svc_inode_ctx_get(this, newloc->parent, &dst_parent_type); + if (!ret && dst_parent_type == VIRTUAL_INODE) { + op_ret = -1; + op_errno = EROFS; + gf_smsg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_LINK_SNAPSHOT_ENTRY, + "oldloc-name=%s", oldloc->name, "newloc-name=%s", newloc->name, + NULL); + goto out; + } + + STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->link, + oldloc, newloc, xdata); + + wind = _gf_true; out: - if (!wind) - SVC_STACK_UNWIND (link, frame, op_ret, op_errno, - NULL, NULL, NULL, NULL, NULL); - return 0; + if (!wind) + SVC_STACK_UNWIND(link, frame, op_ret, op_errno, NULL, NULL, NULL, NULL, + NULL); + return 0; } static int32_t -svc_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *name, dict_t *xdata) +gf_svc_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) { - int ret = -1; - int inode_type = -1; - int op_ret = -1; - int op_errno = EINVAL; - gf_boolean_t wind = _gf_false; + int ret = -1; + int inode_type = -1; + int op_ret = -1; + int op_errno = EINVAL; + gf_boolean_t wind = _gf_false; + + GF_VALIDATE_OR_GOTO("svc", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, loc, out); + GF_VALIDATE_OR_GOTO(this->name, loc->inode, out); + + ret = svc_inode_ctx_get(this, loc->inode, &inode_type); + if (ret < 0) { + op_ret = -1; + op_errno = EINVAL; + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + SVC_MSG_GET_INODE_CONTEXT_FAILED, "path=%s", loc->path, + "gfid=%s", uuid_utoa(loc->inode->gfid), NULL); + goto out; + } + + if (inode_type == NORMAL_INODE) { + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, loc, name, xdata); + } else { + op_ret = -1; + op_errno = EROFS; + goto out; + } + + wind = _gf_true; - GF_VALIDATE_OR_GOTO ("svc", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - 
GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, loc->inode, out); - - ret = svc_inode_ctx_get (this, loc->inode, &inode_type); - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, "failed to get te inode " - "context for %s (gfid: %s)", loc->path, - uuid_utoa (loc->inode->gfid)); - op_ret = -1; - op_errno = EINVAL; - goto out; - } +out: + if (!wind) + SVC_STACK_UNWIND(removexattr, frame, op_ret, op_errno, NULL); - if (inode_type == NORMAL_INODE) { - STACK_WIND_TAIL (frame, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->removexattr, loc, - name, xdata); - } else { - op_ret = -1; - op_errno = EROFS; - goto out; - } + return 0; +} - wind = _gf_true; +static int +gf_svc_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync, + dict_t *xdata) +{ + int inode_type = -1; + int ret = -1; + int op_ret = -1; + int op_errno = EINVAL; + gf_boolean_t wind = _gf_false; + + GF_VALIDATE_OR_GOTO("svc", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, fd, out); + GF_VALIDATE_OR_GOTO(this->name, fd->inode, out); + + ret = svc_inode_ctx_get(this, fd->inode, &inode_type); + if (ret < 0) { + op_ret = -1; + op_errno = EINVAL; + gf_smsg(this->name, GF_LOG_ERROR, op_errno, + SVC_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s", + uuid_utoa(fd->inode->gfid), NULL); + goto out; + } + + if (inode_type == NORMAL_INODE) { + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata); + } else { + op_ret = -1; + op_errno = EROFS; + goto out; + } + + wind = _gf_true; out: - if (!wind) - SVC_STACK_UNWIND (removexattr, frame, op_ret, op_errno, - NULL); + if (!wind) + SVC_STACK_UNWIND(fsync, frame, op_ret, op_errno, NULL, NULL, NULL); - return 0; + return 0; } -static int -svc_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync, - dict_t *xdata) +static int32_t +gf_svc_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { - int inode_type = -1; - int ret = 
-1; - int op_ret = -1; - int op_errno = EINVAL; - gf_boolean_t wind = _gf_false; + int32_t op_ret = -1; + int32_t op_errno = 0; + int ret = -1; + int inode_type = -1; + xlator_t *subvolume = NULL; + gf_boolean_t wind = _gf_false; - GF_VALIDATE_OR_GOTO ("svc", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - GF_VALIDATE_OR_GOTO (this->name, fd->inode, out); + GF_VALIDATE_OR_GOTO("svc", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, fd, out); + GF_VALIDATE_OR_GOTO(this->name, fd->inode, out); - ret = svc_inode_ctx_get (this, fd->inode, &inode_type); - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, "failed to get inode context " - "for %s", uuid_utoa (fd->inode->gfid)); - op_ret = -1; - op_errno = EINVAL; - goto out; - } + SVC_GET_SUBVOL_FROM_CTX(this, op_ret, op_errno, inode_type, ret, fd->inode, + subvolume, out); - if (inode_type == NORMAL_INODE) { - STACK_WIND_TAIL (frame, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fsync, fd, datasync, - xdata); - } else { - op_ret = -1; - op_errno = EROFS; - goto out; - } + STACK_WIND_TAIL(frame, subvolume, subvolume->fops->flush, fd, xdata); - wind = _gf_true; + wind = _gf_true; out: - if (!wind) - SVC_STACK_UNWIND (fsync, frame, op_ret, op_errno, NULL, NULL, - NULL); + if (!wind) + SVC_STACK_UNWIND(flush, frame, op_ret, op_errno, NULL); - return 0; + return 0; } static int32_t -svc_flush (call_frame_t *frame, xlator_t *this, - fd_t *fd, dict_t *xdata) +gf_svc_releasedir(xlator_t *this, fd_t *fd) { - int32_t op_ret = -1; - int32_t op_errno = 0; - int ret = -1; - int inode_type = -1; - xlator_t *subvolume = NULL; - gf_boolean_t wind = _gf_false; - - GF_VALIDATE_OR_GOTO ("svc", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - GF_VALIDATE_OR_GOTO (this->name, fd->inode, out); + svc_fd_t *sfd = NULL; + uint64_t tmp_pfd = 0; + int ret = 0; - SVC_GET_SUBVOL_FROM_CTX 
(this, op_ret, op_errno, inode_type, ret, - fd->inode, subvolume, out); + GF_VALIDATE_OR_GOTO("snapview-client", this, out); + GF_VALIDATE_OR_GOTO(this->name, fd, out); - STACK_WIND_TAIL (frame, subvolume, subvolume->fops->flush, fd, xdata); + ret = fd_ctx_del(fd, this, &tmp_pfd); + if (ret < 0) { + gf_msg_debug(this->name, 0, "pfd from fd=%p is NULL", fd); + goto out; + } - wind = _gf_true; + GF_FREE(sfd); out: - if (!wind) - SVC_STACK_UNWIND (flush, frame, op_ret, op_errno, NULL); - - return 0; + return 0; } static int32_t -svc_releasedir (xlator_t *this, fd_t *fd) +gf_svc_forget(xlator_t *this, inode_t *inode) { - svc_fd_t *sfd = NULL; - uint64_t tmp_pfd = 0; - int ret = 0; + int ret = -1; + uint64_t value = 0; - GF_VALIDATE_OR_GOTO ("snapview-client", this, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - - ret = fd_ctx_del (fd, this, &tmp_pfd); - if (ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, - "pfd from fd=%p is NULL", fd); - goto out; - } + GF_VALIDATE_OR_GOTO("svc", this, out); + GF_VALIDATE_OR_GOTO(this->name, inode, out); - GF_FREE (sfd); + ret = inode_ctx_del(inode, this, &value); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, + SVC_MSG_DELETE_INODE_CONTEXT_FAILED, "gfid=%s", + uuid_utoa(inode->gfid), NULL); + goto out; + } out: - return 0; + return 0; } -static int32_t -svc_forget (xlator_t *this, inode_t *inode) +static int +gf_svc_priv_destroy(xlator_t *this, svc_private_t *priv) { - int ret = -1; - uint64_t value = 0; + int ret = -1; - GF_VALIDATE_OR_GOTO ("svc", this, out); - GF_VALIDATE_OR_GOTO (this->name, inode, out); + if (!priv) { + gf_smsg(this->name, GF_LOG_WARNING, 0, SVC_MSG_NULL_PRIV, NULL); + goto out; + } - ret = inode_ctx_del (inode, this, &value); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "failed to delete inode " - "context for %s", uuid_utoa (inode->gfid)); - goto out; - } + GF_FREE(priv->path); + GF_FREE(priv->special_dir); + + LOCK_DESTROY(&priv->lock); + + GF_FREE(priv); + + if (this->local_pool) { + 
mem_pool_destroy(this->local_pool); + this->local_pool = NULL; + } + + ret = 0; out: - return 0; + return ret; } +/** + * ** NOTE **: + * ============= + * The option "snapdir-entry-path" is NOT reconfigurable. + * That option as of now is only for the consumption of + * samba, where, it needs to tell glusterfs about the + * directory that is shared with windows client for the + * access. Now, in windows-explorer (GUI) interface, for + * the directory shared, the entry point to the snapshot + * world (snapshot-directory option) should be visible, + * atleast as a hidden entry. For that to happen, glusterfs + * has to send that entry in the readdir response coming on + * the directory used as the smb share. Therefore, samba, + * while initializing the gluster volume (via gfapi) sets + * the xlator option "snapdir-entry-path" to the directory + * which is to be shared with windows (check the file + * vfs_glusterfs.c from samba source code). So to avoid + * problems with smb access, not allowing snapdir-entry-path + * option to be configurable. That option is for those + * consumers who know what they are doing. + **/ int -reconfigure (xlator_t *this, dict_t *options) +reconfigure(xlator_t *this, dict_t *options) { - svc_private_t *priv = NULL; - - priv = this->private; + svc_private_t *priv = NULL; + char *path = NULL; + gf_boolean_t show_entry_point = _gf_false; + char *tmp = NULL; + + priv = this->private; + + GF_OPTION_RECONF("snapshot-directory", path, options, str, out); + if (!path || (strlen(path) > NAME_MAX) || path[0] != '.') { + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_INVALID_ENTRY_POINT, + "path=%s", path, NULL); + goto out; + } + + GF_OPTION_RECONF("show-snapshot-directory", show_entry_point, options, bool, + out); + + /* + * The assumption now is that priv->path is an allocated memory (either + * in init or in a previous reconfigure). 
+ * So, the intention here is to preserve the older contents of the option + * until the new option's value has been completely stored in the priv. + * So, do this. + * - Store the pointer of priv->path in a temporary pointer. + * - Allocate new memory for the new value of the option that is just + * obtained from the above call to GF_OPTION_RECONF. + * - If the above allocation fails, again set the pointer from priv + * to the address stored in tmp. i.e. the previous value. + * - If the allocation succeeds, then free the tmp pointer. + * WARNING: Before changing the allocation and freeing logic of + * priv->path, always check the init function to see how + * priv->path is set. Take decisions accordingly. As of now, + * the assumption is that, the string elements of private + * structure of snapview-client are allocated (either in + * init or here in reconfugure). + */ + LOCK(&priv->lock); + { + tmp = priv->path; + priv->path = NULL; + priv->path = gf_strdup(path); + if (!priv->path) { + gf_log(this->name, GF_LOG_ERROR, + "failed to reconfigure snapshot-directory option to %s", + path); + priv->path = tmp; + } else { + GF_FREE(tmp); + tmp = NULL; + } - GF_OPTION_RECONF ("snapshot-directory", priv->path, options, str, out); - GF_OPTION_RECONF ("show-snapshot-directory", priv->show_entry_point, - options, bool, out); + priv->show_entry_point = show_entry_point; + } + UNLOCK(&priv->lock); out: - return 0; + return 0; } int32_t -mem_acct_init (xlator_t *this) +mem_acct_init(xlator_t *this) { - int32_t ret = -1; + int32_t ret = -1; - if (!this) - return ret; + if (!this) + return ret; - ret = xlator_mem_acct_init (this, gf_svc_mt_end + 1); + ret = xlator_mem_acct_init(this, gf_svc_mt_end + 1); - if (ret != 0) { - gf_log (this->name, GF_LOG_WARNING, "Memory accounting" - " init failed"); - return ret; - } + if (ret != 0) { + gf_smsg(this->name, GF_LOG_WARNING, 0, SVC_MSG_MEM_ACNT_FAILED, NULL); + } - return ret; + return ret; } int32_t -init (xlator_t *this) 
+init(xlator_t *this) { - svc_private_t *private = NULL; - int ret = -1; - int children = 0; - xlator_list_t *xl = NULL; - - if (!this->children) { - gf_log (this->name, GF_LOG_ERROR, - "configured without any child"); - goto out; - } - - xl = this->children; - while (xl) { - children++; - xl = xl->next; - } + svc_private_t *private = NULL; + int ret = -1; + int children = 0; + xlator_list_t *xl = NULL; + char *path = NULL; + char *special_dir = NULL; + + if (!this->children) { + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_NO_CHILD_FOR_XLATOR, NULL); + goto out; + } + + xl = this->children; + while (xl) { + children++; + xl = xl->next; + } + + if (children != 2) { + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_XLATOR_CHILDREN_WRONG, + "subvol-num=%d", children, NULL); + goto out; + } + + /* This can be the top of graph in certain cases */ + if (!this->parents) { + gf_msg_debug(this->name, 0, + "dangling volume. Check " + "volfile"); + } + + private + = GF_CALLOC(1, sizeof(*private), gf_svc_mt_svc_private_t); + if (!private) + goto out; + + LOCK_INIT(&private->lock); + + GF_OPTION_INIT("snapshot-directory", path, str, out); + if (!path || (strlen(path) > NAME_MAX) || path[0] != '.') { + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_INVALID_ENTRY_POINT, + "path=%s", path, NULL); + goto out; + } + + private + ->path = gf_strdup(path); + if (!private->path) { + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_NO_MEMORY, + "entry-point-path=%s", path, NULL); + goto out; + } + + GF_OPTION_INIT("snapdir-entry-path", special_dir, str, out); + if (!special_dir || strstr(special_dir, path)) { + if (special_dir) + gf_smsg(this->name, GF_LOG_ERROR, 0, + SVC_MSG_ENTRY_POINT_SPECIAL_DIR, "path=%s", path, + "special-dir=%s", special_dir); + else + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_NULL_SPECIAL_DIR, + NULL); + goto out; + } - if (children != 2) { - gf_log (this->name, GF_LOG_ERROR, "snap-view-client has got " - "%d subvolumes. 
It can have only 2 subvolumes.", - children); - goto out; - } + private + ->special_dir = gf_strdup(special_dir); + if (!private->special_dir) { + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_NO_MEMORY, + "special-directory=%s", special_dir, NULL); + goto out; + } - /* This can be the top of graph in certain cases */ - if (!this->parents) { - gf_log (this->name, GF_LOG_DEBUG, - "dangling volume. check volfile "); - } + GF_OPTION_INIT("show-snapshot-directory", private->show_entry_point, bool, + out); - private = GF_CALLOC (1, sizeof (*private), gf_svc_mt_svc_private_t); - if (!private) - goto out; + this->local_pool = mem_pool_new(svc_local_t, 128); + if (!this->local_pool) { + gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_MEM_POOL_GET_FAILED, NULL); + goto out; + } - GF_OPTION_INIT ("snapshot-directory", private->path, str, out); - GF_OPTION_INIT ("snapdir-entry-path", private->special_dir, str, - out); - GF_OPTION_INIT ("show-snapshot-directory", private->show_entry_point, - bool, out); - - if (strstr (private->special_dir, private->path)) { - gf_log (this->name, GF_LOG_ERROR, "entry point directory " - "cannot be part of the special directory"); - GF_FREE (private->special_dir); - private->special_dir = NULL; - goto out; - } - - this->private = private; - this->local_pool = mem_pool_new (svc_local_t, 128); - if (!this->local_pool) { - gf_log (this->name, GF_LOG_ERROR, "could not get mem pool for " - "frame->local"); - goto out; - } + this->private = private; - ret = 0; + ret = 0; out: - if (ret) - GF_FREE (private); + if (ret) + (void)gf_svc_priv_destroy(this, private); - return ret; + return ret; } void -fini (xlator_t *this) +fini(xlator_t *this) { - svc_private_t *priv = NULL; + svc_private_t *priv = NULL; - if (!this) - return; + if (!this) + return; - priv = this->private; - if (!priv) - return; + priv = this->private; + if (!priv) + return; - this->private = NULL; + /* + * Just log the failure and go ahead to + * set this->priv to NULL. 
+ */ + if (gf_svc_priv_destroy(this, priv)) + gf_smsg(this->name, GF_LOG_WARNING, 0, SVC_MSG_PRIV_DESTROY_FAILED, + NULL); - GF_FREE (priv); + this->private = NULL; - return; + return; } int -notify (xlator_t *this, int event, void *data, ...) +notify(xlator_t *this, int event, void *data, ...) { - xlator_t *subvol = NULL; - int ret = 0; - - subvol = data; - - /* As there are two subvolumes in snapview-client, there is - * a possibility that the regular subvolume is still down and - * snapd subvolume come up first. So if we don't handle this situation - * CHILD_UP event will be propagated upwards to fuse when - * regular subvolume is still down. - * This can cause data unavailable for the application. - * So for now send notifications up only for regular subvolume. - * - * TODO: In future if required we may need to handle - * notifications from virtual subvolume - */ - if (subvol != SECOND_CHILD (this)) - ret = default_notify (this, event, data); - - return ret; + xlator_t *subvol = NULL; + int ret = 0; + + subvol = data; + + /* As there are two subvolumes in snapview-client, there is + * a possibility that the regular subvolume is still down and + * snapd subvolume come up first. So if we don't handle this situation + * CHILD_UP event will be propagated upwards to fuse when + * regular subvolume is still down. + * This can cause data unavailable for the application. + * So for now send notifications up only for regular subvolume. 
+ * + * TODO: In future if required we may need to handle + * notifications from virtual subvolume + */ + if (subvol != SECOND_CHILD(this)) + ret = default_notify(this, event, data); + + return ret; } struct xlator_fops fops = { - .lookup = svc_lookup, - .opendir = svc_opendir, - .stat = svc_stat, - .fstat = svc_fstat, - .statfs = svc_statfs, - .rmdir = svc_rmdir, - .rename = svc_rename, - .mkdir = svc_mkdir, - .open = svc_open, - .unlink = svc_unlink, - .setattr = svc_setattr, - .getxattr = svc_getxattr, - .setxattr = svc_setxattr, - .fsetxattr = svc_fsetxattr, - .readv = svc_readv, - .readdir = svc_readdir, - .readdirp = svc_readdirp, - .create = svc_create, - .readlink = svc_readlink, - .mknod = svc_mknod, - .symlink = svc_symlink, - .flush = svc_flush, - .link = svc_link, - .access = svc_access, - .removexattr = svc_removexattr, - .fsync = svc_fsync, + .lookup = gf_svc_lookup, + .opendir = gf_svc_opendir, + .stat = gf_svc_stat, + .fstat = gf_svc_fstat, + .statfs = gf_svc_statfs, + .rmdir = gf_svc_rmdir, + .rename = gf_svc_rename, + .mkdir = gf_svc_mkdir, + .open = gf_svc_open, + .unlink = gf_svc_unlink, + .setattr = gf_svc_setattr, + .getxattr = gf_svc_getxattr, + .setxattr = gf_svc_setxattr, + .fsetxattr = gf_svc_fsetxattr, + .readv = gf_svc_readv, + .readdir = gf_svc_readdir, + .readdirp = gf_svc_readdirp, + .create = gf_svc_create, + .readlink = gf_svc_readlink, + .mknod = gf_svc_mknod, + .symlink = gf_svc_symlink, + .flush = gf_svc_flush, + .link = gf_svc_link, + .access = gf_svc_access, + .removexattr = gf_svc_removexattr, + .fsync = gf_svc_fsync, }; struct xlator_cbks cbks = { - .forget = svc_forget, - .releasedir = svc_releasedir, + .forget = gf_svc_forget, + .releasedir = gf_svc_releasedir, }; struct volume_options options[] = { - { .key = {"snapshot-directory"}, - .type = GF_OPTION_TYPE_STR, - .default_value = ".snaps", - }, - { .key = {"snapdir-entry-path"}, - .type = GF_OPTION_TYPE_STR, - .description = "An option to set the path of a directory on 
which " - "when readdir comes, dentry for the snapshot-directory" - " should be created and added in the readdir response", - .default_value = "", - }, - { .key = {"show-snapshot-directory"}, - .type = GF_OPTION_TYPE_BOOL, - .description = "If this option is set, and the option " - "\"snapdir-entry-path\" is set (which is set by samba " - "vfs plugin for glusterfs, then send the entry point " - "when readdir comes on the snapdir-entry-path", - .default_value = "off", - }, - { .key = {NULL} }, + { + .key = {"snapshot-directory"}, + .type = GF_OPTION_TYPE_STR, + .default_value = ".snaps", + }, + { + .key = {"snapdir-entry-path"}, + .type = GF_OPTION_TYPE_STR, + .description = "An option to set the path of a directory on which " + "when readdir comes, dentry for the snapshot-directory" + " should be created and added in the readdir response", + .default_value = "", + }, + { + .key = {"show-snapshot-directory"}, + .type = GF_OPTION_TYPE_BOOL, + .description = "If this option is set, and the option " + "\"snapdir-entry-path\" is set (which is set by samba " + "vfs plugin for glusterfs, then send the entry point " + "when readdir comes on the snapdir-entry-path", + .default_value = "off", + }, + {.key = {NULL}}, +}; + +xlator_api_t xlator_api = { + .init = init, + .fini = fini, + .notify = notify, + .reconfigure = reconfigure, + .mem_acct_init = mem_acct_init, + .op_version = {1}, + .fops = &fops, + .cbks = &cbks, + .options = options, + .identifier = "snapview-client", + .category = GF_MAINTAINED, }; diff --git a/xlators/features/snapview-client/src/snapview-client.h b/xlators/features/snapview-client/src/snapview-client.h index fb0a15d8615..166116a439d 100644 --- a/xlators/features/snapview-client/src/snapview-client.h +++ b/xlators/features/snapview-client/src/snapview-client.h @@ -1,121 +1,101 @@ - /* - Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. 
- - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. +/* + Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ #ifndef __SNAP_VIEW_CLIENT_H__ #define __SNAP_VIEW_CLIENT_H__ -#include "glusterfs.h" -#include "logging.h" -#include "dict.h" -#include "xlator.h" -#include "defaults.h" +#include <glusterfs/glusterfs.h> +#include <glusterfs/logging.h> +#include <glusterfs/dict.h> +#include <glusterfs/xlator.h> +#include <glusterfs/defaults.h> #include "snapview-client-mem-types.h" +#include "snapview-client-messages.h" struct __svc_local { - loc_t loc; - xlator_t *subvolume; - fd_t *fd; - void *cookie; - dict_t *xdata; + loc_t loc; + xlator_t *subvolume; + fd_t *fd; + void *cookie; + dict_t *xdata; + uint16_t revalidate; }; typedef struct __svc_local svc_local_t; -void -svc_local_free (svc_local_t *local); - -#define SVC_STACK_UNWIND(fop, frame, params ...) 
do { \ - svc_local_t *__local = NULL; \ - if (frame) { \ - __local = frame->local; \ - frame->local = NULL; \ - } \ - STACK_UNWIND_STRICT (fop, frame, params); \ - svc_local_free (__local); \ - } while (0) - -#define SVC_ENTRY_POINT_SET(this, xdata, op_ret, op_errno, new_xdata, \ - priv, ret, label) \ - do { \ - if (!xdata) { \ - xdata = new_xdata = dict_new (); \ - if (!new_xdata) { \ - gf_log (this->name, GF_LOG_ERROR, \ - "failed to allocate new dict"); \ - op_ret = -1; \ - op_errno = ENOMEM; \ - goto label; \ - } \ - } \ - ret = dict_set_str (xdata, "entry-point", "true"); \ - if (ret) { \ - gf_log (this->name, GF_LOG_ERROR, \ - "failed to set dict"); \ - op_ret = -1; \ - op_errno = ENOMEM; \ - goto label; \ - } \ - } while (0); - -#define SVC_GET_SUBVOL_FROM_CTX(this, op_ret, op_errno, inode_type, ret, \ - inode, subvolume, label) \ - do { \ - ret = svc_inode_ctx_get (this, inode, &inode_type); \ - if (ret < 0) { \ - gf_log (this->name, GF_LOG_ERROR, \ - "inode context not found for gfid %s", \ - uuid_utoa (inode->gfid)); \ - op_ret = -1; \ - op_errno = EINVAL; \ - goto label; \ - } \ - \ - subvolume = svc_get_subvolume (this, inode_type); \ - } while (0); +#define SVC_STACK_UNWIND(fop, frame, params...) 
\ + do { \ + svc_local_t *__local = NULL; \ + if (frame) { \ + __local = frame->local; \ + frame->local = NULL; \ + } \ + STACK_UNWIND_STRICT(fop, frame, params); \ + svc_local_free(__local); \ + } while (0) + +#define SVC_ENTRY_POINT_SET(this, xdata, op_ret, op_errno, new_xdata, ret, \ + label) \ + do { \ + if (!xdata) { \ + xdata = new_xdata = dict_new(); \ + if (!new_xdata) { \ + gf_log(this->name, GF_LOG_ERROR, \ + "failed to allocate new dict"); \ + op_ret = -1; \ + op_errno = ENOMEM; \ + goto label; \ + } \ + } \ + ret = dict_set_str(xdata, "entry-point", "true"); \ + if (ret) { \ + gf_log(this->name, GF_LOG_ERROR, "failed to set dict"); \ + op_ret = -1; \ + op_errno = ENOMEM; \ + goto label; \ + } \ + } while (0); + +#define SVC_GET_SUBVOL_FROM_CTX(this, op_ret, op_errno, inode_type, ret, \ + inode, subvolume, label) \ + do { \ + ret = svc_inode_ctx_get(this, inode, &inode_type); \ + if (ret < 0) { \ + gf_log(this->name, GF_LOG_ERROR, \ + "inode context not found for gfid %s", \ + uuid_utoa(inode->gfid)); \ + op_ret = -1; \ + op_errno = EINVAL; \ + goto label; \ + } \ + \ + subvolume = svc_get_subvolume(this, inode_type); \ + } while (0); struct svc_private { - char *path; - char *special_dir; /* needed for samba */ - gf_boolean_t show_entry_point; + char *path; + char *special_dir; /* needed for samba */ + gf_boolean_t show_entry_point; + gf_lock_t lock; /* mainly to guard private->path */ }; typedef struct svc_private svc_private_t; struct svc_fd { - off_t last_offset; - gf_boolean_t entry_point_handled; - gf_boolean_t special_dir; + off_t last_offset; + gf_boolean_t entry_point_handled; + gf_boolean_t special_dir; }; typedef struct svc_fd svc_fd_t; -typedef enum { - NORMAL_INODE = 1, - VIRTUAL_INODE -} inode_type_t; - -void svc_local_free (svc_local_t *local); - -xlator_t * -svc_get_subvolume (xlator_t *this, int inode_type); - -int -__svc_inode_ctx_get (xlator_t *this, inode_t *inode, int *inode_type); +typedef enum { NORMAL_INODE = 1, VIRTUAL_INODE } 
inode_type_t; int -svc_inode_ctx_get (xlator_t *this, inode_t *inode, int *inode_type); - -int32_t -svc_inode_ctx_set (xlator_t *this, inode_t *inode, int inode_type); - -void -svc_local_free (svc_local_t *local); +gf_svc_special_dir_revalidate_lookup(call_frame_t *frame, xlator_t *this, + dict_t *xdata); -gf_boolean_t -svc_readdir_on_special_dir (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - gf_dirent_t *entries, dict_t *xdata); #endif /* __SNAP_VIEW_CLIENT_H__ */ diff --git a/xlators/features/snapview-server/src/Makefile.am b/xlators/features/snapview-server/src/Makefile.am index 004741cedad..2935f138a4c 100644 --- a/xlators/features/snapview-server/src/Makefile.am +++ b/xlators/features/snapview-server/src/Makefile.am @@ -1,21 +1,24 @@ +if WITH_SERVER xlator_LTLIBRARIES = snapview-server.la +endif xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features -snapview_server_la_LDFLAGS = $(GF_XLATOR_DEFAULT_LDFLAGS) +snapview_server_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) -snapview_server_la_SOURCES = snapview-server.c snapview-server-mgmt.c snapview-server-helpers.c -snapview_server_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la\ - $(top_builddir)/api/src/libgfapi.la\ - $(RLLIBS) $(top_builddir)/rpc/xdr/src/libgfxdr.la \ - $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la +snapview_server_la_SOURCES = snapview-server.c snapview-server-mgmt.c \ + snapview-server-helpers.c -noinst_HEADERS = snapview-server.h snapview-server-mem-types.h +snapview_server_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ + $(top_builddir)/api/src/libgfapi.la \ + $(RLLIBS) $(top_builddir)/rpc/xdr/src/libgfxdr.la \ + $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la + +noinst_HEADERS = snapview-server.h snapview-server-mem-types.h snapview-server-messages.h AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ - -I$(top_srcdir)/api/src \ - -I$(top_srcdir)/rpc/rpc-lib/src \ - -I$(top_srcdir)/rpc/xdr/src 
\ - -DDATADIR=\"$(localstatedir)\" + -I$(top_srcdir)/api/src -I$(top_srcdir)/rpc/rpc-lib/src \ + -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \ + -DDATADIR=\"$(localstatedir)\" AM_CFLAGS = -Wall $(GF_CFLAGS) diff --git a/xlators/features/snapview-server/src/snapview-server-helpers.c b/xlators/features/snapview-server/src/snapview-server-helpers.c index 38ca7ae6342..62c1ddac49c 100644 --- a/xlators/features/snapview-server/src/snapview-server-helpers.c +++ b/xlators/features/snapview-server/src/snapview-server-helpers.c @@ -10,580 +10,706 @@ #include "snapview-server.h" #include "snapview-server-mem-types.h" -#include "xlator.h" +#include <glusterfs/xlator.h> #include "rpc-clnt.h" #include "xdr-generic.h" #include "protocol-common.h" #include <pthread.h> - int -__svs_inode_ctx_set (xlator_t *this, inode_t *inode, svs_inode_t *svs_inode) +__svs_inode_ctx_set(xlator_t *this, inode_t *inode, svs_inode_t *svs_inode) { - uint64_t value = 0; - int ret = -1; + uint64_t value = 0; + int ret = -1; - GF_VALIDATE_OR_GOTO ("snapview-server", this, out); - GF_VALIDATE_OR_GOTO (this->name, inode, out); - GF_VALIDATE_OR_GOTO (this->name, svs_inode, out); + GF_VALIDATE_OR_GOTO("snapview-server", this, out); + GF_VALIDATE_OR_GOTO(this->name, inode, out); + GF_VALIDATE_OR_GOTO(this->name, svs_inode, out); - value = (uint64_t)(long) svs_inode; + value = (uint64_t)(long)svs_inode; - ret = __inode_ctx_set (inode, this, &value); + ret = __inode_ctx_set(inode, this, &value); out: - return ret; + return ret; } svs_inode_t * -__svs_inode_ctx_get (xlator_t *this, inode_t *inode) +__svs_inode_ctx_get(xlator_t *this, inode_t *inode) { - svs_inode_t *svs_inode = NULL; - uint64_t value = 0; - int ret = -1; + svs_inode_t *svs_inode = NULL; + uint64_t value = 0; + int ret = -1; - GF_VALIDATE_OR_GOTO ("snapview-server", this, out); - GF_VALIDATE_OR_GOTO (this->name, inode, out); + GF_VALIDATE_OR_GOTO("snapview-server", this, out); + GF_VALIDATE_OR_GOTO(this->name, inode, out); - ret = 
__inode_ctx_get (inode, this, &value); - if (ret) - goto out; + ret = __inode_ctx_get(inode, this, &value); + if (ret) + goto out; - svs_inode = (svs_inode_t *) ((long) value); + svs_inode = (svs_inode_t *)((long)value); out: - return svs_inode; + return svs_inode; } svs_inode_t * -svs_inode_ctx_get (xlator_t *this, inode_t *inode) +svs_inode_ctx_get(xlator_t *this, inode_t *inode) { - svs_inode_t *svs_inode = NULL; + svs_inode_t *svs_inode = NULL; - GF_VALIDATE_OR_GOTO ("snapview-server", this, out); - GF_VALIDATE_OR_GOTO (this->name, inode, out); + GF_VALIDATE_OR_GOTO("snapview-server", this, out); + GF_VALIDATE_OR_GOTO(this->name, inode, out); - LOCK (&inode->lock); - { - svs_inode = __svs_inode_ctx_get (this, inode); - } - UNLOCK (&inode->lock); + LOCK(&inode->lock); + { + svs_inode = __svs_inode_ctx_get(this, inode); + } + UNLOCK(&inode->lock); out: - return svs_inode; + return svs_inode; } int32_t -svs_inode_ctx_set (xlator_t *this, inode_t *inode, svs_inode_t *svs_inode) +svs_inode_ctx_set(xlator_t *this, inode_t *inode, svs_inode_t *svs_inode) { - int32_t ret = -1; + int32_t ret = -1; - GF_VALIDATE_OR_GOTO ("snapview-server", this, out); - GF_VALIDATE_OR_GOTO (this->name, inode, out); - GF_VALIDATE_OR_GOTO (this->name, svs_inode, out); + GF_VALIDATE_OR_GOTO("snapview-server", this, out); + GF_VALIDATE_OR_GOTO(this->name, inode, out); + GF_VALIDATE_OR_GOTO(this->name, svs_inode, out); - LOCK (&inode->lock); - { - ret = __svs_inode_ctx_set (this, inode, svs_inode); - } - UNLOCK (&inode->lock); + LOCK(&inode->lock); + { + ret = __svs_inode_ctx_set(this, inode, svs_inode); + } + UNLOCK(&inode->lock); out: - return ret; + return ret; } svs_inode_t * -svs_inode_new (void) +svs_inode_new(void) { - svs_inode_t *svs_inode = NULL; + svs_inode_t *svs_inode = NULL; - svs_inode = GF_CALLOC (1, sizeof (*svs_inode), gf_svs_mt_svs_inode_t); + svs_inode = GF_CALLOC(1, sizeof(*svs_inode), gf_svs_mt_svs_inode_t); - return svs_inode; + return svs_inode; } svs_inode_t * 
-svs_inode_ctx_get_or_new (xlator_t *this, inode_t *inode) +svs_inode_ctx_get_or_new(xlator_t *this, inode_t *inode) { - svs_inode_t *svs_inode = NULL; - int ret = -1; - - GF_VALIDATE_OR_GOTO ("snapview-server", this, out); - GF_VALIDATE_OR_GOTO (this->name, inode, out); - - LOCK (&inode->lock); - { - svs_inode = __svs_inode_ctx_get (this, inode); - if (!svs_inode) { - svs_inode = svs_inode_new (); - if (svs_inode) { - ret = __svs_inode_ctx_set (this, inode, - svs_inode); - if (ret) { - GF_FREE (svs_inode); - svs_inode = NULL; - } - } + svs_inode_t *svs_inode = NULL; + int ret = -1; + + GF_VALIDATE_OR_GOTO("snapview-server", this, out); + GF_VALIDATE_OR_GOTO(this->name, inode, out); + + LOCK(&inode->lock); + { + svs_inode = __svs_inode_ctx_get(this, inode); + if (!svs_inode) { + svs_inode = svs_inode_new(); + if (svs_inode) { + ret = __svs_inode_ctx_set(this, inode, svs_inode); + if (ret) { + GF_FREE(svs_inode); + svs_inode = NULL; } + } } - UNLOCK (&inode->lock); + } + UNLOCK(&inode->lock); out: - return svs_inode; + return svs_inode; } svs_fd_t * -svs_fd_new (void) +svs_fd_new(void) { - svs_fd_t *svs_fd = NULL; + svs_fd_t *svs_fd = NULL; - svs_fd = GF_CALLOC (1, sizeof (*svs_fd), gf_svs_mt_svs_fd_t); + svs_fd = GF_CALLOC(1, sizeof(*svs_fd), gf_svs_mt_svs_fd_t); - return svs_fd; + return svs_fd; } int -__svs_fd_ctx_set (xlator_t *this, fd_t *fd, svs_fd_t *svs_fd) +__svs_fd_ctx_set(xlator_t *this, fd_t *fd, svs_fd_t *svs_fd) { - uint64_t value = 0; - int ret = -1; + uint64_t value = 0; + int ret = -1; - GF_VALIDATE_OR_GOTO ("snapview-server", this, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - GF_VALIDATE_OR_GOTO (this->name, svs_fd, out); + GF_VALIDATE_OR_GOTO("snapview-server", this, out); + GF_VALIDATE_OR_GOTO(this->name, fd, out); + GF_VALIDATE_OR_GOTO(this->name, svs_fd, out); - value = (uint64_t)(long) svs_fd; + value = (uint64_t)(long)svs_fd; - ret = __fd_ctx_set (fd, this, value); + ret = __fd_ctx_set(fd, this, value); out: - return ret; + return 
ret; } svs_fd_t * -__svs_fd_ctx_get (xlator_t *this, fd_t *fd) +__svs_fd_ctx_get(xlator_t *this, fd_t *fd) { - svs_fd_t *svs_fd = NULL; - uint64_t value = 0; - int ret = -1; + svs_fd_t *svs_fd = NULL; + uint64_t value = 0; + int ret = -1; - GF_VALIDATE_OR_GOTO ("snapview-server", this, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); + GF_VALIDATE_OR_GOTO("snapview-server", this, out); + GF_VALIDATE_OR_GOTO(this->name, fd, out); - ret = __fd_ctx_get (fd, this, &value); - if (ret) - return NULL; + ret = __fd_ctx_get(fd, this, &value); + if (ret) + return NULL; - svs_fd = (svs_fd_t *) ((long) value); + svs_fd = (svs_fd_t *)((long)value); out: - return svs_fd; + return svs_fd; } svs_fd_t * -svs_fd_ctx_get (xlator_t *this, fd_t *fd) +svs_fd_ctx_get(xlator_t *this, fd_t *fd) { - svs_fd_t *svs_fd = NULL; + svs_fd_t *svs_fd = NULL; - GF_VALIDATE_OR_GOTO ("snapview-server", this, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); + GF_VALIDATE_OR_GOTO("snapview-server", this, out); + GF_VALIDATE_OR_GOTO(this->name, fd, out); - LOCK (&fd->lock); - { - svs_fd = __svs_fd_ctx_get (this, fd); - } - UNLOCK (&fd->lock); + LOCK(&fd->lock); + { + svs_fd = __svs_fd_ctx_get(this, fd); + } + UNLOCK(&fd->lock); out: - return svs_fd; + return svs_fd; } int32_t -svs_fd_ctx_set (xlator_t *this, fd_t *fd, svs_fd_t *svs_fd) +svs_fd_ctx_set(xlator_t *this, fd_t *fd, svs_fd_t *svs_fd) { - int32_t ret = -1; + int32_t ret = -1; - GF_VALIDATE_OR_GOTO ("snapview-server", this, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - GF_VALIDATE_OR_GOTO (this->name, svs_fd, out); + GF_VALIDATE_OR_GOTO("snapview-server", this, out); + GF_VALIDATE_OR_GOTO(this->name, fd, out); + GF_VALIDATE_OR_GOTO(this->name, svs_fd, out); - LOCK (&fd->lock); - { - ret = __svs_fd_ctx_set (this, fd, svs_fd); - } - UNLOCK (&fd->lock); + LOCK(&fd->lock); + { + ret = __svs_fd_ctx_set(this, fd, svs_fd); + } + UNLOCK(&fd->lock); out: - return ret; + return ret; } svs_fd_t * -__svs_fd_ctx_get_or_new (xlator_t *this, fd_t 
*fd) +__svs_fd_ctx_get_or_new(xlator_t *this, fd_t *fd) { - svs_fd_t *svs_fd = NULL; - int ret = -1; - glfs_t *fs = NULL; - glfs_object_t *object = NULL; - svs_inode_t *inode_ctx = NULL; - glfs_fd_t *glfd = NULL; - inode_t *inode = NULL; - - GF_VALIDATE_OR_GOTO ("snapview-server", this, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - - inode = fd->inode; - svs_fd = __svs_fd_ctx_get (this, fd); - if (svs_fd) { - ret = 0; - goto out; + svs_fd_t *svs_fd = NULL; + int ret = -1; + glfs_t *fs = NULL; + glfs_object_t *object = NULL; + svs_inode_t *inode_ctx = NULL; + glfs_fd_t *glfd = NULL; + inode_t *inode = NULL; + + GF_VALIDATE_OR_GOTO("snapview-server", this, out); + GF_VALIDATE_OR_GOTO(this->name, fd, out); + + inode = fd->inode; + svs_fd = __svs_fd_ctx_get(this, fd); + if (svs_fd) { + ret = 0; + goto out; + } + + svs_fd = svs_fd_new(); + if (!svs_fd) { + gf_msg(this->name, GF_LOG_ERROR, 0, SVS_MSG_NEW_FD_CTX_FAILED, + "failed to allocate new fd " + "context for gfid %s", + uuid_utoa(inode->gfid)); + goto out; + } + + if (fd_is_anonymous(fd)) { + inode_ctx = svs_inode_ctx_get(this, inode); + if (!inode_ctx) { + gf_msg(this->name, GF_LOG_ERROR, 0, + SVS_MSG_GET_INODE_CONTEXT_FAILED, + "failed to get inode " + "context for %s", + uuid_utoa(inode->gfid)); + goto out; } - svs_fd = svs_fd_new (); - if (!svs_fd) { - gf_log (this->name, GF_LOG_ERROR, "failed to allocate new fd " - "context for gfid %s", uuid_utoa (inode->gfid)); + fs = inode_ctx->fs; + object = inode_ctx->object; + + if (inode->ia_type == IA_IFDIR) { + glfd = glfs_h_opendir(fs, object); + if (!glfd) { + gf_msg(this->name, GF_LOG_ERROR, errno, SVS_MSG_OPENDIR_FAILED, + "failed to " + "open the directory %s", + uuid_utoa(inode->gfid)); goto out; + } } - if (fd_is_anonymous (fd)) { - inode_ctx = svs_inode_ctx_get (this, inode); - if (!inode_ctx) { - gf_log (this->name, GF_LOG_ERROR, "failed to get inode " - "context for %s", uuid_utoa (inode->gfid)); - goto out; - } - - fs = inode_ctx->fs; - object = 
inode_ctx->object; - - if (inode->ia_type == IA_IFDIR) { - glfd = glfs_h_opendir (fs, object); - if (!glfd) { - gf_log (this->name, GF_LOG_ERROR, "failed to " - "open the directory %s", - uuid_utoa (inode->gfid)); - goto out; - } - } - - if (inode->ia_type == IA_IFREG) { - glfd = glfs_h_open (fs, object, O_RDONLY|O_LARGEFILE); - if (!glfd) { - gf_log (this->name, GF_LOG_ERROR, "failed to " - "open the file %s", - uuid_utoa (inode->gfid)); - goto out; - } - } - - svs_fd->fd = glfd; + if (inode->ia_type == IA_IFREG) { + glfd = glfs_h_open(fs, object, O_RDONLY | O_LARGEFILE); + if (!glfd) { + gf_msg(this->name, GF_LOG_ERROR, errno, SVS_MSG_OPEN_FAILED, + "failed to " + "open the file %s", + uuid_utoa(inode->gfid)); + goto out; + } } - ret = __svs_fd_ctx_set (this, fd, svs_fd); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "failed to set fd context " - "for gfid %s", uuid_utoa (inode->gfid)); - if (svs_fd->fd) { - if (inode->ia_type == IA_IFDIR) { - ret = glfs_closedir (svs_fd->fd); - if (ret) - gf_log (this->name, GF_LOG_ERROR, - "failed to close the fd for %s", - uuid_utoa (inode->gfid)); - } - if (inode->ia_type == IA_IFREG) { - ret = glfs_close (svs_fd->fd); - if (ret) - gf_log (this->name, GF_LOG_ERROR, - "failed to close the fd for %s", - uuid_utoa (inode->gfid)); - } - } - ret = -1; + svs_fd->fd = glfd; + } + + ret = __svs_fd_ctx_set(this, fd, svs_fd); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SVS_MSG_SET_FD_CONTEXT_FAILED, + "failed to set fd context " + "for gfid %s", + uuid_utoa(inode->gfid)); + if (svs_fd->fd) { + if (inode->ia_type == IA_IFDIR) { + ret = glfs_closedir(svs_fd->fd); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, errno, + SVS_MSG_CLOSEDIR_FAILED, + "failed to close the fd for %s", + uuid_utoa(inode->gfid)); + } + if (inode->ia_type == IA_IFREG) { + ret = glfs_close(svs_fd->fd); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, SVS_MSG_CLOSE_FAILED, + "failed to close the fd for %s", + uuid_utoa(inode->gfid)); + } } + ret = -1; + } 
out: - if (ret) { - GF_FREE (svs_fd); - svs_fd = NULL; - } + if (ret) { + GF_FREE(svs_fd); + svs_fd = NULL; + } - return svs_fd; + return svs_fd; } svs_fd_t * -svs_fd_ctx_get_or_new (xlator_t *this, fd_t *fd) +svs_fd_ctx_get_or_new(xlator_t *this, fd_t *fd) { - svs_fd_t *svs_fd = NULL; + svs_fd_t *svs_fd = NULL; - GF_VALIDATE_OR_GOTO ("snapview-server", this, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); + GF_VALIDATE_OR_GOTO("snapview-server", this, out); + GF_VALIDATE_OR_GOTO(this->name, fd, out); - LOCK (&fd->lock); - { - svs_fd = __svs_fd_ctx_get_or_new (this, fd); - } - UNLOCK (&fd->lock); + LOCK(&fd->lock); + { + svs_fd = __svs_fd_ctx_get_or_new(this, fd); + } + UNLOCK(&fd->lock); + +out: + return svs_fd; +} + +int +svs_uuid_generate(xlator_t *this, uuid_t gfid, char *snapname, + uuid_t origin_gfid) +{ + char ino_string[NAME_MAX + 32] = ""; + uuid_t tmp = { + 0, + }; + int ret = -1; + + GF_VALIDATE_OR_GOTO("snapview-server", this, out); + GF_VALIDATE_OR_GOTO(this->name, snapname, out); + + (void)snprintf(ino_string, sizeof(ino_string), "%s%s", snapname, + uuid_utoa(origin_gfid)); + + if (gf_gfid_generate_from_xxh64(tmp, ino_string)) { + gf_msg(this->name, GF_LOG_WARNING, 0, SVS_MSG_GFID_GEN_FAILED, + "failed to generate " + "gfid for object with actual gfid of %s " + "(snapname: %s, key: %s)", + uuid_utoa(origin_gfid), snapname, ino_string); + goto out; + } + + gf_uuid_copy(gfid, tmp); + + ret = 0; + + gf_msg_debug(this->name, 0, "gfid generated is %s ", uuid_utoa(gfid)); out: - return svs_fd; + return ret; } void -svs_fill_ino_from_gfid (struct iatt *buf) +svs_fill_ino_from_gfid(struct iatt *buf) { - uint64_t temp_ino = 0; - int j = 0; - int i = 0; - xlator_t *this = NULL; + xlator_t *this = NULL; - this = THIS; + this = THIS; - GF_VALIDATE_OR_GOTO ("snapview-server", this, out); - GF_VALIDATE_OR_GOTO (this->name, buf, out); + GF_VALIDATE_OR_GOTO("snapview-server", this, out); + GF_VALIDATE_OR_GOTO(this->name, buf, out); - /* consider least significant 
8 bytes of value out of gfid */ - if (gf_uuid_is_null (buf->ia_gfid)) { - buf->ia_ino = -1; - goto out; - } - for (i = 15; i > (15 - 8); i--) { - temp_ino += (uint64_t)(buf->ia_gfid[i]) << j; - j += 8; - } - buf->ia_ino = temp_ino; + /* consider least significant 8 bytes of value out of gfid */ + if (gf_uuid_is_null(buf->ia_gfid)) { + buf->ia_ino = -1; + goto out; + } + + buf->ia_ino = gfid_to_ino(buf->ia_gfid); out: - return; + return; } void -svs_iatt_fill (uuid_t gfid, struct iatt *buf) +svs_iatt_fill(uuid_t gfid, struct iatt *buf) { - struct timeval tv = {0, }; - xlator_t *this = NULL; + struct timeval tv = { + 0, + }; + xlator_t *this = NULL; - this = THIS; + this = THIS; - GF_VALIDATE_OR_GOTO ("snapview-server", this, out); - GF_VALIDATE_OR_GOTO (this->name, buf, out); + GF_VALIDATE_OR_GOTO("snapview-server", this, out); + GF_VALIDATE_OR_GOTO(this->name, buf, out); - buf->ia_type = IA_IFDIR; - buf->ia_uid = 0; - buf->ia_gid = 0; - buf->ia_size = 0; - buf->ia_nlink = 2; - buf->ia_blocks = 8; - buf->ia_size = 4096; + buf->ia_type = IA_IFDIR; + buf->ia_uid = 0; + buf->ia_gid = 0; + buf->ia_size = 0; + buf->ia_nlink = 2; + buf->ia_blocks = 8; + buf->ia_size = 4096; - gf_uuid_copy (buf->ia_gfid, gfid); - svs_fill_ino_from_gfid (buf); + gf_uuid_copy(buf->ia_gfid, gfid); + svs_fill_ino_from_gfid(buf); - buf->ia_prot = ia_prot_from_st_mode (0755); + buf->ia_prot = ia_prot_from_st_mode(0755); - gettimeofday (&tv, 0); + gettimeofday(&tv, 0); - buf->ia_mtime = buf->ia_atime = buf->ia_ctime = tv.tv_sec; - buf->ia_mtime_nsec = buf->ia_atime_nsec = buf->ia_ctime_nsec = - (tv.tv_usec * 1000); + buf->ia_mtime = buf->ia_atime = buf->ia_ctime = tv.tv_sec; + buf->ia_mtime_nsec = buf->ia_atime_nsec = buf->ia_ctime_nsec = (tv.tv_usec * + 1000); out: - return; + return; } /* priv->snaplist_lock should be held before calling this function */ snap_dirent_t * -__svs_get_snap_dirent (xlator_t *this, const char *name) +__svs_get_snap_dirent(xlator_t *this, const char *name) { - 
svs_private_t *private = NULL; - int i = 0; - snap_dirent_t *dirents = NULL; - snap_dirent_t *tmp_dirent = NULL; - snap_dirent_t *dirent = NULL; - - private = this->private; - - dirents = private->dirents; - if (!dirents) { - goto out; - } - - tmp_dirent = dirents; - for (i = 0; i < private->num_snaps; i++) { - if (!strcmp (tmp_dirent->name, name)) { - dirent = tmp_dirent; - break; - } - tmp_dirent++; + svs_private_t *private = NULL; + int i = 0; + snap_dirent_t *dirents = NULL; + snap_dirent_t *tmp_dirent = NULL; + snap_dirent_t *dirent = NULL; + + private + = this->private; + + dirents = private->dirents; + if (!dirents) { + goto out; + } + + tmp_dirent = dirents; + for (i = 0; i < private->num_snaps; i++) { + if (!strcmp(tmp_dirent->name, name)) { + dirent = tmp_dirent; + break; } + tmp_dirent++; + } - out: - return dirent; +out: + return dirent; } glfs_t * -__svs_initialise_snapshot_volume (xlator_t *this, const char *name, - int32_t *op_errno) +__svs_initialise_snapshot_volume(xlator_t *this, const char *name, + int32_t *op_errno) { - svs_private_t *priv = NULL; - int32_t ret = -1; - int32_t local_errno = ESTALE; - snap_dirent_t *dirent = NULL; - char volname[PATH_MAX] = {0, }; - glfs_t *fs = NULL; - int loglevel = GF_LOG_INFO; - char logfile[PATH_MAX] = {0, }; - - GF_VALIDATE_OR_GOTO ("snapview-server", this, out); - GF_VALIDATE_OR_GOTO (this->name, this->private, out); - GF_VALIDATE_OR_GOTO (this->name, name, out); - - priv = this->private; - - dirent = __svs_get_snap_dirent (this, name); - if (!dirent) { - gf_log (this->name, GF_LOG_DEBUG, "snap entry for " - "name %s not found", name); - local_errno = ENOENT; - goto out; + svs_private_t *priv = NULL; + int32_t ret = -1; + int32_t local_errno = ESTALE; + snap_dirent_t *dirent = NULL; + char volname[PATH_MAX] = { + 0, + }; + glfs_t *fs = NULL; + int loglevel = GF_LOG_INFO; + char logfile[PATH_MAX] = { + 0, + }; + char *volfile_server = NULL; + + GF_VALIDATE_OR_GOTO("snapview-server", this, out); + 
GF_VALIDATE_OR_GOTO(this->name, this->private, out); + GF_VALIDATE_OR_GOTO(this->name, name, out); + + priv = this->private; + + dirent = __svs_get_snap_dirent(this, name); + if (!dirent) { + gf_msg_debug(this->name, 0, + "snap entry for " + "name %s not found", + name); + local_errno = ENOENT; + goto out; + } + + if (dirent->fs) { + ret = 0; + fs = dirent->fs; + goto out; + } + + snprintf(volname, sizeof(volname), "/snaps/%s/%s/%s", dirent->name, + dirent->snap_volname, dirent->snap_volname); + + fs = glfs_new(volname); + if (!fs) { + local_errno = ENOMEM; + gf_msg(this->name, GF_LOG_ERROR, local_errno, SVS_MSG_GLFS_NEW_FAILED, + "glfs instance for snap volume %s " + "failed", + dirent->name); + goto out; + } + + /* + * Before, localhost was used as the volfile server. But, with that + * method, accessing snapshots started giving ENOENT error if a + * specific bind address is mentioned in the glusterd volume file. + * Check the bug https://bugzilla.redhat.com/show_bug.cgi?id=1725211. + * So, the new method is tried below, where, snapview-server first + * uses the volfile server used by the snapd (obtained from the + * command line arguments saved in the global context of the process). + * If the volfile server in global context is NULL, then localhost + * is tried (like before). + */ + if (this->ctx->cmd_args.volfile_server) { + volfile_server = gf_strdup(this->ctx->cmd_args.volfile_server); + if (!volfile_server) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, + SVS_MSG_VOLFILE_SERVER_GET_FAIL, + "failed to copy volfile server %s. ", + this->ctx->cmd_args.volfile_server); + ret = -1; + goto out; } - - if (dirent->fs) { - ret = 0; - fs = dirent->fs; - goto out; + } else { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, + SVS_MSG_VOLFILE_SERVER_GET_FAIL, + "volfile server is NULL in cmd args. 
" + "Trying with localhost"); + volfile_server = gf_strdup("localhost"); + if (!volfile_server) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, + SVS_MSG_VOLFILE_SERVER_GET_FAIL, + "failed to copy volfile server localhost."); + ret = -1; + goto out; } + } + + ret = glfs_set_volfile_server(fs, "tcp", volfile_server, 24007); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, local_errno, + SVS_MSG_SET_VOLFILE_SERVR_FAILED, + "setting the " + "volfile server %s for snap volume %s " + "failed", + volfile_server, dirent->name); + goto out; + } + + snprintf(logfile, sizeof(logfile), + DEFAULT_SVD_LOG_FILE_DIRECTORY "/snaps/%s/%s-%s.log", + priv->volname, name, dirent->uuid); + + ret = glfs_set_logging(fs, logfile, loglevel); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, local_errno, + SVS_MSG_SET_LOGGING_FAILED, + "failed to set the " + "log file path"); + goto out; + } + + ret = glfs_init(fs); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, local_errno, SVS_MSG_GLFS_INIT_FAILED, + "initing the " + "fs for %s failed", + dirent->name); + goto out; + } + + ret = 0; - snprintf (volname, sizeof (volname), "/snaps/%s/%s", - dirent->name, dirent->snap_volname); +out: + if (ret) { + if (op_errno) + *op_errno = local_errno; + if (fs) + glfs_fini(fs); + fs = NULL; + } - fs = glfs_new (volname); - if (!fs) { - gf_log (this->name, GF_LOG_ERROR, - "glfs instance for snap volume %s " - "failed", dirent->name); - local_errno = ENOMEM; - goto out; - } + if (fs) { + dirent->fs = fs; + } - ret = glfs_set_volfile_server (fs, "tcp", "localhost", - 24007); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "setting the " - "volfile server for snap volume %s " - "failed", dirent->name); - goto out; - } + GF_FREE(volfile_server); + return fs; +} - snprintf (logfile, sizeof (logfile), - DEFAULT_SVD_LOG_FILE_DIRECTORY "/snaps/%s/%s-%s.log", - priv->volname, name, dirent->uuid); +glfs_t * +svs_initialise_snapshot_volume(xlator_t *this, const char *name, + int32_t *op_errno) +{ + glfs_t *fs = NULL; + 
svs_private_t *priv = NULL; - ret = glfs_set_logging(fs, logfile, loglevel); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "failed to set the " - "log file path"); - goto out; - } + GF_VALIDATE_OR_GOTO("snapview-server", this, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + GF_VALIDATE_OR_GOTO(this->name, name, out); - ret = glfs_init (fs); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "initing the " - "fs for %s failed", dirent->name); - goto out; - } + priv = this->private; - ret = 0; + LOCK(&priv->snaplist_lock); + { + fs = __svs_initialise_snapshot_volume(this, name, op_errno); + } + UNLOCK(&priv->snaplist_lock); out: - if (ret) { - if (op_errno) - *op_errno = local_errno; - if (fs) - glfs_fini (fs); - fs = NULL; - } + return fs; +} + +snap_dirent_t * +svs_get_latest_snap_entry(xlator_t *this) +{ + svs_private_t *priv = NULL; + snap_dirent_t *dirents = NULL; + snap_dirent_t *dirent = NULL; - if (fs) { - dirent->fs = fs; + GF_VALIDATE_OR_GOTO("svs", this, out); + + priv = this->private; + + LOCK(&priv->snaplist_lock); + { + dirents = priv->dirents; + if (!dirents) { + goto unlock; } + if (priv->num_snaps) + dirent = &dirents[priv->num_snaps - 1]; + } +unlock: + UNLOCK(&priv->snaplist_lock); - return fs; +out: + return dirent; } glfs_t * -svs_initialise_snapshot_volume (xlator_t *this, const char *name, - int32_t *op_errno) +svs_get_latest_snapshot(xlator_t *this) { - glfs_t *fs = NULL; - svs_private_t *priv = NULL; + glfs_t *fs = NULL; + snap_dirent_t *dirent = NULL; + svs_private_t *priv = NULL; - GF_VALIDATE_OR_GOTO ("snapview-server", this, out); - GF_VALIDATE_OR_GOTO (this->name, this->private, out); - GF_VALIDATE_OR_GOTO (this->name, name, out); + GF_VALIDATE_OR_GOTO("svs", this, out); + priv = this->private; - priv = this->private; + dirent = svs_get_latest_snap_entry(this); - LOCK (&priv->snaplist_lock); + if (dirent) { + LOCK(&priv->snaplist_lock); { - fs = __svs_initialise_snapshot_volume (this, name, op_errno); + fs = dirent->fs; } 
- UNLOCK (&priv->snaplist_lock); - + UNLOCK(&priv->snaplist_lock); + } out: - - return fs; + return fs; } -snap_dirent_t * -svs_get_latest_snap_entry (xlator_t *this) +glfs_t * +svs_inode_ctx_glfs_mapping(xlator_t *this, svs_inode_t *inode_ctx) { - svs_private_t *priv = NULL; - snap_dirent_t *dirents = NULL; - snap_dirent_t *dirent = NULL; + glfs_t *fs = NULL; - GF_VALIDATE_OR_GOTO ("svs", this, out); + GF_VALIDATE_OR_GOTO("svs", this, out); + GF_VALIDATE_OR_GOTO(this->name, inode_ctx, out); - priv = this->private; + fs = inode_ctx->fs; - LOCK (&priv->snaplist_lock); - { - dirents = priv->dirents; - if (!dirents) { - goto unlock; - } - if (priv->num_snaps) - dirent = &dirents[priv->num_snaps - 1]; - } -unlock: - UNLOCK (&priv->snaplist_lock); + SVS_CHECK_VALID_SNAPSHOT_HANDLE(fs, this); out: - return dirent; + return fs; } glfs_t * -svs_get_latest_snapshot (xlator_t *this) +svs_inode_glfs_mapping(xlator_t *this, inode_t *inode) { - glfs_t *fs = NULL; - snap_dirent_t *dirent = NULL; - svs_private_t *priv = NULL; - - GF_VALIDATE_OR_GOTO ("svs", this, out); - priv = this->private; + svs_inode_t *inode_ctx = NULL; + glfs_t *fs = NULL; - dirent = svs_get_latest_snap_entry (this); + inode_ctx = svs_inode_ctx_get(this, inode); + if (!inode_ctx) { + gf_msg(this->name, GF_LOG_ERROR, 0, SVS_MSG_GET_INODE_CONTEXT_FAILED, + "inode context not found for" + " the inode %s", + uuid_utoa(inode->gfid)); + goto out; + } - if (dirent) { - LOCK (&priv->snaplist_lock); - { - fs = dirent->fs; - } - UNLOCK (&priv->snaplist_lock); - } + fs = svs_inode_ctx_glfs_mapping(this, inode_ctx); out: - return fs; + return fs; } diff --git a/xlators/features/snapview-server/src/snapview-server-mem-types.h b/xlators/features/snapview-server/src/snapview-server-mem-types.h index a8035165000..63456b85323 100644 --- a/xlators/features/snapview-server/src/snapview-server-mem-types.h +++ b/xlators/features/snapview-server/src/snapview-server-mem-types.h @@ -11,16 +11,15 @@ #ifndef __SNAP_VIEW_MEM_TYPES_H 
#define __SNAP_VIEW_MEM_TYPES_H -#include "mem-types.h" +#include <glusterfs/mem-types.h> enum snapview_mem_types { - gf_svs_mt_priv_t = gf_common_mt_end + 1, - gf_svs_mt_svs_inode_t, - gf_svs_mt_dirents_t, - gf_svs_mt_svs_fd_t, - gf_svs_mt_snaplist_t, - gf_svs_mt_end + gf_svs_mt_priv_t = gf_common_mt_end + 1, + gf_svs_mt_svs_inode_t, + gf_svs_mt_dirents_t, + gf_svs_mt_svs_fd_t, + gf_svs_mt_snaplist_t, + gf_svs_mt_end }; #endif - diff --git a/xlators/features/snapview-server/src/snapview-server-messages.h b/xlators/features/snapview-server/src/snapview-server-messages.h new file mode 100644 index 00000000000..f634ab5d2b0 --- /dev/null +++ b/xlators/features/snapview-server/src/snapview-server-messages.h @@ -0,0 +1,54 @@ +/* + Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. + */ + +#ifndef _SNAPVIEW_SERVER_MESSAGES_H_ +#define _SNAPVIEW_SERVER_MESSAGES_H_ + +#include <glusterfs/glfs-message-id.h> + +/* To add new message IDs, append new identifiers at the end of the list. + * + * Never remove a message ID. If it's not used anymore, you can rename it or + * leave it as it is, but not delete it. This is to prevent reutilization of + * IDs by other messages. + * + * The component name must match one of the entries defined in + * glfs-message-id.h. 
+ */ + +GLFS_MSGID(SNAPVIEW_SERVER, SVS_MSG_NO_MEMORY, SVS_MSG_MEM_ACNT_FAILED, + SVS_MSG_NULL_GFID, SVS_MSG_GET_LATEST_SNAP_FAILED, + SVS_MSG_INVALID_GLFS_CTX, SVS_MSG_LOCK_DESTROY_FAILED, + SVS_MSG_SNAPSHOT_LIST_CHANGED, SVS_MSG_MGMT_INIT_FAILED, + SVS_MSG_GET_SNAPSHOT_LIST_FAILED, SVS_MSG_GET_GLFS_H_OBJECT_FAILED, + SVS_MSG_PARENT_CTX_OR_NAME_NULL, SVS_MSG_SET_INODE_CONTEXT_FAILED, + SVS_MSG_GET_INODE_CONTEXT_FAILED, SVS_MSG_NEW_INODE_CTX_FAILED, + SVS_MSG_DELETE_INODE_CONTEXT_FAILED, SVS_MSG_SET_FD_CONTEXT_FAILED, + SVS_MSG_NEW_FD_CTX_FAILED, SVS_MSG_DELETE_FD_CTX_FAILED, + SVS_MSG_GETXATTR_FAILED, SVS_MSG_LISTXATTR_FAILED, + SVS_MSG_RELEASEDIR_FAILED, SVS_MSG_RELEASE_FAILED, + SVS_MSG_TELLDIR_FAILED, SVS_MSG_STAT_FAILED, SVS_MSG_STATFS_FAILED, + SVS_MSG_OPEN_FAILED, SVS_MSG_READ_FAILED, SVS_MSG_READLINK_FAILED, + SVS_MSG_ACCESS_FAILED, SVS_MSG_GET_FD_CONTEXT_FAILED, + SVS_MSG_DICT_SET_FAILED, SVS_MSG_OPENDIR_FAILED, + SVS_MSG_FS_INSTANCE_INVALID, SVS_MSG_SETFSUID_FAIL, + SVS_MSG_SETFSGID_FAIL, SVS_MSG_SETFSGRPS_FAIL, + SVS_MSG_BUILD_TRNSPRT_OPT_FAILED, SVS_MSG_RPC_INIT_FAILED, + SVS_MSG_REG_NOTIFY_FAILED, SVS_MSG_REG_CBK_PRGM_FAILED, + SVS_MSG_RPC_CLNT_START_FAILED, SVS_MSG_XDR_PAYLOAD_FAILED, + SVS_MSG_NULL_CTX, SVS_MSG_RPC_CALL_FAILED, SVS_MSG_XDR_DECODE_FAILED, + SVS_MSG_RSP_DICT_EMPTY, SVS_MSG_DICT_GET_FAILED, + SVS_MSG_SNAP_LIST_REFRESH_FAILED, SVS_MSG_RPC_REQ_FAILED, + SVS_MSG_CLOSEDIR_FAILED, SVS_MSG_CLOSE_FAILED, + SVS_MSG_GFID_GEN_FAILED, SVS_MSG_GLFS_NEW_FAILED, + SVS_MSG_SET_VOLFILE_SERVR_FAILED, SVS_MSG_SET_LOGGING_FAILED, + SVS_MSG_VOLFILE_SERVER_GET_FAIL, SVS_MSG_GLFS_INIT_FAILED); + +#endif /* !_SNAPVIEW_CLIENT_MESSAGES_H_ */ diff --git a/xlators/features/snapview-server/src/snapview-server-mgmt.c b/xlators/features/snapview-server/src/snapview-server-mgmt.c index fc2ff2ab10d..ecf31c3b880 100644 --- a/xlators/features/snapview-server/src/snapview-server-mgmt.c +++ b/xlators/features/snapview-server/src/snapview-server-mgmt.c @@ -12,465 +12,513 
@@ #include <pthread.h> int -mgmt_cbk_snap (struct rpc_clnt *rpc, void *mydata, void *data) +mgmt_cbk_snap(struct rpc_clnt *rpc, void *mydata, void *data) { - xlator_t *this = NULL; + xlator_t *this = NULL; - this = mydata; - GF_ASSERT (this); + this = mydata; + GF_ASSERT(this); - gf_log ("mgmt", GF_LOG_INFO, "list of snapshots changed"); + gf_msg("mgmt", GF_LOG_INFO, 0, SVS_MSG_SNAPSHOT_LIST_CHANGED, + "list of snapshots changed"); - svs_get_snapshot_list (this); - return 0; + svs_get_snapshot_list(this); + return 0; } -rpcclnt_cb_actor_t svs_cbk_actors[GF_CBK_MAXVALUE] = { - [GF_CBK_GET_SNAPS] = {"GETSNAPS", GF_CBK_GET_SNAPS, mgmt_cbk_snap}, +static rpcclnt_cb_actor_t svs_cbk_actors[GF_CBK_MAXVALUE] = { + [GF_CBK_GET_SNAPS] = {"GETSNAPS", mgmt_cbk_snap, GF_CBK_GET_SNAPS}, }; -struct rpcclnt_cb_program svs_cbk_prog = { - .progname = "GlusterFS Callback", - .prognum = GLUSTER_CBK_PROGRAM, - .progver = GLUSTER_CBK_VERSION, - .actors = svs_cbk_actors, - .numactors = GF_CBK_MAXVALUE, +static struct rpcclnt_cb_program svs_cbk_prog = { + .progname = "GlusterFS Callback", + .prognum = GLUSTER_CBK_PROGRAM, + .progver = GLUSTER_CBK_VERSION, + .actors = svs_cbk_actors, + .numactors = GF_CBK_MAXVALUE, }; -char *clnt_handshake_procs[GF_HNDSK_MAXVALUE] = { - [GF_HNDSK_NULL] = "NULL", - [GF_HNDSK_EVENT_NOTIFY] = "EVENTNOTIFY", +static char *clnt_handshake_procs[GF_HNDSK_MAXVALUE] = { + [GF_HNDSK_NULL] = "NULL", + [GF_HNDSK_EVENT_NOTIFY] = "EVENTNOTIFY", }; -rpc_clnt_prog_t svs_clnt_handshake_prog = { - .progname = "GlusterFS Handshake", - .prognum = GLUSTER_HNDSK_PROGRAM, - .progver = GLUSTER_HNDSK_VERSION, - .procnames = clnt_handshake_procs, +static rpc_clnt_prog_t svs_clnt_handshake_prog = { + .progname = "GlusterFS Handshake", + .prognum = GLUSTER_HNDSK_PROGRAM, + .progver = GLUSTER_HNDSK_VERSION, + .procnames = clnt_handshake_procs, }; -int -svs_mgmt_init (xlator_t *this) +static int +svs_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event, + void *data) 
{ - int ret = -1; - svs_private_t *priv = NULL; - dict_t *options = NULL; - int port = GF_DEFAULT_BASE_PORT; - char *host = NULL; - cmd_args_t *cmd_args = NULL; - glusterfs_ctx_t *ctx = NULL; - - GF_VALIDATE_OR_GOTO ("snapview-server", this, out); - GF_VALIDATE_OR_GOTO (this->name, this->private, out); - GF_VALIDATE_OR_GOTO (this->name, this->ctx, out); - - priv = this->private; - - ctx = this->ctx; - cmd_args = &ctx->cmd_args; - - host = "localhost"; - if (cmd_args->volfile_server) - host = cmd_args->volfile_server; - - ret = rpc_transport_inet_options_build (&options, host, port); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "failed to build the " - "transport options"); - goto out; - } - - priv->rpc = rpc_clnt_new (options, this, this->name, 8); - if (!priv->rpc) { - gf_log (this->name, GF_LOG_ERROR, "failed to initialize RPC"); - goto out; - } - - ret = rpcclnt_cbk_program_register (priv->rpc, &svs_cbk_prog, - this); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "failed to register callback program"); - goto out; - } - - ret = rpc_clnt_start (priv->rpc); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "failed to start the rpc " - "client"); - goto out; - } - - ret = 0; + xlator_t *this = NULL; + int ret = 0; + + this = mydata; + + switch (event) { + case RPC_CLNT_CONNECT: + ret = svs_get_snapshot_list(this); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, + SVS_MSG_GET_SNAPSHOT_LIST_FAILED, + "Error in refreshing the snaplist " + "infrastructure"); + ret = -1; + } + break; + default: + break; + } + return ret; +} - gf_log (this->name, GF_LOG_DEBUG, "svs mgmt init successful"); +int +svs_mgmt_init(xlator_t *this) +{ + int ret = -1; + svs_private_t *priv = NULL; + dict_t *options = NULL; + int port = GF_DEFAULT_BASE_PORT; + char *host = NULL; + cmd_args_t *cmd_args = NULL; + glusterfs_ctx_t *ctx = NULL; + xlator_cmdline_option_t *opt = NULL; + + GF_VALIDATE_OR_GOTO("snapview-server", this, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, 
out); + GF_VALIDATE_OR_GOTO(this->name, this->ctx, out); + + priv = this->private; + + ctx = this->ctx; + cmd_args = &ctx->cmd_args; + + host = "localhost"; + if (cmd_args->volfile_server) + host = cmd_args->volfile_server; + + options = dict_new(); + if (!options) + goto out; + + opt = find_xlator_option_in_cmd_args_t("address-family", cmd_args); + ret = rpc_transport_inet_options_build(options, host, port, + (opt != NULL ? opt->value : NULL)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SVS_MSG_BUILD_TRNSPRT_OPT_FAILED, + "failed to build the " + "transport options"); + goto out; + } + + priv->rpc = rpc_clnt_new(options, this, this->name, 8); + if (!priv->rpc) { + gf_msg(this->name, GF_LOG_ERROR, 0, SVS_MSG_RPC_INIT_FAILED, + "failed to initialize RPC"); + goto out; + } + + ret = rpc_clnt_register_notify(priv->rpc, svs_rpc_notify, this); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, SVS_MSG_REG_NOTIFY_FAILED, + "failed to register notify function"); + goto out; + } + + ret = rpcclnt_cbk_program_register(priv->rpc, &svs_cbk_prog, this); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SVS_MSG_REG_CBK_PRGM_FAILED, + "failed to register callback program"); + goto out; + } + + ret = rpc_clnt_start(priv->rpc); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SVS_MSG_RPC_CLNT_START_FAILED, + "failed to start the rpc " + "client"); + goto out; + } + + ret = 0; + + gf_msg_debug(this->name, 0, "svs mgmt init successful"); out: - if (ret) - if (priv) { - rpc_clnt_connection_cleanup (&priv->rpc->conn); - rpc_clnt_unref (priv->rpc); - priv->rpc = NULL; - } + if (options) + dict_unref(options); + if (ret) + if (priv) { + rpc_clnt_connection_cleanup(&priv->rpc->conn); + rpc_clnt_unref(priv->rpc); + priv->rpc = NULL; + } - return ret; + return ret; } int -svs_mgmt_submit_request (void *req, call_frame_t *frame, - glusterfs_ctx_t *ctx, - rpc_clnt_prog_t *prog, int procnum, - fop_cbk_fn_t cbkfn, xdrproc_t xdrproc) +svs_mgmt_submit_request(void *req, call_frame_t 
*frame, glusterfs_ctx_t *ctx, + rpc_clnt_prog_t *prog, int procnum, fop_cbk_fn_t cbkfn, + xdrproc_t xdrproc) { - int ret = -1; - int count = 0; - struct iovec iov = {0, }; - struct iobuf *iobuf = NULL; - struct iobref *iobref = NULL; - ssize_t xdr_size = 0; - - GF_VALIDATE_OR_GOTO ("snapview-server", frame, out); - GF_VALIDATE_OR_GOTO ("snapview-server", req, out); - GF_VALIDATE_OR_GOTO ("snapview-server", ctx, out); - GF_VALIDATE_OR_GOTO ("snapview-server", prog, out); - - GF_ASSERT (frame->this); - - iobref = iobref_new (); - if (!iobref) { - goto out; + int ret = -1; + int count = 0; + struct iovec iov = { + 0, + }; + struct iobuf *iobuf = NULL; + struct iobref *iobref = NULL; + ssize_t xdr_size = 0; + + GF_VALIDATE_OR_GOTO("snapview-server", frame, out); + GF_VALIDATE_OR_GOTO("snapview-server", req, out); + GF_VALIDATE_OR_GOTO("snapview-server", ctx, out); + GF_VALIDATE_OR_GOTO("snapview-server", prog, out); + + GF_ASSERT(frame->this); + + iobref = iobref_new(); + if (!iobref) { + gf_msg(frame->this->name, GF_LOG_WARNING, ENOMEM, SVS_MSG_NO_MEMORY, + "failed to allocate " + "new iobref"); + goto out; + } + + if (req) { + xdr_size = xdr_sizeof(xdrproc, req); + + iobuf = iobuf_get2(ctx->iobuf_pool, xdr_size); + if (!iobuf) { + goto out; } - if (req) { - xdr_size = xdr_sizeof (xdrproc, req); - - iobuf = iobuf_get2 (ctx->iobuf_pool, xdr_size); - if (!iobuf) { - goto out; - } - - iobref_add (iobref, iobuf); + iobref_add(iobref, iobuf); - iov.iov_base = iobuf->ptr; - iov.iov_len = iobuf_pagesize (iobuf); + iov.iov_base = iobuf->ptr; + iov.iov_len = iobuf_pagesize(iobuf); - /* Create the xdr payload */ - ret = xdr_serialize_generic (iov, req, xdrproc); - if (ret == -1) { - gf_log (frame->this->name, GF_LOG_WARNING, - "Failed to create XDR payload"); - goto out; - } - iov.iov_len = ret; - count = 1; + /* Create the xdr payload */ + ret = xdr_serialize_generic(iov, req, xdrproc); + if (ret == -1) { + gf_msg(frame->this->name, GF_LOG_WARNING, 0, + 
SVS_MSG_XDR_PAYLOAD_FAILED, "Failed to create XDR payload"); + goto out; } + iov.iov_len = ret; + count = 1; + } - ret = rpc_clnt_submit (ctx->mgmt, prog, procnum, cbkfn, - &iov, count, - NULL, 0, iobref, frame, NULL, 0, NULL, 0, NULL); + ret = rpc_clnt_submit(ctx->mgmt, prog, procnum, cbkfn, &iov, count, NULL, 0, + iobref, frame, NULL, 0, NULL, 0, NULL); out: - if (iobref) - iobref_unref (iobref); + if (iobref) + iobref_unref(iobref); - if (iobuf) - iobuf_unref (iobuf); - return ret; + if (iobuf) + iobuf_unref(iobuf); + return ret; } - int -mgmt_get_snapinfo_cbk (struct rpc_req *req, struct iovec *iov, - int count, void *myframe) +mgmt_get_snapinfo_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) { - gf_getsnap_name_uuid_rsp rsp = {0,}; - call_frame_t *frame = NULL; - glusterfs_ctx_t *ctx = NULL; - int ret = -1; - dict_t *dict = NULL; - char key[1024] = {0}; - int snapcount = 0; - svs_private_t *priv = NULL; - xlator_t *this = NULL; - int i = 0; - int j = 0; - char *value = NULL; - snap_dirent_t *dirents = NULL; - snap_dirent_t *old_dirents = NULL; - int oldcount = 0; - - GF_VALIDATE_OR_GOTO ("snapview-server", req, error_out); - GF_VALIDATE_OR_GOTO ("snapview-server", myframe, error_out); - GF_VALIDATE_OR_GOTO ("snapview-server", iov, error_out); - - frame = myframe; - this = frame->this; - ctx = frame->this->ctx; - priv = this->private; - old_dirents = priv->dirents; - - if (!ctx) { - gf_log (frame->this->name, GF_LOG_ERROR, "NULL context"); - errno = EINVAL; - goto out; - } - - if (-1 == req->rpc_status) { - gf_log (frame->this->name, GF_LOG_ERROR, - "RPC call is not successful"); - errno = EINVAL; - goto out; - } - - ret = xdr_to_generic (*iov, &rsp, - (xdrproc_t)xdr_gf_getsnap_name_uuid_rsp); - if (ret < 0) { - gf_log (frame->this->name, GF_LOG_ERROR, - "Failed to decode xdr response, rsp.op_ret = %d", - rsp.op_ret); - goto out; - } - - if (rsp.op_ret == -1) { - errno = rsp.op_errno; - ret = -1; - goto out; + gf_getsnap_name_uuid_rsp 
rsp = { + 0, + }; + call_frame_t *frame = NULL; + glusterfs_ctx_t *ctx = NULL; + int ret = -1; + dict_t *dict = NULL; + char key[32] = {0}; + int len; + int snapcount = 0; + svs_private_t *priv = NULL; + xlator_t *this = NULL; + int i = 0; + int j = 0; + char *value = NULL; + snap_dirent_t *dirents = NULL; + snap_dirent_t *old_dirents = NULL; + int oldcount = 0; + + GF_VALIDATE_OR_GOTO("snapview-server", req, error_out); + GF_VALIDATE_OR_GOTO("snapview-server", myframe, error_out); + GF_VALIDATE_OR_GOTO("snapview-server", iov, error_out); + + frame = myframe; + this = frame->this; + ctx = frame->this->ctx; + priv = this->private; + + if (!ctx) { + errno = EINVAL; + gf_msg(frame->this->name, GF_LOG_ERROR, errno, SVS_MSG_NULL_CTX, + "NULL context"); + goto out; + } + + if (-1 == req->rpc_status) { + errno = EINVAL; + gf_msg(frame->this->name, GF_LOG_ERROR, errno, SVS_MSG_RPC_CALL_FAILED, + "RPC call is not successful"); + goto out; + } + + ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_getsnap_name_uuid_rsp); + if (ret < 0) { + gf_msg(frame->this->name, GF_LOG_ERROR, 0, SVS_MSG_XDR_DECODE_FAILED, + "Failed to decode xdr response, rsp.op_ret = %d", rsp.op_ret); + goto out; + } + + if (rsp.op_ret == -1) { + errno = rsp.op_errno; + ret = -1; + goto out; + } + + if (!rsp.dict.dict_len) { + ret = -1; + errno = EINVAL; + gf_msg(frame->this->name, GF_LOG_ERROR, errno, SVS_MSG_RSP_DICT_EMPTY, + "Response dict is not populated"); + goto out; + } + + dict = dict_new(); + if (!dict) { + ret = -1; + errno = ENOMEM; + goto out; + } + + ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict); + if (ret) { + errno = EINVAL; + gf_msg(frame->this->name, GF_LOG_ERROR, errno, + LG_MSG_DICT_UNSERIAL_FAILED, "Failed to unserialize dictionary"); + goto out; + } + + ret = dict_get_int32(dict, "snap-count", (int32_t *)&snapcount); + if (ret) { + errno = EINVAL; + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, errno, SVS_MSG_DICT_GET_FAILED, + "Error retrieving snapcount"); + 
goto out; + } + + if (snapcount > 0) { + /* first time we are fetching snap list */ + dirents = GF_CALLOC(snapcount, sizeof(snap_dirent_t), + gf_svs_mt_dirents_t); + if (!dirents) { + errno = ENOMEM; + ret = -1; + gf_msg(frame->this->name, GF_LOG_ERROR, errno, SVS_MSG_NO_MEMORY, + "Unable to allocate memory"); + goto out; } + } - if (!rsp.dict.dict_len) { - gf_log (frame->this->name, GF_LOG_ERROR, - "Response dict is not populated"); - ret = -1; - errno = EINVAL; - goto out; + for (i = 0; i < snapcount; i++) { + len = snprintf(key, sizeof(key), "snap-volname.%d", i + 1); + ret = dict_get_strn(dict, key, len, &value); + if (ret) { + errno = EINVAL; + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, errno, SVS_MSG_DICT_GET_FAILED, + "Error retrieving snap volname %d", i + 1); + goto out; } - dict = dict_new (); - if (!dict) { - ret = -1; - errno = ENOMEM; - goto out; - } + strncpy(dirents[i].snap_volname, value, + sizeof(dirents[i].snap_volname)); - ret = dict_unserialize (rsp.dict.dict_val, rsp.dict.dict_len, &dict); + len = snprintf(key, sizeof(key), "snap-id.%d", i + 1); + ret = dict_get_strn(dict, key, len, &value); if (ret) { - gf_log (frame->this->name, GF_LOG_ERROR, - "Failed to unserialize dictionary"); - errno = EINVAL; - goto out; + errno = EINVAL; + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, errno, SVS_MSG_DICT_GET_FAILED, + "Error retrieving snap uuid %d", i + 1); + goto out; } + strncpy(dirents[i].uuid, value, sizeof(dirents[i].uuid)); - ret = dict_get_int32 (dict, "snap-count", (int32_t*)&snapcount); + len = snprintf(key, sizeof(key), "snapname.%d", i + 1); + ret = dict_get_strn(dict, key, len, &value); if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "Error retrieving snapcount"); - errno = EINVAL; - ret = -1; - goto out; + errno = EINVAL; + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, errno, SVS_MSG_DICT_GET_FAILED, + "Error retrieving snap name %d", i + 1); + goto out; } - - if (snapcount > 0) { - /* first time we are fetching snap list */ - dirents = 
GF_CALLOC (snapcount, sizeof (snap_dirent_t), - gf_svs_mt_dirents_t); - if (!dirents) { - gf_log (frame->this->name, GF_LOG_ERROR, - "Unable to allocate memory"); - errno = ENOMEM; - ret = -1; - goto out; - } - } - - for (i = 0; i < snapcount; i++) { - snprintf (key, sizeof (key), "snap-volname.%d", i+1); - ret = dict_get_str (dict, key, &value); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "Error retrieving snap volname %d", - i+1); - errno = EINVAL; - ret = -1; - goto out; - } - - strncpy (dirents[i].snap_volname, value, - sizeof (dirents[i].snap_volname)); - - snprintf (key, sizeof (key), "snap-id.%d", i+1); - ret = dict_get_str (dict, key, &value); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "Error retrieving snap uuid %d", i+1); - errno = EINVAL; - ret = -1; - goto out; - } - strncpy (dirents[i].uuid, value, - sizeof (dirents[i].uuid)); - - snprintf (key, sizeof (key), "snapname.%d", i+1); - ret = dict_get_str (dict, key, &value); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "Error retrieving snap name %d", i+1); - errno = EINVAL; - ret = -1; - goto out; - } - strncpy (dirents[i].name, value, - sizeof (dirents[i].name)); - } - - /* - * Got the new snap list populated in dirents - * The new snap list is either a subset or a superset of - * the existing snaplist old_dirents which has priv->num_snaps - * number of entries. - * - * If subset, then clean up the fs for entries which are - * no longer relevant. - * - * For other overlapping entries set the fs for new dirents - * entries which have a fs assigned already in old_dirents - * - * We do this as we don't want to do new glfs_init()s repeatedly - * as the dirents entries for snapshot volumes get repatedly - * cleaned up and allocated. 
And if we don't then that will lead - * to memleaks - */ - - LOCK (&priv->snaplist_lock); - { - oldcount = priv->num_snaps; - for (i = 0; i < priv->num_snaps; i++) { - for (j = 0; j < snapcount; j++) { - if ((!strcmp (old_dirents[i].name, - dirents[j].name)) && - (!strcmp (old_dirents[i].uuid, - dirents[j].uuid))) { - dirents[j].fs = old_dirents[i].fs; - old_dirents[i].fs = NULL; - break; - } - } + strncpy(dirents[i].name, value, sizeof(dirents[i].name)); + } + + /* + * Got the new snap list populated in dirents + * The new snap list is either a subset or a superset of + * the existing snaplist old_dirents which has priv->num_snaps + * number of entries. + * + * If subset, then clean up the fs for entries which are + * no longer relevant. + * + * For other overlapping entries set the fs for new dirents + * entries which have a fs assigned already in old_dirents + * + * We do this as we don't want to do new glfs_init()s repeatedly + * as the dirents entries for snapshot volumes get repatedly + * cleaned up and allocated. 
And if we don't then that will lead + * to memleaks + */ + + LOCK(&priv->snaplist_lock); + { + oldcount = priv->num_snaps; + old_dirents = priv->dirents; + for (i = 0; i < priv->num_snaps; i++) { + for (j = 0; j < snapcount; j++) { + if ((!strcmp(old_dirents[i].name, dirents[j].name)) && + (!strcmp(old_dirents[i].uuid, dirents[j].uuid))) { + dirents[j].fs = old_dirents[i].fs; + old_dirents[i].fs = NULL; + break; } - - priv->dirents = dirents; - priv->num_snaps = snapcount; + } } - UNLOCK (&priv->snaplist_lock); - if (old_dirents) { - for (i = 0; i < oldcount; i++) { - if (old_dirents[i].fs) - glfs_fini (old_dirents[i].fs); - } + priv->dirents = dirents; + priv->num_snaps = snapcount; + } + UNLOCK(&priv->snaplist_lock); + + if (old_dirents) { + for (i = 0; i < oldcount; i++) { + if (old_dirents[i].fs) + gf_msg_debug(this->name, 0, + "calling glfs_fini on " + "name: %s, snap_volname: %s, uuid: %s", + old_dirents[i].name, old_dirents[i].snap_volname, + old_dirents[i].uuid); + glfs_fini(old_dirents[i].fs); } + } - GF_FREE (old_dirents); + GF_FREE(old_dirents); - ret = 0; + ret = 0; out: - if (dict) { - dict_unref (dict); - } - free (rsp.dict.dict_val); - free (rsp.op_errstr); + if (dict) { + dict_unref(dict); + } + free(rsp.dict.dict_val); + free(rsp.op_errstr); - if (ret && dirents) { - gf_log (this->name, GF_LOG_WARNING, - "Could not update dirents with refreshed snap list"); - GF_FREE (dirents); - } + if (ret && dirents) { + gf_msg(this->name, GF_LOG_WARNING, 0, SVS_MSG_SNAP_LIST_REFRESH_FAILED, + "Could not update dirents with refreshed snap list"); + GF_FREE(dirents); + } - if (myframe) - SVS_STACK_DESTROY (myframe); + if (myframe) + SVS_STACK_DESTROY(myframe); error_out: - return ret; + return ret; } int -svs_get_snapshot_list (xlator_t *this) +svs_get_snapshot_list(xlator_t *this) { - gf_getsnap_name_uuid_req req = {{0,}}; - int ret = -1; - dict_t *dict = NULL; - glusterfs_ctx_t *ctx = NULL; - call_frame_t *frame = NULL; - svs_private_t *priv = NULL; - 
gf_boolean_t frame_cleanup = _gf_true; - - GF_VALIDATE_OR_GOTO ("snapview-server", this, out); - - ctx = this->ctx; - if (!ctx) { - gf_log (this->name, GF_LOG_ERROR, - "ctx is NULL"); - goto out; - } - - frame = create_frame (this, ctx->pool); - if (!frame) { - gf_log (this->name, GF_LOG_ERROR, - "Error allocating frame"); - goto out; - } - - priv = this->private; - - dict = dict_new (); - if (!dict) { - gf_log (this->name, GF_LOG_ERROR, - "Error allocating dictionary"); - goto out; - } - - ret = dict_set_str (dict, "volname", priv->volname); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "Error setting volname in dict"); - goto out; - } - - ret = dict_allocate_and_serialize (dict, &req.dict.dict_val, - &req.dict.dict_len); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "Failed to serialize dictionary"); - ret = -1; - goto out; - } - - ret = svs_mgmt_submit_request (&req, frame, ctx, - &svs_clnt_handshake_prog, - GF_HNDSK_GET_SNAPSHOT_INFO, - mgmt_get_snapinfo_cbk, - (xdrproc_t)xdr_gf_getsnap_name_uuid_req); - - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "Error sending snapshot names RPC request"); - } - - frame_cleanup = _gf_false; + gf_getsnap_name_uuid_req req = {{ + 0, + }}; + int ret = -1; + dict_t *dict = NULL; + glusterfs_ctx_t *ctx = NULL; + call_frame_t *frame = NULL; + svs_private_t *priv = NULL; + gf_boolean_t frame_cleanup = _gf_true; + + GF_VALIDATE_OR_GOTO("snapview-server", this, out); + + ctx = this->ctx; + if (!ctx) { + gf_msg(this->name, GF_LOG_ERROR, 0, SVS_MSG_NULL_CTX, "ctx is NULL"); + goto out; + } + + frame = create_frame(this, ctx->pool); + if (!frame) { + gf_msg(this->name, GF_LOG_ERROR, 0, LG_MSG_FRAME_ERROR, + "Error allocating frame"); + goto out; + } + + priv = this->private; + + dict = dict_new(); + if (!dict) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, SVS_MSG_NO_MEMORY, + "Error allocating dictionary"); + goto out; + } + + ret = dict_set_str(dict, "volname", priv->volname); + if (ret) { + gf_msg(this->name, 
GF_LOG_ERROR, 0, SVS_MSG_DICT_SET_FAILED, + "Error setting volname in dict"); + goto out; + } + + ret = dict_allocate_and_serialize(dict, &req.dict.dict_val, + &req.dict.dict_len); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, LG_MSG_DICT_UNSERIAL_FAILED, + "Failed to serialize dictionary"); + ret = -1; + goto out; + } + + ret = svs_mgmt_submit_request( + &req, frame, ctx, &svs_clnt_handshake_prog, GF_HNDSK_GET_SNAPSHOT_INFO, + mgmt_get_snapinfo_cbk, (xdrproc_t)xdr_gf_getsnap_name_uuid_req); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SVS_MSG_RPC_REQ_FAILED, + "Error sending snapshot names RPC request"); + } + + frame_cleanup = _gf_false; out: - if (dict) { - dict_unref (dict); - } - GF_FREE (req.dict.dict_val); - - if (frame_cleanup && frame) { - /* - * Destroy the frame if we encountered an error - * Else we need to clean it up in - * mgmt_get_snapinfo_cbk - */ - SVS_STACK_DESTROY (frame); - } + if (dict) { + dict_unref(dict); + } + GF_FREE(req.dict.dict_val); + + if (frame_cleanup && frame) { + /* + * Destroy the frame if we encountered an error + * Else we need to clean it up in + * mgmt_get_snapinfo_cbk + */ + SVS_STACK_DESTROY(frame); + } - return ret; + return ret; } diff --git a/xlators/features/snapview-server/src/snapview-server.c b/xlators/features/snapview-server/src/snapview-server.c index f209edd2e94..76cccae5914 100644 --- a/xlators/features/snapview-server/src/snapview-server.c +++ b/xlators/features/snapview-server/src/snapview-server.c @@ -9,78 +9,119 @@ */ #include "snapview-server.h" #include "snapview-server-mem-types.h" -#include "compat-errno.h" +#include <glusterfs/compat-errno.h> -#include "xlator.h" +#include <glusterfs/xlator.h> #include "rpc-clnt.h" #include "xdr-generic.h" #include "protocol-common.h" -#include "syscall.h" +#include <glusterfs/syscall.h> #include <pthread.h> +#include "glfs-internal.h" + +int +gf_setcredentials(uid_t *uid, gid_t *gid, uint16_t ngrps, uint32_t *groups) +{ + int ret = 0; + + if (uid) { + ret 
= glfs_setfsuid(*uid); + if (ret != 0) { + gf_msg("snapview-server", GF_LOG_ERROR, 0, SVS_MSG_SETFSUID_FAIL, + "failed to set uid " + "%u in thread context", + *uid); + return ret; + } + } + if (gid) { + ret = glfs_setfsgid(*gid); + if (ret != 0) { + gf_msg("snapview-server", GF_LOG_ERROR, 0, SVS_MSG_SETFSGID_FAIL, + "failed to set gid " + "%u in thread context", + *gid); + return ret; + } + } + + if (ngrps != 0 && groups) { + ret = glfs_setfsgroups(ngrps, groups); + if (ret != 0) { + gf_msg("snapview-server", GF_LOG_ERROR, 0, SVS_MSG_SETFSGRPS_FAIL, + "failed to set " + "groups in thread context"); + return ret; + } + } + return 0; +} int32_t -svs_lookup_entry_point (xlator_t *this, loc_t *loc, inode_t *parent, - struct iatt *buf, struct iatt *postparent, - int32_t *op_errno) +svs_lookup_entry_point(xlator_t *this, loc_t *loc, inode_t *parent, + struct iatt *buf, struct iatt *postparent, + int32_t *op_errno) { - uuid_t gfid; - svs_inode_t *inode_ctx = NULL; - int op_ret = -1; - - GF_VALIDATE_OR_GOTO ("snapview-server", this, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, loc->inode, out); - GF_VALIDATE_OR_GOTO (this->name, buf, out); - GF_VALIDATE_OR_GOTO (this->name, postparent, out); - - if (gf_uuid_is_null (loc->inode->gfid)) { - gf_uuid_generate (gfid); - svs_iatt_fill (gfid, buf); - - /* Here the inode context of the entry point directory - is filled with just the type of the inode and the gfid - of the parent from where the entry point was entered. - The glfs object and the fs instance will be NULL. 
- */ - if (parent) - svs_iatt_fill (parent->gfid, postparent); - else { - svs_iatt_fill (buf->ia_gfid, postparent); - } + uuid_t gfid; + svs_inode_t *inode_ctx = NULL; + int op_ret = -1; + + GF_VALIDATE_OR_GOTO("snapview-server", this, out); + GF_VALIDATE_OR_GOTO(this->name, loc, out); + GF_VALIDATE_OR_GOTO(this->name, loc->inode, out); + GF_VALIDATE_OR_GOTO(this->name, buf, out); + GF_VALIDATE_OR_GOTO(this->name, postparent, out); + + if (gf_uuid_is_null(loc->inode->gfid)) { + gf_uuid_generate(gfid); + svs_iatt_fill(gfid, buf); + + /* Here the inode context of the entry point directory + is filled with just the type of the inode and the gfid + of the parent from where the entry point was entered. + The glfs object and the fs instance will be NULL. + */ + if (parent) + svs_iatt_fill(parent->gfid, postparent); + else { + svs_iatt_fill(buf->ia_gfid, postparent); + } - inode_ctx = svs_inode_ctx_get_or_new (this, loc->inode); - if (!inode_ctx) { - gf_log (this->name, GF_LOG_ERROR, "failed to " - "allocate inode context for entry point " - "directory"); - op_ret = -1; - *op_errno = ENOMEM; - goto out; - } - gf_uuid_copy (inode_ctx->pargfid, loc->pargfid); - memcpy (&inode_ctx->buf, buf, sizeof (*buf)); - inode_ctx->type = SNAP_VIEW_ENTRY_POINT_INODE; + inode_ctx = svs_inode_ctx_get_or_new(this, loc->inode); + if (!inode_ctx) { + op_ret = -1; + *op_errno = ENOMEM; + gf_msg(this->name, GF_LOG_ERROR, *op_errno, + SVS_MSG_NEW_INODE_CTX_FAILED, + "failed to " + "allocate inode context for entry point " + "directory"); + goto out; + } + + gf_uuid_copy(inode_ctx->pargfid, loc->pargfid); + memcpy(&inode_ctx->buf, buf, sizeof(*buf)); + inode_ctx->type = SNAP_VIEW_ENTRY_POINT_INODE; + } else { + inode_ctx = svs_inode_ctx_get(this, loc->inode); + if (inode_ctx) { + memcpy(buf, &inode_ctx->buf, sizeof(*buf)); + svs_iatt_fill(inode_ctx->pargfid, postparent); } else { - if (inode_ctx) { - memcpy (buf, &inode_ctx->buf, sizeof (*buf)); - svs_iatt_fill (inode_ctx->pargfid, postparent); 
- } else { - svs_iatt_fill (loc->inode->gfid, buf); - if (parent) - svs_iatt_fill (parent->gfid, - postparent); - else { - svs_iatt_fill (loc->inode->gfid, - postparent); - } - } + svs_iatt_fill(loc->inode->gfid, buf); + if (parent) + svs_iatt_fill(parent->gfid, postparent); + else { + svs_iatt_fill(loc->inode->gfid, postparent); + } } + } - op_ret = 0; + op_ret = 0; out: - return op_ret; + return op_ret; } /* When lookup comes from client and the protocol/server tries to resolve @@ -103,80 +144,87 @@ out: snapshot is referred and a random gfid is not generated. */ int32_t -svs_lookup_gfid (xlator_t *this, loc_t *loc, struct iatt *buf, - struct iatt *postparent, int32_t *op_errno) +svs_lookup_gfid(xlator_t *this, loc_t *loc, struct iatt *buf, + struct iatt *postparent, int32_t *op_errno) { - int32_t op_ret = -1; - unsigned char handle_obj[GFAPI_HANDLE_LENGTH] = {0, }; - glfs_t *fs = NULL; - glfs_object_t *object = NULL; - struct stat statbuf = {0, }; - svs_inode_t *inode_ctx = NULL; - - GF_VALIDATE_OR_GOTO ("snapview-server", this, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, loc->inode, out); - GF_VALIDATE_OR_GOTO (this->name, buf, out); - GF_VALIDATE_OR_GOTO (this->name, postparent, out); - - if (gf_uuid_is_null (loc->gfid) && gf_uuid_is_null (loc->inode->gfid)) { - gf_log (this->name, GF_LOG_ERROR, "gfid is NULL"); - goto out; - } - - if (!gf_uuid_is_null (loc->inode->gfid)) - memcpy (handle_obj, loc->inode->gfid, - GFAPI_HANDLE_LENGTH); - else - memcpy (handle_obj, loc->gfid, - GFAPI_HANDLE_LENGTH); - - fs = svs_get_latest_snapshot (this); - if (!fs) { - gf_log (this->name, GF_LOG_ERROR, "failed to get the latest " - "snapshot"); - op_ret = -1; - *op_errno = EINVAL; - goto out; - } - - - object = glfs_h_create_from_handle (fs, handle_obj, GFAPI_HANDLE_LENGTH, - &statbuf); - if (!object) { - gf_log (this->name, GF_LOG_ERROR, "failed to do lookup and get " - "the handle on the snapshot %s (path: %s, gfid: %s)", - 
loc->name, loc->path, uuid_utoa (loc->gfid)); - op_ret = -1; - *op_errno = ESTALE; - goto out; - } - - inode_ctx = svs_inode_ctx_get_or_new (this, loc->inode); - if (!inode_ctx) { - gf_log (this->name, GF_LOG_ERROR, "failed to allocate inode " - "context"); - op_ret = -1; - *op_errno = ENOMEM; - goto out; - } - - iatt_from_stat (buf, &statbuf); - if (!gf_uuid_is_null (loc->gfid)) - gf_uuid_copy (buf->ia_gfid, loc->gfid); - else - gf_uuid_copy (buf->ia_gfid, loc->inode->gfid); - - inode_ctx->type = SNAP_VIEW_VIRTUAL_INODE; - inode_ctx->fs = fs; - inode_ctx->object = object; - memcpy (&inode_ctx->buf, buf, sizeof (*buf)); - svs_iatt_fill (buf->ia_gfid, postparent); - - op_ret = 0; + int32_t op_ret = -1; + unsigned char handle_obj[GFAPI_HANDLE_LENGTH] = { + 0, + }; + glfs_t *fs = NULL; + glfs_object_t *object = NULL; + struct stat statbuf = { + 0, + }; + svs_inode_t *inode_ctx = NULL; + + GF_VALIDATE_OR_GOTO("snapview-server", this, out); + GF_VALIDATE_OR_GOTO(this->name, loc, out); + GF_VALIDATE_OR_GOTO(this->name, loc->inode, out); + GF_VALIDATE_OR_GOTO(this->name, buf, out); + GF_VALIDATE_OR_GOTO(this->name, postparent, out); + + if (gf_uuid_is_null(loc->gfid) && gf_uuid_is_null(loc->inode->gfid)) { + gf_msg(this->name, GF_LOG_ERROR, 0, SVS_MSG_NULL_GFID, "gfid is NULL"); + goto out; + } + + if (!gf_uuid_is_null(loc->inode->gfid)) + memcpy(handle_obj, loc->inode->gfid, GFAPI_HANDLE_LENGTH); + else + memcpy(handle_obj, loc->gfid, GFAPI_HANDLE_LENGTH); + + fs = svs_get_latest_snapshot(this); + if (!fs) { + op_ret = -1; + *op_errno = EINVAL; + gf_msg(this->name, GF_LOG_ERROR, *op_errno, + SVS_MSG_GET_LATEST_SNAP_FAILED, + "failed to get the latest " + "snapshot"); + goto out; + } + + object = glfs_h_create_from_handle(fs, handle_obj, GFAPI_HANDLE_LENGTH, + &statbuf); + if (!object) { + op_ret = -1; + *op_errno = ESTALE; + gf_msg(this->name, GF_LOG_ERROR, *op_errno, + SVS_MSG_GET_GLFS_H_OBJECT_FAILED, + "failed to do lookup and get " + "the handle on the snapshot %s 
(path: %s, gfid: %s)", + loc->name, loc->path, uuid_utoa(loc->gfid)); + goto out; + } + + inode_ctx = svs_inode_ctx_get_or_new(this, loc->inode); + if (!inode_ctx) { + op_ret = -1; + *op_errno = ENOMEM; + gf_msg(this->name, GF_LOG_ERROR, *op_errno, + SVS_MSG_NEW_INODE_CTX_FAILED, + "failed to allocate inode " + "context"); + goto out; + } + + iatt_from_stat(buf, &statbuf); + if (!gf_uuid_is_null(loc->gfid)) + gf_uuid_copy(buf->ia_gfid, loc->gfid); + else + gf_uuid_copy(buf->ia_gfid, loc->inode->gfid); + + inode_ctx->type = SNAP_VIEW_VIRTUAL_INODE; + inode_ctx->fs = fs; + inode_ctx->object = object; + memcpy(&inode_ctx->buf, buf, sizeof(*buf)); + svs_iatt_fill(buf->ia_gfid, postparent); + + op_ret = 0; out: - return op_ret; + return op_ret; } /* If the parent is an entry point inode, then create the handle for the @@ -187,149 +235,213 @@ out: parent's context */ int32_t -svs_lookup_snapshot (xlator_t *this, loc_t *loc, struct iatt *buf, - struct iatt *postparent, inode_t *parent, - svs_inode_t *parent_ctx, int32_t *op_errno) +svs_lookup_snapshot(xlator_t *this, loc_t *loc, struct iatt *buf, + struct iatt *postparent, inode_t *parent, + svs_inode_t *parent_ctx, int32_t *op_errno) { - int32_t op_ret = -1; - unsigned char handle_obj[GFAPI_HANDLE_LENGTH] = {0, }; - glfs_t *fs = NULL; - glfs_object_t *object = NULL; - struct stat statbuf = {0, }; - svs_inode_t *inode_ctx = NULL; - uuid_t gfid; - - GF_VALIDATE_OR_GOTO ("snapview-server", this, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, loc->inode, out); - GF_VALIDATE_OR_GOTO (this->name, buf, out); - GF_VALIDATE_OR_GOTO (this->name, postparent, out); - GF_VALIDATE_OR_GOTO (this->name, parent_ctx, out); - GF_VALIDATE_OR_GOTO (this->name, parent, out); - - fs = svs_initialise_snapshot_volume (this, loc->name, op_errno); - if (!fs) { - gf_log (this->name, GF_LOG_DEBUG, "failed to " - "create the fs instance for snap %s", - loc->name); - *op_errno = ENOENT; - op_ret = -1; - goto 
out; - } - - memcpy (handle_obj, parent_ctx->pargfid, - GFAPI_HANDLE_LENGTH); - object = glfs_h_create_from_handle (fs, handle_obj, GFAPI_HANDLE_LENGTH, - &statbuf); - if (!object) { - gf_log (this->name, GF_LOG_DEBUG, "failed to do lookup and " - "get the handle on the snapshot %s", loc->name); - op_ret = -1; - *op_errno = errno; - goto out; - } - - inode_ctx = svs_inode_ctx_get_or_new (this, loc->inode); - if (!inode_ctx) { - gf_log (this->name, GF_LOG_ERROR, "failed to " - "allocate inode context"); - op_ret = -1; - *op_errno = ENOMEM; - goto out; - } + int32_t op_ret = -1; + unsigned char handle_obj[GFAPI_HANDLE_LENGTH] = { + 0, + }; + glfs_t *fs = NULL; + glfs_object_t *object = NULL; + struct stat statbuf = { + 0, + }; + svs_inode_t *inode_ctx = NULL; + uuid_t gfid; + + GF_VALIDATE_OR_GOTO("snapview-server", this, out); + GF_VALIDATE_OR_GOTO(this->name, loc, out); + GF_VALIDATE_OR_GOTO(this->name, loc->inode, out); + GF_VALIDATE_OR_GOTO(this->name, buf, out); + GF_VALIDATE_OR_GOTO(this->name, postparent, out); + GF_VALIDATE_OR_GOTO(this->name, parent_ctx, out); + GF_VALIDATE_OR_GOTO(this->name, parent, out); + + fs = svs_initialise_snapshot_volume(this, loc->name, op_errno); + if (!fs) { + gf_msg_debug(this->name, 0, + "failed to create " + "the fs instance for snap %s", + loc->name); + *op_errno = ENOENT; + op_ret = -1; + goto out; + } + + memcpy(handle_obj, parent_ctx->pargfid, GFAPI_HANDLE_LENGTH); + object = glfs_h_create_from_handle(fs, handle_obj, GFAPI_HANDLE_LENGTH, + &statbuf); + if (!object) { + op_ret = -1; + *op_errno = errno; + /* Should this be in warning or error mode? 
*/ + gf_msg_debug(this->name, 0, + "failed to do lookup and " + "get the handle on the snapshot %s", + loc->name); + goto out; + } + + inode_ctx = svs_inode_ctx_get_or_new(this, loc->inode); + if (!inode_ctx) { + op_ret = -1; + *op_errno = ENOMEM; + gf_msg(this->name, GF_LOG_ERROR, *op_errno, + SVS_MSG_NEW_INODE_CTX_FAILED, + "failed to allocate " + "inode context"); + goto out; + } + + if (gf_uuid_is_null(loc->gfid) && gf_uuid_is_null(loc->inode->gfid)) + gf_uuid_generate(gfid); + else { + if (!gf_uuid_is_null(loc->inode->gfid)) + gf_uuid_copy(gfid, loc->inode->gfid); + else + gf_uuid_copy(gfid, loc->gfid); + } + iatt_from_stat(buf, &statbuf); + gf_uuid_copy(buf->ia_gfid, gfid); + svs_fill_ino_from_gfid(buf); + inode_ctx->type = SNAP_VIEW_SNAPSHOT_INODE; + inode_ctx->fs = fs; + inode_ctx->object = object; + memcpy(&inode_ctx->buf, buf, sizeof(*buf)); + svs_iatt_fill(parent->gfid, postparent); + + SVS_STRDUP(inode_ctx->snapname, loc->name); + if (!inode_ctx->snapname) { + op_ret = -1; + *op_errno = ENOMEM; + goto out; + } + op_ret = 0; - if (gf_uuid_is_null (loc->gfid) && - gf_uuid_is_null (loc->inode->gfid)) - gf_uuid_generate (gfid); - else { - if (!gf_uuid_is_null (loc->inode->gfid)) - gf_uuid_copy (gfid, loc->inode->gfid); - else - gf_uuid_copy (gfid, loc->gfid); - } - iatt_from_stat (buf, &statbuf); - gf_uuid_copy (buf->ia_gfid, gfid); - svs_fill_ino_from_gfid (buf); - inode_ctx->type = SNAP_VIEW_SNAPSHOT_INODE; - inode_ctx->fs = fs; - inode_ctx->object = object; - memcpy (&inode_ctx->buf, buf, sizeof (*buf)); - svs_iatt_fill (parent->gfid, postparent); +out: + if (op_ret) { + if (object) + glfs_h_close(object); - op_ret = 0; + if (inode_ctx) + inode_ctx->object = NULL; + } -out: - return op_ret; + return op_ret; } /* Both parent and entry are from snapshot world */ int32_t -svs_lookup_entry (xlator_t *this, loc_t *loc, struct iatt *buf, - struct iatt *postparent, inode_t *parent, - svs_inode_t *parent_ctx, int32_t *op_errno) +svs_lookup_entry(xlator_t *this, 
loc_t *loc, struct iatt *buf, + struct iatt *postparent, inode_t *parent, + svs_inode_t *parent_ctx, int32_t *op_errno) { - int32_t op_ret = -1; - glfs_t *fs = NULL; - glfs_object_t *object = NULL; - struct stat statbuf = {0, }; - svs_inode_t *inode_ctx = NULL; - glfs_object_t *parent_object = NULL; - uuid_t gfid; - - GF_VALIDATE_OR_GOTO ("snapview-server", this, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, loc->inode, out); - GF_VALIDATE_OR_GOTO (this->name, buf, out); - GF_VALIDATE_OR_GOTO (this->name, postparent, out); - GF_VALIDATE_OR_GOTO (this->name, parent_ctx, out); - GF_VALIDATE_OR_GOTO (this->name, parent, out); - - parent_object = parent_ctx->object; - fs = parent_ctx->fs; - - object = glfs_h_lookupat (fs, parent_object, loc->name, - &statbuf, 0); - if (!object) { - gf_log (this->name, GF_LOG_DEBUG, "failed to do lookup and " - "get the handle for entry %s (path: %s)", loc->name, - loc->path); - op_ret = -1; - *op_errno = errno; - goto out; - } + int32_t op_ret = -1; + glfs_t *fs = NULL; + glfs_object_t *object = NULL; + struct stat statbuf = { + 0, + }; + svs_inode_t *inode_ctx = NULL; + glfs_object_t *parent_object = NULL; + uuid_t gfid = { + 0, + }; + + GF_VALIDATE_OR_GOTO("snapview-server", this, out); + GF_VALIDATE_OR_GOTO(this->name, loc, out); + GF_VALIDATE_OR_GOTO(this->name, loc->inode, out); + GF_VALIDATE_OR_GOTO(this->name, buf, out); + GF_VALIDATE_OR_GOTO(this->name, postparent, out); + GF_VALIDATE_OR_GOTO(this->name, parent_ctx, out); + GF_VALIDATE_OR_GOTO(this->name, parent, out); + + parent_object = parent_ctx->object; + fs = parent_ctx->fs; + + object = glfs_h_lookupat(fs, parent_object, loc->name, &statbuf, 0); + if (!object) { + /* should this be in WARNING or ERROR mode? 
*/ + gf_msg_debug(this->name, 0, + "failed to do lookup and " + "get the handle for entry %s (path: %s)", + loc->name, loc->path); + op_ret = -1; + *op_errno = errno; + goto out; + } + + if (gf_uuid_is_null(object->gfid)) { + /* should this be in WARNING or ERROR mode? */ + gf_msg_debug(this->name, 0, + "gfid from glfs handle is " + "NULL for entry %s (path: %s)", + loc->name, loc->path); + op_ret = -1; + *op_errno = errno; + goto out; + } + + inode_ctx = svs_inode_ctx_get_or_new(this, loc->inode); + if (!inode_ctx) { + op_ret = -1; + *op_errno = ENOMEM; + gf_msg(this->name, GF_LOG_ERROR, *op_errno, + SVS_MSG_NEW_INODE_CTX_FAILED, + "failed to allocate " + "inode context"); + goto out; + } + + if (gf_uuid_is_null(loc->gfid) && gf_uuid_is_null(loc->inode->gfid)) { + if (svs_uuid_generate(this, gfid, parent_ctx->snapname, object->gfid)) { + /* + * should op_errno be something else such as + * EINVAL or ESTALE? + */ + op_ret = -1; + *op_errno = EIO; + goto out; + } + } else { + if (!gf_uuid_is_null(loc->inode->gfid)) + gf_uuid_copy(gfid, loc->inode->gfid); + else + gf_uuid_copy(gfid, loc->gfid); + } - inode_ctx = svs_inode_ctx_get_or_new (this, loc->inode); - if (!inode_ctx) { - gf_log (this->name, GF_LOG_ERROR, "failed to " - "allocate inode context"); - op_ret = -1; - *op_errno = ENOMEM; - goto out; - } + iatt_from_stat(buf, &statbuf); + gf_uuid_copy(buf->ia_gfid, gfid); + svs_fill_ino_from_gfid(buf); + inode_ctx->type = SNAP_VIEW_VIRTUAL_INODE; + inode_ctx->fs = fs; + inode_ctx->object = object; + memcpy(&inode_ctx->buf, buf, sizeof(*buf)); + svs_iatt_fill(parent->gfid, postparent); - if (gf_uuid_is_null (loc->gfid) && - gf_uuid_is_null (loc->inode->gfid)) - gf_uuid_generate (gfid); - else { - if (!gf_uuid_is_null (loc->inode->gfid)) - gf_uuid_copy (gfid, loc->inode->gfid); - else - gf_uuid_copy (gfid, loc->gfid); + if (IA_ISDIR(buf->ia_type)) { + SVS_STRDUP(inode_ctx->snapname, parent_ctx->snapname); + if (!inode_ctx->snapname) { + op_ret = -1; + *op_errno = 
ENOMEM; + goto out; } + } - iatt_from_stat (buf, &statbuf); - gf_uuid_copy (buf->ia_gfid, gfid); - svs_fill_ino_from_gfid (buf); - inode_ctx->type = SNAP_VIEW_VIRTUAL_INODE; - inode_ctx->fs = fs; - inode_ctx->object = object; - memcpy (&inode_ctx->buf, buf, sizeof (*buf)); - svs_iatt_fill (parent->gfid, postparent); - - op_ret = 0; + op_ret = 0; out: - return op_ret; + if (op_ret) { + if (object) + glfs_h_close(object); + + if (inode_ctx) + inode_ctx->object = NULL; + } + + return op_ret; } /* inode context is there means lookup has come on an object which was @@ -358,344 +470,352 @@ out: world */ int32_t -svs_revalidate (xlator_t *this, loc_t *loc, inode_t *parent, - svs_inode_t *inode_ctx, svs_inode_t *parent_ctx, - struct iatt *buf, struct iatt *postparent, int32_t *op_errno) +svs_revalidate(xlator_t *this, loc_t *loc, inode_t *parent, + svs_inode_t *inode_ctx, svs_inode_t *parent_ctx, + struct iatt *buf, struct iatt *postparent, int32_t *op_errno) { - int32_t op_ret = -1; - int ret = -1; - char tmp_uuid[64] = {0, }; - glfs_t *fs = NULL; - glfs_object_t *object = NULL; - - GF_VALIDATE_OR_GOTO ("snapview-server", this, out); - GF_VALIDATE_OR_GOTO (this->name, buf, out); - GF_VALIDATE_OR_GOTO (this->name, postparent, out); - GF_VALIDATE_OR_GOTO (this->name, inode_ctx, out); - - if (inode_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) { - svs_iatt_fill (loc->inode->gfid, buf); + int32_t op_ret = -1; + int ret = -1; + char tmp_uuid[64] = { + 0, + }; + glfs_t *fs = NULL; + + GF_VALIDATE_OR_GOTO("snapview-server", this, out); + GF_VALIDATE_OR_GOTO(this->name, buf, out); + GF_VALIDATE_OR_GOTO(this->name, postparent, out); + GF_VALIDATE_OR_GOTO(this->name, inode_ctx, out); + + if (inode_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) { + svs_iatt_fill(loc->inode->gfid, buf); + if (parent) + svs_iatt_fill(parent->gfid, postparent); + else + svs_iatt_fill(loc->inode->gfid, postparent); + op_ret = 0; + goto out; + } else { + /* Though fs and object are present in the inode context, 
its + * better to check if fs is valid or not before doing anything. + * Its for the protection from the following operations. + * 1) Create a file on the glusterfs mount point + * 2) Create a snapshot (say "snap1") + * 3) Access the contents of the snapshot + * 4) Delete the file from the mount point + * 5) Delete the snapshot "snap1" + * 6) Create a new snapshot "snap1" + * + * Now accessing the new snapshot "snap1" gives problems. + * Because the inode and dentry created for snap1 would not be + * deleted upon the deletion of the snapshot (as deletion of + * snapshot is a gluster cli operation, not a fop). So next time + * upon creation of a new snap with same name, the previous + * inode and dentry itself will be used. But the inode context + * contains old information about the glfs_t instance and the + * handle in the gfapi world. Thus the glfs_t instance should + * be checked before accessing. If its wrong, then right + * instance should be obtained by doing the lookup. + */ + if (inode_ctx->fs && inode_ctx->object) { + fs = inode_ctx->fs; + SVS_CHECK_VALID_SNAPSHOT_HANDLE(fs, this); + if (fs) { + memcpy(buf, &inode_ctx->buf, sizeof(*buf)); if (parent) - svs_iatt_fill (parent->gfid, - postparent); + svs_iatt_fill(parent->gfid, postparent); else - svs_iatt_fill (loc->inode->gfid, postparent); + svs_iatt_fill(buf->ia_gfid, postparent); op_ret = 0; goto out; - } else { - /* Though fs and object are present in the inode context, its - * better to check if fs is valid or not before doing anything. - * Its for the protection from the following operations. - * 1) Create a file on the glusterfs mount point - * 2) Create a snapshot (say "snap1") - * 3) Access the contents of the snapshot - * 4) Delete the file from the mount point - * 5) Delete the snapshot "snap1" - * 6) Create a new snapshot "snap1" - * - * Now accessing the new snapshot "snap1" gives problems. 
- * Because the inode and dentry created for snap1 would not be - * deleted upon the deletion of the snapshot (as deletion of - * snapshot is a gluster cli operation, not a fop). So next time - * upon creation of a new snap with same name, the previous - * inode and dentry itself will be used. But the inode context - * contains old information about the glfs_t instance and the - * handle in the gfapi world. Thus the glfs_t instance should - * be checked before accessing. If its wrong, then right - * instance should be obtained by doing the lookup. - */ - if (inode_ctx->fs && inode_ctx->object) { - fs = inode_ctx->fs; - object = inode_ctx->object; - SVS_CHECK_VALID_SNAPSHOT_HANDLE(fs, this); - if (fs) { - memcpy (buf, &inode_ctx->buf, sizeof (*buf)); - if (parent) - svs_iatt_fill (parent->gfid, - postparent); - else - svs_iatt_fill (buf->ia_gfid, - postparent); - op_ret = 0; - goto out; - } else { - inode_ctx->fs = NULL; - inode_ctx->object = NULL; - ret = svs_get_handle (this, loc, inode_ctx, - op_errno); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "failed to get the handle for " - "%s (gfid %s)", loc->path, - uuid_utoa_r (loc->inode->gfid, - tmp_uuid)); - op_ret = -1; - goto out; - } - } + } else { + inode_ctx->fs = NULL; + inode_ctx->object = NULL; + ret = svs_get_handle(this, loc, inode_ctx, op_errno); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, *op_errno, + SVS_MSG_GET_GLFS_H_OBJECT_FAILED, + "failed to get the handle for " + "%s (gfid %s)", + loc->path, uuid_utoa_r(loc->inode->gfid, tmp_uuid)); + op_ret = -1; + goto out; } + } + } - /* To send the lookup to gfapi world, both the name of the - entry as well as the parent context is needed. - */ - if (!loc->name || !parent_ctx) { - *op_errno = ESTALE; - gf_log (this->name, GF_LOG_ERROR, "%s is NULL", - loc->name?"parent context":"loc->name"); - goto out; - } + /* To send the lookup to gfapi world, both the name of the + entry as well as the parent context is needed. 
+ */ + if (!loc->name || !parent_ctx) { + *op_errno = ESTALE; + gf_msg(this->name, GF_LOG_ERROR, *op_errno, + SVS_MSG_PARENT_CTX_OR_NAME_NULL, "%s is NULL", + loc->name ? "parent context" : "loc->name"); + goto out; + } - if (parent_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) - op_ret = svs_lookup_snapshot (this, loc, buf, - postparent, parent, - parent_ctx, op_errno); - else - op_ret = svs_lookup_entry (this, loc, buf, postparent, - parent, parent_ctx, - op_errno); + if (parent_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) + op_ret = svs_lookup_snapshot(this, loc, buf, postparent, parent, + parent_ctx, op_errno); + else + op_ret = svs_lookup_entry(this, loc, buf, postparent, parent, + parent_ctx, op_errno); - goto out; - } + goto out; + } out: - return op_ret; + return op_ret; } int32_t -svs_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +svs_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { - struct iatt buf = {0, }; - int32_t op_ret = -1; - int32_t op_errno = EINVAL; - struct iatt postparent = {0,}; - svs_inode_t *inode_ctx = NULL; - svs_inode_t *parent_ctx = NULL; - int32_t ret = -1; - svs_private_t *private = NULL; - inode_t *parent = NULL; - snap_dirent_t *dirent = NULL; - gf_boolean_t entry_point_key = _gf_false; - gf_boolean_t entry_point = _gf_false; - - GF_VALIDATE_OR_GOTO ("svs", this, out); - GF_VALIDATE_OR_GOTO (this->name, this->private, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, loc->inode, out); - - private = this->private; - - /* For lookups sent on inodes (i.e not parent inode + basename, but - direct inode itself which usually is a nameless lookup or revalidate - on the inode), loc->name will not be there. Get it from path if - it is there. 
- This is the difference between nameless lookup and revalidate lookup - on an inode: - nameless lookup: loc->path contains gfid and strrchr on it fails - revalidate lookup: loc->path contains the entry name of the inode - and strrchr gives the name of the entry from path - */ - if (loc->path) { - if (!loc->name || (loc->name && !strcmp (loc->name, ""))) { - loc->name = strrchr (loc->path, '/'); - if (loc->name) - loc->name++; - } - } - - if (loc->parent) - parent = inode_ref (loc->parent); - else { - parent = inode_find (loc->inode->table, loc->pargfid); - if (!parent) - parent = inode_parent (loc->inode, NULL, NULL); - } - if (parent) - parent_ctx = svs_inode_ctx_get (this, parent); - - inode_ctx = svs_inode_ctx_get (this, loc->inode); - - /* Initialize latest snapshot, which is used for nameless lookups */ - dirent = svs_get_latest_snap_entry (this); - - if (dirent && !dirent->fs) { - svs_initialise_snapshot_volume (this, dirent->name, NULL); - } - - if (xdata && !inode_ctx) { - ret = dict_get_str_boolean (xdata, "entry-point", _gf_false); - if (ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, "failed to get the " - "entry point info"); - entry_point_key = _gf_false; - } else { - entry_point_key = ret; - } - - if (loc->name && strlen (loc->name)) { - /* lookup can come with the entry-point set in the dict - * for the parent directory of the entry-point as well. - * So consider entry_point only for named lookup - */ - entry_point = entry_point_key; - } - } - - if (inode_ctx && inode_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) { - /* entry-point may not be set in the dictonary. 
- * This can happen if snap-view client is restarted where - * inode-ctx not available and a nameless lookup has come - */ - entry_point = _gf_true; - } - - /* lookup is on the entry point to the snapshot world */ - if (entry_point) { - op_ret = svs_lookup_entry_point (this, loc, parent, &buf, - &postparent, &op_errno); - goto out; + struct iatt buf = { + 0, + }; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + struct iatt postparent = { + 0, + }; + svs_inode_t *inode_ctx = NULL; + svs_inode_t *parent_ctx = NULL; + int32_t ret = -1; + inode_t *parent = NULL; + gf_boolean_t entry_point_key = _gf_false; + gf_boolean_t entry_point = _gf_false; + call_stack_t *root = NULL; + + GF_VALIDATE_OR_GOTO("svs", this, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, loc, out); + GF_VALIDATE_OR_GOTO(this->name, loc->inode, out); + + root = frame->root; + op_ret = gf_setcredentials(&root->uid, &root->gid, root->ngrps, + root->groups); + if (op_ret != 0) { + goto out; + } + + /* For lookups sent on inodes (i.e not parent inode + basename, but + direct inode itself which usually is a nameless lookup or revalidate + on the inode), loc->name will not be there. Get it from path if + it is there. 
+ This is the difference between nameless lookup and revalidate lookup + on an inode: + nameless lookup: loc->path contains gfid and strrchr on it fails + revalidate lookup: loc->path contains the entry name of the inode + and strrchr gives the name of the entry from path + */ + if (loc->path) { + if (!loc->name || (loc->name && !strcmp(loc->name, ""))) { + loc->name = strrchr(loc->path, '/'); + if (loc->name) + loc->name++; + } + } + + if (loc->parent) + parent = inode_ref(loc->parent); + else { + parent = inode_find(loc->inode->table, loc->pargfid); + if (!parent) + parent = inode_parent(loc->inode, NULL, NULL); + } + if (parent) + parent_ctx = svs_inode_ctx_get(this, parent); + + inode_ctx = svs_inode_ctx_get(this, loc->inode); + + if (xdata && !inode_ctx) { + ret = dict_get_str_boolean(xdata, "entry-point", _gf_false); + if (ret == -1) { + gf_msg_debug(this->name, 0, + "failed to get the " + "entry point info"); + entry_point_key = _gf_false; + } else { + entry_point_key = ret; } - /* revalidate */ - if (inode_ctx) { - op_ret = svs_revalidate (this, loc, parent, inode_ctx, - parent_ctx, &buf, &postparent, - &op_errno); - goto out; + if (loc->name && strlen(loc->name)) { + /* lookup can come with the entry-point set in the dict + * for the parent directory of the entry-point as well. + * So consider entry_point only for named lookup + */ + entry_point = entry_point_key; } + } - /* This can happen when entry point directory is entered from non-root - directory. (ex: if /mnt/glusterfs is the mount point, then entry - point (say .snaps) is entered from /mnt/glusterfs/dir/.snaps). Also - it can happen when client sends a nameless lookup on just a gfid and - the server does not have the inode in the inode table. 
- */ - if (!inode_ctx && !parent_ctx) { - if (gf_uuid_is_null (loc->gfid) && - gf_uuid_is_null (loc->inode->gfid)) { - gf_log (this->name, GF_LOG_ERROR, "gfid is NULL"); - op_ret = -1; - op_errno = ESTALE; - goto out; - } - - if (!entry_point_key) { - /* This can happen when there is no inode_ctx available. - * snapview-server might have restarted or - * graph change might have happened - */ - op_ret = -1; - op_errno = ESTALE; - goto out; - } - - /* lookup is on the parent directory of entry-point. - * this would have already looked up by snap-view client - * so return success - */ - if (!gf_uuid_is_null (loc->gfid)) - gf_uuid_copy (buf.ia_gfid, loc->gfid); - else - gf_uuid_copy (buf.ia_gfid, loc->inode->gfid); + if (inode_ctx && inode_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) { + /* entry-point may not be set in the dictonary. + * This can happen if snap-view client is restarted where + * inode-ctx not available and a nameless lookup has come + */ + entry_point = _gf_true; + } + + /* lookup is on the entry point to the snapshot world */ + if (entry_point) { + op_ret = svs_lookup_entry_point(this, loc, parent, &buf, &postparent, + &op_errno); + goto out; + } + + /* revalidate */ + if (inode_ctx) { + op_ret = svs_revalidate(this, loc, parent, inode_ctx, parent_ctx, &buf, + &postparent, &op_errno); + goto out; + } + + /* This can happen when entry point directory is entered from non-root + directory. (ex: if /mnt/glusterfs is the mount point, then entry + point (say .snaps) is entered from /mnt/glusterfs/dir/.snaps). Also + it can happen when client sends a nameless lookup on just a gfid and + the server does not have the inode in the inode table. + */ + if (!inode_ctx && !parent_ctx) { + if (gf_uuid_is_null(loc->gfid) && gf_uuid_is_null(loc->inode->gfid)) { + op_ret = -1; + op_errno = ESTALE; + gf_msg_debug(this->name, 0, + "gfid is NULL. 
Either the lookup " + "came on missing entry or the " + "entry is stale"); + goto out; + } + + if (!entry_point_key) { + /* This can happen when there is no inode_ctx available. + * snapview-server might have restarted or + * graph change might have happened + */ + op_ret = -1; + op_errno = ESTALE; + goto out; + } + + /* lookup is on the parent directory of entry-point. + * this would have already looked up by snap-view client + * so return success + */ + if (!gf_uuid_is_null(loc->gfid)) + gf_uuid_copy(buf.ia_gfid, loc->gfid); + else + gf_uuid_copy(buf.ia_gfid, loc->inode->gfid); - svs_iatt_fill (buf.ia_gfid, &buf); - svs_iatt_fill (buf.ia_gfid, &postparent); + svs_iatt_fill(buf.ia_gfid, &buf); + svs_iatt_fill(buf.ia_gfid, &postparent); - op_ret = 0; - goto out; - } + op_ret = 0; + goto out; + } - if (parent_ctx) { - if (parent_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) - op_ret = svs_lookup_snapshot (this, loc, &buf, - &postparent, parent, - parent_ctx, &op_errno); - else - op_ret = svs_lookup_entry (this, loc, &buf, - &postparent, parent, - parent_ctx, &op_errno); - goto out; - } + if (parent_ctx) { + if (parent_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) + op_ret = svs_lookup_snapshot(this, loc, &buf, &postparent, parent, + parent_ctx, &op_errno); + else + op_ret = svs_lookup_entry(this, loc, &buf, &postparent, parent, + parent_ctx, &op_errno); + goto out; + } out: - STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, - loc?loc->inode:NULL, &buf, xdata, &postparent); + STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, + loc ? 
loc->inode : NULL, &buf, xdata, &postparent); - if (parent) - inode_unref (parent); + if (parent) + inode_unref(parent); - return 0; + return 0; } int32_t -svs_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, - dict_t *xdata) +svs_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, + dict_t *xdata) { - svs_inode_t *inode_ctx = NULL; - int32_t op_ret = -1; - int32_t op_errno = EINVAL; - svs_fd_t *svs_fd = NULL; - glfs_fd_t *glfd = NULL; - glfs_t *fs = NULL; - glfs_object_t *object = NULL; - - GF_VALIDATE_OR_GOTO ("snap-view-daemon", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, loc->inode, out); - - inode_ctx = svs_inode_ctx_get (this, loc->inode); - if (!inode_ctx) { - gf_log (this->name, GF_LOG_ERROR, "inode context not found " - "for the inode %s", uuid_utoa (loc->inode->gfid)); - op_ret = -1; - op_errno = ESTALE; - goto out; - } - - /* Fake success is sent if the opendir is on the entry point directory - or the inode is SNAP_VIEW_ENTRY_POINT_INODE - */ - if (inode_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) { - op_ret = 0; - op_errno = 0; - goto out; - } - else { + svs_inode_t *inode_ctx = NULL; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + svs_fd_t *svs_fd = NULL; + glfs_fd_t *glfd = NULL; + glfs_t *fs = NULL; + glfs_object_t *object = NULL; + call_stack_t *root = NULL; + + GF_VALIDATE_OR_GOTO("snap-view-daemon", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, fd, out); + GF_VALIDATE_OR_GOTO(this->name, loc, out); + GF_VALIDATE_OR_GOTO(this->name, loc->inode, out); + + root = frame->root; + op_ret = gf_setcredentials(&root->uid, &root->gid, root->ngrps, + root->groups); + if (op_ret != 0) { + goto out; + } + + inode_ctx = svs_inode_ctx_get(this, loc->inode); + if (!inode_ctx) { + op_ret = -1; + op_errno = ESTALE; + gf_msg(this->name, 
GF_LOG_ERROR, op_errno, + SVS_MSG_GET_INODE_CONTEXT_FAILED, + "inode context not found " + "for the inode %s", + uuid_utoa(loc->inode->gfid)); + goto out; + } + + /* Fake success is sent if the opendir is on the entry point directory + or the inode is SNAP_VIEW_ENTRY_POINT_INODE + */ + if (inode_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) { + op_ret = 0; + op_errno = 0; + goto out; + } else { + SVS_GET_INODE_CTX_INFO(inode_ctx, fs, object, this, loc, op_ret, + op_errno, out); - SVS_GET_INODE_CTX_INFO(inode_ctx, fs, object, this, loc, op_ret, - op_errno, out); - - glfd = glfs_h_opendir (fs, object); - if (!glfd) { - op_ret = -1; - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, "opendir on %s " - "failed (gfid: %s)", loc->name, - uuid_utoa (loc->inode->gfid)); - goto out; - } - svs_fd = svs_fd_ctx_get_or_new (this, fd); - if (!svs_fd) { - gf_log (this->name, GF_LOG_ERROR, "failed to allocate " - "fd context %s (gfid: %s)", loc->name, - uuid_utoa (fd->inode->gfid)); - op_ret = -1; - op_errno = ENOMEM; - glfs_closedir (glfd); - goto out; - } - svs_fd->fd = glfd; + glfd = glfs_h_opendir(fs, object); + if (!glfd) { + op_ret = -1; + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, op_errno, SVS_MSG_OPENDIR_FAILED, + "opendir on %s failed " + "(gfid: %s)", + loc->name, uuid_utoa(loc->inode->gfid)); + goto out; + } + svs_fd = svs_fd_ctx_get_or_new(this, fd); + if (!svs_fd) { + op_ret = -1; + op_errno = ENOMEM; + gf_msg(this->name, GF_LOG_ERROR, op_errno, + SVS_MSG_NEW_FD_CTX_FAILED, + "failed to allocate fd context " + "for %s (gfid: %s)", + loc->name, uuid_utoa(fd->inode->gfid)); + glfs_closedir(glfd); + goto out; + } + svs_fd->fd = glfd; - op_ret = 0; - op_errno = 0; - } + op_ret = 0; + op_errno = 0; + } out: - STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, NULL); + STACK_UNWIND_STRICT(opendir, frame, op_ret, op_errno, fd, NULL); - return 0; + return 0; } /* @@ -708,600 +828,726 @@ out: * back into the dict. 
But to get the values for those xattrs it has to do the * getxattr operation on each xattr which might turn out to be a costly * operation. So for each of the xattrs present in the list, a 0 byte value - * ("") is set into the dict before unwinding. This can be treated as an + * ("") is set into the dict before unwinding. Since ("") is also a valid xattr + * value(in a file system) we use an extra key in the same dictionary as an * indicator to other xlators which want to cache the xattrs (as of now, * md-cache which caches acl and selinux related xattrs) to not to cache the * values of the xattrs present in the dict. */ int32_t -svs_add_xattrs_to_dict (xlator_t *this, dict_t *dict, char *list, ssize_t size) +svs_add_xattrs_to_dict(xlator_t *this, dict_t *dict, char *list, ssize_t size) { - char keybuffer[4096] = {0,}; - size_t remaining_size = 0; - int32_t list_offset = 0; - int32_t ret = -1; - - GF_VALIDATE_OR_GOTO ("snapview-daemon", this, out); - GF_VALIDATE_OR_GOTO (this->name, dict, out); - GF_VALIDATE_OR_GOTO (this->name, list, out); - - remaining_size = size; - list_offset = 0; - while (remaining_size > 0) { - strncpy (keybuffer, list + list_offset, sizeof (keybuffer) - 1); + char keybuffer[4096] = { + 0, + }; + size_t remaining_size = 0; + int32_t list_offset = 0; + int32_t ret = -1; + + GF_VALIDATE_OR_GOTO("snapview-daemon", this, out); + GF_VALIDATE_OR_GOTO(this->name, dict, out); + GF_VALIDATE_OR_GOTO(this->name, list, out); + + remaining_size = size; + list_offset = 0; + while (remaining_size > 0) { + strncpy(keybuffer, list + list_offset, sizeof(keybuffer) - 1); #ifdef GF_DARWIN_HOST_OS - /* The protocol expect namespace for now */ - char *newkey = NULL; - gf_add_prefix (XATTR_USER_PREFIX, keybuffer, &newkey); - strcpy (keybuffer, newkey); - GF_FREE (newkey); + /* The protocol expect namespace for now */ + char *newkey = NULL; + gf_add_prefix(XATTR_USER_PREFIX, keybuffer, &newkey); + strcpy(keybuffer, newkey); + GF_FREE(newkey); #endif - ret = 
dict_set_str (dict, keybuffer, ""); - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, "dict set operation " - "for the key %s failed.", keybuffer); - goto out; - } + ret = dict_set_str(dict, keybuffer, ""); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, SVS_MSG_DICT_SET_FAILED, + "dict set operation " + "for the key %s failed.", + keybuffer); + goto out; + } - remaining_size -= strlen (keybuffer) + 1; - list_offset += strlen (keybuffer) + 1; - } /* while (remaining_size > 0) */ + remaining_size -= strlen(keybuffer) + 1; + list_offset += strlen(keybuffer) + 1; + } /* while (remaining_size > 0) */ - ret = 0; + /* Add an additional key to indicate that we don't need to cache these + * xattrs(with value "") */ + ret = dict_set_str(dict, "glusterfs.skip-cache", ""); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, SVS_MSG_DICT_SET_FAILED, + "dict set operation for the key glusterfs.skip-cache failed."); + goto out; + } + + ret = 0; out: - return ret; + return ret; } int32_t -svs_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name, - dict_t *xdata) +svs_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name, + dict_t *xdata) { - svs_inode_t *inode_ctx = NULL; - int32_t op_ret = -1; - int32_t op_errno = EINVAL; - glfs_t *fs = NULL; - glfs_object_t *object = NULL; - char *value = 0; - ssize_t size = 0; - dict_t *dict = NULL; - - GF_VALIDATE_OR_GOTO ("snap-view-daemon", this, out); - GF_VALIDATE_OR_GOTO ("snap-view-daemon", frame, out); - GF_VALIDATE_OR_GOTO ("snap-view-daemon", loc, out); - GF_VALIDATE_OR_GOTO ("snap-view-daemon", loc->inode, out); - - inode_ctx = svs_inode_ctx_get (this, loc->inode); - if (!inode_ctx) { - gf_log (this->name, GF_LOG_ERROR, "inode context not found " - "for the inode %s", uuid_utoa (loc->inode->gfid)); - op_ret = -1; - op_errno = ESTALE; - goto out; - } + svs_inode_t *inode_ctx = NULL; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + glfs_t *fs = NULL; + glfs_object_t *object 
= NULL; + char *value = 0; + ssize_t size = 0; + dict_t *dict = NULL; + call_stack_t *root = NULL; + + GF_VALIDATE_OR_GOTO("snap-view-daemon", this, out); + GF_VALIDATE_OR_GOTO("snap-view-daemon", frame, out); + GF_VALIDATE_OR_GOTO("snap-view-daemon", loc, out); + GF_VALIDATE_OR_GOTO("snap-view-daemon", loc->inode, out); + + root = frame->root; + op_ret = gf_setcredentials(&root->uid, &root->gid, root->ngrps, + root->groups); + if (op_ret != 0) { + goto out; + } + + inode_ctx = svs_inode_ctx_get(this, loc->inode); + if (!inode_ctx) { + op_ret = -1; + op_errno = ESTALE; + gf_msg(this->name, GF_LOG_ERROR, op_errno, + SVS_MSG_GET_INODE_CONTEXT_FAILED, + "inode context not found " + "for the inode %s", + uuid_utoa(loc->inode->gfid)); + goto out; + } + + /* ENODATA is sent if the getxattr is on entry point directory + or the inode is SNAP_VIEW_ENTRY_POINT_INODE. Entry point is + a virtual directory on which setxattr operations are not + allowed. If getxattr has to be faked as success, then a value + for the name of the xattr has to be sent which we don't have. + */ + if (inode_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) { + op_ret = -1; + op_errno = ENODATA; + goto out; + } else { + SVS_GET_INODE_CTX_INFO(inode_ctx, fs, object, this, loc, op_ret, + op_errno, out); - /* ENODATA is sent if the getxattr is on entry point directory - or the inode is SNAP_VIEW_ENTRY_POINT_INODE. Entry point is - a virtual directory on which setxattr operations are not - allowed. If getxattr has to be faked as success, then a value - for the name of the xattr has to be sent which we dont have. 
- */ - if (inode_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) { - op_ret = -1; - op_errno = ENODATA; + dict = dict_new(); + if (!dict) { + op_ret = -1; + op_errno = ENOMEM; + gf_msg(this->name, GF_LOG_ERROR, op_errno, SVS_MSG_NO_MEMORY, + "failed to allocate dict"); + goto out; + } + + size = glfs_h_getxattrs(fs, object, name, NULL, 0); + if (size == -1) { + op_ret = -1; + op_errno = errno; + if (errno == ENODATA) { + gf_msg_debug(this->name, 0, + "getxattr on " + "%s failed (ket: %s) with %s", + loc->path, name, strerror(errno)); + } else { + gf_msg(this->name, GF_LOG_ERROR, op_errno, + SVS_MSG_GETXATTR_FAILED, + "getxattr on %s failed (key: %s) with %s", loc->path, + name, strerror(errno)); + } + goto out; + } + value = GF_CALLOC(size + 1, sizeof(char), gf_common_mt_char); + if (!value) { + op_ret = -1; + op_errno = ENOMEM; + gf_msg(this->name, GF_LOG_ERROR, op_errno, SVS_MSG_NO_MEMORY, + "failed to allocate memory for getxattr " + "on %s (key: %s)", + loc->name, name); + goto out; + } + + size = glfs_h_getxattrs(fs, object, name, value, size); + if (size == -1) { + op_ret = -1; + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, op_errno, SVS_MSG_GETXATTR_FAILED, + "failed to get the xattr %s for " + "entry %s", + name, loc->name); + goto out; + } + value[size] = '\0'; + + if (name) { + op_ret = dict_set_dynptr(dict, (char *)name, value, size); + if (op_ret < 0) { + op_errno = -op_ret; + gf_msg(this->name, GF_LOG_ERROR, op_errno, + SVS_MSG_DICT_SET_FAILED, + "dict set operation for %s for " + "the key %s failed.", + loc->path, name); + GF_FREE(value); + value = NULL; goto out; + } + } else { + op_ret = svs_add_xattrs_to_dict(this, dict, value, size); + if (op_ret == -1) { + op_errno = ENOMEM; + gf_msg(this->name, GF_LOG_ERROR, op_errno, SVS_MSG_NO_MEMORY, + "failed to add xattrs from the list to " + "dict for %s (gfid: %s)", + loc->path, uuid_utoa(loc->inode->gfid)); + goto out; + } + GF_FREE(value); + value = NULL; } - else { - - 
SVS_GET_INODE_CTX_INFO(inode_ctx, fs, object, this, loc, op_ret, - op_errno, out); - - dict = dict_new (); - if (!dict) { - gf_log (this->name, GF_LOG_ERROR, "failed to " - "allocate dict"); - op_ret = -1; - op_errno = ENOMEM; - goto out; - } - - size = glfs_h_getxattrs (fs, object, name, NULL, 0); - if (size == -1) { - gf_log (this->name, GF_LOG_ERROR, "getxattr " - "on %s failed (key: %s)", loc->name, - name); - op_ret = -1; - op_errno = errno; - goto out; - } - value = GF_CALLOC (size + 1, sizeof (char), - gf_common_mt_char); - if (!value) { - gf_log (this->name, GF_LOG_ERROR, "failed to " - "allocate memory for getxattr on %s " - "(key: %s)", loc->name, name); - op_ret = -1; - op_errno = ENOMEM; - goto out; - } - - size = glfs_h_getxattrs (fs, object, name, value, size); - if (size == -1) { - gf_log (this->name, GF_LOG_ERROR, "failed to " - "get the xattr %s for entry %s", name, - loc->name); - op_ret = -1; - op_errno = errno; - goto out; - } - value[size] = '\0'; - - if (name) { - op_ret = dict_set_dynptr (dict, (char *)name, value, - size); - if (op_ret < 0) { - op_errno = -op_ret; - gf_log (this->name, GF_LOG_ERROR, "dict set " - "operation for %s for the key %s " - "failed.", loc->path, name); - GF_FREE (value); - value = NULL; - goto out; - } - } else { - op_ret = svs_add_xattrs_to_dict (this, dict, value, - size); - if (op_ret == -1) { - gf_log (this->name, GF_LOG_ERROR, "failed to " - "add the xattrs from the list to dict"); - op_errno = ENOMEM; - goto out; - } - GF_FREE (value); - } - } + } out: - if (op_ret) - GF_FREE (value); + if (op_ret && value) + GF_FREE(value); - STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, NULL); + STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, dict, NULL); - if (dict) - dict_unref (dict); + if (dict) + dict_unref(dict); - return 0; + return 0; } int32_t -svs_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, - dict_t *xdata) +svs_fgetxattr(call_frame_t *frame, xlator_t *this, 
fd_t *fd, const char *name, + dict_t *xdata) { - svs_inode_t *inode_ctx = NULL; - int32_t op_ret = -1; - int32_t op_errno = EINVAL; - char *value = 0; - ssize_t size = 0; - dict_t *dict = NULL; - svs_fd_t *sfd = NULL; - glfs_fd_t *glfd = NULL; - - GF_VALIDATE_OR_GOTO ("snap-view-daemon", this, out); - GF_VALIDATE_OR_GOTO ("snap-view-daemon", frame, out); - GF_VALIDATE_OR_GOTO ("snap-view-daemon", fd, out); - GF_VALIDATE_OR_GOTO ("snap-view-daemon", fd->inode, out); - - inode_ctx = svs_inode_ctx_get (this, fd->inode); - if (!inode_ctx) { - gf_log (this->name, GF_LOG_ERROR, "inode context not found " - "for the inode %s", uuid_utoa (fd->inode->gfid)); + svs_inode_t *inode_ctx = NULL; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + char *value = 0; + ssize_t size = 0; + dict_t *dict = NULL; + svs_fd_t *sfd = NULL; + glfs_fd_t *glfd = NULL; + + GF_VALIDATE_OR_GOTO("snap-view-daemon", this, out); + GF_VALIDATE_OR_GOTO("snap-view-daemon", frame, out); + GF_VALIDATE_OR_GOTO("snap-view-daemon", fd, out); + GF_VALIDATE_OR_GOTO("snap-view-daemon", fd->inode, out); + + inode_ctx = svs_inode_ctx_get(this, fd->inode); + if (!inode_ctx) { + op_ret = -1; + op_errno = ESTALE; + gf_msg(this->name, GF_LOG_ERROR, op_errno, + SVS_MSG_GET_INODE_CONTEXT_FAILED, + "inode context not found " + "for the inode %s", + uuid_utoa(fd->inode->gfid)); + goto out; + } + + if (!(svs_inode_ctx_glfs_mapping(this, inode_ctx))) { + op_ret = -1; + op_errno = EBADF; + gf_msg(this->name, GF_LOG_ERROR, op_errno, SVS_MSG_FS_INSTANCE_INVALID, + "glfs instance %p to which the inode %s " + "belongs to does not exist. 
The snapshot " + "corresponding to the instance might have" + "been deleted or deactivated", + inode_ctx->fs, uuid_utoa(fd->inode->gfid)); + goto out; + } + + sfd = svs_fd_ctx_get_or_new(this, fd); + if (!sfd) { + op_ret = -1; + op_errno = EBADFD; + gf_msg(this->name, GF_LOG_ERROR, op_errno, + SVS_MSG_GET_FD_CONTEXT_FAILED, + "failed to get the fd " + "context for %s", + uuid_utoa(fd->inode->gfid)); + goto out; + } + + glfd = sfd->fd; + /* EINVAL is sent if the getxattr is on entry point directory + or the inode is SNAP_VIEW_ENTRY_POINT_INODE. Entry point is + a virtual directory on which setxattr operations are not + allowed. If getxattr has to be faked as success, then a value + for the name of the xattr has to be sent which we don't have. + */ + if (inode_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) { + op_ret = -1; + op_errno = EINVAL; + goto out; + } else { + dict = dict_new(); + if (!dict) { + op_ret = -1; + op_errno = ENOMEM; + gf_msg(this->name, GF_LOG_ERROR, op_errno, SVS_MSG_NO_MEMORY, + "failed to allocate dict " + "(gfid: %s, key: %s)", + uuid_utoa(fd->inode->gfid), name); + goto out; + } + + if (name) { + size = glfs_fgetxattr(glfd, name, NULL, 0); + if (size == -1) { op_ret = -1; - op_errno = ESTALE; + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, op_errno, + SVS_MSG_GETXATTR_FAILED, + "getxattr on %s failed " + "(key: %s)", + uuid_utoa(fd->inode->gfid), name); goto out; - } + } + value = GF_CALLOC(size + 1, sizeof(char), gf_common_mt_char); + if (!value) { + op_ret = -1; + op_errno = ENOMEM; + gf_msg(this->name, GF_LOG_ERROR, op_errno, SVS_MSG_NO_MEMORY, + "failed to " + "allocate memory for getxattr on %s " + "(key: %s)", + uuid_utoa(fd->inode->gfid), name); + goto out; + } - sfd = svs_fd_ctx_get_or_new (this, fd); - if (!sfd) { - gf_log (this->name, GF_LOG_ERROR, "failed to get the fd " - "context for %s", uuid_utoa (fd->inode->gfid)); + size = glfs_fgetxattr(glfd, name, value, size); + if (size == -1) { op_ret = -1; - op_errno = EBADFD; + 
op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, op_errno, + SVS_MSG_GETXATTR_FAILED, + "failed to get the xattr %s " + "for inode %s", + name, uuid_utoa(fd->inode->gfid)); goto out; - } + } + value[size] = '\0'; + + op_ret = dict_set_dynptr(dict, (char *)name, value, size); + if (op_ret < 0) { + op_errno = -op_ret; + gf_msg(this->name, GF_LOG_ERROR, op_errno, + SVS_MSG_DICT_SET_FAILED, + "dict set operation for gfid %s " + "for the key %s failed.", + uuid_utoa(fd->inode->gfid), name); + goto out; + } + } else { + size = glfs_flistxattr(glfd, NULL, 0); + if (size == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, op_errno, + SVS_MSG_LISTXATTR_FAILED, "listxattr on %s failed", + uuid_utoa(fd->inode->gfid)); + goto out; + } - glfd = sfd->fd; - /* EINVAL is sent if the getxattr is on entry point directory - or the inode is SNAP_VIEW_ENTRY_POINT_INODE. Entry point is - a virtual directory on which setxattr operations are not - allowed. If getxattr has to be faked as success, then a value - for the name of the xattr has to be sent which we dont have. 
- */ - if (inode_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) { + value = GF_CALLOC(size + 1, sizeof(char), gf_common_mt_char); + if (!value) { op_ret = -1; - op_errno = EINVAL; + op_errno = ENOMEM; + gf_msg(this->name, GF_LOG_ERROR, op_errno, SVS_MSG_NO_MEMORY, + "failed to " + "allocate buffer for xattr " + "list (%s)", + uuid_utoa(fd->inode->gfid)); goto out; - } - else { - dict = dict_new (); - if (!dict) { - gf_log (this->name, GF_LOG_ERROR, "failed to " - "allocate dict"); - op_ret = -1; - op_errno = ENOMEM; - goto out; - } + } - if (name) { - size = glfs_fgetxattr (glfd, name, NULL, 0); - if (size == -1) { - gf_log (this->name, GF_LOG_ERROR, "getxattr on " - "%s failed (key: %s)", - uuid_utoa (fd->inode->gfid), name); - op_ret = -1; - op_errno = errno; - goto out; - } - value = GF_CALLOC (size + 1, sizeof (char), - gf_common_mt_char); - if (!value) { - gf_log (this->name, GF_LOG_ERROR, "failed to " - "allocate memory for getxattr on %s " - "(key: %s)", - uuid_utoa (fd->inode->gfid), name); - op_ret = -1; - op_errno = ENOMEM; - goto out; - } - - size = glfs_fgetxattr (glfd, name, value, size); - if (size == -1) { - gf_log (this->name, GF_LOG_ERROR, "failed to " - "get the xattr %s for inode %s", name, - uuid_utoa (fd->inode->gfid)); - op_ret = -1; - op_errno = errno; - goto out; - } - value[size] = '\0'; - - op_ret = dict_set_dynptr (dict, (char *)name, value, - size); - if (op_ret < 0) { - op_errno = -op_ret; - gf_log (this->name, GF_LOG_ERROR, "dict set " - "operation for gfid %s for the key %s " - "failed.", - uuid_utoa (fd->inode->gfid), name); - GF_FREE (value); - goto out; - } - } else { - size = glfs_flistxattr (glfd, NULL, 0); - if (size == -1) { - gf_log (this->name, GF_LOG_ERROR, "listxattr " - "on %s failed", - uuid_utoa (fd->inode->gfid)); - goto out; - } - - value = GF_CALLOC (size + 1, sizeof (char), - gf_common_mt_char); - if (!value) { - op_ret = -1; - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_ERROR, "failed to " - "allocate buffer for 
xattr list (%s)", - uuid_utoa (fd->inode->gfid)); - goto out; - } - - size = glfs_flistxattr (glfd, value, size); - if (size == -1) { - op_ret = -1; - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, "listxattr " - "on %s failed", - uuid_utoa (fd->inode->gfid)); - goto out; - } - - op_ret = svs_add_xattrs_to_dict (this, dict, value, - size); - if (op_ret == -1) { - gf_log (this->name, GF_LOG_ERROR, "failed to " - "add the xattrs from the list to dict"); - op_errno = ENOMEM; - goto out; - } - GF_FREE (value); - } + size = glfs_flistxattr(glfd, value, size); + if (size == -1) { + op_ret = -1; + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, op_errno, + SVS_MSG_LISTXATTR_FAILED, "listxattr on %s failed", + uuid_utoa(fd->inode->gfid)); + goto out; + } - op_ret = 0; - op_errno = 0; + op_ret = svs_add_xattrs_to_dict(this, dict, value, size); + if (op_ret == -1) { + op_errno = ENOMEM; + gf_msg(this->name, GF_LOG_ERROR, op_errno, SVS_MSG_NO_MEMORY, + "failed to add xattrs from the list " + "to dict (gfid: %s)", + uuid_utoa(fd->inode->gfid)); + goto out; + } + GF_FREE(value); } + op_ret = 0; + op_errno = 0; + } + out: - if (op_ret) - GF_FREE (value); + if (op_ret) + GF_FREE(value); - STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, NULL); + STACK_UNWIND_STRICT(fgetxattr, frame, op_ret, op_errno, dict, NULL); - if (dict) - dict_unref (dict); + if (dict) + dict_unref(dict); - return 0; + return 0; } int32_t -svs_releasedir (xlator_t *this, fd_t *fd) +svs_releasedir(xlator_t *this, fd_t *fd) { - svs_fd_t *sfd = NULL; - uint64_t tmp_pfd = 0; - int ret = 0; - - GF_VALIDATE_OR_GOTO ("snapview-server", this, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - - ret = fd_ctx_del (fd, this, &tmp_pfd); - if (ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, - "pfd from fd=%p is NULL", fd); - goto out; - } - - sfd = (svs_fd_t *)(long)tmp_pfd; - if (sfd->fd) { - ret = glfs_closedir (sfd->fd); + svs_fd_t *sfd = NULL; + uint64_t tmp_pfd = 0; + int ret = 0; + 
svs_inode_t *svs_inode = NULL; + glfs_t *fs = NULL; + inode_t *inode = NULL; + + GF_VALIDATE_OR_GOTO("snapview-server", this, out); + GF_VALIDATE_OR_GOTO(this->name, fd, out); + + ret = fd_ctx_del(fd, this, &tmp_pfd); + if (ret < 0) { + gf_msg_debug(this->name, 0, "pfd from fd=%p is NULL", fd); + goto out; + } + + inode = fd->inode; + + svs_inode = svs_inode_ctx_get(this, inode); + if (svs_inode) { + fs = svs_inode->fs; /* should inode->lock be held for this? */ + SVS_CHECK_VALID_SNAPSHOT_HANDLE(fs, this); + if (fs) { + sfd = (svs_fd_t *)(long)tmp_pfd; + if (sfd->fd) { + ret = glfs_closedir(sfd->fd); if (ret) - gf_log (this->name, GF_LOG_WARNING, "failed to close " - "the glfd for directory %s", - uuid_utoa (fd->inode->gfid)); + gf_msg(this->name, GF_LOG_WARNING, errno, + SVS_MSG_RELEASEDIR_FAILED, + "failed to close the glfd for " + "directory %s", + uuid_utoa(fd->inode->gfid)); + } } + } - GF_FREE (sfd); + GF_FREE(sfd); out: - return 0; + return 0; } int32_t -svs_flush (call_frame_t *frame, xlator_t *this, - fd_t *fd, dict_t *xdata) +svs_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { - int32_t op_ret = -1; - int32_t op_errno = 0; - int ret = -1; - uint64_t value = 0; - svs_inode_t *inode_ctx = NULL; - - GF_VALIDATE_OR_GOTO ("snapview-server", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - - inode_ctx = svs_inode_ctx_get (this, fd->inode); - if (!inode_ctx) { - gf_log (this->name, GF_LOG_ERROR, "inode context not found for" - " the inode %s", uuid_utoa (fd->inode->gfid)); - op_ret = -1; - op_errno = EINVAL; - goto out; - } - - ret = fd_ctx_get (fd, this, &value); - if (ret < 0 && inode_ctx->type != SNAP_VIEW_ENTRY_POINT_INODE) { - op_errno = EINVAL; - gf_log (this->name, GF_LOG_WARNING, - "pfd is NULL on fd=%p", fd); - goto out; - } - - op_ret = 0; + int32_t op_ret = -1; + int32_t op_errno = 0; + int ret = -1; + uint64_t value = 0; + svs_inode_t *inode_ctx = NULL; + call_stack_t 
*root = NULL; + + GF_VALIDATE_OR_GOTO("snapview-server", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, fd, out); + + root = frame->root; + op_ret = gf_setcredentials(&root->uid, &root->gid, root->ngrps, + root->groups); + if (op_ret != 0) { + goto out; + } + + inode_ctx = svs_inode_ctx_get(this, fd->inode); + if (!inode_ctx) { + op_ret = -1; + op_errno = EINVAL; + gf_msg(this->name, GF_LOG_ERROR, op_errno, + SVS_MSG_GET_INODE_CONTEXT_FAILED, + "inode context not found for" + " the inode %s", + uuid_utoa(fd->inode->gfid)); + goto out; + } + + ret = fd_ctx_get(fd, this, &value); + if (ret < 0 && inode_ctx->type != SNAP_VIEW_ENTRY_POINT_INODE) { + op_errno = EINVAL; + gf_msg(this->name, GF_LOG_WARNING, op_errno, + SVS_MSG_GET_FD_CONTEXT_FAILED, "pfd is NULL on fd=%p", fd); + goto out; + } + + op_ret = 0; out: - STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, NULL); + STACK_UNWIND_STRICT(flush, frame, op_ret, op_errno, NULL); - return 0; + return 0; } int32_t -svs_release (xlator_t *this, fd_t *fd) +svs_release(xlator_t *this, fd_t *fd) { - svs_fd_t *sfd = NULL; - uint64_t tmp_pfd = 0; - int ret = 0; - - GF_VALIDATE_OR_GOTO ("snapview-server", this, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - - ret = fd_ctx_del (fd, this, &tmp_pfd); - if (ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, - "pfd from fd=%p is NULL", fd); - goto out; - } - - sfd = (svs_fd_t *)(long)tmp_pfd; - if (sfd->fd) { - ret = glfs_close (sfd->fd); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "failed to close " - "the glfd for %s", - uuid_utoa (fd->inode->gfid)); - } + svs_fd_t *sfd = NULL; + uint64_t tmp_pfd = 0; + int ret = 0; + inode_t *inode = NULL; + svs_inode_t *svs_inode = NULL; + glfs_t *fs = NULL; + + GF_VALIDATE_OR_GOTO("snapview-server", this, out); + GF_VALIDATE_OR_GOTO(this->name, fd, out); + + ret = fd_ctx_del(fd, this, &tmp_pfd); + if (ret < 0) { + gf_msg_debug(this->name, 0, "pfd from fd=%p is NULL", fd); + goto out; + } + 
+ inode = fd->inode; + + svs_inode = svs_inode_ctx_get(this, inode); + if (svs_inode) { + fs = svs_inode->fs; /* should inode->lock be held for this? */ + SVS_CHECK_VALID_SNAPSHOT_HANDLE(fs, this); + if (fs) { + sfd = (svs_fd_t *)(long)tmp_pfd; + if (sfd->fd) { + ret = glfs_close(sfd->fd); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, errno, + SVS_MSG_RELEASE_FAILED, + "failed to close " + "the glfd for %s", + uuid_utoa(fd->inode->gfid)); + } } + } - GF_FREE (sfd); + GF_FREE(sfd); out: - return 0; + return 0; } int32_t -svs_forget (xlator_t *this, inode_t *inode) +svs_forget(xlator_t *this, inode_t *inode) { - int ret = -1; - uint64_t value = 0; - svs_inode_t *inode_ctx = NULL; - - GF_VALIDATE_OR_GOTO ("snapview-server", this, out); - GF_VALIDATE_OR_GOTO (this->name, inode, out); - - ret = inode_ctx_del (inode, this, &value); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "failed to delte the inode " - "context of %s", uuid_utoa (inode->gfid)); - goto out; - } - - inode_ctx = (svs_inode_t *)value; - - if (inode_ctx->object) - glfs_h_close (inode_ctx->object); - - GF_FREE (inode_ctx); + int ret = -1; + uint64_t value = 0; + svs_inode_t *inode_ctx = NULL; + + GF_VALIDATE_OR_GOTO("snapview-server", this, out); + GF_VALIDATE_OR_GOTO(this->name, inode, out); + + ret = inode_ctx_del(inode, this, &value); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SVS_MSG_DELETE_INODE_CONTEXT_FAILED, + "failed to delete the inode " + "context of %s", + uuid_utoa(inode->gfid)); + goto out; + } + + inode_ctx = (svs_inode_t *)(uintptr_t)value; + if (!inode_ctx) + goto out; + + if (inode_ctx->snapname) + GF_FREE(inode_ctx->snapname); + + /* + * glfs_h_close leads to unref and forgetting of the + * underlying inode in the gfapi world. i.e. the inode + * which inode_ctx->object points to. 
+ * As of now the only possibility is, this forget came as a + * result of snapdaemon's inode table reaching the lru + * limit and receiving forget as a result of purging of + * extra inodes that exceeded the limit. But, care must + * be taken to ensure that, the gfapi instance to which + * the glfs_h_object belongs to is not deleted. Otherwise + * this might result in access of a freed pointer. + * This will still be helpful in reducing the memory + * footprint of snapdaemon when the fs instance itself is + * valid (i.e. present and not destroyed due to either snap + * deactivate or snap delete), but the lru limit is reached. + * The forget due to lru limit will make the underlying inode + * being unrefed and forgotten. + */ + if (svs_inode_ctx_glfs_mapping(this, inode_ctx)) { + glfs_h_close(inode_ctx->object); + inode_ctx->object = NULL; + } + GF_FREE(inode_ctx); out: - return 0; + return 0; } int -svs_fill_readdir (xlator_t *this, gf_dirent_t *entries, size_t size, off_t off) +svs_fill_readdir(xlator_t *this, gf_dirent_t *entries, size_t size, off_t off) { - gf_dirent_t *entry = NULL; - svs_private_t *priv = NULL; - int i = 0; - snap_dirent_t *dirents = NULL; - int this_size = 0; - int filled_size = 0; - int count = 0; - - GF_VALIDATE_OR_GOTO ("snap-view-daemon", this, out); - GF_VALIDATE_OR_GOTO ("snap-view-daemon", entries, out); - - priv = this->private; - GF_ASSERT (priv); - - /* create the dir entries */ - LOCK (&priv->snaplist_lock); - { - dirents = priv->dirents; - - for (i = off; i < priv->num_snaps; ) { - this_size = sizeof (gf_dirent_t) + - strlen (dirents[i].name) + 1; - if (this_size + filled_size > size ) - goto unlock; - - entry = gf_dirent_for_name (dirents[i].name); - if (!entry) { - gf_log (this->name, GF_LOG_ERROR, - "failed to allocate dentry for %s", - dirents[i].name); - goto unlock; - } - - entry->d_off = i + 1; - /* - * readdir on the entry-point directory to the snapshot - * world, will return elements in the list of the - * snapshots as 
the directory entries. Since the entries - * returned are virtual entries which does not exist - * physically on the disk, pseudo inode numbers are - * generated. - */ - entry->d_ino = i + 2*42; - entry->d_type = DT_DIR; - list_add_tail (&entry->list, &entries->list); - ++i; - count++; - filled_size += this_size; - } - } + gf_dirent_t *entry = NULL; + svs_private_t *priv = NULL; + int i = 0; + snap_dirent_t *dirents = NULL; + int this_size = 0; + int filled_size = 0; + int count = 0; + + GF_VALIDATE_OR_GOTO("snap-view-daemon", this, out); + GF_VALIDATE_OR_GOTO("snap-view-daemon", entries, out); + + priv = this->private; + GF_ASSERT(priv); + + /* create the dir entries */ + LOCK(&priv->snaplist_lock); + { + dirents = priv->dirents; + + for (i = off; i < priv->num_snaps;) { + this_size = sizeof(gf_dirent_t) + strlen(dirents[i].name) + 1; + if (this_size + filled_size > size) + goto unlock; + + entry = gf_dirent_for_name(dirents[i].name); + if (!entry) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, SVS_MSG_NO_MEMORY, + "failed to allocate dentry for %s", dirents[i].name); + goto unlock; + } + + entry->d_off = i + 1; + /* + * readdir on the entry-point directory to the snapshot + * world, will return elements in the list of the + * snapshots as the directory entries. Since the entries + * returned are virtual entries which does not exist + * physically on the disk, pseudo inode numbers are + * generated. 
+ */ + entry->d_ino = i + 2 * 42; + entry->d_type = DT_DIR; + list_add_tail(&entry->list, &entries->list); + ++i; + count++; + filled_size += this_size; + } + } unlock: - UNLOCK (&priv->snaplist_lock); + UNLOCK(&priv->snaplist_lock); out: - return count; + return count; } int32_t -svs_glfs_readdir (xlator_t *this, glfs_fd_t *glfd, gf_dirent_t *entries, - int32_t *op_errno, struct iatt *buf, gf_boolean_t readdirplus, - size_t size) +svs_glfs_readdir(xlator_t *this, glfs_fd_t *glfd, gf_dirent_t *entries, + int32_t *op_errno, struct iatt *buf, gf_boolean_t readdirplus, + size_t size) { - int filled_size = 0; - int this_size = 0; - int32_t ret = -1; - int32_t count = 0; - gf_dirent_t *entry = NULL; - struct dirent *dirents = NULL; - struct dirent de = {0, }; - struct stat statbuf = {0, }; - off_t in_case = -1; - - GF_VALIDATE_OR_GOTO ("svs", this, out); - GF_VALIDATE_OR_GOTO (this->name, glfd, out); - GF_VALIDATE_OR_GOTO (this->name, entries, out); - - while (filled_size < size) { - in_case = glfs_telldir (glfd); - if (in_case == -1) { - gf_log (this->name, GF_LOG_ERROR, "telldir failed"); - break; - } - - if (readdirplus) - ret = glfs_readdirplus_r (glfd, &statbuf, &de, - &dirents); - else - ret = glfs_readdir_r (glfd, &de, &dirents); - - if (ret == 0 && dirents != NULL) { - if (readdirplus) - this_size = max (sizeof (gf_dirent_t), - sizeof (gfs3_dirplist)) - + strlen (de.d_name) + 1; - else - this_size = sizeof (gf_dirent_t) - + strlen (de.d_name) + 1; - - if (this_size + filled_size > size) { - glfs_seekdir (glfd, in_case); - break; - } - - entry = gf_dirent_for_name (de.d_name); - if (!entry) { - gf_log (this->name, GF_LOG_ERROR, - "could not create gf_dirent " - "for entry %s: (%s)", - entry->d_name, - strerror (errno)); - break; - } - entry->d_off = glfs_telldir (glfd); - entry->d_ino = de.d_ino; - entry->d_type = de.d_type; - if (readdirplus) { - iatt_from_stat (buf, &statbuf); - entry->d_stat = *buf; - } - list_add_tail (&entry->list, &entries->list); - - 
filled_size += this_size; - count++; - } else if (ret == 0 && dirents == NULL) { - *op_errno = ENOENT; - break; - } else if (ret != 0) { - *op_errno = errno; - break; - } - dirents = NULL; - ret = -1; - } + int filled_size = 0; + int this_size = 0; + int32_t ret = -1; + int32_t count = 0; + gf_dirent_t *entry = NULL; + struct dirent *dirents = NULL; + struct dirent de = { + 0, + }; + struct stat statbuf = { + 0, + }; + off_t in_case = -1; + + GF_VALIDATE_OR_GOTO("svs", this, out); + GF_VALIDATE_OR_GOTO(this->name, glfd, out); + GF_VALIDATE_OR_GOTO(this->name, entries, out); + + while (filled_size < size) { + in_case = glfs_telldir(glfd); + if (in_case == -1) { + gf_msg(this->name, GF_LOG_ERROR, errno, SVS_MSG_TELLDIR_FAILED, + "telldir failed"); + break; + } + + if (readdirplus) + ret = glfs_readdirplus_r(glfd, &statbuf, &de, &dirents); + else + ret = glfs_readdir_r(glfd, &de, &dirents); + + if (ret == 0 && dirents != NULL) { + if (readdirplus) + this_size = max(sizeof(gf_dirent_t), sizeof(gfs3_dirplist)) + + strlen(de.d_name) + 1; + else + this_size = sizeof(gf_dirent_t) + strlen(de.d_name) + 1; + + if (this_size + filled_size > size) { + glfs_seekdir(glfd, in_case); + break; + } + + entry = gf_dirent_for_name(de.d_name); + if (!entry) { + /* + * Since gf_dirent_for_name can return + * NULL only when it fails to allocate + * memory for the directory entry, + * SVS_MSG_NO_MEMORY is used as the + * message-id. 
+ */ + gf_msg(this->name, GF_LOG_ERROR, errno, SVS_MSG_NO_MEMORY, + "could not create gf_dirent " + "for entry %s: (%s)", + entry->d_name, strerror(errno)); + break; + } + entry->d_off = glfs_telldir(glfd); + entry->d_ino = de.d_ino; + entry->d_type = de.d_type; + if (readdirplus) { + iatt_from_stat(buf, &statbuf); + entry->d_stat = *buf; + } + list_add_tail(&entry->list, &entries->list); + + filled_size += this_size; + count++; + } else if (ret == 0 && dirents == NULL) { + *op_errno = ENOENT; + break; + } else if (ret != 0) { + *op_errno = errno; + break; + } + dirents = NULL; + } out: - return count; + return count; } /* readdirp can be of 2 types. @@ -1316,80 +1562,97 @@ out: numbers will be newly generated and filled in. */ void -svs_readdirp_fill (xlator_t *this, inode_t *parent, svs_inode_t *parent_ctx, - gf_dirent_t *entry) +svs_readdirp_fill(xlator_t *this, inode_t *parent, svs_inode_t *parent_ctx, + gf_dirent_t *entry) { - inode_t *inode = NULL; - uuid_t random_gfid = {0,}; - struct iatt buf = {0, }; - svs_inode_t *inode_ctx = NULL; - - GF_VALIDATE_OR_GOTO ("snapview-server", this, out); - GF_VALIDATE_OR_GOTO (this->name, parent, out); - GF_VALIDATE_OR_GOTO (this->name, parent_ctx, out); - GF_VALIDATE_OR_GOTO (this->name, entry, out); - - if (!strcmp (entry->d_name, ".") || !strcmp (entry->d_name, "..")) - goto out; - - inode = inode_grep (parent->table, parent, entry->d_name); - if (inode) { - entry->inode = inode; - inode_ctx = svs_inode_ctx_get (this, inode); - if (!inode_ctx) { - gf_uuid_copy (buf.ia_gfid, inode->gfid); - svs_iatt_fill (inode->gfid, &buf); - buf.ia_type = inode->ia_type; - } else { - buf = inode_ctx->buf; - } - - entry->d_ino = buf.ia_ino; - - if (parent_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) - entry->d_stat = buf; - else { - entry->d_stat.ia_ino = buf.ia_ino; - gf_uuid_copy (entry->d_stat.ia_gfid, buf.ia_gfid); - } + inode_t *inode = NULL; + uuid_t random_gfid = { + 0, + }; + struct iatt buf = { + 0, + }; + svs_inode_t *inode_ctx = 
NULL; + + GF_VALIDATE_OR_GOTO("snapview-server", this, out); + GF_VALIDATE_OR_GOTO(this->name, parent, out); + GF_VALIDATE_OR_GOTO(this->name, parent_ctx, out); + GF_VALIDATE_OR_GOTO(this->name, entry, out); + + if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) + goto out; + + inode = inode_grep(parent->table, parent, entry->d_name); + if (inode) { + entry->inode = inode; + inode_ctx = svs_inode_ctx_get(this, inode); + if (!inode_ctx) { + gf_uuid_copy(buf.ia_gfid, inode->gfid); + svs_iatt_fill(inode->gfid, &buf); + buf.ia_type = inode->ia_type; } else { - inode = inode_new (parent->table); - entry->inode = inode; - gf_uuid_generate (random_gfid); - gf_uuid_copy (buf.ia_gfid, random_gfid); - svs_fill_ino_from_gfid (&buf); - entry->d_ino = buf.ia_ino; - - /* If inode context allocation fails, then do not send the - inode for that particular entry as part of readdirp - response. Fuse and protocol/server will link the inodes - in readdirp only if the entry contains inode in it. 
- */ - inode_ctx = svs_inode_ctx_get_or_new (this, inode); - if (!inode_ctx) { - gf_log (this->name, GF_LOG_ERROR, "failed to allocate " - "inode context for %s", entry->d_name); - inode_unref (entry->inode); - entry->inode = NULL; - goto out; - } + buf = inode_ctx->buf; + } + entry->d_ino = buf.ia_ino; - if (parent_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) { - buf.ia_type = IA_IFDIR; - inode_ctx->buf = buf; - entry->d_stat = buf; - inode_ctx->type = SNAP_VIEW_SNAPSHOT_INODE; - } else { - gf_uuid_copy (entry->d_stat.ia_gfid, buf.ia_gfid); - entry->d_stat.ia_ino = buf.ia_ino; - inode_ctx->buf = entry->d_stat; - inode_ctx->type = SNAP_VIEW_VIRTUAL_INODE; - } + if (parent_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) + entry->d_stat = buf; + else { + entry->d_stat.ia_ino = buf.ia_ino; + gf_uuid_copy(entry->d_stat.ia_gfid, buf.ia_gfid); } + } else { + if (parent_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) { + inode = inode_new(parent->table); + entry->inode = inode; + + /* If inode context allocation fails, then do not send + * the inode for that particular entry as part of + * readdirp response. Fuse and protocol/server will link + * the inodes in readdirp only if the entry contains + * inode in it. + */ + inode_ctx = svs_inode_ctx_get_or_new(this, inode); + if (!inode_ctx) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, SVS_MSG_NO_MEMORY, + "failed to allocate inode " + "context for %s", + entry->d_name); + inode_unref(entry->inode); + entry->inode = NULL; + goto out; + } + + /* Generate virtual gfid for SNAPSHOT dir and + * update the statbuf + */ + gf_uuid_generate(random_gfid); + gf_uuid_copy(buf.ia_gfid, random_gfid); + svs_fill_ino_from_gfid(&buf); + buf.ia_type = IA_IFDIR; + entry->d_ino = buf.ia_ino; + entry->d_stat = buf; + inode_ctx->buf = buf; + inode_ctx->type = SNAP_VIEW_SNAPSHOT_INODE; + } else { + /* For files under snapshot world do not set + * entry->inode and reset statbuf (except ia_ino), + * so that FUSE/Kernel will send an explicit lookup. 
+ * entry->d_stat contains the statbuf information + * of original file, so for NFS not to cache this + * information and to send explicit lookup, it is + * required to reset the statbuf. + * Virtual gfid for these files will be generated in the + * first lookup. + */ + buf.ia_ino = entry->d_ino; + entry->d_stat = buf; + } + } out: - return; + return; } /* In readdirp, though new inode is created along with the generation of @@ -1401,151 +1664,176 @@ out: and is filled in when lookup comes on that object. */ int32_t -svs_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t off, dict_t *dict) +svs_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *dict) { - gf_dirent_t entries; - gf_dirent_t *entry = NULL; - struct iatt buf = {0, }; - int count = 0; - int op_ret = -1; - int op_errno = EINVAL; - svs_inode_t *parent_ctx = NULL; - svs_fd_t *svs_fd = NULL; - - GF_VALIDATE_OR_GOTO ("snap-view-daemon", this, unwind); - GF_VALIDATE_OR_GOTO (this->name, frame, unwind); - GF_VALIDATE_OR_GOTO (this->name, fd, unwind); - GF_VALIDATE_OR_GOTO (this->name, fd->inode, unwind); - - INIT_LIST_HEAD (&entries.list); - - parent_ctx = svs_inode_ctx_get (this, fd->inode); - if (!parent_ctx) { - gf_log (this->name, GF_LOG_ERROR, "failed to get the inode " - "context for %s", uuid_utoa (fd->inode->gfid)); - op_ret = -1; - op_errno = EINVAL; - goto unwind; + gf_dirent_t entries; + gf_dirent_t *entry = NULL; + struct iatt buf = { + 0, + }; + int count = 0; + int op_ret = -1; + int op_errno = EINVAL; + svs_inode_t *parent_ctx = NULL; + svs_fd_t *svs_fd = NULL; + call_stack_t *root = NULL; + + GF_VALIDATE_OR_GOTO("snap-view-daemon", this, unwind); + GF_VALIDATE_OR_GOTO(this->name, frame, unwind); + GF_VALIDATE_OR_GOTO(this->name, fd, unwind); + GF_VALIDATE_OR_GOTO(this->name, fd->inode, unwind); + + INIT_LIST_HEAD(&entries.list); + + root = frame->root; + op_ret = gf_setcredentials(&root->uid, &root->gid, root->ngrps, + 
root->groups); + if (op_ret != 0) { + goto unwind; + } + + parent_ctx = svs_inode_ctx_get(this, fd->inode); + if (!parent_ctx) { + op_ret = -1; + op_errno = EINVAL; + gf_msg(this->name, GF_LOG_ERROR, op_errno, + SVS_MSG_GET_INODE_CONTEXT_FAILED, + "failed to get the inode " + "context for %s", + uuid_utoa(fd->inode->gfid)); + goto unwind; + } + + if (parent_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) { + LOCK(&fd->lock); + { + count = svs_fill_readdir(this, &entries, size, off); } + UNLOCK(&fd->lock); - if (parent_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) { - LOCK (&fd->lock); - { - count = svs_fill_readdir (this, &entries, size, off); - } - UNLOCK (&fd->lock); - - op_ret = count; - - list_for_each_entry (entry, &entries.list, list) { - svs_readdirp_fill (this, fd->inode, parent_ctx, entry); - } + op_ret = count; - goto unwind; - } else { - svs_fd = svs_fd_ctx_get_or_new (this, fd); - if (!svs_fd) { - gf_log (this->name, GF_LOG_ERROR, "failed to get the " - "fd context %s", uuid_utoa (fd->inode->gfid)); - op_ret = -1; - op_errno = EBADFD; - goto unwind; - } + list_for_each_entry(entry, &entries.list, list) + { + svs_readdirp_fill(this, fd->inode, parent_ctx, entry); + } - glfs_seekdir (svs_fd->fd, off); + goto unwind; + } else { + svs_fd = svs_fd_ctx_get_or_new(this, fd); + if (!svs_fd) { + op_ret = -1; + op_errno = EBADFD; + gf_msg(this->name, GF_LOG_ERROR, op_errno, + SVS_MSG_GET_FD_CONTEXT_FAILED, + "failed to get the fd context " + "for the inode %s", + uuid_utoa(fd->inode->gfid)); + goto unwind; + } - LOCK (&fd->lock); - { - count = svs_glfs_readdir (this, svs_fd->fd, &entries, - &op_errno, &buf, _gf_true, - size); - } - UNLOCK (&fd->lock); + glfs_seekdir(svs_fd->fd, off); - op_ret = count; + LOCK(&fd->lock); + { + count = svs_glfs_readdir(this, svs_fd->fd, &entries, &op_errno, + &buf, _gf_true, size); + } + UNLOCK(&fd->lock); - list_for_each_entry (entry, &entries.list, list) { - svs_readdirp_fill (this, fd->inode, parent_ctx, entry); - } + op_ret = count; - 
goto unwind; + list_for_each_entry(entry, &entries.list, list) + { + svs_readdirp_fill(this, fd->inode, parent_ctx, entry); } + goto unwind; + } + unwind: - STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, &entries, dict); + STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, &entries, dict); - gf_dirent_free (&entries); + gf_dirent_free(&entries); - return 0; + return 0; } int32_t -svs_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t off, dict_t *xdata) +svs_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *xdata) { - svs_private_t *priv = NULL; - gf_dirent_t entries = {{{0, }, }, }; - int count = 0; - svs_inode_t *inode_ctx = NULL; - int op_errno = EINVAL; - int op_ret = -1; - svs_fd_t *svs_fd = NULL; - glfs_fd_t *glfd = NULL; - - INIT_LIST_HEAD (&entries.list); - - GF_VALIDATE_OR_GOTO ("snap-view-server", this, unwind); - GF_VALIDATE_OR_GOTO (this->name, frame, unwind); - GF_VALIDATE_OR_GOTO (this->name, fd, unwind); - GF_VALIDATE_OR_GOTO (this->name, fd->inode, unwind); - - priv = this->private; - - inode_ctx = svs_inode_ctx_get (this, fd->inode); - if (!inode_ctx) { - gf_log (this->name, GF_LOG_ERROR, "inode context not found in " - "the inode %s", uuid_utoa (fd->inode->gfid)); - op_ret = -1; - op_errno = EINVAL; - goto unwind; + gf_dirent_t entries = { + { + { + 0, + }, + }, + }; + int count = 0; + svs_inode_t *inode_ctx = NULL; + int op_errno = EINVAL; + int op_ret = -1; + svs_fd_t *svs_fd = NULL; + glfs_fd_t *glfd = NULL; + + INIT_LIST_HEAD(&entries.list); + + GF_VALIDATE_OR_GOTO("snap-view-server", this, unwind); + GF_VALIDATE_OR_GOTO(this->name, frame, unwind); + GF_VALIDATE_OR_GOTO(this->name, fd, unwind); + GF_VALIDATE_OR_GOTO(this->name, fd->inode, unwind); + + inode_ctx = svs_inode_ctx_get(this, fd->inode); + if (!inode_ctx) { + op_ret = -1; + op_errno = EINVAL; + gf_msg(this->name, GF_LOG_ERROR, op_errno, + SVS_MSG_GET_INODE_CONTEXT_FAILED, + "inode context not found in " + 
"the inode %s", + uuid_utoa(fd->inode->gfid)); + goto unwind; + } + + if (inode_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) { + LOCK(&fd->lock); + { + count = svs_fill_readdir(this, &entries, size, off); + } + UNLOCK(&fd->lock); + } else { + svs_fd = svs_fd_ctx_get_or_new(this, fd); + if (!svs_fd) { + op_ret = -1; + op_errno = EBADFD; + gf_msg(this->name, GF_LOG_ERROR, op_errno, + SVS_MSG_GET_FD_CONTEXT_FAILED, + "failed to get the fd " + "context for %s", + uuid_utoa(fd->inode->gfid)); + goto unwind; } - if (inode_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) { - LOCK (&fd->lock); - { - count = svs_fill_readdir (this, &entries, size, off); - } - UNLOCK (&fd->lock); - } else { - svs_fd = svs_fd_ctx_get_or_new (this, fd); - if (!svs_fd) { - gf_log (this->name, GF_LOG_ERROR, "failed to get the " - "fd context %s", uuid_utoa (fd->inode->gfid)); - op_ret = -1; - op_errno = EBADFD; - goto unwind; - } - - glfd = svs_fd->fd; + glfd = svs_fd->fd; - LOCK (&fd->lock); - { - count = svs_glfs_readdir (this, glfd, &entries, - &op_errno, NULL, _gf_false, - size); - } - UNLOCK (&fd->lock); + LOCK(&fd->lock); + { + count = svs_glfs_readdir(this, glfd, &entries, &op_errno, NULL, + _gf_false, size); } + UNLOCK(&fd->lock); + } - op_ret = count; + op_ret = count; unwind: - STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, &entries, xdata); + STACK_UNWIND_STRICT(readdir, frame, op_ret, op_errno, &entries, xdata); - gf_dirent_free (&entries); + gf_dirent_free(&entries); - return 0; + return 0; } /* @@ -1583,11 +1871,11 @@ unwind: * the gfid it got from NFS client, for which it was not able to find the right * inode. So snapview-server was able to get the fs instance (glfs_t) of the * snapshot volume to which the entry belongs to, and the handle for the entry - * from the corresponding snapshot volume and fill those informations in the + * from the corresponding snapshot volume and fill those information in the * inode context. 
* * But now, since NFS server is able to find the inode from the inode table for - * the gfid it got from the NFS client, it wont send lookup. Rather it directly + * the gfid it got from the NFS client, it won't send lookup. Rather it directly * sends the fop it received from the client. Now this causes problems for * snapview-server. Because for each fop snapview-server assumes that lookup has * been performed on that entry and the entry's inode context contains the @@ -1601,693 +1889,832 @@ unwind: */ int32_t -svs_get_handle (xlator_t *this, loc_t *loc, svs_inode_t *inode_ctx, - int32_t *op_errno) +svs_get_handle(xlator_t *this, loc_t *loc, svs_inode_t *inode_ctx, + int32_t *op_errno) { - svs_inode_t *parent_ctx = NULL; - int ret = -1; - inode_t *parent = NULL; - struct iatt postparent = {0, }; - struct iatt buf = {0, }; - char uuid1[64]; - - GF_VALIDATE_OR_GOTO ("snap-view-daemon", this, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, loc->inode, out); - - if (loc->path) { - if (!loc->name || (loc->name && !strcmp (loc->name, ""))) { - loc->name = strrchr (loc->path, '/'); - if (loc->name) - loc->name++; - } - } - - if (loc->parent) - parent = inode_ref (loc->parent); - else { - parent = inode_find (loc->inode->table, loc->pargfid); - if (!parent) - parent = inode_parent (loc->inode, NULL, NULL); - } - - if (parent) - parent_ctx = svs_inode_ctx_get (this, parent); - - if (!parent_ctx) { - gf_log (this->name, GF_LOG_WARNING, "failed to get the parent " - "context for %s (%s)", loc->path, - uuid_utoa_r (loc->inode->gfid, uuid1)); - *op_errno = EINVAL; - goto out; - } - - if (parent_ctx) { - if (parent_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) - ret = svs_lookup_snapshot (this, loc, &buf, - &postparent, parent, - parent_ctx, op_errno); - else - ret = svs_lookup_entry (this, loc, &buf, - &postparent, parent, - parent_ctx, op_errno); - } + svs_inode_t *parent_ctx = NULL; + int ret = -1; + inode_t *parent = NULL; + struct iatt 
postparent = { + 0, + }; + struct iatt buf = { + 0, + }; + char uuid1[64]; + + GF_VALIDATE_OR_GOTO("snap-view-daemon", this, out); + GF_VALIDATE_OR_GOTO(this->name, loc, out); + GF_VALIDATE_OR_GOTO(this->name, loc->inode, out); + + if (loc->path) { + if (!loc->name || (loc->name && !strcmp(loc->name, ""))) { + loc->name = strrchr(loc->path, '/'); + if (loc->name) + loc->name++; + } + } + + if (loc->parent) + parent = inode_ref(loc->parent); + else { + parent = inode_find(loc->inode->table, loc->pargfid); + if (!parent) + parent = inode_parent(loc->inode, NULL, NULL); + } + + if (parent) + parent_ctx = svs_inode_ctx_get(this, parent); + + if (!parent_ctx) { + *op_errno = EINVAL; + gf_msg(this->name, GF_LOG_WARNING, *op_errno, + SVS_MSG_GET_INODE_CONTEXT_FAILED, + "failed to get the parent " + "context for %s (%s)", + loc->path, uuid_utoa_r(loc->inode->gfid, uuid1)); + goto out; + } + + if (parent_ctx) { + if (parent_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) + ret = svs_lookup_snapshot(this, loc, &buf, &postparent, parent, + parent_ctx, op_errno); + else + ret = svs_lookup_entry(this, loc, &buf, &postparent, parent, + parent_ctx, op_errno); + } out: - if (parent) - inode_unref (parent); + if (parent) + inode_unref(parent); - return ret; + return ret; } int32_t -svs_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +svs_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { - svs_private_t *priv = NULL; - struct iatt buf = {0, }; - int32_t op_errno = EINVAL; - int32_t op_ret = -1; - svs_inode_t *inode_ctx = NULL; - glfs_t *fs = NULL; - glfs_object_t *object = NULL; - struct stat stat = {0, }; - int ret = -1; - - GF_VALIDATE_OR_GOTO ("snap-view-daemon", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, loc->inode, out); - - priv = this->private; - - /* Instead of doing the check of whether it is a entry point directory - or not by checking the 
name of the entry and then deciding what - to do, just check the inode context and decide what to be done. - */ - - inode_ctx = svs_inode_ctx_get (this, loc->inode); - if (!inode_ctx) { - gf_log (this->name, GF_LOG_ERROR, "inode context not found for" - " %s", uuid_utoa (loc->inode->gfid)); - op_ret = -1; - op_errno = EINVAL; - goto out; - } - - if (inode_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) { - svs_iatt_fill (loc->inode->gfid, &buf); - op_ret = 0; - } - else { - - SVS_GET_INODE_CTX_INFO(inode_ctx, fs, object, this, loc, op_ret, - op_errno, out); - - ret = glfs_h_stat (fs, object, &stat); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "glfs_h_stat on %s " - "(gfid: %s) failed", loc->name, - uuid_utoa (loc->inode->gfid)); - op_ret = -1; - op_errno = errno; - goto out; - } + struct iatt buf = { + 0, + }; + int32_t op_errno = EINVAL; + int32_t op_ret = -1; + svs_inode_t *inode_ctx = NULL; + glfs_t *fs = NULL; + glfs_object_t *object = NULL; + struct stat stat = { + 0, + }; + int ret = -1; + call_stack_t *root = NULL; + + GF_VALIDATE_OR_GOTO("snap-view-daemon", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, loc, out); + GF_VALIDATE_OR_GOTO(this->name, loc->inode, out); + + root = frame->root; + op_ret = gf_setcredentials(&root->uid, &root->gid, root->ngrps, + root->groups); + if (op_ret != 0) { + goto out; + } + + /* Instead of doing the check of whether it is a entry point directory + or not by checking the name of the entry and then deciding what + to do, just check the inode context and decide what to be done. 
+ */ + + inode_ctx = svs_inode_ctx_get(this, loc->inode); + if (!inode_ctx) { + op_ret = -1; + op_errno = EINVAL; + gf_msg(this->name, GF_LOG_ERROR, op_errno, + SVS_MSG_GET_INODE_CONTEXT_FAILED, + "inode context not found for %s", uuid_utoa(loc->inode->gfid)); + goto out; + } + + if (inode_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) { + svs_iatt_fill(loc->inode->gfid, &buf); + op_ret = 0; + } else { + SVS_GET_INODE_CTX_INFO(inode_ctx, fs, object, this, loc, op_ret, + op_errno, out); - iatt_from_stat (&buf, &stat); - gf_uuid_copy (buf.ia_gfid, loc->inode->gfid); - svs_fill_ino_from_gfid (&buf); - op_ret = ret; - } + ret = glfs_h_stat(fs, object, &stat); + if (ret) { + op_ret = -1; + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, op_errno, SVS_MSG_STAT_FAILED, + "glfs_h_stat on %s (gfid: %s) " + "failed", + loc->name, uuid_utoa(loc->inode->gfid)); + goto out; + } else + gf_msg_debug(this->name, 0, "stat on %s (%s) successful", loc->path, + uuid_utoa(loc->inode->gfid)); + + iatt_from_stat(&buf, &stat); + gf_uuid_copy(buf.ia_gfid, loc->inode->gfid); + svs_fill_ino_from_gfid(&buf); + op_ret = ret; + } out: - STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, &buf, xdata); - return 0; + STACK_UNWIND_STRICT(stat, frame, op_ret, op_errno, &buf, xdata); + return 0; } int32_t -svs_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) +svs_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { - svs_private_t *priv = NULL; - struct iatt buf = {0, }; - int32_t op_errno = EINVAL; - int32_t op_ret = -1; - svs_inode_t *inode_ctx = NULL; - struct stat stat = {0, }; - int ret = -1; - glfs_fd_t *glfd = NULL; - svs_fd_t *sfd = NULL; - - GF_VALIDATE_OR_GOTO ("snap-view-daemon", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - GF_VALIDATE_OR_GOTO (this->name, fd->inode, out); - - priv = this->private; - - /* Instead of doing the check of whether it is a entry point directory - or not by 
checking the name of the entry and then deciding what - to do, just check the inode context and decide what to be done. - */ - - inode_ctx = svs_inode_ctx_get (this, fd->inode); - if (!inode_ctx) { - gf_log (this->name, GF_LOG_ERROR, "inode context not found for" - " the inode %s", uuid_utoa (fd->inode->gfid)); - op_ret = -1; - op_errno = EINVAL; - goto out; + struct iatt buf = { + 0, + }; + int32_t op_errno = EINVAL; + int32_t op_ret = -1; + svs_inode_t *inode_ctx = NULL; + struct stat stat = { + 0, + }; + int ret = -1; + glfs_fd_t *glfd = NULL; + svs_fd_t *sfd = NULL; + call_stack_t *root = NULL; + + GF_VALIDATE_OR_GOTO("snap-view-daemon", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, fd, out); + GF_VALIDATE_OR_GOTO(this->name, fd->inode, out); + + /* Instead of doing the check of whether it is a entry point directory + or not by checking the name of the entry and then deciding what + to do, just check the inode context and decide what to be done. + */ + + root = frame->root; + op_ret = gf_setcredentials(&root->uid, &root->gid, root->ngrps, + root->groups); + if (op_ret != 0) { + goto out; + } + + inode_ctx = svs_inode_ctx_get(this, fd->inode); + if (!inode_ctx) { + op_ret = -1; + op_errno = EINVAL; + gf_msg(this->name, GF_LOG_ERROR, op_errno, + SVS_MSG_GET_INODE_CONTEXT_FAILED, + "inode context not found for" + " the inode %s", + uuid_utoa(fd->inode->gfid)); + goto out; + } + + if (inode_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) { + svs_iatt_fill(fd->inode->gfid, &buf); + op_ret = 0; + } else { + if (!(svs_inode_ctx_glfs_mapping(this, inode_ctx))) { + op_ret = -1; + op_errno = EBADF; + gf_msg(this->name, GF_LOG_ERROR, op_errno, + SVS_MSG_FS_INSTANCE_INVALID, + "glfs instance %p to which the inode %s " + "belongs to does not exist. 
That snapshot " + "corresponding to the fs instance " + "might have been deleted or deactivated.", + inode_ctx->fs, uuid_utoa(fd->inode->gfid)); + goto out; + } + + sfd = svs_fd_ctx_get_or_new(this, fd); + if (!sfd) { + op_ret = -1; + op_errno = EBADFD; + gf_msg(this->name, GF_LOG_ERROR, op_errno, + SVS_MSG_GET_FD_CONTEXT_FAILED, + "failed to get the fd context " + "for %s", + uuid_utoa(fd->inode->gfid)); + goto out; } - if (inode_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) { - svs_iatt_fill (fd->inode->gfid, &buf); - op_ret = 0; + glfd = sfd->fd; + ret = glfs_fstat(glfd, &stat); + if (ret) { + op_ret = -1; + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, op_errno, SVS_MSG_STAT_FAILED, + "glfs_fstat on gfid: %s failed", uuid_utoa(fd->inode->gfid)); + goto out; } - else { - sfd = svs_fd_ctx_get_or_new (this, fd); - if (!sfd) { - gf_log (this->name, GF_LOG_ERROR, "failed to get the " - "fd context for %s", - uuid_utoa (fd->inode->gfid)); - op_ret = -1; - op_errno = EBADFD; - goto out; - } - - glfd = sfd->fd; - ret = glfs_fstat (glfd, &stat); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "glfs_fstat on " - "gfid: %s failed", uuid_utoa (fd->inode->gfid)); - op_ret = -1; - op_errno = errno; - goto out; - } - iatt_from_stat (&buf, &stat); - gf_uuid_copy (buf.ia_gfid, fd->inode->gfid); - svs_fill_ino_from_gfid (&buf); - op_ret = ret; - } + iatt_from_stat(&buf, &stat); + gf_uuid_copy(buf.ia_gfid, fd->inode->gfid); + svs_fill_ino_from_gfid(&buf); + op_ret = ret; + } out: - STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, &buf, xdata); - return 0; + STACK_UNWIND_STRICT(fstat, frame, op_ret, op_errno, &buf, xdata); + return 0; } int32_t -svs_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +svs_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { - svs_private_t *priv = NULL; - struct statvfs buf = {0, }; - int32_t op_errno = EINVAL; - int32_t op_ret = -1; - svs_inode_t *inode_ctx = NULL; - glfs_t *fs = NULL; - 
glfs_object_t *object = NULL; - int ret = -1; - - GF_VALIDATE_OR_GOTO ("snap-view-daemon", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, loc->inode, out); - - priv = this->private; - - /* Instead of doing the check of whether it is a entry point directory - or not by checking the name of the entry and then deciding what - to do, just check the inode context and decide what to be done. - */ - inode_ctx = svs_inode_ctx_get (this, loc->inode); - if (!inode_ctx) { - gf_log (this->name, GF_LOG_ERROR, "inode context not found for" - " %s", uuid_utoa (loc->inode->gfid)); - op_ret = -1; - op_errno = EINVAL; - goto out; - } - - SVS_GET_INODE_CTX_INFO(inode_ctx, fs, object, this, loc, op_ret, - op_errno, out); - - ret = glfs_h_statfs (fs, object, &buf); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "glfs_h_statvfs on %s " - "(gfid: %s) failed", loc->name, - uuid_utoa (loc->inode->gfid)); - op_ret = -1; - op_errno = errno; - goto out; - } - op_ret = ret; + struct statvfs buf = { + 0, + }; + int32_t op_errno = EINVAL; + int32_t op_ret = -1; + svs_inode_t *inode_ctx = NULL; + glfs_t *fs = NULL; + glfs_object_t *object = NULL; + int ret = -1; + call_stack_t *root = NULL; + + GF_VALIDATE_OR_GOTO("snap-view-daemon", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, loc, out); + GF_VALIDATE_OR_GOTO(this->name, loc->inode, out); + + root = frame->root; + op_ret = gf_setcredentials(&root->uid, &root->gid, root->ngrps, + root->groups); + if (op_ret != 0) { + goto out; + } + + /* Instead of doing the check of whether it is a entry point directory + or not by checking the name of the entry and then deciding what + to do, just check the inode context and decide what to be done. 
+ */ + inode_ctx = svs_inode_ctx_get(this, loc->inode); + if (!inode_ctx) { + op_ret = -1; + op_errno = EINVAL; + gf_msg(this->name, GF_LOG_ERROR, op_errno, + SVS_MSG_GET_INODE_CONTEXT_FAILED, + "inode context not found for %s", uuid_utoa(loc->inode->gfid)); + goto out; + } + + SVS_GET_INODE_CTX_INFO(inode_ctx, fs, object, this, loc, op_ret, op_errno, + out); + + ret = glfs_h_statfs(fs, object, &buf); + if (ret) { + op_ret = -1; + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, op_errno, SVS_MSG_STATFS_FAILED, + "glfs_h_statvfs on %s (gfid: %s) " + "failed", + loc->name, uuid_utoa(loc->inode->gfid)); + goto out; + } + op_ret = ret; out: - STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, &buf, xdata); - return 0; + STACK_UNWIND_STRICT(statfs, frame, op_ret, op_errno, &buf, xdata); + return 0; } - int32_t -svs_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - fd_t *fd, dict_t *xdata) +svs_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + fd_t *fd, dict_t *xdata) { - svs_inode_t *inode_ctx = NULL; - svs_fd_t *sfd = NULL; - int32_t op_ret = -1; - int32_t op_errno = EINVAL; - glfs_fd_t *glfd = NULL; - glfs_t *fs = NULL; - glfs_object_t *object = NULL; - - - GF_VALIDATE_OR_GOTO ("snap-view-daemon", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, loc->inode, out); - - inode_ctx = svs_inode_ctx_get (this, loc->inode); - if (!inode_ctx) { - gf_log (this->name, GF_LOG_ERROR, "inode context for %s " - "(gfid: %s) not found", loc->name, - uuid_utoa (loc->inode->gfid)); - goto out; - } - - if (inode_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) - GF_ASSERT (0); // on entry point it should always be opendir - - SVS_GET_INODE_CTX_INFO(inode_ctx, fs, object, this, loc, op_ret, - op_errno, out); - - glfd = glfs_h_open (fs, object, flags); - if (!glfd) { - gf_log (this->name, GF_LOG_ERROR, 
"glfs_h_open on %s failed " - "(gfid: %s)", loc->name, uuid_utoa (loc->inode->gfid)); - op_ret = -1; - op_errno = errno; - goto out; - } - - sfd = svs_fd_ctx_get_or_new (this, fd); - if (!sfd) { - gf_log (this->name, GF_LOG_ERROR, "failed to allocate fd " - "context for %s (gfid: %s)", loc->name, - uuid_utoa (loc->inode->gfid)); - op_ret = -1; - op_errno = ENOMEM; - glfs_close (glfd); - goto out; - } - sfd->fd = glfd; - - op_ret = 0; + svs_inode_t *inode_ctx = NULL; + svs_fd_t *sfd = NULL; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + glfs_fd_t *glfd = NULL; + glfs_t *fs = NULL; + glfs_object_t *object = NULL; + call_stack_t *root = NULL; + + GF_VALIDATE_OR_GOTO("snap-view-daemon", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, fd, out); + GF_VALIDATE_OR_GOTO(this->name, loc, out); + GF_VALIDATE_OR_GOTO(this->name, loc->inode, out); + + root = frame->root; + + inode_ctx = svs_inode_ctx_get(this, loc->inode); + if (!inode_ctx) { + gf_msg(this->name, GF_LOG_ERROR, op_errno, + SVS_MSG_GET_INODE_CONTEXT_FAILED, + "inode context for %s (gfid: %s) " + "not found", + loc->name, uuid_utoa(loc->inode->gfid)); + goto out; + } + + if (inode_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) + GF_ASSERT(0); // on entry point it should always be opendir + + SVS_GET_INODE_CTX_INFO(inode_ctx, fs, object, this, loc, op_ret, op_errno, + out); + + op_ret = gf_setcredentials(&root->uid, &root->gid, root->ngrps, + root->groups); + if (op_ret != 0) { + goto out; + } + + glfd = glfs_h_open(fs, object, flags); + if (!glfd) { + op_ret = -1; + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, op_errno, SVS_MSG_OPEN_FAILED, + "glfs_h_open on %s failed (gfid: %s)", loc->name, + uuid_utoa(loc->inode->gfid)); + goto out; + } + + sfd = svs_fd_ctx_get_or_new(this, fd); + if (!sfd) { + op_ret = -1; + op_errno = ENOMEM; + gf_msg(this->name, GF_LOG_ERROR, op_errno, SVS_MSG_NO_MEMORY, + "failed to allocate fd context " + "for %s (gfid: %s)", + 
loc->name, uuid_utoa(loc->inode->gfid)); + glfs_close(glfd); + goto out; + } + sfd->fd = glfd; + + op_ret = 0; out: - STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, NULL); - return 0; + STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, NULL); + return 0; } int32_t -svs_readv (call_frame_t *frame, xlator_t *this, - fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata) +svs_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) { - int32_t op_ret = -1; - int32_t op_errno = 0; - svs_private_t *priv = NULL; - struct iobuf *iobuf = NULL; - struct iobref *iobref = NULL; - struct iovec vec = {0,}; - svs_fd_t *sfd = NULL; - int ret = -1; - struct stat fstatbuf = {0, }; - glfs_fd_t *glfd = NULL; - struct iatt stbuf = {0, }; - - GF_VALIDATE_OR_GOTO ("snap-view-daemon", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - GF_VALIDATE_OR_GOTO (this->name, fd->inode, out); - - priv = this->private; - VALIDATE_OR_GOTO (priv, out); - - sfd = svs_fd_ctx_get_or_new (this, fd); - if (!sfd) { - gf_log (this->name, GF_LOG_ERROR, "failed to get the fd " - "context for %s", uuid_utoa (fd->inode->gfid)); - op_ret = -1; - op_errno = EBADFD; - goto out; - } - - glfd = sfd->fd; - - iobuf = iobuf_get2 (this->ctx->iobuf_pool, size); - if (!iobuf) { - op_ret = -1; - op_errno = ENOMEM; - goto out; - } - - ret = glfs_pread (glfd, iobuf->ptr, size, offset, 0); - if (ret < 0) { - op_ret = -1; - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, "glfs_read failed (%s)", - strerror (op_errno)); - goto out; - } - - vec.iov_base = iobuf->ptr; - vec.iov_len = ret; - - iobref = iobref_new (); - - iobref_add (iobref, iobuf); - - ret = glfs_fstat (glfd, &fstatbuf); - if (ret) { - op_ret = -1; - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, "glfs_fstat failed after " - "readv on %s", uuid_utoa (fd->inode->gfid)); - goto out; - } - - iatt_from_stat (&stbuf, 
&fstatbuf); - gf_uuid_copy (stbuf.ia_gfid, fd->inode->gfid); - svs_fill_ino_from_gfid (&stbuf); - - /* Hack to notify higher layers of EOF. */ - if (!stbuf.ia_size || (offset + vec.iov_len) >= stbuf.ia_size) - op_errno = ENOENT; - - op_ret = vec.iov_len; + int32_t op_ret = -1; + int32_t op_errno = 0; + svs_private_t *priv = NULL; + struct iobuf *iobuf = NULL; + struct iobref *iobref = NULL; + struct iovec vec = { + 0, + }; + svs_fd_t *sfd = NULL; + int ret = -1; + struct glfs_stat fstatbuf = { + 0, + }; + glfs_fd_t *glfd = NULL; + struct iatt stbuf = { + 0, + }; + call_stack_t *root = NULL; + + GF_VALIDATE_OR_GOTO("snap-view-daemon", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, fd, out); + GF_VALIDATE_OR_GOTO(this->name, fd->inode, out); + + priv = this->private; + VALIDATE_OR_GOTO(priv, out); + + root = frame->root; + op_ret = gf_setcredentials(&root->uid, &root->gid, root->ngrps, + root->groups); + if (op_ret != 0) { + goto out; + } + + if (!svs_inode_glfs_mapping(this, fd->inode)) { + op_ret = -1; + op_errno = EBADF; /* should this be some other error? 
*/ + gf_msg(this->name, GF_LOG_ERROR, op_errno, SVS_MSG_FS_INSTANCE_INVALID, + "glfs instance to which the inode " + "%s receiving read request belongs, " + "does not exist anymore", + uuid_utoa(fd->inode->gfid)); + goto out; + } + + sfd = svs_fd_ctx_get_or_new(this, fd); + if (!sfd) { + op_ret = -1; + op_errno = EBADFD; + gf_msg(this->name, GF_LOG_ERROR, op_errno, + SVS_MSG_GET_INODE_CONTEXT_FAILED, + "failed to get the fd " + "context for %s", + uuid_utoa(fd->inode->gfid)); + goto out; + } + + glfd = sfd->fd; + + iobuf = iobuf_get2(this->ctx->iobuf_pool, size); + if (!iobuf) { + op_ret = -1; + op_errno = ENOMEM; + gf_msg(this->name, GF_LOG_ERROR, op_errno, SVS_MSG_NO_MEMORY, + "failed to " + "allocate iobuf while reading the " + "file with gfid %s", + uuid_utoa(fd->inode->gfid)); + goto out; + } + + ret = glfs_pread(glfd, iobuf->ptr, size, offset, 0, &fstatbuf); + if (ret < 0) { + op_ret = -1; + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, op_errno, SVS_MSG_READ_FAILED, + "glfs_read failed on %s (%s)", uuid_utoa(fd->inode->gfid), + strerror(op_errno)); + goto out; + } + + vec.iov_base = iobuf->ptr; + vec.iov_len = ret; + + iobref = iobref_new(); + + iobref_add(iobref, iobuf); + glfs_iatt_from_statx(&stbuf, &fstatbuf); + gf_uuid_copy(stbuf.ia_gfid, fd->inode->gfid); + svs_fill_ino_from_gfid(&stbuf); + + /* Hack to notify higher layers of EOF. 
*/ + if (!stbuf.ia_size || (offset + vec.iov_len) >= stbuf.ia_size) + op_errno = ENOENT; + + op_ret = vec.iov_len; out: - STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, - &vec, 1, &stbuf, iobref, NULL); + STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, &vec, 1, &stbuf, iobref, + NULL); - if (iobref) - iobref_unref (iobref); - if (iobuf) - iobuf_unref (iobuf); + if (iobref) + iobref_unref(iobref); + if (iobuf) + iobuf_unref(iobuf); - return 0; + return 0; } int32_t -svs_readlink (call_frame_t *frame, xlator_t *this, - loc_t *loc, size_t size, dict_t *xdata) +svs_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size, + dict_t *xdata) { - svs_inode_t *inode_ctx = NULL; - glfs_t *fs = NULL; - glfs_object_t *object = NULL; - int op_ret = -1; - int op_errno = EINVAL; - char *buf = NULL; - struct iatt stbuf = {0, }; - int ret = -1; - struct stat stat = {0, }; - - GF_VALIDATE_OR_GOTO ("snap-view-daemon", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, loc->inode, out); - - inode_ctx = svs_inode_ctx_get (this, loc->inode); - if (!inode_ctx) { - gf_log (this->name, GF_LOG_ERROR, "failed to get inode context " - "for %s (gfid: %s)", loc->name, - uuid_utoa (loc->inode->gfid)); - op_ret = -1; - op_errno = EINVAL; - goto out; - } - - SVS_GET_INODE_CTX_INFO(inode_ctx, fs, object, this, loc, op_ret, - op_errno, out); - - ret = glfs_h_stat (fs, object, &stat); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "glfs_h_stat on %s " - "(gfid: %s) failed", loc->name, - uuid_utoa (loc->inode->gfid)); - op_ret = -1; - op_errno = errno; - goto out; - } - - iatt_from_stat (&stbuf, &stat); - gf_uuid_copy (stbuf.ia_gfid, loc->inode->gfid); - svs_fill_ino_from_gfid (&stbuf); - - buf = alloca (size + 1); - op_ret = glfs_h_readlink (fs, object, buf, size); - if (op_ret == -1) { - gf_log (this->name, GF_LOG_ERROR, "readlink on %s failed " - "(gfid: %s)", loc->name, uuid_utoa 
(loc->inode->gfid)); - op_errno = errno; - goto out; - } - - buf[op_ret] = 0; + svs_inode_t *inode_ctx = NULL; + glfs_t *fs = NULL; + glfs_object_t *object = NULL; + int op_ret = -1; + int op_errno = EINVAL; + char *buf = NULL; + struct iatt stbuf = { + 0, + }; + int ret = -1; + struct stat stat = { + 0, + }; + call_stack_t *root = NULL; + + GF_VALIDATE_OR_GOTO("snap-view-daemon", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, loc, out); + GF_VALIDATE_OR_GOTO(this->name, loc->inode, out); + + root = frame->root; + op_ret = gf_setcredentials(&root->uid, &root->gid, root->ngrps, + root->groups); + if (op_ret != 0) { + goto out; + } + + inode_ctx = svs_inode_ctx_get(this, loc->inode); + if (!inode_ctx) { + op_ret = -1; + op_errno = EINVAL; + gf_msg(this->name, GF_LOG_ERROR, op_errno, + SVS_MSG_GET_INODE_CONTEXT_FAILED, + "failed to get inode context " + "for %s (gfid: %s)", + loc->name, uuid_utoa(loc->inode->gfid)); + goto out; + } + + SVS_GET_INODE_CTX_INFO(inode_ctx, fs, object, this, loc, op_ret, op_errno, + out); + + ret = glfs_h_stat(fs, object, &stat); + if (ret) { + op_ret = -1; + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, op_errno, SVS_MSG_STAT_FAILED, + "glfs_h_stat on %s (gfid: %s) " + "failed", + loc->name, uuid_utoa(loc->inode->gfid)); + goto out; + } + + iatt_from_stat(&stbuf, &stat); + gf_uuid_copy(stbuf.ia_gfid, loc->inode->gfid); + svs_fill_ino_from_gfid(&stbuf); + + buf = alloca(size + 1); + op_ret = glfs_h_readlink(fs, object, buf, size); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, op_errno, SVS_MSG_READLINK_FAILED, + "readlink on %s failed (gfid: %s)", loc->name, + uuid_utoa(loc->inode->gfid)); + goto out; + } + + buf[op_ret] = 0; out: - STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, buf, &stbuf, - NULL); + STACK_UNWIND_STRICT(readlink, frame, op_ret, op_errno, buf, &stbuf, NULL); - return 0; + return 0; } int32_t -svs_access (call_frame_t *frame, 
xlator_t *this, loc_t *loc, int mask, - dict_t *xdata) +svs_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int mask, + dict_t *xdata) { - int ret = -1; - int32_t op_ret = -1; - int32_t op_errno = EINVAL; - svs_private_t *priv = NULL; - glfs_t *fs = NULL; - glfs_object_t *object = NULL; - svs_inode_t *inode_ctx = NULL; - gf_boolean_t is_fuse_call = 0; - int mode = 0; - - GF_VALIDATE_OR_GOTO ("svs", this, out); - GF_VALIDATE_OR_GOTO (this->name, this->private, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, loc->inode, out); - - priv = this->private; - - inode_ctx = svs_inode_ctx_get (this, loc->inode); - if (!inode_ctx) { - gf_log (this->name, GF_LOG_ERROR, "inode context not found for" - " %s", uuid_utoa (loc->inode->gfid)); - op_ret = -1; - op_errno = EINVAL; - goto out; - } - - is_fuse_call = __is_fuse_call (frame); - - /* - * For entry-point directory, set read and execute bits. But not write - * permissions. - */ - if (inode_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) { - if (is_fuse_call) { - op_ret = 0; - op_errno = 0; - } else { - op_ret = 0; - mode |= POSIX_ACL_READ; - mode |= POSIX_ACL_EXECUTE; - op_errno = mode; - } - goto out; - } - - - SVS_GET_INODE_CTX_INFO(inode_ctx, fs, object, this, loc, op_ret, - op_errno, out); - - /* The actual posix_acl xlator does acl checks differently for - fuse and nfs. 
So set frame->root->pid as fspid of the syncop - if the call came from nfs - */ - if (!is_fuse_call) { - syncopctx_setfspid (&frame->root->pid); - syncopctx_setfsuid (&frame->root->uid); - syncopctx_setfsgid (&frame->root->gid); - syncopctx_setfsgroups (frame->root->ngrps, - frame->root->groups); - } - - ret = glfs_h_access (fs, object, mask); - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, "failed to access %s " - "(gfid: %s)", loc->path, uuid_utoa (loc->inode->gfid)); - op_ret = -1; - op_errno = errno; - goto out; - } - - op_ret = 0; - op_errno = ret; + int ret = -1; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + glfs_t *fs = NULL; + glfs_object_t *object = NULL; + svs_inode_t *inode_ctx = NULL; + gf_boolean_t is_fuse_call = 0; + int mode = 0; + call_stack_t *root = NULL; + + GF_VALIDATE_OR_GOTO("svs", this, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, loc, out); + GF_VALIDATE_OR_GOTO(this->name, loc->inode, out); + + root = frame->root; + op_ret = gf_setcredentials(&root->uid, &root->gid, root->ngrps, + root->groups); + if (op_ret != 0) { + goto out; + } + + inode_ctx = svs_inode_ctx_get(this, loc->inode); + if (!inode_ctx) { + op_ret = -1; + op_errno = EINVAL; + gf_msg(this->name, GF_LOG_ERROR, op_errno, + SVS_MSG_GET_INODE_CONTEXT_FAILED, + "inode context not found for %s", uuid_utoa(loc->inode->gfid)); + goto out; + } + + is_fuse_call = __is_fuse_call(frame); + + /* + * For entry-point directory, set read and execute bits. But not write + * permissions. + */ + if (inode_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) { + if (is_fuse_call) { + op_ret = 0; + op_errno = 0; + } else { + op_ret = 0; + mode |= POSIX_ACL_READ; + mode |= POSIX_ACL_EXECUTE; + op_errno = mode; + } + goto out; + } + + SVS_GET_INODE_CTX_INFO(inode_ctx, fs, object, this, loc, op_ret, op_errno, + out); + + /* The actual posix_acl xlator does acl checks differently for + fuse and nfs. 
So set frame->root->pid as fspid of the syncop + if the call came from nfs + */ + if (!is_fuse_call) { + syncopctx_setfspid(&frame->root->pid); + syncopctx_setfsuid(&frame->root->uid); + syncopctx_setfsgid(&frame->root->gid); + syncopctx_setfsgroups(frame->root->ngrps, frame->root->groups); + } + + ret = glfs_h_access(fs, object, mask); + if (ret < 0) { + op_ret = -1; + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, op_errno, SVS_MSG_ACCESS_FAILED, + "failed to access %s (gfid: %s)", loc->path, + uuid_utoa(loc->inode->gfid)); + goto out; + } + + op_ret = 0; + op_errno = ret; out: - STACK_UNWIND_STRICT (access, frame, op_ret, op_errno, NULL); - return 0; + STACK_UNWIND_STRICT(access, frame, op_ret, op_errno, NULL); + return 0; } - int32_t -mem_acct_init (xlator_t *this) +notify(xlator_t *this, int32_t event, void *data, ...) { - int ret = -1; - - if (!this) - return ret; - - ret = xlator_mem_acct_init (this, gf_svs_mt_end + 1); - - if (ret != 0) { - gf_log (this->name, GF_LOG_WARNING, "Memory accounting" - " init failed"); - return ret; - } - - return ret; + switch (event) { + case GF_EVENT_PARENT_UP: { + /* Tell the parent that snapview-server xlator is up */ + default_notify(this, GF_EVENT_CHILD_UP, data); + } break; + default: + break; + } + return 0; } int32_t -init (xlator_t *this) +mem_acct_init(xlator_t *this) { - svs_private_t *priv = NULL; - int ret = -1; - pthread_t snap_thread; - - /* This can be the top of graph in certain cases */ - if (!this->parents) { - gf_log (this->name, GF_LOG_DEBUG, - "dangling volume. 
check volfile "); - } - - priv = GF_CALLOC (1, sizeof (*priv), gf_svs_mt_priv_t); - if (!priv) - goto out; - - this->private = priv; + int ret = -1; - GF_OPTION_INIT ("volname", priv->volname, str, out); - LOCK_INIT (&priv->snaplist_lock); + if (!this) + return ret; - LOCK (&priv->snaplist_lock); - { - priv->num_snaps = 0; - } - UNLOCK (&priv->snaplist_lock); + ret = xlator_mem_acct_init(this, gf_svs_mt_end + 1); - /* What to do here upon failure? should init be failed or succeed? */ - /* If succeeded, then dynamic management of snapshots will not */ - /* happen.*/ - ret = svs_mgmt_init (this); - if (ret) { - gf_log (this->name, GF_LOG_WARNING, "failed to initiate the " - "mgmt rpc callback for svs. Dymamic management of the" - "snapshots will not happen"); - goto out; - } + if (ret != 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, SVS_MSG_MEM_ACNT_FAILED, + "Memory accounting" + " init failed"); + return ret; + } - /* get the list of snaps first to return to client xlator */ - ret = svs_get_snapshot_list (this); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "Error initializing snaplist infrastructure"); - ret = -1; - goto out; - } + return ret; +} - ret = 0; +int32_t +init(xlator_t *this) +{ + svs_private_t *priv = NULL; + int ret = -1; + + /* This can be the top of graph in certain cases */ + if (!this->parents) { + gf_msg_debug(this->name, 0, "dangling volume. check volfile "); + } + + priv = GF_CALLOC(1, sizeof(*priv), gf_svs_mt_priv_t); + if (!priv) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, SVS_MSG_NO_MEMORY, + "failed to " + "allocate memory for this->private "); + goto out; + } + + this->private = priv; + + GF_OPTION_INIT("volname", priv->volname, str, out); + LOCK_INIT(&priv->snaplist_lock); + + LOCK(&priv->snaplist_lock); + { + priv->num_snaps = 0; + } + UNLOCK(&priv->snaplist_lock); + + /* What to do here upon failure? should init be failed or succeed? 
*/ + /* If succeeded, then dynamic management of snapshots will not */ + /* happen.*/ + ret = svs_mgmt_init(this); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, EINVAL, SVS_MSG_MGMT_INIT_FAILED, + "failed to initiate the " + "mgmt rpc callback for svs. Dymamic management of the" + "snapshots will not happen"); + goto out; + } + + /* get the list of snaps first to return to client xlator */ + ret = svs_get_snapshot_list(this); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, + SVS_MSG_GET_SNAPSHOT_LIST_FAILED, + "Error initializing snaplist infrastructure"); + ret = -1; + goto out; + } + + ret = 0; out: - if (ret && priv) { - LOCK_DESTROY (&priv->snaplist_lock); - GF_FREE (priv->dirents); - GF_FREE (priv); - } + if (ret && priv) { + LOCK_DESTROY(&priv->snaplist_lock); + GF_FREE(priv->dirents); + GF_FREE(priv); + } - return ret; + return ret; } void -fini (xlator_t *this) +fini(xlator_t *this) { - svs_private_t *priv = NULL; - glusterfs_ctx_t *ctx = NULL; - int ret = 0; - - GF_ASSERT (this); - priv = this->private; - this->private = NULL; - ctx = this->ctx; - if (!ctx) - gf_log (this->name, GF_LOG_ERROR, - "Invalid ctx found"); - - if (priv) { - ret = LOCK_DESTROY (&priv->snaplist_lock); - if (ret != 0) { - gf_log (this->name, GF_LOG_WARNING, - "Could not destroy mutex snaplist_lock"); - } - - if (priv->dirents) { - GF_FREE (priv->dirents); - } + svs_private_t *priv = NULL; + glusterfs_ctx_t *ctx = NULL; + int ret = 0; + + GF_ASSERT(this); + priv = this->private; + this->private = NULL; + ctx = this->ctx; + if (!ctx) + gf_msg(this->name, GF_LOG_ERROR, 0, SVS_MSG_INVALID_GLFS_CTX, + "Invalid ctx found"); + + if (priv) { + ret = LOCK_DESTROY(&priv->snaplist_lock); + if (ret != 0) { + gf_msg(this->name, GF_LOG_WARNING, errno, + SVS_MSG_LOCK_DESTROY_FAILED, + "Could not destroy mutex snaplist_lock"); + } - if (priv->rpc) { - /* cleanup the saved-frames before last unref */ - rpc_clnt_connection_cleanup (&priv->rpc->conn); - rpc_clnt_unref (priv->rpc); - } + if 
(priv->dirents) { + GF_FREE(priv->dirents); + } - GF_FREE (priv); + if (priv->rpc) { + /* cleanup the saved-frames before last unref */ + rpc_clnt_connection_cleanup(&priv->rpc->conn); + rpc_clnt_unref(priv->rpc); } - return; + GF_FREE(priv); + } + + return; } struct xlator_fops fops = { - .lookup = svs_lookup, - .stat = svs_stat, - .statfs = svs_statfs, - .opendir = svs_opendir, - .readdirp = svs_readdirp, - .readdir = svs_readdir, - .open = svs_open, - .readv = svs_readv, - .flush = svs_flush, - .fstat = svs_fstat, - .getxattr = svs_getxattr, - .access = svs_access, - .readlink = svs_readlink, - /* entry fops */ + .lookup = svs_lookup, + .stat = svs_stat, + .statfs = svs_statfs, + .opendir = svs_opendir, + .readdirp = svs_readdirp, + .readdir = svs_readdir, + .open = svs_open, + .readv = svs_readv, + .flush = svs_flush, + .fstat = svs_fstat, + .getxattr = svs_getxattr, + .access = svs_access, + .readlink = svs_readlink, + /* entry fops */ }; struct xlator_cbks cbks = { - .release = svs_release, - .releasedir = svs_releasedir, - .forget = svs_forget, + .release = svs_release, + .releasedir = svs_releasedir, + .forget = svs_forget, }; struct volume_options options[] = { - { .key = {"volname"}, - .type = GF_OPTION_TYPE_STR, - }, - { .key = {NULL} }, + { + .key = {"volname"}, + .type = GF_OPTION_TYPE_STR, + }, + {.key = {NULL}}, +}; + +xlator_api_t xlator_api = { + .init = init, + .fini = fini, + .notify = notify, + .mem_acct_init = mem_acct_init, + .op_version = {1}, + .fops = &fops, + .cbks = &cbks, + .options = options, + .identifier = "snapview-server", + .category = GF_MAINTAINED, }; diff --git a/xlators/features/snapview-server/src/snapview-server.h b/xlators/features/snapview-server/src/snapview-server.h index a682aeed33e..6472422e715 100644 --- a/xlators/features/snapview-server/src/snapview-server.h +++ b/xlators/features/snapview-server/src/snapview-server.h @@ -10,212 +10,246 @@ #ifndef __SNAP_VIEW_H__ #define __SNAP_VIEW_H__ -#include "dict.h" -#include 
"defaults.h" -#include "mem-types.h" -#include "call-stub.h" -#include "inode.h" -#include "byte-order.h" -#include "iatt.h" +#include <glusterfs/dict.h> +#include <glusterfs/defaults.h> +#include <glusterfs/mem-types.h> +#include <glusterfs/call-stub.h> +#include <glusterfs/byte-order.h> +#include <glusterfs/iatt.h> #include <ctype.h> #include <sys/uio.h> -#include "glusterfs.h" -#include "xlator.h" -#include "logging.h" +#include <glusterfs/glusterfs.h> +#include <glusterfs/logging.h> #include "glfs.h" -#include "common-utils.h" #include "glfs-handles.h" #include "glfs-internal.h" #include "glusterfs3-xdr.h" -#include "glusterfs-acl.h" -#include "syncop.h" -#include "list.h" -#include "timer.h" +#include <glusterfs/glusterfs-acl.h> +#include <glusterfs/syncop.h> +#include <glusterfs/list.h> +#include <glusterfs/timer.h> #include "rpc-clnt.h" #include "protocol-common.h" #include "xdr-generic.h" - +#include "snapview-server-messages.h" #define DEFAULT_SVD_LOG_FILE_DIRECTORY DATADIR "/log/glusterfs" -#define SNAP_VIEW_MAX_GLFS_T 256 -#define SNAP_VIEW_MAX_GLFS_FDS 1024 -#define SNAP_VIEW_MAX_GLFS_OBJ_HANDLES 1024 - -#define SVS_STACK_DESTROY(_frame) \ - do { \ - ((call_frame_t *)_frame)->local = NULL; \ - STACK_DESTROY (((call_frame_t *)_frame)->root); \ - } while (0) - -#define SVS_CHECK_VALID_SNAPSHOT_HANDLE(fs, this) \ - do { \ - svs_private_t *_private = NULL; \ - _private = this->private; \ - int i = 0; \ - gf_boolean_t found = _gf_false; \ - LOCK (&_private->snaplist_lock); \ - { \ - for (i = 0; i < _private->num_snaps; i++) { \ - if (_private->dirents->fs && fs && \ - _private->dirents->fs == fs) { \ - found = _gf_true; \ - break; \ - } \ - } \ - } \ - UNLOCK (&_private->snaplist_lock); \ - \ - if (!found) \ - fs = NULL; \ - } while (0) - -#define SVS_GET_INODE_CTX_INFO(inode_ctx, fs, object, this, loc, ret, \ - op_errno, label) \ - do { \ - fs = inode_ctx->fs; \ - object = inode_ctx->object; \ - SVS_CHECK_VALID_SNAPSHOT_HANDLE (fs, this); \ - if (!fs) \ - 
object = NULL; \ - \ - if (!fs || !object) { \ - int32_t tmp = -1; \ - char tmp_uuid[64]; \ - \ - tmp = svs_get_handle (this, loc, inode_ctx, \ - &op_errno); \ - if (tmp) { \ - gf_log (this->name, GF_LOG_ERROR, \ - "failed to get the handle for %s " \ - "(gfid: %s)", loc->path, \ - uuid_utoa_r (loc->inode->gfid, \ - tmp_uuid)); \ - ret = -1; \ - goto label; \ - } \ - \ - fs = inode_ctx->fs; \ - object = inode_ctx->object; \ - } \ - } while(0); +#define SNAP_VIEW_MAX_GLFS_T 256 +#define SNAP_VIEW_MAX_GLFS_FDS 1024 +#define SNAP_VIEW_MAX_GLFS_OBJ_HANDLES 1024 + +#define SVS_STACK_DESTROY(_frame) \ + do { \ + ((call_frame_t *)_frame)->local = NULL; \ + STACK_DESTROY(((call_frame_t *)_frame)->root); \ + } while (0) + +#define SVS_CHECK_VALID_SNAPSHOT_HANDLE(fs, this) \ + do { \ + svs_private_t *_private = NULL; \ + _private = this->private; \ + int i = 0; \ + gf_boolean_t found = _gf_false; \ + glfs_t *tmp_fs = NULL; \ + LOCK(&_private->snaplist_lock); \ + { \ + for (i = 0; i < _private->num_snaps; i++) { \ + tmp_fs = _private->dirents[i].fs; \ + gf_log(this->name, GF_LOG_DEBUG, \ + "snap name: %s, snap volume: %s," \ + "dirent->fs: %p", \ + _private->dirents[i].name, \ + _private->dirents[i].snap_volname, tmp_fs); \ + if (tmp_fs && fs && (tmp_fs == fs)) { \ + found = _gf_true; \ + gf_msg_debug(this->name, 0, \ + "found the fs " \ + "instance"); \ + break; \ + } \ + } \ + } \ + UNLOCK(&_private->snaplist_lock); \ + \ + if (!found) { \ + gf_log(this->name, GF_LOG_WARNING, \ + "failed to" \ + " find the fs instance %p", \ + fs); \ + fs = NULL; \ + } \ + } while (0) + +#define SVS_GET_INODE_CTX_INFO(inode_ctx, fs, object, this, loc, ret, \ + op_errno, label) \ + do { \ + fs = inode_ctx->fs; \ + object = inode_ctx->object; \ + SVS_CHECK_VALID_SNAPSHOT_HANDLE(fs, this); \ + if (!fs) \ + object = NULL; \ + \ + if (!fs || !object) { \ + int32_t tmp = -1; \ + char tmp_uuid[64]; \ + \ + tmp = svs_get_handle(this, loc, inode_ctx, &op_errno); \ + if (tmp) { \ + gf_log(this->name, 
GF_LOG_ERROR, \ + "failed to get the handle for %s " \ + "(gfid: %s)", \ + loc->path, uuid_utoa_r(loc->inode->gfid, tmp_uuid)); \ + ret = -1; \ + goto label; \ + } \ + \ + fs = inode_ctx->fs; \ + object = inode_ctx->object; \ + } \ + } while (0); + +#define SVS_STRDUP(dst, src) \ + do { \ + if (dst && strcmp(src, dst)) { \ + GF_FREE(dst); \ + dst = NULL; \ + } \ + \ + if (!dst) \ + dst = gf_strdup(src); \ + } while (0) int -svs_mgmt_submit_request (void *req, call_frame_t *frame, - glusterfs_ctx_t *ctx, - rpc_clnt_prog_t *prog, int procnum, - fop_cbk_fn_t cbkfn, xdrproc_t xdrproc); +svs_mgmt_submit_request(void *req, call_frame_t *frame, glusterfs_ctx_t *ctx, + rpc_clnt_prog_t *prog, int procnum, fop_cbk_fn_t cbkfn, + xdrproc_t xdrproc); int -svs_get_snapshot_list (xlator_t *this); +svs_get_snapshot_list(xlator_t *this); int -mgmt_get_snapinfo_cbk (struct rpc_req *req, struct iovec *iov, - int count, void *myframe); +mgmt_get_snapinfo_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe); typedef enum { - SNAP_VIEW_ENTRY_POINT_INODE = 0, - SNAP_VIEW_SNAPSHOT_INODE, - SNAP_VIEW_VIRTUAL_INODE + SNAP_VIEW_ENTRY_POINT_INODE = 0, + SNAP_VIEW_SNAPSHOT_INODE, + SNAP_VIEW_VIRTUAL_INODE } inode_type_t; struct svs_inode { - glfs_t *fs; - glfs_object_t *object; - inode_type_t type; - - /* used only for entry point directory where gfid of the directory - from where the entry point was entered is saved. - */ - uuid_t pargfid; - struct iatt buf; + glfs_t *fs; + glfs_object_t *object; + inode_type_t type; + + /* used only for entry point directory where gfid of the directory + from where the entry point was entered is saved. 
+ */ + uuid_t pargfid; + + /* This is used to generate gfid for all sub files/dirs under this + * snapshot + */ + char *snapname; + struct iatt buf; }; typedef struct svs_inode svs_inode_t; struct svs_fd { - glfs_fd_t *fd; + glfs_fd_t *fd; }; typedef struct svs_fd svs_fd_t; struct snap_dirent { - char name[NAME_MAX]; - char uuid[UUID_CANONICAL_FORM_LEN + 1]; - char snap_volname[NAME_MAX]; - glfs_t *fs; + char name[NAME_MAX]; + char uuid[UUID_CANONICAL_FORM_LEN + 1]; + char snap_volname[NAME_MAX]; + glfs_t *fs; }; typedef struct snap_dirent snap_dirent_t; struct svs_private { - snap_dirent_t *dirents; - int num_snaps; - char *volname; - struct list_head snaplist; - gf_lock_t snaplist_lock; - struct rpc_clnt *rpc; + snap_dirent_t *dirents; + int num_snaps; + char *volname; + struct list_head snaplist; + gf_lock_t snaplist_lock; + struct rpc_clnt *rpc; }; typedef struct svs_private svs_private_t; int -__svs_inode_ctx_set (xlator_t *this, inode_t *inode, svs_inode_t *svs_inode); +__svs_inode_ctx_set(xlator_t *this, inode_t *inode, svs_inode_t *svs_inode); svs_inode_t * -__svs_inode_ctx_get (xlator_t *this, inode_t *inode); +__svs_inode_ctx_get(xlator_t *this, inode_t *inode); svs_inode_t * -svs_inode_ctx_get (xlator_t *this, inode_t *inode); +svs_inode_ctx_get(xlator_t *this, inode_t *inode); int32_t -svs_inode_ctx_set (xlator_t *this, inode_t *inode, svs_inode_t *svs_inode); +svs_inode_ctx_set(xlator_t *this, inode_t *inode, svs_inode_t *svs_inode); svs_inode_t * -svs_inode_ctx_get_or_new (xlator_t *this, inode_t *inode); +svs_inode_ctx_get_or_new(xlator_t *this, inode_t *inode); int -__svs_fd_ctx_set (xlator_t *this, fd_t *fd, svs_fd_t *svs_fd); +__svs_fd_ctx_set(xlator_t *this, fd_t *fd, svs_fd_t *svs_fd); svs_fd_t * -__svs_fd_ctx_get (xlator_t *this, fd_t *fd); +__svs_fd_ctx_get(xlator_t *this, fd_t *fd); svs_fd_t * -svs_fd_ctx_get (xlator_t *this, fd_t *fd); +svs_fd_ctx_get(xlator_t *this, fd_t *fd); int32_t -svs_fd_ctx_set (xlator_t *this, fd_t *fd, svs_fd_t 
*svs_fd); +svs_fd_ctx_set(xlator_t *this, fd_t *fd, svs_fd_t *svs_fd); svs_fd_t * -__svs_fd_ctx_get_or_new (xlator_t *this, fd_t *fd); +__svs_fd_ctx_get_or_new(xlator_t *this, fd_t *fd); svs_fd_t * -svs_fd_ctx_get_or_new (xlator_t *this, fd_t *fd); +svs_fd_ctx_get_or_new(xlator_t *this, fd_t *fd); + +int +svs_uuid_generate(xlator_t *this, uuid_t gfid, char *snapname, + uuid_t origin_gfid); void -svs_fill_ino_from_gfid (struct iatt *buf); +svs_fill_ino_from_gfid(struct iatt *buf); void -svs_iatt_fill (uuid_t gfid, struct iatt *buf); +svs_iatt_fill(uuid_t gfid, struct iatt *buf); snap_dirent_t * -svs_get_latest_snap_entry (xlator_t *this); +svs_get_latest_snap_entry(xlator_t *this); glfs_t * -svs_get_latest_snapshot (xlator_t *this); +svs_get_latest_snapshot(xlator_t *this); glfs_t * -svs_initialise_snapshot_volume (xlator_t *this, const char *name, - int32_t *op_errno); +svs_initialise_snapshot_volume(xlator_t *this, const char *name, + int32_t *op_errno); glfs_t * -__svs_initialise_snapshot_volume (xlator_t *this, const char *name, - int32_t *op_errno); +__svs_initialise_snapshot_volume(xlator_t *this, const char *name, + int32_t *op_errno); snap_dirent_t * -__svs_get_snap_dirent (xlator_t *this, const char *name); +__svs_get_snap_dirent(xlator_t *this, const char *name); int -svs_mgmt_init (xlator_t *this); +svs_mgmt_init(xlator_t *this); int32_t -svs_get_handle (xlator_t *this, loc_t *loc, svs_inode_t *inode_ctx, - int32_t *op_errno); +svs_get_handle(xlator_t *this, loc_t *loc, svs_inode_t *inode_ctx, + int32_t *op_errno); + +glfs_t * +svs_inode_glfs_mapping(xlator_t *this, inode_t *inode); + +glfs_t * +svs_inode_ctx_glfs_mapping(xlator_t *this, svs_inode_t *inode_ctx); #endif /* __SNAP_VIEW_H__ */ diff --git a/xlators/features/thin-arbiter/Makefile.am b/xlators/features/thin-arbiter/Makefile.am new file mode 100644 index 00000000000..a985f42a877 --- /dev/null +++ b/xlators/features/thin-arbiter/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff 
--git a/xlators/features/thin-arbiter/src/Makefile.am b/xlators/features/thin-arbiter/src/Makefile.am new file mode 100644 index 00000000000..a3c133e7798 --- /dev/null +++ b/xlators/features/thin-arbiter/src/Makefile.am @@ -0,0 +1,22 @@ +xlator_LTLIBRARIES = thin-arbiter.la + +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features + +thin_arbiter_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) + +thin_arbiter_la_SOURCES = thin-arbiter.c \ + $(top_builddir)/xlators/lib/src/libxlator.c + +thin_arbiter_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +noinst_HEADERS = thin-arbiter.h thin-arbiter-mem-types.h thin-arbiter-messages.h \ + $(top_builddir)/xlators/lib/src/libxlator.h + +AM_CPPFLAGS = $(GF_CPPFLAGS) \ + -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/xlators/lib/src \ + -I$(top_srcdir)/rpc/rpc-lib/src \ + -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src + +AM_CFLAGS = -Wall $(GF_CFLAGS) + +CLEANFILES = diff --git a/xlators/features/thin-arbiter/src/thin-arbiter-mem-types.h b/xlators/features/thin-arbiter/src/thin-arbiter-mem-types.h new file mode 100644 index 00000000000..69562d2febc --- /dev/null +++ b/xlators/features/thin-arbiter/src/thin-arbiter-mem-types.h @@ -0,0 +1,19 @@ +/* + Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#ifndef __THIN_ARBITER_MEM_TYPES_H__ +#define __THIN_ARBITER_MEM_TYPES_H__ +#include <glusterfs/mem-types.h> + +typedef enum gf_ta_mem_types_ { + gf_ta_mt_local_t = gf_common_mt_end + 1, + gf_ta_mt_char, + gf_ta_mt_end +} gf_ta_mem_types_t; +#endif diff --git a/xlators/features/thin-arbiter/src/thin-arbiter-messages.h b/xlators/features/thin-arbiter/src/thin-arbiter-messages.h new file mode 100644 index 00000000000..81d7491577a --- /dev/null +++ b/xlators/features/thin-arbiter/src/thin-arbiter-messages.h @@ -0,0 +1,28 @@ +/* + Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _TA_MESSAGES_H_ +#define _TA_MESSAGES_H_ + +#include <glusterfs/glfs-message-id.h> + +/* To add new message IDs, append new identifiers at the end of the list. + * + * Never remove a message ID. If it's not used anymore, you can rename it or + * leave it as it is, but not delete it. This is to prevent reutilization of + * IDs by other messages. + * + * The component name must match one of the entries defined in + * glfs-message-id.h. + */ + +GLFS_MSGID(TA, TA_MSG_INVALID_FOP); + +#endif /* !_TA_MESSAGES_H_ */ diff --git a/xlators/features/thin-arbiter/src/thin-arbiter.c b/xlators/features/thin-arbiter/src/thin-arbiter.c new file mode 100644 index 00000000000..ce3008636f1 --- /dev/null +++ b/xlators/features/thin-arbiter/src/thin-arbiter.c @@ -0,0 +1,661 @@ +/* + Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. 
+ + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include "thin-arbiter.h" +#include "thin-arbiter-messages.h" +#include "thin-arbiter-mem-types.h" +#include <glusterfs/glusterfs.h> +#include <glusterfs/xlator.h> +#include <glusterfs/logging.h> +#include <glusterfs/byte-order.h> +#include <glusterfs/common-utils.h> + +int +ta_set_incoming_values(dict_t *dict, char *key, data_t *value, void *data) +{ + int32_t ret = 0; + ta_fop_t *fop = (ta_fop_t *)data; + int32_t *pending = NULL; + + pending = GF_CALLOC(1, value->len, gf_ta_mt_char); + if (!pending) { + ret = -ENOMEM; + goto out; + } + ret = dict_set_bin(fop->brick_xattr, key, pending, value->len); +out: + return ret; +} + +int +ta_get_incoming_and_brick_values(dict_t *dict, char *key, data_t *value, + void *data) +{ + ta_fop_t *fop = data; + char *source = NULL; + char *in_coming = NULL; + int32_t len = 0, ret = 0; + + source = GF_CALLOC(1, value->len, gf_ta_mt_char); + if (!source) { + ret = -ENOMEM; + goto out; + } + + ret = dict_get_ptr_and_len(fop->dict, key, (void **)&in_coming, &len); + + if (!in_coming || value->len != len) { + ret = -EINVAL; + goto out; + } + + if (!memcmp(value->data, source, value->len) && + (!memcmp(in_coming, source, len))) { + fop->on_disk[fop->idx] = 0; + } else { + fop->on_disk[fop->idx] = 1; + } + + fop->idx++; +out: + GF_FREE(source); + return ret; +} + +void +ta_release_fop(ta_fop_t *fop) +{ + if (!fop) { + return; + } + if (fop->fd) { + fd_unref(fop->fd); + } + loc_wipe(&fop->loc); + if (fop->dict) { + dict_unref(fop->dict); + } + if (fop->brick_xattr) { + dict_unref(fop->brick_xattr); + } + + GF_FREE(fop); + return; +} + +int32_t +ta_set_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t 
*xdata) +{ + TA_STACK_UNWIND(xattrop, frame, op_ret, op_errno, dict, xdata); + return 0; +} + +/* +case 1 - If brick value is 0 and incoming value is also 0, fine +case 2 - If brick value is 0 and incoming value is non 0, fine +case 3 - If brick value is non 0 and incoming value is also 0, fine +case 4 - If brick value is non 0 and incoming value is non 0, fine +case 5 - If incoming value is non zero on both brick, it is wrong +case 6 - If incoming value is non zero but brick value for other +brick is also non zero, wrong +*/ + +int32_t +ta_verify_on_disk_source(ta_fop_t *fop, dict_t *dict) +{ + int ret = 0; + + if (!fop) { + return -EINVAL; + } + + ret = dict_foreach(dict, ta_get_incoming_and_brick_values, (void *)fop); + if (ret < 0) { + return ret; + } + if (fop->on_disk[0] && fop->on_disk[1]) { + return -EINVAL; + } + return 0; +} + +int32_t +ta_get_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t *xdata) +{ + ta_fop_t *fop = NULL; + int ret = 0; + + fop = frame->local; + if (op_ret) { + goto unwind; + } + + ret = ta_verify_on_disk_source(fop, dict); + if (ret < 0) { + op_errno = -ret; + goto unwind; + } + + if (fop->fd) { + STACK_WIND(frame, ta_set_xattrop_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fxattrop, fop->fd, + fop->xattrop_flags, fop->dict, NULL); + } else { + STACK_WIND(frame, ta_set_xattrop_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->xattrop, &fop->loc, + fop->xattrop_flags, fop->dict, NULL); + } + return 0; + +unwind: + + TA_STACK_UNWIND(xattrop, frame, -1, op_errno, NULL, NULL); + return -1; +} + +ta_fop_t * +ta_prepare_fop(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, + gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata) +{ + ta_fop_t *fop = NULL; + int ret = 0; + + fop = GF_CALLOC(1, sizeof(*fop), gf_ta_mt_local_t); + if (!fop) { + goto out; + } + + if (loc) { + loc_copy(&fop->loc, loc); + } + + if (fd) { + fop->fd = fd_ref(fd); + } + + 
fop->xattrop_flags = flags; + fop->idx = 0; + + if (dict != NULL) { + fop->dict = dict_ref(dict); + } + fop->brick_xattr = dict_new(); + if (fop->brick_xattr == NULL) { + goto out; + } + ret = dict_foreach(dict, ta_set_incoming_values, (void *)fop); + if (ret < 0) { + goto out; + } + frame->local = fop; + return fop; + +out: + ta_release_fop(fop); + return NULL; +} + +int32_t +ta_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd, + gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata) +{ + int ret = 0; + ta_fop_t *fop = NULL; + + fop = ta_prepare_fop(frame, this, NULL, fd, flags, dict, xdata); + if (!fop) { + ret = -ENOMEM; + goto unwind; + } + + STACK_WIND(frame, ta_get_xattrop_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fxattrop, fd, flags, fop->brick_xattr, + xdata); + return 0; + +unwind: + + TA_STACK_UNWIND(xattrop, frame, -1, -ret, NULL, NULL); + return 0; +} + +int32_t +ta_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc, + gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata) +{ + int ret = 0; + ta_fop_t *fop = NULL; + + fop = ta_prepare_fop(frame, this, loc, NULL, flags, dict, xdata); + if (!fop) { + ret = -ENOMEM; + goto unwind; + } + + STACK_WIND(frame, ta_get_xattrop_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->xattrop, loc, flags, fop->brick_xattr, + xdata); + return 0; + +unwind: + + TA_STACK_UNWIND(xattrop, frame, -1, -ret, NULL, NULL); + return 0; +} + +int32_t +ta_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, + int32_t count, off_t off, uint32_t flags, struct iobref *iobref, + dict_t *xdata) +{ + TA_FAILED_FOP(writev, frame, EINVAL); + return 0; +} + +int32_t +ta_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, + int32_t flags, dict_t *xdata) +{ + TA_FAILED_FOP(fsetxattr, frame, EINVAL); + return 0; +} + +int32_t +ta_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + int32_t flags, dict_t *xdata) +{ + TA_FAILED_FOP(setxattr, frame, EINVAL); + 
return 0; +} + +int32_t +ta_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t keep_size, + off_t offset, size_t len, dict_t *xdata) +{ + TA_FAILED_FOP(fallocate, frame, EINVAL); + return 0; +} + +int32_t +ta_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask, + dict_t *xdata) +{ + TA_FAILED_FOP(access, frame, EINVAL); + return 0; +} + +int32_t +ta_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + TA_FAILED_FOP(discard, frame, EINVAL); + return 0; +} + +int32_t +ta_entrylk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc, + const char *basename, entrylk_cmd cmd, entrylk_type type, + dict_t *xdata) +{ + TA_FAILED_FOP(entrylk, frame, EINVAL); + return 0; +} + +int32_t +ta_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, + const char *basename, entrylk_cmd cmd, entrylk_type type, + dict_t *xdata) +{ + TA_FAILED_FOP(fentrylk, frame, EINVAL); + return 0; +} + +int32_t +ta_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) +{ + TA_FAILED_FOP(flush, frame, EINVAL); + return 0; +} + +int32_t +ta_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, + dict_t *xdata) +{ + TA_FAILED_FOP(fsync, frame, EINVAL); + return 0; +} +int32_t +ta_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, + dict_t *xdata) +{ + TA_FAILED_FOP(fsyncdir, frame, EINVAL); + return 0; +} + +int32_t +ta_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name, + dict_t *xdata) +{ + TA_FAILED_FOP(getxattr, frame, EINVAL); + return 0; +} + +int32_t +ta_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, + dict_t *xdata) +{ + TA_FAILED_FOP(fgetxattr, frame, EINVAL); + return 0; +} + +int32_t +ta_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) +{ + TA_FAILED_FOP(link, frame, EINVAL); + return 0; +} + +int32_t +ta_lk(call_frame_t *frame, 
xlator_t *this, fd_t *fd, int32_t cmd, + struct gf_flock *flock, dict_t *xdata) +{ + TA_FAILED_FOP(lk, frame, EINVAL); + return 0; +} + +int32_t +ta_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + mode_t umask, dict_t *xdata) +{ + TA_FAILED_FOP(mkdir, frame, EINVAL); + return 0; +} + +int32_t +ta_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + dev_t rdev, mode_t umask, dict_t *xdata) +{ + TA_FAILED_FOP(mknod, frame, EINVAL); + return 0; +} + +int32_t +ta_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + fd_t *fd, dict_t *xdata) +{ + TA_FAILED_FOP(open, frame, EINVAL); + return 0; +} + +int32_t +ta_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, + dict_t *xdata) +{ + TA_FAILED_FOP(opendir, frame, EINVAL); + return 0; +} + +int32_t +ta_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, dict_t *xdata) +{ + TA_FAILED_FOP(readdir, frame, EINVAL); + return 0; +} + +int32_t +ta_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, dict_t *xdata) +{ + TA_FAILED_FOP(readdirp, frame, EINVAL); + return 0; +} + +int32_t +ta_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size, + dict_t *xdata) +{ + TA_FAILED_FOP(readlink, frame, EINVAL); + return 0; +} + +int32_t +ta_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) +{ + TA_FAILED_FOP(readv, frame, EINVAL); + return 0; +} + +int32_t +ta_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) +{ + TA_FAILED_FOP(removexattr, frame, EINVAL); + return 0; +} + +int32_t +ta_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, + dict_t *xdata) +{ + TA_FAILED_FOP(fremovexattr, frame, EINVAL); + return 0; +} + +int32_t +ta_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) +{ + TA_FAILED_FOP(rename, frame, 
EINVAL); + return 0; +} + +int32_t +ta_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags, + dict_t *xdata) +{ + TA_FAILED_FOP(rmdir, frame, EINVAL); + return 0; +} + +int32_t +ta_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf, + int32_t valid, dict_t *xdata) +{ + TA_FAILED_FOP(setattr, frame, EINVAL); + return 0; +} + +int32_t +ta_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf, + int32_t valid, dict_t *xdata) +{ + TA_FAILED_FOP(fsetattr, frame, EINVAL); + return 0; +} + +int32_t +ta_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +{ + TA_FAILED_FOP(stat, frame, EINVAL); + return 0; +} + +int32_t +ta_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) +{ + TA_FAILED_FOP(fstat, frame, EINVAL); + return 0; +} + +int32_t +ta_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +{ + TA_FAILED_FOP(statfs, frame, EINVAL); + return 0; +} + +int32_t +ta_symlink(call_frame_t *frame, xlator_t *this, const char *linkname, + loc_t *loc, mode_t umask, dict_t *xdata) +{ + TA_FAILED_FOP(symlink, frame, EINVAL); + return 0; +} + +int32_t +ta_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) +{ + TA_FAILED_FOP(truncate, frame, EINVAL); + return 0; +} + +int32_t +ta_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) +{ + TA_FAILED_FOP(ftruncate, frame, EINVAL); + return 0; +} + +int32_t +ta_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags, + dict_t *xdata) +{ + TA_FAILED_FOP(unlink, frame, EINVAL); + return 0; +} + +int32_t +ta_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + off_t len, dict_t *xdata) +{ + TA_FAILED_FOP(zerofill, frame, EINVAL); + return 0; +} + +int32_t +ta_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + gf_seek_what_t what, dict_t *xdata) +{ + TA_FAILED_FOP(seek, frame, EINVAL); + return 0; +} + +int32_t 
+mem_acct_init(xlator_t *this) +{ + int ret = -1; + + ret = xlator_mem_acct_init(this, gf_ta_mt_end + 1); + if (ret) + gf_log(this->name, GF_LOG_ERROR, + "Memory accounting " + "initialization failed."); + return ret; +} + +int +reconfigure(xlator_t *this, dict_t *options) +{ + return 0; +} + +int32_t +init(xlator_t *this) +{ + if (!this->children || this->children->next) { + gf_log(this->name, GF_LOG_ERROR, + "'thin_arbiter' not configured with exactly one child"); + return -1; + } + + if (!this->parents) { + gf_log(this->name, GF_LOG_ERROR, "dangling volume. check volfile "); + } + return 0; +} + +void +fini(xlator_t *this) +{ + return; +} + +struct xlator_fops fops = { + /*Passed fop*/ + .xattrop = ta_xattrop, + .fxattrop = ta_fxattrop, + /*Failed fop*/ + .writev = ta_writev, + .stat = ta_stat, + .fstat = ta_fstat, + .truncate = ta_truncate, + .ftruncate = ta_ftruncate, + .access = ta_access, + .readlink = ta_readlink, + .mknod = ta_mknod, + .mkdir = ta_mkdir, + .unlink = ta_unlink, + .rmdir = ta_rmdir, + .symlink = ta_symlink, + .rename = ta_rename, + .link = ta_link, + .open = ta_open, + .readv = ta_readv, + .flush = ta_flush, + .fsync = ta_fsync, + .opendir = ta_opendir, + .readdir = ta_readdir, + .readdirp = ta_readdirp, + .fsyncdir = ta_fsyncdir, + .statfs = ta_statfs, + .setxattr = ta_setxattr, + .getxattr = ta_getxattr, + .fsetxattr = ta_fsetxattr, + .fgetxattr = ta_fgetxattr, + .removexattr = ta_removexattr, + .fremovexattr = ta_fremovexattr, + .lk = ta_lk, + .entrylk = ta_entrylk, + .fentrylk = ta_fentrylk, + .setattr = ta_setattr, + .fsetattr = ta_fsetattr, + .fallocate = ta_fallocate, + .discard = ta_discard, + .zerofill = ta_zerofill, + .seek = ta_seek, +}; + +struct xlator_cbks cbks = {}; + +struct volume_options options[] = { + {.key = {NULL}}, +}; + +xlator_api_t xlator_api = { + .init = init, + .fini = fini, + .reconfigure = reconfigure, + .mem_acct_init = mem_acct_init, + .op_version = {GD_OP_VERSION_6_0}, + .fops = &fops, + .cbks = &cbks, + 
.options = options, + .identifier = "thin-arbiter", + .category = GF_MAINTAINED, +}; diff --git a/xlators/features/thin-arbiter/src/thin-arbiter.h b/xlators/features/thin-arbiter/src/thin-arbiter.h new file mode 100644 index 00000000000..e5f914b84bf --- /dev/null +++ b/xlators/features/thin-arbiter/src/thin-arbiter.h @@ -0,0 +1,59 @@ +/* + Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _THIN_ARBITER_H +#define _THIN_ARBITER_H + +#include <glusterfs/locking.h> +#include <glusterfs/common-utils.h> +#include <glusterfs/glusterfs.h> +#include <glusterfs/xlator.h> +#include <glusterfs/defaults.h> +#include <glusterfs/list.h> + +#define THIN_ARBITER_SOURCE_XATTR "trusted.ta.source" +#define THIN_ARBITER_SOURCE_SIZE 2 + +#define TA_FAILED_FOP(fop, frame, op_errno) \ + do { \ + default_##fop##_failure_cbk(frame, op_errno); \ + } while (0) + +#define TA_STACK_UNWIND(fop, frame, op_ret, op_errno, params...) 
\ + do { \ + ta_fop_t *__local = NULL; \ + int32_t __op_ret = 0; \ + int32_t __op_errno = 0; \ + \ + __local = frame->local; \ + __op_ret = op_ret; \ + __op_errno = op_errno; \ + if (__local) { \ + ta_release_fop(__local); \ + frame->local = NULL; \ + } \ + STACK_UNWIND_STRICT(fop, frame, __op_ret, __op_errno, params); \ + \ + } while (0) + +struct _ta_fop; +typedef struct _ta_fop ta_fop_t; + +struct _ta_fop { + gf_xattrop_flags_t xattrop_flags; + loc_t loc; + fd_t *fd; + dict_t *dict; + dict_t *brick_xattr; + int32_t on_disk[2]; + int32_t idx; +}; + +#endif /* _THIN_ARBITER_H */ diff --git a/xlators/features/trash/src/Makefile.am b/xlators/features/trash/src/Makefile.am index dc216c366f1..8557e7171af 100644 --- a/xlators/features/trash/src/Makefile.am +++ b/xlators/features/trash/src/Makefile.am @@ -1,14 +1,17 @@ +if WITH_SERVER xlator_LTLIBRARIES = trash.la +endif xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features -trash_la_LDFLAGS = $(GF_XLATOR_DEFAULT_LDFLAGS) +trash_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) trash_la_SOURCES = trash.c trash_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la noinst_HEADERS = trash.h trash-mem-types.h -AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ + -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src AM_CFLAGS = -Wall $(GF_CFLAGS) diff --git a/xlators/features/trash/src/trash-mem-types.h b/xlators/features/trash/src/trash-mem-types.h index b7cad3ce3a9..43353c8f095 100644 --- a/xlators/features/trash/src/trash-mem-types.h +++ b/xlators/features/trash/src/trash-mem-types.h @@ -10,14 +10,13 @@ #ifndef __TRASH_MEM_TYPES_H__ #define __TRASH_MEM_TYPES_H__ -#include "mem-types.h" +#include <glusterfs/mem-types.h> enum gf_trash_mem_types_ { - gf_trash_mt_trash_private_t = gf_common_mt_end + 1, - gf_trash_mt_char, - gf_trash_mt_uuid, - gf_trash_mt_trash_elim_path, - gf_trash_mt_end + gf_trash_mt_trash_private_t = 
gf_common_mt_end + 1, + gf_trash_mt_char, + gf_trash_mt_uuid, + gf_trash_mt_trash_elim_path, + gf_trash_mt_end }; #endif - diff --git a/xlators/features/trash/src/trash.c b/xlators/features/trash/src/trash.c index b363dbab825..7d09cba3e9c 100644 --- a/xlators/features/trash/src/trash.c +++ b/xlators/features/trash/src/trash.c @@ -9,31 +9,32 @@ */ #include "trash.h" #include "trash-mem-types.h" -#include "syscall.h" +#include <glusterfs/syscall.h> -#define root_gfid (uuid_t){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1} -#define trash_gfid (uuid_t){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5} -#define internal_op_gfid (uuid_t){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6} +#define root_gfid \ + (uuid_t) { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 } +#define trash_gfid \ + (uuid_t) { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5 } +#define internal_op_gfid \ + (uuid_t) { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6 } int32_t -trash_truncate_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *prebuf, struct iatt *postbuf, - dict_t *xdata); +trash_truncate_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata); int32_t -trash_truncate_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *stbuf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata); +trash_truncate_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *stbuf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata); int32_t -trash_unlink_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, - struct iatt *preoldparent, struct iatt *postoldparent, - struct iatt *prenewparent, struct iatt 
*postnewparent, - dict_t *xdata); - +trash_unlink_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + struct iatt *preoldparent, struct iatt *postoldparent, + struct iatt *prenewparent, struct iatt *postnewparent, + dict_t *xdata); /* Common routines used in this translator */ /** @@ -42,21 +43,27 @@ trash_unlink_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, * the existing directory and returns the same */ mode_t -get_permission (char *path) +get_permission(char *path) { - mode_t mode = 0755; - struct stat sbuf = {0,}; - struct iatt ibuf = {0,}; - int ret = 0; - - ret = sys_stat (path, &sbuf); - if (!ret) { - iatt_from_stat (&ibuf, &sbuf); - mode = st_mode_from_ia (ibuf.ia_prot, ibuf.ia_type); - } else - gf_log ("trash", GF_LOG_DEBUG, "stat on %s failed" - " using default", path); - return mode; + mode_t mode = 0755; + struct stat sbuf = { + 0, + }; + struct iatt ibuf = { + 0, + }; + int ret = 0; + + ret = sys_stat(path, &sbuf); + if (!ret) { + iatt_from_stat(&ibuf, &sbuf); + mode = st_mode_from_ia(ibuf.ia_prot, ibuf.ia_type); + } else + gf_log("trash", GF_LOG_DEBUG, + "stat on %s failed" + " using default", + path); + return mode; } /** @@ -65,197 +72,198 @@ get_permission (char *path) * striped out for additional usage. 
*/ int -extract_trash_directory (char *priv_value, const char **trash_directory) +extract_trash_directory(char *priv_value, const char **trash_directory) { - char *tmp = NULL; - int ret = 0; - - GF_VALIDATE_OR_GOTO("trash", priv_value, out); - - tmp = gf_strdup (priv_value + 1); - if (!tmp) { - ret = ENOMEM; - goto out; - } - if (tmp[strlen(tmp)-1] == '/') - tmp[strlen(tmp)-1] = '\0'; - *trash_directory = gf_strdup (tmp); - if (!(*trash_directory)) { - ret = ENOMEM; - goto out; - } + char *tmp = NULL; + int ret = 0; + + GF_VALIDATE_OR_GOTO("trash", priv_value, out); + + tmp = gf_strdup(priv_value + 1); + if (!tmp) { + ret = ENOMEM; + goto out; + } + if (tmp[strlen(tmp) - 1] == '/') + tmp[strlen(tmp) - 1] = '\0'; + *trash_directory = gf_strdup(tmp); + if (!(*trash_directory)) { + ret = ENOMEM; + goto out; + } out: - if (tmp) - GF_FREE (tmp); - return ret; + if (tmp) + GF_FREE(tmp); + return ret; } /** - * The trash directory path should be append at begining of file path for + * The trash directory path should be append at beginning of file path for * delete or truncate operations. Normal trashing moves the contents to * trash directory and trashing done by internal operations are moved to * internal_op directory inside trash. */ void -copy_trash_path (const char *priv_value, gf_boolean_t internal, char *path) +copy_trash_path(const char *priv_value, gf_boolean_t internal, char *path, + size_t path_size) { - char trash_path[PATH_MAX] = {0,}; - - strcpy (trash_path, priv_value); - if (internal) - strcat (trash_path, "internal_op/"); - - strcpy (path, trash_path); + char trash_path[PATH_MAX] = { + 0, + }; + + strncpy(trash_path, priv_value, sizeof(trash_path)); + trash_path[sizeof(trash_path) - 1] = 0; + if (internal) + strncat(trash_path, "internal_op/", + sizeof(trash_path) - strlen(trash_path) - 1); + + strncpy(path, trash_path, path_size); + path[path_size - 1] = 0; } /** * This function performs the reverse operation of copy_trash_path(). 
It gives * out a pointer, whose starting value will be the path inside trash directory, - * similar to orginal path. + * similar to original path. */ void -remove_trash_path (const char *path, gf_boolean_t internal, char **rem_path) +remove_trash_path(const char *path, gf_boolean_t internal, char **rem_path) { - if (rem_path == NULL) { - return; - } - - *rem_path = strchr (path + 1, '/'); - if (internal) - *rem_path = strchr (*rem_path + 1, '/'); -} - -/** - * Check whether the path includes trash directory or internal op directory - * inside trash. This check is used to make sure that we avoid deletion, - * rename and creation operations from trash directory. - */ -int -check_whether_trash_directory (const char *path, - const char *trash_directory_path) -{ - char tmp_path[PATH_MAX] = {0,}; - char internal_op_path[PATH_MAX] = {0,}; - int ret = 0; - - if (path[strlen(path)-1] == '/') - sprintf (tmp_path, "%s", path); - else - sprintf (tmp_path, "%s/", path); - - copy_trash_path (trash_directory_path, _gf_true, internal_op_path); - ret = strcmp (tmp_path, trash_directory_path) && - strcmp (tmp_path, internal_op_path); + if (rem_path == NULL) { + return; + } - return ret; + *rem_path = strchr(path + 1, '/'); + if (internal) + *rem_path = strchr(*rem_path + 1, '/'); } /** * Checks whether the given path reside under the specified eliminate path */ int -check_whether_eliminate_path (trash_elim_path *trav, const char *path) +check_whether_eliminate_path(trash_elim_path *trav, const char *path) { - int match = 0; - - while (trav) { - if (strncmp (path, trav->path, strlen(trav->path)) == 0) { - match++; - break; - } - trav = trav->next; + int match = 0; + + while (trav) { + if (strncmp(path, trav->path, strlen(trav->path)) == 0) { + match++; + break; } - return match; + trav = trav->next; + } + return match; } /** * Stores the eliminate path into internal eliminate path structure */ int -store_eliminate_path (char *str, trash_elim_path **eliminate) 
+store_eliminate_path(char *str, trash_elim_path **eliminate) { - trash_elim_path *trav = NULL; - char *component = NULL; - char elm_path[PATH_MAX] = {0,}; - int ret = 0; - char *strtokptr = NULL; - - if (eliminate == NULL) { - ret = EINVAL; - goto out; - } - - component = strtok_r (str, ",", &strtokptr); - while (component) { - trav = GF_CALLOC (1, sizeof (*trav), - gf_trash_mt_trash_elim_path); - if (!trav) { - ret = ENOMEM; - goto out; - } - if (component[0] == '/') - sprintf(elm_path, "%s", component); - else - sprintf(elm_path, "/%s", component); - - if (component[strlen(component)-1] != '/') - strcat (elm_path, "/"); - - trav->path = gf_strdup(elm_path); - if (!trav->path) { - ret = ENOMEM; - gf_log ("trash", GF_LOG_DEBUG, "out of memory"); - goto out; - } - trav->next = *eliminate; - *eliminate = trav; - component = strtok_r (NULL, ",", &strtokptr); - } + trash_elim_path *trav = NULL; + char *component = NULL; + char elm_path[PATH_MAX] = { + 0, + }; + int ret = 0; + char *strtokptr = NULL; + + if ((str == NULL) || (eliminate == NULL)) { + ret = EINVAL; + goto out; + } + + component = strtok_r(str, ",", &strtokptr); + while (component) { + trav = GF_CALLOC(1, sizeof(*trav), gf_trash_mt_trash_elim_path); + if (!trav) { + ret = ENOMEM; + goto out; + } + if (component[0] == '/') + sprintf(elm_path, "%s", component); + else + sprintf(elm_path, "/%s", component); + + if (component[strlen(component) - 1] != '/') + strncat(elm_path, "/", sizeof(elm_path) - strlen(elm_path) - 1); + + trav->path = gf_strdup(elm_path); + if (!trav->path) { + ret = ENOMEM; + gf_log("trash", GF_LOG_DEBUG, "out of memory"); + GF_FREE(trav); + goto out; + } + trav->next = *eliminate; + *eliminate = trav; + component = strtok_r(NULL, ",", &strtokptr); + } out: - return ret; + return ret; } /** * Appends time stamp to given string */ void -append_time_stamp (char *name) +append_time_stamp(char *name, size_t name_size) { - int i; - char timestr[64] = {0,}; + int i; + char 
timestr[GF_TIMESTR_SIZE] = { + 0, + }; + + gf_time_fmt(timestr, sizeof(timestr), gf_time(), gf_timefmt_F_HMS); + + /* removing white spaces in timestamp */ + for (i = 0; i < strlen(timestr); i++) { + if (timestr[i] == ' ') + timestr[i] = '_'; + } + strncat(name, "_", name_size - strlen(name) - 1); + strncat(name, timestr, name_size - strlen(name) - 1); +} + +/* * + * Check whether delete/rename operation is permitted on + * trash directory + */ - gf_time_fmt (timestr, sizeof(timestr), time (NULL), - gf_timefmt_F_HMS); +gf_boolean_t +check_whether_op_permitted(trash_private_t *priv, loc_t *loc) +{ + if ((priv->state && (gf_uuid_compare(loc->inode->gfid, trash_gfid) == 0))) + return _gf_false; + if (priv->internal && + (gf_uuid_compare(loc->inode->gfid, internal_op_gfid) == 0)) + return _gf_false; - /* removing white spaces in timestamp */ - for (i = 0; i < strlen (timestr); i++) { - if (timestr[i] == ' ') - timestr[i] = '_'; - } - strcat (name, "_"); - strcat (name, timestr); + return _gf_true; } /** * Wipe the memory used by trash location variable */ void -trash_local_wipe (trash_local_t *local) +trash_local_wipe(trash_local_t *local) { - if (!local) - goto out; + if (!local) + goto out; - loc_wipe (&local->loc); - loc_wipe (&local->newloc); + loc_wipe(&local->loc); + loc_wipe(&local->newloc); - if (local->fd) - fd_unref (local->fd); - if (local->newfd) - fd_unref (local->newfd); + if (local->fd) + fd_unref(local->fd); + if (local->newfd) + fd_unref(local->newfd); - mem_put (local); + mem_put(local); out: - return; + return; } /** @@ -263,308 +271,579 @@ out: * recursive call */ void -wipe_eliminate_path (trash_elim_path **trav) +wipe_eliminate_path(trash_elim_path **trav) { - if (trav == NULL) { - return; - } + if (trav == NULL) { + return; + } - if (*trav == NULL) { - return; - } + if (*trav == NULL) { + return; + } - wipe_eliminate_path (&(*trav)->next); - GF_FREE ((*trav)->path); - GF_FREE (*trav); - *trav = NULL; + wipe_eliminate_path(&(*trav)->next); + 
GF_FREE((*trav)->path); + GF_FREE(*trav); + *trav = NULL; } /** - * This getxattr calls returns existing trash directory path in - * the dictionary + * This is the call back of rename fop initated using STACK_WIND in + * reconfigure/notify function which is used to rename trash directory + * in the brick when it is required either in volume start or set. + * This frame must destroyed from this function itself since it was + * created by trash xlator */ int32_t -trash_notify_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict, - dict_t *xdata) +trash_dir_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + struct iatt *preoldparent, struct iatt *postoldparent, + struct iatt *prenewparent, struct iatt *postnewparent, + dict_t *xdata) { - data_t *data = NULL; - trash_private_t *priv = NULL; - int ret = 0; - - priv = this->private; - GF_VALIDATE_OR_GOTO ("trash", priv, out); - - data = dict_get (dict, GET_ANCESTRY_PATH_KEY); - if (!data) { - gf_log (this->name, GF_LOG_DEBUG, - "oldtrash-directory doesnot exists"); - priv->oldtrash_dir = gf_strdup (priv->newtrash_dir); - if (!priv->oldtrash_dir) { - gf_log (this->name, GF_LOG_ERROR, "out of memory"); - ret = ENOMEM; - goto out; - } - } else { - priv->oldtrash_dir = gf_strdup (data->data); - if (!priv->oldtrash_dir) { - gf_log (this->name, GF_LOG_ERROR, "out of memory"); - ret = ENOMEM; - goto out; - } - gf_log (this->name, GF_LOG_DEBUG, "old trash directory" - " path is %s", data->data); - } + trash_private_t *priv = NULL; + trash_local_t *local = NULL; -out: - return ret; -} + priv = this->private; -/** - * This is a nameless look up for old trash directory - * The lookup is based on gfid, because trash directory - * has fixed gfid. 
- */ -int32_t -trash_notify_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *xdata, - struct iatt *postparent) -{ - trash_private_t *priv = NULL; - loc_t loc = {0,}; - int ret = 0; + local = frame->local; - priv = this->private; - GF_VALIDATE_OR_GOTO ("trash", priv, out); + if (op_ret == -1) { + gf_log(this->name, GF_LOG_ERROR, + "rename trash directory " + "failed: %s", + strerror(op_errno)); + goto out; + } - if (op_ret == 0) { + GF_FREE(priv->oldtrash_dir); - gf_log (this->name, GF_LOG_DEBUG, "inode found with gfid %s", - uuid_utoa(buf->ia_gfid)); + priv->oldtrash_dir = gf_strdup(priv->newtrash_dir); + if (!priv->oldtrash_dir) { + op_ret = ENOMEM; + gf_log(this->name, GF_LOG_DEBUG, "out of memory"); + } - gf_uuid_copy (loc.gfid, trash_gfid); - - /* Find trash inode using available information */ - priv->trash_inode = inode_link (inode, NULL, NULL, buf); +out: + frame->local = NULL; + STACK_DESTROY(frame->root); + trash_local_wipe(local); + return op_ret; +} - loc.inode = inode_ref (priv->trash_inode); +int +rename_trash_directory(xlator_t *this) +{ + trash_private_t *priv = NULL; + int ret = 0; + loc_t loc = { + 0, + }; + loc_t old_loc = { + 0, + }; + call_frame_t *frame = NULL; + trash_local_t *local = NULL; + + priv = this->private; + + frame = create_frame(this, this->ctx->pool); + if (frame == NULL) { + gf_log(this->name, GF_LOG_ERROR, "failed to create frame"); + ret = ENOMEM; + goto out; + } + + local = mem_get0(this->local_pool); + if (!local) { + gf_log(this->name, GF_LOG_DEBUG, "out of memory"); + ret = ENOMEM; + goto out; + } + frame->local = local; + + /* assign new location values to new_loc members */ + gf_uuid_copy(loc.gfid, trash_gfid); + gf_uuid_copy(loc.pargfid, root_gfid); + ret = extract_trash_directory(priv->newtrash_dir, &loc.name); + if (ret) { + gf_log(this->name, GF_LOG_DEBUG, "out of memory"); + goto out; + } + loc.path = 
gf_strdup(priv->newtrash_dir); + if (!loc.path) { + ret = ENOMEM; + gf_log(this->name, GF_LOG_DEBUG, "out of memory"); + goto out; + } + + /* assign old location values to old_loc members */ + gf_uuid_copy(old_loc.gfid, trash_gfid); + gf_uuid_copy(old_loc.pargfid, root_gfid); + ret = extract_trash_directory(priv->oldtrash_dir, &old_loc.name); + if (ret) { + gf_log(this->name, GF_LOG_DEBUG, "out of memory"); + goto out; + } + old_loc.path = gf_strdup(priv->oldtrash_dir); + if (!old_loc.path) { + ret = ENOMEM; + gf_log(this->name, GF_LOG_DEBUG, "out of memory"); + goto out; + } + + old_loc.inode = inode_ref(priv->trash_inode); + gf_uuid_copy(old_loc.inode->gfid, old_loc.gfid); + + loc_copy(&local->loc, &old_loc); + loc_copy(&local->newloc, &loc); + + STACK_WIND(frame, trash_dir_rename_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rename, &old_loc, &loc, NULL); + return 0; - /*Used to find path of old trash directory*/ - STACK_WIND (frame, trash_notify_getxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->getxattr, &loc, - GET_ANCESTRY_PATH_KEY, xdata); - } +out: + if (frame) { + frame->local = NULL; + STACK_DESTROY(frame->root); + } - /* If there is no old trash directory we set its value to new one, - * which is the valid condition for trash directory creation - */ - else { - priv->oldtrash_dir = gf_strdup (priv->newtrash_dir); - if (!priv->oldtrash_dir) { - gf_log (this->name, GF_LOG_ERROR, "out of memory"); - goto out; - } - } + trash_local_wipe(local); -out: - loc_wipe (&loc); - return ret; + return ret; } int32_t -trash_internal_op_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +trash_internal_op_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - if 
(op_ret != 0) - gf_log (this->name, GF_LOG_ERROR, "mkdir failed for " - "internal op directory : %s", strerror (op_errno)); - return op_ret; + trash_local_t *local = NULL; + local = frame->local; + + if (op_ret != 0 && !(op_errno == EEXIST)) + gf_log(this->name, GF_LOG_ERROR, + "mkdir failed for " + "internal op directory : %s", + strerror(op_errno)); + + frame->local = NULL; + STACK_DESTROY(frame->root); + trash_local_wipe(local); + return op_ret; } /** * This is the call back of mkdir fop initated using STACK_WIND in - * notify function which is used to create trash directory in the brick - * when a volume starts.The frame of the mkdir must destroyed from - * this function itself since it was created by trash xlator + * notify/reconfigure function which is used to create trash directory + * in the brick when "trash" is on. The frame of the mkdir must + * destroyed from this function itself since it was created by trash xlator */ + int32_t -trash_notify_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +trash_dir_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - uuid_t *gfid_ptr = NULL; - loc_t loc = {0, }; - int ret = 0; - dict_t *dict = NULL; - char internal_op_path[PATH_MAX] = {0,}; - trash_private_t *priv = NULL; + trash_private_t *priv = NULL; + trash_local_t *local = NULL; - priv = this->private; - GF_VALIDATE_OR_GOTO ("trash", priv, out); + priv = this->private; - dict = dict_new (); - if (!dict) { - ret = -1; - goto out; - } - if ((op_ret == 0) || (op_ret == -1 && op_errno == EEXIST)) { - gfid_ptr = GF_CALLOC (1, sizeof(uuid_t), - gf_common_mt_uuid_t); - if (!gfid_ptr) { - ret = ENOMEM; - goto out; - } - gf_uuid_copy (*gfid_ptr, internal_op_gfid); - - 
gf_uuid_copy (loc.gfid, internal_op_gfid); - gf_uuid_copy (loc.pargfid, trash_gfid); - loc.name = gf_strdup ("internal_op"); - - if (!loc.name) { - gf_log (this->name, GF_LOG_DEBUG, - "out of memory"); - ret = ENOMEM; - goto out; - } - sprintf (internal_op_path, "%s%s", - priv->newtrash_dir, loc.name); - - loc.path = gf_strdup (internal_op_path); - - if (!loc.path) { - gf_log (this->name, GF_LOG_DEBUG, - "out of memory"); - ret = ENOMEM; - goto out; - } - - loc.inode = inode_new (priv->trash_itable); - loc.inode->ia_type = IA_IFDIR; - /* Fixed gfid is set for trash directory with - * this function - */ - ret = dict_set_dynptr (dict, "gfid-req", gfid_ptr, - sizeof (uuid_t)); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "setting key gfid-req failed"); - goto out; - } - - /* The mkdir call for creating trash directory */ - STACK_WIND (frame, trash_internal_op_mkdir_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->mkdir, &loc, 0755, - 0022, dict); - /* After creating we must call other notify functions */ - default_notify (this, GF_EVENT_CHILD_UP, NULL); - } else { - gf_log (this->name, GF_LOG_ERROR, "mkdir failed for trash" - " directory : %s", strerror (op_errno)); + local = frame->local; + + if (op_ret == 0) { + priv->oldtrash_dir = gf_strdup(priv->newtrash_dir); + if (!priv->oldtrash_dir) { + gf_log(this->name, GF_LOG_ERROR, "out of memory"); + op_ret = ENOMEM; } + } else if (op_ret != 0 && errno != EEXIST) + gf_log(this->name, GF_LOG_ERROR, + "mkdir failed for trash" + " directory : %s", + strerror(op_errno)); - STACK_DESTROY (frame->root); -out: - if (ret && gfid_ptr) - GF_FREE (gfid_ptr); - if (dict) - dict_unref (dict); - return 0; + frame->local = NULL; + STACK_DESTROY(frame->root); + trash_local_wipe(local); + return op_ret; } +/** + * This getxattr calls returns existing trash directory path in + * the dictionary + */ +int32_t +trash_dir_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t 
*dict, + dict_t *xdata) +{ + data_t *data = NULL; + trash_private_t *priv = NULL; + int ret = 0; + trash_local_t *local = NULL; + + priv = this->private; + GF_VALIDATE_OR_GOTO("trash", priv, out); + + local = frame->local; + + data = dict_get(dict, GET_ANCESTRY_PATH_KEY); + if (!data) { + goto out; + } + priv->oldtrash_dir = GF_MALLOC(PATH_MAX, gf_common_mt_char); + if (!priv->oldtrash_dir) { + gf_log(this->name, GF_LOG_ERROR, "out of memory"); + ret = ENOMEM; + goto out; + } + /* appending '/' if it is not present */ + sprintf(priv->oldtrash_dir, "%s%c", data->data, + data->data[strlen(data->data) - 1] != '/' ? '/' : '\0'); + gf_log(this->name, GF_LOG_DEBUG, + "old trash directory path " + "is %s", + priv->oldtrash_dir); + if (strcmp(priv->newtrash_dir, priv->oldtrash_dir) != 0) { + /* When user set a new name for trash directory, trash + * xlator will perform a rename operation on old trash + * directory to the new one using a STACK_WIND from here. + * This option can be configured only when volume is in + * started state + */ + ret = rename_trash_directory(this); + } + +out: + frame->local = NULL; + STACK_DESTROY(frame->root); + trash_local_wipe(local); + return ret; +} +/** + * This is a nameless look up for internal op directory + * The lookup is based on gfid, because internal op directory + * has fixed gfid. 
+ */ int32_t -trash_notify_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, - struct iatt *preoldparent, struct iatt *postoldparent, - struct iatt *prenewparent, struct iatt *postnewparent, - dict_t *xdata) +trash_internalop_dir_lookup_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, + struct iatt *postparent) { - if ((op_ret == 0) || (op_ret == -1 && op_errno == EEXIST)) { - /* After creating we must call other notify functions */ - default_notify (this, GF_EVENT_CHILD_UP, NULL); - } else { - gf_log (this->name, GF_LOG_ERROR, "rename failed: %s", - strerror (op_errno)); + trash_private_t *priv = NULL; + int ret = 0; + uuid_t *gfid_ptr = NULL; + loc_t loc = { + 0, + }; + char internal_op_path[PATH_MAX] = { + 0, + }; + dict_t *dict = NULL; + trash_local_t *local = NULL; + + priv = this->private; + GF_VALIDATE_OR_GOTO("trash", priv, out); + + local = frame->local; + if (op_ret != 0 && op_errno == ENOENT) { + loc_wipe(&local->loc); + gfid_ptr = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); + if (!gfid_ptr) { + ret = ENOMEM; + goto out; + } + + gf_uuid_copy(*gfid_ptr, internal_op_gfid); + + dict = dict_new(); + if (!dict) { + ret = ENOMEM; + goto out; + } + ret = dict_set_gfuuid(dict, "gfid-req", *gfid_ptr, false); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, "setting key gfid-req failed"); + goto out; + } + gf_uuid_copy(loc.gfid, internal_op_gfid); + gf_uuid_copy(loc.pargfid, trash_gfid); + + loc.inode = inode_new(priv->trash_itable); + + /* The mkdir call for creating internal op directory */ + loc.name = gf_strdup("internal_op"); + if (!loc.name) { + gf_log(this->name, GF_LOG_DEBUG, "out of memory"); + ret = ENOMEM; + goto out; + } + sprintf(internal_op_path, "%s%s/", priv->newtrash_dir, loc.name); + + loc.path = gf_strdup(internal_op_path); + if (!loc.path) { + gf_log(this->name, GF_LOG_DEBUG, "out 
of memory"); + ret = ENOMEM; + goto out; } - STACK_DESTROY (frame->root); - return op_ret; + loc_copy(&local->loc, &loc); + STACK_WIND(frame, trash_internal_op_mkdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mkdir, &loc, 0755, 0022, dict); + return 0; + } + +out: + if (ret && gfid_ptr) + GF_FREE(gfid_ptr); + if (dict) + dict_unref(dict); + frame->local = NULL; + STACK_DESTROY(frame->root); + trash_local_wipe(local); + return op_ret; } /** - * This is the call back of rename fop initated using STACK_WIND in - * reconfigure function which is used to rename trash directory in - * the brick when we perform volume set.This frame must destroyed - * from this function itself since it was created by trash xlator + * This is a nameless look up for old trash directory + * The lookup is based on gfid, because trash directory + * has fixed gfid. */ int32_t -trash_reconf_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, - struct iatt *preoldparent, struct iatt *postoldparent, - struct iatt *prenewparent, struct iatt *postnewparent, - dict_t *xdata) +trash_dir_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, struct iatt *postparent) { - if (op_ret == -1 && op_errno == EEXIST) { + trash_private_t *priv = NULL; + loc_t loc = { + 0, + }; + int ret = 0; + uuid_t *gfid_ptr = NULL; + dict_t *dict = NULL; + trash_local_t *local = NULL; + + priv = this->private; + GF_VALIDATE_OR_GOTO("trash", priv, out); + + local = frame->local; + + loc_wipe(&local->loc); + if (op_ret == 0) { + gf_log(this->name, GF_LOG_DEBUG, "inode found with gfid %s", + uuid_utoa(buf->ia_gfid)); - gf_log (this->name, GF_LOG_ERROR, "rename failed: %s", - strerror (op_errno)); + gf_uuid_copy(loc.gfid, trash_gfid); + + /* Find trash inode using available information */ + priv->trash_inode = inode_link(inode, NULL, NULL, buf); + + loc.inode = 
inode_ref(priv->trash_inode); + loc_copy(&local->loc, &loc); + + /*Used to find path of old trash directory*/ + STACK_WIND(frame, trash_dir_getxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->getxattr, &loc, + GET_ANCESTRY_PATH_KEY, xdata); + return 0; + } + + /* If there is no old trash directory we set its value to new one, + * which is the valid condition for trash directory creation + */ + else { + gf_log(this->name, GF_LOG_DEBUG, + "Creating trash " + "directory %s ", + priv->newtrash_dir); + + gfid_ptr = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); + if (!gfid_ptr) { + ret = ENOMEM; + goto out; + } + gf_uuid_copy(*gfid_ptr, trash_gfid); + + gf_uuid_copy(loc.gfid, trash_gfid); + gf_uuid_copy(loc.pargfid, root_gfid); + ret = extract_trash_directory(priv->newtrash_dir, &loc.name); + if (ret) { + gf_log(this->name, GF_LOG_DEBUG, "out of memory"); + goto out; + } + loc.path = gf_strdup(priv->newtrash_dir); + if (!loc.path) { + gf_log(this->name, GF_LOG_DEBUG, "out of memory"); + ret = ENOMEM; + goto out; + } + + priv->trash_inode = inode_new(priv->trash_itable); + priv->trash_inode->ia_type = IA_IFDIR; + loc.inode = inode_ref(priv->trash_inode); + dict = dict_new(); + if (!dict) { + ret = ENOMEM; + goto out; } + /* Fixed gfid is set for trash directory with + * this function + */ + ret = dict_set_gfuuid(dict, "gfid-req", *gfid_ptr, false); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, "setting key gfid-req failed"); + goto out; + } + loc_copy(&local->loc, &loc); + + /* The mkdir call for creating trash directory */ + STACK_WIND(frame, trash_dir_mkdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mkdir, &loc, 0755, 0022, dict); + return 0; + } +out: + if (ret && gfid_ptr) + GF_FREE(gfid_ptr); + if (dict) + dict_unref(dict); + frame->local = NULL; + STACK_DESTROY(frame->root); + trash_local_wipe(local); + return ret; +} + +int +create_or_rename_trash_directory(xlator_t *this) +{ + trash_private_t *priv = NULL; + int ret = 0; + loc_t loc = { + 0, + 
}; + call_frame_t *frame = NULL; + trash_local_t *local = NULL; + + priv = this->private; + + frame = create_frame(this, this->ctx->pool); + if (frame == NULL) { + gf_log(this->name, GF_LOG_ERROR, "failed to create frame"); + ret = ENOMEM; + goto out; + } + + local = mem_get0(this->local_pool); + if (!local) { + gf_log(this->name, GF_LOG_DEBUG, "out of memory"); + ret = ENOMEM; + goto out; + } + frame->local = local; + + loc.inode = inode_new(priv->trash_itable); + gf_uuid_copy(loc.gfid, trash_gfid); + loc_copy(&local->loc, &loc); + gf_log(this->name, GF_LOG_DEBUG, + "nameless lookup for" + "old trash directory"); + STACK_WIND(frame, trash_dir_lookup_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, &loc, NULL); +out: + return ret; +} - STACK_DESTROY (frame->root); +int +create_internalop_directory(xlator_t *this) +{ + trash_private_t *priv = NULL; + int ret = 0; + loc_t loc = { + 0, + }; + call_frame_t *frame = NULL; + trash_local_t *local = NULL; + + priv = this->private; + + frame = create_frame(this, this->ctx->pool); + if (frame == NULL) { + gf_log(this->name, GF_LOG_ERROR, "failed to create frame"); + ret = ENOMEM; + goto out; + } + + local = mem_get0(this->local_pool); + if (!local) { + gf_log(this->name, GF_LOG_DEBUG, "out of memory"); + ret = ENOMEM; + goto out; + } + frame->local = local; + + gf_uuid_copy(loc.gfid, internal_op_gfid); + gf_uuid_copy(loc.pargfid, trash_gfid); + loc.inode = inode_new(priv->trash_itable); + loc.inode->ia_type = IA_IFDIR; + + loc_copy(&local->loc, &loc); + STACK_WIND(frame, trash_internalop_dir_lookup_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, &loc, NULL); +out: - return op_ret; + return ret; } int32_t -trash_common_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +trash_common_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t 
op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, inode, - buf, preparent, postparent, xdata); - return 0; + STACK_UNWIND_STRICT(mkdir, frame, op_ret, op_errno, inode, buf, preparent, + postparent, xdata); + return 0; } int32_t -trash_common_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, - struct iatt *preoldparent, struct iatt *postoldparent, - struct iatt *prenewparent, struct iatt *postnewparent, - dict_t *xdata) +trash_common_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + struct iatt *preoldparent, struct iatt *postoldparent, + struct iatt *prenewparent, struct iatt *postnewparent, + dict_t *xdata) { - STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, buf, preoldparent, - postoldparent, prenewparent, postnewparent, xdata); - return 0; + STACK_UNWIND_STRICT(rename, frame, op_ret, op_errno, buf, preoldparent, + postoldparent, prenewparent, postnewparent, xdata); + return 0; } int32_t -trash_common_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *preparent, - struct iatt *postparent, - dict_t *xdata) +trash_common_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno, preparent, - postparent, xdata); - return 0; + STACK_UNWIND_STRICT(rmdir, frame, op_ret, op_errno, preparent, postparent, + xdata); + return 0; } /** * move backs from trash translator to unlink call */ int32_t -trash_common_unwind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *preparent, struct iatt *postparent, - dict_t *xdata) 
+trash_common_unwind_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) { - TRASH_STACK_UNWIND (unlink, frame, op_ret, op_errno, preparent, - postparent, xdata); - return 0; + TRASH_STACK_UNWIND(unlink, frame, op_ret, op_errno, preparent, postparent, + xdata); + return 0; } /** @@ -573,160 +852,183 @@ trash_common_unwind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, * the starting */ int32_t -trash_unlink_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *stbuf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +trash_unlink_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *stbuf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - trash_local_t *local = NULL; - char *tmp_str = NULL; - char *tmp_path = NULL; - char *tmp_dirname = NULL; - char *tmp_stat = NULL; - char real_path[PATH_MAX] = {0,}; - char *dir_name = NULL; - size_t count = 0; - int32_t loop_count = 0; - int i = 0; - loc_t tmp_loc = {0,}; - trash_private_t *priv = NULL; - int ret = 0; - - priv = this->private; - GF_VALIDATE_OR_GOTO ("trash", priv, out); - - local = frame->local; - GF_VALIDATE_OR_GOTO ("trash", local, out); - - tmp_str = gf_strdup (local->newpath); - if (!tmp_str) { - gf_log (this->name, GF_LOG_ERROR, "out of memory"); - ret = -1; - goto out; - } - loop_count = local->loop_count; - - /* The directory is not present , need to create it */ - if ((op_ret == -1) && (op_errno == ENOENT)) { - tmp_dirname = strchr (tmp_str, '/'); - while (tmp_dirname) { - count = tmp_dirname - tmp_str; - if (count == 0) - count = 1; - i++; - if (i > loop_count) - break; - tmp_dirname = strchr (tmp_str + count + 1, '/'); - } - tmp_path = gf_memdup (local->newpath, count + 1); - if (!tmp_path) { - gf_log 
(this->name, GF_LOG_ERROR, "out of memory"); - ret = ENOMEM; - goto out; - } - tmp_path[count] = '\0'; - - loc_copy (&tmp_loc, &local->loc); - tmp_loc.path = gf_strdup (tmp_path); - if (!tmp_loc.path) { - gf_log (this->name, GF_LOG_ERROR, "out of memory"); - ret = ENOMEM; - goto out; - } - - /* Stores the the name of directory to be created */ - tmp_loc.name = gf_strdup (strrchr(tmp_path, '/') + 1); - if (!tmp_loc.name) { - gf_log (this->name, GF_LOG_ERROR, "out of memory"); - ret = ENOMEM; - goto out; - } - strcpy (real_path, priv->brick_path); - remove_trash_path (tmp_path, (frame->root->pid < 0), &tmp_stat); - if (tmp_stat) - strcat (real_path, tmp_stat); - STACK_WIND_COOKIE (frame, trash_unlink_mkdir_cbk, tmp_path, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->mkdir, - &tmp_loc, get_permission(real_path), - 0022, xdata); - loc_wipe (&tmp_loc); - goto out; - } - - /* Given path is created , comparing to the required path */ - if (op_ret == 0) { - dir_name = dirname (tmp_str); - if (strcmp((char *)cookie, dir_name) == 0) { - /* File path exists we can rename it*/ - loc_copy (&tmp_loc, &local->loc); - tmp_loc.path = local->newpath; - STACK_WIND (frame, trash_unlink_rename_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rename, - &local->loc, &tmp_loc, xdata); - goto out; - } - } - - LOCK (&frame->lock); - { - loop_count = ++local->loop_count; - } - UNLOCK (&frame->lock); - - tmp_dirname = strchr (tmp_str, '/'); - - /* Path is not completed , need to create remaining path */ + trash_local_t *local = NULL; + char *tmp_str = NULL; + char *tmp_path = NULL; + char *tmp_dirname = NULL; + char *tmp_stat = NULL; + char real_path[PATH_MAX] = { + 0, + }; + char *dir_name = NULL; + size_t count = 0; + int32_t loop_count = 0; + int i = 0; + loc_t tmp_loc = { + 0, + }; + trash_private_t *priv = NULL; + int ret = 0; + + priv = this->private; + GF_VALIDATE_OR_GOTO("trash", priv, out); + + local = frame->local; + GF_VALIDATE_OR_GOTO("trash", local, out); + + 
TRASH_UNSET_PID(frame, local); + + tmp_str = gf_strdup(local->newpath); + if (!tmp_str) { + gf_log(this->name, GF_LOG_ERROR, "out of memory"); + ret = -1; + goto out; + } + loop_count = local->loop_count; + + /* The directory is not present , need to create it */ + if ((op_ret == -1) && (op_errno == ENOENT)) { + tmp_dirname = strchr(tmp_str, '/'); while (tmp_dirname) { - count = tmp_dirname - tmp_str; - if (count == 0) - count = 1; - i++; - if (i > loop_count) - break; - tmp_dirname = strchr (tmp_str + count + 1, '/'); - } - tmp_path = gf_memdup (local->newpath, count + 1); + count = tmp_dirname - tmp_str; + if (count == 0) + count = 1; + i++; + if (i > loop_count) + break; + tmp_dirname = strchr(tmp_str + count + 1, '/'); + } + tmp_path = gf_memdup(local->newpath, count + 1); if (!tmp_path) { - gf_log (this->name, GF_LOG_ERROR, "out of memory"); - ret = -1; - goto out; + gf_log(this->name, GF_LOG_ERROR, "out of memory"); + ret = ENOMEM; + goto out; } tmp_path[count] = '\0'; - loc_copy (&tmp_loc, &local->loc); - tmp_loc.path = gf_strdup (tmp_path); + loc_copy(&tmp_loc, &local->loc); + tmp_loc.path = gf_strdup(tmp_path); if (!tmp_loc.path) { - gf_log (this->name, GF_LOG_ERROR, "out of memory"); - ret = -1; - goto out; + gf_log(this->name, GF_LOG_ERROR, "out of memory"); + ret = ENOMEM; + goto out; } /* Stores the the name of directory to be created */ - tmp_loc.name = gf_strdup (strrchr(tmp_path, '/') + 1); + tmp_loc.name = gf_strdup(strrchr(tmp_path, '/') + 1); if (!tmp_loc.name) { - gf_log (this->name, GF_LOG_ERROR, "out of memory"); - ret = -1; - goto out; + gf_log(this->name, GF_LOG_ERROR, "out of memory"); + ret = ENOMEM; + goto out; } + strncpy(real_path, priv->brick_path, sizeof(real_path)); + real_path[sizeof(real_path) - 1] = 0; - strcpy (real_path, priv->brick_path); - remove_trash_path (tmp_path, (frame->root->pid < 0), &tmp_stat); + remove_trash_path(tmp_path, (frame->root->pid < 0), &tmp_stat); if (tmp_stat) - strcat (real_path, tmp_stat); - - 
STACK_WIND_COOKIE (frame, trash_unlink_mkdir_cbk, tmp_path, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->mkdir, &tmp_loc, - get_permission(real_path), 0022, xdata); + strncat(real_path, tmp_stat, + sizeof(real_path) - strlen(real_path) - 1); + + TRASH_SET_PID(frame, local); + + STACK_WIND_COOKIE(frame, trash_unlink_mkdir_cbk, tmp_path, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir, + &tmp_loc, get_permission(real_path), 0022, xdata); + loc_wipe(&tmp_loc); + goto out; + } + + /* Given path is created , comparing to the required path */ + if (op_ret == 0) { + dir_name = dirname(tmp_str); + if (strcmp((char *)cookie, dir_name) == 0) { + /* File path exists we can rename it*/ + loc_copy(&tmp_loc, &local->loc); + tmp_loc.path = local->newpath; + STACK_WIND(frame, trash_unlink_rename_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rename, &local->loc, &tmp_loc, + xdata); + goto out; + } + } + + if ((op_ret == -1) && (op_errno != EEXIST)) { + gf_log(this->name, GF_LOG_ERROR, + "Directory creation failed [%s]. 
" + "Therefore unlinking %s without moving to trash " + "directory", + strerror(op_errno), local->loc.name); + STACK_WIND(frame, trash_common_unwind_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, &local->loc, 0, xdata); + goto out; + } + + LOCK(&frame->lock); + { + loop_count = ++local->loop_count; + } + UNLOCK(&frame->lock); + + tmp_dirname = strchr(tmp_str, '/'); + + /* Path is not completed , need to create remaining path */ + while (tmp_dirname) { + count = tmp_dirname - tmp_str; + if (count == 0) + count = 1; + i++; + if (i > loop_count) + break; + tmp_dirname = strchr(tmp_str + count + 1, '/'); + } + tmp_path = gf_memdup(local->newpath, count + 1); + if (!tmp_path) { + gf_log(this->name, GF_LOG_ERROR, "out of memory"); + ret = -1; + goto out; + } + tmp_path[count] = '\0'; + + loc_copy(&tmp_loc, &local->loc); + tmp_loc.path = gf_strdup(tmp_path); + if (!tmp_loc.path) { + gf_log(this->name, GF_LOG_ERROR, "out of memory"); + ret = -1; + goto out; + } + + /* Stores the the name of directory to be created */ + tmp_loc.name = gf_strdup(strrchr(tmp_path, '/') + 1); + if (!tmp_loc.name) { + gf_log(this->name, GF_LOG_ERROR, "out of memory"); + ret = -1; + goto out; + } + + strncpy(real_path, priv->brick_path, sizeof(real_path)); + real_path[sizeof(real_path) - 1] = 0; + + remove_trash_path(tmp_path, (frame->root->pid < 0), &tmp_stat); + if (tmp_stat) + strncat(real_path, tmp_stat, sizeof(real_path) - strlen(real_path) - 1); + + TRASH_SET_PID(frame, local); + + STACK_WIND_COOKIE(frame, trash_unlink_mkdir_cbk, tmp_path, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir, + &tmp_loc, get_permission(real_path), 0022, xdata); out: - if (tmp_path) - GF_FREE (tmp_path); - if (tmp_str) - GF_FREE (tmp_str); - return ret; + if (tmp_path) + GF_FREE(tmp_path); + if (tmp_str) + GF_FREE(tmp_str); + return ret; } /** @@ -734,249 +1036,238 @@ out: * from trash directory as mentioned in the mount point */ int32_t -trash_unlink_rename_cbk (call_frame_t *frame, void 
*cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, - struct iatt *preoldparent, struct iatt *postoldparent, - struct iatt *prenewparent, struct iatt *postnewparent, - dict_t *xdata) +trash_unlink_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + struct iatt *preoldparent, struct iatt *postoldparent, + struct iatt *prenewparent, struct iatt *postnewparent, + dict_t *xdata) { - trash_local_t *local = NULL; - trash_private_t *priv = NULL; - char *tmp_str = NULL; - char *dir_name = NULL; - char *tmp_cookie = NULL; - loc_t tmp_loc = {0,}; - dict_t *new_xdata = NULL; - char *tmp_stat = NULL; - char real_path[PATH_MAX] = {0,}; - int ret = 0; - - priv = this->private; - GF_VALIDATE_OR_GOTO ("trash", priv, out); - - local = frame->local; - GF_VALIDATE_OR_GOTO ("trash", local, out); - - if ((op_ret == -1) && (op_errno == ENOENT)) { - /* the file path doesnot exists we want to create path - * for the file - */ - tmp_str = gf_strdup (local->newpath); - if (!tmp_str) { - gf_log (this->name, GF_LOG_DEBUG, "out of memory"); - ret = ENOMEM; - goto out; - } - dir_name = dirname (tmp_str); /* stores directory name */ - - loc_copy (&tmp_loc, &local->loc); - tmp_loc.path = gf_strdup (dir_name); - if (!tmp_loc.path) { - gf_log (this->name, GF_LOG_ERROR, "out of memory"); - ret = ENOMEM; - goto out; - } - - tmp_cookie = gf_strdup (dir_name); - if (!tmp_cookie) { - gf_log (this->name, GF_LOG_DEBUG, "out of memory"); - ret = ENOMEM; - goto out; - } - strcpy (real_path, priv->brick_path); - remove_trash_path (tmp_str, (frame->root->pid < 0), &tmp_stat); - if (tmp_stat) - strcat (real_path, tmp_stat); - /* create the directory with proper permissions */ - STACK_WIND_COOKIE (frame, trash_unlink_mkdir_cbk, tmp_cookie, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->mkdir, - &tmp_loc, get_permission(real_path), - 0022, xdata); - loc_wipe (&tmp_loc); - goto out; + trash_local_t *local = NULL; + 
trash_private_t *priv = NULL; + char *tmp_str = NULL; + char *dir_name = NULL; + char *tmp_cookie = NULL; + loc_t tmp_loc = { + 0, + }; + dict_t *new_xdata = NULL; + char *tmp_stat = NULL; + char real_path[PATH_MAX] = { + 0, + }; + int ret = 0; + + priv = this->private; + GF_VALIDATE_OR_GOTO("trash", priv, out); + + local = frame->local; + GF_VALIDATE_OR_GOTO("trash", local, out); + + if ((op_ret == -1) && (op_errno == ENOENT)) { + /* the file path does not exist we want to create path + * for the file + */ + tmp_str = gf_strdup(local->newpath); + if (!tmp_str) { + gf_log(this->name, GF_LOG_DEBUG, "out of memory"); + ret = ENOMEM; + goto out; } + dir_name = dirname(tmp_str); /* stores directory name */ - if ((op_ret == -1) && (op_errno == ENOTDIR)) { - /* if entry is already present in trash directory, - * new one is not copied*/ - gf_log (this->name, GF_LOG_DEBUG, - "target(%s) exists, cannot keep the copy, deleting", - local->newpath); - - STACK_WIND (frame, trash_common_unwind_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, - &local->loc, 0, xdata); - - goto out; + loc_copy(&tmp_loc, &local->loc); + tmp_loc.path = gf_strdup(dir_name); + if (!tmp_loc.path) { + gf_log(this->name, GF_LOG_ERROR, "out of memory"); + ret = ENOMEM; + goto out; } - if ((op_ret == -1) && (op_errno == EISDIR)) { - - /* if entry is directory,we remove directly */ - gf_log (this->name, GF_LOG_DEBUG, - "target(%s) exists as directory, cannot keep copy, " - "deleting", local->newpath); - - STACK_WIND (frame, trash_common_unwind_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, - &local->loc, 0, xdata); - goto out; + tmp_cookie = gf_strdup(dir_name); + if (!tmp_cookie) { + gf_log(this->name, GF_LOG_DEBUG, "out of memory"); + ret = ENOMEM; + goto out; } - - /********************************************************************** - * - * CTR Xlator message handling done here! 
- * - **********************************************************************/ - /** - * If unlink is handled by trash translator, it should inform the - * CTR Xlator. And trash translator only handles the unlink for - * the last hardlink. - * - * Check if there is a GF_REQUEST_LINK_COUNT_XDATA from CTR Xlator - * - */ - - if (local->ctr_link_count_req) { - - /* Sending back inode link count to ctr_unlink - * (changetimerecoder xlator) via - * "GF_RESPONSE_LINK_COUNT_XDATA" key using xdata. - * */ - if (xdata) { - ret = dict_set_uint32 (xdata, - GF_RESPONSE_LINK_COUNT_XDATA, - 1); - if (ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "Failed to set" - " GF_RESPONSE_LINK_COUNT_XDATA"); - } - } else { - new_xdata = dict_new (); - if (!new_xdata) { - gf_log (this->name, GF_LOG_WARNING, - "Memory allocation failure while " - "creating new_xdata"); - goto ctr_out; - } - ret = dict_set_uint32 (new_xdata, - GF_RESPONSE_LINK_COUNT_XDATA, - 1); - if (ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "Failed to set" - " GF_RESPONSE_LINK_COUNT_XDATA"); - } -ctr_out: - TRASH_STACK_UNWIND (unlink, frame, 0, op_errno, - &local->preparent, - &local->postparent, new_xdata); - goto out; - } - } - /* All other cases, unlink should return success */ - TRASH_STACK_UNWIND (unlink, frame, 0, op_errno, &local->preparent, - &local->postparent, xdata); + strncpy(real_path, priv->brick_path, sizeof(real_path)); + real_path[sizeof(real_path) - 1] = 0; + remove_trash_path(tmp_str, (frame->root->pid < 0), &tmp_stat); + if (tmp_stat) + strncat(real_path, tmp_stat, + sizeof(real_path) - strlen(real_path) - 1); + + TRASH_SET_PID(frame, local); + + /* create the directory with proper permissions */ + STACK_WIND_COOKIE(frame, trash_unlink_mkdir_cbk, tmp_cookie, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir, + &tmp_loc, get_permission(real_path), 0022, xdata); + loc_wipe(&tmp_loc); + goto out; + } + + if ((op_ret == -1) && (op_errno == ENOTDIR)) { + /* if entry is already present in 
trash directory, + * new one is not copied*/ + gf_log(this->name, GF_LOG_DEBUG, + "target(%s) exists, cannot keep the copy, deleting", + local->newpath); + + STACK_WIND(frame, trash_common_unwind_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, &local->loc, 0, xdata); + + goto out; + } + + if ((op_ret == -1) && (op_errno == EISDIR)) { + /* if entry is directory,we remove directly */ + gf_log(this->name, GF_LOG_DEBUG, + "target(%s) exists as directory, cannot keep copy, " + "deleting", + local->newpath); + + STACK_WIND(frame, trash_common_unwind_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, &local->loc, 0, xdata); + goto out; + } + + /********************************************************************** + * + * CTR Xlator message handling done here! + * + **********************************************************************/ + /** + * If unlink is handled by trash translator, it should inform the + * CTR Xlator. And trash translator only handles the unlink for + * the last hardlink. + * + * Check if there is a GF_REQUEST_LINK_COUNT_XDATA from CTR Xlator + * + */ + + if (local->ctr_link_count_req) { + /* Sending back inode link count to ctr_unlink + * (changetimerecoder xlator) via + * "GF_RESPONSE_LINK_COUNT_XDATA" key using xdata. 
+ * */ + if (xdata) { + ret = dict_set_uint32(xdata, GF_RESPONSE_LINK_COUNT_XDATA, 1); + if (ret == -1) { + gf_log(this->name, GF_LOG_WARNING, + "Failed to set" + " GF_RESPONSE_LINK_COUNT_XDATA"); + } + } else { + new_xdata = dict_new(); + if (!new_xdata) { + gf_log(this->name, GF_LOG_WARNING, + "Memory allocation failure while " + "creating new_xdata"); + goto ctr_out; + } + ret = dict_set_uint32(new_xdata, GF_RESPONSE_LINK_COUNT_XDATA, 1); + if (ret == -1) { + gf_log(this->name, GF_LOG_WARNING, + "Failed to set" + " GF_RESPONSE_LINK_COUNT_XDATA"); + } + ctr_out: + TRASH_STACK_UNWIND(unlink, frame, 0, op_errno, preoldparent, + postoldparent, new_xdata); + goto out; + } + } + /* All other cases, unlink should return success */ + TRASH_STACK_UNWIND(unlink, frame, 0, op_errno, preoldparent, postoldparent, + xdata); out: - if (tmp_str) - GF_FREE (tmp_str); - if (tmp_cookie) - GF_FREE (tmp_cookie); - if (new_xdata) - dict_unref (new_xdata); + if (tmp_str) + GF_FREE(tmp_str); + if (tmp_cookie) + GF_FREE(tmp_cookie); + if (new_xdata) + dict_unref(new_xdata); - return ret; + return ret; } /** * move backs from trash translator to truncate call */ int32_t -trash_common_unwind_buf_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *prebuf, struct iatt *postbuf, - dict_t *xdata) +trash_common_unwind_buf_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *prebuf, struct iatt *postbuf, + dict_t *xdata) { - TRASH_STACK_UNWIND (truncate, frame, op_ret, op_errno, prebuf, - postbuf, xdata); - return 0; + TRASH_STACK_UNWIND(truncate, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + return 0; } - - int32_t -trash_unlink_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, - dict_t *xdata) +trash_unlink_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt 
*buf, + dict_t *xdata) { - trash_private_t *priv = NULL; - trash_local_t *local = NULL; - loc_t new_loc = {0,}; - int ret = 0; - - priv = this->private; - GF_VALIDATE_OR_GOTO ("trash", priv, out); - - local = frame->local; - GF_VALIDATE_OR_GOTO ("trash", local, out); - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, "%s: %s", - local->loc.path, strerror (op_errno)); - TRASH_STACK_UNWIND (unlink, frame, op_ret, op_errno, buf, - NULL, xdata); - ret = -1; - goto out; - } - - /* Only last hardlink will be moved to trash directory */ - if (buf->ia_nlink > 1) { - STACK_WIND (frame, trash_common_unwind_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, &local->loc, - 0, xdata); - goto out; - } - - /* if the file is too big just unlink it */ - if (buf->ia_size > (priv->max_trash_file_size)) { - gf_log (this->name, GF_LOG_DEBUG, - "%s: file size too big (%"PRId64") to " - "move into trash directory", - local->loc.path, buf->ia_size); - - STACK_WIND (frame, trash_common_unwind_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, &local->loc, - 0, xdata); - goto out; - } - - /* Copies new path for renaming */ - loc_copy (&new_loc, &local->loc); - new_loc.path = gf_strdup (local->newpath); - if (!new_loc.path) { - gf_log (this->name, GF_LOG_DEBUG, "out of memory"); - ret = ENOMEM; - goto out; - } - - - STACK_WIND (frame, trash_unlink_rename_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rename, - &local->loc, &new_loc, xdata); + trash_private_t *priv = NULL; + trash_local_t *local = NULL; + loc_t new_loc = { + 0, + }; + int ret = 0; + + priv = this->private; + GF_VALIDATE_OR_GOTO("trash", priv, out); + + local = frame->local; + GF_VALIDATE_OR_GOTO("trash", local, out); + + if (op_ret == -1) { + gf_log(this->name, GF_LOG_DEBUG, "%s: %s", local->loc.path, + strerror(op_errno)); + TRASH_STACK_UNWIND(unlink, frame, op_ret, op_errno, buf, NULL, xdata); + ret = -1; + goto out; + } + + /* Only last hardlink will be moved to trash directory */ + if 
(buf->ia_nlink > 1) { + STACK_WIND(frame, trash_common_unwind_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, &local->loc, 0, xdata); + goto out; + } + + /* if the file is too big just unlink it */ + if (buf->ia_size > (priv->max_trash_file_size)) { + gf_log(this->name, GF_LOG_DEBUG, + "%s: file size too big (%" PRId64 + ") to " + "move into trash directory", + local->loc.path, buf->ia_size); + + STACK_WIND(frame, trash_common_unwind_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, &local->loc, 0, xdata); + goto out; + } + + /* Copies new path for renaming */ + loc_copy(&new_loc, &local->loc); + new_loc.path = gf_strdup(local->newpath); + if (!new_loc.path) { + gf_log(this->name, GF_LOG_DEBUG, "out of memory"); + ret = ENOMEM; + goto out; + } + + STACK_WIND(frame, trash_unlink_rename_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rename, &local->loc, &new_loc, xdata); out: - loc_wipe (&new_loc); - - return ret; + loc_wipe(&new_loc); + return ret; } /** @@ -984,274 +1275,248 @@ out: * by internal operations of gluster such as self-heal */ int32_t -trash_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags, - dict_t *xdata) +trash_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags, + dict_t *xdata) { - trash_private_t *priv = NULL; - trash_local_t *local = NULL;/* files inside trash */ - int32_t match = 0; - int32_t ctr_link_req = 0; - char *pathbuf = NULL; - int ret = 0; - - priv = this->private; - GF_VALIDATE_OR_GOTO ("trash", priv, out); - - /* If trash is not active or not enabled through cli, then - * we bypass and wind back - */ - if (!priv->state) { - STACK_WIND (frame, trash_common_unwind_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, loc, 0, - xdata); - goto out; - } - - /* The files removed by gluster internal operations such as self-heal, - * should moved to trash directory , but files by client should not - * moved - */ - if ((frame->root->pid < 0) && !priv->internal) { - STACK_WIND 
(frame, trash_common_unwind_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, loc, 0, - xdata); - goto out; - } - /* loc need some gfid which will be present in inode */ - gf_uuid_copy (loc->gfid, loc->inode->gfid); - - /* Checking for valid location */ - if (gf_uuid_is_null (loc->gfid) && gf_uuid_is_null (loc->inode->gfid)) { - gf_log (this->name, GF_LOG_DEBUG, "Bad address"); - STACK_WIND (frame, trash_common_unwind_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, loc, 0, - xdata); - ret = EFAULT; - goto out; - } - - /* This will be more accurate */ - inode_path (loc->inode, NULL, &pathbuf); - /* Check whether the file is present under eliminate paths or - * inside trash directory. In both cases we don't need to move the - * file to trash directory. Instead delete it permanently - */ - match = check_whether_eliminate_path (priv->eliminate, pathbuf); - if ((strncmp (pathbuf, priv->newtrash_dir, - strlen (priv->newtrash_dir)) == 0) || (match)) { - if (match) { - gf_log (this->name, GF_LOG_DEBUG, - "%s is a file comes under an eliminate path, " - "so it is not moved to trash", loc->name); - } - - /* Trying to unlink from the trash-dir. So do the - * actual unlink without moving to trash-dir. 
- */ - STACK_WIND (frame, trash_common_unwind_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, loc, 0, - xdata); - goto out; - } - - local = mem_get0 (this->local_pool); - if (!local) { - gf_log (this->name, GF_LOG_DEBUG, "out of memory"); - TRASH_STACK_UNWIND (unlink, frame, -1, ENOMEM, NULL, NULL, - xdata); - ret = ENOMEM; - goto out; - } - frame->local = local; - loc_copy (&local->loc, loc); - - /* rename new location of file as starting from trash directory */ - strcpy (local->origpath, pathbuf); - copy_trash_path (priv->newtrash_dir, (frame->root->pid < 0), - local->newpath); - strcat (local->newpath, pathbuf); - - /* append timestamp to file name so that we can avoid - * name collisions inside trash + trash_private_t *priv = NULL; + trash_local_t *local = NULL; /* files inside trash */ + int32_t match = 0; + int32_t ctr_link_req = 0; + char *pathbuf = NULL; + int ret = 0; + + priv = this->private; + GF_VALIDATE_OR_GOTO("trash", priv, out); + + /* If trash is not active or not enabled through cli, then + * we bypass and wind back + */ + if (!priv->state) { + STACK_WIND(frame, trash_common_unwind_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, loc, 0, xdata); + goto out; + } + + /* The files removed by gluster internal operations such as self-heal, + * should moved to trash directory , but files by client should not + * moved + */ + if ((frame->root->pid < 0) && !priv->internal) { + STACK_WIND(frame, trash_common_unwind_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, loc, 0, xdata); + goto out; + } + /* loc need some gfid which will be present in inode */ + gf_uuid_copy(loc->gfid, loc->inode->gfid); + + /* Checking for valid location */ + if (gf_uuid_is_null(loc->gfid) && gf_uuid_is_null(loc->inode->gfid)) { + gf_log(this->name, GF_LOG_DEBUG, "Bad address"); + STACK_WIND(frame, trash_common_unwind_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, loc, 0, xdata); + ret = EFAULT; + goto out; + } + + /* This will be more 
accurate */ + inode_path(loc->inode, NULL, &pathbuf); + /* Check whether the file is present under eliminate paths or + * inside trash directory. In both cases we don't need to move the + * file to trash directory. Instead delete it permanently + */ + match = check_whether_eliminate_path(priv->eliminate, pathbuf); + if ((strncmp(pathbuf, priv->newtrash_dir, strlen(priv->newtrash_dir)) == + 0) || + (match)) { + if (match) { + gf_log(this->name, GF_LOG_DEBUG, + "%s is a file comes under an eliminate path, " + "so it is not moved to trash", + loc->name); + } + + /* Trying to unlink from the trash-dir. So do the + * actual unlink without moving to trash-dir. */ - append_time_stamp (local->newpath); - if (strlen (local->newpath) > PATH_MAX) { - STACK_WIND (frame, trash_common_unwind_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, loc, 0, - xdata); - goto out; - } - - /* To know whether CTR xlator requested for the link count */ - ret = dict_get_int32 (xdata, GF_REQUEST_LINK_COUNT_XDATA, - &ctr_link_req); - if (ret) { - local->ctr_link_count_req = _gf_false; - ret = 0; - } else - local->ctr_link_count_req = _gf_true; + STACK_WIND(frame, trash_common_unwind_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, loc, 0, xdata); + goto out; + } + + local = mem_get0(this->local_pool); + if (!local) { + gf_log(this->name, GF_LOG_DEBUG, "out of memory"); + TRASH_STACK_UNWIND(unlink, frame, -1, ENOMEM, NULL, NULL, xdata); + ret = ENOMEM; + goto out; + } + frame->local = local; + loc_copy(&local->loc, loc); + + /* rename new location of file as starting from trash directory */ + copy_trash_path(priv->newtrash_dir, (frame->root->pid < 0), local->newpath, + sizeof(local->newpath)); + strncat(local->newpath, pathbuf, + sizeof(local->newpath) - strlen(local->newpath) - 1); + + /* append timestamp to file name so that we can avoid + * name collisions inside trash + */ + append_time_stamp(local->newpath, sizeof(local->newpath)); + if (strlen(local->newpath) > 
PATH_MAX) { + STACK_WIND(frame, trash_common_unwind_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, loc, 0, xdata); + goto out; + } + + /* To know whether CTR xlator requested for the link count */ + ret = dict_get_int32(xdata, GF_REQUEST_LINK_COUNT_XDATA, &ctr_link_req); + if (ret) { + local->ctr_link_count_req = _gf_false; + ret = 0; + } else + local->ctr_link_count_req = _gf_true; - LOCK_INIT (&frame->lock); + LOCK_INIT(&frame->lock); - STACK_WIND (frame, trash_unlink_stat_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->stat, loc, xdata); + STACK_WIND(frame, trash_unlink_stat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->stat, loc, xdata); out: - return ret; + return ret; } /** * Use this when a failure occurs, and delete the newly created file */ int32_t -trash_truncate_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *preparent, struct iatt *postparent, - dict_t *xdata) +trash_truncate_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) { - trash_local_t *local = NULL; + trash_local_t *local = NULL; - local = frame->local; - GF_VALIDATE_OR_GOTO ("trash", local, out); + local = frame->local; + GF_VALIDATE_OR_GOTO("trash", local, out); - if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, - "deleting the newly created file: %s", - strerror (op_errno)); - } + if (op_ret == -1) { + gf_log(this->name, GF_LOG_DEBUG, "deleting the newly created file: %s", + strerror(op_errno)); + } - STACK_WIND (frame, trash_common_unwind_buf_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->truncate, - &local->loc, local->fop_offset, xdata); + STACK_WIND(frame, trash_common_unwind_buf_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, &local->loc, + local->fop_offset, xdata); out: - return 0; + return 0; } /** * Read from source file */ int32_t -trash_truncate_readv_cbk 
(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iovec *vector, int32_t count, - struct iatt *stbuf, struct iobref *iobuf, - dict_t *xdata) +trash_truncate_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iovec *vector, + int32_t count, struct iatt *stbuf, + struct iobref *iobuf, dict_t *xdata) { + trash_local_t *local = NULL; - trash_local_t *local = NULL; - - local = frame->local; - GF_VALIDATE_OR_GOTO ("trash", local, out); + local = frame->local; + GF_VALIDATE_OR_GOTO("trash", local, out); - if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, - "readv on the existing file failed: %s", - strerror (op_errno)); + if (op_ret == -1) { + gf_log(this->name, GF_LOG_DEBUG, + "readv on the existing file failed: %s", strerror(op_errno)); - STACK_WIND (frame, trash_truncate_unlink_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink, - &local->newloc, 0, xdata); - goto out; - } + STACK_WIND(frame, trash_truncate_unlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, &local->newloc, 0, xdata); + goto out; + } - local->fsize = stbuf->ia_size; - STACK_WIND (frame, trash_truncate_writev_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->writev, - local->newfd, vector, count, local->cur_offset, 0, iobuf, - xdata); + local->fsize = stbuf->ia_size; + STACK_WIND(frame, trash_truncate_writev_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->writev, local->newfd, vector, count, + local->cur_offset, 0, iobuf, xdata); out: - return 0; - + return 0; } /** * Write to file created in trash directory */ int32_t -trash_truncate_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *prebuf, struct iatt *postbuf, - dict_t *xdata) +trash_truncate_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - 
trash_local_t *local = NULL; - - local = frame->local; - GF_VALIDATE_OR_GOTO ("trash", local, out); - - if (op_ret == -1) { - /* Let truncate work, but previous copy is not preserved. */ - gf_log (this->name, GF_LOG_DEBUG, - "writev on the existing file failed: %s", - strerror (op_errno)); - - STACK_WIND (frame, trash_truncate_unlink_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, &local->newloc, 0, - xdata); - goto out; - } - - if (local->cur_offset < local->fsize) { - local->cur_offset += GF_BLOCK_READV_SIZE; - /* Loop back and Read the contents again. */ - STACK_WIND (frame, trash_truncate_readv_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->readv, - local->fd, (size_t)GF_BLOCK_READV_SIZE, - local->cur_offset, 0, xdata); - goto out; - } - - - /* OOFH.....Finally calling Truncate. */ - STACK_WIND (frame, trash_common_unwind_buf_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->truncate, &local->loc, - local->fop_offset, xdata); + trash_local_t *local = NULL; + + local = frame->local; + GF_VALIDATE_OR_GOTO("trash", local, out); + + if (op_ret == -1) { + /* Let truncate work, but previous copy is not preserved. */ + gf_log(this->name, GF_LOG_DEBUG, + "writev on the existing file failed: %s", strerror(op_errno)); + + STACK_WIND(frame, trash_truncate_unlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, &local->newloc, 0, xdata); + goto out; + } + + if (local->cur_offset < local->fsize) { + local->cur_offset += GF_BLOCK_READV_SIZE; + /* Loop back and Read the contents again. */ + STACK_WIND(frame, trash_truncate_readv_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readv, local->fd, + (size_t)GF_BLOCK_READV_SIZE, local->cur_offset, 0, xdata); + goto out; + } + + /* OOFH.....Finally calling Truncate. 
*/ + STACK_WIND(frame, trash_common_unwind_buf_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, &local->loc, + local->fop_offset, xdata); out: - return 0; + return 0; } /** * The source file is opened for reading and writing */ int32_t -trash_truncate_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd, - dict_t *xdata) +trash_truncate_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, + dict_t *xdata) { - trash_local_t *local = NULL; + trash_local_t *local = NULL; - local = frame->local; - GF_VALIDATE_OR_GOTO ("trash", local, out); + local = frame->local; + GF_VALIDATE_OR_GOTO("trash", local, out); - if (op_ret == -1) { - /* Let truncate work, but previous copy is not preserved. */ - gf_log (this->name, GF_LOG_DEBUG, - "open on the existing file failed: %s", - strerror (op_errno)); + if (op_ret == -1) { + /* Let truncate work, but previous copy is not preserved. */ + gf_log(this->name, GF_LOG_DEBUG, "open on the existing file failed: %s", + strerror(op_errno)); - STACK_WIND (frame, trash_truncate_unlink_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink, - &local->newloc, 0, xdata); - goto out; - } + STACK_WIND(frame, trash_truncate_unlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, &local->newloc, 0, xdata); + goto out; + } - fd_bind (fd); + fd_bind(fd); - local->cur_offset = 0; + local->cur_offset = 0; - STACK_WIND (frame, trash_truncate_readv_cbk, - FIRST_CHILD (this), FIRST_CHILD (this)->fops->readv, - local->fd, (size_t)GF_BLOCK_READV_SIZE, local->cur_offset, - 0, xdata); + STACK_WIND(frame, trash_truncate_readv_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readv, local->fd, + (size_t)GF_BLOCK_READV_SIZE, local->cur_offset, 0, xdata); out: - return 0; + return 0; } /** @@ -1259,96 +1524,104 @@ out: * if the path is present in trash directory */ int32_t -trash_truncate_create_cbk (call_frame_t *frame, void *cookie, 
xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd, - inode_t *inode, struct iatt *buf, - struct iatt *preparent, struct iatt *postparent, - dict_t *xdata) +trash_truncate_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, + inode_t *inode, struct iatt *buf, + struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) { - trash_local_t *local = NULL; - char *tmp_str = NULL; - char *dir_name = NULL; - char *tmp_path = NULL; - int32_t flags = 0; - loc_t tmp_loc = {0,}; - char *tmp_stat = NULL; - char real_path[PATH_MAX] = {0,}; - trash_private_t *priv = NULL; - - priv = this->private; - GF_VALIDATE_OR_GOTO ("trash", priv, out); - - local = frame->local; - GF_VALIDATE_OR_GOTO ("trash", local, out); - - /* Checks whether path is present in trash directory or not */ - - if ((op_ret == -1) && (op_errno == ENOENT)) { - /* Creating the directory structure here. */ - tmp_str = gf_strdup (local->newpath); - if (!tmp_str) { - gf_log (this->name, GF_LOG_DEBUG, "out of memory"); - goto out; - } - dir_name = dirname (tmp_str); - - tmp_path = gf_strdup (dir_name); - if (!tmp_path) { - gf_log (this->name, GF_LOG_DEBUG, "out of memory"); - goto out; - } - loc_copy (&tmp_loc, &local->newloc); - tmp_loc.path = gf_strdup (tmp_path); - if (!tmp_loc.path) { - gf_log (this->name, GF_LOG_DEBUG, "out of memory"); - goto out; - } - strcpy (real_path, priv->brick_path); - remove_trash_path (tmp_path, (frame->root->pid < 0), &tmp_stat); - if (tmp_stat) - strcat (real_path, tmp_stat); - /* create the directory with proper permissions */ - STACK_WIND_COOKIE (frame, trash_truncate_mkdir_cbk, - tmp_path, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->mkdir, - &tmp_loc, get_permission(real_path), - 0022, xdata); - loc_wipe (&tmp_loc); - goto out; + trash_local_t *local = NULL; + char *tmp_str = NULL; + char *dir_name = NULL; + char *tmp_path = NULL; + int32_t flags = 0; + loc_t tmp_loc = { + 0, + }; + char *tmp_stat = NULL; + 
char real_path[PATH_MAX] = { + 0, + }; + trash_private_t *priv = NULL; + + priv = this->private; + GF_VALIDATE_OR_GOTO("trash", priv, out); + + local = frame->local; + GF_VALIDATE_OR_GOTO("trash", local, out); + + TRASH_UNSET_PID(frame, local); + + /* Checks whether path is present in trash directory or not */ + + if ((op_ret == -1) && (op_errno == ENOENT)) { + /* Creating the directory structure here. */ + tmp_str = gf_strdup(local->newpath); + if (!tmp_str) { + gf_log(this->name, GF_LOG_DEBUG, "out of memory"); + goto out; } + dir_name = dirname(tmp_str); - if (op_ret == -1) { - /* Let truncate work, but previous copy is not preserved. - * Deleting the newly created copy. - */ - gf_log (this->name, GF_LOG_DEBUG, - "creation of new file in trash-dir failed, " - "when truncate was called: %s", strerror (op_errno)); - - STACK_WIND (frame, trash_common_unwind_buf_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->truncate, &local->loc, - local->fop_offset, xdata); - goto out; + tmp_path = gf_strdup(dir_name); + if (!tmp_path) { + gf_log(this->name, GF_LOG_DEBUG, "out of memory"); + goto out; + } + loc_copy(&tmp_loc, &local->newloc); + tmp_loc.path = gf_strdup(tmp_path); + if (!tmp_loc.path) { + gf_log(this->name, GF_LOG_DEBUG, "out of memory"); + goto out; } + strncpy(real_path, priv->brick_path, sizeof(real_path)); + real_path[sizeof(real_path) - 1] = 0; + remove_trash_path(tmp_path, (frame->root->pid < 0), &tmp_stat); + if (tmp_stat) + strncat(real_path, tmp_stat, + sizeof(real_path) - strlen(real_path) - 1); + + TRASH_SET_PID(frame, local); + + /* create the directory with proper permissions */ + STACK_WIND_COOKIE(frame, trash_truncate_mkdir_cbk, tmp_path, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir, + &tmp_loc, get_permission(real_path), 0022, xdata); + loc_wipe(&tmp_loc); + goto out; + } + + if (op_ret == -1) { + /* Let truncate work, but previous copy is not preserved. + * Deleting the newly created copy. 
+ */ + gf_log(this->name, GF_LOG_DEBUG, + "creation of new file in trash-dir failed, " + "when truncate was called: %s", + strerror(op_errno)); + + STACK_WIND(frame, trash_common_unwind_buf_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, &local->loc, + local->fop_offset, xdata); + goto out; + } - fd_bind (fd); - flags = O_RDONLY; + fd_bind(fd); + flags = O_RDONLY; - /* fd which represents source file for reading and writing from it */ + /* fd which represents source file for reading and writing from it */ - local->fd = fd_create (local->loc.inode, frame->root->pid); + local->fd = fd_create(local->loc.inode, frame->root->pid); - STACK_WIND (frame, trash_truncate_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, &local->loc, flags, - local->fd, 0); + STACK_WIND(frame, trash_truncate_open_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->open, &local->loc, flags, local->fd, 0); out: - if (tmp_str) - GF_FREE (tmp_str); - if (tmp_path) - GF_FREE (tmp_path); + if (tmp_str) + GF_FREE(tmp_str); + if (tmp_path) + GF_FREE(tmp_path); - return 0; + return 0; } /** @@ -1357,294 +1630,316 @@ out: * beginning */ int32_t -trash_truncate_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *stbuf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +trash_truncate_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *stbuf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - trash_local_t *local = NULL; - trash_private_t *priv = NULL; - char *tmp_str = NULL; - char *tmp_path = NULL; - char *tmp_dirname = NULL; - char *dir_name = NULL; - char *tmp_stat = NULL; - char real_path[PATH_MAX] = {0,}; - size_t count = 0; - int32_t flags = 0; - int32_t loop_count = 0; - int i = 0; - loc_t tmp_loc = {0,}; - int ret = 0; - - priv = this->private; - GF_VALIDATE_OR_GOTO ("trash", priv, out); 
- - local = frame->local; - GF_VALIDATE_OR_GOTO ("trash", local, out); - - loop_count = local->loop_count; - - tmp_str = gf_strdup (local->newpath); - if (!tmp_str) { - gf_log (this->name, GF_LOG_DEBUG, "out of memory"); - ret = ENOMEM; - goto out; - } - - if ((op_ret == -1) && (op_errno == ENOENT)) { - tmp_dirname = strchr (tmp_str, '/'); - while (tmp_dirname) { - count = tmp_dirname - tmp_str; - if (count == 0) - count = 1; - i++; - if (i > loop_count) - break; - tmp_dirname = strchr (tmp_str + count + 1, '/'); - } - tmp_path = gf_memdup (local->newpath, count + 1); - if (!tmp_path) { - gf_log (this->name, GF_LOG_DEBUG, "out of memory"); - ret = ENOMEM; - goto out; - } - tmp_path[count] = '\0'; - - loc_copy (&tmp_loc, &local->newloc); - tmp_loc.path = gf_strdup (tmp_path); - if (!tmp_loc.path) { - gf_log (this->name, GF_LOG_DEBUG, "out of memory"); - ret = ENOMEM; - goto out; - } - - /* Stores the the name of directory to be created */ - tmp_loc.name = gf_strdup (strrchr(tmp_path, '/') + 1); - if (!tmp_loc.name) { - gf_log (this->name, GF_LOG_DEBUG, "out of memory"); - ret = ENOMEM; - goto out; - } - strcpy (real_path, priv->brick_path); - remove_trash_path (tmp_path, (frame->root->pid < 0), &tmp_stat); - if (tmp_stat) - strcat (real_path, tmp_stat); - STACK_WIND_COOKIE (frame, trash_truncate_mkdir_cbk, - tmp_path, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->mkdir, - &tmp_loc, get_permission(real_path), - 0022, xdata); - loc_wipe (&tmp_loc); - goto out; - } - - if (op_ret == 0) { - dir_name = dirname (tmp_str); - if (strcmp ((char*)cookie, dir_name) == 0) { - flags = O_CREAT|O_EXCL|O_WRONLY; - strcpy (real_path, priv->brick_path); - strcat (real_path, local->origpath); - /* Call create again once directory structure - is created. 
*/ - STACK_WIND (frame, trash_truncate_create_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->create, - &local->newloc, flags, - get_permission (real_path), - 0022, local->newfd, xdata); - goto out; - } - } - - LOCK (&frame->lock); - { - loop_count = ++local->loop_count; - } - UNLOCK (&frame->lock); - - tmp_dirname = strchr (tmp_str, '/'); + trash_local_t *local = NULL; + trash_private_t *priv = NULL; + char *tmp_str = NULL; + char *tmp_path = NULL; + char *tmp_dirname = NULL; + char *dir_name = NULL; + char *tmp_stat = NULL; + char real_path[PATH_MAX] = { + 0, + }; + size_t count = 0; + int32_t flags = 0; + int32_t loop_count = 0; + int i = 0; + loc_t tmp_loc = { + 0, + }; + int ret = 0; + + priv = this->private; + GF_VALIDATE_OR_GOTO("trash", priv, out); + + local = frame->local; + GF_VALIDATE_OR_GOTO("trash", local, out); + + loop_count = local->loop_count; + + TRASH_UNSET_PID(frame, local); + + tmp_str = gf_strdup(local->newpath); + if (!tmp_str) { + gf_log(this->name, GF_LOG_DEBUG, "out of memory"); + ret = ENOMEM; + goto out; + } + + if ((op_ret == -1) && (op_errno == ENOENT)) { + tmp_dirname = strchr(tmp_str, '/'); while (tmp_dirname) { - count = tmp_dirname - tmp_str; - if (count == 0) - count = 1; - i++; - if (i > loop_count) - break; - tmp_dirname = strchr (tmp_str + count + 1, '/'); - } - tmp_path = gf_memdup (local->newpath, count + 1); + count = tmp_dirname - tmp_str; + if (count == 0) + count = 1; + i++; + if (i > loop_count) + break; + tmp_dirname = strchr(tmp_str + count + 1, '/'); + } + tmp_path = gf_memdup(local->newpath, count + 1); if (!tmp_path) { - gf_log (this->name, GF_LOG_DEBUG, "out of memory"); - ret = ENOMEM; - goto out; + gf_log(this->name, GF_LOG_DEBUG, "out of memory"); + ret = ENOMEM; + goto out; } tmp_path[count] = '\0'; - loc_copy (&tmp_loc, &local->newloc); - tmp_loc.path = gf_strdup (tmp_path); + loc_copy(&tmp_loc, &local->newloc); + tmp_loc.path = gf_strdup(tmp_path); if (!tmp_loc.path) { - gf_log (this->name, GF_LOG_DEBUG, 
"out of memory"); - ret = ENOMEM; - goto out; + gf_log(this->name, GF_LOG_DEBUG, "out of memory"); + ret = ENOMEM; + goto out; } /* Stores the the name of directory to be created */ - tmp_loc.name = gf_strdup (strrchr(tmp_path, '/') + 1); + tmp_loc.name = gf_strdup(strrchr(tmp_path, '/') + 1); if (!tmp_loc.name) { - gf_log (this->name, GF_LOG_DEBUG, "out of memory"); - goto out; + gf_log(this->name, GF_LOG_DEBUG, "out of memory"); + ret = ENOMEM; + goto out; } - - strcpy (real_path, priv->brick_path); - remove_trash_path (tmp_path, (frame->root->pid < 0), &tmp_stat); + strncpy(real_path, priv->brick_path, sizeof(real_path)); + real_path[sizeof(real_path) - 1] = 0; + remove_trash_path(tmp_path, (frame->root->pid < 0), &tmp_stat); if (tmp_stat) - strcat (real_path, tmp_stat); - - STACK_WIND_COOKIE (frame, trash_truncate_mkdir_cbk, tmp_path, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->mkdir, &tmp_loc, - get_permission(real_path), - 0022, xdata); + strncat(real_path, tmp_stat, + sizeof(real_path) - strlen(real_path) - 1); + + TRASH_SET_PID(frame, local); + + STACK_WIND_COOKIE(frame, trash_truncate_mkdir_cbk, tmp_path, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir, + &tmp_loc, get_permission(real_path), 0022, xdata); + loc_wipe(&tmp_loc); + goto out; + } + + if (op_ret == 0) { + dir_name = dirname(tmp_str); + if (strcmp((char *)cookie, dir_name) == 0) { + flags = O_CREAT | O_EXCL | O_WRONLY; + strncpy(real_path, priv->brick_path, sizeof(real_path)); + real_path[sizeof(real_path) - 1] = 0; + strncat(real_path, local->origpath, + sizeof(real_path) - strlen(real_path) - 1); + /* Call create again once directory structure + is created. 
*/ + + TRASH_SET_PID(frame, local); + + STACK_WIND(frame, trash_truncate_create_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->create, &local->newloc, flags, + get_permission(real_path), 0022, local->newfd, xdata); + goto out; + } + } + + if ((op_ret == -1) && (op_errno != EEXIST)) { + gf_log(this->name, GF_LOG_ERROR, + "Directory creation failed [%s]. " + "Therefore truncating %s without moving the " + "original copy to trash directory", + strerror(op_errno), local->loc.name); + STACK_WIND(frame, trash_common_unwind_buf_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, &local->loc, + local->fop_offset, xdata); + goto out; + } + + LOCK(&frame->lock); + { + loop_count = ++local->loop_count; + } + UNLOCK(&frame->lock); + + tmp_dirname = strchr(tmp_str, '/'); + while (tmp_dirname) { + count = tmp_dirname - tmp_str; + if (count == 0) + count = 1; + i++; + if (i > loop_count) + break; + tmp_dirname = strchr(tmp_str + count + 1, '/'); + } + tmp_path = gf_memdup(local->newpath, count + 1); + if (!tmp_path) { + gf_log(this->name, GF_LOG_DEBUG, "out of memory"); + ret = ENOMEM; + goto out; + } + tmp_path[count] = '\0'; + + loc_copy(&tmp_loc, &local->newloc); + tmp_loc.path = gf_strdup(tmp_path); + if (!tmp_loc.path) { + gf_log(this->name, GF_LOG_DEBUG, "out of memory"); + ret = ENOMEM; + goto out; + } + + /* Stores the the name of directory to be created */ + tmp_loc.name = gf_strdup(strrchr(tmp_path, '/') + 1); + if (!tmp_loc.name) { + gf_log(this->name, GF_LOG_DEBUG, "out of memory"); + goto out; + } + + strncpy(real_path, priv->brick_path, sizeof(real_path)); + real_path[sizeof(real_path) - 1] = 0; + remove_trash_path(tmp_path, (frame->root->pid < 0), &tmp_stat); + if (tmp_stat) + strncat(real_path, tmp_stat, sizeof(real_path) - strlen(real_path) - 1); + + TRASH_SET_PID(frame, local); + + STACK_WIND_COOKIE(frame, trash_truncate_mkdir_cbk, tmp_path, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir, + &tmp_loc, get_permission(real_path), 0022, xdata); 
out: - if (tmp_str) - GF_FREE (tmp_str); - if (tmp_path) - GF_FREE (tmp_path); + if (tmp_str) + GF_FREE(tmp_str); + if (tmp_path) + GF_FREE(tmp_path); - return ret; + return ret; } - int32_t -trash_truncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, - dict_t *xdata) +trash_truncate_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + dict_t *xdata) { - trash_private_t *priv = NULL; - trash_local_t *local = NULL; - char loc_newname[PATH_MAX] = {0,}; - int32_t flags = 0; - dentry_t *dir_entry = NULL; - inode_table_t *table = NULL; - int ret = 0; - - priv = this->private; - GF_VALIDATE_OR_GOTO ("trash", priv, out); - - local = frame->local; - GF_VALIDATE_OR_GOTO ("trash", local, out); - - table = local->loc.inode->table; - - pthread_mutex_lock (&table->lock); - { - dir_entry = __dentry_search_arbit (local->loc.inode); - } - pthread_mutex_unlock (&table->lock); - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, - "fstat on the file failed: %s", - strerror (op_errno)); - - TRASH_STACK_UNWIND (truncate, frame, op_ret, op_errno, buf, - NULL, xdata); - goto out; - } - - /* Only last hardlink will be moved to trash directory */ - if (buf->ia_nlink > 1) { - STACK_WIND (frame, trash_common_unwind_buf_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->truncate, - &local->loc, local->fop_offset, xdata); - goto out; - } - - /** - * If the file is too big or if it is extended truncate, - * just don't move it to trash directory. 
- */ - if (buf->ia_size > (priv->max_trash_file_size) || - buf->ia_size <= local->fop_offset) { - gf_log (this->name, GF_LOG_DEBUG, "%s: not moving to trash , " - "having inappropiate file size", local->loc.path); - - STACK_WIND (frame, trash_common_unwind_buf_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->truncate, - &local->loc, local->fop_offset, xdata); - goto out; - } - - /* Retrives the name of file from path */ - local->loc.name = gf_strdup (strrchr (local->loc.path, '/')); - if (!local->loc.name) { - gf_log (this->name, GF_LOG_DEBUG, "out of memory"); - goto out; - } - - /* Stores new path for source file */ - copy_trash_path (priv->newtrash_dir, (frame->root->pid < 0), - local->newpath); - strcat (local->newpath, local->loc.path); - - /* append timestamp to file name so that we can avoid - name collisions inside trash */ - append_time_stamp (local->newpath); - if (strlen (local->newpath) > PATH_MAX) { - STACK_WIND (frame, trash_common_unwind_buf_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->truncate, - &local->loc, local->fop_offset, xdata); - goto out; - } - - strcpy (loc_newname, local->loc.name); - append_time_stamp (loc_newname); - /* local->newloc represents old file(file inside trash), - where as local->loc represents truncated file. 
We need - to create new inode and fd for new file*/ - local->newloc.name = gf_strdup (loc_newname); - if (!local->newloc.name) { - gf_log (this->name, GF_LOG_DEBUG, "out of memory"); - ret = ENOMEM; - goto out; - } - local->newloc.path = gf_strdup (local->newpath); - if (!local->newloc.path) { - gf_log (this->name, GF_LOG_DEBUG, "out of memory"); - ret = ENOMEM; - goto out; - } - local->newloc.inode = inode_new (local->loc.inode->table); - local->newfd = fd_create (local->newloc.inode, frame->root->pid); - - /* Creating vaild parent and pargfids for both files */ - - if (dir_entry == NULL) { - ret = EINVAL; - goto out; - } - local->loc.parent = inode_ref (dir_entry->parent); - gf_uuid_copy (local->loc.pargfid, dir_entry->parent->gfid); - - local->newloc.parent = inode_ref (dir_entry->parent); - gf_uuid_copy (local->newloc.pargfid, dir_entry->parent->gfid); - - flags = O_CREAT|O_EXCL|O_WRONLY; - - STACK_WIND (frame, trash_truncate_create_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->create, - &local->newloc, flags, - st_mode_from_ia (buf->ia_prot, local->loc.inode->ia_type), - 0022, local->newfd, xdata); + trash_private_t *priv = NULL; + trash_local_t *local = NULL; + char loc_newname[PATH_MAX] = { + 0, + }; + int32_t flags = 0; + dentry_t *dir_entry = NULL; + inode_table_t *table = NULL; + int ret = 0; + + priv = this->private; + GF_VALIDATE_OR_GOTO("trash", priv, out); + + local = frame->local; + GF_VALIDATE_OR_GOTO("trash", local, out); + + table = local->loc.inode->table; + + pthread_mutex_lock(&table->lock); + { + dir_entry = __dentry_search_arbit(local->loc.inode); + } + pthread_mutex_unlock(&table->lock); + + if (op_ret == -1) { + gf_log(this->name, GF_LOG_DEBUG, "fstat on the file failed: %s", + strerror(op_errno)); + + TRASH_STACK_UNWIND(truncate, frame, op_ret, op_errno, buf, NULL, xdata); + goto out; + } + + /* Only last hardlink will be moved to trash directory */ + if (buf->ia_nlink > 1) { + STACK_WIND(frame, trash_common_unwind_buf_cbk, 
FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, &local->loc, + local->fop_offset, xdata); + goto out; + } + + /** + * If the file is too big or if it is extended truncate, + * just don't move it to trash directory. + */ + if (buf->ia_size > (priv->max_trash_file_size) || + buf->ia_size <= local->fop_offset) { + gf_log(this->name, GF_LOG_DEBUG, + "%s: file is too large to move to trash", local->loc.path); + + STACK_WIND(frame, trash_common_unwind_buf_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, &local->loc, + local->fop_offset, xdata); + goto out; + } + + /* Retrieves the name of file from path */ + local->loc.name = gf_strdup(strrchr(local->loc.path, '/')); + if (!local->loc.name) { + gf_log(this->name, GF_LOG_DEBUG, "out of memory"); + goto out; + } + + /* Stores new path for source file */ + copy_trash_path(priv->newtrash_dir, (frame->root->pid < 0), local->newpath, + sizeof(local->newpath)); + strncat(local->newpath, local->loc.path, + sizeof(local->newpath) - strlen(local->newpath) - 1); + + /* append timestamp to file name so that we can avoid + name collisions inside trash */ + append_time_stamp(local->newpath, sizeof(local->newpath)); + if (strlen(local->newpath) > PATH_MAX) { + STACK_WIND(frame, trash_common_unwind_buf_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, &local->loc, + local->fop_offset, xdata); + goto out; + } + + strncpy(loc_newname, local->loc.name, sizeof(loc_newname)); + loc_newname[sizeof(loc_newname) - 1] = 0; + append_time_stamp(loc_newname, sizeof(loc_newname)); + /* local->newloc represents old file(file inside trash), + where as local->loc represents truncated file. 
We need + to create new inode and fd for new file*/ + local->newloc.name = gf_strdup(loc_newname); + if (!local->newloc.name) { + gf_log(this->name, GF_LOG_DEBUG, "out of memory"); + ret = ENOMEM; + goto out; + } + local->newloc.path = gf_strdup(local->newpath); + if (!local->newloc.path) { + gf_log(this->name, GF_LOG_DEBUG, "out of memory"); + ret = ENOMEM; + goto out; + } + local->newloc.inode = inode_new(local->loc.inode->table); + local->newfd = fd_create(local->newloc.inode, frame->root->pid); + + /* Creating valid parent and pargfids for both files */ + + if (dir_entry == NULL) { + ret = EINVAL; + goto out; + } + local->loc.parent = inode_ref(dir_entry->parent); + gf_uuid_copy(local->loc.pargfid, dir_entry->parent->gfid); + + local->newloc.parent = inode_ref(dir_entry->parent); + gf_uuid_copy(local->newloc.pargfid, dir_entry->parent->gfid); + + flags = O_CREAT | O_EXCL | O_WRONLY; + + TRASH_SET_PID(frame, local); + + STACK_WIND(frame, trash_truncate_create_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->create, &local->newloc, flags, + st_mode_from_ia(buf->ia_prot, local->loc.inode->ia_type), 0022, + local->newfd, xdata); out: - return ret; + return ret; } /** @@ -1652,89 +1947,85 @@ out: * like text editors etc.. 
*/ int32_t -trash_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, - off_t offset, dict_t *xdata) +trash_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) { - trash_private_t *priv = NULL; - trash_local_t *local = NULL; - int32_t match = 0; - char *pathbuf = NULL; - int ret = 0; - - priv = this->private; - GF_VALIDATE_OR_GOTO ("trash", priv, out); - /* If trash is not active or not enabled through cli, then - * we bypass and wind back + trash_private_t *priv = NULL; + trash_local_t *local = NULL; + int32_t match = 0; + char *pathbuf = NULL; + int ret = 0; + + priv = this->private; + GF_VALIDATE_OR_GOTO("trash", priv, out); + /* If trash is not active or not enabled through cli, then + * we bypass and wind back + */ + if (!priv->state) { + STACK_WIND(frame, trash_common_unwind_buf_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); + goto out; + } + + /* The files removed by gluster operations such as self-heal, + should moved to trash directory, but files by client should + not moved */ + if ((frame->root->pid < 0) && !priv->internal) { + STACK_WIND(frame, trash_common_unwind_buf_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); + goto out; + } + /* This will be more accurate */ + inode_path(loc->inode, NULL, &pathbuf); + + /* Checks whether file is in trash directory or eliminate path. + * In all such cases it does not move to trash directory, + * truncate will be performed + */ + match = check_whether_eliminate_path(priv->eliminate, pathbuf); + + if ((strncmp(pathbuf, priv->newtrash_dir, strlen(priv->newtrash_dir)) == + 0) || + (match)) { + if (match) { + gf_log(this->name, GF_LOG_DEBUG, + "%s: file not moved to trash as per option " + "'eliminate path'", + loc->path); + } + + /* Trying to truncate from the trash-dir. So do the + * actual truncate without moving to trash-dir. 
*/ - if (!priv->state) { - STACK_WIND (frame, trash_common_unwind_buf_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->truncate, loc, - offset, xdata); - goto out; - } - - /* The files removed by gluster operations such as self-heal, - should moved to trash directory, but files by client should - not moved */ - if ((frame->root->pid < 0) && !priv->internal) { - STACK_WIND (frame, trash_common_unwind_buf_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->truncate, loc, - offset, xdata); - goto out; - } - /* This will be more accurate */ - inode_path(loc->inode, NULL, &pathbuf); + STACK_WIND(frame, trash_common_unwind_buf_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); + goto out; + } - /* Checks whether file is in trash directory or eliminate path. - * In all such cases it does not move to trash directory, - * truncate will be performed - */ - match = check_whether_eliminate_path (priv->eliminate, pathbuf); - - if ((strncmp (pathbuf, priv->newtrash_dir, - strlen (priv->newtrash_dir)) == 0) || (match)) { - if (match) { - gf_log (this->name, GF_LOG_DEBUG, - "%s: file not moved to trash as per option " - "'eliminate path'", loc->path); - } - - /* Trying to truncate from the trash-dir. So do the - * actual truncate without moving to trash-dir. 
- */ - STACK_WIND (frame, trash_common_unwind_buf_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->truncate, loc, offset, - xdata); - goto out; - } + LOCK_INIT(&frame->lock); - LOCK_INIT (&frame->lock); + local = mem_get0(this->local_pool); + if (!local) { + gf_log(this->name, GF_LOG_DEBUG, "out of memory"); + TRASH_STACK_UNWIND(truncate, frame, -1, ENOMEM, NULL, NULL, xdata); + ret = ENOMEM; + goto out; + } - local = mem_get0 (this->local_pool); - if (!local) { - gf_log (this->name, GF_LOG_DEBUG, "out of memory"); - TRASH_STACK_UNWIND (truncate, frame, -1, ENOMEM, NULL, NULL, - xdata); - ret = ENOMEM; - goto out; - } + strncpy(local->origpath, pathbuf, sizeof(local->origpath)); + local->origpath[sizeof(local->origpath) - 1] = 0; - loc_copy (&local->loc, loc); - local->loc.path = pathbuf; - local->fop_offset = offset; + loc_copy(&local->loc, loc); + local->loc.path = pathbuf; + local->fop_offset = offset; - frame->local = local; + frame->local = local; - STACK_WIND (frame, trash_truncate_stat_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->stat, loc, - xdata); + STACK_WIND(frame, trash_truncate_stat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->stat, loc, xdata); out: - return ret; + return ret; } /** @@ -1743,95 +2034,91 @@ out: * other than that it also called by Rebalance operation */ int32_t -trash_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, - dict_t *xdata) +trash_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) { - trash_private_t *priv = NULL; - trash_local_t *local = NULL;/* file inside trash */ - char *pathbuf = NULL;/* path of file from fd */ - int32_t retval = 0; - int32_t match = 0; - int ret = 0; - - priv = this->private; - GF_VALIDATE_OR_GOTO ("trash", priv, out); - /* If trash is not active or not enabled through cli, then - * we bypass and wind back - */ - if (!priv->state) { - STACK_WIND (frame, trash_common_unwind_buf_cbk, - FIRST_CHILD(this), - 
FIRST_CHILD(this)->fops->ftruncate, fd, - offset, xdata); - goto out; - } - - /* The files removed by gluster operations such as self-heal, - * should moved to trash directory, but files by client - * should not moved - */ - if ((frame->root->pid < 0) && !priv->internal) { - STACK_WIND (frame, trash_common_unwind_buf_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->ftruncate, fd, - offset, xdata); - goto out; - } - /* This will be more accurate */ - retval = inode_path (fd->inode, NULL, &pathbuf); - - /* Checking the eliminate path */ - - /* Checks whether file is trash directory or eliminate path or - * invalid fd. In all such cases it does not move to trash directory, - * ftruncate will be performed - */ - match = check_whether_eliminate_path (priv->eliminate, pathbuf); - if ((strncmp (pathbuf, priv->newtrash_dir, - strlen (priv->newtrash_dir)) == 0) || match || - !retval) { - - if (match) { - gf_log (this->name, GF_LOG_DEBUG, - "%s: file matches eliminate path, " - "not moved to trash", pathbuf); - } - - /* Trying to ftruncate from the trash-dir. 
So do the - * actual ftruncate without moving to trash-dir - */ - STACK_WIND (frame, trash_common_unwind_buf_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->ftruncate, - fd, offset, xdata); - goto out; - } - - local = mem_get0 (this->local_pool); - if (!local) { - gf_log (this->name, GF_LOG_DEBUG, "out of memory"); - TRASH_STACK_UNWIND (ftruncate, frame, -1, ENOMEM, NULL, - NULL, xdata); - ret = -1; - goto out; - } - - /* To convert fd to location */ - frame->local=local; - - local->loc.path = pathbuf; - local->loc.inode = inode_ref (fd->inode); - gf_uuid_copy (local->loc.gfid, local->loc.inode->gfid); - - local->fop_offset = offset; - - /* Else remains same to truncate code, so from here flow goes - * to truncate_stat + trash_private_t *priv = NULL; + trash_local_t *local = NULL; /* file inside trash */ + char *pathbuf = NULL; /* path of file from fd */ + int32_t retval = 0; + int32_t match = 0; + int ret = 0; + + priv = this->private; + GF_VALIDATE_OR_GOTO("trash", priv, out); + /* If trash is not active or not enabled through cli, then + * we bypass and wind back + */ + if (!priv->state) { + STACK_WIND(frame, trash_common_unwind_buf_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); + goto out; + } + + /* The files removed by gluster operations such as self-heal, + * should moved to trash directory, but files by client + * should not moved + */ + if ((frame->root->pid < 0) && !priv->internal) { + STACK_WIND(frame, trash_common_unwind_buf_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); + goto out; + } + /* This will be more accurate */ + retval = inode_path(fd->inode, NULL, &pathbuf); + + /* Checking the eliminate path */ + + /* Checks whether file is trash directory or eliminate path or + * invalid fd. 
In all such cases it does not move to trash directory, + * ftruncate will be performed + */ + match = check_whether_eliminate_path(priv->eliminate, pathbuf); + if ((strncmp(pathbuf, priv->newtrash_dir, strlen(priv->newtrash_dir)) == + 0) || + match || !retval) { + if (match) { + gf_log(this->name, GF_LOG_DEBUG, + "%s: file matches eliminate path, " + "not moved to trash", + pathbuf); + } + + /* Trying to ftruncate from the trash-dir. So do the + * actual ftruncate without moving to trash-dir */ - STACK_WIND (frame, trash_truncate_stat_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fstat, fd, xdata); + STACK_WIND(frame, trash_common_unwind_buf_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); + goto out; + } + + local = mem_get0(this->local_pool); + if (!local) { + gf_log(this->name, GF_LOG_DEBUG, "out of memory"); + TRASH_STACK_UNWIND(ftruncate, frame, -1, ENOMEM, NULL, NULL, xdata); + ret = -1; + goto out; + } + + strncpy(local->origpath, pathbuf, sizeof(local->origpath)); + local->origpath[sizeof(local->origpath) - 1] = 0; + + /* To convert fd to location */ + frame->local = local; + + local->loc.path = pathbuf; + local->loc.inode = inode_ref(fd->inode); + gf_uuid_copy(local->loc.gfid, local->loc.inode->gfid); + + local->fop_offset = offset; + + /* Else remains same to truncate code, so from here flow goes + * to truncate_stat + */ + STACK_WIND(frame, trash_truncate_stat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, fd, xdata); out: - return ret; + return ret; } /** @@ -1839,31 +2126,32 @@ out: * trash directory in the mount by the user */ int32_t -trash_mkdir (call_frame_t *frame, xlator_t *this, - loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata) +trash_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + mode_t umask, dict_t *xdata) { - int32_t op_ret = 0; - int32_t op_errno = 0; - trash_private_t *priv = NULL; - - priv = this->private; - GF_VALIDATE_OR_GOTO ("trash", priv, out); - - if 
(!check_whether_trash_directory (loc->path, priv->newtrash_dir)) { - gf_log (this->name, GF_LOG_WARNING, - "mkdir issued on %s, which is not permitted", - priv->newtrash_dir); - op_errno = EPERM; - op_ret = -1; - - STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, - NULL, NULL, NULL, NULL, xdata); - } else { - STACK_WIND (frame, trash_common_mkdir_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata); - } + int32_t op_ret = 0; + int32_t op_errno = 0; + trash_private_t *priv = NULL; + + priv = this->private; + GF_VALIDATE_OR_GOTO("trash", priv, out); + + if (!check_whether_op_permitted(priv, loc)) { + gf_log(this->name, GF_LOG_WARNING, + "mkdir issued on %s, which is not permitted", + priv->newtrash_dir); + op_errno = EPERM; + op_ret = -1; + + STACK_UNWIND_STRICT(mkdir, frame, op_ret, op_errno, NULL, NULL, NULL, + NULL, xdata); + } else { + STACK_WIND(frame, trash_common_mkdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata); + } + out: - return 0; + return 0; } /** @@ -1871,31 +2159,32 @@ out: * of trash directory in the mount by the user */ int -trash_rename (call_frame_t *frame, xlator_t *this, - loc_t *oldloc, loc_t *newloc, dict_t *xdata) +trash_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) { - int32_t op_ret = 0; - int32_t op_errno = 0; - trash_private_t *priv = NULL; - - priv = this->private; - GF_VALIDATE_OR_GOTO ("trash", priv, out); - - if (!check_whether_trash_directory (oldloc->path, priv->newtrash_dir)) { - gf_log (this->name, GF_LOG_WARNING, - "rename issued on %s, which is not permitted", - priv->newtrash_dir); - op_errno = EPERM; - op_ret = -1; - - STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, NULL, - NULL, NULL, NULL, NULL, xdata); - } else { - STACK_WIND (frame, trash_common_rename_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); - } + int32_t op_ret = 0; + int32_t op_errno = 0; + trash_private_t 
*priv = NULL; + + priv = this->private; + GF_VALIDATE_OR_GOTO("trash", priv, out); + + if (!check_whether_op_permitted(priv, oldloc)) { + gf_log(this->name, GF_LOG_WARNING, + "rename issued on %s, which is not permitted", + priv->newtrash_dir); + op_errno = EPERM; + op_ret = -1; + + STACK_UNWIND_STRICT(rename, frame, op_ret, op_errno, NULL, NULL, NULL, + NULL, NULL, xdata); + } else { + STACK_WIND(frame, trash_common_rename_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); + } + out: - return 0; + return 0; } /** @@ -1903,192 +2192,128 @@ out: * trash directory in the mount by the user */ int32_t -trash_rmdir (call_frame_t *frame, xlator_t *this, - loc_t *loc, int flags, dict_t *xdata) +trash_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + dict_t *xdata) { - int32_t op_ret = 0; - int32_t op_errno = 0; - trash_private_t *priv = NULL; - - priv = this->private; - GF_VALIDATE_OR_GOTO ("trash", priv, out); - - if (!check_whether_trash_directory (loc->path, priv->newtrash_dir)) { - gf_log (this->name, GF_LOG_WARNING, - "rmdir issued on %s, which is not permitted", - priv->newtrash_dir); - op_errno = EPERM; - op_ret = -1; - - STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno, - NULL, NULL, xdata); - } else { - STACK_WIND (frame, trash_common_rmdir_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rmdir, loc, flags, xdata); - } + int32_t op_ret = 0; + int32_t op_errno = 0; + trash_private_t *priv = NULL; + + priv = this->private; + GF_VALIDATE_OR_GOTO("trash", priv, out); + + if (!check_whether_op_permitted(priv, loc)) { + gf_log(this->name, GF_LOG_WARNING, + "rmdir issued on %s, which is not permitted", + priv->newtrash_dir); + op_errno = EPERM; + op_ret = -1; + + STACK_UNWIND_STRICT(rmdir, frame, op_ret, op_errno, NULL, NULL, xdata); + } else { + STACK_WIND(frame, trash_common_rmdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rmdir, loc, flags, xdata); + } + out: - return 0; + return 0; } /** - * Volume set 
option is handled by the reconfigure funtion. + * Volume set option is handled by the reconfigure function. * Here we checks whether each option is set or not ,if it * sets then corresponding modifciations will be made */ int -reconfigure (xlator_t *this, dict_t *options) +reconfigure(xlator_t *this, dict_t *options) { - uint64_t max_fsize = 0; - int ret = 0; - char *tmp = NULL; - char *tmp_str = NULL; - trash_private_t *priv = NULL; - loc_t old_loc = {0, }; - loc_t new_loc = {0, }; - call_frame_t *frame = NULL; - char trash_dir[PATH_MAX] = {0,}; - - priv = this->private; - GF_VALIDATE_OR_GOTO ("trash", priv, out); - - GF_OPTION_RECONF ("trash", priv->state, options, bool, out); - - GF_OPTION_RECONF ("trash-dir", tmp, options, str, out); - if (tmp) { - sprintf(trash_dir, "/%s/", tmp); - if (strcmp(priv->newtrash_dir, trash_dir) != 0) { - - /* When user set a new name for trash directory, trash - * xlator will perform a rename operation on old trash - * directory to the new one using a STACK_WIND from here. 
- * This option can be configured only when volume is in - * started state - */ - - GF_FREE (priv->newtrash_dir); - - priv->newtrash_dir = gf_strdup (trash_dir); - if (!priv->newtrash_dir) { - ret = ENOMEM; - gf_log (this->name, GF_LOG_DEBUG, - "out of memory"); - goto out; - } - gf_log (this->name, GF_LOG_DEBUG, - "Renaming %s -> %s from reconfigure", - priv->oldtrash_dir, priv->newtrash_dir); - - if (!priv->newtrash_dir) { - gf_log (this->name, GF_LOG_DEBUG, - "out of memory"); - ret = ENOMEM; - goto out; - } - frame = create_frame (this, this->ctx->pool); - if (frame == NULL) { - gf_log (this->name, GF_LOG_ERROR, - "failed to create frame"); - ret = ENOMEM; - goto out; - } - - /* assign new location values to new_loc members */ - gf_uuid_copy (new_loc.gfid, trash_gfid); - gf_uuid_copy (new_loc.pargfid, root_gfid); - ret = extract_trash_directory (priv->newtrash_dir, - &new_loc.name); - if (ret) { - gf_log (this->name, GF_LOG_DEBUG, - "out of memory"); - goto out; - } - new_loc.path = gf_strdup (priv->newtrash_dir); - if (!new_loc.path) { - ret = ENOMEM; - gf_log (this->name, GF_LOG_DEBUG, - "out of memory"); - goto out; - } - - /* assign old location values to old_loc members */ - gf_uuid_copy (old_loc.gfid, trash_gfid); - gf_uuid_copy (old_loc.pargfid, root_gfid); - ret = extract_trash_directory (priv->oldtrash_dir, - &old_loc.name); - if (ret) { - gf_log (this->name, GF_LOG_DEBUG, - "out of memory"); - goto out; - } - old_loc.path = gf_strdup (priv->oldtrash_dir); - if (!old_loc.path) { - ret = ENOMEM; - gf_log (this->name, GF_LOG_DEBUG, - "out of memory"); - goto out; - } - - old_loc.inode = inode_ref (priv->trash_inode); - gf_uuid_copy(old_loc.inode->gfid, old_loc.gfid); - - STACK_WIND (frame, trash_reconf_rename_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rename, - &old_loc, &new_loc, options); - GF_FREE (priv->oldtrash_dir); - - priv->oldtrash_dir = gf_strdup(priv->newtrash_dir); - if (!priv->oldtrash_dir) { - ret = ENOMEM; - gf_log (this->name, 
GF_LOG_DEBUG, - "out of memory"); - goto out; - } - } - } - tmp = NULL; - - GF_OPTION_RECONF ("trash-internal-op", priv->internal, options, - bool, out); - - GF_OPTION_RECONF ("trash-max-filesize", max_fsize, options, - size_uint64, out); - if (max_fsize) { - if (max_fsize > GF_ALLOWED_MAX_FILE_SIZE) { - gf_log (this->name, GF_LOG_DEBUG, - "Size specified for max-size(in MB) is too " - "large so using 1GB as max-size (NOT IDEAL)"); - priv->max_trash_file_size = GF_ALLOWED_MAX_FILE_SIZE; - } else - priv->max_trash_file_size = max_fsize; - gf_log (this->name, GF_LOG_DEBUG, "%"GF_PRI_SIZET" max-size", - priv->max_trash_file_size); - } - GF_OPTION_RECONF ("trash-eliminate-path", tmp, options, str, out); - if (!tmp) { - gf_log (this->name, GF_LOG_DEBUG, - "no option specified for 'eliminate', using NULL"); - } else { - if (priv->eliminate) - wipe_eliminate_path (&priv->eliminate); + uint64_t max_fsize = 0; + int ret = 0; + char *tmp = NULL; + char *tmp_str = NULL; + trash_private_t *priv = NULL; + char trash_dir[PATH_MAX] = { + 0, + }; + + priv = this->private; + + GF_VALIDATE_OR_GOTO("trash", priv, out); + + GF_OPTION_RECONF("trash-internal-op", priv->internal, options, bool, out); + GF_OPTION_RECONF("trash-dir", tmp, options, str, out); + + GF_OPTION_RECONF("trash", priv->state, options, bool, out); + + if (priv->state) { + ret = create_or_rename_trash_directory(this); - tmp_str = gf_strdup (tmp); - if (!tmp_str) { - gf_log (this->name, GF_LOG_DEBUG, "out of memory"); - ret = ENOMEM; - goto out; - } - ret = store_eliminate_path (tmp_str, &priv->eliminate); + if (tmp) + sprintf(trash_dir, "/%s/", tmp); + else + sprintf(trash_dir, "%s", priv->oldtrash_dir); + + if (strcmp(priv->newtrash_dir, trash_dir) != 0) { + /* When user set a new name for trash directory, trash + * xlator will perform a rename operation on old trash + * directory to the new one using a STACK_WIND from here. 
+ * This option can be configured only when volume is in + * started state + */ + + GF_FREE(priv->newtrash_dir); + + priv->newtrash_dir = gf_strdup(trash_dir); + if (!priv->newtrash_dir) { + ret = ENOMEM; + gf_log(this->name, GF_LOG_DEBUG, "out of memory"); + goto out; + } + gf_log(this->name, GF_LOG_DEBUG, + "Renaming %s -> %s from reconfigure", priv->oldtrash_dir, + priv->newtrash_dir); + if (!priv->newtrash_dir) { + gf_log(this->name, GF_LOG_DEBUG, "out of memory"); + ret = ENOMEM; + goto out; + } + ret = rename_trash_directory(this); + } + + if (priv->internal) { + ret = create_internalop_directory(this); + } + } + tmp = NULL; + + GF_OPTION_RECONF("trash-max-filesize", max_fsize, options, size_uint64, + out); + if (max_fsize) { + priv->max_trash_file_size = max_fsize; + gf_log(this->name, GF_LOG_DEBUG, "%" GF_PRI_SIZET " max-size", + priv->max_trash_file_size); + } + GF_OPTION_RECONF("trash-eliminate-path", tmp, options, str, out); + if (!tmp) { + gf_log(this->name, GF_LOG_DEBUG, + "no option specified for 'eliminate', using NULL"); + } else { + if (priv->eliminate) + wipe_eliminate_path(&priv->eliminate); + + tmp_str = gf_strdup(tmp); + if (!tmp_str) { + gf_log(this->name, GF_LOG_DEBUG, "out of memory"); + ret = ENOMEM; + goto out; } + ret = store_eliminate_path(tmp_str, &priv->eliminate); + } out: - if (tmp_str) - GF_FREE (tmp_str); - loc_wipe (&new_loc); - loc_wipe (&old_loc); - return ret; + return ret; } /** @@ -2096,424 +2321,333 @@ out: * using STACK_WIND only when posix xlator is up */ int -notify (xlator_t *this, int event, void *data, ...) +notify(xlator_t *this, int event, void *data, ...) 
{ - trash_private_t *priv = NULL; - dict_t *dict = NULL; - int ret = 0; - uuid_t *tgfid_ptr = NULL; - loc_t loc = {0, }; - loc_t old_loc = {0, }; - call_frame_t *frame = NULL; - - priv = this->private; - GF_VALIDATE_OR_GOTO ("trash", priv, out); - - /* Check whether posix is up not */ - if (event == GF_EVENT_CHILD_UP) { - frame = create_frame(this, this->ctx->pool); - if (frame == NULL) { - gf_log (this->name, GF_LOG_ERROR, - "failed to create frame"); - ret = ENOMEM; - goto out; - } - - dict = dict_new (); - if (!dict) { - ret = ENOMEM; - goto out; - } - priv->trash_itable = inode_table_new (0, this); - - /* Here there is two possiblities ,if trash directory already - * exist ,then we need to perform a rename operation on the - * old one. Otherwise, we need to create the trash directory - * For both, we need to pass location variable, gfid of parent - * and a frame for calling STACK_WIND.The location variable - * requires name,path,gfid and inode - */ - if (!priv->oldtrash_dir) { - loc.inode = inode_new (priv->trash_itable); - gf_uuid_copy (loc.gfid, trash_gfid); - - gf_log (this->name, GF_LOG_DEBUG, "nameless lookup for" - "old trash directory"); - STACK_WIND (frame, trash_notify_lookup_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, - &loc, dict); - gf_log (this->name, GF_LOG_DEBUG, "old_trash_dir %s", - priv->oldtrash_dir); - loc_wipe (&loc); - } - - if (priv->oldtrash_dir == NULL) { - ret = EINVAL; - goto out; - } - if (strcmp (priv->oldtrash_dir, priv->newtrash_dir) == 0) { - gf_log (this->name, GF_LOG_DEBUG, "Creating trash " - "directory %s from notify", - priv->newtrash_dir); - - tgfid_ptr = GF_CALLOC (1, sizeof(uuid_t), - gf_common_mt_uuid_t); - if (!tgfid_ptr) { - ret = ENOMEM; - goto out; - } - gf_uuid_copy (*tgfid_ptr, trash_gfid); - - gf_uuid_copy (loc.gfid, trash_gfid); - gf_uuid_copy (loc.pargfid, root_gfid); - ret = extract_trash_directory (priv->newtrash_dir, - &loc.name); - if (ret) { - gf_log (this->name, GF_LOG_DEBUG, - "out of 
memory"); - goto out; - } - loc.path = gf_strdup (priv->newtrash_dir); - if (!loc.path) { - gf_log (this->name, GF_LOG_DEBUG, - "out of memory"); - ret = ENOMEM; - goto out; - } - - priv->trash_inode = inode_new (priv->trash_itable); - priv->trash_inode->ia_type = IA_IFDIR; - loc.inode = inode_ref (priv->trash_inode); - - /* Fixed gfid is set for trash directory with - * this function - */ - ret = dict_set_dynptr (dict, "gfid-req", tgfid_ptr, - sizeof (uuid_t)); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "setting key gfid-req failed"); - goto out; - } - - /* The mkdir call for creating trash directory */ - STACK_WIND (frame, trash_notify_mkdir_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->mkdir, &loc, 0755, - 0022, dict); - } else { - /* assign new location values to new_loc members */ - gf_log (this->name, GF_LOG_DEBUG, "Renaming %s -> %s" - " from notify", priv->oldtrash_dir, - priv->newtrash_dir); - gf_uuid_copy (loc.gfid, trash_gfid); - gf_uuid_copy (loc.pargfid, root_gfid); - ret = extract_trash_directory (priv->newtrash_dir, - &loc.name); - if (ret) { - gf_log (this->name, GF_LOG_DEBUG, - "out of memory"); - goto out; - } - loc.path = gf_strdup (priv->newtrash_dir); - if (!loc.path) { - gf_log (this->name, GF_LOG_DEBUG, - "out of memory"); - ret = ENOMEM; - goto out; - } - /* assign old location values to old_loc members */ - gf_uuid_copy (old_loc.gfid, trash_gfid); - gf_uuid_copy (old_loc.pargfid, root_gfid); - ret = extract_trash_directory (priv->oldtrash_dir, - &old_loc.name); - if (ret) { - gf_log (this->name, GF_LOG_DEBUG, - "out of memory"); - goto out; - } - old_loc.path = gf_strdup (priv->oldtrash_dir); - if (!old_loc.path) { - gf_log (this->name, GF_LOG_DEBUG, - "out of memory"); - ret = ENOMEM; - goto out; - } - - old_loc.inode = inode_ref (priv->trash_inode); - gf_uuid_copy(old_loc.inode->gfid, old_loc.gfid); - - STACK_WIND (frame, trash_notify_rename_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rename, - &old_loc, &loc, 
dict); - GF_FREE (priv->oldtrash_dir); - - priv->oldtrash_dir = gf_strdup(priv->newtrash_dir); - if (!priv->oldtrash_dir) { - gf_log (this->name, GF_LOG_DEBUG, - "out of memory"); - ret = ENOMEM; - goto out; - } - } - } else { - ret = default_notify (this, event, data); - if (ret) - gf_log (this->name, GF_LOG_INFO, - "default notify event failed"); + trash_private_t *priv = NULL; + int ret = 0; + + priv = this->private; + GF_VALIDATE_OR_GOTO("trash", priv, out); + + /* Check whether posix is up not */ + if (event == GF_EVENT_CHILD_UP) { + if (!priv->state) { + gf_log(this->name, GF_LOG_DEBUG, "trash xlator is off"); + goto out; } -out: - if (ret && tgfid_ptr) - GF_FREE (tgfid_ptr); - if (dict) - dict_unref (dict); - loc_wipe (&loc); - loc_wipe (&old_loc); + /* Here there is two possibilities ,if trash directory already + * exist ,then we need to perform a rename operation on the + * old one. Otherwise, we need to create the trash directory + * For both, we need to pass location variable, gfid of parent + * and a frame for calling STACK_WIND.The location variable + * requires name,path,gfid and inode + */ + if (!priv->oldtrash_dir) + ret = create_or_rename_trash_directory(this); + else if (strcmp(priv->newtrash_dir, priv->oldtrash_dir) != 0) + ret = rename_trash_directory(this); + if (ret) + goto out; - return ret; + if (priv->internal) + (void)create_internalop_directory(this); + } + +out: + ret = default_notify(this, event, data); + if (ret) + gf_log(this->name, GF_LOG_INFO, "default notify event failed"); + return ret; } int32_t -mem_acct_init (xlator_t *this) +mem_acct_init(xlator_t *this) { - int ret = -1; + int ret = -1; - GF_VALIDATE_OR_GOTO ("trash", this, out); + GF_VALIDATE_OR_GOTO("trash", this, out); - ret = xlator_mem_acct_init (this, gf_trash_mt_end + 1); - if (ret != 0) { - gf_log(this->name, GF_LOG_ERROR, "Memory accounting init" - "failed"); - return ret; - } -out: + ret = xlator_mem_acct_init(this, gf_trash_mt_end + 1); + if (ret != 0) { + 
gf_log(this->name, GF_LOG_ERROR, + "Memory accounting init" + "failed"); return ret; + } +out: + return ret; } /** * trash_init */ int32_t -init (xlator_t *this) +init(xlator_t *this) { - trash_private_t *priv = NULL; - int ret = -1; - char *tmp = NULL; - char *tmp_str = NULL; - char trash_dir[PATH_MAX] = {0,}; - uint64_t max_trash_file_size64 = 0; - data_t *data = NULL; - - GF_VALIDATE_OR_GOTO ("trash", this, out); - - if (!this->children || this->children->next) { - gf_log (this->name, GF_LOG_ERROR, - "not configured with exactly one child. exiting"); - ret = -1; - goto out; - } - - if (!this->parents) { - gf_log (this->name, GF_LOG_WARNING, - "dangling volume. check volfile"); - } - - priv = GF_CALLOC (1, sizeof (*priv), gf_trash_mt_trash_private_t); - if (!priv) { - gf_log (this->name, GF_LOG_ERROR, "out of memory"); - ret = ENOMEM; - goto out; - } - - /* Trash priv data members are initialized through the following - * set of statements - */ - GF_OPTION_INIT ("trash", priv->state, bool, out); - - GF_OPTION_INIT ("trash-dir", tmp, str, out); - - /* We store trash dir value as path for easier manipulation*/ - if (!tmp) { - gf_log (this->name, GF_LOG_INFO, - "no option specified for 'trash-dir', " - "using \"/.trashcan/\""); - priv->newtrash_dir = gf_strdup ("/.trashcan/"); - if (!priv->newtrash_dir) { - ret = ENOMEM; - gf_log (this->name, GF_LOG_DEBUG, "out of memory"); - goto out; - } - } else { - sprintf(trash_dir, "/%s/", tmp); - priv->newtrash_dir = gf_strdup (trash_dir); - if (!priv->newtrash_dir) { - ret = ENOMEM; - gf_log (this->name, GF_LOG_DEBUG, "out of memory"); - goto out; - } - } - tmp = NULL; - - GF_OPTION_INIT ("trash-eliminate-path", tmp, str, out); - if (!tmp) { - gf_log (this->name, GF_LOG_INFO, - "no option specified for 'eliminate', using NULL"); - } else { - tmp_str = gf_strdup (tmp); - if (!tmp_str) { - gf_log (this->name, GF_LOG_ERROR, - "out of memory"); - ret = ENOMEM; - goto out; - } - ret = store_eliminate_path (tmp_str, 
&priv->eliminate); - - } - tmp = NULL; - - GF_OPTION_INIT ("trash-max-filesize", max_trash_file_size64, - size_uint64, out); - if (!max_trash_file_size64) { - gf_log (this->name, GF_LOG_ERROR, - "no option specified for 'max-trashable-file-size', " - "using default = %lld MB", - GF_DEFAULT_MAX_FILE_SIZE / GF_UNIT_MB); - priv->max_trash_file_size = GF_DEFAULT_MAX_FILE_SIZE; - } else { - if( max_trash_file_size64 > GF_ALLOWED_MAX_FILE_SIZE ) { - gf_log (this->name, GF_LOG_DEBUG, - "Size specified for max-size(in MB) is too " - "large so using 1GB as max-size (NOT IDEAL)"); - priv->max_trash_file_size = GF_ALLOWED_MAX_FILE_SIZE; - } else - priv->max_trash_file_size = max_trash_file_size64; - gf_log (this->name, GF_LOG_DEBUG, "%"GF_PRI_SIZET" max-size", - priv->max_trash_file_size); - } - - GF_OPTION_INIT ("trash-internal-op", priv->internal, bool, out); - - this->local_pool = mem_pool_new (trash_local_t, 64); - if (!this->local_pool) { - gf_log (this->name, GF_LOG_ERROR, - "failed to create local_t's memory pool"); - ret = ENOMEM; - goto out; - } - - /* For creating directories inside trash with proper permissions, - * we need to perform stat on that directories, for this we use - * brick path - */ - data = dict_get (this->options, "brick-path"); - if (!data) { - gf_log (this->name, GF_LOG_ERROR, - "no option specified for 'brick-path'"); - ret = ENOMEM; - goto out; - } - priv->brick_path = gf_strdup (data->data); - if (!priv->brick_path) { - ret = ENOMEM; - gf_log (this->name, GF_LOG_DEBUG, "out of memory"); - goto out; - } - - gf_log (this->name, GF_LOG_DEBUG, "brick path is%s", priv->brick_path); - - this->private = (void *)priv; - ret = 0; + trash_private_t *priv = NULL; + int ret = -1; + char *tmp = NULL; + char *tmp_str = NULL; + char trash_dir[PATH_MAX] = { + 0, + }; + uint64_t max_trash_file_size64 = 0; + data_t *data = NULL; + + GF_VALIDATE_OR_GOTO("trash", this, out); + + if (!this->children || this->children->next) { + gf_log(this->name, GF_LOG_ERROR, + 
"not configured with exactly one child. exiting"); + ret = -1; + goto out; + } + + if (!this->parents) { + gf_log(this->name, GF_LOG_WARNING, "dangling volume. check volfile"); + } + + priv = GF_CALLOC(1, sizeof(*priv), gf_trash_mt_trash_private_t); + if (!priv) { + gf_log(this->name, GF_LOG_ERROR, "out of memory"); + ret = ENOMEM; + goto out; + } + + /* Trash priv data members are initialized through the following + * set of statements + */ + GF_OPTION_INIT("trash", priv->state, bool, out); + + GF_OPTION_INIT("trash-dir", tmp, str, out); + + /* We store trash dir value as path for easier manipulation*/ + if (!tmp) { + gf_log(this->name, GF_LOG_INFO, + "no option specified for 'trash-dir', " + "using \"/.trashcan/\""); + priv->newtrash_dir = gf_strdup("/.trashcan/"); + if (!priv->newtrash_dir) { + ret = ENOMEM; + gf_log(this->name, GF_LOG_DEBUG, "out of memory"); + goto out; + } + } else { + sprintf(trash_dir, "/%s/", tmp); + priv->newtrash_dir = gf_strdup(trash_dir); + if (!priv->newtrash_dir) { + ret = ENOMEM; + gf_log(this->name, GF_LOG_DEBUG, "out of memory"); + goto out; + } + } + tmp = NULL; + + GF_OPTION_INIT("trash-eliminate-path", tmp, str, out); + if (!tmp) { + gf_log(this->name, GF_LOG_INFO, + "no option specified for 'eliminate', using NULL"); + } else { + tmp_str = gf_strdup(tmp); + if (!tmp_str) { + gf_log(this->name, GF_LOG_ERROR, "out of memory"); + ret = ENOMEM; + goto out; + } + ret = store_eliminate_path(tmp_str, &priv->eliminate); + } + tmp = NULL; + + GF_OPTION_INIT("trash-max-filesize", max_trash_file_size64, size_uint64, + out); + if (!max_trash_file_size64) { + gf_log(this->name, GF_LOG_ERROR, + "no option specified for 'max-trashable-file-size', " + "using default = %lld MB", + GF_DEFAULT_MAX_FILE_SIZE / GF_UNIT_MB); + priv->max_trash_file_size = GF_DEFAULT_MAX_FILE_SIZE; + } else { + priv->max_trash_file_size = max_trash_file_size64; + gf_log(this->name, GF_LOG_DEBUG, "%" GF_PRI_SIZET " max-size", + priv->max_trash_file_size); + } + + 
GF_OPTION_INIT("trash-internal-op", priv->internal, bool, out); + + this->local_pool = mem_pool_new(trash_local_t, 64); + if (!this->local_pool) { + gf_log(this->name, GF_LOG_ERROR, + "failed to create local_t's memory pool"); + ret = ENOMEM; + goto out; + } + + /* For creating directories inside trash with proper permissions, + * we need to perform stat on that directories, for this we use + * brick path + */ + data = dict_get(this->options, "brick-path"); + if (!data) { + gf_log(this->name, GF_LOG_ERROR, + "no option specified for 'brick-path'"); + ret = ENOMEM; + goto out; + } + priv->brick_path = gf_strdup(data->data); + if (!priv->brick_path) { + ret = ENOMEM; + gf_log(this->name, GF_LOG_DEBUG, "out of memory"); + goto out; + } + + priv->trash_itable = inode_table_new(0, this); + gf_log(this->name, GF_LOG_DEBUG, "brick path is%s", priv->brick_path); + + this->private = (void *)priv; + ret = 0; out: - if (tmp_str) - GF_FREE (tmp_str); - if (ret) { - if (priv) { - if (priv->newtrash_dir) - GF_FREE (priv->newtrash_dir); - if (priv->oldtrash_dir) - GF_FREE (priv->oldtrash_dir); - if (priv->brick_path) - GF_FREE (priv->brick_path); - if (priv->eliminate) - wipe_eliminate_path (&priv->eliminate); - GF_FREE (priv); - } - mem_pool_destroy (this->local_pool); - } - return ret; + if (tmp_str) + GF_FREE(tmp_str); + if (ret) { + if (priv) { + if (priv->newtrash_dir) + GF_FREE(priv->newtrash_dir); + if (priv->oldtrash_dir) + GF_FREE(priv->oldtrash_dir); + if (priv->brick_path) + GF_FREE(priv->brick_path); + if (priv->eliminate) + wipe_eliminate_path(&priv->eliminate); + GF_FREE(priv); + } + mem_pool_destroy(this->local_pool); + this->local_pool = NULL; + } + return ret; } /** * trash_fini */ void -fini (xlator_t *this) +fini(xlator_t *this) { - trash_private_t *priv = NULL; - - GF_VALIDATE_OR_GOTO ("trash", this, out); - priv = this->private; - - if (priv) { - if (priv->newtrash_dir) - GF_FREE (priv->newtrash_dir); - if (priv->oldtrash_dir) - GF_FREE (priv->oldtrash_dir); 
- if (priv->brick_path) - GF_FREE (priv->brick_path); - if (priv->eliminate) - wipe_eliminate_path (&priv->eliminate); - GF_FREE (priv); - } - mem_pool_destroy (this->local_pool); - this->private = NULL; + trash_private_t *priv = NULL; + inode_table_t *inode_table = NULL; + + GF_VALIDATE_OR_GOTO("trash", this, out); + priv = this->private; + if (priv) { + inode_table = priv->trash_itable; + if (priv->newtrash_dir) { + GF_FREE(priv->newtrash_dir); + priv->newtrash_dir = NULL; + } + if (priv->oldtrash_dir) { + GF_FREE(priv->oldtrash_dir); + priv->oldtrash_dir = NULL; + } + if (priv->brick_path) { + GF_FREE(priv->brick_path); + priv->brick_path = NULL; + } + if (priv->eliminate) { + wipe_eliminate_path(&priv->eliminate); + priv->eliminate = NULL; + } + if (inode_table) { + inode_table_destroy(inode_table); + priv->trash_itable = NULL; + } + GF_FREE(priv); + } + + if (this->local_pool) { + mem_pool_destroy(this->local_pool); + this->local_pool = NULL; + } + this->private = NULL; out: - return; + return; } struct xlator_fops fops = { - .unlink = trash_unlink, - .truncate = trash_truncate, - .ftruncate = trash_ftruncate, - .rmdir = trash_rmdir, - .mkdir = trash_mkdir, - .rename = trash_rename, + .unlink = trash_unlink, + .truncate = trash_truncate, + .ftruncate = trash_ftruncate, + .rmdir = trash_rmdir, + .mkdir = trash_mkdir, + .rename = trash_rename, }; -struct xlator_cbks cbks = { -}; +struct xlator_cbks cbks = {}; struct volume_options options[] = { - { .key = { "trash" }, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", - .description = "Enable/disable trash translator", - }, - { .key = { "trash-dir" }, - .type = GF_OPTION_TYPE_STR, - .default_value = ".trashcan", - .description = "Directory for trash files", - }, - { .key = { "trash-eliminate-path" }, - .type = GF_OPTION_TYPE_STR, - .description = "Eliminate paths to be excluded " - "from trashing", - }, - { .key = { "trash-max-filesize" }, - .type = GF_OPTION_TYPE_SIZET, - .default_value = "5MB", - 
.description = "Maximum size of file that can be " - "moved to trash", - }, - { .key = { "trash-internal-op" }, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", - .description = "Enable/disable trash translator for " - "internal operations", - }, - { .key = {NULL} }, + { + .key = {"trash"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "Enable/disable trash translator", + .op_version = {GD_OP_VERSION_3_7_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"backup"}, + }, + { + .key = {"trash-dir"}, + .type = GF_OPTION_TYPE_STR, + .default_value = ".trashcan", + .description = "Directory for trash files", + .op_version = {GD_OP_VERSION_3_7_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"backup"}, + }, + { + .key = {"trash-eliminate-path"}, + .type = GF_OPTION_TYPE_STR, + .description = "Eliminate paths to be excluded " + "from trashing", + .op_version = {GD_OP_VERSION_3_7_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"backup"}, + }, + { + .key = {"trash-max-filesize"}, + .type = GF_OPTION_TYPE_SIZET, + .default_value = "5MB", + .description = "Maximum size of file that can be " + "moved to trash", + .op_version = {GD_OP_VERSION_3_7_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"backup"}, + }, + { + .key = {"trash-internal-op"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "Enable/disable trash translator for " + "internal operations", + .op_version = {GD_OP_VERSION_3_7_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"backup"}, + }, + {.key = {"brick-path"}, + .type = GF_OPTION_TYPE_PATH, + .default_value = "{{ brick.path }}"}, + {.key = {NULL}}, +}; + +xlator_api_t xlator_api = { + .init = init, + .fini = fini, + .notify = notify, + .reconfigure = reconfigure, + .mem_acct_init = mem_acct_init, + .op_version = {1}, /* Present from the initial version */ + .fops = &fops, + .cbks = &cbks, + .options = options, + .identifier = "trash", + 
.category = GF_TECH_PREVIEW, }; diff --git a/xlators/features/trash/src/trash.h b/xlators/features/trash/src/trash.h index 9a57ac9f67e..6671617c2c6 100644 --- a/xlators/features/trash/src/trash.h +++ b/xlators/features/trash/src/trash.h @@ -10,70 +10,88 @@ #ifndef __TRASH_H__ #define __TRASH_H__ -#include "glusterfs.h" -#include "logging.h" -#include "dict.h" -#include "xlator.h" -#include "defaults.h" +#include <glusterfs/glusterfs.h> +#include <glusterfs/logging.h> +#include <glusterfs/dict.h> +#include <glusterfs/xlator.h> +#include <glusterfs/defaults.h> #include "inode.c" #include "fnmatch.h" #include <libgen.h> #ifndef GF_BLOCK_READV_SIZE -#define GF_BLOCK_READV_SIZE (128 * GF_UNIT_KB) +#define GF_BLOCK_READV_SIZE (128 * GF_UNIT_KB) #endif #ifndef GF_DEFAULT_MAX_FILE_SIZE #define GF_DEFAULT_MAX_FILE_SIZE (200 * GF_UNIT_MB) #endif -#ifndef GF_ALLOWED_MAX_FILE_SIZE -#define GF_ALLOWED_MAX_FILE_SIZE (1 * GF_UNIT_GB) -#endif - struct trash_struct { - fd_t *fd; /* for the fd of existing file */ - fd_t *newfd; /* for the newly created file */ - loc_t loc; /* to store the location of the existing file */ - loc_t newloc; /* to store the location for the new file */ - size_t fsize; /* for keeping the size of existing file */ - off_t cur_offset; /* current offset for read and write ops */ - off_t fop_offset; - char origpath[PATH_MAX]; - char newpath[PATH_MAX]; - int32_t loop_count; - struct iatt preparent; - struct iatt postparent; - gf_boolean_t ctr_link_count_req; + fd_t *fd; /* for the fd of existing file */ + fd_t *newfd; /* for the newly created file */ + loc_t loc; /* to store the location of the existing file */ + loc_t newloc; /* to store the location for the new file */ + size_t fsize; /* for keeping the size of existing file */ + off_t cur_offset; /* current offset for read and write ops */ + off_t fop_offset; /* original offset received with the fop */ + pid_t pid; + char origpath[PATH_MAX]; + char newpath[PATH_MAX]; + int32_t loop_count; + gf_boolean_t 
is_set_pid; + struct iatt preparent; + struct iatt postparent; + gf_boolean_t ctr_link_count_req; }; typedef struct trash_struct trash_local_t; struct _trash_elim_path { - struct _trash_elim_path *next; - char *path; + struct _trash_elim_path *next; + char *path; }; typedef struct _trash_elim_path trash_elim_path; struct trash_priv { - char *oldtrash_dir; - char *newtrash_dir; - char *brick_path; - trash_elim_path *eliminate; - size_t max_trash_file_size; - gf_boolean_t state; - gf_boolean_t internal; - inode_t *trash_inode; - inode_table_t *trash_itable; + char *oldtrash_dir; + char *newtrash_dir; + char *brick_path; + trash_elim_path *eliminate; + size_t max_trash_file_size; + gf_boolean_t state; + gf_boolean_t internal; + inode_t *trash_inode; + inode_table_t *trash_itable; }; typedef struct trash_priv trash_private_t; -#define TRASH_STACK_UNWIND(op, frame, params ...) do { \ - trash_local_t *__local = NULL; \ - __local = frame->local; \ - frame->local = NULL; \ - STACK_UNWIND_STRICT (op, frame, params); \ - trash_local_wipe (__local); \ - } while (0) +#define TRASH_SET_PID(frame, local) \ + do { \ + GF_ASSERT(!local->is_set_pid); \ + if (!local->is_set_pid) { \ + local->pid = frame->root->pid; \ + frame->root->pid = GF_SERVER_PID_TRASH; \ + local->is_set_pid = _gf_true; \ + } \ + } while (0) + +#define TRASH_UNSET_PID(frame, local) \ + do { \ + GF_ASSERT(local->is_set_pid); \ + if (local->is_set_pid) { \ + frame->root->pid = local->pid; \ + local->is_set_pid = _gf_false; \ + } \ + } while (0) + +#define TRASH_STACK_UNWIND(op, frame, params...) 
\ + do { \ + trash_local_t *__local = NULL; \ + __local = frame->local; \ + frame->local = NULL; \ + STACK_UNWIND_STRICT(op, frame, params); \ + trash_local_wipe(__local); \ + } while (0) #endif /* __TRASH_H__ */ diff --git a/xlators/features/upcall/src/Makefile.am b/xlators/features/upcall/src/Makefile.am index a81c36940e5..72b7f55ae0a 100644 --- a/xlators/features/upcall/src/Makefile.am +++ b/xlators/features/upcall/src/Makefile.am @@ -1,20 +1,22 @@ +if WITH_SERVER xlator_LTLIBRARIES = upcall.la +endif xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features -upcall_la_LDFLAGS = $(GF_XLATOR_DEFAULT_LDFLAGS) +upcall_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) upcall_la_SOURCES = upcall.c upcall-internal.c upcall_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ - $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \ - $(top_builddir)/rpc/xdr/src/libgfxdr.la + $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \ + $(top_builddir)/rpc/xdr/src/libgfxdr.la noinst_HEADERS = upcall.h upcall-mem-types.h upcall-messages.h \ upcall-cache-invalidation.h AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ - -I$(top_srcdir)/rpc/rpc-lib/src \ - -I$(top_srcdir)/rpc/xdr/src + -I$(top_srcdir)/rpc/rpc-lib/src \ + -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src AM_CFLAGS = -Wall -fno-strict-aliasing $(GF_CFLAGS) diff --git a/xlators/features/upcall/src/upcall-cache-invalidation.h b/xlators/features/upcall/src/upcall-cache-invalidation.h index 77286b063d8..db649b2c9a6 100644 --- a/xlators/features/upcall/src/upcall-cache-invalidation.h +++ b/xlators/features/upcall/src/upcall-cache-invalidation.h @@ -15,42 +15,4 @@ * events post its last access */ #define CACHE_INVALIDATION_TIMEOUT "60" -/* Flags sent for cache_invalidation */ -#define UP_NLINK 0x00000001 /* update nlink */ -#define UP_MODE 0x00000002 /* update mode and ctime */ -#define UP_OWN 0x00000004 /* update mode,uid,gid and ctime */ -#define UP_SIZE 0x00000008 /* update fsize */ -#define 
UP_TIMES 0x00000010 /* update all times */ -#define UP_ATIME 0x00000020 /* update atime only */ -#define UP_PERM 0x00000040 /* update fields needed for - permission checking */ -#define UP_RENAME 0x00000080 /* this is a rename op - - delete the cache entry */ -#define UP_FORGET 0x00000100 /* inode_forget on server side - - invalidate the cache entry */ -#define UP_PARENT_TIMES 0x00000200 /* update parent dir times */ - -/* for fops - open, read, lk, */ -#define UP_UPDATE_CLIENT (UP_ATIME) - -/* for fop - write, truncate */ -#define UP_WRITE_FLAGS (UP_SIZE | UP_TIMES) - -/* for fop - setattr */ -#define UP_ATTR_FLAGS (UP_SIZE | UP_TIMES | UP_OWN | \ - UP_MODE | UP_PERM) -/* for fop - rename */ -#define UP_RENAME_FLAGS (UP_RENAME) - -/* to invalidate parent directory entries for fops -rename, unlink, - * rmdir, mkdir, create */ -#define UP_PARENT_DENTRY_FLAGS (UP_PARENT_TIMES) - -/* for fop - unlink, link, rmdir, mkdir */ -#define UP_NLINK_FLAGS (UP_NLINK | UP_TIMES) - -/* xlator options */ -gf_boolean_t is_cache_invalidation_enabled(xlator_t *this); -int32_t get_cache_invalidation_timeout(xlator_t *this); - #endif /* __UPCALL_CACHE_INVALIDATION_H__ */ diff --git a/xlators/features/upcall/src/upcall-internal.c b/xlators/features/upcall/src/upcall-internal.c index 007a8f854bf..c641bd6f432 100644 --- a/xlators/features/upcall/src/upcall-internal.c +++ b/xlators/features/upcall/src/upcall-internal.c @@ -12,377 +12,287 @@ #include <fcntl.h> #include <limits.h> -#include "glusterfs.h" -#include "compat.h" -#include "xlator.h" -#include "inode.h" -#include "logging.h" -#include "common-utils.h" +#include <glusterfs/glusterfs.h> +#include <glusterfs/compat.h> +#include <glusterfs/xlator.h> +#include <glusterfs/logging.h> +#include <glusterfs/common-utils.h> -#include "statedump.h" -#include "syncop.h" +#include <glusterfs/statedump.h> +#include <glusterfs/syncop.h> #include "upcall.h" #include "upcall-mem-types.h" #include "glusterfs3-xdr.h" #include "protocol-common.h" 
-#include "defaults.h" +#include <glusterfs/defaults.h> /* * Check if any of the upcall options are enabled: * - cache_invalidation - * - XXX: lease_lk */ gf_boolean_t -is_upcall_enabled(xlator_t *this) { - upcall_private_t *priv = NULL; - gf_boolean_t is_enabled = _gf_false; - - if (this->private) { - priv = (upcall_private_t *)this->private; - - if (priv->cache_invalidation_enabled) { - is_enabled = _gf_true; - } - } - - return is_enabled; -} - -/* - * Check if any of cache_invalidation is enabled - */ -gf_boolean_t -is_cache_invalidation_enabled(xlator_t *this) { - upcall_private_t *priv = NULL; - gf_boolean_t is_enabled = _gf_false; - - if (this->private) { - priv = (upcall_private_t *)this->private; +is_upcall_enabled(xlator_t *this) +{ + upcall_private_t *priv = NULL; - if (priv->cache_invalidation_enabled) { - is_enabled = _gf_true; - } - } + if (this->private) { + priv = (upcall_private_t *)this->private; + return priv->cache_invalidation_enabled; + } - return is_enabled; + return _gf_false; } /* * Get the cache_invalidation_timeout */ -int32_t -get_cache_invalidation_timeout(xlator_t *this) { - upcall_private_t *priv = NULL; - int32_t timeout = 0; - - if (this->private) { - priv = (upcall_private_t *)this->private; - timeout = priv->cache_invalidation_timeout; - } - - return timeout; -} - -/* - * Allocate and add a new client entry to the given upcall entry - */ -upcall_client_t* -add_upcall_client (call_frame_t *frame, uuid_t gfid, - client_t *client, - upcall_inode_ctx_t *up_inode_ctx) +static int32_t +get_cache_invalidation_timeout(xlator_t *this) { - upcall_client_t *up_client_entry = NULL; + upcall_private_t *priv = NULL; - pthread_mutex_lock (&up_inode_ctx->client_list_lock); - { - up_client_entry = __add_upcall_client (frame, - gfid, - client, - up_inode_ctx); - } - pthread_mutex_unlock (&up_inode_ctx->client_list_lock); + if (this->private) { + priv = (upcall_private_t *)this->private; + return priv->cache_invalidation_timeout; + } - return 
up_client_entry; + return 0; } -upcall_client_t* -__add_upcall_client (call_frame_t *frame, uuid_t gfid, - client_t *client, - upcall_inode_ctx_t *up_inode_ctx) +static upcall_client_t * +__add_upcall_client(call_frame_t *frame, client_t *client, + upcall_inode_ctx_t *up_inode_ctx, time_t now) { - upcall_client_t *up_client_entry = NULL; - - up_client_entry = GF_CALLOC (1, sizeof(*up_client_entry), - gf_upcall_mt_upcall_client_entry_t); - if (!up_client_entry) { - gf_msg ("upcall", GF_LOG_WARNING, 0, - UPCALL_MSG_NO_MEMORY, - "Memory allocation failed"); - return NULL; - } - INIT_LIST_HEAD (&up_client_entry->client_list); - up_client_entry->client_uid = gf_strdup(client->client_uid); - up_client_entry->access_time = time(NULL); - up_client_entry->expire_time_attr = - get_cache_invalidation_timeout(frame->this); + upcall_client_t *up_client_entry = GF_MALLOC( + sizeof(*up_client_entry), gf_upcall_mt_upcall_client_entry_t); + if (!up_client_entry) { + gf_msg("upcall", GF_LOG_WARNING, 0, UPCALL_MSG_NO_MEMORY, + "Memory allocation failed"); + return NULL; + } + INIT_LIST_HEAD(&up_client_entry->client_list); + up_client_entry->client_uid = gf_strdup(client->client_uid); + up_client_entry->access_time = now; + up_client_entry->expire_time_attr = get_cache_invalidation_timeout( + frame->this); - list_add_tail (&up_client_entry->client_list, - &up_inode_ctx->client_list); + list_add_tail(&up_client_entry->client_list, &up_inode_ctx->client_list); - gf_log (THIS->name, GF_LOG_DEBUG, "upcall_entry_t client added - %s", - up_client_entry->client_uid); + gf_log(THIS->name, GF_LOG_DEBUG, "upcall_entry_t client added - %s", + up_client_entry->client_uid); - return up_client_entry; + return up_client_entry; } -/* - * Given gfid and client->uid, retrieve the corresponding upcall client entry. - * If none found, create a new entry. 
- */ -upcall_client_t* -__get_upcall_client (call_frame_t *frame, uuid_t gfid, client_t *client, - upcall_inode_ctx_t *up_inode_ctx) +static int +__upcall_inode_ctx_set(inode_t *inode, xlator_t *this) { - upcall_client_t *up_client_entry = NULL; - upcall_client_t *up_client = NULL; - upcall_client_t *tmp = NULL; - gf_boolean_t found_client = _gf_false; - - list_for_each_entry_safe (up_client_entry, tmp, - &up_inode_ctx->client_list, - client_list) { - if (strcmp(client->client_uid, - up_client_entry->client_uid) == 0) { - /* found client entry. Update the access_time */ - up_client_entry->access_time = time(NULL); - found_client = _gf_true; - gf_log (THIS->name, GF_LOG_DEBUG, - "upcall_entry_t client found - %s", - up_client_entry->client_uid); - break; - } - } - - if (!found_client) { /* create one */ - up_client_entry = __add_upcall_client (frame, gfid, client, - up_inode_ctx); - } - - return up_client_entry; + upcall_inode_ctx_t *inode_ctx = NULL; + upcall_private_t *priv = NULL; + int ret = -1; + uint64_t ctx = 0; + + priv = this->private; + GF_ASSERT(priv); + + ret = __inode_ctx_get(inode, this, &ctx); + + if (!ret) + goto out; + + inode_ctx = GF_MALLOC(sizeof(upcall_inode_ctx_t), + gf_upcall_mt_upcall_inode_ctx_t); + + if (!inode_ctx) { + ret = -ENOMEM; + goto out; + } + + pthread_mutex_init(&inode_ctx->client_list_lock, NULL); + INIT_LIST_HEAD(&inode_ctx->inode_ctx_list); + INIT_LIST_HEAD(&inode_ctx->client_list); + inode_ctx->destroy = 0; + gf_uuid_copy(inode_ctx->gfid, inode->gfid); + + ctx = (long)inode_ctx; + ret = __inode_ctx_set(inode, this, &ctx); + if (ret) { + gf_log(this->name, GF_LOG_DEBUG, "failed to set inode ctx (%p)", inode); + GF_FREE(inode_ctx); + goto out; + } + + /* add this inode_ctx to the global list */ + LOCK(&priv->inode_ctx_lk); + { + list_add_tail(&inode_ctx->inode_ctx_list, &priv->inode_ctx_list); + } + UNLOCK(&priv->inode_ctx_lk); +out: + return ret; } -int -__upcall_inode_ctx_set (inode_t *inode, xlator_t *this) +static 
upcall_inode_ctx_t * +__upcall_inode_ctx_get(inode_t *inode, xlator_t *this) { - upcall_inode_ctx_t *inode_ctx = NULL; - upcall_private_t *priv = NULL; - int ret = -1; - uint64_t ctx = 0; - - priv = this->private; - GF_ASSERT(priv); + upcall_inode_ctx_t *inode_ctx = NULL; + uint64_t ctx = 0; + int ret = 0; - ret = __inode_ctx_get (inode, this, &ctx); + ret = __inode_ctx_get(inode, this, &ctx); - if (!ret) - goto out; + if (ret < 0) { + ret = __upcall_inode_ctx_set(inode, this); + if (ret < 0) + goto out; - inode_ctx = GF_CALLOC (1, sizeof (upcall_inode_ctx_t), - gf_upcall_mt_upcall_inode_ctx_t); - - if (!inode_ctx) { - ret = -ENOMEM; - goto out; - } + ret = __inode_ctx_get(inode, this, &ctx); + if (ret < 0) + goto out; + } - pthread_mutex_init (&inode_ctx->client_list_lock, NULL); - INIT_LIST_HEAD (&inode_ctx->inode_ctx_list); - INIT_LIST_HEAD (&inode_ctx->client_list); - inode_ctx->destroy = 0; - - ctx = (long) inode_ctx; - ret = __inode_ctx_set (inode, this, &ctx); - if (ret) { - gf_log (this->name, GF_LOG_DEBUG, - "failed to set inode ctx (%p)", inode); - goto out; - } + inode_ctx = (upcall_inode_ctx_t *)(long)(ctx); - /* add this inode_ctx to the global list */ - LOCK (&priv->inode_ctx_lk); - { - list_add_tail (&inode_ctx->inode_ctx_list, - &priv->inode_ctx_list); - } - UNLOCK (&priv->inode_ctx_lk); out: - return ret; + return inode_ctx; } upcall_inode_ctx_t * -__upcall_inode_ctx_get (inode_t *inode, xlator_t *this) +upcall_inode_ctx_get(inode_t *inode, xlator_t *this) { - upcall_inode_ctx_t *inode_ctx = NULL; - uint64_t ctx = 0; - int ret = 0; - - ret = __inode_ctx_get (inode, this, &ctx); + upcall_inode_ctx_t *inode_ctx = NULL; - if (ret < 0) { - ret = __upcall_inode_ctx_set (inode, this); - if (ret < 0) - goto out; + LOCK(&inode->lock); + { + inode_ctx = __upcall_inode_ctx_get(inode, this); + } + UNLOCK(&inode->lock); - ret = __inode_ctx_get (inode, this, &ctx); - if (ret < 0) - goto out; - } - - inode_ctx = (upcall_inode_ctx_t *) (long) (ctx); - -out: - 
return inode_ctx; + return inode_ctx; } -upcall_inode_ctx_t * -upcall_inode_ctx_get (inode_t *inode, xlator_t *this) +static int +__upcall_cleanup_client_entry(upcall_client_t *up_client) { - upcall_inode_ctx_t *inode_ctx = NULL; + list_del_init(&up_client->client_list); - LOCK (&inode->lock); - { - inode_ctx = __upcall_inode_ctx_get (inode, this); - } - UNLOCK (&inode->lock); + GF_FREE(up_client->client_uid); + GF_FREE(up_client); - return inode_ctx; + return 0; } -int -upcall_cleanup_expired_clients (xlator_t *this, - upcall_inode_ctx_t *up_inode_ctx) { +static int +upcall_cleanup_expired_clients(xlator_t *this, upcall_inode_ctx_t *up_inode_ctx, + time_t now) +{ + upcall_client_t *up_client = NULL; + upcall_client_t *tmp = NULL; + int ret = -1; + time_t timeout = 0; + time_t t_expired = 0; + + timeout = get_cache_invalidation_timeout(this); + + pthread_mutex_lock(&up_inode_ctx->client_list_lock); + { + list_for_each_entry_safe(up_client, tmp, &up_inode_ctx->client_list, + client_list) + { + t_expired = now - up_client->access_time; - upcall_client_t *up_client = NULL; - upcall_client_t *tmp = NULL; - int ret = -1; - time_t timeout = 0; - time_t t_expired = 0; + if (t_expired > (2 * timeout)) { + gf_log(THIS->name, GF_LOG_TRACE, "Cleaning up client_entry(%s)", + up_client->client_uid); - timeout = get_cache_invalidation_timeout(this); + ret = __upcall_cleanup_client_entry(up_client); - pthread_mutex_lock (&up_inode_ctx->client_list_lock); - { - list_for_each_entry_safe (up_client, - tmp, - &up_inode_ctx->client_list, - client_list) { - t_expired = time(NULL) - - up_client->access_time; - - if (t_expired > (2*timeout)) { - ret = - __upcall_cleanup_client_entry (up_client); - - if (ret) { - gf_msg ("upcall", GF_LOG_WARNING, 0, - UPCALL_MSG_INTERNAL_ERROR, - "Client entry cleanup failed (%p)", - up_client); - goto out; - } - gf_log (THIS->name, GF_LOG_TRACE, - "Cleaned up client_entry(%s)", - up_client->client_uid); - } + if (ret) { + gf_msg("upcall", GF_LOG_WARNING, 
0, + UPCALL_MSG_INTERNAL_ERROR, + "Client entry cleanup failed (%p)", up_client); + goto out; } + } } - pthread_mutex_unlock (&up_inode_ctx->client_list_lock); + } + pthread_mutex_unlock(&up_inode_ctx->client_list_lock); - ret = 0; + ret = 0; out: - return ret; -} - -int -__upcall_cleanup_client_entry (upcall_client_t *up_client) -{ - list_del_init (&up_client->client_list); - - GF_FREE (up_client->client_uid); - GF_FREE (up_client); - - return 0; + return ret; } /* * Free Upcall inode_ctx client list */ int -__upcall_cleanup_inode_ctx_client_list (upcall_inode_ctx_t *inode_ctx) +__upcall_cleanup_inode_ctx_client_list(upcall_inode_ctx_t *inode_ctx) { - upcall_client_t *up_client = NULL; - upcall_client_t *tmp = NULL; + upcall_client_t *up_client = NULL; + upcall_client_t *tmp = NULL; - list_for_each_entry_safe (up_client, tmp, - &inode_ctx->client_list, - client_list) { - __upcall_cleanup_client_entry (up_client); - } + list_for_each_entry_safe(up_client, tmp, &inode_ctx->client_list, + client_list) + { + __upcall_cleanup_client_entry(up_client); + } - return 0; + return 0; } +static void +upcall_cache_forget(xlator_t *this, inode_t *inode, + upcall_inode_ctx_t *up_inode_ctx); + /* * Free upcall_inode_ctx */ int -upcall_cleanup_inode_ctx (xlator_t *this, inode_t *inode) +upcall_cleanup_inode_ctx(xlator_t *this, inode_t *inode) { - uint64_t ctx = 0; - upcall_inode_ctx_t *inode_ctx = NULL; - int ret = 0; - upcall_private_t *priv = NULL; - - priv = this->private; - GF_ASSERT(priv); - - ret = inode_ctx_del (inode, this, &ctx); - - if (ret < 0) { - gf_msg ("upcall", GF_LOG_WARNING, 0, - UPCALL_MSG_INTERNAL_ERROR, - "Failed to del upcall_inode_ctx (%p)", - inode); - goto out; - } + uint64_t ctx = 0; + upcall_inode_ctx_t *inode_ctx = NULL; + int ret = 0; + upcall_private_t *priv = NULL; - inode_ctx = (upcall_inode_ctx_t *)(long) ctx; + priv = this->private; + GF_ASSERT(priv); - if (inode_ctx) { + ret = inode_ctx_del(inode, this, &ctx); - /* Invalidate all the upcall cache 
entries */ - upcall_cache_forget (this, inode, inode_ctx); + if (ret < 0) { + gf_msg("upcall", GF_LOG_WARNING, 0, UPCALL_MSG_INTERNAL_ERROR, + "Failed to del upcall_inode_ctx (%p)", inode); + goto out; + } - /* do we really need lock? yes now reaper thread - * may also be trying to cleanup the client entries. - */ - pthread_mutex_lock (&inode_ctx->client_list_lock); - { - if (!list_empty (&inode_ctx->client_list)) { - __upcall_cleanup_inode_ctx_client_list (inode_ctx); - } - } - pthread_mutex_unlock (&inode_ctx->client_list_lock); + inode_ctx = (upcall_inode_ctx_t *)(long)ctx; - /* Mark the inode_ctx to be destroyed */ - inode_ctx->destroy = 1; - gf_msg_debug ("upcall", 0, "set upcall_inode_ctx (%p) to destroy mode", - inode_ctx); + if (inode_ctx) { + /* Invalidate all the upcall cache entries */ + upcall_cache_forget(this, inode, inode_ctx); + + /* do we really need lock? yes now reaper thread + * may also be trying to cleanup the client entries. + */ + pthread_mutex_lock(&inode_ctx->client_list_lock); + { + if (!list_empty(&inode_ctx->client_list)) { + __upcall_cleanup_inode_ctx_client_list(inode_ctx); + } } + pthread_mutex_unlock(&inode_ctx->client_list_lock); + + /* Mark the inode_ctx to be destroyed */ + inode_ctx->destroy = 1; + gf_msg_debug("upcall", 0, "set upcall_inode_ctx (%p) to destroy mode", + inode_ctx); + } out: - return ret; + return ret; } /* @@ -391,74 +301,165 @@ out: * which is no longer valid and has destroy bit set. 
*/ void * -upcall_reaper_thread (void *data) +upcall_reaper_thread(void *data) { - upcall_private_t *priv = NULL; - upcall_inode_ctx_t *inode_ctx = NULL; - upcall_inode_ctx_t *tmp = NULL; - xlator_t *this = NULL; - time_t timeout = 0; - - this = (xlator_t *)data; - GF_ASSERT (this); - - priv = this->private; - GF_ASSERT (priv); - - - while (!priv->fini) { - list_for_each_entry_safe (inode_ctx, tmp, - &priv->inode_ctx_list, - inode_ctx_list) { - - /* cleanup expired clients */ - upcall_cleanup_expired_clients (this, inode_ctx); - - if (!inode_ctx->destroy) { - continue; - } - - LOCK (&priv->inode_ctx_lk); - { - /* client list would have been cleaned up*/ - gf_msg_debug ("upcall", 0, "Freeing upcall_inode_ctx (%p)", - inode_ctx); - list_del_init (&inode_ctx->inode_ctx_list); - pthread_mutex_destroy (&inode_ctx->client_list_lock); - GF_FREE (inode_ctx); - inode_ctx = NULL; - } - UNLOCK (&priv->inode_ctx_lk); - } - - /* don't do a very busy loop */ - timeout = get_cache_invalidation_timeout (this); - sleep (timeout / 2); + upcall_private_t *priv = NULL; + upcall_inode_ctx_t *inode_ctx = NULL; + upcall_inode_ctx_t *tmp = NULL; + xlator_t *this = NULL; + time_t timeout = 0; + time_t time_now; + + this = (xlator_t *)data; + GF_ASSERT(this); + + priv = this->private; + GF_ASSERT(priv); + + time_now = gf_time(); + while (!priv->fini) { + list_for_each_entry_safe(inode_ctx, tmp, &priv->inode_ctx_list, + inode_ctx_list) + { + /* cleanup expired clients */ + upcall_cleanup_expired_clients(this, inode_ctx, time_now); + + if (!inode_ctx->destroy) { + continue; + } + + /* client list would have been cleaned up*/ + gf_msg_debug("upcall", 0, "Freeing upcall_inode_ctx (%p)", + inode_ctx); + LOCK(&priv->inode_ctx_lk); + { + list_del_init(&inode_ctx->inode_ctx_list); + pthread_mutex_destroy(&inode_ctx->client_list_lock); + } + UNLOCK(&priv->inode_ctx_lk); + GF_FREE(inode_ctx); + inode_ctx = NULL; } - return NULL; + /* don't do a very busy loop */ + timeout = 
get_cache_invalidation_timeout(this); + sleep(timeout / 2); + time_now = gf_time(); + } + + return NULL; } /* * Initialize upcall reaper thread. */ int -upcall_reaper_thread_init (xlator_t *this) +upcall_reaper_thread_init(xlator_t *this) +{ + upcall_private_t *priv = NULL; + int ret = -1; + + priv = this->private; + GF_ASSERT(priv); + + ret = gf_thread_create(&priv->reaper_thr, NULL, upcall_reaper_thread, this, + "upreaper"); + + return ret; +} + +int +up_compare_afr_xattr(dict_t *d, char *k, data_t *v, void *tmp) +{ + dict_t *dict = tmp; + + if (!strncmp(k, AFR_XATTR_PREFIX, SLEN(AFR_XATTR_PREFIX)) && + (!is_data_equal(v, dict_get(dict, k)))) + return -1; + + return 0; +} + +static void +up_filter_afr_xattr(dict_t *xattrs, char *xattr, data_t *v) { - upcall_private_t *priv = NULL; - int ret = -1; + /* Filter the afr pending xattrs, with value 0. Ideally this should + * be executed only in case of xattrop and not in set and removexattr, + * butset and remove xattr fops do not come with keys AFR_XATTR_PREFIX + */ + if (!strncmp(xattr, AFR_XATTR_PREFIX, SLEN(AFR_XATTR_PREFIX)) && + (mem_0filled(v->data, v->len) == 0)) { + dict_del(xattrs, xattr); + } + return; +} + +static gf_boolean_t +up_key_is_regd_xattr(dict_t *regd_xattrs, char *regd_xattr, data_t *v, + void *xattr) +{ + int ret = _gf_false; + char *key = xattr; + + if (fnmatch(regd_xattr, key, 0) == 0) + ret = _gf_true; - priv = this->private; - GF_ASSERT (priv); + return ret; +} - ret = pthread_create (&priv->reaper_thr, NULL, - upcall_reaper_thread, this); +int +up_filter_unregd_xattr(dict_t *xattrs, char *xattr, data_t *v, + void *regd_xattrs) +{ + int ret = 0; - return ret; + ret = dict_foreach_match(regd_xattrs, up_key_is_regd_xattr, xattr, + dict_null_foreach_fn, NULL); + if (ret == 0) { + /* xattr was not found in the registered xattr, hence do not + * send notification for its change + */ + dict_del(xattrs, xattr); + goto out; + } + up_filter_afr_xattr(xattrs, xattr, v); +out: + return 0; +} + +int 
+up_filter_xattr(dict_t *xattr, dict_t *regd_xattrs) +{ + int ret = 0; + + ret = dict_foreach(xattr, up_filter_unregd_xattr, regd_xattrs); + + return ret; +} + +static void +upcall_client_cache_invalidate(xlator_t *this, uuid_t gfid, + upcall_client_t *up_client_entry, uint32_t flags, + struct iatt *stbuf, struct iatt *p_stbuf, + struct iatt *oldp_stbuf, dict_t *xattr, + time_t now); + +gf_boolean_t +up_invalidate_needed(dict_t *xattrs) +{ + if (dict_key_count(xattrs) == 0) { + gf_msg_trace("upcall", 0, + "None of xattrs requested for" + " invalidation, were changed. Nothing to " + "invalidate"); + return _gf_false; + } + + return _gf_true; } /* - * Given a gfid, client, first fetch upcall_entry_t based on gfid. + * Given a client, first fetch upcall_entry_t from the inode_ctx client list. * Later traverse through the client list of that upcall entry. If this client * is not present in the list, create one client entry with this client info. * Also check if there are other clients which need to be notified of this @@ -468,184 +469,221 @@ upcall_reaper_thread_init (xlator_t *this) * any errors during the process are logged and ignored. */ void -upcall_cache_invalidate (call_frame_t *frame, xlator_t *this, client_t *client, - inode_t *inode, uint32_t flags, struct iatt *stbuf, - struct iatt *p_stbuf, struct iatt *oldp_stbuf) +upcall_cache_invalidate(call_frame_t *frame, xlator_t *this, client_t *client, + inode_t *inode, uint32_t flags, struct iatt *stbuf, + struct iatt *p_stbuf, struct iatt *oldp_stbuf, + dict_t *xattr) { - upcall_client_t *up_client = NULL; - upcall_client_t *up_client_entry = NULL; - upcall_client_t *tmp = NULL; - upcall_inode_ctx_t *up_inode_ctx = NULL; - gf_boolean_t found = _gf_false; - - if (!is_cache_invalidation_enabled(this)) - return; - - /* server-side generated fops like quota/marker will not have any - * client associated with them. Ignore such fops. 
- */ - if (!client) { - gf_msg_debug ("upcall", 0, "Internal fop - client NULL"); - return; - } - - up_inode_ctx = ((upcall_local_t *)frame->local)->upcall_inode_ctx; - - if (!up_inode_ctx) - up_inode_ctx = upcall_inode_ctx_get (inode, this); - - if (!up_inode_ctx) { - gf_msg ("upcall", GF_LOG_WARNING, 0, - UPCALL_MSG_INTERNAL_ERROR, - "upcall_inode_ctx_get failed (%p)", - inode); - return; + upcall_client_t *up_client_entry = NULL; + upcall_client_t *tmp = NULL; + upcall_inode_ctx_t *up_inode_ctx = NULL; + gf_boolean_t found = _gf_false; + time_t time_now; + inode_t *linked_inode = NULL; + + if (!is_upcall_enabled(this)) + return; + + /* server-side generated fops like quota/marker will not have any + * client associated with them. Ignore such fops. + */ + if (!client) { + gf_msg_debug("upcall", 0, "Internal fop - client NULL"); + return; + } + + /* For nameless LOOKUPs, inode created shall always be + * invalid. Hence check if there is any already linked inode. + * If yes, update the inode_ctx of that valid inode + */ + if (inode && (inode->ia_type == IA_INVAL) && stbuf) { + linked_inode = inode_find(inode->table, stbuf->ia_gfid); + if (linked_inode) { + gf_log("upcall", GF_LOG_DEBUG, + "upcall_inode_ctx_get of linked inode (%p)", inode); + up_inode_ctx = upcall_inode_ctx_get(linked_inode, this); } - - pthread_mutex_lock (&up_inode_ctx->client_list_lock); + } + + if (inode && !up_inode_ctx) + up_inode_ctx = upcall_inode_ctx_get(inode, this); + + if (!up_inode_ctx) { + gf_msg("upcall", GF_LOG_WARNING, 0, UPCALL_MSG_INTERNAL_ERROR, + "upcall_inode_ctx_get failed (%p)", inode); + return; + } + + /* In case of LOOKUP, if first time, inode created shall be + * invalid till it gets linked to inode table. Read gfid from + * the stat returned in such cases. + */ + if (gf_uuid_is_null(up_inode_ctx->gfid) && stbuf) { + /* That means inode must have been invalid when this inode_ctx + * is created. Copy the gfid value from stbuf instead. 
+ */ + gf_uuid_copy(up_inode_ctx->gfid, stbuf->ia_gfid); + } + + if (gf_uuid_is_null(up_inode_ctx->gfid)) { + gf_msg_debug(this->name, 0, + "up_inode_ctx->gfid and " + "stbuf->ia_gfid is NULL, fop:%s", + gf_fop_list[frame->root->op]); + goto out; + } + + time_now = gf_time(); + pthread_mutex_lock(&up_inode_ctx->client_list_lock); + { + list_for_each_entry_safe(up_client_entry, tmp, + &up_inode_ctx->client_list, client_list) { - list_for_each_entry_safe (up_client_entry, tmp, - &up_inode_ctx->client_list, - client_list) { - - if (!strcmp(client->client_uid, - up_client_entry->client_uid)) { - up_client_entry->access_time = time(NULL); - found = _gf_true; - } - - /* - * Ignore sending notifications in case of only UP_ATIME - */ - if (!(flags & ~(UP_ATIME))) { - if (found) - break; - else /* we still need to find current client entry*/ - continue; - } - - /* any other client */ - - /* XXX: Send notifications asynchrounously - * instead of in the I/O path - BZ 1200264 - * Also if the file is frequently accessed, set - * expire_time_attr to 0. - */ - upcall_client_cache_invalidate(this, - inode->gfid, - up_client_entry, - flags, stbuf, - p_stbuf, oldp_stbuf); - } + /* Do not send UPCALL event if same client. */ + if (!strcmp(client->client_uid, up_client_entry->client_uid)) { + up_client_entry->access_time = time_now; + found = _gf_true; + continue; + } + + /* + * Ignore sending notifications in case of only UP_ATIME + */ + if (!(flags & ~(UP_ATIME))) { + if (found) + break; + else /* we still need to find current client entry*/ + continue; + } + + /* any other client */ + + /* XXX: Send notifications asynchrounously + * instead of in the I/O path - BZ 1200264 + * Also if the file is frequently accessed, set + * expire_time_attr to 0. 
+ */ + upcall_client_cache_invalidate( + this, up_inode_ctx->gfid, up_client_entry, flags, stbuf, + p_stbuf, oldp_stbuf, xattr, time_now); + } - if (!found) { - up_client_entry = __add_upcall_client (frame, - inode->gfid, - client, - up_inode_ctx); - } + if (!found) { + up_client_entry = __add_upcall_client(frame, client, up_inode_ctx, + time_now); } - pthread_mutex_unlock (&up_inode_ctx->client_list_lock); + } + pthread_mutex_unlock(&up_inode_ctx->client_list_lock); +out: + /* release the ref from inode_find */ + if (linked_inode) + inode_unref(linked_inode); + return; } /* * If the upcall_client_t has recently accessed the file (i.e, within * priv->cache_invalidation_timeout), send a upcall notification. */ -void -upcall_client_cache_invalidate (xlator_t *this, uuid_t gfid, - upcall_client_t *up_client_entry, - uint32_t flags, struct iatt *stbuf, - struct iatt *p_stbuf, - struct iatt *oldp_stbuf) +static void +upcall_client_cache_invalidate(xlator_t *this, uuid_t gfid, + upcall_client_t *up_client_entry, uint32_t flags, + struct iatt *stbuf, struct iatt *p_stbuf, + struct iatt *oldp_stbuf, dict_t *xattr, + time_t now) { - struct gf_upcall up_req = {0,}; - struct gf_upcall_cache_invalidation ca_req = {0,}; - time_t timeout = 0; - int ret = -1; - time_t t_expired = time(NULL) - up_client_entry->access_time; + struct gf_upcall up_req = { + 0, + }; + struct gf_upcall_cache_invalidation ca_req = { + 0, + }; + time_t timeout = 0; + int ret = -1; + time_t t_expired = now - up_client_entry->access_time; + + GF_VALIDATE_OR_GOTO("upcall_client_cache_invalidate", + !(gf_uuid_is_null(gfid)), out); + timeout = get_cache_invalidation_timeout(this); + + if (t_expired < timeout) { + /* Send notify call */ + up_req.client_uid = up_client_entry->client_uid; + gf_uuid_copy(up_req.gfid, gfid); + + ca_req.flags = flags; + ca_req.expire_time_attr = up_client_entry->expire_time_attr; + if (stbuf) + ca_req.stat = *stbuf; + if (p_stbuf) + ca_req.p_stat = *p_stbuf; + if (oldp_stbuf) + 
ca_req.oldp_stat = *oldp_stbuf; + ca_req.dict = xattr; + + up_req.data = &ca_req; + up_req.event_type = GF_UPCALL_CACHE_INVALIDATION; + + gf_log(THIS->name, GF_LOG_TRACE, + "Cache invalidation notification sent to %s", + up_client_entry->client_uid); + + /* Need to send inode flags */ + ret = this->notify(this, GF_EVENT_UPCALL, &up_req); + + /* + * notify may fail as the client could have been + * dis(re)connected. Cleanup the client entry. + */ + if (ret < 0) + __upcall_cleanup_client_entry(up_client_entry); - timeout = get_cache_invalidation_timeout(this); + } else { + gf_log(THIS->name, GF_LOG_TRACE, + "Cache invalidation notification NOT sent to %s", + up_client_entry->client_uid); - if (t_expired < timeout) { - /* Send notify call */ - up_req.client_uid = up_client_entry->client_uid; - gf_uuid_copy (up_req.gfid, gfid); - - ca_req.flags = flags; - ca_req.expire_time_attr = - up_client_entry->expire_time_attr; - if (stbuf) - ca_req.stat = *stbuf; - if (p_stbuf) - ca_req.p_stat = *p_stbuf; - if (oldp_stbuf) - ca_req.oldp_stat = *oldp_stbuf; - - up_req.data = &ca_req; - up_req.event_type = GF_UPCALL_CACHE_INVALIDATION; - - gf_log (THIS->name, GF_LOG_TRACE, - "Cache invalidation notification sent to %s", - up_client_entry->client_uid); - - /* Need to send inode flags */ - ret = this->notify (this, GF_EVENT_UPCALL, &up_req); - - /* - * notify may fail as the client could have been - * dis(re)connected. Cleanup the client entry. - */ - if (ret < 0) - __upcall_cleanup_client_entry (up_client_entry); - - } else { - gf_log (THIS->name, GF_LOG_TRACE, - "Cache invalidation notification NOT sent to %s", - up_client_entry->client_uid); - - if (t_expired > (2*timeout)) { - /* Cleanup the entry */ - __upcall_cleanup_client_entry (up_client_entry); - } + if (t_expired > (2 * timeout)) { + /* Cleanup the entry */ + __upcall_cleanup_client_entry(up_client_entry); } + } +out: + return; } /* - * This is called during upcall_inode_ctx cleanup incase of 'inode_forget'. 
+ * This is called during upcall_inode_ctx cleanup in case of 'inode_forget'. * Send "UP_FORGET" to all the clients so that they invalidate their cache * entry and do a fresh lookup next time when any I/O comes in. */ -void -upcall_cache_forget (xlator_t *this, inode_t *inode, upcall_inode_ctx_t *up_inode_ctx) +static void +upcall_cache_forget(xlator_t *this, inode_t *inode, + upcall_inode_ctx_t *up_inode_ctx) { - upcall_client_t *up_client = NULL; - upcall_client_t *up_client_entry = NULL; - upcall_client_t *tmp = NULL; - uint32_t flags = 0; - - if (!up_inode_ctx) { - return; - } - - pthread_mutex_lock (&up_inode_ctx->client_list_lock); + upcall_client_t *up_client_entry = NULL; + upcall_client_t *tmp = NULL; + uint32_t flags = UP_FORGET; + time_t time_now; + + if (!up_inode_ctx) { + return; + } + + time_now = gf_time(); + pthread_mutex_lock(&up_inode_ctx->client_list_lock); + { + list_for_each_entry_safe(up_client_entry, tmp, + &up_inode_ctx->client_list, client_list) { - list_for_each_entry_safe (up_client_entry, tmp, - &up_inode_ctx->client_list, - client_list) { - flags = UP_FORGET; - - /* Set the access time to time(NULL) - * to send notify */ - up_client_entry->access_time = time(NULL); - - upcall_client_cache_invalidate(this, - inode->gfid, - up_client_entry, - flags, NULL, - NULL, NULL); - } + /* Set the access time to gf_time() + * to send notify */ + up_client_entry->access_time = time_now; + upcall_client_cache_invalidate(this, up_inode_ctx->gfid, + up_client_entry, flags, NULL, NULL, + NULL, NULL, time_now); } - pthread_mutex_unlock (&up_inode_ctx->client_list_lock); + } + pthread_mutex_unlock(&up_inode_ctx->client_list_lock); } diff --git a/xlators/features/upcall/src/upcall-mem-types.h b/xlators/features/upcall/src/upcall-mem-types.h index 55793ec65ca..f9883d9d72c 100644 --- a/xlators/features/upcall/src/upcall-mem-types.h +++ b/xlators/features/upcall/src/upcall-mem-types.h @@ -11,14 +11,13 @@ #ifndef __UPCALL_MEM_TYPES_H__ #define 
__UPCALL_MEM_TYPES_H__ -#include "mem-types.h" +#include <glusterfs/mem-types.h> enum gf_upcall_mem_types_ { - gf_upcall_mt_conf_t = gf_common_mt_end + 1, - gf_upcall_mt_private_t, - gf_upcall_mt_upcall_inode_ctx_t, - gf_upcall_mt_upcall_client_entry_t, - gf_upcall_mt_end + gf_upcall_mt_conf_t = gf_common_mt_end + 1, + gf_upcall_mt_private_t, + gf_upcall_mt_upcall_inode_ctx_t, + gf_upcall_mt_upcall_client_entry_t, + gf_upcall_mt_end }; #endif - diff --git a/xlators/features/upcall/src/upcall-messages.h b/xlators/features/upcall/src/upcall-messages.h index 0cfdfd68b77..4095a34c200 100644 --- a/xlators/features/upcall/src/upcall-messages.h +++ b/xlators/features/upcall/src/upcall-messages.h @@ -11,49 +11,19 @@ #ifndef _UPCALL_MESSAGES_H_ #define _UPCALL_MESSAGES_H_ -#include "glfs-message-id.h" - -/*! \file upcall-messages.h - * \brief UPCALL log-message IDs and their descriptions. - */ - -/* NOTE: Rules for message additions - * 1) Each instance of a message is _better_ left with a unique message ID, even - * if the message format is the same. Reasoning is that, if the message - * format needs to change in one instance, the other instances are not - * impacted or the new change does not change the ID of the instance being - * modified. - * 2) Addition of a message, - * - Should increment the GLFS_NUM_MESSAGES - * - Append to the list of messages defined, towards the end - * - Retain macro naming as glfs_msg_X (for redability across developers) - * NOTE: Rules for message format modifications - * 3) Check across the code if the message ID macro in question is reused - * anywhere. If reused then then the modifications should ensure correctness - * everywhere, or needs a new message ID as (1) above was not adhered to. If - * not used anywhere, proceed with the required modification. - * NOTE: Rules for message deletion - * 4) Check (3) and if used anywhere else, then cannot be deleted. 
If not used - * anywhere, then can be deleted, but will leave a hole by design, as - * addition rules specify modification to the end of the list and not filling - * holes. - */ - -#define GLFS_COMP_BASE_UPCALL GLFS_MSGID_COMP_UPCALL -#define GLFS_NUM_MESSAGES 1 -#define GLFS_MSGID_END (GLFS_COMP_BASE_UPCALL + GLFS_NUM_MESSAGES + 1) - -#define glfs_msg_start_x GLFS_COMP_BASE_UPCALL, "Invalid: Start of messages" - -/*! - * @messageid 110001 - * @diagnosis Out of Memory - * @recommendedaction None +#include <glusterfs/glfs-message-id.h> + +/* To add new message IDs, append new identifiers at the end of the list. + * + * Never remove a message ID. If it's not used anymore, you can rename it or + * leave it as it is, but not delete it. This is to prevent reutilization of + * IDs by other messages. + * + * The component name must match one of the entries defined in + * glfs-message-id.h. */ -#define UPCALL_MSG_NO_MEMORY (GLFS_COMP_BASE_UPCALL + 1) -#define UPCALL_MSG_INTERNAL_ERROR (GLFS_COMP_BASE_UPCALL + 2) -#define UPCALL_MSG_NOTIFY_FAILED (GLFS_COMP_BASE_UPCALL + 3) -#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages" +GLFS_MSGID(UPCALL, UPCALL_MSG_NO_MEMORY, UPCALL_MSG_INTERNAL_ERROR, + UPCALL_MSG_NOTIFY_FAILED); #endif /* !_UPCALL_MESSAGES_H_ */ diff --git a/xlators/features/upcall/src/upcall.c b/xlators/features/upcall/src/upcall.c index fc04d4d5d51..0795f58059d 100644 --- a/xlators/features/upcall/src/upcall.c +++ b/xlators/features/upcall/src/upcall.c @@ -13,1821 +13,2493 @@ #include <limits.h> #include <pthread.h> -#include "glusterfs.h" -#include "compat.h" -#include "xlator.h" -#include "inode.h" -#include "logging.h" -#include "common-utils.h" +#include <glusterfs/glusterfs.h> +#include <glusterfs/compat.h> +#include <glusterfs/xlator.h> +#include <glusterfs/logging.h> +#include <glusterfs/common-utils.h> -#include "statedump.h" -#include "syncop.h" +#include <glusterfs/statedump.h> #include "upcall.h" #include "upcall-mem-types.h" #include 
"glusterfs3-xdr.h" #include "protocol-common.h" -#include "defaults.h" +#include <glusterfs/defaults.h> -int32_t -up_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) +static int32_t +up_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, fd_t *fd, dict_t *xdata) { - client_t *client = NULL; - uint32_t flags = 0; - upcall_local_t *local = NULL; + client_t *client = NULL; + uint32_t flags = 0; + upcall_local_t *local = NULL; - EXIT_IF_UPCALL_OFF (this, out); + EXIT_IF_UPCALL_OFF(this, out); - client = frame->root->client; - local = frame->local; + client = frame->root->client; + local = frame->local; - if ((op_ret < 0) || !local) { - goto out; - } - flags = UP_UPDATE_CLIENT; - upcall_cache_invalidate (frame, this, client, local->inode, flags, - NULL, NULL, NULL); + if ((op_ret < 0) || !local) { + goto out; + } + flags = UP_UPDATE_CLIENT; + upcall_cache_invalidate(frame, this, client, local->inode, flags, NULL, + NULL, NULL, NULL); out: - UPCALL_STACK_UNWIND (open, frame, op_ret, op_errno, fd, xdata); + UPCALL_STACK_UNWIND(open, frame, op_ret, op_errno, fd, xdata); - return 0; + return 0; } - -int32_t -up_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - fd_t *fd, dict_t *xdata) +static int32_t +up_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + fd_t *fd, dict_t *xdata) { - int32_t op_errno = -1; - upcall_local_t *local = NULL; + int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; - EXIT_IF_UPCALL_OFF (this, out); + EXIT_IF_UPCALL_OFF(this, out); - local = upcall_local_init (frame, this, fd->inode); - if (!local) { - op_errno = ENOMEM; - goto err; - } + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { + goto err; + } out: - STACK_WIND (frame, up_open_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->open, - loc, flags, fd, xdata); + STACK_WIND(frame, up_open_cbk, 
FIRST_CHILD(this), + FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); - return 0; + return 0; err: - UPCALL_STACK_UNWIND (open, frame, -1, op_errno, NULL, NULL); + UPCALL_STACK_UNWIND(open, frame, -1, op_errno, NULL, NULL); - return 0; + return 0; } -int32_t -up_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) +static int32_t +up_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, struct iatt *prebuf, struct iatt *postbuf, + dict_t *xdata) { - client_t *client = NULL; - uint32_t flags = 0; - upcall_local_t *local = NULL; + client_t *client = NULL; + uint32_t flags = 0; + upcall_local_t *local = NULL; - client = frame->root->client; - local = frame->local; + client = frame->root->client; + local = frame->local; - if ((op_ret < 0) || !local) { - goto out; - } - flags = UP_WRITE_FLAGS; - upcall_cache_invalidate (frame, this, client, local->inode, flags, - postbuf, NULL, NULL); + if ((op_ret < 0) || !local) { + goto out; + } + flags = UP_WRITE_FLAGS; + upcall_cache_invalidate(frame, this, client, local->inode, flags, postbuf, + NULL, NULL, NULL); out: - UPCALL_STACK_UNWIND (writev, frame, op_ret, op_errno, - prebuf, postbuf, xdata); + UPCALL_STACK_UNWIND(writev, frame, op_ret, op_errno, prebuf, postbuf, + xdata); - return 0; + return 0; } - -int32_t -up_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iovec *vector, int count, off_t off, uint32_t flags, - struct iobref *iobref, dict_t *xdata) +static int32_t +up_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, + int count, off_t off, uint32_t flags, struct iobref *iobref, + dict_t *xdata) { - int32_t op_errno = -1; - upcall_local_t *local = NULL; + int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; - EXIT_IF_UPCALL_OFF (this, out); + EXIT_IF_UPCALL_OFF(this, out); - local = upcall_local_init (frame, this, fd->inode); - if 
(!local) { - op_errno = ENOMEM; - goto err; - } + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { + goto err; + } out: - STACK_WIND (frame, up_writev_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev, - fd, vector, count, off, flags, iobref, xdata); + STACK_WIND(frame, up_writev_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->writev, fd, vector, count, off, flags, + iobref, xdata); - return 0; + return 0; err: - UPCALL_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL); + UPCALL_STACK_UNWIND(writev, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + return 0; } - -int32_t -up_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, - struct iovec *vector, int count, struct iatt *stbuf, - struct iobref *iobref, dict_t *xdata) +static int32_t +up_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, struct iovec *vector, int count, struct iatt *stbuf, + struct iobref *iobref, dict_t *xdata) { - client_t *client = NULL; - uint32_t flags = 0; - upcall_local_t *local = NULL; + client_t *client = NULL; + uint32_t flags = 0; + upcall_local_t *local = NULL; - EXIT_IF_UPCALL_OFF (this, out); + EXIT_IF_UPCALL_OFF(this, out); - client = frame->root->client; - local = frame->local; + client = frame->root->client; + local = frame->local; - if ((op_ret < 0) || !local) { - goto out; - } - flags = UP_UPDATE_CLIENT; - upcall_cache_invalidate (frame, this, client, local->inode, flags, - NULL, NULL, NULL); + if ((op_ret < 0) || !local) { + goto out; + } + flags = UP_UPDATE_CLIENT; + upcall_cache_invalidate(frame, this, client, local->inode, flags, stbuf, + NULL, NULL, NULL); out: - UPCALL_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, - count, stbuf, iobref, xdata); + UPCALL_STACK_UNWIND(readv, frame, op_ret, op_errno, vector, count, stbuf, + iobref, xdata); - return 0; + return 0; } -int32_t -up_readv (call_frame_t *frame, xlator_t *this, - fd_t 
*fd, size_t size, off_t offset, - uint32_t flags, dict_t *xdata) +static int32_t +up_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) { - int32_t op_errno = -1; - upcall_local_t *local = NULL; + int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; - EXIT_IF_UPCALL_OFF (this, out); + EXIT_IF_UPCALL_OFF(this, out); - local = upcall_local_init (frame, this, fd->inode); - if (!local) { - op_errno = ENOMEM; - goto err; - } + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { + goto err; + } out: - STACK_WIND (frame, up_readv_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->readv, - fd, size, offset, flags, xdata); + STACK_WIND(frame, up_readv_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata); - return 0; + return 0; err: - UPCALL_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, - NULL, NULL, NULL); + UPCALL_STACK_UNWIND(readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL); - return 0; + return 0; } -int32_t -up_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct gf_flock *lock, - dict_t *xdata) +static int32_t +up_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct gf_flock *lock, dict_t *xdata) { - client_t *client = NULL; - uint32_t flags = 0; - upcall_local_t *local = NULL; + client_t *client = NULL; + uint32_t flags = 0; + upcall_local_t *local = NULL; - EXIT_IF_UPCALL_OFF (this, out); + EXIT_IF_UPCALL_OFF(this, out); - client = frame->root->client; - local = frame->local; + client = frame->root->client; + local = frame->local; - if ((op_ret < 0) || !local) { - goto out; - } - flags = UP_UPDATE_CLIENT; - upcall_cache_invalidate (frame, this, client, local->inode, flags, - NULL, NULL, NULL); + if ((op_ret < 0) || !local) { + goto out; + } + flags = UP_UPDATE_CLIENT; + upcall_cache_invalidate(frame, this, client, 
local->inode, flags, NULL, + NULL, NULL, NULL); out: - UPCALL_STACK_UNWIND (lk, frame, op_ret, op_errno, lock, xdata); + UPCALL_STACK_UNWIND(lk, frame, op_ret, op_errno, lock, xdata); - return 0; + return 0; } -int32_t -up_lk (call_frame_t *frame, xlator_t *this, - fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata) +static int32_t +up_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, + struct gf_flock *flock, dict_t *xdata) { - int32_t op_errno = -1; - upcall_local_t *local = NULL; + int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; - EXIT_IF_UPCALL_OFF (this, out); + EXIT_IF_UPCALL_OFF(this, out); - local = upcall_local_init (frame, this, fd->inode); - if (!local) { - op_errno = ENOMEM; - goto err; - } + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { + goto err; + } out: - STACK_WIND (frame, up_lk_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lk, - fd, cmd, flock, xdata); - return 0; + STACK_WIND(frame, up_lk_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->lk, + fd, cmd, flock, xdata); + return 0; err: - UPCALL_STACK_UNWIND (lk, frame, -1, op_errno, NULL, NULL); + UPCALL_STACK_UNWIND(lk, frame, -1, op_errno, NULL, NULL); - return 0; + return 0; } -int32_t -up_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) +static int32_t +up_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, struct iatt *prebuf, struct iatt *postbuf, + dict_t *xdata) { - client_t *client = NULL; - uint32_t flags = 0; - upcall_local_t *local = NULL; + client_t *client = NULL; + uint32_t flags = 0; + upcall_local_t *local = NULL; - EXIT_IF_UPCALL_OFF (this, out); + EXIT_IF_UPCALL_OFF(this, out); - client = frame->root->client; - local = frame->local; + client = frame->root->client; + local = frame->local; - if ((op_ret < 0) || !local) { - goto out; - } - flags = UP_WRITE_FLAGS; - 
upcall_cache_invalidate (frame, this, client, local->inode, flags, - postbuf, NULL, NULL); + if ((op_ret < 0) || !local) { + goto out; + } + flags = UP_WRITE_FLAGS; + upcall_cache_invalidate(frame, this, client, local->inode, flags, postbuf, + NULL, NULL, NULL); out: - UPCALL_STACK_UNWIND (truncate, frame, op_ret, op_errno, - prebuf, postbuf, xdata); + UPCALL_STACK_UNWIND(truncate, frame, op_ret, op_errno, prebuf, postbuf, + xdata); - return 0; + return 0; } -int32_t -up_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, - dict_t *xdata) +static int32_t +up_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) { - int32_t op_errno = -1; - upcall_local_t *local = NULL; + int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; - EXIT_IF_UPCALL_OFF (this, out); + EXIT_IF_UPCALL_OFF(this, out); - local = upcall_local_init (frame, this, loc->inode); - if (!local) { - op_errno = ENOMEM; - goto err; - } + local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL); + if (!local) { + goto err; + } out: - STACK_WIND (frame, up_truncate_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->truncate, - loc, offset, xdata); + STACK_WIND(frame, up_truncate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); - return 0; + return 0; err: - UPCALL_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL, NULL); + UPCALL_STACK_UNWIND(truncate, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + return 0; } -int32_t -up_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct iatt *statpre, - struct iatt *statpost, dict_t *xdata) +static int32_t +up_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, struct iatt *statpre, struct iatt *statpost, + dict_t *xdata) { - client_t *client = NULL; - uint32_t flags = 0; - upcall_local_t *local = NULL; - - EXIT_IF_UPCALL_OFF (this, out); - - client = 
frame->root->client; - local = frame->local; - - if ((op_ret < 0) || !local) { - goto out; - } - /* XXX: setattr -> UP_SIZE or UP_OWN or UP_MODE or UP_TIMES - * or INODE_UPDATE (or UP_PERM esp incase of ACLs -> INODE_INVALIDATE) - * Need to check what attr is changed and accordingly pass UP_FLAGS. - * Bug1200271. - */ - flags = UP_ATTR_FLAGS; - upcall_cache_invalidate (frame, this, client, local->inode, flags, - statpost, NULL, NULL); + client_t *client = NULL; + uint32_t flags = 0; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + client = frame->root->client; + local = frame->local; + + if ((op_ret < 0) || !local) { + goto out; + } + /* XXX: setattr -> UP_SIZE or UP_OWN or UP_MODE or UP_TIMES + * or INODE_UPDATE (or UP_PERM esp in case of ACLs -> INODE_INVALIDATE) + * Need to check what attr is changed and accordingly pass UP_FLAGS. + * Bug1200271. + */ + flags = UP_ATTR_FLAGS; + /* If mode bits have changed invalidate the xattrs, as posix-acl and + * others store permission related information in xattrs. With changing + * of permissions/mode, we need to make clients to forget all the + * xattrs related to permissions. + * TODO: Invalidate the xattr system.posix_acl_access alone. 
+ */ + if (is_same_mode(statpre->ia_prot, statpost->ia_prot) != 0) + flags |= UP_XATTR; + + upcall_cache_invalidate(frame, this, client, local->inode, flags, statpost, + NULL, NULL, NULL); out: - UPCALL_STACK_UNWIND (setattr, frame, op_ret, op_errno, - statpre, statpost, xdata); + UPCALL_STACK_UNWIND(setattr, frame, op_ret, op_errno, statpre, statpost, + xdata); - return 0; + return 0; } -int32_t -up_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - struct iatt *stbuf, int32_t valid, dict_t *xdata) +static int32_t +up_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf, + int32_t valid, dict_t *xdata) { - int32_t op_errno = -1; - upcall_local_t *local = NULL; + int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; - EXIT_IF_UPCALL_OFF (this, out); + EXIT_IF_UPCALL_OFF(this, out); - local = upcall_local_init (frame, this, loc->inode); - if (!local) { - op_errno = ENOMEM; - goto err; - } + local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL); + if (!local) { + goto err; + } out: - STACK_WIND (frame, up_setattr_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setattr, - loc, stbuf, valid, xdata); + STACK_WIND(frame, up_setattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata); - return 0; + return 0; err: - UPCALL_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL, NULL); + UPCALL_STACK_UNWIND(setattr, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + return 0; } -int32_t -up_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *stbuf, - struct iatt *preoldparent, struct iatt *postoldparent, - struct iatt *prenewparent, struct iatt *postnewparent, - dict_t *xdata) +static int32_t +up_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iatt *stbuf, struct iatt *preoldparent, + struct iatt *postoldparent, struct iatt *prenewparent, + struct iatt 
*postnewparent, dict_t *xdata) { - client_t *client = NULL; - uint32_t flags = 0; - upcall_local_t *local = NULL; + client_t *client = NULL; + uint32_t flags = 0; + upcall_local_t *local = NULL; - EXIT_IF_UPCALL_OFF (this, out); + EXIT_IF_UPCALL_OFF(this, out); - client = frame->root->client; - local = frame->local; + client = frame->root->client; + local = frame->local; - if ((op_ret < 0) || !local) { - goto out; - } - flags = (UP_RENAME_FLAGS | UP_PARENT_DENTRY_FLAGS); - upcall_cache_invalidate (frame, this, client, local->inode, flags, - stbuf, postnewparent, postoldparent); + if ((op_ret < 0) || !local) { + goto out; + } + flags = (UP_RENAME_FLAGS | UP_PARENT_DENTRY_FLAGS); + upcall_cache_invalidate(frame, this, client, local->inode, flags, stbuf, + postnewparent, postoldparent, NULL); + + flags = UP_UPDATE_CLIENT; + upcall_cache_invalidate(frame, this, client, local->rename_oldloc.parent, + flags, postoldparent, NULL, NULL, NULL); + + if (local->rename_oldloc.parent == local->loc.parent) + goto out; + + flags = UP_UPDATE_CLIENT; + upcall_cache_invalidate(frame, this, client, local->loc.parent, flags, + postnewparent, NULL, NULL, NULL); out: - UPCALL_STACK_UNWIND (rename, frame, op_ret, op_errno, - stbuf, preoldparent, postoldparent, - prenewparent, postnewparent, xdata); + UPCALL_STACK_UNWIND(rename, frame, op_ret, op_errno, stbuf, preoldparent, + postoldparent, prenewparent, postnewparent, xdata); - return 0; + return 0; } -int32_t -up_rename (call_frame_t *frame, xlator_t *this, - loc_t *oldloc, loc_t *newloc, dict_t *xdata) +static int32_t +up_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) { - int32_t op_errno = -1; - upcall_local_t *local = NULL; + int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; - EXIT_IF_UPCALL_OFF (this, out); + EXIT_IF_UPCALL_OFF(this, out); - local = upcall_local_init (frame, this, oldloc->inode); - if (!local) { - op_errno = ENOMEM; - goto err; - } + local = 
upcall_local_init(frame, this, newloc, NULL, oldloc->inode, NULL); + if (!local) { + goto err; + } - /* copy oldloc */ - loc_copy (&local->rename_oldloc, oldloc); + /* copy oldloc */ + loc_copy(&local->rename_oldloc, oldloc); out: - STACK_WIND (frame, up_rename_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->rename, - oldloc, newloc, xdata); + STACK_WIND(frame, up_rename_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); - return 0; + return 0; err: - UPCALL_STACK_UNWIND (rename, frame, -1, op_errno, NULL, - NULL, NULL, NULL, NULL, NULL); + UPCALL_STACK_UNWIND(rename, frame, -1, op_errno, NULL, NULL, NULL, NULL, + NULL, NULL); - return 0; + return 0; } -int32_t -up_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +static int32_t +up_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) { - client_t *client = NULL; - uint32_t flags = 0; - upcall_local_t *local = NULL; + client_t *client = NULL; + uint32_t flags = 0; + upcall_local_t *local = NULL; - EXIT_IF_UPCALL_OFF (this, out); + EXIT_IF_UPCALL_OFF(this, out); - client = frame->root->client; - local = frame->local; + client = frame->root->client; + local = frame->local; - if ((op_ret < 0) || !local) { - goto out; - } - flags = (UP_NLINK_FLAGS | UP_PARENT_DENTRY_FLAGS); - upcall_cache_invalidate (frame, this, client, local->inode, flags, - NULL, postparent, NULL); + if ((op_ret < 0) || !local) { + goto out; + } + flags = (UP_NLINK_FLAGS | UP_PARENT_DENTRY_FLAGS); + upcall_cache_invalidate(frame, this, client, local->inode, flags, NULL, + postparent, NULL, NULL); + + flags = UP_UPDATE_CLIENT; + upcall_cache_invalidate(frame, this, client, local->loc.parent, flags, + postparent, NULL, NULL, NULL); out: - UPCALL_STACK_UNWIND (unlink, frame, op_ret, op_errno, - preparent, 
postparent, xdata); + UPCALL_STACK_UNWIND(unlink, frame, op_ret, op_errno, preparent, postparent, + xdata); - return 0; + return 0; } -int32_t -up_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, - dict_t *xdata) +static int32_t +up_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, + dict_t *xdata) { - int32_t op_errno = -1; - upcall_local_t *local = NULL; + int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; - EXIT_IF_UPCALL_OFF (this, out); + EXIT_IF_UPCALL_OFF(this, out); - local = upcall_local_init (frame, this, loc->inode); - if (!local) { - op_errno = ENOMEM; - goto err; - } + local = upcall_local_init(frame, this, loc, NULL, loc->inode, NULL); + if (!local) { + goto err; + } out: - STACK_WIND (frame, up_unlink_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink, - loc, xflag, xdata); + STACK_WIND(frame, up_unlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); - return 0; + return 0; err: - UPCALL_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL); + UPCALL_STACK_UNWIND(unlink, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + return 0; } -int32_t -up_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf, - struct iatt *preparent, struct iatt *postparent, dict_t *xdata) +static int32_t +up_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, inode_t *inode, struct iatt *stbuf, + struct iatt *preparent, struct iatt *postparent, dict_t *xdata) { - client_t *client = NULL; - uint32_t flags = 0; - upcall_local_t *local = NULL; + client_t *client = NULL; + uint32_t flags = 0; + upcall_local_t *local = NULL; - EXIT_IF_UPCALL_OFF (this, out); + EXIT_IF_UPCALL_OFF(this, out); - client = frame->root->client; - local = frame->local; + client = frame->root->client; + local = frame->local; - if ((op_ret < 0) || !local) { - goto out; - } - flags = (UP_NLINK_FLAGS | 
UP_PARENT_DENTRY_FLAGS); - upcall_cache_invalidate (frame, this, client, local->inode, flags, - stbuf, postparent, NULL); + if ((op_ret < 0) || !local) { + goto out; + } + flags = (UP_NLINK_FLAGS | UP_PARENT_DENTRY_FLAGS); + upcall_cache_invalidate(frame, this, client, local->inode, flags, stbuf, + postparent, NULL, NULL); + + flags = UP_UPDATE_CLIENT; + upcall_cache_invalidate(frame, this, client, local->loc.parent, flags, + postparent, NULL, NULL, NULL); out: - UPCALL_STACK_UNWIND (link, frame, op_ret, op_errno, - inode, stbuf, preparent, postparent, xdata); + UPCALL_STACK_UNWIND(link, frame, op_ret, op_errno, inode, stbuf, preparent, + postparent, xdata); - return 0; + return 0; } -int32_t -up_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, - loc_t *newloc, dict_t *xdata) +static int32_t +up_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) { - int32_t op_errno = -1; - upcall_local_t *local = NULL; + int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; - EXIT_IF_UPCALL_OFF (this, out); + EXIT_IF_UPCALL_OFF(this, out); - local = upcall_local_init (frame, this, oldloc->inode); - if (!local) { - op_errno = ENOMEM; - goto err; - } + local = upcall_local_init(frame, this, newloc, NULL, oldloc->inode, NULL); + if (!local) { + goto err; + } out: - STACK_WIND (frame, up_link_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->link, - oldloc, newloc, xdata); + STACK_WIND(frame, up_link_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata); - return 0; + return 0; err: - UPCALL_STACK_UNWIND (link, frame, -1, op_errno, NULL, - NULL, NULL, NULL, NULL); + UPCALL_STACK_UNWIND(link, frame, -1, op_errno, NULL, NULL, NULL, NULL, + NULL); - return 0; + return 0; } -int32_t -up_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +static int32_t +up_rmdir_cbk(call_frame_t *frame, void *cookie, 
xlator_t *this, int op_ret, + int op_errno, struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) { - client_t *client = NULL; - uint32_t flags = 0; - upcall_local_t *local = NULL; + client_t *client = NULL; + uint32_t flags = 0; + upcall_local_t *local = NULL; - EXIT_IF_UPCALL_OFF (this, out); + EXIT_IF_UPCALL_OFF(this, out); - client = frame->root->client; - local = frame->local; + client = frame->root->client; + local = frame->local; - if ((op_ret < 0) || !local) { - goto out; - } + if ((op_ret < 0) || !local) { + goto out; + } - flags = (UP_NLINK_FLAGS | UP_PARENT_DENTRY_FLAGS); - upcall_cache_invalidate (frame, this, client, local->inode, flags, - NULL, postparent, NULL); + flags = (UP_NLINK_FLAGS | UP_PARENT_DENTRY_FLAGS); + upcall_cache_invalidate(frame, this, client, local->inode, flags, NULL, + postparent, NULL, NULL); + + flags = UP_UPDATE_CLIENT; + upcall_cache_invalidate(frame, this, client, local->loc.parent, flags, + postparent, NULL, NULL, NULL); out: - UPCALL_STACK_UNWIND (rmdir, frame, op_ret, op_errno, - preparent, postparent, xdata); + UPCALL_STACK_UNWIND(rmdir, frame, op_ret, op_errno, preparent, postparent, + xdata); - return 0; + return 0; } -int32_t -up_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, - dict_t *xdata) +static int32_t +up_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + dict_t *xdata) { - int32_t op_errno = -1; - upcall_local_t *local = NULL; + int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; - EXIT_IF_UPCALL_OFF (this, out); + EXIT_IF_UPCALL_OFF(this, out); - local = upcall_local_init (frame, this, loc->inode); - if (!local) { - op_errno = ENOMEM; - goto err; - } + local = upcall_local_init(frame, this, loc, NULL, loc->inode, NULL); + if (!local) { + goto err; + } out: - STACK_WIND (frame, up_rmdir_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->rmdir, - loc, flags, xdata); + STACK_WIND(frame, up_rmdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rmdir, loc, 
flags, xdata); - return 0; + return 0; err: - UPCALL_STACK_UNWIND (rmdir, frame, -1, op_errno, NULL, NULL, NULL); + UPCALL_STACK_UNWIND(rmdir, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + return 0; } -int32_t -up_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, inode_t *inode, - struct iatt *stbuf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +static int32_t +up_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, inode_t *inode, struct iatt *stbuf, + struct iatt *preparent, struct iatt *postparent, dict_t *xdata) { - client_t *client = NULL; - uint32_t flags = 0; - upcall_local_t *local = NULL; + client_t *client = NULL; + uint32_t flags = 0; + upcall_local_t *local = NULL; - EXIT_IF_UPCALL_OFF (this, out); + EXIT_IF_UPCALL_OFF(this, out); - client = frame->root->client; - local = frame->local; + client = frame->root->client; + local = frame->local; - if ((op_ret < 0) || !local) { - goto out; - } + if ((op_ret < 0) || !local) { + goto out; + } + + /* invalidate parent's entry too */ + flags = UP_TIMES; + upcall_cache_invalidate(frame, this, client, local->inode, flags, + postparent, NULL, NULL, NULL); - /* invalidate parent's entry too */ - flags = UP_TIMES; - upcall_cache_invalidate (frame, this, client, local->inode, flags, - stbuf, postparent, NULL); + flags = UP_UPDATE_CLIENT; + upcall_cache_invalidate(frame, this, client, local->loc.inode, flags, stbuf, + NULL, NULL, NULL); out: - UPCALL_STACK_UNWIND (mkdir, frame, op_ret, op_errno, - inode, stbuf, preparent, postparent, xdata); + UPCALL_STACK_UNWIND(mkdir, frame, op_ret, op_errno, inode, stbuf, preparent, + postparent, xdata); - return 0; + return 0; } -int32_t -up_mkdir (call_frame_t *frame, xlator_t *this, - loc_t *loc, mode_t mode, mode_t umask, dict_t *params) +static int32_t +up_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + mode_t umask, dict_t *params) { - int32_t op_errno 
= -1; - upcall_local_t *local = NULL; + int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; - EXIT_IF_UPCALL_OFF (this, out); + EXIT_IF_UPCALL_OFF(this, out); - local = upcall_local_init (frame, this, loc->parent); - if (!local) { - op_errno = ENOMEM; - goto err; - } + local = upcall_local_init(frame, this, loc, NULL, loc->parent, NULL); + if (!local) { + goto err; + } out: - STACK_WIND (frame, up_mkdir_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir, - loc, mode, umask, params); + STACK_WIND(frame, up_mkdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, params); - return 0; + return 0; err: - UPCALL_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, - NULL, NULL, NULL, NULL); + UPCALL_STACK_UNWIND(mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL, + NULL); - return 0; + return 0; } -int32_t -up_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, fd_t *fd, inode_t *inode, - struct iatt *stbuf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +static int32_t +up_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, fd_t *fd, inode_t *inode, struct iatt *stbuf, + struct iatt *preparent, struct iatt *postparent, dict_t *xdata) { - client_t *client = NULL; - uint32_t flags = 0; - upcall_local_t *local = NULL; + client_t *client = NULL; + uint32_t flags = 0; + upcall_local_t *local = NULL; - EXIT_IF_UPCALL_OFF (this, out); + EXIT_IF_UPCALL_OFF(this, out); - client = frame->root->client; - local = frame->local; + client = frame->root->client; + local = frame->local; - if ((op_ret < 0) || !local) { - goto out; - } + if ((op_ret < 0) || !local) { + goto out; + } + + /* As its a new file create, no need of sending notification + * However invalidate parent's entry and update that fact that the + * client has accessed the newly created entry */ + flags = UP_TIMES; + upcall_cache_invalidate(frame, this, client, local->inode, flags, + 
postparent, NULL, NULL, NULL); - /* As its a new file create, no need of sending notification */ - /* However invalidate parent's entry */ - flags = UP_TIMES; - upcall_cache_invalidate (frame, this, client, local->inode, flags, - stbuf, postparent, NULL); + flags = UP_UPDATE_CLIENT; + upcall_cache_invalidate(frame, this, client, local->loc.inode, flags, stbuf, + NULL, NULL, NULL); out: - UPCALL_STACK_UNWIND (create, frame, op_ret, op_errno, fd, - inode, stbuf, preparent, postparent, xdata); + UPCALL_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf, + preparent, postparent, xdata); - return 0; + return 0; } -int32_t -up_create (call_frame_t *frame, xlator_t *this, - loc_t *loc, int32_t flags, mode_t mode, - mode_t umask, fd_t *fd, dict_t *params) +static int32_t +up_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *params) { - int32_t op_errno = -1; - upcall_local_t *local = NULL; - - EXIT_IF_UPCALL_OFF (this, out); + int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; - local = upcall_local_init (frame, this, loc->parent); + EXIT_IF_UPCALL_OFF(this, out); - if (!local) { - op_errno = ENOMEM; - goto err; - } + local = upcall_local_init(frame, this, loc, NULL, loc->parent, NULL); + if (!local) { + goto err; + } out: - STACK_WIND (frame, up_create_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->create, - loc, flags, mode, umask, fd, params); + STACK_WIND(frame, up_create_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd, + params); - return 0; + return 0; err: - UPCALL_STACK_UNWIND (create, frame, -1, op_errno, NULL, - NULL, NULL, NULL, NULL, NULL); + UPCALL_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, + NULL, NULL); - return 0; + return 0; } -int32_t -up_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, - inode_t *inode, struct iatt *stbuf, dict_t *xattr, - struct iatt 
*postparent) +static int32_t +up_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, inode_t *inode, struct iatt *stbuf, dict_t *xattr, + struct iatt *postparent) { - client_t *client = NULL; - uint32_t flags = 0; - upcall_local_t *local = NULL; + client_t *client = NULL; + uint32_t flags = 0; + upcall_local_t *local = NULL; - EXIT_IF_UPCALL_OFF (this, out); + EXIT_IF_UPCALL_OFF(this, out); - client = frame->root->client; - local = frame->local; + client = frame->root->client; + local = frame->local; - if ((op_ret < 0) || !local) { - goto out; - } - flags = UP_UPDATE_CLIENT; - upcall_cache_invalidate (frame, this, client, local->inode, flags, - NULL, NULL, NULL); + if ((op_ret < 0) || !local) { + goto out; + } + flags = UP_UPDATE_CLIENT; + upcall_cache_invalidate(frame, this, client, local->inode, flags, stbuf, + NULL, NULL, NULL); out: - UPCALL_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, stbuf, - xattr, postparent); + UPCALL_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, stbuf, xattr, + postparent); - return 0; + return 0; } -int32_t -up_lookup (call_frame_t *frame, xlator_t *this, - loc_t *loc, dict_t *xattr_req) +static int32_t +up_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) { - int32_t op_errno = -1; - upcall_local_t *local = NULL; + int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; - EXIT_IF_UPCALL_OFF (this, out); + EXIT_IF_UPCALL_OFF(this, out); - local = upcall_local_init (frame, this, loc->inode); - if (!local) { - op_errno = ENOMEM; - goto err; - } + local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL); + if (!local) { + goto err; + } out: - STACK_WIND (frame, up_lookup_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, - loc, xattr_req); + STACK_WIND(frame, up_lookup_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, loc, xattr_req); - return 0; + return 0; err: - UPCALL_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, - NULL, 
NULL, NULL); + UPCALL_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL); - return 0; + return 0; } -int32_t -up_stat_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - struct iatt *buf, dict_t *xdata) +static int32_t +up_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iatt *buf, dict_t *xdata) { - client_t *client = NULL; - uint32_t flags = 0; - upcall_local_t *local = NULL; + client_t *client = NULL; + uint32_t flags = 0; + upcall_local_t *local = NULL; - EXIT_IF_UPCALL_OFF (this, out); + EXIT_IF_UPCALL_OFF(this, out); - client = frame->root->client; - local = frame->local; + client = frame->root->client; + local = frame->local; - if ((op_ret < 0) || !local) { - goto out; - } - flags = UP_UPDATE_CLIENT; - upcall_cache_invalidate (frame, this, client, local->inode, flags, - NULL, NULL, NULL); + if ((op_ret < 0) || !local) { + goto out; + } + flags = UP_UPDATE_CLIENT; + upcall_cache_invalidate(frame, this, client, local->inode, flags, buf, NULL, + NULL, NULL); out: - UPCALL_STACK_UNWIND (stat, frame, op_ret, op_errno, buf, - xdata); + UPCALL_STACK_UNWIND(stat, frame, op_ret, op_errno, buf, xdata); - return 0; + return 0; } -int32_t -up_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +static int32_t +up_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { - int32_t op_errno = -1; - upcall_local_t *local = NULL; + int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; - EXIT_IF_UPCALL_OFF (this, out); + EXIT_IF_UPCALL_OFF(this, out); - local = upcall_local_init (frame, this, loc->inode); - if (!local) { - op_errno = ENOMEM; - goto err; - } + local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL); + if (!local) { + goto err; + } out: - STACK_WIND (frame, up_stat_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->stat, - loc, xdata); + STACK_WIND(frame, up_stat_cbk, FIRST_CHILD(this), + 
FIRST_CHILD(this)->fops->stat, loc, xdata); - return 0; + return 0; err: - UPCALL_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL); + UPCALL_STACK_UNWIND(stat, frame, -1, op_errno, NULL, NULL); - return 0; + return 0; } -int32_t -up_fstat (call_frame_t *frame, xlator_t *this, - fd_t *fd, dict_t *xdata) +static int32_t +up_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { - int32_t op_errno = -1; - upcall_local_t *local = NULL; + int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; - EXIT_IF_UPCALL_OFF (this, out); + EXIT_IF_UPCALL_OFF(this, out); - local = upcall_local_init (frame, this, fd->inode); - if (!local) { - op_errno = ENOMEM; - goto err; - } + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { + goto err; + } out: - STACK_WIND (frame, up_stat_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->fstat, - fd, xdata); + STACK_WIND(frame, up_stat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, fd, xdata); - return 0; + return 0; err: - UPCALL_STACK_UNWIND (fstat, frame, -1, op_errno, NULL, NULL); + UPCALL_STACK_UNWIND(fstat, frame, -1, op_errno, NULL, NULL); - return 0; + return 0; } -int32_t -up_ftruncate (call_frame_t *frame, xlator_t *this, - fd_t *fd, off_t offset, dict_t *xdata) +static int32_t +up_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) { - int32_t op_errno = -1; - upcall_local_t *local = NULL; + int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; - EXIT_IF_UPCALL_OFF (this, out); + EXIT_IF_UPCALL_OFF(this, out); - local = upcall_local_init (frame, this, fd->inode); - if (!local) { - op_errno = ENOMEM; - goto err; - } + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { + goto err; + } out: - STACK_WIND (frame, up_truncate_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->ftruncate, - fd, offset, xdata); + STACK_WIND(frame, up_truncate_cbk, FIRST_CHILD(this), + 
FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); - return 0; + return 0; err: - UPCALL_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, - NULL, NULL); + UPCALL_STACK_UNWIND(ftruncate, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + return 0; } -int32_t -up_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *xdata) +static int32_t +up_access_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, dict_t *xdata) { - client_t *client = NULL; - uint32_t flags = 0; - upcall_local_t *local = NULL; + client_t *client = NULL; + uint32_t flags = 0; + upcall_local_t *local = NULL; - EXIT_IF_UPCALL_OFF (this, out); + EXIT_IF_UPCALL_OFF(this, out); - client = frame->root->client; - local = frame->local; + client = frame->root->client; + local = frame->local; - if ((op_ret < 0) || !local) { - goto out; - } - flags = UP_UPDATE_CLIENT; - upcall_cache_invalidate (frame, this, client, local->inode, flags, - NULL, NULL, NULL); + if ((op_ret < 0) || !local) { + goto out; + } + flags = UP_UPDATE_CLIENT; + upcall_cache_invalidate(frame, this, client, local->inode, flags, NULL, + NULL, NULL, NULL); out: - UPCALL_STACK_UNWIND (access, frame, op_ret, op_errno, xdata); + UPCALL_STACK_UNWIND(access, frame, op_ret, op_errno, xdata); - return 0; + return 0; } -int32_t -up_access (call_frame_t *frame, xlator_t *this, - loc_t *loc, int32_t mask, dict_t *xdata) +static int32_t +up_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask, + dict_t *xdata) { - int32_t op_errno = -1; - upcall_local_t *local = NULL; + int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; - EXIT_IF_UPCALL_OFF (this, out); + EXIT_IF_UPCALL_OFF(this, out); - local = upcall_local_init (frame, this, loc->inode); - if (!local) { - op_errno = ENOMEM; - goto err; - } + local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL); + if (!local) { + goto err; + } out: - STACK_WIND (frame, up_access_cbk, - 
FIRST_CHILD(this), FIRST_CHILD(this)->fops->access, - loc, mask, xdata); + STACK_WIND(frame, up_access_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->access, loc, mask, xdata); - return 0; + return 0; err: - UPCALL_STACK_UNWIND (access, frame, -1, op_errno, NULL); + UPCALL_STACK_UNWIND(access, frame, -1, op_errno, NULL); - return 0; + return 0; } -int32_t -up_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, const char *path, - struct iatt *stbuf, dict_t *xdata) +static int32_t +up_readlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, const char *path, struct iatt *stbuf, + dict_t *xdata) { - client_t *client = NULL; - uint32_t flags = 0; - upcall_local_t *local = NULL; + client_t *client = NULL; + uint32_t flags = 0; + upcall_local_t *local = NULL; - EXIT_IF_UPCALL_OFF (this, out); + EXIT_IF_UPCALL_OFF(this, out); - client = frame->root->client; - local = frame->local; + client = frame->root->client; + local = frame->local; - if ((op_ret < 0) || !local) { - goto out; - } - flags = UP_UPDATE_CLIENT; - upcall_cache_invalidate (frame, this, client, local->inode, flags, - NULL, NULL, NULL); + if ((op_ret < 0) || !local) { + goto out; + } + flags = UP_UPDATE_CLIENT; + upcall_cache_invalidate(frame, this, client, local->inode, flags, stbuf, + NULL, NULL, NULL); out: - UPCALL_STACK_UNWIND (readlink, frame, op_ret, op_errno, path, stbuf, - xdata); + UPCALL_STACK_UNWIND(readlink, frame, op_ret, op_errno, path, stbuf, xdata); - return 0; + return 0; } -int32_t -up_readlink (call_frame_t *frame, xlator_t *this, - loc_t *loc, size_t size, dict_t *xdata) +static int32_t +up_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size, + dict_t *xdata) { - int32_t op_errno = -1; - upcall_local_t *local = NULL; + int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; - EXIT_IF_UPCALL_OFF (this, out); + EXIT_IF_UPCALL_OFF(this, out); - local = upcall_local_init (frame, this, 
loc->inode); - if (!local) { - op_errno = ENOMEM; - goto err; - } + local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL); + if (!local) { + goto err; + } out: - STACK_WIND (frame, up_readlink_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->readlink, - loc, size, xdata); + STACK_WIND(frame, up_readlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readlink, loc, size, xdata); - return 0; + return 0; err: - UPCALL_STACK_UNWIND (readlink, frame, -1, op_errno, NULL, - NULL, NULL); + UPCALL_STACK_UNWIND(readlink, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + return 0; } -int32_t -up_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +static int32_t +up_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, inode_t *inode, struct iatt *buf, + struct iatt *preparent, struct iatt *postparent, dict_t *xdata) { - client_t *client = NULL; - uint32_t flags = 0; - upcall_local_t *local = NULL; + client_t *client = NULL; + uint32_t flags = 0; + upcall_local_t *local = NULL; - EXIT_IF_UPCALL_OFF (this, out); + EXIT_IF_UPCALL_OFF(this, out); - client = frame->root->client; - local = frame->local; + client = frame->root->client; + local = frame->local; - if ((op_ret < 0) || !local) { - goto out; - } + if ((op_ret < 0) || !local) { + goto out; + } + + /* invalidate parent's entry too */ + flags = UP_TIMES; + upcall_cache_invalidate(frame, this, client, local->inode, flags, + postparent, NULL, NULL, NULL); - /* invalidate parent's entry too */ - flags = UP_TIMES; - upcall_cache_invalidate (frame, this, client, local->inode, flags, - buf, postparent, NULL); + flags = UP_UPDATE_CLIENT; + upcall_cache_invalidate(frame, this, client, local->loc.inode, flags, buf, + NULL, NULL, NULL); out: - UPCALL_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode, buf, - preparent, 
postparent, xdata); + UPCALL_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, buf, preparent, + postparent, xdata); - return 0; + return 0; } -int32_t -up_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, - mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata) +static int32_t +up_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + dev_t rdev, mode_t umask, dict_t *xdata) { - int32_t op_errno = -1; - upcall_local_t *local = NULL; + int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; - EXIT_IF_UPCALL_OFF (this, out); + EXIT_IF_UPCALL_OFF(this, out); - local = upcall_local_init (frame, this, loc->parent); - if (!local) { - op_errno = ENOMEM; - goto err; - } + local = upcall_local_init(frame, this, loc, NULL, loc->parent, NULL); + if (!local) { + goto err; + } out: - STACK_WIND (frame, up_mknod_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod, - loc, mode, rdev, umask, xdata); + STACK_WIND(frame, up_mknod_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata); - return 0; + return 0; err: - UPCALL_STACK_UNWIND (mknod, frame, -1, op_errno, NULL, - NULL, NULL, NULL, NULL); + UPCALL_STACK_UNWIND(mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL, + NULL); - return 0; + return 0; } -int32_t -up_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) +static int32_t +up_symlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - client_t *client = NULL; - uint32_t flags = 0; - upcall_local_t *local = NULL; + client_t *client = NULL; + uint32_t flags = 0; + upcall_local_t *local = NULL; - EXIT_IF_UPCALL_OFF (this, out); + EXIT_IF_UPCALL_OFF(this, out); - client = frame->root->client; - local = frame->local; + 
client = frame->root->client; + local = frame->local; - if ((op_ret < 0) || !local) { - goto out; - } + if ((op_ret < 0) || !local) { + goto out; + } + + /* invalidate parent's entry too */ + flags = UP_TIMES; + upcall_cache_invalidate(frame, this, client, local->inode, flags, + postparent, NULL, NULL, NULL); - /* invalidate parent's entry too */ - flags = UP_TIMES; - upcall_cache_invalidate (frame, this, client, local->inode, flags, - buf, postparent, NULL); + flags = UP_UPDATE_CLIENT; + upcall_cache_invalidate(frame, this, client, local->loc.inode, flags, buf, + NULL, NULL, NULL); out: - UPCALL_STACK_UNWIND (symlink, frame, op_ret, op_errno, inode, buf, - preparent, postparent, xdata); + UPCALL_STACK_UNWIND(symlink, frame, op_ret, op_errno, inode, buf, preparent, + postparent, xdata); - return 0; + return 0; } -int32_t -up_symlink (call_frame_t *frame, xlator_t *this, - const char *linkpath, loc_t *loc, mode_t umask, - dict_t *xdata) +static int32_t +up_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath, + loc_t *loc, mode_t umask, dict_t *xdata) { - int32_t op_errno = -1; - upcall_local_t *local = NULL; + int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; - EXIT_IF_UPCALL_OFF (this, out); + EXIT_IF_UPCALL_OFF(this, out); - local = upcall_local_init (frame, this, loc->parent); - if (!local) { - op_errno = ENOMEM; - goto err; - } + local = upcall_local_init(frame, this, loc, NULL, loc->parent, NULL); + if (!local) { + goto err; + } out: - STACK_WIND (frame, up_symlink_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->symlink, - linkpath, loc, umask, xdata); + STACK_WIND(frame, up_symlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->symlink, linkpath, loc, umask, xdata); - return 0; + return 0; err: - UPCALL_STACK_UNWIND (symlink, frame, -1, op_errno, NULL, - NULL, NULL, NULL, NULL); + UPCALL_STACK_UNWIND(symlink, frame, -1, op_errno, NULL, NULL, NULL, NULL, + NULL); - return 0; + return 0; } -int32_t -up_opendir_cbk (call_frame_t 
*frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd, - dict_t *xdata) +static int32_t +up_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) { - client_t *client = NULL; - uint32_t flags = 0; - upcall_local_t *local = NULL; + client_t *client = NULL; + uint32_t flags = 0; + upcall_local_t *local = NULL; - EXIT_IF_UPCALL_OFF (this, out); + EXIT_IF_UPCALL_OFF(this, out); - client = frame->root->client; - local = frame->local; + client = frame->root->client; + local = frame->local; - if ((op_ret < 0) || !local) { - goto out; - } - flags = UP_UPDATE_CLIENT; - upcall_cache_invalidate (frame, this, client, local->inode, flags, - NULL, NULL, NULL); + if ((op_ret < 0) || !local) { + goto out; + } + flags = UP_UPDATE_CLIENT; + upcall_cache_invalidate(frame, this, client, local->inode, flags, NULL, + NULL, NULL, NULL); out: - UPCALL_STACK_UNWIND (opendir, frame, op_ret, op_errno, fd, xdata); + UPCALL_STACK_UNWIND(opendir, frame, op_ret, op_errno, fd, xdata); - return 0; + return 0; } -int32_t -up_opendir (call_frame_t *frame, xlator_t *this, - loc_t *loc, fd_t *fd, dict_t *xdata) +static int32_t +up_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, + dict_t *xdata) { - int32_t op_errno = -1; - upcall_local_t *local = NULL; + int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; - EXIT_IF_UPCALL_OFF (this, out); + EXIT_IF_UPCALL_OFF(this, out); - local = upcall_local_init (frame, this, loc->inode); - if (!local) { - op_errno = ENOMEM; - goto err; - } + local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL); + if (!local) { + goto err; + } out: - STACK_WIND (frame, up_opendir_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->opendir, - loc, fd, xdata); + STACK_WIND(frame, up_opendir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->opendir, loc, fd, xdata); - return 0; + return 0; err: - UPCALL_STACK_UNWIND (opendir, frame, -1, op_errno, 
NULL, NULL); + UPCALL_STACK_UNWIND(opendir, frame, -1, op_errno, NULL, NULL); - return 0; + return 0; } -int32_t -up_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct statvfs *buf, - dict_t *xdata) +static int32_t +up_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct statvfs *buf, dict_t *xdata) { - client_t *client = NULL; - uint32_t flags = 0; - upcall_local_t *local = NULL; + client_t *client = NULL; + uint32_t flags = 0; + upcall_local_t *local = NULL; - EXIT_IF_UPCALL_OFF (this, out); + EXIT_IF_UPCALL_OFF(this, out); - client = frame->root->client; - local = frame->local; + client = frame->root->client; + local = frame->local; - if ((op_ret < 0) || !local) { - goto out; - } - flags = UP_UPDATE_CLIENT; - upcall_cache_invalidate (frame, this, client, local->inode, flags, - NULL, NULL, NULL); + if ((op_ret < 0) || !local) { + goto out; + } + flags = UP_UPDATE_CLIENT; + upcall_cache_invalidate(frame, this, client, local->inode, flags, NULL, + NULL, NULL, NULL); out: - UPCALL_STACK_UNWIND (statfs, frame, op_ret, op_errno, buf, xdata); + UPCALL_STACK_UNWIND(statfs, frame, op_ret, op_errno, buf, xdata); - return 0; + return 0; } -int32_t -up_statfs (call_frame_t *frame, xlator_t *this, - loc_t *loc, dict_t *xdata) +static int32_t +up_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { - int32_t op_errno = -1; - upcall_local_t *local = NULL; + int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; - EXIT_IF_UPCALL_OFF (this, out); + EXIT_IF_UPCALL_OFF(this, out); - local = upcall_local_init (frame, this, loc->inode); - if (!local) { - op_errno = ENOMEM; - goto err; - } + local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL); + if (!local) { + goto err; + } out: - STACK_WIND (frame, up_statfs_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->statfs, - loc, xdata); + STACK_WIND(frame, up_statfs_cbk, FIRST_CHILD(this), + 
FIRST_CHILD(this)->fops->statfs, loc, xdata); - return 0; + return 0; err: - UPCALL_STACK_UNWIND (statfs, frame, -1, op_errno, NULL, NULL); + UPCALL_STACK_UNWIND(statfs, frame, -1, op_errno, NULL, NULL); - return 0; + return 0; } -int32_t -up_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, - dict_t *xdata) +static int32_t +up_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, + dict_t *xdata) { - client_t *client = NULL; - uint32_t flags = 0; - upcall_local_t *local = NULL; + client_t *client = NULL; + uint32_t flags = 0; + upcall_local_t *local = NULL; - EXIT_IF_UPCALL_OFF (this, out); + EXIT_IF_UPCALL_OFF(this, out); - client = frame->root->client; - local = frame->local; + client = frame->root->client; + local = frame->local; - if ((op_ret < 0) || !local) { - goto out; - } - flags = UP_UPDATE_CLIENT; - upcall_cache_invalidate (frame, this, client, local->inode, flags, - NULL, NULL, NULL); + if ((op_ret < 0) || !local) { + goto out; + } + flags = UP_UPDATE_CLIENT; + upcall_cache_invalidate(frame, this, client, local->inode, flags, NULL, + NULL, NULL, NULL); out: - UPCALL_STACK_UNWIND (readdir, frame, op_ret, op_errno, entries, xdata); + UPCALL_STACK_UNWIND(readdir, frame, op_ret, op_errno, entries, xdata); - return 0; + return 0; } -int32_t -up_readdir (call_frame_t *frame, xlator_t *this, - fd_t *fd, size_t size, off_t off, dict_t *xdata) +static int32_t +up_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *xdata) { - int32_t op_errno = -1; - upcall_local_t *local = NULL; + int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; - EXIT_IF_UPCALL_OFF (this, out); + EXIT_IF_UPCALL_OFF(this, out); - local = upcall_local_init (frame, this, fd->inode); - if (!local) { - op_errno = ENOMEM; - goto err; - } + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if 
(!local) { + goto err; + } out: - STACK_WIND (frame, up_readdir_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdir, - fd, size, off, xdata); + STACK_WIND(frame, up_readdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdir, fd, size, off, xdata); - return 0; + return 0; err: - UPCALL_STACK_UNWIND (readdir, frame, -1, op_errno, NULL, NULL); + UPCALL_STACK_UNWIND(readdir, frame, -1, op_errno, NULL, NULL); - return 0; + return 0; } -int32_t -up_readdirp (call_frame_t *frame, xlator_t *this, - fd_t *fd, size_t size, off_t off, dict_t *dict) +static int32_t +up_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, + dict_t *xdata) { - int32_t op_errno = -1; - upcall_local_t *local = NULL; + client_t *client = NULL; + uint32_t flags = 0; + upcall_local_t *local = NULL; + gf_dirent_t *entry = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + client = frame->root->client; + local = frame->local; + + if ((op_ret < 0) || !local) { + goto out; + } + flags = UP_UPDATE_CLIENT; + upcall_cache_invalidate(frame, this, client, local->inode, flags, NULL, + NULL, NULL, NULL); + + list_for_each_entry(entry, &entries->list, list) + { + if (entry->inode == NULL) { + continue; + } + upcall_cache_invalidate(frame, this, client, entry->inode, flags, + &entry->d_stat, NULL, NULL, NULL); + } - EXIT_IF_UPCALL_OFF (this, out); +out: + UPCALL_STACK_UNWIND(readdirp, frame, op_ret, op_errno, entries, xdata); - local = upcall_local_init (frame, this, fd->inode); - if (!local) { - op_errno = ENOMEM; - goto err; - } + return 0; +} + +static int32_t +up_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *dict) +{ + int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { + goto err; + } out: - STACK_WIND (frame, up_readdir_cbk, - FIRST_CHILD(this), 
FIRST_CHILD(this)->fops->readdirp, - fd, size, off, dict); + STACK_WIND(frame, up_readdirp_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdirp, fd, size, off, dict); - return 0; + return 0; err: - UPCALL_STACK_UNWIND (readdirp, frame, -1, op_errno, NULL, NULL); + UPCALL_STACK_UNWIND(readdirp, frame, -1, op_errno, NULL, NULL); - return 0; + return 0; } -int32_t -up_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iatt *stbuf, int32_t valid, dict_t *xdata) +static int32_t +up_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf, + int32_t valid, dict_t *xdata) { - int32_t op_errno = -1; - upcall_local_t *local = NULL; + int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; - EXIT_IF_UPCALL_OFF (this, out); + EXIT_IF_UPCALL_OFF(this, out); - local = upcall_local_init (frame, this, fd->inode); - if (!local) { - op_errno = ENOMEM; - goto err; - } + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { + goto err; + } out: - STACK_WIND (frame, up_setattr_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsetattr, - fd, stbuf, valid, xdata); + STACK_WIND(frame, up_setattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata); - return 0; + return 0; err: - UPCALL_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, - NULL, NULL); + UPCALL_STACK_UNWIND(fsetattr, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + return 0; } -int32_t +static int32_t up_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *pre, struct iatt *post, dict_t *xdata) { - client_t *client = NULL; - uint32_t flags = 0; - upcall_local_t *local = NULL; + client_t *client = NULL; + uint32_t flags = 0; + upcall_local_t *local = NULL; - EXIT_IF_UPCALL_OFF (this, out); + EXIT_IF_UPCALL_OFF(this, out); - client = frame->root->client; - local = frame->local; + client = frame->root->client; + local = frame->local; - if ((op_ret < 0) || 
!local) { - goto out; - } - flags = UP_WRITE_FLAGS; - upcall_cache_invalidate (frame, this, client, local->inode, flags, - post, NULL, NULL); + if ((op_ret < 0) || !local) { + goto out; + } + flags = UP_WRITE_FLAGS; + upcall_cache_invalidate(frame, this, client, local->inode, flags, post, + NULL, NULL, NULL); out: - UPCALL_STACK_UNWIND (fallocate, frame, op_ret, op_errno, pre, - post, xdata); + UPCALL_STACK_UNWIND(fallocate, frame, op_ret, op_errno, pre, post, xdata); - return 0; + return 0; } -int32_t -up_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, - int32_t mode, off_t offset, size_t len, dict_t *xdata) +static int32_t +up_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, + off_t offset, size_t len, dict_t *xdata) { - int32_t op_errno = -1; - upcall_local_t *local = NULL; + int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; - EXIT_IF_UPCALL_OFF (this, out); + EXIT_IF_UPCALL_OFF(this, out); - local = upcall_local_init (frame, this, fd->inode); - if (!local) { - op_errno = ENOMEM; - goto err; - } + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { + goto err; + } out: - STACK_WIND (frame, up_fallocate_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->fallocate, - fd, mode, offset, len, xdata); + STACK_WIND(frame, up_fallocate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len, + xdata); - return 0; + return 0; err: - UPCALL_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, - NULL, NULL); + UPCALL_STACK_UNWIND(fallocate, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + return 0; } -int32_t +static int32_t up_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *pre, struct iatt *post, dict_t *xdata) { - client_t *client = NULL; - uint32_t flags = 0; - upcall_local_t *local = NULL; + client_t *client = NULL; + uint32_t flags = 0; + upcall_local_t *local = NULL; - EXIT_IF_UPCALL_OFF (this, out); + 
EXIT_IF_UPCALL_OFF(this, out); - client = frame->root->client; - local = frame->local; + client = frame->root->client; + local = frame->local; - if ((op_ret < 0) || !local) { - goto out; - } - flags = UP_WRITE_FLAGS; - upcall_cache_invalidate (frame, this, client, local->inode, flags, - post, NULL, NULL); + if ((op_ret < 0) || !local) { + goto out; + } + flags = UP_WRITE_FLAGS; + upcall_cache_invalidate(frame, this, client, local->inode, flags, post, + NULL, NULL, NULL); out: - UPCALL_STACK_UNWIND (discard, frame, op_ret, op_errno, pre, - post, xdata); + UPCALL_STACK_UNWIND(discard, frame, op_ret, op_errno, pre, post, xdata); - return 0; + return 0; } -int32_t -up_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, - off_t offset, size_t len, dict_t *xdata) +static int32_t +up_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) { - int32_t op_errno = -1; - upcall_local_t *local = NULL; + int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; - EXIT_IF_UPCALL_OFF (this, out); + EXIT_IF_UPCALL_OFF(this, out); - local = upcall_local_init (frame, this, fd->inode); - if (!local) { - op_errno = ENOMEM; - goto err; - } + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { + goto err; + } out: - STACK_WIND (frame, up_discard_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->discard, - fd, offset, len, xdata); + STACK_WIND(frame, up_discard_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata); - return 0; + return 0; err: - UPCALL_STACK_UNWIND (discard, frame, -1, op_errno, NULL, - NULL, NULL); + UPCALL_STACK_UNWIND(discard, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + return 0; } -int32_t +static int32_t up_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *pre, struct iatt *post, dict_t *xdata) { - client_t *client = NULL; - uint32_t flags = 0; - upcall_local_t *local = NULL; + client_t 
*client = NULL; + uint32_t flags = 0; + upcall_local_t *local = NULL; - EXIT_IF_UPCALL_OFF (this, out); + EXIT_IF_UPCALL_OFF(this, out); - client = frame->root->client; - local = frame->local; + client = frame->root->client; + local = frame->local; - if ((op_ret < 0) || !local) { - goto out; - } - flags = UP_WRITE_FLAGS; - upcall_cache_invalidate (frame, this, client, local->inode, flags, - post, NULL, NULL); + if ((op_ret < 0) || !local) { + goto out; + } + flags = UP_WRITE_FLAGS; + upcall_cache_invalidate(frame, this, client, local->inode, flags, post, + NULL, NULL, NULL); out: - UPCALL_STACK_UNWIND (zerofill, frame, op_ret, op_errno, pre, - post, xdata); + UPCALL_STACK_UNWIND(zerofill, frame, op_ret, op_errno, pre, post, xdata); - return 0; + return 0; } -int -up_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, - off_t offset, off_t len, dict_t *xdata) +static int +up_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + off_t len, dict_t *xdata) { - int32_t op_errno = -1; - upcall_local_t *local = NULL; + int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; - EXIT_IF_UPCALL_OFF (this, out); + EXIT_IF_UPCALL_OFF(this, out); - local = upcall_local_init (frame, this, fd->inode); - if (!local) { - op_errno = ENOMEM; - goto err; - } + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { + goto err; + } out: - STACK_WIND (frame, up_zerofill_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->zerofill, - fd, offset, len, xdata); + STACK_WIND(frame, up_zerofill_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata); - return 0; + return 0; err: - UPCALL_STACK_UNWIND (zerofill, frame, -1, op_errno, NULL, - NULL, NULL); + UPCALL_STACK_UNWIND(zerofill, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + return 0; } -int32_t -mem_acct_init (xlator_t *this) +static int32_t +up_seek_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, off_t offset, 
dict_t *xdata) { - int ret = -1; + client_t *client = NULL; + uint32_t flags = 0; + upcall_local_t *local = NULL; - if (!this) - return ret; + EXIT_IF_UPCALL_OFF(this, out); - ret = xlator_mem_acct_init (this, gf_upcall_mt_end + 1); + client = frame->root->client; + local = frame->local; - if (ret != 0) { - gf_msg ("upcall", GF_LOG_WARNING, 0, - UPCALL_MSG_NO_MEMORY, - "Memory allocation failed"); - return ret; - } + if ((op_ret < 0) || !local) { + goto out; + } + flags = UP_UPDATE_CLIENT; + upcall_cache_invalidate(frame, this, client, local->inode, flags, NULL, + NULL, NULL, NULL); + +out: + UPCALL_STACK_UNWIND(seek, frame, op_ret, op_errno, offset, xdata); + + return 0; +} + +static int32_t +up_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + gf_seek_what_t what, dict_t *xdata) +{ + int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { + goto err; + } + +out: + STACK_WIND(frame, up_seek_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->seek, fd, offset, what, xdata); + + return 0; + +err: + UPCALL_STACK_UNWIND(seek, frame, -1, op_errno, 0, NULL); + + return 0; +} +static int32_t +up_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + client_t *client = NULL; + uint32_t flags = 0; + upcall_local_t *local = NULL; + int ret = 0; + struct iatt stbuf = { + 0, + }; + upcall_private_t *priv = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, out); + + client = frame->root->client; + local = frame->local; + + if ((op_ret < 0) || !local) { + goto out; + } + + flags = UP_XATTR; + + ret = up_filter_xattr(local->xattr, priv->xattrs); + if (ret < 0) { + op_ret = ret; + goto out; + } + if (!up_invalidate_needed(local->xattr)) + goto out; + + ret = dict_get_iatt(xdata, GF_POSTSTAT, &stbuf); + if (ret == 0) + 
flags |= UP_TIMES; + + upcall_cache_invalidate(frame, this, client, local->inode, flags, &stbuf, + NULL, NULL, local->xattr); + +out: + UPCALL_STACK_UNWIND(setxattr, frame, op_ret, op_errno, xdata); + + return 0; +} + +static int32_t +up_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + int32_t flags, dict_t *xdata) +{ + int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, loc, NULL, loc->inode, dict); + if (!local) { + goto err; + } + +out: + STACK_WIND(frame, up_setxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata); + + return 0; + +err: + UPCALL_STACK_UNWIND(setxattr, frame, -1, op_errno, NULL); + + return 0; +} + +static int32_t +up_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + client_t *client = NULL; + uint32_t flags = 0; + upcall_local_t *local = NULL; + int ret = 0; + struct iatt stbuf = { + 0, + }; + upcall_private_t *priv = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, out); + + client = frame->root->client; + local = frame->local; + + if ((op_ret < 0) || !local) { + goto out; + } + + flags = UP_XATTR; + + ret = up_filter_xattr(local->xattr, priv->xattrs); + if (ret < 0) { + op_ret = ret; + goto out; + } + if (!up_invalidate_needed(local->xattr)) + goto out; + + ret = dict_get_iatt(xdata, GF_POSTSTAT, &stbuf); + if (ret == 0) + flags |= UP_TIMES; + + upcall_cache_invalidate(frame, this, client, local->inode, flags, &stbuf, + NULL, NULL, local->xattr); + +out: + UPCALL_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, xdata); + + return 0; +} + +static int32_t +up_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, + int32_t flags, dict_t *xdata) +{ + int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = 
upcall_local_init(frame, this, NULL, fd, fd->inode, dict); + if (!local) { + goto err; + } + +out: + STACK_WIND(frame, up_fsetxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata); + + return 0; + +err: + UPCALL_STACK_UNWIND(fsetxattr, frame, -1, op_errno, NULL); + + return 0; +} + +static int32_t +up_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + client_t *client = NULL; + uint32_t flags = 0; + upcall_local_t *local = NULL; + struct iatt stbuf = { + 0, + }; + int ret = 0; + upcall_private_t *priv = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, out); + + client = frame->root->client; + local = frame->local; + + if ((op_ret < 0) || !local) { + goto out; + } + flags = UP_XATTR_RM; + + ret = up_filter_xattr(local->xattr, priv->xattrs); + if (ret < 0) { + op_ret = ret; + goto out; + } + if (!up_invalidate_needed(local->xattr)) + goto out; + + ret = dict_get_iatt(xdata, GF_POSTSTAT, &stbuf); + if (ret == 0) + flags |= UP_TIMES; + + upcall_cache_invalidate(frame, this, client, local->inode, flags, &stbuf, + NULL, NULL, local->xattr); + +out: + UPCALL_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, xdata); + return 0; +} + +static int32_t +up_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, + dict_t *xdata) +{ + int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + dict_t *xattr = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + xattr = dict_for_key_value(name, "", 1, _gf_true); + if (!xattr) { + goto err; + } + + local = upcall_local_init(frame, this, NULL, fd, fd->inode, xattr); + if (!local) { + goto err; + } + +out: + if (xattr) + dict_unref(xattr); + + STACK_WIND(frame, up_fremovexattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata); + return 0; + +err: + if (xattr) + dict_unref(xattr); + + UPCALL_STACK_UNWIND(fremovexattr, 
frame, -1, op_errno, NULL); + + return 0; +} + +static int32_t +up_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + client_t *client = NULL; + uint32_t flags = 0; + upcall_local_t *local = NULL; + struct iatt stbuf = { + 0, + }; + int ret = 0; + upcall_private_t *priv = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, out); + + client = frame->root->client; + local = frame->local; + + if ((op_ret < 0) || !local) { + goto out; + } + flags = UP_XATTR_RM; + + ret = up_filter_xattr(local->xattr, priv->xattrs); + if (ret < 0) { + op_ret = ret; + goto out; + } + if (!up_invalidate_needed(local->xattr)) + goto out; + + ret = dict_get_iatt(xdata, GF_POSTSTAT, &stbuf); + if (ret == 0) + flags |= UP_TIMES; + + upcall_cache_invalidate(frame, this, client, local->inode, flags, &stbuf, + NULL, NULL, local->xattr); + +out: + UPCALL_STACK_UNWIND(removexattr, frame, op_ret, op_errno, xdata); + return 0; +} + +static int32_t +up_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) +{ + int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + dict_t *xattr = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + xattr = dict_for_key_value(name, "", 1, _gf_true); + if (!xattr) { + goto err; + } + + local = upcall_local_init(frame, this, loc, NULL, loc->inode, xattr); + if (!local) { + goto err; + } + +out: + if (xattr) + dict_unref(xattr); + + STACK_WIND(frame, up_removexattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, loc, name, xdata); + return 0; + +err: + if (xattr) + dict_unref(xattr); + + UPCALL_STACK_UNWIND(removexattr, frame, -1, op_errno, NULL); + + return 0; +} + +static int32_t +up_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata) +{ + client_t *client = NULL; + uint32_t flags = 0; + upcall_local_t *local = 
NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + client = frame->root->client; + local = frame->local; + + if ((op_ret < 0) || !local) { + goto out; + } + + flags = UP_UPDATE_CLIENT; + upcall_cache_invalidate(frame, this, client, local->inode, flags, NULL, + NULL, NULL, NULL); + +out: + UPCALL_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, dict, xdata); + return 0; +} + +static int32_t +up_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, + dict_t *xdata) +{ + int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { + goto err; + } + +out: + STACK_WIND(frame, up_fgetxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata); + return 0; +err: + UPCALL_STACK_UNWIND(fgetxattr, frame, -1, op_errno, NULL, NULL); + return 0; +} + +static int32_t +up_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata) +{ + client_t *client = NULL; + uint32_t flags = 0; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + client = frame->root->client; + local = frame->local; + + if ((op_ret < 0) || !local) { + goto out; + } + + flags = UP_UPDATE_CLIENT; + upcall_cache_invalidate(frame, this, client, local->inode, flags, NULL, + NULL, NULL, NULL); + +out: + UPCALL_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata); + return 0; +} + +static int32_t +up_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name, + dict_t *xdata) +{ + int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL); + if (!local) { + goto err; + } + +out: + STACK_WIND(frame, up_getxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->getxattr, loc, name, xdata); + return 0; +err: + UPCALL_STACK_UNWIND(getxattr, 
frame, -1, op_errno, NULL, NULL); + return 0; +} + +/* The xattrops here mainly tracks changes in afr pending xattr. + * 1. xattrop doesn't carry info saying post op/pre op. + * 2. Pre xattrop will have 0 value for all pending xattrs, + * the cbk of pre xattrop carries the on-disk xattr value. + * Non zero on-disk xattr indicates pending healing. + * 3. Post xattrop will either have 0 or 1 as value of pending xattrs, + * 0 on success, 1 on failure. But the post xattrop cbk will have + * 0 or 1 or any higher value. + * 0 - if no healing required* + * 1 - if this is the first time pending xattr is being set. + * n - if there is already a pending xattr set, it will increment + * the on-disk value and send that in cbk. + * Our aim is to send an invalidation, only the first time a pending + * xattr was set on a file. Below are some of the exceptions in handling + * xattrop: + * - Do not filter unregistered xattrs in the cbk, but in the call path. + * Else, we will be invalidating on every preop, if the file already has + * pending xattr set. Filtering unregistered xattrs on the fop path + * ensures we invalidate only in postop, every time a postop comes with + * pending xattr value 1. + * - Consider a brick is down, and the postop sets pending xattrs as long + * as the other brick is down. But we do not want to invalidate every time + * a pending xattr is set, but we want to invalidate only the first time + * a pending xattr is set on any file. Hence, to identify if its the first + * time a pending xattr is set, we compare the value of pending xattrs that + * came in postop and postop cbk, if its same then its the first time. 
+ */ +static int32_t +up_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata) +{ + client_t *client = NULL; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + client = frame->root->client; + local = frame->local; + + if ((op_ret < 0) || !local) { + goto out; + } + + if (up_invalidate_needed(local->xattr)) { + if (dict_foreach(local->xattr, up_compare_afr_xattr, dict) < 0) + goto out; + + upcall_cache_invalidate(frame, this, client, local->inode, UP_XATTR, + NULL, NULL, NULL, local->xattr); + } +out: + if (frame->root->op == GF_FOP_FXATTROP) { + UPCALL_STACK_UNWIND(fxattrop, frame, op_ret, op_errno, dict, xdata); + } else { + UPCALL_STACK_UNWIND(xattrop, frame, op_ret, op_errno, dict, xdata); + } + return 0; +} + +static int32_t +up_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc, + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) +{ + int32_t op_errno = EINVAL; + upcall_local_t *local = NULL; + int ret = 0; + upcall_private_t *priv = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, out); + + local = upcall_local_init(frame, this, loc, NULL, loc->inode, xattr); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + ret = up_filter_xattr(local->xattr, priv->xattrs); + if (ret < 0) { + goto err; + } + +out: + STACK_WIND(frame, up_xattrop_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->xattrop, loc, optype, xattr, xdata); + return 0; +err: + UPCALL_STACK_UNWIND(xattrop, frame, -1, op_errno, NULL, NULL); + return 0; +} + +static int32_t +up_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd, + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) +{ + int32_t op_errno = EINVAL; + upcall_local_t *local = NULL; + int ret = 0; + upcall_private_t *priv = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, out); + + local = 
upcall_local_init(frame, this, NULL, fd, fd->inode, xattr); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + ret = up_filter_xattr(local->xattr, priv->xattrs); + if (ret < 0) { + goto err; + } + +out: + STACK_WIND(frame, up_xattrop_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fxattrop, fd, optype, xattr, xdata); + return 0; +err: + STACK_UNWIND_STRICT(fxattrop, frame, -1, op_errno, NULL, NULL); + return 0; +} + +int32_t +mem_acct_init(xlator_t *this) +{ + int ret = -1; + + if (!this) return ret; + + ret = xlator_mem_acct_init(this, gf_upcall_mt_end + 1); + + if (ret != 0) { + gf_msg("upcall", GF_LOG_WARNING, 0, UPCALL_MSG_NO_MEMORY, + "Memory allocation failed"); + return ret; + } + + return ret; } void -upcall_local_wipe (xlator_t *this, upcall_local_t *local) +upcall_local_wipe(xlator_t *this, upcall_local_t *local) { - if (local) { - inode_unref (local->inode); - loc_wipe (&local->rename_oldloc); - mem_put (local); - } + if (local) { + inode_unref(local->inode); + if (local->xattr) + dict_unref(local->xattr); + loc_wipe(&local->rename_oldloc); + loc_wipe(&local->loc); + if (local->fd) + fd_unref(local->fd); + mem_put(local); + } } upcall_local_t * -upcall_local_init (call_frame_t *frame, xlator_t *this, inode_t *inode) +upcall_local_init(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, + inode_t *inode, dict_t *xattr) { - upcall_local_t *local = NULL; + upcall_local_t *local = NULL; - local = mem_get0 (THIS->local_pool); + GF_VALIDATE_OR_GOTO("upcall", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, inode, out); - if (!local) - goto out; + local = mem_get0(THIS->local_pool); + + if (!local) + goto out; - local->inode = inode_ref (inode); + local->inode = inode_ref(inode); + if (xattr) + local->xattr = dict_copy_with_ref(xattr, NULL); - /* Shall we get inode_ctx and store it here itself? 
*/ - local->upcall_inode_ctx = upcall_inode_ctx_get (inode, this); + if (loc) + loc_copy(&local->loc, loc); + if (fd) + local->fd = fd_ref(fd); - frame->local = local; + frame->local = local; out: - return local; + return local; +} + +static int32_t +update_xattrs(dict_t *dict, char *key, data_t *value, void *data) +{ + dict_t *xattrs = data; + int ret = 0; + + ret = dict_set_int8(xattrs, key, 0); + return ret; +} + +int32_t +up_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata) +{ + upcall_private_t *priv = NULL; + int ret = 0; + + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, out); + + if (op != GF_IPC_TARGET_UPCALL) + goto wind; + + /* TODO: Bz-1371622 Along with the xattrs also store list of clients + * that are interested in notifications, so that the notification + * can be sent to the clients that have registered. + * Once this implemented there can be unregister of xattrs for + * notifications. Until then there is no unregister of xattrs*/ + if (xdata && priv->xattrs) { + ret = dict_foreach(xdata, update_xattrs, priv->xattrs); + } + +out: + STACK_UNWIND_STRICT(ipc, frame, ret, 0, NULL); + return 0; + +wind: + STACK_WIND(frame, default_ipc_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ipc, op, xdata); + return 0; } int -reconfigure (xlator_t *this, dict_t *options) +reconfigure(xlator_t *this, dict_t *options) { - upcall_private_t *priv = NULL; - int ret = -1; + upcall_private_t *priv = NULL; + int ret = -1; - priv = this->private; - GF_ASSERT (priv); + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, out); - GF_OPTION_RECONF ("cache-invalidation", priv->cache_invalidation_enabled, - options, bool, out); - GF_OPTION_RECONF ("cache-invalidation-timeout", priv->cache_invalidation_timeout, - options, int32, out); + GF_OPTION_RECONF("cache-invalidation", priv->cache_invalidation_enabled, + options, bool, out); + GF_OPTION_RECONF("cache-invalidation-timeout", + priv->cache_invalidation_timeout, options, int32, 
out); - ret = 0; + ret = 0; - if (priv->cache_invalidation_enabled && - !priv->reaper_init_done) { - ret = upcall_reaper_thread_init (this); + if (priv->cache_invalidation_enabled && !priv->reaper_init_done) { + ret = upcall_reaper_thread_init(this); - if (ret) { - gf_msg ("upcall", GF_LOG_WARNING, 0, - UPCALL_MSG_INTERNAL_ERROR, - "reaper_thread creation failed (%s)." - " Disabling cache_invalidation", - strerror(errno)); - } - priv->reaper_init_done = 1; + if (ret) { + gf_msg("upcall", GF_LOG_WARNING, 0, UPCALL_MSG_INTERNAL_ERROR, + "reaper_thread creation failed (%s)." + " Disabling cache_invalidation", + strerror(errno)); } + priv->reaper_init_done = _gf_true; + } out: - return ret; + return ret; } int -init (xlator_t *this) +init(xlator_t *this) { - int ret = -1; - upcall_private_t *priv = NULL; + int ret = -1; + upcall_private_t *priv = NULL; - priv = GF_CALLOC (1, sizeof (*priv), - gf_upcall_mt_private_t); - if (!priv) { - gf_msg ("upcall", GF_LOG_WARNING, 0, - UPCALL_MSG_NO_MEMORY, - "Memory allocation failed"); - goto out; - } + priv = GF_CALLOC(1, sizeof(*priv), gf_upcall_mt_private_t); + if (!priv) + goto out; - GF_OPTION_INIT ("cache-invalidation", priv->cache_invalidation_enabled, - bool, out); - GF_OPTION_INIT ("cache-invalidation-timeout", - priv->cache_invalidation_timeout, int32, out); + priv->xattrs = dict_new(); + if (!priv->xattrs) + goto out; - LOCK_INIT (&priv->inode_ctx_lk); - INIT_LIST_HEAD (&priv->inode_ctx_list); + GF_OPTION_INIT("cache-invalidation", priv->cache_invalidation_enabled, bool, + out); + GF_OPTION_INIT("cache-invalidation-timeout", + priv->cache_invalidation_timeout, int32, out); - this->private = priv; - priv->fini = 0; - priv->reaper_init_done = 0; + LOCK_INIT(&priv->inode_ctx_lk); + INIT_LIST_HEAD(&priv->inode_ctx_list); - this->local_pool = mem_pool_new (upcall_local_t, 512); - ret = 0; + priv->fini = 0; + priv->reaper_init_done = _gf_false; - if (priv->cache_invalidation_enabled) { - ret = upcall_reaper_thread_init 
(this); + this->private = priv; + this->local_pool = mem_pool_new(upcall_local_t, 512); + ret = 0; + + if (priv->cache_invalidation_enabled) { + ret = upcall_reaper_thread_init(this); - if (ret) { - gf_msg ("upcall", GF_LOG_WARNING, 0, - UPCALL_MSG_INTERNAL_ERROR, - "reaper_thread creation failed (%s)." - " Disabling cache_invalidation", - strerror(errno)); - } - priv->reaper_init_done = 1; - } -out: if (ret) { - GF_FREE (priv); + gf_msg("upcall", GF_LOG_WARNING, 0, UPCALL_MSG_INTERNAL_ERROR, + "reaper_thread creation failed (%s)." + " Disabling cache_invalidation", + strerror(errno)); } + priv->reaper_init_done = _gf_true; + } +out: + if (ret && priv) { + if (priv->xattrs) + dict_unref(priv->xattrs); - return ret; + GF_FREE(priv); + } + + return ret; } -int -fini (xlator_t *this) +void +fini(xlator_t *this) { - upcall_private_t *priv = NULL; + upcall_private_t *priv = NULL; - priv = this->private; - if (!priv) { - return 0; - } - this->private = NULL; + priv = this->private; + if (!priv) { + return; + } + this->private = NULL; + + priv->fini = 1; - priv->fini = 1; + if (priv->reaper_thr) { + gf_thread_cleanup_xint(priv->reaper_thr); + priv->reaper_thr = 0; + priv->reaper_init_done = _gf_false; + } - pthread_join (priv->reaper_thr, NULL); + dict_unref(priv->xattrs); + LOCK_DESTROY(&priv->inode_ctx_lk); - LOCK_DESTROY (&priv->inode_ctx_lk); + /* Do we need to cleanup the inode_ctxs? IMO not required + * as inode_forget would have been done on all the inodes + * before calling xlator_fini */ + GF_FREE(priv); - /* Do we need to cleanup the inode_ctxs? 
IMO not required - * as inode_forget would have been done on all the inodes - * before calling xlator_fini */ - GF_FREE (priv); + if (this->local_pool) { + mem_pool_destroy(this->local_pool); + this->local_pool = NULL; + } - return 0; + return; } int -upcall_forget (xlator_t *this, inode_t *inode) +upcall_forget(xlator_t *this, inode_t *inode) { - upcall_cleanup_inode_ctx (this, inode); - return 0; + upcall_private_t *priv = this->private; + + if (!priv) + goto out; + + upcall_cleanup_inode_ctx(this, inode); +out: + return 0; } int -upcall_release (xlator_t *this, fd_t *fd) +upcall_release(xlator_t *this, fd_t *fd) { - return 0; + return 0; } int -notify (xlator_t *this, int32_t event, void *data, ...) +notify(xlator_t *this, int32_t event, void *data, ...) { - int ret = -1; - int32_t val = 0; - struct gf_upcall *up_req = NULL; + int ret = -1; + struct gf_upcall *up_req = NULL; - switch (event) { - case GF_EVENT_UPCALL: - { - gf_log (this->name, GF_LOG_DEBUG, "Upcall Notify event = %d", - event); + switch (event) { + case GF_EVENT_UPCALL: { + gf_log(this->name, GF_LOG_DEBUG, "Upcall Notify event = %d", event); - up_req = (struct gf_upcall *) data; + up_req = (struct gf_upcall *)data; - GF_VALIDATE_OR_GOTO(this->name, up_req, out); + GF_VALIDATE_OR_GOTO(this->name, up_req, out); - ret = default_notify (this, event, up_req); + ret = default_notify(this, event, up_req); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, 0, - UPCALL_MSG_NOTIFY_FAILED, - "Failed to notify cache invalidation" - " to client(%s)", - up_req->client_uid); - goto out; - } - } - break; + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, UPCALL_MSG_NOTIFY_FAILED, + "Failed to notify cache invalidation" + " to client(%s)", + up_req->client_uid); + goto out; + } + } break; default: - default_notify (this, event, data); - break; - } - ret = 0; + default_notify(this, event, data); + break; + } + ret = 0; out: - return ret; + return ret; } struct xlator_fops fops = { - /* fops which change only 
"ATIME" do not result - * in any cache invalidation. Hence upcall - * notifications are not sent in this case. - * But however, we need to store/update the - * client info in the upcall state to be able - * to notify them incase of any changes done - * to the data. - * - * Below such fops do not trigger upcall - * notifications but will add/update - * clients info in the upcall inode ctx.*/ - .lookup = up_lookup, - .open = up_open, - .statfs = up_statfs, - .opendir = up_opendir, - .readdir = up_readdir, - .readdirp = up_readdirp, - .stat = up_stat, - .fstat = up_fstat, - .access = up_access, - .readlink = up_readlink, - .readv = up_readv, - .lk = up_lk, - - /* fops doing write */ - .truncate = up_truncate, - .ftruncate = up_ftruncate, - .writev = up_writev, - .zerofill = up_zerofill, - .fallocate = up_fallocate, - .discard = up_discard, - - /* fops changing attributes */ - .fsetattr = up_fsetattr, - .setattr = up_setattr, - - /* fops affecting parent dirent */ - .mknod = up_mknod, - .create = up_create, - .symlink = up_symlink, - .mkdir = up_mkdir, - - /* fops affecting both file and parent - * cache entries */ - .unlink = up_unlink, - .link = up_link, - .rmdir = up_rmdir, - .rename = up_rename, + .ipc = up_ipc, + /* fops which change only "ATIME" do not result + * in any cache invalidation. Hence upcall + * notifications are not sent in this case. + * But however, we need to store/update the + * client info in the upcall state to be able + * to notify them in case of any changes done + * to the data. 
+ * + * Below such fops do not trigger upcall + * notifications but will add/update + * clients info in the upcall inode ctx.*/ + .lookup = up_lookup, + .open = up_open, + .statfs = up_statfs, + .opendir = up_opendir, + .readdir = up_readdir, + .readdirp = up_readdirp, + .stat = up_stat, + .fstat = up_fstat, + .access = up_access, + .readlink = up_readlink, + .readv = up_readv, + .lk = up_lk, + .seek = up_seek, + + /* fops doing write */ + .truncate = up_truncate, + .ftruncate = up_ftruncate, + .writev = up_writev, + .zerofill = up_zerofill, + .fallocate = up_fallocate, + .discard = up_discard, + + /* fops changing attributes */ + .fsetattr = up_fsetattr, + .setattr = up_setattr, + + /* fops affecting parent dirent */ + .mknod = up_mknod, + .create = up_create, + .symlink = up_symlink, + .mkdir = up_mkdir, + + /* fops affecting both file and parent + * cache entries */ + .unlink = up_unlink, + .link = up_link, + .rmdir = up_rmdir, + .rename = up_rename, + + .setxattr = up_setxattr, + .fsetxattr = up_fsetxattr, + .getxattr = up_getxattr, + .fgetxattr = up_fgetxattr, + .fremovexattr = up_fremovexattr, + .removexattr = up_removexattr, + .xattrop = up_xattrop, + .fxattrop = up_fxattrop, #ifdef NOT_SUPPORTED - /* internal lk fops */ - .inodelk = up_inodelk, - .finodelk = up_finodelk, - .entrylk = up_entrylk, - .fentrylk = up_fentrylk, - - /* Below fops follow 'WRITE' which - * would have already sent upcall - * notifications */ - .flush = up_flush, - .fsync = up_fsync, - .fsyncdir = up_fsyncdir, - - /* XXX: Handle xattr fops (BZ-1211863) */ - .getxattr = up_getxattr, - .fgetxattr = up_fgetxattr, - .fremovexattr = up_fremovexattr, - .removexattr = up_removexattr, - .setxattr = up_setxattr, - .fsetxattr = up_fsetxattr, - .xattrop = up_xattrop, - .fxattrop = up_fxattrop, + /* internal lk fops */ + .inodelk = up_inodelk, + .finodelk = up_finodelk, + .entrylk = up_entrylk, + .fentrylk = up_fentrylk, + + /* Below fops follow 'WRITE' which + * would have already sent upcall + 
* notifications */ + .flush = up_flush, + .fsync = up_fsync, + .fsyncdir = up_fsyncdir, #endif }; struct xlator_cbks cbks = { - .forget = upcall_forget, - .release = upcall_release, + .forget = upcall_forget, + .release = upcall_release, }; struct volume_options options[] = { - { .key = {"cache-invalidation"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", - .description = "When \"on\", sends cache-invalidation" - " notifications." - }, - { .key = {"cache-invalidation-timeout"}, - .type = GF_OPTION_TYPE_INT, - .default_value = CACHE_INVALIDATION_TIMEOUT, - .description = "After 'timeout' seconds since the time" - " client accessed any file, cache-invalidation" - " notifications are no longer sent to that client." - }, - { .key = {NULL} }, + { + .key = {"cache-invalidation"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "When \"on\", sends cache-invalidation" + " notifications.", + .op_version = {GD_OP_VERSION_3_7_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"cache", "cacheconsistency", "upcall"}, + }, + {.key = {"cache-invalidation-timeout"}, + .type = GF_OPTION_TYPE_INT, + .default_value = CACHE_INVALIDATION_TIMEOUT, + .description = "After 'timeout' seconds since the time" + " client accessed any file, cache-invalidation" + " notifications are no longer sent to that client.", + .op_version = {GD_OP_VERSION_3_7_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"cache", "cachetimeout", "upcall"}}, + {.key = {NULL}}, +}; + +xlator_api_t xlator_api = { + .init = init, + .fini = fini, + .notify = notify, + .reconfigure = reconfigure, + .mem_acct_init = mem_acct_init, + .op_version = {1}, /* Present from the initial version */ + .fops = &fops, + .cbks = &cbks, + .options = options, + .identifier = "upcall", + .category = GF_MAINTAINED, }; diff --git a/xlators/features/upcall/src/upcall.h b/xlators/features/upcall/src/upcall.h index eb1c7df89be..aa535088ad7 100644 --- a/xlators/features/upcall/src/upcall.h 
+++ b/xlators/features/upcall/src/upcall.h @@ -10,121 +10,122 @@ #ifndef __UPCALL_H__ #define __UPCALL_H__ -#include "compat-errno.h" +#include <glusterfs/compat-errno.h> #include "upcall-mem-types.h" -#include "client_t.h" +#include <glusterfs/client_t.h> #include "upcall-messages.h" #include "upcall-cache-invalidation.h" -#include "upcall-utils.h" - -#define EXIT_IF_UPCALL_OFF(this, label) do { \ - if (!is_upcall_enabled(this)) \ - goto label; \ -} while (0) - -#define UPCALL_STACK_UNWIND(fop, frame, params ...) do { \ - upcall_local_t *__local = NULL; \ - xlator_t *__xl = NULL; \ - if (frame) { \ - __xl = frame->this; \ - __local = frame->local; \ - frame->local = NULL; \ - } \ - STACK_UNWIND_STRICT (fop, frame, params); \ - upcall_local_wipe (__xl, __local); \ -} while (0) - -#define UPCALL_STACK_DESTROY(frame) do { \ - upcall_local_t *__local = NULL; \ - xlator_t *__xl = NULL; \ - __xl = frame->this; \ - __local = frame->local; \ - frame->local = NULL; \ - STACK_DESTROY (frame->root); \ - upcall_local_wipe (__xl, __local); \ -} while (0) - -struct _upcall_private_t { - gf_boolean_t cache_invalidation_enabled; - int32_t cache_invalidation_timeout; - struct list_head inode_ctx_list; - gf_lock_t inode_ctx_lk; - int32_t reaper_init_done; - pthread_t reaper_thr; - int32_t fini; +#include <glusterfs/upcall-utils.h> + +#define EXIT_IF_UPCALL_OFF(this, label) \ + do { \ + if (!is_upcall_enabled(this)) \ + goto label; \ + } while (0) + +#define UPCALL_STACK_UNWIND(fop, frame, params...) 
\ + do { \ + upcall_local_t *__local = NULL; \ + xlator_t *__xl = NULL; \ + if (frame) { \ + __xl = frame->this; \ + __local = frame->local; \ + frame->local = NULL; \ + } \ + STACK_UNWIND_STRICT(fop, frame, params); \ + upcall_local_wipe(__xl, __local); \ + } while (0) + +#define UPCALL_STACK_DESTROY(frame) \ + do { \ + upcall_local_t *__local = NULL; \ + xlator_t *__xl = NULL; \ + __xl = frame->this; \ + __local = frame->local; \ + frame->local = NULL; \ + STACK_DESTROY(frame->root); \ + upcall_local_wipe(__xl, __local); \ + } while (0) + +struct _upcall_private { + gf_boolean_t cache_invalidation_enabled; + int32_t cache_invalidation_timeout; + struct list_head inode_ctx_list; + gf_lock_t inode_ctx_lk; + gf_boolean_t reaper_init_done; + pthread_t reaper_thr; + int32_t fini; + dict_t *xattrs; /* list of xattrs registered by clients + for receiving invalidation */ }; -typedef struct _upcall_private_t upcall_private_t; - -struct _upcall_client_t { - struct list_head client_list; - /* strdup to store client_uid, strdup. Free it explicitly */ - char *client_uid; - time_t access_time; /* time last accessed */ - /* the amount of time which client can cache this entry */ - uint32_t expire_time_attr; +typedef struct _upcall_private upcall_private_t; + +struct _upcall_client { + struct list_head client_list; + /* strdup to store client_uid, strdup. 
Free it explicitly */ + char *client_uid; + time_t access_time; /* time last accessed */ + /* the amount of time which client can cache this entry */ + uint32_t expire_time_attr; }; -typedef struct _upcall_client_t upcall_client_t; +typedef struct _upcall_client upcall_client_t; /* Upcall entries are maintained in inode_ctx */ -struct _upcall_inode_ctx_t { - struct list_head inode_ctx_list; - struct list_head client_list; - pthread_mutex_t client_list_lock; /* mutex for clients list - of this upcall entry */ - int destroy; +struct _upcall_inode_ctx { + struct list_head inode_ctx_list; + struct list_head client_list; + pthread_mutex_t client_list_lock; /* mutex for clients list + of this upcall entry */ + int destroy; + uuid_t gfid; /* gfid of the entry */ }; -typedef struct _upcall_inode_ctx_t upcall_inode_ctx_t; +typedef struct _upcall_inode_ctx upcall_inode_ctx_t; struct upcall_local { - /* XXX: need to check if we can store - * pointers in 'local' which may get freed - * in future by other thread - */ - upcall_inode_ctx_t *upcall_inode_ctx; - inode_t *inode; - loc_t rename_oldloc; + /* XXX: need to check if we can store + * pointers in 'local' which may get freed + * in future by other thread + */ + inode_t *inode; + loc_t rename_oldloc; + loc_t loc; /* required for stat in *xattr_cbk */ + fd_t *fd; /* required for fstat in *xattr_cbk */ + dict_t *xattr; }; typedef struct upcall_local upcall_local_t; -void upcall_local_wipe (xlator_t *this, upcall_local_t *local); -upcall_local_t *upcall_local_init (call_frame_t *frame, xlator_t *this, inode_t *inode); - -upcall_client_t *add_upcall_client (call_frame_t *frame, uuid_t gfid, - client_t *client, - upcall_inode_ctx_t *up_inode_ctx); -upcall_client_t *__add_upcall_client (call_frame_t *frame, uuid_t gfid, - client_t *client, - upcall_inode_ctx_t *up_inode_ctx); -upcall_client_t *__get_upcall_client (call_frame_t *frame, uuid_t gfid, - client_t *client, - upcall_inode_ctx_t *up_inode_ctx); -int 
__upcall_cleanup_client_entry (upcall_client_t *up_client); -int upcall_cleanup_expired_clients (xlator_t *this, - upcall_inode_ctx_t *up_inode_ctx); - -int __upcall_inode_ctx_set (inode_t *inode, xlator_t *this); -upcall_inode_ctx_t *__upcall_inode_ctx_get (inode_t *inode, xlator_t *this); -upcall_inode_ctx_t *upcall_inode_ctx_get (inode_t *inode, xlator_t *this); -int upcall_cleanup_inode_ctx (xlator_t *this, inode_t *inode); -void upcall_cache_forget (xlator_t *this, inode_t *inode, - upcall_inode_ctx_t *up_inode_ctx); - -void *upcall_reaper_thread (void *data); -int upcall_reaper_thread_init (xlator_t *this); +void +upcall_local_wipe(xlator_t *this, upcall_local_t *local); +upcall_local_t * +upcall_local_init(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, + inode_t *inode, dict_t *xattr); + +upcall_inode_ctx_t * +upcall_inode_ctx_get(inode_t *inode, xlator_t *this); +int +upcall_cleanup_inode_ctx(xlator_t *this, inode_t *inode); + +void * +upcall_reaper_thread(void *data); +int +upcall_reaper_thread_init(xlator_t *this); /* Xlator options */ -gf_boolean_t is_upcall_enabled(xlator_t *this); +gf_boolean_t +is_upcall_enabled(xlator_t *this); /* Cache invalidation specific */ -void upcall_cache_invalidate (call_frame_t *frame, xlator_t *this, - client_t *client, inode_t *inode, - uint32_t flags, struct iatt *stbuf, - struct iatt *p_stbuf, - struct iatt *oldp_stbuf); -void upcall_client_cache_invalidate (xlator_t *xl, uuid_t gfid, - upcall_client_t *up_client_entry, - uint32_t flags, struct iatt *stbuf, - struct iatt *p_stbuf, - struct iatt *oldp_stbuf); - +void +upcall_cache_invalidate(call_frame_t *frame, xlator_t *this, client_t *client, + inode_t *inode, uint32_t flags, struct iatt *stbuf, + struct iatt *p_stbuf, struct iatt *oldp_stbuf, + dict_t *xattr); +int +up_filter_xattr(dict_t *xattr, dict_t *regd_xattrs); + +int +up_compare_afr_xattr(dict_t *d, char *k, data_t *v, void *tmp); + +gf_boolean_t +up_invalidate_needed(dict_t *xattrs); #endif /* 
__UPCALL_H__ */ diff --git a/xlators/features/utime/Makefile.am b/xlators/features/utime/Makefile.am new file mode 100644 index 00000000000..a985f42a877 --- /dev/null +++ b/xlators/features/utime/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/features/utime/src/Makefile.am b/xlators/features/utime/src/Makefile.am new file mode 100644 index 00000000000..7c3adbc2195 --- /dev/null +++ b/xlators/features/utime/src/Makefile.am @@ -0,0 +1,41 @@ +xlator_LTLIBRARIES = utime.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features + +UTIME_SRC = $(top_srcdir)/xlators/features/utime/src + +utime_sources = $(UTIME_SRC)/utime-helpers.c +utime_sources += $(UTIME_SRC)/utime.c + +utime_la_SOURCES = $(utime_sources) +nodist_utime_la_SOURCES = utime-autogen-fops.c utime-autogen-fops.h +BUILT_SOURCES = utime-autogen-fops.h + +utime_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) +utime_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +noinst_HEADERS_utime = $(UTIME_SRC)/utime-helpers.h +noinst_HEADERS_utime += $(UTIME_SRC)/utime.h +noinst_HEADERS_utime += $(UTIME_SRC)/utime-messages.h +noinst_HEADERS_utime += $(UTIME_SRC)/utime-mem-types.h +noinst_HEADERS = $(top_srcdir)/xlators/lib/src/libxlator.h +noinst_HEADERS += $(noinst_HEADERS_utime) + +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ + -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \ + -I$(top_srcdir)/xlators/lib/src + +AM_CFLAGS = -Wall $(GF_CFLAGS) + +noinst_PYTHON = utime-gen-fops-c.py utime-gen-fops-h.py +EXTRA_DIST = utime-autogen-fops-tmpl.c utime-autogen-fops-tmpl.h + +utime-autogen-fops.c: utime-gen-fops-c.py utime-autogen-fops-tmpl.c + $(PYTHON) $(UTIME_SRC)/utime-gen-fops-c.py $(UTIME_SRC)/utime-autogen-fops-tmpl.c > $@ + +utime-autogen-fops.h: utime-gen-fops-h.py utime-autogen-fops-tmpl.h + $(PYTHON) $(UTIME_SRC)/utime-gen-fops-h.py $(UTIME_SRC)/utime-autogen-fops-tmpl.h > $@ + +CLEANFILES = $(nodist_utime_la_SOURCES) + 
+uninstall-local: + rm -f $(DESTDIR)$(xlatordir)/utime.so diff --git a/xlators/features/utime/src/utime-autogen-fops-tmpl.c b/xlators/features/utime/src/utime-autogen-fops-tmpl.c new file mode 100644 index 00000000000..f2f35322926 --- /dev/null +++ b/xlators/features/utime/src/utime-autogen-fops-tmpl.c @@ -0,0 +1,28 @@ +/* + Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +/* File: utime-autogen-fops-tmpl.c + * This file contains the utime autogenerated FOPs. This is run through + * the code generator, generator.py to generate the required FOPs. + */ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include <glusterfs/glusterfs.h> +#include <glusterfs/xlator.h> +#include <glusterfs/logging.h> +#include <glusterfs/statedump.h> +#include "utime-helpers.h" +#include <glusterfs/timespec.h> + +#pragma generate diff --git a/xlators/features/utime/src/utime-autogen-fops-tmpl.h b/xlators/features/utime/src/utime-autogen-fops-tmpl.h new file mode 100644 index 00000000000..4e102ffed6c --- /dev/null +++ b/xlators/features/utime/src/utime-autogen-fops-tmpl.h @@ -0,0 +1,22 @@ +/* + Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +/* File: utime-autogen-fops-tmpl.h + * This file contains the utime autogenerated FOPs declarations. 
+ */ + +#ifndef _UTIME_AUTOGEN_FOPS_H +#define _UTIME_AUTOGEN_FOPS_H + +#include <glusterfs/xlator.h> + +#pragma generate + +#endif /* _UTIME_AUTOGEN_FOPS_H */ diff --git a/xlators/features/utime/src/utime-gen-fops-c.py b/xlators/features/utime/src/utime-gen-fops-c.py new file mode 100755 index 00000000000..9fb3e1b8b1a --- /dev/null +++ b/xlators/features/utime/src/utime-gen-fops-c.py @@ -0,0 +1,147 @@ +#!/usr/bin/python3 + +from __future__ import print_function +import os +import sys + +curdir = os.path.dirname(sys.argv[0]) +gendir = os.path.join(curdir, '../../../../libglusterfs/src') +sys.path.append(gendir) +from generator import ops, fop_subs, cbk_subs, generate + +FOPS_COMMON_TEMPLATE = """ +int32_t +gf_utime_@NAME@ (call_frame_t *frame, xlator_t *this, + @LONG_ARGS@) +{ + gl_timespec_get(&frame->root->ctime); + + (void) utime_update_attribute_flags(frame, this, GF_FOP_@UPNAME@); + STACK_WIND (frame, gf_utime_@NAME@_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->@NAME@, @SHORT_ARGS@); + return 0; +} +""" + +FOPS_CBK_COMMON_TEMPLATE = """ +int32_t +gf_utime_@NAME@_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + @LONG_ARGS@) +{ + STACK_UNWIND_STRICT (@NAME@, frame, op_ret, op_errno, @SHORT_ARGS@); + return 0; +} +""" + +FOPS_READ_TEMPLATE = """ +int32_t +gf_utime_@NAME@ (call_frame_t *frame, xlator_t *this, + @LONG_ARGS@) +{ + gl_timespec_get(&frame->root->ctime); + + (void) utime_update_attribute_flags(frame, this, GF_FOP_READ); + STACK_WIND (frame, gf_utime_@NAME@_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->@NAME@, @SHORT_ARGS@); + return 0; +} +""" + +FOPS_WRITE_TEMPLATE = """ +int32_t +gf_utime_@NAME@ (call_frame_t *frame, xlator_t *this, + @LONG_ARGS@) +{ + gl_timespec_get(&frame->root->ctime); + + (void) utime_update_attribute_flags(frame, this, GF_FOP_WRITE); + STACK_WIND (frame, gf_utime_@NAME@_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->@NAME@, @SHORT_ARGS@); + return 0; +} +""" + 
+FOPS_COPY_FILE_RANGE_TEMPLATE = """ +int32_t +gf_utime_@NAME@ (call_frame_t *frame, xlator_t *this, + @LONG_ARGS@) +{ + gl_timespec_get(&frame->root->ctime); + + (void) utime_update_attribute_flags(frame, this, GF_FOP_COPY_FILE_RANGE); + STACK_WIND (frame, gf_utime_@NAME@_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->@NAME@, @SHORT_ARGS@); + return 0; +} +""" + +FOPS_SETATTR_TEMPLATE = """ +int32_t +gf_utime_@NAME@ (call_frame_t *frame, xlator_t *this, + @LONG_ARGS@) +{ + gl_timespec_get(&frame->root->ctime); + + if (!valid) { + frame->root->flags |= MDATA_CTIME; + } + + if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)) { + frame->root->flags |= MDATA_CTIME; + } + + if (valid & GF_SET_ATTR_MODE) { + frame->root->flags |= MDATA_CTIME; + } + + if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) { + if (valid & GF_ATTR_ATIME_NOW) { + frame->root->ctime.tv_sec = stbuf->ia_atime; + frame->root->ctime.tv_nsec = stbuf->ia_atime_nsec; + } else if (valid & GF_ATTR_MTIME_NOW) { + frame->root->ctime.tv_sec = stbuf->ia_mtime; + frame->root->ctime.tv_nsec = stbuf->ia_mtime_nsec; + } + } + + STACK_WIND (frame, gf_utime_@NAME@_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->@NAME@, @SHORT_ARGS@); + return 0; +} +""" + +utime_ops = ['fallocate', 'zerofill', 'opendir', 'mknod', 'mkdir', + 'unlink', 'rmdir', 'symlink', 'rename', 'link', 'truncate', + 'ftruncate', 'create', 'open', 'removexattr', 'fremovexattr'] + +utime_read_op = ['readv'] +utime_write_op = ['writev'] +utime_setattr_ops = ['setattr', 'fsetattr'] +utime_copy_file_range_ops = ['copy_file_range'] + +def gen_defaults(): + for name in ops: + if name in utime_ops: + print(generate(FOPS_CBK_COMMON_TEMPLATE, name, cbk_subs)) + print(generate(FOPS_COMMON_TEMPLATE, name, fop_subs)) + if name in utime_read_op: + print(generate(FOPS_CBK_COMMON_TEMPLATE, name, cbk_subs)) + print(generate(FOPS_READ_TEMPLATE, name, fop_subs)) + if name in utime_write_op: + print(generate(FOPS_CBK_COMMON_TEMPLATE, name, cbk_subs)) + 
print(generate(FOPS_WRITE_TEMPLATE, name, fop_subs)) + if name in utime_setattr_ops: + print(generate(FOPS_CBK_COMMON_TEMPLATE, name, cbk_subs)) + print(generate(FOPS_SETATTR_TEMPLATE, name, fop_subs)) + if name in utime_copy_file_range_ops: + print(generate(FOPS_CBK_COMMON_TEMPLATE, name, cbk_subs)) + print(generate(FOPS_COPY_FILE_RANGE_TEMPLATE, name, fop_subs)) + +for l in open(sys.argv[1], 'r').readlines(): + if l.find('#pragma generate') != -1: + print("/* BEGIN GENERATED CODE - DO NOT MODIFY */") + gen_defaults() + print("/* END GENERATED CODE */") + else: + print(l[:-1]) diff --git a/xlators/features/utime/src/utime-gen-fops-h.py b/xlators/features/utime/src/utime-gen-fops-h.py new file mode 100755 index 00000000000..e96274c229a --- /dev/null +++ b/xlators/features/utime/src/utime-gen-fops-h.py @@ -0,0 +1,35 @@ +#!/usr/bin/python3 + +from __future__ import print_function +import os +import sys + +curdir = os.path.dirname(sys.argv[0]) +gendir = os.path.join(curdir, '../../../../libglusterfs/src') +sys.path.append(gendir) +from generator import ops, fop_subs, generate + +OP_FOP_TEMPLATE = """ +int32_t +gf_utime_@NAME@ (call_frame_t *frame, xlator_t *this, + @LONG_ARGS@); +""" + +utime_ops = ['fallocate', 'zerofill', 'opendir', 'mknod', 'mkdir', + 'unlink', 'rmdir', 'symlink', 'rename', 'link', 'truncate', + 'ftruncate', 'create', 'open', 'removexattr', 'fremovexattr', + 'readv', 'writev', 'setattr', 'fsetattr', 'copy_file_range'] + +def gen_defaults(): + for name, value in ops.items(): + if name in utime_ops: + print(generate(OP_FOP_TEMPLATE, name, fop_subs)) + + +for l in open(sys.argv[1], 'r').readlines(): + if l.find('#pragma generate') != -1: + print("/* BEGIN GENERATED CODE - DO NOT MODIFY */") + gen_defaults() + print("/* END GENERATED CODE */") + else: + print(l[:-1]) diff --git a/xlators/features/utime/src/utime-helpers.c b/xlators/features/utime/src/utime-helpers.c new file mode 100644 index 00000000000..29d9ad93561 --- /dev/null +++ 
b/xlators/features/utime/src/utime-helpers.c @@ -0,0 +1,110 @@ +/* + Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include "utime-helpers.h" +#include "utime.h" + +void +gl_timespec_get(struct timespec *ts) +{ +#ifdef TIME_UTC + timespec_get(ts, TIME_UTC); +#else + timespec_now_realtime(ts); +#endif +} + +void +utime_update_attribute_flags(call_frame_t *frame, xlator_t *this, + glusterfs_fop_t fop) +{ + utime_priv_t *utime_priv = NULL; + + if (!frame || !this) { + goto out; + } + + utime_priv = this->private; + + switch (fop) { + case GF_FOP_SETXATTR: + case GF_FOP_FSETXATTR: + frame->root->flags |= MDATA_CTIME; + break; + + case GF_FOP_FALLOCATE: + case GF_FOP_ZEROFILL: + frame->root->flags |= MDATA_MTIME; + frame->root->flags |= MDATA_ATIME; + break; + + case GF_FOP_OPENDIR: + case GF_FOP_OPEN: + case GF_FOP_READ: + if (!utime_priv->noatime) { + frame->root->flags |= MDATA_ATIME; + } + break; + case GF_FOP_MKNOD: + case GF_FOP_MKDIR: + case GF_FOP_SYMLINK: + case GF_FOP_CREATE: + frame->root->flags |= MDATA_ATIME; + frame->root->flags |= MDATA_CTIME; + frame->root->flags |= MDATA_MTIME; + frame->root->flags |= MDATA_PAR_CTIME; + frame->root->flags |= MDATA_PAR_MTIME; + break; + + case GF_FOP_UNLINK: + case GF_FOP_RMDIR: + frame->root->flags |= MDATA_CTIME; + frame->root->flags |= MDATA_PAR_CTIME; + frame->root->flags |= MDATA_PAR_MTIME; + break; + + case GF_FOP_WRITE: + frame->root->flags |= MDATA_MTIME; + frame->root->flags |= MDATA_CTIME; + break; + + case GF_FOP_LINK: + case GF_FOP_RENAME: + frame->root->flags |= MDATA_CTIME; + frame->root->flags |= MDATA_PAR_CTIME; + frame->root->flags |= MDATA_PAR_MTIME; + break; + + case GF_FOP_TRUNCATE: + case 
GF_FOP_FTRUNCATE: + frame->root->flags |= MDATA_CTIME; + frame->root->flags |= MDATA_MTIME; + break; + + case GF_FOP_REMOVEXATTR: + case GF_FOP_FREMOVEXATTR: + frame->root->flags |= MDATA_CTIME; + break; + + case GF_FOP_COPY_FILE_RANGE: + /* Below 2 are for destination fd */ + frame->root->flags |= MDATA_CTIME; + frame->root->flags |= MDATA_MTIME; + /* Below flag is for the source fd */ + if (!utime_priv->noatime) { + frame->root->flags |= MDATA_ATIME; + } + break; + default: + frame->root->flags = 0; + } +out: + return; +} diff --git a/xlators/features/utime/src/utime-helpers.h b/xlators/features/utime/src/utime-helpers.h new file mode 100644 index 00000000000..2e32d4bece6 --- /dev/null +++ b/xlators/features/utime/src/utime-helpers.h @@ -0,0 +1,25 @@ +/* + Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _UTIME_HELPERS_H +#define _UTIME_HELPERS_H + +#include <glusterfs/stack.h> +#include <glusterfs/xlator.h> +#include <glusterfs/timespec.h> +#include <time.h> + +void +gl_timespec_get(struct timespec *ts); +void +utime_update_attribute_flags(call_frame_t *frame, xlator_t *this, + glusterfs_fop_t fop); + +#endif /* _UTIME_HELPERS_H */ diff --git a/xlators/features/utime/src/utime-mem-types.h b/xlators/features/utime/src/utime-mem-types.h new file mode 100644 index 00000000000..ad1255f85f3 --- /dev/null +++ b/xlators/features/utime/src/utime-mem-types.h @@ -0,0 +1,21 @@ +/* + Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. 
+ + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef __UTIME_MEM_TYPES_H__ +#define __UTIME_MEM_TYPES_H__ + +#include <glusterfs/mem-types.h> + +enum gf_utime_mem_types_ { + utime_mt_utime_t = gf_common_mt_end + 1, + utime_mt_end +}; + +#endif /* __UTIME_MEM_TYPES_H__ */ diff --git a/xlators/features/utime/src/utime-messages.h b/xlators/features/utime/src/utime-messages.h new file mode 100644 index 00000000000..bd40265abaf --- /dev/null +++ b/xlators/features/utime/src/utime-messages.h @@ -0,0 +1,29 @@ +/* + Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef __UTIME_MESSAGES_H__ +#define __UTIME_MESSAGES_H__ + +#include <glusterfs/glfs-message-id.h> + +/* To add new message IDs, append new identifiers at the end of the list. + * + * Never remove a message ID. If it's not used anymore, you can rename it or + * leave it as it is, but not delete it. This is to prevent reutilization of + * IDs by other messages. + * + * The component name must match one of the entries defined in + * glfs-message-id.h. + */ + +GLFS_MSGID(UTIME, UTIME_MSG_NO_MEMORY, UTIME_MSG_SET_MDATA_FAILED, + UTIME_MSG_DICT_SET_FAILED); + +#endif /* __UTIME_MESSAGES_H__ */ diff --git a/xlators/features/utime/src/utime.c b/xlators/features/utime/src/utime.c new file mode 100644 index 00000000000..2acc63e6a05 --- /dev/null +++ b/xlators/features/utime/src/utime.c @@ -0,0 +1,392 @@ +/* + Copyright (c) 2018 Red Hat, Inc. 
<http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include "utime.h" +#include "utime-helpers.h" +#include "utime-messages.h" +#include "utime-mem-types.h" +#include <glusterfs/call-stub.h> + +int32_t +gf_utime_invalidate(xlator_t *this, inode_t *inode) +{ + return 0; +} + +int32_t +gf_utime_forget(xlator_t *this, inode_t *inode) +{ + return 0; +} + +int32_t +gf_utime_client_destroy(xlator_t *this, client_t *client) +{ + return 0; +} + +void +gf_utime_ictxmerge(xlator_t *this, fd_t *fd, inode_t *inode, + inode_t *linked_inode) +{ + return; +} + +int32_t +gf_utime_release(xlator_t *this, fd_t *fd) +{ + return 0; +} + +int32_t +gf_utime_releasedir(xlator_t *this, fd_t *fd) +{ + return 0; +} + +int32_t +gf_utime_client_disconnect(xlator_t *this, client_t *client) +{ + return 0; +} + +int32_t +gf_utime_fdctx_to_dict(xlator_t *this, fd_t *fd, dict_t *dict) +{ + return 0; +} + +int32_t +gf_utime_inode(xlator_t *this) +{ + return 0; +} + +int32_t +gf_utime_inode_to_dict(xlator_t *this, dict_t *dict) +{ + return 0; +} + +int32_t +gf_utime_history(xlator_t *this) +{ + return 0; +} + +int32_t +gf_utime_fd(xlator_t *this) +{ + return 0; +} + +int32_t +gf_utime_fd_to_dict(xlator_t *this, dict_t *dict) +{ + return 0; +} + +int32_t +gf_utime_fdctx(xlator_t *this, fd_t *fd) +{ + return 0; +} + +int32_t +gf_utime_inodectx(xlator_t *this, inode_t *ino) +{ + return 0; +} + +int32_t +gf_utime_inodectx_to_dict(xlator_t *this, inode_t *ino, dict_t *dict) +{ + return 0; +} + +int32_t +gf_utime_priv_to_dict(xlator_t *this, dict_t *dict, char *brickname) +{ + return 0; +} + +int32_t +gf_utime_priv(xlator_t *this) +{ + return 0; +} + +int32_t +mem_acct_init(xlator_t *this) +{ + if (xlator_mem_acct_init(this, 
utime_mt_end + 1) != 0) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, UTIME_MSG_NO_MEMORY, + "Memory accounting initialization failed."); + return -1; + } + return 0; +} + +int32_t +gf_utime_set_mdata_setxattr_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int op_ret, int op_errno, + dict_t *xdata) +{ + call_stub_t *stub = frame->local; + /* Don't fail lookup if mdata setxattr fails */ + if (op_ret) { + gf_msg(this->name, GF_LOG_ERROR, op_errno, UTIME_MSG_SET_MDATA_FAILED, + "dict set of key for set-ctime-mdata failed"); + } + frame->local = NULL; + call_resume(stub); + STACK_DESTROY(frame->root); + return 0; +} + +int32_t +gf_utime_set_mdata_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *stbuf, dict_t *xdata, + struct iatt *postparent) +{ + dict_t *dict = NULL; + struct mdata_iatt *mdata = NULL; + int ret = 0; + loc_t loc = { + 0, + }; + call_frame_t *new_frame = NULL; + + if (!op_ret && dict_get(xdata, GF_XATTR_MDATA_KEY) == NULL) { + dict = dict_new(); + if (!dict) { + op_errno = ENOMEM; + goto err; + } + mdata = GF_MALLOC(sizeof(struct mdata_iatt), gf_common_mt_char); + if (mdata == NULL) { + op_errno = ENOMEM; + goto err; + } + iatt_to_mdata(mdata, stbuf); + ret = dict_set_mdata(dict, CTIME_MDATA_XDATA_KEY, mdata, _gf_false); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, UTIME_MSG_NO_MEMORY, + "dict set of key for set-ctime-mdata failed"); + goto err; + } + new_frame = copy_frame(frame); + if (!new_frame) { + op_errno = ENOMEM; + goto stub_err; + } + + new_frame->local = fop_lookup_cbk_stub(frame, default_lookup_cbk, + op_ret, op_errno, inode, stbuf, + xdata, postparent); + if (!new_frame->local) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, UTIME_MSG_NO_MEMORY, + "lookup_cbk stub allocation failed"); + op_errno = ENOMEM; + STACK_DESTROY(new_frame->root); + goto stub_err; + } + + loc.inode = inode_ref(inode); + gf_uuid_copy(loc.gfid, stbuf->ia_gfid); + + 
new_frame->root->uid = 0; + new_frame->root->gid = 0; + new_frame->root->pid = GF_CLIENT_PID_SET_UTIME; + STACK_WIND(new_frame, gf_utime_set_mdata_setxattr_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr, &loc, + dict, 0, NULL); + + dict_unref(dict); + inode_unref(loc.inode); + return 0; + } + + STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, stbuf, xdata, + postparent); + return 0; + +err: + if (mdata) { + GF_FREE(mdata); + } +stub_err: + if (dict) { + dict_unref(dict); + } + STACK_UNWIND_STRICT(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL); + return 0; +} + +int +gf_utime_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +{ + int op_errno = EINVAL; + int ret = -1; + + VALIDATE_OR_GOTO(frame, err); + VALIDATE_OR_GOTO(this, err); + VALIDATE_OR_GOTO(loc, err); + VALIDATE_OR_GOTO(loc->inode, err); + + xdata = xdata ? dict_ref(xdata) : dict_new(); + if (!xdata) { + op_errno = ENOMEM; + goto err; + } + + ret = dict_set_int8(xdata, GF_XATTR_MDATA_KEY, 1); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, -ret, UTIME_MSG_DICT_SET_FAILED, + "%s: Unable to set dict value for %s", loc->path, + GF_XATTR_MDATA_KEY); + op_errno = -ret; + goto free_dict; + } + + STACK_WIND(frame, gf_utime_set_mdata_lookup_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, loc, xdata); + dict_unref(xdata); + return 0; + +free_dict: + dict_unref(xdata); +err: + STACK_UNWIND_STRICT(lookup, frame, ret, op_errno, NULL, NULL, NULL, NULL); + return 0; +} + +int32_t +init(xlator_t *this) +{ + utime_priv_t *utime = NULL; + + utime = GF_MALLOC(sizeof(*utime), utime_mt_utime_t); + if (utime == NULL) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, UTIME_MSG_NO_MEMORY, + "Failed to allocate private memory."); + return -1; + } + memset(utime, 0, sizeof(*utime)); + + this->private = utime; + GF_OPTION_INIT("noatime", utime->noatime, bool, err); + + return 0; +err: + return -1; +} + +void +fini(xlator_t *this) +{ + utime_priv_t *utime = NULL; + + 
utime = this->private; + GF_FREE(utime); + return; +} + +int32_t +reconfigure(xlator_t *this, dict_t *options) +{ + utime_priv_t *utime = this->private; + + GF_OPTION_RECONF("noatime", utime->noatime, options, bool, err); + + return 0; +err: + return -1; +} + +int +notify(xlator_t *this, int event, void *data, ...) +{ + return default_notify(this, event, data); +} + +struct xlator_fops fops = { + .rename = gf_utime_rename, + .mknod = gf_utime_mknod, + .readv = gf_utime_readv, + .fremovexattr = gf_utime_fremovexattr, + .open = gf_utime_open, + .create = gf_utime_create, + .mkdir = gf_utime_mkdir, + .writev = gf_utime_writev, + .rmdir = gf_utime_rmdir, + .fallocate = gf_utime_fallocate, + .truncate = gf_utime_truncate, + .symlink = gf_utime_symlink, + .zerofill = gf_utime_zerofill, + .link = gf_utime_link, + .ftruncate = gf_utime_ftruncate, + .unlink = gf_utime_unlink, + .setattr = gf_utime_setattr, + .fsetattr = gf_utime_fsetattr, + .opendir = gf_utime_opendir, + .removexattr = gf_utime_removexattr, + .lookup = gf_utime_lookup, +}; +struct xlator_cbks cbks = { + .invalidate = gf_utime_invalidate, + .forget = gf_utime_forget, + .client_destroy = gf_utime_client_destroy, + .ictxmerge = gf_utime_ictxmerge, + .release = gf_utime_release, + .releasedir = gf_utime_releasedir, + .client_disconnect = gf_utime_client_disconnect, +}; +struct xlator_dumpops dumpops = { + .fdctx_to_dict = gf_utime_fdctx_to_dict, + .inode = gf_utime_inode, + .inode_to_dict = gf_utime_inode_to_dict, + .history = gf_utime_history, + .fd = gf_utime_fd, + .fd_to_dict = gf_utime_fd_to_dict, + .fdctx = gf_utime_fdctx, + .inodectx = gf_utime_inodectx, + .inodectx_to_dict = gf_utime_inodectx_to_dict, + .priv_to_dict = gf_utime_priv_to_dict, + .priv = gf_utime_priv, +}; + +struct volume_options options[] = { + {.key = {"noatime"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "on", + .op_version = {GD_OP_VERSION_5_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC, + .tags = 
{"ctime"}, + .description = "Enable/Disable atime updation when ctime feature is " + "enabled. When noatime is on, atime is not updated with " + "ctime feature enabled and vice versa."}, + {.key = {NULL}}}; + +xlator_api_t xlator_api = { + .init = init, + .fini = fini, + .notify = notify, + .reconfigure = reconfigure, + .mem_acct_init = mem_acct_init, + .op_version = {GD_OP_VERSION_5_0}, + .dumpops = &dumpops, + .fops = &fops, + .cbks = &cbks, + .options = options, + .identifier = "utime", + .category = GF_MAINTAINED, +}; diff --git a/xlators/features/utime/src/utime.h b/xlators/features/utime/src/utime.h new file mode 100644 index 00000000000..ba55eec00de --- /dev/null +++ b/xlators/features/utime/src/utime.h @@ -0,0 +1,23 @@ +/* + Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef __UTIME_H__ +#define __UTIME_H__ + +#include <glusterfs/glusterfs.h> +#include <glusterfs/xlator.h> +#include <glusterfs/defaults.h> +#include "utime-autogen-fops.h" + +typedef struct utime_priv { + gf_boolean_t noatime; +} utime_priv_t; + +#endif /* __UTIME_H__ */ |
