summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAmar Tumballi <amarts@redhat.com>2018-12-06 12:29:25 +0530
committerAmar Tumballi <amarts@redhat.com>2018-12-13 17:10:00 +0000
commit8293d21280fd6ddfc9bb54068cf87794fc6be207 (patch)
tree39729fb407b436ed0cc3e4a9f4e5bbd29036a9db
parentaf7e957b4954bd84b8f7df6bfbd59c939092ead2 (diff)
all: remove code which is not being considered in build
These xlators are now removed from build as per discussion/announcement done at https://lists.gluster.org/pipermail/gluster-users/2018-July/034400.html * move rot-13 to playground, as it is used only as demo purpose, and is documented in many places. * Removed code of below xlators: - cluster/stripe - cluster/tier - features/changetimerecorder - features/glupy - performance/symlink-cache - encryption/crypt - storage/bd - experimental/posix2 - experimental/dht2 - experimental/fdl - experimental/jbr updates: bz#1635688 Change-Id: I1d2d63c32535e149bc8dcb2daa76236c707996e8 Signed-off-by: Amar Tumballi <amarts@redhat.com>
-rw-r--r--xlators/cluster/dht/src/tier-common.c1199
-rw-r--r--xlators/cluster/dht/src/tier-common.h55
-rw-r--r--xlators/cluster/dht/src/tier.c3090
-rw-r--r--xlators/cluster/dht/src/tier.h110
-rw-r--r--xlators/cluster/dht/src/tier.sym9
-rw-r--r--xlators/cluster/stripe/src/Makefile.am22
-rw-r--r--xlators/cluster/stripe/src/stripe-helpers.c658
-rw-r--r--xlators/cluster/stripe/src/stripe-mem-types.h29
-rw-r--r--xlators/cluster/stripe/src/stripe.c5612
-rw-r--r--xlators/cluster/stripe/src/stripe.h291
-rw-r--r--xlators/encryption/Makefile.am3
-rw-r--r--xlators/encryption/crypt/Makefile.am3
-rw-r--r--xlators/encryption/crypt/src/Makefile.am26
-rw-r--r--xlators/encryption/crypt/src/atom.c861
-rw-r--r--xlators/encryption/crypt/src/crypt-common.h133
-rw-r--r--xlators/encryption/crypt/src/crypt-mem-types.h41
-rw-r--r--xlators/encryption/crypt/src/crypt.c3906
-rw-r--r--xlators/encryption/crypt/src/crypt.h931
-rw-r--r--xlators/encryption/crypt/src/data.c715
-rw-r--r--xlators/encryption/crypt/src/keys.c284
-rw-r--r--xlators/encryption/crypt/src/metadata.c575
-rw-r--r--xlators/encryption/crypt/src/metadata.h79
-rw-r--r--xlators/encryption/rot-13/Makefile.am3
-rw-r--r--xlators/experimental/Makefile.am3
-rw-r--r--xlators/experimental/README.md107
-rw-r--r--xlators/experimental/dht2/Makefile.am3
-rw-r--r--xlators/experimental/dht2/README.md47
-rw-r--r--xlators/experimental/dht2/TODO.md3
-rw-r--r--xlators/experimental/dht2/dht2-client/Makefile.am3
-rw-r--r--xlators/experimental/dht2/dht2-client/src/Makefile.am21
-rw-r--r--xlators/experimental/dht2/dht2-client/src/dht2-client-main.c58
-rw-r--r--xlators/experimental/dht2/dht2-common/src/dht2-common-map.c19
-rw-r--r--xlators/experimental/dht2/dht2-server/Makefile.am3
-rw-r--r--xlators/experimental/dht2/dht2-server/src/Makefile.am23
-rw-r--r--xlators/experimental/dht2/dht2-server/src/dht2-server-main.c58
-rw-r--r--xlators/experimental/fdl/Makefile.am3
-rw-r--r--xlators/experimental/fdl/src/Makefile.am48
-rw-r--r--xlators/experimental/fdl/src/dump-tmpl.c.in177
-rw-r--r--xlators/experimental/fdl/src/fdl-tmpl.c.in513
-rw-r--r--xlators/experimental/fdl/src/fdl.h30
-rwxr-xr-xxlators/experimental/fdl/src/gen_dumper.py117
-rwxr-xr-xxlators/experimental/fdl/src/gen_fdl.py354
-rwxr-xr-xxlators/experimental/fdl/src/gen_recon.py218
-rw-r--r--xlators/experimental/fdl/src/logdump.c51
-rw-r--r--xlators/experimental/fdl/src/recon-tmpl.c.in297
-rw-r--r--xlators/experimental/fdl/src/recon.c89
-rw-r--r--xlators/experimental/jbr-client/Makefile.am3
-rw-r--r--xlators/experimental/jbr-client/src/Makefile.am34
-rw-r--r--xlators/experimental/jbr-client/src/fop-template.c.in102
-rwxr-xr-xxlators/experimental/jbr-client/src/gen-fops.py58
-rw-r--r--xlators/experimental/jbr-client/src/jbr-messages.h30
-rw-r--r--xlators/experimental/jbr-client/src/jbrc.c311
-rw-r--r--xlators/experimental/jbr-client/src/jbrc.h27
-rw-r--r--xlators/experimental/jbr-server/Makefile.am3
-rw-r--r--xlators/experimental/jbr-server/src/Makefile.am39
-rw-r--r--xlators/experimental/jbr-server/src/all-templates.c.in501
-rwxr-xr-xxlators/experimental/jbr-server/src/gen-fops.py181
-rw-r--r--xlators/experimental/jbr-server/src/jbr-internal.h118
-rw-r--r--xlators/experimental/jbr-server/src/jbr.c1676
-rw-r--r--xlators/experimental/posix2/Makefile.am3
-rw-r--r--xlators/experimental/posix2/README.md7
-rw-r--r--xlators/experimental/posix2/TODO.md3
-rw-r--r--xlators/experimental/posix2/common/Makefile.am3
-rw-r--r--xlators/experimental/posix2/common/src/Makefile.am16
-rw-r--r--xlators/experimental/posix2/common/src/posix2-common.c18
-rw-r--r--xlators/experimental/posix2/ds/Makefile.am3
-rw-r--r--xlators/experimental/posix2/ds/src/Makefile.am22
-rw-r--r--xlators/experimental/posix2/ds/src/posix2-ds-main.c56
-rw-r--r--xlators/experimental/posix2/mds/Makefile.am3
-rw-r--r--xlators/experimental/posix2/mds/src/Makefile.am22
-rw-r--r--xlators/experimental/posix2/mds/src/posix2-mds-main.c56
-rw-r--r--xlators/features/changetimerecorder/Makefile.am3
-rw-r--r--xlators/features/changetimerecorder/src/Makefile.am26
-rw-r--r--xlators/features/changetimerecorder/src/changetimerecorder.c2357
-rw-r--r--xlators/features/changetimerecorder/src/changetimerecorder.h21
-rw-r--r--xlators/features/changetimerecorder/src/ctr-helper.c293
-rw-r--r--xlators/features/changetimerecorder/src/ctr-helper.h854
-rw-r--r--xlators/features/changetimerecorder/src/ctr-messages.h61
-rw-r--r--xlators/features/changetimerecorder/src/ctr-xlator-ctx.c362
-rw-r--r--xlators/features/changetimerecorder/src/ctr-xlator-ctx.h68
-rw-r--r--xlators/features/changetimerecorder/src/ctr_mem_types.h22
-rw-r--r--xlators/features/glupy/Makefile.am3
-rw-r--r--xlators/features/glupy/doc/README.md44
-rw-r--r--xlators/features/glupy/doc/TESTING9
-rw-r--r--xlators/features/glupy/doc/test.vol10
-rw-r--r--xlators/features/glupy/examples/Makefile.am5
-rw-r--r--xlators/features/glupy/examples/debug-trace.py777
-rw-r--r--xlators/features/glupy/examples/helloworld.py21
-rw-r--r--xlators/features/glupy/examples/negative.py93
-rw-r--r--xlators/features/glupy/src/Makefile.am36
-rw-r--r--xlators/features/glupy/src/__init__.py.in2
-rw-r--r--xlators/features/glupy/src/glupy.c2446
-rw-r--r--xlators/features/glupy/src/glupy.h56
-rw-r--r--xlators/features/glupy/src/glupy.sym101
-rw-r--r--xlators/features/glupy/src/glupy/Makefile.am5
-rw-r--r--xlators/features/glupy/src/glupy/__init__.py852
-rw-r--r--xlators/features/glupy/src/setup.py.in24
-rw-r--r--xlators/performance/symlink-cache/Makefile.am3
-rw-r--r--xlators/performance/symlink-cache/src/Makefile.am16
-rw-r--r--xlators/performance/symlink-cache/src/symlink-cache-messages.h30
-rw-r--r--xlators/performance/symlink-cache/src/symlink-cache.c368
-rw-r--r--xlators/playground/rot-13/Makefile.am (renamed from xlators/cluster/stripe/Makefile.am)0
-rw-r--r--xlators/playground/rot-13/src/Makefile.am (renamed from xlators/encryption/rot-13/src/Makefile.am)0
-rw-r--r--xlators/playground/rot-13/src/rot-13.c (renamed from xlators/encryption/rot-13/src/rot-13.c)0
-rw-r--r--xlators/playground/rot-13/src/rot-13.h (renamed from xlators/encryption/rot-13/src/rot-13.h)0
-rw-r--r--xlators/storage/bd/Makefile.am3
-rw-r--r--xlators/storage/bd/src/Makefile.am21
-rw-r--r--xlators/storage/bd/src/bd-aio.c518
-rw-r--r--xlators/storage/bd/src/bd-aio.h40
-rw-r--r--xlators/storage/bd/src/bd-helper.c1073
-rw-r--r--xlators/storage/bd/src/bd-mem-types.h26
-rw-r--r--xlators/storage/bd/src/bd.c2426
-rw-r--r--xlators/storage/bd/src/bd.h189
113 files changed, 0 insertions, 37439 deletions
diff --git a/xlators/cluster/dht/src/tier-common.c b/xlators/cluster/dht/src/tier-common.c
deleted file mode 100644
index b22f477..0000000
--- a/xlators/cluster/dht/src/tier-common.c
+++ /dev/null
@@ -1,1199 +0,0 @@
-/*
- Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include <glusterfs/glusterfs.h>
-#include <glusterfs/xlator.h>
-#include "libxlator.h"
-#include "dht-common.h"
-#include <glusterfs/defaults.h>
-#include "tier-common.h"
-#include "tier.h"
-
-int
-dht_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata);
-
-int
-tier_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
-{
- dht_local_t *local = NULL;
- loc_t *oldloc = NULL;
- loc_t *newloc = NULL;
-
- local = frame->local;
-
- oldloc = &local->loc;
- newloc = &local->loc2;
-
- if (op_ret == -1) {
- /* No continuation on DHT inode missing errors, as we should
- * then have a good stbuf that states P2 happened. We would
- * get inode missing if, the file completed migrated between
- * the lookup and the link call */
- goto out;
- }
-
- if (local->call_cnt != 1) {
- goto out;
- }
-
- local->call_cnt = 2;
-
- /* Do this on the hot tier now */
-
- STACK_WIND(frame, tier_link_cbk, local->cached_subvol,
- local->cached_subvol->fops->link, oldloc, newloc, xdata);
-
- return 0;
-
-out:
- DHT_STRIP_PHASE1_FLAGS(stbuf);
-
- DHT_STACK_UNWIND(link, frame, op_ret, op_errno, inode, stbuf, preparent,
- postparent, NULL);
-
- return 0;
-}
-
-int
-tier_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
- dict_t *xdata)
-{
- xlator_t *cached_subvol = NULL;
- xlator_t *hashed_subvol = NULL;
- int op_errno = -1;
- int ret = -1;
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(oldloc, err);
- VALIDATE_OR_GOTO(newloc, err);
-
- conf = this->private;
-
- local = dht_local_init(frame, oldloc, NULL, GF_FOP_LINK);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->call_cnt = 1;
-
- cached_subvol = local->cached_subvol;
-
- if (!cached_subvol) {
- gf_msg_debug(this->name, 0, "no cached subvolume for path=%s",
- oldloc->path);
- op_errno = ENOENT;
- goto err;
- }
-
- hashed_subvol = TIER_HASHED_SUBVOL;
-
- ret = loc_copy(&local->loc2, newloc);
- if (ret == -1) {
- op_errno = ENOMEM;
- goto err;
- }
-
- if (hashed_subvol == cached_subvol) {
- STACK_WIND(frame, dht_link_cbk, cached_subvol,
- cached_subvol->fops->link, oldloc, newloc, xdata);
- return 0;
- }
-
- /* Create hardlinks to both the data file on the hot tier
- and the linkto file on the cold tier */
-
- gf_uuid_copy(local->gfid, oldloc->inode->gfid);
-
- STACK_WIND(frame, tier_link_cbk, hashed_subvol, hashed_subvol->fops->link,
- oldloc, newloc, xdata);
-
- return 0;
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND(link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
- return 0;
-}
-
-int
-tier_create_unlink_stale_linkto_cbk(call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- dht_local_t *local = NULL;
-
- local = frame->local;
-
- if (local->params) {
- dict_del(local->params, GLUSTERFS_INTERNAL_FOP_KEY);
- }
-
- DHT_STACK_UNWIND(create, frame, -1, local->op_errno, NULL, NULL, NULL, NULL,
- NULL, NULL);
-
- return 0;
-}
-
-int
-tier_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, fd_t *fd, inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
-{
- xlator_t *prev = NULL;
- int ret = -1;
- dht_local_t *local = NULL;
- xlator_t *hashed_subvol = NULL;
- dht_conf_t *conf = NULL;
-
- local = frame->local;
- conf = this->private;
-
- hashed_subvol = TIER_HASHED_SUBVOL;
-
- if (!local) {
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
-
- if (op_ret == -1) {
- if (local->linked == _gf_true && local->xattr_req) {
- local->op_errno = op_errno;
- local->op_ret = op_ret;
- ret = dht_fill_dict_to_avoid_unlink_of_migrating_file(
- local->xattr_req);
- if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value to "
- "unlink of migrating file");
- goto out;
- }
-
- STACK_WIND(frame, tier_create_unlink_stale_linkto_cbk,
- hashed_subvol, hashed_subvol->fops->unlink, &local->loc,
- 0, local->xattr_req);
- return 0;
- }
- goto out;
- }
-
- prev = cookie;
-
- if (local->loc.parent) {
- dht_inode_ctx_time_update(local->loc.parent, this, preparent, 0);
-
- dht_inode_ctx_time_update(local->loc.parent, this, postparent, 1);
- }
-
- ret = dht_layout_preset(this, prev, inode);
- if (ret != 0) {
- gf_msg_debug(this->name, 0, "could not set preset layout for subvol %s",
- prev->name);
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
-
- local->op_errno = op_errno;
-
- if (local->linked == _gf_true) {
- local->stbuf = *stbuf;
- dht_linkfile_attr_heal(frame, this);
- }
-out:
- if (local) {
- if (local->xattr_req) {
- dict_del(local->xattr_req, TIER_LINKFILE_GFID);
- }
- }
-
- DHT_STRIP_PHASE1_FLAGS(stbuf);
-
- DHT_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf,
- preparent, postparent, xdata);
-
- return 0;
-}
-
-int
-tier_create_linkfile_create_cbk(call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, inode_t *inode,
- struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- dht_local_t *local = NULL;
- xlator_t *cached_subvol = NULL;
- dht_conf_t *conf = NULL;
- int ret = -1;
- unsigned char *gfid = NULL;
-
- local = frame->local;
- if (!local) {
- op_errno = EINVAL;
- goto err;
- }
-
- if (op_ret == -1) {
- local->op_errno = op_errno;
- goto err;
- }
-
- conf = this->private;
- if (!conf) {
- local->op_errno = EINVAL;
- op_errno = EINVAL;
- goto err;
- }
-
- cached_subvol = TIER_UNHASHED_SUBVOL;
-
- if (local->params) {
- dict_del(local->params, conf->link_xattr_name);
- dict_del(local->params, GLUSTERFS_INTERNAL_FOP_KEY);
- }
-
- /*
- * We will delete the linkfile if data file creation fails.
- * When deleting this stale linkfile, there is a possibility
- * for a race between this linkfile deletion and a stale
- * linkfile deletion triggered by another lookup from different
- * client.
- *
- * For eg:
- *
- * Client 1 Client 2
- *
- * 1 linkfile created for foo
- *
- * 2 data file creation failed
- *
- * 3 creating a file with same name
- *
- * 4 lookup before creation deleted
- * the linkfile created by client1
- * considering as a stale linkfile.
- *
- * 5 New linkfile created for foo
- * with different gfid.
- *
- * 6 Trigger linkfile deletion as
- * data file creation failed.
- *
- * 7 Linkfile deleted which is
- * created by client2.
- *
- * 8 Data file created.
- *
- * With this race, we will end up having a file in a non-hashed subvol
- * without a linkfile in hashed subvol.
- *
- * To avoid this, we store the gfid of linkfile created by client, So
- * If we delete the linkfile , we validate gfid of existing file with
- * stored value from posix layer.
- *
- * Storing this value in local->xattr_req as local->params was also used
- * to create the data file. During the linkfile deletion we will use
- * local->xattr_req dictionary.
- */
- if (!local->xattr_req) {
- local->xattr_req = dict_new();
- if (!local->xattr_req) {
- local->op_errno = ENOMEM;
- op_errno = ENOMEM;
- goto err;
- }
- }
-
- gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_char);
- if (!gfid) {
- local->op_errno = ENOMEM;
- op_errno = ENOMEM;
- goto err;
- }
-
- gf_uuid_copy(gfid, stbuf->ia_gfid);
- ret = dict_set_dynptr(local->xattr_req, TIER_LINKFILE_GFID, gfid,
- sizeof(uuid_t));
- if (ret) {
- GF_FREE(gfid);
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value"
- " : key = %s",
- TIER_LINKFILE_GFID);
- }
-
- STACK_WIND_COOKIE(frame, tier_create_cbk, cached_subvol, cached_subvol,
- cached_subvol->fops->create, &local->loc, local->flags,
- local->mode, local->umask, local->fd, local->params);
-
- return 0;
-err:
- DHT_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL,
- NULL);
- return 0;
-}
-
-gf_boolean_t
-tier_is_hot_tier_decommissioned(xlator_t *this)
-{
- dht_conf_t *conf = NULL;
- xlator_t *hot_tier = NULL;
- int i = 0;
-
- conf = this->private;
- hot_tier = conf->subvolumes[1];
-
- if (conf->decommission_subvols_cnt) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (conf->decommissioned_bricks[i] &&
- conf->decommissioned_bricks[i] == hot_tier)
- return _gf_true;
- }
- }
-
- return _gf_false;
-}
-
-int
-tier_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, mode_t umask, fd_t *fd, dict_t *params)
-{
- int op_errno = -1;
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- xlator_t *hot_subvol = NULL;
- xlator_t *cold_subvol = NULL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
-
- conf = this->private;
-
- dht_get_du_info(frame, this, loc);
-
- local = dht_local_init(frame, loc, fd, GF_FOP_CREATE);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- cold_subvol = TIER_HASHED_SUBVOL;
- hot_subvol = TIER_UNHASHED_SUBVOL;
-
- if (conf->subvolumes[0] != cold_subvol) {
- hot_subvol = conf->subvolumes[0];
- }
- /*
- * if hot tier full, write to cold.
- * Also if hot tier is full, create in cold
- */
- if (dht_is_subvol_filled(this, hot_subvol) ||
- tier_is_hot_tier_decommissioned(this)) {
- gf_msg_debug(this->name, 0, "creating %s on %s", loc->path,
- cold_subvol->name);
-
- STACK_WIND_COOKIE(frame, tier_create_cbk, cold_subvol, cold_subvol,
- cold_subvol->fops->create, loc, flags, mode, umask,
- fd, params);
- } else {
- local->params = dict_ref(params);
- local->flags = flags;
- local->mode = mode;
- local->umask = umask;
- local->cached_subvol = hot_subvol;
- local->hashed_subvol = cold_subvol;
-
- gf_msg_debug(this->name, 0, "creating %s on %s (link at %s)", loc->path,
- hot_subvol->name, cold_subvol->name);
-
- dht_linkfile_create(frame, tier_create_linkfile_create_cbk, this,
- hot_subvol, cold_subvol, loc);
-
- goto out;
- }
-out:
- return 0;
-
-err:
-
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL,
- NULL);
-
- return 0;
-}
-
-int
-tier_unlink_nonhashed_linkfile_cbk(call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- dht_local_t *local = NULL;
- xlator_t *prev = NULL;
-
- local = frame->local;
- prev = cookie;
-
- LOCK(&frame->lock);
- {
- if ((op_ret == -1) && (op_errno != ENOENT)) {
- local->op_errno = op_errno;
- local->op_ret = op_ret;
- gf_msg_debug(this->name, op_errno,
- "Unlink link: subvolume %s"
- " returned -1",
- prev->name);
- goto unlock;
- }
-
- local->op_ret = 0;
- }
-unlock:
- UNLOCK(&frame->lock);
-
- if (local->op_ret == -1)
- goto err;
- DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent, NULL);
-
- return 0;
-
-err:
- DHT_STACK_UNWIND(unlink, frame, -1, local->op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int
-tier_unlink_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, inode_t *inode,
- struct iatt *preparent, dict_t *xdata,
- struct iatt *postparent)
-{
- dht_local_t *local = NULL;
- xlator_t *prev = NULL;
- dht_conf_t *conf = NULL;
- xlator_t *hot_subvol = NULL;
-
- local = frame->local;
- prev = cookie;
- conf = this->private;
- hot_subvol = TIER_UNHASHED_SUBVOL;
-
- if (!op_ret) {
- /*
- * linkfile present on hot tier. unlinking the linkfile
- */
- STACK_WIND_COOKIE(frame, tier_unlink_nonhashed_linkfile_cbk, hot_subvol,
- hot_subvol, hot_subvol->fops->unlink, &local->loc,
- local->flags, NULL);
- return 0;
- }
-
- LOCK(&frame->lock);
- {
- if (op_errno == ENOENT) {
- local->op_ret = 0;
- local->op_errno = op_errno;
- } else {
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- }
- gf_msg_debug(this->name, op_errno, "Lookup : subvolume %s returned -1",
- prev->name);
- }
-
- UNLOCK(&frame->lock);
-
- DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent, xdata);
-
- return 0;
-}
-
-int
-tier_unlink_linkfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- dht_local_t *local = NULL;
- xlator_t *prev = NULL;
-
- local = frame->local;
- prev = cookie;
-
- LOCK(&frame->lock);
- {
- /* Ignore EINVAL for tier to ignore error when the file
- does not exist on the other tier */
- if ((op_ret == -1) && !((op_errno == ENOENT) || (op_errno == EINVAL))) {
- local->op_errno = op_errno;
- local->op_ret = op_ret;
- gf_msg_debug(this->name, op_errno,
- "Unlink link: subvolume %s"
- " returned -1",
- prev->name);
- goto unlock;
- }
-
- local->op_ret = 0;
- }
-unlock:
- UNLOCK(&frame->lock);
-
- if (local->op_ret == -1)
- goto err;
-
- DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent, xdata);
-
- return 0;
-
-err:
- DHT_STACK_UNWIND(unlink, frame, -1, local->op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-tier_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
-{
- dht_local_t *local = NULL;
- xlator_t *prev = NULL;
- struct iatt *stbuf = NULL;
- dht_conf_t *conf = NULL;
- int ret = -1;
- xlator_t *hot_tier = NULL;
- xlator_t *cold_tier = NULL;
-
- local = frame->local;
- prev = cookie;
- conf = this->private;
-
- cold_tier = TIER_HASHED_SUBVOL;
- hot_tier = TIER_UNHASHED_SUBVOL;
-
- LOCK(&frame->lock);
- {
- if (op_ret == -1) {
- if (op_errno == ENOENT) {
- local->op_ret = 0;
- } else {
- local->op_ret = -1;
- local->op_errno = op_errno;
- }
- gf_msg_debug(this->name, op_errno,
- "Unlink: subvolume %s returned -1"
- " with errno = %d",
- prev->name, op_errno);
- goto unlock;
- }
-
- local->op_ret = 0;
-
- local->postparent = *postparent;
- local->preparent = *preparent;
-
- if (local->loc.parent) {
- dht_inode_ctx_time_update(local->loc.parent, this,
- &local->preparent, 0);
- dht_inode_ctx_time_update(local->loc.parent, this,
- &local->postparent, 1);
- }
- }
-unlock:
- UNLOCK(&frame->lock);
-
- if (local->op_ret)
- goto out;
-
- if (cold_tier != local->cached_subvol) {
- /*
- * File is present in hot tier, so there will be
- * a link file on cold tier, deleting the linkfile
- * from cold tier
- */
- STACK_WIND_COOKIE(frame, tier_unlink_linkfile_cbk, cold_tier, cold_tier,
- cold_tier->fops->unlink, &local->loc, local->flags,
- xdata);
- return 0;
- }
-
- ret = dict_get_bin(xdata, DHT_IATT_IN_XDATA_KEY, (void **)&stbuf);
- if (!ret && stbuf &&
- ((IS_DHT_MIGRATION_PHASE2(stbuf)) || IS_DHT_MIGRATION_PHASE1(stbuf))) {
- /*
- * File is migrating from cold to hot tier.
- * Delete the destination linkfile.
- */
- STACK_WIND_COOKIE(frame, tier_unlink_lookup_cbk, hot_tier, hot_tier,
- hot_tier->fops->lookup, &local->loc, NULL);
- return 0;
- }
-
-out:
- DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent, xdata);
-
- return 0;
-}
-
-int
-tier_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
- dict_t *xdata)
-{
- xlator_t *cached_subvol = NULL;
- xlator_t *hashed_subvol = NULL;
- dht_conf_t *conf = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
- int ret = -1;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
-
- conf = this->private;
-
- local = dht_local_init(frame, loc, NULL, GF_FOP_UNLINK);
- if (!local) {
- op_errno = ENOMEM;
-
- goto err;
- }
-
- hashed_subvol = TIER_HASHED_SUBVOL;
-
- cached_subvol = local->cached_subvol;
- if (!cached_subvol) {
- gf_msg_debug(this->name, 0, "no cached subvolume for path=%s",
- loc->path);
- op_errno = EINVAL;
- goto err;
- }
-
- local->flags = xflag;
- if (IA_ISREG(loc->inode->ia_type) && (hashed_subvol == cached_subvol)) {
- /*
- * File resides in cold tier. We need to stat
- * the file to see if it is being promoted.
- * If yes we need to delete the destination
- * file as well.
- *
- * Currently we are doing this check only for
- * regular files.
- */
- xdata = xdata ? dict_ref(xdata) : dict_new();
- if (xdata) {
- ret = dict_set_int8(xdata, DHT_IATT_IN_XDATA_KEY, 1);
- if (ret) {
- gf_msg_debug(this->name, 0, "Failed to set dictionary key %s",
- DHT_IATT_IN_XDATA_KEY);
- }
- }
- }
-
- /*
- * File is on hot tier, delete the data file first, then
- * linkfile from cold.
- */
- STACK_WIND_COOKIE(frame, tier_unlink_cbk, cached_subvol, cached_subvol,
- cached_subvol->fops->unlink, loc, xflag, xdata);
- if (xdata)
- dict_unref(xdata);
- return 0;
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND(unlink, frame, -1, op_errno, NULL, NULL, NULL);
-
- return 0;
-}
-
-int
-tier_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, gf_dirent_t *orig_entries, dict_t *xdata)
-{
- gf_dirent_t entries;
- gf_dirent_t *orig_entry = NULL;
- gf_dirent_t *entry = NULL;
- int count = 0;
-
- INIT_LIST_HEAD(&entries.list);
-
- if (op_ret < 0)
- goto unwind;
-
- list_for_each_entry(orig_entry, (&orig_entries->list), list)
- {
- entry = gf_dirent_for_name(orig_entry->d_name);
- if (!entry) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
- "Memory allocation failed ");
- goto unwind;
- }
-
- entry->d_off = orig_entry->d_off;
- entry->d_ino = orig_entry->d_ino;
- entry->d_type = orig_entry->d_type;
- entry->d_len = orig_entry->d_len;
-
- list_add_tail(&entry->list, &entries.list);
- count++;
- }
- op_ret = count;
-
-unwind:
- if (op_ret < 0)
- op_ret = 0;
-
- DHT_STACK_UNWIND(readdir, frame, op_ret, op_errno, &entries, NULL);
-
- gf_dirent_free(&entries);
-
- return 0;
-}
-
-int
-tier_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, gf_dirent_t *orig_entries, dict_t *xdata)
-{
- dht_local_t *local = NULL;
- gf_dirent_t entries;
- gf_dirent_t *orig_entry = NULL;
- gf_dirent_t *entry = NULL;
- xlator_t *prev = NULL;
- xlator_t *next_subvol = NULL;
- off_t next_offset = 0;
- int count = 0;
- dht_conf_t *conf = NULL;
- int ret = 0;
- inode_table_t *itable = NULL;
- inode_t *inode = NULL;
-
- INIT_LIST_HEAD(&entries.list);
- prev = cookie;
- local = frame->local;
- itable = local->fd ? local->fd->inode->table : NULL;
-
- conf = this->private;
- GF_VALIDATE_OR_GOTO(this->name, conf, unwind);
-
- if (op_ret < 0)
- goto done;
-
- list_for_each_entry(orig_entry, (&orig_entries->list), list)
- {
- next_offset = orig_entry->d_off;
-
- if (IA_ISINVAL(orig_entry->d_stat.ia_type)) {
- /*stat failed somewhere- ignore this entry*/
- continue;
- }
-
- entry = gf_dirent_for_name(orig_entry->d_name);
- if (!entry) {
- goto unwind;
- }
-
- entry->d_off = orig_entry->d_off;
- entry->d_stat = orig_entry->d_stat;
- entry->d_ino = orig_entry->d_ino;
- entry->d_type = orig_entry->d_type;
- entry->d_len = orig_entry->d_len;
-
- if (orig_entry->dict)
- entry->dict = dict_ref(orig_entry->dict);
-
- if (check_is_linkfile(NULL, (&orig_entry->d_stat), orig_entry->dict,
- conf->link_xattr_name)) {
- goto entries;
-
- } else if (IA_ISDIR(entry->d_stat.ia_type)) {
- if (orig_entry->inode) {
- dht_inode_ctx_time_update(orig_entry->inode, this,
- &entry->d_stat, 1);
- }
- } else {
- if (orig_entry->inode) {
- ret = dht_layout_preset(this, prev, orig_entry->inode);
- if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0,
- DHT_MSG_LAYOUT_SET_FAILED,
- "failed to link the layout "
- "in inode");
-
- entry->inode = inode_ref(orig_entry->inode);
- } else if (itable) {
- /*
- * orig_entry->inode might be null if any upper
- * layer xlators below client set to null, to
- * force a lookup on the inode even if the inode
- * is present in the inode table. In that case
- * we just update the ctx to make sure we didn't
- * missed anything.
- */
- inode = inode_find(itable, orig_entry->d_stat.ia_gfid);
- if (inode) {
- ret = dht_layout_preset(this, TIER_HASHED_SUBVOL, inode);
- if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0,
- DHT_MSG_LAYOUT_SET_FAILED,
- "failed to link the layout"
- " in inode");
- inode_unref(inode);
- inode = NULL;
- }
- }
- }
-
- entries:
- list_add_tail(&entry->list, &entries.list);
- count++;
- }
- op_ret = count;
-
-done:
- if (count == 0) {
- /* non-zero next_offset means that
- EOF is not yet hit on the current subvol
- */
- if (next_offset != 0) {
- next_subvol = prev;
- } else {
- goto unwind;
- }
-
- STACK_WIND_COOKIE(frame, tier_readdirp_cbk, next_subvol, next_subvol,
- next_subvol->fops->readdirp, local->fd, local->size,
- next_offset, local->xattr);
- return 0;
- }
-
-unwind:
- if (op_ret < 0)
- op_ret = 0;
-
- DHT_STACK_UNWIND(readdirp, frame, op_ret, op_errno, &entries, NULL);
-
- gf_dirent_free(&entries);
-
- return 0;
-}
-
-int
-tier_do_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t yoff, int whichop, dict_t *dict)
-{
- dht_local_t *local = NULL;
- int op_errno = -1;
- xlator_t *hashed_subvol = NULL;
- int ret = 0;
- dht_conf_t *conf = NULL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(fd, err);
- VALIDATE_OR_GOTO(this->private, err);
-
- conf = this->private;
-
- local = dht_local_init(frame, NULL, NULL, whichop);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- local->fd = fd_ref(fd);
- local->size = size;
- local->xattr_req = (dict) ? dict_ref(dict) : NULL;
-
- hashed_subvol = TIER_HASHED_SUBVOL;
-
- /* TODO: do proper readdir */
- if (whichop == GF_FOP_READDIRP) {
- if (dict)
- local->xattr = dict_ref(dict);
- else
- local->xattr = dict_new();
-
- if (local->xattr) {
- ret = dict_set_uint32(local->xattr, conf->link_xattr_name, 256);
- if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value"
- " : key = %s",
- conf->link_xattr_name);
- }
-
- STACK_WIND_COOKIE(frame, tier_readdirp_cbk, hashed_subvol,
- hashed_subvol, hashed_subvol->fops->readdirp, fd,
- size, yoff, local->xattr);
-
- } else {
- STACK_WIND_COOKIE(frame, tier_readdir_cbk, hashed_subvol, hashed_subvol,
- hashed_subvol->fops->readdir, fd, size, yoff,
- local->xattr);
- }
-
- return 0;
-
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND(readdir, frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-int
-tier_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t yoff, dict_t *xdata)
-{
- int op = GF_FOP_READDIR;
- dht_conf_t *conf = NULL;
- int i = 0;
-
- conf = this->private;
- if (!conf)
- goto out;
-
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (!conf->subvolume_status[i]) {
- op = GF_FOP_READDIRP;
- break;
- }
- }
-
- if (conf->use_readdirp)
- op = GF_FOP_READDIRP;
-
-out:
- tier_do_readdir(frame, this, fd, size, yoff, op, 0);
- return 0;
-}
-
-int
-tier_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t yoff, dict_t *dict)
-{
- tier_do_readdir(frame, this, fd, size, yoff, GF_FOP_READDIRP, dict);
- return 0;
-}
-
-int
-tier_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, struct statvfs *statvfs, dict_t *xdata)
-{
- gf_boolean_t event = _gf_false;
- qdstatfs_action_t action = qdstatfs_action_OFF;
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- int bsize = 0;
- int frsize = 0;
- GF_UNUSED int ret = 0;
- unsigned long new_usage = 0;
- unsigned long cur_usage = 0;
- xlator_t *prev = NULL;
- dht_conf_t *conf = NULL;
- tier_statvfs_t *tier_stat = NULL;
-
- prev = cookie;
- local = frame->local;
- GF_ASSERT(local);
-
- conf = this->private;
-
- if (xdata)
- ret = dict_get_int8(xdata, "quota-deem-statfs", (int8_t *)&event);
-
- tier_stat = &local->tier_statvfs;
-
- LOCK(&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- goto unlock;
- }
- if (!statvfs) {
- op_errno = EINVAL;
- local->op_ret = -1;
- goto unlock;
- }
- local->op_ret = 0;
-
- if (local->quota_deem_statfs) {
- if (event == _gf_true) {
- action = qdstatfs_action_COMPARE;
- } else {
- action = qdstatfs_action_NEGLECT;
- }
- } else {
- if (event == _gf_true) {
- action = qdstatfs_action_REPLACE;
- local->quota_deem_statfs = _gf_true;
- }
- }
-
- if (local->quota_deem_statfs) {
- switch (action) {
- case qdstatfs_action_NEGLECT:
- goto unlock;
-
- case qdstatfs_action_REPLACE:
- local->statvfs = *statvfs;
- goto unlock;
-
- case qdstatfs_action_COMPARE:
- new_usage = statvfs->f_blocks - statvfs->f_bfree;
- cur_usage = local->statvfs.f_blocks -
- local->statvfs.f_bfree;
-
- /* Take the max of the usage from subvols */
- if (new_usage >= cur_usage)
- local->statvfs = *statvfs;
- goto unlock;
-
- default:
- break;
- }
- }
-
- if (local->statvfs.f_bsize != 0) {
- bsize = max(local->statvfs.f_bsize, statvfs->f_bsize);
- frsize = max(local->statvfs.f_frsize, statvfs->f_frsize);
- dht_normalize_stats(&local->statvfs, bsize, frsize);
- dht_normalize_stats(statvfs, bsize, frsize);
- } else {
- local->statvfs.f_bsize = statvfs->f_bsize;
- local->statvfs.f_frsize = statvfs->f_frsize;
- }
-
- if (prev == TIER_HASHED_SUBVOL) {
- local->statvfs.f_blocks = statvfs->f_blocks;
- local->statvfs.f_files = statvfs->f_files;
- local->statvfs.f_fsid = statvfs->f_fsid;
- local->statvfs.f_flag = statvfs->f_flag;
- local->statvfs.f_namemax = statvfs->f_namemax;
- tier_stat->blocks_used = (statvfs->f_blocks - statvfs->f_bfree);
- tier_stat->pblocks_used = (statvfs->f_blocks - statvfs->f_bavail);
- tier_stat->files_used = (statvfs->f_files - statvfs->f_ffree);
- tier_stat->pfiles_used = (statvfs->f_files - statvfs->f_favail);
- tier_stat->hashed_fsid = statvfs->f_fsid;
- } else {
- tier_stat->unhashed_fsid = statvfs->f_fsid;
- tier_stat->unhashed_blocks_used = (statvfs->f_blocks -
- statvfs->f_bfree);
- tier_stat->unhashed_pblocks_used = (statvfs->f_blocks -
- statvfs->f_bavail);
- tier_stat->unhashed_files_used = (statvfs->f_files -
- statvfs->f_ffree);
- tier_stat->unhashed_pfiles_used = (statvfs->f_files -
- statvfs->f_favail);
- }
- }
-unlock:
- UNLOCK(&frame->lock);
-
- this_call_cnt = dht_frame_return(frame);
- if (is_last_call(this_call_cnt)) {
- if (tier_stat->unhashed_fsid != tier_stat->hashed_fsid) {
- tier_stat->blocks_used += tier_stat->unhashed_blocks_used;
- tier_stat->pblocks_used += tier_stat->unhashed_pblocks_used;
- tier_stat->files_used += tier_stat->unhashed_files_used;
- tier_stat->pfiles_used += tier_stat->unhashed_pfiles_used;
- }
- local->statvfs.f_bfree = local->statvfs.f_blocks -
- tier_stat->blocks_used;
- local->statvfs.f_bavail = local->statvfs.f_blocks -
- tier_stat->pblocks_used;
- local->statvfs.f_ffree = local->statvfs.f_files - tier_stat->files_used;
- local->statvfs.f_favail = local->statvfs.f_files -
- tier_stat->pfiles_used;
- DHT_STACK_UNWIND(statfs, frame, local->op_ret, local->op_errno,
- &local->statvfs, xdata);
- }
-
- return 0;
-}
-
-int
-tier_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
-{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- int op_errno = -1;
- int i = -1;
- inode_t *inode = NULL;
- inode_table_t *itable = NULL;
- uuid_t root_gfid = {
- 0,
- };
- loc_t newloc = {
- 0,
- };
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
- VALIDATE_OR_GOTO(this->private, err);
-
- conf = this->private;
-
- local = dht_local_init(frame, NULL, NULL, GF_FOP_STATFS);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- if (loc->inode && !IA_ISDIR(loc->inode->ia_type)) {
- itable = loc->inode->table;
- if (!itable) {
- op_errno = EINVAL;
- goto err;
- }
-
- loc = &local->loc2;
- root_gfid[15] = 1;
-
- inode = inode_find(itable, root_gfid);
- if (!inode) {
- op_errno = EINVAL;
- goto err;
- }
-
- dht_build_root_loc(inode, &newloc);
- loc = &newloc;
- }
-
- local->call_cnt = conf->subvolume_cnt;
-
- for (i = 0; i < conf->subvolume_cnt; i++) {
- STACK_WIND_COOKIE(frame, tier_statfs_cbk, conf->subvolumes[i],
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->statfs, loc, xdata);
- }
-
- return 0;
-
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND(statfs, frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
diff --git a/xlators/cluster/dht/src/tier-common.h b/xlators/cluster/dht/src/tier-common.h
deleted file mode 100644
index b1ebaa8..0000000
--- a/xlators/cluster/dht/src/tier-common.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _TIER_COMMON_H_
-#define _TIER_COMMON_H_
-/* Function definitions */
-int
-tier_create_unlink_stale_linkto_cbk(call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-int
-tier_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, fd_t *fd, inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata);
-
-int
-tier_create_linkfile_create_cbk(call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, inode_t *inode,
- struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-int
-tier_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, mode_t umask, fd_t *fd, dict_t *params);
-
-int32_t
-tier_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
- dict_t *xdata);
-
-int32_t
-tier_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t off, dict_t *dict);
-
-int
-tier_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t yoff, dict_t *xdata);
-
-int
-tier_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
- dict_t *xdata);
-
-int
-tier_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata);
-
-#endif
diff --git a/xlators/cluster/dht/src/tier.c b/xlators/cluster/dht/src/tier.c
deleted file mode 100644
index a8cccaf..0000000
--- a/xlators/cluster/dht/src/tier.c
+++ /dev/null
@@ -1,3090 +0,0 @@
-/*
- Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include <dlfcn.h>
-
-#include "dht-common.h"
-#include "tier.h"
-#include "tier-common.h"
-#include <glusterfs/syscall.h>
-#include <glusterfs/events.h>
-#include "tier-ctr-interface.h"
-
-/*Hard coded DB info*/
-static gfdb_db_type_t dht_tier_db_type = GFDB_SQLITE3;
-/*Hard coded DB info*/
-
-/*Mutex for updating the data movement stats*/
-static pthread_mutex_t dm_stat_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-/* Stores the path location of promotion query files */
-static char *promotion_qfile;
-/* Stores the path location of demotion query files */
-static char *demotion_qfile;
-
-static void *libhandle;
-static gfdb_methods_t gfdb_methods;
-
-#define DB_QUERY_RECORD_SIZE 4096
-
-/*
- * Closes all the fds and frees the qfile_array
- * */
-static void
-qfile_array_free(tier_qfile_array_t *qfile_array)
-{
- ssize_t i = 0;
-
- if (qfile_array) {
- if (qfile_array->fd_array) {
- for (i = 0; i < qfile_array->array_size; i++) {
- if (qfile_array->fd_array[i] != -1) {
- sys_close(qfile_array->fd_array[i]);
- }
- }
- }
- GF_FREE(qfile_array->fd_array);
- }
- GF_FREE(qfile_array);
-}
-
-/* Create a new query file list with given size */
-static tier_qfile_array_t *
-qfile_array_new(ssize_t array_size)
-{
- int ret = -1;
- tier_qfile_array_t *qfile_array = NULL;
- ssize_t i = 0;
-
- GF_VALIDATE_OR_GOTO("tier", (array_size > 0), out);
-
- qfile_array = GF_CALLOC(1, sizeof(tier_qfile_array_t),
- gf_tier_mt_qfile_array_t);
- if (!qfile_array) {
- gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to allocate memory for tier_qfile_array_t");
- goto out;
- }
-
- qfile_array->fd_array = GF_MALLOC(array_size * sizeof(int),
- gf_dht_mt_int32_t);
- if (!qfile_array->fd_array) {
- gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to allocate memory for "
- "tier_qfile_array_t->fd_array");
- goto out;
- }
-
- /* Init all the fds to -1 */
- for (i = 0; i < array_size; i++) {
- qfile_array->fd_array[i] = -1;
- }
-
- qfile_array->array_size = array_size;
- qfile_array->next_index = 0;
-
- /* Set exhausted count to list size as the list is empty */
- qfile_array->exhausted_count = qfile_array->array_size;
-
- ret = 0;
-out:
- if (ret) {
- qfile_array_free(qfile_array);
- qfile_array = NULL;
- }
- return qfile_array;
-}
-
-/* Checks if the query file list is empty or totally exhausted. */
-static gf_boolean_t
-is_qfile_array_empty(tier_qfile_array_t *qfile_array)
-{
- return (qfile_array->exhausted_count == qfile_array->array_size)
- ? _gf_true
- : _gf_false;
-}
-
-/* Shifts the next_fd pointer to the next available fd in the list */
-static void
-shift_next_index(tier_qfile_array_t *qfile_array)
-{
- int qfile_fd = 0;
- int spin_count = 0;
-
- if (is_qfile_array_empty(qfile_array)) {
- return;
- }
-
- do {
- /* change next_index in a rotional manner */
- (qfile_array->next_index == (qfile_array->array_size - 1))
- ? qfile_array->next_index = 0
- : qfile_array->next_index++;
-
- qfile_fd = (qfile_array->fd_array[qfile_array->next_index]);
-
- spin_count++;
-
- } while ((qfile_fd == -1) && (spin_count < qfile_array->array_size));
-}
-
-/*
- * This is a non-thread safe function to read query records
- * from a list of query files in a Round-Robin manner.
- * As in when the query files get exhuasted they are closed.
- * Returns:
- * 0 if all the query records in all the query files of the list are
- * exhausted.
- * > 0 if a query record is successfully read. Indicates the size of the query
- * record read.
- * < 0 if there was failure
- * */
-static int
-read_query_record_list(tier_qfile_array_t *qfile_array,
- gfdb_query_record_t **query_record)
-{
- int ret = -1;
- int qfile_fd = 0;
-
- GF_VALIDATE_OR_GOTO("tier", qfile_array, out);
- GF_VALIDATE_OR_GOTO("tier", qfile_array->fd_array, out);
-
- do {
- if (is_qfile_array_empty(qfile_array)) {
- ret = 0;
- break;
- }
-
- qfile_fd = qfile_array->fd_array[qfile_array->next_index];
- ret = gfdb_methods.gfdb_read_query_record(qfile_fd, query_record);
- if (ret <= 0) {
- /*The qfile_fd has reached EOF or
- * there was an error.
- * 1. Close the exhausted fd
- * 2. increment the exhausted count
- * 3. shift next_qfile to next qfile
- **/
- sys_close(qfile_fd);
- qfile_array->fd_array[qfile_array->next_index] = -1;
- qfile_array->exhausted_count++;
- /* shift next_qfile to next qfile */
- shift_next_index(qfile_array);
- continue;
- } else {
- /* shift next_qfile to next qfile */
- shift_next_index(qfile_array);
- break;
- }
- } while (1);
-out:
- return ret;
-}
-
-/* Check and update the watermark every WM_INTERVAL seconds */
-#define WM_INTERVAL 5
-#define WM_INTERVAL_EMERG 1
-
-static int
-tier_check_same_node(xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag)
-{
- int ret = -1;
- dict_t *dict = NULL;
- char *uuid_str = NULL;
- uuid_t node_uuid = {
- 0,
- };
-
- GF_VALIDATE_OR_GOTO("tier", this, out);
- GF_VALIDATE_OR_GOTO(this->name, loc, out);
- GF_VALIDATE_OR_GOTO(this->name, defrag, out);
-
- if (syncop_getxattr(this, loc, &dict, GF_XATTR_NODE_UUID_KEY, NULL, NULL)) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Unable to get NODE_UUID_KEY %s %s\n", loc->name, loc->path);
- goto out;
- }
-
- if (dict_get_str(dict, GF_XATTR_NODE_UUID_KEY, &uuid_str) < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to get node-uuids for %s", loc->path);
- goto out;
- }
-
- if (gf_uuid_parse(uuid_str, node_uuid)) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "uuid_parse failed for %s", loc->path);
- goto out;
- }
-
- if (gf_uuid_compare(node_uuid, defrag->node_uuid)) {
- gf_msg_debug(this->name, 0, "%s does not belong to this node",
- loc->path);
- ret = 1;
- goto out;
- }
-
- ret = 0;
-out:
- if (dict)
- dict_unref(dict);
-
- return ret;
-}
-
-int
-tier_get_fs_stat(xlator_t *this, loc_t *root_loc)
-{
- int ret = 0;
- gf_defrag_info_t *defrag = NULL;
- dht_conf_t *conf = NULL;
- dict_t *xdata = NULL;
- struct statvfs statfs = {
- 0,
- };
- gf_tier_conf_t *tier_conf = NULL;
-
- conf = this->private;
- if (!conf) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS,
- "conf is NULL");
- ret = -1;
- goto exit;
- }
-
- defrag = conf->defrag;
- if (!defrag) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS,
- "defrag is NULL");
- ret = -1;
- goto exit;
- }
-
- tier_conf = &defrag->tier_conf;
-
- xdata = dict_new();
- if (!xdata) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
- "failed to allocate dictionary");
- ret = -1;
- goto exit;
- }
-
- ret = dict_set_int8(xdata, GF_INTERNAL_IGNORE_DEEM_STATFS, 1);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
- "Failed to set " GF_INTERNAL_IGNORE_DEEM_STATFS " in dict");
- ret = -1;
- goto exit;
- }
-
- /* Find how much free space is on the hot subvolume.
- * Then see if that value */
- /* is less than or greater than user defined watermarks.
- * Stash results in */
- /* the tier_conf data structure. */
-
- ret = syncop_statfs(conf->subvolumes[1], root_loc, &statfs, xdata, NULL);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LOG_TIER_STATUS,
- "Unable to obtain statfs.");
- goto exit;
- }
-
- pthread_mutex_lock(&dm_stat_mutex);
-
- tier_conf->block_size = statfs.f_bsize;
- tier_conf->blocks_total = statfs.f_blocks;
- tier_conf->blocks_used = statfs.f_blocks - statfs.f_bfree;
-
- tier_conf->percent_full = GF_PERCENTAGE(tier_conf->blocks_used,
- statfs.f_blocks);
- pthread_mutex_unlock(&dm_stat_mutex);
-
-exit:
- if (xdata)
- dict_unref(xdata);
- return ret;
-}
-
-static void
-tier_send_watermark_event(const char *volname, tier_watermark_op_t old_wm,
- tier_watermark_op_t new_wm)
-{
- if (old_wm == TIER_WM_LOW || old_wm == TIER_WM_NONE) {
- if (new_wm == TIER_WM_MID) {
- gf_event(EVENT_TIER_WATERMARK_RAISED_TO_MID, "vol=%s", volname);
- } else if (new_wm == TIER_WM_HI) {
- gf_event(EVENT_TIER_WATERMARK_HI, "vol=%s", volname);
- }
- } else if (old_wm == TIER_WM_MID) {
- if (new_wm == TIER_WM_LOW) {
- gf_event(EVENT_TIER_WATERMARK_DROPPED_TO_LOW, "vol=%s", volname);
- } else if (new_wm == TIER_WM_HI) {
- gf_event(EVENT_TIER_WATERMARK_HI, "vol=%s", volname);
- }
- } else if (old_wm == TIER_WM_HI) {
- if (new_wm == TIER_WM_MID) {
- gf_event(EVENT_TIER_WATERMARK_DROPPED_TO_MID, "vol=%s", volname);
- } else if (new_wm == TIER_WM_LOW) {
- gf_event(EVENT_TIER_WATERMARK_DROPPED_TO_LOW, "vol=%s", volname);
- }
- }
-}
-
-int
-tier_check_watermark(xlator_t *this)
-{
- int ret = -1;
- gf_defrag_info_t *defrag = NULL;
- dht_conf_t *conf = NULL;
- gf_tier_conf_t *tier_conf = NULL;
- tier_watermark_op_t wm = TIER_WM_NONE;
-
- conf = this->private;
- if (!conf)
- goto exit;
-
- defrag = conf->defrag;
- if (!defrag)
- goto exit;
-
- tier_conf = &defrag->tier_conf;
-
- if (tier_conf->percent_full < tier_conf->watermark_low) {
- wm = TIER_WM_LOW;
-
- } else if (tier_conf->percent_full < tier_conf->watermark_hi) {
- wm = TIER_WM_MID;
-
- } else {
- wm = TIER_WM_HI;
- }
-
- if (wm != tier_conf->watermark_last) {
- tier_send_watermark_event(tier_conf->volname, tier_conf->watermark_last,
- wm);
-
- tier_conf->watermark_last = wm;
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "Tier watermark now %d", wm);
- }
-
- ret = 0;
-
-exit:
- return ret;
-}
-
-static gf_boolean_t
-is_hot_tier_full(gf_tier_conf_t *tier_conf)
-{
- if (tier_conf && (tier_conf->mode == TIER_MODE_WM) &&
- (tier_conf->watermark_last == TIER_WM_HI))
- return _gf_true;
-
- return _gf_false;
-}
-
-int
-tier_do_migration(xlator_t *this, int promote)
-{
- gf_defrag_info_t *defrag = NULL;
- dht_conf_t *conf = NULL;
- long rand = 0;
- int migrate = 0;
- gf_tier_conf_t *tier_conf = NULL;
-
- conf = this->private;
- if (!conf)
- goto exit;
-
- defrag = conf->defrag;
- if (!defrag)
- goto exit;
-
- if (tier_check_watermark(this) != 0) {
- gf_msg(this->name, GF_LOG_CRITICAL, errno, DHT_MSG_LOG_TIER_ERROR,
- "Failed to get watermark");
- goto exit;
- }
-
- tier_conf = &defrag->tier_conf;
-
- switch (tier_conf->watermark_last) {
- case TIER_WM_LOW:
- migrate = promote ? 1 : 0;
- break;
- case TIER_WM_HI:
- migrate = promote ? 0 : 1;
- break;
- case TIER_WM_MID:
- /* coverity[DC.WEAK_CRYPTO] */
- rand = random() % 100;
- if (promote) {
- migrate = (rand > tier_conf->percent_full);
- } else {
- migrate = (rand <= tier_conf->percent_full);
- }
- break;
- }
-
-exit:
- return migrate;
-}
-
-int
-tier_migrate(xlator_t *this, int is_promotion, dict_t *migrate_data, loc_t *loc,
- gf_tier_conf_t *tier_conf)
-{
- int ret = -1;
-
- pthread_mutex_lock(&tier_conf->pause_mutex);
- if (is_promotion)
- tier_conf->promote_in_progress = 1;
- else
- tier_conf->demote_in_progress = 1;
- pthread_mutex_unlock(&tier_conf->pause_mutex);
-
- /* Data migration */
- ret = syncop_setxattr(this, loc, migrate_data, 0, NULL, NULL);
-
- pthread_mutex_lock(&tier_conf->pause_mutex);
- if (is_promotion)
- tier_conf->promote_in_progress = 0;
- else
- tier_conf->demote_in_progress = 0;
- pthread_mutex_unlock(&tier_conf->pause_mutex);
-
- return ret;
-}
-
-/* returns _gf_true: if file can be promoted
- * returns _gf_false: if file cannot be promoted
- */
-static gf_boolean_t
-tier_can_promote_file(xlator_t *this, char const *file_name,
- struct iatt *current, gf_defrag_info_t *defrag)
-{
- gf_boolean_t ret = _gf_false;
- fsblkcnt_t estimated_usage = 0;
-
- if (defrag->tier_conf.tier_max_promote_size &&
- (current->ia_size > defrag->tier_conf.tier_max_promote_size)) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "File %s (gfid:%s) with size (%" PRIu64
- ") exceeds maxsize "
- "(%d) for promotion. File will not be promoted.",
- file_name, uuid_utoa(current->ia_gfid), current->ia_size,
- defrag->tier_conf.tier_max_promote_size);
- goto err;
- }
-
- /* bypass further validations for TEST mode */
- if (defrag->tier_conf.mode != TIER_MODE_WM) {
- ret = _gf_true;
- goto err;
- }
-
- /* convert the file size to blocks as per the block size of the
- * destination tier
- * NOTE: add (block_size - 1) to get the correct block size when
- * there is a remainder after a modulo
- */
- estimated_usage = ((current->ia_size + defrag->tier_conf.block_size - 1) /
- defrag->tier_conf.block_size) +
- defrag->tier_conf.blocks_used;
-
- /* test if the estimated block usage goes above HI watermark */
- if (GF_PERCENTAGE(estimated_usage, defrag->tier_conf.blocks_total) >=
- defrag->tier_conf.watermark_hi) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "Estimated block count consumption on "
- "hot tier (%" PRIu64
- ") exceeds hi watermark (%d%%). "
- "File will not be promoted.",
- estimated_usage, defrag->tier_conf.watermark_hi);
- goto err;
- }
- ret = _gf_true;
-err:
- return ret;
-}
-
-static int
-tier_set_migrate_data(dict_t *migrate_data)
-{
- int failed = 1;
-
- failed = dict_set_str(migrate_data, GF_XATTR_FILE_MIGRATE_KEY, "force");
- if (failed) {
- goto bail_out;
- }
-
- /* Flag to suggest the xattr call is from migrator */
- failed = dict_set_str(migrate_data, "from.migrator", "yes");
- if (failed) {
- goto bail_out;
- }
-
- /* Flag to suggest its a tiering migration
- * The reason for this dic key-value is that
- * promotions and demotions are multithreaded
- * so the original frame from gf_defrag_start()
- * is not carried. A new frame will be created when
- * we do syncop_setxattr(). This does not have the
- * frame->root->pid of the original frame. So we pass
- * this dic key-value when we do syncop_setxattr() to do
- * data migration and set the frame->root->pid to
- * GF_CLIENT_PID_TIER_DEFRAG in dht_setxattr() just before
- * calling dht_start_rebalance_task() */
- failed = dict_set_str(migrate_data, TIERING_MIGRATION_KEY, "yes");
- if (failed) {
- goto bail_out;
- }
-
- failed = 0;
-
-bail_out:
- return failed;
-}
-
-static char *
-tier_get_parent_path(xlator_t *this, loc_t *p_loc, struct iatt *par_stbuf,
- int *per_link_status)
-{
- int ret = -1;
- char *parent_path = NULL;
- dict_t *xdata_request = NULL;
- dict_t *xdata_response = NULL;
-
- xdata_request = dict_new();
- if (!xdata_request) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to create xdata_request dict");
- goto err;
- }
- ret = dict_set_int32(xdata_request, GET_ANCESTRY_PATH_KEY, 42);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to set value to dict : key %s \n",
- GET_ANCESTRY_PATH_KEY);
- goto err;
- }
-
- ret = syncop_lookup(this, p_loc, par_stbuf, NULL, xdata_request,
- &xdata_response);
- /* When the parent gfid is a stale entry, the lookup
- * will fail and stop the demotion process.
- * The parent gfid can be stale when a huge folder is
- * deleted while the files within it are being migrated
- */
- if (ret == -ESTALE) {
- gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_STALE_LOOKUP,
- "Stale entry in parent lookup for %s", uuid_utoa(p_loc->gfid));
- *per_link_status = 1;
- goto err;
- } else if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LOG_TIER_ERROR,
- "Error in parent lookup for %s", uuid_utoa(p_loc->gfid));
- *per_link_status = -1;
- goto err;
- }
- ret = dict_get_str(xdata_response, GET_ANCESTRY_PATH_KEY, &parent_path);
- if (ret || !parent_path) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to get parent path for %s", uuid_utoa(p_loc->gfid));
- *per_link_status = -1;
- goto err;
- }
-
-err:
- if (xdata_request) {
- dict_unref(xdata_request);
- }
-
- if (xdata_response) {
- dict_unref(xdata_response);
- xdata_response = NULL;
- }
-
- return parent_path;
-}
-
-static int
-tier_get_file_name_and_path(xlator_t *this, uuid_t gfid,
- gfdb_link_info_t *link_info,
- char const *parent_path, loc_t *loc,
- int *per_link_status)
-{
- int ret = -1;
-
- loc->name = gf_strdup(link_info->file_name);
- if (!loc->name) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Memory "
- "allocation failed for %s",
- uuid_utoa(gfid));
- *per_link_status = -1;
- goto err;
- }
- ret = gf_asprintf((char **)&(loc->path), "%s/%s", parent_path, loc->name);
- if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to "
- "construct file path for %s %s\n",
- parent_path, loc->name);
- *per_link_status = -1;
- goto err;
- }
-
- ret = 0;
-
-err:
- return ret;
-}
-
-static int
-tier_lookup_file(xlator_t *this, loc_t *p_loc, loc_t *loc, struct iatt *current,
- int *per_link_status)
-{
- int ret = -1;
-
- ret = syncop_lookup(this, loc, current, NULL, NULL, NULL);
-
- /* The file may be deleted even when the parent
- * is available and the lookup will
- * return a stale entry which would stop the
- * migration. so if its a stale entry, then skip
- * the file and keep migrating.
- */
- if (ret == -ESTALE) {
- gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_STALE_LOOKUP,
- "Stale lookup for %s", uuid_utoa(p_loc->gfid));
- *per_link_status = 1;
- goto err;
- } else if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LOG_TIER_ERROR,
- "Failed to "
- "lookup file %s\n",
- loc->name);
- *per_link_status = -1;
- goto err;
- }
- ret = 0;
-
-err:
- return ret;
-}
-
-static gf_boolean_t
-tier_is_file_already_at_destination(xlator_t *src_subvol,
- query_cbk_args_t *query_cbk_args,
- dht_conf_t *conf, int *per_link_status)
-{
- gf_boolean_t at_destination = _gf_true;
-
- if (src_subvol == NULL) {
- *per_link_status = 1;
- goto err;
- }
- if (query_cbk_args->is_promotion && src_subvol == conf->subvolumes[1]) {
- *per_link_status = 1;
- goto err;
- }
-
- if (!query_cbk_args->is_promotion && src_subvol == conf->subvolumes[0]) {
- *per_link_status = 1;
- goto err;
- }
- at_destination = _gf_false;
-
-err:
- return at_destination;
-}
-
-static void
-tier_update_migration_counters(query_cbk_args_t *query_cbk_args,
- gf_defrag_info_t *defrag,
- uint64_t *total_migrated_bytes, int *total_files)
-{
- if (query_cbk_args->is_promotion) {
- defrag->total_files_promoted++;
- *total_migrated_bytes += defrag->tier_conf.st_last_promoted_size;
- pthread_mutex_lock(&dm_stat_mutex);
- defrag->tier_conf.blocks_used += defrag->tier_conf
- .st_last_promoted_size;
- pthread_mutex_unlock(&dm_stat_mutex);
- } else {
- defrag->total_files_demoted++;
- *total_migrated_bytes += defrag->tier_conf.st_last_demoted_size;
- pthread_mutex_lock(&dm_stat_mutex);
- defrag->tier_conf.blocks_used -= defrag->tier_conf.st_last_demoted_size;
- pthread_mutex_unlock(&dm_stat_mutex);
- }
- if (defrag->tier_conf.blocks_total) {
- pthread_mutex_lock(&dm_stat_mutex);
- defrag->tier_conf.percent_full = GF_PERCENTAGE(
- defrag->tier_conf.blocks_used, defrag->tier_conf.blocks_total);
- pthread_mutex_unlock(&dm_stat_mutex);
- }
-
- (*total_files)++;
-}
-
-static int
-tier_migrate_link(xlator_t *this, dht_conf_t *conf, uuid_t gfid,
- gfdb_link_info_t *link_info, gf_defrag_info_t *defrag,
- query_cbk_args_t *query_cbk_args, dict_t *migrate_data,
- int *per_link_status, int *total_files,
- uint64_t *total_migrated_bytes)
-{
- int ret = -1;
- struct iatt current = {
- 0,
- };
- struct iatt par_stbuf = {
- 0,
- };
- loc_t p_loc = {
- 0,
- };
- loc_t loc = {
- 0,
- };
- xlator_t *src_subvol = NULL;
- inode_t *linked_inode = NULL;
- char *parent_path = NULL;
-
- /* Lookup for parent and get the path of parent */
- gf_uuid_copy(p_loc.gfid, link_info->pargfid);
- p_loc.inode = inode_new(defrag->root_inode->table);
- if (!p_loc.inode) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to create reference to inode"
- " for %s",
- uuid_utoa(p_loc.gfid));
-
- *per_link_status = -1;
- goto err;
- }
-
- parent_path = tier_get_parent_path(this, &p_loc, &par_stbuf,
- per_link_status);
- if (!parent_path) {
- goto err;
- }
-
- linked_inode = inode_link(p_loc.inode, NULL, NULL, &par_stbuf);
- inode_unref(p_loc.inode);
- p_loc.inode = linked_inode;
-
- /* Preparing File Inode */
- gf_uuid_copy(loc.gfid, gfid);
- loc.inode = inode_new(defrag->root_inode->table);
- gf_uuid_copy(loc.pargfid, link_info->pargfid);
- loc.parent = inode_ref(p_loc.inode);
-
- /* Get filename and Construct file path */
- if (tier_get_file_name_and_path(this, gfid, link_info, parent_path, &loc,
- per_link_status) != 0) {
- goto err;
- }
- gf_uuid_copy(loc.parent->gfid, link_info->pargfid);
-
- /* lookup file inode */
- if (tier_lookup_file(this, &p_loc, &loc, &current, per_link_status) != 0) {
- goto err;
- }
-
- if (query_cbk_args->is_promotion) {
- if (!tier_can_promote_file(this, link_info->file_name, &current,
- defrag)) {
- *per_link_status = 1;
- goto err;
- }
- }
-
- linked_inode = inode_link(loc.inode, NULL, NULL, &current);
- inode_unref(loc.inode);
- loc.inode = linked_inode;
-
- /*
- * Do not promote/demote if file already is where it
- * should be. It means another brick moved the file
- * so is not an error. So we set per_link_status = 1
- * so that we ignore counting this.
- */
- src_subvol = dht_subvol_get_cached(this, loc.inode);
-
- if (tier_is_file_already_at_destination(src_subvol, query_cbk_args, conf,
- per_link_status)) {
- goto err;
- }
-
- gf_msg_debug(this->name, 0, "Tier %s: src_subvol %s file %s",
- (query_cbk_args->is_promotion ? "promote" : "demote"),
- src_subvol->name, loc.path);
-
- ret = tier_check_same_node(this, &loc, defrag);
- if (ret != 0) {
- if (ret < 0) {
- *per_link_status = -1;
- goto err;
- }
- ret = 0;
- /* By setting per_link_status to 1 we are
- * ignoring this status and will not be counting
- * this file for migration */
- *per_link_status = 1;
- goto err;
- }
-
- gf_uuid_copy(loc.gfid, loc.inode->gfid);
-
- if (gf_defrag_get_pause_state(&defrag->tier_conf) != TIER_RUNNING) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "Tiering paused. "
- "Exiting tier_migrate_link");
- goto err;
- }
-
- ret = tier_migrate(this, query_cbk_args->is_promotion, migrate_data, &loc,
- &defrag->tier_conf);
-
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LOG_TIER_ERROR,
- "Failed to "
- "migrate %s ",
- loc.path);
- *per_link_status = -1;
- goto err;
- }
-
- tier_update_migration_counters(query_cbk_args, defrag, total_migrated_bytes,
- total_files);
-
- ret = 0;
-
-err:
- GF_FREE((char *)loc.name);
- loc.name = NULL;
- loc_wipe(&loc);
- loc_wipe(&p_loc);
-
- if ((*total_files >= defrag->tier_conf.max_migrate_files) ||
- (*total_migrated_bytes > defrag->tier_conf.max_migrate_bytes)) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "Reached cycle migration limit."
- "migrated bytes %" PRId64 " files %d",
- *total_migrated_bytes, *total_files);
- ret = -1;
- }
-
- return ret;
-}
-
-static int
-tier_migrate_using_query_file(void *_args)
-{
- int ret = -1;
- query_cbk_args_t *query_cbk_args = (query_cbk_args_t *)_args;
- xlator_t *this = NULL;
- gf_defrag_info_t *defrag = NULL;
- gfdb_query_record_t *query_record = NULL;
- gfdb_link_info_t *link_info = NULL;
- dict_t *migrate_data = NULL;
- /*
- * per_file_status and per_link_status
- * 0 : success
- * -1 : failure
- * 1 : ignore the status and don't count for migration
- * */
- int per_file_status = 0;
- int per_link_status = 0;
- int total_status = 0;
- dht_conf_t *conf = NULL;
- uint64_t total_migrated_bytes = 0;
- int total_files = 0;
- loc_t root_loc = {0};
- gfdb_time_t start_time = {0};
- gfdb_time_t current_time = {0};
- int total_time = 0;
- int max_time = 0;
- gf_boolean_t emergency_demote_mode = _gf_false;
-
- GF_VALIDATE_OR_GOTO("tier", query_cbk_args, out);
- GF_VALIDATE_OR_GOTO("tier", query_cbk_args->this, out);
- this = query_cbk_args->this;
- GF_VALIDATE_OR_GOTO(this->name, query_cbk_args->defrag, out);
- GF_VALIDATE_OR_GOTO(this->name, query_cbk_args->qfile_array, out);
- GF_VALIDATE_OR_GOTO(this->name, this->private, out);
-
- conf = this->private;
-
- defrag = query_cbk_args->defrag;
- migrate_data = dict_new();
- if (!migrate_data)
- goto out;
-
- emergency_demote_mode = (!query_cbk_args->is_promotion &&
- is_hot_tier_full(&defrag->tier_conf));
-
- if (tier_set_migrate_data(migrate_data) != 0) {
- goto out;
- }
-
- dht_build_root_loc(defrag->root_inode, &root_loc);
-
- ret = gettimeofday(&start_time, NULL);
- if (query_cbk_args->is_promotion) {
- max_time = defrag->tier_conf.tier_promote_frequency;
- } else {
- max_time = defrag->tier_conf.tier_demote_frequency;
- }
-
- /* Per file */
- while ((ret = read_query_record_list(query_cbk_args->qfile_array,
- &query_record)) != 0) {
- if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to fetch query record "
- "from query file");
- goto out;
- }
-
- if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
- ret = -1;
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Exiting tier migration as"
- "defrag status is not started");
- goto out;
- }
-
- ret = gettimeofday(&current_time, NULL);
- if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Could not get current time.");
- goto out;
- }
-
- total_time = current_time.tv_sec - start_time.tv_sec;
- if (total_time > max_time) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "Max cycle time reached. Exiting migration.");
- goto out;
- }
-
- per_file_status = 0;
- per_link_status = 0;
-
- if (gf_defrag_get_pause_state(&defrag->tier_conf) != TIER_RUNNING) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "Tiering paused. "
- "Exiting tier_migrate_using_query_file");
- break;
- }
-
- if (defrag->tier_conf.mode == TIER_MODE_WM) {
- ret = tier_get_fs_stat(this, &root_loc);
- if (ret != 0) {
- gfdb_methods.gfdb_query_record_free(query_record);
- query_record = NULL;
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS,
- "tier_get_fs_stat() FAILED ... "
- "skipping file migrations until next cycle");
- break;
- }
-
- if (!tier_do_migration(this, query_cbk_args->is_promotion)) {
- gfdb_methods.gfdb_query_record_free(query_record);
- query_record = NULL;
-
- /* We have crossed the high watermark. Stop processing
- * files if this is a promotion cycle so demotion gets
- * a chance to start if not already running*/
-
- if (query_cbk_args->is_promotion &&
- is_hot_tier_full(&defrag->tier_conf)) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "High watermark crossed during "
- "promotion. Exiting "
- "tier_migrate_using_query_file");
- break;
- }
- continue;
- }
- }
-
- per_link_status = 0;
-
- /* For now we only support single link migration. And we will
- * ignore other hard links in the link info list of query record
- * TODO: Multiple hard links migration */
- if (!list_empty(&query_record->link_list)) {
- link_info = list_first_entry(&query_record->link_list,
- gfdb_link_info_t, list);
- }
- if (link_info != NULL) {
- if (tier_migrate_link(this, conf, query_record->gfid, link_info,
- defrag, query_cbk_args, migrate_data,
- &per_link_status, &total_files,
- &total_migrated_bytes) != 0) {
- gf_msg(
- this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "%s failed for %s(gfid:%s)",
- (query_cbk_args->is_promotion ? "Promotion" : "Demotion"),
- link_info->file_name, uuid_utoa(query_record->gfid));
- }
- }
- per_file_status = per_link_status;
-
- if (per_file_status < 0) { /* Failure */
- pthread_mutex_lock(&dm_stat_mutex);
- defrag->total_failures++;
- pthread_mutex_unlock(&dm_stat_mutex);
- } else if (per_file_status == 0) { /* Success */
- pthread_mutex_lock(&dm_stat_mutex);
- defrag->total_files++;
- pthread_mutex_unlock(&dm_stat_mutex);
- } else if (per_file_status == 1) { /* Ignore */
- per_file_status = 0;
- /* Since this attempt was ignored we
- * decrement the lookup count*/
- pthread_mutex_lock(&dm_stat_mutex);
- defrag->num_files_lookedup--;
- pthread_mutex_unlock(&dm_stat_mutex);
- }
- total_status = total_status + per_file_status;
- per_link_status = 0;
- per_file_status = 0;
-
- gfdb_methods.gfdb_query_record_free(query_record);
- query_record = NULL;
-
- /* If we are demoting and the entry watermark was HI, then
- * we are done with emergency demotions if the current
- * watermark has fallen below hi-watermark level
- */
- if (emergency_demote_mode) {
- if (tier_check_watermark(this) == 0) {
- if (!is_hot_tier_full(&defrag->tier_conf)) {
- break;
- }
- }
- }
- }
-
-out:
- if (migrate_data)
- dict_unref(migrate_data);
-
- gfdb_methods.gfdb_query_record_free(query_record);
- query_record = NULL;
-
- return total_status;
-}
-
-/* This is the call back function per record/file from data base */
-static int
-tier_gf_query_callback(gfdb_query_record_t *gfdb_query_record, void *_args)
-{
- int ret = -1;
- query_cbk_args_t *query_cbk_args = _args;
-
- GF_VALIDATE_OR_GOTO("tier", query_cbk_args, out);
- GF_VALIDATE_OR_GOTO("tier", query_cbk_args->defrag, out);
- GF_VALIDATE_OR_GOTO("tier", (query_cbk_args->query_fd > 0), out);
-
- ret = gfdb_methods.gfdb_write_query_record(query_cbk_args->query_fd,
- gfdb_query_record);
- if (ret) {
- gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed writing query record to query file");
- goto out;
- }
-
- pthread_mutex_lock(&dm_stat_mutex);
- query_cbk_args->defrag->num_files_lookedup++;
- pthread_mutex_unlock(&dm_stat_mutex);
-
- ret = 0;
-out:
- return ret;
-}
-
-/* Create query file in tier process */
-static int
-tier_process_self_query(tier_brick_list_t *local_brick, void *args)
-{
- int ret = -1;
- char *db_path = NULL;
- query_cbk_args_t *query_cbk_args = NULL;
- xlator_t *this = NULL;
- gfdb_conn_node_t *conn_node = NULL;
- dict_t *params_dict = NULL;
- dict_t *ctr_ipc_dict = NULL;
- gfdb_brick_info_t *gfdb_brick_info = args;
-
- /*Init of all the essentials*/
- GF_VALIDATE_OR_GOTO("tier", gfdb_brick_info, out);
- query_cbk_args = gfdb_brick_info->_query_cbk_args;
-
- GF_VALIDATE_OR_GOTO("tier", query_cbk_args->this, out);
- this = query_cbk_args->this;
-
- GF_VALIDATE_OR_GOTO(this->name, gfdb_brick_info->_query_cbk_args, out);
-
- GF_VALIDATE_OR_GOTO(this->name, local_brick, out);
-
- GF_VALIDATE_OR_GOTO(this->name, local_brick->xlator, out);
-
- GF_VALIDATE_OR_GOTO(this->name, local_brick->brick_db_path, out);
-
- db_path = local_brick->brick_db_path;
-
- /*Preparing DB parameters before init_db i.e getting db connection*/
- params_dict = dict_new();
- if (!params_dict) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "DB Params cannot initialized");
- goto out;
- }
- SET_DB_PARAM_TO_DICT(this->name, params_dict,
- (char *)gfdb_methods.get_db_path_key(), db_path, ret,
- out);
-
- /*Get the db connection*/
- conn_node = gfdb_methods.init_db((void *)params_dict, dht_tier_db_type);
- if (!conn_node) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "FATAL: Failed initializing db operations");
- goto out;
- }
-
- /* Query for eligible files from db */
- query_cbk_args->query_fd = open(local_brick->qfile_path,
- O_WRONLY | O_CREAT | O_APPEND,
- S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
- if (query_cbk_args->query_fd < 0) {
- gf_msg(this->name, GF_LOG_ERROR, errno, DHT_MSG_LOG_TIER_ERROR,
- "Failed to open query file %s", local_brick->qfile_path);
- goto out;
- }
- if (!gfdb_brick_info->_gfdb_promote) {
- if (query_cbk_args->defrag->tier_conf.watermark_last == TIER_WM_HI) {
- /* emergency demotion mode */
- ret = gfdb_methods.find_all(
- conn_node, tier_gf_query_callback, (void *)query_cbk_args,
- query_cbk_args->defrag->tier_conf.query_limit);
- } else {
- if (query_cbk_args->defrag->write_freq_threshold == 0 &&
- query_cbk_args->defrag->read_freq_threshold == 0) {
- ret = gfdb_methods.find_unchanged_for_time(
- conn_node, tier_gf_query_callback, (void *)query_cbk_args,
- gfdb_brick_info->time_stamp);
- } else {
- ret = gfdb_methods.find_unchanged_for_time_freq(
- conn_node, tier_gf_query_callback, (void *)query_cbk_args,
- gfdb_brick_info->time_stamp,
- query_cbk_args->defrag->write_freq_threshold,
- query_cbk_args->defrag->read_freq_threshold, _gf_false);
- }
- }
- } else {
- if (query_cbk_args->defrag->write_freq_threshold == 0 &&
- query_cbk_args->defrag->read_freq_threshold == 0) {
- ret = gfdb_methods.find_recently_changed_files(
- conn_node, tier_gf_query_callback, (void *)query_cbk_args,
- gfdb_brick_info->time_stamp);
- } else {
- ret = gfdb_methods.find_recently_changed_files_freq(
- conn_node, tier_gf_query_callback, (void *)query_cbk_args,
- gfdb_brick_info->time_stamp,
- query_cbk_args->defrag->write_freq_threshold,
- query_cbk_args->defrag->read_freq_threshold, _gf_false);
- }
- }
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "FATAL: query from db failed");
- goto out;
- }
-
- /*Clear the heat on the DB entries*/
- /*Preparing ctr_ipc_dict*/
- ctr_ipc_dict = dict_new();
- if (!ctr_ipc_dict) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "ctr_ipc_dict cannot initialized");
- goto out;
- }
-
- SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_dict, GFDB_IPC_CTR_KEY,
- GFDB_IPC_CTR_CLEAR_OPS, ret, out);
-
- ret = syncop_ipc(local_brick->xlator, GF_IPC_TARGET_CTR, ctr_ipc_dict,
- NULL);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed clearing the heat "
- "on db %s error %d",
- local_brick->brick_db_path, ret);
- goto out;
- }
-
- ret = 0;
-out:
- if (params_dict) {
- dict_unref(params_dict);
- params_dict = NULL;
- }
-
- if (ctr_ipc_dict) {
- dict_unref(ctr_ipc_dict);
- ctr_ipc_dict = NULL;
- }
-
- if (query_cbk_args && query_cbk_args->query_fd >= 0) {
- sys_close(query_cbk_args->query_fd);
- query_cbk_args->query_fd = -1;
- }
- gfdb_methods.fini_db(conn_node);
-
- return ret;
-}
-
-/*Ask CTR to create the query file*/
-static int
-tier_process_ctr_query(tier_brick_list_t *local_brick, void *args)
-{
- int ret = -1;
- query_cbk_args_t *query_cbk_args = NULL;
- xlator_t *this = NULL;
- dict_t *ctr_ipc_in_dict = NULL;
- dict_t *ctr_ipc_out_dict = NULL;
- gfdb_brick_info_t *gfdb_brick_info = args;
- gfdb_ipc_ctr_params_t *ipc_ctr_params = NULL;
- int count = 0;
-
- /*Init of all the essentials*/
- GF_VALIDATE_OR_GOTO("tier", gfdb_brick_info, out);
- query_cbk_args = gfdb_brick_info->_query_cbk_args;
-
- GF_VALIDATE_OR_GOTO("tier", query_cbk_args->this, out);
- this = query_cbk_args->this;
-
- GF_VALIDATE_OR_GOTO(this->name, gfdb_brick_info->_query_cbk_args, out);
-
- GF_VALIDATE_OR_GOTO(this->name, local_brick, out);
-
- GF_VALIDATE_OR_GOTO(this->name, local_brick->xlator, out);
-
- GF_VALIDATE_OR_GOTO(this->name, local_brick->brick_db_path, out);
-
- /*Preparing ctr_ipc_in_dict*/
- ctr_ipc_in_dict = dict_new();
- if (!ctr_ipc_in_dict) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "ctr_ipc_in_dict cannot initialized");
- goto out;
- }
-
- ipc_ctr_params = GF_CALLOC(1, sizeof(gfdb_ipc_ctr_params_t),
- gf_tier_mt_ipc_ctr_params_t);
- if (!ipc_ctr_params) {
- goto out;
- }
-
- /* set all the query params*/
- ipc_ctr_params->is_promote = gfdb_brick_info->_gfdb_promote;
-
- ipc_ctr_params->write_freq_threshold = query_cbk_args->defrag
- ->write_freq_threshold;
-
- ipc_ctr_params->read_freq_threshold = query_cbk_args->defrag
- ->read_freq_threshold;
-
- ipc_ctr_params->query_limit = query_cbk_args->defrag->tier_conf.query_limit;
-
- ipc_ctr_params->emergency_demote = (!gfdb_brick_info->_gfdb_promote &&
- query_cbk_args->defrag->tier_conf
- .watermark_last == TIER_WM_HI);
-
- memcpy(&ipc_ctr_params->time_stamp, gfdb_brick_info->time_stamp,
- sizeof(gfdb_time_t));
-
- SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_in_dict, GFDB_IPC_CTR_KEY,
- GFDB_IPC_CTR_QUERY_OPS, ret, out);
-
- SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_in_dict,
- GFDB_IPC_CTR_GET_QFILE_PATH, local_brick->qfile_path,
- ret, out);
-
- ret = dict_set_bin(ctr_ipc_in_dict, GFDB_IPC_CTR_GET_QUERY_PARAMS,
- ipc_ctr_params, sizeof(*ipc_ctr_params));
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED,
- "Failed setting %s to params dictionary",
- GFDB_IPC_CTR_GET_QUERY_PARAMS);
- GF_FREE(ipc_ctr_params);
- goto out;
- }
- ipc_ctr_params = NULL;
-
- ret = syncop_ipc(local_brick->xlator, GF_IPC_TARGET_CTR, ctr_ipc_in_dict,
- &ctr_ipc_out_dict);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_IPC_TIER_ERROR,
- "Failed query on %s ret %d", local_brick->brick_db_path, ret);
- goto out;
- }
-
- ret = dict_get_int32(ctr_ipc_out_dict, GFDB_IPC_CTR_RET_QUERY_COUNT,
- &count);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed getting count "
- "of records on %s",
- local_brick->brick_db_path);
- goto out;
- }
-
- if (count < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed query on %s", local_brick->brick_db_path);
- ret = -1;
- goto out;
- }
-
- pthread_mutex_lock(&dm_stat_mutex);
- query_cbk_args->defrag->num_files_lookedup = count;
- pthread_mutex_unlock(&dm_stat_mutex);
-
- ret = 0;
-out:
-
- if (ctr_ipc_in_dict) {
- dict_unref(ctr_ipc_in_dict);
- ctr_ipc_in_dict = NULL;
- }
-
- if (ctr_ipc_out_dict) {
- dict_unref(ctr_ipc_out_dict);
- ctr_ipc_out_dict = NULL;
- }
-
- GF_FREE(ipc_ctr_params);
-
- return ret;
-}
-
-/* This is the call back function for each brick from hot/cold bricklist
- * It picks up each bricks db and queries for eligible files for migration.
- * The list of eligible files are populated in appropriate query files*/
-static int
-tier_process_brick(tier_brick_list_t *local_brick, void *args)
-{
- int ret = -1;
- dict_t *ctr_ipc_in_dict = NULL;
- dict_t *ctr_ipc_out_dict = NULL;
- char *strval = NULL;
-
- GF_VALIDATE_OR_GOTO("tier", local_brick, out);
-
- GF_VALIDATE_OR_GOTO("tier", local_brick->xlator, out);
-
- if (dht_tier_db_type == GFDB_SQLITE3) {
- /*Preparing ctr_ipc_in_dict*/
- ctr_ipc_in_dict = dict_new();
- if (!ctr_ipc_in_dict) {
- gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "ctr_ipc_in_dict cannot initialized");
- goto out;
- }
-
- ret = dict_set_str(ctr_ipc_in_dict, GFDB_IPC_CTR_KEY,
- GFDB_IPC_CTR_GET_DB_PARAM_OPS);
- if (ret) {
- gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED,
- "Failed to set %s "
- "to params dictionary",
- GFDB_IPC_CTR_KEY);
- goto out;
- }
-
- ret = dict_set_str(ctr_ipc_in_dict, GFDB_IPC_CTR_GET_DB_PARAM_OPS, "");
- if (ret) {
- gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED,
- "Failed to set %s "
- "to params dictionary",
- GFDB_IPC_CTR_GET_DB_PARAM_OPS);
- goto out;
- }
-
- ret = dict_set_str(ctr_ipc_in_dict, GFDB_IPC_CTR_GET_DB_KEY,
- "journal_mode");
- if (ret) {
- gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED,
- "Failed to set %s "
- "to params dictionary",
- GFDB_IPC_CTR_GET_DB_KEY);
- goto out;
- }
-
- ret = syncop_ipc(local_brick->xlator, GF_IPC_TARGET_CTR,
- ctr_ipc_in_dict, &ctr_ipc_out_dict);
- if (ret || ctr_ipc_out_dict == NULL) {
- gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to get "
- "journal_mode of sql db %s",
- local_brick->brick_db_path);
- goto out;
- }
-
- ret = dict_get_str(ctr_ipc_out_dict, "journal_mode", &strval);
- if (ret) {
- gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_GET_PARAM_FAILED,
- "Failed to get %s "
- "from params dictionary"
- "journal_mode",
- strval);
- goto out;
- }
-
- if (strval && (strncmp(strval, "wal", SLEN("wal")) == 0)) {
- ret = tier_process_self_query(local_brick, args);
- if (ret) {
- goto out;
- }
- } else {
- ret = tier_process_ctr_query(local_brick, args);
- if (ret) {
- goto out;
- }
- }
- ret = 0;
-
- } else {
- ret = tier_process_self_query(local_brick, args);
- if (ret) {
- goto out;
- }
- }
-
- ret = 0;
-out:
- if (ctr_ipc_in_dict)
- dict_unref(ctr_ipc_in_dict);
-
- if (ctr_ipc_out_dict)
- dict_unref(ctr_ipc_out_dict);
-
- return ret;
-}
-
-static int
-tier_build_migration_qfile(migration_args_t *args,
- query_cbk_args_t *query_cbk_args,
- gf_boolean_t is_promotion)
-{
- gfdb_time_t current_time;
- gfdb_brick_info_t gfdb_brick_info;
- gfdb_time_t time_in_past;
- int ret = -1;
- tier_brick_list_t *local_brick = NULL;
- int i = 0;
- time_in_past.tv_sec = args->freq_time;
- time_in_past.tv_usec = 0;
-
- ret = gettimeofday(&current_time, NULL);
- if (ret == -1) {
- gf_msg(args->this->name, GF_LOG_ERROR, errno,
- DHT_MSG_SYS_CALL_GET_TIME_FAILED, "Failed to get current time");
- goto out;
- }
- time_in_past.tv_sec = current_time.tv_sec - time_in_past.tv_sec;
-
- /* The migration daemon may run a varying numberof usec after the */
- /* sleep call triggers. A file may be registered in CTR some number */
- /* of usec X after the daemon started and missed in the subsequent */
- /* cycle if the daemon starts Y usec after the period in seconds */
- /* where Y>X. Normalize away this problem by always setting usec */
- /* to 0. */
- time_in_past.tv_usec = 0;
-
- gfdb_brick_info.time_stamp = &time_in_past;
- gfdb_brick_info._gfdb_promote = is_promotion;
- gfdb_brick_info._query_cbk_args = query_cbk_args;
-
- list_for_each_entry(local_brick, args->brick_list, list)
- {
- /* Construct query file path for this brick
- * i.e
- * /var/run/gluster/xlator_name/
- * {promote/demote}-brickname-indexinbricklist
- * So that no two query files will have same path even
- * bricks have the same name
- * */
- snprintf(local_brick->qfile_path, PATH_MAX, "%s-%s-%d",
- GET_QFILE_PATH(gfdb_brick_info._gfdb_promote),
- local_brick->brick_name, i);
-
- /* Delete any old query files for this brick */
- sys_unlink(local_brick->qfile_path);
-
- ret = tier_process_brick(local_brick, &gfdb_brick_info);
- if (ret) {
- gf_msg(args->this->name, GF_LOG_ERROR, 0,
- DHT_MSG_BRICK_QUERY_FAILED, "Brick %s query failed\n",
- local_brick->brick_db_path);
- }
- i++;
- }
- ret = 0;
-out:
- return ret;
-}
-
-static int
-tier_migrate_files_using_qfile(migration_args_t *comp,
- query_cbk_args_t *query_cbk_args)
-{
- int ret = -1;
- tier_brick_list_t *local_brick = NULL;
- tier_brick_list_t *temp = NULL;
- gfdb_time_t current_time = {
- 0,
- };
- ssize_t qfile_array_size = 0;
- int count = 0;
- int temp_fd = 0;
- gf_tier_conf_t *tier_conf = NULL;
-
- tier_conf = &(query_cbk_args->defrag->tier_conf);
-
- /* Time for error query files */
- gettimeofday(&current_time, NULL);
-
- /* Build the qfile list */
- list_for_each_entry_safe(local_brick, temp, comp->brick_list, list)
- {
- qfile_array_size++;
- }
- query_cbk_args->qfile_array = qfile_array_new(qfile_array_size);
- if (!query_cbk_args->qfile_array) {
- gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to create new "
- "qfile_array");
- goto out;
- }
-
- /*Open all qfiles*/
- count = 0;
- query_cbk_args->qfile_array->exhausted_count = 0;
- list_for_each_entry_safe(local_brick, temp, comp->brick_list, list)
- {
- temp_fd = query_cbk_args->qfile_array->fd_array[count];
- temp_fd = open(local_brick->qfile_path, O_RDONLY,
- S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
- if (temp_fd < 0) {
- gf_msg("tier", GF_LOG_ERROR, errno, DHT_MSG_LOG_TIER_ERROR,
- "Failed to open "
- "%s to the query file",
- local_brick->qfile_path);
- query_cbk_args->qfile_array->exhausted_count++;
- }
- query_cbk_args->qfile_array->fd_array[count] = temp_fd;
- count++;
- }
-
- /* Moving the query file index to the next, so that we won't the same
- * query file every cycle as the first one */
- query_cbk_args->qfile_array
- ->next_index = (query_cbk_args->is_promotion)
- ? tier_conf->last_promote_qfile_index
- : tier_conf->last_demote_qfile_index;
- shift_next_index(query_cbk_args->qfile_array);
- if (query_cbk_args->is_promotion) {
- tier_conf->last_promote_qfile_index = query_cbk_args->qfile_array
- ->next_index;
- } else {
- tier_conf->last_demote_qfile_index = query_cbk_args->qfile_array
- ->next_index;
- }
-
- /* Migrate files using query file list */
- ret = tier_migrate_using_query_file((void *)query_cbk_args);
-out:
- qfile_array_free(query_cbk_args->qfile_array);
-
- /* If there is an error rename all the query files to .err files
- * with a timestamp for better debugging */
- if (ret) {
- struct tm tm = {
- 0,
- };
- char time_str[128] = {
- 0,
- };
- char query_file_path_err[PATH_MAX] = {
- 0,
- };
- int32_t len = 0;
-
- /* Time format for error query files */
- gmtime_r(&current_time.tv_sec, &tm);
- strftime(time_str, sizeof(time_str), "%F-%T", &tm);
-
- list_for_each_entry_safe(local_brick, temp, comp->brick_list, list)
- {
- /* rename error qfile*/
- len = snprintf(query_file_path_err, sizeof(query_file_path_err),
- "%s-%s.err", local_brick->qfile_path, time_str);
- if ((len >= 0) && (len < sizeof(query_file_path_err))) {
- if (sys_rename(local_brick->qfile_path, query_file_path_err) ==
- -1)
- gf_msg_debug("tier", 0,
- "rename "
- "failed");
- }
- }
- }
-
- query_cbk_args->qfile_array = NULL;
-
- return ret;
-}
-
-int
-tier_demote(migration_args_t *demotion_args)
-{
- query_cbk_args_t query_cbk_args;
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO("tier", demotion_args, out);
- GF_VALIDATE_OR_GOTO("tier", demotion_args->this, out);
- GF_VALIDATE_OR_GOTO(demotion_args->this->name, demotion_args->brick_list,
- out);
- GF_VALIDATE_OR_GOTO(demotion_args->this->name, demotion_args->defrag, out);
-
- THIS = demotion_args->this;
-
- query_cbk_args.this = demotion_args->this;
- query_cbk_args.defrag = demotion_args->defrag;
- query_cbk_args.is_promotion = 0;
-
- /*Build the query file using bricklist*/
- ret = tier_build_migration_qfile(demotion_args, &query_cbk_args, _gf_false);
- if (ret)
- goto out;
-
- /* Migrate files using the query file */
- ret = tier_migrate_files_using_qfile(demotion_args, &query_cbk_args);
- if (ret)
- goto out;
-
-out:
- demotion_args->return_value = ret;
- return ret;
-}
-
-int
-tier_promote(migration_args_t *promotion_args)
-{
- int ret = -1;
- query_cbk_args_t query_cbk_args;
-
- GF_VALIDATE_OR_GOTO("tier", promotion_args->this, out);
- GF_VALIDATE_OR_GOTO(promotion_args->this->name, promotion_args->brick_list,
- out);
- GF_VALIDATE_OR_GOTO(promotion_args->this->name, promotion_args->defrag,
- out);
-
- THIS = promotion_args->this;
-
- query_cbk_args.this = promotion_args->this;
- query_cbk_args.defrag = promotion_args->defrag;
- query_cbk_args.is_promotion = 1;
-
- /*Build the query file using bricklist*/
- ret = tier_build_migration_qfile(promotion_args, &query_cbk_args, _gf_true);
- if (ret)
- goto out;
-
- /* Migrate files using the query file */
- ret = tier_migrate_files_using_qfile(promotion_args, &query_cbk_args);
- if (ret)
- goto out;
-
-out:
- promotion_args->return_value = ret;
- return ret;
-}
-
-/*
- * Command the CTR on a brick to compact the local database using an IPC
- */
-static int
-tier_process_self_compact(tier_brick_list_t *local_brick, void *args)
-{
- int ret = -1;
- char *db_path = NULL;
- query_cbk_args_t *query_cbk_args = NULL;
- xlator_t *this = NULL;
- gfdb_conn_node_t *conn_node = NULL;
- dict_t *params_dict = NULL;
- dict_t *ctr_ipc_dict = NULL;
- gfdb_brick_info_t *gfdb_brick_info = args;
-
- /*Init of all the essentials*/
- GF_VALIDATE_OR_GOTO("tier", gfdb_brick_info, out);
- query_cbk_args = gfdb_brick_info->_query_cbk_args;
-
- GF_VALIDATE_OR_GOTO("tier", query_cbk_args->this, out);
- this = query_cbk_args->this;
-
- GF_VALIDATE_OR_GOTO(this->name, gfdb_brick_info->_query_cbk_args, out);
-
- GF_VALIDATE_OR_GOTO(this->name, local_brick, out);
-
- GF_VALIDATE_OR_GOTO(this->name, local_brick->xlator, out);
-
- GF_VALIDATE_OR_GOTO(this->name, local_brick->brick_db_path, out);
-
- db_path = local_brick->brick_db_path;
-
- /*Preparing DB parameters before init_db i.e getting db connection*/
- params_dict = dict_new();
- if (!params_dict) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "DB Params cannot initialized");
- goto out;
- }
- SET_DB_PARAM_TO_DICT(this->name, params_dict,
- (char *)gfdb_methods.get_db_path_key(), db_path, ret,
- out);
-
- /*Get the db connection*/
- conn_node = gfdb_methods.init_db((void *)params_dict, dht_tier_db_type);
- if (!conn_node) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "FATAL: Failed initializing db operations");
- goto out;
- }
-
- ret = 0;
-
- /*Preparing ctr_ipc_dict*/
- ctr_ipc_dict = dict_new();
- if (!ctr_ipc_dict) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "ctr_ipc_dict cannot initialized");
- goto out;
- }
-
- ret = dict_set_int32(ctr_ipc_dict, "compact_active",
- query_cbk_args->defrag->tier_conf.compact_active);
-
- if (ret) {
- gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED,
- "Failed to set %s "
- "to params dictionary",
- "compact_active");
- goto out;
- }
-
- ret = dict_set_int32(
- ctr_ipc_dict, "compact_mode_switched",
- query_cbk_args->defrag->tier_conf.compact_mode_switched);
-
- if (ret) {
- gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED,
- "Failed to set %s "
- "to params dictionary",
- "compact_mode_switched");
- goto out;
- }
-
- SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_dict, GFDB_IPC_CTR_KEY,
- GFDB_IPC_CTR_SET_COMPACT_PRAGMA, ret, out);
-
- gf_msg(this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS,
- "Starting Compaction IPC");
-
- ret = syncop_ipc(local_brick->xlator, GF_IPC_TARGET_CTR, ctr_ipc_dict,
- NULL);
-
- gf_msg(this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS,
- "Ending Compaction IPC");
-
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed compaction "
- "on db %s error %d",
- local_brick->brick_db_path, ret);
- goto out;
- }
-
- gf_msg(this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS,
- "SUCCESS: %s Compaction", local_brick->brick_name);
-
- ret = 0;
-out:
- if (params_dict) {
- dict_unref(params_dict);
- params_dict = NULL;
- }
-
- if (ctr_ipc_dict) {
- dict_unref(ctr_ipc_dict);
- ctr_ipc_dict = NULL;
- }
-
- gfdb_methods.fini_db(conn_node);
-
- return ret;
-}
-
-/*
- * This is the call back function for each brick from hot/cold bricklist.
- * It determines the database type on each brick and calls the corresponding
- * function to prepare the compaction IPC.
- */
-static int
-tier_compact_db_brick(tier_brick_list_t *local_brick, void *args)
-{
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO("tier", local_brick, out);
-
- GF_VALIDATE_OR_GOTO("tier", local_brick->xlator, out);
-
- ret = tier_process_self_compact(local_brick, args);
- if (ret) {
- gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "Brick %s did not compact", local_brick->brick_name);
- goto out;
- }
-
- ret = 0;
-
-out:
-
- return ret;
-}
-
-static int
-tier_send_compact(migration_args_t *args, query_cbk_args_t *query_cbk_args)
-{
- gfdb_time_t current_time;
- gfdb_brick_info_t gfdb_brick_info;
- gfdb_time_t time_in_past;
- int ret = -1;
- tier_brick_list_t *local_brick = NULL;
-
- time_in_past.tv_sec = args->freq_time;
- time_in_past.tv_usec = 0;
-
- ret = gettimeofday(&current_time, NULL);
- if (ret == -1) {
- gf_msg(args->this->name, GF_LOG_ERROR, errno,
- DHT_MSG_SYS_CALL_GET_TIME_FAILED, "Failed to get current time");
- goto out;
- }
- time_in_past.tv_sec = current_time.tv_sec - time_in_past.tv_sec;
-
- /* The migration daemon may run a varying numberof usec after the sleep
- call triggers. A file may be registered in CTR some number of usec X
- after the daemon started and missed in the subsequent cycle if the
- daemon starts Y usec after the period in seconds where Y>X. Normalize
- away this problem by always setting usec to 0. */
- time_in_past.tv_usec = 0;
-
- gfdb_brick_info.time_stamp = &time_in_past;
-
- /* This is meant to say we are always compacting at this point */
- /* We simply borrow the promotion flag to do this */
- gfdb_brick_info._gfdb_promote = 1;
-
- gfdb_brick_info._query_cbk_args = query_cbk_args;
-
- list_for_each_entry(local_brick, args->brick_list, list)
- {
- gf_msg(args->this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS,
- "Start compaction for %s", local_brick->brick_name);
-
- ret = tier_compact_db_brick(local_brick, &gfdb_brick_info);
- if (ret) {
- gf_msg(args->this->name, GF_LOG_ERROR, 0,
- DHT_MSG_BRICK_QUERY_FAILED, "Brick %s compaction failed\n",
- local_brick->brick_db_path);
- }
-
- gf_msg(args->this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS,
- "End compaction for %s", local_brick->brick_name);
- }
- ret = 0;
-out:
- return ret;
-}
-
-static int
-tier_compact(void *args)
-{
- int ret = -1;
- query_cbk_args_t query_cbk_args;
- migration_args_t *compaction_args = args;
-
- GF_VALIDATE_OR_GOTO("tier", compaction_args->this, out);
- GF_VALIDATE_OR_GOTO(compaction_args->this->name,
- compaction_args->brick_list, out);
- GF_VALIDATE_OR_GOTO(compaction_args->this->name, compaction_args->defrag,
- out);
-
- THIS = compaction_args->this;
-
- query_cbk_args.this = compaction_args->this;
- query_cbk_args.defrag = compaction_args->defrag;
- query_cbk_args.is_compaction = 1;
-
- /* Send the compaction pragma out to all the bricks on the bricklist. */
- /* tier_get_bricklist ensures all bricks on the list are local to */
- /* this node. */
- ret = tier_send_compact(compaction_args, &query_cbk_args);
- if (ret)
- goto out;
-
- ret = 0;
-out:
- compaction_args->return_value = ret;
- return ret;
-}
-
-static int
-tier_get_bricklist(xlator_t *xl, struct list_head *local_bricklist_head)
-{
- xlator_list_t *child = NULL;
- char *rv = NULL;
- char *rh = NULL;
- char *brickname = NULL;
- char db_name[PATH_MAX] = "";
- int ret = 0;
- tier_brick_list_t *local_brick = NULL;
- int32_t len = 0;
-
- GF_VALIDATE_OR_GOTO("tier", xl, out);
- GF_VALIDATE_OR_GOTO("tier", local_bricklist_head, out);
-
- /*
- * This function obtains remote subvolumes and filters out only
- * those running on the same node as the tier daemon.
- */
- if (strcmp(xl->type, "protocol/client") == 0) {
- ret = dict_get_str(xl->options, "remote-host", &rh);
- if (ret < 0)
- goto out;
-
- if (gf_is_local_addr(rh)) {
- local_brick = GF_CALLOC(1, sizeof(tier_brick_list_t),
- gf_tier_mt_bricklist_t);
- if (!local_brick) {
- goto out;
- }
-
- ret = dict_get_str(xl->options, "remote-subvolume", &rv);
- if (ret < 0)
- goto out;
-
- brickname = strrchr(rv, '/') + 1;
- snprintf(db_name, sizeof(db_name), "%s.db", brickname);
-
- local_brick->brick_db_path = GF_MALLOC(PATH_MAX, gf_common_mt_char);
- if (!local_brick->brick_db_path) {
- gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS,
- "Failed to allocate memory for"
- " bricklist.");
- ret = -1;
- goto out;
- }
-
- len = snprintf(local_brick->brick_db_path, PATH_MAX, "%s/%s/%s", rv,
- GF_HIDDEN_PATH, db_name);
- if ((len < 0) || (len >= PATH_MAX)) {
- gf_msg("tier", GF_LOG_ERROR, EINVAL, DHT_MSG_LOG_TIER_STATUS,
- "DB path too long");
- ret = -1;
- goto out;
- }
-
- local_brick->xlator = xl;
-
- snprintf(local_brick->brick_name, NAME_MAX, "%s", brickname);
-
- list_add_tail(&(local_brick->list), local_bricklist_head);
-
- ret = 0;
- goto out;
- }
- }
-
- for (child = xl->children; child; child = child->next) {
- ret = tier_get_bricklist(child->xlator, local_bricklist_head);
- if (ret) {
- goto out;
- }
- }
-
- ret = 0;
-out:
-
- if (ret) {
- if (local_brick) {
- GF_FREE(local_brick->brick_db_path);
- }
- GF_FREE(local_brick);
- }
-
- return ret;
-}
-
-int
-tier_get_freq_demote(gf_tier_conf_t *tier_conf)
-{
- if ((tier_conf->mode == TIER_MODE_WM) &&
- (tier_conf->watermark_last == TIER_WM_HI))
- return DEFAULT_DEMOTE_DEGRADED;
- else
- return tier_conf->tier_demote_frequency;
-}
-
-int
-tier_get_freq_promote(gf_tier_conf_t *tier_conf)
-{
- return tier_conf->tier_promote_frequency;
-}
-
-int
-tier_get_freq_compact_hot(gf_tier_conf_t *tier_conf)
-{
- return tier_conf->tier_compact_hot_frequency;
-}
-
-int
-tier_get_freq_compact_cold(gf_tier_conf_t *tier_conf)
-{
- return tier_conf->tier_compact_cold_frequency;
-}
-
-static int
-tier_check_demote(gfdb_time_t current_time, int freq)
-{
- return ((current_time.tv_sec % freq) == 0) ? _gf_true : _gf_false;
-}
-
-static gf_boolean_t
-tier_check_promote(gf_tier_conf_t *tier_conf, gfdb_time_t current_time,
- int freq)
-{
- if ((tier_conf->mode == TIER_MODE_WM) &&
- (tier_conf->watermark_last == TIER_WM_HI))
- return _gf_false;
-
- else
- return ((current_time.tv_sec % freq) == 0) ? _gf_true : _gf_false;
-}
-
-static gf_boolean_t
-tier_check_compact(gf_tier_conf_t *tier_conf, gfdb_time_t current_time,
- int freq_compact)
-{
- if (!(tier_conf->compact_active || tier_conf->compact_mode_switched))
- return _gf_false;
-
- return ((current_time.tv_sec % freq_compact) == 0) ? _gf_true : _gf_false;
-}
-
-void
-clear_bricklist(struct list_head *brick_list)
-{
- tier_brick_list_t *local_brick = NULL;
- tier_brick_list_t *temp = NULL;
-
- if (list_empty(brick_list)) {
- return;
- }
-
- list_for_each_entry_safe(local_brick, temp, brick_list, list)
- {
- list_del(&local_brick->list);
- GF_FREE(local_brick->brick_db_path);
- GF_FREE(local_brick);
- }
-}
-
-static void
-set_brick_list_qpath(struct list_head *brick_list, gf_boolean_t is_cold)
-{
- tier_brick_list_t *local_brick = NULL;
- int i = 0;
-
- GF_VALIDATE_OR_GOTO("tier", brick_list, out);
-
- list_for_each_entry(local_brick, brick_list, list)
- {
- /* Construct query file path for this brick
- * i.e
- * /var/run/gluster/xlator_name/
- * {promote/demote}-brickname-indexinbricklist
- * So that no two query files will have same path even
- * bricks have the same name
- * */
- snprintf(local_brick->qfile_path, PATH_MAX, "%s-%s-%d",
- GET_QFILE_PATH(is_cold), local_brick->brick_name, i);
- i++;
- }
-out:
- return;
-}
-
-static int
-tier_prepare_compact(migration_args_t *args, gfdb_time_t current_time)
-{
- xlator_t *this = NULL;
- dht_conf_t *conf = NULL;
- gf_defrag_info_t *defrag = NULL;
- gf_tier_conf_t *tier_conf = NULL;
- gf_boolean_t is_hot_tier = args->is_hot_tier;
- int freq = 0;
- int ret = -1;
- const char *tier_type = is_hot_tier ? "hot" : "cold";
-
- this = args->this;
-
- conf = this->private;
-
- defrag = conf->defrag;
-
- tier_conf = &defrag->tier_conf;
-
- freq = is_hot_tier ? tier_get_freq_compact_hot(tier_conf)
- : tier_get_freq_compact_cold(tier_conf);
-
- defrag->tier_conf.compact_mode_switched =
- is_hot_tier ? defrag->tier_conf.compact_mode_switched_hot
- : defrag->tier_conf.compact_mode_switched_cold;
-
- gf_msg(this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS,
- "Compact mode %i", defrag->tier_conf.compact_mode_switched);
-
- if (tier_check_compact(tier_conf, current_time, freq)) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "Start compaction on %s tier", tier_type);
-
- args->freq_time = freq;
- ret = tier_compact(args);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Compaction failed on "
- "%s tier",
- tier_type);
- goto out;
- }
-
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "End compaction on %s tier", tier_type);
-
- if (is_hot_tier) {
- defrag->tier_conf.compact_mode_switched_hot = _gf_false;
- } else {
- defrag->tier_conf.compact_mode_switched_cold = _gf_false;
- }
- }
-
-out:
- return ret;
-}
-
-static int
-tier_get_wm_interval(tier_mode_t mode, tier_watermark_op_t wm)
-{
- if (mode == TIER_MODE_WM && wm == TIER_WM_HI)
- return WM_INTERVAL_EMERG;
-
- return WM_INTERVAL;
-}
-
-/*
- * Main tiering loop. This is called from the promotion and the
- * demotion threads spawned in tier_start().
- *
- * Every second, wake from sleep to perform tasks.
- * 1. Check trigger to migrate data.
- * 2. Check for state changes (pause, unpause, stop).
- */
-static void *
-tier_run(void *in_args)
-{
- dht_conf_t *conf = NULL;
- gfdb_time_t current_time = {0};
- int freq = 0;
- int ret = 0;
- xlator_t *any = NULL;
- xlator_t *xlator = NULL;
- gf_tier_conf_t *tier_conf = NULL;
- loc_t root_loc = {0};
- int check_watermark = 0;
- gf_defrag_info_t *defrag = NULL;
- xlator_t *this = NULL;
- migration_args_t *args = in_args;
- GF_VALIDATE_OR_GOTO("tier", args, out);
- GF_VALIDATE_OR_GOTO("tier", args->brick_list, out);
-
- this = args->this;
- GF_VALIDATE_OR_GOTO("tier", this, out);
-
- conf = this->private;
- GF_VALIDATE_OR_GOTO("tier", conf, out);
-
- defrag = conf->defrag;
- GF_VALIDATE_OR_GOTO("tier", defrag, out);
-
- if (list_empty(args->brick_list)) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_ERROR,
- "Brick list for tier is empty. Exiting.");
- goto out;
- }
-
- defrag->defrag_status = GF_DEFRAG_STATUS_STARTED;
- tier_conf = &defrag->tier_conf;
-
- dht_build_root_loc(defrag->root_inode, &root_loc);
-
- while (1) {
- /*
- * Check if a graph switch occurred. If so, stop migration
- * thread. It will need to be restarted manually.
- */
- any = THIS->ctx->active->first;
- xlator = xlator_search_by_name(any, this->name);
-
- if (xlator != this) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "Detected graph switch. Exiting migration "
- "daemon.");
- goto out;
- }
-
- gf_defrag_check_pause_tier(tier_conf);
-
- sleep(1);
-
- if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
- ret = 1;
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "defrag->defrag_status != "
- "GF_DEFRAG_STATUS_STARTED");
- goto out;
- }
-
- if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER ||
- defrag->cmd == GF_DEFRAG_CMD_DETACH_START) {
- ret = 0;
- defrag->defrag_status = GF_DEFRAG_STATUS_COMPLETE;
- gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_LOG_TIER_ERROR,
- "defrag->defrag_cmd == "
- "GF_DEFRAG_CMD_START_DETACH_TIER");
- goto out;
- }
-
- if (gf_defrag_get_pause_state(&defrag->tier_conf) != TIER_RUNNING)
- continue;
-
- /* To have proper synchronization amongst all
- * brick holding nodes, so that promotion and demotions
- * start atomically w.r.t promotion/demotion frequency
- * period, all nodes should have their system time
- * in-sync with each other either manually set or
- * using a NTP server*/
- ret = gettimeofday(&current_time, NULL);
- if (ret == -1) {
- gf_msg(this->name, GF_LOG_ERROR, errno,
- DHT_MSG_SYS_CALL_GET_TIME_FAILED,
- "Failed to get current time");
- goto out;
- }
-
- check_watermark++;
-
- /* emergency demotion requires frequent watermark monitoring */
- if (check_watermark >=
- tier_get_wm_interval(tier_conf->mode, tier_conf->watermark_last)) {
- check_watermark = 0;
- if (tier_conf->mode == TIER_MODE_WM) {
- ret = tier_get_fs_stat(this, &root_loc);
- if (ret != 0) {
- continue;
- }
- ret = tier_check_watermark(this);
- if (ret != 0) {
- gf_msg(this->name, GF_LOG_CRITICAL, errno,
- DHT_MSG_LOG_TIER_ERROR, "Failed to get watermark");
- continue;
- }
- }
- }
-
- if (args->is_promotion) {
- freq = tier_get_freq_promote(tier_conf);
-
- if (tier_check_promote(tier_conf, current_time, freq)) {
- args->freq_time = freq;
- ret = tier_promote(args);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Promotion failed");
- }
- }
- } else if (args->is_compaction) {
- tier_prepare_compact(args, current_time);
- } else {
- freq = tier_get_freq_demote(tier_conf);
-
- if (tier_check_demote(current_time, freq)) {
- args->freq_time = freq;
- ret = tier_demote(args);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Demotion failed");
- }
- }
- }
-
- /* Check the statfs immediately after the processing threads
- return */
- check_watermark = WM_INTERVAL;
- }
-
- ret = 0;
-out:
-
- args->return_value = ret;
-
- return NULL;
-}
-
-int
-tier_start(xlator_t *this, gf_defrag_info_t *defrag)
-{
- pthread_t promote_thread;
- pthread_t demote_thread;
- pthread_t hot_compact_thread;
- pthread_t cold_compact_thread;
- int ret = -1;
- struct list_head bricklist_hot = {0};
- struct list_head bricklist_cold = {0};
- migration_args_t promotion_args = {0};
- migration_args_t demotion_args = {0};
- migration_args_t hot_compaction_args = {0};
- migration_args_t cold_compaction_args = {0};
- dht_conf_t *conf = NULL;
-
- INIT_LIST_HEAD((&bricklist_hot));
- INIT_LIST_HEAD((&bricklist_cold));
-
- conf = this->private;
-
- tier_get_bricklist(conf->subvolumes[1], &bricklist_hot);
- set_brick_list_qpath(&bricklist_hot, _gf_false);
-
- demotion_args.this = this;
- demotion_args.brick_list = &bricklist_hot;
- demotion_args.defrag = defrag;
- demotion_args.is_promotion = _gf_false;
- demotion_args.is_compaction = _gf_false;
-
- ret = gf_thread_create(&demote_thread, NULL, &tier_run, &demotion_args,
- "tierdem");
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to start demotion thread.");
- defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
- goto cleanup;
- }
-
- tier_get_bricklist(conf->subvolumes[0], &bricklist_cold);
- set_brick_list_qpath(&bricklist_cold, _gf_true);
-
- promotion_args.this = this;
- promotion_args.brick_list = &bricklist_cold;
- promotion_args.defrag = defrag;
- promotion_args.is_promotion = _gf_true;
-
- ret = gf_thread_create(&promote_thread, NULL, &tier_run, &promotion_args,
- "tierpro");
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to start promotion thread.");
- defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
- goto waitforspawned;
- }
-
- hot_compaction_args.this = this;
- hot_compaction_args.brick_list = &bricklist_hot;
- hot_compaction_args.defrag = defrag;
- hot_compaction_args.is_promotion = _gf_false;
- hot_compaction_args.is_compaction = _gf_true;
- hot_compaction_args.is_hot_tier = _gf_true;
-
- ret = gf_thread_create(&hot_compact_thread, NULL, &tier_run,
- &hot_compaction_args, "tierhcom");
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to start compaction thread.");
- defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
- goto waitforspawnedpromote;
- }
-
- cold_compaction_args.this = this;
- cold_compaction_args.brick_list = &bricklist_cold;
- cold_compaction_args.defrag = defrag;
- cold_compaction_args.is_promotion = _gf_false;
- cold_compaction_args.is_compaction = _gf_true;
- cold_compaction_args.is_hot_tier = _gf_false;
-
- ret = gf_thread_create(&cold_compact_thread, NULL, &tier_run,
- &cold_compaction_args, "tierccom");
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to start compaction thread.");
- defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
- goto waitforspawnedhotcompact;
- }
- pthread_join(cold_compact_thread, NULL);
-
-waitforspawnedhotcompact:
- pthread_join(hot_compact_thread, NULL);
-
-waitforspawnedpromote:
- pthread_join(promote_thread, NULL);
-
-waitforspawned:
- pthread_join(demote_thread, NULL);
-
-cleanup:
- clear_bricklist(&bricklist_cold);
- clear_bricklist(&bricklist_hot);
- return ret;
-}
-
-int32_t
-tier_migration_needed(xlator_t *this)
-{
- gf_defrag_info_t *defrag = NULL;
- dht_conf_t *conf = NULL;
- int ret = 0;
-
- conf = this->private;
-
- GF_VALIDATE_OR_GOTO(this->name, conf, out);
- GF_VALIDATE_OR_GOTO(this->name, conf->defrag, out);
-
- defrag = conf->defrag;
-
- if ((defrag->cmd == GF_DEFRAG_CMD_START_TIER) ||
- (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER))
- ret = 1;
-out:
- return ret;
-}
-
-int32_t
-tier_migration_get_dst(xlator_t *this, dht_local_t *local)
-{
- dht_conf_t *conf = NULL;
- int32_t ret = -1;
- gf_defrag_info_t *defrag = NULL;
-
- GF_VALIDATE_OR_GOTO("tier", this, out);
- GF_VALIDATE_OR_GOTO(this->name, this->private, out);
-
- conf = this->private;
-
- defrag = conf->defrag;
-
- if (defrag && defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER) {
- local->rebalance.target_node = conf->subvolumes[0];
-
- } else if (conf->subvolumes[0] == local->cached_subvol)
- local->rebalance.target_node = conf->subvolumes[1];
- else
- local->rebalance.target_node = conf->subvolumes[0];
-
- if (local->rebalance.target_node)
- ret = 0;
-
-out:
- return ret;
-}
-
-xlator_t *
-tier_search(xlator_t *this, dht_layout_t *layout, const char *name)
-{
- xlator_t *subvol = NULL;
- dht_conf_t *conf = NULL;
-
- GF_VALIDATE_OR_GOTO("tier", this, out);
- GF_VALIDATE_OR_GOTO(this->name, this->private, out);
-
- conf = this->private;
-
- subvol = TIER_HASHED_SUBVOL;
-
-out:
- return subvol;
-}
-
-static int
-tier_load_externals(xlator_t *this)
-{
- int ret = -1;
- char *libpathfull = (LIBDIR "/libgfdb.so.0");
- get_gfdb_methods_t get_gfdb_methods;
-
- GF_VALIDATE_OR_GOTO("this", this, out);
-
- libhandle = dlopen(libpathfull, RTLD_NOW);
- if (!libhandle) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Error loading libgfdb.so %s\n", dlerror());
- ret = -1;
- goto out;
- }
-
- get_gfdb_methods = dlsym(libhandle, "get_gfdb_methods");
- if (!get_gfdb_methods) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Error loading get_gfdb_methods()");
- ret = -1;
- goto out;
- }
-
- get_gfdb_methods(&gfdb_methods);
-
- ret = 0;
-
-out:
- if (ret && libhandle)
- dlclose(libhandle);
-
- return ret;
-}
-
-static tier_mode_t
-tier_validate_mode(char *mode)
-{
- int ret = -1;
-
- if (strcmp(mode, "test") == 0) {
- ret = TIER_MODE_TEST;
- } else {
- ret = TIER_MODE_WM;
- }
-
- return ret;
-}
-
-static gf_boolean_t
-tier_validate_compact_mode(char *mode)
-{
- gf_boolean_t ret = _gf_false;
-
- gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "tier_validate_compact_mode: mode = %s", mode);
-
- if (!strcmp(mode, "on")) {
- ret = _gf_true;
- } else {
- ret = _gf_false;
- }
-
- gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS,
- "tier_validate_compact_mode: ret = %i", ret);
-
- return ret;
-}
-
-int
-tier_init_methods(xlator_t *this)
-{
- int ret = -1;
- dht_conf_t *conf = NULL;
- dht_methods_t *methods = NULL;
-
- GF_VALIDATE_OR_GOTO("tier", this, err);
-
- conf = this->private;
-
- methods = &(conf->methods);
-
- methods->migration_get_dst_subvol = tier_migration_get_dst;
- methods->migration_other = tier_start;
- methods->migration_needed = tier_migration_needed;
- methods->layout_search = tier_search;
-
- ret = 0;
-err:
- return ret;
-}
-
-static void
-tier_save_vol_name(xlator_t *this)
-{
- dht_conf_t *conf = NULL;
- gf_defrag_info_t *defrag = NULL;
- char *suffix = NULL;
- int name_len = 0;
-
- conf = this->private;
- defrag = conf->defrag;
-
- suffix = strstr(this->name, "-tier-dht");
-
- if (suffix)
- name_len = suffix - this->name;
- else
- name_len = strlen(this->name);
-
- if (name_len > GD_VOLUME_NAME_MAX)
- name_len = GD_VOLUME_NAME_MAX;
-
- strncpy(defrag->tier_conf.volname, this->name, name_len);
- defrag->tier_conf.volname[name_len] = 0;
-}
-
-int
-tier_init(xlator_t *this)
-{
- int ret = -1;
- int freq = 0;
- int maxsize = 0;
- dht_conf_t *conf = NULL;
- gf_defrag_info_t *defrag = NULL;
- char *voldir = NULL;
- char *mode = NULL;
- char *paused = NULL;
- tier_mode_t tier_mode = DEFAULT_TIER_MODE;
- gf_boolean_t compact_mode = _gf_false;
-
- ret = dht_init(this);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "tier_init failed");
- goto out;
- }
-
- conf = this->private;
-
- ret = tier_init_methods(this);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "tier_init_methods failed");
- goto out;
- }
-
- if (conf->subvolume_cnt != 2) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Invalid number of subvolumes %d", conf->subvolume_cnt);
- goto out;
- }
-
- /* if instatiated from client side initialization is complete. */
- if (!conf->defrag) {
- ret = 0;
- goto out;
- }
-
- /* if instatiated from server side, load db libraries */
- ret = tier_load_externals(this);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Could not load externals. Aborting");
- goto out;
- }
-
- defrag = conf->defrag;
-
- defrag->tier_conf.last_demote_qfile_index = 0;
- defrag->tier_conf.last_promote_qfile_index = 0;
-
- defrag->tier_conf.is_tier = 1;
- defrag->this = this;
-
- ret = dict_get_int32(this->options, "tier-max-promote-file-size", &maxsize);
- if (ret) {
- maxsize = 0;
- }
-
- defrag->tier_conf.tier_max_promote_size = maxsize;
-
- ret = dict_get_int32(this->options, "tier-promote-frequency", &freq);
- if (ret) {
- freq = DEFAULT_PROMOTE_FREQ_SEC;
- }
-
- defrag->tier_conf.tier_promote_frequency = freq;
-
- ret = dict_get_int32(this->options, "tier-demote-frequency", &freq);
- if (ret) {
- freq = DEFAULT_DEMOTE_FREQ_SEC;
- }
-
- defrag->tier_conf.tier_demote_frequency = freq;
-
- ret = dict_get_int32(this->options, "tier-hot-compact-frequency", &freq);
- if (ret) {
- freq = DEFAULT_HOT_COMPACT_FREQ_SEC;
- }
-
- defrag->tier_conf.tier_compact_hot_frequency = freq;
-
- ret = dict_get_int32(this->options, "tier-cold-compact-frequency", &freq);
- if (ret) {
- freq = DEFAULT_COLD_COMPACT_FREQ_SEC;
- }
-
- defrag->tier_conf.tier_compact_cold_frequency = freq;
-
- ret = dict_get_int32(this->options, "watermark-hi", &freq);
- if (ret) {
- freq = DEFAULT_WM_HI;
- }
-
- defrag->tier_conf.watermark_hi = freq;
-
- ret = dict_get_int32(this->options, "watermark-low", &freq);
- if (ret) {
- freq = DEFAULT_WM_LOW;
- }
-
- defrag->tier_conf.watermark_low = freq;
-
- ret = dict_get_int32(this->options, "write-freq-threshold", &freq);
- if (ret) {
- freq = DEFAULT_WRITE_FREQ_SEC;
- }
-
- defrag->write_freq_threshold = freq;
-
- ret = dict_get_int32(this->options, "read-freq-threshold", &freq);
- if (ret) {
- freq = DEFAULT_READ_FREQ_SEC;
- }
-
- defrag->read_freq_threshold = freq;
-
- ret = dict_get_int32(this->options, "tier-max-mb", &freq);
- if (ret) {
- freq = DEFAULT_TIER_MAX_MIGRATE_MB;
- }
-
- defrag->tier_conf.max_migrate_bytes = (uint64_t)freq * 1024 * 1024;
-
- ret = dict_get_int32(this->options, "tier-max-files", &freq);
- if (ret) {
- freq = DEFAULT_TIER_MAX_MIGRATE_FILES;
- }
-
- defrag->tier_conf.max_migrate_files = freq;
-
- ret = dict_get_int32(this->options, "tier-query-limit",
- &(defrag->tier_conf.query_limit));
- if (ret) {
- defrag->tier_conf.query_limit = DEFAULT_TIER_QUERY_LIMIT;
- }
-
- ret = dict_get_str(this->options, "tier-compact", &mode);
-
- if (ret) {
- defrag->tier_conf.compact_active = DEFAULT_COMP_MODE;
- } else {
- compact_mode = tier_validate_compact_mode(mode);
- /* If compaction is now active, we need to inform the bricks on
- the hot and cold tier of this. See dht-common.h for more. */
- defrag->tier_conf.compact_active = compact_mode;
- if (compact_mode) {
- defrag->tier_conf.compact_mode_switched_hot = _gf_true;
- defrag->tier_conf.compact_mode_switched_cold = _gf_true;
- }
- }
-
- ret = dict_get_str(this->options, "tier-mode", &mode);
- if (ret) {
- defrag->tier_conf.mode = DEFAULT_TIER_MODE;
- } else {
- tier_mode = tier_validate_mode(mode);
- defrag->tier_conf.mode = tier_mode;
- }
-
- pthread_mutex_init(&defrag->tier_conf.pause_mutex, 0);
-
- gf_defrag_set_pause_state(&defrag->tier_conf, TIER_RUNNING);
-
- ret = dict_get_str(this->options, "tier-pause", &paused);
-
- if (paused && strcmp(paused, "on") == 0)
- gf_defrag_set_pause_state(&defrag->tier_conf, TIER_REQUEST_PAUSE);
-
- ret = gf_asprintf(&voldir, "%s/%s", DEFAULT_VAR_RUN_DIRECTORY, this->name);
- if (ret < 0)
- goto out;
-
- ret = mkdir_p(voldir, 0777, _gf_true);
- if (ret == -1 && errno != EEXIST) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "tier_init failed");
-
- GF_FREE(voldir);
- goto out;
- }
-
- GF_FREE(voldir);
-
- ret = gf_asprintf(&promotion_qfile, "%s/%s/promote",
- DEFAULT_VAR_RUN_DIRECTORY, this->name);
- if (ret < 0)
- goto out;
-
- ret = gf_asprintf(&demotion_qfile, "%s/%s/demote",
- DEFAULT_VAR_RUN_DIRECTORY, this->name);
- if (ret < 0) {
- GF_FREE(promotion_qfile);
- goto out;
- }
-
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "Promote/demote frequency %d/%d "
- "Write/Read freq thresholds %d/%d",
- defrag->tier_conf.tier_promote_frequency,
- defrag->tier_conf.tier_demote_frequency,
- defrag->write_freq_threshold, defrag->read_freq_threshold);
-
- tier_save_vol_name(this);
-
- ret = 0;
-
-out:
-
- return ret;
-}
-
-int
-tier_cli_pause_done(int op_ret, call_frame_t *sync_frame, void *data)
-{
- gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_TIER_PAUSED,
- "Migrate file paused with op_ret %d", op_ret);
-
- return op_ret;
-}
-
-int
-tier_cli_pause(void *data)
-{
- gf_defrag_info_t *defrag = NULL;
- xlator_t *this = NULL;
- dht_conf_t *conf = NULL;
- int ret = -1;
-
- this = data;
-
- conf = this->private;
- GF_VALIDATE_OR_GOTO(this->name, conf, exit);
-
- defrag = conf->defrag;
- GF_VALIDATE_OR_GOTO(this->name, defrag, exit);
-
- gf_defrag_pause_tier(this, defrag);
-
- ret = 0;
-exit:
- return ret;
-}
-
-int
-tier_reconfigure(xlator_t *this, dict_t *options)
-{
- dht_conf_t *conf = NULL;
- gf_defrag_info_t *defrag = NULL;
- char *mode = NULL;
- int migrate_mb = 0;
- gf_boolean_t req_pause = _gf_false;
- int ret = 0;
- call_frame_t *frame = NULL;
- gf_boolean_t last_compact_setting = _gf_false;
-
- conf = this->private;
-
- if (conf->defrag) {
- defrag = conf->defrag;
- GF_OPTION_RECONF("tier-max-promote-file-size",
- defrag->tier_conf.tier_max_promote_size, options,
- int32, out);
-
- GF_OPTION_RECONF("tier-promote-frequency",
- defrag->tier_conf.tier_promote_frequency, options,
- int32, out);
-
- GF_OPTION_RECONF("tier-demote-frequency",
- defrag->tier_conf.tier_demote_frequency, options,
- int32, out);
-
- GF_OPTION_RECONF("write-freq-threshold", defrag->write_freq_threshold,
- options, int32, out);
-
- GF_OPTION_RECONF("read-freq-threshold", defrag->read_freq_threshold,
- options, int32, out);
-
- GF_OPTION_RECONF("watermark-hi", defrag->tier_conf.watermark_hi,
- options, int32, out);
-
- GF_OPTION_RECONF("watermark-low", defrag->tier_conf.watermark_low,
- options, int32, out);
-
- last_compact_setting = defrag->tier_conf.compact_active;
-
- GF_OPTION_RECONF("tier-compact", defrag->tier_conf.compact_active,
- options, bool, out);
-
- if (last_compact_setting != defrag->tier_conf.compact_active) {
- defrag->tier_conf.compact_mode_switched_hot = _gf_true;
- defrag->tier_conf.compact_mode_switched_cold = _gf_true;
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "compact mode switched");
- }
-
- GF_OPTION_RECONF("tier-hot-compact-frequency",
- defrag->tier_conf.tier_compact_hot_frequency, options,
- int32, out);
-
- GF_OPTION_RECONF("tier-cold-compact-frequency",
- defrag->tier_conf.tier_compact_cold_frequency, options,
- int32, out);
-
- GF_OPTION_RECONF("tier-mode", mode, options, str, out);
- defrag->tier_conf.mode = tier_validate_mode(mode);
-
- GF_OPTION_RECONF("tier-max-mb", migrate_mb, options, int32, out);
- defrag->tier_conf.max_migrate_bytes = (uint64_t)migrate_mb * 1024 *
- 1024;
-
- GF_OPTION_RECONF("tier-max-files", defrag->tier_conf.max_migrate_files,
- options, int32, out);
-
- GF_OPTION_RECONF("tier-query-limit", defrag->tier_conf.query_limit,
- options, int32, out);
-
- GF_OPTION_RECONF("tier-pause", req_pause, options, bool, out);
-
- if (req_pause == _gf_true) {
- frame = create_frame(this, this->ctx->pool);
- if (!frame)
- goto out;
-
- frame->root->pid = GF_CLIENT_PID_DEFRAG;
-
- ret = synctask_new(this->ctx->env, tier_cli_pause,
- tier_cli_pause_done, frame, this);
-
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "pause tier failed on reconfigure");
- }
- } else {
- ret = gf_defrag_resume_tier(this, defrag);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "resume tier failed on reconfigure");
- }
- }
- }
-
-out:
- return dht_reconfigure(this, options);
-}
-
-void
-tier_fini(xlator_t *this)
-{
- if (libhandle)
- dlclose(libhandle);
-
- GF_FREE(demotion_qfile);
- GF_FREE(promotion_qfile);
-
- dht_fini(this);
-}
-
-class_methods_t class_methods = {.init = tier_init,
- .fini = tier_fini,
- .reconfigure = tier_reconfigure,
- .notify = dht_notify};
-
-struct xlator_fops fops = {
-
- .lookup = dht_lookup,
- .create = tier_create,
- .mknod = dht_mknod,
-
- .open = dht_open,
- .statfs = tier_statfs,
- .opendir = dht_opendir,
- .readdir = tier_readdir,
- .readdirp = tier_readdirp,
- .fsyncdir = dht_fsyncdir,
- .symlink = dht_symlink,
- .unlink = tier_unlink,
- .link = tier_link,
- .mkdir = dht_mkdir,
- .rmdir = dht_rmdir,
- .rename = dht_rename,
- .entrylk = dht_entrylk,
- .fentrylk = dht_fentrylk,
-
- /* Inode read operations */
- .stat = dht_stat,
- .fstat = dht_fstat,
- .access = dht_access,
- .readlink = dht_readlink,
- .getxattr = dht_getxattr,
- .fgetxattr = dht_fgetxattr,
- .readv = dht_readv,
- .flush = dht_flush,
- .fsync = dht_fsync,
- .inodelk = dht_inodelk,
- .finodelk = dht_finodelk,
- .lk = dht_lk,
-
- /* Inode write operations */
- .fremovexattr = dht_fremovexattr,
- .removexattr = dht_removexattr,
- .setxattr = dht_setxattr,
- .fsetxattr = dht_fsetxattr,
- .truncate = dht_truncate,
- .ftruncate = dht_ftruncate,
- .writev = dht_writev,
- .xattrop = dht_xattrop,
- .fxattrop = dht_fxattrop,
- .setattr = dht_setattr,
- .fsetattr = dht_fsetattr,
- .fallocate = dht_fallocate,
- .discard = dht_discard,
- .zerofill = dht_zerofill,
-};
-
-struct xlator_cbks cbks = {.release = dht_release, .forget = dht_forget};
diff --git a/xlators/cluster/dht/src/tier.h b/xlators/cluster/dht/src/tier.h
deleted file mode 100644
index a20b1db..0000000
--- a/xlators/cluster/dht/src/tier.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _TIER_H_
-#define _TIER_H_
-
-/******************************************************************************/
-/* This is from dht-rebalancer.c as we don't have dht-rebalancer.h */
-#include "dht-common.h"
-#include <glusterfs/xlator.h>
-#include <signal.h>
-#include <fnmatch.h>
-#include <signal.h>
-
-/*
- * Size of timer wheel. We would not promote or demote less
- * frequently than this number.
- */
-#define TIMER_SECS 3600
-
-#include "gfdb_data_store.h"
-#include <ctype.h>
-#include <sys/stat.h>
-
-#define PROMOTION_QFILE "promotequeryfile"
-#define DEMOTION_QFILE "demotequeryfile"
-
-#define TIER_HASHED_SUBVOL conf->subvolumes[0]
-#define TIER_UNHASHED_SUBVOL conf->subvolumes[1]
-
-#define GET_QFILE_PATH(is_promotion) \
- (is_promotion) ? promotion_qfile : demotion_qfile
-
-typedef struct tier_qfile_array {
- int *fd_array;
- ssize_t array_size;
- ssize_t next_index;
- /* Indicate the number of exhuasted FDs*/
- ssize_t exhausted_count;
-} tier_qfile_array_t;
-
-typedef struct _query_cbk_args {
- xlator_t *this;
- gf_defrag_info_t *defrag;
- /* This is write */
- int query_fd;
- int is_promotion;
- int is_compaction;
- /* This is for read */
- tier_qfile_array_t *qfile_array;
-} query_cbk_args_t;
-
-int
-gf_run_tier(xlator_t *this, gf_defrag_info_t *defrag);
-
-typedef struct gfdb_brick_info {
- gfdb_time_t *time_stamp;
- gf_boolean_t _gfdb_promote;
- query_cbk_args_t *_query_cbk_args;
-} gfdb_brick_info_t;
-
-typedef struct brick_list {
- xlator_t *xlator;
- char *brick_db_path;
- char brick_name[NAME_MAX];
- char qfile_path[PATH_MAX];
- struct list_head list;
-} tier_brick_list_t;
-
-typedef struct _dm_thread_args {
- xlator_t *this;
- gf_defrag_info_t *defrag;
- struct list_head *brick_list;
- int freq_time;
- int return_value;
- int is_promotion;
- int is_compaction;
- gf_boolean_t is_hot_tier;
-} migration_args_t;
-
-typedef enum tier_watermark_op_ {
- TIER_WM_NONE = 0,
- TIER_WM_LOW,
- TIER_WM_HI,
- TIER_WM_MID
-} tier_watermark_op_t;
-
-#define DEFAULT_PROMOTE_FREQ_SEC 120
-#define DEFAULT_DEMOTE_FREQ_SEC 120
-#define DEFAULT_HOT_COMPACT_FREQ_SEC 604800
-#define DEFAULT_COLD_COMPACT_FREQ_SEC 604800
-#define DEFAULT_DEMOTE_DEGRADED 1
-#define DEFAULT_WRITE_FREQ_SEC 0
-#define DEFAULT_READ_FREQ_SEC 0
-#define DEFAULT_WM_LOW 75
-#define DEFAULT_WM_HI 90
-#define DEFAULT_TIER_MODE TIER_MODE_TEST
-#define DEFAULT_COMP_MODE _gf_true
-#define DEFAULT_TIER_MAX_MIGRATE_MB 1000
-#define DEFAULT_TIER_MAX_MIGRATE_FILES 5000
-#define DEFAULT_TIER_QUERY_LIMIT 100
-
-#endif
diff --git a/xlators/cluster/dht/src/tier.sym b/xlators/cluster/dht/src/tier.sym
deleted file mode 100644
index 60205d1..0000000
--- a/xlators/cluster/dht/src/tier.sym
+++ /dev/null
@@ -1,9 +0,0 @@
-fops
-cbks
-class_methods
-dht_methods
-tier_methods
-options
-mem_acct_init
-reconfigure
-dumpops
diff --git a/xlators/cluster/stripe/src/Makefile.am b/xlators/cluster/stripe/src/Makefile.am
deleted file mode 100644
index 2b59456..0000000
--- a/xlators/cluster/stripe/src/Makefile.am
+++ /dev/null
@@ -1,22 +0,0 @@
-xlator_LTLIBRARIES = stripe.la
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
-
-stripe_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
-
-
-stripe_la_SOURCES = stripe.c stripe-helpers.c \
- $(top_builddir)/xlators/lib/src/libxlator.c
-
-stripe_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-noinst_HEADERS = stripe.h stripe-mem-types.h \
- $(top_builddir)/xlators/lib/src/libxlator.h
-
-AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
- -I$(top_srcdir)/xlators/lib/src \
- -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
-
-AM_CFLAGS = -Wall $(GF_CFLAGS)
-
-CLEANFILES =
-
diff --git a/xlators/cluster/stripe/src/stripe-helpers.c b/xlators/cluster/stripe/src/stripe-helpers.c
deleted file mode 100644
index 3534237..0000000
--- a/xlators/cluster/stripe/src/stripe-helpers.c
+++ /dev/null
@@ -1,658 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include <fnmatch.h>
-
-#include "stripe.h"
-#include <glusterfs/byte-order.h>
-#include <glusterfs/mem-types.h>
-#include <glusterfs/logging.h>
-
-void
-stripe_local_wipe(stripe_local_t *local)
-{
- if (!local)
- goto out;
-
- loc_wipe(&local->loc);
- loc_wipe(&local->loc2);
-
- if (local->fd)
- fd_unref(local->fd);
-
- if (local->inode)
- inode_unref(local->inode);
-
- if (local->xattr)
- dict_unref(local->xattr);
-
- if (local->xdata)
- dict_unref(local->xdata);
-
-out:
- return;
-}
-
-int
-stripe_aggregate(dict_t *this, char *key, data_t *value, void *data)
-{
- dict_t *dst = NULL;
- int64_t *ptr = 0, *size = NULL;
- int32_t ret = -1;
-
- dst = data;
-
- if (strcmp(key, QUOTA_SIZE_KEY) == 0) {
- ret = dict_get_bin(dst, key, (void **)&size);
- if (ret < 0) {
- size = GF_CALLOC(1, sizeof(int64_t), gf_common_mt_char);
- if (size == NULL) {
- gf_log("stripe", GF_LOG_WARNING, "memory allocation failed");
- goto out;
- }
- ret = dict_set_bin(dst, key, size, sizeof(int64_t));
- if (ret < 0) {
- gf_log("stripe", GF_LOG_WARNING,
- "stripe aggregate dict set failed");
- GF_FREE(size);
- goto out;
- }
- }
-
- ptr = data_to_bin(value);
- if (ptr == NULL) {
- gf_log("stripe", GF_LOG_WARNING, "data to bin failed");
- goto out;
- }
-
- *size = hton64(ntoh64(*size) + ntoh64(*ptr));
- } else if (strcmp(key, GF_CONTENT_KEY)) {
- /* No need to aggregate 'CONTENT' data */
- ret = dict_set(dst, key, value);
- if (ret)
- gf_log("stripe", GF_LOG_WARNING, "xattr dict set failed");
- }
-
-out:
- return 0;
-}
-
-void
-stripe_aggregate_xattr(dict_t *dst, dict_t *src)
-{
- if ((dst == NULL) || (src == NULL)) {
- goto out;
- }
-
- dict_foreach(src, stripe_aggregate, dst);
-out:
- return;
-}
-
-int32_t
-stripe_xattr_aggregate(char *buffer, stripe_local_t *local, int32_t *total)
-{
- int32_t i = 0;
- int32_t ret = -1;
- int32_t len = 0;
- char *sbuf = NULL;
- stripe_xattr_sort_t *xattr = NULL;
-
- if (!buffer || !local || !local->xattr_list)
- goto out;
-
- sbuf = buffer;
-
- for (i = 0; i < local->nallocs; i++) {
- xattr = local->xattr_list + i;
- len = xattr->xattr_len - 1; /* length includes \0 */
-
- if (len && xattr && xattr->xattr_value) {
- memcpy(buffer, xattr->xattr_value, len);
- buffer += len;
- *buffer++ = ' ';
- }
- }
-
- *--buffer = '\0';
- if (total)
- *total = buffer - sbuf;
- ret = 0;
-
-out:
- return ret;
-}
-
-int32_t
-stripe_free_xattr_str(stripe_local_t *local)
-{
- int32_t i = 0;
- int32_t ret = -1;
- stripe_xattr_sort_t *xattr = NULL;
-
- if (!local || !local->xattr_list)
- goto out;
-
- for (i = 0; i < local->nallocs; i++) {
- xattr = local->xattr_list + i;
-
- if (xattr && xattr->xattr_value)
- GF_FREE(xattr->xattr_value);
- }
-
- ret = 0;
-out:
- return ret;
-}
-
-int32_t
-stripe_fill_lockinfo_xattr(xlator_t *this, stripe_local_t *local,
- void **xattr_serz)
-{
- int32_t ret = -1, i = 0, len = 0;
- dict_t *tmp1 = NULL, *tmp2 = NULL;
- char *buf = NULL;
- stripe_xattr_sort_t *xattr = NULL;
-
- if (xattr_serz == NULL) {
- goto out;
- }
-
- tmp2 = dict_new();
-
- if (tmp2 == NULL) {
- goto out;
- }
-
- for (i = 0; i < local->nallocs; i++) {
- xattr = local->xattr_list + i;
- len = xattr->xattr_len;
-
- if (len && xattr && xattr->xattr_value) {
- ret = dict_reset(tmp2);
- if (ret < 0) {
- gf_log(this->name, GF_LOG_DEBUG, "dict_reset failed (%s)",
- strerror(-ret));
- }
-
- ret = dict_unserialize(xattr->xattr_value, xattr->xattr_len, &tmp2);
- if (ret < 0) {
- gf_log(this->name, GF_LOG_WARNING,
- "dict_unserialize failed (%s)", strerror(-ret));
- ret = -1;
- goto out;
- }
-
- tmp1 = dict_copy(tmp2, tmp1);
- if (tmp1 == NULL) {
- gf_log(this->name, GF_LOG_WARNING, "dict_copy failed (%s)",
- strerror(-ret));
- ret = -1;
- goto out;
- }
- }
- }
-
- len = dict_serialized_length(tmp1);
- if (len > 0) {
- buf = GF_CALLOC(1, len, gf_common_mt_dict_t);
- if (buf == NULL) {
- ret = -1;
- goto out;
- }
-
- ret = dict_serialize(tmp1, buf);
- if (ret < 0) {
- gf_log(this->name, GF_LOG_WARNING, "dict_serialize failed (%s)",
- strerror(-ret));
- GF_FREE(buf);
- ret = -1;
- goto out;
- }
-
- *xattr_serz = buf;
- }
-
- ret = 0;
-out:
- if (tmp1 != NULL) {
- dict_unref(tmp1);
- }
-
- if (tmp2 != NULL) {
- dict_unref(tmp2);
- }
-
- return ret;
-}
-
-int32_t
-stripe_fill_pathinfo_xattr(xlator_t *this, stripe_local_t *local,
- char **xattr_serz)
-{
- int ret = -1;
- int32_t padding = 0;
- int32_t tlen = 0;
- int len = 0;
- char stripe_size_str[20] = {
- 0,
- };
- char *pathinfo_serz = NULL;
-
- if (!local) {
- gf_log(this->name, GF_LOG_ERROR, "Possible NULL deref");
- goto out;
- }
-
- len = snprintf(stripe_size_str, sizeof(stripe_size_str), "%" PRId64,
- local->fctx ? local->fctx->stripe_size : 0);
- if (len < 0 || len >= sizeof(stripe_size_str))
- goto out;
- /* extra bytes for decorations (brackets and <>'s) */
- padding = strlen(this->name) + SLEN(STRIPE_PATHINFO_HEADER) + len + 7;
- local->xattr_total_len += (padding + 2);
-
- pathinfo_serz = GF_MALLOC(local->xattr_total_len, gf_common_mt_char);
- if (!pathinfo_serz)
- goto out;
-
- /* xlator info */
- (void)sprintf(pathinfo_serz, "(<" STRIPE_PATHINFO_HEADER "%s:[%s]> ",
- this->name, stripe_size_str);
-
- ret = stripe_xattr_aggregate(pathinfo_serz + padding, local, &tlen);
- if (ret) {
- gf_log(this->name, GF_LOG_ERROR, "Cannot aggregate pathinfo list");
- GF_FREE(pathinfo_serz);
- goto out;
- }
-
- *(pathinfo_serz + padding + tlen) = ')';
- *(pathinfo_serz + padding + tlen + 1) = '\0';
-
- *xattr_serz = pathinfo_serz;
-
- ret = 0;
-out:
- return ret;
-}
-
-/**
- * stripe_get_matching_bs - Get the matching block size for the given path.
- */
-int32_t
-stripe_get_matching_bs(const char *path, stripe_private_t *priv)
-{
- struct stripe_options *trav = NULL;
- uint64_t block_size = 0;
-
- GF_VALIDATE_OR_GOTO("stripe", priv, out);
- GF_VALIDATE_OR_GOTO("stripe", path, out);
-
- LOCK(&priv->lock);
- {
- block_size = priv->block_size;
- trav = priv->pattern;
- while (trav) {
- if (!fnmatch(trav->path_pattern, path, FNM_NOESCAPE)) {
- block_size = trav->block_size;
- break;
- }
- trav = trav->next;
- }
- }
- UNLOCK(&priv->lock);
-
-out:
- return block_size;
-}
-
-int32_t
-stripe_ctx_handle(xlator_t *this, call_frame_t *prev, stripe_local_t *local,
- dict_t *dict)
-{
- char key[256] = {
- 0,
- };
- data_t *data = NULL;
- int32_t index = 0;
- stripe_private_t *priv = NULL;
-
- priv = this->private;
-
- if (!local->fctx) {
- local->fctx = GF_CALLOC(1, sizeof(stripe_fd_ctx_t),
- gf_stripe_mt_stripe_fd_ctx_t);
- if (!local->fctx) {
- local->op_errno = ENOMEM;
- local->op_ret = -1;
- goto out;
- }
-
- local->fctx->static_array = 0;
- }
- /* Stripe block size */
- sprintf(key, "trusted.%s.stripe-size", this->name);
- data = dict_get(dict, key);
- if (!data) {
- local->xattr_self_heal_needed = 1;
- gf_log(this->name, GF_LOG_ERROR, "Failed to get stripe-size");
- goto out;
- } else {
- if (!local->fctx->stripe_size) {
- local->fctx->stripe_size = data_to_int64(data);
- }
-
- if (local->fctx->stripe_size != data_to_int64(data)) {
- gf_log(this->name, GF_LOG_WARNING,
- "stripe-size mismatch in blocks");
- local->xattr_self_heal_needed = 1;
- }
- }
-
- /* Stripe count */
- sprintf(key, "trusted.%s.stripe-count", this->name);
- data = dict_get(dict, key);
-
- if (!data) {
- local->xattr_self_heal_needed = 1;
- gf_log(this->name, GF_LOG_ERROR, "Failed to get stripe-count");
- goto out;
- }
- if (!local->fctx->xl_array) {
- local->fctx->stripe_count = data_to_int32(data);
- if (!local->fctx->stripe_count) {
- gf_log(this->name, GF_LOG_ERROR, "error with stripe-count xattr");
- local->op_ret = -1;
- local->op_errno = EIO;
- goto out;
- }
-
- local->fctx->xl_array = GF_CALLOC(local->fctx->stripe_count,
- sizeof(xlator_t *),
- gf_stripe_mt_xlator_t);
-
- if (!local->fctx->xl_array) {
- local->op_errno = ENOMEM;
- local->op_ret = -1;
- goto out;
- }
- }
- if (local->fctx->stripe_count != data_to_int32(data)) {
- gf_log(this->name, GF_LOG_ERROR,
- "error with stripe-count xattr (%d != %d)",
- local->fctx->stripe_count, data_to_int32(data));
- local->op_ret = -1;
- local->op_errno = EIO;
- goto out;
- }
-
- /* index */
- sprintf(key, "trusted.%s.stripe-index", this->name);
- data = dict_get(dict, key);
- if (!data) {
- local->xattr_self_heal_needed = 1;
- gf_log(this->name, GF_LOG_ERROR, "Failed to get stripe-index");
- goto out;
- }
- index = data_to_int32(data);
- if (index > priv->child_count) {
- gf_log(this->name, GF_LOG_ERROR, "error with stripe-index xattr (%d)",
- index);
- local->op_ret = -1;
- local->op_errno = EIO;
- goto out;
- }
- if (local->fctx->xl_array) {
- if (!local->fctx->xl_array[index])
- local->fctx->xl_array[index] = prev->this;
- }
-
- sprintf(key, "trusted.%s.stripe-coalesce", this->name);
- data = dict_get(dict, key);
- if (!data) {
- /*
- * The file was probably created prior to coalesce support.
- * Assume non-coalesce mode for this file to maintain backwards
- * compatibility.
- */
- gf_log(this->name, GF_LOG_DEBUG,
- "missing stripe-coalesce "
- "attr, assume non-coalesce mode");
- local->fctx->stripe_coalesce = 0;
- } else {
- local->fctx->stripe_coalesce = data_to_int32(data);
- }
-
-out:
- return 0;
-}
-
-int32_t
-stripe_xattr_request_build(xlator_t *this, dict_t *dict, uint64_t stripe_size,
- uint32_t stripe_count, uint32_t stripe_index,
- uint32_t stripe_coalesce)
-{
- char key[256] = {
- 0,
- };
- int32_t ret = -1;
-
- sprintf(key, "trusted.%s.stripe-size", this->name);
- ret = dict_set_int64(dict, key, stripe_size);
- if (ret) {
- gf_log(this->name, GF_LOG_WARNING, "failed to set %s in xattr_req dict",
- key);
- goto out;
- }
-
- sprintf(key, "trusted.%s.stripe-count", this->name);
- ret = dict_set_int32(dict, key, stripe_count);
- if (ret) {
- gf_log(this->name, GF_LOG_WARNING, "failed to set %s in xattr_req dict",
- key);
- goto out;
- }
-
- sprintf(key, "trusted.%s.stripe-index", this->name);
- ret = dict_set_int32(dict, key, stripe_index);
- if (ret) {
- gf_log(this->name, GF_LOG_WARNING, "failed to set %s in xattr_req dict",
- key);
- goto out;
- }
-
- sprintf(key, "trusted.%s.stripe-coalesce", this->name);
- ret = dict_set_int32(dict, key, stripe_coalesce);
- if (ret) {
- gf_log(this->name, GF_LOG_WARNING, "failed to set %s in xattr_req_dict",
- key);
- goto out;
- }
-out:
- return ret;
-}
-
-static int
-set_default_block_size(stripe_private_t *priv, char *num)
-{
- int ret = -1;
- GF_VALIDATE_OR_GOTO("stripe", THIS, out);
- GF_VALIDATE_OR_GOTO(THIS->name, priv, out);
- GF_VALIDATE_OR_GOTO(THIS->name, num, out);
-
- if (gf_string2bytesize_uint64(num, &priv->block_size) != 0) {
- gf_log(THIS->name, GF_LOG_ERROR, "invalid number format \"%s\"", num);
- goto out;
- }
-
- ret = 0;
-
-out:
- return ret;
-}
-
-int
-set_stripe_block_size(xlator_t *this, stripe_private_t *priv, char *data)
-{
- int ret = -1;
- char *tmp_str = NULL;
- char *tmp_str1 = NULL;
- char *dup_str = NULL;
- char *stripe_str = NULL;
- char *pattern = NULL;
- char *num = NULL;
- struct stripe_options *temp_stripeopt = NULL;
- struct stripe_options *stripe_opt = NULL;
-
- if (!this || !priv || !data)
- goto out;
-
- /* Get the pattern for striping.
- "option block-size *avi:10MB" etc */
- stripe_str = strtok_r(data, ",", &tmp_str);
- while (stripe_str) {
- dup_str = gf_strdup(stripe_str);
- stripe_opt = GF_CALLOC(1, sizeof(struct stripe_options),
- gf_stripe_mt_stripe_options);
- if (!stripe_opt) {
- goto out;
- }
-
- pattern = strtok_r(dup_str, ":", &tmp_str1);
- num = strtok_r(NULL, ":", &tmp_str1);
- if (!num) {
- num = pattern;
- pattern = "*";
- ret = set_default_block_size(priv, num);
- if (ret)
- goto out;
- }
- if (gf_string2bytesize_uint64(num, &stripe_opt->block_size) != 0) {
- gf_log(this->name, GF_LOG_ERROR, "invalid number format \"%s\"",
- num);
- goto out;
- }
-
- if (stripe_opt->block_size < STRIPE_MIN_BLOCK_SIZE) {
- gf_log(this->name, GF_LOG_ERROR,
- "Invalid Block-size: "
- "%s. Should be at least %llu bytes",
- num, STRIPE_MIN_BLOCK_SIZE);
- goto out;
- }
- if (stripe_opt->block_size % 512) {
- gf_log(this->name, GF_LOG_ERROR,
- "Block-size: %s should"
- " be a multiple of 512 bytes",
- num);
- goto out;
- }
-
- memcpy(stripe_opt->path_pattern, pattern, strlen(pattern));
-
- gf_log(this->name, GF_LOG_DEBUG,
- "block-size : pattern %s : size %" PRId64,
- stripe_opt->path_pattern, stripe_opt->block_size);
-
- if (priv->pattern)
- temp_stripeopt = NULL;
- else
- temp_stripeopt = priv->pattern;
-
- stripe_opt->next = temp_stripeopt;
-
- priv->pattern = stripe_opt;
- stripe_opt = NULL;
-
- GF_FREE(dup_str);
- dup_str = NULL;
-
- stripe_str = strtok_r(NULL, ",", &tmp_str);
- }
-
- ret = 0;
-out:
-
- GF_FREE(dup_str);
-
- GF_FREE(stripe_opt);
-
- return ret;
-}
-
-int32_t
-stripe_iatt_merge(struct iatt *from, struct iatt *to)
-{
- if (to->ia_size < from->ia_size)
- to->ia_size = from->ia_size;
- if (to->ia_mtime < from->ia_mtime)
- to->ia_mtime = from->ia_mtime;
- if (to->ia_ctime < from->ia_ctime)
- to->ia_ctime = from->ia_ctime;
- if (to->ia_atime < from->ia_atime)
- to->ia_atime = from->ia_atime;
- return 0;
-}
-
-off_t
-coalesced_offset(off_t offset, uint64_t stripe_size, int stripe_count)
-{
- size_t line_size = 0;
- uint64_t stripe_num = 0;
- off_t coalesced_offset = 0;
-
- line_size = stripe_size * stripe_count;
- stripe_num = offset / line_size;
-
- coalesced_offset = (stripe_num * stripe_size) + (offset % stripe_size);
-
- return coalesced_offset;
-}
-
-off_t
-uncoalesced_size(off_t size, uint64_t stripe_size, int stripe_count,
- int stripe_index)
-{
- uint64_t nr_full_stripe_chunks = 0, mod = 0;
-
- if (!size)
- return size;
-
- /*
- * Estimate the number of fully written stripes from the
- * local file size. Each stripe_size chunk corresponds to
- * a stripe.
- */
- nr_full_stripe_chunks = (size / stripe_size) * stripe_count;
- mod = size % stripe_size;
-
- if (!mod) {
- /*
- * There is no remainder, thus we could have overestimated
- * the size of the file in terms of chunks. Trim the number
- * of chunks by the following stripe members and leave it
- * up to those nodes to respond with a larger size (if
- * necessary).
- */
- nr_full_stripe_chunks -= stripe_count - (stripe_index + 1);
- size = nr_full_stripe_chunks * stripe_size;
- } else {
- /*
- * There is a remainder and thus we own the last chunk of the
- * file. Add the preceding stripe members of the final stripe
- * along with the remainder to calculate the exact size.
- */
- nr_full_stripe_chunks += stripe_index;
- size = nr_full_stripe_chunks * stripe_size + mod;
- }
-
- return size;
-}
diff --git a/xlators/cluster/stripe/src/stripe-mem-types.h b/xlators/cluster/stripe/src/stripe-mem-types.h
deleted file mode 100644
index 3ca6ecc..0000000
--- a/xlators/cluster/stripe/src/stripe-mem-types.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __STRIPE_MEM_TYPES_H__
-#define __STRIPE_MEM_TYPES_H__
-
-#include <glusterfs/mem-types.h>
-
-enum gf_stripe_mem_types_ {
- gf_stripe_mt_iovec = gf_common_mt_end + 1,
- gf_stripe_mt_stripe_replies,
- gf_stripe_mt_stripe_fd_ctx_t,
- gf_stripe_mt_char,
- gf_stripe_mt_int8_t,
- gf_stripe_mt_int32_t,
- gf_stripe_mt_xlator_t,
- gf_stripe_mt_stripe_private_t,
- gf_stripe_mt_stripe_options,
- gf_stripe_mt_xattr_sort_t,
- gf_stripe_mt_end
-};
-#endif
diff --git a/xlators/cluster/stripe/src/stripe.c b/xlators/cluster/stripe/src/stripe.c
deleted file mode 100644
index 557a818..0000000
--- a/xlators/cluster/stripe/src/stripe.c
+++ /dev/null
@@ -1,5612 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-/**
- * xlators/cluster/stripe:
- * Stripe translator, stripes the data across its child nodes,
- * as per the options given in the volfile. The striping works
- * fairly simple. It writes files at different offset as per
- * calculation. So, 'ls -l' output at the real posix level will
- * show file size bigger than the actual size. But when one does
- * 'df' or 'du <file>', real size of the file on the server is shown.
- *
- * WARNING:
- * Stripe translator can't regenerate data if a child node gets disconnected.
- * So, no 'self-heal' for stripe. Hence the advice, use stripe only when its
- * very much necessary, or else, use it in combination with AFR, to have a
- * backup copy.
- */
-#include <fnmatch.h>
-#include "stripe.h"
-#include "libxlator.h"
-#include <glusterfs/byte-order.h>
-#include <glusterfs/statedump.h>
-
-struct volume_options options[];
-
-int32_t
-stripe_sh_chown_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preop,
- struct iatt *postop, dict_t *xdata)
-{
- int callcnt = -1;
- stripe_local_t *local = NULL;
-
- if (!this || !frame || !frame->local) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- STRIPE_STACK_DESTROY(frame);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_sh_make_entry_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!frame || !frame->local || !cookie || !this) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- STACK_WIND(frame, stripe_sh_chown_cbk, prev->this,
- prev->this->fops->setattr, &local->loc, &local->stbuf,
- (GF_SET_ATTR_UID | GF_SET_ATTR_GID), NULL);
-
-out:
- return 0;
-}
-
-int32_t
-stripe_entry_self_heal(call_frame_t *frame, xlator_t *this,
- stripe_local_t *local)
-{
- xlator_list_t *trav = NULL;
- call_frame_t *rframe = NULL;
- stripe_local_t *rlocal = NULL;
- stripe_private_t *priv = NULL;
- dict_t *xdata = NULL;
- int ret = 0;
-
- if (!local || !this || !frame) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- if (!(IA_ISREG(local->stbuf.ia_type) || IA_ISDIR(local->stbuf.ia_type)))
- return 0;
-
- priv = this->private;
- trav = this->children;
- rframe = copy_frame(frame);
- if (!rframe) {
- goto out;
- }
- rlocal = mem_get0(this->local_pool);
- if (!rlocal) {
- goto out;
- }
- rframe->local = rlocal;
- rlocal->call_count = priv->child_count;
- loc_copy(&rlocal->loc, &local->loc);
- memcpy(&rlocal->stbuf, &local->stbuf, sizeof(struct iatt));
-
- xdata = dict_new();
- if (!xdata)
- goto out;
-
- ret = dict_set_gfuuid(xdata, "gfid-req", local->stbuf.ia_gfid, true);
- if (ret)
- gf_log(this->name, GF_LOG_WARNING, "%s: failed to set gfid-req",
- local->loc.path);
-
- while (trav) {
- if (IA_ISREG(local->stbuf.ia_type)) {
- STACK_WIND(
- rframe, stripe_sh_make_entry_cbk, trav->xlator,
- trav->xlator->fops->mknod, &local->loc,
- st_mode_from_ia(local->stbuf.ia_prot, local->stbuf.ia_type), 0,
- 0, xdata);
- }
- if (IA_ISDIR(local->stbuf.ia_type)) {
- STACK_WIND(
- rframe, stripe_sh_make_entry_cbk, trav->xlator,
- trav->xlator->fops->mkdir, &local->loc,
- st_mode_from_ia(local->stbuf.ia_prot, local->stbuf.ia_type), 0,
- xdata);
- }
- trav = trav->next;
- }
-
- if (xdata)
- dict_unref(xdata);
- return 0;
-
-out:
- if (rframe)
- STRIPE_STACK_DESTROY(rframe);
-
- return 0;
-}
-
-int32_t
-stripe_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *xdata, struct iatt *postparent)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
- int ret = 0;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- if ((op_errno != ENOENT) && (op_errno != ESTALE))
- gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- if (local->op_errno != ESTALE)
- local->op_errno = op_errno;
- if (((op_errno != ENOENT) && (op_errno != ENOTCONN) &&
- (op_errno != ESTALE)) ||
- (prev->this == FIRST_CHILD(this)))
- local->failed = 1;
- if (op_errno == ENOENT)
- local->entry_self_heal_needed = 1;
- }
-
- if (op_ret >= 0) {
- local->op_ret = 0;
- if (IA_ISREG(buf->ia_type)) {
- ret = stripe_ctx_handle(this, prev, local, xdata);
- if (ret)
- gf_log(this->name, GF_LOG_ERROR,
- "Error getting fctx info from"
- " dict");
- }
-
- if (FIRST_CHILD(this) == prev->this) {
- local->stbuf = *buf;
- local->postparent = *postparent;
- local->inode = inode_ref(inode);
- if (xdata)
- local->xdata = dict_ref(xdata);
- if (local->xattr) {
- stripe_aggregate_xattr(local->xdata, local->xattr);
- dict_unref(local->xattr);
- local->xattr = NULL;
- }
- }
-
- if (!local->xdata && !local->xattr) {
- local->xattr = dict_ref(xdata);
- } else if (local->xdata) {
- stripe_aggregate_xattr(local->xdata, xdata);
- } else if (local->xattr) {
- stripe_aggregate_xattr(local->xattr, xdata);
- }
-
- local->stbuf_blocks += buf->ia_blocks;
- local->postparent_blocks += postparent->ia_blocks;
-
- correct_file_size(buf, local->fctx, prev);
-
- if (local->stbuf_size < buf->ia_size)
- local->stbuf_size = buf->ia_size;
- if (local->postparent_size < postparent->ia_size)
- local->postparent_size = postparent->ia_size;
-
- if (gf_uuid_is_null(local->ia_gfid))
- gf_uuid_copy(local->ia_gfid, buf->ia_gfid);
-
- /* Make sure the gfid on all the nodes are same */
- if (gf_uuid_compare(local->ia_gfid, buf->ia_gfid)) {
- gf_log(this->name, GF_LOG_WARNING,
- "%s: gfid different on subvolume %s", local->loc.path,
- prev->this->name);
- }
- }
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- if (local->op_ret == 0 && local->entry_self_heal_needed &&
- !gf_uuid_is_null(local->loc.inode->gfid))
- stripe_entry_self_heal(frame, this, local);
-
- if (local->failed)
- local->op_ret = -1;
-
- if (local->op_ret != -1) {
- local->stbuf.ia_blocks = local->stbuf_blocks;
- local->stbuf.ia_size = local->stbuf_size;
- local->postparent.ia_blocks = local->postparent_blocks;
- local->postparent.ia_size = local->postparent_size;
- inode_ctx_put(local->inode, this, (uint64_t)(long)local->fctx);
- }
-
- STRIPE_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf, local->xdata,
- &local->postparent);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- stripe_private_t *priv = NULL;
- int32_t op_errno = EINVAL;
- int64_t filesize = 0;
- int ret = 0;
- uint64_t tmpctx = 0;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
- VALIDATE_OR_GOTO(loc->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- loc_copy(&local->loc, loc);
-
- inode_ctx_get(local->inode, this, &tmpctx);
- if (tmpctx)
- local->fctx = (stripe_fd_ctx_t *)(long)tmpctx;
-
- /* quick-read friendly changes */
- if (xdata && dict_get(xdata, GF_CONTENT_KEY)) {
- ret = dict_get_int64(xdata, GF_CONTENT_KEY, &filesize);
- if (!ret && (filesize > priv->block_size))
- dict_del(xdata, GF_CONTENT_KEY);
- }
-
- /* get stripe-size xattr on lookup. This would be required for
- * open/read/write/pathinfo calls. Hence we send down the request
- * even when type == IA_INVAL */
-
- /*
- * We aren't guaranteed to have xdata here. We need the format info for
- * the file, so allocate xdata if necessary.
- */
- if (!xdata)
- xdata = dict_new();
- else
- xdata = dict_ref(xdata);
-
- if (xdata &&
- (IA_ISREG(loc->inode->ia_type) || (loc->inode->ia_type == IA_INVAL))) {
- ret = stripe_xattr_request_build(this, xdata, 8, 4, 4, 0);
- if (ret)
- gf_log(this->name, GF_LOG_ERROR,
- "Failed to build"
- " xattr request for %s",
- loc->path);
- }
-
- /* Every time in stripe lookup, all child nodes
- should be looked up */
- local->call_count = priv->child_count;
- while (trav) {
- STACK_WIND(frame, stripe_lookup_cbk, trav->xlator,
- trav->xlator->fops->lookup, loc, xdata);
- trav = trav->next;
- }
-
- dict_unref(xdata);
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
- prev = cookie;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- local->op_errno = op_errno;
- if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this)))
- local->failed = 1;
- }
-
- if (op_ret == 0) {
- local->op_ret = 0;
-
- if (FIRST_CHILD(this) == prev->this) {
- local->stbuf = *buf;
- }
-
- local->stbuf_blocks += buf->ia_blocks;
-
- correct_file_size(buf, local->fctx, prev);
-
- if (local->stbuf_size < buf->ia_size)
- local->stbuf_size = buf->ia_size;
- }
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- if (local->op_ret != -1) {
- local->stbuf.ia_size = local->stbuf_size;
- local->stbuf.ia_blocks = local->stbuf_blocks;
- }
-
- STRIPE_STACK_UNWIND(stat, frame, local->op_ret, local->op_errno,
- &local->stbuf, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
-{
- xlator_list_t *trav = NULL;
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- stripe_fd_ctx_t *fctx = NULL;
- int32_t op_errno = EINVAL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
- VALIDATE_OR_GOTO(loc->path, err);
- VALIDATE_OR_GOTO(loc->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- if (priv->first_child_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- local->call_count = priv->child_count;
-
- if (IA_ISREG(loc->inode->ia_type)) {
- inode_ctx_get(loc->inode, this, (uint64_t *)&fctx);
- if (!fctx)
- goto err;
- local->fctx = fctx;
- }
-
- while (trav) {
- STACK_WIND(frame, stripe_stat_cbk, trav->xlator,
- trav->xlator->fops->stat, loc, NULL);
- trav = trav->next;
- }
-
- return 0;
-
-err:
- STRIPE_STACK_UNWIND(stat, frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *stbuf,
- dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- int32_t callcnt = 0;
-
- if (!this || !frame || !frame->local) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret && (op_errno != ENOTCONN)) {
- local->op_errno = op_errno;
- }
- if (op_ret == 0) {
- struct statvfs *dict_buf = &local->statvfs_buf;
- dict_buf->f_bsize = stbuf->f_bsize;
- dict_buf->f_frsize = stbuf->f_frsize;
- dict_buf->f_blocks += stbuf->f_blocks;
- dict_buf->f_bfree += stbuf->f_bfree;
- dict_buf->f_bavail += stbuf->f_bavail;
- dict_buf->f_files += stbuf->f_files;
- dict_buf->f_ffree += stbuf->f_ffree;
- dict_buf->f_favail += stbuf->f_favail;
- dict_buf->f_fsid = stbuf->f_fsid;
- dict_buf->f_flag = stbuf->f_flag;
- dict_buf->f_namemax = stbuf->f_namemax;
- local->op_ret = 0;
- }
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- STRIPE_STACK_UNWIND(statfs, frame, local->op_ret, local->op_errno,
- &local->statvfs_buf, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- stripe_private_t *priv = NULL;
- int32_t op_errno = EINVAL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
-
- trav = this->children;
- priv = this->private;
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- local->op_errno = ENOTCONN;
- frame->local = local;
-
- local->call_count = priv->child_count;
- while (trav) {
- STACK_WIND(frame, stripe_statfs_cbk, trav->xlator,
- trav->xlator->fops->statfs, loc, NULL);
- trav = trav->next;
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(statfs, frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- local->op_errno = op_errno;
- if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this)))
- local->failed = 1;
- }
-
- if (op_ret == 0) {
- local->op_ret = 0;
- if (FIRST_CHILD(this) == prev->this) {
- local->pre_buf = *prebuf;
- local->post_buf = *postbuf;
- }
-
- local->prebuf_blocks += prebuf->ia_blocks;
- local->postbuf_blocks += postbuf->ia_blocks;
-
- correct_file_size(prebuf, local->fctx, prev);
- correct_file_size(postbuf, local->fctx, prev);
-
- if (local->prebuf_size < prebuf->ia_size)
- local->prebuf_size = prebuf->ia_size;
-
- if (local->postbuf_size < postbuf->ia_size)
- local->postbuf_size = postbuf->ia_size;
- }
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- if (local->op_ret != -1) {
- local->pre_buf.ia_blocks = local->prebuf_blocks;
- local->pre_buf.ia_size = local->prebuf_size;
- local->post_buf.ia_blocks = local->postbuf_blocks;
- local->post_buf.ia_size = local->postbuf_size;
- }
-
- STRIPE_STACK_UNWIND(truncate, frame, local->op_ret, local->op_errno,
- &local->pre_buf, &local->post_buf, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
- dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- stripe_fd_ctx_t *fctx = NULL;
- int32_t op_errno = EINVAL;
- int i, eof_idx;
- off_t dest_offset, tmp_offset;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
- VALIDATE_OR_GOTO(loc->path, err);
- VALIDATE_OR_GOTO(loc->inode, err);
-
- priv = this->private;
-
- if (priv->first_child_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- local->call_count = priv->child_count;
-
- inode_ctx_get(loc->inode, this, (uint64_t *)&fctx);
- if (!fctx) {
- gf_log(this->name, GF_LOG_ERROR, "no stripe context");
- op_errno = EINVAL;
- goto err;
- }
-
- local->fctx = fctx;
- eof_idx = (offset / fctx->stripe_size) % fctx->stripe_count;
-
- for (i = 0; i < fctx->stripe_count; i++) {
- if (!fctx->xl_array[i]) {
- gf_log(this->name, GF_LOG_ERROR, "no xlator at index %d", i);
- op_errno = EINVAL;
- goto err;
- }
-
- if (fctx->stripe_coalesce) {
- /*
- * The node that owns EOF is truncated to the exact
- * coalesced offset. Nodes prior to this index should
- * be rounded up to the size of the complete stripe,
- * while nodes after this index should be rounded down
- * to the size of the previous stripe.
- */
- if (i < eof_idx)
- tmp_offset = gf_roof(offset,
- fctx->stripe_size * fctx->stripe_count);
- else if (i > eof_idx)
- tmp_offset = gf_floor(offset,
- fctx->stripe_size * fctx->stripe_count);
- else
- tmp_offset = offset;
-
- dest_offset = coalesced_offset(tmp_offset, fctx->stripe_size,
- fctx->stripe_count);
- } else {
- dest_offset = offset;
- }
-
- STACK_WIND(frame, stripe_truncate_cbk, fctx->xl_array[i],
- fctx->xl_array[i]->fops->truncate, loc, dest_offset, NULL);
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(truncate, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preop,
- struct iatt *postop, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- local->op_errno = op_errno;
- if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this)))
- local->failed = 1;
- }
-
- if (op_ret == 0) {
- local->op_ret = 0;
-
- if (FIRST_CHILD(this) == prev->this) {
- local->pre_buf = *preop;
- local->post_buf = *postop;
- }
-
- local->prebuf_blocks += preop->ia_blocks;
- local->postbuf_blocks += postop->ia_blocks;
-
- correct_file_size(preop, local->fctx, prev);
- correct_file_size(postop, local->fctx, prev);
-
- if (local->prebuf_size < preop->ia_size)
- local->prebuf_size = preop->ia_size;
- if (local->postbuf_size < postop->ia_size)
- local->postbuf_size = postop->ia_size;
- }
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- if (local->op_ret != -1) {
- local->pre_buf.ia_blocks = local->prebuf_blocks;
- local->pre_buf.ia_size = local->prebuf_size;
- local->post_buf.ia_blocks = local->postbuf_blocks;
- local->post_buf.ia_size = local->postbuf_size;
- }
-
- STRIPE_STACK_UNWIND(setattr, frame, local->op_ret, local->op_errno,
- &local->pre_buf, &local->post_buf, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid, dict_t *xdata)
-{
- xlator_list_t *trav = NULL;
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- stripe_fd_ctx_t *fctx = NULL;
- int32_t op_errno = EINVAL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
- VALIDATE_OR_GOTO(loc->path, err);
- VALIDATE_OR_GOTO(loc->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- if (priv->first_child_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- if (!IA_ISDIR(loc->inode->ia_type) && !IA_ISREG(loc->inode->ia_type)) {
- local->call_count = 1;
- STACK_WIND(frame, stripe_setattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, NULL);
- return 0;
- }
-
- if (IA_ISREG(loc->inode->ia_type)) {
- inode_ctx_get(loc->inode, this, (uint64_t *)&fctx);
- if (!fctx)
- goto err;
- local->fctx = fctx;
- }
-
- local->call_count = priv->child_count;
- while (trav) {
- STACK_WIND(frame, stripe_setattr_cbk, trav->xlator,
- trav->xlator->fops->setattr, loc, stbuf, valid, NULL);
- trav = trav->next;
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(setattr, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
- int32_t op_errno = EINVAL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(fd, err);
- VALIDATE_OR_GOTO(fd->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- local->call_count = priv->child_count;
-
- while (trav) {
- STACK_WIND(frame, stripe_setattr_cbk, trav->xlator,
- trav->xlator->fops->fsetattr, fd, stbuf, valid, NULL);
- trav = trav->next;
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(fsetattr, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_stack_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent,
- dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- local->op_errno = op_errno;
- if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this)))
- local->failed = 1;
- }
-
- if (op_ret == 0) {
- local->op_ret = 0;
-
- local->stbuf.ia_blocks += buf->ia_blocks;
- local->preparent.ia_blocks += preoldparent->ia_blocks;
- local->postparent.ia_blocks += postoldparent->ia_blocks;
- local->pre_buf.ia_blocks += prenewparent->ia_blocks;
- local->post_buf.ia_blocks += postnewparent->ia_blocks;
-
- correct_file_size(buf, local->fctx, prev);
-
- if (local->stbuf.ia_size < buf->ia_size)
- local->stbuf.ia_size = buf->ia_size;
-
- if (local->preparent.ia_size < preoldparent->ia_size)
- local->preparent.ia_size = preoldparent->ia_size;
-
- if (local->postparent.ia_size < postoldparent->ia_size)
- local->postparent.ia_size = postoldparent->ia_size;
-
- if (local->pre_buf.ia_size < prenewparent->ia_size)
- local->pre_buf.ia_size = prenewparent->ia_size;
-
- if (local->post_buf.ia_size < postnewparent->ia_size)
- local->post_buf.ia_size = postnewparent->ia_size;
- }
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- STRIPE_STACK_UNWIND(rename, frame, local->op_ret, local->op_errno,
- &local->stbuf, &local->preparent,
- &local->postparent, &local->pre_buf,
- &local->post_buf, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_first_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent,
- dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
-
- if (!this || !frame || !frame->local) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- op_errno = EINVAL;
- goto unwind;
- }
-
- if (op_ret == -1) {
- goto unwind;
- }
-
- local = frame->local;
- trav = this->children;
-
- local->stbuf = *buf;
- local->preparent = *preoldparent;
- local->postparent = *postoldparent;
- local->pre_buf = *prenewparent;
- local->post_buf = *postnewparent;
-
- local->op_ret = 0;
- local->call_count--;
-
- trav = trav->next; /* Skip first child */
- while (trav) {
- STACK_WIND(frame, stripe_stack_rename_cbk, trav->xlator,
- trav->xlator->fops->rename, &local->loc, &local->loc2, NULL);
- trav = trav->next;
- }
- return 0;
-
-unwind:
- STRIPE_STACK_UNWIND(rename, frame, -1, op_errno, buf, preoldparent,
- postoldparent, prenewparent, postnewparent, NULL);
- return 0;
-}
-
-int32_t
-stripe_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
- dict_t *xdata)
-{
- stripe_private_t *priv = NULL;
- stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- stripe_fd_ctx_t *fctx = NULL;
- int32_t op_errno = EINVAL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(oldloc, err);
- VALIDATE_OR_GOTO(oldloc->path, err);
- VALIDATE_OR_GOTO(oldloc->inode, err);
- VALIDATE_OR_GOTO(newloc, err);
-
- priv = this->private;
- trav = this->children;
-
- /* If any one node is down, don't allow rename */
- if (priv->nodes_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- frame->local = local;
-
- local->op_ret = -1;
- loc_copy(&local->loc, oldloc);
- loc_copy(&local->loc2, newloc);
-
- local->call_count = priv->child_count;
-
- if (IA_ISREG(oldloc->inode->ia_type)) {
- inode_ctx_get(oldloc->inode, this, (uint64_t *)&fctx);
- if (!fctx)
- goto err;
- local->fctx = fctx;
- }
-
- STACK_WIND(frame, stripe_first_rename_cbk, trav->xlator,
- trav->xlator->fops->rename, oldloc, newloc, NULL);
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(rename, frame, -1, op_errno, NULL, NULL, NULL, NULL,
- NULL, NULL);
- return 0;
-}
-int32_t
-stripe_first_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned %s", prev->this->name,
- strerror(op_errno));
- goto out;
- }
- local->op_ret = 0;
- local->preparent = *preparent;
- local->postparent = *postparent;
- local->preparent_blocks += preparent->ia_blocks;
- local->postparent_blocks += postparent->ia_blocks;
-
- STRIPE_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent, xdata);
- return 0;
-out:
- STRIPE_STACK_UNWIND(unlink, frame, -1, op_errno, NULL, NULL, NULL);
-
- return 0;
-}
-
-int32_t
-stripe_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned %s", prev->this->name,
- strerror(op_errno));
- local->op_errno = op_errno;
- if (op_errno != ENOENT) {
- local->failed = 1;
- local->op_ret = op_ret;
- }
- }
- }
- UNLOCK(&frame->lock);
-
- if (callcnt == 1) {
- if (local->failed) {
- op_errno = local->op_errno;
- goto out;
- }
- STACK_WIND(frame, stripe_first_unlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink, &local->loc, local->xflag,
- local->xdata);
- }
- return 0;
-out:
- STRIPE_STACK_UNWIND(unlink, frame, -1, op_errno, NULL, NULL, NULL);
-
- return 0;
-}
-
-int32_t
-stripe_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
- dict_t *xdata)
-{
- xlator_list_t *trav = NULL;
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- int32_t op_errno = EINVAL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
- VALIDATE_OR_GOTO(loc->path, err);
- VALIDATE_OR_GOTO(loc->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- if (priv->first_child_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- /* Don't unlink a file if a node is down */
- if (priv->nodes_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- loc_copy(&local->loc, loc);
- local->xflag = xflag;
-
- if (xdata)
- local->xdata = dict_ref(xdata);
-
- frame->local = local;
- local->call_count = priv->child_count;
- trav = trav->next; /* Skip the first child */
-
- while (trav) {
- STACK_WIND(frame, stripe_unlink_cbk, trav->xlator,
- trav->xlator->fops->unlink, loc, xflag, xdata);
- trav = trav->next;
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(unlink, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_first_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
-
- if (!this || !frame || !frame->local) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- op_errno = EINVAL;
- goto err;
- }
-
- if (op_ret == -1) {
- goto err;
- }
-
- local = frame->local;
- local->op_ret = 0;
-
- local->call_count--; /* First child successful */
-
- local->preparent = *preparent;
- local->postparent = *postparent;
- local->preparent_size = preparent->ia_size;
- local->postparent_size = postparent->ia_size;
- local->preparent_blocks += preparent->ia_blocks;
- local->postparent_blocks += postparent->ia_blocks;
-
- STRIPE_STACK_UNWIND(rmdir, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent, xdata);
- return 0;
-err:
- STRIPE_STACK_UNWIND(rmdir, frame, op_ret, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned %s", prev->this->name,
- strerror(op_errno));
- if (op_errno != ENOENT)
- local->failed = 1;
- }
- }
- UNLOCK(&frame->lock);
-
- if (callcnt == 1) {
- if (local->failed)
- goto out;
- STACK_WIND(frame, stripe_first_rmdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rmdir, &local->loc, local->flags,
- NULL);
- }
- return 0;
-out:
- STRIPE_STACK_UNWIND(rmdir, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
- dict_t *xdata)
-{
- xlator_list_t *trav = NULL;
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- int32_t op_errno = EINVAL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
- VALIDATE_OR_GOTO(loc->path, err);
- VALIDATE_OR_GOTO(loc->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- /* don't delete a directory if any of the subvolume is down */
- if (priv->nodes_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- loc_copy(&local->loc, loc);
- local->flags = flags;
- local->call_count = priv->child_count;
- trav = trav->next; /* skip the first child */
-
- while (trav) {
- STACK_WIND(frame, stripe_rmdir_cbk, trav->xlator,
- trav->xlator->fops->rmdir, loc, flags, NULL);
- trav = trav->next;
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(rmdir, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_mknod_ifreg_fail_unlink_cbk(call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
-
- if (!this || !frame || !frame->local) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- STRIPE_STACK_UNWIND(mknod, frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf, &local->preparent,
- &local->postparent, NULL);
- }
-out:
- return 0;
-}
-
-/**
- */
-int32_t
-stripe_mknod_ifreg_setxattr_cbk(call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- priv = this->private;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- local->op_ret = -1;
- local->op_errno = op_errno;
- }
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- if (local->op_ret == -1) {
- local->call_count = priv->child_count;
- while (trav) {
- STACK_WIND(frame, stripe_mknod_ifreg_fail_unlink_cbk,
- trav->xlator, trav->xlator->fops->unlink,
- &local->loc, 0, NULL);
- trav = trav->next;
- }
- return 0;
- }
-
- STRIPE_STACK_UNWIND(mknod, frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf, &local->preparent,
- &local->postparent, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_mknod_ifreg_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- call_frame_t *prev = NULL;
- xlator_list_t *trav = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- priv = this->private;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this)))
- local->failed = 1;
- local->op_errno = op_errno;
- }
- if (op_ret >= 0) {
- local->op_ret = op_ret;
-
- /* Can be used as a mechanism to understand if mknod
- was successful in at least one place */
- if (gf_uuid_is_null(local->ia_gfid))
- gf_uuid_copy(local->ia_gfid, buf->ia_gfid);
-
- if (stripe_ctx_handle(this, prev, local, xdata))
- gf_log(this->name, GF_LOG_ERROR,
- "Error getting fctx info from dict");
-
- local->stbuf_blocks += buf->ia_blocks;
- local->preparent_blocks += preparent->ia_blocks;
- local->postparent_blocks += postparent->ia_blocks;
-
- correct_file_size(buf, local->fctx, prev);
-
- if (local->stbuf_size < buf->ia_size)
- local->stbuf_size = buf->ia_size;
- if (local->preparent_size < preparent->ia_size)
- local->preparent_size = preparent->ia_size;
- if (local->postparent_size < postparent->ia_size)
- local->postparent_size = postparent->ia_size;
- }
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- if ((local->op_ret == -1) && !gf_uuid_is_null(local->ia_gfid)) {
- /* ia_gfid set means, at least on one node 'mknod'
- is successful */
- local->call_count = priv->child_count;
- trav = this->children;
- while (trav) {
- STACK_WIND(frame, stripe_mknod_ifreg_fail_unlink_cbk,
- trav->xlator, trav->xlator->fops->unlink,
- &local->loc, 0, NULL);
- trav = trav->next;
- }
- return 0;
- }
-
- if (local->op_ret != -1) {
- local->preparent.ia_blocks = local->preparent_blocks;
- local->preparent.ia_size = local->preparent_size;
- local->postparent.ia_blocks = local->postparent_blocks;
- local->postparent.ia_size = local->postparent_size;
- local->stbuf.ia_size = local->stbuf_size;
- local->stbuf.ia_blocks = local->stbuf_blocks;
- inode_ctx_put(local->inode, this, (uint64_t)(long)local->fctx);
- }
- STRIPE_STACK_UNWIND(mknod, frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf, &local->preparent,
- &local->postparent, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_mknod_first_ifreg_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- call_frame_t *prev = NULL;
- xlator_list_t *trav = NULL;
- int i = 1;
- dict_t *dict = NULL;
- int ret = 0;
- int need_unref = 0;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- priv = this->private;
- local = frame->local;
- trav = this->children;
-
- local->call_count--;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- local->failed = 1;
- local->op_errno = op_errno;
- goto out;
- }
-
- local->op_ret = op_ret;
-
- local->stbuf = *buf;
- local->preparent = *preparent;
- local->postparent = *postparent;
-
- if (gf_uuid_is_null(local->ia_gfid))
- gf_uuid_copy(local->ia_gfid, buf->ia_gfid);
- local->preparent.ia_blocks = local->preparent_blocks;
- local->preparent.ia_size = local->preparent_size;
- local->postparent.ia_blocks = local->postparent_blocks;
- local->postparent.ia_size = local->postparent_size;
- local->stbuf.ia_size = local->stbuf_size;
- local->stbuf.ia_blocks = local->stbuf_blocks;
-
- trav = trav->next;
- while (trav) {
- if (priv->xattr_supported) {
- dict = dict_new();
- if (!dict) {
- gf_log(this->name, GF_LOG_ERROR, "failed to allocate dict %s",
- local->loc.path);
- }
- need_unref = 1;
-
- dict_copy(local->xattr, dict);
-
- ret = stripe_xattr_request_build(this, dict, local->stripe_size,
- priv->child_count, i,
- priv->coalesce);
- if (ret)
- gf_log(this->name, GF_LOG_ERROR,
- "Failed to build xattr request");
-
- } else {
- dict = local->xattr;
- }
-
- STACK_WIND(frame, stripe_mknod_ifreg_cbk, trav->xlator,
- trav->xlator->fops->mknod, &local->loc, local->mode,
- local->rdev, 0, dict);
- trav = trav->next;
- i++;
-
- if (dict && need_unref)
- dict_unref(dict);
- }
-
- return 0;
-
-out:
-
- STRIPE_STACK_UNWIND(mknod, frame, op_ret, op_errno, NULL, NULL, NULL, NULL,
- NULL);
- return 0;
-}
-
-int32_t
-stripe_single_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- STRIPE_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, buf, preparent,
- postparent, xdata);
- return 0;
-}
-
-int
-stripe_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dev_t rdev, mode_t umask, dict_t *xdata)
-{
- stripe_private_t *priv = NULL;
- stripe_local_t *local = NULL;
- int32_t op_errno = EINVAL;
- int32_t i = 0;
- dict_t *dict = NULL;
- int ret = 0;
- int need_unref = 0;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
- VALIDATE_OR_GOTO(loc->path, err);
- VALIDATE_OR_GOTO(loc->inode, err);
-
- priv = this->private;
-
- if (priv->first_child_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- if (S_ISREG(mode)) {
- /* NOTE: on older kernels (older than 2.6.9),
- creat() fops is sent as mknod() + open(). Hence handling
- S_IFREG files is necessary */
- if (priv->nodes_down) {
- gf_log(this->name, GF_LOG_WARNING, "Some node down, returning EIO");
- op_errno = EIO;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- local->op_errno = ENOTCONN;
- local->stripe_size = stripe_get_matching_bs(loc->path, priv);
- frame->local = local;
- local->inode = inode_ref(loc->inode);
- loc_copy(&local->loc, loc);
- local->xattr = dict_copy_with_ref(xdata, NULL);
- local->mode = mode;
- local->umask = umask;
- local->rdev = rdev;
-
- /* Every time in stripe lookup, all child nodes should
- be looked up */
- local->call_count = priv->child_count;
-
- if (priv->xattr_supported) {
- dict = dict_new();
- if (!dict) {
- gf_log(this->name, GF_LOG_ERROR, "failed to allocate dict %s",
- loc->path);
- }
- need_unref = 1;
-
- dict_copy(xdata, dict);
-
- ret = stripe_xattr_request_build(this, dict, local->stripe_size,
- priv->child_count, i,
- priv->coalesce);
- if (ret)
- gf_log(this->name, GF_LOG_ERROR,
- "failed to build xattr request");
- } else {
- dict = xdata;
- }
-
- STACK_WIND(frame, stripe_mknod_first_ifreg_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask,
- dict);
-
- if (dict && need_unref)
- dict_unref(dict);
- return 0;
- }
-
- STACK_WIND(frame, stripe_single_mknod_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata);
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL,
- NULL);
- return 0;
-}
-
-int32_t
-stripe_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- local->op_errno = op_errno;
- if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this)))
- local->failed = 1;
- }
-
- if (op_ret >= 0) {
- local->op_ret = 0;
-
- local->stbuf_blocks += buf->ia_blocks;
- local->preparent_blocks += preparent->ia_blocks;
- local->postparent_blocks += postparent->ia_blocks;
-
- if (local->stbuf_size < buf->ia_size)
- local->stbuf_size = buf->ia_size;
- if (local->preparent_size < preparent->ia_size)
- local->preparent_size = preparent->ia_size;
- if (local->postparent_size < postparent->ia_size)
- local->postparent_size = postparent->ia_size;
- }
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- if (local->failed != -1) {
- local->preparent.ia_blocks = local->preparent_blocks;
- local->preparent.ia_size = local->preparent_size;
- local->postparent.ia_blocks = local->postparent_blocks;
- local->postparent.ia_size = local->postparent_size;
- local->stbuf.ia_size = local->stbuf_size;
- local->stbuf.ia_blocks = local->stbuf_blocks;
- }
- STRIPE_STACK_UNWIND(mkdir, frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf, &local->preparent,
- &local->postparent, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_first_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
- xlator_list_t *trav = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
- trav = this->children;
-
- local->call_count--; /* first child is successful */
- trav = trav->next; /* skip first child */
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- local->op_errno = op_errno;
- goto out;
- }
-
- local->op_ret = 0;
-
- local->inode = inode_ref(inode);
- local->stbuf = *buf;
- local->postparent = *postparent;
- local->preparent = *preparent;
-
- local->stbuf_blocks += buf->ia_blocks;
- local->preparent_blocks += preparent->ia_blocks;
- local->postparent_blocks += postparent->ia_blocks;
-
- local->stbuf_size = buf->ia_size;
- local->preparent_size = preparent->ia_size;
- local->postparent_size = postparent->ia_size;
-
- while (trav) {
- STACK_WIND(frame, stripe_mkdir_cbk, trav->xlator,
- trav->xlator->fops->mkdir, &local->loc, local->mode,
- local->umask, local->xdata);
- trav = trav->next;
- }
- return 0;
-out:
- STRIPE_STACK_UNWIND(mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL,
- NULL);
-
- return 0;
-}
-
-int
-stripe_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- mode_t umask, dict_t *xdata)
-{
- stripe_private_t *priv = NULL;
- stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- int32_t op_errno = 1;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
- VALIDATE_OR_GOTO(loc->path, err);
- VALIDATE_OR_GOTO(loc->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- if (priv->first_child_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- local->call_count = priv->child_count;
- if (xdata)
- local->xdata = dict_ref(xdata);
- local->mode = mode;
- local->umask = umask;
- loc_copy(&local->loc, loc);
- frame->local = local;
-
- /* Every time in stripe lookup, all child nodes should be looked up */
- STACK_WIND(frame, stripe_first_mkdir_cbk, trav->xlator,
- trav->xlator->fops->mkdir, loc, mode, umask, xdata);
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL,
- NULL);
- return 0;
-}
-
-int32_t
-stripe_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
- stripe_fd_ctx_t *fctx = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- local->op_errno = op_errno;
- if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this)))
- local->failed = 1;
- }
-
- if (op_ret >= 0) {
- local->op_ret = 0;
-
- if (IA_ISREG(inode->ia_type)) {
- inode_ctx_get(inode, this, (uint64_t *)&fctx);
- if (!fctx) {
- gf_log(this->name, GF_LOG_ERROR,
- "failed to get stripe context");
- op_ret = -1;
- op_errno = EINVAL;
- }
- }
-
- if (FIRST_CHILD(this) == prev->this) {
- local->inode = inode_ref(inode);
- local->stbuf = *buf;
- local->postparent = *postparent;
- local->preparent = *preparent;
- }
- local->stbuf_blocks += buf->ia_blocks;
- local->preparent_blocks += preparent->ia_blocks;
- local->postparent_blocks += postparent->ia_blocks;
-
- correct_file_size(buf, fctx, prev);
-
- if (local->stbuf_size < buf->ia_size)
- local->stbuf_size = buf->ia_size;
- if (local->preparent_size < preparent->ia_size)
- local->preparent_size = preparent->ia_size;
- if (local->postparent_size < postparent->ia_size)
- local->postparent_size = postparent->ia_size;
- }
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- if (local->op_ret != -1) {
- local->preparent.ia_blocks = local->preparent_blocks;
- local->preparent.ia_size = local->preparent_size;
- local->postparent.ia_blocks = local->postparent_blocks;
- local->postparent.ia_size = local->postparent_size;
- local->stbuf.ia_size = local->stbuf_size;
- local->stbuf.ia_blocks = local->stbuf_blocks;
- }
- STRIPE_STACK_UNWIND(link, frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf, &local->preparent,
- &local->postparent, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
- dict_t *xdata)
-{
- xlator_list_t *trav = NULL;
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- int32_t op_errno = 1;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(oldloc, err);
- VALIDATE_OR_GOTO(oldloc->path, err);
- VALIDATE_OR_GOTO(oldloc->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- /* If any one node is down, don't allow link operation */
- if (priv->nodes_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- local->call_count = priv->child_count;
-
- /* Every time in stripe lookup, all child
- nodes should be looked up */
- while (trav) {
- STACK_WIND(frame, stripe_link_cbk, trav->xlator,
- trav->xlator->fops->link, oldloc, newloc, NULL);
- trav = trav->next;
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(link, frame, -1, op_errno, NULL, NULL, NULL, NULL,
- NULL);
- return 0;
-}
-
-int32_t
-stripe_create_fail_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
-
- if (!this || !frame || !frame->local) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- STRIPE_STACK_UNWIND(create, frame, local->op_ret, local->op_errno,
- local->fd, local->inode, &local->stbuf,
- &local->preparent, &local->postparent, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- call_frame_t *prev = NULL;
- xlator_list_t *trav = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- priv = this->private;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- local->failed = 1;
- local->op_errno = op_errno;
- }
-
- if (op_ret >= 0) {
- if (IA_ISREG(buf->ia_type)) {
- if (stripe_ctx_handle(this, prev, local, xdata))
- gf_log(this->name, GF_LOG_ERROR,
- "Error getting fctx info from "
- "dict");
- }
-
- local->op_ret = op_ret;
-
- local->stbuf_blocks += buf->ia_blocks;
- local->preparent_blocks += preparent->ia_blocks;
- local->postparent_blocks += postparent->ia_blocks;
-
- correct_file_size(buf, local->fctx, prev);
-
- if (local->stbuf_size < buf->ia_size)
- local->stbuf_size = buf->ia_size;
- if (local->preparent_size < preparent->ia_size)
- local->preparent_size = preparent->ia_size;
- if (local->postparent_size < postparent->ia_size)
- local->postparent_size = postparent->ia_size;
- }
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- if (local->op_ret == -1) {
- local->call_count = priv->child_count;
- trav = this->children;
- while (trav) {
- STACK_WIND(frame, stripe_create_fail_unlink_cbk, trav->xlator,
- trav->xlator->fops->unlink, &local->loc, 0, NULL);
- trav = trav->next;
- }
-
- return 0;
- }
-
- if (local->op_ret >= 0) {
- local->preparent.ia_blocks = local->preparent_blocks;
- local->preparent.ia_size = local->preparent_size;
- local->postparent.ia_blocks = local->postparent_blocks;
- local->postparent.ia_size = local->postparent_size;
- local->stbuf.ia_size = local->stbuf_size;
- local->stbuf.ia_blocks = local->stbuf_blocks;
-
- stripe_copy_xl_array(local->fctx->xl_array, priv->xl_array,
- local->fctx->stripe_count);
- inode_ctx_put(local->inode, this, (uint64_t)(uintptr_t)local->fctx);
- }
-
- /* Create itself has failed.. so return
- without setxattring */
- STRIPE_STACK_UNWIND(create, frame, local->op_ret, local->op_errno,
- local->fd, local->inode, &local->stbuf,
- &local->preparent, &local->postparent, NULL);
- }
-
-out:
- return 0;
-}
-
-int32_t
-stripe_first_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd,
- inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- call_frame_t *prev = NULL;
- xlator_list_t *trav = NULL;
- int i = 1;
- dict_t *dict = NULL;
- loc_t *loc = NULL;
- int32_t need_unref = 0;
- int32_t ret = -1;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- priv = this->private;
- local = frame->local;
- trav = this->children;
- loc = &local->loc;
-
- --local->call_count;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- local->failed = 1;
- local->op_errno = op_errno;
- }
-
- local->op_ret = 0;
- /* Get the mapping in inode private */
- /* Get the stat buf right */
- local->stbuf = *buf;
- local->preparent = *preparent;
- local->postparent = *postparent;
-
- local->stbuf_blocks += buf->ia_blocks;
- local->preparent_blocks += preparent->ia_blocks;
- local->postparent_blocks += postparent->ia_blocks;
-
- if (local->stbuf_size < buf->ia_size)
- local->stbuf_size = buf->ia_size;
- if (local->preparent_size < preparent->ia_size)
- local->preparent_size = preparent->ia_size;
- if (local->postparent_size < postparent->ia_size)
- local->postparent_size = postparent->ia_size;
-
- if (local->failed)
- local->op_ret = -1;
-
- if (local->op_ret == -1) {
- local->call_count = 1;
- STACK_WIND(frame, stripe_create_fail_unlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink, &local->loc, 0, NULL);
- return 0;
- }
-
- if (local->op_ret >= 0) {
- local->preparent.ia_blocks = local->preparent_blocks;
- local->preparent.ia_size = local->preparent_size;
- local->postparent.ia_blocks = local->postparent_blocks;
- local->postparent.ia_size = local->postparent_size;
- local->stbuf.ia_size = local->stbuf_size;
- local->stbuf.ia_blocks = local->stbuf_blocks;
- }
-
- /* Send a setxattr request to nodes where the
- files are created */
- trav = trav->next;
- while (trav) {
- if (priv->xattr_supported) {
- dict = dict_new();
- if (!dict) {
- gf_log(this->name, GF_LOG_ERROR, "failed to allocate dict %s",
- loc->path);
- }
- need_unref = 1;
-
- dict_copy(local->xattr, dict);
-
- ret = stripe_xattr_request_build(this, dict, local->stripe_size,
- priv->child_count, i,
- priv->coalesce);
- if (ret)
- gf_log(this->name, GF_LOG_ERROR,
- "failed to build xattr request");
- } else {
- dict = local->xattr;
- }
-
- STACK_WIND(frame, stripe_create_cbk, trav->xlator,
- trav->xlator->fops->create, &local->loc, local->flags,
- local->mode, local->umask, local->fd, dict);
- trav = trav->next;
- if (need_unref && dict)
- dict_unref(dict);
- i++;
- }
-
-out:
- return 0;
-}
-
-/**
- * stripe_create - If a block-size is specified for the 'name', create the
- * file in all the child nodes. If not, create it in only first child.
- *
- * @name- complete path of the file to be created.
- */
-int32_t
-stripe_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
-{
- stripe_private_t *priv = NULL;
- stripe_local_t *local = NULL;
- int32_t op_errno = EINVAL;
- int ret = 0;
- int need_unref = 0;
- int i = 0;
- dict_t *dict = NULL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
- VALIDATE_OR_GOTO(loc->path, err);
- VALIDATE_OR_GOTO(loc->inode, err);
-
- priv = this->private;
-
- /* files created in O_APPEND mode does not allow lseek() on fd */
- flags &= ~O_APPEND;
-
- if (priv->first_child_down || priv->nodes_down) {
- gf_log(this->name, GF_LOG_DEBUG, "First node down, returning EIO");
- op_errno = EIO;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- local->op_errno = ENOTCONN;
- local->stripe_size = stripe_get_matching_bs(loc->path, priv);
- frame->local = local;
- local->inode = inode_ref(loc->inode);
- loc_copy(&local->loc, loc);
- local->fd = fd_ref(fd);
- local->flags = flags;
- local->mode = mode;
- local->umask = umask;
- if (xdata)
- local->xattr = dict_ref(xdata);
-
- local->call_count = priv->child_count;
- /* Send a setxattr request to nodes where the
- files are created */
-
- if (priv->xattr_supported) {
- dict = dict_new();
- if (!dict) {
- gf_log(this->name, GF_LOG_ERROR, "failed to allocate dict %s",
- loc->path);
- }
- need_unref = 1;
-
- dict_copy(xdata, dict);
-
- ret = stripe_xattr_request_build(this, dict, local->stripe_size,
- priv->child_count, i, priv->coalesce);
- if (ret)
- gf_log(this->name, GF_LOG_ERROR, "failed to build xattr request");
- } else {
- dict = xdata;
- }
-
- STACK_WIND(frame, stripe_first_create_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
- dict);
-
- if (need_unref && dict)
- dict_unref(dict);
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL,
- NULL, xdata);
- return 0;
-}
-
-int32_t
-stripe_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this)))
- local->failed = 1;
- local->op_errno = op_errno;
- }
-
- if (op_ret >= 0)
- local->op_ret = op_ret;
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- STRIPE_STACK_UNWIND(open, frame, local->op_ret, local->op_errno,
- local->fd, xdata);
- }
-out:
- return 0;
-}
-
-/**
- * stripe_open -
- */
-int32_t
-stripe_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
- int32_t op_errno = 1;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
- VALIDATE_OR_GOTO(loc->path, err);
- VALIDATE_OR_GOTO(loc->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- if (priv->first_child_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- /* files opened in O_APPEND mode does not allow lseek() on fd */
- flags &= ~O_APPEND;
-
- local->fd = fd_ref(fd);
- frame->local = local;
- loc_copy(&local->loc, loc);
-
- /* Striped files */
- local->flags = flags;
- local->call_count = priv->child_count;
- local->stripe_size = stripe_get_matching_bs(loc->path, priv);
-
- while (trav) {
- STACK_WIND(frame, stripe_open_cbk, trav->xlator,
- trav->xlator->fops->open, &local->loc, local->flags,
- local->fd, xdata);
- trav = trav->next;
- }
- return 0;
-err:
- STRIPE_STACK_UNWIND(open, frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- local->op_ret = -1;
- local->op_errno = op_errno;
- }
-
- if (op_ret >= 0)
- local->op_ret = op_ret;
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- STRIPE_STACK_UNWIND(opendir, frame, local->op_ret, local->op_errno,
- local->fd, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
- dict_t *xdata)
-{
- xlator_list_t *trav = NULL;
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- int32_t op_errno = EINVAL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
- VALIDATE_OR_GOTO(loc->path, err);
- VALIDATE_OR_GOTO(loc->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- if (priv->first_child_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- frame->local = local;
- local->call_count = priv->child_count;
- local->fd = fd_ref(fd);
-
- while (trav) {
- STACK_WIND(frame, stripe_opendir_cbk, trav->xlator,
- trav->xlator->fops->opendir, loc, fd, NULL);
- trav = trav->next;
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(opendir, frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- local->op_errno = op_errno;
- if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this)))
- local->failed = 1;
- }
- if (op_ret >= 0) {
- if (FIRST_CHILD(this) == prev->this) {
- /* First successful call, copy the *lock */
- local->op_ret = op_ret;
- local->lock = *lock;
- }
- }
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
- STRIPE_STACK_UNWIND(lk, frame, local->op_ret, local->op_errno,
- &local->lock, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
- struct gf_flock *lock, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- stripe_private_t *priv = NULL;
- int32_t op_errno = EINVAL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(fd, err);
- VALIDATE_OR_GOTO(fd->inode, err);
-
- trav = this->children;
- priv = this->private;
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- local->call_count = priv->child_count;
-
- while (trav) {
- STACK_WIND(frame, stripe_lk_cbk, trav->xlator, trav->xlator->fops->lk,
- fd, cmd, lock, NULL);
- trav = trav->next;
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(lk, frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned %s", prev->this->name,
- strerror(op_errno));
- local->op_errno = op_errno;
- if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this)))
- local->failed = 1;
- }
- if (op_ret >= 0)
- local->op_ret = op_ret;
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- STRIPE_STACK_UNWIND(flush, frame, local->op_ret, local->op_errno, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
- int32_t op_errno = 1;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(fd, err);
- VALIDATE_OR_GOTO(fd->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- if (priv->first_child_down) {
- op_errno = ENOTCONN;
- goto err;
- }
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- local->call_count = priv->child_count;
-
- while (trav) {
- STACK_WIND(frame, stripe_flush_cbk, trav->xlator,
- trav->xlator->fops->flush, fd, NULL);
- trav = trav->next;
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(flush, frame, -1, op_errno, NULL);
- return 0;
-}
-
-int32_t
-stripe_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned %s", prev->this->name,
- strerror(op_errno));
- local->op_errno = op_errno;
- if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this)))
- local->failed = 1;
- }
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- if (FIRST_CHILD(this) == prev->this) {
- local->pre_buf = *prebuf;
- local->post_buf = *postbuf;
- }
- local->prebuf_blocks += prebuf->ia_blocks;
- local->postbuf_blocks += postbuf->ia_blocks;
-
- correct_file_size(prebuf, local->fctx, prev);
- correct_file_size(postbuf, local->fctx, prev);
-
- if (local->prebuf_size < prebuf->ia_size)
- local->prebuf_size = prebuf->ia_size;
-
- if (local->postbuf_size < postbuf->ia_size)
- local->postbuf_size = postbuf->ia_size;
- }
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- if (local->op_ret != -1) {
- local->pre_buf.ia_blocks = local->prebuf_blocks;
- local->pre_buf.ia_size = local->prebuf_size;
- local->post_buf.ia_blocks = local->postbuf_blocks;
- local->post_buf.ia_size = local->postbuf_size;
- }
-
- STRIPE_STACK_UNWIND(fsync, frame, local->op_ret, local->op_errno,
- &local->pre_buf, &local->post_buf, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
- dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
- stripe_fd_ctx_t *fctx = NULL;
- int32_t op_errno = 1;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(fd, err);
- VALIDATE_OR_GOTO(fd->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- frame->local = local;
-
- inode_ctx_get(fd->inode, this, (uint64_t *)&fctx);
- if (!fctx) {
- op_errno = EINVAL;
- goto err;
- }
- local->fctx = fctx;
- local->op_ret = -1;
- local->call_count = priv->child_count;
-
- while (trav) {
- STACK_WIND(frame, stripe_fsync_cbk, trav->xlator,
- trav->xlator->fops->fsync, fd, flags, NULL);
- trav = trav->next;
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(fsync, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- local->op_errno = op_errno;
- if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this)))
- local->failed = 1;
- }
-
- if (op_ret == 0) {
- local->op_ret = 0;
-
- if (FIRST_CHILD(this) == prev->this)
- local->stbuf = *buf;
-
- local->stbuf_blocks += buf->ia_blocks;
-
- correct_file_size(buf, local->fctx, prev);
-
- if (local->stbuf_size < buf->ia_size)
- local->stbuf_size = buf->ia_size;
- }
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- if (local->op_ret != -1) {
- local->stbuf.ia_size = local->stbuf_size;
- local->stbuf.ia_blocks = local->stbuf_blocks;
- }
-
- STRIPE_STACK_UNWIND(fstat, frame, local->op_ret, local->op_errno,
- &local->stbuf, NULL);
- }
-
-out:
- return 0;
-}
-
-int32_t
-stripe_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
- stripe_fd_ctx_t *fctx = NULL;
- int32_t op_errno = 1;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(fd, err);
- VALIDATE_OR_GOTO(fd->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- local->call_count = priv->child_count;
-
- if (IA_ISREG(fd->inode->ia_type)) {
- inode_ctx_get(fd->inode, this, (uint64_t *)&fctx);
- if (!fctx)
- goto err;
- local->fctx = fctx;
- }
-
- while (trav) {
- STACK_WIND(frame, stripe_fstat_cbk, trav->xlator,
- trav->xlator->fops->fstat, fd, NULL);
- trav = trav->next;
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(fstat, frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- stripe_fd_ctx_t *fctx = NULL;
- int i, eof_idx;
- off_t dest_offset, tmp_offset;
- int32_t op_errno = 1;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(fd, err);
- VALIDATE_OR_GOTO(fd->inode, err);
-
- priv = this->private;
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- local->call_count = priv->child_count;
-
- inode_ctx_get(fd->inode, this, (uint64_t *)&fctx);
- if (!fctx) {
- gf_log(this->name, GF_LOG_ERROR, "no stripe context");
- op_errno = EINVAL;
- goto err;
- }
- if (!fctx->stripe_count) {
- gf_log(this->name, GF_LOG_ERROR, "no stripe count");
- op_errno = EINVAL;
- goto err;
- }
-
- local->fctx = fctx;
- eof_idx = (offset / fctx->stripe_size) % fctx->stripe_count;
-
- for (i = 0; i < fctx->stripe_count; i++) {
- if (!fctx->xl_array[i]) {
- gf_log(this->name, GF_LOG_ERROR,
- "no xlator at index "
- "%d",
- i);
- op_errno = EINVAL;
- goto err;
- }
-
- if (fctx->stripe_coalesce) {
- if (i < eof_idx)
- tmp_offset = gf_roof(offset,
- fctx->stripe_size * fctx->stripe_count);
- else if (i > eof_idx)
- tmp_offset = gf_floor(offset,
- fctx->stripe_size * fctx->stripe_count);
- else
- tmp_offset = offset;
-
- dest_offset = coalesced_offset(tmp_offset, fctx->stripe_size,
- fctx->stripe_count);
- } else {
- dest_offset = offset;
- }
-
- STACK_WIND(frame, stripe_truncate_cbk, fctx->xl_array[i],
- fctx->xl_array[i]->fops->ftruncate, fd, dest_offset, NULL);
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_fsyncdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned %s", prev->this->name,
- strerror(op_errno));
- local->op_errno = op_errno;
- if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this)))
- local->failed = 1;
- }
- if (op_ret >= 0)
- local->op_ret = op_ret;
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- STRIPE_STACK_UNWIND(fsyncdir, frame, local->op_ret, local->op_errno,
- NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
- dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
- int32_t op_errno = 1;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(fd, err);
- VALIDATE_OR_GOTO(fd->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- local->call_count = priv->child_count;
-
- while (trav) {
- STACK_WIND(frame, stripe_fsyncdir_cbk, trav->xlator,
- trav->xlator->fops->fsyncdir, fd, flags, NULL);
- trav = trav->next;
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(fsyncdir, frame, -1, op_errno, NULL);
- return 0;
-}
-
-int32_t
-stripe_readv_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- dict_t *xdata)
-{
- int32_t i = 0;
- int32_t callcnt = 0;
- int32_t count = 0;
- stripe_local_t *local = NULL;
- struct iovec *vec = NULL;
- struct iatt tmp_stbuf = {
- 0,
- };
- struct iobref *tmp_iobref = NULL;
- struct iobuf *iobuf = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- local = frame->local;
- prev = cookie;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret != -1) {
- correct_file_size(buf, local->fctx, prev);
- if (local->stbuf_size < buf->ia_size)
- local->stbuf_size = buf->ia_size;
- }
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- op_ret = 0;
-
- /* Keep extra space for filling in '\0's */
- vec = GF_CALLOC((local->count * 2), sizeof(struct iovec),
- gf_stripe_mt_iovec);
- if (!vec) {
- op_ret = -1;
- goto done;
- }
-
- for (i = 0; i < local->wind_count; i++) {
- if (local->replies[i].op_ret) {
- memcpy((vec + count), local->replies[i].vector,
- (local->replies[i].count * sizeof(struct iovec)));
- count += local->replies[i].count;
- op_ret += local->replies[i].op_ret;
- }
- if ((local->replies[i].op_ret < local->replies[i].requested_size) &&
- (local->stbuf_size > (local->offset + op_ret))) {
- /* Fill in 0s here */
- vec[count].iov_len = (local->replies[i].requested_size -
- local->replies[i].op_ret);
- iobuf = iobuf_get2(this->ctx->iobuf_pool, vec[count].iov_len);
- if (!iobuf) {
- gf_log(this->name, GF_LOG_ERROR, "Out of memory.");
- op_ret = -1;
- op_errno = ENOMEM;
- goto done;
- }
- memset(iobuf->ptr, 0, vec[count].iov_len);
- vec[count].iov_base = iobuf->ptr;
-
- iobref_add(local->iobref, iobuf);
- iobuf_unref(iobuf);
-
- op_ret += vec[count].iov_len;
- count++;
- }
- GF_FREE(local->replies[i].vector);
- }
-
- /* ENOENT signals EOF to the NFS-server */
- if (op_ret != -1 && op_ret < local->readv_size &&
- (local->offset + op_ret == buf->ia_size))
- op_errno = ENOENT;
-
- /* FIXME: notice that st_ino, and st_dev (gen) will be
- * different than what inode will have. Make sure this doesn't
- * cause any bugs at higher levels */
- memcpy(&tmp_stbuf, &local->replies[0].stbuf, sizeof(struct iatt));
- tmp_stbuf.ia_size = local->stbuf_size;
-
- done:
- GF_FREE(local->replies);
- tmp_iobref = local->iobref;
- STRIPE_STACK_UNWIND(readv, frame, op_ret, op_errno, vec, count,
- &tmp_stbuf, tmp_iobref, NULL);
-
- iobref_unref(tmp_iobref);
- GF_FREE(vec);
- }
-out:
- return 0;
-}
-
-/**
- * stripe_readv_cbk - get all the striped reads, and order it properly, send it
- * to above layer after putting it in a single vector.
- */
-int32_t
-stripe_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct iatt *stbuf, struct iobref *iobref,
- dict_t *xdata)
-{
- int32_t index = 0;
- int32_t callcnt = 0;
- int32_t final_count = 0;
- int32_t need_to_check_proper_size = 0;
- call_frame_t *mframe = NULL;
- stripe_local_t *mlocal = NULL;
- stripe_local_t *local = NULL;
- struct iovec *final_vec = NULL;
- struct iatt tmp_stbuf = {
- 0,
- };
- struct iatt *tmp_stbuf_p = NULL; // need it for a warning
- struct iobref *tmp_iobref = NULL;
- stripe_fd_ctx_t *fctx = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto end;
- }
-
- local = frame->local;
- index = local->node_index;
- prev = cookie;
- mframe = local->orig_frame;
- if (!mframe)
- goto out;
-
- mlocal = mframe->local;
- if (!mlocal)
- goto out;
-
- fctx = mlocal->fctx;
-
- LOCK(&mframe->lock);
- {
- mlocal->replies[index].op_ret = op_ret;
- mlocal->replies[index].op_errno = op_errno;
- mlocal->replies[index].requested_size = local->readv_size;
- if (op_ret >= 0) {
- mlocal->replies[index].stbuf = *stbuf;
- mlocal->replies[index].count = count;
- mlocal->replies[index].vector = iov_dup(vector, count);
-
- correct_file_size(stbuf, fctx, prev);
-
- if (local->stbuf_size < stbuf->ia_size)
- local->stbuf_size = stbuf->ia_size;
- local->stbuf_blocks += stbuf->ia_blocks;
-
- if (!mlocal->iobref)
- mlocal->iobref = iobref_new();
- iobref_merge(mlocal->iobref, iobref);
- }
- callcnt = ++mlocal->call_count;
- }
- UNLOCK(&mframe->lock);
-
- if (callcnt == mlocal->wind_count) {
- op_ret = 0;
-
- for (index = 0; index < mlocal->wind_count; index++) {
- /* check whether each stripe returned
- * 'expected' number of bytes */
- if (mlocal->replies[index].op_ret == -1) {
- op_ret = -1;
- op_errno = mlocal->replies[index].op_errno;
- break;
- }
- /* TODO: handle the 'holes' within the read range
- properly */
- if (mlocal->replies[index].op_ret <
- mlocal->replies[index].requested_size) {
- need_to_check_proper_size = 1;
- }
-
- op_ret += mlocal->replies[index].op_ret;
- mlocal->count += mlocal->replies[index].count;
- }
- if (op_ret == -1)
- goto done;
- if (need_to_check_proper_size)
- goto check_size;
-
- final_vec = GF_CALLOC(mlocal->count, sizeof(struct iovec),
- gf_stripe_mt_iovec);
-
- if (!final_vec) {
- op_ret = -1;
- goto done;
- }
-
- for (index = 0; index < mlocal->wind_count; index++) {
- memcpy((final_vec + final_count), mlocal->replies[index].vector,
- (mlocal->replies[index].count * sizeof(struct iovec)));
- final_count += mlocal->replies[index].count;
- GF_FREE(mlocal->replies[index].vector);
- }
-
- /* FIXME: notice that st_ino, and st_dev (gen) will be
- * different than what inode will have. Make sure this doesn't
- * cause any bugs at higher levels */
- memcpy(&tmp_stbuf, &mlocal->replies[0].stbuf, sizeof(struct iatt));
- tmp_stbuf.ia_size = local->stbuf_size;
- tmp_stbuf.ia_blocks = local->stbuf_blocks;
-
- done:
- /* */
- GF_FREE(mlocal->replies);
- tmp_iobref = mlocal->iobref;
- /* work around for nfs truncated read. Bug 3774 */
- tmp_stbuf_p = &tmp_stbuf;
- WIPE(tmp_stbuf_p);
- STRIPE_STACK_UNWIND(readv, mframe, op_ret, op_errno, final_vec,
- final_count, &tmp_stbuf, tmp_iobref, NULL);
-
- iobref_unref(tmp_iobref);
- GF_FREE(final_vec);
- }
-
- goto out;
-
-check_size:
- mlocal->call_count = fctx->stripe_count;
-
- for (index = 0; index < fctx->stripe_count; index++) {
- STACK_WIND(mframe, stripe_readv_fstat_cbk, (fctx->xl_array[index]),
- (fctx->xl_array[index])->fops->fstat, mlocal->fd, NULL);
- }
-
-out:
- STRIPE_STACK_DESTROY(frame);
-end:
- return 0;
-}
-
-int32_t
-stripe_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, uint32_t flags, dict_t *xdata)
-{
- int32_t op_errno = EINVAL;
- int32_t idx = 0;
- int32_t index = 0;
- int32_t num_stripe = 0;
- int32_t off_index = 0;
- size_t frame_size = 0;
- off_t rounded_end = 0;
- uint64_t tmp_fctx = 0;
- uint64_t stripe_size = 0;
- off_t rounded_start = 0;
- off_t frame_offset = offset;
- off_t dest_offset = 0;
- stripe_local_t *local = NULL;
- call_frame_t *rframe = NULL;
- stripe_local_t *rlocal = NULL;
- stripe_fd_ctx_t *fctx = NULL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(fd, err);
- VALIDATE_OR_GOTO(fd->inode, err);
-
- inode_ctx_get(fd->inode, this, &tmp_fctx);
- if (!tmp_fctx) {
- op_errno = EBADFD;
- goto err;
- }
- fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
- stripe_size = fctx->stripe_size;
-
- STRIPE_VALIDATE_FCTX(fctx, err);
-
- if (!stripe_size) {
- gf_log(this->name, GF_LOG_DEBUG, "Wrong stripe size for the file");
- goto err;
- }
- /* The file is stripe across the child nodes. Send the read request
- * to the child nodes appropriately after checking which region of
- * the file is in which child node. Always '0-<stripe_size>' part of
- * the file resides in the first child.
- */
- rounded_start = gf_floor(offset, stripe_size);
- rounded_end = gf_roof(offset + size, stripe_size);
- num_stripe = (rounded_end - rounded_start) / stripe_size;
-
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- frame->local = local;
-
- /* This is where all the vectors should be copied. */
- local->replies = GF_CALLOC(num_stripe, sizeof(struct stripe_replies),
- gf_stripe_mt_stripe_replies);
- if (!local->replies) {
- op_errno = ENOMEM;
- goto err;
- }
-
- off_index = (offset / stripe_size) % fctx->stripe_count;
- local->wind_count = num_stripe;
- local->readv_size = size;
- local->offset = offset;
- local->fd = fd_ref(fd);
- local->fctx = fctx;
-
- for (index = off_index; index < (num_stripe + off_index); index++) {
- rframe = copy_frame(frame);
- rlocal = mem_get0(this->local_pool);
- if (!rlocal) {
- op_errno = ENOMEM;
- goto err;
- }
-
- frame_size = min(gf_roof(frame_offset + 1, stripe_size),
- (offset + size)) -
- frame_offset;
-
- rlocal->node_index = index - off_index;
- rlocal->orig_frame = frame;
- rlocal->readv_size = frame_size;
- rframe->local = rlocal;
- idx = (index % fctx->stripe_count);
-
- if (fctx->stripe_coalesce)
- dest_offset = coalesced_offset(frame_offset, stripe_size,
- fctx->stripe_count);
- else
- dest_offset = frame_offset;
-
- STACK_WIND(rframe, stripe_readv_cbk, fctx->xl_array[idx],
- fctx->xl_array[idx]->fops->readv, fd, frame_size,
- dest_offset, flags, xdata);
-
- frame_offset += frame_size;
- }
-
- return 0;
-err:
- if (rframe)
- STRIPE_STACK_DESTROY(rframe);
-
- STRIPE_STACK_UNWIND(readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- stripe_local_t *mlocal = NULL;
- call_frame_t *prev = NULL;
- call_frame_t *mframe = NULL;
- struct stripe_replies *reply = NULL;
- int32_t i = 0;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
- mframe = local->orig_frame;
- mlocal = mframe->local;
-
- LOCK(&frame->lock);
- {
- callcnt = ++mlocal->call_count;
-
- mlocal->replies[local->node_index].op_ret = op_ret;
- mlocal->replies[local->node_index].op_errno = op_errno;
-
- if (op_ret >= 0) {
- mlocal->post_buf = *postbuf;
- mlocal->pre_buf = *prebuf;
-
- mlocal->prebuf_blocks += prebuf->ia_blocks;
- mlocal->postbuf_blocks += postbuf->ia_blocks;
-
- correct_file_size(prebuf, mlocal->fctx, prev);
- correct_file_size(postbuf, mlocal->fctx, prev);
-
- if (mlocal->prebuf_size < prebuf->ia_size)
- mlocal->prebuf_size = prebuf->ia_size;
- if (mlocal->postbuf_size < postbuf->ia_size)
- mlocal->postbuf_size = postbuf->ia_size;
- }
- }
- UNLOCK(&frame->lock);
-
- if ((callcnt == mlocal->wind_count) && mlocal->unwind) {
- mlocal->pre_buf.ia_size = mlocal->prebuf_size;
- mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks;
- mlocal->post_buf.ia_size = mlocal->postbuf_size;
- mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks;
-
- /*
- * Only return the number of consecutively written bytes up until
- * the first error. Only return an error if it occurs first.
- *
- * When a short write occurs, the application should retry at the
- * appropriate offset, at which point we'll potentially pass back
- * the error.
- */
- for (i = 0, reply = mlocal->replies; i < mlocal->wind_count;
- i++, reply++) {
- if (reply->op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG,
- "reply %d "
- "returned error %s",
- i, strerror(reply->op_errno));
- if (!mlocal->op_ret) {
- mlocal->op_ret = -1;
- mlocal->op_errno = reply->op_errno;
- }
- break;
- }
-
- mlocal->op_ret += reply->op_ret;
-
- if (reply->op_ret < reply->requested_size)
- break;
- }
-
- GF_FREE(mlocal->replies);
-
- STRIPE_STACK_UNWIND(writev, mframe, mlocal->op_ret, mlocal->op_errno,
- &mlocal->pre_buf, &mlocal->post_buf, NULL);
- }
-out:
- if (frame)
- STRIPE_STACK_DESTROY(frame);
- return 0;
-}
-
-int32_t
-stripe_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count, off_t offset, uint32_t flags,
- struct iobref *iobref, dict_t *xdata)
-{
- struct iovec *tmp_vec = NULL;
- stripe_local_t *local = NULL;
- stripe_fd_ctx_t *fctx = NULL;
- int32_t op_errno = 1;
- int32_t idx = 0;
- int32_t total_size = 0;
- int32_t offset_offset = 0;
- int32_t remaining_size = 0;
- int32_t tmp_count = count;
- off_t fill_size = 0;
- uint64_t stripe_size = 0;
- uint64_t tmp_fctx = 0;
- off_t dest_offset = 0;
- off_t rounded_start = 0;
- off_t rounded_end = 0;
- int32_t total_chunks = 0;
- call_frame_t *wframe = NULL;
- stripe_local_t *wlocal = NULL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(fd, err);
- VALIDATE_OR_GOTO(fd->inode, err);
-
- inode_ctx_get(fd->inode, this, &tmp_fctx);
- if (!tmp_fctx) {
- op_errno = EINVAL;
- goto err;
- }
- fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
- stripe_size = fctx->stripe_size;
-
- STRIPE_VALIDATE_FCTX(fctx, err);
-
- /* File has to be stripped across the child nodes */
- total_size = iov_length(vector, count);
- remaining_size = total_size;
-
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- frame->local = local;
- local->stripe_size = stripe_size;
- local->fctx = fctx;
-
- if (!stripe_size) {
- gf_log(this->name, GF_LOG_DEBUG, "Wrong stripe size for the file");
- op_errno = EINVAL;
- goto err;
- }
-
- rounded_start = gf_floor(offset, stripe_size);
- rounded_end = gf_roof(offset + total_size, stripe_size);
- total_chunks = (rounded_end - rounded_start) / stripe_size;
- local->replies = GF_CALLOC(total_chunks, sizeof(struct stripe_replies),
- gf_stripe_mt_stripe_replies);
- if (!local->replies) {
- op_errno = ENOMEM;
- goto err;
- }
-
- total_chunks = 0;
- while (1) {
- wframe = copy_frame(frame);
- wlocal = mem_get0(this->local_pool);
- if (!wlocal) {
- op_errno = ENOMEM;
- goto err;
- }
- wlocal->orig_frame = frame;
- wframe->local = wlocal;
-
- /* Send striped chunk of the vector to child
- nodes appropriately. */
- idx = (((offset + offset_offset) / local->stripe_size) %
- fctx->stripe_count);
-
- fill_size = (local->stripe_size -
- ((offset + offset_offset) % local->stripe_size));
- if (fill_size > remaining_size)
- fill_size = remaining_size;
-
- remaining_size -= fill_size;
-
- tmp_count = iov_subset(vector, count, offset_offset,
- offset_offset + fill_size, NULL);
- tmp_vec = GF_CALLOC(tmp_count, sizeof(struct iovec),
- gf_stripe_mt_iovec);
- if (!tmp_vec) {
- op_errno = ENOMEM;
- goto err;
- }
- tmp_count = iov_subset(vector, count, offset_offset,
- offset_offset + fill_size, tmp_vec);
-
- local->wind_count++;
- if (remaining_size == 0)
- local->unwind = 1;
-
- /*
- * Store off the request index (with respect to the chunk of the
- * initial offset) and the size of the request. This is required
- * in the callback to calculate an appropriate return value in
- * the event of a write failure in one or more requests.
- */
- wlocal->node_index = total_chunks;
- local->replies[total_chunks].requested_size = fill_size;
-
- dest_offset = offset + offset_offset;
- if (fctx->stripe_coalesce)
- dest_offset = coalesced_offset(dest_offset, local->stripe_size,
- fctx->stripe_count);
-
- STACK_WIND(wframe, stripe_writev_cbk, fctx->xl_array[idx],
- fctx->xl_array[idx]->fops->writev, fd, tmp_vec, tmp_count,
- dest_offset, flags, iobref, xdata);
-
- GF_FREE(tmp_vec);
- offset_offset += fill_size;
- total_chunks++;
- if (remaining_size == 0)
- break;
- }
-
- return 0;
-err:
- if (wframe)
- STRIPE_STACK_DESTROY(wframe);
-
- STRIPE_STACK_UNWIND(writev, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- stripe_local_t *mlocal = NULL;
- call_frame_t *prev = NULL;
- call_frame_t *mframe = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
- mframe = local->orig_frame;
- mlocal = mframe->local;
-
- LOCK(&frame->lock);
- {
- callcnt = ++mlocal->call_count;
-
- if (op_ret == 0) {
- mlocal->post_buf = *postbuf;
- mlocal->pre_buf = *prebuf;
-
- mlocal->prebuf_blocks += prebuf->ia_blocks;
- mlocal->postbuf_blocks += postbuf->ia_blocks;
-
- correct_file_size(prebuf, mlocal->fctx, prev);
- correct_file_size(postbuf, mlocal->fctx, prev);
-
- if (mlocal->prebuf_size < prebuf->ia_size)
- mlocal->prebuf_size = prebuf->ia_size;
- if (mlocal->postbuf_size < postbuf->ia_size)
- mlocal->postbuf_size = postbuf->ia_size;
- }
-
- /* return the first failure */
- if (mlocal->op_ret == 0) {
- mlocal->op_ret = op_ret;
- mlocal->op_errno = op_errno;
- }
- }
- UNLOCK(&frame->lock);
-
- if ((callcnt == mlocal->wind_count) && mlocal->unwind) {
- mlocal->pre_buf.ia_size = mlocal->prebuf_size;
- mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks;
- mlocal->post_buf.ia_size = mlocal->postbuf_size;
- mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks;
-
- STRIPE_STACK_UNWIND(fallocate, mframe, mlocal->op_ret, mlocal->op_errno,
- &mlocal->pre_buf, &mlocal->post_buf, NULL);
- }
-out:
- if (frame)
- STRIPE_STACK_DESTROY(frame);
- return 0;
-}
-
-int32_t
-stripe_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
- off_t offset, size_t len, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_fd_ctx_t *fctx = NULL;
- int32_t op_errno = 1;
- int32_t idx = 0;
- int32_t offset_offset = 0;
- int32_t remaining_size = 0;
- off_t fill_size = 0;
- uint64_t stripe_size = 0;
- uint64_t tmp_fctx = 0;
- off_t dest_offset = 0;
- call_frame_t *fframe = NULL;
- stripe_local_t *flocal = NULL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(fd, err);
- VALIDATE_OR_GOTO(fd->inode, err);
-
- inode_ctx_get(fd->inode, this, &tmp_fctx);
- if (!tmp_fctx) {
- op_errno = EINVAL;
- goto err;
- }
- fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
- stripe_size = fctx->stripe_size;
-
- STRIPE_VALIDATE_FCTX(fctx, err);
-
- remaining_size = len;
-
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- frame->local = local;
- local->stripe_size = stripe_size;
- local->fctx = fctx;
-
- if (!stripe_size) {
- gf_log(this->name, GF_LOG_DEBUG, "Wrong stripe size for the file");
- op_errno = EINVAL;
- goto err;
- }
-
- while (1) {
- fframe = copy_frame(frame);
- flocal = mem_get0(this->local_pool);
- if (!flocal) {
- op_errno = ENOMEM;
- goto err;
- }
- flocal->orig_frame = frame;
- fframe->local = flocal;
-
- /* send fallocate request to the associated child node */
- idx = (((offset + offset_offset) / local->stripe_size) %
- fctx->stripe_count);
-
- fill_size = (local->stripe_size -
- ((offset + offset_offset) % local->stripe_size));
- if (fill_size > remaining_size)
- fill_size = remaining_size;
-
- remaining_size -= fill_size;
-
- local->wind_count++;
- if (remaining_size == 0)
- local->unwind = 1;
-
- dest_offset = offset + offset_offset;
- if (fctx->stripe_coalesce)
- dest_offset = coalesced_offset(dest_offset, local->stripe_size,
- fctx->stripe_count);
-
- /*
- * TODO: Create a separate handler for coalesce mode that sends a
- * single fallocate per-child (since the ranges are linear).
- */
- STACK_WIND(fframe, stripe_fallocate_cbk, fctx->xl_array[idx],
- fctx->xl_array[idx]->fops->fallocate, fd, mode, dest_offset,
- fill_size, xdata);
-
- offset_offset += fill_size;
- if (remaining_size == 0)
- break;
- }
-
- return 0;
-err:
- if (fframe)
- STRIPE_STACK_DESTROY(fframe);
-
- STRIPE_STACK_UNWIND(fallocate, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- stripe_local_t *mlocal = NULL;
- call_frame_t *prev = NULL;
- call_frame_t *mframe = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
- mframe = local->orig_frame;
- mlocal = mframe->local;
-
- LOCK(&frame->lock);
- {
- callcnt = ++mlocal->call_count;
-
- if (op_ret == 0) {
- mlocal->post_buf = *postbuf;
- mlocal->pre_buf = *prebuf;
-
- mlocal->prebuf_blocks += prebuf->ia_blocks;
- mlocal->postbuf_blocks += postbuf->ia_blocks;
-
- correct_file_size(prebuf, mlocal->fctx, prev);
- correct_file_size(postbuf, mlocal->fctx, prev);
-
- if (mlocal->prebuf_size < prebuf->ia_size)
- mlocal->prebuf_size = prebuf->ia_size;
- if (mlocal->postbuf_size < postbuf->ia_size)
- mlocal->postbuf_size = postbuf->ia_size;
- }
-
- /* return the first failure */
- if (mlocal->op_ret == 0) {
- mlocal->op_ret = op_ret;
- mlocal->op_errno = op_errno;
- }
- }
- UNLOCK(&frame->lock);
-
- if ((callcnt == mlocal->wind_count) && mlocal->unwind) {
- mlocal->pre_buf.ia_size = mlocal->prebuf_size;
- mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks;
- mlocal->post_buf.ia_size = mlocal->postbuf_size;
- mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks;
-
- STRIPE_STACK_UNWIND(discard, mframe, mlocal->op_ret, mlocal->op_errno,
- &mlocal->pre_buf, &mlocal->post_buf, NULL);
- }
-out:
- if (frame)
- STRIPE_STACK_DESTROY(frame);
-
- return 0;
-}
-
-int32_t
-stripe_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- size_t len, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_fd_ctx_t *fctx = NULL;
- int32_t op_errno = 1;
- int32_t idx = 0;
- int32_t offset_offset = 0;
- int32_t remaining_size = 0;
- off_t fill_size = 0;
- uint64_t stripe_size = 0;
- uint64_t tmp_fctx = 0;
- off_t dest_offset = 0;
- call_frame_t *fframe = NULL;
- stripe_local_t *flocal = NULL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(fd, err);
- VALIDATE_OR_GOTO(fd->inode, err);
-
- inode_ctx_get(fd->inode, this, &tmp_fctx);
- if (!tmp_fctx) {
- op_errno = EINVAL;
- goto err;
- }
- fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
- stripe_size = fctx->stripe_size;
-
- STRIPE_VALIDATE_FCTX(fctx, err);
-
- remaining_size = len;
-
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- frame->local = local;
- local->stripe_size = stripe_size;
- local->fctx = fctx;
-
- if (!stripe_size) {
- gf_log(this->name, GF_LOG_DEBUG, "Wrong stripe size for the file");
- op_errno = EINVAL;
- goto err;
- }
-
- while (1) {
- fframe = copy_frame(frame);
- flocal = mem_get0(this->local_pool);
- if (!flocal) {
- op_errno = ENOMEM;
- goto err;
- }
- flocal->orig_frame = frame;
- fframe->local = flocal;
-
- /* send discard request to the associated child node */
- idx = (((offset + offset_offset) / local->stripe_size) %
- fctx->stripe_count);
-
- fill_size = (local->stripe_size -
- ((offset + offset_offset) % local->stripe_size));
- if (fill_size > remaining_size)
- fill_size = remaining_size;
-
- remaining_size -= fill_size;
-
- local->wind_count++;
- if (remaining_size == 0)
- local->unwind = 1;
-
- dest_offset = offset + offset_offset;
- if (fctx->stripe_coalesce)
- dest_offset = coalesced_offset(dest_offset, local->stripe_size,
- fctx->stripe_count);
-
- /*
- * TODO: Create a separate handler for coalesce mode that sends a
- * single discard per-child (since the ranges are linear).
- */
- STACK_WIND(fframe, stripe_discard_cbk, fctx->xl_array[idx],
- fctx->xl_array[idx]->fops->discard, fd, dest_offset,
- fill_size, xdata);
-
- offset_offset += fill_size;
- if (remaining_size == 0)
- break;
- }
-
- return 0;
-err:
- if (fframe)
- STRIPE_STACK_DESTROY(fframe);
-
- STRIPE_STACK_UNWIND(discard, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- stripe_local_t *mlocal = NULL;
- call_frame_t *prev = NULL;
- call_frame_t *mframe = NULL;
-
- GF_ASSERT(frame);
-
- if (!this || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
- mframe = local->orig_frame;
- mlocal = mframe->local;
-
- LOCK(&frame->lock);
- {
- callcnt = ++mlocal->call_count;
-
- if (op_ret == 0) {
- mlocal->post_buf = *postbuf;
- mlocal->pre_buf = *prebuf;
-
- mlocal->prebuf_blocks += prebuf->ia_blocks;
- mlocal->postbuf_blocks += postbuf->ia_blocks;
-
- correct_file_size(prebuf, mlocal->fctx, prev);
- correct_file_size(postbuf, mlocal->fctx, prev);
-
- if (mlocal->prebuf_size < prebuf->ia_size)
- mlocal->prebuf_size = prebuf->ia_size;
- if (mlocal->postbuf_size < postbuf->ia_size)
- mlocal->postbuf_size = postbuf->ia_size;
- }
-
- /* return the first failure */
- if (mlocal->op_ret == 0) {
- mlocal->op_ret = op_ret;
- mlocal->op_errno = op_errno;
- }
- }
- UNLOCK(&frame->lock);
-
- if ((callcnt == mlocal->wind_count) && mlocal->unwind) {
- mlocal->pre_buf.ia_size = mlocal->prebuf_size;
- mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks;
- mlocal->post_buf.ia_size = mlocal->postbuf_size;
- mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks;
-
- STRIPE_STACK_UNWIND(zerofill, mframe, mlocal->op_ret, mlocal->op_errno,
- &mlocal->pre_buf, &mlocal->post_buf, NULL);
- }
-out:
- STRIPE_STACK_DESTROY(frame);
- return 0;
-}
-
-int32_t
-stripe_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- off_t len, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_fd_ctx_t *fctx = NULL;
- int32_t op_errno = 1;
- int32_t idx = 0;
- int32_t offset_offset = 0;
- int32_t remaining_size = 0;
- off_t fill_size = 0;
- uint64_t stripe_size = 0;
- uint64_t tmp_fctx = 0;
- off_t dest_offset = 0;
- call_frame_t *fframe = NULL;
- stripe_local_t *flocal = NULL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(fd, err);
- VALIDATE_OR_GOTO(fd->inode, err);
-
- inode_ctx_get(fd->inode, this, &tmp_fctx);
- if (!tmp_fctx) {
- op_errno = EINVAL;
- goto err;
- }
- fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
- stripe_size = fctx->stripe_size;
-
- STRIPE_VALIDATE_FCTX(fctx, err);
-
- remaining_size = len;
-
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- frame->local = local;
- local->stripe_size = stripe_size;
- local->fctx = fctx;
-
- if (!stripe_size) {
- gf_log(this->name, GF_LOG_DEBUG, "Wrong stripe size for the file");
- op_errno = EINVAL;
- goto err;
- }
-
- while (1) {
- fframe = copy_frame(frame);
- flocal = mem_get0(this->local_pool);
- if (!flocal) {
- op_errno = ENOMEM;
- goto err;
- }
- flocal->orig_frame = frame;
- fframe->local = flocal;
-
- idx = (((offset + offset_offset) / local->stripe_size) %
- fctx->stripe_count);
-
- fill_size = (local->stripe_size -
- ((offset + offset_offset) % local->stripe_size));
- if (fill_size > remaining_size)
- fill_size = remaining_size;
-
- remaining_size -= fill_size;
-
- local->wind_count++;
- if (remaining_size == 0)
- local->unwind = 1;
-
- dest_offset = offset + offset_offset;
- if (fctx->stripe_coalesce)
- dest_offset = coalesced_offset(dest_offset, local->stripe_size,
- fctx->stripe_count);
-
- STACK_WIND(fframe, stripe_zerofill_cbk, fctx->xl_array[idx],
- fctx->xl_array[idx]->fops->zerofill, fd, dest_offset,
- fill_size, xdata);
- offset_offset += fill_size;
- if (remaining_size == 0)
- break;
- }
-
- return 0;
-err:
- if (fframe)
- STRIPE_STACK_DESTROY(fframe);
-
- STRIPE_STACK_UNWIND(zerofill, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- gf_seek_what_t what, dict_t *xdata)
-{
- /* TBD */
- gf_log(this->name, GF_LOG_INFO, "seek called on %s.",
- uuid_utoa(fd->inode->gfid));
- STRIPE_STACK_UNWIND(seek, frame, -1, ENOTSUP, 0, NULL);
- return 0;
-}
-
-int32_t
-stripe_release(xlator_t *this, fd_t *fd)
-{
- return 0;
-}
-
-int
-stripe_forget(xlator_t *this, inode_t *inode)
-{
- uint64_t tmp_fctx = 0;
- stripe_fd_ctx_t *fctx = NULL;
-
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(inode, err);
-
- (void)inode_ctx_del(inode, this, &tmp_fctx);
- if (!tmp_fctx) {
- goto err;
- }
-
- fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
-
- if (!fctx->static_array)
- GF_FREE(fctx->xl_array);
-
- GF_FREE(fctx);
-err:
- return 0;
-}
-
-int32_t
-notify(xlator_t *this, int32_t event, void *data, ...)
-{
- stripe_private_t *priv = NULL;
- int down_client = 0;
- int i = 0;
- gf_boolean_t heard_from_all_children = _gf_false;
-
- if (!this)
- return 0;
-
- priv = this->private;
- if (!priv)
- return 0;
-
- switch (event) {
- case GF_EVENT_CHILD_UP: {
- /* get an index number to set */
- for (i = 0; i < priv->child_count; i++) {
- if (data == priv->xl_array[i])
- break;
- }
-
- if (priv->child_count == i) {
- gf_log(this->name, GF_LOG_ERROR,
- "got GF_EVENT_CHILD_UP bad subvolume %s",
- data ? ((xlator_t *)data)->name : NULL);
- break;
- }
-
- LOCK(&priv->lock);
- {
- if (data == FIRST_CHILD(this))
- priv->first_child_down = 0;
- priv->last_event[i] = event;
- }
- UNLOCK(&priv->lock);
- } break;
- case GF_EVENT_CHILD_CONNECTING: {
- // 'CONNECTING' doesn't ensure its CHILD_UP, so do nothing
- goto out;
- }
- case GF_EVENT_CHILD_DOWN: {
- /* get an index number to set */
- for (i = 0; i < priv->child_count; i++) {
- if (data == priv->xl_array[i])
- break;
- }
-
- if (priv->child_count == i) {
- gf_log(this->name, GF_LOG_ERROR,
- "got GF_EVENT_CHILD_DOWN bad subvolume %s",
- data ? ((xlator_t *)data)->name : NULL);
- break;
- }
-
- LOCK(&priv->lock);
- {
- if (data == FIRST_CHILD(this))
- priv->first_child_down = 1;
- priv->last_event[i] = event;
- }
- UNLOCK(&priv->lock);
- } break;
-
- default: {
- /* */
- default_notify(this, event, data);
- goto out;
- } break;
- }
-
- // Consider child as down if it's last_event is not CHILD_UP
- for (i = 0, down_client = 0; i < priv->child_count; i++)
- if (priv->last_event[i] != GF_EVENT_CHILD_UP)
- down_client++;
-
- LOCK(&priv->lock);
- {
- priv->nodes_down = down_client;
- }
- UNLOCK(&priv->lock);
-
- heard_from_all_children = _gf_true;
- for (i = 0; i < priv->child_count; i++)
- if (!priv->last_event[i])
- heard_from_all_children = _gf_false;
-
- if (heard_from_all_children)
- default_notify(this, event, data);
-out:
- return 0;
-}
-
-int
-stripe_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
-{
- int ret = -1;
- int call_cnt = 0;
- stripe_local_t *local = NULL;
-
- if (!frame || !frame->local || !this) {
- gf_log("", GF_LOG_ERROR, "Possible NULL deref");
- return ret;
- }
-
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- call_cnt = --local->wind_count;
-
- /**
- * We overwrite ->op_* values here for subsequent failure
- * conditions, hence we propagate the last errno down the
- * stack.
- */
- if (op_ret < 0) {
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- goto unlock;
- }
- }
-
-unlock:
- UNLOCK(&frame->lock);
-
- if (!call_cnt) {
- STRIPE_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno,
- xdata);
- }
-
- return 0;
-}
-
-#ifdef HAVE_BD_XLATOR
-int
-stripe_is_bd(dict_t *this, char *key, data_t *value, void *data)
-{
- gf_boolean_t *is_bd = data;
-
- if (data == NULL)
- return 0;
-
- if (XATTR_IS_BD(key))
- *is_bd = _gf_true;
-
- return 0;
-}
-
-static gf_boolean_t
-stripe_setxattr_is_bd(dict_t *dict)
-{
- gf_boolean_t is_bd = _gf_false;
-
- if (dict == NULL)
- goto out;
-
- dict_foreach(dict, stripe_is_bd, &is_bd);
-out:
- return is_bd;
-}
-#else
-#define stripe_setxattr_is_bd(dict) _gf_false
-#endif
-
-int
-stripe_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
- int flags, dict_t *xdata)
-{
- int32_t op_errno = EINVAL;
- xlator_list_t *trav = NULL;
- stripe_private_t *priv = NULL;
- stripe_local_t *local = NULL;
- int i = 0;
- gf_boolean_t is_bd = _gf_false;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
- VALIDATE_OR_GOTO(loc->inode, err);
-
- GF_IF_INTERNAL_XATTR_GOTO("trusted.*stripe*", dict, op_errno, err);
-
- priv = this->private;
- trav = this->children;
-
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- frame->local = local;
- local->wind_count = priv->child_count;
- local->op_ret = local->op_errno = 0;
-
- is_bd = stripe_setxattr_is_bd(dict);
-
- /**
- * Set xattrs for directories on all subvolumes. Additionally
- * this power is only given to a special client. Bd xlator
- * also needs xattrs for regular files (ie LVs)
- */
- if (((frame->root->pid == GF_CLIENT_PID_GSYNCD) &&
- IA_ISDIR(loc->inode->ia_type)) ||
- is_bd) {
- for (i = 0; i < priv->child_count; i++, trav = trav->next) {
- STACK_WIND(frame, stripe_setxattr_cbk, trav->xlator,
- trav->xlator->fops->setxattr, loc, dict, flags, xdata);
- }
- } else {
- local->wind_count = 1;
- STACK_WIND(frame, stripe_setxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata);
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(setxattr, frame, -1, op_errno, NULL);
- return 0;
-}
-
-int
-stripe_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
-{
- STRIPE_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-int
-stripe_is_special_key(dict_t *this, char *key, data_t *value, void *data)
-{
- gf_boolean_t *is_special = NULL;
-
- if (data == NULL) {
- goto out;
- }
-
- is_special = data;
-
- if (XATTR_IS_LOCKINFO(key) || XATTR_IS_BD(key))
- *is_special = _gf_true;
-
-out:
- return 0;
-}
-
-int32_t
-stripe_fsetxattr_everyone_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- int call_count = 0;
- stripe_local_t *local = NULL;
-
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- call_count = --local->wind_count;
-
- if (op_ret < 0) {
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- }
- }
- UNLOCK(&frame->lock);
-
- if (call_count == 0) {
- STRIPE_STACK_UNWIND(fsetxattr, frame, local->op_ret, local->op_errno,
- NULL);
- }
- return 0;
-}
-
-int
-stripe_fsetxattr_to_everyone(call_frame_t *frame, xlator_t *this, fd_t *fd,
- dict_t *dict, int flags, dict_t *xdata)
-{
- xlator_list_t *trav = NULL;
- stripe_private_t *priv = NULL;
- int ret = -1;
- stripe_local_t *local = NULL;
-
- priv = this->private;
-
- local = mem_get0(this->local_pool);
- if (local == NULL) {
- goto out;
- }
-
- frame->local = local;
-
- local->wind_count = priv->child_count;
-
- trav = this->children;
-
- while (trav) {
- STACK_WIND(frame, stripe_fsetxattr_everyone_cbk, trav->xlator,
- trav->xlator->fops->fsetxattr, fd, dict, flags, xdata);
- trav = trav->next;
- }
-
- ret = 0;
-out:
- return ret;
-}
-
-static gf_boolean_t
-stripe_fsetxattr_is_special(dict_t *dict)
-{
- gf_boolean_t is_spl = _gf_false;
-
- if (dict == NULL) {
- goto out;
- }
-
- dict_foreach(dict, stripe_is_special_key, &is_spl);
-
-out:
- return is_spl;
-}
-
-int
-stripe_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
- int flags, dict_t *xdata)
-{
- int32_t op_ret = -1, ret = -1, op_errno = EINVAL;
- gf_boolean_t is_spl = _gf_false;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(fd, err);
-
- GF_IF_INTERNAL_XATTR_GOTO("trusted.*stripe*", dict, op_errno, err);
-
- is_spl = stripe_fsetxattr_is_special(dict);
- if (is_spl) {
- ret = stripe_fsetxattr_to_everyone(frame, this, fd, dict, flags, xdata);
- if (ret < 0) {
- op_errno = ENOMEM;
- goto err;
- }
-
- goto out;
- }
-
- STACK_WIND(frame, stripe_fsetxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
-out:
- return 0;
-err:
- STRIPE_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, NULL);
- return 0;
-}
-
-int
-stripe_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- STRIPE_STACK_UNWIND(removexattr, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-int
-stripe_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name, dict_t *xdata)
-{
- int32_t op_errno = EINVAL;
-
- VALIDATE_OR_GOTO(this, err);
-
- GF_IF_NATIVE_XATTR_GOTO("trusted.*stripe*", name, op_errno, err);
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(loc, err);
-
- STACK_WIND(frame, stripe_removexattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
- return 0;
-err:
- STRIPE_STACK_UNWIND(removexattr, frame, -1, op_errno, NULL);
- return 0;
-}
-
-int
-stripe_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- STRIPE_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-int
-stripe_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name, dict_t *xdata)
-{
- int32_t op_ret = -1;
- int32_t op_errno = EINVAL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(fd, err);
-
- GF_IF_NATIVE_XATTR_GOTO("trusted.*stripe*", name, op_errno, err);
-
- STACK_WIND(frame, stripe_fremovexattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
- return 0;
-err:
- STRIPE_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-int32_t
-stripe_readdirp_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, inode_t *inode,
- struct iatt *stbuf, dict_t *xattr,
- struct iatt *parent)
-{
- stripe_local_t *local = NULL;
- call_frame_t *main_frame = NULL;
- stripe_local_t *main_local = NULL;
- gf_dirent_t *entry = NULL;
- call_frame_t *prev = NULL;
- int done = 0;
-
- local = frame->local;
- prev = cookie;
-
- entry = local->dirent;
-
- main_frame = local->orig_frame;
- main_local = main_frame->local;
- LOCK(&frame->lock);
- {
- local->call_count--;
- if (!local->call_count)
- done = 1;
- if (op_ret == -1) {
- local->op_errno = op_errno;
- local->op_ret = op_ret;
- goto unlock;
- }
-
- if (stripe_ctx_handle(this, prev, local, xattr))
- gf_log(this->name, GF_LOG_ERROR,
- "Error getting fctx info from dict.");
-
- correct_file_size(stbuf, local->fctx, prev);
-
- stripe_iatt_merge(stbuf, &entry->d_stat);
- local->stbuf_blocks += stbuf->ia_blocks;
- }
-unlock:
- UNLOCK(&frame->lock);
-
- if (done) {
- inode_ctx_put(entry->inode, this, (uint64_t)(long)local->fctx);
-
- done = 0;
- LOCK(&main_frame->lock);
- {
- main_local->wind_count--;
- if (!main_local->wind_count)
- done = 1;
- if (local->op_ret == -1) {
- main_local->op_errno = local->op_errno;
- main_local->op_ret = local->op_ret;
- }
- entry->d_stat.ia_blocks = local->stbuf_blocks;
- }
- UNLOCK(&main_frame->lock);
- if (done) {
- main_frame->local = NULL;
- STRIPE_STACK_UNWIND(readdir, main_frame, main_local->op_ret,
- main_local->op_errno, &main_local->entries,
- NULL);
- gf_dirent_free(&main_local->entries);
- stripe_local_wipe(main_local);
- mem_put(main_local);
- }
- frame->local = NULL;
- stripe_local_wipe(local);
- mem_put(local);
- STRIPE_STACK_DESTROY(frame);
- }
-
- return 0;
-}
-
-int32_t
-stripe_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *orig_entries,
- dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
- gf_dirent_t *local_entry = NULL;
- gf_dirent_t *tmp_entry = NULL;
- xlator_list_t *trav = NULL;
- loc_t loc = {
- 0,
- };
- int32_t count = 0;
- stripe_private_t *priv = NULL;
- int32_t subvols = 0;
- dict_t *xattrs = NULL;
- call_frame_t *local_frame = NULL;
- stripe_local_t *local_ent = NULL;
-
- if (!this || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
- prev = cookie;
- local = frame->local;
- trav = this->children;
- priv = this->private;
-
- subvols = priv->child_count;
-
- LOCK(&frame->lock);
- {
- local->op_errno = op_errno;
- local->op_ret = op_ret;
-
- if (op_ret != -1) {
- list_splice_init(&orig_entries->list, &local->entries.list);
- local->wind_count = op_ret;
- }
- }
- UNLOCK(&frame->lock);
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_WARNING, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- goto out;
- }
-
- xattrs = dict_new();
- if (xattrs)
- (void)stripe_xattr_request_build(this, xattrs, 0, 0, 0, 0);
- count = op_ret;
- list_for_each_entry_safe(local_entry, tmp_entry, (&local->entries.list),
- list)
- {
- if (!local_entry)
- break;
- if (!IA_ISREG(local_entry->d_stat.ia_type) || !local_entry->inode) {
- LOCK(&frame->lock);
- {
- local->wind_count--;
- count = local->wind_count;
- }
- UNLOCK(&frame->lock);
- continue;
- }
-
- local_frame = copy_frame(frame);
-
- if (!local_frame) {
- op_errno = ENOMEM;
- op_ret = -1;
- goto out;
- }
-
- local_ent = mem_get0(this->local_pool);
- if (!local_ent) {
- op_errno = ENOMEM;
- op_ret = -1;
- goto out;
- }
-
- loc.inode = inode_ref(local_entry->inode);
-
- gf_uuid_copy(loc.gfid, local_entry->d_stat.ia_gfid);
-
- local_ent->orig_frame = frame;
-
- local_ent->call_count = subvols;
-
- local_ent->dirent = local_entry;
-
- local_frame->local = local_ent;
-
- trav = this->children;
- while (trav) {
- STACK_WIND(local_frame, stripe_readdirp_lookup_cbk, trav->xlator,
- trav->xlator->fops->lookup, &loc, xattrs);
- trav = trav->next;
- }
- loc_wipe(&loc);
- }
-out:
- if (!count) {
- /* all entries are directories */
- frame->local = NULL;
- STRIPE_STACK_UNWIND(readdir, frame, (local ? local->op_ret : -1),
- (local ? local->op_errno : EINVAL),
- (local ? &local->entries : NULL), NULL);
- gf_dirent_free(&local->entries);
- stripe_local_wipe(local);
- mem_put(local);
- }
- if (xattrs)
- dict_unref(xattrs);
- return 0;
-}
-int32_t
-stripe_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t off, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
- int op_errno = -1;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(fd, err);
-
- priv = this->private;
- trav = this->children;
-
- if (priv->first_child_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- frame->local = local;
-
- local->fd = fd_ref(fd);
-
- local->wind_count = 0;
-
- local->count = 0;
- local->op_ret = -1;
- INIT_LIST_HEAD(&local->entries);
-
- if (!trav)
- goto err;
-
- STACK_WIND(frame, stripe_readdirp_cbk, trav->xlator,
- trav->xlator->fops->readdirp, fd, size, off, xdata);
- return 0;
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- STRIPE_STACK_UNWIND(readdir, frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-int32_t
-mem_acct_init(xlator_t *this)
-{
- int ret = -1;
-
- if (!this)
- goto out;
-
- ret = xlator_mem_acct_init(this, gf_stripe_mt_end + 1);
-
- if (ret != 0) {
- gf_log(this->name, GF_LOG_ERROR,
- "Memory accounting init"
- "failed");
- goto out;
- }
-
-out:
- return ret;
-}
-
-static int
-clear_pattern_list(stripe_private_t *priv)
-{
- struct stripe_options *prev = NULL;
- struct stripe_options *trav = NULL;
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO("stripe", priv, out);
-
- trav = priv->pattern;
- priv->pattern = NULL;
- while (trav) {
- prev = trav;
- trav = trav->next;
- GF_FREE(prev);
- }
-
- ret = 0;
-out:
- return ret;
-}
-
-int
-reconfigure(xlator_t *this, dict_t *options)
-{
- stripe_private_t *priv = NULL;
- data_t *data = NULL;
- int ret = -1;
- volume_option_t *opt = NULL;
-
- GF_ASSERT(this);
- GF_ASSERT(this->private);
-
- priv = this->private;
-
- ret = 0;
- LOCK(&priv->lock);
- {
- ret = clear_pattern_list(priv);
- if (ret)
- goto unlock;
-
- data = dict_get(options, "block-size");
- if (data) {
- ret = set_stripe_block_size(this, priv, data->data);
- if (ret)
- goto unlock;
- } else {
- opt = xlator_volume_option_get(this, "block-size");
- if (!opt) {
- gf_log(this->name, GF_LOG_WARNING,
- "option 'block-size' not found");
- ret = -1;
- goto unlock;
- }
-
- if (gf_string2bytesize_uint64(opt->default_value,
- &priv->block_size)) {
- gf_log(this->name, GF_LOG_ERROR,
- "Unable to set default block-size ");
- ret = -1;
- goto unlock;
- }
- }
-
- GF_OPTION_RECONF("coalesce", priv->coalesce, options, bool, unlock);
- }
-unlock:
- UNLOCK(&priv->lock);
- if (ret)
- goto out;
-
- ret = 0;
-out:
- return ret;
-}
-
-/**
- * init - This function is called when xlator-graph gets initialized.
- * The option given in volfiles are parsed here.
- * @this -
- */
-int32_t
-init(xlator_t *this)
-{
- stripe_private_t *priv = NULL;
- volume_option_t *opt = NULL;
- xlator_list_t *trav = NULL;
- data_t *data = NULL;
- int32_t count = 0;
- int ret = -1;
-
- if (!this)
- goto out;
-
- trav = this->children;
- while (trav) {
- count++;
- trav = trav->next;
- }
-
- if (!count) {
- gf_log(this->name, GF_LOG_ERROR,
- "stripe configured without \"subvolumes\" option. "
- "exiting");
- goto out;
- }
-
- if (!this->parents) {
- gf_log(this->name, GF_LOG_WARNING, "dangling volume. check volfile ");
- }
-
- if (count == 1) {
- gf_log(this->name, GF_LOG_ERROR,
- "stripe configured with only one \"subvolumes\" option."
- " please check the volume. exiting");
- goto out;
- }
-
- priv = GF_CALLOC(1, sizeof(stripe_private_t),
- gf_stripe_mt_stripe_private_t);
-
- if (!priv)
- goto out;
- priv->xl_array = GF_CALLOC(count, sizeof(xlator_t *),
- gf_stripe_mt_xlator_t);
- if (!priv->xl_array)
- goto out;
-
- priv->last_event = GF_CALLOC(count, sizeof(int), gf_stripe_mt_int32_t);
- if (!priv->last_event)
- goto out;
-
- priv->child_count = count;
- LOCK_INIT(&priv->lock);
-
- trav = this->children;
- count = 0;
- while (trav) {
- priv->xl_array[count++] = trav->xlator;
- trav = trav->next;
- }
-
- if (count > 256) {
- gf_log(this->name, GF_LOG_ERROR,
- "maximum number of stripe subvolumes supported "
- "is 256");
- goto out;
- }
-
- ret = 0;
- LOCK(&priv->lock);
- {
- opt = xlator_volume_option_get(this, "block-size");
- if (!opt) {
- gf_log(this->name, GF_LOG_WARNING, "option 'block-size' not found");
- ret = -1;
- goto unlock;
- }
- if (gf_string2bytesize_uint64(opt->default_value, &priv->block_size)) {
- gf_log(this->name, GF_LOG_ERROR,
- "Unable to set default block-size ");
- ret = -1;
- goto unlock;
- }
- /* option stripe-pattern *avi:1GB,*pdf:16K */
- data = dict_get(this->options, "block-size");
- if (data) {
- ret = set_stripe_block_size(this, priv, data->data);
- if (ret)
- goto unlock;
- }
- }
-unlock:
- UNLOCK(&priv->lock);
- if (ret)
- goto out;
-
- GF_OPTION_INIT("use-xattr", priv->xattr_supported, bool, out);
- /* notify related */
- priv->nodes_down = priv->child_count;
-
- GF_OPTION_INIT("coalesce", priv->coalesce, bool, out);
-
- this->local_pool = mem_pool_new(stripe_local_t, 128);
- if (!this->local_pool) {
- ret = -1;
- gf_log(this->name, GF_LOG_ERROR,
- "failed to create local_t's memory pool");
- goto out;
- }
-
- this->private = priv;
-
- ret = 0;
-out:
- if (ret) {
- if (priv) {
- GF_FREE(priv->xl_array);
- GF_FREE(priv);
- }
- }
- return ret;
-}
-
-/**
- * fini - Free all the private variables
- * @this -
- */
-void
-fini(xlator_t *this)
-{
- stripe_private_t *priv = NULL;
- struct stripe_options *prev = NULL;
- struct stripe_options *trav = NULL;
-
- if (!this)
- goto out;
-
- priv = this->private;
- if (priv) {
- this->private = NULL;
- GF_FREE(priv->xl_array);
-
- trav = priv->pattern;
- while (trav) {
- prev = trav;
- trav = trav->next;
- GF_FREE(prev);
- }
- GF_FREE(priv->last_event);
- LOCK_DESTROY(&priv->lock);
- GF_FREE(priv);
- }
-
-out:
- return;
-}
-
-int32_t
-stripe_getxattr_unwind(call_frame_t *frame, int op_ret, int op_errno,
- dict_t *dict, dict_t *xdata)
-
-{
- STRIPE_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata);
- return 0;
-}
-
-int
-stripe_internal_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xattr,
- dict_t *xdata)
-{
- char size_key[256] = {
- 0,
- };
- char index_key[256] = {
- 0,
- };
- char count_key[256] = {
- 0,
- };
- char coalesce_key[256] = {
- 0,
- };
-
- VALIDATE_OR_GOTO(frame, out);
- VALIDATE_OR_GOTO(frame->local, out);
-
- if (!xattr || (op_ret == -1))
- goto out;
-
- sprintf(size_key, "trusted.%s.stripe-size", this->name);
- sprintf(count_key, "trusted.%s.stripe-count", this->name);
- sprintf(index_key, "trusted.%s.stripe-index", this->name);
- sprintf(coalesce_key, "trusted.%s.stripe-coalesce", this->name);
-
- dict_del(xattr, size_key);
- dict_del(xattr, count_key);
- dict_del(xattr, index_key);
- dict_del(xattr, coalesce_key);
-
-out:
- STRIPE_STACK_UNWIND(getxattr, frame, op_ret, op_errno, xattr, xdata);
-
- return 0;
-}
-
-int
-stripe_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
-{
- int call_cnt = 0;
- stripe_local_t *local = NULL;
-
- VALIDATE_OR_GOTO(frame, out);
- VALIDATE_OR_GOTO(frame->local, out);
-
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- call_cnt = --local->wind_count;
- }
- UNLOCK(&frame->lock);
-
- if (!xattr || (op_ret < 0))
- goto out;
-
- local->op_ret = 0;
-
- if (!local->xattr) {
- local->xattr = dict_ref(xattr);
- } else {
- stripe_aggregate_xattr(local->xattr, xattr);
- }
-
-out:
- if (!call_cnt) {
- STRIPE_STACK_UNWIND(getxattr, frame, (local ? local->op_ret : -1),
- op_errno, (local ? local->xattr : NULL), xdata);
- }
-
- return 0;
-}
-
-int32_t
-stripe_vgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict,
- dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- int32_t callcnt = 0;
- int32_t ret = -1;
- long cky = 0;
- void *xattr_val = NULL;
- void *xattr_serz = NULL;
- stripe_xattr_sort_t *xattr = NULL;
- dict_t *stripe_xattr = NULL;
-
- if (!frame || !frame->local || !this) {
- gf_log("", GF_LOG_ERROR, "Possible NULL deref");
- return ret;
- }
-
- local = frame->local;
- cky = (long)cookie;
-
- if (local->xsel[0] == '\0') {
- gf_log(this->name, GF_LOG_ERROR, "Empty xattr in cbk");
- return ret;
- }
-
- LOCK(&frame->lock);
- {
- callcnt = --local->wind_count;
-
- if (!dict || (op_ret < 0))
- goto out;
-
- if (!local->xattr_list)
- local->xattr_list = (stripe_xattr_sort_t *)GF_CALLOC(
- local->nallocs, sizeof(stripe_xattr_sort_t),
- gf_stripe_mt_xattr_sort_t);
-
- if (local->xattr_list) {
- xattr = local->xattr_list + (int32_t)cky;
-
- ret = dict_get_ptr_and_len(dict, local->xsel, &xattr_val,
- &xattr->xattr_len);
- if (xattr->xattr_len == 0)
- goto out;
-
- xattr->pos = cky;
- xattr->xattr_value = gf_memdup(xattr_val, xattr->xattr_len);
-
- if (xattr->xattr_value != NULL)
- local->xattr_total_len += xattr->xattr_len + 1;
- }
- }
-out:
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- if (!local->xattr_total_len)
- goto unwind;
-
- stripe_xattr = dict_new();
- if (!stripe_xattr)
- goto unwind;
-
- /* select filler based on ->xsel */
- if (XATTR_IS_PATHINFO(local->xsel))
- ret = stripe_fill_pathinfo_xattr(this, local, (char **)&xattr_serz);
- else if (XATTR_IS_LOCKINFO(local->xsel)) {
- ret = stripe_fill_lockinfo_xattr(this, local, &xattr_serz);
- } else {
- gf_log(this->name, GF_LOG_WARNING,
- "Unknown xattr in xattr request");
- goto unwind;
- }
-
- if (!ret) {
- ret = dict_set_dynptr(stripe_xattr, local->xsel, xattr_serz,
- local->xattr_total_len);
- if (ret)
- gf_log(this->name, GF_LOG_ERROR, "Can't set %s key in dict",
- local->xsel);
- }
-
- unwind:
- /*
- * Among other things, STRIPE_STACK_UNWIND will free "local"
- * for us. That means we can't dereference it afterward.
- * Fortunately, the actual result is in stripe_xattr now, so we
- * can simply clean up before unwinding.
- */
- ret = stripe_free_xattr_str(local);
- GF_FREE(local->xattr_list);
- local->xattr_list = NULL;
-
- STRIPE_STACK_UNWIND(getxattr, frame, op_ret, op_errno, stripe_xattr,
- NULL);
-
- if (stripe_xattr)
- dict_unref(stripe_xattr);
- }
-
- return ret;
-}
-
-int
-stripe_marker_populate_args(call_frame_t *frame, int type, int *gauge,
- xlator_t **subvols)
-{
- xlator_t *this = frame->this;
- stripe_private_t *priv = this->private;
- stripe_local_t *local = frame->local;
- int count = 0;
-
- count = priv->child_count;
- if (MARKER_XTIME_TYPE == type) {
- if (!IA_FILE_OR_DIR(local->loc.inode->ia_type))
- count = 1;
- }
- memcpy(subvols, priv->xl_array, sizeof(*subvols) * count);
-
- return count;
-}
-
-int32_t
-stripe_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- stripe_private_t *priv = NULL;
- int32_t op_errno = EINVAL;
- int i = 0;
- int ret = 0;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
- VALIDATE_OR_GOTO(loc->path, err);
- VALIDATE_OR_GOTO(loc->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- loc_copy(&local->loc, loc);
-
- if (name && strncmp(name, QUOTA_SIZE_KEY, SLEN(QUOTA_SIZE_KEY)) == 0) {
- local->wind_count = priv->child_count;
-
- for (i = 0, trav = this->children; i < priv->child_count;
- i++, trav = trav->next) {
- STACK_WIND(frame, stripe_getxattr_cbk, trav->xlator,
- trav->xlator->fops->getxattr, loc, name, xdata);
- }
-
- return 0;
- }
-
- if (name && (XATTR_IS_PATHINFO(name))) {
- if (IA_ISREG(loc->inode->ia_type)) {
- ret = inode_ctx_get(loc->inode, this, (uint64_t *)&local->fctx);
- if (ret)
- gf_log(this->name, GF_LOG_ERROR,
- "stripe size unavailable from fctx"
- " relying on pathinfo could lead to"
- " wrong results");
- }
-
- local->nallocs = local->wind_count = priv->child_count;
- (void)strncpy(local->xsel, name, strlen(name));
-
- /**
- * for xattrs that need info from all children, fill ->xsel
- * as above and call the filler function in cbk based on
- * it
- */
- for (i = 0, trav = this->children; i < priv->child_count;
- i++, trav = trav->next) {
- STACK_WIND_COOKIE(frame, stripe_vgetxattr_cbk, (void *)(long)i,
- trav->xlator, trav->xlator->fops->getxattr, loc,
- name, xdata);
- }
-
- return 0;
- }
-
- if (cluster_handle_marker_getxattr(frame, loc, name, priv->vol_uuid,
- stripe_getxattr_unwind,
- stripe_marker_populate_args) == 0)
- return 0;
-
- STACK_WIND(frame, stripe_internal_getxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
-
- return 0;
-
-err:
- STRIPE_STACK_UNWIND(getxattr, frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-static gf_boolean_t
-stripe_is_special_xattr(const char *name)
-{
- gf_boolean_t is_spl = _gf_false;
-
- if (!name) {
- goto out;
- }
-
- if (!strncmp(name, GF_XATTR_LOCKINFO_KEY, SLEN(GF_XATTR_LOCKINFO_KEY)) ||
- XATTR_IS_PATHINFO(name))
- is_spl = _gf_true;
-out:
- return is_spl;
-}
-
-int32_t
-stripe_fgetxattr_from_everyone(call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- int32_t ret = -1, op_errno = 0;
- int i = 0;
- xlator_list_t *trav = NULL;
-
- priv = this->private;
-
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- local->op_ret = -1;
- frame->local = local;
-
- strncpy(local->xsel, name, strlen(name));
- local->nallocs = local->wind_count = priv->child_count;
-
- for (i = 0, trav = this->children; i < priv->child_count;
- i++, trav = trav->next) {
- STACK_WIND_COOKIE(frame, stripe_vgetxattr_cbk, (void *)(long)i,
- trav->xlator, trav->xlator->fops->fgetxattr, fd, name,
- xdata);
- }
-
- return 0;
-
-err:
- STACK_UNWIND_STRICT(fgetxattr, frame, -1, op_errno, NULL, NULL);
- return ret;
-}
-
-int32_t
-stripe_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name, dict_t *xdata)
-{
- if (stripe_is_special_xattr(name)) {
- stripe_fgetxattr_from_everyone(frame, this, fd, name, xdata);
- goto out;
- }
-
- STACK_WIND(frame, stripe_internal_getxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
-
-out:
- return 0;
-}
-
-int32_t
-stripe_priv_dump(xlator_t *this)
-{
- char key[GF_DUMP_MAX_BUF_LEN];
- int i = 0;
- stripe_private_t *priv = NULL;
- int ret = -1;
- struct stripe_options *options = NULL;
-
- GF_VALIDATE_OR_GOTO("stripe", this, out);
-
- priv = this->private;
- if (!priv)
- goto out;
-
- ret = TRY_LOCK(&priv->lock);
- if (ret != 0)
- goto out;
-
- gf_proc_dump_add_section("xlator.cluster.stripe.%s.priv", this->name);
- gf_proc_dump_write("child_count", "%d", priv->child_count);
-
- for (i = 0; i < priv->child_count; i++) {
- sprintf(key, "subvolumes[%d]", i);
- gf_proc_dump_write(key, "%s.%s", priv->xl_array[i]->type,
- priv->xl_array[i]->name);
- }
-
- options = priv->pattern;
- while (options != NULL) {
- gf_proc_dump_write("path_pattern", "%s", priv->pattern->path_pattern);
- gf_proc_dump_write("options_block_size", "%ul", options->block_size);
-
- options = options->next;
- }
-
- gf_proc_dump_write("block_size", "%ul", priv->block_size);
- gf_proc_dump_write("nodes-down", "%d", priv->nodes_down);
- gf_proc_dump_write("first-child_down", "%d", priv->first_child_down);
- gf_proc_dump_write("xattr_supported", "%d", priv->xattr_supported);
-
- UNLOCK(&priv->lock);
-
-out:
- return ret;
-}
-
-struct xlator_fops fops = {
- .stat = stripe_stat,
- .unlink = stripe_unlink,
- .rename = stripe_rename,
- .link = stripe_link,
- .truncate = stripe_truncate,
- .create = stripe_create,
- .open = stripe_open,
- .readv = stripe_readv,
- .writev = stripe_writev,
- .statfs = stripe_statfs,
- .flush = stripe_flush,
- .fsync = stripe_fsync,
- .ftruncate = stripe_ftruncate,
- .fstat = stripe_fstat,
- .mkdir = stripe_mkdir,
- .rmdir = stripe_rmdir,
- .lk = stripe_lk,
- .opendir = stripe_opendir,
- .fsyncdir = stripe_fsyncdir,
- .setattr = stripe_setattr,
- .fsetattr = stripe_fsetattr,
- .lookup = stripe_lookup,
- .mknod = stripe_mknod,
- .setxattr = stripe_setxattr,
- .fsetxattr = stripe_fsetxattr,
- .getxattr = stripe_getxattr,
- .fgetxattr = stripe_fgetxattr,
- .removexattr = stripe_removexattr,
- .fremovexattr = stripe_fremovexattr,
- .readdirp = stripe_readdirp,
- .fallocate = stripe_fallocate,
- .discard = stripe_discard,
- .zerofill = stripe_zerofill,
- .seek = stripe_seek,
-};
-
-struct xlator_cbks cbks = {
- .release = stripe_release,
- .forget = stripe_forget,
-};
-
-struct xlator_dumpops dumpops = {
- .priv = stripe_priv_dump,
-};
-
-struct volume_options options[] = {
- {
- .key = {"block-size"},
- .type = GF_OPTION_TYPE_SIZE_LIST,
- .default_value = "128KB",
- .min = STRIPE_MIN_BLOCK_SIZE,
- .description = "Size of the stripe unit that would be read "
- "from or written to the striped servers.",
- .op_version = {1},
- .tags = {"stripe"},
- .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE,
- },
- {
- .key = {"use-xattr"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "true",
- .description = "handle the stripe without the xattr",
- .tags = {"stripe", "dev-only"},
- .flags = OPT_FLAG_CLIENT_OPT,
- },
- {
- .key = {"coalesce"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "true",
- .description = "Enable/Disable coalesce mode to flatten striped "
- "files as stored on the server (i.e., eliminate holes "
- "caused by the traditional format).",
- .op_version = {1},
- .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE,
- .tags = {"stripe"},
- },
- {.key = {NULL}},
-};
diff --git a/xlators/cluster/stripe/src/stripe.h b/xlators/cluster/stripe/src/stripe.h
deleted file mode 100644
index 88c24b6..0000000
--- a/xlators/cluster/stripe/src/stripe.h
+++ /dev/null
@@ -1,291 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _STRIPE_H_
-#define _STRIPE_H_
-
-#include <glusterfs/xlator.h>
-#include <glusterfs/logging.h>
-#include <glusterfs/defaults.h>
-#include <glusterfs/common-utils.h>
-#include <glusterfs/compat.h>
-#include <glusterfs/compat-errno.h>
-#include "stripe-mem-types.h"
-#include "libxlator.h"
-#include <fnmatch.h>
-#include <signal.h>
-
-#define STRIPE_PATHINFO_HEADER "STRIPE:"
-#define STRIPE_MIN_BLOCK_SIZE (16 * GF_UNIT_KB)
-
-#define STRIPE_STACK_UNWIND(fop, frame, params...) \
- do { \
- stripe_local_t *__local = NULL; \
- if (frame) { \
- __local = frame->local; \
- frame->local = NULL; \
- } \
- STACK_UNWIND_STRICT(fop, frame, params); \
- if (__local) { \
- stripe_local_wipe(__local); \
- mem_put(__local); \
- } \
- } while (0)
-
-#define STRIPE_STACK_DESTROY(frame) \
- do { \
- stripe_local_t *__local = NULL; \
- __local = frame->local; \
- frame->local = NULL; \
- STACK_DESTROY(frame->root); \
- if (__local) { \
- stripe_local_wipe(__local); \
- mem_put(__local); \
- } \
- } while (0)
-
-#define STRIPE_VALIDATE_FCTX(fctx, label) \
- do { \
- int idx = 0; \
- if (!fctx) { \
- op_errno = EINVAL; \
- goto label; \
- } \
- for (idx = 0; idx < fctx->stripe_count; idx++) { \
- if (!fctx->xl_array[idx]) { \
- gf_log(this->name, GF_LOG_ERROR, "fctx->xl_array[%d] is NULL", \
- idx); \
- op_errno = ESTALE; \
- goto label; \
- } \
- } \
- } while (0)
-
-typedef struct stripe_xattr_sort {
- int pos;
- int xattr_len;
- char *xattr_value;
-} stripe_xattr_sort_t;
-
-/**
- * struct stripe_options : This keeps the pattern and the block-size
- * information, which is used for striping on a file.
- */
-struct stripe_options {
- struct stripe_options *next;
- char path_pattern[256];
- uint64_t block_size;
-};
-
-/**
- * Private structure for stripe translator
- */
-struct stripe_private {
- struct stripe_options *pattern;
- xlator_t **xl_array;
- uint64_t block_size;
- gf_lock_t lock;
- uint8_t nodes_down;
- int8_t first_child_down;
- int *last_event;
- int8_t child_count;
- gf_boolean_t xattr_supported; /* default yes */
- gf_boolean_t coalesce;
- char vol_uuid[UUID_SIZE + 1];
-};
-
-/**
- * Used to keep info about the replies received from readv/writev calls
- */
-struct stripe_replies {
- struct iovec *vector;
- int32_t count; // count of vector
- int32_t op_ret; // op_ret of readv
- int32_t op_errno;
- int32_t requested_size;
- struct iatt stbuf; /* 'stbuf' is also a part of reply */
-};
-
-typedef struct _stripe_fd_ctx {
- off_t stripe_size;
- int stripe_count;
- int stripe_coalesce;
- int static_array;
- xlator_t **xl_array;
-} stripe_fd_ctx_t;
-
-/**
- * Local structure to be passed with all the frames in case of STACK_WIND
- */
-struct stripe_local; /* this itself is used inside the structure; */
-
-struct stripe_local {
- struct stripe_local *next;
- call_frame_t *orig_frame;
-
- stripe_fd_ctx_t *fctx;
-
- /* Used by _cbk functions */
- struct iatt stbuf;
- struct iatt pre_buf;
- struct iatt post_buf;
- struct iatt preparent;
- struct iatt postparent;
-
- off_t stbuf_size;
- off_t prebuf_size;
- off_t postbuf_size;
- off_t preparent_size;
- off_t postparent_size;
-
- blkcnt_t stbuf_blocks;
- blkcnt_t prebuf_blocks;
- blkcnt_t postbuf_blocks;
- blkcnt_t preparent_blocks;
- blkcnt_t postparent_blocks;
-
- struct stripe_replies *replies;
- struct statvfs statvfs_buf;
- dir_entry_t *entry;
-
- int8_t revalidate;
- int8_t failed;
- int8_t unwind;
-
- size_t readv_size;
- int32_t entry_count;
- int32_t node_index;
- int32_t call_count;
- int32_t wind_count; /* used instead of child_cound
- in case of read and write */
- int32_t op_ret;
- int32_t op_errno;
- int32_t count;
- int32_t flags;
- char *name;
- inode_t *inode;
-
- loc_t loc;
- loc_t loc2;
-
- mode_t mode;
- dev_t rdev;
- /* For File I/O fops */
- dict_t *xdata;
-
- stripe_xattr_sort_t *xattr_list;
- int32_t xattr_total_len;
- int32_t nallocs;
- char xsel[256];
-
- /* General usage */
- off_t offset;
- off_t stripe_size;
-
- int xattr_self_heal_needed;
- int entry_self_heal_needed;
-
- int8_t *list;
- struct gf_flock lock;
- fd_t *fd;
- void *value;
- struct iobref *iobref;
- gf_dirent_t entries;
- gf_dirent_t *dirent;
- dict_t *xattr;
- uuid_t ia_gfid;
-
- int xflag;
- mode_t umask;
-};
-
-typedef struct stripe_local stripe_local_t;
-typedef struct stripe_private stripe_private_t;
-
-/*
- * Determine the stripe index of a particular frame based on the translator.
- */
-static inline int32_t
-stripe_get_frame_index(stripe_fd_ctx_t *fctx, call_frame_t *prev)
-{
- int32_t i, idx = -1;
-
- for (i = 0; i < fctx->stripe_count; i++) {
- if (fctx->xl_array[i] == prev->this) {
- idx = i;
- break;
- }
- }
-
- return idx;
-}
-
-static inline void
-stripe_copy_xl_array(xlator_t **dst, xlator_t **src, int count)
-{
- int i;
-
- for (i = 0; i < count; i++)
- dst[i] = src[i];
-}
-
-void
-stripe_local_wipe(stripe_local_t *local);
-int32_t
-stripe_ctx_handle(xlator_t *this, call_frame_t *prev, stripe_local_t *local,
- dict_t *dict);
-void
-stripe_aggregate_xattr(dict_t *dst, dict_t *src);
-int32_t
-stripe_xattr_request_build(xlator_t *this, dict_t *dict, uint64_t stripe_size,
- uint32_t stripe_count, uint32_t stripe_index,
- uint32_t stripe_coalesce);
-int32_t
-stripe_get_matching_bs(const char *path, stripe_private_t *priv);
-int
-set_stripe_block_size(xlator_t *this, stripe_private_t *priv, char *data);
-int32_t
-stripe_iatt_merge(struct iatt *from, struct iatt *to);
-int32_t
-stripe_fill_pathinfo_xattr(xlator_t *this, stripe_local_t *local,
- char **xattr_serz);
-int32_t
-stripe_free_xattr_str(stripe_local_t *local);
-int32_t
-stripe_xattr_aggregate(char *buffer, stripe_local_t *local, int32_t *total);
-off_t
-coalesced_offset(off_t offset, uint64_t stripe_size, int stripe_count);
-off_t
-uncoalesced_size(off_t size, uint64_t stripe_size, int stripe_count,
- int stripe_index);
-int32_t
-stripe_fill_lockinfo_xattr(xlator_t *this, stripe_local_t *local,
- void **xattr_serz);
-
-/*
- * Adjust the size attribute for files if coalesce is enabled.
- */
-static inline void
-correct_file_size(struct iatt *buf, stripe_fd_ctx_t *fctx, call_frame_t *prev)
-{
- int index;
-
- if (!IA_ISREG(buf->ia_type))
- return;
-
- if (!fctx || !fctx->stripe_coalesce)
- return;
-
- index = stripe_get_frame_index(fctx, prev);
- buf->ia_size = uncoalesced_size(buf->ia_size, fctx->stripe_size,
- fctx->stripe_count, index);
-}
-
-#endif /* _STRIPE_H_ */
diff --git a/xlators/encryption/Makefile.am b/xlators/encryption/Makefile.am
deleted file mode 100644
index 36efc66..0000000
--- a/xlators/encryption/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = rot-13 crypt
-
-CLEANFILES =
diff --git a/xlators/encryption/crypt/Makefile.am b/xlators/encryption/crypt/Makefile.am
deleted file mode 100644
index d471a3f..0000000
--- a/xlators/encryption/crypt/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = src
-
-CLEANFILES =
diff --git a/xlators/encryption/crypt/src/Makefile.am b/xlators/encryption/crypt/src/Makefile.am
deleted file mode 100644
index 05fd3d5..0000000
--- a/xlators/encryption/crypt/src/Makefile.am
+++ /dev/null
@@ -1,26 +0,0 @@
-if ENABLE_CRYPT_XLATOR
-
-xlator_LTLIBRARIES = crypt.la
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/encryption
-
-crypt_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
-
-crypt_la_SOURCES = keys.c data.c metadata.c atom.c crypt.c
-crypt_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
- -lssl -lcrypto
-
-noinst_HEADERS = crypt-common.h crypt-mem-types.h crypt.h metadata.h
-
-AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
- -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
-
-AM_CFLAGS = -Wall $(GF_CFLAGS)
-
-CLEANFILES =
-
-else
-
-noinst_DIST = keys.c data.c metadata.c atom.c crypt.c
-noinst_HEADERS = crypt-common.h crypt-mem-types.h crypt.h metadata.h
-
-endif
diff --git a/xlators/encryption/crypt/src/atom.c b/xlators/encryption/crypt/src/atom.c
deleted file mode 100644
index bdc37c5..0000000
--- a/xlators/encryption/crypt/src/atom.c
+++ /dev/null
@@ -1,861 +0,0 @@
-/*
- Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include <glusterfs/defaults.h>
-#include "crypt-common.h"
-#include "crypt.h"
-
-/*
- * Glossary
- *
- *
- * cblock (or cipher block). A logical unit in a file.
- * cblock size is defined as the number of bits
- * in an input (or output) block of the block
- * cipher (*). Cipher block size is a property of
- * cipher algorithm. E.g. cblock size is 64 bits
- * for DES, 128 bits for AES, etc.
- *
- * atomic cipher A cipher algorithm, which requires some chunks of
- * algorithm text to be padded at left and(or) right sides before
- * cipher transaform.
- *
- *
- * block (atom) Minimal chunk of file's data, which doesn't require
- * padding. We'll consider logical units in a file of
- * block size (atom size).
- *
- * cipher algorithm Atomic cipher algorithm, which requires the last
- * with EOF issue incomplete cblock in a file to be padded with some
- * data (usually zeros).
- *
- *
- * operation, which reading/writing from offset, which is not aligned to
- * forms a gap at to atom size
- * the beginning
- *
- *
- * operation, which reading/writing count bytes starting from offset off,
- * forms a gap at so that off+count is not aligned to atom_size
- * the end
- *
- * head block the first atom affected by an operation, which forms
- * a gap at the beginning, or(and) at the end.
- * –°omment. Head block has at least one gap (either at
- * the beginning, or at the end)
- *
- *
- * tail block the last atom different from head, affected by an
- * operation, which forms a gap at the end.
- * –°omment: Tail block has exactly one gap (at the end).
- *
- *
- * partial block head or tail block
- *
- *
- * full block block without gaps.
- *
- *
- * (*) Recommendation for Block Cipher Modes of Operation
- * Methods and Techniques
- * NIST Special Publication 800-38A Edition 2001
- */
-
-/*
- * atom->offset_at()
- */
-static off_t
-offset_at_head(struct avec_config *conf)
-{
- return conf->aligned_offset;
-}
-
-static off_t
-offset_at_hole_head(call_frame_t *frame, struct object_cipher_info *object)
-{
- return offset_at_head(get_hole_conf(frame));
-}
-
-static off_t
-offset_at_data_head(call_frame_t *frame, struct object_cipher_info *object)
-{
- return offset_at_head(get_data_conf(frame));
-}
-
-static off_t
-offset_at_tail(struct avec_config *conf, struct object_cipher_info *object)
-{
- return conf->aligned_offset +
- (conf->off_in_head ? get_atom_size(object) : 0) +
- (conf->nr_full_blocks << get_atom_bits(object));
-}
-
-static off_t
-offset_at_hole_tail(call_frame_t *frame, struct object_cipher_info *object)
-{
- return offset_at_tail(get_hole_conf(frame), object);
-}
-
-static off_t
-offset_at_data_tail(call_frame_t *frame, struct object_cipher_info *object)
-{
- return offset_at_tail(get_data_conf(frame), object);
-}
-
-static off_t
-offset_at_full(struct avec_config *conf, struct object_cipher_info *object)
-{
- return conf->aligned_offset +
- (conf->off_in_head ? get_atom_size(object) : 0);
-}
-
-static off_t
-offset_at_data_full(call_frame_t *frame, struct object_cipher_info *object)
-{
- return offset_at_full(get_data_conf(frame), object);
-}
-
-static off_t
-offset_at_hole_full(call_frame_t *frame, struct object_cipher_info *object)
-{
- return offset_at_full(get_hole_conf(frame), object);
-}
-
-/*
- * atom->io_size_nopad()
- */
-
-static uint32_t
-io_size_nopad_head(struct avec_config *conf, struct object_cipher_info *object)
-{
- uint32_t gap_at_beg;
- uint32_t gap_at_end;
-
- check_head_block(conf);
-
- gap_at_beg = conf->off_in_head;
-
- if (has_tail_block(conf) || has_full_blocks(conf) || conf->off_in_tail == 0)
- gap_at_end = 0;
- else
- gap_at_end = get_atom_size(object) - conf->off_in_tail;
-
- return get_atom_size(object) - (gap_at_beg + gap_at_end);
-}
-
-static uint32_t
-io_size_nopad_tail(struct avec_config *conf, struct object_cipher_info *object)
-{
- check_tail_block(conf);
- return conf->off_in_tail;
-}
-
-static uint32_t
-io_size_nopad_full(struct avec_config *conf, struct object_cipher_info *object)
-{
- check_full_block(conf);
- return get_atom_size(object);
-}
-
-static uint32_t
-io_size_nopad_data_head(call_frame_t *frame, struct object_cipher_info *object)
-{
- return io_size_nopad_head(get_data_conf(frame), object);
-}
-
-static uint32_t
-io_size_nopad_hole_head(call_frame_t *frame, struct object_cipher_info *object)
-{
- return io_size_nopad_head(get_hole_conf(frame), object);
-}
-
-static uint32_t
-io_size_nopad_data_tail(call_frame_t *frame, struct object_cipher_info *object)
-{
- return io_size_nopad_tail(get_data_conf(frame), object);
-}
-
-static uint32_t
-io_size_nopad_hole_tail(call_frame_t *frame, struct object_cipher_info *object)
-{
- return io_size_nopad_tail(get_hole_conf(frame), object);
-}
-
-static uint32_t
-io_size_nopad_data_full(call_frame_t *frame, struct object_cipher_info *object)
-{
- return io_size_nopad_full(get_data_conf(frame), object);
-}
-
-static uint32_t
-io_size_nopad_hole_full(call_frame_t *frame, struct object_cipher_info *object)
-{
- return io_size_nopad_full(get_hole_conf(frame), object);
-}
-
-static uint32_t
-offset_in_head(struct avec_config *conf)
-{
- check_cursor_head(conf);
-
- return conf->off_in_head;
-}
-
-static uint32_t
-offset_in_tail(call_frame_t *frame, struct object_cipher_info *object)
-{
- return 0;
-}
-
-static uint32_t
-offset_in_full(struct avec_config *conf, struct object_cipher_info *object)
-{
- check_cursor_full(conf);
-
- if (has_head_block(conf))
- return (conf->cursor - 1) << get_atom_bits(object);
- else
- return conf->cursor << get_atom_bits(object);
-}
-
-static uint32_t
-offset_in_data_head(call_frame_t *frame, struct object_cipher_info *object)
-{
- return offset_in_head(get_data_conf(frame));
-}
-
-static uint32_t
-offset_in_hole_head(call_frame_t *frame, struct object_cipher_info *object)
-{
- return offset_in_head(get_hole_conf(frame));
-}
-
-static uint32_t
-offset_in_data_full(call_frame_t *frame, struct object_cipher_info *object)
-{
- return offset_in_full(get_data_conf(frame), object);
-}
-
-static uint32_t
-offset_in_hole_full(call_frame_t *frame, struct object_cipher_info *object)
-{
- return offset_in_full(get_hole_conf(frame), object);
-}
-
-/*
- * atom->rmw()
- */
-/*
- * Pre-conditions:
- * @vec contains plain text of the latest
- * version.
- *
- * Uptodate gaps of the @partial block with
- * this plain text, encrypt the whole block
- * and write the result to disk.
- */
-static int32_t
-rmw_partial_block(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iovec *vec,
- int32_t count, struct iatt *stbuf, struct iobref *iobref,
- struct rmw_atom *atom)
-{
- size_t was_read = 0;
- uint64_t file_size;
- crypt_local_t *local = frame->local;
- struct object_cipher_info *object = &local->info->cinfo;
-
- struct iovec *partial = atom->get_iovec(frame, 0);
- struct avec_config *conf = atom->get_config(frame);
- end_writeback_handler_t end_writeback_partial_block;
-#if DEBUG_CRYPT
- gf_boolean_t check_last_cblock = _gf_false;
-#endif
- local->op_ret = op_ret;
- local->op_errno = op_errno;
-
- if (op_ret < 0)
- goto exit;
-
- file_size = local->cur_file_size;
- was_read = op_ret;
-
- if (atom->locality == HEAD_ATOM && conf->off_in_head) {
- /*
- * head atom with a non-uptodate gap
- * at the beginning
- *
- * fill the gap with plain text of the
- * latest version. Convert a part of hole
- * (if any) to zeros.
- */
- int32_t i;
- int32_t copied = 0;
- int32_t to_gap; /* amount of data needed to uptodate
- the gap at the beginning */
-#if 0
- int32_t hole = 0; /* The part of the hole which
- * got in the head block */
-#endif /* 0 */
- to_gap = conf->off_in_head;
-
- if (was_read < to_gap) {
- if (file_size > offset_at_head(conf) + was_read) {
- /*
- * It is impossible to uptodate
- * head block: too few bytes have
- * been read from disk, so that
- * partial write is impossible.
- *
- * It could happen because of many
- * reasons: IO errors, (meta)data
- * corruption in the local file system,
- * etc.
- */
- gf_log(this->name, GF_LOG_WARNING,
- "Can not uptodate a gap at the beginning");
- local->op_ret = -1;
- local->op_errno = EIO;
- goto exit;
- }
-#if 0
- hole = to_gap - was_read;
-#endif /* 0 */
- to_gap = was_read;
- }
- /*
- * uptodate the gap at the beginning
- */
- for (i = 0; i < count && copied < to_gap; i++) {
- int32_t to_copy;
-
- to_copy = vec[i].iov_len;
- if (to_copy > to_gap - copied)
- to_copy = to_gap - copied;
-
- memcpy(partial->iov_base, vec[i].iov_base, to_copy);
- copied += to_copy;
- }
-#if 0
- /*
- * If possible, convert part of the
- * hole, which got in the head block
- */
- ret = TRY_LOCK(&local->hole_lock);
- if (!ret) {
- if (local->hole_handled)
- /*
- * already converted by
- * crypt_writev_cbk()
- */
- UNLOCK(&local->hole_lock);
- else {
- /*
- * convert the part of the hole
- * which got in the head block
- * to zeros.
- *
- * Update the orig_offset to make
- * sure writev_cbk() won't care
- * about this part of the hole.
- *
- */
- memset(partial->iov_base + to_gap, 0, hole);
-
- conf->orig_offset -= hole;
- conf->orig_size += hole;
- UNLOCK(&local->hole_lock);
- }
- }
- else /*
- * conversion is being performed
- * by crypt_writev_cbk()
- */
- ;
-#endif /* 0 */
- }
- if (atom->locality == TAIL_ATOM ||
- (!has_tail_block(conf) && conf->off_in_tail)) {
- /*
- * tail atom, or head atom with a non-uptodate
- * gap at the end.
- *
- * fill the gap at the end of the block
- * with plain text of the latest version.
- * Pad the result, (if needed)
- */
- int32_t i;
- int32_t to_gap;
- int copied;
- off_t off_in_tail;
- int32_t to_copy;
-
- off_in_tail = conf->off_in_tail;
- to_gap = conf->gap_in_tail;
-
- if (to_gap && was_read < off_in_tail + to_gap) {
- /*
- * It is impossible to uptodate
- * the gap at the end: too few bytes
- * have been read from disk, so that
- * partial write is impossible.
- *
- * It could happen because of many
- * reasons: IO errors, (meta)data
- * corruption in the local file system,
- * etc.
- */
- gf_log(this->name, GF_LOG_WARNING,
- "Can not uptodate a gap at the end");
- local->op_ret = -1;
- local->op_errno = EIO;
- goto exit;
- }
- /*
- * uptodate the gap at the end
- */
- copied = 0;
- to_copy = to_gap;
- for (i = count - 1; i >= 0 && to_copy > 0; i--) {
- uint32_t from_vec, off_in_vec;
-
- off_in_vec = 0;
- from_vec = vec[i].iov_len;
- if (from_vec > to_copy) {
- off_in_vec = from_vec - to_copy;
- from_vec = to_copy;
- }
- memcpy(partial->iov_base + off_in_tail + to_gap - copied - from_vec,
- vec[i].iov_base + off_in_vec, from_vec);
-
- gf_log(
- this->name, GF_LOG_DEBUG,
- "uptodate %d bytes at tail. Offset at target(source): %d(%d)",
- (int)from_vec, (int)off_in_tail + to_gap - copied - from_vec,
- (int)off_in_vec);
-
- copied += from_vec;
- to_copy -= from_vec;
- }
- partial->iov_len = off_in_tail + to_gap;
-
- if (object_alg_should_pad(object)) {
- int32_t resid = 0;
- resid = partial->iov_len & (object_alg_blksize(object) - 1);
- if (resid) {
- /*
- * append a new EOF padding
- */
- local->eof_padding_size = object_alg_blksize(object) - resid;
-
- gf_log(this->name, GF_LOG_DEBUG, "set padding size %d",
- local->eof_padding_size);
-
- memset(partial->iov_base + partial->iov_len, 1,
- local->eof_padding_size);
- partial->iov_len += local->eof_padding_size;
-#if DEBUG_CRYPT
- gf_log(this->name, GF_LOG_DEBUG,
- "pad cblock with %d zeros:", local->eof_padding_size);
- dump_cblock(this, (unsigned char *)partial->iov_base +
- partial->iov_len -
- object_alg_blksize(object));
- check_last_cblock = _gf_true;
-#endif
- }
- }
- }
- /*
- * encrypt the whole block
- */
- encrypt_aligned_iov(object, partial, 1, atom->offset_at(frame, object));
-#if DEBUG_CRYPT
- if (check_last_cblock == _gf_true) {
- gf_log(this->name, GF_LOG_DEBUG, "encrypt last cblock with offset %llu",
- (unsigned long long)atom->offset_at(frame, object));
- dump_cblock(this, (unsigned char *)partial->iov_base +
- partial->iov_len - object_alg_blksize(object));
- }
-#endif
- set_local_io_params_writev(frame, object, atom,
- atom->offset_at(frame, object),
- iov_length(partial, 1));
- /*
- * write the whole block to disk
- */
- end_writeback_partial_block = dispatch_end_writeback(local->fop);
- conf->cursor++;
- STACK_WIND(frame, end_writeback_partial_block, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev, local->fd, partial, 1,
- atom->offset_at(frame, object), local->flags, local->iobref_data,
- local->xdata);
-
- gf_log("crypt", GF_LOG_DEBUG,
- "submit partial block: %d bytes from %d offset",
- (int)iov_length(partial, 1), (int)atom->offset_at(frame, object));
-exit:
- return 0;
-}
-
-/*
- * Perform a (read-)modify-write sequence.
- * This should be performed only after approval
- * of upper server-side manager, i.e. the caller
- * needs to make sure this is his turn to rmw.
- */
-void
-submit_partial(call_frame_t *frame, xlator_t *this, fd_t *fd,
- atom_locality_type ltype)
-{
- int32_t ret;
- dict_t *dict;
- struct rmw_atom *atom;
- crypt_local_t *local = frame->local;
- struct object_cipher_info *object = &local->info->cinfo;
-
- atom = atom_by_types(local->active_setup, ltype);
- /*
- * To perform the "read" component of the read-modify-write
- * sequence the crypt translator does stack_wind to itself.
- *
- * Pass current file size to crypt_readv()
- */
- dict = dict_new();
- if (!dict) {
- /*
- * FIXME: Handle the error
- */
- gf_log("crypt", GF_LOG_WARNING, "Can not alloc dict");
- return;
- }
- ret = dict_set(dict, FSIZE_XATTR_PREFIX,
- data_from_uint64(local->cur_file_size));
- if (ret) {
- /*
- * FIXME: Handle the error
- */
- dict_unref(dict);
- gf_log("crypt", GF_LOG_WARNING, "Can not set dict");
- goto exit;
- }
- STACK_WIND(frame, atom->rmw, this, this->fops->readv, /* crypt_readv */
- fd, atom->count_to_uptodate(frame, object), /* count */
- atom->offset_at(frame, object), /* offset to read from */
- 0, dict);
-exit:
- dict_unref(dict);
-}
-
-/*
- * submit blocks of FULL_ATOM type
- */
-void
-submit_full(call_frame_t *frame, xlator_t *this)
-{
- crypt_local_t *local = frame->local;
- struct object_cipher_info *object = &local->info->cinfo;
- struct rmw_atom *atom = atom_by_types(local->active_setup, FULL_ATOM);
- uint32_t count; /* total number of full blocks to submit */
- uint32_t granularity; /* number of blocks to submit in one iteration */
-
- uint64_t off_in_file; /* start offset in the file, bytes */
- uint32_t off_in_atom; /* start offset in the atom, blocks */
- uint32_t blocks_written = 0; /* blocks written for this submit */
-
- struct avec_config *conf = atom->get_config(frame);
- end_writeback_handler_t end_writeback_full_block;
- /*
- * Write full blocks by groups of granularity size.
- */
- end_writeback_full_block = dispatch_end_writeback(local->fop);
-
- if (is_ordered_mode(frame)) {
- uint32_t skip = has_head_block(conf) ? 1 : 0;
- count = 1;
- granularity = 1;
- /*
- * calculate start offset using cursor value;
- * here we should take into account head block,
- * which corresponds to cursor value 0.
- */
- off_in_file = atom->offset_at(frame, object) +
- ((conf->cursor - skip) << get_atom_bits(object));
- off_in_atom = conf->cursor - skip;
- } else {
- /*
- * in parallel mode
- */
- count = conf->nr_full_blocks;
- granularity = MAX_IOVEC;
- off_in_file = atom->offset_at(frame, object);
- off_in_atom = 0;
- }
- while (count) {
- uint32_t blocks_to_write = count;
-
- if (blocks_to_write > granularity)
- blocks_to_write = granularity;
- if (conf->type == HOLE_ATOM)
- /*
- * reset iovec before encryption
- */
- memset(atom->get_iovec(frame, 0)->iov_base, 0,
- get_atom_size(object));
- /*
- * encrypt the group
- */
- encrypt_aligned_iov(
- object, atom->get_iovec(frame, off_in_atom + blocks_written),
- blocks_to_write,
- off_in_file + (blocks_written << get_atom_bits(object)));
-
- set_local_io_params_writev(
- frame, object, atom,
- off_in_file + (blocks_written << get_atom_bits(object)),
- blocks_to_write << get_atom_bits(object));
-
- conf->cursor += blocks_to_write;
-
- STACK_WIND(frame, end_writeback_full_block, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev, local->fd,
- atom->get_iovec(frame, off_in_atom + blocks_written),
- blocks_to_write,
- off_in_file + (blocks_written << get_atom_bits(object)),
- local->flags,
- local->iobref_data ? local->iobref_data : local->iobref,
- local->xdata);
-
- gf_log("crypt", GF_LOG_DEBUG, "submit %d full blocks from %d offset",
- blocks_to_write,
- (int)(off_in_file + (blocks_written << get_atom_bits(object))));
-
- count -= blocks_to_write;
- blocks_written += blocks_to_write;
- }
- return;
-}
-
-static int32_t
-rmw_data_head(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iovec *vec, int32_t count,
- struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
-{
- return rmw_partial_block(frame, cookie, this, op_ret, op_errno, vec, count,
- stbuf, iobref,
- atom_by_types(DATA_ATOM, HEAD_ATOM));
-}
-
-static int32_t
-rmw_data_tail(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iovec *vec, int32_t count,
- struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
-{
- return rmw_partial_block(frame, cookie, this, op_ret, op_errno, vec, count,
- stbuf, iobref,
- atom_by_types(DATA_ATOM, TAIL_ATOM));
-}
-
-static int32_t
-rmw_hole_head(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iovec *vec, int32_t count,
- struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
-{
- return rmw_partial_block(frame, cookie, this, op_ret, op_errno, vec, count,
- stbuf, iobref,
- atom_by_types(HOLE_ATOM, HEAD_ATOM));
-}
-
-static int32_t
-rmw_hole_tail(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iovec *vec, int32_t count,
- struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
-{
- return rmw_partial_block(frame, cookie, this, op_ret, op_errno, vec, count,
- stbuf, iobref,
- atom_by_types(HOLE_ATOM, TAIL_ATOM));
-}
-
-/*
- * atom->count_to_uptodate()
- */
-static uint32_t
-count_to_uptodate_head(struct avec_config *conf,
- struct object_cipher_info *object)
-{
- if (conf->acount == 1 && conf->off_in_tail)
- return get_atom_size(object);
- else
- /* there is no need to read the whole head block */
- return conf->off_in_head;
-}
-
-static uint32_t
-count_to_uptodate_tail(struct avec_config *conf,
- struct object_cipher_info *object)
-{
- /* we need to read the whole tail block */
- return get_atom_size(object);
-}
-
-static uint32_t
-count_to_uptodate_data_head(call_frame_t *frame,
- struct object_cipher_info *object)
-{
- return count_to_uptodate_head(get_data_conf(frame), object);
-}
-
-static uint32_t
-count_to_uptodate_data_tail(call_frame_t *frame,
- struct object_cipher_info *object)
-{
- return count_to_uptodate_tail(get_data_conf(frame), object);
-}
-
-static uint32_t
-count_to_uptodate_hole_head(call_frame_t *frame,
- struct object_cipher_info *object)
-{
- return count_to_uptodate_head(get_hole_conf(frame), object);
-}
-
-static uint32_t
-count_to_uptodate_hole_tail(call_frame_t *frame,
- struct object_cipher_info *object)
-{
- return count_to_uptodate_tail(get_hole_conf(frame), object);
-}
-
-/* atom->get_config() */
-
-static struct avec_config *
-get_config_data(call_frame_t *frame)
-{
- return &((crypt_local_t *)frame->local)->data_conf;
-}
-
-static struct avec_config *
-get_config_hole(call_frame_t *frame)
-{
- return &((crypt_local_t *)frame->local)->hole_conf;
-}
-
-/*
- * atom->get_iovec()
- */
-static struct iovec *
-get_iovec_hole_head(call_frame_t *frame, uint32_t count)
-{
- struct avec_config *conf = get_hole_conf(frame);
-
- return conf->avec;
-}
-
-static struct iovec *
-get_iovec_hole_full(call_frame_t *frame, uint32_t count)
-{
- struct avec_config *conf = get_hole_conf(frame);
-
- return conf->avec + (conf->off_in_head ? 1 : 0);
-}
-
-static struct iovec *
-get_iovec_hole_tail(call_frame_t *frame, uint32_t count)
-{
- struct avec_config *conf = get_hole_conf(frame);
-
- return conf->avec + (conf->blocks_in_pool - 1);
-}
-
-static struct iovec *
-get_iovec_data_head(call_frame_t *frame, uint32_t count)
-{
- struct avec_config *conf = get_data_conf(frame);
-
- return conf->avec;
-}
-
-static struct iovec *
-get_iovec_data_full(call_frame_t *frame, uint32_t count)
-{
- struct avec_config *conf = get_data_conf(frame);
-
- return conf->avec + (conf->off_in_head ? 1 : 0) + count;
-}
-
-static struct iovec *
-get_iovec_data_tail(call_frame_t *frame, uint32_t count)
-{
- struct avec_config *conf = get_data_conf(frame);
-
- return conf->avec + (conf->off_in_head ? 1 : 0) + conf->nr_full_blocks;
-}
-
-static struct rmw_atom atoms[LAST_DATA_TYPE][LAST_LOCALITY_TYPE] = {
- [DATA_ATOM][HEAD_ATOM] = {.locality = HEAD_ATOM,
- .rmw = rmw_data_head,
- .offset_at = offset_at_data_head,
- .offset_in = offset_in_data_head,
- .get_iovec = get_iovec_data_head,
- .io_size_nopad = io_size_nopad_data_head,
- .count_to_uptodate = count_to_uptodate_data_head,
- .get_config = get_config_data},
- [DATA_ATOM][TAIL_ATOM] = {.locality = TAIL_ATOM,
- .rmw = rmw_data_tail,
- .offset_at = offset_at_data_tail,
- .offset_in = offset_in_tail,
- .get_iovec = get_iovec_data_tail,
- .io_size_nopad = io_size_nopad_data_tail,
- .count_to_uptodate = count_to_uptodate_data_tail,
- .get_config = get_config_data},
- [DATA_ATOM][FULL_ATOM] = {.locality = FULL_ATOM,
- .offset_at = offset_at_data_full,
- .offset_in = offset_in_data_full,
- .get_iovec = get_iovec_data_full,
- .io_size_nopad = io_size_nopad_data_full,
- .get_config = get_config_data},
- [HOLE_ATOM][HEAD_ATOM] = {.locality = HEAD_ATOM,
- .rmw = rmw_hole_head,
- .offset_at = offset_at_hole_head,
- .offset_in = offset_in_hole_head,
- .get_iovec = get_iovec_hole_head,
- .io_size_nopad = io_size_nopad_hole_head,
- .count_to_uptodate = count_to_uptodate_hole_head,
- .get_config = get_config_hole},
- [HOLE_ATOM][TAIL_ATOM] = {.locality = TAIL_ATOM,
- .rmw = rmw_hole_tail,
- .offset_at = offset_at_hole_tail,
- .offset_in = offset_in_tail,
- .get_iovec = get_iovec_hole_tail,
- .io_size_nopad = io_size_nopad_hole_tail,
- .count_to_uptodate = count_to_uptodate_hole_tail,
- .get_config = get_config_hole},
- [HOLE_ATOM][FULL_ATOM] = {.locality = FULL_ATOM,
- .offset_at = offset_at_hole_full,
- .offset_in = offset_in_hole_full,
- .get_iovec = get_iovec_hole_full,
- .io_size_nopad = io_size_nopad_hole_full,
- .get_config = get_config_hole}};
-
-struct rmw_atom *
-atom_by_types(atom_data_type data, atom_locality_type locality)
-{
- return &atoms[data][locality];
-}
-
-/*
- Local variables:
- c-indentation-style: "K&R"
- mode-name: "LC"
- c-basic-offset: 8
- tab-width: 8
- fill-column: 80
- scroll-step: 1
- End:
-*/
diff --git a/xlators/encryption/crypt/src/crypt-common.h b/xlators/encryption/crypt/src/crypt-common.h
deleted file mode 100644
index 123d5c2..0000000
--- a/xlators/encryption/crypt/src/crypt-common.h
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __CRYPT_COMMON_H__
-#define __CRYPT_COMMON_H__
-
-#define INVAL_SUBVERSION_NUMBER (0xff)
-#define CRYPT_INVAL_OP (GF_FOP_NULL)
-
-#define CRYPTO_FORMAT_PREFIX "trusted.glusterfs.crypt.att.cfmt"
-#define FSIZE_XATTR_PREFIX "trusted.glusterfs.crypt.att.size"
-#define SUBREQ_PREFIX "trusted.glusterfs.crypt.msg.sreq"
-#define FSIZE_MSG_PREFIX "trusted.glusterfs.crypt.msg.size"
-#define DE_MSG_PREFIX "trusted.glusterfs.crypt.msg.dent"
-#define REQUEST_ID_PREFIX "trusted.glusterfs.crypt.msg.rqid"
-#define MSGFLAGS_PREFIX "trusted.glusterfs.crypt.msg.xfgs"
-
-/* messages for crypt_open() */
-#define MSGFLAGS_REQUEST_MTD_RLOCK 1 /* take read lock and don't unlock */
-#define MSGFLAGS_REQUEST_MTD_WLOCK 2 /* take write lock and don't unlock */
-
-#define AES_BLOCK_BITS (4) /* AES_BLOCK_SIZE == 1 << AES_BLOCK_BITS */
-
-#define noop \
- do { \
- ; \
- } while (0)
-#define cassert(cond) \
- ({ \
- switch (-1) { \
- case (cond): \
- case 0: \
- break; \
- } \
- })
-#define __round_mask(x, y) ((__typeof__(x))((y)-1))
-#define round_up(x, y) ((((x)-1) | __round_mask(x, y)) + 1)
-
-/*
- * Format of file's metadata
- */
-struct crypt_format {
- uint8_t loader_id; /* version of metadata loader */
- uint8_t versioned[0]; /* file's metadata of specific version */
-} __attribute__((packed));
-
-typedef enum { AES_CIPHER_ALG, LAST_CIPHER_ALG } cipher_alg_t;
-
-typedef enum { XTS_CIPHER_MODE, LAST_CIPHER_MODE } cipher_mode_t;
-
-typedef enum { MTD_LOADER_V1, LAST_MTD_LOADER } mtd_loader_id;
-
-static inline void
-msgflags_set_mtd_rlock(uint32_t *flags)
-{
- *flags |= MSGFLAGS_REQUEST_MTD_RLOCK;
-}
-
-static inline void
-msgflags_set_mtd_wlock(uint32_t *flags)
-{
- *flags |= MSGFLAGS_REQUEST_MTD_WLOCK;
-}
-
-static inline gf_boolean_t
-msgflags_check_mtd_rlock(uint32_t *flags)
-{
- return *flags & MSGFLAGS_REQUEST_MTD_RLOCK;
-}
-
-static inline gf_boolean_t
-msgflags_check_mtd_wlock(uint32_t *flags)
-{
- return *flags & MSGFLAGS_REQUEST_MTD_WLOCK;
-}
-
-static inline gf_boolean_t
-msgflags_check_mtd_lock(uint32_t *flags)
-{
- return msgflags_check_mtd_rlock(flags) || msgflags_check_mtd_wlock(flags);
-}
-
-/*
- * returns number of logical blocks occupied
- * (maybe partially) by @count bytes
- * at offset @start.
- */
-static inline off_t
-logical_blocks_occupied(uint64_t start, off_t count, int blkbits)
-{
- return ((start + count - 1) >> blkbits) - (start >> blkbits) + 1;
-}
-
-/*
- * are two bytes (represented by offsets @off1
- * and @off2 respectively) in the same logical
- * block.
- */
-static inline int
-in_same_lblock(uint64_t off1, uint64_t off2, int blkbits)
-{
- return off1 >> blkbits == off2 >> blkbits;
-}
-
-static inline void
-dump_cblock(xlator_t *this, unsigned char *buf)
-{
- gf_log(this->name, GF_LOG_DEBUG,
- "dump cblock: %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x",
- (buf)[0], (buf)[1], (buf)[2], (buf)[3], (buf)[4], (buf)[5], (buf)[6],
- (buf)[7], (buf)[8], (buf)[9], (buf)[10], (buf)[11], (buf)[12],
- (buf)[13], (buf)[14], (buf)[15]);
-}
-
-#endif /* __CRYPT_COMMON_H__ */
-
-/*
- Local variables:
- c-indentation-style: "K&R"
- mode-name: "LC"
- c-basic-offset: 8
- tab-width: 8
- fill-column: 80
- scroll-step: 1
- End:
-*/
diff --git a/xlators/encryption/crypt/src/crypt-mem-types.h b/xlators/encryption/crypt/src/crypt-mem-types.h
deleted file mode 100644
index e756ea4..0000000
--- a/xlators/encryption/crypt/src/crypt-mem-types.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __CRYPT_MEM_TYPES_H__
-#define __CRYPT_MEM_TYPES_H__
-
-#include <glusterfs/mem-types.h>
-
-enum gf_crypt_mem_types_ {
- gf_crypt_mt_priv = gf_common_mt_end + 1,
- gf_crypt_mt_inode,
- gf_crypt_mt_data,
- gf_crypt_mt_mtd,
- gf_crypt_mt_loc,
- gf_crypt_mt_iatt,
- gf_crypt_mt_key,
- gf_crypt_mt_iovec,
- gf_crypt_mt_char,
- gf_crypt_mt_local,
- gf_crypt_mt_end,
-};
-
-#endif /* __CRYPT_MEM_TYPES_H__ */
-
-/*
- Local variables:
- c-indentation-style: "K&R"
- mode-name: "LC"
- c-basic-offset: 8
- tab-width: 8
- fill-column: 80
- scroll-step: 1
- End:
-*/
diff --git a/xlators/encryption/crypt/src/crypt.c b/xlators/encryption/crypt/src/crypt.c
deleted file mode 100644
index 0225364..0000000
--- a/xlators/encryption/crypt/src/crypt.c
+++ /dev/null
@@ -1,3906 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-#include <ctype.h>
-#include <sys/uio.h>
-
-#include <glusterfs/glusterfs.h>
-#include <glusterfs/xlator.h>
-#include <glusterfs/logging.h>
-#include <glusterfs/defaults.h>
-
-#include "crypt-common.h"
-#include "crypt.h"
-
-static void
-init_inode_info_head(struct crypt_inode_info *info, fd_t *fd);
-static int32_t
-init_inode_info_tail(struct crypt_inode_info *info,
- struct master_cipher_info *master);
-static int32_t
-prepare_for_submit_hole(call_frame_t *frame, xlator_t *this, uint64_t from,
- off_t size);
-static int32_t
-load_file_size(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata);
-static void
-do_ordered_submit(call_frame_t *frame, xlator_t *this, atom_data_type dtype);
-static void
-do_parallel_submit(call_frame_t *frame, xlator_t *this, atom_data_type dtype);
-static void
-put_one_call_open(call_frame_t *frame);
-static void
-put_one_call_readv(call_frame_t *frame, xlator_t *this);
-static void
-put_one_call_writev(call_frame_t *frame, xlator_t *this);
-static void
-put_one_call_ftruncate(call_frame_t *frame, xlator_t *this);
-static void
-free_avec(struct iovec *avec, char **pool, int blocks_in_pool);
-static void
-free_avec_data(crypt_local_t *local);
-static void
-free_avec_hole(crypt_local_t *local);
-
-static crypt_local_t *
-crypt_alloc_local(call_frame_t *frame, xlator_t *this, glusterfs_fop_t fop)
-{
- crypt_local_t *local = NULL;
-
- local = GF_CALLOC(1, sizeof(crypt_local_t), gf_crypt_mt_local);
- if (!local) {
- gf_log(this->name, GF_LOG_ERROR, "out of memory");
- return NULL;
- }
- local->fop = fop;
- LOCK_INIT(&local->hole_lock);
- LOCK_INIT(&local->call_lock);
- LOCK_INIT(&local->rw_count_lock);
-
- frame->local = local;
- return local;
-}
-
-struct crypt_inode_info *
-get_crypt_inode_info(inode_t *inode, xlator_t *this)
-{
- int ret;
- uint64_t value = 0;
- struct crypt_inode_info *info;
-
- ret = inode_ctx_get(inode, this, &value);
- if (ret == -1) {
- gf_log(this->name, GF_LOG_WARNING, "Can not get inode info");
- return NULL;
- }
- info = (struct crypt_inode_info *)(long)value;
- if (info == NULL) {
- gf_log(this->name, GF_LOG_WARNING, "Can not obtain inode info");
- return NULL;
- }
- return info;
-}
-
-static struct crypt_inode_info *
-local_get_inode_info(crypt_local_t *local, xlator_t *this)
-{
- if (local->info)
- return local->info;
- local->info = get_crypt_inode_info(local->fd->inode, this);
- return local->info;
-}
-
-static struct crypt_inode_info *
-alloc_inode_info(crypt_local_t *local, loc_t *loc)
-{
- struct crypt_inode_info *info;
-
- info = GF_CALLOC(1, sizeof(struct crypt_inode_info), gf_crypt_mt_inode);
- if (!info) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- gf_log("crypt", GF_LOG_WARNING, "Can not allocate inode info");
- return NULL;
- }
-#if DEBUG_CRYPT
- info->loc = GF_CALLOC(1, sizeof(loc_t), gf_crypt_mt_loc);
- if (!info->loc) {
- gf_log("crypt", GF_LOG_WARNING, "Can not allocate loc");
- GF_FREE(info);
- return NULL;
- }
- if (loc_copy(info->loc, loc)) {
- GF_FREE(info->loc);
- GF_FREE(info);
- return NULL;
- }
-#endif /* DEBUG_CRYPT */
-
- local->info = info;
- return info;
-}
-
-static void
-free_inode_info(struct crypt_inode_info *info)
-{
-#if DEBUG_CRYPT
- loc_wipe(info->loc);
- GF_FREE(info->loc);
-#endif
- memset(info, 0, sizeof(*info));
- GF_FREE(info);
-}
-
-int
-crypt_forget(xlator_t *this, inode_t *inode)
-{
- uint64_t ctx_addr = 0;
- if (!inode_ctx_del(inode, this, &ctx_addr))
- free_inode_info((struct crypt_inode_info *)(long)ctx_addr);
- return 0;
-}
-
-#if DEBUG_CRYPT
-static void
-check_read(call_frame_t *frame, xlator_t *this, int32_t read, struct iovec *vec,
- int32_t count, struct iatt *stbuf)
-{
- crypt_local_t *local = frame->local;
- struct object_cipher_info *object = get_object_cinfo(local->info);
- struct avec_config *conf = &local->data_conf;
- uint32_t resid = stbuf->ia_size & (object_alg_blksize(object) - 1);
-
- if (read <= 0)
- return;
- if (read != iov_length(vec, count))
- gf_log("crypt", GF_LOG_DEBUG,
- "op_ret differs from amount of read bytes");
-
- if (object_alg_should_pad(object) &&
- (read & (object_alg_blksize(object) - 1)))
- gf_log("crypt", GF_LOG_DEBUG,
- "bad amount of read bytes (!= 0 mod(cblock size))");
-
- if (conf->aligned_offset + read >
- stbuf->ia_size + (resid ? object_alg_blksize(object) - resid : 0))
- gf_log("crypt", GF_LOG_DEBUG, "bad amount of read bytes (too large))");
-}
-
-#define PT_BYTES_TO_DUMP (32)
-static void
-dump_plain_text(crypt_local_t *local, struct iovec *avec)
-{
- int32_t to_dump;
- char str[PT_BYTES_TO_DUMP + 1];
-
- if (!avec)
- return;
- to_dump = avec->iov_len;
- if (to_dump > PT_BYTES_TO_DUMP)
- to_dump = PT_BYTES_TO_DUMP;
- memcpy(str, avec->iov_base, to_dump);
- memset(str + to_dump, '0', 1);
- gf_log("crypt", GF_LOG_DEBUG, "Read file: %s", str);
-}
-
-static int32_t
-data_conf_invariant(struct avec_config *conf)
-{
- return conf->acount == !!has_head_block(conf) + !!has_tail_block(conf) +
- conf->nr_full_blocks;
-}
-
-static int32_t
-hole_conf_invariant(struct avec_config *conf)
-{
- return conf->blocks_in_pool == !!has_head_block(conf) +
- !!has_tail_block(conf) +
- !!has_full_blocks(conf);
-}
-
-static void
-crypt_check_conf(struct avec_config *conf)
-{
- int32_t ret = 0;
- const char *msg;
-
- switch (conf->type) {
- case DATA_ATOM:
- msg = "data";
- ret = data_conf_invariant(conf);
- break;
- case HOLE_ATOM:
- msg = "hole";
- ret = hole_conf_invariant(conf);
- break;
- default:
- msg = "unknown";
- }
- if (!ret)
- gf_log("crypt", GF_LOG_DEBUG, "bad %s conf", msg);
-}
-
-static void
-check_buf(call_frame_t *frame, xlator_t *this, struct iatt *buf)
-{
- crypt_local_t *local = frame->local;
- struct object_cipher_info *object = &local->info->cinfo;
- uint64_t local_file_size;
-
- switch (local->fop) {
- case GF_FOP_FTRUNCATE:
- return;
- case GF_FOP_WRITE:
- local_file_size = local->new_file_size;
- break;
- case GF_FOP_READ:
- if (parent_is_crypt_xlator(frame, this))
- return;
- local_file_size = local->cur_file_size;
- break;
- default:
- gf_log("crypt", GF_LOG_DEBUG, "bad file operation");
- return;
- }
- if (buf->ia_size != round_up(local_file_size, object_alg_blksize(object)))
- gf_log("crypt", GF_LOG_DEBUG,
- "bad ia_size in buf (%llu), should be %llu",
- (unsigned long long)buf->ia_size,
- (unsigned long long)round_up(local_file_size,
- object_alg_blksize(object)));
-}
-
-#else
-#define check_read(frame, this, op_ret, vec, count, stbuf) noop
-#define dump_plain_text(local, avec) noop
-#define crypt_check_conf(conf) noop
-#define check_buf(frame, this, buf) noop
-#endif /* DEBUG_CRYPT */
-
-/*
- * Pre-conditions:
- * @vec represents a ciphertext of expanded size and
- * aligned offset.
- *
- * Compound a temporal vector @avec with block-aligned
- * components, decrypt and fix it up to represent a chunk
- * of data corresponding to the original size and offset.
- * Pass the result to the next translator.
- */
-int32_t
-crypt_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iovec *vec,
- int32_t count, struct iatt *stbuf, struct iobref *iobref,
- dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
- struct avec_config *conf = &local->data_conf;
- struct object_cipher_info *object = &local->info->cinfo;
-
- struct iovec *avec;
- uint32_t i;
- uint32_t to_vec;
- uint32_t to_user;
-
- check_buf(frame, this, stbuf);
- check_read(frame, this, op_ret, vec, count, stbuf);
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- local->iobref = iobref_ref(iobref);
-
- local->buf = *stbuf;
- local->buf.ia_size = local->cur_file_size;
-
- if (op_ret <= 0 || count == 0 || vec[0].iov_len == 0)
- goto put_one_call;
-
- if (conf->orig_offset >= local->cur_file_size) {
- local->op_ret = 0;
- goto put_one_call;
- }
- /*
- * correct config params with real file size
- * and actual amount of bytes read
- */
- set_config_offsets(frame, this, conf->orig_offset, op_ret, DATA_ATOM, 0);
-
- if (conf->orig_offset + conf->orig_size > local->cur_file_size)
- conf->orig_size = local->cur_file_size - conf->orig_offset;
- /*
- * calculate amount of data to be returned
- * to user.
- */
- to_user = op_ret;
- if (conf->aligned_offset + to_user <= conf->orig_offset) {
- gf_log(this->name, GF_LOG_WARNING, "Incomplete read");
- local->op_ret = -1;
- local->op_errno = EIO;
- goto put_one_call;
- }
- to_user -= (conf->aligned_offset - conf->orig_offset);
-
- if (to_user > conf->orig_size)
- to_user = conf->orig_size;
- local->rw_count = to_user;
-
- op_errno = set_config_avec_data(this, local, conf, object, vec, count);
- if (op_errno) {
- local->op_ret = -1;
- local->op_errno = op_errno;
- goto put_one_call;
- }
- avec = conf->avec;
-#if DEBUG_CRYPT
- if (conf->off_in_tail != 0 &&
- conf->off_in_tail < object_alg_blksize(object) &&
- object_alg_should_pad(object))
- gf_log(this->name, GF_LOG_DEBUG, "Bad offset in tail %d",
- conf->off_in_tail);
- if (iov_length(vec, count) != 0 &&
- in_same_lblock(conf->orig_offset + iov_length(vec, count) - 1,
- local->cur_file_size - 1, object_alg_blkbits(object))) {
- gf_log(this->name, GF_LOG_DEBUG, "Compound last cblock");
- dump_cblock(this, (unsigned char *)(avec[conf->acount - 1].iov_base) +
- avec[conf->acount - 1].iov_len -
- object_alg_blksize(object));
- dump_cblock(this, (unsigned char *)(vec[count - 1].iov_base) +
- vec[count - 1].iov_len -
- object_alg_blksize(object));
- }
-#endif
- decrypt_aligned_iov(object, avec, conf->acount, conf->aligned_offset);
- /*
- * pass proper plain data to user
- */
- avec[0].iov_base += (conf->aligned_offset - conf->orig_offset);
- avec[0].iov_len -= (conf->aligned_offset - conf->orig_offset);
-
- to_vec = to_user;
- for (i = 0; i < conf->acount; i++) {
- if (avec[i].iov_len > to_vec)
- avec[i].iov_len = to_vec;
- to_vec -= avec[i].iov_len;
- }
-put_one_call:
- put_one_call_readv(frame, this);
- return 0;
-}
-
-static int32_t
-do_readv(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *dict, dict_t *xdata)
-{
- data_t *data;
- crypt_local_t *local = frame->local;
-
- if (op_ret < 0)
- goto error;
- /*
- * extract regular file size
- */
- data = dict_get(dict, FSIZE_XATTR_PREFIX);
- if (!data) {
- gf_log("crypt", GF_LOG_WARNING, "Regular file size not found");
- op_errno = EIO;
- goto error;
- }
- local->cur_file_size = data_to_uint64(data);
-
- get_one_call(frame);
- STACK_WIND(frame, crypt_readv_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readv, local->fd,
- /*
- * FIXME: read amount can be reduced
- */
- local->data_conf.expanded_size, local->data_conf.aligned_offset,
- local->flags, local->xdata);
- return 0;
-error:
- local->op_ret = -1;
- local->op_errno = op_errno;
-
- get_one_call(frame);
- put_one_call_readv(frame, this);
- return 0;
-}
-
-static int32_t
-crypt_readv_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
-
- if (op_ret < 0)
- goto error;
- /*
- * An access has been granted,
- * retrieve file size
- */
- STACK_WIND(frame, do_readv, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fgetxattr, local->fd,
- FSIZE_XATTR_PREFIX, NULL);
- return 0;
-error:
- fd_unref(local->fd);
- if (local->xdata)
- dict_unref(local->xdata);
- CRYPT_STACK_UNWIND(readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL);
- return 0;
-}
-
-static int32_t
-readv_trivial_completion(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
-
- if (op_ret < 0) {
- gf_log(this->name, GF_LOG_WARNING, "stat failed (%d)", op_errno);
- goto error;
- }
- local->buf = *buf;
- STACK_WIND(frame, load_file_size, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr, local->loc,
- FSIZE_XATTR_PREFIX, NULL);
- return 0;
-error:
- CRYPT_STACK_UNWIND(readv, frame, op_ret, op_errno, NULL, 0, NULL, NULL,
- NULL);
- return 0;
-}
-
-int32_t
-crypt_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, uint32_t flags, dict_t *xdata)
-{
- int32_t ret;
- crypt_local_t *local;
- struct crypt_inode_info *info;
- struct gf_flock lock = {
- 0,
- };
-
-#if DEBUG_CRYPT
- gf_log("crypt", GF_LOG_DEBUG, "reading %d bytes from offset %llu",
- (int)size, (long long)offset);
- if (parent_is_crypt_xlator(frame, this))
- gf_log("crypt", GF_LOG_DEBUG, "parent is crypt");
-#endif
- local = crypt_alloc_local(frame, this, GF_FOP_READ);
- if (!local) {
- ret = ENOMEM;
- goto error;
- }
- if (size == 0)
- goto trivial;
-
- local->fd = fd_ref(fd);
- local->flags = flags;
-
- info = local_get_inode_info(local, this);
- if (info == NULL) {
- ret = EINVAL;
- fd_unref(fd);
- goto error;
- }
- if (!object_alg_atomic(&info->cinfo)) {
- ret = EINVAL;
- fd_unref(fd);
- goto error;
- }
- set_config_offsets(frame, this, offset, size, DATA_ATOM, 0);
- if (parent_is_crypt_xlator(frame, this)) {
- data_t *data;
- /*
- * We are called by crypt_writev (or cypt_ftruncate)
- * to perform the "read" component of the read-modify-write
- * (or read-prune-write) sequence for some atom;
- *
- * don't ask for access:
- * it has already been acquired
- *
- * Retrieve current file size
- */
- if (!xdata) {
- gf_log("crypt", GF_LOG_WARNING,
- "Regular file size hasn't been passed");
- ret = EIO;
- goto error;
- }
- data = dict_get(xdata, FSIZE_XATTR_PREFIX);
- if (!data) {
- gf_log("crypt", GF_LOG_WARNING, "Regular file size not found");
- ret = EIO;
- goto error;
- }
- local->old_file_size = local->cur_file_size = data_to_uint64(data);
-
- get_one_call(frame);
- STACK_WIND(frame, crypt_readv_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readv, local->fd,
- /*
- * FIXME: read amount can be reduced
- */
- local->data_conf.expanded_size,
- local->data_conf.aligned_offset, flags, NULL);
- return 0;
- }
- if (xdata)
- local->xdata = dict_ref(xdata);
-
- lock.l_len = 0;
- lock.l_start = 0;
- lock.l_type = F_RDLCK;
- lock.l_whence = SEEK_SET;
-
- STACK_WIND(frame, crypt_readv_finodelk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->finodelk, this->name, fd, F_SETLKW,
- &lock, NULL);
- return 0;
-trivial:
- STACK_WIND(frame, readv_trivial_completion, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat, fd, NULL);
- return 0;
-error:
- CRYPT_STACK_UNWIND(readv, frame, -1, ret, NULL, 0, NULL, NULL, NULL);
- return 0;
-}
-
-void
-set_local_io_params_writev(call_frame_t *frame,
- struct object_cipher_info *object,
- struct rmw_atom *atom, off_t io_offset,
- uint32_t io_size)
-{
- crypt_local_t *local = frame->local;
-
- local->io_offset = io_offset;
- local->io_size = io_size;
-
- local->io_offset_nopad = atom->offset_at(frame, object) +
- atom->offset_in(frame, object);
-
- gf_log("crypt", GF_LOG_DEBUG, "set nopad offset to %llu",
- (unsigned long long)local->io_offset_nopad);
-
- local->io_size_nopad = atom->io_size_nopad(frame, object);
-
- gf_log("crypt", GF_LOG_DEBUG, "set nopad size to %llu",
- (unsigned long long)local->io_size_nopad);
-
- local->update_disk_file_size = 0;
- /*
- * NOTE: eof_padding_size is 0 for all full atoms;
- * For head and tail atoms it will be set up at rmw_partial block()
- */
- local->new_file_size = local->cur_file_size;
-
- if (local->io_offset_nopad + local->io_size_nopad > local->cur_file_size) {
- local->new_file_size = local->io_offset_nopad + local->io_size_nopad;
-
- gf_log("crypt", GF_LOG_DEBUG, "set new file size to %llu",
- (unsigned long long)local->new_file_size);
-
- local->update_disk_file_size = 1;
- }
-}
-
-void
-set_local_io_params_ftruncate(call_frame_t *frame,
- struct object_cipher_info *object)
-{
- uint32_t resid;
- crypt_local_t *local = frame->local;
- struct avec_config *conf = &local->data_conf;
-
- resid = conf->orig_offset & (object_alg_blksize(object) - 1);
- if (resid) {
- local->eof_padding_size = object_alg_blksize(object) - resid;
- local->new_file_size = conf->aligned_offset;
- local->update_disk_file_size = 0;
- /*
- * file size will be updated
- * in the ->writev() stack,
- * when submitting file tail
- */
- } else {
- local->eof_padding_size = 0;
- local->new_file_size = conf->orig_offset;
- local->update_disk_file_size = 1;
- /*
- * file size will be updated
- * in this ->ftruncate stack
- */
- }
-}
-
-static void
-submit_head(call_frame_t *frame, xlator_t *this)
-{
- crypt_local_t *local = frame->local;
- submit_partial(frame, this, local->fd, HEAD_ATOM);
-}
-
-static void
-submit_tail(call_frame_t *frame, xlator_t *this)
-{
- crypt_local_t *local = frame->local;
- submit_partial(frame, this, local->fd, TAIL_ATOM);
-}
-
-static void
-submit_hole(call_frame_t *frame, xlator_t *this)
-{
- /*
- * hole conversion always means
- * appended write and goes in ordered fashion
- */
- do_ordered_submit(frame, this, HOLE_ATOM);
-}
-
-static void
-submit_data(call_frame_t *frame, xlator_t *this)
-{
- if (is_ordered_mode(frame)) {
- do_ordered_submit(frame, this, DATA_ATOM);
- return;
- }
- gf_log("crypt", GF_LOG_WARNING, "Bad submit mode");
- get_nr_calls(frame, nr_calls_data(frame));
- do_parallel_submit(frame, this, DATA_ATOM);
- return;
-}
-
-/*
- * heplers called by writev_cbk, fruncate_cbk in ordered mode
- */
-
-static int32_t
-should_submit_hole(crypt_local_t *local)
-{
- struct avec_config *conf = &local->hole_conf;
-
- return conf->avec != NULL;
-}
-
-static int32_t
-should_resume_submit_hole(crypt_local_t *local)
-{
- struct avec_config *conf = &local->hole_conf;
-
- if (local->fop == GF_FOP_WRITE && has_tail_block(conf))
- /*
- * Don't submit a part of hole, which
- * fits into a data block:
- * this part of hole will be converted
- * as a gap filled by zeros in data head
- * block.
- */
- return conf->cursor < conf->acount - 1;
- else
- return conf->cursor < conf->acount;
-}
-
-static int32_t
-should_resume_submit_data(call_frame_t *frame)
-{
- crypt_local_t *local = frame->local;
- struct avec_config *conf = &local->data_conf;
-
- if (is_ordered_mode(frame))
- return conf->cursor < conf->acount;
- /*
- * parallel writes
- */
- return 0;
-}
-
-static int32_t
-should_submit_data_after_hole(crypt_local_t *local)
-{
- return local->data_conf.avec != NULL;
-}
-
-static void
-update_local_file_params(call_frame_t *frame, xlator_t *this,
- struct iatt *prebuf, struct iatt *postbuf)
-{
- crypt_local_t *local = frame->local;
-
- check_buf(frame, this, postbuf);
-
- local->prebuf = *prebuf;
- local->postbuf = *postbuf;
-
- local->prebuf.ia_size = local->cur_file_size;
- local->postbuf.ia_size = local->new_file_size;
-
- local->cur_file_size = local->new_file_size;
-}
-
-static int32_t
-end_writeback_writev(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
-
- if (op_ret <= 0) {
- gf_log(this->name, GF_LOG_WARNING, "writev iteration failed");
- goto put_one_call;
- }
- /*
- * op_ret includes paddings (atom's head, atom's tail and EOF)
- */
- if (op_ret < local->io_size) {
- gf_log(this->name, GF_LOG_WARNING, "Incomplete writev iteration");
- goto put_one_call;
- }
- op_ret -= local->eof_padding_size;
- local->op_ret = op_ret;
-
- update_local_file_params(frame, this, prebuf, postbuf);
-
- if (data_write_in_progress(local)) {
- LOCK(&local->rw_count_lock);
- local->rw_count += op_ret;
- UNLOCK(&local->rw_count_lock);
-
- if (should_resume_submit_data(frame))
- submit_data(frame, this);
- } else {
- /*
- * hole conversion is going on;
- * don't take into account written zeros
- */
- if (should_resume_submit_hole(local))
- submit_hole(frame, this);
-
- else if (should_submit_data_after_hole(local))
- submit_data(frame, this);
- }
-put_one_call:
- put_one_call_writev(frame, this);
- return 0;
-}
-
-#define crypt_writev_cbk end_writeback_writev
-
-#define HOLE_WRITE_CHUNK_BITS 12
-#define HOLE_WRITE_CHUNK_SIZE (1 << HOLE_WRITE_CHUNK_BITS)
-
-/*
- * Convert hole of size @size at offset @off to
- * zeros and prepare respective iovecs for submit.
- * The hole lock should be held.
- *
- * Pre-conditions:
- * @local->file_size is set and valid.
- */
-int32_t
-prepare_for_submit_hole(call_frame_t *frame, xlator_t *this, uint64_t off,
- off_t size)
-{
- int32_t ret;
- crypt_local_t *local = frame->local;
- struct object_cipher_info *object = &local->info->cinfo;
-
- set_config_offsets(frame, this, off, size, HOLE_ATOM, 1);
-
- ret = set_config_avec_hole(this, local, &local->hole_conf, object,
- local->fop);
- crypt_check_conf(&local->hole_conf);
-
- return ret;
-}
-
-/*
- * prepare for submit @count bytes at offset @from
- */
-int32_t
-prepare_for_submit_data(call_frame_t *frame, xlator_t *this, off_t from,
- int32_t size, struct iovec *vec, int32_t vec_count,
- int32_t setup_gap)
-{
- uint32_t ret;
- crypt_local_t *local = frame->local;
- struct object_cipher_info *object = &local->info->cinfo;
-
- set_config_offsets(frame, this, from, size, DATA_ATOM, setup_gap);
-
- ret = set_config_avec_data(this, local, &local->data_conf, object, vec,
- vec_count);
- crypt_check_conf(&local->data_conf);
-
- return ret;
-}
-
-static void
-free_avec(struct iovec *avec, char **pool, int blocks_in_pool)
-{
- if (!avec)
- return;
- GF_FREE(pool);
- GF_FREE(avec);
-}
-
-static void
-free_avec_data(crypt_local_t *local)
-{
- return free_avec(local->data_conf.avec, local->data_conf.pool,
- local->data_conf.blocks_in_pool);
-}
-
-static void
-free_avec_hole(crypt_local_t *local)
-{
- return free_avec(local->hole_conf.avec, local->hole_conf.pool,
- local->hole_conf.blocks_in_pool);
-}
-
-static void
-do_parallel_submit(call_frame_t *frame, xlator_t *this, atom_data_type dtype)
-{
- crypt_local_t *local = frame->local;
- struct avec_config *conf;
-
- local->active_setup = dtype;
- conf = conf_by_type(frame, dtype);
-
- if (has_head_block(conf))
- submit_head(frame, this);
-
- if (has_full_blocks(conf))
- submit_full(frame, this);
-
- if (has_tail_block(conf))
- submit_tail(frame, this);
- return;
-}
-
-static void
-do_ordered_submit(call_frame_t *frame, xlator_t *this, atom_data_type dtype)
-{
- crypt_local_t *local = frame->local;
- struct avec_config *conf;
-
- local->active_setup = dtype;
- conf = conf_by_type(frame, dtype);
-
- if (should_submit_head_block(conf)) {
- get_one_call_nolock(frame);
- submit_head(frame, this);
- } else if (should_submit_full_block(conf)) {
- get_one_call_nolock(frame);
- submit_full(frame, this);
- } else if (should_submit_tail_block(conf)) {
- get_one_call_nolock(frame);
- submit_tail(frame, this);
- } else
- gf_log("crypt", GF_LOG_DEBUG,
- "nothing has been submitted in ordered mode");
- return;
-}
-
-static int32_t
-do_writev(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *dict, dict_t *xdata)
-{
- data_t *data;
- crypt_local_t *local = frame->local;
- struct object_cipher_info *object = &local->info->cinfo;
- /*
- * extract regular file size
- */
- data = dict_get(dict, FSIZE_XATTR_PREFIX);
- if (!data) {
- gf_log("crypt", GF_LOG_WARNING, "Regular file size not found");
- op_ret = -1;
- op_errno = EIO;
- goto error;
- }
- local->old_file_size = local->cur_file_size = data_to_uint64(data);
-
- set_gap_at_end(frame, object, &local->data_conf, DATA_ATOM);
-
- if (local->cur_file_size < local->data_conf.orig_offset) {
- /*
- * Set up hole config
- */
- op_errno = prepare_for_submit_hole(
- frame, this, local->cur_file_size,
- local->data_conf.orig_offset - local->cur_file_size);
- if (op_errno) {
- local->op_ret = -1;
- local->op_errno = op_errno;
- goto error;
- }
- }
- if (should_submit_hole(local))
- submit_hole(frame, this);
- else
- submit_data(frame, this);
- return 0;
-error:
- get_one_call_nolock(frame);
- put_one_call_writev(frame, this);
- return 0;
-}
-
-static int32_t
-crypt_writev_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
-
- if (op_ret < 0)
- goto error;
- /*
- * An access has been granted,
- * retrieve file size first
- */
- STACK_WIND(frame, do_writev, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fgetxattr, local->fd,
- FSIZE_XATTR_PREFIX, NULL);
- return 0;
-error:
- get_one_call_nolock(frame);
- put_one_call_writev(frame, this);
- return 0;
-}
-
-static int32_t
-writev_trivial_completion(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- dict_t *dict)
-{
- crypt_local_t *local = frame->local;
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- local->prebuf = *buf;
- local->postbuf = *buf;
-
- local->prebuf.ia_size = local->cur_file_size;
- local->postbuf.ia_size = local->cur_file_size;
-
- get_one_call(frame);
- put_one_call_writev(frame, this);
- return 0;
-}
-
-int
-crypt_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vec,
- int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,
- dict_t *xdata)
-{
- int32_t ret;
- crypt_local_t *local;
- struct crypt_inode_info *info;
- struct gf_flock lock = {
- 0,
- };
-#if DEBUG_CRYPT
- gf_log("crypt", GF_LOG_DEBUG, "writing %d bytes from offset %llu",
- (int)iov_length(vec, count), (long long)offset);
-#endif
- local = crypt_alloc_local(frame, this, GF_FOP_WRITE);
- if (!local) {
- ret = ENOMEM;
- goto error;
- }
- local->fd = fd_ref(fd);
-
- if (iobref)
- local->iobref = iobref_ref(iobref);
- /*
- * to update real file size on the server
- */
- local->xattr = dict_new();
- if (!local->xattr) {
- ret = ENOMEM;
- goto error;
- }
- local->flags = flags;
-
- info = local_get_inode_info(local, this);
- if (info == NULL) {
- ret = EINVAL;
- goto error;
- }
- if (!object_alg_atomic(&info->cinfo)) {
- ret = EINVAL;
- goto error;
- }
- if (iov_length(vec, count) == 0)
- goto trivial;
-
- ret = prepare_for_submit_data(frame, this, offset,
- iov_length(vec, count),
- vec, count, 0 /* don't setup gup
- in tail: we don't
- know file size yet */);
- if (ret) {
- ret = ENOMEM;
- goto error;
- }
-
- if (parent_is_crypt_xlator(frame, this)) {
- data_t *data;
- /*
- * we are called by shinking crypt_ftruncate(),
- * which doesn't perform hole conversion;
- *
- * don't ask for access:
- * it has already been acquired
- */
-
- /*
- * extract file size
- */
- if (!xdata) {
- gf_log("crypt", GF_LOG_WARNING,
- "Regular file size hasn't been passed");
- ret = EIO;
- goto error;
- }
- data = dict_get(xdata, FSIZE_XATTR_PREFIX);
- if (!data) {
- gf_log("crypt", GF_LOG_WARNING, "Regular file size not found");
- ret = EIO;
- goto error;
- }
- local->old_file_size = local->cur_file_size = data_to_uint64(data);
-
- submit_data(frame, this);
- return 0;
- }
- if (xdata)
- local->xdata = dict_ref(xdata);
- /*
- * lock the file and retrieve its size
- */
- lock.l_len = 0;
- lock.l_start = 0;
- lock.l_type = F_WRLCK;
- lock.l_whence = SEEK_SET;
-
- STACK_WIND(frame, crypt_writev_finodelk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->finodelk, this->name, fd, F_SETLKW,
- &lock, NULL);
- return 0;
-trivial:
- STACK_WIND(frame, writev_trivial_completion, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat, fd, NULL);
- return 0;
-error:
- if (local && local->fd)
- fd_unref(fd);
- if (local && local->iobref)
- iobref_unref(iobref);
- if (local && local->xdata)
- dict_unref(xdata);
- if (local && local->xattr)
- dict_unref(local->xattr);
- if (local && local->info)
- free_inode_info(local->info);
-
- CRYPT_STACK_UNWIND(writev, frame, -1, ret, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-prepare_for_prune(call_frame_t *frame, xlator_t *this, uint64_t offset)
-{
- set_config_offsets(frame, this,
- offset,
- 0, /* count */
- DATA_ATOM,
- 0 /* since we prune, there is no
- gap in tail to uptodate */);
- return 0;
-}
-
-/*
- * Finish the read-prune-modify sequence
- *
- * Can be invoked as
- * 1) ->ftruncate_cbk() for cblock-aligned, or trivial prune
- * 2) ->writev_cbk() for non-cblock-aligned prune
- */
-
-static int32_t
-prune_complete(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
-
- update_local_file_params(frame, this, prebuf, postbuf);
-
- put_one_call_ftruncate(frame, this);
- return 0;
-}
-
-/*
- * This is called as ->ftruncate_cbk()
- *
- * Perform the "write" component of the
- * read-prune-write sequence.
- *
- * submuit the rest of the file
- */
-static int32_t
-prune_submit_file_tail(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
- struct avec_config *conf = &local->data_conf;
- dict_t *dict;
-
- if (op_ret < 0)
- goto put_one_call;
-
- if (local->xdata) {
- dict_unref(local->xdata);
- local->xdata = NULL;
- }
- if (xdata)
- local->xdata = dict_ref(xdata);
-
- dict = dict_new();
- if (!dict) {
- op_errno = ENOMEM;
- goto error;
- }
-
- update_local_file_params(frame, this, prebuf, postbuf);
- local->new_file_size = conf->orig_offset;
-
- /*
- * The rest of the file is a partial block and, hence,
- * should be written via RMW sequence, so the crypt xlator
- * does STACK_WIND to itself.
- *
- * Pass current file size to crypt_writev()
- */
- op_errno = dict_set(dict, FSIZE_XATTR_PREFIX,
- data_from_uint64(local->cur_file_size));
- if (op_errno) {
- gf_log("crypt", GF_LOG_WARNING, "can not set key to update file size");
- dict_unref(dict);
- goto error;
- }
- gf_log("crypt", GF_LOG_DEBUG,
- "passing current file size (%llu) to crypt_writev",
- (unsigned long long)local->cur_file_size);
- /*
- * Padding will be filled with
- * zeros by rmw_partial_block()
- */
- STACK_WIND(frame, prune_complete, this,
- this->fops->writev, /* crypt_writev */
- local->fd, &local->vec, 1,
- conf->aligned_offset, /* offset to write from */
- 0, local->iobref, dict);
-
- dict_unref(dict);
- return 0;
-error:
- local->op_ret = -1;
- local->op_errno = op_errno;
-put_one_call:
- put_one_call_ftruncate(frame, this);
- return 0;
-}
-
-/*
- * This is called as a callback of ->writev() invoked in behalf
- * of ftruncate(): it can be
- * 1) ordered writes issued by hole conversion in the case of
- * expanded truncate, or
- * 2) an rmw partial data block issued by non-cblock-aligned
- * prune.
- */
-int32_t
-end_writeback_ftruncate(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
- /*
- * if nothing has been written,
- * then it must be an error
- */
- local->op_ret = op_ret;
- local->op_errno = op_errno;
-
- if (op_ret < 0)
- goto put_one_call;
-
- update_local_file_params(frame, this, prebuf, postbuf);
-
- if (data_write_in_progress(local))
- /* case (2) */
- goto put_one_call;
- /* case (1) */
- if (should_resume_submit_hole(local))
- submit_hole(frame, this);
- /*
- * case of hole, when we shouldn't resume
- */
-put_one_call:
- put_one_call_ftruncate(frame, this);
- return 0;
-}
-
-/*
- * Perform prune and write components of the
- * read-prune-write sequence.
- *
- * Called as ->readv_cbk()
- *
- * Pre-conditions:
- * @vec contains the latest atom of the file
- * (plain text)
- */
-static int32_t
-prune_write(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iovec *vec, int32_t count,
- struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
-{
- int32_t i;
- size_t to_copy;
- size_t copied = 0;
- crypt_local_t *local = frame->local;
- struct avec_config *conf = &local->data_conf;
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- if (op_ret == -1)
- goto put_one_call;
-
- /*
- * At first, uptodate head block
- */
- if (iov_length(vec, count) < conf->off_in_head) {
- gf_log(this->name, GF_LOG_WARNING,
- "Failed to uptodate head block for prune");
- local->op_ret = -1;
- local->op_errno = EIO;
- goto put_one_call;
- }
- local->vec.iov_len = conf->off_in_head;
- local->vec.iov_base = GF_CALLOC(1, local->vec.iov_len, gf_crypt_mt_data);
-
- if (local->vec.iov_base == NULL) {
- gf_log(this->name, GF_LOG_WARNING,
- "Failed to calloc head block for prune");
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- goto put_one_call;
- }
- for (i = 0; i < count; i++) {
- to_copy = vec[i].iov_len;
- if (to_copy > local->vec.iov_len - copied)
- to_copy = local->vec.iov_len - copied;
-
- memcpy((char *)local->vec.iov_base + copied, vec[i].iov_base, to_copy);
- copied += to_copy;
- if (copied == local->vec.iov_len)
- break;
- }
- /*
- * perform prune with aligned offset
- * (i.e. at this step we prune a bit
- * more then it is needed
- */
- STACK_WIND(frame, prune_submit_file_tail, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate, local->fd,
- conf->aligned_offset, local->xdata);
- return 0;
-put_one_call:
- put_one_call_ftruncate(frame, this);
- return 0;
-}
-
-/*
- * Perform a read-prune-write sequence
- */
-int32_t
-read_prune_write(call_frame_t *frame, xlator_t *this)
-{
- int32_t ret = 0;
- dict_t *dict = NULL;
- crypt_local_t *local = frame->local;
- struct avec_config *conf = &local->data_conf;
- struct object_cipher_info *object = &local->info->cinfo;
-
- set_local_io_params_ftruncate(frame, object);
- get_one_call_nolock(frame);
-
- if ((conf->orig_offset & (object_alg_blksize(object) - 1)) == 0) {
- /*
- * cblock-aligned prune:
- * we don't need read and write components,
- * just cut file body
- */
- gf_log("crypt", GF_LOG_DEBUG, "prune without RMW (at offset %llu",
- (unsigned long long)conf->orig_offset);
-
- STACK_WIND(frame, prune_complete, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate, local->fd,
- conf->orig_offset, local->xdata);
- return 0;
- }
- gf_log("crypt", GF_LOG_DEBUG, "prune with RMW (at offset %llu",
- (unsigned long long)conf->orig_offset);
- /*
- * We are about to perform the "read" component of the
- * read-prune-write sequence. It means that we need to
- * read encrypted data from disk and decrypt it.
- * So, the crypt translator does STACK_WIND to itself.
- *
- * Pass current file size to crypt_readv()
-
- */
- dict = dict_new();
- if (!dict) {
- gf_log("crypt", GF_LOG_WARNING, "Can not alloc dict");
- ret = ENOMEM;
- goto exit;
- }
- ret = dict_set(dict, FSIZE_XATTR_PREFIX,
- data_from_uint64(local->cur_file_size));
- if (ret) {
- gf_log("crypt", GF_LOG_WARNING, "Can not set dict");
- goto exit;
- }
- STACK_WIND(frame, prune_write, this, this->fops->readv, /* crypt_readv */
- local->fd, get_atom_size(object), /* bytes to read */
- conf->aligned_offset, /* offset to read from */
- 0, dict);
-exit:
- if (dict)
- dict_unref(dict);
- return ret;
-}
-
-/*
- * File prune is more complicated than expand.
- * First we need to read the latest atom to not lose info
- * needed for proper update. Also we need to make sure that
- * every component of read-prune-write sequence leaves data
- * consistent
- *
- * Non-cblock aligned prune is performed as read-prune-write
- * sequence:
- *
- * 1) read the latest atom;
- * 2) perform cblock-aligned prune
- * 3) issue a write request for the end-of-file
- */
-int32_t
-prune_file(call_frame_t *frame, xlator_t *this, uint64_t offset)
-{
- int32_t ret;
-
- ret = prepare_for_prune(frame, this, offset);
- if (ret)
- return ret;
- return read_prune_write(frame, this);
-}
-
-int32_t
-expand_file(call_frame_t *frame, xlator_t *this, uint64_t offset)
-{
- int32_t ret;
- crypt_local_t *local = frame->local;
-
- ret = prepare_for_submit_hole(frame, this, local->old_file_size,
- offset - local->old_file_size);
- if (ret)
- return ret;
- submit_hole(frame, this);
- return 0;
-}
-
-static int32_t
-ftruncate_trivial_completion(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- dict_t *dict)
-{
- crypt_local_t *local = frame->local;
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- local->prebuf = *buf;
- local->postbuf = *buf;
-
- local->prebuf.ia_size = local->cur_file_size;
- local->postbuf.ia_size = local->cur_file_size;
-
- get_one_call(frame);
- put_one_call_ftruncate(frame, this);
- return 0;
-}
-
-static int32_t
-do_ftruncate(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *dict, dict_t *xdata)
-{
- data_t *data;
- crypt_local_t *local = frame->local;
-
- if (op_ret)
- goto error;
- /*
- * extract regular file size
- */
- data = dict_get(dict, FSIZE_XATTR_PREFIX);
- if (!data) {
- gf_log("crypt", GF_LOG_WARNING, "Regular file size not found");
- op_errno = EIO;
- goto error;
- }
- local->old_file_size = local->cur_file_size = data_to_uint64(data);
-
- if (local->data_conf.orig_offset == local->cur_file_size) {
-#if DEBUG_CRYPT
- gf_log("crypt", GF_LOG_DEBUG,
- "trivial ftruncate (current file size %llu)",
- (unsigned long long)local->cur_file_size);
-#endif
- goto trivial;
- } else if (local->data_conf.orig_offset < local->cur_file_size) {
-#if DEBUG_CRYPT
- gf_log("crypt", GF_LOG_DEBUG, "prune from %llu to %llu",
- (unsigned long long)local->cur_file_size,
- (unsigned long long)local->data_conf.orig_offset);
-#endif
- op_errno = prune_file(frame, this, local->data_conf.orig_offset);
- } else {
-#if DEBUG_CRYPT
- gf_log("crypt", GF_LOG_DEBUG, "expand from %llu to %llu",
- (unsigned long long)local->cur_file_size,
- (unsigned long long)local->data_conf.orig_offset);
-#endif
- op_errno = expand_file(frame, this, local->data_conf.orig_offset);
- }
- if (op_errno)
- goto error;
- return 0;
-trivial:
- STACK_WIND(frame, ftruncate_trivial_completion, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat, local->fd, NULL);
- return 0;
-error:
- /*
- * finish with ftruncate
- */
- local->op_ret = -1;
- local->op_errno = op_errno;
-
- get_one_call_nolock(frame);
- put_one_call_ftruncate(frame, this);
- return 0;
-}
-
-static int32_t
-crypt_ftruncate_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
-
- if (op_ret < 0)
- goto error;
- /*
- * An access has been granted,
- * retrieve file size first
- */
- STACK_WIND(frame, do_ftruncate, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fgetxattr, local->fd,
- FSIZE_XATTR_PREFIX, NULL);
- return 0;
-error:
- get_one_call_nolock(frame);
- put_one_call_ftruncate(frame, this);
- return 0;
-}
-
-/*
- * ftruncate is performed in 2 steps:
- * . receive file size;
- * . expand or prune file.
- */
-static int32_t
-crypt_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- dict_t *xdata)
-{
- int32_t ret;
- crypt_local_t *local;
- struct crypt_inode_info *info;
- struct gf_flock lock = {
- 0,
- };
-
- local = crypt_alloc_local(frame, this, GF_FOP_FTRUNCATE);
- if (!local) {
- ret = ENOMEM;
- goto error;
- }
- local->xattr = dict_new();
- if (!local->xattr) {
- ret = ENOMEM;
- goto error;
- }
- local->fd = fd_ref(fd);
- info = local_get_inode_info(local, this);
- if (info == NULL) {
- ret = EINVAL;
- goto error;
- }
- if (!object_alg_atomic(&info->cinfo)) {
- ret = EINVAL;
- goto error;
- }
- local->data_conf.orig_offset = offset;
- if (xdata)
- local->xdata = dict_ref(xdata);
-
- lock.l_len = 0;
- lock.l_start = 0;
- lock.l_type = F_WRLCK;
- lock.l_whence = SEEK_SET;
-
- STACK_WIND(frame, crypt_ftruncate_finodelk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->finodelk, this->name, fd, F_SETLKW,
- &lock, NULL);
- return 0;
-error:
- if (local && local->fd)
- fd_unref(fd);
- if (local && local->xdata)
- dict_unref(xdata);
- if (local && local->xattr)
- dict_unref(local->xattr);
- if (local && local->info)
- free_inode_info(local->info);
-
- CRYPT_STACK_UNWIND(ftruncate, frame, -1, ret, NULL, NULL, NULL);
- return 0;
-}
-
-/* ->flush_cbk() */
-int32_t
-truncate_end(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
-
- CRYPT_STACK_UNWIND(truncate, frame, op_ret, op_errno, &local->prebuf,
- &local->postbuf, local->xdata);
- return 0;
-}
-
-/* ftruncate_cbk() */
-int32_t
-truncate_flush(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
- fd_t *fd = local->fd;
- local->prebuf = *prebuf;
- local->postbuf = *postbuf;
-
- STACK_WIND(frame, truncate_end, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush, fd, NULL);
- fd_unref(fd);
- return 0;
-}
-
-/*
- * is called as ->open_cbk()
- */
-static int32_t
-truncate_begin(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
-
- if (op_ret < 0) {
- fd_unref(fd);
- CRYPT_STACK_UNWIND(truncate, frame, op_ret, op_errno, NULL, NULL, NULL);
- return 0;
- } else {
- fd_bind(fd);
- }
- /*
- * crypt_truncate() is implemented via crypt_ftruncate(),
- * so the crypt xlator does STACK_WIND to itself here
- */
- STACK_WIND(frame, truncate_flush, this,
- this->fops->ftruncate, /* crypt_ftruncate */
- fd, local->offset, NULL);
- return 0;
-}
-
-/*
- * crypt_truncate() is implemented via crypt_ftruncate() as a
- * sequence crypt_open() - crypt_ftruncate() - truncate_flush()
- */
-int32_t
-crypt_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
- dict_t *xdata)
-{
- fd_t *fd;
- crypt_local_t *local;
-
-#if DEBUG_CRYPT
- gf_log(this->name, GF_LOG_DEBUG, "truncate file %s at offset %llu",
- loc->path, (unsigned long long)offset);
-#endif
- local = crypt_alloc_local(frame, this, GF_FOP_TRUNCATE);
- if (!local)
- goto error;
-
- fd = fd_create(loc->inode, frame->root->pid);
- if (!fd) {
- gf_log(this->name, GF_LOG_ERROR, "Can not create fd");
- goto error;
- }
- local->fd = fd;
- local->offset = offset;
- local->xdata = xdata;
- STACK_WIND(frame, truncate_begin, this, this->fops->open, /* crypt_open() */
- loc, O_RDWR, fd, NULL);
- return 0;
-error:
- CRYPT_STACK_UNWIND(truncate, frame, -1, EINVAL, NULL, NULL, NULL);
- return 0;
-}
-
-end_writeback_handler_t
-dispatch_end_writeback(glusterfs_fop_t fop)
-{
- switch (fop) {
- case GF_FOP_WRITE:
- return end_writeback_writev;
- case GF_FOP_FTRUNCATE:
- return end_writeback_ftruncate;
- default:
- gf_log("crypt", GF_LOG_WARNING, "Bad wb operation %d", fop);
- return NULL;
- }
-}
-
-/*
- * true, if the caller needs metadata string
- */
-static int32_t
-is_custom_mtd(dict_t *xdata)
-{
- data_t *data;
- uint32_t flags;
-
- if (!xdata)
- return 0;
-
- data = dict_get(xdata, MSGFLAGS_PREFIX);
- if (!data)
- return 0;
- if (data->len != sizeof(uint32_t)) {
- gf_log("crypt", GF_LOG_WARNING, "Bad msgflags size (%d)", data->len);
- return -1;
- }
- flags = *((uint32_t *)data->data);
- return msgflags_check_mtd_lock(&flags);
-}
-
-static int32_t
-crypt_open_done(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- if (op_ret < 0)
- gf_log(this->name, GF_LOG_WARNING, "mtd unlock failed (%d)", op_errno);
- put_one_call_open(frame);
- return 0;
-}
-
-static void
-crypt_open_tail(call_frame_t *frame, xlator_t *this)
-{
- struct gf_flock lock = {
- 0,
- };
- crypt_local_t *local = frame->local;
-
- lock.l_type = F_UNLCK;
- lock.l_whence = SEEK_SET;
- lock.l_start = 0;
- lock.l_len = 0;
- lock.l_pid = 0;
-
- STACK_WIND(frame, crypt_open_done, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->finodelk, this->name, local->fd,
- F_SETLKW, &lock, NULL);
-}
-
-/*
- * load private inode info at open time
- * called as ->fgetxattr_cbk()
- */
-static int
-load_mtd_open(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *dict, dict_t *xdata)
-{
- int32_t ret;
- gf_boolean_t upload_info;
- data_t *mtd;
- uint64_t value = 0;
- struct crypt_inode_info *info;
- crypt_local_t *local = frame->local;
- crypt_private_t *priv = this->private;
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
-
- if (local->fd->inode->ia_type == IA_IFLNK)
- goto exit;
- if (op_ret < 0)
- goto exit;
- /*
- * first, check for cached info
- */
- ret = inode_ctx_get(local->fd->inode, this, &value);
- if (ret != -1) {
- info = (struct crypt_inode_info *)(long)value;
- if (info == NULL) {
- gf_log(this->name, GF_LOG_WARNING,
- "Inode info expected, but not found");
- local->op_ret = -1;
- local->op_errno = EIO;
- goto exit;
- }
- /*
- * info has been found in the cache
- */
- upload_info = _gf_false;
- } else {
- /*
- * info hasn't been found in the cache.
- */
- info = alloc_inode_info(local, local->loc);
- if (!info) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- goto exit;
- }
- init_inode_info_head(info, local->fd);
- upload_info = _gf_true;
- }
- /*
- * extract metadata
- */
- mtd = dict_get(dict, CRYPTO_FORMAT_PREFIX);
- if (!mtd) {
- local->op_ret = -1;
- local->op_errno = ENOENT;
- gf_log(this->name, GF_LOG_WARNING, "Format string wasn't found");
- goto exit;
- }
- /*
- * authenticate metadata against the path
- */
- ret = open_format((unsigned char *)mtd->data, mtd->len, local->loc, info,
- get_master_cinfo(priv), local, upload_info);
- if (ret) {
- local->op_ret = -1;
- local->op_errno = ret;
- goto exit;
- }
- if (upload_info) {
- ret = init_inode_info_tail(info, get_master_cinfo(priv));
- if (ret) {
- local->op_ret = -1;
- local->op_errno = ret;
- goto exit;
- }
- ret = inode_ctx_put(local->fd->inode, this, (uint64_t)(long)info);
- if (ret == -1) {
- local->op_ret = -1;
- local->op_errno = EIO;
- goto exit;
- }
- }
- if (local->custom_mtd) {
- /*
- * pass the metadata string to the customer
- */
- ret = dict_set_static_bin(local->xdata, CRYPTO_FORMAT_PREFIX, mtd->data,
- mtd->len);
- if (ret) {
- local->op_ret = -1;
- local->op_errno = ret;
- goto exit;
- }
- }
-exit:
- if (!local->custom_mtd)
- crypt_open_tail(frame, this);
- else
- put_one_call_open(frame);
- return 0;
-}
-
-static int32_t
-crypt_open_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
-
- if (op_ret < 0) {
- gf_log(this->name, GF_LOG_WARNING, "finodelk (LOCK) failed");
- goto exit;
- }
- STACK_WIND(frame, load_mtd_open, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fgetxattr, local->fd,
- CRYPTO_FORMAT_PREFIX, NULL);
- return 0;
-exit:
- put_one_call_open(frame);
- return 0;
-}
-
-/*
- * verify metadata against the specified pathname
- */
-static int32_t
-crypt_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
-{
- struct gf_flock lock = {
- 0,
- };
- crypt_local_t *local = frame->local;
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
-
- if (local->fd->inode->ia_type == IA_IFLNK)
- goto exit;
- if (op_ret < 0)
- goto exit;
- if (xdata)
- local->xdata = dict_ref(xdata);
- else if (local->custom_mtd) {
- local->xdata = dict_new();
- if (!local->xdata) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- gf_log("crypt", GF_LOG_ERROR,
- "Can not get new dict for mtd string");
- goto exit;
- }
- }
- lock.l_len = 0;
- lock.l_start = 0;
- lock.l_type = local->custom_mtd ? F_WRLCK : F_RDLCK;
- lock.l_whence = SEEK_SET;
-
- STACK_WIND(frame, crypt_open_finodelk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->finodelk, this->name, fd, F_SETLKW,
- &lock, NULL);
- return 0;
-exit:
- put_one_call_open(frame);
- return 0;
-}
-
-static int32_t
-crypt_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, dict_t *xdata)
-{
- int32_t ret = ENOMEM;
- crypt_local_t *local;
-
- local = crypt_alloc_local(frame, this, GF_FOP_OPEN);
- if (!local)
- goto error;
- local->loc = GF_CALLOC(1, sizeof(loc_t), gf_crypt_mt_loc);
- if (!local->loc) {
- ret = ENOMEM;
- goto error;
- }
- ret = loc_copy(local->loc, loc);
- if (ret) {
- GF_FREE(local->loc);
- ret = ENOMEM;
- goto error;
- }
- local->fd = fd_ref(fd);
-
- ret = is_custom_mtd(xdata);
- if (ret < 0) {
- loc_wipe(local->loc);
- GF_FREE(local->loc);
- ret = EINVAL;
- goto error;
- }
- local->custom_mtd = ret;
-
- if ((flags & O_ACCMODE) == O_WRONLY)
- /*
- * we can't open O_WRONLY, because
- * we need to do read-modify-write
- */
- flags = (flags & ~O_ACCMODE) | O_RDWR;
- /*
- * Make sure that out translated offsets
- * and counts won't be ignored
- */
- flags &= ~O_APPEND;
- get_one_call_nolock(frame);
- STACK_WIND(frame, crypt_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
- return 0;
-error:
- CRYPT_STACK_UNWIND(open, frame, -1, ret, NULL, NULL);
- return 0;
-}
-
-static int32_t
-init_inode_info_tail(struct crypt_inode_info *info,
- struct master_cipher_info *master)
-{
- int32_t ret;
- struct object_cipher_info *object = &info->cinfo;
-
-#if DEBUG_CRYPT
- gf_log("crypt", GF_LOG_DEBUG, "Init inode info for object %s",
- uuid_utoa(info->oid));
-#endif
- ret = data_cipher_algs[object->o_alg][object->o_mode].set_private(info,
- master);
- if (ret) {
- gf_log("crypt", GF_LOG_ERROR, "Set private info failed");
- return ret;
- }
- return 0;
-}
-
-/*
- * Init inode info at ->create() time
- */
-static void
-init_inode_info_create(struct crypt_inode_info *info,
- struct master_cipher_info *master, data_t *data)
-{
- struct object_cipher_info *object;
-
- info->nr_minor = CRYPT_XLATOR_ID;
- memcpy(info->oid, data->data, data->len);
-
- object = &info->cinfo;
-
- object->o_alg = master->m_alg;
- object->o_mode = master->m_mode;
- object->o_block_bits = master->m_block_bits;
- object->o_dkey_size = master->m_dkey_size;
-}
-
-static void
-init_inode_info_head(struct crypt_inode_info *info, fd_t *fd)
-{
- memcpy(info->oid, fd->inode->gfid, sizeof(uuid_t));
-}
-
-static int32_t
-crypt_create_done(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- crypt_private_t *priv = this->private;
- crypt_local_t *local = frame->local;
- struct crypt_inode_info *info = local->info;
- fd_t *local_fd = local->fd;
- dict_t *local_xdata = local->xdata;
- inode_t *local_inode = local->inode;
-
- if (op_ret < 0) {
- free_inode_info(info);
- goto unwind;
- }
- op_errno = init_inode_info_tail(info, get_master_cinfo(priv));
- if (op_errno) {
- op_ret = -1;
- free_inode_info(info);
- goto unwind;
- }
- /*
- * FIXME: drop major subversion number
- */
- op_ret = inode_ctx_put(local->fd->inode, this, (uint64_t)(long)info);
- if (op_ret == -1) {
- op_errno = EIO;
- free_inode_info(info);
- goto unwind;
- }
-unwind:
- free_format(local);
- CRYPT_STACK_UNWIND(create, frame, op_ret, op_errno, local_fd, local_inode,
- &local->buf, &local->prebuf, &local->postbuf,
- local_xdata);
- fd_unref(local_fd);
- inode_unref(local_inode);
- if (local_xdata)
- dict_unref(local_xdata);
- return 0;
-}
-
-static int
-crypt_create_tail(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- struct gf_flock lock = {
- 0,
- };
- crypt_local_t *local = frame->local;
- fd_t *local_fd = local->fd;
- dict_t *local_xdata = local->xdata;
- inode_t *local_inode = local->inode;
-
- dict_unref(local->xattr);
-
- if (op_ret < 0)
- goto error;
-
- lock.l_type = F_UNLCK;
- lock.l_whence = SEEK_SET;
- lock.l_start = 0;
- lock.l_len = 0;
- lock.l_pid = 0;
-
- STACK_WIND(frame, crypt_create_done, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->finodelk, this->name, local->fd,
- F_SETLKW, &lock, NULL);
- return 0;
-error:
- free_inode_info(local->info);
- free_format(local);
-
- CRYPT_STACK_UNWIND(create, frame, op_ret, op_errno, local_fd, local_inode,
- &local->buf, &local->prebuf, &local->postbuf,
- local_xdata);
-
- fd_unref(local_fd);
- inode_unref(local_inode);
- if (local_xdata)
- dict_unref(local_xdata);
- return 0;
-}
-
-static int32_t
-crypt_create_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
- struct crypt_inode_info *info = local->info;
-
- if (op_ret < 0)
- goto error;
-
- STACK_WIND(frame, crypt_create_tail, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetxattr, local->fd,
- local->xattr, /* CRYPTO_FORMAT_PREFIX */
- 0, NULL);
- return 0;
-error:
- free_inode_info(info);
- free_format(local);
- fd_unref(local->fd);
- dict_unref(local->xattr);
- if (local->xdata)
- dict_unref(local->xdata);
-
- CRYPT_STACK_UNWIND(create, frame, op_ret, op_errno, NULL, NULL, NULL, NULL,
- NULL, NULL);
- return 0;
-}
-
-/*
- * Create and store crypt-specific format on disk;
- * Populate cache with private inode info
- */
-static int32_t
-crypt_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- struct gf_flock lock = {
- 0,
- };
- crypt_local_t *local = frame->local;
- struct crypt_inode_info *info = local->info;
-
- if (op_ret < 0)
- goto error;
- if (xdata)
- local->xdata = dict_ref(xdata);
- local->inode = inode_ref(inode);
- local->buf = *buf;
- local->prebuf = *preparent;
- local->postbuf = *postparent;
-
- lock.l_len = 0;
- lock.l_start = 0;
- lock.l_type = F_WRLCK;
- lock.l_whence = SEEK_SET;
-
- STACK_WIND(frame, crypt_create_finodelk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->finodelk, this->name, local->fd,
- F_SETLKW, &lock, NULL);
- return 0;
-error:
- free_inode_info(info);
- free_format(local);
- fd_unref(local->fd);
- dict_unref(local->xattr);
-
- CRYPT_STACK_UNWIND(create, frame, op_ret, op_errno, NULL, NULL, NULL, NULL,
- NULL, NULL);
- return 0;
-}
-
-static int32_t
-crypt_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
-{
- int ret;
- data_t *data;
- crypt_local_t *local;
- crypt_private_t *priv;
- struct master_cipher_info *master;
- struct crypt_inode_info *info;
-
- priv = this->private;
- master = get_master_cinfo(priv);
-
- if (master_alg_atomic(master)) {
- /*
- * We can't open O_WRONLY, because we
- * need to do read-modify-write.
- */
- if ((flags & O_ACCMODE) == O_WRONLY)
- flags = (flags & ~O_ACCMODE) | O_RDWR;
- /*
- * Make sure that out translated offsets
- * and counts won't be ignored
- */
- flags &= ~O_APPEND;
- }
- local = crypt_alloc_local(frame, this, GF_FOP_CREATE);
- if (!local) {
- ret = ENOMEM;
- goto error;
- }
- data = dict_get(xdata, "gfid-req");
- if (!data) {
- ret = EINVAL;
- gf_log("crypt", GF_LOG_WARNING, "gfid not found");
- goto error;
- }
- if (data->len != sizeof(uuid_t)) {
- ret = EINVAL;
- gf_log("crypt", GF_LOG_WARNING, "bad gfid size (%d), should be %d",
- (int)data->len, (int)sizeof(uuid_t));
- goto error;
- }
- info = alloc_inode_info(local, loc);
- if (!info) {
- ret = ENOMEM;
- goto error;
- }
- /*
- * NOTE:
- * format has to be created BEFORE
- * proceeding to the untrusted server
- */
- ret = alloc_format_create(local);
- if (ret) {
- free_inode_info(info);
- goto error;
- }
- init_inode_info_create(info, master, data);
-
- ret = create_format(local->format, loc, info, master);
- if (ret) {
- free_inode_info(info);
- goto error;
- }
- local->xattr = dict_new();
- if (!local->xattr) {
- free_inode_info(info);
- free_format(local);
- goto error;
- }
- ret = dict_set_static_bin(local->xattr, CRYPTO_FORMAT_PREFIX, local->format,
- new_format_size());
- if (ret) {
- dict_unref(local->xattr);
- free_inode_info(info);
- free_format(local);
- ret = EINVAL;
- goto error;
- }
- ret = dict_set(local->xattr, FSIZE_XATTR_PREFIX, data_from_uint64(0));
- if (ret) {
- dict_unref(local->xattr);
- free_inode_info(info);
- free_format(local);
- ret = ENOMEM;
- goto error;
- }
- local->fd = fd_ref(fd);
-
- STACK_WIND(frame, crypt_create_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
- xdata);
- return 0;
-error:
- gf_log("crypt", GF_LOG_WARNING, "can not create file");
- CRYPT_STACK_UNWIND(create, frame, -1, ret, NULL, NULL, NULL, NULL, NULL,
- NULL);
- return 0;
-}
-
-/*
- * FIXME: this should depends on the version of format string
- */
-static int32_t
-filter_crypt_xattr(dict_t *dict, char *key, data_t *value, void *data)
-{
- dict_del(dict, key);
- return 0;
-}
-
-static int32_t
-crypt_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
- int32_t flags, dict_t *xdata)
-{
- dict_foreach_fnmatch(dict, "trusted.glusterfs.crypt*", filter_crypt_xattr,
- NULL);
- STACK_WIND(frame, default_fsetxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
- return 0;
-}
-
-/*
- * TBD: verify file metadata before wind
- */
-static int32_t
-crypt_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
- int32_t flags, dict_t *xdata)
-{
- dict_foreach_fnmatch(dict, "trusted.glusterfs.crypt*", filter_crypt_xattr,
- NULL);
- STACK_WIND(frame, default_setxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata);
- return 0;
-}
-
-/*
- * called as flush_cbk()
- */
-static int32_t
-linkop_end(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
- linkop_unwind_handler_t unwind_fn;
- unwind_fn = linkop_unwind_dispatch(local->fop);
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
-
- if (op_ret < 0 && op_errno == ENOENT &&
- local->loc->inode->ia_type == IA_IFLNK) {
- local->op_ret = 0;
- local->op_errno = 0;
- }
- unwind_fn(frame);
- return 0;
-}
-
-/*
- * unpin inode on the server
- */
-static int32_t
-link_flush(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
-
- if (op_ret < 0)
- goto error;
- if (local->xdata) {
- dict_unref(local->xdata);
- local->xdata = NULL;
- }
- if (xdata)
- local->xdata = dict_ref(xdata);
- local->inode = inode_ref(inode);
- local->buf = *buf;
- local->prebuf = *preparent;
- local->postbuf = *postparent;
-
- STACK_WIND(frame, linkop_end, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush, local->fd, NULL);
- return 0;
-error:
- local->op_ret = -1;
- local->op_errno = op_errno;
- link_unwind(frame);
- return 0;
-}
-
-void
-link_unwind(call_frame_t *frame)
-{
- crypt_local_t *local = frame->local;
- dict_t *xdata;
- dict_t *xattr;
- inode_t *inode;
-
- if (!local) {
- CRYPT_STACK_UNWIND(link, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
- NULL);
- return;
- }
- xdata = local->xdata;
- xattr = local->xattr;
- inode = local->inode;
-
- if (local->loc) {
- loc_wipe(local->loc);
- GF_FREE(local->loc);
- }
- if (local->newloc) {
- loc_wipe(local->newloc);
- GF_FREE(local->newloc);
- }
- if (local->fd)
- fd_unref(local->fd);
- if (local->format)
- GF_FREE(local->format);
-
- CRYPT_STACK_UNWIND(link, frame, local->op_ret, local->op_errno, inode,
- &local->buf, &local->prebuf, &local->postbuf, xdata);
- if (xdata)
- dict_unref(xdata);
- if (xattr)
- dict_unref(xattr);
- if (inode)
- inode_unref(inode);
-}
-
-void
-link_wind(call_frame_t *frame, xlator_t *this)
-{
- crypt_local_t *local = frame->local;
-
- STACK_WIND(frame, link_flush, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->link, local->loc, local->newloc,
- local->xdata);
-}
-
-/*
- * unlink()
- */
-static int32_t
-unlink_flush(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
-
- if (op_ret < 0)
- goto error;
- local->prebuf = *preparent;
- local->postbuf = *postparent;
- if (local->xdata) {
- dict_unref(local->xdata);
- local->xdata = NULL;
- }
- if (xdata)
- local->xdata = dict_ref(xdata);
-
- STACK_WIND(frame, linkop_end, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush, local->fd, NULL);
- return 0;
-error:
- local->op_ret = -1;
- local->op_errno = op_errno;
- unlink_unwind(frame);
- return 0;
-}
-
-void
-unlink_unwind(call_frame_t *frame)
-{
- crypt_local_t *local = frame->local;
- dict_t *xdata;
- dict_t *xattr;
-
- if (!local) {
- CRYPT_STACK_UNWIND(unlink, frame, -1, ENOMEM, NULL, NULL, NULL);
- return;
- }
- xdata = local->xdata;
- xattr = local->xattr;
- if (local->loc) {
- loc_wipe(local->loc);
- GF_FREE(local->loc);
- }
- if (local->fd)
- fd_unref(local->fd);
- if (local->format)
- GF_FREE(local->format);
-
- CRYPT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
- &local->prebuf, &local->postbuf, xdata);
- if (xdata)
- dict_unref(xdata);
- if (xattr)
- dict_unref(xattr);
-}
-
-void
-unlink_wind(call_frame_t *frame, xlator_t *this)
-{
- crypt_local_t *local = frame->local;
-
- STACK_WIND(frame, unlink_flush, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink, local->loc, local->flags,
- local->xdata);
-}
-
-void
-rename_unwind(call_frame_t *frame)
-{
- crypt_local_t *local = frame->local;
- dict_t *xdata;
- dict_t *xattr;
- struct iatt *prenewparent;
- struct iatt *postnewparent;
-
- if (!local) {
- CRYPT_STACK_UNWIND(rename, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
- NULL, NULL);
- return;
- }
- xdata = local->xdata;
- xattr = local->xattr;
- prenewparent = local->prenewparent;
- postnewparent = local->postnewparent;
-
- if (local->loc) {
- loc_wipe(local->loc);
- GF_FREE(local->loc);
- }
- if (local->newloc) {
- loc_wipe(local->newloc);
- GF_FREE(local->newloc);
- }
- if (local->fd)
- fd_unref(local->fd);
- if (local->format)
- GF_FREE(local->format);
-
- CRYPT_STACK_UNWIND(rename, frame, local->op_ret, local->op_errno,
- &local->buf, &local->prebuf, &local->postbuf,
- prenewparent, postnewparent, xdata);
- if (xdata)
- dict_unref(xdata);
- if (xattr)
- dict_unref(xattr);
- if (prenewparent)
- GF_FREE(prenewparent);
- if (postnewparent)
- GF_FREE(postnewparent);
-}
-
-/*
- * called as flush_cbk()
- */
-static int32_t
-rename_end(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
-
- rename_unwind(frame);
- return 0;
-}
-
-static int32_t
-rename_flush(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iatt *buf, struct iatt *preoldparent,
- struct iatt *postoldparent, struct iatt *prenewparent,
- struct iatt *postnewparent, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
-
- if (op_ret < 0)
- goto error;
- dict_unref(local->xdata);
- local->xdata = NULL;
- if (xdata)
- local->xdata = dict_ref(xdata);
-
- local->buf = *buf;
- local->prebuf = *preoldparent;
- local->postbuf = *postoldparent;
- if (prenewparent) {
- local->prenewparent = GF_CALLOC(1, sizeof(*prenewparent),
- gf_crypt_mt_iatt);
- if (!local->prenewparent) {
- op_errno = ENOMEM;
- goto error;
- }
- *local->prenewparent = *prenewparent;
- }
- if (postnewparent) {
- local->postnewparent = GF_CALLOC(1, sizeof(*postnewparent),
- gf_crypt_mt_iatt);
- if (!local->postnewparent) {
- op_errno = ENOMEM;
- goto error;
- }
- *local->postnewparent = *postnewparent;
- }
- STACK_WIND(frame, rename_end, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush, local->fd, NULL);
- return 0;
-error:
- local->op_ret = -1;
- local->op_errno = op_errno;
- rename_unwind(frame);
- return 0;
-}
-
-void
-rename_wind(call_frame_t *frame, xlator_t *this)
-{
- crypt_local_t *local = frame->local;
-
- STACK_WIND(frame, rename_flush, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rename, local->loc, local->newloc,
- local->xdata);
-}
-
-static int32_t
-__do_linkop(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
- linkop_wind_handler_t wind_fn;
- linkop_unwind_handler_t unwind_fn;
-
- wind_fn = linkop_wind_dispatch(local->fop);
- unwind_fn = linkop_unwind_dispatch(local->fop);
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
-
- if (op_ret >= 0)
- wind_fn(frame, this);
- else {
- gf_log(this->name, GF_LOG_WARNING, "mtd unlock failed (%d)", op_errno);
- unwind_fn(frame);
- }
- return 0;
-}
-
-static int32_t
-do_linkop(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
-{
- struct gf_flock lock = {
- 0,
- };
- crypt_local_t *local = frame->local;
- linkop_unwind_handler_t unwind_fn;
-
- unwind_fn = linkop_unwind_dispatch(local->fop);
- local->op_ret = op_ret;
- local->op_errno = op_errno;
-
- if (op_ret < 0)
- goto error;
-
- lock.l_type = F_UNLCK;
- lock.l_whence = SEEK_SET;
- lock.l_start = 0;
- lock.l_len = 0;
- lock.l_pid = 0;
-
- STACK_WIND(frame, __do_linkop, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->finodelk, this->name, local->fd,
- F_SETLKW, &lock, NULL);
- return 0;
-error:
- unwind_fn(frame);
- return 0;
-}
-
-/*
- * Update the metadata string (against the new pathname);
- * submit the result
- */
-static int32_t
-linkop_begin(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, fd_t *fd, dict_t *xdata)
-{
- gf_boolean_t upload_info;
- crypt_local_t *local = frame->local;
- crypt_private_t *priv = this->private;
- struct crypt_inode_info *info;
- data_t *old_mtd;
- uint32_t new_mtd_size;
- uint64_t value = 0;
- void (*unwind_fn)(call_frame_t * frame);
- mtd_op_t mop;
-
- unwind_fn = linkop_unwind_dispatch(local->fop);
- mop = linkop_mtdop_dispatch(local->fop);
-
- if (op_ret < 0) {
- /*
- * verification failed
- */
- goto error;
- } else {
- fd_bind(fd);
- }
-
- old_mtd = dict_get(xdata, CRYPTO_FORMAT_PREFIX);
- if (!old_mtd) {
- op_errno = EIO;
- gf_log(this->name, GF_LOG_DEBUG, "Metadata string wasn't found");
- goto error;
- }
- new_mtd_size = format_size(mop, old_mtd->len);
- op_errno = alloc_format(local, new_mtd_size);
- if (op_errno)
- goto error;
- /*
- * check for cached info
- */
- op_ret = inode_ctx_get(fd->inode, this, &value);
- if (op_ret != -1) {
- info = (struct crypt_inode_info *)(long)value;
- if (info == NULL) {
- gf_log(this->name, GF_LOG_WARNING, "Inode info was not found");
- op_errno = EINVAL;
- goto error;
- }
- /*
- * info was found in the cache
- */
- local->info = info;
- upload_info = _gf_false;
- } else {
- /*
- * info wasn't found in the cache;
- */
- info = alloc_inode_info(local, local->loc);
- if (!info)
- goto error;
- init_inode_info_head(info, fd);
- local->info = info;
- upload_info = _gf_true;
- }
- op_errno = open_format((unsigned char *)old_mtd->data, old_mtd->len,
- local->loc, info, get_master_cinfo(priv), local,
- upload_info);
- if (op_errno)
- goto error;
- if (upload_info == _gf_true) {
- op_errno = init_inode_info_tail(info, get_master_cinfo(priv));
- if (op_errno)
- goto error;
- op_errno = inode_ctx_put(fd->inode, this, (uint64_t)(long)(info));
- if (op_errno == -1) {
- op_errno = EIO;
- goto error;
- }
- }
- /*
- * update the format string (append/update/cup a MAC)
- */
- op_errno = update_format(local->format, (unsigned char *)old_mtd->data,
- old_mtd->len, local->mac_idx, mop, local->newloc,
- info, get_master_cinfo(priv), local);
- if (op_errno)
- goto error;
- /*
- * store the new format string on the server
- */
- if (new_mtd_size) {
- op_errno = dict_set_static_bin(local->xattr, CRYPTO_FORMAT_PREFIX,
- local->format, new_mtd_size);
- if (op_errno)
- goto error;
- }
- STACK_WIND(frame, do_linkop, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr, local->loc, local->xattr, 0,
- NULL);
- return 0;
-error:
- local->op_ret = -1;
- local->op_errno = op_errno;
- unwind_fn(frame);
- return 0;
-}
-
-static int32_t
-linkop_grab_local(call_frame_t *frame, xlator_t *this, loc_t *oldloc,
- loc_t *newloc, int flags, dict_t *xdata, glusterfs_fop_t op)
-{
- int32_t ret = ENOMEM;
- fd_t *fd;
- crypt_local_t *local;
-
- local = crypt_alloc_local(frame, this, op);
- if (!local)
- goto error;
- if (xdata)
- local->xdata = dict_ref(xdata);
-
- fd = fd_create(oldloc->inode, frame->root->pid);
- if (!fd) {
- gf_log(this->name, GF_LOG_ERROR, "Can not create fd");
- goto error;
- }
- local->fd = fd;
- local->flags = flags;
- local->loc = GF_CALLOC(1, sizeof(loc_t), gf_crypt_mt_loc);
- if (!local->loc)
- goto error;
- ret = loc_copy(local->loc, oldloc);
- if (ret) {
- GF_FREE(local->loc);
- local->loc = NULL;
- goto error;
- }
- if (newloc) {
- local->newloc = GF_CALLOC(1, sizeof(loc_t), gf_crypt_mt_loc);
- if (!local->newloc) {
- loc_wipe(local->loc);
- GF_FREE(local->loc);
- goto error;
- }
- ret = loc_copy(local->newloc, newloc);
- if (ret) {
- loc_wipe(local->loc);
- GF_FREE(local->loc);
- GF_FREE(local->newloc);
- goto error;
- }
- }
- local->xattr = dict_new();
- if (!local->xattr) {
- gf_log(this->name, GF_LOG_ERROR, "Can not create dict");
- ret = ENOMEM;
- goto error;
- }
- return 0;
-
-error:
- if (local) {
- if (local->xdata)
- dict_unref(local->xdata);
- if (local->fd)
- fd_unref(local->fd);
- local->fd = 0;
- local->loc = NULL;
- local->newloc = NULL;
- local->op_ret = -1;
- local->op_errno = ret;
- }
-
- return ret;
-}
-
-/*
- * read and verify locked metadata against the old pathname (via open);
- * update the metadata string in accordance with the new pathname;
- * submit modified metadata;
- * wind;
- */
-static int32_t
-linkop(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
- int flags, dict_t *xdata, glusterfs_fop_t op)
-{
- int32_t ret;
- dict_t *dict;
- crypt_local_t *local;
- void (*unwind_fn)(call_frame_t * frame);
- void (*wind_fn)(call_frame_t * frame, xlator_t * this);
-
- wind_fn = linkop_wind_dispatch(op);
- unwind_fn = linkop_unwind_dispatch(op);
-
- ret = linkop_grab_local(frame, this, oldloc, newloc, flags, xdata, op);
- local = frame->local;
- if (ret)
- goto error;
-
- if (local->fd->inode->ia_type == IA_IFLNK)
- goto wind;
-
- dict = dict_new();
- if (!dict) {
- gf_log(this->name, GF_LOG_ERROR, "Can not create dict");
- ret = ENOMEM;
- goto error;
- }
- /*
- * Set a message to crypt_open() that we need
- * locked metadata string.
- * All link operations (link, unlink, rename)
- * need write lock
- */
- msgflags_set_mtd_wlock(&local->msgflags);
- ret = dict_set_static_bin(dict, MSGFLAGS_PREFIX, &local->msgflags,
- sizeof(local->msgflags));
- if (ret) {
- gf_log(this->name, GF_LOG_ERROR, "Can not set dict");
- dict_unref(dict);
- goto error;
- }
- /*
- * verify metadata against the old pathname
- * and retrieve locked metadata string
- */
- STACK_WIND(frame, linkop_begin, this, this->fops->open, /* crypt_open() */
- oldloc, O_RDWR, local->fd, dict);
- dict_unref(dict);
- return 0;
-
-wind:
- wind_fn(frame, this);
- return 0;
-
-error:
- local->op_ret = -1;
- local->op_errno = ret;
- unwind_fn(frame);
- return 0;
-}
-
-static int32_t
-crypt_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
- dict_t *xdata)
-{
- return linkop(frame, this, oldloc, newloc, 0, xdata, GF_FOP_LINK);
-}
-
-static int32_t
-crypt_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
- dict_t *xdata)
-{
- return linkop(frame, this, loc, NULL, flags, xdata, GF_FOP_UNLINK);
-}
-
-static int32_t
-crypt_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
- dict_t *xdata)
-{
- return linkop(frame, this, oldloc, newloc, 0, xdata, GF_FOP_RENAME);
-}
-
-static void
-put_one_call_open(call_frame_t *frame)
-{
- crypt_local_t *local = frame->local;
- if (put_one_call(local)) {
- fd_t *fd = local->fd;
- loc_t *loc = local->loc;
- dict_t *xdata = local->xdata;
-
- CRYPT_STACK_UNWIND(open, frame, local->op_ret, local->op_errno, fd,
- xdata);
- fd_unref(fd);
- if (xdata)
- dict_unref(xdata);
- loc_wipe(loc);
- GF_FREE(loc);
- }
-}
-
-static int32_t
-__crypt_readv_done(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
- fd_t *local_fd = local->fd;
- dict_t *local_xdata = local->xdata;
- /* read deals with data configs only */
- struct iovec *avec = local->data_conf.avec;
- char **pool = local->data_conf.pool;
- int blocks_in_pool = local->data_conf.blocks_in_pool;
- struct iobref *iobref = local->iobref;
- struct iobref *iobref_data = local->iobref_data;
-
- if (op_ret < 0) {
- gf_log(this->name, GF_LOG_WARNING, "readv unlock failed (%d)",
- op_errno);
- if (local->op_ret >= 0) {
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- }
- }
- dump_plain_text(local, avec);
-
- gf_log("crypt", GF_LOG_DEBUG,
- "readv: ret_to_user: %d, iovec len: %d, ia_size: %llu",
- (int)(local->rw_count > 0 ? local->rw_count : local->op_ret),
- (int)(local->rw_count > 0 ? iov_length(avec, local->data_conf.acount)
- : 0),
- (unsigned long long)local->buf.ia_size);
-
- CRYPT_STACK_UNWIND(
- readv, frame, local->rw_count > 0 ? local->rw_count : local->op_ret,
- local->op_errno, avec, avec ? local->data_conf.acount : 0, &local->buf,
- local->iobref, local_xdata);
-
- free_avec(avec, pool, blocks_in_pool);
- fd_unref(local_fd);
- if (local_xdata)
- dict_unref(local_xdata);
- if (iobref)
- iobref_unref(iobref);
- if (iobref_data)
- iobref_unref(iobref_data);
- return 0;
-}
-
-static void
-crypt_readv_done(call_frame_t *frame, xlator_t *this)
-{
- if (parent_is_crypt_xlator(frame, this))
- /*
- * don't unlock (it will be done by the parent)
- */
- __crypt_readv_done(frame, NULL, this, 0, 0, NULL);
- else {
- crypt_local_t *local = frame->local;
- struct gf_flock lock = {
- 0,
- };
-
- lock.l_type = F_UNLCK;
- lock.l_whence = SEEK_SET;
- lock.l_start = 0;
- lock.l_len = 0;
- lock.l_pid = 0;
-
- STACK_WIND(frame, __crypt_readv_done, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->finodelk, this->name, local->fd,
- F_SETLKW, &lock, NULL);
- }
-}
-
-static void
-put_one_call_readv(call_frame_t *frame, xlator_t *this)
-{
- crypt_local_t *local = frame->local;
- if (put_one_call(local))
- crypt_readv_done(frame, this);
-}
-
-static int32_t
-__crypt_writev_done(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
- fd_t *local_fd = local->fd;
- dict_t *local_xdata = local->xdata;
- int32_t ret_to_user;
-
- if (local->xattr)
- dict_unref(local->xattr);
- /*
- * Calculate amount of butes to be returned
- * to user. We need to subtract paddings that
- * have been written as a part of atom.
- */
- /*
- * subtract head padding
- */
- if (local->rw_count == 0)
- /*
- * Nothing has been written, it must be an error
- */
- ret_to_user = local->op_ret;
- else if (local->rw_count <= local->data_conf.off_in_head) {
- gf_log("crypt", GF_LOG_WARNING, "Incomplete write");
- ret_to_user = 0;
- } else
- ret_to_user = local->rw_count - local->data_conf.off_in_head;
- /*
- * subtract tail padding
- */
- if (ret_to_user > local->data_conf.orig_size)
- ret_to_user = local->data_conf.orig_size;
-
- if (local->iobref)
- iobref_unref(local->iobref);
- if (local->iobref_data)
- iobref_unref(local->iobref_data);
- free_avec_data(local);
- free_avec_hole(local);
-
- gf_log("crypt", GF_LOG_DEBUG, "writev: ret_to_user: %d", ret_to_user);
-
- CRYPT_STACK_UNWIND(writev, frame, ret_to_user, local->op_errno,
- &local->prebuf, &local->postbuf, local_xdata);
- fd_unref(local_fd);
- if (local_xdata)
- dict_unref(local_xdata);
- return 0;
-}
-
-static int32_t
-crypt_writev_done(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
-
- if (op_ret < 0)
- gf_log("crypt", GF_LOG_WARNING, "can not update file size");
-
- if (parent_is_crypt_xlator(frame, this))
- /*
- * don't unlock (it will be done by the parent)
- */
- __crypt_writev_done(frame, NULL, this, 0, 0, NULL);
- else {
- struct gf_flock lock = {
- 0,
- };
-
- lock.l_type = F_UNLCK;
- lock.l_whence = SEEK_SET;
- lock.l_start = 0;
- lock.l_len = 0;
- lock.l_pid = 0;
-
- STACK_WIND(frame, __crypt_writev_done, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->finodelk, this->name, local->fd,
- F_SETLKW, &lock, NULL);
- }
- return 0;
-}
-
-static void
-put_one_call_writev(call_frame_t *frame, xlator_t *this)
-{
- crypt_local_t *local = frame->local;
- if (put_one_call(local)) {
- if (local->update_disk_file_size) {
- int32_t ret;
- /*
- * update file size, unlock the file and unwind
- */
- ret = dict_set(local->xattr, FSIZE_XATTR_PREFIX,
- data_from_uint64(local->cur_file_size));
- if (ret) {
- gf_log("crypt", GF_LOG_WARNING,
- "can not set key to update file size");
- crypt_writev_done(frame, NULL, this, 0, 0, NULL);
- return;
- }
- gf_log("crypt", GF_LOG_DEBUG, "Updating disk file size to %llu",
- (unsigned long long)local->cur_file_size);
- STACK_WIND(frame, crypt_writev_done, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetxattr, local->fd,
- local->xattr, /* CRYPTO_FORMAT_PREFIX */
- 0, NULL);
- } else
- crypt_writev_done(frame, NULL, this, 0, 0, NULL);
- }
-}
-
-static int32_t
-__crypt_ftruncate_done(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
- fd_t *local_fd = local->fd;
- dict_t *local_xdata = local->xdata;
- char *iobase = local->vec.iov_base;
-
- if (op_ret < 0) {
- gf_log(this->name, GF_LOG_WARNING, "ftruncate unlock failed (%d)",
- op_errno);
- if (local->op_ret >= 0) {
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- }
- }
- if (local->iobref_data)
- iobref_unref(local->iobref_data);
- free_avec_data(local);
- free_avec_hole(local);
-
- gf_log("crypt", GF_LOG_DEBUG,
- "ftruncate, return to user: presize=%llu, postsize=%llu",
- (unsigned long long)local->prebuf.ia_size,
- (unsigned long long)local->postbuf.ia_size);
-
- CRYPT_STACK_UNWIND(ftruncate, frame, ((local->op_ret < 0) ? -1 : 0),
- local->op_errno, &local->prebuf, &local->postbuf,
- local_xdata);
- fd_unref(local_fd);
- if (local_xdata)
- dict_unref(local_xdata);
- if (iobase)
- GF_FREE(iobase);
- return 0;
-}
-
-static int32_t
-crypt_ftruncate_done(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
- struct gf_flock lock = {
- 0,
- };
-
- dict_unref(local->xattr);
- if (op_ret < 0)
- gf_log("crypt", GF_LOG_WARNING, "can not update file size");
-
- lock.l_type = F_UNLCK;
- lock.l_whence = SEEK_SET;
- lock.l_start = 0;
- lock.l_len = 0;
- lock.l_pid = 0;
-
- STACK_WIND(frame, __crypt_ftruncate_done, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->finodelk, this->name, local->fd,
- F_SETLKW, &lock, NULL);
- return 0;
-}
-
-static void
-put_one_call_ftruncate(call_frame_t *frame, xlator_t *this)
-{
- crypt_local_t *local = frame->local;
- if (put_one_call(local)) {
- if (local->update_disk_file_size) {
- int32_t ret;
- /*
- * update file size, unlock the file and unwind
- */
- ret = dict_set(local->xattr, FSIZE_XATTR_PREFIX,
- data_from_uint64(local->cur_file_size));
- if (ret) {
- gf_log("crypt", GF_LOG_WARNING,
- "can not set key to update file size");
- crypt_ftruncate_done(frame, NULL, this, 0, 0, NULL);
- return;
- }
- gf_log("crypt", GF_LOG_DEBUG, "Updating disk file size to %llu",
- (unsigned long long)local->cur_file_size);
- STACK_WIND(frame, crypt_ftruncate_done, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetxattr, local->fd,
- local->xattr, /* CRYPTO_FORMAT_PREFIX */
- 0, NULL);
- } else
- crypt_ftruncate_done(frame, NULL, this, 0, 0, NULL);
- }
-}
-
-/*
- * load regular file size for some FOPs
- */
-static int32_t
-load_file_size(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
-{
- data_t *data;
- crypt_local_t *local = frame->local;
-
- dict_t *local_xdata = local->xdata;
- inode_t *local_inode = local->inode;
-
- if (op_ret < 0)
- goto unwind;
- /*
- * load regular file size
- */
- data = dict_get(dict, FSIZE_XATTR_PREFIX);
- if (!data) {
- if (local->xdata)
- dict_unref(local->xdata);
- gf_log("crypt", GF_LOG_WARNING, "Regular file size not found");
- op_ret = -1;
- op_errno = EIO;
- goto unwind;
- }
- local->buf.ia_size = data_to_uint64(data);
-
- gf_log(this->name, GF_LOG_DEBUG, "FOP %d: Translate regular file to %llu",
- local->fop, (unsigned long long)local->buf.ia_size);
-unwind:
- if (local->fd)
- fd_unref(local->fd);
- if (local->loc) {
- loc_wipe(local->loc);
- GF_FREE(local->loc);
- }
- switch (local->fop) {
- case GF_FOP_FSTAT:
- CRYPT_STACK_UNWIND(fstat, frame, op_ret, op_errno,
- op_ret >= 0 ? &local->buf : NULL, local->xdata);
- break;
- case GF_FOP_STAT:
- CRYPT_STACK_UNWIND(stat, frame, op_ret, op_errno,
- op_ret >= 0 ? &local->buf : NULL, local->xdata);
- break;
- case GF_FOP_LOOKUP:
- CRYPT_STACK_UNWIND(lookup, frame, op_ret, op_errno,
- op_ret >= 0 ? local->inode : NULL,
- op_ret >= 0 ? &local->buf : NULL, local->xdata,
- op_ret >= 0 ? &local->postbuf : NULL);
- break;
- case GF_FOP_READ:
- CRYPT_STACK_UNWIND(readv, frame, op_ret, op_errno, NULL, 0,
- op_ret >= 0 ? &local->buf : NULL, NULL, NULL);
- break;
- default:
- gf_log(this->name, GF_LOG_WARNING, "Improper file operation %d",
- local->fop);
- }
- if (local_xdata)
- dict_unref(local_xdata);
- if (local_inode)
- inode_unref(local_inode);
- return 0;
-}
-
-static int32_t
-crypt_stat_common_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
-
- if (op_ret < 0)
- goto unwind;
- if (!IA_ISREG(buf->ia_type))
- goto unwind;
-
- local->buf = *buf;
- if (xdata)
- local->xdata = dict_ref(xdata);
-
- switch (local->fop) {
- case GF_FOP_FSTAT:
- STACK_WIND(frame, load_file_size, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fgetxattr, local->fd,
- FSIZE_XATTR_PREFIX, NULL);
- break;
- case GF_FOP_STAT:
- STACK_WIND(frame, load_file_size, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr, local->loc,
- FSIZE_XATTR_PREFIX, NULL);
- break;
- default:
- gf_log(this->name, GF_LOG_WARNING, "Improper file operation %d",
- local->fop);
- }
- return 0;
-unwind:
- if (local->fd)
- fd_unref(local->fd);
- if (local->loc) {
- loc_wipe(local->loc);
- GF_FREE(local->loc);
- }
- switch (local->fop) {
- case GF_FOP_FSTAT:
- CRYPT_STACK_UNWIND(fstat, frame, op_ret, op_errno,
- op_ret >= 0 ? buf : NULL,
- op_ret >= 0 ? xdata : NULL);
- break;
- case GF_FOP_STAT:
- CRYPT_STACK_UNWIND(stat, frame, op_ret, op_errno,
- op_ret >= 0 ? buf : NULL,
- op_ret >= 0 ? xdata : NULL);
- break;
- default:
- gf_log(this->name, GF_LOG_WARNING, "Improper file operation %d",
- local->fop);
- }
- return 0;
-}
-
-static int32_t
-crypt_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
-{
- crypt_local_t *local;
-
- local = crypt_alloc_local(frame, this, GF_FOP_FSTAT);
- if (!local)
- goto error;
- local->fd = fd_ref(fd);
- STACK_WIND(frame, crypt_stat_common_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat, fd, xdata);
- return 0;
-error:
- CRYPT_STACK_UNWIND(fstat, frame, -1, ENOMEM, NULL, NULL);
- return 0;
-}
-
-static int32_t
-crypt_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
-{
- int32_t ret;
- crypt_local_t *local;
-
- local = crypt_alloc_local(frame, this, GF_FOP_STAT);
- if (!local)
- goto error;
- local->loc = GF_CALLOC(1, sizeof(loc_t), gf_crypt_mt_loc);
- if (!local->loc)
- goto error;
- ret = loc_copy(local->loc, loc);
- if (ret) {
- GF_FREE(local->loc);
- goto error;
- }
- STACK_WIND(frame, crypt_stat_common_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat, loc, xdata);
- return 0;
-error:
- CRYPT_STACK_UNWIND(stat, frame, -1, ENOMEM, NULL, NULL);
- return 0;
-}
-
-static int32_t
-crypt_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *xdata, struct iatt *postparent)
-{
- crypt_local_t *local = frame->local;
-
- if (op_ret < 0)
- goto unwind;
- if (!IA_ISREG(buf->ia_type))
- goto unwind;
-
- local->inode = inode_ref(inode);
- local->buf = *buf;
- local->postbuf = *postparent;
- if (xdata)
- local->xdata = dict_ref(xdata);
- gf_uuid_copy(local->loc->gfid, buf->ia_gfid);
-
- STACK_WIND(frame, load_file_size, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr, local->loc,
- FSIZE_XATTR_PREFIX, NULL);
- return 0;
-unwind:
- loc_wipe(local->loc);
- GF_FREE(local->loc);
- CRYPT_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, buf, xdata,
- postparent);
- return 0;
-}
-
-static int32_t
-crypt_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
-{
- int32_t ret;
- crypt_local_t *local;
-
- local = crypt_alloc_local(frame, this, GF_FOP_LOOKUP);
- if (!local)
- goto error;
- local->loc = GF_CALLOC(1, sizeof(loc_t), gf_crypt_mt_loc);
- if (!local->loc)
- goto error;
- ret = loc_copy(local->loc, loc);
- if (ret) {
- GF_FREE(local->loc);
- goto error;
- }
- gf_log(this->name, GF_LOG_DEBUG, "Lookup %s", loc->path);
- STACK_WIND(frame, crypt_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, xdata);
- return 0;
-error:
- CRYPT_STACK_UNWIND(lookup, frame, -1, ENOMEM, NULL, NULL, NULL, NULL);
- return 0;
-}
-
-/*
- * for every regular directory entry find its real file size
- * and update stat's buf properly
- */
-static int32_t
-crypt_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
- dict_t *xdata)
-{
- gf_dirent_t *entry = NULL;
-
- if (op_ret < 0)
- goto unwind;
-
- list_for_each_entry(entry, (&entries->list), list)
- {
- data_t *data;
-
- if (!IA_ISREG(entry->d_stat.ia_type))
- continue;
- data = dict_get(entry->dict, FSIZE_XATTR_PREFIX);
- if (!data) {
- gf_log("crypt", GF_LOG_WARNING,
- "Regular file size of direntry not found");
- op_errno = EIO;
- op_ret = -1;
- break;
- }
- entry->d_stat.ia_size = data_to_uint64(data);
- }
-unwind:
- CRYPT_STACK_UNWIND(readdirp, frame, op_ret, op_errno, entries, xdata);
- return 0;
-}
-
-/*
- * ->readdirp() fills in-core inodes, so we need to set proper
- * file sizes for all directory entries of the parent @fd.
- * Actual updates take place in ->crypt_readdirp_cbk()
- */
-static int32_t
-crypt_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, dict_t *xdata)
-{
- int32_t ret = ENOMEM;
-
- if (!xdata) {
- xdata = dict_new();
- if (!xdata)
- goto error;
- } else
- dict_ref(xdata);
- /*
- * make sure that we'll have real file sizes at ->readdirp_cbk()
- */
- ret = dict_set(xdata, FSIZE_XATTR_PREFIX, data_from_uint64(0));
- if (ret) {
- dict_unref(xdata);
- ret = ENOMEM;
- goto error;
- }
- STACK_WIND(frame, crypt_readdirp_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readdirp, fd, size, offset, xdata);
- dict_unref(xdata);
- return 0;
-error:
- CRYPT_STACK_UNWIND(readdirp, frame, -1, ret, NULL, NULL);
- return 0;
-}
-
-static int32_t
-crypt_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
- dict_t *xdata)
-{
- gf_log(this->name, GF_LOG_WARNING,
- "NFS mounts of encrypted volumes are unsupported");
- CRYPT_STACK_UNWIND(access, frame, -1, EPERM, NULL);
- return 0;
-}
-
-int32_t
-master_set_block_size(xlator_t *this, crypt_private_t *priv, dict_t *options)
-{
- uint64_t block_size = 0;
- struct master_cipher_info *master = get_master_cinfo(priv);
-
- if (options != NULL)
- GF_OPTION_RECONF("block-size", block_size, options, size_uint64, error);
- else
- GF_OPTION_INIT("block-size", block_size, size_uint64, error);
-
- switch (block_size) {
- case 512:
- master->m_block_bits = 9;
- break;
- case 1024:
- master->m_block_bits = 10;
- break;
- case 2048:
- master->m_block_bits = 11;
- break;
- case 4096:
- master->m_block_bits = 12;
- break;
- default:
- gf_log("crypt", GF_LOG_ERROR, "FATAL: unsupported block size %llu",
- (unsigned long long)block_size);
- goto error;
- }
- return 0;
-error:
- return -1;
-}
-
-int32_t
-master_set_alg(xlator_t *this, crypt_private_t *priv)
-{
- struct master_cipher_info *master = get_master_cinfo(priv);
- master->m_alg = AES_CIPHER_ALG;
- return 0;
-}
-
-int32_t
-master_set_mode(xlator_t *this, crypt_private_t *priv)
-{
- struct master_cipher_info *master = get_master_cinfo(priv);
- master->m_mode = XTS_CIPHER_MODE;
- return 0;
-}
-
-/*
- * set key size in bits to the master info
- * Pre-conditions: cipher mode in the master info is uptodate.
- */
-static int
-master_set_data_key_size(xlator_t *this, crypt_private_t *priv, dict_t *options)
-{
- int32_t ret;
- uint64_t key_size = 0;
- struct master_cipher_info *master = get_master_cinfo(priv);
-
- if (options != NULL)
- GF_OPTION_RECONF("data-key-size", key_size, options, uint64, error);
- else
- GF_OPTION_INIT("data-key-size", key_size, uint64, error);
-
- ret = data_cipher_algs[master->m_alg][master->m_mode].check_key(key_size);
- if (ret) {
- gf_log("crypt", GF_LOG_ERROR,
- "FATAL: wrong bin key size %llu for alg %d mode %d",
- (unsigned long long)key_size, (int)master->m_alg,
- (int)master->m_mode);
- goto error;
- }
- master->m_dkey_size = key_size;
- return 0;
-error:
- return -1;
-}
-
-static int
-is_hex(char *s)
-{
- return ('0' <= *s && *s <= '9') || ('a' <= *s && *s <= 'f');
-}
-
-static int
-parse_hex_buf(xlator_t *this, char *src, unsigned char *dst, int hex_size)
-{
- int i;
- int hex_byte = 0;
-
- for (i = 0; i < (hex_size / 2); i++) {
- if (!is_hex(src + i * 2) || !is_hex(src + i * 2 + 1)) {
- gf_log("crypt", GF_LOG_ERROR, "FATAL: not hex symbol in key");
- return -1;
- }
- if (sscanf(src + i * 2, "%2x", &hex_byte) != 1) {
- gf_log("crypt", GF_LOG_ERROR, "FATAL: can not parse hex key");
- return -1;
- }
- dst[i] = hex_byte & 0xff;
- }
- return 0;
-}
-
-/*
- * Parse options;
- * install master volume key
- */
-int32_t
-master_set_master_vol_key(xlator_t *this, crypt_private_t *priv)
-{
- int32_t ret;
- FILE *file = NULL;
-
- int32_t key_size;
- char *opt_key_file_pathname = NULL;
-
- unsigned char bin_buf[MASTER_VOL_KEY_SIZE];
- char hex_buf[2 * MASTER_VOL_KEY_SIZE];
-
- struct master_cipher_info *master = get_master_cinfo(priv);
- /*
- * extract master key passed via option
- */
- GF_OPTION_INIT("master-key", opt_key_file_pathname, path, bad_key);
-
- if (!opt_key_file_pathname) {
- gf_log(this->name, GF_LOG_ERROR, "FATAL: missing master key");
- return -1;
- }
- gf_log(this->name, GF_LOG_DEBUG, "handling file key %s",
- opt_key_file_pathname);
-
- file = fopen(opt_key_file_pathname, "r");
- if (file == NULL) {
- gf_log(this->name, GF_LOG_ERROR,
- "FATAL: can not open file with master key");
- return -1;
- }
- /*
- * extract hex key
- */
- key_size = fread(hex_buf, 1, sizeof(hex_buf), file);
- if (key_size < sizeof(hex_buf)) {
- gf_log(this->name, GF_LOG_ERROR, "FATAL: master key is too short");
- goto bad_key;
- }
- ret = parse_hex_buf(this, hex_buf, bin_buf, key_size);
- if (ret)
- goto bad_key;
- memcpy(master->m_key, bin_buf, MASTER_VOL_KEY_SIZE);
- memset(hex_buf, 0, sizeof(hex_buf));
- fclose(file);
-
- memset(bin_buf, 0, sizeof(bin_buf));
- return 0;
-bad_key:
- gf_log(this->name, GF_LOG_ERROR, "FATAL: bad master key");
- if (file)
- fclose(file);
- memset(bin_buf, 0, sizeof(bin_buf));
- return -1;
-}
-
-/*
- * Derive volume key for object-id authentication
- */
-int32_t
-master_set_nmtd_vol_key(xlator_t *this, crypt_private_t *priv)
-{
- return get_nmtd_vol_key(get_master_cinfo(priv));
-}
-
-int32_t
-crypt_init_xlator(xlator_t *this)
-{
- int32_t ret;
- crypt_private_t *priv = this->private;
-
- ret = master_set_alg(this, priv);
- if (ret)
- return ret;
- ret = master_set_mode(this, priv);
- if (ret)
- return ret;
- ret = master_set_block_size(this, priv, NULL);
- if (ret)
- return ret;
- ret = master_set_data_key_size(this, priv, NULL);
- if (ret)
- return ret;
- ret = master_set_master_vol_key(this, priv);
- if (ret)
- return ret;
- return master_set_nmtd_vol_key(this, priv);
-}
-
-static int32_t
-crypt_alloc_private(xlator_t *this)
-{
- this->private = GF_CALLOC(1, sizeof(crypt_private_t), gf_crypt_mt_priv);
- if (!this->private) {
- gf_log("crypt", GF_LOG_ERROR,
- "Can not allocate memory for private data");
- return ENOMEM;
- }
- return 0;
-}
-
-static void
-crypt_free_private(xlator_t *this)
-{
- crypt_private_t *priv = this->private;
- if (priv) {
- memset(priv, 0, sizeof(*priv));
- GF_FREE(priv);
- }
-}
-
-int32_t
-mem_acct_init(xlator_t *this)
-{
- int ret = -1;
-
- if (!this)
- return ret;
-
- ret = xlator_mem_acct_init(this, gf_crypt_mt_end);
-
- if (ret != 0) {
- gf_log(this->name, GF_LOG_ERROR,
- "Memory accounting init"
- "failed");
- return ret;
- }
-
- return ret;
-}
-
-int32_t
-reconfigure(xlator_t *this, dict_t *options)
-{
- int32_t ret = -1;
- crypt_private_t *priv = NULL;
-
- GF_VALIDATE_OR_GOTO("crypt", this, error);
- GF_VALIDATE_OR_GOTO(this->name, this->private, error);
- GF_VALIDATE_OR_GOTO(this->name, options, error);
-
- priv = this->private;
-
- ret = master_set_block_size(this, priv, options);
- if (ret) {
- gf_log("this->name", GF_LOG_ERROR, "Failed to reconfure block size");
- goto error;
- }
- ret = master_set_data_key_size(this, priv, options);
- if (ret) {
- gf_log("this->name", GF_LOG_ERROR, "Failed to reconfure data key size");
- goto error;
- }
- return 0;
-error:
- return ret;
-}
-
-int32_t
-init(xlator_t *this)
-{
- int32_t ret;
-
- if (!this->children || this->children->next) {
- gf_log("crypt", GF_LOG_ERROR,
- "FATAL: crypt should have exactly one child");
- return EINVAL;
- }
- if (!this->parents) {
- gf_log(this->name, GF_LOG_WARNING, "dangling volume. check volfile ");
- }
- ret = crypt_alloc_private(this);
- if (ret)
- return ret;
- ret = crypt_init_xlator(this);
- if (ret)
- goto error;
- this->local_pool = mem_pool_new(crypt_local_t, 64);
- if (!this->local_pool) {
- gf_log(this->name, GF_LOG_ERROR,
- "failed to create local_t's memory pool");
- ret = ENOMEM;
- goto error;
- }
- gf_log("crypt", GF_LOG_INFO, "crypt xlator loaded");
- return 0;
-error:
- crypt_free_private(this);
- return ret;
-}
-
-void
-fini(xlator_t *this)
-{
- crypt_free_private(this);
-}
-
-struct xlator_fops fops = {.readv = crypt_readv,
- .writev = crypt_writev,
- .truncate = crypt_truncate,
- .ftruncate = crypt_ftruncate,
- .setxattr = crypt_setxattr,
- .fsetxattr = crypt_fsetxattr,
- .link = crypt_link,
- .unlink = crypt_unlink,
- .rename = crypt_rename,
- .open = crypt_open,
- .create = crypt_create,
- .stat = crypt_stat,
- .fstat = crypt_fstat,
- .lookup = crypt_lookup,
- .readdirp = crypt_readdirp,
- .access = crypt_access};
-
-struct xlator_cbks cbks = {.forget = crypt_forget};
-
-struct volume_options options[] = {
- {.key = {"master-key"},
- .type = GF_OPTION_TYPE_PATH,
- .description =
- "Pathname of regular file which contains master volume key"},
- {
- .key = {"data-key-size"},
- .type = GF_OPTION_TYPE_SIZET,
- .description = "Data key size (bits)",
- .min = 256,
- .max = 512,
- .default_value = "256",
- },
- {.key = {"block-size"},
- .type = GF_OPTION_TYPE_SIZET,
- .description = "Atom size (bits)",
- .min = 512,
- .max = 4096,
- .default_value = "4096"},
- {.key = {NULL}},
-};
-
-/*
- Local variables:
- c-indentation-style: "K&R"
- mode-name: "LC"
- c-basic-offset: 8
- tab-width: 8
- fill-column: 80
- scroll-step: 1
- End:
-*/
diff --git a/xlators/encryption/crypt/src/crypt.h b/xlators/encryption/crypt/src/crypt.h
deleted file mode 100644
index c1216a2..0000000
--- a/xlators/encryption/crypt/src/crypt.h
+++ /dev/null
@@ -1,931 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __CRYPT_H__
-#define __CRYPT_H__
-
-#include <openssl/aes.h>
-#include <openssl/evp.h>
-#include <openssl/sha.h>
-#include <openssl/hmac.h>
-#include <openssl/cmac.h>
-#include <openssl/modes.h>
-#include "crypt-mem-types.h"
-#include <glusterfs/compat.h>
-
-#define CRYPT_XLATOR_ID (0)
-
-#define MAX_IOVEC_BITS (3)
-#define MAX_IOVEC (1 << MAX_IOVEC_BITS)
-#define KEY_FACTOR_BITS (6)
-
-#define DEBUG_CRYPT (0)
-#define TRIVIAL_TFM (0)
-
-#define CRYPT_MIN_BLOCK_BITS (9)
-#define CRYPT_MAX_BLOCK_BITS (12)
-
-#define MASTER_VOL_KEY_SIZE (32)
-#define NMTD_VOL_KEY_SIZE (16)
-
-#if !defined(GF_LINUX_HOST_OS)
-typedef off_t loff_t;
-#endif
-
-struct crypt_key {
- uint32_t len;
- const char *label;
-};
-
-/*
- * Add new key types to the end of this
- * enumeration but before LAST_KEY_TYPE
- */
-typedef enum {
- MASTER_VOL_KEY,
- NMTD_VOL_KEY,
- NMTD_LINK_KEY,
- EMTD_FILE_KEY,
- DATA_FILE_KEY_256,
- DATA_FILE_KEY_512,
- LAST_KEY_TYPE
-} crypt_key_type;
-
-struct kderive_context {
- const unsigned char *pkey; /* parent key */
- uint32_t pkey_len; /* parent key size, bits */
- uint32_t ckey_len; /* child key size, bits */
- unsigned char *fid; /* fixed input data, NIST 800-108, 5.1 */
- uint32_t fid_len; /* fid len, bytes */
- unsigned char *out; /* contains child keying material */
- uint32_t out_len; /* out len, bytes */
-};
-
-typedef enum { DATA_ATOM, HOLE_ATOM, LAST_DATA_TYPE } atom_data_type;
-
-typedef enum {
- HEAD_ATOM,
- TAIL_ATOM,
- FULL_ATOM,
- LAST_LOCALITY_TYPE
-} atom_locality_type;
-
-typedef enum {
- MTD_CREATE,
- MTD_APPEND,
- MTD_OVERWRITE,
- MTD_CUT,
- MTD_LAST_OP
-} mtd_op_t;
-
-struct xts128_context {
- void *key1, *key2;
- block128_f block1, block2;
-};
-
-struct object_cipher_info {
- cipher_alg_t o_alg;
- cipher_mode_t o_mode;
- uint32_t o_block_bits;
- uint32_t o_dkey_size; /* raw data key size in bits */
- union {
- struct {
- unsigned char ivec[16];
- AES_KEY dkey[2];
- AES_KEY tkey; /* key used for tweaking */
- XTS128_CONTEXT xts;
- } aes_xts;
- } u;
-};
-
-struct master_cipher_info {
- /*
- * attributes inherited by newly created regular files
- */
- cipher_alg_t m_alg;
- cipher_mode_t m_mode;
- uint32_t m_block_bits;
- uint32_t m_dkey_size; /* raw key size in bits */
- /*
- * master key
- */
- unsigned char m_key[MASTER_VOL_KEY_SIZE];
- /*
- * volume key for oid authentication
- */
- unsigned char m_nmtd_key[NMTD_VOL_KEY_SIZE];
-};
-
-/*
- * This info is not changed during file's life
- */
-struct crypt_inode_info {
-#if DEBUG_CRYPT
- loc_t *loc; /* pathname that the file has been
- opened, or created with */
-#endif
- uint16_t nr_minor;
- uuid_t oid;
- struct object_cipher_info cinfo;
-};
-
-/*
- * this should locate in secure memory
- */
-typedef struct {
- struct master_cipher_info master;
-} crypt_private_t;
-
-static inline struct master_cipher_info *
-get_master_cinfo(crypt_private_t *priv)
-{
- return &priv->master;
-}
-
-static inline struct object_cipher_info *
-get_object_cinfo(struct crypt_inode_info *info)
-{
- return &info->cinfo;
-}
-
-/*
- * this describes layouts and properties
- * of atoms in an aligned vector
- */
-struct avec_config {
- uint32_t atom_size;
- atom_data_type type;
- size_t orig_size;
- off_t orig_offset;
- size_t expanded_size;
- off_t aligned_offset;
-
- uint32_t off_in_head;
- uint32_t off_in_tail;
- uint32_t gap_in_tail;
- uint32_t nr_full_blocks;
-
- struct iovec *avec; /* aligned vector */
- uint32_t acount; /* number of avec components. The same
- * as number of occupied logical blocks */
- char **pool;
- uint32_t blocks_in_pool;
- uint32_t cursor; /* makes sense only for ordered writes,
- * so there is no races on this counter.
- *
- * Cursor is per-config object, we don't
- * reset cursor for atoms of different
- * localities (head, tail, full)
- */
-};
-
-typedef struct {
- glusterfs_fop_t fop; /* code of FOP this local info built for */
- fd_t *fd;
- inode_t *inode;
- loc_t *loc;
- int32_t mac_idx;
- loc_t *newloc;
- int32_t flags;
- int32_t wbflags;
- struct crypt_inode_info *info;
- struct iobref *iobref;
- struct iobref *iobref_data;
- off_t offset;
-
- uint64_t old_file_size; /* per FOP, retrieved under lock held */
- uint64_t cur_file_size; /* per iteration, before issuing IOs */
- uint64_t new_file_size; /* per iteration, after issuing IOs */
-
- uint64_t io_offset; /* offset of IOs issued per iteration */
- uint64_t io_offset_nopad; /* offset of user's data in the atom */
- uint32_t io_size; /* size of IOs issued per iteration */
- uint32_t io_size_nopad; /* size of user's data in the IOs */
- uint32_t eof_padding_size; /* size od EOF padding in the IOs */
-
- gf_lock_t call_lock; /* protect nr_calls from many cbks */
- int32_t nr_calls;
-
- atom_data_type active_setup; /* which setup (hole or date)
- is currently active */
- /* data setup */
- struct avec_config data_conf;
-
- /* hole setup */
- int hole_conv_in_proggress;
- gf_lock_t hole_lock; /* protect hole config from many cbks */
- int hole_handled;
- struct avec_config hole_conf;
- struct iatt buf;
- struct iatt prebuf;
- struct iatt postbuf;
- struct iatt *prenewparent;
- struct iatt *postnewparent;
- int32_t op_ret;
- int32_t op_errno;
- int32_t rw_count; /* total read or written */
- gf_lock_t rw_count_lock; /* protect the counter above */
- unsigned char *format; /* for create, update format string */
- uint32_t format_size;
- uint32_t msgflags; /* messages for crypt_open() */
- dict_t *xdata;
- dict_t *xattr;
- struct iovec vec; /* contains last file's atom for
- read-prune-write sequence */
- gf_boolean_t custom_mtd;
- /*
- * the next 3 fields are used by readdir and friends
- */
- gf_dirent_t *de; /* directory entry */
- char *de_path; /* pathname of directory entry */
- uint32_t de_prefix_len; /* length of the parent's pathname */
- gf_dirent_t *entries;
-
- uint32_t update_disk_file_size : 1;
-} crypt_local_t;
-
-/* This represents a (read)modify-write atom */
-struct rmw_atom {
- atom_locality_type locality;
- /*
- * read-modify-write sequence of the atom
- */
- int32_t (*rmw)(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iovec *vec,
- int32_t count, struct iatt *stbuf, struct iobref *iobref,
- dict_t *xdata);
- /*
- * offset of the logical block in a file
- */
- loff_t (*offset_at)(call_frame_t *frame, struct object_cipher_info *object);
- /*
- * IO offset in an atom
- */
- uint32_t (*offset_in)(call_frame_t *frame,
- struct object_cipher_info *object);
- /*
- * number of bytes of plain text of this atom that user
- * wants to read/write.
- * It can be smaller than atom_size in the case of head
- * or tail atoms.
- */
- uint32_t (*io_size_nopad)(call_frame_t *frame,
- struct object_cipher_info *object);
- /*
- * which iovec represents the atom
- */
- struct iovec *(*get_iovec)(call_frame_t *frame, uint32_t count);
- /*
- * how many bytes of partial block should be uptodated by
- * reading from disk.
- * This is used to perform a read component of RMW (read-modify-write).
- */
- uint32_t (*count_to_uptodate)(call_frame_t *frame,
- struct object_cipher_info *object);
- struct avec_config *(*get_config)(call_frame_t *frame);
-};
-
-struct data_cipher_alg {
- gf_boolean_t atomic; /* true means that algorithm requires
- to pad data before cipher transform */
- gf_boolean_t should_pad; /* true means that algorithm requires
- to pad the end of file with extra-data */
- uint32_t blkbits; /* blksize = 1 << blkbits */
- /*
- * any preliminary sanity checks goes here
- */
- int32_t (*init)(void);
- /*
- * set alg-mode specific inode info
- */
- int32_t (*set_private)(struct crypt_inode_info *info,
- struct master_cipher_info *master);
- /*
- * check alg-mode specific data key
- */
- int32_t (*check_key)(uint32_t key_size);
- void (*set_iv)(off_t offset, struct object_cipher_info *object);
- int32_t (*encrypt)(const unsigned char *from, unsigned char *to,
- size_t length, off_t offset, const int enc,
- struct object_cipher_info *object);
-};
-
-/*
- * version-dependent metadata loader
- */
-struct crypt_mtd_loader {
- /*
- * return core format size
- */
- size_t (*format_size)(mtd_op_t op, size_t old_size);
- /*
- * pack version-specific metadata of an object
- * at ->create()
- */
- int32_t (*create_format)(unsigned char *wire, loc_t *loc,
- struct crypt_inode_info *info,
- struct master_cipher_info *master);
- /*
- * extract version-specific metadata of an object
- * at ->open() time
- */
- int32_t (*open_format)(unsigned char *wire, int32_t len, loc_t *loc,
- struct crypt_inode_info *info,
- struct master_cipher_info *master,
- crypt_local_t *local, gf_boolean_t load_info);
- int32_t (*update_format)(unsigned char *new, unsigned char *old,
- size_t old_len, int32_t mac_idx, mtd_op_t op,
- loc_t *loc, struct crypt_inode_info *info,
- struct master_cipher_info *master,
- crypt_local_t *local);
-};
-
-typedef int32_t (*end_writeback_handler_t)(call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata);
-typedef void (*linkop_wind_handler_t)(call_frame_t *frame, xlator_t *this);
-typedef void (*linkop_unwind_handler_t)(call_frame_t *frame);
-
-/* Declarations */
-
-/* keys.c */
-extern struct crypt_key crypt_keys[LAST_KEY_TYPE];
-int32_t
-get_nmtd_vol_key(struct master_cipher_info *master);
-int32_t
-get_nmtd_link_key(loc_t *loc, struct master_cipher_info *master,
- unsigned char *result);
-int32_t
-get_emtd_file_key(struct crypt_inode_info *info,
- struct master_cipher_info *master, unsigned char *result);
-int32_t
-get_data_file_key(struct crypt_inode_info *info,
- struct master_cipher_info *master, uint32_t keysize,
- unsigned char *key);
-/* data.c */
-extern struct data_cipher_alg data_cipher_algs[LAST_CIPHER_ALG]
- [LAST_CIPHER_MODE];
-void
-encrypt_aligned_iov(struct object_cipher_info *object, struct iovec *vec,
- int count, off_t off);
-void
-decrypt_aligned_iov(struct object_cipher_info *object, struct iovec *vec,
- int count, off_t off);
-int32_t
-align_iov_by_atoms(xlator_t *this, crypt_local_t *local,
- struct object_cipher_info *object,
- struct iovec *vec /* input vector */,
- int32_t count /* number of vec components */,
- struct iovec *avec /* aligned vector */,
- char **blocks /* pool of blocks */,
- uint32_t *blocks_allocated, struct avec_config *conf);
-int32_t
-set_config_avec_data(xlator_t *this, crypt_local_t *local,
- struct avec_config *conf,
- struct object_cipher_info *object, struct iovec *vec,
- int32_t vec_count);
-int32_t
-set_config_avec_hole(xlator_t *this, crypt_local_t *local,
- struct avec_config *conf,
- struct object_cipher_info *object, glusterfs_fop_t fop);
-void
-set_gap_at_end(call_frame_t *frame, struct object_cipher_info *object,
- struct avec_config *conf, atom_data_type dtype);
-void
-set_config_offsets(call_frame_t *frame, xlator_t *this, uint64_t offset,
- uint64_t count, atom_data_type dtype,
- int32_t setup_gap_in_tail);
-
-/* metadata.c */
-extern struct crypt_mtd_loader mtd_loaders[LAST_MTD_LOADER];
-
-int32_t
-alloc_format(crypt_local_t *local, size_t size);
-int32_t
-alloc_format_create(crypt_local_t *local);
-void
-free_format(crypt_local_t *local);
-size_t
-format_size(mtd_op_t op, size_t old_size);
-size_t
-new_format_size(void);
-int32_t
-open_format(unsigned char *str, int32_t len, loc_t *loc,
- struct crypt_inode_info *info, struct master_cipher_info *master,
- crypt_local_t *local, gf_boolean_t load_info);
-int32_t
-update_format(unsigned char *new, unsigned char *old, size_t old_len,
- int32_t mac_idx, mtd_op_t op, loc_t *loc,
- struct crypt_inode_info *info, struct master_cipher_info *master,
- crypt_local_t *local);
-int32_t
-create_format(unsigned char *wire, loc_t *loc, struct crypt_inode_info *info,
- struct master_cipher_info *master);
-
-/* atom.c */
-struct rmw_atom *
-atom_by_types(atom_data_type data, atom_locality_type locality);
-void
-submit_partial(call_frame_t *frame, xlator_t *this, fd_t *fd,
- atom_locality_type ltype);
-void
-submit_full(call_frame_t *frame, xlator_t *this);
-
-/* crypt.c */
-
-end_writeback_handler_t
-dispatch_end_writeback(glusterfs_fop_t fop);
-void
-set_local_io_params_writev(call_frame_t *frame,
- struct object_cipher_info *object,
- struct rmw_atom *atom, off_t io_offset,
- uint32_t io_size);
-void
-link_wind(call_frame_t *frame, xlator_t *this);
-void
-unlink_wind(call_frame_t *frame, xlator_t *this);
-void
-link_unwind(call_frame_t *frame);
-void
-unlink_unwind(call_frame_t *frame);
-void
-rename_wind(call_frame_t *frame, xlator_t *this);
-void
-rename_unwind(call_frame_t *frame);
-
-/* Inline functions */
-
-static inline int32_t
-crypt_xlator_id(void)
-{
- return CRYPT_XLATOR_ID;
-}
-
-static inline mtd_loader_id
-current_mtd_loader(void)
-{
- return MTD_LOADER_V1;
-}
-
-static inline uint32_t
-master_key_size(void)
-{
- return crypt_keys[MASTER_VOL_KEY].len >> 3;
-}
-
-static inline uint32_t
-nmtd_vol_key_size(void)
-{
- return crypt_keys[NMTD_VOL_KEY].len >> 3;
-}
-
-static inline uint32_t
-alg_mode_blkbits(cipher_alg_t alg, cipher_mode_t mode)
-{
- return data_cipher_algs[alg][mode].blkbits;
-}
-
-static inline uint32_t
-alg_mode_blksize(cipher_alg_t alg, cipher_mode_t mode)
-{
- return 1 << alg_mode_blkbits(alg, mode);
-}
-
-static inline gf_boolean_t
-alg_mode_atomic(cipher_alg_t alg, cipher_mode_t mode)
-{
- return data_cipher_algs[alg][mode].atomic;
-}
-
-static inline gf_boolean_t
-alg_mode_should_pad(cipher_alg_t alg, cipher_mode_t mode)
-{
- return data_cipher_algs[alg][mode].should_pad;
-}
-
-static inline uint32_t
-master_alg_blksize(struct master_cipher_info *mr)
-{
- return alg_mode_blksize(mr->m_alg, mr->m_mode);
-}
-
-static inline uint32_t
-master_alg_blkbits(struct master_cipher_info *mr)
-{
- return alg_mode_blkbits(mr->m_alg, mr->m_mode);
-}
-
-static inline gf_boolean_t
-master_alg_atomic(struct master_cipher_info *mr)
-{
- return alg_mode_atomic(mr->m_alg, mr->m_mode);
-}
-
-static inline gf_boolean_t
-master_alg_should_pad(struct master_cipher_info *mr)
-{
- return alg_mode_should_pad(mr->m_alg, mr->m_mode);
-}
-
-static inline uint32_t
-object_alg_blksize(struct object_cipher_info *ob)
-{
- return alg_mode_blksize(ob->o_alg, ob->o_mode);
-}
-
-static inline uint32_t
-object_alg_blkbits(struct object_cipher_info *ob)
-{
- return alg_mode_blkbits(ob->o_alg, ob->o_mode);
-}
-
-static inline gf_boolean_t
-object_alg_atomic(struct object_cipher_info *ob)
-{
- return alg_mode_atomic(ob->o_alg, ob->o_mode);
-}
-
-static inline gf_boolean_t
-object_alg_should_pad(struct object_cipher_info *ob)
-{
- return alg_mode_should_pad(ob->o_alg, ob->o_mode);
-}
-
-static inline uint32_t
-aes_raw_key_size(struct master_cipher_info *master)
-{
- return master->m_dkey_size >> 3;
-}
-
-static inline struct avec_config *
-get_hole_conf(call_frame_t *frame)
-{
- return &(((crypt_local_t *)frame->local)->hole_conf);
-}
-
-static inline struct avec_config *
-get_data_conf(call_frame_t *frame)
-{
- return &(((crypt_local_t *)frame->local)->data_conf);
-}
-
-static inline int32_t
-get_atom_bits(struct object_cipher_info *object)
-{
- return object->o_block_bits;
-}
-
-static inline int32_t
-get_atom_size(struct object_cipher_info *object)
-{
- return 1 << get_atom_bits(object);
-}
-
-static inline int32_t
-has_head_block(struct avec_config *conf)
-{
- return conf->off_in_head || (conf->acount == 1 && conf->off_in_tail);
-}
-
-static inline int32_t
-has_tail_block(struct avec_config *conf)
-{
- return conf->off_in_tail && conf->acount > 1;
-}
-
-static inline int32_t
-has_full_blocks(struct avec_config *conf)
-{
- return conf->nr_full_blocks;
-}
-
-static inline int32_t
-should_submit_head_block(struct avec_config *conf)
-{
- return has_head_block(conf) && (conf->cursor == 0);
-}
-
-static inline int32_t
-should_submit_tail_block(struct avec_config *conf)
-{
- return has_tail_block(conf) && (conf->cursor == conf->acount - 1);
-}
-
-static inline int32_t
-should_submit_full_block(struct avec_config *conf)
-{
- uint32_t start = has_head_block(conf) ? 1 : 0;
-
- return has_full_blocks(conf) && conf->cursor >= start &&
- conf->cursor < start + conf->nr_full_blocks;
-}
-
-#if DEBUG_CRYPT
-static inline void
-crypt_check_input_len(size_t len, struct object_cipher_info *object)
-{
- if (object_alg_should_pad(object) &&
- (len & (object_alg_blksize(object) - 1)))
- gf_log("crypt", GF_LOG_DEBUG, "bad input len: %d", (int)len);
-}
-
-static inline void
-check_head_block(struct avec_config *conf)
-{
- if (!has_head_block(conf))
- gf_log("crypt", GF_LOG_DEBUG, "not a head atom");
-}
-
-static inline void
-check_tail_block(struct avec_config *conf)
-{
- if (!has_tail_block(conf))
- gf_log("crypt", GF_LOG_DEBUG, "not a tail atom");
-}
-
-static inline void
-check_full_block(struct avec_config *conf)
-{
- if (!has_full_blocks(conf))
- gf_log("crypt", GF_LOG_DEBUG, "not a full atom");
-}
-
-static inline void
-check_cursor_head(struct avec_config *conf)
-{
- if (!has_head_block(conf))
- gf_log("crypt", GF_LOG_DEBUG, "Illegal call of head atom method");
- else if (conf->cursor != 0)
- gf_log("crypt", GF_LOG_DEBUG, "Cursor (%d) is not at head atom",
- conf->cursor);
-}
-
-static inline void
-check_cursor_full(struct avec_config *conf)
-{
- if (!has_full_blocks(conf))
- gf_log("crypt", GF_LOG_DEBUG, "Illegal call of full atom method");
- if (has_head_block(conf) && (conf->cursor == 0))
- gf_log("crypt", GF_LOG_DEBUG, "Cursor is not at full atom");
-}
-
-/*
- * FIXME: use avec->iov_len to check setup
- */
-static inline int
-data_local_invariant(crypt_local_t *local)
-{
- return 0;
-}
-
-#else
-#define crypt_check_input_len(len, object) noop
-#define check_head_block(conf) noop
-#define check_tail_block(conf) noop
-#define check_full_block(conf) noop
-#define check_cursor_head(conf) noop
-#define check_cursor_full(conf) noop
-
-#endif /* DEBUG_CRYPT */
-
-static inline struct avec_config *
-conf_by_type(call_frame_t *frame, atom_data_type dtype)
-{
- struct avec_config *conf = NULL;
-
- switch (dtype) {
- case HOLE_ATOM:
- conf = get_hole_conf(frame);
- break;
- case DATA_ATOM:
- conf = get_data_conf(frame);
- break;
- default:
- gf_log("crypt", GF_LOG_DEBUG, "bad atom type");
- }
- return conf;
-}
-
-static inline uint32_t
-nr_calls_head(struct avec_config *conf)
-{
- return has_head_block(conf) ? 1 : 0;
-}
-
-static inline uint32_t
-nr_calls_tail(struct avec_config *conf)
-{
- return has_tail_block(conf) ? 1 : 0;
-}
-
-static inline uint32_t
-nr_calls_full(struct avec_config *conf)
-{
- switch (conf->type) {
- case HOLE_ATOM:
- return has_full_blocks(conf);
- case DATA_ATOM:
- return has_full_blocks(conf)
- ? logical_blocks_occupied(0, conf->nr_full_blocks,
- MAX_IOVEC_BITS)
- : 0;
- default:
- gf_log("crypt", GF_LOG_DEBUG, "bad atom data type");
- return 0;
- }
-}
-
-static inline uint32_t
-nr_calls(struct avec_config *conf)
-{
- return nr_calls_head(conf) + nr_calls_tail(conf) + nr_calls_full(conf);
-}
-
-static inline uint32_t
-nr_calls_data(call_frame_t *frame)
-{
- return nr_calls(get_data_conf(frame));
-}
-
-static inline uint32_t
-nr_calls_hole(call_frame_t *frame)
-{
- return nr_calls(get_hole_conf(frame));
-}
-
-static inline void
-get_one_call_nolock(call_frame_t *frame)
-{
- crypt_local_t *local = frame->local;
-
- ++local->nr_calls;
-
- // gf_log("crypt", GF_LOG_DEBUG, "get %d calls", 1);
-}
-
-static inline void
-get_one_call(call_frame_t *frame)
-{
- crypt_local_t *local = frame->local;
-
- LOCK(&local->call_lock);
- get_one_call_nolock(frame);
- UNLOCK(&local->call_lock);
-}
-
-static inline void
-get_nr_calls_nolock(call_frame_t *frame, int32_t nr)
-{
- crypt_local_t *local = frame->local;
-
- local->nr_calls += nr;
-
- // gf_log("crypt", GF_LOG_DEBUG, "get %d calls", nr);
-}
-
-static inline void
-get_nr_calls(call_frame_t *frame, int32_t nr)
-{
- crypt_local_t *local = frame->local;
-
- LOCK(&local->call_lock);
- get_nr_calls_nolock(frame, nr);
- UNLOCK(&local->call_lock);
-}
-
-static inline int
-put_one_call(crypt_local_t *local)
-{
- uint32_t last = 0;
-
- LOCK(&local->call_lock);
- if (--local->nr_calls == 0)
- last = 1;
-
- // gf_log("crypt", GF_LOG_DEBUG, "put %d calls", 1);
-
- UNLOCK(&local->call_lock);
- return last;
-}
-
-static inline int
-is_appended_write(call_frame_t *frame)
-{
- crypt_local_t *local = frame->local;
- struct avec_config *conf = get_data_conf(frame);
-
- return conf->orig_offset + conf->orig_size > local->old_file_size;
-}
-
-static inline int
-is_ordered_mode(call_frame_t *frame)
-{
-#if 0
- crypt_local_t *local = frame->local;
- return local->fop == GF_FOP_FTRUNCATE ||
- (local->fop == GF_FOP_WRITE && is_appended_write(frame));
-#endif
- return 1;
-}
-
-static inline int32_t
-hole_conv_completed(crypt_local_t *local)
-{
- struct avec_config *conf = &local->hole_conf;
- return conf->cursor == conf->acount;
-}
-
-static inline int32_t
-data_write_in_progress(crypt_local_t *local)
-{
- return local->active_setup == DATA_ATOM;
-}
-
-static inline int32_t
-parent_is_crypt_xlator(call_frame_t *frame, xlator_t *this)
-{
- return frame->parent->this == this;
-}
-
-static inline linkop_wind_handler_t
-linkop_wind_dispatch(glusterfs_fop_t fop)
-{
- switch (fop) {
- case GF_FOP_LINK:
- return link_wind;
- case GF_FOP_UNLINK:
- return unlink_wind;
- case GF_FOP_RENAME:
- return rename_wind;
- default:
- gf_log("crypt", GF_LOG_ERROR, "Bad link operation %d", fop);
- return NULL;
- }
-}
-
-static inline linkop_unwind_handler_t
-linkop_unwind_dispatch(glusterfs_fop_t fop)
-{
- switch (fop) {
- case GF_FOP_LINK:
- return link_unwind;
- case GF_FOP_UNLINK:
- return unlink_unwind;
- case GF_FOP_RENAME:
- return rename_unwind;
- default:
- gf_log("crypt", GF_LOG_ERROR, "Bad link operation %d", fop);
- return NULL;
- }
-}
-
-static inline mtd_op_t
-linkop_mtdop_dispatch(glusterfs_fop_t fop)
-{
- switch (fop) {
- case GF_FOP_LINK:
- return MTD_APPEND;
- case GF_FOP_UNLINK:
- return MTD_CUT;
- case GF_FOP_RENAME:
- return MTD_OVERWRITE;
- default:
- gf_log("crypt", GF_LOG_WARNING, "Bad link operation %d", fop);
- return MTD_LAST_OP;
- }
-}
-
-#define CRYPT_STACK_UNWIND(fop, frame, params...) \
- do { \
- crypt_local_t *__local = NULL; \
- if (frame) { \
- __local = frame->local; \
- frame->local = NULL; \
- } \
- STACK_UNWIND_STRICT(fop, frame, params); \
- if (__local) { \
- GF_FREE(__local); \
- } \
- } while (0)
-
-#endif /* __CRYPT_H__ */
-
-/*
- Local variables:
- c-indentation-style: "K&R"
- mode-name: "LC"
- c-basic-offset: 8
- tab-width: 8
- fill-column: 80
- scroll-step: 1
- End:
-*/
diff --git a/xlators/encryption/crypt/src/data.c b/xlators/encryption/crypt/src/data.c
deleted file mode 100644
index 93288b1..0000000
--- a/xlators/encryption/crypt/src/data.c
+++ /dev/null
@@ -1,715 +0,0 @@
-/*
- Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include <glusterfs/defaults.h>
-#include "crypt-common.h"
-#include "crypt.h"
-
-static void
-set_iv_aes_xts(off_t offset, struct object_cipher_info *object)
-{
- unsigned char *ivec;
-
- ivec = object->u.aes_xts.ivec;
-
- /* convert the tweak into a little-endian byte
- * array (IEEE P1619/D16, May 2007, section 5.1)
- */
-
- *((uint64_t *)ivec) = htole64(offset);
-
- /* ivec is padded with zeroes */
-}
-
-static int32_t
-aes_set_keys_common(unsigned char *raw_key, uint32_t key_size, AES_KEY *keys)
-{
- int32_t ret;
-
- ret = AES_set_encrypt_key(raw_key, key_size, &keys[AES_ENCRYPT]);
- if (ret) {
- gf_log("crypt", GF_LOG_ERROR, "Set encrypt key failed");
- return ret;
- }
- ret = AES_set_decrypt_key(raw_key, key_size, &keys[AES_DECRYPT]);
- if (ret) {
- gf_log("crypt", GF_LOG_ERROR, "Set decrypt key failed");
- return ret;
- }
- return 0;
-}
-
-/*
- * set private cipher info for xts mode
- */
-static int32_t
-set_private_aes_xts(struct crypt_inode_info *info,
- struct master_cipher_info *master)
-{
- int ret;
- struct object_cipher_info *object = get_object_cinfo(info);
- unsigned char *data_key;
- uint32_t subkey_size;
-
- /* init tweak value */
- memset(object->u.aes_xts.ivec, 0, 16);
-
- data_key = GF_CALLOC(1, object->o_dkey_size, gf_crypt_mt_key);
- if (!data_key)
- return ENOMEM;
-
- /*
- * retrieve data keying material
- */
- ret = get_data_file_key(info, master, object->o_dkey_size, data_key);
- if (ret) {
- gf_log("crypt", GF_LOG_ERROR, "Failed to retrieve data key");
- GF_FREE(data_key);
- return ret;
- }
- /*
- * parse compound xts key
- */
- subkey_size = object->o_dkey_size >> 4; /* (xts-key-size-in-bytes / 2) */
- /*
- * install key for data encryption
- */
- ret = aes_set_keys_common(data_key, subkey_size << 3,
- object->u.aes_xts.dkey);
- if (ret) {
- GF_FREE(data_key);
- return ret;
- }
- /*
- * set up key used to encrypt tweaks
- */
- ret = AES_set_encrypt_key(data_key + subkey_size, object->o_dkey_size / 2,
- &object->u.aes_xts.tkey);
- if (ret < 0)
- gf_log("crypt", GF_LOG_ERROR, "Set tweak key failed");
-
- GF_FREE(data_key);
- return ret;
-}
-
-static int32_t
-aes_xts_init(void)
-{
- cassert(AES_BLOCK_SIZE == (1 << AES_BLOCK_BITS));
- return 0;
-}
-
-static int32_t
-check_key_aes_xts(uint32_t keysize)
-{
- switch (keysize) {
- case 256:
- case 512:
- return 0;
- default:
- break;
- }
- return -1;
-}
-
-static int32_t
-encrypt_aes_xts(const unsigned char *from, unsigned char *to, size_t length,
- off_t offset, const int enc, struct object_cipher_info *object)
-{
- XTS128_CONTEXT ctx;
- if (enc) {
- ctx.key1 = &object->u.aes_xts.dkey[AES_ENCRYPT];
- ctx.block1 = (block128_f)AES_encrypt;
- } else {
- ctx.key1 = &object->u.aes_xts.dkey[AES_DECRYPT];
- ctx.block1 = (block128_f)AES_decrypt;
- }
- ctx.key2 = &object->u.aes_xts.tkey;
- ctx.block2 = (block128_f)AES_encrypt;
-
- return CRYPTO_xts128_encrypt(&ctx, object->u.aes_xts.ivec, from, to, length,
- enc);
-}
-
-/*
- * Cipher input chunk @from of length @len;
- * @to: result of cipher transform;
- * @off: offset in a file (must be cblock-aligned);
- */
-static void
-cipher_data(struct object_cipher_info *object, char *from, char *to, off_t off,
- size_t len, const int enc)
-{
- crypt_check_input_len(len, object);
-
-#if TRIVIAL_TFM && DEBUG_CRYPT
- return;
-#endif
- data_cipher_algs[object->o_alg][object->o_mode].set_iv(off, object);
- data_cipher_algs[object->o_alg][object->o_mode].encrypt(
- (const unsigned char *)from, (unsigned char *)to, len, off, enc,
- object);
-}
-
-#define MAX_CIPHER_CHUNK (1 << 30)
-
-/*
- * Do cipher (encryption/decryption) transform of a
- * continuous region of memory.
- *
- * @len: a number of bytes to transform;
- * @buf: data to transform;
- * @off: offset in a file, should be block-aligned
- * for atomic cipher modes and ksize-aligned
- * for other modes).
- * @dir: direction of transform (encrypt/decrypt).
- */
-static void
-cipher_region(struct object_cipher_info *object, char *from, char *to,
- off_t off, size_t len, int dir)
-{
- while (len > 0) {
- size_t to_cipher;
-
- to_cipher = len;
- if (to_cipher > MAX_CIPHER_CHUNK)
- to_cipher = MAX_CIPHER_CHUNK;
-
- /* this will reset IV */
- cipher_data(object, from, to, off, to_cipher, dir);
- from += to_cipher;
- to += to_cipher;
- off += to_cipher;
- len -= to_cipher;
- }
-}
-
-/*
- * Do cipher transform (encryption/decryption) of
- * plaintext/ciphertext represented by @vec.
- *
- * Pre-conditions: @vec represents a continuous piece
- * of data in a file at offset @off to be ciphered
- * (encrypted/decrypted).
- * @count is the number of vec's components. All the
- * components must be block-aligned, the caller is
- * responsible for this. @dir is "direction" of
- * transform (encrypt/decrypt).
- */
-static void
-cipher_aligned_iov(struct object_cipher_info *object, struct iovec *vec,
- int count, off_t off, int32_t dir)
-{
- int i;
- int len = 0;
-
- for (i = 0; i < count; i++) {
- cipher_region(object, vec[i].iov_base, vec[i].iov_base, off + len,
- vec[i].iov_len, dir);
- len += vec[i].iov_len;
- }
-}
-
-void
-encrypt_aligned_iov(struct object_cipher_info *object, struct iovec *vec,
- int count, off_t off)
-{
- cipher_aligned_iov(object, vec, count, off, 1);
-}
-
-void
-decrypt_aligned_iov(struct object_cipher_info *object, struct iovec *vec,
- int count, off_t off)
-{
- cipher_aligned_iov(object, vec, count, off, 0);
-}
-
-#if DEBUG_CRYPT
-static void
-compound_stream(struct iovec *vec, int count, char *buf, off_t skip)
-{
- int i;
- int off = 0;
- for (i = 0; i < count; i++) {
- memcpy(buf + off, vec[i].iov_base + skip, vec[i].iov_len - skip);
-
- off += (vec[i].iov_len - skip);
- skip = 0;
- }
-}
-
-static void
-check_iovecs(struct iovec *vec, int cnt, struct iovec *avec, int acnt,
- uint32_t off_in_head)
-{
- char *s1, *s2;
- uint32_t size, asize;
-
- size = iov_length(vec, cnt);
- asize = iov_length(avec, acnt) - off_in_head;
- if (size != asize) {
- gf_log("crypt", GF_LOG_DEBUG, "size %d is not eq asize %d", size,
- asize);
- return;
- }
- s1 = GF_CALLOC(1, size, gf_crypt_mt_data);
- if (!s1) {
- gf_log("crypt", GF_LOG_DEBUG, "Can not allocate stream ");
- return;
- }
- s2 = GF_CALLOC(1, asize, gf_crypt_mt_data);
- if (!s2) {
- GF_FREE(s1);
- gf_log("crypt", GF_LOG_DEBUG, "Can not allocate stream ");
- return;
- }
- compound_stream(vec, cnt, s1, 0);
- compound_stream(avec, acnt, s2, off_in_head);
- if (memcmp(s1, s2, size))
- gf_log("crypt", GF_LOG_DEBUG, "chunks of different data");
- GF_FREE(s1);
- GF_FREE(s2);
-}
-
-#else
-#define check_iovecs(vec, count, avec, avecn, off) noop
-#endif /* DEBUG_CRYPT */
-
-static char *
-data_alloc_block(xlator_t *this, crypt_local_t *local, int32_t block_size)
-{
- struct iobuf *iobuf = NULL;
-
- iobuf = iobuf_get2(this->ctx->iobuf_pool, block_size);
- if (!iobuf) {
- gf_log("crypt", GF_LOG_ERROR, "Failed to get iobuf");
- return NULL;
- }
- if (!local->iobref_data) {
- local->iobref_data = iobref_new();
- if (!local->iobref_data) {
- gf_log("crypt", GF_LOG_ERROR, "Failed to get iobref");
- iobuf_unref(iobuf);
- return NULL;
- }
- }
- iobref_add(local->iobref_data, iobuf);
- return iobuf->ptr;
-}
-
-/*
- * Compound @avec, which represent the same data
- * chunk as @vec, but has aligned components of
- * specified block size. Alloc blocks, if needed.
- * In particular, incomplete head and tail blocks
- * must be allocated.
- * Put number of allocated blocks to @num_blocks.
- *
- * Example:
- *
- * input: data chunk represented by 4 components
- * [AB],[BC],[CD],[DE];
- * output: 5 logical blocks (0, 1, 2, 3, 4).
- *
- * A B C D E
- * *-----*+------*-+---*----+--------+-*
- * | || | | | | | |
- * *-+-----+*------+-*---+----*--------*-+------*
- * 0 1 2 3 4
- *
- * 0 - incomplete compound (head);
- * 1, 2 - full compound;
- * 3 - full non-compound (the case of reuse);
- * 4 - incomplete non-compound (tail).
- */
-int32_t
-align_iov_by_atoms(xlator_t *this, crypt_local_t *local,
- struct object_cipher_info *object,
- struct iovec *vec /* input vector */,
- int32_t count /* number of vec components */,
- struct iovec *avec /* aligned vector */,
- char **blocks /* pool of blocks */,
- uint32_t *blocks_allocated, struct avec_config *conf)
-{
- int vecn = 0; /* number of the current component in vec */
- int avecn = 0; /* number of the current component in avec */
- off_t vec_off = 0; /* offset in the current vec component,
- * i.e. the number of bytes have already
- * been copied */
- int32_t block_size = get_atom_size(object);
- size_t to_process; /* number of vec's bytes to copy and(or) re-use */
- int32_t off_in_head = conf->off_in_head;
-
- to_process = iov_length(vec, count);
-
- while (to_process > 0) {
- if (off_in_head || vec[vecn].iov_len - vec_off < block_size) {
- /*
- * less than block_size:
- * the case of incomplete (head or tail),
- * or compound block
- */
- size_t copied = 0;
- /*
- * populate the pool with a new block
- */
- blocks[*blocks_allocated] = data_alloc_block(this, local,
- block_size);
- if (!blocks[*blocks_allocated])
- return -ENOMEM;
- memset(blocks[*blocks_allocated], 0, off_in_head);
- /*
- * fill the block with vec components
- */
- do {
- size_t to_copy;
-
- to_copy = vec[vecn].iov_len - vec_off;
- if (to_copy > block_size - off_in_head)
- to_copy = block_size - off_in_head;
-
- memcpy(blocks[*blocks_allocated] + off_in_head + copied,
- vec[vecn].iov_base + vec_off, to_copy);
-
- copied += to_copy;
- to_process -= to_copy;
-
- vec_off += to_copy;
- if (vec_off == vec[vecn].iov_len) {
- /* finished with this vecn */
- vec_off = 0;
- vecn++;
- }
- } while (copied < (block_size - off_in_head) && to_process > 0);
- /*
- * update avec
- */
- avec[avecn].iov_len = off_in_head + copied;
- avec[avecn].iov_base = blocks[*blocks_allocated];
-
- (*blocks_allocated)++;
- off_in_head = 0;
- } else {
- /*
- * the rest of the current vec component
- * is not less than block_size, so reuse
- * the memory buffer of the component.
- */
- size_t to_reuse;
- to_reuse = (to_process > block_size ? block_size : to_process);
- avec[avecn].iov_len = to_reuse;
- avec[avecn].iov_base = vec[vecn].iov_base + vec_off;
-
- vec_off += to_reuse;
- if (vec_off == vec[vecn].iov_len) {
- /* finished with this vecn */
- vec_off = 0;
- vecn++;
- }
- to_process -= to_reuse;
- }
- avecn++;
- }
- check_iovecs(vec, count, avec, avecn, conf->off_in_head);
- return 0;
-}
-
-/*
- * allocate and setup aligned vector for data submission
- * Pre-condition: @conf is set.
- */
-int32_t
-set_config_avec_data(xlator_t *this, crypt_local_t *local,
- struct avec_config *conf,
- struct object_cipher_info *object, struct iovec *vec,
- int32_t vec_count)
-{
- int32_t ret = ENOMEM;
- struct iovec *avec;
- char **pool;
- uint32_t blocks_in_pool = 0;
-
- conf->type = DATA_ATOM;
-
- avec = GF_CALLOC(conf->acount, sizeof(*avec), gf_crypt_mt_iovec);
- if (!avec)
- return ret;
- pool = GF_CALLOC(conf->acount, sizeof(*pool), gf_crypt_mt_char);
- if (!pool) {
- GF_FREE(avec);
- return ret;
- }
- if (!vec) {
- /*
- * degenerated case: no data
- */
- pool[0] = data_alloc_block(this, local, get_atom_size(object));
- if (!pool[0])
- goto free;
- blocks_in_pool = 1;
- avec->iov_base = pool[0];
- avec->iov_len = conf->off_in_tail;
- } else {
- ret = align_iov_by_atoms(this, local, object, vec, vec_count, avec,
- pool, &blocks_in_pool, conf);
- if (ret)
- goto free;
- }
- conf->avec = avec;
- conf->pool = pool;
- conf->blocks_in_pool = blocks_in_pool;
- return 0;
-free:
- GF_FREE(avec);
- GF_FREE(pool);
- return ret;
-}
-
-/*
- * allocate and setup aligned vector for hole submission
- */
-int32_t
-set_config_avec_hole(xlator_t *this, crypt_local_t *local,
- struct avec_config *conf,
- struct object_cipher_info *object, glusterfs_fop_t fop)
-{
- uint32_t i, idx;
- struct iovec *avec;
- char **pool;
- uint32_t num_blocks;
- uint32_t blocks_in_pool = 0;
-
- conf->type = HOLE_ATOM;
-
- num_blocks = conf->acount -
- (conf->nr_full_blocks ? conf->nr_full_blocks - 1 : 0);
-
- switch (fop) {
- case GF_FOP_WRITE:
- /*
- * hole goes before data
- */
- if (num_blocks == 1 && conf->off_in_tail != 0)
- /*
- * we won't submit a hole which fits into
- * a data atom: this part of hole will be
- * submitted with data write
- */
- return 0;
- break;
- case GF_FOP_FTRUNCATE:
- /*
- * expanding truncate, hole goes after data,
- * and will be submitted in any case.
- */
- break;
- default:
- gf_log("crypt", GF_LOG_WARNING, "bad file operation %d", fop);
- return 0;
- }
- avec = GF_CALLOC(num_blocks, sizeof(*avec), gf_crypt_mt_iovec);
- if (!avec)
- return ENOMEM;
- pool = GF_CALLOC(num_blocks, sizeof(*pool), gf_crypt_mt_char);
- if (!pool) {
- GF_FREE(avec);
- return ENOMEM;
- }
- for (i = 0; i < num_blocks; i++) {
- pool[i] = data_alloc_block(this, local, get_atom_size(object));
- if (pool[i] == NULL)
- goto free;
- blocks_in_pool++;
- }
- if (has_head_block(conf)) {
- /* set head block */
- idx = 0;
- avec[idx].iov_base = pool[idx];
- avec[idx].iov_len = get_atom_size(object);
- memset(avec[idx].iov_base + conf->off_in_head, 0,
- get_atom_size(object) - conf->off_in_head);
- }
- if (has_tail_block(conf)) {
- /* set tail block */
- idx = num_blocks - 1;
- avec[idx].iov_base = pool[idx];
- avec[idx].iov_len = get_atom_size(object);
- memset(avec[idx].iov_base, 0, conf->off_in_tail);
- }
- if (has_full_blocks(conf)) {
- /* set full block */
- idx = conf->off_in_head ? 1 : 0;
- avec[idx].iov_base = pool[idx];
- avec[idx].iov_len = get_atom_size(object);
- /*
- * since we re-use the buffer,
- * zeroes will be set every time
- * before encryption, see submit_full()
- */
- }
- conf->avec = avec;
- conf->pool = pool;
- conf->blocks_in_pool = blocks_in_pool;
- return 0;
-free:
- GF_FREE(avec);
- GF_FREE(pool);
- return ENOMEM;
-}
-
-/* A helper for setting up config of partial atoms (which
- * participate in read-modify-write sequence).
- *
- * Calculate and setup precise amount of "extra-bytes"
- * that should be uptodated at the end of partial (not
- * necessarily tail!) block.
- *
- * Pre-condition: local->old_file_size is valid!
- * @conf contains setup, which is enough for correct calculation
- * of has_tail_block(), ->get_offset().
- */
-void
-set_gap_at_end(call_frame_t *frame, struct object_cipher_info *object,
- struct avec_config *conf, atom_data_type dtype)
-{
- uint32_t to_block;
- crypt_local_t *local = frame->local;
- uint64_t old_file_size = local->old_file_size;
- struct rmw_atom *partial = atom_by_types(
- dtype, has_tail_block(conf) ? TAIL_ATOM : HEAD_ATOM);
-
- if (old_file_size <= partial->offset_at(frame, object))
- to_block = 0;
- else {
- to_block = old_file_size - partial->offset_at(frame, object);
- if (to_block > get_atom_size(object))
- to_block = get_atom_size(object);
- }
- if (to_block > conf->off_in_tail)
- conf->gap_in_tail = to_block - conf->off_in_tail;
- else
- /*
- * nothing to uptodate
- */
- conf->gap_in_tail = 0;
-}
-
-/*
- * fill struct avec_config with offsets layouts
- */
-void
-set_config_offsets(call_frame_t *frame, xlator_t *this, uint64_t offset,
- uint64_t count, atom_data_type dtype, int32_t set_gap)
-{
- crypt_local_t *local;
- struct object_cipher_info *object;
- struct avec_config *conf;
- uint32_t resid;
-
- uint32_t atom_size;
- uint32_t atom_bits;
-
- size_t orig_size;
- off_t orig_offset;
- size_t expanded_size;
- off_t aligned_offset;
-
- uint32_t off_in_head = 0;
- uint32_t off_in_tail = 0;
- uint32_t nr_full_blocks;
- int32_t size_full_blocks;
-
- uint32_t acount; /* number of aligned components to write.
- * The same as number of occupied logical
- * blocks (atoms)
- */
- local = frame->local;
- object = &local->info->cinfo;
- conf = (dtype == DATA_ATOM ? get_data_conf(frame) : get_hole_conf(frame));
-
- orig_offset = offset;
- orig_size = count;
-
- atom_size = get_atom_size(object);
- atom_bits = get_atom_bits(object);
-
- /*
- * Round-down the start,
- * round-up the end.
- */
- resid = offset & (uint64_t)(atom_size - 1);
-
- if (resid)
- off_in_head = resid;
- aligned_offset = offset - off_in_head;
- expanded_size = orig_size + off_in_head;
-
- /* calculate tail,
- expand size forward */
- resid = (offset + orig_size) & (uint64_t)(atom_size - 1);
-
- if (resid) {
- off_in_tail = resid;
- expanded_size += (atom_size - off_in_tail);
- }
- /*
- * calculate number of occupied blocks
- */
- acount = expanded_size >> atom_bits;
- /*
- * calculate number of full blocks
- */
- size_full_blocks = expanded_size;
- if (off_in_head)
- size_full_blocks -= atom_size;
- if (off_in_tail && size_full_blocks > 0)
- size_full_blocks -= atom_size;
- nr_full_blocks = size_full_blocks >> atom_bits;
-
- conf->atom_size = atom_size;
- conf->orig_size = orig_size;
- conf->orig_offset = orig_offset;
- conf->expanded_size = expanded_size;
- conf->aligned_offset = aligned_offset;
-
- conf->off_in_head = off_in_head;
- conf->off_in_tail = off_in_tail;
- conf->nr_full_blocks = nr_full_blocks;
- conf->acount = acount;
- /*
- * Finally, calculate precise amount of
- * "extra-bytes" that should be uptodated
- * at the end.
- * Only if RMW is expected.
- */
- if (off_in_tail && set_gap)
- set_gap_at_end(frame, object, conf, dtype);
-}
-
-struct data_cipher_alg data_cipher_algs[LAST_CIPHER_ALG][LAST_CIPHER_MODE] = {
- [AES_CIPHER_ALG][XTS_CIPHER_MODE] = {.atomic = _gf_true,
- .should_pad = _gf_true,
- .blkbits = AES_BLOCK_BITS,
- .init = aes_xts_init,
- .set_private = set_private_aes_xts,
- .check_key = check_key_aes_xts,
- .set_iv = set_iv_aes_xts,
- .encrypt = encrypt_aes_xts}};
-
-/*
- Local variables:
- c-indentation-style: "K&R"
- mode-name: "LC"
- c-basic-offset: 8
- tab-width: 8
- fill-column: 80
- scroll-step: 1
- End:
-*/
diff --git a/xlators/encryption/crypt/src/keys.c b/xlators/encryption/crypt/src/keys.c
deleted file mode 100644
index 92a4d47..0000000
--- a/xlators/encryption/crypt/src/keys.c
+++ /dev/null
@@ -1,284 +0,0 @@
-/*
- Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include <glusterfs/defaults.h>
-#include "crypt-common.h"
-#include "crypt.h"
-
-/* Key hierarchy
-
- +----------------+
- | MASTER_VOL_KEY |
- +-------+--------+
- |
- |
- +----------------+----------------+
- | | |
- | | |
- +-------+------+ +-------+-------+ +------+--------+
- | NMTD_VOL_KEY | | EMTD_FILE_KEY | | DATA_FILE_KEY |
- +-------+------+ +---------------+ +---------------+
- |
- |
- +-------+-------+
- | NMTD_LINK_KEY |
- +---------------+
-
- */
-
-#if DEBUG_CRYPT
-static void
-check_prf_iters(uint32_t num_iters)
-{
- if (num_iters == 0)
- gf_log("crypt", GF_LOG_DEBUG, "bad number of prf iterations : %d",
- num_iters);
-}
-#else
-#define check_prf_iters(num_iters) noop
-#endif /* DEBUG_CRYPT */
-
-unsigned char crypt_fake_oid[16] = {0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0};
-
-/*
- * derive key in the counter mode using
- * sha256-based HMAC as PRF, see
- * NIST Special Publication 800-108, 5.1)
- */
-
-#define PRF_OUTPUT_SIZE SHA256_DIGEST_LENGTH
-
-static int32_t
-kderive_init(struct kderive_context *ctx,
- const unsigned char *pkey, /* parent key */
- uint32_t pkey_size, /* parent key size */
- const unsigned char *idctx, /* id-context */
- uint32_t idctx_size, crypt_key_type type /* type of child key */)
-{
- unsigned char *pos;
- uint32_t llen = strlen(crypt_keys[type].label);
- /*
- * Compoud the fixed input data for KDF:
- * [i]_2 || Label || 0x00 || Id-Context || [L]_2),
- * NIST SP 800-108, 5.1
- */
- ctx->fid_len = sizeof(uint32_t) + llen + 1 + idctx_size + sizeof(uint32_t);
-
- ctx->fid = GF_CALLOC(ctx->fid_len, 1, gf_crypt_mt_key);
- if (!ctx->fid)
- return ENOMEM;
- ctx->out_len = round_up(crypt_keys[type].len >> 3, PRF_OUTPUT_SIZE);
- ctx->out = GF_CALLOC(ctx->out_len, 1, gf_crypt_mt_key);
- if (!ctx->out) {
- GF_FREE(ctx->fid);
- return ENOMEM;
- }
- ctx->pkey = pkey;
- ctx->pkey_len = pkey_size;
- ctx->ckey_len = crypt_keys[type].len;
-
- pos = ctx->fid;
-
- /* counter will be set up in kderive_rfn() */
- pos += sizeof(uint32_t);
-
- memcpy(pos, crypt_keys[type].label, llen);
- pos += llen;
-
- /* set up zero octet */
- *pos = 0;
- pos += 1;
-
- memcpy(pos, idctx, idctx_size);
- pos += idctx_size;
-
- *((uint32_t *)pos) = htobe32(ctx->ckey_len);
-
- return 0;
-}
-
-static void
-kderive_update(struct kderive_context *ctx)
-{
- uint32_t i;
-#if (OPENSSL_VERSION_NUMBER < 0x1010002f)
- HMAC_CTX hctx;
-#endif
- HMAC_CTX *phctx = NULL;
- unsigned char *pos = ctx->out;
- uint32_t *p_iter = (uint32_t *)ctx->fid;
- uint32_t num_iters = ctx->out_len / PRF_OUTPUT_SIZE;
-
- check_prf_iters(num_iters);
-
-#if (OPENSSL_VERSION_NUMBER < 0x1010002f)
- HMAC_CTX_init(&hctx);
- phctx = &hctx;
-#else
- phctx = HMAC_CTX_new();
- /* I guess we presume it was successful? */
-#endif
- for (i = 0; i < num_iters; i++) {
- /*
- * update the iteration number in the fid
- */
- *p_iter = htobe32(i);
- HMAC_Init_ex(phctx, ctx->pkey, ctx->pkey_len >> 3, EVP_sha256(), NULL);
- HMAC_Update(phctx, ctx->fid, ctx->fid_len);
- HMAC_Final(phctx, pos, NULL);
-
- pos += PRF_OUTPUT_SIZE;
- }
-#if (OPENSSL_VERSION_NUMBER < 0x1010002f)
- HMAC_CTX_cleanup(phctx);
-#else
- HMAC_CTX_free(phctx);
-#endif
-}
-
-static void
-kderive_final(struct kderive_context *ctx, unsigned char *child)
-{
- memcpy(child, ctx->out, ctx->ckey_len >> 3);
- GF_FREE(ctx->fid);
- GF_FREE(ctx->out);
- memset(ctx, 0, sizeof(*ctx));
-}
-
-/*
- * derive per-volume key for object ids aithentication
- */
-int32_t
-get_nmtd_vol_key(struct master_cipher_info *master)
-{
- int32_t ret;
- struct kderive_context ctx;
-
- ret = kderive_init(&ctx, master->m_key, master_key_size(), crypt_fake_oid,
- sizeof(uuid_t), NMTD_VOL_KEY);
- if (ret)
- return ret;
- kderive_update(&ctx);
- kderive_final(&ctx, master->m_nmtd_key);
- return 0;
-}
-
-/*
- * derive per-link key for aithentication of non-encrypted
- * meta-data (nmtd)
- */
-int32_t
-get_nmtd_link_key(loc_t *loc, struct master_cipher_info *master,
- unsigned char *result)
-{
- int32_t ret;
- struct kderive_context ctx;
-
- ret = kderive_init(&ctx, master->m_nmtd_key, nmtd_vol_key_size(),
- (const unsigned char *)loc->path, strlen(loc->path),
- NMTD_LINK_KEY);
- if (ret)
- return ret;
- kderive_update(&ctx);
- kderive_final(&ctx, result);
- return 0;
-}
-
-/*
- * derive per-file key for encryption and authentication
- * of encrypted part of metadata (emtd)
- */
-int32_t
-get_emtd_file_key(struct crypt_inode_info *info,
- struct master_cipher_info *master, unsigned char *result)
-{
- int32_t ret;
- struct kderive_context ctx;
-
- ret = kderive_init(&ctx, master->m_key, master_key_size(), info->oid,
- sizeof(uuid_t), EMTD_FILE_KEY);
- if (ret)
- return ret;
- kderive_update(&ctx);
- kderive_final(&ctx, result);
- return 0;
-}
-
-static int32_t
-data_key_type_by_size(uint32_t keysize, crypt_key_type *type)
-{
- int32_t ret = 0;
- switch (keysize) {
- case 256:
- *type = DATA_FILE_KEY_256;
- break;
- case 512:
- *type = DATA_FILE_KEY_512;
- break;
- default:
- gf_log("crypt", GF_LOG_ERROR, "Unsupported data key size %d",
- keysize);
- ret = ENOTSUP;
- break;
- }
- return ret;
-}
-
-/*
- * derive per-file key for data encryption
- */
-int32_t
-get_data_file_key(struct crypt_inode_info *info,
- struct master_cipher_info *master, uint32_t keysize,
- unsigned char *key)
-{
- int32_t ret;
- struct kderive_context ctx;
- crypt_key_type type;
-
- ret = data_key_type_by_size(keysize, &type);
- if (ret)
- return ret;
- ret = kderive_init(&ctx, master->m_key, master_key_size(), info->oid,
- sizeof(uuid_t), type);
- if (ret)
- return ret;
- kderive_update(&ctx);
- kderive_final(&ctx, key);
- return 0;
-}
-
-/*
- * NOTE: Don't change existing keys: it will break compatibility;
- */
-struct crypt_key crypt_keys[LAST_KEY_TYPE] = {
- [MASTER_VOL_KEY] =
- {
- .len = MASTER_VOL_KEY_SIZE << 3,
- .label = "volume-master",
- },
- [NMTD_VOL_KEY] = {.len = NMTD_VOL_KEY_SIZE << 3,
- .label = "volume-nmtd-key-generation"},
- [NMTD_LINK_KEY] = {.len = 128, .label = "link-nmtd-authentication"},
- [EMTD_FILE_KEY] = {.len = 128, .label = "file-emtd-encryption-and-auth"},
- [DATA_FILE_KEY_256] = {.len = 256, .label = "file-data-encryption-256"},
- [DATA_FILE_KEY_512] = {.len = 512, .label = "file-data-encryption-512"}};
-
-/*
- Local variables:
- c-indentation-style: "K&R"
- mode-name: "LC"
- c-basic-offset: 8
- tab-width: 8
- fill-column: 80
- scroll-step: 1
- End:
-*/
diff --git a/xlators/encryption/crypt/src/metadata.c b/xlators/encryption/crypt/src/metadata.c
deleted file mode 100644
index 120ae62..0000000
--- a/xlators/encryption/crypt/src/metadata.c
+++ /dev/null
@@ -1,575 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include <glusterfs/defaults.h>
-#include "crypt-common.h"
-#include "crypt.h"
-#include "metadata.h"
-
-int32_t
-alloc_format(crypt_local_t *local, size_t size)
-{
- if (size > 0) {
- local->format = GF_CALLOC(1, size, gf_crypt_mt_mtd);
- if (!local->format)
- return ENOMEM;
- }
- local->format_size = size;
- return 0;
-}
-
-int32_t
-alloc_format_create(crypt_local_t *local)
-{
- return alloc_format(local, new_format_size());
-}
-
-void
-free_format(crypt_local_t *local)
-{
- GF_FREE(local->format);
-}
-
-/*
- * Check compatibility with extracted metadata
- */
-static int32_t
-check_file_metadata(struct crypt_inode_info *info)
-{
- struct object_cipher_info *object = &info->cinfo;
-
- if (info->nr_minor != CRYPT_XLATOR_ID) {
- gf_log("crypt", GF_LOG_WARNING, "unsupported minor subversion %d",
- info->nr_minor);
- return EINVAL;
- }
- if (object->o_alg > LAST_CIPHER_ALG) {
- gf_log("crypt", GF_LOG_WARNING, "unsupported cipher algorithm %d",
- object->o_alg);
- return EINVAL;
- }
- if (object->o_mode > LAST_CIPHER_MODE) {
- gf_log("crypt", GF_LOG_WARNING, "unsupported cipher mode %d",
- object->o_mode);
- return EINVAL;
- }
- if (object->o_block_bits < CRYPT_MIN_BLOCK_BITS ||
- object->o_block_bits > CRYPT_MAX_BLOCK_BITS) {
- gf_log("crypt", GF_LOG_WARNING, "unsupported block bits %d",
- object->o_block_bits);
- return EINVAL;
- }
- /* TBD: check data key size */
- return 0;
-}
-
-static size_t
-format_size_v1(mtd_op_t op, size_t old_size)
-{
- switch (op) {
- case MTD_CREATE:
- return sizeof(struct mtd_format_v1);
- case MTD_OVERWRITE:
- return old_size;
- case MTD_APPEND:
- return old_size + NMTD_8_MAC_SIZE;
- case MTD_CUT:
- if (old_size > sizeof(struct mtd_format_v1))
- return old_size - NMTD_8_MAC_SIZE;
- else
- return 0;
- default:
- gf_log("crypt", GF_LOG_WARNING, "Bad mtd operation");
- return 0;
- }
-}
-
-/*
- * Calculate size of the updated format string.
- * Returned zero means that we don't need to update the format string.
- */
-size_t
-format_size(mtd_op_t op, size_t old_size)
-{
- size_t versioned;
-
- versioned = mtd_loaders[current_mtd_loader()].format_size(
- op, old_size - sizeof(struct crypt_format));
- if (versioned != 0)
- return versioned + sizeof(struct crypt_format);
- return 0;
-}
-
-/*
- * size of the format string of newly created file (nr_links = 1)
- */
-size_t
-new_format_size(void)
-{
- return format_size(MTD_CREATE, 0);
-}
-
-/*
- * Calculate per-link MAC by pathname
- */
-static int32_t
-calc_link_mac_v1(struct mtd_format_v1 *fmt, loc_t *loc, unsigned char *result,
- struct crypt_inode_info *info,
- struct master_cipher_info *master)
-{
- int32_t ret;
- unsigned char nmtd_link_key[16];
- CMAC_CTX *cctx;
- size_t len;
-
- ret = get_nmtd_link_key(loc, master, nmtd_link_key);
- if (ret) {
- gf_log("crypt", GF_LOG_ERROR, "Can not get nmtd link key");
- return -1;
- }
- cctx = CMAC_CTX_new();
- if (!cctx) {
- gf_log("crypt", GF_LOG_ERROR, "CMAC_CTX_new failed");
- return -1;
- }
- ret = CMAC_Init(cctx, nmtd_link_key, sizeof(nmtd_link_key),
- EVP_aes_128_cbc(), 0);
- if (!ret) {
- gf_log("crypt", GF_LOG_ERROR, "CMAC_Init failed");
- CMAC_CTX_free(cctx);
- return -1;
- }
- ret = CMAC_Update(cctx, get_NMTD_V1(info), SIZE_OF_NMTD_V1);
- if (!ret) {
- gf_log("crypt", GF_LOG_ERROR, "CMAC_Update failed");
- CMAC_CTX_free(cctx);
- return -1;
- }
- ret = CMAC_Final(cctx, result, &len);
- CMAC_CTX_free(cctx);
- if (!ret) {
- gf_log("crypt", GF_LOG_ERROR, "CMAC_Final failed");
- return -1;
- }
- return 0;
-}
-
-/*
- * Create per-link MAC of index @idx by pathname
- */
-static int32_t
-create_link_mac_v1(struct mtd_format_v1 *fmt, uint32_t idx, loc_t *loc,
- struct crypt_inode_info *info,
- struct master_cipher_info *master)
-{
- int32_t ret;
- unsigned char *mac;
- unsigned char cmac[16];
-
- mac = get_NMTD_V1_MAC(fmt) + idx * SIZE_OF_NMTD_V1_MAC;
-
- ret = calc_link_mac_v1(fmt, loc, cmac, info, master);
- if (ret)
- return -1;
- memcpy(mac, cmac, SIZE_OF_NMTD_V1_MAC);
- return 0;
-}
-
-static int32_t
-create_format_v1(unsigned char *wire, loc_t *loc, struct crypt_inode_info *info,
- struct master_cipher_info *master)
-{
- int32_t ret;
- struct mtd_format_v1 *fmt;
- unsigned char mtd_key[16];
- AES_KEY EMTD_KEY;
- unsigned char nmtd_link_key[16];
- uint32_t ad;
- GCM128_CONTEXT *gctx;
-
- fmt = (struct mtd_format_v1 *)wire;
-
- fmt->minor_id = info->nr_minor;
- fmt->alg_id = AES_CIPHER_ALG;
- fmt->dkey_factor = master->m_dkey_size >> KEY_FACTOR_BITS;
- fmt->block_bits = master->m_block_bits;
- fmt->mode_id = master->m_mode;
- /*
- * retrieve keys for the parts of metadata
- */
- ret = get_emtd_file_key(info, master, mtd_key);
- if (ret)
- return ret;
- ret = get_nmtd_link_key(loc, master, nmtd_link_key);
- if (ret)
- return ret;
-
- AES_set_encrypt_key(mtd_key, sizeof(mtd_key) * 8, &EMTD_KEY);
-
- gctx = CRYPTO_gcm128_new(&EMTD_KEY, (block128_f)AES_encrypt);
-
- /* TBD: Check return values */
-
- CRYPTO_gcm128_setiv(gctx, info->oid, sizeof(uuid_t));
-
- ad = htole32(MTD_LOADER_V1);
- ret = CRYPTO_gcm128_aad(gctx, (const unsigned char *)&ad, sizeof(ad));
- if (ret) {
- gf_log("crypt", GF_LOG_ERROR, " CRYPTO_gcm128_aad failed");
- CRYPTO_gcm128_release(gctx);
- return ret;
- }
- ret = CRYPTO_gcm128_encrypt(gctx, get_EMTD_V1(fmt), get_EMTD_V1(fmt),
- SIZE_OF_EMTD_V1);
- if (ret) {
- gf_log("crypt", GF_LOG_ERROR, " CRYPTO_gcm128_encrypt failed");
- CRYPTO_gcm128_release(gctx);
- return ret;
- }
- /*
- * set MAC of encrypted part of metadata
- */
- CRYPTO_gcm128_tag(gctx, get_EMTD_V1_MAC(fmt), SIZE_OF_EMTD_V1_MAC);
- CRYPTO_gcm128_release(gctx);
- /*
- * set the first MAC of non-encrypted part of metadata
- */
- return create_link_mac_v1(fmt, 0, loc, info, master);
-}
-
-/*
- * Called by fops:
- * ->create();
- * ->link();
- *
- * Pack common and version-specific parts of file's metadata
- * Pre-conditions: @info contains valid object-id.
- */
-int32_t
-create_format(unsigned char *wire, loc_t *loc, struct crypt_inode_info *info,
- struct master_cipher_info *master)
-{
- struct crypt_format *fmt = (struct crypt_format *)wire;
-
- fmt->loader_id = current_mtd_loader();
-
- wire += sizeof(struct crypt_format);
- return mtd_loaders[current_mtd_loader()].create_format(wire, loc, info,
- master);
-}
-
-/*
- * Append or overwrite per-link mac of @mac_idx index
- * in accordance with the new pathname
- */
-int32_t
-appov_link_mac_v1(unsigned char *new, unsigned char *old, uint32_t old_size,
- int32_t mac_idx, loc_t *loc, struct crypt_inode_info *info,
- struct master_cipher_info *master, crypt_local_t *local)
-{
- memcpy(new, old, old_size);
- return create_link_mac_v1((struct mtd_format_v1 *)new, mac_idx, loc, info,
- master);
-}
-
-/*
- * Cut per-link mac of @mac_idx index
- */
-static int32_t
-cut_link_mac_v1(unsigned char *new, unsigned char *old, uint32_t old_size,
- int32_t mac_idx, loc_t *loc, struct crypt_inode_info *info,
- struct master_cipher_info *master, crypt_local_t *local)
-{
- memcpy(new, old,
- sizeof(struct mtd_format_v1) + NMTD_8_MAC_SIZE * (mac_idx - 1));
-
- memcpy(
- new + sizeof(struct mtd_format_v1) + NMTD_8_MAC_SIZE *(mac_idx - 1),
- old + sizeof(struct mtd_format_v1) + NMTD_8_MAC_SIZE * mac_idx,
- old_size - (sizeof(struct mtd_format_v1) + NMTD_8_MAC_SIZE * mac_idx));
- return 0;
-}
-
-int32_t
-update_format_v1(unsigned char *new, unsigned char *old, size_t old_len,
- int32_t mac_idx, /* of old name */
- mtd_op_t op, loc_t *loc, struct crypt_inode_info *info,
- struct master_cipher_info *master, crypt_local_t *local)
-{
- switch (op) {
- case MTD_APPEND:
- mac_idx = 1 + (old_len - sizeof(struct mtd_format_v1)) / 8;
- case MTD_OVERWRITE:
- return appov_link_mac_v1(new, old, old_len, mac_idx, loc, info,
- master, local);
- case MTD_CUT:
- return cut_link_mac_v1(new, old, old_len, mac_idx, loc, info,
- master, local);
- default:
- gf_log("crypt", GF_LOG_ERROR, "Bad mtd operation %d", op);
- return -1;
- }
-}
-
-/*
- * Called by fops:
- *
- * ->link()
- * ->unlink()
- * ->rename()
- *
- */
-int32_t
-update_format(unsigned char *new, unsigned char *old, size_t old_len,
- int32_t mac_idx, mtd_op_t op, loc_t *loc,
- struct crypt_inode_info *info, struct master_cipher_info *master,
- crypt_local_t *local)
-{
- if (!new)
- return 0;
- memcpy(new, old, sizeof(struct crypt_format));
-
- old += sizeof(struct crypt_format);
- new += sizeof(struct crypt_format);
- old_len -= sizeof(struct crypt_format);
-
- return mtd_loaders[current_mtd_loader()].update_format(
- new, old, old_len, mac_idx, op, loc, info, master, local);
-}
-
-/*
- * Perform preliminary checks of found metadata
- * Return < 0 on errors;
- * Return number of object-id MACs (>= 1) on success
- */
-int32_t
-check_format_v1(uint32_t len, unsigned char *wire)
-{
- uint32_t nr_links;
-
- if (len < sizeof(struct mtd_format_v1)) {
- gf_log("crypt", GF_LOG_ERROR, "v1-loader: bad metadata size %d", len);
- goto error;
- }
- len -= sizeof(struct mtd_format_v1);
- if (len % sizeof(nmtd_8_mac_t)) {
- gf_log("crypt", GF_LOG_ERROR, "v1-loader: bad metadata format");
- goto error;
- }
- nr_links = 1 + len / sizeof(nmtd_8_mac_t);
- if (nr_links > _POSIX_LINK_MAX)
- goto error;
- return nr_links;
-error:
- return EIO;
-}
-
-/*
- * Verify per-link MAC specified by index @idx
- *
- * return:
- * -1 on errors;
- * 0 on failed verification;
- * 1 on successful verification
- */
-static int32_t
-verify_link_mac_v1(struct mtd_format_v1 *fmt,
- uint32_t idx /* index of the mac to verify */, loc_t *loc,
- struct crypt_inode_info *info,
- struct master_cipher_info *master)
-{
- int32_t ret;
- unsigned char *mac;
- unsigned char cmac[16];
-
- mac = get_NMTD_V1_MAC(fmt) + idx * SIZE_OF_NMTD_V1_MAC;
-
- ret = calc_link_mac_v1(fmt, loc, cmac, info, master);
- if (ret)
- return -1;
- if (memcmp(cmac, mac, SIZE_OF_NMTD_V1_MAC))
- return 0;
- return 1;
-}
-
-/*
- * Lookup per-link MAC by pathname.
- *
- * return index of the MAC, if it was found;
- * return < 0 on errors, or if the MAC wasn't found
- */
-static int32_t
-lookup_link_mac_v1(struct mtd_format_v1 *fmt, uint32_t nr_macs, loc_t *loc,
- struct crypt_inode_info *info,
- struct master_cipher_info *master)
-{
- int32_t ret;
- uint32_t idx;
-
- for (idx = 0; idx < nr_macs; idx++) {
- ret = verify_link_mac_v1(fmt, idx, loc, info, master);
- if (ret < 0)
- return ret;
- if (ret > 0)
- return idx;
- }
- return -ENOENT;
-}
-
-/*
- * Extract version-specific part of metadata
- */
-static int32_t
-open_format_v1(unsigned char *wire, int32_t len, loc_t *loc,
- struct crypt_inode_info *info, struct master_cipher_info *master,
- crypt_local_t *local, gf_boolean_t load_info)
-{
- int32_t ret;
- int32_t num_nmtd_macs;
- struct mtd_format_v1 *fmt;
- unsigned char mtd_key[16];
- AES_KEY EMTD_KEY;
- GCM128_CONTEXT *gctx;
- uint32_t ad;
- emtd_8_mac_t gmac;
- struct object_cipher_info *object;
-
- num_nmtd_macs = check_format_v1(len, wire);
- if (num_nmtd_macs <= 0)
- return EIO;
-
- ret = lookup_link_mac_v1((struct mtd_format_v1 *)wire, num_nmtd_macs, loc,
- info, master);
- if (ret < 0) {
- gf_log("crypt", GF_LOG_ERROR, "NMTD verification failed");
- return EINVAL;
- }
-
- local->mac_idx = ret;
- if (load_info == _gf_false)
- /* the case of partial open */
- return 0;
-
- fmt = GF_MALLOC(len, gf_crypt_mt_mtd);
- if (!fmt)
- return ENOMEM;
- memcpy(fmt, wire, len);
-
- object = &info->cinfo;
-
- ret = get_emtd_file_key(info, master, mtd_key);
- if (ret) {
- gf_log("crypt", GF_LOG_ERROR, "Can not retrieve metadata key");
- goto out;
- }
- /*
- * decrypt encrypted meta-data
- */
- ret = AES_set_encrypt_key(mtd_key, sizeof(mtd_key) * 8, &EMTD_KEY);
- if (ret < 0) {
- gf_log("crypt", GF_LOG_ERROR, "Can not set encrypt key");
- ret = EIO;
- goto out;
- }
- gctx = CRYPTO_gcm128_new(&EMTD_KEY, (block128_f)AES_encrypt);
- if (!gctx) {
- gf_log("crypt", GF_LOG_ERROR, "Can not alloc gcm context");
- ret = ENOMEM;
- goto out;
- }
- CRYPTO_gcm128_setiv(gctx, info->oid, sizeof(uuid_t));
-
- ad = htole32(MTD_LOADER_V1);
- ret = CRYPTO_gcm128_aad(gctx, (const unsigned char *)&ad, sizeof(ad));
- if (ret) {
- gf_log("crypt", GF_LOG_ERROR, " CRYPTO_gcm128_aad failed");
- CRYPTO_gcm128_release(gctx);
- ret = EIO;
- goto out;
- }
- ret = CRYPTO_gcm128_decrypt(gctx, get_EMTD_V1(fmt), get_EMTD_V1(fmt),
- SIZE_OF_EMTD_V1);
- if (ret) {
- gf_log("crypt", GF_LOG_ERROR, " CRYPTO_gcm128_decrypt failed");
- CRYPTO_gcm128_release(gctx);
- ret = EIO;
- goto out;
- }
- /*
- * verify metadata
- */
- CRYPTO_gcm128_tag(gctx, gmac, sizeof(gmac));
- CRYPTO_gcm128_release(gctx);
- if (memcmp(gmac, get_EMTD_V1_MAC(fmt), SIZE_OF_EMTD_V1_MAC)) {
- gf_log("crypt", GF_LOG_ERROR, "EMTD verification failed");
- ret = EINVAL;
- goto out;
- }
- /*
- * load verified metadata to the private part of inode
- */
- info->nr_minor = fmt->minor_id;
-
- object->o_alg = fmt->alg_id;
- object->o_dkey_size = fmt->dkey_factor << KEY_FACTOR_BITS;
- object->o_block_bits = fmt->block_bits;
- object->o_mode = fmt->mode_id;
-
- ret = check_file_metadata(info);
-out:
- GF_FREE(fmt);
- return ret;
-}
-
-/*
- * perform metadata authentication against @loc->path;
- * extract crypt-specific attribute and populate @info
- * with them (optional)
- */
-int32_t
-open_format(unsigned char *str, int32_t len, loc_t *loc,
- struct crypt_inode_info *info, struct master_cipher_info *master,
- crypt_local_t *local, gf_boolean_t load_info)
-{
- struct crypt_format *fmt;
- if (len < sizeof(*fmt)) {
- gf_log("crypt", GF_LOG_ERROR, "Bad core format");
- return EIO;
- }
- fmt = (struct crypt_format *)str;
-
- if (fmt->loader_id >= LAST_MTD_LOADER) {
- gf_log("crypt", GF_LOG_ERROR, "Unsupported loader id %d",
- fmt->loader_id);
- return EINVAL;
- }
- str += sizeof(*fmt);
- len -= sizeof(*fmt);
-
- return mtd_loaders[fmt->loader_id].open_format(str, len, loc, info, master,
- local, load_info);
-}
-
-struct crypt_mtd_loader mtd_loaders[LAST_MTD_LOADER] = {
- [MTD_LOADER_V1] = {.format_size = format_size_v1,
- .create_format = create_format_v1,
- .open_format = open_format_v1,
- .update_format = update_format_v1}};
-
-/*
- Local variables:
- c-indentation-style: "K&R"
- mode-name: "LC"
- c-basic-offset: 8
- tab-width: 8
- fill-column: 80
- scroll-step: 1
- End:
-*/
diff --git a/xlators/encryption/crypt/src/metadata.h b/xlators/encryption/crypt/src/metadata.h
deleted file mode 100644
index 0bcee1b..0000000
--- a/xlators/encryption/crypt/src/metadata.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __METADATA_H__
-#define __METADATA_H__
-
-#define NMTD_8_MAC_SIZE (8)
-#define EMTD_8_MAC_SIZE (8)