diff options
author | Amar Tumballi <amarts@redhat.com> | 2018-12-06 12:29:25 +0530 |
---|---|---|
committer | Amar Tumballi <amarts@redhat.com> | 2018-12-13 17:10:00 +0000 |
commit | 8293d21280fd6ddfc9bb54068cf87794fc6be207 (patch) | |
tree | 39729fb407b436ed0cc3e4a9f4e5bbd29036a9db | |
parent | af7e957b4954bd84b8f7df6bfbd59c939092ead2 (diff) |
all: remove code which is not being considered in build
These xlators have been removed from the build, as per the discussion/announcement
at https://lists.gluster.org/pipermail/gluster-users/2018-July/034400.html
* Move rot-13 to playground, as it is used only for demo
purposes and is documented in many places.
* Removed code of below xlators:
- cluster/stripe
- cluster/tier
- features/changetimerecorder
- features/glupy
- performance/symlink-cache
- encryption/crypt
- storage/bd
- experimental/posix2
- experimental/dht2
- experimental/fdl
- experimental/jbr
updates: bz#1635688
Change-Id: I1d2d63c32535e149bc8dcb2daa76236c707996e8
Signed-off-by: Amar Tumballi <amarts@redhat.com>
113 files changed, 0 insertions, 37439 deletions
diff --git a/xlators/cluster/dht/src/tier-common.c b/xlators/cluster/dht/src/tier-common.c deleted file mode 100644 index b22f477..0000000 --- a/xlators/cluster/dht/src/tier-common.c +++ /dev/null @@ -1,1199 +0,0 @@ -/* - Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -#include <glusterfs/glusterfs.h> -#include <glusterfs/xlator.h> -#include "libxlator.h" -#include "dht-common.h" -#include <glusterfs/defaults.h> -#include "tier-common.h" -#include "tier.h" - -int -dht_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, - int op_errno, inode_t *inode, struct iatt *stbuf, - struct iatt *preparent, struct iatt *postparent, dict_t *xdata); - -int -tier_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, - int op_errno, inode_t *inode, struct iatt *stbuf, - struct iatt *preparent, struct iatt *postparent, dict_t *xdata) -{ - dht_local_t *local = NULL; - loc_t *oldloc = NULL; - loc_t *newloc = NULL; - - local = frame->local; - - oldloc = &local->loc; - newloc = &local->loc2; - - if (op_ret == -1) { - /* No continuation on DHT inode missing errors, as we should - * then have a good stbuf that states P2 happened. 
We would - * get inode missing if, the file completed migrated between - * the lookup and the link call */ - goto out; - } - - if (local->call_cnt != 1) { - goto out; - } - - local->call_cnt = 2; - - /* Do this on the hot tier now */ - - STACK_WIND(frame, tier_link_cbk, local->cached_subvol, - local->cached_subvol->fops->link, oldloc, newloc, xdata); - - return 0; - -out: - DHT_STRIP_PHASE1_FLAGS(stbuf); - - DHT_STACK_UNWIND(link, frame, op_ret, op_errno, inode, stbuf, preparent, - postparent, NULL); - - return 0; -} - -int -tier_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, - dict_t *xdata) -{ - xlator_t *cached_subvol = NULL; - xlator_t *hashed_subvol = NULL; - int op_errno = -1; - int ret = -1; - dht_local_t *local = NULL; - dht_conf_t *conf = NULL; - - VALIDATE_OR_GOTO(frame, err); - VALIDATE_OR_GOTO(this, err); - VALIDATE_OR_GOTO(oldloc, err); - VALIDATE_OR_GOTO(newloc, err); - - conf = this->private; - - local = dht_local_init(frame, oldloc, NULL, GF_FOP_LINK); - if (!local) { - op_errno = ENOMEM; - goto err; - } - local->call_cnt = 1; - - cached_subvol = local->cached_subvol; - - if (!cached_subvol) { - gf_msg_debug(this->name, 0, "no cached subvolume for path=%s", - oldloc->path); - op_errno = ENOENT; - goto err; - } - - hashed_subvol = TIER_HASHED_SUBVOL; - - ret = loc_copy(&local->loc2, newloc); - if (ret == -1) { - op_errno = ENOMEM; - goto err; - } - - if (hashed_subvol == cached_subvol) { - STACK_WIND(frame, dht_link_cbk, cached_subvol, - cached_subvol->fops->link, oldloc, newloc, xdata); - return 0; - } - - /* Create hardlinks to both the data file on the hot tier - and the linkto file on the cold tier */ - - gf_uuid_copy(local->gfid, oldloc->inode->gfid); - - STACK_WIND(frame, tier_link_cbk, hashed_subvol, hashed_subvol->fops->link, - oldloc, newloc, xdata); - - return 0; -err: - op_errno = (op_errno == -1) ? 
errno : op_errno; - DHT_STACK_UNWIND(link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL); - return 0; -} - -int -tier_create_unlink_stale_linkto_cbk(call_frame_t *frame, void *cookie, - xlator_t *this, int op_ret, int op_errno, - struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - dht_local_t *local = NULL; - - local = frame->local; - - if (local->params) { - dict_del(local->params, GLUSTERFS_INTERNAL_FOP_KEY); - } - - DHT_STACK_UNWIND(create, frame, -1, local->op_errno, NULL, NULL, NULL, NULL, - NULL, NULL); - - return 0; -} - -int -tier_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, - int op_errno, fd_t *fd, inode_t *inode, struct iatt *stbuf, - struct iatt *preparent, struct iatt *postparent, dict_t *xdata) -{ - xlator_t *prev = NULL; - int ret = -1; - dht_local_t *local = NULL; - xlator_t *hashed_subvol = NULL; - dht_conf_t *conf = NULL; - - local = frame->local; - conf = this->private; - - hashed_subvol = TIER_HASHED_SUBVOL; - - if (!local) { - op_ret = -1; - op_errno = EINVAL; - goto out; - } - - if (op_ret == -1) { - if (local->linked == _gf_true && local->xattr_req) { - local->op_errno = op_errno; - local->op_ret = op_ret; - ret = dht_fill_dict_to_avoid_unlink_of_migrating_file( - local->xattr_req); - if (ret) { - gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, - "Failed to set dictionary value to " - "unlink of migrating file"); - goto out; - } - - STACK_WIND(frame, tier_create_unlink_stale_linkto_cbk, - hashed_subvol, hashed_subvol->fops->unlink, &local->loc, - 0, local->xattr_req); - return 0; - } - goto out; - } - - prev = cookie; - - if (local->loc.parent) { - dht_inode_ctx_time_update(local->loc.parent, this, preparent, 0); - - dht_inode_ctx_time_update(local->loc.parent, this, postparent, 1); - } - - ret = dht_layout_preset(this, prev, inode); - if (ret != 0) { - gf_msg_debug(this->name, 0, "could not set preset layout for subvol %s", - prev->name); - op_ret = -1; - op_errno = EINVAL; 
- goto out; - } - - local->op_errno = op_errno; - - if (local->linked == _gf_true) { - local->stbuf = *stbuf; - dht_linkfile_attr_heal(frame, this); - } -out: - if (local) { - if (local->xattr_req) { - dict_del(local->xattr_req, TIER_LINKFILE_GFID); - } - } - - DHT_STRIP_PHASE1_FLAGS(stbuf); - - DHT_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf, - preparent, postparent, xdata); - - return 0; -} - -int -tier_create_linkfile_create_cbk(call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno, inode_t *inode, - struct iatt *stbuf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - dht_local_t *local = NULL; - xlator_t *cached_subvol = NULL; - dht_conf_t *conf = NULL; - int ret = -1; - unsigned char *gfid = NULL; - - local = frame->local; - if (!local) { - op_errno = EINVAL; - goto err; - } - - if (op_ret == -1) { - local->op_errno = op_errno; - goto err; - } - - conf = this->private; - if (!conf) { - local->op_errno = EINVAL; - op_errno = EINVAL; - goto err; - } - - cached_subvol = TIER_UNHASHED_SUBVOL; - - if (local->params) { - dict_del(local->params, conf->link_xattr_name); - dict_del(local->params, GLUSTERFS_INTERNAL_FOP_KEY); - } - - /* - * We will delete the linkfile if data file creation fails. - * When deleting this stale linkfile, there is a possibility - * for a race between this linkfile deletion and a stale - * linkfile deletion triggered by another lookup from different - * client. - * - * For eg: - * - * Client 1 Client 2 - * - * 1 linkfile created for foo - * - * 2 data file creation failed - * - * 3 creating a file with same name - * - * 4 lookup before creation deleted - * the linkfile created by client1 - * considering as a stale linkfile. - * - * 5 New linkfile created for foo - * with different gfid. - * - * 6 Trigger linkfile deletion as - * data file creation failed. - * - * 7 Linkfile deleted which is - * created by client2. - * - * 8 Data file created. 
- * - * With this race, we will end up having a file in a non-hashed subvol - * without a linkfile in hashed subvol. - * - * To avoid this, we store the gfid of linkfile created by client, So - * If we delete the linkfile , we validate gfid of existing file with - * stored value from posix layer. - * - * Storing this value in local->xattr_req as local->params was also used - * to create the data file. During the linkfile deletion we will use - * local->xattr_req dictionary. - */ - if (!local->xattr_req) { - local->xattr_req = dict_new(); - if (!local->xattr_req) { - local->op_errno = ENOMEM; - op_errno = ENOMEM; - goto err; - } - } - - gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_char); - if (!gfid) { - local->op_errno = ENOMEM; - op_errno = ENOMEM; - goto err; - } - - gf_uuid_copy(gfid, stbuf->ia_gfid); - ret = dict_set_dynptr(local->xattr_req, TIER_LINKFILE_GFID, gfid, - sizeof(uuid_t)); - if (ret) { - GF_FREE(gfid); - gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, - "Failed to set dictionary value" - " : key = %s", - TIER_LINKFILE_GFID); - } - - STACK_WIND_COOKIE(frame, tier_create_cbk, cached_subvol, cached_subvol, - cached_subvol->fops->create, &local->loc, local->flags, - local->mode, local->umask, local->fd, local->params); - - return 0; -err: - DHT_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL, - NULL); - return 0; -} - -gf_boolean_t -tier_is_hot_tier_decommissioned(xlator_t *this) -{ - dht_conf_t *conf = NULL; - xlator_t *hot_tier = NULL; - int i = 0; - - conf = this->private; - hot_tier = conf->subvolumes[1]; - - if (conf->decommission_subvols_cnt) { - for (i = 0; i < conf->subvolume_cnt; i++) { - if (conf->decommissioned_bricks[i] && - conf->decommissioned_bricks[i] == hot_tier) - return _gf_true; - } - } - - return _gf_false; -} - -int -tier_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - mode_t mode, mode_t umask, fd_t *fd, dict_t *params) -{ - int op_errno = -1; - dht_local_t *local = 
NULL; - dht_conf_t *conf = NULL; - xlator_t *hot_subvol = NULL; - xlator_t *cold_subvol = NULL; - - VALIDATE_OR_GOTO(frame, err); - VALIDATE_OR_GOTO(this, err); - VALIDATE_OR_GOTO(loc, err); - - conf = this->private; - - dht_get_du_info(frame, this, loc); - - local = dht_local_init(frame, loc, fd, GF_FOP_CREATE); - if (!local) { - op_errno = ENOMEM; - goto err; - } - - cold_subvol = TIER_HASHED_SUBVOL; - hot_subvol = TIER_UNHASHED_SUBVOL; - - if (conf->subvolumes[0] != cold_subvol) { - hot_subvol = conf->subvolumes[0]; - } - /* - * if hot tier full, write to cold. - * Also if hot tier is full, create in cold - */ - if (dht_is_subvol_filled(this, hot_subvol) || - tier_is_hot_tier_decommissioned(this)) { - gf_msg_debug(this->name, 0, "creating %s on %s", loc->path, - cold_subvol->name); - - STACK_WIND_COOKIE(frame, tier_create_cbk, cold_subvol, cold_subvol, - cold_subvol->fops->create, loc, flags, mode, umask, - fd, params); - } else { - local->params = dict_ref(params); - local->flags = flags; - local->mode = mode; - local->umask = umask; - local->cached_subvol = hot_subvol; - local->hashed_subvol = cold_subvol; - - gf_msg_debug(this->name, 0, "creating %s on %s (link at %s)", loc->path, - hot_subvol->name, cold_subvol->name); - - dht_linkfile_create(frame, tier_create_linkfile_create_cbk, this, - hot_subvol, cold_subvol, loc); - - goto out; - } -out: - return 0; - -err: - - op_errno = (op_errno == -1) ? 
errno : op_errno; - DHT_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL, - NULL); - - return 0; -} - -int -tier_unlink_nonhashed_linkfile_cbk(call_frame_t *frame, void *cookie, - xlator_t *this, int op_ret, int op_errno, - struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - dht_local_t *local = NULL; - xlator_t *prev = NULL; - - local = frame->local; - prev = cookie; - - LOCK(&frame->lock); - { - if ((op_ret == -1) && (op_errno != ENOENT)) { - local->op_errno = op_errno; - local->op_ret = op_ret; - gf_msg_debug(this->name, op_errno, - "Unlink link: subvolume %s" - " returned -1", - prev->name); - goto unlock; - } - - local->op_ret = 0; - } -unlock: - UNLOCK(&frame->lock); - - if (local->op_ret == -1) - goto err; - DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno, - &local->preparent, &local->postparent, NULL); - - return 0; - -err: - DHT_STACK_UNWIND(unlink, frame, -1, local->op_errno, NULL, NULL, NULL); - return 0; -} - -int -tier_unlink_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, inode_t *inode, - struct iatt *preparent, dict_t *xdata, - struct iatt *postparent) -{ - dht_local_t *local = NULL; - xlator_t *prev = NULL; - dht_conf_t *conf = NULL; - xlator_t *hot_subvol = NULL; - - local = frame->local; - prev = cookie; - conf = this->private; - hot_subvol = TIER_UNHASHED_SUBVOL; - - if (!op_ret) { - /* - * linkfile present on hot tier. 
unlinking the linkfile - */ - STACK_WIND_COOKIE(frame, tier_unlink_nonhashed_linkfile_cbk, hot_subvol, - hot_subvol, hot_subvol->fops->unlink, &local->loc, - local->flags, NULL); - return 0; - } - - LOCK(&frame->lock); - { - if (op_errno == ENOENT) { - local->op_ret = 0; - local->op_errno = op_errno; - } else { - local->op_ret = op_ret; - local->op_errno = op_errno; - } - gf_msg_debug(this->name, op_errno, "Lookup : subvolume %s returned -1", - prev->name); - } - - UNLOCK(&frame->lock); - - DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno, - &local->preparent, &local->postparent, xdata); - - return 0; -} - -int -tier_unlink_linkfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - dht_local_t *local = NULL; - xlator_t *prev = NULL; - - local = frame->local; - prev = cookie; - - LOCK(&frame->lock); - { - /* Ignore EINVAL for tier to ignore error when the file - does not exist on the other tier */ - if ((op_ret == -1) && !((op_errno == ENOENT) || (op_errno == EINVAL))) { - local->op_errno = op_errno; - local->op_ret = op_ret; - gf_msg_debug(this->name, op_errno, - "Unlink link: subvolume %s" - " returned -1", - prev->name); - goto unlock; - } - - local->op_ret = 0; - } -unlock: - UNLOCK(&frame->lock); - - if (local->op_ret == -1) - goto err; - - DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno, - &local->preparent, &local->postparent, xdata); - - return 0; - -err: - DHT_STACK_UNWIND(unlink, frame, -1, local->op_errno, NULL, NULL, NULL); - return 0; -} - -int32_t -tier_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, - int op_errno, struct iatt *preparent, struct iatt *postparent, - dict_t *xdata) -{ - dht_local_t *local = NULL; - xlator_t *prev = NULL; - struct iatt *stbuf = NULL; - dht_conf_t *conf = NULL; - int ret = -1; - xlator_t *hot_tier = NULL; - xlator_t *cold_tier = NULL; - - local = frame->local; - 
prev = cookie; - conf = this->private; - - cold_tier = TIER_HASHED_SUBVOL; - hot_tier = TIER_UNHASHED_SUBVOL; - - LOCK(&frame->lock); - { - if (op_ret == -1) { - if (op_errno == ENOENT) { - local->op_ret = 0; - } else { - local->op_ret = -1; - local->op_errno = op_errno; - } - gf_msg_debug(this->name, op_errno, - "Unlink: subvolume %s returned -1" - " with errno = %d", - prev->name, op_errno); - goto unlock; - } - - local->op_ret = 0; - - local->postparent = *postparent; - local->preparent = *preparent; - - if (local->loc.parent) { - dht_inode_ctx_time_update(local->loc.parent, this, - &local->preparent, 0); - dht_inode_ctx_time_update(local->loc.parent, this, - &local->postparent, 1); - } - } -unlock: - UNLOCK(&frame->lock); - - if (local->op_ret) - goto out; - - if (cold_tier != local->cached_subvol) { - /* - * File is present in hot tier, so there will be - * a link file on cold tier, deleting the linkfile - * from cold tier - */ - STACK_WIND_COOKIE(frame, tier_unlink_linkfile_cbk, cold_tier, cold_tier, - cold_tier->fops->unlink, &local->loc, local->flags, - xdata); - return 0; - } - - ret = dict_get_bin(xdata, DHT_IATT_IN_XDATA_KEY, (void **)&stbuf); - if (!ret && stbuf && - ((IS_DHT_MIGRATION_PHASE2(stbuf)) || IS_DHT_MIGRATION_PHASE1(stbuf))) { - /* - * File is migrating from cold to hot tier. - * Delete the destination linkfile. 
- */ - STACK_WIND_COOKIE(frame, tier_unlink_lookup_cbk, hot_tier, hot_tier, - hot_tier->fops->lookup, &local->loc, NULL); - return 0; - } - -out: - DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno, - &local->preparent, &local->postparent, xdata); - - return 0; -} - -int -tier_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, - dict_t *xdata) -{ - xlator_t *cached_subvol = NULL; - xlator_t *hashed_subvol = NULL; - dht_conf_t *conf = NULL; - int op_errno = -1; - dht_local_t *local = NULL; - int ret = -1; - - VALIDATE_OR_GOTO(frame, err); - VALIDATE_OR_GOTO(this, err); - VALIDATE_OR_GOTO(loc, err); - - conf = this->private; - - local = dht_local_init(frame, loc, NULL, GF_FOP_UNLINK); - if (!local) { - op_errno = ENOMEM; - - goto err; - } - - hashed_subvol = TIER_HASHED_SUBVOL; - - cached_subvol = local->cached_subvol; - if (!cached_subvol) { - gf_msg_debug(this->name, 0, "no cached subvolume for path=%s", - loc->path); - op_errno = EINVAL; - goto err; - } - - local->flags = xflag; - if (IA_ISREG(loc->inode->ia_type) && (hashed_subvol == cached_subvol)) { - /* - * File resides in cold tier. We need to stat - * the file to see if it is being promoted. - * If yes we need to delete the destination - * file as well. - * - * Currently we are doing this check only for - * regular files. - */ - xdata = xdata ? dict_ref(xdata) : dict_new(); - if (xdata) { - ret = dict_set_int8(xdata, DHT_IATT_IN_XDATA_KEY, 1); - if (ret) { - gf_msg_debug(this->name, 0, "Failed to set dictionary key %s", - DHT_IATT_IN_XDATA_KEY); - } - } - } - - /* - * File is on hot tier, delete the data file first, then - * linkfile from cold. - */ - STACK_WIND_COOKIE(frame, tier_unlink_cbk, cached_subvol, cached_subvol, - cached_subvol->fops->unlink, loc, xflag, xdata); - if (xdata) - dict_unref(xdata); - return 0; -err: - op_errno = (op_errno == -1) ? 
errno : op_errno; - DHT_STACK_UNWIND(unlink, frame, -1, op_errno, NULL, NULL, NULL); - - return 0; -} - -int -tier_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, - int op_errno, gf_dirent_t *orig_entries, dict_t *xdata) -{ - gf_dirent_t entries; - gf_dirent_t *orig_entry = NULL; - gf_dirent_t *entry = NULL; - int count = 0; - - INIT_LIST_HEAD(&entries.list); - - if (op_ret < 0) - goto unwind; - - list_for_each_entry(orig_entry, (&orig_entries->list), list) - { - entry = gf_dirent_for_name(orig_entry->d_name); - if (!entry) { - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, - "Memory allocation failed "); - goto unwind; - } - - entry->d_off = orig_entry->d_off; - entry->d_ino = orig_entry->d_ino; - entry->d_type = orig_entry->d_type; - entry->d_len = orig_entry->d_len; - - list_add_tail(&entry->list, &entries.list); - count++; - } - op_ret = count; - -unwind: - if (op_ret < 0) - op_ret = 0; - - DHT_STACK_UNWIND(readdir, frame, op_ret, op_errno, &entries, NULL); - - gf_dirent_free(&entries); - - return 0; -} - -int -tier_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, - int op_errno, gf_dirent_t *orig_entries, dict_t *xdata) -{ - dht_local_t *local = NULL; - gf_dirent_t entries; - gf_dirent_t *orig_entry = NULL; - gf_dirent_t *entry = NULL; - xlator_t *prev = NULL; - xlator_t *next_subvol = NULL; - off_t next_offset = 0; - int count = 0; - dht_conf_t *conf = NULL; - int ret = 0; - inode_table_t *itable = NULL; - inode_t *inode = NULL; - - INIT_LIST_HEAD(&entries.list); - prev = cookie; - local = frame->local; - itable = local->fd ? 
local->fd->inode->table : NULL; - - conf = this->private; - GF_VALIDATE_OR_GOTO(this->name, conf, unwind); - - if (op_ret < 0) - goto done; - - list_for_each_entry(orig_entry, (&orig_entries->list), list) - { - next_offset = orig_entry->d_off; - - if (IA_ISINVAL(orig_entry->d_stat.ia_type)) { - /*stat failed somewhere- ignore this entry*/ - continue; - } - - entry = gf_dirent_for_name(orig_entry->d_name); - if (!entry) { - goto unwind; - } - - entry->d_off = orig_entry->d_off; - entry->d_stat = orig_entry->d_stat; - entry->d_ino = orig_entry->d_ino; - entry->d_type = orig_entry->d_type; - entry->d_len = orig_entry->d_len; - - if (orig_entry->dict) - entry->dict = dict_ref(orig_entry->dict); - - if (check_is_linkfile(NULL, (&orig_entry->d_stat), orig_entry->dict, - conf->link_xattr_name)) { - goto entries; - - } else if (IA_ISDIR(entry->d_stat.ia_type)) { - if (orig_entry->inode) { - dht_inode_ctx_time_update(orig_entry->inode, this, - &entry->d_stat, 1); - } - } else { - if (orig_entry->inode) { - ret = dht_layout_preset(this, prev, orig_entry->inode); - if (ret) - gf_msg(this->name, GF_LOG_WARNING, 0, - DHT_MSG_LAYOUT_SET_FAILED, - "failed to link the layout " - "in inode"); - - entry->inode = inode_ref(orig_entry->inode); - } else if (itable) { - /* - * orig_entry->inode might be null if any upper - * layer xlators below client set to null, to - * force a lookup on the inode even if the inode - * is present in the inode table. In that case - * we just update the ctx to make sure we didn't - * missed anything. 
- */ - inode = inode_find(itable, orig_entry->d_stat.ia_gfid); - if (inode) { - ret = dht_layout_preset(this, TIER_HASHED_SUBVOL, inode); - if (ret) - gf_msg(this->name, GF_LOG_WARNING, 0, - DHT_MSG_LAYOUT_SET_FAILED, - "failed to link the layout" - " in inode"); - inode_unref(inode); - inode = NULL; - } - } - } - - entries: - list_add_tail(&entry->list, &entries.list); - count++; - } - op_ret = count; - -done: - if (count == 0) { - /* non-zero next_offset means that - EOF is not yet hit on the current subvol - */ - if (next_offset != 0) { - next_subvol = prev; - } else { - goto unwind; - } - - STACK_WIND_COOKIE(frame, tier_readdirp_cbk, next_subvol, next_subvol, - next_subvol->fops->readdirp, local->fd, local->size, - next_offset, local->xattr); - return 0; - } - -unwind: - if (op_ret < 0) - op_ret = 0; - - DHT_STACK_UNWIND(readdirp, frame, op_ret, op_errno, &entries, NULL); - - gf_dirent_free(&entries); - - return 0; -} - -int -tier_do_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t yoff, int whichop, dict_t *dict) -{ - dht_local_t *local = NULL; - int op_errno = -1; - xlator_t *hashed_subvol = NULL; - int ret = 0; - dht_conf_t *conf = NULL; - - VALIDATE_OR_GOTO(frame, err); - VALIDATE_OR_GOTO(this, err); - VALIDATE_OR_GOTO(fd, err); - VALIDATE_OR_GOTO(this->private, err); - - conf = this->private; - - local = dht_local_init(frame, NULL, NULL, whichop); - if (!local) { - op_errno = ENOMEM; - goto err; - } - - local->fd = fd_ref(fd); - local->size = size; - local->xattr_req = (dict) ? 
dict_ref(dict) : NULL; - - hashed_subvol = TIER_HASHED_SUBVOL; - - /* TODO: do proper readdir */ - if (whichop == GF_FOP_READDIRP) { - if (dict) - local->xattr = dict_ref(dict); - else - local->xattr = dict_new(); - - if (local->xattr) { - ret = dict_set_uint32(local->xattr, conf->link_xattr_name, 256); - if (ret) - gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, - "Failed to set dictionary value" - " : key = %s", - conf->link_xattr_name); - } - - STACK_WIND_COOKIE(frame, tier_readdirp_cbk, hashed_subvol, - hashed_subvol, hashed_subvol->fops->readdirp, fd, - size, yoff, local->xattr); - - } else { - STACK_WIND_COOKIE(frame, tier_readdir_cbk, hashed_subvol, hashed_subvol, - hashed_subvol->fops->readdir, fd, size, yoff, - local->xattr); - } - - return 0; - -err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND(readdir, frame, -1, op_errno, NULL, NULL); - - return 0; -} - -int -tier_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t yoff, dict_t *xdata) -{ - int op = GF_FOP_READDIR; - dht_conf_t *conf = NULL; - int i = 0; - - conf = this->private; - if (!conf) - goto out; - - for (i = 0; i < conf->subvolume_cnt; i++) { - if (!conf->subvolume_status[i]) { - op = GF_FOP_READDIRP; - break; - } - } - - if (conf->use_readdirp) - op = GF_FOP_READDIRP; - -out: - tier_do_readdir(frame, this, fd, size, yoff, op, 0); - return 0; -} - -int -tier_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t yoff, dict_t *dict) -{ - tier_do_readdir(frame, this, fd, size, yoff, GF_FOP_READDIRP, dict); - return 0; -} - -int -tier_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, - int op_errno, struct statvfs *statvfs, dict_t *xdata) -{ - gf_boolean_t event = _gf_false; - qdstatfs_action_t action = qdstatfs_action_OFF; - dht_local_t *local = NULL; - int this_call_cnt = 0; - int bsize = 0; - int frsize = 0; - GF_UNUSED int ret = 0; - unsigned long new_usage = 0; - unsigned long 
cur_usage = 0; - xlator_t *prev = NULL; - dht_conf_t *conf = NULL; - tier_statvfs_t *tier_stat = NULL; - - prev = cookie; - local = frame->local; - GF_ASSERT(local); - - conf = this->private; - - if (xdata) - ret = dict_get_int8(xdata, "quota-deem-statfs", (int8_t *)&event); - - tier_stat = &local->tier_statvfs; - - LOCK(&frame->lock); - { - if (op_ret == -1) { - local->op_errno = op_errno; - goto unlock; - } - if (!statvfs) { - op_errno = EINVAL; - local->op_ret = -1; - goto unlock; - } - local->op_ret = 0; - - if (local->quota_deem_statfs) { - if (event == _gf_true) { - action = qdstatfs_action_COMPARE; - } else { - action = qdstatfs_action_NEGLECT; - } - } else { - if (event == _gf_true) { - action = qdstatfs_action_REPLACE; - local->quota_deem_statfs = _gf_true; - } - } - - if (local->quota_deem_statfs) { - switch (action) { - case qdstatfs_action_NEGLECT: - goto unlock; - - case qdstatfs_action_REPLACE: - local->statvfs = *statvfs; - goto unlock; - - case qdstatfs_action_COMPARE: - new_usage = statvfs->f_blocks - statvfs->f_bfree; - cur_usage = local->statvfs.f_blocks - - local->statvfs.f_bfree; - - /* Take the max of the usage from subvols */ - if (new_usage >= cur_usage) - local->statvfs = *statvfs; - goto unlock; - - default: - break; - } - } - - if (local->statvfs.f_bsize != 0) { - bsize = max(local->statvfs.f_bsize, statvfs->f_bsize); - frsize = max(local->statvfs.f_frsize, statvfs->f_frsize); - dht_normalize_stats(&local->statvfs, bsize, frsize); - dht_normalize_stats(statvfs, bsize, frsize); - } else { - local->statvfs.f_bsize = statvfs->f_bsize; - local->statvfs.f_frsize = statvfs->f_frsize; - } - - if (prev == TIER_HASHED_SUBVOL) { - local->statvfs.f_blocks = statvfs->f_blocks; - local->statvfs.f_files = statvfs->f_files; - local->statvfs.f_fsid = statvfs->f_fsid; - local->statvfs.f_flag = statvfs->f_flag; - local->statvfs.f_namemax = statvfs->f_namemax; - tier_stat->blocks_used = (statvfs->f_blocks - statvfs->f_bfree); - tier_stat->pblocks_used = 
(statvfs->f_blocks - statvfs->f_bavail); - tier_stat->files_used = (statvfs->f_files - statvfs->f_ffree); - tier_stat->pfiles_used = (statvfs->f_files - statvfs->f_favail); - tier_stat->hashed_fsid = statvfs->f_fsid; - } else { - tier_stat->unhashed_fsid = statvfs->f_fsid; - tier_stat->unhashed_blocks_used = (statvfs->f_blocks - - statvfs->f_bfree); - tier_stat->unhashed_pblocks_used = (statvfs->f_blocks - - statvfs->f_bavail); - tier_stat->unhashed_files_used = (statvfs->f_files - - statvfs->f_ffree); - tier_stat->unhashed_pfiles_used = (statvfs->f_files - - statvfs->f_favail); - } - } -unlock: - UNLOCK(&frame->lock); - - this_call_cnt = dht_frame_return(frame); - if (is_last_call(this_call_cnt)) { - if (tier_stat->unhashed_fsid != tier_stat->hashed_fsid) { - tier_stat->blocks_used += tier_stat->unhashed_blocks_used; - tier_stat->pblocks_used += tier_stat->unhashed_pblocks_used; - tier_stat->files_used += tier_stat->unhashed_files_used; - tier_stat->pfiles_used += tier_stat->unhashed_pfiles_used; - } - local->statvfs.f_bfree = local->statvfs.f_blocks - - tier_stat->blocks_used; - local->statvfs.f_bavail = local->statvfs.f_blocks - - tier_stat->pblocks_used; - local->statvfs.f_ffree = local->statvfs.f_files - tier_stat->files_used; - local->statvfs.f_favail = local->statvfs.f_files - - tier_stat->pfiles_used; - DHT_STACK_UNWIND(statfs, frame, local->op_ret, local->op_errno, - &local->statvfs, xdata); - } - - return 0; -} - -int -tier_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) -{ - dht_local_t *local = NULL; - dht_conf_t *conf = NULL; - int op_errno = -1; - int i = -1; - inode_t *inode = NULL; - inode_table_t *itable = NULL; - uuid_t root_gfid = { - 0, - }; - loc_t newloc = { - 0, - }; - - VALIDATE_OR_GOTO(frame, err); - VALIDATE_OR_GOTO(this, err); - VALIDATE_OR_GOTO(loc, err); - VALIDATE_OR_GOTO(this->private, err); - - conf = this->private; - - local = dht_local_init(frame, NULL, NULL, GF_FOP_STATFS); - if (!local) { - op_errno = 
ENOMEM; - goto err; - } - - if (loc->inode && !IA_ISDIR(loc->inode->ia_type)) { - itable = loc->inode->table; - if (!itable) { - op_errno = EINVAL; - goto err; - } - - loc = &local->loc2; - root_gfid[15] = 1; - - inode = inode_find(itable, root_gfid); - if (!inode) { - op_errno = EINVAL; - goto err; - } - - dht_build_root_loc(inode, &newloc); - loc = &newloc; - } - - local->call_cnt = conf->subvolume_cnt; - - for (i = 0; i < conf->subvolume_cnt; i++) { - STACK_WIND_COOKIE(frame, tier_statfs_cbk, conf->subvolumes[i], - conf->subvolumes[i], - conf->subvolumes[i]->fops->statfs, loc, xdata); - } - - return 0; - -err: - op_errno = (op_errno == -1) ? errno : op_errno; - DHT_STACK_UNWIND(statfs, frame, -1, op_errno, NULL, NULL); - - return 0; -} diff --git a/xlators/cluster/dht/src/tier-common.h b/xlators/cluster/dht/src/tier-common.h deleted file mode 100644 index b1ebaa8..0000000 --- a/xlators/cluster/dht/src/tier-common.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. 
-*/ - -#ifndef _TIER_COMMON_H_ -#define _TIER_COMMON_H_ -/* Function definitions */ -int -tier_create_unlink_stale_linkto_cbk(call_frame_t *frame, void *cookie, - xlator_t *this, int op_ret, int op_errno, - struct iatt *preparent, - struct iatt *postparent, dict_t *xdata); - -int -tier_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, - int op_errno, fd_t *fd, inode_t *inode, struct iatt *stbuf, - struct iatt *preparent, struct iatt *postparent, dict_t *xdata); - -int -tier_create_linkfile_create_cbk(call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno, inode_t *inode, - struct iatt *stbuf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata); - -int -tier_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - mode_t mode, mode_t umask, fd_t *fd, dict_t *params); - -int32_t -tier_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, - dict_t *xdata); - -int32_t -tier_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t off, dict_t *dict); - -int -tier_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t yoff, dict_t *xdata); - -int -tier_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, - dict_t *xdata); - -int -tier_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata); - -#endif diff --git a/xlators/cluster/dht/src/tier.c b/xlators/cluster/dht/src/tier.c deleted file mode 100644 index a8cccaf..0000000 --- a/xlators/cluster/dht/src/tier.c +++ /dev/null @@ -1,3090 +0,0 @@ -/* - Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. 
-*/ - -#include <dlfcn.h> - -#include "dht-common.h" -#include "tier.h" -#include "tier-common.h" -#include <glusterfs/syscall.h> -#include <glusterfs/events.h> -#include "tier-ctr-interface.h" - -/*Hard coded DB info*/ -static gfdb_db_type_t dht_tier_db_type = GFDB_SQLITE3; -/*Hard coded DB info*/ - -/*Mutex for updating the data movement stats*/ -static pthread_mutex_t dm_stat_mutex = PTHREAD_MUTEX_INITIALIZER; - -/* Stores the path location of promotion query files */ -static char *promotion_qfile; -/* Stores the path location of demotion query files */ -static char *demotion_qfile; - -static void *libhandle; -static gfdb_methods_t gfdb_methods; - -#define DB_QUERY_RECORD_SIZE 4096 - -/* - * Closes all the fds and frees the qfile_array - * */ -static void -qfile_array_free(tier_qfile_array_t *qfile_array) -{ - ssize_t i = 0; - - if (qfile_array) { - if (qfile_array->fd_array) { - for (i = 0; i < qfile_array->array_size; i++) { - if (qfile_array->fd_array[i] != -1) { - sys_close(qfile_array->fd_array[i]); - } - } - } - GF_FREE(qfile_array->fd_array); - } - GF_FREE(qfile_array); -} - -/* Create a new query file list with given size */ -static tier_qfile_array_t * -qfile_array_new(ssize_t array_size) -{ - int ret = -1; - tier_qfile_array_t *qfile_array = NULL; - ssize_t i = 0; - - GF_VALIDATE_OR_GOTO("tier", (array_size > 0), out); - - qfile_array = GF_CALLOC(1, sizeof(tier_qfile_array_t), - gf_tier_mt_qfile_array_t); - if (!qfile_array) { - gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Failed to allocate memory for tier_qfile_array_t"); - goto out; - } - - qfile_array->fd_array = GF_MALLOC(array_size * sizeof(int), - gf_dht_mt_int32_t); - if (!qfile_array->fd_array) { - gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Failed to allocate memory for " - "tier_qfile_array_t->fd_array"); - goto out; - } - - /* Init all the fds to -1 */ - for (i = 0; i < array_size; i++) { - qfile_array->fd_array[i] = -1; - } - - qfile_array->array_size = 
array_size; - qfile_array->next_index = 0; - - /* Set exhausted count to list size as the list is empty */ - qfile_array->exhausted_count = qfile_array->array_size; - - ret = 0; -out: - if (ret) { - qfile_array_free(qfile_array); - qfile_array = NULL; - } - return qfile_array; -} - -/* Checks if the query file list is empty or totally exhausted. */ -static gf_boolean_t -is_qfile_array_empty(tier_qfile_array_t *qfile_array) -{ - return (qfile_array->exhausted_count == qfile_array->array_size) - ? _gf_true - : _gf_false; -} - -/* Shifts the next_fd pointer to the next available fd in the list */ -static void -shift_next_index(tier_qfile_array_t *qfile_array) -{ - int qfile_fd = 0; - int spin_count = 0; - - if (is_qfile_array_empty(qfile_array)) { - return; - } - - do { - /* change next_index in a rotional manner */ - (qfile_array->next_index == (qfile_array->array_size - 1)) - ? qfile_array->next_index = 0 - : qfile_array->next_index++; - - qfile_fd = (qfile_array->fd_array[qfile_array->next_index]); - - spin_count++; - - } while ((qfile_fd == -1) && (spin_count < qfile_array->array_size)); -} - -/* - * This is a non-thread safe function to read query records - * from a list of query files in a Round-Robin manner. - * As in when the query files get exhuasted they are closed. - * Returns: - * 0 if all the query records in all the query files of the list are - * exhausted. - * > 0 if a query record is successfully read. Indicates the size of the query - * record read. 
- * < 0 if there was failure - * */ -static int -read_query_record_list(tier_qfile_array_t *qfile_array, - gfdb_query_record_t **query_record) -{ - int ret = -1; - int qfile_fd = 0; - - GF_VALIDATE_OR_GOTO("tier", qfile_array, out); - GF_VALIDATE_OR_GOTO("tier", qfile_array->fd_array, out); - - do { - if (is_qfile_array_empty(qfile_array)) { - ret = 0; - break; - } - - qfile_fd = qfile_array->fd_array[qfile_array->next_index]; - ret = gfdb_methods.gfdb_read_query_record(qfile_fd, query_record); - if (ret <= 0) { - /*The qfile_fd has reached EOF or - * there was an error. - * 1. Close the exhausted fd - * 2. increment the exhausted count - * 3. shift next_qfile to next qfile - **/ - sys_close(qfile_fd); - qfile_array->fd_array[qfile_array->next_index] = -1; - qfile_array->exhausted_count++; - /* shift next_qfile to next qfile */ - shift_next_index(qfile_array); - continue; - } else { - /* shift next_qfile to next qfile */ - shift_next_index(qfile_array); - break; - } - } while (1); -out: - return ret; -} - -/* Check and update the watermark every WM_INTERVAL seconds */ -#define WM_INTERVAL 5 -#define WM_INTERVAL_EMERG 1 - -static int -tier_check_same_node(xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag) -{ - int ret = -1; - dict_t *dict = NULL; - char *uuid_str = NULL; - uuid_t node_uuid = { - 0, - }; - - GF_VALIDATE_OR_GOTO("tier", this, out); - GF_VALIDATE_OR_GOTO(this->name, loc, out); - GF_VALIDATE_OR_GOTO(this->name, defrag, out); - - if (syncop_getxattr(this, loc, &dict, GF_XATTR_NODE_UUID_KEY, NULL, NULL)) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Unable to get NODE_UUID_KEY %s %s\n", loc->name, loc->path); - goto out; - } - - if (dict_get_str(dict, GF_XATTR_NODE_UUID_KEY, &uuid_str) < 0) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Failed to get node-uuids for %s", loc->path); - goto out; - } - - if (gf_uuid_parse(uuid_str, node_uuid)) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - 
"uuid_parse failed for %s", loc->path); - goto out; - } - - if (gf_uuid_compare(node_uuid, defrag->node_uuid)) { - gf_msg_debug(this->name, 0, "%s does not belong to this node", - loc->path); - ret = 1; - goto out; - } - - ret = 0; -out: - if (dict) - dict_unref(dict); - - return ret; -} - -int -tier_get_fs_stat(xlator_t *this, loc_t *root_loc) -{ - int ret = 0; - gf_defrag_info_t *defrag = NULL; - dht_conf_t *conf = NULL; - dict_t *xdata = NULL; - struct statvfs statfs = { - 0, - }; - gf_tier_conf_t *tier_conf = NULL; - - conf = this->private; - if (!conf) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS, - "conf is NULL"); - ret = -1; - goto exit; - } - - defrag = conf->defrag; - if (!defrag) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS, - "defrag is NULL"); - ret = -1; - goto exit; - } - - tier_conf = &defrag->tier_conf; - - xdata = dict_new(); - if (!xdata) { - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, - "failed to allocate dictionary"); - ret = -1; - goto exit; - } - - ret = dict_set_int8(xdata, GF_INTERNAL_IGNORE_DEEM_STATFS, 1); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, - "Failed to set " GF_INTERNAL_IGNORE_DEEM_STATFS " in dict"); - ret = -1; - goto exit; - } - - /* Find how much free space is on the hot subvolume. - * Then see if that value */ - /* is less than or greater than user defined watermarks. - * Stash results in */ - /* the tier_conf data structure. 
*/ - - ret = syncop_statfs(conf->subvolumes[1], root_loc, &statfs, xdata, NULL); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LOG_TIER_STATUS, - "Unable to obtain statfs."); - goto exit; - } - - pthread_mutex_lock(&dm_stat_mutex); - - tier_conf->block_size = statfs.f_bsize; - tier_conf->blocks_total = statfs.f_blocks; - tier_conf->blocks_used = statfs.f_blocks - statfs.f_bfree; - - tier_conf->percent_full = GF_PERCENTAGE(tier_conf->blocks_used, - statfs.f_blocks); - pthread_mutex_unlock(&dm_stat_mutex); - -exit: - if (xdata) - dict_unref(xdata); - return ret; -} - -static void -tier_send_watermark_event(const char *volname, tier_watermark_op_t old_wm, - tier_watermark_op_t new_wm) -{ - if (old_wm == TIER_WM_LOW || old_wm == TIER_WM_NONE) { - if (new_wm == TIER_WM_MID) { - gf_event(EVENT_TIER_WATERMARK_RAISED_TO_MID, "vol=%s", volname); - } else if (new_wm == TIER_WM_HI) { - gf_event(EVENT_TIER_WATERMARK_HI, "vol=%s", volname); - } - } else if (old_wm == TIER_WM_MID) { - if (new_wm == TIER_WM_LOW) { - gf_event(EVENT_TIER_WATERMARK_DROPPED_TO_LOW, "vol=%s", volname); - } else if (new_wm == TIER_WM_HI) { - gf_event(EVENT_TIER_WATERMARK_HI, "vol=%s", volname); - } - } else if (old_wm == TIER_WM_HI) { - if (new_wm == TIER_WM_MID) { - gf_event(EVENT_TIER_WATERMARK_DROPPED_TO_MID, "vol=%s", volname); - } else if (new_wm == TIER_WM_LOW) { - gf_event(EVENT_TIER_WATERMARK_DROPPED_TO_LOW, "vol=%s", volname); - } - } -} - -int -tier_check_watermark(xlator_t *this) -{ - int ret = -1; - gf_defrag_info_t *defrag = NULL; - dht_conf_t *conf = NULL; - gf_tier_conf_t *tier_conf = NULL; - tier_watermark_op_t wm = TIER_WM_NONE; - - conf = this->private; - if (!conf) - goto exit; - - defrag = conf->defrag; - if (!defrag) - goto exit; - - tier_conf = &defrag->tier_conf; - - if (tier_conf->percent_full < tier_conf->watermark_low) { - wm = TIER_WM_LOW; - - } else if (tier_conf->percent_full < tier_conf->watermark_hi) { - wm = TIER_WM_MID; - - } else { - wm = TIER_WM_HI; - 
} - - if (wm != tier_conf->watermark_last) { - tier_send_watermark_event(tier_conf->volname, tier_conf->watermark_last, - wm); - - tier_conf->watermark_last = wm; - gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, - "Tier watermark now %d", wm); - } - - ret = 0; - -exit: - return ret; -} - -static gf_boolean_t -is_hot_tier_full(gf_tier_conf_t *tier_conf) -{ - if (tier_conf && (tier_conf->mode == TIER_MODE_WM) && - (tier_conf->watermark_last == TIER_WM_HI)) - return _gf_true; - - return _gf_false; -} - -int -tier_do_migration(xlator_t *this, int promote) -{ - gf_defrag_info_t *defrag = NULL; - dht_conf_t *conf = NULL; - long rand = 0; - int migrate = 0; - gf_tier_conf_t *tier_conf = NULL; - - conf = this->private; - if (!conf) - goto exit; - - defrag = conf->defrag; - if (!defrag) - goto exit; - - if (tier_check_watermark(this) != 0) { - gf_msg(this->name, GF_LOG_CRITICAL, errno, DHT_MSG_LOG_TIER_ERROR, - "Failed to get watermark"); - goto exit; - } - - tier_conf = &defrag->tier_conf; - - switch (tier_conf->watermark_last) { - case TIER_WM_LOW: - migrate = promote ? 1 : 0; - break; - case TIER_WM_HI: - migrate = promote ? 
0 : 1; - break; - case TIER_WM_MID: - /* coverity[DC.WEAK_CRYPTO] */ - rand = random() % 100; - if (promote) { - migrate = (rand > tier_conf->percent_full); - } else { - migrate = (rand <= tier_conf->percent_full); - } - break; - } - -exit: - return migrate; -} - -int -tier_migrate(xlator_t *this, int is_promotion, dict_t *migrate_data, loc_t *loc, - gf_tier_conf_t *tier_conf) -{ - int ret = -1; - - pthread_mutex_lock(&tier_conf->pause_mutex); - if (is_promotion) - tier_conf->promote_in_progress = 1; - else - tier_conf->demote_in_progress = 1; - pthread_mutex_unlock(&tier_conf->pause_mutex); - - /* Data migration */ - ret = syncop_setxattr(this, loc, migrate_data, 0, NULL, NULL); - - pthread_mutex_lock(&tier_conf->pause_mutex); - if (is_promotion) - tier_conf->promote_in_progress = 0; - else - tier_conf->demote_in_progress = 0; - pthread_mutex_unlock(&tier_conf->pause_mutex); - - return ret; -} - -/* returns _gf_true: if file can be promoted - * returns _gf_false: if file cannot be promoted - */ -static gf_boolean_t -tier_can_promote_file(xlator_t *this, char const *file_name, - struct iatt *current, gf_defrag_info_t *defrag) -{ - gf_boolean_t ret = _gf_false; - fsblkcnt_t estimated_usage = 0; - - if (defrag->tier_conf.tier_max_promote_size && - (current->ia_size > defrag->tier_conf.tier_max_promote_size)) { - gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, - "File %s (gfid:%s) with size (%" PRIu64 - ") exceeds maxsize " - "(%d) for promotion. 
File will not be promoted.", - file_name, uuid_utoa(current->ia_gfid), current->ia_size, - defrag->tier_conf.tier_max_promote_size); - goto err; - } - - /* bypass further validations for TEST mode */ - if (defrag->tier_conf.mode != TIER_MODE_WM) { - ret = _gf_true; - goto err; - } - - /* convert the file size to blocks as per the block size of the - * destination tier - * NOTE: add (block_size - 1) to get the correct block size when - * there is a remainder after a modulo - */ - estimated_usage = ((current->ia_size + defrag->tier_conf.block_size - 1) / - defrag->tier_conf.block_size) + - defrag->tier_conf.blocks_used; - - /* test if the estimated block usage goes above HI watermark */ - if (GF_PERCENTAGE(estimated_usage, defrag->tier_conf.blocks_total) >= - defrag->tier_conf.watermark_hi) { - gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, - "Estimated block count consumption on " - "hot tier (%" PRIu64 - ") exceeds hi watermark (%d%%). " - "File will not be promoted.", - estimated_usage, defrag->tier_conf.watermark_hi); - goto err; - } - ret = _gf_true; -err: - return ret; -} - -static int -tier_set_migrate_data(dict_t *migrate_data) -{ - int failed = 1; - - failed = dict_set_str(migrate_data, GF_XATTR_FILE_MIGRATE_KEY, "force"); - if (failed) { - goto bail_out; - } - - /* Flag to suggest the xattr call is from migrator */ - failed = dict_set_str(migrate_data, "from.migrator", "yes"); - if (failed) { - goto bail_out; - } - - /* Flag to suggest its a tiering migration - * The reason for this dic key-value is that - * promotions and demotions are multithreaded - * so the original frame from gf_defrag_start() - * is not carried. A new frame will be created when - * we do syncop_setxattr(). This does not have the - * frame->root->pid of the original frame. 
So we pass - * this dic key-value when we do syncop_setxattr() to do - * data migration and set the frame->root->pid to - * GF_CLIENT_PID_TIER_DEFRAG in dht_setxattr() just before - * calling dht_start_rebalance_task() */ - failed = dict_set_str(migrate_data, TIERING_MIGRATION_KEY, "yes"); - if (failed) { - goto bail_out; - } - - failed = 0; - -bail_out: - return failed; -} - -static char * -tier_get_parent_path(xlator_t *this, loc_t *p_loc, struct iatt *par_stbuf, - int *per_link_status) -{ - int ret = -1; - char *parent_path = NULL; - dict_t *xdata_request = NULL; - dict_t *xdata_response = NULL; - - xdata_request = dict_new(); - if (!xdata_request) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Failed to create xdata_request dict"); - goto err; - } - ret = dict_set_int32(xdata_request, GET_ANCESTRY_PATH_KEY, 42); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Failed to set value to dict : key %s \n", - GET_ANCESTRY_PATH_KEY); - goto err; - } - - ret = syncop_lookup(this, p_loc, par_stbuf, NULL, xdata_request, - &xdata_response); - /* When the parent gfid is a stale entry, the lookup - * will fail and stop the demotion process. 
- * The parent gfid can be stale when a huge folder is - * deleted while the files within it are being migrated - */ - if (ret == -ESTALE) { - gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_STALE_LOOKUP, - "Stale entry in parent lookup for %s", uuid_utoa(p_loc->gfid)); - *per_link_status = 1; - goto err; - } else if (ret) { - gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LOG_TIER_ERROR, - "Error in parent lookup for %s", uuid_utoa(p_loc->gfid)); - *per_link_status = -1; - goto err; - } - ret = dict_get_str(xdata_response, GET_ANCESTRY_PATH_KEY, &parent_path); - if (ret || !parent_path) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Failed to get parent path for %s", uuid_utoa(p_loc->gfid)); - *per_link_status = -1; - goto err; - } - -err: - if (xdata_request) { - dict_unref(xdata_request); - } - - if (xdata_response) { - dict_unref(xdata_response); - xdata_response = NULL; - } - - return parent_path; -} - -static int -tier_get_file_name_and_path(xlator_t *this, uuid_t gfid, - gfdb_link_info_t *link_info, - char const *parent_path, loc_t *loc, - int *per_link_status) -{ - int ret = -1; - - loc->name = gf_strdup(link_info->file_name); - if (!loc->name) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Memory " - "allocation failed for %s", - uuid_utoa(gfid)); - *per_link_status = -1; - goto err; - } - ret = gf_asprintf((char **)&(loc->path), "%s/%s", parent_path, loc->name); - if (ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Failed to " - "construct file path for %s %s\n", - parent_path, loc->name); - *per_link_status = -1; - goto err; - } - - ret = 0; - -err: - return ret; -} - -static int -tier_lookup_file(xlator_t *this, loc_t *p_loc, loc_t *loc, struct iatt *current, - int *per_link_status) -{ - int ret = -1; - - ret = syncop_lookup(this, loc, current, NULL, NULL, NULL); - - /* The file may be deleted even when the parent - * is available and the lookup will - * return a stale entry which 
would stop the - * migration. so if its a stale entry, then skip - * the file and keep migrating. - */ - if (ret == -ESTALE) { - gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_STALE_LOOKUP, - "Stale lookup for %s", uuid_utoa(p_loc->gfid)); - *per_link_status = 1; - goto err; - } else if (ret) { - gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LOG_TIER_ERROR, - "Failed to " - "lookup file %s\n", - loc->name); - *per_link_status = -1; - goto err; - } - ret = 0; - -err: - return ret; -} - -static gf_boolean_t -tier_is_file_already_at_destination(xlator_t *src_subvol, - query_cbk_args_t *query_cbk_args, - dht_conf_t *conf, int *per_link_status) -{ - gf_boolean_t at_destination = _gf_true; - - if (src_subvol == NULL) { - *per_link_status = 1; - goto err; - } - if (query_cbk_args->is_promotion && src_subvol == conf->subvolumes[1]) { - *per_link_status = 1; - goto err; - } - - if (!query_cbk_args->is_promotion && src_subvol == conf->subvolumes[0]) { - *per_link_status = 1; - goto err; - } - at_destination = _gf_false; - -err: - return at_destination; -} - -static void -tier_update_migration_counters(query_cbk_args_t *query_cbk_args, - gf_defrag_info_t *defrag, - uint64_t *total_migrated_bytes, int *total_files) -{ - if (query_cbk_args->is_promotion) { - defrag->total_files_promoted++; - *total_migrated_bytes += defrag->tier_conf.st_last_promoted_size; - pthread_mutex_lock(&dm_stat_mutex); - defrag->tier_conf.blocks_used += defrag->tier_conf - .st_last_promoted_size; - pthread_mutex_unlock(&dm_stat_mutex); - } else { - defrag->total_files_demoted++; - *total_migrated_bytes += defrag->tier_conf.st_last_demoted_size; - pthread_mutex_lock(&dm_stat_mutex); - defrag->tier_conf.blocks_used -= defrag->tier_conf.st_last_demoted_size; - pthread_mutex_unlock(&dm_stat_mutex); - } - if (defrag->tier_conf.blocks_total) { - pthread_mutex_lock(&dm_stat_mutex); - defrag->tier_conf.percent_full = GF_PERCENTAGE( - defrag->tier_conf.blocks_used, defrag->tier_conf.blocks_total); - 
pthread_mutex_unlock(&dm_stat_mutex); - } - - (*total_files)++; -} - -static int -tier_migrate_link(xlator_t *this, dht_conf_t *conf, uuid_t gfid, - gfdb_link_info_t *link_info, gf_defrag_info_t *defrag, - query_cbk_args_t *query_cbk_args, dict_t *migrate_data, - int *per_link_status, int *total_files, - uint64_t *total_migrated_bytes) -{ - int ret = -1; - struct iatt current = { - 0, - }; - struct iatt par_stbuf = { - 0, - }; - loc_t p_loc = { - 0, - }; - loc_t loc = { - 0, - }; - xlator_t *src_subvol = NULL; - inode_t *linked_inode = NULL; - char *parent_path = NULL; - - /* Lookup for parent and get the path of parent */ - gf_uuid_copy(p_loc.gfid, link_info->pargfid); - p_loc.inode = inode_new(defrag->root_inode->table); - if (!p_loc.inode) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Failed to create reference to inode" - " for %s", - uuid_utoa(p_loc.gfid)); - - *per_link_status = -1; - goto err; - } - - parent_path = tier_get_parent_path(this, &p_loc, &par_stbuf, - per_link_status); - if (!parent_path) { - goto err; - } - - linked_inode = inode_link(p_loc.inode, NULL, NULL, &par_stbuf); - inode_unref(p_loc.inode); - p_loc.inode = linked_inode; - - /* Preparing File Inode */ - gf_uuid_copy(loc.gfid, gfid); - loc.inode = inode_new(defrag->root_inode->table); - gf_uuid_copy(loc.pargfid, link_info->pargfid); - loc.parent = inode_ref(p_loc.inode); - - /* Get filename and Construct file path */ - if (tier_get_file_name_and_path(this, gfid, link_info, parent_path, &loc, - per_link_status) != 0) { - goto err; - } - gf_uuid_copy(loc.parent->gfid, link_info->pargfid); - - /* lookup file inode */ - if (tier_lookup_file(this, &p_loc, &loc, &current, per_link_status) != 0) { - goto err; - } - - if (query_cbk_args->is_promotion) { - if (!tier_can_promote_file(this, link_info->file_name, &current, - defrag)) { - *per_link_status = 1; - goto err; - } - } - - linked_inode = inode_link(loc.inode, NULL, NULL, &current); - inode_unref(loc.inode); - loc.inode = linked_inode; - 
- /* - * Do not promote/demote if file already is where it - * should be. It means another brick moved the file - * so is not an error. So we set per_link_status = 1 - * so that we ignore counting this. - */ - src_subvol = dht_subvol_get_cached(this, loc.inode); - - if (tier_is_file_already_at_destination(src_subvol, query_cbk_args, conf, - per_link_status)) { - goto err; - } - - gf_msg_debug(this->name, 0, "Tier %s: src_subvol %s file %s", - (query_cbk_args->is_promotion ? "promote" : "demote"), - src_subvol->name, loc.path); - - ret = tier_check_same_node(this, &loc, defrag); - if (ret != 0) { - if (ret < 0) { - *per_link_status = -1; - goto err; - } - ret = 0; - /* By setting per_link_status to 1 we are - * ignoring this status and will not be counting - * this file for migration */ - *per_link_status = 1; - goto err; - } - - gf_uuid_copy(loc.gfid, loc.inode->gfid); - - if (gf_defrag_get_pause_state(&defrag->tier_conf) != TIER_RUNNING) { - gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, - "Tiering paused. " - "Exiting tier_migrate_link"); - goto err; - } - - ret = tier_migrate(this, query_cbk_args->is_promotion, migrate_data, &loc, - &defrag->tier_conf); - - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LOG_TIER_ERROR, - "Failed to " - "migrate %s ", - loc.path); - *per_link_status = -1; - goto err; - } - - tier_update_migration_counters(query_cbk_args, defrag, total_migrated_bytes, - total_files); - - ret = 0; - -err: - GF_FREE((char *)loc.name); - loc.name = NULL; - loc_wipe(&loc); - loc_wipe(&p_loc); - - if ((*total_files >= defrag->tier_conf.max_migrate_files) || - (*total_migrated_bytes > defrag->tier_conf.max_migrate_bytes)) { - gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, - "Reached cycle migration limit." 
- "migrated bytes %" PRId64 " files %d", - *total_migrated_bytes, *total_files); - ret = -1; - } - - return ret; -} - -static int -tier_migrate_using_query_file(void *_args) -{ - int ret = -1; - query_cbk_args_t *query_cbk_args = (query_cbk_args_t *)_args; - xlator_t *this = NULL; - gf_defrag_info_t *defrag = NULL; - gfdb_query_record_t *query_record = NULL; - gfdb_link_info_t *link_info = NULL; - dict_t *migrate_data = NULL; - /* - * per_file_status and per_link_status - * 0 : success - * -1 : failure - * 1 : ignore the status and don't count for migration - * */ - int per_file_status = 0; - int per_link_status = 0; - int total_status = 0; - dht_conf_t *conf = NULL; - uint64_t total_migrated_bytes = 0; - int total_files = 0; - loc_t root_loc = {0}; - gfdb_time_t start_time = {0}; - gfdb_time_t current_time = {0}; - int total_time = 0; - int max_time = 0; - gf_boolean_t emergency_demote_mode = _gf_false; - - GF_VALIDATE_OR_GOTO("tier", query_cbk_args, out); - GF_VALIDATE_OR_GOTO("tier", query_cbk_args->this, out); - this = query_cbk_args->this; - GF_VALIDATE_OR_GOTO(this->name, query_cbk_args->defrag, out); - GF_VALIDATE_OR_GOTO(this->name, query_cbk_args->qfile_array, out); - GF_VALIDATE_OR_GOTO(this->name, this->private, out); - - conf = this->private; - - defrag = query_cbk_args->defrag; - migrate_data = dict_new(); - if (!migrate_data) - goto out; - - emergency_demote_mode = (!query_cbk_args->is_promotion && - is_hot_tier_full(&defrag->tier_conf)); - - if (tier_set_migrate_data(migrate_data) != 0) { - goto out; - } - - dht_build_root_loc(defrag->root_inode, &root_loc); - - ret = gettimeofday(&start_time, NULL); - if (query_cbk_args->is_promotion) { - max_time = defrag->tier_conf.tier_promote_frequency; - } else { - max_time = defrag->tier_conf.tier_demote_frequency; - } - - /* Per file */ - while ((ret = read_query_record_list(query_cbk_args->qfile_array, - &query_record)) != 0) { - if (ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - 
"Failed to fetch query record " - "from query file"); - goto out; - } - - if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { - ret = -1; - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Exiting tier migration as" - "defrag status is not started"); - goto out; - } - - ret = gettimeofday(&current_time, NULL); - if (ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Could not get current time."); - goto out; - } - - total_time = current_time.tv_sec - start_time.tv_sec; - if (total_time > max_time) { - gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, - "Max cycle time reached. Exiting migration."); - goto out; - } - - per_file_status = 0; - per_link_status = 0; - - if (gf_defrag_get_pause_state(&defrag->tier_conf) != TIER_RUNNING) { - gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, - "Tiering paused. " - "Exiting tier_migrate_using_query_file"); - break; - } - - if (defrag->tier_conf.mode == TIER_MODE_WM) { - ret = tier_get_fs_stat(this, &root_loc); - if (ret != 0) { - gfdb_methods.gfdb_query_record_free(query_record); - query_record = NULL; - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS, - "tier_get_fs_stat() FAILED ... " - "skipping file migrations until next cycle"); - break; - } - - if (!tier_do_migration(this, query_cbk_args->is_promotion)) { - gfdb_methods.gfdb_query_record_free(query_record); - query_record = NULL; - - /* We have crossed the high watermark. Stop processing - * files if this is a promotion cycle so demotion gets - * a chance to start if not already running*/ - - if (query_cbk_args->is_promotion && - is_hot_tier_full(&defrag->tier_conf)) { - gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, - "High watermark crossed during " - "promotion. Exiting " - "tier_migrate_using_query_file"); - break; - } - continue; - } - } - - per_link_status = 0; - - /* For now we only support single link migration. 
And we will - * ignore other hard links in the link info list of query record - * TODO: Multiple hard links migration */ - if (!list_empty(&query_record->link_list)) { - link_info = list_first_entry(&query_record->link_list, - gfdb_link_info_t, list); - } - if (link_info != NULL) { - if (tier_migrate_link(this, conf, query_record->gfid, link_info, - defrag, query_cbk_args, migrate_data, - &per_link_status, &total_files, - &total_migrated_bytes) != 0) { - gf_msg( - this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, - "%s failed for %s(gfid:%s)", - (query_cbk_args->is_promotion ? "Promotion" : "Demotion"), - link_info->file_name, uuid_utoa(query_record->gfid)); - } - } - per_file_status = per_link_status; - - if (per_file_status < 0) { /* Failure */ - pthread_mutex_lock(&dm_stat_mutex); - defrag->total_failures++; - pthread_mutex_unlock(&dm_stat_mutex); - } else if (per_file_status == 0) { /* Success */ - pthread_mutex_lock(&dm_stat_mutex); - defrag->total_files++; - pthread_mutex_unlock(&dm_stat_mutex); - } else if (per_file_status == 1) { /* Ignore */ - per_file_status = 0; - /* Since this attempt was ignored we - * decrement the lookup count*/ - pthread_mutex_lock(&dm_stat_mutex); - defrag->num_files_lookedup--; - pthread_mutex_unlock(&dm_stat_mutex); - } - total_status = total_status + per_file_status; - per_link_status = 0; - per_file_status = 0; - - gfdb_methods.gfdb_query_record_free(query_record); - query_record = NULL; - - /* If we are demoting and the entry watermark was HI, then - * we are done with emergency demotions if the current - * watermark has fallen below hi-watermark level - */ - if (emergency_demote_mode) { - if (tier_check_watermark(this) == 0) { - if (!is_hot_tier_full(&defrag->tier_conf)) { - break; - } - } - } - } - -out: - if (migrate_data) - dict_unref(migrate_data); - - gfdb_methods.gfdb_query_record_free(query_record); - query_record = NULL; - - return total_status; -} - -/* This is the call back function per record/file from data base 
*/ -static int -tier_gf_query_callback(gfdb_query_record_t *gfdb_query_record, void *_args) -{ - int ret = -1; - query_cbk_args_t *query_cbk_args = _args; - - GF_VALIDATE_OR_GOTO("tier", query_cbk_args, out); - GF_VALIDATE_OR_GOTO("tier", query_cbk_args->defrag, out); - GF_VALIDATE_OR_GOTO("tier", (query_cbk_args->query_fd > 0), out); - - ret = gfdb_methods.gfdb_write_query_record(query_cbk_args->query_fd, - gfdb_query_record); - if (ret) { - gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Failed writing query record to query file"); - goto out; - } - - pthread_mutex_lock(&dm_stat_mutex); - query_cbk_args->defrag->num_files_lookedup++; - pthread_mutex_unlock(&dm_stat_mutex); - - ret = 0; -out: - return ret; -} - -/* Create query file in tier process */ -static int -tier_process_self_query(tier_brick_list_t *local_brick, void *args) -{ - int ret = -1; - char *db_path = NULL; - query_cbk_args_t *query_cbk_args = NULL; - xlator_t *this = NULL; - gfdb_conn_node_t *conn_node = NULL; - dict_t *params_dict = NULL; - dict_t *ctr_ipc_dict = NULL; - gfdb_brick_info_t *gfdb_brick_info = args; - - /*Init of all the essentials*/ - GF_VALIDATE_OR_GOTO("tier", gfdb_brick_info, out); - query_cbk_args = gfdb_brick_info->_query_cbk_args; - - GF_VALIDATE_OR_GOTO("tier", query_cbk_args->this, out); - this = query_cbk_args->this; - - GF_VALIDATE_OR_GOTO(this->name, gfdb_brick_info->_query_cbk_args, out); - - GF_VALIDATE_OR_GOTO(this->name, local_brick, out); - - GF_VALIDATE_OR_GOTO(this->name, local_brick->xlator, out); - - GF_VALIDATE_OR_GOTO(this->name, local_brick->brick_db_path, out); - - db_path = local_brick->brick_db_path; - - /*Preparing DB parameters before init_db i.e getting db connection*/ - params_dict = dict_new(); - if (!params_dict) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "DB Params cannot initialized"); - goto out; - } - SET_DB_PARAM_TO_DICT(this->name, params_dict, - (char *)gfdb_methods.get_db_path_key(), db_path, ret, - out); - - 
/*Get the db connection*/ - conn_node = gfdb_methods.init_db((void *)params_dict, dht_tier_db_type); - if (!conn_node) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "FATAL: Failed initializing db operations"); - goto out; - } - - /* Query for eligible files from db */ - query_cbk_args->query_fd = open(local_brick->qfile_path, - O_WRONLY | O_CREAT | O_APPEND, - S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); - if (query_cbk_args->query_fd < 0) { - gf_msg(this->name, GF_LOG_ERROR, errno, DHT_MSG_LOG_TIER_ERROR, - "Failed to open query file %s", local_brick->qfile_path); - goto out; - } - if (!gfdb_brick_info->_gfdb_promote) { - if (query_cbk_args->defrag->tier_conf.watermark_last == TIER_WM_HI) { - /* emergency demotion mode */ - ret = gfdb_methods.find_all( - conn_node, tier_gf_query_callback, (void *)query_cbk_args, - query_cbk_args->defrag->tier_conf.query_limit); - } else { - if (query_cbk_args->defrag->write_freq_threshold == 0 && - query_cbk_args->defrag->read_freq_threshold == 0) { - ret = gfdb_methods.find_unchanged_for_time( - conn_node, tier_gf_query_callback, (void *)query_cbk_args, - gfdb_brick_info->time_stamp); - } else { - ret = gfdb_methods.find_unchanged_for_time_freq( - conn_node, tier_gf_query_callback, (void *)query_cbk_args, - gfdb_brick_info->time_stamp, - query_cbk_args->defrag->write_freq_threshold, - query_cbk_args->defrag->read_freq_threshold, _gf_false); - } - } - } else { - if (query_cbk_args->defrag->write_freq_threshold == 0 && - query_cbk_args->defrag->read_freq_threshold == 0) { - ret = gfdb_methods.find_recently_changed_files( - conn_node, tier_gf_query_callback, (void *)query_cbk_args, - gfdb_brick_info->time_stamp); - } else { - ret = gfdb_methods.find_recently_changed_files_freq( - conn_node, tier_gf_query_callback, (void *)query_cbk_args, - gfdb_brick_info->time_stamp, - query_cbk_args->defrag->write_freq_threshold, - query_cbk_args->defrag->read_freq_threshold, _gf_false); - } - } - if (ret) { - gf_msg(this->name, 
GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "FATAL: query from db failed"); - goto out; - } - - /*Clear the heat on the DB entries*/ - /*Preparing ctr_ipc_dict*/ - ctr_ipc_dict = dict_new(); - if (!ctr_ipc_dict) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "ctr_ipc_dict cannot initialized"); - goto out; - } - - SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_dict, GFDB_IPC_CTR_KEY, - GFDB_IPC_CTR_CLEAR_OPS, ret, out); - - ret = syncop_ipc(local_brick->xlator, GF_IPC_TARGET_CTR, ctr_ipc_dict, - NULL); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Failed clearing the heat " - "on db %s error %d", - local_brick->brick_db_path, ret); - goto out; - } - - ret = 0; -out: - if (params_dict) { - dict_unref(params_dict); - params_dict = NULL; - } - - if (ctr_ipc_dict) { - dict_unref(ctr_ipc_dict); - ctr_ipc_dict = NULL; - } - - if (query_cbk_args && query_cbk_args->query_fd >= 0) { - sys_close(query_cbk_args->query_fd); - query_cbk_args->query_fd = -1; - } - gfdb_methods.fini_db(conn_node); - - return ret; -} - -/*Ask CTR to create the query file*/ -static int -tier_process_ctr_query(tier_brick_list_t *local_brick, void *args) -{ - int ret = -1; - query_cbk_args_t *query_cbk_args = NULL; - xlator_t *this = NULL; - dict_t *ctr_ipc_in_dict = NULL; - dict_t *ctr_ipc_out_dict = NULL; - gfdb_brick_info_t *gfdb_brick_info = args; - gfdb_ipc_ctr_params_t *ipc_ctr_params = NULL; - int count = 0; - - /*Init of all the essentials*/ - GF_VALIDATE_OR_GOTO("tier", gfdb_brick_info, out); - query_cbk_args = gfdb_brick_info->_query_cbk_args; - - GF_VALIDATE_OR_GOTO("tier", query_cbk_args->this, out); - this = query_cbk_args->this; - - GF_VALIDATE_OR_GOTO(this->name, gfdb_brick_info->_query_cbk_args, out); - - GF_VALIDATE_OR_GOTO(this->name, local_brick, out); - - GF_VALIDATE_OR_GOTO(this->name, local_brick->xlator, out); - - GF_VALIDATE_OR_GOTO(this->name, local_brick->brick_db_path, out); - - /*Preparing ctr_ipc_in_dict*/ - ctr_ipc_in_dict = 
dict_new(); - if (!ctr_ipc_in_dict) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "ctr_ipc_in_dict cannot initialized"); - goto out; - } - - ipc_ctr_params = GF_CALLOC(1, sizeof(gfdb_ipc_ctr_params_t), - gf_tier_mt_ipc_ctr_params_t); - if (!ipc_ctr_params) { - goto out; - } - - /* set all the query params*/ - ipc_ctr_params->is_promote = gfdb_brick_info->_gfdb_promote; - - ipc_ctr_params->write_freq_threshold = query_cbk_args->defrag - ->write_freq_threshold; - - ipc_ctr_params->read_freq_threshold = query_cbk_args->defrag - ->read_freq_threshold; - - ipc_ctr_params->query_limit = query_cbk_args->defrag->tier_conf.query_limit; - - ipc_ctr_params->emergency_demote = (!gfdb_brick_info->_gfdb_promote && - query_cbk_args->defrag->tier_conf - .watermark_last == TIER_WM_HI); - - memcpy(&ipc_ctr_params->time_stamp, gfdb_brick_info->time_stamp, - sizeof(gfdb_time_t)); - - SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_in_dict, GFDB_IPC_CTR_KEY, - GFDB_IPC_CTR_QUERY_OPS, ret, out); - - SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_in_dict, - GFDB_IPC_CTR_GET_QFILE_PATH, local_brick->qfile_path, - ret, out); - - ret = dict_set_bin(ctr_ipc_in_dict, GFDB_IPC_CTR_GET_QUERY_PARAMS, - ipc_ctr_params, sizeof(*ipc_ctr_params)); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED, - "Failed setting %s to params dictionary", - GFDB_IPC_CTR_GET_QUERY_PARAMS); - GF_FREE(ipc_ctr_params); - goto out; - } - ipc_ctr_params = NULL; - - ret = syncop_ipc(local_brick->xlator, GF_IPC_TARGET_CTR, ctr_ipc_in_dict, - &ctr_ipc_out_dict); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_IPC_TIER_ERROR, - "Failed query on %s ret %d", local_brick->brick_db_path, ret); - goto out; - } - - ret = dict_get_int32(ctr_ipc_out_dict, GFDB_IPC_CTR_RET_QUERY_COUNT, - &count); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Failed getting count " - "of records on %s", - local_brick->brick_db_path); - goto out; - } - - if (count < 0) { - 
gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Failed query on %s", local_brick->brick_db_path); - ret = -1; - goto out; - } - - pthread_mutex_lock(&dm_stat_mutex); - query_cbk_args->defrag->num_files_lookedup = count; - pthread_mutex_unlock(&dm_stat_mutex); - - ret = 0; -out: - - if (ctr_ipc_in_dict) { - dict_unref(ctr_ipc_in_dict); - ctr_ipc_in_dict = NULL; - } - - if (ctr_ipc_out_dict) { - dict_unref(ctr_ipc_out_dict); - ctr_ipc_out_dict = NULL; - } - - GF_FREE(ipc_ctr_params); - - return ret; -} - -/* This is the call back function for each brick from hot/cold bricklist - * It picks up each bricks db and queries for eligible files for migration. - * The list of eligible files are populated in appropriate query files*/ -static int -tier_process_brick(tier_brick_list_t *local_brick, void *args) -{ - int ret = -1; - dict_t *ctr_ipc_in_dict = NULL; - dict_t *ctr_ipc_out_dict = NULL; - char *strval = NULL; - - GF_VALIDATE_OR_GOTO("tier", local_brick, out); - - GF_VALIDATE_OR_GOTO("tier", local_brick->xlator, out); - - if (dht_tier_db_type == GFDB_SQLITE3) { - /*Preparing ctr_ipc_in_dict*/ - ctr_ipc_in_dict = dict_new(); - if (!ctr_ipc_in_dict) { - gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "ctr_ipc_in_dict cannot initialized"); - goto out; - } - - ret = dict_set_str(ctr_ipc_in_dict, GFDB_IPC_CTR_KEY, - GFDB_IPC_CTR_GET_DB_PARAM_OPS); - if (ret) { - gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED, - "Failed to set %s " - "to params dictionary", - GFDB_IPC_CTR_KEY); - goto out; - } - - ret = dict_set_str(ctr_ipc_in_dict, GFDB_IPC_CTR_GET_DB_PARAM_OPS, ""); - if (ret) { - gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED, - "Failed to set %s " - "to params dictionary", - GFDB_IPC_CTR_GET_DB_PARAM_OPS); - goto out; - } - - ret = dict_set_str(ctr_ipc_in_dict, GFDB_IPC_CTR_GET_DB_KEY, - "journal_mode"); - if (ret) { - gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED, - "Failed to set %s " - "to params dictionary", - 
GFDB_IPC_CTR_GET_DB_KEY); - goto out; - } - - ret = syncop_ipc(local_brick->xlator, GF_IPC_TARGET_CTR, - ctr_ipc_in_dict, &ctr_ipc_out_dict); - if (ret || ctr_ipc_out_dict == NULL) { - gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Failed to get " - "journal_mode of sql db %s", - local_brick->brick_db_path); - goto out; - } - - ret = dict_get_str(ctr_ipc_out_dict, "journal_mode", &strval); - if (ret) { - gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_GET_PARAM_FAILED, - "Failed to get %s " - "from params dictionary" - "journal_mode", - strval); - goto out; - } - - if (strval && (strncmp(strval, "wal", SLEN("wal")) == 0)) { - ret = tier_process_self_query(local_brick, args); - if (ret) { - goto out; - } - } else { - ret = tier_process_ctr_query(local_brick, args); - if (ret) { - goto out; - } - } - ret = 0; - - } else { - ret = tier_process_self_query(local_brick, args); - if (ret) { - goto out; - } - } - - ret = 0; -out: - if (ctr_ipc_in_dict) - dict_unref(ctr_ipc_in_dict); - - if (ctr_ipc_out_dict) - dict_unref(ctr_ipc_out_dict); - - return ret; -} - -static int -tier_build_migration_qfile(migration_args_t *args, - query_cbk_args_t *query_cbk_args, - gf_boolean_t is_promotion) -{ - gfdb_time_t current_time; - gfdb_brick_info_t gfdb_brick_info; - gfdb_time_t time_in_past; - int ret = -1; - tier_brick_list_t *local_brick = NULL; - int i = 0; - time_in_past.tv_sec = args->freq_time; - time_in_past.tv_usec = 0; - - ret = gettimeofday(¤t_time, NULL); - if (ret == -1) { - gf_msg(args->this->name, GF_LOG_ERROR, errno, - DHT_MSG_SYS_CALL_GET_TIME_FAILED, "Failed to get current time"); - goto out; - } - time_in_past.tv_sec = current_time.tv_sec - time_in_past.tv_sec; - - /* The migration daemon may run a varying numberof usec after the */ - /* sleep call triggers. A file may be registered in CTR some number */ - /* of usec X after the daemon started and missed in the subsequent */ - /* cycle if the daemon starts Y usec after the period in seconds */ - /* where Y>X. 
Normalize away this problem by always setting usec */ - /* to 0. */ - time_in_past.tv_usec = 0; - - gfdb_brick_info.time_stamp = &time_in_past; - gfdb_brick_info._gfdb_promote = is_promotion; - gfdb_brick_info._query_cbk_args = query_cbk_args; - - list_for_each_entry(local_brick, args->brick_list, list) - { - /* Construct query file path for this brick - * i.e - * /var/run/gluster/xlator_name/ - * {promote/demote}-brickname-indexinbricklist - * So that no two query files will have same path even - * bricks have the same name - * */ - snprintf(local_brick->qfile_path, PATH_MAX, "%s-%s-%d", - GET_QFILE_PATH(gfdb_brick_info._gfdb_promote), - local_brick->brick_name, i); - - /* Delete any old query files for this brick */ - sys_unlink(local_brick->qfile_path); - - ret = tier_process_brick(local_brick, &gfdb_brick_info); - if (ret) { - gf_msg(args->this->name, GF_LOG_ERROR, 0, - DHT_MSG_BRICK_QUERY_FAILED, "Brick %s query failed\n", - local_brick->brick_db_path); - } - i++; - } - ret = 0; -out: - return ret; -} - -static int -tier_migrate_files_using_qfile(migration_args_t *comp, - query_cbk_args_t *query_cbk_args) -{ - int ret = -1; - tier_brick_list_t *local_brick = NULL; - tier_brick_list_t *temp = NULL; - gfdb_time_t current_time = { - 0, - }; - ssize_t qfile_array_size = 0; - int count = 0; - int temp_fd = 0; - gf_tier_conf_t *tier_conf = NULL; - - tier_conf = &(query_cbk_args->defrag->tier_conf); - - /* Time for error query files */ - gettimeofday(¤t_time, NULL); - - /* Build the qfile list */ - list_for_each_entry_safe(local_brick, temp, comp->brick_list, list) - { - qfile_array_size++; - } - query_cbk_args->qfile_array = qfile_array_new(qfile_array_size); - if (!query_cbk_args->qfile_array) { - gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Failed to create new " - "qfile_array"); - goto out; - } - - /*Open all qfiles*/ - count = 0; - query_cbk_args->qfile_array->exhausted_count = 0; - list_for_each_entry_safe(local_brick, temp, comp->brick_list, 
list) - { - temp_fd = query_cbk_args->qfile_array->fd_array[count]; - temp_fd = open(local_brick->qfile_path, O_RDONLY, - S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); - if (temp_fd < 0) { - gf_msg("tier", GF_LOG_ERROR, errno, DHT_MSG_LOG_TIER_ERROR, - "Failed to open " - "%s to the query file", - local_brick->qfile_path); - query_cbk_args->qfile_array->exhausted_count++; - } - query_cbk_args->qfile_array->fd_array[count] = temp_fd; - count++; - } - - /* Moving the query file index to the next, so that we won't the same - * query file every cycle as the first one */ - query_cbk_args->qfile_array - ->next_index = (query_cbk_args->is_promotion) - ? tier_conf->last_promote_qfile_index - : tier_conf->last_demote_qfile_index; - shift_next_index(query_cbk_args->qfile_array); - if (query_cbk_args->is_promotion) { - tier_conf->last_promote_qfile_index = query_cbk_args->qfile_array - ->next_index; - } else { - tier_conf->last_demote_qfile_index = query_cbk_args->qfile_array - ->next_index; - } - - /* Migrate files using query file list */ - ret = tier_migrate_using_query_file((void *)query_cbk_args); -out: - qfile_array_free(query_cbk_args->qfile_array); - - /* If there is an error rename all the query files to .err files - * with a timestamp for better debugging */ - if (ret) { - struct tm tm = { - 0, - }; - char time_str[128] = { - 0, - }; - char query_file_path_err[PATH_MAX] = { - 0, - }; - int32_t len = 0; - - /* Time format for error query files */ - gmtime_r(¤t_time.tv_sec, &tm); - strftime(time_str, sizeof(time_str), "%F-%T", &tm); - - list_for_each_entry_safe(local_brick, temp, comp->brick_list, list) - { - /* rename error qfile*/ - len = snprintf(query_file_path_err, sizeof(query_file_path_err), - "%s-%s.err", local_brick->qfile_path, time_str); - if ((len >= 0) && (len < sizeof(query_file_path_err))) { - if (sys_rename(local_brick->qfile_path, query_file_path_err) == - -1) - gf_msg_debug("tier", 0, - "rename " - "failed"); - } - } - } - - query_cbk_args->qfile_array = 
NULL; - - return ret; -} - -int -tier_demote(migration_args_t *demotion_args) -{ - query_cbk_args_t query_cbk_args; - int ret = -1; - - GF_VALIDATE_OR_GOTO("tier", demotion_args, out); - GF_VALIDATE_OR_GOTO("tier", demotion_args->this, out); - GF_VALIDATE_OR_GOTO(demotion_args->this->name, demotion_args->brick_list, - out); - GF_VALIDATE_OR_GOTO(demotion_args->this->name, demotion_args->defrag, out); - - THIS = demotion_args->this; - - query_cbk_args.this = demotion_args->this; - query_cbk_args.defrag = demotion_args->defrag; - query_cbk_args.is_promotion = 0; - - /*Build the query file using bricklist*/ - ret = tier_build_migration_qfile(demotion_args, &query_cbk_args, _gf_false); - if (ret) - goto out; - - /* Migrate files using the query file */ - ret = tier_migrate_files_using_qfile(demotion_args, &query_cbk_args); - if (ret) - goto out; - -out: - demotion_args->return_value = ret; - return ret; -} - -int -tier_promote(migration_args_t *promotion_args) -{ - int ret = -1; - query_cbk_args_t query_cbk_args; - - GF_VALIDATE_OR_GOTO("tier", promotion_args->this, out); - GF_VALIDATE_OR_GOTO(promotion_args->this->name, promotion_args->brick_list, - out); - GF_VALIDATE_OR_GOTO(promotion_args->this->name, promotion_args->defrag, - out); - - THIS = promotion_args->this; - - query_cbk_args.this = promotion_args->this; - query_cbk_args.defrag = promotion_args->defrag; - query_cbk_args.is_promotion = 1; - - /*Build the query file using bricklist*/ - ret = tier_build_migration_qfile(promotion_args, &query_cbk_args, _gf_true); - if (ret) - goto out; - - /* Migrate files using the query file */ - ret = tier_migrate_files_using_qfile(promotion_args, &query_cbk_args); - if (ret) - goto out; - -out: - promotion_args->return_value = ret; - return ret; -} - -/* - * Command the CTR on a brick to compact the local database using an IPC - */ -static int -tier_process_self_compact(tier_brick_list_t *local_brick, void *args) -{ - int ret = -1; - char *db_path = NULL; - query_cbk_args_t 
*query_cbk_args = NULL; - xlator_t *this = NULL; - gfdb_conn_node_t *conn_node = NULL; - dict_t *params_dict = NULL; - dict_t *ctr_ipc_dict = NULL; - gfdb_brick_info_t *gfdb_brick_info = args; - - /*Init of all the essentials*/ - GF_VALIDATE_OR_GOTO("tier", gfdb_brick_info, out); - query_cbk_args = gfdb_brick_info->_query_cbk_args; - - GF_VALIDATE_OR_GOTO("tier", query_cbk_args->this, out); - this = query_cbk_args->this; - - GF_VALIDATE_OR_GOTO(this->name, gfdb_brick_info->_query_cbk_args, out); - - GF_VALIDATE_OR_GOTO(this->name, local_brick, out); - - GF_VALIDATE_OR_GOTO(this->name, local_brick->xlator, out); - - GF_VALIDATE_OR_GOTO(this->name, local_brick->brick_db_path, out); - - db_path = local_brick->brick_db_path; - - /*Preparing DB parameters before init_db i.e getting db connection*/ - params_dict = dict_new(); - if (!params_dict) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "DB Params cannot initialized"); - goto out; - } - SET_DB_PARAM_TO_DICT(this->name, params_dict, - (char *)gfdb_methods.get_db_path_key(), db_path, ret, - out); - - /*Get the db connection*/ - conn_node = gfdb_methods.init_db((void *)params_dict, dht_tier_db_type); - if (!conn_node) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "FATAL: Failed initializing db operations"); - goto out; - } - - ret = 0; - - /*Preparing ctr_ipc_dict*/ - ctr_ipc_dict = dict_new(); - if (!ctr_ipc_dict) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "ctr_ipc_dict cannot initialized"); - goto out; - } - - ret = dict_set_int32(ctr_ipc_dict, "compact_active", - query_cbk_args->defrag->tier_conf.compact_active); - - if (ret) { - gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED, - "Failed to set %s " - "to params dictionary", - "compact_active"); - goto out; - } - - ret = dict_set_int32( - ctr_ipc_dict, "compact_mode_switched", - query_cbk_args->defrag->tier_conf.compact_mode_switched); - - if (ret) { - gf_msg("tier", GF_LOG_ERROR, 0, 
LG_MSG_SET_PARAM_FAILED, - "Failed to set %s " - "to params dictionary", - "compact_mode_switched"); - goto out; - } - - SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_dict, GFDB_IPC_CTR_KEY, - GFDB_IPC_CTR_SET_COMPACT_PRAGMA, ret, out); - - gf_msg(this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS, - "Starting Compaction IPC"); - - ret = syncop_ipc(local_brick->xlator, GF_IPC_TARGET_CTR, ctr_ipc_dict, - NULL); - - gf_msg(this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS, - "Ending Compaction IPC"); - - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Failed compaction " - "on db %s error %d", - local_brick->brick_db_path, ret); - goto out; - } - - gf_msg(this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS, - "SUCCESS: %s Compaction", local_brick->brick_name); - - ret = 0; -out: - if (params_dict) { - dict_unref(params_dict); - params_dict = NULL; - } - - if (ctr_ipc_dict) { - dict_unref(ctr_ipc_dict); - ctr_ipc_dict = NULL; - } - - gfdb_methods.fini_db(conn_node); - - return ret; -} - -/* - * This is the call back function for each brick from hot/cold bricklist. - * It determines the database type on each brick and calls the corresponding - * function to prepare the compaction IPC. 
- */ -static int -tier_compact_db_brick(tier_brick_list_t *local_brick, void *args) -{ - int ret = -1; - - GF_VALIDATE_OR_GOTO("tier", local_brick, out); - - GF_VALIDATE_OR_GOTO("tier", local_brick->xlator, out); - - ret = tier_process_self_compact(local_brick, args); - if (ret) { - gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, - "Brick %s did not compact", local_brick->brick_name); - goto out; - } - - ret = 0; - -out: - - return ret; -} - -static int -tier_send_compact(migration_args_t *args, query_cbk_args_t *query_cbk_args) -{ - gfdb_time_t current_time; - gfdb_brick_info_t gfdb_brick_info; - gfdb_time_t time_in_past; - int ret = -1; - tier_brick_list_t *local_brick = NULL; - - time_in_past.tv_sec = args->freq_time; - time_in_past.tv_usec = 0; - - ret = gettimeofday(¤t_time, NULL); - if (ret == -1) { - gf_msg(args->this->name, GF_LOG_ERROR, errno, - DHT_MSG_SYS_CALL_GET_TIME_FAILED, "Failed to get current time"); - goto out; - } - time_in_past.tv_sec = current_time.tv_sec - time_in_past.tv_sec; - - /* The migration daemon may run a varying numberof usec after the sleep - call triggers. A file may be registered in CTR some number of usec X - after the daemon started and missed in the subsequent cycle if the - daemon starts Y usec after the period in seconds where Y>X. Normalize - away this problem by always setting usec to 0. 
*/ - time_in_past.tv_usec = 0; - - gfdb_brick_info.time_stamp = &time_in_past; - - /* This is meant to say we are always compacting at this point */ - /* We simply borrow the promotion flag to do this */ - gfdb_brick_info._gfdb_promote = 1; - - gfdb_brick_info._query_cbk_args = query_cbk_args; - - list_for_each_entry(local_brick, args->brick_list, list) - { - gf_msg(args->this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS, - "Start compaction for %s", local_brick->brick_name); - - ret = tier_compact_db_brick(local_brick, &gfdb_brick_info); - if (ret) { - gf_msg(args->this->name, GF_LOG_ERROR, 0, - DHT_MSG_BRICK_QUERY_FAILED, "Brick %s compaction failed\n", - local_brick->brick_db_path); - } - - gf_msg(args->this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS, - "End compaction for %s", local_brick->brick_name); - } - ret = 0; -out: - return ret; -} - -static int -tier_compact(void *args) -{ - int ret = -1; - query_cbk_args_t query_cbk_args; - migration_args_t *compaction_args = args; - - GF_VALIDATE_OR_GOTO("tier", compaction_args->this, out); - GF_VALIDATE_OR_GOTO(compaction_args->this->name, - compaction_args->brick_list, out); - GF_VALIDATE_OR_GOTO(compaction_args->this->name, compaction_args->defrag, - out); - - THIS = compaction_args->this; - - query_cbk_args.this = compaction_args->this; - query_cbk_args.defrag = compaction_args->defrag; - query_cbk_args.is_compaction = 1; - - /* Send the compaction pragma out to all the bricks on the bricklist. */ - /* tier_get_bricklist ensures all bricks on the list are local to */ - /* this node. 
*/ - ret = tier_send_compact(compaction_args, &query_cbk_args); - if (ret) - goto out; - - ret = 0; -out: - compaction_args->return_value = ret; - return ret; -} - -static int -tier_get_bricklist(xlator_t *xl, struct list_head *local_bricklist_head) -{ - xlator_list_t *child = NULL; - char *rv = NULL; - char *rh = NULL; - char *brickname = NULL; - char db_name[PATH_MAX] = ""; - int ret = 0; - tier_brick_list_t *local_brick = NULL; - int32_t len = 0; - - GF_VALIDATE_OR_GOTO("tier", xl, out); - GF_VALIDATE_OR_GOTO("tier", local_bricklist_head, out); - - /* - * This function obtains remote subvolumes and filters out only - * those running on the same node as the tier daemon. - */ - if (strcmp(xl->type, "protocol/client") == 0) { - ret = dict_get_str(xl->options, "remote-host", &rh); - if (ret < 0) - goto out; - - if (gf_is_local_addr(rh)) { - local_brick = GF_CALLOC(1, sizeof(tier_brick_list_t), - gf_tier_mt_bricklist_t); - if (!local_brick) { - goto out; - } - - ret = dict_get_str(xl->options, "remote-subvolume", &rv); - if (ret < 0) - goto out; - - brickname = strrchr(rv, '/') + 1; - snprintf(db_name, sizeof(db_name), "%s.db", brickname); - - local_brick->brick_db_path = GF_MALLOC(PATH_MAX, gf_common_mt_char); - if (!local_brick->brick_db_path) { - gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS, - "Failed to allocate memory for" - " bricklist."); - ret = -1; - goto out; - } - - len = snprintf(local_brick->brick_db_path, PATH_MAX, "%s/%s/%s", rv, - GF_HIDDEN_PATH, db_name); - if ((len < 0) || (len >= PATH_MAX)) { - gf_msg("tier", GF_LOG_ERROR, EINVAL, DHT_MSG_LOG_TIER_STATUS, - "DB path too long"); - ret = -1; - goto out; - } - - local_brick->xlator = xl; - - snprintf(local_brick->brick_name, NAME_MAX, "%s", brickname); - - list_add_tail(&(local_brick->list), local_bricklist_head); - - ret = 0; - goto out; - } - } - - for (child = xl->children; child; child = child->next) { - ret = tier_get_bricklist(child->xlator, local_bricklist_head); - if (ret) { - goto 
out; - } - } - - ret = 0; -out: - - if (ret) { - if (local_brick) { - GF_FREE(local_brick->brick_db_path); - } - GF_FREE(local_brick); - } - - return ret; -} - -int -tier_get_freq_demote(gf_tier_conf_t *tier_conf) -{ - if ((tier_conf->mode == TIER_MODE_WM) && - (tier_conf->watermark_last == TIER_WM_HI)) - return DEFAULT_DEMOTE_DEGRADED; - else - return tier_conf->tier_demote_frequency; -} - -int -tier_get_freq_promote(gf_tier_conf_t *tier_conf) -{ - return tier_conf->tier_promote_frequency; -} - -int -tier_get_freq_compact_hot(gf_tier_conf_t *tier_conf) -{ - return tier_conf->tier_compact_hot_frequency; -} - -int -tier_get_freq_compact_cold(gf_tier_conf_t *tier_conf) -{ - return tier_conf->tier_compact_cold_frequency; -} - -static int -tier_check_demote(gfdb_time_t current_time, int freq) -{ - return ((current_time.tv_sec % freq) == 0) ? _gf_true : _gf_false; -} - -static gf_boolean_t -tier_check_promote(gf_tier_conf_t *tier_conf, gfdb_time_t current_time, - int freq) -{ - if ((tier_conf->mode == TIER_MODE_WM) && - (tier_conf->watermark_last == TIER_WM_HI)) - return _gf_false; - - else - return ((current_time.tv_sec % freq) == 0) ? _gf_true : _gf_false; -} - -static gf_boolean_t -tier_check_compact(gf_tier_conf_t *tier_conf, gfdb_time_t current_time, - int freq_compact) -{ - if (!(tier_conf->compact_active || tier_conf->compact_mode_switched)) - return _gf_false; - - return ((current_time.tv_sec % freq_compact) == 0) ? 
_gf_true : _gf_false; -} - -void -clear_bricklist(struct list_head *brick_list) -{ - tier_brick_list_t *local_brick = NULL; - tier_brick_list_t *temp = NULL; - - if (list_empty(brick_list)) { - return; - } - - list_for_each_entry_safe(local_brick, temp, brick_list, list) - { - list_del(&local_brick->list); - GF_FREE(local_brick->brick_db_path); - GF_FREE(local_brick); - } -} - -static void -set_brick_list_qpath(struct list_head *brick_list, gf_boolean_t is_cold) -{ - tier_brick_list_t *local_brick = NULL; - int i = 0; - - GF_VALIDATE_OR_GOTO("tier", brick_list, out); - - list_for_each_entry(local_brick, brick_list, list) - { - /* Construct query file path for this brick - * i.e - * /var/run/gluster/xlator_name/ - * {promote/demote}-brickname-indexinbricklist - * So that no two query files will have same path even - * bricks have the same name - * */ - snprintf(local_brick->qfile_path, PATH_MAX, "%s-%s-%d", - GET_QFILE_PATH(is_cold), local_brick->brick_name, i); - i++; - } -out: - return; -} - -static int -tier_prepare_compact(migration_args_t *args, gfdb_time_t current_time) -{ - xlator_t *this = NULL; - dht_conf_t *conf = NULL; - gf_defrag_info_t *defrag = NULL; - gf_tier_conf_t *tier_conf = NULL; - gf_boolean_t is_hot_tier = args->is_hot_tier; - int freq = 0; - int ret = -1; - const char *tier_type = is_hot_tier ? "hot" : "cold"; - - this = args->this; - - conf = this->private; - - defrag = conf->defrag; - - tier_conf = &defrag->tier_conf; - - freq = is_hot_tier ? tier_get_freq_compact_hot(tier_conf) - : tier_get_freq_compact_cold(tier_conf); - - defrag->tier_conf.compact_mode_switched = - is_hot_tier ? 
defrag->tier_conf.compact_mode_switched_hot - : defrag->tier_conf.compact_mode_switched_cold; - - gf_msg(this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS, - "Compact mode %i", defrag->tier_conf.compact_mode_switched); - - if (tier_check_compact(tier_conf, current_time, freq)) { - gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, - "Start compaction on %s tier", tier_type); - - args->freq_time = freq; - ret = tier_compact(args); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Compaction failed on " - "%s tier", - tier_type); - goto out; - } - - gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, - "End compaction on %s tier", tier_type); - - if (is_hot_tier) { - defrag->tier_conf.compact_mode_switched_hot = _gf_false; - } else { - defrag->tier_conf.compact_mode_switched_cold = _gf_false; - } - } - -out: - return ret; -} - -static int -tier_get_wm_interval(tier_mode_t mode, tier_watermark_op_t wm) -{ - if (mode == TIER_MODE_WM && wm == TIER_WM_HI) - return WM_INTERVAL_EMERG; - - return WM_INTERVAL; -} - -/* - * Main tiering loop. This is called from the promotion and the - * demotion threads spawned in tier_start(). - * - * Every second, wake from sleep to perform tasks. - * 1. Check trigger to migrate data. - * 2. Check for state changes (pause, unpause, stop). 
- */ -static void * -tier_run(void *in_args) -{ - dht_conf_t *conf = NULL; - gfdb_time_t current_time = {0}; - int freq = 0; - int ret = 0; - xlator_t *any = NULL; - xlator_t *xlator = NULL; - gf_tier_conf_t *tier_conf = NULL; - loc_t root_loc = {0}; - int check_watermark = 0; - gf_defrag_info_t *defrag = NULL; - xlator_t *this = NULL; - migration_args_t *args = in_args; - GF_VALIDATE_OR_GOTO("tier", args, out); - GF_VALIDATE_OR_GOTO("tier", args->brick_list, out); - - this = args->this; - GF_VALIDATE_OR_GOTO("tier", this, out); - - conf = this->private; - GF_VALIDATE_OR_GOTO("tier", conf, out); - - defrag = conf->defrag; - GF_VALIDATE_OR_GOTO("tier", defrag, out); - - if (list_empty(args->brick_list)) { - gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_ERROR, - "Brick list for tier is empty. Exiting."); - goto out; - } - - defrag->defrag_status = GF_DEFRAG_STATUS_STARTED; - tier_conf = &defrag->tier_conf; - - dht_build_root_loc(defrag->root_inode, &root_loc); - - while (1) { - /* - * Check if a graph switch occurred. If so, stop migration - * thread. It will need to be restarted manually. - */ - any = THIS->ctx->active->first; - xlator = xlator_search_by_name(any, this->name); - - if (xlator != this) { - gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, - "Detected graph switch. 
Exiting migration " - "daemon."); - goto out; - } - - gf_defrag_check_pause_tier(tier_conf); - - sleep(1); - - if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { - ret = 1; - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "defrag->defrag_status != " - "GF_DEFRAG_STATUS_STARTED"); - goto out; - } - - if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER || - defrag->cmd == GF_DEFRAG_CMD_DETACH_START) { - ret = 0; - defrag->defrag_status = GF_DEFRAG_STATUS_COMPLETE; - gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_LOG_TIER_ERROR, - "defrag->defrag_cmd == " - "GF_DEFRAG_CMD_START_DETACH_TIER"); - goto out; - } - - if (gf_defrag_get_pause_state(&defrag->tier_conf) != TIER_RUNNING) - continue; - - /* To have proper synchronization amongst all - * brick holding nodes, so that promotion and demotions - * start atomically w.r.t promotion/demotion frequency - * period, all nodes should have their system time - * in-sync with each other either manually set or - * using a NTP server*/ - ret = gettimeofday(¤t_time, NULL); - if (ret == -1) { - gf_msg(this->name, GF_LOG_ERROR, errno, - DHT_MSG_SYS_CALL_GET_TIME_FAILED, - "Failed to get current time"); - goto out; - } - - check_watermark++; - - /* emergency demotion requires frequent watermark monitoring */ - if (check_watermark >= - tier_get_wm_interval(tier_conf->mode, tier_conf->watermark_last)) { - check_watermark = 0; - if (tier_conf->mode == TIER_MODE_WM) { - ret = tier_get_fs_stat(this, &root_loc); - if (ret != 0) { - continue; - } - ret = tier_check_watermark(this); - if (ret != 0) { - gf_msg(this->name, GF_LOG_CRITICAL, errno, - DHT_MSG_LOG_TIER_ERROR, "Failed to get watermark"); - continue; - } - } - } - - if (args->is_promotion) { - freq = tier_get_freq_promote(tier_conf); - - if (tier_check_promote(tier_conf, current_time, freq)) { - args->freq_time = freq; - ret = tier_promote(args); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Promotion failed"); - } - } - } else if 
(args->is_compaction) { - tier_prepare_compact(args, current_time); - } else { - freq = tier_get_freq_demote(tier_conf); - - if (tier_check_demote(current_time, freq)) { - args->freq_time = freq; - ret = tier_demote(args); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Demotion failed"); - } - } - } - - /* Check the statfs immediately after the processing threads - return */ - check_watermark = WM_INTERVAL; - } - - ret = 0; -out: - - args->return_value = ret; - - return NULL; -} - -int -tier_start(xlator_t *this, gf_defrag_info_t *defrag) -{ - pthread_t promote_thread; - pthread_t demote_thread; - pthread_t hot_compact_thread; - pthread_t cold_compact_thread; - int ret = -1; - struct list_head bricklist_hot = {0}; - struct list_head bricklist_cold = {0}; - migration_args_t promotion_args = {0}; - migration_args_t demotion_args = {0}; - migration_args_t hot_compaction_args = {0}; - migration_args_t cold_compaction_args = {0}; - dht_conf_t *conf = NULL; - - INIT_LIST_HEAD((&bricklist_hot)); - INIT_LIST_HEAD((&bricklist_cold)); - - conf = this->private; - - tier_get_bricklist(conf->subvolumes[1], &bricklist_hot); - set_brick_list_qpath(&bricklist_hot, _gf_false); - - demotion_args.this = this; - demotion_args.brick_list = &bricklist_hot; - demotion_args.defrag = defrag; - demotion_args.is_promotion = _gf_false; - demotion_args.is_compaction = _gf_false; - - ret = gf_thread_create(&demote_thread, NULL, &tier_run, &demotion_args, - "tierdem"); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Failed to start demotion thread."); - defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; - goto cleanup; - } - - tier_get_bricklist(conf->subvolumes[0], &bricklist_cold); - set_brick_list_qpath(&bricklist_cold, _gf_true); - - promotion_args.this = this; - promotion_args.brick_list = &bricklist_cold; - promotion_args.defrag = defrag; - promotion_args.is_promotion = _gf_true; - - ret = gf_thread_create(&promote_thread, NULL, 
&tier_run, &promotion_args, - "tierpro"); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Failed to start promotion thread."); - defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; - goto waitforspawned; - } - - hot_compaction_args.this = this; - hot_compaction_args.brick_list = &bricklist_hot; - hot_compaction_args.defrag = defrag; - hot_compaction_args.is_promotion = _gf_false; - hot_compaction_args.is_compaction = _gf_true; - hot_compaction_args.is_hot_tier = _gf_true; - - ret = gf_thread_create(&hot_compact_thread, NULL, &tier_run, - &hot_compaction_args, "tierhcom"); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Failed to start compaction thread."); - defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; - goto waitforspawnedpromote; - } - - cold_compaction_args.this = this; - cold_compaction_args.brick_list = &bricklist_cold; - cold_compaction_args.defrag = defrag; - cold_compaction_args.is_promotion = _gf_false; - cold_compaction_args.is_compaction = _gf_true; - cold_compaction_args.is_hot_tier = _gf_false; - - ret = gf_thread_create(&cold_compact_thread, NULL, &tier_run, - &cold_compaction_args, "tierccom"); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Failed to start compaction thread."); - defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; - goto waitforspawnedhotcompact; - } - pthread_join(cold_compact_thread, NULL); - -waitforspawnedhotcompact: - pthread_join(hot_compact_thread, NULL); - -waitforspawnedpromote: - pthread_join(promote_thread, NULL); - -waitforspawned: - pthread_join(demote_thread, NULL); - -cleanup: - clear_bricklist(&bricklist_cold); - clear_bricklist(&bricklist_hot); - return ret; -} - -int32_t -tier_migration_needed(xlator_t *this) -{ - gf_defrag_info_t *defrag = NULL; - dht_conf_t *conf = NULL; - int ret = 0; - - conf = this->private; - - GF_VALIDATE_OR_GOTO(this->name, conf, out); - GF_VALIDATE_OR_GOTO(this->name, conf->defrag, out); - - defrag = 
conf->defrag; - - if ((defrag->cmd == GF_DEFRAG_CMD_START_TIER) || - (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER)) - ret = 1; -out: - return ret; -} - -int32_t -tier_migration_get_dst(xlator_t *this, dht_local_t *local) -{ - dht_conf_t *conf = NULL; - int32_t ret = -1; - gf_defrag_info_t *defrag = NULL; - - GF_VALIDATE_OR_GOTO("tier", this, out); - GF_VALIDATE_OR_GOTO(this->name, this->private, out); - - conf = this->private; - - defrag = conf->defrag; - - if (defrag && defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER) { - local->rebalance.target_node = conf->subvolumes[0]; - - } else if (conf->subvolumes[0] == local->cached_subvol) - local->rebalance.target_node = conf->subvolumes[1]; - else - local->rebalance.target_node = conf->subvolumes[0]; - - if (local->rebalance.target_node) - ret = 0; - -out: - return ret; -} - -xlator_t * -tier_search(xlator_t *this, dht_layout_t *layout, const char *name) -{ - xlator_t *subvol = NULL; - dht_conf_t *conf = NULL; - - GF_VALIDATE_OR_GOTO("tier", this, out); - GF_VALIDATE_OR_GOTO(this->name, this->private, out); - - conf = this->private; - - subvol = TIER_HASHED_SUBVOL; - -out: - return subvol; -} - -static int -tier_load_externals(xlator_t *this) -{ - int ret = -1; - char *libpathfull = (LIBDIR "/libgfdb.so.0"); - get_gfdb_methods_t get_gfdb_methods; - - GF_VALIDATE_OR_GOTO("this", this, out); - - libhandle = dlopen(libpathfull, RTLD_NOW); - if (!libhandle) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Error loading libgfdb.so %s\n", dlerror()); - ret = -1; - goto out; - } - - get_gfdb_methods = dlsym(libhandle, "get_gfdb_methods"); - if (!get_gfdb_methods) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Error loading get_gfdb_methods()"); - ret = -1; - goto out; - } - - get_gfdb_methods(&gfdb_methods); - - ret = 0; - -out: - if (ret && libhandle) - dlclose(libhandle); - - return ret; -} - -static tier_mode_t -tier_validate_mode(char *mode) -{ - int ret = -1; - - if (strcmp(mode, 
"test") == 0) { - ret = TIER_MODE_TEST; - } else { - ret = TIER_MODE_WM; - } - - return ret; -} - -static gf_boolean_t -tier_validate_compact_mode(char *mode) -{ - gf_boolean_t ret = _gf_false; - - gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, - "tier_validate_compact_mode: mode = %s", mode); - - if (!strcmp(mode, "on")) { - ret = _gf_true; - } else { - ret = _gf_false; - } - - gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS, - "tier_validate_compact_mode: ret = %i", ret); - - return ret; -} - -int -tier_init_methods(xlator_t *this) -{ - int ret = -1; - dht_conf_t *conf = NULL; - dht_methods_t *methods = NULL; - - GF_VALIDATE_OR_GOTO("tier", this, err); - - conf = this->private; - - methods = &(conf->methods); - - methods->migration_get_dst_subvol = tier_migration_get_dst; - methods->migration_other = tier_start; - methods->migration_needed = tier_migration_needed; - methods->layout_search = tier_search; - - ret = 0; -err: - return ret; -} - -static void -tier_save_vol_name(xlator_t *this) -{ - dht_conf_t *conf = NULL; - gf_defrag_info_t *defrag = NULL; - char *suffix = NULL; - int name_len = 0; - - conf = this->private; - defrag = conf->defrag; - - suffix = strstr(this->name, "-tier-dht"); - - if (suffix) - name_len = suffix - this->name; - else - name_len = strlen(this->name); - - if (name_len > GD_VOLUME_NAME_MAX) - name_len = GD_VOLUME_NAME_MAX; - - strncpy(defrag->tier_conf.volname, this->name, name_len); - defrag->tier_conf.volname[name_len] = 0; -} - -int -tier_init(xlator_t *this) -{ - int ret = -1; - int freq = 0; - int maxsize = 0; - dht_conf_t *conf = NULL; - gf_defrag_info_t *defrag = NULL; - char *voldir = NULL; - char *mode = NULL; - char *paused = NULL; - tier_mode_t tier_mode = DEFAULT_TIER_MODE; - gf_boolean_t compact_mode = _gf_false; - - ret = dht_init(this); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "tier_init failed"); - goto out; - } - - conf = this->private; - - ret = 
tier_init_methods(this); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "tier_init_methods failed"); - goto out; - } - - if (conf->subvolume_cnt != 2) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Invalid number of subvolumes %d", conf->subvolume_cnt); - goto out; - } - - /* if instatiated from client side initialization is complete. */ - if (!conf->defrag) { - ret = 0; - goto out; - } - - /* if instatiated from server side, load db libraries */ - ret = tier_load_externals(this); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "Could not load externals. Aborting"); - goto out; - } - - defrag = conf->defrag; - - defrag->tier_conf.last_demote_qfile_index = 0; - defrag->tier_conf.last_promote_qfile_index = 0; - - defrag->tier_conf.is_tier = 1; - defrag->this = this; - - ret = dict_get_int32(this->options, "tier-max-promote-file-size", &maxsize); - if (ret) { - maxsize = 0; - } - - defrag->tier_conf.tier_max_promote_size = maxsize; - - ret = dict_get_int32(this->options, "tier-promote-frequency", &freq); - if (ret) { - freq = DEFAULT_PROMOTE_FREQ_SEC; - } - - defrag->tier_conf.tier_promote_frequency = freq; - - ret = dict_get_int32(this->options, "tier-demote-frequency", &freq); - if (ret) { - freq = DEFAULT_DEMOTE_FREQ_SEC; - } - - defrag->tier_conf.tier_demote_frequency = freq; - - ret = dict_get_int32(this->options, "tier-hot-compact-frequency", &freq); - if (ret) { - freq = DEFAULT_HOT_COMPACT_FREQ_SEC; - } - - defrag->tier_conf.tier_compact_hot_frequency = freq; - - ret = dict_get_int32(this->options, "tier-cold-compact-frequency", &freq); - if (ret) { - freq = DEFAULT_COLD_COMPACT_FREQ_SEC; - } - - defrag->tier_conf.tier_compact_cold_frequency = freq; - - ret = dict_get_int32(this->options, "watermark-hi", &freq); - if (ret) { - freq = DEFAULT_WM_HI; - } - - defrag->tier_conf.watermark_hi = freq; - - ret = dict_get_int32(this->options, "watermark-low", &freq); - if (ret) { - freq = 
DEFAULT_WM_LOW; - } - - defrag->tier_conf.watermark_low = freq; - - ret = dict_get_int32(this->options, "write-freq-threshold", &freq); - if (ret) { - freq = DEFAULT_WRITE_FREQ_SEC; - } - - defrag->write_freq_threshold = freq; - - ret = dict_get_int32(this->options, "read-freq-threshold", &freq); - if (ret) { - freq = DEFAULT_READ_FREQ_SEC; - } - - defrag->read_freq_threshold = freq; - - ret = dict_get_int32(this->options, "tier-max-mb", &freq); - if (ret) { - freq = DEFAULT_TIER_MAX_MIGRATE_MB; - } - - defrag->tier_conf.max_migrate_bytes = (uint64_t)freq * 1024 * 1024; - - ret = dict_get_int32(this->options, "tier-max-files", &freq); - if (ret) { - freq = DEFAULT_TIER_MAX_MIGRATE_FILES; - } - - defrag->tier_conf.max_migrate_files = freq; - - ret = dict_get_int32(this->options, "tier-query-limit", - &(defrag->tier_conf.query_limit)); - if (ret) { - defrag->tier_conf.query_limit = DEFAULT_TIER_QUERY_LIMIT; - } - - ret = dict_get_str(this->options, "tier-compact", &mode); - - if (ret) { - defrag->tier_conf.compact_active = DEFAULT_COMP_MODE; - } else { - compact_mode = tier_validate_compact_mode(mode); - /* If compaction is now active, we need to inform the bricks on - the hot and cold tier of this. See dht-common.h for more. 
*/ - defrag->tier_conf.compact_active = compact_mode; - if (compact_mode) { - defrag->tier_conf.compact_mode_switched_hot = _gf_true; - defrag->tier_conf.compact_mode_switched_cold = _gf_true; - } - } - - ret = dict_get_str(this->options, "tier-mode", &mode); - if (ret) { - defrag->tier_conf.mode = DEFAULT_TIER_MODE; - } else { - tier_mode = tier_validate_mode(mode); - defrag->tier_conf.mode = tier_mode; - } - - pthread_mutex_init(&defrag->tier_conf.pause_mutex, 0); - - gf_defrag_set_pause_state(&defrag->tier_conf, TIER_RUNNING); - - ret = dict_get_str(this->options, "tier-pause", &paused); - - if (paused && strcmp(paused, "on") == 0) - gf_defrag_set_pause_state(&defrag->tier_conf, TIER_REQUEST_PAUSE); - - ret = gf_asprintf(&voldir, "%s/%s", DEFAULT_VAR_RUN_DIRECTORY, this->name); - if (ret < 0) - goto out; - - ret = mkdir_p(voldir, 0777, _gf_true); - if (ret == -1 && errno != EEXIST) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "tier_init failed"); - - GF_FREE(voldir); - goto out; - } - - GF_FREE(voldir); - - ret = gf_asprintf(&promotion_qfile, "%s/%s/promote", - DEFAULT_VAR_RUN_DIRECTORY, this->name); - if (ret < 0) - goto out; - - ret = gf_asprintf(&demotion_qfile, "%s/%s/demote", - DEFAULT_VAR_RUN_DIRECTORY, this->name); - if (ret < 0) { - GF_FREE(promotion_qfile); - goto out; - } - - gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, - "Promote/demote frequency %d/%d " - "Write/Read freq thresholds %d/%d", - defrag->tier_conf.tier_promote_frequency, - defrag->tier_conf.tier_demote_frequency, - defrag->write_freq_threshold, defrag->read_freq_threshold); - - tier_save_vol_name(this); - - ret = 0; - -out: - - return ret; -} - -int -tier_cli_pause_done(int op_ret, call_frame_t *sync_frame, void *data) -{ - gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_TIER_PAUSED, - "Migrate file paused with op_ret %d", op_ret); - - return op_ret; -} - -int -tier_cli_pause(void *data) -{ - gf_defrag_info_t *defrag = NULL; - xlator_t *this = NULL; - 
dht_conf_t *conf = NULL; - int ret = -1; - - this = data; - - conf = this->private; - GF_VALIDATE_OR_GOTO(this->name, conf, exit); - - defrag = conf->defrag; - GF_VALIDATE_OR_GOTO(this->name, defrag, exit); - - gf_defrag_pause_tier(this, defrag); - - ret = 0; -exit: - return ret; -} - -int -tier_reconfigure(xlator_t *this, dict_t *options) -{ - dht_conf_t *conf = NULL; - gf_defrag_info_t *defrag = NULL; - char *mode = NULL; - int migrate_mb = 0; - gf_boolean_t req_pause = _gf_false; - int ret = 0; - call_frame_t *frame = NULL; - gf_boolean_t last_compact_setting = _gf_false; - - conf = this->private; - - if (conf->defrag) { - defrag = conf->defrag; - GF_OPTION_RECONF("tier-max-promote-file-size", - defrag->tier_conf.tier_max_promote_size, options, - int32, out); - - GF_OPTION_RECONF("tier-promote-frequency", - defrag->tier_conf.tier_promote_frequency, options, - int32, out); - - GF_OPTION_RECONF("tier-demote-frequency", - defrag->tier_conf.tier_demote_frequency, options, - int32, out); - - GF_OPTION_RECONF("write-freq-threshold", defrag->write_freq_threshold, - options, int32, out); - - GF_OPTION_RECONF("read-freq-threshold", defrag->read_freq_threshold, - options, int32, out); - - GF_OPTION_RECONF("watermark-hi", defrag->tier_conf.watermark_hi, - options, int32, out); - - GF_OPTION_RECONF("watermark-low", defrag->tier_conf.watermark_low, - options, int32, out); - - last_compact_setting = defrag->tier_conf.compact_active; - - GF_OPTION_RECONF("tier-compact", defrag->tier_conf.compact_active, - options, bool, out); - - if (last_compact_setting != defrag->tier_conf.compact_active) { - defrag->tier_conf.compact_mode_switched_hot = _gf_true; - defrag->tier_conf.compact_mode_switched_cold = _gf_true; - gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, - "compact mode switched"); - } - - GF_OPTION_RECONF("tier-hot-compact-frequency", - defrag->tier_conf.tier_compact_hot_frequency, options, - int32, out); - - GF_OPTION_RECONF("tier-cold-compact-frequency", - 
defrag->tier_conf.tier_compact_cold_frequency, options, - int32, out); - - GF_OPTION_RECONF("tier-mode", mode, options, str, out); - defrag->tier_conf.mode = tier_validate_mode(mode); - - GF_OPTION_RECONF("tier-max-mb", migrate_mb, options, int32, out); - defrag->tier_conf.max_migrate_bytes = (uint64_t)migrate_mb * 1024 * - 1024; - - GF_OPTION_RECONF("tier-max-files", defrag->tier_conf.max_migrate_files, - options, int32, out); - - GF_OPTION_RECONF("tier-query-limit", defrag->tier_conf.query_limit, - options, int32, out); - - GF_OPTION_RECONF("tier-pause", req_pause, options, bool, out); - - if (req_pause == _gf_true) { - frame = create_frame(this, this->ctx->pool); - if (!frame) - goto out; - - frame->root->pid = GF_CLIENT_PID_DEFRAG; - - ret = synctask_new(this->ctx->env, tier_cli_pause, - tier_cli_pause_done, frame, this); - - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "pause tier failed on reconfigure"); - } - } else { - ret = gf_defrag_resume_tier(this, defrag); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, - "resume tier failed on reconfigure"); - } - } - } - -out: - return dht_reconfigure(this, options); -} - -void -tier_fini(xlator_t *this) -{ - if (libhandle) - dlclose(libhandle); - - GF_FREE(demotion_qfile); - GF_FREE(promotion_qfile); - - dht_fini(this); -} - -class_methods_t class_methods = {.init = tier_init, - .fini = tier_fini, - .reconfigure = tier_reconfigure, - .notify = dht_notify}; - -struct xlator_fops fops = { - - .lookup = dht_lookup, - .create = tier_create, - .mknod = dht_mknod, - - .open = dht_open, - .statfs = tier_statfs, - .opendir = dht_opendir, - .readdir = tier_readdir, - .readdirp = tier_readdirp, - .fsyncdir = dht_fsyncdir, - .symlink = dht_symlink, - .unlink = tier_unlink, - .link = tier_link, - .mkdir = dht_mkdir, - .rmdir = dht_rmdir, - .rename = dht_rename, - .entrylk = dht_entrylk, - .fentrylk = dht_fentrylk, - - /* Inode read operations */ - .stat = dht_stat, - 
.fstat = dht_fstat, - .access = dht_access, - .readlink = dht_readlink, - .getxattr = dht_getxattr, - .fgetxattr = dht_fgetxattr, - .readv = dht_readv, - .flush = dht_flush, - .fsync = dht_fsync, - .inodelk = dht_inodelk, - .finodelk = dht_finodelk, - .lk = dht_lk, - - /* Inode write operations */ - .fremovexattr = dht_fremovexattr, - .removexattr = dht_removexattr, - .setxattr = dht_setxattr, - .fsetxattr = dht_fsetxattr, - .truncate = dht_truncate, - .ftruncate = dht_ftruncate, - .writev = dht_writev, - .xattrop = dht_xattrop, - .fxattrop = dht_fxattrop, - .setattr = dht_setattr, - .fsetattr = dht_fsetattr, - .fallocate = dht_fallocate, - .discard = dht_discard, - .zerofill = dht_zerofill, -}; - -struct xlator_cbks cbks = {.release = dht_release, .forget = dht_forget}; diff --git a/xlators/cluster/dht/src/tier.h b/xlators/cluster/dht/src/tier.h deleted file mode 100644 index a20b1db..0000000 --- a/xlators/cluster/dht/src/tier.h +++ /dev/null @@ -1,110 +0,0 @@ -/* - Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -#ifndef _TIER_H_ -#define _TIER_H_ - -/******************************************************************************/ -/* This is from dht-rebalancer.c as we don't have dht-rebalancer.h */ -#include "dht-common.h" -#include <glusterfs/xlator.h> -#include <signal.h> -#include <fnmatch.h> -#include <signal.h> - -/* - * Size of timer wheel. We would not promote or demote less - * frequently than this number. 
- */ -#define TIMER_SECS 3600 - -#include "gfdb_data_store.h" -#include <ctype.h> -#include <sys/stat.h> - -#define PROMOTION_QFILE "promotequeryfile" -#define DEMOTION_QFILE "demotequeryfile" - -#define TIER_HASHED_SUBVOL conf->subvolumes[0] -#define TIER_UNHASHED_SUBVOL conf->subvolumes[1] - -#define GET_QFILE_PATH(is_promotion) \ - (is_promotion) ? promotion_qfile : demotion_qfile - -typedef struct tier_qfile_array { - int *fd_array; - ssize_t array_size; - ssize_t next_index; - /* Indicate the number of exhuasted FDs*/ - ssize_t exhausted_count; -} tier_qfile_array_t; - -typedef struct _query_cbk_args { - xlator_t *this; - gf_defrag_info_t *defrag; - /* This is write */ - int query_fd; - int is_promotion; - int is_compaction; - /* This is for read */ - tier_qfile_array_t *qfile_array; -} query_cbk_args_t; - -int -gf_run_tier(xlator_t *this, gf_defrag_info_t *defrag); - -typedef struct gfdb_brick_info { - gfdb_time_t *time_stamp; - gf_boolean_t _gfdb_promote; - query_cbk_args_t *_query_cbk_args; -} gfdb_brick_info_t; - -typedef struct brick_list { - xlator_t *xlator; - char *brick_db_path; - char brick_name[NAME_MAX]; - char qfile_path[PATH_MAX]; - struct list_head list; -} tier_brick_list_t; - -typedef struct _dm_thread_args { - xlator_t *this; - gf_defrag_info_t *defrag; - struct list_head *brick_list; - int freq_time; - int return_value; - int is_promotion; - int is_compaction; - gf_boolean_t is_hot_tier; -} migration_args_t; - -typedef enum tier_watermark_op_ { - TIER_WM_NONE = 0, - TIER_WM_LOW, - TIER_WM_HI, - TIER_WM_MID -} tier_watermark_op_t; - -#define DEFAULT_PROMOTE_FREQ_SEC 120 -#define DEFAULT_DEMOTE_FREQ_SEC 120 -#define DEFAULT_HOT_COMPACT_FREQ_SEC 604800 -#define DEFAULT_COLD_COMPACT_FREQ_SEC 604800 -#define DEFAULT_DEMOTE_DEGRADED 1 -#define DEFAULT_WRITE_FREQ_SEC 0 -#define DEFAULT_READ_FREQ_SEC 0 -#define DEFAULT_WM_LOW 75 -#define DEFAULT_WM_HI 90 -#define DEFAULT_TIER_MODE TIER_MODE_TEST -#define DEFAULT_COMP_MODE _gf_true -#define 
DEFAULT_TIER_MAX_MIGRATE_MB 1000 -#define DEFAULT_TIER_MAX_MIGRATE_FILES 5000 -#define DEFAULT_TIER_QUERY_LIMIT 100 - -#endif diff --git a/xlators/cluster/dht/src/tier.sym b/xlators/cluster/dht/src/tier.sym deleted file mode 100644 index 60205d1..0000000 --- a/xlators/cluster/dht/src/tier.sym +++ /dev/null @@ -1,9 +0,0 @@ -fops -cbks -class_methods -dht_methods -tier_methods -options -mem_acct_init -reconfigure -dumpops diff --git a/xlators/cluster/stripe/src/Makefile.am b/xlators/cluster/stripe/src/Makefile.am deleted file mode 100644 index 2b59456..0000000 --- a/xlators/cluster/stripe/src/Makefile.am +++ /dev/null @@ -1,22 +0,0 @@ -xlator_LTLIBRARIES = stripe.la -xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster - -stripe_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) - - -stripe_la_SOURCES = stripe.c stripe-helpers.c \ - $(top_builddir)/xlators/lib/src/libxlator.c - -stripe_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la - -noinst_HEADERS = stripe.h stripe-mem-types.h \ - $(top_builddir)/xlators/lib/src/libxlator.h - -AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ - -I$(top_srcdir)/xlators/lib/src \ - -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src - -AM_CFLAGS = -Wall $(GF_CFLAGS) - -CLEANFILES = - diff --git a/xlators/cluster/stripe/src/stripe-helpers.c b/xlators/cluster/stripe/src/stripe-helpers.c deleted file mode 100644 index 3534237..0000000 --- a/xlators/cluster/stripe/src/stripe-helpers.c +++ /dev/null @@ -1,658 +0,0 @@ -/* - Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. 
-*/ - -#include <fnmatch.h> - -#include "stripe.h" -#include <glusterfs/byte-order.h> -#include <glusterfs/mem-types.h> -#include <glusterfs/logging.h> - -void -stripe_local_wipe(stripe_local_t *local) -{ - if (!local) - goto out; - - loc_wipe(&local->loc); - loc_wipe(&local->loc2); - - if (local->fd) - fd_unref(local->fd); - - if (local->inode) - inode_unref(local->inode); - - if (local->xattr) - dict_unref(local->xattr); - - if (local->xdata) - dict_unref(local->xdata); - -out: - return; -} - -int -stripe_aggregate(dict_t *this, char *key, data_t *value, void *data) -{ - dict_t *dst = NULL; - int64_t *ptr = 0, *size = NULL; - int32_t ret = -1; - - dst = data; - - if (strcmp(key, QUOTA_SIZE_KEY) == 0) { - ret = dict_get_bin(dst, key, (void **)&size); - if (ret < 0) { - size = GF_CALLOC(1, sizeof(int64_t), gf_common_mt_char); - if (size == NULL) { - gf_log("stripe", GF_LOG_WARNING, "memory allocation failed"); - goto out; - } - ret = dict_set_bin(dst, key, size, sizeof(int64_t)); - if (ret < 0) { - gf_log("stripe", GF_LOG_WARNING, - "stripe aggregate dict set failed"); - GF_FREE(size); - goto out; - } - } - - ptr = data_to_bin(value); - if (ptr == NULL) { - gf_log("stripe", GF_LOG_WARNING, "data to bin failed"); - goto out; - } - - *size = hton64(ntoh64(*size) + ntoh64(*ptr)); - } else if (strcmp(key, GF_CONTENT_KEY)) { - /* No need to aggregate 'CONTENT' data */ - ret = dict_set(dst, key, value); - if (ret) - gf_log("stripe", GF_LOG_WARNING, "xattr dict set failed"); - } - -out: - return 0; -} - -void -stripe_aggregate_xattr(dict_t *dst, dict_t *src) -{ - if ((dst == NULL) || (src == NULL)) { - goto out; - } - - dict_foreach(src, stripe_aggregate, dst); -out: - return; -} - -int32_t -stripe_xattr_aggregate(char *buffer, stripe_local_t *local, int32_t *total) -{ - int32_t i = 0; - int32_t ret = -1; - int32_t len = 0; - char *sbuf = NULL; - stripe_xattr_sort_t *xattr = NULL; - - if (!buffer || !local || !local->xattr_list) - goto out; - - sbuf = buffer; - - for (i = 
0; i < local->nallocs; i++) { - xattr = local->xattr_list + i; - len = xattr->xattr_len - 1; /* length includes \0 */ - - if (len && xattr && xattr->xattr_value) { - memcpy(buffer, xattr->xattr_value, len); - buffer += len; - *buffer++ = ' '; - } - } - - *--buffer = '\0'; - if (total) - *total = buffer - sbuf; - ret = 0; - -out: - return ret; -} - -int32_t -stripe_free_xattr_str(stripe_local_t *local) -{ - int32_t i = 0; - int32_t ret = -1; - stripe_xattr_sort_t *xattr = NULL; - - if (!local || !local->xattr_list) - goto out; - - for (i = 0; i < local->nallocs; i++) { - xattr = local->xattr_list + i; - - if (xattr && xattr->xattr_value) - GF_FREE(xattr->xattr_value); - } - - ret = 0; -out: - return ret; -} - -int32_t -stripe_fill_lockinfo_xattr(xlator_t *this, stripe_local_t *local, - void **xattr_serz) -{ - int32_t ret = -1, i = 0, len = 0; - dict_t *tmp1 = NULL, *tmp2 = NULL; - char *buf = NULL; - stripe_xattr_sort_t *xattr = NULL; - - if (xattr_serz == NULL) { - goto out; - } - - tmp2 = dict_new(); - - if (tmp2 == NULL) { - goto out; - } - - for (i = 0; i < local->nallocs; i++) { - xattr = local->xattr_list + i; - len = xattr->xattr_len; - - if (len && xattr && xattr->xattr_value) { - ret = dict_reset(tmp2); - if (ret < 0) { - gf_log(this->name, GF_LOG_DEBUG, "dict_reset failed (%s)", - strerror(-ret)); - } - - ret = dict_unserialize(xattr->xattr_value, xattr->xattr_len, &tmp2); - if (ret < 0) { - gf_log(this->name, GF_LOG_WARNING, - "dict_unserialize failed (%s)", strerror(-ret)); - ret = -1; - goto out; - } - - tmp1 = dict_copy(tmp2, tmp1); - if (tmp1 == NULL) { - gf_log(this->name, GF_LOG_WARNING, "dict_copy failed (%s)", - strerror(-ret)); - ret = -1; - goto out; - } - } - } - - len = dict_serialized_length(tmp1); - if (len > 0) { - buf = GF_CALLOC(1, len, gf_common_mt_dict_t); - if (buf == NULL) { - ret = -1; - goto out; - } - - ret = dict_serialize(tmp1, buf); - if (ret < 0) { - gf_log(this->name, GF_LOG_WARNING, "dict_serialize failed (%s)", - 
strerror(-ret)); - GF_FREE(buf); - ret = -1; - goto out; - } - - *xattr_serz = buf; - } - - ret = 0; -out: - if (tmp1 != NULL) { - dict_unref(tmp1); - } - - if (tmp2 != NULL) { - dict_unref(tmp2); - } - - return ret; -} - -int32_t -stripe_fill_pathinfo_xattr(xlator_t *this, stripe_local_t *local, - char **xattr_serz) -{ - int ret = -1; - int32_t padding = 0; - int32_t tlen = 0; - int len = 0; - char stripe_size_str[20] = { - 0, - }; - char *pathinfo_serz = NULL; - - if (!local) { - gf_log(this->name, GF_LOG_ERROR, "Possible NULL deref"); - goto out; - } - - len = snprintf(stripe_size_str, sizeof(stripe_size_str), "%" PRId64, - local->fctx ? local->fctx->stripe_size : 0); - if (len < 0 || len >= sizeof(stripe_size_str)) - goto out; - /* extra bytes for decorations (brackets and <>'s) */ - padding = strlen(this->name) + SLEN(STRIPE_PATHINFO_HEADER) + len + 7; - local->xattr_total_len += (padding + 2); - - pathinfo_serz = GF_MALLOC(local->xattr_total_len, gf_common_mt_char); - if (!pathinfo_serz) - goto out; - - /* xlator info */ - (void)sprintf(pathinfo_serz, "(<" STRIPE_PATHINFO_HEADER "%s:[%s]> ", - this->name, stripe_size_str); - - ret = stripe_xattr_aggregate(pathinfo_serz + padding, local, &tlen); - if (ret) { - gf_log(this->name, GF_LOG_ERROR, "Cannot aggregate pathinfo list"); - GF_FREE(pathinfo_serz); - goto out; - } - - *(pathinfo_serz + padding + tlen) = ')'; - *(pathinfo_serz + padding + tlen + 1) = '\0'; - - *xattr_serz = pathinfo_serz; - - ret = 0; -out: - return ret; -} - -/** - * stripe_get_matching_bs - Get the matching block size for the given path. 
- */ -int32_t -stripe_get_matching_bs(const char *path, stripe_private_t *priv) -{ - struct stripe_options *trav = NULL; - uint64_t block_size = 0; - - GF_VALIDATE_OR_GOTO("stripe", priv, out); - GF_VALIDATE_OR_GOTO("stripe", path, out); - - LOCK(&priv->lock); - { - block_size = priv->block_size; - trav = priv->pattern; - while (trav) { - if (!fnmatch(trav->path_pattern, path, FNM_NOESCAPE)) { - block_size = trav->block_size; - break; - } - trav = trav->next; - } - } - UNLOCK(&priv->lock); - -out: - return block_size; -} - -int32_t -stripe_ctx_handle(xlator_t *this, call_frame_t *prev, stripe_local_t *local, - dict_t *dict) -{ - char key[256] = { - 0, - }; - data_t *data = NULL; - int32_t index = 0; - stripe_private_t *priv = NULL; - - priv = this->private; - - if (!local->fctx) { - local->fctx = GF_CALLOC(1, sizeof(stripe_fd_ctx_t), - gf_stripe_mt_stripe_fd_ctx_t); - if (!local->fctx) { - local->op_errno = ENOMEM; - local->op_ret = -1; - goto out; - } - - local->fctx->static_array = 0; - } - /* Stripe block size */ - sprintf(key, "trusted.%s.stripe-size", this->name); - data = dict_get(dict, key); - if (!data) { - local->xattr_self_heal_needed = 1; - gf_log(this->name, GF_LOG_ERROR, "Failed to get stripe-size"); - goto out; - } else { - if (!local->fctx->stripe_size) { - local->fctx->stripe_size = data_to_int64(data); - } - - if (local->fctx->stripe_size != data_to_int64(data)) { - gf_log(this->name, GF_LOG_WARNING, - "stripe-size mismatch in blocks"); - local->xattr_self_heal_needed = 1; - } - } - - /* Stripe count */ - sprintf(key, "trusted.%s.stripe-count", this->name); - data = dict_get(dict, key); - - if (!data) { - local->xattr_self_heal_needed = 1; - gf_log(this->name, GF_LOG_ERROR, "Failed to get stripe-count"); - goto out; - } - if (!local->fctx->xl_array) { - local->fctx->stripe_count = data_to_int32(data); - if (!local->fctx->stripe_count) { - gf_log(this->name, GF_LOG_ERROR, "error with stripe-count xattr"); - local->op_ret = -1; - local->op_errno = 
EIO; - goto out; - } - - local->fctx->xl_array = GF_CALLOC(local->fctx->stripe_count, - sizeof(xlator_t *), - gf_stripe_mt_xlator_t); - - if (!local->fctx->xl_array) { - local->op_errno = ENOMEM; - local->op_ret = -1; - goto out; - } - } - if (local->fctx->stripe_count != data_to_int32(data)) { - gf_log(this->name, GF_LOG_ERROR, - "error with stripe-count xattr (%d != %d)", - local->fctx->stripe_count, data_to_int32(data)); - local->op_ret = -1; - local->op_errno = EIO; - goto out; - } - - /* index */ - sprintf(key, "trusted.%s.stripe-index", this->name); - data = dict_get(dict, key); - if (!data) { - local->xattr_self_heal_needed = 1; - gf_log(this->name, GF_LOG_ERROR, "Failed to get stripe-index"); - goto out; - } - index = data_to_int32(data); - if (index > priv->child_count) { - gf_log(this->name, GF_LOG_ERROR, "error with stripe-index xattr (%d)", - index); - local->op_ret = -1; - local->op_errno = EIO; - goto out; - } - if (local->fctx->xl_array) { - if (!local->fctx->xl_array[index]) - local->fctx->xl_array[index] = prev->this; - } - - sprintf(key, "trusted.%s.stripe-coalesce", this->name); - data = dict_get(dict, key); - if (!data) { - /* - * The file was probably created prior to coalesce support. - * Assume non-coalesce mode for this file to maintain backwards - * compatibility. 
- */ - gf_log(this->name, GF_LOG_DEBUG, - "missing stripe-coalesce " - "attr, assume non-coalesce mode"); - local->fctx->stripe_coalesce = 0; - } else { - local->fctx->stripe_coalesce = data_to_int32(data); - } - -out: - return 0; -} - -int32_t -stripe_xattr_request_build(xlator_t *this, dict_t *dict, uint64_t stripe_size, - uint32_t stripe_count, uint32_t stripe_index, - uint32_t stripe_coalesce) -{ - char key[256] = { - 0, - }; - int32_t ret = -1; - - sprintf(key, "trusted.%s.stripe-size", this->name); - ret = dict_set_int64(dict, key, stripe_size); - if (ret) { - gf_log(this->name, GF_LOG_WARNING, "failed to set %s in xattr_req dict", - key); - goto out; - } - - sprintf(key, "trusted.%s.stripe-count", this->name); - ret = dict_set_int32(dict, key, stripe_count); - if (ret) { - gf_log(this->name, GF_LOG_WARNING, "failed to set %s in xattr_req dict", - key); - goto out; - } - - sprintf(key, "trusted.%s.stripe-index", this->name); - ret = dict_set_int32(dict, key, stripe_index); - if (ret) { - gf_log(this->name, GF_LOG_WARNING, "failed to set %s in xattr_req dict", - key); - goto out; - } - - sprintf(key, "trusted.%s.stripe-coalesce", this->name); - ret = dict_set_int32(dict, key, stripe_coalesce); - if (ret) { - gf_log(this->name, GF_LOG_WARNING, "failed to set %s in xattr_req_dict", - key); - goto out; - } -out: - return ret; -} - -static int -set_default_block_size(stripe_private_t *priv, char *num) -{ - int ret = -1; - GF_VALIDATE_OR_GOTO("stripe", THIS, out); - GF_VALIDATE_OR_GOTO(THIS->name, priv, out); - GF_VALIDATE_OR_GOTO(THIS->name, num, out); - - if (gf_string2bytesize_uint64(num, &priv->block_size) != 0) { - gf_log(THIS->name, GF_LOG_ERROR, "invalid number format \"%s\"", num); - goto out; - } - - ret = 0; - -out: - return ret; -} - -int -set_stripe_block_size(xlator_t *this, stripe_private_t *priv, char *data) -{ - int ret = -1; - char *tmp_str = NULL; - char *tmp_str1 = NULL; - char *dup_str = NULL; - char *stripe_str = NULL; - char *pattern = NULL; - 
char *num = NULL; - struct stripe_options *temp_stripeopt = NULL; - struct stripe_options *stripe_opt = NULL; - - if (!this || !priv || !data) - goto out; - - /* Get the pattern for striping. - "option block-size *avi:10MB" etc */ - stripe_str = strtok_r(data, ",", &tmp_str); - while (stripe_str) { - dup_str = gf_strdup(stripe_str); - stripe_opt = GF_CALLOC(1, sizeof(struct stripe_options), - gf_stripe_mt_stripe_options); - if (!stripe_opt) { - goto out; - } - - pattern = strtok_r(dup_str, ":", &tmp_str1); - num = strtok_r(NULL, ":", &tmp_str1); - if (!num) { - num = pattern; - pattern = "*"; - ret = set_default_block_size(priv, num); - if (ret) - goto out; - } - if (gf_string2bytesize_uint64(num, &stripe_opt->block_size) != 0) { - gf_log(this->name, GF_LOG_ERROR, "invalid number format \"%s\"", - num); - goto out; - } - - if (stripe_opt->block_size < STRIPE_MIN_BLOCK_SIZE) { - gf_log(this->name, GF_LOG_ERROR, - "Invalid Block-size: " - "%s. Should be at least %llu bytes", - num, STRIPE_MIN_BLOCK_SIZE); - goto out; - } - if (stripe_opt->block_size % 512) { - gf_log(this->name, GF_LOG_ERROR, - "Block-size: %s should" - " be a multiple of 512 bytes", - num); - goto out; - } - - memcpy(stripe_opt->path_pattern, pattern, strlen(pattern)); - - gf_log(this->name, GF_LOG_DEBUG, - "block-size : pattern %s : size %" PRId64, - stripe_opt->path_pattern, stripe_opt->block_size); - - if (priv->pattern) - temp_stripeopt = NULL; - else - temp_stripeopt = priv->pattern; - - stripe_opt->next = temp_stripeopt; - - priv->pattern = stripe_opt; - stripe_opt = NULL; - - GF_FREE(dup_str); - dup_str = NULL; - - stripe_str = strtok_r(NULL, ",", &tmp_str); - } - - ret = 0; -out: - - GF_FREE(dup_str); - - GF_FREE(stripe_opt); - - return ret; -} - -int32_t -stripe_iatt_merge(struct iatt *from, struct iatt *to) -{ - if (to->ia_size < from->ia_size) - to->ia_size = from->ia_size; - if (to->ia_mtime < from->ia_mtime) - to->ia_mtime = from->ia_mtime; - if (to->ia_ctime < from->ia_ctime) - 
/*
 * Translate a logical file offset into the offset inside one stripe
 * member's file when chunks are stored back to back (coalesced).
 *
 * @offset:       logical offset in the striped file.
 * @stripe_size:  size of one chunk in bytes.
 * @stripe_count: number of stripe members.
 *
 * Returns (number of complete stripe lines before @offset) * stripe_size
 * plus the position of @offset inside its chunk.  When @stripe_size is 0
 * or @stripe_count is not positive there is no layout to apply and
 * @offset is returned unchanged instead of dividing by zero.
 */
off_t
coalesced_offset(off_t offset, uint64_t stripe_size, int stripe_count)
{
    /* 64-bit on purpose: size_t would truncate the product on ILP32. */
    uint64_t line_size = 0;
    uint64_t stripe_num = 0;

    if (!stripe_size || stripe_count <= 0)
        return offset;

    line_size = stripe_size * (uint64_t)stripe_count;
    stripe_num = (uint64_t)offset / line_size;

    return (off_t)((stripe_num * stripe_size) +
                   ((uint64_t)offset % stripe_size));
}

/*
 * Estimate the logical file size implied by the size of one stripe
 * member's coalesced file.
 *
 * @size:         size of the member's file.
 * @stripe_size:  size of one chunk in bytes.
 * @stripe_count: number of stripe members.
 * @stripe_index: position of this member within a stripe line.
 *
 * Returns 0 for an empty member file.  When @stripe_size is 0 the value
 * is returned unchanged instead of dividing by zero.
 */
off_t
uncoalesced_size(off_t size, uint64_t stripe_size, int stripe_count,
                 int stripe_index)
{
    uint64_t nr_full_stripe_chunks = 0, mod = 0;

    if (!size || !stripe_size)
        return size;

    /*
     * Estimate the number of fully written stripes from the
     * local file size. Each stripe_size chunk corresponds to
     * a stripe.
     */
    nr_full_stripe_chunks = ((uint64_t)size / stripe_size) * stripe_count;
    mod = (uint64_t)size % stripe_size;

    if (!mod) {
        /*
         * There is no remainder, thus we could have overestimated
         * the size of the file in terms of chunks. Trim the number
         * of chunks by the following stripe members and leave it
         * up to those nodes to respond with a larger size (if
         * necessary).
         */
        nr_full_stripe_chunks -= stripe_count - (stripe_index + 1);
        size = nr_full_stripe_chunks * stripe_size;
    } else {
        /*
         * There is a remainder and thus we own the last chunk of the
         * file. Add the preceding stripe members of the final stripe
         * along with the remainder to calculate the exact size.
         */
        nr_full_stripe_chunks += stripe_index;
        size = nr_full_stripe_chunks * stripe_size + mod;
    }

    return size;
}
- - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -#ifndef __STRIPE_MEM_TYPES_H__ -#define __STRIPE_MEM_TYPES_H__ - -#include <glusterfs/mem-types.h> - -enum gf_stripe_mem_types_ { - gf_stripe_mt_iovec = gf_common_mt_end + 1, - gf_stripe_mt_stripe_replies, - gf_stripe_mt_stripe_fd_ctx_t, - gf_stripe_mt_char, - gf_stripe_mt_int8_t, - gf_stripe_mt_int32_t, - gf_stripe_mt_xlator_t, - gf_stripe_mt_stripe_private_t, - gf_stripe_mt_stripe_options, - gf_stripe_mt_xattr_sort_t, - gf_stripe_mt_end -}; -#endif diff --git a/xlators/cluster/stripe/src/stripe.c b/xlators/cluster/stripe/src/stripe.c deleted file mode 100644 index 557a818..0000000 --- a/xlators/cluster/stripe/src/stripe.c +++ /dev/null @@ -1,5612 +0,0 @@ -/* - Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -/** - * xlators/cluster/stripe: - * Stripe translator, stripes the data across its child nodes, - * as per the options given in the volfile. The striping works - * fairly simple. It writes files at different offset as per - * calculation. So, 'ls -l' output at the real posix level will - * show file size bigger than the actual size. But when one does - * 'df' or 'du <file>', real size of the file on the server is shown. - * - * WARNING: - * Stripe translator can't regenerate data if a child node gets disconnected. - * So, no 'self-heal' for stripe. Hence the advice, use stripe only when its - * very much necessary, or else, use it in combination with AFR, to have a - * backup copy. 
- */ -#include <fnmatch.h> -#include "stripe.h" -#include "libxlator.h" -#include <glusterfs/byte-order.h> -#include <glusterfs/statedump.h> - -struct volume_options options[]; - -int32_t -stripe_sh_chown_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *preop, - struct iatt *postop, dict_t *xdata) -{ - int callcnt = -1; - stripe_local_t *local = NULL; - - if (!this || !frame || !frame->local) { - gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - - local = frame->local; - - LOCK(&frame->lock); - { - callcnt = --local->call_count; - } - UNLOCK(&frame->lock); - - if (!callcnt) { - STRIPE_STACK_DESTROY(frame); - } -out: - return 0; -} - -int32_t -stripe_sh_make_entry_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - stripe_local_t *local = NULL; - call_frame_t *prev = NULL; - - if (!frame || !frame->local || !cookie || !this) { - gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - - prev = cookie; - local = frame->local; - - STACK_WIND(frame, stripe_sh_chown_cbk, prev->this, - prev->this->fops->setattr, &local->loc, &local->stbuf, - (GF_SET_ATTR_UID | GF_SET_ATTR_GID), NULL); - -out: - return 0; -} - -int32_t -stripe_entry_self_heal(call_frame_t *frame, xlator_t *this, - stripe_local_t *local) -{ - xlator_list_t *trav = NULL; - call_frame_t *rframe = NULL; - stripe_local_t *rlocal = NULL; - stripe_private_t *priv = NULL; - dict_t *xdata = NULL; - int ret = 0; - - if (!local || !this || !frame) { - gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - - if (!(IA_ISREG(local->stbuf.ia_type) || IA_ISDIR(local->stbuf.ia_type))) - return 0; - - priv = this->private; - trav = this->children; - rframe = copy_frame(frame); - if (!rframe) { - goto out; - } - rlocal = mem_get0(this->local_pool); - if (!rlocal) { - 
goto out; - } - rframe->local = rlocal; - rlocal->call_count = priv->child_count; - loc_copy(&rlocal->loc, &local->loc); - memcpy(&rlocal->stbuf, &local->stbuf, sizeof(struct iatt)); - - xdata = dict_new(); - if (!xdata) - goto out; - - ret = dict_set_gfuuid(xdata, "gfid-req", local->stbuf.ia_gfid, true); - if (ret) - gf_log(this->name, GF_LOG_WARNING, "%s: failed to set gfid-req", - local->loc.path); - - while (trav) { - if (IA_ISREG(local->stbuf.ia_type)) { - STACK_WIND( - rframe, stripe_sh_make_entry_cbk, trav->xlator, - trav->xlator->fops->mknod, &local->loc, - st_mode_from_ia(local->stbuf.ia_prot, local->stbuf.ia_type), 0, - 0, xdata); - } - if (IA_ISDIR(local->stbuf.ia_type)) { - STACK_WIND( - rframe, stripe_sh_make_entry_cbk, trav->xlator, - trav->xlator->fops->mkdir, &local->loc, - st_mode_from_ia(local->stbuf.ia_prot, local->stbuf.ia_type), 0, - xdata); - } - trav = trav->next; - } - - if (xdata) - dict_unref(xdata); - return 0; - -out: - if (rframe) - STRIPE_STACK_DESTROY(rframe); - - return 0; -} - -int32_t -stripe_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *xdata, struct iatt *postparent) -{ - int32_t callcnt = 0; - stripe_local_t *local = NULL; - call_frame_t *prev = NULL; - int ret = 0; - - if (!this || !frame || !frame->local || !cookie) { - gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - - prev = cookie; - local = frame->local; - - LOCK(&frame->lock); - { - callcnt = --local->call_count; - - if (op_ret == -1) { - if ((op_errno != ENOENT) && (op_errno != ESTALE)) - gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s", - prev->this->name, strerror(op_errno)); - if (local->op_errno != ESTALE) - local->op_errno = op_errno; - if (((op_errno != ENOENT) && (op_errno != ENOTCONN) && - (op_errno != ESTALE)) || - (prev->this == FIRST_CHILD(this))) - local->failed = 1; - if (op_errno == ENOENT) - local->entry_self_heal_needed = 1; - } - 
- if (op_ret >= 0) { - local->op_ret = 0; - if (IA_ISREG(buf->ia_type)) { - ret = stripe_ctx_handle(this, prev, local, xdata); - if (ret) - gf_log(this->name, GF_LOG_ERROR, - "Error getting fctx info from" - " dict"); - } - - if (FIRST_CHILD(this) == prev->this) { - local->stbuf = *buf; - local->postparent = *postparent; - local->inode = inode_ref(inode); - if (xdata) - local->xdata = dict_ref(xdata); - if (local->xattr) { - stripe_aggregate_xattr(local->xdata, local->xattr); - dict_unref(local->xattr); - local->xattr = NULL; - } - } - - if (!local->xdata && !local->xattr) { - local->xattr = dict_ref(xdata); - } else if (local->xdata) { - stripe_aggregate_xattr(local->xdata, xdata); - } else if (local->xattr) { - stripe_aggregate_xattr(local->xattr, xdata); - } - - local->stbuf_blocks += buf->ia_blocks; - local->postparent_blocks += postparent->ia_blocks; - - correct_file_size(buf, local->fctx, prev); - - if (local->stbuf_size < buf->ia_size) - local->stbuf_size = buf->ia_size; - if (local->postparent_size < postparent->ia_size) - local->postparent_size = postparent->ia_size; - - if (gf_uuid_is_null(local->ia_gfid)) - gf_uuid_copy(local->ia_gfid, buf->ia_gfid); - - /* Make sure the gfid on all the nodes are same */ - if (gf_uuid_compare(local->ia_gfid, buf->ia_gfid)) { - gf_log(this->name, GF_LOG_WARNING, - "%s: gfid different on subvolume %s", local->loc.path, - prev->this->name); - } - } - } - UNLOCK(&frame->lock); - - if (!callcnt) { - if (local->op_ret == 0 && local->entry_self_heal_needed && - !gf_uuid_is_null(local->loc.inode->gfid)) - stripe_entry_self_heal(frame, this, local); - - if (local->failed) - local->op_ret = -1; - - if (local->op_ret != -1) { - local->stbuf.ia_blocks = local->stbuf_blocks; - local->stbuf.ia_size = local->stbuf_size; - local->postparent.ia_blocks = local->postparent_blocks; - local->postparent.ia_size = local->postparent_size; - inode_ctx_put(local->inode, this, (uint64_t)(long)local->fctx); - } - - STRIPE_STACK_UNWIND(lookup, 
frame, local->op_ret, local->op_errno, - local->inode, &local->stbuf, local->xdata, - &local->postparent); - } -out: - return 0; -} - -int32_t -stripe_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) -{ - stripe_local_t *local = NULL; - xlator_list_t *trav = NULL; - stripe_private_t *priv = NULL; - int32_t op_errno = EINVAL; - int64_t filesize = 0; - int ret = 0; - uint64_t tmpctx = 0; - - VALIDATE_OR_GOTO(frame, err); - VALIDATE_OR_GOTO(this, err); - VALIDATE_OR_GOTO(loc, err); - VALIDATE_OR_GOTO(loc->inode, err); - - priv = this->private; - trav = this->children; - - /* Initialization */ - local = mem_get0(this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - local->op_ret = -1; - frame->local = local; - loc_copy(&local->loc, loc); - - inode_ctx_get(local->inode, this, &tmpctx); - if (tmpctx) - local->fctx = (stripe_fd_ctx_t *)(long)tmpctx; - - /* quick-read friendly changes */ - if (xdata && dict_get(xdata, GF_CONTENT_KEY)) { - ret = dict_get_int64(xdata, GF_CONTENT_KEY, &filesize); - if (!ret && (filesize > priv->block_size)) - dict_del(xdata, GF_CONTENT_KEY); - } - - /* get stripe-size xattr on lookup. This would be required for - * open/read/write/pathinfo calls. Hence we send down the request - * even when type == IA_INVAL */ - - /* - * We aren't guaranteed to have xdata here. We need the format info for - * the file, so allocate xdata if necessary. 
- */ - if (!xdata) - xdata = dict_new(); - else - xdata = dict_ref(xdata); - - if (xdata && - (IA_ISREG(loc->inode->ia_type) || (loc->inode->ia_type == IA_INVAL))) { - ret = stripe_xattr_request_build(this, xdata, 8, 4, 4, 0); - if (ret) - gf_log(this->name, GF_LOG_ERROR, - "Failed to build" - " xattr request for %s", - loc->path); - } - - /* Every time in stripe lookup, all child nodes - should be looked up */ - local->call_count = priv->child_count; - while (trav) { - STACK_WIND(frame, stripe_lookup_cbk, trav->xlator, - trav->xlator->fops->lookup, loc, xdata); - trav = trav->next; - } - - dict_unref(xdata); - - return 0; -err: - STRIPE_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL); - return 0; -} - -int32_t -stripe_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, - dict_t *xdata) -{ - int32_t callcnt = 0; - stripe_local_t *local = NULL; - call_frame_t *prev = NULL; - - if (!this || !frame || !frame->local || !cookie) { - gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - prev = cookie; - local = frame->local; - - LOCK(&frame->lock); - { - callcnt = --local->call_count; - - if (op_ret == -1) { - gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s", - prev->this->name, strerror(op_errno)); - local->op_errno = op_errno; - if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this))) - local->failed = 1; - } - - if (op_ret == 0) { - local->op_ret = 0; - - if (FIRST_CHILD(this) == prev->this) { - local->stbuf = *buf; - } - - local->stbuf_blocks += buf->ia_blocks; - - correct_file_size(buf, local->fctx, prev); - - if (local->stbuf_size < buf->ia_size) - local->stbuf_size = buf->ia_size; - } - } - UNLOCK(&frame->lock); - - if (!callcnt) { - if (local->failed) - local->op_ret = -1; - - if (local->op_ret != -1) { - local->stbuf.ia_size = local->stbuf_size; - local->stbuf.ia_blocks = local->stbuf_blocks; - } - - STRIPE_STACK_UNWIND(stat, frame, local->op_ret, 
local->op_errno, - &local->stbuf, NULL); - } -out: - return 0; -} - -int32_t -stripe_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) -{ - xlator_list_t *trav = NULL; - stripe_local_t *local = NULL; - stripe_private_t *priv = NULL; - stripe_fd_ctx_t *fctx = NULL; - int32_t op_errno = EINVAL; - - VALIDATE_OR_GOTO(frame, err); - VALIDATE_OR_GOTO(this, err); - VALIDATE_OR_GOTO(loc, err); - VALIDATE_OR_GOTO(loc->path, err); - VALIDATE_OR_GOTO(loc->inode, err); - - priv = this->private; - trav = this->children; - - if (priv->first_child_down) { - op_errno = ENOTCONN; - goto err; - } - - /* Initialization */ - local = mem_get0(this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - local->op_ret = -1; - frame->local = local; - local->call_count = priv->child_count; - - if (IA_ISREG(loc->inode->ia_type)) { - inode_ctx_get(loc->inode, this, (uint64_t *)&fctx); - if (!fctx) - goto err; - local->fctx = fctx; - } - - while (trav) { - STACK_WIND(frame, stripe_stat_cbk, trav->xlator, - trav->xlator->fops->stat, loc, NULL); - trav = trav->next; - } - - return 0; - -err: - STRIPE_STACK_UNWIND(stat, frame, -1, op_errno, NULL, NULL); - return 0; -} - -int32_t -stripe_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct statvfs *stbuf, - dict_t *xdata) -{ - stripe_local_t *local = NULL; - int32_t callcnt = 0; - - if (!this || !frame || !frame->local) { - gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - local = frame->local; - - LOCK(&frame->lock); - { - callcnt = --local->call_count; - - if (op_ret && (op_errno != ENOTCONN)) { - local->op_errno = op_errno; - } - if (op_ret == 0) { - struct statvfs *dict_buf = &local->statvfs_buf; - dict_buf->f_bsize = stbuf->f_bsize; - dict_buf->f_frsize = stbuf->f_frsize; - dict_buf->f_blocks += stbuf->f_blocks; - dict_buf->f_bfree += stbuf->f_bfree; - dict_buf->f_bavail += stbuf->f_bavail; - dict_buf->f_files += stbuf->f_files; - 
dict_buf->f_ffree += stbuf->f_ffree; - dict_buf->f_favail += stbuf->f_favail; - dict_buf->f_fsid = stbuf->f_fsid; - dict_buf->f_flag = stbuf->f_flag; - dict_buf->f_namemax = stbuf->f_namemax; - local->op_ret = 0; - } - } - UNLOCK(&frame->lock); - - if (!callcnt) { - STRIPE_STACK_UNWIND(statfs, frame, local->op_ret, local->op_errno, - &local->statvfs_buf, NULL); - } -out: - return 0; -} - -int32_t -stripe_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) -{ - stripe_local_t *local = NULL; - xlator_list_t *trav = NULL; - stripe_private_t *priv = NULL; - int32_t op_errno = EINVAL; - - VALIDATE_OR_GOTO(frame, err); - VALIDATE_OR_GOTO(this, err); - VALIDATE_OR_GOTO(loc, err); - - trav = this->children; - priv = this->private; - - /* Initialization */ - local = mem_get0(this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - local->op_ret = -1; - local->op_errno = ENOTCONN; - frame->local = local; - - local->call_count = priv->child_count; - while (trav) { - STACK_WIND(frame, stripe_statfs_cbk, trav->xlator, - trav->xlator->fops->statfs, loc, NULL); - trav = trav->next; - } - - return 0; -err: - STRIPE_STACK_UNWIND(statfs, frame, -1, op_errno, NULL, NULL); - return 0; -} - -int32_t -stripe_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) -{ - int32_t callcnt = 0; - stripe_local_t *local = NULL; - call_frame_t *prev = NULL; - - if (!this || !frame || !frame->local || !cookie) { - gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - - prev = cookie; - local = frame->local; - - LOCK(&frame->lock); - { - callcnt = --local->call_count; - - if (op_ret == -1) { - gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s", - prev->this->name, strerror(op_errno)); - local->op_errno = op_errno; - if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this))) - local->failed = 1; - } - - if (op_ret == 0) { - 
local->op_ret = 0; - if (FIRST_CHILD(this) == prev->this) { - local->pre_buf = *prebuf; - local->post_buf = *postbuf; - } - - local->prebuf_blocks += prebuf->ia_blocks; - local->postbuf_blocks += postbuf->ia_blocks; - - correct_file_size(prebuf, local->fctx, prev); - correct_file_size(postbuf, local->fctx, prev); - - if (local->prebuf_size < prebuf->ia_size) - local->prebuf_size = prebuf->ia_size; - - if (local->postbuf_size < postbuf->ia_size) - local->postbuf_size = postbuf->ia_size; - } - } - UNLOCK(&frame->lock); - - if (!callcnt) { - if (local->failed) - local->op_ret = -1; - - if (local->op_ret != -1) { - local->pre_buf.ia_blocks = local->prebuf_blocks; - local->pre_buf.ia_size = local->prebuf_size; - local->post_buf.ia_blocks = local->postbuf_blocks; - local->post_buf.ia_size = local->postbuf_size; - } - - STRIPE_STACK_UNWIND(truncate, frame, local->op_ret, local->op_errno, - &local->pre_buf, &local->post_buf, NULL); - } -out: - return 0; -} - -int32_t -stripe_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, - dict_t *xdata) -{ - stripe_local_t *local = NULL; - stripe_private_t *priv = NULL; - stripe_fd_ctx_t *fctx = NULL; - int32_t op_errno = EINVAL; - int i, eof_idx; - off_t dest_offset, tmp_offset; - - VALIDATE_OR_GOTO(frame, err); - VALIDATE_OR_GOTO(this, err); - VALIDATE_OR_GOTO(loc, err); - VALIDATE_OR_GOTO(loc->path, err); - VALIDATE_OR_GOTO(loc->inode, err); - - priv = this->private; - - if (priv->first_child_down) { - op_errno = ENOTCONN; - goto err; - } - - /* Initialization */ - local = mem_get0(this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - local->op_ret = -1; - frame->local = local; - local->call_count = priv->child_count; - - inode_ctx_get(loc->inode, this, (uint64_t *)&fctx); - if (!fctx) { - gf_log(this->name, GF_LOG_ERROR, "no stripe context"); - op_errno = EINVAL; - goto err; - } - - local->fctx = fctx; - eof_idx = (offset / fctx->stripe_size) % fctx->stripe_count; - - for (i = 0; i < 
fctx->stripe_count; i++) { - if (!fctx->xl_array[i]) { - gf_log(this->name, GF_LOG_ERROR, "no xlator at index %d", i); - op_errno = EINVAL; - goto err; - } - - if (fctx->stripe_coalesce) { - /* - * The node that owns EOF is truncated to the exact - * coalesced offset. Nodes prior to this index should - * be rounded up to the size of the complete stripe, - * while nodes after this index should be rounded down - * to the size of the previous stripe. - */ - if (i < eof_idx) - tmp_offset = gf_roof(offset, - fctx->stripe_size * fctx->stripe_count); - else if (i > eof_idx) - tmp_offset = gf_floor(offset, - fctx->stripe_size * fctx->stripe_count); - else - tmp_offset = offset; - - dest_offset = coalesced_offset(tmp_offset, fctx->stripe_size, - fctx->stripe_count); - } else { - dest_offset = offset; - } - - STACK_WIND(frame, stripe_truncate_cbk, fctx->xl_array[i], - fctx->xl_array[i]->fops->truncate, loc, dest_offset, NULL); - } - - return 0; -err: - STRIPE_STACK_UNWIND(truncate, frame, -1, op_errno, NULL, NULL, NULL); - return 0; -} - -int32_t -stripe_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *preop, - struct iatt *postop, dict_t *xdata) -{ - int32_t callcnt = 0; - stripe_local_t *local = NULL; - call_frame_t *prev = NULL; - - if (!this || !frame || !frame->local || !cookie) { - gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - - prev = cookie; - local = frame->local; - - LOCK(&frame->lock); - { - callcnt = --local->call_count; - - if (op_ret == -1) { - gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s", - prev->this->name, strerror(op_errno)); - local->op_errno = op_errno; - if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this))) - local->failed = 1; - } - - if (op_ret == 0) { - local->op_ret = 0; - - if (FIRST_CHILD(this) == prev->this) { - local->pre_buf = *preop; - local->post_buf = *postop; - } - - local->prebuf_blocks += preop->ia_blocks; - local->postbuf_blocks 
+= postop->ia_blocks; - - correct_file_size(preop, local->fctx, prev); - correct_file_size(postop, local->fctx, prev); - - if (local->prebuf_size < preop->ia_size) - local->prebuf_size = preop->ia_size; - if (local->postbuf_size < postop->ia_size) - local->postbuf_size = postop->ia_size; - } - } - UNLOCK(&frame->lock); - - if (!callcnt) { - if (local->failed) - local->op_ret = -1; - - if (local->op_ret != -1) { - local->pre_buf.ia_blocks = local->prebuf_blocks; - local->pre_buf.ia_size = local->prebuf_size; - local->post_buf.ia_blocks = local->postbuf_blocks; - local->post_buf.ia_size = local->postbuf_size; - } - - STRIPE_STACK_UNWIND(setattr, frame, local->op_ret, local->op_errno, - &local->pre_buf, &local->post_buf, NULL); - } -out: - return 0; -} - -int32_t -stripe_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, - struct iatt *stbuf, int32_t valid, dict_t *xdata) -{ - xlator_list_t *trav = NULL; - stripe_local_t *local = NULL; - stripe_private_t *priv = NULL; - stripe_fd_ctx_t *fctx = NULL; - int32_t op_errno = EINVAL; - - VALIDATE_OR_GOTO(frame, err); - VALIDATE_OR_GOTO(this, err); - VALIDATE_OR_GOTO(loc, err); - VALIDATE_OR_GOTO(loc->path, err); - VALIDATE_OR_GOTO(loc->inode, err); - - priv = this->private; - trav = this->children; - - if (priv->first_child_down) { - op_errno = ENOTCONN; - goto err; - } - - /* Initialization */ - local = mem_get0(this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - local->op_ret = -1; - frame->local = local; - if (!IA_ISDIR(loc->inode->ia_type) && !IA_ISREG(loc->inode->ia_type)) { - local->call_count = 1; - STACK_WIND(frame, stripe_setattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, NULL); - return 0; - } - - if (IA_ISREG(loc->inode->ia_type)) { - inode_ctx_get(loc->inode, this, (uint64_t *)&fctx); - if (!fctx) - goto err; - local->fctx = fctx; - } - - local->call_count = priv->child_count; - while (trav) { - STACK_WIND(frame, stripe_setattr_cbk, trav->xlator, - 
trav->xlator->fops->setattr, loc, stbuf, valid, NULL); - trav = trav->next; - } - - return 0; -err: - STRIPE_STACK_UNWIND(setattr, frame, -1, op_errno, NULL, NULL, NULL); - return 0; -} - -int32_t -stripe_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iatt *stbuf, int32_t valid, dict_t *xdata) -{ - stripe_local_t *local = NULL; - stripe_private_t *priv = NULL; - xlator_list_t *trav = NULL; - int32_t op_errno = EINVAL; - - VALIDATE_OR_GOTO(frame, err); - VALIDATE_OR_GOTO(this, err); - VALIDATE_OR_GOTO(fd, err); - VALIDATE_OR_GOTO(fd->inode, err); - - priv = this->private; - trav = this->children; - - /* Initialization */ - local = mem_get0(this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - local->op_ret = -1; - frame->local = local; - local->call_count = priv->child_count; - - while (trav) { - STACK_WIND(frame, stripe_setattr_cbk, trav->xlator, - trav->xlator->fops->fsetattr, fd, stbuf, valid, NULL); - trav = trav->next; - } - - return 0; -err: - STRIPE_STACK_UNWIND(fsetattr, frame, -1, op_errno, NULL, NULL, NULL); - return 0; -} - -int32_t -stripe_stack_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, - struct iatt *preoldparent, struct iatt *postoldparent, - struct iatt *prenewparent, struct iatt *postnewparent, - dict_t *xdata) -{ - int32_t callcnt = 0; - stripe_local_t *local = NULL; - call_frame_t *prev = NULL; - - if (!this || !frame || !frame->local || !cookie) { - gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - - prev = cookie; - local = frame->local; - - LOCK(&frame->lock); - { - callcnt = --local->call_count; - - if (op_ret == -1) { - gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s", - prev->this->name, strerror(op_errno)); - local->op_errno = op_errno; - if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this))) - local->failed = 1; - } - - if (op_ret == 0) { - local->op_ret = 0; - - local->stbuf.ia_blocks += 
buf->ia_blocks; - local->preparent.ia_blocks += preoldparent->ia_blocks; - local->postparent.ia_blocks += postoldparent->ia_blocks; - local->pre_buf.ia_blocks += prenewparent->ia_blocks; - local->post_buf.ia_blocks += postnewparent->ia_blocks; - - correct_file_size(buf, local->fctx, prev); - - if (local->stbuf.ia_size < buf->ia_size) - local->stbuf.ia_size = buf->ia_size; - - if (local->preparent.ia_size < preoldparent->ia_size) - local->preparent.ia_size = preoldparent->ia_size; - - if (local->postparent.ia_size < postoldparent->ia_size) - local->postparent.ia_size = postoldparent->ia_size; - - if (local->pre_buf.ia_size < prenewparent->ia_size) - local->pre_buf.ia_size = prenewparent->ia_size; - - if (local->post_buf.ia_size < postnewparent->ia_size) - local->post_buf.ia_size = postnewparent->ia_size; - } - } - UNLOCK(&frame->lock); - - if (!callcnt) { - if (local->failed) - local->op_ret = -1; - - STRIPE_STACK_UNWIND(rename, frame, local->op_ret, local->op_errno, - &local->stbuf, &local->preparent, - &local->postparent, &local->pre_buf, - &local->post_buf, NULL); - } -out: - return 0; -} - -int32_t -stripe_first_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, - struct iatt *preoldparent, struct iatt *postoldparent, - struct iatt *prenewparent, struct iatt *postnewparent, - dict_t *xdata) -{ - stripe_local_t *local = NULL; - xlator_list_t *trav = NULL; - - if (!this || !frame || !frame->local) { - gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); - op_errno = EINVAL; - goto unwind; - } - - if (op_ret == -1) { - goto unwind; - } - - local = frame->local; - trav = this->children; - - local->stbuf = *buf; - local->preparent = *preoldparent; - local->postparent = *postoldparent; - local->pre_buf = *prenewparent; - local->post_buf = *postnewparent; - - local->op_ret = 0; - local->call_count--; - - trav = trav->next; /* Skip first child */ - while (trav) { - STACK_WIND(frame, 
stripe_stack_rename_cbk, trav->xlator, - trav->xlator->fops->rename, &local->loc, &local->loc2, NULL); - trav = trav->next; - } - return 0; - -unwind: - STRIPE_STACK_UNWIND(rename, frame, -1, op_errno, buf, preoldparent, - postoldparent, prenewparent, postnewparent, NULL); - return 0; -} - -int32_t -stripe_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, - dict_t *xdata) -{ - stripe_private_t *priv = NULL; - stripe_local_t *local = NULL; - xlator_list_t *trav = NULL; - stripe_fd_ctx_t *fctx = NULL; - int32_t op_errno = EINVAL; - - VALIDATE_OR_GOTO(frame, err); - VALIDATE_OR_GOTO(this, err); - VALIDATE_OR_GOTO(oldloc, err); - VALIDATE_OR_GOTO(oldloc->path, err); - VALIDATE_OR_GOTO(oldloc->inode, err); - VALIDATE_OR_GOTO(newloc, err); - - priv = this->private; - trav = this->children; - - /* If any one node is down, don't allow rename */ - if (priv->nodes_down) { - op_errno = ENOTCONN; - goto err; - } - - /* Initialization */ - local = mem_get0(this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - - frame->local = local; - - local->op_ret = -1; - loc_copy(&local->loc, oldloc); - loc_copy(&local->loc2, newloc); - - local->call_count = priv->child_count; - - if (IA_ISREG(oldloc->inode->ia_type)) { - inode_ctx_get(oldloc->inode, this, (uint64_t *)&fctx); - if (!fctx) - goto err; - local->fctx = fctx; - } - - STACK_WIND(frame, stripe_first_rename_cbk, trav->xlator, - trav->xlator->fops->rename, oldloc, newloc, NULL); - - return 0; -err: - STRIPE_STACK_UNWIND(rename, frame, -1, op_errno, NULL, NULL, NULL, NULL, - NULL, NULL); - return 0; -} -int32_t -stripe_first_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *preparent, struct iatt *postparent, - dict_t *xdata) -{ - stripe_local_t *local = NULL; - call_frame_t *prev = NULL; - - if (!this || !frame || !frame->local || !cookie) { - gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - - prev = 
cookie; - local = frame->local; - - if (op_ret == -1) { - gf_log(this->name, GF_LOG_DEBUG, "%s returned %s", prev->this->name, - strerror(op_errno)); - goto out; - } - local->op_ret = 0; - local->preparent = *preparent; - local->postparent = *postparent; - local->preparent_blocks += preparent->ia_blocks; - local->postparent_blocks += postparent->ia_blocks; - - STRIPE_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno, - &local->preparent, &local->postparent, xdata); - return 0; -out: - STRIPE_STACK_UNWIND(unlink, frame, -1, op_errno, NULL, NULL, NULL); - - return 0; -} - -int32_t -stripe_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - int32_t callcnt = 0; - stripe_local_t *local = NULL; - call_frame_t *prev = NULL; - - if (!this || !frame || !frame->local || !cookie) { - gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - - prev = cookie; - local = frame->local; - - LOCK(&frame->lock); - { - callcnt = --local->call_count; - - if (op_ret == -1) { - gf_log(this->name, GF_LOG_DEBUG, "%s returned %s", prev->this->name, - strerror(op_errno)); - local->op_errno = op_errno; - if (op_errno != ENOENT) { - local->failed = 1; - local->op_ret = op_ret; - } - } - } - UNLOCK(&frame->lock); - - if (callcnt == 1) { - if (local->failed) { - op_errno = local->op_errno; - goto out; - } - STACK_WIND(frame, stripe_first_unlink_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, &local->loc, local->xflag, - local->xdata); - } - return 0; -out: - STRIPE_STACK_UNWIND(unlink, frame, -1, op_errno, NULL, NULL, NULL); - - return 0; -} - -int32_t -stripe_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, - dict_t *xdata) -{ - xlator_list_t *trav = NULL; - stripe_local_t *local = NULL; - stripe_private_t *priv = NULL; - int32_t op_errno = EINVAL; - - VALIDATE_OR_GOTO(frame, err); - VALIDATE_OR_GOTO(this, err); - 
VALIDATE_OR_GOTO(loc, err); - VALIDATE_OR_GOTO(loc->path, err); - VALIDATE_OR_GOTO(loc->inode, err); - - priv = this->private; - trav = this->children; - - if (priv->first_child_down) { - op_errno = ENOTCONN; - goto err; - } - - /* Don't unlink a file if a node is down */ - if (priv->nodes_down) { - op_errno = ENOTCONN; - goto err; - } - - /* Initialization */ - local = mem_get0(this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - local->op_ret = -1; - loc_copy(&local->loc, loc); - local->xflag = xflag; - - if (xdata) - local->xdata = dict_ref(xdata); - - frame->local = local; - local->call_count = priv->child_count; - trav = trav->next; /* Skip the first child */ - - while (trav) { - STACK_WIND(frame, stripe_unlink_cbk, trav->xlator, - trav->xlator->fops->unlink, loc, xflag, xdata); - trav = trav->next; - } - - return 0; -err: - STRIPE_STACK_UNWIND(unlink, frame, -1, op_errno, NULL, NULL, NULL); - return 0; -} - -int32_t -stripe_first_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - stripe_local_t *local = NULL; - - if (!this || !frame || !frame->local) { - gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); - op_errno = EINVAL; - goto err; - } - - if (op_ret == -1) { - goto err; - } - - local = frame->local; - local->op_ret = 0; - - local->call_count--; /* First child successful */ - - local->preparent = *preparent; - local->postparent = *postparent; - local->preparent_size = preparent->ia_size; - local->postparent_size = postparent->ia_size; - local->preparent_blocks += preparent->ia_blocks; - local->postparent_blocks += postparent->ia_blocks; - - STRIPE_STACK_UNWIND(rmdir, frame, local->op_ret, local->op_errno, - &local->preparent, &local->postparent, xdata); - return 0; -err: - STRIPE_STACK_UNWIND(rmdir, frame, op_ret, op_errno, NULL, NULL, NULL); - return 0; -} - -int32_t -stripe_rmdir_cbk(call_frame_t *frame, void 
*cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - int32_t callcnt = 0; - stripe_local_t *local = NULL; - call_frame_t *prev = NULL; - - if (!this || !frame || !frame->local || !cookie) { - gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - - prev = cookie; - local = frame->local; - - LOCK(&frame->lock); - { - callcnt = --local->call_count; - - if (op_ret == -1) { - gf_log(this->name, GF_LOG_DEBUG, "%s returned %s", prev->this->name, - strerror(op_errno)); - if (op_errno != ENOENT) - local->failed = 1; - } - } - UNLOCK(&frame->lock); - - if (callcnt == 1) { - if (local->failed) - goto out; - STACK_WIND(frame, stripe_first_rmdir_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rmdir, &local->loc, local->flags, - NULL); - } - return 0; -out: - STRIPE_STACK_UNWIND(rmdir, frame, -1, op_errno, NULL, NULL, NULL); - return 0; -} - -int32_t -stripe_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, - dict_t *xdata) -{ - xlator_list_t *trav = NULL; - stripe_local_t *local = NULL; - stripe_private_t *priv = NULL; - int32_t op_errno = EINVAL; - - VALIDATE_OR_GOTO(frame, err); - VALIDATE_OR_GOTO(this, err); - VALIDATE_OR_GOTO(loc, err); - VALIDATE_OR_GOTO(loc->path, err); - VALIDATE_OR_GOTO(loc->inode, err); - - priv = this->private; - trav = this->children; - - /* don't delete a directory if any of the subvolume is down */ - if (priv->nodes_down) { - op_errno = ENOTCONN; - goto err; - } - - /* Initialization */ - local = mem_get0(this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - local->op_ret = -1; - frame->local = local; - loc_copy(&local->loc, loc); - local->flags = flags; - local->call_count = priv->child_count; - trav = trav->next; /* skip the first child */ - - while (trav) { - STACK_WIND(frame, stripe_rmdir_cbk, trav->xlator, - trav->xlator->fops->rmdir, loc, flags, NULL); - trav = trav->next; - } - - return 0; -err: - 
STRIPE_STACK_UNWIND(rmdir, frame, -1, op_errno, NULL, NULL, NULL); - return 0; -} - -int32_t -stripe_mknod_ifreg_fail_unlink_cbk(call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - int32_t callcnt = 0; - stripe_local_t *local = NULL; - - if (!this || !frame || !frame->local) { - gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - - local = frame->local; - - LOCK(&frame->lock); - { - callcnt = --local->call_count; - } - UNLOCK(&frame->lock); - - if (!callcnt) { - STRIPE_STACK_UNWIND(mknod, frame, local->op_ret, local->op_errno, - local->inode, &local->stbuf, &local->preparent, - &local->postparent, NULL); - } -out: - return 0; -} - -/** - */ -int32_t -stripe_mknod_ifreg_setxattr_cbk(call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno, dict_t *xdata) -{ - int32_t callcnt = 0; - stripe_local_t *local = NULL; - stripe_private_t *priv = NULL; - xlator_list_t *trav = NULL; - call_frame_t *prev = NULL; - - if (!this || !frame || !frame->local || !cookie) { - gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - - prev = cookie; - priv = this->private; - local = frame->local; - - LOCK(&frame->lock); - { - callcnt = --local->call_count; - - if (op_ret == -1) { - gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s", - prev->this->name, strerror(op_errno)); - local->op_ret = -1; - local->op_errno = op_errno; - } - } - UNLOCK(&frame->lock); - - if (!callcnt) { - if (local->op_ret == -1) { - local->call_count = priv->child_count; - while (trav) { - STACK_WIND(frame, stripe_mknod_ifreg_fail_unlink_cbk, - trav->xlator, trav->xlator->fops->unlink, - &local->loc, 0, NULL); - trav = trav->next; - } - return 0; - } - - STRIPE_STACK_UNWIND(mknod, frame, local->op_ret, local->op_errno, - local->inode, &local->stbuf, &local->preparent, - &local->postparent, NULL); - } -out: - return 0; -} - -int32_t 
-stripe_mknod_ifreg_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - int32_t callcnt = 0; - stripe_local_t *local = NULL; - stripe_private_t *priv = NULL; - call_frame_t *prev = NULL; - xlator_list_t *trav = NULL; - - if (!this || !frame || !frame->local || !cookie) { - gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - - prev = cookie; - priv = this->private; - local = frame->local; - - LOCK(&frame->lock); - { - callcnt = --local->call_count; - - if (op_ret == -1) { - gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s", - prev->this->name, strerror(op_errno)); - if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this))) - local->failed = 1; - local->op_errno = op_errno; - } - if (op_ret >= 0) { - local->op_ret = op_ret; - - /* Can be used as a mechanism to understand if mknod - was successful in at least one place */ - if (gf_uuid_is_null(local->ia_gfid)) - gf_uuid_copy(local->ia_gfid, buf->ia_gfid); - - if (stripe_ctx_handle(this, prev, local, xdata)) - gf_log(this->name, GF_LOG_ERROR, - "Error getting fctx info from dict"); - - local->stbuf_blocks += buf->ia_blocks; - local->preparent_blocks += preparent->ia_blocks; - local->postparent_blocks += postparent->ia_blocks; - - correct_file_size(buf, local->fctx, prev); - - if (local->stbuf_size < buf->ia_size) - local->stbuf_size = buf->ia_size; - if (local->preparent_size < preparent->ia_size) - local->preparent_size = preparent->ia_size; - if (local->postparent_size < postparent->ia_size) - local->postparent_size = postparent->ia_size; - } - } - UNLOCK(&frame->lock); - - if (!callcnt) { - if (local->failed) - local->op_ret = -1; - - if ((local->op_ret == -1) && !gf_uuid_is_null(local->ia_gfid)) { - /* ia_gfid set means, at least on one node 'mknod' - is successful */ - local->call_count = priv->child_count; - trav = this->children; - while 
(trav) { - STACK_WIND(frame, stripe_mknod_ifreg_fail_unlink_cbk, - trav->xlator, trav->xlator->fops->unlink, - &local->loc, 0, NULL); - trav = trav->next; - } - return 0; - } - - if (local->op_ret != -1) { - local->preparent.ia_blocks = local->preparent_blocks; - local->preparent.ia_size = local->preparent_size; - local->postparent.ia_blocks = local->postparent_blocks; - local->postparent.ia_size = local->postparent_size; - local->stbuf.ia_size = local->stbuf_size; - local->stbuf.ia_blocks = local->stbuf_blocks; - inode_ctx_put(local->inode, this, (uint64_t)(long)local->fctx); - } - STRIPE_STACK_UNWIND(mknod, frame, local->op_ret, local->op_errno, - local->inode, &local->stbuf, &local->preparent, - &local->postparent, NULL); - } -out: - return 0; -} - -int32_t -stripe_mknod_first_ifreg_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - stripe_local_t *local = NULL; - stripe_private_t *priv = NULL; - call_frame_t *prev = NULL; - xlator_list_t *trav = NULL; - int i = 1; - dict_t *dict = NULL; - int ret = 0; - int need_unref = 0; - - if (!this || !frame || !frame->local || !cookie) { - gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - - prev = cookie; - priv = this->private; - local = frame->local; - trav = this->children; - - local->call_count--; - - if (op_ret == -1) { - gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s", - prev->this->name, strerror(op_errno)); - local->failed = 1; - local->op_errno = op_errno; - goto out; - } - - local->op_ret = op_ret; - - local->stbuf = *buf; - local->preparent = *preparent; - local->postparent = *postparent; - - if (gf_uuid_is_null(local->ia_gfid)) - gf_uuid_copy(local->ia_gfid, buf->ia_gfid); - local->preparent.ia_blocks = local->preparent_blocks; - local->preparent.ia_size = local->preparent_size; - local->postparent.ia_blocks = local->postparent_blocks; 
- local->postparent.ia_size = local->postparent_size; - local->stbuf.ia_size = local->stbuf_size; - local->stbuf.ia_blocks = local->stbuf_blocks; - - trav = trav->next; - while (trav) { - if (priv->xattr_supported) { - dict = dict_new(); - if (!dict) { - gf_log(this->name, GF_LOG_ERROR, "failed to allocate dict %s", - local->loc.path); - } - need_unref = 1; - - dict_copy(local->xattr, dict); - - ret = stripe_xattr_request_build(this, dict, local->stripe_size, - priv->child_count, i, - priv->coalesce); - if (ret) - gf_log(this->name, GF_LOG_ERROR, - "Failed to build xattr request"); - - } else { - dict = local->xattr; - } - - STACK_WIND(frame, stripe_mknod_ifreg_cbk, trav->xlator, - trav->xlator->fops->mknod, &local->loc, local->mode, - local->rdev, 0, dict); - trav = trav->next; - i++; - - if (dict && need_unref) - dict_unref(dict); - } - - return 0; - -out: - - STRIPE_STACK_UNWIND(mknod, frame, op_ret, op_errno, NULL, NULL, NULL, NULL, - NULL); - return 0; -} - -int32_t -stripe_single_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - STRIPE_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, buf, preparent, - postparent, xdata); - return 0; -} - -int -stripe_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, - dev_t rdev, mode_t umask, dict_t *xdata) -{ - stripe_private_t *priv = NULL; - stripe_local_t *local = NULL; - int32_t op_errno = EINVAL; - int32_t i = 0; - dict_t *dict = NULL; - int ret = 0; - int need_unref = 0; - - VALIDATE_OR_GOTO(frame, err); - VALIDATE_OR_GOTO(this, err); - VALIDATE_OR_GOTO(loc, err); - VALIDATE_OR_GOTO(loc->path, err); - VALIDATE_OR_GOTO(loc->inode, err); - - priv = this->private; - - if (priv->first_child_down) { - op_errno = ENOTCONN; - goto err; - } - - if (S_ISREG(mode)) { - /* NOTE: on older kernels (older than 2.6.9), - creat() fops is sent as mknod() + 
open(). Hence handling - S_IFREG files is necessary */ - if (priv->nodes_down) { - gf_log(this->name, GF_LOG_WARNING, "Some node down, returning EIO"); - op_errno = EIO; - goto err; - } - - /* Initialization */ - local = mem_get0(this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - local->op_ret = -1; - local->op_errno = ENOTCONN; - local->stripe_size = stripe_get_matching_bs(loc->path, priv); - frame->local = local; - local->inode = inode_ref(loc->inode); - loc_copy(&local->loc, loc); - local->xattr = dict_copy_with_ref(xdata, NULL); - local->mode = mode; - local->umask = umask; - local->rdev = rdev; - - /* Every time in stripe lookup, all child nodes should - be looked up */ - local->call_count = priv->child_count; - - if (priv->xattr_supported) { - dict = dict_new(); - if (!dict) { - gf_log(this->name, GF_LOG_ERROR, "failed to allocate dict %s", - loc->path); - } - need_unref = 1; - - dict_copy(xdata, dict); - - ret = stripe_xattr_request_build(this, dict, local->stripe_size, - priv->child_count, i, - priv->coalesce); - if (ret) - gf_log(this->name, GF_LOG_ERROR, - "failed to build xattr request"); - } else { - dict = xdata; - } - - STACK_WIND(frame, stripe_mknod_first_ifreg_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, - dict); - - if (dict && need_unref) - dict_unref(dict); - return 0; - } - - STACK_WIND(frame, stripe_single_mknod_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata); - - return 0; -err: - STRIPE_STACK_UNWIND(mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL, - NULL); - return 0; -} - -int32_t -stripe_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - int32_t callcnt = 0; - stripe_local_t *local = NULL; - call_frame_t *prev = NULL; - - if (!this || !frame || !frame->local || !cookie) { - 
gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - - prev = cookie; - local = frame->local; - - LOCK(&frame->lock); - { - callcnt = --local->call_count; - - if (op_ret == -1) { - gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s", - prev->this->name, strerror(op_errno)); - local->op_errno = op_errno; - if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this))) - local->failed = 1; - } - - if (op_ret >= 0) { - local->op_ret = 0; - - local->stbuf_blocks += buf->ia_blocks; - local->preparent_blocks += preparent->ia_blocks; - local->postparent_blocks += postparent->ia_blocks; - - if (local->stbuf_size < buf->ia_size) - local->stbuf_size = buf->ia_size; - if (local->preparent_size < preparent->ia_size) - local->preparent_size = preparent->ia_size; - if (local->postparent_size < postparent->ia_size) - local->postparent_size = postparent->ia_size; - } - } - UNLOCK(&frame->lock); - - if (!callcnt) { - if (local->failed != -1) { - local->preparent.ia_blocks = local->preparent_blocks; - local->preparent.ia_size = local->preparent_size; - local->postparent.ia_blocks = local->postparent_blocks; - local->postparent.ia_size = local->postparent_size; - local->stbuf.ia_size = local->stbuf_size; - local->stbuf.ia_blocks = local->stbuf_blocks; - } - STRIPE_STACK_UNWIND(mkdir, frame, local->op_ret, local->op_errno, - local->inode, &local->stbuf, &local->preparent, - &local->postparent, NULL); - } -out: - return 0; -} - -int32_t -stripe_first_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - stripe_local_t *local = NULL; - call_frame_t *prev = NULL; - xlator_list_t *trav = NULL; - - if (!this || !frame || !frame->local || !cookie) { - gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - - prev = cookie; - local = frame->local; - trav = this->children; - - local->call_count--; /* 
first child is successful */ - trav = trav->next; /* skip first child */ - - if (op_ret == -1) { - gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s", - prev->this->name, strerror(op_errno)); - local->op_errno = op_errno; - goto out; - } - - local->op_ret = 0; - - local->inode = inode_ref(inode); - local->stbuf = *buf; - local->postparent = *postparent; - local->preparent = *preparent; - - local->stbuf_blocks += buf->ia_blocks; - local->preparent_blocks += preparent->ia_blocks; - local->postparent_blocks += postparent->ia_blocks; - - local->stbuf_size = buf->ia_size; - local->preparent_size = preparent->ia_size; - local->postparent_size = postparent->ia_size; - - while (trav) { - STACK_WIND(frame, stripe_mkdir_cbk, trav->xlator, - trav->xlator->fops->mkdir, &local->loc, local->mode, - local->umask, local->xdata); - trav = trav->next; - } - return 0; -out: - STRIPE_STACK_UNWIND(mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL, - NULL); - - return 0; -} - -int -stripe_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, - mode_t umask, dict_t *xdata) -{ - stripe_private_t *priv = NULL; - stripe_local_t *local = NULL; - xlator_list_t *trav = NULL; - int32_t op_errno = 1; - - VALIDATE_OR_GOTO(frame, err); - VALIDATE_OR_GOTO(this, err); - VALIDATE_OR_GOTO(loc, err); - VALIDATE_OR_GOTO(loc->path, err); - VALIDATE_OR_GOTO(loc->inode, err); - - priv = this->private; - trav = this->children; - - if (priv->first_child_down) { - op_errno = ENOTCONN; - goto err; - } - - /* Initialization */ - local = mem_get0(this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - local->op_ret = -1; - local->call_count = priv->child_count; - if (xdata) - local->xdata = dict_ref(xdata); - local->mode = mode; - local->umask = umask; - loc_copy(&local->loc, loc); - frame->local = local; - - /* Every time in stripe lookup, all child nodes should be looked up */ - STACK_WIND(frame, stripe_first_mkdir_cbk, trav->xlator, - trav->xlator->fops->mkdir, loc, mode, 
umask, xdata); - - return 0; -err: - STRIPE_STACK_UNWIND(mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL, - NULL); - return 0; -} - -int32_t -stripe_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - int32_t callcnt = 0; - stripe_local_t *local = NULL; - call_frame_t *prev = NULL; - stripe_fd_ctx_t *fctx = NULL; - - if (!this || !frame || !frame->local || !cookie) { - gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - - prev = cookie; - local = frame->local; - - LOCK(&frame->lock); - { - callcnt = --local->call_count; - - if (op_ret == -1) { - gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s", - prev->this->name, strerror(op_errno)); - local->op_errno = op_errno; - if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this))) - local->failed = 1; - } - - if (op_ret >= 0) { - local->op_ret = 0; - - if (IA_ISREG(inode->ia_type)) { - inode_ctx_get(inode, this, (uint64_t *)&fctx); - if (!fctx) { - gf_log(this->name, GF_LOG_ERROR, - "failed to get stripe context"); - op_ret = -1; - op_errno = EINVAL; - } - } - - if (FIRST_CHILD(this) == prev->this) { - local->inode = inode_ref(inode); - local->stbuf = *buf; - local->postparent = *postparent; - local->preparent = *preparent; - } - local->stbuf_blocks += buf->ia_blocks; - local->preparent_blocks += preparent->ia_blocks; - local->postparent_blocks += postparent->ia_blocks; - - correct_file_size(buf, fctx, prev); - - if (local->stbuf_size < buf->ia_size) - local->stbuf_size = buf->ia_size; - if (local->preparent_size < preparent->ia_size) - local->preparent_size = preparent->ia_size; - if (local->postparent_size < postparent->ia_size) - local->postparent_size = postparent->ia_size; - } - } - UNLOCK(&frame->lock); - - if (!callcnt) { - if (local->failed) - local->op_ret = -1; - - if (local->op_ret != -1) { - local->preparent.ia_blocks = 
local->preparent_blocks; - local->preparent.ia_size = local->preparent_size; - local->postparent.ia_blocks = local->postparent_blocks; - local->postparent.ia_size = local->postparent_size; - local->stbuf.ia_size = local->stbuf_size; - local->stbuf.ia_blocks = local->stbuf_blocks; - } - STRIPE_STACK_UNWIND(link, frame, local->op_ret, local->op_errno, - local->inode, &local->stbuf, &local->preparent, - &local->postparent, NULL); - } -out: - return 0; -} - -int32_t -stripe_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, - dict_t *xdata) -{ - xlator_list_t *trav = NULL; - stripe_local_t *local = NULL; - stripe_private_t *priv = NULL; - int32_t op_errno = 1; - - VALIDATE_OR_GOTO(frame, err); - VALIDATE_OR_GOTO(this, err); - VALIDATE_OR_GOTO(oldloc, err); - VALIDATE_OR_GOTO(oldloc->path, err); - VALIDATE_OR_GOTO(oldloc->inode, err); - - priv = this->private; - trav = this->children; - - /* If any one node is down, don't allow link operation */ - if (priv->nodes_down) { - op_errno = ENOTCONN; - goto err; - } - - /* Initialization */ - local = mem_get0(this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - local->op_ret = -1; - frame->local = local; - local->call_count = priv->child_count; - - /* Every time in stripe lookup, all child - nodes should be looked up */ - while (trav) { - STACK_WIND(frame, stripe_link_cbk, trav->xlator, - trav->xlator->fops->link, oldloc, newloc, NULL); - trav = trav->next; - } - - return 0; -err: - STRIPE_STACK_UNWIND(link, frame, -1, op_errno, NULL, NULL, NULL, NULL, - NULL); - return 0; -} - -int32_t -stripe_create_fail_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *preparent, struct iatt *postparent, - dict_t *xdata) -{ - int32_t callcnt = 0; - stripe_local_t *local = NULL; - - if (!this || !frame || !frame->local) { - gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - - local = frame->local; - - 
LOCK(&frame->lock); - { - callcnt = --local->call_count; - } - UNLOCK(&frame->lock); - - if (!callcnt) { - STRIPE_STACK_UNWIND(create, frame, local->op_ret, local->op_errno, - local->fd, local->inode, &local->stbuf, - &local->preparent, &local->postparent, NULL); - } -out: - return 0; -} - -int32_t -stripe_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - int32_t callcnt = 0; - stripe_local_t *local = NULL; - stripe_private_t *priv = NULL; - call_frame_t *prev = NULL; - xlator_list_t *trav = NULL; - - if (!this || !frame || !frame->local || !cookie) { - gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - - prev = cookie; - priv = this->private; - local = frame->local; - - LOCK(&frame->lock); - { - callcnt = --local->call_count; - - if (op_ret == -1) { - gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s", - prev->this->name, strerror(op_errno)); - local->failed = 1; - local->op_errno = op_errno; - } - - if (op_ret >= 0) { - if (IA_ISREG(buf->ia_type)) { - if (stripe_ctx_handle(this, prev, local, xdata)) - gf_log(this->name, GF_LOG_ERROR, - "Error getting fctx info from " - "dict"); - } - - local->op_ret = op_ret; - - local->stbuf_blocks += buf->ia_blocks; - local->preparent_blocks += preparent->ia_blocks; - local->postparent_blocks += postparent->ia_blocks; - - correct_file_size(buf, local->fctx, prev); - - if (local->stbuf_size < buf->ia_size) - local->stbuf_size = buf->ia_size; - if (local->preparent_size < preparent->ia_size) - local->preparent_size = preparent->ia_size; - if (local->postparent_size < postparent->ia_size) - local->postparent_size = postparent->ia_size; - } - } - UNLOCK(&frame->lock); - - if (!callcnt) { - if (local->failed) - local->op_ret = -1; - - if (local->op_ret == -1) { - local->call_count = priv->child_count; - trav = this->children; - while (trav) { - 
STACK_WIND(frame, stripe_create_fail_unlink_cbk, trav->xlator, - trav->xlator->fops->unlink, &local->loc, 0, NULL); - trav = trav->next; - } - - return 0; - } - - if (local->op_ret >= 0) { - local->preparent.ia_blocks = local->preparent_blocks; - local->preparent.ia_size = local->preparent_size; - local->postparent.ia_blocks = local->postparent_blocks; - local->postparent.ia_size = local->postparent_size; - local->stbuf.ia_size = local->stbuf_size; - local->stbuf.ia_blocks = local->stbuf_blocks; - - stripe_copy_xl_array(local->fctx->xl_array, priv->xl_array, - local->fctx->stripe_count); - inode_ctx_put(local->inode, this, (uint64_t)(uintptr_t)local->fctx); - } - - /* Create itself has failed.. so return - without setxattring */ - STRIPE_STACK_UNWIND(create, frame, local->op_ret, local->op_errno, - local->fd, local->inode, &local->stbuf, - &local->preparent, &local->postparent, NULL); - } - -out: - return 0; -} - -int32_t -stripe_first_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd, - inode_t *inode, struct iatt *buf, - struct iatt *preparent, struct iatt *postparent, - dict_t *xdata) -{ - stripe_local_t *local = NULL; - stripe_private_t *priv = NULL; - call_frame_t *prev = NULL; - xlator_list_t *trav = NULL; - int i = 1; - dict_t *dict = NULL; - loc_t *loc = NULL; - int32_t need_unref = 0; - int32_t ret = -1; - - if (!this || !frame || !frame->local || !cookie) { - gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - - prev = cookie; - priv = this->private; - local = frame->local; - trav = this->children; - loc = &local->loc; - - --local->call_count; - - if (op_ret == -1) { - gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s", - prev->this->name, strerror(op_errno)); - local->failed = 1; - local->op_errno = op_errno; - } - - local->op_ret = 0; - /* Get the mapping in inode private */ - /* Get the stat buf right */ - local->stbuf = *buf; - local->preparent = *preparent; - 
local->postparent = *postparent; - - local->stbuf_blocks += buf->ia_blocks; - local->preparent_blocks += preparent->ia_blocks; - local->postparent_blocks += postparent->ia_blocks; - - if (local->stbuf_size < buf->ia_size) - local->stbuf_size = buf->ia_size; - if (local->preparent_size < preparent->ia_size) - local->preparent_size = preparent->ia_size; - if (local->postparent_size < postparent->ia_size) - local->postparent_size = postparent->ia_size; - - if (local->failed) - local->op_ret = -1; - - if (local->op_ret == -1) { - local->call_count = 1; - STACK_WIND(frame, stripe_create_fail_unlink_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, &local->loc, 0, NULL); - return 0; - } - - if (local->op_ret >= 0) { - local->preparent.ia_blocks = local->preparent_blocks; - local->preparent.ia_size = local->preparent_size; - local->postparent.ia_blocks = local->postparent_blocks; - local->postparent.ia_size = local->postparent_size; - local->stbuf.ia_size = local->stbuf_size; - local->stbuf.ia_blocks = local->stbuf_blocks; - } - - /* Send a setxattr request to nodes where the - files are created */ - trav = trav->next; - while (trav) { - if (priv->xattr_supported) { - dict = dict_new(); - if (!dict) { - gf_log(this->name, GF_LOG_ERROR, "failed to allocate dict %s", - loc->path); - } - need_unref = 1; - - dict_copy(local->xattr, dict); - - ret = stripe_xattr_request_build(this, dict, local->stripe_size, - priv->child_count, i, - priv->coalesce); - if (ret) - gf_log(this->name, GF_LOG_ERROR, - "failed to build xattr request"); - } else { - dict = local->xattr; - } - - STACK_WIND(frame, stripe_create_cbk, trav->xlator, - trav->xlator->fops->create, &local->loc, local->flags, - local->mode, local->umask, local->fd, dict); - trav = trav->next; - if (need_unref && dict) - dict_unref(dict); - i++; - } - -out: - return 0; -} - -/** - * stripe_create - If a block-size is specified for the 'name', create the - * file in all the child nodes. 
If not, create it in only first child. - * - * @name- complete path of the file to be created. - */ -int32_t -stripe_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) -{ - stripe_private_t *priv = NULL; - stripe_local_t *local = NULL; - int32_t op_errno = EINVAL; - int ret = 0; - int need_unref = 0; - int i = 0; - dict_t *dict = NULL; - - VALIDATE_OR_GOTO(frame, err); - VALIDATE_OR_GOTO(this, err); - VALIDATE_OR_GOTO(loc, err); - VALIDATE_OR_GOTO(loc->path, err); - VALIDATE_OR_GOTO(loc->inode, err); - - priv = this->private; - - /* files created in O_APPEND mode does not allow lseek() on fd */ - flags &= ~O_APPEND; - - if (priv->first_child_down || priv->nodes_down) { - gf_log(this->name, GF_LOG_DEBUG, "First node down, returning EIO"); - op_errno = EIO; - goto err; - } - - /* Initialization */ - local = mem_get0(this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - local->op_ret = -1; - local->op_errno = ENOTCONN; - local->stripe_size = stripe_get_matching_bs(loc->path, priv); - frame->local = local; - local->inode = inode_ref(loc->inode); - loc_copy(&local->loc, loc); - local->fd = fd_ref(fd); - local->flags = flags; - local->mode = mode; - local->umask = umask; - if (xdata) - local->xattr = dict_ref(xdata); - - local->call_count = priv->child_count; - /* Send a setxattr request to nodes where the - files are created */ - - if (priv->xattr_supported) { - dict = dict_new(); - if (!dict) { - gf_log(this->name, GF_LOG_ERROR, "failed to allocate dict %s", - loc->path); - } - need_unref = 1; - - dict_copy(xdata, dict); - - ret = stripe_xattr_request_build(this, dict, local->stripe_size, - priv->child_count, i, priv->coalesce); - if (ret) - gf_log(this->name, GF_LOG_ERROR, "failed to build xattr request"); - } else { - dict = xdata; - } - - STACK_WIND(frame, stripe_first_create_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd, - dict); - 
- if (need_unref && dict) - dict_unref(dict); - - return 0; -err: - STRIPE_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, - NULL, xdata); - return 0; -} - -int32_t -stripe_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) -{ - int32_t callcnt = 0; - stripe_local_t *local = NULL; - call_frame_t *prev = NULL; - - if (!this || !frame || !frame->local || !cookie) { - gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - - prev = cookie; - local = frame->local; - - LOCK(&frame->lock); - { - callcnt = --local->call_count; - - if (op_ret == -1) { - gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s", - prev->this->name, strerror(op_errno)); - if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this))) - local->failed = 1; - local->op_errno = op_errno; - } - - if (op_ret >= 0) - local->op_ret = op_ret; - } - UNLOCK(&frame->lock); - - if (!callcnt) { - if (local->failed) - local->op_ret = -1; - - STRIPE_STACK_UNWIND(open, frame, local->op_ret, local->op_errno, - local->fd, xdata); - } -out: - return 0; -} - -/** - * stripe_open - - */ -int32_t -stripe_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - fd_t *fd, dict_t *xdata) -{ - stripe_local_t *local = NULL; - stripe_private_t *priv = NULL; - xlator_list_t *trav = NULL; - int32_t op_errno = 1; - - VALIDATE_OR_GOTO(frame, err); - VALIDATE_OR_GOTO(this, err); - VALIDATE_OR_GOTO(loc, err); - VALIDATE_OR_GOTO(loc->path, err); - VALIDATE_OR_GOTO(loc->inode, err); - - priv = this->private; - trav = this->children; - - if (priv->first_child_down) { - op_errno = ENOTCONN; - goto err; - } - - /* Initialization */ - local = mem_get0(this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - - /* files opened in O_APPEND mode does not allow lseek() on fd */ - flags &= ~O_APPEND; - - local->fd = fd_ref(fd); - frame->local = local; - loc_copy(&local->loc, loc); - - /* Striped files */ - 
local->flags = flags; - local->call_count = priv->child_count; - local->stripe_size = stripe_get_matching_bs(loc->path, priv); - - while (trav) { - STACK_WIND(frame, stripe_open_cbk, trav->xlator, - trav->xlator->fops->open, &local->loc, local->flags, - local->fd, xdata); - trav = trav->next; - } - return 0; -err: - STRIPE_STACK_UNWIND(open, frame, -1, op_errno, NULL, NULL); - return 0; -} - -int32_t -stripe_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) -{ - int32_t callcnt = 0; - stripe_local_t *local = NULL; - call_frame_t *prev = NULL; - - if (!this || !frame || !frame->local || !cookie) { - gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - - prev = cookie; - local = frame->local; - - LOCK(&frame->lock); - { - callcnt = --local->call_count; - - if (op_ret == -1) { - gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s", - prev->this->name, strerror(op_errno)); - local->op_ret = -1; - local->op_errno = op_errno; - } - - if (op_ret >= 0) - local->op_ret = op_ret; - } - UNLOCK(&frame->lock); - - if (!callcnt) { - STRIPE_STACK_UNWIND(opendir, frame, local->op_ret, local->op_errno, - local->fd, NULL); - } -out: - return 0; -} - -int32_t -stripe_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, - dict_t *xdata) -{ - xlator_list_t *trav = NULL; - stripe_local_t *local = NULL; - stripe_private_t *priv = NULL; - int32_t op_errno = EINVAL; - - VALIDATE_OR_GOTO(frame, err); - VALIDATE_OR_GOTO(this, err); - VALIDATE_OR_GOTO(loc, err); - VALIDATE_OR_GOTO(loc->path, err); - VALIDATE_OR_GOTO(loc->inode, err); - - priv = this->private; - trav = this->children; - - if (priv->first_child_down) { - op_errno = ENOTCONN; - goto err; - } - - /* Initialization */ - local = mem_get0(this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - frame->local = local; - local->call_count = priv->child_count; - local->fd = fd_ref(fd); - - while (trav) { - 
STACK_WIND(frame, stripe_opendir_cbk, trav->xlator, - trav->xlator->fops->opendir, loc, fd, NULL); - trav = trav->next; - } - - return 0; -err: - STRIPE_STACK_UNWIND(opendir, frame, -1, op_errno, NULL, NULL); - return 0; -} - -int32_t -stripe_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, struct gf_flock *lock, dict_t *xdata) -{ - int32_t callcnt = 0; - stripe_local_t *local = NULL; - call_frame_t *prev = NULL; - - if (!this || !frame || !frame->local || !cookie) { - gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - - prev = cookie; - local = frame->local; - - LOCK(&frame->lock); - { - callcnt = --local->call_count; - if (op_ret == -1) { - gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s", - prev->this->name, strerror(op_errno)); - local->op_errno = op_errno; - if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this))) - local->failed = 1; - } - if (op_ret >= 0) { - if (FIRST_CHILD(this) == prev->this) { - /* First successful call, copy the *lock */ - local->op_ret = op_ret; - local->lock = *lock; - } - } - } - UNLOCK(&frame->lock); - - if (!callcnt) { - if (local->failed) - local->op_ret = -1; - STRIPE_STACK_UNWIND(lk, frame, local->op_ret, local->op_errno, - &local->lock, NULL); - } -out: - return 0; -} - -int32_t -stripe_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, - struct gf_flock *lock, dict_t *xdata) -{ - stripe_local_t *local = NULL; - xlator_list_t *trav = NULL; - stripe_private_t *priv = NULL; - int32_t op_errno = EINVAL; - - VALIDATE_OR_GOTO(frame, err); - VALIDATE_OR_GOTO(this, err); - VALIDATE_OR_GOTO(fd, err); - VALIDATE_OR_GOTO(fd->inode, err); - - trav = this->children; - priv = this->private; - - /* Initialization */ - local = mem_get0(this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - local->op_ret = -1; - frame->local = local; - local->call_count = priv->child_count; - - while (trav) { - STACK_WIND(frame, stripe_lk_cbk, 
trav->xlator, trav->xlator->fops->lk, - fd, cmd, lock, NULL); - trav = trav->next; - } - - return 0; -err: - STRIPE_STACK_UNWIND(lk, frame, -1, op_errno, NULL, NULL); - return 0; -} - -int32_t -stripe_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - int32_t callcnt = 0; - stripe_local_t *local = NULL; - call_frame_t *prev = NULL; - - if (!this || !frame || !frame->local || !cookie) { - gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - - prev = cookie; - local = frame->local; - - LOCK(&frame->lock); - { - callcnt = --local->call_count; - - if (op_ret == -1) { - gf_log(this->name, GF_LOG_DEBUG, "%s returned %s", prev->this->name, - strerror(op_errno)); - local->op_errno = op_errno; - if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this))) - local->failed = 1; - } - if (op_ret >= 0) - local->op_ret = op_ret; - } - UNLOCK(&frame->lock); - - if (!callcnt) { - if (local->failed) - local->op_ret = -1; - - STRIPE_STACK_UNWIND(flush, frame, local->op_ret, local->op_errno, NULL); - } -out: - return 0; -} - -int32_t -stripe_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) -{ - stripe_local_t *local = NULL; - stripe_private_t *priv = NULL; - xlator_list_t *trav = NULL; - int32_t op_errno = 1; - - VALIDATE_OR_GOTO(frame, err); - VALIDATE_OR_GOTO(this, err); - VALIDATE_OR_GOTO(fd, err); - VALIDATE_OR_GOTO(fd->inode, err); - - priv = this->private; - trav = this->children; - - if (priv->first_child_down) { - op_errno = ENOTCONN; - goto err; - } - /* Initialization */ - local = mem_get0(this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - local->op_ret = -1; - frame->local = local; - local->call_count = priv->child_count; - - while (trav) { - STACK_WIND(frame, stripe_flush_cbk, trav->xlator, - trav->xlator->fops->flush, fd, NULL); - trav = trav->next; - } - - return 0; -err: - STRIPE_STACK_UNWIND(flush, frame, -1, op_errno, NULL); - return 0; -} 
- -int32_t -stripe_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) -{ - int32_t callcnt = 0; - stripe_local_t *local = NULL; - call_frame_t *prev = NULL; - - if (!this || !frame || !frame->local || !cookie) { - gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - - prev = cookie; - local = frame->local; - - LOCK(&frame->lock); - { - callcnt = --local->call_count; - - if (op_ret == -1) { - gf_log(this->name, GF_LOG_DEBUG, "%s returned %s", prev->this->name, - strerror(op_errno)); - local->op_errno = op_errno; - if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this))) - local->failed = 1; - } - if (op_ret >= 0) { - local->op_ret = op_ret; - if (FIRST_CHILD(this) == prev->this) { - local->pre_buf = *prebuf; - local->post_buf = *postbuf; - } - local->prebuf_blocks += prebuf->ia_blocks; - local->postbuf_blocks += postbuf->ia_blocks; - - correct_file_size(prebuf, local->fctx, prev); - correct_file_size(postbuf, local->fctx, prev); - - if (local->prebuf_size < prebuf->ia_size) - local->prebuf_size = prebuf->ia_size; - - if (local->postbuf_size < postbuf->ia_size) - local->postbuf_size = postbuf->ia_size; - } - } - UNLOCK(&frame->lock); - - if (!callcnt) { - if (local->failed) - local->op_ret = -1; - - if (local->op_ret != -1) { - local->pre_buf.ia_blocks = local->prebuf_blocks; - local->pre_buf.ia_size = local->prebuf_size; - local->post_buf.ia_blocks = local->postbuf_blocks; - local->post_buf.ia_size = local->postbuf_size; - } - - STRIPE_STACK_UNWIND(fsync, frame, local->op_ret, local->op_errno, - &local->pre_buf, &local->post_buf, NULL); - } -out: - return 0; -} - -int32_t -stripe_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, - dict_t *xdata) -{ - stripe_local_t *local = NULL; - stripe_private_t *priv = NULL; - xlator_list_t *trav = NULL; - stripe_fd_ctx_t *fctx = NULL; - int32_t op_errno = 1; - - 
VALIDATE_OR_GOTO(frame, err); - VALIDATE_OR_GOTO(this, err); - VALIDATE_OR_GOTO(fd, err); - VALIDATE_OR_GOTO(fd->inode, err); - - priv = this->private; - trav = this->children; - - /* Initialization */ - local = mem_get0(this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - - frame->local = local; - - inode_ctx_get(fd->inode, this, (uint64_t *)&fctx); - if (!fctx) { - op_errno = EINVAL; - goto err; - } - local->fctx = fctx; - local->op_ret = -1; - local->call_count = priv->child_count; - - while (trav) { - STACK_WIND(frame, stripe_fsync_cbk, trav->xlator, - trav->xlator->fops->fsync, fd, flags, NULL); - trav = trav->next; - } - - return 0; -err: - STRIPE_STACK_UNWIND(fsync, frame, -1, op_errno, NULL, NULL, NULL); - return 0; -} - -int32_t -stripe_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, - dict_t *xdata) -{ - int32_t callcnt = 0; - stripe_local_t *local = NULL; - call_frame_t *prev = NULL; - - if (!this || !frame || !frame->local || !cookie) { - gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - - prev = cookie; - local = frame->local; - - LOCK(&frame->lock); - { - callcnt = --local->call_count; - - if (op_ret == -1) { - gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s", - prev->this->name, strerror(op_errno)); - local->op_errno = op_errno; - if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this))) - local->failed = 1; - } - - if (op_ret == 0) { - local->op_ret = 0; - - if (FIRST_CHILD(this) == prev->this) - local->stbuf = *buf; - - local->stbuf_blocks += buf->ia_blocks; - - correct_file_size(buf, local->fctx, prev); - - if (local->stbuf_size < buf->ia_size) - local->stbuf_size = buf->ia_size; - } - } - UNLOCK(&frame->lock); - - if (!callcnt) { - if (local->failed) - local->op_ret = -1; - - if (local->op_ret != -1) { - local->stbuf.ia_size = local->stbuf_size; - local->stbuf.ia_blocks = local->stbuf_blocks; - } - - 
STRIPE_STACK_UNWIND(fstat, frame, local->op_ret, local->op_errno, - &local->stbuf, NULL); - } - -out: - return 0; -} - -int32_t -stripe_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) -{ - stripe_local_t *local = NULL; - stripe_private_t *priv = NULL; - xlator_list_t *trav = NULL; - stripe_fd_ctx_t *fctx = NULL; - int32_t op_errno = 1; - - VALIDATE_OR_GOTO(frame, err); - VALIDATE_OR_GOTO(this, err); - VALIDATE_OR_GOTO(fd, err); - VALIDATE_OR_GOTO(fd->inode, err); - - priv = this->private; - trav = this->children; - - /* Initialization */ - local = mem_get0(this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - local->op_ret = -1; - frame->local = local; - local->call_count = priv->child_count; - - if (IA_ISREG(fd->inode->ia_type)) { - inode_ctx_get(fd->inode, this, (uint64_t *)&fctx); - if (!fctx) - goto err; - local->fctx = fctx; - } - - while (trav) { - STACK_WIND(frame, stripe_fstat_cbk, trav->xlator, - trav->xlator->fops->fstat, fd, NULL); - trav = trav->next; - } - - return 0; -err: - STRIPE_STACK_UNWIND(fstat, frame, -1, op_errno, NULL, NULL); - return 0; -} - -int32_t -stripe_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, - dict_t *xdata) -{ - stripe_local_t *local = NULL; - stripe_private_t *priv = NULL; - stripe_fd_ctx_t *fctx = NULL; - int i, eof_idx; - off_t dest_offset, tmp_offset; - int32_t op_errno = 1; - - VALIDATE_OR_GOTO(frame, err); - VALIDATE_OR_GOTO(this, err); - VALIDATE_OR_GOTO(fd, err); - VALIDATE_OR_GOTO(fd->inode, err); - - priv = this->private; - - /* Initialization */ - local = mem_get0(this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - local->op_ret = -1; - frame->local = local; - local->call_count = priv->child_count; - - inode_ctx_get(fd->inode, this, (uint64_t *)&fctx); - if (!fctx) { - gf_log(this->name, GF_LOG_ERROR, "no stripe context"); - op_errno = EINVAL; - goto err; - } - if (!fctx->stripe_count) { - gf_log(this->name, GF_LOG_ERROR, "no stripe 
count"); - op_errno = EINVAL; - goto err; - } - - local->fctx = fctx; - eof_idx = (offset / fctx->stripe_size) % fctx->stripe_count; - - for (i = 0; i < fctx->stripe_count; i++) { - if (!fctx->xl_array[i]) { - gf_log(this->name, GF_LOG_ERROR, - "no xlator at index " - "%d", - i); - op_errno = EINVAL; - goto err; - } - - if (fctx->stripe_coalesce) { - if (i < eof_idx) - tmp_offset = gf_roof(offset, - fctx->stripe_size * fctx->stripe_count); - else if (i > eof_idx) - tmp_offset = gf_floor(offset, - fctx->stripe_size * fctx->stripe_count); - else - tmp_offset = offset; - - dest_offset = coalesced_offset(tmp_offset, fctx->stripe_size, - fctx->stripe_count); - } else { - dest_offset = offset; - } - - STACK_WIND(frame, stripe_truncate_cbk, fctx->xl_array[i], - fctx->xl_array[i]->fops->ftruncate, fd, dest_offset, NULL); - } - - return 0; -err: - STRIPE_STACK_UNWIND(ftruncate, frame, -1, op_errno, NULL, NULL, NULL); - return 0; -} - -int32_t -stripe_fsyncdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - int32_t callcnt = 0; - stripe_local_t *local = NULL; - call_frame_t *prev = NULL; - - if (!this || !frame || !frame->local || !cookie) { - gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - - prev = cookie; - local = frame->local; - - LOCK(&frame->lock); - { - callcnt = --local->call_count; - - if (op_ret == -1) { - gf_log(this->name, GF_LOG_DEBUG, "%s returned %s", prev->this->name, - strerror(op_errno)); - local->op_errno = op_errno; - if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this))) - local->failed = 1; - } - if (op_ret >= 0) - local->op_ret = op_ret; - } - UNLOCK(&frame->lock); - - if (!callcnt) { - if (local->failed) - local->op_ret = -1; - - STRIPE_STACK_UNWIND(fsyncdir, frame, local->op_ret, local->op_errno, - NULL); - } -out: - return 0; -} - -int32_t -stripe_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, - dict_t *xdata) -{ - stripe_local_t 
*local = NULL; - stripe_private_t *priv = NULL; - xlator_list_t *trav = NULL; - int32_t op_errno = 1; - - VALIDATE_OR_GOTO(frame, err); - VALIDATE_OR_GOTO(this, err); - VALIDATE_OR_GOTO(fd, err); - VALIDATE_OR_GOTO(fd->inode, err); - - priv = this->private; - trav = this->children; - - /* Initialization */ - local = mem_get0(this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - local->op_ret = -1; - frame->local = local; - local->call_count = priv->child_count; - - while (trav) { - STACK_WIND(frame, stripe_fsyncdir_cbk, trav->xlator, - trav->xlator->fops->fsyncdir, fd, flags, NULL); - trav = trav->next; - } - - return 0; -err: - STRIPE_STACK_UNWIND(fsyncdir, frame, -1, op_errno, NULL); - return 0; -} - -int32_t -stripe_readv_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, - dict_t *xdata) -{ - int32_t i = 0; - int32_t callcnt = 0; - int32_t count = 0; - stripe_local_t *local = NULL; - struct iovec *vec = NULL; - struct iatt tmp_stbuf = { - 0, - }; - struct iobref *tmp_iobref = NULL; - struct iobuf *iobuf = NULL; - call_frame_t *prev = NULL; - - if (!this || !frame || !frame->local) { - gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - - local = frame->local; - prev = cookie; - - LOCK(&frame->lock); - { - callcnt = --local->call_count; - if (op_ret != -1) { - correct_file_size(buf, local->fctx, prev); - if (local->stbuf_size < buf->ia_size) - local->stbuf_size = buf->ia_size; - } - } - UNLOCK(&frame->lock); - - if (!callcnt) { - op_ret = 0; - - /* Keep extra space for filling in '\0's */ - vec = GF_CALLOC((local->count * 2), sizeof(struct iovec), - gf_stripe_mt_iovec); - if (!vec) { - op_ret = -1; - goto done; - } - - for (i = 0; i < local->wind_count; i++) { - if (local->replies[i].op_ret) { - memcpy((vec + count), local->replies[i].vector, - (local->replies[i].count * sizeof(struct iovec))); - count += local->replies[i].count; - op_ret += 
local->replies[i].op_ret; - } - if ((local->replies[i].op_ret < local->replies[i].requested_size) && - (local->stbuf_size > (local->offset + op_ret))) { - /* Fill in 0s here */ - vec[count].iov_len = (local->replies[i].requested_size - - local->replies[i].op_ret); - iobuf = iobuf_get2(this->ctx->iobuf_pool, vec[count].iov_len); - if (!iobuf) { - gf_log(this->name, GF_LOG_ERROR, "Out of memory."); - op_ret = -1; - op_errno = ENOMEM; - goto done; - } - memset(iobuf->ptr, 0, vec[count].iov_len); - vec[count].iov_base = iobuf->ptr; - - iobref_add(local->iobref, iobuf); - iobuf_unref(iobuf); - - op_ret += vec[count].iov_len; - count++; - } - GF_FREE(local->replies[i].vector); - } - - /* ENOENT signals EOF to the NFS-server */ - if (op_ret != -1 && op_ret < local->readv_size && - (local->offset + op_ret == buf->ia_size)) - op_errno = ENOENT; - - /* FIXME: notice that st_ino, and st_dev (gen) will be - * different than what inode will have. Make sure this doesn't - * cause any bugs at higher levels */ - memcpy(&tmp_stbuf, &local->replies[0].stbuf, sizeof(struct iatt)); - tmp_stbuf.ia_size = local->stbuf_size; - - done: - GF_FREE(local->replies); - tmp_iobref = local->iobref; - STRIPE_STACK_UNWIND(readv, frame, op_ret, op_errno, vec, count, - &tmp_stbuf, tmp_iobref, NULL); - - iobref_unref(tmp_iobref); - GF_FREE(vec); - } -out: - return 0; -} - -/** - * stripe_readv_cbk - get all the striped reads, and order it properly, send it - * to above layer after putting it in a single vector. 
- */ -int32_t -stripe_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iovec *vector, - int32_t count, struct iatt *stbuf, struct iobref *iobref, - dict_t *xdata) -{ - int32_t index = 0; - int32_t callcnt = 0; - int32_t final_count = 0; - int32_t need_to_check_proper_size = 0; - call_frame_t *mframe = NULL; - stripe_local_t *mlocal = NULL; - stripe_local_t *local = NULL; - struct iovec *final_vec = NULL; - struct iatt tmp_stbuf = { - 0, - }; - struct iatt *tmp_stbuf_p = NULL; // need it for a warning - struct iobref *tmp_iobref = NULL; - stripe_fd_ctx_t *fctx = NULL; - call_frame_t *prev = NULL; - - if (!this || !frame || !frame->local || !cookie) { - gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto end; - } - - local = frame->local; - index = local->node_index; - prev = cookie; - mframe = local->orig_frame; - if (!mframe) - goto out; - - mlocal = mframe->local; - if (!mlocal) - goto out; - - fctx = mlocal->fctx; - - LOCK(&mframe->lock); - { - mlocal->replies[index].op_ret = op_ret; - mlocal->replies[index].op_errno = op_errno; - mlocal->replies[index].requested_size = local->readv_size; - if (op_ret >= 0) { - mlocal->replies[index].stbuf = *stbuf; - mlocal->replies[index].count = count; - mlocal->replies[index].vector = iov_dup(vector, count); - - correct_file_size(stbuf, fctx, prev); - - if (local->stbuf_size < stbuf->ia_size) - local->stbuf_size = stbuf->ia_size; - local->stbuf_blocks += stbuf->ia_blocks; - - if (!mlocal->iobref) - mlocal->iobref = iobref_new(); - iobref_merge(mlocal->iobref, iobref); - } - callcnt = ++mlocal->call_count; - } - UNLOCK(&mframe->lock); - - if (callcnt == mlocal->wind_count) { - op_ret = 0; - - for (index = 0; index < mlocal->wind_count; index++) { - /* check whether each stripe returned - * 'expected' number of bytes */ - if (mlocal->replies[index].op_ret == -1) { - op_ret = -1; - op_errno = mlocal->replies[index].op_errno; - break; - } - /* TODO: handle the 
'holes' within the read range - properly */ - if (mlocal->replies[index].op_ret < - mlocal->replies[index].requested_size) { - need_to_check_proper_size = 1; - } - - op_ret += mlocal->replies[index].op_ret; - mlocal->count += mlocal->replies[index].count; - } - if (op_ret == -1) - goto done; - if (need_to_check_proper_size) - goto check_size; - - final_vec = GF_CALLOC(mlocal->count, sizeof(struct iovec), - gf_stripe_mt_iovec); - - if (!final_vec) { - op_ret = -1; - goto done; - } - - for (index = 0; index < mlocal->wind_count; index++) { - memcpy((final_vec + final_count), mlocal->replies[index].vector, - (mlocal->replies[index].count * sizeof(struct iovec))); - final_count += mlocal->replies[index].count; - GF_FREE(mlocal->replies[index].vector); - } - - /* FIXME: notice that st_ino, and st_dev (gen) will be - * different than what inode will have. Make sure this doesn't - * cause any bugs at higher levels */ - memcpy(&tmp_stbuf, &mlocal->replies[0].stbuf, sizeof(struct iatt)); - tmp_stbuf.ia_size = local->stbuf_size; - tmp_stbuf.ia_blocks = local->stbuf_blocks; - - done: - /* */ - GF_FREE(mlocal->replies); - tmp_iobref = mlocal->iobref; - /* work around for nfs truncated read. 
Bug 3774 */ - tmp_stbuf_p = &tmp_stbuf; - WIPE(tmp_stbuf_p); - STRIPE_STACK_UNWIND(readv, mframe, op_ret, op_errno, final_vec, - final_count, &tmp_stbuf, tmp_iobref, NULL); - - iobref_unref(tmp_iobref); - GF_FREE(final_vec); - } - - goto out; - -check_size: - mlocal->call_count = fctx->stripe_count; - - for (index = 0; index < fctx->stripe_count; index++) { - STACK_WIND(mframe, stripe_readv_fstat_cbk, (fctx->xl_array[index]), - (fctx->xl_array[index])->fops->fstat, mlocal->fd, NULL); - } - -out: - STRIPE_STACK_DESTROY(frame); -end: - return 0; -} - -int32_t -stripe_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset, uint32_t flags, dict_t *xdata) -{ - int32_t op_errno = EINVAL; - int32_t idx = 0; - int32_t index = 0; - int32_t num_stripe = 0; - int32_t off_index = 0; - size_t frame_size = 0; - off_t rounded_end = 0; - uint64_t tmp_fctx = 0; - uint64_t stripe_size = 0; - off_t rounded_start = 0; - off_t frame_offset = offset; - off_t dest_offset = 0; - stripe_local_t *local = NULL; - call_frame_t *rframe = NULL; - stripe_local_t *rlocal = NULL; - stripe_fd_ctx_t *fctx = NULL; - - VALIDATE_OR_GOTO(frame, err); - VALIDATE_OR_GOTO(this, err); - VALIDATE_OR_GOTO(fd, err); - VALIDATE_OR_GOTO(fd->inode, err); - - inode_ctx_get(fd->inode, this, &tmp_fctx); - if (!tmp_fctx) { - op_errno = EBADFD; - goto err; - } - fctx = (stripe_fd_ctx_t *)(long)tmp_fctx; - stripe_size = fctx->stripe_size; - - STRIPE_VALIDATE_FCTX(fctx, err); - - if (!stripe_size) { - gf_log(this->name, GF_LOG_DEBUG, "Wrong stripe size for the file"); - goto err; - } - /* The file is stripe across the child nodes. Send the read request - * to the child nodes appropriately after checking which region of - * the file is in which child node. Always '0-<stripe_size>' part of - * the file resides in the first child. 
- */ - rounded_start = gf_floor(offset, stripe_size); - rounded_end = gf_roof(offset + size, stripe_size); - num_stripe = (rounded_end - rounded_start) / stripe_size; - - local = mem_get0(this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - frame->local = local; - - /* This is where all the vectors should be copied. */ - local->replies = GF_CALLOC(num_stripe, sizeof(struct stripe_replies), - gf_stripe_mt_stripe_replies); - if (!local->replies) { - op_errno = ENOMEM; - goto err; - } - - off_index = (offset / stripe_size) % fctx->stripe_count; - local->wind_count = num_stripe; - local->readv_size = size; - local->offset = offset; - local->fd = fd_ref(fd); - local->fctx = fctx; - - for (index = off_index; index < (num_stripe + off_index); index++) { - rframe = copy_frame(frame); - rlocal = mem_get0(this->local_pool); - if (!rlocal) { - op_errno = ENOMEM; - goto err; - } - - frame_size = min(gf_roof(frame_offset + 1, stripe_size), - (offset + size)) - - frame_offset; - - rlocal->node_index = index - off_index; - rlocal->orig_frame = frame; - rlocal->readv_size = frame_size; - rframe->local = rlocal; - idx = (index % fctx->stripe_count); - - if (fctx->stripe_coalesce) - dest_offset = coalesced_offset(frame_offset, stripe_size, - fctx->stripe_count); - else - dest_offset = frame_offset; - - STACK_WIND(rframe, stripe_readv_cbk, fctx->xl_array[idx], - fctx->xl_array[idx]->fops->readv, fd, frame_size, - dest_offset, flags, xdata); - - frame_offset += frame_size; - } - - return 0; -err: - if (rframe) - STRIPE_STACK_DESTROY(rframe); - - STRIPE_STACK_UNWIND(readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL); - return 0; -} - -int32_t -stripe_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) -{ - int32_t callcnt = 0; - stripe_local_t *local = NULL; - stripe_local_t *mlocal = NULL; - call_frame_t *prev = NULL; - call_frame_t *mframe = NULL; - 
struct stripe_replies *reply = NULL; - int32_t i = 0; - - if (!this || !frame || !frame->local || !cookie) { - gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - - prev = cookie; - local = frame->local; - mframe = local->orig_frame; - mlocal = mframe->local; - - LOCK(&frame->lock); - { - callcnt = ++mlocal->call_count; - - mlocal->replies[local->node_index].op_ret = op_ret; - mlocal->replies[local->node_index].op_errno = op_errno; - - if (op_ret >= 0) { - mlocal->post_buf = *postbuf; - mlocal->pre_buf = *prebuf; - - mlocal->prebuf_blocks += prebuf->ia_blocks; - mlocal->postbuf_blocks += postbuf->ia_blocks; - - correct_file_size(prebuf, mlocal->fctx, prev); - correct_file_size(postbuf, mlocal->fctx, prev); - - if (mlocal->prebuf_size < prebuf->ia_size) - mlocal->prebuf_size = prebuf->ia_size; - if (mlocal->postbuf_size < postbuf->ia_size) - mlocal->postbuf_size = postbuf->ia_size; - } - } - UNLOCK(&frame->lock); - - if ((callcnt == mlocal->wind_count) && mlocal->unwind) { - mlocal->pre_buf.ia_size = mlocal->prebuf_size; - mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks; - mlocal->post_buf.ia_size = mlocal->postbuf_size; - mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks; - - /* - * Only return the number of consecutively written bytes up until - * the first error. Only return an error if it occurs first. - * - * When a short write occurs, the application should retry at the - * appropriate offset, at which point we'll potentially pass back - * the error. 
- */ - for (i = 0, reply = mlocal->replies; i < mlocal->wind_count; - i++, reply++) { - if (reply->op_ret == -1) { - gf_log(this->name, GF_LOG_DEBUG, - "reply %d " - "returned error %s", - i, strerror(reply->op_errno)); - if (!mlocal->op_ret) { - mlocal->op_ret = -1; - mlocal->op_errno = reply->op_errno; - } - break; - } - - mlocal->op_ret += reply->op_ret; - - if (reply->op_ret < reply->requested_size) - break; - } - - GF_FREE(mlocal->replies); - - STRIPE_STACK_UNWIND(writev, mframe, mlocal->op_ret, mlocal->op_errno, - &mlocal->pre_buf, &mlocal->post_buf, NULL); - } -out: - if (frame) - STRIPE_STACK_DESTROY(frame); - return 0; -} - -int32_t -stripe_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iovec *vector, int32_t count, off_t offset, uint32_t flags, - struct iobref *iobref, dict_t *xdata) -{ - struct iovec *tmp_vec = NULL; - stripe_local_t *local = NULL; - stripe_fd_ctx_t *fctx = NULL; - int32_t op_errno = 1; - int32_t idx = 0; - int32_t total_size = 0; - int32_t offset_offset = 0; - int32_t remaining_size = 0; - int32_t tmp_count = count; - off_t fill_size = 0; - uint64_t stripe_size = 0; - uint64_t tmp_fctx = 0; - off_t dest_offset = 0; - off_t rounded_start = 0; - off_t rounded_end = 0; - int32_t total_chunks = 0; - call_frame_t *wframe = NULL; - stripe_local_t *wlocal = NULL; - - VALIDATE_OR_GOTO(frame, err); - VALIDATE_OR_GOTO(this, err); - VALIDATE_OR_GOTO(fd, err); - VALIDATE_OR_GOTO(fd->inode, err); - - inode_ctx_get(fd->inode, this, &tmp_fctx); - if (!tmp_fctx) { - op_errno = EINVAL; - goto err; - } - fctx = (stripe_fd_ctx_t *)(long)tmp_fctx; - stripe_size = fctx->stripe_size; - - STRIPE_VALIDATE_FCTX(fctx, err); - - /* File has to be stripped across the child nodes */ - total_size = iov_length(vector, count); - remaining_size = total_size; - - local = mem_get0(this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - frame->local = local; - local->stripe_size = stripe_size; - local->fctx = fctx; - - if (!stripe_size) { 
- gf_log(this->name, GF_LOG_DEBUG, "Wrong stripe size for the file"); - op_errno = EINVAL; - goto err; - } - - rounded_start = gf_floor(offset, stripe_size); - rounded_end = gf_roof(offset + total_size, stripe_size); - total_chunks = (rounded_end - rounded_start) / stripe_size; - local->replies = GF_CALLOC(total_chunks, sizeof(struct stripe_replies), - gf_stripe_mt_stripe_replies); - if (!local->replies) { - op_errno = ENOMEM; - goto err; - } - - total_chunks = 0; - while (1) { - wframe = copy_frame(frame); - wlocal = mem_get0(this->local_pool); - if (!wlocal) { - op_errno = ENOMEM; - goto err; - } - wlocal->orig_frame = frame; - wframe->local = wlocal; - - /* Send striped chunk of the vector to child - nodes appropriately. */ - idx = (((offset + offset_offset) / local->stripe_size) % - fctx->stripe_count); - - fill_size = (local->stripe_size - - ((offset + offset_offset) % local->stripe_size)); - if (fill_size > remaining_size) - fill_size = remaining_size; - - remaining_size -= fill_size; - - tmp_count = iov_subset(vector, count, offset_offset, - offset_offset + fill_size, NULL); - tmp_vec = GF_CALLOC(tmp_count, sizeof(struct iovec), - gf_stripe_mt_iovec); - if (!tmp_vec) { - op_errno = ENOMEM; - goto err; - } - tmp_count = iov_subset(vector, count, offset_offset, - offset_offset + fill_size, tmp_vec); - - local->wind_count++; - if (remaining_size == 0) - local->unwind = 1; - - /* - * Store off the request index (with respect to the chunk of the - * initial offset) and the size of the request. This is required - * in the callback to calculate an appropriate return value in - * the event of a write failure in one or more requests. 
- */ - wlocal->node_index = total_chunks; - local->replies[total_chunks].requested_size = fill_size; - - dest_offset = offset + offset_offset; - if (fctx->stripe_coalesce) - dest_offset = coalesced_offset(dest_offset, local->stripe_size, - fctx->stripe_count); - - STACK_WIND(wframe, stripe_writev_cbk, fctx->xl_array[idx], - fctx->xl_array[idx]->fops->writev, fd, tmp_vec, tmp_count, - dest_offset, flags, iobref, xdata); - - GF_FREE(tmp_vec); - offset_offset += fill_size; - total_chunks++; - if (remaining_size == 0) - break; - } - - return 0; -err: - if (wframe) - STRIPE_STACK_DESTROY(wframe); - - STRIPE_STACK_UNWIND(writev, frame, -1, op_errno, NULL, NULL, NULL); - return 0; -} - -int32_t -stripe_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) -{ - int32_t callcnt = 0; - stripe_local_t *local = NULL; - stripe_local_t *mlocal = NULL; - call_frame_t *prev = NULL; - call_frame_t *mframe = NULL; - - if (!this || !frame || !frame->local || !cookie) { - gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - - prev = cookie; - local = frame->local; - mframe = local->orig_frame; - mlocal = mframe->local; - - LOCK(&frame->lock); - { - callcnt = ++mlocal->call_count; - - if (op_ret == 0) { - mlocal->post_buf = *postbuf; - mlocal->pre_buf = *prebuf; - - mlocal->prebuf_blocks += prebuf->ia_blocks; - mlocal->postbuf_blocks += postbuf->ia_blocks; - - correct_file_size(prebuf, mlocal->fctx, prev); - correct_file_size(postbuf, mlocal->fctx, prev); - - if (mlocal->prebuf_size < prebuf->ia_size) - mlocal->prebuf_size = prebuf->ia_size; - if (mlocal->postbuf_size < postbuf->ia_size) - mlocal->postbuf_size = postbuf->ia_size; - } - - /* return the first failure */ - if (mlocal->op_ret == 0) { - mlocal->op_ret = op_ret; - mlocal->op_errno = op_errno; - } - } - UNLOCK(&frame->lock); - - if ((callcnt == mlocal->wind_count) && mlocal->unwind) { - 
mlocal->pre_buf.ia_size = mlocal->prebuf_size; - mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks; - mlocal->post_buf.ia_size = mlocal->postbuf_size; - mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks; - - STRIPE_STACK_UNWIND(fallocate, mframe, mlocal->op_ret, mlocal->op_errno, - &mlocal->pre_buf, &mlocal->post_buf, NULL); - } -out: - if (frame) - STRIPE_STACK_DESTROY(frame); - return 0; -} - -int32_t -stripe_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, - off_t offset, size_t len, dict_t *xdata) -{ - stripe_local_t *local = NULL; - stripe_fd_ctx_t *fctx = NULL; - int32_t op_errno = 1; - int32_t idx = 0; - int32_t offset_offset = 0; - int32_t remaining_size = 0; - off_t fill_size = 0; - uint64_t stripe_size = 0; - uint64_t tmp_fctx = 0; - off_t dest_offset = 0; - call_frame_t *fframe = NULL; - stripe_local_t *flocal = NULL; - - VALIDATE_OR_GOTO(frame, err); - VALIDATE_OR_GOTO(this, err); - VALIDATE_OR_GOTO(fd, err); - VALIDATE_OR_GOTO(fd->inode, err); - - inode_ctx_get(fd->inode, this, &tmp_fctx); - if (!tmp_fctx) { - op_errno = EINVAL; - goto err; - } - fctx = (stripe_fd_ctx_t *)(long)tmp_fctx; - stripe_size = fctx->stripe_size; - - STRIPE_VALIDATE_FCTX(fctx, err); - - remaining_size = len; - - local = mem_get0(this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - frame->local = local; - local->stripe_size = stripe_size; - local->fctx = fctx; - - if (!stripe_size) { - gf_log(this->name, GF_LOG_DEBUG, "Wrong stripe size for the file"); - op_errno = EINVAL; - goto err; - } - - while (1) { - fframe = copy_frame(frame); - flocal = mem_get0(this->local_pool); - if (!flocal) { - op_errno = ENOMEM; - goto err; - } - flocal->orig_frame = frame; - fframe->local = flocal; - - /* send fallocate request to the associated child node */ - idx = (((offset + offset_offset) / local->stripe_size) % - fctx->stripe_count); - - fill_size = (local->stripe_size - - ((offset + offset_offset) % local->stripe_size)); - if (fill_size > 
remaining_size) - fill_size = remaining_size; - - remaining_size -= fill_size; - - local->wind_count++; - if (remaining_size == 0) - local->unwind = 1; - - dest_offset = offset + offset_offset; - if (fctx->stripe_coalesce) - dest_offset = coalesced_offset(dest_offset, local->stripe_size, - fctx->stripe_count); - - /* - * TODO: Create a separate handler for coalesce mode that sends a - * single fallocate per-child (since the ranges are linear). - */ - STACK_WIND(fframe, stripe_fallocate_cbk, fctx->xl_array[idx], - fctx->xl_array[idx]->fops->fallocate, fd, mode, dest_offset, - fill_size, xdata); - - offset_offset += fill_size; - if (remaining_size == 0) - break; - } - - return 0; -err: - if (fframe) - STRIPE_STACK_DESTROY(fframe); - - STRIPE_STACK_UNWIND(fallocate, frame, -1, op_errno, NULL, NULL, NULL); - return 0; -} - -int32_t -stripe_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) -{ - int32_t callcnt = 0; - stripe_local_t *local = NULL; - stripe_local_t *mlocal = NULL; - call_frame_t *prev = NULL; - call_frame_t *mframe = NULL; - - if (!this || !frame || !frame->local || !cookie) { - gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - - prev = cookie; - local = frame->local; - mframe = local->orig_frame; - mlocal = mframe->local; - - LOCK(&frame->lock); - { - callcnt = ++mlocal->call_count; - - if (op_ret == 0) { - mlocal->post_buf = *postbuf; - mlocal->pre_buf = *prebuf; - - mlocal->prebuf_blocks += prebuf->ia_blocks; - mlocal->postbuf_blocks += postbuf->ia_blocks; - - correct_file_size(prebuf, mlocal->fctx, prev); - correct_file_size(postbuf, mlocal->fctx, prev); - - if (mlocal->prebuf_size < prebuf->ia_size) - mlocal->prebuf_size = prebuf->ia_size; - if (mlocal->postbuf_size < postbuf->ia_size) - mlocal->postbuf_size = postbuf->ia_size; - } - - /* return the first failure */ - if (mlocal->op_ret == 0) { - mlocal->op_ret = op_ret; 
- mlocal->op_errno = op_errno; - } - } - UNLOCK(&frame->lock); - - if ((callcnt == mlocal->wind_count) && mlocal->unwind) { - mlocal->pre_buf.ia_size = mlocal->prebuf_size; - mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks; - mlocal->post_buf.ia_size = mlocal->postbuf_size; - mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks; - - STRIPE_STACK_UNWIND(discard, mframe, mlocal->op_ret, mlocal->op_errno, - &mlocal->pre_buf, &mlocal->post_buf, NULL); - } -out: - if (frame) - STRIPE_STACK_DESTROY(frame); - - return 0; -} - -int32_t -stripe_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, - size_t len, dict_t *xdata) -{ - stripe_local_t *local = NULL; - stripe_fd_ctx_t *fctx = NULL; - int32_t op_errno = 1; - int32_t idx = 0; - int32_t offset_offset = 0; - int32_t remaining_size = 0; - off_t fill_size = 0; - uint64_t stripe_size = 0; - uint64_t tmp_fctx = 0; - off_t dest_offset = 0; - call_frame_t *fframe = NULL; - stripe_local_t *flocal = NULL; - - VALIDATE_OR_GOTO(frame, err); - VALIDATE_OR_GOTO(this, err); - VALIDATE_OR_GOTO(fd, err); - VALIDATE_OR_GOTO(fd->inode, err); - - inode_ctx_get(fd->inode, this, &tmp_fctx); - if (!tmp_fctx) { - op_errno = EINVAL; - goto err; - } - fctx = (stripe_fd_ctx_t *)(long)tmp_fctx; - stripe_size = fctx->stripe_size; - - STRIPE_VALIDATE_FCTX(fctx, err); - - remaining_size = len; - - local = mem_get0(this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - frame->local = local; - local->stripe_size = stripe_size; - local->fctx = fctx; - - if (!stripe_size) { - gf_log(this->name, GF_LOG_DEBUG, "Wrong stripe size for the file"); - op_errno = EINVAL; - goto err; - } - - while (1) { - fframe = copy_frame(frame); - flocal = mem_get0(this->local_pool); - if (!flocal) { - op_errno = ENOMEM; - goto err; - } - flocal->orig_frame = frame; - fframe->local = flocal; - - /* send discard request to the associated child node */ - idx = (((offset + offset_offset) / local->stripe_size) % - fctx->stripe_count); - - 
fill_size = (local->stripe_size - - ((offset + offset_offset) % local->stripe_size)); - if (fill_size > remaining_size) - fill_size = remaining_size; - - remaining_size -= fill_size; - - local->wind_count++; - if (remaining_size == 0) - local->unwind = 1; - - dest_offset = offset + offset_offset; - if (fctx->stripe_coalesce) - dest_offset = coalesced_offset(dest_offset, local->stripe_size, - fctx->stripe_count); - - /* - * TODO: Create a separate handler for coalesce mode that sends a - * single discard per-child (since the ranges are linear). - */ - STACK_WIND(fframe, stripe_discard_cbk, fctx->xl_array[idx], - fctx->xl_array[idx]->fops->discard, fd, dest_offset, - fill_size, xdata); - - offset_offset += fill_size; - if (remaining_size == 0) - break; - } - - return 0; -err: - if (fframe) - STRIPE_STACK_DESTROY(fframe); - - STRIPE_STACK_UNWIND(discard, frame, -1, op_errno, NULL, NULL, NULL); - return 0; -} - -int32_t -stripe_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) -{ - int32_t callcnt = 0; - stripe_local_t *local = NULL; - stripe_local_t *mlocal = NULL; - call_frame_t *prev = NULL; - call_frame_t *mframe = NULL; - - GF_ASSERT(frame); - - if (!this || !frame->local || !cookie) { - gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - - prev = cookie; - local = frame->local; - mframe = local->orig_frame; - mlocal = mframe->local; - - LOCK(&frame->lock); - { - callcnt = ++mlocal->call_count; - - if (op_ret == 0) { - mlocal->post_buf = *postbuf; - mlocal->pre_buf = *prebuf; - - mlocal->prebuf_blocks += prebuf->ia_blocks; - mlocal->postbuf_blocks += postbuf->ia_blocks; - - correct_file_size(prebuf, mlocal->fctx, prev); - correct_file_size(postbuf, mlocal->fctx, prev); - - if (mlocal->prebuf_size < prebuf->ia_size) - mlocal->prebuf_size = prebuf->ia_size; - if (mlocal->postbuf_size < postbuf->ia_size) - mlocal->postbuf_size = 
postbuf->ia_size; - } - - /* return the first failure */ - if (mlocal->op_ret == 0) { - mlocal->op_ret = op_ret; - mlocal->op_errno = op_errno; - } - } - UNLOCK(&frame->lock); - - if ((callcnt == mlocal->wind_count) && mlocal->unwind) { - mlocal->pre_buf.ia_size = mlocal->prebuf_size; - mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks; - mlocal->post_buf.ia_size = mlocal->postbuf_size; - mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks; - - STRIPE_STACK_UNWIND(zerofill, mframe, mlocal->op_ret, mlocal->op_errno, - &mlocal->pre_buf, &mlocal->post_buf, NULL); - } -out: - STRIPE_STACK_DESTROY(frame); - return 0; -} - -int32_t -stripe_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, - off_t len, dict_t *xdata) -{ - stripe_local_t *local = NULL; - stripe_fd_ctx_t *fctx = NULL; - int32_t op_errno = 1; - int32_t idx = 0; - int32_t offset_offset = 0; - int32_t remaining_size = 0; - off_t fill_size = 0; - uint64_t stripe_size = 0; - uint64_t tmp_fctx = 0; - off_t dest_offset = 0; - call_frame_t *fframe = NULL; - stripe_local_t *flocal = NULL; - - VALIDATE_OR_GOTO(frame, err); - VALIDATE_OR_GOTO(this, err); - VALIDATE_OR_GOTO(fd, err); - VALIDATE_OR_GOTO(fd->inode, err); - - inode_ctx_get(fd->inode, this, &tmp_fctx); - if (!tmp_fctx) { - op_errno = EINVAL; - goto err; - } - fctx = (stripe_fd_ctx_t *)(long)tmp_fctx; - stripe_size = fctx->stripe_size; - - STRIPE_VALIDATE_FCTX(fctx, err); - - remaining_size = len; - - local = mem_get0(this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - frame->local = local; - local->stripe_size = stripe_size; - local->fctx = fctx; - - if (!stripe_size) { - gf_log(this->name, GF_LOG_DEBUG, "Wrong stripe size for the file"); - op_errno = EINVAL; - goto err; - } - - while (1) { - fframe = copy_frame(frame); - flocal = mem_get0(this->local_pool); - if (!flocal) { - op_errno = ENOMEM; - goto err; - } - flocal->orig_frame = frame; - fframe->local = flocal; - - idx = (((offset + offset_offset) / 
local->stripe_size) % - fctx->stripe_count); - - fill_size = (local->stripe_size - - ((offset + offset_offset) % local->stripe_size)); - if (fill_size > remaining_size) - fill_size = remaining_size; - - remaining_size -= fill_size; - - local->wind_count++; - if (remaining_size == 0) - local->unwind = 1; - - dest_offset = offset + offset_offset; - if (fctx->stripe_coalesce) - dest_offset = coalesced_offset(dest_offset, local->stripe_size, - fctx->stripe_count); - - STACK_WIND(fframe, stripe_zerofill_cbk, fctx->xl_array[idx], - fctx->xl_array[idx]->fops->zerofill, fd, dest_offset, - fill_size, xdata); - offset_offset += fill_size; - if (remaining_size == 0) - break; - } - - return 0; -err: - if (fframe) - STRIPE_STACK_DESTROY(fframe); - - STRIPE_STACK_UNWIND(zerofill, frame, -1, op_errno, NULL, NULL, NULL); - return 0; -} - -int32_t -stripe_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, - gf_seek_what_t what, dict_t *xdata) -{ - /* TBD */ - gf_log(this->name, GF_LOG_INFO, "seek called on %s.", - uuid_utoa(fd->inode->gfid)); - STRIPE_STACK_UNWIND(seek, frame, -1, ENOTSUP, 0, NULL); - return 0; -} - -int32_t -stripe_release(xlator_t *this, fd_t *fd) -{ - return 0; -} - -int -stripe_forget(xlator_t *this, inode_t *inode) -{ - uint64_t tmp_fctx = 0; - stripe_fd_ctx_t *fctx = NULL; - - VALIDATE_OR_GOTO(this, err); - VALIDATE_OR_GOTO(inode, err); - - (void)inode_ctx_del(inode, this, &tmp_fctx); - if (!tmp_fctx) { - goto err; - } - - fctx = (stripe_fd_ctx_t *)(long)tmp_fctx; - - if (!fctx->static_array) - GF_FREE(fctx->xl_array); - - GF_FREE(fctx); -err: - return 0; -} - -int32_t -notify(xlator_t *this, int32_t event, void *data, ...) 
-{ - stripe_private_t *priv = NULL; - int down_client = 0; - int i = 0; - gf_boolean_t heard_from_all_children = _gf_false; - - if (!this) - return 0; - - priv = this->private; - if (!priv) - return 0; - - switch (event) { - case GF_EVENT_CHILD_UP: { - /* get an index number to set */ - for (i = 0; i < priv->child_count; i++) { - if (data == priv->xl_array[i]) - break; - } - - if (priv->child_count == i) { - gf_log(this->name, GF_LOG_ERROR, - "got GF_EVENT_CHILD_UP bad subvolume %s", - data ? ((xlator_t *)data)->name : NULL); - break; - } - - LOCK(&priv->lock); - { - if (data == FIRST_CHILD(this)) - priv->first_child_down = 0; - priv->last_event[i] = event; - } - UNLOCK(&priv->lock); - } break; - case GF_EVENT_CHILD_CONNECTING: { - // 'CONNECTING' doesn't ensure its CHILD_UP, so do nothing - goto out; - } - case GF_EVENT_CHILD_DOWN: { - /* get an index number to set */ - for (i = 0; i < priv->child_count; i++) { - if (data == priv->xl_array[i]) - break; - } - - if (priv->child_count == i) { - gf_log(this->name, GF_LOG_ERROR, - "got GF_EVENT_CHILD_DOWN bad subvolume %s", - data ? 
((xlator_t *)data)->name : NULL); - break; - } - - LOCK(&priv->lock); - { - if (data == FIRST_CHILD(this)) - priv->first_child_down = 1; - priv->last_event[i] = event; - } - UNLOCK(&priv->lock); - } break; - - default: { - /* */ - default_notify(this, event, data); - goto out; - } break; - } - - // Consider child as down if it's last_event is not CHILD_UP - for (i = 0, down_client = 0; i < priv->child_count; i++) - if (priv->last_event[i] != GF_EVENT_CHILD_UP) - down_client++; - - LOCK(&priv->lock); - { - priv->nodes_down = down_client; - } - UNLOCK(&priv->lock); - - heard_from_all_children = _gf_true; - for (i = 0; i < priv->child_count; i++) - if (!priv->last_event[i]) - heard_from_all_children = _gf_false; - - if (heard_from_all_children) - default_notify(this, event, data); -out: - return 0; -} - -int -stripe_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *xdata) -{ - int ret = -1; - int call_cnt = 0; - stripe_local_t *local = NULL; - - if (!frame || !frame->local || !this) { - gf_log("", GF_LOG_ERROR, "Possible NULL deref"); - return ret; - } - - local = frame->local; - - LOCK(&frame->lock); - { - call_cnt = --local->wind_count; - - /** - * We overwrite ->op_* values here for subsequent failure - * conditions, hence we propagate the last errno down the - * stack. 
- */ - if (op_ret < 0) { - local->op_ret = op_ret; - local->op_errno = op_errno; - goto unlock; - } - } - -unlock: - UNLOCK(&frame->lock); - - if (!call_cnt) { - STRIPE_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno, - xdata); - } - - return 0; -} - -#ifdef HAVE_BD_XLATOR -int -stripe_is_bd(dict_t *this, char *key, data_t *value, void *data) -{ - gf_boolean_t *is_bd = data; - - if (data == NULL) - return 0; - - if (XATTR_IS_BD(key)) - *is_bd = _gf_true; - - return 0; -} - -static gf_boolean_t -stripe_setxattr_is_bd(dict_t *dict) -{ - gf_boolean_t is_bd = _gf_false; - - if (dict == NULL) - goto out; - - dict_foreach(dict, stripe_is_bd, &is_bd); -out: - return is_bd; -} -#else -#define stripe_setxattr_is_bd(dict) _gf_false -#endif - -int -stripe_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, - int flags, dict_t *xdata) -{ - int32_t op_errno = EINVAL; - xlator_list_t *trav = NULL; - stripe_private_t *priv = NULL; - stripe_local_t *local = NULL; - int i = 0; - gf_boolean_t is_bd = _gf_false; - - VALIDATE_OR_GOTO(frame, err); - VALIDATE_OR_GOTO(this, err); - VALIDATE_OR_GOTO(loc, err); - VALIDATE_OR_GOTO(loc->inode, err); - - GF_IF_INTERNAL_XATTR_GOTO("trusted.*stripe*", dict, op_errno, err); - - priv = this->private; - trav = this->children; - - local = mem_get0(this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - - frame->local = local; - local->wind_count = priv->child_count; - local->op_ret = local->op_errno = 0; - - is_bd = stripe_setxattr_is_bd(dict); - - /** - * Set xattrs for directories on all subvolumes. Additionally - * this power is only given to a special client. 
Bd xlator - * also needs xattrs for regular files (ie LVs) - */ - if (((frame->root->pid == GF_CLIENT_PID_GSYNCD) && - IA_ISDIR(loc->inode->ia_type)) || - is_bd) { - for (i = 0; i < priv->child_count; i++, trav = trav->next) { - STACK_WIND(frame, stripe_setxattr_cbk, trav->xlator, - trav->xlator->fops->setxattr, loc, dict, flags, xdata); - } - } else { - local->wind_count = 1; - STACK_WIND(frame, stripe_setxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata); - } - - return 0; -err: - STRIPE_STACK_UNWIND(setxattr, frame, -1, op_errno, NULL); - return 0; -} - -int -stripe_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *xdata) -{ - STRIPE_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, xdata); - return 0; -} - -int -stripe_is_special_key(dict_t *this, char *key, data_t *value, void *data) -{ - gf_boolean_t *is_special = NULL; - - if (data == NULL) { - goto out; - } - - is_special = data; - - if (XATTR_IS_LOCKINFO(key) || XATTR_IS_BD(key)) - *is_special = _gf_true; - -out: - return 0; -} - -int32_t -stripe_fsetxattr_everyone_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - int call_count = 0; - stripe_local_t *local = NULL; - - local = frame->local; - - LOCK(&frame->lock); - { - call_count = --local->wind_count; - - if (op_ret < 0) { - local->op_ret = op_ret; - local->op_errno = op_errno; - } - } - UNLOCK(&frame->lock); - - if (call_count == 0) { - STRIPE_STACK_UNWIND(fsetxattr, frame, local->op_ret, local->op_errno, - NULL); - } - return 0; -} - -int -stripe_fsetxattr_to_everyone(call_frame_t *frame, xlator_t *this, fd_t *fd, - dict_t *dict, int flags, dict_t *xdata) -{ - xlator_list_t *trav = NULL; - stripe_private_t *priv = NULL; - int ret = -1; - stripe_local_t *local = NULL; - - priv = this->private; - - local = mem_get0(this->local_pool); - if (local == NULL) { - goto out; - } - - frame->local = local; - - 
local->wind_count = priv->child_count; - - trav = this->children; - - while (trav) { - STACK_WIND(frame, stripe_fsetxattr_everyone_cbk, trav->xlator, - trav->xlator->fops->fsetxattr, fd, dict, flags, xdata); - trav = trav->next; - } - - ret = 0; -out: - return ret; -} - -static gf_boolean_t -stripe_fsetxattr_is_special(dict_t *dict) -{ - gf_boolean_t is_spl = _gf_false; - - if (dict == NULL) { - goto out; - } - - dict_foreach(dict, stripe_is_special_key, &is_spl); - -out: - return is_spl; -} - -int -stripe_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, - int flags, dict_t *xdata) -{ - int32_t op_ret = -1, ret = -1, op_errno = EINVAL; - gf_boolean_t is_spl = _gf_false; - - VALIDATE_OR_GOTO(frame, err); - VALIDATE_OR_GOTO(this, err); - VALIDATE_OR_GOTO(fd, err); - - GF_IF_INTERNAL_XATTR_GOTO("trusted.*stripe*", dict, op_errno, err); - - is_spl = stripe_fsetxattr_is_special(dict); - if (is_spl) { - ret = stripe_fsetxattr_to_everyone(frame, this, fd, dict, flags, xdata); - if (ret < 0) { - op_errno = ENOMEM; - goto err; - } - - goto out; - } - - STACK_WIND(frame, stripe_fsetxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata); -out: - return 0; -err: - STRIPE_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, NULL); - return 0; -} - -int -stripe_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - STRIPE_STACK_UNWIND(removexattr, frame, op_ret, op_errno, xdata); - return 0; -} - -int -stripe_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *name, dict_t *xdata) -{ - int32_t op_errno = EINVAL; - - VALIDATE_OR_GOTO(this, err); - - GF_IF_NATIVE_XATTR_GOTO("trusted.*stripe*", name, op_errno, err); - - VALIDATE_OR_GOTO(frame, err); - VALIDATE_OR_GOTO(loc, err); - - STACK_WIND(frame, stripe_removexattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->removexattr, loc, name, xdata); - return 0; -err: - 
STRIPE_STACK_UNWIND(removexattr, frame, -1, op_errno, NULL); - return 0; -} - -int -stripe_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - STRIPE_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, xdata); - return 0; -} - -int -stripe_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, - const char *name, dict_t *xdata) -{ - int32_t op_ret = -1; - int32_t op_errno = EINVAL; - - VALIDATE_OR_GOTO(frame, err); - VALIDATE_OR_GOTO(this, err); - VALIDATE_OR_GOTO(fd, err); - - GF_IF_NATIVE_XATTR_GOTO("trusted.*stripe*", name, op_errno, err); - - STACK_WIND(frame, stripe_fremovexattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata); - return 0; -err: - STRIPE_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, xdata); - return 0; -} - -int32_t -stripe_readdirp_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, inode_t *inode, - struct iatt *stbuf, dict_t *xattr, - struct iatt *parent) -{ - stripe_local_t *local = NULL; - call_frame_t *main_frame = NULL; - stripe_local_t *main_local = NULL; - gf_dirent_t *entry = NULL; - call_frame_t *prev = NULL; - int done = 0; - - local = frame->local; - prev = cookie; - - entry = local->dirent; - - main_frame = local->orig_frame; - main_local = main_frame->local; - LOCK(&frame->lock); - { - local->call_count--; - if (!local->call_count) - done = 1; - if (op_ret == -1) { - local->op_errno = op_errno; - local->op_ret = op_ret; - goto unlock; - } - - if (stripe_ctx_handle(this, prev, local, xattr)) - gf_log(this->name, GF_LOG_ERROR, - "Error getting fctx info from dict."); - - correct_file_size(stbuf, local->fctx, prev); - - stripe_iatt_merge(stbuf, &entry->d_stat); - local->stbuf_blocks += stbuf->ia_blocks; - } -unlock: - UNLOCK(&frame->lock); - - if (done) { - inode_ctx_put(entry->inode, this, (uint64_t)(long)local->fctx); - - done = 0; - LOCK(&main_frame->lock); - { - 
main_local->wind_count--; - if (!main_local->wind_count) - done = 1; - if (local->op_ret == -1) { - main_local->op_errno = local->op_errno; - main_local->op_ret = local->op_ret; - } - entry->d_stat.ia_blocks = local->stbuf_blocks; - } - UNLOCK(&main_frame->lock); - if (done) { - main_frame->local = NULL; - STRIPE_STACK_UNWIND(readdir, main_frame, main_local->op_ret, - main_local->op_errno, &main_local->entries, - NULL); - gf_dirent_free(&main_local->entries); - stripe_local_wipe(main_local); - mem_put(main_local); - } - frame->local = NULL; - stripe_local_wipe(local); - mem_put(local); - STRIPE_STACK_DESTROY(frame); - } - - return 0; -} - -int32_t -stripe_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, gf_dirent_t *orig_entries, - dict_t *xdata) -{ - stripe_local_t *local = NULL; - call_frame_t *prev = NULL; - gf_dirent_t *local_entry = NULL; - gf_dirent_t *tmp_entry = NULL; - xlator_list_t *trav = NULL; - loc_t loc = { - 0, - }; - int32_t count = 0; - stripe_private_t *priv = NULL; - int32_t subvols = 0; - dict_t *xattrs = NULL; - call_frame_t *local_frame = NULL; - stripe_local_t *local_ent = NULL; - - if (!this || !frame->local || !cookie) { - gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref"); - goto out; - } - prev = cookie; - local = frame->local; - trav = this->children; - priv = this->private; - - subvols = priv->child_count; - - LOCK(&frame->lock); - { - local->op_errno = op_errno; - local->op_ret = op_ret; - - if (op_ret != -1) { - list_splice_init(&orig_entries->list, &local->entries.list); - local->wind_count = op_ret; - } - } - UNLOCK(&frame->lock); - - if (op_ret == -1) { - gf_log(this->name, GF_LOG_WARNING, "%s returned error %s", - prev->this->name, strerror(op_errno)); - goto out; - } - - xattrs = dict_new(); - if (xattrs) - (void)stripe_xattr_request_build(this, xattrs, 0, 0, 0, 0); - count = op_ret; - list_for_each_entry_safe(local_entry, tmp_entry, (&local->entries.list), - list) - { - if 
(!local_entry) - break; - if (!IA_ISREG(local_entry->d_stat.ia_type) || !local_entry->inode) { - LOCK(&frame->lock); - { - local->wind_count--; - count = local->wind_count; - } - UNLOCK(&frame->lock); - continue; - } - - local_frame = copy_frame(frame); - - if (!local_frame) { - op_errno = ENOMEM; - op_ret = -1; - goto out; - } - - local_ent = mem_get0(this->local_pool); - if (!local_ent) { - op_errno = ENOMEM; - op_ret = -1; - goto out; - } - - loc.inode = inode_ref(local_entry->inode); - - gf_uuid_copy(loc.gfid, local_entry->d_stat.ia_gfid); - - local_ent->orig_frame = frame; - - local_ent->call_count = subvols; - - local_ent->dirent = local_entry; - - local_frame->local = local_ent; - - trav = this->children; - while (trav) { - STACK_WIND(local_frame, stripe_readdirp_lookup_cbk, trav->xlator, - trav->xlator->fops->lookup, &loc, xattrs); - trav = trav->next; - } - loc_wipe(&loc); - } -out: - if (!count) { - /* all entries are directories */ - frame->local = NULL; - STRIPE_STACK_UNWIND(readdir, frame, (local ? local->op_ret : -1), - (local ? local->op_errno : EINVAL), - (local ? 
&local->entries : NULL), NULL); - gf_dirent_free(&local->entries); - stripe_local_wipe(local); - mem_put(local); - } - if (xattrs) - dict_unref(xattrs); - return 0; -} -int32_t -stripe_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t off, dict_t *xdata) -{ - stripe_local_t *local = NULL; - stripe_private_t *priv = NULL; - xlator_list_t *trav = NULL; - int op_errno = -1; - - VALIDATE_OR_GOTO(frame, err); - VALIDATE_OR_GOTO(this, err); - VALIDATE_OR_GOTO(fd, err); - - priv = this->private; - trav = this->children; - - if (priv->first_child_down) { - op_errno = ENOTCONN; - goto err; - } - - /* Initialization */ - local = mem_get0(this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - - frame->local = local; - - local->fd = fd_ref(fd); - - local->wind_count = 0; - - local->count = 0; - local->op_ret = -1; - INIT_LIST_HEAD(&local->entries); - - if (!trav) - goto err; - - STACK_WIND(frame, stripe_readdirp_cbk, trav->xlator, - trav->xlator->fops->readdirp, fd, size, off, xdata); - return 0; -err: - op_errno = (op_errno == -1) ? 
errno : op_errno; - STRIPE_STACK_UNWIND(readdir, frame, -1, op_errno, NULL, NULL); - - return 0; -} - -int32_t -mem_acct_init(xlator_t *this) -{ - int ret = -1; - - if (!this) - goto out; - - ret = xlator_mem_acct_init(this, gf_stripe_mt_end + 1); - - if (ret != 0) { - gf_log(this->name, GF_LOG_ERROR, - "Memory accounting init" - "failed"); - goto out; - } - -out: - return ret; -} - -static int -clear_pattern_list(stripe_private_t *priv) -{ - struct stripe_options *prev = NULL; - struct stripe_options *trav = NULL; - int ret = -1; - - GF_VALIDATE_OR_GOTO("stripe", priv, out); - - trav = priv->pattern; - priv->pattern = NULL; - while (trav) { - prev = trav; - trav = trav->next; - GF_FREE(prev); - } - - ret = 0; -out: - return ret; -} - -int -reconfigure(xlator_t *this, dict_t *options) -{ - stripe_private_t *priv = NULL; - data_t *data = NULL; - int ret = -1; - volume_option_t *opt = NULL; - - GF_ASSERT(this); - GF_ASSERT(this->private); - - priv = this->private; - - ret = 0; - LOCK(&priv->lock); - { - ret = clear_pattern_list(priv); - if (ret) - goto unlock; - - data = dict_get(options, "block-size"); - if (data) { - ret = set_stripe_block_size(this, priv, data->data); - if (ret) - goto unlock; - } else { - opt = xlator_volume_option_get(this, "block-size"); - if (!opt) { - gf_log(this->name, GF_LOG_WARNING, - "option 'block-size' not found"); - ret = -1; - goto unlock; - } - - if (gf_string2bytesize_uint64(opt->default_value, - &priv->block_size)) { - gf_log(this->name, GF_LOG_ERROR, - "Unable to set default block-size "); - ret = -1; - goto unlock; - } - } - - GF_OPTION_RECONF("coalesce", priv->coalesce, options, bool, unlock); - } -unlock: - UNLOCK(&priv->lock); - if (ret) - goto out; - - ret = 0; -out: - return ret; -} - -/** - * init - This function is called when xlator-graph gets initialized. - * The option given in volfiles are parsed here. 
- * @this - - */ -int32_t -init(xlator_t *this) -{ - stripe_private_t *priv = NULL; - volume_option_t *opt = NULL; - xlator_list_t *trav = NULL; - data_t *data = NULL; - int32_t count = 0; - int ret = -1; - - if (!this) - goto out; - - trav = this->children; - while (trav) { - count++; - trav = trav->next; - } - - if (!count) { - gf_log(this->name, GF_LOG_ERROR, - "stripe configured without \"subvolumes\" option. " - "exiting"); - goto out; - } - - if (!this->parents) { - gf_log(this->name, GF_LOG_WARNING, "dangling volume. check volfile "); - } - - if (count == 1) { - gf_log(this->name, GF_LOG_ERROR, - "stripe configured with only one \"subvolumes\" option." - " please check the volume. exiting"); - goto out; - } - - priv = GF_CALLOC(1, sizeof(stripe_private_t), - gf_stripe_mt_stripe_private_t); - - if (!priv) - goto out; - priv->xl_array = GF_CALLOC(count, sizeof(xlator_t *), - gf_stripe_mt_xlator_t); - if (!priv->xl_array) - goto out; - - priv->last_event = GF_CALLOC(count, sizeof(int), gf_stripe_mt_int32_t); - if (!priv->last_event) - goto out; - - priv->child_count = count; - LOCK_INIT(&priv->lock); - - trav = this->children; - count = 0; - while (trav) { - priv->xl_array[count++] = trav->xlator; - trav = trav->next; - } - - if (count > 256) { - gf_log(this->name, GF_LOG_ERROR, - "maximum number of stripe subvolumes supported " - "is 256"); - goto out; - } - - ret = 0; - LOCK(&priv->lock); - { - opt = xlator_volume_option_get(this, "block-size"); - if (!opt) { - gf_log(this->name, GF_LOG_WARNING, "option 'block-size' not found"); - ret = -1; - goto unlock; - } - if (gf_string2bytesize_uint64(opt->default_value, &priv->block_size)) { - gf_log(this->name, GF_LOG_ERROR, - "Unable to set default block-size "); - ret = -1; - goto unlock; - } - /* option stripe-pattern *avi:1GB,*pdf:16K */ - data = dict_get(this->options, "block-size"); - if (data) { - ret = set_stripe_block_size(this, priv, data->data); - if (ret) - goto unlock; - } - } -unlock: - 
UNLOCK(&priv->lock); - if (ret) - goto out; - - GF_OPTION_INIT("use-xattr", priv->xattr_supported, bool, out); - /* notify related */ - priv->nodes_down = priv->child_count; - - GF_OPTION_INIT("coalesce", priv->coalesce, bool, out); - - this->local_pool = mem_pool_new(stripe_local_t, 128); - if (!this->local_pool) { - ret = -1; - gf_log(this->name, GF_LOG_ERROR, - "failed to create local_t's memory pool"); - goto out; - } - - this->private = priv; - - ret = 0; -out: - if (ret) { - if (priv) { - GF_FREE(priv->xl_array); - GF_FREE(priv); - } - } - return ret; -} - -/** - * fini - Free all the private variables - * @this - - */ -void -fini(xlator_t *this) -{ - stripe_private_t *priv = NULL; - struct stripe_options *prev = NULL; - struct stripe_options *trav = NULL; - - if (!this) - goto out; - - priv = this->private; - if (priv) { - this->private = NULL; - GF_FREE(priv->xl_array); - - trav = priv->pattern; - while (trav) { - prev = trav; - trav = trav->next; - GF_FREE(prev); - } - GF_FREE(priv->last_event); - LOCK_DESTROY(&priv->lock); - GF_FREE(priv); - } - -out: - return; -} - -int32_t -stripe_getxattr_unwind(call_frame_t *frame, int op_ret, int op_errno, - dict_t *dict, dict_t *xdata) - -{ - STRIPE_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata); - return 0; -} - -int -stripe_internal_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *xattr, - dict_t *xdata) -{ - char size_key[256] = { - 0, - }; - char index_key[256] = { - 0, - }; - char count_key[256] = { - 0, - }; - char coalesce_key[256] = { - 0, - }; - - VALIDATE_OR_GOTO(frame, out); - VALIDATE_OR_GOTO(frame->local, out); - - if (!xattr || (op_ret == -1)) - goto out; - - sprintf(size_key, "trusted.%s.stripe-size", this->name); - sprintf(count_key, "trusted.%s.stripe-count", this->name); - sprintf(index_key, "trusted.%s.stripe-index", this->name); - sprintf(coalesce_key, "trusted.%s.stripe-coalesce", this->name); - - dict_del(xattr, size_key); - 
dict_del(xattr, count_key); - dict_del(xattr, index_key); - dict_del(xattr, coalesce_key); - -out: - STRIPE_STACK_UNWIND(getxattr, frame, op_ret, op_errno, xattr, xdata); - - return 0; -} - -int -stripe_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, dict_t *xattr, dict_t *xdata) -{ - int call_cnt = 0; - stripe_local_t *local = NULL; - - VALIDATE_OR_GOTO(frame, out); - VALIDATE_OR_GOTO(frame->local, out); - - local = frame->local; - - LOCK(&frame->lock); - { - call_cnt = --local->wind_count; - } - UNLOCK(&frame->lock); - - if (!xattr || (op_ret < 0)) - goto out; - - local->op_ret = 0; - - if (!local->xattr) { - local->xattr = dict_ref(xattr); - } else { - stripe_aggregate_xattr(local->xattr, xattr); - } - -out: - if (!call_cnt) { - STRIPE_STACK_UNWIND(getxattr, frame, (local ? local->op_ret : -1), - op_errno, (local ? local->xattr : NULL), xdata); - } - - return 0; -} - -int32_t -stripe_vgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict, - dict_t *xdata) -{ - stripe_local_t *local = NULL; - int32_t callcnt = 0; - int32_t ret = -1; - long cky = 0; - void *xattr_val = NULL; - void *xattr_serz = NULL; - stripe_xattr_sort_t *xattr = NULL; - dict_t *stripe_xattr = NULL; - - if (!frame || !frame->local || !this) { - gf_log("", GF_LOG_ERROR, "Possible NULL deref"); - return ret; - } - - local = frame->local; - cky = (long)cookie; - - if (local->xsel[0] == '\0') { - gf_log(this->name, GF_LOG_ERROR, "Empty xattr in cbk"); - return ret; - } - - LOCK(&frame->lock); - { - callcnt = --local->wind_count; - - if (!dict || (op_ret < 0)) - goto out; - - if (!local->xattr_list) - local->xattr_list = (stripe_xattr_sort_t *)GF_CALLOC( - local->nallocs, sizeof(stripe_xattr_sort_t), - gf_stripe_mt_xattr_sort_t); - - if (local->xattr_list) { - xattr = local->xattr_list + (int32_t)cky; - - ret = dict_get_ptr_and_len(dict, local->xsel, &xattr_val, - &xattr->xattr_len); - if 
(xattr->xattr_len == 0) - goto out; - - xattr->pos = cky; - xattr->xattr_value = gf_memdup(xattr_val, xattr->xattr_len); - - if (xattr->xattr_value != NULL) - local->xattr_total_len += xattr->xattr_len + 1; - } - } -out: - UNLOCK(&frame->lock); - - if (!callcnt) { - if (!local->xattr_total_len) - goto unwind; - - stripe_xattr = dict_new(); - if (!stripe_xattr) - goto unwind; - - /* select filler based on ->xsel */ - if (XATTR_IS_PATHINFO(local->xsel)) - ret = stripe_fill_pathinfo_xattr(this, local, (char **)&xattr_serz); - else if (XATTR_IS_LOCKINFO(local->xsel)) { - ret = stripe_fill_lockinfo_xattr(this, local, &xattr_serz); - } else { - gf_log(this->name, GF_LOG_WARNING, - "Unknown xattr in xattr request"); - goto unwind; - } - - if (!ret) { - ret = dict_set_dynptr(stripe_xattr, local->xsel, xattr_serz, - local->xattr_total_len); - if (ret) - gf_log(this->name, GF_LOG_ERROR, "Can't set %s key in dict", - local->xsel); - } - - unwind: - /* - * Among other things, STRIPE_STACK_UNWIND will free "local" - * for us. That means we can't dereference it afterward. - * Fortunately, the actual result is in stripe_xattr now, so we - * can simply clean up before unwinding. 
- */ - ret = stripe_free_xattr_str(local); - GF_FREE(local->xattr_list); - local->xattr_list = NULL; - - STRIPE_STACK_UNWIND(getxattr, frame, op_ret, op_errno, stripe_xattr, - NULL); - - if (stripe_xattr) - dict_unref(stripe_xattr); - } - - return ret; -} - -int -stripe_marker_populate_args(call_frame_t *frame, int type, int *gauge, - xlator_t **subvols) -{ - xlator_t *this = frame->this; - stripe_private_t *priv = this->private; - stripe_local_t *local = frame->local; - int count = 0; - - count = priv->child_count; - if (MARKER_XTIME_TYPE == type) { - if (!IA_FILE_OR_DIR(local->loc.inode->ia_type)) - count = 1; - } - memcpy(subvols, priv->xl_array, sizeof(*subvols) * count); - - return count; -} - -int32_t -stripe_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *name, dict_t *xdata) -{ - stripe_local_t *local = NULL; - xlator_list_t *trav = NULL; - stripe_private_t *priv = NULL; - int32_t op_errno = EINVAL; - int i = 0; - int ret = 0; - - VALIDATE_OR_GOTO(frame, err); - VALIDATE_OR_GOTO(this, err); - VALIDATE_OR_GOTO(loc, err); - VALIDATE_OR_GOTO(loc->path, err); - VALIDATE_OR_GOTO(loc->inode, err); - - priv = this->private; - trav = this->children; - - /* Initialization */ - local = mem_get0(this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - local->op_ret = -1; - frame->local = local; - loc_copy(&local->loc, loc); - - if (name && strncmp(name, QUOTA_SIZE_KEY, SLEN(QUOTA_SIZE_KEY)) == 0) { - local->wind_count = priv->child_count; - - for (i = 0, trav = this->children; i < priv->child_count; - i++, trav = trav->next) { - STACK_WIND(frame, stripe_getxattr_cbk, trav->xlator, - trav->xlator->fops->getxattr, loc, name, xdata); - } - - return 0; - } - - if (name && (XATTR_IS_PATHINFO(name))) { - if (IA_ISREG(loc->inode->ia_type)) { - ret = inode_ctx_get(loc->inode, this, (uint64_t *)&local->fctx); - if (ret) - gf_log(this->name, GF_LOG_ERROR, - "stripe size unavailable from fctx" - " relying on pathinfo could lead to" - " 
wrong results"); - } - - local->nallocs = local->wind_count = priv->child_count; - (void)strncpy(local->xsel, name, strlen(name)); - - /** - * for xattrs that need info from all children, fill ->xsel - * as above and call the filler function in cbk based on - * it - */ - for (i = 0, trav = this->children; i < priv->child_count; - i++, trav = trav->next) { - STACK_WIND_COOKIE(frame, stripe_vgetxattr_cbk, (void *)(long)i, - trav->xlator, trav->xlator->fops->getxattr, loc, - name, xdata); - } - - return 0; - } - - if (cluster_handle_marker_getxattr(frame, loc, name, priv->vol_uuid, - stripe_getxattr_unwind, - stripe_marker_populate_args) == 0) - return 0; - - STACK_WIND(frame, stripe_internal_getxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->getxattr, loc, name, xdata); - - return 0; - -err: - STRIPE_STACK_UNWIND(getxattr, frame, -1, op_errno, NULL, NULL); - return 0; -} - -static gf_boolean_t -stripe_is_special_xattr(const char *name) -{ - gf_boolean_t is_spl = _gf_false; - - if (!name) { - goto out; - } - - if (!strncmp(name, GF_XATTR_LOCKINFO_KEY, SLEN(GF_XATTR_LOCKINFO_KEY)) || - XATTR_IS_PATHINFO(name)) - is_spl = _gf_true; -out: - return is_spl; -} - -int32_t -stripe_fgetxattr_from_everyone(call_frame_t *frame, xlator_t *this, fd_t *fd, - const char *name, dict_t *xdata) -{ - stripe_local_t *local = NULL; - stripe_private_t *priv = NULL; - int32_t ret = -1, op_errno = 0; - int i = 0; - xlator_list_t *trav = NULL; - - priv = this->private; - - local = mem_get0(this->local_pool); - if (!local) { - op_errno = ENOMEM; - goto err; - } - - local->op_ret = -1; - frame->local = local; - - strncpy(local->xsel, name, strlen(name)); - local->nallocs = local->wind_count = priv->child_count; - - for (i = 0, trav = this->children; i < priv->child_count; - i++, trav = trav->next) { - STACK_WIND_COOKIE(frame, stripe_vgetxattr_cbk, (void *)(long)i, - trav->xlator, trav->xlator->fops->fgetxattr, fd, name, - xdata); - } - - return 0; - -err: - 
STACK_UNWIND_STRICT(fgetxattr, frame, -1, op_errno, NULL, NULL); - return ret; -} - -int32_t -stripe_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, - const char *name, dict_t *xdata) -{ - if (stripe_is_special_xattr(name)) { - stripe_fgetxattr_from_everyone(frame, this, fd, name, xdata); - goto out; - } - - STACK_WIND(frame, stripe_internal_getxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata); - -out: - return 0; -} - -int32_t -stripe_priv_dump(xlator_t *this) -{ - char key[GF_DUMP_MAX_BUF_LEN]; - int i = 0; - stripe_private_t *priv = NULL; - int ret = -1; - struct stripe_options *options = NULL; - - GF_VALIDATE_OR_GOTO("stripe", this, out); - - priv = this->private; - if (!priv) - goto out; - - ret = TRY_LOCK(&priv->lock); - if (ret != 0) - goto out; - - gf_proc_dump_add_section("xlator.cluster.stripe.%s.priv", this->name); - gf_proc_dump_write("child_count", "%d", priv->child_count); - - for (i = 0; i < priv->child_count; i++) { - sprintf(key, "subvolumes[%d]", i); - gf_proc_dump_write(key, "%s.%s", priv->xl_array[i]->type, - priv->xl_array[i]->name); - } - - options = priv->pattern; - while (options != NULL) { - gf_proc_dump_write("path_pattern", "%s", priv->pattern->path_pattern); - gf_proc_dump_write("options_block_size", "%ul", options->block_size); - - options = options->next; - } - - gf_proc_dump_write("block_size", "%ul", priv->block_size); - gf_proc_dump_write("nodes-down", "%d", priv->nodes_down); - gf_proc_dump_write("first-child_down", "%d", priv->first_child_down); - gf_proc_dump_write("xattr_supported", "%d", priv->xattr_supported); - - UNLOCK(&priv->lock); - -out: - return ret; -} - -struct xlator_fops fops = { - .stat = stripe_stat, - .unlink = stripe_unlink, - .rename = stripe_rename, - .link = stripe_link, - .truncate = stripe_truncate, - .create = stripe_create, - .open = stripe_open, - .readv = stripe_readv, - .writev = stripe_writev, - .statfs = stripe_statfs, - .flush = stripe_flush, - .fsync = 
stripe_fsync, - .ftruncate = stripe_ftruncate, - .fstat = stripe_fstat, - .mkdir = stripe_mkdir, - .rmdir = stripe_rmdir, - .lk = stripe_lk, - .opendir = stripe_opendir, - .fsyncdir = stripe_fsyncdir, - .setattr = stripe_setattr, - .fsetattr = stripe_fsetattr, - .lookup = stripe_lookup, - .mknod = stripe_mknod, - .setxattr = stripe_setxattr, - .fsetxattr = stripe_fsetxattr, - .getxattr = stripe_getxattr, - .fgetxattr = stripe_fgetxattr, - .removexattr = stripe_removexattr, - .fremovexattr = stripe_fremovexattr, - .readdirp = stripe_readdirp, - .fallocate = stripe_fallocate, - .discard = stripe_discard, - .zerofill = stripe_zerofill, - .seek = stripe_seek, -}; - -struct xlator_cbks cbks = { - .release = stripe_release, - .forget = stripe_forget, -}; - -struct xlator_dumpops dumpops = { - .priv = stripe_priv_dump, -}; - -struct volume_options options[] = { - { - .key = {"block-size"}, - .type = GF_OPTION_TYPE_SIZE_LIST, - .default_value = "128KB", - .min = STRIPE_MIN_BLOCK_SIZE, - .description = "Size of the stripe unit that would be read " - "from or written to the striped servers.", - .op_version = {1}, - .tags = {"stripe"}, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE, - }, - { - .key = {"use-xattr"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "true", - .description = "handle the stripe without the xattr", - .tags = {"stripe", "dev-only"}, - .flags = OPT_FLAG_CLIENT_OPT, - }, - { - .key = {"coalesce"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "true", - .description = "Enable/Disable coalesce mode to flatten striped " - "files as stored on the server (i.e., eliminate holes " - "caused by the traditional format).", - .op_version = {1}, - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE, - .tags = {"stripe"}, - }, - {.key = {NULL}}, -}; diff --git a/xlators/cluster/stripe/src/stripe.h b/xlators/cluster/stripe/src/stripe.h deleted file mode 100644 index 88c24b6..0000000 --- a/xlators/cluster/stripe/src/stripe.h +++ /dev/null @@ -1,291 +0,0 @@ -/* 
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -#ifndef _STRIPE_H_ -#define _STRIPE_H_ - -#include <glusterfs/xlator.h> -#include <glusterfs/logging.h> -#include <glusterfs/defaults.h> -#include <glusterfs/common-utils.h> -#include <glusterfs/compat.h> -#include <glusterfs/compat-errno.h> -#include "stripe-mem-types.h" -#include "libxlator.h" -#include <fnmatch.h> -#include <signal.h> - -#define STRIPE_PATHINFO_HEADER "STRIPE:" -#define STRIPE_MIN_BLOCK_SIZE (16 * GF_UNIT_KB) - -#define STRIPE_STACK_UNWIND(fop, frame, params...) \ - do { \ - stripe_local_t *__local = NULL; \ - if (frame) { \ - __local = frame->local; \ - frame->local = NULL; \ - } \ - STACK_UNWIND_STRICT(fop, frame, params); \ - if (__local) { \ - stripe_local_wipe(__local); \ - mem_put(__local); \ - } \ - } while (0) - -#define STRIPE_STACK_DESTROY(frame) \ - do { \ - stripe_local_t *__local = NULL; \ - __local = frame->local; \ - frame->local = NULL; \ - STACK_DESTROY(frame->root); \ - if (__local) { \ - stripe_local_wipe(__local); \ - mem_put(__local); \ - } \ - } while (0) - -#define STRIPE_VALIDATE_FCTX(fctx, label) \ - do { \ - int idx = 0; \ - if (!fctx) { \ - op_errno = EINVAL; \ - goto label; \ - } \ - for (idx = 0; idx < fctx->stripe_count; idx++) { \ - if (!fctx->xl_array[idx]) { \ - gf_log(this->name, GF_LOG_ERROR, "fctx->xl_array[%d] is NULL", \ - idx); \ - op_errno = ESTALE; \ - goto label; \ - } \ - } \ - } while (0) - -typedef struct stripe_xattr_sort { - int pos; - int xattr_len; - char *xattr_value; -} stripe_xattr_sort_t; - -/** - * struct stripe_options : This keeps the pattern and the block-size - * information, which is used for striping on a file. 
- */ -struct stripe_options { - struct stripe_options *next; - char path_pattern[256]; - uint64_t block_size; -}; - -/** - * Private structure for stripe translator - */ -struct stripe_private { - struct stripe_options *pattern; - xlator_t **xl_array; - uint64_t block_size; - gf_lock_t lock; - uint8_t nodes_down; - int8_t first_child_down; - int *last_event; - int8_t child_count; - gf_boolean_t xattr_supported; /* default yes */ - gf_boolean_t coalesce; - char vol_uuid[UUID_SIZE + 1]; -}; - -/** - * Used to keep info about the replies received from readv/writev calls - */ -struct stripe_replies { - struct iovec *vector; - int32_t count; // count of vector - int32_t op_ret; // op_ret of readv - int32_t op_errno; - int32_t requested_size; - struct iatt stbuf; /* 'stbuf' is also a part of reply */ -}; - -typedef struct _stripe_fd_ctx { - off_t stripe_size; - int stripe_count; - int stripe_coalesce; - int static_array; - xlator_t **xl_array; -} stripe_fd_ctx_t; - -/** - * Local structure to be passed with all the frames in case of STACK_WIND - */ -struct stripe_local; /* this itself is used inside the structure; */ - -struct stripe_local { - struct stripe_local *next; - call_frame_t *orig_frame; - - stripe_fd_ctx_t *fctx; - - /* Used by _cbk functions */ - struct iatt stbuf; - struct iatt pre_buf; - struct iatt post_buf; - struct iatt preparent; - struct iatt postparent; - - off_t stbuf_size; - off_t prebuf_size; - off_t postbuf_size; - off_t preparent_size; - off_t postparent_size; - - blkcnt_t stbuf_blocks; - blkcnt_t prebuf_blocks; - blkcnt_t postbuf_blocks; - blkcnt_t preparent_blocks; - blkcnt_t postparent_blocks; - - struct stripe_replies *replies; - struct statvfs statvfs_buf; - dir_entry_t *entry; - - int8_t revalidate; - int8_t failed; - int8_t unwind; - - size_t readv_size; - int32_t entry_count; - int32_t node_index; - int32_t call_count; - int32_t wind_count; /* used instead of child_cound - in case of read and write */ - int32_t op_ret; - int32_t op_errno; 
- int32_t count; - int32_t flags; - char *name; - inode_t *inode; - - loc_t loc; - loc_t loc2; - - mode_t mode; - dev_t rdev; - /* For File I/O fops */ - dict_t *xdata; - - stripe_xattr_sort_t *xattr_list; - int32_t xattr_total_len; - int32_t nallocs; - char xsel[256]; - - /* General usage */ - off_t offset; - off_t stripe_size; - - int xattr_self_heal_needed; - int entry_self_heal_needed; - - int8_t *list; - struct gf_flock lock; - fd_t *fd; - void *value; - struct iobref *iobref; - gf_dirent_t entries; - gf_dirent_t *dirent; - dict_t *xattr; - uuid_t ia_gfid; - - int xflag; - mode_t umask; -}; - -typedef struct stripe_local stripe_local_t; -typedef struct stripe_private stripe_private_t; - -/* - * Determine the stripe index of a particular frame based on the translator. - */ -static inline int32_t -stripe_get_frame_index(stripe_fd_ctx_t *fctx, call_frame_t *prev) -{ - int32_t i, idx = -1; - - for (i = 0; i < fctx->stripe_count; i++) { - if (fctx->xl_array[i] == prev->this) { - idx = i; - break; - } - } - - return idx; -} - -static inline void -stripe_copy_xl_array(xlator_t **dst, xlator_t **src, int count) -{ - int i; - - for (i = 0; i < count; i++) - dst[i] = src[i]; -} - -void -stripe_local_wipe(stripe_local_t *local); -int32_t -stripe_ctx_handle(xlator_t *this, call_frame_t *prev, stripe_local_t *local, - dict_t *dict); -void -stripe_aggregate_xattr(dict_t *dst, dict_t *src); -int32_t -stripe_xattr_request_build(xlator_t *this, dict_t *dict, uint64_t stripe_size, - uint32_t stripe_count, uint32_t stripe_index, - uint32_t stripe_coalesce); -int32_t -stripe_get_matching_bs(const char *path, stripe_private_t *priv); -int -set_stripe_block_size(xlator_t *this, stripe_private_t *priv, char *data); -int32_t -stripe_iatt_merge(struct iatt *from, struct iatt *to); -int32_t -stripe_fill_pathinfo_xattr(xlator_t *this, stripe_local_t *local, - char **xattr_serz); -int32_t -stripe_free_xattr_str(stripe_local_t *local); -int32_t -stripe_xattr_aggregate(char *buffer, 
stripe_local_t *local, int32_t *total); -off_t -coalesced_offset(off_t offset, uint64_t stripe_size, int stripe_count); -off_t -uncoalesced_size(off_t size, uint64_t stripe_size, int stripe_count, - int stripe_index); -int32_t -stripe_fill_lockinfo_xattr(xlator_t *this, stripe_local_t *local, - void **xattr_serz); - -/* - * Adjust the size attribute for files if coalesce is enabled. - */ -static inline void -correct_file_size(struct iatt *buf, stripe_fd_ctx_t *fctx, call_frame_t *prev) -{ - int index; - - if (!IA_ISREG(buf->ia_type)) - return; - - if (!fctx || !fctx->stripe_coalesce) - return; - - index = stripe_get_frame_index(fctx, prev); - buf->ia_size = uncoalesced_size(buf->ia_size, fctx->stripe_size, - fctx->stripe_count, index); -} - -#endif /* _STRIPE_H_ */ diff --git a/xlators/encryption/Makefile.am b/xlators/encryption/Makefile.am deleted file mode 100644 index 36efc66..0000000 --- a/xlators/encryption/Makefile.am +++ /dev/null @@ -1,3 +0,0 @@ -SUBDIRS = rot-13 crypt - -CLEANFILES = diff --git a/xlators/encryption/crypt/Makefile.am b/xlators/encryption/crypt/Makefile.am deleted file mode 100644 index d471a3f..0000000 --- a/xlators/encryption/crypt/Makefile.am +++ /dev/null @@ -1,3 +0,0 @@ -SUBDIRS = src - -CLEANFILES = diff --git a/xlators/encryption/crypt/src/Makefile.am b/xlators/encryption/crypt/src/Makefile.am deleted file mode 100644 index 05fd3d5..0000000 --- a/xlators/encryption/crypt/src/Makefile.am +++ /dev/null @@ -1,26 +0,0 @@ -if ENABLE_CRYPT_XLATOR - -xlator_LTLIBRARIES = crypt.la -xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/encryption - -crypt_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) - -crypt_la_SOURCES = keys.c data.c metadata.c atom.c crypt.c -crypt_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ - -lssl -lcrypto - -noinst_HEADERS = crypt-common.h crypt-mem-types.h crypt.h metadata.h - -AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ - -I$(top_srcdir)/rpc/xdr/src 
-I$(top_builddir)/rpc/xdr/src - -AM_CFLAGS = -Wall $(GF_CFLAGS) - -CLEANFILES = - -else - -noinst_DIST = keys.c data.c metadata.c atom.c crypt.c -noinst_HEADERS = crypt-common.h crypt-mem-types.h crypt.h metadata.h - -endif diff --git a/xlators/encryption/crypt/src/atom.c b/xlators/encryption/crypt/src/atom.c deleted file mode 100644 index bdc37c5..0000000 --- a/xlators/encryption/crypt/src/atom.c +++ /dev/null @@ -1,861 +0,0 @@ -/* - Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -#include <glusterfs/defaults.h> -#include "crypt-common.h" -#include "crypt.h" - -/* - * Glossary - * - * - * cblock (or cipher block). A logical unit in a file. - * cblock size is defined as the number of bits - * in an input (or output) block of the block - * cipher (*). Cipher block size is a property of - * cipher algorithm. E.g. cblock size is 64 bits - * for DES, 128 bits for AES, etc. - * - * atomic cipher A cipher algorithm, which requires some chunks of - * algorithm text to be padded at left and(or) right sides before - * cipher transaform. - * - * - * block (atom) Minimal chunk of file's data, which doesn't require - * padding. We'll consider logical units in a file of - * block size (atom size). - * - * cipher algorithm Atomic cipher algorithm, which requires the last - * with EOF issue incomplete cblock in a file to be padded with some - * data (usually zeros). 
- * - * - * operation, which reading/writing from offset, which is not aligned to - * forms a gap at to atom size - * the beginning - * - * - * operation, which reading/writing count bytes starting from offset off, - * forms a gap at so that off+count is not aligned to atom_size - * the end - * - * head block the first atom affected by an operation, which forms - * a gap at the beginning, or(and) at the end. - * Сomment. Head block has at least one gap (either at - * the beginning, or at the end) - * - * - * tail block the last atom different from head, affected by an - * operation, which forms a gap at the end. - * Сomment: Tail block has exactly one gap (at the end). - * - * - * partial block head or tail block - * - * - * full block block without gaps. - * - * - * (*) Recommendation for Block Cipher Modes of Operation - * Methods and Techniques - * NIST Special Publication 800-38A Edition 2001 - */ - -/* - * atom->offset_at() - */ -static off_t -offset_at_head(struct avec_config *conf) -{ - return conf->aligned_offset; -} - -static off_t -offset_at_hole_head(call_frame_t *frame, struct object_cipher_info *object) -{ - return offset_at_head(get_hole_conf(frame)); -} - -static off_t -offset_at_data_head(call_frame_t *frame, struct object_cipher_info *object) -{ - return offset_at_head(get_data_conf(frame)); -} - -static off_t -offset_at_tail(struct avec_config *conf, struct object_cipher_info *object) -{ - return conf->aligned_offset + - (conf->off_in_head ? 
get_atom_size(object) : 0) + - (conf->nr_full_blocks << get_atom_bits(object)); -} - -static off_t -offset_at_hole_tail(call_frame_t *frame, struct object_cipher_info *object) -{ - return offset_at_tail(get_hole_conf(frame), object); -} - -static off_t -offset_at_data_tail(call_frame_t *frame, struct object_cipher_info *object) -{ - return offset_at_tail(get_data_conf(frame), object); -} - -static off_t -offset_at_full(struct avec_config *conf, struct object_cipher_info *object) -{ - return conf->aligned_offset + - (conf->off_in_head ? get_atom_size(object) : 0); -} - -static off_t -offset_at_data_full(call_frame_t *frame, struct object_cipher_info *object) -{ - return offset_at_full(get_data_conf(frame), object); -} - -static off_t -offset_at_hole_full(call_frame_t *frame, struct object_cipher_info *object) -{ - return offset_at_full(get_hole_conf(frame), object); -} - -/* - * atom->io_size_nopad() - */ - -static uint32_t -io_size_nopad_head(struct avec_config *conf, struct object_cipher_info *object) -{ - uint32_t gap_at_beg; - uint32_t gap_at_end; - - check_head_block(conf); - - gap_at_beg = conf->off_in_head; - - if (has_tail_block(conf) || has_full_blocks(conf) || conf->off_in_tail == 0) - gap_at_end = 0; - else - gap_at_end = get_atom_size(object) - conf->off_in_tail; - - return get_atom_size(object) - (gap_at_beg + gap_at_end); -} - -static uint32_t -io_size_nopad_tail(struct avec_config *conf, struct object_cipher_info *object) -{ - check_tail_block(conf); - return conf->off_in_tail; -} - -static uint32_t -io_size_nopad_full(struct avec_config *conf, struct object_cipher_info *object) -{ - check_full_block(conf); - return get_atom_size(object); -} - -static uint32_t -io_size_nopad_data_head(call_frame_t *frame, struct object_cipher_info *object) -{ - return io_size_nopad_head(get_data_conf(frame), object); -} - -static uint32_t -io_size_nopad_hole_head(call_frame_t *frame, struct object_cipher_info *object) -{ - return 
io_size_nopad_head(get_hole_conf(frame), object); -} - -static uint32_t -io_size_nopad_data_tail(call_frame_t *frame, struct object_cipher_info *object) -{ - return io_size_nopad_tail(get_data_conf(frame), object); -} - -static uint32_t -io_size_nopad_hole_tail(call_frame_t *frame, struct object_cipher_info *object) -{ - return io_size_nopad_tail(get_hole_conf(frame), object); -} - -static uint32_t -io_size_nopad_data_full(call_frame_t *frame, struct object_cipher_info *object) -{ - return io_size_nopad_full(get_data_conf(frame), object); -} - -static uint32_t -io_size_nopad_hole_full(call_frame_t *frame, struct object_cipher_info *object) -{ - return io_size_nopad_full(get_hole_conf(frame), object); -} - -static uint32_t -offset_in_head(struct avec_config *conf) -{ - check_cursor_head(conf); - - return conf->off_in_head; -} - -static uint32_t -offset_in_tail(call_frame_t *frame, struct object_cipher_info *object) -{ - return 0; -} - -static uint32_t -offset_in_full(struct avec_config *conf, struct object_cipher_info *object) -{ - check_cursor_full(conf); - - if (has_head_block(conf)) - return (conf->cursor - 1) << get_atom_bits(object); - else - return conf->cursor << get_atom_bits(object); -} - -static uint32_t -offset_in_data_head(call_frame_t *frame, struct object_cipher_info *object) -{ - return offset_in_head(get_data_conf(frame)); -} - -static uint32_t -offset_in_hole_head(call_frame_t *frame, struct object_cipher_info *object) -{ - return offset_in_head(get_hole_conf(frame)); -} - -static uint32_t -offset_in_data_full(call_frame_t *frame, struct object_cipher_info *object) -{ - return offset_in_full(get_data_conf(frame), object); -} - -static uint32_t -offset_in_hole_full(call_frame_t *frame, struct object_cipher_info *object) -{ - return offset_in_full(get_hole_conf(frame), object); -} - -/* - * atom->rmw() - */ -/* - * Pre-conditions: - * @vec contains plain text of the latest - * version. 
- * - * Uptodate gaps of the @partial block with - * this plain text, encrypt the whole block - * and write the result to disk. - */ -static int32_t -rmw_partial_block(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iovec *vec, - int32_t count, struct iatt *stbuf, struct iobref *iobref, - struct rmw_atom *atom) -{ - size_t was_read = 0; - uint64_t file_size; - crypt_local_t *local = frame->local; - struct object_cipher_info *object = &local->info->cinfo; - - struct iovec *partial = atom->get_iovec(frame, 0); - struct avec_config *conf = atom->get_config(frame); - end_writeback_handler_t end_writeback_partial_block; -#if DEBUG_CRYPT - gf_boolean_t check_last_cblock = _gf_false; -#endif - local->op_ret = op_ret; - local->op_errno = op_errno; - - if (op_ret < 0) - goto exit; - - file_size = local->cur_file_size; - was_read = op_ret; - - if (atom->locality == HEAD_ATOM && conf->off_in_head) { - /* - * head atom with a non-uptodate gap - * at the beginning - * - * fill the gap with plain text of the - * latest version. Convert a part of hole - * (if any) to zeros. - */ - int32_t i; - int32_t copied = 0; - int32_t to_gap; /* amount of data needed to uptodate - the gap at the beginning */ -#if 0 - int32_t hole = 0; /* The part of the hole which - * got in the head block */ -#endif /* 0 */ - to_gap = conf->off_in_head; - - if (was_read < to_gap) { - if (file_size > offset_at_head(conf) + was_read) { - /* - * It is impossible to uptodate - * head block: too few bytes have - * been read from disk, so that - * partial write is impossible. - * - * It could happen because of many - * reasons: IO errors, (meta)data - * corruption in the local file system, - * etc. 
- */ - gf_log(this->name, GF_LOG_WARNING, - "Can not uptodate a gap at the beginning"); - local->op_ret = -1; - local->op_errno = EIO; - goto exit; - } -#if 0 - hole = to_gap - was_read; -#endif /* 0 */ - to_gap = was_read; - } - /* - * uptodate the gap at the beginning - */ - for (i = 0; i < count && copied < to_gap; i++) { - int32_t to_copy; - - to_copy = vec[i].iov_len; - if (to_copy > to_gap - copied) - to_copy = to_gap - copied; - - memcpy(partial->iov_base, vec[i].iov_base, to_copy); - copied += to_copy; - } -#if 0 - /* - * If possible, convert part of the - * hole, which got in the head block - */ - ret = TRY_LOCK(&local->hole_lock); - if (!ret) { - if (local->hole_handled) - /* - * already converted by - * crypt_writev_cbk() - */ - UNLOCK(&local->hole_lock); - else { - /* - * convert the part of the hole - * which got in the head block - * to zeros. - * - * Update the orig_offset to make - * sure writev_cbk() won't care - * about this part of the hole. - * - */ - memset(partial->iov_base + to_gap, 0, hole); - - conf->orig_offset -= hole; - conf->orig_size += hole; - UNLOCK(&local->hole_lock); - } - } - else /* - * conversion is being performed - * by crypt_writev_cbk() - */ - ; -#endif /* 0 */ - } - if (atom->locality == TAIL_ATOM || - (!has_tail_block(conf) && conf->off_in_tail)) { - /* - * tail atom, or head atom with a non-uptodate - * gap at the end. - * - * fill the gap at the end of the block - * with plain text of the latest version. - * Pad the result, (if needed) - */ - int32_t i; - int32_t to_gap; - int copied; - off_t off_in_tail; - int32_t to_copy; - - off_in_tail = conf->off_in_tail; - to_gap = conf->gap_in_tail; - - if (to_gap && was_read < off_in_tail + to_gap) { - /* - * It is impossible to uptodate - * the gap at the end: too few bytes - * have been read from disk, so that - * partial write is impossible. - * - * It could happen because of many - * reasons: IO errors, (meta)data - * corruption in the local file system, - * etc. 
- */ - gf_log(this->name, GF_LOG_WARNING, - "Can not uptodate a gap at the end"); - local->op_ret = -1; - local->op_errno = EIO; - goto exit; - } - /* - * uptodate the gap at the end - */ - copied = 0; - to_copy = to_gap; - for (i = count - 1; i >= 0 && to_copy > 0; i--) { - uint32_t from_vec, off_in_vec; - - off_in_vec = 0; - from_vec = vec[i].iov_len; - if (from_vec > to_copy) { - off_in_vec = from_vec - to_copy; - from_vec = to_copy; - } - memcpy(partial->iov_base + off_in_tail + to_gap - copied - from_vec, - vec[i].iov_base + off_in_vec, from_vec); - - gf_log( - this->name, GF_LOG_DEBUG, - "uptodate %d bytes at tail. Offset at target(source): %d(%d)", - (int)from_vec, (int)off_in_tail + to_gap - copied - from_vec, - (int)off_in_vec); - - copied += from_vec; - to_copy -= from_vec; - } - partial->iov_len = off_in_tail + to_gap; - - if (object_alg_should_pad(object)) { - int32_t resid = 0; - resid = partial->iov_len & (object_alg_blksize(object) - 1); - if (resid) { - /* - * append a new EOF padding - */ - local->eof_padding_size = object_alg_blksize(object) - resid; - - gf_log(this->name, GF_LOG_DEBUG, "set padding size %d", - local->eof_padding_size); - - memset(partial->iov_base + partial->iov_len, 1, - local->eof_padding_size); - partial->iov_len += local->eof_padding_size; -#if DEBUG_CRYPT - gf_log(this->name, GF_LOG_DEBUG, - "pad cblock with %d zeros:", local->eof_padding_size); - dump_cblock(this, (unsigned char *)partial->iov_base + - partial->iov_len - - object_alg_blksize(object)); - check_last_cblock = _gf_true; -#endif - } - } - } - /* - * encrypt the whole block - */ - encrypt_aligned_iov(object, partial, 1, atom->offset_at(frame, object)); -#if DEBUG_CRYPT - if (check_last_cblock == _gf_true) { - gf_log(this->name, GF_LOG_DEBUG, "encrypt last cblock with offset %llu", - (unsigned long long)atom->offset_at(frame, object)); - dump_cblock(this, (unsigned char *)partial->iov_base + - partial->iov_len - object_alg_blksize(object)); - } -#endif - 
set_local_io_params_writev(frame, object, atom, - atom->offset_at(frame, object), - iov_length(partial, 1)); - /* - * write the whole block to disk - */ - end_writeback_partial_block = dispatch_end_writeback(local->fop); - conf->cursor++; - STACK_WIND(frame, end_writeback_partial_block, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->writev, local->fd, partial, 1, - atom->offset_at(frame, object), local->flags, local->iobref_data, - local->xdata); - - gf_log("crypt", GF_LOG_DEBUG, - "submit partial block: %d bytes from %d offset", - (int)iov_length(partial, 1), (int)atom->offset_at(frame, object)); -exit: - return 0; -} - -/* - * Perform a (read-)modify-write sequence. - * This should be performed only after approval - * of upper server-side manager, i.e. the caller - * needs to make sure this is his turn to rmw. - */ -void -submit_partial(call_frame_t *frame, xlator_t *this, fd_t *fd, - atom_locality_type ltype) -{ - int32_t ret; - dict_t *dict; - struct rmw_atom *atom; - crypt_local_t *local = frame->local; - struct object_cipher_info *object = &local->info->cinfo; - - atom = atom_by_types(local->active_setup, ltype); - /* - * To perform the "read" component of the read-modify-write - * sequence the crypt translator does stack_wind to itself. 
- * - * Pass current file size to crypt_readv() - */ - dict = dict_new(); - if (!dict) { - /* - * FIXME: Handle the error - */ - gf_log("crypt", GF_LOG_WARNING, "Can not alloc dict"); - return; - } - ret = dict_set(dict, FSIZE_XATTR_PREFIX, - data_from_uint64(local->cur_file_size)); - if (ret) { - /* - * FIXME: Handle the error - */ - dict_unref(dict); - gf_log("crypt", GF_LOG_WARNING, "Can not set dict"); - goto exit; - } - STACK_WIND(frame, atom->rmw, this, this->fops->readv, /* crypt_readv */ - fd, atom->count_to_uptodate(frame, object), /* count */ - atom->offset_at(frame, object), /* offset to read from */ - 0, dict); -exit: - dict_unref(dict); -} - -/* - * submit blocks of FULL_ATOM type - */ -void -submit_full(call_frame_t *frame, xlator_t *this) -{ - crypt_local_t *local = frame->local; - struct object_cipher_info *object = &local->info->cinfo; - struct rmw_atom *atom = atom_by_types(local->active_setup, FULL_ATOM); - uint32_t count; /* total number of full blocks to submit */ - uint32_t granularity; /* number of blocks to submit in one iteration */ - - uint64_t off_in_file; /* start offset in the file, bytes */ - uint32_t off_in_atom; /* start offset in the atom, blocks */ - uint32_t blocks_written = 0; /* blocks written for this submit */ - - struct avec_config *conf = atom->get_config(frame); - end_writeback_handler_t end_writeback_full_block; - /* - * Write full blocks by groups of granularity size. - */ - end_writeback_full_block = dispatch_end_writeback(local->fop); - - if (is_ordered_mode(frame)) { - uint32_t skip = has_head_block(conf) ? 1 : 0; - count = 1; - granularity = 1; - /* - * calculate start offset using cursor value; - * here we should take into account head block, - * which corresponds to cursor value 0. 
- */ - off_in_file = atom->offset_at(frame, object) + - ((conf->cursor - skip) << get_atom_bits(object)); - off_in_atom = conf->cursor - skip; - } else { - /* - * in parallel mode - */ - count = conf->nr_full_blocks; - granularity = MAX_IOVEC; - off_in_file = atom->offset_at(frame, object); - off_in_atom = 0; - } - while (count) { - uint32_t blocks_to_write = count; - - if (blocks_to_write > granularity) - blocks_to_write = granularity; - if (conf->type == HOLE_ATOM) - /* - * reset iovec before encryption - */ - memset(atom->get_iovec(frame, 0)->iov_base, 0, - get_atom_size(object)); - /* - * encrypt the group - */ - encrypt_aligned_iov( - object, atom->get_iovec(frame, off_in_atom + blocks_written), - blocks_to_write, - off_in_file + (blocks_written << get_atom_bits(object))); - - set_local_io_params_writev( - frame, object, atom, - off_in_file + (blocks_written << get_atom_bits(object)), - blocks_to_write << get_atom_bits(object)); - - conf->cursor += blocks_to_write; - - STACK_WIND(frame, end_writeback_full_block, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->writev, local->fd, - atom->get_iovec(frame, off_in_atom + blocks_written), - blocks_to_write, - off_in_file + (blocks_written << get_atom_bits(object)), - local->flags, - local->iobref_data ? 
local->iobref_data : local->iobref, - local->xdata); - - gf_log("crypt", GF_LOG_DEBUG, "submit %d full blocks from %d offset", - blocks_to_write, - (int)(off_in_file + (blocks_written << get_atom_bits(object)))); - - count -= blocks_to_write; - blocks_written += blocks_to_write; - } - return; -} - -static int32_t -rmw_data_head(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, struct iovec *vec, int32_t count, - struct iatt *stbuf, struct iobref *iobref, dict_t *xdata) -{ - return rmw_partial_block(frame, cookie, this, op_ret, op_errno, vec, count, - stbuf, iobref, - atom_by_types(DATA_ATOM, HEAD_ATOM)); -} - -static int32_t -rmw_data_tail(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, struct iovec *vec, int32_t count, - struct iatt *stbuf, struct iobref *iobref, dict_t *xdata) -{ - return rmw_partial_block(frame, cookie, this, op_ret, op_errno, vec, count, - stbuf, iobref, - atom_by_types(DATA_ATOM, TAIL_ATOM)); -} - -static int32_t -rmw_hole_head(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, struct iovec *vec, int32_t count, - struct iatt *stbuf, struct iobref *iobref, dict_t *xdata) -{ - return rmw_partial_block(frame, cookie, this, op_ret, op_errno, vec, count, - stbuf, iobref, - atom_by_types(HOLE_ATOM, HEAD_ATOM)); -} - -static int32_t -rmw_hole_tail(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, struct iovec *vec, int32_t count, - struct iatt *stbuf, struct iobref *iobref, dict_t *xdata) -{ - return rmw_partial_block(frame, cookie, this, op_ret, op_errno, vec, count, - stbuf, iobref, - atom_by_types(HOLE_ATOM, TAIL_ATOM)); -} - -/* - * atom->count_to_uptodate() - */ -static uint32_t -count_to_uptodate_head(struct avec_config *conf, - struct object_cipher_info *object) -{ - if (conf->acount == 1 && conf->off_in_tail) - return get_atom_size(object); - else - /* there is no need to read the whole head 
block */ - return conf->off_in_head; -} - -static uint32_t -count_to_uptodate_tail(struct avec_config *conf, - struct object_cipher_info *object) -{ - /* we need to read the whole tail block */ - return get_atom_size(object); -} - -static uint32_t -count_to_uptodate_data_head(call_frame_t *frame, - struct object_cipher_info *object) -{ - return count_to_uptodate_head(get_data_conf(frame), object); -} - -static uint32_t -count_to_uptodate_data_tail(call_frame_t *frame, - struct object_cipher_info *object) -{ - return count_to_uptodate_tail(get_data_conf(frame), object); -} - -static uint32_t -count_to_uptodate_hole_head(call_frame_t *frame, - struct object_cipher_info *object) -{ - return count_to_uptodate_head(get_hole_conf(frame), object); -} - -static uint32_t -count_to_uptodate_hole_tail(call_frame_t *frame, - struct object_cipher_info *object) -{ - return count_to_uptodate_tail(get_hole_conf(frame), object); -} - -/* atom->get_config() */ - -static struct avec_config * -get_config_data(call_frame_t *frame) -{ - return &((crypt_local_t *)frame->local)->data_conf; -} - -static struct avec_config * -get_config_hole(call_frame_t *frame) -{ - return &((crypt_local_t *)frame->local)->hole_conf; -} - -/* - * atom->get_iovec() - */ -static struct iovec * -get_iovec_hole_head(call_frame_t *frame, uint32_t count) -{ - struct avec_config *conf = get_hole_conf(frame); - - return conf->avec; -} - -static struct iovec * -get_iovec_hole_full(call_frame_t *frame, uint32_t count) -{ - struct avec_config *conf = get_hole_conf(frame); - - return conf->avec + (conf->off_in_head ? 
1 : 0); -} - -static struct iovec * -get_iovec_hole_tail(call_frame_t *frame, uint32_t count) -{ - struct avec_config *conf = get_hole_conf(frame); - - return conf->avec + (conf->blocks_in_pool - 1); -} - -static struct iovec * -get_iovec_data_head(call_frame_t *frame, uint32_t count) -{ - struct avec_config *conf = get_data_conf(frame); - - return conf->avec; -} - -static struct iovec * -get_iovec_data_full(call_frame_t *frame, uint32_t count) -{ - struct avec_config *conf = get_data_conf(frame); - - return conf->avec + (conf->off_in_head ? 1 : 0) + count; -} - -static struct iovec * -get_iovec_data_tail(call_frame_t *frame, uint32_t count) -{ - struct avec_config *conf = get_data_conf(frame); - - return conf->avec + (conf->off_in_head ? 1 : 0) + conf->nr_full_blocks; -} - -static struct rmw_atom atoms[LAST_DATA_TYPE][LAST_LOCALITY_TYPE] = { - [DATA_ATOM][HEAD_ATOM] = {.locality = HEAD_ATOM, - .rmw = rmw_data_head, - .offset_at = offset_at_data_head, - .offset_in = offset_in_data_head, - .get_iovec = get_iovec_data_head, - .io_size_nopad = io_size_nopad_data_head, - .count_to_uptodate = count_to_uptodate_data_head, - .get_config = get_config_data}, - [DATA_ATOM][TAIL_ATOM] = {.locality = TAIL_ATOM, - .rmw = rmw_data_tail, - .offset_at = offset_at_data_tail, - .offset_in = offset_in_tail, - .get_iovec = get_iovec_data_tail, - .io_size_nopad = io_size_nopad_data_tail, - .count_to_uptodate = count_to_uptodate_data_tail, - .get_config = get_config_data}, - [DATA_ATOM][FULL_ATOM] = {.locality = FULL_ATOM, - .offset_at = offset_at_data_full, - .offset_in = offset_in_data_full, - .get_iovec = get_iovec_data_full, - .io_size_nopad = io_size_nopad_data_full, - .get_config = get_config_data}, - [HOLE_ATOM][HEAD_ATOM] = {.locality = HEAD_ATOM, - .rmw = rmw_hole_head, - .offset_at = offset_at_hole_head, - .offset_in = offset_in_hole_head, - .get_iovec = get_iovec_hole_head, - .io_size_nopad = io_size_nopad_hole_head, - .count_to_uptodate = count_to_uptodate_hole_head, - 
.get_config = get_config_hole}, - [HOLE_ATOM][TAIL_ATOM] = {.locality = TAIL_ATOM, - .rmw = rmw_hole_tail, - .offset_at = offset_at_hole_tail, - .offset_in = offset_in_tail, - .get_iovec = get_iovec_hole_tail, - .io_size_nopad = io_size_nopad_hole_tail, - .count_to_uptodate = count_to_uptodate_hole_tail, - .get_config = get_config_hole}, - [HOLE_ATOM][FULL_ATOM] = {.locality = FULL_ATOM, - .offset_at = offset_at_hole_full, - .offset_in = offset_in_hole_full, - .get_iovec = get_iovec_hole_full, - .io_size_nopad = io_size_nopad_hole_full, - .get_config = get_config_hole}}; - -struct rmw_atom * -atom_by_types(atom_data_type data, atom_locality_type locality) -{ - return &atoms[data][locality]; -} - -/* - Local variables: - c-indentation-style: "K&R" - mode-name: "LC" - c-basic-offset: 8 - tab-width: 8 - fill-column: 80 - scroll-step: 1 - End: -*/ diff --git a/xlators/encryption/crypt/src/crypt-common.h b/xlators/encryption/crypt/src/crypt-common.h deleted file mode 100644 index 123d5c2..0000000 --- a/xlators/encryption/crypt/src/crypt-common.h +++ /dev/null @@ -1,133 +0,0 @@ -/* - Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. 
-*/ - -#ifndef __CRYPT_COMMON_H__ -#define __CRYPT_COMMON_H__ - -#define INVAL_SUBVERSION_NUMBER (0xff) -#define CRYPT_INVAL_OP (GF_FOP_NULL) - -#define CRYPTO_FORMAT_PREFIX "trusted.glusterfs.crypt.att.cfmt" -#define FSIZE_XATTR_PREFIX "trusted.glusterfs.crypt.att.size" -#define SUBREQ_PREFIX "trusted.glusterfs.crypt.msg.sreq" -#define FSIZE_MSG_PREFIX "trusted.glusterfs.crypt.msg.size" -#define DE_MSG_PREFIX "trusted.glusterfs.crypt.msg.dent" -#define REQUEST_ID_PREFIX "trusted.glusterfs.crypt.msg.rqid" -#define MSGFLAGS_PREFIX "trusted.glusterfs.crypt.msg.xfgs" - -/* messages for crypt_open() */ -#define MSGFLAGS_REQUEST_MTD_RLOCK 1 /* take read lock and don't unlock */ -#define MSGFLAGS_REQUEST_MTD_WLOCK 2 /* take write lock and don't unlock */ - -#define AES_BLOCK_BITS (4) /* AES_BLOCK_SIZE == 1 << AES_BLOCK_BITS */ - -#define noop \ - do { \ - ; \ - } while (0) -#define cassert(cond) \ - ({ \ - switch (-1) { \ - case (cond): \ - case 0: \ - break; \ - } \ - }) -#define __round_mask(x, y) ((__typeof__(x))((y)-1)) -#define round_up(x, y) ((((x)-1) | __round_mask(x, y)) + 1) - -/* - * Format of file's metadata - */ -struct crypt_format { - uint8_t loader_id; /* version of metadata loader */ - uint8_t versioned[0]; /* file's metadata of specific version */ -} __attribute__((packed)); - -typedef enum { AES_CIPHER_ALG, LAST_CIPHER_ALG } cipher_alg_t; - -typedef enum { XTS_CIPHER_MODE, LAST_CIPHER_MODE } cipher_mode_t; - -typedef enum { MTD_LOADER_V1, LAST_MTD_LOADER } mtd_loader_id; - -static inline void -msgflags_set_mtd_rlock(uint32_t *flags) -{ - *flags |= MSGFLAGS_REQUEST_MTD_RLOCK; -} - -static inline void -msgflags_set_mtd_wlock(uint32_t *flags) -{ - *flags |= MSGFLAGS_REQUEST_MTD_WLOCK; -} - -static inline gf_boolean_t -msgflags_check_mtd_rlock(uint32_t *flags) -{ - return *flags & MSGFLAGS_REQUEST_MTD_RLOCK; -} - -static inline gf_boolean_t -msgflags_check_mtd_wlock(uint32_t *flags) -{ - return *flags & MSGFLAGS_REQUEST_MTD_WLOCK; -} - -static inline 
gf_boolean_t -msgflags_check_mtd_lock(uint32_t *flags) -{ - return msgflags_check_mtd_rlock(flags) || msgflags_check_mtd_wlock(flags); -} - -/* - * returns number of logical blocks occupied - * (maybe partially) by @count bytes - * at offset @start. - */ -static inline off_t -logical_blocks_occupied(uint64_t start, off_t count, int blkbits) -{ - return ((start + count - 1) >> blkbits) - (start >> blkbits) + 1; -} - -/* - * are two bytes (represented by offsets @off1 - * and @off2 respectively) in the same logical - * block. - */ -static inline int -in_same_lblock(uint64_t off1, uint64_t off2, int blkbits) -{ - return off1 >> blkbits == off2 >> blkbits; -} - -static inline void -dump_cblock(xlator_t *this, unsigned char *buf) -{ - gf_log(this->name, GF_LOG_DEBUG, - "dump cblock: %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x", - (buf)[0], (buf)[1], (buf)[2], (buf)[3], (buf)[4], (buf)[5], (buf)[6], - (buf)[7], (buf)[8], (buf)[9], (buf)[10], (buf)[11], (buf)[12], - (buf)[13], (buf)[14], (buf)[15]); -} - -#endif /* __CRYPT_COMMON_H__ */ - -/* - Local variables: - c-indentation-style: "K&R" - mode-name: "LC" - c-basic-offset: 8 - tab-width: 8 - fill-column: 80 - scroll-step: 1 - End: -*/ diff --git a/xlators/encryption/crypt/src/crypt-mem-types.h b/xlators/encryption/crypt/src/crypt-mem-types.h deleted file mode 100644 index e756ea4..0000000 --- a/xlators/encryption/crypt/src/crypt-mem-types.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. 
-*/ - -#ifndef __CRYPT_MEM_TYPES_H__ -#define __CRYPT_MEM_TYPES_H__ - -#include <glusterfs/mem-types.h> - -enum gf_crypt_mem_types_ { - gf_crypt_mt_priv = gf_common_mt_end + 1, - gf_crypt_mt_inode, - gf_crypt_mt_data, - gf_crypt_mt_mtd, - gf_crypt_mt_loc, - gf_crypt_mt_iatt, - gf_crypt_mt_key, - gf_crypt_mt_iovec, - gf_crypt_mt_char, - gf_crypt_mt_local, - gf_crypt_mt_end, -}; - -#endif /* __CRYPT_MEM_TYPES_H__ */ - -/* - Local variables: - c-indentation-style: "K&R" - mode-name: "LC" - c-basic-offset: 8 - tab-width: 8 - fill-column: 80 - scroll-step: 1 - End: -*/ diff --git a/xlators/encryption/crypt/src/crypt.c b/xlators/encryption/crypt/src/crypt.c deleted file mode 100644 index 0225364..0000000 --- a/xlators/encryption/crypt/src/crypt.c +++ /dev/null @@ -1,3906 +0,0 @@ -/* - Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. 
-*/ -#include <ctype.h> -#include <sys/uio.h> - -#include <glusterfs/glusterfs.h> -#include <glusterfs/xlator.h> -#include <glusterfs/logging.h> -#include <glusterfs/defaults.h> - -#include "crypt-common.h" -#include "crypt.h" - -static void -init_inode_info_head(struct crypt_inode_info *info, fd_t *fd); -static int32_t -init_inode_info_tail(struct crypt_inode_info *info, - struct master_cipher_info *master); -static int32_t -prepare_for_submit_hole(call_frame_t *frame, xlator_t *this, uint64_t from, - off_t size); -static int32_t -load_file_size(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata); -static void -do_ordered_submit(call_frame_t *frame, xlator_t *this, atom_data_type dtype); -static void -do_parallel_submit(call_frame_t *frame, xlator_t *this, atom_data_type dtype); -static void -put_one_call_open(call_frame_t *frame); -static void -put_one_call_readv(call_frame_t *frame, xlator_t *this); -static void -put_one_call_writev(call_frame_t *frame, xlator_t *this); -static void -put_one_call_ftruncate(call_frame_t *frame, xlator_t *this); -static void -free_avec(struct iovec *avec, char **pool, int blocks_in_pool); -static void -free_avec_data(crypt_local_t *local); -static void -free_avec_hole(crypt_local_t *local); - -static crypt_local_t * -crypt_alloc_local(call_frame_t *frame, xlator_t *this, glusterfs_fop_t fop) -{ - crypt_local_t *local = NULL; - - local = GF_CALLOC(1, sizeof(crypt_local_t), gf_crypt_mt_local); - if (!local) { - gf_log(this->name, GF_LOG_ERROR, "out of memory"); - return NULL; - } - local->fop = fop; - LOCK_INIT(&local->hole_lock); - LOCK_INIT(&local->call_lock); - LOCK_INIT(&local->rw_count_lock); - - frame->local = local; - return local; -} - -struct crypt_inode_info * -get_crypt_inode_info(inode_t *inode, xlator_t *this) -{ - int ret; - uint64_t value = 0; - struct crypt_inode_info *info; - - ret = inode_ctx_get(inode, this, &value); - if (ret == -1) { - 
gf_log(this->name, GF_LOG_WARNING, "Can not get inode info"); - return NULL; - } - info = (struct crypt_inode_info *)(long)value; - if (info == NULL) { - gf_log(this->name, GF_LOG_WARNING, "Can not obtain inode info"); - return NULL; - } - return info; -} - -static struct crypt_inode_info * -local_get_inode_info(crypt_local_t *local, xlator_t *this) -{ - if (local->info) - return local->info; - local->info = get_crypt_inode_info(local->fd->inode, this); - return local->info; -} - -static struct crypt_inode_info * -alloc_inode_info(crypt_local_t *local, loc_t *loc) -{ - struct crypt_inode_info *info; - - info = GF_CALLOC(1, sizeof(struct crypt_inode_info), gf_crypt_mt_inode); - if (!info) { - local->op_ret = -1; - local->op_errno = ENOMEM; - gf_log("crypt", GF_LOG_WARNING, "Can not allocate inode info"); - return NULL; - } -#if DEBUG_CRYPT - info->loc = GF_CALLOC(1, sizeof(loc_t), gf_crypt_mt_loc); - if (!info->loc) { - gf_log("crypt", GF_LOG_WARNING, "Can not allocate loc"); - GF_FREE(info); - return NULL; - } - if (loc_copy(info->loc, loc)) { - GF_FREE(info->loc); - GF_FREE(info); - return NULL; - } -#endif /* DEBUG_CRYPT */ - - local->info = info; - return info; -} - -static void -free_inode_info(struct crypt_inode_info *info) -{ -#if DEBUG_CRYPT - loc_wipe(info->loc); - GF_FREE(info->loc); -#endif - memset(info, 0, sizeof(*info)); - GF_FREE(info); -} - -int -crypt_forget(xlator_t *this, inode_t *inode) -{ - uint64_t ctx_addr = 0; - if (!inode_ctx_del(inode, this, &ctx_addr)) - free_inode_info((struct crypt_inode_info *)(long)ctx_addr); - return 0; -} - -#if DEBUG_CRYPT -static void -check_read(call_frame_t *frame, xlator_t *this, int32_t read, struct iovec *vec, - int32_t count, struct iatt *stbuf) -{ - crypt_local_t *local = frame->local; - struct object_cipher_info *object = get_object_cinfo(local->info); - struct avec_config *conf = &local->data_conf; - uint32_t resid = stbuf->ia_size & (object_alg_blksize(object) - 1); - - if (read <= 0) - return; - if (read 
!= iov_length(vec, count)) - gf_log("crypt", GF_LOG_DEBUG, - "op_ret differs from amount of read bytes"); - - if (object_alg_should_pad(object) && - (read & (object_alg_blksize(object) - 1))) - gf_log("crypt", GF_LOG_DEBUG, - "bad amount of read bytes (!= 0 mod(cblock size))"); - - if (conf->aligned_offset + read > - stbuf->ia_size + (resid ? object_alg_blksize(object) - resid : 0)) - gf_log("crypt", GF_LOG_DEBUG, "bad amount of read bytes (too large))"); -} - -#define PT_BYTES_TO_DUMP (32) -static void -dump_plain_text(crypt_local_t *local, struct iovec *avec) -{ - int32_t to_dump; - char str[PT_BYTES_TO_DUMP + 1]; - - if (!avec) - return; - to_dump = avec->iov_len; - if (to_dump > PT_BYTES_TO_DUMP) - to_dump = PT_BYTES_TO_DUMP; - memcpy(str, avec->iov_base, to_dump); - memset(str + to_dump, '0', 1); - gf_log("crypt", GF_LOG_DEBUG, "Read file: %s", str); -} - -static int32_t -data_conf_invariant(struct avec_config *conf) -{ - return conf->acount == !!has_head_block(conf) + !!has_tail_block(conf) + - conf->nr_full_blocks; -} - -static int32_t -hole_conf_invariant(struct avec_config *conf) -{ - return conf->blocks_in_pool == !!has_head_block(conf) + - !!has_tail_block(conf) + - !!has_full_blocks(conf); -} - -static void -crypt_check_conf(struct avec_config *conf) -{ - int32_t ret = 0; - const char *msg; - - switch (conf->type) { - case DATA_ATOM: - msg = "data"; - ret = data_conf_invariant(conf); - break; - case HOLE_ATOM: - msg = "hole"; - ret = hole_conf_invariant(conf); - break; - default: - msg = "unknown"; - } - if (!ret) - gf_log("crypt", GF_LOG_DEBUG, "bad %s conf", msg); -} - -static void -check_buf(call_frame_t *frame, xlator_t *this, struct iatt *buf) -{ - crypt_local_t *local = frame->local; - struct object_cipher_info *object = &local->info->cinfo; - uint64_t local_file_size; - - switch (local->fop) { - case GF_FOP_FTRUNCATE: - return; - case GF_FOP_WRITE: - local_file_size = local->new_file_size; - break; - case GF_FOP_READ: - if 
(parent_is_crypt_xlator(frame, this)) - return; - local_file_size = local->cur_file_size; - break; - default: - gf_log("crypt", GF_LOG_DEBUG, "bad file operation"); - return; - } - if (buf->ia_size != round_up(local_file_size, object_alg_blksize(object))) - gf_log("crypt", GF_LOG_DEBUG, - "bad ia_size in buf (%llu), should be %llu", - (unsigned long long)buf->ia_size, - (unsigned long long)round_up(local_file_size, - object_alg_blksize(object))); -} - -#else -#define check_read(frame, this, op_ret, vec, count, stbuf) noop -#define dump_plain_text(local, avec) noop -#define crypt_check_conf(conf) noop -#define check_buf(frame, this, buf) noop -#endif /* DEBUG_CRYPT */ - -/* - * Pre-conditions: - * @vec represents a ciphertext of expanded size and - * aligned offset. - * - * Compound a temporal vector @avec with block-aligned - * components, decrypt and fix it up to represent a chunk - * of data corresponding to the original size and offset. - * Pass the result to the next translator. - */ -int32_t -crypt_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iovec *vec, - int32_t count, struct iatt *stbuf, struct iobref *iobref, - dict_t *xdata) -{ - crypt_local_t *local = frame->local; - struct avec_config *conf = &local->data_conf; - struct object_cipher_info *object = &local->info->cinfo; - - struct iovec *avec; - uint32_t i; - uint32_t to_vec; - uint32_t to_user; - - check_buf(frame, this, stbuf); - check_read(frame, this, op_ret, vec, count, stbuf); - - local->op_ret = op_ret; - local->op_errno = op_errno; - local->iobref = iobref_ref(iobref); - - local->buf = *stbuf; - local->buf.ia_size = local->cur_file_size; - - if (op_ret <= 0 || count == 0 || vec[0].iov_len == 0) - goto put_one_call; - - if (conf->orig_offset >= local->cur_file_size) { - local->op_ret = 0; - goto put_one_call; - } - /* - * correct config params with real file size - * and actual amount of bytes read - */ - set_config_offsets(frame, this, 
conf->orig_offset, op_ret, DATA_ATOM, 0); - - if (conf->orig_offset + conf->orig_size > local->cur_file_size) - conf->orig_size = local->cur_file_size - conf->orig_offset; - /* - * calculate amount of data to be returned - * to user. - */ - to_user = op_ret; - if (conf->aligned_offset + to_user <= conf->orig_offset) { - gf_log(this->name, GF_LOG_WARNING, "Incomplete read"); - local->op_ret = -1; - local->op_errno = EIO; - goto put_one_call; - } - to_user -= (conf->aligned_offset - conf->orig_offset); - - if (to_user > conf->orig_size) - to_user = conf->orig_size; - local->rw_count = to_user; - - op_errno = set_config_avec_data(this, local, conf, object, vec, count); - if (op_errno) { - local->op_ret = -1; - local->op_errno = op_errno; - goto put_one_call; - } - avec = conf->avec; -#if DEBUG_CRYPT - if (conf->off_in_tail != 0 && - conf->off_in_tail < object_alg_blksize(object) && - object_alg_should_pad(object)) - gf_log(this->name, GF_LOG_DEBUG, "Bad offset in tail %d", - conf->off_in_tail); - if (iov_length(vec, count) != 0 && - in_same_lblock(conf->orig_offset + iov_length(vec, count) - 1, - local->cur_file_size - 1, object_alg_blkbits(object))) { - gf_log(this->name, GF_LOG_DEBUG, "Compound last cblock"); - dump_cblock(this, (unsigned char *)(avec[conf->acount - 1].iov_base) + - avec[conf->acount - 1].iov_len - - object_alg_blksize(object)); - dump_cblock(this, (unsigned char *)(vec[count - 1].iov_base) + - vec[count - 1].iov_len - - object_alg_blksize(object)); - } -#endif - decrypt_aligned_iov(object, avec, conf->acount, conf->aligned_offset); - /* - * pass proper plain data to user - */ - avec[0].iov_base += (conf->aligned_offset - conf->orig_offset); - avec[0].iov_len -= (conf->aligned_offset - conf->orig_offset); - - to_vec = to_user; - for (i = 0; i < conf->acount; i++) { - if (avec[i].iov_len > to_vec) - avec[i].iov_len = to_vec; - to_vec -= avec[i].iov_len; - } -put_one_call: - put_one_call_readv(frame, this); - return 0; -} - -static int32_t 
-do_readv(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, dict_t *dict, dict_t *xdata) -{ - data_t *data; - crypt_local_t *local = frame->local; - - if (op_ret < 0) - goto error; - /* - * extract regular file size - */ - data = dict_get(dict, FSIZE_XATTR_PREFIX); - if (!data) { - gf_log("crypt", GF_LOG_WARNING, "Regular file size not found"); - op_errno = EIO; - goto error; - } - local->cur_file_size = data_to_uint64(data); - - get_one_call(frame); - STACK_WIND(frame, crypt_readv_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readv, local->fd, - /* - * FIXME: read amount can be reduced - */ - local->data_conf.expanded_size, local->data_conf.aligned_offset, - local->flags, local->xdata); - return 0; -error: - local->op_ret = -1; - local->op_errno = op_errno; - - get_one_call(frame); - put_one_call_readv(frame, this); - return 0; -} - -static int32_t -crypt_readv_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - crypt_local_t *local = frame->local; - - if (op_ret < 0) - goto error; - /* - * An access has been granted, - * retrieve file size - */ - STACK_WIND(frame, do_readv, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fgetxattr, local->fd, - FSIZE_XATTR_PREFIX, NULL); - return 0; -error: - fd_unref(local->fd); - if (local->xdata) - dict_unref(local->xdata); - CRYPT_STACK_UNWIND(readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL); - return 0; -} - -static int32_t -readv_trivial_completion(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, - dict_t *xdata) -{ - crypt_local_t *local = frame->local; - - local->op_ret = op_ret; - local->op_errno = op_errno; - - if (op_ret < 0) { - gf_log(this->name, GF_LOG_WARNING, "stat failed (%d)", op_errno); - goto error; - } - local->buf = *buf; - STACK_WIND(frame, load_file_size, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->getxattr, local->loc, - 
FSIZE_XATTR_PREFIX, NULL); - return 0; -error: - CRYPT_STACK_UNWIND(readv, frame, op_ret, op_errno, NULL, 0, NULL, NULL, - NULL); - return 0; -} - -int32_t -crypt_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset, uint32_t flags, dict_t *xdata) -{ - int32_t ret; - crypt_local_t *local; - struct crypt_inode_info *info; - struct gf_flock lock = { - 0, - }; - -#if DEBUG_CRYPT - gf_log("crypt", GF_LOG_DEBUG, "reading %d bytes from offset %llu", - (int)size, (long long)offset); - if (parent_is_crypt_xlator(frame, this)) - gf_log("crypt", GF_LOG_DEBUG, "parent is crypt"); -#endif - local = crypt_alloc_local(frame, this, GF_FOP_READ); - if (!local) { - ret = ENOMEM; - goto error; - } - if (size == 0) - goto trivial; - - local->fd = fd_ref(fd); - local->flags = flags; - - info = local_get_inode_info(local, this); - if (info == NULL) { - ret = EINVAL; - fd_unref(fd); - goto error; - } - if (!object_alg_atomic(&info->cinfo)) { - ret = EINVAL; - fd_unref(fd); - goto error; - } - set_config_offsets(frame, this, offset, size, DATA_ATOM, 0); - if (parent_is_crypt_xlator(frame, this)) { - data_t *data; - /* - * We are called by crypt_writev (or cypt_ftruncate) - * to perform the "read" component of the read-modify-write - * (or read-prune-write) sequence for some atom; - * - * don't ask for access: - * it has already been acquired - * - * Retrieve current file size - */ - if (!xdata) { - gf_log("crypt", GF_LOG_WARNING, - "Regular file size hasn't been passed"); - ret = EIO; - goto error; - } - data = dict_get(xdata, FSIZE_XATTR_PREFIX); - if (!data) { - gf_log("crypt", GF_LOG_WARNING, "Regular file size not found"); - ret = EIO; - goto error; - } - local->old_file_size = local->cur_file_size = data_to_uint64(data); - - get_one_call(frame); - STACK_WIND(frame, crypt_readv_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readv, local->fd, - /* - * FIXME: read amount can be reduced - */ - local->data_conf.expanded_size, - 
local->data_conf.aligned_offset, flags, NULL); - return 0; - } - if (xdata) - local->xdata = dict_ref(xdata); - - lock.l_len = 0; - lock.l_start = 0; - lock.l_type = F_RDLCK; - lock.l_whence = SEEK_SET; - - STACK_WIND(frame, crypt_readv_finodelk_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->finodelk, this->name, fd, F_SETLKW, - &lock, NULL); - return 0; -trivial: - STACK_WIND(frame, readv_trivial_completion, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fstat, fd, NULL); - return 0; -error: - CRYPT_STACK_UNWIND(readv, frame, -1, ret, NULL, 0, NULL, NULL, NULL); - return 0; -} - -void -set_local_io_params_writev(call_frame_t *frame, - struct object_cipher_info *object, - struct rmw_atom *atom, off_t io_offset, - uint32_t io_size) -{ - crypt_local_t *local = frame->local; - - local->io_offset = io_offset; - local->io_size = io_size; - - local->io_offset_nopad = atom->offset_at(frame, object) + - atom->offset_in(frame, object); - - gf_log("crypt", GF_LOG_DEBUG, "set nopad offset to %llu", - (unsigned long long)local->io_offset_nopad); - - local->io_size_nopad = atom->io_size_nopad(frame, object); - - gf_log("crypt", GF_LOG_DEBUG, "set nopad size to %llu", - (unsigned long long)local->io_size_nopad); - - local->update_disk_file_size = 0; - /* - * NOTE: eof_padding_size is 0 for all full atoms; - * For head and tail atoms it will be set up at rmw_partial block() - */ - local->new_file_size = local->cur_file_size; - - if (local->io_offset_nopad + local->io_size_nopad > local->cur_file_size) { - local->new_file_size = local->io_offset_nopad + local->io_size_nopad; - - gf_log("crypt", GF_LOG_DEBUG, "set new file size to %llu", - (unsigned long long)local->new_file_size); - - local->update_disk_file_size = 1; - } -} - -void -set_local_io_params_ftruncate(call_frame_t *frame, - struct object_cipher_info *object) -{ - uint32_t resid; - crypt_local_t *local = frame->local; - struct avec_config *conf = &local->data_conf; - - resid = conf->orig_offset & 
(object_alg_blksize(object) - 1); - if (resid) { - local->eof_padding_size = object_alg_blksize(object) - resid; - local->new_file_size = conf->aligned_offset; - local->update_disk_file_size = 0; - /* - * file size will be updated - * in the ->writev() stack, - * when submitting file tail - */ - } else { - local->eof_padding_size = 0; - local->new_file_size = conf->orig_offset; - local->update_disk_file_size = 1; - /* - * file size will be updated - * in this ->ftruncate stack - */ - } -} - -static void -submit_head(call_frame_t *frame, xlator_t *this) -{ - crypt_local_t *local = frame->local; - submit_partial(frame, this, local->fd, HEAD_ATOM); -} - -static void -submit_tail(call_frame_t *frame, xlator_t *this) -{ - crypt_local_t *local = frame->local; - submit_partial(frame, this, local->fd, TAIL_ATOM); -} - -static void -submit_hole(call_frame_t *frame, xlator_t *this) -{ - /* - * hole conversion always means - * appended write and goes in ordered fashion - */ - do_ordered_submit(frame, this, HOLE_ATOM); -} - -static void -submit_data(call_frame_t *frame, xlator_t *this) -{ - if (is_ordered_mode(frame)) { - do_ordered_submit(frame, this, DATA_ATOM); - return; - } - gf_log("crypt", GF_LOG_WARNING, "Bad submit mode"); - get_nr_calls(frame, nr_calls_data(frame)); - do_parallel_submit(frame, this, DATA_ATOM); - return; -} - -/* - * heplers called by writev_cbk, fruncate_cbk in ordered mode - */ - -static int32_t -should_submit_hole(crypt_local_t *local) -{ - struct avec_config *conf = &local->hole_conf; - - return conf->avec != NULL; -} - -static int32_t -should_resume_submit_hole(crypt_local_t *local) -{ - struct avec_config *conf = &local->hole_conf; - - if (local->fop == GF_FOP_WRITE && has_tail_block(conf)) - /* - * Don't submit a part of hole, which - * fits into a data block: - * this part of hole will be converted - * as a gap filled by zeros in data head - * block. 
- */ - return conf->cursor < conf->acount - 1; - else - return conf->cursor < conf->acount; -} - -static int32_t -should_resume_submit_data(call_frame_t *frame) -{ - crypt_local_t *local = frame->local; - struct avec_config *conf = &local->data_conf; - - if (is_ordered_mode(frame)) - return conf->cursor < conf->acount; - /* - * parallel writes - */ - return 0; -} - -static int32_t -should_submit_data_after_hole(crypt_local_t *local) -{ - return local->data_conf.avec != NULL; -} - -static void -update_local_file_params(call_frame_t *frame, xlator_t *this, - struct iatt *prebuf, struct iatt *postbuf) -{ - crypt_local_t *local = frame->local; - - check_buf(frame, this, postbuf); - - local->prebuf = *prebuf; - local->postbuf = *postbuf; - - local->prebuf.ia_size = local->cur_file_size; - local->postbuf.ia_size = local->new_file_size; - - local->cur_file_size = local->new_file_size; -} - -static int32_t -end_writeback_writev(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) -{ - crypt_local_t *local = frame->local; - - local->op_ret = op_ret; - local->op_errno = op_errno; - - if (op_ret <= 0) { - gf_log(this->name, GF_LOG_WARNING, "writev iteration failed"); - goto put_one_call; - } - /* - * op_ret includes paddings (atom's head, atom's tail and EOF) - */ - if (op_ret < local->io_size) { - gf_log(this->name, GF_LOG_WARNING, "Incomplete writev iteration"); - goto put_one_call; - } - op_ret -= local->eof_padding_size; - local->op_ret = op_ret; - - update_local_file_params(frame, this, prebuf, postbuf); - - if (data_write_in_progress(local)) { - LOCK(&local->rw_count_lock); - local->rw_count += op_ret; - UNLOCK(&local->rw_count_lock); - - if (should_resume_submit_data(frame)) - submit_data(frame, this); - } else { - /* - * hole conversion is going on; - * don't take into account written zeros - */ - if (should_resume_submit_hole(local)) - submit_hole(frame, this); - - else if 
(should_submit_data_after_hole(local)) - submit_data(frame, this); - } -put_one_call: - put_one_call_writev(frame, this); - return 0; -} - -#define crypt_writev_cbk end_writeback_writev - -#define HOLE_WRITE_CHUNK_BITS 12 -#define HOLE_WRITE_CHUNK_SIZE (1 << HOLE_WRITE_CHUNK_BITS) - -/* - * Convert hole of size @size at offset @off to - * zeros and prepare respective iovecs for submit. - * The hole lock should be held. - * - * Pre-conditions: - * @local->file_size is set and valid. - */ -int32_t -prepare_for_submit_hole(call_frame_t *frame, xlator_t *this, uint64_t off, - off_t size) -{ - int32_t ret; - crypt_local_t *local = frame->local; - struct object_cipher_info *object = &local->info->cinfo; - - set_config_offsets(frame, this, off, size, HOLE_ATOM, 1); - - ret = set_config_avec_hole(this, local, &local->hole_conf, object, - local->fop); - crypt_check_conf(&local->hole_conf); - - return ret; -} - -/* - * prepare for submit @count bytes at offset @from - */ -int32_t -prepare_for_submit_data(call_frame_t *frame, xlator_t *this, off_t from, - int32_t size, struct iovec *vec, int32_t vec_count, - int32_t setup_gap) -{ - uint32_t ret; - crypt_local_t *local = frame->local; - struct object_cipher_info *object = &local->info->cinfo; - - set_config_offsets(frame, this, from, size, DATA_ATOM, setup_gap); - - ret = set_config_avec_data(this, local, &local->data_conf, object, vec, - vec_count); - crypt_check_conf(&local->data_conf); - - return ret; -} - -static void -free_avec(struct iovec *avec, char **pool, int blocks_in_pool) -{ - if (!avec) - return; - GF_FREE(pool); - GF_FREE(avec); -} - -static void -free_avec_data(crypt_local_t *local) -{ - return free_avec(local->data_conf.avec, local->data_conf.pool, - local->data_conf.blocks_in_pool); -} - -static void -free_avec_hole(crypt_local_t *local) -{ - return free_avec(local->hole_conf.avec, local->hole_conf.pool, - local->hole_conf.blocks_in_pool); -} - -static void -do_parallel_submit(call_frame_t *frame, xlator_t 
*this, atom_data_type dtype) -{ - crypt_local_t *local = frame->local; - struct avec_config *conf; - - local->active_setup = dtype; - conf = conf_by_type(frame, dtype); - - if (has_head_block(conf)) - submit_head(frame, this); - - if (has_full_blocks(conf)) - submit_full(frame, this); - - if (has_tail_block(conf)) - submit_tail(frame, this); - return; -} - -static void -do_ordered_submit(call_frame_t *frame, xlator_t *this, atom_data_type dtype) -{ - crypt_local_t *local = frame->local; - struct avec_config *conf; - - local->active_setup = dtype; - conf = conf_by_type(frame, dtype); - - if (should_submit_head_block(conf)) { - get_one_call_nolock(frame); - submit_head(frame, this); - } else if (should_submit_full_block(conf)) { - get_one_call_nolock(frame); - submit_full(frame, this); - } else if (should_submit_tail_block(conf)) { - get_one_call_nolock(frame); - submit_tail(frame, this); - } else - gf_log("crypt", GF_LOG_DEBUG, - "nothing has been submitted in ordered mode"); - return; -} - -static int32_t -do_writev(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, dict_t *dict, dict_t *xdata) -{ - data_t *data; - crypt_local_t *local = frame->local; - struct object_cipher_info *object = &local->info->cinfo; - /* - * extract regular file size - */ - data = dict_get(dict, FSIZE_XATTR_PREFIX); - if (!data) { - gf_log("crypt", GF_LOG_WARNING, "Regular file size not found"); - op_ret = -1; - op_errno = EIO; - goto error; - } - local->old_file_size = local->cur_file_size = data_to_uint64(data); - - set_gap_at_end(frame, object, &local->data_conf, DATA_ATOM); - - if (local->cur_file_size < local->data_conf.orig_offset) { - /* - * Set up hole config - */ - op_errno = prepare_for_submit_hole( - frame, this, local->cur_file_size, - local->data_conf.orig_offset - local->cur_file_size); - if (op_errno) { - local->op_ret = -1; - local->op_errno = op_errno; - goto error; - } - } - if (should_submit_hole(local)) - submit_hole(frame, this); - 
else - submit_data(frame, this); - return 0; -error: - get_one_call_nolock(frame); - put_one_call_writev(frame, this); - return 0; -} - -static int32_t -crypt_writev_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - crypt_local_t *local = frame->local; - - local->op_ret = op_ret; - local->op_errno = op_errno; - - if (op_ret < 0) - goto error; - /* - * An access has been granted, - * retrieve file size first - */ - STACK_WIND(frame, do_writev, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fgetxattr, local->fd, - FSIZE_XATTR_PREFIX, NULL); - return 0; -error: - get_one_call_nolock(frame); - put_one_call_writev(frame, this); - return 0; -} - -static int32_t -writev_trivial_completion(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, - dict_t *dict) -{ - crypt_local_t *local = frame->local; - - local->op_ret = op_ret; - local->op_errno = op_errno; - local->prebuf = *buf; - local->postbuf = *buf; - - local->prebuf.ia_size = local->cur_file_size; - local->postbuf.ia_size = local->cur_file_size; - - get_one_call(frame); - put_one_call_writev(frame, this); - return 0; -} - -int -crypt_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vec, - int32_t count, off_t offset, uint32_t flags, struct iobref *iobref, - dict_t *xdata) -{ - int32_t ret; - crypt_local_t *local; - struct crypt_inode_info *info; - struct gf_flock lock = { - 0, - }; -#if DEBUG_CRYPT - gf_log("crypt", GF_LOG_DEBUG, "writing %d bytes from offset %llu", - (int)iov_length(vec, count), (long long)offset); -#endif - local = crypt_alloc_local(frame, this, GF_FOP_WRITE); - if (!local) { - ret = ENOMEM; - goto error; - } - local->fd = fd_ref(fd); - - if (iobref) - local->iobref = iobref_ref(iobref); - /* - * to update real file size on the server - */ - local->xattr = dict_new(); - if (!local->xattr) { - ret = ENOMEM; - goto error; - } - local->flags = flags; - - info = 
local_get_inode_info(local, this); - if (info == NULL) { - ret = EINVAL; - goto error; - } - if (!object_alg_atomic(&info->cinfo)) { - ret = EINVAL; - goto error; - } - if (iov_length(vec, count) == 0) - goto trivial; - - ret = prepare_for_submit_data(frame, this, offset, - iov_length(vec, count), - vec, count, 0 /* don't setup gup - in tail: we don't - know file size yet */); - if (ret) { - ret = ENOMEM; - goto error; - } - - if (parent_is_crypt_xlator(frame, this)) { - data_t *data; - /* - * we are called by shinking crypt_ftruncate(), - * which doesn't perform hole conversion; - * - * don't ask for access: - * it has already been acquired - */ - - /* - * extract file size - */ - if (!xdata) { - gf_log("crypt", GF_LOG_WARNING, - "Regular file size hasn't been passed"); - ret = EIO; - goto error; - } - data = dict_get(xdata, FSIZE_XATTR_PREFIX); - if (!data) { - gf_log("crypt", GF_LOG_WARNING, "Regular file size not found"); - ret = EIO; - goto error; - } - local->old_file_size = local->cur_file_size = data_to_uint64(data); - - submit_data(frame, this); - return 0; - } - if (xdata) - local->xdata = dict_ref(xdata); - /* - * lock the file and retrieve its size - */ - lock.l_len = 0; - lock.l_start = 0; - lock.l_type = F_WRLCK; - lock.l_whence = SEEK_SET; - - STACK_WIND(frame, crypt_writev_finodelk_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->finodelk, this->name, fd, F_SETLKW, - &lock, NULL); - return 0; -trivial: - STACK_WIND(frame, writev_trivial_completion, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fstat, fd, NULL); - return 0; -error: - if (local && local->fd) - fd_unref(fd); - if (local && local->iobref) - iobref_unref(iobref); - if (local && local->xdata) - dict_unref(xdata); - if (local && local->xattr) - dict_unref(local->xattr); - if (local && local->info) - free_inode_info(local->info); - - CRYPT_STACK_UNWIND(writev, frame, -1, ret, NULL, NULL, NULL); - return 0; -} - -int32_t -prepare_for_prune(call_frame_t *frame, xlator_t *this, uint64_t 
offset) -{ - set_config_offsets(frame, this, - offset, - 0, /* count */ - DATA_ATOM, - 0 /* since we prune, there is no - gap in tail to uptodate */); - return 0; -} - -/* - * Finish the read-prune-modify sequence - * - * Can be invoked as - * 1) ->ftruncate_cbk() for cblock-aligned, or trivial prune - * 2) ->writev_cbk() for non-cblock-aligned prune - */ - -static int32_t -prune_complete(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) -{ - crypt_local_t *local = frame->local; - - local->op_ret = op_ret; - local->op_errno = op_errno; - - update_local_file_params(frame, this, prebuf, postbuf); - - put_one_call_ftruncate(frame, this); - return 0; -} - -/* - * This is called as ->ftruncate_cbk() - * - * Perform the "write" component of the - * read-prune-write sequence. - * - * submuit the rest of the file - */ -static int32_t -prune_submit_file_tail(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) -{ - crypt_local_t *local = frame->local; - struct avec_config *conf = &local->data_conf; - dict_t *dict; - - if (op_ret < 0) - goto put_one_call; - - if (local->xdata) { - dict_unref(local->xdata); - local->xdata = NULL; - } - if (xdata) - local->xdata = dict_ref(xdata); - - dict = dict_new(); - if (!dict) { - op_errno = ENOMEM; - goto error; - } - - update_local_file_params(frame, this, prebuf, postbuf); - local->new_file_size = conf->orig_offset; - - /* - * The rest of the file is a partial block and, hence, - * should be written via RMW sequence, so the crypt xlator - * does STACK_WIND to itself. 
- * - * Pass current file size to crypt_writev() - */ - op_errno = dict_set(dict, FSIZE_XATTR_PREFIX, - data_from_uint64(local->cur_file_size)); - if (op_errno) { - gf_log("crypt", GF_LOG_WARNING, "can not set key to update file size"); - dict_unref(dict); - goto error; - } - gf_log("crypt", GF_LOG_DEBUG, - "passing current file size (%llu) to crypt_writev", - (unsigned long long)local->cur_file_size); - /* - * Padding will be filled with - * zeros by rmw_partial_block() - */ - STACK_WIND(frame, prune_complete, this, - this->fops->writev, /* crypt_writev */ - local->fd, &local->vec, 1, - conf->aligned_offset, /* offset to write from */ - 0, local->iobref, dict); - - dict_unref(dict); - return 0; -error: - local->op_ret = -1; - local->op_errno = op_errno; -put_one_call: - put_one_call_ftruncate(frame, this); - return 0; -} - -/* - * This is called as a callback of ->writev() invoked in behalf - * of ftruncate(): it can be - * 1) ordered writes issued by hole conversion in the case of - * expanded truncate, or - * 2) an rmw partial data block issued by non-cblock-aligned - * prune. - */ -int32_t -end_writeback_ftruncate(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) -{ - crypt_local_t *local = frame->local; - /* - * if nothing has been written, - * then it must be an error - */ - local->op_ret = op_ret; - local->op_errno = op_errno; - - if (op_ret < 0) - goto put_one_call; - - update_local_file_params(frame, this, prebuf, postbuf); - - if (data_write_in_progress(local)) - /* case (2) */ - goto put_one_call; - /* case (1) */ - if (should_resume_submit_hole(local)) - submit_hole(frame, this); - /* - * case of hole, when we shouldn't resume - */ -put_one_call: - put_one_call_ftruncate(frame, this); - return 0; -} - -/* - * Perform prune and write components of the - * read-prune-write sequence. 
- * - * Called as ->readv_cbk() - * - * Pre-conditions: - * @vec contains the latest atom of the file - * (plain text) - */ -static int32_t -prune_write(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, struct iovec *vec, int32_t count, - struct iatt *stbuf, struct iobref *iobref, dict_t *xdata) -{ - int32_t i; - size_t to_copy; - size_t copied = 0; - crypt_local_t *local = frame->local; - struct avec_config *conf = &local->data_conf; - - local->op_ret = op_ret; - local->op_errno = op_errno; - if (op_ret == -1) - goto put_one_call; - - /* - * At first, uptodate head block - */ - if (iov_length(vec, count) < conf->off_in_head) { - gf_log(this->name, GF_LOG_WARNING, - "Failed to uptodate head block for prune"); - local->op_ret = -1; - local->op_errno = EIO; - goto put_one_call; - } - local->vec.iov_len = conf->off_in_head; - local->vec.iov_base = GF_CALLOC(1, local->vec.iov_len, gf_crypt_mt_data); - - if (local->vec.iov_base == NULL) { - gf_log(this->name, GF_LOG_WARNING, - "Failed to calloc head block for prune"); - local->op_ret = -1; - local->op_errno = ENOMEM; - goto put_one_call; - } - for (i = 0; i < count; i++) { - to_copy = vec[i].iov_len; - if (to_copy > local->vec.iov_len - copied) - to_copy = local->vec.iov_len - copied; - - memcpy((char *)local->vec.iov_base + copied, vec[i].iov_base, to_copy); - copied += to_copy; - if (copied == local->vec.iov_len) - break; - } - /* - * perform prune with aligned offset - * (i.e. 
at this step we prune a bit - * more then it is needed - */ - STACK_WIND(frame, prune_submit_file_tail, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->ftruncate, local->fd, - conf->aligned_offset, local->xdata); - return 0; -put_one_call: - put_one_call_ftruncate(frame, this); - return 0; -} - -/* - * Perform a read-prune-write sequence - */ -int32_t -read_prune_write(call_frame_t *frame, xlator_t *this) -{ - int32_t ret = 0; - dict_t *dict = NULL; - crypt_local_t *local = frame->local; - struct avec_config *conf = &local->data_conf; - struct object_cipher_info *object = &local->info->cinfo; - - set_local_io_params_ftruncate(frame, object); - get_one_call_nolock(frame); - - if ((conf->orig_offset & (object_alg_blksize(object) - 1)) == 0) { - /* - * cblock-aligned prune: - * we don't need read and write components, - * just cut file body - */ - gf_log("crypt", GF_LOG_DEBUG, "prune without RMW (at offset %llu", - (unsigned long long)conf->orig_offset); - - STACK_WIND(frame, prune_complete, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->ftruncate, local->fd, - conf->orig_offset, local->xdata); - return 0; - } - gf_log("crypt", GF_LOG_DEBUG, "prune with RMW (at offset %llu", - (unsigned long long)conf->orig_offset); - /* - * We are about to perform the "read" component of the - * read-prune-write sequence. It means that we need to - * read encrypted data from disk and decrypt it. - * So, the crypt translator does STACK_WIND to itself. 
- * - * Pass current file size to crypt_readv() - - */ - dict = dict_new(); - if (!dict) { - gf_log("crypt", GF_LOG_WARNING, "Can not alloc dict"); - ret = ENOMEM; - goto exit; - } - ret = dict_set(dict, FSIZE_XATTR_PREFIX, - data_from_uint64(local->cur_file_size)); - if (ret) { - gf_log("crypt", GF_LOG_WARNING, "Can not set dict"); - goto exit; - } - STACK_WIND(frame, prune_write, this, this->fops->readv, /* crypt_readv */ - local->fd, get_atom_size(object), /* bytes to read */ - conf->aligned_offset, /* offset to read from */ - 0, dict); -exit: - if (dict) - dict_unref(dict); - return ret; -} - -/* - * File prune is more complicated than expand. - * First we need to read the latest atom to not lose info - * needed for proper update. Also we need to make sure that - * every component of read-prune-write sequence leaves data - * consistent - * - * Non-cblock aligned prune is performed as read-prune-write - * sequence: - * - * 1) read the latest atom; - * 2) perform cblock-aligned prune - * 3) issue a write request for the end-of-file - */ -int32_t -prune_file(call_frame_t *frame, xlator_t *this, uint64_t offset) -{ - int32_t ret; - - ret = prepare_for_prune(frame, this, offset); - if (ret) - return ret; - return read_prune_write(frame, this); -} - -int32_t -expand_file(call_frame_t *frame, xlator_t *this, uint64_t offset) -{ - int32_t ret; - crypt_local_t *local = frame->local; - - ret = prepare_for_submit_hole(frame, this, local->old_file_size, - offset - local->old_file_size); - if (ret) - return ret; - submit_hole(frame, this); - return 0; -} - -static int32_t -ftruncate_trivial_completion(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, - dict_t *dict) -{ - crypt_local_t *local = frame->local; - - local->op_ret = op_ret; - local->op_errno = op_errno; - local->prebuf = *buf; - local->postbuf = *buf; - - local->prebuf.ia_size = local->cur_file_size; - local->postbuf.ia_size = local->cur_file_size; - - 
get_one_call(frame); - put_one_call_ftruncate(frame, this); - return 0; -} - -static int32_t -do_ftruncate(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, dict_t *dict, dict_t *xdata) -{ - data_t *data; - crypt_local_t *local = frame->local; - - if (op_ret) - goto error; - /* - * extract regular file size - */ - data = dict_get(dict, FSIZE_XATTR_PREFIX); - if (!data) { - gf_log("crypt", GF_LOG_WARNING, "Regular file size not found"); - op_errno = EIO; - goto error; - } - local->old_file_size = local->cur_file_size = data_to_uint64(data); - - if (local->data_conf.orig_offset == local->cur_file_size) { -#if DEBUG_CRYPT - gf_log("crypt", GF_LOG_DEBUG, - "trivial ftruncate (current file size %llu)", - (unsigned long long)local->cur_file_size); -#endif - goto trivial; - } else if (local->data_conf.orig_offset < local->cur_file_size) { -#if DEBUG_CRYPT - gf_log("crypt", GF_LOG_DEBUG, "prune from %llu to %llu", - (unsigned long long)local->cur_file_size, - (unsigned long long)local->data_conf.orig_offset); -#endif - op_errno = prune_file(frame, this, local->data_conf.orig_offset); - } else { -#if DEBUG_CRYPT - gf_log("crypt", GF_LOG_DEBUG, "expand from %llu to %llu", - (unsigned long long)local->cur_file_size, - (unsigned long long)local->data_conf.orig_offset); -#endif - op_errno = expand_file(frame, this, local->data_conf.orig_offset); - } - if (op_errno) - goto error; - return 0; -trivial: - STACK_WIND(frame, ftruncate_trivial_completion, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fstat, local->fd, NULL); - return 0; -error: - /* - * finish with ftruncate - */ - local->op_ret = -1; - local->op_errno = op_errno; - - get_one_call_nolock(frame); - put_one_call_ftruncate(frame, this); - return 0; -} - -static int32_t -crypt_ftruncate_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - crypt_local_t *local = frame->local; - - local->op_ret = op_ret; - 
local->op_errno = op_errno; - - if (op_ret < 0) - goto error; - /* - * An access has been granted, - * retrieve file size first - */ - STACK_WIND(frame, do_ftruncate, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fgetxattr, local->fd, - FSIZE_XATTR_PREFIX, NULL); - return 0; -error: - get_one_call_nolock(frame); - put_one_call_ftruncate(frame, this); - return 0; -} - -/* - * ftruncate is performed in 2 steps: - * . receive file size; - * . expand or prune file. - */ -static int32_t -crypt_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, - dict_t *xdata) -{ - int32_t ret; - crypt_local_t *local; - struct crypt_inode_info *info; - struct gf_flock lock = { - 0, - }; - - local = crypt_alloc_local(frame, this, GF_FOP_FTRUNCATE); - if (!local) { - ret = ENOMEM; - goto error; - } - local->xattr = dict_new(); - if (!local->xattr) { - ret = ENOMEM; - goto error; - } - local->fd = fd_ref(fd); - info = local_get_inode_info(local, this); - if (info == NULL) { - ret = EINVAL; - goto error; - } - if (!object_alg_atomic(&info->cinfo)) { - ret = EINVAL; - goto error; - } - local->data_conf.orig_offset = offset; - if (xdata) - local->xdata = dict_ref(xdata); - - lock.l_len = 0; - lock.l_start = 0; - lock.l_type = F_WRLCK; - lock.l_whence = SEEK_SET; - - STACK_WIND(frame, crypt_ftruncate_finodelk_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->finodelk, this->name, fd, F_SETLKW, - &lock, NULL); - return 0; -error: - if (local && local->fd) - fd_unref(fd); - if (local && local->xdata) - dict_unref(xdata); - if (local && local->xattr) - dict_unref(local->xattr); - if (local && local->info) - free_inode_info(local->info); - - CRYPT_STACK_UNWIND(ftruncate, frame, -1, ret, NULL, NULL, NULL); - return 0; -} - -/* ->flush_cbk() */ -int32_t -truncate_end(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, dict_t *xdata) -{ - crypt_local_t *local = frame->local; - - CRYPT_STACK_UNWIND(truncate, frame, op_ret, op_errno, 
&local->prebuf, - &local->postbuf, local->xdata); - return 0; -} - -/* ftruncate_cbk() */ -int32_t -truncate_flush(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) -{ - crypt_local_t *local = frame->local; - fd_t *fd = local->fd; - local->prebuf = *prebuf; - local->postbuf = *postbuf; - - STACK_WIND(frame, truncate_end, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->flush, fd, NULL); - fd_unref(fd); - return 0; -} - -/* - * is called as ->open_cbk() - */ -static int32_t -truncate_begin(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) -{ - crypt_local_t *local = frame->local; - - if (op_ret < 0) { - fd_unref(fd); - CRYPT_STACK_UNWIND(truncate, frame, op_ret, op_errno, NULL, NULL, NULL); - return 0; - } else { - fd_bind(fd); - } - /* - * crypt_truncate() is implemented via crypt_ftruncate(), - * so the crypt xlator does STACK_WIND to itself here - */ - STACK_WIND(frame, truncate_flush, this, - this->fops->ftruncate, /* crypt_ftruncate */ - fd, local->offset, NULL); - return 0; -} - -/* - * crypt_truncate() is implemented via crypt_ftruncate() as a - * sequence crypt_open() - crypt_ftruncate() - truncate_flush() - */ -int32_t -crypt_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, - dict_t *xdata) -{ - fd_t *fd; - crypt_local_t *local; - -#if DEBUG_CRYPT - gf_log(this->name, GF_LOG_DEBUG, "truncate file %s at offset %llu", - loc->path, (unsigned long long)offset); -#endif - local = crypt_alloc_local(frame, this, GF_FOP_TRUNCATE); - if (!local) - goto error; - - fd = fd_create(loc->inode, frame->root->pid); - if (!fd) { - gf_log(this->name, GF_LOG_ERROR, "Can not create fd"); - goto error; - } - local->fd = fd; - local->offset = offset; - local->xdata = xdata; - STACK_WIND(frame, truncate_begin, this, this->fops->open, /* crypt_open() */ - loc, O_RDWR, fd, NULL); - return 0; -error: - 
CRYPT_STACK_UNWIND(truncate, frame, -1, EINVAL, NULL, NULL, NULL); - return 0; -} - -end_writeback_handler_t -dispatch_end_writeback(glusterfs_fop_t fop) -{ - switch (fop) { - case GF_FOP_WRITE: - return end_writeback_writev; - case GF_FOP_FTRUNCATE: - return end_writeback_ftruncate; - default: - gf_log("crypt", GF_LOG_WARNING, "Bad wb operation %d", fop); - return NULL; - } -} - -/* - * true, if the caller needs metadata string - */ -static int32_t -is_custom_mtd(dict_t *xdata) -{ - data_t *data; - uint32_t flags; - - if (!xdata) - return 0; - - data = dict_get(xdata, MSGFLAGS_PREFIX); - if (!data) - return 0; - if (data->len != sizeof(uint32_t)) { - gf_log("crypt", GF_LOG_WARNING, "Bad msgflags size (%d)", data->len); - return -1; - } - flags = *((uint32_t *)data->data); - return msgflags_check_mtd_lock(&flags); -} - -static int32_t -crypt_open_done(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - crypt_local_t *local = frame->local; - - local->op_ret = op_ret; - local->op_errno = op_errno; - if (op_ret < 0) - gf_log(this->name, GF_LOG_WARNING, "mtd unlock failed (%d)", op_errno); - put_one_call_open(frame); - return 0; -} - -static void -crypt_open_tail(call_frame_t *frame, xlator_t *this) -{ - struct gf_flock lock = { - 0, - }; - crypt_local_t *local = frame->local; - - lock.l_type = F_UNLCK; - lock.l_whence = SEEK_SET; - lock.l_start = 0; - lock.l_len = 0; - lock.l_pid = 0; - - STACK_WIND(frame, crypt_open_done, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->finodelk, this->name, local->fd, - F_SETLKW, &lock, NULL); -} - -/* - * load private inode info at open time - * called as ->fgetxattr_cbk() - */ -static int -load_mtd_open(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, dict_t *dict, dict_t *xdata) -{ - int32_t ret; - gf_boolean_t upload_info; - data_t *mtd; - uint64_t value = 0; - struct crypt_inode_info *info; - crypt_local_t *local = frame->local; - 
crypt_private_t *priv = this->private; - - local->op_ret = op_ret; - local->op_errno = op_errno; - - if (local->fd->inode->ia_type == IA_IFLNK) - goto exit; - if (op_ret < 0) - goto exit; - /* - * first, check for cached info - */ - ret = inode_ctx_get(local->fd->inode, this, &value); - if (ret != -1) { - info = (struct crypt_inode_info *)(long)value; - if (info == NULL) { - gf_log(this->name, GF_LOG_WARNING, - "Inode info expected, but not found"); - local->op_ret = -1; - local->op_errno = EIO; - goto exit; - } - /* - * info has been found in the cache - */ - upload_info = _gf_false; - } else { - /* - * info hasn't been found in the cache. - */ - info = alloc_inode_info(local, local->loc); - if (!info) { - local->op_ret = -1; - local->op_errno = ENOMEM; - goto exit; - } - init_inode_info_head(info, local->fd); - upload_info = _gf_true; - } - /* - * extract metadata - */ - mtd = dict_get(dict, CRYPTO_FORMAT_PREFIX); - if (!mtd) { - local->op_ret = -1; - local->op_errno = ENOENT; - gf_log(this->name, GF_LOG_WARNING, "Format string wasn't found"); - goto exit; - } - /* - * authenticate metadata against the path - */ - ret = open_format((unsigned char *)mtd->data, mtd->len, local->loc, info, - get_master_cinfo(priv), local, upload_info); - if (ret) { - local->op_ret = -1; - local->op_errno = ret; - goto exit; - } - if (upload_info) { - ret = init_inode_info_tail(info, get_master_cinfo(priv)); - if (ret) { - local->op_ret = -1; - local->op_errno = ret; - goto exit; - } - ret = inode_ctx_put(local->fd->inode, this, (uint64_t)(long)info); - if (ret == -1) { - local->op_ret = -1; - local->op_errno = EIO; - goto exit; - } - } - if (local->custom_mtd) { - /* - * pass the metadata string to the customer - */ - ret = dict_set_static_bin(local->xdata, CRYPTO_FORMAT_PREFIX, mtd->data, - mtd->len); - if (ret) { - local->op_ret = -1; - local->op_errno = ret; - goto exit; - } - } -exit: - if (!local->custom_mtd) - crypt_open_tail(frame, this); - else - put_one_call_open(frame); - 
return 0; -} - -static int32_t -crypt_open_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - crypt_local_t *local = frame->local; - - local->op_ret = op_ret; - local->op_errno = op_errno; - - if (op_ret < 0) { - gf_log(this->name, GF_LOG_WARNING, "finodelk (LOCK) failed"); - goto exit; - } - STACK_WIND(frame, load_mtd_open, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fgetxattr, local->fd, - CRYPTO_FORMAT_PREFIX, NULL); - return 0; -exit: - put_one_call_open(frame); - return 0; -} - -/* - * verify metadata against the specified pathname - */ -static int32_t -crypt_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) -{ - struct gf_flock lock = { - 0, - }; - crypt_local_t *local = frame->local; - - local->op_ret = op_ret; - local->op_errno = op_errno; - - if (local->fd->inode->ia_type == IA_IFLNK) - goto exit; - if (op_ret < 0) - goto exit; - if (xdata) - local->xdata = dict_ref(xdata); - else if (local->custom_mtd) { - local->xdata = dict_new(); - if (!local->xdata) { - local->op_ret = -1; - local->op_errno = ENOMEM; - gf_log("crypt", GF_LOG_ERROR, - "Can not get new dict for mtd string"); - goto exit; - } - } - lock.l_len = 0; - lock.l_start = 0; - lock.l_type = local->custom_mtd ? 
F_WRLCK : F_RDLCK; - lock.l_whence = SEEK_SET; - - STACK_WIND(frame, crypt_open_finodelk_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->finodelk, this->name, fd, F_SETLKW, - &lock, NULL); - return 0; -exit: - put_one_call_open(frame); - return 0; -} - -static int32_t -crypt_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - fd_t *fd, dict_t *xdata) -{ - int32_t ret = ENOMEM; - crypt_local_t *local; - - local = crypt_alloc_local(frame, this, GF_FOP_OPEN); - if (!local) - goto error; - local->loc = GF_CALLOC(1, sizeof(loc_t), gf_crypt_mt_loc); - if (!local->loc) { - ret = ENOMEM; - goto error; - } - ret = loc_copy(local->loc, loc); - if (ret) { - GF_FREE(local->loc); - ret = ENOMEM; - goto error; - } - local->fd = fd_ref(fd); - - ret = is_custom_mtd(xdata); - if (ret < 0) { - loc_wipe(local->loc); - GF_FREE(local->loc); - ret = EINVAL; - goto error; - } - local->custom_mtd = ret; - - if ((flags & O_ACCMODE) == O_WRONLY) - /* - * we can't open O_WRONLY, because - * we need to do read-modify-write - */ - flags = (flags & ~O_ACCMODE) | O_RDWR; - /* - * Make sure that out translated offsets - * and counts won't be ignored - */ - flags &= ~O_APPEND; - get_one_call_nolock(frame); - STACK_WIND(frame, crypt_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); - return 0; -error: - CRYPT_STACK_UNWIND(open, frame, -1, ret, NULL, NULL); - return 0; -} - -static int32_t -init_inode_info_tail(struct crypt_inode_info *info, - struct master_cipher_info *master) -{ - int32_t ret; - struct object_cipher_info *object = &info->cinfo; - -#if DEBUG_CRYPT - gf_log("crypt", GF_LOG_DEBUG, "Init inode info for object %s", - uuid_utoa(info->oid)); -#endif - ret = data_cipher_algs[object->o_alg][object->o_mode].set_private(info, - master); - if (ret) { - gf_log("crypt", GF_LOG_ERROR, "Set private info failed"); - return ret; - } - return 0; -} - -/* - * Init inode info at ->create() time - */ -static void -init_inode_info_create(struct 
crypt_inode_info *info, - struct master_cipher_info *master, data_t *data) -{ - struct object_cipher_info *object; - - info->nr_minor = CRYPT_XLATOR_ID; - memcpy(info->oid, data->data, data->len); - - object = &info->cinfo; - - object->o_alg = master->m_alg; - object->o_mode = master->m_mode; - object->o_block_bits = master->m_block_bits; - object->o_dkey_size = master->m_dkey_size; -} - -static void -init_inode_info_head(struct crypt_inode_info *info, fd_t *fd) -{ - memcpy(info->oid, fd->inode->gfid, sizeof(uuid_t)); -} - -static int32_t -crypt_create_done(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - crypt_private_t *priv = this->private; - crypt_local_t *local = frame->local; - struct crypt_inode_info *info = local->info; - fd_t *local_fd = local->fd; - dict_t *local_xdata = local->xdata; - inode_t *local_inode = local->inode; - - if (op_ret < 0) { - free_inode_info(info); - goto unwind; - } - op_errno = init_inode_info_tail(info, get_master_cinfo(priv)); - if (op_errno) { - op_ret = -1; - free_inode_info(info); - goto unwind; - } - /* - * FIXME: drop major subversion number - */ - op_ret = inode_ctx_put(local->fd->inode, this, (uint64_t)(long)info); - if (op_ret == -1) { - op_errno = EIO; - free_inode_info(info); - goto unwind; - } -unwind: - free_format(local); - CRYPT_STACK_UNWIND(create, frame, op_ret, op_errno, local_fd, local_inode, - &local->buf, &local->prebuf, &local->postbuf, - local_xdata); - fd_unref(local_fd); - inode_unref(local_inode); - if (local_xdata) - dict_unref(local_xdata); - return 0; -} - -static int -crypt_create_tail(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - struct gf_flock lock = { - 0, - }; - crypt_local_t *local = frame->local; - fd_t *local_fd = local->fd; - dict_t *local_xdata = local->xdata; - inode_t *local_inode = local->inode; - - dict_unref(local->xattr); - - if (op_ret < 0) - goto error; - - 
lock.l_type = F_UNLCK; - lock.l_whence = SEEK_SET; - lock.l_start = 0; - lock.l_len = 0; - lock.l_pid = 0; - - STACK_WIND(frame, crypt_create_done, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->finodelk, this->name, local->fd, - F_SETLKW, &lock, NULL); - return 0; -error: - free_inode_info(local->info); - free_format(local); - - CRYPT_STACK_UNWIND(create, frame, op_ret, op_errno, local_fd, local_inode, - &local->buf, &local->prebuf, &local->postbuf, - local_xdata); - - fd_unref(local_fd); - inode_unref(local_inode); - if (local_xdata) - dict_unref(local_xdata); - return 0; -} - -static int32_t -crypt_create_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - crypt_local_t *local = frame->local; - struct crypt_inode_info *info = local->info; - - if (op_ret < 0) - goto error; - - STACK_WIND(frame, crypt_create_tail, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsetxattr, local->fd, - local->xattr, /* CRYPTO_FORMAT_PREFIX */ - 0, NULL); - return 0; -error: - free_inode_info(info); - free_format(local); - fd_unref(local->fd); - dict_unref(local->xattr); - if (local->xdata) - dict_unref(local->xdata); - - CRYPT_STACK_UNWIND(create, frame, op_ret, op_errno, NULL, NULL, NULL, NULL, - NULL, NULL); - return 0; -} - -/* - * Create and store crypt-specific format on disk; - * Populate cache with private inode info - */ -static int32_t -crypt_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - struct gf_flock lock = { - 0, - }; - crypt_local_t *local = frame->local; - struct crypt_inode_info *info = local->info; - - if (op_ret < 0) - goto error; - if (xdata) - local->xdata = dict_ref(xdata); - local->inode = inode_ref(inode); - local->buf = *buf; - local->prebuf = *preparent; - local->postbuf = *postparent; - - lock.l_len = 0; - lock.l_start = 0; - 
lock.l_type = F_WRLCK; - lock.l_whence = SEEK_SET; - - STACK_WIND(frame, crypt_create_finodelk_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->finodelk, this->name, local->fd, - F_SETLKW, &lock, NULL); - return 0; -error: - free_inode_info(info); - free_format(local); - fd_unref(local->fd); - dict_unref(local->xattr); - - CRYPT_STACK_UNWIND(create, frame, op_ret, op_errno, NULL, NULL, NULL, NULL, - NULL, NULL); - return 0; -} - -static int32_t -crypt_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) -{ - int ret; - data_t *data; - crypt_local_t *local; - crypt_private_t *priv; - struct master_cipher_info *master; - struct crypt_inode_info *info; - - priv = this->private; - master = get_master_cinfo(priv); - - if (master_alg_atomic(master)) { - /* - * We can't open O_WRONLY, because we - * need to do read-modify-write. - */ - if ((flags & O_ACCMODE) == O_WRONLY) - flags = (flags & ~O_ACCMODE) | O_RDWR; - /* - * Make sure that out translated offsets - * and counts won't be ignored - */ - flags &= ~O_APPEND; - } - local = crypt_alloc_local(frame, this, GF_FOP_CREATE); - if (!local) { - ret = ENOMEM; - goto error; - } - data = dict_get(xdata, "gfid-req"); - if (!data) { - ret = EINVAL; - gf_log("crypt", GF_LOG_WARNING, "gfid not found"); - goto error; - } - if (data->len != sizeof(uuid_t)) { - ret = EINVAL; - gf_log("crypt", GF_LOG_WARNING, "bad gfid size (%d), should be %d", - (int)data->len, (int)sizeof(uuid_t)); - goto error; - } - info = alloc_inode_info(local, loc); - if (!info) { - ret = ENOMEM; - goto error; - } - /* - * NOTE: - * format has to be created BEFORE - * proceeding to the untrusted server - */ - ret = alloc_format_create(local); - if (ret) { - free_inode_info(info); - goto error; - } - init_inode_info_create(info, master, data); - - ret = create_format(local->format, loc, info, master); - if (ret) { - free_inode_info(info); - goto error; - } - local->xattr = dict_new(); - 
if (!local->xattr) { - free_inode_info(info); - free_format(local); - goto error; - } - ret = dict_set_static_bin(local->xattr, CRYPTO_FORMAT_PREFIX, local->format, - new_format_size()); - if (ret) { - dict_unref(local->xattr); - free_inode_info(info); - free_format(local); - ret = EINVAL; - goto error; - } - ret = dict_set(local->xattr, FSIZE_XATTR_PREFIX, data_from_uint64(0)); - if (ret) { - dict_unref(local->xattr); - free_inode_info(info); - free_format(local); - ret = ENOMEM; - goto error; - } - local->fd = fd_ref(fd); - - STACK_WIND(frame, crypt_create_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd, - xdata); - return 0; -error: - gf_log("crypt", GF_LOG_WARNING, "can not create file"); - CRYPT_STACK_UNWIND(create, frame, -1, ret, NULL, NULL, NULL, NULL, NULL, - NULL); - return 0; -} - -/* - * FIXME: this should depends on the version of format string - */ -static int32_t -filter_crypt_xattr(dict_t *dict, char *key, data_t *value, void *data) -{ - dict_del(dict, key); - return 0; -} - -static int32_t -crypt_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, - int32_t flags, dict_t *xdata) -{ - dict_foreach_fnmatch(dict, "trusted.glusterfs.crypt*", filter_crypt_xattr, - NULL); - STACK_WIND(frame, default_fsetxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata); - return 0; -} - -/* - * TBD: verify file metadata before wind - */ -static int32_t -crypt_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, - int32_t flags, dict_t *xdata) -{ - dict_foreach_fnmatch(dict, "trusted.glusterfs.crypt*", filter_crypt_xattr, - NULL); - STACK_WIND(frame, default_setxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata); - return 0; -} - -/* - * called as flush_cbk() - */ -static int32_t -linkop_end(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, dict_t *xdata) -{ - crypt_local_t 
*local = frame->local; - linkop_unwind_handler_t unwind_fn; - unwind_fn = linkop_unwind_dispatch(local->fop); - - local->op_ret = op_ret; - local->op_errno = op_errno; - - if (op_ret < 0 && op_errno == ENOENT && - local->loc->inode->ia_type == IA_IFLNK) { - local->op_ret = 0; - local->op_errno = 0; - } - unwind_fn(frame); - return 0; -} - -/* - * unpin inode on the server - */ -static int32_t -link_flush(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, inode_t *inode, struct iatt *buf, - struct iatt *preparent, struct iatt *postparent, dict_t *xdata) -{ - crypt_local_t *local = frame->local; - - if (op_ret < 0) - goto error; - if (local->xdata) { - dict_unref(local->xdata); - local->xdata = NULL; - } - if (xdata) - local->xdata = dict_ref(xdata); - local->inode = inode_ref(inode); - local->buf = *buf; - local->prebuf = *preparent; - local->postbuf = *postparent; - - STACK_WIND(frame, linkop_end, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->flush, local->fd, NULL); - return 0; -error: - local->op_ret = -1; - local->op_errno = op_errno; - link_unwind(frame); - return 0; -} - -void -link_unwind(call_frame_t *frame) -{ - crypt_local_t *local = frame->local; - dict_t *xdata; - dict_t *xattr; - inode_t *inode; - - if (!local) { - CRYPT_STACK_UNWIND(link, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, - NULL); - return; - } - xdata = local->xdata; - xattr = local->xattr; - inode = local->inode; - - if (local->loc) { - loc_wipe(local->loc); - GF_FREE(local->loc); - } - if (local->newloc) { - loc_wipe(local->newloc); - GF_FREE(local->newloc); - } - if (local->fd) - fd_unref(local->fd); - if (local->format) - GF_FREE(local->format); - - CRYPT_STACK_UNWIND(link, frame, local->op_ret, local->op_errno, inode, - &local->buf, &local->prebuf, &local->postbuf, xdata); - if (xdata) - dict_unref(xdata); - if (xattr) - dict_unref(xattr); - if (inode) - inode_unref(inode); -} - -void -link_wind(call_frame_t *frame, xlator_t *this) -{ - crypt_local_t 
*local = frame->local; - - STACK_WIND(frame, link_flush, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->link, local->loc, local->newloc, - local->xdata); -} - -/* - * unlink() - */ -static int32_t -unlink_flush(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, struct iatt *preparent, struct iatt *postparent, - dict_t *xdata) -{ - crypt_local_t *local = frame->local; - - if (op_ret < 0) - goto error; - local->prebuf = *preparent; - local->postbuf = *postparent; - if (local->xdata) { - dict_unref(local->xdata); - local->xdata = NULL; - } - if (xdata) - local->xdata = dict_ref(xdata); - - STACK_WIND(frame, linkop_end, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->flush, local->fd, NULL); - return 0; -error: - local->op_ret = -1; - local->op_errno = op_errno; - unlink_unwind(frame); - return 0; -} - -void -unlink_unwind(call_frame_t *frame) -{ - crypt_local_t *local = frame->local; - dict_t *xdata; - dict_t *xattr; - - if (!local) { - CRYPT_STACK_UNWIND(unlink, frame, -1, ENOMEM, NULL, NULL, NULL); - return; - } - xdata = local->xdata; - xattr = local->xattr; - if (local->loc) { - loc_wipe(local->loc); - GF_FREE(local->loc); - } - if (local->fd) - fd_unref(local->fd); - if (local->format) - GF_FREE(local->format); - - CRYPT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno, - &local->prebuf, &local->postbuf, xdata); - if (xdata) - dict_unref(xdata); - if (xattr) - dict_unref(xattr); -} - -void -unlink_wind(call_frame_t *frame, xlator_t *this) -{ - crypt_local_t *local = frame->local; - - STACK_WIND(frame, unlink_flush, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, local->loc, local->flags, - local->xdata); -} - -void -rename_unwind(call_frame_t *frame) -{ - crypt_local_t *local = frame->local; - dict_t *xdata; - dict_t *xattr; - struct iatt *prenewparent; - struct iatt *postnewparent; - - if (!local) { - CRYPT_STACK_UNWIND(rename, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, - NULL, NULL); - return; - } - xdata = 
local->xdata; - xattr = local->xattr; - prenewparent = local->prenewparent; - postnewparent = local->postnewparent; - - if (local->loc) { - loc_wipe(local->loc); - GF_FREE(local->loc); - } - if (local->newloc) { - loc_wipe(local->newloc); - GF_FREE(local->newloc); - } - if (local->fd) - fd_unref(local->fd); - if (local->format) - GF_FREE(local->format); - - CRYPT_STACK_UNWIND(rename, frame, local->op_ret, local->op_errno, - &local->buf, &local->prebuf, &local->postbuf, - prenewparent, postnewparent, xdata); - if (xdata) - dict_unref(xdata); - if (xattr) - dict_unref(xattr); - if (prenewparent) - GF_FREE(prenewparent); - if (postnewparent) - GF_FREE(postnewparent); -} - -/* - * called as flush_cbk() - */ -static int32_t -rename_end(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, dict_t *xdata) -{ - crypt_local_t *local = frame->local; - - local->op_ret = op_ret; - local->op_errno = op_errno; - - rename_unwind(frame); - return 0; -} - -static int32_t -rename_flush(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, struct iatt *buf, struct iatt *preoldparent, - struct iatt *postoldparent, struct iatt *prenewparent, - struct iatt *postnewparent, dict_t *xdata) -{ - crypt_local_t *local = frame->local; - - if (op_ret < 0) - goto error; - dict_unref(local->xdata); - local->xdata = NULL; - if (xdata) - local->xdata = dict_ref(xdata); - - local->buf = *buf; - local->prebuf = *preoldparent; - local->postbuf = *postoldparent; - if (prenewparent) { - local->prenewparent = GF_CALLOC(1, sizeof(*prenewparent), - gf_crypt_mt_iatt); - if (!local->prenewparent) { - op_errno = ENOMEM; - goto error; - } - *local->prenewparent = *prenewparent; - } - if (postnewparent) { - local->postnewparent = GF_CALLOC(1, sizeof(*postnewparent), - gf_crypt_mt_iatt); - if (!local->postnewparent) { - op_errno = ENOMEM; - goto error; - } - *local->postnewparent = *postnewparent; - } - STACK_WIND(frame, rename_end, 
FIRST_CHILD(this), - FIRST_CHILD(this)->fops->flush, local->fd, NULL); - return 0; -error: - local->op_ret = -1; - local->op_errno = op_errno; - rename_unwind(frame); - return 0; -} - -void -rename_wind(call_frame_t *frame, xlator_t *this) -{ - crypt_local_t *local = frame->local; - - STACK_WIND(frame, rename_flush, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rename, local->loc, local->newloc, - local->xdata); -} - -static int32_t -__do_linkop(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, dict_t *xdata) -{ - crypt_local_t *local = frame->local; - linkop_wind_handler_t wind_fn; - linkop_unwind_handler_t unwind_fn; - - wind_fn = linkop_wind_dispatch(local->fop); - unwind_fn = linkop_unwind_dispatch(local->fop); - - local->op_ret = op_ret; - local->op_errno = op_errno; - - if (op_ret >= 0) - wind_fn(frame, this); - else { - gf_log(this->name, GF_LOG_WARNING, "mtd unlock failed (%d)", op_errno); - unwind_fn(frame); - } - return 0; -} - -static int32_t -do_linkop(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, dict_t *xdata) -{ - struct gf_flock lock = { - 0, - }; - crypt_local_t *local = frame->local; - linkop_unwind_handler_t unwind_fn; - - unwind_fn = linkop_unwind_dispatch(local->fop); - local->op_ret = op_ret; - local->op_errno = op_errno; - - if (op_ret < 0) - goto error; - - lock.l_type = F_UNLCK; - lock.l_whence = SEEK_SET; - lock.l_start = 0; - lock.l_len = 0; - lock.l_pid = 0; - - STACK_WIND(frame, __do_linkop, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->finodelk, this->name, local->fd, - F_SETLKW, &lock, NULL); - return 0; -error: - unwind_fn(frame); - return 0; -} - -/* - * Update the metadata string (against the new pathname); - * submit the result - */ -static int32_t -linkop_begin(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, fd_t *fd, dict_t *xdata) -{ - gf_boolean_t upload_info; - crypt_local_t *local = frame->local; - 
crypt_private_t *priv = this->private; - struct crypt_inode_info *info; - data_t *old_mtd; - uint32_t new_mtd_size; - uint64_t value = 0; - void (*unwind_fn)(call_frame_t * frame); - mtd_op_t mop; - - unwind_fn = linkop_unwind_dispatch(local->fop); - mop = linkop_mtdop_dispatch(local->fop); - - if (op_ret < 0) { - /* - * verification failed - */ - goto error; - } else { - fd_bind(fd); - } - - old_mtd = dict_get(xdata, CRYPTO_FORMAT_PREFIX); - if (!old_mtd) { - op_errno = EIO; - gf_log(this->name, GF_LOG_DEBUG, "Metadata string wasn't found"); - goto error; - } - new_mtd_size = format_size(mop, old_mtd->len); - op_errno = alloc_format(local, new_mtd_size); - if (op_errno) - goto error; - /* - * check for cached info - */ - op_ret = inode_ctx_get(fd->inode, this, &value); - if (op_ret != -1) { - info = (struct crypt_inode_info *)(long)value; - if (info == NULL) { - gf_log(this->name, GF_LOG_WARNING, "Inode info was not found"); - op_errno = EINVAL; - goto error; - } - /* - * info was found in the cache - */ - local->info = info; - upload_info = _gf_false; - } else { - /* - * info wasn't found in the cache; - */ - info = alloc_inode_info(local, local->loc); - if (!info) - goto error; - init_inode_info_head(info, fd); - local->info = info; - upload_info = _gf_true; - } - op_errno = open_format((unsigned char *)old_mtd->data, old_mtd->len, - local->loc, info, get_master_cinfo(priv), local, - upload_info); - if (op_errno) - goto error; - if (upload_info == _gf_true) { - op_errno = init_inode_info_tail(info, get_master_cinfo(priv)); - if (op_errno) - goto error; - op_errno = inode_ctx_put(fd->inode, this, (uint64_t)(long)(info)); - if (op_errno == -1) { - op_errno = EIO; - goto error; - } - } - /* - * update the format string (append/update/cup a MAC) - */ - op_errno = update_format(local->format, (unsigned char *)old_mtd->data, - old_mtd->len, local->mac_idx, mop, local->newloc, - info, get_master_cinfo(priv), local); - if (op_errno) - goto error; - /* - * store the new 
format string on the server - */ - if (new_mtd_size) { - op_errno = dict_set_static_bin(local->xattr, CRYPTO_FORMAT_PREFIX, - local->format, new_mtd_size); - if (op_errno) - goto error; - } - STACK_WIND(frame, do_linkop, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setxattr, local->loc, local->xattr, 0, - NULL); - return 0; -error: - local->op_ret = -1; - local->op_errno = op_errno; - unwind_fn(frame); - return 0; -} - -static int32_t -linkop_grab_local(call_frame_t *frame, xlator_t *this, loc_t *oldloc, - loc_t *newloc, int flags, dict_t *xdata, glusterfs_fop_t op) -{ - int32_t ret = ENOMEM; - fd_t *fd; - crypt_local_t *local; - - local = crypt_alloc_local(frame, this, op); - if (!local) - goto error; - if (xdata) - local->xdata = dict_ref(xdata); - - fd = fd_create(oldloc->inode, frame->root->pid); - if (!fd) { - gf_log(this->name, GF_LOG_ERROR, "Can not create fd"); - goto error; - } - local->fd = fd; - local->flags = flags; - local->loc = GF_CALLOC(1, sizeof(loc_t), gf_crypt_mt_loc); - if (!local->loc) - goto error; - ret = loc_copy(local->loc, oldloc); - if (ret) { - GF_FREE(local->loc); - local->loc = NULL; - goto error; - } - if (newloc) { - local->newloc = GF_CALLOC(1, sizeof(loc_t), gf_crypt_mt_loc); - if (!local->newloc) { - loc_wipe(local->loc); - GF_FREE(local->loc); - goto error; - } - ret = loc_copy(local->newloc, newloc); - if (ret) { - loc_wipe(local->loc); - GF_FREE(local->loc); - GF_FREE(local->newloc); - goto error; - } - } - local->xattr = dict_new(); - if (!local->xattr) { - gf_log(this->name, GF_LOG_ERROR, "Can not create dict"); - ret = ENOMEM; - goto error; - } - return 0; - -error: - if (local) { - if (local->xdata) - dict_unref(local->xdata); - if (local->fd) - fd_unref(local->fd); - local->fd = 0; - local->loc = NULL; - local->newloc = NULL; - local->op_ret = -1; - local->op_errno = ret; - } - - return ret; -} - -/* - * read and verify locked metadata against the old pathname (via open); - * update the metadata string in accordance with 
the new pathname; - * submit modified metadata; - * wind; - */ -static int32_t -linkop(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, - int flags, dict_t *xdata, glusterfs_fop_t op) -{ - int32_t ret; - dict_t *dict; - crypt_local_t *local; - void (*unwind_fn)(call_frame_t * frame); - void (*wind_fn)(call_frame_t * frame, xlator_t * this); - - wind_fn = linkop_wind_dispatch(op); - unwind_fn = linkop_unwind_dispatch(op); - - ret = linkop_grab_local(frame, this, oldloc, newloc, flags, xdata, op); - local = frame->local; - if (ret) - goto error; - - if (local->fd->inode->ia_type == IA_IFLNK) - goto wind; - - dict = dict_new(); - if (!dict) { - gf_log(this->name, GF_LOG_ERROR, "Can not create dict"); - ret = ENOMEM; - goto error; - } - /* - * Set a message to crypt_open() that we need - * locked metadata string. - * All link operations (link, unlink, rename) - * need write lock - */ - msgflags_set_mtd_wlock(&local->msgflags); - ret = dict_set_static_bin(dict, MSGFLAGS_PREFIX, &local->msgflags, - sizeof(local->msgflags)); - if (ret) { - gf_log(this->name, GF_LOG_ERROR, "Can not set dict"); - dict_unref(dict); - goto error; - } - /* - * verify metadata against the old pathname - * and retrieve locked metadata string - */ - STACK_WIND(frame, linkop_begin, this, this->fops->open, /* crypt_open() */ - oldloc, O_RDWR, local->fd, dict); - dict_unref(dict); - return 0; - -wind: - wind_fn(frame, this); - return 0; - -error: - local->op_ret = -1; - local->op_errno = ret; - unwind_fn(frame); - return 0; -} - -static int32_t -crypt_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, - dict_t *xdata) -{ - return linkop(frame, this, oldloc, newloc, 0, xdata, GF_FOP_LINK); -} - -static int32_t -crypt_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, - dict_t *xdata) -{ - return linkop(frame, this, loc, NULL, flags, xdata, GF_FOP_UNLINK); -} - -static int32_t -crypt_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t 
*newloc, - dict_t *xdata) -{ - return linkop(frame, this, oldloc, newloc, 0, xdata, GF_FOP_RENAME); -} - -static void -put_one_call_open(call_frame_t *frame) -{ - crypt_local_t *local = frame->local; - if (put_one_call(local)) { - fd_t *fd = local->fd; - loc_t *loc = local->loc; - dict_t *xdata = local->xdata; - - CRYPT_STACK_UNWIND(open, frame, local->op_ret, local->op_errno, fd, - xdata); - fd_unref(fd); - if (xdata) - dict_unref(xdata); - loc_wipe(loc); - GF_FREE(loc); - } -} - -static int32_t -__crypt_readv_done(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - crypt_local_t *local = frame->local; - fd_t *local_fd = local->fd; - dict_t *local_xdata = local->xdata; - /* read deals with data configs only */ - struct iovec *avec = local->data_conf.avec; - char **pool = local->data_conf.pool; - int blocks_in_pool = local->data_conf.blocks_in_pool; - struct iobref *iobref = local->iobref; - struct iobref *iobref_data = local->iobref_data; - - if (op_ret < 0) { - gf_log(this->name, GF_LOG_WARNING, "readv unlock failed (%d)", - op_errno); - if (local->op_ret >= 0) { - local->op_ret = op_ret; - local->op_errno = op_errno; - } - } - dump_plain_text(local, avec); - - gf_log("crypt", GF_LOG_DEBUG, - "readv: ret_to_user: %d, iovec len: %d, ia_size: %llu", - (int)(local->rw_count > 0 ? local->rw_count : local->op_ret), - (int)(local->rw_count > 0 ? iov_length(avec, local->data_conf.acount) - : 0), - (unsigned long long)local->buf.ia_size); - - CRYPT_STACK_UNWIND( - readv, frame, local->rw_count > 0 ? local->rw_count : local->op_ret, - local->op_errno, avec, avec ? 
local->data_conf.acount : 0, &local->buf, - local->iobref, local_xdata); - - free_avec(avec, pool, blocks_in_pool); - fd_unref(local_fd); - if (local_xdata) - dict_unref(local_xdata); - if (iobref) - iobref_unref(iobref); - if (iobref_data) - iobref_unref(iobref_data); - return 0; -} - -static void -crypt_readv_done(call_frame_t *frame, xlator_t *this) -{ - if (parent_is_crypt_xlator(frame, this)) - /* - * don't unlock (it will be done by the parent) - */ - __crypt_readv_done(frame, NULL, this, 0, 0, NULL); - else { - crypt_local_t *local = frame->local; - struct gf_flock lock = { - 0, - }; - - lock.l_type = F_UNLCK; - lock.l_whence = SEEK_SET; - lock.l_start = 0; - lock.l_len = 0; - lock.l_pid = 0; - - STACK_WIND(frame, __crypt_readv_done, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->finodelk, this->name, local->fd, - F_SETLKW, &lock, NULL); - } -} - -static void -put_one_call_readv(call_frame_t *frame, xlator_t *this) -{ - crypt_local_t *local = frame->local; - if (put_one_call(local)) - crypt_readv_done(frame, this); -} - -static int32_t -__crypt_writev_done(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - crypt_local_t *local = frame->local; - fd_t *local_fd = local->fd; - dict_t *local_xdata = local->xdata; - int32_t ret_to_user; - - if (local->xattr) - dict_unref(local->xattr); - /* - * Calculate amount of butes to be returned - * to user. We need to subtract paddings that - * have been written as a part of atom. 
- */ - /* - * subtract head padding - */ - if (local->rw_count == 0) - /* - * Nothing has been written, it must be an error - */ - ret_to_user = local->op_ret; - else if (local->rw_count <= local->data_conf.off_in_head) { - gf_log("crypt", GF_LOG_WARNING, "Incomplete write"); - ret_to_user = 0; - } else - ret_to_user = local->rw_count - local->data_conf.off_in_head; - /* - * subtract tail padding - */ - if (ret_to_user > local->data_conf.orig_size) - ret_to_user = local->data_conf.orig_size; - - if (local->iobref) - iobref_unref(local->iobref); - if (local->iobref_data) - iobref_unref(local->iobref_data); - free_avec_data(local); - free_avec_hole(local); - - gf_log("crypt", GF_LOG_DEBUG, "writev: ret_to_user: %d", ret_to_user); - - CRYPT_STACK_UNWIND(writev, frame, ret_to_user, local->op_errno, - &local->prebuf, &local->postbuf, local_xdata); - fd_unref(local_fd); - if (local_xdata) - dict_unref(local_xdata); - return 0; -} - -static int32_t -crypt_writev_done(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - crypt_local_t *local = frame->local; - - if (op_ret < 0) - gf_log("crypt", GF_LOG_WARNING, "can not update file size"); - - if (parent_is_crypt_xlator(frame, this)) - /* - * don't unlock (it will be done by the parent) - */ - __crypt_writev_done(frame, NULL, this, 0, 0, NULL); - else { - struct gf_flock lock = { - 0, - }; - - lock.l_type = F_UNLCK; - lock.l_whence = SEEK_SET; - lock.l_start = 0; - lock.l_len = 0; - lock.l_pid = 0; - - STACK_WIND(frame, __crypt_writev_done, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->finodelk, this->name, local->fd, - F_SETLKW, &lock, NULL); - } - return 0; -} - -static void -put_one_call_writev(call_frame_t *frame, xlator_t *this) -{ - crypt_local_t *local = frame->local; - if (put_one_call(local)) { - if (local->update_disk_file_size) { - int32_t ret; - /* - * update file size, unlock the file and unwind - */ - ret = dict_set(local->xattr, FSIZE_XATTR_PREFIX, - 
data_from_uint64(local->cur_file_size)); - if (ret) { - gf_log("crypt", GF_LOG_WARNING, - "can not set key to update file size"); - crypt_writev_done(frame, NULL, this, 0, 0, NULL); - return; - } - gf_log("crypt", GF_LOG_DEBUG, "Updating disk file size to %llu", - (unsigned long long)local->cur_file_size); - STACK_WIND(frame, crypt_writev_done, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsetxattr, local->fd, - local->xattr, /* CRYPTO_FORMAT_PREFIX */ - 0, NULL); - } else - crypt_writev_done(frame, NULL, this, 0, 0, NULL); - } -} - -static int32_t -__crypt_ftruncate_done(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - crypt_local_t *local = frame->local; - fd_t *local_fd = local->fd; - dict_t *local_xdata = local->xdata; - char *iobase = local->vec.iov_base; - - if (op_ret < 0) { - gf_log(this->name, GF_LOG_WARNING, "ftruncate unlock failed (%d)", - op_errno); - if (local->op_ret >= 0) { - local->op_ret = op_ret; - local->op_errno = op_errno; - } - } - if (local->iobref_data) - iobref_unref(local->iobref_data); - free_avec_data(local); - free_avec_hole(local); - - gf_log("crypt", GF_LOG_DEBUG, - "ftruncate, return to user: presize=%llu, postsize=%llu", - (unsigned long long)local->prebuf.ia_size, - (unsigned long long)local->postbuf.ia_size); - - CRYPT_STACK_UNWIND(ftruncate, frame, ((local->op_ret < 0) ? 
-1 : 0), - local->op_errno, &local->prebuf, &local->postbuf, - local_xdata); - fd_unref(local_fd); - if (local_xdata) - dict_unref(local_xdata); - if (iobase) - GF_FREE(iobase); - return 0; -} - -static int32_t -crypt_ftruncate_done(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - crypt_local_t *local = frame->local; - struct gf_flock lock = { - 0, - }; - - dict_unref(local->xattr); - if (op_ret < 0) - gf_log("crypt", GF_LOG_WARNING, "can not update file size"); - - lock.l_type = F_UNLCK; - lock.l_whence = SEEK_SET; - lock.l_start = 0; - lock.l_len = 0; - lock.l_pid = 0; - - STACK_WIND(frame, __crypt_ftruncate_done, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->finodelk, this->name, local->fd, - F_SETLKW, &lock, NULL); - return 0; -} - -static void -put_one_call_ftruncate(call_frame_t *frame, xlator_t *this) -{ - crypt_local_t *local = frame->local; - if (put_one_call(local)) { - if (local->update_disk_file_size) { - int32_t ret; - /* - * update file size, unlock the file and unwind - */ - ret = dict_set(local->xattr, FSIZE_XATTR_PREFIX, - data_from_uint64(local->cur_file_size)); - if (ret) { - gf_log("crypt", GF_LOG_WARNING, - "can not set key to update file size"); - crypt_ftruncate_done(frame, NULL, this, 0, 0, NULL); - return; - } - gf_log("crypt", GF_LOG_DEBUG, "Updating disk file size to %llu", - (unsigned long long)local->cur_file_size); - STACK_WIND(frame, crypt_ftruncate_done, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsetxattr, local->fd, - local->xattr, /* CRYPTO_FORMAT_PREFIX */ - 0, NULL); - } else - crypt_ftruncate_done(frame, NULL, this, 0, 0, NULL); - } -} - -/* - * load regular file size for some FOPs - */ -static int32_t -load_file_size(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata) -{ - data_t *data; - crypt_local_t *local = frame->local; - - dict_t *local_xdata = local->xdata; - inode_t *local_inode = local->inode; - 
- if (op_ret < 0) - goto unwind; - /* - * load regular file size - */ - data = dict_get(dict, FSIZE_XATTR_PREFIX); - if (!data) { - if (local->xdata) - dict_unref(local->xdata); - gf_log("crypt", GF_LOG_WARNING, "Regular file size not found"); - op_ret = -1; - op_errno = EIO; - goto unwind; - } - local->buf.ia_size = data_to_uint64(data); - - gf_log(this->name, GF_LOG_DEBUG, "FOP %d: Translate regular file to %llu", - local->fop, (unsigned long long)local->buf.ia_size); -unwind: - if (local->fd) - fd_unref(local->fd); - if (local->loc) { - loc_wipe(local->loc); - GF_FREE(local->loc); - } - switch (local->fop) { - case GF_FOP_FSTAT: - CRYPT_STACK_UNWIND(fstat, frame, op_ret, op_errno, - op_ret >= 0 ? &local->buf : NULL, local->xdata); - break; - case GF_FOP_STAT: - CRYPT_STACK_UNWIND(stat, frame, op_ret, op_errno, - op_ret >= 0 ? &local->buf : NULL, local->xdata); - break; - case GF_FOP_LOOKUP: - CRYPT_STACK_UNWIND(lookup, frame, op_ret, op_errno, - op_ret >= 0 ? local->inode : NULL, - op_ret >= 0 ? &local->buf : NULL, local->xdata, - op_ret >= 0 ? &local->postbuf : NULL); - break; - case GF_FOP_READ: - CRYPT_STACK_UNWIND(readv, frame, op_ret, op_errno, NULL, 0, - op_ret >= 0 ? 
&local->buf : NULL, NULL, NULL); - break; - default: - gf_log(this->name, GF_LOG_WARNING, "Improper file operation %d", - local->fop); - } - if (local_xdata) - dict_unref(local_xdata); - if (local_inode) - inode_unref(local_inode); - return 0; -} - -static int32_t -crypt_stat_common_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, - dict_t *xdata) -{ - crypt_local_t *local = frame->local; - - if (op_ret < 0) - goto unwind; - if (!IA_ISREG(buf->ia_type)) - goto unwind; - - local->buf = *buf; - if (xdata) - local->xdata = dict_ref(xdata); - - switch (local->fop) { - case GF_FOP_FSTAT: - STACK_WIND(frame, load_file_size, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fgetxattr, local->fd, - FSIZE_XATTR_PREFIX, NULL); - break; - case GF_FOP_STAT: - STACK_WIND(frame, load_file_size, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->getxattr, local->loc, - FSIZE_XATTR_PREFIX, NULL); - break; - default: - gf_log(this->name, GF_LOG_WARNING, "Improper file operation %d", - local->fop); - } - return 0; -unwind: - if (local->fd) - fd_unref(local->fd); - if (local->loc) { - loc_wipe(local->loc); - GF_FREE(local->loc); - } - switch (local->fop) { - case GF_FOP_FSTAT: - CRYPT_STACK_UNWIND(fstat, frame, op_ret, op_errno, - op_ret >= 0 ? buf : NULL, - op_ret >= 0 ? xdata : NULL); - break; - case GF_FOP_STAT: - CRYPT_STACK_UNWIND(stat, frame, op_ret, op_errno, - op_ret >= 0 ? buf : NULL, - op_ret >= 0 ? 
xdata : NULL); - break; - default: - gf_log(this->name, GF_LOG_WARNING, "Improper file operation %d", - local->fop); - } - return 0; -} - -static int32_t -crypt_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) -{ - crypt_local_t *local; - - local = crypt_alloc_local(frame, this, GF_FOP_FSTAT); - if (!local) - goto error; - local->fd = fd_ref(fd); - STACK_WIND(frame, crypt_stat_common_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fstat, fd, xdata); - return 0; -error: - CRYPT_STACK_UNWIND(fstat, frame, -1, ENOMEM, NULL, NULL); - return 0; -} - -static int32_t -crypt_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) -{ - int32_t ret; - crypt_local_t *local; - - local = crypt_alloc_local(frame, this, GF_FOP_STAT); - if (!local) - goto error; - local->loc = GF_CALLOC(1, sizeof(loc_t), gf_crypt_mt_loc); - if (!local->loc) - goto error; - ret = loc_copy(local->loc, loc); - if (ret) { - GF_FREE(local->loc); - goto error; - } - STACK_WIND(frame, crypt_stat_common_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->stat, loc, xdata); - return 0; -error: - CRYPT_STACK_UNWIND(stat, frame, -1, ENOMEM, NULL, NULL); - return 0; -} - -static int32_t -crypt_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *xdata, struct iatt *postparent) -{ - crypt_local_t *local = frame->local; - - if (op_ret < 0) - goto unwind; - if (!IA_ISREG(buf->ia_type)) - goto unwind; - - local->inode = inode_ref(inode); - local->buf = *buf; - local->postbuf = *postparent; - if (xdata) - local->xdata = dict_ref(xdata); - gf_uuid_copy(local->loc->gfid, buf->ia_gfid); - - STACK_WIND(frame, load_file_size, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->getxattr, local->loc, - FSIZE_XATTR_PREFIX, NULL); - return 0; -unwind: - loc_wipe(local->loc); - GF_FREE(local->loc); - CRYPT_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, buf, xdata, - postparent); - return 0; -} - -static 
int32_t -crypt_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) -{ - int32_t ret; - crypt_local_t *local; - - local = crypt_alloc_local(frame, this, GF_FOP_LOOKUP); - if (!local) - goto error; - local->loc = GF_CALLOC(1, sizeof(loc_t), gf_crypt_mt_loc); - if (!local->loc) - goto error; - ret = loc_copy(local->loc, loc); - if (ret) { - GF_FREE(local->loc); - goto error; - } - gf_log(this->name, GF_LOG_DEBUG, "Lookup %s", loc->path); - STACK_WIND(frame, crypt_lookup_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, loc, xdata); - return 0; -error: - CRYPT_STACK_UNWIND(lookup, frame, -1, ENOMEM, NULL, NULL, NULL, NULL); - return 0; -} - -/* - * for every regular directory entry find its real file size - * and update stat's buf properly - */ -static int32_t -crypt_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, - dict_t *xdata) -{ - gf_dirent_t *entry = NULL; - - if (op_ret < 0) - goto unwind; - - list_for_each_entry(entry, (&entries->list), list) - { - data_t *data; - - if (!IA_ISREG(entry->d_stat.ia_type)) - continue; - data = dict_get(entry->dict, FSIZE_XATTR_PREFIX); - if (!data) { - gf_log("crypt", GF_LOG_WARNING, - "Regular file size of direntry not found"); - op_errno = EIO; - op_ret = -1; - break; - } - entry->d_stat.ia_size = data_to_uint64(data); - } -unwind: - CRYPT_STACK_UNWIND(readdirp, frame, op_ret, op_errno, entries, xdata); - return 0; -} - -/* - * ->readdirp() fills in-core inodes, so we need to set proper - * file sizes for all directory entries of the parent @fd. 
- * Actual updates take place in ->crypt_readdirp_cbk() - */ -static int32_t -crypt_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset, dict_t *xdata) -{ - int32_t ret = ENOMEM; - - if (!xdata) { - xdata = dict_new(); - if (!xdata) - goto error; - } else - dict_ref(xdata); - /* - * make sure that we'll have real file sizes at ->readdirp_cbk() - */ - ret = dict_set(xdata, FSIZE_XATTR_PREFIX, data_from_uint64(0)); - if (ret) { - dict_unref(xdata); - ret = ENOMEM; - goto error; - } - STACK_WIND(frame, crypt_readdirp_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readdirp, fd, size, offset, xdata); - dict_unref(xdata); - return 0; -error: - CRYPT_STACK_UNWIND(readdirp, frame, -1, ret, NULL, NULL); - return 0; -} - -static int32_t -crypt_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask, - dict_t *xdata) -{ - gf_log(this->name, GF_LOG_WARNING, - "NFS mounts of encrypted volumes are unsupported"); - CRYPT_STACK_UNWIND(access, frame, -1, EPERM, NULL); - return 0; -} - -int32_t -master_set_block_size(xlator_t *this, crypt_private_t *priv, dict_t *options) -{ - uint64_t block_size = 0; - struct master_cipher_info *master = get_master_cinfo(priv); - - if (options != NULL) - GF_OPTION_RECONF("block-size", block_size, options, size_uint64, error); - else - GF_OPTION_INIT("block-size", block_size, size_uint64, error); - - switch (block_size) { - case 512: - master->m_block_bits = 9; - break; - case 1024: - master->m_block_bits = 10; - break; - case 2048: - master->m_block_bits = 11; - break; - case 4096: - master->m_block_bits = 12; - break; - default: - gf_log("crypt", GF_LOG_ERROR, "FATAL: unsupported block size %llu", - (unsigned long long)block_size); - goto error; - } - return 0; -error: - return -1; -} - -int32_t -master_set_alg(xlator_t *this, crypt_private_t *priv) -{ - struct master_cipher_info *master = get_master_cinfo(priv); - master->m_alg = AES_CIPHER_ALG; - return 0; -} - -int32_t -master_set_mode(xlator_t 
*this, crypt_private_t *priv) -{ - struct master_cipher_info *master = get_master_cinfo(priv); - master->m_mode = XTS_CIPHER_MODE; - return 0; -} - -/* - * set key size in bits to the master info - * Pre-conditions: cipher mode in the master info is uptodate. - */ -static int -master_set_data_key_size(xlator_t *this, crypt_private_t *priv, dict_t *options) -{ - int32_t ret; - uint64_t key_size = 0; - struct master_cipher_info *master = get_master_cinfo(priv); - - if (options != NULL) - GF_OPTION_RECONF("data-key-size", key_size, options, uint64, error); - else - GF_OPTION_INIT("data-key-size", key_size, uint64, error); - - ret = data_cipher_algs[master->m_alg][master->m_mode].check_key(key_size); - if (ret) { - gf_log("crypt", GF_LOG_ERROR, - "FATAL: wrong bin key size %llu for alg %d mode %d", - (unsigned long long)key_size, (int)master->m_alg, - (int)master->m_mode); - goto error; - } - master->m_dkey_size = key_size; - return 0; -error: - return -1; -} - -static int -is_hex(char *s) -{ - return ('0' <= *s && *s <= '9') || ('a' <= *s && *s <= 'f'); -} - -static int -parse_hex_buf(xlator_t *this, char *src, unsigned char *dst, int hex_size) -{ - int i; - int hex_byte = 0; - - for (i = 0; i < (hex_size / 2); i++) { - if (!is_hex(src + i * 2) || !is_hex(src + i * 2 + 1)) { - gf_log("crypt", GF_LOG_ERROR, "FATAL: not hex symbol in key"); - return -1; - } - if (sscanf(src + i * 2, "%2x", &hex_byte) != 1) { - gf_log("crypt", GF_LOG_ERROR, "FATAL: can not parse hex key"); - return -1; - } - dst[i] = hex_byte & 0xff; - } - return 0; -} - -/* - * Parse options; - * install master volume key - */ -int32_t -master_set_master_vol_key(xlator_t *this, crypt_private_t *priv) -{ - int32_t ret; - FILE *file = NULL; - - int32_t key_size; - char *opt_key_file_pathname = NULL; - - unsigned char bin_buf[MASTER_VOL_KEY_SIZE]; - char hex_buf[2 * MASTER_VOL_KEY_SIZE]; - - struct master_cipher_info *master = get_master_cinfo(priv); - /* - * extract master key passed via option - */ - 
GF_OPTION_INIT("master-key", opt_key_file_pathname, path, bad_key); - - if (!opt_key_file_pathname) { - gf_log(this->name, GF_LOG_ERROR, "FATAL: missing master key"); - return -1; - } - gf_log(this->name, GF_LOG_DEBUG, "handling file key %s", - opt_key_file_pathname); - - file = fopen(opt_key_file_pathname, "r"); - if (file == NULL) { - gf_log(this->name, GF_LOG_ERROR, - "FATAL: can not open file with master key"); - return -1; - } - /* - * extract hex key - */ - key_size = fread(hex_buf, 1, sizeof(hex_buf), file); - if (key_size < sizeof(hex_buf)) { - gf_log(this->name, GF_LOG_ERROR, "FATAL: master key is too short"); - goto bad_key; - } - ret = parse_hex_buf(this, hex_buf, bin_buf, key_size); - if (ret) - goto bad_key; - memcpy(master->m_key, bin_buf, MASTER_VOL_KEY_SIZE); - memset(hex_buf, 0, sizeof(hex_buf)); - fclose(file); - - memset(bin_buf, 0, sizeof(bin_buf)); - return 0; -bad_key: - gf_log(this->name, GF_LOG_ERROR, "FATAL: bad master key"); - if (file) - fclose(file); - memset(bin_buf, 0, sizeof(bin_buf)); - return -1; -} - -/* - * Derive volume key for object-id authentication - */ -int32_t -master_set_nmtd_vol_key(xlator_t *this, crypt_private_t *priv) -{ - return get_nmtd_vol_key(get_master_cinfo(priv)); -} - -int32_t -crypt_init_xlator(xlator_t *this) -{ - int32_t ret; - crypt_private_t *priv = this->private; - - ret = master_set_alg(this, priv); - if (ret) - return ret; - ret = master_set_mode(this, priv); - if (ret) - return ret; - ret = master_set_block_size(this, priv, NULL); - if (ret) - return ret; - ret = master_set_data_key_size(this, priv, NULL); - if (ret) - return ret; - ret = master_set_master_vol_key(this, priv); - if (ret) - return ret; - return master_set_nmtd_vol_key(this, priv); -} - -static int32_t -crypt_alloc_private(xlator_t *this) -{ - this->private = GF_CALLOC(1, sizeof(crypt_private_t), gf_crypt_mt_priv); - if (!this->private) { - gf_log("crypt", GF_LOG_ERROR, - "Can not allocate memory for private data"); - return ENOMEM; - } 
- return 0; -} - -static void -crypt_free_private(xlator_t *this) -{ - crypt_private_t *priv = this->private; - if (priv) { - memset(priv, 0, sizeof(*priv)); - GF_FREE(priv); - } -} - -int32_t -mem_acct_init(xlator_t *this) -{ - int ret = -1; - - if (!this) - return ret; - - ret = xlator_mem_acct_init(this, gf_crypt_mt_end); - - if (ret != 0) { - gf_log(this->name, GF_LOG_ERROR, - "Memory accounting init" - "failed"); - return ret; - } - - return ret; -} - -int32_t -reconfigure(xlator_t *this, dict_t *options) -{ - int32_t ret = -1; - crypt_private_t *priv = NULL; - - GF_VALIDATE_OR_GOTO("crypt", this, error); - GF_VALIDATE_OR_GOTO(this->name, this->private, error); - GF_VALIDATE_OR_GOTO(this->name, options, error); - - priv = this->private; - - ret = master_set_block_size(this, priv, options); - if (ret) { - gf_log("this->name", GF_LOG_ERROR, "Failed to reconfure block size"); - goto error; - } - ret = master_set_data_key_size(this, priv, options); - if (ret) { - gf_log("this->name", GF_LOG_ERROR, "Failed to reconfure data key size"); - goto error; - } - return 0; -error: - return ret; -} - -int32_t -init(xlator_t *this) -{ - int32_t ret; - - if (!this->children || this->children->next) { - gf_log("crypt", GF_LOG_ERROR, - "FATAL: crypt should have exactly one child"); - return EINVAL; - } - if (!this->parents) { - gf_log(this->name, GF_LOG_WARNING, "dangling volume. 
check volfile "); - } - ret = crypt_alloc_private(this); - if (ret) - return ret; - ret = crypt_init_xlator(this); - if (ret) - goto error; - this->local_pool = mem_pool_new(crypt_local_t, 64); - if (!this->local_pool) { - gf_log(this->name, GF_LOG_ERROR, - "failed to create local_t's memory pool"); - ret = ENOMEM; - goto error; - } - gf_log("crypt", GF_LOG_INFO, "crypt xlator loaded"); - return 0; -error: - crypt_free_private(this); - return ret; -} - -void -fini(xlator_t *this) -{ - crypt_free_private(this); -} - -struct xlator_fops fops = {.readv = crypt_readv, - .writev = crypt_writev, - .truncate = crypt_truncate, - .ftruncate = crypt_ftruncate, - .setxattr = crypt_setxattr, - .fsetxattr = crypt_fsetxattr, - .link = crypt_link, - .unlink = crypt_unlink, - .rename = crypt_rename, - .open = crypt_open, - .create = crypt_create, - .stat = crypt_stat, - .fstat = crypt_fstat, - .lookup = crypt_lookup, - .readdirp = crypt_readdirp, - .access = crypt_access}; - -struct xlator_cbks cbks = {.forget = crypt_forget}; - -struct volume_options options[] = { - {.key = {"master-key"}, - .type = GF_OPTION_TYPE_PATH, - .description = - "Pathname of regular file which contains master volume key"}, - { - .key = {"data-key-size"}, - .type = GF_OPTION_TYPE_SIZET, - .description = "Data key size (bits)", - .min = 256, - .max = 512, - .default_value = "256", - }, - {.key = {"block-size"}, - .type = GF_OPTION_TYPE_SIZET, - .description = "Atom size (bits)", - .min = 512, - .max = 4096, - .default_value = "4096"}, - {.key = {NULL}}, -}; - -/* - Local variables: - c-indentation-style: "K&R" - mode-name: "LC" - c-basic-offset: 8 - tab-width: 8 - fill-column: 80 - scroll-step: 1 - End: -*/ diff --git a/xlators/encryption/crypt/src/crypt.h b/xlators/encryption/crypt/src/crypt.h deleted file mode 100644 index c1216a2..0000000 --- a/xlators/encryption/crypt/src/crypt.h +++ /dev/null @@ -1,931 +0,0 @@ -/* - Copyright (c) 2008-2012 Red Hat, Inc. 
<http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -#ifndef __CRYPT_H__ -#define __CRYPT_H__ - -#include <openssl/aes.h> -#include <openssl/evp.h> -#include <openssl/sha.h> -#include <openssl/hmac.h> -#include <openssl/cmac.h> -#include <openssl/modes.h> -#include "crypt-mem-types.h" -#include <glusterfs/compat.h> - -#define CRYPT_XLATOR_ID (0) - -#define MAX_IOVEC_BITS (3) -#define MAX_IOVEC (1 << MAX_IOVEC_BITS) -#define KEY_FACTOR_BITS (6) - -#define DEBUG_CRYPT (0) -#define TRIVIAL_TFM (0) - -#define CRYPT_MIN_BLOCK_BITS (9) -#define CRYPT_MAX_BLOCK_BITS (12) - -#define MASTER_VOL_KEY_SIZE (32) -#define NMTD_VOL_KEY_SIZE (16) - -#if !defined(GF_LINUX_HOST_OS) -typedef off_t loff_t; -#endif - -struct crypt_key { - uint32_t len; - const char *label; -}; - -/* - * Add new key types to the end of this - * enumeration but before LAST_KEY_TYPE - */ -typedef enum { - MASTER_VOL_KEY, - NMTD_VOL_KEY, - NMTD_LINK_KEY, - EMTD_FILE_KEY, - DATA_FILE_KEY_256, - DATA_FILE_KEY_512, - LAST_KEY_TYPE -} crypt_key_type; - -struct kderive_context { - const unsigned char *pkey; /* parent key */ - uint32_t pkey_len; /* parent key size, bits */ - uint32_t ckey_len; /* child key size, bits */ - unsigned char *fid; /* fixed input data, NIST 800-108, 5.1 */ - uint32_t fid_len; /* fid len, bytes */ - unsigned char *out; /* contains child keying material */ - uint32_t out_len; /* out len, bytes */ -}; - -typedef enum { DATA_ATOM, HOLE_ATOM, LAST_DATA_TYPE } atom_data_type; - -typedef enum { - HEAD_ATOM, - TAIL_ATOM, - FULL_ATOM, - LAST_LOCALITY_TYPE -} atom_locality_type; - -typedef enum { - MTD_CREATE, - MTD_APPEND, - MTD_OVERWRITE, - MTD_CUT, - MTD_LAST_OP -} mtd_op_t; - -struct xts128_context { - void *key1, *key2; - 
block128_f block1, block2; -}; - -struct object_cipher_info { - cipher_alg_t o_alg; - cipher_mode_t o_mode; - uint32_t o_block_bits; - uint32_t o_dkey_size; /* raw data key size in bits */ - union { - struct { - unsigned char ivec[16]; - AES_KEY dkey[2]; - AES_KEY tkey; /* key used for tweaking */ - XTS128_CONTEXT xts; - } aes_xts; - } u; -}; - -struct master_cipher_info { - /* - * attributes inherited by newly created regular files - */ - cipher_alg_t m_alg; - cipher_mode_t m_mode; - uint32_t m_block_bits; - uint32_t m_dkey_size; /* raw key size in bits */ - /* - * master key - */ - unsigned char m_key[MASTER_VOL_KEY_SIZE]; - /* - * volume key for oid authentication - */ - unsigned char m_nmtd_key[NMTD_VOL_KEY_SIZE]; -}; - -/* - * This info is not changed during file's life - */ -struct crypt_inode_info { -#if DEBUG_CRYPT - loc_t *loc; /* pathname that the file has been - opened, or created with */ -#endif - uint16_t nr_minor; - uuid_t oid; - struct object_cipher_info cinfo; -}; - -/* - * this should locate in secure memory - */ -typedef struct { - struct master_cipher_info master; -} crypt_private_t; - -static inline struct master_cipher_info * -get_master_cinfo(crypt_private_t *priv) -{ - return &priv->master; -} - -static inline struct object_cipher_info * -get_object_cinfo(struct crypt_inode_info *info) -{ - return &info->cinfo; -} - -/* - * this describes layouts and properties - * of atoms in an aligned vector - */ -struct avec_config { - uint32_t atom_size; - atom_data_type type; - size_t orig_size; - off_t orig_offset; - size_t expanded_size; - off_t aligned_offset; - - uint32_t off_in_head; - uint32_t off_in_tail; - uint32_t gap_in_tail; - uint32_t nr_full_blocks; - - struct iovec *avec; /* aligned vector */ - uint32_t acount; /* number of avec components. The same - * as number of occupied logical blocks */ - char **pool; - uint32_t blocks_in_pool; - uint32_t cursor; /* makes sense only for ordered writes, - * so there is no races on this counter. 
- * - * Cursor is per-config object, we don't - * reset cursor for atoms of different - * localities (head, tail, full) - */ -}; - -typedef struct { - glusterfs_fop_t fop; /* code of FOP this local info built for */ - fd_t *fd; - inode_t *inode; - loc_t *loc; - int32_t mac_idx; - loc_t *newloc; - int32_t flags; - int32_t wbflags; - struct crypt_inode_info *info; - struct iobref *iobref; - struct iobref *iobref_data; - off_t offset; - - uint64_t old_file_size; /* per FOP, retrieved under lock held */ - uint64_t cur_file_size; /* per iteration, before issuing IOs */ - uint64_t new_file_size; /* per iteration, after issuing IOs */ - - uint64_t io_offset; /* offset of IOs issued per iteration */ - uint64_t io_offset_nopad; /* offset of user's data in the atom */ - uint32_t io_size; /* size of IOs issued per iteration */ - uint32_t io_size_nopad; /* size of user's data in the IOs */ - uint32_t eof_padding_size; /* size od EOF padding in the IOs */ - - gf_lock_t call_lock; /* protect nr_calls from many cbks */ - int32_t nr_calls; - - atom_data_type active_setup; /* which setup (hole or date) - is currently active */ - /* data setup */ - struct avec_config data_conf; - - /* hole setup */ - int hole_conv_in_proggress; - gf_lock_t hole_lock; /* protect hole config from many cbks */ - int hole_handled; - struct avec_config hole_conf; - struct iatt buf; - struct iatt prebuf; - struct iatt postbuf; - struct iatt *prenewparent; - struct iatt *postnewparent; - int32_t op_ret; - int32_t op_errno; - int32_t rw_count; /* total read or written */ - gf_lock_t rw_count_lock; /* protect the counter above */ - unsigned char *format; /* for create, update format string */ - uint32_t format_size; - uint32_t msgflags; /* messages for crypt_open() */ - dict_t *xdata; - dict_t *xattr; - struct iovec vec; /* contains last file's atom for - read-prune-write sequence */ - gf_boolean_t custom_mtd; - /* - * the next 3 fields are used by readdir and friends - */ - gf_dirent_t *de; /* directory 
entry */ - char *de_path; /* pathname of directory entry */ - uint32_t de_prefix_len; /* length of the parent's pathname */ - gf_dirent_t *entries; - - uint32_t update_disk_file_size : 1; -} crypt_local_t; - -/* This represents a (read)modify-write atom */ -struct rmw_atom { - atom_locality_type locality; - /* - * read-modify-write sequence of the atom - */ - int32_t (*rmw)(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iovec *vec, - int32_t count, struct iatt *stbuf, struct iobref *iobref, - dict_t *xdata); - /* - * offset of the logical block in a file - */ - loff_t (*offset_at)(call_frame_t *frame, struct object_cipher_info *object); - /* - * IO offset in an atom - */ - uint32_t (*offset_in)(call_frame_t *frame, - struct object_cipher_info *object); - /* - * number of bytes of plain text of this atom that user - * wants to read/write. - * It can be smaller than atom_size in the case of head - * or tail atoms. - */ - uint32_t (*io_size_nopad)(call_frame_t *frame, - struct object_cipher_info *object); - /* - * which iovec represents the atom - */ - struct iovec *(*get_iovec)(call_frame_t *frame, uint32_t count); - /* - * how many bytes of partial block should be uptodated by - * reading from disk. - * This is used to perform a read component of RMW (read-modify-write). 
- */ - uint32_t (*count_to_uptodate)(call_frame_t *frame, - struct object_cipher_info *object); - struct avec_config *(*get_config)(call_frame_t *frame); -}; - -struct data_cipher_alg { - gf_boolean_t atomic; /* true means that algorithm requires - to pad data before cipher transform */ - gf_boolean_t should_pad; /* true means that algorithm requires - to pad the end of file with extra-data */ - uint32_t blkbits; /* blksize = 1 << blkbits */ - /* - * any preliminary sanity checks goes here - */ - int32_t (*init)(void); - /* - * set alg-mode specific inode info - */ - int32_t (*set_private)(struct crypt_inode_info *info, - struct master_cipher_info *master); - /* - * check alg-mode specific data key - */ - int32_t (*check_key)(uint32_t key_size); - void (*set_iv)(off_t offset, struct object_cipher_info *object); - int32_t (*encrypt)(const unsigned char *from, unsigned char *to, - size_t length, off_t offset, const int enc, - struct object_cipher_info *object); -}; - -/* - * version-dependent metadata loader - */ -struct crypt_mtd_loader { - /* - * return core format size - */ - size_t (*format_size)(mtd_op_t op, size_t old_size); - /* - * pack version-specific metadata of an object - * at ->create() - */ - int32_t (*create_format)(unsigned char *wire, loc_t *loc, - struct crypt_inode_info *info, - struct master_cipher_info *master); - /* - * extract version-specific metadata of an object - * at ->open() time - */ - int32_t (*open_format)(unsigned char *wire, int32_t len, loc_t *loc, - struct crypt_inode_info *info, - struct master_cipher_info *master, - crypt_local_t *local, gf_boolean_t load_info); - int32_t (*update_format)(unsigned char *new, unsigned char *old, - size_t old_len, int32_t mac_idx, mtd_op_t op, - loc_t *loc, struct crypt_inode_info *info, - struct master_cipher_info *master, - crypt_local_t *local); -}; - -typedef int32_t (*end_writeback_handler_t)(call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno, - struct 
iatt *prebuf, - struct iatt *postbuf, dict_t *xdata); -typedef void (*linkop_wind_handler_t)(call_frame_t *frame, xlator_t *this); -typedef void (*linkop_unwind_handler_t)(call_frame_t *frame); - -/* Declarations */ - -/* keys.c */ -extern struct crypt_key crypt_keys[LAST_KEY_TYPE]; -int32_t -get_nmtd_vol_key(struct master_cipher_info *master); -int32_t -get_nmtd_link_key(loc_t *loc, struct master_cipher_info *master, - unsigned char *result); -int32_t -get_emtd_file_key(struct crypt_inode_info *info, - struct master_cipher_info *master, unsigned char *result); -int32_t -get_data_file_key(struct crypt_inode_info *info, - struct master_cipher_info *master, uint32_t keysize, - unsigned char *key); -/* data.c */ -extern struct data_cipher_alg data_cipher_algs[LAST_CIPHER_ALG] - [LAST_CIPHER_MODE]; -void -encrypt_aligned_iov(struct object_cipher_info *object, struct iovec *vec, - int count, off_t off); -void -decrypt_aligned_iov(struct object_cipher_info *object, struct iovec *vec, - int count, off_t off); -int32_t -align_iov_by_atoms(xlator_t *this, crypt_local_t *local, - struct object_cipher_info *object, - struct iovec *vec /* input vector */, - int32_t count /* number of vec components */, - struct iovec *avec /* aligned vector */, - char **blocks /* pool of blocks */, - uint32_t *blocks_allocated, struct avec_config *conf); -int32_t -set_config_avec_data(xlator_t *this, crypt_local_t *local, - struct avec_config *conf, - struct object_cipher_info *object, struct iovec *vec, - int32_t vec_count); -int32_t -set_config_avec_hole(xlator_t *this, crypt_local_t *local, - struct avec_config *conf, - struct object_cipher_info *object, glusterfs_fop_t fop); -void -set_gap_at_end(call_frame_t *frame, struct object_cipher_info *object, - struct avec_config *conf, atom_data_type dtype); -void -set_config_offsets(call_frame_t *frame, xlator_t *this, uint64_t offset, - uint64_t count, atom_data_type dtype, - int32_t setup_gap_in_tail); - -/* metadata.c */ -extern struct 
crypt_mtd_loader mtd_loaders[LAST_MTD_LOADER]; - -int32_t -alloc_format(crypt_local_t *local, size_t size); -int32_t -alloc_format_create(crypt_local_t *local); -void -free_format(crypt_local_t *local); -size_t -format_size(mtd_op_t op, size_t old_size); -size_t -new_format_size(void); -int32_t -open_format(unsigned char *str, int32_t len, loc_t *loc, - struct crypt_inode_info *info, struct master_cipher_info *master, - crypt_local_t *local, gf_boolean_t load_info); -int32_t -update_format(unsigned char *new, unsigned char *old, size_t old_len, - int32_t mac_idx, mtd_op_t op, loc_t *loc, - struct crypt_inode_info *info, struct master_cipher_info *master, - crypt_local_t *local); -int32_t -create_format(unsigned char *wire, loc_t *loc, struct crypt_inode_info *info, - struct master_cipher_info *master); - -/* atom.c */ -struct rmw_atom * -atom_by_types(atom_data_type data, atom_locality_type locality); -void -submit_partial(call_frame_t *frame, xlator_t *this, fd_t *fd, - atom_locality_type ltype); -void -submit_full(call_frame_t *frame, xlator_t *this); - -/* crypt.c */ - -end_writeback_handler_t -dispatch_end_writeback(glusterfs_fop_t fop); -void -set_local_io_params_writev(call_frame_t *frame, - struct object_cipher_info *object, - struct rmw_atom *atom, off_t io_offset, - uint32_t io_size); -void -link_wind(call_frame_t *frame, xlator_t *this); -void -unlink_wind(call_frame_t *frame, xlator_t *this); -void -link_unwind(call_frame_t *frame); -void -unlink_unwind(call_frame_t *frame); -void -rename_wind(call_frame_t *frame, xlator_t *this); -void -rename_unwind(call_frame_t *frame); - -/* Inline functions */ - -static inline int32_t -crypt_xlator_id(void) -{ - return CRYPT_XLATOR_ID; -} - -static inline mtd_loader_id -current_mtd_loader(void) -{ - return MTD_LOADER_V1; -} - -static inline uint32_t -master_key_size(void) -{ - return crypt_keys[MASTER_VOL_KEY].len >> 3; -} - -static inline uint32_t -nmtd_vol_key_size(void) -{ - return crypt_keys[NMTD_VOL_KEY].len >> 
3; -} - -static inline uint32_t -alg_mode_blkbits(cipher_alg_t alg, cipher_mode_t mode) -{ - return data_cipher_algs[alg][mode].blkbits; -} - -static inline uint32_t -alg_mode_blksize(cipher_alg_t alg, cipher_mode_t mode) -{ - return 1 << alg_mode_blkbits(alg, mode); -} - -static inline gf_boolean_t -alg_mode_atomic(cipher_alg_t alg, cipher_mode_t mode) -{ - return data_cipher_algs[alg][mode].atomic; -} - -static inline gf_boolean_t -alg_mode_should_pad(cipher_alg_t alg, cipher_mode_t mode) -{ - return data_cipher_algs[alg][mode].should_pad; -} - -static inline uint32_t -master_alg_blksize(struct master_cipher_info *mr) -{ - return alg_mode_blksize(mr->m_alg, mr->m_mode); -} - -static inline uint32_t -master_alg_blkbits(struct master_cipher_info *mr) -{ - return alg_mode_blkbits(mr->m_alg, mr->m_mode); -} - -static inline gf_boolean_t -master_alg_atomic(struct master_cipher_info *mr) -{ - return alg_mode_atomic(mr->m_alg, mr->m_mode); -} - -static inline gf_boolean_t -master_alg_should_pad(struct master_cipher_info *mr) -{ - return alg_mode_should_pad(mr->m_alg, mr->m_mode); -} - -static inline uint32_t -object_alg_blksize(struct object_cipher_info *ob) -{ - return alg_mode_blksize(ob->o_alg, ob->o_mode); -} - -static inline uint32_t -object_alg_blkbits(struct object_cipher_info *ob) -{ - return alg_mode_blkbits(ob->o_alg, ob->o_mode); -} - -static inline gf_boolean_t -object_alg_atomic(struct object_cipher_info *ob) -{ - return alg_mode_atomic(ob->o_alg, ob->o_mode); -} - -static inline gf_boolean_t -object_alg_should_pad(struct object_cipher_info *ob) -{ - return alg_mode_should_pad(ob->o_alg, ob->o_mode); -} - -static inline uint32_t -aes_raw_key_size(struct master_cipher_info *master) -{ - return master->m_dkey_size >> 3; -} - -static inline struct avec_config * -get_hole_conf(call_frame_t *frame) -{ - return &(((crypt_local_t *)frame->local)->hole_conf); -} - -static inline struct avec_config * -get_data_conf(call_frame_t *frame) -{ - return &(((crypt_local_t 
*)frame->local)->data_conf); -} - -static inline int32_t -get_atom_bits(struct object_cipher_info *object) -{ - return object->o_block_bits; -} - -static inline int32_t -get_atom_size(struct object_cipher_info *object) -{ - return 1 << get_atom_bits(object); -} - -static inline int32_t -has_head_block(struct avec_config *conf) -{ - return conf->off_in_head || (conf->acount == 1 && conf->off_in_tail); -} - -static inline int32_t -has_tail_block(struct avec_config *conf) -{ - return conf->off_in_tail && conf->acount > 1; -} - -static inline int32_t -has_full_blocks(struct avec_config *conf) -{ - return conf->nr_full_blocks; -} - -static inline int32_t -should_submit_head_block(struct avec_config *conf) -{ - return has_head_block(conf) && (conf->cursor == 0); -} - -static inline int32_t -should_submit_tail_block(struct avec_config *conf) -{ - return has_tail_block(conf) && (conf->cursor == conf->acount - 1); -} - -static inline int32_t -should_submit_full_block(struct avec_config *conf) -{ - uint32_t start = has_head_block(conf) ? 
1 : 0; - - return has_full_blocks(conf) && conf->cursor >= start && - conf->cursor < start + conf->nr_full_blocks; -} - -#if DEBUG_CRYPT -static inline void -crypt_check_input_len(size_t len, struct object_cipher_info *object) -{ - if (object_alg_should_pad(object) && - (len & (object_alg_blksize(object) - 1))) - gf_log("crypt", GF_LOG_DEBUG, "bad input len: %d", (int)len); -} - -static inline void -check_head_block(struct avec_config *conf) -{ - if (!has_head_block(conf)) - gf_log("crypt", GF_LOG_DEBUG, "not a head atom"); -} - -static inline void -check_tail_block(struct avec_config *conf) -{ - if (!has_tail_block(conf)) - gf_log("crypt", GF_LOG_DEBUG, "not a tail atom"); -} - -static inline void -check_full_block(struct avec_config *conf) -{ - if (!has_full_blocks(conf)) - gf_log("crypt", GF_LOG_DEBUG, "not a full atom"); -} - -static inline void -check_cursor_head(struct avec_config *conf) -{ - if (!has_head_block(conf)) - gf_log("crypt", GF_LOG_DEBUG, "Illegal call of head atom method"); - else if (conf->cursor != 0) - gf_log("crypt", GF_LOG_DEBUG, "Cursor (%d) is not at head atom", - conf->cursor); -} - -static inline void -check_cursor_full(struct avec_config *conf) -{ - if (!has_full_blocks(conf)) - gf_log("crypt", GF_LOG_DEBUG, "Illegal call of full atom method"); - if (has_head_block(conf) && (conf->cursor == 0)) - gf_log("crypt", GF_LOG_DEBUG, "Cursor is not at full atom"); -} - -/* - * FIXME: use avec->iov_len to check setup - */ -static inline int -data_local_invariant(crypt_local_t *local) -{ - return 0; -} - -#else -#define crypt_check_input_len(len, object) noop -#define check_head_block(conf) noop -#define check_tail_block(conf) noop -#define check_full_block(conf) noop -#define check_cursor_head(conf) noop -#define check_cursor_full(conf) noop - -#endif /* DEBUG_CRYPT */ - -static inline struct avec_config * -conf_by_type(call_frame_t *frame, atom_data_type dtype) -{ - struct avec_config *conf = NULL; - - switch (dtype) { - case HOLE_ATOM: - conf 
= get_hole_conf(frame); - break; - case DATA_ATOM: - conf = get_data_conf(frame); - break; - default: - gf_log("crypt", GF_LOG_DEBUG, "bad atom type"); - } - return conf; -} - -static inline uint32_t -nr_calls_head(struct avec_config *conf) -{ - return has_head_block(conf) ? 1 : 0; -} - -static inline uint32_t -nr_calls_tail(struct avec_config *conf) -{ - return has_tail_block(conf) ? 1 : 0; -} - -static inline uint32_t -nr_calls_full(struct avec_config *conf) -{ - switch (conf->type) { - case HOLE_ATOM: - return has_full_blocks(conf); - case DATA_ATOM: - return has_full_blocks(conf) - ? logical_blocks_occupied(0, conf->nr_full_blocks, - MAX_IOVEC_BITS) - : 0; - default: - gf_log("crypt", GF_LOG_DEBUG, "bad atom data type"); - return 0; - } -} - -static inline uint32_t -nr_calls(struct avec_config *conf) -{ - return nr_calls_head(conf) + nr_calls_tail(conf) + nr_calls_full(conf); -} - -static inline uint32_t -nr_calls_data(call_frame_t *frame) -{ - return nr_calls(get_data_conf(frame)); -} - -static inline uint32_t -nr_calls_hole(call_frame_t *frame) -{ - return nr_calls(get_hole_conf(frame)); -} - -static inline void -get_one_call_nolock(call_frame_t *frame) -{ - crypt_local_t *local = frame->local; - - ++local->nr_calls; - - // gf_log("crypt", GF_LOG_DEBUG, "get %d calls", 1); -} - -static inline void -get_one_call(call_frame_t *frame) -{ - crypt_local_t *local = frame->local; - - LOCK(&local->call_lock); - get_one_call_nolock(frame); - UNLOCK(&local->call_lock); -} - -static inline void -get_nr_calls_nolock(call_frame_t *frame, int32_t nr) -{ - crypt_local_t *local = frame->local; - - local->nr_calls += nr; - - // gf_log("crypt", GF_LOG_DEBUG, "get %d calls", nr); -} - -static inline void -get_nr_calls(call_frame_t *frame, int32_t nr) -{ - crypt_local_t *local = frame->local; - - LOCK(&local->call_lock); - get_nr_calls_nolock(frame, nr); - UNLOCK(&local->call_lock); -} - -static inline int -put_one_call(crypt_local_t *local) -{ - uint32_t last = 0; - - 
LOCK(&local->call_lock); - if (--local->nr_calls == 0) - last = 1; - - // gf_log("crypt", GF_LOG_DEBUG, "put %d calls", 1); - - UNLOCK(&local->call_lock); - return last; -} - -static inline int -is_appended_write(call_frame_t *frame) -{ - crypt_local_t *local = frame->local; - struct avec_config *conf = get_data_conf(frame); - - return conf->orig_offset + conf->orig_size > local->old_file_size; -} - -static inline int -is_ordered_mode(call_frame_t *frame) -{ -#if 0 - crypt_local_t *local = frame->local; - return local->fop == GF_FOP_FTRUNCATE || - (local->fop == GF_FOP_WRITE && is_appended_write(frame)); -#endif - return 1; -} - -static inline int32_t -hole_conv_completed(crypt_local_t *local) -{ - struct avec_config *conf = &local->hole_conf; - return conf->cursor == conf->acount; -} - -static inline int32_t -data_write_in_progress(crypt_local_t *local) -{ - return local->active_setup == DATA_ATOM; -} - -static inline int32_t -parent_is_crypt_xlator(call_frame_t *frame, xlator_t *this) -{ - return frame->parent->this == this; -} - -static inline linkop_wind_handler_t -linkop_wind_dispatch(glusterfs_fop_t fop) -{ - switch (fop) { - case GF_FOP_LINK: - return link_wind; - case GF_FOP_UNLINK: - return unlink_wind; - case GF_FOP_RENAME: - return rename_wind; - default: - gf_log("crypt", GF_LOG_ERROR, "Bad link operation %d", fop); - return NULL; - } -} - -static inline linkop_unwind_handler_t -linkop_unwind_dispatch(glusterfs_fop_t fop) -{ - switch (fop) { - case GF_FOP_LINK: - return link_unwind; - case GF_FOP_UNLINK: - return unlink_unwind; - case GF_FOP_RENAME: - return rename_unwind; - default: - gf_log("crypt", GF_LOG_ERROR, "Bad link operation %d", fop); - return NULL; - } -} - -static inline mtd_op_t -linkop_mtdop_dispatch(glusterfs_fop_t fop) -{ - switch (fop) { - case GF_FOP_LINK: - return MTD_APPEND; - case GF_FOP_UNLINK: - return MTD_CUT; - case GF_FOP_RENAME: - return MTD_OVERWRITE; - default: - gf_log("crypt", GF_LOG_WARNING, "Bad link operation %d", 
fop); - return MTD_LAST_OP; - } -} - -#define CRYPT_STACK_UNWIND(fop, frame, params...) \ - do { \ - crypt_local_t *__local = NULL; \ - if (frame) { \ - __local = frame->local; \ - frame->local = NULL; \ - } \ - STACK_UNWIND_STRICT(fop, frame, params); \ - if (__local) { \ - GF_FREE(__local); \ - } \ - } while (0) - -#endif /* __CRYPT_H__ */ - -/* - Local variables: - c-indentation-style: "K&R" - mode-name: "LC" - c-basic-offset: 8 - tab-width: 8 - fill-column: 80 - scroll-step: 1 - End: -*/ diff --git a/xlators/encryption/crypt/src/data.c b/xlators/encryption/crypt/src/data.c deleted file mode 100644 index 93288b1..0000000 --- a/xlators/encryption/crypt/src/data.c +++ /dev/null @@ -1,715 +0,0 @@ -/* - Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. 
-*/ - -#include <glusterfs/defaults.h> -#include "crypt-common.h" -#include "crypt.h" - -static void -set_iv_aes_xts(off_t offset, struct object_cipher_info *object) -{ - unsigned char *ivec; - - ivec = object->u.aes_xts.ivec; - - /* convert the tweak into a little-endian byte - * array (IEEE P1619/D16, May 2007, section 5.1) - */ - - *((uint64_t *)ivec) = htole64(offset); - - /* ivec is padded with zeroes */ -} - -static int32_t -aes_set_keys_common(unsigned char *raw_key, uint32_t key_size, AES_KEY *keys) -{ - int32_t ret; - - ret = AES_set_encrypt_key(raw_key, key_size, &keys[AES_ENCRYPT]); - if (ret) { - gf_log("crypt", GF_LOG_ERROR, "Set encrypt key failed"); - return ret; - } - ret = AES_set_decrypt_key(raw_key, key_size, &keys[AES_DECRYPT]); - if (ret) { - gf_log("crypt", GF_LOG_ERROR, "Set decrypt key failed"); - return ret; - } - return 0; -} - -/* - * set private cipher info for xts mode - */ -static int32_t -set_private_aes_xts(struct crypt_inode_info *info, - struct master_cipher_info *master) -{ - int ret; - struct object_cipher_info *object = get_object_cinfo(info); - unsigned char *data_key; - uint32_t subkey_size; - - /* init tweak value */ - memset(object->u.aes_xts.ivec, 0, 16); - - data_key = GF_CALLOC(1, object->o_dkey_size, gf_crypt_mt_key); - if (!data_key) - return ENOMEM; - - /* - * retrieve data keying material - */ - ret = get_data_file_key(info, master, object->o_dkey_size, data_key); - if (ret) { - gf_log("crypt", GF_LOG_ERROR, "Failed to retrieve data key"); - GF_FREE(data_key); - return ret; - } - /* - * parse compound xts key - */ - subkey_size = object->o_dkey_size >> 4; /* (xts-key-size-in-bytes / 2) */ - /* - * install key for data encryption - */ - ret = aes_set_keys_common(data_key, subkey_size << 3, - object->u.aes_xts.dkey); - if (ret) { - GF_FREE(data_key); - return ret; - } - /* - * set up key used to encrypt tweaks - */ - ret = AES_set_encrypt_key(data_key + subkey_size, object->o_dkey_size / 2, - &object->u.aes_xts.tkey); - 
if (ret < 0) - gf_log("crypt", GF_LOG_ERROR, "Set tweak key failed"); - - GF_FREE(data_key); - return ret; -} - -static int32_t -aes_xts_init(void) -{ - cassert(AES_BLOCK_SIZE == (1 << AES_BLOCK_BITS)); - return 0; -} - -static int32_t -check_key_aes_xts(uint32_t keysize) -{ - switch (keysize) { - case 256: - case 512: - return 0; - default: - break; - } - return -1; -} - -static int32_t -encrypt_aes_xts(const unsigned char *from, unsigned char *to, size_t length, - off_t offset, const int enc, struct object_cipher_info *object) -{ - XTS128_CONTEXT ctx; - if (enc) { - ctx.key1 = &object->u.aes_xts.dkey[AES_ENCRYPT]; - ctx.block1 = (block128_f)AES_encrypt; - } else { - ctx.key1 = &object->u.aes_xts.dkey[AES_DECRYPT]; - ctx.block1 = (block128_f)AES_decrypt; - } - ctx.key2 = &object->u.aes_xts.tkey; - ctx.block2 = (block128_f)AES_encrypt; - - return CRYPTO_xts128_encrypt(&ctx, object->u.aes_xts.ivec, from, to, length, - enc); -} - -/* - * Cipher input chunk @from of length @len; - * @to: result of cipher transform; - * @off: offset in a file (must be cblock-aligned); - */ -static void -cipher_data(struct object_cipher_info *object, char *from, char *to, off_t off, - size_t len, const int enc) -{ - crypt_check_input_len(len, object); - -#if TRIVIAL_TFM && DEBUG_CRYPT - return; -#endif - data_cipher_algs[object->o_alg][object->o_mode].set_iv(off, object); - data_cipher_algs[object->o_alg][object->o_mode].encrypt( - (const unsigned char *)from, (unsigned char *)to, len, off, enc, - object); -} - -#define MAX_CIPHER_CHUNK (1 << 30) - -/* - * Do cipher (encryption/decryption) transform of a - * continuous region of memory. - * - * @len: a number of bytes to transform; - * @buf: data to transform; - * @off: offset in a file, should be block-aligned - * for atomic cipher modes and ksize-aligned - * for other modes). - * @dir: direction of transform (encrypt/decrypt). 
- */ -static void -cipher_region(struct object_cipher_info *object, char *from, char *to, - off_t off, size_t len, int dir) -{ - while (len > 0) { - size_t to_cipher; - - to_cipher = len; - if (to_cipher > MAX_CIPHER_CHUNK) - to_cipher = MAX_CIPHER_CHUNK; - - /* this will reset IV */ - cipher_data(object, from, to, off, to_cipher, dir); - from += to_cipher; - to += to_cipher; - off += to_cipher; - len -= to_cipher; - } -} - -/* - * Do cipher transform (encryption/decryption) of - * plaintext/ciphertext represented by @vec. - * - * Pre-conditions: @vec represents a continuous piece - * of data in a file at offset @off to be ciphered - * (encrypted/decrypted). - * @count is the number of vec's components. All the - * components must be block-aligned, the caller is - * responsible for this. @dir is "direction" of - * transform (encrypt/decrypt). - */ -static void -cipher_aligned_iov(struct object_cipher_info *object, struct iovec *vec, - int count, off_t off, int32_t dir) -{ - int i; - int len = 0; - - for (i = 0; i < count; i++) { - cipher_region(object, vec[i].iov_base, vec[i].iov_base, off + len, - vec[i].iov_len, dir); - len += vec[i].iov_len; - } -} - -void -encrypt_aligned_iov(struct object_cipher_info *object, struct iovec *vec, - int count, off_t off) -{ - cipher_aligned_iov(object, vec, count, off, 1); -} - -void -decrypt_aligned_iov(struct object_cipher_info *object, struct iovec *vec, - int count, off_t off) -{ - cipher_aligned_iov(object, vec, count, off, 0); -} - -#if DEBUG_CRYPT -static void -compound_stream(struct iovec *vec, int count, char *buf, off_t skip) -{ - int i; - int off = 0; - for (i = 0; i < count; i++) { - memcpy(buf + off, vec[i].iov_base + skip, vec[i].iov_len - skip); - - off += (vec[i].iov_len - skip); - skip = 0; - } -} - -static void -check_iovecs(struct iovec *vec, int cnt, struct iovec *avec, int acnt, - uint32_t off_in_head) -{ - char *s1, *s2; - uint32_t size, asize; - - size = iov_length(vec, cnt); - asize = iov_length(avec, 
acnt) - off_in_head; - if (size != asize) { - gf_log("crypt", GF_LOG_DEBUG, "size %d is not eq asize %d", size, - asize); - return; - } - s1 = GF_CALLOC(1, size, gf_crypt_mt_data); - if (!s1) { - gf_log("crypt", GF_LOG_DEBUG, "Can not allocate stream "); - return; - } - s2 = GF_CALLOC(1, asize, gf_crypt_mt_data); - if (!s2) { - GF_FREE(s1); - gf_log("crypt", GF_LOG_DEBUG, "Can not allocate stream "); - return; - } - compound_stream(vec, cnt, s1, 0); - compound_stream(avec, acnt, s2, off_in_head); - if (memcmp(s1, s2, size)) - gf_log("crypt", GF_LOG_DEBUG, "chunks of different data"); - GF_FREE(s1); - GF_FREE(s2); -} - -#else -#define check_iovecs(vec, count, avec, avecn, off) noop -#endif /* DEBUG_CRYPT */ - -static char * -data_alloc_block(xlator_t *this, crypt_local_t *local, int32_t block_size) -{ - struct iobuf *iobuf = NULL; - - iobuf = iobuf_get2(this->ctx->iobuf_pool, block_size); - if (!iobuf) { - gf_log("crypt", GF_LOG_ERROR, "Failed to get iobuf"); - return NULL; - } - if (!local->iobref_data) { - local->iobref_data = iobref_new(); - if (!local->iobref_data) { - gf_log("crypt", GF_LOG_ERROR, "Failed to get iobref"); - iobuf_unref(iobuf); - return NULL; - } - } - iobref_add(local->iobref_data, iobuf); - return iobuf->ptr; -} - -/* - * Compound @avec, which represent the same data - * chunk as @vec, but has aligned components of - * specified block size. Alloc blocks, if needed. - * In particular, incomplete head and tail blocks - * must be allocated. - * Put number of allocated blocks to @num_blocks. - * - * Example: - * - * input: data chunk represented by 4 components - * [AB],[BC],[CD],[DE]; - * output: 5 logical blocks (0, 1, 2, 3, 4). - * - * A B C D E - * *-----*+------*-+---*----+--------+-* - * | || | | | | | | - * *-+-----+*------+-*---+----*--------*-+------* - * 0 1 2 3 4 - * - * 0 - incomplete compound (head); - * 1, 2 - full compound; - * 3 - full non-compound (the case of reuse); - * 4 - incomplete non-compound (tail). 
- */ -int32_t -align_iov_by_atoms(xlator_t *this, crypt_local_t *local, - struct object_cipher_info *object, - struct iovec *vec /* input vector */, - int32_t count /* number of vec components */, - struct iovec *avec /* aligned vector */, - char **blocks /* pool of blocks */, - uint32_t *blocks_allocated, struct avec_config *conf) -{ - int vecn = 0; /* number of the current component in vec */ - int avecn = 0; /* number of the current component in avec */ - off_t vec_off = 0; /* offset in the current vec component, - * i.e. the number of bytes have already - * been copied */ - int32_t block_size = get_atom_size(object); - size_t to_process; /* number of vec's bytes to copy and(or) re-use */ - int32_t off_in_head = conf->off_in_head; - - to_process = iov_length(vec, count); - - while (to_process > 0) { - if (off_in_head || vec[vecn].iov_len - vec_off < block_size) { - /* - * less than block_size: - * the case of incomplete (head or tail), - * or compound block - */ - size_t copied = 0; - /* - * populate the pool with a new block - */ - blocks[*blocks_allocated] = data_alloc_block(this, local, - block_size); - if (!blocks[*blocks_allocated]) - return -ENOMEM; - memset(blocks[*blocks_allocated], 0, off_in_head); - /* - * fill the block with vec components - */ - do { - size_t to_copy; - - to_copy = vec[vecn].iov_len - vec_off; - if (to_copy > block_size - off_in_head) - to_copy = block_size - off_in_head; - - memcpy(blocks[*blocks_allocated] + off_in_head + copied, - vec[vecn].iov_base + vec_off, to_copy); - - copied += to_copy; - to_process -= to_copy; - - vec_off += to_copy; - if (vec_off == vec[vecn].iov_len) { - /* finished with this vecn */ - vec_off = 0; - vecn++; - } - } while (copied < (block_size - off_in_head) && to_process > 0); - /* - * update avec - */ - avec[avecn].iov_len = off_in_head + copied; - avec[avecn].iov_base = blocks[*blocks_allocated]; - - (*blocks_allocated)++; - off_in_head = 0; - } else { - /* - * the rest of the current vec component - * 
is not less than block_size, so reuse - * the memory buffer of the component. - */ - size_t to_reuse; - to_reuse = (to_process > block_size ? block_size : to_process); - avec[avecn].iov_len = to_reuse; - avec[avecn].iov_base = vec[vecn].iov_base + vec_off; - - vec_off += to_reuse; - if (vec_off == vec[vecn].iov_len) { - /* finished with this vecn */ - vec_off = 0; - vecn++; - } - to_process -= to_reuse; - } - avecn++; - } - check_iovecs(vec, count, avec, avecn, conf->off_in_head); - return 0; -} - -/* - * allocate and setup aligned vector for data submission - * Pre-condition: @conf is set. - */ -int32_t -set_config_avec_data(xlator_t *this, crypt_local_t *local, - struct avec_config *conf, - struct object_cipher_info *object, struct iovec *vec, - int32_t vec_count) -{ - int32_t ret = ENOMEM; - struct iovec *avec; - char **pool; - uint32_t blocks_in_pool = 0; - - conf->type = DATA_ATOM; - - avec = GF_CALLOC(conf->acount, sizeof(*avec), gf_crypt_mt_iovec); - if (!avec) - return ret; - pool = GF_CALLOC(conf->acount, sizeof(*pool), gf_crypt_mt_char); - if (!pool) { - GF_FREE(avec); - return ret; - } - if (!vec) { - /* - * degenerated case: no data - */ - pool[0] = data_alloc_block(this, local, get_atom_size(object)); - if (!pool[0]) - goto free; - blocks_in_pool = 1; - avec->iov_base = pool[0]; - avec->iov_len = conf->off_in_tail; - } else { - ret = align_iov_by_atoms(this, local, object, vec, vec_count, avec, - pool, &blocks_in_pool, conf); - if (ret) - goto free; - } - conf->avec = avec; - conf->pool = pool; - conf->blocks_in_pool = blocks_in_pool; - return 0; -free: - GF_FREE(avec); - GF_FREE(pool); - return ret; -} - -/* - * allocate and setup aligned vector for hole submission - */ -int32_t -set_config_avec_hole(xlator_t *this, crypt_local_t *local, - struct avec_config *conf, - struct object_cipher_info *object, glusterfs_fop_t fop) -{ - uint32_t i, idx; - struct iovec *avec; - char **pool; - uint32_t num_blocks; - uint32_t blocks_in_pool = 0; - - conf->type = 
HOLE_ATOM; - - num_blocks = conf->acount - - (conf->nr_full_blocks ? conf->nr_full_blocks - 1 : 0); - - switch (fop) { - case GF_FOP_WRITE: - /* - * hole goes before data - */ - if (num_blocks == 1 && conf->off_in_tail != 0) - /* - * we won't submit a hole which fits into - * a data atom: this part of hole will be - * submitted with data write - */ - return 0; - break; - case GF_FOP_FTRUNCATE: - /* - * expanding truncate, hole goes after data, - * and will be submitted in any case. - */ - break; - default: - gf_log("crypt", GF_LOG_WARNING, "bad file operation %d", fop); - return 0; - } - avec = GF_CALLOC(num_blocks, sizeof(*avec), gf_crypt_mt_iovec); - if (!avec) - return ENOMEM; - pool = GF_CALLOC(num_blocks, sizeof(*pool), gf_crypt_mt_char); - if (!pool) { - GF_FREE(avec); - return ENOMEM; - } - for (i = 0; i < num_blocks; i++) { - pool[i] = data_alloc_block(this, local, get_atom_size(object)); - if (pool[i] == NULL) - goto free; - blocks_in_pool++; - } - if (has_head_block(conf)) { - /* set head block */ - idx = 0; - avec[idx].iov_base = pool[idx]; - avec[idx].iov_len = get_atom_size(object); - memset(avec[idx].iov_base + conf->off_in_head, 0, - get_atom_size(object) - conf->off_in_head); - } - if (has_tail_block(conf)) { - /* set tail block */ - idx = num_blocks - 1; - avec[idx].iov_base = pool[idx]; - avec[idx].iov_len = get_atom_size(object); - memset(avec[idx].iov_base, 0, conf->off_in_tail); - } - if (has_full_blocks(conf)) { - /* set full block */ - idx = conf->off_in_head ? 1 : 0; - avec[idx].iov_base = pool[idx]; - avec[idx].iov_len = get_atom_size(object); - /* - * since we re-use the buffer, - * zeroes will be set every time - * before encryption, see submit_full() - */ - } - conf->avec = avec; - conf->pool = pool; - conf->blocks_in_pool = blocks_in_pool; - return 0; -free: - GF_FREE(avec); - GF_FREE(pool); - return ENOMEM; -} - -/* A helper for setting up config of partial atoms (which - * participate in read-modify-write sequence). 
- * - * Calculate and setup precise amount of "extra-bytes" - * that should be uptodated at the end of partial (not - * necessarily tail!) block. - * - * Pre-condition: local->old_file_size is valid! - * @conf contains setup, which is enough for correct calculation - * of has_tail_block(), ->get_offset(). - */ -void -set_gap_at_end(call_frame_t *frame, struct object_cipher_info *object, - struct avec_config *conf, atom_data_type dtype) -{ - uint32_t to_block; - crypt_local_t *local = frame->local; - uint64_t old_file_size = local->old_file_size; - struct rmw_atom *partial = atom_by_types( - dtype, has_tail_block(conf) ? TAIL_ATOM : HEAD_ATOM); - - if (old_file_size <= partial->offset_at(frame, object)) - to_block = 0; - else { - to_block = old_file_size - partial->offset_at(frame, object); - if (to_block > get_atom_size(object)) - to_block = get_atom_size(object); - } - if (to_block > conf->off_in_tail) - conf->gap_in_tail = to_block - conf->off_in_tail; - else - /* - * nothing to uptodate - */ - conf->gap_in_tail = 0; -} - -/* - * fill struct avec_config with offsets layouts - */ -void -set_config_offsets(call_frame_t *frame, xlator_t *this, uint64_t offset, - uint64_t count, atom_data_type dtype, int32_t set_gap) -{ - crypt_local_t *local; - struct object_cipher_info *object; - struct avec_config *conf; - uint32_t resid; - - uint32_t atom_size; - uint32_t atom_bits; - - size_t orig_size; - off_t orig_offset; - size_t expanded_size; - off_t aligned_offset; - - uint32_t off_in_head = 0; - uint32_t off_in_tail = 0; - uint32_t nr_full_blocks; - int32_t size_full_blocks; - - uint32_t acount; /* number of aligned components to write. - * The same as number of occupied logical - * blocks (atoms) - */ - local = frame->local; - object = &local->info->cinfo; - conf = (dtype == DATA_ATOM ? 
get_data_conf(frame) : get_hole_conf(frame)); - - orig_offset = offset; - orig_size = count; - - atom_size = get_atom_size(object); - atom_bits = get_atom_bits(object); - - /* - * Round-down the start, - * round-up the end. - */ - resid = offset & (uint64_t)(atom_size - 1); - - if (resid) - off_in_head = resid; - aligned_offset = offset - off_in_head; - expanded_size = orig_size + off_in_head; - - /* calculate tail, - expand size forward */ - resid = (offset + orig_size) & (uint64_t)(atom_size - 1); - - if (resid) { - off_in_tail = resid; - expanded_size += (atom_size - off_in_tail); - } - /* - * calculate number of occupied blocks - */ - acount = expanded_size >> atom_bits; - /* - * calculate number of full blocks - */ - size_full_blocks = expanded_size; - if (off_in_head) - size_full_blocks -= atom_size; - if (off_in_tail && size_full_blocks > 0) - size_full_blocks -= atom_size; - nr_full_blocks = size_full_blocks >> atom_bits; - - conf->atom_size = atom_size; - conf->orig_size = orig_size; - conf->orig_offset = orig_offset; - conf->expanded_size = expanded_size; - conf->aligned_offset = aligned_offset; - - conf->off_in_head = off_in_head; - conf->off_in_tail = off_in_tail; - conf->nr_full_blocks = nr_full_blocks; - conf->acount = acount; - /* - * Finally, calculate precise amount of - * "extra-bytes" that should be uptodated - * at the end. - * Only if RMW is expected. 
- */ - if (off_in_tail && set_gap) - set_gap_at_end(frame, object, conf, dtype); -} - -struct data_cipher_alg data_cipher_algs[LAST_CIPHER_ALG][LAST_CIPHER_MODE] = { - [AES_CIPHER_ALG][XTS_CIPHER_MODE] = {.atomic = _gf_true, - .should_pad = _gf_true, - .blkbits = AES_BLOCK_BITS, - .init = aes_xts_init, - .set_private = set_private_aes_xts, - .check_key = check_key_aes_xts, - .set_iv = set_iv_aes_xts, - .encrypt = encrypt_aes_xts}}; - -/* - Local variables: - c-indentation-style: "K&R" - mode-name: "LC" - c-basic-offset: 8 - tab-width: 8 - fill-column: 80 - scroll-step: 1 - End: -*/ diff --git a/xlators/encryption/crypt/src/keys.c b/xlators/encryption/crypt/src/keys.c deleted file mode 100644 index 92a4d47..0000000 --- a/xlators/encryption/crypt/src/keys.c +++ /dev/null @@ -1,284 +0,0 @@ -/* - Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. 
-*/ - -#include <glusterfs/defaults.h> -#include "crypt-common.h" -#include "crypt.h" - -/* Key hierarchy - - +----------------+ - | MASTER_VOL_KEY | - +-------+--------+ - | - | - +----------------+----------------+ - | | | - | | | - +-------+------+ +-------+-------+ +------+--------+ - | NMTD_VOL_KEY | | EMTD_FILE_KEY | | DATA_FILE_KEY | - +-------+------+ +---------------+ +---------------+ - | - | - +-------+-------+ - | NMTD_LINK_KEY | - +---------------+ - - */ - -#if DEBUG_CRYPT -static void -check_prf_iters(uint32_t num_iters) -{ - if (num_iters == 0) - gf_log("crypt", GF_LOG_DEBUG, "bad number of prf iterations : %d", - num_iters); -} -#else -#define check_prf_iters(num_iters) noop -#endif /* DEBUG_CRYPT */ - -unsigned char crypt_fake_oid[16] = {0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0}; - -/* - * derive key in the counter mode using - * sha256-based HMAC as PRF, see - * NIST Special Publication 800-108, 5.1) - */ - -#define PRF_OUTPUT_SIZE SHA256_DIGEST_LENGTH - -static int32_t -kderive_init(struct kderive_context *ctx, - const unsigned char *pkey, /* parent key */ - uint32_t pkey_size, /* parent key size */ - const unsigned char *idctx, /* id-context */ - uint32_t idctx_size, crypt_key_type type /* type of child key */) -{ - unsigned char *pos; - uint32_t llen = strlen(crypt_keys[type].label); - /* - * Compoud the fixed input data for KDF: - * [i]_2 || Label || 0x00 || Id-Context || [L]_2), - * NIST SP 800-108, 5.1 - */ - ctx->fid_len = sizeof(uint32_t) + llen + 1 + idctx_size + sizeof(uint32_t); - - ctx->fid = GF_CALLOC(ctx->fid_len, 1, gf_crypt_mt_key); - if (!ctx->fid) - return ENOMEM; - ctx->out_len = round_up(crypt_keys[type].len >> 3, PRF_OUTPUT_SIZE); - ctx->out = GF_CALLOC(ctx->out_len, 1, gf_crypt_mt_key); - if (!ctx->out) { - GF_FREE(ctx->fid); - return ENOMEM; - } - ctx->pkey = pkey; - ctx->pkey_len = pkey_size; - ctx->ckey_len = crypt_keys[type].len; - - pos = ctx->fid; - - /* counter will be set up in kderive_rfn() */ - pos += 
sizeof(uint32_t); - - memcpy(pos, crypt_keys[type].label, llen); - pos += llen; - - /* set up zero octet */ - *pos = 0; - pos += 1; - - memcpy(pos, idctx, idctx_size); - pos += idctx_size; - - *((uint32_t *)pos) = htobe32(ctx->ckey_len); - - return 0; -} - -static void -kderive_update(struct kderive_context *ctx) -{ - uint32_t i; -#if (OPENSSL_VERSION_NUMBER < 0x1010002f) - HMAC_CTX hctx; -#endif - HMAC_CTX *phctx = NULL; - unsigned char *pos = ctx->out; - uint32_t *p_iter = (uint32_t *)ctx->fid; - uint32_t num_iters = ctx->out_len / PRF_OUTPUT_SIZE; - - check_prf_iters(num_iters); - -#if (OPENSSL_VERSION_NUMBER < 0x1010002f) - HMAC_CTX_init(&hctx); - phctx = &hctx; -#else - phctx = HMAC_CTX_new(); - /* I guess we presume it was successful? */ -#endif - for (i = 0; i < num_iters; i++) { - /* - * update the iteration number in the fid - */ - *p_iter = htobe32(i); - HMAC_Init_ex(phctx, ctx->pkey, ctx->pkey_len >> 3, EVP_sha256(), NULL); - HMAC_Update(phctx, ctx->fid, ctx->fid_len); - HMAC_Final(phctx, pos, NULL); - - pos += PRF_OUTPUT_SIZE; - } -#if (OPENSSL_VERSION_NUMBER < 0x1010002f) - HMAC_CTX_cleanup(phctx); -#else - HMAC_CTX_free(phctx); -#endif -} - -static void -kderive_final(struct kderive_context *ctx, unsigned char *child) -{ - memcpy(child, ctx->out, ctx->ckey_len >> 3); - GF_FREE(ctx->fid); - GF_FREE(ctx->out); - memset(ctx, 0, sizeof(*ctx)); -} - -/* - * derive per-volume key for object ids aithentication - */ -int32_t -get_nmtd_vol_key(struct master_cipher_info *master) -{ - int32_t ret; - struct kderive_context ctx; - - ret = kderive_init(&ctx, master->m_key, master_key_size(), crypt_fake_oid, - sizeof(uuid_t), NMTD_VOL_KEY); - if (ret) - return ret; - kderive_update(&ctx); - kderive_final(&ctx, master->m_nmtd_key); - return 0; -} - -/* - * derive per-link key for aithentication of non-encrypted - * meta-data (nmtd) - */ -int32_t -get_nmtd_link_key(loc_t *loc, struct master_cipher_info *master, - unsigned char *result) -{ - int32_t ret; - struct 
kderive_context ctx; - - ret = kderive_init(&ctx, master->m_nmtd_key, nmtd_vol_key_size(), - (const unsigned char *)loc->path, strlen(loc->path), - NMTD_LINK_KEY); - if (ret) - return ret; - kderive_update(&ctx); - kderive_final(&ctx, result); - return 0; -} - -/* - * derive per-file key for encryption and authentication - * of encrypted part of metadata (emtd) - */ -int32_t -get_emtd_file_key(struct crypt_inode_info *info, - struct master_cipher_info *master, unsigned char *result) -{ - int32_t ret; - struct kderive_context ctx; - - ret = kderive_init(&ctx, master->m_key, master_key_size(), info->oid, - sizeof(uuid_t), EMTD_FILE_KEY); - if (ret) - return ret; - kderive_update(&ctx); - kderive_final(&ctx, result); - return 0; -} - -static int32_t -data_key_type_by_size(uint32_t keysize, crypt_key_type *type) -{ - int32_t ret = 0; - switch (keysize) { - case 256: - *type = DATA_FILE_KEY_256; - break; - case 512: - *type = DATA_FILE_KEY_512; - break; - default: - gf_log("crypt", GF_LOG_ERROR, "Unsupported data key size %d", - keysize); - ret = ENOTSUP; - break; - } - return ret; -} - -/* - * derive per-file key for data encryption - */ -int32_t -get_data_file_key(struct crypt_inode_info *info, - struct master_cipher_info *master, uint32_t keysize, - unsigned char *key) -{ - int32_t ret; - struct kderive_context ctx; - crypt_key_type type; - - ret = data_key_type_by_size(keysize, &type); - if (ret) - return ret; - ret = kderive_init(&ctx, master->m_key, master_key_size(), info->oid, - sizeof(uuid_t), type); - if (ret) - return ret; - kderive_update(&ctx); - kderive_final(&ctx, key); - return 0; -} - -/* - * NOTE: Don't change existing keys: it will break compatibility; - */ -struct crypt_key crypt_keys[LAST_KEY_TYPE] = { - [MASTER_VOL_KEY] = - { - .len = MASTER_VOL_KEY_SIZE << 3, - .label = "volume-master", - }, - [NMTD_VOL_KEY] = {.len = NMTD_VOL_KEY_SIZE << 3, - .label = "volume-nmtd-key-generation"}, - [NMTD_LINK_KEY] = {.len = 128, .label = 
"link-nmtd-authentication"}, - [EMTD_FILE_KEY] = {.len = 128, .label = "file-emtd-encryption-and-auth"}, - [DATA_FILE_KEY_256] = {.len = 256, .label = "file-data-encryption-256"}, - [DATA_FILE_KEY_512] = {.len = 512, .label = "file-data-encryption-512"}}; - -/* - Local variables: - c-indentation-style: "K&R" - mode-name: "LC" - c-basic-offset: 8 - tab-width: 8 - fill-column: 80 - scroll-step: 1 - End: -*/ diff --git a/xlators/encryption/crypt/src/metadata.c b/xlators/encryption/crypt/src/metadata.c deleted file mode 100644 index 120ae62..0000000 --- a/xlators/encryption/crypt/src/metadata.c +++ /dev/null @@ -1,575 +0,0 @@ -/* - Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -#include <glusterfs/defaults.h> -#include "crypt-common.h" -#include "crypt.h" -#include "metadata.h" - -int32_t -alloc_format(crypt_local_t *local, size_t size) -{ - if (size > 0) { - local->format = GF_CALLOC(1, size, gf_crypt_mt_mtd); - if (!local->format) - return ENOMEM; - } - local->format_size = size; - return 0; -} - -int32_t -alloc_format_create(crypt_local_t *local) -{ - return alloc_format(local, new_format_size()); -} - -void -free_format(crypt_local_t *local) -{ - GF_FREE(local->format); -} - -/* - * Check compatibility with extracted metadata - */ -static int32_t -check_file_metadata(struct crypt_inode_info *info) -{ - struct object_cipher_info *object = &info->cinfo; - - if (info->nr_minor != CRYPT_XLATOR_ID) { - gf_log("crypt", GF_LOG_WARNING, "unsupported minor subversion %d", - info->nr_minor); - return EINVAL; - } - if (object->o_alg > LAST_CIPHER_ALG) { - gf_log("crypt", GF_LOG_WARNING, "unsupported cipher algorithm %d", - object->o_alg); - return EINVAL; - } - if 
(object->o_mode > LAST_CIPHER_MODE) { - gf_log("crypt", GF_LOG_WARNING, "unsupported cipher mode %d", - object->o_mode); - return EINVAL; - } - if (object->o_block_bits < CRYPT_MIN_BLOCK_BITS || - object->o_block_bits > CRYPT_MAX_BLOCK_BITS) { - gf_log("crypt", GF_LOG_WARNING, "unsupported block bits %d", - object->o_block_bits); - return EINVAL; - } - /* TBD: check data key size */ - return 0; -} - -static size_t -format_size_v1(mtd_op_t op, size_t old_size) -{ - switch (op) { - case MTD_CREATE: - return sizeof(struct mtd_format_v1); - case MTD_OVERWRITE: - return old_size; - case MTD_APPEND: - return old_size + NMTD_8_MAC_SIZE; - case MTD_CUT: - if (old_size > sizeof(struct mtd_format_v1)) - return old_size - NMTD_8_MAC_SIZE; - else - return 0; - default: - gf_log("crypt", GF_LOG_WARNING, "Bad mtd operation"); - return 0; - } -} - -/* - * Calculate size of the updated format string. - * Returned zero means that we don't need to update the format string. - */ -size_t -format_size(mtd_op_t op, size_t old_size) -{ - size_t versioned; - - versioned = mtd_loaders[current_mtd_loader()].format_size( - op, old_size - sizeof(struct crypt_format)); - if (versioned != 0) - return versioned + sizeof(struct crypt_format); - return 0; -} - -/* - * size of the format string of newly created file (nr_links = 1) - */ -size_t -new_format_size(void) -{ - return format_size(MTD_CREATE, 0); -} - -/* - * Calculate per-link MAC by pathname - */ -static int32_t -calc_link_mac_v1(struct mtd_format_v1 *fmt, loc_t *loc, unsigned char *result, - struct crypt_inode_info *info, - struct master_cipher_info *master) -{ - int32_t ret; - unsigned char nmtd_link_key[16]; - CMAC_CTX *cctx; - size_t len; - - ret = get_nmtd_link_key(loc, master, nmtd_link_key); - if (ret) { - gf_log("crypt", GF_LOG_ERROR, "Can not get nmtd link key"); - return -1; - } - cctx = CMAC_CTX_new(); - if (!cctx) { - gf_log("crypt", GF_LOG_ERROR, "CMAC_CTX_new failed"); - return -1; - } - ret = CMAC_Init(cctx, nmtd_link_key, 
sizeof(nmtd_link_key), - EVP_aes_128_cbc(), 0); - if (!ret) { - gf_log("crypt", GF_LOG_ERROR, "CMAC_Init failed"); - CMAC_CTX_free(cctx); - return -1; - } - ret = CMAC_Update(cctx, get_NMTD_V1(info), SIZE_OF_NMTD_V1); - if (!ret) { - gf_log("crypt", GF_LOG_ERROR, "CMAC_Update failed"); - CMAC_CTX_free(cctx); - return -1; - } - ret = CMAC_Final(cctx, result, &len); - CMAC_CTX_free(cctx); - if (!ret) { - gf_log("crypt", GF_LOG_ERROR, "CMAC_Final failed"); - return -1; - } - return 0; -} - -/* - * Create per-link MAC of index @idx by pathname - */ -static int32_t -create_link_mac_v1(struct mtd_format_v1 *fmt, uint32_t idx, loc_t *loc, - struct crypt_inode_info *info, - struct master_cipher_info *master) -{ - int32_t ret; - unsigned char *mac; - unsigned char cmac[16]; - - mac = get_NMTD_V1_MAC(fmt) + idx * SIZE_OF_NMTD_V1_MAC; - - ret = calc_link_mac_v1(fmt, loc, cmac, info, master); - if (ret) - return -1; - memcpy(mac, cmac, SIZE_OF_NMTD_V1_MAC); - return 0; -} - -static int32_t -create_format_v1(unsigned char *wire, loc_t *loc, struct crypt_inode_info *info, - struct master_cipher_info *master) -{ - int32_t ret; - struct mtd_format_v1 *fmt; - unsigned char mtd_key[16]; - AES_KEY EMTD_KEY; - unsigned char nmtd_link_key[16]; - uint32_t ad; - GCM128_CONTEXT *gctx; - - fmt = (struct mtd_format_v1 *)wire; - - fmt->minor_id = info->nr_minor; - fmt->alg_id = AES_CIPHER_ALG; - fmt->dkey_factor = master->m_dkey_size >> KEY_FACTOR_BITS; - fmt->block_bits = master->m_block_bits; - fmt->mode_id = master->m_mode; - /* - * retrieve keys for the parts of metadata - */ - ret = get_emtd_file_key(info, master, mtd_key); - if (ret) - return ret; - ret = get_nmtd_link_key(loc, master, nmtd_link_key); - if (ret) - return ret; - - AES_set_encrypt_key(mtd_key, sizeof(mtd_key) * 8, &EMTD_KEY); - - gctx = CRYPTO_gcm128_new(&EMTD_KEY, (block128_f)AES_encrypt); - - /* TBD: Check return values */ - - CRYPTO_gcm128_setiv(gctx, info->oid, sizeof(uuid_t)); - - ad = htole32(MTD_LOADER_V1); - ret 
= CRYPTO_gcm128_aad(gctx, (const unsigned char *)&ad, sizeof(ad)); - if (ret) { - gf_log("crypt", GF_LOG_ERROR, " CRYPTO_gcm128_aad failed"); - CRYPTO_gcm128_release(gctx); - return ret; - } - ret = CRYPTO_gcm128_encrypt(gctx, get_EMTD_V1(fmt), get_EMTD_V1(fmt), - SIZE_OF_EMTD_V1); - if (ret) { - gf_log("crypt", GF_LOG_ERROR, " CRYPTO_gcm128_encrypt failed"); - CRYPTO_gcm128_release(gctx); - return ret; - } - /* - * set MAC of encrypted part of metadata - */ - CRYPTO_gcm128_tag(gctx, get_EMTD_V1_MAC(fmt), SIZE_OF_EMTD_V1_MAC); - CRYPTO_gcm128_release(gctx); - /* - * set the first MAC of non-encrypted part of metadata - */ - return create_link_mac_v1(fmt, 0, loc, info, master); -} - -/* - * Called by fops: - * ->create(); - * ->link(); - * - * Pack common and version-specific parts of file's metadata - * Pre-conditions: @info contains valid object-id. - */ -int32_t -create_format(unsigned char *wire, loc_t *loc, struct crypt_inode_info *info, - struct master_cipher_info *master) -{ - struct crypt_format *fmt = (struct crypt_format *)wire; - - fmt->loader_id = current_mtd_loader(); - - wire += sizeof(struct crypt_format); - return mtd_loaders[current_mtd_loader()].create_format(wire, loc, info, - master); -} - -/* - * Append or overwrite per-link mac of @mac_idx index - * in accordance with the new pathname - */ -int32_t -appov_link_mac_v1(unsigned char *new, unsigned char *old, uint32_t old_size, - int32_t mac_idx, loc_t *loc, struct crypt_inode_info *info, - struct master_cipher_info *master, crypt_local_t *local) -{ - memcpy(new, old, old_size); - return create_link_mac_v1((struct mtd_format_v1 *)new, mac_idx, loc, info, - master); -} - -/* - * Cut per-link mac of @mac_idx index - */ -static int32_t -cut_link_mac_v1(unsigned char *new, unsigned char *old, uint32_t old_size, - int32_t mac_idx, loc_t *loc, struct crypt_inode_info *info, - struct master_cipher_info *master, crypt_local_t *local) -{ - memcpy(new, old, - sizeof(struct mtd_format_v1) + NMTD_8_MAC_SIZE * 
(mac_idx - 1)); - - memcpy( - new + sizeof(struct mtd_format_v1) + NMTD_8_MAC_SIZE *(mac_idx - 1), - old + sizeof(struct mtd_format_v1) + NMTD_8_MAC_SIZE * mac_idx, - old_size - (sizeof(struct mtd_format_v1) + NMTD_8_MAC_SIZE * mac_idx)); - return 0; -} - -int32_t -update_format_v1(unsigned char *new, unsigned char *old, size_t old_len, - int32_t mac_idx, /* of old name */ - mtd_op_t op, loc_t *loc, struct crypt_inode_info *info, - struct master_cipher_info *master, crypt_local_t *local) -{ - switch (op) { - case MTD_APPEND: - mac_idx = 1 + (old_len - sizeof(struct mtd_format_v1)) / 8; - case MTD_OVERWRITE: - return appov_link_mac_v1(new, old, old_len, mac_idx, loc, info, - master, local); - case MTD_CUT: - return cut_link_mac_v1(new, old, old_len, mac_idx, loc, info, - master, local); - default: - gf_log("crypt", GF_LOG_ERROR, "Bad mtd operation %d", op); - return -1; - } -} - -/* - * Called by fops: - * - * ->link() - * ->unlink() - * ->rename() - * - */ -int32_t -update_format(unsigned char *new, unsigned char *old, size_t old_len, - int32_t mac_idx, mtd_op_t op, loc_t *loc, - struct crypt_inode_info *info, struct master_cipher_info *master, - crypt_local_t *local) -{ - if (!new) - return 0; - memcpy(new, old, sizeof(struct crypt_format)); - - old += sizeof(struct crypt_format); - new += sizeof(struct crypt_format); - old_len -= sizeof(struct crypt_format); - - return mtd_loaders[current_mtd_loader()].update_format( - new, old, old_len, mac_idx, op, loc, info, master, local); -} - -/* - * Perform preliminary checks of found metadata - * Return < 0 on errors; - * Return number of object-id MACs (>= 1) on success - */ -int32_t -check_format_v1(uint32_t len, unsigned char *wire) -{ - uint32_t nr_links; - - if (len < sizeof(struct mtd_format_v1)) { - gf_log("crypt", GF_LOG_ERROR, "v1-loader: bad metadata size %d", len); - goto error; - } - len -= sizeof(struct mtd_format_v1); - if (len % sizeof(nmtd_8_mac_t)) { - gf_log("crypt", GF_LOG_ERROR, "v1-loader: bad 
metadata format"); - goto error; - } - nr_links = 1 + len / sizeof(nmtd_8_mac_t); - if (nr_links > _POSIX_LINK_MAX) - goto error; - return nr_links; -error: - return EIO; -} - -/* - * Verify per-link MAC specified by index @idx - * - * return: - * -1 on errors; - * 0 on failed verification; - * 1 on successful verification - */ -static int32_t -verify_link_mac_v1(struct mtd_format_v1 *fmt, - uint32_t idx /* index of the mac to verify */, loc_t *loc, - struct crypt_inode_info *info, - struct master_cipher_info *master) -{ - int32_t ret; - unsigned char *mac; - unsigned char cmac[16]; - - mac = get_NMTD_V1_MAC(fmt) + idx * SIZE_OF_NMTD_V1_MAC; - - ret = calc_link_mac_v1(fmt, loc, cmac, info, master); - if (ret) - return -1; - if (memcmp(cmac, mac, SIZE_OF_NMTD_V1_MAC)) - return 0; - return 1; -} - -/* - * Lookup per-link MAC by pathname. - * - * return index of the MAC, if it was found; - * return < 0 on errors, or if the MAC wasn't found - */ -static int32_t -lookup_link_mac_v1(struct mtd_format_v1 *fmt, uint32_t nr_macs, loc_t *loc, - struct crypt_inode_info *info, - struct master_cipher_info *master) -{ - int32_t ret; - uint32_t idx; - - for (idx = 0; idx < nr_macs; idx++) { - ret = verify_link_mac_v1(fmt, idx, loc, info, master); - if (ret < 0) - return ret; - if (ret > 0) - return idx; - } - return -ENOENT; -} - -/* - * Extract version-specific part of metadata - */ -static int32_t -open_format_v1(unsigned char *wire, int32_t len, loc_t *loc, - struct crypt_inode_info *info, struct master_cipher_info *master, - crypt_local_t *local, gf_boolean_t load_info) -{ - int32_t ret; - int32_t num_nmtd_macs; - struct mtd_format_v1 *fmt; - unsigned char mtd_key[16]; - AES_KEY EMTD_KEY; - GCM128_CONTEXT *gctx; - uint32_t ad; - emtd_8_mac_t gmac; - struct object_cipher_info *object; - - num_nmtd_macs = check_format_v1(len, wire); - if (num_nmtd_macs <= 0) - return EIO; - - ret = lookup_link_mac_v1((struct mtd_format_v1 *)wire, num_nmtd_macs, loc, - info, master); - if (ret < 
0) { - gf_log("crypt", GF_LOG_ERROR, "NMTD verification failed"); - return EINVAL; - } - - local->mac_idx = ret; - if (load_info == _gf_false) - /* the case of partial open */ - return 0; - - fmt = GF_MALLOC(len, gf_crypt_mt_mtd); - if (!fmt) - return ENOMEM; - memcpy(fmt, wire, len); - - object = &info->cinfo; - - ret = get_emtd_file_key(info, master, mtd_key); - if (ret) { - gf_log("crypt", GF_LOG_ERROR, "Can not retrieve metadata key"); - goto out; - } - /* - * decrypt encrypted meta-data - */ - ret = AES_set_encrypt_key(mtd_key, sizeof(mtd_key) * 8, &EMTD_KEY); - if (ret < 0) { - gf_log("crypt", GF_LOG_ERROR, "Can not set encrypt key"); - ret = EIO; - goto out; - } - gctx = CRYPTO_gcm128_new(&EMTD_KEY, (block128_f)AES_encrypt); - if (!gctx) { - gf_log("crypt", GF_LOG_ERROR, "Can not alloc gcm context"); - ret = ENOMEM; - goto out; - } - CRYPTO_gcm128_setiv(gctx, info->oid, sizeof(uuid_t)); - - ad = htole32(MTD_LOADER_V1); - ret = CRYPTO_gcm128_aad(gctx, (const unsigned char *)&ad, sizeof(ad)); - if (ret) { - gf_log("crypt", GF_LOG_ERROR, " CRYPTO_gcm128_aad failed"); - CRYPTO_gcm128_release(gctx); - ret = EIO; - goto out; - } - ret = CRYPTO_gcm128_decrypt(gctx, get_EMTD_V1(fmt), get_EMTD_V1(fmt), - SIZE_OF_EMTD_V1); - if (ret) { - gf_log("crypt", GF_LOG_ERROR, " CRYPTO_gcm128_decrypt failed"); - CRYPTO_gcm128_release(gctx); - ret = EIO; - goto out; - } - /* - * verify metadata - */ - CRYPTO_gcm128_tag(gctx, gmac, sizeof(gmac)); - CRYPTO_gcm128_release(gctx); - if (memcmp(gmac, get_EMTD_V1_MAC(fmt), SIZE_OF_EMTD_V1_MAC)) { - gf_log("crypt", GF_LOG_ERROR, "EMTD verification failed"); - ret = EINVAL; - goto out; - } - /* - * load verified metadata to the private part of inode - */ - info->nr_minor = fmt->minor_id; - - object->o_alg = fmt->alg_id; - object->o_dkey_size = fmt->dkey_factor << KEY_FACTOR_BITS; - object->o_block_bits = fmt->block_bits; - object->o_mode = fmt->mode_id; - - ret = check_file_metadata(info); -out: - GF_FREE(fmt); - return ret; -} - -/* - * 
perform metadata authentication against @loc->path; - * extract crypt-specific attribute and populate @info - * with them (optional) - */ -int32_t -open_format(unsigned char *str, int32_t len, loc_t *loc, - struct crypt_inode_info *info, struct master_cipher_info *master, - crypt_local_t *local, gf_boolean_t load_info) -{ - struct crypt_format *fmt; - if (len < sizeof(*fmt)) { - gf_log("crypt", GF_LOG_ERROR, "Bad core format"); - return EIO; - } - fmt = (struct crypt_format *)str; - - if (fmt->loader_id >= LAST_MTD_LOADER) { - gf_log("crypt", GF_LOG_ERROR, "Unsupported loader id %d", - fmt->loader_id); - return EINVAL; - } - str += sizeof(*fmt); - len -= sizeof(*fmt); - - return mtd_loaders[fmt->loader_id].open_format(str, len, loc, info, master, - local, load_info); -} - -struct crypt_mtd_loader mtd_loaders[LAST_MTD_LOADER] = { - [MTD_LOADER_V1] = {.format_size = format_size_v1, - .create_format = create_format_v1, - .open_format = open_format_v1, - .update_format = update_format_v1}}; - -/* - Local variables: - c-indentation-style: "K&R" - mode-name: "LC" - c-basic-offset: 8 - tab-width: 8 - fill-column: 80 - scroll-step: 1 - End: -*/ diff --git a/xlators/encryption/crypt/src/metadata.h b/xlators/encryption/crypt/src/metadata.h deleted file mode 100644 index 0bcee1b..0000000 --- a/xlators/encryption/crypt/src/metadata.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -#ifndef __METADATA_H__ -#define __METADATA_H__ - -#define NMTD_8_MAC_SIZE (8) -#define EMTD_8_MAC_SIZE (8) |