summary | refs | log | tree | commit | diff | stats
path: root/xlators/cluster/dht
diff options
context:
space:
mode:
author Amar Tumballi <amarts@redhat.com> 2018-12-06 12:29:25 +0530
committer Amar Tumballi <amarts@redhat.com> 2018-12-13 17:10:00 +0000
commit 8293d21280fd6ddfc9bb54068cf87794fc6be207 (patch)
tree 39729fb407b436ed0cc3e4a9f4e5bbd29036a9db /xlators/cluster/dht
parent af7e957b4954bd84b8f7df6bfbd59c939092ead2 (diff)
all: remove code which is not being considered in build
These xlators are now removed from build as per discussion/announcement
done at https://lists.gluster.org/pipermail/gluster-users/2018-July/034400.html

* move rot-13 to playground, as it is used only as demo purpose, and is
  documented in many places.
* Removed code of below xlators:
  - cluster/stripe
  - cluster/tier
  - features/changetimerecorder
  - features/glupy
  - performance/symlink-cache
  - encryption/crypt
  - storage/bd
  - experimental/posix2
  - experimental/dht2
  - experimental/fdl
  - experimental/jbr

updates: bz#1635688
Change-Id: I1d2d63c32535e149bc8dcb2daa76236c707996e8
Signed-off-by: Amar Tumballi <amarts@redhat.com>
Diffstat (limited to 'xlators/cluster/dht')
-rw-r--r-- xlators/cluster/dht/src/tier-common.c 1199
-rw-r--r-- xlators/cluster/dht/src/tier-common.h 55
-rw-r--r-- xlators/cluster/dht/src/tier.c 3090
-rw-r--r-- xlators/cluster/dht/src/tier.h 110
-rw-r--r-- xlators/cluster/dht/src/tier.sym 9
5 files changed, 0 insertions, 4463 deletions
diff --git a/xlators/cluster/dht/src/tier-common.c b/xlators/cluster/dht/src/tier-common.c
deleted file mode 100644
index b22f4776ada..00000000000
--- a/xlators/cluster/dht/src/tier-common.c
+++ /dev/null
@@ -1,1199 +0,0 @@
-/*
- Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include <glusterfs/glusterfs.h>
-#include <glusterfs/xlator.h>
-#include "libxlator.h"
-#include "dht-common.h"
-#include <glusterfs/defaults.h>
-#include "tier-common.h"
-#include "tier.h"
-
-int
-dht_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata);
-
-int
-tier_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
-{
- dht_local_t *local = NULL;
- loc_t *oldloc = NULL;
- loc_t *newloc = NULL;
-
- local = frame->local;
-
- oldloc = &local->loc;
- newloc = &local->loc2;
-
- if (op_ret == -1) {
- /* No continuation on DHT inode missing errors, as we should
- * then have a good stbuf that states P2 happened. We would
- * get inode missing if, the file completed migrated between
- * the lookup and the link call */
- goto out;
- }
-
- if (local->call_cnt != 1) {
- goto out;
- }
-
- local->call_cnt = 2;
-
- /* Do this on the hot tier now */
-
- STACK_WIND(frame, tier_link_cbk, local->cached_subvol,
- local->cached_subvol->fops->link, oldloc, newloc, xdata);
-
- return 0;
-
-out:
- DHT_STRIP_PHASE1_FLAGS(stbuf);
-
- DHT_STACK_UNWIND(link, frame, op_ret, op_errno, inode, stbuf, preparent,
- postparent, NULL);
-
- return 0;
-}
-
-int
-tier_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
- dict_t *xdata)
-{
- xlator_t *cached_subvol = NULL;
- xlator_t *hashed_subvol = NULL;
- int op_errno = -1;
- int ret = -1;
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(oldloc, err);
- VALIDATE_OR_GOTO(newloc, err);
-
- conf = this->private;
-
- local = dht_local_init(frame, oldloc, NULL, GF_FOP_LINK);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->call_cnt = 1;
-
- cached_subvol = local->cached_subvol;
-
- if (!cached_subvol) {
- gf_msg_debug(this->name, 0, "no cached subvolume for path=%s",
- oldloc->path);
- op_errno = ENOENT;
- goto err;
- }
-
- hashed_subvol = TIER_HASHED_SUBVOL;
-
- ret = loc_copy(&local->loc2, newloc);
- if (ret == -1) {
- op_errno = ENOMEM;
- goto err;
- }
-
- if (hashed_subvol == cached_subvol) {
- STACK_WIND(frame, dht_link_cbk, cached_subvol,
- cached_subvol->fops->link, oldloc, newloc, xdata);
- return 0;
- }
-
- /* Create hardlinks to both the data file on the hot tier
- and the linkto file on the cold tier */
-
- gf_uuid_copy(local->gfid, oldloc->inode->gfid);
-
- STACK_WIND(frame, tier_link_cbk, hashed_subvol, hashed_subvol->fops->link,
- oldloc, newloc, xdata);
-
- return 0;
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND(link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
- return 0;
-}
-
-int
-tier_create_unlink_stale_linkto_cbk(call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- dht_local_t *local = NULL;
-
- local = frame->local;
-
- if (local->params) {
- dict_del(local->params, GLUSTERFS_INTERNAL_FOP_KEY);
- }
-
- DHT_STACK_UNWIND(create, frame, -1, local->op_errno, NULL, NULL, NULL, NULL,
- NULL, NULL);
-
- return 0;
-}
-
-int
-tier_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, fd_t *fd, inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
-{
- xlator_t *prev = NULL;
- int ret = -1;
- dht_local_t *local = NULL;
- xlator_t *hashed_subvol = NULL;
- dht_conf_t *conf = NULL;
-
- local = frame->local;
- conf = this->private;
-
- hashed_subvol = TIER_HASHED_SUBVOL;
-
- if (!local) {
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
-
- if (op_ret == -1) {
- if (local->linked == _gf_true && local->xattr_req) {
- local->op_errno = op_errno;
- local->op_ret = op_ret;
- ret = dht_fill_dict_to_avoid_unlink_of_migrating_file(
- local->xattr_req);
- if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value to "
- "unlink of migrating file");
- goto out;
- }
-
- STACK_WIND(frame, tier_create_unlink_stale_linkto_cbk,
- hashed_subvol, hashed_subvol->fops->unlink, &local->loc,
- 0, local->xattr_req);
- return 0;
- }
- goto out;
- }
-
- prev = cookie;
-
- if (local->loc.parent) {
- dht_inode_ctx_time_update(local->loc.parent, this, preparent, 0);
-
- dht_inode_ctx_time_update(local->loc.parent, this, postparent, 1);
- }
-
- ret = dht_layout_preset(this, prev, inode);
- if (ret != 0) {
- gf_msg_debug(this->name, 0, "could not set preset layout for subvol %s",
- prev->name);
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
-
- local->op_errno = op_errno;
-
- if (local->linked == _gf_true) {
- local->stbuf = *stbuf;
- dht_linkfile_attr_heal(frame, this);
- }
-out:
- if (local) {
- if (local->xattr_req) {
- dict_del(local->xattr_req, TIER_LINKFILE_GFID);
- }
- }
-
- DHT_STRIP_PHASE1_FLAGS(stbuf);
-
- DHT_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf,
- preparent, postparent, xdata);
-
- return 0;
-}
-
-int
-tier_create_linkfile_create_cbk(call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, inode_t *inode,
- struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- dht_local_t *local = NULL;
- xlator_t *cached_subvol = NULL;
- dht_conf_t *conf = NULL;
- int ret = -1;
- unsigned char *gfid = NULL;
-
- local = frame->local;
- if (!local) {
- op_errno = EINVAL;
- goto err;
- }
-
- if (op_ret == -1) {
- local->op_errno = op_errno;
- goto err;
- }
-
- conf = this->private;
- if (!conf) {
- local->op_errno = EINVAL;
- op_errno = EINVAL;
- goto err;
- }
-
- cached_subvol = TIER_UNHASHED_SUBVOL;
-
- if (local->params) {
- dict_del(local->params, conf->link_xattr_name);
- dict_del(local->params, GLUSTERFS_INTERNAL_FOP_KEY);
- }
-
- /*
- * We will delete the linkfile if data file creation fails.
- * When deleting this stale linkfile, there is a possibility
- * for a race between this linkfile deletion and a stale
- * linkfile deletion triggered by another lookup from different
- * client.
- *
- * For eg:
- *
- * Client 1 Client 2
- *
- * 1 linkfile created for foo
- *
- * 2 data file creation failed
- *
- * 3 creating a file with same name
- *
- * 4 lookup before creation deleted
- * the linkfile created by client1
- * considering as a stale linkfile.
- *
- * 5 New linkfile created for foo
- * with different gfid.
- *
- * 6 Trigger linkfile deletion as
- * data file creation failed.
- *
- * 7 Linkfile deleted which is
- * created by client2.
- *
- * 8 Data file created.
- *
- * With this race, we will end up having a file in a non-hashed subvol
- * without a linkfile in hashed subvol.
- *
- * To avoid this, we store the gfid of linkfile created by client, So
- * If we delete the linkfile , we validate gfid of existing file with
- * stored value from posix layer.
- *
- * Storing this value in local->xattr_req as local->params was also used
- * to create the data file. During the linkfile deletion we will use
- * local->xattr_req dictionary.
- */
- if (!local->xattr_req) {
- local->xattr_req = dict_new();
- if (!local->xattr_req) {
- local->op_errno = ENOMEM;
- op_errno = ENOMEM;
- goto err;
- }
- }
-
- gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_char);
- if (!gfid) {
- local->op_errno = ENOMEM;
- op_errno = ENOMEM;
- goto err;
- }
-
- gf_uuid_copy(gfid, stbuf->ia_gfid);
- ret = dict_set_dynptr(local->xattr_req, TIER_LINKFILE_GFID, gfid,
- sizeof(uuid_t));
- if (ret) {
- GF_FREE(gfid);
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value"
- " : key = %s",
- TIER_LINKFILE_GFID);
- }
-
- STACK_WIND_COOKIE(frame, tier_create_cbk, cached_subvol, cached_subvol,
- cached_subvol->fops->create, &local->loc, local->flags,
- local->mode, local->umask, local->fd, local->params);
-
- return 0;
-err:
- DHT_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL,
- NULL);
- return 0;
-}
-
-gf_boolean_t
-tier_is_hot_tier_decommissioned(xlator_t *this)
-{
- dht_conf_t *conf = NULL;
- xlator_t *hot_tier = NULL;
- int i = 0;
-
- conf = this->private;
- hot_tier = conf->subvolumes[1];
-
- if (conf->decommission_subvols_cnt) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (conf->decommissioned_bricks[i] &&
- conf->decommissioned_bricks[i] == hot_tier)
- return _gf_true;
- }
- }
-
- return _gf_false;
-}
-
-int
-tier_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, mode_t umask, fd_t *fd, dict_t *params)
-{
- int op_errno = -1;
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- xlator_t *hot_subvol = NULL;
- xlator_t *cold_subvol = NULL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
-
- conf = this->private;
-
- dht_get_du_info(frame, this, loc);
-
- local = dht_local_init(frame, loc, fd, GF_FOP_CREATE);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- cold_subvol = TIER_HASHED_SUBVOL;
- hot_subvol = TIER_UNHASHED_SUBVOL;
-
- if (conf->subvolumes[0] != cold_subvol) {
- hot_subvol = conf->subvolumes[0];
- }
- /*
- * if hot tier full, write to cold.
- * Also if hot tier is full, create in cold
- */
- if (dht_is_subvol_filled(this, hot_subvol) ||
- tier_is_hot_tier_decommissioned(this)) {
- gf_msg_debug(this->name, 0, "creating %s on %s", loc->path,
- cold_subvol->name);
-
- STACK_WIND_COOKIE(frame, tier_create_cbk, cold_subvol, cold_subvol,
- cold_subvol->fops->create, loc, flags, mode, umask,
- fd, params);
- } else {
- local->params = dict_ref(params);
- local->flags = flags;
- local->mode = mode;
- local->umask = umask;
- local->cached_subvol = hot_subvol;
- local->hashed_subvol = cold_subvol;
-
- gf_msg_debug(this->name, 0, "creating %s on %s (link at %s)", loc->path,
- hot_subvol->name, cold_subvol->name);
-
- dht_linkfile_create(frame, tier_create_linkfile_create_cbk, this,
- hot_subvol, cold_subvol, loc);
-
- goto out;
- }
-out:
- return 0;
-
-err:
-
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL,
- NULL);
-
- return 0;
-}
-
-int
-tier_unlink_nonhashed_linkfile_cbk(call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- dht_local_t *local = NULL;
- xlator_t *prev = NULL;
-
- local = frame->local;
- prev = cookie;
-
- LOCK(&frame->lock);
- {
- if ((op_ret == -1) && (op_errno != ENOENT)) {
- local->op_errno = op_errno;
- local->op_ret = op_ret;
- gf_msg_debug(this->name, op_errno,
- "Unlink link: subvolume %s"
- " returned -1",
- prev->name);
- goto unlock;
- }
-
- local->op_ret = 0;
- }
-unlock:
- UNLOCK(&frame->lock);
-
- if (local->op_ret == -1)
- goto err;
- DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent, NULL);
-
- return 0;
-
-err:
- DHT_STACK_UNWIND(unlink, frame, -1, local->op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int
-tier_unlink_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, inode_t *inode,
- struct iatt *preparent, dict_t *xdata,
- struct iatt *postparent)
-{
- dht_local_t *local = NULL;
- xlator_t *prev = NULL;
- dht_conf_t *conf = NULL;
- xlator_t *hot_subvol = NULL;
-
- local = frame->local;
- prev = cookie;
- conf = this->private;
- hot_subvol = TIER_UNHASHED_SUBVOL;
-
- if (!op_ret) {
- /*
- * linkfile present on hot tier. unlinking the linkfile
- */
- STACK_WIND_COOKIE(frame, tier_unlink_nonhashed_linkfile_cbk, hot_subvol,
- hot_subvol, hot_subvol->fops->unlink, &local->loc,
- local->flags, NULL);
- return 0;
- }
-
- LOCK(&frame->lock);
- {
- if (op_errno == ENOENT) {
- local->op_ret = 0;
- local->op_errno = op_errno;
- } else {
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- }
- gf_msg_debug(this->name, op_errno, "Lookup : subvolume %s returned -1",
- prev->name);
- }
-
- UNLOCK(&frame->lock);
-
- DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent, xdata);
-
- return 0;
-}
-
-int
-tier_unlink_linkfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- dht_local_t *local = NULL;
- xlator_t *prev = NULL;
-
- local = frame->local;
- prev = cookie;
-
- LOCK(&frame->lock);
- {
- /* Ignore EINVAL for tier to ignore error when the file
- does not exist on the other tier */
- if ((op_ret == -1) && !((op_errno == ENOENT) || (op_errno == EINVAL))) {
- local->op_errno = op_errno;
- local->op_ret = op_ret;
- gf_msg_debug(this->name, op_errno,
- "Unlink link: subvolume %s"
- " returned -1",
- prev->name);
- goto unlock;
- }
-
- local->op_ret = 0;
- }
-unlock:
- UNLOCK(&frame->lock);
-
- if (local->op_ret == -1)
- goto err;
-
- DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent, xdata);
-
- return 0;
-
-err:
- DHT_STACK_UNWIND(unlink, frame, -1, local->op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-tier_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
-{
- dht_local_t *local = NULL;
- xlator_t *prev = NULL;
- struct iatt *stbuf = NULL;
- dht_conf_t *conf = NULL;
- int ret = -1;
- xlator_t *hot_tier = NULL;
- xlator_t *cold_tier = NULL;
-
- local = frame->local;
- prev = cookie;
- conf = this->private;
-
- cold_tier = TIER_HASHED_SUBVOL;
- hot_tier = TIER_UNHASHED_SUBVOL;
-
- LOCK(&frame->lock);
- {
- if (op_ret == -1) {
- if (op_errno == ENOENT) {
- local->op_ret = 0;
- } else {
- local->op_ret = -1;
- local->op_errno = op_errno;
- }
- gf_msg_debug(this->name, op_errno,
- "Unlink: subvolume %s returned -1"
- " with errno = %d",
- prev->name, op_errno);
- goto unlock;
- }
-
- local->op_ret = 0;
-
- local->postparent = *postparent;
- local->preparent = *preparent;
-
- if (local->loc.parent) {
- dht_inode_ctx_time_update(local->loc.parent, this,
- &local->preparent, 0);
- dht_inode_ctx_time_update(local->loc.parent, this,
- &local->postparent, 1);
- }
- }
-unlock:
- UNLOCK(&frame->lock);
-
- if (local->op_ret)
- goto out;
-
- if (cold_tier != local->cached_subvol) {
- /*
- * File is present in hot tier, so there will be
- * a link file on cold tier, deleting the linkfile
- * from cold tier
- */
- STACK_WIND_COOKIE(frame, tier_unlink_linkfile_cbk, cold_tier, cold_tier,
- cold_tier->fops->unlink, &local->loc, local->flags,
- xdata);
- return 0;
- }
-
- ret = dict_get_bin(xdata, DHT_IATT_IN_XDATA_KEY, (void **)&stbuf);
- if (!ret && stbuf &&
- ((IS_DHT_MIGRATION_PHASE2(stbuf)) || IS_DHT_MIGRATION_PHASE1(stbuf))) {
- /*
- * File is migrating from cold to hot tier.
- * Delete the destination linkfile.
- */
- STACK_WIND_COOKIE(frame, tier_unlink_lookup_cbk, hot_tier, hot_tier,
- hot_tier->fops->lookup, &local->loc, NULL);
- return 0;
- }
-
-out:
- DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent, xdata);
-
- return 0;
-}
-
-int
-tier_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
- dict_t *xdata)
-{
- xlator_t *cached_subvol = NULL;
- xlator_t *hashed_subvol = NULL;
- dht_conf_t *conf = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
- int ret = -1;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
-
- conf = this->private;
-
- local = dht_local_init(frame, loc, NULL, GF_FOP_UNLINK);
- if (!local) {
- op_errno = ENOMEM;
-
- goto err;
- }
-
- hashed_subvol = TIER_HASHED_SUBVOL;
-
- cached_subvol = local->cached_subvol;
- if (!cached_subvol) {
- gf_msg_debug(this->name, 0, "no cached subvolume for path=%s",
- loc->path);
- op_errno = EINVAL;
- goto err;
- }
-
- local->flags = xflag;
- if (IA_ISREG(loc->inode->ia_type) && (hashed_subvol == cached_subvol)) {
- /*
- * File resides in cold tier. We need to stat
- * the file to see if it is being promoted.
- * If yes we need to delete the destination
- * file as well.
- *
- * Currently we are doing this check only for
- * regular files.
- */
- xdata = xdata ? dict_ref(xdata) : dict_new();
- if (xdata) {
- ret = dict_set_int8(xdata, DHT_IATT_IN_XDATA_KEY, 1);
- if (ret) {
- gf_msg_debug(this->name, 0, "Failed to set dictionary key %s",
- DHT_IATT_IN_XDATA_KEY);
- }
- }
- }
-
- /*
- * File is on hot tier, delete the data file first, then
- * linkfile from cold.
- */
- STACK_WIND_COOKIE(frame, tier_unlink_cbk, cached_subvol, cached_subvol,
- cached_subvol->fops->unlink, loc, xflag, xdata);
- if (xdata)
- dict_unref(xdata);
- return 0;
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND(unlink, frame, -1, op_errno, NULL, NULL, NULL);
-
- return 0;
-}
-
-int
-tier_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, gf_dirent_t *orig_entries, dict_t *xdata)
-{
- gf_dirent_t entries;
- gf_dirent_t *orig_entry = NULL;
- gf_dirent_t *entry = NULL;
- int count = 0;
-
- INIT_LIST_HEAD(&entries.list);
-
- if (op_ret < 0)
- goto unwind;
-
- list_for_each_entry(orig_entry, (&orig_entries->list), list)
- {
- entry = gf_dirent_for_name(orig_entry->d_name);
- if (!entry) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
- "Memory allocation failed ");
- goto unwind;
- }
-
- entry->d_off = orig_entry->d_off;
- entry->d_ino = orig_entry->d_ino;
- entry->d_type = orig_entry->d_type;
- entry->d_len = orig_entry->d_len;
-
- list_add_tail(&entry->list, &entries.list);
- count++;
- }
- op_ret = count;
-
-unwind:
- if (op_ret < 0)
- op_ret = 0;
-
- DHT_STACK_UNWIND(readdir, frame, op_ret, op_errno, &entries, NULL);
-
- gf_dirent_free(&entries);
-
- return 0;
-}
-
-int
-tier_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, gf_dirent_t *orig_entries, dict_t *xdata)
-{
- dht_local_t *local = NULL;
- gf_dirent_t entries;
- gf_dirent_t *orig_entry = NULL;
- gf_dirent_t *entry = NULL;
- xlator_t *prev = NULL;
- xlator_t *next_subvol = NULL;
- off_t next_offset = 0;
- int count = 0;
- dht_conf_t *conf = NULL;
- int ret = 0;
- inode_table_t *itable = NULL;
- inode_t *inode = NULL;
-
- INIT_LIST_HEAD(&entries.list);
- prev = cookie;
- local = frame->local;
- itable = local->fd ? local->fd->inode->table : NULL;
-
- conf = this->private;
- GF_VALIDATE_OR_GOTO(this->name, conf, unwind);
-
- if (op_ret < 0)
- goto done;
-
- list_for_each_entry(orig_entry, (&orig_entries->list), list)
- {
- next_offset = orig_entry->d_off;
-
- if (IA_ISINVAL(orig_entry->d_stat.ia_type)) {
- /*stat failed somewhere- ignore this entry*/
- continue;
- }
-
- entry = gf_dirent_for_name(orig_entry->d_name);
- if (!entry) {
- goto unwind;
- }
-
- entry->d_off = orig_entry->d_off;
- entry->d_stat = orig_entry->d_stat;
- entry->d_ino = orig_entry->d_ino;
- entry->d_type = orig_entry->d_type;
- entry->d_len = orig_entry->d_len;
-
- if (orig_entry->dict)
- entry->dict = dict_ref(orig_entry->dict);
-
- if (check_is_linkfile(NULL, (&orig_entry->d_stat), orig_entry->dict,
- conf->link_xattr_name)) {
- goto entries;
-
- } else if (IA_ISDIR(entry->d_stat.ia_type)) {
- if (orig_entry->inode) {
- dht_inode_ctx_time_update(orig_entry->inode, this,
- &entry->d_stat, 1);
- }
- } else {
- if (orig_entry->inode) {
- ret = dht_layout_preset(this, prev, orig_entry->inode);
- if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0,
- DHT_MSG_LAYOUT_SET_FAILED,
- "failed to link the layout "
- "in inode");
-
- entry->inode = inode_ref(orig_entry->inode);
- } else if (itable) {
- /*
- * orig_entry->inode might be null if any upper
- * layer xlators below client set to null, to
- * force a lookup on the inode even if the inode
- * is present in the inode table. In that case
- * we just update the ctx to make sure we didn't
- * missed anything.
- */
- inode = inode_find(itable, orig_entry->d_stat.ia_gfid);
- if (inode) {
- ret = dht_layout_preset(this, TIER_HASHED_SUBVOL, inode);
- if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0,
- DHT_MSG_LAYOUT_SET_FAILED,
- "failed to link the layout"
- " in inode");
- inode_unref(inode);
- inode = NULL;
- }
- }
- }
-
- entries:
- list_add_tail(&entry->list, &entries.list);
- count++;
- }
- op_ret = count;
-
-done:
- if (count == 0) {
- /* non-zero next_offset means that
- EOF is not yet hit on the current subvol
- */
- if (next_offset != 0) {
- next_subvol = prev;
- } else {
- goto unwind;
- }
-
- STACK_WIND_COOKIE(frame, tier_readdirp_cbk, next_subvol, next_subvol,
- next_subvol->fops->readdirp, local->fd, local->size,
- next_offset, local->xattr);
- return 0;
- }
-
-unwind:
- if (op_ret < 0)
- op_ret = 0;
-
- DHT_STACK_UNWIND(readdirp, frame, op_ret, op_errno, &entries, NULL);
-
- gf_dirent_free(&entries);
-
- return 0;
-}
-
-int
-tier_do_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t yoff, int whichop, dict_t *dict)
-{
- dht_local_t *local = NULL;
- int op_errno = -1;
- xlator_t *hashed_subvol = NULL;
- int ret = 0;
- dht_conf_t *conf = NULL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(fd, err);
- VALIDATE_OR_GOTO(this->private, err);
-
- conf = this->private;
-
- local = dht_local_init(frame, NULL, NULL, whichop);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- local->fd = fd_ref(fd);
- local->size = size;
- local->xattr_req = (dict) ? dict_ref(dict) : NULL;
-
- hashed_subvol = TIER_HASHED_SUBVOL;
-
- /* TODO: do proper readdir */
- if (whichop == GF_FOP_READDIRP) {
- if (dict)
- local->xattr = dict_ref(dict);
- else
- local->xattr = dict_new();
-
- if (local->xattr) {
- ret = dict_set_uint32(local->xattr, conf->link_xattr_name, 256);
- if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value"
- " : key = %s",
- conf->link_xattr_name);
- }
-
- STACK_WIND_COOKIE(frame, tier_readdirp_cbk, hashed_subvol,
- hashed_subvol, hashed_subvol->fops->readdirp, fd,
- size, yoff, local->xattr);
-
- } else {
- STACK_WIND_COOKIE(frame, tier_readdir_cbk, hashed_subvol, hashed_subvol,
- hashed_subvol->fops->readdir, fd, size, yoff,
- local->xattr);
- }
-
- return 0;
-
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND(readdir, frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-int
-tier_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t yoff, dict_t *xdata)
-{
- int op = GF_FOP_READDIR;
- dht_conf_t *conf = NULL;
- int i = 0;
-
- conf = this->private;
- if (!conf)
- goto out;
-
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (!conf->subvolume_status[i]) {
- op = GF_FOP_READDIRP;
- break;
- }
- }
-
- if (conf->use_readdirp)
- op = GF_FOP_READDIRP;
-
-out:
- tier_do_readdir(frame, this, fd, size, yoff, op, 0);
- return 0;
-}
-
-int
-tier_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t yoff, dict_t *dict)
-{
- tier_do_readdir(frame, this, fd, size, yoff, GF_FOP_READDIRP, dict);
- return 0;
-}
-
-int
-tier_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, struct statvfs *statvfs, dict_t *xdata)
-{
- gf_boolean_t event = _gf_false;
- qdstatfs_action_t action = qdstatfs_action_OFF;
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- int bsize = 0;
- int frsize = 0;
- GF_UNUSED int ret = 0;
- unsigned long new_usage = 0;
- unsigned long cur_usage = 0;
- xlator_t *prev = NULL;
- dht_conf_t *conf = NULL;
- tier_statvfs_t *tier_stat = NULL;
-
- prev = cookie;
- local = frame->local;
- GF_ASSERT(local);
-
- conf = this->private;
-
- if (xdata)
- ret = dict_get_int8(xdata, "quota-deem-statfs", (int8_t *)&event);
-
- tier_stat = &local->tier_statvfs;
-
- LOCK(&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- goto unlock;
- }
- if (!statvfs) {
- op_errno = EINVAL;
- local->op_ret = -1;
- goto unlock;
- }
- local->op_ret = 0;
-
- if (local->quota_deem_statfs) {
- if (event == _gf_true) {
- action = qdstatfs_action_COMPARE;
- } else {
- action = qdstatfs_action_NEGLECT;
- }
- } else {
- if (event == _gf_true) {
- action = qdstatfs_action_REPLACE;
- local->quota_deem_statfs = _gf_true;
- }
- }
-
- if (local->quota_deem_statfs) {
- switch (action) {
- case qdstatfs_action_NEGLECT:
- goto unlock;
-
- case qdstatfs_action_REPLACE:
- local->statvfs = *statvfs;
- goto unlock;
-
- case qdstatfs_action_COMPARE:
- new_usage = statvfs->f_blocks - statvfs->f_bfree;
- cur_usage = local->statvfs.f_blocks -
- local->statvfs.f_bfree;
-
- /* Take the max of the usage from subvols */
- if (new_usage >= cur_usage)
- local->statvfs = *statvfs;
- goto unlock;
-
- default:
- break;
- }
- }
-
- if (local->statvfs.f_bsize != 0) {
- bsize = max(local->statvfs.f_bsize, statvfs->f_bsize);
- frsize = max(local->statvfs.f_frsize, statvfs->f_frsize);
- dht_normalize_stats(&local->statvfs, bsize, frsize);
- dht_normalize_stats(statvfs, bsize, frsize);
- } else {
- local->statvfs.f_bsize = statvfs->f_bsize;
- local->statvfs.f_frsize = statvfs->f_frsize;
- }
-
- if (prev == TIER_HASHED_SUBVOL) {
- local->statvfs.f_blocks = statvfs->f_blocks;
- local->statvfs.f_files = statvfs->f_files;
- local->statvfs.f_fsid = statvfs->f_fsid;
- local->statvfs.f_flag = statvfs->f_flag;
- local->statvfs.f_namemax = statvfs->f_namemax;
- tier_stat->blocks_used = (statvfs->f_blocks - statvfs->f_bfree);
- tier_stat->pblocks_used = (statvfs->f_blocks - statvfs->f_bavail);
- tier_stat->files_used = (statvfs->f_files - statvfs->f_ffree);
- tier_stat->pfiles_used = (statvfs->f_files - statvfs->f_favail);
- tier_stat->hashed_fsid = statvfs->f_fsid;
- } else {
- tier_stat->unhashed_fsid = statvfs->f_fsid;
- tier_stat->unhashed_blocks_used = (statvfs->f_blocks -
- statvfs->f_bfree);
- tier_stat->unhashed_pblocks_used = (statvfs->f_blocks -
- statvfs->f_bavail);
- tier_stat->unhashed_files_used = (statvfs->f_files -
- statvfs->f_ffree);
- tier_stat->unhashed_pfiles_used = (statvfs->f_files -
- statvfs->f_favail);
- }
- }
-unlock:
- UNLOCK(&frame->lock);
-
- this_call_cnt = dht_frame_return(frame);
- if (is_last_call(this_call_cnt)) {
- if (tier_stat->unhashed_fsid != tier_stat->hashed_fsid) {
- tier_stat->blocks_used += tier_stat->unhashed_blocks_used;
- tier_stat->pblocks_used += tier_stat->unhashed_pblocks_used;
- tier_stat->files_used += tier_stat->unhashed_files_used;
- tier_stat->pfiles_used += tier_stat->unhashed_pfiles_used;
- }
- local->statvfs.f_bfree = local->statvfs.f_blocks -
- tier_stat->blocks_used;
- local->statvfs.f_bavail = local->statvfs.f_blocks -
- tier_stat->pblocks_used;
- local->statvfs.f_ffree = local->statvfs.f_files - tier_stat->files_used;
- local->statvfs.f_favail = local->statvfs.f_files -
- tier_stat->pfiles_used;
- DHT_STACK_UNWIND(statfs, frame, local->op_ret, local->op_errno,
- &local->statvfs, xdata);
- }
-
- return 0;
-}
-
-int
-tier_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
-{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- int op_errno = -1;
- int i = -1;
- inode_t *inode = NULL;
- inode_table_t *itable = NULL;
- uuid_t root_gfid = {
- 0,
- };
- loc_t newloc = {
- 0,
- };
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
- VALIDATE_OR_GOTO(this->private, err);
-
- conf = this->private;
-
- local = dht_local_init(frame, NULL, NULL, GF_FOP_STATFS);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- if (loc->inode && !IA_ISDIR(loc->inode->ia_type)) {
- itable = loc->inode->table;
- if (!itable) {
- op_errno = EINVAL;
- goto err;
- }
-
- loc = &local->loc2;
- root_gfid[15] = 1;
-
- inode = inode_find(itable, root_gfid);
- if (!inode) {
- op_errno = EINVAL;
- goto err;
- }
-
- dht_build_root_loc(inode, &newloc);
- loc = &newloc;
- }
-
- local->call_cnt = conf->subvolume_cnt;
-
- for (i = 0; i < conf->subvolume_cnt; i++) {
- STACK_WIND_COOKIE(frame, tier_statfs_cbk, conf->subvolumes[i],
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->statfs, loc, xdata);
- }
-
- return 0;
-
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND(statfs, frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
diff --git a/xlators/cluster/dht/src/tier-common.h b/xlators/cluster/dht/src/tier-common.h
deleted file mode 100644
index b1ebaa8004d..00000000000
--- a/xlators/cluster/dht/src/tier-common.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _TIER_COMMON_H_
-#define _TIER_COMMON_H_
-/* Function definitions */
-int
-tier_create_unlink_stale_linkto_cbk(call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-int
-tier_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, fd_t *fd, inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata);
-
-int
-tier_create_linkfile_create_cbk(call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, inode_t *inode,
- struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-int
-tier_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, mode_t umask, fd_t *fd, dict_t *params);
-
-int32_t
-tier_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
- dict_t *xdata);
-
-int32_t
-tier_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t off, dict_t *dict);
-
-int
-tier_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t yoff, dict_t *xdata);
-
-int
-tier_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
- dict_t *xdata);
-
-int
-tier_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata);
-
-#endif
diff --git a/xlators/cluster/dht/src/tier.c b/xlators/cluster/dht/src/tier.c
deleted file mode 100644
index a8cccaf019e..00000000000
--- a/xlators/cluster/dht/src/tier.c
+++ /dev/null
@@ -1,3090 +0,0 @@
-/*
- Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include <dlfcn.h>
-
-#include "dht-common.h"
-#include "tier.h"
-#include "tier-common.h"
-#include <glusterfs/syscall.h>
-#include <glusterfs/events.h>
-#include "tier-ctr-interface.h"
-
-/*Hard coded DB info*/
-static gfdb_db_type_t dht_tier_db_type = GFDB_SQLITE3;
-/*Hard coded DB info*/
-
-/*Mutex for updating the data movement stats*/
-static pthread_mutex_t dm_stat_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-/* Stores the path location of promotion query files */
-static char *promotion_qfile;
-/* Stores the path location of demotion query files */
-static char *demotion_qfile;
-
-static void *libhandle;
-static gfdb_methods_t gfdb_methods;
-
-#define DB_QUERY_RECORD_SIZE 4096
-
-/*
- * Closes all the fds and frees the qfile_array
- * */
-static void
-qfile_array_free(tier_qfile_array_t *qfile_array)
-{
- ssize_t i = 0;
-
- if (qfile_array) {
- if (qfile_array->fd_array) {
- for (i = 0; i < qfile_array->array_size; i++) {
- if (qfile_array->fd_array[i] != -1) {
- sys_close(qfile_array->fd_array[i]);
- }
- }
- }
- GF_FREE(qfile_array->fd_array);
- }
- GF_FREE(qfile_array);
-}
-
-/* Create a new query file list with given size */
-static tier_qfile_array_t *
-qfile_array_new(ssize_t array_size)
-{
- int ret = -1;
- tier_qfile_array_t *qfile_array = NULL;
- ssize_t i = 0;
-
- GF_VALIDATE_OR_GOTO("tier", (array_size > 0), out);
-
- qfile_array = GF_CALLOC(1, sizeof(tier_qfile_array_t),
- gf_tier_mt_qfile_array_t);
- if (!qfile_array) {
- gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to allocate memory for tier_qfile_array_t");
- goto out;
- }
-
- qfile_array->fd_array = GF_MALLOC(array_size * sizeof(int),
- gf_dht_mt_int32_t);
- if (!qfile_array->fd_array) {
- gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to allocate memory for "
- "tier_qfile_array_t->fd_array");
- goto out;
- }
-
- /* Init all the fds to -1 */
- for (i = 0; i < array_size; i++) {
- qfile_array->fd_array[i] = -1;
- }
-
- qfile_array->array_size = array_size;
- qfile_array->next_index = 0;
-
- /* Set exhausted count to list size as the list is empty */
- qfile_array->exhausted_count = qfile_array->array_size;
-
- ret = 0;
-out:
- if (ret) {
- qfile_array_free(qfile_array);
- qfile_array = NULL;
- }
- return qfile_array;
-}
-
-/* Checks if the query file list is empty or totally exhausted. */
-static gf_boolean_t
-is_qfile_array_empty(tier_qfile_array_t *qfile_array)
-{
- return (qfile_array->exhausted_count == qfile_array->array_size)
- ? _gf_true
- : _gf_false;
-}
-
-/* Shifts the next_fd pointer to the next available fd in the list */
-static void
-shift_next_index(tier_qfile_array_t *qfile_array)
-{
- int qfile_fd = 0;
- int spin_count = 0;
-
- if (is_qfile_array_empty(qfile_array)) {
- return;
- }
-
- do {
- /* change next_index in a rotional manner */
- (qfile_array->next_index == (qfile_array->array_size - 1))
- ? qfile_array->next_index = 0
- : qfile_array->next_index++;
-
- qfile_fd = (qfile_array->fd_array[qfile_array->next_index]);
-
- spin_count++;
-
- } while ((qfile_fd == -1) && (spin_count < qfile_array->array_size));
-}
-
-/*
- * This is a non-thread safe function to read query records
- * from a list of query files in a Round-Robin manner.
- * As in when the query files get exhuasted they are closed.
- * Returns:
- * 0 if all the query records in all the query files of the list are
- * exhausted.
- * > 0 if a query record is successfully read. Indicates the size of the query
- * record read.
- * < 0 if there was failure
- * */
-static int
-read_query_record_list(tier_qfile_array_t *qfile_array,
- gfdb_query_record_t **query_record)
-{
- int ret = -1;
- int qfile_fd = 0;
-
- GF_VALIDATE_OR_GOTO("tier", qfile_array, out);
- GF_VALIDATE_OR_GOTO("tier", qfile_array->fd_array, out);
-
- do {
- if (is_qfile_array_empty(qfile_array)) {
- ret = 0;
- break;
- }
-
- qfile_fd = qfile_array->fd_array[qfile_array->next_index];
- ret = gfdb_methods.gfdb_read_query_record(qfile_fd, query_record);
- if (ret <= 0) {
- /*The qfile_fd has reached EOF or
- * there was an error.
- * 1. Close the exhausted fd
- * 2. increment the exhausted count
- * 3. shift next_qfile to next qfile
- **/
- sys_close(qfile_fd);
- qfile_array->fd_array[qfile_array->next_index] = -1;
- qfile_array->exhausted_count++;
- /* shift next_qfile to next qfile */
- shift_next_index(qfile_array);
- continue;
- } else {
- /* shift next_qfile to next qfile */
- shift_next_index(qfile_array);
- break;
- }
- } while (1);
-out:
- return ret;
-}
-
-/* Check and update the watermark every WM_INTERVAL seconds */
-#define WM_INTERVAL 5
-#define WM_INTERVAL_EMERG 1
-
-static int
-tier_check_same_node(xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag)
-{
- int ret = -1;
- dict_t *dict = NULL;
- char *uuid_str = NULL;
- uuid_t node_uuid = {
- 0,
- };
-
- GF_VALIDATE_OR_GOTO("tier", this, out);
- GF_VALIDATE_OR_GOTO(this->name, loc, out);
- GF_VALIDATE_OR_GOTO(this->name, defrag, out);
-
- if (syncop_getxattr(this, loc, &dict, GF_XATTR_NODE_UUID_KEY, NULL, NULL)) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Unable to get NODE_UUID_KEY %s %s\n", loc->name, loc->path);
- goto out;
- }
-
- if (dict_get_str(dict, GF_XATTR_NODE_UUID_KEY, &uuid_str) < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to get node-uuids for %s", loc->path);
- goto out;
- }
-
- if (gf_uuid_parse(uuid_str, node_uuid)) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "uuid_parse failed for %s", loc->path);
- goto out;
- }
-
- if (gf_uuid_compare(node_uuid, defrag->node_uuid)) {
- gf_msg_debug(this->name, 0, "%s does not belong to this node",
- loc->path);
- ret = 1;
- goto out;
- }
-
- ret = 0;
-out:
- if (dict)
- dict_unref(dict);
-
- return ret;
-}
-
-int
-tier_get_fs_stat(xlator_t *this, loc_t *root_loc)
-{
- int ret = 0;
- gf_defrag_info_t *defrag = NULL;
- dht_conf_t *conf = NULL;
- dict_t *xdata = NULL;
- struct statvfs statfs = {
- 0,
- };
- gf_tier_conf_t *tier_conf = NULL;
-
- conf = this->private;
- if (!conf) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS,
- "conf is NULL");
- ret = -1;
- goto exit;
- }
-
- defrag = conf->defrag;
- if (!defrag) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS,
- "defrag is NULL");
- ret = -1;
- goto exit;
- }
-
- tier_conf = &defrag->tier_conf;
-
- xdata = dict_new();
- if (!xdata) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
- "failed to allocate dictionary");
- ret = -1;
- goto exit;
- }
-
- ret = dict_set_int8(xdata, GF_INTERNAL_IGNORE_DEEM_STATFS, 1);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
- "Failed to set " GF_INTERNAL_IGNORE_DEEM_STATFS " in dict");
- ret = -1;
- goto exit;
- }
-
- /* Find how much free space is on the hot subvolume.
- * Then see if that value */
- /* is less than or greater than user defined watermarks.
- * Stash results in */
- /* the tier_conf data structure. */
-
- ret = syncop_statfs(conf->subvolumes[1], root_loc, &statfs, xdata, NULL);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LOG_TIER_STATUS,
- "Unable to obtain statfs.");
- goto exit;
- }
-
- pthread_mutex_lock(&dm_stat_mutex);
-
- tier_conf->block_size = statfs.f_bsize;
- tier_conf->blocks_total = statfs.f_blocks;
- tier_conf->blocks_used = statfs.f_blocks - statfs.f_bfree;
-
- tier_conf->percent_full = GF_PERCENTAGE(tier_conf->blocks_used,
- statfs.f_blocks);
- pthread_mutex_unlock(&dm_stat_mutex);
-
-exit:
- if (xdata)
- dict_unref(xdata);
- return ret;
-}
-
-static void
-tier_send_watermark_event(const char *volname, tier_watermark_op_t old_wm,
- tier_watermark_op_t new_wm)
-{
- if (old_wm == TIER_WM_LOW || old_wm == TIER_WM_NONE) {
- if (new_wm == TIER_WM_MID) {
- gf_event(EVENT_TIER_WATERMARK_RAISED_TO_MID, "vol=%s", volname);
- } else if (new_wm == TIER_WM_HI) {
- gf_event(EVENT_TIER_WATERMARK_HI, "vol=%s", volname);
- }
- } else if (old_wm == TIER_WM_MID) {
- if (new_wm == TIER_WM_LOW) {
- gf_event(EVENT_TIER_WATERMARK_DROPPED_TO_LOW, "vol=%s", volname);
- } else if (new_wm == TIER_WM_HI) {
- gf_event(EVENT_TIER_WATERMARK_HI, "vol=%s", volname);
- }
- } else if (old_wm == TIER_WM_HI) {
- if (new_wm == TIER_WM_MID) {
- gf_event(EVENT_TIER_WATERMARK_DROPPED_TO_MID, "vol=%s", volname);
- } else if (new_wm == TIER_WM_LOW) {
- gf_event(EVENT_TIER_WATERMARK_DROPPED_TO_LOW, "vol=%s", volname);
- }
- }
-}
-
-int
-tier_check_watermark(xlator_t *this)
-{
- int ret = -1;
- gf_defrag_info_t *defrag = NULL;
- dht_conf_t *conf = NULL;
- gf_tier_conf_t *tier_conf = NULL;
- tier_watermark_op_t wm = TIER_WM_NONE;
-
- conf = this->private;
- if (!conf)
- goto exit;
-
- defrag = conf->defrag;
- if (!defrag)
- goto exit;
-
- tier_conf = &defrag->tier_conf;
-
- if (tier_conf->percent_full < tier_conf->watermark_low) {
- wm = TIER_WM_LOW;
-
- } else if (tier_conf->percent_full < tier_conf->watermark_hi) {
- wm = TIER_WM_MID;
-
- } else {
- wm = TIER_WM_HI;
- }
-
- if (wm != tier_conf->watermark_last) {
- tier_send_watermark_event(tier_conf->volname, tier_conf->watermark_last,
- wm);
-
- tier_conf->watermark_last = wm;
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "Tier watermark now %d", wm);
- }
-
- ret = 0;
-
-exit:
- return ret;
-}
-
-static gf_boolean_t
-is_hot_tier_full(gf_tier_conf_t *tier_conf)
-{
- if (tier_conf && (tier_conf->mode == TIER_MODE_WM) &&
- (tier_conf->watermark_last == TIER_WM_HI))
- return _gf_true;
-
- return _gf_false;
-}
-
-int
-tier_do_migration(xlator_t *this, int promote)
-{
- gf_defrag_info_t *defrag = NULL;
- dht_conf_t *conf = NULL;
- long rand = 0;
- int migrate = 0;
- gf_tier_conf_t *tier_conf = NULL;
-
- conf = this->private;
- if (!conf)
- goto exit;
-
- defrag = conf->defrag;
- if (!defrag)
- goto exit;
-
- if (tier_check_watermark(this) != 0) {
- gf_msg(this->name, GF_LOG_CRITICAL, errno, DHT_MSG_LOG_TIER_ERROR,
- "Failed to get watermark");
- goto exit;
- }
-
- tier_conf = &defrag->tier_conf;
-
- switch (tier_conf->watermark_last) {
- case TIER_WM_LOW:
- migrate = promote ? 1 : 0;
- break;
- case TIER_WM_HI:
- migrate = promote ? 0 : 1;
- break;
- case TIER_WM_MID:
- /* coverity[DC.WEAK_CRYPTO] */
- rand = random() % 100;
- if (promote) {
- migrate = (rand > tier_conf->percent_full);
- } else {
- migrate = (rand <= tier_conf->percent_full);
- }
- break;
- }
-
-exit:
- return migrate;
-}
-
-int
-tier_migrate(xlator_t *this, int is_promotion, dict_t *migrate_data, loc_t *loc,
- gf_tier_conf_t *tier_conf)
-{
- int ret = -1;
-
- pthread_mutex_lock(&tier_conf->pause_mutex);
- if (is_promotion)
- tier_conf->promote_in_progress = 1;
- else
- tier_conf->demote_in_progress = 1;
- pthread_mutex_unlock(&tier_conf->pause_mutex);
-
- /* Data migration */
- ret = syncop_setxattr(this, loc, migrate_data, 0, NULL, NULL);
-
- pthread_mutex_lock(&tier_conf->pause_mutex);
- if (is_promotion)
- tier_conf->promote_in_progress = 0;
- else
- tier_conf->demote_in_progress = 0;
- pthread_mutex_unlock(&tier_conf->pause_mutex);
-
- return ret;
-}
-
-/* returns _gf_true: if file can be promoted
- * returns _gf_false: if file cannot be promoted
- */
-static gf_boolean_t
-tier_can_promote_file(xlator_t *this, char const *file_name,
- struct iatt *current, gf_defrag_info_t *defrag)
-{
- gf_boolean_t ret = _gf_false;
- fsblkcnt_t estimated_usage = 0;
-
- if (defrag->tier_conf.tier_max_promote_size &&
- (current->ia_size > defrag->tier_conf.tier_max_promote_size)) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "File %s (gfid:%s) with size (%" PRIu64
- ") exceeds maxsize "
- "(%d) for promotion. File will not be promoted.",
- file_name, uuid_utoa(current->ia_gfid), current->ia_size,
- defrag->tier_conf.tier_max_promote_size);
- goto err;
- }
-
- /* bypass further validations for TEST mode */
- if (defrag->tier_conf.mode != TIER_MODE_WM) {
- ret = _gf_true;
- goto err;
- }
-
- /* convert the file size to blocks as per the block size of the
- * destination tier
- * NOTE: add (block_size - 1) to get the correct block size when
- * there is a remainder after a modulo
- */
- estimated_usage = ((current->ia_size + defrag->tier_conf.block_size - 1) /
- defrag->tier_conf.block_size) +
- defrag->tier_conf.blocks_used;
-
- /* test if the estimated block usage goes above HI watermark */
- if (GF_PERCENTAGE(estimated_usage, defrag->tier_conf.blocks_total) >=
- defrag->tier_conf.watermark_hi) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "Estimated block count consumption on "
- "hot tier (%" PRIu64
- ") exceeds hi watermark (%d%%). "
- "File will not be promoted.",
- estimated_usage, defrag->tier_conf.watermark_hi);
- goto err;
- }
- ret = _gf_true;
-err:
- return ret;
-}
-
-static int
-tier_set_migrate_data(dict_t *migrate_data)
-{
- int failed = 1;
-
- failed = dict_set_str(migrate_data, GF_XATTR_FILE_MIGRATE_KEY, "force");
- if (failed) {
- goto bail_out;
- }
-
- /* Flag to suggest the xattr call is from migrator */
- failed = dict_set_str(migrate_data, "from.migrator", "yes");
- if (failed) {
- goto bail_out;
- }
-
- /* Flag to suggest its a tiering migration
- * The reason for this dic key-value is that
- * promotions and demotions are multithreaded
- * so the original frame from gf_defrag_start()
- * is not carried. A new frame will be created when
- * we do syncop_setxattr(). This does not have the
- * frame->root->pid of the original frame. So we pass
- * this dic key-value when we do syncop_setxattr() to do
- * data migration and set the frame->root->pid to
- * GF_CLIENT_PID_TIER_DEFRAG in dht_setxattr() just before
- * calling dht_start_rebalance_task() */
- failed = dict_set_str(migrate_data, TIERING_MIGRATION_KEY, "yes");
- if (failed) {
- goto bail_out;
- }
-
- failed = 0;
-
-bail_out:
- return failed;
-}
-
-static char *
-tier_get_parent_path(xlator_t *this, loc_t *p_loc, struct iatt *par_stbuf,
- int *per_link_status)
-{
- int ret = -1;
- char *parent_path = NULL;
- dict_t *xdata_request = NULL;
- dict_t *xdata_response = NULL;
-
- xdata_request = dict_new();
- if (!xdata_request) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to create xdata_request dict");
- goto err;
- }
- ret = dict_set_int32(xdata_request, GET_ANCESTRY_PATH_KEY, 42);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to set value to dict : key %s \n",
- GET_ANCESTRY_PATH_KEY);
- goto err;
- }
-
- ret = syncop_lookup(this, p_loc, par_stbuf, NULL, xdata_request,
- &xdata_response);
- /* When the parent gfid is a stale entry, the lookup
- * will fail and stop the demotion process.
- * The parent gfid can be stale when a huge folder is
- * deleted while the files within it are being migrated
- */
- if (ret == -ESTALE) {
- gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_STALE_LOOKUP,
- "Stale entry in parent lookup for %s", uuid_utoa(p_loc->gfid));
- *per_link_status = 1;
- goto err;
- } else if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LOG_TIER_ERROR,
- "Error in parent lookup for %s", uuid_utoa(p_loc->gfid));
- *per_link_status = -1;
- goto err;
- }
- ret = dict_get_str(xdata_response, GET_ANCESTRY_PATH_KEY, &parent_path);
- if (ret || !parent_path) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to get parent path for %s", uuid_utoa(p_loc->gfid));
- *per_link_status = -1;
- goto err;
- }
-
-err:
- if (xdata_request) {
- dict_unref(xdata_request);
- }
-
- if (xdata_response) {
- dict_unref(xdata_response);
- xdata_response = NULL;
- }
-
- return parent_path;
-}
-
-static int
-tier_get_file_name_and_path(xlator_t *this, uuid_t gfid,
- gfdb_link_info_t *link_info,
- char const *parent_path, loc_t *loc,
- int *per_link_status)
-{
- int ret = -1;
-
- loc->name = gf_strdup(link_info->file_name);
- if (!loc->name) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Memory "
- "allocation failed for %s",
- uuid_utoa(gfid));
- *per_link_status = -1;
- goto err;
- }
- ret = gf_asprintf((char **)&(loc->path), "%s/%s", parent_path, loc->name);
- if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to "
- "construct file path for %s %s\n",
- parent_path, loc->name);
- *per_link_status = -1;
- goto err;
- }
-
- ret = 0;
-
-err:
- return ret;
-}
-
-static int
-tier_lookup_file(xlator_t *this, loc_t *p_loc, loc_t *loc, struct iatt *current,
- int *per_link_status)
-{
- int ret = -1;
-
- ret = syncop_lookup(this, loc, current, NULL, NULL, NULL);
-
- /* The file may be deleted even when the parent
- * is available and the lookup will
- * return a stale entry which would stop the
- * migration. so if its a stale entry, then skip
- * the file and keep migrating.
- */
- if (ret == -ESTALE) {
- gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_STALE_LOOKUP,
- "Stale lookup for %s", uuid_utoa(p_loc->gfid));
- *per_link_status = 1;
- goto err;
- } else if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LOG_TIER_ERROR,
- "Failed to "
- "lookup file %s\n",
- loc->name);
- *per_link_status = -1;
- goto err;
- }
- ret = 0;
-
-err:
- return ret;
-}
-
-static gf_boolean_t
-tier_is_file_already_at_destination(xlator_t *src_subvol,
- query_cbk_args_t *query_cbk_args,
- dht_conf_t *conf, int *per_link_status)
-{
- gf_boolean_t at_destination = _gf_true;
-
- if (src_subvol == NULL) {
- *per_link_status = 1;
- goto err;
- }
- if (query_cbk_args->is_promotion && src_subvol == conf->subvolumes[1]) {
- *per_link_status = 1;
- goto err;
- }
-
- if (!query_cbk_args->is_promotion && src_subvol == conf->subvolumes[0]) {
- *per_link_status = 1;
- goto err;
- }
- at_destination = _gf_false;
-
-err:
- return at_destination;
-}
-
-static void
-tier_update_migration_counters(query_cbk_args_t *query_cbk_args,
- gf_defrag_info_t *defrag,
- uint64_t *total_migrated_bytes, int *total_files)
-{
- if (query_cbk_args->is_promotion) {
- defrag->total_files_promoted++;
- *total_migrated_bytes += defrag->tier_conf.st_last_promoted_size;
- pthread_mutex_lock(&dm_stat_mutex);
- defrag->tier_conf.blocks_used += defrag->tier_conf
- .st_last_promoted_size;
- pthread_mutex_unlock(&dm_stat_mutex);
- } else {
- defrag->total_files_demoted++;
- *total_migrated_bytes += defrag->tier_conf.st_last_demoted_size;
- pthread_mutex_lock(&dm_stat_mutex);
- defrag->tier_conf.blocks_used -= defrag->tier_conf.st_last_demoted_size;
- pthread_mutex_unlock(&dm_stat_mutex);
- }
- if (defrag->tier_conf.blocks_total) {
- pthread_mutex_lock(&dm_stat_mutex);
- defrag->tier_conf.percent_full = GF_PERCENTAGE(
- defrag->tier_conf.blocks_used, defrag->tier_conf.blocks_total);
- pthread_mutex_unlock(&dm_stat_mutex);
- }
-
- (*total_files)++;
-}
-
-static int
-tier_migrate_link(xlator_t *this, dht_conf_t *conf, uuid_t gfid,
- gfdb_link_info_t *link_info, gf_defrag_info_t *defrag,
- query_cbk_args_t *query_cbk_args, dict_t *migrate_data,
- int *per_link_status, int *total_files,
- uint64_t *total_migrated_bytes)
-{
- int ret = -1;
- struct iatt current = {
- 0,
- };
- struct iatt par_stbuf = {
- 0,
- };
- loc_t p_loc = {
- 0,
- };
- loc_t loc = {
- 0,
- };
- xlator_t *src_subvol = NULL;
- inode_t *linked_inode = NULL;
- char *parent_path = NULL;
-
- /* Lookup for parent and get the path of parent */
- gf_uuid_copy(p_loc.gfid, link_info->pargfid);
- p_loc.inode = inode_new(defrag->root_inode->table);
- if (!p_loc.inode) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to create reference to inode"
- " for %s",
- uuid_utoa(p_loc.gfid));
-
- *per_link_status = -1;
- goto err;
- }
-
- parent_path = tier_get_parent_path(this, &p_loc, &par_stbuf,
- per_link_status);
- if (!parent_path) {
- goto err;
- }
-
- linked_inode = inode_link(p_loc.inode, NULL, NULL, &par_stbuf);
- inode_unref(p_loc.inode);
- p_loc.inode = linked_inode;
-
- /* Preparing File Inode */
- gf_uuid_copy(loc.gfid, gfid);
- loc.inode = inode_new(defrag->root_inode->table);
- gf_uuid_copy(loc.pargfid, link_info->pargfid);
- loc.parent = inode_ref(p_loc.inode);
-
- /* Get filename and Construct file path */
- if (tier_get_file_name_and_path(this, gfid, link_info, parent_path, &loc,
- per_link_status) != 0) {
- goto err;
- }
- gf_uuid_copy(loc.parent->gfid, link_info->pargfid);
-
- /* lookup file inode */
- if (tier_lookup_file(this, &p_loc, &loc, &current, per_link_status) != 0) {
- goto err;
- }
-
- if (query_cbk_args->is_promotion) {
- if (!tier_can_promote_file(this, link_info->file_name, &current,
- defrag)) {
- *per_link_status = 1;
- goto err;
- }
- }
-
- linked_inode = inode_link(loc.inode, NULL, NULL, &current);
- inode_unref(loc.inode);
- loc.inode = linked_inode;
-
- /*
- * Do not promote/demote if file already is where it
- * should be. It means another brick moved the file
- * so is not an error. So we set per_link_status = 1
- * so that we ignore counting this.
- */
- src_subvol = dht_subvol_get_cached(this, loc.inode);
-
- if (tier_is_file_already_at_destination(src_subvol, query_cbk_args, conf,
- per_link_status)) {
- goto err;
- }
-
- gf_msg_debug(this->name, 0, "Tier %s: src_subvol %s file %s",
- (query_cbk_args->is_promotion ? "promote" : "demote"),
- src_subvol->name, loc.path);
-
- ret = tier_check_same_node(this, &loc, defrag);
- if (ret != 0) {
- if (ret < 0) {
- *per_link_status = -1;
- goto err;
- }
- ret = 0;
- /* By setting per_link_status to 1 we are
- * ignoring this status and will not be counting
- * this file for migration */
- *per_link_status = 1;
- goto err;
- }
-
- gf_uuid_copy(loc.gfid, loc.inode->gfid);
-
- if (gf_defrag_get_pause_state(&defrag->tier_conf) != TIER_RUNNING) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "Tiering paused. "
- "Exiting tier_migrate_link");
- goto err;
- }
-
- ret = tier_migrate(this, query_cbk_args->is_promotion, migrate_data, &loc,
- &defrag->tier_conf);
-
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LOG_TIER_ERROR,
- "Failed to "
- "migrate %s ",
- loc.path);
- *per_link_status = -1;
- goto err;
- }
-
- tier_update_migration_counters(query_cbk_args, defrag, total_migrated_bytes,
- total_files);
-
- ret = 0;
-
-err:
- GF_FREE((char *)loc.name);
- loc.name = NULL;
- loc_wipe(&loc);
- loc_wipe(&p_loc);
-
- if ((*total_files >= defrag->tier_conf.max_migrate_files) ||
- (*total_migrated_bytes > defrag->tier_conf.max_migrate_bytes)) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "Reached cycle migration limit."
- "migrated bytes %" PRId64 " files %d",
- *total_migrated_bytes, *total_files);
- ret = -1;
- }
-
- return ret;
-}
-
-static int
-tier_migrate_using_query_file(void *_args)
-{
- int ret = -1;
- query_cbk_args_t *query_cbk_args = (query_cbk_args_t *)_args;
- xlator_t *this = NULL;
- gf_defrag_info_t *defrag = NULL;
- gfdb_query_record_t *query_record = NULL;
- gfdb_link_info_t *link_info = NULL;
- dict_t *migrate_data = NULL;
- /*
- * per_file_status and per_link_status
- * 0 : success
- * -1 : failure
- * 1 : ignore the status and don't count for migration
- * */
- int per_file_status = 0;
- int per_link_status = 0;
- int total_status = 0;
- dht_conf_t *conf = NULL;
- uint64_t total_migrated_bytes = 0;
- int total_files = 0;
- loc_t root_loc = {0};
- gfdb_time_t start_time = {0};
- gfdb_time_t current_time = {0};
- int total_time = 0;
- int max_time = 0;
- gf_boolean_t emergency_demote_mode = _gf_false;
-
- GF_VALIDATE_OR_GOTO("tier", query_cbk_args, out);
- GF_VALIDATE_OR_GOTO("tier", query_cbk_args->this, out);
- this = query_cbk_args->this;
- GF_VALIDATE_OR_GOTO(this->name, query_cbk_args->defrag, out);
- GF_VALIDATE_OR_GOTO(this->name, query_cbk_args->qfile_array, out);
- GF_VALIDATE_OR_GOTO(this->name, this->private, out);
-
- conf = this->private;
-
- defrag = query_cbk_args->defrag;
- migrate_data = dict_new();
- if (!migrate_data)
- goto out;
-
- emergency_demote_mode = (!query_cbk_args->is_promotion &&
- is_hot_tier_full(&defrag->tier_conf));
-
- if (tier_set_migrate_data(migrate_data) != 0) {
- goto out;
- }
-
- dht_build_root_loc(defrag->root_inode, &root_loc);
-
- ret = gettimeofday(&start_time, NULL);
- if (query_cbk_args->is_promotion) {
- max_time = defrag->tier_conf.tier_promote_frequency;
- } else {
- max_time = defrag->tier_conf.tier_demote_frequency;
- }
-
- /* Per file */
- while ((ret = read_query_record_list(query_cbk_args->qfile_array,
- &query_record)) != 0) {
- if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to fetch query record "
- "from query file");
- goto out;
- }
-
- if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
- ret = -1;
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Exiting tier migration as"
- "defrag status is not started");
- goto out;
- }
-
- ret = gettimeofday(&current_time, NULL);
- if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Could not get current time.");
- goto out;
- }
-
- total_time = current_time.tv_sec - start_time.tv_sec;
- if (total_time > max_time) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "Max cycle time reached. Exiting migration.");
- goto out;
- }
-
- per_file_status = 0;
- per_link_status = 0;
-
- if (gf_defrag_get_pause_state(&defrag->tier_conf) != TIER_RUNNING) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "Tiering paused. "
- "Exiting tier_migrate_using_query_file");
- break;
- }
-
- if (defrag->tier_conf.mode == TIER_MODE_WM) {
- ret = tier_get_fs_stat(this, &root_loc);
- if (ret != 0) {
- gfdb_methods.gfdb_query_record_free(query_record);
- query_record = NULL;
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS,
- "tier_get_fs_stat() FAILED ... "
- "skipping file migrations until next cycle");
- break;
- }
-
- if (!tier_do_migration(this, query_cbk_args->is_promotion)) {
- gfdb_methods.gfdb_query_record_free(query_record);
- query_record = NULL;
-
- /* We have crossed the high watermark. Stop processing
- * files if this is a promotion cycle so demotion gets
- * a chance to start if not already running*/
-
- if (query_cbk_args->is_promotion &&
- is_hot_tier_full(&defrag->tier_conf)) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "High watermark crossed during "
- "promotion. Exiting "
- "tier_migrate_using_query_file");
- break;
- }
- continue;
- }
- }
-
- per_link_status = 0;
-
- /* For now we only support single link migration. And we will
- * ignore other hard links in the link info list of query record
- * TODO: Multiple hard links migration */
- if (!list_empty(&query_record->link_list)) {
- link_info = list_first_entry(&query_record->link_list,
- gfdb_link_info_t, list);
- }
- if (link_info != NULL) {
- if (tier_migrate_link(this, conf, query_record->gfid, link_info,
- defrag, query_cbk_args, migrate_data,
- &per_link_status, &total_files,
- &total_migrated_bytes) != 0) {
- gf_msg(
- this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "%s failed for %s(gfid:%s)",
- (query_cbk_args->is_promotion ? "Promotion" : "Demotion"),
- link_info->file_name, uuid_utoa(query_record->gfid));
- }
- }
- per_file_status = per_link_status;
-
- if (per_file_status < 0) { /* Failure */
- pthread_mutex_lock(&dm_stat_mutex);
- defrag->total_failures++;
- pthread_mutex_unlock(&dm_stat_mutex);
- } else if (per_file_status == 0) { /* Success */
- pthread_mutex_lock(&dm_stat_mutex);
- defrag->total_files++;
- pthread_mutex_unlock(&dm_stat_mutex);
- } else if (per_file_status == 1) { /* Ignore */
- per_file_status = 0;
- /* Since this attempt was ignored we
- * decrement the lookup count*/
- pthread_mutex_lock(&dm_stat_mutex);
- defrag->num_files_lookedup--;
- pthread_mutex_unlock(&dm_stat_mutex);
- }
- total_status = total_status + per_file_status;
- per_link_status = 0;
- per_file_status = 0;
-
- gfdb_methods.gfdb_query_record_free(query_record);
- query_record = NULL;
-
- /* If we are demoting and the entry watermark was HI, then
- * we are done with emergency demotions if the current
- * watermark has fallen below hi-watermark level
- */
- if (emergency_demote_mode) {
- if (tier_check_watermark(this) == 0) {
- if (!is_hot_tier_full(&defrag->tier_conf)) {
- break;
- }
- }
- }
- }
-
-out:
- if (migrate_data)
- dict_unref(migrate_data);
-
- gfdb_methods.gfdb_query_record_free(query_record);
- query_record = NULL;
-
- return total_status;
-}
-
-/* This is the call back function per record/file from data base */
-static int
-tier_gf_query_callback(gfdb_query_record_t *gfdb_query_record, void *_args)
-{
- int ret = -1;
- query_cbk_args_t *query_cbk_args = _args;
-
- GF_VALIDATE_OR_GOTO("tier", query_cbk_args, out);
- GF_VALIDATE_OR_GOTO("tier", query_cbk_args->defrag, out);
- GF_VALIDATE_OR_GOTO("tier", (query_cbk_args->query_fd > 0), out);
-
- ret = gfdb_methods.gfdb_write_query_record(query_cbk_args->query_fd,
- gfdb_query_record);
- if (ret) {
- gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed writing query record to query file");
- goto out;
- }
-
- pthread_mutex_lock(&dm_stat_mutex);
- query_cbk_args->defrag->num_files_lookedup++;
- pthread_mutex_unlock(&dm_stat_mutex);
-
- ret = 0;
-out:
- return ret;
-}
-
-/* Create query file in tier process */
-static int
-tier_process_self_query(tier_brick_list_t *local_brick, void *args)
-{
- int ret = -1;
- char *db_path = NULL;
- query_cbk_args_t *query_cbk_args = NULL;
- xlator_t *this = NULL;
- gfdb_conn_node_t *conn_node = NULL;
- dict_t *params_dict = NULL;
- dict_t *ctr_ipc_dict = NULL;
- gfdb_brick_info_t *gfdb_brick_info = args;
-
- /*Init of all the essentials*/
- GF_VALIDATE_OR_GOTO("tier", gfdb_brick_info, out);
- query_cbk_args = gfdb_brick_info->_query_cbk_args;
-
- GF_VALIDATE_OR_GOTO("tier", query_cbk_args->this, out);
- this = query_cbk_args->this;
-
- GF_VALIDATE_OR_GOTO(this->name, gfdb_brick_info->_query_cbk_args, out);
-
- GF_VALIDATE_OR_GOTO(this->name, local_brick, out);
-
- GF_VALIDATE_OR_GOTO(this->name, local_brick->xlator, out);
-
- GF_VALIDATE_OR_GOTO(this->name, local_brick->brick_db_path, out);
-
- db_path = local_brick->brick_db_path;
-
- /*Preparing DB parameters before init_db i.e getting db connection*/
- params_dict = dict_new();
- if (!params_dict) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "DB Params cannot initialized");
- goto out;
- }
- SET_DB_PARAM_TO_DICT(this->name, params_dict,
- (char *)gfdb_methods.get_db_path_key(), db_path, ret,
- out);
-
- /*Get the db connection*/
- conn_node = gfdb_methods.init_db((void *)params_dict, dht_tier_db_type);
- if (!conn_node) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "FATAL: Failed initializing db operations");
- goto out;
- }
-
- /* Query for eligible files from db */
- query_cbk_args->query_fd = open(local_brick->qfile_path,
- O_WRONLY | O_CREAT | O_APPEND,
- S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
- if (query_cbk_args->query_fd < 0) {
- gf_msg(this->name, GF_LOG_ERROR, errno, DHT_MSG_LOG_TIER_ERROR,
- "Failed to open query file %s", local_brick->qfile_path);
- goto out;
- }
- if (!gfdb_brick_info->_gfdb_promote) {
- if (query_cbk_args->defrag->tier_conf.watermark_last == TIER_WM_HI) {
- /* emergency demotion mode */
- ret = gfdb_methods.find_all(
- conn_node, tier_gf_query_callback, (void *)query_cbk_args,
- query_cbk_args->defrag->tier_conf.query_limit);
- } else {
- if (query_cbk_args->defrag->write_freq_threshold == 0 &&
- query_cbk_args->defrag->read_freq_threshold == 0) {
- ret = gfdb_methods.find_unchanged_for_time(
- conn_node, tier_gf_query_callback, (void *)query_cbk_args,
- gfdb_brick_info->time_stamp);
- } else {
- ret = gfdb_methods.find_unchanged_for_time_freq(
- conn_node, tier_gf_query_callback, (void *)query_cbk_args,
- gfdb_brick_info->time_stamp,
- query_cbk_args->defrag->write_freq_threshold,
- query_cbk_args->defrag->read_freq_threshold, _gf_false);
- }
- }
- } else {
- if (query_cbk_args->defrag->write_freq_threshold == 0 &&
- query_cbk_args->defrag->read_freq_threshold == 0) {
- ret = gfdb_methods.find_recently_changed_files(
- conn_node, tier_gf_query_callback, (void *)query_cbk_args,
- gfdb_brick_info->time_stamp);
- } else {
- ret = gfdb_methods.find_recently_changed_files_freq(
- conn_node, tier_gf_query_callback, (void *)query_cbk_args,
- gfdb_brick_info->time_stamp,
- query_cbk_args->defrag->write_freq_threshold,
- query_cbk_args->defrag->read_freq_threshold, _gf_false);
- }
- }
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "FATAL: query from db failed");
- goto out;
- }
-
- /*Clear the heat on the DB entries*/
- /*Preparing ctr_ipc_dict*/
- ctr_ipc_dict = dict_new();
- if (!ctr_ipc_dict) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "ctr_ipc_dict cannot initialized");
- goto out;
- }
-
- SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_dict, GFDB_IPC_CTR_KEY,
- GFDB_IPC_CTR_CLEAR_OPS, ret, out);
-
- ret = syncop_ipc(local_brick->xlator, GF_IPC_TARGET_CTR, ctr_ipc_dict,
- NULL);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed clearing the heat "
- "on db %s error %d",
- local_brick->brick_db_path, ret);
- goto out;
- }
-
- ret = 0;
-out:
- if (params_dict) {
- dict_unref(params_dict);
- params_dict = NULL;
- }
-
- if (ctr_ipc_dict) {
- dict_unref(ctr_ipc_dict);
- ctr_ipc_dict = NULL;
- }
-
- if (query_cbk_args && query_cbk_args->query_fd >= 0) {
- sys_close(query_cbk_args->query_fd);
- query_cbk_args->query_fd = -1;
- }
- gfdb_methods.fini_db(conn_node);
-
- return ret;
-}
-
-/*Ask CTR to create the query file*/
-static int
-tier_process_ctr_query(tier_brick_list_t *local_brick, void *args)
-{
- int ret = -1;
- query_cbk_args_t *query_cbk_args = NULL;
- xlator_t *this = NULL;
- dict_t *ctr_ipc_in_dict = NULL;
- dict_t *ctr_ipc_out_dict = NULL;
- gfdb_brick_info_t *gfdb_brick_info = args;
- gfdb_ipc_ctr_params_t *ipc_ctr_params = NULL;
- int count = 0;
-
- /*Init of all the essentials*/
- GF_VALIDATE_OR_GOTO("tier", gfdb_brick_info, out);
- query_cbk_args = gfdb_brick_info->_query_cbk_args;
-
- GF_VALIDATE_OR_GOTO("tier", query_cbk_args->this, out);
- this = query_cbk_args->this;
-
- GF_VALIDATE_OR_GOTO(this->name, gfdb_brick_info->_query_cbk_args, out);
-
- GF_VALIDATE_OR_GOTO(this->name, local_brick, out);
-
- GF_VALIDATE_OR_GOTO(this->name, local_brick->xlator, out);
-
- GF_VALIDATE_OR_GOTO(this->name, local_brick->brick_db_path, out);
-
- /*Preparing ctr_ipc_in_dict*/
- ctr_ipc_in_dict = dict_new();
- if (!ctr_ipc_in_dict) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "ctr_ipc_in_dict cannot initialized");
- goto out;
- }
-
- ipc_ctr_params = GF_CALLOC(1, sizeof(gfdb_ipc_ctr_params_t),
- gf_tier_mt_ipc_ctr_params_t);
- if (!ipc_ctr_params) {
- goto out;
- }
-
- /* set all the query params*/
- ipc_ctr_params->is_promote = gfdb_brick_info->_gfdb_promote;
-
- ipc_ctr_params->write_freq_threshold = query_cbk_args->defrag
- ->write_freq_threshold;
-
- ipc_ctr_params->read_freq_threshold = query_cbk_args->defrag
- ->read_freq_threshold;
-
- ipc_ctr_params->query_limit = query_cbk_args->defrag->tier_conf.query_limit;
-
- ipc_ctr_params->emergency_demote = (!gfdb_brick_info->_gfdb_promote &&
- query_cbk_args->defrag->tier_conf
- .watermark_last == TIER_WM_HI);
-
- memcpy(&ipc_ctr_params->time_stamp, gfdb_brick_info->time_stamp,
- sizeof(gfdb_time_t));
-
- SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_in_dict, GFDB_IPC_CTR_KEY,
- GFDB_IPC_CTR_QUERY_OPS, ret, out);
-
- SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_in_dict,
- GFDB_IPC_CTR_GET_QFILE_PATH, local_brick->qfile_path,
- ret, out);
-
- ret = dict_set_bin(ctr_ipc_in_dict, GFDB_IPC_CTR_GET_QUERY_PARAMS,
- ipc_ctr_params, sizeof(*ipc_ctr_params));
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED,
- "Failed setting %s to params dictionary",
- GFDB_IPC_CTR_GET_QUERY_PARAMS);
- GF_FREE(ipc_ctr_params);
- goto out;
- }
- ipc_ctr_params = NULL;
-
- ret = syncop_ipc(local_brick->xlator, GF_IPC_TARGET_CTR, ctr_ipc_in_dict,
- &ctr_ipc_out_dict);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_IPC_TIER_ERROR,
- "Failed query on %s ret %d", local_brick->brick_db_path, ret);
- goto out;
- }
-
- ret = dict_get_int32(ctr_ipc_out_dict, GFDB_IPC_CTR_RET_QUERY_COUNT,
- &count);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed getting count "
- "of records on %s",
- local_brick->brick_db_path);
- goto out;
- }
-
- if (count < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed query on %s", local_brick->brick_db_path);
- ret = -1;
- goto out;
- }
-
- pthread_mutex_lock(&dm_stat_mutex);
- query_cbk_args->defrag->num_files_lookedup = count;
- pthread_mutex_unlock(&dm_stat_mutex);
-
- ret = 0;
-out:
-
- if (ctr_ipc_in_dict) {
- dict_unref(ctr_ipc_in_dict);
- ctr_ipc_in_dict = NULL;
- }
-
- if (ctr_ipc_out_dict) {
- dict_unref(ctr_ipc_out_dict);
- ctr_ipc_out_dict = NULL;
- }
-
- GF_FREE(ipc_ctr_params);
-
- return ret;
-}
-
-/* This is the call back function for each brick from hot/cold bricklist
- * It picks up each bricks db and queries for eligible files for migration.
- * The list of eligible files are populated in appropriate query files*/
-static int
-tier_process_brick(tier_brick_list_t *local_brick, void *args)
-{
- int ret = -1;
- dict_t *ctr_ipc_in_dict = NULL;
- dict_t *ctr_ipc_out_dict = NULL;
- char *strval = NULL;
-
- GF_VALIDATE_OR_GOTO("tier", local_brick, out);
-
- GF_VALIDATE_OR_GOTO("tier", local_brick->xlator, out);
-
- if (dht_tier_db_type == GFDB_SQLITE3) {
- /*Preparing ctr_ipc_in_dict*/
- ctr_ipc_in_dict = dict_new();
- if (!ctr_ipc_in_dict) {
- gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "ctr_ipc_in_dict cannot initialized");
- goto out;
- }
-
- ret = dict_set_str(ctr_ipc_in_dict, GFDB_IPC_CTR_KEY,
- GFDB_IPC_CTR_GET_DB_PARAM_OPS);
- if (ret) {
- gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED,
- "Failed to set %s "
- "to params dictionary",
- GFDB_IPC_CTR_KEY);
- goto out;
- }
-
- ret = dict_set_str(ctr_ipc_in_dict, GFDB_IPC_CTR_GET_DB_PARAM_OPS, "");
- if (ret) {
- gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED,
- "Failed to set %s "
- "to params dictionary",
- GFDB_IPC_CTR_GET_DB_PARAM_OPS);
- goto out;
- }
-
- ret = dict_set_str(ctr_ipc_in_dict, GFDB_IPC_CTR_GET_DB_KEY,
- "journal_mode");
- if (ret) {
- gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED,
- "Failed to set %s "
- "to params dictionary",
- GFDB_IPC_CTR_GET_DB_KEY);
- goto out;
- }
-
- ret = syncop_ipc(local_brick->xlator, GF_IPC_TARGET_CTR,
- ctr_ipc_in_dict, &ctr_ipc_out_dict);
- if (ret || ctr_ipc_out_dict == NULL) {
- gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to get "
- "journal_mode of sql db %s",
- local_brick->brick_db_path);
- goto out;
- }
-
- ret = dict_get_str(ctr_ipc_out_dict, "journal_mode", &strval);
- if (ret) {
- gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_GET_PARAM_FAILED,
- "Failed to get %s "
- "from params dictionary"
- "journal_mode",
- strval);
- goto out;
- }
-
- if (strval && (strncmp(strval, "wal", SLEN("wal")) == 0)) {
- ret = tier_process_self_query(local_brick, args);
- if (ret) {
- goto out;
- }
- } else {
- ret = tier_process_ctr_query(local_brick, args);
- if (ret) {
- goto out;
- }
- }
- ret = 0;
-
- } else {
- ret = tier_process_self_query(local_brick, args);
- if (ret) {
- goto out;
- }
- }
-
- ret = 0;
-out:
- if (ctr_ipc_in_dict)
- dict_unref(ctr_ipc_in_dict);
-
- if (ctr_ipc_out_dict)
- dict_unref(ctr_ipc_out_dict);
-
- return ret;
-}
-
-static int
-tier_build_migration_qfile(migration_args_t *args,
- query_cbk_args_t *query_cbk_args,
- gf_boolean_t is_promotion)
-{
- gfdb_time_t current_time;
- gfdb_brick_info_t gfdb_brick_info;
- gfdb_time_t time_in_past;
- int ret = -1;
- tier_brick_list_t *local_brick = NULL;
- int i = 0;
- time_in_past.tv_sec = args->freq_time;
- time_in_past.tv_usec = 0;
-
- ret = gettimeofday(&current_time, NULL);
- if (ret == -1) {
- gf_msg(args->this->name, GF_LOG_ERROR, errno,
- DHT_MSG_SYS_CALL_GET_TIME_FAILED, "Failed to get current time");
- goto out;
- }
- time_in_past.tv_sec = current_time.tv_sec - time_in_past.tv_sec;
-
- /* The migration daemon may run a varying numberof usec after the */
- /* sleep call triggers. A file may be registered in CTR some number */
- /* of usec X after the daemon started and missed in the subsequent */
- /* cycle if the daemon starts Y usec after the period in seconds */
- /* where Y>X. Normalize away this problem by always setting usec */
- /* to 0. */
- time_in_past.tv_usec = 0;
-
- gfdb_brick_info.time_stamp = &time_in_past;
- gfdb_brick_info._gfdb_promote = is_promotion;
- gfdb_brick_info._query_cbk_args = query_cbk_args;
-
- list_for_each_entry(local_brick, args->brick_list, list)
- {
- /* Construct query file path for this brick
- * i.e
- * /var/run/gluster/xlator_name/
- * {promote/demote}-brickname-indexinbricklist
- * So that no two query files will have same path even
- * bricks have the same name
- * */
- snprintf(local_brick->qfile_path, PATH_MAX, "%s-%s-%d",
- GET_QFILE_PATH(gfdb_brick_info._gfdb_promote),
- local_brick->brick_name, i);
-
- /* Delete any old query files for this brick */
- sys_unlink(local_brick->qfile_path);
-
- ret = tier_process_brick(local_brick, &gfdb_brick_info);
- if (ret) {
- gf_msg(args->this->name, GF_LOG_ERROR, 0,
- DHT_MSG_BRICK_QUERY_FAILED, "Brick %s query failed\n",
- local_brick->brick_db_path);
- }
- i++;
- }
- ret = 0;
-out:
- return ret;
-}
-
-static int
-tier_migrate_files_using_qfile(migration_args_t *comp,
- query_cbk_args_t *query_cbk_args)
-{
- int ret = -1;
- tier_brick_list_t *local_brick = NULL;
- tier_brick_list_t *temp = NULL;
- gfdb_time_t current_time = {
- 0,
- };
- ssize_t qfile_array_size = 0;
- int count = 0;
- int temp_fd = 0;
- gf_tier_conf_t *tier_conf = NULL;
-
- tier_conf = &(query_cbk_args->defrag->tier_conf);
-
- /* Time for error query files */
- gettimeofday(&current_time, NULL);
-
- /* Build the qfile list */
- list_for_each_entry_safe(local_brick, temp, comp->brick_list, list)
- {
- qfile_array_size++;
- }
- query_cbk_args->qfile_array = qfile_array_new(qfile_array_size);
- if (!query_cbk_args->qfile_array) {
- gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to create new "
- "qfile_array");
- goto out;
- }
-
- /*Open all qfiles*/
- count = 0;
- query_cbk_args->qfile_array->exhausted_count = 0;
- list_for_each_entry_safe(local_brick, temp, comp->brick_list, list)
- {
- temp_fd = query_cbk_args->qfile_array->fd_array[count];
- temp_fd = open(local_brick->qfile_path, O_RDONLY,
- S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
- if (temp_fd < 0) {
- gf_msg("tier", GF_LOG_ERROR, errno, DHT_MSG_LOG_TIER_ERROR,
- "Failed to open "
- "%s to the query file",
- local_brick->qfile_path);
- query_cbk_args->qfile_array->exhausted_count++;
- }
- query_cbk_args->qfile_array->fd_array[count] = temp_fd;
- count++;
- }
-
- /* Moving the query file index to the next, so that we won't the same
- * query file every cycle as the first one */
- query_cbk_args->qfile_array
- ->next_index = (query_cbk_args->is_promotion)
- ? tier_conf->last_promote_qfile_index
- : tier_conf->last_demote_qfile_index;
- shift_next_index(query_cbk_args->qfile_array);
- if (query_cbk_args->is_promotion) {
- tier_conf->last_promote_qfile_index = query_cbk_args->qfile_array
- ->next_index;
- } else {
- tier_conf->last_demote_qfile_index = query_cbk_args->qfile_array
- ->next_index;
- }
-
- /* Migrate files using query file list */
- ret = tier_migrate_using_query_file((void *)query_cbk_args);
-out:
- qfile_array_free(query_cbk_args->qfile_array);
-
- /* If there is an error rename all the query files to .err files
- * with a timestamp for better debugging */
- if (ret) {
- struct tm tm = {
- 0,
- };
- char time_str[128] = {
- 0,
- };
- char query_file_path_err[PATH_MAX] = {
- 0,
- };
- int32_t len = 0;
-
- /* Time format for error query files */
- gmtime_r(&current_time.tv_sec, &tm);
- strftime(time_str, sizeof(time_str), "%F-%T", &tm);
-
- list_for_each_entry_safe(local_brick, temp, comp->brick_list, list)
- {
- /* rename error qfile*/
- len = snprintf(query_file_path_err, sizeof(query_file_path_err),
- "%s-%s.err", local_brick->qfile_path, time_str);
- if ((len >= 0) && (len < sizeof(query_file_path_err))) {
- if (sys_rename(local_brick->qfile_path, query_file_path_err) ==
- -1)
- gf_msg_debug("tier", 0,
- "rename "
- "failed");
- }
- }
- }
-
- query_cbk_args->qfile_array = NULL;
-
- return ret;
-}
-
-int
-tier_demote(migration_args_t *demotion_args)
-{
- query_cbk_args_t query_cbk_args;
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO("tier", demotion_args, out);
- GF_VALIDATE_OR_GOTO("tier", demotion_args->this, out);
- GF_VALIDATE_OR_GOTO(demotion_args->this->name, demotion_args->brick_list,
- out);
- GF_VALIDATE_OR_GOTO(demotion_args->this->name, demotion_args->defrag, out);
-
- THIS = demotion_args->this;
-
- query_cbk_args.this = demotion_args->this;
- query_cbk_args.defrag = demotion_args->defrag;
- query_cbk_args.is_promotion = 0;
-
- /*Build the query file using bricklist*/
- ret = tier_build_migration_qfile(demotion_args, &query_cbk_args, _gf_false);
- if (ret)
- goto out;
-
- /* Migrate files using the query file */
- ret = tier_migrate_files_using_qfile(demotion_args, &query_cbk_args);
- if (ret)
- goto out;
-
-out:
- demotion_args->return_value = ret;
- return ret;
-}
-
-int
-tier_promote(migration_args_t *promotion_args)
-{
- int ret = -1;
- query_cbk_args_t query_cbk_args;
-
- GF_VALIDATE_OR_GOTO("tier", promotion_args->this, out);
- GF_VALIDATE_OR_GOTO(promotion_args->this->name, promotion_args->brick_list,
- out);
- GF_VALIDATE_OR_GOTO(promotion_args->this->name, promotion_args->defrag,
- out);
-
- THIS = promotion_args->this;
-
- query_cbk_args.this = promotion_args->this;
- query_cbk_args.defrag = promotion_args->defrag;
- query_cbk_args.is_promotion = 1;
-
- /*Build the query file using bricklist*/
- ret = tier_build_migration_qfile(promotion_args, &query_cbk_args, _gf_true);
- if (ret)
- goto out;
-
- /* Migrate files using the query file */
- ret = tier_migrate_files_using_qfile(promotion_args, &query_cbk_args);
- if (ret)
- goto out;
-
-out:
- promotion_args->return_value = ret;
- return ret;
-}
-
-/*
- * Command the CTR on a brick to compact the local database using an IPC
- */
-static int
-tier_process_self_compact(tier_brick_list_t *local_brick, void *args)
-{
- int ret = -1;
- char *db_path = NULL;
- query_cbk_args_t *query_cbk_args = NULL;
- xlator_t *this = NULL;
- gfdb_conn_node_t *conn_node = NULL;
- dict_t *params_dict = NULL;
- dict_t *ctr_ipc_dict = NULL;
- gfdb_brick_info_t *gfdb_brick_info = args;
-
- /*Init of all the essentials*/
- GF_VALIDATE_OR_GOTO("tier", gfdb_brick_info, out);
- query_cbk_args = gfdb_brick_info->_query_cbk_args;
-
- GF_VALIDATE_OR_GOTO("tier", query_cbk_args->this, out);
- this = query_cbk_args->this;
-
- GF_VALIDATE_OR_GOTO(this->name, gfdb_brick_info->_query_cbk_args, out);
-
- GF_VALIDATE_OR_GOTO(this->name, local_brick, out);
-
- GF_VALIDATE_OR_GOTO(this->name, local_brick->xlator, out);
-
- GF_VALIDATE_OR_GOTO(this->name, local_brick->brick_db_path, out);
-
- db_path = local_brick->brick_db_path;
-
- /*Preparing DB parameters before init_db i.e getting db connection*/
- params_dict = dict_new();
- if (!params_dict) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "DB Params cannot initialized");
- goto out;
- }
- SET_DB_PARAM_TO_DICT(this->name, params_dict,
- (char *)gfdb_methods.get_db_path_key(), db_path, ret,
- out);
-
- /*Get the db connection*/
- conn_node = gfdb_methods.init_db((void *)params_dict, dht_tier_db_type);
- if (!conn_node) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "FATAL: Failed initializing db operations");
- goto out;
- }
-
- ret = 0;
-
- /*Preparing ctr_ipc_dict*/
- ctr_ipc_dict = dict_new();
- if (!ctr_ipc_dict) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "ctr_ipc_dict cannot initialized");
- goto out;
- }
-
- ret = dict_set_int32(ctr_ipc_dict, "compact_active",
- query_cbk_args->defrag->tier_conf.compact_active);
-
- if (ret) {
- gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED,
- "Failed to set %s "
- "to params dictionary",
- "compact_active");
- goto out;
- }
-
- ret = dict_set_int32(
- ctr_ipc_dict, "compact_mode_switched",
- query_cbk_args->defrag->tier_conf.compact_mode_switched);
-
- if (ret) {
- gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED,
- "Failed to set %s "
- "to params dictionary",
- "compact_mode_switched");
- goto out;
- }
-
- SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_dict, GFDB_IPC_CTR_KEY,
- GFDB_IPC_CTR_SET_COMPACT_PRAGMA, ret, out);
-
- gf_msg(this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS,
- "Starting Compaction IPC");
-
- ret = syncop_ipc(local_brick->xlator, GF_IPC_TARGET_CTR, ctr_ipc_dict,
- NULL);
-
- gf_msg(this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS,
- "Ending Compaction IPC");
-
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed compaction "
- "on db %s error %d",
- local_brick->brick_db_path, ret);
- goto out;
- }
-
- gf_msg(this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS,
- "SUCCESS: %s Compaction", local_brick->brick_name);
-
- ret = 0;
-out:
- if (params_dict) {
- dict_unref(params_dict);
- params_dict = NULL;
- }
-
- if (ctr_ipc_dict) {
- dict_unref(ctr_ipc_dict);
- ctr_ipc_dict = NULL;
- }
-
- gfdb_methods.fini_db(conn_node);
-
- return ret;
-}
-
-/*
- * This is the call back function for each brick from hot/cold bricklist.
- * It determines the database type on each brick and calls the corresponding
- * function to prepare the compaction IPC.
- */
-static int
-tier_compact_db_brick(tier_brick_list_t *local_brick, void *args)
-{
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO("tier", local_brick, out);
-
- GF_VALIDATE_OR_GOTO("tier", local_brick->xlator, out);
-
- ret = tier_process_self_compact(local_brick, args);
- if (ret) {
- gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "Brick %s did not compact", local_brick->brick_name);
- goto out;
- }
-
- ret = 0;
-
-out:
-
- return ret;
-}
-
-static int
-tier_send_compact(migration_args_t *args, query_cbk_args_t *query_cbk_args)
-{
- gfdb_time_t current_time;
- gfdb_brick_info_t gfdb_brick_info;
- gfdb_time_t time_in_past;
- int ret = -1;
- tier_brick_list_t *local_brick = NULL;
-
- time_in_past.tv_sec = args->freq_time;
- time_in_past.tv_usec = 0;
-
- ret = gettimeofday(&current_time, NULL);
- if (ret == -1) {
- gf_msg(args->this->name, GF_LOG_ERROR, errno,
- DHT_MSG_SYS_CALL_GET_TIME_FAILED, "Failed to get current time");
- goto out;
- }
- time_in_past.tv_sec = current_time.tv_sec - time_in_past.tv_sec;
-
- /* The migration daemon may run a varying numberof usec after the sleep
- call triggers. A file may be registered in CTR some number of usec X
- after the daemon started and missed in the subsequent cycle if the
- daemon starts Y usec after the period in seconds where Y>X. Normalize
- away this problem by always setting usec to 0. */
- time_in_past.tv_usec = 0;
-
- gfdb_brick_info.time_stamp = &time_in_past;
-
- /* This is meant to say we are always compacting at this point */
- /* We simply borrow the promotion flag to do this */
- gfdb_brick_info._gfdb_promote = 1;
-
- gfdb_brick_info._query_cbk_args = query_cbk_args;
-
- list_for_each_entry(local_brick, args->brick_list, list)
- {
- gf_msg(args->this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS,
- "Start compaction for %s", local_brick->brick_name);
-
- ret = tier_compact_db_brick(local_brick, &gfdb_brick_info);
- if (ret) {
- gf_msg(args->this->name, GF_LOG_ERROR, 0,
- DHT_MSG_BRICK_QUERY_FAILED, "Brick %s compaction failed\n",
- local_brick->brick_db_path);
- }
-
- gf_msg(args->this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS,
- "End compaction for %s", local_brick->brick_name);
- }
- ret = 0;
-out:
- return ret;
-}
-
-static int
-tier_compact(void *args)
-{
- int ret = -1;
- query_cbk_args_t query_cbk_args;
- migration_args_t *compaction_args = args;
-
- GF_VALIDATE_OR_GOTO("tier", compaction_args->this, out);
- GF_VALIDATE_OR_GOTO(compaction_args->this->name,
- compaction_args->brick_list, out);
- GF_VALIDATE_OR_GOTO(compaction_args->this->name, compaction_args->defrag,
- out);
-
- THIS = compaction_args->this;
-
- query_cbk_args.this = compaction_args->this;
- query_cbk_args.defrag = compaction_args->defrag;
- query_cbk_args.is_compaction = 1;
-
- /* Send the compaction pragma out to all the bricks on the bricklist. */
- /* tier_get_bricklist ensures all bricks on the list are local to */
- /* this node. */
- ret = tier_send_compact(compaction_args, &query_cbk_args);
- if (ret)
- goto out;
-
- ret = 0;
-out:
- compaction_args->return_value = ret;
- return ret;
-}
-
-static int
-tier_get_bricklist(xlator_t *xl, struct list_head *local_bricklist_head)
-{
- xlator_list_t *child = NULL;
- char *rv = NULL;
- char *rh = NULL;
- char *brickname = NULL;
- char db_name[PATH_MAX] = "";
- int ret = 0;
- tier_brick_list_t *local_brick = NULL;
- int32_t len = 0;
-
- GF_VALIDATE_OR_GOTO("tier", xl, out);
- GF_VALIDATE_OR_GOTO("tier", local_bricklist_head, out);
-
- /*
- * This function obtains remote subvolumes and filters out only
- * those running on the same node as the tier daemon.
- */
- if (strcmp(xl->type, "protocol/client") == 0) {
- ret = dict_get_str(xl->options, "remote-host", &rh);
- if (ret < 0)
- goto out;
-
- if (gf_is_local_addr(rh)) {
- local_brick = GF_CALLOC(1, sizeof(tier_brick_list_t),
- gf_tier_mt_bricklist_t);
- if (!local_brick) {
- goto out;
- }
-
- ret = dict_get_str(xl->options, "remote-subvolume", &rv);
- if (ret < 0)
- goto out;
-
- brickname = strrchr(rv, '/') + 1;
- snprintf(db_name, sizeof(db_name), "%s.db", brickname);
-
- local_brick->brick_db_path = GF_MALLOC(PATH_MAX, gf_common_mt_char);
- if (!local_brick->brick_db_path) {
- gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS,
- "Failed to allocate memory for"
- " bricklist.");
- ret = -1;
- goto out;
- }
-
- len = snprintf(local_brick->brick_db_path, PATH_MAX, "%s/%s/%s", rv,
- GF_HIDDEN_PATH, db_name);
- if ((len < 0) || (len >= PATH_MAX)) {
- gf_msg("tier", GF_LOG_ERROR, EINVAL, DHT_MSG_LOG_TIER_STATUS,
- "DB path too long");
- ret = -1;
- goto out;
- }
-
- local_brick->xlator = xl;
-
- snprintf(local_brick->brick_name, NAME_MAX, "%s", brickname);
-
- list_add_tail(&(local_brick->list), local_bricklist_head);
-
- ret = 0;
- goto out;
- }
- }
-
- for (child = xl->children; child; child = child->next) {
- ret = tier_get_bricklist(child->xlator, local_bricklist_head);
- if (ret) {
- goto out;
- }
- }
-
- ret = 0;
-out:
-
- if (ret) {
- if (local_brick) {
- GF_FREE(local_brick->brick_db_path);
- }
- GF_FREE(local_brick);
- }
-
- return ret;
-}
-
-int
-tier_get_freq_demote(gf_tier_conf_t *tier_conf)
-{
- if ((tier_conf->mode == TIER_MODE_WM) &&
- (tier_conf->watermark_last == TIER_WM_HI))
- return DEFAULT_DEMOTE_DEGRADED;
- else
- return tier_conf->tier_demote_frequency;
-}
-
-int
-tier_get_freq_promote(gf_tier_conf_t *tier_conf)
-{
- return tier_conf->tier_promote_frequency;
-}
-
-int
-tier_get_freq_compact_hot(gf_tier_conf_t *tier_conf)
-{
- return tier_conf->tier_compact_hot_frequency;
-}
-
-int
-tier_get_freq_compact_cold(gf_tier_conf_t *tier_conf)
-{
- return tier_conf->tier_compact_cold_frequency;
-}
-
-static int
-tier_check_demote(gfdb_time_t current_time, int freq)
-{
- return ((current_time.tv_sec % freq) == 0) ? _gf_true : _gf_false;
-}
-
-static gf_boolean_t
-tier_check_promote(gf_tier_conf_t *tier_conf, gfdb_time_t current_time,
- int freq)
-{
- if ((tier_conf->mode == TIER_MODE_WM) &&
- (tier_conf->watermark_last == TIER_WM_HI))
- return _gf_false;
-
- else
- return ((current_time.tv_sec % freq) == 0) ? _gf_true : _gf_false;
-}
-
-static gf_boolean_t
-tier_check_compact(gf_tier_conf_t *tier_conf, gfdb_time_t current_time,
- int freq_compact)
-{
- if (!(tier_conf->compact_active || tier_conf->compact_mode_switched))
- return _gf_false;
-
- return ((current_time.tv_sec % freq_compact) == 0) ? _gf_true : _gf_false;
-}
-
-void
-clear_bricklist(struct list_head *brick_list)
-{
- tier_brick_list_t *local_brick = NULL;
- tier_brick_list_t *temp = NULL;
-
- if (list_empty(brick_list)) {
- return;
- }
-
- list_for_each_entry_safe(local_brick, temp, brick_list, list)
- {
- list_del(&local_brick->list);
- GF_FREE(local_brick->brick_db_path);
- GF_FREE(local_brick);
- }
-}
-
-static void
-set_brick_list_qpath(struct list_head *brick_list, gf_boolean_t is_cold)
-{
- tier_brick_list_t *local_brick = NULL;
- int i = 0;
-
- GF_VALIDATE_OR_GOTO("tier", brick_list, out);
-
- list_for_each_entry(local_brick, brick_list, list)
- {
- /* Construct query file path for this brick
- * i.e
- * /var/run/gluster/xlator_name/
- * {promote/demote}-brickname-indexinbricklist
- * So that no two query files will have same path even
- * bricks have the same name
- * */
- snprintf(local_brick->qfile_path, PATH_MAX, "%s-%s-%d",
- GET_QFILE_PATH(is_cold), local_brick->brick_name, i);
- i++;
- }
-out:
- return;
-}
-
-static int
-tier_prepare_compact(migration_args_t *args, gfdb_time_t current_time)
-{
- xlator_t *this = NULL;
- dht_conf_t *conf = NULL;
- gf_defrag_info_t *defrag = NULL;
- gf_tier_conf_t *tier_conf = NULL;
- gf_boolean_t is_hot_tier = args->is_hot_tier;
- int freq = 0;
- int ret = -1;
- const char *tier_type = is_hot_tier ? "hot" : "cold";
-
- this = args->this;
-
- conf = this->private;
-
- defrag = conf->defrag;
-
- tier_conf = &defrag->tier_conf;
-
- freq = is_hot_tier ? tier_get_freq_compact_hot(tier_conf)
- : tier_get_freq_compact_cold(tier_conf);
-
- defrag->tier_conf.compact_mode_switched =
- is_hot_tier ? defrag->tier_conf.compact_mode_switched_hot
- : defrag->tier_conf.compact_mode_switched_cold;
-
- gf_msg(this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS,
- "Compact mode %i", defrag->tier_conf.compact_mode_switched);
-
- if (tier_check_compact(tier_conf, current_time, freq)) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "Start compaction on %s tier", tier_type);
-
- args->freq_time = freq;
- ret = tier_compact(args);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Compaction failed on "
- "%s tier",
- tier_type);
- goto out;
- }
-
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "End compaction on %s tier", tier_type);
-
- if (is_hot_tier) {
- defrag->tier_conf.compact_mode_switched_hot = _gf_false;
- } else {
- defrag->tier_conf.compact_mode_switched_cold = _gf_false;
- }
- }
-
-out:
- return ret;
-}
-
-static int
-tier_get_wm_interval(tier_mode_t mode, tier_watermark_op_t wm)
-{
- if (mode == TIER_MODE_WM && wm == TIER_WM_HI)
- return WM_INTERVAL_EMERG;
-
- return WM_INTERVAL;
-}
-
-/*
- * Main tiering loop. This is called from the promotion and the
- * demotion threads spawned in tier_start().
- *
- * Every second, wake from sleep to perform tasks.
- * 1. Check trigger to migrate data.
- * 2. Check for state changes (pause, unpause, stop).
- */
-static void *
-tier_run(void *in_args)
-{
- dht_conf_t *conf = NULL;
- gfdb_time_t current_time = {0};
- int freq = 0;
- int ret = 0;
- xlator_t *any = NULL;
- xlator_t *xlator = NULL;
- gf_tier_conf_t *tier_conf = NULL;
- loc_t root_loc = {0};
- int check_watermark = 0;
- gf_defrag_info_t *defrag = NULL;
- xlator_t *this = NULL;
- migration_args_t *args = in_args;
- GF_VALIDATE_OR_GOTO("tier", args, out);
- GF_VALIDATE_OR_GOTO("tier", args->brick_list, out);
-
- this = args->this;
- GF_VALIDATE_OR_GOTO("tier", this, out);
-
- conf = this->private;
- GF_VALIDATE_OR_GOTO("tier", conf, out);
-
- defrag = conf->defrag;
- GF_VALIDATE_OR_GOTO("tier", defrag, out);
-
- if (list_empty(args->brick_list)) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_ERROR,
- "Brick list for tier is empty. Exiting.");
- goto out;
- }
-
- defrag->defrag_status = GF_DEFRAG_STATUS_STARTED;
- tier_conf = &defrag->tier_conf;
-
- dht_build_root_loc(defrag->root_inode, &root_loc);
-
- while (1) {
- /*
- * Check if a graph switch occurred. If so, stop migration
- * thread. It will need to be restarted manually.
- */
- any = THIS->ctx->active->first;
- xlator = xlator_search_by_name(any, this->name);
-
- if (xlator != this) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "Detected graph switch. Exiting migration "
- "daemon.");
- goto out;
- }
-
- gf_defrag_check_pause_tier(tier_conf);
-
- sleep(1);
-
- if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
- ret = 1;
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "defrag->defrag_status != "
- "GF_DEFRAG_STATUS_STARTED");
- goto out;
- }
-
- if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER ||
- defrag->cmd == GF_DEFRAG_CMD_DETACH_START) {
- ret = 0;
- defrag->defrag_status = GF_DEFRAG_STATUS_COMPLETE;
- gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_LOG_TIER_ERROR,
- "defrag->defrag_cmd == "
- "GF_DEFRAG_CMD_START_DETACH_TIER");
- goto out;
- }
-
- if (gf_defrag_get_pause_state(&defrag->tier_conf) != TIER_RUNNING)
- continue;
-
- /* To have proper synchronization amongst all
- * brick holding nodes, so that promotion and demotions
- * start atomically w.r.t promotion/demotion frequency
- * period, all nodes should have their system time
- * in-sync with each other either manually set or
- * using a NTP server*/
- ret = gettimeofday(&current_time, NULL);
- if (ret == -1) {
- gf_msg(this->name, GF_LOG_ERROR, errno,
- DHT_MSG_SYS_CALL_GET_TIME_FAILED,
- "Failed to get current time");
- goto out;
- }
-
- check_watermark++;
-
- /* emergency demotion requires frequent watermark monitoring */
- if (check_watermark >=
- tier_get_wm_interval(tier_conf->mode, tier_conf->watermark_last)) {
- check_watermark = 0;
- if (tier_conf->mode == TIER_MODE_WM) {
- ret = tier_get_fs_stat(this, &root_loc);
- if (ret != 0) {
- continue;
- }
- ret = tier_check_watermark(this);
- if (ret != 0) {
- gf_msg(this->name, GF_LOG_CRITICAL, errno,
- DHT_MSG_LOG_TIER_ERROR, "Failed to get watermark");
- continue;
- }
- }
- }
-
- if (args->is_promotion) {
- freq = tier_get_freq_promote(tier_conf);
-
- if (tier_check_promote(tier_conf, current_time, freq)) {
- args->freq_time = freq;
- ret = tier_promote(args);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Promotion failed");
- }
- }
- } else if (args->is_compaction) {
- tier_prepare_compact(args, current_time);
- } else {
- freq = tier_get_freq_demote(tier_conf);
-
- if (tier_check_demote(current_time, freq)) {
- args->freq_time = freq;
- ret = tier_demote(args);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Demotion failed");
- }
- }
- }
-
- /* Check the statfs immediately after the processing threads
- return */
- check_watermark = WM_INTERVAL;
- }
-
- ret = 0;
-out:
-
- args->return_value = ret;
-
- return NULL;
-}
-
-int
-tier_start(xlator_t *this, gf_defrag_info_t *defrag)
-{
- pthread_t promote_thread;
- pthread_t demote_thread;
- pthread_t hot_compact_thread;
- pthread_t cold_compact_thread;
- int ret = -1;
- struct list_head bricklist_hot = {0};
- struct list_head bricklist_cold = {0};
- migration_args_t promotion_args = {0};
- migration_args_t demotion_args = {0};
- migration_args_t hot_compaction_args = {0};
- migration_args_t cold_compaction_args = {0};
- dht_conf_t *conf = NULL;
-
- INIT_LIST_HEAD((&bricklist_hot));
- INIT_LIST_HEAD((&bricklist_cold));
-
- conf = this->private;
-
- tier_get_bricklist(conf->subvolumes[1], &bricklist_hot);
- set_brick_list_qpath(&bricklist_hot, _gf_false);
-
- demotion_args.this = this;
- demotion_args.brick_list = &bricklist_hot;
- demotion_args.defrag = defrag;
- demotion_args.is_promotion = _gf_false;
- demotion_args.is_compaction = _gf_false;
-
- ret = gf_thread_create(&demote_thread, NULL, &tier_run, &demotion_args,
- "tierdem");
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to start demotion thread.");
- defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
- goto cleanup;
- }
-
- tier_get_bricklist(conf->subvolumes[0], &bricklist_cold);
- set_brick_list_qpath(&bricklist_cold, _gf_true);
-
- promotion_args.this = this;
- promotion_args.brick_list = &bricklist_cold;
- promotion_args.defrag = defrag;
- promotion_args.is_promotion = _gf_true;
-
- ret = gf_thread_create(&promote_thread, NULL, &tier_run, &promotion_args,
- "tierpro");
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to start promotion thread.");
- defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
- goto waitforspawned;
- }
-
- hot_compaction_args.this = this;
- hot_compaction_args.brick_list = &bricklist_hot;
- hot_compaction_args.defrag = defrag;
- hot_compaction_args.is_promotion = _gf_false;
- hot_compaction_args.is_compaction = _gf_true;
- hot_compaction_args.is_hot_tier = _gf_true;
-
- ret = gf_thread_create(&hot_compact_thread, NULL, &tier_run,
- &hot_compaction_args, "tierhcom");
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to start compaction thread.");
- defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
- goto waitforspawnedpromote;
- }
-
- cold_compaction_args.this = this;
- cold_compaction_args.brick_list = &bricklist_cold;
- cold_compaction_args.defrag = defrag;
- cold_compaction_args.is_promotion = _gf_false;
- cold_compaction_args.is_compaction = _gf_true;
- cold_compaction_args.is_hot_tier = _gf_false;
-
- ret = gf_thread_create(&cold_compact_thread, NULL, &tier_run,
- &cold_compaction_args, "tierccom");
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to start compaction thread.");
- defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
- goto waitforspawnedhotcompact;
- }
- pthread_join(cold_compact_thread, NULL);
-
-waitforspawnedhotcompact:
- pthread_join(hot_compact_thread, NULL);
-
-waitforspawnedpromote:
- pthread_join(promote_thread, NULL);
-
-waitforspawned:
- pthread_join(demote_thread, NULL);
-
-cleanup:
- clear_bricklist(&bricklist_cold);
- clear_bricklist(&bricklist_hot);
- return ret;
-}
-
-int32_t
-tier_migration_needed(xlator_t *this)
-{
- gf_defrag_info_t *defrag = NULL;
- dht_conf_t *conf = NULL;
- int ret = 0;
-
- conf = this->private;
-
- GF_VALIDATE_OR_GOTO(this->name, conf, out);
- GF_VALIDATE_OR_GOTO(this->name, conf->defrag, out);
-
- defrag = conf->defrag;
-
- if ((defrag->cmd == GF_DEFRAG_CMD_START_TIER) ||
- (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER))
- ret = 1;
-out:
- return ret;
-}
-
-int32_t
-tier_migration_get_dst(xlator_t *this, dht_local_t *local)
-{
- dht_conf_t *conf = NULL;
- int32_t ret = -1;
- gf_defrag_info_t *defrag = NULL;
-
- GF_VALIDATE_OR_GOTO("tier", this, out);
- GF_VALIDATE_OR_GOTO(this->name, this->private, out);
-
- conf = this->private;
-
- defrag = conf->defrag;
-
- if (defrag && defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER) {
- local->rebalance.target_node = conf->subvolumes[0];
-
- } else if (conf->subvolumes[0] == local->cached_subvol)
- local->rebalance.target_node = conf->subvolumes[1];
- else
- local->rebalance.target_node = conf->subvolumes[0];
-
- if (local->rebalance.target_node)
- ret = 0;
-
-out:
- return ret;
-}
-
-xlator_t *
-tier_search(xlator_t *this, dht_layout_t *layout, const char *name)
-{
- xlator_t *subvol = NULL;
- dht_conf_t *conf = NULL;
-
- GF_VALIDATE_OR_GOTO("tier", this, out);
- GF_VALIDATE_OR_GOTO(this->name, this->private, out);
-
- conf = this->private;
-
- subvol = TIER_HASHED_SUBVOL;
-
-out:
- return subvol;
-}
-
-static int
-tier_load_externals(xlator_t *this)
-{
- int ret = -1;
- char *libpathfull = (LIBDIR "/libgfdb.so.0");
- get_gfdb_methods_t get_gfdb_methods;
-
- GF_VALIDATE_OR_GOTO("this", this, out);
-
- libhandle = dlopen(libpathfull, RTLD_NOW);
- if (!libhandle) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Error loading libgfdb.so %s\n", dlerror());
- ret = -1;
- goto out;
- }
-
- get_gfdb_methods = dlsym(libhandle, "get_gfdb_methods");
- if (!get_gfdb_methods) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Error loading get_gfdb_methods()");
- ret = -1;
- goto out;
- }
-
- get_gfdb_methods(&gfdb_methods);
-
- ret = 0;
-
-out:
- if (ret && libhandle)
- dlclose(libhandle);
-
- return ret;
-}
-
-static tier_mode_t
-tier_validate_mode(char *mode)
-{
- int ret = -1;
-
- if (strcmp(mode, "test") == 0) {
- ret = TIER_MODE_TEST;
- } else {
- ret = TIER_MODE_WM;
- }
-
- return ret;
-}
-
-static gf_boolean_t
-tier_validate_compact_mode(char *mode)
-{
- gf_boolean_t ret = _gf_false;
-
- gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "tier_validate_compact_mode: mode = %s", mode);
-
- if (!strcmp(mode, "on")) {
- ret = _gf_true;
- } else {
- ret = _gf_false;
- }
-
- gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS,
- "tier_validate_compact_mode: ret = %i", ret);
-
- return ret;
-}
-
-int
-tier_init_methods(xlator_t *this)
-{
- int ret = -1;
- dht_conf_t *conf = NULL;
- dht_methods_t *methods = NULL;
-
- GF_VALIDATE_OR_GOTO("tier", this, err);
-
- conf = this->private;
-
- methods = &(conf->methods);
-
- methods->migration_get_dst_subvol = tier_migration_get_dst;
- methods->migration_other = tier_start;
- methods->migration_needed = tier_migration_needed;
- methods->layout_search = tier_search;
-
- ret = 0;
-err:
- return ret;
-}
-
-static void
-tier_save_vol_name(xlator_t *this)
-{
- dht_conf_t *conf = NULL;
- gf_defrag_info_t *defrag = NULL;
- char *suffix = NULL;
- int name_len = 0;
-
- conf = this->private;
- defrag = conf->defrag;
-
- suffix = strstr(this->name, "-tier-dht");
-
- if (suffix)
- name_len = suffix - this->name;
- else
- name_len = strlen(this->name);
-
- if (name_len > GD_VOLUME_NAME_MAX)
- name_len = GD_VOLUME_NAME_MAX;
-
- strncpy(defrag->tier_conf.volname, this->name, name_len);
- defrag->tier_conf.volname[name_len] = 0;
-}
-
-int
-tier_init(xlator_t *this)
-{
- int ret = -1;
- int freq = 0;
- int maxsize = 0;
- dht_conf_t *conf = NULL;
- gf_defrag_info_t *defrag = NULL;
- char *voldir = NULL;
- char *mode = NULL;
- char *paused = NULL;
- tier_mode_t tier_mode = DEFAULT_TIER_MODE;
- gf_boolean_t compact_mode = _gf_false;
-
- ret = dht_init(this);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "tier_init failed");
- goto out;
- }
-
- conf = this->private;
-
- ret = tier_init_methods(this);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "tier_init_methods failed");
- goto out;
- }
-
- if (conf->subvolume_cnt != 2) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Invalid number of subvolumes %d", conf->subvolume_cnt);
- goto out;
- }
-
- /* if instatiated from client side initialization is complete. */
- if (!conf->defrag) {
- ret = 0;
- goto out;
- }
-
- /* if instatiated from server side, load db libraries */
- ret = tier_load_externals(this);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Could not load externals. Aborting");
- goto out;
- }
-
- defrag = conf->defrag;
-
- defrag->tier_conf.last_demote_qfile_index = 0;
- defrag->tier_conf.last_promote_qfile_index = 0;
-
- defrag->tier_conf.is_tier = 1;
- defrag->this = this;
-
- ret = dict_get_int32(this->options, "tier-max-promote-file-size", &maxsize);
- if (ret) {
- maxsize = 0;
- }
-
- defrag->tier_conf.tier_max_promote_size = maxsize;
-
- ret = dict_get_int32(this->options, "tier-promote-frequency", &freq);
- if (ret) {
- freq = DEFAULT_PROMOTE_FREQ_SEC;
- }
-
- defrag->tier_conf.tier_promote_frequency = freq;
-
- ret = dict_get_int32(this->options, "tier-demote-frequency", &freq);
- if (ret) {
- freq = DEFAULT_DEMOTE_FREQ_SEC;
- }
-
- defrag->tier_conf.tier_demote_frequency = freq;
-
- ret = dict_get_int32(this->options, "tier-hot-compact-frequency", &freq);
- if (ret) {
- freq = DEFAULT_HOT_COMPACT_FREQ_SEC;
- }
-
- defrag->tier_conf.tier_compact_hot_frequency = freq;
-
- ret = dict_get_int32(this->options, "tier-cold-compact-frequency", &freq);
- if (ret) {
- freq = DEFAULT_COLD_COMPACT_FREQ_SEC;
- }
-
- defrag->tier_conf.tier_compact_cold_frequency = freq;
-
- ret = dict_get_int32(this->options, "watermark-hi", &freq);
- if (ret) {
- freq = DEFAULT_WM_HI;
- }
-
- defrag->tier_conf.watermark_hi = freq;
-
- ret = dict_get_int32(this->options, "watermark-low", &freq);
- if (ret) {
- freq = DEFAULT_WM_LOW;
- }
-
- defrag->tier_conf.watermark_low = freq;
-
- ret = dict_get_int32(this->options, "write-freq-threshold", &freq);
- if (ret) {
- freq = DEFAULT_WRITE_FREQ_SEC;
- }
-
- defrag->write_freq_threshold = freq;
-
- ret = dict_get_int32(this->options, "read-freq-threshold", &freq);
- if (ret) {
- freq = DEFAULT_READ_FREQ_SEC;
- }
-
- defrag->read_freq_threshold = freq;
-
- ret = dict_get_int32(this->options, "tier-max-mb", &freq);
- if (ret) {
- freq = DEFAULT_TIER_MAX_MIGRATE_MB;
- }
-
- defrag->tier_conf.max_migrate_bytes = (uint64_t)freq * 1024 * 1024;
-
- ret = dict_get_int32(this->options, "tier-max-files", &freq);
- if (ret) {
- freq = DEFAULT_TIER_MAX_MIGRATE_FILES;
- }
-
- defrag->tier_conf.max_migrate_files = freq;
-
- ret = dict_get_int32(this->options, "tier-query-limit",
- &(defrag->tier_conf.query_limit));
- if (ret) {
- defrag->tier_conf.query_limit = DEFAULT_TIER_QUERY_LIMIT;
- }
-
- ret = dict_get_str(this->options, "tier-compact", &mode);
-
- if (ret) {
- defrag->tier_conf.compact_active = DEFAULT_COMP_MODE;
- } else {
- compact_mode = tier_validate_compact_mode(mode);
- /* If compaction is now active, we need to inform the bricks on
- the hot and cold tier of this. See dht-common.h for more. */
- defrag->tier_conf.compact_active = compact_mode;
- if (compact_mode) {
- defrag->tier_conf.compact_mode_switched_hot = _gf_true;
- defrag->tier_conf.compact_mode_switched_cold = _gf_true;
- }
- }
-
- ret = dict_get_str(this->options, "tier-mode", &mode);
- if (ret) {
- defrag->tier_conf.mode = DEFAULT_TIER_MODE;
- } else {
- tier_mode = tier_validate_mode(mode);
- defrag->tier_conf.mode = tier_mode;
- }
-
- pthread_mutex_init(&defrag->tier_conf.pause_mutex, 0);
-
- gf_defrag_set_pause_state(&defrag->tier_conf, TIER_RUNNING);
-
- ret = dict_get_str(this->options, "tier-pause", &paused);
-
- if (paused && strcmp(paused, "on") == 0)
- gf_defrag_set_pause_state(&defrag->tier_conf, TIER_REQUEST_PAUSE);
-
- ret = gf_asprintf(&voldir, "%s/%s", DEFAULT_VAR_RUN_DIRECTORY, this->name);
- if (ret < 0)
- goto out;
-
- ret = mkdir_p(voldir, 0777, _gf_true);
- if (ret == -1 && errno != EEXIST) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "tier_init failed");
-
- GF_FREE(voldir);
- goto out;
- }
-
- GF_FREE(voldir);
-
- ret = gf_asprintf(&promotion_qfile, "%s/%s/promote",
- DEFAULT_VAR_RUN_DIRECTORY, this->name);
- if (ret < 0)
- goto out;
-
- ret = gf_asprintf(&demotion_qfile, "%s/%s/demote",
- DEFAULT_VAR_RUN_DIRECTORY, this->name);
- if (ret < 0) {
- GF_FREE(promotion_qfile);
- goto out;
- }
-
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "Promote/demote frequency %d/%d "
- "Write/Read freq thresholds %d/%d",
- defrag->tier_conf.tier_promote_frequency,
- defrag->tier_conf.tier_demote_frequency,
- defrag->write_freq_threshold, defrag->read_freq_threshold);
-
- tier_save_vol_name(this);
-
- ret = 0;
-
-out:
-
- return ret;
-}
-
-int
-tier_cli_pause_done(int op_ret, call_frame_t *sync_frame, void *data)
-{
- gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_TIER_PAUSED,
- "Migrate file paused with op_ret %d", op_ret);
-
- return op_ret;
-}
-
-int
-tier_cli_pause(void *data)
-{
- gf_defrag_info_t *defrag = NULL;
- xlator_t *this = NULL;
- dht_conf_t *conf = NULL;
- int ret = -1;
-
- this = data;
-
- conf = this->private;
- GF_VALIDATE_OR_GOTO(this->name, conf, exit);
-
- defrag = conf->defrag;
- GF_VALIDATE_OR_GOTO(this->name, defrag, exit);
-
- gf_defrag_pause_tier(this, defrag);
-
- ret = 0;
-exit:
- return ret;
-}
-
-int
-tier_reconfigure(xlator_t *this, dict_t *options)
-{
- dht_conf_t *conf = NULL;
- gf_defrag_info_t *defrag = NULL;
- char *mode = NULL;
- int migrate_mb = 0;
- gf_boolean_t req_pause = _gf_false;
- int ret = 0;
- call_frame_t *frame = NULL;
- gf_boolean_t last_compact_setting = _gf_false;
-
- conf = this->private;
-
- if (conf->defrag) {
- defrag = conf->defrag;
- GF_OPTION_RECONF("tier-max-promote-file-size",
- defrag->tier_conf.tier_max_promote_size, options,
- int32, out);
-
- GF_OPTION_RECONF("tier-promote-frequency",
- defrag->tier_conf.tier_promote_frequency, options,
- int32, out);
-
- GF_OPTION_RECONF("tier-demote-frequency",
- defrag->tier_conf.tier_demote_frequency, options,
- int32, out);
-
- GF_OPTION_RECONF("write-freq-threshold", defrag->write_freq_threshold,
- options, int32, out);
-
- GF_OPTION_RECONF("read-freq-threshold", defrag->read_freq_threshold,
- options, int32, out);
-
- GF_OPTION_RECONF("watermark-hi", defrag->tier_conf.watermark_hi,
- options, int32, out);
-
- GF_OPTION_RECONF("watermark-low", defrag->tier_conf.watermark_low,
- options, int32, out);
-
- last_compact_setting = defrag->tier_conf.compact_active;
-
- GF_OPTION_RECONF("tier-compact", defrag->tier_conf.compact_active,
- options, bool, out);
-
- if (last_compact_setting != defrag->tier_conf.compact_active) {
- defrag->tier_conf.compact_mode_switched_hot = _gf_true;
- defrag->tier_conf.compact_mode_switched_cold = _gf_true;
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "compact mode switched");
- }
-
- GF_OPTION_RECONF("tier-hot-compact-frequency",
- defrag->tier_conf.tier_compact_hot_frequency, options,
- int32, out);
-
- GF_OPTION_RECONF("tier-cold-compact-frequency",
- defrag->tier_conf.tier_compact_cold_frequency, options,
- int32, out);
-
- GF_OPTION_RECONF("tier-mode", mode, options, str, out);
- defrag->tier_conf.mode = tier_validate_mode(mode);
-
- GF_OPTION_RECONF("tier-max-mb", migrate_mb, options, int32, out);
- defrag->tier_conf.max_migrate_bytes = (uint64_t)migrate_mb * 1024 *
- 1024;
-
- GF_OPTION_RECONF("tier-max-files", defrag->tier_conf.max_migrate_files,
- options, int32, out);
-
- GF_OPTION_RECONF("tier-query-limit", defrag->tier_conf.query_limit,
- options, int32, out);
-
- GF_OPTION_RECONF("tier-pause", req_pause, options, bool, out);
-
- if (req_pause == _gf_true) {
- frame = create_frame(this, this->ctx->pool);
- if (!frame)
- goto out;
-
- frame->root->pid = GF_CLIENT_PID_DEFRAG;
-
- ret = synctask_new(this->ctx->env, tier_cli_pause,
- tier_cli_pause_done, frame, this);
-
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "pause tier failed on reconfigure");
- }
- } else {
- ret = gf_defrag_resume_tier(this, defrag);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "resume tier failed on reconfigure");
- }
- }
- }
-
-out:
- return dht_reconfigure(this, options);
-}
-
-void
-tier_fini(xlator_t *this)
-{
- if (libhandle)
- dlclose(libhandle);
-
- GF_FREE(demotion_qfile);
- GF_FREE(promotion_qfile);
-
- dht_fini(this);
-}
-
-class_methods_t class_methods = {.init = tier_init,
- .fini = tier_fini,
- .reconfigure = tier_reconfigure,
- .notify = dht_notify};
-
-struct xlator_fops fops = {
-
- .lookup = dht_lookup,
- .create = tier_create,
- .mknod = dht_mknod,
-
- .open = dht_open,
- .statfs = tier_statfs,
- .opendir = dht_opendir,
- .readdir = tier_readdir,
- .readdirp = tier_readdirp,
- .fsyncdir = dht_fsyncdir,
- .symlink = dht_symlink,
- .unlink = tier_unlink,
- .link = tier_link,
- .mkdir = dht_mkdir,
- .rmdir = dht_rmdir,
- .rename = dht_rename,
- .entrylk = dht_entrylk,
- .fentrylk = dht_fentrylk,
-
- /* Inode read operations */
- .stat = dht_stat,
- .fstat = dht_fstat,
- .access = dht_access,
- .readlink = dht_readlink,
- .getxattr = dht_getxattr,
- .fgetxattr = dht_fgetxattr,
- .readv = dht_readv,
- .flush = dht_flush,
- .fsync = dht_fsync,
- .inodelk = dht_inodelk,
- .finodelk = dht_finodelk,
- .lk = dht_lk,
-
- /* Inode write operations */
- .fremovexattr = dht_fremovexattr,
- .removexattr = dht_removexattr,
- .setxattr = dht_setxattr,
- .fsetxattr = dht_fsetxattr,
- .truncate = dht_truncate,
- .ftruncate = dht_ftruncate,
- .writev = dht_writev,
- .xattrop = dht_xattrop,
- .fxattrop = dht_fxattrop,
- .setattr = dht_setattr,
- .fsetattr = dht_fsetattr,
- .fallocate = dht_fallocate,
- .discard = dht_discard,
- .zerofill = dht_zerofill,
-};
-
-struct xlator_cbks cbks = {.release = dht_release, .forget = dht_forget};
diff --git a/xlators/cluster/dht/src/tier.h b/xlators/cluster/dht/src/tier.h
deleted file mode 100644
index a20b1db07e0..00000000000
--- a/xlators/cluster/dht/src/tier.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _TIER_H_
-#define _TIER_H_
-
-/******************************************************************************/
-/* This is from dht-rebalancer.c as we don't have dht-rebalancer.h */
-#include "dht-common.h"
-#include <glusterfs/xlator.h>
-#include <signal.h>
-#include <fnmatch.h>
-#include <signal.h>
-
-/*
- * Size of timer wheel. We would not promote or demote less
- * frequently than this number.
- */
-#define TIMER_SECS 3600
-
-#include "gfdb_data_store.h"
-#include <ctype.h>
-#include <sys/stat.h>
-
-#define PROMOTION_QFILE "promotequeryfile"
-#define DEMOTION_QFILE "demotequeryfile"
-
-#define TIER_HASHED_SUBVOL conf->subvolumes[0]
-#define TIER_UNHASHED_SUBVOL conf->subvolumes[1]
-
-#define GET_QFILE_PATH(is_promotion) \
- (is_promotion) ? promotion_qfile : demotion_qfile
-
-typedef struct tier_qfile_array {
- int *fd_array;
- ssize_t array_size;
- ssize_t next_index;
- /* Indicate the number of exhuasted FDs*/
- ssize_t exhausted_count;
-} tier_qfile_array_t;
-
-typedef struct _query_cbk_args {
- xlator_t *this;
- gf_defrag_info_t *defrag;
- /* This is write */
- int query_fd;
- int is_promotion;
- int is_compaction;
- /* This is for read */
- tier_qfile_array_t *qfile_array;
-} query_cbk_args_t;
-
-int
-gf_run_tier(xlator_t *this, gf_defrag_info_t *defrag);
-
-typedef struct gfdb_brick_info {
- gfdb_time_t *time_stamp;
- gf_boolean_t _gfdb_promote;
- query_cbk_args_t *_query_cbk_args;
-} gfdb_brick_info_t;
-
-typedef struct brick_list {
- xlator_t *xlator;
- char *brick_db_path;
- char brick_name[NAME_MAX];
- char qfile_path[PATH_MAX];
- struct list_head list;
-} tier_brick_list_t;
-
-typedef struct _dm_thread_args {
- xlator_t *this;
- gf_defrag_info_t *defrag;
- struct list_head *brick_list;
- int freq_time;
- int return_value;
- int is_promotion;
- int is_compaction;
- gf_boolean_t is_hot_tier;
-} migration_args_t;
-
-typedef enum tier_watermark_op_ {
- TIER_WM_NONE = 0,
- TIER_WM_LOW,
- TIER_WM_HI,
- TIER_WM_MID
-} tier_watermark_op_t;
-
-#define DEFAULT_PROMOTE_FREQ_SEC 120
-#define DEFAULT_DEMOTE_FREQ_SEC 120
-#define DEFAULT_HOT_COMPACT_FREQ_SEC 604800
-#define DEFAULT_COLD_COMPACT_FREQ_SEC 604800
-#define DEFAULT_DEMOTE_DEGRADED 1
-#define DEFAULT_WRITE_FREQ_SEC 0
-#define DEFAULT_READ_FREQ_SEC 0
-#define DEFAULT_WM_LOW 75
-#define DEFAULT_WM_HI 90
-#define DEFAULT_TIER_MODE TIER_MODE_TEST
-#define DEFAULT_COMP_MODE _gf_true
-#define DEFAULT_TIER_MAX_MIGRATE_MB 1000
-#define DEFAULT_TIER_MAX_MIGRATE_FILES 5000
-#define DEFAULT_TIER_QUERY_LIMIT 100
-
-#endif
diff --git a/xlators/cluster/dht/src/tier.sym b/xlators/cluster/dht/src/tier.sym
deleted file mode 100644
index 60205d145b6..00000000000
--- a/xlators/cluster/dht/src/tier.sym
+++ /dev/null
@@ -1,9 +0,0 @@
-fops
-cbks
-class_methods
-dht_methods
-tier_methods
-options
-mem_acct_init
-reconfigure
-dumpops