From b1ff2294d2aaf7dd36918837c09a68152adc0637 Mon Sep 17 00:00:00 2001 From: Dan Lambright Date: Thu, 4 Jun 2015 14:00:34 -0400 Subject: cluster/tier: account for reordered layouts For a tiered volume the cold subvolume is always at a fixed position in the graph. DHT's layout array, on the other hand, may have the cold subvolume in either the first or second index, therefore code cannot make any assumptions. The fix searches the layout for the correct position dynamically rather than statically. The bug manifested itself in NFS, in which a newly attached subvolume had not received an existing directory. This case is a "stale entry" and marked as such in the layout for that directory. The code did not see this, because it looked at the wrong index in the layout array. The fix also adds the check for decommissioned bricks, and fixes a problem in detach tier related to starting the rebalance process: we never received the right defrag command and it did not get directed to the tier translator. 
Change-Id: I77cdf9fbb0a777640c98003188565a79be9d0b56 BUG: 1214289 Signed-off-by: Dan Lambright Tested-by: Gluster Build System Tested-by: NetBSD Build System Reviewed-by: Shyamsundar Ranganathan Reviewed-by: Joseph Fernandes Reviewed-by: Mohammed Rafi KC Reviewed-on: http://review.gluster.org/11092 --- xlators/cluster/dht/src/dht-common.c | 3 +- xlators/cluster/dht/src/tier.c | 43 ++++++++++++++++++-------- xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 6 +++- 3 files changed, 37 insertions(+), 15 deletions(-) diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index 6dc64eb13ab..0e020213470 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -7097,7 +7097,8 @@ int32_t dht_migration_needed(xlator_t *this) defrag = conf->defrag; - if (defrag->cmd != GF_DEFRAG_CMD_START_TIER) + if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) && + (defrag->cmd != GF_DEFRAG_CMD_START_DETACH_TIER)) ret = 1; out: diff --git a/xlators/cluster/dht/src/tier.c b/xlators/cluster/dht/src/tier.c index 2279e060442..4cd7dfdd9b4 100644 --- a/xlators/cluster/dht/src/tier.c +++ b/xlators/cluster/dht/src/tier.c @@ -920,7 +920,8 @@ tier_migration_needed (xlator_t *this) defrag = conf->defrag; - if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) + if ((defrag->cmd == GF_DEFRAG_CMD_START_TIER) || + (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER)) ret = 1; out: return ret; @@ -962,9 +963,11 @@ tier_search (xlator_t *this, dht_layout_t *layout, const char *name) { xlator_t *subvol = NULL; void *value; - int search_first_subvol = 0; + int search_subvol = 0; dht_conf_t *conf = NULL; gf_defrag_info_t *defrag = NULL; + int layout_cold = 0; + int layout_hot = 1; GF_VALIDATE_OR_GOTO("tier", this, out); GF_VALIDATE_OR_GOTO(this->name, layout, out); @@ -973,28 +976,42 @@ tier_search (xlator_t *this, dht_layout_t *layout, const char *name) conf = this->private; + /* The first subvolume in the graph is always cold. 
*/ + /* Find the position of the cold subvolume in the layout. */ + layout_cold = 0; + layout_hot = 1; + if (conf->subvolumes[0] != layout->list[0].xlator) { + layout_cold = 1; + layout_hot = 0; + } + + search_subvol = layout_hot; + defrag = conf->defrag; if (defrag && defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER) - search_first_subvol = 1; + search_subvol = layout_cold; + /* "decommission_subvols_cnt" can only be non-zero on detach. */ + /* This will change once brick add/remove is supported for */ + /* tiered volumes. */ + else if (conf->decommission_subvols_cnt) { + search_subvol = layout_cold; + } else if (!dict_get_ptr (this->options, "rule", &value) && - !strcmp(layout->list[0].xlator->name, value)) { - search_first_subvol = 1; + !strcmp(layout->list[layout_cold].xlator->name, value)) { + search_subvol = layout_cold; } - if ((layout->list[0].err > 0) && (layout->list[0].err != ENOTCONN)) - search_first_subvol = 0; + if ((layout->list[search_subvol].err > 0) && + (layout->list[search_subvol].err != ENOTCONN)) + search_subvol = layout_cold; - if (search_first_subvol) - subvol = layout->list[0].xlator; - else - subvol = layout->list[1].xlator; + subvol = layout->list[search_subvol].xlator; + out: -out: return subvol; } - dht_methods_t tier_methods = { .migration_get_dst_subvol = tier_migration_get_dst, .migration_other = tier_start, diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c index 5c0ed1e304c..8fff9ab2cdf 100644 --- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c @@ -2041,6 +2041,7 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) char *brick_tmpstr = NULL; int start_remove = 0; uint32_t commit_hash = 0; + int defrag_cmd = 0; this = THIS; GF_ASSERT (this); @@ -2311,9 +2312,12 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) volinfo->rebal.commit_hash = commit_hash; } /* perform the rebalance operations */ + defrag_cmd = 
GF_DEFRAG_CMD_START_FORCE; + if (cmd == GF_OP_CMD_DETACH_START) + defrag_cmd = GF_DEFRAG_CMD_START_DETACH_TIER; ret = glusterd_handle_defrag_start (volinfo, err_str, sizeof (err_str), - GF_DEFRAG_CMD_START_FORCE, + defrag_cmd, glusterd_remove_brick_migrate_cbk, GD_OP_REMOVE_BRICK); if (!ret) -- cgit