From b23bd3dbc2c153171d0bb1205e6804afe022a55f Mon Sep 17 00:00:00 2001 From: N Balachandran Date: Wed, 10 May 2017 21:26:28 +0530 Subject: cluster/dht: Rebalance on all nodes should migrate files Problem: Rebalance compares the node-uuid of a file against its own and migrates a file only if they match. However, the current behaviour in both AFR and EC is to return the node-uuid of the first brick in a replica set for all files. This means a single node ends up migrating all the files if the first brick of every replica set is on the same node. Fix: AFR and EC will return all node-uuids for the replica set. The rebalance process will divide the files to be migrated among all the nodes by hashing the gfid of the file and using that value to select a node to perform the migration. This patch makes the required DHT and tiering changes. Some tests in rebal-all-nodes-migrate.t will need to be uncommented once the AFR and EC changes are merged. Change-Id: I5ce41600f5ba0e244ddfd986e2ba8fa23329ff0c BUG: 1366817 Signed-off-by: N Balachandran Reviewed-on: https://review.gluster.org/17239 Smoke: Gluster Build System NetBSD-regression: NetBSD Build System CentOS-regression: Gluster Build System Reviewed-by: Amar Tumballi Reviewed-by: Jeff Darcy Reviewed-by: Shyamsundar Ranganathan --- xlators/cluster/dht/src/tier.c | 57 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 53 insertions(+), 4 deletions(-) (limited to 'xlators/cluster/dht/src/tier.c') diff --git a/xlators/cluster/dht/src/tier.c b/xlators/cluster/dht/src/tier.c index a8aebe00f69..e4b910eb0e6 100644 --- a/xlators/cluster/dht/src/tier.c +++ b/xlators/cluster/dht/src/tier.c @@ -198,10 +198,17 @@ out: static int tier_check_same_node (xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag) { - int ret = -1; - dict_t *dict = NULL; - char *uuid_str = NULL; - uuid_t node_uuid = {0,}; + int ret = -1; + dict_t *dict = NULL; + char *uuid_str = NULL; + uuid_t node_uuid = {0,}; + char *dup_str = NULL; + char *str = 
NULL; + char *save_ptr = NULL; + int count = 0; + uint32_t hashval = 0; + int32_t index = 0; + char buf[GF_UUID_BUF_SIZE] = {0,}; GF_VALIDATE_OR_GOTO ("tier", this, out); GF_VALIDATE_OR_GOTO (this->name, loc, out); @@ -215,15 +222,56 @@ tier_check_same_node (xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag) goto out; } + + /* This returns multiple node-uuids now - one for each brick + * of the subvol. + */ + if (dict_get_str (dict, GF_XATTR_NODE_UUID_KEY, &uuid_str) < 0) { gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, "Failed to get node-uuid for %s", loc->path); goto out; } + dup_str = gf_strdup (uuid_str); + str = dup_str; + + /* How many uuids returned? + * No need to check if one of these is that of the current node. + */ + + count = 1; + while ((str = strchr (str, ' '))) { + count++; + str++; + } + + /* Only one node-uuid - pure distribute? */ + if (count == 1) + goto check_node; + + uuid_utoa_r (loc->gfid, buf); + ret = dht_hash_compute (this, 0, buf, &hashval); + if (ret == 0) { + index = (hashval % count); + } + + count = 0; + str = dup_str; + while ((uuid_str = strtok_r (str, " ", &save_ptr))) { + if (count == index) + break; + count++; + str = NULL; + } + + +check_node: + if (gf_uuid_parse (uuid_str, node_uuid)) { gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, "uuid_parse failed for %s", loc->path); + ret = -1; goto out; } @@ -239,6 +287,7 @@ out: if (dict) dict_unref(dict); + GF_FREE (dup_str); return ret; } -- cgit