summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/dht/src/dht-common.c
diff options
context:
space:
mode:
authorN Balachandran <nbalacha@redhat.com>2017-05-10 21:26:28 +0530
committerShyamsundar Ranganathan <srangana@redhat.com>2017-05-16 16:01:39 +0000
commitb23bd3dbc2c153171d0bb1205e6804afe022a55f (patch)
tree490914f794819e6bc7b52f2f0411e3ea1909963c /xlators/cluster/dht/src/dht-common.c
parent6f7d55c9d58797beaf8d5393c03a5a545bed8bec (diff)
cluster/dht: Rebalance on all nodes should migrate files
Problem: Rebalance compares the node-uuid of a file against its own and migrates a file only if they match. However, the current behaviour in both AFR and EC is to return the node-uuid of the first brick in a replica set for all files. This means a single node ends up migrating all the files if the first brick of every replica set is on the same node. Fix: AFR and EC will return all node-uuids for the replica set. The rebalance process will divide the files to be migrated among all the nodes by hashing the gfid of the file and using that value to select a node to perform the migration. This patch makes the required DHT and tiering changes. Some tests in rebal-all-nodes-migrate.t will need to be uncommented once the AFR and EC changes are merged. Change-Id: I5ce41600f5ba0e244ddfd986e2ba8fa23329ff0c BUG: 1366817 Signed-off-by: N Balachandran <nbalacha@redhat.com> Reviewed-on: https://review.gluster.org/17239 Smoke: Gluster Build System <jenkins@build.gluster.org> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> CentOS-regression: Gluster Build System <jenkins@build.gluster.org> Reviewed-by: Amar Tumballi <amarts@redhat.com> Reviewed-by: Jeff Darcy <jeff@pl.atyp.us> Reviewed-by: Shyamsundar Ranganathan <srangana@redhat.com>
Diffstat (limited to 'xlators/cluster/dht/src/dht-common.c')
-rw-r--r--xlators/cluster/dht/src/dht-common.c64
1 files changed, 61 insertions, 3 deletions
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index af6345ecc2a..8b4fd5cf37b 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -3001,6 +3001,8 @@ dht_vgetxattr_fill_and_set (dht_local_t *local, dict_t **dict, xlator_t *this,
out:
return ret;
}
+
+
int
dht_find_local_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xattr,
@@ -3016,6 +3018,11 @@ dht_find_local_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
char *next_uuid_str = NULL;
char *saveptr = NULL;
uuid_t node_uuid = {0,};
+ char *uuid_list_copy = NULL;
+ int count = 0;
+ int i = 0;
+ int index = 0;
+ int found = 0;
VALIDATE_OR_GOTO (frame, out);
@@ -3025,6 +3032,10 @@ dht_find_local_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
prev = cookie;
conf = this->private;
+ VALIDATE_OR_GOTO (conf->defrag, out);
+
+ gf_msg_debug (this->name, 0, "subvol %s returned", prev->name);
+
LOCK (&frame->lock);
{
this_call_cnt = --local->call_cnt;
@@ -3048,6 +3059,15 @@ dht_find_local_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto unlock;
}
+ /* As DHT will not know details of its child xlators
+ * we need to parse this twice to get the count first
+ * and allocate memory later.
+ */
+ count = 0;
+ index = conf->local_subvols_cnt;
+
+ uuid_list_copy = gf_strdup (uuid_list);
+
for (uuid_str = strtok_r (uuid_list, " ", &saveptr);
uuid_str;
uuid_str = next_uuid_str) {
@@ -3057,24 +3077,57 @@ dht_find_local_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
gf_msg (this->name, GF_LOG_ERROR, 0,
DHT_MSG_UUID_PARSE_ERROR,
"Failed to parse uuid"
- " failed for %s", prev->name);
+ " for %s", prev->name);
local->op_ret = -1;
local->op_errno = EINVAL;
goto unlock;
}
+ count++;
if (gf_uuid_compare (node_uuid, conf->defrag->node_uuid)) {
gf_msg_debug (this->name, 0, "subvol %s does not"
"belong to this node",
prev->name);
} else {
+
+ /* handle multiple bricks of the same replica
+ * on the same node */
+ if (found)
+ continue;
conf->local_subvols[(conf->local_subvols_cnt)++]
- = prev;
+ = prev;
+ found = 1;
gf_msg_debug (this->name, 0, "subvol %s belongs to"
" this node", prev->name);
- break;
}
}
+
+ if (!found) {
+ local->op_ret = 0;
+ goto unlock;
+ }
+
+ conf->local_nodeuuids[index].count = count;
+ conf->local_nodeuuids[index].uuids
+ = GF_CALLOC (count, sizeof (uuid_t), 1);
+
+ /* The node-uuids are guaranteed to be returned in the same
+ * order as the bricks
+ * A null node-uuid is returned for a brick that is down.
+ */
+
+ saveptr = NULL;
+ i = 0;
+
+ for (uuid_str = strtok_r (uuid_list_copy, " ", &saveptr);
+ uuid_str;
+ uuid_str = next_uuid_str) {
+
+ next_uuid_str = strtok_r (NULL, " ", &saveptr);
+ gf_uuid_parse (uuid_str,
+ conf->local_nodeuuids[index].uuids[i]);
+ i++;
+ }
}
local->op_ret = 0;
@@ -3092,8 +3145,13 @@ dht_find_local_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
unwind:
+
+ GF_FREE (conf->local_nodeuuids[index].uuids);
+ conf->local_nodeuuids[index].uuids = NULL;
+
DHT_STACK_UNWIND (getxattr, frame, -1, local->op_errno, NULL, xdata);
out:
+ GF_FREE (uuid_list_copy);
return 0;
}