summary | refs | log | tree | commit | diff | stats
path: root/xlators/cluster/dht/src/dht-layout.c
diff options
context:
space:
mode:
author: Jeff Darcy <jdarcy@redhat.com> 2014-05-07 19:31:30 +0000
committer: Vijay Bellur <vbellur@redhat.com> 2015-05-10 06:17:22 -0700
commit: 4eaaf5188fe24a4707dc2cf2934525083cf8e64f (patch)
tree: 119e440d7ba0bbd85a929294915ef54085b74ffb /xlators/cluster/dht/src/dht-layout.c
parent: 4b7914384e2613e5ec7c618071cb89187ed6f870 (diff)
dht: make lookup-unhashed=auto do something actually useful
The key concept here is to determine whether a directory is "clean" by comparing its last-known-good topology to the current one for the volume. These are stored as "commit hashes" on the directory and the volume root respectively. The volume's commit hash changes whenever a brick is added or removed, and a fix-layout is done. A directory's commit hash changes only when a full rebalance (not just fix-layout) is done on it.

If all bricks are present and have a directory commit hash that matches the volume commit hash, then we can assume that every file is in its "proper" place. Therefore, if we look for a file in that proper place and don't find it, we can assume it's not on any other subvolume and *safely* skip the global (broadcast to all) lookup.

Change-Id: Id6ce4593ba1f7daffa74cfab591cb45960629ae3
BUG: 1219637
Signed-off-by: Jeff Darcy <jdarcy@redhat.com>
Signed-off-by: Shyam <srangana@redhat.com>
Reviewed-on: http://review.gluster.org/7702
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Tested-by: NetBSD Build System
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'xlators/cluster/dht/src/dht-layout.c')
-rw-r--r-- xlators/cluster/dht/src/dht-layout.c 78
1 files changed, 41 insertions, 37 deletions
diff --git a/xlators/cluster/dht/src/dht-layout.c b/xlators/cluster/dht/src/dht-layout.c
index 6ef28472307..da8f13fc428 100644
--- a/xlators/cluster/dht/src/dht-layout.c
+++ b/xlators/cluster/dht/src/dht-layout.c
@@ -267,7 +267,7 @@ dht_disk_layout_extract (xlator_t *this, dht_layout_t *layout,
goto out;
}
- disk_layout[0] = hton32 (1);
+ disk_layout[0] = hton32 (layout->list[pos].commit_hash);
disk_layout[1] = hton32 (layout->type);
disk_layout[2] = hton32 (layout->list[pos].start);
disk_layout[3] = hton32 (layout->list[pos].stop);
@@ -288,10 +288,10 @@ int
dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout,
int pos, void *disk_layout_raw, int disk_layout_len)
{
- int cnt = 0;
int type = 0;
int start_off = 0;
int stop_off = 0;
+ int commit_hash = 0;
int disk_layout[4];
if (!disk_layout_raw) {
@@ -305,14 +305,6 @@ dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout,
memcpy (disk_layout, disk_layout_raw, disk_layout_len);
- cnt = ntoh32 (disk_layout[0]);
- if (cnt != 1) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_INVALID_DISK_LAYOUT,
- "Invalid disk layout: Invalid count %d", cnt);
- return -1;
- }
-
type = ntoh32 (disk_layout[1]);
switch (type) {
case DHT_HASH_TYPE_DM_USER:
@@ -330,21 +322,22 @@ dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout,
return -1;
}
+ commit_hash = ntoh32 (disk_layout[0]);
start_off = ntoh32 (disk_layout[2]);
stop_off = ntoh32 (disk_layout[3]);
+ layout->list[pos].commit_hash = commit_hash;
layout->list[pos].start = start_off;
layout->list[pos].stop = stop_off;
gf_msg_trace (this->name, 0,
- "merged to layout: %u - %u (type %d) from %s",
- start_off, stop_off, type,
+ "merged to layout: %u - %u (type %d, hash %d) from %s",
+ start_off, stop_off, commit_hash, type,
layout->list[pos].xlator->name);
return 0;
}
-
int
dht_layout_merge (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
int op_ret, int op_errno, dict_t *xattr)
@@ -397,6 +390,13 @@ dht_layout_merge (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
subvol->name);
goto out;
}
+
+ if (layout->commit_hash == 0) {
+ layout->commit_hash = layout->list[i].commit_hash;
+ } else if (layout->commit_hash != layout->list[i].commit_hash) {
+ layout->commit_hash = DHT_LAYOUT_HASH_INVALID;
+ }
+
layout->list[i].err = 0;
out:
@@ -409,6 +409,7 @@ dht_layout_entry_swap (dht_layout_t *layout, int i, int j)
{
uint32_t start_swap = 0;
uint32_t stop_swap = 0;
+ uint32_t commit_hash_swap = 0;
xlator_t *xlator_swap = 0;
int err_swap = 0;
@@ -416,16 +417,19 @@ dht_layout_entry_swap (dht_layout_t *layout, int i, int j)
stop_swap = layout->list[i].stop;
xlator_swap = layout->list[i].xlator;
err_swap = layout->list[i].err;
+ commit_hash_swap = layout->list[i].commit_hash;
layout->list[i].start = layout->list[j].start;
layout->list[i].stop = layout->list[j].stop;
layout->list[i].xlator = layout->list[j].xlator;
layout->list[i].err = layout->list[j].err;
+ layout->list[i].commit_hash = layout->list[j].commit_hash;
layout->list[j].start = start_swap;
layout->list[j].stop = stop_swap;
layout->list[j].xlator = xlator_swap;
layout->list[j].err = err_swap;
+ layout->list[j].commit_hash = commit_hash_swap;
}
void
@@ -728,9 +732,9 @@ dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
int dict_ret = 0;
int32_t disk_layout[4];
void *disk_layout_raw = NULL;
- int32_t count = -1;
uint32_t start_off = -1;
uint32_t stop_off = -1;
+ uint32_t commit_hash = -1;
dht_conf_t *conf = this->private;
char gfid[GF_UUID_BUF_SIZE] = {0};
@@ -795,36 +799,21 @@ dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
memcpy (disk_layout, disk_layout_raw, sizeof (disk_layout));
- count = ntoh32 (disk_layout[0]);
- if (count != 1) {
- if (loc) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_INVALID_DISK_LAYOUT,
- "Invalid disk layout: invalid count %d,"
- "path = %s, gfid = %s ",
- count, loc->path, gfid);
- } else {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_INVALID_DISK_LAYOUT,
- "Invalid disk layout: invalid count %d,"
- "path not found, gfid = %s ",
- count, gfid);
- }
- ret = -1;
- goto out;
- }
-
start_off = ntoh32 (disk_layout[2]);
stop_off = ntoh32 (disk_layout[3]);
+ commit_hash = ntoh32 (disk_layout[0]);
if ((layout->list[pos].start != start_off)
- || (layout->list[pos].stop != stop_off)) {
+ || (layout->list[pos].stop != stop_off)
+ || (layout->list[pos].commit_hash != commit_hash)) {
gf_log (this->name, GF_LOG_INFO,
- "subvol: %s; inode layout - %"PRIu32" - %"PRIu32"; "
- "disk layout - %"PRIu32" - %"PRIu32,
+ "subvol: %s; inode layout - %"PRIu32" - %"PRIu32
+ " - %"PRIu32"; "
+ "disk layout - %"PRIu32" - %"PRIu32" - %"PRIu32,
layout->list[pos].xlator->name,
layout->list[pos].start, layout->list[pos].stop,
- start_off, stop_off);
+ layout->list[pos].commit_hash,
+ start_off, stop_off, commit_hash);
ret = 1;
} else {
ret = 0;
@@ -864,3 +853,18 @@ dht_layout_preset (xlator_t *this, xlator_t *subvol, inode_t *inode)
out:
return ret;
}
+
+/* Index of the first layout->list entry whose xlator is subvol, or -1 if none. */
+int
+dht_layout_index_for_subvol (dht_layout_t *layout, xlator_t *subvol)
+{
+        int idx = 0;
+
+        for (idx = 0; idx < layout->cnt; idx++) {
+                if (layout->list[idx].xlator == subvol) {
+                        return idx;
+                }
+        }
+
+        return -1;
+}