diff options
Diffstat (limited to 'xlators/cluster/dht/src/dht-layout.c')
| -rw-r--r-- | xlators/cluster/dht/src/dht-layout.c | 216 |
1 files changed, 134 insertions, 82 deletions
diff --git a/xlators/cluster/dht/src/dht-layout.c b/xlators/cluster/dht/src/dht-layout.c index 3c8f63f5b..38e9970a7 100644 --- a/xlators/cluster/dht/src/dht-layout.c +++ b/xlators/cluster/dht/src/dht-layout.c @@ -1,20 +1,11 @@ /* - Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com> + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ #ifndef _CONFIG_H @@ -68,9 +59,7 @@ dht_layout_t * dht_layout_get (xlator_t *this, inode_t *inode) { dht_conf_t *conf = NULL; - uint64_t layout_int = 0; dht_layout_t *layout = NULL; - int ret = -1; conf = this->private; if (!conf) @@ -78,9 +67,8 @@ dht_layout_get (xlator_t *this, inode_t *inode) LOCK (&conf->layout_lock); { - ret = inode_ctx_get (inode, this, &layout_int); - if (ret == 0) { - layout = (dht_layout_t *) (unsigned long) layout_int; + dht_inode_ctx_layout_get (inode, this, &layout); + if (layout) { layout->ref++; } } @@ -98,7 +86,6 @@ dht_layout_set (xlator_t *this, inode_t *inode, dht_layout_t *layout) int oldret = -1; int ret = 0; dht_layout_t *old_layout; - uint64_t old_layout_int; conf = this->private; if (!conf) @@ -106,16 +93,13 @@ dht_layout_set (xlator_t *this, inode_t *inode, dht_layout_t *layout) LOCK (&conf->layout_lock); { - oldret = inode_ctx_get (inode, this, &old_layout_int); - + oldret = dht_inode_ctx_layout_get (inode, this, &old_layout); layout->ref++; - ret = inode_ctx_put (inode, this, (uint64_t) (unsigned long) - layout); + dht_inode_ctx_layout_set (inode, this, layout); } UNLOCK (&conf->layout_lock); - if (oldret == 0) { - old_layout = (dht_layout_t *) (unsigned long) old_layout_int; + if (!oldret) { dht_layout_unref (this, old_layout); } @@ -130,7 +114,7 @@ dht_layout_unref (xlator_t *this, dht_layout_t *layout) dht_conf_t *conf = NULL; int ref = 0; - if (layout->preset || !this->private) + if (!layout || layout->preset || !this->private) return; conf = this->private; @@ -174,9 +158,9 @@ dht_layout_search (xlator_t *this, dht_layout_t *layout, const char *name) int ret = 0; - ret = dht_hash_compute (layout->type, name, &hash); + ret = dht_hash_compute (this, layout->type, name, &hash); if (ret != 0) { - gf_log (this->name, GF_LOG_INFO, + gf_log (this->name, GF_LOG_WARNING, "hash computation failed for type=%d name=%s", layout->type, name); goto out; @@ -191,7 +175,7 @@ dht_layout_search (xlator_t *this, dht_layout_t *layout, const char *name) } if (!subvol) { - gf_log (this->name, GF_LOG_INFO, + gf_log (this->name, GF_LOG_WARNING, "no subvolume for hash (value) = %u", hash); } @@ -280,6 +264,9 @@ dht_disk_layout_extract (xlator_t *this, dht_layout_t *layout, if (disk_layout_p) *disk_layout_p = disk_layout; + else + GF_FREE (disk_layout); + ret = 0; out: @@ -309,14 +296,18 @@ dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout, cnt = ntoh32 (disk_layout[0]); if (cnt != 1) { - gf_log (this->name, GF_LOG_INFO, + gf_log (this->name, GF_LOG_ERROR, "disk layout has invalid count %d", cnt); return -1; } - switch (disk_layout[1]) { + type = ntoh32 (disk_layout[1]); + switch (type) { + case DHT_HASH_TYPE_DM_USER: + gf_log (this->name, GF_LOG_DEBUG, "found user-set layout"); + layout->type = type; + /* Fall through. */ case DHT_HASH_TYPE_DM: - type = ntoh32 (disk_layout[1]); break; default: gf_log (this->name, GF_LOG_CRITICAL, @@ -344,11 +335,12 @@ int dht_layout_merge (xlator_t *this, dht_layout_t *layout, xlator_t *subvol, int op_ret, int op_errno, dict_t *xattr) { - int i = 0; - int ret = -1; - int err = -1; - void *disk_layout_raw = NULL; - int disk_layout_len = 0; + int i = 0; + int ret = -1; + int err = -1; + void *disk_layout_raw = NULL; + int disk_layout_len = 0; + dht_conf_t *conf = this->private; if (op_ret != 0) { err = op_errno; @@ -369,12 +361,12 @@ dht_layout_merge (xlator_t *this, dht_layout_t *layout, xlator_t *subvol, if (xattr) { /* during lookup and not mkdir */ - ret = dict_get_ptr_and_len (xattr, "trusted.glusterfs.dht", + ret = dict_get_ptr_and_len (xattr, conf->xattr_name, &disk_layout_raw, &disk_layout_len); } if (ret != 0) { - layout->list[i].err = -1; + layout->list[i].err = 0; gf_log (this->name, GF_LOG_TRACE, "missing disk layout on %s. err = %d", subvol->name, err); @@ -385,7 +377,7 @@ dht_layout_merge (xlator_t *this, dht_layout_t *layout, xlator_t *subvol, ret = dht_disk_layout_merge (this, layout, i, disk_layout_raw, disk_layout_len); if (ret != 0) { - gf_log (this->name, GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_WARNING, "layout merge from subvolume %s failed", subvol->name); goto out; @@ -421,6 +413,22 @@ dht_layout_entry_swap (dht_layout_t *layout, int i, int j) layout->list[j].err = err_swap; } +void +dht_layout_range_swap (dht_layout_t *layout, int i, int j) +{ + uint32_t start_swap = 0; + uint32_t stop_swap = 0; + + start_swap = layout->list[i].start; + stop_swap = layout->list[i].stop; + + layout->list[i].start = layout->list[j].start; + layout->list[i].stop = layout->list[j].stop; + + layout->list[j].start = start_swap; + layout->list[j].stop = stop_swap; +} + int64_t dht_layout_entry_cmp_volname (dht_layout_t *layout, int i, int j) { @@ -428,17 +436,37 @@ dht_layout_entry_cmp_volname (dht_layout_t *layout, int i, int j) layout->list[j].xlator->name)); } + +gf_boolean_t +dht_is_subvol_in_layout (dht_layout_t *layout, xlator_t *xlator) +{ + int i = 0; + + for (i = 0; i < layout->cnt; i++) { + if (!strcmp (layout->list[i].xlator->name, xlator->name)) + return _gf_true; + } + return _gf_false; +} + int64_t dht_layout_entry_cmp (dht_layout_t *layout, int i, int j) { int64_t diff = 0; + /* swap zero'ed out layouts to front, if needed */ + if (!layout->list[j].start && !layout->list[j].stop) { + diff = (int64_t) layout->list[i].stop + - (int64_t) layout->list[j].stop; + goto out; + } if (layout->list[i].err || layout->list[j].err) diff = layout->list[i].err - layout->list[j].err; else diff = (int64_t) layout->list[i].start - (int64_t) layout->list[j].start; +out: return diff; } @@ -487,7 +515,8 @@ dht_layout_sort_volname (dht_layout_t *layout) int dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout, uint32_t *holes_p, uint32_t *overlaps_p, - uint32_t *missing_p, uint32_t *down_p, uint32_t *misc_p) + uint32_t *missing_p, uint32_t *down_p, uint32_t *misc_p, + uint32_t *no_space_p) { uint32_t overlaps = 0; uint32_t missing = 0; @@ -500,6 +529,7 @@ dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout, uint32_t prev_stop = 0; uint32_t last_stop = 0; char is_virgin = 1; + uint32_t no_space = 0; /* TODO: explain what is happening */ @@ -507,23 +537,30 @@ dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout, prev_stop = last_stop; for (i = 0; i < layout->cnt; i++) { - if (layout->list[i].err) { - switch (layout->list[i].err) { - case -1: - case ENOENT: - missing++; - break; - case ENOTCONN: - down++; - break; - case ENOSPC: - down++; - break; - default: - misc++; + switch (layout->list[i].err) { + case -1: + case ENOENT: + missing++; + continue; + case ENOTCONN: + down++; + continue; + case ENOSPC: + no_space++; + continue; + case 0: + /* if err == 0 and start == stop, then it is a non misc++; + * participating subvolume(spread-cnt). Then, do not + * check for anomalies. If start != stop, then treat it + * as misc err */ + if (layout->list[i].start == layout->list[i].stop) { + continue; } + break; + default: + misc++; continue; - } + } is_virgin = 0; @@ -556,6 +593,9 @@ dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout, if (misc_p) *misc_p = misc; + if (no_space_p) + *no_space_p = no_space; + return ret; } @@ -571,7 +611,6 @@ dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout) uint32_t down = 0; uint32_t misc = 0; - ret = dht_layout_sort (layout); if (ret == -1) { gf_log (this->name, GF_LOG_WARNING, @@ -581,7 +620,7 @@ dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout) ret = dht_layout_anomalies (this, loc, layout, &holes, &overlaps, - &missing, &down, &misc); + &missing, &down, &misc, NULL); if (ret == -1) { gf_log (this->name, GF_LOG_WARNING, "error while finding anomalies in %s -- not good news", @@ -599,43 +638,56 @@ dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout) "found anomalies in %s. holes=%d overlaps=%d", loc->path, holes, overlaps); } - ret = 1; + ret = -1; } for (i = 0; i < layout->cnt; i++) { - /* TODO During DHT selfheal rewrite (almost) find a better place to - * detect this - probably in dht_layout_anomalies() + /* TODO During DHT selfheal rewrite (almost) find a better place + * to detect this - probably in dht_layout_anomalies() */ if (layout->list[i].err > 0) { - gf_log (this->name, GF_LOG_DEBUG, - "path=%s err=%s on subvol=%s", - loc->path, strerror (layout->list[i].err), - (layout->list[i].xlator ? - layout->list[i].xlator->name : "<>")); - if (layout->list[i].err == ENOENT) - ret = 1; + gf_log_callingfn (this->name, GF_LOG_DEBUG, + "path=%s err=%s on subvol=%s", + loc->path, + strerror (layout->list[i].err), + (layout->list[i].xlator ? + layout->list[i].xlator->name + : "<>")); + if ((layout->list[i].err == ENOENT) && (ret >= 0)) { + ret++; + } } } + out: return ret; } +int +dht_dir_has_layout (dict_t *xattr, char *name) +{ + + void *disk_layout_raw = NULL; + + return dict_get_ptr (xattr, name, &disk_layout_raw); +} int dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout, xlator_t *subvol, loc_t *loc, dict_t *xattr) { - int idx = 0; - int pos = -1; - int ret = 0; - int err = 0; - int dict_ret = 0; - int32_t disk_layout[4]; - void *disk_layout_raw = NULL; - int32_t count = -1; - uint32_t start_off = -1; - uint32_t stop_off = -1; + int idx = 0; + int pos = -1; + int ret = 0; + int err = 0; + int dict_ret = 0; + int32_t disk_layout[4]; + void *disk_layout_raw = NULL; + int32_t count = -1; + uint32_t start_off = -1; + uint32_t stop_off = -1; + dht_conf_t *conf = this->private; for (idx = 0; idx < layout->cnt; idx++) { @@ -665,7 +717,7 @@ dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout, xlator_t *subvol, goto out; } - dict_ret = dict_get_ptr (xattr, "trusted.glusterfs.dht", + dict_ret = dict_get_ptr (xattr, conf->xattr_name, &disk_layout_raw); if (dict_ret < 0) { @@ -681,7 +733,7 @@ dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout, xlator_t *subvol, count = ntoh32 (disk_layout[0]); if (count != 1) { - gf_log (this->name, GF_LOG_INFO, + gf_log (this->name, GF_LOG_ERROR, "%s - disk layout has invalid count %d", loc->path, count); ret = -1; @@ -730,7 +782,7 @@ dht_layout_preset (xlator_t *this, xlator_t *subvol, inode_t *inode) LOCK (&conf->layout_lock); { - inode_ctx_put (inode, this, (uint64_t)(long)layout); + dht_inode_ctx_layout_set (inode, this, layout); } UNLOCK (&conf->layout_lock); |
