From 63ed610617458ac4fd85cb83471df2222380f28a Mon Sep 17 00:00:00 2001 From: shishir gowda Date: Mon, 8 Jul 2013 18:48:55 +0530 Subject: cluster/dht: Ignore subvols with error in min-free-disk/inodes Currently, when selecting an alternative subvolume because the hashed subvol has exceeded min-free-disk/inodes, we do not check whether the layouts have errors (including decommissioning). This leads to data being written to those subvolumes and, in the case of decommissioning, to data loss. Change-Id: Ie0c6cf4a29d7c53d8a6d8a8c1bd595cf58a0012a BUG: 982919 Signed-off-by: shishir gowda Reviewed-on: http://review.gluster.org/5299 Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- xlators/cluster/dht/src/dht-common.c | 6 ++- xlators/cluster/dht/src/dht-common.h | 9 ++-- xlators/cluster/dht/src/dht-diskusage.c | 75 +++++++++++++++++++++++++++++---- xlators/cluster/dht/src/nufa.c | 6 ++- xlators/cluster/dht/src/switch.c | 6 ++- 5 files changed, 85 insertions(+), 17 deletions(-) diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index ec54e438e..e55c25438 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -3567,7 +3567,9 @@ dht_mknod (call_frame_t *frame, xlator_t *this, subvol, subvol->fops->mknod, loc, mode, rdev, umask, params); } else { - avail_subvol = dht_free_disk_available_subvol (this, subvol); + + avail_subvol = dht_free_disk_available_subvol (this, subvol, + local); if (avail_subvol != subvol) { /* Choose the minimum filled volume, and create the files there */ @@ -3988,7 +3990,7 @@ dht_create (call_frame_t *frame, xlator_t *this, } /* Choose the minimum filled volume, and create the files there */ - avail_subvol = dht_free_disk_available_subvol (this, subvol); + avail_subvol = dht_free_disk_available_subvol (this, subvol, local); if (avail_subvol != subvol) { local->params = dict_ref (params); local->flags = flags; diff --git a/xlators/cluster/dht/src/dht-common.h 
b/xlators/cluster/dht/src/dht-common.h index fb90e48cc..d00d56864 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -469,7 +469,8 @@ dht_layout_sort_volname (dht_layout_t *layout); int dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc); gf_boolean_t dht_is_subvol_filled (xlator_t *this, xlator_t *subvol); -xlator_t *dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol); +xlator_t *dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol, + dht_local_t *layout); int dht_get_du_info_for_subvol (xlator_t *this, int subvol_idx); int dht_layout_preset (xlator_t *this, xlator_t *subvol, inode_t *inode); @@ -764,9 +765,11 @@ dht_dir_has_layout (dict_t *xattr, char *name); gf_boolean_t dht_is_subvol_in_layout (dht_layout_t *layout, xlator_t *xlator); xlator_t * -dht_subvol_with_free_space_inodes (xlator_t *this, xlator_t *subvol); +dht_subvol_with_free_space_inodes (xlator_t *this, xlator_t *subvol, + dht_layout_t *layout); xlator_t * -dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol); +dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol, + dht_layout_t *layout); int dht_linkfile_attr_heal (call_frame_t *frame, xlator_t *this); diff --git a/xlators/cluster/dht/src/dht-diskusage.c b/xlators/cluster/dht/src/dht-diskusage.c index 0c87f4a64..fe3955ecb 100644 --- a/xlators/cluster/dht/src/dht-diskusage.c +++ b/xlators/cluster/dht/src/dht-diskusage.c @@ -251,25 +251,45 @@ dht_is_subvol_filled (xlator_t *this, xlator_t *subvol) /*Get the best subvolume to create the file in*/ xlator_t * -dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol) +dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol, + dht_local_t *local) { xlator_t *avail_subvol = NULL; dht_conf_t *conf = NULL; + dht_layout_t *layout = NULL; + loc_t *loc = NULL; conf = this->private; + if (!local) + goto out; + loc = &local->loc; + if (!local->layout) { + layout = dht_layout_get 
(this, loc->parent); + + if (!layout) { + gf_log (this->name, GF_LOG_DEBUG, + "layout missing path=%s parent=%s", + loc->path, uuid_utoa (loc->parent->gfid)); + goto out; + } + } else { + layout = dht_layout_ref (this, local->layout); + } - LOCK (&conf->subvolume_lock); + LOCK (&conf->subvolume_lock); { - avail_subvol = dht_subvol_with_free_space_inodes(this, subvol); + avail_subvol = dht_subvol_with_free_space_inodes(this, subvol, + layout); if(!avail_subvol) { avail_subvol = dht_subvol_maxspace_nonzeroinode(this, - subvol); + subvol, + layout); } } UNLOCK (&conf->subvolume_lock); - +out: if (!avail_subvol) { gf_log (this->name, GF_LOG_DEBUG, @@ -278,17 +298,42 @@ dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol) avail_subvol = subvol; } - + if (layout) + dht_layout_unref (this, layout); return avail_subvol; } +static inline +int32_t dht_subvol_has_err (xlator_t *this, dht_layout_t *layout) +{ + int ret = -1; + int i = 0; + + if (!this || !layout) + goto out; + + /* check if subvol has layout errors, before selecting it */ + for (i = 0; i < layout->cnt; i++) { + if (!strcmp (layout->list[i].xlator->name, this->name) && + (layout->list[i].err != 0)) { + ret = -1; + goto out; + } + } + ret = 0; +out: + return ret; +} + /*Get subvolume which has both space and inodes more than the min criteria*/ xlator_t * -dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol) +dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol, + dht_layout_t *layout) { int i = 0; double max = 0; double max_inodes = 0; + int ignore_subvol = 0; xlator_t *avail_subvol = NULL; dht_conf_t *conf = NULL; @@ -296,6 +341,12 @@ dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol) conf = this->private; for(i=0; i < conf->subvolume_cnt; i++) { + /* check if subvol has layout errors, before selecting it */ + ignore_subvol = dht_subvol_has_err (conf->subvolumes[i], + layout); + if (ignore_subvol) + continue; + if ((conf->disk_unit == 'p') && 
(conf->du_stats[i].avail_percent > conf->min_free_disk) && (conf->du_stats[i].avail_inodes > conf->min_free_inodes)) { @@ -325,10 +376,12 @@ dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol) /* Get subvol which has atleast one inode and maximum space */ xlator_t * -dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol) +dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol, + dht_layout_t *layout) { int i = 0; double max = 0; + int ignore_subvol = 0; xlator_t *avail_subvol = NULL; dht_conf_t *conf = NULL; @@ -336,6 +389,12 @@ dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol) conf = this->private; for (i = 0; i < conf->subvolume_cnt; i++) { + /* check if subvol has layout errors, before selecting it */ + ignore_subvol = dht_subvol_has_err (conf->subvolumes[i], + layout); + if (ignore_subvol) + continue; + if (conf->disk_unit == 'p') { if ((conf->du_stats[i].avail_percent > max) && (conf->du_stats[i].avail_inodes > 0 )) { diff --git a/xlators/cluster/dht/src/nufa.c b/xlators/cluster/dht/src/nufa.c index 9352ca55a..5fae52626 100644 --- a/xlators/cluster/dht/src/nufa.c +++ b/xlators/cluster/dht/src/nufa.c @@ -323,7 +323,8 @@ nufa_create (call_frame_t *frame, xlator_t *this, if (dht_is_subvol_filled (this, (xlator_t *)conf->private)) { avail_subvol = dht_free_disk_available_subvol (this, - (xlator_t *)conf->private); + (xlator_t *)conf->private, + local); } if (subvol != avail_subvol) { @@ -427,7 +428,8 @@ nufa_mknod (call_frame_t *frame, xlator_t *this, if (dht_is_subvol_filled (this, (xlator_t *)conf->private)) { avail_subvol = dht_free_disk_available_subvol (this, - (xlator_t *)conf->private); + (xlator_t *)conf->private, + local); } if (avail_subvol != subvol) { diff --git a/xlators/cluster/dht/src/switch.c b/xlators/cluster/dht/src/switch.c index 861012247..d3ea90ba8 100644 --- a/xlators/cluster/dht/src/switch.c +++ b/xlators/cluster/dht/src/switch.c @@ -437,7 +437,8 @@ switch_create (call_frame_t 
*frame, xlator_t *this, avail_subvol = get_switch_matching_subvol (loc->path, conf, subvol); if (dht_is_subvol_filled (this, avail_subvol)) { avail_subvol = - dht_free_disk_available_subvol (this, avail_subvol); + dht_free_disk_available_subvol (this, avail_subvol, + local); } if (subvol != avail_subvol) { @@ -536,7 +537,8 @@ switch_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, avail_subvol = get_switch_matching_subvol (loc->path, conf, subvol); if (dht_is_subvol_filled (this, avail_subvol)) { avail_subvol = - dht_free_disk_available_subvol (this, avail_subvol); + dht_free_disk_available_subvol (this, avail_subvol, + local); } if (avail_subvol != subvol) { -- cgit