summaryrefslogtreecommitdiffstats
path: root/xlators
diff options
context:
space:
mode:
authorSusant Palai <spalai@redhat.com>2017-04-17 13:00:54 +0530
committerShyamsundar Ranganathan <srangana@redhat.com>2017-08-11 16:14:12 +0000
commit7daeae73d9edfa181f8e86394fb6dfc68b22440f (patch)
tree1561d3aa9703dd18c5d013839ab6ba318c632f7f /xlators
parentc90bffe08466baba369a23964a68b9ac59eb0394 (diff)
cluster/dht: Skip file migration if the subvol that meets min-free-disk
... criteria happens to be the same subvol containing data-file Rebalance need to figure out a new subvol in case the hashed subvol does not have enough space. In the process of figuring out the new subvol, we need to ignore the source subvol, otherwise it will lead to data loss. Test: Manual Ran the following sizeof /tmp/1: 1.5GB sizeof /brick/1: 16GB sizeof /tmp/2: 1.5GB <start> glusterd; gluster v create test1 vm1:/brick/1 vm1:/tmp/1; gluster v start test1; mount -t glusterfs vm1:test1 /mnt; for i in {1..2000} do dd if=/dev/zero of=/mnt/file$i bs=1KB count=1 &> /dev/null; done gluster v add-brick test1 vm1:/tmp/2 gluster v set test1 min-free-disk 12GB gluster v remove-brick test1 vm1:/tmp/1 star <end> file count and data were intact. > Change-Id: Ib8fc8467a3d48a7c12958824c4f0b88e160b86c1 > BUG: 1441508 > Signed-off-by: Susant Palai <spalai@redhat.com> > Reviewed-on: https://review.gluster.org/17064 > Smoke: Gluster Build System <jenkins@build.gluster.org> > NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> > CentOS-regression: Gluster Build System <jenkins@build.gluster.org> > Reviewed-by: Raghavendra G <rgowdapp@redhat.com> > Signed-off-by: Susant Palai <spalai@redhat.com> Change-Id: Ib8fc8467a3d48a7c12958824c4f0b88e160b86c1 BUG: 1473133 Signed-off-by: Susant Palai <spalai@redhat.com> Reviewed-on: https://review.gluster.org/17832 Smoke: Gluster Build System <jenkins@build.gluster.org> Tested-by: Shyamsundar Ranganathan <srangana@redhat.com> CentOS-regression: Gluster Build System <jenkins@build.gluster.org> Reviewed-by: Shyamsundar Ranganathan <srangana@redhat.com>
Diffstat (limited to 'xlators')
-rw-r--r--xlators/cluster/dht/src/dht-common.h2
-rw-r--r--xlators/cluster/dht/src/dht-diskusage.c19
-rw-r--r--xlators/cluster/dht/src/dht-rebalance.c96
3 files changed, 92 insertions, 25 deletions
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index 56918707449..66a70a555a9 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -1162,7 +1162,7 @@ dht_dir_has_layout (dict_t *xattr, char *name);
gf_boolean_t
dht_is_subvol_in_layout (dht_layout_t *layout, xlator_t *xlator);
xlator_t *
-dht_subvol_with_free_space_inodes (xlator_t *this, xlator_t *subvol,
+dht_subvol_with_free_space_inodes (xlator_t *this, xlator_t *subvol, xlator_t *ignore,
dht_layout_t *layout, uint64_t filesize);
xlator_t *
dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol,
diff --git a/xlators/cluster/dht/src/dht-diskusage.c b/xlators/cluster/dht/src/dht-diskusage.c
index 13698a9616d..05592154e30 100644
--- a/xlators/cluster/dht/src/dht-diskusage.c
+++ b/xlators/cluster/dht/src/dht-diskusage.c
@@ -315,7 +315,7 @@ dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol,
LOCK (&conf->subvolume_lock);
{
- avail_subvol = dht_subvol_with_free_space_inodes(this, subvol,
+ avail_subvol = dht_subvol_with_free_space_inodes(this, subvol, NULL,
layout, 0);
if(!avail_subvol)
{
@@ -340,8 +340,8 @@ out:
}
static inline
-int32_t dht_subvol_has_err (dht_conf_t *conf, xlator_t *this,
- dht_layout_t *layout)
+int32_t dht_subvol_has_err (dht_conf_t *conf, xlator_t *this, xlator_t *ignore,
+ dht_layout_t *layout)
{
int ret = -1;
int i = 0;
@@ -349,6 +349,13 @@ int32_t dht_subvol_has_err (dht_conf_t *conf, xlator_t *this,
if (!this || !layout)
goto out;
+ /* this check is meant for rebalance process. The source of the file
+ * should be ignored for space check */
+ if (this == ignore) {
+ goto out;
+ }
+
+
/* check if subvol has layout errors, before selecting it */
for (i = 0; i < layout->cnt; i++) {
if (!strcmp (layout->list[i].xlator->name, this->name) &&
@@ -376,7 +383,7 @@ out:
/*Get subvolume which has both space and inodes more than the min criteria*/
xlator_t *
-dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol,
+dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol, xlator_t *ignore,
dht_layout_t *layout, uint64_t filesize)
{
int i = 0;
@@ -398,7 +405,7 @@ dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol,
/* check if subvol has layout errors and also it is not a
* decommissioned brick, before selecting it */
ignore_subvol = dht_subvol_has_err (conf, conf->subvolumes[i],
- layout);
+ ignore, layout);
if (ignore_subvol)
continue;
@@ -463,7 +470,7 @@ dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol,
/* check if subvol has layout errors and also it is not a
* decommissioned brick, before selecting it*/
- ignore_subvol = dht_subvol_has_err (conf, conf->subvolumes[i],
+ ignore_subvol = dht_subvol_has_err (conf, conf->subvolumes[i], NULL,
layout);
if (ignore_subvol)
continue;
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index de7b5146c56..117146b599c 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -719,27 +719,30 @@ __dht_rebalance_create_dst_file (xlator_t *to, xlator_t *from, loc_t *loc, struc
loc->path, to->name, strerror (-ret));
*/
+ ret = syncop_fsetattr (to, fd, stbuf,
+ (GF_SET_ATTR_UID | GF_SET_ATTR_GID),
+ NULL, NULL, NULL, NULL);
+ if (ret < 0)
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "chown failed for %s on %s (%s)",
+ loc->path, to->name, strerror (-ret));
+
/* Fallocate does not work for size 0, hence the check. Anyway we don't
* need to care about min-free-disk for 0 byte size file */
if (stbuf->ia_size > 0) {
ret = syncop_fallocate (to, fd, 0, 0, stbuf->ia_size, NULL,
NULL);
- if (ret < 0)
+ if (ret < 0) {
gf_msg (this->name, GF_LOG_ERROR, 0,
DHT_MSG_MIGRATE_FILE_FAILED,
"fallocate failed for %s on %s (%s)",
loc->path, to->name, strerror (-ret));
+ ret = -1;
+ goto out;
+ }
}
- ret = syncop_fsetattr (to, fd, stbuf,
- (GF_SET_ATTR_UID | GF_SET_ATTR_GID),
- NULL, NULL, NULL, NULL);
- if (ret < 0)
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "chown failed for %s on %s (%s)",
- loc->path, to->name, strerror (-ret));
-
/* success */
ret = 0;
@@ -761,7 +764,8 @@ out:
static int
__dht_check_free_space (xlator_t *to, xlator_t *from, loc_t *loc,
struct iatt *stbuf, int flag, dht_conf_t *conf,
- gf_boolean_t *target_changed, xlator_t **new_subvol)
+ gf_boolean_t *target_changed, xlator_t **new_subvol,
+ gf_boolean_t *ignore_failure)
{
struct statvfs src_statfs = {0,};
struct statvfs dst_statfs = {0,};
@@ -773,6 +777,7 @@ __dht_check_free_space (xlator_t *to, xlator_t *from, loc_t *loc,
uint64_t dst_statfs_blocks = 1;
double post_availspace = 0;
double post_percent = 0;
+ int i = 0;
this = THIS;
@@ -897,13 +902,27 @@ find_new_subvol:
goto out;
}
- *new_subvol = dht_subvol_with_free_space_inodes (this, to,
- layout, stbuf->ia_size);
- if (!(*new_subvol)) {
+ *new_subvol = dht_subvol_with_free_space_inodes (this, to, from, layout,
+ stbuf->ia_size);
+ if ((!(*new_subvol)) || (*new_subvol == from)) {
gf_msg (this->name, GF_LOG_WARNING, 0,
DHT_MSG_SUBVOL_INSUFF_SPACE, "Could not find any subvol"
- " with space accomodating the file. Consider adding "
- "bricks");
+ " with space accomodating the file - %s. Consider adding "
+ "bricks", loc->path);
+
+ /* For remove-brick case if the source is not one of the
+ * removed-brick, do not mark the error as failure */
+ if (conf->decommission_subvols_cnt) {
+ *ignore_failure = _gf_true;
+ for (i = 0; i < conf->decommission_subvols_cnt; i++) {
+ if (conf->decommissioned_bricks[i] == from) {
+ *ignore_failure = _gf_false;
+ break;
+ }
+ }
+ } else {
+ *ignore_failure = _gf_false;
+ }
*target_changed = _gf_false;
ret = -1;
@@ -1382,6 +1401,8 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
gf_boolean_t target_changed = _gf_false;
xlator_t *new_target = NULL;
xlator_t *old_target = NULL;
+ fd_t *linkto_fd = NULL;
+ gf_boolean_t ignore_failure = _gf_false;
defrag = conf->defrag;
if (!defrag)
@@ -1500,7 +1521,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
clean_dst = _gf_true;
ret = __dht_check_free_space (to, from, loc, &stbuf, flag, conf,
- &target_changed, &new_target);
+ &target_changed, &new_target, &ignore_failure);
if (target_changed) {
/* Can't handle for hardlinks. Marking this as failure */
if (flag == GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS || stbuf.ia_nlink > 1) {
@@ -1544,6 +1565,9 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
}
if (ret) {
+ if (ignore_failure)
+ ret = 0;
+
goto out;
}
@@ -1791,13 +1815,47 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
}
ret = syncop_setxattr (old_target, loc, dict, 0, NULL, NULL);
- if (ret) {
+ if (ret && -ret != ESTALE && -ret != ENOENT) {
gf_msg (this->name, GF_LOG_ERROR, 0,
DHT_MSG_MIGRATE_FILE_FAILED,
"failed to set xattr on %s in %s (%s)",
loc->path, old_target->name, strerror (-ret));
ret = -1;
goto out;
+ } else if (-ret == ESTALE || -ret == ENOENT) {
+ /* The failure ESTALE indicates that the linkto
+ * file on the hashed subvol might have been deleted.
+ * In this case will create a linkto file with new target
+ * as linkto xattr value*/
+ linkto_fd = fd_create (loc->inode, DHT_REBALANCE_PID);
+ if (!linkto_fd) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: fd create failed (%s)",
+ loc->path, strerror (errno));
+ ret = -1;
+ goto out;
+ }
+ ret = syncop_create (old_target, loc, O_RDWR,
+ DHT_LINKFILE_MODE, linkto_fd,
+ NULL, dict, NULL);
+ if (ret != 0 && -ret != EEXIST && -ret != ESTALE) {
+ ret = -1;
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "failed to create linkto file on %s in %s (%s)",
+ loc->path, old_target->name, strerror (-ret));
+ goto out;
+ } else if (ret == 0) {
+ ret = syncop_fsetattr (old_target, linkto_fd, &stbuf,
+ (GF_SET_ATTR_UID | GF_SET_ATTR_GID),
+ NULL, NULL, NULL, NULL);
+ if (ret < 0)
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "chown failed for %s on %s (%s)",
+ loc->path, old_target->name, strerror (-ret));
+ }
}
}
}
@@ -2043,6 +2101,8 @@ out:
syncop_close (dst_fd);
if (src_fd)
syncop_close (src_fd);
+ if (linkto_fd)
+ syncop_close (linkto_fd);
loc_wipe (&tmp_loc);