diff options
Diffstat (limited to 'xlators/cluster/dht/src/dht-rebalance.c')
| -rw-r--r-- | xlators/cluster/dht/src/dht-rebalance.c | 491 |
1 files changed, 352 insertions, 139 deletions
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c index a2e96a1b2..4f78f5203 100644 --- a/xlators/cluster/dht/src/dht-rebalance.c +++ b/xlators/cluster/dht/src/dht-rebalance.c @@ -16,6 +16,8 @@ #include "dht-common.h" #include "xlator.h" +#include <signal.h> +#include <fnmatch.h> #define GF_DISK_SECTOR_SIZE 512 #define DHT_REBALANCE_PID 4242 /* Change it if required */ @@ -56,7 +58,8 @@ dht_write_with_holes (xlator_t *to, fd_t *fd, struct iovec *vec, int count, if (ret < 0) { gf_log (THIS->name, GF_LOG_WARNING, "failed to write (%s)", - strerror (errno)); + strerror (-ret)); + ret = -1; goto out; } @@ -74,7 +77,8 @@ dht_write_with_holes (xlator_t *to, fd_t *fd, struct iovec *vec, int count, /* 'path' will be logged in calling function */ gf_log (THIS->name, GF_LOG_WARNING, "failed to write (%s)", - strerror (errno)); + strerror (-ret)); + ret = -1; goto out; } } @@ -90,6 +94,41 @@ out: } +/* + return values: + -1 : failure + -2 : success + +Hard link migration is carried out in three stages. + +(Say there are n hardlinks) +Stage 1: Setting the new hashed subvol information on the 1st hardlink + encountered (linkto setxattr) + +Stage 2: Creating hardlinks on new hashed subvol for the 2nd to (n-1)th + hardlink + +Stage 3: Physical migration of the data file for nth hardlink + +Why to deem "-2" as success and not "0": + + dht_migrate_file expects return value "0" from _is_file_migratable if +the file has to be migrated. + + _is_file_migratable returns zero only when it is called with the +flag "GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS". + + gf_defrag_handle_hardlink calls dht_migrate_file for physical migration +of the data file with the flag "GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS" + +Hence, gf_defrag_handle_hardlink returning "0" for success will force +"dht_migrate_file" to migrate each of the hardlink which is not intended. 
+ +For each of the three stage mentioned above "-2" will be returned and will +be converted to "0" in dht_migrate_file. + +*/ + int32_t gf_defrag_handle_hardlink (xlator_t *this, loc_t *loc, dict_t *xattrs, struct iatt *stbuf) @@ -101,12 +140,16 @@ gf_defrag_handle_hardlink (xlator_t *this, loc_t *loc, dict_t *xattrs, data_t *data = NULL; struct iatt iatt = {0,}; int32_t op_errno = 0; + dht_conf_t *conf = NULL; GF_VALIDATE_OR_GOTO ("defrag", loc, out); GF_VALIDATE_OR_GOTO ("defrag", loc->name, out); GF_VALIDATE_OR_GOTO ("defrag", stbuf, out); GF_VALIDATE_OR_GOTO ("defrag", this, out); GF_VALIDATE_OR_GOTO ("defrag", xattrs, out); + GF_VALIDATE_OR_GOTO ("defrag", this->private, out); + + conf = this->private; if (uuid_is_null (loc->pargfid)) { gf_log ("", GF_LOG_ERROR, "loc->pargfid is NULL for " @@ -137,10 +180,10 @@ gf_defrag_handle_hardlink (xlator_t *this, loc_t *loc, dict_t *xattrs, gf_log (this->name, GF_LOG_INFO, "Attempting to migrate hardlink %s " "with gfid %s from %s -> %s", loc->name, uuid_utoa (loc->gfid), cached_subvol->name, hashed_subvol->name); - data = dict_get (xattrs, DHT_LINKFILE_KEY); + data = dict_get (xattrs, conf->link_xattr_name); /* set linkto on cached -> hashed if not present, else link it */ if (!data) { - ret = dict_set_str (xattrs, DHT_LINKFILE_KEY, + ret = dict_set_str (xattrs, conf->link_xattr_name, hashed_subvol->name); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Failed to set " @@ -152,9 +195,11 @@ gf_defrag_handle_hardlink (xlator_t *this, loc_t *loc, dict_t *xattrs, if (ret) { gf_log (this->name, GF_LOG_ERROR, "Linkto setxattr " "failed %s -> %s (%s)", cached_subvol->name, - loc->name, strerror (errno)); + loc->name, strerror (-ret)); + ret = -1; goto out; } + ret = -2; goto out; } else { linkto_subvol = dht_linkfile_subvol (this, NULL, NULL, xattrs); @@ -167,7 +212,8 @@ gf_defrag_handle_hardlink (xlator_t *this, loc_t *loc, dict_t *xattrs, ret = syncop_link (hashed_subvol, loc, loc); if (ret) { - op_errno = errno; + op_errno = 
-ret; + ret = -1; gf_log (this->name, GF_LOG_ERROR, "link of %s -> %s" " failed on subvol %s (%s)", loc->name, uuid_utoa(loc->gfid), @@ -179,7 +225,8 @@ gf_defrag_handle_hardlink (xlator_t *this, loc_t *loc, dict_t *xattrs, ret = syncop_lookup (hashed_subvol, loc, NULL, &iatt, NULL, NULL); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Failed lookup %s on %s (%s)" - , loc->name, hashed_subvol->name, strerror (errno)); + , loc->name, hashed_subvol->name, strerror (-ret)); + ret = -1; goto out; } @@ -189,12 +236,19 @@ gf_defrag_handle_hardlink (xlator_t *this, loc_t *loc, dict_t *xattrs, if (ret) goto out; } - ret = 0; + ret = -2; out: return ret; } - +/* + return values + 0 : File will be migrated + -2 : File will not be migrated + (This is the return value from gf_defrag_handle_hardlink. Check out + gf_defrag_handle_hardlink for description of "returning -2") + -1 : failure +*/ static inline int __is_file_migratable (xlator_t *this, loc_t *loc, struct iatt *stbuf, dict_t *xattrs, int flags) @@ -217,7 +271,12 @@ __is_file_migratable (xlator_t *this, loc_t *loc, if (flags == GF_DHT_MIGRATE_HARDLINK) { ret = gf_defrag_handle_hardlink (this, loc, xattrs, stbuf); - if (ret) { + + /* + Returning zero will force the file to be remigrated. + Check out gf_defrag_handle_hardlink for more information. 
+ */ + if (ret && ret != -2) { gf_log (this->name, GF_LOG_WARNING, "%s: failed to migrate file with link", loc->path); @@ -225,8 +284,8 @@ __is_file_migratable (xlator_t *this, loc_t *loc, } else { gf_log (this->name, GF_LOG_WARNING, "%s: file has hardlinks", loc->path); + ret = -ENOTSUP; } - ret = ENOTSUP; goto out; } @@ -238,14 +297,16 @@ out: static inline int __dht_rebalance_create_dst_file (xlator_t *to, xlator_t *from, loc_t *loc, struct iatt *stbuf, - dict_t *dict, fd_t **dst_fd) + dict_t *dict, fd_t **dst_fd, dict_t *xattr) { - xlator_t *this = NULL; - int ret = -1; - fd_t *fd = NULL; - struct iatt new_stbuf = {0,}; + xlator_t *this = NULL; + int ret = -1; + fd_t *fd = NULL; + struct iatt new_stbuf = {0,}; + dht_conf_t *conf = NULL; this = THIS; + conf = this->private; ret = dict_set_static_bin (dict, "gfid-req", stbuf->ia_gfid, 16); if (ret) { @@ -254,7 +315,7 @@ __dht_rebalance_create_dst_file (xlator_t *to, xlator_t *from, loc_t *loc, struc goto out; } - ret = dict_set_str (dict, DHT_LINKFILE_KEY, from->name); + ret = dict_set_str (dict, conf->link_xattr_name, from->name); if (ret) { gf_log (this->name, GF_LOG_ERROR, "%s: failed to set gfid in dict for create", loc->path); @@ -281,32 +342,46 @@ __dht_rebalance_create_dst_file (xlator_t *to, xlator_t *from, loc_t *loc, struc goto out; } } - if ((ret == -1) && (errno != ENOENT)) { + if ((ret < 0) && (-ret != ENOENT)) { /* File exists in destination, but not accessible */ gf_log (THIS->name, GF_LOG_WARNING, "%s: failed to lookup file (%s)", - loc->path, strerror (errno)); + loc->path, strerror (-ret)); + ret = -1; goto out; } /* Create the destination with LINKFILE mode, and linkto xattr, if the linkfile already exists, it will just open the file */ ret = syncop_create (to, loc, O_RDWR, DHT_LINKFILE_MODE, fd, - dict); + dict, &new_stbuf); if (ret < 0) { gf_log (this->name, GF_LOG_ERROR, "failed to create %s on %s (%s)", - loc->path, to->name, strerror (errno)); + loc->path, to->name, strerror (-ret)); + ret 
= -1; goto out; } + ret = syncop_fsetxattr (to, fd, xattr, 0); + if (ret < 0) + gf_log (this->name, GF_LOG_WARNING, + "%s: failed to set xattr on %s (%s)", + loc->path, to->name, strerror (-ret)); + + ret = syncop_ftruncate (to, fd, stbuf->ia_size); + if (ret < 0) + gf_log (this->name, GF_LOG_ERROR, + "ftruncate failed for %s on %s (%s)", + loc->path, to->name, strerror (-ret)); + ret = syncop_fsetattr (to, fd, stbuf, (GF_SET_ATTR_UID | GF_SET_ATTR_GID), NULL, NULL); if (ret < 0) gf_log (this->name, GF_LOG_ERROR, "chown failed for %s on %s (%s)", - loc->path, to->name, strerror (errno)); + loc->path, to->name, strerror (-ret)); if (dst_fd) *dst_fd = fd; @@ -327,13 +402,17 @@ __dht_check_free_space (xlator_t *to, xlator_t *from, loc_t *loc, int ret = -1; xlator_t *this = NULL; + uint64_t src_statfs_blocks = 1; + uint64_t dst_statfs_blocks = 1; + this = THIS; ret = syncop_statfs (from, loc, &src_statfs); if (ret) { gf_log (this->name, GF_LOG_ERROR, "failed to get statfs of %s on %s (%s)", - loc->path, from->name, strerror (errno)); + loc->path, from->name, strerror (-ret)); + ret = -1; goto out; } @@ -341,7 +420,8 @@ __dht_check_free_space (xlator_t *to, xlator_t *from, loc_t *loc, if (ret) { gf_log (this->name, GF_LOG_ERROR, "failed to get statfs of %s on %s (%s)", - loc->path, to->name, strerror (errno)); + loc->path, to->name, strerror (-ret)); + ret = -1; goto out; } @@ -350,22 +430,34 @@ __dht_check_free_space (xlator_t *to, xlator_t *from, loc_t *loc, if (flag != GF_DHT_MIGRATE_DATA) goto check_avail_space; - if (((dst_statfs.f_bavail * - dst_statfs.f_bsize) / GF_DISK_SECTOR_SIZE) < - (((src_statfs.f_bavail * src_statfs.f_bsize) / - GF_DISK_SECTOR_SIZE) - stbuf->ia_blocks)) { - gf_log (this->name, GF_LOG_WARNING, - "data movement attempted from node (%s) with" - " higher disk space to a node (%s) with " - "lesser disk space (%s)", from->name, - to->name, loc->path); - - /* this is not a 'failure', but we don't want to - consider this as 'success' too :-/ */ - 
ret = 1; - goto out; + /* Check: + During rebalance `migrate-data` - Destination subvol experiences + a `reduction` in 'blocks' of free space, at the same time source + subvol gains certain 'blocks' of free space. A valid check is + necessary here to avoid erroneous move to destination where + the space could be scantily available. + */ + if (stbuf) { + dst_statfs_blocks = ((dst_statfs.f_bavail * + dst_statfs.f_bsize) / + GF_DISK_SECTOR_SIZE); + src_statfs_blocks = ((src_statfs.f_bavail * + src_statfs.f_bsize) / + GF_DISK_SECTOR_SIZE); + if ((dst_statfs_blocks - stbuf->ia_blocks) < + (src_statfs_blocks + stbuf->ia_blocks)) { + gf_log (this->name, GF_LOG_WARNING, + "data movement attempted from node (%s) with" + " higher disk space to a node (%s) with " + "lesser disk space (%s)", from->name, + to->name, loc->path); + + /* this is not a 'failure', but we don't want to + consider this as 'success' too :-/ */ + ret = 1; + goto out; + } } - check_avail_space: if (((dst_statfs.f_bavail * dst_statfs.f_bsize) / GF_DISK_SECTOR_SIZE) < stbuf->ia_blocks) { @@ -428,6 +520,8 @@ __dht_rebalance_migrate_data (xlator_t *from, xlator_t *to, fd_t *src, fd_t *dst if (ret >= 0) ret = 0; + else + ret = -1; return ret; } @@ -442,8 +536,10 @@ __dht_rebalance_open_src_file (xlator_t *from, xlator_t *to, loc_t *loc, dict_t *dict = NULL; xlator_t *this = NULL; struct iatt iatt = {0,}; + dht_conf_t *conf = NULL; this = THIS; + conf = this->private; fd = fd_create (loc->inode, DHT_REBALANCE_PID); if (!fd) { @@ -454,10 +550,11 @@ __dht_rebalance_open_src_file (xlator_t *from, xlator_t *to, loc_t *loc, } ret = syncop_open (from, loc, O_RDWR, fd); - if (ret == -1) { + if (ret < 0) { gf_log (this->name, GF_LOG_ERROR, "failed to open file %s on %s (%s)", - loc->path, from->name, strerror (errno)); + loc->path, from->name, strerror (-ret)); + ret = -1; goto out; } @@ -466,7 +563,7 @@ __dht_rebalance_open_src_file (xlator_t *from, xlator_t *to, loc_t *loc, if (!dict) goto out; - ret = dict_set_str 
(dict, DHT_LINKFILE_KEY, to->name); + ret = dict_set_str (dict, conf->link_xattr_name, to->name); if (ret) { gf_log (this->name, GF_LOG_ERROR, "failed to set xattr in dict for %s (linkto:%s)", @@ -480,7 +577,8 @@ __dht_rebalance_open_src_file (xlator_t *from, xlator_t *to, loc_t *loc, if (ret) { gf_log (this->name, GF_LOG_ERROR, "failed to set xattr on %s in %s (%s)", - loc->path, from->name, strerror (errno)); + loc->path, from->name, strerror (-ret)); + ret = -1; goto out; } @@ -494,7 +592,8 @@ __dht_rebalance_open_src_file (xlator_t *from, xlator_t *to, loc_t *loc, if (ret) { gf_log (this->name, GF_LOG_ERROR, "failed to set mode on %s in %s (%s)", - loc->path, from->name, strerror (errno)); + loc->path, from->name, strerror (-ret)); + ret = -1; goto out; } @@ -519,12 +618,13 @@ migrate_special_files (xlator_t *this, xlator_t *from, xlator_t *to, loc_t *loc, dict_t *dict = NULL; char *link = NULL; struct iatt stbuf = {0,}; + dht_conf_t *conf = this->private; dict = dict_new (); if (!dict) goto out; - ret = dict_set_int32 (dict, DHT_LINKFILE_KEY, 256); + ret = dict_set_int32 (dict, conf->link_xattr_name, 256); if (ret) { gf_log (this->name, GF_LOG_ERROR, "%s: failed to set 'linkto' key in dict", loc->path); @@ -533,19 +633,21 @@ migrate_special_files (xlator_t *this, xlator_t *from, xlator_t *to, loc_t *loc, /* check in the destination if the file is link file */ ret = syncop_lookup (to, loc, dict, &stbuf, &rsp_dict, NULL); - if ((ret == -1) && (errno != ENOENT)) { + if ((ret < 0) && (-ret != ENOENT)) { gf_log (this->name, GF_LOG_WARNING, "%s: lookup failed (%s)", - loc->path, strerror (errno)); + loc->path, strerror (-ret)); + ret = -1; goto out; } /* we no more require this key */ - dict_del (dict, DHT_LINKFILE_KEY); + dict_del (dict, conf->link_xattr_name); /* file exists in target node, only if it is 'linkfile' its valid, otherwise, error out */ if (!ret) { - if (!check_is_linkfile (loc->inode, &stbuf, rsp_dict)) { + if (!check_is_linkfile (loc->inode, &stbuf, 
rsp_dict, + conf->link_xattr_name)) { gf_log (this->name, GF_LOG_WARNING, "%s: file exists in destination", loc->path); ret = -1; @@ -557,7 +659,8 @@ migrate_special_files (xlator_t *this, xlator_t *from, xlator_t *to, loc_t *loc, if (ret) { gf_log (this->name, GF_LOG_WARNING, "%s: failed to delete the linkfile (%s)", - loc->path, strerror (errno)); + loc->path, strerror (-ret)); + ret = -1; goto out; } } @@ -577,15 +680,17 @@ migrate_special_files (xlator_t *this, xlator_t *from, xlator_t *to, loc_t *loc, if (ret < 0) { gf_log (this->name, GF_LOG_WARNING, "%s: readlink on symlink failed (%s)", - loc->path, strerror (errno)); + loc->path, strerror (-ret)); + ret = -1; goto out; } - ret = syncop_symlink (to, loc, link, dict); + ret = syncop_symlink (to, loc, link, dict, 0); if (ret) { gf_log (this->name, GF_LOG_WARNING, "%s: creating symlink failed (%s)", - loc->path, strerror (errno)); + loc->path, strerror (-ret)); + ret = -1; goto out; } @@ -595,18 +700,31 @@ migrate_special_files (xlator_t *this, xlator_t *from, xlator_t *to, loc_t *loc, ret = syncop_mknod (to, loc, st_mode_from_ia (buf->ia_prot, buf->ia_type), makedev (ia_major (buf->ia_rdev), - ia_minor (buf->ia_rdev)), dict); + ia_minor (buf->ia_rdev)), dict, 0); if (ret) { gf_log (this->name, GF_LOG_WARNING, "%s: mknod failed (%s)", - loc->path, strerror (errno)); + loc->path, strerror (-ret)); + ret = -1; goto out; } done: + ret = syncop_setattr (to, loc, buf, + (GF_SET_ATTR_UID | GF_SET_ATTR_GID | + GF_SET_ATTR_MODE), NULL, NULL); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "%s: failed to perform setattr on %s (%s)", + loc->path, to->name, strerror (-ret)); + ret = -1; + } + ret = syncop_unlink (from, loc); - if (ret) + if (ret) { gf_log (this->name, GF_LOG_WARNING, "%s: unlink failed (%s)", - loc->path, strerror (errno)); + loc->path, strerror (-ret)); + ret = -1; + } out: if (dict) @@ -640,6 +758,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, dict_t *xattr = 
NULL; dict_t *xattr_rsp = NULL; int file_has_holes = 0; + dht_conf_t *conf = this->private; gf_log (this->name, GF_LOG_INFO, "%s: attempting to move from %s to %s", loc->path, from->name, to->name); @@ -648,7 +767,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, if (!dict) goto out; - ret = dict_set_int32 (dict, DHT_LINKFILE_KEY, 256); + ret = dict_set_int32 (dict, conf->link_xattr_name, 256); if (ret) { gf_log (this->name, GF_LOG_ERROR, "%s: failed to set 'linkto' key in dict", loc->path); @@ -659,21 +778,24 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, ret = syncop_lookup (from, loc, dict, &stbuf, &xattr_rsp, NULL); if (ret) { gf_log (this->name, GF_LOG_ERROR, "%s: lookup failed on %s (%s)", - loc->path, from->name, strerror (errno)); + loc->path, from->name, strerror (-ret)); + ret = -1; goto out; } /* we no more require this key */ - dict_del (dict, DHT_LINKFILE_KEY); + dict_del (dict, conf->link_xattr_name); /* preserve source mode, so set the same to the destination */ src_ia_prot = stbuf.ia_prot; /* Check if file can be migrated */ ret = __is_file_migratable (this, loc, &stbuf, xattr_rsp, flag); - if (ret) + if (ret) { + if (ret == -2) + ret = 0; goto out; - + } /* Take care of the special files */ if (!IA_ISREG (stbuf.ia_type)) { /* Special files */ @@ -681,9 +803,18 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, goto out; } + /* TODO: move all xattr related operations to fd based operations */ + ret = syncop_listxattr (from, loc, &xattr); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "%s: failed to get xattr from %s (%s)", + loc->path, from->name, strerror (-ret)); + ret = -1; + } + /* create the destination, with required modes/xattr */ ret = __dht_rebalance_create_dst_file (to, from, loc, &stbuf, - dict, &dst_fd); + dict, &dst_fd, xattr); if (ret) goto out; @@ -700,10 +831,12 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t 
*to, goto out; } + ret = syncop_fstat (from, src_fd, &stbuf); if (ret) { gf_log (this->name, GF_LOG_ERROR, "failed to lookup %s on %s (%s)", - loc->path, from->name, strerror (errno)); + loc->path, from->name, strerror (-ret)); + ret = -1; goto out; } @@ -722,33 +855,22 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, if (ret) { gf_log (this->name, GF_LOG_ERROR, "%s: failed to reset target size back to 0 (%s)", - loc->path, strerror (errno)); + loc->path, strerror (-ret)); } ret = -1; goto out; } - /* TODO: move all xattr related operations to fd based operations */ - ret = syncop_listxattr (from, loc, &xattr); - if (ret == -1) - gf_log (this->name, GF_LOG_WARNING, - "%s: failed to get xattr from %s (%s)", - loc->path, from->name, strerror (errno)); - - ret = syncop_setxattr (to, loc, xattr, 0); - if (ret == -1) - gf_log (this->name, GF_LOG_WARNING, - "%s: failed to set xattr on %s (%s)", - loc->path, to->name, strerror (errno)); - /* TODO: Sync the locks */ ret = syncop_fsync (to, dst_fd, 0); - if (ret) + if (ret) { gf_log (this->name, GF_LOG_WARNING, "%s: failed to fsync on %s (%s)", - loc->path, to->name, strerror (errno)); + loc->path, to->name, strerror (-ret)); + ret = -1; + } /* Phase 2 - Data-Migration Complete, Housekeeping updates pending */ @@ -758,7 +880,8 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, /* Failed to get the stat info */ gf_log (this->name, GF_LOG_ERROR, "failed to fstat file %s on %s (%s)", - loc->path, from->name, strerror (errno)); + loc->path, from->name, strerror (-ret)); + ret = -1; goto out; } @@ -780,7 +903,8 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, if (ret) { gf_log (this->name, GF_LOG_WARNING, "%s: failed to perform setattr on %s (%s)", - loc->path, to->name, strerror (errno)); + loc->path, to->name, strerror (-ret)); + ret = -1; goto out; } @@ -791,7 +915,8 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t 
*to, if (ret) { gf_log (this->name, GF_LOG_WARNING, "%s: failed to perform setattr on %s (%s)", - loc->path, to->name, strerror (errno)); + loc->path, to->name, strerror (-ret)); + ret = -1; } /* Make the source as a linkfile first before deleting it */ @@ -801,16 +926,37 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, if (ret) { gf_log (this->name, GF_LOG_WARNING, \ "%s: failed to perform setattr on %s (%s)", - loc->path, from->name, strerror (errno)); + loc->path, from->name, strerror (-ret)); + ret = -1; goto out; } + /* Free up the data blocks on the source node, as the whole + file is migrated */ + ret = syncop_ftruncate (from, src_fd, 0); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "%s: failed to perform truncate on %s (%s)", + loc->path, from->name, strerror (-ret)); + ret = -1; + } + + /* remove the 'linkto' xattr from the destination */ + ret = syncop_fremovexattr (to, dst_fd, conf->link_xattr_name, 0); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "%s: failed to perform removexattr on %s (%s)", + loc->path, to->name, strerror (-ret)); + ret = -1; + } + /* Do a stat and check the gfid before unlink */ ret = syncop_stat (from, loc, &empty_iatt); if (ret) { gf_log (this->name, GF_LOG_WARNING, "%s: failed to do a stat on %s (%s)", - loc->path, from->name, strerror (errno)); + loc->path, from->name, strerror (-ret)); + ret = -1; goto out; } @@ -820,33 +966,18 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, if (ret) { gf_log (this->name, GF_LOG_WARNING, "%s: failed to perform unlink on %s (%s)", - loc->path, from->name, strerror (errno)); + loc->path, from->name, strerror (-ret)); + ret = -1; goto out; } } - /* Free up the data blocks on the source node, as the whole - file is migrated */ - ret = syncop_ftruncate (from, src_fd, 0); - if (ret) { - gf_log (this->name, GF_LOG_WARNING, - "%s: failed to perform truncate on %s (%s)", - loc->path, from->name, strerror (errno)); - } - - /* 
remove the 'linkto' xattr from the destination */ - ret = syncop_fremovexattr (to, dst_fd, DHT_LINKFILE_KEY); - if (ret) { - gf_log (this->name, GF_LOG_WARNING, - "%s: failed to perform removexattr on %s (%s)", - loc->path, to->name, strerror (errno)); - } - ret = syncop_lookup (this, loc, NULL, NULL, NULL, NULL); if (ret) { gf_log (this->name, GF_LOG_DEBUG, "%s: failed to lookup the file on subvolumes (%s)", - loc->path, strerror (errno)); + loc->path, strerror (-ret)); + ret = -1; } gf_log (this->name, GF_LOG_INFO, @@ -1009,10 +1140,10 @@ gf_defrag_handle_migrate_error (int32_t op_errno, gf_defrag_info_t *defrag) { /* if errno is not ENOSPC or ENOTCONN, we can still continue with rebalance process */ - if ((errno != ENOSPC) || (errno != ENOTCONN)) + if ((op_errno != ENOSPC) || (op_errno != ENOTCONN)) return 1; - if (errno == ENOTCONN) { + if (op_errno == ENOTCONN) { /* Most probably mount point went missing (mostly due to a brick down), say rebalance failure to user, let him restart it if everything is fine */ @@ -1020,7 +1151,7 @@ gf_defrag_handle_migrate_error (int32_t op_errno, gf_defrag_info_t *defrag) return -1; } - if (errno == ENOSPC) { + if (op_errno == ENOSPC) { /* rebalance process itself failed, may be remote brick went down, or write failed due to disk full etc etc.. */ @@ -1031,6 +1162,31 @@ gf_defrag_handle_migrate_error (int32_t op_errno, gf_defrag_info_t *defrag) return 0; } +static gf_boolean_t +gf_defrag_pattern_match (gf_defrag_info_t *defrag, char *name, uint64_t size) +{ + gf_defrag_pattern_list_t *trav = NULL; + gf_boolean_t match = _gf_false; + gf_boolean_t ret = _gf_false; + + GF_VALIDATE_OR_GOTO ("dht", defrag, out); + + trav = defrag->defrag_pattern; + while (trav) { + if (!fnmatch (trav->path_pattern, name, FNM_NOESCAPE)) { + match = _gf_true; + break; + } + trav = trav->next; + } + + if ((match == _gf_true) && (size >= trav->size)) + ret = _gf_true; + + out: + return ret; +} + /* We do a depth first traversal of directories. 
But before we move into * subdirs, we complete the data migration of those directories whose layouts * have been fixed @@ -1053,11 +1209,12 @@ gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, int32_t op_errno = 0; char *uuid_str = NULL; uuid_t node_uuid = {0,}; - int readdir_operrno = 0; struct timeval dir_start = {0,}; struct timeval end = {0,}; double elapsed = {0,}; struct timeval start = {0,}; + int32_t err = 0; + int loglevel = GF_LOG_TRACE; gf_log (this->name, GF_LOG_INFO, "migrate data called on %s", loc->path); @@ -1073,6 +1230,7 @@ gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, if (ret) { gf_log (this->name, GF_LOG_ERROR, "Failed to open dir %s", loc->path); + ret = -1; goto out; } @@ -1080,17 +1238,21 @@ gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, while ((ret = syncop_readdirp (this, fd, 131072, offset, NULL, &entries)) != 0) { - if (ret < 0) - break; - /* Need to keep track of ENOENT errno, that means, there is no - need to send more readdirp() */ - readdir_operrno = errno; + if (ret < 0) { - free_entries = _gf_true; + gf_log (this->name, GF_LOG_ERROR, "Readdir returned %s." 
+ " Aborting migrate-data", + strerror(-ret)); + ret = -1; + goto out; + } if (list_empty (&entries.list)) break; + + free_entries = _gf_true; + list_for_each_entry_safe (entry, tmp, &entries.list, list) { if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { ret = 1; @@ -1110,6 +1272,12 @@ gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, if (defrag->stats == _gf_true) { gettimeofday (&start, NULL); } + if (defrag->defrag_pattern && + (gf_defrag_pattern_match (defrag, entry->d_name, + entry->d_stat.ia_size) + == _gf_false)) { + continue; + } loc_wipe (&entry_loc); ret =dht_build_child_loc (this, &entry_loc, loc, entry->d_name); @@ -1144,6 +1312,7 @@ gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, if (ret) { gf_log (this->name, GF_LOG_ERROR, "%s" " lookup failed", entry_loc.path); + ret = -1; continue; } @@ -1152,6 +1321,7 @@ gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, if(ret < 0) { gf_log (this->name, GF_LOG_ERROR, "Failed to " "get node-uuid for %s", entry_loc.path); + ret = -1; continue; } @@ -1161,6 +1331,7 @@ gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, gf_log (this->name, GF_LOG_ERROR, "Failed to " "get node-uuid from dict for %s", entry_loc.path); + ret = -1; continue; } @@ -1185,30 +1356,50 @@ gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, /* if distribute is present, it will honor this key. - * -1 is returned if distribute is not present or file - * doesn't have a link-file. If file has link-file, the - * path of link-file will be the value, and also that - * guarantees that file has to be mostly migrated */ + * -1, ENODATA is returned if distribute is not present + * or file doesn't have a link-file. 
If file has + * link-file, the path of link-file will be the value, + * and also that guarantees that file has to be mostly + * migrated */ ret = syncop_getxattr (this, &entry_loc, &dict, GF_XATTR_LINKINFO_KEY); if (ret < 0) { - gf_log (this->name, GF_LOG_TRACE, "failed to " - "get link-to key for %s", - entry_loc.path); + if (-ret != ENODATA) { + loglevel = GF_LOG_ERROR; + defrag->total_failures += 1; + } else { + loglevel = GF_LOG_TRACE; + } + gf_log (this->name, loglevel, "%s: failed to " + "get "GF_XATTR_LINKINFO_KEY" key - %s", + entry_loc.path, strerror (-ret)); + ret = -1; continue; } ret = syncop_setxattr (this, &entry_loc, migrate_data, 0); if (ret) { - gf_log (this->name, GF_LOG_ERROR, "migrate-data" - " failed for %s", entry_loc.path); - defrag->total_failures +=1; + err = op_errno; + /* errno is overloaded. See + * rebalance_task_completion () */ + if (err != ENOSPC) { + gf_log (this->name, GF_LOG_DEBUG, + "migrate-data skipped for %s" + " due to space constraints", + entry_loc.path); + defrag->skipped +=1; + } else{ + gf_log (this->name, GF_LOG_ERROR, + "migrate-data failed for %s", + entry_loc.path); + defrag->total_failures +=1; + } } - if (ret == -1) { - op_errno = errno; + if (ret < 0) { + op_errno = -ret; ret = gf_defrag_handle_migrate_error (op_errno, defrag); @@ -1243,9 +1434,6 @@ gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, gf_dirent_free (&entries); free_entries = _gf_false; INIT_LIST_HEAD (&entries.list); - - if (readdir_operrno == ENOENT) - break; } gettimeofday (&end, NULL); @@ -1269,7 +1457,6 @@ out: } - int gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, dict_t *fix_layout, dict_t *migrate_data) @@ -1289,6 +1476,7 @@ gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, if (ret) { gf_log (this->name, GF_LOG_ERROR, "Lookup failed on %s", loc->path); + ret = -1; goto out; } @@ -1319,12 +1507,19 @@ gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t 
*defrag, loc_t *loc, while ((ret = syncop_readdirp (this, fd, 131072, offset, NULL, &entries)) != 0) { - if ((ret < 0) || (ret && (errno == ENOENT))) - break; - free_entries = _gf_true; + + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "Readdir returned %s" + ". Aborting fix-layout",strerror(-ret)); + ret = -1; + goto out; + } if (list_empty (&entries.list)) break; + + free_entries = _gf_true; + list_for_each_entry_safe (entry, tmp, &entries.list, list) { if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { ret = 1; @@ -1351,7 +1546,7 @@ gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, if (uuid_is_null (entry->d_stat.ia_gfid)) { gf_log (this->name, GF_LOG_ERROR, "%s/%s" - "gfid not present", loc->path, + " gfid not present", loc->path, entry->d_name); continue; } @@ -1361,7 +1556,7 @@ gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, uuid_copy (entry_loc.gfid, entry->d_stat.ia_gfid); if (uuid_is_null (loc->gfid)) { gf_log (this->name, GF_LOG_ERROR, "%s/%s" - "gfid not present", loc->path, + " gfid not present", loc->path, entry->d_name); continue; } @@ -1373,6 +1568,7 @@ gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, if (ret) { gf_log (this->name, GF_LOG_ERROR, "%s" " lookup failed", entry_loc.path); + ret = -1; continue; } @@ -1384,6 +1580,7 @@ gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; defrag->total_failures ++; + ret = -1; goto out; } ret = gf_defrag_fix_layout (this, defrag, &entry_loc, @@ -1464,6 +1661,7 @@ gf_defrag_start_crawl (void *data) if (ret) { gf_log (this->name, GF_LOG_ERROR, "look up on / failed"); + ret = -1; goto out; } @@ -1484,6 +1682,7 @@ gf_defrag_start_crawl (void *data) gf_log (this->name, GF_LOG_ERROR, "fix layout on %s failed", loc.path); defrag->total_failures++; + ret = -1; goto out; } @@ -1590,6 +1789,7 @@ gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict) 
uint64_t size = 0; uint64_t lookup = 0; uint64_t failures = 0; + uint64_t skipped = 0; char *status = ""; double elapsed = 0; struct timeval end = {0,}; @@ -1606,6 +1806,7 @@ gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict) size = defrag->total_data; lookup = defrag->num_files_lookedup; failures = defrag->total_failures; + skipped = defrag->skipped; gettimeofday (&end, NULL); @@ -1629,6 +1830,7 @@ gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict) gf_log (THIS->name, GF_LOG_WARNING, "failed to set lookedup file count"); + ret = dict_set_int32 (dict, "status", defrag->defrag_status); if (ret) gf_log (THIS->name, GF_LOG_WARNING, @@ -1641,6 +1843,14 @@ gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict) } ret = dict_set_uint64 (dict, "failures", failures); + if (ret) + gf_log (THIS->name, GF_LOG_WARNING, + "failed to set failure count"); + + ret = dict_set_uint64 (dict, "skipped", skipped); + if (ret) + gf_log (THIS->name, GF_LOG_WARNING, + "failed to set skipped file count"); log: switch (defrag->defrag_status) { case GF_DEFRAG_STATUS_NOT_STARTED: @@ -1658,13 +1868,15 @@ log: case GF_DEFRAG_STATUS_FAILED: status = "failed"; break; + default: + break; } gf_log (THIS->name, GF_LOG_INFO, "Rebalance is %s. 
Time taken is %.2f " "secs", status, elapsed); gf_log (THIS->name, GF_LOG_INFO, "Files migrated: %"PRIu64", size: %" - PRIu64", lookups: %"PRIu64", failures: %"PRIu64, files, size, - lookup, failures); + PRIu64", lookups: %"PRIu64", failures: %"PRIu64", skipped: " + "%"PRIu64, files, size, lookup, failures, skipped); out: @@ -1672,7 +1884,8 @@ out: } int -gf_defrag_stop (gf_defrag_info_t *defrag, dict_t *output) +gf_defrag_stop (gf_defrag_info_t *defrag, gf_defrag_status_t status, + dict_t *output) { /* TODO: set a variable 'stop_defrag' here, it should be checked in defrag loop */ @@ -1684,7 +1897,7 @@ gf_defrag_stop (gf_defrag_info_t *defrag, dict_t *output) } gf_log ("", GF_LOG_INFO, "Received stop command on rebalance"); - defrag->defrag_status = GF_DEFRAG_STATUS_STOPPED; + defrag->defrag_status = status; if (output) gf_defrag_status_get (defrag, output); |
