diff options
Diffstat (limited to 'xlators/cluster/dht/src/dht-rebalance.c')
| -rw-r--r-- | xlators/cluster/dht/src/dht-rebalance.c | 515 |
1 files changed, 394 insertions, 121 deletions
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c index ecf664bb7..bcb19f23e 100644 --- a/xlators/cluster/dht/src/dht-rebalance.c +++ b/xlators/cluster/dht/src/dht-rebalance.c @@ -1,20 +1,11 @@ /* - Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com> + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ @@ -24,6 +15,8 @@ #endif #include "dht-common.h" +#include "xlator.h" +#include <fnmatch.h> #define GF_DISK_SECTOR_SIZE 512 #define DHT_REBALANCE_PID 4242 /* Change it if required */ @@ -109,12 +102,16 @@ gf_defrag_handle_hardlink (xlator_t *this, loc_t *loc, dict_t *xattrs, data_t *data = NULL; struct iatt iatt = {0,}; int32_t op_errno = 0; + dht_conf_t *conf = NULL; GF_VALIDATE_OR_GOTO ("defrag", loc, out); GF_VALIDATE_OR_GOTO ("defrag", loc->name, out); GF_VALIDATE_OR_GOTO ("defrag", stbuf, out); GF_VALIDATE_OR_GOTO ("defrag", this, out); GF_VALIDATE_OR_GOTO ("defrag", xattrs, out); + GF_VALIDATE_OR_GOTO ("defrag", this->private, out); + + conf = this->private; if (uuid_is_null (loc->pargfid)) { gf_log ("", GF_LOG_ERROR, "loc->pargfid is NULL for " @@ -145,10 +142,10 @@ gf_defrag_handle_hardlink (xlator_t *this, loc_t *loc, dict_t *xattrs, gf_log (this->name, GF_LOG_INFO, "Attempting to migrate hardlink %s " "with gfid %s from %s -> %s", loc->name, uuid_utoa (loc->gfid), cached_subvol->name, hashed_subvol->name); - data = dict_get (xattrs, DHT_LINKFILE_KEY); + data = dict_get (xattrs, conf->link_xattr_name); /* set linkto on cached -> hashed if not present, else link it */ if (!data) { - ret = dict_set_str (xattrs, DHT_LINKFILE_KEY, + ret = dict_set_str (xattrs, conf->link_xattr_name, hashed_subvol->name); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Failed to set " @@ -246,14 +243,16 @@ out: static inline int __dht_rebalance_create_dst_file (xlator_t *to, xlator_t *from, loc_t *loc, struct iatt *stbuf, - dict_t *dict, fd_t **dst_fd) + dict_t *dict, fd_t **dst_fd, dict_t *xattr) { - xlator_t *this = NULL; - int ret = -1; - fd_t *fd = NULL; - struct iatt new_stbuf = {0,}; + xlator_t *this = NULL; + int ret = -1; + fd_t *fd = NULL; + struct iatt new_stbuf = {0,}; + dht_conf_t *conf = NULL; this = THIS; + conf = this->private; ret = dict_set_static_bin (dict, "gfid-req", stbuf->ia_gfid, 16); if (ret) { @@ -262,7 +261,7 @@ __dht_rebalance_create_dst_file (xlator_t *to, xlator_t *from, loc_t *loc, struc goto out; } - ret = dict_set_str (dict, DHT_LINKFILE_KEY, from->name); + ret = dict_set_str (dict, conf->link_xattr_name, from->name); if (ret) { gf_log (this->name, GF_LOG_ERROR, "%s: failed to set gfid in dict for create", loc->path); @@ -300,7 +299,7 @@ __dht_rebalance_create_dst_file (xlator_t *to, xlator_t *from, loc_t *loc, struc /* Create the destination with LINKFILE mode, and linkto xattr, if the linkfile already exists, it will just open the file */ ret = syncop_create (to, loc, O_RDWR, DHT_LINKFILE_MODE, fd, - dict); + dict, &new_stbuf); if (ret < 0) { gf_log (this->name, GF_LOG_ERROR, "failed to create %s on %s (%s)", @@ -308,6 +307,26 @@ __dht_rebalance_create_dst_file (xlator_t *to, xlator_t *from, loc_t *loc, struc goto out; } + ret = syncop_fsetxattr (to, fd, xattr, 0); + if (ret == -1) + gf_log (this->name, GF_LOG_WARNING, + "%s: failed to set xattr on %s (%s)", + loc->path, to->name, strerror (errno)); + + ret = syncop_ftruncate (to, fd, stbuf->ia_size); + if (ret < 0) + gf_log (this->name, GF_LOG_ERROR, + "ftruncate failed for %s on %s (%s)", + loc->path, to->name, strerror (errno)); + + ret = syncop_fsetattr (to, fd, stbuf, + (GF_SET_ATTR_UID | GF_SET_ATTR_GID), + NULL, NULL); + if (ret < 0) + gf_log (this->name, GF_LOG_ERROR, + "chown failed for %s on %s (%s)", + loc->path, to->name, strerror (errno)); + if (dst_fd) *dst_fd = fd; @@ -320,13 +339,16 @@ out: static inline int __dht_check_free_space (xlator_t *to, xlator_t *from, loc_t *loc, - struct iatt *stbuf) + struct iatt *stbuf, int flag) { struct statvfs src_statfs = {0,}; struct statvfs dst_statfs = {0,}; int ret = -1; xlator_t *this = NULL; + uint64_t src_statfs_blocks = 1; + uint64_t dst_statfs_blocks = 1; + this = THIS; ret = syncop_statfs (from, loc, &src_statfs); @@ -344,18 +366,47 @@ __dht_check_free_space (xlator_t *to, xlator_t *from, loc_t *loc, loc->path, to->name, strerror (errno)); goto out; } - if (((dst_statfs.f_bavail * - dst_statfs.f_bsize) / GF_DISK_SECTOR_SIZE) > - (((src_statfs.f_bavail * src_statfs.f_bsize) / - GF_DISK_SECTOR_SIZE) - stbuf->ia_blocks)) { - gf_log (this->name, GF_LOG_WARNING, - "data movement attempted from node (%s) with" - " higher disk space to a node (%s) with " - "lesser disk space (%s)", from->name, - to->name, loc->path); - /* this is not a 'failure', but we don't want to - consider this as 'success' too :-/ */ + /* if force option is given, do not check for space @ dst. + * Check only if space is avail for the file */ + if (flag != GF_DHT_MIGRATE_DATA) + goto check_avail_space; + + /* Check: + During rebalance `migrate-data` - Destination subvol experiences + a `reduction` in 'blocks' of free space, at the same time source + subvol gains certain 'blocks' of free space. A valid check is + necessary here to avoid errorneous move to destination where + the space could be scantily available. + */ + if (stbuf) { + dst_statfs_blocks = ((dst_statfs.f_bavail * + dst_statfs.f_bsize) / + GF_DISK_SECTOR_SIZE); + src_statfs_blocks = ((src_statfs.f_bavail * + src_statfs.f_bsize) / + GF_DISK_SECTOR_SIZE); + if ((dst_statfs_blocks - stbuf->ia_blocks) < + (src_statfs_blocks + stbuf->ia_blocks)) { + gf_log (this->name, GF_LOG_WARNING, + "data movement attempted from node (%s) with" + " higher disk space to a node (%s) with " + "lesser disk space (%s)", from->name, + to->name, loc->path); + + /* this is not a 'failure', but we don't want to + consider this as 'success' too :-/ */ + ret = 1; + goto out; + } + } +check_avail_space: + if (((dst_statfs.f_bavail * dst_statfs.f_bsize) / + GF_DISK_SECTOR_SIZE) < stbuf->ia_blocks) { + gf_log (this->name, GF_LOG_ERROR, + "data movement attempted from node (%s) with " + "to node (%s) which does not have required free space" + " for %s", from->name, to->name, loc->path); ret = 1; goto out; } @@ -399,8 +450,7 @@ __dht_rebalance_migrate_data (xlator_t *from, xlator_t *to, fd_t *src, fd_t *dst offset += ret; total += ret; - if (vector) - GF_FREE (vector); + GF_FREE (vector); if (iobref) iobref_unref (iobref); iobref = NULL; @@ -408,8 +458,7 @@ __dht_rebalance_migrate_data (xlator_t *from, xlator_t *to, fd_t *src, fd_t *dst } if (iobref) iobref_unref (iobref); - if (vector) - GF_FREE (vector); + GF_FREE (vector); if (ret >= 0) ret = 0; @@ -427,8 +476,10 @@ __dht_rebalance_open_src_file (xlator_t *from, xlator_t *to, loc_t *loc, dict_t *dict = NULL; xlator_t *this = NULL; struct iatt iatt = {0,}; + dht_conf_t *conf = NULL; this = THIS; + conf = this->private; fd = fd_create (loc->inode, DHT_REBALANCE_PID); if (!fd) { @@ -451,7 +502,7 @@ __dht_rebalance_open_src_file (xlator_t *from, xlator_t *to, loc_t *loc, if (!dict) goto out; - ret = dict_set_str (dict, DHT_LINKFILE_KEY, to->name); + ret = dict_set_str (dict, conf->link_xattr_name, to->name); if (ret) { gf_log (this->name, GF_LOG_ERROR, "failed to set xattr in dict for %s (linkto:%s)", @@ -504,12 +555,13 @@ migrate_special_files (xlator_t *this, xlator_t *from, xlator_t *to, loc_t *loc, dict_t *dict = NULL; char *link = NULL; struct iatt stbuf = {0,}; + dht_conf_t *conf = this->private; dict = dict_new (); if (!dict) goto out; - ret = dict_set_int32 (dict, DHT_LINKFILE_KEY, 256); + ret = dict_set_int32 (dict, conf->link_xattr_name, 256); if (ret) { gf_log (this->name, GF_LOG_ERROR, "%s: failed to set 'linkto' key in dict", loc->path); @@ -525,12 +577,13 @@ migrate_special_files (xlator_t *this, xlator_t *from, xlator_t *to, loc_t *loc, } /* we no more require this key */ - dict_del (dict, DHT_LINKFILE_KEY); + dict_del (dict, conf->link_xattr_name); /* file exists in target node, only if it is 'linkfile' its valid, otherwise, error out */ if (!ret) { - if (!check_is_linkfile (loc->inode, &stbuf, rsp_dict)) { + if (!check_is_linkfile (loc->inode, &stbuf, rsp_dict, + conf->link_xattr_name)) { gf_log (this->name, GF_LOG_WARNING, "%s: file exists in destination", loc->path); ret = -1; @@ -566,7 +619,7 @@ migrate_special_files (xlator_t *this, xlator_t *from, xlator_t *to, loc_t *loc, goto out; } - ret = syncop_symlink (to, loc, link, dict); + ret = syncop_symlink (to, loc, link, dict, 0); if (ret) { gf_log (this->name, GF_LOG_WARNING, "%s: creating symlink failed (%s)", @@ -580,7 +633,7 @@ migrate_special_files (xlator_t *this, xlator_t *from, xlator_t *to, loc_t *loc, ret = syncop_mknod (to, loc, st_mode_from_ia (buf->ia_prot, buf->ia_type), makedev (ia_major (buf->ia_rdev), - ia_minor (buf->ia_rdev)), dict); + ia_minor (buf->ia_rdev)), dict, 0); if (ret) { gf_log (this->name, GF_LOG_WARNING, "%s: mknod failed (%s)", loc->path, strerror (errno)); @@ -588,6 +641,15 @@ migrate_special_files (xlator_t *this, xlator_t *from, xlator_t *to, loc_t *loc, } done: + ret = syncop_setattr (to, loc, buf, + (GF_SET_ATTR_UID | GF_SET_ATTR_GID | + GF_SET_ATTR_MODE), NULL, NULL); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "%s: failed to perform setattr on %s (%s)", + loc->path, to->name, strerror (errno)); + } + ret = syncop_unlink (from, loc); if (ret) gf_log (this->name, GF_LOG_WARNING, "%s: unlink failed (%s)", @@ -625,6 +687,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, dict_t *xattr = NULL; dict_t *xattr_rsp = NULL; int file_has_holes = 0; + dht_conf_t *conf = this->private; gf_log (this->name, GF_LOG_INFO, "%s: attempting to move from %s to %s", loc->path, from->name, to->name); @@ -633,7 +696,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, if (!dict) goto out; - ret = dict_set_int32 (dict, DHT_LINKFILE_KEY, 256); + ret = dict_set_int32 (dict, conf->link_xattr_name, 256); if (ret) { gf_log (this->name, GF_LOG_ERROR, "%s: failed to set 'linkto' key in dict", loc->path); @@ -649,7 +712,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, } /* we no more require this key */ - dict_del (dict, DHT_LINKFILE_KEY); + dict_del (dict, conf->link_xattr_name); /* preserve source mode, so set the same to the destination */ src_ia_prot = stbuf.ia_prot; @@ -666,18 +729,22 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, goto out; } + /* TODO: move all xattr related operations to fd based operations */ + ret = syncop_listxattr (from, loc, &xattr); + if (ret == -1) + gf_log (this->name, GF_LOG_WARNING, + "%s: failed to get xattr from %s (%s)", + loc->path, from->name, strerror (errno)); + /* create the destination, with required modes/xattr */ ret = __dht_rebalance_create_dst_file (to, from, loc, &stbuf, - dict, &dst_fd); + dict, &dst_fd, xattr); if (ret) goto out; - /* Should happen on all files when 'force' option is not given */ - if (flag == GF_DHT_MIGRATE_DATA) { - ret = __dht_check_free_space (to, from, loc, &stbuf); - if (ret) { - goto out; - } + ret = __dht_check_free_space (to, from, loc, &stbuf, flag); + if (ret) { + goto out; } /* Open the source, and also update mode/xattr */ @@ -688,6 +755,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, goto out; } + ret = syncop_fstat (from, src_fd, &stbuf); if (ret) { gf_log (this->name, GF_LOG_ERROR, "failed to lookup %s on %s (%s)", @@ -717,22 +785,9 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, goto out; } - /* TODO: move all xattr related operations to fd based operations */ - ret = syncop_listxattr (from, loc, &xattr); - if (ret == -1) - gf_log (this->name, GF_LOG_WARNING, - "%s: failed to get xattr from %s (%s)", - loc->path, from->name, strerror (errno)); - - ret = syncop_setxattr (to, loc, xattr, 0); - if (ret == -1) - gf_log (this->name, GF_LOG_WARNING, - "%s: failed to set xattr on %s (%s)", - loc->path, to->name, strerror (errno)); - /* TODO: Sync the locks */ - ret = syncop_fsync (to, dst_fd); + ret = syncop_fsync (to, dst_fd, 0); if (ret) gf_log (this->name, GF_LOG_WARNING, "%s: failed to fsync on %s (%s)", @@ -769,6 +824,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, gf_log (this->name, GF_LOG_WARNING, "%s: failed to perform setattr on %s (%s)", loc->path, to->name, strerror (errno)); + goto out; } /* Because 'futimes' is not portable */ @@ -789,6 +845,24 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, gf_log (this->name, GF_LOG_WARNING, \ "%s: failed to perform setattr on %s (%s)", loc->path, from->name, strerror (errno)); + goto out; + } + + /* Free up the data blocks on the source node, as the whole + file is migrated */ + ret = syncop_ftruncate (from, src_fd, 0); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "%s: failed to perform truncate on %s (%s)", + loc->path, from->name, strerror (errno)); + } + + /* remove the 'linkto' xattr from the destination */ + ret = syncop_fremovexattr (to, dst_fd, conf->link_xattr_name); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "%s: failed to perform removexattr on %s (%s)", + loc->path, to->name, strerror (errno)); } /* Do a stat and check the gfid before unlink */ @@ -797,6 +871,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, gf_log (this->name, GF_LOG_WARNING, "%s: failed to do a stat on %s (%s)", loc->path, from->name, strerror (errno)); + goto out; } if (uuid_compare (empty_iatt.ia_gfid, loc->gfid) == 0) { @@ -806,29 +881,13 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, gf_log (this->name, GF_LOG_WARNING, "%s: failed to perform unlink on %s (%s)", loc->path, from->name, strerror (errno)); + goto out; } } - /* Free up the data blocks on the source node, as the whole - file is migrated */ - ret = syncop_ftruncate (from, src_fd, 0); - if (ret) { - gf_log (this->name, GF_LOG_WARNING, - "%s: failed to perform truncate on %s (%s)", - loc->path, from->name, strerror (errno)); - } - - /* remove the 'linkto' xattr from the destination */ - ret = syncop_fremovexattr (to, dst_fd, DHT_LINKFILE_KEY); - if (ret) { - gf_log (this->name, GF_LOG_WARNING, - "%s: failed to perform removexattr on %s (%s)", - loc->path, to->name, strerror (errno)); - } - ret = syncop_lookup (this, loc, NULL, NULL, NULL, NULL); if (ret) { - gf_log (this->name, GF_LOG_WARNING, + gf_log (this->name, GF_LOG_DEBUG, "%s: failed to lookup the file on subvolumes (%s)", loc->path, strerror (errno)); } @@ -920,7 +979,7 @@ rebalance_task_completion (int op_ret, call_frame_t *sync_frame, void *data) op_errno = EPERM; } - DHT_STACK_UNWIND (setxattr, sync_frame, op_ret, op_errno); + DHT_STACK_UNWIND (setxattr, sync_frame, op_ret, op_errno, NULL); return 0; } @@ -928,25 +987,21 @@ int dht_start_rebalance_task (xlator_t *this, call_frame_t *frame) { int ret = -1; - dht_conf_t *conf = NULL; - conf = this->private; - - ret = synctask_new (conf->env, rebalance_task, + ret = synctask_new (this->ctx->env, rebalance_task, rebalance_task_completion, frame, frame); return ret; } int -gf_listener_stop (void) +gf_listener_stop (xlator_t *this) { glusterfs_ctx_t *ctx = NULL; cmd_args_t *cmd_args = NULL; int ret = 0; - xlator_t *this = NULL; - ctx = glusterfs_ctx_get (); + ctx = this->ctx; GF_ASSERT (ctx); cmd_args = &ctx->cmd_args; if (cmd_args->sock_file) { @@ -957,7 +1012,6 @@ gf_listener_stop (void) } if (ret) { - this = THIS; gf_log (this->name, GF_LOG_ERROR, "Failed to unlink listener " "socket %s, error: %s", cmd_args->sock_file, strerror (errno)); @@ -1020,6 +1074,31 @@ gf_defrag_handle_migrate_error (int32_t op_errno, gf_defrag_info_t *defrag) return 0; } +static gf_boolean_t +gf_defrag_pattern_match (gf_defrag_info_t *defrag, char *name, uint64_t size) +{ + gf_defrag_pattern_list_t *trav = NULL; + gf_boolean_t match = _gf_false; + gf_boolean_t ret = _gf_false; + + GF_VALIDATE_OR_GOTO ("dht", defrag, out); + + trav = defrag->defrag_pattern; + while (trav) { + if (!fnmatch (trav->path_pattern, name, FNM_NOESCAPE)) { + match = _gf_true; + break; + } + trav = trav->next; + } + + if ((match == _gf_true) && (size >= trav->size)) + ret = _gf_true; + + out: + return ret; +} + /* We do a depth first traversal of directories. But before we move into * subdirs, we complete the data migration of those directories whose layouts * have been fixed @@ -1040,9 +1119,19 @@ gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, dict_t *dict = NULL; struct iatt iatt = {0,}; int32_t op_errno = 0; - - gf_log (this->name, GF_LOG_INFO, "migate data called on %s", + char *uuid_str = NULL; + uuid_t node_uuid = {0,}; + int readdir_operrno = 0; + struct timeval dir_start = {0,}; + struct timeval end = {0,}; + double elapsed = {0,}; + struct timeval start = {0,}; + int32_t err = 0; + + gf_log (this->name, GF_LOG_INFO, "migrate data called on %s", loc->path); + gettimeofday (&dir_start, NULL); + fd = fd_create (loc->inode, defrag->pid); if (!fd) { gf_log (this->name, GF_LOG_ERROR, "Failed to create fd"); @@ -1059,15 +1148,25 @@ gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, INIT_LIST_HEAD (&entries.list); while ((ret = syncop_readdirp (this, fd, 131072, offset, NULL, - &entries)) != 0) - { - if ((ret < 0) || (ret && (errno == ENOENT))) - break; + &entries)) != 0) { - free_entries = _gf_true; + if (ret < 0) { + + gf_log (this->name, GF_LOG_ERROR, "Readdir returned %s." + " Aborting migrate-data", + strerror(readdir_operrno)); + goto out; + } + + /* Need to keep track of ENOENT errno, that means, there is no + need to send more readdirp() */ + readdir_operrno = errno; if (list_empty (&entries.list)) break; + + free_entries = _gf_true; + list_for_each_entry_safe (entry, tmp, &entries.list, list) { if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { ret = 1; @@ -1084,7 +1183,15 @@ gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, continue; defrag->num_files_lookedup++; - + if (defrag->stats == _gf_true) { + gettimeofday (&start, NULL); + } + if (defrag->defrag_pattern && + (gf_defrag_pattern_match (defrag, entry->d_name, + entry->d_stat.ia_size) + == _gf_false)) { + continue; + } loc_wipe (&entry_loc); ret =dht_build_child_loc (this, &entry_loc, loc, entry->d_name); @@ -1122,6 +1229,43 @@ gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, continue; } + ret = syncop_getxattr (this, &entry_loc, &dict, + GF_XATTR_NODE_UUID_KEY); + if(ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "get node-uuid for %s", entry_loc.path); + continue; + } + + ret = dict_get_str (dict, GF_XATTR_NODE_UUID_KEY, + &uuid_str); + if(ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "get node-uuid from dict for %s", + entry_loc.path); + continue; + } + + if (uuid_parse (uuid_str, node_uuid)) { + gf_log (this->name, GF_LOG_ERROR, "uuid_parse " + "failed for %s", entry_loc.path); + continue; + } + + /* if file belongs to different node, skip migration + * the other node will take responsibility of migration + */ + if (uuid_compare (node_uuid, defrag->node_uuid)) { + gf_log (this->name, GF_LOG_TRACE, "%s does not" + "belong to this node", entry_loc.path); + continue; + } + + uuid_str = NULL; + + dict_del (dict, GF_XATTR_NODE_UUID_KEY); + + /* if distribute is present, it will honor this key. * -1 is returned if distribute is not present or file * doesn't have a link-file. If file has link-file, the @@ -1131,14 +1275,31 @@ gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, ret = syncop_getxattr (this, &entry_loc, &dict, GF_XATTR_LINKINFO_KEY); if (ret < 0) { + gf_log (this->name, GF_LOG_TRACE, "failed to " + "get link-to key for %s", + entry_loc.path); continue; } ret = syncop_setxattr (this, &entry_loc, migrate_data, 0); - if (ret) - gf_log (this->name, GF_LOG_ERROR, "setxattr " - "failed for %s", entry_loc.path); + if (ret) { + err = op_errno; + /* errno is overloaded. See + * rebalance_task_completion () */ + if (err != ENOSPC) { + gf_log (this->name, GF_LOG_DEBUG, + "migrate-data skipped for %s" + " due to space constraints", + entry_loc.path); + defrag->skipped +=1; + } else{ + gf_log (this->name, GF_LOG_ERROR, + "migrate-data failed for %s", + entry_loc.path); + defrag->total_failures +=1; + } + } if (ret == -1) { op_errno = errno; @@ -1147,7 +1308,7 @@ gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, if (!ret) gf_log (this->name, GF_LOG_DEBUG, - "setxattr on %s failed: %s", + "migrate-data on %s failed: %s", entry_loc.path, strerror (op_errno)); else if (ret == 1) @@ -1162,13 +1323,30 @@ gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, defrag->total_data += iatt.ia_size; } UNLOCK (&defrag->lock); + if (defrag->stats == _gf_true) { + gettimeofday (&end, NULL); + elapsed = (end.tv_sec - start.tv_sec) * 1e6 + + (end.tv_usec - start.tv_usec); + gf_log (this->name, GF_LOG_INFO, "Migration of " + "file:%s size:%"PRIu64" bytes took %.2f" + "secs", entry_loc.path, iatt.ia_size, + elapsed/1e6); + } } gf_dirent_free (&entries); free_entries = _gf_false; INIT_LIST_HEAD (&entries.list); + if (readdir_operrno == ENOENT) + break; } + + gettimeofday (&end, NULL); + elapsed = (end.tv_sec - dir_start.tv_sec) * 1e6 + + (end.tv_usec - dir_start.tv_usec); + gf_log (this->name, GF_LOG_INFO, "Migration operation on dir %s took " + "%.2f secs", loc->path, elapsed/1e6); ret = 0; out: if (free_entries) @@ -1200,6 +1378,7 @@ gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, dict_t *dict = NULL; off_t offset = 0; struct iatt iatt = {0,}; + int readdirp_errno = 0; ret = syncop_lookup (this, loc, NULL, &iatt, NULL, NULL); if (ret) { @@ -1208,8 +1387,11 @@ gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, goto out; } - if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) - gf_defrag_migrate_data (this, defrag, loc, migrate_data); + if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) { + ret = gf_defrag_migrate_data (this, defrag, loc, migrate_data); + if (ret) + goto out; + } gf_log (this->name, GF_LOG_TRACE, "fix layout called on %s", loc->path); @@ -1232,12 +1414,22 @@ gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, while ((ret = syncop_readdirp (this, fd, 131072, offset, NULL, &entries)) != 0) { - if ((ret < 0) || (ret && (errno == ENOENT))) - break; - free_entries = _gf_true; + + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "Readdir returned %s" + ". Aborting fix-layout",strerror(errno)); + goto out; + } + + /* Need to keep track of ENOENT errno, that means, there is no + need to send more readdirp() */ + readdirp_errno = errno; if (list_empty (&entries.list)) break; + + free_entries = _gf_true; + list_for_each_entry_safe (entry, tmp, &entries.list, list) { if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { ret = 1; @@ -1264,7 +1456,7 @@ gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, if (uuid_is_null (entry->d_stat.ia_gfid)) { gf_log (this->name, GF_LOG_ERROR, "%s/%s" - "gfid not present", loc->path, + " gfid not present", loc->path, entry->d_name); continue; } @@ -1274,7 +1466,7 @@ gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, uuid_copy (entry_loc.gfid, entry->d_stat.ia_gfid); if (uuid_is_null (loc->gfid)) { gf_log (this->name, GF_LOG_ERROR, "%s/%s" - "gfid not present", loc->path, + " gfid not present", loc->path, entry->d_name); continue; } @@ -1296,6 +1488,7 @@ gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, "failed for %s", entry_loc.path); defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; + defrag->total_failures ++; goto out; } ret = gf_defrag_fix_layout (this, defrag, &entry_loc, @@ -1304,6 +1497,7 @@ gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, if (ret) { gf_log (this->name, GF_LOG_ERROR, "Fix layout " "failed for %s", entry_loc.path); + defrag->total_failures++; goto out; } @@ -1311,6 +1505,8 @@ gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, gf_dirent_free (&entries); free_entries = _gf_false; INIT_LIST_HEAD (&entries.list); + if (readdirp_errno == ENOENT) + break; } ret = 0; @@ -1343,11 +1539,17 @@ gf_defrag_start_crawl (void *data) struct iatt parent = {0,}; dict_t *fix_layout = NULL; dict_t *migrate_data = NULL; + dict_t *status = NULL; + glusterfs_ctx_t *ctx = NULL; this = data; if (!this) goto out; + ctx = this->ctx; + if (!ctx) + goto out; + conf = this->private; if (!conf) goto out; @@ -1355,6 +1557,8 @@ gf_defrag_start_crawl (void *data) defrag = conf->defrag; if (!defrag) goto out; + + gettimeofday (&defrag->start_time, NULL); dht_build_root_inode (this, &defrag->root_inode); if (!defrag->root_inode) goto out; @@ -1386,6 +1590,7 @@ gf_defrag_start_crawl (void *data) if (ret) { gf_log (this->name, GF_LOG_ERROR, "fix layout on %s failed", loc.path); + defrag->total_failures++; goto out; } @@ -1407,17 +1612,30 @@ gf_defrag_start_crawl (void *data) } ret = gf_defrag_fix_layout (this, defrag, &loc, fix_layout, migrate_data); + if ((defrag->defrag_status != GF_DEFRAG_STATUS_STOPPED) && + (defrag->defrag_status != GF_DEFRAG_STATUS_FAILED)) { + defrag->defrag_status = GF_DEFRAG_STATUS_COMPLETE; + } + + out: LOCK (&defrag->lock); { - gf_defrag_status_get (defrag, NULL); + status = dict_new (); + gf_defrag_status_get (defrag, status); + if (ctx->notify) + ctx->notify (GF_EN_DEFRAG_STATUS, status); + if (status) + dict_unref (status); defrag->is_exiting = 1; } UNLOCK (&defrag->lock); - if (defrag) + if (defrag) { GF_FREE (defrag); + conf->defrag = NULL; + } return ret; } @@ -1426,9 +1644,8 @@ out: static int gf_defrag_done (int ret, call_frame_t *sync_frame, void *data) { - gf_listener_stop(); + gf_listener_stop (sync_frame->this); - GF_FREE (data); STACK_DESTROY (sync_frame->root); kill (getpid(), SIGTERM); return 0; @@ -1456,6 +1673,8 @@ gf_defrag_start (void *data) if (!frame) goto out; + frame->root->pid = GF_CLIENT_PID_DEFRAG; + defrag->pid = frame->root->pid; defrag->defrag_status = GF_DEFRAG_STATUS_STARTED; @@ -1477,6 +1696,12 @@ gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict) uint64_t files = 0; uint64_t size = 0; uint64_t lookup = 0; + uint64_t failures = 0; + uint64_t skipped = 0; + char *status = ""; + double elapsed = 0; + struct timeval end = {0,}; + if (!defrag) goto out; @@ -1488,6 +1713,12 @@ gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict) files = defrag->total_files; size = defrag->total_data; lookup = defrag->num_files_lookedup; + failures = defrag->total_failures; + skipped = defrag->skipped; + + gettimeofday (&end, NULL); + + elapsed = end.tv_sec - defrag->start_time.tv_sec; if (!dict) goto log; @@ -1507,13 +1738,53 @@ gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict) gf_log (THIS->name, GF_LOG_WARNING, "failed to set lookedup file count"); + ret = dict_set_int32 (dict, "status", defrag->defrag_status); if (ret) gf_log (THIS->name, GF_LOG_WARNING, "failed to set status"); + if (elapsed) { + ret = dict_set_double (dict, "run-time", elapsed); + if (ret) + gf_log (THIS->name, GF_LOG_WARNING, + "failed to set run-time"); + } + + ret = dict_set_uint64 (dict, "failures", failures); + if (ret) + gf_log (THIS->name, GF_LOG_WARNING, + "failed to set failure count"); + + ret = dict_set_uint64 (dict, "skipped", skipped); + if (ret) + gf_log (THIS->name, GF_LOG_WARNING, + "failed to set skipped file count"); log: + switch (defrag->defrag_status) { + case GF_DEFRAG_STATUS_NOT_STARTED: + status = "not started"; + break; + case GF_DEFRAG_STATUS_STARTED: + status = "in progress"; + break; + case GF_DEFRAG_STATUS_STOPPED: + status = "stopped"; + break; + case GF_DEFRAG_STATUS_COMPLETE: + status = "completed"; + break; + case GF_DEFRAG_STATUS_FAILED: + status = "failed"; + break; + default: + break; + } + + gf_log (THIS->name, GF_LOG_INFO, "Rebalance is %s. Time taken is %.2f " + "secs", status, elapsed); gf_log (THIS->name, GF_LOG_INFO, "Files migrated: %"PRIu64", size: %" - PRIu64", lookups: %"PRIu64, files, size, lookup); + PRIu64", lookups: %"PRIu64", failures: %"PRIu64", skipped: " + "%"PRIu64, files, size, lookup, failures, skipped); out: @@ -1532,11 +1803,13 @@ gf_defrag_stop (gf_defrag_info_t *defrag, dict_t *output) goto out; } + gf_log ("", GF_LOG_INFO, "Received stop command on rebalance"); defrag->defrag_status = GF_DEFRAG_STATUS_STOPPED; - gf_defrag_status_get (defrag, output); + if (output) + gf_defrag_status_get (defrag, output); ret = 0; out: - gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); return ret; } |
