From b127d0fd7b6c452d3f355c3fc11b068c55b0d457 Mon Sep 17 00:00:00 2001
From: Amar Tumballi
Date: Tue, 19 Jul 2011 20:26:52 +0530
Subject: glusterd rebalance: handle the write failure properly

also, make sure the sizes are same before renaming the target file to the original file, hence prevent a possible data-loss.

Change-Id: Ie88224ba62a4604f8c0149f84fa462abfbd6ad78
BUG: 3193
Reviewed-on: http://review.gluster.com/27
Tested-by: Gluster Build System
Reviewed-by: Anand Avati
---
 xlators/mgmt/glusterd/src/glusterd-rebalance.c | 63 ++++++++++++++++++++++----
 1 file changed, 53 insertions(+), 10 deletions(-)

(limited to 'xlators/mgmt/glusterd')

diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
index 58e56f1cd..6e478b073 100644
--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c
+++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
@@ -105,10 +105,12 @@ gf_glusterd_rebalance_move_data (glusterd_volinfo_t *volinfo, const char *dir)
         struct dirent *entry = NULL;
         struct stat stbuf = {0,};
         struct stat new_stbuf = {0,};
+        struct stat dst_stbuf = {0,};
         char full_path[PATH_MAX] = {0,};
         char tmp_filename[PATH_MAX] = {0,};
         char value[16] = {0,};
         char linkinfo[PATH_MAX] = {0,};
+        char file_not_copied_fully = 0;
 
         if (!volinfo->defrag)
                 goto out;
@@ -172,24 +174,25 @@ gf_glusterd_rebalance_move_data (glusterd_volinfo_t *volinfo, const char *dir)
                 while (1) {
                         ret = read (src_fd, defrag->databuf, 131072);
-                        if (!ret || (ret < 0)) {
+                        if (ret < 0) {
+                                file_not_copied_fully = 1;
                                 break;
                         }
 
+                        /* If EOF is hit, then we get 'ret == 0' */
+                        if (!ret)
+                                break;
+
                         ret = write (dst_fd, defrag->databuf, ret);
                         if (ret < 0) {
+                                file_not_copied_fully = 1;
                                 break;
                         }
                 }
 
-                ret = lstat (full_path, &new_stbuf);
-                if (ret < 0) {
-                        close (dst_fd);
-                        close (src_fd);
-                        continue;
-                }
-                /* No need to rebalance, if there is some
-                   activity on source file */
-                if (new_stbuf.st_mtime != stbuf.st_mtime) {
+                if (file_not_copied_fully) {
+                        gf_log (THIS->name, GF_LOG_WARNING,
+                                "failed to copy the file fully : %s (%s)",
+                                full_path, strerror (errno));
                         close (dst_fd);
                         close (src_fd);
                         continue;
@@ -209,6 +212,46 @@ gf_glusterd_rebalance_move_data (glusterd_volinfo_t *volinfo, const char *dir)
                                 tmp_filename, strerror (errno));
                 }
 
+                ret = fstat (src_fd, &new_stbuf);
+                if (ret < 0) {
+                        gf_log (THIS->name, GF_LOG_WARNING,
+                                "failed to get stat: %s (%s)",
+                                full_path, strerror (errno));
+                        close (dst_fd);
+                        close (src_fd);
+                        continue;
+                }
+
+                ret = fstat (dst_fd, &dst_stbuf);
+                if (ret < 0) {
+                        gf_log (THIS->name, GF_LOG_WARNING,
+                                "failed to get stat on temp file: %s (%s)",
+                                tmp_filename, strerror (errno));
+                        close (dst_fd);
+                        close (src_fd);
+                        continue;
+                }
+
+                /* No need to rebalance, if there is some
+                   activity on source file */
+                if (new_stbuf.st_mtime != stbuf.st_mtime) {
+                        gf_log (THIS->name, GF_LOG_WARNING,
+                                "file got changed after we started copying %s",
+                                full_path);
+                        close (dst_fd);
+                        close (src_fd);
+                        continue;
+                }
+
+                if (new_stbuf.st_size != dst_stbuf.st_size) {
+                        gf_log (THIS->name, GF_LOG_WARNING,
+                                "file sizes are not same : %s",
+                                full_path);
+                        close (dst_fd);
+                        close (src_fd);
+                        continue;
+                }
+
                 ret = rename (tmp_filename, full_path);
                 if (ret != -1) {
                         LOCK (&defrag->lock);
--
cgit