From a59fc489bebce1c81d0a22d53794d7e41f3f4067 Mon Sep 17 00:00:00 2001 From: Amar Tumballi Date: Tue, 19 Jul 2011 20:51:22 +0530 Subject: glusterd rebalance: handle the write failure properly. Also, make sure the sizes are the same before renaming the target file to the original file, hence preventing possible data loss. Change-Id: Ie88224ba62a4604f8c0149f84fa462abfbd6ad78 BUG: 3193 Reviewed-on: http://review.gluster.com/29 Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- xlators/mgmt/glusterd/src/glusterd-rebalance.c | 63 ++++++++++++++++++++++---- 1 file changed, 53 insertions(+), 10 deletions(-) diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c index f8194a50c..f98dea0db 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c +++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c @@ -154,9 +154,11 @@ gf_glusterd_rebalance_move_data (glusterd_volinfo_t *volinfo, const char *dir) struct dirent *entry = NULL; struct stat stbuf = {0,}; struct stat new_stbuf = {0,}; + struct stat dst_stbuf = {0,}; char full_path[1024] = {0,}; char tmp_filename[1024] = {0,}; char value[16] = {0,}; + char file_not_copied_fully = 0; if (!volinfo->defrag) goto out; @@ -210,24 +212,25 @@ gf_glusterd_rebalance_move_data (glusterd_volinfo_t *volinfo, const char *dir) while (1) { ret = read (src_fd, defrag->databuf, 131072); - if (!ret || (ret < 0)) { + if (ret < 0) { + file_not_copied_fully = 1; break; } + /* If EOF is hit, then we get 'ret == 0' */ + if (!ret) + break; + ret = write (dst_fd, defrag->databuf, ret); if (ret < 0) { + file_not_copied_fully = 1; break; } } - ret = lstat (full_path, &new_stbuf); - if (ret < 0) { - close (dst_fd); - close (src_fd); - continue; - } - /* No need to rebalance, if there is some - activity on source file */ - if (new_stbuf.st_mtime != stbuf.st_mtime) { + if (file_not_copied_fully) { + gf_log (THIS->name, GF_LOG_WARNING, + "failed to copy the file fully : %s (%s)", + full_path, 
strerror (errno)); close (dst_fd); close (src_fd); continue; @@ -254,6 +257,46 @@ gf_glusterd_rebalance_move_data (glusterd_volinfo_t *volinfo, const char *dir) tmp_filename, strerror (errno)); } + ret = fstat (src_fd, &new_stbuf); + if (ret < 0) { + gf_log (THIS->name, GF_LOG_WARNING, + "failed to get stat: %s (%s)", + full_path, strerror (errno)); + close (dst_fd); + close (src_fd); + continue; + } + + ret = fstat (dst_fd, &dst_stbuf); + if (ret < 0) { + gf_log (THIS->name, GF_LOG_WARNING, + "failed to get stat on temp file: %s (%s)", + tmp_filename, strerror (errno)); + close (dst_fd); + close (src_fd); + continue; + } + + /* No need to rebalance, if there is some + activity on source file */ + if (new_stbuf.st_mtime != stbuf.st_mtime) { + gf_log (THIS->name, GF_LOG_WARNING, + "file got changed after we started copying %s", + full_path); + close (dst_fd); + close (src_fd); + continue; + } + + if (new_stbuf.st_size != dst_stbuf.st_size) { + gf_log (THIS->name, GF_LOG_WARNING, + "file sizes are not same : %s", + full_path); + close (dst_fd); + close (src_fd); + continue; + } + ret = rename (tmp_filename, full_path); if (ret != -1) { LOCK (&defrag->lock); -- cgit