diff options
| author | Amar Tumballi <amar@gluster.com> | 2011-07-19 20:26:52 +0530 | 
|---|---|---|
| committer | Anand Avati <avati@gluster.com> | 2011-07-22 02:51:18 -0700 | 
| commit | b127d0fd7b6c452d3f355c3fc11b068c55b0d457 (patch) | |
| tree | 740de4b6d2aaca4ae1df0dcfad261d7d8b008458 | |
| parent | 009b05411ca399deed3045acdc93116ebab029c4 (diff) | |
glusterd rebalance: handle the write failure properly
Also, make sure the sizes are the same before renaming the target file
to the original file, thereby preventing possible data loss.
Change-Id: Ie88224ba62a4604f8c0149f84fa462abfbd6ad78
BUG: 3193
Reviewed-on: http://review.gluster.com/27
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Anand Avati <avati@gluster.com>
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-rebalance.c | 63 | 
1 files changed, 53 insertions, 10 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c index 58e56f1cd..6e478b073 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c +++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c @@ -105,10 +105,12 @@ gf_glusterd_rebalance_move_data (glusterd_volinfo_t *volinfo, const char *dir)          struct dirent          *entry                  = NULL;          struct stat             stbuf                  = {0,};          struct stat             new_stbuf              = {0,}; +        struct stat             dst_stbuf              = {0,};          char                    full_path[PATH_MAX]    = {0,};          char                    tmp_filename[PATH_MAX] = {0,};          char                    value[16]              = {0,};          char                    linkinfo[PATH_MAX]     = {0,}; +        char                    file_not_copied_fully  = 0;          if (!volinfo->defrag)                  goto out; @@ -172,24 +174,25 @@ gf_glusterd_rebalance_move_data (glusterd_volinfo_t *volinfo, const char *dir)                  while (1) {                          ret = read (src_fd, defrag->databuf, 131072); -                        if (!ret || (ret < 0)) { +                        if (ret < 0) { +                                file_not_copied_fully = 1;                                  break;                          } +                        /* If EOF is hit, then we get 'ret == 0' */ +                        if (!ret) +                                break; +                          ret = write (dst_fd, defrag->databuf, ret);                          if (ret < 0) { +                                file_not_copied_fully = 1;                                  break;                          }                  } -                ret = lstat (full_path, &new_stbuf); -                if (ret < 0) { -                        close (dst_fd); -                        close (src_fd); -                        
continue; -                } -                /* No need to rebalance, if there is some -                   activity on source file */ -                if (new_stbuf.st_mtime != stbuf.st_mtime) { +                if (file_not_copied_fully) { +                        gf_log (THIS->name, GF_LOG_WARNING, +                                "failed to copy the file fully : %s (%s)", +                                full_path, strerror (errno));                          close (dst_fd);                          close (src_fd);                          continue; @@ -209,6 +212,46 @@ gf_glusterd_rebalance_move_data (glusterd_volinfo_t *volinfo, const char *dir)                                  tmp_filename, strerror (errno));                  } +                ret = fstat (src_fd, &new_stbuf); +                if (ret < 0) { +                        gf_log (THIS->name, GF_LOG_WARNING, +                                "failed to get stat: %s (%s)", +                                full_path, strerror (errno)); +                        close (dst_fd); +                        close (src_fd); +                        continue; +                } + +                ret = fstat (dst_fd, &dst_stbuf); +                if (ret < 0) { +                        gf_log (THIS->name, GF_LOG_WARNING, +                                "failed to get stat on temp file: %s (%s)", +                                tmp_filename, strerror (errno)); +                        close (dst_fd); +                        close (src_fd); +                        continue; +                } + +                /* No need to rebalance, if there is some +                   activity on source file */ +                if (new_stbuf.st_mtime != stbuf.st_mtime) { +                        gf_log (THIS->name, GF_LOG_WARNING, +                                "file got changed after we started copying %s", +                                full_path); +                        close (dst_fd); +                        
close (src_fd); +                        continue; +                } + +                if (new_stbuf.st_size != dst_stbuf.st_size) { +                        gf_log (THIS->name, GF_LOG_WARNING, +                                "file sizes are not same : %s", +                                full_path); +                        close (dst_fd); +                        close (src_fd); +                        continue; +                } +                  ret = rename (tmp_filename, full_path);                  if (ret != -1) {                          LOCK (&defrag->lock);  | 
