summaryrefslogtreecommitdiffstats
path: root/geo-replication/syncdaemon/resource.py
diff options
context:
space:
mode:
authorAravinda VK <avishwan@redhat.com>2015-12-02 19:37:55 +0530
committerAravinda VK <avishwan@redhat.com>2016-03-08 01:59:17 -0800
commit8883c12216cc0c0770a4207e1e2a62fa16dc1528 (patch)
tree7dca2d3793ecd2c2b36100abf2843e2737946ad8 /geo-replication/syncdaemon/resource.py
parentf3b8a931b00cfd0ecee46599ed1ef1aaf236e148 (diff)
geo-rep: Handling Rsync/Tar errors efficiently
Geo-rep processes Changelogs in Batch, if one file in batch fails with rsync error that Changelog file is reprocessed multiple times. After MAX_RETRY, it logs all the GFIDs from that batch as Skipped. This patch addresses following issues, 1. When Rsync/Tar fails do not parse Changelog again for retry 2. When Rsync/Tar fails do not replay Entry operations, only retry rsync/tar for those GFIDs 3. Log Error in Rsync/Tar only in the last Retry 4. Do not log Skipped GFIDs since Rsync/Tar errors are logged for only failed files. 5. Changed Entry failures as Error instead of Warning BUG: 1313309 Change-Id: Ie134ce2572693056ab9b9008cd8aa5b5d87f7975 Signed-off-by: Aravinda VK <avishwan@redhat.com> Reviewed-on: http://review.gluster.org/12856 Reviewed-by: Kotresh HR <khiremat@redhat.com> Reviewed-by: Saravanakumar Arumugam <sarumuga@redhat.com> CentOS-regression: Gluster Build System <jenkins@build.gluster.com> Smoke: Gluster Build System <jenkins@build.gluster.com> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> Reviewed-by: Venky Shankar <vshankar@redhat.com> (cherry picked from commit d136a789258e8f600e536717da156a242d8ed9a5) Reviewed-on: http://review.gluster.org/13558
Diffstat (limited to 'geo-replication/syncdaemon/resource.py')
-rw-r--r--geo-replication/syncdaemon/resource.py49
1 files changed, 26 insertions, 23 deletions
diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py
index 19363401e65..ac697eb39ed 100644
--- a/geo-replication/syncdaemon/resource.py
+++ b/geo-replication/syncdaemon/resource.py
@@ -906,7 +906,7 @@ class SlaveRemote(object):
"RePCe major version mismatch: local %s, remote %s" %
(exrv, rv))
- def rsync(self, files, *args):
+ def rsync(self, files, *args, **kw):
"""invoke rsync"""
if not files:
raise GsyncdError("no files to sync")
@@ -932,12 +932,15 @@ class SlaveRemote(object):
po.stdin.write(f)
po.stdin.write('\0')
- po.stdin.close()
+ stdout, stderr = po.communicate()
+
+ if kw.get("log_err", False):
+ for errline in stderr.strip().split("\n")[:-1]:
+ logging.error("SYNC Error(Rsync): %s" % errline)
if gconf.log_rsync_performance:
- out = po.stdout.read()
rsync_msg = []
- for line in out.split("\n"):
+ for line in stdout.split("\n"):
if line.startswith("Number of files:") or \
line.startswith("Number of regular files transferred:") or \
line.startswith("Total file size:") or \
@@ -949,12 +952,10 @@ class SlaveRemote(object):
line.startswith("sent "):
rsync_msg.append(line)
logging.info("rsync performance: %s" % ", ".join(rsync_msg))
- po.wait()
- po.terminate_geterr(fail_on_err=False)
return po
- def tarssh(self, files, slaveurl):
+ def tarssh(self, files, slaveurl, log_err=False):
"""invoke tar+ssh
-z (compress) can be use if needed, but omitting it now
as it results in weird error (tar+ssh errors out (errcode: 2)
@@ -975,15 +976,16 @@ class SlaveRemote(object):
for f in files:
p0.stdin.write(f)
p0.stdin.write('\n')
- p0.stdin.close()
- # wait() for tar to terminate, collecting any errors, further
+ p0.stdin.close()
+ p0.stdout.close() # Allow p0 to receive a SIGPIPE if p1 exits.
+ # wait for tar to terminate, collecting any errors, further
# waiting for transfer to complete
- p0.wait()
- p0.terminate_geterr(fail_on_err=False)
+ _, stderr1 = p1.communicate()
- p1.wait()
- p1.terminate_geterr(fail_on_err=False)
+ if log_err:
+ for errline in stderr1.strip().split("\n")[:-1]:
+ logging.error("SYNC Error(Untar): %s" % errline)
return p1
@@ -1045,8 +1047,8 @@ class FILE(AbstractUrl, SlaveLocal, SlaveRemote):
"""inhibit the resource beyond"""
os.chdir(self.path)
- def rsync(self, files):
- return sup(self, files, self.path)
+ def rsync(self, files, log_err=False):
+ return sup(self, files, self.path, log_err=log_err)
class GLUSTER(AbstractUrl, SlaveLocal, SlaveRemote):
@@ -1460,11 +1462,11 @@ class GLUSTER(AbstractUrl, SlaveLocal, SlaveRemote):
else:
sup(self, *args)
- def rsync(self, files):
- return sup(self, files, self.slavedir)
+ def rsync(self, files, log_err=False):
+ return sup(self, files, self.slavedir, log_err=log_err)
- def tarssh(self, files):
- return sup(self, files, self.slavedir)
+ def tarssh(self, files, log_err=False):
+ return sup(self, files, self.slavedir, log_err=log_err)
class SSH(AbstractUrl, SlaveRemote):
@@ -1571,12 +1573,13 @@ class SSH(AbstractUrl, SlaveRemote):
self.fd_pair = (i, o)
return 'should'
- def rsync(self, files):
+ def rsync(self, files, log_err=False):
return sup(self, files, '-e',
" ".join(gconf.ssh_command.split() +
["-p", str(gconf.ssh_port)] +
gconf.ssh_ctl_args),
- *(gconf.rsync_ssh_options.split() + [self.slaveurl]))
+ *(gconf.rsync_ssh_options.split() + [self.slaveurl]),
+ log_err=log_err)
- def tarssh(self, files):
- return sup(self, files, self.slaveurl)
+ def tarssh(self, files, log_err=False):
+ return sup(self, files, self.slaveurl, log_err=log_err)