From c399cec72b9985f120a1495e93e1a380911547d9 Mon Sep 17 00:00:00 2001 From: Aravinda VK Date: Fri, 26 Dec 2014 19:12:22 +0530 Subject: geo-rep: Error handling in tar+ssh mode Geo-rep raises an exception if tar+ssh fails, and the worker dies due to the exception. This patch adds resilience to tar+ssh errors: the geo-rep worker retries on error and skips those changelogs after the maximum number of retries (same as rsync mode). Removed the per-GFID warning messages for each rsync/tar+ssh failure, since the skipped list will be populated after the maximum number of retries. A log of the retried changelog files is also available, hence a warning message for each GFID is redundant. BUG: 1177527 Change-Id: I3019c5c1ada7fc0822e4b14831512d283755b1ea Signed-off-by: Aravinda VK Reviewed-on: http://review.gluster.org/9356 Tested-by: Gluster Build System Reviewed-by: Kotresh HR Reviewed-by: Venky Shankar Tested-by: Venky Shankar --- geo-replication/syncdaemon/master.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py index 48da768c935..138e7e475b8 100644 --- a/geo-replication/syncdaemon/master.py +++ b/geo-replication/syncdaemon/master.py @@ -255,10 +255,11 @@ class TarSSHEngine(object): # stat check for file presence st = lstat(se) if isinstance(st, int): + # file got unlinked in the interim self.unlinked_gfids.append(se) return True - logging.warn('tar+ssh: %s [errcode: %d]' % (se, rv[1])) - se_list = se.split('/'); + + se_list = se.split('/') self.current_files_skipped_count += 1 self.skipped_gfid_list.append(se_list[1]) self.add_job(self.FLAT_DIR_HIERARCHY, 'reg', regjob, f, None, pb) @@ -291,15 +292,14 @@ class RsyncEngine(object): logging.debug('synced ' + se) return True else: - if rv[1] in [23, 24]: - # stat to check if the file exist - st = lstat(se) - if isinstance(st, int): - # file got unlinked in the interim - self.unlinked_gfids.append(se) - return True - logging.warn('Rsync: %s [errcode: %d]' % (se, rv[1])) - 
se_list = se.split('/'); + # stat to check if the file exist + st = lstat(se) + if isinstance(st, int): + # file got unlinked in the interim + self.unlinked_gfids.append(se) + return True + + se_list = se.split('/') self.current_files_skipped_count += 1 self.skipped_gfid_list.append(se_list[1]) self.add_job(self.FLAT_DIR_HIERARCHY, 'reg', regjob, f, None, pb) @@ -408,7 +408,7 @@ class GMasterCommon(object): self.jobtab = {} if boolify(gconf.use_tarssh): logging.info("using 'tar over ssh' as the sync engine") - self.syncer = Syncer(slave, self.slave.tarssh) + self.syncer = Syncer(slave, self.slave.tarssh, [2]) else: logging.info("using 'rsync' as the sync engine") # partial transfer (cf. rsync(1)), that's normal -- cgit