From 98714f503768db9a2ee442a544702c801a31ae6d Mon Sep 17 00:00:00 2001 From: Kotresh HR Date: Fri, 24 Aug 2018 07:16:18 -0400 Subject: geo-rep: Make automatic gfid conflict resolution optional Automatic gfid conflict resolution needs to be disabled during failover/failback as it might lead to data loss in the following scenario. 1. Master went down without syncing directory "dir1" to slave. 2. When slave is failed over to master, if a new file is written inside "dir1", creating dir1 again if not present, "dir1" ends up with different gfid on original slave. 3. When original master is up and failed back, due to automatic gfid conflict resolution, "dir1" present in original master is deleted losing all files and only new file created on original slave is restored. Hence during failover/failback, automatic gfid conflict resolution should be disabled. That way, an appropriate decision can be taken in these cases. fixes: bz#1622076 Signed-off-by: Kotresh HR Change-Id: I433616f5d3e13d4b6eb675475bd554ca34928573 --- geo-replication/gsyncd.conf.in | 6 ++++++ geo-replication/syncdaemon/master.py | 22 ++++++++++++---------- 2 files changed, 18 insertions(+), 10 deletions(-) (limited to 'geo-replication') diff --git a/geo-replication/gsyncd.conf.in b/geo-replication/gsyncd.conf.in index bd04400ff69..ce4ef9b663a 100644 --- a/geo-replication/gsyncd.conf.in +++ b/geo-replication/gsyncd.conf.in @@ -169,6 +169,12 @@ help=Do not sync deletes in Slave value = help= +[gfid-conflict-resolution] +value = true +validation=bool +type=bool +help=Disables automatic gfid conflict resolution while syncing + [working-dir] value = ${gsyncd_miscdir}/${master}_${primary_slave_host}_${slavevol}/ template=true diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py index 7985aac5439..3ca60c65a07 100644 --- a/geo-replication/syncdaemon/master.py +++ b/geo-replication/syncdaemon/master.py @@ -1163,16 +1163,18 @@ class GMasterChangelogMixin(GMasterCommon): 
self.status.inc_value("entry", len(entries)) failures = self.slave.server.entry_ops(entries) - count = 0 - while failures and count < self.MAX_OE_RETRIES: - count += 1 - self.handle_entry_failures(failures, entries) - logging.info("Retry original entries. count = %s" % count) - failures = self.slave.server.entry_ops(entries) - if not failures: - logging.info("Sucessfully fixed all entry ops with gfid " - "mismatch") - break + + if gconf.get("gfid-conflict-resolution"): + count = 0 + while failures and count < self.MAX_OE_RETRIES: + count += 1 + self.handle_entry_failures(failures, entries) + logging.info("Retry original entries. count = %s" % count) + failures = self.slave.server.entry_ops(entries) + if not failures: + logging.info("Sucessfully fixed all entry ops with " + "gfid mismatch") + break self.log_failures(failures, 'gfid', gauxpfx(), 'ENTRY') self.status.dec_value("entry", len(entries)) -- cgit