summaryrefslogtreecommitdiffstats
path: root/geo-replication/syncdaemon
diff options
context:
space:
mode:
authorHarpreet Kaur <hlalwani@redhat.com>2019-01-07 16:38:25 +0530
committerSunny Kumar <sunkumar@redhat.com>2020-01-31 10:13:14 +0000
commit06eb3d0b8488d12a92ea21b92727497c585bae4b (patch)
treeca705130b85fb5123eb51174862e391c242202d6 /geo-replication/syncdaemon
parent12bea344e4cc4fe2101aa7efb8a97606d69aa80f (diff)
geo-rep: Fix for "Transport End Point not connected" issue
problem: Geo-rep gsyncd process mounts the master and slave volume on master nodes and slave nodes respectively and starts the sync. But it doesn't wait for the mount to be in ready state to accept I/O. The gluster mount is considered to be ready when all the distribute sub-volumes is up. If the all the distribute subvolumes are not up, it can cause ENOTCONN error, when lookup on file comes and file is on the subvol that is down. solution: Added a Virtual Xattr "dht.subvol.status" which returns "1" if all subvols are up and "0" if all subvols are not up. Geo-rep then uses this virtual xattr after a fresh mount, to check whether all subvols are up or not and then starts the I/O. fixes: bz#1664335 Change-Id: If3ad01d728b1372da7c08ccbe75a45bdc1ab2a91 Signed-off-by: Harpreet Kaur <hlalwani@redhat.com> Signed-off-by: Kotresh HR <khiremat@redhat.com>
Diffstat (limited to 'geo-replication/syncdaemon')
-rw-r--r--geo-replication/syncdaemon/resource.py11
-rw-r--r--geo-replication/syncdaemon/syncdutils.py20
2 files changed, 28 insertions, 3 deletions
diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py
index ae5600d1d9a..c0ed408cf49 100644
--- a/geo-replication/syncdaemon/resource.py
+++ b/geo-replication/syncdaemon/resource.py
@@ -37,6 +37,7 @@ from syncdutils import NoStimeAvailable, PartialHistoryAvailable
from syncdutils import ChangelogException, ChangelogHistoryNotAvailable
from syncdutils import get_changelog_log_level, get_rsync_version
from syncdutils import GX_GFID_CANONICAL_LEN
+from syncdutils import gf_mount_ready
from gsyncdstatus import GeorepStatus
from syncdutils import lf, Popen, sup
from syncdutils import Xattr, matching_disk_gfid, get_gfid_from_mnt
@@ -950,6 +951,16 @@ class Mounter(object):
logging.exception('mount cleanup failure:')
rv = 200
os._exit(rv)
+
+ #Polling the dht.subvol.status value.
+ RETRIES = 10
+ while not gf_mount_ready():
+ if RETRIES < 0:
+ logging.error('Subvols are not up')
+ break
+ RETRIES -= 1
+ time.sleep(0.2)
+
logging.debug('auxiliary glusterfs mount prepared')
diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py
index 4b6f4a265f6..dc0366266b9 100644
--- a/geo-replication/syncdaemon/syncdutils.py
+++ b/geo-replication/syncdaemon/syncdutils.py
@@ -21,8 +21,8 @@ import subprocess
import socket
from subprocess import PIPE
from threading import Lock, Thread as baseThread
-from errno import EACCES, EAGAIN, EPIPE, ENOTCONN, ECONNABORTED
-from errno import EINTR, ENOENT, ESTALE, EBUSY, errorcode
+from errno import EACCES, EAGAIN, EPIPE, ENOTCONN, ENOMEM, ECONNABORTED
+from errno import EINTR, ENOENT, ESTALE, EBUSY, ENODATA, errorcode
from signal import signal, SIGTERM
import select as oselect
from os import waitpid as owaitpid
@@ -55,6 +55,8 @@ from rconf import rconf
from hashlib import sha256 as sha256
+ENOTSUP = getattr(errno, 'ENOTSUP', 'EOPNOTSUPP')
+
# auxiliary gfid based access prefix
_CL_AUX_GFID_PFX = ".gfid/"
ROOT_GFID = "00000000-0000-0000-0000-000000000001"
@@ -98,6 +100,19 @@ def unescape_space_newline(s):
.replace(NEWLINE_ESCAPE_CHAR, "\n")\
.replace(PERCENTAGE_ESCAPE_CHAR, "%")
+# gf_mount_ready() returns 1 if all subvols are up, else 0
+def gf_mount_ready():
+ ret = errno_wrap(Xattr.lgetxattr,
+ ['.', 'dht.subvol.status', 16],
+ [ENOENT, ENOTSUP, ENODATA], [ENOMEM])
+
+ if isinstance(ret, int):
+ logging.error("failed to get the xattr value")
+ return 1
+ ret = ret.rstrip('\x00')
+ if ret == "1":
+ return 1
+ return 0
def norm(s):
if s:
@@ -562,7 +577,6 @@ def errno_wrap(call, arg=[], errnos=[], retry_errnos=[]):
def lstat(e):
return errno_wrap(os.lstat, [e], [ENOENT], [ESTALE, EBUSY])
-
def get_gfid_from_mnt(gfidpath):
return errno_wrap(Xattr.lgetxattr,
[gfidpath, 'glusterfs.gfid.string',