From fa095c24979db2d0a3a6413aa431fe7256be5206 Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Wed, 21 Aug 2013 16:28:41 -0700 Subject: geo-replication: Use a md5 based unique control path A hostname (FQDN) can be up to 255 characters long according to RFC 1123 -------------------------> /usr/include/bits/posix1_lim.h:#define _POSIX_HOST_NAME_MAX 255 <------------------------- On Linux this length is 64 -------------------------> /usr/include/bits/local_lim.h:#define HOST_NAME_MAX 64 <------------------------- When a given hostname is longer than 45 characters, SSH fails with --------------------------> "ControlPath too long for Unix domain socket". <-------------------------- This indicates that the total length of the ControlPath exceeds the limit for a Unix domain socket path, which on Linux is 108 -------------------------> /usr/include/linux/un.h:#define UNIX_PATH_MAX 108 <------------------------- This leads to a "faulty" geo-replication status. This patch brings in a new file called a manifest which, for a given geo-rep session, carries some unique information from which a unique `md5` digest of 32 characters is generated. This ensures that we don't exceed the UNIX_PATH_MAX limitation: we take a conservative approach and are still able to provide a unique socket path. 
Change-Id: I3a6a27d605d751a86e7c82eace4561d9b0134fe1 BUG: 990330 Signed-off-by: Harshavardhana Reviewed-on: http://review.gluster.org/5681 Tested-by: Gluster Build System Reviewed-by: Csaba Henk --- geo-replication/syncdaemon/gconf.py | 5 ----- geo-replication/syncdaemon/resource.py | 10 ++++++++-- geo-replication/syncdaemon/syncdutils.py | 33 ++++++++++++++++++++++++++++++++ 3 files changed, 41 insertions(+), 7 deletions(-) (limited to 'geo-replication') diff --git a/geo-replication/syncdaemon/gconf.py b/geo-replication/syncdaemon/gconf.py index 146c72a1825..fe5795f16e2 100644 --- a/geo-replication/syncdaemon/gconf.py +++ b/geo-replication/syncdaemon/gconf.py @@ -12,9 +12,4 @@ class GConf(object): permanent_handles = [] log_metadata = {} - @classmethod - def setup_ssh_ctl(cls, ctld): - cls.ssh_ctl_dir = ctld - cls.ssh_ctl_args = ["-oControlMaster=auto", "-S", os.path.join(ctld, "gsycnd-ssh-%r@%h:%p")] - gconf = GConf() diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py index 4b0183b981d..401bca7f8e9 100644 --- a/geo-replication/syncdaemon/resource.py +++ b/geo-replication/syncdaemon/resource.py @@ -1115,9 +1115,15 @@ class SSH(AbstractUrl, SlaveRemote): """ if go_daemon == 'done': return self.start_fd_client(*self.fd_pair) - gconf.setup_ssh_ctl(tempfile.mkdtemp(prefix='gsyncd-aux-ssh-')) + + syncdutils.setup_ssh_ctl(tempfile.mkdtemp(prefix='gsyncd-aux-ssh-'), + self.remote_addr, + self.inner_rsc.url) + deferred = go_daemon == 'postconn' - ret = sup(self, gconf.ssh_command.split() + gconf.ssh_ctl_args + [self.remote_addr], slave=self.inner_rsc.url, deferred=deferred) + ret = sup(self, gconf.ssh_command.split() + gconf.ssh_ctl_args + [self.remote_addr], + slave=self.inner_rsc.url, deferred=deferred) + if deferred: # send a message to peer so that we can wait for # the answer from which we know connection is diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py index 
# Helpers added to geo-replication/syncdaemon/syncdutils.py by this patch.


def create_manifest(fname, content):
    """
    Create manifest file for SSH Control Path

    @fname is created if it does not exist yet and @content is written
    starting at its beginning; an already existing file is not
    truncated. Text content is encoded as UTF-8 before being written.
    Any OSError raised while opening or writing the file is propagated
    to the caller.
    """
    if not isinstance(content, (bytes, bytearray)):
        # os.write() accepts only bytes-like data on Python 3
        content = content.encode("utf-8")

    fd = os.open(fname, os.O_CREAT | os.O_RDWR)
    try:
        # os.write() may write less than requested, so keep going
        # until the whole content has been handed over
        while content:
            content = content[os.write(fd, content):]
    finally:
        # close exactly once: a second close after a failed write would
        # raise EBADF and hide the original error (or hit a descriptor
        # number that has been reused in the meantime)
        os.close(fd)


def setup_ssh_ctl(ctld, remote_addr, resource_url):
    """
    Setup GConf ssh control path parameters

    The session is described by a manifest made of @remote_addr and
    @resource_url. The manifest is stored in the @ctld directory as
    "<md5>.mft", and ssh is pointed to the control socket "<md5>.sock"
    in the same directory, <md5> being the md5hex() of the manifest
    content. The socket name thus has a fixed length which does not
    depend on the length of the slave hostname.

    Sets gconf.ssh_ctl_dir and gconf.ssh_ctl_args.
    """
    gconf.ssh_ctl_dir = ctld
    content = "SLAVE_HOST=%s\nSLAVE_RESOURCE_URL=%s" % (remote_addr,
                                                        resource_url)
    content_md5 = md5hex(content)

    # keep the manifest next to the socket so that the digest can be
    # mapped back to the session it belongs to
    fname = os.path.join(gconf.ssh_ctl_dir, "%s.mft" % content_md5)
    create_manifest(fname, content)

    ssh_ctl_path = os.path.join(gconf.ssh_ctl_dir, "%s.sock" % content_md5)
    gconf.ssh_ctl_args = ["-oControlMaster=auto", "-S", ssh_ctl_path]