summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKotresh H R <khiremat@redhat.com>2014-05-25 23:41:48 +0530
committerVenky Shankar <vshankar@redhat.com>2014-06-05 22:58:26 -0700
commit77498fdbbca8554880eae4b8f559b9d6876e35b7 (patch)
tree29662e2b91634c1abd1c107d5aeeb1d2a4434698
parent535003ca20a9dd00a09dd34ad26947d888aabe39 (diff)
feature/geo-rep: Fix to retain pause state of gsyncd on restart.
A new gsyncd option, '--pause-on-start', is introduced. When a node reboots, if the status is paused, gsyncd is started with this option. After gsyncd spawns the worker and agent, the worker sends SIGSTOP to the negative pid of the monitor (i.e. to the monitor's process group) to enter pause mode. Change-Id: I5aad82c9a9fc8c243f384940b77d25e26e520d6d BUG: 1101410 Signed-off-by: Kotresh H R <khiremat@redhat.com> Reviewed-on: http://review.gluster.org/7885 Reviewed-by: Aravinda VK <avishwan@redhat.com> Reviewed-by: Venky Shankar <vshankar@redhat.com> Tested-by: Venky Shankar <vshankar@redhat.com>
-rw-r--r--geo-replication/syncdaemon/gsyncd.py1
-rw-r--r--geo-replication/syncdaemon/monitor.py9
-rw-r--r--geo-replication/syncdaemon/resource.py8
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-geo-rep.c5
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c19
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.h2
6 files changed, 33 insertions, 11 deletions
diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py
index 7d463ad23f3..7ddd51267a6 100644
--- a/geo-replication/syncdaemon/gsyncd.py
+++ b/geo-replication/syncdaemon/gsyncd.py
@@ -227,6 +227,7 @@ def main_i():
op.add_option('--ignore-deletes', default=False, action='store_true')
op.add_option('--isolated-slave', default=False, action='store_true')
op.add_option('--use-rsync-xattrs', default=False, action='store_true')
+ op.add_option('--pause-on-start', default=False, action='store_true')
op.add_option('-L', '--log-level', metavar='LVL')
op.add_option('-r', '--remote-gsyncd', metavar='CMD',
default=os.path.abspath(sys.argv[0]))
diff --git a/geo-replication/syncdaemon/monitor.py b/geo-replication/syncdaemon/monitor.py
index f485fe18605..f3700c1a390 100644
--- a/geo-replication/syncdaemon/monitor.py
+++ b/geo-replication/syncdaemon/monitor.py
@@ -73,10 +73,11 @@ class Monitor(object):
"""class which spawns and manages gsyncd workers"""
ST_INIT = 'Initializing...'
+ ST_INIT_PAUSE = 'Initializing...(Paused)'
ST_STABLE = 'Stable'
ST_FAULTY = 'faulty'
ST_INCON = 'inconsistent'
- _ST_ORD = [ST_STABLE, ST_INIT, ST_FAULTY, ST_INCON]
+ _ST_ORD = [ST_STABLE, ST_INIT, ST_INIT_PAUSE, ST_FAULTY, ST_INCON]
def __init__(self):
self.lock = Lock()
@@ -128,7 +129,11 @@ class Monitor(object):
due to the keep-alive thread)
"""
- self.set_state(self.ST_INIT, w)
+ if gconf.pause_on_start:
+ self.set_state(self.ST_INIT_PAUSE, w)
+ else:
+ self.set_state(self.ST_INIT, w)
+
ret = 0
def nwait(p, o=0):
diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py
index dadfc965336..8192a54b0d4 100644
--- a/geo-replication/syncdaemon/resource.py
+++ b/geo-replication/syncdaemon/resource.py
@@ -13,6 +13,7 @@ import os
import sys
import stat
import time
+import signal
import fcntl
import errno
import types
@@ -1290,6 +1291,13 @@ class GLUSTER(AbstractUrl, SlaveLocal, SlaveRemote):
logging.debug("Changelog register failed: %s - %s" %
(e.errno, e.strerror))
+ # Check if gsyncd restarted in pause state. If
+ # yes, send SIGSTOP to negative of monitor pid
+ # to go back to pause state.
+ if gconf.pause_on_start:
+ os.kill(-os.getppid(), signal.SIGSTOP)
+ gconf.pause_on_start = False
+
# oneshot: Try to use changelog history api, if not
# available switch to FS crawl
# Note: if config.change_detector is xsync then
diff --git a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c
index 3e2e308ec13..aa3cc99fbff 100644
--- a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c
+++ b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c
@@ -3724,7 +3724,7 @@ glusterd_check_restart_gsync_session (glusterd_volinfo_t *volinfo, char *slave,
if (ret == 0)
ret = glusterd_start_gsync (volinfo, slave, path_list,
conf_path, uuid_utoa(MY_UUID),
- NULL);
+ NULL, _gf_false);
out:
gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
@@ -4499,7 +4499,8 @@ glusterd_op_gsync_set (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
}
ret = glusterd_start_gsync (volinfo, slave, path_list,
- conf_path, host_uuid, op_errstr);
+ conf_path, host_uuid, op_errstr,
+ _gf_false);
}
if (type == GF_GSYNC_OPTION_TYPE_STOP ||
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index a2a746d247e..15e91ad24dd 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -6572,6 +6572,7 @@ _local_gsyncd_start (dict_t *this, char *key, data_t *value, void *data)
char *op_errstr = NULL;
glusterd_conf_t *priv = NULL;
gf_boolean_t is_template_in_use = _gf_false;
+ gf_boolean_t is_paused = _gf_false;
GF_ASSERT (THIS);
priv = THIS->private;
@@ -6665,9 +6666,9 @@ _local_gsyncd_start (dict_t *this, char *key, data_t *value, void *data)
"%s and %s::%s. Not Restarting", volinfo->volname,
slave_ip, slave_vol);
goto out;
- }
-
- if ((!strcmp (buf, "Config Corrupted"))) {
+ } else if (strstr(buf, "Paused")) {
+ is_paused = _gf_true;
+ } else if ((!strcmp (buf, "Config Corrupted"))) {
gf_log ("", GF_LOG_INFO,
"Recovering from a corrupted config. "
"Not Restarting. Use start (force) to "
@@ -6677,8 +6678,12 @@ _local_gsyncd_start (dict_t *this, char *key, data_t *value, void *data)
goto out;
}
- glusterd_start_gsync (volinfo, slave, path_list, confpath,
- uuid_str, NULL);
+ if (is_paused)
+ glusterd_start_gsync (volinfo, slave, path_list, confpath,
+ uuid_str, NULL, _gf_true);
+ else
+ glusterd_start_gsync (volinfo, slave, path_list, confpath,
+ uuid_str, NULL, _gf_false);
out:
if (statefile)
@@ -8294,7 +8299,7 @@ int
glusterd_start_gsync (glusterd_volinfo_t *master_vol, char *slave,
char *path_list, char *conf_path,
char *glusterd_uuid_str,
- char **op_errstr)
+ char **op_errstr, gf_boolean_t is_pause)
{
int32_t ret = 0;
int32_t status = 0;
@@ -8356,6 +8361,8 @@ glusterd_start_gsync (glusterd_volinfo_t *master_vol, char *slave,
runner_argprintf (&runner, "--glusterd-uuid=%s",
uuid_utoa (priv->uuid));
runner_add_arg (&runner, slave);
+ if (is_pause)
+ runner_add_arg (&runner, "--pause-on-start");
synclock_unlock (&priv->big_lock);
ret = runner_run (&runner);
synclock_lock (&priv->big_lock);
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
index 4b6e2b0cdbc..834d4a52156 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
@@ -413,7 +413,7 @@ int
glusterd_start_gsync (glusterd_volinfo_t *master_vol, char *slave,
char *path_list, char *conf_path,
char *glusterd_uuid_str,
- char **op_errstr);
+ char **op_errstr, gf_boolean_t is_pause);
int
glusterd_get_local_brickpaths (glusterd_volinfo_t *volinfo,
char **pathlist);